PostgreSQL Source Code git master
Loading...
Searching...
No Matches
heapam.h File Reference
#include "access/heapam_xlog.h"
#include "access/relation.h"
#include "access/relscan.h"
#include "access/sdir.h"
#include "access/skey.h"
#include "access/table.h"
#include "access/tableam.h"
#include "commands/vacuum.h"
#include "nodes/lockoptions.h"
#include "nodes/primnodes.h"
#include "storage/bufpage.h"
#include "storage/dsm.h"
#include "storage/lockdefs.h"
#include "storage/read_stream.h"
#include "storage/shm_toc.h"
#include "utils/relcache.h"
#include "utils/snapshot.h"
Include dependency graph for heapam.h:
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Data Structures

struct  HeapScanDescData
 
struct  BitmapHeapScanDescData
 
struct  IndexFetchHeapData
 
struct  HeapTupleFreeze
 
struct  HeapPageFreeze
 
struct  PruneFreezeParams
 
struct  PruneFreezeResult
 
struct  BatchMVCCState
 

Macros

#define HEAP_INSERT_SKIP_FSM   TABLE_INSERT_SKIP_FSM
 
#define HEAP_INSERT_FROZEN   TABLE_INSERT_FROZEN
 
#define HEAP_INSERT_NO_LOGICAL   TABLE_INSERT_NO_LOGICAL
 
#define HEAP_INSERT_SPECULATIVE   0x0010
 
#define HEAP_PAGE_PRUNE_MARK_UNUSED_NOW   (1 << 0)
 
#define HEAP_PAGE_PRUNE_FREEZE   (1 << 1)
 
#define MaxLockTupleMode   LockTupleExclusive
 
#define HEAP_FREEZE_CHECK_XMIN_COMMITTED   0x01
 
#define HEAP_FREEZE_CHECK_XMAX_ABORTED   0x02
 

Typedefs

typedef struct BulkInsertStateData *BulkInsertState
 
typedef struct GlobalVisState GlobalVisState
 
typedef struct TupleTableSlot TupleTableSlot
 
typedef struct HeapScanDescData HeapScanDescData
 
typedef struct HeapScanDescData *HeapScanDesc
 
typedef struct BitmapHeapScanDescData BitmapHeapScanDescData
 
typedef struct BitmapHeapScanDescData *BitmapHeapScanDesc
 
typedef struct IndexFetchHeapData IndexFetchHeapData
 
typedef struct HeapTupleFreeze HeapTupleFreeze
 
typedef struct HeapPageFreeze HeapPageFreeze
 
typedef struct PruneFreezeParams PruneFreezeParams
 
typedef struct PruneFreezeResult PruneFreezeResult
 
typedef struct BatchMVCCState BatchMVCCState
 

Enumerations

enum  HTSV_Result {
  HEAPTUPLE_DEAD , HEAPTUPLE_LIVE , HEAPTUPLE_RECENTLY_DEAD , HEAPTUPLE_INSERT_IN_PROGRESS ,
  HEAPTUPLE_DELETE_IN_PROGRESS
}
 
enum  PruneReason { PRUNE_ON_ACCESS , PRUNE_VACUUM_SCAN , PRUNE_VACUUM_CLEANUP }
 

Functions

TableScanDesc heap_beginscan (Relation relation, Snapshot snapshot, int nkeys, ScanKey key, ParallelTableScanDesc parallel_scan, uint32 flags)
 
void heap_setscanlimits (TableScanDesc sscan, BlockNumber startBlk, BlockNumber numBlks)
 
void heap_prepare_pagescan (TableScanDesc sscan)
 
void heap_rescan (TableScanDesc sscan, ScanKey key, bool set_params, bool allow_strat, bool allow_sync, bool allow_pagemode)
 
void heap_endscan (TableScanDesc sscan)
 
HeapTuple heap_getnext (TableScanDesc sscan, ScanDirection direction)
 
bool heap_getnextslot (TableScanDesc sscan, ScanDirection direction, TupleTableSlot *slot)
 
void heap_set_tidrange (TableScanDesc sscan, ItemPointer mintid, ItemPointer maxtid)
 
bool heap_getnextslot_tidrange (TableScanDesc sscan, ScanDirection direction, TupleTableSlot *slot)
 
bool heap_fetch (Relation relation, Snapshot snapshot, HeapTuple tuple, Buffer *userbuf, bool keep_buf)
 
bool heap_hot_search_buffer (ItemPointer tid, Relation relation, Buffer buffer, Snapshot snapshot, HeapTuple heapTuple, bool *all_dead, bool first_call)
 
void heap_get_latest_tid (TableScanDesc sscan, ItemPointer tid)
 
BulkInsertState GetBulkInsertState (void)
 
void FreeBulkInsertState (BulkInsertState)
 
void ReleaseBulkInsertStatePin (BulkInsertState bistate)
 
void heap_insert (Relation relation, HeapTuple tup, CommandId cid, int options, BulkInsertState bistate)
 
void heap_multi_insert (Relation relation, TupleTableSlot **slots, int ntuples, CommandId cid, int options, BulkInsertState bistate)
 
TM_Result heap_delete (Relation relation, const ItemPointerData *tid, CommandId cid, Snapshot crosscheck, bool wait, TM_FailureData *tmfd, bool changingPart)
 
void heap_finish_speculative (Relation relation, const ItemPointerData *tid)
 
void heap_abort_speculative (Relation relation, const ItemPointerData *tid)
 
TM_Result heap_update (Relation relation, const ItemPointerData *otid, HeapTuple newtup, CommandId cid, Snapshot crosscheck, bool wait, TM_FailureData *tmfd, LockTupleMode *lockmode, TU_UpdateIndexes *update_indexes)
 
TM_Result heap_lock_tuple (Relation relation, HeapTuple tuple, CommandId cid, LockTupleMode mode, LockWaitPolicy wait_policy, bool follow_updates, Buffer *buffer, TM_FailureData *tmfd)
 
bool heap_inplace_lock (Relation relation, HeapTuple oldtup_ptr, Buffer buffer, void(*release_callback)(void *), void *arg)
 
void heap_inplace_update_and_unlock (Relation relation, HeapTuple oldtup, HeapTuple tuple, Buffer buffer)
 
void heap_inplace_unlock (Relation relation, HeapTuple oldtup, Buffer buffer)
 
bool heap_prepare_freeze_tuple (HeapTupleHeader tuple, const struct VacuumCutoffs *cutoffs, HeapPageFreeze *pagefrz, HeapTupleFreeze *frz, bool *totally_frozen)
 
void heap_pre_freeze_checks (Buffer buffer, HeapTupleFreeze *tuples, int ntuples)
 
void heap_freeze_prepared_tuples (Buffer buffer, HeapTupleFreeze *tuples, int ntuples)
 
bool heap_freeze_tuple (HeapTupleHeader tuple, TransactionId relfrozenxid, TransactionId relminmxid, TransactionId FreezeLimit, TransactionId MultiXactCutoff)
 
bool heap_tuple_should_freeze (HeapTupleHeader tuple, const struct VacuumCutoffs *cutoffs, TransactionId *NoFreezePageRelfrozenXid, MultiXactId *NoFreezePageRelminMxid)
 
bool heap_tuple_needs_eventual_freeze (HeapTupleHeader tuple)
 
void simple_heap_insert (Relation relation, HeapTuple tup)
 
void simple_heap_delete (Relation relation, const ItemPointerData *tid)
 
void simple_heap_update (Relation relation, const ItemPointerData *otid, HeapTuple tup, TU_UpdateIndexes *update_indexes)
 
TransactionId heap_index_delete_tuples (Relation rel, TM_IndexDeleteOp *delstate)
 
void heap_page_prune_opt (Relation relation, Buffer buffer)
 
void heap_page_prune_and_freeze (PruneFreezeParams *params, PruneFreezeResult *presult, OffsetNumber *off_loc, TransactionId *new_relfrozen_xid, MultiXactId *new_relmin_mxid)
 
void heap_page_prune_execute (Buffer buffer, bool lp_truncate_only, OffsetNumber *redirected, int nredirected, OffsetNumber *nowdead, int ndead, OffsetNumber *nowunused, int nunused)
 
void heap_get_root_tuples (Page page, OffsetNumber *root_offsets)
 
void log_heap_prune_and_freeze (Relation relation, Buffer buffer, Buffer vmbuffer, uint8 vmflags, TransactionId conflict_xid, bool cleanup_lock, PruneReason reason, HeapTupleFreeze *frozen, int nfrozen, OffsetNumber *redirected, int nredirected, OffsetNumber *dead, int ndead, OffsetNumber *unused, int nunused)
 
void heap_vacuum_rel (Relation rel, const VacuumParams params, BufferAccessStrategy bstrategy)
 
bool HeapTupleSatisfiesVisibility (HeapTuple htup, Snapshot snapshot, Buffer buffer)
 
TM_Result HeapTupleSatisfiesUpdate (HeapTuple htup, CommandId curcid, Buffer buffer)
 
HTSV_Result HeapTupleSatisfiesVacuum (HeapTuple htup, TransactionId OldestXmin, Buffer buffer)
 
HTSV_Result HeapTupleSatisfiesVacuumHorizon (HeapTuple htup, Buffer buffer, TransactionId *dead_after)
 
void HeapTupleSetHintBits (HeapTupleHeader tuple, Buffer buffer, uint16 infomask, TransactionId xid)
 
bool HeapTupleHeaderIsOnlyLocked (HeapTupleHeader tuple)
 
bool HeapTupleIsSurelyDead (HeapTuple htup, GlobalVisState *vistest)
 
int HeapTupleSatisfiesMVCCBatch (Snapshot snapshot, Buffer buffer, int ntups, BatchMVCCState *batchmvcc, OffsetNumber *vistuples_dense)
 
bool ResolveCminCmaxDuringDecoding (struct HTAB *tuplecid_data, Snapshot snapshot, HeapTuple htup, Buffer buffer, CommandId *cmin, CommandId *cmax)
 
void HeapCheckForSerializableConflictOut (bool visible, Relation relation, HeapTuple tuple, Buffer buffer, Snapshot snapshot)
 
static void heap_execute_freeze_tuple (HeapTupleHeader tuple, HeapTupleFreeze *frz)
 

Macro Definition Documentation

◆ HEAP_FREEZE_CHECK_XMAX_ABORTED

#define HEAP_FREEZE_CHECK_XMAX_ABORTED   0x02

Definition at line 138 of file heapam.h.

◆ HEAP_FREEZE_CHECK_XMIN_COMMITTED

#define HEAP_FREEZE_CHECK_XMIN_COMMITTED   0x01

Definition at line 137 of file heapam.h.

◆ HEAP_INSERT_FROZEN

#define HEAP_INSERT_FROZEN   TABLE_INSERT_FROZEN

Definition at line 38 of file heapam.h.

◆ HEAP_INSERT_NO_LOGICAL

#define HEAP_INSERT_NO_LOGICAL   TABLE_INSERT_NO_LOGICAL

Definition at line 39 of file heapam.h.

◆ HEAP_INSERT_SKIP_FSM

#define HEAP_INSERT_SKIP_FSM   TABLE_INSERT_SKIP_FSM

Definition at line 37 of file heapam.h.

◆ HEAP_INSERT_SPECULATIVE

#define HEAP_INSERT_SPECULATIVE   0x0010

Definition at line 40 of file heapam.h.

◆ HEAP_PAGE_PRUNE_FREEZE

#define HEAP_PAGE_PRUNE_FREEZE   (1 << 1)

Definition at line 44 of file heapam.h.

◆ HEAP_PAGE_PRUNE_MARK_UNUSED_NOW

#define HEAP_PAGE_PRUNE_MARK_UNUSED_NOW   (1 << 0)

Definition at line 43 of file heapam.h.

◆ MaxLockTupleMode

#define MaxLockTupleMode   LockTupleExclusive

Definition at line 51 of file heapam.h.

Typedef Documentation

◆ BatchMVCCState

◆ BitmapHeapScanDesc

Definition at line 110 of file heapam.h.

◆ BitmapHeapScanDescData

◆ BulkInsertState

Definition at line 46 of file heapam.h.

◆ GlobalVisState

Definition at line 47 of file heapam.h.

◆ HeapPageFreeze

◆ HeapScanDesc

Definition at line 102 of file heapam.h.

◆ HeapScanDescData

◆ HeapTupleFreeze

◆ IndexFetchHeapData

◆ PruneFreezeParams

◆ PruneFreezeResult

◆ TupleTableSlot

Definition at line 48 of file heapam.h.

Enumeration Type Documentation

◆ HTSV_Result

Enumerator
HEAPTUPLE_DEAD 
HEAPTUPLE_LIVE 
HEAPTUPLE_RECENTLY_DEAD 
HEAPTUPLE_INSERT_IN_PROGRESS 
HEAPTUPLE_DELETE_IN_PROGRESS 

Definition at line 124 of file heapam.h.

125{
126 HEAPTUPLE_DEAD, /* tuple is dead and deletable */
127 HEAPTUPLE_LIVE, /* tuple is live (committed, no deleter) */
128 HEAPTUPLE_RECENTLY_DEAD, /* tuple is dead, but not deletable yet */
129 HEAPTUPLE_INSERT_IN_PROGRESS, /* inserting xact is still in progress */
130 HEAPTUPLE_DELETE_IN_PROGRESS, /* deleting xact is still in progress */
HTSV_Result
Definition heapam.h:125
@ HEAPTUPLE_RECENTLY_DEAD
Definition heapam.h:128
@ HEAPTUPLE_INSERT_IN_PROGRESS
Definition heapam.h:129
@ HEAPTUPLE_LIVE
Definition heapam.h:127
@ HEAPTUPLE_DELETE_IN_PROGRESS
Definition heapam.h:130
@ HEAPTUPLE_DEAD
Definition heapam.h:126

◆ PruneReason

Enumerator
PRUNE_ON_ACCESS 
PRUNE_VACUUM_SCAN 
PRUNE_VACUUM_CLEANUP 

Definition at line 226 of file heapam.h.

227{
228 PRUNE_ON_ACCESS, /* on-access pruning */
229 PRUNE_VACUUM_SCAN, /* VACUUM 1st heap pass */
230 PRUNE_VACUUM_CLEANUP, /* VACUUM 2nd heap pass */
PruneReason
Definition heapam.h:227
@ PRUNE_VACUUM_CLEANUP
Definition heapam.h:230
@ PRUNE_ON_ACCESS
Definition heapam.h:228
@ PRUNE_VACUUM_SCAN
Definition heapam.h:229

Function Documentation

◆ FreeBulkInsertState()

void FreeBulkInsertState ( BulkInsertState  bistate)
extern

Definition at line 2092 of file heapam.c.

2093{
2094 if (bistate->current_buf != InvalidBuffer)
2095 ReleaseBuffer(bistate->current_buf);
2096 FreeAccessStrategy(bistate->strategy);
2097 pfree(bistate);
2098}
#define InvalidBuffer
Definition buf.h:25
void ReleaseBuffer(Buffer buffer)
Definition bufmgr.c:5502
void FreeAccessStrategy(BufferAccessStrategy strategy)
Definition freelist.c:643
void pfree(void *pointer)
Definition mcxt.c:1616
BufferAccessStrategy strategy
Definition hio.h:31
Buffer current_buf
Definition hio.h:32

References BulkInsertStateData::current_buf, FreeAccessStrategy(), InvalidBuffer, pfree(), ReleaseBuffer(), and BulkInsertStateData::strategy.

Referenced by ATRewriteTable(), CopyFrom(), CopyMultiInsertBufferCleanup(), deleteSplitPartitionContext(), intorel_shutdown(), MergePartitionsMoveRows(), and transientrel_shutdown().

◆ GetBulkInsertState()

BulkInsertState GetBulkInsertState ( void  )
extern

Definition at line 2075 of file heapam.c.

2076{
2077 BulkInsertState bistate;
2078
2081 bistate->current_buf = InvalidBuffer;
2082 bistate->next_free = InvalidBlockNumber;
2083 bistate->last_free = InvalidBlockNumber;
2084 bistate->already_extended_by = 0;
2085 return bistate;
2086}
#define InvalidBlockNumber
Definition block.h:33
@ BAS_BULKWRITE
Definition bufmgr.h:39
#define palloc_object(type)
Definition fe_memutils.h:74
BufferAccessStrategy GetAccessStrategy(BufferAccessStrategyType btype)
Definition freelist.c:461
struct BulkInsertStateData * BulkInsertState
Definition heapam.h:46
BlockNumber last_free
Definition hio.h:49
uint32 already_extended_by
Definition hio.h:50
BlockNumber next_free
Definition hio.h:48

References BulkInsertStateData::already_extended_by, BAS_BULKWRITE, BulkInsertStateData::current_buf, GetAccessStrategy(), InvalidBlockNumber, InvalidBuffer, BulkInsertStateData::last_free, BulkInsertStateData::next_free, palloc_object, and BulkInsertStateData::strategy.

Referenced by ATRewriteTable(), CopyFrom(), CopyMultiInsertBufferInit(), createSplitPartitionContext(), intorel_startup(), MergePartitionsMoveRows(), and transientrel_startup().

◆ heap_abort_speculative()

void heap_abort_speculative ( Relation  relation,
const ItemPointerData tid 
)
extern

Definition at line 6255 of file heapam.c.

6256{
6258 ItemId lp;
6259 HeapTupleData tp;
6260 Page page;
6261 BlockNumber block;
6262 Buffer buffer;
6263
6265
6266 block = ItemPointerGetBlockNumber(tid);
6267 buffer = ReadBuffer(relation, block);
6268 page = BufferGetPage(buffer);
6269
6271
6272 /*
6273 * Page can't be all visible, we just inserted into it, and are still
6274 * running.
6275 */
6276 Assert(!PageIsAllVisible(page));
6277
6280
6281 tp.t_tableOid = RelationGetRelid(relation);
6282 tp.t_data = (HeapTupleHeader) PageGetItem(page, lp);
6283 tp.t_len = ItemIdGetLength(lp);
6284 tp.t_self = *tid;
6285
6286 /*
6287 * Sanity check that the tuple really is a speculatively inserted tuple,
6288 * inserted by us.
6289 */
6290 if (tp.t_data->t_choice.t_heap.t_xmin != xid)
6291 elog(ERROR, "attempted to kill a tuple inserted by another transaction");
6292 if (!(IsToastRelation(relation) || HeapTupleHeaderIsSpeculative(tp.t_data)))
6293 elog(ERROR, "attempted to kill a non-speculative tuple");
6295
6296 /*
6297 * No need to check for serializable conflicts here. There is never a
6298 * need for a combo CID, either. No need to extract replica identity, or
6299 * do anything special with infomask bits.
6300 */
6301
6303
6304 /*
6305 * The tuple will become DEAD immediately. Flag that this page is a
6306 * candidate for pruning by setting xmin to TransactionXmin. While not
6307 * immediately prunable, it is the oldest xid we can cheaply determine
6308 * that's safe against wraparound / being older than the table's
6309 * relfrozenxid. To defend against the unlikely case of a new relation
6310 * having a newer relfrozenxid than our TransactionXmin, use relfrozenxid
6311 * if so (vacuum can't subsequently move relfrozenxid to beyond
6312 * TransactionXmin, so there's no race here).
6313 */
6315 {
6316 TransactionId relfrozenxid = relation->rd_rel->relfrozenxid;
6318
6319 if (TransactionIdPrecedes(TransactionXmin, relfrozenxid))
6320 prune_xid = relfrozenxid;
6321 else
6324 }
6325
6326 /* store transaction information of xact deleting the tuple */
6329
6330 /*
6331 * Set the tuple header xmin to InvalidTransactionId. This makes the
 6332 * tuple immediately invisible to everyone. (In particular, to any
6333 * transactions waiting on the speculative token, woken up later.)
6334 */
6336
6337 /* Clear the speculative insertion token too */
6338 tp.t_data->t_ctid = tp.t_self;
6339
6340 MarkBufferDirty(buffer);
6341
6342 /*
6343 * XLOG stuff
6344 *
6345 * The WAL records generated here match heap_delete(). The same recovery
6346 * routines are used.
6347 */
6348 if (RelationNeedsWAL(relation))
6349 {
6352
6354 xlrec.infobits_set = compute_infobits(tp.t_data->t_infomask,
6355 tp.t_data->t_infomask2);
6357 xlrec.xmax = xid;
6358
6362
6363 /* No replica identity & replication origin logged */
6364
6366
6367 PageSetLSN(page, recptr);
6368 }
6369
6371
6373
6374 if (HeapTupleHasExternal(&tp))
6375 {
6376 Assert(!IsToastRelation(relation));
6377 heap_toast_delete(relation, &tp, true);
6378 }
6379
6380 /*
6381 * Never need to mark tuple for invalidation, since catalogs don't support
6382 * speculative insertion
6383 */
6384
6385 /* Now we can release the buffer */
6386 ReleaseBuffer(buffer);
6387
6388 /* count deletion, as we counted the insertion too */
6389 pgstat_count_heap_delete(relation);
6390}
uint32 BlockNumber
Definition block.h:31
int Buffer
Definition buf.h:23
void MarkBufferDirty(Buffer buffer)
Definition bufmgr.c:3057
Buffer ReadBuffer(Relation reln, BlockNumber blockNum)
Definition bufmgr.c:865
static Page BufferGetPage(Buffer buffer)
Definition bufmgr.h:466
@ BUFFER_LOCK_EXCLUSIVE
Definition bufmgr.h:220
@ BUFFER_LOCK_UNLOCK
Definition bufmgr.h:205
static void LockBuffer(Buffer buffer, BufferLockMode mode)
Definition bufmgr.h:328
static bool PageIsAllVisible(const PageData *page)
Definition bufpage.h:428
static ItemId PageGetItemId(Page page, OffsetNumber offsetNumber)
Definition bufpage.h:243
static void * PageGetItem(PageData *page, const ItemIdData *itemId)
Definition bufpage.h:353
static void PageSetLSN(Page page, XLogRecPtr lsn)
Definition bufpage.h:390
PageData * Page
Definition bufpage.h:81
#define PageSetPrunable(page, xid)
Definition bufpage.h:446
#define Assert(condition)
Definition c.h:885
uint32 TransactionId
Definition c.h:678
bool IsToastRelation(Relation relation)
Definition catalog.c:206
#define ERROR
Definition elog.h:39
#define elog(elevel,...)
Definition elog.h:226
static uint8 compute_infobits(uint16 infomask, uint16 infomask2)
Definition heapam.c:2798
#define XLOG_HEAP_DELETE
Definition heapam_xlog.h:34
#define SizeOfHeapDelete
#define XLH_DELETE_IS_SUPER
void heap_toast_delete(Relation rel, HeapTuple oldtup, bool is_speculative)
Definition heaptoast.c:43
HeapTupleHeaderData * HeapTupleHeader
Definition htup.h:23
static bool HeapTupleHasExternal(const HeapTupleData *tuple)
#define HEAP_XMAX_BITS
static bool HeapTupleHeaderIsHeapOnly(const HeapTupleHeaderData *tup)
#define HEAP_MOVED
static bool HeapTupleHeaderIsSpeculative(const HeapTupleHeaderData *tup)
static void HeapTupleHeaderSetXmin(HeapTupleHeaderData *tup, TransactionId xid)
#define ItemIdGetLength(itemId)
Definition itemid.h:59
#define ItemIdIsNormal(itemId)
Definition itemid.h:99
static OffsetNumber ItemPointerGetOffsetNumber(const ItemPointerData *pointer)
Definition itemptr.h:124
static BlockNumber ItemPointerGetBlockNumber(const ItemPointerData *pointer)
Definition itemptr.h:103
static bool ItemPointerIsValid(const ItemPointerData *pointer)
Definition itemptr.h:83
#define START_CRIT_SECTION()
Definition miscadmin.h:150
#define END_CRIT_SECTION()
Definition miscadmin.h:152
void pgstat_count_heap_delete(Relation rel)
static int fb(int x)
#define RelationGetRelid(relation)
Definition rel.h:514
#define RelationNeedsWAL(relation)
Definition rel.h:637
TransactionId TransactionXmin
Definition snapmgr.c:159
ItemPointerData t_self
Definition htup.h:65
uint32 t_len
Definition htup.h:64
HeapTupleHeader t_data
Definition htup.h:68
Oid t_tableOid
Definition htup.h:66
TransactionId t_xmin
union HeapTupleHeaderData::@51 t_choice
ItemPointerData t_ctid
HeapTupleFields t_heap
Form_pg_class rd_rel
Definition rel.h:111
#define InvalidTransactionId
Definition transam.h:31
#define TransactionIdIsValid(xid)
Definition transam.h:41
static bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition transam.h:263
TransactionId GetCurrentTransactionId(void)
Definition xact.c:455
uint64 XLogRecPtr
Definition xlogdefs.h:21
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition xloginsert.c:478
void XLogRegisterData(const void *data, uint32 len)
Definition xloginsert.c:368
void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
Definition xloginsert.c:245
void XLogBeginInsert(void)
Definition xloginsert.c:152
#define REGBUF_STANDARD
Definition xloginsert.h:35

References Assert, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetPage(), compute_infobits(), elog, END_CRIT_SECTION, ERROR, fb(), xl_heap_delete::flags, GetCurrentTransactionId(), HEAP_MOVED, heap_toast_delete(), HEAP_XMAX_BITS, HeapTupleHasExternal(), HeapTupleHeaderIsHeapOnly(), HeapTupleHeaderIsSpeculative(), HeapTupleHeaderSetXmin(), InvalidTransactionId, IsToastRelation(), ItemIdGetLength, ItemIdIsNormal, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), ItemPointerIsValid(), LockBuffer(), MarkBufferDirty(), PageGetItem(), PageGetItemId(), PageIsAllVisible(), PageSetLSN(), PageSetPrunable, pgstat_count_heap_delete(), RelationData::rd_rel, ReadBuffer(), REGBUF_STANDARD, RelationGetRelid, RelationNeedsWAL, ReleaseBuffer(), SizeOfHeapDelete, START_CRIT_SECTION, HeapTupleHeaderData::t_choice, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleHeaderData::t_heap, HeapTupleHeaderData::t_infomask, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, HeapTupleFields::t_xmin, TransactionIdIsValid, TransactionIdPrecedes(), TransactionXmin, XLH_DELETE_IS_SUPER, XLOG_HEAP_DELETE, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by heapam_tuple_complete_speculative(), and toast_delete_datum().

◆ heap_beginscan()

TableScanDesc heap_beginscan ( Relation  relation,
Snapshot  snapshot,
int  nkeys,
ScanKey  key,
ParallelTableScanDesc  parallel_scan,
uint32  flags 
)
extern

Definition at line 1164 of file heapam.c.

1168{
1169 HeapScanDesc scan;
1170
1171 /*
1172 * increment relation ref count while scanning relation
1173 *
1174 * This is just to make really sure the relcache entry won't go away while
1175 * the scan has a pointer to it. Caller should be holding the rel open
1176 * anyway, so this is redundant in all normal scenarios...
1177 */
1179
1180 /*
1181 * allocate and initialize scan descriptor
1182 */
1183 if (flags & SO_TYPE_BITMAPSCAN)
1184 {
1186
1187 /*
1188 * Bitmap Heap scans do not have any fields that a normal Heap Scan
1189 * does not have, so no special initializations required here.
1190 */
1191 scan = (HeapScanDesc) bscan;
1192 }
1193 else
1195
1196 scan->rs_base.rs_rd = relation;
1197 scan->rs_base.rs_snapshot = snapshot;
1198 scan->rs_base.rs_nkeys = nkeys;
1199 scan->rs_base.rs_flags = flags;
1200 scan->rs_base.rs_parallel = parallel_scan;
1201 scan->rs_strategy = NULL; /* set in initscan */
1202 scan->rs_cbuf = InvalidBuffer;
1203
1204 /*
1205 * Disable page-at-a-time mode if it's not a MVCC-safe snapshot.
1206 */
1207 if (!(snapshot && IsMVCCSnapshot(snapshot)))
1209
1210 /* Check that a historic snapshot is not used for non-catalog tables */
1211 if (snapshot &&
1212 IsHistoricMVCCSnapshot(snapshot) &&
1214 {
1215 ereport(ERROR,
1217 errmsg("cannot query non-catalog table \"%s\" during logical decoding",
1218 RelationGetRelationName(relation))));
1219 }
1220
1221 /*
1222 * For seqscan and sample scans in a serializable transaction, acquire a
1223 * predicate lock on the entire relation. This is required not only to
1224 * lock all the matching tuples, but also to conflict with new insertions
1225 * into the table. In an indexscan, we take page locks on the index pages
1226 * covering the range specified in the scan qual, but in a heap scan there
1227 * is nothing more fine-grained to lock. A bitmap scan is a different
1228 * story, there we have already scanned the index and locked the index
1229 * pages covering the predicate. But in that case we still have to lock
1230 * any matching heap tuples. For sample scan we could optimize the locking
1231 * to be at least page-level granularity, but we'd need to add per-tuple
1232 * locking for that.
1233 */
1235 {
1236 /*
1237 * Ensure a missing snapshot is noticed reliably, even if the
1238 * isolation mode means predicate locking isn't performed (and
1239 * therefore the snapshot isn't used here).
1240 */
1241 Assert(snapshot);
1242 PredicateLockRelation(relation, snapshot);
1243 }
1244
1245 /* we only need to set this up once */
1246 scan->rs_ctup.t_tableOid = RelationGetRelid(relation);
1247
1248 /*
1249 * Allocate memory to keep track of page allocation for parallel workers
1250 * when doing a parallel scan.
1251 */
1252 if (parallel_scan != NULL)
1254 else
1256
1257 /*
1258 * we do this here instead of in initscan() because heap_rescan also calls
1259 * initscan() and we don't want to allocate memory again
1260 */
1261 if (nkeys > 0)
1262 scan->rs_base.rs_key = palloc_array(ScanKeyData, nkeys);
1263 else
1264 scan->rs_base.rs_key = NULL;
1265
1266 initscan(scan, key, false);
1267
1268 scan->rs_read_stream = NULL;
1269
1270 /*
1271 * Set up a read stream for sequential scans and TID range scans. This
1272 * should be done after initscan() because initscan() allocates the
1273 * BufferAccessStrategy object passed to the read stream API.
1274 */
1275 if (scan->rs_base.rs_flags & SO_TYPE_SEQSCAN ||
1277 {
1279
1280 if (scan->rs_base.rs_parallel)
1282 else
1284
1285 /* ---
1286 * It is safe to use batchmode as the only locks taken by `cb`
1287 * are never taken while waiting for IO:
1288 * - SyncScanLock is used in the non-parallel case
1289 * - in the parallel case, only spinlocks and atomics are used
1290 * ---
1291 */
1294 scan->rs_strategy,
1295 scan->rs_base.rs_rd,
1297 cb,
1298 scan,
1299 0);
1300 }
1301 else if (scan->rs_base.rs_flags & SO_TYPE_BITMAPSCAN)
1302 {
1305 scan->rs_strategy,
1306 scan->rs_base.rs_rd,
1309 scan,
1310 sizeof(TBMIterateResult));
1311 }
1312
1313
1314 return (TableScanDesc) scan;
1315}
int errcode(int sqlerrcode)
Definition elog.c:874
int errmsg(const char *fmt,...)
Definition elog.c:1093
#define ereport(elevel,...)
Definition elog.h:150
#define palloc_array(type, count)
Definition fe_memutils.h:76
static BlockNumber heap_scan_stream_read_next_parallel(ReadStream *stream, void *callback_private_data, void *per_buffer_data)
Definition heapam.c:252
static BlockNumber heap_scan_stream_read_next_serial(ReadStream *stream, void *callback_private_data, void *per_buffer_data)
Definition heapam.c:292
static BlockNumber bitmapheap_stream_read_next(ReadStream *pgsr, void *private_data, void *per_buffer_data)
Definition heapam.c:317
static void initscan(HeapScanDesc scan, ScanKey key, bool keep_startblock)
Definition heapam.c:357
struct HeapScanDescData * HeapScanDesc
Definition heapam.h:102
void PredicateLockRelation(Relation relation, Snapshot snapshot)
Definition predicate.c:2574
ReadStream * read_stream_begin_relation(int flags, BufferAccessStrategy strategy, Relation rel, ForkNumber forknum, ReadStreamBlockNumberCB callback, void *callback_private_data, size_t per_buffer_data_size)
#define READ_STREAM_USE_BATCHING
Definition read_stream.h:64
BlockNumber(* ReadStreamBlockNumberCB)(ReadStream *stream, void *callback_private_data, void *per_buffer_data)
Definition read_stream.h:77
#define READ_STREAM_DEFAULT
Definition read_stream.h:21
#define READ_STREAM_SEQUENTIAL
Definition read_stream.h:36
#define RelationGetRelationName(relation)
Definition rel.h:548
#define RelationIsAccessibleInLogicalDecoding(relation)
Definition rel.h:693
void RelationIncrementReferenceCount(Relation rel)
Definition relcache.c:2182
@ MAIN_FORKNUM
Definition relpath.h:58
#define IsHistoricMVCCSnapshot(snapshot)
Definition snapmgr.h:59
#define IsMVCCSnapshot(snapshot)
Definition snapmgr.h:55
BufferAccessStrategy rs_strategy
Definition heapam.h:73
Buffer rs_cbuf
Definition heapam.h:70
ParallelBlockTableScanWorkerData * rs_parallelworkerdata
Definition heapam.h:95
HeapTupleData rs_ctup
Definition heapam.h:75
ReadStream * rs_read_stream
Definition heapam.h:78
TableScanDescData rs_base
Definition heapam.h:58
Relation rs_rd
Definition relscan.h:35
uint32 rs_flags
Definition relscan.h:63
struct ScanKeyData * rs_key
Definition relscan.h:38
struct SnapshotData * rs_snapshot
Definition relscan.h:36
struct ParallelTableScanDescData * rs_parallel
Definition relscan.h:65
@ SO_TYPE_TIDRANGESCAN
Definition tableam.h:53
@ SO_TYPE_SAMPLESCAN
Definition tableam.h:51
@ SO_TYPE_SEQSCAN
Definition tableam.h:49
@ SO_TYPE_BITMAPSCAN
Definition tableam.h:50

References Assert, bitmapheap_stream_read_next(), ereport, errcode(), errmsg(), ERROR, fb(), heap_scan_stream_read_next_parallel(), heap_scan_stream_read_next_serial(), initscan(), InvalidBuffer, IsHistoricMVCCSnapshot, IsMVCCSnapshot, MAIN_FORKNUM, palloc_array, palloc_object, PredicateLockRelation(), read_stream_begin_relation(), READ_STREAM_DEFAULT, READ_STREAM_SEQUENTIAL, READ_STREAM_USE_BATCHING, RelationGetRelationName, RelationGetRelid, RelationIncrementReferenceCount(), RelationIsAccessibleInLogicalDecoding, HeapScanDescData::rs_base, HeapScanDescData::rs_cbuf, HeapScanDescData::rs_ctup, TableScanDescData::rs_flags, TableScanDescData::rs_key, TableScanDescData::rs_nkeys, TableScanDescData::rs_parallel, HeapScanDescData::rs_parallelworkerdata, TableScanDescData::rs_rd, HeapScanDescData::rs_read_stream, TableScanDescData::rs_snapshot, HeapScanDescData::rs_strategy, SO_TYPE_BITMAPSCAN, SO_TYPE_SAMPLESCAN, SO_TYPE_SEQSCAN, SO_TYPE_TIDRANGESCAN, and HeapTupleData::t_tableOid.

◆ heap_delete()

TM_Result heap_delete ( Relation  relation,
const ItemPointerData tid,
CommandId  cid,
Snapshot  crosscheck,
bool  wait,
TM_FailureData tmfd,
bool  changingPart 
)
extern

Definition at line 2843 of file heapam.c.

2846{
2847 TM_Result result;
2849 ItemId lp;
2850 HeapTupleData tp;
2851 Page page;
2852 BlockNumber block;
2853 Buffer buffer;
2854 Buffer vmbuffer = InvalidBuffer;
2855 TransactionId new_xmax;
2858 bool have_tuple_lock = false;
2859 bool iscombo;
2860 bool all_visible_cleared = false;
2861 HeapTuple old_key_tuple = NULL; /* replica identity of the tuple */
2862 bool old_key_copied = false;
2863
2865
2866 AssertHasSnapshotForToast(relation);
2867
2868 /*
2869 * Forbid this during a parallel operation, lest it allocate a combo CID.
2870 * Other workers might need that combo CID for visibility checks, and we
2871 * have no provision for broadcasting it to them.
2872 */
2873 if (IsInParallelMode())
2874 ereport(ERROR,
2876 errmsg("cannot delete tuples during a parallel operation")));
2877
2878 block = ItemPointerGetBlockNumber(tid);
2879 buffer = ReadBuffer(relation, block);
2880 page = BufferGetPage(buffer);
2881
2882 /*
2883 * Before locking the buffer, pin the visibility map page if it appears to
2884 * be necessary. Since we haven't got the lock yet, someone else might be
2885 * in the middle of changing this, so we'll need to recheck after we have
2886 * the lock.
2887 */
2888 if (PageIsAllVisible(page))
2889 visibilitymap_pin(relation, block, &vmbuffer);
2890
2892
2895
2896 tp.t_tableOid = RelationGetRelid(relation);
2897 tp.t_data = (HeapTupleHeader) PageGetItem(page, lp);
2898 tp.t_len = ItemIdGetLength(lp);
2899 tp.t_self = *tid;
2900
2901l1:
2902
2903 /*
2904 * If we didn't pin the visibility map page and the page has become all
2905 * visible while we were busy locking the buffer, we'll have to unlock and
2906 * re-lock, to avoid holding the buffer lock across an I/O. That's a bit
2907 * unfortunate, but hopefully shouldn't happen often.
2908 */
2909 if (vmbuffer == InvalidBuffer && PageIsAllVisible(page))
2910 {
2912 visibilitymap_pin(relation, block, &vmbuffer);
2914 }
2915
2916 result = HeapTupleSatisfiesUpdate(&tp, cid, buffer);
2917
2918 if (result == TM_Invisible)
2919 {
2920 UnlockReleaseBuffer(buffer);
2921 ereport(ERROR,
2923 errmsg("attempted to delete invisible tuple")));
2924 }
2925 else if (result == TM_BeingModified && wait)
2926 {
2929
2930 /* must copy state data before unlocking buffer */
2933
2934 /*
2935 * Sleep until concurrent transaction ends -- except when there's a
2936 * single locker and it's our own transaction. Note we don't care
2937 * which lock mode the locker has, because we need the strongest one.
2938 *
2939 * Before sleeping, we need to acquire tuple lock to establish our
2940 * priority for the tuple (see heap_lock_tuple). LockTuple will
2941 * release us when we are next-in-line for the tuple.
2942 *
2943 * If we are forced to "start over" below, we keep the tuple lock;
2944 * this arranges that we stay at the head of the line while rechecking
2945 * tuple state.
2946 */
2948 {
2949 bool current_is_member = false;
2950
2953 {
2955
2956 /*
2957 * Acquire the lock, if necessary (but skip it when we're
2958 * requesting a lock and already have one; avoids deadlock).
2959 */
2960 if (!current_is_member)
2963
2964 /* wait for multixact */
2966 relation, &(tp.t_self), XLTW_Delete,
2967 NULL);
2969
2970 /*
2971 * If xwait had just locked the tuple then some other xact
2972 * could update this tuple before we get to this point. Check
2973 * for xmax change, and start over if so.
2974 *
2975 * We also must start over if we didn't pin the VM page, and
2976 * the page has become all visible.
2977 */
2978 if ((vmbuffer == InvalidBuffer && PageIsAllVisible(page)) ||
2981 xwait))
2982 goto l1;
2983 }
2984
2985 /*
2986 * You might think the multixact is necessarily done here, but not
2987 * so: it could have surviving members, namely our own xact or
2988 * other subxacts of this backend. It is legal for us to delete
2989 * the tuple in either case, however (the latter case is
2990 * essentially a situation of upgrading our former shared lock to
2991 * exclusive). We don't bother changing the on-disk hint bits
2992 * since we are about to overwrite the xmax altogether.
2993 */
2994 }
2996 {
2997 /*
2998 * Wait for regular transaction to end; but first, acquire tuple
2999 * lock.
3000 */
3004 XactLockTableWait(xwait, relation, &(tp.t_self), XLTW_Delete);
3006
3007 /*
3008 * xwait is done, but if xwait had just locked the tuple then some
3009 * other xact could update this tuple before we get to this point.
3010 * Check for xmax change, and start over if so.
3011 *
3012 * We also must start over if we didn't pin the VM page, and the
3013 * page has become all visible.
3014 */
3015 if ((vmbuffer == InvalidBuffer && PageIsAllVisible(page)) ||
3018 xwait))
3019 goto l1;
3020
3021 /* Otherwise check if it committed or aborted */
3022 UpdateXmaxHintBits(tp.t_data, buffer, xwait);
3023 }
3024
3025 /*
3026 * We may overwrite if previous xmax aborted, or if it committed but
3027 * only locked the tuple without updating it.
3028 */
3029 if ((tp.t_data->t_infomask & HEAP_XMAX_INVALID) ||
3032 result = TM_Ok;
3033 else if (!ItemPointerEquals(&tp.t_self, &tp.t_data->t_ctid))
3034 result = TM_Updated;
3035 else
3036 result = TM_Deleted;
3037 }
3038
3039 /* sanity check the result HeapTupleSatisfiesUpdate() and the logic above */
3040 if (result != TM_Ok)
3041 {
3042 Assert(result == TM_SelfModified ||
3043 result == TM_Updated ||
3044 result == TM_Deleted ||
3045 result == TM_BeingModified);
3047 Assert(result != TM_Updated ||
3049 }
3050
3051 if (crosscheck != InvalidSnapshot && result == TM_Ok)
3052 {
3053 /* Perform additional check for transaction-snapshot mode RI updates */
3054 if (!HeapTupleSatisfiesVisibility(&tp, crosscheck, buffer))
3055 result = TM_Updated;
3056 }
3057
3058 if (result != TM_Ok)
3059 {
3060 tmfd->ctid = tp.t_data->t_ctid;
3062 if (result == TM_SelfModified)
3064 else
3065 tmfd->cmax = InvalidCommandId;
3066 UnlockReleaseBuffer(buffer);
3067 if (have_tuple_lock)
3069 if (vmbuffer != InvalidBuffer)
3070 ReleaseBuffer(vmbuffer);
3071 return result;
3072 }
3073
3074 /*
3075 * We're about to do the actual delete -- check for conflict first, to
3076 * avoid possibly having to roll back work we've just done.
3077 *
3078 * This is safe without a recheck as long as there is no possibility of
3079 * another process scanning the page between this check and the delete
3080 * being visible to the scan (i.e., an exclusive buffer content lock is
3081 * continuously held from this point until the tuple delete is visible).
3082 */
3084
3085 /* replace cid with a combo CID if necessary */
3087
3088 /*
3089 * Compute replica identity tuple before entering the critical section so
3090 * we don't PANIC upon a memory allocation failure.
3091 */
3092 old_key_tuple = ExtractReplicaIdentity(relation, &tp, true, &old_key_copied);
3093
3094 /*
3095 * If this is the first possibly-multixact-able operation in the current
3096 * transaction, set my per-backend OldestMemberMXactId setting. We can be
3097 * certain that the transaction will never become a member of any older
3098 * MultiXactIds than that. (We have to do this even if we end up just
3099 * using our own TransactionId below, since some other backend could
3100 * incorporate our XID into a MultiXact immediately afterwards.)
3101 */
3103
3106 xid, LockTupleExclusive, true,
3107 &new_xmax, &new_infomask, &new_infomask2);
3108
3110
3111 /*
3112 * If this transaction commits, the tuple will become DEAD sooner or
3113 * later. Set flag that this page is a candidate for pruning once our xid
3114 * falls below the OldestXmin horizon. If the transaction finally aborts,
3115 * the subsequent page pruning will be a no-op and the hint will be
3116 * cleared.
3117 */
3118 PageSetPrunable(page, xid);
3119
3120 if (PageIsAllVisible(page))
3121 {
3122 all_visible_cleared = true;
3123 PageClearAllVisible(page);
3124 visibilitymap_clear(relation, BufferGetBlockNumber(buffer),
3125 vmbuffer, VISIBILITYMAP_VALID_BITS);
3126 }
3127
3128 /* store transaction information of xact deleting the tuple */
3134 HeapTupleHeaderSetXmax(tp.t_data, new_xmax);
3136 /* Make sure there is no forward chain link in t_ctid */
3137 tp.t_data->t_ctid = tp.t_self;
3138
3139 /* Signal that this is actually a move into another partition */
3140 if (changingPart)
3142
3143 MarkBufferDirty(buffer);
3144
3145 /*
3146 * XLOG stuff
3147 *
3148 * NB: heap_abort_speculative() uses the same xlog record and replay
3149 * routines.
3150 */
3151 if (RelationNeedsWAL(relation))
3152 {
3156
3157 /*
3158 * For logical decode we need combo CIDs to properly decode the
3159 * catalog
3160 */
3162 log_heap_new_cid(relation, &tp);
3163
3164 xlrec.flags = 0;
3167 if (changingPart)
3169 xlrec.infobits_set = compute_infobits(tp.t_data->t_infomask,
3170 tp.t_data->t_infomask2);
3172 xlrec.xmax = new_xmax;
3173
3174 if (old_key_tuple != NULL)
3175 {
3176 if (relation->rd_rel->relreplident == REPLICA_IDENTITY_FULL)
3178 else
3180 }
3181
3184
3186
3187 /*
3188 * Log replica identity of the deleted tuple if there is one
3189 */
3190 if (old_key_tuple != NULL)
3191 {
3192 xlhdr.t_infomask2 = old_key_tuple->t_data->t_infomask2;
3193 xlhdr.t_infomask = old_key_tuple->t_data->t_infomask;
3194 xlhdr.t_hoff = old_key_tuple->t_data->t_hoff;
3195
3197 XLogRegisterData((char *) old_key_tuple->t_data
3199 old_key_tuple->t_len
3201 }
3202
3203 /* filtering by origin on a row level is much more efficient */
3205
3207
3208 PageSetLSN(page, recptr);
3209 }
3210
3212
3214
3215 if (vmbuffer != InvalidBuffer)
3216 ReleaseBuffer(vmbuffer);
3217
3218 /*
3219 * If the tuple has toasted out-of-line attributes, we need to delete
3220 * those items too. We have to do this before releasing the buffer
3221 * because we need to look at the contents of the tuple, but it's OK to
3222 * release the content lock on the buffer first.
3223 */
3224 if (relation->rd_rel->relkind != RELKIND_RELATION &&
3225 relation->rd_rel->relkind != RELKIND_MATVIEW)
3226 {
3227 /* toast table entries should never be recursively toasted */
3229 }
3230 else if (HeapTupleHasExternal(&tp))
3231 heap_toast_delete(relation, &tp, false);
3232
3233 /*
3234 * Mark tuple for invalidation from system caches at next command
3235 * boundary. We have to do this before releasing the buffer because we
3236 * need to look at the contents of the tuple.
3237 */
3238 CacheInvalidateHeapTuple(relation, &tp, NULL);
3239
3240 /* Now we can release the buffer */
3241 ReleaseBuffer(buffer);
3242
3243 /*
3244 * Release the lmgr tuple lock, if we had it.
3245 */
3246 if (have_tuple_lock)
3248
3249 pgstat_count_heap_delete(relation);
3250
3253
3254 return TM_Ok;
3255}
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition bufmgr.c:4357
void UnlockReleaseBuffer(Buffer buffer)
Definition bufmgr.c:5519
static void PageClearAllVisible(Page page)
Definition bufpage.h:438
#define InvalidCommandId
Definition c.h:695
TransactionId MultiXactId
Definition c.h:688
uint16_t uint16
Definition c.h:557
void HeapTupleHeaderAdjustCmax(const HeapTupleHeaderData *tup, CommandId *cmax, bool *iscombo)
Definition combocid.c:153
CommandId HeapTupleHeaderGetCmax(const HeapTupleHeaderData *tup)
Definition combocid.c:118
static bool DoesMultiXactIdConflict(MultiXactId multi, uint16 infomask, LockTupleMode lockmode, bool *current_is_member)
Definition heapam.c:7676
static XLogRecPtr log_heap_new_cid(Relation relation, HeapTuple tup)
Definition heapam.c:9141
static void compute_new_xmax_infomask(TransactionId xmax, uint16 old_infomask, uint16 old_infomask2, TransactionId add_to_xmax, LockTupleMode mode, bool is_update, TransactionId *result_xmax, uint16 *result_infomask, uint16 *result_infomask2)
Definition heapam.c:5395
static bool heap_acquire_tuplock(Relation relation, const ItemPointerData *tid, LockTupleMode mode, LockWaitPolicy wait_policy, bool *have_tuple_lock)
Definition heapam.c:5346
static HeapTuple ExtractReplicaIdentity(Relation relation, HeapTuple tp, bool key_required, bool *copy)
Definition heapam.c:9222
static void MultiXactIdWait(MultiXactId multi, MultiXactStatus status, uint16 infomask, Relation rel, const ItemPointerData *ctid, XLTW_Oper oper, int *remaining)
Definition heapam.c:7854
static bool xmax_infomask_changed(uint16 new_infomask, uint16 old_infomask)
Definition heapam.c:2820
#define UnlockTupleTuplock(rel, tup, mode)
Definition heapam.c:169
static void AssertHasSnapshotForToast(Relation rel)
Definition heapam.c:225
static void UpdateXmaxHintBits(HeapTupleHeader tuple, Buffer buffer, TransactionId xid)
Definition heapam.c:2053
bool HeapTupleSatisfiesVisibility(HeapTuple htup, Snapshot snapshot, Buffer buffer)
bool HeapTupleHeaderIsOnlyLocked(HeapTupleHeader tuple)
TM_Result HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid, Buffer buffer)
#define XLH_DELETE_CONTAINS_OLD_KEY
#define XLH_DELETE_ALL_VISIBLE_CLEARED
#define SizeOfHeapHeader
#define XLH_DELETE_IS_PARTITION_MOVE
#define XLH_DELETE_CONTAINS_OLD_TUPLE
void heap_freetuple(HeapTuple htup)
Definition heaptuple.c:1435
#define SizeofHeapTupleHeader
static bool HEAP_XMAX_IS_LOCKED_ONLY(uint16 infomask)
static void HeapTupleHeaderSetCmax(HeapTupleHeaderData *tup, CommandId cid, bool iscombo)
static void HeapTupleHeaderClearHotUpdated(HeapTupleHeaderData *tup)
static TransactionId HeapTupleHeaderGetRawXmax(const HeapTupleHeaderData *tup)
#define HEAP_XMAX_IS_MULTI
#define HEAP_XMAX_INVALID
static TransactionId HeapTupleHeaderGetUpdateXid(const HeapTupleHeaderData *tup)
static void HeapTupleHeaderSetMovedPartitions(HeapTupleHeaderData *tup)
static void HeapTupleHeaderSetXmax(HeapTupleHeaderData *tup, TransactionId xid)
void CacheInvalidateHeapTuple(Relation relation, HeapTuple tuple, HeapTuple newtuple)
Definition inval.c:1571
bool ItemPointerEquals(const ItemPointerData *pointer1, const ItemPointerData *pointer2)
Definition itemptr.c:35
void XactLockTableWait(TransactionId xid, Relation rel, const ItemPointerData *ctid, XLTW_Oper oper)
Definition lmgr.c:663
@ XLTW_Delete
Definition lmgr.h:28
@ LockWaitBlock
Definition lockoptions.h:40
@ LockTupleExclusive
Definition lockoptions.h:59
void MultiXactIdSetOldestMember(void)
Definition multixact.c:537
@ MultiXactStatusUpdate
Definition multixact.h:45
void CheckForSerializableConflictIn(Relation relation, const ItemPointerData *tid, BlockNumber blkno)
Definition predicate.c:4334
#define InvalidSnapshot
Definition snapshot.h:119
TransactionId xmax
Definition tableam.h:150
CommandId cmax
Definition tableam.h:151
ItemPointerData ctid
Definition tableam.h:149
TM_Result
Definition tableam.h:73
@ TM_Ok
Definition tableam.h:78
@ TM_BeingModified
Definition tableam.h:100
@ TM_Deleted
Definition tableam.h:93
@ TM_Updated
Definition tableam.h:90
@ TM_SelfModified
Definition tableam.h:84
@ TM_Invisible
Definition tableam.h:81
#define TransactionIdEquals(id1, id2)
Definition transam.h:43
bool visibilitymap_clear(Relation rel, BlockNumber heapBlk, Buffer vmbuf, uint8 flags)
void visibilitymap_pin(Relation rel, BlockNumber heapBlk, Buffer *vmbuf)
#define VISIBILITYMAP_VALID_BITS
bool TransactionIdIsCurrentTransactionId(TransactionId xid)
Definition xact.c:942
bool IsInParallelMode(void)
Definition xact.c:1090
#define XLOG_INCLUDE_ORIGIN
Definition xlog.h:165
void XLogSetRecordFlags(uint8 flags)
Definition xloginsert.c:460

References Assert, AssertHasSnapshotForToast(), BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage(), CacheInvalidateHeapTuple(), CheckForSerializableConflictIn(), TM_FailureData::cmax, compute_infobits(), compute_new_xmax_infomask(), TM_FailureData::ctid, DoesMultiXactIdConflict(), END_CRIT_SECTION, ereport, errcode(), errmsg(), ERROR, ExtractReplicaIdentity(), fb(), GetCurrentTransactionId(), heap_acquire_tuplock(), heap_freetuple(), HEAP_MOVED, heap_toast_delete(), HEAP_XMAX_BITS, HEAP_XMAX_INVALID, HEAP_XMAX_IS_LOCKED_ONLY(), HEAP_XMAX_IS_MULTI, HeapTupleHasExternal(), HeapTupleHeaderAdjustCmax(), HeapTupleHeaderClearHotUpdated(), HeapTupleHeaderGetCmax(), HeapTupleHeaderGetRawXmax(), HeapTupleHeaderGetUpdateXid(), HeapTupleHeaderIsOnlyLocked(), HeapTupleHeaderSetCmax(), HeapTupleHeaderSetMovedPartitions(), HeapTupleHeaderSetXmax(), HeapTupleSatisfiesUpdate(), HeapTupleSatisfiesVisibility(), InvalidBuffer, InvalidCommandId, InvalidSnapshot, IsInParallelMode(), ItemIdGetLength, ItemIdIsNormal, ItemPointerEquals(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), ItemPointerIsValid(), LockBuffer(), LockTupleExclusive, LockWaitBlock, log_heap_new_cid(), MarkBufferDirty(), MultiXactIdSetOldestMember(), MultiXactIdWait(), MultiXactStatusUpdate, PageClearAllVisible(), PageGetItem(), PageGetItemId(), PageIsAllVisible(), PageSetLSN(), PageSetPrunable, pgstat_count_heap_delete(), RelationData::rd_rel, ReadBuffer(), REGBUF_STANDARD, RelationGetRelid, RelationIsAccessibleInLogicalDecoding, RelationNeedsWAL, ReleaseBuffer(), SizeOfHeapDelete, SizeOfHeapHeader, SizeofHeapTupleHeader, START_CRIT_SECTION, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TM_BeingModified, TM_Deleted, TM_Invisible, TM_Ok, TM_SelfModified, TM_Updated, TransactionIdEquals, TransactionIdIsCurrentTransactionId(), 
UnlockReleaseBuffer(), UnlockTupleTuplock, UpdateXmaxHintBits(), visibilitymap_clear(), visibilitymap_pin(), VISIBILITYMAP_VALID_BITS, XactLockTableWait(), XLH_DELETE_ALL_VISIBLE_CLEARED, XLH_DELETE_CONTAINS_OLD_KEY, XLH_DELETE_CONTAINS_OLD_TUPLE, XLH_DELETE_IS_PARTITION_MOVE, XLOG_HEAP_DELETE, XLOG_INCLUDE_ORIGIN, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), XLogRegisterData(), XLogSetRecordFlags(), XLTW_Delete, TM_FailureData::xmax, and xmax_infomask_changed().

Referenced by heapam_tuple_delete(), and simple_heap_delete().

◆ heap_endscan()

void heap_endscan ( TableScanDesc  sscan)
extern

Definition at line 1371 of file heapam.c.

1372{
1374
1375 /* Note: no locking manipulations needed */
1376
1377 /*
1378 * unpin scan buffers
1379 */
1380 if (BufferIsValid(scan->rs_cbuf))
1381 ReleaseBuffer(scan->rs_cbuf);
1382
1383 /*
1384 * Must free the read stream before freeing the BufferAccessStrategy.
1385 */
1386 if (scan->rs_read_stream)
1388
1389 /*
1390 * decrement relation reference count and free scan descriptor storage
1391 */
1393
1394 if (scan->rs_base.rs_key)
1395 pfree(scan->rs_base.rs_key);
1396
1397 if (scan->rs_strategy != NULL)
1399
1400 if (scan->rs_parallelworkerdata != NULL)
1402
1403 if (scan->rs_base.rs_flags & SO_TEMP_SNAPSHOT)
1405
1406 pfree(scan);
1407}
static bool BufferIsValid(Buffer bufnum)
Definition bufmgr.h:417
void read_stream_end(ReadStream *stream)
void RelationDecrementReferenceCount(Relation rel)
Definition relcache.c:2195
void UnregisterSnapshot(Snapshot snapshot)
Definition snapmgr.c:866
@ SO_TEMP_SNAPSHOT
Definition tableam.h:65

References BufferIsValid(), fb(), FreeAccessStrategy(), pfree(), read_stream_end(), RelationDecrementReferenceCount(), ReleaseBuffer(), HeapScanDescData::rs_base, HeapScanDescData::rs_cbuf, TableScanDescData::rs_flags, TableScanDescData::rs_key, HeapScanDescData::rs_parallelworkerdata, TableScanDescData::rs_rd, HeapScanDescData::rs_read_stream, TableScanDescData::rs_snapshot, HeapScanDescData::rs_strategy, SO_TEMP_SNAPSHOT, and UnregisterSnapshot().

◆ heap_execute_freeze_tuple()

static void heap_execute_freeze_tuple ( HeapTupleHeader  tuple,
HeapTupleFreeze frz 
)
inlinestatic

Definition at line 492 of file heapam.h.

493{
494 HeapTupleHeaderSetXmax(tuple, frz->xmax);
495
496 if (frz->frzflags & XLH_FREEZE_XVAC)
498
499 if (frz->frzflags & XLH_INVALID_XVAC)
501
502 tuple->t_infomask = frz->t_infomask;
503 tuple->t_infomask2 = frz->t_infomask2;
504}
#define XLH_INVALID_XVAC
#define XLH_FREEZE_XVAC
static void HeapTupleHeaderSetXvac(HeapTupleHeaderData *tup, TransactionId xid)
#define FrozenTransactionId
Definition transam.h:33

References fb(), FrozenTransactionId, HeapTupleHeaderSetXmax(), HeapTupleHeaderSetXvac(), InvalidTransactionId, HeapTupleHeaderData::t_infomask, HeapTupleHeaderData::t_infomask2, XLH_FREEZE_XVAC, and XLH_INVALID_XVAC.

Referenced by heap_freeze_prepared_tuples(), heap_freeze_tuple(), and heap_xlog_prune_freeze().

◆ heap_fetch()

bool heap_fetch ( Relation  relation,
Snapshot  snapshot,
HeapTuple  tuple,
Buffer userbuf,
bool  keep_buf 
)
extern

Definition at line 1659 of file heapam.c.

1664{
1665 ItemPointer tid = &(tuple->t_self);
1666 ItemId lp;
1667 Buffer buffer;
1668 Page page;
1669 OffsetNumber offnum;
1670 bool valid;
1671
1672 /*
1673 * Fetch and pin the appropriate page of the relation.
1674 */
1675 buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
1676
1677 /*
1678 * Need share lock on buffer to examine tuple commit status.
1679 */
1681 page = BufferGetPage(buffer);
1682
1683 /*
1684 * We'd better check for out-of-range offnum in case of VACUUM since the
1685 * TID was obtained.
1686 */
1687 offnum = ItemPointerGetOffsetNumber(tid);
1689 {
1691 ReleaseBuffer(buffer);
1693 tuple->t_data = NULL;
1694 return false;
1695 }
1696
1697 /*
1698 * get the item line pointer corresponding to the requested tid
1699 */
1700 lp = PageGetItemId(page, offnum);
1701
1702 /*
1703 * Must check for deleted tuple.
1704 */
1705 if (!ItemIdIsNormal(lp))
1706 {
1708 ReleaseBuffer(buffer);
1710 tuple->t_data = NULL;
1711 return false;
1712 }
1713
1714 /*
1715 * fill in *tuple fields
1716 */
1717 tuple->t_data = (HeapTupleHeader) PageGetItem(page, lp);
1718 tuple->t_len = ItemIdGetLength(lp);
1719 tuple->t_tableOid = RelationGetRelid(relation);
1720
1721 /*
1722 * check tuple visibility, then release lock
1723 */
1724 valid = HeapTupleSatisfiesVisibility(tuple, snapshot, buffer);
1725
1726 if (valid)
1727 PredicateLockTID(relation, &(tuple->t_self), snapshot,
1729
1730 HeapCheckForSerializableConflictOut(valid, relation, tuple, buffer, snapshot);
1731
1733
1734 if (valid)
1735 {
1736 /*
1737 * All checks passed, so return the tuple as valid. Caller is now
1738 * responsible for releasing the buffer.
1739 */
1740 *userbuf = buffer;
1741
1742 return true;
1743 }
1744
1745 /* Tuple failed time qual, but maybe caller wants to see it anyway. */
1746 if (keep_buf)
1747 *userbuf = buffer;
1748 else
1749 {
1750 ReleaseBuffer(buffer);
1752 tuple->t_data = NULL;
1753 }
1754
1755 return false;
1756}
@ BUFFER_LOCK_SHARE
Definition bufmgr.h:210
static OffsetNumber PageGetMaxOffsetNumber(const PageData *page)
Definition bufpage.h:371
void HeapCheckForSerializableConflictOut(bool visible, Relation relation, HeapTuple tuple, Buffer buffer, Snapshot snapshot)
Definition heapam.c:9326
static TransactionId HeapTupleHeaderGetXmin(const HeapTupleHeaderData *tup)
uint16 OffsetNumber
Definition off.h:24
void PredicateLockTID(Relation relation, const ItemPointerData *tid, Snapshot snapshot, TransactionId tuple_xid)
Definition predicate.c:2619

References BUFFER_LOCK_SHARE, BUFFER_LOCK_UNLOCK, BufferGetPage(), fb(), HeapCheckForSerializableConflictOut(), HeapTupleHeaderGetXmin(), HeapTupleSatisfiesVisibility(), InvalidBuffer, ItemIdGetLength, ItemIdIsNormal, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), LockBuffer(), PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PredicateLockTID(), ReadBuffer(), RelationGetRelid, ReleaseBuffer(), HeapTupleData::t_data, HeapTupleData::t_len, HeapTupleData::t_self, and HeapTupleData::t_tableOid.

Referenced by heap_lock_updated_tuple_rec(), heapam_fetch_row_version(), and heapam_tuple_lock().

◆ heap_finish_speculative()

void heap_finish_speculative ( Relation  relation,
const ItemPointerData tid 
)
extern

Definition at line 6168 of file heapam.c.

6169{
6170 Buffer buffer;
6171 Page page;
6172 OffsetNumber offnum;
6173 ItemId lp;
6174 HeapTupleHeader htup;
6175
6176 buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
6178 page = BufferGetPage(buffer);
6179
6180 offnum = ItemPointerGetOffsetNumber(tid);
6182 elog(ERROR, "offnum out of range");
6183 lp = PageGetItemId(page, offnum);
6184 if (!ItemIdIsNormal(lp))
6185 elog(ERROR, "invalid lp");
6186
6187 htup = (HeapTupleHeader) PageGetItem(page, lp);
6188
6189 /* NO EREPORT(ERROR) from here till changes are logged */
6191
6193
6194 MarkBufferDirty(buffer);
6195
6196 /*
6197 * Replace the speculative insertion token with a real t_ctid, pointing to
6198 * itself like it does on regular tuples.
6199 */
6200 htup->t_ctid = *tid;
6201
6202 /* XLOG stuff */
6203 if (RelationNeedsWAL(relation))
6204 {
6207
6209
6211
6212 /* We want the same filtering on this as on a plain insert */
6214
6217
6219
6220 PageSetLSN(page, recptr);
6221 }
6222
6224
6225 UnlockReleaseBuffer(buffer);
6226}
#define SizeOfHeapConfirm
#define XLOG_HEAP_CONFIRM
Definition heapam_xlog.h:38
OffsetNumber offnum

References Assert, BUFFER_LOCK_EXCLUSIVE, BufferGetPage(), elog, END_CRIT_SECTION, ERROR, fb(), HeapTupleHeaderIsSpeculative(), ItemIdIsNormal, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), LockBuffer(), MarkBufferDirty(), xl_heap_confirm::offnum, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PageSetLSN(), ReadBuffer(), REGBUF_STANDARD, RelationNeedsWAL, SizeOfHeapConfirm, START_CRIT_SECTION, HeapTupleHeaderData::t_ctid, UnlockReleaseBuffer(), XLOG_HEAP_CONFIRM, XLOG_INCLUDE_ORIGIN, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), XLogRegisterData(), and XLogSetRecordFlags().

Referenced by heapam_tuple_complete_speculative().

◆ heap_freeze_prepared_tuples()

void heap_freeze_prepared_tuples ( Buffer  buffer,
HeapTupleFreeze tuples,
int  ntuples 
)
extern

Definition at line 7461 of file heapam.c.

7462{
7463 Page page = BufferGetPage(buffer);
7464
7465 for (int i = 0; i < ntuples; i++)
7466 {
7467 HeapTupleFreeze *frz = tuples + i;
7468 ItemId itemid = PageGetItemId(page, frz->offset);
7469 HeapTupleHeader htup;
7470
7471 htup = (HeapTupleHeader) PageGetItem(page, itemid);
7473 }
7474}
static void heap_execute_freeze_tuple(HeapTupleHeader tuple, HeapTupleFreeze *frz)
Definition heapam.h:492
int i
Definition isn.c:77

References BufferGetPage(), fb(), heap_execute_freeze_tuple(), i, PageGetItem(), and PageGetItemId().

Referenced by heap_page_prune_and_freeze().

◆ heap_freeze_tuple()

bool heap_freeze_tuple ( HeapTupleHeader  tuple,
TransactionId  relfrozenxid,
TransactionId  relminmxid,
TransactionId  FreezeLimit,
TransactionId  MultiXactCutoff 
)
extern

Definition at line 7483 of file heapam.c.

7486{
7488 bool do_freeze;
7489 bool totally_frozen;
7490 struct VacuumCutoffs cutoffs;
7491 HeapPageFreeze pagefrz;
7492
7493 cutoffs.relfrozenxid = relfrozenxid;
7494 cutoffs.relminmxid = relminmxid;
7495 cutoffs.OldestXmin = FreezeLimit;
7496 cutoffs.OldestMxact = MultiXactCutoff;
7497 cutoffs.FreezeLimit = FreezeLimit;
7498 cutoffs.MultiXactCutoff = MultiXactCutoff;
7499
7500 pagefrz.freeze_required = true;
7501 pagefrz.FreezePageRelfrozenXid = FreezeLimit;
7502 pagefrz.FreezePageRelminMxid = MultiXactCutoff;
7503 pagefrz.NoFreezePageRelfrozenXid = FreezeLimit;
7504 pagefrz.NoFreezePageRelminMxid = MultiXactCutoff;
7505
7506 do_freeze = heap_prepare_freeze_tuple(tuple, &cutoffs,
7507 &pagefrz, &frz, &totally_frozen);
7508
7509 /*
7510 * Note that because this is not a WAL-logged operation, we don't need to
7511 * fill in the offset in the freeze record.
7512 */
7513
7514 if (do_freeze)
7516 return do_freeze;
7517}
bool heap_prepare_freeze_tuple(HeapTupleHeader tuple, const struct VacuumCutoffs *cutoffs, HeapPageFreeze *pagefrz, HeapTupleFreeze *frz, bool *totally_frozen)
Definition heapam.c:7135
bool freeze_required
Definition heapam.h:182
TransactionId FreezeLimit
Definition vacuum.h:289
TransactionId relfrozenxid
Definition vacuum.h:263
MultiXactId relminmxid
Definition vacuum.h:264
MultiXactId MultiXactCutoff
Definition vacuum.h:290

References fb(), VacuumCutoffs::FreezeLimit, heap_execute_freeze_tuple(), heap_prepare_freeze_tuple(), VacuumCutoffs::MultiXactCutoff, VacuumCutoffs::OldestMxact, VacuumCutoffs::OldestXmin, VacuumCutoffs::relfrozenxid, and VacuumCutoffs::relminmxid.

Referenced by rewrite_heap_tuple().

◆ heap_get_latest_tid()

void heap_get_latest_tid ( TableScanDesc  sscan,
ItemPointer  tid 
)
extern

Definition at line 1931 of file heapam.c.

1933{
1934 Relation relation = sscan->rs_rd;
1935 Snapshot snapshot = sscan->rs_snapshot;
1936 ItemPointerData ctid;
1938
1939 /*
1940 * table_tuple_get_latest_tid() verified that the passed in tid is valid.
1941 * Assume that t_ctid links are valid however - there shouldn't be invalid
1942 * ones in the table.
1943 */
1945
1946 /*
1947 * Loop to chase down t_ctid links. At top of loop, ctid is the tuple we
1948 * need to examine, and *tid is the TID we will return if ctid turns out
1949 * to be bogus.
1950 *
1951 * Note that we will loop until we reach the end of the t_ctid chain.
1952 * Depending on the snapshot passed, there might be at most one visible
1953 * version of the row, but we don't try to optimize for that.
1954 */
1955 ctid = *tid;
1956 priorXmax = InvalidTransactionId; /* cannot check first XMIN */
1957 for (;;)
1958 {
1959 Buffer buffer;
1960 Page page;
1961 OffsetNumber offnum;
1962 ItemId lp;
1963 HeapTupleData tp;
1964 bool valid;
1965
1966 /*
1967 * Read, pin, and lock the page.
1968 */
1969 buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(&ctid));
1971 page = BufferGetPage(buffer);
1972
1973 /*
1974 * Check for bogus item number. This is not treated as an error
1975 * condition because it can happen while following a t_ctid link. We
1976 * just assume that the prior tid is OK and return it unchanged.
1977 */
1978 offnum = ItemPointerGetOffsetNumber(&ctid);
1980 {
1981 UnlockReleaseBuffer(buffer);
1982 break;
1983 }
1984 lp = PageGetItemId(page, offnum);
1985 if (!ItemIdIsNormal(lp))
1986 {
1987 UnlockReleaseBuffer(buffer);
1988 break;
1989 }
1990
1991 /* OK to access the tuple */
1992 tp.t_self = ctid;
1993 tp.t_data = (HeapTupleHeader) PageGetItem(page, lp);
1994 tp.t_len = ItemIdGetLength(lp);
1995 tp.t_tableOid = RelationGetRelid(relation);
1996
1997 /*
1998 * After following a t_ctid link, we might arrive at an unrelated
1999 * tuple. Check for XMIN match.
2000 */
2003 {
2004 UnlockReleaseBuffer(buffer);
2005 break;
2006 }
2007
2008 /*
2009 * Check tuple visibility; if visible, set it as the new result
2010 * candidate.
2011 */
2012 valid = HeapTupleSatisfiesVisibility(&tp, snapshot, buffer);
2013 HeapCheckForSerializableConflictOut(valid, relation, &tp, buffer, snapshot);
2014 if (valid)
2015 *tid = ctid;
2016
2017 /*
2018 * If there's a valid t_ctid link, follow it, else we're done.
2019 */
2020 if ((tp.t_data->t_infomask & HEAP_XMAX_INVALID) ||
2024 {
2025 UnlockReleaseBuffer(buffer);
2026 break;
2027 }
2028
2029 ctid = tp.t_data->t_ctid;
2031 UnlockReleaseBuffer(buffer);
2032 } /* end of loop */
2033}
static bool HeapTupleHeaderIndicatesMovedPartitions(const HeapTupleHeaderData *tup)

References Assert, BUFFER_LOCK_SHARE, BufferGetPage(), fb(), HEAP_XMAX_INVALID, HeapCheckForSerializableConflictOut(), HeapTupleHeaderGetUpdateXid(), HeapTupleHeaderGetXmin(), HeapTupleHeaderIndicatesMovedPartitions(), HeapTupleHeaderIsOnlyLocked(), HeapTupleSatisfiesVisibility(), InvalidTransactionId, ItemIdGetLength, ItemIdIsNormal, ItemPointerEquals(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), ItemPointerIsValid(), LockBuffer(), PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), ReadBuffer(), RelationGetRelid, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TransactionIdEquals, TransactionIdIsValid, and UnlockReleaseBuffer().

◆ heap_get_root_tuples()

void heap_get_root_tuples ( Page  page,
OffsetNumber root_offsets 
)
extern

Definition at line 1895 of file pruneheap.c.

1896{
1897 OffsetNumber offnum,
1898 maxoff;
1899
1902
1903 maxoff = PageGetMaxOffsetNumber(page);
1904 for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum = OffsetNumberNext(offnum))
1905 {
1906 ItemId lp = PageGetItemId(page, offnum);
1907 HeapTupleHeader htup;
1910
1911 /* skip unused and dead items */
1912 if (!ItemIdIsUsed(lp) || ItemIdIsDead(lp))
1913 continue;
1914
1915 if (ItemIdIsNormal(lp))
1916 {
1917 htup = (HeapTupleHeader) PageGetItem(page, lp);
1918
1919 /*
1920 * Check if this tuple is part of a HOT-chain rooted at some other
1921 * tuple. If so, skip it for now; we'll process it when we find
1922 * its root.
1923 */
1924 if (HeapTupleHeaderIsHeapOnly(htup))
1925 continue;
1926
1927 /*
1928 * This is either a plain tuple or the root of a HOT-chain.
1929 * Remember it in the mapping.
1930 */
1931 root_offsets[offnum - 1] = offnum;
1932
1933 /* If it's not the start of a HOT-chain, we're done with it */
1934 if (!HeapTupleHeaderIsHotUpdated(htup))
1935 continue;
1936
1937 /* Set up to scan the HOT-chain */
1940 }
1941 else
1942 {
1943 /* Must be a redirect item. We do not set its root_offsets entry */
1945 /* Set up to scan the HOT-chain */
1948 }
1949
1950 /*
1951 * Now follow the HOT-chain and collect other tuples in the chain.
1952 *
1953 * Note: Even though this is a nested loop, the complexity of the
1954 * function is O(N) because a tuple in the page should not be visited
1955 * more than twice, once in the outer loop and once in HOT-chain
1956 * chases.
1957 */
1958 for (;;)
1959 {
1960 /* Sanity check (pure paranoia) */
1961 if (offnum < FirstOffsetNumber)
1962 break;
1963
1964 /*
1965 * An offset past the end of page's line pointer array is possible
1966 * when the array was truncated
1967 */
1968 if (offnum > maxoff)
1969 break;
1970
1971 lp = PageGetItemId(page, nextoffnum);
1972
1973 /* Check for broken chains */
1974 if (!ItemIdIsNormal(lp))
1975 break;
1976
1977 htup = (HeapTupleHeader) PageGetItem(page, lp);
1978
1981 break;
1982
1983 /* Remember the root line pointer for this item */
1984 root_offsets[nextoffnum - 1] = offnum;
1985
1986 /* Advance to next chain member, if any */
1987 if (!HeapTupleHeaderIsHotUpdated(htup))
1988 break;
1989
1990 /* HOT implies it can't have moved to different partition */
1992
1995 }
1996 }
1997}
#define MemSet(start, val, len)
Definition c.h:1035
static bool HeapTupleHeaderIsHotUpdated(const HeapTupleHeaderData *tup)
#define MaxHeapTuplesPerPage
#define ItemIdGetRedirect(itemId)
Definition itemid.h:78
#define ItemIdIsDead(itemId)
Definition itemid.h:113
#define ItemIdIsUsed(itemId)
Definition itemid.h:92
#define ItemIdIsRedirected(itemId)
Definition itemid.h:106
#define InvalidOffsetNumber
Definition off.h:26
#define OffsetNumberNext(offsetNumber)
Definition off.h:52
#define FirstOffsetNumber
Definition off.h:27

References Assert, fb(), FirstOffsetNumber, HeapTupleHeaderGetUpdateXid(), HeapTupleHeaderGetXmin(), HeapTupleHeaderIndicatesMovedPartitions(), HeapTupleHeaderIsHeapOnly(), HeapTupleHeaderIsHotUpdated(), InvalidOffsetNumber, InvalidTransactionId, ItemIdGetRedirect, ItemIdIsDead, ItemIdIsNormal, ItemIdIsRedirected, ItemIdIsUsed, ItemPointerGetOffsetNumber(), MaxHeapTuplesPerPage, MemSet, OffsetNumberNext, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), HeapTupleHeaderData::t_ctid, TransactionIdEquals, and TransactionIdIsValid.

Referenced by heapam_index_build_range_scan(), and heapam_index_validate_scan().

◆ heap_getnext()

HeapTuple heap_getnext ( TableScanDesc  sscan,
ScanDirection  direction 
)
extern

Definition at line 1410 of file heapam.c.

1411{
1413
1414 /*
1415 * This is still widely used directly, without going through table AM, so
1416 * add a safety check. It's possible we should, at a later point,
1417 * downgrade this to an assert. The reason for checking the AM routine,
1418 * rather than the AM oid, is that this allows writing regression tests
1419 * that create another AM reusing the heap handler.
1420 */
1421 if (unlikely(sscan->rs_rd->rd_tableam != GetHeapamTableAmRoutine()))
1422 ereport(ERROR,
1424 errmsg_internal("only heap AM is supported")));
1425
1426 /* Note: no locking manipulations needed */
1427
1429 heapgettup_pagemode(scan, direction,
1430 scan->rs_base.rs_nkeys, scan->rs_base.rs_key);
1431 else
1432 heapgettup(scan, direction,
1433 scan->rs_base.rs_nkeys, scan->rs_base.rs_key);
1434
1435 if (scan->rs_ctup.t_data == NULL)
1436 return NULL;
1437
1438 /*
1439 * if we get here it means we have a new current scan tuple, so point to
1440 * the proper return buffer and return the tuple.
1441 */
1442
1444
1445 return &scan->rs_ctup;
1446}
#define unlikely(x)
Definition c.h:424
int int errmsg_internal(const char *fmt,...) pg_attribute_printf(1
static void heapgettup(HeapScanDesc scan, ScanDirection dir, int nkeys, ScanKey key)
Definition heapam.c:960
static void heapgettup_pagemode(HeapScanDesc scan, ScanDirection dir, int nkeys, ScanKey key)
Definition heapam.c:1070
const TableAmRoutine * GetHeapamTableAmRoutine(void)
#define pgstat_count_heap_getnext(rel)
Definition pgstat.h:699
@ SO_ALLOW_PAGEMODE
Definition tableam.h:62

References ereport, errcode(), errmsg_internal(), ERROR, fb(), GetHeapamTableAmRoutine(), heapgettup(), heapgettup_pagemode(), pgstat_count_heap_getnext, HeapScanDescData::rs_base, HeapScanDescData::rs_ctup, TableScanDescData::rs_flags, TableScanDescData::rs_key, TableScanDescData::rs_nkeys, TableScanDescData::rs_rd, SO_ALLOW_PAGEMODE, HeapTupleData::t_data, and unlikely.

Referenced by AlterTableMoveAll(), AlterTableSpaceOptions(), check_db_file_conflict(), CreateDatabaseUsingFileCopy(), do_autovacuum(), DropSetting(), DropTableSpace(), find_typed_table_dependencies(), get_all_vacuum_rels(), get_database_list(), get_subscription_list(), get_tables_to_cluster(), get_tablespace_name(), get_tablespace_oid(), GetAllPublicationRelations(), getRelationsInNamespace(), GetSchemaPublicationRelations(), heapam_index_build_range_scan(), heapam_index_validate_scan(), objectsInSchemaToOids(), pgrowlocks(), pgstat_heap(), populate_typ_list(), ReindexMultipleTables(), remove_dbtablespaces(), RemoveSubscriptionRel(), RenameTableSpace(), ThereIsAtLeastOneRole(), and vac_truncate_clog().

◆ heap_getnextslot()

bool heap_getnextslot ( TableScanDesc  sscan,
ScanDirection  direction,
TupleTableSlot slot 
)
extern

Definition at line 1449 of file heapam.c.

1450{
1452
1453 /* Note: no locking manipulations needed */
1454
1455 if (sscan->rs_flags & SO_ALLOW_PAGEMODE)
1456 heapgettup_pagemode(scan, direction, sscan->rs_nkeys, sscan->rs_key);
1457 else
1458 heapgettup(scan, direction, sscan->rs_nkeys, sscan->rs_key);
1459
1460 if (scan->rs_ctup.t_data == NULL)
1461 {
1462 ExecClearTuple(slot);
1463 return false;
1464 }
1465
1466 /*
1467 * if we get here it means we have a new current scan tuple, so point to
1468 * the proper return buffer and return the tuple.
1469 */
1470
1472
1473 ExecStoreBufferHeapTuple(&scan->rs_ctup, slot,
1474 scan->rs_cbuf);
1475 return true;
1476}
TupleTableSlot * ExecStoreBufferHeapTuple(HeapTuple tuple, TupleTableSlot *slot, Buffer buffer)
static TupleTableSlot * ExecClearTuple(TupleTableSlot *slot)
Definition tuptable.h:457

References ExecClearTuple(), ExecStoreBufferHeapTuple(), fb(), heapgettup(), heapgettup_pagemode(), pgstat_count_heap_getnext, HeapScanDescData::rs_base, HeapScanDescData::rs_cbuf, HeapScanDescData::rs_ctup, TableScanDescData::rs_rd, SO_ALLOW_PAGEMODE, and HeapTupleData::t_data.

◆ heap_getnextslot_tidrange()

bool heap_getnextslot_tidrange ( TableScanDesc  sscan,
ScanDirection  direction,
TupleTableSlot slot 
)
extern

Definition at line 1552 of file heapam.c.

1554{
1556 ItemPointer mintid = &sscan->st.tidrange.rs_mintid;
1557 ItemPointer maxtid = &sscan->st.tidrange.rs_maxtid;
1558
1559 /* Note: no locking manipulations needed */
1560 for (;;)
1561 {
1562 if (sscan->rs_flags & SO_ALLOW_PAGEMODE)
1563 heapgettup_pagemode(scan, direction, sscan->rs_nkeys, sscan->rs_key);
1564 else
1565 heapgettup(scan, direction, sscan->rs_nkeys, sscan->rs_key);
1566
1567 if (scan->rs_ctup.t_data == NULL)
1568 {
1569 ExecClearTuple(slot);
1570 return false;
1571 }
1572
1573 /*
1574 * heap_set_tidrange will have used heap_setscanlimits to limit the
1575 * range of pages we scan to only ones that can contain the TID range
1576 * we're scanning for. Here we must filter out any tuples from these
1577 * pages that are outside of that range.
1578 */
1579 if (ItemPointerCompare(&scan->rs_ctup.t_self, mintid) < 0)
1580 {
1581 ExecClearTuple(slot);
1582
1583 /*
1584 * When scanning backwards, the TIDs will be in descending order.
1585 * Future tuples in this direction will be lower still, so we can
1586 * just return false to indicate there will be no more tuples.
1587 */
1588 if (ScanDirectionIsBackward(direction))
1589 return false;
1590
1591 continue;
1592 }
1593
1594 /*
1595 * Likewise for the final page, we must filter out TIDs greater than
1596 * maxtid.
1597 */
1598 if (ItemPointerCompare(&scan->rs_ctup.t_self, maxtid) > 0)
1599 {
1600 ExecClearTuple(slot);
1601
1602 /*
1603 * When scanning forward, the TIDs will be in ascending order.
1604 * Future tuples in this direction will be higher still, so we can
1605 * just return false to indicate there will be no more tuples.
1606 */
1607 if (ScanDirectionIsForward(direction))
1608 return false;
1609 continue;
1610 }
1611
1612 break;
1613 }
1614
1615 /*
1616 * if we get here it means we have a new current scan tuple, so point to
1617 * the proper return buffer and return the tuple.
1618 */
1620
1621 ExecStoreBufferHeapTuple(&scan->rs_ctup, slot, scan->rs_cbuf);
1622 return true;
1623}
int32 ItemPointerCompare(const ItemPointerData *arg1, const ItemPointerData *arg2)
Definition itemptr.c:51
#define ScanDirectionIsForward(direction)
Definition sdir.h:64
#define ScanDirectionIsBackward(direction)
Definition sdir.h:50

References ExecClearTuple(), ExecStoreBufferHeapTuple(), fb(), heapgettup(), heapgettup_pagemode(), ItemPointerCompare(), pgstat_count_heap_getnext, HeapScanDescData::rs_base, HeapScanDescData::rs_cbuf, HeapScanDescData::rs_ctup, TableScanDescData::rs_rd, ScanDirectionIsBackward, ScanDirectionIsForward, SO_ALLOW_PAGEMODE, HeapTupleData::t_data, and HeapTupleData::t_self.

◆ heap_hot_search_buffer()

bool heap_hot_search_buffer ( ItemPointer  tid,
Relation  relation,
Buffer  buffer,
Snapshot  snapshot,
HeapTuple  heapTuple,
bool all_dead,
bool  first_call 
)
extern

Definition at line 1779 of file heapam.c.

1782{
1783 Page page = BufferGetPage(buffer);
1785 BlockNumber blkno;
1786 OffsetNumber offnum;
1787 bool at_chain_start;
1788 bool valid;
1789 bool skip;
1790 GlobalVisState *vistest = NULL;
1791
1792 /* If this is not the first call, previous call returned a (live!) tuple */
1793 if (all_dead)
1795
1796 blkno = ItemPointerGetBlockNumber(tid);
1797 offnum = ItemPointerGetOffsetNumber(tid);
1799 skip = !first_call;
1800
1801 /* XXX: we should assert that a snapshot is pushed or registered */
1803 Assert(BufferGetBlockNumber(buffer) == blkno);
1804
1805 /* Scan through possible multiple members of HOT-chain */
1806 for (;;)
1807 {
1808 ItemId lp;
1809
1810 /* check for bogus TID */
1812 break;
1813
1814 lp = PageGetItemId(page, offnum);
1815
1816 /* check for unused, dead, or redirected items */
1817 if (!ItemIdIsNormal(lp))
1818 {
1819 /* We should only see a redirect at start of chain */
1821 {
1822 /* Follow the redirect */
1823 offnum = ItemIdGetRedirect(lp);
1824 at_chain_start = false;
1825 continue;
1826 }
1827 /* else must be end of chain */
1828 break;
1829 }
1830
1831 /*
1832 * Update heapTuple to point to the element of the HOT chain we're
1833 * currently investigating. Having t_self set correctly is important
1834 * because the SSI checks and the *Satisfies routine for historical
1835 * MVCC snapshots need the correct tid to decide about the visibility.
1836 */
1837 heapTuple->t_data = (HeapTupleHeader) PageGetItem(page, lp);
1838 heapTuple->t_len = ItemIdGetLength(lp);
1839 heapTuple->t_tableOid = RelationGetRelid(relation);
1840 ItemPointerSet(&heapTuple->t_self, blkno, offnum);
1841
1842 /*
1843 * Shouldn't see a HEAP_ONLY tuple at chain start.
1844 */
1846 break;
1847
1848 /*
1849 * The xmin should match the previous xmax value, else chain is
1850 * broken.
1851 */
1855 break;
1856
1857 /*
1858 * When first_call is true (and thus, skip is initially false) we'll
1859 * return the first tuple we find. But on later passes, heapTuple
1860 * will initially be pointing to the tuple we returned last time.
1861 * Returning it again would be incorrect (and would loop forever), so
1862 * we skip it and return the next match we find.
1863 */
1864 if (!skip)
1865 {
1866 /* If it's visible per the snapshot, we must return it */
1867 valid = HeapTupleSatisfiesVisibility(heapTuple, snapshot, buffer);
1869 buffer, snapshot);
1870
1871 if (valid)
1872 {
1873 ItemPointerSetOffsetNumber(tid, offnum);
1874 PredicateLockTID(relation, &heapTuple->t_self, snapshot,
1876 if (all_dead)
1877 *all_dead = false;
1878 return true;
1879 }
1880 }
1881 skip = false;
1882
1883 /*
1884 * If we can't see it, maybe no one else can either. At caller
1885 * request, check whether all chain members are dead to all
1886 * transactions.
1887 *
1888 * Note: if you change the criterion here for what is "dead", fix the
1889 * planner's get_actual_variable_range() function to match.
1890 */
1891 if (all_dead && *all_dead)
1892 {
1893 if (!vistest)
1894 vistest = GlobalVisTestFor(relation);
1895
1896 if (!HeapTupleIsSurelyDead(heapTuple, vistest))
1897 *all_dead = false;
1898 }
1899
1900 /*
1901 * Check to see if HOT chain continues past this tuple; if so fetch
1902 * the next offnum and loop around.
1903 */
1905 {
1906 Assert(ItemPointerGetBlockNumber(&heapTuple->t_data->t_ctid) ==
1907 blkno);
1908 offnum = ItemPointerGetOffsetNumber(&heapTuple->t_data->t_ctid);
1909 at_chain_start = false;
1911 }
1912 else
1913 break; /* end of chain */
1914 }
1915
1916 return false;
1917}
bool HeapTupleIsSurelyDead(HeapTuple htup, GlobalVisState *vistest)
static bool HeapTupleIsHotUpdated(const HeapTupleData *tuple)
static bool HeapTupleIsHeapOnly(const HeapTupleData *tuple)
static void ItemPointerSet(ItemPointerData *pointer, BlockNumber blockNumber, OffsetNumber offNum)
Definition itemptr.h:135
static void ItemPointerSetOffsetNumber(ItemPointerData *pointer, OffsetNumber offsetNumber)
Definition itemptr.h:158
static const struct exclude_list_item skip[]
GlobalVisState * GlobalVisTestFor(Relation rel)
Definition procarray.c:4118
TransactionId RecentXmin
Definition snapmgr.c:160

References Assert, BufferGetBlockNumber(), BufferGetPage(), fb(), GlobalVisTestFor(), HeapCheckForSerializableConflictOut(), HeapTupleHeaderGetUpdateXid(), HeapTupleHeaderGetXmin(), HeapTupleIsHeapOnly(), HeapTupleIsHotUpdated(), HeapTupleIsSurelyDead(), HeapTupleSatisfiesVisibility(), InvalidTransactionId, ItemIdGetLength, ItemIdGetRedirect, ItemIdIsNormal, ItemIdIsRedirected, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), ItemPointerSet(), ItemPointerSetOffsetNumber(), PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PredicateLockTID(), RecentXmin, RelationGetRelid, skip, TransactionIdEquals, and TransactionIdIsValid.

Referenced by BitmapHeapScanNextBlock(), heap_index_delete_tuples(), and heapam_index_fetch_tuple().

◆ heap_index_delete_tuples()

TransactionId heap_index_delete_tuples ( Relation  rel,
TM_IndexDeleteOp delstate 
)
extern

Definition at line 8199 of file heapam.c.

8200{
8201 /* Initial assumption is that earlier pruning took care of conflict */
8202 TransactionId snapshotConflictHorizon = InvalidTransactionId;
8205 Page page = NULL;
8208#ifdef USE_PREFETCH
8211#endif
8213 int finalndeltids = 0,
8214 nblocksaccessed = 0;
8215
8216 /* State that's only used in bottom-up index deletion case */
8217 int nblocksfavorable = 0;
8218 int curtargetfreespace = delstate->bottomupfreespace,
8219 lastfreespace = 0,
8220 actualfreespace = 0;
8221 bool bottomup_final_block = false;
8222
8224
8225 /* Sort caller's deltids array by TID for further processing */
8227
8228 /*
8229 * Bottom-up case: resort deltids array in an order attuned to where the
8230 * greatest number of promising TIDs are to be found, and determine how
8231 * many blocks from the start of sorted array should be considered
8232 * favorable. This will also shrink the deltids array in order to
8233 * eliminate completely unfavorable blocks up front.
8234 */
8235 if (delstate->bottomup)
8237
8238#ifdef USE_PREFETCH
8239 /* Initialize prefetch state. */
8241 prefetch_state.next_item = 0;
8242 prefetch_state.ndeltids = delstate->ndeltids;
8243 prefetch_state.deltids = delstate->deltids;
8244
8245 /*
8246 * Determine the prefetch distance that we will attempt to maintain.
8247 *
8248 * Since the caller holds a buffer lock somewhere in rel, we'd better make
8249 * sure that isn't a catalog relation before we call code that does
8250 * syscache lookups, to avoid risk of deadlock.
8251 */
8252 if (IsCatalogRelation(rel))
8254 else
8257
8258 /* Cap initial prefetch distance for bottom-up deletion caller */
8259 if (delstate->bottomup)
8260 {
8264 }
8265
8266 /* Start prefetching. */
8268#endif
8269
8270 /* Iterate over deltids, determine which to delete, check their horizon */
8271 Assert(delstate->ndeltids > 0);
8272 for (int i = 0; i < delstate->ndeltids; i++)
8273 {
8274 TM_IndexDelete *ideltid = &delstate->deltids[i];
8275 TM_IndexStatus *istatus = delstate->status + ideltid->id;
8276 ItemPointer htid = &ideltid->tid;
8277 OffsetNumber offnum;
8278
8279 /*
8280 * Read buffer, and perform required extra steps each time a new block
8281 * is encountered. Avoid refetching if it's the same block as the one
8282 * from the last htid.
8283 */
8284 if (blkno == InvalidBlockNumber ||
8286 {
8287 /*
8288 * Consider giving up early for bottom-up index deletion caller
8289 * first. (Only prefetch next-next block afterwards, when it
8290 * becomes clear that we're at least going to access the next
8291 * block in line.)
8292 *
8293 * Sometimes the first block frees so much space for bottom-up
8294 * caller that the deletion process can end without accessing any
8295 * more blocks. It is usually necessary to access 2 or 3 blocks
8296 * per bottom-up deletion operation, though.
8297 */
8298 if (delstate->bottomup)
8299 {
8300 /*
8301 * We often allow caller to delete a few additional items
8302 * whose entries we reached after the point that space target
8303 * from caller was satisfied. The cost of accessing the page
8304 * was already paid at that point, so it made sense to finish
8305 * it off. When that happened, we finalize everything here
8306 * (by finishing off the whole bottom-up deletion operation
8307 * without needlessly paying the cost of accessing any more
8308 * blocks).
8309 */
8311 break;
8312
8313 /*
8314 * Give up when we didn't enable our caller to free any
8315 * additional space as a result of processing the page that we
8316 * just finished up with. This rule is the main way in which
8317 * we keep the cost of bottom-up deletion under control.
8318 */
8320 break;
8321 lastfreespace = actualfreespace; /* for next time */
8322
8323 /*
8324 * Deletion operation (which is bottom-up) will definitely
8325 * access the next block in line. Prepare for that now.
8326 *
8327 * Decay target free space so that we don't hang on for too
8328 * long with a marginal case. (Space target is only truly
8329 * helpful when it allows us to recognize that we don't need
8330 * to access more than 1 or 2 blocks to satisfy caller due to
8331 * agreeable workload characteristics.)
8332 *
8333 * We are a bit more patient when we encounter contiguous
8334 * blocks, though: these are treated as favorable blocks. The
8335 * decay process is only applied when the next block in line
8336 * is not a favorable/contiguous block. This is not an
8337 * exception to the general rule; we still insist on finding
8338 * at least one deletable item per block accessed. See
8339 * bottomup_nblocksfavorable() for full details of the theory
8340 * behind favorable blocks and heap block locality in general.
8341 *
8342 * Note: The first block in line is always treated as a
8343 * favorable block, so the earliest possible point that the
8344 * decay can be applied is just before we access the second
8345 * block in line. The Assert() verifies this for us.
8346 */
8348 if (nblocksfavorable > 0)
8350 else
8351 curtargetfreespace /= 2;
8352 }
8353
8354 /* release old buffer */
8355 if (BufferIsValid(buf))
8357
8359 buf = ReadBuffer(rel, blkno);
8361 Assert(!delstate->bottomup ||
8363
8364#ifdef USE_PREFETCH
8365
8366 /*
8367 * To maintain the prefetch distance, prefetch one more page for
8368 * each page we read.
8369 */
8371#endif
8372
8374
8375 page = BufferGetPage(buf);
8376 maxoff = PageGetMaxOffsetNumber(page);
8377 }
8378
8379 /*
8380 * In passing, detect index corruption involving an index page with a
8381 * TID that points to a location in the heap that couldn't possibly be
8382 * correct. We only do this with actual TIDs from caller's index page
8383 * (not items reached by traversing through a HOT chain).
8384 */
8386
8387 if (istatus->knowndeletable)
8388 Assert(!delstate->bottomup && !istatus->promising);
8389 else
8390 {
8391 ItemPointerData tmp = *htid;
8393
8394 /* Are any tuples from this HOT chain non-vacuumable? */
8396 &heapTuple, NULL, true))
8397 continue; /* can't delete entry */
8398
8399 /* Caller will delete, since whole HOT chain is vacuumable */
8400 istatus->knowndeletable = true;
8401
8402 /* Maintain index free space info for bottom-up deletion case */
8403 if (delstate->bottomup)
8404 {
8405 Assert(istatus->freespace > 0);
8406 actualfreespace += istatus->freespace;
8408 bottomup_final_block = true;
8409 }
8410 }
8411
8412 /*
8413 * Maintain snapshotConflictHorizon value for deletion operation as a
8414 * whole by advancing current value using heap tuple headers. This is
8415 * loosely based on the logic for pruning a HOT chain.
8416 */
8418 priorXmax = InvalidTransactionId; /* cannot check first XMIN */
8419 for (;;)
8420 {
8421 ItemId lp;
8422 HeapTupleHeader htup;
8423
8424 /* Sanity check (pure paranoia) */
8425 if (offnum < FirstOffsetNumber)
8426 break;
8427
8428 /*
8429 * An offset past the end of page's line pointer array is possible
8430 * when the array was truncated
8431 */
8432 if (offnum > maxoff)
8433 break;
8434
8435 lp = PageGetItemId(page, offnum);
8437 {
8438 offnum = ItemIdGetRedirect(lp);
8439 continue;
8440 }
8441
8442 /*
8443 * We'll often encounter LP_DEAD line pointers (especially with an
8444 * entry marked knowndeletable by our caller up front). No heap
8445 * tuple headers get examined for an htid that leads us to an
8446 * LP_DEAD item. This is okay because the earlier pruning
8447 * operation that made the line pointer LP_DEAD in the first place
8448 * must have considered the original tuple header as part of
8449 * generating its own snapshotConflictHorizon value.
8450 *
8451 * Relying on XLOG_HEAP2_PRUNE_VACUUM_SCAN records like this is
8452 * the same strategy that index vacuuming uses in all cases. Index
8453 * VACUUM WAL records don't even have a snapshotConflictHorizon
8454 * field of their own for this reason.
8455 */
8456 if (!ItemIdIsNormal(lp))
8457 break;
8458
8459 htup = (HeapTupleHeader) PageGetItem(page, lp);
8460
8461 /*
8462 * Check the tuple XMIN against prior XMAX, if any
8463 */
8466 break;
8467
8469 &snapshotConflictHorizon);
8470
8471 /*
8472 * If the tuple is not HOT-updated, then we are at the end of this
8473 * HOT-chain. No need to visit later tuples from the same update
8474 * chain (they get their own index entries) -- just move on to
8475 * next htid from index AM caller.
8476 */
8477 if (!HeapTupleHeaderIsHotUpdated(htup))
8478 break;
8479
8480 /* Advance to next HOT chain member */
8481 Assert(ItemPointerGetBlockNumber(&htup->t_ctid) == blkno);
8482 offnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
8484 }
8485
8486 /* Enable further/final shrinking of deltids for caller */
8487 finalndeltids = i + 1;
8488 }
8489
8491
8492 /*
8493 * Shrink deltids array to exclude non-deletable entries at the end. This
8494 * is not just a minor optimization. Final deltids array size might be
8495 * zero for a bottom-up caller. Index AM is explicitly allowed to rely on
8496 * ndeltids being zero in all cases with zero total deletable entries.
8497 */
8498 Assert(finalndeltids > 0 || delstate->bottomup);
8499 delstate->ndeltids = finalndeltids;
8500
8501 return snapshotConflictHorizon;
8502}
int maintenance_io_concurrency
Definition bufmgr.c:192
#define Min(x, y)
Definition c.h:1019
bool IsCatalogRelation(Relation relation)
Definition catalog.c:104
static int bottomup_sort_and_shrink(TM_IndexDeleteOp *delstate)
Definition heapam.c:8756
void HeapTupleHeaderAdvanceConflictHorizon(HeapTupleHeader tuple, TransactionId *snapshotConflictHorizon)
Definition heapam.c:8054
#define BOTTOMUP_MAX_NBLOCKS
Definition heapam.c:189
static void index_delete_check_htid(TM_IndexDeleteOp *delstate, Page page, OffsetNumber maxoff, const ItemPointerData *htid, TM_IndexStatus *istatus)
Definition heapam.c:8139
bool heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer, Snapshot snapshot, HeapTuple heapTuple, bool *all_dead, bool first_call)
Definition heapam.c:1779
static void index_delete_sort(TM_IndexDeleteOp *delstate)
Definition heapam.c:8544
static char buf[DEFAULT_XLOG_SEG_SIZE]
#define InitNonVacuumableSnapshot(snapshotdata, vistestp)
Definition snapmgr.h:50
int get_tablespace_maintenance_io_concurrency(Oid spcid)
Definition spccache.c:230

References Assert, BOTTOMUP_MAX_NBLOCKS, bottomup_sort_and_shrink(), buf, BUFFER_LOCK_SHARE, BufferGetPage(), BufferIsValid(), fb(), FirstOffsetNumber, get_tablespace_maintenance_io_concurrency(), GlobalVisTestFor(), heap_hot_search_buffer(), HeapTupleHeaderAdvanceConflictHorizon(), HeapTupleHeaderGetUpdateXid(), HeapTupleHeaderGetXmin(), HeapTupleHeaderIsHotUpdated(), i, index_delete_check_htid(), index_delete_sort(), InitNonVacuumableSnapshot, InvalidBlockNumber, InvalidBuffer, InvalidOffsetNumber, InvalidTransactionId, IsCatalogRelation(), ItemIdGetRedirect, ItemIdIsNormal, ItemIdIsRedirected, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), LockBuffer(), maintenance_io_concurrency, Min, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), RelationData::rd_rel, ReadBuffer(), HeapTupleHeaderData::t_ctid, TransactionIdEquals, TransactionIdIsValid, and UnlockReleaseBuffer().

◆ heap_inplace_lock()

bool heap_inplace_lock ( Relation  relation,
HeapTuple  oldtup_ptr,
Buffer  buffer,
void(*)(void *)  release_callback,
void arg 
)
extern

Definition at line 6437 of file heapam.c.

6440{
6441 HeapTupleData oldtup = *oldtup_ptr; /* minimize diff vs. heap_update() */
6442 TM_Result result;
6443 bool ret;
6444
6445#ifdef USE_ASSERT_CHECKING
6446 if (RelationGetRelid(relation) == RelationRelationId)
6448#endif
6449
6450 Assert(BufferIsValid(buffer));
6451
6452 /*
6453 * Register shared cache invals if necessary. Other sessions may finish
6454 * inplace updates of this tuple between this step and LockTuple(). Since
6455 * inplace updates don't change cache keys, that's harmless.
6456 *
6457 * While it's tempting to register invals only after confirming we can
6458 * return true, the following obstacle precludes reordering steps that
6459 * way. Registering invals might reach a CatalogCacheInitializeCache()
6460 * that locks "buffer". That would hang indefinitely if running after our
6461 * own LockBuffer(). Hence, we must register invals before LockBuffer().
6462 */
6464
6465 LockTuple(relation, &oldtup.t_self, InplaceUpdateTupleLock);
6467
6468 /*----------
6469 * Interpret HeapTupleSatisfiesUpdate() like heap_update() does, except:
6470 *
6471 * - wait unconditionally
6472 * - already locked tuple above, since inplace needs that unconditionally
6473 * - don't recheck header after wait: simpler to defer to next iteration
6474 * - don't try to continue even if the updater aborts: likewise
6475 * - no crosscheck
6476 */
6478 buffer);
6479
6480 if (result == TM_Invisible)
6481 {
6482 /* no known way this can happen */
6483 ereport(ERROR,
6485 errmsg_internal("attempted to overwrite invisible tuple")));
6486 }
6487 else if (result == TM_SelfModified)
6488 {
6489 /*
6490 * CREATE INDEX might reach this if an expression is silly enough to
6491 * call e.g. SELECT ... FROM pg_class FOR SHARE. C code of other SQL
6492 * statements might get here after a heap_update() of the same row, in
6493 * the absence of an intervening CommandCounterIncrement().
6494 */
6495 ereport(ERROR,
6497 errmsg("tuple to be updated was already modified by an operation triggered by the current command")));
6498 }
6499 else if (result == TM_BeingModified)
6500 {
6503
6505 infomask = oldtup.t_data->t_infomask;
6506
6508 {
6511 int remain;
6512
6514 lockmode, NULL))
6515 {
6518 ret = false;
6520 relation, &oldtup.t_self, XLTW_Update,
6521 &remain);
6522 }
6523 else
6524 ret = true;
6525 }
6527 ret = true;
6529 ret = true;
6530 else
6531 {
6534 ret = false;
6535 XactLockTableWait(xwait, relation, &oldtup.t_self,
6536 XLTW_Update);
6537 }
6538 }
6539 else
6540 {
6541 ret = (result == TM_Ok);
6542 if (!ret)
6543 {
6546 }
6547 }
6548
6549 /*
6550 * GetCatalogSnapshot() relies on invalidation messages to know when to
6551 * take a new snapshot. COMMIT of xwait is responsible for sending the
6552 * invalidation. We're not acquiring heavyweight locks sufficient to
6553 * block if not yet sent, so we must take a new snapshot to ensure a later
6554 * attempt has a fair chance. While we don't need this if xwait aborted,
6555 * don't bother optimizing that.
6556 */
6557 if (!ret)
6558 {
6559 UnlockTuple(relation, &oldtup.t_self, InplaceUpdateTupleLock);
6562 }
6563 return ret;
6564}
Datum arg
Definition elog.c:1322
static bool HEAP_XMAX_IS_KEYSHR_LOCKED(uint16 infomask)
void CacheInvalidateHeapTupleInplace(Relation relation, HeapTuple key_equivalent_tuple)
Definition inval.c:1593
void ForgetInplace_Inval(void)
Definition inval.c:1286
void UnlockTuple(Relation relation, const ItemPointerData *tid, LOCKMODE lockmode)
Definition lmgr.c:601
void LockTuple(Relation relation, const ItemPointerData *tid, LOCKMODE lockmode)
Definition lmgr.c:562
@ XLTW_Update
Definition lmgr.h:27
#define InplaceUpdateTupleLock
Definition lockdefs.h:48
LockTupleMode
Definition lockoptions.h:51
@ LockTupleNoKeyExclusive
Definition lockoptions.h:57
MultiXactStatus
Definition multixact.h:37
@ MultiXactStatusNoKeyUpdate
Definition multixact.h:43
void InvalidateCatalogSnapshot(void)
Definition snapmgr.c:455
CommandId GetCurrentCommandId(bool used)
Definition xact.c:830

References arg, Assert, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferIsValid(), CacheInvalidateHeapTupleInplace(), DoesMultiXactIdConflict(), ereport, errcode(), errmsg(), errmsg_internal(), ERROR, fb(), ForgetInplace_Inval(), GetCurrentCommandId(), HEAP_XMAX_IS_KEYSHR_LOCKED(), HEAP_XMAX_IS_MULTI, HeapTupleHeaderGetRawXmax(), HeapTupleSatisfiesUpdate(), InplaceUpdateTupleLock, InvalidateCatalogSnapshot(), LockBuffer(), LockTuple(), LockTupleNoKeyExclusive, MultiXactIdWait(), MultiXactStatusNoKeyUpdate, RelationGetRelid, TM_BeingModified, TM_Invisible, TM_Ok, TM_SelfModified, TransactionIdIsCurrentTransactionId(), UnlockTuple(), XactLockTableWait(), and XLTW_Update.

Referenced by systable_inplace_update_begin().

◆ heap_inplace_unlock()

void heap_inplace_unlock ( Relation  relation,
HeapTuple  oldtup,
Buffer  buffer 
)
extern

◆ heap_inplace_update_and_unlock()

void heap_inplace_update_and_unlock ( Relation  relation,
HeapTuple  oldtup,
HeapTuple  tuple,
Buffer  buffer 
)
extern

Definition at line 6575 of file heapam.c.

6578{
6579 HeapTupleHeader htup = oldtup->t_data;
6580 uint32 oldlen;
6581 uint32 newlen;
6582 char *dst;
6583 char *src;
6584 int nmsgs = 0;
6586 bool RelcacheInitFileInval = false;
6587
6588 Assert(ItemPointerEquals(&oldtup->t_self, &tuple->t_self));
6589 oldlen = oldtup->t_len - htup->t_hoff;
6590 newlen = tuple->t_len - tuple->t_data->t_hoff;
6591 if (oldlen != newlen || htup->t_hoff != tuple->t_data->t_hoff)
6592 elog(ERROR, "wrong tuple length");
6593
6594 dst = (char *) htup + htup->t_hoff;
6595 src = (char *) tuple->t_data + tuple->t_data->t_hoff;
6596
6597 /* Like RecordTransactionCommit(), log only if needed */
6600 &RelcacheInitFileInval);
6601
6602 /*
6603 * Unlink relcache init files as needed. If unlinking, acquire
6604 * RelCacheInitLock until after associated invalidations. By doing this
6605 * in advance, if we checkpoint and then crash between inplace
6606 * XLogInsert() and inval, we don't rely on StartupXLOG() ->
6607 * RelationCacheInitFileRemove(). That uses elevel==LOG, so replay would
6608 * neglect to PANIC on EIO.
6609 */
6611
6612 /*----------
6613 * NO EREPORT(ERROR) from here till changes are complete
6614 *
6615 * Our buffer lock won't stop a reader having already pinned and checked
6616 * visibility for this tuple. Hence, we write WAL first, then mutate the
6617 * buffer. Like in MarkBufferDirtyHint() or RecordTransactionCommit(),
6618 * checkpoint delay makes that acceptable. With the usual order of
6619 * changes, a crash after memcpy() and before XLogInsert() could allow
6620 * datfrozenxid to overtake relfrozenxid:
6621 *
6622 * ["D" is a VACUUM (ONLY_DATABASE_STATS)]
6623 * ["R" is a VACUUM tbl]
6624 * D: vac_update_datfrozenxid() -> systable_beginscan(pg_class)
6625 * D: systable_getnext() returns pg_class tuple of tbl
6626 * R: memcpy() into pg_class tuple of tbl
6627 * D: raise pg_database.datfrozenxid, XLogInsert(), finish
6628 * [crash]
6629 * [recovery restores datfrozenxid w/o relfrozenxid]
6630 *
6631 * Mimic MarkBufferDirtyHint() subroutine XLogSaveBufferForHint().
6632 * Specifically, use DELAY_CHKPT_START, and copy the buffer to the stack.
6633 * The stack copy facilitates a FPI of the post-mutation block before we
6634 * accept other sessions seeing it. DELAY_CHKPT_START allows us to
6635 * XLogInsert() before MarkBufferDirty(). Since XLogSaveBufferForHint()
6636 * can operate under BUFFER_LOCK_SHARED, it can't avoid DELAY_CHKPT_START.
6637 * This function, however, likely could avoid it with the following order
6638 * of operations: MarkBufferDirty(), XLogInsert(), memcpy(). Opt to use
6639 * DELAY_CHKPT_START here, too, as a way to have fewer distinct code
6640 * patterns to analyze. Inplace update isn't so frequent that it should
6641 * pursue the small optimization of skipping DELAY_CHKPT_START.
6642 */
6646
6647 /* XLOG stuff */
6648 if (RelationNeedsWAL(relation))
6649 {
6652 char *origdata = (char *) BufferGetBlock(buffer);
6653 Page page = BufferGetPage(buffer);
6654 uint16 lower = ((PageHeader) page)->pd_lower;
6655 uint16 upper = ((PageHeader) page)->pd_upper;
6657 RelFileLocator rlocator;
6658 ForkNumber forkno;
6659 BlockNumber blkno;
6661
6662 xlrec.offnum = ItemPointerGetOffsetNumber(&tuple->t_self);
6663 xlrec.dbId = MyDatabaseId;
6665 xlrec.relcacheInitFileInval = RelcacheInitFileInval;
6666 xlrec.nmsgs = nmsgs;
6667
6670 if (nmsgs != 0)
6672 nmsgs * sizeof(SharedInvalidationMessage));
6673
6674 /* register block matching what buffer will look like after changes */
6679 BufferGetTag(buffer, &rlocator, &forkno, &blkno);
6680 Assert(forkno == MAIN_FORKNUM);
6681 XLogRegisterBlock(0, &rlocator, forkno, blkno, copied_buffer.data,
6683 XLogRegisterBufData(0, src, newlen);
6684
6685 /* inplace updates aren't decoded atm, don't log the origin */
6686
6688
6689 PageSetLSN(page, recptr);
6690 }
6691
6692 memcpy(dst, src, newlen);
6693
6694 MarkBufferDirty(buffer);
6695
6697
6698 /*
6699 * Send invalidations to shared queue. SearchSysCacheLocked1() assumes we
6700 * do this before UnlockTuple().
6701 */
6703
6706 UnlockTuple(relation, &tuple->t_self, InplaceUpdateTupleLock);
6707
6708 AcceptInvalidationMessages(); /* local processing of just-sent inval */
6709
6710 /*
6711 * Queue a transactional inval, for logical decoding and for third-party
6712 * code that might have been relying on it since long before inplace
6713 * update adopted immediate invalidation. See README.tuplock section
6714 * "Reading inplace-updated columns" for logical decoding details.
6715 */
6717 CacheInvalidateHeapTuple(relation, tuple, NULL);
6718}
void BufferGetTag(Buffer buffer, RelFileLocator *rlocator, ForkNumber *forknum, BlockNumber *blknum)
Definition bufmgr.c:4378
static Block BufferGetBlock(Buffer buffer)
Definition bufmgr.h:433
PageHeaderData * PageHeader
Definition bufpage.h:173
uint32_t uint32
Definition c.h:558
Oid MyDatabaseTableSpace
Definition globals.c:96
Oid MyDatabaseId
Definition globals.c:94
#define MinSizeOfHeapInplace
#define XLOG_HEAP_INPLACE
Definition heapam_xlog.h:40
void AcceptInvalidationMessages(void)
Definition inval.c:930
int inplaceGetInvalidationMessages(SharedInvalidationMessage **msgs, bool *RelcacheInitFileInval)
Definition inval.c:1088
void PreInplace_Inval(void)
Definition inval.c:1250
void AtInplace_Inval(void)
Definition inval.c:1263
#define IsBootstrapProcessingMode()
Definition miscadmin.h:477
Datum lower(PG_FUNCTION_ARGS)
Datum upper(PG_FUNCTION_ARGS)
#define DELAY_CHKPT_START
Definition proc.h:136
ForkNumber
Definition relpath.h:56
PGPROC * MyProc
Definition proc.c:67
int delayChkptFlags
Definition proc.h:252
#define XLogStandbyInfoActive()
Definition xlog.h:125
void XLogRegisterBufData(uint8 block_id, const void *data, uint32 len)
Definition xloginsert.c:409
void XLogRegisterBlock(uint8 block_id, RelFileLocator *rlocator, ForkNumber forknum, BlockNumber blknum, const PageData *page, uint8 flags)
Definition xloginsert.c:313

References AcceptInvalidationMessages(), Assert, AtInplace_Inval(), BUFFER_LOCK_UNLOCK, BufferGetBlock(), BufferGetPage(), BufferGetTag(), CacheInvalidateHeapTuple(), DELAY_CHKPT_START, PGPROC::delayChkptFlags, elog, END_CRIT_SECTION, ERROR, fb(), inplaceGetInvalidationMessages(), InplaceUpdateTupleLock, IsBootstrapProcessingMode, ItemPointerEquals(), ItemPointerGetOffsetNumber(), LockBuffer(), lower(), MAIN_FORKNUM, MarkBufferDirty(), MinSizeOfHeapInplace, MyDatabaseId, MyDatabaseTableSpace, MyProc, PageSetLSN(), PreInplace_Inval(), REGBUF_STANDARD, RelationNeedsWAL, START_CRIT_SECTION, HeapTupleData::t_data, HeapTupleHeaderData::t_hoff, HeapTupleData::t_len, HeapTupleData::t_self, UnlockTuple(), upper(), XLOG_HEAP_INPLACE, XLogBeginInsert(), XLogInsert(), XLogRegisterBlock(), XLogRegisterBufData(), XLogRegisterData(), and XLogStandbyInfoActive.

Referenced by systable_inplace_update_finish().

◆ heap_insert()

void heap_insert ( Relation  relation,
HeapTuple  tup,
CommandId  cid,
int  options,
BulkInsertState  bistate 
)
extern

Definition at line 2142 of file heapam.c.

2144{
2147 Buffer buffer;
2148 Buffer vmbuffer = InvalidBuffer;
2149 bool all_visible_cleared = false;
2150
2151 /* Cheap, simplistic check that the tuple matches the rel's rowtype. */
2154
2155 AssertHasSnapshotForToast(relation);
2156
2157 /*
2158 * Fill in tuple header fields and toast the tuple if necessary.
2159 *
2160 * Note: below this point, heaptup is the data we actually intend to store
2161 * into the relation; tup is the caller's original untoasted data.
2162 */
2163 heaptup = heap_prepare_insert(relation, tup, xid, cid, options);
2164
2165 /*
2166 * Find buffer to insert this tuple into. If the page is all visible,
2167 * this will also pin the requisite visibility map page.
2168 */
2169 buffer = RelationGetBufferForTuple(relation, heaptup->t_len,
2170 InvalidBuffer, options, bistate,
2171 &vmbuffer, NULL,
2172 0);
2173
2174 /*
2175 * We're about to do the actual insert -- but check for conflict first, to
2176 * avoid possibly having to roll back work we've just done.
2177 *
2178 * This is safe without a recheck as long as there is no possibility of
2179 * another process scanning the page between this check and the insert
2180 * being visible to the scan (i.e., an exclusive buffer content lock is
2181 * continuously held from this point until the tuple insert is visible).
2182 *
2183 * For a heap insert, we only need to check for table-level SSI locks. Our
2184 * new tuple can't possibly conflict with existing tuple locks, and heap
2185 * page locks are only consolidated versions of tuple locks; they do not
2186 * lock "gaps" as index page locks do. So we don't need to specify a
2187 * buffer when making the call, which makes for a faster check.
2188 */
2190
2191 /* NO EREPORT(ERROR) from here till changes are logged */
2193
2194 RelationPutHeapTuple(relation, buffer, heaptup,
2196
2197 if (PageIsAllVisible(BufferGetPage(buffer)))
2198 {
2199 all_visible_cleared = true;
2201 visibilitymap_clear(relation,
2203 vmbuffer, VISIBILITYMAP_VALID_BITS);
2204 }
2205
2206 /*
2207 * XXX Should we set PageSetPrunable on this page ?
2208 *
2209 * The inserting transaction may eventually abort thus making this tuple
2210 * DEAD and hence available for pruning. Though we don't want to optimize
2211 * for aborts, if no other tuple in this page is UPDATEd/DELETEd, the
2212 * aborted tuple will never be pruned until next vacuum is triggered.
2213 *
2214 * If you do add PageSetPrunable here, add it in heap_xlog_insert too.
2215 */
2216
2217 MarkBufferDirty(buffer);
2218
2219 /* XLOG stuff */
2220 if (RelationNeedsWAL(relation))
2221 {
2225 Page page = BufferGetPage(buffer);
2226 uint8 info = XLOG_HEAP_INSERT;
2227 int bufflags = 0;
2228
2229 /*
2230 * If this is a catalog, we need to transmit combo CIDs to properly
2231 * decode, so log that as well.
2232 */
2234 log_heap_new_cid(relation, heaptup);
2235
2236 /*
2237 * If this is the single and first tuple on page, we can reinit the
2238 * page instead of restoring the whole thing. Set flag, and hide
2239 * buffer references from XLogInsert.
2240 */
2243 {
2244 info |= XLOG_HEAP_INIT_PAGE;
2246 }
2247
2248 xlrec.offnum = ItemPointerGetOffsetNumber(&heaptup->t_self);
2249 xlrec.flags = 0;
2255
2256 /*
2257 * For logical decoding, we need the tuple even if we're doing a full
2258 * page write, so make sure it's included even if we take a full-page
2259 * image. (XXX We could alternatively store a pointer into the FPW).
2260 */
2261 if (RelationIsLogicallyLogged(relation) &&
2263 {
2266
2267 if (IsToastRelation(relation))
2269 }
2270
2273
2274 xlhdr.t_infomask2 = heaptup->t_data->t_infomask2;
2275 xlhdr.t_infomask = heaptup->t_data->t_infomask;
2276 xlhdr.t_hoff = heaptup->t_data->t_hoff;
2277
2278 /*
2279 * note we mark xlhdr as belonging to buffer; if XLogInsert decides to
2280 * write the whole page to the xlog, we don't need to store
2281 * xl_heap_header in the xlog.
2282 */
2285 /* PG73FORMAT: write bitmap [+ padding] [+ oid] + data */
2287 (char *) heaptup->t_data + SizeofHeapTupleHeader,
2289
2290 /* filtering by origin on a row level is much more efficient */
2292
2293 recptr = XLogInsert(RM_HEAP_ID, info);
2294
2295 PageSetLSN(page, recptr);
2296 }
2297
2299
2300 UnlockReleaseBuffer(buffer);
2301 if (vmbuffer != InvalidBuffer)
2302 ReleaseBuffer(vmbuffer);
2303
2304 /*
2305 * If tuple is cacheable, mark it for invalidation from the caches in case
2306 * we abort. Note it is OK to do this after releasing the buffer, because
2307 * the heaptup data structure is all in local memory, not in the shared
2308 * buffer.
2309 */
2311
2312 /* Note: speculative insertions are counted too, even if aborted later */
2313 pgstat_count_heap_insert(relation, 1);
2314
2315 /*
2316 * If heaptup is a private copy, release it. Don't forget to copy t_self
2317 * back to the caller's image, too.
2318 */
2319 if (heaptup != tup)
2320 {
2321 tup->t_self = heaptup->t_self;
2323 }
2324}
uint8_t uint8
Definition c.h:556
static HeapTuple heap_prepare_insert(Relation relation, HeapTuple tup, TransactionId xid, CommandId cid, int options)
Definition heapam.c:2333
#define HEAP_INSERT_SPECULATIVE
Definition heapam.h:40
#define HEAP_INSERT_NO_LOGICAL
Definition heapam.h:39
#define XLH_INSERT_ON_TOAST_RELATION
Definition heapam_xlog.h:76
#define XLH_INSERT_IS_SPECULATIVE
Definition heapam_xlog.h:74
#define XLH_INSERT_ALL_VISIBLE_CLEARED
Definition heapam_xlog.h:72
#define XLOG_HEAP_INSERT
Definition heapam_xlog.h:33
#define SizeOfHeapInsert
#define XLH_INSERT_CONTAINS_NEW_TUPLE
Definition heapam_xlog.h:75
#define XLOG_HEAP_INIT_PAGE
Definition heapam_xlog.h:47
void RelationPutHeapTuple(Relation relation, Buffer buffer, HeapTuple tuple, bool token)
Definition hio.c:35
Buffer RelationGetBufferForTuple(Relation relation, Size len, Buffer otherBuffer, int options, BulkInsertState bistate, Buffer *vmbuffer, Buffer *vmbuffer_other, int num_pages)
Definition hio.c:500
#define HeapTupleHeaderGetNatts(tup)
void pgstat_count_heap_insert(Relation rel, PgStat_Counter n)
#define RelationIsLogicallyLogged(relation)
Definition rel.h:710
#define RelationGetNumberOfAttributes(relation)
Definition rel.h:520
#define REGBUF_KEEP_DATA
Definition xloginsert.h:36
#define REGBUF_WILL_INIT
Definition xloginsert.h:34

References Assert, AssertHasSnapshotForToast(), BufferGetBlockNumber(), BufferGetPage(), CacheInvalidateHeapTuple(), CheckForSerializableConflictIn(), END_CRIT_SECTION, fb(), FirstOffsetNumber, GetCurrentTransactionId(), heap_freetuple(), HEAP_INSERT_NO_LOGICAL, HEAP_INSERT_SPECULATIVE, heap_prepare_insert(), HeapTupleHeaderGetNatts, InvalidBlockNumber, InvalidBuffer, IsToastRelation(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), log_heap_new_cid(), MarkBufferDirty(), PageClearAllVisible(), PageGetMaxOffsetNumber(), PageIsAllVisible(), PageSetLSN(), pgstat_count_heap_insert(), REGBUF_KEEP_DATA, REGBUF_STANDARD, REGBUF_WILL_INIT, RelationGetBufferForTuple(), RelationGetNumberOfAttributes, RelationIsAccessibleInLogicalDecoding, RelationIsLogicallyLogged, RelationNeedsWAL, RelationPutHeapTuple(), ReleaseBuffer(), SizeOfHeapHeader, SizeOfHeapInsert, SizeofHeapTupleHeader, START_CRIT_SECTION, UnlockReleaseBuffer(), visibilitymap_clear(), VISIBILITYMAP_VALID_BITS, XLH_INSERT_ALL_VISIBLE_CLEARED, XLH_INSERT_CONTAINS_NEW_TUPLE, XLH_INSERT_IS_SPECULATIVE, XLH_INSERT_ON_TOAST_RELATION, XLOG_HEAP_INIT_PAGE, XLOG_HEAP_INSERT, XLOG_INCLUDE_ORIGIN, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), XLogRegisterData(), and XLogSetRecordFlags().

Referenced by heapam_tuple_insert(), heapam_tuple_insert_speculative(), simple_heap_insert(), and toast_save_datum().

◆ heap_lock_tuple()

TM_Result heap_lock_tuple ( Relation  relation,
HeapTuple  tuple,
CommandId  cid,
LockTupleMode  mode,
LockWaitPolicy  wait_policy,
bool  follow_updates,
Buffer buffer,
TM_FailureData tmfd 
)
extern

Definition at line 4644 of file heapam.c.

4648{
4649 TM_Result result;
4650 ItemPointer tid = &(tuple->t_self);
4651 ItemId lp;
4652 Page page;
4653 Buffer vmbuffer = InvalidBuffer;
4654 BlockNumber block;
4655 TransactionId xid,
4656 xmax;
4660 bool first_time = true;
4661 bool skip_tuple_lock = false;
4662 bool have_tuple_lock = false;
4663 bool cleared_all_frozen = false;
4664
4665 *buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
4666 block = ItemPointerGetBlockNumber(tid);
4667
4668 /*
4669 * Before locking the buffer, pin the visibility map page if it appears to
4670 * be necessary. Since we haven't got the lock yet, someone else might be
4671 * in the middle of changing this, so we'll need to recheck after we have
4672 * the lock.
4673 */
4674 if (PageIsAllVisible(BufferGetPage(*buffer)))
4675 visibilitymap_pin(relation, block, &vmbuffer);
4676
4678
4679 page = BufferGetPage(*buffer);
4682
4683 tuple->t_data = (HeapTupleHeader) PageGetItem(page, lp);
4684 tuple->t_len = ItemIdGetLength(lp);
4685 tuple->t_tableOid = RelationGetRelid(relation);
4686
4687l3:
4688 result = HeapTupleSatisfiesUpdate(tuple, cid, *buffer);
4689
4690 if (result == TM_Invisible)
4691 {
4692 /*
4693 * This is possible, but only when locking a tuple for ON CONFLICT DO
4694 * SELECT/UPDATE. We return this value here rather than throwing an
4695 * error in order to give that case the opportunity to throw a more
4696 * specific error.
4697 */
4698 result = TM_Invisible;
4699 goto out_locked;
4700 }
4701 else if (result == TM_BeingModified ||
4702 result == TM_Updated ||
4703 result == TM_Deleted)
4704 {
4708 bool require_sleep;
4709 ItemPointerData t_ctid;
4710
4711 /* must copy state data before unlocking buffer */
4713 infomask = tuple->t_data->t_infomask;
4714 infomask2 = tuple->t_data->t_infomask2;
4715 ItemPointerCopy(&tuple->t_data->t_ctid, &t_ctid);
4716
4718
4719 /*
4720 * If any subtransaction of the current top transaction already holds
4721 * a lock as strong as or stronger than what we're requesting, we
4722 * effectively hold the desired lock already. We *must* succeed
4723 * without trying to take the tuple lock, else we will deadlock
4724 * against anyone wanting to acquire a stronger lock.
4725 *
4726 * Note we only do this the first time we loop on the HTSU result;
4727 * there is no point in testing in subsequent passes, because
4728 * evidently our own transaction cannot have acquired a new lock after
4729 * the first time we checked.
4730 */
4731 if (first_time)
4732 {
4733 first_time = false;
4734
4736 {
4737 int i;
4738 int nmembers;
4739 MultiXactMember *members;
4740
4741 /*
4742 * We don't need to allow old multixacts here; if that had
4743 * been the case, HeapTupleSatisfiesUpdate would have returned
4744 * MayBeUpdated and we wouldn't be here.
4745 */
4746 nmembers =
4747 GetMultiXactIdMembers(xwait, &members, false,
4749
4750 for (i = 0; i < nmembers; i++)
4751 {
4752 /* only consider members of our own transaction */
4753 if (!TransactionIdIsCurrentTransactionId(members[i].xid))
4754 continue;
4755
4756 if (TUPLOCK_from_mxstatus(members[i].status) >= mode)
4757 {
4758 pfree(members);
4759 result = TM_Ok;
4760 goto out_unlocked;
4761 }
4762 else
4763 {
4764 /*
4765 * Disable acquisition of the heavyweight tuple lock.
4766 * Otherwise, when promoting a weaker lock, we might
4767 * deadlock with another locker that has acquired the
4768 * heavyweight tuple lock and is waiting for our
4769 * transaction to finish.
4770 *
4771 * Note that in this case we still need to wait for
4772 * the multixact if required, to avoid acquiring
4773 * conflicting locks.
4774 */
4775 skip_tuple_lock = true;
4776 }
4777 }
4778
4779 if (members)
4780 pfree(members);
4781 }
4783 {
4784 switch (mode)
4785 {
4786 case LockTupleKeyShare:
4790 result = TM_Ok;
4791 goto out_unlocked;
4792 case LockTupleShare:
4795 {
4796 result = TM_Ok;
4797 goto out_unlocked;
4798 }
4799 break;
4802 {
4803 result = TM_Ok;
4804 goto out_unlocked;
4805 }
4806 break;
4807 case LockTupleExclusive:
4810 {
4811 result = TM_Ok;
4812 goto out_unlocked;
4813 }
4814 break;
4815 }
4816 }
4817 }
4818
4819 /*
4820 * Initially assume that we will have to wait for the locking
4821 * transaction(s) to finish. We check various cases below in which
4822 * this can be turned off.
4823 */
4824 require_sleep = true;
4825 if (mode == LockTupleKeyShare)
4826 {
4827 /*
4828 * If we're requesting KeyShare, and there's no update present, we
4829 * don't need to wait. Even if there is an update, we can still
4830 * continue if the key hasn't been modified.
4831 *
4832 * However, if there are updates, we need to walk the update chain
4833 * to mark future versions of the row as locked, too. That way,
4834 * if somebody deletes that future version, we're protected
4835 * against the key going away. This locking of future versions
4836 * could block momentarily, if a concurrent transaction is
4837 * deleting a key; or it could return a value to the effect that
4838 * the transaction deleting the key has already committed. So we
4839 * do this before re-locking the buffer; otherwise this would be
4840 * prone to deadlocks.
4841 *
4842 * Note that the TID we're locking was grabbed before we unlocked
4843 * the buffer. For it to change while we're not looking, the
4844 * other properties we're testing for below after re-locking the
4845 * buffer would also change, in which case we would restart this
4846 * loop above.
4847 */
4849 {
4850 bool updated;
4851
4853
4854 /*
4855 * If there are updates, follow the update chain; bail out if
4856 * that cannot be done.
4857 */
4858 if (follow_updates && updated &&
4859 !ItemPointerEquals(&tuple->t_self, &t_ctid))
4860 {
4861 TM_Result res;
4862
4863 res = heap_lock_updated_tuple(relation,
4864 infomask, xwait, &t_ctid,
4866 mode);
4867 if (res != TM_Ok)
4868 {
4869 result = res;
4870 /* recovery code expects to have buffer lock held */
4872 goto failed;
4873 }
4874 }
4875
4877
4878 /*
4879 * Make sure it's still an appropriate lock, else start over.
4880 * Also, if it wasn't updated before we released the lock, but
4881 * is updated now, we start over too; the reason is that we
4882 * now need to follow the update chain to lock the new
4883 * versions.
4884 */
4885 if (!HeapTupleHeaderIsOnlyLocked(tuple->t_data) &&
4886 ((tuple->t_data->t_infomask2 & HEAP_KEYS_UPDATED) ||
4887 !updated))
4888 goto l3;
4889
4890 /* Things look okay, so we can skip sleeping */
4891 require_sleep = false;
4892
4893 /*
4894 * Note we allow Xmax to change here; other updaters/lockers
4895 * could have modified it before we grabbed the buffer lock.
4896 * However, this is not a problem, because with the recheck we
4897 * just did we ensure that they still don't conflict with the
4898 * lock we want.
4899 */
4900 }
4901 }
4902 else if (mode == LockTupleShare)
4903 {
4904 /*
4905 * If we're requesting Share, we can similarly avoid sleeping if
4906 * there's no update and no exclusive lock present.
4907 */
4910 {
4912
4913 /*
4914 * Make sure it's still an appropriate lock, else start over.
4915 * See above about allowing xmax to change.
4916 */
4919 goto l3;
4920 require_sleep = false;
4921 }
4922 }
4923 else if (mode == LockTupleNoKeyExclusive)
4924 {
4925 /*
4926 * If we're requesting NoKeyExclusive, we might also be able to
4927 * avoid sleeping; just ensure that there no conflicting lock
4928 * already acquired.
4929 */
4931 {
4933 mode, NULL))
4934 {
4935 /*
4936 * No conflict, but if the xmax changed under us in the
4937 * meantime, start over.
4938 */
4942 xwait))
4943 goto l3;
4944
4945 /* otherwise, we're good */
4946 require_sleep = false;
4947 }
4948 }
4950 {
4952
4953 /* if the xmax changed in the meantime, start over */
4956 xwait))
4957 goto l3;
4958 /* otherwise, we're good */
4959 require_sleep = false;
4960 }
4961 }
4962
4963 /*
4964 * As a check independent from those above, we can also avoid sleeping
4965 * if the current transaction is the sole locker of the tuple. Note
4966 * that the strength of the lock already held is irrelevant; this is
4967 * not about recording the lock in Xmax (which will be done regardless
4968 * of this optimization, below). Also, note that the cases where we
4969 * hold a lock stronger than we are requesting are already handled
4970 * above by not doing anything.
4971 *
4972 * Note we only deal with the non-multixact case here; MultiXactIdWait
4973 * is well equipped to deal with this situation on its own.
4974 */
4977 {
4978 /* ... but if the xmax changed in the meantime, start over */
4982 xwait))
4983 goto l3;
4985 require_sleep = false;
4986 }
4987
4988 /*
4989 * Time to sleep on the other transaction/multixact, if necessary.
4990 *
4991 * If the other transaction is an update/delete that's already
4992 * committed, then sleeping cannot possibly do any good: if we're
4993 * required to sleep, get out to raise an error instead.
4994 *
4995 * By here, we either have already acquired the buffer exclusive lock,
4996 * or we must wait for the locking transaction or multixact; so below
4997 * we ensure that we grab buffer lock after the sleep.
4998 */
4999 if (require_sleep && (result == TM_Updated || result == TM_Deleted))
5000 {
5002 goto failed;
5003 }
5004 else if (require_sleep)
5005 {
5006 /*
5007 * Acquire tuple lock to establish our priority for the tuple, or
5008 * die trying. LockTuple will release us when we are next-in-line
5009 * for the tuple. We must do this even if we are share-locking,
5010 * but not if we already have a weaker lock on the tuple.
5011 *
5012 * If we are forced to "start over" below, we keep the tuple lock;
5013 * this arranges that we stay at the head of the line while
5014 * rechecking tuple state.
5015 */
5016 if (!skip_tuple_lock &&
5017 !heap_acquire_tuplock(relation, tid, mode, wait_policy,
5019 {
5020 /*
5021 * This can only happen if wait_policy is Skip and the lock
5022 * couldn't be obtained.
5023 */
5024 result = TM_WouldBlock;
5025 /* recovery code expects to have buffer lock held */
5027 goto failed;
5028 }
5029
5031 {
5033
5034 /* We only ever lock tuples, never update them */
5035 if (status >= MultiXactStatusNoKeyUpdate)
5036 elog(ERROR, "invalid lock mode in heap_lock_tuple");
5037
5038 /* wait for multixact to end, or die trying */
5039 switch (wait_policy)
5040 {
5041 case LockWaitBlock:
5043 relation, &tuple->t_self, XLTW_Lock, NULL);
5044 break;
5045 case LockWaitSkip:
5047 status, infomask, relation,
5048 NULL, false))
5049 {
5050 result = TM_WouldBlock;
5051 /* recovery code expects to have buffer lock held */
5053 goto failed;
5054 }
5055 break;
5056 case LockWaitError:
5058 status, infomask, relation,
5060 ereport(ERROR,
5062 errmsg("could not obtain lock on row in relation \"%s\"",
5063 RelationGetRelationName(relation))));
5064
5065 break;
5066 }
5067
5068 /*
5069 * Of course, the multixact might not be done here: if we're
5070 * requesting a light lock mode, other transactions with light
5071 * locks could still be alive, as well as locks owned by our
5072 * own xact or other subxacts of this backend. We need to
5073 * preserve the surviving MultiXact members. Note that it
5074 * isn't absolutely necessary in the latter case, but doing so
5075 * is simpler.
5076 */
5077 }
5078 else
5079 {
5080 /* wait for regular transaction to end, or die trying */
5081 switch (wait_policy)
5082 {
5083 case LockWaitBlock:
5084 XactLockTableWait(xwait, relation, &tuple->t_self,
5085 XLTW_Lock);
5086 break;
5087 case LockWaitSkip:
5089 {
5090 result = TM_WouldBlock;
5091 /* recovery code expects to have buffer lock held */
5093 goto failed;
5094 }
5095 break;
5096 case LockWaitError:
5098 ereport(ERROR,
5100 errmsg("could not obtain lock on row in relation \"%s\"",
5101 RelationGetRelationName(relation))));
5102 break;
5103 }
5104 }
5105
5106 /* if there are updates, follow the update chain */
5108 !ItemPointerEquals(&tuple->t_self, &t_ctid))
5109 {
5110 TM_Result res;
5111
5112 res = heap_lock_updated_tuple(relation,
5113 infomask, xwait, &t_ctid,
5115 mode);
5116 if (res != TM_Ok)
5117 {
5118 result = res;
5119 /* recovery code expects to have buffer lock held */
5121 goto failed;
5122 }
5123 }
5124
5126
5127 /*
5128 * xwait is done, but if xwait had just locked the tuple then some
5129 * other xact could update this tuple before we get to this point.
5130 * Check for xmax change, and start over if so.
5131 */
5134 xwait))
5135 goto l3;
5136
5138 {
5139 /*
5140 * Otherwise check if it committed or aborted. Note we cannot
5141 * be here if the tuple was only locked by somebody who didn't
5142 * conflict with us; that would have been handled above. So
5143 * that transaction must necessarily be gone by now. But
5144 * don't check for this in the multixact case, because some
5145 * locker transactions might still be running.
5146 */
5147 UpdateXmaxHintBits(tuple->t_data, *buffer, xwait);
5148 }
5149 }
5150
5151 /* By here, we're certain that we hold buffer exclusive lock again */
5152
5153 /*
5154 * We may lock if previous xmax aborted, or if it committed but only
5155 * locked the tuple without updating it; or if we didn't have to wait
5156 * at all for whatever reason.
5157 */
5158 if (!require_sleep ||
5159 (tuple->t_data->t_infomask & HEAP_XMAX_INVALID) ||
5162 result = TM_Ok;
5163 else if (!ItemPointerEquals(&tuple->t_self, &tuple->t_data->t_ctid))
5164 result = TM_Updated;
5165 else
5166 result = TM_Deleted;
5167 }
5168
5169failed:
5170 if (result != TM_Ok)
5171 {
5172 Assert(result == TM_SelfModified || result == TM_Updated ||
5173 result == TM_Deleted || result == TM_WouldBlock);
5174
5175 /*
5176 * When locking a tuple under LockWaitSkip semantics and we fail with
5177 * TM_WouldBlock above, it's possible for concurrent transactions to
5178 * release the lock and set HEAP_XMAX_INVALID in the meantime. So
5179 * this assert is slightly different from the equivalent one in
5180 * heap_delete and heap_update.
5181 */
5182 Assert((result == TM_WouldBlock) ||
5183 !(tuple->t_data->t_infomask & HEAP_XMAX_INVALID));
5184 Assert(result != TM_Updated ||
5185 !ItemPointerEquals(&tuple->t_self, &tuple->t_data->t_ctid));
5186 tmfd->ctid = tuple->t_data->t_ctid;
5187 tmfd->xmax = HeapTupleHeaderGetUpdateXid(tuple->t_data);
5188 if (result == TM_SelfModified)
5189 tmfd->cmax = HeapTupleHeaderGetCmax(tuple->t_data);
5190 else
5191 tmfd->cmax = InvalidCommandId;
5192 goto out_locked;
5193 }
5194
5195 /*
5196 * If we didn't pin the visibility map page and the page has become all
5197 * visible while we were busy locking the buffer, or during some
5198 * subsequent window during which we had it unlocked, we'll have to unlock
5199 * and re-lock, to avoid holding the buffer lock across I/O. That's a bit
5200 * unfortunate, especially since we'll now have to recheck whether the
5201 * tuple has been locked or updated under us, but hopefully it won't
5202 * happen very often.
5203 */
5204 if (vmbuffer == InvalidBuffer && PageIsAllVisible(page))
5205 {
5207 visibilitymap_pin(relation, block, &vmbuffer);
5209 goto l3;
5210 }
5211
5212 xmax = HeapTupleHeaderGetRawXmax(tuple->t_data);
5213 old_infomask = tuple->t_data->t_infomask;
5214
5215 /*
5216 * If this is the first possibly-multixact-able operation in the current
5217 * transaction, set my per-backend OldestMemberMXactId setting. We can be
5218 * certain that the transaction will never become a member of any older
5219 * MultiXactIds than that. (We have to do this even if we end up just
5220 * using our own TransactionId below, since some other backend could
5221 * incorporate our XID into a MultiXact immediately afterwards.)
5222 */
5224
5225 /*
5226 * Compute the new xmax and infomask to store into the tuple. Note we do
5227 * not modify the tuple just yet, because that would leave it in the wrong
5228 * state if multixact.c elogs.
5229 */
5231 GetCurrentTransactionId(), mode, false,
5232 &xid, &new_infomask, &new_infomask2);
5233
5235
5236 /*
5237 * Store transaction information of xact locking the tuple.
5238 *
5239 * Note: Cmax is meaningless in this context, so don't set it; this avoids
5240 * possibly generating a useless combo CID. Moreover, if we're locking a
5241 * previously updated tuple, it's important to preserve the Cmax.
5242 *
5243 * Also reset the HOT UPDATE bit, but only if there's no update; otherwise
5244 * we would break the HOT chain.
5245 */
5248 tuple->t_data->t_infomask |= new_infomask;
5249 tuple->t_data->t_infomask2 |= new_infomask2;
5252 HeapTupleHeaderSetXmax(tuple->t_data, xid);
5253
5254 /*
5255 * Make sure there is no forward chain link in t_ctid. Note that in the
5256 * cases where the tuple has been updated, we must not overwrite t_ctid,
5257 * because it was set by the updater. Moreover, if the tuple has been
5258 * updated, we need to follow the update chain to lock the new versions of
5259 * the tuple as well.
5260 */
5262 tuple->t_data->t_ctid = *tid;
5263
5264 /* Clear only the all-frozen bit on visibility map if needed */
5265 if (PageIsAllVisible(page) &&
5266 visibilitymap_clear(relation, block, vmbuffer,
5268 cleared_all_frozen = true;
5269
5270
5271 MarkBufferDirty(*buffer);
5272
5273 /*
5274 * XLOG stuff. You might think that we don't need an XLOG record because
5275 * there is no state change worth restoring after a crash. You would be
5276 * wrong however: we have just written either a TransactionId or a
5277 * MultiXactId that may never have been seen on disk before, and we need
5278 * to make sure that there are XLOG entries covering those ID numbers.
5279 * Else the same IDs might be re-used after a crash, which would be
5280 * disastrous if this page made it to disk before the crash. Essentially
5281 * we have to enforce the WAL log-before-data rule even in this case.
5282 * (Also, in a PITR log-shipping or 2PC environment, we have to have XLOG
5283 * entries for everything anyway.)
5284 */
5285 if (RelationNeedsWAL(relation))
5286 {
5289
5292
5293 xlrec.offnum = ItemPointerGetOffsetNumber(&tuple->t_self);
5294 xlrec.xmax = xid;
5295 xlrec.infobits_set = compute_infobits(new_infomask,
5296 tuple->t_data->t_infomask2);
5299
5300 /* we don't decode row locks atm, so no need to log the origin */
5301
5303
5304 PageSetLSN(page, recptr);
5305 }
5306
5308
5309 result = TM_Ok;
5310
5313
5315 if (BufferIsValid(vmbuffer))
5316 ReleaseBuffer(vmbuffer);
5317
5318 /*
5319 * Don't update the visibility map here. Locking a tuple doesn't change
5320 * visibility info.
5321 */
5322
5323 /*
5324 * Now that we have successfully marked the tuple as locked, we can
5325 * release the lmgr tuple lock, if we had it.
5326 */
5327 if (have_tuple_lock)
5328 UnlockTupleTuplock(relation, tid, mode);
5329
5330 return result;
5331}
#define TUPLOCK_from_mxstatus(status)
Definition heapam.c:218
static bool ConditionalMultiXactIdWait(MultiXactId multi, MultiXactStatus status, uint16 infomask, Relation rel, int *remaining, bool logLockFailure)
Definition heapam.c:7876
static TM_Result heap_lock_updated_tuple(Relation rel, uint16 prior_infomask, TransactionId prior_raw_xmax, const ItemPointerData *prior_ctid, TransactionId xid, LockTupleMode mode)
Definition heapam.c:6115
static MultiXactStatus get_mxact_status_for_lock(LockTupleMode mode, bool is_update)
Definition heapam.c:4597
#define XLH_LOCK_ALL_FROZEN_CLEARED
#define XLOG_HEAP_LOCK
Definition heapam_xlog.h:39
#define SizeOfHeapLock
#define HEAP_KEYS_UPDATED
static bool HEAP_XMAX_IS_SHR_LOCKED(uint16 infomask)
static bool HEAP_XMAX_IS_EXCL_LOCKED(uint16 infomask)
static void ItemPointerCopy(const ItemPointerData *fromPointer, ItemPointerData *toPointer)
Definition itemptr.h:172
bool ConditionalXactLockTableWait(TransactionId xid, bool logLockFailure)
Definition lmgr.c:739
@ XLTW_Lock
Definition lmgr.h:29
bool log_lock_failures
Definition lock.c:54
@ LockWaitSkip
Definition lockoptions.h:42
@ LockWaitError
Definition lockoptions.h:44
@ LockTupleShare
Definition lockoptions.h:55
@ LockTupleKeyShare
Definition lockoptions.h:53
int GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **members, bool from_pgupgrade, bool isLockOnly)
Definition multixact.c:1113
static PgChecksumMode mode
@ TM_WouldBlock
Definition tableam.h:103
#define VISIBILITYMAP_ALL_FROZEN

References Assert, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetPage(), BufferIsValid(), TM_FailureData::cmax, compute_infobits(), compute_new_xmax_infomask(), ConditionalMultiXactIdWait(), ConditionalXactLockTableWait(), TM_FailureData::ctid, DoesMultiXactIdConflict(), elog, END_CRIT_SECTION, ereport, errcode(), errmsg(), ERROR, fb(), get_mxact_status_for_lock(), GetCurrentTransactionId(), GetMultiXactIdMembers(), heap_acquire_tuplock(), HEAP_KEYS_UPDATED, heap_lock_updated_tuple(), HEAP_XMAX_INVALID, HEAP_XMAX_IS_EXCL_LOCKED(), HEAP_XMAX_IS_KEYSHR_LOCKED(), HEAP_XMAX_IS_LOCKED_ONLY(), HEAP_XMAX_IS_MULTI, HEAP_XMAX_IS_SHR_LOCKED(), HeapTupleHeaderClearHotUpdated(), HeapTupleHeaderGetCmax(), HeapTupleHeaderGetRawXmax(), HeapTupleHeaderGetUpdateXid(), HeapTupleHeaderIsOnlyLocked(), HeapTupleHeaderSetXmax(), HeapTupleSatisfiesUpdate(), i, InvalidBuffer, InvalidCommandId, ItemIdGetLength, ItemIdIsNormal, ItemPointerCopy(), ItemPointerEquals(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), LockBuffer(), LockTupleExclusive, LockTupleKeyShare, LockTupleNoKeyExclusive, LockTupleShare, LockWaitBlock, LockWaitError, LockWaitSkip, log_lock_failures, MarkBufferDirty(), mode, MultiXactIdSetOldestMember(), MultiXactIdWait(), MultiXactStatusNoKeyUpdate, PageGetItem(), PageGetItemId(), PageIsAllVisible(), PageSetLSN(), pfree(), ReadBuffer(), REGBUF_STANDARD, RelationGetRelationName, RelationGetRelid, RelationNeedsWAL, ReleaseBuffer(), SizeOfHeapLock, START_CRIT_SECTION, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TM_BeingModified, TM_Deleted, TM_Invisible, TM_Ok, TM_SelfModified, TM_Updated, TM_WouldBlock, TransactionIdEquals, TransactionIdIsCurrentTransactionId(), TUPLOCK_from_mxstatus, UnlockTupleTuplock, UpdateXmaxHintBits(), VISIBILITYMAP_ALL_FROZEN, visibilitymap_clear(), visibilitymap_pin(), 
XactLockTableWait(), XLH_LOCK_ALL_FROZEN_CLEARED, XLOG_HEAP_LOCK, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), XLogRegisterData(), XLTW_Lock, TM_FailureData::xmax, and xmax_infomask_changed().

Referenced by heapam_tuple_lock().

◆ heap_multi_insert()

void heap_multi_insert ( Relation  relation,
TupleTableSlot **  slots,
int  ntuples,
CommandId  cid,
int  options,
BulkInsertState  bistate 
)
extern

Definition at line 2413 of file heapam.c.

2415{
2418 int i;
2419 int ndone;
2421 Page page;
2422 Buffer vmbuffer = InvalidBuffer;
2423 bool needwal;
2427 bool starting_with_empty_page = false;
2428 int npages = 0;
2429 int npages_used = 0;
2430
2431 /* currently not needed (thus unsupported) for heap_multi_insert() */
2433
2434 AssertHasSnapshotForToast(relation);
2435
2436 needwal = RelationNeedsWAL(relation);
2439
2440 /* Toast and set header data in all the slots */
2441 heaptuples = palloc(ntuples * sizeof(HeapTuple));
2442 for (i = 0; i < ntuples; i++)
2443 {
2444 HeapTuple tuple;
2445
2446 tuple = ExecFetchSlotHeapTuple(slots[i], true, NULL);
2447 slots[i]->tts_tableOid = RelationGetRelid(relation);
2448 tuple->t_tableOid = slots[i]->tts_tableOid;
2449 heaptuples[i] = heap_prepare_insert(relation, tuple, xid, cid,
2450 options);
2451 }
2452
2453 /*
2454 * We're about to do the actual inserts -- but check for conflict first,
2455 * to minimize the possibility of having to roll back work we've just
2456 * done.
2457 *
2458 * A check here does not definitively prevent a serialization anomaly;
2459 * that check MUST be done at least past the point of acquiring an
2460 * exclusive buffer content lock on every buffer that will be affected,
2461 * and MAY be done after all inserts are reflected in the buffers and
2462 * those locks are released; otherwise there is a race condition. Since
2463 * multiple buffers can be locked and unlocked in the loop below, and it
2464 * would not be feasible to identify and lock all of those buffers before
2465 * the loop, we must do a final check at the end.
2466 *
2467 * The check here could be omitted with no loss of correctness; it is
2468 * present strictly as an optimization.
2469 *
2470 * For heap inserts, we only need to check for table-level SSI locks. Our
2471 * new tuples can't possibly conflict with existing tuple locks, and heap
2472 * page locks are only consolidated versions of tuple locks; they do not
2473 * lock "gaps" as index page locks do. So we don't need to specify a
2474 * buffer when making the call, which makes for a faster check.
2475 */
2477
2478 ndone = 0;
2479 while (ndone < ntuples)
2480 {
2481 Buffer buffer;
2482 bool all_visible_cleared = false;
2483 bool all_frozen_set = false;
2484 int nthispage;
2485
2487
2488 /*
2489 * Compute number of pages needed to fit the to-be-inserted tuples in
2490 * the worst case. This will be used to determine how much to extend
2491 * the relation by in RelationGetBufferForTuple(), if needed. If we
2492 * filled a prior page from scratch, we can just update our last
2493 * computation, but if we started with a partially filled page,
2494 * recompute from scratch, the number of potentially required pages
2495 * can vary due to tuples needing to fit onto the page, page headers
2496 * etc.
2497 */
2498 if (ndone == 0 || !starting_with_empty_page)
2499 {
2500 npages = heap_multi_insert_pages(heaptuples, ndone, ntuples,
2502 npages_used = 0;
2503 }
2504 else
2505 npages_used++;
2506
2507 /*
2508 * Find buffer where at least the next tuple will fit. If the page is
2509 * all-visible, this will also pin the requisite visibility map page.
2510 *
2511 * Also pin visibility map page if COPY FREEZE inserts tuples into an
2512 * empty page. See all_frozen_set below.
2513 */
2514 buffer = RelationGetBufferForTuple(relation, heaptuples[ndone]->t_len,
2515 InvalidBuffer, options, bistate,
2516 &vmbuffer, NULL,
2517 npages - npages_used);
2518 page = BufferGetPage(buffer);
2519
2521
2523 {
2524 all_frozen_set = true;
2525 /* Lock the vmbuffer before entering the critical section */
2527 }
2528
2529 /* NO EREPORT(ERROR) from here till changes are logged */
2531
2532 /*
2533 * RelationGetBufferForTuple has ensured that the first tuple fits.
2534 * Put that on the page, and then as many other tuples as fit.
2535 */
2536 RelationPutHeapTuple(relation, buffer, heaptuples[ndone], false);
2537
2538 /*
2539 * For logical decoding we need combo CIDs to properly decode the
2540 * catalog.
2541 */
2542 if (needwal && need_cids)
2543 log_heap_new_cid(relation, heaptuples[ndone]);
2544
2545 for (nthispage = 1; ndone + nthispage < ntuples; nthispage++)
2546 {
2548
2549 if (PageGetHeapFreeSpace(page) < MAXALIGN(heaptup->t_len) + saveFreeSpace)
2550 break;
2551
2552 RelationPutHeapTuple(relation, buffer, heaptup, false);
2553
2554 /*
2555 * For logical decoding we need combo CIDs to properly decode the
2556 * catalog.
2557 */
2558 if (needwal && need_cids)
2559 log_heap_new_cid(relation, heaptup);
2560 }
2561
2562 /*
2563 * If the page is all visible, need to clear that, unless we're only
2564 * going to add further frozen rows to it.
2565 *
2566 * If we're only adding already frozen rows to a previously empty
2567 * page, mark it as all-frozen and update the visibility map. We're
2568 * already holding a pin on the vmbuffer.
2569 */
2571 {
2572 all_visible_cleared = true;
2573 PageClearAllVisible(page);
2574 visibilitymap_clear(relation,
2575 BufferGetBlockNumber(buffer),
2576 vmbuffer, VISIBILITYMAP_VALID_BITS);
2577 }
2578 else if (all_frozen_set)
2579 {
2580 PageSetAllVisible(page);
2582 vmbuffer,
2585 relation->rd_locator);
2586 }
2587
2588 /*
2589 * XXX Should we set PageSetPrunable on this page ? See heap_insert()
2590 */
2591
2592 MarkBufferDirty(buffer);
2593
2594 /* XLOG stuff */
2595 if (needwal)
2596 {
2600 char *tupledata;
2601 int totaldatalen;
2602 char *scratchptr = scratch.data;
2603 bool init;
2604 int bufflags = 0;
2605
2606 /*
2607 * If the page was previously empty, we can reinit the page
2608 * instead of restoring the whole thing.
2609 */
2611
2612 /* allocate xl_heap_multi_insert struct from the scratch area */
2615
2616 /*
2617 * Allocate offsets array. Unless we're reinitializing the page,
2618 * in that case the tuples are stored in order starting at
2619 * FirstOffsetNumber and we don't need to store the offsets
2620 * explicitly.
2621 */
2622 if (!init)
2623 scratchptr += nthispage * sizeof(OffsetNumber);
2624
2625 /* the rest of the scratch space is used for tuple data */
2626 tupledata = scratchptr;
2627
2628 /* check that the mutually exclusive flags are not both set */
2630
2631 xlrec->flags = 0;
2634
2635 /*
2636 * We don't have to worry about including a conflict xid in the
2637 * WAL record, as HEAP_INSERT_FROZEN intentionally violates
2638 * visibility rules.
2639 */
2640 if (all_frozen_set)
2642
2643 xlrec->ntuples = nthispage;
2644
2645 /*
2646 * Write out an xl_multi_insert_tuple and the tuple data itself
2647 * for each tuple.
2648 */
2649 for (i = 0; i < nthispage; i++)
2650 {
2652 xl_multi_insert_tuple *tuphdr;
2653 int datalen;
2654
2655 if (!init)
2656 xlrec->offsets[i] = ItemPointerGetOffsetNumber(&heaptup->t_self);
2657 /* xl_multi_insert_tuple needs two-byte alignment. */
2659 scratchptr = ((char *) tuphdr) + SizeOfMultiInsertTuple;
2660
2661 tuphdr->t_infomask2 = heaptup->t_data->t_infomask2;
2662 tuphdr->t_infomask = heaptup->t_data->t_infomask;
2663 tuphdr->t_hoff = heaptup->t_data->t_hoff;
2664
2665 /* write bitmap [+ padding] [+ oid] + data */
2666 datalen = heaptup->t_len - SizeofHeapTupleHeader;
2668 (char *) heaptup->t_data + SizeofHeapTupleHeader,
2669 datalen);
2670 tuphdr->datalen = datalen;
2671 scratchptr += datalen;
2672 }
2673 totaldatalen = scratchptr - tupledata;
2674 Assert((scratchptr - scratch.data) < BLCKSZ);
2675
2676 if (need_tuple_data)
2678
2679 /*
2680 * Signal that this is the last xl_heap_multi_insert record
2681 * emitted by this call to heap_multi_insert(). Needed for logical
2682 * decoding so it knows when to cleanup temporary data.
2683 */
2684 if (ndone + nthispage == ntuples)
2686
2687 if (init)
2688 {
2689 info |= XLOG_HEAP_INIT_PAGE;
2691 }
2692
2693 /*
2694 * If we're doing logical decoding, include the new tuple data
2695 * even if we take a full-page image of the page.
2696 */
2697 if (need_tuple_data)
2699
2701 XLogRegisterData(xlrec, tupledata - scratch.data);
2703 if (all_frozen_set)
2704 XLogRegisterBuffer(1, vmbuffer, 0);
2705
2706 XLogRegisterBufData(0, tupledata, totaldatalen);
2707
2708 /* filtering by origin on a row level is much more efficient */
2710
2711 recptr = XLogInsert(RM_HEAP2_ID, info);
2712
2713 PageSetLSN(page, recptr);
2714 if (all_frozen_set)
2715 {
2716 Assert(BufferIsDirty(vmbuffer));
2717 PageSetLSN(BufferGetPage(vmbuffer), recptr);
2718 }
2719 }
2720
2722
2723 if (all_frozen_set)
2724 LockBuffer(vmbuffer, BUFFER_LOCK_UNLOCK);
2725
2726 UnlockReleaseBuffer(buffer);
2727 ndone += nthispage;
2728
2729 /*
2730 * NB: Only release vmbuffer after inserting all tuples - it's fairly
2731 * likely that we'll insert into subsequent heap pages that are likely
2732 * to use the same vm page.
2733 */
2734 }
2735
2736 /* We're done with inserting all tuples, so release the last vmbuffer. */
2737 if (vmbuffer != InvalidBuffer)
2738 ReleaseBuffer(vmbuffer);
2739
2740 /*
2741 * We're done with the actual inserts. Check for conflicts again, to
2742 * ensure that all rw-conflicts in to these inserts are detected. Without
2743 * this final check, a sequential scan of the heap may have locked the
2744 * table after the "before" check, missing one opportunity to detect the
2745 * conflict, and then scanned the table before the new tuples were there,
2746 * missing the other chance to detect the conflict.
2747 *
2748 * For heap inserts, we only need to check for table-level SSI locks. Our
2749 * new tuples can't possibly conflict with existing tuple locks, and heap
2750 * page locks are only consolidated versions of tuple locks; they do not
2751 * lock "gaps" as index page locks do. So we don't need to specify a
2752 * buffer when making the call.
2753 */
2755
2756 /*
2757 * If tuples are cacheable, mark them for invalidation from the caches in
2758 * case we abort. Note it is OK to do this after releasing the buffer,
2759 * because the heaptuples data structure is all in local memory, not in
2760 * the shared buffer.
2761 */
2762 if (IsCatalogRelation(relation))
2763 {
2764 for (i = 0; i < ntuples; i++)
2766 }
2767
2768 /* copy t_self fields back to the caller's slots */
2769 for (i = 0; i < ntuples; i++)
2770 slots[i]->tts_tid = heaptuples[i]->t_self;
2771
2772 pgstat_count_heap_insert(relation, ntuples);
2773}
bool BufferIsDirty(Buffer buffer)
Definition bufmgr.c:3025
Size PageGetHeapFreeSpace(const PageData *page)
Definition bufpage.c:990
static void PageSetAllVisible(Page page)
Definition bufpage.h:433
#define MAXALIGN(LEN)
Definition c.h:838
#define SHORTALIGN(LEN)
Definition c.h:834
size_t Size
Definition c.h:631
HeapTuple ExecFetchSlotHeapTuple(TupleTableSlot *slot, bool materialize, bool *shouldFree)
static int heap_multi_insert_pages(HeapTuple *heaptuples, int done, int ntuples, Size saveFreeSpace)
Definition heapam.c:2381
#define HEAP_INSERT_FROZEN
Definition heapam.h:38
#define SizeOfHeapMultiInsert
#define XLOG_HEAP2_MULTI_INSERT
Definition heapam_xlog.h:64
#define XLH_INSERT_LAST_IN_MULTI
Definition heapam_xlog.h:73
#define XLH_INSERT_ALL_FROZEN_SET
Definition heapam_xlog.h:79
#define SizeOfMultiInsertTuple
void * palloc(Size size)
Definition mcxt.c:1387
#define CHECK_FOR_INTERRUPTS()
Definition miscadmin.h:123
#define RelationGetTargetPageFreeSpace(relation, defaultff)
Definition rel.h:389
#define HEAP_DEFAULT_FILLFACTOR
Definition rel.h:360
#define init()
RelFileLocator rd_locator
Definition rel.h:57
void visibilitymap_set_vmbits(BlockNumber heapBlk, Buffer vmBuf, uint8 flags, const RelFileLocator rlocator)
#define VISIBILITYMAP_ALL_VISIBLE

References Assert, AssertHasSnapshotForToast(), BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage(), BufferIsDirty(), CacheInvalidateHeapTuple(), CHECK_FOR_INTERRUPTS, CheckForSerializableConflictIn(), xl_multi_insert_tuple::datalen, END_CRIT_SECTION, ExecFetchSlotHeapTuple(), fb(), GetCurrentTransactionId(), HEAP_DEFAULT_FILLFACTOR, HEAP_INSERT_FROZEN, HEAP_INSERT_NO_LOGICAL, heap_multi_insert_pages(), heap_prepare_insert(), i, init, InvalidBlockNumber, InvalidBuffer, IsCatalogRelation(), ItemPointerGetOffsetNumber(), LockBuffer(), log_heap_new_cid(), MarkBufferDirty(), MAXALIGN, PageClearAllVisible(), PageGetHeapFreeSpace(), PageGetMaxOffsetNumber(), PageIsAllVisible(), PageSetAllVisible(), PageSetLSN(), palloc(), pgstat_count_heap_insert(), RelationData::rd_locator, REGBUF_KEEP_DATA, REGBUF_STANDARD, REGBUF_WILL_INIT, RelationGetBufferForTuple(), RelationGetRelid, RelationGetTargetPageFreeSpace, RelationIsAccessibleInLogicalDecoding, RelationIsLogicallyLogged, RelationNeedsWAL, RelationPutHeapTuple(), ReleaseBuffer(), SHORTALIGN, SizeOfHeapMultiInsert, SizeofHeapTupleHeader, SizeOfMultiInsertTuple, START_CRIT_SECTION, xl_multi_insert_tuple::t_hoff, xl_multi_insert_tuple::t_infomask, xl_multi_insert_tuple::t_infomask2, HeapTupleData::t_tableOid, TupleTableSlot::tts_tableOid, UnlockReleaseBuffer(), VISIBILITYMAP_ALL_FROZEN, VISIBILITYMAP_ALL_VISIBLE, visibilitymap_clear(), visibilitymap_set_vmbits(), VISIBILITYMAP_VALID_BITS, XLH_INSERT_ALL_FROZEN_SET, XLH_INSERT_ALL_VISIBLE_CLEARED, XLH_INSERT_CONTAINS_NEW_TUPLE, XLH_INSERT_LAST_IN_MULTI, XLOG_HEAP2_MULTI_INSERT, XLOG_HEAP_INIT_PAGE, XLOG_INCLUDE_ORIGIN, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), XLogRegisterData(), and XLogSetRecordFlags().

Referenced by CatalogTuplesMultiInsertWithInfo().

◆ heap_page_prune_and_freeze()

void heap_page_prune_and_freeze ( PruneFreezeParams params,
PruneFreezeResult presult,
OffsetNumber off_loc,
TransactionId new_relfrozen_xid,
MultiXactId new_relmin_mxid 
)
extern

Definition at line 819 of file pruneheap.c.

824{
825 Buffer buffer = params->buffer;
826 Page page = BufferGetPage(buffer);
828 bool do_freeze;
829 bool do_prune;
830 bool do_hint_prune;
833
834 /* Initialize prstate */
835 prune_freeze_setup(params,
837 presult, &prstate);
838
839 /*
840 * Examine all line pointers and tuple visibility information to determine
841 * which line pointers should change state and which tuples may be frozen.
842 * Prepare queue of state changes to later be executed in a critical
843 * section.
844 */
846 buffer, &prstate, off_loc);
847
848 /*
849 * If checksums are enabled, calling heap_prune_satisfies_vacuum() while
850 * checking tuple visibility information in prune_freeze_plan() may have
851 * caused an FPI to be emitted.
852 */
854
855 do_prune = prstate.nredirected > 0 ||
856 prstate.ndead > 0 ||
857 prstate.nunused > 0;
858
859 /*
860 * Even if we don't prune anything, if we found a new value for the
861 * pd_prune_xid field or the page was marked full, we will update the hint
862 * bit.
863 */
864 do_hint_prune = ((PageHeader) page)->pd_prune_xid != prstate.new_prune_xid ||
865 PageIsFull(page);
866
867 /*
868 * Decide if we want to go ahead with freezing according to the freeze
869 * plans we prepared, or not.
870 */
871 do_freeze = heap_page_will_freeze(params->relation, buffer,
873 do_prune,
875 &prstate);
876
877 /*
878 * While scanning the line pointers, we did not clear
879 * all_visible/all_frozen when encountering LP_DEAD items because we
880 * wanted the decision whether or not to freeze the page to be unaffected
881 * by the short-term presence of LP_DEAD items. These LP_DEAD items are
882 * effectively assumed to be LP_UNUSED items in the making. It doesn't
883 * matter which vacuum heap pass (initial pass or final pass) ends up
884 * setting the page all-frozen, as long as the ongoing VACUUM does it.
885 *
886 * Now that we finished determining whether or not to freeze the page,
887 * update all_visible and all_frozen so that they reflect the true state
888 * of the page for setting PD_ALL_VISIBLE and VM bits.
889 */
890 if (prstate.lpdead_items > 0)
891 prstate.all_visible = prstate.all_frozen = false;
892
893 Assert(!prstate.all_frozen || prstate.all_visible);
894
895 /* Any error while applying the changes is critical */
897
898 if (do_hint_prune)
899 {
900 /*
901 * Update the page's pd_prune_xid field to either zero, or the lowest
902 * XID of any soon-prunable tuple.
903 */
904 ((PageHeader) page)->pd_prune_xid = prstate.new_prune_xid;
905
906 /*
907 * Also clear the "page is full" flag, since there's no point in
908 * repeating the prune/defrag process until something else happens to
909 * the page.
910 */
911 PageClearFull(page);
912
913 /*
914 * If that's all we had to do to the page, this is a non-WAL-logged
915 * hint. If we are going to freeze or prune the page, we will mark
916 * the buffer dirty below.
917 */
918 if (!do_freeze && !do_prune)
919 MarkBufferDirtyHint(buffer, true);
920 }
921
922 if (do_prune || do_freeze)
923 {
924 /* Apply the planned item changes and repair page fragmentation. */
925 if (do_prune)
926 {
927 heap_page_prune_execute(buffer, false,
928 prstate.redirected, prstate.nredirected,
929 prstate.nowdead, prstate.ndead,
930 prstate.nowunused, prstate.nunused);
931 }
932
933 if (do_freeze)
934 heap_freeze_prepared_tuples(buffer, prstate.frozen, prstate.nfrozen);
935
936 MarkBufferDirty(buffer);
937
938 /*
939 * Emit a WAL XLOG_HEAP2_PRUNE* record showing what we did
940 */
941 if (RelationNeedsWAL(params->relation))
942 {
943 /*
944 * The snapshotConflictHorizon for the whole record should be the
945 * most conservative of all the horizons calculated for any of the
946 * possible modifications. If this record will prune tuples, any
947 * transactions on the standby older than the youngest xmax of the
948 * most recently removed tuple this record will prune will
949 * conflict. If this record will freeze tuples, any transactions
950 * on the standby with xids older than the youngest tuple this
951 * record will freeze will conflict.
952 */
954
955 if (TransactionIdFollows(prstate.frz_conflict_horizon,
956 prstate.latest_xid_removed))
957 conflict_xid = prstate.frz_conflict_horizon;
958 else
959 conflict_xid = prstate.latest_xid_removed;
960
961 log_heap_prune_and_freeze(params->relation, buffer,
962 InvalidBuffer, /* vmbuffer */
963 0, /* vmflags */
965 true, params->reason,
966 prstate.frozen, prstate.nfrozen,
967 prstate.redirected, prstate.nredirected,
968 prstate.nowdead, prstate.ndead,
969 prstate.nowunused, prstate.nunused);
970 }
971 }
972
974
975 /* Copy information back for caller */
976 presult->ndeleted = prstate.ndeleted;
977 presult->nnewlpdead = prstate.ndead;
978 presult->nfrozen = prstate.nfrozen;
979 presult->live_tuples = prstate.live_tuples;
980 presult->recently_dead_tuples = prstate.recently_dead_tuples;
981 presult->all_visible = prstate.all_visible;
982 presult->all_frozen = prstate.all_frozen;
983 presult->hastup = prstate.hastup;
984
985 /*
986 * For callers planning to update the visibility map, the conflict horizon
987 * for that record must be the newest xmin on the page. However, if the
988 * page is completely frozen, there can be no conflict and the
989 * vm_conflict_horizon should remain InvalidTransactionId. This includes
990 * the case that we just froze all the tuples; the prune-freeze record
991 * included the conflict XID already so the caller doesn't need it.
992 */
993 if (presult->all_frozen)
994 presult->vm_conflict_horizon = InvalidTransactionId;
995 else
996 presult->vm_conflict_horizon = prstate.visibility_cutoff_xid;
997
998 presult->lpdead_items = prstate.lpdead_items;
999 /* the presult->deadoffsets array was already filled in */
1000
1001 if (prstate.attempt_freeze)
1002 {
1003 if (presult->nfrozen > 0)
1004 {
1005 *new_relfrozen_xid = prstate.pagefrz.FreezePageRelfrozenXid;
1006 *new_relmin_mxid = prstate.pagefrz.FreezePageRelminMxid;
1007 }
1008 else
1009 {
1010 *new_relfrozen_xid = prstate.pagefrz.NoFreezePageRelfrozenXid;
1011 *new_relmin_mxid = prstate.pagefrz.NoFreezePageRelminMxid;
1012 }
1013 }
1014}
void MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
Definition bufmgr.c:5566
static void PageClearFull(Page page)
Definition bufpage.h:422
static bool PageIsFull(const PageData *page)
Definition bufpage.h:412
int64_t int64
Definition c.h:555
void heap_freeze_prepared_tuples(Buffer buffer, HeapTupleFreeze *tuples, int ntuples)
Definition heapam.c:7461
WalUsage pgWalUsage
Definition instrument.c:22
static bool heap_page_will_freeze(Relation relation, Buffer buffer, bool did_tuple_hint_fpi, bool do_prune, bool do_hint_prune, PruneState *prstate)
Definition pruneheap.c:663
static void prune_freeze_plan(Oid reloid, Buffer buffer, PruneState *prstate, OffsetNumber *off_loc)
Definition pruneheap.c:458
void log_heap_prune_and_freeze(Relation relation, Buffer buffer, Buffer vmbuffer, uint8 vmflags, TransactionId conflict_xid, bool cleanup_lock, PruneReason reason, HeapTupleFreeze *frozen, int nfrozen, OffsetNumber *redirected, int nredirected, OffsetNumber *dead, int ndead, OffsetNumber *unused, int nunused)
Definition pruneheap.c:2167
static void prune_freeze_setup(PruneFreezeParams *params, TransactionId *new_relfrozen_xid, MultiXactId *new_relmin_mxid, PruneFreezeResult *presult, PruneState *prstate)
Definition pruneheap.c:330
void heap_page_prune_execute(Buffer buffer, bool lp_truncate_only, OffsetNumber *redirected, int nredirected, OffsetNumber *nowdead, int ndead, OffsetNumber *nowunused, int nunused)
Definition pruneheap.c:1671
PruneReason reason
Definition heapam.h:245
Relation relation
Definition heapam.h:238
int64 wal_fpi
Definition instrument.h:54
static bool TransactionIdFollows(TransactionId id1, TransactionId id2)
Definition transam.h:297

References Assert, PruneFreezeParams::buffer, BufferGetPage(), END_CRIT_SECTION, fb(), heap_freeze_prepared_tuples(), heap_page_prune_execute(), heap_page_will_freeze(), InvalidBuffer, InvalidTransactionId, log_heap_prune_and_freeze(), MarkBufferDirty(), MarkBufferDirtyHint(), PageClearFull(), PageIsFull(), pgWalUsage, prune_freeze_plan(), prune_freeze_setup(), PruneFreezeParams::reason, PruneFreezeParams::relation, RelationGetRelid, RelationNeedsWAL, START_CRIT_SECTION, TransactionIdFollows(), and WalUsage::wal_fpi.

Referenced by heap_page_prune_opt(), and lazy_scan_prune().

◆ heap_page_prune_execute()

void heap_page_prune_execute ( Buffer  buffer,
bool  lp_truncate_only,
OffsetNumber redirected,
int  nredirected,
OffsetNumber nowdead,
int  ndead,
OffsetNumber nowunused,
int  nunused 
)
extern

Definition at line 1671 of file pruneheap.c.

1675{
1676 Page page = BufferGetPage(buffer);
1677 OffsetNumber *offnum;
1679
1680 /* Shouldn't be called unless there's something to do */
1681 Assert(nredirected > 0 || ndead > 0 || nunused > 0);
1682
1683 /* If 'lp_truncate_only', we can only remove already-dead line pointers */
1684 Assert(!lp_truncate_only || (nredirected == 0 && ndead == 0));
1685
1686 /* Update all redirected line pointers */
1687 offnum = redirected;
1688 for (int i = 0; i < nredirected; i++)
1689 {
1690 OffsetNumber fromoff = *offnum++;
1691 OffsetNumber tooff = *offnum++;
1694
1695#ifdef USE_ASSERT_CHECKING
1696
1697 /*
1698 * Any existing item that we set as an LP_REDIRECT (any 'from' item)
1699 * must be the first item from a HOT chain. If the item has tuple
1700 * storage then it can't be a heap-only tuple. Otherwise we are just
1701 * maintaining an existing LP_REDIRECT from an existing HOT chain that
1702 * has been pruned at least once before now.
1703 */
1705 {
1707
1708 htup = (HeapTupleHeader) PageGetItem(page, fromlp);
1710 }
1711 else
1712 {
1713 /* We shouldn't need to redundantly set the redirect */
1715 }
1716
1717 /*
1718 * The item that we're about to set as an LP_REDIRECT (the 'from'
1719 * item) will point to an existing item (the 'to' item) that is
1720 * already a heap-only tuple. There can be at most one LP_REDIRECT
1721 * item per HOT chain.
1722 *
1723 * We need to keep around an LP_REDIRECT item (after original
1724 * non-heap-only root tuple gets pruned away) so that it's always
1725 * possible for VACUUM to easily figure out what TID to delete from
1726 * indexes when an entire HOT chain becomes dead. A heap-only tuple
1727 * can never become LP_DEAD; an LP_REDIRECT item or a regular heap
1728 * tuple can.
1729 *
1730 * This check may miss problems, e.g. the target of a redirect could
1731 * be marked as unused subsequently. The page_verify_redirects() check
1732 * below will catch such problems.
1733 */
1734 tolp = PageGetItemId(page, tooff);
1736 htup = (HeapTupleHeader) PageGetItem(page, tolp);
1738#endif
1739
1741 }
1742
1743 /* Update all now-dead line pointers */
1744 offnum = nowdead;
1745 for (int i = 0; i < ndead; i++)
1746 {
1747 OffsetNumber off = *offnum++;
1748 ItemId lp = PageGetItemId(page, off);
1749
1750#ifdef USE_ASSERT_CHECKING
1751
1752 /*
1753 * An LP_DEAD line pointer must be left behind when the original item
1754 * (which is dead to everybody) could still be referenced by a TID in
1755 * an index. This should never be necessary with any individual
1756 * heap-only tuple item, though. (It's not clear how much of a problem
1757 * that would be, but there is no reason to allow it.)
1758 */
1759 if (ItemIdHasStorage(lp))
1760 {
1762 htup = (HeapTupleHeader) PageGetItem(page, lp);
1764 }
1765 else
1766 {
1767 /* Whole HOT chain becomes dead */
1769 }
1770#endif
1771
1773 }
1774
1775 /* Update all now-unused line pointers */
1776 offnum = nowunused;
1777 for (int i = 0; i < nunused; i++)
1778 {
1779 OffsetNumber off = *offnum++;
1780 ItemId lp = PageGetItemId(page, off);
1781
1782#ifdef USE_ASSERT_CHECKING
1783
1784 if (lp_truncate_only)
1785 {
1786 /* Setting LP_DEAD to LP_UNUSED in vacuum's second pass */
1788 }
1789 else
1790 {
1791 /*
1792 * When heap_page_prune_and_freeze() was called, mark_unused_now
1793 * may have been passed as true, which allows would-be LP_DEAD
1794 * items to be made LP_UNUSED instead. This is only possible if
1795 * the relation has no indexes. If there are any dead items, then
1796 * mark_unused_now was not true and every item being marked
1797 * LP_UNUSED must refer to a heap-only tuple.
1798 */
1799 if (ndead > 0)
1800 {
1802 htup = (HeapTupleHeader) PageGetItem(page, lp);
1804 }
1805 else
1807 }
1808
1809#endif
1810
1812 }
1813
1814 if (lp_truncate_only)
1816 else
1817 {
1818 /*
1819 * Finally, repair any fragmentation, and update the page's hint bit
1820 * about whether it has free pointers.
1821 */
1823
1824 /*
1825 * Now that the page has been modified, assert that redirect items
1826 * still point to valid targets.
1827 */
1829 }
1830}
void PageRepairFragmentation(Page page)
Definition bufpage.c:698
void PageTruncateLinePointerArray(Page page)
Definition bufpage.c:834
#define PG_USED_FOR_ASSERTS_ONLY
Definition c.h:235
#define ItemIdSetRedirect(itemId, link)
Definition itemid.h:152
#define ItemIdSetDead(itemId)
Definition itemid.h:164
#define ItemIdSetUnused(itemId)
Definition itemid.h:128
#define ItemIdHasStorage(itemId)
Definition itemid.h:120
static void page_verify_redirects(Page page)
Definition pruneheap.c:1847

References Assert, BufferGetPage(), fb(), HeapTupleHeaderIsHeapOnly(), i, ItemIdGetRedirect, ItemIdHasStorage, ItemIdIsDead, ItemIdIsNormal, ItemIdIsRedirected, ItemIdIsUsed, ItemIdSetDead, ItemIdSetRedirect, ItemIdSetUnused, page_verify_redirects(), PageGetItem(), PageGetItemId(), PageRepairFragmentation(), PageTruncateLinePointerArray(), and PG_USED_FOR_ASSERTS_ONLY.

Referenced by heap_page_prune_and_freeze(), and heap_xlog_prune_freeze().

◆ heap_page_prune_opt()

void heap_page_prune_opt ( Relation  relation,
Buffer  buffer 
)
extern

Definition at line 209 of file pruneheap.c.

210{
211 Page page = BufferGetPage(buffer);
213 GlobalVisState *vistest;
215
216 /*
217 * We can't write WAL in recovery mode, so there's no point trying to
218 * clean the page. The primary will likely issue a cleaning WAL record
219 * soon anyway, so this is no particular loss.
220 */
221 if (RecoveryInProgress())
222 return;
223
224 /*
225 * First check whether there's any chance there's something to prune,
226 * determining the appropriate horizon is a waste if there's no prune_xid
227 * (i.e. no updates/deletes left potentially dead tuples around).
228 */
229 prune_xid = ((PageHeader) page)->pd_prune_xid;
231 return;
232
233 /*
234 * Check whether prune_xid indicates that there may be dead rows that can
235 * be cleaned up.
236 */
237 vistest = GlobalVisTestFor(relation);
238
240 return;
241
242 /*
243 * We prune when a previous UPDATE failed to find enough space on the page
244 * for a new tuple version, or when free space falls below the relation's
245 * fill-factor target (but not less than 10%).
246 *
247 * Checking free space here is questionable since we aren't holding any
248 * lock on the buffer; in the worst case we could get a bogus answer. It's
249 * unlikely to be *seriously* wrong, though, since reading either pd_lower
250 * or pd_upper is probably atomic. Avoiding taking a lock seems more
251 * important than sometimes getting a wrong answer in what is after all
252 * just a heuristic estimate.
253 */
256 minfree = Max(minfree, BLCKSZ / 10);
257
258 if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
259 {
260 /* OK, try to get exclusive buffer lock */
262 return;
263
264 /*
265 * Now that we have buffer lock, get accurate information about the
266 * page's free space, and recheck the heuristic about whether to
267 * prune.
268 */
269 if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
270 {
273
274 /*
275 * We don't pass the HEAP_PAGE_PRUNE_MARK_UNUSED_NOW option
276 * regardless of whether or not the relation has indexes, since we
277 * cannot safely determine that during on-access pruning with the
278 * current implementation.
279 */
280 PruneFreezeParams params = {
281 .relation = relation,
282 .buffer = buffer,
283 .reason = PRUNE_ON_ACCESS,
284 .options = 0,
285 .vistest = vistest,
286 .cutoffs = NULL,
287 };
288
290 NULL, NULL);
291
292 /*
293 * Report the number of tuples reclaimed to pgstats. This is
294 * presult.ndeleted minus the number of newly-LP_DEAD-set items.
295 *
296 * We derive the number of dead tuples like this to avoid totally
297 * forgetting about items that were set to LP_DEAD, since they
298 * still need to be cleaned up by VACUUM. We only want to count
299 * heap-only tuples that just became LP_UNUSED in our report,
300 * which don't.
301 *
302 * VACUUM doesn't have to compensate in the same way when it
303 * tracks ndeleted, since it will set the same LP_DEAD items to
304 * LP_UNUSED separately.
305 */
306 if (presult.ndeleted > presult.nnewlpdead)
308 presult.ndeleted - presult.nnewlpdead);
309 }
310
311 /* And release buffer lock */
313
314 /*
315 * We avoid reuse of any free space created on the page by unrelated
316 * UPDATEs/INSERTs by opting to not update the FSM at this point. The
317 * free space should be reused by UPDATEs to *this* page.
318 */
319 }
320}
bool ConditionalLockBufferForCleanup(Buffer buffer)
Definition bufmgr.c:6701
#define Max(x, y)
Definition c.h:1013
void pgstat_update_heap_dead_tuples(Relation rel, int delta)
bool GlobalVisTestIsRemovableXid(GlobalVisState *state, TransactionId xid)
Definition procarray.c:4275
void heap_page_prune_and_freeze(PruneFreezeParams *params, PruneFreezeResult *presult, OffsetNumber *off_loc, TransactionId *new_relfrozen_xid, MultiXactId *new_relmin_mxid)
Definition pruneheap.c:819
bool RecoveryInProgress(void)
Definition xlog.c:6460

References BUFFER_LOCK_UNLOCK, BufferGetPage(), ConditionalLockBufferForCleanup(), fb(), GlobalVisTestFor(), GlobalVisTestIsRemovableXid(), HEAP_DEFAULT_FILLFACTOR, heap_page_prune_and_freeze(), LockBuffer(), Max, PageGetHeapFreeSpace(), PageIsFull(), pgstat_update_heap_dead_tuples(), PRUNE_ON_ACCESS, RecoveryInProgress(), PruneFreezeParams::relation, RelationGetTargetPageFreeSpace, and TransactionIdIsValid.

Referenced by BitmapHeapScanNextBlock(), heap_prepare_pagescan(), and heapam_index_fetch_tuple().

◆ heap_pre_freeze_checks()

void heap_pre_freeze_checks ( Buffer  buffer,
HeapTupleFreeze tuples,
int  ntuples 
)
extern

Definition at line 7408 of file heapam.c.

7410{
7411 Page page = BufferGetPage(buffer);
7412
7413 for (int i = 0; i < ntuples; i++)
7414 {
7415 HeapTupleFreeze *frz = tuples + i;
7416 ItemId itemid = PageGetItemId(page, frz->offset);
7417 HeapTupleHeader htup;
7418
7419 htup = (HeapTupleHeader) PageGetItem(page, itemid);
7420
7421 /* Deliberately avoid relying on tuple hint bits here */
7422 if (frz->checkflags & HEAP_FREEZE_CHECK_XMIN_COMMITTED)
7423 {
7425
7427 if (unlikely(!TransactionIdDidCommit(xmin)))
7428 ereport(ERROR,
7430 errmsg_internal("uncommitted xmin %u needs to be frozen",
7431 xmin)));
7432 }
7433
7434 /*
7435 * TransactionIdDidAbort won't work reliably in the presence of XIDs
7436 * left behind by transactions that were in progress during a crash,
7437 * so we can only check that xmax didn't commit
7438 */
7439 if (frz->checkflags & HEAP_FREEZE_CHECK_XMAX_ABORTED)
7440 {
7442
7445 ereport(ERROR,
7447 errmsg_internal("cannot freeze committed xmax %u",
7448 xmax)));
7449 }
7450 }
7451}
#define HEAP_FREEZE_CHECK_XMAX_ABORTED
Definition heapam.h:138
#define HEAP_FREEZE_CHECK_XMIN_COMMITTED
Definition heapam.h:137
static bool HeapTupleHeaderXminFrozen(const HeapTupleHeaderData *tup)
static TransactionId HeapTupleHeaderGetRawXmin(const HeapTupleHeaderData *tup)
#define ERRCODE_DATA_CORRUPTED
bool TransactionIdDidCommit(TransactionId transactionId)
Definition transam.c:126
#define TransactionIdIsNormal(xid)
Definition transam.h:42

References Assert, BufferGetPage(), ereport, errcode(), ERRCODE_DATA_CORRUPTED, errmsg_internal(), ERROR, fb(), HEAP_FREEZE_CHECK_XMAX_ABORTED, HEAP_FREEZE_CHECK_XMIN_COMMITTED, HeapTupleHeaderGetRawXmax(), HeapTupleHeaderGetRawXmin(), HeapTupleHeaderXminFrozen(), i, PageGetItem(), PageGetItemId(), TransactionIdDidCommit(), TransactionIdIsNormal, and unlikely.

Referenced by heap_page_will_freeze().

◆ heap_prepare_freeze_tuple()

bool heap_prepare_freeze_tuple ( HeapTupleHeader  tuple,
const struct VacuumCutoffs cutoffs,
HeapPageFreeze pagefrz,
HeapTupleFreeze frz,
bool totally_frozen 
)
extern

Definition at line 7135 of file heapam.c.

7139{
7140 bool xmin_already_frozen = false,
7141 xmax_already_frozen = false;
7142 bool freeze_xmin = false,
7143 replace_xvac = false,
7144 replace_xmax = false,
7145 freeze_xmax = false;
7146 TransactionId xid;
7147
7148 frz->xmax = HeapTupleHeaderGetRawXmax(tuple);
7149 frz->t_infomask2 = tuple->t_infomask2;
7150 frz->t_infomask = tuple->t_infomask;
7151 frz->frzflags = 0;
7152 frz->checkflags = 0;
7153
7154 /*
7155 * Process xmin, while keeping track of whether it's already frozen, or
7156 * will become frozen iff our freeze plan is executed by caller (could be
7157 * neither).
7158 */
7159 xid = HeapTupleHeaderGetXmin(tuple);
7160 if (!TransactionIdIsNormal(xid))
7161 xmin_already_frozen = true;
7162 else
7163 {
7164 if (TransactionIdPrecedes(xid, cutoffs->relfrozenxid))
7165 ereport(ERROR,
7167 errmsg_internal("found xmin %u from before relfrozenxid %u",
7168 xid, cutoffs->relfrozenxid)));
7169
7170 /* Will set freeze_xmin flags in freeze plan below */
7172
7173 /* Verify that xmin committed if and when freeze plan is executed */
7174 if (freeze_xmin)
7176 }
7177
7178 /*
7179 * Old-style VACUUM FULL is gone, but we have to process xvac for as long
7180 * as we support having MOVED_OFF/MOVED_IN tuples in the database
7181 */
7182 xid = HeapTupleHeaderGetXvac(tuple);
7183 if (TransactionIdIsNormal(xid))
7184 {
7186 Assert(TransactionIdPrecedes(xid, cutoffs->OldestXmin));
7187
7188 /*
7189 * For Xvac, we always freeze proactively. This allows totally_frozen
7190 * tracking to ignore xvac.
7191 */
7192 replace_xvac = pagefrz->freeze_required = true;
7193
7194 /* Will set replace_xvac flags in freeze plan below */
7195 }
7196
7197 /* Now process xmax */
7198 xid = frz->xmax;
7199 if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
7200 {
7201 /* Raw xmax is a MultiXactId */
7203 uint16 flags;
7204
7205 /*
7206 * We will either remove xmax completely (in the "freeze_xmax" path),
7207 * process xmax by replacing it (in the "replace_xmax" path), or
7208 * perform no-op xmax processing. The only constraint is that the
7209 * FreezeLimit/MultiXactCutoff postcondition must never be violated.
7210 */
7211 newxmax = FreezeMultiXactId(xid, tuple->t_infomask, cutoffs,
7212 &flags, pagefrz);
7213
7214 if (flags & FRM_NOOP)
7215 {
7216 /*
7217 * xmax is a MultiXactId, and nothing about it changes for now.
7218 * This is the only case where 'freeze_required' won't have been
7219 * set for us by FreezeMultiXactId, as well as the only case where
7220 * neither freeze_xmax nor replace_xmax are set (given a multi).
7221 *
7222 * This is a no-op, but the call to FreezeMultiXactId might have
7223 * ratcheted back NewRelfrozenXid and/or NewRelminMxid trackers
7224 * for us (the "freeze page" variants, specifically). That'll
7225 * make it safe for our caller to freeze the page later on, while
7226 * leaving this particular xmax undisturbed.
7227 *
7228 * FreezeMultiXactId is _not_ responsible for the "no freeze"
7229 * NewRelfrozenXid/NewRelminMxid trackers, though -- that's our
7230 * job. A call to heap_tuple_should_freeze for this same tuple
7231 * will take place below if 'freeze_required' isn't set already.
7232 * (This repeats work from FreezeMultiXactId, but allows "no
7233 * freeze" tracker maintenance to happen in only one place.)
7234 */
7237 }
7238 else if (flags & FRM_RETURN_IS_XID)
7239 {
7240 /*
7241 * xmax will become an updater Xid (original MultiXact's updater
7242 * member Xid will be carried forward as a simple Xid in Xmax).
7243 */
7245
7246 /*
7247 * NB -- some of these transformations are only valid because we
7248 * know the return Xid is a tuple updater (i.e. not merely a
7249 * locker.) Also note that the only reason we don't explicitly
7250 * worry about HEAP_KEYS_UPDATED is because it lives in
7251 * t_infomask2 rather than t_infomask.
7252 */
7253 frz->t_infomask &= ~HEAP_XMAX_BITS;
7254 frz->xmax = newxmax;
7255 if (flags & FRM_MARK_COMMITTED)
7256 frz->t_infomask |= HEAP_XMAX_COMMITTED;
7257 replace_xmax = true;
7258 }
7259 else if (flags & FRM_RETURN_IS_MULTI)
7260 {
7263
7264 /*
7265 * xmax is an old MultiXactId that we have to replace with a new
7266 * MultiXactId, to carry forward two or more original member XIDs.
7267 */
7269
7270 /*
7271 * We can't use GetMultiXactIdHintBits directly on the new multi
7272 * here; that routine initializes the masks to all zeroes, which
7273 * would lose other bits we need. Doing it this way ensures all
7274 * unrelated bits remain untouched.
7275 */
7276 frz->t_infomask &= ~HEAP_XMAX_BITS;
7277 frz->t_infomask2 &= ~HEAP_KEYS_UPDATED;
7279 frz->t_infomask |= newbits;
7280 frz->t_infomask2 |= newbits2;
7281 frz->xmax = newxmax;
7282 replace_xmax = true;
7283 }
7284 else
7285 {
7286 /*
7287 * Freeze plan for tuple "freezes xmax" in the strictest sense:
7288 * it'll leave nothing in xmax (neither an Xid nor a MultiXactId).
7289 */
7290 Assert(flags & FRM_INVALIDATE_XMAX);
7292
7293 /* Will set freeze_xmax flags in freeze plan below */
7294 freeze_xmax = true;
7295 }
7296
7297 /* MultiXactId processing forces freezing (barring FRM_NOOP case) */
7298 Assert(pagefrz->freeze_required || (!freeze_xmax && !replace_xmax));
7299 }
7300 else if (TransactionIdIsNormal(xid))
7301 {
7302 /* Raw xmax is normal XID */
7303 if (TransactionIdPrecedes(xid, cutoffs->relfrozenxid))
7304 ereport(ERROR,
7306 errmsg_internal("found xmax %u from before relfrozenxid %u",
7307 xid, cutoffs->relfrozenxid)));
7308
7309 /* Will set freeze_xmax flags in freeze plan below */
7311
7312 /*
7313 * Verify that xmax aborted if and when freeze plan is executed,
7314 * provided it's from an update. (A lock-only xmax can be removed
7315 * independent of this, since the lock is released at xact end.)
7316 */
7318 frz->checkflags |= HEAP_FREEZE_CHECK_XMAX_ABORTED;
7319 }
7320 else if (!TransactionIdIsValid(xid))
7321 {
7322 /* Raw xmax is InvalidTransactionId XID */
7323 Assert((tuple->t_infomask & HEAP_XMAX_IS_MULTI) == 0);
7324 xmax_already_frozen = true;
7325 }
7326 else
7327 ereport(ERROR,
7329 errmsg_internal("found raw xmax %u (infomask 0x%04x) not invalid and not multi",
7330 xid, tuple->t_infomask)));
7331
7332 if (freeze_xmin)
7333 {
7335
7336 frz->t_infomask |= HEAP_XMIN_FROZEN;
7337 }
7338 if (replace_xvac)
7339 {
7340 /*
7341 * If a MOVED_OFF tuple is not dead, the xvac transaction must have
7342 * failed; whereas a non-dead MOVED_IN tuple must mean the xvac
7343 * transaction succeeded.
7344 */
7345 Assert(pagefrz->freeze_required);
7346 if (tuple->t_infomask & HEAP_MOVED_OFF)
7347 frz->frzflags |= XLH_INVALID_XVAC;
7348 else
7349 frz->frzflags |= XLH_FREEZE_XVAC;
7350 }
7351 if (replace_xmax)
7352 {
7354 Assert(pagefrz->freeze_required);
7355
7356 /* Already set replace_xmax flags in freeze plan earlier */
7357 }
7358 if (freeze_xmax)
7359 {
7361
7362 frz->xmax = InvalidTransactionId;
7363
7364 /*
7365 * The tuple might be marked either XMAX_INVALID or XMAX_COMMITTED +
7366 * LOCKED. Normalize to INVALID just to be sure no one gets confused.
7367 * Also get rid of the HEAP_KEYS_UPDATED bit.
7368 */
7369 frz->t_infomask &= ~HEAP_XMAX_BITS;
7370 frz->t_infomask |= HEAP_XMAX_INVALID;
7371 frz->t_infomask2 &= ~HEAP_HOT_UPDATED;
7372 frz->t_infomask2 &= ~HEAP_KEYS_UPDATED;
7373 }
7374
7375 /*
7376 * Determine if this tuple is already totally frozen, or will become
7377 * totally frozen (provided caller executes freeze plans for the page)
7378 */
7381
7382 if (!pagefrz->freeze_required && !(xmin_already_frozen &&
7384 {
7385 /*
7386 * So far no previous tuple from the page made freezing mandatory.
7387 * Does this tuple force caller to freeze the entire page?
7388 */
7389 pagefrz->freeze_required =
7390 heap_tuple_should_freeze(tuple, cutoffs,
7391 &pagefrz->NoFreezePageRelfrozenXid,
7392 &pagefrz->NoFreezePageRelminMxid);
7393 }
7394
7395 /* Tell caller if this tuple has a usable freeze plan set in *frz */
7397}
static void GetMultiXactIdHintBits(MultiXactId multi, uint16 *new_infomask, uint16 *new_infomask2)
Definition heapam.c:7527
#define FRM_RETURN_IS_XID
Definition heapam.c:6734
static TransactionId FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, const struct VacuumCutoffs *cutoffs, uint16 *flags, HeapPageFreeze *pagefrz)
Definition heapam.c:6785
bool heap_tuple_should_freeze(HeapTupleHeader tuple, const struct VacuumCutoffs *cutoffs, TransactionId *NoFreezePageRelfrozenXid, MultiXactId *NoFreezePageRelminMxid)
Definition heapam.c:7946
#define FRM_MARK_COMMITTED
Definition heapam.c:6736
#define FRM_NOOP
Definition heapam.c:6732
#define FRM_RETURN_IS_MULTI
Definition heapam.c:6735
#define FRM_INVALIDATE_XMAX
Definition heapam.c:6733
#define HEAP_MOVED_OFF
#define HEAP_XMIN_FROZEN
static TransactionId HeapTupleHeaderGetXvac(const HeapTupleHeaderData *tup)
#define HEAP_XMAX_COMMITTED
bool MultiXactIdPrecedes(MultiXactId multi1, MultiXactId multi2)
Definition multixact.c:2765
#define MultiXactIdIsValid(multi)
Definition multixact.h:29
MultiXactId NoFreezePageRelminMxid
Definition heapam.h:220
TransactionId NoFreezePageRelfrozenXid
Definition heapam.h:219
TransactionId OldestXmin
Definition vacuum.h:279
MultiXactId OldestMxact
Definition vacuum.h:280
static bool TransactionIdPrecedesOrEquals(TransactionId id1, TransactionId id2)
Definition transam.h:282

References Assert, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errmsg_internal(), ERROR, fb(), HeapPageFreeze::freeze_required, FreezeMultiXactId(), FRM_INVALIDATE_XMAX, FRM_MARK_COMMITTED, FRM_NOOP, FRM_RETURN_IS_MULTI, FRM_RETURN_IS_XID, GetMultiXactIdHintBits(), HEAP_FREEZE_CHECK_XMAX_ABORTED, HEAP_FREEZE_CHECK_XMIN_COMMITTED, HEAP_MOVED_OFF, heap_tuple_should_freeze(), HEAP_XMAX_COMMITTED, HEAP_XMAX_INVALID, HEAP_XMAX_IS_LOCKED_ONLY(), HEAP_XMAX_IS_MULTI, HEAP_XMIN_FROZEN, HeapTupleHeaderGetRawXmax(), HeapTupleHeaderGetXmin(), HeapTupleHeaderGetXvac(), InvalidTransactionId, VacuumCutoffs::MultiXactCutoff, MultiXactIdIsValid, MultiXactIdPrecedes(), HeapPageFreeze::NoFreezePageRelfrozenXid, HeapPageFreeze::NoFreezePageRelminMxid, VacuumCutoffs::OldestMxact, VacuumCutoffs::OldestXmin, VacuumCutoffs::relfrozenxid, HeapTupleHeaderData::t_infomask, HeapTupleHeaderData::t_infomask2, TransactionIdIsNormal, TransactionIdIsValid, TransactionIdPrecedes(), TransactionIdPrecedesOrEquals(), XLH_FREEZE_XVAC, and XLH_INVALID_XVAC.

Referenced by heap_freeze_tuple(), and heap_prune_record_unchanged_lp_normal().

◆ heap_prepare_pagescan()

void heap_prepare_pagescan ( TableScanDesc  sscan)
extern

Definition at line 616 of file heapam.c.

617{
619 Buffer buffer = scan->rs_cbuf;
620 BlockNumber block = scan->rs_cblock;
621 Snapshot snapshot;
622 Page page;
623 int lines;
624 bool all_visible;
626
627 Assert(BufferGetBlockNumber(buffer) == block);
628
629 /* ensure we're not accidentally being used when not in pagemode */
631 snapshot = scan->rs_base.rs_snapshot;
632
633 /*
634 * Prune and repair fragmentation for the whole page, if possible.
635 */
636 heap_page_prune_opt(scan->rs_base.rs_rd, buffer);
637
638 /*
639 * We must hold share lock on the buffer content while examining tuple
640 * visibility. Afterwards, however, the tuples we have found to be
641 * visible are guaranteed good as long as we hold the buffer pin.
642 */
644
645 page = BufferGetPage(buffer);
646 lines = PageGetMaxOffsetNumber(page);
647
648 /*
649 * If the all-visible flag indicates that all tuples on the page are
650 * visible to everyone, we can skip the per-tuple visibility tests.
651 *
652 * Note: In hot standby, a tuple that's already visible to all
653 * transactions on the primary might still be invisible to a read-only
654 * transaction in the standby. We partly handle this problem by tracking
655 * the minimum xmin of visible tuples as the cut-off XID while marking a
656 * page all-visible on the primary and WAL log that along with the
657 * visibility map SET operation. In hot standby, we wait for (or abort)
 658 * all transactions that potentially may not see one or more tuples on
659 * the page. That's how index-only scans work fine in hot standby. A
660 * crucial difference between index-only scans and heap scans is that the
 661 * index-only scan completely relies on the visibility map whereas heap
662 * scan looks at the page-level PD_ALL_VISIBLE flag. We are not sure if
663 * the page-level flag can be trusted in the same way, because it might
664 * get propagated somehow without being explicitly WAL-logged, e.g. via a
665 * full page write. Until we can prove that beyond doubt, let's check each
666 * tuple for visibility the hard way.
667 */
668 all_visible = PageIsAllVisible(page) && !snapshot->takenDuringRecovery;
671
672 /*
673 * We call page_collect_tuples() with constant arguments, to get the
674 * compiler to constant fold the constant arguments. Separate calls with
675 * constant arguments, rather than variables, are needed on several
676 * compilers to actually perform constant folding.
677 */
678 if (likely(all_visible))
679 {
681 scan->rs_ntuples = page_collect_tuples(scan, snapshot, page, buffer,
682 block, lines, true, false);
683 else
684 scan->rs_ntuples = page_collect_tuples(scan, snapshot, page, buffer,
685 block, lines, true, true);
686 }
687 else
688 {
690 scan->rs_ntuples = page_collect_tuples(scan, snapshot, page, buffer,
691 block, lines, false, false);
692 else
693 scan->rs_ntuples = page_collect_tuples(scan, snapshot, page, buffer,
694 block, lines, false, true);
695 }
696
698}
#define likely(x)
Definition c.h:423
static pg_attribute_always_inline int page_collect_tuples(HeapScanDesc scan, Snapshot snapshot, Page page, Buffer buffer, BlockNumber block, int lines, bool all_visible, bool check_serializable)
Definition heapam.c:522
bool CheckForSerializableConflictOutNeeded(Relation relation, Snapshot snapshot)
Definition predicate.c:3989
void heap_page_prune_opt(Relation relation, Buffer buffer)
Definition pruneheap.c:209
uint32 rs_ntuples
Definition heapam.h:99
BlockNumber rs_cblock
Definition heapam.h:69
bool takenDuringRecovery
Definition snapshot.h:180

References Assert, BUFFER_LOCK_SHARE, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage(), CheckForSerializableConflictOutNeeded(), fb(), heap_page_prune_opt(), likely, LockBuffer(), page_collect_tuples(), PageGetMaxOffsetNumber(), PageIsAllVisible(), HeapScanDescData::rs_base, HeapScanDescData::rs_cblock, HeapScanDescData::rs_cbuf, TableScanDescData::rs_flags, HeapScanDescData::rs_ntuples, TableScanDescData::rs_rd, TableScanDescData::rs_snapshot, SO_ALLOW_PAGEMODE, and SnapshotData::takenDuringRecovery.

Referenced by heapam_scan_sample_next_block(), and heapgettup_pagemode().

◆ heap_rescan()

void heap_rescan ( TableScanDesc  sscan,
ScanKey  key,
bool  set_params,
bool  allow_strat,
bool  allow_sync,
bool  allow_pagemode 
)
extern

Definition at line 1318 of file heapam.c.

1320{
1322
1323 if (set_params)
1324 {
1325 if (allow_strat)
1327 else
1329
1330 if (allow_sync)
1332 else
1334
1335 if (allow_pagemode && scan->rs_base.rs_snapshot &&
1338 else
1340 }
1341
1342 /*
1343 * unpin scan buffers
1344 */
1345 if (BufferIsValid(scan->rs_cbuf))
1346 {
1347 ReleaseBuffer(scan->rs_cbuf);
1348 scan->rs_cbuf = InvalidBuffer;
1349 }
1350
1351 /*
1352 * SO_TYPE_BITMAPSCAN would be cleaned up here, but it does not hold any
1353 * additional data vs a normal HeapScan
1354 */
1355
1356 /*
1357 * The read stream is reset on rescan. This must be done before
1358 * initscan(), as some state referred to by read_stream_reset() is reset
1359 * in initscan().
1360 */
1361 if (scan->rs_read_stream)
1363
1364 /*
1365 * reinitialize scan descriptor
1366 */
1367 initscan(scan, key, true);
1368}
void read_stream_reset(ReadStream *stream)
@ SO_ALLOW_STRAT
Definition tableam.h:58
@ SO_ALLOW_SYNC
Definition tableam.h:60

References BufferIsValid(), fb(), initscan(), InvalidBuffer, IsMVCCSnapshot, read_stream_reset(), ReleaseBuffer(), HeapScanDescData::rs_base, HeapScanDescData::rs_cbuf, TableScanDescData::rs_flags, HeapScanDescData::rs_read_stream, TableScanDescData::rs_snapshot, SO_ALLOW_PAGEMODE, SO_ALLOW_STRAT, and SO_ALLOW_SYNC.

◆ heap_set_tidrange()

void heap_set_tidrange ( TableScanDesc  sscan,
ItemPointer  mintid,
ItemPointer  maxtid 
)
extern

Definition at line 1479 of file heapam.c.

1481{
1487
1488 /*
1489 * For relations without any pages, we can simply leave the TID range
1490 * unset. There will be no tuples to scan, therefore no tuples outside
1491 * the given TID range.
1492 */
1493 if (scan->rs_nblocks == 0)
1494 return;
1495
1496 /*
1497 * Set up some ItemPointers which point to the first and last possible
1498 * tuples in the heap.
1499 */
1502
1503 /*
1504 * If the given maximum TID is below the highest possible TID in the
1505 * relation, then restrict the range to that, otherwise we scan to the end
1506 * of the relation.
1507 */
1510
1511 /*
1512 * If the given minimum TID is above the lowest possible TID in the
1513 * relation, then restrict the range to only scan for TIDs above that.
1514 */
1517
1518 /*
 1519 * Check for an empty range and protect from would-be negative results
1520 * from the numBlks calculation below.
1521 */
1523 {
1524 /* Set an empty range of blocks to scan */
1526 return;
1527 }
1528
1529 /*
1530 * Calculate the first block and the number of blocks we must scan. We
1531 * could be more aggressive here and perform some more validation to try
1532 * and further narrow the scope of blocks to scan by checking if the
1533 * lowestItem has an offset above MaxOffsetNumber. In this case, we could
1534 * advance startBlk by one. Likewise, if highestItem has an offset of 0
1535 * we could scan one fewer blocks. However, such an optimization does not
1536 * seem worth troubling over, currently.
1537 */
1539
1542
1543 /* Set the start block and number of blocks to scan */
1545
1546 /* Finally, set the TID range in sscan */
1547 ItemPointerCopy(&lowestItem, &sscan->st.tidrange.rs_mintid);
1548 ItemPointerCopy(&highestItem, &sscan->st.tidrange.rs_maxtid);
1549}
void heap_setscanlimits(TableScanDesc sscan, BlockNumber startBlk, BlockNumber numBlks)
Definition heapam.c:500
static BlockNumber ItemPointerGetBlockNumberNoCheck(const ItemPointerData *pointer)
Definition itemptr.h:93
#define MaxOffsetNumber
Definition off.h:28
BlockNumber rs_nblocks
Definition heapam.h:61

References fb(), FirstOffsetNumber, heap_setscanlimits(), ItemPointerCompare(), ItemPointerCopy(), ItemPointerGetBlockNumberNoCheck(), ItemPointerSet(), MaxOffsetNumber, and HeapScanDescData::rs_nblocks.

◆ heap_setscanlimits()

void heap_setscanlimits ( TableScanDesc  sscan,
BlockNumber  startBlk,
BlockNumber  numBlks 
)
extern

Definition at line 500 of file heapam.c.

501{
503
504 Assert(!scan->rs_inited); /* else too late to change */
505 /* else rs_startblock is significant */
507
508 /* Check startBlk is valid (but allow case of zero blocks...) */
509 Assert(startBlk == 0 || startBlk < scan->rs_nblocks);
510
511 scan->rs_startblock = startBlk;
512 scan->rs_numblocks = numBlks;
513}
BlockNumber rs_startblock
Definition heapam.h:62
BlockNumber rs_numblocks
Definition heapam.h:63

References Assert, fb(), HeapScanDescData::rs_base, TableScanDescData::rs_flags, HeapScanDescData::rs_inited, HeapScanDescData::rs_numblocks, HeapScanDescData::rs_startblock, and SO_ALLOW_SYNC.

Referenced by heap_set_tidrange(), and heapam_index_build_range_scan().

◆ heap_tuple_needs_eventual_freeze()

bool heap_tuple_needs_eventual_freeze ( HeapTupleHeader  tuple)
extern

Definition at line 7891 of file heapam.c.

7892{
7893 TransactionId xid;
7894
7895 /*
7896 * If xmin is a normal transaction ID, this tuple is definitely not
7897 * frozen.
7898 */
7899 xid = HeapTupleHeaderGetXmin(tuple);
7900 if (TransactionIdIsNormal(xid))
7901 return true;
7902
7903 /*
7904 * If xmax is a valid xact or multixact, this tuple is also not frozen.
7905 */
7906 if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
7907 {
7908 MultiXactId multi;
7909
7910 multi = HeapTupleHeaderGetRawXmax(tuple);
7911 if (MultiXactIdIsValid(multi))
7912 return true;
7913 }
7914 else
7915 {
7916 xid = HeapTupleHeaderGetRawXmax(tuple);
7917 if (TransactionIdIsNormal(xid))
7918 return true;
7919 }
7920
7921 if (tuple->t_infomask & HEAP_MOVED)
7922 {
7923 xid = HeapTupleHeaderGetXvac(tuple);
7924 if (TransactionIdIsNormal(xid))
7925 return true;
7926 }
7927
7928 return false;
7929}

References HEAP_MOVED, HEAP_XMAX_IS_MULTI, HeapTupleHeaderGetRawXmax(), HeapTupleHeaderGetXmin(), HeapTupleHeaderGetXvac(), MultiXactIdIsValid, HeapTupleHeaderData::t_infomask, and TransactionIdIsNormal.

Referenced by collect_corrupt_items(), and heap_page_would_be_all_visible().

◆ heap_tuple_should_freeze()

bool heap_tuple_should_freeze ( HeapTupleHeader  tuple,
const struct VacuumCutoffs cutoffs,
TransactionId NoFreezePageRelfrozenXid,
MultiXactId NoFreezePageRelminMxid 
)
extern

Definition at line 7946 of file heapam.c.

7950{
7951 TransactionId xid;
7952 MultiXactId multi;
7953 bool freeze = false;
7954
7955 /* First deal with xmin */
7956 xid = HeapTupleHeaderGetXmin(tuple);
7957 if (TransactionIdIsNormal(xid))
7958 {
7960 if (TransactionIdPrecedes(xid, *NoFreezePageRelfrozenXid))
7961 *NoFreezePageRelfrozenXid = xid;
7962 if (TransactionIdPrecedes(xid, cutoffs->FreezeLimit))
7963 freeze = true;
7964 }
7965
7966 /* Now deal with xmax */
7968 multi = InvalidMultiXactId;
7969 if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
7970 multi = HeapTupleHeaderGetRawXmax(tuple);
7971 else
7972 xid = HeapTupleHeaderGetRawXmax(tuple);
7973
7974 if (TransactionIdIsNormal(xid))
7975 {
7977 /* xmax is a non-permanent XID */
7978 if (TransactionIdPrecedes(xid, *NoFreezePageRelfrozenXid))
7979 *NoFreezePageRelfrozenXid = xid;
7980 if (TransactionIdPrecedes(xid, cutoffs->FreezeLimit))
7981 freeze = true;
7982 }
7983 else if (!MultiXactIdIsValid(multi))
7984 {
7985 /* xmax is a permanent XID or invalid MultiXactId/XID */
7986 }
7987 else if (HEAP_LOCKED_UPGRADED(tuple->t_infomask))
7988 {
7989 /* xmax is a pg_upgrade'd MultiXact, which can't have updater XID */
7990 if (MultiXactIdPrecedes(multi, *NoFreezePageRelminMxid))
7991 *NoFreezePageRelminMxid = multi;
7992 /* heap_prepare_freeze_tuple always freezes pg_upgrade'd xmax */
7993 freeze = true;
7994 }
7995 else
7996 {
7997 /* xmax is a MultiXactId that may have an updater XID */
7998 MultiXactMember *members;
7999 int nmembers;
8000
8002 if (MultiXactIdPrecedes(multi, *NoFreezePageRelminMxid))
8003 *NoFreezePageRelminMxid = multi;
8004 if (MultiXactIdPrecedes(multi, cutoffs->MultiXactCutoff))
8005 freeze = true;
8006
8007 /* need to check whether any member of the mxact is old */
8008 nmembers = GetMultiXactIdMembers(multi, &members, false,
8010
8011 for (int i = 0; i < nmembers; i++)
8012 {
8013 xid = members[i].xid;
8015 if (TransactionIdPrecedes(xid, *NoFreezePageRelfrozenXid))
8016 *NoFreezePageRelfrozenXid = xid;
8017 if (TransactionIdPrecedes(xid, cutoffs->FreezeLimit))
8018 freeze = true;
8019 }
8020 if (nmembers > 0)
8021 pfree(members);
8022 }
8023
8024 if (tuple->t_infomask & HEAP_MOVED)
8025 {
8026 xid = HeapTupleHeaderGetXvac(tuple);
8027 if (TransactionIdIsNormal(xid))
8028 {
8030 if (TransactionIdPrecedes(xid, *NoFreezePageRelfrozenXid))
8031 *NoFreezePageRelfrozenXid = xid;
8032 /* heap_prepare_freeze_tuple forces xvac freezing */
8033 freeze = true;
8034 }
8035 }
8036
8037 return freeze;
8038}
static bool HEAP_LOCKED_UPGRADED(uint16 infomask)
bool MultiXactIdPrecedesOrEquals(MultiXactId multi1, MultiXactId multi2)
Definition multixact.c:2779
#define InvalidMultiXactId
Definition multixact.h:25
TransactionId xid
Definition multixact.h:57

References Assert, VacuumCutoffs::FreezeLimit, GetMultiXactIdMembers(), HEAP_LOCKED_UPGRADED(), HEAP_MOVED, HEAP_XMAX_IS_LOCKED_ONLY(), HEAP_XMAX_IS_MULTI, HeapTupleHeaderGetRawXmax(), HeapTupleHeaderGetXmin(), HeapTupleHeaderGetXvac(), i, InvalidMultiXactId, InvalidTransactionId, VacuumCutoffs::MultiXactCutoff, MultiXactIdIsValid, MultiXactIdPrecedes(), MultiXactIdPrecedesOrEquals(), pfree(), VacuumCutoffs::relfrozenxid, VacuumCutoffs::relminmxid, HeapTupleHeaderData::t_infomask, TransactionIdIsNormal, TransactionIdPrecedes(), TransactionIdPrecedesOrEquals(), and MultiXactMember::xid.

Referenced by heap_prepare_freeze_tuple(), and lazy_scan_noprune().

◆ heap_update()

TM_Result heap_update ( Relation  relation,
const ItemPointerData *  otid,
HeapTuple  newtup,
CommandId  cid,
Snapshot  crosscheck,
bool  wait,
TM_FailureData *  tmfd,
LockTupleMode *  lockmode,
TU_UpdateIndexes *  update_indexes 
)
extern

Definition at line 3312 of file heapam.c.

3316{
3317 TM_Result result;
3325 ItemId lp;
3329 bool old_key_copied = false;
3330 Page page;
3331 BlockNumber block;
3333 Buffer buffer,
3334 newbuf,
3335 vmbuffer = InvalidBuffer,
3337 bool need_toast;
3339 pagefree;
3340 bool have_tuple_lock = false;
3341 bool iscombo;
3342 bool use_hot_update = false;
3343 bool summarized_update = false;
3344 bool key_intact;
3345 bool all_visible_cleared = false;
3346 bool all_visible_cleared_new = false;
3347 bool checked_lockers;
3348 bool locker_remains;
3349 bool id_has_external = false;
3356
3358
3359 /* Cheap, simplistic check that the tuple matches the rel's rowtype. */
3362
3363 AssertHasSnapshotForToast(relation);
3364
3365 /*
3366 * Forbid this during a parallel operation, lest it allocate a combo CID.
3367 * Other workers might need that combo CID for visibility checks, and we
3368 * have no provision for broadcasting it to them.
3369 */
3370 if (IsInParallelMode())
3371 ereport(ERROR,
3373 errmsg("cannot update tuples during a parallel operation")));
3374
3375#ifdef USE_ASSERT_CHECKING
3377#endif
3378
3379 /*
3380 * Fetch the list of attributes to be checked for various operations.
3381 *
3382 * For HOT considerations, this is wasted effort if we fail to update or
3383 * have to put the new tuple on a different page. But we must compute the
3384 * list before obtaining buffer lock --- in the worst case, if we are
3385 * doing an update on one of the relevant system catalogs, we could
3386 * deadlock if we try to fetch the list later. In any case, the relcache
3387 * caches the data so this is usually pretty cheap.
3388 *
3389 * We also need columns used by the replica identity and columns that are
3390 * considered the "key" of rows in the table.
3391 *
3392 * Note that we get copies of each bitmap, so we need not worry about
3393 * relcache flush happening midway through.
3394 */
3407
3409 INJECTION_POINT("heap_update-before-pin", NULL);
3410 buffer = ReadBuffer(relation, block);
3411 page = BufferGetPage(buffer);
3412
3413 /*
3414 * Before locking the buffer, pin the visibility map page if it appears to
3415 * be necessary. Since we haven't got the lock yet, someone else might be
3416 * in the middle of changing this, so we'll need to recheck after we have
3417 * the lock.
3418 */
3419 if (PageIsAllVisible(page))
3420 visibilitymap_pin(relation, block, &vmbuffer);
3421
3423
3425
3426 /*
3427 * Usually, a buffer pin and/or snapshot blocks pruning of otid, ensuring
3428 * we see LP_NORMAL here. When the otid origin is a syscache, we may have
3429 * neither a pin nor a snapshot. Hence, we may see other LP_ states, each
3430 * of which indicates concurrent pruning.
3431 *
3432 * Failing with TM_Updated would be most accurate. However, unlike other
3433 * TM_Updated scenarios, we don't know the successor ctid in LP_UNUSED and
3434 * LP_DEAD cases. While the distinction between TM_Updated and TM_Deleted
3435 * does matter to SQL statements UPDATE and MERGE, those SQL statements
3436 * hold a snapshot that ensures LP_NORMAL. Hence, the choice between
3437 * TM_Updated and TM_Deleted affects only the wording of error messages.
3438 * Settle on TM_Deleted, for two reasons. First, it avoids complicating
3439 * the specification of when tmfd->ctid is valid. Second, it creates
3440 * error log evidence that we took this branch.
3441 *
3442 * Since it's possible to see LP_UNUSED at otid, it's also possible to see
3443 * LP_NORMAL for a tuple that replaced LP_UNUSED. If it's a tuple for an
3444 * unrelated row, we'll fail with "duplicate key value violates unique".
3445 * XXX if otid is the live, newer version of the newtup row, we'll discard
3446 * changes originating in versions of this catalog row after the version
3447 * the caller got from syscache. See syscache-update-pruned.spec.
3448 */
3449 if (!ItemIdIsNormal(lp))
3450 {
3452
3453 UnlockReleaseBuffer(buffer);
3455 if (vmbuffer != InvalidBuffer)
3456 ReleaseBuffer(vmbuffer);
3457 tmfd->ctid = *otid;
3458 tmfd->xmax = InvalidTransactionId;
3459 tmfd->cmax = InvalidCommandId;
3461
3466 /* modified_attrs not yet initialized */
3468 return TM_Deleted;
3469 }
3470
3471 /*
3472 * Fill in enough data in oldtup for HeapDetermineColumnsInfo to work
3473 * properly.
3474 */
3475 oldtup.t_tableOid = RelationGetRelid(relation);
3476 oldtup.t_data = (HeapTupleHeader) PageGetItem(page, lp);
3477 oldtup.t_len = ItemIdGetLength(lp);
3478 oldtup.t_self = *otid;
3479
3480 /* the new tuple is ready, except for this: */
3481 newtup->t_tableOid = RelationGetRelid(relation);
3482
3483 /*
3484 * Determine columns modified by the update. Additionally, identify
3485 * whether any of the unmodified replica identity key attributes in the
3486 * old tuple is externally stored or not. This is required because for
3487 * such attributes the flattened value won't be WAL logged as part of the
3488 * new tuple so we must include it as part of the old_key_tuple. See
3489 * ExtractReplicaIdentity.
3490 */
3492 id_attrs, &oldtup,
3494
3495 /*
3496 * If we're not updating any "key" column, we can grab a weaker lock type.
3497 * This allows for more concurrency when we are running simultaneously
3498 * with foreign key checks.
3499 *
3500 * Note that if a column gets detoasted while executing the update, but
3501 * the value ends up being the same, this test will fail and we will use
3502 * the stronger lock. This is acceptable; the important case to optimize
3503 * is updates that don't manipulate key columns, not those that
3504 * serendipitously arrive at the same key values.
3505 */
3507 {
3508 *lockmode = LockTupleNoKeyExclusive;
3510 key_intact = true;
3511
3512 /*
3513 * If this is the first possibly-multixact-able operation in the
3514 * current transaction, set my per-backend OldestMemberMXactId
3515 * setting. We can be certain that the transaction will never become a
3516 * member of any older MultiXactIds than that. (We have to do this
3517 * even if we end up just using our own TransactionId below, since
3518 * some other backend could incorporate our XID into a MultiXact
3519 * immediately afterwards.)
3520 */
3522 }
3523 else
3524 {
3525 *lockmode = LockTupleExclusive;
3527 key_intact = false;
3528 }
3529
3530 /*
3531 * Note: beyond this point, use oldtup not otid to refer to old tuple.
3532 * otid may very well point at newtup->t_self, which we will overwrite
3533 * with the new tuple's location, so there's great risk of confusion if we
3534 * use otid anymore.
3535 */
3536
3537l2:
3538 checked_lockers = false;
3539 locker_remains = false;
3540 result = HeapTupleSatisfiesUpdate(&oldtup, cid, buffer);
3541
3542 /* see below about the "no wait" case */
3543 Assert(result != TM_BeingModified || wait);
3544
3545 if (result == TM_Invisible)
3546 {
3547 UnlockReleaseBuffer(buffer);
3548 ereport(ERROR,
3550 errmsg("attempted to update invisible tuple")));
3551 }
3552 else if (result == TM_BeingModified && wait)
3553 {
3556 bool can_continue = false;
3557
3558 /*
3559 * XXX note that we don't consider the "no wait" case here. This
3560 * isn't a problem currently because no caller uses that case, but it
3561 * should be fixed if such a caller is introduced. It wasn't a
3562 * problem previously because this code would always wait, but now
3563 * that some tuple locks do not conflict with one of the lock modes we
3564 * use, it is possible that this case is interesting to handle
3565 * specially.
3566 *
3567 * This may cause failures with third-party code that calls
3568 * heap_update directly.
3569 */
3570
3571 /* must copy state data before unlocking buffer */
3573 infomask = oldtup.t_data->t_infomask;
3574
3575 /*
3576 * Now we have to do something about the existing locker. If it's a
3577 * multi, sleep on it; we might be awakened before it is completely
3578 * gone (or even not sleep at all in some cases); we need to preserve
3579 * it as locker, unless it is gone completely.
3580 *
3581 * If it's not a multi, we need to check for sleeping conditions
3582 * before actually going to sleep. If the update doesn't conflict
3583 * with the locks, we just continue without sleeping (but making sure
3584 * it is preserved).
3585 *
3586 * Before sleeping, we need to acquire tuple lock to establish our
3587 * priority for the tuple (see heap_lock_tuple). LockTuple will
3588 * release us when we are next-in-line for the tuple. Note we must
3589 * not acquire the tuple lock until we're sure we're going to sleep;
3590 * otherwise we're open for race conditions with other transactions
3591 * holding the tuple lock which sleep on us.
3592 *
3593 * If we are forced to "start over" below, we keep the tuple lock;
3594 * this arranges that we stay at the head of the line while rechecking
3595 * tuple state.
3596 */
3598 {
3600 int remain;
3601 bool current_is_member = false;
3602
3604 *lockmode, &current_is_member))
3605 {
3607
3608 /*
3609 * Acquire the lock, if necessary (but skip it when we're
3610 * requesting a lock and already have one; avoids deadlock).
3611 */
3612 if (!current_is_member)
3613 heap_acquire_tuplock(relation, &(oldtup.t_self), *lockmode,
3615
3616 /* wait for multixact */
3618 relation, &oldtup.t_self, XLTW_Update,
3619 &remain);
3620 checked_lockers = true;
3621 locker_remains = remain != 0;
3623
3624 /*
3625 * If xwait had just locked the tuple then some other xact
3626 * could update this tuple before we get to this point. Check
3627 * for xmax change, and start over if so.
3628 */
3629 if (xmax_infomask_changed(oldtup.t_data->t_infomask,
3630 infomask) ||
3632 xwait))
3633 goto l2;
3634 }
3635
3636 /*
3637 * Note that the multixact may not be done by now. It could have
3638 * surviving members; our own xact or other subxacts of this
3639 * backend, and also any other concurrent transaction that locked
3640 * the tuple with LockTupleKeyShare if we only got
3641 * LockTupleNoKeyExclusive. If this is the case, we have to be
3642 * careful to mark the updated tuple with the surviving members in
3643 * Xmax.
3644 *
3645 * Note that there could have been another update in the
3646 * MultiXact. In that case, we need to check whether it committed
3647 * or aborted. If it aborted we are safe to update it again;
3648 * otherwise there is an update conflict, and we have to return
3649 * TableTuple{Deleted, Updated} below.
3650 *
3651 * In the LockTupleExclusive case, we still need to preserve the
3652 * surviving members: those would include the tuple locks we had
3653 * before this one, which are important to keep in case this
3654 * subxact aborts.
3655 */
3656 if (!HEAP_XMAX_IS_LOCKED_ONLY(oldtup.t_data->t_infomask))
3658 else
3660
3661 /*
3662 * There was no UPDATE in the MultiXact; or it aborted. No
3663 * TransactionIdIsInProgress() call needed here, since we called
3664 * MultiXactIdWait() above.
3665 */
3668 can_continue = true;
3669 }
3671 {
3672 /*
3673 * The only locker is ourselves; we can avoid grabbing the tuple
3674 * lock here, but must preserve our locking information.
3675 */
3676 checked_lockers = true;
3677 locker_remains = true;
3678 can_continue = true;
3679 }
3681 {
3682 /*
3683 * If it's just a key-share locker, and we're not changing the key
3684 * columns, we don't need to wait for it to end; but we need to
3685 * preserve it as locker.
3686 */
3687 checked_lockers = true;
3688 locker_remains = true;
3689 can_continue = true;
3690 }
3691 else
3692 {
3693 /*
3694 * Wait for regular transaction to end; but first, acquire tuple
3695 * lock.
3696 */
3698 heap_acquire_tuplock(relation, &(oldtup.t_self), *lockmode,
3700 XactLockTableWait(xwait, relation, &oldtup.t_self,
3701 XLTW_Update);
3702 checked_lockers = true;
3704
3705 /*
3706 * xwait is done, but if xwait had just locked the tuple then some
3707 * other xact could update this tuple before we get to this point.
3708 * Check for xmax change, and start over if so.
3709 */
3710 if (xmax_infomask_changed(oldtup.t_data->t_infomask, infomask) ||
3713 goto l2;
3714
3715 /* Otherwise check if it committed or aborted */
3716 UpdateXmaxHintBits(oldtup.t_data, buffer, xwait);
3717 if (oldtup.t_data->t_infomask & HEAP_XMAX_INVALID)
3718 can_continue = true;
3719 }
3720
3721 if (can_continue)
3722 result = TM_Ok;
3723 else if (!ItemPointerEquals(&oldtup.t_self, &oldtup.t_data->t_ctid))
3724 result = TM_Updated;
3725 else
3726 result = TM_Deleted;
3727 }
3728
3729 /* Sanity check the result HeapTupleSatisfiesUpdate() and the logic above */
3730 if (result != TM_Ok)
3731 {
3732 Assert(result == TM_SelfModified ||
3733 result == TM_Updated ||
3734 result == TM_Deleted ||
3735 result == TM_BeingModified);
3736 Assert(!(oldtup.t_data->t_infomask & HEAP_XMAX_INVALID));
3737 Assert(result != TM_Updated ||
3738 !ItemPointerEquals(&oldtup.t_self, &oldtup.t_data->t_ctid));
3739 }
3740
3741 if (crosscheck != InvalidSnapshot && result == TM_Ok)
3742 {
3743 /* Perform additional check for transaction-snapshot mode RI updates */
3745 result = TM_Updated;
3746 }
3747
3748 if (result != TM_Ok)
3749 {
3750 tmfd->ctid = oldtup.t_data->t_ctid;
3751 tmfd->xmax = HeapTupleHeaderGetUpdateXid(oldtup.t_data);
3752 if (result == TM_SelfModified)
3753 tmfd->cmax = HeapTupleHeaderGetCmax(oldtup.t_data);
3754 else
3755 tmfd->cmax = InvalidCommandId;
3756 UnlockReleaseBuffer(buffer);
3757 if (have_tuple_lock)
3758 UnlockTupleTuplock(relation, &(oldtup.t_self), *lockmode);
3759 if (vmbuffer != InvalidBuffer)
3760 ReleaseBuffer(vmbuffer);
3762
3769 return result;
3770 }
3771
3772 /*
3773 * If we didn't pin the visibility map page and the page has become all
3774 * visible while we were busy locking the buffer, or during some
3775 * subsequent window during which we had it unlocked, we'll have to unlock
3776 * and re-lock, to avoid holding the buffer lock across an I/O. That's a
3777 * bit unfortunate, especially since we'll now have to recheck whether the
3778 * tuple has been locked or updated under us, but hopefully it won't
3779 * happen very often.
3780 */
3781 if (vmbuffer == InvalidBuffer && PageIsAllVisible(page))
3782 {
3784 visibilitymap_pin(relation, block, &vmbuffer);
3786 goto l2;
3787 }
3788
3789 /* Fill in transaction status data */
3790
3791 /*
3792 * If the tuple we're updating is locked, we need to preserve the locking
3793 * info in the old tuple's Xmax. Prepare a new Xmax value for this.
3794 */
3796 oldtup.t_data->t_infomask,
3797 oldtup.t_data->t_infomask2,
3798 xid, *lockmode, true,
3801
3802 /*
3803 * And also prepare an Xmax value for the new copy of the tuple. If there
3804 * was no xmax previously, or there was one but all lockers are now gone,
3805 * then use InvalidTransactionId; otherwise, get the xmax from the old
3806 * tuple. (In rare cases that might also be InvalidTransactionId and yet
3807 * not have the HEAP_XMAX_INVALID bit set; that's fine.)
3808 */
3809 if ((oldtup.t_data->t_infomask & HEAP_XMAX_INVALID) ||
3810 HEAP_LOCKED_UPGRADED(oldtup.t_data->t_infomask) ||
3813 else
3815
3817 {
3820 }
3821 else
3822 {
3823 /*
3824 * If we found a valid Xmax for the new tuple, then the infomask bits
3825 * to use on the new tuple depend on what was there on the old one.
3826 * Note that since we're doing an update, the only possibility is that
3827 * the lockers had FOR KEY SHARE lock.
3828 */
3829 if (oldtup.t_data->t_infomask & HEAP_XMAX_IS_MULTI)
3830 {
3833 }
3834 else
3835 {
3838 }
3839 }
3840
3841 /*
3842 * Prepare the new tuple with the appropriate initial values of Xmin and
3843 * Xmax, as well as initial infomask bits as computed above.
3844 */
3845 newtup->t_data->t_infomask &= ~(HEAP_XACT_MASK);
3846 newtup->t_data->t_infomask2 &= ~(HEAP2_XACT_MASK);
3847 HeapTupleHeaderSetXmin(newtup->t_data, xid);
3849 newtup->t_data->t_infomask |= HEAP_UPDATED | infomask_new_tuple;
3850 newtup->t_data->t_infomask2 |= infomask2_new_tuple;
3852
3853 /*
3854 * Replace cid with a combo CID if necessary. Note that we already put
3855 * the plain cid into the new tuple.
3856 */
3858
3859 /*
3860 * If the toaster needs to be activated, OR if the new tuple will not fit
3861 * on the same page as the old, then we need to release the content lock
3862 * (but not the pin!) on the old tuple's buffer while we are off doing
3863 * TOAST and/or table-file-extension work. We must mark the old tuple to
3864 * show that it's locked, else other processes may try to update it
3865 * themselves.
3866 *
3867 * We need to invoke the toaster if there are already any out-of-line
3868 * toasted values present, or if the new tuple is over-threshold.
3869 */
3870 if (relation->rd_rel->relkind != RELKIND_RELATION &&
3871 relation->rd_rel->relkind != RELKIND_MATVIEW)
3872 {
3873 /* toast table entries should never be recursively toasted */
3876 need_toast = false;
3877 }
3878 else
3881 newtup->t_len > TOAST_TUPLE_THRESHOLD);
3882
3884
3885 newtupsize = MAXALIGN(newtup->t_len);
3886
3888 {
3892 bool cleared_all_frozen = false;
3893
3894 /*
3895 * To prevent concurrent sessions from updating the tuple, we have to
3896 * temporarily mark it locked, while we release the page-level lock.
3897 *
3898 * To satisfy the rule that any xid potentially appearing in a buffer
3899 * written out to disk, we unfortunately have to WAL log this
3900 * temporary modification. We can reuse xl_heap_lock for this
3901 * purpose. If we crash/error before following through with the
3902 * actual update, xmax will be of an aborted transaction, allowing
3903 * other sessions to proceed.
3904 */
3905
3906 /*
3907 * Compute xmax / infomask appropriate for locking the tuple. This has
3908 * to be done separately from the combo that's going to be used for
3909 * updating, because the potentially created multixact would otherwise
3910 * be wrong.
3911 */
3913 oldtup.t_data->t_infomask,
3914 oldtup.t_data->t_infomask2,
3915 xid, *lockmode, false,
3918
3920
3922
3923 /* Clear obsolete visibility flags ... */
3924 oldtup.t_data->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
3925 oldtup.t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED;
3927 /* ... and store info about transaction updating this tuple */
3930 oldtup.t_data->t_infomask |= infomask_lock_old_tuple;
3931 oldtup.t_data->t_infomask2 |= infomask2_lock_old_tuple;
3933
3934 /* temporarily make it look not-updated, but locked */
3935 oldtup.t_data->t_ctid = oldtup.t_self;
3936
3937 /*
3938 * Clear all-frozen bit on visibility map if needed. We could
3939 * immediately reset ALL_VISIBLE, but given that the WAL logging
3940 * overhead would be unchanged, that doesn't seem necessarily
3941 * worthwhile.
3942 */
3943 if (PageIsAllVisible(page) &&
3944 visibilitymap_clear(relation, block, vmbuffer,
3946 cleared_all_frozen = true;
3947
3948 MarkBufferDirty(buffer);
3949
3950 if (RelationNeedsWAL(relation))
3951 {
3954
3957
3958 xlrec.offnum = ItemPointerGetOffsetNumber(&oldtup.t_self);
3960 xlrec.infobits_set = compute_infobits(oldtup.t_data->t_infomask,
3961 oldtup.t_data->t_infomask2);
3962 xlrec.flags =
3966 PageSetLSN(page, recptr);
3967 }
3968
3970
3972
3973 /*
3974 * Let the toaster do its thing, if needed.
3975 *
3976 * Note: below this point, heaptup is the data we actually intend to
3977 * store into the relation; newtup is the caller's original untoasted
3978 * data.
3979 */
3980 if (need_toast)
3981 {
3982 /* Note we always use WAL and FSM during updates */
3984 newtupsize = MAXALIGN(heaptup->t_len);
3985 }
3986 else
3987 heaptup = newtup;
3988
3989 /*
3990 * Now, do we need a new page for the tuple, or not? This is a bit
3991 * tricky since someone else could have added tuples to the page while
3992 * we weren't looking. We have to recheck the available space after
3993 * reacquiring the buffer lock. But don't bother to do that if the
3994 * former amount of free space is still not enough; it's unlikely
3995 * there's more free now than before.
3996 *
3997 * What's more, if we need to get a new page, we will need to acquire
3998 * buffer locks on both old and new pages. To avoid deadlock against
3999 * some other backend trying to get the same two locks in the other
4000 * order, we must be consistent about the order we get the locks in.
4001 * We use the rule "lock the lower-numbered page of the relation
4002 * first". To implement this, we must do RelationGetBufferForTuple
4003 * while not holding the lock on the old page, and we must rely on it
4004 * to get the locks on both pages in the correct order.
4005 *
4006 * Another consideration is that we need visibility map page pin(s) if
4007 * we will have to clear the all-visible flag on either page. If we
4008 * call RelationGetBufferForTuple, we rely on it to acquire any such
4009 * pins; but if we don't, we have to handle that here. Hence we need
4010 * a loop.
4011 */
4012 for (;;)
4013 {
4014 if (newtupsize > pagefree)
4015 {
4016 /* It doesn't fit, must use RelationGetBufferForTuple. */
4017 newbuf = RelationGetBufferForTuple(relation, heaptup->t_len,
4018 buffer, 0, NULL,
4019 &vmbuffer_new, &vmbuffer,
4020 0);
4021 /* We're all done. */
4022 break;
4023 }
4024 /* Acquire VM page pin if needed and we don't have it. */
4025 if (vmbuffer == InvalidBuffer && PageIsAllVisible(page))
4026 visibilitymap_pin(relation, block, &vmbuffer);
4027 /* Re-acquire the lock on the old tuple's page. */
4029 /* Re-check using the up-to-date free space */
4031 if (newtupsize > pagefree ||
4032 (vmbuffer == InvalidBuffer && PageIsAllVisible(page)))
4033 {
4034 /*
4035 * Rats, it doesn't fit anymore, or somebody just now set the
4036 * all-visible flag. We must now unlock and loop to avoid
4037 * deadlock. Fortunately, this path should seldom be taken.
4038 */
4040 }
4041 else
4042 {
4043 /* We're all done. */
4044 newbuf = buffer;
4045 break;
4046 }
4047 }
4048 }
4049 else
4050 {
4051 /* No TOAST work needed, and it'll fit on same page */
4052 newbuf = buffer;
4053 heaptup = newtup;
4054 }
4055
4056 /*
4057 * We're about to do the actual update -- check for conflict first, to
4058 * avoid possibly having to roll back work we've just done.
4059 *
4060 * This is safe without a recheck as long as there is no possibility of
4061 * another process scanning the pages between this check and the update
4062 * being visible to the scan (i.e., exclusive buffer content lock(s) are
4063 * continuously held from this point until the tuple update is visible).
4064 *
4065 * For the new tuple the only check needed is at the relation level, but
4066 * since both tuples are in the same relation and the check for oldtup
4067 * will include checking the relation level, there is no benefit to a
4068 * separate check for the new tuple.
4069 */
4070 CheckForSerializableConflictIn(relation, &oldtup.t_self,
4071 BufferGetBlockNumber(buffer));
4072
4073 /*
4074 * At this point newbuf and buffer are both pinned and locked, and newbuf
4075 * has enough space for the new tuple. If they are the same buffer, only
4076 * one pin is held.
4077 */
4078
4079 if (newbuf == buffer)
4080 {
4081 /*
4082 * Since the new tuple is going into the same page, we might be able
4083 * to do a HOT update. Check if any of the index columns have been
4084 * changed.
4085 */
4087 {
4088 use_hot_update = true;
4089
4090 /*
4091 * If none of the columns that are used in hot-blocking indexes
4092 * were updated, we can apply HOT, but we do still need to check
4093 * if we need to update the summarizing indexes, and update those
4094 * indexes if the columns were updated, or we may fail to detect
4095 * e.g. value bound changes in BRIN minmax indexes.
4096 */
4098 summarized_update = true;
4099 }
4100 }
4101 else
4102 {
4103 /* Set a hint that the old page could use prune/defrag */
4104 PageSetFull(page);
4105 }
4106
4107 /*
4108 * Compute replica identity tuple before entering the critical section so
4109 * we don't PANIC upon a memory allocation failure.
4110 * ExtractReplicaIdentity() will return NULL if nothing needs to be
4111 * logged. Pass old key required as true only if the replica identity key
4112 * columns are modified or it has external data.
4113 */
4118
4119 /* NO EREPORT(ERROR) from here till changes are logged */
4121
4122 /*
4123 * If this transaction commits, the old tuple will become DEAD sooner or
4124 * later. Set flag that this page is a candidate for pruning once our xid
4125 * falls below the OldestXmin horizon. If the transaction finally aborts,
4126 * the subsequent page pruning will be a no-op and the hint will be
4127 * cleared.
4128 *
4129 * XXX Should we set hint on newbuf as well? If the transaction aborts,
4130 * there would be a prunable tuple in the newbuf; but for now we choose
4131 * not to optimize for aborts. Note that heap_xlog_update must be kept in
4132 * sync if this decision changes.
4133 */
4134 PageSetPrunable(page, xid);
4135
4136 if (use_hot_update)
4137 {
4138 /* Mark the old tuple as HOT-updated */
4140 /* And mark the new tuple as heap-only */
4142 /* Mark the caller's copy too, in case different from heaptup */
4144 }
4145 else
4146 {
4147 /* Make sure tuples are correctly marked as not-HOT */
4151 }
4152
4153 RelationPutHeapTuple(relation, newbuf, heaptup, false); /* insert new tuple */
4154
4155
4156 /* Clear obsolete visibility flags, possibly set by ourselves above... */
4157 oldtup.t_data->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
4158 oldtup.t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED;
4159 /* ... and store info about transaction updating this tuple */
4162 oldtup.t_data->t_infomask |= infomask_old_tuple;
4163 oldtup.t_data->t_infomask2 |= infomask2_old_tuple;
4165
4166 /* record address of new tuple in t_ctid of old one */
4167 oldtup.t_data->t_ctid = heaptup->t_self;
4168
4169 /* clear PD_ALL_VISIBLE flags, reset all visibilitymap bits */
4170 if (PageIsAllVisible(BufferGetPage(buffer)))
4171 {
4172 all_visible_cleared = true;
4174 visibilitymap_clear(relation, BufferGetBlockNumber(buffer),
4175 vmbuffer, VISIBILITYMAP_VALID_BITS);
4176 }
4177 if (newbuf != buffer && PageIsAllVisible(BufferGetPage(newbuf)))
4178 {
4183 }
4184
4185 if (newbuf != buffer)
4187 MarkBufferDirty(buffer);
4188
4189 /* XLOG stuff */
4190 if (RelationNeedsWAL(relation))
4191 {
4193
4194 /*
4195 * For logical decoding we need combo CIDs to properly decode the
4196 * catalog.
4197 */
4199 {
4200 log_heap_new_cid(relation, &oldtup);
4201 log_heap_new_cid(relation, heaptup);
4202 }
4203
4204 recptr = log_heap_update(relation, buffer,
4209 if (newbuf != buffer)
4210 {
4212 }
4214 }
4215
4217
4218 if (newbuf != buffer)
4221
4222 /*
4223 * Mark old tuple for invalidation from system caches at next command
4224 * boundary, and mark the new tuple for invalidation in case we abort. We
4225 * have to do this before releasing the buffer because oldtup is in the
4226 * buffer. (heaptup is all in local memory, but it's necessary to process
4227 * both tuple versions in one call to inval.c so we can avoid redundant
4228 * sinval messages.)
4229 */
4231
4232 /* Now we can release the buffer(s) */
4233 if (newbuf != buffer)
4235 ReleaseBuffer(buffer);
4238 if (BufferIsValid(vmbuffer))
4239 ReleaseBuffer(vmbuffer);
4240
4241 /*
4242 * Release the lmgr tuple lock, if we had it.
4243 */
4244 if (have_tuple_lock)
4245 UnlockTupleTuplock(relation, &(oldtup.t_self), *lockmode);
4246
4247 pgstat_count_heap_update(relation, use_hot_update, newbuf != buffer);
4248
4249 /*
4250 * If heaptup is a private copy, release it. Don't forget to copy t_self
4251 * back to the caller's image, too.
4252 */
4253 if (heaptup != newtup)
4254 {
4255 newtup->t_self = heaptup->t_self;
4257 }
4258
4259 /*
4260 * If it is a HOT update, the update may still need to update summarized
4261 * indexes, lest we fail to update those summaries and get incorrect
4262 * results (for example, minmax bounds of the block may change with this
4263 * update).
4264 */
4265 if (use_hot_update)
4266 {
4269 else
4271 }
4272 else
4274
4277
4284
4285 return TM_Ok;
4286}
void bms_free(Bitmapset *a)
Definition bitmapset.c:239
Bitmapset * bms_add_members(Bitmapset *a, const Bitmapset *b)
Definition bitmapset.c:901
bool bms_overlap(const Bitmapset *a, const Bitmapset *b)
Definition bitmapset.c:575
static void PageSetFull(Page page)
Definition bufpage.h:417
static Bitmapset * HeapDetermineColumnsInfo(Relation relation, Bitmapset *interesting_cols, Bitmapset *external_cols, HeapTuple oldtup, HeapTuple newtup, bool *has_external)
Definition heapam.c:4466
static XLogRecPtr log_heap_update(Relation reln, Buffer oldbuf, Buffer newbuf, HeapTuple oldtup, HeapTuple newtup, HeapTuple old_key_tuple, bool all_visible_cleared, bool new_all_visible_cleared)
Definition heapam.c:8919
TransactionId HeapTupleGetUpdateXid(const HeapTupleHeaderData *tup)
Definition heapam.c:7660
HeapTuple heap_toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup, int options)
Definition heaptoast.c:96
#define TOAST_TUPLE_THRESHOLD
Definition heaptoast.h:48
static void HeapTupleClearHotUpdated(const HeapTupleData *tuple)
#define HEAP2_XACT_MASK
#define HEAP_XMAX_LOCK_ONLY
static void HeapTupleHeaderSetCmin(HeapTupleHeaderData *tup, CommandId cid)
static void HeapTupleSetHeapOnly(const HeapTupleData *tuple)
#define HEAP_XACT_MASK
static void HeapTupleSetHotUpdated(const HeapTupleData *tuple)
static void HeapTupleClearHeapOnly(const HeapTupleData *tuple)
#define HEAP_UPDATED
#define HEAP_XMAX_KEYSHR_LOCK
#define INJECTION_POINT(name, arg)
void pgstat_count_heap_update(Relation rel, bool hot, bool newpage)
Bitmapset * RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind)
Definition relcache.c:5298
@ INDEX_ATTR_BITMAP_KEY
Definition relcache.h:69
@ INDEX_ATTR_BITMAP_HOT_BLOCKING
Definition relcache.h:72
@ INDEX_ATTR_BITMAP_SUMMARIZED
Definition relcache.h:73
@ INDEX_ATTR_BITMAP_IDENTITY_KEY
Definition relcache.h:71
bool RelationSupportsSysCache(Oid relid)
Definition syscache.c:762
@ TU_Summarizing
Definition tableam.h:119
@ TU_All
Definition tableam.h:116
@ TU_None
Definition tableam.h:113
bool TransactionIdDidAbort(TransactionId transactionId)
Definition transam.c:188

References Assert, AssertHasSnapshotForToast(), bms_add_members(), bms_free(), bms_overlap(), BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage(), BufferIsValid(), CacheInvalidateHeapTuple(), CheckForSerializableConflictIn(), TM_FailureData::cmax, compute_infobits(), compute_new_xmax_infomask(), TM_FailureData::ctid, DoesMultiXactIdConflict(), END_CRIT_SECTION, ereport, errcode(), errmsg(), ERROR, ExtractReplicaIdentity(), fb(), GetCurrentTransactionId(), GetMultiXactIdHintBits(), HEAP2_XACT_MASK, heap_acquire_tuplock(), heap_freetuple(), HEAP_LOCKED_UPGRADED(), HEAP_MOVED, heap_toast_insert_or_update(), HEAP_UPDATED, HEAP_XACT_MASK, HEAP_XMAX_BITS, HEAP_XMAX_INVALID, HEAP_XMAX_IS_KEYSHR_LOCKED(), HEAP_XMAX_IS_LOCKED_ONLY(), HEAP_XMAX_IS_MULTI, HEAP_XMAX_KEYSHR_LOCK, HEAP_XMAX_LOCK_ONLY, HeapDetermineColumnsInfo(), HeapTupleClearHeapOnly(), HeapTupleClearHotUpdated(), HeapTupleGetUpdateXid(), HeapTupleHasExternal(), HeapTupleHeaderAdjustCmax(), HeapTupleHeaderGetCmax(), HeapTupleHeaderGetNatts, HeapTupleHeaderGetRawXmax(), HeapTupleHeaderGetUpdateXid(), HeapTupleHeaderSetCmax(), HeapTupleHeaderSetCmin(), HeapTupleHeaderSetXmax(), HeapTupleHeaderSetXmin(), HeapTupleSatisfiesUpdate(), HeapTupleSatisfiesVisibility(), HeapTupleSetHeapOnly(), HeapTupleSetHotUpdated(), INDEX_ATTR_BITMAP_HOT_BLOCKING, INDEX_ATTR_BITMAP_IDENTITY_KEY, INDEX_ATTR_BITMAP_KEY, INDEX_ATTR_BITMAP_SUMMARIZED, INJECTION_POINT, InvalidBuffer, InvalidCommandId, InvalidSnapshot, InvalidTransactionId, IsInParallelMode(), ItemIdGetLength, ItemIdIsNormal, ItemPointerEquals(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), ItemPointerIsValid(), LockBuffer(), LockTupleExclusive, LockTupleNoKeyExclusive, LockWaitBlock, log_heap_new_cid(), log_heap_update(), MarkBufferDirty(), MAXALIGN, MultiXactIdSetOldestMember(), MultiXactIdWait(), MultiXactStatusNoKeyUpdate, MultiXactStatusUpdate, PageClearAllVisible(), PageGetHeapFreeSpace(), PageGetItem(), PageGetItemId(), 
PageIsAllVisible(), PageSetFull(), PageSetLSN(), PageSetPrunable, pgstat_count_heap_update(), RelationData::rd_rel, ReadBuffer(), REGBUF_STANDARD, RelationGetBufferForTuple(), RelationGetIndexAttrBitmap(), RelationGetNumberOfAttributes, RelationGetRelid, RelationIsAccessibleInLogicalDecoding, RelationNeedsWAL, RelationPutHeapTuple(), RelationSupportsSysCache(), ReleaseBuffer(), SizeOfHeapLock, START_CRIT_SECTION, TM_BeingModified, TM_Deleted, TM_Invisible, TM_Ok, TM_SelfModified, TM_Updated, TOAST_TUPLE_THRESHOLD, TransactionIdDidAbort(), TransactionIdEquals, TransactionIdIsCurrentTransactionId(), TransactionIdIsValid, TU_All, TU_None, TU_Summarizing, UnlockReleaseBuffer(), UnlockTupleTuplock, UpdateXmaxHintBits(), VISIBILITYMAP_ALL_FROZEN, visibilitymap_clear(), visibilitymap_pin(), VISIBILITYMAP_VALID_BITS, XactLockTableWait(), XLH_LOCK_ALL_FROZEN_CLEARED, XLOG_HEAP_LOCK, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), XLogRegisterData(), XLTW_Update, TM_FailureData::xmax, and xmax_infomask_changed().

Referenced by heapam_tuple_update(), and simple_heap_update().

◆ heap_vacuum_rel()

void heap_vacuum_rel ( Relation  rel,
const VacuumParams  params,
BufferAccessStrategy  bstrategy 
)
extern

Definition at line 626 of file vacuumlazy.c.

628{
630 bool verbose,
631 instrument,
632 skipwithvm,
640 TimestampTz starttime = 0;
642 startwritetime = 0;
645 ErrorContextCallback errcallback;
646 char **indnames = NULL;
648
649 verbose = (params.options & VACOPT_VERBOSE) != 0;
650 instrument = (verbose || (AmAutoVacuumWorkerProcess() &&
651 params.log_vacuum_min_duration >= 0));
652 if (instrument)
653 {
655 if (track_io_timing)
656 {
659 }
660 }
661
662 /* Used for instrumentation and stats report */
663 starttime = GetCurrentTimestamp();
664
666 RelationGetRelid(rel));
669 params.is_wraparound
672 else
675
676 /*
677 * Setup error traceback support for ereport() first. The idea is to set
678 * up an error context callback to display additional information on any
679 * error during a vacuum. During different phases of vacuum, we update
680 * the state so that the error context callback always display current
681 * information.
682 *
683 * Copy the names of heap rel into local memory for error reporting
684 * purposes, too. It isn't always safe to assume that we can get the name
685 * of each rel. It's convenient for code in lazy_scan_heap to always use
686 * these temp copies.
687 */
690 vacrel->relnamespace = get_namespace_name(RelationGetNamespace(rel));
691 vacrel->relname = pstrdup(RelationGetRelationName(rel));
692 vacrel->indname = NULL;
694 vacrel->verbose = verbose;
695 errcallback.callback = vacuum_error_callback;
696 errcallback.arg = vacrel;
697 errcallback.previous = error_context_stack;
698 error_context_stack = &errcallback;
699
700 /* Set up high level stuff about rel and its indexes */
701 vacrel->rel = rel;
703 &vacrel->indrels);
704 vacrel->bstrategy = bstrategy;
705 if (instrument && vacrel->nindexes > 0)
706 {
707 /* Copy index names used by instrumentation (not error reporting) */
708 indnames = palloc_array(char *, vacrel->nindexes);
709 for (int i = 0; i < vacrel->nindexes; i++)
711 }
712
713 /*
714 * The index_cleanup param either disables index vacuuming and cleanup or
715 * forces it to go ahead when we would otherwise apply the index bypass
716 * optimization. The default is 'auto', which leaves the final decision
717 * up to lazy_vacuum().
718 *
719 * The truncate param allows user to avoid attempting relation truncation,
720 * though it can't force truncation to happen.
721 */
724 params.truncate != VACOPTVALUE_AUTO);
725
726 /*
727 * While VacuumFailSafeActive is reset to false before calling this, we
728 * still need to reset it here due to recursive calls.
729 */
730 VacuumFailsafeActive = false;
731 vacrel->consider_bypass_optimization = true;
732 vacrel->do_index_vacuuming = true;
733 vacrel->do_index_cleanup = true;
734 vacrel->do_rel_truncate = (params.truncate != VACOPTVALUE_DISABLED);
736 {
737 /* Force disable index vacuuming up-front */
738 vacrel->do_index_vacuuming = false;
739 vacrel->do_index_cleanup = false;
740 }
741 else if (params.index_cleanup == VACOPTVALUE_ENABLED)
742 {
743 /* Force index vacuuming. Note that failsafe can still bypass. */
744 vacrel->consider_bypass_optimization = false;
745 }
746 else
747 {
748 /* Default/auto, make all decisions dynamically */
750 }
751
752 /* Initialize page counters explicitly (be tidy) */
753 vacrel->scanned_pages = 0;
754 vacrel->eager_scanned_pages = 0;
755 vacrel->removed_pages = 0;
756 vacrel->new_frozen_tuple_pages = 0;
757 vacrel->lpdead_item_pages = 0;
758 vacrel->missed_dead_pages = 0;
759 vacrel->nonempty_pages = 0;
760 /* dead_items_alloc allocates vacrel->dead_items later on */
761
762 /* Allocate/initialize output statistics state */
763 vacrel->new_rel_tuples = 0;
764 vacrel->new_live_tuples = 0;
765 vacrel->indstats = (IndexBulkDeleteResult **)
766 palloc0(vacrel->nindexes * sizeof(IndexBulkDeleteResult *));
767
768 /* Initialize remaining counters (be tidy) */
769 vacrel->num_index_scans = 0;
770 vacrel->num_dead_items_resets = 0;
771 vacrel->total_dead_items_bytes = 0;
772 vacrel->tuples_deleted = 0;
773 vacrel->tuples_frozen = 0;
774 vacrel->lpdead_items = 0;
775 vacrel->live_tuples = 0;
776 vacrel->recently_dead_tuples = 0;
777 vacrel->missed_dead_tuples = 0;
778
779 vacrel->new_all_visible_pages = 0;
780 vacrel->new_all_visible_all_frozen_pages = 0;
781 vacrel->new_all_frozen_pages = 0;
782
783 /*
784 * Get cutoffs that determine which deleted tuples are considered DEAD,
785 * not just RECENTLY_DEAD, and which XIDs/MXIDs to freeze. Then determine
786 * the extent of the blocks that we'll scan in lazy_scan_heap. It has to
787 * happen in this order to ensure that the OldestXmin cutoff field works
788 * as an upper bound on the XIDs stored in the pages we'll actually scan
789 * (NewRelfrozenXid tracking must never be allowed to miss unfrozen XIDs).
790 *
791 * Next acquire vistest, a related cutoff that's used in pruning. We use
792 * vistest in combination with OldestXmin to ensure that
793 * heap_page_prune_and_freeze() always removes any deleted tuple whose
794 * xmax is < OldestXmin. lazy_scan_prune must never become confused about
795 * whether a tuple should be frozen or removed. (In the future we might
796 * want to teach lazy_scan_prune to recompute vistest from time to time,
797 * to increase the number of dead tuples it can prune away.)
798 */
799 vacrel->aggressive = vacuum_get_cutoffs(rel, params, &vacrel->cutoffs);
801 vacrel->vistest = GlobalVisTestFor(rel);
802
803 /* Initialize state used to track oldest extant XID/MXID */
804 vacrel->NewRelfrozenXid = vacrel->cutoffs.OldestXmin;
805 vacrel->NewRelminMxid = vacrel->cutoffs.OldestMxact;
806
807 /*
808 * Initialize state related to tracking all-visible page skipping. This is
809 * very important to determine whether or not it is safe to advance the
810 * relfrozenxid/relminmxid.
811 */
812 vacrel->skippedallvis = false;
813 skipwithvm = true;
815 {
816 /*
817 * Force aggressive mode, and disable skipping blocks using the
818 * visibility map (even those set all-frozen)
819 */
820 vacrel->aggressive = true;
821 skipwithvm = false;
822 }
823
824 vacrel->skipwithvm = skipwithvm;
825
826 /*
827 * Set up eager scan tracking state. This must happen after determining
828 * whether or not the vacuum must be aggressive, because only normal
829 * vacuums use the eager scan algorithm.
830 */
832
833 /* Report the vacuum mode: 'normal' or 'aggressive' */
835 vacrel->aggressive
838
839 if (verbose)
840 {
841 if (vacrel->aggressive)
843 (errmsg("aggressively vacuuming \"%s.%s.%s\"",
844 vacrel->dbname, vacrel->relnamespace,
845 vacrel->relname)));
846 else
848 (errmsg("vacuuming \"%s.%s.%s\"",
849 vacrel->dbname, vacrel->relnamespace,
850 vacrel->relname)));
851 }
852
853 /*
854 * Allocate dead_items memory using dead_items_alloc. This handles
855 * parallel VACUUM initialization as part of allocating shared memory
856 * space used for dead_items. (But do a failsafe precheck first, to
857 * ensure that parallel VACUUM won't be attempted at all when relfrozenxid
858 * is already dangerously old.)
859 */
862
863 /*
864 * Call lazy_scan_heap to perform all required heap pruning, index
865 * vacuuming, and heap vacuuming (plus related processing)
866 */
868
869 /*
870 * Save dead items max_bytes and update the memory usage statistics before
871 * cleanup, they are freed in parallel vacuum cases during
872 * dead_items_cleanup().
873 */
874 dead_items_max_bytes = vacrel->dead_items_info->max_bytes;
875 vacrel->total_dead_items_bytes += TidStoreMemoryUsage(vacrel->dead_items);
876
877 /*
878 * Free resources managed by dead_items_alloc. This ends parallel mode in
879 * passing when necessary.
880 */
883
884 /*
885 * Update pg_class entries for each of rel's indexes where appropriate.
886 *
887 * Unlike the later update to rel's pg_class entry, this is not critical.
888 * Maintains relpages/reltuples statistics used by the planner only.
889 */
890 if (vacrel->do_index_cleanup)
892
893 /* Done with rel's indexes */
894 vac_close_indexes(vacrel->nindexes, vacrel->indrels, NoLock);
895
896 /* Optionally truncate rel */
899
900 /* Pop the error context stack */
901 error_context_stack = errcallback.previous;
902
903 /* Report that we are now doing final cleanup */
906
907 /*
908 * Prepare to update rel's pg_class entry.
909 *
910 * Aggressive VACUUMs must always be able to advance relfrozenxid to a
911 * value >= FreezeLimit, and relminmxid to a value >= MultiXactCutoff.
912 * Non-aggressive VACUUMs may advance them by any amount, or not at all.
913 */
914 Assert(vacrel->NewRelfrozenXid == vacrel->cutoffs.OldestXmin ||
915 TransactionIdPrecedesOrEquals(vacrel->aggressive ? vacrel->cutoffs.FreezeLimit :
916 vacrel->cutoffs.relfrozenxid,
917 vacrel->NewRelfrozenXid));
918 Assert(vacrel->NewRelminMxid == vacrel->cutoffs.OldestMxact ||
919 MultiXactIdPrecedesOrEquals(vacrel->aggressive ? vacrel->cutoffs.MultiXactCutoff :
920 vacrel->cutoffs.relminmxid,
921 vacrel->NewRelminMxid));
922 if (vacrel->skippedallvis)
923 {
924 /*
925 * Must keep original relfrozenxid in a non-aggressive VACUUM that
926 * chose to skip an all-visible page range. The state that tracks new
927 * values will have missed unfrozen XIDs from the pages we skipped.
928 */
929 Assert(!vacrel->aggressive);
930 vacrel->NewRelfrozenXid = InvalidTransactionId;
931 vacrel->NewRelminMxid = InvalidMultiXactId;
932 }
933
934 /*
935 * For safety, clamp relallvisible to be not more than what we're setting
936 * pg_class.relpages to
937 */
938 new_rel_pages = vacrel->rel_pages; /* After possible rel truncation */
942
943 /*
944 * An all-frozen block _must_ be all-visible. As such, clamp the count of
945 * all-frozen blocks to the count of all-visible blocks. This matches the
946 * clamping of relallvisible above.
947 */
950
951 /*
952 * Now actually update rel's pg_class entry.
953 *
954 * In principle new_live_tuples could be -1 indicating that we (still)
955 * don't know the tuple count. In practice that can't happen, since we
956 * scan every page that isn't skipped using the visibility map.
957 */
958 vac_update_relstats(rel, new_rel_pages, vacrel->new_live_tuples,
960 vacrel->nindexes > 0,
961 vacrel->NewRelfrozenXid, vacrel->NewRelminMxid,
963
964 /*
965 * Report results to the cumulative stats system, too.
966 *
967 * Deliberately avoid telling the stats system about LP_DEAD items that
968 * remain in the table due to VACUUM bypassing index and heap vacuuming.
969 * ANALYZE will consider the remaining LP_DEAD items to be dead "tuples".
970 * It seems like a good idea to err on the side of not vacuuming again too
971 * soon in cases where the failsafe prevented significant amounts of heap
972 * vacuuming.
973 */
975 Max(vacrel->new_live_tuples, 0),
976 vacrel->recently_dead_tuples +
977 vacrel->missed_dead_tuples,
978 starttime);
980
981 if (instrument)
982 {
984
985 if (verbose || params.log_vacuum_min_duration == 0 ||
988 {
989 long secs_dur;
990 int usecs_dur;
991 WalUsage walusage;
992 BufferUsage bufferusage;
994 char *msgfmt;
995 int32 diff;
996 double read_rate = 0,
997 write_rate = 0;
1001
1003 memset(&walusage, 0, sizeof(WalUsage));
1005 memset(&bufferusage, 0, sizeof(BufferUsage));
1007
1008 total_blks_hit = bufferusage.shared_blks_hit +
1009 bufferusage.local_blks_hit;
1010 total_blks_read = bufferusage.shared_blks_read +
1011 bufferusage.local_blks_read;
1013 bufferusage.local_blks_dirtied;
1014
1016 if (verbose)
1017 {
1018 /*
1019 * Aggressiveness already reported earlier, in dedicated
1020 * VACUUM VERBOSE ereport
1021 */
1022 Assert(!params.is_wraparound);
1023 msgfmt = _("finished vacuuming \"%s.%s.%s\": index scans: %d\n");
1024 }
1025 else if (params.is_wraparound)
1026 {
1027 /*
1028 * While it's possible for a VACUUM to be both is_wraparound
1029 * and !aggressive, that's just a corner-case -- is_wraparound
1030 * implies aggressive. Produce distinct output for the corner
1031 * case all the same, just in case.
1032 */
1033 if (vacrel->aggressive)
1034 msgfmt = _("automatic aggressive vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
1035 else
1036 msgfmt = _("automatic vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
1037 }
1038 else
1039 {
1040 if (vacrel->aggressive)
1041 msgfmt = _("automatic aggressive vacuum of table \"%s.%s.%s\": index scans: %d\n");
1042 else
1043 msgfmt = _("automatic vacuum of table \"%s.%s.%s\": index scans: %d\n");
1044 }
1046 vacrel->dbname,
1047 vacrel->relnamespace,
1048 vacrel->relname,
1049 vacrel->num_index_scans);
1050 appendStringInfo(&buf, _("pages: %u removed, %u remain, %u scanned (%.2f%% of total), %u eagerly scanned\n"),
1051 vacrel->removed_pages,
1053 vacrel->scanned_pages,
1054 orig_rel_pages == 0 ? 100.0 :
1055 100.0 * vacrel->scanned_pages /
1057 vacrel->eager_scanned_pages);
1059 _("tuples: %" PRId64 " removed, %" PRId64 " remain, %" PRId64 " are dead but not yet removable\n"),
1060 vacrel->tuples_deleted,
1061 (int64) vacrel->new_rel_tuples,
1062 vacrel->recently_dead_tuples);
1063 if (vacrel->missed_dead_tuples > 0)
1065 _("tuples missed: %" PRId64 " dead from %u pages not removed due to cleanup lock contention\n"),
1066 vacrel->missed_dead_tuples,
1067 vacrel->missed_dead_pages);
1069 vacrel->cutoffs.OldestXmin);
1071 _("removable cutoff: %u, which was %d XIDs old when operation ended\n"),
1072 vacrel->cutoffs.OldestXmin, diff);
1074 {
1075 diff = (int32) (vacrel->NewRelfrozenXid -
1076 vacrel->cutoffs.relfrozenxid);
1078 _("new relfrozenxid: %u, which is %d XIDs ahead of previous value\n"),
1079 vacrel->NewRelfrozenXid, diff);
1080 }
1081 if (minmulti_updated)
1082 {
1083 diff = (int32) (vacrel->NewRelminMxid -
1084 vacrel->cutoffs.relminmxid);
1086 _("new relminmxid: %u, which is %d MXIDs ahead of previous value\n"),
1087 vacrel->NewRelminMxid, diff);
1088 }
1089 appendStringInfo(&buf, _("frozen: %u pages from table (%.2f%% of total) had %" PRId64 " tuples frozen\n"),
1090 vacrel->new_frozen_tuple_pages,
1091 orig_rel_pages == 0 ? 100.0 :
1092 100.0 * vacrel->new_frozen_tuple_pages /
1094 vacrel->tuples_frozen);
1095
1097 _("visibility map: %u pages set all-visible, %u pages set all-frozen (%u were all-visible)\n"),
1098 vacrel->new_all_visible_pages,
1099 vacrel->new_all_visible_all_frozen_pages +
1100 vacrel->new_all_frozen_pages,
1101 vacrel->new_all_frozen_pages);
1102 if (vacrel->do_index_vacuuming)
1103 {
1104 if (vacrel->nindexes == 0 || vacrel->num_index_scans == 0)
1105 appendStringInfoString(&buf, _("index scan not needed: "));
1106 else
1107 appendStringInfoString(&buf, _("index scan needed: "));
1108
1109 msgfmt = _("%u pages from table (%.2f%% of total) had %" PRId64 " dead item identifiers removed\n");
1110 }
1111 else
1112 {
1114 appendStringInfoString(&buf, _("index scan bypassed: "));
1115 else
1116 appendStringInfoString(&buf, _("index scan bypassed by failsafe: "));
1117
1118 msgfmt = _("%u pages from table (%.2f%% of total) have %" PRId64 " dead item identifiers\n");
1119 }
1121 vacrel->lpdead_item_pages,
1122 orig_rel_pages == 0 ? 100.0 :
1123 100.0 * vacrel->lpdead_item_pages / orig_rel_pages,
1124 vacrel->lpdead_items);
1125 for (int i = 0; i < vacrel->nindexes; i++)
1126 {
1127 IndexBulkDeleteResult *istat = vacrel->indstats[i];
1128
1129 if (!istat)
1130 continue;
1131
1133 _("index \"%s\": pages: %u in total, %u newly deleted, %u currently deleted, %u reusable\n"),
1134 indnames[i],
1135 istat->num_pages,
1136 istat->pages_newly_deleted,
1137 istat->pages_deleted,
1138 istat->pages_free);
1139 }
1141 {
1142 /*
1143 * We bypass the changecount mechanism because this value is
1144 * only updated by the calling process. We also rely on the
1145 * above call to pgstat_progress_end_command() to not clear
1146 * the st_progress_param array.
1147 */
1148 appendStringInfo(&buf, _("delay time: %.3f ms\n"),
1150 }
1151 if (track_io_timing)
1152 {
1153 double read_ms = (double) (pgStatBlockReadTime - startreadtime) / 1000;
1154 double write_ms = (double) (pgStatBlockWriteTime - startwritetime) / 1000;
1155
1156 appendStringInfo(&buf, _("I/O timings: read: %.3f ms, write: %.3f ms\n"),
1157 read_ms, write_ms);
1158 }
1159 if (secs_dur > 0 || usecs_dur > 0)
1160 {
1162 (1024 * 1024) / (secs_dur + usecs_dur / 1000000.0);
1164 (1024 * 1024) / (secs_dur + usecs_dur / 1000000.0);
1165 }
1166 appendStringInfo(&buf, _("avg read rate: %.3f MB/s, avg write rate: %.3f MB/s\n"),
1169 _("buffer usage: %" PRId64 " hits, %" PRId64 " reads, %" PRId64 " dirtied\n"),
1174 _("WAL usage: %" PRId64 " records, %" PRId64 " full page images, %" PRIu64 " bytes, %" PRIu64 " full page image bytes, %" PRId64 " buffers full\n"),
1175 walusage.wal_records,
1176 walusage.wal_fpi,
1177 walusage.wal_bytes,
1178 walusage.wal_fpi_bytes,
1179 walusage.wal_buffers_full);
1180
1181 /*
1182 * Report the dead items memory usage.
1183 *
1184 * The num_dead_items_resets counter increases when we reset the
1185 * collected dead items, so the counter is non-zero if at least
1186 * one dead items are collected, even if index vacuuming is
1187 * disabled.
1188 */
1190 ngettext("memory usage: dead item storage %.2f MB accumulated across %d reset (limit %.2f MB each)\n",
1191 "memory usage: dead item storage %.2f MB accumulated across %d resets (limit %.2f MB each)\n",
1192 vacrel->num_dead_items_resets),
1193 (double) vacrel->total_dead_items_bytes / (1024 * 1024),
1194 vacrel->num_dead_items_resets,
1195 (double) dead_items_max_bytes / (1024 * 1024));
1196 appendStringInfo(&buf, _("system usage: %s"), pg_rusage_show(&ru0));
1197
1198 ereport(verbose ? INFO : LOG,
1199 (errmsg_internal("%s", buf.data)));
1200 pfree(buf.data);
1201 }
1202 }
1203
1204 /* Cleanup index statistics and index names */
1205 for (int i = 0; i < vacrel->nindexes; i++)
1206 {
1207 if (vacrel->indstats[i])
1208 pfree(vacrel->indstats[i]);
1209
1210 if (instrument)
1211 pfree(indnames[i]);
1212 }
1213}
void TimestampDifference(TimestampTz start_time, TimestampTz stop_time, long *secs, int *microsecs)
Definition timestamp.c:1721
bool TimestampDifferenceExceeds(TimestampTz start_time, TimestampTz stop_time, int msec)
Definition timestamp.c:1781
TimestampTz GetCurrentTimestamp(void)
Definition timestamp.c:1645
void pgstat_progress_start_command(ProgressCommandType cmdtype, Oid relid)
void pgstat_progress_update_param(int index, int64 val)
void pgstat_progress_end_command(void)
@ PROGRESS_COMMAND_VACUUM
PgBackendStatus * MyBEEntry
bool track_io_timing
Definition bufmgr.c:177
#define RelationGetNumberOfBlocks(reln)
Definition bufmgr.h:307
#define ngettext(s, p, n)
Definition c.h:1198
int32_t int32
Definition c.h:554
int64 TimestampTz
Definition timestamp.h:39
ErrorContextCallback * error_context_stack
Definition elog.c:99
#define _(x)
Definition elog.c:95
#define LOG
Definition elog.h:31
#define INFO
Definition elog.h:34
#define palloc0_object(type)
Definition fe_memutils.h:75
void WalUsageAccumDiff(WalUsage *dst, const WalUsage *add, const WalUsage *sub)
Definition instrument.c:289
BufferUsage pgBufferUsage
Definition instrument.c:20
void BufferUsageAccumDiff(BufferUsage *dst, const BufferUsage *add, const BufferUsage *sub)
Definition instrument.c:249
#define NoLock
Definition lockdefs.h:34
#define RowExclusiveLock
Definition lockdefs.h:38
char * get_database_name(Oid dbid)
Definition lsyscache.c:1242
char * get_namespace_name(Oid nspid)
Definition lsyscache.c:3518
char * pstrdup(const char *in)
Definition mcxt.c:1781
void * palloc0(Size size)
Definition mcxt.c:1417
#define AmAutoVacuumWorkerProcess()
Definition miscadmin.h:383
static int verbose
const void * data
const char * pg_rusage_show(const PGRUsage *ru0)
Definition pg_rusage.c:40
void pg_rusage_init(PGRUsage *ru0)
Definition pg_rusage.c:27
int64 PgStat_Counter
Definition pgstat.h:71
PgStat_Counter pgStatBlockReadTime
PgStat_Counter pgStatBlockWriteTime
void pgstat_report_vacuum(Relation rel, PgStat_Counter livetuples, PgStat_Counter deadtuples, TimestampTz starttime)
#define PROGRESS_VACUUM_PHASE_FINAL_CLEANUP
Definition progress.h:41
#define PROGRESS_VACUUM_MODE
Definition progress.h:32
#define PROGRESS_VACUUM_MODE_NORMAL
Definition progress.h:44
#define PROGRESS_VACUUM_STARTED_BY_AUTOVACUUM
Definition progress.h:50
#define PROGRESS_VACUUM_PHASE
Definition progress.h:21
#define PROGRESS_VACUUM_DELAY_TIME
Definition progress.h:31
#define PROGRESS_VACUUM_STARTED_BY_AUTOVACUUM_WRAPAROUND
Definition progress.h:51
#define PROGRESS_VACUUM_STARTED_BY_MANUAL
Definition progress.h:49
#define PROGRESS_VACUUM_STARTED_BY
Definition progress.h:33
#define PROGRESS_VACUUM_MODE_AGGRESSIVE
Definition progress.h:45
#define RelationGetNamespace(relation)
Definition rel.h:555
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition stringinfo.c:145
void appendStringInfoString(StringInfo str, const char *s)
Definition stringinfo.c:230
void initStringInfo(StringInfo str)
Definition stringinfo.c:97
int64 shared_blks_dirtied
Definition instrument.h:28
int64 local_blks_hit
Definition instrument.h:30
int64 shared_blks_read
Definition instrument.h:27
int64 local_blks_read
Definition instrument.h:31
int64 local_blks_dirtied
Definition instrument.h:32
int64 shared_blks_hit
Definition instrument.h:26
struct ErrorContextCallback * previous
Definition elog.h:297
void(* callback)(void *arg)
Definition elog.h:298
BlockNumber pages_deleted
Definition genam.h:88
BlockNumber pages_newly_deleted
Definition genam.h:87
BlockNumber pages_free
Definition genam.h:89
BlockNumber num_pages
Definition genam.h:83
int64 st_progress_param[PGSTAT_NUM_PROGRESS_PARAM]
int nworkers
Definition vacuum.h:251
VacOptValue truncate
Definition vacuum.h:236
bits32 options
Definition vacuum.h:219
int log_vacuum_min_duration
Definition vacuum.h:227
bool is_wraparound
Definition vacuum.h:226
VacOptValue index_cleanup
Definition vacuum.h:235
int64 wal_buffers_full
Definition instrument.h:57
uint64 wal_bytes
Definition instrument.h:55
uint64 wal_fpi_bytes
Definition instrument.h:56
int64 wal_records
Definition instrument.h:53
size_t TidStoreMemoryUsage(TidStore *ts)
Definition tidstore.c:532
static TransactionId ReadNextTransactionId(void)
Definition transam.h:377
bool track_cost_delay_timing
Definition vacuum.c:82
void vac_open_indexes(Relation relation, LOCKMODE lockmode, int *nindexes, Relation **Irel)
Definition vacuum.c:2362
void vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode)
Definition vacuum.c:2405
bool VacuumFailsafeActive
Definition vacuum.c:110
void vac_update_relstats(Relation relation, BlockNumber num_pages, double num_tuples, BlockNumber num_all_visible_pages, BlockNumber num_all_frozen_pages, bool hasindex, TransactionId frozenxid, MultiXactId minmulti, bool *frozenxid_updated, bool *minmulti_updated, bool in_outer_xact)
Definition vacuum.c:1426
bool vacuum_get_cutoffs(Relation rel, const VacuumParams params, struct VacuumCutoffs *cutoffs)
Definition vacuum.c:1100
#define VACOPT_VERBOSE
Definition vacuum.h:182
@ VACOPTVALUE_AUTO
Definition vacuum.h:203
@ VACOPTVALUE_ENABLED
Definition vacuum.h:205
@ VACOPTVALUE_UNSPECIFIED
Definition vacuum.h:202
@ VACOPTVALUE_DISABLED
Definition vacuum.h:204
#define VACOPT_DISABLE_PAGE_SKIPPING
Definition vacuum.h:188
static void dead_items_cleanup(LVRelState *vacrel)
static void update_relstats_all_indexes(LVRelState *vacrel)
static void heap_vacuum_eager_scan_setup(LVRelState *vacrel, const VacuumParams params)
Definition vacuumlazy.c:499
static void vacuum_error_callback(void *arg)
static void lazy_truncate_heap(LVRelState *vacrel)
static bool should_attempt_truncation(LVRelState *vacrel)
@ VACUUM_ERRCB_PHASE_UNKNOWN
Definition vacuumlazy.c:224
static void lazy_scan_heap(LVRelState *vacrel)
static bool lazy_check_wraparound_failsafe(LVRelState *vacrel)
static void dead_items_alloc(LVRelState *vacrel, int nworkers)
void visibilitymap_count(Relation rel, BlockNumber *all_visible, BlockNumber *all_frozen)

References _, AmAutoVacuumWorkerProcess, appendStringInfo(), appendStringInfoString(), ErrorContextCallback::arg, Assert, buf, BufferUsageAccumDiff(), ErrorContextCallback::callback, dead_items_alloc(), dead_items_cleanup(), ereport, errmsg(), errmsg_internal(), error_context_stack, fb(), get_database_name(), get_namespace_name(), GetCurrentTimestamp(), GlobalVisTestFor(), heap_vacuum_eager_scan_setup(), i, VacuumParams::index_cleanup, INFO, initStringInfo(), InvalidMultiXactId, InvalidTransactionId, VacuumParams::is_wraparound, IsInParallelMode(), lazy_check_wraparound_failsafe(), lazy_scan_heap(), lazy_truncate_heap(), BufferUsage::local_blks_dirtied, BufferUsage::local_blks_hit, BufferUsage::local_blks_read, LOG, VacuumParams::log_vacuum_min_duration, Max, MultiXactIdPrecedesOrEquals(), MyBEEntry, MyDatabaseId, ngettext, NoLock, IndexBulkDeleteResult::num_pages, VacuumParams::nworkers, VacuumParams::options, IndexBulkDeleteResult::pages_deleted, IndexBulkDeleteResult::pages_free, IndexBulkDeleteResult::pages_newly_deleted, palloc0(), palloc0_object, palloc_array, pfree(), pg_rusage_init(), pg_rusage_show(), pgBufferUsage, pgstat_progress_end_command(), pgstat_progress_start_command(), pgstat_progress_update_param(), pgstat_report_vacuum(), pgStatBlockReadTime, pgStatBlockWriteTime, pgWalUsage, ErrorContextCallback::previous, PROGRESS_COMMAND_VACUUM, PROGRESS_VACUUM_DELAY_TIME, PROGRESS_VACUUM_MODE, PROGRESS_VACUUM_MODE_AGGRESSIVE, PROGRESS_VACUUM_MODE_NORMAL, PROGRESS_VACUUM_PHASE, PROGRESS_VACUUM_PHASE_FINAL_CLEANUP, PROGRESS_VACUUM_STARTED_BY, PROGRESS_VACUUM_STARTED_BY_AUTOVACUUM, PROGRESS_VACUUM_STARTED_BY_AUTOVACUUM_WRAPAROUND, PROGRESS_VACUUM_STARTED_BY_MANUAL, pstrdup(), ReadNextTransactionId(), RelationGetNamespace, RelationGetNumberOfBlocks, RelationGetRelationName, RelationGetRelid, RowExclusiveLock, BufferUsage::shared_blks_dirtied, BufferUsage::shared_blks_hit, BufferUsage::shared_blks_read, should_attempt_truncation(), 
PgBackendStatus::st_progress_param, TidStoreMemoryUsage(), TimestampDifference(), TimestampDifferenceExceeds(), track_cost_delay_timing, track_io_timing, TransactionIdPrecedesOrEquals(), VacuumParams::truncate, update_relstats_all_indexes(), vac_close_indexes(), vac_open_indexes(), vac_update_relstats(), VACOPT_DISABLE_PAGE_SKIPPING, VACOPT_VERBOSE, VACOPTVALUE_AUTO, VACOPTVALUE_DISABLED, VACOPTVALUE_ENABLED, VACOPTVALUE_UNSPECIFIED, VACUUM_ERRCB_PHASE_UNKNOWN, vacuum_error_callback(), vacuum_get_cutoffs(), VacuumFailsafeActive, verbose, visibilitymap_count(), WalUsage::wal_buffers_full, WalUsage::wal_bytes, WalUsage::wal_fpi, WalUsage::wal_fpi_bytes, WalUsage::wal_records, and WalUsageAccumDiff().

◆ HeapCheckForSerializableConflictOut()

void HeapCheckForSerializableConflictOut ( bool  visible,
Relation  relation,
HeapTuple  tuple,
Buffer  buffer,
Snapshot  snapshot 
)
extern

Definition at line 9326 of file heapam.c.

9329{
9330 TransactionId xid;
9332
9333 if (!CheckForSerializableConflictOutNeeded(relation, snapshot))
9334 return;
9335
9336 /*
9337 * Check to see whether the tuple has been written to by a concurrent
9338 * transaction, either to create it not visible to us, or to delete it
9339 * while it is visible to us. The "visible" bool indicates whether the
9340 * tuple is visible to us, while HeapTupleSatisfiesVacuum checks what else
9341 * is going on with it.
9342 *
9343 * In the event of a concurrently inserted tuple that also happens to have
9344 * been concurrently updated (by a separate transaction), the xmin of the
9345 * tuple will be used -- not the updater's xid.
9346 */
9348 switch (htsvResult)
9349 {
9350 case HEAPTUPLE_LIVE:
9351 if (visible)
9352 return;
9353 xid = HeapTupleHeaderGetXmin(tuple->t_data);
9354 break;
9357 if (visible)
9358 xid = HeapTupleHeaderGetUpdateXid(tuple->t_data);
9359 else
9360 xid = HeapTupleHeaderGetXmin(tuple->t_data);
9361
9363 {
9364 /* This is like the HEAPTUPLE_DEAD case */
9365 Assert(!visible);
9366 return;
9367 }
9368 break;
9370 xid = HeapTupleHeaderGetXmin(tuple->t_data);
9371 break;
9372 case HEAPTUPLE_DEAD:
9373 Assert(!visible);
9374 return;
9375 default:
9376
9377 /*
9378 * The only way to get to this default clause is if a new value is
9379 * added to the enum type without adding it to this switch
9380 * statement. That's a bug, so elog.
9381 */
9382 elog(ERROR, "unrecognized return value from HeapTupleSatisfiesVacuum: %u", htsvResult);
9383
9384 /*
9385 * In spite of having all enum values covered and calling elog on
9386 * this default, some compilers think this is a code path which
9387 * allows xid to be used below without initialization. Silence
9388 * that warning.
9389 */
9391 }
9392
9395
9396 /*
9397 * Find top level xid. Bail out if xid is too early to be a conflict, or
9398 * if it's our own xid.
9399 */
9401 return;
9404 return;
9405
9406 CheckForSerializableConflictOut(relation, xid, snapshot);
9407}
HTSV_Result HeapTupleSatisfiesVacuum(HeapTuple htup, TransactionId OldestXmin, Buffer buffer)
void CheckForSerializableConflictOut(Relation relation, TransactionId xid, Snapshot snapshot)
Definition predicate.c:4021
TransactionId SubTransGetTopmostTransaction(TransactionId xid)
Definition subtrans.c:162
static bool TransactionIdFollowsOrEquals(TransactionId id1, TransactionId id2)
Definition transam.h:312
TransactionId GetTopTransactionIdIfAny(void)
Definition xact.c:442

References Assert, CheckForSerializableConflictOut(), CheckForSerializableConflictOutNeeded(), elog, ERROR, fb(), GetTopTransactionIdIfAny(), HEAPTUPLE_DEAD, HEAPTUPLE_DELETE_IN_PROGRESS, HEAPTUPLE_INSERT_IN_PROGRESS, HEAPTUPLE_LIVE, HEAPTUPLE_RECENTLY_DEAD, HeapTupleHeaderGetUpdateXid(), HeapTupleHeaderGetXmin(), HeapTupleSatisfiesVacuum(), InvalidTransactionId, SubTransGetTopmostTransaction(), HeapTupleData::t_data, TransactionIdEquals, TransactionIdFollowsOrEquals(), TransactionIdIsValid, TransactionIdPrecedes(), and TransactionXmin.

Referenced by BitmapHeapScanNextBlock(), heap_fetch(), heap_get_latest_tid(), heap_hot_search_buffer(), heapam_scan_sample_next_tuple(), heapgettup(), and page_collect_tuples().

◆ HeapTupleHeaderIsOnlyLocked()

bool HeapTupleHeaderIsOnlyLocked ( HeapTupleHeader  tuple)
extern

Definition at line 1365 of file heapam_visibility.c.

1366{
1367 TransactionId xmax;
1368
1369 /* if there's no valid Xmax, then there's obviously no update either */
1370 if (tuple->t_infomask & HEAP_XMAX_INVALID)
1371 return true;
1372
1373 if (tuple->t_infomask & HEAP_XMAX_LOCK_ONLY)
1374 return true;
1375
1376 /* invalid xmax means no update */
1378 return true;
1379
1380 /*
1381 * if HEAP_XMAX_LOCK_ONLY is not set and not a multi, then this must
1382 * necessarily have been updated
1383 */
1384 if (!(tuple->t_infomask & HEAP_XMAX_IS_MULTI))
1385 return false;
1386
1387 /* ... but if it's a multi, then perhaps the updating Xid aborted. */
1388 xmax = HeapTupleGetUpdateXid(tuple);
1389
1390 /* not LOCKED_ONLY, so it has to have an xmax */
1392
1394 return false;
1395 if (TransactionIdIsInProgress(xmax))
1396 return false;
1397 if (TransactionIdDidCommit(xmax))
1398 return false;
1399
1400 /*
1401 * not current, not in progress, not committed -- must have aborted or
1402 * crashed
1403 */
1404 return true;
1405}
bool TransactionIdIsInProgress(TransactionId xid)
Definition procarray.c:1405

References Assert, HEAP_XMAX_INVALID, HEAP_XMAX_IS_MULTI, HEAP_XMAX_LOCK_ONLY, HeapTupleGetUpdateXid(), HeapTupleHeaderGetRawXmax(), HeapTupleHeaderData::t_infomask, TransactionIdDidCommit(), TransactionIdIsCurrentTransactionId(), TransactionIdIsInProgress(), and TransactionIdIsValid.

Referenced by heap_delete(), heap_get_latest_tid(), heap_lock_tuple(), heap_lock_updated_tuple_rec(), HeapTupleSatisfiesVacuumHorizon(), and rewrite_heap_tuple().

◆ HeapTupleIsSurelyDead()

bool HeapTupleIsSurelyDead ( HeapTuple  htup,
GlobalVisState vistest 
)
extern

Definition at line 1310 of file heapam_visibility.c.

1311{
1312 HeapTupleHeader tuple = htup->t_data;
1313
1315 Assert(htup->t_tableOid != InvalidOid);
1316
1317 /*
1318 * If the inserting transaction is marked invalid, then it aborted, and
1319 * the tuple is definitely dead. If it's marked neither committed nor
1320 * invalid, then we assume it's still alive (since the presumption is that
1321 * all relevant hint bits were just set moments ago).
1322 */
1323 if (!HeapTupleHeaderXminCommitted(tuple))
1324 return HeapTupleHeaderXminInvalid(tuple);
1325
1326 /*
1327 * If the inserting transaction committed, but any deleting transaction
1328 * aborted, the tuple is still alive.
1329 */
1330 if (tuple->t_infomask & HEAP_XMAX_INVALID)
1331 return false;
1332
1333 /*
1334 * If the XMAX is just a lock, the tuple is still alive.
1335 */
1337 return false;
1338
1339 /*
1340 * If the Xmax is a MultiXact, it might be dead or alive, but we cannot
1341 * know without checking pg_multixact.
1342 */
1343 if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
1344 return false;
1345
1346 /* If deleter isn't known to have committed, assume it's still running. */
1347 if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED))
1348 return false;
1349
1350 /* Deleter committed, so tuple is dead if the XID is old enough. */
1351 return GlobalVisTestIsRemovableXid(vistest,
1353}
static bool HeapTupleHeaderXminInvalid(const HeapTupleHeaderData *tup)
static bool HeapTupleHeaderXminCommitted(const HeapTupleHeaderData *tup)
#define InvalidOid

References Assert, GlobalVisTestIsRemovableXid(), HEAP_XMAX_COMMITTED, HEAP_XMAX_INVALID, HEAP_XMAX_IS_LOCKED_ONLY(), HEAP_XMAX_IS_MULTI, HeapTupleHeaderGetRawXmax(), HeapTupleHeaderXminCommitted(), HeapTupleHeaderXminInvalid(), InvalidOid, ItemPointerIsValid(), HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleData::t_self, and HeapTupleData::t_tableOid.

Referenced by heap_hot_search_buffer().

◆ HeapTupleSatisfiesMVCCBatch()

int HeapTupleSatisfiesMVCCBatch ( Snapshot  snapshot,
Buffer  buffer,
int  ntups,
BatchMVCCState batchmvcc,
OffsetNumber vistuples_dense 
)
extern

Definition at line 1617 of file heapam_visibility.c.

1621{
1622 int nvis = 0;
1623
1624 Assert(IsMVCCSnapshot(snapshot));
1625
1626 for (int i = 0; i < ntups; i++)
1627 {
1628 bool valid;
1629 HeapTuple tup = &batchmvcc->tuples[i];
1630
1631 valid = HeapTupleSatisfiesMVCC(tup, snapshot, buffer);
1632 batchmvcc->visible[i] = valid;
1633
1634 if (likely(valid))
1635 {
1636 vistuples_dense[nvis] = tup->t_self.ip_posid;
1637 nvis++;
1638 }
1639 }
1640
1641 return nvis;
1642}
static bool HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot, Buffer buffer)

References Assert, fb(), HeapTupleSatisfiesMVCC(), i, IsMVCCSnapshot, and likely.

Referenced by page_collect_tuples().

◆ HeapTupleSatisfiesUpdate()

TM_Result HeapTupleSatisfiesUpdate ( HeapTuple  htup,
CommandId  curcid,
Buffer  buffer 
)
extern

Definition at line 440 of file heapam_visibility.c.

442{
443 HeapTupleHeader tuple = htup->t_data;
444
446 Assert(htup->t_tableOid != InvalidOid);
447
449 {
451 return TM_Invisible;
452
453 else if (!HeapTupleCleanMoved(tuple, buffer))
454 return TM_Invisible;
456 {
457 if (HeapTupleHeaderGetCmin(tuple) >= curcid)
458 return TM_Invisible; /* inserted after scan started */
459
460 if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */
461 return TM_Ok;
462
464 {
465 TransactionId xmax;
466
467 xmax = HeapTupleHeaderGetRawXmax(tuple);
468
469 /*
470 * Careful here: even though this tuple was created by our own
471 * transaction, it might be locked by other transactions, if
472 * the original version was key-share locked when we updated
473 * it.
474 */
475
476 if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
477 {
478 if (MultiXactIdIsRunning(xmax, true))
479 return TM_BeingModified;
480 else
481 return TM_Ok;
482 }
483
484 /*
485 * If the locker is gone, then there is nothing of interest
486 * left in this Xmax; otherwise, report the tuple as
487 * locked/updated.
488 */
489 if (!TransactionIdIsInProgress(xmax))
490 return TM_Ok;
491 return TM_BeingModified;
492 }
493
494 if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
495 {
496 TransactionId xmax;
497
498 xmax = HeapTupleGetUpdateXid(tuple);
499
500 /* not LOCKED_ONLY, so it has to have an xmax */
502
503 /* deleting subtransaction must have aborted */
505 {
507 false))
508 return TM_BeingModified;
509 return TM_Ok;
510 }
511 else
512 {
513 if (HeapTupleHeaderGetCmax(tuple) >= curcid)
514 return TM_SelfModified; /* updated after scan started */
515 else
516 return TM_Invisible; /* updated before scan started */
517 }
518 }
519
521 {
522 /* deleting subtransaction must have aborted */
523 SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
525 return TM_Ok;
526 }
527
528 if (HeapTupleHeaderGetCmax(tuple) >= curcid)
529 return TM_SelfModified; /* updated after scan started */
530 else
531 return TM_Invisible; /* updated before scan started */
532 }
534 return TM_Invisible;
536 SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
538 else
539 {
540 /* it must have aborted or crashed */
541 SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
543 return TM_Invisible;
544 }
545 }
546
547 /* by here, the inserting transaction has committed */
548
549 if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid or aborted */
550 return TM_Ok;
551
552 if (tuple->t_infomask & HEAP_XMAX_COMMITTED)
553 {
555 return TM_Ok;
556 if (!ItemPointerEquals(&htup->t_self, &tuple->t_ctid))
557 return TM_Updated; /* updated by other */
558 else
559 return TM_Deleted; /* deleted by other */
560 }
561
562 if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
563 {
564 TransactionId xmax;
565
567 return TM_Ok;
568
570 {
572 return TM_BeingModified;
573
575 return TM_Ok;
576 }
577
578 xmax = HeapTupleGetUpdateXid(tuple);
579 if (!TransactionIdIsValid(xmax))
580 {
582 return TM_BeingModified;
583 }
584
585 /* not LOCKED_ONLY, so it has to have an xmax */
587
589 {
590 if (HeapTupleHeaderGetCmax(tuple) >= curcid)
591 return TM_SelfModified; /* updated after scan started */
592 else
593 return TM_Invisible; /* updated before scan started */
594 }
595
597 return TM_BeingModified;
598
599 if (TransactionIdDidCommit(xmax))
600 {
601 if (!ItemPointerEquals(&htup->t_self, &tuple->t_ctid))
602 return TM_Updated;
603 else
604 return TM_Deleted;
605 }
606
607 /*
608 * By here, the update in the Xmax is either aborted or crashed, but
609 * what about the other members?
610 */
611
613 {
614 /*
615 * There's no member, even just a locker, alive anymore, so we can
616 * mark the Xmax as invalid.
617 */
618 SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
620 return TM_Ok;
621 }
622 else
623 {
624 /* There are lockers running */
625 return TM_BeingModified;
626 }
627 }
628
630 {
632 return TM_BeingModified;
633 if (HeapTupleHeaderGetCmax(tuple) >= curcid)
634 return TM_SelfModified; /* updated after scan started */
635 else
636 return TM_Invisible; /* updated before scan started */
637 }
638
640 return TM_BeingModified;
641
643 {
644 /* it must have aborted or crashed */
645 SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
647 return TM_Ok;
648 }
649
650 /* xmax transaction committed */
651
653 {
654 SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
656 return TM_Ok;
657 }
658
659 SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED,
661 if (!ItemPointerEquals(&htup->t_self, &tuple->t_ctid))
662 return TM_Updated; /* updated by other */
663 else
664 return TM_Deleted; /* deleted by other */
665}
CommandId HeapTupleHeaderGetCmin(const HeapTupleHeaderData *tup)
Definition combocid.c:104
static void SetHintBits(HeapTupleHeader tuple, Buffer buffer, uint16 infomask, TransactionId xid)
static bool HeapTupleCleanMoved(HeapTupleHeader tuple, Buffer buffer)
#define HEAP_XMIN_COMMITTED
#define HEAP_XMIN_INVALID
bool MultiXactIdIsRunning(MultiXactId multi, bool isLockOnly)
Definition multixact.c:463

References Assert, HEAP_LOCKED_UPGRADED(), HEAP_XMAX_COMMITTED, HEAP_XMAX_INVALID, HEAP_XMAX_IS_LOCKED_ONLY(), HEAP_XMAX_IS_MULTI, HEAP_XMIN_COMMITTED, HEAP_XMIN_INVALID, HeapTupleCleanMoved(), HeapTupleGetUpdateXid(), HeapTupleHeaderGetCmax(), HeapTupleHeaderGetCmin(), HeapTupleHeaderGetRawXmax(), HeapTupleHeaderGetRawXmin(), HeapTupleHeaderXminCommitted(), HeapTupleHeaderXminInvalid(), InvalidOid, InvalidTransactionId, ItemPointerEquals(), ItemPointerIsValid(), MultiXactIdIsRunning(), SetHintBits(), HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleData::t_self, HeapTupleData::t_tableOid, TM_BeingModified, TM_Deleted, TM_Invisible, TM_Ok, TM_SelfModified, TM_Updated, TransactionIdDidCommit(), TransactionIdIsCurrentTransactionId(), TransactionIdIsInProgress(), and TransactionIdIsValid.

Referenced by heap_delete(), heap_inplace_lock(), heap_lock_tuple(), heap_update(), and pgrowlocks().

◆ HeapTupleSatisfiesVacuum()

HTSV_Result HeapTupleSatisfiesVacuum ( HeapTuple  htup,
TransactionId  OldestXmin,
Buffer  buffer 
)
extern

◆ HeapTupleSatisfiesVacuumHorizon()

HTSV_Result HeapTupleSatisfiesVacuumHorizon ( HeapTuple  htup,
Buffer  buffer,
TransactionId dead_after 
)
extern

Definition at line 1076 of file heapam_visibility.c.

1077{
1078 HeapTupleHeader tuple = htup->t_data;
1079
1081 Assert(htup->t_tableOid != InvalidOid);
1083
1085
1086 /*
1087 * Has inserting transaction committed?
1088 *
1089 * If the inserting transaction aborted, then the tuple was never visible
1090 * to any other transaction, so we can delete it immediately.
1091 */
1092 if (!HeapTupleHeaderXminCommitted(tuple))
1093 {
1094 if (HeapTupleHeaderXminInvalid(tuple))
1095 return HEAPTUPLE_DEAD;
1096 else if (!HeapTupleCleanMoved(tuple, buffer))
1097 return HEAPTUPLE_DEAD;
1099 {
1100 if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */
1102 /* only locked? run infomask-only check first, for performance */
1106 /* inserted and then deleted by same xact */
1109 /* deleting subtransaction must have aborted */
1111 }
1113 {
1114 /*
1115 * It'd be possible to discern between INSERT/DELETE in progress
1116 * here by looking at xmax - but that doesn't seem beneficial for
1117 * the majority of callers and even detrimental for some. We'd
1118 * rather have callers look at/wait for xmin than xmax. It's
1119 * always correct to return INSERT_IN_PROGRESS because that's
1120 * what's happening from the view of other backends.
1121 */
1123 }
1125 SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
1127 else
1128 {
1129 /*
1130 * Not in Progress, Not Committed, so either Aborted or crashed
1131 */
1132 SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
1134 return HEAPTUPLE_DEAD;
1135 }
1136
1137 /*
1138 * At this point the xmin is known committed, but we might not have
1139 * been able to set the hint bit yet; so we can no longer Assert that
1140 * it's set.
1141 */
1142 }
1143
1144 /*
1145 * Okay, the inserter committed, so it was good at some point. Now what
1146 * about the deleting transaction?
1147 */
1148 if (tuple->t_infomask & HEAP_XMAX_INVALID)
1149 return HEAPTUPLE_LIVE;
1150
1152 {
1153 /*
1154 * "Deleting" xact really only locked it, so the tuple is live in any
1155 * case. However, we should make sure that either XMAX_COMMITTED or
1156 * XMAX_INVALID gets set once the xact is gone, to reduce the costs of
1157 * examining the tuple for future xacts.
1158 */
1159 if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED))
1160 {
1161 if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
1162 {
1163 /*
1164 * If it's a pre-pg_upgrade tuple, the multixact cannot
1165 * possibly be running; otherwise have to check.
1166 */
1167 if (!HEAP_LOCKED_UPGRADED(tuple->t_infomask) &&
1169 true))
1170 return HEAPTUPLE_LIVE;
1172 }
1173 else
1174 {
1176 return HEAPTUPLE_LIVE;
1177 SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
1179 }
1180 }
1181
1182 /*
1183 * We don't really care whether xmax did commit, abort or crash. We
1184 * know that xmax did lock the tuple, but it did not and will never
1185 * actually update it.
1186 */
1187
1188 return HEAPTUPLE_LIVE;
1189 }
1190
1191 if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
1192 {
1194
1195 /* already checked above */
1197
1198 /* not LOCKED_ONLY, so it has to have an xmax */
1200
1201 if (TransactionIdIsInProgress(xmax))
1203 else if (TransactionIdDidCommit(xmax))
1204 {
1205 /*
1206 * The multixact might still be running due to lockers. Need to
1207 * allow for pruning if below the xid horizon regardless --
1208 * otherwise we could end up with a tuple where the updater has to
1209 * be removed due to the horizon, but is not pruned away. It's
1210 * not a problem to prune that tuple, because any remaining
1211 * lockers will also be present in newer tuple versions.
1212 */
1213 *dead_after = xmax;
1215 }
1216 else if (!MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple), false))
1217 {
1218 /*
1219 * Not in Progress, Not Committed, so either Aborted or crashed.
1220 * Mark the Xmax as invalid.
1221 */
1223 }
1224
1225 return HEAPTUPLE_LIVE;
1226 }
1227
1228 if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED))
1229 {
1233 SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED,
1235 else
1236 {
1237 /*
1238 * Not in Progress, Not Committed, so either Aborted or crashed
1239 */
1240 SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
1242 return HEAPTUPLE_LIVE;
1243 }
1244
1245 /*
1246 * At this point the xmax is known committed, but we might not have
1247 * been able to set the hint bit yet; so we can no longer Assert that
1248 * it's set.
1249 */
1250 }
1251
1252 /*
1253 * Deleter committed, allow caller to check if it was recent enough that
1254 * some open transactions could still see the tuple.
1255 */
1258}

References Assert, fb(), HEAP_LOCKED_UPGRADED(), HEAP_XMAX_COMMITTED, HEAP_XMAX_INVALID, HEAP_XMAX_IS_LOCKED_ONLY(), HEAP_XMAX_IS_MULTI, HEAP_XMIN_COMMITTED, HEAP_XMIN_INVALID, HEAPTUPLE_DEAD, HEAPTUPLE_DELETE_IN_PROGRESS, HEAPTUPLE_INSERT_IN_PROGRESS, HEAPTUPLE_LIVE, HEAPTUPLE_RECENTLY_DEAD, HeapTupleCleanMoved(), HeapTupleGetUpdateXid(), HeapTupleHeaderGetRawXmax(), HeapTupleHeaderGetRawXmin(), HeapTupleHeaderGetUpdateXid(), HeapTupleHeaderIsOnlyLocked(), HeapTupleHeaderXminCommitted(), HeapTupleHeaderXminInvalid(), InvalidOid, InvalidTransactionId, ItemPointerIsValid(), MultiXactIdIsRunning(), SetHintBits(), HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleData::t_self, HeapTupleData::t_tableOid, TransactionIdDidCommit(), TransactionIdIsCurrentTransactionId(), TransactionIdIsInProgress(), and TransactionIdIsValid.

Referenced by heap_page_would_be_all_visible(), heap_prune_satisfies_vacuum(), heapam_scan_analyze_next_tuple(), HeapTupleSatisfiesNonVacuumable(), and HeapTupleSatisfiesVacuum().

◆ HeapTupleSatisfiesVisibility()

bool HeapTupleSatisfiesVisibility ( HeapTuple  htup,
Snapshot  snapshot,
Buffer  buffer 
)
extern

Definition at line 1655 of file heapam_visibility.c.

1656{
1657 switch (snapshot->snapshot_type)
1658 {
1659 case SNAPSHOT_MVCC:
1660 return HeapTupleSatisfiesMVCC(htup, snapshot, buffer);
1661 case SNAPSHOT_SELF:
1662 return HeapTupleSatisfiesSelf(htup, snapshot, buffer);
1663 case SNAPSHOT_ANY:
1664 return HeapTupleSatisfiesAny(htup, snapshot, buffer);
1665 case SNAPSHOT_TOAST:
1666 return HeapTupleSatisfiesToast(htup, snapshot, buffer);
1667 case SNAPSHOT_DIRTY:
1668 return HeapTupleSatisfiesDirty(htup, snapshot, buffer);
1670 return HeapTupleSatisfiesHistoricMVCC(htup, snapshot, buffer);
1672 return HeapTupleSatisfiesNonVacuumable(htup, snapshot, buffer);
1673 }
1674
1675 return false; /* keep compiler quiet */
1676}
static bool HeapTupleSatisfiesAny(HeapTuple htup, Snapshot snapshot, Buffer buffer)
static bool HeapTupleSatisfiesNonVacuumable(HeapTuple htup, Snapshot snapshot, Buffer buffer)
static bool HeapTupleSatisfiesToast(HeapTuple htup, Snapshot snapshot, Buffer buffer)
static bool HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot, Buffer buffer)
static bool HeapTupleSatisfiesSelf(HeapTuple htup, Snapshot snapshot, Buffer buffer)
static bool HeapTupleSatisfiesHistoricMVCC(HeapTuple htup, Snapshot snapshot, Buffer buffer)
@ SNAPSHOT_TOAST
Definition snapshot.h:70
@ SNAPSHOT_SELF
Definition snapshot.h:60
@ SNAPSHOT_NON_VACUUMABLE
Definition snapshot.h:114
@ SNAPSHOT_MVCC
Definition snapshot.h:46
@ SNAPSHOT_ANY
Definition snapshot.h:65
@ SNAPSHOT_HISTORIC_MVCC
Definition snapshot.h:105
@ SNAPSHOT_DIRTY
Definition snapshot.h:98
SnapshotType snapshot_type
Definition snapshot.h:140

References HeapTupleSatisfiesAny(), HeapTupleSatisfiesDirty(), HeapTupleSatisfiesHistoricMVCC(), HeapTupleSatisfiesMVCC(), HeapTupleSatisfiesNonVacuumable(), HeapTupleSatisfiesSelf(), HeapTupleSatisfiesToast(), SNAPSHOT_ANY, SNAPSHOT_DIRTY, SNAPSHOT_HISTORIC_MVCC, SNAPSHOT_MVCC, SNAPSHOT_NON_VACUUMABLE, SNAPSHOT_SELF, SNAPSHOT_TOAST, and SnapshotData::snapshot_type.

Referenced by BitmapHeapScanNextBlock(), heap_delete(), heap_fetch(), heap_get_latest_tid(), heap_hot_search_buffer(), heap_update(), heapam_tuple_satisfies_snapshot(), heapgettup(), pgstat_heap(), SampleHeapTupleVisible(), and ScanSourceDatabasePgClassPage().

◆ HeapTupleSetHintBits()

void HeapTupleSetHintBits ( HeapTupleHeader  tuple,
Buffer  buffer,
uint16  infomask,
TransactionId  xid 
)
extern

Definition at line 141 of file heapam_visibility.c.

143{
144 /*
145 * The uses from heapam.c rely on being able to perform the hint bit
146 * updates, which can only be guaranteed if we are holding an exclusive
147 * lock on the buffer - which all callers are doing.
148 */
150
151 SetHintBits(tuple, buffer, infomask, xid);
152}
bool BufferIsLockedByMeInMode(Buffer buffer, BufferLockMode mode)
Definition bufmgr.c:2998

References Assert, BUFFER_LOCK_EXCLUSIVE, BufferIsLockedByMeInMode(), fb(), and SetHintBits().

Referenced by UpdateXmaxHintBits().

◆ log_heap_prune_and_freeze()

void log_heap_prune_and_freeze ( Relation  relation,
Buffer  buffer,
Buffer  vmbuffer,
uint8  vmflags,
TransactionId  conflict_xid,
bool  cleanup_lock,
PruneReason  reason,
HeapTupleFreeze frozen,
int  nfrozen,
OffsetNumber redirected,
int  nredirected,
OffsetNumber dead,
int  ndead,
OffsetNumber unused,
int  nunused 
)
extern

Definition at line 2167 of file pruneheap.c.

2176{
2179 uint8 info;
2181
2182 /* The following local variables hold data registered in the WAL record: */
2186 xlhp_prune_items dead_items;
2189 bool do_prune = nredirected > 0 || ndead > 0 || nunused > 0;
2191
2193
2194 xlrec.flags = 0;
2196
2197 /*
2198 * We can avoid an FPI of the heap page if the only modification we are
2199 * making to it is to set PD_ALL_VISIBLE and checksums/wal_log_hints are
2200 * disabled. Note that if we explicitly skip an FPI, we must not stamp the
2201 * heap page with this record's LSN. Recovery skips records <= the stamped
2202 * LSN, so this could lead to skipping an earlier FPI needed to repair a
2203 * torn page.
2204 */
2205 if (!do_prune &&
2206 nfrozen == 0 &&
2209
2210 /*
2211 * Prepare data for the buffer. The arrays are not actually in the
2212 * buffer, but we pretend that they are. When XLogInsert stores a full
2213 * page image, the arrays can be omitted.
2214 */
2217
2218 if (do_set_vm)
2219 XLogRegisterBuffer(1, vmbuffer, 0);
2220
2221 if (nfrozen > 0)
2222 {
2223 int nplans;
2224
2226
2227 /*
2228 * Prepare deduplicated representation for use in the WAL record. This
2229 * destructively sorts frozen tuples array in-place.
2230 */
2231 nplans = heap_log_freeze_plan(frozen, nfrozen, plans, frz_offsets);
2232
2233 freeze_plans.nplans = nplans;
2235 offsetof(xlhp_freeze_plans, plans));
2236 XLogRegisterBufData(0, plans,
2237 sizeof(xlhp_freeze_plan) * nplans);
2238 }
2239 if (nredirected > 0)
2240 {
2242
2243 redirect_items.ntargets = nredirected;
2246 XLogRegisterBufData(0, redirected,
2247 sizeof(OffsetNumber[2]) * nredirected);
2248 }
2249 if (ndead > 0)
2250 {
2251 xlrec.flags |= XLHP_HAS_DEAD_ITEMS;
2252
2253 dead_items.ntargets = ndead;
2254 XLogRegisterBufData(0, &dead_items,
2256 XLogRegisterBufData(0, dead,
2257 sizeof(OffsetNumber) * ndead);
2258 }
2259 if (nunused > 0)
2260 {
2262
2263 unused_items.ntargets = nunused;
2266 XLogRegisterBufData(0, unused,
2267 sizeof(OffsetNumber) * nunused);
2268 }
2269 if (nfrozen > 0)
2271 sizeof(OffsetNumber) * nfrozen);
2272
2273 /*
2274 * Prepare the main xl_heap_prune record. We already set the XLHP_HAS_*
2275 * flag above.
2276 */
2278 {
2279 xlrec.flags |= XLHP_VM_ALL_VISIBLE;
2281 xlrec.flags |= XLHP_VM_ALL_FROZEN;
2282 }
2284 xlrec.flags |= XLHP_IS_CATALOG_REL;
2287 if (cleanup_lock)
2288 xlrec.flags |= XLHP_CLEANUP_LOCK;
2289 else
2290 {
2291 Assert(nredirected == 0 && ndead == 0);
2292 /* also, any items in 'unused' must've been LP_DEAD previously */
2293 }
2297
2298 switch (reason)
2299 {
2300 case PRUNE_ON_ACCESS:
2302 break;
2303 case PRUNE_VACUUM_SCAN:
2305 break;
2308 break;
2309 default:
2310 elog(ERROR, "unrecognized prune reason: %d", (int) reason);
2311 break;
2312 }
2313 recptr = XLogInsert(RM_HEAP2_ID, info);
2314
2315 if (do_set_vm)
2316 {
2317 Assert(BufferIsDirty(vmbuffer));
2318 PageSetLSN(BufferGetPage(vmbuffer), recptr);
2319 }
2320
2321 /*
2322 * See comment at the top of the function about regbuf_flags_heap for
2323 * details on when we can advance the page LSN.
2324 */
2325 if (do_prune || nfrozen > 0 || (do_set_vm && XLogHintBitIsNeeded()))
2326 {
2327 Assert(BufferIsDirty(buffer));
2329 }
2330}
#define XLHP_HAS_CONFLICT_HORIZON
#define XLHP_HAS_FREEZE_PLANS
#define XLHP_VM_ALL_VISIBLE
#define SizeOfHeapPrune
#define XLHP_HAS_NOW_UNUSED_ITEMS
#define XLHP_VM_ALL_FROZEN
#define XLHP_HAS_REDIRECTIONS
#define XLOG_HEAP2_PRUNE_VACUUM_SCAN
Definition heapam_xlog.h:61
#define XLOG_HEAP2_PRUNE_ON_ACCESS
Definition heapam_xlog.h:60
#define XLHP_CLEANUP_LOCK
#define XLHP_HAS_DEAD_ITEMS
#define XLOG_HEAP2_PRUNE_VACUUM_CLEANUP
Definition heapam_xlog.h:62
#define XLHP_IS_CATALOG_REL
static int heap_log_freeze_plan(HeapTupleFreeze *tuples, int ntuples, xlhp_freeze_plan *plans_out, OffsetNumber *offsets_out)
Definition pruneheap.c:2088
#define XLogHintBitIsNeeded()
Definition xlog.h:122
#define REGBUF_NO_IMAGE
Definition xloginsert.h:33

References Assert, BufferGetPage(), BufferIsDirty(), data, elog, ERROR, fb(), heap_log_freeze_plan(), MaxHeapTuplesPerPage, xlhp_prune_items::ntargets, PageSetLSN(), PRUNE_ON_ACCESS, PRUNE_VACUUM_CLEANUP, PRUNE_VACUUM_SCAN, REGBUF_NO_IMAGE, REGBUF_STANDARD, RelationIsAccessibleInLogicalDecoding, SizeOfHeapPrune, TransactionIdIsValid, VISIBILITYMAP_ALL_FROZEN, VISIBILITYMAP_ALL_VISIBLE, VISIBILITYMAP_VALID_BITS, XLHP_CLEANUP_LOCK, XLHP_HAS_CONFLICT_HORIZON, XLHP_HAS_DEAD_ITEMS, XLHP_HAS_FREEZE_PLANS, XLHP_HAS_NOW_UNUSED_ITEMS, XLHP_HAS_REDIRECTIONS, XLHP_IS_CATALOG_REL, XLHP_VM_ALL_FROZEN, XLHP_VM_ALL_VISIBLE, XLOG_HEAP2_PRUNE_ON_ACCESS, XLOG_HEAP2_PRUNE_VACUUM_CLEANUP, XLOG_HEAP2_PRUNE_VACUUM_SCAN, XLogBeginInsert(), XLogHintBitIsNeeded, XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by heap_page_prune_and_freeze(), and lazy_vacuum_heap_page().

◆ ReleaseBulkInsertStatePin()

void ReleaseBulkInsertStatePin ( BulkInsertState  bistate)
extern

Definition at line 2104 of file heapam.c.

2105{
2106 if (bistate->current_buf != InvalidBuffer)
2107 ReleaseBuffer(bistate->current_buf);
2108 bistate->current_buf = InvalidBuffer;
2109
2110 /*
2111 * Despite the name, we also reset bulk relation extension state.
2112 * Otherwise we can end up erroring out due to looking for free space in
2113 * ->next_free of one partition, even though ->next_free was set when
2114 * extending another partition. It could obviously also be bad for
2115 * efficiency to look at existing blocks at offsets from another
2116 * partition, even if we don't error out.
2117 */
2118 bistate->next_free = InvalidBlockNumber;
2119 bistate->last_free = InvalidBlockNumber;
2120}

References BulkInsertStateData::current_buf, InvalidBlockNumber, InvalidBuffer, BulkInsertStateData::last_free, BulkInsertStateData::next_free, and ReleaseBuffer().

Referenced by CopyFrom().

◆ ResolveCminCmaxDuringDecoding()

bool ResolveCminCmaxDuringDecoding ( struct HTAB tuplecid_data,
Snapshot  snapshot,
HeapTuple  htup,
Buffer  buffer,
CommandId cmin,
CommandId cmax 
)
extern

Definition at line 5558 of file reorderbuffer.c.

5562{
5565 ForkNumber forkno;
5566 BlockNumber blockno;
5567 bool updated_mapping = false;
5568
5569 /*
5570 * Return unresolved if tuplecid_data is not valid. That's because when
5571 * streaming in-progress transactions we may run into tuples with the CID
5572 * before actually decoding them. Think e.g. about INSERT followed by
5573 * TRUNCATE, where the TRUNCATE may not be decoded yet when applying the
5574 * INSERT. So in such cases, we assume the CID is from the future
5575 * command.
5576 */
5577 if (tuplecid_data == NULL)
5578 return false;
5579
5580 /* be careful about padding */
5581 memset(&key, 0, sizeof(key));
5582
5583 Assert(!BufferIsLocal(buffer));
5584
5585 /*
5586 * get relfilelocator from the buffer, no convenient way to access it
5587 * other than that.
5588 */
5589 BufferGetTag(buffer, &key.rlocator, &forkno, &blockno);
5590
5591 /* tuples can only be in the main fork */
5592 Assert(forkno == MAIN_FORKNUM);
5593 Assert(blockno == ItemPointerGetBlockNumber(&htup->t_self));
5594
5595 ItemPointerCopy(&htup->t_self,
5596 &key.tid);
5597
5598restart:
5601
5602 /*
5603 * failed to find a mapping, check whether the table was rewritten and
5604 * apply mapping if so, but only do that once - there can be no new
5605 * mappings while we are in here since we have to hold a lock on the
5606 * relation.
5607 */
5608 if (ent == NULL && !updated_mapping)
5609 {
5611 /* now check but don't update for a mapping again */
5612 updated_mapping = true;
5613 goto restart;
5614 }
5615 else if (ent == NULL)
5616 return false;
5617
5618 if (cmin)
5619 *cmin = ent->cmin;
5620 if (cmax)
5621 *cmax = ent->cmax;
5622 return true;
5623}
#define BufferIsLocal(buffer)
Definition buf.h:37
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition dynahash.c:952
@ HASH_FIND
Definition hsearch.h:113
static void UpdateLogicalMappings(HTAB *tuplecid_data, Oid relid, Snapshot snapshot)
static HTAB * tuplecid_data
Definition snapmgr.c:163

References Assert, BufferGetTag(), BufferIsLocal, fb(), HASH_FIND, hash_search(), ItemPointerCopy(), ItemPointerGetBlockNumber(), MAIN_FORKNUM, HeapTupleData::t_self, HeapTupleData::t_tableOid, tuplecid_data, and UpdateLogicalMappings().

Referenced by HeapTupleSatisfiesHistoricMVCC().

◆ simple_heap_delete()

void simple_heap_delete ( Relation  relation,
const ItemPointerData tid 
)
extern

Definition at line 3266 of file heapam.c.

3267{
3268 TM_Result result;
3269 TM_FailureData tmfd;
3270
3271 result = heap_delete(relation, tid,
3273 true /* wait for commit */ ,
3274 &tmfd, false /* changingPart */ );
3275 switch (result)
3276 {
3277 case TM_SelfModified:
3278 /* Tuple was already updated in current command? */
3279 elog(ERROR, "tuple already updated by self");
3280 break;
3281
3282 case TM_Ok:
3283 /* done successfully */
3284 break;
3285
3286 case TM_Updated:
3287 elog(ERROR, "tuple concurrently updated");
3288 break;
3289
3290 case TM_Deleted:
3291 elog(ERROR, "tuple concurrently deleted");
3292 break;
3293
3294 default:
3295 elog(ERROR, "unrecognized heap_delete status: %u", result);
3296 break;
3297 }
3298}
TM_Result heap_delete(Relation relation, const ItemPointerData *tid, CommandId cid, Snapshot crosscheck, bool wait, TM_FailureData *tmfd, bool changingPart)
Definition heapam.c:2843

References elog, ERROR, GetCurrentCommandId(), heap_delete(), InvalidSnapshot, TM_Deleted, TM_Ok, TM_SelfModified, and TM_Updated.

Referenced by CatalogTupleDelete(), and toast_delete_datum().

◆ simple_heap_insert()

void simple_heap_insert ( Relation  relation,
HeapTuple  tup 
)
extern

Definition at line 2785 of file heapam.c.

2786{
2787 heap_insert(relation, tup, GetCurrentCommandId(true), 0, NULL);
2788}
void heap_insert(Relation relation, HeapTuple tup, CommandId cid, int options, BulkInsertState bistate)
Definition heapam.c:2142

References fb(), GetCurrentCommandId(), and heap_insert().

Referenced by CatalogTupleInsert(), CatalogTupleInsertWithInfo(), and InsertOneTuple().

◆ simple_heap_update()

void simple_heap_update ( Relation  relation,
const ItemPointerData otid,
HeapTuple  tup,
TU_UpdateIndexes update_indexes 
)
extern

Definition at line 4556 of file heapam.c.

4558{
4559 TM_Result result;
4560 TM_FailureData tmfd;
4561 LockTupleMode lockmode;
4562
4563 result = heap_update(relation, otid, tup,
4565 true /* wait for commit */ ,
4566 &tmfd, &lockmode, update_indexes);
4567 switch (result)
4568 {
4569 case TM_SelfModified:
4570 /* Tuple was already updated in current command? */
4571 elog(ERROR, "tuple already updated by self");
4572 break;
4573
4574 case TM_Ok:
4575 /* done successfully */
4576 break;
4577
4578 case TM_Updated:
4579 elog(ERROR, "tuple concurrently updated");
4580 break;
4581
4582 case TM_Deleted:
4583 elog(ERROR, "tuple concurrently deleted");
4584 break;
4585
4586 default:
4587 elog(ERROR, "unrecognized heap_update status: %u", result);
4588 break;
4589 }
4590}
TM_Result heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup, CommandId cid, Snapshot crosscheck, bool wait, TM_FailureData *tmfd, LockTupleMode *lockmode, TU_UpdateIndexes *update_indexes)
Definition heapam.c:3312

References elog, ERROR, fb(), GetCurrentCommandId(), heap_update(), InvalidSnapshot, TM_Deleted, TM_Ok, TM_SelfModified, and TM_Updated.

Referenced by CatalogTupleUpdate(), and CatalogTupleUpdateWithInfo().