PostgreSQL Source Code git master
Loading...
Searching...
No Matches
heapam.h File Reference
#include "access/heapam_xlog.h"
#include "access/relation.h"
#include "access/relscan.h"
#include "access/sdir.h"
#include "access/skey.h"
#include "access/table.h"
#include "access/tableam.h"
#include "commands/vacuum.h"
#include "nodes/lockoptions.h"
#include "nodes/primnodes.h"
#include "storage/bufpage.h"
#include "storage/dsm.h"
#include "storage/lockdefs.h"
#include "storage/read_stream.h"
#include "storage/shm_toc.h"
#include "utils/relcache.h"
#include "utils/snapshot.h"
Include dependency graph for heapam.h:
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Data Structures

struct  HeapScanDescData
 
struct  BitmapHeapScanDescData
 
struct  IndexFetchHeapData
 
struct  HeapTupleFreeze
 
struct  HeapPageFreeze
 
struct  PruneFreezeParams
 
struct  PruneFreezeResult
 
struct  BatchMVCCState
 

Macros

#define HEAP_INSERT_SKIP_FSM   TABLE_INSERT_SKIP_FSM
 
#define HEAP_INSERT_FROZEN   TABLE_INSERT_FROZEN
 
#define HEAP_INSERT_NO_LOGICAL   TABLE_INSERT_NO_LOGICAL
 
#define HEAP_INSERT_SPECULATIVE   0x0010
 
#define HEAP_PAGE_PRUNE_MARK_UNUSED_NOW   (1 << 0)
 
#define HEAP_PAGE_PRUNE_FREEZE   (1 << 1)
 
#define MaxLockTupleMode   LockTupleExclusive
 
#define HEAP_FREEZE_CHECK_XMIN_COMMITTED   0x01
 
#define HEAP_FREEZE_CHECK_XMAX_ABORTED   0x02
 

Typedefs

typedef struct BulkInsertStateData *BulkInsertState
 
typedef struct GlobalVisState GlobalVisState
 
typedef struct TupleTableSlot TupleTableSlot
 
typedef struct HeapScanDescData HeapScanDescData
 
typedef struct HeapScanDescData *HeapScanDesc
 
typedef struct BitmapHeapScanDescData BitmapHeapScanDescData
 
typedef struct BitmapHeapScanDescData *BitmapHeapScanDesc
 
typedef struct IndexFetchHeapData IndexFetchHeapData
 
typedef struct HeapTupleFreeze HeapTupleFreeze
 
typedef struct HeapPageFreeze HeapPageFreeze
 
typedef struct PruneFreezeParams PruneFreezeParams
 
typedef struct PruneFreezeResult PruneFreezeResult
 
typedef struct BatchMVCCState BatchMVCCState
 

Enumerations

enum  HTSV_Result {
  HEAPTUPLE_DEAD , HEAPTUPLE_LIVE , HEAPTUPLE_RECENTLY_DEAD , HEAPTUPLE_INSERT_IN_PROGRESS ,
  HEAPTUPLE_DELETE_IN_PROGRESS
}
 
enum  PruneReason { PRUNE_ON_ACCESS , PRUNE_VACUUM_SCAN , PRUNE_VACUUM_CLEANUP }
 

Functions

TableScanDesc heap_beginscan (Relation relation, Snapshot snapshot, int nkeys, ScanKey key, ParallelTableScanDesc parallel_scan, uint32 flags)
 
void heap_setscanlimits (TableScanDesc sscan, BlockNumber startBlk, BlockNumber numBlks)
 
void heap_prepare_pagescan (TableScanDesc sscan)
 
void heap_rescan (TableScanDesc sscan, ScanKey key, bool set_params, bool allow_strat, bool allow_sync, bool allow_pagemode)
 
void heap_endscan (TableScanDesc sscan)
 
HeapTuple heap_getnext (TableScanDesc sscan, ScanDirection direction)
 
bool heap_getnextslot (TableScanDesc sscan, ScanDirection direction, TupleTableSlot *slot)
 
void heap_set_tidrange (TableScanDesc sscan, ItemPointer mintid, ItemPointer maxtid)
 
bool heap_getnextslot_tidrange (TableScanDesc sscan, ScanDirection direction, TupleTableSlot *slot)
 
bool heap_fetch (Relation relation, Snapshot snapshot, HeapTuple tuple, Buffer *userbuf, bool keep_buf)
 
bool heap_hot_search_buffer (ItemPointer tid, Relation relation, Buffer buffer, Snapshot snapshot, HeapTuple heapTuple, bool *all_dead, bool first_call)
 
void heap_get_latest_tid (TableScanDesc sscan, ItemPointer tid)
 
BulkInsertState GetBulkInsertState (void)
 
void FreeBulkInsertState (BulkInsertState)
 
void ReleaseBulkInsertStatePin (BulkInsertState bistate)
 
void heap_insert (Relation relation, HeapTuple tup, CommandId cid, int options, BulkInsertState bistate)
 
void heap_multi_insert (Relation relation, TupleTableSlot **slots, int ntuples, CommandId cid, int options, BulkInsertState bistate)
 
TM_Result heap_delete (Relation relation, const ItemPointerData *tid, CommandId cid, Snapshot crosscheck, bool wait, TM_FailureData *tmfd, bool changingPart)
 
void heap_finish_speculative (Relation relation, const ItemPointerData *tid)
 
void heap_abort_speculative (Relation relation, const ItemPointerData *tid)
 
TM_Result heap_update (Relation relation, const ItemPointerData *otid, HeapTuple newtup, CommandId cid, Snapshot crosscheck, bool wait, TM_FailureData *tmfd, LockTupleMode *lockmode, TU_UpdateIndexes *update_indexes)
 
TM_Result heap_lock_tuple (Relation relation, HeapTuple tuple, CommandId cid, LockTupleMode mode, LockWaitPolicy wait_policy, bool follow_updates, Buffer *buffer, TM_FailureData *tmfd)
 
bool heap_inplace_lock (Relation relation, HeapTuple oldtup_ptr, Buffer buffer, void(*release_callback)(void *), void *arg)
 
void heap_inplace_update_and_unlock (Relation relation, HeapTuple oldtup, HeapTuple tuple, Buffer buffer)
 
void heap_inplace_unlock (Relation relation, HeapTuple oldtup, Buffer buffer)
 
bool heap_prepare_freeze_tuple (HeapTupleHeader tuple, const struct VacuumCutoffs *cutoffs, HeapPageFreeze *pagefrz, HeapTupleFreeze *frz, bool *totally_frozen)
 
void heap_pre_freeze_checks (Buffer buffer, HeapTupleFreeze *tuples, int ntuples)
 
void heap_freeze_prepared_tuples (Buffer buffer, HeapTupleFreeze *tuples, int ntuples)
 
bool heap_freeze_tuple (HeapTupleHeader tuple, TransactionId relfrozenxid, TransactionId relminmxid, TransactionId FreezeLimit, TransactionId MultiXactCutoff)
 
bool heap_tuple_should_freeze (HeapTupleHeader tuple, const struct VacuumCutoffs *cutoffs, TransactionId *NoFreezePageRelfrozenXid, MultiXactId *NoFreezePageRelminMxid)
 
bool heap_tuple_needs_eventual_freeze (HeapTupleHeader tuple)
 
void simple_heap_insert (Relation relation, HeapTuple tup)
 
void simple_heap_delete (Relation relation, const ItemPointerData *tid)
 
void simple_heap_update (Relation relation, const ItemPointerData *otid, HeapTuple tup, TU_UpdateIndexes *update_indexes)
 
TransactionId heap_index_delete_tuples (Relation rel, TM_IndexDeleteOp *delstate)
 
void heap_page_prune_opt (Relation relation, Buffer buffer, Buffer *vmbuffer)
 
void heap_page_prune_and_freeze (PruneFreezeParams *params, PruneFreezeResult *presult, OffsetNumber *off_loc, TransactionId *new_relfrozen_xid, MultiXactId *new_relmin_mxid)
 
void heap_page_prune_execute (Buffer buffer, bool lp_truncate_only, OffsetNumber *redirected, int nredirected, OffsetNumber *nowdead, int ndead, OffsetNumber *nowunused, int nunused)
 
void heap_get_root_tuples (Page page, OffsetNumber *root_offsets)
 
void log_heap_prune_and_freeze (Relation relation, Buffer buffer, Buffer vmbuffer, uint8 vmflags, TransactionId conflict_xid, bool cleanup_lock, PruneReason reason, HeapTupleFreeze *frozen, int nfrozen, OffsetNumber *redirected, int nredirected, OffsetNumber *dead, int ndead, OffsetNumber *unused, int nunused)
 
void heap_vacuum_rel (Relation rel, const VacuumParams params, BufferAccessStrategy bstrategy)
 
bool HeapTupleSatisfiesVisibility (HeapTuple htup, Snapshot snapshot, Buffer buffer)
 
TM_Result HeapTupleSatisfiesUpdate (HeapTuple htup, CommandId curcid, Buffer buffer)
 
HTSV_Result HeapTupleSatisfiesVacuum (HeapTuple htup, TransactionId OldestXmin, Buffer buffer)
 
HTSV_Result HeapTupleSatisfiesVacuumHorizon (HeapTuple htup, Buffer buffer, TransactionId *dead_after)
 
void HeapTupleSetHintBits (HeapTupleHeader tuple, Buffer buffer, uint16 infomask, TransactionId xid)
 
bool HeapTupleHeaderIsOnlyLocked (HeapTupleHeader tuple)
 
bool HeapTupleIsSurelyDead (HeapTuple htup, GlobalVisState *vistest)
 
int HeapTupleSatisfiesMVCCBatch (Snapshot snapshot, Buffer buffer, int ntups, BatchMVCCState *batchmvcc, OffsetNumber *vistuples_dense)
 
bool ResolveCminCmaxDuringDecoding (struct HTAB *tuplecid_data, Snapshot snapshot, HeapTuple htup, Buffer buffer, CommandId *cmin, CommandId *cmax)
 
void HeapCheckForSerializableConflictOut (bool visible, Relation relation, HeapTuple tuple, Buffer buffer, Snapshot snapshot)
 
static void heap_execute_freeze_tuple (HeapTupleHeader tuple, HeapTupleFreeze *frz)
 

Macro Definition Documentation

◆ HEAP_FREEZE_CHECK_XMAX_ABORTED

#define HEAP_FREEZE_CHECK_XMAX_ABORTED   0x02

Definition at line 150 of file heapam.h.

◆ HEAP_FREEZE_CHECK_XMIN_COMMITTED

#define HEAP_FREEZE_CHECK_XMIN_COMMITTED   0x01

Definition at line 149 of file heapam.h.

◆ HEAP_INSERT_FROZEN

#define HEAP_INSERT_FROZEN   TABLE_INSERT_FROZEN

Definition at line 38 of file heapam.h.

◆ HEAP_INSERT_NO_LOGICAL

#define HEAP_INSERT_NO_LOGICAL   TABLE_INSERT_NO_LOGICAL

Definition at line 39 of file heapam.h.

◆ HEAP_INSERT_SKIP_FSM

#define HEAP_INSERT_SKIP_FSM   TABLE_INSERT_SKIP_FSM

Definition at line 37 of file heapam.h.

◆ HEAP_INSERT_SPECULATIVE

#define HEAP_INSERT_SPECULATIVE   0x0010

Definition at line 40 of file heapam.h.

◆ HEAP_PAGE_PRUNE_FREEZE

#define HEAP_PAGE_PRUNE_FREEZE   (1 << 1)

Definition at line 44 of file heapam.h.

◆ HEAP_PAGE_PRUNE_MARK_UNUSED_NOW

#define HEAP_PAGE_PRUNE_MARK_UNUSED_NOW   (1 << 0)

Definition at line 43 of file heapam.h.

◆ MaxLockTupleMode

#define MaxLockTupleMode   LockTupleExclusive

Definition at line 51 of file heapam.h.

Typedef Documentation

◆ BatchMVCCState

◆ BitmapHeapScanDesc

Definition at line 116 of file heapam.h.

◆ BitmapHeapScanDescData

◆ BulkInsertState

Definition at line 46 of file heapam.h.

◆ GlobalVisState

Definition at line 47 of file heapam.h.

◆ HeapPageFreeze

◆ HeapScanDesc

Definition at line 108 of file heapam.h.

◆ HeapScanDescData

◆ HeapTupleFreeze

◆ IndexFetchHeapData

◆ PruneFreezeParams

◆ PruneFreezeResult

◆ TupleTableSlot

Definition at line 48 of file heapam.h.

Enumeration Type Documentation

◆ HTSV_Result

Enumerator
HEAPTUPLE_DEAD 
HEAPTUPLE_LIVE 
HEAPTUPLE_RECENTLY_DEAD 
HEAPTUPLE_INSERT_IN_PROGRESS 
HEAPTUPLE_DELETE_IN_PROGRESS 

Definition at line 136 of file heapam.h.

137{
138 HEAPTUPLE_DEAD, /* tuple is dead and deletable */
139 HEAPTUPLE_LIVE, /* tuple is live (committed, no deleter) */
140 HEAPTUPLE_RECENTLY_DEAD, /* tuple is dead, but not deletable yet */
141 HEAPTUPLE_INSERT_IN_PROGRESS, /* inserting xact is still in progress */
142 HEAPTUPLE_DELETE_IN_PROGRESS, /* deleting xact is still in progress */
HTSV_Result
Definition heapam.h:137
@ HEAPTUPLE_RECENTLY_DEAD
Definition heapam.h:140
@ HEAPTUPLE_INSERT_IN_PROGRESS
Definition heapam.h:141
@ HEAPTUPLE_LIVE
Definition heapam.h:139
@ HEAPTUPLE_DELETE_IN_PROGRESS
Definition heapam.h:142
@ HEAPTUPLE_DEAD
Definition heapam.h:138

◆ PruneReason

Enumerator
PRUNE_ON_ACCESS 
PRUNE_VACUUM_SCAN 
PRUNE_VACUUM_CLEANUP 

Definition at line 250 of file heapam.h.

251{
252 PRUNE_ON_ACCESS, /* on-access pruning */
253 PRUNE_VACUUM_SCAN, /* VACUUM 1st heap pass */
254 PRUNE_VACUUM_CLEANUP, /* VACUUM 2nd heap pass */
PruneReason
Definition heapam.h:251
@ PRUNE_VACUUM_CLEANUP
Definition heapam.h:254
@ PRUNE_ON_ACCESS
Definition heapam.h:252
@ PRUNE_VACUUM_SCAN
Definition heapam.h:253

Function Documentation

◆ FreeBulkInsertState()

void FreeBulkInsertState ( BulkInsertState  bistate)
extern

Definition at line 2102 of file heapam.c.

2103{
2104 if (bistate->current_buf != InvalidBuffer)
2105 ReleaseBuffer(bistate->current_buf);
2106 FreeAccessStrategy(bistate->strategy);
2107 pfree(bistate);
2108}
#define InvalidBuffer
Definition buf.h:25
void ReleaseBuffer(Buffer buffer)
Definition bufmgr.c:5505
void FreeAccessStrategy(BufferAccessStrategy strategy)
Definition freelist.c:643
void pfree(void *pointer)
Definition mcxt.c:1616
BufferAccessStrategy strategy
Definition hio.h:31
Buffer current_buf
Definition hio.h:32

References BulkInsertStateData::current_buf, FreeAccessStrategy(), InvalidBuffer, pfree(), ReleaseBuffer(), and BulkInsertStateData::strategy.

Referenced by ATRewriteTable(), CopyFrom(), CopyMultiInsertBufferCleanup(), deleteSplitPartitionContext(), intorel_shutdown(), MergePartitionsMoveRows(), and transientrel_shutdown().

◆ GetBulkInsertState()

BulkInsertState GetBulkInsertState ( void  )
extern

Definition at line 2085 of file heapam.c.

2086{
2087 BulkInsertState bistate;
2088
2091 bistate->current_buf = InvalidBuffer;
2092 bistate->next_free = InvalidBlockNumber;
2093 bistate->last_free = InvalidBlockNumber;
2094 bistate->already_extended_by = 0;
2095 return bistate;
2096}
#define InvalidBlockNumber
Definition block.h:33
@ BAS_BULKWRITE
Definition bufmgr.h:39
#define palloc_object(type)
Definition fe_memutils.h:74
BufferAccessStrategy GetAccessStrategy(BufferAccessStrategyType btype)
Definition freelist.c:461
struct BulkInsertStateData * BulkInsertState
Definition heapam.h:46
BlockNumber last_free
Definition hio.h:49
uint32 already_extended_by
Definition hio.h:50
BlockNumber next_free
Definition hio.h:48

References BulkInsertStateData::already_extended_by, BAS_BULKWRITE, BulkInsertStateData::current_buf, GetAccessStrategy(), InvalidBlockNumber, InvalidBuffer, BulkInsertStateData::last_free, BulkInsertStateData::next_free, palloc_object, and BulkInsertStateData::strategy.

Referenced by ATRewriteTable(), CopyFrom(), CopyMultiInsertBufferInit(), createSplitPartitionContext(), intorel_startup(), MergePartitionsMoveRows(), and transientrel_startup().

◆ heap_abort_speculative()

void heap_abort_speculative ( Relation  relation,
const ItemPointerData tid 
)
extern

Definition at line 6269 of file heapam.c.

6270{
6272 ItemId lp;
6273 HeapTupleData tp;
6274 Page page;
6275 BlockNumber block;
6276 Buffer buffer;
6277
6279
6280 block = ItemPointerGetBlockNumber(tid);
6281 buffer = ReadBuffer(relation, block);
6282 page = BufferGetPage(buffer);
6283
6285
6286 /*
6287 * Page can't be all visible, we just inserted into it, and are still
6288 * running.
6289 */
6290 Assert(!PageIsAllVisible(page));
6291
6294
6295 tp.t_tableOid = RelationGetRelid(relation);
6296 tp.t_data = (HeapTupleHeader) PageGetItem(page, lp);
6297 tp.t_len = ItemIdGetLength(lp);
6298 tp.t_self = *tid;
6299
6300 /*
6301 * Sanity check that the tuple really is a speculatively inserted tuple,
6302 * inserted by us.
6303 */
6304 if (tp.t_data->t_choice.t_heap.t_xmin != xid)
6305 elog(ERROR, "attempted to kill a tuple inserted by another transaction");
6306 if (!(IsToastRelation(relation) || HeapTupleHeaderIsSpeculative(tp.t_data)))
6307 elog(ERROR, "attempted to kill a non-speculative tuple");
6309
6310 /*
6311 * No need to check for serializable conflicts here. There is never a
6312 * need for a combo CID, either. No need to extract replica identity, or
6313 * do anything special with infomask bits.
6314 */
6315
6317
6318 /*
6319 * The tuple will become DEAD immediately. Flag that this page is a
6320 * candidate for pruning by setting xmin to TransactionXmin. While not
6321 * immediately prunable, it is the oldest xid we can cheaply determine
6322 * that's safe against wraparound / being older than the table's
6323 * relfrozenxid. To defend against the unlikely case of a new relation
6324 * having a newer relfrozenxid than our TransactionXmin, use relfrozenxid
6325 * if so (vacuum can't subsequently move relfrozenxid to beyond
6326 * TransactionXmin, so there's no race here).
6327 */
6329 {
6330 TransactionId relfrozenxid = relation->rd_rel->relfrozenxid;
6332
6333 if (TransactionIdPrecedes(TransactionXmin, relfrozenxid))
6334 prune_xid = relfrozenxid;
6335 else
6338 }
6339
6340 /* store transaction information of xact deleting the tuple */
6343
6344 /*
6345 * Set the tuple header xmin to InvalidTransactionId. This makes the
6346 * tuple immediately invisible everyone. (In particular, to any
6347 * transactions waiting on the speculative token, woken up later.)
6348 */
6350
6351 /* Clear the speculative insertion token too */
6352 tp.t_data->t_ctid = tp.t_self;
6353
6354 MarkBufferDirty(buffer);
6355
6356 /*
6357 * XLOG stuff
6358 *
6359 * The WAL records generated here match heap_delete(). The same recovery
6360 * routines are used.
6361 */
6362 if (RelationNeedsWAL(relation))
6363 {
6366
6368 xlrec.infobits_set = compute_infobits(tp.t_data->t_infomask,
6369 tp.t_data->t_infomask2);
6371 xlrec.xmax = xid;
6372
6376
6377 /* No replica identity & replication origin logged */
6378
6380
6381 PageSetLSN(page, recptr);
6382 }
6383
6385
6387
6388 if (HeapTupleHasExternal(&tp))
6389 {
6390 Assert(!IsToastRelation(relation));
6391 heap_toast_delete(relation, &tp, true);
6392 }
6393
6394 /*
6395 * Never need to mark tuple for invalidation, since catalogs don't support
6396 * speculative insertion
6397 */
6398
6399 /* Now we can release the buffer */
6400 ReleaseBuffer(buffer);
6401
6402 /* count deletion, as we counted the insertion too */
6403 pgstat_count_heap_delete(relation);
6404}
uint32 BlockNumber
Definition block.h:31
int Buffer
Definition buf.h:23
void MarkBufferDirty(Buffer buffer)
Definition bufmgr.c:3063
Buffer ReadBuffer(Relation reln, BlockNumber blockNum)
Definition bufmgr.c:874
static Page BufferGetPage(Buffer buffer)
Definition bufmgr.h:470
@ BUFFER_LOCK_EXCLUSIVE
Definition bufmgr.h:220
@ BUFFER_LOCK_UNLOCK
Definition bufmgr.h:205
static void LockBuffer(Buffer buffer, BufferLockMode mode)
Definition bufmgr.h:332
static bool PageIsAllVisible(const PageData *page)
Definition bufpage.h:455
static ItemId PageGetItemId(Page page, OffsetNumber offsetNumber)
Definition bufpage.h:269
static void * PageGetItem(PageData *page, const ItemIdData *itemId)
Definition bufpage.h:379
static void PageSetLSN(Page page, XLogRecPtr lsn)
Definition bufpage.h:417
PageData * Page
Definition bufpage.h:81
#define PageSetPrunable(page, xid)
Definition bufpage.h:479
#define Assert(condition)
Definition c.h:945
uint32 TransactionId
Definition c.h:738
bool IsToastRelation(Relation relation)
Definition catalog.c:206
#define ERROR
Definition elog.h:39
#define elog(elevel,...)
Definition elog.h:226
static uint8 compute_infobits(uint16 infomask, uint16 infomask2)
Definition heapam.c:2809
#define XLOG_HEAP_DELETE
Definition heapam_xlog.h:34
#define SizeOfHeapDelete
#define XLH_DELETE_IS_SUPER
void heap_toast_delete(Relation rel, HeapTuple oldtup, bool is_speculative)
Definition heaptoast.c:43
HeapTupleHeaderData * HeapTupleHeader
Definition htup.h:23
static bool HeapTupleHasExternal(const HeapTupleData *tuple)
#define HEAP_XMAX_BITS
static bool HeapTupleHeaderIsHeapOnly(const HeapTupleHeaderData *tup)
#define HEAP_MOVED
static bool HeapTupleHeaderIsSpeculative(const HeapTupleHeaderData *tup)
static void HeapTupleHeaderSetXmin(HeapTupleHeaderData *tup, TransactionId xid)
#define ItemIdGetLength(itemId)
Definition itemid.h:59
#define ItemIdIsNormal(itemId)
Definition itemid.h:99
static OffsetNumber ItemPointerGetOffsetNumber(const ItemPointerData *pointer)
Definition itemptr.h:124
static BlockNumber ItemPointerGetBlockNumber(const ItemPointerData *pointer)
Definition itemptr.h:103
static bool ItemPointerIsValid(const ItemPointerData *pointer)
Definition itemptr.h:83
#define START_CRIT_SECTION()
Definition miscadmin.h:150
#define END_CRIT_SECTION()
Definition miscadmin.h:152
void pgstat_count_heap_delete(Relation rel)
static int fb(int x)
#define RelationGetRelid(relation)
Definition rel.h:514
#define RelationNeedsWAL(relation)
Definition rel.h:637
TransactionId TransactionXmin
Definition snapmgr.c:159
ItemPointerData t_self
Definition htup.h:65
uint32 t_len
Definition htup.h:64
HeapTupleHeader t_data
Definition htup.h:68
Oid t_tableOid
Definition htup.h:66
TransactionId t_xmin
union HeapTupleHeaderData::@51 t_choice
ItemPointerData t_ctid
HeapTupleFields t_heap
Form_pg_class rd_rel
Definition rel.h:111
#define InvalidTransactionId
Definition transam.h:31
#define TransactionIdIsValid(xid)
Definition transam.h:41
static bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition transam.h:263
TransactionId GetCurrentTransactionId(void)
Definition xact.c:456
uint64 XLogRecPtr
Definition xlogdefs.h:21
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition xloginsert.c:479
void XLogRegisterData(const void *data, uint32 len)
Definition xloginsert.c:369
void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
Definition xloginsert.c:246
void XLogBeginInsert(void)
Definition xloginsert.c:153
#define REGBUF_STANDARD
Definition xloginsert.h:35

References Assert, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetPage(), compute_infobits(), elog, END_CRIT_SECTION, ERROR, fb(), xl_heap_delete::flags, GetCurrentTransactionId(), HEAP_MOVED, heap_toast_delete(), HEAP_XMAX_BITS, HeapTupleHasExternal(), HeapTupleHeaderIsHeapOnly(), HeapTupleHeaderIsSpeculative(), HeapTupleHeaderSetXmin(), InvalidTransactionId, IsToastRelation(), ItemIdGetLength, ItemIdIsNormal, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), ItemPointerIsValid(), LockBuffer(), MarkBufferDirty(), PageGetItem(), PageGetItemId(), PageIsAllVisible(), PageSetLSN(), PageSetPrunable, pgstat_count_heap_delete(), RelationData::rd_rel, ReadBuffer(), REGBUF_STANDARD, RelationGetRelid, RelationNeedsWAL, ReleaseBuffer(), SizeOfHeapDelete, START_CRIT_SECTION, HeapTupleHeaderData::t_choice, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleHeaderData::t_heap, HeapTupleHeaderData::t_infomask, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, HeapTupleFields::t_xmin, TransactionIdIsValid, TransactionIdPrecedes(), TransactionXmin, XLH_DELETE_IS_SUPER, XLOG_HEAP_DELETE, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by heapam_tuple_complete_speculative(), and toast_delete_datum().

◆ heap_beginscan()

TableScanDesc heap_beginscan ( Relation  relation,
Snapshot  snapshot,
int  nkeys,
ScanKey  key,
ParallelTableScanDesc  parallel_scan,
uint32  flags 
)
extern

Definition at line 1164 of file heapam.c.

1168{
1169 HeapScanDesc scan;
1170
1171 /*
1172 * increment relation ref count while scanning relation
1173 *
1174 * This is just to make really sure the relcache entry won't go away while
1175 * the scan has a pointer to it. Caller should be holding the rel open
1176 * anyway, so this is redundant in all normal scenarios...
1177 */
1179
1180 /*
1181 * allocate and initialize scan descriptor
1182 */
1183 if (flags & SO_TYPE_BITMAPSCAN)
1184 {
1186
1187 /*
1188 * Bitmap Heap scans do not have any fields that a normal Heap Scan
1189 * does not have, so no special initializations required here.
1190 */
1191 scan = (HeapScanDesc) bscan;
1192 }
1193 else
1195
1196 scan->rs_base.rs_rd = relation;
1197 scan->rs_base.rs_snapshot = snapshot;
1198 scan->rs_base.rs_nkeys = nkeys;
1199 scan->rs_base.rs_flags = flags;
1200 scan->rs_base.rs_parallel = parallel_scan;
1201 scan->rs_strategy = NULL; /* set in initscan */
1202 scan->rs_cbuf = InvalidBuffer;
1203
1204 /*
1205 * Disable page-at-a-time mode if it's not a MVCC-safe snapshot.
1206 */
1207 if (!(snapshot && IsMVCCSnapshot(snapshot)))
1209
1210 /* Check that a historic snapshot is not used for non-catalog tables */
1211 if (snapshot &&
1212 IsHistoricMVCCSnapshot(snapshot) &&
1214 {
1215 ereport(ERROR,
1217 errmsg("cannot query non-catalog table \"%s\" during logical decoding",
1218 RelationGetRelationName(relation))));
1219 }
1220
1221 /*
1222 * For seqscan and sample scans in a serializable transaction, acquire a
1223 * predicate lock on the entire relation. This is required not only to
1224 * lock all the matching tuples, but also to conflict with new insertions
1225 * into the table. In an indexscan, we take page locks on the index pages
1226 * covering the range specified in the scan qual, but in a heap scan there
1227 * is nothing more fine-grained to lock. A bitmap scan is a different
1228 * story, there we have already scanned the index and locked the index
1229 * pages covering the predicate. But in that case we still have to lock
1230 * any matching heap tuples. For sample scan we could optimize the locking
1231 * to be at least page-level granularity, but we'd need to add per-tuple
1232 * locking for that.
1233 */
1235 {
1236 /*
1237 * Ensure a missing snapshot is noticed reliably, even if the
1238 * isolation mode means predicate locking isn't performed (and
1239 * therefore the snapshot isn't used here).
1240 */
1241 Assert(snapshot);
1242 PredicateLockRelation(relation, snapshot);
1243 }
1244
1245 /* we only need to set this up once */
1246 scan->rs_ctup.t_tableOid = RelationGetRelid(relation);
1247
1248 /*
1249 * Allocate memory to keep track of page allocation for parallel workers
1250 * when doing a parallel scan.
1251 */
1252 if (parallel_scan != NULL)
1254 else
1256
1257 /*
1258 * we do this here instead of in initscan() because heap_rescan also calls
1259 * initscan() and we don't want to allocate memory again
1260 */
1261 if (nkeys > 0)
1262 scan->rs_base.rs_key = palloc_array(ScanKeyData, nkeys);
1263 else
1264 scan->rs_base.rs_key = NULL;
1265
1266 initscan(scan, key, false);
1267
1268 scan->rs_read_stream = NULL;
1269
1270 /*
1271 * Set up a read stream for sequential scans and TID range scans. This
1272 * should be done after initscan() because initscan() allocates the
1273 * BufferAccessStrategy object passed to the read stream API.
1274 */
1275 if (scan->rs_base.rs_flags & SO_TYPE_SEQSCAN ||
1277 {
1279
1280 if (scan->rs_base.rs_parallel)
1282 else
1284
1285 /* ---
1286 * It is safe to use batchmode as the only locks taken by `cb`
1287 * are never taken while waiting for IO:
1288 * - SyncScanLock is used in the non-parallel case
1289 * - in the parallel case, only spinlocks and atomics are used
1290 * ---
1291 */
1294 scan->rs_strategy,
1295 scan->rs_base.rs_rd,
1297 cb,
1298 scan,
1299 0);
1300 }
1301 else if (scan->rs_base.rs_flags & SO_TYPE_BITMAPSCAN)
1302 {
1305 scan->rs_strategy,
1306 scan->rs_base.rs_rd,
1309 scan,
1310 sizeof(TBMIterateResult));
1311 }
1312
1313 scan->rs_vmbuffer = InvalidBuffer;
1314
1315 return (TableScanDesc) scan;
1316}
int errcode(int sqlerrcode)
Definition elog.c:874
#define ereport(elevel,...)
Definition elog.h:150
#define palloc_array(type, count)
Definition fe_memutils.h:76
static BlockNumber heap_scan_stream_read_next_parallel(ReadStream *stream, void *callback_private_data, void *per_buffer_data)
Definition heapam.c:252
static BlockNumber heap_scan_stream_read_next_serial(ReadStream *stream, void *callback_private_data, void *per_buffer_data)
Definition heapam.c:292
static BlockNumber bitmapheap_stream_read_next(ReadStream *pgsr, void *private_data, void *per_buffer_data)
Definition heapam.c:317
static void initscan(HeapScanDesc scan, ScanKey key, bool keep_startblock)
Definition heapam.c:357
struct HeapScanDescData * HeapScanDesc
Definition heapam.h:108
static char * errmsg
void PredicateLockRelation(Relation relation, Snapshot snapshot)
Definition predicate.c:2585
ReadStream * read_stream_begin_relation(int flags, BufferAccessStrategy strategy, Relation rel, ForkNumber forknum, ReadStreamBlockNumberCB callback, void *callback_private_data, size_t per_buffer_data_size)
#define READ_STREAM_USE_BATCHING
Definition read_stream.h:64
BlockNumber(* ReadStreamBlockNumberCB)(ReadStream *stream, void *callback_private_data, void *per_buffer_data)
Definition read_stream.h:77
#define READ_STREAM_DEFAULT
Definition read_stream.h:21
#define READ_STREAM_SEQUENTIAL
Definition read_stream.h:36
#define RelationGetRelationName(relation)
Definition rel.h:548
#define RelationIsAccessibleInLogicalDecoding(relation)
Definition rel.h:693
void RelationIncrementReferenceCount(Relation rel)
Definition relcache.c:2176
@ MAIN_FORKNUM
Definition relpath.h:58
#define IsHistoricMVCCSnapshot(snapshot)
Definition snapmgr.h:67
#define IsMVCCSnapshot(snapshot)
Definition snapmgr.h:59
Buffer rs_vmbuffer
Definition heapam.h:101
BufferAccessStrategy rs_strategy
Definition heapam.h:73
Buffer rs_cbuf
Definition heapam.h:70
ParallelBlockTableScanWorkerData * rs_parallelworkerdata
Definition heapam.h:95
HeapTupleData rs_ctup
Definition heapam.h:75
ReadStream * rs_read_stream
Definition heapam.h:78
TableScanDescData rs_base
Definition heapam.h:58
Relation rs_rd
Definition relscan.h:35
uint32 rs_flags
Definition relscan.h:63
struct ScanKeyData * rs_key
Definition relscan.h:38
struct SnapshotData * rs_snapshot
Definition relscan.h:36
struct ParallelTableScanDescData * rs_parallel
Definition relscan.h:65
@ SO_TYPE_TIDRANGESCAN
Definition tableam.h:53
@ SO_TYPE_SAMPLESCAN
Definition tableam.h:51
@ SO_TYPE_SEQSCAN
Definition tableam.h:49
@ SO_TYPE_BITMAPSCAN
Definition tableam.h:50

References Assert, bitmapheap_stream_read_next(), ereport, errcode(), errmsg, ERROR, fb(), heap_scan_stream_read_next_parallel(), heap_scan_stream_read_next_serial(), initscan(), InvalidBuffer, IsHistoricMVCCSnapshot, IsMVCCSnapshot, MAIN_FORKNUM, palloc_array, palloc_object, PredicateLockRelation(), read_stream_begin_relation(), READ_STREAM_DEFAULT, READ_STREAM_SEQUENTIAL, READ_STREAM_USE_BATCHING, RelationGetRelationName, RelationGetRelid, RelationIncrementReferenceCount(), RelationIsAccessibleInLogicalDecoding, HeapScanDescData::rs_base, HeapScanDescData::rs_cbuf, HeapScanDescData::rs_ctup, TableScanDescData::rs_flags, TableScanDescData::rs_key, TableScanDescData::rs_nkeys, TableScanDescData::rs_parallel, HeapScanDescData::rs_parallelworkerdata, TableScanDescData::rs_rd, HeapScanDescData::rs_read_stream, TableScanDescData::rs_snapshot, HeapScanDescData::rs_strategy, HeapScanDescData::rs_vmbuffer, SO_TYPE_BITMAPSCAN, SO_TYPE_SAMPLESCAN, SO_TYPE_SEQSCAN, SO_TYPE_TIDRANGESCAN, and HeapTupleData::t_tableOid.

◆ heap_delete()

TM_Result heap_delete ( Relation  relation,
const ItemPointerData tid,
CommandId  cid,
Snapshot  crosscheck,
bool  wait,
TM_FailureData tmfd,
bool  changingPart 
)
extern

Definition at line 2854 of file heapam.c.

2857{
2858 TM_Result result;
2860 ItemId lp;
2861 HeapTupleData tp;
2862 Page page;
2863 BlockNumber block;
2864 Buffer buffer;
2865 Buffer vmbuffer = InvalidBuffer;
2866 TransactionId new_xmax;
2869 bool have_tuple_lock = false;
2870 bool iscombo;
2871 bool all_visible_cleared = false;
2872 HeapTuple old_key_tuple = NULL; /* replica identity of the tuple */
2873 bool old_key_copied = false;
2874
2876
2877 AssertHasSnapshotForToast(relation);
2878
2879 /*
2880 * Forbid this during a parallel operation, lest it allocate a combo CID.
2881 * Other workers might need that combo CID for visibility checks, and we
2882 * have no provision for broadcasting it to them.
2883 */
2884 if (IsInParallelMode())
2885 ereport(ERROR,
2887 errmsg("cannot delete tuples during a parallel operation")));
2888
2889 block = ItemPointerGetBlockNumber(tid);
2890 buffer = ReadBuffer(relation, block);
2891 page = BufferGetPage(buffer);
2892
2893 /*
2894 * Before locking the buffer, pin the visibility map page if it appears to
2895 * be necessary. Since we haven't got the lock yet, someone else might be
2896 * in the middle of changing this, so we'll need to recheck after we have
2897 * the lock.
2898 */
2899 if (PageIsAllVisible(page))
2900 visibilitymap_pin(relation, block, &vmbuffer);
2901
2903
2906
2907 tp.t_tableOid = RelationGetRelid(relation);
2908 tp.t_data = (HeapTupleHeader) PageGetItem(page, lp);
2909 tp.t_len = ItemIdGetLength(lp);
2910 tp.t_self = *tid;
2911
2912l1:
2913
2914 /*
2915 * If we didn't pin the visibility map page and the page has become all
2916 * visible while we were busy locking the buffer, we'll have to unlock and
2917 * re-lock, to avoid holding the buffer lock across an I/O. That's a bit
2918 * unfortunate, but hopefully shouldn't happen often.
2919 */
2920 if (vmbuffer == InvalidBuffer && PageIsAllVisible(page))
2921 {
2923 visibilitymap_pin(relation, block, &vmbuffer);
2925 }
2926
2927 result = HeapTupleSatisfiesUpdate(&tp, cid, buffer);
2928
2929 if (result == TM_Invisible)
2930 {
2931 UnlockReleaseBuffer(buffer);
2932 ereport(ERROR,
2934 errmsg("attempted to delete invisible tuple")));
2935 }
2936 else if (result == TM_BeingModified && wait)
2937 {
2940
2941 /* must copy state data before unlocking buffer */
2944
2945 /*
2946 * Sleep until concurrent transaction ends -- except when there's a
2947 * single locker and it's our own transaction. Note we don't care
2948 * which lock mode the locker has, because we need the strongest one.
2949 *
2950 * Before sleeping, we need to acquire tuple lock to establish our
2951 * priority for the tuple (see heap_lock_tuple). LockTuple will
2952 * release us when we are next-in-line for the tuple.
2953 *
2954 * If we are forced to "start over" below, we keep the tuple lock;
2955 * this arranges that we stay at the head of the line while rechecking
2956 * tuple state.
2957 */
2959 {
2960 bool current_is_member = false;
2961
2964 {
2966
2967 /*
2968 * Acquire the lock, if necessary (but skip it when we're
2969 * requesting a lock and already have one; avoids deadlock).
2970 */
2971 if (!current_is_member)
2974
2975 /* wait for multixact */
2977 relation, &(tp.t_self), XLTW_Delete,
2978 NULL);
2980
2981 /*
2982 * If xwait had just locked the tuple then some other xact
2983 * could update this tuple before we get to this point. Check
2984 * for xmax change, and start over if so.
2985 *
2986 * We also must start over if we didn't pin the VM page, and
2987 * the page has become all visible.
2988 */
2989 if ((vmbuffer == InvalidBuffer && PageIsAllVisible(page)) ||
2992 xwait))
2993 goto l1;
2994 }
2995
2996 /*
2997 * You might think the multixact is necessarily done here, but not
2998 * so: it could have surviving members, namely our own xact or
2999 * other subxacts of this backend. It is legal for us to delete
3000 * the tuple in either case, however (the latter case is
3001 * essentially a situation of upgrading our former shared lock to
3002 * exclusive). We don't bother changing the on-disk hint bits
3003 * since we are about to overwrite the xmax altogether.
3004 */
3005 }
3007 {
3008 /*
3009 * Wait for regular transaction to end; but first, acquire tuple
3010 * lock.
3011 */
3015 XactLockTableWait(xwait, relation, &(tp.t_self), XLTW_Delete);
3017
3018 /*
3019 * xwait is done, but if xwait had just locked the tuple then some
3020 * other xact could update this tuple before we get to this point.
3021 * Check for xmax change, and start over if so.
3022 *
3023 * We also must start over if we didn't pin the VM page, and the
3024 * page has become all visible.
3025 */
3026 if ((vmbuffer == InvalidBuffer && PageIsAllVisible(page)) ||
3029 xwait))
3030 goto l1;
3031
3032 /* Otherwise check if it committed or aborted */
3033 UpdateXmaxHintBits(tp.t_data, buffer, xwait);
3034 }
3035
3036 /*
3037 * We may overwrite if previous xmax aborted, or if it committed but
3038 * only locked the tuple without updating it.
3039 */
3040 if ((tp.t_data->t_infomask & HEAP_XMAX_INVALID) ||
3043 result = TM_Ok;
3044 else if (!ItemPointerEquals(&tp.t_self, &tp.t_data->t_ctid))
3045 result = TM_Updated;
3046 else
3047 result = TM_Deleted;
3048 }
3049
3050 /* sanity check the result HeapTupleSatisfiesUpdate() and the logic above */
3051 if (result != TM_Ok)
3052 {
3053 Assert(result == TM_SelfModified ||
3054 result == TM_Updated ||
3055 result == TM_Deleted ||
3056 result == TM_BeingModified);
3058 Assert(result != TM_Updated ||
3060 }
3061
3062 if (crosscheck != InvalidSnapshot && result == TM_Ok)
3063 {
3064 /* Perform additional check for transaction-snapshot mode RI updates */
3065 if (!HeapTupleSatisfiesVisibility(&tp, crosscheck, buffer))
3066 result = TM_Updated;
3067 }
3068
3069 if (result != TM_Ok)
3070 {
3071 tmfd->ctid = tp.t_data->t_ctid;
3073 if (result == TM_SelfModified)
3075 else
3076 tmfd->cmax = InvalidCommandId;
3077 UnlockReleaseBuffer(buffer);
3078 if (have_tuple_lock)
3080 if (vmbuffer != InvalidBuffer)
3081 ReleaseBuffer(vmbuffer);
3082 return result;
3083 }
3084
3085 /*
3086 * We're about to do the actual delete -- check for conflict first, to
3087 * avoid possibly having to roll back work we've just done.
3088 *
3089 * This is safe without a recheck as long as there is no possibility of
3090 * another process scanning the page between this check and the delete
3091 * being visible to the scan (i.e., an exclusive buffer content lock is
3092 * continuously held from this point until the tuple delete is visible).
3093 */
3095
3096 /* replace cid with a combo CID if necessary */
3098
3099 /*
3100 * Compute replica identity tuple before entering the critical section so
3101 * we don't PANIC upon a memory allocation failure.
3102 */
3103 old_key_tuple = ExtractReplicaIdentity(relation, &tp, true, &old_key_copied);
3104
3105 /*
3106 * If this is the first possibly-multixact-able operation in the current
3107 * transaction, set my per-backend OldestMemberMXactId setting. We can be
3108 * certain that the transaction will never become a member of any older
3109 * MultiXactIds than that. (We have to do this even if we end up just
3110 * using our own TransactionId below, since some other backend could
3111 * incorporate our XID into a MultiXact immediately afterwards.)
3112 */
3114
3117 xid, LockTupleExclusive, true,
3118 &new_xmax, &new_infomask, &new_infomask2);
3119
3121
3122 /*
3123 * If this transaction commits, the tuple will become DEAD sooner or
3124 * later. Set flag that this page is a candidate for pruning once our xid
3125 * falls below the OldestXmin horizon. If the transaction finally aborts,
3126 * the subsequent page pruning will be a no-op and the hint will be
3127 * cleared.
3128 */
3129 PageSetPrunable(page, xid);
3130
3131 if (PageIsAllVisible(page))
3132 {
3133 all_visible_cleared = true;
3134 PageClearAllVisible(page);
3135 visibilitymap_clear(relation, BufferGetBlockNumber(buffer),
3136 vmbuffer, VISIBILITYMAP_VALID_BITS);
3137 }
3138
3139 /* store transaction information of xact deleting the tuple */
3145 HeapTupleHeaderSetXmax(tp.t_data, new_xmax);
3147 /* Make sure there is no forward chain link in t_ctid */
3148 tp.t_data->t_ctid = tp.t_self;
3149
3150 /* Signal that this is actually a move into another partition */
3151 if (changingPart)
3153
3154 MarkBufferDirty(buffer);
3155
3156 /*
3157 * XLOG stuff
3158 *
3159 * NB: heap_abort_speculative() uses the same xlog record and replay
3160 * routines.
3161 */
3162 if (RelationNeedsWAL(relation))
3163 {
3167
3168 /*
3169 * For logical decode we need combo CIDs to properly decode the
3170 * catalog
3171 */
3173 log_heap_new_cid(relation, &tp);
3174
3175 xlrec.flags = 0;
3178 if (changingPart)
3180 xlrec.infobits_set = compute_infobits(tp.t_data->t_infomask,
3181 tp.t_data->t_infomask2);
3183 xlrec.xmax = new_xmax;
3184
3185 if (old_key_tuple != NULL)
3186 {
3187 if (relation->rd_rel->relreplident == REPLICA_IDENTITY_FULL)
3189 else
3191 }
3192
3195
3197
3198 /*
3199 * Log replica identity of the deleted tuple if there is one
3200 */
3201 if (old_key_tuple != NULL)
3202 {
3203 xlhdr.t_infomask2 = old_key_tuple->t_data->t_infomask2;
3204 xlhdr.t_infomask = old_key_tuple->t_data->t_infomask;
3205 xlhdr.t_hoff = old_key_tuple->t_data->t_hoff;
3206
3208 XLogRegisterData((char *) old_key_tuple->t_data
3210 old_key_tuple->t_len
3212 }
3213
3214 /* filtering by origin on a row level is much more efficient */
3216
3218
3219 PageSetLSN(page, recptr);
3220 }
3221
3223
3225
3226 if (vmbuffer != InvalidBuffer)
3227 ReleaseBuffer(vmbuffer);
3228
3229 /*
3230 * If the tuple has toasted out-of-line attributes, we need to delete
3231 * those items too. We have to do this before releasing the buffer
3232 * because we need to look at the contents of the tuple, but it's OK to
3233 * release the content lock on the buffer first.
3234 */
3235 if (relation->rd_rel->relkind != RELKIND_RELATION &&
3236 relation->rd_rel->relkind != RELKIND_MATVIEW)
3237 {
3238 /* toast table entries should never be recursively toasted */
3240 }
3241 else if (HeapTupleHasExternal(&tp))
3242 heap_toast_delete(relation, &tp, false);
3243
3244 /*
3245 * Mark tuple for invalidation from system caches at next command
3246 * boundary. We have to do this before releasing the buffer because we
3247 * need to look at the contents of the tuple.
3248 */
3249 CacheInvalidateHeapTuple(relation, &tp, NULL);
3250
3251 /* Now we can release the buffer */
3252 ReleaseBuffer(buffer);
3253
3254 /*
3255 * Release the lmgr tuple lock, if we had it.
3256 */
3257 if (have_tuple_lock)
3259
3260 pgstat_count_heap_delete(relation);
3261
3264
3265 return TM_Ok;
3266}
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition bufmgr.c:4357
void UnlockReleaseBuffer(Buffer buffer)
Definition bufmgr.c:5522
static void PageClearAllVisible(Page page)
Definition bufpage.h:465
#define InvalidCommandId
Definition c.h:755
TransactionId MultiXactId
Definition c.h:748
uint16_t uint16
Definition c.h:617
void HeapTupleHeaderAdjustCmax(const HeapTupleHeaderData *tup, CommandId *cmax, bool *iscombo)
Definition combocid.c:153
CommandId HeapTupleHeaderGetCmax(const HeapTupleHeaderData *tup)
Definition combocid.c:118
static bool DoesMultiXactIdConflict(MultiXactId multi, uint16 infomask, LockTupleMode lockmode, bool *current_is_member)
Definition heapam.c:7695
static XLogRecPtr log_heap_new_cid(Relation relation, HeapTuple tup)
Definition heapam.c:9160
static void compute_new_xmax_infomask(TransactionId xmax, uint16 old_infomask, uint16 old_infomask2, TransactionId add_to_xmax, LockTupleMode mode, bool is_update, TransactionId *result_xmax, uint16 *result_infomask, uint16 *result_infomask2)
Definition heapam.c:5409
static bool heap_acquire_tuplock(Relation relation, const ItemPointerData *tid, LockTupleMode mode, LockWaitPolicy wait_policy, bool *have_tuple_lock)
Definition heapam.c:5360
static HeapTuple ExtractReplicaIdentity(Relation relation, HeapTuple tp, bool key_required, bool *copy)
Definition heapam.c:9241
static void MultiXactIdWait(MultiXactId multi, MultiXactStatus status, uint16 infomask, Relation rel, const ItemPointerData *ctid, XLTW_Oper oper, int *remaining)
Definition heapam.c:7873
static bool xmax_infomask_changed(uint16 new_infomask, uint16 old_infomask)
Definition heapam.c:2831
#define UnlockTupleTuplock(rel, tup, mode)
Definition heapam.c:169
static void AssertHasSnapshotForToast(Relation rel)
Definition heapam.c:225
static void UpdateXmaxHintBits(HeapTupleHeader tuple, Buffer buffer, TransactionId xid)
Definition heapam.c:2063
bool HeapTupleSatisfiesVisibility(HeapTuple htup, Snapshot snapshot, Buffer buffer)
bool HeapTupleHeaderIsOnlyLocked(HeapTupleHeader tuple)
TM_Result HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid, Buffer buffer)
#define XLH_DELETE_CONTAINS_OLD_KEY
#define XLH_DELETE_ALL_VISIBLE_CLEARED
#define SizeOfHeapHeader
#define XLH_DELETE_IS_PARTITION_MOVE
#define XLH_DELETE_CONTAINS_OLD_TUPLE
void heap_freetuple(HeapTuple htup)
Definition heaptuple.c:1384
#define SizeofHeapTupleHeader
static bool HEAP_XMAX_IS_LOCKED_ONLY(uint16 infomask)
static void HeapTupleHeaderSetCmax(HeapTupleHeaderData *tup, CommandId cid, bool iscombo)
static void HeapTupleHeaderClearHotUpdated(HeapTupleHeaderData *tup)
static TransactionId HeapTupleHeaderGetRawXmax(const HeapTupleHeaderData *tup)
#define HEAP_XMAX_IS_MULTI
#define HEAP_XMAX_INVALID
static TransactionId HeapTupleHeaderGetUpdateXid(const HeapTupleHeaderData *tup)
static void HeapTupleHeaderSetMovedPartitions(HeapTupleHeaderData *tup)
static void HeapTupleHeaderSetXmax(HeapTupleHeaderData *tup, TransactionId xid)
void CacheInvalidateHeapTuple(Relation relation, HeapTuple tuple, HeapTuple newtuple)
Definition inval.c:1571
bool ItemPointerEquals(const ItemPointerData *pointer1, const ItemPointerData *pointer2)
Definition itemptr.c:35
void XactLockTableWait(TransactionId xid, Relation rel, const ItemPointerData *ctid, XLTW_Oper oper)
Definition lmgr.c:663
@ XLTW_Delete
Definition lmgr.h:28
@ LockWaitBlock
Definition lockoptions.h:40
@ LockTupleExclusive
Definition lockoptions.h:59
void MultiXactIdSetOldestMember(void)
Definition multixact.c:585
@ MultiXactStatusUpdate
Definition multixact.h:45
void CheckForSerializableConflictIn(Relation relation, const ItemPointerData *tid, BlockNumber blkno)
Definition predicate.c:4345
#define InvalidSnapshot
Definition snapshot.h:119
TransactionId xmax
Definition tableam.h:150
CommandId cmax
Definition tableam.h:151
ItemPointerData ctid
Definition tableam.h:149
TM_Result
Definition tableam.h:73
@ TM_Ok
Definition tableam.h:78
@ TM_BeingModified
Definition tableam.h:100
@ TM_Deleted
Definition tableam.h:93
@ TM_Updated
Definition tableam.h:90
@ TM_SelfModified
Definition tableam.h:84
@ TM_Invisible
Definition tableam.h:81
#define TransactionIdEquals(id1, id2)
Definition transam.h:43
bool visibilitymap_clear(Relation rel, BlockNumber heapBlk, Buffer vmbuf, uint8 flags)
void visibilitymap_pin(Relation rel, BlockNumber heapBlk, Buffer *vmbuf)
#define VISIBILITYMAP_VALID_BITS
bool TransactionIdIsCurrentTransactionId(TransactionId xid)
Definition xact.c:943
bool IsInParallelMode(void)
Definition xact.c:1091
#define XLOG_INCLUDE_ORIGIN
Definition xlog.h:165
void XLogSetRecordFlags(uint8 flags)
Definition xloginsert.c:461

References Assert, AssertHasSnapshotForToast(), BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage(), CacheInvalidateHeapTuple(), CheckForSerializableConflictIn(), TM_FailureData::cmax, compute_infobits(), compute_new_xmax_infomask(), TM_FailureData::ctid, DoesMultiXactIdConflict(), END_CRIT_SECTION, ereport, errcode(), errmsg, ERROR, ExtractReplicaIdentity(), fb(), GetCurrentTransactionId(), heap_acquire_tuplock(), heap_freetuple(), HEAP_MOVED, heap_toast_delete(), HEAP_XMAX_BITS, HEAP_XMAX_INVALID, HEAP_XMAX_IS_LOCKED_ONLY(), HEAP_XMAX_IS_MULTI, HeapTupleHasExternal(), HeapTupleHeaderAdjustCmax(), HeapTupleHeaderClearHotUpdated(), HeapTupleHeaderGetCmax(), HeapTupleHeaderGetRawXmax(), HeapTupleHeaderGetUpdateXid(), HeapTupleHeaderIsOnlyLocked(), HeapTupleHeaderSetCmax(), HeapTupleHeaderSetMovedPartitions(), HeapTupleHeaderSetXmax(), HeapTupleSatisfiesUpdate(), HeapTupleSatisfiesVisibility(), InvalidBuffer, InvalidCommandId, InvalidSnapshot, IsInParallelMode(), ItemIdGetLength, ItemIdIsNormal, ItemPointerEquals(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), ItemPointerIsValid(), LockBuffer(), LockTupleExclusive, LockWaitBlock, log_heap_new_cid(), MarkBufferDirty(), MultiXactIdSetOldestMember(), MultiXactIdWait(), MultiXactStatusUpdate, PageClearAllVisible(), PageGetItem(), PageGetItemId(), PageIsAllVisible(), PageSetLSN(), PageSetPrunable, pgstat_count_heap_delete(), RelationData::rd_rel, ReadBuffer(), REGBUF_STANDARD, RelationGetRelid, RelationIsAccessibleInLogicalDecoding, RelationNeedsWAL, ReleaseBuffer(), SizeOfHeapDelete, SizeOfHeapHeader, SizeofHeapTupleHeader, START_CRIT_SECTION, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TM_BeingModified, TM_Deleted, TM_Invisible, TM_Ok, TM_SelfModified, TM_Updated, TransactionIdEquals, TransactionIdIsCurrentTransactionId(), 
UnlockReleaseBuffer(), UnlockTupleTuplock, UpdateXmaxHintBits(), visibilitymap_clear(), visibilitymap_pin(), VISIBILITYMAP_VALID_BITS, XactLockTableWait(), XLH_DELETE_ALL_VISIBLE_CLEARED, XLH_DELETE_CONTAINS_OLD_KEY, XLH_DELETE_CONTAINS_OLD_TUPLE, XLH_DELETE_IS_PARTITION_MOVE, XLOG_HEAP_DELETE, XLOG_INCLUDE_ORIGIN, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), XLogRegisterData(), XLogSetRecordFlags(), XLTW_Delete, TM_FailureData::xmax, and xmax_infomask_changed().

Referenced by heapam_tuple_delete(), and simple_heap_delete().

◆ heap_endscan()

void heap_endscan ( TableScanDesc  sscan)
extern

Definition at line 1378 of file heapam.c.

1379{
1381
1382 /* Note: no locking manipulations needed */
1383
1384 /*
1385 * unpin scan buffers
1386 */
1387 if (BufferIsValid(scan->rs_cbuf))
1388 ReleaseBuffer(scan->rs_cbuf);
1389
1390 if (BufferIsValid(scan->rs_vmbuffer))
1392
1393 /*
1394 * Must free the read stream before freeing the BufferAccessStrategy.
1395 */
1396 if (scan->rs_read_stream)
1398
1399 /*
1400 * decrement relation reference count and free scan descriptor storage
1401 */
1403
1404 if (scan->rs_base.rs_key)
1405 pfree(scan->rs_base.rs_key);
1406
1407 if (scan->rs_strategy != NULL)
1409
1410 if (scan->rs_parallelworkerdata != NULL)
1412
1413 if (scan->rs_base.rs_flags & SO_TEMP_SNAPSHOT)
1415
1416 pfree(scan);
1417}
static bool BufferIsValid(Buffer bufnum)
Definition bufmgr.h:421
void read_stream_end(ReadStream *stream)
void RelationDecrementReferenceCount(Relation rel)
Definition relcache.c:2189
void UnregisterSnapshot(Snapshot snapshot)
Definition snapmgr.c:866
@ SO_TEMP_SNAPSHOT
Definition tableam.h:65

References BufferIsValid(), fb(), FreeAccessStrategy(), pfree(), read_stream_end(), RelationDecrementReferenceCount(), ReleaseBuffer(), HeapScanDescData::rs_base, HeapScanDescData::rs_cbuf, TableScanDescData::rs_flags, TableScanDescData::rs_key, HeapScanDescData::rs_parallelworkerdata, TableScanDescData::rs_rd, HeapScanDescData::rs_read_stream, TableScanDescData::rs_snapshot, HeapScanDescData::rs_strategy, HeapScanDescData::rs_vmbuffer, SO_TEMP_SNAPSHOT, and UnregisterSnapshot().

◆ heap_execute_freeze_tuple()

static void heap_execute_freeze_tuple ( HeapTupleHeader  tuple,
HeapTupleFreeze frz 
)
inlinestatic

Definition at line 518 of file heapam.h.

519{
520 HeapTupleHeaderSetXmax(tuple, frz->xmax);
521
522 if (frz->frzflags & XLH_FREEZE_XVAC)
524
525 if (frz->frzflags & XLH_INVALID_XVAC)
527
528 tuple->t_infomask = frz->t_infomask;
529 tuple->t_infomask2 = frz->t_infomask2;
530}
#define XLH_INVALID_XVAC
#define XLH_FREEZE_XVAC
static void HeapTupleHeaderSetXvac(HeapTupleHeaderData *tup, TransactionId xid)
#define FrozenTransactionId
Definition transam.h:33

References fb(), FrozenTransactionId, HeapTupleHeaderSetXmax(), HeapTupleHeaderSetXvac(), InvalidTransactionId, HeapTupleHeaderData::t_infomask, HeapTupleHeaderData::t_infomask2, XLH_FREEZE_XVAC, and XLH_INVALID_XVAC.

Referenced by heap_freeze_prepared_tuples(), heap_freeze_tuple(), and heap_xlog_prune_freeze().

◆ heap_fetch()

bool heap_fetch ( Relation  relation,
Snapshot  snapshot,
HeapTuple  tuple,
Buffer userbuf,
bool  keep_buf 
)
extern

Definition at line 1669 of file heapam.c.

1674{
1675 ItemPointer tid = &(tuple->t_self);
1676 ItemId lp;
1677 Buffer buffer;
1678 Page page;
1679 OffsetNumber offnum;
1680 bool valid;
1681
1682 /*
1683 * Fetch and pin the appropriate page of the relation.
1684 */
1685 buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
1686
1687 /*
1688 * Need share lock on buffer to examine tuple commit status.
1689 */
1691 page = BufferGetPage(buffer);
1692
1693 /*
1694 * We'd better check for out-of-range offnum in case of VACUUM since the
1695 * TID was obtained.
1696 */
1697 offnum = ItemPointerGetOffsetNumber(tid);
1699 {
1701 ReleaseBuffer(buffer);
1703 tuple->t_data = NULL;
1704 return false;
1705 }
1706
1707 /*
1708 * get the item line pointer corresponding to the requested tid
1709 */
1710 lp = PageGetItemId(page, offnum);
1711
1712 /*
1713 * Must check for deleted tuple.
1714 */
1715 if (!ItemIdIsNormal(lp))
1716 {
1718 ReleaseBuffer(buffer);
1720 tuple->t_data = NULL;
1721 return false;
1722 }
1723
1724 /*
1725 * fill in *tuple fields
1726 */
1727 tuple->t_data = (HeapTupleHeader) PageGetItem(page, lp);
1728 tuple->t_len = ItemIdGetLength(lp);
1729 tuple->t_tableOid = RelationGetRelid(relation);
1730
1731 /*
1732 * check tuple visibility, then release lock
1733 */
1734 valid = HeapTupleSatisfiesVisibility(tuple, snapshot, buffer);
1735
1736 if (valid)
1737 PredicateLockTID(relation, &(tuple->t_self), snapshot,
1739
1740 HeapCheckForSerializableConflictOut(valid, relation, tuple, buffer, snapshot);
1741
1743
1744 if (valid)
1745 {
1746 /*
1747 * All checks passed, so return the tuple as valid. Caller is now
1748 * responsible for releasing the buffer.
1749 */
1750 *userbuf = buffer;
1751
1752 return true;
1753 }
1754
1755 /* Tuple failed time qual, but maybe caller wants to see it anyway. */
1756 if (keep_buf)
1757 *userbuf = buffer;
1758 else
1759 {
1760 ReleaseBuffer(buffer);
1762 tuple->t_data = NULL;
1763 }
1764
1765 return false;
1766}
@ BUFFER_LOCK_SHARE
Definition bufmgr.h:210
static OffsetNumber PageGetMaxOffsetNumber(const PageData *page)
Definition bufpage.h:397
void HeapCheckForSerializableConflictOut(bool visible, Relation relation, HeapTuple tuple, Buffer buffer, Snapshot snapshot)
Definition heapam.c:9345
static TransactionId HeapTupleHeaderGetXmin(const HeapTupleHeaderData *tup)
uint16 OffsetNumber
Definition off.h:24
void PredicateLockTID(Relation relation, const ItemPointerData *tid, Snapshot snapshot, TransactionId tuple_xid)
Definition predicate.c:2630

References BUFFER_LOCK_SHARE, BUFFER_LOCK_UNLOCK, BufferGetPage(), fb(), HeapCheckForSerializableConflictOut(), HeapTupleHeaderGetXmin(), HeapTupleSatisfiesVisibility(), InvalidBuffer, ItemIdGetLength, ItemIdIsNormal, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), LockBuffer(), PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PredicateLockTID(), ReadBuffer(), RelationGetRelid, ReleaseBuffer(), HeapTupleData::t_data, HeapTupleData::t_len, HeapTupleData::t_self, and HeapTupleData::t_tableOid.

Referenced by heap_lock_updated_tuple_rec(), heapam_fetch_row_version(), and heapam_tuple_lock().

◆ heap_finish_speculative()

void heap_finish_speculative ( Relation  relation,
const ItemPointerData tid 
)
extern

Definition at line 6182 of file heapam.c.

6183{
6184 Buffer buffer;
6185 Page page;
6186 OffsetNumber offnum;
6187 ItemId lp;
6188 HeapTupleHeader htup;
6189
6190 buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
6192 page = BufferGetPage(buffer);
6193
6194 offnum = ItemPointerGetOffsetNumber(tid);
6196 elog(ERROR, "offnum out of range");
6197 lp = PageGetItemId(page, offnum);
6198 if (!ItemIdIsNormal(lp))
6199 elog(ERROR, "invalid lp");
6200
6201 htup = (HeapTupleHeader) PageGetItem(page, lp);
6202
6203 /* NO EREPORT(ERROR) from here till changes are logged */
6205
6207
6208 MarkBufferDirty(buffer);
6209
6210 /*
6211 * Replace the speculative insertion token with a real t_ctid, pointing to
6212 * itself like it does on regular tuples.
6213 */
6214 htup->t_ctid = *tid;
6215
6216 /* XLOG stuff */
6217 if (RelationNeedsWAL(relation))
6218 {
6221
6223
6225
6226 /* We want the same filtering on this as on a plain insert */
6228
6231
6233
6234 PageSetLSN(page, recptr);
6235 }
6236
6238
6239 UnlockReleaseBuffer(buffer);
6240}
#define SizeOfHeapConfirm
#define XLOG_HEAP_CONFIRM
Definition heapam_xlog.h:38
OffsetNumber offnum

References Assert, BUFFER_LOCK_EXCLUSIVE, BufferGetPage(), elog, END_CRIT_SECTION, ERROR, fb(), HeapTupleHeaderIsSpeculative(), ItemIdIsNormal, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), LockBuffer(), MarkBufferDirty(), xl_heap_confirm::offnum, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PageSetLSN(), ReadBuffer(), REGBUF_STANDARD, RelationNeedsWAL, SizeOfHeapConfirm, START_CRIT_SECTION, HeapTupleHeaderData::t_ctid, UnlockReleaseBuffer(), XLOG_HEAP_CONFIRM, XLOG_INCLUDE_ORIGIN, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), XLogRegisterData(), and XLogSetRecordFlags().

Referenced by heapam_tuple_complete_speculative().

◆ heap_freeze_prepared_tuples()

void heap_freeze_prepared_tuples ( Buffer  buffer,
HeapTupleFreeze tuples,
int  ntuples 
)
extern

Definition at line 7479 of file heapam.c.

7480{
7481 Page page = BufferGetPage(buffer);
7482
7483 for (int i = 0; i < ntuples; i++)
7484 {
7485 HeapTupleFreeze *frz = tuples + i;
7486 ItemId itemid = PageGetItemId(page, frz->offset);
7487 HeapTupleHeader htup;
7488
7489 htup = (HeapTupleHeader) PageGetItem(page, itemid);
7491 }
7492}
static void heap_execute_freeze_tuple(HeapTupleHeader tuple, HeapTupleFreeze *frz)
Definition heapam.h:518
int i
Definition isn.c:77

References BufferGetPage(), fb(), heap_execute_freeze_tuple(), i, PageGetItem(), and PageGetItemId().

Referenced by heap_page_prune_and_freeze().

◆ heap_freeze_tuple()

bool heap_freeze_tuple ( HeapTupleHeader  tuple,
TransactionId  relfrozenxid,
TransactionId  relminmxid,
TransactionId  FreezeLimit,
TransactionId  MultiXactCutoff 
)
extern

Definition at line 7501 of file heapam.c.

7504{
7506 bool do_freeze;
7507 bool totally_frozen;
7508 struct VacuumCutoffs cutoffs;
7509 HeapPageFreeze pagefrz;
7510
7511 cutoffs.relfrozenxid = relfrozenxid;
7512 cutoffs.relminmxid = relminmxid;
7513 cutoffs.OldestXmin = FreezeLimit;
7514 cutoffs.OldestMxact = MultiXactCutoff;
7515 cutoffs.FreezeLimit = FreezeLimit;
7516 cutoffs.MultiXactCutoff = MultiXactCutoff;
7517
7518 pagefrz.freeze_required = true;
7519 pagefrz.FreezePageRelfrozenXid = FreezeLimit;
7520 pagefrz.FreezePageRelminMxid = MultiXactCutoff;
7521 pagefrz.FreezePageConflictXid = InvalidTransactionId;
7522 pagefrz.NoFreezePageRelfrozenXid = FreezeLimit;
7523 pagefrz.NoFreezePageRelminMxid = MultiXactCutoff;
7524
7525 do_freeze = heap_prepare_freeze_tuple(tuple, &cutoffs,
7526 &pagefrz, &frz, &totally_frozen);
7527
7528 /*
7529 * Note that because this is not a WAL-logged operation, we don't need to
7530 * fill in the offset in the freeze record.
7531 */
7532
7533 if (do_freeze)
7535 return do_freeze;
7536}
bool heap_prepare_freeze_tuple(HeapTupleHeader tuple, const struct VacuumCutoffs *cutoffs, HeapPageFreeze *pagefrz, HeapTupleFreeze *frz, bool *totally_frozen)
Definition heapam.c:7146
bool freeze_required
Definition heapam.h:194
TransactionId FreezeLimit
Definition vacuum.h:289
TransactionId relfrozenxid
Definition vacuum.h:263
MultiXactId relminmxid
Definition vacuum.h:264
MultiXactId MultiXactCutoff
Definition vacuum.h:290

References fb(), VacuumCutoffs::FreezeLimit, heap_execute_freeze_tuple(), heap_prepare_freeze_tuple(), InvalidTransactionId, VacuumCutoffs::MultiXactCutoff, VacuumCutoffs::OldestMxact, VacuumCutoffs::OldestXmin, VacuumCutoffs::relfrozenxid, and VacuumCutoffs::relminmxid.

Referenced by rewrite_heap_tuple().

◆ heap_get_latest_tid()

void heap_get_latest_tid ( TableScanDesc  sscan,
ItemPointer  tid 
)
extern

Definition at line 1941 of file heapam.c.

1943{
1944 Relation relation = sscan->rs_rd;
1945 Snapshot snapshot = sscan->rs_snapshot;
1946 ItemPointerData ctid;
1948
1949 /*
1950 * table_tuple_get_latest_tid() verified that the passed in tid is valid.
1951 * Assume that t_ctid links are valid however - there shouldn't be invalid
1952 * ones in the table.
1953 */
1955
1956 /*
1957 * Loop to chase down t_ctid links. At top of loop, ctid is the tuple we
1958 * need to examine, and *tid is the TID we will return if ctid turns out
1959 * to be bogus.
1960 *
1961 * Note that we will loop until we reach the end of the t_ctid chain.
1962 * Depending on the snapshot passed, there might be at most one visible
1963 * version of the row, but we don't try to optimize for that.
1964 */
1965 ctid = *tid;
1966 priorXmax = InvalidTransactionId; /* cannot check first XMIN */
1967 for (;;)
1968 {
1969 Buffer buffer;
1970 Page page;
1971 OffsetNumber offnum;
1972 ItemId lp;
1973 HeapTupleData tp;
1974 bool valid;
1975
1976 /*
1977 * Read, pin, and lock the page.
1978 */
1979 buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(&ctid));
1981 page = BufferGetPage(buffer);
1982
1983 /*
1984 * Check for bogus item number. This is not treated as an error
1985 * condition because it can happen while following a t_ctid link. We
1986 * just assume that the prior tid is OK and return it unchanged.
1987 */
1988 offnum = ItemPointerGetOffsetNumber(&ctid);
1990 {
1991 UnlockReleaseBuffer(buffer);
1992 break;
1993 }
1994 lp = PageGetItemId(page, offnum);
1995 if (!ItemIdIsNormal(lp))
1996 {
1997 UnlockReleaseBuffer(buffer);
1998 break;
1999 }
2000
2001 /* OK to access the tuple */
2002 tp.t_self = ctid;
2003 tp.t_data = (HeapTupleHeader) PageGetItem(page, lp);
2004 tp.t_len = ItemIdGetLength(lp);
2005 tp.t_tableOid = RelationGetRelid(relation);
2006
2007 /*
2008 * After following a t_ctid link, we might arrive at an unrelated
2009 * tuple. Check for XMIN match.
2010 */
2013 {
2014 UnlockReleaseBuffer(buffer);
2015 break;
2016 }
2017
2018 /*
2019 * Check tuple visibility; if visible, set it as the new result
2020 * candidate.
2021 */
2022 valid = HeapTupleSatisfiesVisibility(&tp, snapshot, buffer);
2023 HeapCheckForSerializableConflictOut(valid, relation, &tp, buffer, snapshot);
2024 if (valid)
2025 *tid = ctid;
2026
2027 /*
2028 * If there's a valid t_ctid link, follow it, else we're done.
2029 */
2030 if ((tp.t_data->t_infomask & HEAP_XMAX_INVALID) ||
2034 {
2035 UnlockReleaseBuffer(buffer);
2036 break;
2037 }
2038
2039 ctid = tp.t_data->t_ctid;
2041 UnlockReleaseBuffer(buffer);
2042 } /* end of loop */
2043}
static bool HeapTupleHeaderIndicatesMovedPartitions(const HeapTupleHeaderData *tup)

References Assert, BUFFER_LOCK_SHARE, BufferGetPage(), fb(), HEAP_XMAX_INVALID, HeapCheckForSerializableConflictOut(), HeapTupleHeaderGetUpdateXid(), HeapTupleHeaderGetXmin(), HeapTupleHeaderIndicatesMovedPartitions(), HeapTupleHeaderIsOnlyLocked(), HeapTupleSatisfiesVisibility(), InvalidTransactionId, ItemIdGetLength, ItemIdIsNormal, ItemPointerEquals(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), ItemPointerIsValid(), LockBuffer(), PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), ReadBuffer(), RelationGetRelid, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TransactionIdEquals, TransactionIdIsValid, and UnlockReleaseBuffer().

◆ heap_get_root_tuples()

void heap_get_root_tuples ( Page  page,
OffsetNumber root_offsets 
)
extern

Definition at line 1890 of file pruneheap.c.

1891{
1892 OffsetNumber offnum,
1893 maxoff;
1894
1897
1898 maxoff = PageGetMaxOffsetNumber(page);
1899 for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum = OffsetNumberNext(offnum))
1900 {
1901 ItemId lp = PageGetItemId(page, offnum);
1902 HeapTupleHeader htup;
1905
1906 /* skip unused and dead items */
1907 if (!ItemIdIsUsed(lp) || ItemIdIsDead(lp))
1908 continue;
1909
1910 if (ItemIdIsNormal(lp))
1911 {
1912 htup = (HeapTupleHeader) PageGetItem(page, lp);
1913
1914 /*
1915 * Check if this tuple is part of a HOT-chain rooted at some other
1916 * tuple. If so, skip it for now; we'll process it when we find
1917 * its root.
1918 */
1919 if (HeapTupleHeaderIsHeapOnly(htup))
1920 continue;
1921
1922 /*
1923 * This is either a plain tuple or the root of a HOT-chain.
1924 * Remember it in the mapping.
1925 */
1926 root_offsets[offnum - 1] = offnum;
1927
1928 /* If it's not the start of a HOT-chain, we're done with it */
1929 if (!HeapTupleHeaderIsHotUpdated(htup))
1930 continue;
1931
1932 /* Set up to scan the HOT-chain */
1935 }
1936 else
1937 {
1938 /* Must be a redirect item. We do not set its root_offsets entry */
1940 /* Set up to scan the HOT-chain */
1943 }
1944
1945 /*
1946 * Now follow the HOT-chain and collect other tuples in the chain.
1947 *
1948 * Note: Even though this is a nested loop, the complexity of the
1949 * function is O(N) because a tuple in the page should be visited not
1950 * more than twice, once in the outer loop and once in HOT-chain
1951 * chases.
1952 */
1953 for (;;)
1954 {
1955 /* Sanity check (pure paranoia) */
1956 if (offnum < FirstOffsetNumber)
1957 break;
1958
1959 /*
1960 * An offset past the end of page's line pointer array is possible
1961 * when the array was truncated
1962 */
1963 if (offnum > maxoff)
1964 break;
1965
1966 lp = PageGetItemId(page, nextoffnum);
1967
1968 /* Check for broken chains */
1969 if (!ItemIdIsNormal(lp))
1970 break;
1971
1972 htup = (HeapTupleHeader) PageGetItem(page, lp);
1973
1976 break;
1977
1978 /* Remember the root line pointer for this item */
1979 root_offsets[nextoffnum - 1] = offnum;
1980
1981 /* Advance to next chain member, if any */
1982 if (!HeapTupleHeaderIsHotUpdated(htup))
1983 break;
1984
1985 /* HOT implies it can't have moved to different partition */
1987
1990 }
1991 }
1992}
#define MemSet(start, val, len)
Definition c.h:1109
static bool HeapTupleHeaderIsHotUpdated(const HeapTupleHeaderData *tup)
#define MaxHeapTuplesPerPage
#define ItemIdGetRedirect(itemId)
Definition itemid.h:78
#define ItemIdIsDead(itemId)
Definition itemid.h:113
#define ItemIdIsUsed(itemId)
Definition itemid.h:92
#define ItemIdIsRedirected(itemId)
Definition itemid.h:106
#define InvalidOffsetNumber
Definition off.h:26
#define OffsetNumberNext(offsetNumber)
Definition off.h:52
#define FirstOffsetNumber
Definition off.h:27

References Assert, fb(), FirstOffsetNumber, HeapTupleHeaderGetUpdateXid(), HeapTupleHeaderGetXmin(), HeapTupleHeaderIndicatesMovedPartitions(), HeapTupleHeaderIsHeapOnly(), HeapTupleHeaderIsHotUpdated(), InvalidOffsetNumber, InvalidTransactionId, ItemIdGetRedirect, ItemIdIsDead, ItemIdIsNormal, ItemIdIsRedirected, ItemIdIsUsed, ItemPointerGetOffsetNumber(), MaxHeapTuplesPerPage, MemSet, OffsetNumberNext, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), HeapTupleHeaderData::t_ctid, TransactionIdEquals, and TransactionIdIsValid.

Referenced by heapam_index_build_range_scan(), and heapam_index_validate_scan().

◆ heap_getnext()

HeapTuple heap_getnext ( TableScanDesc  sscan,
ScanDirection  direction 
)
extern

Definition at line 1420 of file heapam.c.

1421{
1423
1424 /*
1425 * This is still widely used directly, without going through table AM, so
1426 * add a safety check. It's possible we should, at a later point,
1427 * downgrade this to an assert. The reason for checking the AM routine,
1428 * rather than the AM oid, is that this allows to write regression tests
1429 * that create another AM reusing the heap handler.
1430 */
1431 if (unlikely(sscan->rs_rd->rd_tableam != GetHeapamTableAmRoutine()))
1432 ereport(ERROR,
1434 errmsg_internal("only heap AM is supported")));
1435
1436 /* Note: no locking manipulations needed */
1437
1439 heapgettup_pagemode(scan, direction,
1440 scan->rs_base.rs_nkeys, scan->rs_base.rs_key);
1441 else
1442 heapgettup(scan, direction,
1443 scan->rs_base.rs_nkeys, scan->rs_base.rs_key);
1444
1445 if (scan->rs_ctup.t_data == NULL)
1446 return NULL;
1447
1448 /*
1449 * if we get here it means we have a new current scan tuple, so point to
1450 * the proper return buffer and return the tuple.
1451 */
1452
1454
1455 return &scan->rs_ctup;
1456}
#define unlikely(x)
Definition c.h:432
int int errmsg_internal(const char *fmt,...) pg_attribute_printf(1
static void heapgettup(HeapScanDesc scan, ScanDirection dir, int nkeys, ScanKey key)
Definition heapam.c:960
static void heapgettup_pagemode(HeapScanDesc scan, ScanDirection dir, int nkeys, ScanKey key)
Definition heapam.c:1070
const TableAmRoutine * GetHeapamTableAmRoutine(void)
#define pgstat_count_heap_getnext(rel)
Definition pgstat.h:698
@ SO_ALLOW_PAGEMODE
Definition tableam.h:62

References ereport, errcode(), errmsg_internal(), ERROR, fb(), GetHeapamTableAmRoutine(), heapgettup(), heapgettup_pagemode(), pgstat_count_heap_getnext, HeapScanDescData::rs_base, HeapScanDescData::rs_ctup, TableScanDescData::rs_flags, TableScanDescData::rs_key, TableScanDescData::rs_nkeys, TableScanDescData::rs_rd, SO_ALLOW_PAGEMODE, HeapTupleData::t_data, and unlikely.

Referenced by AlterTableMoveAll(), AlterTableSpaceOptions(), check_db_file_conflict(), CreateDatabaseUsingFileCopy(), do_autovacuum(), DropSetting(), DropTableSpace(), find_typed_table_dependencies(), get_all_vacuum_rels(), get_database_list(), get_subscription_list(), get_tables_to_repack(), get_tablespace_name(), get_tablespace_oid(), GetAllPublicationRelations(), getRelationsInNamespace(), GetSchemaPublicationRelations(), heapam_index_build_range_scan(), heapam_index_validate_scan(), objectsInSchemaToOids(), pgrowlocks(), pgstat_heap(), populate_typ_list(), ReindexMultipleTables(), remove_dbtablespaces(), RemoveSubscriptionRel(), RenameTableSpace(), ThereIsAtLeastOneRole(), and vac_truncate_clog().

◆ heap_getnextslot()

bool heap_getnextslot ( TableScanDesc  sscan,
ScanDirection  direction,
TupleTableSlot slot 
)
extern

Definition at line 1459 of file heapam.c.

1460{
1462
1463 /* Note: no locking manipulations needed */
1464
1465 if (sscan->rs_flags & SO_ALLOW_PAGEMODE)
1466 heapgettup_pagemode(scan, direction, sscan->rs_nkeys, sscan->rs_key);
1467 else
1468 heapgettup(scan, direction, sscan->rs_nkeys, sscan->rs_key);
1469
1470 if (scan->rs_ctup.t_data == NULL)
1471 {
1472 ExecClearTuple(slot);
1473 return false;
1474 }
1475
1476 /*
1477 * if we get here it means we have a new current scan tuple, so point to
1478 * the proper return buffer and return the tuple.
1479 */
1480
1482
1483 ExecStoreBufferHeapTuple(&scan->rs_ctup, slot,
1484 scan->rs_cbuf);
1485 return true;
1486}
TupleTableSlot * ExecStoreBufferHeapTuple(HeapTuple tuple, TupleTableSlot *slot, Buffer buffer)
static TupleTableSlot * ExecClearTuple(TupleTableSlot *slot)
Definition tuptable.h:476

References ExecClearTuple(), ExecStoreBufferHeapTuple(), fb(), heapgettup(), heapgettup_pagemode(), pgstat_count_heap_getnext, HeapScanDescData::rs_base, HeapScanDescData::rs_cbuf, HeapScanDescData::rs_ctup, TableScanDescData::rs_rd, SO_ALLOW_PAGEMODE, and HeapTupleData::t_data.

◆ heap_getnextslot_tidrange()

bool heap_getnextslot_tidrange ( TableScanDesc  sscan,
ScanDirection  direction,
TupleTableSlot slot 
)
extern

Definition at line 1562 of file heapam.c.

1564{
1566 ItemPointer mintid = &sscan->st.tidrange.rs_mintid;
1567 ItemPointer maxtid = &sscan->st.tidrange.rs_maxtid;
1568
1569 /* Note: no locking manipulations needed */
1570 for (;;)
1571 {
1572 if (sscan->rs_flags & SO_ALLOW_PAGEMODE)
1573 heapgettup_pagemode(scan, direction, sscan->rs_nkeys, sscan->rs_key);
1574 else
1575 heapgettup(scan, direction, sscan->rs_nkeys, sscan->rs_key);
1576
1577 if (scan->rs_ctup.t_data == NULL)
1578 {
1579 ExecClearTuple(slot);
1580 return false;
1581 }
1582
1583 /*
1584 * heap_set_tidrange will have used heap_setscanlimits to limit the
1585 * range of pages we scan to only ones that can contain the TID range
1586 * we're scanning for. Here we must filter out any tuples from these
1587 * pages that are outside of that range.
1588 */
1589 if (ItemPointerCompare(&scan->rs_ctup.t_self, mintid) < 0)
1590 {
1591 ExecClearTuple(slot);
1592
1593 /*
1594 * When scanning backwards, the TIDs will be in descending order.
1595 * Future tuples in this direction will be lower still, so we can
1596 * just return false to indicate there will be no more tuples.
1597 */
1598 if (ScanDirectionIsBackward(direction))
1599 return false;
1600
1601 continue;
1602 }
1603
1604 /*
1605 * Likewise for the final page, we must filter out TIDs greater than
1606 * maxtid.
1607 */
1608 if (ItemPointerCompare(&scan->rs_ctup.t_self, maxtid) > 0)
1609 {
1610 ExecClearTuple(slot);
1611
1612 /*
1613 * When scanning forward, the TIDs will be in ascending order.
1614 * Future tuples in this direction will be higher still, so we can
1615 * just return false to indicate there will be no more tuples.
1616 */
1617 if (ScanDirectionIsForward(direction))
1618 return false;
1619 continue;
1620 }
1621
1622 break;
1623 }
1624
1625 /*
1626 * if we get here it means we have a new current scan tuple, so point to
1627 * the proper return buffer and return the tuple.
1628 */
1630
1631 ExecStoreBufferHeapTuple(&scan->rs_ctup, slot, scan->rs_cbuf);
1632 return true;
1633}
int32 ItemPointerCompare(const ItemPointerData *arg1, const ItemPointerData *arg2)
Definition itemptr.c:51
#define ScanDirectionIsForward(direction)
Definition sdir.h:64
#define ScanDirectionIsBackward(direction)
Definition sdir.h:50

References ExecClearTuple(), ExecStoreBufferHeapTuple(), fb(), heapgettup(), heapgettup_pagemode(), ItemPointerCompare(), pgstat_count_heap_getnext, HeapScanDescData::rs_base, HeapScanDescData::rs_cbuf, HeapScanDescData::rs_ctup, TableScanDescData::rs_rd, ScanDirectionIsBackward, ScanDirectionIsForward, SO_ALLOW_PAGEMODE, HeapTupleData::t_data, and HeapTupleData::t_self.

◆ heap_hot_search_buffer()

bool heap_hot_search_buffer ( ItemPointer  tid,
Relation  relation,
Buffer  buffer,
Snapshot  snapshot,
HeapTuple  heapTuple,
bool all_dead,
bool  first_call 
)
extern

Definition at line 1789 of file heapam.c.

1792{
1793 Page page = BufferGetPage(buffer);
1795 BlockNumber blkno;
1796 OffsetNumber offnum;
1797 bool at_chain_start;
1798 bool valid;
1799 bool skip;
1800 GlobalVisState *vistest = NULL;
1801
1802 /* If this is not the first call, previous call returned a (live!) tuple */
1803 if (all_dead)
1805
1806 blkno = ItemPointerGetBlockNumber(tid);
1807 offnum = ItemPointerGetOffsetNumber(tid);
1809 skip = !first_call;
1810
1811 /* XXX: we should assert that a snapshot is pushed or registered */
1813 Assert(BufferGetBlockNumber(buffer) == blkno);
1814
1815 /* Scan through possible multiple members of HOT-chain */
1816 for (;;)
1817 {
1818 ItemId lp;
1819
1820 /* check for bogus TID */
1822 break;
1823
1824 lp = PageGetItemId(page, offnum);
1825
1826 /* check for unused, dead, or redirected items */
1827 if (!ItemIdIsNormal(lp))
1828 {
1829 /* We should only see a redirect at start of chain */
1831 {
1832 /* Follow the redirect */
1833 offnum = ItemIdGetRedirect(lp);
1834 at_chain_start = false;
1835 continue;
1836 }
1837 /* else must be end of chain */
1838 break;
1839 }
1840
1841 /*
1842 * Update heapTuple to point to the element of the HOT chain we're
1843 * currently investigating. Having t_self set correctly is important
1844 * because the SSI checks and the *Satisfies routine for historical
1845 * MVCC snapshots need the correct tid to decide about the visibility.
1846 */
1847 heapTuple->t_data = (HeapTupleHeader) PageGetItem(page, lp);
1848 heapTuple->t_len = ItemIdGetLength(lp);
1849 heapTuple->t_tableOid = RelationGetRelid(relation);
1850 ItemPointerSet(&heapTuple->t_self, blkno, offnum);
1851
1852 /*
1853 * Shouldn't see a HEAP_ONLY tuple at chain start.
1854 */
1856 break;
1857
1858 /*
1859 * The xmin should match the previous xmax value, else chain is
1860 * broken.
1861 */
1865 break;
1866
1867 /*
1868 * When first_call is true (and thus, skip is initially false) we'll
1869 * return the first tuple we find. But on later passes, heapTuple
1870 * will initially be pointing to the tuple we returned last time.
1871 * Returning it again would be incorrect (and would loop forever), so
1872 * we skip it and return the next match we find.
1873 */
1874 if (!skip)
1875 {
1876 /* If it's visible per the snapshot, we must return it */
1877 valid = HeapTupleSatisfiesVisibility(heapTuple, snapshot, buffer);
1879 buffer, snapshot);
1880
1881 if (valid)
1882 {
1883 ItemPointerSetOffsetNumber(tid, offnum);
1884 PredicateLockTID(relation, &heapTuple->t_self, snapshot,
1886 if (all_dead)
1887 *all_dead = false;
1888 return true;
1889 }
1890 }
1891 skip = false;
1892
1893 /*
1894 * If we can't see it, maybe no one else can either. At caller
1895 * request, check whether all chain members are dead to all
1896 * transactions.
1897 *
1898 * Note: if you change the criterion here for what is "dead", fix the
1899 * planner's get_actual_variable_range() function to match.
1900 */
1901 if (all_dead && *all_dead)
1902 {
1903 if (!vistest)
1904 vistest = GlobalVisTestFor(relation);
1905
1906 if (!HeapTupleIsSurelyDead(heapTuple, vistest))
1907 *all_dead = false;
1908 }
1909
1910 /*
1911 * Check to see if HOT chain continues past this tuple; if so fetch
1912 * the next offnum and loop around.
1913 */
1915 {
1916 Assert(ItemPointerGetBlockNumber(&heapTuple->t_data->t_ctid) ==
1917 blkno);
1918 offnum = ItemPointerGetOffsetNumber(&heapTuple->t_data->t_ctid);
1919 at_chain_start = false;
1921 }
1922 else
1923 break; /* end of chain */
1924 }
1925
1926 return false;
1927}
bool HeapTupleIsSurelyDead(HeapTuple htup, GlobalVisState *vistest)
static bool HeapTupleIsHotUpdated(const HeapTupleData *tuple)
static bool HeapTupleIsHeapOnly(const HeapTupleData *tuple)
static void ItemPointerSet(ItemPointerData *pointer, BlockNumber blockNumber, OffsetNumber offNum)
Definition itemptr.h:135
static void ItemPointerSetOffsetNumber(ItemPointerData *pointer, OffsetNumber offsetNumber)
Definition itemptr.h:158
static const struct exclude_list_item skip[]
GlobalVisState * GlobalVisTestFor(Relation rel)
Definition procarray.c:4114
TransactionId RecentXmin
Definition snapmgr.c:160

References Assert, BufferGetBlockNumber(), BufferGetPage(), fb(), GlobalVisTestFor(), HeapCheckForSerializableConflictOut(), HeapTupleHeaderGetUpdateXid(), HeapTupleHeaderGetXmin(), HeapTupleIsHeapOnly(), HeapTupleIsHotUpdated(), HeapTupleIsSurelyDead(), HeapTupleSatisfiesVisibility(), InvalidTransactionId, ItemIdGetLength, ItemIdGetRedirect, ItemIdIsNormal, ItemIdIsRedirected, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), ItemPointerSet(), ItemPointerSetOffsetNumber(), PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PredicateLockTID(), RecentXmin, RelationGetRelid, skip, TransactionIdEquals, and TransactionIdIsValid.

Referenced by BitmapHeapScanNextBlock(), heap_index_delete_tuples(), and heapam_index_fetch_tuple().

◆ heap_index_delete_tuples()

TransactionId heap_index_delete_tuples ( Relation  rel,
TM_IndexDeleteOp delstate 
)
extern

Definition at line 8218 of file heapam.c.

8219{
8220 /* Initial assumption is that earlier pruning took care of conflict */
8221 TransactionId snapshotConflictHorizon = InvalidTransactionId;
8224 Page page = NULL;
8227#ifdef USE_PREFETCH
8230#endif
8232 int finalndeltids = 0,
8233 nblocksaccessed = 0;
8234
8235 /* State that's only used in bottom-up index deletion case */
8236 int nblocksfavorable = 0;
8237 int curtargetfreespace = delstate->bottomupfreespace,
8238 lastfreespace = 0,
8239 actualfreespace = 0;
8240 bool bottomup_final_block = false;
8241
8243
8244 /* Sort caller's deltids array by TID for further processing */
8246
8247 /*
8248 * Bottom-up case: resort deltids array in an order attuned to where the
8249 * greatest number of promising TIDs are to be found, and determine how
8250 * many blocks from the start of sorted array should be considered
8251 * favorable. This will also shrink the deltids array in order to
8252 * eliminate completely unfavorable blocks up front.
8253 */
8254 if (delstate->bottomup)
8256
8257#ifdef USE_PREFETCH
8258 /* Initialize prefetch state. */
8260 prefetch_state.next_item = 0;
8261 prefetch_state.ndeltids = delstate->ndeltids;
8262 prefetch_state.deltids = delstate->deltids;
8263
8264 /*
8265 * Determine the prefetch distance that we will attempt to maintain.
8266 *
8267 * Since the caller holds a buffer lock somewhere in rel, we'd better make
8268 * sure that isn't a catalog relation before we call code that does
8269 * syscache lookups, to avoid risk of deadlock.
8270 */
8271 if (IsCatalogRelation(rel))
8273 else
8276
8277 /* Cap initial prefetch distance for bottom-up deletion caller */
8278 if (delstate->bottomup)
8279 {
8283 }
8284
8285 /* Start prefetching. */
8287#endif
8288
8289 /* Iterate over deltids, determine which to delete, check their horizon */
8290 Assert(delstate->ndeltids > 0);
8291 for (int i = 0; i < delstate->ndeltids; i++)
8292 {
8293 TM_IndexDelete *ideltid = &delstate->deltids[i];
8294 TM_IndexStatus *istatus = delstate->status + ideltid->id;
8295 ItemPointer htid = &ideltid->tid;
8296 OffsetNumber offnum;
8297
8298 /*
8299 * Read buffer, and perform required extra steps each time a new block
8300 * is encountered. Avoid refetching if it's the same block as the one
8301 * from the last htid.
8302 */
8303 if (blkno == InvalidBlockNumber ||
8305 {
8306 /*
8307 * Consider giving up early for bottom-up index deletion caller
8308 * first. (Only prefetch next-next block afterwards, when it
8309 * becomes clear that we're at least going to access the next
8310 * block in line.)
8311 *
8312 * Sometimes the first block frees so much space for bottom-up
8313 * caller that the deletion process can end without accessing any
8314 * more blocks. It is usually necessary to access 2 or 3 blocks
8315 * per bottom-up deletion operation, though.
8316 */
8317 if (delstate->bottomup)
8318 {
8319 /*
8320 * We often allow caller to delete a few additional items
8321 * whose entries we reached after the point that space target
8322 * from caller was satisfied. The cost of accessing the page
8323 * was already paid at that point, so it made sense to finish
8324 * it off. When that happened, we finalize everything here
8325 * (by finishing off the whole bottom-up deletion operation
8326 * without needlessly paying the cost of accessing any more
8327 * blocks).
8328 */
8330 break;
8331
8332 /*
8333 * Give up when we didn't enable our caller to free any
8334 * additional space as a result of processing the page that we
8335 * just finished up with. This rule is the main way in which
8336 * we keep the cost of bottom-up deletion under control.
8337 */
8339 break;
8340 lastfreespace = actualfreespace; /* for next time */
8341
8342 /*
8343 * Deletion operation (which is bottom-up) will definitely
8344 * access the next block in line. Prepare for that now.
8345 *
8346 * Decay target free space so that we don't hang on for too
8347 * long with a marginal case. (Space target is only truly
8348 * helpful when it allows us to recognize that we don't need
8349 * to access more than 1 or 2 blocks to satisfy caller due to
8350 * agreeable workload characteristics.)
8351 *
8352 * We are a bit more patient when we encounter contiguous
8353 * blocks, though: these are treated as favorable blocks. The
8354 * decay process is only applied when the next block in line
8355 * is not a favorable/contiguous block. This is not an
8356 * exception to the general rule; we still insist on finding
8357 * at least one deletable item per block accessed. See
8358 * bottomup_nblocksfavorable() for full details of the theory
8359 * behind favorable blocks and heap block locality in general.
8360 *
8361 * Note: The first block in line is always treated as a
8362 * favorable block, so the earliest possible point that the
8363 * decay can be applied is just before we access the second
8364 * block in line. The Assert() verifies this for us.
8365 */
8367 if (nblocksfavorable > 0)
8369 else
8370 curtargetfreespace /= 2;
8371 }
8372
8373 /* release old buffer */
8374 if (BufferIsValid(buf))
8376
8378 buf = ReadBuffer(rel, blkno);
8380 Assert(!delstate->bottomup ||
8382
8383#ifdef USE_PREFETCH
8384
8385 /*
8386 * To maintain the prefetch distance, prefetch one more page for
8387 * each page we read.
8388 */
8390#endif
8391
8393
8394 page = BufferGetPage(buf);
8395 maxoff = PageGetMaxOffsetNumber(page);
8396 }
8397
8398 /*
8399 * In passing, detect index corruption involving an index page with a
8400 * TID that points to a location in the heap that couldn't possibly be
8401 * correct. We only do this with actual TIDs from caller's index page
8402 * (not items reached by traversing through a HOT chain).
8403 */
8405
8406 if (istatus->knowndeletable)
8407 Assert(!delstate->bottomup && !istatus->promising);
8408 else
8409 {
8410 ItemPointerData tmp = *htid;
8412
8413 /* Are any tuples from this HOT chain non-vacuumable? */
8415 &heapTuple, NULL, true))
8416 continue; /* can't delete entry */
8417
8418 /* Caller will delete, since whole HOT chain is vacuumable */
8419 istatus->knowndeletable = true;
8420
8421 /* Maintain index free space info for bottom-up deletion case */
8422 if (delstate->bottomup)
8423 {
8424 Assert(istatus->freespace > 0);
8425 actualfreespace += istatus->freespace;
8427 bottomup_final_block = true;
8428 }
8429 }
8430
8431 /*
8432 * Maintain snapshotConflictHorizon value for deletion operation as a
8433 * whole by advancing current value using heap tuple headers. This is
8434 * loosely based on the logic for pruning a HOT chain.
8435 */
8437 priorXmax = InvalidTransactionId; /* cannot check first XMIN */
8438 for (;;)
8439 {
8440 ItemId lp;
8441 HeapTupleHeader htup;
8442
8443 /* Sanity check (pure paranoia) */
8444 if (offnum < FirstOffsetNumber)
8445 break;
8446
8447 /*
8448 * An offset past the end of page's line pointer array is possible
8449 * when the array was truncated
8450 */
8451 if (offnum > maxoff)
8452 break;
8453
8454 lp = PageGetItemId(page, offnum);
8456 {
8457 offnum = ItemIdGetRedirect(lp);
8458 continue;
8459 }
8460
8461 /*
8462 * We'll often encounter LP_DEAD line pointers (especially with an
8463 * entry marked knowndeletable by our caller up front). No heap
8464 * tuple headers get examined for an htid that leads us to an
8465 * LP_DEAD item. This is okay because the earlier pruning
8466 * operation that made the line pointer LP_DEAD in the first place
8467 * must have considered the original tuple header as part of
8468 * generating its own snapshotConflictHorizon value.
8469 *
8470 * Relying on XLOG_HEAP2_PRUNE_VACUUM_SCAN records like this is
8471 * the same strategy that index vacuuming uses in all cases. Index
8472 * VACUUM WAL records don't even have a snapshotConflictHorizon
8473 * field of their own for this reason.
8474 */
8475 if (!ItemIdIsNormal(lp))
8476 break;
8477
8478 htup = (HeapTupleHeader) PageGetItem(page, lp);
8479
8480 /*
8481 * Check the tuple XMIN against prior XMAX, if any
8482 */
8485 break;
8486
8488 &snapshotConflictHorizon);
8489
8490 /*
8491 * If the tuple is not HOT-updated, then we are at the end of this
8492 * HOT-chain. No need to visit later tuples from the same update
8493 * chain (they get their own index entries) -- just move on to
8494 * next htid from index AM caller.
8495 */
8496 if (!HeapTupleHeaderIsHotUpdated(htup))
8497 break;
8498
8499 /* Advance to next HOT chain member */
8500 Assert(ItemPointerGetBlockNumber(&htup->t_ctid) == blkno);
8501 offnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
8503 }
8504
8505 /* Enable further/final shrinking of deltids for caller */
8506 finalndeltids = i + 1;
8507 }
8508
8510
8511 /*
8512 * Shrink deltids array to exclude non-deletable entries at the end. This
8513 * is not just a minor optimization. Final deltids array size might be
8514 * zero for a bottom-up caller. Index AM is explicitly allowed to rely on
8515 * ndeltids being zero in all cases with zero total deletable entries.
8516 */
8517 Assert(finalndeltids > 0 || delstate->bottomup);
8518 delstate->ndeltids = finalndeltids;
8519
8520 return snapshotConflictHorizon;
8521}
int maintenance_io_concurrency
Definition bufmgr.c:207
#define Min(x, y)
Definition c.h:1093
bool IsCatalogRelation(Relation relation)
Definition catalog.c:104
static int bottomup_sort_and_shrink(TM_IndexDeleteOp *delstate)
Definition heapam.c:8775
void HeapTupleHeaderAdvanceConflictHorizon(HeapTupleHeader tuple, TransactionId *snapshotConflictHorizon)
Definition heapam.c:8073
#define BOTTOMUP_MAX_NBLOCKS
Definition heapam.c:189
static void index_delete_check_htid(TM_IndexDeleteOp *delstate, Page page, OffsetNumber maxoff, const ItemPointerData *htid, TM_IndexStatus *istatus)
Definition heapam.c:8158
bool heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer, Snapshot snapshot, HeapTuple heapTuple, bool *all_dead, bool first_call)
Definition heapam.c:1789
static void index_delete_sort(TM_IndexDeleteOp *delstate)
Definition heapam.c:8563
static char buf[DEFAULT_XLOG_SEG_SIZE]
#define InitNonVacuumableSnapshot(snapshotdata, vistestp)
Definition snapmgr.h:50
int get_tablespace_maintenance_io_concurrency(Oid spcid)
Definition spccache.c:230

References Assert, BOTTOMUP_MAX_NBLOCKS, bottomup_sort_and_shrink(), buf, BUFFER_LOCK_SHARE, BufferGetPage(), BufferIsValid(), fb(), FirstOffsetNumber, get_tablespace_maintenance_io_concurrency(), GlobalVisTestFor(), heap_hot_search_buffer(), HeapTupleHeaderAdvanceConflictHorizon(), HeapTupleHeaderGetUpdateXid(), HeapTupleHeaderGetXmin(), HeapTupleHeaderIsHotUpdated(), i, index_delete_check_htid(), index_delete_sort(), InitNonVacuumableSnapshot, InvalidBlockNumber, InvalidBuffer, InvalidOffsetNumber, InvalidTransactionId, IsCatalogRelation(), ItemIdGetRedirect, ItemIdIsNormal, ItemIdIsRedirected, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), LockBuffer(), maintenance_io_concurrency, Min, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), RelationData::rd_rel, ReadBuffer(), HeapTupleHeaderData::t_ctid, TransactionIdEquals, TransactionIdIsValid, and UnlockReleaseBuffer().

◆ heap_inplace_lock()

bool heap_inplace_lock ( Relation  relation,
HeapTuple  oldtup_ptr,
Buffer  buffer,
void(*)(void *)  release_callback,
void arg 
)
extern

Definition at line 6451 of file heapam.c.

6454{
6455 HeapTupleData oldtup = *oldtup_ptr; /* minimize diff vs. heap_update() */
6456 TM_Result result;
6457 bool ret;
6458
6459#ifdef USE_ASSERT_CHECKING
6460 if (RelationGetRelid(relation) == RelationRelationId)
6462#endif
6463
6464 Assert(BufferIsValid(buffer));
6465
6466 /*
6467 * Register shared cache invals if necessary. Other sessions may finish
6468 * inplace updates of this tuple between this step and LockTuple(). Since
6469 * inplace updates don't change cache keys, that's harmless.
6470 *
6471 * While it's tempting to register invals only after confirming we can
6472 * return true, the following obstacle precludes reordering steps that
6473 * way. Registering invals might reach a CatalogCacheInitializeCache()
6474 * that locks "buffer". That would hang indefinitely if running after our
6475 * own LockBuffer(). Hence, we must register invals before LockBuffer().
6476 */
6478
6479 LockTuple(relation, &oldtup.t_self, InplaceUpdateTupleLock);
6481
6482 /*----------
6483 * Interpret HeapTupleSatisfiesUpdate() like heap_update() does, except:
6484 *
6485 * - wait unconditionally
6486 * - already locked tuple above, since inplace needs that unconditionally
6487 * - don't recheck header after wait: simpler to defer to next iteration
6488 * - don't try to continue even if the updater aborts: likewise
6489 * - no crosscheck
6490 */
6492 buffer);
6493
6494 if (result == TM_Invisible)
6495 {
6496 /* no known way this can happen */
6497 ereport(ERROR,
6499 errmsg_internal("attempted to overwrite invisible tuple")));
6500 }
6501 else if (result == TM_SelfModified)
6502 {
6503 /*
6504 * CREATE INDEX might reach this if an expression is silly enough to
6505 * call e.g. SELECT ... FROM pg_class FOR SHARE. C code of other SQL
6506 * statements might get here after a heap_update() of the same row, in
6507 * the absence of an intervening CommandCounterIncrement().
6508 */
6509 ereport(ERROR,
6511 errmsg("tuple to be updated was already modified by an operation triggered by the current command")));
6512 }
6513 else if (result == TM_BeingModified)
6514 {
6517
6519 infomask = oldtup.t_data->t_infomask;
6520
6522 {
6525 int remain;
6526
6528 lockmode, NULL))
6529 {
6532 ret = false;
6534 relation, &oldtup.t_self, XLTW_Update,
6535 &remain);
6536 }
6537 else
6538 ret = true;
6539 }
6541 ret = true;
6543 ret = true;
6544 else
6545 {
6548 ret = false;
6549 XactLockTableWait(xwait, relation, &oldtup.t_self,
6550 XLTW_Update);
6551 }
6552 }
6553 else
6554 {
6555 ret = (result == TM_Ok);
6556 if (!ret)
6557 {
6560 }
6561 }
6562
6563 /*
6564 * GetCatalogSnapshot() relies on invalidation messages to know when to
6565 * take a new snapshot. COMMIT of xwait is responsible for sending the
6566 * invalidation. We're not acquiring heavyweight locks sufficient to
6567 * block if not yet sent, so we must take a new snapshot to ensure a later
6568 * attempt has a fair chance. While we don't need this if xwait aborted,
6569 * don't bother optimizing that.
6570 */
6571 if (!ret)
6572 {
6573 UnlockTuple(relation, &oldtup.t_self, InplaceUpdateTupleLock);
6576 }
6577 return ret;
6578}
Datum arg
Definition elog.c:1322
static bool HEAP_XMAX_IS_KEYSHR_LOCKED(uint16 infomask)
void CacheInvalidateHeapTupleInplace(Relation relation, HeapTuple key_equivalent_tuple)
Definition inval.c:1593
void ForgetInplace_Inval(void)
Definition inval.c:1286
void UnlockTuple(Relation relation, const ItemPointerData *tid, LOCKMODE lockmode)
Definition lmgr.c:601
void LockTuple(Relation relation, const ItemPointerData *tid, LOCKMODE lockmode)
Definition lmgr.c:562
@ XLTW_Update
Definition lmgr.h:27
#define InplaceUpdateTupleLock
Definition lockdefs.h:48
LockTupleMode
Definition lockoptions.h:51
@ LockTupleNoKeyExclusive
Definition lockoptions.h:57
MultiXactStatus
Definition multixact.h:37
@ MultiXactStatusNoKeyUpdate
Definition multixact.h:43
void InvalidateCatalogSnapshot(void)
Definition snapmgr.c:455
CommandId GetCurrentCommandId(bool used)
Definition xact.c:831

References arg, Assert, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferIsValid(), CacheInvalidateHeapTupleInplace(), DoesMultiXactIdConflict(), ereport, errcode(), errmsg, errmsg_internal(), ERROR, fb(), ForgetInplace_Inval(), GetCurrentCommandId(), HEAP_XMAX_IS_KEYSHR_LOCKED(), HEAP_XMAX_IS_MULTI, HeapTupleHeaderGetRawXmax(), HeapTupleSatisfiesUpdate(), InplaceUpdateTupleLock, InvalidateCatalogSnapshot(), LockBuffer(), LockTuple(), LockTupleNoKeyExclusive, MultiXactIdWait(), MultiXactStatusNoKeyUpdate, RelationGetRelid, TM_BeingModified, TM_Invisible, TM_Ok, TM_SelfModified, TransactionIdIsCurrentTransactionId(), UnlockTuple(), XactLockTableWait(), and XLTW_Update.

Referenced by systable_inplace_update_begin().

◆ heap_inplace_unlock()

void heap_inplace_unlock ( Relation  relation,
HeapTuple  oldtup,
Buffer  buffer 
)
extern

◆ heap_inplace_update_and_unlock()

void heap_inplace_update_and_unlock ( Relation  relation,
HeapTuple  oldtup,
HeapTuple  tuple,
Buffer  buffer 
)
extern

Definition at line 6589 of file heapam.c.

6592{
6593 HeapTupleHeader htup = oldtup->t_data;
6594 uint32 oldlen;
6595 uint32 newlen;
6596 char *dst;
6597 char *src;
6598 int nmsgs = 0;
6600 bool RelcacheInitFileInval = false;
6601
6602 Assert(ItemPointerEquals(&oldtup->t_self, &tuple->t_self));
6603 oldlen = oldtup->t_len - htup->t_hoff;
6604 newlen = tuple->t_len - tuple->t_data->t_hoff;
6605 if (oldlen != newlen || htup->t_hoff != tuple->t_data->t_hoff)
6606 elog(ERROR, "wrong tuple length");
6607
6608 dst = (char *) htup + htup->t_hoff;
6609 src = (char *) tuple->t_data + tuple->t_data->t_hoff;
6610
6611 /* Like RecordTransactionCommit(), log only if needed */
6614 &RelcacheInitFileInval);
6615
6616 /*
6617 * Unlink relcache init files as needed. If unlinking, acquire
6618 * RelCacheInitLock until after associated invalidations. By doing this
6619 * in advance, if we checkpoint and then crash between inplace
6620 * XLogInsert() and inval, we don't rely on StartupXLOG() ->
6621 * RelationCacheInitFileRemove(). That uses elevel==LOG, so replay would
6622 * neglect to PANIC on EIO.
6623 */
6625
6626 /*----------
6627 * NO EREPORT(ERROR) from here till changes are complete
6628 *
6629 * Our exclusive buffer lock won't stop a reader having already pinned and
6630 * checked visibility for this tuple. With the usual order of changes
6631 * (i.e. updating the buffer contents before WAL logging), a reader could
6632 * observe our not-yet-persistent update to relfrozenxid and update
6633 * datfrozenxid based on that. A crash in that moment could allow
6634 * datfrozenxid to overtake relfrozenxid:
6635 *
6636 * ["D" is a VACUUM (ONLY_DATABASE_STATS)]
6637 * ["R" is a VACUUM tbl]
6638 * D: vac_update_datfrozenxid() -> systable_beginscan(pg_class)
6639 * D: systable_getnext() returns pg_class tuple of tbl
6640 * R: memcpy() into pg_class tuple of tbl
6641 * D: raise pg_database.datfrozenxid, XLogInsert(), finish
6642 * [crash]
6643 * [recovery restores datfrozenxid w/o relfrozenxid]
6644 *
6645 * We avoid that by using a temporary copy of the buffer to hide our
6646 * change from other backends until the change has been WAL-logged. We
6647 * apply our change to the temporary copy and WAL-log it, before modifying
6648 * the real page. That way any action a reader of the in-place-updated
6649 * value takes will be WAL logged after this change.
6650 */
6652
6653 MarkBufferDirty(buffer);
6654
6655 /* XLOG stuff */
6656 if (RelationNeedsWAL(relation))
6657 {
6660 char *origdata = (char *) BufferGetBlock(buffer);
6661 Page page = BufferGetPage(buffer);
6662 uint16 lower = ((PageHeader) page)->pd_lower;
6663 uint16 upper = ((PageHeader) page)->pd_upper;
6665 RelFileLocator rlocator;
6666 ForkNumber forkno;
6667 BlockNumber blkno;
6669
6670 xlrec.offnum = ItemPointerGetOffsetNumber(&tuple->t_self);
6671 xlrec.dbId = MyDatabaseId;
6673 xlrec.relcacheInitFileInval = RelcacheInitFileInval;
6674 xlrec.nmsgs = nmsgs;
6675
6678 if (nmsgs != 0)
6680 nmsgs * sizeof(SharedInvalidationMessage));
6681
6682 /* register block matching what buffer will look like after changes */
6687 BufferGetTag(buffer, &rlocator, &forkno, &blkno);
6688 Assert(forkno == MAIN_FORKNUM);
6689 XLogRegisterBlock(0, &rlocator, forkno, blkno, copied_buffer.data,
6691 XLogRegisterBufData(0, src, newlen);
6692
6693 /* inplace updates aren't decoded atm, don't log the origin */
6694
6696
6697 PageSetLSN(page, recptr);
6698 }
6699
6700 memcpy(dst, src, newlen);
6701
6703
6704 /*
6705 * Send invalidations to shared queue. SearchSysCacheLocked1() assumes we
6706 * do this before UnlockTuple().
6707 */
6709
6711 UnlockTuple(relation, &tuple->t_self, InplaceUpdateTupleLock);
6712
6713 AcceptInvalidationMessages(); /* local processing of just-sent inval */
6714
6715 /*
6716 * Queue a transactional inval, for logical decoding and for third-party
6717 * code that might have been relying on it since long before inplace
6718 * update adopted immediate invalidation. See README.tuplock section
6719 * "Reading inplace-updated columns" for logical decoding details.
6720 */
6722 CacheInvalidateHeapTuple(relation, tuple, NULL);
6723}
void BufferGetTag(Buffer buffer, RelFileLocator *rlocator, ForkNumber *forknum, BlockNumber *blknum)
Definition bufmgr.c:4378
static Block BufferGetBlock(Buffer buffer)
Definition bufmgr.h:437
PageHeaderData * PageHeader
Definition bufpage.h:199
uint32_t uint32
Definition c.h:618
Oid MyDatabaseTableSpace
Definition globals.c:96
Oid MyDatabaseId
Definition globals.c:94
#define MinSizeOfHeapInplace
#define XLOG_HEAP_INPLACE
Definition heapam_xlog.h:40
void AcceptInvalidationMessages(void)
Definition inval.c:930
int inplaceGetInvalidationMessages(SharedInvalidationMessage **msgs, bool *RelcacheInitFileInval)
Definition inval.c:1088
void PreInplace_Inval(void)
Definition inval.c:1250
void AtInplace_Inval(void)
Definition inval.c:1263
#define IsBootstrapProcessingMode()
Definition miscadmin.h:477
Datum lower(PG_FUNCTION_ARGS)
Datum upper(PG_FUNCTION_ARGS)
ForkNumber
Definition relpath.h:56
#define XLogStandbyInfoActive()
Definition xlog.h:125
void XLogRegisterBufData(uint8 block_id, const void *data, uint32 len)
Definition xloginsert.c:410
void XLogRegisterBlock(uint8 block_id, RelFileLocator *rlocator, ForkNumber forknum, BlockNumber blknum, const PageData *page, uint8 flags)
Definition xloginsert.c:314

References AcceptInvalidationMessages(), Assert, AtInplace_Inval(), BUFFER_LOCK_UNLOCK, BufferGetBlock(), BufferGetPage(), BufferGetTag(), CacheInvalidateHeapTuple(), elog, END_CRIT_SECTION, ERROR, fb(), inplaceGetInvalidationMessages(), InplaceUpdateTupleLock, IsBootstrapProcessingMode, ItemPointerEquals(), ItemPointerGetOffsetNumber(), LockBuffer(), lower(), MAIN_FORKNUM, MarkBufferDirty(), MinSizeOfHeapInplace, MyDatabaseId, MyDatabaseTableSpace, PageSetLSN(), PreInplace_Inval(), REGBUF_STANDARD, RelationNeedsWAL, START_CRIT_SECTION, HeapTupleData::t_data, HeapTupleHeaderData::t_hoff, HeapTupleData::t_len, HeapTupleData::t_self, UnlockTuple(), upper(), XLOG_HEAP_INPLACE, XLogBeginInsert(), XLogInsert(), XLogRegisterBlock(), XLogRegisterBufData(), XLogRegisterData(), and XLogStandbyInfoActive.

Referenced by systable_inplace_update_finish().

◆ heap_insert()

void heap_insert ( Relation  relation,
HeapTuple  tup,
CommandId  cid,
int  options,
BulkInsertState  bistate 
)
extern

Definition at line 2152 of file heapam.c.

2154{
2157 Buffer buffer;
2158 Buffer vmbuffer = InvalidBuffer;
2159 bool all_visible_cleared = false;
2160
2161 /* Cheap, simplistic check that the tuple matches the rel's rowtype. */
2164
2165 AssertHasSnapshotForToast(relation);
2166
2167 /*
2168 * Fill in tuple header fields and toast the tuple if necessary.
2169 *
2170 * Note: below this point, heaptup is the data we actually intend to store
2171 * into the relation; tup is the caller's original untoasted data.
2172 */
2173 heaptup = heap_prepare_insert(relation, tup, xid, cid, options);
2174
2175 /*
2176 * Find buffer to insert this tuple into. If the page is all visible,
2177 * this will also pin the requisite visibility map page.
2178 */
2179 buffer = RelationGetBufferForTuple(relation, heaptup->t_len,
2180 InvalidBuffer, options, bistate,
2181 &vmbuffer, NULL,
2182 0);
2183
2184 /*
2185 * We're about to do the actual insert -- but check for conflict first, to
2186 * avoid possibly having to roll back work we've just done.
2187 *
2188 * This is safe without a recheck as long as there is no possibility of
2189 * another process scanning the page between this check and the insert
2190 * being visible to the scan (i.e., an exclusive buffer content lock is
2191 * continuously held from this point until the tuple insert is visible).
2192 *
2193 * For a heap insert, we only need to check for table-level SSI locks. Our
2194 * new tuple can't possibly conflict with existing tuple locks, and heap
2195 * page locks are only consolidated versions of tuple locks; they do not
2196 * lock "gaps" as index page locks do. So we don't need to specify a
2197 * buffer when making the call, which makes for a faster check.
2198 */
2200
2201 /* NO EREPORT(ERROR) from here till changes are logged */
2203
2204 RelationPutHeapTuple(relation, buffer, heaptup,
2206
2207 if (PageIsAllVisible(BufferGetPage(buffer)))
2208 {
2209 all_visible_cleared = true;
2211 visibilitymap_clear(relation,
2213 vmbuffer, VISIBILITYMAP_VALID_BITS);
2214 }
2215
2216 /*
2217 * XXX Should we set PageSetPrunable on this page ?
2218 *
2219 * The inserting transaction may eventually abort thus making this tuple
2220 * DEAD and hence available for pruning. Though we don't want to optimize
2221 * for aborts, if no other tuple in this page is UPDATEd/DELETEd, the
2222 * aborted tuple will never be pruned until next vacuum is triggered.
2223 *
2224 * If you do add PageSetPrunable here, add it in heap_xlog_insert too.
2225 */
2226
2227 MarkBufferDirty(buffer);
2228
2229 /* XLOG stuff */
2230 if (RelationNeedsWAL(relation))
2231 {
2235 Page page = BufferGetPage(buffer);
2236 uint8 info = XLOG_HEAP_INSERT;
2237 int bufflags = 0;
2238
2239 /*
2240 * If this is a catalog, we need to transmit combo CIDs to properly
2241 * decode, so log that as well.
2242 */
2244 log_heap_new_cid(relation, heaptup);
2245
2246 /*
2247 * If this is the single and first tuple on page, we can reinit the
2248 * page instead of restoring the whole thing. Set flag, and hide
2249 * buffer references from XLogInsert.
2250 */
2253 {
2254 info |= XLOG_HEAP_INIT_PAGE;
2256 }
2257
2258 xlrec.offnum = ItemPointerGetOffsetNumber(&heaptup->t_self);
2259 xlrec.flags = 0;
2265
2266 /*
2267 * For logical decoding, we need the tuple even if we're doing a full
2268 * page write, so make sure it's included even if we take a full-page
2269 * image. (XXX We could alternatively store a pointer into the FPW).
2270 */
2271 if (RelationIsLogicallyLogged(relation) &&
2273 {
2276
2277 if (IsToastRelation(relation))
2279 }
2280
2283
2284 xlhdr.t_infomask2 = heaptup->t_data->t_infomask2;
2285 xlhdr.t_infomask = heaptup->t_data->t_infomask;
2286 xlhdr.t_hoff = heaptup->t_data->t_hoff;
2287
2288 /*
2289 * note we mark xlhdr as belonging to buffer; if XLogInsert decides to
2290 * write the whole page to the xlog, we don't need to store
2291 * xl_heap_header in the xlog.
2292 */
2295 /* PG73FORMAT: write bitmap [+ padding] [+ oid] + data */
2297 (char *) heaptup->t_data + SizeofHeapTupleHeader,
2299
2300 /* filtering by origin on a row level is much more efficient */
2302
2303 recptr = XLogInsert(RM_HEAP_ID, info);
2304
2305 PageSetLSN(page, recptr);
2306 }
2307
2309
2310 UnlockReleaseBuffer(buffer);
2311 if (vmbuffer != InvalidBuffer)
2312 ReleaseBuffer(vmbuffer);
2313
2314 /*
2315 * If tuple is cacheable, mark it for invalidation from the caches in case
2316 * we abort. Note it is OK to do this after releasing the buffer, because
2317 * the heaptup data structure is all in local memory, not in the shared
2318 * buffer.
2319 */
2321
2322 /* Note: speculative insertions are counted too, even if aborted later */
2323 pgstat_count_heap_insert(relation, 1);
2324
2325 /*
2326 * If heaptup is a private copy, release it. Don't forget to copy t_self
2327 * back to the caller's image, too.
2328 */
2329 if (heaptup != tup)
2330 {
2331 tup->t_self = heaptup->t_self;
2333 }
2334}
uint8_t uint8
Definition c.h:616
static HeapTuple heap_prepare_insert(Relation relation, HeapTuple tup, TransactionId xid, CommandId cid, int options)
Definition heapam.c:2343
#define HEAP_INSERT_SPECULATIVE
Definition heapam.h:40
#define HEAP_INSERT_NO_LOGICAL
Definition heapam.h:39
#define XLH_INSERT_ON_TOAST_RELATION
Definition heapam_xlog.h:76
#define XLH_INSERT_IS_SPECULATIVE
Definition heapam_xlog.h:74
#define XLH_INSERT_ALL_VISIBLE_CLEARED
Definition heapam_xlog.h:72
#define XLOG_HEAP_INSERT
Definition heapam_xlog.h:33
#define SizeOfHeapInsert
#define XLH_INSERT_CONTAINS_NEW_TUPLE
Definition heapam_xlog.h:75
#define XLOG_HEAP_INIT_PAGE
Definition heapam_xlog.h:47
void RelationPutHeapTuple(Relation relation, Buffer buffer, HeapTuple tuple, bool token)
Definition hio.c:35
Buffer RelationGetBufferForTuple(Relation relation, Size len, Buffer otherBuffer, int options, BulkInsertState bistate, Buffer *vmbuffer, Buffer *vmbuffer_other, int num_pages)
Definition hio.c:500
#define HeapTupleHeaderGetNatts(tup)
void pgstat_count_heap_insert(Relation rel, PgStat_Counter n)
#define RelationIsLogicallyLogged(relation)
Definition rel.h:710
#define RelationGetNumberOfAttributes(relation)
Definition rel.h:520
#define REGBUF_KEEP_DATA
Definition xloginsert.h:36
#define REGBUF_WILL_INIT
Definition xloginsert.h:34

References Assert, AssertHasSnapshotForToast(), BufferGetBlockNumber(), BufferGetPage(), CacheInvalidateHeapTuple(), CheckForSerializableConflictIn(), END_CRIT_SECTION, fb(), FirstOffsetNumber, GetCurrentTransactionId(), heap_freetuple(), HEAP_INSERT_NO_LOGICAL, HEAP_INSERT_SPECULATIVE, heap_prepare_insert(), HeapTupleHeaderGetNatts, InvalidBlockNumber, InvalidBuffer, IsToastRelation(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), log_heap_new_cid(), MarkBufferDirty(), PageClearAllVisible(), PageGetMaxOffsetNumber(), PageIsAllVisible(), PageSetLSN(), pgstat_count_heap_insert(), REGBUF_KEEP_DATA, REGBUF_STANDARD, REGBUF_WILL_INIT, RelationGetBufferForTuple(), RelationGetNumberOfAttributes, RelationIsAccessibleInLogicalDecoding, RelationIsLogicallyLogged, RelationNeedsWAL, RelationPutHeapTuple(), ReleaseBuffer(), SizeOfHeapHeader, SizeOfHeapInsert, SizeofHeapTupleHeader, START_CRIT_SECTION, UnlockReleaseBuffer(), visibilitymap_clear(), VISIBILITYMAP_VALID_BITS, XLH_INSERT_ALL_VISIBLE_CLEARED, XLH_INSERT_CONTAINS_NEW_TUPLE, XLH_INSERT_IS_SPECULATIVE, XLH_INSERT_ON_TOAST_RELATION, XLOG_HEAP_INIT_PAGE, XLOG_HEAP_INSERT, XLOG_INCLUDE_ORIGIN, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), XLogRegisterData(), and XLogSetRecordFlags().

Referenced by heapam_tuple_insert(), heapam_tuple_insert_speculative(), simple_heap_insert(), and toast_save_datum().

◆ heap_lock_tuple()

TM_Result heap_lock_tuple ( Relation  relation,
HeapTuple  tuple,
CommandId  cid,
LockTupleMode  mode,
LockWaitPolicy  wait_policy,
bool  follow_updates,
Buffer buffer,
TM_FailureData tmfd 
)
extern

Definition at line 4658 of file heapam.c.

4662{
4663 TM_Result result;
4664 ItemPointer tid = &(tuple->t_self);
4665 ItemId lp;
4666 Page page;
4667 Buffer vmbuffer = InvalidBuffer;
4668 BlockNumber block;
4669 TransactionId xid,
4670 xmax;
4674 bool first_time = true;
4675 bool skip_tuple_lock = false;
4676 bool have_tuple_lock = false;
4677 bool cleared_all_frozen = false;
4678
4679 *buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
4680 block = ItemPointerGetBlockNumber(tid);
4681
4682 /*
4683 * Before locking the buffer, pin the visibility map page if it appears to
4684 * be necessary. Since we haven't got the lock yet, someone else might be
4685 * in the middle of changing this, so we'll need to recheck after we have
4686 * the lock.
4687 */
4688 if (PageIsAllVisible(BufferGetPage(*buffer)))
4689 visibilitymap_pin(relation, block, &vmbuffer);
4690
4692
4693 page = BufferGetPage(*buffer);
4696
4697 tuple->t_data = (HeapTupleHeader) PageGetItem(page, lp);
4698 tuple->t_len = ItemIdGetLength(lp);
4699 tuple->t_tableOid = RelationGetRelid(relation);
4700
4701l3:
4702 result = HeapTupleSatisfiesUpdate(tuple, cid, *buffer);
4703
4704 if (result == TM_Invisible)
4705 {
4706 /*
4707 * This is possible, but only when locking a tuple for ON CONFLICT DO
4708 * SELECT/UPDATE. We return this value here rather than throwing an
4709 * error in order to give that case the opportunity to throw a more
4710 * specific error.
4711 */
4712 result = TM_Invisible;
4713 goto out_locked;
4714 }
4715 else if (result == TM_BeingModified ||
4716 result == TM_Updated ||
4717 result == TM_Deleted)
4718 {
4722 bool require_sleep;
4723 ItemPointerData t_ctid;
4724
4725 /* must copy state data before unlocking buffer */
4727 infomask = tuple->t_data->t_infomask;
4728 infomask2 = tuple->t_data->t_infomask2;
4729 ItemPointerCopy(&tuple->t_data->t_ctid, &t_ctid);
4730
4732
4733 /*
4734 * If any subtransaction of the current top transaction already holds
4735 * a lock as strong as or stronger than what we're requesting, we
4736 * effectively hold the desired lock already. We *must* succeed
4737 * without trying to take the tuple lock, else we will deadlock
4738 * against anyone wanting to acquire a stronger lock.
4739 *
4740 * Note we only do this the first time we loop on the HTSU result;
4741 * there is no point in testing in subsequent passes, because
4742 * evidently our own transaction cannot have acquired a new lock after
4743 * the first time we checked.
4744 */
4745 if (first_time)
4746 {
4747 first_time = false;
4748
4750 {
4751 int i;
4752 int nmembers;
4753 MultiXactMember *members;
4754
4755 /*
4756 * We don't need to allow old multixacts here; if that had
4757 * been the case, HeapTupleSatisfiesUpdate would have returned
4758 * MayBeUpdated and we wouldn't be here.
4759 */
4760 nmembers =
4761 GetMultiXactIdMembers(xwait, &members, false,
4763
4764 for (i = 0; i < nmembers; i++)
4765 {
4766 /* only consider members of our own transaction */
4767 if (!TransactionIdIsCurrentTransactionId(members[i].xid))
4768 continue;
4769
4770 if (TUPLOCK_from_mxstatus(members[i].status) >= mode)
4771 {
4772 pfree(members);
4773 result = TM_Ok;
4774 goto out_unlocked;
4775 }
4776 else
4777 {
4778 /*
4779 * Disable acquisition of the heavyweight tuple lock.
4780 * Otherwise, when promoting a weaker lock, we might
4781 * deadlock with another locker that has acquired the
4782 * heavyweight tuple lock and is waiting for our
4783 * transaction to finish.
4784 *
4785 * Note that in this case we still need to wait for
4786 * the multixact if required, to avoid acquiring
4787 * conflicting locks.
4788 */
4789 skip_tuple_lock = true;
4790 }
4791 }
4792
4793 if (members)
4794 pfree(members);
4795 }
4797 {
4798 switch (mode)
4799 {
4800 case LockTupleKeyShare:
4804 result = TM_Ok;
4805 goto out_unlocked;
4806 case LockTupleShare:
4809 {
4810 result = TM_Ok;
4811 goto out_unlocked;
4812 }
4813 break;
4816 {
4817 result = TM_Ok;
4818 goto out_unlocked;
4819 }
4820 break;
4821 case LockTupleExclusive:
4824 {
4825 result = TM_Ok;
4826 goto out_unlocked;
4827 }
4828 break;
4829 }
4830 }
4831 }
4832
4833 /*
4834 * Initially assume that we will have to wait for the locking
4835 * transaction(s) to finish. We check various cases below in which
4836 * this can be turned off.
4837 */
4838 require_sleep = true;
4839 if (mode == LockTupleKeyShare)
4840 {
4841 /*
4842 * If we're requesting KeyShare, and there's no update present, we
4843 * don't need to wait. Even if there is an update, we can still
4844 * continue if the key hasn't been modified.
4845 *
4846 * However, if there are updates, we need to walk the update chain
4847 * to mark future versions of the row as locked, too. That way,
4848 * if somebody deletes that future version, we're protected
4849 * against the key going away. This locking of future versions
4850 * could block momentarily, if a concurrent transaction is
4851 * deleting a key; or it could return a value to the effect that
4852 * the transaction deleting the key has already committed. So we
4853 * do this before re-locking the buffer; otherwise this would be
4854 * prone to deadlocks.
4855 *
4856 * Note that the TID we're locking was grabbed before we unlocked
4857 * the buffer. For it to change while we're not looking, the
4858 * other properties we're testing for below after re-locking the
4859 * buffer would also change, in which case we would restart this
4860 * loop above.
4861 */
4863 {
4864 bool updated;
4865
4867
4868 /*
4869 * If there are updates, follow the update chain; bail out if
4870 * that cannot be done.
4871 */
4872 if (follow_updates && updated &&
4873 !ItemPointerEquals(&tuple->t_self, &t_ctid))
4874 {
4875 TM_Result res;
4876
4877 res = heap_lock_updated_tuple(relation,
4878 infomask, xwait, &t_ctid,
4880 mode);
4881 if (res != TM_Ok)
4882 {
4883 result = res;
4884 /* recovery code expects to have buffer lock held */
4886 goto failed;
4887 }
4888 }
4889
4891
4892 /*
4893 * Make sure it's still an appropriate lock, else start over.
4894 * Also, if it wasn't updated before we released the lock, but
4895 * is updated now, we start over too; the reason is that we
4896 * now need to follow the update chain to lock the new
4897 * versions.
4898 */
4899 if (!HeapTupleHeaderIsOnlyLocked(tuple->t_data) &&
4900 ((tuple->t_data->t_infomask2 & HEAP_KEYS_UPDATED) ||
4901 !updated))
4902 goto l3;
4903
4904 /* Things look okay, so we can skip sleeping */
4905 require_sleep = false;
4906
4907 /*
4908 * Note we allow Xmax to change here; other updaters/lockers
4909 * could have modified it before we grabbed the buffer lock.
4910 * However, this is not a problem, because with the recheck we
4911 * just did we ensure that they still don't conflict with the
4912 * lock we want.
4913 */
4914 }
4915 }
4916 else if (mode == LockTupleShare)
4917 {
4918 /*
4919 * If we're requesting Share, we can similarly avoid sleeping if
4920 * there's no update and no exclusive lock present.
4921 */
4924 {
4926
4927 /*
4928 * Make sure it's still an appropriate lock, else start over.
4929 * See above about allowing xmax to change.
4930 */
4933 goto l3;
4934 require_sleep = false;
4935 }
4936 }
4937 else if (mode == LockTupleNoKeyExclusive)
4938 {
4939 /*
4940 * If we're requesting NoKeyExclusive, we might also be able to
 4941 * avoid sleeping; just ensure that there is no conflicting lock
4942 * already acquired.
4943 */
4945 {
4947 mode, NULL))
4948 {
4949 /*
4950 * No conflict, but if the xmax changed under us in the
4951 * meantime, start over.
4952 */
4956 xwait))
4957 goto l3;
4958
4959 /* otherwise, we're good */
4960 require_sleep = false;
4961 }
4962 }
4964 {
4966
4967 /* if the xmax changed in the meantime, start over */
4970 xwait))
4971 goto l3;
4972 /* otherwise, we're good */
4973 require_sleep = false;
4974 }
4975 }
4976
4977 /*
4978 * As a check independent from those above, we can also avoid sleeping
4979 * if the current transaction is the sole locker of the tuple. Note
4980 * that the strength of the lock already held is irrelevant; this is
4981 * not about recording the lock in Xmax (which will be done regardless
4982 * of this optimization, below). Also, note that the cases where we
4983 * hold a lock stronger than we are requesting are already handled
4984 * above by not doing anything.
4985 *
4986 * Note we only deal with the non-multixact case here; MultiXactIdWait
4987 * is well equipped to deal with this situation on its own.
4988 */
4991 {
4992 /* ... but if the xmax changed in the meantime, start over */
4996 xwait))
4997 goto l3;
4999 require_sleep = false;
5000 }
5001
5002 /*
5003 * Time to sleep on the other transaction/multixact, if necessary.
5004 *
5005 * If the other transaction is an update/delete that's already
5006 * committed, then sleeping cannot possibly do any good: if we're
5007 * required to sleep, get out to raise an error instead.
5008 *
5009 * By here, we either have already acquired the buffer exclusive lock,
5010 * or we must wait for the locking transaction or multixact; so below
5011 * we ensure that we grab buffer lock after the sleep.
5012 */
5013 if (require_sleep && (result == TM_Updated || result == TM_Deleted))
5014 {
5016 goto failed;
5017 }
5018 else if (require_sleep)
5019 {
5020 /*
5021 * Acquire tuple lock to establish our priority for the tuple, or
5022 * die trying. LockTuple will release us when we are next-in-line
5023 * for the tuple. We must do this even if we are share-locking,
5024 * but not if we already have a weaker lock on the tuple.
5025 *
5026 * If we are forced to "start over" below, we keep the tuple lock;
5027 * this arranges that we stay at the head of the line while
5028 * rechecking tuple state.
5029 */
5030 if (!skip_tuple_lock &&
5031 !heap_acquire_tuplock(relation, tid, mode, wait_policy,
5033 {
5034 /*
5035 * This can only happen if wait_policy is Skip and the lock
5036 * couldn't be obtained.
5037 */
5038 result = TM_WouldBlock;
5039 /* recovery code expects to have buffer lock held */
5041 goto failed;
5042 }
5043
5045 {
5047
5048 /* We only ever lock tuples, never update them */
5049 if (status >= MultiXactStatusNoKeyUpdate)
5050 elog(ERROR, "invalid lock mode in heap_lock_tuple");
5051
5052 /* wait for multixact to end, or die trying */
5053 switch (wait_policy)
5054 {
5055 case LockWaitBlock:
5057 relation, &tuple->t_self, XLTW_Lock, NULL);
5058 break;
5059 case LockWaitSkip:
5061 status, infomask, relation,
5062 NULL, false))
5063 {
5064 result = TM_WouldBlock;
5065 /* recovery code expects to have buffer lock held */
5067 goto failed;
5068 }
5069 break;
5070 case LockWaitError:
5072 status, infomask, relation,
5074 ereport(ERROR,
5076 errmsg("could not obtain lock on row in relation \"%s\"",
5077 RelationGetRelationName(relation))));
5078
5079 break;
5080 }
5081
5082 /*
5083 * Of course, the multixact might not be done here: if we're
5084 * requesting a light lock mode, other transactions with light
5085 * locks could still be alive, as well as locks owned by our
5086 * own xact or other subxacts of this backend. We need to
5087 * preserve the surviving MultiXact members. Note that it
5088 * isn't absolutely necessary in the latter case, but doing so
5089 * is simpler.
5090 */
5091 }
5092 else
5093 {
5094 /* wait for regular transaction to end, or die trying */
5095 switch (wait_policy)
5096 {
5097 case LockWaitBlock:
5098 XactLockTableWait(xwait, relation, &tuple->t_self,
5099 XLTW_Lock);
5100 break;
5101 case LockWaitSkip:
5103 {
5104 result = TM_WouldBlock;
5105 /* recovery code expects to have buffer lock held */
5107 goto failed;
5108 }
5109 break;
5110 case LockWaitError:
5112 ereport(ERROR,
5114 errmsg("could not obtain lock on row in relation \"%s\"",
5115 RelationGetRelationName(relation))));
5116 break;
5117 }
5118 }
5119
5120 /* if there are updates, follow the update chain */
5122 !ItemPointerEquals(&tuple->t_self, &t_ctid))
5123 {
5124 TM_Result res;
5125
5126 res = heap_lock_updated_tuple(relation,
5127 infomask, xwait, &t_ctid,
5129 mode);
5130 if (res != TM_Ok)
5131 {
5132 result = res;
5133 /* recovery code expects to have buffer lock held */
5135 goto failed;
5136 }
5137 }
5138
5140
5141 /*
5142 * xwait is done, but if xwait had just locked the tuple then some
5143 * other xact could update this tuple before we get to this point.
5144 * Check for xmax change, and start over if so.
5145 */
5148 xwait))
5149 goto l3;
5150
5152 {
5153 /*
5154 * Otherwise check if it committed or aborted. Note we cannot
5155 * be here if the tuple was only locked by somebody who didn't
5156 * conflict with us; that would have been handled above. So
5157 * that transaction must necessarily be gone by now. But
5158 * don't check for this in the multixact case, because some
5159 * locker transactions might still be running.
5160 */
5161 UpdateXmaxHintBits(tuple->t_data, *buffer, xwait);
5162 }
5163 }
5164
5165 /* By here, we're certain that we hold buffer exclusive lock again */
5166
5167 /*
5168 * We may lock if previous xmax aborted, or if it committed but only
5169 * locked the tuple without updating it; or if we didn't have to wait
5170 * at all for whatever reason.
5171 */
5172 if (!require_sleep ||
5173 (tuple->t_data->t_infomask & HEAP_XMAX_INVALID) ||
5176 result = TM_Ok;
5177 else if (!ItemPointerEquals(&tuple->t_self, &tuple->t_data->t_ctid))
5178 result = TM_Updated;
5179 else
5180 result = TM_Deleted;
5181 }
5182
5183failed:
5184 if (result != TM_Ok)
5185 {
5186 Assert(result == TM_SelfModified || result == TM_Updated ||
5187 result == TM_Deleted || result == TM_WouldBlock);
5188
5189 /*
5190 * When locking a tuple under LockWaitSkip semantics and we fail with
5191 * TM_WouldBlock above, it's possible for concurrent transactions to
5192 * release the lock and set HEAP_XMAX_INVALID in the meantime. So
5193 * this assert is slightly different from the equivalent one in
5194 * heap_delete and heap_update.
5195 */
5196 Assert((result == TM_WouldBlock) ||
5197 !(tuple->t_data->t_infomask & HEAP_XMAX_INVALID));
5198 Assert(result != TM_Updated ||
5199 !ItemPointerEquals(&tuple->t_self, &tuple->t_data->t_ctid));
5200 tmfd->ctid = tuple->t_data->t_ctid;
5201 tmfd->xmax = HeapTupleHeaderGetUpdateXid(tuple->t_data);
5202 if (result == TM_SelfModified)
5203 tmfd->cmax = HeapTupleHeaderGetCmax(tuple->t_data);
5204 else
5205 tmfd->cmax = InvalidCommandId;
5206 goto out_locked;
5207 }
5208
5209 /*
5210 * If we didn't pin the visibility map page and the page has become all
5211 * visible while we were busy locking the buffer, or during some
5212 * subsequent window during which we had it unlocked, we'll have to unlock
5213 * and re-lock, to avoid holding the buffer lock across I/O. That's a bit
5214 * unfortunate, especially since we'll now have to recheck whether the
5215 * tuple has been locked or updated under us, but hopefully it won't
5216 * happen very often.
5217 */
5218 if (vmbuffer == InvalidBuffer && PageIsAllVisible(page))
5219 {
5221 visibilitymap_pin(relation, block, &vmbuffer);
5223 goto l3;
5224 }
5225
5226 xmax = HeapTupleHeaderGetRawXmax(tuple->t_data);
5227 old_infomask = tuple->t_data->t_infomask;
5228
5229 /*
5230 * If this is the first possibly-multixact-able operation in the current
5231 * transaction, set my per-backend OldestMemberMXactId setting. We can be
5232 * certain that the transaction will never become a member of any older
5233 * MultiXactIds than that. (We have to do this even if we end up just
5234 * using our own TransactionId below, since some other backend could
5235 * incorporate our XID into a MultiXact immediately afterwards.)
5236 */
5238
5239 /*
5240 * Compute the new xmax and infomask to store into the tuple. Note we do
5241 * not modify the tuple just yet, because that would leave it in the wrong
5242 * state if multixact.c elogs.
5243 */
5245 GetCurrentTransactionId(), mode, false,
5246 &xid, &new_infomask, &new_infomask2);
5247
5249
5250 /*
5251 * Store transaction information of xact locking the tuple.
5252 *
5253 * Note: Cmax is meaningless in this context, so don't set it; this avoids
5254 * possibly generating a useless combo CID. Moreover, if we're locking a
5255 * previously updated tuple, it's important to preserve the Cmax.
5256 *
5257 * Also reset the HOT UPDATE bit, but only if there's no update; otherwise
5258 * we would break the HOT chain.
5259 */
5262 tuple->t_data->t_infomask |= new_infomask;
5263 tuple->t_data->t_infomask2 |= new_infomask2;
5266 HeapTupleHeaderSetXmax(tuple->t_data, xid);
5267
5268 /*
5269 * Make sure there is no forward chain link in t_ctid. Note that in the
5270 * cases where the tuple has been updated, we must not overwrite t_ctid,
5271 * because it was set by the updater. Moreover, if the tuple has been
5272 * updated, we need to follow the update chain to lock the new versions of
5273 * the tuple as well.
5274 */
5276 tuple->t_data->t_ctid = *tid;
5277
5278 /* Clear only the all-frozen bit on visibility map if needed */
5279 if (PageIsAllVisible(page) &&
5280 visibilitymap_clear(relation, block, vmbuffer,
5282 cleared_all_frozen = true;
5283
5284
5285 MarkBufferDirty(*buffer);
5286
5287 /*
5288 * XLOG stuff. You might think that we don't need an XLOG record because
5289 * there is no state change worth restoring after a crash. You would be
5290 * wrong however: we have just written either a TransactionId or a
5291 * MultiXactId that may never have been seen on disk before, and we need
5292 * to make sure that there are XLOG entries covering those ID numbers.
5293 * Else the same IDs might be re-used after a crash, which would be
5294 * disastrous if this page made it to disk before the crash. Essentially
5295 * we have to enforce the WAL log-before-data rule even in this case.
5296 * (Also, in a PITR log-shipping or 2PC environment, we have to have XLOG
5297 * entries for everything anyway.)
5298 */
5299 if (RelationNeedsWAL(relation))
5300 {
5303
5306
5307 xlrec.offnum = ItemPointerGetOffsetNumber(&tuple->t_self);
5308 xlrec.xmax = xid;
5309 xlrec.infobits_set = compute_infobits(new_infomask,
5310 tuple->t_data->t_infomask2);
5313
5314 /* we don't decode row locks atm, so no need to log the origin */
5315
5317
5318 PageSetLSN(page, recptr);
5319 }
5320
5322
5323 result = TM_Ok;
5324
5327
5329 if (BufferIsValid(vmbuffer))
5330 ReleaseBuffer(vmbuffer);
5331
5332 /*
5333 * Don't update the visibility map here. Locking a tuple doesn't change
5334 * visibility info.
5335 */
5336
5337 /*
5338 * Now that we have successfully marked the tuple as locked, we can
5339 * release the lmgr tuple lock, if we had it.
5340 */
5341 if (have_tuple_lock)
5342 UnlockTupleTuplock(relation, tid, mode);
5343
5344 return result;
5345}
#define TUPLOCK_from_mxstatus(status)
Definition heapam.c:218
static bool ConditionalMultiXactIdWait(MultiXactId multi, MultiXactStatus status, uint16 infomask, Relation rel, int *remaining, bool logLockFailure)
Definition heapam.c:7895
static TM_Result heap_lock_updated_tuple(Relation rel, uint16 prior_infomask, TransactionId prior_raw_xmax, const ItemPointerData *prior_ctid, TransactionId xid, LockTupleMode mode)
Definition heapam.c:6129
static MultiXactStatus get_mxact_status_for_lock(LockTupleMode mode, bool is_update)
Definition heapam.c:4611
#define XLH_LOCK_ALL_FROZEN_CLEARED
#define XLOG_HEAP_LOCK
Definition heapam_xlog.h:39
#define SizeOfHeapLock
#define HEAP_KEYS_UPDATED
static bool HEAP_XMAX_IS_SHR_LOCKED(uint16 infomask)
static bool HEAP_XMAX_IS_EXCL_LOCKED(uint16 infomask)
static void ItemPointerCopy(const ItemPointerData *fromPointer, ItemPointerData *toPointer)
Definition itemptr.h:172
bool ConditionalXactLockTableWait(TransactionId xid, bool logLockFailure)
Definition lmgr.c:739
@ XLTW_Lock
Definition lmgr.h:29
bool log_lock_failures
Definition lock.c:54
@ LockWaitSkip
Definition lockoptions.h:42
@ LockWaitError
Definition lockoptions.h:44
@ LockTupleShare
Definition lockoptions.h:55
@ LockTupleKeyShare
Definition lockoptions.h:53
int GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **members, bool from_pgupgrade, bool isLockOnly)
Definition multixact.c:1161
static PgChecksumMode mode
@ TM_WouldBlock
Definition tableam.h:103
#define VISIBILITYMAP_ALL_FROZEN

References Assert, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetPage(), BufferIsValid(), TM_FailureData::cmax, compute_infobits(), compute_new_xmax_infomask(), ConditionalMultiXactIdWait(), ConditionalXactLockTableWait(), TM_FailureData::ctid, DoesMultiXactIdConflict(), elog, END_CRIT_SECTION, ereport, errcode(), errmsg, ERROR, fb(), get_mxact_status_for_lock(), GetCurrentTransactionId(), GetMultiXactIdMembers(), heap_acquire_tuplock(), HEAP_KEYS_UPDATED, heap_lock_updated_tuple(), HEAP_XMAX_INVALID, HEAP_XMAX_IS_EXCL_LOCKED(), HEAP_XMAX_IS_KEYSHR_LOCKED(), HEAP_XMAX_IS_LOCKED_ONLY(), HEAP_XMAX_IS_MULTI, HEAP_XMAX_IS_SHR_LOCKED(), HeapTupleHeaderClearHotUpdated(), HeapTupleHeaderGetCmax(), HeapTupleHeaderGetRawXmax(), HeapTupleHeaderGetUpdateXid(), HeapTupleHeaderIsOnlyLocked(), HeapTupleHeaderSetXmax(), HeapTupleSatisfiesUpdate(), i, InvalidBuffer, InvalidCommandId, ItemIdGetLength, ItemIdIsNormal, ItemPointerCopy(), ItemPointerEquals(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), LockBuffer(), LockTupleExclusive, LockTupleKeyShare, LockTupleNoKeyExclusive, LockTupleShare, LockWaitBlock, LockWaitError, LockWaitSkip, log_lock_failures, MarkBufferDirty(), mode, MultiXactIdSetOldestMember(), MultiXactIdWait(), MultiXactStatusNoKeyUpdate, PageGetItem(), PageGetItemId(), PageIsAllVisible(), PageSetLSN(), pfree(), ReadBuffer(), REGBUF_STANDARD, RelationGetRelationName, RelationGetRelid, RelationNeedsWAL, ReleaseBuffer(), SizeOfHeapLock, START_CRIT_SECTION, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TM_BeingModified, TM_Deleted, TM_Invisible, TM_Ok, TM_SelfModified, TM_Updated, TM_WouldBlock, TransactionIdEquals, TransactionIdIsCurrentTransactionId(), TUPLOCK_from_mxstatus, UnlockTupleTuplock, UpdateXmaxHintBits(), VISIBILITYMAP_ALL_FROZEN, visibilitymap_clear(), visibilitymap_pin(), XactLockTableWait(), 
XLH_LOCK_ALL_FROZEN_CLEARED, XLOG_HEAP_LOCK, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), XLogRegisterData(), XLTW_Lock, TM_FailureData::xmax, and xmax_infomask_changed().

Referenced by heapam_tuple_lock().

◆ heap_multi_insert()

void heap_multi_insert ( Relation  relation,
TupleTableSlot **  slots,
int  ntuples,
CommandId  cid,
int  options,
BulkInsertState  bistate 
)
extern

Definition at line 2423 of file heapam.c.

2425{
2428 int i;
2429 int ndone;
2431 Page page;
2432 Buffer vmbuffer = InvalidBuffer;
2433 bool needwal;
2437 bool starting_with_empty_page = false;
2438 int npages = 0;
2439 int npages_used = 0;
2440
2441 /* currently not needed (thus unsupported) for heap_multi_insert() */
2443
2444 AssertHasSnapshotForToast(relation);
2445
2446 needwal = RelationNeedsWAL(relation);
2449
2450 /* Toast and set header data in all the slots */
2451 heaptuples = palloc(ntuples * sizeof(HeapTuple));
2452 for (i = 0; i < ntuples; i++)
2453 {
2454 HeapTuple tuple;
2455
2456 tuple = ExecFetchSlotHeapTuple(slots[i], true, NULL);
2457 slots[i]->tts_tableOid = RelationGetRelid(relation);
2458 tuple->t_tableOid = slots[i]->tts_tableOid;
2459 heaptuples[i] = heap_prepare_insert(relation, tuple, xid, cid,
2460 options);
2461 }
2462
2463 /*
2464 * We're about to do the actual inserts -- but check for conflict first,
2465 * to minimize the possibility of having to roll back work we've just
2466 * done.
2467 *
2468 * A check here does not definitively prevent a serialization anomaly;
2469 * that check MUST be done at least past the point of acquiring an
2470 * exclusive buffer content lock on every buffer that will be affected,
2471 * and MAY be done after all inserts are reflected in the buffers and
2472 * those locks are released; otherwise there is a race condition. Since
2473 * multiple buffers can be locked and unlocked in the loop below, and it
2474 * would not be feasible to identify and lock all of those buffers before
2475 * the loop, we must do a final check at the end.
2476 *
2477 * The check here could be omitted with no loss of correctness; it is
2478 * present strictly as an optimization.
2479 *
2480 * For heap inserts, we only need to check for table-level SSI locks. Our
2481 * new tuples can't possibly conflict with existing tuple locks, and heap
2482 * page locks are only consolidated versions of tuple locks; they do not
2483 * lock "gaps" as index page locks do. So we don't need to specify a
2484 * buffer when making the call, which makes for a faster check.
2485 */
2487
2488 ndone = 0;
2489 while (ndone < ntuples)
2490 {
2491 Buffer buffer;
2492 bool all_visible_cleared = false;
2493 bool all_frozen_set = false;
2494 int nthispage;
2495
2497
2498 /*
2499 * Compute number of pages needed to fit the to-be-inserted tuples in
2500 * the worst case. This will be used to determine how much to extend
2501 * the relation by in RelationGetBufferForTuple(), if needed. If we
2502 * filled a prior page from scratch, we can just update our last
2503 * computation, but if we started with a partially filled page,
2504 * recompute from scratch, the number of potentially required pages
2505 * can vary due to tuples needing to fit onto the page, page headers
2506 * etc.
2507 */
2508 if (ndone == 0 || !starting_with_empty_page)
2509 {
2510 npages = heap_multi_insert_pages(heaptuples, ndone, ntuples,
2512 npages_used = 0;
2513 }
2514 else
2515 npages_used++;
2516
2517 /*
2518 * Find buffer where at least the next tuple will fit. If the page is
2519 * all-visible, this will also pin the requisite visibility map page.
2520 *
2521 * Also pin visibility map page if COPY FREEZE inserts tuples into an
2522 * empty page. See all_frozen_set below.
2523 */
2524 buffer = RelationGetBufferForTuple(relation, heaptuples[ndone]->t_len,
2525 InvalidBuffer, options, bistate,
2526 &vmbuffer, NULL,
2527 npages - npages_used);
2528 page = BufferGetPage(buffer);
2529
2531
2533 {
2534 all_frozen_set = true;
2535 /* Lock the vmbuffer before entering the critical section */
2537 }
2538
2539 /* NO EREPORT(ERROR) from here till changes are logged */
2541
2542 /*
2543 * RelationGetBufferForTuple has ensured that the first tuple fits.
2544 * Put that on the page, and then as many other tuples as fit.
2545 */
2546 RelationPutHeapTuple(relation, buffer, heaptuples[ndone], false);
2547
2548 /*
2549 * For logical decoding we need combo CIDs to properly decode the
2550 * catalog.
2551 */
2552 if (needwal && need_cids)
2553 log_heap_new_cid(relation, heaptuples[ndone]);
2554
2555 for (nthispage = 1; ndone + nthispage < ntuples; nthispage++)
2556 {
2558
2559 if (PageGetHeapFreeSpace(page) < MAXALIGN(heaptup->t_len) + saveFreeSpace)
2560 break;
2561
2562 RelationPutHeapTuple(relation, buffer, heaptup, false);
2563
2564 /*
2565 * For logical decoding we need combo CIDs to properly decode the
2566 * catalog.
2567 */
2568 if (needwal && need_cids)
2569 log_heap_new_cid(relation, heaptup);
2570 }
2571
2572 /*
2573 * If the page is all visible, need to clear that, unless we're only
2574 * going to add further frozen rows to it.
2575 *
2576 * If we're only adding already frozen rows to a previously empty
2577 * page, mark it as all-frozen and update the visibility map. We're
2578 * already holding a pin on the vmbuffer.
2579 */
2581 {
2582 all_visible_cleared = true;
2583 PageClearAllVisible(page);
2584 visibilitymap_clear(relation,
2585 BufferGetBlockNumber(buffer),
2586 vmbuffer, VISIBILITYMAP_VALID_BITS);
2587 }
2588 else if (all_frozen_set)
2589 {
2590 PageSetAllVisible(page);
2591 PageClearPrunable(page);
2593 vmbuffer,
2596 relation->rd_locator);
2597 }
2598
2599 /*
2600 * XXX Should we set PageSetPrunable on this page ? See heap_insert()
2601 */
2602
2603 MarkBufferDirty(buffer);
2604
2605 /* XLOG stuff */
2606 if (needwal)
2607 {
2611 char *tupledata;
2612 int totaldatalen;
2613 char *scratchptr = scratch.data;
2614 bool init;
2615 int bufflags = 0;
2616
2617 /*
2618 * If the page was previously empty, we can reinit the page
2619 * instead of restoring the whole thing.
2620 */
2622
2623 /* allocate xl_heap_multi_insert struct from the scratch area */
2626
2627 /*
2628 * Allocate offsets array. Unless we're reinitializing the page,
2629 * in that case the tuples are stored in order starting at
2630 * FirstOffsetNumber and we don't need to store the offsets
2631 * explicitly.
2632 */
2633 if (!init)
2634 scratchptr += nthispage * sizeof(OffsetNumber);
2635
2636 /* the rest of the scratch space is used for tuple data */
2637 tupledata = scratchptr;
2638
2639 /* check that the mutually exclusive flags are not both set */
2641
2642 xlrec->flags = 0;
2645
2646 /*
2647 * We don't have to worry about including a conflict xid in the
2648 * WAL record, as HEAP_INSERT_FROZEN intentionally violates
2649 * visibility rules.
2650 */
2651 if (all_frozen_set)
2653
2654 xlrec->ntuples = nthispage;
2655
2656 /*
2657 * Write out an xl_multi_insert_tuple and the tuple data itself
2658 * for each tuple.
2659 */
2660 for (i = 0; i < nthispage; i++)
2661 {
2663 xl_multi_insert_tuple *tuphdr;
2664 int datalen;
2665
2666 if (!init)
2667 xlrec->offsets[i] = ItemPointerGetOffsetNumber(&heaptup->t_self);
2668 /* xl_multi_insert_tuple needs two-byte alignment. */
2670 scratchptr = ((char *) tuphdr) + SizeOfMultiInsertTuple;
2671
2672 tuphdr->t_infomask2 = heaptup->t_data->t_infomask2;
2673 tuphdr->t_infomask = heaptup->t_data->t_infomask;
2674 tuphdr->t_hoff = heaptup->t_data->t_hoff;
2675
2676 /* write bitmap [+ padding] [+ oid] + data */
2677 datalen = heaptup->t_len - SizeofHeapTupleHeader;
2679 (char *) heaptup->t_data + SizeofHeapTupleHeader,
2680 datalen);
2681 tuphdr->datalen = datalen;
2682 scratchptr += datalen;
2683 }
2684 totaldatalen = scratchptr - tupledata;
2685 Assert((scratchptr - scratch.data) < BLCKSZ);
2686
2687 if (need_tuple_data)
2689
2690 /*
2691 * Signal that this is the last xl_heap_multi_insert record
2692 * emitted by this call to heap_multi_insert(). Needed for logical
2693 * decoding so it knows when to cleanup temporary data.
2694 */
2695 if (ndone + nthispage == ntuples)
2697
2698 if (init)
2699 {
2700 info |= XLOG_HEAP_INIT_PAGE;
2702 }
2703
2704 /*
2705 * If we're doing logical decoding, include the new tuple data
2706 * even if we take a full-page image of the page.
2707 */
2708 if (need_tuple_data)
2710
2712 XLogRegisterData(xlrec, tupledata - scratch.data);
2714 if (all_frozen_set)
2715 XLogRegisterBuffer(1, vmbuffer, 0);
2716
2717 XLogRegisterBufData(0, tupledata, totaldatalen);
2718
2719 /* filtering by origin on a row level is much more efficient */
2721
2722 recptr = XLogInsert(RM_HEAP2_ID, info);
2723
2724 PageSetLSN(page, recptr);
2725 if (all_frozen_set)
2726 {
2727 Assert(BufferIsDirty(vmbuffer));
2728 PageSetLSN(BufferGetPage(vmbuffer), recptr);
2729 }
2730 }
2731
2733
2734 if (all_frozen_set)
2735 LockBuffer(vmbuffer, BUFFER_LOCK_UNLOCK);
2736
2737 UnlockReleaseBuffer(buffer);
2738 ndone += nthispage;
2739
2740 /*
2741 * NB: Only release vmbuffer after inserting all tuples - it's fairly
2742 * likely that we'll insert into subsequent heap pages that are likely
2743 * to use the same vm page.
2744 */
2745 }
2746
2747 /* We're done with inserting all tuples, so release the last vmbuffer. */
2748 if (vmbuffer != InvalidBuffer)
2749 ReleaseBuffer(vmbuffer);
2750
2751 /*
2752 * We're done with the actual inserts. Check for conflicts again, to
2753 * ensure that all rw-conflicts in to these inserts are detected. Without
2754 * this final check, a sequential scan of the heap may have locked the
2755 * table after the "before" check, missing one opportunity to detect the
2756 * conflict, and then scanned the table before the new tuples were there,
2757 * missing the other chance to detect the conflict.
2758 *
2759 * For heap inserts, we only need to check for table-level SSI locks. Our
2760 * new tuples can't possibly conflict with existing tuple locks, and heap
2761 * page locks are only consolidated versions of tuple locks; they do not
2762 * lock "gaps" as index page locks do. So we don't need to specify a
2763 * buffer when making the call.
2764 */
2766
2767 /*
2768 * If tuples are cacheable, mark them for invalidation from the caches in
2769 * case we abort. Note it is OK to do this after releasing the buffer,
2770 * because the heaptuples data structure is all in local memory, not in
2771 * the shared buffer.
2772 */
2773 if (IsCatalogRelation(relation))
2774 {
2775 for (i = 0; i < ntuples; i++)
2777 }
2778
2779 /* copy t_self fields back to the caller's slots */
2780 for (i = 0; i < ntuples; i++)
2781 slots[i]->tts_tid = heaptuples[i]->t_self;
2782
2783 pgstat_count_heap_insert(relation, ntuples);
2784}
bool BufferIsDirty(Buffer buffer)
Definition bufmgr.c:3030
Size PageGetHeapFreeSpace(const PageData *page)
Definition bufpage.c:990
static void PageSetAllVisible(Page page)
Definition bufpage.h:460
#define PageClearPrunable(page)
Definition bufpage.h:486
#define MAXALIGN(LEN)
Definition c.h:898
#define SHORTALIGN(LEN)
Definition c.h:894
size_t Size
Definition c.h:691
HeapTuple ExecFetchSlotHeapTuple(TupleTableSlot *slot, bool materialize, bool *shouldFree)
static int heap_multi_insert_pages(HeapTuple *heaptuples, int done, int ntuples, Size saveFreeSpace)
Definition heapam.c:2391
#define HEAP_INSERT_FROZEN
Definition heapam.h:38
#define SizeOfHeapMultiInsert
#define XLOG_HEAP2_MULTI_INSERT
Definition heapam_xlog.h:64
#define XLH_INSERT_LAST_IN_MULTI
Definition heapam_xlog.h:73
#define XLH_INSERT_ALL_FROZEN_SET
Definition heapam_xlog.h:79
#define SizeOfMultiInsertTuple
void * palloc(Size size)
Definition mcxt.c:1387
#define CHECK_FOR_INTERRUPTS()
Definition miscadmin.h:123
#define RelationGetTargetPageFreeSpace(relation, defaultff)
Definition rel.h:389
#define HEAP_DEFAULT_FILLFACTOR
Definition rel.h:360
#define init()
RelFileLocator rd_locator
Definition rel.h:57
void visibilitymap_set_vmbits(BlockNumber heapBlk, Buffer vmBuf, uint8 flags, const RelFileLocator rlocator)
#define VISIBILITYMAP_ALL_VISIBLE

References Assert, AssertHasSnapshotForToast(), BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage(), BufferIsDirty(), CacheInvalidateHeapTuple(), CHECK_FOR_INTERRUPTS, CheckForSerializableConflictIn(), xl_multi_insert_tuple::datalen, END_CRIT_SECTION, ExecFetchSlotHeapTuple(), fb(), GetCurrentTransactionId(), HEAP_DEFAULT_FILLFACTOR, HEAP_INSERT_FROZEN, HEAP_INSERT_NO_LOGICAL, heap_multi_insert_pages(), heap_prepare_insert(), i, init, InvalidBlockNumber, InvalidBuffer, IsCatalogRelation(), ItemPointerGetOffsetNumber(), LockBuffer(), log_heap_new_cid(), MarkBufferDirty(), MAXALIGN, PageClearAllVisible(), PageClearPrunable, PageGetHeapFreeSpace(), PageGetMaxOffsetNumber(), PageIsAllVisible(), PageSetAllVisible(), PageSetLSN(), palloc(), pgstat_count_heap_insert(), RelationData::rd_locator, REGBUF_KEEP_DATA, REGBUF_STANDARD, REGBUF_WILL_INIT, RelationGetBufferForTuple(), RelationGetRelid, RelationGetTargetPageFreeSpace, RelationIsAccessibleInLogicalDecoding, RelationIsLogicallyLogged, RelationNeedsWAL, RelationPutHeapTuple(), ReleaseBuffer(), SHORTALIGN, SizeOfHeapMultiInsert, SizeofHeapTupleHeader, SizeOfMultiInsertTuple, START_CRIT_SECTION, xl_multi_insert_tuple::t_hoff, xl_multi_insert_tuple::t_infomask, xl_multi_insert_tuple::t_infomask2, HeapTupleData::t_tableOid, TupleTableSlot::tts_tableOid, UnlockReleaseBuffer(), VISIBILITYMAP_ALL_FROZEN, VISIBILITYMAP_ALL_VISIBLE, visibilitymap_clear(), visibilitymap_set_vmbits(), VISIBILITYMAP_VALID_BITS, XLH_INSERT_ALL_FROZEN_SET, XLH_INSERT_ALL_VISIBLE_CLEARED, XLH_INSERT_CONTAINS_NEW_TUPLE, XLH_INSERT_LAST_IN_MULTI, XLOG_HEAP2_MULTI_INSERT, XLOG_HEAP_INIT_PAGE, XLOG_INCLUDE_ORIGIN, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), XLogRegisterData(), and XLogSetRecordFlags().

Referenced by CatalogTuplesMultiInsertWithInfo().

◆ heap_page_prune_and_freeze()

void heap_page_prune_and_freeze ( PruneFreezeParams params,
PruneFreezeResult presult,
OffsetNumber off_loc,
TransactionId new_relfrozen_xid,
MultiXactId new_relmin_mxid 
)
extern

Definition at line 815 of file pruneheap.c.

820{
822 bool do_freeze;
823 bool do_prune;
824 bool do_hint_prune;
827
828 /* Initialize prstate */
829 prune_freeze_setup(params,
831 presult, &prstate);
832
833 /*
834 * Examine all line pointers and tuple visibility information to determine
835 * which line pointers should change state and which tuples may be frozen.
836 * Prepare queue of state changes to later be executed in a critical
837 * section.
838 */
840
841 /*
842 * If checksums are enabled, calling heap_prune_satisfies_vacuum() while
843 * checking tuple visibility information in prune_freeze_plan() may have
844 * caused an FPI to be emitted.
845 */
847
848 do_prune = prstate.nredirected > 0 ||
849 prstate.ndead > 0 ||
850 prstate.nunused > 0;
851
852 /*
853 * Even if we don't prune anything, if we found a new value for the
854 * pd_prune_xid field or the page was marked full, we will update the hint
855 * bit.
856 */
857 do_hint_prune = PageGetPruneXid(prstate.page) != prstate.new_prune_xid ||
858 PageIsFull(prstate.page);
859
860 /*
861 * Decide if we want to go ahead with freezing according to the freeze
862 * plans we prepared, or not.
863 */
865 do_prune,
867 &prstate);
868
869 /*
870 * While scanning the line pointers, we did not clear
871 * set_all_visible/set_all_frozen when encountering LP_DEAD items because
872 * we wanted the decision whether or not to freeze the page to be
873 * unaffected by the short-term presence of LP_DEAD items. These LP_DEAD
874 * items are effectively assumed to be LP_UNUSED items in the making. It
875 * doesn't matter which vacuum heap pass (initial pass or final pass) ends
876 * up setting the page all-frozen, as long as the ongoing VACUUM does it.
877 *
878 * Now that we finished determining whether or not to freeze the page,
879 * update set_all_visible and set_all_frozen so that they reflect the true
880 * state of the page for setting PD_ALL_VISIBLE and VM bits.
881 */
882 if (prstate.lpdead_items > 0)
883 prstate.set_all_visible = prstate.set_all_frozen = false;
884
885 Assert(!prstate.set_all_frozen || prstate.set_all_visible);
886
887 /* Any error while applying the changes is critical */
889
890 if (do_hint_prune)
891 {
892 /*
893 * Update the page's pd_prune_xid field to either zero, or the lowest
894 * XID of any soon-prunable tuple.
895 */
896 ((PageHeader) prstate.page)->pd_prune_xid = prstate.new_prune_xid;
897
898 /*
899 * Also clear the "page is full" flag, since there's no point in
900 * repeating the prune/defrag process until something else happens to
901 * the page.
902 */
904
905 /*
906 * If that's all we had to do to the page, this is a non-WAL-logged
907 * hint. If we are going to freeze or prune the page, we will mark
908 * the buffer dirty below.
909 */
910 if (!do_freeze && !do_prune)
911 MarkBufferDirtyHint(prstate.buffer, true);
912 }
913
914 if (do_prune || do_freeze)
915 {
916 /* Apply the planned item changes and repair page fragmentation. */
917 if (do_prune)
918 {
919 heap_page_prune_execute(prstate.buffer, false,
920 prstate.redirected, prstate.nredirected,
921 prstate.nowdead, prstate.ndead,
922 prstate.nowunused, prstate.nunused);
923 }
924
925 if (do_freeze)
926 heap_freeze_prepared_tuples(prstate.buffer, prstate.frozen, prstate.nfrozen);
927
928 MarkBufferDirty(prstate.buffer);
929
930 /*
931 * Emit a WAL XLOG_HEAP2_PRUNE* record showing what we did
932 */
933 if (RelationNeedsWAL(prstate.relation))
934 {
935 /*
936 * The snapshotConflictHorizon for the whole record should be the
937 * most conservative of all the horizons calculated for any of the
938 * possible modifications. If this record will prune tuples, any
939 * queries on the standby older than the newest xid of the most
940 * recently removed tuple this record will prune will conflict. If
941 * this record will freeze tuples, any queries on the standby with
942 * xids older than the newest tuple this record will freeze will
943 * conflict.
944 */
946
947 if (TransactionIdFollows(prstate.pagefrz.FreezePageConflictXid,
948 prstate.latest_xid_removed))
949 conflict_xid = prstate.pagefrz.FreezePageConflictXid;
950 else
951 conflict_xid = prstate.latest_xid_removed;
952
954 InvalidBuffer, /* vmbuffer */
955 0, /* vmflags */
957 true, params->reason,
958 prstate.frozen, prstate.nfrozen,
959 prstate.redirected, prstate.nredirected,
960 prstate.nowdead, prstate.ndead,
961 prstate.nowunused, prstate.nunused);
962 }
963 }
964
966
967 /* Copy information back for caller */
968 presult->ndeleted = prstate.ndeleted;
969 presult->nnewlpdead = prstate.ndead;
970 presult->nfrozen = prstate.nfrozen;
971 presult->live_tuples = prstate.live_tuples;
972 presult->recently_dead_tuples = prstate.recently_dead_tuples;
973 presult->set_all_visible = prstate.set_all_visible;
974 presult->set_all_frozen = prstate.set_all_frozen;
975 presult->hastup = prstate.hastup;
976
977 /*
978 * For callers planning to update the visibility map, the conflict horizon
979 * for that record must be the newest xmin on the page. However, if the
980 * page is completely frozen, there can be no conflict and the
981 * vm_conflict_horizon should remain InvalidTransactionId. This includes
982 * the case that we just froze all the tuples; the prune-freeze record
983 * included the conflict XID already so the caller doesn't need it.
984 */
985 if (presult->set_all_frozen)
986 presult->vm_conflict_horizon = InvalidTransactionId;
987 else
988 presult->vm_conflict_horizon = prstate.visibility_cutoff_xid;
989
990 presult->lpdead_items = prstate.lpdead_items;
991 /* the presult->deadoffsets array was already filled in */
992
993 if (prstate.attempt_freeze)
994 {
995 if (presult->nfrozen > 0)
996 {
997 *new_relfrozen_xid = prstate.pagefrz.FreezePageRelfrozenXid;
998 *new_relmin_mxid = prstate.pagefrz.FreezePageRelminMxid;
999 }
1000 else
1001 {
1002 *new_relfrozen_xid = prstate.pagefrz.NoFreezePageRelfrozenXid;
1003 *new_relmin_mxid = prstate.pagefrz.NoFreezePageRelminMxid;
1004 }
1005 }
1006}
void MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
Definition bufmgr.c:5688
static TransactionId PageGetPruneXid(const PageData *page)
Definition bufpage.h:471
static void PageClearFull(Page page)
Definition bufpage.h:449
static bool PageIsFull(const PageData *page)
Definition bufpage.h:439
int64_t int64
Definition c.h:615
void heap_freeze_prepared_tuples(Buffer buffer, HeapTupleFreeze *tuples, int ntuples)
Definition heapam.c:7479
WalUsage pgWalUsage
Definition instrument.c:22
static void prune_freeze_plan(PruneState *prstate, OffsetNumber *off_loc)
Definition pruneheap.c:469
static bool heap_page_will_freeze(bool did_tuple_hint_fpi, bool do_prune, bool do_hint_prune, PruneState *prstate)
Definition pruneheap.c:672
void log_heap_prune_and_freeze(Relation relation, Buffer buffer, Buffer vmbuffer, uint8 vmflags, TransactionId conflict_xid, bool cleanup_lock, PruneReason reason, HeapTupleFreeze *frozen, int nfrozen, OffsetNumber *redirected, int nredirected, OffsetNumber *dead, int ndead, OffsetNumber *unused, int nunused)
Definition pruneheap.c:2162
static void prune_freeze_setup(PruneFreezeParams *params, TransactionId *new_relfrozen_xid, MultiXactId *new_relmin_mxid, PruneFreezeResult *presult, PruneState *prstate)
Definition pruneheap.c:337
void heap_page_prune_execute(Buffer buffer, bool lp_truncate_only, OffsetNumber *redirected, int nredirected, OffsetNumber *nowdead, int ndead, OffsetNumber *nowunused, int nunused)
Definition pruneheap.c:1666
PruneReason reason
Definition heapam.h:269
int64 wal_fpi
Definition instrument.h:54
static bool TransactionIdFollows(TransactionId id1, TransactionId id2)
Definition transam.h:297

References Assert, END_CRIT_SECTION, fb(), heap_freeze_prepared_tuples(), heap_page_prune_execute(), heap_page_will_freeze(), InvalidBuffer, InvalidTransactionId, log_heap_prune_and_freeze(), MarkBufferDirty(), MarkBufferDirtyHint(), PageClearFull(), PageGetPruneXid(), PageIsFull(), pgWalUsage, prune_freeze_plan(), prune_freeze_setup(), PruneFreezeParams::reason, RelationNeedsWAL, START_CRIT_SECTION, TransactionIdFollows(), and WalUsage::wal_fpi.

Referenced by heap_page_prune_opt(), and lazy_scan_prune().

◆ heap_page_prune_execute()

void heap_page_prune_execute ( Buffer  buffer,
bool  lp_truncate_only,
OffsetNumber redirected,
int  nredirected,
OffsetNumber nowdead,
int  ndead,
OffsetNumber nowunused,
int  nunused 
)
extern

Definition at line 1666 of file pruneheap.c.

1670{
1671 Page page = BufferGetPage(buffer);
1672 OffsetNumber *offnum;
1674
1675 /* Shouldn't be called unless there's something to do */
1676 Assert(nredirected > 0 || ndead > 0 || nunused > 0);
1677
1678 /* If 'lp_truncate_only', we can only remove already-dead line pointers */
1679 Assert(!lp_truncate_only || (nredirected == 0 && ndead == 0));
1680
1681 /* Update all redirected line pointers */
1682 offnum = redirected;
1683 for (int i = 0; i < nredirected; i++)
1684 {
1685 OffsetNumber fromoff = *offnum++;
1686 OffsetNumber tooff = *offnum++;
1689
1690#ifdef USE_ASSERT_CHECKING
1691
1692 /*
1693 * Any existing item that we set as an LP_REDIRECT (any 'from' item)
1694 * must be the first item from a HOT chain. If the item has tuple
1695 * storage then it can't be a heap-only tuple. Otherwise we are just
1696 * maintaining an existing LP_REDIRECT from an existing HOT chain that
1697 * has been pruned at least once before now.
1698 */
1700 {
1702
1703 htup = (HeapTupleHeader) PageGetItem(page, fromlp);
1705 }
1706 else
1707 {
1708 /* We shouldn't need to redundantly set the redirect */
1710 }
1711
1712 /*
1713 * The item that we're about to set as an LP_REDIRECT (the 'from'
1714 * item) will point to an existing item (the 'to' item) that is
1715 * already a heap-only tuple. There can be at most one LP_REDIRECT
1716 * item per HOT chain.
1717 *
1718 * We need to keep around an LP_REDIRECT item (after original
1719 * non-heap-only root tuple gets pruned away) so that it's always
1720 * possible for VACUUM to easily figure out what TID to delete from
1721 * indexes when an entire HOT chain becomes dead. A heap-only tuple
1722 * can never become LP_DEAD; an LP_REDIRECT item or a regular heap
1723 * tuple can.
1724 *
1725 * This check may miss problems, e.g. the target of a redirect could
1726 * be marked as unused subsequently. The page_verify_redirects() check
1727 * below will catch such problems.
1728 */
1729 tolp = PageGetItemId(page, tooff);
1731 htup = (HeapTupleHeader) PageGetItem(page, tolp);
1733#endif
1734
1736 }
1737
1738 /* Update all now-dead line pointers */
1739 offnum = nowdead;
1740 for (int i = 0; i < ndead; i++)
1741 {
1742 OffsetNumber off = *offnum++;
1743 ItemId lp = PageGetItemId(page, off);
1744
1745#ifdef USE_ASSERT_CHECKING
1746
1747 /*
1748 * An LP_DEAD line pointer must be left behind when the original item
1749 * (which is dead to everybody) could still be referenced by a TID in
1750 * an index. This should never be necessary with any individual
1751 * heap-only tuple item, though. (It's not clear how much of a problem
1752 * that would be, but there is no reason to allow it.)
1753 */
1754 if (ItemIdHasStorage(lp))
1755 {
1757 htup = (HeapTupleHeader) PageGetItem(page, lp);
1759 }
1760 else
1761 {
1762 /* Whole HOT chain becomes dead */
1764 }
1765#endif
1766
1768 }
1769
1770 /* Update all now-unused line pointers */
1771 offnum = nowunused;
1772 for (int i = 0; i < nunused; i++)
1773 {
1774 OffsetNumber off = *offnum++;
1775 ItemId lp = PageGetItemId(page, off);
1776
1777#ifdef USE_ASSERT_CHECKING
1778
1779 if (lp_truncate_only)
1780 {
1781 /* Setting LP_DEAD to LP_UNUSED in vacuum's second pass */
1783 }
1784 else
1785 {
1786 /*
1787 * When heap_page_prune_and_freeze() was called, mark_unused_now
1788 * may have been passed as true, which allows would-be LP_DEAD
1789 * items to be made LP_UNUSED instead. This is only possible if
1790 * the relation has no indexes. If there are any dead items, then
1791 * mark_unused_now was not true and every item being marked
1792 * LP_UNUSED must refer to a heap-only tuple.
1793 */
1794 if (ndead > 0)
1795 {
1797 htup = (HeapTupleHeader) PageGetItem(page, lp);
1799 }
1800 else
1802 }
1803
1804#endif
1805
1807 }
1808
1809 if (lp_truncate_only)
1811 else
1812 {
1813 /*
1814 * Finally, repair any fragmentation, and update the page's hint bit
1815 * about whether it has free pointers.
1816 */
1818
1819 /*
1820 * Now that the page has been modified, assert that redirect items
1821 * still point to valid targets.
1822 */
1824 }
1825}
void PageRepairFragmentation(Page page)
Definition bufpage.c:698
void PageTruncateLinePointerArray(Page page)
Definition bufpage.c:834
#define PG_USED_FOR_ASSERTS_ONLY
Definition c.h:243
#define ItemIdSetRedirect(itemId, link)
Definition itemid.h:152
#define ItemIdSetDead(itemId)
Definition itemid.h:164
#define ItemIdSetUnused(itemId)
Definition itemid.h:128
#define ItemIdHasStorage(itemId)
Definition itemid.h:120
static void page_verify_redirects(Page page)
Definition pruneheap.c:1842

References Assert, BufferGetPage(), fb(), HeapTupleHeaderIsHeapOnly(), i, ItemIdGetRedirect, ItemIdHasStorage, ItemIdIsDead, ItemIdIsNormal, ItemIdIsRedirected, ItemIdIsUsed, ItemIdSetDead, ItemIdSetRedirect, ItemIdSetUnused, page_verify_redirects(), PageGetItem(), PageGetItemId(), PageRepairFragmentation(), PageTruncateLinePointerArray(), and PG_USED_FOR_ASSERTS_ONLY.

Referenced by heap_page_prune_and_freeze(), and heap_xlog_prune_freeze().

◆ heap_page_prune_opt()

void heap_page_prune_opt ( Relation  relation,
Buffer  buffer,
Buffer vmbuffer 
)
extern

Definition at line 216 of file pruneheap.c.

217{
218 Page page = BufferGetPage(buffer);
220 GlobalVisState *vistest;
222
223 /*
224 * We can't write WAL in recovery mode, so there's no point trying to
225 * clean the page. The primary will likely issue a cleaning WAL record
226 * soon anyway, so this is no particular loss.
227 */
228 if (RecoveryInProgress())
229 return;
230
231 /*
232 * First check whether there's any chance there's something to prune,
233 * determining the appropriate horizon is a waste if there's no prune_xid
234 * (i.e. no updates/deletes left potentially dead tuples around).
235 */
238 return;
239
240 /*
241 * Check whether prune_xid indicates that there may be dead rows that can
242 * be cleaned up.
243 */
244 vistest = GlobalVisTestFor(relation);
245
247 return;
248
249 /*
250 * We prune when a previous UPDATE failed to find enough space on the page
251 * for a new tuple version, or when free space falls below the relation's
252 * fill-factor target (but not less than 10%).
253 *
254 * Checking free space here is questionable since we aren't holding any
255 * lock on the buffer; in the worst case we could get a bogus answer. It's
256 * unlikely to be *seriously* wrong, though, since reading either pd_lower
257 * or pd_upper is probably atomic. Avoiding taking a lock seems more
258 * important than sometimes getting a wrong answer in what is after all
259 * just a heuristic estimate.
260 */
263 minfree = Max(minfree, BLCKSZ / 10);
264
265 if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
266 {
267 /* OK, try to get exclusive buffer lock */
269 return;
270
271 /*
272 * Now that we have buffer lock, get accurate information about the
273 * page's free space, and recheck the heuristic about whether to
274 * prune.
275 */
276 if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
277 {
280
281 /*
282 * We don't pass the HEAP_PAGE_PRUNE_MARK_UNUSED_NOW option
283 * regardless of whether or not the relation has indexes, since we
284 * cannot safely determine that during on-access pruning with the
285 * current implementation.
286 */
287 PruneFreezeParams params = {
288 .relation = relation,
289 .buffer = buffer,
290 .reason = PRUNE_ON_ACCESS,
291 .options = 0,
292 .vistest = vistest,
293 .cutoffs = NULL,
294 };
295
297 NULL, NULL);
298
299 /*
300 * Report the number of tuples reclaimed to pgstats. This is
301 * presult.ndeleted minus the number of newly-LP_DEAD-set items.
302 *
303 * We derive the number of dead tuples like this to avoid totally
304 * forgetting about items that were set to LP_DEAD, since they
305 * still need to be cleaned up by VACUUM. We only want to count
 306 * heap-only tuples that just became LP_UNUSED in our report,
 307 * which don't need such cleanup.
308 *
309 * VACUUM doesn't have to compensate in the same way when it
310 * tracks ndeleted, since it will set the same LP_DEAD items to
311 * LP_UNUSED separately.
312 */
313 if (presult.ndeleted > presult.nnewlpdead)
315 presult.ndeleted - presult.nnewlpdead);
316 }
317
318 /* And release buffer lock */
320
321 /*
322 * We avoid reuse of any free space created on the page by unrelated
323 * UPDATEs/INSERTs by opting to not update the FSM at this point. The
324 * free space should be reused by UPDATEs to *this* page.
325 */
326 }
327}
bool ConditionalLockBufferForCleanup(Buffer buffer)
Definition bufmgr.c:6710
#define Max(x, y)
Definition c.h:1087
void pgstat_update_heap_dead_tuples(Relation rel, int delta)
bool GlobalVisTestIsRemovableXid(GlobalVisState *state, TransactionId xid)
Definition procarray.c:4271
void heap_page_prune_and_freeze(PruneFreezeParams *params, PruneFreezeResult *presult, OffsetNumber *off_loc, TransactionId *new_relfrozen_xid, MultiXactId *new_relmin_mxid)
Definition pruneheap.c:815
Relation relation
Definition heapam.h:262
bool RecoveryInProgress(void)
Definition xlog.c:6444

References BUFFER_LOCK_UNLOCK, BufferGetPage(), ConditionalLockBufferForCleanup(), fb(), GlobalVisTestFor(), GlobalVisTestIsRemovableXid(), HEAP_DEFAULT_FILLFACTOR, heap_page_prune_and_freeze(), LockBuffer(), Max, PageGetHeapFreeSpace(), PageGetPruneXid(), PageIsFull(), pgstat_update_heap_dead_tuples(), PRUNE_ON_ACCESS, RecoveryInProgress(), PruneFreezeParams::relation, RelationGetTargetPageFreeSpace, and TransactionIdIsValid.

Referenced by BitmapHeapScanNextBlock(), heap_prepare_pagescan(), and heapam_index_fetch_tuple().

◆ heap_pre_freeze_checks()

void heap_pre_freeze_checks ( Buffer  buffer,
HeapTupleFreeze tuples,
int  ntuples 
)
extern

Definition at line 7426 of file heapam.c.

7428{
7429 Page page = BufferGetPage(buffer);
7430
7431 for (int i = 0; i < ntuples; i++)
7432 {
7433 HeapTupleFreeze *frz = tuples + i;
7434 ItemId itemid = PageGetItemId(page, frz->offset);
7435 HeapTupleHeader htup;
7436
7437 htup = (HeapTupleHeader) PageGetItem(page, itemid);
7438
7439 /* Deliberately avoid relying on tuple hint bits here */
7440 if (frz->checkflags & HEAP_FREEZE_CHECK_XMIN_COMMITTED)
7441 {
7443
7445 if (unlikely(!TransactionIdDidCommit(xmin)))
7446 ereport(ERROR,
7448 errmsg_internal("uncommitted xmin %u needs to be frozen",
7449 xmin)));
7450 }
7451
7452 /*
7453 * TransactionIdDidAbort won't work reliably in the presence of XIDs
7454 * left behind by transactions that were in progress during a crash,
7455 * so we can only check that xmax didn't commit
7456 */
7457 if (frz->checkflags & HEAP_FREEZE_CHECK_XMAX_ABORTED)
7458 {
7460
7463 ereport(ERROR,
7465 errmsg_internal("cannot freeze committed xmax %u",
7466 xmax)));
7467 }
7468 }
7469}
#define HEAP_FREEZE_CHECK_XMAX_ABORTED
Definition heapam.h:150
#define HEAP_FREEZE_CHECK_XMIN_COMMITTED
Definition heapam.h:149
static bool HeapTupleHeaderXminFrozen(const HeapTupleHeaderData *tup)
static TransactionId HeapTupleHeaderGetRawXmin(const HeapTupleHeaderData *tup)
#define ERRCODE_DATA_CORRUPTED
bool TransactionIdDidCommit(TransactionId transactionId)
Definition transam.c:126
#define TransactionIdIsNormal(xid)
Definition transam.h:42

References Assert, BufferGetPage(), ereport, errcode(), ERRCODE_DATA_CORRUPTED, errmsg_internal(), ERROR, fb(), HEAP_FREEZE_CHECK_XMAX_ABORTED, HEAP_FREEZE_CHECK_XMIN_COMMITTED, HeapTupleHeaderGetRawXmax(), HeapTupleHeaderGetRawXmin(), HeapTupleHeaderXminFrozen(), i, PageGetItem(), PageGetItemId(), TransactionIdDidCommit(), TransactionIdIsNormal, and unlikely.

Referenced by heap_page_will_freeze().

◆ heap_prepare_freeze_tuple()

bool heap_prepare_freeze_tuple ( HeapTupleHeader  tuple,
const struct VacuumCutoffs cutoffs,
HeapPageFreeze pagefrz,
HeapTupleFreeze frz,
bool totally_frozen 
)
extern

Definition at line 7146 of file heapam.c.

7150{
7151 bool xmin_already_frozen = false,
7152 xmax_already_frozen = false;
7153 bool freeze_xmin = false,
7154 replace_xvac = false,
7155 replace_xmax = false,
7156 freeze_xmax = false;
7157 TransactionId xid;
7158
7159 frz->xmax = HeapTupleHeaderGetRawXmax(tuple);
7160 frz->t_infomask2 = tuple->t_infomask2;
7161 frz->t_infomask = tuple->t_infomask;
7162 frz->frzflags = 0;
7163 frz->checkflags = 0;
7164
7165 /*
7166 * Process xmin, while keeping track of whether it's already frozen, or
7167 * will become frozen iff our freeze plan is executed by caller (could be
7168 * neither).
7169 */
7170 xid = HeapTupleHeaderGetXmin(tuple);
7171 if (!TransactionIdIsNormal(xid))
7172 xmin_already_frozen = true;
7173 else
7174 {
7175 if (TransactionIdPrecedes(xid, cutoffs->relfrozenxid))
7176 ereport(ERROR,
7178 errmsg_internal("found xmin %u from before relfrozenxid %u",
7179 xid, cutoffs->relfrozenxid)));
7180
7181 /* Will set freeze_xmin flags in freeze plan below */
7183
7184 /* Verify that xmin committed if and when freeze plan is executed */
7185 if (freeze_xmin)
7186 {
7189 pagefrz->FreezePageConflictXid = xid;
7190 }
7191 }
7192
7193 /*
7194 * Old-style VACUUM FULL is gone, but we have to process xvac for as long
7195 * as we support having MOVED_OFF/MOVED_IN tuples in the database
7196 */
7197 xid = HeapTupleHeaderGetXvac(tuple);
7198 if (TransactionIdIsNormal(xid))
7199 {
7201 Assert(TransactionIdPrecedes(xid, cutoffs->OldestXmin));
7202
7203 /*
7204 * For Xvac, we always freeze proactively. This allows totally_frozen
7205 * tracking to ignore xvac.
7206 */
7207 replace_xvac = pagefrz->freeze_required = true;
7208
7210 pagefrz->FreezePageConflictXid = xid;
7211
7212 /* Will set replace_xvac flags in freeze plan below */
7213 }
7214
7215 /* Now process xmax */
7216 xid = frz->xmax;
7217 if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
7218 {
7219 /* Raw xmax is a MultiXactId */
7221 uint16 flags;
7222
7223 /*
7224 * We will either remove xmax completely (in the "freeze_xmax" path),
7225 * process xmax by replacing it (in the "replace_xmax" path), or
7226 * perform no-op xmax processing. The only constraint is that the
7227 * FreezeLimit/MultiXactCutoff postcondition must never be violated.
7228 */
7229 newxmax = FreezeMultiXactId(xid, tuple->t_infomask, cutoffs,
7230 &flags, pagefrz);
7231
7232 if (flags & FRM_NOOP)
7233 {
7234 /*
7235 * xmax is a MultiXactId, and nothing about it changes for now.
7236 * This is the only case where 'freeze_required' won't have been
7237 * set for us by FreezeMultiXactId, as well as the only case where
7238 * neither freeze_xmax nor replace_xmax are set (given a multi).
7239 *
7240 * This is a no-op, but the call to FreezeMultiXactId might have
7241 * ratcheted back NewRelfrozenXid and/or NewRelminMxid trackers
7242 * for us (the "freeze page" variants, specifically). That'll
7243 * make it safe for our caller to freeze the page later on, while
7244 * leaving this particular xmax undisturbed.
7245 *
7246 * FreezeMultiXactId is _not_ responsible for the "no freeze"
7247 * NewRelfrozenXid/NewRelminMxid trackers, though -- that's our
7248 * job. A call to heap_tuple_should_freeze for this same tuple
7249 * will take place below if 'freeze_required' isn't set already.
7250 * (This repeats work from FreezeMultiXactId, but allows "no
7251 * freeze" tracker maintenance to happen in only one place.)
7252 */
7255 }
7256 else if (flags & FRM_RETURN_IS_XID)
7257 {
7258 /*
7259 * xmax will become an updater Xid (original MultiXact's updater
7260 * member Xid will be carried forward as a simple Xid in Xmax).
7261 */
7263
7264 /*
7265 * NB -- some of these transformations are only valid because we
7266 * know the return Xid is a tuple updater (i.e. not merely a
7267 * locker.) Also note that the only reason we don't explicitly
7268 * worry about HEAP_KEYS_UPDATED is because it lives in
7269 * t_infomask2 rather than t_infomask.
7270 */
7271 frz->t_infomask &= ~HEAP_XMAX_BITS;
7272 frz->xmax = newxmax;
7273 if (flags & FRM_MARK_COMMITTED)
7274 frz->t_infomask |= HEAP_XMAX_COMMITTED;
7275 replace_xmax = true;
7276 }
7277 else if (flags & FRM_RETURN_IS_MULTI)
7278 {
7281
7282 /*
7283 * xmax is an old MultiXactId that we have to replace with a new
7284 * MultiXactId, to carry forward two or more original member XIDs.
7285 */
7287
7288 /*
7289 * We can't use GetMultiXactIdHintBits directly on the new multi
7290 * here; that routine initializes the masks to all zeroes, which
7291 * would lose other bits we need. Doing it this way ensures all
7292 * unrelated bits remain untouched.
7293 */
7294 frz->t_infomask &= ~HEAP_XMAX_BITS;
7295 frz->t_infomask2 &= ~HEAP_KEYS_UPDATED;
7297 frz->t_infomask |= newbits;
7298 frz->t_infomask2 |= newbits2;
7299 frz->xmax = newxmax;
7300 replace_xmax = true;
7301 }
7302 else
7303 {
7304 /*
7305 * Freeze plan for tuple "freezes xmax" in the strictest sense:
7306 * it'll leave nothing in xmax (neither an Xid nor a MultiXactId).
7307 */
7308 Assert(flags & FRM_INVALIDATE_XMAX);
7310
7311 /* Will set freeze_xmax flags in freeze plan below */
7312 freeze_xmax = true;
7313 }
7314
7315 /* MultiXactId processing forces freezing (barring FRM_NOOP case) */
7316 Assert(pagefrz->freeze_required || (!freeze_xmax && !replace_xmax));
7317 }
7318 else if (TransactionIdIsNormal(xid))
7319 {
7320 /* Raw xmax is normal XID */
7321 if (TransactionIdPrecedes(xid, cutoffs->relfrozenxid))
7322 ereport(ERROR,
7324 errmsg_internal("found xmax %u from before relfrozenxid %u",
7325 xid, cutoffs->relfrozenxid)));
7326
7327 /* Will set freeze_xmax flags in freeze plan below */
7329
7330 /*
7331 * Verify that xmax aborted if and when freeze plan is executed,
7332 * provided it's from an update. (A lock-only xmax can be removed
7333 * independent of this, since the lock is released at xact end.)
7334 */
7336 frz->checkflags |= HEAP_FREEZE_CHECK_XMAX_ABORTED;
7337 }
7338 else if (!TransactionIdIsValid(xid))
7339 {
7340 /* Raw xmax is InvalidTransactionId XID */
7341 Assert((tuple->t_infomask & HEAP_XMAX_IS_MULTI) == 0);
7342 xmax_already_frozen = true;
7343 }
7344 else
7345 ereport(ERROR,
7347 errmsg_internal("found raw xmax %u (infomask 0x%04x) not invalid and not multi",
7348 xid, tuple->t_infomask)));
7349
7350 if (freeze_xmin)
7351 {
7353
7354 frz->t_infomask |= HEAP_XMIN_FROZEN;
7355 }
7356 if (replace_xvac)
7357 {
7358 /*
7359 * If a MOVED_OFF tuple is not dead, the xvac transaction must have
7360 * failed; whereas a non-dead MOVED_IN tuple must mean the xvac
7361 * transaction succeeded.
7362 */
7363 Assert(pagefrz->freeze_required);
7364 if (tuple->t_infomask & HEAP_MOVED_OFF)
7365 frz->frzflags |= XLH_INVALID_XVAC;
7366 else
7367 frz->frzflags |= XLH_FREEZE_XVAC;
7368 }
7369 if (replace_xmax)
7370 {
7372 Assert(pagefrz->freeze_required);
7373
7374 /* Already set replace_xmax flags in freeze plan earlier */
7375 }
7376 if (freeze_xmax)
7377 {
7379
7380 frz->xmax = InvalidTransactionId;
7381
7382 /*
7383 * The tuple might be marked either XMAX_INVALID or XMAX_COMMITTED +
7384 * LOCKED. Normalize to INVALID just to be sure no one gets confused.
7385 * Also get rid of the HEAP_KEYS_UPDATED bit.
7386 */
7387 frz->t_infomask &= ~HEAP_XMAX_BITS;
7388 frz->t_infomask |= HEAP_XMAX_INVALID;
7389 frz->t_infomask2 &= ~HEAP_HOT_UPDATED;
7390 frz->t_infomask2 &= ~HEAP_KEYS_UPDATED;
7391 }
7392
7393 /*
7394 * Determine if this tuple is already totally frozen, or will become
7395 * totally frozen (provided caller executes freeze plans for the page)
7396 */
7399
7400 if (!pagefrz->freeze_required && !(xmin_already_frozen &&
7402 {
7403 /*
7404 * So far no previous tuple from the page made freezing mandatory.
7405 * Does this tuple force caller to freeze the entire page?
7406 */
7407 pagefrz->freeze_required =
7408 heap_tuple_should_freeze(tuple, cutoffs,
7409 &pagefrz->NoFreezePageRelfrozenXid,
7410 &pagefrz->NoFreezePageRelminMxid);
7411 }
7412
7413 /* Tell caller if this tuple has a usable freeze plan set in *frz */
7415}
static void GetMultiXactIdHintBits(MultiXactId multi, uint16 *new_infomask, uint16 *new_infomask2)
Definition heapam.c:7546
#define FRM_RETURN_IS_XID
Definition heapam.c:6739
static TransactionId FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, const struct VacuumCutoffs *cutoffs, uint16 *flags, HeapPageFreeze *pagefrz)
Definition heapam.c:6790
bool heap_tuple_should_freeze(HeapTupleHeader tuple, const struct VacuumCutoffs *cutoffs, TransactionId *NoFreezePageRelfrozenXid, MultiXactId *NoFreezePageRelminMxid)
Definition heapam.c:7965
#define FRM_MARK_COMMITTED
Definition heapam.c:6741
#define FRM_NOOP
Definition heapam.c:6737
#define FRM_RETURN_IS_MULTI
Definition heapam.c:6740
#define FRM_INVALIDATE_XMAX
Definition heapam.c:6738
#define HEAP_MOVED_OFF
#define HEAP_XMIN_FROZEN
static TransactionId HeapTupleHeaderGetXvac(const HeapTupleHeaderData *tup)
#define HEAP_XMAX_COMMITTED
bool MultiXactIdPrecedes(MultiXactId multi1, MultiXactId multi2)
Definition multixact.c:2857
#define MultiXactIdIsValid(multi)
Definition multixact.h:29
MultiXactId NoFreezePageRelminMxid
Definition heapam.h:244
TransactionId FreezePageConflictXid
Definition heapam.h:233
TransactionId NoFreezePageRelfrozenXid
Definition heapam.h:243
TransactionId OldestXmin
Definition vacuum.h:279
MultiXactId OldestMxact
Definition vacuum.h:280
static bool TransactionIdPrecedesOrEquals(TransactionId id1, TransactionId id2)
Definition transam.h:282

References Assert, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errmsg_internal(), ERROR, fb(), HeapPageFreeze::freeze_required, FreezeMultiXactId(), HeapPageFreeze::FreezePageConflictXid, FRM_INVALIDATE_XMAX, FRM_MARK_COMMITTED, FRM_NOOP, FRM_RETURN_IS_MULTI, FRM_RETURN_IS_XID, GetMultiXactIdHintBits(), HEAP_FREEZE_CHECK_XMAX_ABORTED, HEAP_FREEZE_CHECK_XMIN_COMMITTED, HEAP_MOVED_OFF, heap_tuple_should_freeze(), HEAP_XMAX_COMMITTED, HEAP_XMAX_INVALID, HEAP_XMAX_IS_LOCKED_ONLY(), HEAP_XMAX_IS_MULTI, HEAP_XMIN_FROZEN, HeapTupleHeaderGetRawXmax(), HeapTupleHeaderGetXmin(), HeapTupleHeaderGetXvac(), InvalidTransactionId, VacuumCutoffs::MultiXactCutoff, MultiXactIdIsValid, MultiXactIdPrecedes(), HeapPageFreeze::NoFreezePageRelfrozenXid, HeapPageFreeze::NoFreezePageRelminMxid, VacuumCutoffs::OldestMxact, VacuumCutoffs::OldestXmin, VacuumCutoffs::relfrozenxid, HeapTupleHeaderData::t_infomask, HeapTupleHeaderData::t_infomask2, TransactionIdFollows(), TransactionIdIsNormal, TransactionIdIsValid, TransactionIdPrecedes(), TransactionIdPrecedesOrEquals(), XLH_FREEZE_XVAC, and XLH_INVALID_XVAC.

Referenced by heap_freeze_tuple(), and heap_prune_record_unchanged_lp_normal().

◆ heap_prepare_pagescan()

void heap_prepare_pagescan ( TableScanDesc  sscan)
extern

Definition at line 616 of file heapam.c.

617{
619 Buffer buffer = scan->rs_cbuf;
620 BlockNumber block = scan->rs_cblock;
621 Snapshot snapshot;
622 Page page;
623 int lines;
624 bool all_visible;
626
627 Assert(BufferGetBlockNumber(buffer) == block);
628
629 /* ensure we're not accidentally being used when not in pagemode */
631 snapshot = scan->rs_base.rs_snapshot;
632
633 /*
634 * Prune and repair fragmentation for the whole page, if possible.
635 */
636 heap_page_prune_opt(scan->rs_base.rs_rd, buffer, &scan->rs_vmbuffer);
637
638 /*
639 * We must hold share lock on the buffer content while examining tuple
640 * visibility. Afterwards, however, the tuples we have found to be
641 * visible are guaranteed good as long as we hold the buffer pin.
642 */
644
645 page = BufferGetPage(buffer);
646 lines = PageGetMaxOffsetNumber(page);
647
648 /*
649 * If the all-visible flag indicates that all tuples on the page are
650 * visible to everyone, we can skip the per-tuple visibility tests.
651 *
652 * Note: In hot standby, a tuple that's already visible to all
653 * transactions on the primary might still be invisible to a read-only
654 * transaction in the standby. We partly handle this problem by tracking
655 * the minimum xmin of visible tuples as the cut-off XID while marking a
656 * page all-visible on the primary and WAL log that along with the
657 * visibility map SET operation. In hot standby, we wait for (or abort)
 658 * all transactions that can potentially not see one or more tuples on
659 * the page. That's how index-only scans work fine in hot standby. A
660 * crucial difference between index-only scans and heap scans is that the
 661 * index-only scan completely relies on the visibility map whereas heap
662 * scan looks at the page-level PD_ALL_VISIBLE flag. We are not sure if
663 * the page-level flag can be trusted in the same way, because it might
664 * get propagated somehow without being explicitly WAL-logged, e.g. via a
665 * full page write. Until we can prove that beyond doubt, let's check each
666 * tuple for visibility the hard way.
667 */
668 all_visible = PageIsAllVisible(page) && !snapshot->takenDuringRecovery;
671
672 /*
673 * We call page_collect_tuples() with constant arguments, to get the
674 * compiler to constant fold the constant arguments. Separate calls with
675 * constant arguments, rather than variables, are needed on several
676 * compilers to actually perform constant folding.
677 */
678 if (likely(all_visible))
679 {
681 scan->rs_ntuples = page_collect_tuples(scan, snapshot, page, buffer,
682 block, lines, true, false);
683 else
684 scan->rs_ntuples = page_collect_tuples(scan, snapshot, page, buffer,
685 block, lines, true, true);
686 }
687 else
688 {
690 scan->rs_ntuples = page_collect_tuples(scan, snapshot, page, buffer,
691 block, lines, false, false);
692 else
693 scan->rs_ntuples = page_collect_tuples(scan, snapshot, page, buffer,
694 block, lines, false, true);
695 }
696
698}
#define likely(x)
Definition c.h:431
static pg_attribute_always_inline int page_collect_tuples(HeapScanDesc scan, Snapshot snapshot, Page page, Buffer buffer, BlockNumber block, int lines, bool all_visible, bool check_serializable)
Definition heapam.c:522
bool CheckForSerializableConflictOutNeeded(Relation relation, Snapshot snapshot)
Definition predicate.c:4000
void heap_page_prune_opt(Relation relation, Buffer buffer, Buffer *vmbuffer)
Definition pruneheap.c:216
uint32 rs_ntuples
Definition heapam.h:105
BlockNumber rs_cblock
Definition heapam.h:69
bool takenDuringRecovery
Definition snapshot.h:180

References Assert, BUFFER_LOCK_SHARE, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage(), CheckForSerializableConflictOutNeeded(), fb(), heap_page_prune_opt(), likely, LockBuffer(), page_collect_tuples(), PageGetMaxOffsetNumber(), PageIsAllVisible(), HeapScanDescData::rs_base, HeapScanDescData::rs_cblock, HeapScanDescData::rs_cbuf, TableScanDescData::rs_flags, HeapScanDescData::rs_ntuples, TableScanDescData::rs_rd, TableScanDescData::rs_snapshot, HeapScanDescData::rs_vmbuffer, SO_ALLOW_PAGEMODE, and SnapshotData::takenDuringRecovery.

Referenced by heapam_scan_sample_next_block(), and heapgettup_pagemode().

◆ heap_rescan()

void heap_rescan ( TableScanDesc  sscan,
ScanKey  key,
bool  set_params,
bool  allow_strat,
bool  allow_sync,
bool  allow_pagemode 
)
extern

Definition at line 1319 of file heapam.c.

1321{
1323
1324 if (set_params)
1325 {
1326 if (allow_strat)
1328 else
1330
1331 if (allow_sync)
1333 else
1335
1336 if (allow_pagemode && scan->rs_base.rs_snapshot &&
1339 else
1341 }
1342
1343 /*
1344 * unpin scan buffers
1345 */
1346 if (BufferIsValid(scan->rs_cbuf))
1347 {
1348 ReleaseBuffer(scan->rs_cbuf);
1349 scan->rs_cbuf = InvalidBuffer;
1350 }
1351
1352 if (BufferIsValid(scan->rs_vmbuffer))
1353 {
1355 scan->rs_vmbuffer = InvalidBuffer;
1356 }
1357
1358 /*
1359 * SO_TYPE_BITMAPSCAN would be cleaned up here, but it does not hold any
1360 * additional data vs a normal HeapScan
1361 */
1362
1363 /*
1364 * The read stream is reset on rescan. This must be done before
1365 * initscan(), as some state referred to by read_stream_reset() is reset
1366 * in initscan().
1367 */
1368 if (scan->rs_read_stream)
1370
1371 /*
1372 * reinitialize scan descriptor
1373 */
1374 initscan(scan, key, true);
1375}
void read_stream_reset(ReadStream *stream)
@ SO_ALLOW_STRAT
Definition tableam.h:58
@ SO_ALLOW_SYNC
Definition tableam.h:60

References BufferIsValid(), fb(), initscan(), InvalidBuffer, IsMVCCSnapshot, read_stream_reset(), ReleaseBuffer(), HeapScanDescData::rs_base, HeapScanDescData::rs_cbuf, TableScanDescData::rs_flags, HeapScanDescData::rs_read_stream, TableScanDescData::rs_snapshot, HeapScanDescData::rs_vmbuffer, SO_ALLOW_PAGEMODE, SO_ALLOW_STRAT, and SO_ALLOW_SYNC.

◆ heap_set_tidrange()

void heap_set_tidrange ( TableScanDesc  sscan,
ItemPointer  mintid,
ItemPointer  maxtid 
)
extern

Definition at line 1489 of file heapam.c.

1491{
1497
1498 /*
1499 * For relations without any pages, we can simply leave the TID range
1500 * unset. There will be no tuples to scan, therefore no tuples outside
1501 * the given TID range.
1502 */
1503 if (scan->rs_nblocks == 0)
1504 return;
1505
1506 /*
1507 * Set up some ItemPointers which point to the first and last possible
1508 * tuples in the heap.
1509 */
1512
1513 /*
1514 * If the given maximum TID is below the highest possible TID in the
1515 * relation, then restrict the range to that, otherwise we scan to the end
1516 * of the relation.
1517 */
1520
1521 /*
1522 * If the given minimum TID is above the lowest possible TID in the
1523 * relation, then restrict the range to only scan for TIDs above that.
1524 */
1527
1528 /*
 1529 * Check for an empty range and protect from would-be negative results
1530 * from the numBlks calculation below.
1531 */
1533 {
1534 /* Set an empty range of blocks to scan */
1536 return;
1537 }
1538
1539 /*
1540 * Calculate the first block and the number of blocks we must scan. We
1541 * could be more aggressive here and perform some more validation to try
1542 * and further narrow the scope of blocks to scan by checking if the
1543 * lowestItem has an offset above MaxOffsetNumber. In this case, we could
1544 * advance startBlk by one. Likewise, if highestItem has an offset of 0
1545 * we could scan one fewer blocks. However, such an optimization does not
1546 * seem worth troubling over, currently.
1547 */
1549
1552
1553 /* Set the start block and number of blocks to scan */
1555
1556 /* Finally, set the TID range in sscan */
1557 ItemPointerCopy(&lowestItem, &sscan->st.tidrange.rs_mintid);
1558 ItemPointerCopy(&highestItem, &sscan->st.tidrange.rs_maxtid);
1559}
void heap_setscanlimits(TableScanDesc sscan, BlockNumber startBlk, BlockNumber numBlks)
Definition heapam.c:500
static BlockNumber ItemPointerGetBlockNumberNoCheck(const ItemPointerData *pointer)
Definition itemptr.h:93
#define MaxOffsetNumber
Definition off.h:28
BlockNumber rs_nblocks
Definition heapam.h:61

References fb(), FirstOffsetNumber, heap_setscanlimits(), ItemPointerCompare(), ItemPointerCopy(), ItemPointerGetBlockNumberNoCheck(), ItemPointerSet(), MaxOffsetNumber, and HeapScanDescData::rs_nblocks.

◆ heap_setscanlimits()

void heap_setscanlimits ( TableScanDesc  sscan,
BlockNumber  startBlk,
BlockNumber  numBlks 
)
extern

Definition at line 500 of file heapam.c.

501{
503
504 Assert(!scan->rs_inited); /* else too late to change */
505 /* else rs_startblock is significant */
507
508 /* Check startBlk is valid (but allow case of zero blocks...) */
509 Assert(startBlk == 0 || startBlk < scan->rs_nblocks);
510
511 scan->rs_startblock = startBlk;
512 scan->rs_numblocks = numBlks;
513}
BlockNumber rs_startblock
Definition heapam.h:62
BlockNumber rs_numblocks
Definition heapam.h:63

References Assert, fb(), HeapScanDescData::rs_base, TableScanDescData::rs_flags, HeapScanDescData::rs_inited, HeapScanDescData::rs_numblocks, HeapScanDescData::rs_startblock, and SO_ALLOW_SYNC.

Referenced by heap_set_tidrange(), and heapam_index_build_range_scan().

◆ heap_tuple_needs_eventual_freeze()

bool heap_tuple_needs_eventual_freeze ( HeapTupleHeader  tuple)
extern

Definition at line 7910 of file heapam.c.

7911{
7912 TransactionId xid;
7913
7914 /*
7915 * If xmin is a normal transaction ID, this tuple is definitely not
7916 * frozen.
7917 */
7918 xid = HeapTupleHeaderGetXmin(tuple);
7919 if (TransactionIdIsNormal(xid))
7920 return true;
7921
7922 /*
7923 * If xmax is a valid xact or multixact, this tuple is also not frozen.
7924 */
7925 if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
7926 {
7927 MultiXactId multi;
7928
7929 multi = HeapTupleHeaderGetRawXmax(tuple);
7930 if (MultiXactIdIsValid(multi))
7931 return true;
7932 }
7933 else
7934 {
7935 xid = HeapTupleHeaderGetRawXmax(tuple);
7936 if (TransactionIdIsNormal(xid))
7937 return true;
7938 }
7939
7940 if (tuple->t_infomask & HEAP_MOVED)
7941 {
7942 xid = HeapTupleHeaderGetXvac(tuple);
7943 if (TransactionIdIsNormal(xid))
7944 return true;
7945 }
7946
7947 return false;
7948}

References HEAP_MOVED, HEAP_XMAX_IS_MULTI, HeapTupleHeaderGetRawXmax(), HeapTupleHeaderGetXmin(), HeapTupleHeaderGetXvac(), MultiXactIdIsValid, HeapTupleHeaderData::t_infomask, and TransactionIdIsNormal.

Referenced by collect_corrupt_items(), and heap_page_would_be_all_visible().

◆ heap_tuple_should_freeze()

bool heap_tuple_should_freeze ( HeapTupleHeader  tuple,
const struct VacuumCutoffs cutoffs,
TransactionId NoFreezePageRelfrozenXid,
MultiXactId NoFreezePageRelminMxid 
)
extern

Definition at line 7965 of file heapam.c.

7969{
7970 TransactionId xid;
7971 MultiXactId multi;
7972 bool freeze = false;
7973
7974 /* First deal with xmin */
7975 xid = HeapTupleHeaderGetXmin(tuple);
7976 if (TransactionIdIsNormal(xid))
7977 {
7979 if (TransactionIdPrecedes(xid, *NoFreezePageRelfrozenXid))
7980 *NoFreezePageRelfrozenXid = xid;
7981 if (TransactionIdPrecedes(xid, cutoffs->FreezeLimit))
7982 freeze = true;
7983 }
7984
7985 /* Now deal with xmax */
7987 multi = InvalidMultiXactId;
7988 if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
7989 multi = HeapTupleHeaderGetRawXmax(tuple);
7990 else
7991 xid = HeapTupleHeaderGetRawXmax(tuple);
7992
7993 if (TransactionIdIsNormal(xid))
7994 {
7996 /* xmax is a non-permanent XID */
7997 if (TransactionIdPrecedes(xid, *NoFreezePageRelfrozenXid))
7998 *NoFreezePageRelfrozenXid = xid;
7999 if (TransactionIdPrecedes(xid, cutoffs->FreezeLimit))
8000 freeze = true;
8001 }
8002 else if (!MultiXactIdIsValid(multi))
8003 {
8004 /* xmax is a permanent XID or invalid MultiXactId/XID */
8005 }
8006 else if (HEAP_LOCKED_UPGRADED(tuple->t_infomask))
8007 {
8008 /* xmax is a pg_upgrade'd MultiXact, which can't have updater XID */
8009 if (MultiXactIdPrecedes(multi, *NoFreezePageRelminMxid))
8010 *NoFreezePageRelminMxid = multi;
8011 /* heap_prepare_freeze_tuple always freezes pg_upgrade'd xmax */
8012 freeze = true;
8013 }
8014 else
8015 {
8016 /* xmax is a MultiXactId that may have an updater XID */
8017 MultiXactMember *members;
8018 int nmembers;
8019
8021 if (MultiXactIdPrecedes(multi, *NoFreezePageRelminMxid))
8022 *NoFreezePageRelminMxid = multi;
8023 if (MultiXactIdPrecedes(multi, cutoffs->MultiXactCutoff))
8024 freeze = true;
8025
8026 /* need to check whether any member of the mxact is old */
8027 nmembers = GetMultiXactIdMembers(multi, &members, false,
8029
8030 for (int i = 0; i < nmembers; i++)
8031 {
8032 xid = members[i].xid;
8034 if (TransactionIdPrecedes(xid, *NoFreezePageRelfrozenXid))
8035 *NoFreezePageRelfrozenXid = xid;
8036 if (TransactionIdPrecedes(xid, cutoffs->FreezeLimit))
8037 freeze = true;
8038 }
8039 if (nmembers > 0)
8040 pfree(members);
8041 }
8042
8043 if (tuple->t_infomask & HEAP_MOVED)
8044 {
8045 xid = HeapTupleHeaderGetXvac(tuple);
8046 if (TransactionIdIsNormal(xid))
8047 {
8049 if (TransactionIdPrecedes(xid, *NoFreezePageRelfrozenXid))
8050 *NoFreezePageRelfrozenXid = xid;
8051 /* heap_prepare_freeze_tuple forces xvac freezing */
8052 freeze = true;
8053 }
8054 }
8055
8056 return freeze;
8057}
static bool HEAP_LOCKED_UPGRADED(uint16 infomask)
bool MultiXactIdPrecedesOrEquals(MultiXactId multi1, MultiXactId multi2)
Definition multixact.c:2871
#define InvalidMultiXactId
Definition multixact.h:25
TransactionId xid
Definition multixact.h:57

References Assert, VacuumCutoffs::FreezeLimit, GetMultiXactIdMembers(), HEAP_LOCKED_UPGRADED(), HEAP_MOVED, HEAP_XMAX_IS_LOCKED_ONLY(), HEAP_XMAX_IS_MULTI, HeapTupleHeaderGetRawXmax(), HeapTupleHeaderGetXmin(), HeapTupleHeaderGetXvac(), i, InvalidMultiXactId, InvalidTransactionId, VacuumCutoffs::MultiXactCutoff, MultiXactIdIsValid, MultiXactIdPrecedes(), MultiXactIdPrecedesOrEquals(), pfree(), VacuumCutoffs::relfrozenxid, VacuumCutoffs::relminmxid, HeapTupleHeaderData::t_infomask, TransactionIdIsNormal, TransactionIdPrecedes(), TransactionIdPrecedesOrEquals(), and MultiXactMember::xid.

Referenced by heap_prepare_freeze_tuple(), and lazy_scan_noprune().

◆ heap_update()

TM_Result heap_update ( Relation  relation,
const ItemPointerData otid,
HeapTuple  newtup,
CommandId  cid,
Snapshot  crosscheck,
bool  wait,
TM_FailureData tmfd,
LockTupleMode lockmode,
TU_UpdateIndexes update_indexes 
)
extern

Definition at line 3323 of file heapam.c.

3327{
3328 TM_Result result;
3336 ItemId lp;
3340 bool old_key_copied = false;
3341 Page page,
3342 newpage;
3343 BlockNumber block;
3345 Buffer buffer,
3346 newbuf,
3347 vmbuffer = InvalidBuffer,
3349 bool need_toast;
3351 pagefree;
3352 bool have_tuple_lock = false;
3353 bool iscombo;
3354 bool use_hot_update = false;
3355 bool summarized_update = false;
3356 bool key_intact;
3357 bool all_visible_cleared = false;
3358 bool all_visible_cleared_new = false;
3359 bool checked_lockers;
3360 bool locker_remains;
3361 bool id_has_external = false;
3368
3370
3371 /* Cheap, simplistic check that the tuple matches the rel's rowtype. */
3374
3375 AssertHasSnapshotForToast(relation);
3376
3377 /*
3378 * Forbid this during a parallel operation, lest it allocate a combo CID.
3379 * Other workers might need that combo CID for visibility checks, and we
3380 * have no provision for broadcasting it to them.
3381 */
3382 if (IsInParallelMode())
3383 ereport(ERROR,
3385 errmsg("cannot update tuples during a parallel operation")));
3386
3387#ifdef USE_ASSERT_CHECKING
3389#endif
3390
3391 /*
3392 * Fetch the list of attributes to be checked for various operations.
3393 *
3394 * For HOT considerations, this is wasted effort if we fail to update or
3395 * have to put the new tuple on a different page. But we must compute the
3396 * list before obtaining buffer lock --- in the worst case, if we are
3397 * doing an update on one of the relevant system catalogs, we could
3398 * deadlock if we try to fetch the list later. In any case, the relcache
3399 * caches the data so this is usually pretty cheap.
3400 *
3401 * We also need columns used by the replica identity and columns that are
3402 * considered the "key" of rows in the table.
3403 *
3404 * Note that we get copies of each bitmap, so we need not worry about
3405 * relcache flush happening midway through.
3406 */
3419
3421 INJECTION_POINT("heap_update-before-pin", NULL);
3422 buffer = ReadBuffer(relation, block);
3423 page = BufferGetPage(buffer);
3424
3425 /*
3426 * Before locking the buffer, pin the visibility map page if it appears to
3427 * be necessary. Since we haven't got the lock yet, someone else might be
3428 * in the middle of changing this, so we'll need to recheck after we have
3429 * the lock.
3430 */
3431 if (PageIsAllVisible(page))
3432 visibilitymap_pin(relation, block, &vmbuffer);
3433
3435
3437
3438 /*
3439 * Usually, a buffer pin and/or snapshot blocks pruning of otid, ensuring
3440 * we see LP_NORMAL here. When the otid origin is a syscache, we may have
3441 * neither a pin nor a snapshot. Hence, we may see other LP_ states, each
3442 * of which indicates concurrent pruning.
3443 *
3444 * Failing with TM_Updated would be most accurate. However, unlike other
3445 * TM_Updated scenarios, we don't know the successor ctid in LP_UNUSED and
3446 * LP_DEAD cases. While the distinction between TM_Updated and TM_Deleted
3447 * does matter to SQL statements UPDATE and MERGE, those SQL statements
3448 * hold a snapshot that ensures LP_NORMAL. Hence, the choice between
3449 * TM_Updated and TM_Deleted affects only the wording of error messages.
3450 * Settle on TM_Deleted, for two reasons. First, it avoids complicating
3451 * the specification of when tmfd->ctid is valid. Second, it creates
3452 * error log evidence that we took this branch.
3453 *
3454 * Since it's possible to see LP_UNUSED at otid, it's also possible to see
3455 * LP_NORMAL for a tuple that replaced LP_UNUSED. If it's a tuple for an
3456 * unrelated row, we'll fail with "duplicate key value violates unique".
3457 * XXX if otid is the live, newer version of the newtup row, we'll discard
3458 * changes originating in versions of this catalog row after the version
3459 * the caller got from syscache. See syscache-update-pruned.spec.
3460 */
3461 if (!ItemIdIsNormal(lp))
3462 {
3464
3465 UnlockReleaseBuffer(buffer);
3467 if (vmbuffer != InvalidBuffer)
3468 ReleaseBuffer(vmbuffer);
3469 tmfd->ctid = *otid;
3470 tmfd->xmax = InvalidTransactionId;
3471 tmfd->cmax = InvalidCommandId;
3473
3478 /* modified_attrs not yet initialized */
3480 return TM_Deleted;
3481 }
3482
3483 /*
3484 * Fill in enough data in oldtup for HeapDetermineColumnsInfo to work
3485 * properly.
3486 */
3487 oldtup.t_tableOid = RelationGetRelid(relation);
3488 oldtup.t_data = (HeapTupleHeader) PageGetItem(page, lp);
3489 oldtup.t_len = ItemIdGetLength(lp);
3490 oldtup.t_self = *otid;
3491
3492 /* the new tuple is ready, except for this: */
3493 newtup->t_tableOid = RelationGetRelid(relation);
3494
3495 /*
3496 * Determine columns modified by the update. Additionally, identify
3497 * whether any of the unmodified replica identity key attributes in the
3498 * old tuple is externally stored or not. This is required because for
3499 * such attributes the flattened value won't be WAL logged as part of the
3500 * new tuple so we must include it as part of the old_key_tuple. See
3501 * ExtractReplicaIdentity.
3502 */
3504 id_attrs, &oldtup,
3506
3507 /*
3508 * If we're not updating any "key" column, we can grab a weaker lock type.
3509 * This allows for more concurrency when we are running simultaneously
3510 * with foreign key checks.
3511 *
3512 * Note that if a column gets detoasted while executing the update, but
3513 * the value ends up being the same, this test will fail and we will use
3514 * the stronger lock. This is acceptable; the important case to optimize
3515 * is updates that don't manipulate key columns, not those that
3516 * serendipitously arrive at the same key values.
3517 */
3519 {
3520 *lockmode = LockTupleNoKeyExclusive;
3522 key_intact = true;
3523
3524 /*
3525 * If this is the first possibly-multixact-able operation in the
3526 * current transaction, set my per-backend OldestMemberMXactId
3527 * setting. We can be certain that the transaction will never become a
3528 * member of any older MultiXactIds than that. (We have to do this
3529 * even if we end up just using our own TransactionId below, since
3530 * some other backend could incorporate our XID into a MultiXact
3531 * immediately afterwards.)
3532 */
3534 }
3535 else
3536 {
3537 *lockmode = LockTupleExclusive;
3539 key_intact = false;
3540 }
3541
3542 /*
3543 * Note: beyond this point, use oldtup not otid to refer to old tuple.
3544 * otid may very well point at newtup->t_self, which we will overwrite
3545 * with the new tuple's location, so there's great risk of confusion if we
3546 * use otid anymore.
3547 */
3548
3549l2:
3550 checked_lockers = false;
3551 locker_remains = false;
3552 result = HeapTupleSatisfiesUpdate(&oldtup, cid, buffer);
3553
3554 /* see below about the "no wait" case */
3555 Assert(result != TM_BeingModified || wait);
3556
3557 if (result == TM_Invisible)
3558 {
3559 UnlockReleaseBuffer(buffer);
3560 ereport(ERROR,
3562 errmsg("attempted to update invisible tuple")));
3563 }
3564 else if (result == TM_BeingModified && wait)
3565 {
3568 bool can_continue = false;
3569
3570 /*
3571 * XXX note that we don't consider the "no wait" case here. This
3572 * isn't a problem currently because no caller uses that case, but it
3573 * should be fixed if such a caller is introduced. It wasn't a
3574 * problem previously because this code would always wait, but now
3575 * that some tuple locks do not conflict with one of the lock modes we
3576 * use, it is possible that this case is interesting to handle
3577 * specially.
3578 *
3579 * This may cause failures with third-party code that calls
3580 * heap_update directly.
3581 */
3582
3583 /* must copy state data before unlocking buffer */
3585 infomask = oldtup.t_data->t_infomask;
3586
3587 /*
3588 * Now we have to do something about the existing locker. If it's a
3589 * multi, sleep on it; we might be awakened before it is completely
3590 * gone (or even not sleep at all in some cases); we need to preserve
3591 * it as locker, unless it is gone completely.
3592 *
3593 * If it's not a multi, we need to check for sleeping conditions
3594 * before actually going to sleep. If the update doesn't conflict
3595 * with the locks, we just continue without sleeping (but making sure
3596 * it is preserved).
3597 *
3598 * Before sleeping, we need to acquire tuple lock to establish our
3599 * priority for the tuple (see heap_lock_tuple). LockTuple will
3600 * release us when we are next-in-line for the tuple. Note we must
3601 * not acquire the tuple lock until we're sure we're going to sleep;
3602 * otherwise we're open for race conditions with other transactions
3603 * holding the tuple lock which sleep on us.
3604 *
3605 * If we are forced to "start over" below, we keep the tuple lock;
3606 * this arranges that we stay at the head of the line while rechecking
3607 * tuple state.
3608 */
3610 {
3612 int remain;
3613 bool current_is_member = false;
3614
3616 *lockmode, &current_is_member))
3617 {
3619
3620 /*
3621 * Acquire the lock, if necessary (but skip it when we're
3622 * requesting a lock and already have one; avoids deadlock).
3623 */
3624 if (!current_is_member)
3625 heap_acquire_tuplock(relation, &(oldtup.t_self), *lockmode,
3627
3628 /* wait for multixact */
3630 relation, &oldtup.t_self, XLTW_Update,
3631 &remain);
3632 checked_lockers = true;
3633 locker_remains = remain != 0;
3635
3636 /*
3637 * If xwait had just locked the tuple then some other xact
3638 * could update this tuple before we get to this point. Check
3639 * for xmax change, and start over if so.
3640 */
3641 if (xmax_infomask_changed(oldtup.t_data->t_infomask,
3642 infomask) ||
3644 xwait))
3645 goto l2;
3646 }
3647
3648 /*
3649 * Note that the multixact may not be done by now. It could have
3650 * surviving members; our own xact or other subxacts of this
3651 * backend, and also any other concurrent transaction that locked
3652 * the tuple with LockTupleKeyShare if we only got
3653 * LockTupleNoKeyExclusive. If this is the case, we have to be
3654 * careful to mark the updated tuple with the surviving members in
3655 * Xmax.
3656 *
3657 * Note that there could have been another update in the
3658 * MultiXact. In that case, we need to check whether it committed
3659 * or aborted. If it aborted we are safe to update it again;
3660 * otherwise there is an update conflict, and we have to return
3661 * TableTuple{Deleted, Updated} below.
3662 *
3663 * In the LockTupleExclusive case, we still need to preserve the
3664 * surviving members: those would include the tuple locks we had
3665 * before this one, which are important to keep in case this
3666 * subxact aborts.
3667 */
3668 if (!HEAP_XMAX_IS_LOCKED_ONLY(oldtup.t_data->t_infomask))
3670 else
3672
3673 /*
3674 * There was no UPDATE in the MultiXact; or it aborted. No
3675 * TransactionIdIsInProgress() call needed here, since we called
3676 * MultiXactIdWait() above.
3677 */
3680 can_continue = true;
3681 }
3683 {
3684 /*
3685 * The only locker is ourselves; we can avoid grabbing the tuple
3686 * lock here, but must preserve our locking information.
3687 */
3688 checked_lockers = true;
3689 locker_remains = true;
3690 can_continue = true;
3691 }
3693 {
3694 /*
3695 * If it's just a key-share locker, and we're not changing the key
3696 * columns, we don't need to wait for it to end; but we need to
3697 * preserve it as locker.
3698 */
3699 checked_lockers = true;
3700 locker_remains = true;
3701 can_continue = true;
3702 }
3703 else
3704 {
3705 /*
3706 * Wait for regular transaction to end; but first, acquire tuple
3707 * lock.
3708 */
3710 heap_acquire_tuplock(relation, &(oldtup.t_self), *lockmode,
3712 XactLockTableWait(xwait, relation, &oldtup.t_self,
3713 XLTW_Update);
3714 checked_lockers = true;
3716
3717 /*
3718 * xwait is done, but if xwait had just locked the tuple then some
3719 * other xact could update this tuple before we get to this point.
3720 * Check for xmax change, and start over if so.
3721 */
3722 if (xmax_infomask_changed(oldtup.t_data->t_infomask, infomask) ||
3725 goto l2;
3726
3727 /* Otherwise check if it committed or aborted */
3728 UpdateXmaxHintBits(oldtup.t_data, buffer, xwait);
3729 if (oldtup.t_data->t_infomask & HEAP_XMAX_INVALID)
3730 can_continue = true;
3731 }
3732
3733 if (can_continue)
3734 result = TM_Ok;
3735 else if (!ItemPointerEquals(&oldtup.t_self, &oldtup.t_data->t_ctid))
3736 result = TM_Updated;
3737 else
3738 result = TM_Deleted;
3739 }
3740
 3741 /* Sanity check the result of HeapTupleSatisfiesUpdate() and the logic above */
3742 if (result != TM_Ok)
3743 {
3744 Assert(result == TM_SelfModified ||
3745 result == TM_Updated ||
3746 result == TM_Deleted ||
3747 result == TM_BeingModified);
3748 Assert(!(oldtup.t_data->t_infomask & HEAP_XMAX_INVALID));
3749 Assert(result != TM_Updated ||
3750 !ItemPointerEquals(&oldtup.t_self, &oldtup.t_data->t_ctid));
3751 }
3752
3753 if (crosscheck != InvalidSnapshot && result == TM_Ok)
3754 {
3755 /* Perform additional check for transaction-snapshot mode RI updates */
3757 result = TM_Updated;
3758 }
3759
3760 if (result != TM_Ok)
3761 {
3762 tmfd->ctid = oldtup.t_data->t_ctid;
3763 tmfd->xmax = HeapTupleHeaderGetUpdateXid(oldtup.t_data);
3764 if (result == TM_SelfModified)
3765 tmfd->cmax = HeapTupleHeaderGetCmax(oldtup.t_data);
3766 else
3767 tmfd->cmax = InvalidCommandId;
3768 UnlockReleaseBuffer(buffer);
3769 if (have_tuple_lock)
3770 UnlockTupleTuplock(relation, &(oldtup.t_self), *lockmode);
3771 if (vmbuffer != InvalidBuffer)
3772 ReleaseBuffer(vmbuffer);
3774
3781 return result;
3782 }
3783
3784 /*
3785 * If we didn't pin the visibility map page and the page has become all
3786 * visible while we were busy locking the buffer, or during some
3787 * subsequent window during which we had it unlocked, we'll have to unlock
3788 * and re-lock, to avoid holding the buffer lock across an I/O. That's a
3789 * bit unfortunate, especially since we'll now have to recheck whether the
3790 * tuple has been locked or updated under us, but hopefully it won't
3791 * happen very often.
3792 */
3793 if (vmbuffer == InvalidBuffer && PageIsAllVisible(page))
3794 {
3796 visibilitymap_pin(relation, block, &vmbuffer);
3798 goto l2;
3799 }
3800
3801 /* Fill in transaction status data */
3802
3803 /*
3804 * If the tuple we're updating is locked, we need to preserve the locking
3805 * info in the old tuple's Xmax. Prepare a new Xmax value for this.
3806 */
3808 oldtup.t_data->t_infomask,
3809 oldtup.t_data->t_infomask2,
3810 xid, *lockmode, true,
3813
3814 /*
3815 * And also prepare an Xmax value for the new copy of the tuple. If there
3816 * was no xmax previously, or there was one but all lockers are now gone,
3817 * then use InvalidTransactionId; otherwise, get the xmax from the old
3818 * tuple. (In rare cases that might also be InvalidTransactionId and yet
3819 * not have the HEAP_XMAX_INVALID bit set; that's fine.)
3820 */
3821 if ((oldtup.t_data->t_infomask & HEAP_XMAX_INVALID) ||
3822 HEAP_LOCKED_UPGRADED(oldtup.t_data->t_infomask) ||
3825 else
3827
3829 {
3832 }
3833 else
3834 {
3835 /*
3836 * If we found a valid Xmax for the new tuple, then the infomask bits
3837 * to use on the new tuple depend on what was there on the old one.
3838 * Note that since we're doing an update, the only possibility is that
3839 * the lockers had FOR KEY SHARE lock.
3840 */
3841 if (oldtup.t_data->t_infomask & HEAP_XMAX_IS_MULTI)
3842 {
3845 }
3846 else
3847 {
3850 }
3851 }
3852
3853 /*
3854 * Prepare the new tuple with the appropriate initial values of Xmin and
3855 * Xmax, as well as initial infomask bits as computed above.
3856 */
3857 newtup->t_data->t_infomask &= ~(HEAP_XACT_MASK);
3858 newtup->t_data->t_infomask2 &= ~(HEAP2_XACT_MASK);
3859 HeapTupleHeaderSetXmin(newtup->t_data, xid);
3861 newtup->t_data->t_infomask |= HEAP_UPDATED | infomask_new_tuple;
3862 newtup->t_data->t_infomask2 |= infomask2_new_tuple;
3864
3865 /*
3866 * Replace cid with a combo CID if necessary. Note that we already put
3867 * the plain cid into the new tuple.
3868 */
3870
3871 /*
3872 * If the toaster needs to be activated, OR if the new tuple will not fit
3873 * on the same page as the old, then we need to release the content lock
3874 * (but not the pin!) on the old tuple's buffer while we are off doing
3875 * TOAST and/or table-file-extension work. We must mark the old tuple to
3876 * show that it's locked, else other processes may try to update it
3877 * themselves.
3878 *
3879 * We need to invoke the toaster if there are already any out-of-line
3880 * toasted values present, or if the new tuple is over-threshold.
3881 */
3882 if (relation->rd_rel->relkind != RELKIND_RELATION &&
3883 relation->rd_rel->relkind != RELKIND_MATVIEW)
3884 {
3885 /* toast table entries should never be recursively toasted */
3888 need_toast = false;
3889 }
3890 else
3893 newtup->t_len > TOAST_TUPLE_THRESHOLD);
3894
3896
3897 newtupsize = MAXALIGN(newtup->t_len);
3898
3900 {
3904 bool cleared_all_frozen = false;
3905
3906 /*
3907 * To prevent concurrent sessions from updating the tuple, we have to
3908 * temporarily mark it locked, while we release the page-level lock.
3909 *
 3910 * To satisfy the rule that any xid potentially appearing in a buffer
 3911 * written out to disk must first be WAL-logged, we unfortunately have to
 3912 * WAL log this temporary modification. We can reuse xl_heap_lock for this
3913 * purpose. If we crash/error before following through with the
3914 * actual update, xmax will be of an aborted transaction, allowing
3915 * other sessions to proceed.
3916 */
3917
3918 /*
3919 * Compute xmax / infomask appropriate for locking the tuple. This has
3920 * to be done separately from the combo that's going to be used for
3921 * updating, because the potentially created multixact would otherwise
3922 * be wrong.
3923 */
3925 oldtup.t_data->t_infomask,
3926 oldtup.t_data->t_infomask2,
3927 xid, *lockmode, false,
3930
3932
3934
3935 /* Clear obsolete visibility flags ... */
3936 oldtup.t_data->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
3937 oldtup.t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED;
3939 /* ... and store info about transaction updating this tuple */
3942 oldtup.t_data->t_infomask |= infomask_lock_old_tuple;
3943 oldtup.t_data->t_infomask2 |= infomask2_lock_old_tuple;
3945
3946 /* temporarily make it look not-updated, but locked */
3947 oldtup.t_data->t_ctid = oldtup.t_self;
3948
3949 /*
3950 * Clear all-frozen bit on visibility map if needed. We could
3951 * immediately reset ALL_VISIBLE, but given that the WAL logging
3952 * overhead would be unchanged, that doesn't seem necessarily
3953 * worthwhile.
3954 */
3955 if (PageIsAllVisible(page) &&
3956 visibilitymap_clear(relation, block, vmbuffer,
3958 cleared_all_frozen = true;
3959
3960 MarkBufferDirty(buffer);
3961
3962 if (RelationNeedsWAL(relation))
3963 {
3966
3969
3970 xlrec.offnum = ItemPointerGetOffsetNumber(&oldtup.t_self);
3972 xlrec.infobits_set = compute_infobits(oldtup.t_data->t_infomask,
3973 oldtup.t_data->t_infomask2);
3974 xlrec.flags =
3978 PageSetLSN(page, recptr);
3979 }
3980
3982
3984
3985 /*
3986 * Let the toaster do its thing, if needed.
3987 *
3988 * Note: below this point, heaptup is the data we actually intend to
3989 * store into the relation; newtup is the caller's original untoasted
3990 * data.
3991 */
3992 if (need_toast)
3993 {
3994 /* Note we always use WAL and FSM during updates */
3996 newtupsize = MAXALIGN(heaptup->t_len);
3997 }
3998 else
3999 heaptup = newtup;
4000
4001 /*
4002 * Now, do we need a new page for the tuple, or not? This is a bit
4003 * tricky since someone else could have added tuples to the page while
4004 * we weren't looking. We have to recheck the available space after
4005 * reacquiring the buffer lock. But don't bother to do that if the
4006 * former amount of free space is still not enough; it's unlikely
4007 * there's more free now than before.
4008 *
4009 * What's more, if we need to get a new page, we will need to acquire
4010 * buffer locks on both old and new pages. To avoid deadlock against
4011 * some other backend trying to get the same two locks in the other
4012 * order, we must be consistent about the order we get the locks in.
4013 * We use the rule "lock the lower-numbered page of the relation
4014 * first". To implement this, we must do RelationGetBufferForTuple
4015 * while not holding the lock on the old page, and we must rely on it
4016 * to get the locks on both pages in the correct order.
4017 *
4018 * Another consideration is that we need visibility map page pin(s) if
4019 * we will have to clear the all-visible flag on either page. If we
4020 * call RelationGetBufferForTuple, we rely on it to acquire any such
4021 * pins; but if we don't, we have to handle that here. Hence we need
4022 * a loop.
4023 */
4024 for (;;)
4025 {
4026 if (newtupsize > pagefree)
4027 {
4028 /* It doesn't fit, must use RelationGetBufferForTuple. */
4029 newbuf = RelationGetBufferForTuple(relation, heaptup->t_len,
4030 buffer, 0, NULL,
4031 &vmbuffer_new, &vmbuffer,
4032 0);
4033 /* We're all done. */
4034 break;
4035 }
4036 /* Acquire VM page pin if needed and we don't have it. */
4037 if (vmbuffer == InvalidBuffer && PageIsAllVisible(page))
4038 visibilitymap_pin(relation, block, &vmbuffer);
4039 /* Re-acquire the lock on the old tuple's page. */
4041 /* Re-check using the up-to-date free space */
4043 if (newtupsize > pagefree ||
4044 (vmbuffer == InvalidBuffer && PageIsAllVisible(page)))
4045 {
4046 /*
4047 * Rats, it doesn't fit anymore, or somebody just now set the
4048 * all-visible flag. We must now unlock and loop to avoid
4049 * deadlock. Fortunately, this path should seldom be taken.
4050 */
4052 }
4053 else
4054 {
4055 /* We're all done. */
4056 newbuf = buffer;
4057 break;
4058 }
4059 }
4060 }
4061 else
4062 {
4063 /* No TOAST work needed, and it'll fit on same page */
4064 newbuf = buffer;
4065 heaptup = newtup;
4066 }
4067
4069
4070 /*
4071 * We're about to do the actual update -- check for conflict first, to
4072 * avoid possibly having to roll back work we've just done.
4073 *
4074 * This is safe without a recheck as long as there is no possibility of
4075 * another process scanning the pages between this check and the update
4076 * being visible to the scan (i.e., exclusive buffer content lock(s) are
4077 * continuously held from this point until the tuple update is visible).
4078 *
4079 * For the new tuple the only check needed is at the relation level, but
4080 * since both tuples are in the same relation and the check for oldtup
4081 * will include checking the relation level, there is no benefit to a
4082 * separate check for the new tuple.
4083 */
4084 CheckForSerializableConflictIn(relation, &oldtup.t_self,
4085 BufferGetBlockNumber(buffer));
4086
4087 /*
4088 * At this point newbuf and buffer are both pinned and locked, and newbuf
4089 * has enough space for the new tuple. If they are the same buffer, only
4090 * one pin is held.
4091 */
4092
4093 if (newbuf == buffer)
4094 {
4095 /*
4096 * Since the new tuple is going into the same page, we might be able
4097 * to do a HOT update. Check if any of the index columns have been
4098 * changed.
4099 */
4101 {
4102 use_hot_update = true;
4103
4104 /*
4105 * If none of the columns that are used in hot-blocking indexes
4106 * were updated, we can apply HOT, but we do still need to check
4107 * if we need to update the summarizing indexes, and update those
4108 * indexes if the columns were updated, or we may fail to detect
4109 * e.g. value bound changes in BRIN minmax indexes.
4110 */
4112 summarized_update = true;
4113 }
4114 }
4115 else
4116 {
4117 /* Set a hint that the old page could use prune/defrag */
4118 PageSetFull(page);
4119 }
4120
4121 /*
4122 * Compute replica identity tuple before entering the critical section so
4123 * we don't PANIC upon a memory allocation failure.
4124 * ExtractReplicaIdentity() will return NULL if nothing needs to be
4125 * logged. Pass old key required as true only if the replica identity key
4126 * columns are modified or it has external data.
4127 */
4132
4133 /* NO EREPORT(ERROR) from here till changes are logged */
4135
4136 /*
4137 * If this transaction commits, the old tuple will become DEAD sooner or
4138 * later. Set flag that this page is a candidate for pruning once our xid
4139 * falls below the OldestXmin horizon. If the transaction finally aborts,
4140 * the subsequent page pruning will be a no-op and the hint will be
4141 * cleared.
4142 *
4143 * XXX Should we set hint on newbuf as well? If the transaction aborts,
4144 * there would be a prunable tuple in the newbuf; but for now we choose
4145 * not to optimize for aborts. Note that heap_xlog_update must be kept in
4146 * sync if this decision changes.
4147 */
4148 PageSetPrunable(page, xid);
4149
4150 if (use_hot_update)
4151 {
4152 /* Mark the old tuple as HOT-updated */
4154 /* And mark the new tuple as heap-only */
4156 /* Mark the caller's copy too, in case different from heaptup */
4158 }
4159 else
4160 {
4161 /* Make sure tuples are correctly marked as not-HOT */
4165 }
4166
4167 RelationPutHeapTuple(relation, newbuf, heaptup, false); /* insert new tuple */
4168
4169
4170 /* Clear obsolete visibility flags, possibly set by ourselves above... */
4171 oldtup.t_data->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
4172 oldtup.t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED;
4173 /* ... and store info about transaction updating this tuple */
4176 oldtup.t_data->t_infomask |= infomask_old_tuple;
4177 oldtup.t_data->t_infomask2 |= infomask2_old_tuple;
4179
4180 /* record address of new tuple in t_ctid of old one */
4181 oldtup.t_data->t_ctid = heaptup->t_self;
4182
4183 /* clear PD_ALL_VISIBLE flags, reset all visibilitymap bits */
4184 if (PageIsAllVisible(page))
4185 {
4186 all_visible_cleared = true;
4187 PageClearAllVisible(page);
4188 visibilitymap_clear(relation, BufferGetBlockNumber(buffer),
4189 vmbuffer, VISIBILITYMAP_VALID_BITS);
4190 }
4191 if (newbuf != buffer && PageIsAllVisible(newpage))
4192 {
4197 }
4198
4199 if (newbuf != buffer)
4201 MarkBufferDirty(buffer);
4202
4203 /* XLOG stuff */
4204 if (RelationNeedsWAL(relation))
4205 {
4207
4208 /*
4209 * For logical decoding we need combo CIDs to properly decode the
4210 * catalog.
4211 */
4213 {
4214 log_heap_new_cid(relation, &oldtup);
4215 log_heap_new_cid(relation, heaptup);
4216 }
4217
4218 recptr = log_heap_update(relation, buffer,
4223 if (newbuf != buffer)
4224 {
4226 }
4227 PageSetLSN(page, recptr);
4228 }
4229
4231
4232 if (newbuf != buffer)
4235
4236 /*
4237 * Mark old tuple for invalidation from system caches at next command
4238 * boundary, and mark the new tuple for invalidation in case we abort. We
4239 * have to do this before releasing the buffer because oldtup is in the
4240 * buffer. (heaptup is all in local memory, but it's necessary to process
4241 * both tuple versions in one call to inval.c so we can avoid redundant
4242 * sinval messages.)
4243 */
4245
4246 /* Now we can release the buffer(s) */
4247 if (newbuf != buffer)
4249 ReleaseBuffer(buffer);
4252 if (BufferIsValid(vmbuffer))
4253 ReleaseBuffer(vmbuffer);
4254
4255 /*
4256 * Release the lmgr tuple lock, if we had it.
4257 */
4258 if (have_tuple_lock)
4259 UnlockTupleTuplock(relation, &(oldtup.t_self), *lockmode);
4260
4261 pgstat_count_heap_update(relation, use_hot_update, newbuf != buffer);
4262
4263 /*
4264 * If heaptup is a private copy, release it. Don't forget to copy t_self
4265 * back to the caller's image, too.
4266 */
4267 if (heaptup != newtup)
4268 {
4269 newtup->t_self = heaptup->t_self;
4271 }
4272
4273 /*
4274 * If it is a HOT update, the update may still need to update summarized
4275 * indexes, lest we fail to update those summaries and get incorrect
4276 * results (for example, minmax bounds of the block may change with this
4277 * update).
4278 */
4279 if (use_hot_update)
4280 {
4283 else
4285 }
4286 else
4288
4291
4298
4299 return TM_Ok;
4300}
void bms_free(Bitmapset *a)
Definition bitmapset.c:239
Bitmapset * bms_add_members(Bitmapset *a, const Bitmapset *b)
Definition bitmapset.c:901
bool bms_overlap(const Bitmapset *a, const Bitmapset *b)
Definition bitmapset.c:575
static void PageSetFull(Page page)
Definition bufpage.h:444
static Bitmapset * HeapDetermineColumnsInfo(Relation relation, Bitmapset *interesting_cols, Bitmapset *external_cols, HeapTuple oldtup, HeapTuple newtup, bool *has_external)
Definition heapam.c:4480
static XLogRecPtr log_heap_update(Relation reln, Buffer oldbuf, Buffer newbuf, HeapTuple oldtup, HeapTuple newtup, HeapTuple old_key_tuple, bool all_visible_cleared, bool new_all_visible_cleared)
Definition heapam.c:8938
TransactionId HeapTupleGetUpdateXid(const HeapTupleHeaderData *tup)
Definition heapam.c:7679
HeapTuple heap_toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup, int options)
Definition heaptoast.c:96
#define TOAST_TUPLE_THRESHOLD
Definition heaptoast.h:48
static void HeapTupleClearHotUpdated(const HeapTupleData *tuple)
#define HEAP2_XACT_MASK
#define HEAP_XMAX_LOCK_ONLY
static void HeapTupleHeaderSetCmin(HeapTupleHeaderData *tup, CommandId cid)
static void HeapTupleSetHeapOnly(const HeapTupleData *tuple)
#define HEAP_XACT_MASK
static void HeapTupleSetHotUpdated(const HeapTupleData *tuple)
static void HeapTupleClearHeapOnly(const HeapTupleData *tuple)
#define HEAP_UPDATED
#define HEAP_XMAX_KEYSHR_LOCK
#define INJECTION_POINT(name, arg)
void pgstat_count_heap_update(Relation rel, bool hot, bool newpage)
Bitmapset * RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind)
Definition relcache.c:5293
@ INDEX_ATTR_BITMAP_KEY
Definition relcache.h:69
@ INDEX_ATTR_BITMAP_HOT_BLOCKING
Definition relcache.h:72
@ INDEX_ATTR_BITMAP_SUMMARIZED
Definition relcache.h:73
@ INDEX_ATTR_BITMAP_IDENTITY_KEY
Definition relcache.h:71
bool RelationSupportsSysCache(Oid relid)
Definition syscache.c:762
@ TU_Summarizing
Definition tableam.h:119
@ TU_All
Definition tableam.h:116
@ TU_None
Definition tableam.h:113
bool TransactionIdDidAbort(TransactionId transactionId)
Definition transam.c:188

References Assert, AssertHasSnapshotForToast(), bms_add_members(), bms_free(), bms_overlap(), BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage(), BufferIsValid(), CacheInvalidateHeapTuple(), CheckForSerializableConflictIn(), TM_FailureData::cmax, compute_infobits(), compute_new_xmax_infomask(), TM_FailureData::ctid, DoesMultiXactIdConflict(), END_CRIT_SECTION, ereport, errcode(), errmsg, ERROR, ExtractReplicaIdentity(), fb(), GetCurrentTransactionId(), GetMultiXactIdHintBits(), HEAP2_XACT_MASK, heap_acquire_tuplock(), heap_freetuple(), HEAP_LOCKED_UPGRADED(), HEAP_MOVED, heap_toast_insert_or_update(), HEAP_UPDATED, HEAP_XACT_MASK, HEAP_XMAX_BITS, HEAP_XMAX_INVALID, HEAP_XMAX_IS_KEYSHR_LOCKED(), HEAP_XMAX_IS_LOCKED_ONLY(), HEAP_XMAX_IS_MULTI, HEAP_XMAX_KEYSHR_LOCK, HEAP_XMAX_LOCK_ONLY, HeapDetermineColumnsInfo(), HeapTupleClearHeapOnly(), HeapTupleClearHotUpdated(), HeapTupleGetUpdateXid(), HeapTupleHasExternal(), HeapTupleHeaderAdjustCmax(), HeapTupleHeaderGetCmax(), HeapTupleHeaderGetNatts, HeapTupleHeaderGetRawXmax(), HeapTupleHeaderGetUpdateXid(), HeapTupleHeaderSetCmax(), HeapTupleHeaderSetCmin(), HeapTupleHeaderSetXmax(), HeapTupleHeaderSetXmin(), HeapTupleSatisfiesUpdate(), HeapTupleSatisfiesVisibility(), HeapTupleSetHeapOnly(), HeapTupleSetHotUpdated(), INDEX_ATTR_BITMAP_HOT_BLOCKING, INDEX_ATTR_BITMAP_IDENTITY_KEY, INDEX_ATTR_BITMAP_KEY, INDEX_ATTR_BITMAP_SUMMARIZED, INJECTION_POINT, InvalidBuffer, InvalidCommandId, InvalidSnapshot, InvalidTransactionId, IsInParallelMode(), ItemIdGetLength, ItemIdIsNormal, ItemPointerEquals(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), ItemPointerIsValid(), LockBuffer(), LockTupleExclusive, LockTupleNoKeyExclusive, LockWaitBlock, log_heap_new_cid(), log_heap_update(), MarkBufferDirty(), MAXALIGN, MultiXactIdSetOldestMember(), MultiXactIdWait(), MultiXactStatusNoKeyUpdate, MultiXactStatusUpdate, PageClearAllVisible(), PageGetHeapFreeSpace(), PageGetItem(), PageGetItemId(), 
PageIsAllVisible(), PageSetFull(), PageSetLSN(), PageSetPrunable, pgstat_count_heap_update(), RelationData::rd_rel, ReadBuffer(), REGBUF_STANDARD, RelationGetBufferForTuple(), RelationGetIndexAttrBitmap(), RelationGetNumberOfAttributes, RelationGetRelid, RelationIsAccessibleInLogicalDecoding, RelationNeedsWAL, RelationPutHeapTuple(), RelationSupportsSysCache(), ReleaseBuffer(), SizeOfHeapLock, START_CRIT_SECTION, TM_BeingModified, TM_Deleted, TM_Invisible, TM_Ok, TM_SelfModified, TM_Updated, TOAST_TUPLE_THRESHOLD, TransactionIdDidAbort(), TransactionIdEquals, TransactionIdIsCurrentTransactionId(), TransactionIdIsValid, TU_All, TU_None, TU_Summarizing, UnlockReleaseBuffer(), UnlockTupleTuplock, UpdateXmaxHintBits(), VISIBILITYMAP_ALL_FROZEN, visibilitymap_clear(), visibilitymap_pin(), VISIBILITYMAP_VALID_BITS, XactLockTableWait(), XLH_LOCK_ALL_FROZEN_CLEARED, XLOG_HEAP_LOCK, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), XLogRegisterData(), XLTW_Update, TM_FailureData::xmax, and xmax_infomask_changed().

Referenced by heapam_tuple_update(), and simple_heap_update().

◆ heap_vacuum_rel()

void heap_vacuum_rel ( Relation  rel,
const VacuumParams  params,
BufferAccessStrategy  bstrategy 
)
extern

Definition at line 634 of file vacuumlazy.c.

636{
638 bool verbose,
639 instrument,
640 skipwithvm,
648 TimestampTz starttime = 0;
650 startwritetime = 0;
653 ErrorContextCallback errcallback;
654 char **indnames = NULL;
656
657 verbose = (params.options & VACOPT_VERBOSE) != 0;
658 instrument = (verbose || (AmAutoVacuumWorkerProcess() &&
659 params.log_vacuum_min_duration >= 0));
660 if (instrument)
661 {
663 if (track_io_timing)
664 {
667 }
668 }
669
670 /* Used for instrumentation and stats report */
671 starttime = GetCurrentTimestamp();
672
674 RelationGetRelid(rel));
677 params.is_wraparound
680 else
683
684 /*
685 * Setup error traceback support for ereport() first. The idea is to set
686 * up an error context callback to display additional information on any
687 * error during a vacuum. During different phases of vacuum, we update
 688 the state so that the error context callback always displays current
689 * information.
690 *
691 * Copy the names of heap rel into local memory for error reporting
692 * purposes, too. It isn't always safe to assume that we can get the name
693 * of each rel. It's convenient for code in lazy_scan_heap to always use
694 * these temp copies.
695 */
698 vacrel->relnamespace = get_namespace_name(RelationGetNamespace(rel));
699 vacrel->relname = pstrdup(RelationGetRelationName(rel));
700 vacrel->indname = NULL;
702 vacrel->verbose = verbose;
703 errcallback.callback = vacuum_error_callback;
704 errcallback.arg = vacrel;
705 errcallback.previous = error_context_stack;
706 error_context_stack = &errcallback;
707
708 /* Set up high level stuff about rel and its indexes */
709 vacrel->rel = rel;
711 &vacrel->indrels);
712 vacrel->bstrategy = bstrategy;
713 if (instrument && vacrel->nindexes > 0)
714 {
715 /* Copy index names used by instrumentation (not error reporting) */
716 indnames = palloc_array(char *, vacrel->nindexes);
717 for (int i = 0; i < vacrel->nindexes; i++)
719 }
720
721 /*
722 * The index_cleanup param either disables index vacuuming and cleanup or
723 * forces it to go ahead when we would otherwise apply the index bypass
724 * optimization. The default is 'auto', which leaves the final decision
725 * up to lazy_vacuum().
726 *
727 * The truncate param allows user to avoid attempting relation truncation,
728 * though it can't force truncation to happen.
729 */
732 params.truncate != VACOPTVALUE_AUTO);
733
734 /*
735 * While VacuumFailSafeActive is reset to false before calling this, we
736 * still need to reset it here due to recursive calls.
737 */
738 VacuumFailsafeActive = false;
739 vacrel->consider_bypass_optimization = true;
740 vacrel->do_index_vacuuming = true;
741 vacrel->do_index_cleanup = true;
742 vacrel->do_rel_truncate = (params.truncate != VACOPTVALUE_DISABLED);
744 {
745 /* Force disable index vacuuming up-front */
746 vacrel->do_index_vacuuming = false;
747 vacrel->do_index_cleanup = false;
748 }
749 else if (params.index_cleanup == VACOPTVALUE_ENABLED)
750 {
751 /* Force index vacuuming. Note that failsafe can still bypass. */
752 vacrel->consider_bypass_optimization = false;
753 }
754 else
755 {
756 /* Default/auto, make all decisions dynamically */
758 }
759
760 /* Initialize page counters explicitly (be tidy) */
761 vacrel->scanned_pages = 0;
762 vacrel->eager_scanned_pages = 0;
763 vacrel->removed_pages = 0;
764 vacrel->new_frozen_tuple_pages = 0;
765 vacrel->lpdead_item_pages = 0;
766 vacrel->missed_dead_pages = 0;
767 vacrel->nonempty_pages = 0;
768 /* dead_items_alloc allocates vacrel->dead_items later on */
769
770 /* Allocate/initialize output statistics state */
771 vacrel->new_rel_tuples = 0;
772 vacrel->new_live_tuples = 0;
773 vacrel->indstats = (IndexBulkDeleteResult **)
774 palloc0(vacrel->nindexes * sizeof(IndexBulkDeleteResult *));
775
776 /* Initialize remaining counters (be tidy) */
777 vacrel->num_index_scans = 0;
778 vacrel->num_dead_items_resets = 0;
779 vacrel->total_dead_items_bytes = 0;
780 vacrel->tuples_deleted = 0;
781 vacrel->tuples_frozen = 0;
782 vacrel->lpdead_items = 0;
783 vacrel->live_tuples = 0;
784 vacrel->recently_dead_tuples = 0;
785 vacrel->missed_dead_tuples = 0;
786
787 vacrel->new_all_visible_pages = 0;
788 vacrel->new_all_visible_all_frozen_pages = 0;
789 vacrel->new_all_frozen_pages = 0;
790
791 vacrel->worker_usage.vacuum.nlaunched = 0;
792 vacrel->worker_usage.vacuum.nplanned = 0;
793 vacrel->worker_usage.cleanup.nlaunched = 0;
794 vacrel->worker_usage.cleanup.nplanned = 0;
795
796 /*
797 * Get cutoffs that determine which deleted tuples are considered DEAD,
798 * not just RECENTLY_DEAD, and which XIDs/MXIDs to freeze. Then determine
799 * the extent of the blocks that we'll scan in lazy_scan_heap. It has to
800 * happen in this order to ensure that the OldestXmin cutoff field works
801 * as an upper bound on the XIDs stored in the pages we'll actually scan
802 * (NewRelfrozenXid tracking must never be allowed to miss unfrozen XIDs).
803 *
804 * Next acquire vistest, a related cutoff that's used in pruning. We use
805 * vistest in combination with OldestXmin to ensure that
806 * heap_page_prune_and_freeze() always removes any deleted tuple whose
807 * xmax is < OldestXmin. lazy_scan_prune must never become confused about
808 * whether a tuple should be frozen or removed. (In the future we might
809 * want to teach lazy_scan_prune to recompute vistest from time to time,
810 * to increase the number of dead tuples it can prune away.)
811 */
812 vacrel->aggressive = vacuum_get_cutoffs(rel, params, &vacrel->cutoffs);
814 vacrel->vistest = GlobalVisTestFor(rel);
815
816 /* Initialize state used to track oldest extant XID/MXID */
817 vacrel->NewRelfrozenXid = vacrel->cutoffs.OldestXmin;
818 vacrel->NewRelminMxid = vacrel->cutoffs.OldestMxact;
819
820 /*
821 * Initialize state related to tracking all-visible page skipping. This is
822 * very important to determine whether or not it is safe to advance the
823 * relfrozenxid/relminmxid.
824 */
825 vacrel->skippedallvis = false;
826 skipwithvm = true;
828 {
829 /*
830 * Force aggressive mode, and disable skipping blocks using the
831 * visibility map (even those set all-frozen)
832 */
833 vacrel->aggressive = true;
834 skipwithvm = false;
835 }
836
837 vacrel->skipwithvm = skipwithvm;
838
839 /*
840 * Set up eager scan tracking state. This must happen after determining
841 * whether or not the vacuum must be aggressive, because only normal
842 * vacuums use the eager scan algorithm.
843 */
845
846 /* Report the vacuum mode: 'normal' or 'aggressive' */
848 vacrel->aggressive
851
852 if (verbose)
853 {
854 if (vacrel->aggressive)
856 (errmsg("aggressively vacuuming \"%s.%s.%s\"",
857 vacrel->dbname, vacrel->relnamespace,
858 vacrel->relname)));
859 else
861 (errmsg("vacuuming \"%s.%s.%s\"",
862 vacrel->dbname, vacrel->relnamespace,
863 vacrel->relname)));
864 }
865
866 /*
867 * Allocate dead_items memory using dead_items_alloc. This handles
868 * parallel VACUUM initialization as part of allocating shared memory
869 * space used for dead_items. (But do a failsafe precheck first, to
870 * ensure that parallel VACUUM won't be attempted at all when relfrozenxid
871 * is already dangerously old.)
872 */
875
876 /*
877 * Call lazy_scan_heap to perform all required heap pruning, index
878 * vacuuming, and heap vacuuming (plus related processing)
879 */
881
882 /*
883 * Save dead items max_bytes and update the memory usage statistics before
 884 * cleanup; they are freed in parallel vacuum cases during
885 * dead_items_cleanup().
886 */
887 dead_items_max_bytes = vacrel->dead_items_info->max_bytes;
888 vacrel->total_dead_items_bytes += TidStoreMemoryUsage(vacrel->dead_items);
889
890 /*
891 * Free resources managed by dead_items_alloc. This ends parallel mode in
892 * passing when necessary.
893 */
896
897 /*
898 * Update pg_class entries for each of rel's indexes where appropriate.
899 *
900 * Unlike the later update to rel's pg_class entry, this is not critical.
901 * Maintains relpages/reltuples statistics used by the planner only.
902 */
903 if (vacrel->do_index_cleanup)
905
906 /* Done with rel's indexes */
907 vac_close_indexes(vacrel->nindexes, vacrel->indrels, NoLock);
908
909 /* Optionally truncate rel */
912
913 /* Pop the error context stack */
914 error_context_stack = errcallback.previous;
915
916 /* Report that we are now doing final cleanup */
919
920 /*
921 * Prepare to update rel's pg_class entry.
922 *
923 * Aggressive VACUUMs must always be able to advance relfrozenxid to a
924 * value >= FreezeLimit, and relminmxid to a value >= MultiXactCutoff.
925 * Non-aggressive VACUUMs may advance them by any amount, or not at all.
926 */
927 Assert(vacrel->NewRelfrozenXid == vacrel->cutoffs.OldestXmin ||
928 TransactionIdPrecedesOrEquals(vacrel->aggressive ? vacrel->cutoffs.FreezeLimit :
929 vacrel->cutoffs.relfrozenxid,
930 vacrel->NewRelfrozenXid));
931 Assert(vacrel->NewRelminMxid == vacrel->cutoffs.OldestMxact ||
932 MultiXactIdPrecedesOrEquals(vacrel->aggressive ? vacrel->cutoffs.MultiXactCutoff :
933 vacrel->cutoffs.relminmxid,
934 vacrel->NewRelminMxid));
935 if (vacrel->skippedallvis)
936 {
937 /*
938 * Must keep original relfrozenxid in a non-aggressive VACUUM that
939 * chose to skip an all-visible page range. The state that tracks new
940 * values will have missed unfrozen XIDs from the pages we skipped.
941 */
942 Assert(!vacrel->aggressive);
943 vacrel->NewRelfrozenXid = InvalidTransactionId;
944 vacrel->NewRelminMxid = InvalidMultiXactId;
945 }
946
947 /*
948 * For safety, clamp relallvisible to be not more than what we're setting
949 * pg_class.relpages to
950 */
951 new_rel_pages = vacrel->rel_pages; /* After possible rel truncation */
955
956 /*
957 * An all-frozen block _must_ be all-visible. As such, clamp the count of
958 * all-frozen blocks to the count of all-visible blocks. This matches the
959 * clamping of relallvisible above.
960 */
963
964 /*
965 * Now actually update rel's pg_class entry.
966 *
967 * In principle new_live_tuples could be -1 indicating that we (still)
968 * don't know the tuple count. In practice that can't happen, since we
969 * scan every page that isn't skipped using the visibility map.
970 */
971 vac_update_relstats(rel, new_rel_pages, vacrel->new_live_tuples,
973 vacrel->nindexes > 0,
974 vacrel->NewRelfrozenXid, vacrel->NewRelminMxid,
976
977 /*
978 * Report results to the cumulative stats system, too.
979 *
980 * Deliberately avoid telling the stats system about LP_DEAD items that
981 * remain in the table due to VACUUM bypassing index and heap vacuuming.
982 * ANALYZE will consider the remaining LP_DEAD items to be dead "tuples".
983 * It seems like a good idea to err on the side of not vacuuming again too
984 * soon in cases where the failsafe prevented significant amounts of heap
985 * vacuuming.
986 */
988 Max(vacrel->new_live_tuples, 0),
989 vacrel->recently_dead_tuples +
990 vacrel->missed_dead_tuples,
991 starttime);
993
994 if (instrument)
995 {
997
998 if (verbose || params.log_vacuum_min_duration == 0 ||
1001 {
1002 long secs_dur;
1003 int usecs_dur;
1004 WalUsage walusage;
1005 BufferUsage bufferusage;
1007 char *msgfmt;
1008 int32 diff;
1009 double read_rate = 0,
1010 write_rate = 0;
1014
1016 memset(&walusage, 0, sizeof(WalUsage));
1018 memset(&bufferusage, 0, sizeof(BufferUsage));
1020
1021 total_blks_hit = bufferusage.shared_blks_hit +
1022 bufferusage.local_blks_hit;
1023 total_blks_read = bufferusage.shared_blks_read +
1024 bufferusage.local_blks_read;
1026 bufferusage.local_blks_dirtied;
1027
1029 if (verbose)
1030 {
1031 /*
1032 * Aggressiveness already reported earlier, in dedicated
1033 * VACUUM VERBOSE ereport
1034 */
1035 Assert(!params.is_wraparound);
1036 msgfmt = _("finished vacuuming \"%s.%s.%s\": index scans: %d\n");
1037 }
1038 else if (params.is_wraparound)
1039 {
1040 /*
1041 * While it's possible for a VACUUM to be both is_wraparound
1042 * and !aggressive, that's just a corner-case -- is_wraparound
1043 * implies aggressive. Produce distinct output for the corner
1044 * case all the same, just in case.
1045 */
1046 if (vacrel->aggressive)
1047 msgfmt = _("automatic aggressive vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
1048 else
1049 msgfmt = _("automatic vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
1050 }
1051 else
1052 {
1053 if (vacrel->aggressive)
1054 msgfmt = _("automatic aggressive vacuum of table \"%s.%s.%s\": index scans: %d\n");
1055 else
1056 msgfmt = _("automatic vacuum of table \"%s.%s.%s\": index scans: %d\n");
1057 }
1059 vacrel->dbname,
1060 vacrel->relnamespace,
1061 vacrel->relname,
1062 vacrel->num_index_scans);
1063 appendStringInfo(&buf, _("pages: %u removed, %u remain, %u scanned (%.2f%% of total), %u eagerly scanned\n"),
1064 vacrel->removed_pages,
1066 vacrel->scanned_pages,
1067 orig_rel_pages == 0 ? 100.0 :
1068 100.0 * vacrel->scanned_pages /
1070 vacrel->eager_scanned_pages);
1072 _("tuples: %" PRId64 " removed, %" PRId64 " remain, %" PRId64 " are dead but not yet removable\n"),
1073 vacrel->tuples_deleted,
1074 (int64) vacrel->new_rel_tuples,
1075 vacrel->recently_dead_tuples);
1076 if (vacrel->missed_dead_tuples > 0)
1078 _("tuples missed: %" PRId64 " dead from %u pages not removed due to cleanup lock contention\n"),
1079 vacrel->missed_dead_tuples,
1080 vacrel->missed_dead_pages);
1082 vacrel->cutoffs.OldestXmin);
1084 _("removable cutoff: %u, which was %d XIDs old when operation ended\n"),
1085 vacrel->cutoffs.OldestXmin, diff);
1087 {
1088 diff = (int32) (vacrel->NewRelfrozenXid -
1089 vacrel->cutoffs.relfrozenxid);
1091 _("new relfrozenxid: %u, which is %d XIDs ahead of previous value\n"),
1092 vacrel->NewRelfrozenXid, diff);
1093 }
1094 if (minmulti_updated)
1095 {
1096 diff = (int32) (vacrel->NewRelminMxid -
1097 vacrel->cutoffs.relminmxid);
1099 _("new relminmxid: %u, which is %d MXIDs ahead of previous value\n"),
1100 vacrel->NewRelminMxid, diff);
1101 }
1102 appendStringInfo(&buf, _("frozen: %u pages from table (%.2f%% of total) had %" PRId64 " tuples frozen\n"),
1103 vacrel->new_frozen_tuple_pages,
1104 orig_rel_pages == 0 ? 100.0 :
1105 100.0 * vacrel->new_frozen_tuple_pages /
1107 vacrel->tuples_frozen);
1108
1110 _("visibility map: %u pages set all-visible, %u pages set all-frozen (%u were all-visible)\n"),
1111 vacrel->new_all_visible_pages,
1112 vacrel->new_all_visible_all_frozen_pages +
1113 vacrel->new_all_frozen_pages,
1114 vacrel->new_all_frozen_pages);
1115 if (vacrel->do_index_vacuuming)
1116 {
1117 if (vacrel->nindexes == 0 || vacrel->num_index_scans == 0)
1118 appendStringInfoString(&buf, _("index scan not needed: "));
1119 else
1120 appendStringInfoString(&buf, _("index scan needed: "));
1121
1122 msgfmt = _("%u pages from table (%.2f%% of total) had %" PRId64 " dead item identifiers removed\n");
1123 }
1124 else
1125 {
1127 appendStringInfoString(&buf, _("index scan bypassed: "));
1128 else
1129 appendStringInfoString(&buf, _("index scan bypassed by failsafe: "));
1130
1131 msgfmt = _("%u pages from table (%.2f%% of total) have %" PRId64 " dead item identifiers\n");
1132 }
1134 vacrel->lpdead_item_pages,
1135 orig_rel_pages == 0 ? 100.0 :
1136 100.0 * vacrel->lpdead_item_pages / orig_rel_pages,
1137 vacrel->lpdead_items);
1138
1139 if (vacrel->worker_usage.vacuum.nplanned > 0)
1141 _("parallel workers: index vacuum: %d planned, %d launched in total\n"),
1142 vacrel->worker_usage.vacuum.nplanned,
1143 vacrel->worker_usage.vacuum.nlaunched);
1144
1145 if (vacrel->worker_usage.cleanup.nplanned > 0)
1147 _("parallel workers: index cleanup: %d planned, %d launched\n"),
1148 vacrel->worker_usage.cleanup.nplanned,
1149 vacrel->worker_usage.cleanup.nlaunched);
1150
1151 for (int i = 0; i < vacrel->nindexes; i++)
1152 {
1153 IndexBulkDeleteResult *istat = vacrel->indstats[i];
1154
1155 if (!istat)
1156 continue;
1157
1159 _("index \"%s\": pages: %u in total, %u newly deleted, %u currently deleted, %u reusable\n"),
1160 indnames[i],
1161 istat->num_pages,
1162 istat->pages_newly_deleted,
1163 istat->pages_deleted,
1164 istat->pages_free);
1165 }
1167 {
1168 /*
1169 * We bypass the changecount mechanism because this value is
1170 * only updated by the calling process. We also rely on the
1171 * above call to pgstat_progress_end_command() to not clear
1172 * the st_progress_param array.
1173 */
1174 appendStringInfo(&buf, _("delay time: %.3f ms\n"),
1176 }
1177 if (track_io_timing)
1178 {
1179 double read_ms = (double) (pgStatBlockReadTime - startreadtime) / 1000;
1180 double write_ms = (double) (pgStatBlockWriteTime - startwritetime) / 1000;
1181
1182 appendStringInfo(&buf, _("I/O timings: read: %.3f ms, write: %.3f ms\n"),
1183 read_ms, write_ms);
1184 }
1185 if (secs_dur > 0 || usecs_dur > 0)
1186 {
1188 (1024 * 1024) / (secs_dur + usecs_dur / 1000000.0);
1190 (1024 * 1024) / (secs_dur + usecs_dur / 1000000.0);
1191 }
1192 appendStringInfo(&buf, _("avg read rate: %.3f MB/s, avg write rate: %.3f MB/s\n"),
1195 _("buffer usage: %" PRId64 " hits, %" PRId64 " reads, %" PRId64 " dirtied\n"),
1200 _("WAL usage: %" PRId64 " records, %" PRId64 " full page images, %" PRIu64 " bytes, %" PRIu64 " full page image bytes, %" PRId64 " buffers full\n"),
1201 walusage.wal_records,
1202 walusage.wal_fpi,
1203 walusage.wal_bytes,
1204 walusage.wal_fpi_bytes,
1205 walusage.wal_buffers_full);
1206
1207 /*
1208 * Report the dead items memory usage.
1209 *
1210 * The num_dead_items_resets counter increases when we reset the
1211 * collected dead items, so the counter is non-zero if at least
 1212 * one dead item is collected, even if index vacuuming is
1213 * disabled.
1214 */
1216 ngettext("memory usage: dead item storage %.2f MB accumulated across %d reset (limit %.2f MB each)\n",
1217 "memory usage: dead item storage %.2f MB accumulated across %d resets (limit %.2f MB each)\n",
1218 vacrel->num_dead_items_resets),
1219 (double) vacrel->total_dead_items_bytes / (1024 * 1024),
1220 vacrel->num_dead_items_resets,
1221 (double) dead_items_max_bytes / (1024 * 1024));
1222 appendStringInfo(&buf, _("system usage: %s"), pg_rusage_show(&ru0));
1223
1224 ereport(verbose ? INFO : LOG,
1225 (errmsg_internal("%s", buf.data)));
1226 pfree(buf.data);
1227 }
1228 }
1229
1230 /* Cleanup index statistics and index names */
1231 for (int i = 0; i < vacrel->nindexes; i++)
1232 {
1233 if (vacrel->indstats[i])
1234 pfree(vacrel->indstats[i]);
1235
1236 if (instrument)
1237 pfree(indnames[i]);
1238 }
1239}
void TimestampDifference(TimestampTz start_time, TimestampTz stop_time, long *secs, int *microsecs)
Definition timestamp.c:1712
bool TimestampDifferenceExceeds(TimestampTz start_time, TimestampTz stop_time, int msec)
Definition timestamp.c:1772
TimestampTz GetCurrentTimestamp(void)
Definition timestamp.c:1636
void pgstat_progress_start_command(ProgressCommandType cmdtype, Oid relid)
void pgstat_progress_update_param(int index, int64 val)
void pgstat_progress_end_command(void)
@ PROGRESS_COMMAND_VACUUM
PgBackendStatus * MyBEEntry
bool track_io_timing
Definition bufmgr.c:192
#define RelationGetNumberOfBlocks(reln)
Definition bufmgr.h:307
#define ngettext(s, p, n)
Definition c.h:1272
int32_t int32
Definition c.h:614
int64 TimestampTz
Definition timestamp.h:39
ErrorContextCallback * error_context_stack
Definition elog.c:99
#define _(x)
Definition elog.c:95
#define LOG
Definition elog.h:31
#define INFO
Definition elog.h:34
#define palloc0_object(type)
Definition fe_memutils.h:75
void WalUsageAccumDiff(WalUsage *dst, const WalUsage *add, const WalUsage *sub)
Definition instrument.c:289
BufferUsage pgBufferUsage
Definition instrument.c:20
void BufferUsageAccumDiff(BufferUsage *dst, const BufferUsage *add, const BufferUsage *sub)
Definition instrument.c:249
#define NoLock
Definition lockdefs.h:34
#define RowExclusiveLock
Definition lockdefs.h:38
char * get_database_name(Oid dbid)
Definition lsyscache.c:1312
char * get_namespace_name(Oid nspid)
Definition lsyscache.c:3588
char * pstrdup(const char *in)
Definition mcxt.c:1781
void * palloc0(Size size)
Definition mcxt.c:1417
#define AmAutoVacuumWorkerProcess()
Definition miscadmin.h:383
static int verbose
const void * data
const char * pg_rusage_show(const PGRUsage *ru0)
Definition pg_rusage.c:40
void pg_rusage_init(PGRUsage *ru0)
Definition pg_rusage.c:27
int64 PgStat_Counter
Definition pgstat.h:70
PgStat_Counter pgStatBlockReadTime
PgStat_Counter pgStatBlockWriteTime
void pgstat_report_vacuum(Relation rel, PgStat_Counter livetuples, PgStat_Counter deadtuples, TimestampTz starttime)
#define PROGRESS_VACUUM_PHASE_FINAL_CLEANUP
Definition progress.h:41
#define PROGRESS_VACUUM_MODE
Definition progress.h:32
#define PROGRESS_VACUUM_MODE_NORMAL
Definition progress.h:44
#define PROGRESS_VACUUM_STARTED_BY_AUTOVACUUM
Definition progress.h:50
#define PROGRESS_VACUUM_PHASE
Definition progress.h:21
#define PROGRESS_VACUUM_DELAY_TIME
Definition progress.h:31
#define PROGRESS_VACUUM_STARTED_BY_AUTOVACUUM_WRAPAROUND
Definition progress.h:51
#define PROGRESS_VACUUM_STARTED_BY_MANUAL
Definition progress.h:49
#define PROGRESS_VACUUM_STARTED_BY
Definition progress.h:33
#define PROGRESS_VACUUM_MODE_AGGRESSIVE
Definition progress.h:45
#define RelationGetNamespace(relation)
Definition rel.h:555
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition stringinfo.c:145
void appendStringInfoString(StringInfo str, const char *s)
Definition stringinfo.c:230
void initStringInfo(StringInfo str)
Definition stringinfo.c:97
int64 shared_blks_dirtied
Definition instrument.h:28
int64 local_blks_hit
Definition instrument.h:30
int64 shared_blks_read
Definition instrument.h:27
int64 local_blks_read
Definition instrument.h:31
int64 local_blks_dirtied
Definition instrument.h:32
int64 shared_blks_hit
Definition instrument.h:26
struct ErrorContextCallback * previous
Definition elog.h:297
void(* callback)(void *arg)
Definition elog.h:298
BlockNumber pages_deleted
Definition genam.h:90
BlockNumber pages_newly_deleted
Definition genam.h:89
BlockNumber pages_free
Definition genam.h:91
BlockNumber num_pages
Definition genam.h:85
int64 st_progress_param[PGSTAT_NUM_PROGRESS_PARAM]
int nworkers
Definition vacuum.h:251
VacOptValue truncate
Definition vacuum.h:236
bits32 options
Definition vacuum.h:219
int log_vacuum_min_duration
Definition vacuum.h:227
bool is_wraparound
Definition vacuum.h:226
VacOptValue index_cleanup
Definition vacuum.h:235
int64 wal_buffers_full
Definition instrument.h:57
uint64 wal_bytes
Definition instrument.h:55
uint64 wal_fpi_bytes
Definition instrument.h:56
int64 wal_records
Definition instrument.h:53
size_t TidStoreMemoryUsage(TidStore *ts)
Definition tidstore.c:532
static TransactionId ReadNextTransactionId(void)
Definition transam.h:377
bool track_cost_delay_timing
Definition vacuum.c:83
void vac_open_indexes(Relation relation, LOCKMODE lockmode, int *nindexes, Relation **Irel)
Definition vacuum.c:2367
void vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode)
Definition vacuum.c:2410
bool VacuumFailsafeActive
Definition vacuum.c:111
void vac_update_relstats(Relation relation, BlockNumber num_pages, double num_tuples, BlockNumber num_all_visible_pages, BlockNumber num_all_frozen_pages, bool hasindex, TransactionId frozenxid, MultiXactId minmulti, bool *frozenxid_updated, bool *minmulti_updated, bool in_outer_xact)
Definition vacuum.c:1426
bool vacuum_get_cutoffs(Relation rel, const VacuumParams params, struct VacuumCutoffs *cutoffs)
Definition vacuum.c:1100
#define VACOPT_VERBOSE
Definition vacuum.h:182
@ VACOPTVALUE_AUTO
Definition vacuum.h:203
@ VACOPTVALUE_ENABLED
Definition vacuum.h:205
@ VACOPTVALUE_UNSPECIFIED
Definition vacuum.h:202
@ VACOPTVALUE_DISABLED
Definition vacuum.h:204
#define VACOPT_DISABLE_PAGE_SKIPPING
Definition vacuum.h:188
static void dead_items_cleanup(LVRelState *vacrel)
static void update_relstats_all_indexes(LVRelState *vacrel)
static void heap_vacuum_eager_scan_setup(LVRelState *vacrel, const VacuumParams params)
Definition vacuumlazy.c:507
static void vacuum_error_callback(void *arg)
static void lazy_truncate_heap(LVRelState *vacrel)
static bool should_attempt_truncation(LVRelState *vacrel)
@ VACUUM_ERRCB_PHASE_UNKNOWN
Definition vacuumlazy.c:225
static void lazy_scan_heap(LVRelState *vacrel)
static bool lazy_check_wraparound_failsafe(LVRelState *vacrel)
static void dead_items_alloc(LVRelState *vacrel, int nworkers)
void visibilitymap_count(Relation rel, BlockNumber *all_visible, BlockNumber *all_frozen)

References _, AmAutoVacuumWorkerProcess, appendStringInfo(), appendStringInfoString(), ErrorContextCallback::arg, Assert, buf, BufferUsageAccumDiff(), ErrorContextCallback::callback, dead_items_alloc(), dead_items_cleanup(), ereport, errmsg, errmsg_internal(), error_context_stack, fb(), get_database_name(), get_namespace_name(), GetCurrentTimestamp(), GlobalVisTestFor(), heap_vacuum_eager_scan_setup(), i, VacuumParams::index_cleanup, INFO, initStringInfo(), InvalidMultiXactId, InvalidTransactionId, VacuumParams::is_wraparound, IsInParallelMode(), lazy_check_wraparound_failsafe(), lazy_scan_heap(), lazy_truncate_heap(), BufferUsage::local_blks_dirtied, BufferUsage::local_blks_hit, BufferUsage::local_blks_read, LOG, VacuumParams::log_vacuum_min_duration, Max, MultiXactIdPrecedesOrEquals(), MyBEEntry, MyDatabaseId, ngettext, NoLock, IndexBulkDeleteResult::num_pages, VacuumParams::nworkers, VacuumParams::options, IndexBulkDeleteResult::pages_deleted, IndexBulkDeleteResult::pages_free, IndexBulkDeleteResult::pages_newly_deleted, palloc0(), palloc0_object, palloc_array, pfree(), pg_rusage_init(), pg_rusage_show(), pgBufferUsage, pgstat_progress_end_command(), pgstat_progress_start_command(), pgstat_progress_update_param(), pgstat_report_vacuum(), pgStatBlockReadTime, pgStatBlockWriteTime, pgWalUsage, ErrorContextCallback::previous, PROGRESS_COMMAND_VACUUM, PROGRESS_VACUUM_DELAY_TIME, PROGRESS_VACUUM_MODE, PROGRESS_VACUUM_MODE_AGGRESSIVE, PROGRESS_VACUUM_MODE_NORMAL, PROGRESS_VACUUM_PHASE, PROGRESS_VACUUM_PHASE_FINAL_CLEANUP, PROGRESS_VACUUM_STARTED_BY, PROGRESS_VACUUM_STARTED_BY_AUTOVACUUM, PROGRESS_VACUUM_STARTED_BY_AUTOVACUUM_WRAPAROUND, PROGRESS_VACUUM_STARTED_BY_MANUAL, pstrdup(), ReadNextTransactionId(), RelationGetNamespace, RelationGetNumberOfBlocks, RelationGetRelationName, RelationGetRelid, RowExclusiveLock, BufferUsage::shared_blks_dirtied, BufferUsage::shared_blks_hit, BufferUsage::shared_blks_read, should_attempt_truncation(), 
PgBackendStatus::st_progress_param, TidStoreMemoryUsage(), TimestampDifference(), TimestampDifferenceExceeds(), track_cost_delay_timing, track_io_timing, TransactionIdPrecedesOrEquals(), VacuumParams::truncate, update_relstats_all_indexes(), vac_close_indexes(), vac_open_indexes(), vac_update_relstats(), VACOPT_DISABLE_PAGE_SKIPPING, VACOPT_VERBOSE, VACOPTVALUE_AUTO, VACOPTVALUE_DISABLED, VACOPTVALUE_ENABLED, VACOPTVALUE_UNSPECIFIED, VACUUM_ERRCB_PHASE_UNKNOWN, vacuum_error_callback(), vacuum_get_cutoffs(), VacuumFailsafeActive, verbose, visibilitymap_count(), WalUsage::wal_buffers_full, WalUsage::wal_bytes, WalUsage::wal_fpi, WalUsage::wal_fpi_bytes, WalUsage::wal_records, and WalUsageAccumDiff().

◆ HeapCheckForSerializableConflictOut()

void HeapCheckForSerializableConflictOut ( bool  visible,
Relation  relation,
HeapTuple  tuple,
Buffer  buffer,
Snapshot  snapshot 
)
extern

Definition at line 9345 of file heapam.c.

9348{
9349 TransactionId xid;
9351
9352 if (!CheckForSerializableConflictOutNeeded(relation, snapshot))
9353 return;
9354
9355 /*
9356 * Check to see whether the tuple has been written to by a concurrent
9357 * transaction, either to create it not visible to us, or to delete it
9358 * while it is visible to us. The "visible" bool indicates whether the
9359 * tuple is visible to us, while HeapTupleSatisfiesVacuum checks what else
9360 * is going on with it.
9361 *
9362 * In the event of a concurrently inserted tuple that also happens to have
9363 * been concurrently updated (by a separate transaction), the xmin of the
9364 * tuple will be used -- not the updater's xid.
9365 */
9367 switch (htsvResult)
9368 {
9369 case HEAPTUPLE_LIVE:
9370 if (visible)
9371 return;
9372 xid = HeapTupleHeaderGetXmin(tuple->t_data);
9373 break;
9376 if (visible)
9377 xid = HeapTupleHeaderGetUpdateXid(tuple->t_data);
9378 else
9379 xid = HeapTupleHeaderGetXmin(tuple->t_data);
9380
9382 {
9383 /* This is like the HEAPTUPLE_DEAD case */
9384 Assert(!visible);
9385 return;
9386 }
9387 break;
9389 xid = HeapTupleHeaderGetXmin(tuple->t_data);
9390 break;
9391 case HEAPTUPLE_DEAD:
9392 Assert(!visible);
9393 return;
9394 default:
9395
9396 /*
9397 * The only way to get to this default clause is if a new value is
9398 * added to the enum type without adding it to this switch
9399 * statement. That's a bug, so elog.
9400 */
9401 elog(ERROR, "unrecognized return value from HeapTupleSatisfiesVacuum: %u", htsvResult);
9402
9403 /*
9404 * In spite of having all enum values covered and calling elog on
9405 * this default, some compilers think this is a code path which
9406 * allows xid to be used below without initialization. Silence
9407 * that warning.
9408 */
9410 }
9411
9414
9415 /*
9416 * Find top level xid. Bail out if xid is too early to be a conflict, or
9417 * if it's our own xid.
9418 */
9420 return;
9423 return;
9424
9425 CheckForSerializableConflictOut(relation, xid, snapshot);
9426}
HTSV_Result HeapTupleSatisfiesVacuum(HeapTuple htup, TransactionId OldestXmin, Buffer buffer)
void CheckForSerializableConflictOut(Relation relation, TransactionId xid, Snapshot snapshot)
Definition predicate.c:4032
TransactionId SubTransGetTopmostTransaction(TransactionId xid)
Definition subtrans.c:163
static bool TransactionIdFollowsOrEquals(TransactionId id1, TransactionId id2)
Definition transam.h:312
TransactionId GetTopTransactionIdIfAny(void)
Definition xact.c:443

References Assert, CheckForSerializableConflictOut(), CheckForSerializableConflictOutNeeded(), elog, ERROR, fb(), GetTopTransactionIdIfAny(), HEAPTUPLE_DEAD, HEAPTUPLE_DELETE_IN_PROGRESS, HEAPTUPLE_INSERT_IN_PROGRESS, HEAPTUPLE_LIVE, HEAPTUPLE_RECENTLY_DEAD, HeapTupleHeaderGetUpdateXid(), HeapTupleHeaderGetXmin(), HeapTupleSatisfiesVacuum(), InvalidTransactionId, SubTransGetTopmostTransaction(), HeapTupleData::t_data, TransactionIdEquals, TransactionIdFollowsOrEquals(), TransactionIdIsValid, TransactionIdPrecedes(), and TransactionXmin.

Referenced by BitmapHeapScanNextBlock(), heap_fetch(), heap_get_latest_tid(), heap_hot_search_buffer(), heapam_scan_sample_next_tuple(), heapgettup(), and page_collect_tuples().

◆ HeapTupleHeaderIsOnlyLocked()

bool HeapTupleHeaderIsOnlyLocked ( HeapTupleHeader  tuple)
extern

Definition at line 1436 of file heapam_visibility.c.

1437{
1438 TransactionId xmax;
1439
1440 /* if there's no valid Xmax, then there's obviously no update either */
1441 if (tuple->t_infomask & HEAP_XMAX_INVALID)
1442 return true;
1443
1444 if (tuple->t_infomask & HEAP_XMAX_LOCK_ONLY)
1445 return true;
1446
1447 /* invalid xmax means no update */
1449 return true;
1450
1451 /*
1452 * if HEAP_XMAX_LOCK_ONLY is not set and not a multi, then this must
1453 * necessarily have been updated
1454 */
1455 if (!(tuple->t_infomask & HEAP_XMAX_IS_MULTI))
1456 return false;
1457
1458 /* ... but if it's a multi, then perhaps the updating Xid aborted. */
1459 xmax = HeapTupleGetUpdateXid(tuple);
1460
1461 /* not LOCKED_ONLY, so it has to have an xmax */
1463
1465 return false;
1466 if (TransactionIdIsInProgress(xmax))
1467 return false;
1468 if (TransactionIdDidCommit(xmax))
1469 return false;
1470
1471 /*
1472 * not current, not in progress, not committed -- must have aborted or
1473 * crashed
1474 */
1475 return true;
1476}
bool TransactionIdIsInProgress(TransactionId xid)
Definition procarray.c:1401

References Assert, HEAP_XMAX_INVALID, HEAP_XMAX_IS_MULTI, HEAP_XMAX_LOCK_ONLY, HeapTupleGetUpdateXid(), HeapTupleHeaderGetRawXmax(), HeapTupleHeaderData::t_infomask, TransactionIdDidCommit(), TransactionIdIsCurrentTransactionId(), TransactionIdIsInProgress(), and TransactionIdIsValid.

Referenced by heap_delete(), heap_get_latest_tid(), heap_lock_tuple(), heap_lock_updated_tuple_rec(), HeapTupleSatisfiesVacuumHorizon(), and rewrite_heap_tuple().

◆ HeapTupleIsSurelyDead()

bool HeapTupleIsSurelyDead ( HeapTuple  htup,
GlobalVisState *vistest 
)
extern

Definition at line 1381 of file heapam_visibility.c.

1382{
1383 HeapTupleHeader tuple = htup->t_data;
1384
1386 Assert(htup->t_tableOid != InvalidOid);
1387
1388 /*
1389 * If the inserting transaction is marked invalid, then it aborted, and
1390 * the tuple is definitely dead. If it's marked neither committed nor
1391 * invalid, then we assume it's still alive (since the presumption is that
1392 * all relevant hint bits were just set moments ago).
1393 */
1394 if (!HeapTupleHeaderXminCommitted(tuple))
1395 return HeapTupleHeaderXminInvalid(tuple);
1396
1397 /*
1398 * If the inserting transaction committed, but any deleting transaction
1399 * aborted, the tuple is still alive.
1400 */
1401 if (tuple->t_infomask & HEAP_XMAX_INVALID)
1402 return false;
1403
1404 /*
1405 * If the XMAX is just a lock, the tuple is still alive.
1406 */
1408 return false;
1409
1410 /*
1411 * If the Xmax is a MultiXact, it might be dead or alive, but we cannot
1412 * know without checking pg_multixact.
1413 */
1414 if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
1415 return false;
1416
1417 /* If deleter isn't known to have committed, assume it's still running. */
1418 if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED))
1419 return false;
1420
1421 /* Deleter committed, so tuple is dead if the XID is old enough. */
1422 return GlobalVisTestIsRemovableXid(vistest,
1424}
static bool HeapTupleHeaderXminInvalid(const HeapTupleHeaderData *tup)
static bool HeapTupleHeaderXminCommitted(const HeapTupleHeaderData *tup)
#define InvalidOid

References Assert, GlobalVisTestIsRemovableXid(), HEAP_XMAX_COMMITTED, HEAP_XMAX_INVALID, HEAP_XMAX_IS_LOCKED_ONLY(), HEAP_XMAX_IS_MULTI, HeapTupleHeaderGetRawXmax(), HeapTupleHeaderXminCommitted(), HeapTupleHeaderXminInvalid(), InvalidOid, ItemPointerIsValid(), HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleData::t_self, and HeapTupleData::t_tableOid.

Referenced by heap_hot_search_buffer().

◆ HeapTupleSatisfiesMVCCBatch()

int HeapTupleSatisfiesMVCCBatch ( Snapshot  snapshot,
Buffer  buffer,
int  ntups,
BatchMVCCState *batchmvcc,
OffsetNumber *vistuples_dense 
)
extern

Definition at line 1689 of file heapam_visibility.c.

1693{
1694 int nvis = 0;
1696
1697 Assert(IsMVCCSnapshot(snapshot));
1698
1699 for (int i = 0; i < ntups; i++)
1700 {
1701 bool valid;
1702 HeapTuple tup = &batchmvcc->tuples[i];
1703
1704 valid = HeapTupleSatisfiesMVCC(tup, snapshot, buffer, &state);
1705 batchmvcc->visible[i] = valid;
1706
1707 if (likely(valid))
1708 {
1709 vistuples_dense[nvis] = tup->t_self.ip_posid;
1710 nvis++;
1711 }
1712 }
1713
1714 if (state == SHB_ENABLED)
1715 BufferFinishSetHintBits(buffer, true, true);
1716
1717 return nvis;
1718}
void BufferFinishSetHintBits(Buffer buffer, bool mark_dirty, bool buffer_std)
Definition bufmgr.c:6937
static bool HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot, Buffer buffer, SetHintBitsState *state)
SetHintBitsState
@ SHB_ENABLED
@ SHB_INITIAL

References Assert, BufferFinishSetHintBits(), fb(), HeapTupleSatisfiesMVCC(), i, IsMVCCSnapshot, likely, SHB_ENABLED, and SHB_INITIAL.

Referenced by page_collect_tuples().

◆ HeapTupleSatisfiesUpdate()

TM_Result HeapTupleSatisfiesUpdate ( HeapTuple  htup,
CommandId  curcid,
Buffer  buffer 
)
extern

Definition at line 511 of file heapam_visibility.c.

513{
514 HeapTupleHeader tuple = htup->t_data;
515
517 Assert(htup->t_tableOid != InvalidOid);
518
520 {
522 return TM_Invisible;
523
524 else if (!HeapTupleCleanMoved(tuple, buffer))
525 return TM_Invisible;
527 {
528 if (HeapTupleHeaderGetCmin(tuple) >= curcid)
529 return TM_Invisible; /* inserted after scan started */
530
531 if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */
532 return TM_Ok;
533
535 {
536 TransactionId xmax;
537
538 xmax = HeapTupleHeaderGetRawXmax(tuple);
539
540 /*
541 * Careful here: even though this tuple was created by our own
542 * transaction, it might be locked by other transactions, if
543 * the original version was key-share locked when we updated
544 * it.
545 */
546
547 if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
548 {
549 if (MultiXactIdIsRunning(xmax, true))
550 return TM_BeingModified;
551 else
552 return TM_Ok;
553 }
554
555 /*
556 * If the locker is gone, then there is nothing of interest
557 * left in this Xmax; otherwise, report the tuple as
558 * locked/updated.
559 */
560 if (!TransactionIdIsInProgress(xmax))
561 return TM_Ok;
562 return TM_BeingModified;
563 }
564
565 if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
566 {
567 TransactionId xmax;
568
569 xmax = HeapTupleGetUpdateXid(tuple);
570
571 /* not LOCKED_ONLY, so it has to have an xmax */
573
574 /* deleting subtransaction must have aborted */
576 {
578 false))
579 return TM_BeingModified;
580 return TM_Ok;
581 }
582 else
583 {
584 if (HeapTupleHeaderGetCmax(tuple) >= curcid)
585 return TM_SelfModified; /* updated after scan started */
586 else
587 return TM_Invisible; /* updated before scan started */
588 }
589 }
590
592 {
593 /* deleting subtransaction must have aborted */
594 SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
596 return TM_Ok;
597 }
598
599 if (HeapTupleHeaderGetCmax(tuple) >= curcid)
600 return TM_SelfModified; /* updated after scan started */
601 else
602 return TM_Invisible; /* updated before scan started */
603 }
605 return TM_Invisible;
607 SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
609 else
610 {
611 /* it must have aborted or crashed */
612 SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
614 return TM_Invisible;
615 }
616 }
617
618 /* by here, the inserting transaction has committed */
619
620 if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid or aborted */
621 return TM_Ok;
622
623 if (tuple->t_infomask & HEAP_XMAX_COMMITTED)
624 {
626 return TM_Ok;
627 if (!ItemPointerEquals(&htup->t_self, &tuple->t_ctid))
628 return TM_Updated; /* updated by other */
629 else
630 return TM_Deleted; /* deleted by other */
631 }
632
633 if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
634 {
635 TransactionId xmax;
636
638 return TM_Ok;
639
641 {
643 return TM_BeingModified;
644
646 return TM_Ok;
647 }
648
649 xmax = HeapTupleGetUpdateXid(tuple);
650 if (!TransactionIdIsValid(xmax))
651 {
653 return TM_BeingModified;
654 }
655
656 /* not LOCKED_ONLY, so it has to have an xmax */
658
660 {
661 if (HeapTupleHeaderGetCmax(tuple) >= curcid)
662 return TM_SelfModified; /* updated after scan started */
663 else
664 return TM_Invisible; /* updated before scan started */
665 }
666
668 return TM_BeingModified;
669
670 if (TransactionIdDidCommit(xmax))
671 {
672 if (!ItemPointerEquals(&htup->t_self, &tuple->t_ctid))
673 return TM_Updated;
674 else
675 return TM_Deleted;
676 }
677
678 /*
679 * By here, the update in the Xmax is either aborted or crashed, but
680 * what about the other members?
681 */
682
684 {
685 /*
686 * There's no member, even just a locker, alive anymore, so we can
687 * mark the Xmax as invalid.
688 */
689 SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
691 return TM_Ok;
692 }
693 else
694 {
695 /* There are lockers running */
696 return TM_BeingModified;
697 }
698 }
699
701 {
703 return TM_BeingModified;
704 if (HeapTupleHeaderGetCmax(tuple) >= curcid)
705 return TM_SelfModified; /* updated after scan started */
706 else
707 return TM_Invisible; /* updated before scan started */
708 }
709
711 return TM_BeingModified;
712
714 {
715 /* it must have aborted or crashed */
716 SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
718 return TM_Ok;
719 }
720
721 /* xmax transaction committed */
722
724 {
725 SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
727 return TM_Ok;
728 }
729
730 SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED,
732 if (!ItemPointerEquals(&htup->t_self, &tuple->t_ctid))
733 return TM_Updated; /* updated by other */
734 else
735 return TM_Deleted; /* deleted by other */
736}
CommandId HeapTupleHeaderGetCmin(const HeapTupleHeaderData *tup)
Definition combocid.c:104
static void SetHintBits(HeapTupleHeader tuple, Buffer buffer, uint16 infomask, TransactionId xid)
static bool HeapTupleCleanMoved(HeapTupleHeader tuple, Buffer buffer)
#define HEAP_XMIN_COMMITTED
#define HEAP_XMIN_INVALID
bool MultiXactIdIsRunning(MultiXactId multi, bool isLockOnly)
Definition multixact.c:511

References Assert, HEAP_LOCKED_UPGRADED(), HEAP_XMAX_COMMITTED, HEAP_XMAX_INVALID, HEAP_XMAX_IS_LOCKED_ONLY(), HEAP_XMAX_IS_MULTI, HEAP_XMIN_COMMITTED, HEAP_XMIN_INVALID, HeapTupleCleanMoved(), HeapTupleGetUpdateXid(), HeapTupleHeaderGetCmax(), HeapTupleHeaderGetCmin(), HeapTupleHeaderGetRawXmax(), HeapTupleHeaderGetRawXmin(), HeapTupleHeaderXminCommitted(), HeapTupleHeaderXminInvalid(), InvalidOid, InvalidTransactionId, ItemPointerEquals(), ItemPointerIsValid(), MultiXactIdIsRunning(), SetHintBits(), HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleData::t_self, HeapTupleData::t_tableOid, TM_BeingModified, TM_Deleted, TM_Invisible, TM_Ok, TM_SelfModified, TM_Updated, TransactionIdDidCommit(), TransactionIdIsCurrentTransactionId(), TransactionIdIsInProgress(), and TransactionIdIsValid.

Referenced by heap_delete(), heap_inplace_lock(), heap_lock_tuple(), heap_update(), and pgrowlocks().

◆ HeapTupleSatisfiesVacuum()

HTSV_Result HeapTupleSatisfiesVacuum ( HeapTuple  htup,
TransactionId  OldestXmin,
Buffer  buffer 
)
extern

◆ HeapTupleSatisfiesVacuumHorizon()

HTSV_Result HeapTupleSatisfiesVacuumHorizon ( HeapTuple  htup,
Buffer  buffer,
TransactionId *dead_after 
)
extern

Definition at line 1147 of file heapam_visibility.c.

1148{
1149 HeapTupleHeader tuple = htup->t_data;
1150
1152 Assert(htup->t_tableOid != InvalidOid);
1154
1156
1157 /*
1158 * Has inserting transaction committed?
1159 *
1160 * If the inserting transaction aborted, then the tuple was never visible
1161 * to any other transaction, so we can delete it immediately.
1162 */
1163 if (!HeapTupleHeaderXminCommitted(tuple))
1164 {
1165 if (HeapTupleHeaderXminInvalid(tuple))
1166 return HEAPTUPLE_DEAD;
1167 else if (!HeapTupleCleanMoved(tuple, buffer))
1168 return HEAPTUPLE_DEAD;
1170 {
1171 if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */
1173 /* only locked? run infomask-only check first, for performance */
1177 /* inserted and then deleted by same xact */
1180 /* deleting subtransaction must have aborted */
1182 }
1184 {
1185 /*
1186 * It'd be possible to discern between INSERT/DELETE in progress
1187 * here by looking at xmax - but that doesn't seem beneficial for
1188 * the majority of callers and even detrimental for some. We'd
1189 * rather have callers look at/wait for xmin than xmax. It's
1190 * always correct to return INSERT_IN_PROGRESS because that's
1191 * what's happening from the view of other backends.
1192 */
1194 }
1196 SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
1198 else
1199 {
1200 /*
1201 * Not in Progress, Not Committed, so either Aborted or crashed
1202 */
1203 SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
1205 return HEAPTUPLE_DEAD;
1206 }
1207
1208 /*
1209 * At this point the xmin is known committed, but we might not have
1210 * been able to set the hint bit yet; so we can no longer Assert that
1211 * it's set.
1212 */
1213 }
1214
1215 /*
1216 * Okay, the inserter committed, so it was good at some point. Now what
1217 * about the deleting transaction?
1218 */
1219 if (tuple->t_infomask & HEAP_XMAX_INVALID)
1220 return HEAPTUPLE_LIVE;
1221
1223 {
1224 /*
1225 * "Deleting" xact really only locked it, so the tuple is live in any
1226 * case. However, we should make sure that either XMAX_COMMITTED or
1227 * XMAX_INVALID gets set once the xact is gone, to reduce the costs of
1228 * examining the tuple for future xacts.
1229 */
1230 if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED))
1231 {
1232 if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
1233 {
1234 /*
1235 * If it's a pre-pg_upgrade tuple, the multixact cannot
1236 * possibly be running; otherwise have to check.
1237 */
1238 if (!HEAP_LOCKED_UPGRADED(tuple->t_infomask) &&
1240 true))
1241 return HEAPTUPLE_LIVE;
1243 }
1244 else
1245 {
1247 return HEAPTUPLE_LIVE;
1248 SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
1250 }
1251 }
1252
1253 /*
1254 * We don't really care whether xmax did commit, abort or crash. We
1255 * know that xmax did lock the tuple, but it did not and will never
1256 * actually update it.
1257 */
1258
1259 return HEAPTUPLE_LIVE;
1260 }
1261
1262 if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
1263 {
1265
1266 /* already checked above */
1268
1269 /* not LOCKED_ONLY, so it has to have an xmax */
1271
1272 if (TransactionIdIsInProgress(xmax))
1274 else if (TransactionIdDidCommit(xmax))
1275 {
1276 /*
1277 * The multixact might still be running due to lockers. Need to
1278 * allow for pruning if below the xid horizon regardless --
1279 * otherwise we could end up with a tuple where the updater has to
1280 * be removed due to the horizon, but is not pruned away. It's
1281 * not a problem to prune that tuple, because any remaining
1282 * lockers will also be present in newer tuple versions.
1283 */
1284 *dead_after = xmax;
1286 }
1287 else if (!MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple), false))
1288 {
1289 /*
1290 * Not in Progress, Not Committed, so either Aborted or crashed.
1291 * Mark the Xmax as invalid.
1292 */
1294 }
1295
1296 return HEAPTUPLE_LIVE;
1297 }
1298
1299 if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED))
1300 {
1304 SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED,
1306 else
1307 {
1308 /*
1309 * Not in Progress, Not Committed, so either Aborted or crashed
1310 */
1311 SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
1313 return HEAPTUPLE_LIVE;
1314 }
1315
1316 /*
1317 * At this point the xmax is known committed, but we might not have
1318 * been able to set the hint bit yet; so we can no longer Assert that
1319 * it's set.
1320 */
1321 }
1322
1323 /*
1324 * Deleter committed, allow caller to check if it was recent enough that
1325 * some open transactions could still see the tuple.
1326 */
1329}

References Assert, fb(), HEAP_LOCKED_UPGRADED(), HEAP_XMAX_COMMITTED, HEAP_XMAX_INVALID, HEAP_XMAX_IS_LOCKED_ONLY(), HEAP_XMAX_IS_MULTI, HEAP_XMIN_COMMITTED, HEAP_XMIN_INVALID, HEAPTUPLE_DEAD, HEAPTUPLE_DELETE_IN_PROGRESS, HEAPTUPLE_INSERT_IN_PROGRESS, HEAPTUPLE_LIVE, HEAPTUPLE_RECENTLY_DEAD, HeapTupleCleanMoved(), HeapTupleGetUpdateXid(), HeapTupleHeaderGetRawXmax(), HeapTupleHeaderGetRawXmin(), HeapTupleHeaderGetUpdateXid(), HeapTupleHeaderIsOnlyLocked(), HeapTupleHeaderXminCommitted(), HeapTupleHeaderXminInvalid(), InvalidOid, InvalidTransactionId, ItemPointerIsValid(), MultiXactIdIsRunning(), SetHintBits(), HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleData::t_self, HeapTupleData::t_tableOid, TransactionIdDidCommit(), TransactionIdIsCurrentTransactionId(), TransactionIdIsInProgress(), and TransactionIdIsValid.

Referenced by heap_page_would_be_all_visible(), heap_prune_satisfies_vacuum(), heapam_scan_analyze_next_tuple(), HeapTupleSatisfiesNonVacuumable(), and HeapTupleSatisfiesVacuum().

◆ HeapTupleSatisfiesVisibility()

bool HeapTupleSatisfiesVisibility ( HeapTuple  htup,
Snapshot  snapshot,
Buffer  buffer 
)
extern

Definition at line 1731 of file heapam_visibility.c.

1732{
1733 switch (snapshot->snapshot_type)
1734 {
1735 case SNAPSHOT_MVCC:
1736 return HeapTupleSatisfiesMVCC(htup, snapshot, buffer, NULL);
1737 case SNAPSHOT_SELF:
1738 return HeapTupleSatisfiesSelf(htup, snapshot, buffer);
1739 case SNAPSHOT_ANY:
1740 return HeapTupleSatisfiesAny(htup, snapshot, buffer);
1741 case SNAPSHOT_TOAST:
1742 return HeapTupleSatisfiesToast(htup, snapshot, buffer);
1743 case SNAPSHOT_DIRTY:
1744 return HeapTupleSatisfiesDirty(htup, snapshot, buffer);
1746 return HeapTupleSatisfiesHistoricMVCC(htup, snapshot, buffer);
1748 return HeapTupleSatisfiesNonVacuumable(htup, snapshot, buffer);
1749 }
1750
1751 return false; /* keep compiler quiet */
1752}
static bool HeapTupleSatisfiesAny(HeapTuple htup, Snapshot snapshot, Buffer buffer)
static bool HeapTupleSatisfiesNonVacuumable(HeapTuple htup, Snapshot snapshot, Buffer buffer)
static bool HeapTupleSatisfiesToast(HeapTuple htup, Snapshot snapshot, Buffer buffer)
static bool HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot, Buffer buffer)
static bool HeapTupleSatisfiesSelf(HeapTuple htup, Snapshot snapshot, Buffer buffer)
static bool HeapTupleSatisfiesHistoricMVCC(HeapTuple htup, Snapshot snapshot, Buffer buffer)
@ SNAPSHOT_TOAST
Definition snapshot.h:70
@ SNAPSHOT_SELF
Definition snapshot.h:60
@ SNAPSHOT_NON_VACUUMABLE
Definition snapshot.h:114
@ SNAPSHOT_MVCC
Definition snapshot.h:46
@ SNAPSHOT_ANY
Definition snapshot.h:65
@ SNAPSHOT_HISTORIC_MVCC
Definition snapshot.h:105
@ SNAPSHOT_DIRTY
Definition snapshot.h:98
SnapshotType snapshot_type
Definition snapshot.h:140

References fb(), HeapTupleSatisfiesAny(), HeapTupleSatisfiesDirty(), HeapTupleSatisfiesHistoricMVCC(), HeapTupleSatisfiesMVCC(), HeapTupleSatisfiesNonVacuumable(), HeapTupleSatisfiesSelf(), HeapTupleSatisfiesToast(), SNAPSHOT_ANY, SNAPSHOT_DIRTY, SNAPSHOT_HISTORIC_MVCC, SNAPSHOT_MVCC, SNAPSHOT_NON_VACUUMABLE, SNAPSHOT_SELF, SNAPSHOT_TOAST, and SnapshotData::snapshot_type.

Referenced by BitmapHeapScanNextBlock(), heap_delete(), heap_fetch(), heap_get_latest_tid(), heap_hot_search_buffer(), heap_update(), heapam_tuple_satisfies_snapshot(), heapgettup(), pgstat_heap(), SampleHeapTupleVisible(), and ScanSourceDatabasePgClassPage().

◆ HeapTupleSetHintBits()

void HeapTupleSetHintBits ( HeapTupleHeader  tuple,
Buffer  buffer,
uint16  infomask,
TransactionId  xid 
)
extern

Definition at line 212 of file heapam_visibility.c.

214{
215 /*
216 * The uses from heapam.c rely on being able to perform the hint bit
217 * updates, which can only be guaranteed if we are holding an exclusive
218 * lock on the buffer - which all callers are doing.
219 */
221
222 SetHintBits(tuple, buffer, infomask, xid);
223}
bool BufferIsLockedByMeInMode(Buffer buffer, BufferLockMode mode)
Definition bufmgr.c:3003

References Assert, BUFFER_LOCK_EXCLUSIVE, BufferIsLockedByMeInMode(), fb(), and SetHintBits().

Referenced by UpdateXmaxHintBits().

◆ log_heap_prune_and_freeze()

void log_heap_prune_and_freeze ( Relation  relation,
Buffer  buffer,
Buffer  vmbuffer,
uint8  vmflags,
TransactionId  conflict_xid,
bool  cleanup_lock,
PruneReason  reason,
HeapTupleFreeze *frozen,
int  nfrozen,
OffsetNumber *redirected,
int  nredirected,
OffsetNumber *dead,
int  ndead,
OffsetNumber *unused,
int  nunused 
)
extern

Definition at line 2162 of file pruneheap.c.

2171{
2174 uint8 info;
2176
2177 /* The following local variables hold data registered in the WAL record: */
2181 xlhp_prune_items dead_items;
2184 bool do_prune = nredirected > 0 || ndead > 0 || nunused > 0;
2186
2188
2189 xlrec.flags = 0;
2191
2192 /*
2193 * We can avoid an FPI of the heap page if the only modification we are
2194 * making to it is to set PD_ALL_VISIBLE and checksums/wal_log_hints are
2195 * disabled. Note that if we explicitly skip an FPI, we must not stamp the
2196 * heap page with this record's LSN. Recovery skips records <= the stamped
2197 * LSN, so this could lead to skipping an earlier FPI needed to repair a
2198 * torn page.
2199 */
2200 if (!do_prune &&
2201 nfrozen == 0 &&
2204
2205 /*
2206 * Prepare data for the buffer. The arrays are not actually in the
2207 * buffer, but we pretend that they are. When XLogInsert stores a full
2208 * page image, the arrays can be omitted.
2209 */
2212
2213 if (do_set_vm)
2214 XLogRegisterBuffer(1, vmbuffer, 0);
2215
2216 if (nfrozen > 0)
2217 {
2218 int nplans;
2219
2221
2222 /*
2223 * Prepare deduplicated representation for use in the WAL record. This
2224 * destructively sorts frozen tuples array in-place.
2225 */
2226 nplans = heap_log_freeze_plan(frozen, nfrozen, plans, frz_offsets);
2227
2228 freeze_plans.nplans = nplans;
2230 offsetof(xlhp_freeze_plans, plans));
2231 XLogRegisterBufData(0, plans,
2232 sizeof(xlhp_freeze_plan) * nplans);
2233 }
2234 if (nredirected > 0)
2235 {
2237
2238 redirect_items.ntargets = nredirected;
2241 XLogRegisterBufData(0, redirected,
2242 sizeof(OffsetNumber[2]) * nredirected);
2243 }
2244 if (ndead > 0)
2245 {
2246 xlrec.flags |= XLHP_HAS_DEAD_ITEMS;
2247
2248 dead_items.ntargets = ndead;
2249 XLogRegisterBufData(0, &dead_items,
2251 XLogRegisterBufData(0, dead,
2252 sizeof(OffsetNumber) * ndead);
2253 }
2254 if (nunused > 0)
2255 {
2257
2258 unused_items.ntargets = nunused;
2261 XLogRegisterBufData(0, unused,
2262 sizeof(OffsetNumber) * nunused);
2263 }
2264 if (nfrozen > 0)
2266 sizeof(OffsetNumber) * nfrozen);
2267
2268 /*
2269 * Prepare the main xl_heap_prune record. We already set the XLHP_HAS_*
2270 * flag above.
2271 */
2273 {
2274 xlrec.flags |= XLHP_VM_ALL_VISIBLE;
2276 xlrec.flags |= XLHP_VM_ALL_FROZEN;
2277 }
2279 xlrec.flags |= XLHP_IS_CATALOG_REL;
2282 if (cleanup_lock)
2283 xlrec.flags |= XLHP_CLEANUP_LOCK;
2284 else
2285 {
2286 Assert(nredirected == 0 && ndead == 0);
2287 /* also, any items in 'unused' must've been LP_DEAD previously */
2288 }
2292
2293 switch (reason)
2294 {
2295 case PRUNE_ON_ACCESS:
2297 break;
2298 case PRUNE_VACUUM_SCAN:
2300 break;
2303 break;
2304 default:
2305 elog(ERROR, "unrecognized prune reason: %d", (int) reason);
2306 break;
2307 }
2308 recptr = XLogInsert(RM_HEAP2_ID, info);
2309
2310 if (do_set_vm)
2311 {
2312 Assert(BufferIsDirty(vmbuffer));
2313 PageSetLSN(BufferGetPage(vmbuffer), recptr);
2314 }
2315
2316 /*
2317 * See comment at the top of the function about regbuf_flags_heap for
2318 * details on when we can advance the page LSN.
2319 */
2320 if (do_prune || nfrozen > 0 || (do_set_vm && XLogHintBitIsNeeded()))
2321 {
2322 Assert(BufferIsDirty(buffer));
2324 }
2325}
#define XLHP_HAS_CONFLICT_HORIZON
#define XLHP_HAS_FREEZE_PLANS
#define XLHP_VM_ALL_VISIBLE
#define SizeOfHeapPrune
#define XLHP_HAS_NOW_UNUSED_ITEMS
#define XLHP_VM_ALL_FROZEN
#define XLHP_HAS_REDIRECTIONS
#define XLOG_HEAP2_PRUNE_VACUUM_SCAN
Definition heapam_xlog.h:61
#define XLOG_HEAP2_PRUNE_ON_ACCESS
Definition heapam_xlog.h:60
#define XLHP_CLEANUP_LOCK
#define XLHP_HAS_DEAD_ITEMS
#define XLOG_HEAP2_PRUNE_VACUUM_CLEANUP
Definition heapam_xlog.h:62
#define XLHP_IS_CATALOG_REL
static int heap_log_freeze_plan(HeapTupleFreeze *tuples, int ntuples, xlhp_freeze_plan *plans_out, OffsetNumber *offsets_out)
Definition pruneheap.c:2083
#define XLogHintBitIsNeeded()
Definition xlog.h:122
#define REGBUF_NO_IMAGE
Definition xloginsert.h:33

References Assert, BufferGetPage(), BufferIsDirty(), data, elog, ERROR, fb(), heap_log_freeze_plan(), MaxHeapTuplesPerPage, xlhp_prune_items::ntargets, PageSetLSN(), PRUNE_ON_ACCESS, PRUNE_VACUUM_CLEANUP, PRUNE_VACUUM_SCAN, REGBUF_NO_IMAGE, REGBUF_STANDARD, RelationIsAccessibleInLogicalDecoding, SizeOfHeapPrune, TransactionIdIsValid, VISIBILITYMAP_ALL_FROZEN, VISIBILITYMAP_ALL_VISIBLE, VISIBILITYMAP_VALID_BITS, XLHP_CLEANUP_LOCK, XLHP_HAS_CONFLICT_HORIZON, XLHP_HAS_DEAD_ITEMS, XLHP_HAS_FREEZE_PLANS, XLHP_HAS_NOW_UNUSED_ITEMS, XLHP_HAS_REDIRECTIONS, XLHP_IS_CATALOG_REL, XLHP_VM_ALL_FROZEN, XLHP_VM_ALL_VISIBLE, XLOG_HEAP2_PRUNE_ON_ACCESS, XLOG_HEAP2_PRUNE_VACUUM_CLEANUP, XLOG_HEAP2_PRUNE_VACUUM_SCAN, XLogBeginInsert(), XLogHintBitIsNeeded, XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by heap_page_prune_and_freeze(), and lazy_vacuum_heap_page().

◆ ReleaseBulkInsertStatePin()

void ReleaseBulkInsertStatePin ( BulkInsertState  bistate)
extern

Definition at line 2114 of file heapam.c.

2115{
2116 if (bistate->current_buf != InvalidBuffer)
2117 ReleaseBuffer(bistate->current_buf);
2118 bistate->current_buf = InvalidBuffer;
2119
2120 /*
2121 * Despite the name, we also reset bulk relation extension state.
2122 * Otherwise we can end up erroring out due to looking for free space in
2123 * ->next_free of one partition, even though ->next_free was set when
2124 * extending another partition. It could obviously also be bad for
2125 * efficiency to look at existing blocks at offsets from another
2126 * partition, even if we don't error out.
2127 */
2128 bistate->next_free = InvalidBlockNumber;
2129 bistate->last_free = InvalidBlockNumber;
2130}

References BulkInsertStateData::current_buf, InvalidBlockNumber, InvalidBuffer, BulkInsertStateData::last_free, BulkInsertStateData::next_free, and ReleaseBuffer().

Referenced by CopyFrom().

◆ ResolveCminCmaxDuringDecoding()

bool ResolveCminCmaxDuringDecoding ( struct HTAB *tuplecid_data,
Snapshot  snapshot,
HeapTuple  htup,
Buffer  buffer,
CommandId *cmin,
CommandId *cmax 
)
extern

Definition at line 5555 of file reorderbuffer.c.

5559{
5562 ForkNumber forkno;
5563 BlockNumber blockno;
5564 bool updated_mapping = false;
5565
5566 /*
5567 * Return unresolved if tuplecid_data is not valid. That's because when
5568 * streaming in-progress transactions we may run into tuples with the CID
5569 * before actually decoding them. Think e.g. about INSERT followed by
5570 * TRUNCATE, where the TRUNCATE may not be decoded yet when applying the
5571 * INSERT. So in such cases, we assume the CID is from the future
5572 * command.
5573 */
5574 if (tuplecid_data == NULL)
5575 return false;
5576
5577 /* be careful about padding */
5578 memset(&key, 0, sizeof(key));
5579
5580 Assert(!BufferIsLocal(buffer));
5581
5582 /*
5583 * get relfilelocator from the buffer, no convenient way to access it
5584 * other than that.
5585 */
5586 BufferGetTag(buffer, &key.rlocator, &forkno, &blockno);
5587
5588 /* tuples can only be in the main fork */
5589 Assert(forkno == MAIN_FORKNUM);
5590 Assert(blockno == ItemPointerGetBlockNumber(&htup->t_self));
5591
5592 ItemPointerCopy(&htup->t_self,
5593 &key.tid);
5594
5595restart:
5598
5599 /*
5600 * failed to find a mapping, check whether the table was rewritten and
5601 * apply mapping if so, but only do that once - there can be no new
5602 * mappings while we are in here since we have to hold a lock on the
5603 * relation.
5604 */
5605 if (ent == NULL && !updated_mapping)
5606 {
5608 /* now check but don't update for a mapping again */
5609 updated_mapping = true;
5610 goto restart;
5611 }
5612 else if (ent == NULL)
5613 return false;
5614
5615 if (cmin)
5616 *cmin = ent->cmin;
5617 if (cmax)
5618 *cmax = ent->cmax;
5619 return true;
5620}
#define BufferIsLocal(buffer)
Definition buf.h:37
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition dynahash.c:952
@ HASH_FIND
Definition hsearch.h:113
static void UpdateLogicalMappings(HTAB *tuplecid_data, Oid relid, Snapshot snapshot)
static HTAB * tuplecid_data
Definition snapmgr.c:163

References Assert, BufferGetTag(), BufferIsLocal, fb(), HASH_FIND, hash_search(), ItemPointerCopy(), ItemPointerGetBlockNumber(), MAIN_FORKNUM, HeapTupleData::t_self, HeapTupleData::t_tableOid, tuplecid_data, and UpdateLogicalMappings().

Referenced by HeapTupleSatisfiesHistoricMVCC().

◆ simple_heap_delete()

void simple_heap_delete ( Relation  relation,
const ItemPointerData *tid 
)
extern

Definition at line 3277 of file heapam.c.

3278{
3279 TM_Result result;
3280 TM_FailureData tmfd;
3281
3282 result = heap_delete(relation, tid,
3284 true /* wait for commit */ ,
3285 &tmfd, false /* changingPart */ );
3286 switch (result)
3287 {
3288 case TM_SelfModified:
3289 /* Tuple was already updated in current command? */
3290 elog(ERROR, "tuple already updated by self");
3291 break;
3292
3293 case TM_Ok:
3294 /* done successfully */
3295 break;
3296
3297 case TM_Updated:
3298 elog(ERROR, "tuple concurrently updated");
3299 break;
3300
3301 case TM_Deleted:
3302 elog(ERROR, "tuple concurrently deleted");
3303 break;
3304
3305 default:
3306 elog(ERROR, "unrecognized heap_delete status: %u", result);
3307 break;
3308 }
3309}
TM_Result heap_delete(Relation relation, const ItemPointerData *tid, CommandId cid, Snapshot crosscheck, bool wait, TM_FailureData *tmfd, bool changingPart)
Definition heapam.c:2854

References elog, ERROR, GetCurrentCommandId(), heap_delete(), InvalidSnapshot, TM_Deleted, TM_Ok, TM_SelfModified, and TM_Updated.

Referenced by CatalogTupleDelete(), and toast_delete_datum().

◆ simple_heap_insert()

void simple_heap_insert ( Relation  relation,
HeapTuple  tup 
)
extern

Definition at line 2796 of file heapam.c.

2797{
2798 heap_insert(relation, tup, GetCurrentCommandId(true), 0, NULL);
2799}
void heap_insert(Relation relation, HeapTuple tup, CommandId cid, int options, BulkInsertState bistate)
Definition heapam.c:2152

References fb(), GetCurrentCommandId(), and heap_insert().

Referenced by CatalogTupleInsert(), CatalogTupleInsertWithInfo(), and InsertOneTuple().

◆ simple_heap_update()

void simple_heap_update ( Relation  relation,
const ItemPointerData *otid,
HeapTuple  tup,
TU_UpdateIndexes update_indexes 
)
extern

Definition at line 4570 of file heapam.c.

4572{
4573 TM_Result result;
4574 TM_FailureData tmfd;
4575 LockTupleMode lockmode;
4576
4577 result = heap_update(relation, otid, tup,
4579 true /* wait for commit */ ,
4580 &tmfd, &lockmode, update_indexes);
4581 switch (result)
4582 {
4583 case TM_SelfModified:
4584 /* Tuple was already updated in current command? */
4585 elog(ERROR, "tuple already updated by self");
4586 break;
4587
4588 case TM_Ok:
4589 /* done successfully */
4590 break;
4591
4592 case TM_Updated:
4593 elog(ERROR, "tuple concurrently updated");
4594 break;
4595
4596 case TM_Deleted:
4597 elog(ERROR, "tuple concurrently deleted");
4598 break;
4599
4600 default:
4601 elog(ERROR, "unrecognized heap_update status: %u", result);
4602 break;
4603 }
4604}
TM_Result heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup, CommandId cid, Snapshot crosscheck, bool wait, TM_FailureData *tmfd, LockTupleMode *lockmode, TU_UpdateIndexes *update_indexes)
Definition heapam.c:3323

References elog, ERROR, fb(), GetCurrentCommandId(), heap_update(), InvalidSnapshot, TM_Deleted, TM_Ok, TM_SelfModified, and TM_Updated.

Referenced by CatalogTupleUpdate(), and CatalogTupleUpdateWithInfo().