PostgreSQL Source Code git master
Loading...
Searching...
No Matches
heapam.h File Reference
#include "access/heapam_xlog.h"
#include "access/relation.h"
#include "access/relscan.h"
#include "access/sdir.h"
#include "access/skey.h"
#include "access/table.h"
#include "access/tableam.h"
#include "commands/vacuum.h"
#include "nodes/lockoptions.h"
#include "nodes/primnodes.h"
#include "storage/bufpage.h"
#include "storage/dsm.h"
#include "storage/lockdefs.h"
#include "storage/read_stream.h"
#include "storage/shm_toc.h"
#include "utils/relcache.h"
#include "utils/snapshot.h"
Include dependency graph for heapam.h:
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Data Structures

struct  HeapScanDescData
 
struct  BitmapHeapScanDescData
 
struct  IndexFetchHeapData
 
struct  HeapTupleFreeze
 
struct  HeapPageFreeze
 
struct  PruneFreezeParams
 
struct  PruneFreezeResult
 
struct  BatchMVCCState
 

Macros

#define HEAP_INSERT_SKIP_FSM   TABLE_INSERT_SKIP_FSM
 
#define HEAP_INSERT_FROZEN   TABLE_INSERT_FROZEN
 
#define HEAP_INSERT_NO_LOGICAL   TABLE_INSERT_NO_LOGICAL
 
#define HEAP_INSERT_SPECULATIVE   0x0010
 
#define HEAP_PAGE_PRUNE_MARK_UNUSED_NOW   (1 << 0)
 
#define HEAP_PAGE_PRUNE_FREEZE   (1 << 1)
 
#define MaxLockTupleMode   LockTupleExclusive
 
#define HEAP_FREEZE_CHECK_XMIN_COMMITTED   0x01
 
#define HEAP_FREEZE_CHECK_XMAX_ABORTED   0x02
 

Typedefs

typedef struct BulkInsertStateData *BulkInsertState
 
typedef struct GlobalVisState GlobalVisState
 
typedef struct TupleTableSlot TupleTableSlot
 
typedef struct HeapScanDescData HeapScanDescData
 
typedef struct HeapScanDescData *HeapScanDesc
 
typedef struct BitmapHeapScanDescData BitmapHeapScanDescData
 
typedef struct BitmapHeapScanDescData *BitmapHeapScanDesc
 
typedef struct IndexFetchHeapData IndexFetchHeapData
 
typedef struct HeapTupleFreeze HeapTupleFreeze
 
typedef struct HeapPageFreeze HeapPageFreeze
 
typedef struct PruneFreezeParams PruneFreezeParams
 
typedef struct PruneFreezeResult PruneFreezeResult
 
typedef struct BatchMVCCState BatchMVCCState
 

Enumerations

enum  HTSV_Result {
  HEAPTUPLE_DEAD , HEAPTUPLE_LIVE , HEAPTUPLE_RECENTLY_DEAD , HEAPTUPLE_INSERT_IN_PROGRESS ,
  HEAPTUPLE_DELETE_IN_PROGRESS
}
 
enum  PruneReason { PRUNE_ON_ACCESS , PRUNE_VACUUM_SCAN , PRUNE_VACUUM_CLEANUP }
 

Functions

TableScanDesc heap_beginscan (Relation relation, Snapshot snapshot, int nkeys, ScanKey key, ParallelTableScanDesc parallel_scan, uint32 flags)
 
void heap_setscanlimits (TableScanDesc sscan, BlockNumber startBlk, BlockNumber numBlks)
 
void heap_prepare_pagescan (TableScanDesc sscan)
 
void heap_rescan (TableScanDesc sscan, ScanKey key, bool set_params, bool allow_strat, bool allow_sync, bool allow_pagemode)
 
void heap_endscan (TableScanDesc sscan)
 
HeapTuple heap_getnext (TableScanDesc sscan, ScanDirection direction)
 
bool heap_getnextslot (TableScanDesc sscan, ScanDirection direction, TupleTableSlot *slot)
 
void heap_set_tidrange (TableScanDesc sscan, ItemPointer mintid, ItemPointer maxtid)
 
bool heap_getnextslot_tidrange (TableScanDesc sscan, ScanDirection direction, TupleTableSlot *slot)
 
bool heap_fetch (Relation relation, Snapshot snapshot, HeapTuple tuple, Buffer *userbuf, bool keep_buf)
 
bool heap_hot_search_buffer (ItemPointer tid, Relation relation, Buffer buffer, Snapshot snapshot, HeapTuple heapTuple, bool *all_dead, bool first_call)
 
void heap_get_latest_tid (TableScanDesc sscan, ItemPointer tid)
 
BulkInsertState GetBulkInsertState (void)
 
void FreeBulkInsertState (BulkInsertState)
 
void ReleaseBulkInsertStatePin (BulkInsertState bistate)
 
void heap_insert (Relation relation, HeapTuple tup, CommandId cid, int options, BulkInsertState bistate)
 
void heap_multi_insert (Relation relation, TupleTableSlot **slots, int ntuples, CommandId cid, int options, BulkInsertState bistate)
 
TM_Result heap_delete (Relation relation, const ItemPointerData *tid, CommandId cid, Snapshot crosscheck, bool wait, TM_FailureData *tmfd, bool changingPart)
 
void heap_finish_speculative (Relation relation, const ItemPointerData *tid)
 
void heap_abort_speculative (Relation relation, const ItemPointerData *tid)
 
TM_Result heap_update (Relation relation, const ItemPointerData *otid, HeapTuple newtup, CommandId cid, Snapshot crosscheck, bool wait, TM_FailureData *tmfd, LockTupleMode *lockmode, TU_UpdateIndexes *update_indexes)
 
TM_Result heap_lock_tuple (Relation relation, HeapTuple tuple, CommandId cid, LockTupleMode mode, LockWaitPolicy wait_policy, bool follow_updates, Buffer *buffer, TM_FailureData *tmfd)
 
bool heap_inplace_lock (Relation relation, HeapTuple oldtup_ptr, Buffer buffer, void(*release_callback)(void *), void *arg)
 
void heap_inplace_update_and_unlock (Relation relation, HeapTuple oldtup, HeapTuple tuple, Buffer buffer)
 
void heap_inplace_unlock (Relation relation, HeapTuple oldtup, Buffer buffer)
 
bool heap_prepare_freeze_tuple (HeapTupleHeader tuple, const struct VacuumCutoffs *cutoffs, HeapPageFreeze *pagefrz, HeapTupleFreeze *frz, bool *totally_frozen)
 
void heap_pre_freeze_checks (Buffer buffer, HeapTupleFreeze *tuples, int ntuples)
 
void heap_freeze_prepared_tuples (Buffer buffer, HeapTupleFreeze *tuples, int ntuples)
 
bool heap_freeze_tuple (HeapTupleHeader tuple, TransactionId relfrozenxid, TransactionId relminmxid, TransactionId FreezeLimit, TransactionId MultiXactCutoff)
 
bool heap_tuple_should_freeze (HeapTupleHeader tuple, const struct VacuumCutoffs *cutoffs, TransactionId *NoFreezePageRelfrozenXid, MultiXactId *NoFreezePageRelminMxid)
 
bool heap_tuple_needs_eventual_freeze (HeapTupleHeader tuple)
 
void simple_heap_insert (Relation relation, HeapTuple tup)
 
void simple_heap_delete (Relation relation, const ItemPointerData *tid)
 
void simple_heap_update (Relation relation, const ItemPointerData *otid, HeapTuple tup, TU_UpdateIndexes *update_indexes)
 
TransactionId heap_index_delete_tuples (Relation rel, TM_IndexDeleteOp *delstate)
 
void heap_page_prune_opt (Relation relation, Buffer buffer)
 
void heap_page_prune_and_freeze (PruneFreezeParams *params, PruneFreezeResult *presult, OffsetNumber *off_loc, TransactionId *new_relfrozen_xid, MultiXactId *new_relmin_mxid)
 
void heap_page_prune_execute (Buffer buffer, bool lp_truncate_only, OffsetNumber *redirected, int nredirected, OffsetNumber *nowdead, int ndead, OffsetNumber *nowunused, int nunused)
 
void heap_get_root_tuples (Page page, OffsetNumber *root_offsets)
 
void log_heap_prune_and_freeze (Relation relation, Buffer buffer, Buffer vmbuffer, uint8 vmflags, TransactionId conflict_xid, bool cleanup_lock, PruneReason reason, HeapTupleFreeze *frozen, int nfrozen, OffsetNumber *redirected, int nredirected, OffsetNumber *dead, int ndead, OffsetNumber *unused, int nunused)
 
void heap_vacuum_rel (Relation rel, const VacuumParams params, BufferAccessStrategy bstrategy)
 
bool HeapTupleSatisfiesVisibility (HeapTuple htup, Snapshot snapshot, Buffer buffer)
 
TM_Result HeapTupleSatisfiesUpdate (HeapTuple htup, CommandId curcid, Buffer buffer)
 
HTSV_Result HeapTupleSatisfiesVacuum (HeapTuple htup, TransactionId OldestXmin, Buffer buffer)
 
HTSV_Result HeapTupleSatisfiesVacuumHorizon (HeapTuple htup, Buffer buffer, TransactionId *dead_after)
 
void HeapTupleSetHintBits (HeapTupleHeader tuple, Buffer buffer, uint16 infomask, TransactionId xid)
 
bool HeapTupleHeaderIsOnlyLocked (HeapTupleHeader tuple)
 
bool HeapTupleIsSurelyDead (HeapTuple htup, GlobalVisState *vistest)
 
int HeapTupleSatisfiesMVCCBatch (Snapshot snapshot, Buffer buffer, int ntups, BatchMVCCState *batchmvcc, OffsetNumber *vistuples_dense)
 
bool ResolveCminCmaxDuringDecoding (struct HTAB *tuplecid_data, Snapshot snapshot, HeapTuple htup, Buffer buffer, CommandId *cmin, CommandId *cmax)
 
void HeapCheckForSerializableConflictOut (bool visible, Relation relation, HeapTuple tuple, Buffer buffer, Snapshot snapshot)
 
static void heap_execute_freeze_tuple (HeapTupleHeader tuple, HeapTupleFreeze *frz)
 

Macro Definition Documentation

◆ HEAP_FREEZE_CHECK_XMAX_ABORTED

#define HEAP_FREEZE_CHECK_XMAX_ABORTED   0x02

Definition at line 138 of file heapam.h.

◆ HEAP_FREEZE_CHECK_XMIN_COMMITTED

#define HEAP_FREEZE_CHECK_XMIN_COMMITTED   0x01

Definition at line 137 of file heapam.h.

◆ HEAP_INSERT_FROZEN

#define HEAP_INSERT_FROZEN   TABLE_INSERT_FROZEN

Definition at line 38 of file heapam.h.

◆ HEAP_INSERT_NO_LOGICAL

#define HEAP_INSERT_NO_LOGICAL   TABLE_INSERT_NO_LOGICAL

Definition at line 39 of file heapam.h.

◆ HEAP_INSERT_SKIP_FSM

#define HEAP_INSERT_SKIP_FSM   TABLE_INSERT_SKIP_FSM

Definition at line 37 of file heapam.h.

◆ HEAP_INSERT_SPECULATIVE

#define HEAP_INSERT_SPECULATIVE   0x0010

Definition at line 40 of file heapam.h.

◆ HEAP_PAGE_PRUNE_FREEZE

#define HEAP_PAGE_PRUNE_FREEZE   (1 << 1)

Definition at line 44 of file heapam.h.

◆ HEAP_PAGE_PRUNE_MARK_UNUSED_NOW

#define HEAP_PAGE_PRUNE_MARK_UNUSED_NOW   (1 << 0)

Definition at line 43 of file heapam.h.

◆ MaxLockTupleMode

#define MaxLockTupleMode   LockTupleExclusive

Definition at line 51 of file heapam.h.

Typedef Documentation

◆ BatchMVCCState

◆ BitmapHeapScanDesc

Definition at line 110 of file heapam.h.

◆ BitmapHeapScanDescData

◆ BulkInsertState

Definition at line 46 of file heapam.h.

◆ GlobalVisState

Definition at line 47 of file heapam.h.

◆ HeapPageFreeze

◆ HeapScanDesc

Definition at line 102 of file heapam.h.

◆ HeapScanDescData

◆ HeapTupleFreeze

◆ IndexFetchHeapData

◆ PruneFreezeParams

◆ PruneFreezeResult

◆ TupleTableSlot

Definition at line 48 of file heapam.h.

Enumeration Type Documentation

◆ HTSV_Result

Enumerator
HEAPTUPLE_DEAD 
HEAPTUPLE_LIVE 
HEAPTUPLE_RECENTLY_DEAD 
HEAPTUPLE_INSERT_IN_PROGRESS 
HEAPTUPLE_DELETE_IN_PROGRESS 

Definition at line 124 of file heapam.h.

125{
126 HEAPTUPLE_DEAD, /* tuple is dead and deletable */
127 HEAPTUPLE_LIVE, /* tuple is live (committed, no deleter) */
128 HEAPTUPLE_RECENTLY_DEAD, /* tuple is dead, but not deletable yet */
129 HEAPTUPLE_INSERT_IN_PROGRESS, /* inserting xact is still in progress */
130 HEAPTUPLE_DELETE_IN_PROGRESS, /* deleting xact is still in progress */
HTSV_Result
Definition heapam.h:125
@ HEAPTUPLE_RECENTLY_DEAD
Definition heapam.h:128
@ HEAPTUPLE_INSERT_IN_PROGRESS
Definition heapam.h:129
@ HEAPTUPLE_LIVE
Definition heapam.h:127
@ HEAPTUPLE_DELETE_IN_PROGRESS
Definition heapam.h:130
@ HEAPTUPLE_DEAD
Definition heapam.h:126

◆ PruneReason

Enumerator
PRUNE_ON_ACCESS 
PRUNE_VACUUM_SCAN 
PRUNE_VACUUM_CLEANUP 

Definition at line 226 of file heapam.h.

227{
228 PRUNE_ON_ACCESS, /* on-access pruning */
229 PRUNE_VACUUM_SCAN, /* VACUUM 1st heap pass */
230 PRUNE_VACUUM_CLEANUP, /* VACUUM 2nd heap pass */
PruneReason
Definition heapam.h:227
@ PRUNE_VACUUM_CLEANUP
Definition heapam.h:230
@ PRUNE_ON_ACCESS
Definition heapam.h:228
@ PRUNE_VACUUM_SCAN
Definition heapam.h:229

Function Documentation

◆ FreeBulkInsertState()

void FreeBulkInsertState ( BulkInsertState  bistate)
extern

Definition at line 2092 of file heapam.c.

2093{
2094 if (bistate->current_buf != InvalidBuffer)
2095 ReleaseBuffer(bistate->current_buf);
2096 FreeAccessStrategy(bistate->strategy);
2097 pfree(bistate);
2098}
#define InvalidBuffer
Definition buf.h:25
void ReleaseBuffer(Buffer buffer)
Definition bufmgr.c:5502
void FreeAccessStrategy(BufferAccessStrategy strategy)
Definition freelist.c:643
void pfree(void *pointer)
Definition mcxt.c:1616
BufferAccessStrategy strategy
Definition hio.h:31
Buffer current_buf
Definition hio.h:32

References BulkInsertStateData::current_buf, FreeAccessStrategy(), InvalidBuffer, pfree(), ReleaseBuffer(), and BulkInsertStateData::strategy.

Referenced by ATRewriteTable(), CopyFrom(), CopyMultiInsertBufferCleanup(), deleteSplitPartitionContext(), intorel_shutdown(), MergePartitionsMoveRows(), and transientrel_shutdown().

◆ GetBulkInsertState()

BulkInsertState GetBulkInsertState ( void  )
extern

Definition at line 2075 of file heapam.c.

2076{
2077 BulkInsertState bistate;
2078
2081 bistate->current_buf = InvalidBuffer;
2082 bistate->next_free = InvalidBlockNumber;
2083 bistate->last_free = InvalidBlockNumber;
2084 bistate->already_extended_by = 0;
2085 return bistate;
2086}
#define InvalidBlockNumber
Definition block.h:33
@ BAS_BULKWRITE
Definition bufmgr.h:39
#define palloc_object(type)
Definition fe_memutils.h:74
BufferAccessStrategy GetAccessStrategy(BufferAccessStrategyType btype)
Definition freelist.c:461
struct BulkInsertStateData * BulkInsertState
Definition heapam.h:46
BlockNumber last_free
Definition hio.h:49
uint32 already_extended_by
Definition hio.h:50
BlockNumber next_free
Definition hio.h:48

References BulkInsertStateData::already_extended_by, BAS_BULKWRITE, BulkInsertStateData::current_buf, GetAccessStrategy(), InvalidBlockNumber, InvalidBuffer, BulkInsertStateData::last_free, BulkInsertStateData::next_free, palloc_object, and BulkInsertStateData::strategy.

Referenced by ATRewriteTable(), CopyFrom(), CopyMultiInsertBufferInit(), createSplitPartitionContext(), intorel_startup(), MergePartitionsMoveRows(), and transientrel_startup().

◆ heap_abort_speculative()

void heap_abort_speculative ( Relation  relation,
const ItemPointerData * tid 
)
extern

Definition at line 6256 of file heapam.c.

6257{
6259 ItemId lp;
6260 HeapTupleData tp;
6261 Page page;
6262 BlockNumber block;
6263 Buffer buffer;
6264
6266
6267 block = ItemPointerGetBlockNumber(tid);
6268 buffer = ReadBuffer(relation, block);
6269 page = BufferGetPage(buffer);
6270
6272
6273 /*
6274 * Page can't be all visible, we just inserted into it, and are still
6275 * running.
6276 */
6277 Assert(!PageIsAllVisible(page));
6278
6281
6282 tp.t_tableOid = RelationGetRelid(relation);
6283 tp.t_data = (HeapTupleHeader) PageGetItem(page, lp);
6284 tp.t_len = ItemIdGetLength(lp);
6285 tp.t_self = *tid;
6286
6287 /*
6288 * Sanity check that the tuple really is a speculatively inserted tuple,
6289 * inserted by us.
6290 */
6291 if (tp.t_data->t_choice.t_heap.t_xmin != xid)
6292 elog(ERROR, "attempted to kill a tuple inserted by another transaction");
6293 if (!(IsToastRelation(relation) || HeapTupleHeaderIsSpeculative(tp.t_data)))
6294 elog(ERROR, "attempted to kill a non-speculative tuple");
6296
6297 /*
6298 * No need to check for serializable conflicts here. There is never a
6299 * need for a combo CID, either. No need to extract replica identity, or
6300 * do anything special with infomask bits.
6301 */
6302
6304
6305 /*
6306 * The tuple will become DEAD immediately. Flag that this page is a
6307 * candidate for pruning by setting xmin to TransactionXmin. While not
6308 * immediately prunable, it is the oldest xid we can cheaply determine
6309 * that's safe against wraparound / being older than the table's
6310 * relfrozenxid. To defend against the unlikely case of a new relation
6311 * having a newer relfrozenxid than our TransactionXmin, use relfrozenxid
6312 * if so (vacuum can't subsequently move relfrozenxid to beyond
6313 * TransactionXmin, so there's no race here).
6314 */
6316 {
6317 TransactionId relfrozenxid = relation->rd_rel->relfrozenxid;
6319
6320 if (TransactionIdPrecedes(TransactionXmin, relfrozenxid))
6321 prune_xid = relfrozenxid;
6322 else
6325 }
6326
6327 /* store transaction information of xact deleting the tuple */
6330
6331 /*
6332 * Set the tuple header xmin to InvalidTransactionId. This makes the
6333 * tuple immediately invisible to everyone. (In particular, to any
6334 * transactions waiting on the speculative token, woken up later.)
6335 */
6337
6338 /* Clear the speculative insertion token too */
6339 tp.t_data->t_ctid = tp.t_self;
6340
6341 MarkBufferDirty(buffer);
6342
6343 /*
6344 * XLOG stuff
6345 *
6346 * The WAL records generated here match heap_delete(). The same recovery
6347 * routines are used.
6348 */
6349 if (RelationNeedsWAL(relation))
6350 {
6353
6355 xlrec.infobits_set = compute_infobits(tp.t_data->t_infomask,
6356 tp.t_data->t_infomask2);
6358 xlrec.xmax = xid;
6359
6363
6364 /* No replica identity & replication origin logged */
6365
6367
6368 PageSetLSN(page, recptr);
6369 }
6370
6372
6374
6375 if (HeapTupleHasExternal(&tp))
6376 {
6377 Assert(!IsToastRelation(relation));
6378 heap_toast_delete(relation, &tp, true);
6379 }
6380
6381 /*
6382 * Never need to mark tuple for invalidation, since catalogs don't support
6383 * speculative insertion
6384 */
6385
6386 /* Now we can release the buffer */
6387 ReleaseBuffer(buffer);
6388
6389 /* count deletion, as we counted the insertion too */
6390 pgstat_count_heap_delete(relation);
6391}
uint32 BlockNumber
Definition block.h:31
int Buffer
Definition buf.h:23
void MarkBufferDirty(Buffer buffer)
Definition bufmgr.c:3057
Buffer ReadBuffer(Relation reln, BlockNumber blockNum)
Definition bufmgr.c:865
static Page BufferGetPage(Buffer buffer)
Definition bufmgr.h:466
@ BUFFER_LOCK_EXCLUSIVE
Definition bufmgr.h:220
@ BUFFER_LOCK_UNLOCK
Definition bufmgr.h:205
static void LockBuffer(Buffer buffer, BufferLockMode mode)
Definition bufmgr.h:328
static bool PageIsAllVisible(const PageData *page)
Definition bufpage.h:428
static ItemId PageGetItemId(Page page, OffsetNumber offsetNumber)
Definition bufpage.h:243
static void * PageGetItem(PageData *page, const ItemIdData *itemId)
Definition bufpage.h:353
static void PageSetLSN(Page page, XLogRecPtr lsn)
Definition bufpage.h:390
PageData * Page
Definition bufpage.h:81
#define PageSetPrunable(page, xid)
Definition bufpage.h:452
#define Assert(condition)
Definition c.h:906
uint32 TransactionId
Definition c.h:699
bool IsToastRelation(Relation relation)
Definition catalog.c:206
#define ERROR
Definition elog.h:39
#define elog(elevel,...)
Definition elog.h:226
static uint8 compute_infobits(uint16 infomask, uint16 infomask2)
Definition heapam.c:2799
#define XLOG_HEAP_DELETE
Definition heapam_xlog.h:34
#define SizeOfHeapDelete
#define XLH_DELETE_IS_SUPER
void heap_toast_delete(Relation rel, HeapTuple oldtup, bool is_speculative)
Definition heaptoast.c:43
HeapTupleHeaderData * HeapTupleHeader
Definition htup.h:23
static bool HeapTupleHasExternal(const HeapTupleData *tuple)
#define HEAP_XMAX_BITS
static bool HeapTupleHeaderIsHeapOnly(const HeapTupleHeaderData *tup)
#define HEAP_MOVED
static bool HeapTupleHeaderIsSpeculative(const HeapTupleHeaderData *tup)
static void HeapTupleHeaderSetXmin(HeapTupleHeaderData *tup, TransactionId xid)
#define ItemIdGetLength(itemId)
Definition itemid.h:59
#define ItemIdIsNormal(itemId)
Definition itemid.h:99
static OffsetNumber ItemPointerGetOffsetNumber(const ItemPointerData *pointer)
Definition itemptr.h:124
static BlockNumber ItemPointerGetBlockNumber(const ItemPointerData *pointer)
Definition itemptr.h:103
static bool ItemPointerIsValid(const ItemPointerData *pointer)
Definition itemptr.h:83
#define START_CRIT_SECTION()
Definition miscadmin.h:150
#define END_CRIT_SECTION()
Definition miscadmin.h:152
void pgstat_count_heap_delete(Relation rel)
static int fb(int x)
#define RelationGetRelid(relation)
Definition rel.h:514
#define RelationNeedsWAL(relation)
Definition rel.h:637
TransactionId TransactionXmin
Definition snapmgr.c:159
ItemPointerData t_self
Definition htup.h:65
uint32 t_len
Definition htup.h:64
HeapTupleHeader t_data
Definition htup.h:68
Oid t_tableOid
Definition htup.h:66
TransactionId t_xmin
union HeapTupleHeaderData::@51 t_choice
ItemPointerData t_ctid
HeapTupleFields t_heap
Form_pg_class rd_rel
Definition rel.h:111
#define InvalidTransactionId
Definition transam.h:31
#define TransactionIdIsValid(xid)
Definition transam.h:41
static bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition transam.h:263
TransactionId GetCurrentTransactionId(void)
Definition xact.c:455
uint64 XLogRecPtr
Definition xlogdefs.h:21
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition xloginsert.c:478
void XLogRegisterData(const void *data, uint32 len)
Definition xloginsert.c:368
void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
Definition xloginsert.c:245
void XLogBeginInsert(void)
Definition xloginsert.c:152
#define REGBUF_STANDARD
Definition xloginsert.h:35

References Assert, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetPage(), compute_infobits(), elog, END_CRIT_SECTION, ERROR, fb(), xl_heap_delete::flags, GetCurrentTransactionId(), HEAP_MOVED, heap_toast_delete(), HEAP_XMAX_BITS, HeapTupleHasExternal(), HeapTupleHeaderIsHeapOnly(), HeapTupleHeaderIsSpeculative(), HeapTupleHeaderSetXmin(), InvalidTransactionId, IsToastRelation(), ItemIdGetLength, ItemIdIsNormal, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), ItemPointerIsValid(), LockBuffer(), MarkBufferDirty(), PageGetItem(), PageGetItemId(), PageIsAllVisible(), PageSetLSN(), PageSetPrunable, pgstat_count_heap_delete(), RelationData::rd_rel, ReadBuffer(), REGBUF_STANDARD, RelationGetRelid, RelationNeedsWAL, ReleaseBuffer(), SizeOfHeapDelete, START_CRIT_SECTION, HeapTupleHeaderData::t_choice, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleHeaderData::t_heap, HeapTupleHeaderData::t_infomask, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, HeapTupleFields::t_xmin, TransactionIdIsValid, TransactionIdPrecedes(), TransactionXmin, XLH_DELETE_IS_SUPER, XLOG_HEAP_DELETE, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by heapam_tuple_complete_speculative(), and toast_delete_datum().

◆ heap_beginscan()

TableScanDesc heap_beginscan ( Relation  relation,
Snapshot  snapshot,
int  nkeys,
ScanKey  key,
ParallelTableScanDesc  parallel_scan,
uint32  flags 
)
extern

Definition at line 1164 of file heapam.c.

1168{
1169 HeapScanDesc scan;
1170
1171 /*
1172 * increment relation ref count while scanning relation
1173 *
1174 * This is just to make really sure the relcache entry won't go away while
1175 * the scan has a pointer to it. Caller should be holding the rel open
1176 * anyway, so this is redundant in all normal scenarios...
1177 */
1179
1180 /*
1181 * allocate and initialize scan descriptor
1182 */
1183 if (flags & SO_TYPE_BITMAPSCAN)
1184 {
1186
1187 /*
1188 * Bitmap Heap scans do not have any fields that a normal Heap Scan
1189 * does not have, so no special initializations required here.
1190 */
1191 scan = (HeapScanDesc) bscan;
1192 }
1193 else
1195
1196 scan->rs_base.rs_rd = relation;
1197 scan->rs_base.rs_snapshot = snapshot;
1198 scan->rs_base.rs_nkeys = nkeys;
1199 scan->rs_base.rs_flags = flags;
1200 scan->rs_base.rs_parallel = parallel_scan;
1201 scan->rs_strategy = NULL; /* set in initscan */
1202 scan->rs_cbuf = InvalidBuffer;
1203
1204 /*
1205 * Disable page-at-a-time mode if it's not a MVCC-safe snapshot.
1206 */
1207 if (!(snapshot && IsMVCCSnapshot(snapshot)))
1209
1210 /* Check that a historic snapshot is not used for non-catalog tables */
1211 if (snapshot &&
1212 IsHistoricMVCCSnapshot(snapshot) &&
1214 {
1215 ereport(ERROR,
1217 errmsg("cannot query non-catalog table \"%s\" during logical decoding",
1218 RelationGetRelationName(relation))));
1219 }
1220
1221 /*
1222 * For seqscan and sample scans in a serializable transaction, acquire a
1223 * predicate lock on the entire relation. This is required not only to
1224 * lock all the matching tuples, but also to conflict with new insertions
1225 * into the table. In an indexscan, we take page locks on the index pages
1226 * covering the range specified in the scan qual, but in a heap scan there
1227 * is nothing more fine-grained to lock. A bitmap scan is a different
1228 * story, there we have already scanned the index and locked the index
1229 * pages covering the predicate. But in that case we still have to lock
1230 * any matching heap tuples. For sample scan we could optimize the locking
1231 * to be at least page-level granularity, but we'd need to add per-tuple
1232 * locking for that.
1233 */
1235 {
1236 /*
1237 * Ensure a missing snapshot is noticed reliably, even if the
1238 * isolation mode means predicate locking isn't performed (and
1239 * therefore the snapshot isn't used here).
1240 */
1241 Assert(snapshot);
1242 PredicateLockRelation(relation, snapshot);
1243 }
1244
1245 /* we only need to set this up once */
1246 scan->rs_ctup.t_tableOid = RelationGetRelid(relation);
1247
1248 /*
1249 * Allocate memory to keep track of page allocation for parallel workers
1250 * when doing a parallel scan.
1251 */
1252 if (parallel_scan != NULL)
1254 else
1256
1257 /*
1258 * we do this here instead of in initscan() because heap_rescan also calls
1259 * initscan() and we don't want to allocate memory again
1260 */
1261 if (nkeys > 0)
1262 scan->rs_base.rs_key = palloc_array(ScanKeyData, nkeys);
1263 else
1264 scan->rs_base.rs_key = NULL;
1265
1266 initscan(scan, key, false);
1267
1268 scan->rs_read_stream = NULL;
1269
1270 /*
1271 * Set up a read stream for sequential scans and TID range scans. This
1272 * should be done after initscan() because initscan() allocates the
1273 * BufferAccessStrategy object passed to the read stream API.
1274 */
1275 if (scan->rs_base.rs_flags & SO_TYPE_SEQSCAN ||
1277 {
1279
1280 if (scan->rs_base.rs_parallel)
1282 else
1284
1285 /* ---
1286 * It is safe to use batchmode as the only locks taken by `cb`
1287 * are never taken while waiting for IO:
1288 * - SyncScanLock is used in the non-parallel case
1289 * - in the parallel case, only spinlocks and atomics are used
1290 * ---
1291 */
1294 scan->rs_strategy,
1295 scan->rs_base.rs_rd,
1297 cb,
1298 scan,
1299 0);
1300 }
1301 else if (scan->rs_base.rs_flags & SO_TYPE_BITMAPSCAN)
1302 {
1305 scan->rs_strategy,
1306 scan->rs_base.rs_rd,
1309 scan,
1310 sizeof(TBMIterateResult));
1311 }
1312
1313
1314 return (TableScanDesc) scan;
1315}
int errcode(int sqlerrcode)
Definition elog.c:874
int errmsg(const char *fmt,...)
Definition elog.c:1093
#define ereport(elevel,...)
Definition elog.h:150
#define palloc_array(type, count)
Definition fe_memutils.h:76
static BlockNumber heap_scan_stream_read_next_parallel(ReadStream *stream, void *callback_private_data, void *per_buffer_data)
Definition heapam.c:252
static BlockNumber heap_scan_stream_read_next_serial(ReadStream *stream, void *callback_private_data, void *per_buffer_data)
Definition heapam.c:292
static BlockNumber bitmapheap_stream_read_next(ReadStream *pgsr, void *private_data, void *per_buffer_data)
Definition heapam.c:317
static void initscan(HeapScanDesc scan, ScanKey key, bool keep_startblock)
Definition heapam.c:357
struct HeapScanDescData * HeapScanDesc
Definition heapam.h:102
void PredicateLockRelation(Relation relation, Snapshot snapshot)
Definition predicate.c:2574
ReadStream * read_stream_begin_relation(int flags, BufferAccessStrategy strategy, Relation rel, ForkNumber forknum, ReadStreamBlockNumberCB callback, void *callback_private_data, size_t per_buffer_data_size)
#define READ_STREAM_USE_BATCHING
Definition read_stream.h:64
BlockNumber(* ReadStreamBlockNumberCB)(ReadStream *stream, void *callback_private_data, void *per_buffer_data)
Definition read_stream.h:77
#define READ_STREAM_DEFAULT
Definition read_stream.h:21
#define READ_STREAM_SEQUENTIAL
Definition read_stream.h:36
#define RelationGetRelationName(relation)
Definition rel.h:548
#define RelationIsAccessibleInLogicalDecoding(relation)
Definition rel.h:693
void RelationIncrementReferenceCount(Relation rel)
Definition relcache.c:2182
@ MAIN_FORKNUM
Definition relpath.h:58
#define IsHistoricMVCCSnapshot(snapshot)
Definition snapmgr.h:59
#define IsMVCCSnapshot(snapshot)
Definition snapmgr.h:55
BufferAccessStrategy rs_strategy
Definition heapam.h:73
Buffer rs_cbuf
Definition heapam.h:70
ParallelBlockTableScanWorkerData * rs_parallelworkerdata
Definition heapam.h:95
HeapTupleData rs_ctup
Definition heapam.h:75
ReadStream * rs_read_stream
Definition heapam.h:78
TableScanDescData rs_base
Definition heapam.h:58
Relation rs_rd
Definition relscan.h:35
uint32 rs_flags
Definition relscan.h:63
struct ScanKeyData * rs_key
Definition relscan.h:38
struct SnapshotData * rs_snapshot
Definition relscan.h:36
struct ParallelTableScanDescData * rs_parallel
Definition relscan.h:65
@ SO_TYPE_TIDRANGESCAN
Definition tableam.h:53
@ SO_TYPE_SAMPLESCAN
Definition tableam.h:51
@ SO_TYPE_SEQSCAN
Definition tableam.h:49
@ SO_TYPE_BITMAPSCAN
Definition tableam.h:50

References Assert, bitmapheap_stream_read_next(), ereport, errcode(), errmsg(), ERROR, fb(), heap_scan_stream_read_next_parallel(), heap_scan_stream_read_next_serial(), initscan(), InvalidBuffer, IsHistoricMVCCSnapshot, IsMVCCSnapshot, MAIN_FORKNUM, palloc_array, palloc_object, PredicateLockRelation(), read_stream_begin_relation(), READ_STREAM_DEFAULT, READ_STREAM_SEQUENTIAL, READ_STREAM_USE_BATCHING, RelationGetRelationName, RelationGetRelid, RelationIncrementReferenceCount(), RelationIsAccessibleInLogicalDecoding, HeapScanDescData::rs_base, HeapScanDescData::rs_cbuf, HeapScanDescData::rs_ctup, TableScanDescData::rs_flags, TableScanDescData::rs_key, TableScanDescData::rs_nkeys, TableScanDescData::rs_parallel, HeapScanDescData::rs_parallelworkerdata, TableScanDescData::rs_rd, HeapScanDescData::rs_read_stream, TableScanDescData::rs_snapshot, HeapScanDescData::rs_strategy, SO_TYPE_BITMAPSCAN, SO_TYPE_SAMPLESCAN, SO_TYPE_SEQSCAN, SO_TYPE_TIDRANGESCAN, and HeapTupleData::t_tableOid.

◆ heap_delete()

TM_Result heap_delete ( Relation  relation,
const ItemPointerData tid,
CommandId  cid,
Snapshot  crosscheck,
bool  wait,
TM_FailureData tmfd,
bool  changingPart 
)
extern

Definition at line 2844 of file heapam.c.

2847{
2848 TM_Result result;
2850 ItemId lp;
2851 HeapTupleData tp;
2852 Page page;
2853 BlockNumber block;
2854 Buffer buffer;
2855 Buffer vmbuffer = InvalidBuffer;
2856 TransactionId new_xmax;
2859 bool have_tuple_lock = false;
2860 bool iscombo;
2861 bool all_visible_cleared = false;
2862 HeapTuple old_key_tuple = NULL; /* replica identity of the tuple */
2863 bool old_key_copied = false;
2864
2866
2867 AssertHasSnapshotForToast(relation);
2868
2869 /*
2870 * Forbid this during a parallel operation, lest it allocate a combo CID.
2871 * Other workers might need that combo CID for visibility checks, and we
2872 * have no provision for broadcasting it to them.
2873 */
2874 if (IsInParallelMode())
2875 ereport(ERROR,
2877 errmsg("cannot delete tuples during a parallel operation")));
2878
2879 block = ItemPointerGetBlockNumber(tid);
2880 buffer = ReadBuffer(relation, block);
2881 page = BufferGetPage(buffer);
2882
2883 /*
2884 * Before locking the buffer, pin the visibility map page if it appears to
2885 * be necessary. Since we haven't got the lock yet, someone else might be
2886 * in the middle of changing this, so we'll need to recheck after we have
2887 * the lock.
2888 */
2889 if (PageIsAllVisible(page))
2890 visibilitymap_pin(relation, block, &vmbuffer);
2891
2893
2896
2897 tp.t_tableOid = RelationGetRelid(relation);
2898 tp.t_data = (HeapTupleHeader) PageGetItem(page, lp);
2899 tp.t_len = ItemIdGetLength(lp);
2900 tp.t_self = *tid;
2901
2902l1:
2903
2904 /*
2905 * If we didn't pin the visibility map page and the page has become all
2906 * visible while we were busy locking the buffer, we'll have to unlock and
2907 * re-lock, to avoid holding the buffer lock across an I/O. That's a bit
2908 * unfortunate, but hopefully shouldn't happen often.
2909 */
2910 if (vmbuffer == InvalidBuffer && PageIsAllVisible(page))
2911 {
2913 visibilitymap_pin(relation, block, &vmbuffer);
2915 }
2916
2917 result = HeapTupleSatisfiesUpdate(&tp, cid, buffer);
2918
2919 if (result == TM_Invisible)
2920 {
2921 UnlockReleaseBuffer(buffer);
2922 ereport(ERROR,
2924 errmsg("attempted to delete invisible tuple")));
2925 }
2926 else if (result == TM_BeingModified && wait)
2927 {
2930
2931 /* must copy state data before unlocking buffer */
2934
2935 /*
2936 * Sleep until concurrent transaction ends -- except when there's a
2937 * single locker and it's our own transaction. Note we don't care
2938 * which lock mode the locker has, because we need the strongest one.
2939 *
2940 * Before sleeping, we need to acquire tuple lock to establish our
2941 * priority for the tuple (see heap_lock_tuple). LockTuple will
2942 * release us when we are next-in-line for the tuple.
2943 *
2944 * If we are forced to "start over" below, we keep the tuple lock;
2945 * this arranges that we stay at the head of the line while rechecking
2946 * tuple state.
2947 */
2949 {
2950 bool current_is_member = false;
2951
2954 {
2956
2957 /*
2958 * Acquire the lock, if necessary (but skip it when we're
2959 * requesting a lock and already have one; avoids deadlock).
2960 */
2961 if (!current_is_member)
2964
2965 /* wait for multixact */
2967 relation, &(tp.t_self), XLTW_Delete,
2968 NULL);
2970
2971 /*
2972 * If xwait had just locked the tuple then some other xact
2973 * could update this tuple before we get to this point. Check
2974 * for xmax change, and start over if so.
2975 *
2976 * We also must start over if we didn't pin the VM page, and
2977 * the page has become all visible.
2978 */
2979 if ((vmbuffer == InvalidBuffer && PageIsAllVisible(page)) ||
2982 xwait))
2983 goto l1;
2984 }
2985
2986 /*
2987 * You might think the multixact is necessarily done here, but not
2988 * so: it could have surviving members, namely our own xact or
2989 * other subxacts of this backend. It is legal for us to delete
2990 * the tuple in either case, however (the latter case is
2991 * essentially a situation of upgrading our former shared lock to
2992 * exclusive). We don't bother changing the on-disk hint bits
2993 * since we are about to overwrite the xmax altogether.
2994 */
2995 }
2997 {
2998 /*
2999 * Wait for regular transaction to end; but first, acquire tuple
3000 * lock.
3001 */
3005 XactLockTableWait(xwait, relation, &(tp.t_self), XLTW_Delete);
3007
3008 /*
3009 * xwait is done, but if xwait had just locked the tuple then some
3010 * other xact could update this tuple before we get to this point.
3011 * Check for xmax change, and start over if so.
3012 *
3013 * We also must start over if we didn't pin the VM page, and the
3014 * page has become all visible.
3015 */
3016 if ((vmbuffer == InvalidBuffer && PageIsAllVisible(page)) ||
3019 xwait))
3020 goto l1;
3021
3022 /* Otherwise check if it committed or aborted */
3023 UpdateXmaxHintBits(tp.t_data, buffer, xwait);
3024 }
3025
3026 /*
3027 * We may overwrite if previous xmax aborted, or if it committed but
3028 * only locked the tuple without updating it.
3029 */
3030 if ((tp.t_data->t_infomask & HEAP_XMAX_INVALID) ||
3033 result = TM_Ok;
3034 else if (!ItemPointerEquals(&tp.t_self, &tp.t_data->t_ctid))
3035 result = TM_Updated;
3036 else
3037 result = TM_Deleted;
3038 }
3039
3040 /* sanity check the result HeapTupleSatisfiesUpdate() and the logic above */
3041 if (result != TM_Ok)
3042 {
3043 Assert(result == TM_SelfModified ||
3044 result == TM_Updated ||
3045 result == TM_Deleted ||
3046 result == TM_BeingModified);
3048 Assert(result != TM_Updated ||
3050 }
3051
3052 if (crosscheck != InvalidSnapshot && result == TM_Ok)
3053 {
3054 /* Perform additional check for transaction-snapshot mode RI updates */
3055 if (!HeapTupleSatisfiesVisibility(&tp, crosscheck, buffer))
3056 result = TM_Updated;
3057 }
3058
3059 if (result != TM_Ok)
3060 {
3061 tmfd->ctid = tp.t_data->t_ctid;
3063 if (result == TM_SelfModified)
3065 else
3066 tmfd->cmax = InvalidCommandId;
3067 UnlockReleaseBuffer(buffer);
3068 if (have_tuple_lock)
3070 if (vmbuffer != InvalidBuffer)
3071 ReleaseBuffer(vmbuffer);
3072 return result;
3073 }
3074
3075 /*
3076 * We're about to do the actual delete -- check for conflict first, to
3077 * avoid possibly having to roll back work we've just done.
3078 *
3079 * This is safe without a recheck as long as there is no possibility of
3080 * another process scanning the page between this check and the delete
3081 * being visible to the scan (i.e., an exclusive buffer content lock is
3082 * continuously held from this point until the tuple delete is visible).
3083 */
3085
3086 /* replace cid with a combo CID if necessary */
3088
3089 /*
3090 * Compute replica identity tuple before entering the critical section so
3091 * we don't PANIC upon a memory allocation failure.
3092 */
3093 old_key_tuple = ExtractReplicaIdentity(relation, &tp, true, &old_key_copied);
3094
3095 /*
3096 * If this is the first possibly-multixact-able operation in the current
3097 * transaction, set my per-backend OldestMemberMXactId setting. We can be
3098 * certain that the transaction will never become a member of any older
3099 * MultiXactIds than that. (We have to do this even if we end up just
3100 * using our own TransactionId below, since some other backend could
3101 * incorporate our XID into a MultiXact immediately afterwards.)
3102 */
3104
3107 xid, LockTupleExclusive, true,
3108 &new_xmax, &new_infomask, &new_infomask2);
3109
3111
3112 /*
3113 * If this transaction commits, the tuple will become DEAD sooner or
3114 * later. Set flag that this page is a candidate for pruning once our xid
3115 * falls below the OldestXmin horizon. If the transaction finally aborts,
3116 * the subsequent page pruning will be a no-op and the hint will be
3117 * cleared.
3118 */
3119 PageSetPrunable(page, xid);
3120
3121 if (PageIsAllVisible(page))
3122 {
3123 all_visible_cleared = true;
3124 PageClearAllVisible(page);
3125 visibilitymap_clear(relation, BufferGetBlockNumber(buffer),
3126 vmbuffer, VISIBILITYMAP_VALID_BITS);
3127 }
3128
3129 /* store transaction information of xact deleting the tuple */
3135 HeapTupleHeaderSetXmax(tp.t_data, new_xmax);
3137 /* Make sure there is no forward chain link in t_ctid */
3138 tp.t_data->t_ctid = tp.t_self;
3139
3140 /* Signal that this is actually a move into another partition */
3141 if (changingPart)
3143
3144 MarkBufferDirty(buffer);
3145
3146 /*
3147 * XLOG stuff
3148 *
3149 * NB: heap_abort_speculative() uses the same xlog record and replay
3150 * routines.
3151 */
3152 if (RelationNeedsWAL(relation))
3153 {
3157
3158 /*
3159 * For logical decode we need combo CIDs to properly decode the
3160 * catalog
3161 */
3163 log_heap_new_cid(relation, &tp);
3164
3165 xlrec.flags = 0;
3168 if (changingPart)
3170 xlrec.infobits_set = compute_infobits(tp.t_data->t_infomask,
3171 tp.t_data->t_infomask2);
3173 xlrec.xmax = new_xmax;
3174
3175 if (old_key_tuple != NULL)
3176 {
3177 if (relation->rd_rel->relreplident == REPLICA_IDENTITY_FULL)
3179 else
3181 }
3182
3185
3187
3188 /*
3189 * Log replica identity of the deleted tuple if there is one
3190 */
3191 if (old_key_tuple != NULL)
3192 {
3193 xlhdr.t_infomask2 = old_key_tuple->t_data->t_infomask2;
3194 xlhdr.t_infomask = old_key_tuple->t_data->t_infomask;
3195 xlhdr.t_hoff = old_key_tuple->t_data->t_hoff;
3196
3198 XLogRegisterData((char *) old_key_tuple->t_data
3200 old_key_tuple->t_len
3202 }
3203
3204 /* filtering by origin on a row level is much more efficient */
3206
3208
3209 PageSetLSN(page, recptr);
3210 }
3211
3213
3215
3216 if (vmbuffer != InvalidBuffer)
3217 ReleaseBuffer(vmbuffer);
3218
3219 /*
3220 * If the tuple has toasted out-of-line attributes, we need to delete
3221 * those items too. We have to do this before releasing the buffer
3222 * because we need to look at the contents of the tuple, but it's OK to
3223 * release the content lock on the buffer first.
3224 */
3225 if (relation->rd_rel->relkind != RELKIND_RELATION &&
3226 relation->rd_rel->relkind != RELKIND_MATVIEW)
3227 {
3228 /* toast table entries should never be recursively toasted */
3230 }
3231 else if (HeapTupleHasExternal(&tp))
3232 heap_toast_delete(relation, &tp, false);
3233
3234 /*
3235 * Mark tuple for invalidation from system caches at next command
3236 * boundary. We have to do this before releasing the buffer because we
3237 * need to look at the contents of the tuple.
3238 */
3239 CacheInvalidateHeapTuple(relation, &tp, NULL);
3240
3241 /* Now we can release the buffer */
3242 ReleaseBuffer(buffer);
3243
3244 /*
3245 * Release the lmgr tuple lock, if we had it.
3246 */
3247 if (have_tuple_lock)
3249
3250 pgstat_count_heap_delete(relation);
3251
3254
3255 return TM_Ok;
3256}
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition bufmgr.c:4357
void UnlockReleaseBuffer(Buffer buffer)
Definition bufmgr.c:5519
static void PageClearAllVisible(Page page)
Definition bufpage.h:438
#define InvalidCommandId
Definition c.h:716
TransactionId MultiXactId
Definition c.h:709
uint16_t uint16
Definition c.h:578
void HeapTupleHeaderAdjustCmax(const HeapTupleHeaderData *tup, CommandId *cmax, bool *iscombo)
Definition combocid.c:153
CommandId HeapTupleHeaderGetCmax(const HeapTupleHeaderData *tup)
Definition combocid.c:118
static bool DoesMultiXactIdConflict(MultiXactId multi, uint16 infomask, LockTupleMode lockmode, bool *current_is_member)
Definition heapam.c:7677
static XLogRecPtr log_heap_new_cid(Relation relation, HeapTuple tup)
Definition heapam.c:9142
static void compute_new_xmax_infomask(TransactionId xmax, uint16 old_infomask, uint16 old_infomask2, TransactionId add_to_xmax, LockTupleMode mode, bool is_update, TransactionId *result_xmax, uint16 *result_infomask, uint16 *result_infomask2)
Definition heapam.c:5396
static bool heap_acquire_tuplock(Relation relation, const ItemPointerData *tid, LockTupleMode mode, LockWaitPolicy wait_policy, bool *have_tuple_lock)
Definition heapam.c:5347
static HeapTuple ExtractReplicaIdentity(Relation relation, HeapTuple tp, bool key_required, bool *copy)
Definition heapam.c:9223
static void MultiXactIdWait(MultiXactId multi, MultiXactStatus status, uint16 infomask, Relation rel, const ItemPointerData *ctid, XLTW_Oper oper, int *remaining)
Definition heapam.c:7855
static bool xmax_infomask_changed(uint16 new_infomask, uint16 old_infomask)
Definition heapam.c:2821
#define UnlockTupleTuplock(rel, tup, mode)
Definition heapam.c:169
static void AssertHasSnapshotForToast(Relation rel)
Definition heapam.c:225
static void UpdateXmaxHintBits(HeapTupleHeader tuple, Buffer buffer, TransactionId xid)
Definition heapam.c:2053
bool HeapTupleSatisfiesVisibility(HeapTuple htup, Snapshot snapshot, Buffer buffer)
bool HeapTupleHeaderIsOnlyLocked(HeapTupleHeader tuple)
TM_Result HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid, Buffer buffer)
#define XLH_DELETE_CONTAINS_OLD_KEY
#define XLH_DELETE_ALL_VISIBLE_CLEARED
#define SizeOfHeapHeader
#define XLH_DELETE_IS_PARTITION_MOVE
#define XLH_DELETE_CONTAINS_OLD_TUPLE
void heap_freetuple(HeapTuple htup)
Definition heaptuple.c:1435
#define SizeofHeapTupleHeader
static bool HEAP_XMAX_IS_LOCKED_ONLY(uint16 infomask)
static void HeapTupleHeaderSetCmax(HeapTupleHeaderData *tup, CommandId cid, bool iscombo)
static void HeapTupleHeaderClearHotUpdated(HeapTupleHeaderData *tup)
static TransactionId HeapTupleHeaderGetRawXmax(const HeapTupleHeaderData *tup)
#define HEAP_XMAX_IS_MULTI
#define HEAP_XMAX_INVALID
static TransactionId HeapTupleHeaderGetUpdateXid(const HeapTupleHeaderData *tup)
static void HeapTupleHeaderSetMovedPartitions(HeapTupleHeaderData *tup)
static void HeapTupleHeaderSetXmax(HeapTupleHeaderData *tup, TransactionId xid)
void CacheInvalidateHeapTuple(Relation relation, HeapTuple tuple, HeapTuple newtuple)
Definition inval.c:1571
bool ItemPointerEquals(const ItemPointerData *pointer1, const ItemPointerData *pointer2)
Definition itemptr.c:35
void XactLockTableWait(TransactionId xid, Relation rel, const ItemPointerData *ctid, XLTW_Oper oper)
Definition lmgr.c:663
@ XLTW_Delete
Definition lmgr.h:28
@ LockWaitBlock
Definition lockoptions.h:40
@ LockTupleExclusive
Definition lockoptions.h:59
void MultiXactIdSetOldestMember(void)
Definition multixact.c:575
@ MultiXactStatusUpdate
Definition multixact.h:45
void CheckForSerializableConflictIn(Relation relation, const ItemPointerData *tid, BlockNumber blkno)
Definition predicate.c:4334
#define InvalidSnapshot
Definition snapshot.h:119
TransactionId xmax
Definition tableam.h:150
CommandId cmax
Definition tableam.h:151
ItemPointerData ctid
Definition tableam.h:149
TM_Result
Definition tableam.h:73
@ TM_Ok
Definition tableam.h:78
@ TM_BeingModified
Definition tableam.h:100
@ TM_Deleted
Definition tableam.h:93
@ TM_Updated
Definition tableam.h:90
@ TM_SelfModified
Definition tableam.h:84
@ TM_Invisible
Definition tableam.h:81
#define TransactionIdEquals(id1, id2)
Definition transam.h:43
bool visibilitymap_clear(Relation rel, BlockNumber heapBlk, Buffer vmbuf, uint8 flags)
void visibilitymap_pin(Relation rel, BlockNumber heapBlk, Buffer *vmbuf)
#define VISIBILITYMAP_VALID_BITS
bool TransactionIdIsCurrentTransactionId(TransactionId xid)
Definition xact.c:942
bool IsInParallelMode(void)
Definition xact.c:1090
#define XLOG_INCLUDE_ORIGIN
Definition xlog.h:165
void XLogSetRecordFlags(uint8 flags)
Definition xloginsert.c:460

References Assert, AssertHasSnapshotForToast(), BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage(), CacheInvalidateHeapTuple(), CheckForSerializableConflictIn(), TM_FailureData::cmax, compute_infobits(), compute_new_xmax_infomask(), TM_FailureData::ctid, DoesMultiXactIdConflict(), END_CRIT_SECTION, ereport, errcode(), errmsg(), ERROR, ExtractReplicaIdentity(), fb(), GetCurrentTransactionId(), heap_acquire_tuplock(), heap_freetuple(), HEAP_MOVED, heap_toast_delete(), HEAP_XMAX_BITS, HEAP_XMAX_INVALID, HEAP_XMAX_IS_LOCKED_ONLY(), HEAP_XMAX_IS_MULTI, HeapTupleHasExternal(), HeapTupleHeaderAdjustCmax(), HeapTupleHeaderClearHotUpdated(), HeapTupleHeaderGetCmax(), HeapTupleHeaderGetRawXmax(), HeapTupleHeaderGetUpdateXid(), HeapTupleHeaderIsOnlyLocked(), HeapTupleHeaderSetCmax(), HeapTupleHeaderSetMovedPartitions(), HeapTupleHeaderSetXmax(), HeapTupleSatisfiesUpdate(), HeapTupleSatisfiesVisibility(), InvalidBuffer, InvalidCommandId, InvalidSnapshot, IsInParallelMode(), ItemIdGetLength, ItemIdIsNormal, ItemPointerEquals(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), ItemPointerIsValid(), LockBuffer(), LockTupleExclusive, LockWaitBlock, log_heap_new_cid(), MarkBufferDirty(), MultiXactIdSetOldestMember(), MultiXactIdWait(), MultiXactStatusUpdate, PageClearAllVisible(), PageGetItem(), PageGetItemId(), PageIsAllVisible(), PageSetLSN(), PageSetPrunable, pgstat_count_heap_delete(), RelationData::rd_rel, ReadBuffer(), REGBUF_STANDARD, RelationGetRelid, RelationIsAccessibleInLogicalDecoding, RelationNeedsWAL, ReleaseBuffer(), SizeOfHeapDelete, SizeOfHeapHeader, SizeofHeapTupleHeader, START_CRIT_SECTION, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TM_BeingModified, TM_Deleted, TM_Invisible, TM_Ok, TM_SelfModified, TM_Updated, TransactionIdEquals, TransactionIdIsCurrentTransactionId(), 
UnlockReleaseBuffer(), UnlockTupleTuplock, UpdateXmaxHintBits(), visibilitymap_clear(), visibilitymap_pin(), VISIBILITYMAP_VALID_BITS, XactLockTableWait(), XLH_DELETE_ALL_VISIBLE_CLEARED, XLH_DELETE_CONTAINS_OLD_KEY, XLH_DELETE_CONTAINS_OLD_TUPLE, XLH_DELETE_IS_PARTITION_MOVE, XLOG_HEAP_DELETE, XLOG_INCLUDE_ORIGIN, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), XLogRegisterData(), XLogSetRecordFlags(), XLTW_Delete, TM_FailureData::xmax, and xmax_infomask_changed().

Referenced by heapam_tuple_delete(), and simple_heap_delete().

◆ heap_endscan()

void heap_endscan ( TableScanDesc  sscan)
extern

Definition at line 1371 of file heapam.c.

1372{
1374
1375 /* Note: no locking manipulations needed */
1376
1377 /*
1378 * unpin scan buffers
1379 */
1380 if (BufferIsValid(scan->rs_cbuf))
1381 ReleaseBuffer(scan->rs_cbuf);
1382
1383 /*
1384 * Must free the read stream before freeing the BufferAccessStrategy.
1385 */
1386 if (scan->rs_read_stream)
1388
1389 /*
1390 * decrement relation reference count and free scan descriptor storage
1391 */
1393
1394 if (scan->rs_base.rs_key)
1395 pfree(scan->rs_base.rs_key);
1396
1397 if (scan->rs_strategy != NULL)
1399
1400 if (scan->rs_parallelworkerdata != NULL)
1402
1403 if (scan->rs_base.rs_flags & SO_TEMP_SNAPSHOT)
1405
1406 pfree(scan);
1407}
static bool BufferIsValid(Buffer bufnum)
Definition bufmgr.h:417
void read_stream_end(ReadStream *stream)
void RelationDecrementReferenceCount(Relation rel)
Definition relcache.c:2195
void UnregisterSnapshot(Snapshot snapshot)
Definition snapmgr.c:866
@ SO_TEMP_SNAPSHOT
Definition tableam.h:65

References BufferIsValid(), fb(), FreeAccessStrategy(), pfree(), read_stream_end(), RelationDecrementReferenceCount(), ReleaseBuffer(), HeapScanDescData::rs_base, HeapScanDescData::rs_cbuf, TableScanDescData::rs_flags, TableScanDescData::rs_key, HeapScanDescData::rs_parallelworkerdata, TableScanDescData::rs_rd, HeapScanDescData::rs_read_stream, TableScanDescData::rs_snapshot, HeapScanDescData::rs_strategy, SO_TEMP_SNAPSHOT, and UnregisterSnapshot().

◆ heap_execute_freeze_tuple()

static void heap_execute_freeze_tuple ( HeapTupleHeader  tuple,
HeapTupleFreeze frz 
)
inlinestatic

Definition at line 493 of file heapam.h.

494{
495 HeapTupleHeaderSetXmax(tuple, frz->xmax);
496
497 if (frz->frzflags & XLH_FREEZE_XVAC)
499
500 if (frz->frzflags & XLH_INVALID_XVAC)
502
503 tuple->t_infomask = frz->t_infomask;
504 tuple->t_infomask2 = frz->t_infomask2;
505}
#define XLH_INVALID_XVAC
#define XLH_FREEZE_XVAC
static void HeapTupleHeaderSetXvac(HeapTupleHeaderData *tup, TransactionId xid)
#define FrozenTransactionId
Definition transam.h:33

References fb(), FrozenTransactionId, HeapTupleHeaderSetXmax(), HeapTupleHeaderSetXvac(), InvalidTransactionId, HeapTupleHeaderData::t_infomask, HeapTupleHeaderData::t_infomask2, XLH_FREEZE_XVAC, and XLH_INVALID_XVAC.

Referenced by heap_freeze_prepared_tuples(), heap_freeze_tuple(), and heap_xlog_prune_freeze().

◆ heap_fetch()

bool heap_fetch ( Relation  relation,
Snapshot  snapshot,
HeapTuple  tuple,
Buffer userbuf,
bool  keep_buf 
)
extern

Definition at line 1659 of file heapam.c.

1664{
1665 ItemPointer tid = &(tuple->t_self);
1666 ItemId lp;
1667 Buffer buffer;
1668 Page page;
1669 OffsetNumber offnum;
1670 bool valid;
1671
1672 /*
1673 * Fetch and pin the appropriate page of the relation.
1674 */
1675 buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
1676
1677 /*
1678 * Need share lock on buffer to examine tuple commit status.
1679 */
1681 page = BufferGetPage(buffer);
1682
1683 /*
1684 * We'd better check for out-of-range offnum in case of VACUUM since the
1685 * TID was obtained.
1686 */
1687 offnum = ItemPointerGetOffsetNumber(tid);
1689 {
1691 ReleaseBuffer(buffer);
1693 tuple->t_data = NULL;
1694 return false;
1695 }
1696
1697 /*
1698 * get the item line pointer corresponding to the requested tid
1699 */
1700 lp = PageGetItemId(page, offnum);
1701
1702 /*
1703 * Must check for deleted tuple.
1704 */
1705 if (!ItemIdIsNormal(lp))
1706 {
1708 ReleaseBuffer(buffer);
1710 tuple->t_data = NULL;
1711 return false;
1712 }
1713
1714 /*
1715 * fill in *tuple fields
1716 */
1717 tuple->t_data = (HeapTupleHeader) PageGetItem(page, lp);
1718 tuple->t_len = ItemIdGetLength(lp);
1719 tuple->t_tableOid = RelationGetRelid(relation);
1720
1721 /*
1722 * check tuple visibility, then release lock
1723 */
1724 valid = HeapTupleSatisfiesVisibility(tuple, snapshot, buffer);
1725
1726 if (valid)
1727 PredicateLockTID(relation, &(tuple->t_self), snapshot,
1729
1730 HeapCheckForSerializableConflictOut(valid, relation, tuple, buffer, snapshot);
1731
1733
1734 if (valid)
1735 {
1736 /*
1737 * All checks passed, so return the tuple as valid. Caller is now
1738 * responsible for releasing the buffer.
1739 */
1740 *userbuf = buffer;
1741
1742 return true;
1743 }
1744
1745 /* Tuple failed time qual, but maybe caller wants to see it anyway. */
1746 if (keep_buf)
1747 *userbuf = buffer;
1748 else
1749 {
1750 ReleaseBuffer(buffer);
1752 tuple->t_data = NULL;
1753 }
1754
1755 return false;
1756}
@ BUFFER_LOCK_SHARE
Definition bufmgr.h:210
static OffsetNumber PageGetMaxOffsetNumber(const PageData *page)
Definition bufpage.h:371
void HeapCheckForSerializableConflictOut(bool visible, Relation relation, HeapTuple tuple, Buffer buffer, Snapshot snapshot)
Definition heapam.c:9327
static TransactionId HeapTupleHeaderGetXmin(const HeapTupleHeaderData *tup)
uint16 OffsetNumber
Definition off.h:24
void PredicateLockTID(Relation relation, const ItemPointerData *tid, Snapshot snapshot, TransactionId tuple_xid)
Definition predicate.c:2619

References BUFFER_LOCK_SHARE, BUFFER_LOCK_UNLOCK, BufferGetPage(), fb(), HeapCheckForSerializableConflictOut(), HeapTupleHeaderGetXmin(), HeapTupleSatisfiesVisibility(), InvalidBuffer, ItemIdGetLength, ItemIdIsNormal, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), LockBuffer(), PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PredicateLockTID(), ReadBuffer(), RelationGetRelid, ReleaseBuffer(), HeapTupleData::t_data, HeapTupleData::t_len, HeapTupleData::t_self, and HeapTupleData::t_tableOid.

Referenced by heap_lock_updated_tuple_rec(), heapam_fetch_row_version(), and heapam_tuple_lock().

◆ heap_finish_speculative()

void heap_finish_speculative ( Relation  relation,
const ItemPointerData tid 
)
extern

Definition at line 6169 of file heapam.c.

6170{
6171 Buffer buffer;
6172 Page page;
6173 OffsetNumber offnum;
6174 ItemId lp;
6175 HeapTupleHeader htup;
6176
6177 buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
6179 page = BufferGetPage(buffer);
6180
6181 offnum = ItemPointerGetOffsetNumber(tid);
6183 elog(ERROR, "offnum out of range");
6184 lp = PageGetItemId(page, offnum);
6185 if (!ItemIdIsNormal(lp))
6186 elog(ERROR, "invalid lp");
6187
6188 htup = (HeapTupleHeader) PageGetItem(page, lp);
6189
6190 /* NO EREPORT(ERROR) from here till changes are logged */
6192
6194
6195 MarkBufferDirty(buffer);
6196
6197 /*
6198 * Replace the speculative insertion token with a real t_ctid, pointing to
6199 * itself like it does on regular tuples.
6200 */
6201 htup->t_ctid = *tid;
6202
6203 /* XLOG stuff */
6204 if (RelationNeedsWAL(relation))
6205 {
6208
6210
6212
6213 /* We want the same filtering on this as on a plain insert */
6215
6218
6220
6221 PageSetLSN(page, recptr);
6222 }
6223
6225
6226 UnlockReleaseBuffer(buffer);
6227}
#define SizeOfHeapConfirm
#define XLOG_HEAP_CONFIRM
Definition heapam_xlog.h:38
OffsetNumber offnum

References Assert, BUFFER_LOCK_EXCLUSIVE, BufferGetPage(), elog, END_CRIT_SECTION, ERROR, fb(), HeapTupleHeaderIsSpeculative(), ItemIdIsNormal, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), LockBuffer(), MarkBufferDirty(), xl_heap_confirm::offnum, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PageSetLSN(), ReadBuffer(), REGBUF_STANDARD, RelationNeedsWAL, SizeOfHeapConfirm, START_CRIT_SECTION, HeapTupleHeaderData::t_ctid, UnlockReleaseBuffer(), XLOG_HEAP_CONFIRM, XLOG_INCLUDE_ORIGIN, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), XLogRegisterData(), and XLogSetRecordFlags().

Referenced by heapam_tuple_complete_speculative().

◆ heap_freeze_prepared_tuples()

void heap_freeze_prepared_tuples ( Buffer  buffer,
HeapTupleFreeze tuples,
int  ntuples 
)
extern

Definition at line 7462 of file heapam.c.

7463{
7464 Page page = BufferGetPage(buffer);
7465
7466 for (int i = 0; i < ntuples; i++)
7467 {
7468 HeapTupleFreeze *frz = tuples + i;
7469 ItemId itemid = PageGetItemId(page, frz->offset);
7470 HeapTupleHeader htup;
7471
7472 htup = (HeapTupleHeader) PageGetItem(page, itemid);
7474 }
7475}
static void heap_execute_freeze_tuple(HeapTupleHeader tuple, HeapTupleFreeze *frz)
Definition heapam.h:493
int i
Definition isn.c:77

References BufferGetPage(), fb(), heap_execute_freeze_tuple(), i, PageGetItem(), and PageGetItemId().

Referenced by heap_page_prune_and_freeze().

◆ heap_freeze_tuple()

bool heap_freeze_tuple ( HeapTupleHeader  tuple,
TransactionId  relfrozenxid,
TransactionId  relminmxid,
TransactionId  FreezeLimit,
TransactionId  MultiXactCutoff 
)
extern

Definition at line 7484 of file heapam.c.

7487{
7489 bool do_freeze;
7490 bool totally_frozen;
7491 struct VacuumCutoffs cutoffs;
7492 HeapPageFreeze pagefrz;
7493
7494 cutoffs.relfrozenxid = relfrozenxid;
7495 cutoffs.relminmxid = relminmxid;
7496 cutoffs.OldestXmin = FreezeLimit;
7497 cutoffs.OldestMxact = MultiXactCutoff;
7498 cutoffs.FreezeLimit = FreezeLimit;
7499 cutoffs.MultiXactCutoff = MultiXactCutoff;
7500
7501 pagefrz.freeze_required = true;
7502 pagefrz.FreezePageRelfrozenXid = FreezeLimit;
7503 pagefrz.FreezePageRelminMxid = MultiXactCutoff;
7504 pagefrz.NoFreezePageRelfrozenXid = FreezeLimit;
7505 pagefrz.NoFreezePageRelminMxid = MultiXactCutoff;
7506
7507 do_freeze = heap_prepare_freeze_tuple(tuple, &cutoffs,
7508 &pagefrz, &frz, &totally_frozen);
7509
7510 /*
7511 * Note that because this is not a WAL-logged operation, we don't need to
7512 * fill in the offset in the freeze record.
7513 */
7514
7515 if (do_freeze)
7517 return do_freeze;
7518}
bool heap_prepare_freeze_tuple(HeapTupleHeader tuple, const struct VacuumCutoffs *cutoffs, HeapPageFreeze *pagefrz, HeapTupleFreeze *frz, bool *totally_frozen)
Definition heapam.c:7136
bool freeze_required
Definition heapam.h:182
TransactionId FreezeLimit
Definition vacuum.h:289
TransactionId relfrozenxid
Definition vacuum.h:263
MultiXactId relminmxid
Definition vacuum.h:264
MultiXactId MultiXactCutoff
Definition vacuum.h:290

References fb(), VacuumCutoffs::FreezeLimit, heap_execute_freeze_tuple(), heap_prepare_freeze_tuple(), VacuumCutoffs::MultiXactCutoff, VacuumCutoffs::OldestMxact, VacuumCutoffs::OldestXmin, VacuumCutoffs::relfrozenxid, and VacuumCutoffs::relminmxid.

Referenced by rewrite_heap_tuple().

◆ heap_get_latest_tid()

void heap_get_latest_tid ( TableScanDesc  sscan,
ItemPointer  tid 
)
extern

Definition at line 1931 of file heapam.c.

1933{
1934 Relation relation = sscan->rs_rd;
1935 Snapshot snapshot = sscan->rs_snapshot;
1936 ItemPointerData ctid;
1938
1939 /*
1940 * table_tuple_get_latest_tid() verified that the passed in tid is valid.
1941 * Assume that t_ctid links are valid however - there shouldn't be invalid
1942 * ones in the table.
1943 */
1945
1946 /*
1947 * Loop to chase down t_ctid links. At top of loop, ctid is the tuple we
1948 * need to examine, and *tid is the TID we will return if ctid turns out
1949 * to be bogus.
1950 *
1951 * Note that we will loop until we reach the end of the t_ctid chain.
1952 * Depending on the snapshot passed, there might be at most one visible
1953 * version of the row, but we don't try to optimize for that.
1954 */
1955 ctid = *tid;
1956 priorXmax = InvalidTransactionId; /* cannot check first XMIN */
1957 for (;;)
1958 {
1959 Buffer buffer;
1960 Page page;
1961 OffsetNumber offnum;
1962 ItemId lp;
1963 HeapTupleData tp;
1964 bool valid;
1965
1966 /*
1967 * Read, pin, and lock the page.
1968 */
1969 buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(&ctid));
1971 page = BufferGetPage(buffer);
1972
1973 /*
1974 * Check for bogus item number. This is not treated as an error
1975 * condition because it can happen while following a t_ctid link. We
1976 * just assume that the prior tid is OK and return it unchanged.
1977 */
1978 offnum = ItemPointerGetOffsetNumber(&ctid);
1980 {
1981 UnlockReleaseBuffer(buffer);
1982 break;
1983 }
1984 lp = PageGetItemId(page, offnum);
1985 if (!ItemIdIsNormal(lp))
1986 {
1987 UnlockReleaseBuffer(buffer);
1988 break;
1989 }
1990
1991 /* OK to access the tuple */
1992 tp.t_self = ctid;
1993 tp.t_data = (HeapTupleHeader) PageGetItem(page, lp);
1994 tp.t_len = ItemIdGetLength(lp);
1995 tp.t_tableOid = RelationGetRelid(relation);
1996
1997 /*
1998 * After following a t_ctid link, we might arrive at an unrelated
1999 * tuple. Check for XMIN match.
2000 */
2003 {
2004 UnlockReleaseBuffer(buffer);
2005 break;
2006 }
2007
2008 /*
2009 * Check tuple visibility; if visible, set it as the new result
2010 * candidate.
2011 */
2012 valid = HeapTupleSatisfiesVisibility(&tp, snapshot, buffer);
2013 HeapCheckForSerializableConflictOut(valid, relation, &tp, buffer, snapshot);
2014 if (valid)
2015 *tid = ctid;
2016
2017 /*
2018 * If there's a valid t_ctid link, follow it, else we're done.
2019 */
2020 if ((tp.t_data->t_infomask & HEAP_XMAX_INVALID) ||
2024 {
2025 UnlockReleaseBuffer(buffer);
2026 break;
2027 }
2028
2029 ctid = tp.t_data->t_ctid;
2031 UnlockReleaseBuffer(buffer);
2032 } /* end of loop */
2033}
static bool HeapTupleHeaderIndicatesMovedPartitions(const HeapTupleHeaderData *tup)

References Assert, BUFFER_LOCK_SHARE, BufferGetPage(), fb(), HEAP_XMAX_INVALID, HeapCheckForSerializableConflictOut(), HeapTupleHeaderGetUpdateXid(), HeapTupleHeaderGetXmin(), HeapTupleHeaderIndicatesMovedPartitions(), HeapTupleHeaderIsOnlyLocked(), HeapTupleSatisfiesVisibility(), InvalidTransactionId, ItemIdGetLength, ItemIdIsNormal, ItemPointerEquals(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), ItemPointerIsValid(), LockBuffer(), PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), ReadBuffer(), RelationGetRelid, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TransactionIdEquals, TransactionIdIsValid, and UnlockReleaseBuffer().

◆ heap_get_root_tuples()

void heap_get_root_tuples ( Page  page,
OffsetNumber root_offsets 
)
extern

Definition at line 1907 of file pruneheap.c.

1908{
1909 OffsetNumber offnum,
1910 maxoff;
1911
1914
1915 maxoff = PageGetMaxOffsetNumber(page);
1916 for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum = OffsetNumberNext(offnum))
1917 {
1918 ItemId lp = PageGetItemId(page, offnum);
1919 HeapTupleHeader htup;
1922
1923 /* skip unused and dead items */
1924 if (!ItemIdIsUsed(lp) || ItemIdIsDead(lp))
1925 continue;
1926
1927 if (ItemIdIsNormal(lp))
1928 {
1929 htup = (HeapTupleHeader) PageGetItem(page, lp);
1930
1931 /*
1932 * Check if this tuple is part of a HOT-chain rooted at some other
1933 * tuple. If so, skip it for now; we'll process it when we find
1934 * its root.
1935 */
1936 if (HeapTupleHeaderIsHeapOnly(htup))
1937 continue;
1938
1939 /*
1940 * This is either a plain tuple or the root of a HOT-chain.
1941 * Remember it in the mapping.
1942 */
1943 root_offsets[offnum - 1] = offnum;
1944
1945 /* If it's not the start of a HOT-chain, we're done with it */
1946 if (!HeapTupleHeaderIsHotUpdated(htup))
1947 continue;
1948
1949 /* Set up to scan the HOT-chain */
1952 }
1953 else
1954 {
1955 /* Must be a redirect item. We do not set its root_offsets entry */
1957 /* Set up to scan the HOT-chain */
1960 }
1961
1962 /*
1963 * Now follow the HOT-chain and collect other tuples in the chain.
1964 *
1965 * Note: Even though this is a nested loop, the complexity of the
1966 * function is O(N) because a tuple in the page should be visited not
1967 * more than twice, once in the outer loop and once in HOT-chain
1968 * chases.
1969 */
1970 for (;;)
1971 {
1972 /* Sanity check (pure paranoia) */
1973 if (offnum < FirstOffsetNumber)
1974 break;
1975
1976 /*
1977 * An offset past the end of page's line pointer array is possible
1978 * when the array was truncated
1979 */
1980 if (offnum > maxoff)
1981 break;
1982
1983 lp = PageGetItemId(page, nextoffnum);
1984
1985 /* Check for broken chains */
1986 if (!ItemIdIsNormal(lp))
1987 break;
1988
1989 htup = (HeapTupleHeader) PageGetItem(page, lp);
1990
1993 break;
1994
1995 /* Remember the root line pointer for this item */
1996 root_offsets[nextoffnum - 1] = offnum;
1997
1998 /* Advance to next chain member, if any */
1999 if (!HeapTupleHeaderIsHotUpdated(htup))
2000 break;
2001
2002 /* HOT implies it can't have moved to different partition */
2004
2007 }
2008 }
2009}
#define MemSet(start, val, len)
Definition c.h:1056
static bool HeapTupleHeaderIsHotUpdated(const HeapTupleHeaderData *tup)
#define MaxHeapTuplesPerPage
#define ItemIdGetRedirect(itemId)
Definition itemid.h:78
#define ItemIdIsDead(itemId)
Definition itemid.h:113
#define ItemIdIsUsed(itemId)
Definition itemid.h:92
#define ItemIdIsRedirected(itemId)
Definition itemid.h:106
#define InvalidOffsetNumber
Definition off.h:26
#define OffsetNumberNext(offsetNumber)
Definition off.h:52
#define FirstOffsetNumber
Definition off.h:27

References Assert, fb(), FirstOffsetNumber, HeapTupleHeaderGetUpdateXid(), HeapTupleHeaderGetXmin(), HeapTupleHeaderIndicatesMovedPartitions(), HeapTupleHeaderIsHeapOnly(), HeapTupleHeaderIsHotUpdated(), InvalidOffsetNumber, InvalidTransactionId, ItemIdGetRedirect, ItemIdIsDead, ItemIdIsNormal, ItemIdIsRedirected, ItemIdIsUsed, ItemPointerGetOffsetNumber(), MaxHeapTuplesPerPage, MemSet, OffsetNumberNext, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), HeapTupleHeaderData::t_ctid, TransactionIdEquals, and TransactionIdIsValid.

Referenced by heapam_index_build_range_scan(), and heapam_index_validate_scan().

◆ heap_getnext()

HeapTuple heap_getnext ( TableScanDesc  sscan,
ScanDirection  direction 
)
extern

Definition at line 1410 of file heapam.c.

1411{
1413
1414 /*
1415 * This is still widely used directly, without going through table AM, so
1416 * add a safety check. It's possible we should, at a later point,
1417 * downgrade this to an assert. The reason for checking the AM routine,
1418 * rather than the AM oid, is that this allows to write regression tests
1419 * that create another AM reusing the heap handler.
1420 */
1421 if (unlikely(sscan->rs_rd->rd_tableam != GetHeapamTableAmRoutine()))
1422 ereport(ERROR,
1424 errmsg_internal("only heap AM is supported")));
1425
1426 /* Note: no locking manipulations needed */
1427
1429 heapgettup_pagemode(scan, direction,
1430 scan->rs_base.rs_nkeys, scan->rs_base.rs_key);
1431 else
1432 heapgettup(scan, direction,
1433 scan->rs_base.rs_nkeys, scan->rs_base.rs_key);
1434
1435 if (scan->rs_ctup.t_data == NULL)
1436 return NULL;
1437
1438 /*
1439 * if we get here it means we have a new current scan tuple, so point to
1440 * the proper return buffer and return the tuple.
1441 */
1442
1444
1445 return &scan->rs_ctup;
1446}
#define unlikely(x)
Definition c.h:424
int int errmsg_internal(const char *fmt,...) pg_attribute_printf(1
static void heapgettup(HeapScanDesc scan, ScanDirection dir, int nkeys, ScanKey key)
Definition heapam.c:960
static void heapgettup_pagemode(HeapScanDesc scan, ScanDirection dir, int nkeys, ScanKey key)
Definition heapam.c:1070
const TableAmRoutine * GetHeapamTableAmRoutine(void)
#define pgstat_count_heap_getnext(rel)
Definition pgstat.h:699
@ SO_ALLOW_PAGEMODE
Definition tableam.h:62

References ereport, errcode(), errmsg_internal(), ERROR, fb(), GetHeapamTableAmRoutine(), heapgettup(), heapgettup_pagemode(), pgstat_count_heap_getnext, HeapScanDescData::rs_base, HeapScanDescData::rs_ctup, TableScanDescData::rs_flags, TableScanDescData::rs_key, TableScanDescData::rs_nkeys, TableScanDescData::rs_rd, SO_ALLOW_PAGEMODE, HeapTupleData::t_data, and unlikely.

Referenced by AlterTableMoveAll(), AlterTableSpaceOptions(), check_db_file_conflict(), CreateDatabaseUsingFileCopy(), do_autovacuum(), DropSetting(), DropTableSpace(), find_typed_table_dependencies(), get_all_vacuum_rels(), get_database_list(), get_subscription_list(), get_tables_to_cluster(), get_tablespace_name(), get_tablespace_oid(), GetAllPublicationRelations(), getRelationsInNamespace(), GetSchemaPublicationRelations(), heapam_index_build_range_scan(), heapam_index_validate_scan(), objectsInSchemaToOids(), pgrowlocks(), pgstat_heap(), populate_typ_list(), ReindexMultipleTables(), remove_dbtablespaces(), RemoveSubscriptionRel(), RenameTableSpace(), ThereIsAtLeastOneRole(), and vac_truncate_clog().

◆ heap_getnextslot()

bool heap_getnextslot ( TableScanDesc  sscan,
ScanDirection  direction,
TupleTableSlot slot 
)
extern

Definition at line 1449 of file heapam.c.

1450{
1452
1453 /* Note: no locking manipulations needed */
1454
1455 if (sscan->rs_flags & SO_ALLOW_PAGEMODE)
1456 heapgettup_pagemode(scan, direction, sscan->rs_nkeys, sscan->rs_key);
1457 else
1458 heapgettup(scan, direction, sscan->rs_nkeys, sscan->rs_key);
1459
1460 if (scan->rs_ctup.t_data == NULL)
1461 {
1462 ExecClearTuple(slot);
1463 return false;
1464 }
1465
1466 /*
1467 * if we get here it means we have a new current scan tuple, so point to
1468 * the proper return buffer and return the tuple.
1469 */
1470
1472
1473 ExecStoreBufferHeapTuple(&scan->rs_ctup, slot,
1474 scan->rs_cbuf);
1475 return true;
1476}
TupleTableSlot * ExecStoreBufferHeapTuple(HeapTuple tuple, TupleTableSlot *slot, Buffer buffer)
static TupleTableSlot * ExecClearTuple(TupleTableSlot *slot)
Definition tuptable.h:457

References ExecClearTuple(), ExecStoreBufferHeapTuple(), fb(), heapgettup(), heapgettup_pagemode(), pgstat_count_heap_getnext, HeapScanDescData::rs_base, HeapScanDescData::rs_cbuf, HeapScanDescData::rs_ctup, TableScanDescData::rs_rd, SO_ALLOW_PAGEMODE, and HeapTupleData::t_data.

◆ heap_getnextslot_tidrange()

bool heap_getnextslot_tidrange ( TableScanDesc  sscan,
ScanDirection  direction,
TupleTableSlot slot 
)
extern

Definition at line 1552 of file heapam.c.

1554{
1556 ItemPointer mintid = &sscan->st.tidrange.rs_mintid;
1557 ItemPointer maxtid = &sscan->st.tidrange.rs_maxtid;
1558
1559 /* Note: no locking manipulations needed */
1560 for (;;)
1561 {
1562 if (sscan->rs_flags & SO_ALLOW_PAGEMODE)
1563 heapgettup_pagemode(scan, direction, sscan->rs_nkeys, sscan->rs_key);
1564 else
1565 heapgettup(scan, direction, sscan->rs_nkeys, sscan->rs_key);
1566
1567 if (scan->rs_ctup.t_data == NULL)
1568 {
1569 ExecClearTuple(slot);
1570 return false;
1571 }
1572
1573 /*
1574 * heap_set_tidrange will have used heap_setscanlimits to limit the
1575 * range of pages we scan to only ones that can contain the TID range
1576 * we're scanning for. Here we must filter out any tuples from these
1577 * pages that are outside of that range.
1578 */
1579 if (ItemPointerCompare(&scan->rs_ctup.t_self, mintid) < 0)
1580 {
1581 ExecClearTuple(slot);
1582
1583 /*
1584 * When scanning backwards, the TIDs will be in descending order.
1585 * Future tuples in this direction will be lower still, so we can
1586 * just return false to indicate there will be no more tuples.
1587 */
1588 if (ScanDirectionIsBackward(direction))
1589 return false;
1590
1591 continue;
1592 }
1593
1594 /*
1595 * Likewise for the final page, we must filter out TIDs greater than
1596 * maxtid.
1597 */
1598 if (ItemPointerCompare(&scan->rs_ctup.t_self, maxtid) > 0)
1599 {
1600 ExecClearTuple(slot);
1601
1602 /*
1603 * When scanning forward, the TIDs will be in ascending order.
1604 * Future tuples in this direction will be higher still, so we can
1605 * just return false to indicate there will be no more tuples.
1606 */
1607 if (ScanDirectionIsForward(direction))
1608 return false;
1609 continue;
1610 }
1611
1612 break;
1613 }
1614
1615 /*
1616 * if we get here it means we have a new current scan tuple, so point to
1617 * the proper return buffer and return the tuple.
1618 */
1620
1621 ExecStoreBufferHeapTuple(&scan->rs_ctup, slot, scan->rs_cbuf);
1622 return true;
1623}
int32 ItemPointerCompare(const ItemPointerData *arg1, const ItemPointerData *arg2)
Definition itemptr.c:51
#define ScanDirectionIsForward(direction)
Definition sdir.h:64
#define ScanDirectionIsBackward(direction)
Definition sdir.h:50

References ExecClearTuple(), ExecStoreBufferHeapTuple(), fb(), heapgettup(), heapgettup_pagemode(), ItemPointerCompare(), pgstat_count_heap_getnext, HeapScanDescData::rs_base, HeapScanDescData::rs_cbuf, HeapScanDescData::rs_ctup, TableScanDescData::rs_rd, ScanDirectionIsBackward, ScanDirectionIsForward, SO_ALLOW_PAGEMODE, HeapTupleData::t_data, and HeapTupleData::t_self.

◆ heap_hot_search_buffer()

bool heap_hot_search_buffer ( ItemPointer  tid,
Relation  relation,
Buffer  buffer,
Snapshot  snapshot,
HeapTuple  heapTuple,
bool all_dead,
bool  first_call 
)
extern

Definition at line 1779 of file heapam.c.

1782{
1783 Page page = BufferGetPage(buffer);
1785 BlockNumber blkno;
1786 OffsetNumber offnum;
1787 bool at_chain_start;
1788 bool valid;
1789 bool skip;
1790 GlobalVisState *vistest = NULL;
1791
1792 /* If this is not the first call, previous call returned a (live!) tuple */
1793 if (all_dead)
1795
1796 blkno = ItemPointerGetBlockNumber(tid);
1797 offnum = ItemPointerGetOffsetNumber(tid);
1799 skip = !first_call;
1800
1801 /* XXX: we should assert that a snapshot is pushed or registered */
1803 Assert(BufferGetBlockNumber(buffer) == blkno);
1804
1805 /* Scan through possible multiple members of HOT-chain */
1806 for (;;)
1807 {
1808 ItemId lp;
1809
1810 /* check for bogus TID */
1812 break;
1813
1814 lp = PageGetItemId(page, offnum);
1815
1816 /* check for unused, dead, or redirected items */
1817 if (!ItemIdIsNormal(lp))
1818 {
1819 /* We should only see a redirect at start of chain */
1821 {
1822 /* Follow the redirect */
1823 offnum = ItemIdGetRedirect(lp);
1824 at_chain_start = false;
1825 continue;
1826 }
1827 /* else must be end of chain */
1828 break;
1829 }
1830
1831 /*
1832 * Update heapTuple to point to the element of the HOT chain we're
1833 * currently investigating. Having t_self set correctly is important
1834 * because the SSI checks and the *Satisfies routine for historical
1835 * MVCC snapshots need the correct tid to decide about the visibility.
1836 */
1837 heapTuple->t_data = (HeapTupleHeader) PageGetItem(page, lp);
1838 heapTuple->t_len = ItemIdGetLength(lp);
1839 heapTuple->t_tableOid = RelationGetRelid(relation);
1840 ItemPointerSet(&heapTuple->t_self, blkno, offnum);
1841
1842 /*
1843 * Shouldn't see a HEAP_ONLY tuple at chain start.
1844 */
1846 break;
1847
1848 /*
1849 * The xmin should match the previous xmax value, else chain is
1850 * broken.
1851 */
1855 break;
1856
1857 /*
1858 * When first_call is true (and thus, skip is initially false) we'll
1859 * return the first tuple we find. But on later passes, heapTuple
1860 * will initially be pointing to the tuple we returned last time.
1861 * Returning it again would be incorrect (and would loop forever), so
1862 * we skip it and return the next match we find.
1863 */
1864 if (!skip)
1865 {
1866 /* If it's visible per the snapshot, we must return it */
1867 valid = HeapTupleSatisfiesVisibility(heapTuple, snapshot, buffer);
1869 buffer, snapshot);
1870
1871 if (valid)
1872 {
1873 ItemPointerSetOffsetNumber(tid, offnum);
1874 PredicateLockTID(relation, &heapTuple->t_self, snapshot,
1876 if (all_dead)
1877 *all_dead = false;
1878 return true;
1879 }
1880 }
1881 skip = false;
1882
1883 /*
1884 * If we can't see it, maybe no one else can either. At caller
1885 * request, check whether all chain members are dead to all
1886 * transactions.
1887 *
1888 * Note: if you change the criterion here for what is "dead", fix the
1889 * planner's get_actual_variable_range() function to match.
1890 */
1891 if (all_dead && *all_dead)
1892 {
1893 if (!vistest)
1894 vistest = GlobalVisTestFor(relation);
1895
1896 if (!HeapTupleIsSurelyDead(heapTuple, vistest))
1897 *all_dead = false;
1898 }
1899
1900 /*
1901 * Check to see if HOT chain continues past this tuple; if so fetch
1902 * the next offnum and loop around.
1903 */
1905 {
1906 Assert(ItemPointerGetBlockNumber(&heapTuple->t_data->t_ctid) ==
1907 blkno);
1908 offnum = ItemPointerGetOffsetNumber(&heapTuple->t_data->t_ctid);
1909 at_chain_start = false;
1911 }
1912 else
1913 break; /* end of chain */
1914 }
1915
1916 return false;
1917}
bool HeapTupleIsSurelyDead(HeapTuple htup, GlobalVisState *vistest)
static bool HeapTupleIsHotUpdated(const HeapTupleData *tuple)
static bool HeapTupleIsHeapOnly(const HeapTupleData *tuple)
static void ItemPointerSet(ItemPointerData *pointer, BlockNumber blockNumber, OffsetNumber offNum)
Definition itemptr.h:135
static void ItemPointerSetOffsetNumber(ItemPointerData *pointer, OffsetNumber offsetNumber)
Definition itemptr.h:158
static const struct exclude_list_item skip[]
GlobalVisState * GlobalVisTestFor(Relation rel)
Definition procarray.c:4118
TransactionId RecentXmin
Definition snapmgr.c:160

References Assert, BufferGetBlockNumber(), BufferGetPage(), fb(), GlobalVisTestFor(), HeapCheckForSerializableConflictOut(), HeapTupleHeaderGetUpdateXid(), HeapTupleHeaderGetXmin(), HeapTupleIsHeapOnly(), HeapTupleIsHotUpdated(), HeapTupleIsSurelyDead(), HeapTupleSatisfiesVisibility(), InvalidTransactionId, ItemIdGetLength, ItemIdGetRedirect, ItemIdIsNormal, ItemIdIsRedirected, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), ItemPointerSet(), ItemPointerSetOffsetNumber(), PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PredicateLockTID(), RecentXmin, RelationGetRelid, skip, TransactionIdEquals, and TransactionIdIsValid.

Referenced by BitmapHeapScanNextBlock(), heap_index_delete_tuples(), and heapam_index_fetch_tuple().

◆ heap_index_delete_tuples()

TransactionId heap_index_delete_tuples ( Relation  rel,
TM_IndexDeleteOp delstate 
)
extern

Definition at line 8200 of file heapam.c.

8201{
8202 /* Initial assumption is that earlier pruning took care of conflict */
8203 TransactionId snapshotConflictHorizon = InvalidTransactionId;
8206 Page page = NULL;
8209#ifdef USE_PREFETCH
8212#endif
8214 int finalndeltids = 0,
8215 nblocksaccessed = 0;
8216
8217 /* State that's only used in bottom-up index deletion case */
8218 int nblocksfavorable = 0;
8219 int curtargetfreespace = delstate->bottomupfreespace,
8220 lastfreespace = 0,
8221 actualfreespace = 0;
8222 bool bottomup_final_block = false;
8223
8225
8226 /* Sort caller's deltids array by TID for further processing */
8228
8229 /*
8230 * Bottom-up case: resort deltids array in an order attuned to where the
8231 * greatest number of promising TIDs are to be found, and determine how
8232 * many blocks from the start of sorted array should be considered
8233 * favorable. This will also shrink the deltids array in order to
8234 * eliminate completely unfavorable blocks up front.
8235 */
8236 if (delstate->bottomup)
8238
8239#ifdef USE_PREFETCH
8240 /* Initialize prefetch state. */
8242 prefetch_state.next_item = 0;
8243 prefetch_state.ndeltids = delstate->ndeltids;
8244 prefetch_state.deltids = delstate->deltids;
8245
8246 /*
8247 * Determine the prefetch distance that we will attempt to maintain.
8248 *
8249 * Since the caller holds a buffer lock somewhere in rel, we'd better make
8250 * sure that isn't a catalog relation before we call code that does
8251 * syscache lookups, to avoid risk of deadlock.
8252 */
8253 if (IsCatalogRelation(rel))
8255 else
8258
8259 /* Cap initial prefetch distance for bottom-up deletion caller */
8260 if (delstate->bottomup)
8261 {
8265 }
8266
8267 /* Start prefetching. */
8269#endif
8270
8271 /* Iterate over deltids, determine which to delete, check their horizon */
8272 Assert(delstate->ndeltids > 0);
8273 for (int i = 0; i < delstate->ndeltids; i++)
8274 {
8275 TM_IndexDelete *ideltid = &delstate->deltids[i];
8276 TM_IndexStatus *istatus = delstate->status + ideltid->id;
8277 ItemPointer htid = &ideltid->tid;
8278 OffsetNumber offnum;
8279
8280 /*
8281 * Read buffer, and perform required extra steps each time a new block
8282 * is encountered. Avoid refetching if it's the same block as the one
8283 * from the last htid.
8284 */
8285 if (blkno == InvalidBlockNumber ||
8287 {
8288 /*
8289 * Consider giving up early for bottom-up index deletion caller
8290 * first. (Only prefetch next-next block afterwards, when it
8291 * becomes clear that we're at least going to access the next
8292 * block in line.)
8293 *
8294 * Sometimes the first block frees so much space for bottom-up
8295 * caller that the deletion process can end without accessing any
8296 * more blocks. It is usually necessary to access 2 or 3 blocks
8297 * per bottom-up deletion operation, though.
8298 */
8299 if (delstate->bottomup)
8300 {
8301 /*
8302 * We often allow caller to delete a few additional items
8303 * whose entries we reached after the point that space target
8304 * from caller was satisfied. The cost of accessing the page
8305 * was already paid at that point, so it made sense to finish
8306 * it off. When that happened, we finalize everything here
8307 * (by finishing off the whole bottom-up deletion operation
8308 * without needlessly paying the cost of accessing any more
8309 * blocks).
8310 */
8312 break;
8313
8314 /*
8315 * Give up when we didn't enable our caller to free any
8316 * additional space as a result of processing the page that we
8317 * just finished up with. This rule is the main way in which
8318 * we keep the cost of bottom-up deletion under control.
8319 */
8321 break;
8322 lastfreespace = actualfreespace; /* for next time */
8323
8324 /*
8325 * Deletion operation (which is bottom-up) will definitely
8326 * access the next block in line. Prepare for that now.
8327 *
8328 * Decay target free space so that we don't hang on for too
8329 * long with a marginal case. (Space target is only truly
8330 * helpful when it allows us to recognize that we don't need
8331 * to access more than 1 or 2 blocks to satisfy caller due to
8332 * agreeable workload characteristics.)
8333 *
8334 * We are a bit more patient when we encounter contiguous
8335 * blocks, though: these are treated as favorable blocks. The
8336 * decay process is only applied when the next block in line
8337 * is not a favorable/contiguous block. This is not an
8338 * exception to the general rule; we still insist on finding
8339 * at least one deletable item per block accessed. See
8340 * bottomup_nblocksfavorable() for full details of the theory
8341 * behind favorable blocks and heap block locality in general.
8342 *
8343 * Note: The first block in line is always treated as a
8344 * favorable block, so the earliest possible point that the
8345 * decay can be applied is just before we access the second
8346 * block in line. The Assert() verifies this for us.
8347 */
8349 if (nblocksfavorable > 0)
8351 else
8352 curtargetfreespace /= 2;
8353 }
8354
8355 /* release old buffer */
8356 if (BufferIsValid(buf))
8358
8360 buf = ReadBuffer(rel, blkno);
8362 Assert(!delstate->bottomup ||
8364
8365#ifdef USE_PREFETCH
8366
8367 /*
8368 * To maintain the prefetch distance, prefetch one more page for
8369 * each page we read.
8370 */
8372#endif
8373
8375
8376 page = BufferGetPage(buf);
8377 maxoff = PageGetMaxOffsetNumber(page);
8378 }
8379
8380 /*
8381 * In passing, detect index corruption involving an index page with a
8382 * TID that points to a location in the heap that couldn't possibly be
8383 * correct. We only do this with actual TIDs from caller's index page
8384 * (not items reached by traversing through a HOT chain).
8385 */
8387
8388 if (istatus->knowndeletable)
8389 Assert(!delstate->bottomup && !istatus->promising);
8390 else
8391 {
8392 ItemPointerData tmp = *htid;
8394
8395 /* Are any tuples from this HOT chain non-vacuumable? */
8397 &heapTuple, NULL, true))
8398 continue; /* can't delete entry */
8399
8400 /* Caller will delete, since whole HOT chain is vacuumable */
8401 istatus->knowndeletable = true;
8402
8403 /* Maintain index free space info for bottom-up deletion case */
8404 if (delstate->bottomup)
8405 {
8406 Assert(istatus->freespace > 0);
8407 actualfreespace += istatus->freespace;
8409 bottomup_final_block = true;
8410 }
8411 }
8412
8413 /*
8414 * Maintain snapshotConflictHorizon value for deletion operation as a
8415 * whole by advancing current value using heap tuple headers. This is
8416 * loosely based on the logic for pruning a HOT chain.
8417 */
8419 priorXmax = InvalidTransactionId; /* cannot check first XMIN */
8420 for (;;)
8421 {
8422 ItemId lp;
8423 HeapTupleHeader htup;
8424
8425 /* Sanity check (pure paranoia) */
8426 if (offnum < FirstOffsetNumber)
8427 break;
8428
8429 /*
8430 * An offset past the end of page's line pointer array is possible
8431 * when the array was truncated
8432 */
8433 if (offnum > maxoff)
8434 break;
8435
8436 lp = PageGetItemId(page, offnum);
8438 {
8439 offnum = ItemIdGetRedirect(lp);
8440 continue;
8441 }
8442
8443 /*
8444 * We'll often encounter LP_DEAD line pointers (especially with an
8445 * entry marked knowndeletable by our caller up front). No heap
8446 * tuple headers get examined for an htid that leads us to an
8447 * LP_DEAD item. This is okay because the earlier pruning
8448 * operation that made the line pointer LP_DEAD in the first place
8449 * must have considered the original tuple header as part of
8450 * generating its own snapshotConflictHorizon value.
8451 *
8452 * Relying on XLOG_HEAP2_PRUNE_VACUUM_SCAN records like this is
8453 * the same strategy that index vacuuming uses in all cases. Index
8454 * VACUUM WAL records don't even have a snapshotConflictHorizon
8455 * field of their own for this reason.
8456 */
8457 if (!ItemIdIsNormal(lp))
8458 break;
8459
8460 htup = (HeapTupleHeader) PageGetItem(page, lp);
8461
8462 /*
8463 * Check the tuple XMIN against prior XMAX, if any
8464 */
8467 break;
8468
8470 &snapshotConflictHorizon);
8471
8472 /*
8473 * If the tuple is not HOT-updated, then we are at the end of this
8474 * HOT-chain. No need to visit later tuples from the same update
8475 * chain (they get their own index entries) -- just move on to
8476 * next htid from index AM caller.
8477 */
8478 if (!HeapTupleHeaderIsHotUpdated(htup))
8479 break;
8480
8481 /* Advance to next HOT chain member */
8482 Assert(ItemPointerGetBlockNumber(&htup->t_ctid) == blkno);
8483 offnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
8485 }
8486
8487 /* Enable further/final shrinking of deltids for caller */
8488 finalndeltids = i + 1;
8489 }
8490
8492
8493 /*
8494 * Shrink deltids array to exclude non-deletable entries at the end. This
8495 * is not just a minor optimization. Final deltids array size might be
8496 * zero for a bottom-up caller. Index AM is explicitly allowed to rely on
8497 * ndeltids being zero in all cases with zero total deletable entries.
8498 */
8499 Assert(finalndeltids > 0 || delstate->bottomup);
8500 delstate->ndeltids = finalndeltids;
8501
8502 return snapshotConflictHorizon;
8503}
int maintenance_io_concurrency
Definition bufmgr.c:192
#define Min(x, y)
Definition c.h:1040
bool IsCatalogRelation(Relation relation)
Definition catalog.c:104
static int bottomup_sort_and_shrink(TM_IndexDeleteOp *delstate)
Definition heapam.c:8757
void HeapTupleHeaderAdvanceConflictHorizon(HeapTupleHeader tuple, TransactionId *snapshotConflictHorizon)
Definition heapam.c:8055
#define BOTTOMUP_MAX_NBLOCKS
Definition heapam.c:189
static void index_delete_check_htid(TM_IndexDeleteOp *delstate, Page page, OffsetNumber maxoff, const ItemPointerData *htid, TM_IndexStatus *istatus)
Definition heapam.c:8140
bool heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer, Snapshot snapshot, HeapTuple heapTuple, bool *all_dead, bool first_call)
Definition heapam.c:1779
static void index_delete_sort(TM_IndexDeleteOp *delstate)
Definition heapam.c:8545
static char buf[DEFAULT_XLOG_SEG_SIZE]
#define InitNonVacuumableSnapshot(snapshotdata, vistestp)
Definition snapmgr.h:50
int get_tablespace_maintenance_io_concurrency(Oid spcid)
Definition spccache.c:230

References Assert, BOTTOMUP_MAX_NBLOCKS, bottomup_sort_and_shrink(), buf, BUFFER_LOCK_SHARE, BufferGetPage(), BufferIsValid(), fb(), FirstOffsetNumber, get_tablespace_maintenance_io_concurrency(), GlobalVisTestFor(), heap_hot_search_buffer(), HeapTupleHeaderAdvanceConflictHorizon(), HeapTupleHeaderGetUpdateXid(), HeapTupleHeaderGetXmin(), HeapTupleHeaderIsHotUpdated(), i, index_delete_check_htid(), index_delete_sort(), InitNonVacuumableSnapshot, InvalidBlockNumber, InvalidBuffer, InvalidOffsetNumber, InvalidTransactionId, IsCatalogRelation(), ItemIdGetRedirect, ItemIdIsNormal, ItemIdIsRedirected, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), LockBuffer(), maintenance_io_concurrency, Min, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), RelationData::rd_rel, ReadBuffer(), HeapTupleHeaderData::t_ctid, TransactionIdEquals, TransactionIdIsValid, and UnlockReleaseBuffer().

◆ heap_inplace_lock()

bool heap_inplace_lock ( Relation  relation,
HeapTuple  oldtup_ptr,
Buffer  buffer,
void(*)(void *)  release_callback,
void arg 
)
extern

Definition at line 6438 of file heapam.c.

6441{
6442 HeapTupleData oldtup = *oldtup_ptr; /* minimize diff vs. heap_update() */
6443 TM_Result result;
6444 bool ret;
6445
6446#ifdef USE_ASSERT_CHECKING
6447 if (RelationGetRelid(relation) == RelationRelationId)
6449#endif
6450
6451 Assert(BufferIsValid(buffer));
6452
6453 /*
6454 * Register shared cache invals if necessary. Other sessions may finish
6455 * inplace updates of this tuple between this step and LockTuple(). Since
6456 * inplace updates don't change cache keys, that's harmless.
6457 *
6458 * While it's tempting to register invals only after confirming we can
6459 * return true, the following obstacle precludes reordering steps that
6460 * way. Registering invals might reach a CatalogCacheInitializeCache()
6461 * that locks "buffer". That would hang indefinitely if running after our
6462 * own LockBuffer(). Hence, we must register invals before LockBuffer().
6463 */
6465
6466 LockTuple(relation, &oldtup.t_self, InplaceUpdateTupleLock);
6468
6469 /*----------
6470 * Interpret HeapTupleSatisfiesUpdate() like heap_update() does, except:
6471 *
6472 * - wait unconditionally
6473 * - already locked tuple above, since inplace needs that unconditionally
6474 * - don't recheck header after wait: simpler to defer to next iteration
6475 * - don't try to continue even if the updater aborts: likewise
6476 * - no crosscheck
6477 */
6479 buffer);
6480
6481 if (result == TM_Invisible)
6482 {
6483 /* no known way this can happen */
6484 ereport(ERROR,
6486 errmsg_internal("attempted to overwrite invisible tuple")));
6487 }
6488 else if (result == TM_SelfModified)
6489 {
6490 /*
6491 * CREATE INDEX might reach this if an expression is silly enough to
6492 * call e.g. SELECT ... FROM pg_class FOR SHARE. C code of other SQL
6493 * statements might get here after a heap_update() of the same row, in
6494 * the absence of an intervening CommandCounterIncrement().
6495 */
6496 ereport(ERROR,
6498 errmsg("tuple to be updated was already modified by an operation triggered by the current command")));
6499 }
6500 else if (result == TM_BeingModified)
6501 {
6504
6506 infomask = oldtup.t_data->t_infomask;
6507
6509 {
6512 int remain;
6513
6515 lockmode, NULL))
6516 {
6519 ret = false;
6521 relation, &oldtup.t_self, XLTW_Update,
6522 &remain);
6523 }
6524 else
6525 ret = true;
6526 }
6528 ret = true;
6530 ret = true;
6531 else
6532 {
6535 ret = false;
6536 XactLockTableWait(xwait, relation, &oldtup.t_self,
6537 XLTW_Update);
6538 }
6539 }
6540 else
6541 {
6542 ret = (result == TM_Ok);
6543 if (!ret)
6544 {
6547 }
6548 }
6549
6550 /*
6551 * GetCatalogSnapshot() relies on invalidation messages to know when to
6552 * take a new snapshot. COMMIT of xwait is responsible for sending the
6553 * invalidation. We're not acquiring heavyweight locks sufficient to
6554 * block if not yet sent, so we must take a new snapshot to ensure a later
6555 * attempt has a fair chance. While we don't need this if xwait aborted,
6556 * don't bother optimizing that.
6557 */
6558 if (!ret)
6559 {
6560 UnlockTuple(relation, &oldtup.t_self, InplaceUpdateTupleLock);
6563 }
6564 return ret;
6565}
Datum arg
Definition elog.c:1322
static bool HEAP_XMAX_IS_KEYSHR_LOCKED(uint16 infomask)
void CacheInvalidateHeapTupleInplace(Relation relation, HeapTuple key_equivalent_tuple)
Definition inval.c:1593
void ForgetInplace_Inval(void)
Definition inval.c:1286
void UnlockTuple(Relation relation, const ItemPointerData *tid, LOCKMODE lockmode)
Definition lmgr.c:601
void LockTuple(Relation relation, const ItemPointerData *tid, LOCKMODE lockmode)
Definition lmgr.c:562
@ XLTW_Update
Definition lmgr.h:27
#define InplaceUpdateTupleLock
Definition lockdefs.h:48
LockTupleMode
Definition lockoptions.h:51
@ LockTupleNoKeyExclusive
Definition lockoptions.h:57
MultiXactStatus
Definition multixact.h:37
@ MultiXactStatusNoKeyUpdate
Definition multixact.h:43
void InvalidateCatalogSnapshot(void)
Definition snapmgr.c:455
CommandId GetCurrentCommandId(bool used)
Definition xact.c:830

References arg, Assert, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferIsValid(), CacheInvalidateHeapTupleInplace(), DoesMultiXactIdConflict(), ereport, errcode(), errmsg(), errmsg_internal(), ERROR, fb(), ForgetInplace_Inval(), GetCurrentCommandId(), HEAP_XMAX_IS_KEYSHR_LOCKED(), HEAP_XMAX_IS_MULTI, HeapTupleHeaderGetRawXmax(), HeapTupleSatisfiesUpdate(), InplaceUpdateTupleLock, InvalidateCatalogSnapshot(), LockBuffer(), LockTuple(), LockTupleNoKeyExclusive, MultiXactIdWait(), MultiXactStatusNoKeyUpdate, RelationGetRelid, TM_BeingModified, TM_Invisible, TM_Ok, TM_SelfModified, TransactionIdIsCurrentTransactionId(), UnlockTuple(), XactLockTableWait(), and XLTW_Update.

Referenced by systable_inplace_update_begin().

◆ heap_inplace_unlock()

void heap_inplace_unlock ( Relation  relation,
HeapTuple  oldtup,
Buffer  buffer 
)
extern

◆ heap_inplace_update_and_unlock()

void heap_inplace_update_and_unlock ( Relation  relation,
HeapTuple  oldtup,
HeapTuple  tuple,
Buffer  buffer 
)
extern

Definition at line 6576 of file heapam.c.

6579{
6580 HeapTupleHeader htup = oldtup->t_data;
6581 uint32 oldlen;
6582 uint32 newlen;
6583 char *dst;
6584 char *src;
6585 int nmsgs = 0;
6587 bool RelcacheInitFileInval = false;
6588
6589 Assert(ItemPointerEquals(&oldtup->t_self, &tuple->t_self));
6590 oldlen = oldtup->t_len - htup->t_hoff;
6591 newlen = tuple->t_len - tuple->t_data->t_hoff;
6592 if (oldlen != newlen || htup->t_hoff != tuple->t_data->t_hoff)
6593 elog(ERROR, "wrong tuple length");
6594
6595 dst = (char *) htup + htup->t_hoff;
6596 src = (char *) tuple->t_data + tuple->t_data->t_hoff;
6597
6598 /* Like RecordTransactionCommit(), log only if needed */
6601 &RelcacheInitFileInval);
6602
6603 /*
6604 * Unlink relcache init files as needed. If unlinking, acquire
6605 * RelCacheInitLock until after associated invalidations. By doing this
6606 * in advance, if we checkpoint and then crash between inplace
6607 * XLogInsert() and inval, we don't rely on StartupXLOG() ->
6608 * RelationCacheInitFileRemove(). That uses elevel==LOG, so replay would
6609 * neglect to PANIC on EIO.
6610 */
6612
6613 /*----------
6614 * NO EREPORT(ERROR) from here till changes are complete
6615 *
6616 * Our buffer lock won't stop a reader having already pinned and checked
6617 * visibility for this tuple. Hence, we write WAL first, then mutate the
6618 * buffer. Like in MarkBufferDirtyHint() or RecordTransactionCommit(),
6619 * checkpoint delay makes that acceptable. With the usual order of
6620 * changes, a crash after memcpy() and before XLogInsert() could allow
6621 * datfrozenxid to overtake relfrozenxid:
6622 *
6623 * ["D" is a VACUUM (ONLY_DATABASE_STATS)]
6624 * ["R" is a VACUUM tbl]
6625 * D: vac_update_datfrozenxid() -> systable_beginscan(pg_class)
6626 * D: systable_getnext() returns pg_class tuple of tbl
6627 * R: memcpy() into pg_class tuple of tbl
6628 * D: raise pg_database.datfrozenxid, XLogInsert(), finish
6629 * [crash]
6630 * [recovery restores datfrozenxid w/o relfrozenxid]
6631 *
6632 * Mimic MarkBufferDirtyHint() subroutine XLogSaveBufferForHint().
6633 * Specifically, use DELAY_CHKPT_START, and copy the buffer to the stack.
6634 * The stack copy facilitates a FPI of the post-mutation block before we
6635 * accept other sessions seeing it. DELAY_CHKPT_START allows us to
6636 * XLogInsert() before MarkBufferDirty(). Since XLogSaveBufferForHint()
6637 * can operate under BUFFER_LOCK_SHARED, it can't avoid DELAY_CHKPT_START.
6638 * This function, however, likely could avoid it with the following order
6639 * of operations: MarkBufferDirty(), XLogInsert(), memcpy(). Opt to use
6640 * DELAY_CHKPT_START here, too, as a way to have fewer distinct code
6641 * patterns to analyze. Inplace update isn't so frequent that it should
6642 * pursue the small optimization of skipping DELAY_CHKPT_START.
6643 */
6647
6648 /* XLOG stuff */
6649 if (RelationNeedsWAL(relation))
6650 {
6653 char *origdata = (char *) BufferGetBlock(buffer);
6654 Page page = BufferGetPage(buffer);
6655 uint16 lower = ((PageHeader) page)->pd_lower;
6656 uint16 upper = ((PageHeader) page)->pd_upper;
6658 RelFileLocator rlocator;
6659 ForkNumber forkno;
6660 BlockNumber blkno;
6662
6663 xlrec.offnum = ItemPointerGetOffsetNumber(&tuple->t_self);
6664 xlrec.dbId = MyDatabaseId;
6666 xlrec.relcacheInitFileInval = RelcacheInitFileInval;
6667 xlrec.nmsgs = nmsgs;
6668
6671 if (nmsgs != 0)
6673 nmsgs * sizeof(SharedInvalidationMessage));
6674
6675 /* register block matching what buffer will look like after changes */
6680 BufferGetTag(buffer, &rlocator, &forkno, &blkno);
6681 Assert(forkno == MAIN_FORKNUM);
6682 XLogRegisterBlock(0, &rlocator, forkno, blkno, copied_buffer.data,
6684 XLogRegisterBufData(0, src, newlen);
6685
6686 /* inplace updates aren't decoded atm, don't log the origin */
6687
6689
6690 PageSetLSN(page, recptr);
6691 }
6692
6693 memcpy(dst, src, newlen);
6694
6695 MarkBufferDirty(buffer);
6696
6698
6699 /*
6700 * Send invalidations to shared queue. SearchSysCacheLocked1() assumes we
6701 * do this before UnlockTuple().
6702 */
6704
6707 UnlockTuple(relation, &tuple->t_self, InplaceUpdateTupleLock);
6708
6709 AcceptInvalidationMessages(); /* local processing of just-sent inval */
6710
6711 /*
6712 * Queue a transactional inval, for logical decoding and for third-party
6713 * code that might have been relying on it since long before inplace
6714 * update adopted immediate invalidation. See README.tuplock section
6715 * "Reading inplace-updated columns" for logical decoding details.
6716 */
6718 CacheInvalidateHeapTuple(relation, tuple, NULL);
6719}
void BufferGetTag(Buffer buffer, RelFileLocator *rlocator, ForkNumber *forknum, BlockNumber *blknum)
Definition bufmgr.c:4378
static Block BufferGetBlock(Buffer buffer)
Definition bufmgr.h:433
PageHeaderData * PageHeader
Definition bufpage.h:173
uint32_t uint32
Definition c.h:579
Oid MyDatabaseTableSpace
Definition globals.c:96
Oid MyDatabaseId
Definition globals.c:94
#define MinSizeOfHeapInplace
#define XLOG_HEAP_INPLACE
Definition heapam_xlog.h:40
void AcceptInvalidationMessages(void)
Definition inval.c:930
int inplaceGetInvalidationMessages(SharedInvalidationMessage **msgs, bool *RelcacheInitFileInval)
Definition inval.c:1088
void PreInplace_Inval(void)
Definition inval.c:1250
void AtInplace_Inval(void)
Definition inval.c:1263
#define IsBootstrapProcessingMode()
Definition miscadmin.h:477
Datum lower(PG_FUNCTION_ARGS)
Datum upper(PG_FUNCTION_ARGS)
#define DELAY_CHKPT_START
Definition proc.h:136
ForkNumber
Definition relpath.h:56
PGPROC * MyProc
Definition proc.c:67
int delayChkptFlags
Definition proc.h:252
#define XLogStandbyInfoActive()
Definition xlog.h:125
void XLogRegisterBufData(uint8 block_id, const void *data, uint32 len)
Definition xloginsert.c:409
void XLogRegisterBlock(uint8 block_id, RelFileLocator *rlocator, ForkNumber forknum, BlockNumber blknum, const PageData *page, uint8 flags)
Definition xloginsert.c:313

References AcceptInvalidationMessages(), Assert, AtInplace_Inval(), BUFFER_LOCK_UNLOCK, BufferGetBlock(), BufferGetPage(), BufferGetTag(), CacheInvalidateHeapTuple(), DELAY_CHKPT_START, PGPROC::delayChkptFlags, elog, END_CRIT_SECTION, ERROR, fb(), inplaceGetInvalidationMessages(), InplaceUpdateTupleLock, IsBootstrapProcessingMode, ItemPointerEquals(), ItemPointerGetOffsetNumber(), LockBuffer(), lower(), MAIN_FORKNUM, MarkBufferDirty(), MinSizeOfHeapInplace, MyDatabaseId, MyDatabaseTableSpace, MyProc, PageSetLSN(), PreInplace_Inval(), REGBUF_STANDARD, RelationNeedsWAL, START_CRIT_SECTION, HeapTupleData::t_data, HeapTupleHeaderData::t_hoff, HeapTupleData::t_len, HeapTupleData::t_self, UnlockTuple(), upper(), XLOG_HEAP_INPLACE, XLogBeginInsert(), XLogInsert(), XLogRegisterBlock(), XLogRegisterBufData(), XLogRegisterData(), and XLogStandbyInfoActive.

Referenced by systable_inplace_update_finish().

◆ heap_insert()

void heap_insert ( Relation  relation,
HeapTuple  tup,
CommandId  cid,
int  options,
BulkInsertState  bistate 
)
extern

Definition at line 2142 of file heapam.c.

2144{
2147 Buffer buffer;
2148 Buffer vmbuffer = InvalidBuffer;
2149 bool all_visible_cleared = false;
2150
2151 /* Cheap, simplistic check that the tuple matches the rel's rowtype. */
2154
2155 AssertHasSnapshotForToast(relation);
2156
2157 /*
2158 * Fill in tuple header fields and toast the tuple if necessary.
2159 *
2160 * Note: below this point, heaptup is the data we actually intend to store
2161 * into the relation; tup is the caller's original untoasted data.
2162 */
2163 heaptup = heap_prepare_insert(relation, tup, xid, cid, options);
2164
2165 /*
2166 * Find buffer to insert this tuple into. If the page is all visible,
2167 * this will also pin the requisite visibility map page.
2168 */
2169 buffer = RelationGetBufferForTuple(relation, heaptup->t_len,
2170 InvalidBuffer, options, bistate,
2171 &vmbuffer, NULL,
2172 0);
2173
2174 /*
2175 * We're about to do the actual insert -- but check for conflict first, to
2176 * avoid possibly having to roll back work we've just done.
2177 *
2178 * This is safe without a recheck as long as there is no possibility of
2179 * another process scanning the page between this check and the insert
2180 * being visible to the scan (i.e., an exclusive buffer content lock is
2181 * continuously held from this point until the tuple insert is visible).
2182 *
2183 * For a heap insert, we only need to check for table-level SSI locks. Our
2184 * new tuple can't possibly conflict with existing tuple locks, and heap
2185 * page locks are only consolidated versions of tuple locks; they do not
2186 * lock "gaps" as index page locks do. So we don't need to specify a
2187 * buffer when making the call, which makes for a faster check.
2188 */
2190
2191 /* NO EREPORT(ERROR) from here till changes are logged */
2193
2194 RelationPutHeapTuple(relation, buffer, heaptup,
2196
2197 if (PageIsAllVisible(BufferGetPage(buffer)))
2198 {
2199 all_visible_cleared = true;
2201 visibilitymap_clear(relation,
2203 vmbuffer, VISIBILITYMAP_VALID_BITS);
2204 }
2205
2206 /*
2207 * XXX Should we set PageSetPrunable on this page ?
2208 *
2209 * The inserting transaction may eventually abort thus making this tuple
2210 * DEAD and hence available for pruning. Though we don't want to optimize
2211 * for aborts, if no other tuple in this page is UPDATEd/DELETEd, the
2212 * aborted tuple will never be pruned until next vacuum is triggered.
2213 *
2214 * If you do add PageSetPrunable here, add it in heap_xlog_insert too.
2215 */
2216
2217 MarkBufferDirty(buffer);
2218
2219 /* XLOG stuff */
2220 if (RelationNeedsWAL(relation))
2221 {
2225 Page page = BufferGetPage(buffer);
2226 uint8 info = XLOG_HEAP_INSERT;
2227 int bufflags = 0;
2228
2229 /*
2230 * If this is a catalog, we need to transmit combo CIDs to properly
2231 * decode, so log that as well.
2232 */
2234 log_heap_new_cid(relation, heaptup);
2235
2236 /*
2237 * If this is the single and first tuple on page, we can reinit the
2238 * page instead of restoring the whole thing. Set flag, and hide
2239 * buffer references from XLogInsert.
2240 */
2243 {
2244 info |= XLOG_HEAP_INIT_PAGE;
2246 }
2247
2248 xlrec.offnum = ItemPointerGetOffsetNumber(&heaptup->t_self);
2249 xlrec.flags = 0;
2255
2256 /*
2257 * For logical decoding, we need the tuple even if we're doing a full
2258 * page write, so make sure it's included even if we take a full-page
2259 * image. (XXX We could alternatively store a pointer into the FPW).
2260 */
2261 if (RelationIsLogicallyLogged(relation) &&
2263 {
2266
2267 if (IsToastRelation(relation))
2269 }
2270
2273
2274 xlhdr.t_infomask2 = heaptup->t_data->t_infomask2;
2275 xlhdr.t_infomask = heaptup->t_data->t_infomask;
2276 xlhdr.t_hoff = heaptup->t_data->t_hoff;
2277
2278 /*
2279 * note we mark xlhdr as belonging to buffer; if XLogInsert decides to
2280 * write the whole page to the xlog, we don't need to store
2281 * xl_heap_header in the xlog.
2282 */
2285 /* PG73FORMAT: write bitmap [+ padding] [+ oid] + data */
2287 (char *) heaptup->t_data + SizeofHeapTupleHeader,
2289
2290 /* filtering by origin on a row level is much more efficient */
2292
2293 recptr = XLogInsert(RM_HEAP_ID, info);
2294
2295 PageSetLSN(page, recptr);
2296 }
2297
2299
2300 UnlockReleaseBuffer(buffer);
2301 if (vmbuffer != InvalidBuffer)
2302 ReleaseBuffer(vmbuffer);
2303
2304 /*
2305 * If tuple is cacheable, mark it for invalidation from the caches in case
2306 * we abort. Note it is OK to do this after releasing the buffer, because
2307 * the heaptup data structure is all in local memory, not in the shared
2308 * buffer.
2309 */
2311
2312 /* Note: speculative insertions are counted too, even if aborted later */
2313 pgstat_count_heap_insert(relation, 1);
2314
2315 /*
2316 * If heaptup is a private copy, release it. Don't forget to copy t_self
2317 * back to the caller's image, too.
2318 */
2319 if (heaptup != tup)
2320 {
2321 tup->t_self = heaptup->t_self;
2323 }
2324}
uint8_t uint8
Definition c.h:577
static HeapTuple heap_prepare_insert(Relation relation, HeapTuple tup, TransactionId xid, CommandId cid, int options)
Definition heapam.c:2333
#define HEAP_INSERT_SPECULATIVE
Definition heapam.h:40
#define HEAP_INSERT_NO_LOGICAL
Definition heapam.h:39
#define XLH_INSERT_ON_TOAST_RELATION
Definition heapam_xlog.h:76
#define XLH_INSERT_IS_SPECULATIVE
Definition heapam_xlog.h:74
#define XLH_INSERT_ALL_VISIBLE_CLEARED
Definition heapam_xlog.h:72
#define XLOG_HEAP_INSERT
Definition heapam_xlog.h:33
#define SizeOfHeapInsert
#define XLH_INSERT_CONTAINS_NEW_TUPLE
Definition heapam_xlog.h:75
#define XLOG_HEAP_INIT_PAGE
Definition heapam_xlog.h:47
void RelationPutHeapTuple(Relation relation, Buffer buffer, HeapTuple tuple, bool token)
Definition hio.c:35
Buffer RelationGetBufferForTuple(Relation relation, Size len, Buffer otherBuffer, int options, BulkInsertState bistate, Buffer *vmbuffer, Buffer *vmbuffer_other, int num_pages)
Definition hio.c:500
#define HeapTupleHeaderGetNatts(tup)
void pgstat_count_heap_insert(Relation rel, PgStat_Counter n)
#define RelationIsLogicallyLogged(relation)
Definition rel.h:710
#define RelationGetNumberOfAttributes(relation)
Definition rel.h:520
#define REGBUF_KEEP_DATA
Definition xloginsert.h:36
#define REGBUF_WILL_INIT
Definition xloginsert.h:34

References Assert, AssertHasSnapshotForToast(), BufferGetBlockNumber(), BufferGetPage(), CacheInvalidateHeapTuple(), CheckForSerializableConflictIn(), END_CRIT_SECTION, fb(), FirstOffsetNumber, GetCurrentTransactionId(), heap_freetuple(), HEAP_INSERT_NO_LOGICAL, HEAP_INSERT_SPECULATIVE, heap_prepare_insert(), HeapTupleHeaderGetNatts, InvalidBlockNumber, InvalidBuffer, IsToastRelation(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), log_heap_new_cid(), MarkBufferDirty(), PageClearAllVisible(), PageGetMaxOffsetNumber(), PageIsAllVisible(), PageSetLSN(), pgstat_count_heap_insert(), REGBUF_KEEP_DATA, REGBUF_STANDARD, REGBUF_WILL_INIT, RelationGetBufferForTuple(), RelationGetNumberOfAttributes, RelationIsAccessibleInLogicalDecoding, RelationIsLogicallyLogged, RelationNeedsWAL, RelationPutHeapTuple(), ReleaseBuffer(), SizeOfHeapHeader, SizeOfHeapInsert, SizeofHeapTupleHeader, START_CRIT_SECTION, UnlockReleaseBuffer(), visibilitymap_clear(), VISIBILITYMAP_VALID_BITS, XLH_INSERT_ALL_VISIBLE_CLEARED, XLH_INSERT_CONTAINS_NEW_TUPLE, XLH_INSERT_IS_SPECULATIVE, XLH_INSERT_ON_TOAST_RELATION, XLOG_HEAP_INIT_PAGE, XLOG_HEAP_INSERT, XLOG_INCLUDE_ORIGIN, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), XLogRegisterData(), and XLogSetRecordFlags().

Referenced by heapam_tuple_insert(), heapam_tuple_insert_speculative(), simple_heap_insert(), and toast_save_datum().

◆ heap_lock_tuple()

TM_Result heap_lock_tuple ( Relation  relation,
HeapTuple  tuple,
CommandId  cid,
LockTupleMode  mode,
LockWaitPolicy  wait_policy,
bool  follow_updates,
Buffer buffer,
TM_FailureData tmfd 
)
extern

Definition at line 4645 of file heapam.c.

4649{
4650 TM_Result result;
4651 ItemPointer tid = &(tuple->t_self);
4652 ItemId lp;
4653 Page page;
4654 Buffer vmbuffer = InvalidBuffer;
4655 BlockNumber block;
4656 TransactionId xid,
4657 xmax;
4661 bool first_time = true;
4662 bool skip_tuple_lock = false;
4663 bool have_tuple_lock = false;
4664 bool cleared_all_frozen = false;
4665
4666 *buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
4667 block = ItemPointerGetBlockNumber(tid);
4668
4669 /*
4670 * Before locking the buffer, pin the visibility map page if it appears to
4671 * be necessary. Since we haven't got the lock yet, someone else might be
4672 * in the middle of changing this, so we'll need to recheck after we have
4673 * the lock.
4674 */
4675 if (PageIsAllVisible(BufferGetPage(*buffer)))
4676 visibilitymap_pin(relation, block, &vmbuffer);
4677
4679
4680 page = BufferGetPage(*buffer);
4683
4684 tuple->t_data = (HeapTupleHeader) PageGetItem(page, lp);
4685 tuple->t_len = ItemIdGetLength(lp);
4686 tuple->t_tableOid = RelationGetRelid(relation);
4687
4688l3:
4689 result = HeapTupleSatisfiesUpdate(tuple, cid, *buffer);
4690
4691 if (result == TM_Invisible)
4692 {
4693 /*
4694 * This is possible, but only when locking a tuple for ON CONFLICT DO
4695 * SELECT/UPDATE. We return this value here rather than throwing an
4696 * error in order to give that case the opportunity to throw a more
4697 * specific error.
4698 */
4699 result = TM_Invisible;
4700 goto out_locked;
4701 }
4702 else if (result == TM_BeingModified ||
4703 result == TM_Updated ||
4704 result == TM_Deleted)
4705 {
4709 bool require_sleep;
4710 ItemPointerData t_ctid;
4711
4712 /* must copy state data before unlocking buffer */
4714 infomask = tuple->t_data->t_infomask;
4715 infomask2 = tuple->t_data->t_infomask2;
4716 ItemPointerCopy(&tuple->t_data->t_ctid, &t_ctid);
4717
4719
4720 /*
4721 * If any subtransaction of the current top transaction already holds
4722 * a lock as strong as or stronger than what we're requesting, we
4723 * effectively hold the desired lock already. We *must* succeed
4724 * without trying to take the tuple lock, else we will deadlock
4725 * against anyone wanting to acquire a stronger lock.
4726 *
4727 * Note we only do this the first time we loop on the HTSU result;
4728 * there is no point in testing in subsequent passes, because
4729 * evidently our own transaction cannot have acquired a new lock after
4730 * the first time we checked.
4731 */
4732 if (first_time)
4733 {
4734 first_time = false;
4735
4737 {
4738 int i;
4739 int nmembers;
4740 MultiXactMember *members;
4741
4742 /*
4743 * We don't need to allow old multixacts here; if that had
4744 * been the case, HeapTupleSatisfiesUpdate would have returned
4745 * MayBeUpdated and we wouldn't be here.
4746 */
4747 nmembers =
4748 GetMultiXactIdMembers(xwait, &members, false,
4750
4751 for (i = 0; i < nmembers; i++)
4752 {
4753 /* only consider members of our own transaction */
4754 if (!TransactionIdIsCurrentTransactionId(members[i].xid))
4755 continue;
4756
4757 if (TUPLOCK_from_mxstatus(members[i].status) >= mode)
4758 {
4759 pfree(members);
4760 result = TM_Ok;
4761 goto out_unlocked;
4762 }
4763 else
4764 {
4765 /*
4766 * Disable acquisition of the heavyweight tuple lock.
4767 * Otherwise, when promoting a weaker lock, we might
4768 * deadlock with another locker that has acquired the
4769 * heavyweight tuple lock and is waiting for our
4770 * transaction to finish.
4771 *
4772 * Note that in this case we still need to wait for
4773 * the multixact if required, to avoid acquiring
4774 * conflicting locks.
4775 */
4776 skip_tuple_lock = true;
4777 }
4778 }
4779
4780 if (members)
4781 pfree(members);
4782 }
4784 {
4785 switch (mode)
4786 {
4787 case LockTupleKeyShare:
4791 result = TM_Ok;
4792 goto out_unlocked;
4793 case LockTupleShare:
4796 {
4797 result = TM_Ok;
4798 goto out_unlocked;
4799 }
4800 break;
4803 {
4804 result = TM_Ok;
4805 goto out_unlocked;
4806 }
4807 break;
4808 case LockTupleExclusive:
4811 {
4812 result = TM_Ok;
4813 goto out_unlocked;
4814 }
4815 break;
4816 }
4817 }
4818 }
4819
4820 /*
4821 * Initially assume that we will have to wait for the locking
4822 * transaction(s) to finish. We check various cases below in which
4823 * this can be turned off.
4824 */
4825 require_sleep = true;
4826 if (mode == LockTupleKeyShare)
4827 {
4828 /*
4829 * If we're requesting KeyShare, and there's no update present, we
4830 * don't need to wait. Even if there is an update, we can still
4831 * continue if the key hasn't been modified.
4832 *
4833 * However, if there are updates, we need to walk the update chain
4834 * to mark future versions of the row as locked, too. That way,
4835 * if somebody deletes that future version, we're protected
4836 * against the key going away. This locking of future versions
4837 * could block momentarily, if a concurrent transaction is
4838 * deleting a key; or it could return a value to the effect that
4839 * the transaction deleting the key has already committed. So we
4840 * do this before re-locking the buffer; otherwise this would be
4841 * prone to deadlocks.
4842 *
4843 * Note that the TID we're locking was grabbed before we unlocked
4844 * the buffer. For it to change while we're not looking, the
4845 * other properties we're testing for below after re-locking the
4846 * buffer would also change, in which case we would restart this
4847 * loop above.
4848 */
4850 {
4851 bool updated;
4852
4854
4855 /*
4856 * If there are updates, follow the update chain; bail out if
4857 * that cannot be done.
4858 */
4859 if (follow_updates && updated &&
4860 !ItemPointerEquals(&tuple->t_self, &t_ctid))
4861 {
4862 TM_Result res;
4863
4864 res = heap_lock_updated_tuple(relation,
4865 infomask, xwait, &t_ctid,
4867 mode);
4868 if (res != TM_Ok)
4869 {
4870 result = res;
4871 /* recovery code expects to have buffer lock held */
4873 goto failed;
4874 }
4875 }
4876
4878
4879 /*
4880 * Make sure it's still an appropriate lock, else start over.
4881 * Also, if it wasn't updated before we released the lock, but
4882 * is updated now, we start over too; the reason is that we
4883 * now need to follow the update chain to lock the new
4884 * versions.
4885 */
4886 if (!HeapTupleHeaderIsOnlyLocked(tuple->t_data) &&
4887 ((tuple->t_data->t_infomask2 & HEAP_KEYS_UPDATED) ||
4888 !updated))
4889 goto l3;
4890
4891 /* Things look okay, so we can skip sleeping */
4892 require_sleep = false;
4893
4894 /*
4895 * Note we allow Xmax to change here; other updaters/lockers
4896 * could have modified it before we grabbed the buffer lock.
4897 * However, this is not a problem, because with the recheck we
4898 * just did we ensure that they still don't conflict with the
4899 * lock we want.
4900 */
4901 }
4902 }
4903 else if (mode == LockTupleShare)
4904 {
4905 /*
4906 * If we're requesting Share, we can similarly avoid sleeping if
4907 * there's no update and no exclusive lock present.
4908 */
4911 {
4913
4914 /*
4915 * Make sure it's still an appropriate lock, else start over.
4916 * See above about allowing xmax to change.
4917 */
4920 goto l3;
4921 require_sleep = false;
4922 }
4923 }
4924 else if (mode == LockTupleNoKeyExclusive)
4925 {
4926 /*
4927 * If we're requesting NoKeyExclusive, we might also be able to
 4928 * avoid sleeping; just ensure that there is no conflicting lock
4929 * already acquired.
4930 */
4932 {
4934 mode, NULL))
4935 {
4936 /*
4937 * No conflict, but if the xmax changed under us in the
4938 * meantime, start over.
4939 */
4943 xwait))
4944 goto l3;
4945
4946 /* otherwise, we're good */
4947 require_sleep = false;
4948 }
4949 }
4951 {
4953
4954 /* if the xmax changed in the meantime, start over */
4957 xwait))
4958 goto l3;
4959 /* otherwise, we're good */
4960 require_sleep = false;
4961 }
4962 }
4963
4964 /*
4965 * As a check independent from those above, we can also avoid sleeping
4966 * if the current transaction is the sole locker of the tuple. Note
4967 * that the strength of the lock already held is irrelevant; this is
4968 * not about recording the lock in Xmax (which will be done regardless
4969 * of this optimization, below). Also, note that the cases where we
4970 * hold a lock stronger than we are requesting are already handled
4971 * above by not doing anything.
4972 *
4973 * Note we only deal with the non-multixact case here; MultiXactIdWait
4974 * is well equipped to deal with this situation on its own.
4975 */
4978 {
4979 /* ... but if the xmax changed in the meantime, start over */
4983 xwait))
4984 goto l3;
4986 require_sleep = false;
4987 }
4988
4989 /*
4990 * Time to sleep on the other transaction/multixact, if necessary.
4991 *
4992 * If the other transaction is an update/delete that's already
4993 * committed, then sleeping cannot possibly do any good: if we're
4994 * required to sleep, get out to raise an error instead.
4995 *
4996 * By here, we either have already acquired the buffer exclusive lock,
4997 * or we must wait for the locking transaction or multixact; so below
4998 * we ensure that we grab buffer lock after the sleep.
4999 */
5000 if (require_sleep && (result == TM_Updated || result == TM_Deleted))
5001 {
5003 goto failed;
5004 }
5005 else if (require_sleep)
5006 {
5007 /*
5008 * Acquire tuple lock to establish our priority for the tuple, or
5009 * die trying. LockTuple will release us when we are next-in-line
5010 * for the tuple. We must do this even if we are share-locking,
5011 * but not if we already have a weaker lock on the tuple.
5012 *
5013 * If we are forced to "start over" below, we keep the tuple lock;
5014 * this arranges that we stay at the head of the line while
5015 * rechecking tuple state.
5016 */
5017 if (!skip_tuple_lock &&
5018 !heap_acquire_tuplock(relation, tid, mode, wait_policy,
5020 {
5021 /*
5022 * This can only happen if wait_policy is Skip and the lock
5023 * couldn't be obtained.
5024 */
5025 result = TM_WouldBlock;
5026 /* recovery code expects to have buffer lock held */
5028 goto failed;
5029 }
5030
5032 {
5034
5035 /* We only ever lock tuples, never update them */
5036 if (status >= MultiXactStatusNoKeyUpdate)
5037 elog(ERROR, "invalid lock mode in heap_lock_tuple");
5038
5039 /* wait for multixact to end, or die trying */
5040 switch (wait_policy)
5041 {
5042 case LockWaitBlock:
5044 relation, &tuple->t_self, XLTW_Lock, NULL);
5045 break;
5046 case LockWaitSkip:
5048 status, infomask, relation,
5049 NULL, false))
5050 {
5051 result = TM_WouldBlock;
5052 /* recovery code expects to have buffer lock held */
5054 goto failed;
5055 }
5056 break;
5057 case LockWaitError:
5059 status, infomask, relation,
5061 ereport(ERROR,
5063 errmsg("could not obtain lock on row in relation \"%s\"",
5064 RelationGetRelationName(relation))));
5065
5066 break;
5067 }
5068
5069 /*
5070 * Of course, the multixact might not be done here: if we're
5071 * requesting a light lock mode, other transactions with light
5072 * locks could still be alive, as well as locks owned by our
5073 * own xact or other subxacts of this backend. We need to
5074 * preserve the surviving MultiXact members. Note that it
5075 * isn't absolutely necessary in the latter case, but doing so
5076 * is simpler.
5077 */
5078 }
5079 else
5080 {
5081 /* wait for regular transaction to end, or die trying */
5082 switch (wait_policy)
5083 {
5084 case LockWaitBlock:
5085 XactLockTableWait(xwait, relation, &tuple->t_self,
5086 XLTW_Lock);
5087 break;
5088 case LockWaitSkip:
5090 {
5091 result = TM_WouldBlock;
5092 /* recovery code expects to have buffer lock held */
5094 goto failed;
5095 }
5096 break;
5097 case LockWaitError:
5099 ereport(ERROR,
5101 errmsg("could not obtain lock on row in relation \"%s\"",
5102 RelationGetRelationName(relation))));
5103 break;
5104 }
5105 }
5106
5107 /* if there are updates, follow the update chain */
5109 !ItemPointerEquals(&tuple->t_self, &t_ctid))
5110 {
5111 TM_Result res;
5112
5113 res = heap_lock_updated_tuple(relation,
5114 infomask, xwait, &t_ctid,
5116 mode);
5117 if (res != TM_Ok)
5118 {
5119 result = res;
5120 /* recovery code expects to have buffer lock held */
5122 goto failed;
5123 }
5124 }
5125
5127
5128 /*
5129 * xwait is done, but if xwait had just locked the tuple then some
5130 * other xact could update this tuple before we get to this point.
5131 * Check for xmax change, and start over if so.
5132 */
5135 xwait))
5136 goto l3;
5137
5139 {
5140 /*
5141 * Otherwise check if it committed or aborted. Note we cannot
5142 * be here if the tuple was only locked by somebody who didn't
5143 * conflict with us; that would have been handled above. So
5144 * that transaction must necessarily be gone by now. But
5145 * don't check for this in the multixact case, because some
5146 * locker transactions might still be running.
5147 */
5148 UpdateXmaxHintBits(tuple->t_data, *buffer, xwait);
5149 }
5150 }
5151
5152 /* By here, we're certain that we hold buffer exclusive lock again */
5153
5154 /*
5155 * We may lock if previous xmax aborted, or if it committed but only
5156 * locked the tuple without updating it; or if we didn't have to wait
5157 * at all for whatever reason.
5158 */
5159 if (!require_sleep ||
5160 (tuple->t_data->t_infomask & HEAP_XMAX_INVALID) ||
5163 result = TM_Ok;
5164 else if (!ItemPointerEquals(&tuple->t_self, &tuple->t_data->t_ctid))
5165 result = TM_Updated;
5166 else
5167 result = TM_Deleted;
5168 }
5169
5170failed:
5171 if (result != TM_Ok)
5172 {
5173 Assert(result == TM_SelfModified || result == TM_Updated ||
5174 result == TM_Deleted || result == TM_WouldBlock);
5175
5176 /*
5177 * When locking a tuple under LockWaitSkip semantics and we fail with
5178 * TM_WouldBlock above, it's possible for concurrent transactions to
5179 * release the lock and set HEAP_XMAX_INVALID in the meantime. So
5180 * this assert is slightly different from the equivalent one in
5181 * heap_delete and heap_update.
5182 */
5183 Assert((result == TM_WouldBlock) ||
5184 !(tuple->t_data->t_infomask & HEAP_XMAX_INVALID));
5185 Assert(result != TM_Updated ||
5186 !ItemPointerEquals(&tuple->t_self, &tuple->t_data->t_ctid));
5187 tmfd->ctid = tuple->t_data->t_ctid;
5188 tmfd->xmax = HeapTupleHeaderGetUpdateXid(tuple->t_data);
5189 if (result == TM_SelfModified)
5190 tmfd->cmax = HeapTupleHeaderGetCmax(tuple->t_data);
5191 else
5192 tmfd->cmax = InvalidCommandId;
5193 goto out_locked;
5194 }
5195
5196 /*
5197 * If we didn't pin the visibility map page and the page has become all
5198 * visible while we were busy locking the buffer, or during some
5199 * subsequent window during which we had it unlocked, we'll have to unlock
5200 * and re-lock, to avoid holding the buffer lock across I/O. That's a bit
5201 * unfortunate, especially since we'll now have to recheck whether the
5202 * tuple has been locked or updated under us, but hopefully it won't
5203 * happen very often.
5204 */
5205 if (vmbuffer == InvalidBuffer && PageIsAllVisible(page))
5206 {
5208 visibilitymap_pin(relation, block, &vmbuffer);
5210 goto l3;
5211 }
5212
5213 xmax = HeapTupleHeaderGetRawXmax(tuple->t_data);
5214 old_infomask = tuple->t_data->t_infomask;
5215
5216 /*
5217 * If this is the first possibly-multixact-able operation in the current
5218 * transaction, set my per-backend OldestMemberMXactId setting. We can be
5219 * certain that the transaction will never become a member of any older
5220 * MultiXactIds than that. (We have to do this even if we end up just
5221 * using our own TransactionId below, since some other backend could
5222 * incorporate our XID into a MultiXact immediately afterwards.)
5223 */
5225
5226 /*
5227 * Compute the new xmax and infomask to store into the tuple. Note we do
5228 * not modify the tuple just yet, because that would leave it in the wrong
5229 * state if multixact.c elogs.
5230 */
5232 GetCurrentTransactionId(), mode, false,
5233 &xid, &new_infomask, &new_infomask2);
5234
5236
5237 /*
5238 * Store transaction information of xact locking the tuple.
5239 *
5240 * Note: Cmax is meaningless in this context, so don't set it; this avoids
5241 * possibly generating a useless combo CID. Moreover, if we're locking a
5242 * previously updated tuple, it's important to preserve the Cmax.
5243 *
5244 * Also reset the HOT UPDATE bit, but only if there's no update; otherwise
5245 * we would break the HOT chain.
5246 */
5249 tuple->t_data->t_infomask |= new_infomask;
5250 tuple->t_data->t_infomask2 |= new_infomask2;
5253 HeapTupleHeaderSetXmax(tuple->t_data, xid);
5254
5255 /*
5256 * Make sure there is no forward chain link in t_ctid. Note that in the
5257 * cases where the tuple has been updated, we must not overwrite t_ctid,
5258 * because it was set by the updater. Moreover, if the tuple has been
5259 * updated, we need to follow the update chain to lock the new versions of
5260 * the tuple as well.
5261 */
5263 tuple->t_data->t_ctid = *tid;
5264
5265 /* Clear only the all-frozen bit on visibility map if needed */
5266 if (PageIsAllVisible(page) &&
5267 visibilitymap_clear(relation, block, vmbuffer,
5269 cleared_all_frozen = true;
5270
5271
5272 MarkBufferDirty(*buffer);
5273
5274 /*
5275 * XLOG stuff. You might think that we don't need an XLOG record because
5276 * there is no state change worth restoring after a crash. You would be
5277 * wrong however: we have just written either a TransactionId or a
5278 * MultiXactId that may never have been seen on disk before, and we need
5279 * to make sure that there are XLOG entries covering those ID numbers.
5280 * Else the same IDs might be re-used after a crash, which would be
5281 * disastrous if this page made it to disk before the crash. Essentially
5282 * we have to enforce the WAL log-before-data rule even in this case.
5283 * (Also, in a PITR log-shipping or 2PC environment, we have to have XLOG
5284 * entries for everything anyway.)
5285 */
5286 if (RelationNeedsWAL(relation))
5287 {
5290
5293
5294 xlrec.offnum = ItemPointerGetOffsetNumber(&tuple->t_self);
5295 xlrec.xmax = xid;
5296 xlrec.infobits_set = compute_infobits(new_infomask,
5297 tuple->t_data->t_infomask2);
5300
5301 /* we don't decode row locks atm, so no need to log the origin */
5302
5304
5305 PageSetLSN(page, recptr);
5306 }
5307
5309
5310 result = TM_Ok;
5311
5314
5316 if (BufferIsValid(vmbuffer))
5317 ReleaseBuffer(vmbuffer);
5318
5319 /*
5320 * Don't update the visibility map here. Locking a tuple doesn't change
5321 * visibility info.
5322 */
5323
5324 /*
5325 * Now that we have successfully marked the tuple as locked, we can
5326 * release the lmgr tuple lock, if we had it.
5327 */
5328 if (have_tuple_lock)
5329 UnlockTupleTuplock(relation, tid, mode);
5330
5331 return result;
5332}
#define TUPLOCK_from_mxstatus(status)
Definition heapam.c:218
static bool ConditionalMultiXactIdWait(MultiXactId multi, MultiXactStatus status, uint16 infomask, Relation rel, int *remaining, bool logLockFailure)
Definition heapam.c:7877
static TM_Result heap_lock_updated_tuple(Relation rel, uint16 prior_infomask, TransactionId prior_raw_xmax, const ItemPointerData *prior_ctid, TransactionId xid, LockTupleMode mode)
Definition heapam.c:6116
static MultiXactStatus get_mxact_status_for_lock(LockTupleMode mode, bool is_update)
Definition heapam.c:4598
#define XLH_LOCK_ALL_FROZEN_CLEARED
#define XLOG_HEAP_LOCK
Definition heapam_xlog.h:39
#define SizeOfHeapLock
#define HEAP_KEYS_UPDATED
static bool HEAP_XMAX_IS_SHR_LOCKED(uint16 infomask)
static bool HEAP_XMAX_IS_EXCL_LOCKED(uint16 infomask)
static void ItemPointerCopy(const ItemPointerData *fromPointer, ItemPointerData *toPointer)
Definition itemptr.h:172
bool ConditionalXactLockTableWait(TransactionId xid, bool logLockFailure)
Definition lmgr.c:739
@ XLTW_Lock
Definition lmgr.h:29
bool log_lock_failures
Definition lock.c:54
@ LockWaitSkip
Definition lockoptions.h:42
@ LockWaitError
Definition lockoptions.h:44
@ LockTupleShare
Definition lockoptions.h:55
@ LockTupleKeyShare
Definition lockoptions.h:53
int GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **members, bool from_pgupgrade, bool isLockOnly)
Definition multixact.c:1151
static PgChecksumMode mode
@ TM_WouldBlock
Definition tableam.h:103
#define VISIBILITYMAP_ALL_FROZEN

References Assert, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetPage(), BufferIsValid(), TM_FailureData::cmax, compute_infobits(), compute_new_xmax_infomask(), ConditionalMultiXactIdWait(), ConditionalXactLockTableWait(), TM_FailureData::ctid, DoesMultiXactIdConflict(), elog, END_CRIT_SECTION, ereport, errcode(), errmsg(), ERROR, fb(), get_mxact_status_for_lock(), GetCurrentTransactionId(), GetMultiXactIdMembers(), heap_acquire_tuplock(), HEAP_KEYS_UPDATED, heap_lock_updated_tuple(), HEAP_XMAX_INVALID, HEAP_XMAX_IS_EXCL_LOCKED(), HEAP_XMAX_IS_KEYSHR_LOCKED(), HEAP_XMAX_IS_LOCKED_ONLY(), HEAP_XMAX_IS_MULTI, HEAP_XMAX_IS_SHR_LOCKED(), HeapTupleHeaderClearHotUpdated(), HeapTupleHeaderGetCmax(), HeapTupleHeaderGetRawXmax(), HeapTupleHeaderGetUpdateXid(), HeapTupleHeaderIsOnlyLocked(), HeapTupleHeaderSetXmax(), HeapTupleSatisfiesUpdate(), i, InvalidBuffer, InvalidCommandId, ItemIdGetLength, ItemIdIsNormal, ItemPointerCopy(), ItemPointerEquals(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), LockBuffer(), LockTupleExclusive, LockTupleKeyShare, LockTupleNoKeyExclusive, LockTupleShare, LockWaitBlock, LockWaitError, LockWaitSkip, log_lock_failures, MarkBufferDirty(), mode, MultiXactIdSetOldestMember(), MultiXactIdWait(), MultiXactStatusNoKeyUpdate, PageGetItem(), PageGetItemId(), PageIsAllVisible(), PageSetLSN(), pfree(), ReadBuffer(), REGBUF_STANDARD, RelationGetRelationName, RelationGetRelid, RelationNeedsWAL, ReleaseBuffer(), SizeOfHeapLock, START_CRIT_SECTION, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TM_BeingModified, TM_Deleted, TM_Invisible, TM_Ok, TM_SelfModified, TM_Updated, TM_WouldBlock, TransactionIdEquals, TransactionIdIsCurrentTransactionId(), TUPLOCK_from_mxstatus, UnlockTupleTuplock, UpdateXmaxHintBits(), VISIBILITYMAP_ALL_FROZEN, visibilitymap_clear(), visibilitymap_pin(), 
XactLockTableWait(), XLH_LOCK_ALL_FROZEN_CLEARED, XLOG_HEAP_LOCK, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), XLogRegisterData(), XLTW_Lock, TM_FailureData::xmax, and xmax_infomask_changed().

Referenced by heapam_tuple_lock().

◆ heap_multi_insert()

void heap_multi_insert ( Relation  relation,
TupleTableSlot **  slots,
int  ntuples,
CommandId  cid,
int  options,
BulkInsertState  bistate 
)
extern

Definition at line 2413 of file heapam.c.

2415{
2418 int i;
2419 int ndone;
2421 Page page;
2422 Buffer vmbuffer = InvalidBuffer;
2423 bool needwal;
2427 bool starting_with_empty_page = false;
2428 int npages = 0;
2429 int npages_used = 0;
2430
2431 /* currently not needed (thus unsupported) for heap_multi_insert() */
2433
2434 AssertHasSnapshotForToast(relation);
2435
2436 needwal = RelationNeedsWAL(relation);
2439
2440 /* Toast and set header data in all the slots */
2441 heaptuples = palloc(ntuples * sizeof(HeapTuple));
2442 for (i = 0; i < ntuples; i++)
2443 {
2444 HeapTuple tuple;
2445
2446 tuple = ExecFetchSlotHeapTuple(slots[i], true, NULL);
2447 slots[i]->tts_tableOid = RelationGetRelid(relation);
2448 tuple->t_tableOid = slots[i]->tts_tableOid;
2449 heaptuples[i] = heap_prepare_insert(relation, tuple, xid, cid,
2450 options);
2451 }
2452
2453 /*
2454 * We're about to do the actual inserts -- but check for conflict first,
2455 * to minimize the possibility of having to roll back work we've just
2456 * done.
2457 *
2458 * A check here does not definitively prevent a serialization anomaly;
2459 * that check MUST be done at least past the point of acquiring an
2460 * exclusive buffer content lock on every buffer that will be affected,
2461 * and MAY be done after all inserts are reflected in the buffers and
2462 * those locks are released; otherwise there is a race condition. Since
2463 * multiple buffers can be locked and unlocked in the loop below, and it
2464 * would not be feasible to identify and lock all of those buffers before
2465 * the loop, we must do a final check at the end.
2466 *
2467 * The check here could be omitted with no loss of correctness; it is
2468 * present strictly as an optimization.
2469 *
2470 * For heap inserts, we only need to check for table-level SSI locks. Our
2471 * new tuples can't possibly conflict with existing tuple locks, and heap
2472 * page locks are only consolidated versions of tuple locks; they do not
2473 * lock "gaps" as index page locks do. So we don't need to specify a
2474 * buffer when making the call, which makes for a faster check.
2475 */
2477
2478 ndone = 0;
2479 while (ndone < ntuples)
2480 {
2481 Buffer buffer;
2482 bool all_visible_cleared = false;
2483 bool all_frozen_set = false;
2484 int nthispage;
2485
2487
2488 /*
2489 * Compute number of pages needed to fit the to-be-inserted tuples in
2490 * the worst case. This will be used to determine how much to extend
2491 * the relation by in RelationGetBufferForTuple(), if needed. If we
2492 * filled a prior page from scratch, we can just update our last
2493 * computation, but if we started with a partially filled page,
2494 * recompute from scratch, the number of potentially required pages
2495 * can vary due to tuples needing to fit onto the page, page headers
2496 * etc.
2497 */
2498 if (ndone == 0 || !starting_with_empty_page)
2499 {
2500 npages = heap_multi_insert_pages(heaptuples, ndone, ntuples,
2502 npages_used = 0;
2503 }
2504 else
2505 npages_used++;
2506
2507 /*
2508 * Find buffer where at least the next tuple will fit. If the page is
2509 * all-visible, this will also pin the requisite visibility map page.
2510 *
2511 * Also pin visibility map page if COPY FREEZE inserts tuples into an
2512 * empty page. See all_frozen_set below.
2513 */
2514 buffer = RelationGetBufferForTuple(relation, heaptuples[ndone]->t_len,
2515 InvalidBuffer, options, bistate,
2516 &vmbuffer, NULL,
2517 npages - npages_used);
2518 page = BufferGetPage(buffer);
2519
2521
2523 {
2524 all_frozen_set = true;
2525 /* Lock the vmbuffer before entering the critical section */
2527 }
2528
2529 /* NO EREPORT(ERROR) from here till changes are logged */
2531
2532 /*
2533 * RelationGetBufferForTuple has ensured that the first tuple fits.
2534 * Put that on the page, and then as many other tuples as fit.
2535 */
2536 RelationPutHeapTuple(relation, buffer, heaptuples[ndone], false);
2537
2538 /*
2539 * For logical decoding we need combo CIDs to properly decode the
2540 * catalog.
2541 */
2542 if (needwal && need_cids)
2543 log_heap_new_cid(relation, heaptuples[ndone]);
2544
2545 for (nthispage = 1; ndone + nthispage < ntuples; nthispage++)
2546 {
2548
2549 if (PageGetHeapFreeSpace(page) < MAXALIGN(heaptup->t_len) + saveFreeSpace)
2550 break;
2551
2552 RelationPutHeapTuple(relation, buffer, heaptup, false);
2553
2554 /*
2555 * For logical decoding we need combo CIDs to properly decode the
2556 * catalog.
2557 */
2558 if (needwal && need_cids)
2559 log_heap_new_cid(relation, heaptup);
2560 }
2561
2562 /*
2563 * If the page is all visible, need to clear that, unless we're only
2564 * going to add further frozen rows to it.
2565 *
2566 * If we're only adding already frozen rows to a previously empty
2567 * page, mark it as all-frozen and update the visibility map. We're
2568 * already holding a pin on the vmbuffer.
2569 */
2571 {
2572 all_visible_cleared = true;
2573 PageClearAllVisible(page);
2574 visibilitymap_clear(relation,
2575 BufferGetBlockNumber(buffer),
2576 vmbuffer, VISIBILITYMAP_VALID_BITS);
2577 }
2578 else if (all_frozen_set)
2579 {
2580 PageSetAllVisible(page);
2581 PageClearPrunable(page);
2583 vmbuffer,
2586 relation->rd_locator);
2587 }
2588
2589 /*
2590 * XXX Should we set PageSetPrunable on this page ? See heap_insert()
2591 */
2592
2593 MarkBufferDirty(buffer);
2594
2595 /* XLOG stuff */
2596 if (needwal)
2597 {
2601 char *tupledata;
2602 int totaldatalen;
2603 char *scratchptr = scratch.data;
2604 bool init;
2605 int bufflags = 0;
2606
2607 /*
2608 * If the page was previously empty, we can reinit the page
2609 * instead of restoring the whole thing.
2610 */
2612
2613 /* allocate xl_heap_multi_insert struct from the scratch area */
2616
2617 /*
2618 * Allocate offsets array. Unless we're reinitializing the page,
2619 * in that case the tuples are stored in order starting at
2620 * FirstOffsetNumber and we don't need to store the offsets
2621 * explicitly.
2622 */
2623 if (!init)
2624 scratchptr += nthispage * sizeof(OffsetNumber);
2625
2626 /* the rest of the scratch space is used for tuple data */
2627 tupledata = scratchptr;
2628
2629 /* check that the mutually exclusive flags are not both set */
2631
2632 xlrec->flags = 0;
2635
2636 /*
2637 * We don't have to worry about including a conflict xid in the
2638 * WAL record, as HEAP_INSERT_FROZEN intentionally violates
2639 * visibility rules.
2640 */
2641 if (all_frozen_set)
2643
2644 xlrec->ntuples = nthispage;
2645
2646 /*
2647 * Write out an xl_multi_insert_tuple and the tuple data itself
2648 * for each tuple.
2649 */
2650 for (i = 0; i < nthispage; i++)
2651 {
2653 xl_multi_insert_tuple *tuphdr;
2654 int datalen;
2655
2656 if (!init)
2657 xlrec->offsets[i] = ItemPointerGetOffsetNumber(&heaptup->t_self);
2658 /* xl_multi_insert_tuple needs two-byte alignment. */
2660 scratchptr = ((char *) tuphdr) + SizeOfMultiInsertTuple;
2661
2662 tuphdr->t_infomask2 = heaptup->t_data->t_infomask2;
2663 tuphdr->t_infomask = heaptup->t_data->t_infomask;
2664 tuphdr->t_hoff = heaptup->t_data->t_hoff;
2665
2666 /* write bitmap [+ padding] [+ oid] + data */
2667 datalen = heaptup->t_len - SizeofHeapTupleHeader;
2669 (char *) heaptup->t_data + SizeofHeapTupleHeader,
2670 datalen);
2671 tuphdr->datalen = datalen;
2672 scratchptr += datalen;
2673 }
2674 totaldatalen = scratchptr - tupledata;
2675 Assert((scratchptr - scratch.data) < BLCKSZ);
2676
2677 if (need_tuple_data)
2679
2680 /*
2681 * Signal that this is the last xl_heap_multi_insert record
2682 * emitted by this call to heap_multi_insert(). Needed for logical
2683 * decoding so it knows when to cleanup temporary data.
2684 */
2685 if (ndone + nthispage == ntuples)
2687
2688 if (init)
2689 {
2690 info |= XLOG_HEAP_INIT_PAGE;
2692 }
2693
2694 /*
2695 * If we're doing logical decoding, include the new tuple data
2696 * even if we take a full-page image of the page.
2697 */
2698 if (need_tuple_data)
2700
2702 XLogRegisterData(xlrec, tupledata - scratch.data);
2704 if (all_frozen_set)
2705 XLogRegisterBuffer(1, vmbuffer, 0);
2706
2707 XLogRegisterBufData(0, tupledata, totaldatalen);
2708
2709 /* filtering by origin on a row level is much more efficient */
2711
2712 recptr = XLogInsert(RM_HEAP2_ID, info);
2713
2714 PageSetLSN(page, recptr);
2715 if (all_frozen_set)
2716 {
2717 Assert(BufferIsDirty(vmbuffer));
2718 PageSetLSN(BufferGetPage(vmbuffer), recptr);
2719 }
2720 }
2721
2723
2724 if (all_frozen_set)
2725 LockBuffer(vmbuffer, BUFFER_LOCK_UNLOCK);
2726
2727 UnlockReleaseBuffer(buffer);
2728 ndone += nthispage;
2729
2730 /*
2731 * NB: Only release vmbuffer after inserting all tuples - it's fairly
2732 * likely that we'll insert into subsequent heap pages that are likely
2733 * to use the same vm page.
2734 */
2735 }
2736
2737 /* We're done with inserting all tuples, so release the last vmbuffer. */
2738 if (vmbuffer != InvalidBuffer)
2739 ReleaseBuffer(vmbuffer);
2740
2741 /*
2742 * We're done with the actual inserts. Check for conflicts again, to
2743 * ensure that all rw-conflicts in to these inserts are detected. Without
2744 * this final check, a sequential scan of the heap may have locked the
2745 * table after the "before" check, missing one opportunity to detect the
2746 * conflict, and then scanned the table before the new tuples were there,
2747 * missing the other chance to detect the conflict.
2748 *
2749 * For heap inserts, we only need to check for table-level SSI locks. Our
2750 * new tuples can't possibly conflict with existing tuple locks, and heap
2751 * page locks are only consolidated versions of tuple locks; they do not
2752 * lock "gaps" as index page locks do. So we don't need to specify a
2753 * buffer when making the call.
2754 */
2756
2757 /*
2758 * If tuples are cacheable, mark them for invalidation from the caches in
2759 * case we abort. Note it is OK to do this after releasing the buffer,
2760 * because the heaptuples data structure is all in local memory, not in
2761 * the shared buffer.
2762 */
2763 if (IsCatalogRelation(relation))
2764 {
2765 for (i = 0; i < ntuples; i++)
2767 }
2768
2769 /* copy t_self fields back to the caller's slots */
2770 for (i = 0; i < ntuples; i++)
2771 slots[i]->tts_tid = heaptuples[i]->t_self;
2772
2773 pgstat_count_heap_insert(relation, ntuples);
2774}
bool BufferIsDirty(Buffer buffer)
Definition bufmgr.c:3025
Size PageGetHeapFreeSpace(const PageData *page)
Definition bufpage.c:990
static void PageSetAllVisible(Page page)
Definition bufpage.h:433
#define PageClearPrunable(page)
Definition bufpage.h:459
#define MAXALIGN(LEN)
Definition c.h:859
#define SHORTALIGN(LEN)
Definition c.h:855
size_t Size
Definition c.h:652
HeapTuple ExecFetchSlotHeapTuple(TupleTableSlot *slot, bool materialize, bool *shouldFree)
static int heap_multi_insert_pages(HeapTuple *heaptuples, int done, int ntuples, Size saveFreeSpace)
Definition heapam.c:2381
#define HEAP_INSERT_FROZEN
Definition heapam.h:38
#define SizeOfHeapMultiInsert
#define XLOG_HEAP2_MULTI_INSERT
Definition heapam_xlog.h:64
#define XLH_INSERT_LAST_IN_MULTI
Definition heapam_xlog.h:73
#define XLH_INSERT_ALL_FROZEN_SET
Definition heapam_xlog.h:79
#define SizeOfMultiInsertTuple
void * palloc(Size size)
Definition mcxt.c:1387
#define CHECK_FOR_INTERRUPTS()
Definition miscadmin.h:123
#define RelationGetTargetPageFreeSpace(relation, defaultff)
Definition rel.h:389
#define HEAP_DEFAULT_FILLFACTOR
Definition rel.h:360
#define init()
RelFileLocator rd_locator
Definition rel.h:57
void visibilitymap_set_vmbits(BlockNumber heapBlk, Buffer vmBuf, uint8 flags, const RelFileLocator rlocator)
#define VISIBILITYMAP_ALL_VISIBLE

References Assert, AssertHasSnapshotForToast(), BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage(), BufferIsDirty(), CacheInvalidateHeapTuple(), CHECK_FOR_INTERRUPTS, CheckForSerializableConflictIn(), xl_multi_insert_tuple::datalen, END_CRIT_SECTION, ExecFetchSlotHeapTuple(), fb(), GetCurrentTransactionId(), HEAP_DEFAULT_FILLFACTOR, HEAP_INSERT_FROZEN, HEAP_INSERT_NO_LOGICAL, heap_multi_insert_pages(), heap_prepare_insert(), i, init, InvalidBlockNumber, InvalidBuffer, IsCatalogRelation(), ItemPointerGetOffsetNumber(), LockBuffer(), log_heap_new_cid(), MarkBufferDirty(), MAXALIGN, PageClearAllVisible(), PageClearPrunable, PageGetHeapFreeSpace(), PageGetMaxOffsetNumber(), PageIsAllVisible(), PageSetAllVisible(), PageSetLSN(), palloc(), pgstat_count_heap_insert(), RelationData::rd_locator, REGBUF_KEEP_DATA, REGBUF_STANDARD, REGBUF_WILL_INIT, RelationGetBufferForTuple(), RelationGetRelid, RelationGetTargetPageFreeSpace, RelationIsAccessibleInLogicalDecoding, RelationIsLogicallyLogged, RelationNeedsWAL, RelationPutHeapTuple(), ReleaseBuffer(), SHORTALIGN, SizeOfHeapMultiInsert, SizeofHeapTupleHeader, SizeOfMultiInsertTuple, START_CRIT_SECTION, xl_multi_insert_tuple::t_hoff, xl_multi_insert_tuple::t_infomask, xl_multi_insert_tuple::t_infomask2, HeapTupleData::t_tableOid, TupleTableSlot::tts_tableOid, UnlockReleaseBuffer(), VISIBILITYMAP_ALL_FROZEN, VISIBILITYMAP_ALL_VISIBLE, visibilitymap_clear(), visibilitymap_set_vmbits(), VISIBILITYMAP_VALID_BITS, XLH_INSERT_ALL_FROZEN_SET, XLH_INSERT_ALL_VISIBLE_CLEARED, XLH_INSERT_CONTAINS_NEW_TUPLE, XLH_INSERT_LAST_IN_MULTI, XLOG_HEAP2_MULTI_INSERT, XLOG_HEAP_INIT_PAGE, XLOG_INCLUDE_ORIGIN, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), XLogRegisterData(), and XLogSetRecordFlags().

Referenced by CatalogTuplesMultiInsertWithInfo().

◆ heap_page_prune_and_freeze()

void heap_page_prune_and_freeze ( PruneFreezeParams params,
PruneFreezeResult presult,
OffsetNumber off_loc,
TransactionId new_relfrozen_xid,
MultiXactId new_relmin_mxid 
)
extern

Definition at line 832 of file pruneheap.c.

837{
839 bool do_freeze;
840 bool do_prune;
841 bool do_hint_prune;
844
845 /* Initialize prstate */
846 prune_freeze_setup(params,
848 presult, &prstate);
849
850 /*
851 * Examine all line pointers and tuple visibility information to determine
852 * which line pointers should change state and which tuples may be frozen.
853 * Prepare queue of state changes to later be executed in a critical
854 * section.
855 */
857
858 /*
859 * If checksums are enabled, calling heap_prune_satisfies_vacuum() while
860 * checking tuple visibility information in prune_freeze_plan() may have
861 * caused an FPI to be emitted.
862 */
864
865 do_prune = prstate.nredirected > 0 ||
866 prstate.ndead > 0 ||
867 prstate.nunused > 0;
868
869 /*
870 * Even if we don't prune anything, if we found a new value for the
871 * pd_prune_xid field or the page was marked full, we will update the hint
872 * bit.
873 */
874 do_hint_prune = PageGetPruneXid(prstate.page) != prstate.new_prune_xid ||
875 PageIsFull(prstate.page);
876
877 /*
878 * Decide if we want to go ahead with freezing according to the freeze
879 * plans we prepared, or not.
880 */
882 do_prune,
884 &prstate);
885
886 /*
887 * While scanning the line pointers, we did not clear
888 * set_all_visible/set_all_frozen when encountering LP_DEAD items because
889 * we wanted the decision whether or not to freeze the page to be
890 * unaffected by the short-term presence of LP_DEAD items. These LP_DEAD
891 * items are effectively assumed to be LP_UNUSED items in the making. It
892 * doesn't matter which vacuum heap pass (initial pass or final pass) ends
893 * up setting the page all-frozen, as long as the ongoing VACUUM does it.
894 *
895 * Now that we finished determining whether or not to freeze the page,
896 * update set_all_visible and set_all_frozen so that they reflect the true
897 * state of the page for setting PD_ALL_VISIBLE and VM bits.
898 */
899 if (prstate.lpdead_items > 0)
900 prstate.set_all_visible = prstate.set_all_frozen = false;
901
902 Assert(!prstate.set_all_frozen || prstate.set_all_visible);
903
904 /* Any error while applying the changes is critical */
906
907 if (do_hint_prune)
908 {
909 /*
910 * Update the page's pd_prune_xid field to either zero, or the lowest
911 * XID of any soon-prunable tuple.
912 */
913 ((PageHeader) prstate.page)->pd_prune_xid = prstate.new_prune_xid;
914
915 /*
916 * Also clear the "page is full" flag, since there's no point in
917 * repeating the prune/defrag process until something else happens to
918 * the page.
919 */
921
922 /*
923 * If that's all we had to do to the page, this is a non-WAL-logged
924 * hint. If we are going to freeze or prune the page, we will mark
925 * the buffer dirty below.
926 */
927 if (!do_freeze && !do_prune)
928 MarkBufferDirtyHint(prstate.buffer, true);
929 }
930
931 if (do_prune || do_freeze)
932 {
933 /* Apply the planned item changes and repair page fragmentation. */
934 if (do_prune)
935 {
936 heap_page_prune_execute(prstate.buffer, false,
937 prstate.redirected, prstate.nredirected,
938 prstate.nowdead, prstate.ndead,
939 prstate.nowunused, prstate.nunused);
940 }
941
942 if (do_freeze)
943 heap_freeze_prepared_tuples(prstate.buffer, prstate.frozen, prstate.nfrozen);
944
945 MarkBufferDirty(prstate.buffer);
946
947 /*
948 * Emit a WAL XLOG_HEAP2_PRUNE* record showing what we did
949 */
950 if (RelationNeedsWAL(prstate.relation))
951 {
952 /*
953 * The snapshotConflictHorizon for the whole record should be the
954 * most conservative of all the horizons calculated for any of the
955 * possible modifications. If this record will prune tuples, any
956 * transactions on the standby older than the youngest xmax of the
957 * most recently removed tuple this record will prune will
958 * conflict. If this record will freeze tuples, any transactions
959 * on the standby with xids older than the youngest tuple this
960 * record will freeze will conflict.
961 */
963
964 if (TransactionIdFollows(prstate.frz_conflict_horizon,
965 prstate.latest_xid_removed))
966 conflict_xid = prstate.frz_conflict_horizon;
967 else
968 conflict_xid = prstate.latest_xid_removed;
969
971 InvalidBuffer, /* vmbuffer */
972 0, /* vmflags */
974 true, params->reason,
975 prstate.frozen, prstate.nfrozen,
976 prstate.redirected, prstate.nredirected,
977 prstate.nowdead, prstate.ndead,
978 prstate.nowunused, prstate.nunused);
979 }
980 }
981
983
984 /* Copy information back for caller */
985 presult->ndeleted = prstate.ndeleted;
986 presult->nnewlpdead = prstate.ndead;
987 presult->nfrozen = prstate.nfrozen;
988 presult->live_tuples = prstate.live_tuples;
989 presult->recently_dead_tuples = prstate.recently_dead_tuples;
990 presult->set_all_visible = prstate.set_all_visible;
991 presult->set_all_frozen = prstate.set_all_frozen;
992 presult->hastup = prstate.hastup;
993
994 /*
995 * For callers planning to update the visibility map, the conflict horizon
996 * for that record must be the newest xmin on the page. However, if the
997 * page is completely frozen, there can be no conflict and the
998 * vm_conflict_horizon should remain InvalidTransactionId. This includes
999 * the case that we just froze all the tuples; the prune-freeze record
1000 * included the conflict XID already so the caller doesn't need it.
1001 */
1002 if (presult->set_all_frozen)
1003 presult->vm_conflict_horizon = InvalidTransactionId;
1004 else
1005 presult->vm_conflict_horizon = prstate.visibility_cutoff_xid;
1006
1007 presult->lpdead_items = prstate.lpdead_items;
1008 /* the presult->deadoffsets array was already filled in */
1009
1010 if (prstate.attempt_freeze)
1011 {
1012 if (presult->nfrozen > 0)
1013 {
1014 *new_relfrozen_xid = prstate.pagefrz.FreezePageRelfrozenXid;
1015 *new_relmin_mxid = prstate.pagefrz.FreezePageRelminMxid;
1016 }
1017 else
1018 {
1019 *new_relfrozen_xid = prstate.pagefrz.NoFreezePageRelfrozenXid;
1020 *new_relmin_mxid = prstate.pagefrz.NoFreezePageRelminMxid;
1021 }
1022 }
1023}
void MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
Definition bufmgr.c:5566
static TransactionId PageGetPruneXid(const PageData *page)
Definition bufpage.h:444
static void PageClearFull(Page page)
Definition bufpage.h:422
static bool PageIsFull(const PageData *page)
Definition bufpage.h:412
int64_t int64
Definition c.h:576
void heap_freeze_prepared_tuples(Buffer buffer, HeapTupleFreeze *tuples, int ntuples)
Definition heapam.c:7462
WalUsage pgWalUsage
Definition instrument.c:22
static void prune_freeze_plan(PruneState *prstate, OffsetNumber *off_loc)
Definition pruneheap.c:472
static bool heap_page_will_freeze(bool did_tuple_hint_fpi, bool do_prune, bool do_hint_prune, PruneState *prstate)
Definition pruneheap.c:675
void log_heap_prune_and_freeze(Relation relation, Buffer buffer, Buffer vmbuffer, uint8 vmflags, TransactionId conflict_xid, bool cleanup_lock, PruneReason reason, HeapTupleFreeze *frozen, int nfrozen, OffsetNumber *redirected, int nredirected, OffsetNumber *dead, int ndead, OffsetNumber *unused, int nunused)
Definition pruneheap.c:2179
static void prune_freeze_setup(PruneFreezeParams *params, TransactionId *new_relfrozen_xid, MultiXactId *new_relmin_mxid, PruneFreezeResult *presult, PruneState *prstate)
Definition pruneheap.c:340
void heap_page_prune_execute(Buffer buffer, bool lp_truncate_only, OffsetNumber *redirected, int nredirected, OffsetNumber *nowdead, int ndead, OffsetNumber *nowunused, int nunused)
Definition pruneheap.c:1683
PruneReason reason
Definition heapam.h:245
int64 wal_fpi
Definition instrument.h:54
static bool TransactionIdFollows(TransactionId id1, TransactionId id2)
Definition transam.h:297

References Assert, END_CRIT_SECTION, fb(), heap_freeze_prepared_tuples(), heap_page_prune_execute(), heap_page_will_freeze(), InvalidBuffer, InvalidTransactionId, log_heap_prune_and_freeze(), MarkBufferDirty(), MarkBufferDirtyHint(), PageClearFull(), PageGetPruneXid(), PageIsFull(), pgWalUsage, prune_freeze_plan(), prune_freeze_setup(), PruneFreezeParams::reason, RelationNeedsWAL, START_CRIT_SECTION, TransactionIdFollows(), and WalUsage::wal_fpi.

Referenced by heap_page_prune_opt(), and lazy_scan_prune().

◆ heap_page_prune_execute()

void heap_page_prune_execute ( Buffer  buffer,
bool  lp_truncate_only,
OffsetNumber redirected,
int  nredirected,
OffsetNumber nowdead,
int  ndead,
OffsetNumber nowunused,
int  nunused 
)
extern

Definition at line 1683 of file pruneheap.c.

1687{
1688 Page page = BufferGetPage(buffer);
1689 OffsetNumber *offnum;
1691
1692 /* Shouldn't be called unless there's something to do */
1693 Assert(nredirected > 0 || ndead > 0 || nunused > 0);
1694
1695 /* If 'lp_truncate_only', we can only remove already-dead line pointers */
1696 Assert(!lp_truncate_only || (nredirected == 0 && ndead == 0));
1697
1698 /* Update all redirected line pointers */
1699 offnum = redirected;
1700 for (int i = 0; i < nredirected; i++)
1701 {
1702 OffsetNumber fromoff = *offnum++;
1703 OffsetNumber tooff = *offnum++;
1706
1707#ifdef USE_ASSERT_CHECKING
1708
1709 /*
1710 * Any existing item that we set as an LP_REDIRECT (any 'from' item)
1711 * must be the first item from a HOT chain. If the item has tuple
1712 * storage then it can't be a heap-only tuple. Otherwise we are just
1713 * maintaining an existing LP_REDIRECT from an existing HOT chain that
1714 * has been pruned at least once before now.
1715 */
1717 {
1719
1720 htup = (HeapTupleHeader) PageGetItem(page, fromlp);
1722 }
1723 else
1724 {
1725 /* We shouldn't need to redundantly set the redirect */
1727 }
1728
1729 /*
1730 * The item that we're about to set as an LP_REDIRECT (the 'from'
1731 * item) will point to an existing item (the 'to' item) that is
1732 * already a heap-only tuple. There can be at most one LP_REDIRECT
1733 * item per HOT chain.
1734 *
1735 * We need to keep around an LP_REDIRECT item (after original
1736 * non-heap-only root tuple gets pruned away) so that it's always
1737 * possible for VACUUM to easily figure out what TID to delete from
1738 * indexes when an entire HOT chain becomes dead. A heap-only tuple
1739 * can never become LP_DEAD; an LP_REDIRECT item or a regular heap
1740 * tuple can.
1741 *
1742 * This check may miss problems, e.g. the target of a redirect could
1743 * be marked as unused subsequently. The page_verify_redirects() check
1744 * below will catch such problems.
1745 */
1746 tolp = PageGetItemId(page, tooff);
1748 htup = (HeapTupleHeader) PageGetItem(page, tolp);
1750#endif
1751
1753 }
1754
1755 /* Update all now-dead line pointers */
1756 offnum = nowdead;
1757 for (int i = 0; i < ndead; i++)
1758 {
1759 OffsetNumber off = *offnum++;
1760 ItemId lp = PageGetItemId(page, off);
1761
1762#ifdef USE_ASSERT_CHECKING
1763
1764 /*
1765 * An LP_DEAD line pointer must be left behind when the original item
1766 * (which is dead to everybody) could still be referenced by a TID in
1767 * an index. This should never be necessary with any individual
1768 * heap-only tuple item, though. (It's not clear how much of a problem
1769 * that would be, but there is no reason to allow it.)
1770 */
1771 if (ItemIdHasStorage(lp))
1772 {
1774 htup = (HeapTupleHeader) PageGetItem(page, lp);
1776 }
1777 else
1778 {
1779 /* Whole HOT chain becomes dead */
1781 }
1782#endif
1783
1785 }
1786
1787 /* Update all now-unused line pointers */
1788 offnum = nowunused;
1789 for (int i = 0; i < nunused; i++)
1790 {
1791 OffsetNumber off = *offnum++;
1792 ItemId lp = PageGetItemId(page, off);
1793
1794#ifdef USE_ASSERT_CHECKING
1795
1796 if (lp_truncate_only)
1797 {
1798 /* Setting LP_DEAD to LP_UNUSED in vacuum's second pass */
1800 }
1801 else
1802 {
1803 /*
1804 * When heap_page_prune_and_freeze() was called, mark_unused_now
1805 * may have been passed as true, which allows would-be LP_DEAD
1806 * items to be made LP_UNUSED instead. This is only possible if
1807 * the relation has no indexes. If there are any dead items, then
1808 * mark_unused_now was not true and every item being marked
1809 * LP_UNUSED must refer to a heap-only tuple.
1810 */
1811 if (ndead > 0)
1812 {
1814 htup = (HeapTupleHeader) PageGetItem(page, lp);
1816 }
1817 else
1819 }
1820
1821#endif
1822
1824 }
1825
1826 if (lp_truncate_only)
1828 else
1829 {
1830 /*
1831 * Finally, repair any fragmentation, and update the page's hint bit
1832 * about whether it has free pointers.
1833 */
1835
1836 /*
1837 * Now that the page has been modified, assert that redirect items
1838 * still point to valid targets.
1839 */
1841 }
1842}
void PageRepairFragmentation(Page page)
Definition bufpage.c:698
void PageTruncateLinePointerArray(Page page)
Definition bufpage.c:834
#define PG_USED_FOR_ASSERTS_ONLY
Definition c.h:235
#define ItemIdSetRedirect(itemId, link)
Definition itemid.h:152
#define ItemIdSetDead(itemId)
Definition itemid.h:164
#define ItemIdSetUnused(itemId)
Definition itemid.h:128
#define ItemIdHasStorage(itemId)
Definition itemid.h:120
static void page_verify_redirects(Page page)
Definition pruneheap.c:1859

References Assert, BufferGetPage(), fb(), HeapTupleHeaderIsHeapOnly(), i, ItemIdGetRedirect, ItemIdHasStorage, ItemIdIsDead, ItemIdIsNormal, ItemIdIsRedirected, ItemIdIsUsed, ItemIdSetDead, ItemIdSetRedirect, ItemIdSetUnused, page_verify_redirects(), PageGetItem(), PageGetItemId(), PageRepairFragmentation(), PageTruncateLinePointerArray(), and PG_USED_FOR_ASSERTS_ONLY.

Referenced by heap_page_prune_and_freeze(), and heap_xlog_prune_freeze().

◆ heap_page_prune_opt()

void heap_page_prune_opt ( Relation  relation,
Buffer  buffer 
)
extern

Definition at line 219 of file pruneheap.c.

220{
221 Page page = BufferGetPage(buffer);
223 GlobalVisState *vistest;
225
226 /*
227 * We can't write WAL in recovery mode, so there's no point trying to
228 * clean the page. The primary will likely issue a cleaning WAL record
229 * soon anyway, so this is no particular loss.
230 */
231 if (RecoveryInProgress())
232 return;
233
234 /*
235 * First check whether there's any chance there's something to prune,
236 * determining the appropriate horizon is a waste if there's no prune_xid
237 * (i.e. no updates/deletes left potentially dead tuples around).
238 */
241 return;
242
243 /*
244 * Check whether prune_xid indicates that there may be dead rows that can
245 * be cleaned up.
246 */
247 vistest = GlobalVisTestFor(relation);
248
250 return;
251
252 /*
253 * We prune when a previous UPDATE failed to find enough space on the page
254 * for a new tuple version, or when free space falls below the relation's
255 * fill-factor target (but not less than 10%).
256 *
257 * Checking free space here is questionable since we aren't holding any
258 * lock on the buffer; in the worst case we could get a bogus answer. It's
259 * unlikely to be *seriously* wrong, though, since reading either pd_lower
260 * or pd_upper is probably atomic. Avoiding taking a lock seems more
261 * important than sometimes getting a wrong answer in what is after all
262 * just a heuristic estimate.
263 */
266 minfree = Max(minfree, BLCKSZ / 10);
267
268 if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
269 {
270 /* OK, try to get exclusive buffer lock */
272 return;
273
274 /*
275 * Now that we have buffer lock, get accurate information about the
276 * page's free space, and recheck the heuristic about whether to
277 * prune.
278 */
279 if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
280 {
283
284 /*
285 * We don't pass the HEAP_PAGE_PRUNE_MARK_UNUSED_NOW option
286 * regardless of whether or not the relation has indexes, since we
287 * cannot safely determine that during on-access pruning with the
288 * current implementation.
289 */
290 PruneFreezeParams params = {
291 .relation = relation,
292 .buffer = buffer,
293 .reason = PRUNE_ON_ACCESS,
294 .options = 0,
295 .vistest = vistest,
296 .cutoffs = NULL,
297 };
298
300 NULL, NULL);
301
302 /*
303 * Report the number of tuples reclaimed to pgstats. This is
304 * presult.ndeleted minus the number of newly-LP_DEAD-set items.
305 *
306 * We derive the number of dead tuples like this to avoid totally
307 * forgetting about items that were set to LP_DEAD, since they
308 * still need to be cleaned up by VACUUM. We only want to count
309 * heap-only tuples that just became LP_UNUSED in our report,
310 * which don't.
311 *
312 * VACUUM doesn't have to compensate in the same way when it
313 * tracks ndeleted, since it will set the same LP_DEAD items to
314 * LP_UNUSED separately.
315 */
316 if (presult.ndeleted > presult.nnewlpdead)
318 presult.ndeleted - presult.nnewlpdead);
319 }
320
321 /* And release buffer lock */
323
324 /*
325 * We avoid reuse of any free space created on the page by unrelated
326 * UPDATEs/INSERTs by opting to not update the FSM at this point. The
327 * free space should be reused by UPDATEs to *this* page.
328 */
329 }
330}
bool ConditionalLockBufferForCleanup(Buffer buffer)
Definition bufmgr.c:6701
#define Max(x, y)
Definition c.h:1034
void pgstat_update_heap_dead_tuples(Relation rel, int delta)
bool GlobalVisTestIsRemovableXid(GlobalVisState *state, TransactionId xid)
Definition procarray.c:4275
void heap_page_prune_and_freeze(PruneFreezeParams *params, PruneFreezeResult *presult, OffsetNumber *off_loc, TransactionId *new_relfrozen_xid, MultiXactId *new_relmin_mxid)
Definition pruneheap.c:832
Relation relation
Definition heapam.h:238
bool RecoveryInProgress(void)
Definition xlog.c:6443

References BUFFER_LOCK_UNLOCK, BufferGetPage(), ConditionalLockBufferForCleanup(), fb(), GlobalVisTestFor(), GlobalVisTestIsRemovableXid(), HEAP_DEFAULT_FILLFACTOR, heap_page_prune_and_freeze(), LockBuffer(), Max, PageGetHeapFreeSpace(), PageGetPruneXid(), PageIsFull(), pgstat_update_heap_dead_tuples(), PRUNE_ON_ACCESS, RecoveryInProgress(), PruneFreezeParams::relation, RelationGetTargetPageFreeSpace, and TransactionIdIsValid.

Referenced by BitmapHeapScanNextBlock(), heap_prepare_pagescan(), and heapam_index_fetch_tuple().

◆ heap_pre_freeze_checks()

void heap_pre_freeze_checks ( Buffer  buffer,
HeapTupleFreeze tuples,
int  ntuples 
)
extern

Definition at line 7409 of file heapam.c.

7411{
7412 Page page = BufferGetPage(buffer);
7413
7414 for (int i = 0; i < ntuples; i++)
7415 {
7416 HeapTupleFreeze *frz = tuples + i;
7417 ItemId itemid = PageGetItemId(page, frz->offset);
7418 HeapTupleHeader htup;
7419
7420 htup = (HeapTupleHeader) PageGetItem(page, itemid);
7421
7422 /* Deliberately avoid relying on tuple hint bits here */
7423 if (frz->checkflags & HEAP_FREEZE_CHECK_XMIN_COMMITTED)
7424 {
7426
7428 if (unlikely(!TransactionIdDidCommit(xmin)))
7429 ereport(ERROR,
7431 errmsg_internal("uncommitted xmin %u needs to be frozen",
7432 xmin)));
7433 }
7434
7435 /*
7436 * TransactionIdDidAbort won't work reliably in the presence of XIDs
7437 * left behind by transactions that were in progress during a crash,
7438 * so we can only check that xmax didn't commit
7439 */
7440 if (frz->checkflags & HEAP_FREEZE_CHECK_XMAX_ABORTED)
7441 {
7443
7446 ereport(ERROR,
7448 errmsg_internal("cannot freeze committed xmax %u",
7449 xmax)));
7450 }
7451 }
7452}
#define HEAP_FREEZE_CHECK_XMAX_ABORTED
Definition heapam.h:138
#define HEAP_FREEZE_CHECK_XMIN_COMMITTED
Definition heapam.h:137
static bool HeapTupleHeaderXminFrozen(const HeapTupleHeaderData *tup)
static TransactionId HeapTupleHeaderGetRawXmin(const HeapTupleHeaderData *tup)
#define ERRCODE_DATA_CORRUPTED
bool TransactionIdDidCommit(TransactionId transactionId)
Definition transam.c:126
#define TransactionIdIsNormal(xid)
Definition transam.h:42

References Assert, BufferGetPage(), ereport, errcode(), ERRCODE_DATA_CORRUPTED, errmsg_internal(), ERROR, fb(), HEAP_FREEZE_CHECK_XMAX_ABORTED, HEAP_FREEZE_CHECK_XMIN_COMMITTED, HeapTupleHeaderGetRawXmax(), HeapTupleHeaderGetRawXmin(), HeapTupleHeaderXminFrozen(), i, PageGetItem(), PageGetItemId(), TransactionIdDidCommit(), TransactionIdIsNormal, and unlikely.

Referenced by heap_page_will_freeze().

◆ heap_prepare_freeze_tuple()

bool heap_prepare_freeze_tuple ( HeapTupleHeader  tuple,
const struct VacuumCutoffs cutoffs,
HeapPageFreeze pagefrz,
HeapTupleFreeze frz,
bool totally_frozen 
)
extern

Definition at line 7136 of file heapam.c.

7140{
7141 bool xmin_already_frozen = false,
7142 xmax_already_frozen = false;
7143 bool freeze_xmin = false,
7144 replace_xvac = false,
7145 replace_xmax = false,
7146 freeze_xmax = false;
7147 TransactionId xid;
7148
7149 frz->xmax = HeapTupleHeaderGetRawXmax(tuple);
7150 frz->t_infomask2 = tuple->t_infomask2;
7151 frz->t_infomask = tuple->t_infomask;
7152 frz->frzflags = 0;
7153 frz->checkflags = 0;
7154
7155 /*
7156 * Process xmin, while keeping track of whether it's already frozen, or
7157 * will become frozen iff our freeze plan is executed by caller (could be
7158 * neither).
7159 */
7160 xid = HeapTupleHeaderGetXmin(tuple);
7161 if (!TransactionIdIsNormal(xid))
7162 xmin_already_frozen = true;
7163 else
7164 {
7165 if (TransactionIdPrecedes(xid, cutoffs->relfrozenxid))
7166 ereport(ERROR,
7168 errmsg_internal("found xmin %u from before relfrozenxid %u",
7169 xid, cutoffs->relfrozenxid)));
7170
7171 /* Will set freeze_xmin flags in freeze plan below */
7173
7174 /* Verify that xmin committed if and when freeze plan is executed */
7175 if (freeze_xmin)
7177 }
7178
7179 /*
7180 * Old-style VACUUM FULL is gone, but we have to process xvac for as long
7181 * as we support having MOVED_OFF/MOVED_IN tuples in the database
7182 */
7183 xid = HeapTupleHeaderGetXvac(tuple);
7184 if (TransactionIdIsNormal(xid))
7185 {
7187 Assert(TransactionIdPrecedes(xid, cutoffs->OldestXmin));
7188
7189 /*
7190 * For Xvac, we always freeze proactively. This allows totally_frozen
7191 * tracking to ignore xvac.
7192 */
7193 replace_xvac = pagefrz->freeze_required = true;
7194
7195 /* Will set replace_xvac flags in freeze plan below */
7196 }
7197
7198 /* Now process xmax */
7199 xid = frz->xmax;
7200 if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
7201 {
7202 /* Raw xmax is a MultiXactId */
7204 uint16 flags;
7205
7206 /*
7207 * We will either remove xmax completely (in the "freeze_xmax" path),
7208 * process xmax by replacing it (in the "replace_xmax" path), or
7209 * perform no-op xmax processing. The only constraint is that the
7210 * FreezeLimit/MultiXactCutoff postcondition must never be violated.
7211 */
7212 newxmax = FreezeMultiXactId(xid, tuple->t_infomask, cutoffs,
7213 &flags, pagefrz);
7214
7215 if (flags & FRM_NOOP)
7216 {
7217 /*
7218 * xmax is a MultiXactId, and nothing about it changes for now.
7219 * This is the only case where 'freeze_required' won't have been
7220 * set for us by FreezeMultiXactId, as well as the only case where
7221 * neither freeze_xmax nor replace_xmax are set (given a multi).
7222 *
7223 * This is a no-op, but the call to FreezeMultiXactId might have
7224 * ratcheted back NewRelfrozenXid and/or NewRelminMxid trackers
7225 * for us (the "freeze page" variants, specifically). That'll
7226 * make it safe for our caller to freeze the page later on, while
7227 * leaving this particular xmax undisturbed.
7228 *
7229 * FreezeMultiXactId is _not_ responsible for the "no freeze"
7230 * NewRelfrozenXid/NewRelminMxid trackers, though -- that's our
7231 * job. A call to heap_tuple_should_freeze for this same tuple
7232 * will take place below if 'freeze_required' isn't set already.
7233 * (This repeats work from FreezeMultiXactId, but allows "no
7234 * freeze" tracker maintenance to happen in only one place.)
7235 */
7238 }
7239 else if (flags & FRM_RETURN_IS_XID)
7240 {
7241 /*
7242 * xmax will become an updater Xid (original MultiXact's updater
7243 * member Xid will be carried forward as a simple Xid in Xmax).
7244 */
7246
7247 /*
7248 * NB -- some of these transformations are only valid because we
7249 * know the return Xid is a tuple updater (i.e. not merely a
7250 * locker.) Also note that the only reason we don't explicitly
7251 * worry about HEAP_KEYS_UPDATED is because it lives in
7252 * t_infomask2 rather than t_infomask.
7253 */
7254 frz->t_infomask &= ~HEAP_XMAX_BITS;
7255 frz->xmax = newxmax;
7256 if (flags & FRM_MARK_COMMITTED)
7257 frz->t_infomask |= HEAP_XMAX_COMMITTED;
7258 replace_xmax = true;
7259 }
7260 else if (flags & FRM_RETURN_IS_MULTI)
7261 {
7264
7265 /*
7266 * xmax is an old MultiXactId that we have to replace with a new
7267 * MultiXactId, to carry forward two or more original member XIDs.
7268 */
7270
7271 /*
7272 * We can't use GetMultiXactIdHintBits directly on the new multi
7273 * here; that routine initializes the masks to all zeroes, which
7274 * would lose other bits we need. Doing it this way ensures all
7275 * unrelated bits remain untouched.
7276 */
7277 frz->t_infomask &= ~HEAP_XMAX_BITS;
7278 frz->t_infomask2 &= ~HEAP_KEYS_UPDATED;
7280 frz->t_infomask |= newbits;
7281 frz->t_infomask2 |= newbits2;
7282 frz->xmax = newxmax;
7283 replace_xmax = true;
7284 }
7285 else
7286 {
7287 /*
7288 * Freeze plan for tuple "freezes xmax" in the strictest sense:
7289 * it'll leave nothing in xmax (neither an Xid nor a MultiXactId).
7290 */
7291 Assert(flags & FRM_INVALIDATE_XMAX);
7293
7294 /* Will set freeze_xmax flags in freeze plan below */
7295 freeze_xmax = true;
7296 }
7297
7298 /* MultiXactId processing forces freezing (barring FRM_NOOP case) */
7299 Assert(pagefrz->freeze_required || (!freeze_xmax && !replace_xmax));
7300 }
7301 else if (TransactionIdIsNormal(xid))
7302 {
7303 /* Raw xmax is normal XID */
7304 if (TransactionIdPrecedes(xid, cutoffs->relfrozenxid))
7305 ereport(ERROR,
7307 errmsg_internal("found xmax %u from before relfrozenxid %u",
7308 xid, cutoffs->relfrozenxid)));
7309
7310 /* Will set freeze_xmax flags in freeze plan below */
7312
7313 /*
7314 * Verify that xmax aborted if and when freeze plan is executed,
7315 * provided it's from an update. (A lock-only xmax can be removed
7316 * independent of this, since the lock is released at xact end.)
7317 */
7319 frz->checkflags |= HEAP_FREEZE_CHECK_XMAX_ABORTED;
7320 }
7321 else if (!TransactionIdIsValid(xid))
7322 {
7323 /* Raw xmax is InvalidTransactionId XID */
7324 Assert((tuple->t_infomask & HEAP_XMAX_IS_MULTI) == 0);
7325 xmax_already_frozen = true;
7326 }
7327 else
7328 ereport(ERROR,
7330 errmsg_internal("found raw xmax %u (infomask 0x%04x) not invalid and not multi",
7331 xid, tuple->t_infomask)));
7332
7333 if (freeze_xmin)
7334 {
7336
7337 frz->t_infomask |= HEAP_XMIN_FROZEN;
7338 }
7339 if (replace_xvac)
7340 {
7341 /*
7342 * If a MOVED_OFF tuple is not dead, the xvac transaction must have
7343 * failed; whereas a non-dead MOVED_IN tuple must mean the xvac
7344 * transaction succeeded.
7345 */
7346 Assert(pagefrz->freeze_required);
7347 if (tuple->t_infomask & HEAP_MOVED_OFF)
7348 frz->frzflags |= XLH_INVALID_XVAC;
7349 else
7350 frz->frzflags |= XLH_FREEZE_XVAC;
7351 }
7352 if (replace_xmax)
7353 {
7355 Assert(pagefrz->freeze_required);
7356
7357 /* Already set replace_xmax flags in freeze plan earlier */
7358 }
7359 if (freeze_xmax)
7360 {
7362
7363 frz->xmax = InvalidTransactionId;
7364
7365 /*
7366 * The tuple might be marked either XMAX_INVALID or XMAX_COMMITTED +
7367 * LOCKED. Normalize to INVALID just to be sure no one gets confused.
7368 * Also get rid of the HEAP_KEYS_UPDATED bit.
7369 */
7370 frz->t_infomask &= ~HEAP_XMAX_BITS;
7371 frz->t_infomask |= HEAP_XMAX_INVALID;
7372 frz->t_infomask2 &= ~HEAP_HOT_UPDATED;
7373 frz->t_infomask2 &= ~HEAP_KEYS_UPDATED;
7374 }
7375
7376 /*
7377 * Determine if this tuple is already totally frozen, or will become
7378 * totally frozen (provided caller executes freeze plans for the page)
7379 */
7382
7383 if (!pagefrz->freeze_required && !(xmin_already_frozen &&
7385 {
7386 /*
7387 * So far no previous tuple from the page made freezing mandatory.
7388 * Does this tuple force caller to freeze the entire page?
7389 */
7390 pagefrz->freeze_required =
7391 heap_tuple_should_freeze(tuple, cutoffs,
7392 &pagefrz->NoFreezePageRelfrozenXid,
7393 &pagefrz->NoFreezePageRelminMxid);
7394 }
7395
7396 /* Tell caller if this tuple has a usable freeze plan set in *frz */
7398}
static void GetMultiXactIdHintBits(MultiXactId multi, uint16 *new_infomask, uint16 *new_infomask2)
Definition heapam.c:7528
#define FRM_RETURN_IS_XID
Definition heapam.c:6735
static TransactionId FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, const struct VacuumCutoffs *cutoffs, uint16 *flags, HeapPageFreeze *pagefrz)
Definition heapam.c:6786
bool heap_tuple_should_freeze(HeapTupleHeader tuple, const struct VacuumCutoffs *cutoffs, TransactionId *NoFreezePageRelfrozenXid, MultiXactId *NoFreezePageRelminMxid)
Definition heapam.c:7947
#define FRM_MARK_COMMITTED
Definition heapam.c:6737
#define FRM_NOOP
Definition heapam.c:6733
#define FRM_RETURN_IS_MULTI
Definition heapam.c:6736
#define FRM_INVALIDATE_XMAX
Definition heapam.c:6734
#define HEAP_MOVED_OFF
#define HEAP_XMIN_FROZEN
static TransactionId HeapTupleHeaderGetXvac(const HeapTupleHeaderData *tup)
#define HEAP_XMAX_COMMITTED
bool MultiXactIdPrecedes(MultiXactId multi1, MultiXactId multi2)
Definition multixact.c:2818
#define MultiXactIdIsValid(multi)
Definition multixact.h:29
MultiXactId NoFreezePageRelminMxid
Definition heapam.h:220
TransactionId NoFreezePageRelfrozenXid
Definition heapam.h:219
TransactionId OldestXmin
Definition vacuum.h:279
MultiXactId OldestMxact
Definition vacuum.h:280
static bool TransactionIdPrecedesOrEquals(TransactionId id1, TransactionId id2)
Definition transam.h:282

References Assert, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errmsg_internal(), ERROR, fb(), HeapPageFreeze::freeze_required, FreezeMultiXactId(), FRM_INVALIDATE_XMAX, FRM_MARK_COMMITTED, FRM_NOOP, FRM_RETURN_IS_MULTI, FRM_RETURN_IS_XID, GetMultiXactIdHintBits(), HEAP_FREEZE_CHECK_XMAX_ABORTED, HEAP_FREEZE_CHECK_XMIN_COMMITTED, HEAP_MOVED_OFF, heap_tuple_should_freeze(), HEAP_XMAX_COMMITTED, HEAP_XMAX_INVALID, HEAP_XMAX_IS_LOCKED_ONLY(), HEAP_XMAX_IS_MULTI, HEAP_XMIN_FROZEN, HeapTupleHeaderGetRawXmax(), HeapTupleHeaderGetXmin(), HeapTupleHeaderGetXvac(), InvalidTransactionId, VacuumCutoffs::MultiXactCutoff, MultiXactIdIsValid, MultiXactIdPrecedes(), HeapPageFreeze::NoFreezePageRelfrozenXid, HeapPageFreeze::NoFreezePageRelminMxid, VacuumCutoffs::OldestMxact, VacuumCutoffs::OldestXmin, VacuumCutoffs::relfrozenxid, HeapTupleHeaderData::t_infomask, HeapTupleHeaderData::t_infomask2, TransactionIdIsNormal, TransactionIdIsValid, TransactionIdPrecedes(), TransactionIdPrecedesOrEquals(), XLH_FREEZE_XVAC, and XLH_INVALID_XVAC.

Referenced by heap_freeze_tuple(), and heap_prune_record_unchanged_lp_normal().

◆ heap_prepare_pagescan()

void heap_prepare_pagescan ( TableScanDesc  sscan)
extern

Definition at line 616 of file heapam.c.

617{
619 Buffer buffer = scan->rs_cbuf;
620 BlockNumber block = scan->rs_cblock;
621 Snapshot snapshot;
622 Page page;
623 int lines;
624 bool all_visible;
626
627 Assert(BufferGetBlockNumber(buffer) == block);
628
629 /* ensure we're not accidentally being used when not in pagemode */
631 snapshot = scan->rs_base.rs_snapshot;
632
633 /*
634 * Prune and repair fragmentation for the whole page, if possible.
635 */
636 heap_page_prune_opt(scan->rs_base.rs_rd, buffer);
637
638 /*
639 * We must hold share lock on the buffer content while examining tuple
640 * visibility. Afterwards, however, the tuples we have found to be
641 * visible are guaranteed good as long as we hold the buffer pin.
642 */
644
645 page = BufferGetPage(buffer);
646 lines = PageGetMaxOffsetNumber(page);
647
648 /*
649 * If the all-visible flag indicates that all tuples on the page are
650 * visible to everyone, we can skip the per-tuple visibility tests.
651 *
652 * Note: In hot standby, a tuple that's already visible to all
653 * transactions on the primary might still be invisible to a read-only
654 * transaction in the standby. We partly handle this problem by tracking
655 * the minimum xmin of visible tuples as the cut-off XID while marking a
656 * page all-visible on the primary and WAL log that along with the
657 * visibility map SET operation. In hot standby, we wait for (or abort)
 658 all transactions that potentially may not see one or more tuples on
659 * the page. That's how index-only scans work fine in hot standby. A
660 * crucial difference between index-only scans and heap scans is that the
 662 index-only scan completely relies on the visibility map whereas heap
662 * scan looks at the page-level PD_ALL_VISIBLE flag. We are not sure if
663 * the page-level flag can be trusted in the same way, because it might
664 * get propagated somehow without being explicitly WAL-logged, e.g. via a
665 * full page write. Until we can prove that beyond doubt, let's check each
666 * tuple for visibility the hard way.
667 */
668 all_visible = PageIsAllVisible(page) && !snapshot->takenDuringRecovery;
671
672 /*
673 * We call page_collect_tuples() with constant arguments, to get the
674 * compiler to constant fold the constant arguments. Separate calls with
675 * constant arguments, rather than variables, are needed on several
676 * compilers to actually perform constant folding.
677 */
678 if (likely(all_visible))
679 {
681 scan->rs_ntuples = page_collect_tuples(scan, snapshot, page, buffer,
682 block, lines, true, false);
683 else
684 scan->rs_ntuples = page_collect_tuples(scan, snapshot, page, buffer,
685 block, lines, true, true);
686 }
687 else
688 {
690 scan->rs_ntuples = page_collect_tuples(scan, snapshot, page, buffer,
691 block, lines, false, false);
692 else
693 scan->rs_ntuples = page_collect_tuples(scan, snapshot, page, buffer,
694 block, lines, false, true);
695 }
696
698}
#define likely(x)
Definition c.h:423
static pg_attribute_always_inline int page_collect_tuples(HeapScanDesc scan, Snapshot snapshot, Page page, Buffer buffer, BlockNumber block, int lines, bool all_visible, bool check_serializable)
Definition heapam.c:522
bool CheckForSerializableConflictOutNeeded(Relation relation, Snapshot snapshot)
Definition predicate.c:3989
void heap_page_prune_opt(Relation relation, Buffer buffer)
Definition pruneheap.c:219
uint32 rs_ntuples
Definition heapam.h:99
BlockNumber rs_cblock
Definition heapam.h:69
bool takenDuringRecovery
Definition snapshot.h:180

References Assert, BUFFER_LOCK_SHARE, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage(), CheckForSerializableConflictOutNeeded(), fb(), heap_page_prune_opt(), likely, LockBuffer(), page_collect_tuples(), PageGetMaxOffsetNumber(), PageIsAllVisible(), HeapScanDescData::rs_base, HeapScanDescData::rs_cblock, HeapScanDescData::rs_cbuf, TableScanDescData::rs_flags, HeapScanDescData::rs_ntuples, TableScanDescData::rs_rd, TableScanDescData::rs_snapshot, SO_ALLOW_PAGEMODE, and SnapshotData::takenDuringRecovery.

Referenced by heapam_scan_sample_next_block(), and heapgettup_pagemode().

◆ heap_rescan()

void heap_rescan ( TableScanDesc  sscan,
ScanKey  key,
bool  set_params,
bool  allow_strat,
bool  allow_sync,
bool  allow_pagemode 
)
extern

Definition at line 1318 of file heapam.c.

1320{
1322
1323 if (set_params)
1324 {
1325 if (allow_strat)
1327 else
1329
1330 if (allow_sync)
1332 else
1334
1335 if (allow_pagemode && scan->rs_base.rs_snapshot &&
1338 else
1340 }
1341
1342 /*
1343 * unpin scan buffers
1344 */
1345 if (BufferIsValid(scan->rs_cbuf))
1346 {
1347 ReleaseBuffer(scan->rs_cbuf);
1348 scan->rs_cbuf = InvalidBuffer;
1349 }
1350
1351 /*
1352 * SO_TYPE_BITMAPSCAN would be cleaned up here, but it does not hold any
1353 * additional data vs a normal HeapScan
1354 */
1355
1356 /*
1357 * The read stream is reset on rescan. This must be done before
1358 * initscan(), as some state referred to by read_stream_reset() is reset
1359 * in initscan().
1360 */
1361 if (scan->rs_read_stream)
1363
1364 /*
1365 * reinitialize scan descriptor
1366 */
1367 initscan(scan, key, true);
1368}
void read_stream_reset(ReadStream *stream)
@ SO_ALLOW_STRAT
Definition tableam.h:58
@ SO_ALLOW_SYNC
Definition tableam.h:60

References BufferIsValid(), fb(), initscan(), InvalidBuffer, IsMVCCSnapshot, read_stream_reset(), ReleaseBuffer(), HeapScanDescData::rs_base, HeapScanDescData::rs_cbuf, TableScanDescData::rs_flags, HeapScanDescData::rs_read_stream, TableScanDescData::rs_snapshot, SO_ALLOW_PAGEMODE, SO_ALLOW_STRAT, and SO_ALLOW_SYNC.

◆ heap_set_tidrange()

void heap_set_tidrange ( TableScanDesc  sscan,
ItemPointer  mintid,
ItemPointer  maxtid 
)
extern

Definition at line 1479 of file heapam.c.

1481{
1487
1488 /*
1489 * For relations without any pages, we can simply leave the TID range
1490 * unset. There will be no tuples to scan, therefore no tuples outside
1491 * the given TID range.
1492 */
1493 if (scan->rs_nblocks == 0)
1494 return;
1495
1496 /*
1497 * Set up some ItemPointers which point to the first and last possible
1498 * tuples in the heap.
1499 */
1502
1503 /*
1504 * If the given maximum TID is below the highest possible TID in the
1505 * relation, then restrict the range to that, otherwise we scan to the end
1506 * of the relation.
1507 */
1510
1511 /*
1512 * If the given minimum TID is above the lowest possible TID in the
1513 * relation, then restrict the range to only scan for TIDs above that.
1514 */
1517
1518 /*
1519 * Check for an empty range and protect from would be negative results
1520 * from the numBlks calculation below.
1521 */
1523 {
1524 /* Set an empty range of blocks to scan */
1526 return;
1527 }
1528
1529 /*
1530 * Calculate the first block and the number of blocks we must scan. We
1531 * could be more aggressive here and perform some more validation to try
1532 * and further narrow the scope of blocks to scan by checking if the
1533 * lowestItem has an offset above MaxOffsetNumber. In this case, we could
1534 * advance startBlk by one. Likewise, if highestItem has an offset of 0
1535 * we could scan one fewer blocks. However, such an optimization does not
1536 * seem worth troubling over, currently.
1537 */
1539
1542
1543 /* Set the start block and number of blocks to scan */
1545
1546 /* Finally, set the TID range in sscan */
1547 ItemPointerCopy(&lowestItem, &sscan->st.tidrange.rs_mintid);
1548 ItemPointerCopy(&highestItem, &sscan->st.tidrange.rs_maxtid);
1549}
void heap_setscanlimits(TableScanDesc sscan, BlockNumber startBlk, BlockNumber numBlks)
Definition heapam.c:500
static BlockNumber ItemPointerGetBlockNumberNoCheck(const ItemPointerData *pointer)
Definition itemptr.h:93
#define MaxOffsetNumber
Definition off.h:28
BlockNumber rs_nblocks
Definition heapam.h:61

References fb(), FirstOffsetNumber, heap_setscanlimits(), ItemPointerCompare(), ItemPointerCopy(), ItemPointerGetBlockNumberNoCheck(), ItemPointerSet(), MaxOffsetNumber, and HeapScanDescData::rs_nblocks.

◆ heap_setscanlimits()

void heap_setscanlimits ( TableScanDesc  sscan,
BlockNumber  startBlk,
BlockNumber  numBlks 
)
extern

Definition at line 500 of file heapam.c.

501{
503
504 Assert(!scan->rs_inited); /* else too late to change */
505 /* else rs_startblock is significant */
507
508 /* Check startBlk is valid (but allow case of zero blocks...) */
509 Assert(startBlk == 0 || startBlk < scan->rs_nblocks);
510
511 scan->rs_startblock = startBlk;
512 scan->rs_numblocks = numBlks;
513}
BlockNumber rs_startblock
Definition heapam.h:62
BlockNumber rs_numblocks
Definition heapam.h:63

References Assert, fb(), HeapScanDescData::rs_base, TableScanDescData::rs_flags, HeapScanDescData::rs_inited, HeapScanDescData::rs_numblocks, HeapScanDescData::rs_startblock, and SO_ALLOW_SYNC.

Referenced by heap_set_tidrange(), and heapam_index_build_range_scan().

◆ heap_tuple_needs_eventual_freeze()

bool heap_tuple_needs_eventual_freeze ( HeapTupleHeader  tuple)
extern

Definition at line 7892 of file heapam.c.

7893{
7894 TransactionId xid;
7895
7896 /*
7897 * If xmin is a normal transaction ID, this tuple is definitely not
7898 * frozen.
7899 */
7900 xid = HeapTupleHeaderGetXmin(tuple);
7901 if (TransactionIdIsNormal(xid))
7902 return true;
7903
7904 /*
7905 * If xmax is a valid xact or multixact, this tuple is also not frozen.
7906 */
7907 if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
7908 {
7909 MultiXactId multi;
7910
7911 multi = HeapTupleHeaderGetRawXmax(tuple);
7912 if (MultiXactIdIsValid(multi))
7913 return true;
7914 }
7915 else
7916 {
7917 xid = HeapTupleHeaderGetRawXmax(tuple);
7918 if (TransactionIdIsNormal(xid))
7919 return true;
7920 }
7921
7922 if (tuple->t_infomask & HEAP_MOVED)
7923 {
7924 xid = HeapTupleHeaderGetXvac(tuple);
7925 if (TransactionIdIsNormal(xid))
7926 return true;
7927 }
7928
7929 return false;
7930}

References HEAP_MOVED, HEAP_XMAX_IS_MULTI, HeapTupleHeaderGetRawXmax(), HeapTupleHeaderGetXmin(), HeapTupleHeaderGetXvac(), MultiXactIdIsValid, HeapTupleHeaderData::t_infomask, and TransactionIdIsNormal.

Referenced by collect_corrupt_items(), and heap_page_would_be_all_visible().

◆ heap_tuple_should_freeze()

bool heap_tuple_should_freeze ( HeapTupleHeader  tuple,
const struct VacuumCutoffs cutoffs,
TransactionId NoFreezePageRelfrozenXid,
MultiXactId NoFreezePageRelminMxid 
)
extern

Definition at line 7947 of file heapam.c.

7951{
7952 TransactionId xid;
7953 MultiXactId multi;
7954 bool freeze = false;
7955
7956 /* First deal with xmin */
7957 xid = HeapTupleHeaderGetXmin(tuple);
7958 if (TransactionIdIsNormal(xid))
7959 {
7961 if (TransactionIdPrecedes(xid, *NoFreezePageRelfrozenXid))
7962 *NoFreezePageRelfrozenXid = xid;
7963 if (TransactionIdPrecedes(xid, cutoffs->FreezeLimit))
7964 freeze = true;
7965 }
7966
7967 /* Now deal with xmax */
7969 multi = InvalidMultiXactId;
7970 if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
7971 multi = HeapTupleHeaderGetRawXmax(tuple);
7972 else
7973 xid = HeapTupleHeaderGetRawXmax(tuple);
7974
7975 if (TransactionIdIsNormal(xid))
7976 {
7978 /* xmax is a non-permanent XID */
7979 if (TransactionIdPrecedes(xid, *NoFreezePageRelfrozenXid))
7980 *NoFreezePageRelfrozenXid = xid;
7981 if (TransactionIdPrecedes(xid, cutoffs->FreezeLimit))
7982 freeze = true;
7983 }
7984 else if (!MultiXactIdIsValid(multi))
7985 {
7986 /* xmax is a permanent XID or invalid MultiXactId/XID */
7987 }
7988 else if (HEAP_LOCKED_UPGRADED(tuple->t_infomask))
7989 {
7990 /* xmax is a pg_upgrade'd MultiXact, which can't have updater XID */
7991 if (MultiXactIdPrecedes(multi, *NoFreezePageRelminMxid))
7992 *NoFreezePageRelminMxid = multi;
7993 /* heap_prepare_freeze_tuple always freezes pg_upgrade'd xmax */
7994 freeze = true;
7995 }
7996 else
7997 {
7998 /* xmax is a MultiXactId that may have an updater XID */
7999 MultiXactMember *members;
8000 int nmembers;
8001
8003 if (MultiXactIdPrecedes(multi, *NoFreezePageRelminMxid))
8004 *NoFreezePageRelminMxid = multi;
8005 if (MultiXactIdPrecedes(multi, cutoffs->MultiXactCutoff))
8006 freeze = true;
8007
8008 /* need to check whether any member of the mxact is old */
8009 nmembers = GetMultiXactIdMembers(multi, &members, false,
8011
8012 for (int i = 0; i < nmembers; i++)
8013 {
8014 xid = members[i].xid;
8016 if (TransactionIdPrecedes(xid, *NoFreezePageRelfrozenXid))
8017 *NoFreezePageRelfrozenXid = xid;
8018 if (TransactionIdPrecedes(xid, cutoffs->FreezeLimit))
8019 freeze = true;
8020 }
8021 if (nmembers > 0)
8022 pfree(members);
8023 }
8024
8025 if (tuple->t_infomask & HEAP_MOVED)
8026 {
8027 xid = HeapTupleHeaderGetXvac(tuple);
8028 if (TransactionIdIsNormal(xid))
8029 {
8031 if (TransactionIdPrecedes(xid, *NoFreezePageRelfrozenXid))
8032 *NoFreezePageRelfrozenXid = xid;
8033 /* heap_prepare_freeze_tuple forces xvac freezing */
8034 freeze = true;
8035 }
8036 }
8037
8038 return freeze;
8039}
static bool HEAP_LOCKED_UPGRADED(uint16 infomask)
bool MultiXactIdPrecedesOrEquals(MultiXactId multi1, MultiXactId multi2)
Definition multixact.c:2832
#define InvalidMultiXactId
Definition multixact.h:25
TransactionId xid
Definition multixact.h:57

References Assert, VacuumCutoffs::FreezeLimit, GetMultiXactIdMembers(), HEAP_LOCKED_UPGRADED(), HEAP_MOVED, HEAP_XMAX_IS_LOCKED_ONLY(), HEAP_XMAX_IS_MULTI, HeapTupleHeaderGetRawXmax(), HeapTupleHeaderGetXmin(), HeapTupleHeaderGetXvac(), i, InvalidMultiXactId, InvalidTransactionId, VacuumCutoffs::MultiXactCutoff, MultiXactIdIsValid, MultiXactIdPrecedes(), MultiXactIdPrecedesOrEquals(), pfree(), VacuumCutoffs::relfrozenxid, VacuumCutoffs::relminmxid, HeapTupleHeaderData::t_infomask, TransactionIdIsNormal, TransactionIdPrecedes(), TransactionIdPrecedesOrEquals(), and MultiXactMember::xid.

Referenced by heap_prepare_freeze_tuple(), and lazy_scan_noprune().

◆ heap_update()

TM_Result heap_update ( Relation  relation,
const ItemPointerData otid,
HeapTuple  newtup,
CommandId  cid,
Snapshot  crosscheck,
bool  wait,
TM_FailureData tmfd,
LockTupleMode lockmode,
TU_UpdateIndexes update_indexes 
)
extern

Definition at line 3313 of file heapam.c.

3317{
3318 TM_Result result;
3326 ItemId lp;
3330 bool old_key_copied = false;
3331 Page page;
3332 BlockNumber block;
3334 Buffer buffer,
3335 newbuf,
3336 vmbuffer = InvalidBuffer,
3338 bool need_toast;
3340 pagefree;
3341 bool have_tuple_lock = false;
3342 bool iscombo;
3343 bool use_hot_update = false;
3344 bool summarized_update = false;
3345 bool key_intact;
3346 bool all_visible_cleared = false;
3347 bool all_visible_cleared_new = false;
3348 bool checked_lockers;
3349 bool locker_remains;
3350 bool id_has_external = false;
3357
3359
3360 /* Cheap, simplistic check that the tuple matches the rel's rowtype. */
3363
3364 AssertHasSnapshotForToast(relation);
3365
3366 /*
3367 * Forbid this during a parallel operation, lest it allocate a combo CID.
3368 * Other workers might need that combo CID for visibility checks, and we
3369 * have no provision for broadcasting it to them.
3370 */
3371 if (IsInParallelMode())
3372 ereport(ERROR,
3374 errmsg("cannot update tuples during a parallel operation")));
3375
3376#ifdef USE_ASSERT_CHECKING
3378#endif
3379
3380 /*
3381 * Fetch the list of attributes to be checked for various operations.
3382 *
3383 * For HOT considerations, this is wasted effort if we fail to update or
3384 * have to put the new tuple on a different page. But we must compute the
3385 * list before obtaining buffer lock --- in the worst case, if we are
3386 * doing an update on one of the relevant system catalogs, we could
3387 * deadlock if we try to fetch the list later. In any case, the relcache
3388 * caches the data so this is usually pretty cheap.
3389 *
3390 * We also need columns used by the replica identity and columns that are
3391 * considered the "key" of rows in the table.
3392 *
3393 * Note that we get copies of each bitmap, so we need not worry about
3394 * relcache flush happening midway through.
3395 */
3408
3410 INJECTION_POINT("heap_update-before-pin", NULL);
3411 buffer = ReadBuffer(relation, block);
3412 page = BufferGetPage(buffer);
3413
3414 /*
3415 * Before locking the buffer, pin the visibility map page if it appears to
3416 * be necessary. Since we haven't got the lock yet, someone else might be
3417 * in the middle of changing this, so we'll need to recheck after we have
3418 * the lock.
3419 */
3420 if (PageIsAllVisible(page))
3421 visibilitymap_pin(relation, block, &vmbuffer);
3422
3424
3426
3427 /*
3428 * Usually, a buffer pin and/or snapshot blocks pruning of otid, ensuring
3429 * we see LP_NORMAL here. When the otid origin is a syscache, we may have
3430 * neither a pin nor a snapshot. Hence, we may see other LP_ states, each
3431 * of which indicates concurrent pruning.
3432 *
3433 * Failing with TM_Updated would be most accurate. However, unlike other
3434 * TM_Updated scenarios, we don't know the successor ctid in LP_UNUSED and
3435 * LP_DEAD cases. While the distinction between TM_Updated and TM_Deleted
3436 * does matter to SQL statements UPDATE and MERGE, those SQL statements
3437 * hold a snapshot that ensures LP_NORMAL. Hence, the choice between
3438 * TM_Updated and TM_Deleted affects only the wording of error messages.
3439 * Settle on TM_Deleted, for two reasons. First, it avoids complicating
3440 * the specification of when tmfd->ctid is valid. Second, it creates
3441 * error log evidence that we took this branch.
3442 *
3443 * Since it's possible to see LP_UNUSED at otid, it's also possible to see
3444 * LP_NORMAL for a tuple that replaced LP_UNUSED. If it's a tuple for an
3445 * unrelated row, we'll fail with "duplicate key value violates unique".
3446 * XXX if otid is the live, newer version of the newtup row, we'll discard
3447 * changes originating in versions of this catalog row after the version
3448 * the caller got from syscache. See syscache-update-pruned.spec.
3449 */
3450 if (!ItemIdIsNormal(lp))
3451 {
3453
3454 UnlockReleaseBuffer(buffer);
3456 if (vmbuffer != InvalidBuffer)
3457 ReleaseBuffer(vmbuffer);
3458 tmfd->ctid = *otid;
3459 tmfd->xmax = InvalidTransactionId;
3460 tmfd->cmax = InvalidCommandId;
3462
3467 /* modified_attrs not yet initialized */
3469 return TM_Deleted;
3470 }
3471
3472 /*
3473 * Fill in enough data in oldtup for HeapDetermineColumnsInfo to work
3474 * properly.
3475 */
3476 oldtup.t_tableOid = RelationGetRelid(relation);
3477 oldtup.t_data = (HeapTupleHeader) PageGetItem(page, lp);
3478 oldtup.t_len = ItemIdGetLength(lp);
3479 oldtup.t_self = *otid;
3480
3481 /* the new tuple is ready, except for this: */
3482 newtup->t_tableOid = RelationGetRelid(relation);
3483
3484 /*
3485 * Determine columns modified by the update. Additionally, identify
3486 * whether any of the unmodified replica identity key attributes in the
3487 * old tuple is externally stored or not. This is required because for
3488 * such attributes the flattened value won't be WAL logged as part of the
3489 * new tuple so we must include it as part of the old_key_tuple. See
3490 * ExtractReplicaIdentity.
3491 */
3493 id_attrs, &oldtup,
3495
3496 /*
3497 * If we're not updating any "key" column, we can grab a weaker lock type.
3498 * This allows for more concurrency when we are running simultaneously
3499 * with foreign key checks.
3500 *
3501 * Note that if a column gets detoasted while executing the update, but
3502 * the value ends up being the same, this test will fail and we will use
3503 * the stronger lock. This is acceptable; the important case to optimize
3504 * is updates that don't manipulate key columns, not those that
3505 * serendipitously arrive at the same key values.
3506 */
3508 {
3509 *lockmode = LockTupleNoKeyExclusive;
3511 key_intact = true;
3512
3513 /*
3514 * If this is the first possibly-multixact-able operation in the
3515 * current transaction, set my per-backend OldestMemberMXactId
3516 * setting. We can be certain that the transaction will never become a
3517 * member of any older MultiXactIds than that. (We have to do this
3518 * even if we end up just using our own TransactionId below, since
3519 * some other backend could incorporate our XID into a MultiXact
3520 * immediately afterwards.)
3521 */
3523 }
3524 else
3525 {
3526 *lockmode = LockTupleExclusive;
3528 key_intact = false;
3529 }
3530
3531 /*
3532 * Note: beyond this point, use oldtup not otid to refer to old tuple.
3533 * otid may very well point at newtup->t_self, which we will overwrite
3534 * with the new tuple's location, so there's great risk of confusion if we
3535 * use otid anymore.
3536 */
3537
3538l2:
3539 checked_lockers = false;
3540 locker_remains = false;
3541 result = HeapTupleSatisfiesUpdate(&oldtup, cid, buffer);
3542
3543 /* see below about the "no wait" case */
3544 Assert(result != TM_BeingModified || wait);
3545
3546 if (result == TM_Invisible)
3547 {
3548 UnlockReleaseBuffer(buffer);
3549 ereport(ERROR,
3551 errmsg("attempted to update invisible tuple")));
3552 }
3553 else if (result == TM_BeingModified && wait)
3554 {
3557 bool can_continue = false;
3558
3559 /*
3560 * XXX note that we don't consider the "no wait" case here. This
3561 * isn't a problem currently because no caller uses that case, but it
3562 * should be fixed if such a caller is introduced. It wasn't a
3563 * problem previously because this code would always wait, but now
3564 * that some tuple locks do not conflict with one of the lock modes we
3565 * use, it is possible that this case is interesting to handle
3566 * specially.
3567 *
3568 * This may cause failures with third-party code that calls
3569 * heap_update directly.
3570 */
3571
3572 /* must copy state data before unlocking buffer */
3574 infomask = oldtup.t_data->t_infomask;
3575
3576 /*
3577 * Now we have to do something about the existing locker. If it's a
3578 * multi, sleep on it; we might be awakened before it is completely
3579 * gone (or even not sleep at all in some cases); we need to preserve
3580 * it as locker, unless it is gone completely.
3581 *
3582 * If it's not a multi, we need to check for sleeping conditions
3583 * before actually going to sleep. If the update doesn't conflict
3584 * with the locks, we just continue without sleeping (but making sure
3585 * it is preserved).
3586 *
3587 * Before sleeping, we need to acquire tuple lock to establish our
3588 * priority for the tuple (see heap_lock_tuple). LockTuple will
3589 * release us when we are next-in-line for the tuple. Note we must
3590 * not acquire the tuple lock until we're sure we're going to sleep;
3591 * otherwise we're open for race conditions with other transactions
3592 * holding the tuple lock which sleep on us.
3593 *
3594 * If we are forced to "start over" below, we keep the tuple lock;
3595 * this arranges that we stay at the head of the line while rechecking
3596 * tuple state.
3597 */
3599 {
3601 int remain;
3602 bool current_is_member = false;
3603
3605 *lockmode, &current_is_member))
3606 {
3608
3609 /*
3610 * Acquire the lock, if necessary (but skip it when we're
3611 * requesting a lock and already have one; avoids deadlock).
3612 */
3613 if (!current_is_member)
3614 heap_acquire_tuplock(relation, &(oldtup.t_self), *lockmode,
3616
3617 /* wait for multixact */
3619 relation, &oldtup.t_self, XLTW_Update,
3620 &remain);
3621 checked_lockers = true;
3622 locker_remains = remain != 0;
3624
3625 /*
3626 * If xwait had just locked the tuple then some other xact
3627 * could update this tuple before we get to this point. Check
3628 * for xmax change, and start over if so.
3629 */
3630 if (xmax_infomask_changed(oldtup.t_data->t_infomask,
3631 infomask) ||
3633 xwait))
3634 goto l2;
3635 }
3636
3637 /*
3638 * Note that the multixact may not be done by now. It could have
3639 * surviving members; our own xact or other subxacts of this
3640 * backend, and also any other concurrent transaction that locked
3641 * the tuple with LockTupleKeyShare if we only got
3642 * LockTupleNoKeyExclusive. If this is the case, we have to be
3643 * careful to mark the updated tuple with the surviving members in
3644 * Xmax.
3645 *
3646 * Note that there could have been another update in the
3647 * MultiXact. In that case, we need to check whether it committed
3648 * or aborted. If it aborted we are safe to update it again;
3649 * otherwise there is an update conflict, and we have to return
3650 * TableTuple{Deleted, Updated} below.
3651 *
3652 * In the LockTupleExclusive case, we still need to preserve the
3653 * surviving members: those would include the tuple locks we had
3654 * before this one, which are important to keep in case this
3655 * subxact aborts.
3656 */
3657 if (!HEAP_XMAX_IS_LOCKED_ONLY(oldtup.t_data->t_infomask))
3659 else
3661
3662 /*
3663 * There was no UPDATE in the MultiXact; or it aborted. No
3664 * TransactionIdIsInProgress() call needed here, since we called
3665 * MultiXactIdWait() above.
3666 */
3669 can_continue = true;
3670 }
3672 {
3673 /*
3674 * The only locker is ourselves; we can avoid grabbing the tuple
3675 * lock here, but must preserve our locking information.
3676 */
3677 checked_lockers = true;
3678 locker_remains = true;
3679 can_continue = true;
3680 }
3682 {
3683 /*
3684 * If it's just a key-share locker, and we're not changing the key
3685 * columns, we don't need to wait for it to end; but we need to
3686 * preserve it as locker.
3687 */
3688 checked_lockers = true;
3689 locker_remains = true;
3690 can_continue = true;
3691 }
3692 else
3693 {
3694 /*
3695 * Wait for regular transaction to end; but first, acquire tuple
3696 * lock.
3697 */
3699 heap_acquire_tuplock(relation, &(oldtup.t_self), *lockmode,
3701 XactLockTableWait(xwait, relation, &oldtup.t_self,
3702 XLTW_Update);
3703 checked_lockers = true;
3705
3706 /*
3707 * xwait is done, but if xwait had just locked the tuple then some
3708 * other xact could update this tuple before we get to this point.
3709 * Check for xmax change, and start over if so.
3710 */
3711 if (xmax_infomask_changed(oldtup.t_data->t_infomask, infomask) ||
3714 goto l2;
3715
3716 /* Otherwise check if it committed or aborted */
3717 UpdateXmaxHintBits(oldtup.t_data, buffer, xwait);
3718 if (oldtup.t_data->t_infomask & HEAP_XMAX_INVALID)
3719 can_continue = true;
3720 }
3721
3722 if (can_continue)
3723 result = TM_Ok;
3724 else if (!ItemPointerEquals(&oldtup.t_self, &oldtup.t_data->t_ctid))
3725 result = TM_Updated;
3726 else
3727 result = TM_Deleted;
3728 }
3729
3730 /* Sanity check the result HeapTupleSatisfiesUpdate() and the logic above */
3731 if (result != TM_Ok)
3732 {
3733 Assert(result == TM_SelfModified ||
3734 result == TM_Updated ||
3735 result == TM_Deleted ||
3736 result == TM_BeingModified);
3737 Assert(!(oldtup.t_data->t_infomask & HEAP_XMAX_INVALID));
3738 Assert(result != TM_Updated ||
3739 !ItemPointerEquals(&oldtup.t_self, &oldtup.t_data->t_ctid));
3740 }
3741
3742 if (crosscheck != InvalidSnapshot && result == TM_Ok)
3743 {
3744 /* Perform additional check for transaction-snapshot mode RI updates */
3746 result = TM_Updated;
3747 }
3748
3749 if (result != TM_Ok)
3750 {
3751 tmfd->ctid = oldtup.t_data->t_ctid;
3752 tmfd->xmax = HeapTupleHeaderGetUpdateXid(oldtup.t_data);
3753 if (result == TM_SelfModified)
3754 tmfd->cmax = HeapTupleHeaderGetCmax(oldtup.t_data);
3755 else
3756 tmfd->cmax = InvalidCommandId;
3757 UnlockReleaseBuffer(buffer);
3758 if (have_tuple_lock)
3759 UnlockTupleTuplock(relation, &(oldtup.t_self), *lockmode);
3760 if (vmbuffer != InvalidBuffer)
3761 ReleaseBuffer(vmbuffer);
3763
3770 return result;
3771 }
3772
3773 /*
3774 * If we didn't pin the visibility map page and the page has become all
3775 * visible while we were busy locking the buffer, or during some
3776 * subsequent window during which we had it unlocked, we'll have to unlock
3777 * and re-lock, to avoid holding the buffer lock across an I/O. That's a
3778 * bit unfortunate, especially since we'll now have to recheck whether the
3779 * tuple has been locked or updated under us, but hopefully it won't
3780 * happen very often.
3781 */
3782 if (vmbuffer == InvalidBuffer && PageIsAllVisible(page))
3783 {
3785 visibilitymap_pin(relation, block, &vmbuffer);
3787 goto l2;
3788 }
3789
3790 /* Fill in transaction status data */
3791
3792 /*
3793 * If the tuple we're updating is locked, we need to preserve the locking
3794 * info in the old tuple's Xmax. Prepare a new Xmax value for this.
3795 */
3797 oldtup.t_data->t_infomask,
3798 oldtup.t_data->t_infomask2,
3799 xid, *lockmode, true,
3802
3803 /*
3804 * And also prepare an Xmax value for the new copy of the tuple. If there
3805 * was no xmax previously, or there was one but all lockers are now gone,
3806 * then use InvalidTransactionId; otherwise, get the xmax from the old
3807 * tuple. (In rare cases that might also be InvalidTransactionId and yet
3808 * not have the HEAP_XMAX_INVALID bit set; that's fine.)
3809 */
3810 if ((oldtup.t_data->t_infomask & HEAP_XMAX_INVALID) ||
3811 HEAP_LOCKED_UPGRADED(oldtup.t_data->t_infomask) ||
3814 else
3816
3818 {
3821 }
3822 else
3823 {
3824 /*
3825 * If we found a valid Xmax for the new tuple, then the infomask bits
3826 * to use on the new tuple depend on what was there on the old one.
3827 * Note that since we're doing an update, the only possibility is that
3828 * the lockers had FOR KEY SHARE lock.
3829 */
3830 if (oldtup.t_data->t_infomask & HEAP_XMAX_IS_MULTI)
3831 {
3834 }
3835 else
3836 {
3839 }
3840 }
3841
3842 /*
3843 * Prepare the new tuple with the appropriate initial values of Xmin and
3844 * Xmax, as well as initial infomask bits as computed above.
3845 */
3846 newtup->t_data->t_infomask &= ~(HEAP_XACT_MASK);
3847 newtup->t_data->t_infomask2 &= ~(HEAP2_XACT_MASK);
3848 HeapTupleHeaderSetXmin(newtup->t_data, xid);
3850 newtup->t_data->t_infomask |= HEAP_UPDATED | infomask_new_tuple;
3851 newtup->t_data->t_infomask2 |= infomask2_new_tuple;
3853
3854 /*
3855 * Replace cid with a combo CID if necessary. Note that we already put
3856 * the plain cid into the new tuple.
3857 */
3859
3860 /*
3861 * If the toaster needs to be activated, OR if the new tuple will not fit
3862 * on the same page as the old, then we need to release the content lock
3863 * (but not the pin!) on the old tuple's buffer while we are off doing
3864 * TOAST and/or table-file-extension work. We must mark the old tuple to
3865 * show that it's locked, else other processes may try to update it
3866 * themselves.
3867 *
3868 * We need to invoke the toaster if there are already any out-of-line
3869 * toasted values present, or if the new tuple is over-threshold.
3870 */
3871 if (relation->rd_rel->relkind != RELKIND_RELATION &&
3872 relation->rd_rel->relkind != RELKIND_MATVIEW)
3873 {
3874 /* toast table entries should never be recursively toasted */
3877 need_toast = false;
3878 }
3879 else
3882 newtup->t_len > TOAST_TUPLE_THRESHOLD);
3883
3885
3886 newtupsize = MAXALIGN(newtup->t_len);
3887
3889 {
3893 bool cleared_all_frozen = false;
3894
3895 /*
3896 * To prevent concurrent sessions from updating the tuple, we have to
3897 * temporarily mark it locked, while we release the page-level lock.
3898 *
3899 * To satisfy the rule that any xid potentially appearing in a buffer
3900 * written out to disk, we unfortunately have to WAL log this
3901 * temporary modification. We can reuse xl_heap_lock for this
3902 * purpose. If we crash/error before following through with the
3903 * actual update, xmax will be of an aborted transaction, allowing
3904 * other sessions to proceed.
3905 */
3906
3907 /*
3908 * Compute xmax / infomask appropriate for locking the tuple. This has
3909 * to be done separately from the combo that's going to be used for
3910 * updating, because the potentially created multixact would otherwise
3911 * be wrong.
3912 */
3914 oldtup.t_data->t_infomask,
3915 oldtup.t_data->t_infomask2,
3916 xid, *lockmode, false,
3919
3921
3923
3924 /* Clear obsolete visibility flags ... */
3925 oldtup.t_data->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
3926 oldtup.t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED;
3928 /* ... and store info about transaction updating this tuple */
3931 oldtup.t_data->t_infomask |= infomask_lock_old_tuple;
3932 oldtup.t_data->t_infomask2 |= infomask2_lock_old_tuple;
3934
3935 /* temporarily make it look not-updated, but locked */
3936 oldtup.t_data->t_ctid = oldtup.t_self;
3937
3938 /*
3939 * Clear all-frozen bit on visibility map if needed. We could
3940 * immediately reset ALL_VISIBLE, but given that the WAL logging
3941 * overhead would be unchanged, that doesn't seem necessarily
3942 * worthwhile.
3943 */
3944 if (PageIsAllVisible(page) &&
3945 visibilitymap_clear(relation, block, vmbuffer,
3947 cleared_all_frozen = true;
3948
3949 MarkBufferDirty(buffer);
3950
3951 if (RelationNeedsWAL(relation))
3952 {
3955
3958
3959 xlrec.offnum = ItemPointerGetOffsetNumber(&oldtup.t_self);
3961 xlrec.infobits_set = compute_infobits(oldtup.t_data->t_infomask,
3962 oldtup.t_data->t_infomask2);
3963 xlrec.flags =
3967 PageSetLSN(page, recptr);
3968 }
3969
3971
3973
3974 /*
3975 * Let the toaster do its thing, if needed.
3976 *
3977 * Note: below this point, heaptup is the data we actually intend to
3978 * store into the relation; newtup is the caller's original untoasted
3979 * data.
3980 */
3981 if (need_toast)
3982 {
3983 /* Note we always use WAL and FSM during updates */
3985 newtupsize = MAXALIGN(heaptup->t_len);
3986 }
3987 else
3988 heaptup = newtup;
3989
3990 /*
3991 * Now, do we need a new page for the tuple, or not? This is a bit
3992 * tricky since someone else could have added tuples to the page while
3993 * we weren't looking. We have to recheck the available space after
3994 * reacquiring the buffer lock. But don't bother to do that if the
3995 * former amount of free space is still not enough; it's unlikely
3996 * there's more free now than before.
3997 *
3998 * What's more, if we need to get a new page, we will need to acquire
3999 * buffer locks on both old and new pages. To avoid deadlock against
4000 * some other backend trying to get the same two locks in the other
4001 * order, we must be consistent about the order we get the locks in.
4002 * We use the rule "lock the lower-numbered page of the relation
4003 * first". To implement this, we must do RelationGetBufferForTuple
4004 * while not holding the lock on the old page, and we must rely on it
4005 * to get the locks on both pages in the correct order.
4006 *
4007 * Another consideration is that we need visibility map page pin(s) if
4008 * we will have to clear the all-visible flag on either page. If we
4009 * call RelationGetBufferForTuple, we rely on it to acquire any such
4010 * pins; but if we don't, we have to handle that here. Hence we need
4011 * a loop.
4012 */
4013 for (;;)
4014 {
4015 if (newtupsize > pagefree)
4016 {
4017 /* It doesn't fit, must use RelationGetBufferForTuple. */
4018 newbuf = RelationGetBufferForTuple(relation, heaptup->t_len,
4019 buffer, 0, NULL,
4020 &vmbuffer_new, &vmbuffer,
4021 0);
4022 /* We're all done. */
4023 break;
4024 }
4025 /* Acquire VM page pin if needed and we don't have it. */
4026 if (vmbuffer == InvalidBuffer && PageIsAllVisible(page))
4027 visibilitymap_pin(relation, block, &vmbuffer);
4028 /* Re-acquire the lock on the old tuple's page. */
4030 /* Re-check using the up-to-date free space */
4032 if (newtupsize > pagefree ||
4033 (vmbuffer == InvalidBuffer && PageIsAllVisible(page)))
4034 {
4035 /*
4036 * Rats, it doesn't fit anymore, or somebody just now set the
4037 * all-visible flag. We must now unlock and loop to avoid
4038 * deadlock. Fortunately, this path should seldom be taken.
4039 */
4041 }
4042 else
4043 {
4044 /* We're all done. */
4045 newbuf = buffer;
4046 break;
4047 }
4048 }
4049 }
4050 else
4051 {
4052 /* No TOAST work needed, and it'll fit on same page */
4053 newbuf = buffer;
4054 heaptup = newtup;
4055 }
4056
4057 /*
4058 * We're about to do the actual update -- check for conflict first, to
4059 * avoid possibly having to roll back work we've just done.
4060 *
4061 * This is safe without a recheck as long as there is no possibility of
4062 * another process scanning the pages between this check and the update
4063 * being visible to the scan (i.e., exclusive buffer content lock(s) are
4064 * continuously held from this point until the tuple update is visible).
4065 *
4066 * For the new tuple the only check needed is at the relation level, but
4067 * since both tuples are in the same relation and the check for oldtup
4068 * will include checking the relation level, there is no benefit to a
4069 * separate check for the new tuple.
4070 */
4071 CheckForSerializableConflictIn(relation, &oldtup.t_self,
4072 BufferGetBlockNumber(buffer));
4073
4074 /*
4075 * At this point newbuf and buffer are both pinned and locked, and newbuf
4076 * has enough space for the new tuple. If they are the same buffer, only
4077 * one pin is held.
4078 */
4079
4080 if (newbuf == buffer)
4081 {
4082 /*
4083 * Since the new tuple is going into the same page, we might be able
4084 * to do a HOT update. Check if any of the index columns have been
4085 * changed.
4086 */
4088 {
4089 use_hot_update = true;
4090
4091 /*
4092 * If none of the columns that are used in hot-blocking indexes
4093 * were updated, we can apply HOT, but we do still need to check
4094 * if we need to update the summarizing indexes, and update those
4095 * indexes if the columns were updated, or we may fail to detect
4096 * e.g. value bound changes in BRIN minmax indexes.
4097 */
4099 summarized_update = true;
4100 }
4101 }
4102 else
4103 {
4104 /* Set a hint that the old page could use prune/defrag */
4105 PageSetFull(page);
4106 }
4107
4108 /*
4109 * Compute replica identity tuple before entering the critical section so
4110 * we don't PANIC upon a memory allocation failure.
4111 * ExtractReplicaIdentity() will return NULL if nothing needs to be
4112 * logged. Pass old key required as true only if the replica identity key
4113 * columns are modified or it has external data.
4114 */
4119
4120 /* NO EREPORT(ERROR) from here till changes are logged */
4122
4123 /*
4124 * If this transaction commits, the old tuple will become DEAD sooner or
4125 * later. Set flag that this page is a candidate for pruning once our xid
4126 * falls below the OldestXmin horizon. If the transaction finally aborts,
4127 * the subsequent page pruning will be a no-op and the hint will be
4128 * cleared.
4129 *
4130 * XXX Should we set hint on newbuf as well? If the transaction aborts,
4131 * there would be a prunable tuple in the newbuf; but for now we choose
4132 * not to optimize for aborts. Note that heap_xlog_update must be kept in
4133 * sync if this decision changes.
4134 */
4135 PageSetPrunable(page, xid);
4136
4137 if (use_hot_update)
4138 {
4139 /* Mark the old tuple as HOT-updated */
4141 /* And mark the new tuple as heap-only */
4143 /* Mark the caller's copy too, in case different from heaptup */
4145 }
4146 else
4147 {
4148 /* Make sure tuples are correctly marked as not-HOT */
4152 }
4153
4154 RelationPutHeapTuple(relation, newbuf, heaptup, false); /* insert new tuple */
4155
4156
4157 /* Clear obsolete visibility flags, possibly set by ourselves above... */
4158 oldtup.t_data->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
4159 oldtup.t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED;
4160 /* ... and store info about transaction updating this tuple */
4163 oldtup.t_data->t_infomask |= infomask_old_tuple;
4164 oldtup.t_data->t_infomask2 |= infomask2_old_tuple;
4166
4167 /* record address of new tuple in t_ctid of old one */
4168 oldtup.t_data->t_ctid = heaptup->t_self;
4169
4170 /* clear PD_ALL_VISIBLE flags, reset all visibilitymap bits */
4171 if (PageIsAllVisible(BufferGetPage(buffer)))
4172 {
4173 all_visible_cleared = true;
4175 visibilitymap_clear(relation, BufferGetBlockNumber(buffer),
4176 vmbuffer, VISIBILITYMAP_VALID_BITS);
4177 }
4178 if (newbuf != buffer && PageIsAllVisible(BufferGetPage(newbuf)))
4179 {
4184 }
4185
4186 if (newbuf != buffer)
4188 MarkBufferDirty(buffer);
4189
4190 /* XLOG stuff */
4191 if (RelationNeedsWAL(relation))
4192 {
4194
4195 /*
4196 * For logical decoding we need combo CIDs to properly decode the
4197 * catalog.
4198 */
4200 {
4201 log_heap_new_cid(relation, &oldtup);
4202 log_heap_new_cid(relation, heaptup);
4203 }
4204
4205 recptr = log_heap_update(relation, buffer,
4210 if (newbuf != buffer)
4211 {
4213 }
4215 }
4216
4218
4219 if (newbuf != buffer)
4222
4223 /*
4224 * Mark old tuple for invalidation from system caches at next command
4225 * boundary, and mark the new tuple for invalidation in case we abort. We
4226 * have to do this before releasing the buffer because oldtup is in the
4227 * buffer. (heaptup is all in local memory, but it's necessary to process
4228 * both tuple versions in one call to inval.c so we can avoid redundant
4229 * sinval messages.)
4230 */
4232
4233 /* Now we can release the buffer(s) */
4234 if (newbuf != buffer)
4236 ReleaseBuffer(buffer);
4239 if (BufferIsValid(vmbuffer))
4240 ReleaseBuffer(vmbuffer);
4241
4242 /*
4243 * Release the lmgr tuple lock, if we had it.
4244 */
4245 if (have_tuple_lock)
4246 UnlockTupleTuplock(relation, &(oldtup.t_self), *lockmode);
4247
4248 pgstat_count_heap_update(relation, use_hot_update, newbuf != buffer);
4249
4250 /*
4251 * If heaptup is a private copy, release it. Don't forget to copy t_self
4252 * back to the caller's image, too.
4253 */
4254 if (heaptup != newtup)
4255 {
4256 newtup->t_self = heaptup->t_self;
4258 }
4259
4260 /*
4261 * If it is a HOT update, the update may still need to update summarized
4262 * indexes, lest we fail to update those summaries and get incorrect
4263 * results (for example, minmax bounds of the block may change with this
4264 * update).
4265 */
4266 if (use_hot_update)
4267 {
4270 else
4272 }
4273 else
4275
4278
4285
4286 return TM_Ok;
4287}
void bms_free(Bitmapset *a)
Definition bitmapset.c:239
Bitmapset * bms_add_members(Bitmapset *a, const Bitmapset *b)
Definition bitmapset.c:901
bool bms_overlap(const Bitmapset *a, const Bitmapset *b)
Definition bitmapset.c:575
static void PageSetFull(Page page)
Definition bufpage.h:417
static Bitmapset * HeapDetermineColumnsInfo(Relation relation, Bitmapset *interesting_cols, Bitmapset *external_cols, HeapTuple oldtup, HeapTuple newtup, bool *has_external)
Definition heapam.c:4467
static XLogRecPtr log_heap_update(Relation reln, Buffer oldbuf, Buffer newbuf, HeapTuple oldtup, HeapTuple newtup, HeapTuple old_key_tuple, bool all_visible_cleared, bool new_all_visible_cleared)
Definition heapam.c:8920
TransactionId HeapTupleGetUpdateXid(const HeapTupleHeaderData *tup)
Definition heapam.c:7661
HeapTuple heap_toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup, int options)
Definition heaptoast.c:96
#define TOAST_TUPLE_THRESHOLD
Definition heaptoast.h:48
static void HeapTupleClearHotUpdated(const HeapTupleData *tuple)
#define HEAP2_XACT_MASK
#define HEAP_XMAX_LOCK_ONLY
static void HeapTupleHeaderSetCmin(HeapTupleHeaderData *tup, CommandId cid)
static void HeapTupleSetHeapOnly(const HeapTupleData *tuple)
#define HEAP_XACT_MASK
static void HeapTupleSetHotUpdated(const HeapTupleData *tuple)
static void HeapTupleClearHeapOnly(const HeapTupleData *tuple)
#define HEAP_UPDATED
#define HEAP_XMAX_KEYSHR_LOCK
#define INJECTION_POINT(name, arg)
void pgstat_count_heap_update(Relation rel, bool hot, bool newpage)
Bitmapset * RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind)
Definition relcache.c:5298
@ INDEX_ATTR_BITMAP_KEY
Definition relcache.h:69
@ INDEX_ATTR_BITMAP_HOT_BLOCKING
Definition relcache.h:72
@ INDEX_ATTR_BITMAP_SUMMARIZED
Definition relcache.h:73
@ INDEX_ATTR_BITMAP_IDENTITY_KEY
Definition relcache.h:71
bool RelationSupportsSysCache(Oid relid)
Definition syscache.c:762
@ TU_Summarizing
Definition tableam.h:119
@ TU_All
Definition tableam.h:116
@ TU_None
Definition tableam.h:113
bool TransactionIdDidAbort(TransactionId transactionId)
Definition transam.c:188

References Assert, AssertHasSnapshotForToast(), bms_add_members(), bms_free(), bms_overlap(), BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage(), BufferIsValid(), CacheInvalidateHeapTuple(), CheckForSerializableConflictIn(), TM_FailureData::cmax, compute_infobits(), compute_new_xmax_infomask(), TM_FailureData::ctid, DoesMultiXactIdConflict(), END_CRIT_SECTION, ereport, errcode(), errmsg(), ERROR, ExtractReplicaIdentity(), fb(), GetCurrentTransactionId(), GetMultiXactIdHintBits(), HEAP2_XACT_MASK, heap_acquire_tuplock(), heap_freetuple(), HEAP_LOCKED_UPGRADED(), HEAP_MOVED, heap_toast_insert_or_update(), HEAP_UPDATED, HEAP_XACT_MASK, HEAP_XMAX_BITS, HEAP_XMAX_INVALID, HEAP_XMAX_IS_KEYSHR_LOCKED(), HEAP_XMAX_IS_LOCKED_ONLY(), HEAP_XMAX_IS_MULTI, HEAP_XMAX_KEYSHR_LOCK, HEAP_XMAX_LOCK_ONLY, HeapDetermineColumnsInfo(), HeapTupleClearHeapOnly(), HeapTupleClearHotUpdated(), HeapTupleGetUpdateXid(), HeapTupleHasExternal(), HeapTupleHeaderAdjustCmax(), HeapTupleHeaderGetCmax(), HeapTupleHeaderGetNatts, HeapTupleHeaderGetRawXmax(), HeapTupleHeaderGetUpdateXid(), HeapTupleHeaderSetCmax(), HeapTupleHeaderSetCmin(), HeapTupleHeaderSetXmax(), HeapTupleHeaderSetXmin(), HeapTupleSatisfiesUpdate(), HeapTupleSatisfiesVisibility(), HeapTupleSetHeapOnly(), HeapTupleSetHotUpdated(), INDEX_ATTR_BITMAP_HOT_BLOCKING, INDEX_ATTR_BITMAP_IDENTITY_KEY, INDEX_ATTR_BITMAP_KEY, INDEX_ATTR_BITMAP_SUMMARIZED, INJECTION_POINT, InvalidBuffer, InvalidCommandId, InvalidSnapshot, InvalidTransactionId, IsInParallelMode(), ItemIdGetLength, ItemIdIsNormal, ItemPointerEquals(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), ItemPointerIsValid(), LockBuffer(), LockTupleExclusive, LockTupleNoKeyExclusive, LockWaitBlock, log_heap_new_cid(), log_heap_update(), MarkBufferDirty(), MAXALIGN, MultiXactIdSetOldestMember(), MultiXactIdWait(), MultiXactStatusNoKeyUpdate, MultiXactStatusUpdate, PageClearAllVisible(), PageGetHeapFreeSpace(), PageGetItem(), PageGetItemId(), 
PageIsAllVisible(), PageSetFull(), PageSetLSN(), PageSetPrunable, pgstat_count_heap_update(), RelationData::rd_rel, ReadBuffer(), REGBUF_STANDARD, RelationGetBufferForTuple(), RelationGetIndexAttrBitmap(), RelationGetNumberOfAttributes, RelationGetRelid, RelationIsAccessibleInLogicalDecoding, RelationNeedsWAL, RelationPutHeapTuple(), RelationSupportsSysCache(), ReleaseBuffer(), SizeOfHeapLock, START_CRIT_SECTION, TM_BeingModified, TM_Deleted, TM_Invisible, TM_Ok, TM_SelfModified, TM_Updated, TOAST_TUPLE_THRESHOLD, TransactionIdDidAbort(), TransactionIdEquals, TransactionIdIsCurrentTransactionId(), TransactionIdIsValid, TU_All, TU_None, TU_Summarizing, UnlockReleaseBuffer(), UnlockTupleTuplock, UpdateXmaxHintBits(), VISIBILITYMAP_ALL_FROZEN, visibilitymap_clear(), visibilitymap_pin(), VISIBILITYMAP_VALID_BITS, XactLockTableWait(), XLH_LOCK_ALL_FROZEN_CLEARED, XLOG_HEAP_LOCK, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), XLogRegisterData(), XLTW_Update, TM_FailureData::xmax, and xmax_infomask_changed().

Referenced by heapam_tuple_update(), and simple_heap_update().

◆ heap_vacuum_rel()

void heap_vacuum_rel ( Relation  rel,
const VacuumParams  params,
BufferAccessStrategy  bstrategy 
)
extern

Definition at line 626 of file vacuumlazy.c.

628{
630 bool verbose,
631 instrument,
632 skipwithvm,
640 TimestampTz starttime = 0;
642 startwritetime = 0;
645 ErrorContextCallback errcallback;
646 char **indnames = NULL;
648
649 verbose = (params.options & VACOPT_VERBOSE) != 0;
650 instrument = (verbose || (AmAutoVacuumWorkerProcess() &&
651 params.log_vacuum_min_duration >= 0));
652 if (instrument)
653 {
655 if (track_io_timing)
656 {
659 }
660 }
661
662 /* Used for instrumentation and stats report */
663 starttime = GetCurrentTimestamp();
664
666 RelationGetRelid(rel));
669 params.is_wraparound
672 else
675
676 /*
677 * Setup error traceback support for ereport() first. The idea is to set
678 * up an error context callback to display additional information on any
679 * error during a vacuum. During different phases of vacuum, we update
680 * the state so that the error context callback always display current
681 * information.
682 *
683 * Copy the names of heap rel into local memory for error reporting
684 * purposes, too. It isn't always safe to assume that we can get the name
685 * of each rel. It's convenient for code in lazy_scan_heap to always use
686 * these temp copies.
687 */
690 vacrel->relnamespace = get_namespace_name(RelationGetNamespace(rel));
691 vacrel->relname = pstrdup(RelationGetRelationName(rel));
692 vacrel->indname = NULL;
694 vacrel->verbose = verbose;
695 errcallback.callback = vacuum_error_callback;
696 errcallback.arg = vacrel;
697 errcallback.previous = error_context_stack;
698 error_context_stack = &errcallback;
699
700 /* Set up high level stuff about rel and its indexes */
701 vacrel->rel = rel;
703 &vacrel->indrels);
704 vacrel->bstrategy = bstrategy;
705 if (instrument && vacrel->nindexes > 0)
706 {
707 /* Copy index names used by instrumentation (not error reporting) */
708 indnames = palloc_array(char *, vacrel->nindexes);
709 for (int i = 0; i < vacrel->nindexes; i++)
711 }
712
713 /*
714 * The index_cleanup param either disables index vacuuming and cleanup or
715 * forces it to go ahead when we would otherwise apply the index bypass
716 * optimization. The default is 'auto', which leaves the final decision
717 * up to lazy_vacuum().
718 *
719 * The truncate param allows user to avoid attempting relation truncation,
720 * though it can't force truncation to happen.
721 */
724 params.truncate != VACOPTVALUE_AUTO);
725
726 /*
727 * While VacuumFailSafeActive is reset to false before calling this, we
728 * still need to reset it here due to recursive calls.
729 */
730 VacuumFailsafeActive = false;
731 vacrel->consider_bypass_optimization = true;
732 vacrel->do_index_vacuuming = true;
733 vacrel->do_index_cleanup = true;
734 vacrel->do_rel_truncate = (params.truncate != VACOPTVALUE_DISABLED);
736 {
737 /* Force disable index vacuuming up-front */
738 vacrel->do_index_vacuuming = false;
739 vacrel->do_index_cleanup = false;
740 }
741 else if (params.index_cleanup == VACOPTVALUE_ENABLED)
742 {
743 /* Force index vacuuming. Note that failsafe can still bypass. */
744 vacrel->consider_bypass_optimization = false;
745 }
746 else
747 {
748 /* Default/auto, make all decisions dynamically */
750 }
751
752 /* Initialize page counters explicitly (be tidy) */
753 vacrel->scanned_pages = 0;
754 vacrel->eager_scanned_pages = 0;
755 vacrel->removed_pages = 0;
756 vacrel->new_frozen_tuple_pages = 0;
757 vacrel->lpdead_item_pages = 0;
758 vacrel->missed_dead_pages = 0;
759 vacrel->nonempty_pages = 0;
760 /* dead_items_alloc allocates vacrel->dead_items later on */
761
762 /* Allocate/initialize output statistics state */
763 vacrel->new_rel_tuples = 0;
764 vacrel->new_live_tuples = 0;
765 vacrel->indstats = (IndexBulkDeleteResult **)
766 palloc0(vacrel->nindexes * sizeof(IndexBulkDeleteResult *));
767
768 /* Initialize remaining counters (be tidy) */
769 vacrel->num_index_scans = 0;
770 vacrel->num_dead_items_resets = 0;
771 vacrel->total_dead_items_bytes = 0;
772 vacrel->tuples_deleted = 0;
773 vacrel->tuples_frozen = 0;
774 vacrel->lpdead_items = 0;
775 vacrel->live_tuples = 0;
776 vacrel->recently_dead_tuples = 0;
777 vacrel->missed_dead_tuples = 0;
778
779 vacrel->new_all_visible_pages = 0;
780 vacrel->new_all_visible_all_frozen_pages = 0;
781 vacrel->new_all_frozen_pages = 0;
782
783 /*
784 * Get cutoffs that determine which deleted tuples are considered DEAD,
785 * not just RECENTLY_DEAD, and which XIDs/MXIDs to freeze. Then determine
786 * the extent of the blocks that we'll scan in lazy_scan_heap. It has to
787 * happen in this order to ensure that the OldestXmin cutoff field works
788 * as an upper bound on the XIDs stored in the pages we'll actually scan
789 * (NewRelfrozenXid tracking must never be allowed to miss unfrozen XIDs).
790 *
791 * Next acquire vistest, a related cutoff that's used in pruning. We use
792 * vistest in combination with OldestXmin to ensure that
793 * heap_page_prune_and_freeze() always removes any deleted tuple whose
794 * xmax is < OldestXmin. lazy_scan_prune must never become confused about
795 * whether a tuple should be frozen or removed. (In the future we might
796 * want to teach lazy_scan_prune to recompute vistest from time to time,
797 * to increase the number of dead tuples it can prune away.)
798 */
799 vacrel->aggressive = vacuum_get_cutoffs(rel, params, &vacrel->cutoffs);
801 vacrel->vistest = GlobalVisTestFor(rel);
802
803 /* Initialize state used to track oldest extant XID/MXID */
804 vacrel->NewRelfrozenXid = vacrel->cutoffs.OldestXmin;
805 vacrel->NewRelminMxid = vacrel->cutoffs.OldestMxact;
806
807 /*
808 * Initialize state related to tracking all-visible page skipping. This is
809 * very important to determine whether or not it is safe to advance the
810 * relfrozenxid/relminmxid.
811 */
812 vacrel->skippedallvis = false;
813 skipwithvm = true;
815 {
816 /*
817 * Force aggressive mode, and disable skipping blocks using the
818 * visibility map (even those set all-frozen)
819 */
820 vacrel->aggressive = true;
821 skipwithvm = false;
822 }
823
824 vacrel->skipwithvm = skipwithvm;
825
826 /*
827 * Set up eager scan tracking state. This must happen after determining
828 * whether or not the vacuum must be aggressive, because only normal
829 * vacuums use the eager scan algorithm.
830 */
832
833 /* Report the vacuum mode: 'normal' or 'aggressive' */
835 vacrel->aggressive
838
839 if (verbose)
840 {
841 if (vacrel->aggressive)
843 (errmsg("aggressively vacuuming \"%s.%s.%s\"",
844 vacrel->dbname, vacrel->relnamespace,
845 vacrel->relname)));
846 else
848 (errmsg("vacuuming \"%s.%s.%s\"",
849 vacrel->dbname, vacrel->relnamespace,
850 vacrel->relname)));
851 }
852
853 /*
854 * Allocate dead_items memory using dead_items_alloc. This handles
855 * parallel VACUUM initialization as part of allocating shared memory
856 * space used for dead_items. (But do a failsafe precheck first, to
857 * ensure that parallel VACUUM won't be attempted at all when relfrozenxid
858 * is already dangerously old.)
859 */
862
863 /*
864 * Call lazy_scan_heap to perform all required heap pruning, index
865 * vacuuming, and heap vacuuming (plus related processing)
866 */
868
869 /*
870 * Save dead items max_bytes and update the memory usage statistics before
871 * cleanup, they are freed in parallel vacuum cases during
872 * dead_items_cleanup().
873 */
874 dead_items_max_bytes = vacrel->dead_items_info->max_bytes;
875 vacrel->total_dead_items_bytes += TidStoreMemoryUsage(vacrel->dead_items);
876
877 /*
878 * Free resources managed by dead_items_alloc. This ends parallel mode in
879 * passing when necessary.
880 */
883
884 /*
885 * Update pg_class entries for each of rel's indexes where appropriate.
886 *
887 * Unlike the later update to rel's pg_class entry, this is not critical.
888 * Maintains relpages/reltuples statistics used by the planner only.
889 */
890 if (vacrel->do_index_cleanup)
892
893 /* Done with rel's indexes */
894 vac_close_indexes(vacrel->nindexes, vacrel->indrels, NoLock);
895
896 /* Optionally truncate rel */
899
900 /* Pop the error context stack */
901 error_context_stack = errcallback.previous;
902
903 /* Report that we are now doing final cleanup */
906
907 /*
908 * Prepare to update rel's pg_class entry.
909 *
910 * Aggressive VACUUMs must always be able to advance relfrozenxid to a
911 * value >= FreezeLimit, and relminmxid to a value >= MultiXactCutoff.
912 * Non-aggressive VACUUMs may advance them by any amount, or not at all.
913 */
914 Assert(vacrel->NewRelfrozenXid == vacrel->cutoffs.OldestXmin ||
915 TransactionIdPrecedesOrEquals(vacrel->aggressive ? vacrel->cutoffs.FreezeLimit :
916 vacrel->cutoffs.relfrozenxid,
917 vacrel->NewRelfrozenXid));
918 Assert(vacrel->NewRelminMxid == vacrel->cutoffs.OldestMxact ||
919 MultiXactIdPrecedesOrEquals(vacrel->aggressive ? vacrel->cutoffs.MultiXactCutoff :
920 vacrel->cutoffs.relminmxid,
921 vacrel->NewRelminMxid));
922 if (vacrel->skippedallvis)
923 {
924 /*
925 * Must keep original relfrozenxid in a non-aggressive VACUUM that
926 * chose to skip an all-visible page range. The state that tracks new
927 * values will have missed unfrozen XIDs from the pages we skipped.
928 */
929 Assert(!vacrel->aggressive);
930 vacrel->NewRelfrozenXid = InvalidTransactionId;
931 vacrel->NewRelminMxid = InvalidMultiXactId;
932 }
933
934 /*
935 * For safety, clamp relallvisible to be not more than what we're setting
936 * pg_class.relpages to
937 */
938 new_rel_pages = vacrel->rel_pages; /* After possible rel truncation */
942
943 /*
944 * An all-frozen block _must_ be all-visible. As such, clamp the count of
945 * all-frozen blocks to the count of all-visible blocks. This matches the
946 * clamping of relallvisible above.
947 */
950
951 /*
952 * Now actually update rel's pg_class entry.
953 *
954 * In principle new_live_tuples could be -1 indicating that we (still)
955 * don't know the tuple count. In practice that can't happen, since we
956 * scan every page that isn't skipped using the visibility map.
957 */
958 vac_update_relstats(rel, new_rel_pages, vacrel->new_live_tuples,
960 vacrel->nindexes > 0,
961 vacrel->NewRelfrozenXid, vacrel->NewRelminMxid,
963
964 /*
965 * Report results to the cumulative stats system, too.
966 *
967 * Deliberately avoid telling the stats system about LP_DEAD items that
968 * remain in the table due to VACUUM bypassing index and heap vacuuming.
969 * ANALYZE will consider the remaining LP_DEAD items to be dead "tuples".
970 * It seems like a good idea to err on the side of not vacuuming again too
971 * soon in cases where the failsafe prevented significant amounts of heap
972 * vacuuming.
973 */
975 Max(vacrel->new_live_tuples, 0),
976 vacrel->recently_dead_tuples +
977 vacrel->missed_dead_tuples,
978 starttime);
980
981 if (instrument)
982 {
984
985 if (verbose || params.log_vacuum_min_duration == 0 ||
988 {
989 long secs_dur;
990 int usecs_dur;
991 WalUsage walusage;
992 BufferUsage bufferusage;
994 char *msgfmt;
995 int32 diff;
996 double read_rate = 0,
997 write_rate = 0;
1001
1003 memset(&walusage, 0, sizeof(WalUsage));
1005 memset(&bufferusage, 0, sizeof(BufferUsage));
1007
1008 total_blks_hit = bufferusage.shared_blks_hit +
1009 bufferusage.local_blks_hit;
1010 total_blks_read = bufferusage.shared_blks_read +
1011 bufferusage.local_blks_read;
1013 bufferusage.local_blks_dirtied;
1014
1016 if (verbose)
1017 {
1018 /*
1019 * Aggressiveness already reported earlier, in dedicated
1020 * VACUUM VERBOSE ereport
1021 */
1022 Assert(!params.is_wraparound);
1023 msgfmt = _("finished vacuuming \"%s.%s.%s\": index scans: %d\n");
1024 }
1025 else if (params.is_wraparound)
1026 {
1027 /*
1028 * While it's possible for a VACUUM to be both is_wraparound
1029 * and !aggressive, that's just a corner-case -- is_wraparound
1030 * implies aggressive. Produce distinct output for the corner
1031 * case all the same, just in case.
1032 */
1033 if (vacrel->aggressive)
1034 msgfmt = _("automatic aggressive vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
1035 else
1036 msgfmt = _("automatic vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
1037 }
1038 else
1039 {
1040 if (vacrel->aggressive)
1041 msgfmt = _("automatic aggressive vacuum of table \"%s.%s.%s\": index scans: %d\n");
1042 else
1043 msgfmt = _("automatic vacuum of table \"%s.%s.%s\": index scans: %d\n");
1044 }
1046 vacrel->dbname,
1047 vacrel->relnamespace,
1048 vacrel->relname,
1049 vacrel->num_index_scans);
1050 appendStringInfo(&buf, _("pages: %u removed, %u remain, %u scanned (%.2f%% of total), %u eagerly scanned\n"),
1051 vacrel->removed_pages,
1053 vacrel->scanned_pages,
1054 orig_rel_pages == 0 ? 100.0 :
1055 100.0 * vacrel->scanned_pages /
1057 vacrel->eager_scanned_pages);
1059 _("tuples: %" PRId64 " removed, %" PRId64 " remain, %" PRId64 " are dead but not yet removable\n"),
1060 vacrel->tuples_deleted,
1061 (int64) vacrel->new_rel_tuples,
1062 vacrel->recently_dead_tuples);
1063 if (vacrel->missed_dead_tuples > 0)
1065 _("tuples missed: %" PRId64 " dead from %u pages not removed due to cleanup lock contention\n"),
1066 vacrel->missed_dead_tuples,
1067 vacrel->missed_dead_pages);
1069 vacrel->cutoffs.OldestXmin);
1071 _("removable cutoff: %u, which was %d XIDs old when operation ended\n"),
1072 vacrel->cutoffs.OldestXmin, diff);
1074 {
1075 diff = (int32) (vacrel->NewRelfrozenXid -
1076 vacrel->cutoffs.relfrozenxid);
1078 _("new relfrozenxid: %u, which is %d XIDs ahead of previous value\n"),
1079 vacrel->NewRelfrozenXid, diff);
1080 }
1081 if (minmulti_updated)
1082 {
1083 diff = (int32) (vacrel->NewRelminMxid -
1084 vacrel->cutoffs.relminmxid);
1086 _("new relminmxid: %u, which is %d MXIDs ahead of previous value\n"),
1087 vacrel->NewRelminMxid, diff);
1088 }
1089 appendStringInfo(&buf, _("frozen: %u pages from table (%.2f%% of total) had %" PRId64 " tuples frozen\n"),
1090 vacrel->new_frozen_tuple_pages,
1091 orig_rel_pages == 0 ? 100.0 :
1092 100.0 * vacrel->new_frozen_tuple_pages /
1094 vacrel->tuples_frozen);
1095
1097 _("visibility map: %u pages set all-visible, %u pages set all-frozen (%u were all-visible)\n"),
1098 vacrel->new_all_visible_pages,
1099 vacrel->new_all_visible_all_frozen_pages +
1100 vacrel->new_all_frozen_pages,
1101 vacrel->new_all_frozen_pages);
1102 if (vacrel->do_index_vacuuming)
1103 {
1104 if (vacrel->nindexes == 0 || vacrel->num_index_scans == 0)
1105 appendStringInfoString(&buf, _("index scan not needed: "));
1106 else
1107 appendStringInfoString(&buf, _("index scan needed: "));
1108
1109 msgfmt = _("%u pages from table (%.2f%% of total) had %" PRId64 " dead item identifiers removed\n");
1110 }
1111 else
1112 {
1114 appendStringInfoString(&buf, _("index scan bypassed: "));
1115 else
1116 appendStringInfoString(&buf, _("index scan bypassed by failsafe: "));
1117
1118 msgfmt = _("%u pages from table (%.2f%% of total) have %" PRId64 " dead item identifiers\n");
1119 }
1121 vacrel->lpdead_item_pages,
1122 orig_rel_pages == 0 ? 100.0 :
1123 100.0 * vacrel->lpdead_item_pages / orig_rel_pages,
1124 vacrel->lpdead_items);
1125 for (int i = 0; i < vacrel->nindexes; i++)
1126 {
1127 IndexBulkDeleteResult *istat = vacrel->indstats[i];
1128
1129 if (!istat)
1130 continue;
1131
1133 _("index \"%s\": pages: %u in total, %u newly deleted, %u currently deleted, %u reusable\n"),
1134 indnames[i],
1135 istat->num_pages,
1136 istat->pages_newly_deleted,
1137 istat->pages_deleted,
1138 istat->pages_free);
1139 }
1141 {
1142 /*
1143 * We bypass the changecount mechanism because this value is
1144 * only updated by the calling process. We also rely on the
1145 * above call to pgstat_progress_end_command() to not clear
1146 * the st_progress_param array.
1147 */
1148 appendStringInfo(&buf, _("delay time: %.3f ms\n"),
1150 }
1151 if (track_io_timing)
1152 {
1153 double read_ms = (double) (pgStatBlockReadTime - startreadtime) / 1000;
1154 double write_ms = (double) (pgStatBlockWriteTime - startwritetime) / 1000;
1155
1156 appendStringInfo(&buf, _("I/O timings: read: %.3f ms, write: %.3f ms\n"),
1157 read_ms, write_ms);
1158 }
1159 if (secs_dur > 0 || usecs_dur > 0)
1160 {
1162 (1024 * 1024) / (secs_dur + usecs_dur / 1000000.0);
1164 (1024 * 1024) / (secs_dur + usecs_dur / 1000000.0);
1165 }
1166 appendStringInfo(&buf, _("avg read rate: %.3f MB/s, avg write rate: %.3f MB/s\n"),
1169 _("buffer usage: %" PRId64 " hits, %" PRId64 " reads, %" PRId64 " dirtied\n"),
1174 _("WAL usage: %" PRId64 " records, %" PRId64 " full page images, %" PRIu64 " bytes, %" PRIu64 " full page image bytes, %" PRId64 " buffers full\n"),
1175 walusage.wal_records,
1176 walusage.wal_fpi,
1177 walusage.wal_bytes,
1178 walusage.wal_fpi_bytes,
1179 walusage.wal_buffers_full);
1180
1181 /*
1182 * Report the dead items memory usage.
1183 *
1184 * The num_dead_items_resets counter increases when we reset the
1185 * collected dead items, so the counter is non-zero if at least
1186 * one dead items are collected, even if index vacuuming is
1187 * disabled.
1188 */
1190 ngettext("memory usage: dead item storage %.2f MB accumulated across %d reset (limit %.2f MB each)\n",
1191 "memory usage: dead item storage %.2f MB accumulated across %d resets (limit %.2f MB each)\n",
1192 vacrel->num_dead_items_resets),
1193 (double) vacrel->total_dead_items_bytes / (1024 * 1024),
1194 vacrel->num_dead_items_resets,
1195 (double) dead_items_max_bytes / (1024 * 1024));
1196 appendStringInfo(&buf, _("system usage: %s"), pg_rusage_show(&ru0));
1197
1198 ereport(verbose ? INFO : LOG,
1199 (errmsg_internal("%s", buf.data)));
1200 pfree(buf.data);
1201 }
1202 }
1203
1204 /* Cleanup index statistics and index names */
1205 for (int i = 0; i < vacrel->nindexes; i++)
1206 {
1207 if (vacrel->indstats[i])
1208 pfree(vacrel->indstats[i]);
1209
1210 if (instrument)
1211 pfree(indnames[i]);
1212 }
1213}
void TimestampDifference(TimestampTz start_time, TimestampTz stop_time, long *secs, int *microsecs)
Definition timestamp.c:1719
bool TimestampDifferenceExceeds(TimestampTz start_time, TimestampTz stop_time, int msec)
Definition timestamp.c:1779
TimestampTz GetCurrentTimestamp(void)
Definition timestamp.c:1643
void pgstat_progress_start_command(ProgressCommandType cmdtype, Oid relid)
void pgstat_progress_update_param(int index, int64 val)
void pgstat_progress_end_command(void)
@ PROGRESS_COMMAND_VACUUM
PgBackendStatus * MyBEEntry
bool track_io_timing
Definition bufmgr.c:177
#define RelationGetNumberOfBlocks(reln)
Definition bufmgr.h:307
#define ngettext(s, p, n)
Definition c.h:1219
int32_t int32
Definition c.h:575
int64 TimestampTz
Definition timestamp.h:39
ErrorContextCallback * error_context_stack
Definition elog.c:99
#define _(x)
Definition elog.c:95
#define LOG
Definition elog.h:31
#define INFO
Definition elog.h:34
#define palloc0_object(type)
Definition fe_memutils.h:75
void WalUsageAccumDiff(WalUsage *dst, const WalUsage *add, const WalUsage *sub)
Definition instrument.c:289
BufferUsage pgBufferUsage
Definition instrument.c:20
void BufferUsageAccumDiff(BufferUsage *dst, const BufferUsage *add, const BufferUsage *sub)
Definition instrument.c:249
#define NoLock
Definition lockdefs.h:34
#define RowExclusiveLock
Definition lockdefs.h:38
char * get_database_name(Oid dbid)
Definition lsyscache.c:1242
char * get_namespace_name(Oid nspid)
Definition lsyscache.c:3518
char * pstrdup(const char *in)
Definition mcxt.c:1781
void * palloc0(Size size)
Definition mcxt.c:1417
#define AmAutoVacuumWorkerProcess()
Definition miscadmin.h:383
static int verbose
const void * data
const char * pg_rusage_show(const PGRUsage *ru0)
Definition pg_rusage.c:40
void pg_rusage_init(PGRUsage *ru0)
Definition pg_rusage.c:27
int64 PgStat_Counter
Definition pgstat.h:71
PgStat_Counter pgStatBlockReadTime
PgStat_Counter pgStatBlockWriteTime
void pgstat_report_vacuum(Relation rel, PgStat_Counter livetuples, PgStat_Counter deadtuples, TimestampTz starttime)
#define PROGRESS_VACUUM_PHASE_FINAL_CLEANUP
Definition progress.h:41
#define PROGRESS_VACUUM_MODE
Definition progress.h:32
#define PROGRESS_VACUUM_MODE_NORMAL
Definition progress.h:44
#define PROGRESS_VACUUM_STARTED_BY_AUTOVACUUM
Definition progress.h:50
#define PROGRESS_VACUUM_PHASE
Definition progress.h:21
#define PROGRESS_VACUUM_DELAY_TIME
Definition progress.h:31
#define PROGRESS_VACUUM_STARTED_BY_AUTOVACUUM_WRAPAROUND
Definition progress.h:51
#define PROGRESS_VACUUM_STARTED_BY_MANUAL
Definition progress.h:49
#define PROGRESS_VACUUM_STARTED_BY
Definition progress.h:33
#define PROGRESS_VACUUM_MODE_AGGRESSIVE
Definition progress.h:45
#define RelationGetNamespace(relation)
Definition rel.h:555
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition stringinfo.c:145
void appendStringInfoString(StringInfo str, const char *s)
Definition stringinfo.c:230
void initStringInfo(StringInfo str)
Definition stringinfo.c:97
int64 shared_blks_dirtied
Definition instrument.h:28
int64 local_blks_hit
Definition instrument.h:30
int64 shared_blks_read
Definition instrument.h:27
int64 local_blks_read
Definition instrument.h:31
int64 local_blks_dirtied
Definition instrument.h:32
int64 shared_blks_hit
Definition instrument.h:26
struct ErrorContextCallback * previous
Definition elog.h:297
void(* callback)(void *arg)
Definition elog.h:298
BlockNumber pages_deleted
Definition genam.h:88
BlockNumber pages_newly_deleted
Definition genam.h:87
BlockNumber pages_free
Definition genam.h:89
BlockNumber num_pages
Definition genam.h:83
int64 st_progress_param[PGSTAT_NUM_PROGRESS_PARAM]
int nworkers
Definition vacuum.h:251
VacOptValue truncate
Definition vacuum.h:236
bits32 options
Definition vacuum.h:219
int log_vacuum_min_duration
Definition vacuum.h:227
bool is_wraparound
Definition vacuum.h:226
VacOptValue index_cleanup
Definition vacuum.h:235
int64 wal_buffers_full
Definition instrument.h:57
uint64 wal_bytes
Definition instrument.h:55
uint64 wal_fpi_bytes
Definition instrument.h:56
int64 wal_records
Definition instrument.h:53
size_t TidStoreMemoryUsage(TidStore *ts)
Definition tidstore.c:532
static TransactionId ReadNextTransactionId(void)
Definition transam.h:377
bool track_cost_delay_timing
Definition vacuum.c:82
void vac_open_indexes(Relation relation, LOCKMODE lockmode, int *nindexes, Relation **Irel)
Definition vacuum.c:2366
void vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode)
Definition vacuum.c:2409
bool VacuumFailsafeActive
Definition vacuum.c:110
void vac_update_relstats(Relation relation, BlockNumber num_pages, double num_tuples, BlockNumber num_all_visible_pages, BlockNumber num_all_frozen_pages, bool hasindex, TransactionId frozenxid, MultiXactId minmulti, bool *frozenxid_updated, bool *minmulti_updated, bool in_outer_xact)
Definition vacuum.c:1426
bool vacuum_get_cutoffs(Relation rel, const VacuumParams params, struct VacuumCutoffs *cutoffs)
Definition vacuum.c:1100
#define VACOPT_VERBOSE
Definition vacuum.h:182
@ VACOPTVALUE_AUTO
Definition vacuum.h:203
@ VACOPTVALUE_ENABLED
Definition vacuum.h:205
@ VACOPTVALUE_UNSPECIFIED
Definition vacuum.h:202
@ VACOPTVALUE_DISABLED
Definition vacuum.h:204
#define VACOPT_DISABLE_PAGE_SKIPPING
Definition vacuum.h:188
static void dead_items_cleanup(LVRelState *vacrel)
static void update_relstats_all_indexes(LVRelState *vacrel)
static void heap_vacuum_eager_scan_setup(LVRelState *vacrel, const VacuumParams params)
Definition vacuumlazy.c:499
static void vacuum_error_callback(void *arg)
static void lazy_truncate_heap(LVRelState *vacrel)
static bool should_attempt_truncation(LVRelState *vacrel)
@ VACUUM_ERRCB_PHASE_UNKNOWN
Definition vacuumlazy.c:224
static void lazy_scan_heap(LVRelState *vacrel)
static bool lazy_check_wraparound_failsafe(LVRelState *vacrel)
static void dead_items_alloc(LVRelState *vacrel, int nworkers)
void visibilitymap_count(Relation rel, BlockNumber *all_visible, BlockNumber *all_frozen)

References _, AmAutoVacuumWorkerProcess, appendStringInfo(), appendStringInfoString(), ErrorContextCallback::arg, Assert, buf, BufferUsageAccumDiff(), ErrorContextCallback::callback, dead_items_alloc(), dead_items_cleanup(), ereport, errmsg(), errmsg_internal(), error_context_stack, fb(), get_database_name(), get_namespace_name(), GetCurrentTimestamp(), GlobalVisTestFor(), heap_vacuum_eager_scan_setup(), i, VacuumParams::index_cleanup, INFO, initStringInfo(), InvalidMultiXactId, InvalidTransactionId, VacuumParams::is_wraparound, IsInParallelMode(), lazy_check_wraparound_failsafe(), lazy_scan_heap(), lazy_truncate_heap(), BufferUsage::local_blks_dirtied, BufferUsage::local_blks_hit, BufferUsage::local_blks_read, LOG, VacuumParams::log_vacuum_min_duration, Max, MultiXactIdPrecedesOrEquals(), MyBEEntry, MyDatabaseId, ngettext, NoLock, IndexBulkDeleteResult::num_pages, VacuumParams::nworkers, VacuumParams::options, IndexBulkDeleteResult::pages_deleted, IndexBulkDeleteResult::pages_free, IndexBulkDeleteResult::pages_newly_deleted, palloc0(), palloc0_object, palloc_array, pfree(), pg_rusage_init(), pg_rusage_show(), pgBufferUsage, pgstat_progress_end_command(), pgstat_progress_start_command(), pgstat_progress_update_param(), pgstat_report_vacuum(), pgStatBlockReadTime, pgStatBlockWriteTime, pgWalUsage, ErrorContextCallback::previous, PROGRESS_COMMAND_VACUUM, PROGRESS_VACUUM_DELAY_TIME, PROGRESS_VACUUM_MODE, PROGRESS_VACUUM_MODE_AGGRESSIVE, PROGRESS_VACUUM_MODE_NORMAL, PROGRESS_VACUUM_PHASE, PROGRESS_VACUUM_PHASE_FINAL_CLEANUP, PROGRESS_VACUUM_STARTED_BY, PROGRESS_VACUUM_STARTED_BY_AUTOVACUUM, PROGRESS_VACUUM_STARTED_BY_AUTOVACUUM_WRAPAROUND, PROGRESS_VACUUM_STARTED_BY_MANUAL, pstrdup(), ReadNextTransactionId(), RelationGetNamespace, RelationGetNumberOfBlocks, RelationGetRelationName, RelationGetRelid, RowExclusiveLock, BufferUsage::shared_blks_dirtied, BufferUsage::shared_blks_hit, BufferUsage::shared_blks_read, should_attempt_truncation(), 
PgBackendStatus::st_progress_param, TidStoreMemoryUsage(), TimestampDifference(), TimestampDifferenceExceeds(), track_cost_delay_timing, track_io_timing, TransactionIdPrecedesOrEquals(), VacuumParams::truncate, update_relstats_all_indexes(), vac_close_indexes(), vac_open_indexes(), vac_update_relstats(), VACOPT_DISABLE_PAGE_SKIPPING, VACOPT_VERBOSE, VACOPTVALUE_AUTO, VACOPTVALUE_DISABLED, VACOPTVALUE_ENABLED, VACOPTVALUE_UNSPECIFIED, VACUUM_ERRCB_PHASE_UNKNOWN, vacuum_error_callback(), vacuum_get_cutoffs(), VacuumFailsafeActive, verbose, visibilitymap_count(), WalUsage::wal_buffers_full, WalUsage::wal_bytes, WalUsage::wal_fpi, WalUsage::wal_fpi_bytes, WalUsage::wal_records, and WalUsageAccumDiff().

◆ HeapCheckForSerializableConflictOut()

void HeapCheckForSerializableConflictOut ( bool  visible,
Relation  relation,
HeapTuple  tuple,
Buffer  buffer,
Snapshot  snapshot 
)
extern

Definition at line 9327 of file heapam.c.

9330{
9331 TransactionId xid;
9333
9334 if (!CheckForSerializableConflictOutNeeded(relation, snapshot))
9335 return;
9336
9337 /*
9338 * Check to see whether the tuple has been written to by a concurrent
9339 * transaction, either to create it not visible to us, or to delete it
9340 * while it is visible to us. The "visible" bool indicates whether the
9341 * tuple is visible to us, while HeapTupleSatisfiesVacuum checks what else
9342 * is going on with it.
9343 *
9344 * In the event of a concurrently inserted tuple that also happens to have
9345 * been concurrently updated (by a separate transaction), the xmin of the
9346 * tuple will be used -- not the updater's xid.
9347 */
9349 switch (htsvResult)
9350 {
9351 case HEAPTUPLE_LIVE:
9352 if (visible)
9353 return;
9354 xid = HeapTupleHeaderGetXmin(tuple->t_data);
9355 break;
9358 if (visible)
9359 xid = HeapTupleHeaderGetUpdateXid(tuple->t_data);
9360 else
9361 xid = HeapTupleHeaderGetXmin(tuple->t_data);
9362
9364 {
9365 /* This is like the HEAPTUPLE_DEAD case */
9366 Assert(!visible);
9367 return;
9368 }
9369 break;
9371 xid = HeapTupleHeaderGetXmin(tuple->t_data);
9372 break;
9373 case HEAPTUPLE_DEAD:
9374 Assert(!visible);
9375 return;
9376 default:
9377
9378 /*
9379 * The only way to get to this default clause is if a new value is
9380 * added to the enum type without adding it to this switch
9381 * statement. That's a bug, so elog.
9382 */
9383 elog(ERROR, "unrecognized return value from HeapTupleSatisfiesVacuum: %u", htsvResult);
9384
9385 /*
9386 * In spite of having all enum values covered and calling elog on
9387 * this default, some compilers think this is a code path which
9388 * allows xid to be used below without initialization. Silence
9389 * that warning.
9390 */
9392 }
9393
9396
9397 /*
9398 * Find top level xid. Bail out if xid is too early to be a conflict, or
9399 * if it's our own xid.
9400 */
9402 return;
9405 return;
9406
9407 CheckForSerializableConflictOut(relation, xid, snapshot);
9408}
HTSV_Result HeapTupleSatisfiesVacuum(HeapTuple htup, TransactionId OldestXmin, Buffer buffer)
void CheckForSerializableConflictOut(Relation relation, TransactionId xid, Snapshot snapshot)
Definition predicate.c:4021
TransactionId SubTransGetTopmostTransaction(TransactionId xid)
Definition subtrans.c:162
static bool TransactionIdFollowsOrEquals(TransactionId id1, TransactionId id2)
Definition transam.h:312
TransactionId GetTopTransactionIdIfAny(void)
Definition xact.c:442

References Assert, CheckForSerializableConflictOut(), CheckForSerializableConflictOutNeeded(), elog, ERROR, fb(), GetTopTransactionIdIfAny(), HEAPTUPLE_DEAD, HEAPTUPLE_DELETE_IN_PROGRESS, HEAPTUPLE_INSERT_IN_PROGRESS, HEAPTUPLE_LIVE, HEAPTUPLE_RECENTLY_DEAD, HeapTupleHeaderGetUpdateXid(), HeapTupleHeaderGetXmin(), HeapTupleSatisfiesVacuum(), InvalidTransactionId, SubTransGetTopmostTransaction(), HeapTupleData::t_data, TransactionIdEquals, TransactionIdFollowsOrEquals(), TransactionIdIsValid, TransactionIdPrecedes(), and TransactionXmin.

Referenced by BitmapHeapScanNextBlock(), heap_fetch(), heap_get_latest_tid(), heap_hot_search_buffer(), heapam_scan_sample_next_tuple(), heapgettup(), and page_collect_tuples().

◆ HeapTupleHeaderIsOnlyLocked()

bool HeapTupleHeaderIsOnlyLocked ( HeapTupleHeader  tuple)
extern

Definition at line 1365 of file heapam_visibility.c.

1366{
1367 TransactionId xmax;
1368
1369 /* if there's no valid Xmax, then there's obviously no update either */
1370 if (tuple->t_infomask & HEAP_XMAX_INVALID)
1371 return true;
1372
1373 if (tuple->t_infomask & HEAP_XMAX_LOCK_ONLY)
1374 return true;
1375
 1376 /* invalid xmax means no update */
 1377 if (!TransactionIdIsValid(HeapTupleHeaderGetRawXmax(tuple)))
 1378 return true;
1379
1380 /*
1381 * if HEAP_XMAX_LOCK_ONLY is not set and not a multi, then this must
1382 * necessarily have been updated
1383 */
1384 if (!(tuple->t_infomask & HEAP_XMAX_IS_MULTI))
1385 return false;
1386
1387 /* ... but if it's a multi, then perhaps the updating Xid aborted. */
1388 xmax = HeapTupleGetUpdateXid(tuple);
1389
 1390 /* not LOCKED_ONLY, so it has to have an xmax */
 1391 Assert(TransactionIdIsValid(xmax));
 1392
 1393 if (TransactionIdIsCurrentTransactionId(xmax))
 1394 return false;
1395 if (TransactionIdIsInProgress(xmax))
1396 return false;
1397 if (TransactionIdDidCommit(xmax))
1398 return false;
1399
1400 /*
1401 * not current, not in progress, not committed -- must have aborted or
1402 * crashed
1403 */
1404 return true;
1405}
bool TransactionIdIsInProgress(TransactionId xid)
Definition procarray.c:1405

References Assert, HEAP_XMAX_INVALID, HEAP_XMAX_IS_MULTI, HEAP_XMAX_LOCK_ONLY, HeapTupleGetUpdateXid(), HeapTupleHeaderGetRawXmax(), HeapTupleHeaderData::t_infomask, TransactionIdDidCommit(), TransactionIdIsCurrentTransactionId(), TransactionIdIsInProgress(), and TransactionIdIsValid.

Referenced by heap_delete(), heap_get_latest_tid(), heap_lock_tuple(), heap_lock_updated_tuple_rec(), HeapTupleSatisfiesVacuumHorizon(), and rewrite_heap_tuple().

◆ HeapTupleIsSurelyDead()

bool HeapTupleIsSurelyDead ( HeapTuple  htup,
GlobalVisState vistest 
)
extern

Definition at line 1310 of file heapam_visibility.c.

1311{
1312 HeapTupleHeader tuple = htup->t_data;
 1313
 1314 Assert(ItemPointerIsValid(&htup->t_self));
 1315 Assert(htup->t_tableOid != InvalidOid);
1316
1317 /*
1318 * If the inserting transaction is marked invalid, then it aborted, and
1319 * the tuple is definitely dead. If it's marked neither committed nor
1320 * invalid, then we assume it's still alive (since the presumption is that
1321 * all relevant hint bits were just set moments ago).
1322 */
1323 if (!HeapTupleHeaderXminCommitted(tuple))
1324 return HeapTupleHeaderXminInvalid(tuple);
1325
1326 /*
1327 * If the inserting transaction committed, but any deleting transaction
1328 * aborted, the tuple is still alive.
1329 */
1330 if (tuple->t_infomask & HEAP_XMAX_INVALID)
1331 return false;
1332
1333 /*
1334 * If the XMAX is just a lock, the tuple is still alive.
 1335 */
 1336 if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
 1337 return false;
1338
1339 /*
1340 * If the Xmax is a MultiXact, it might be dead or alive, but we cannot
1341 * know without checking pg_multixact.
1342 */
1343 if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
1344 return false;
1345
1346 /* If deleter isn't known to have committed, assume it's still running. */
1347 if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED))
1348 return false;
1349
1350 /* Deleter committed, so tuple is dead if the XID is old enough. */
 1351 return GlobalVisTestIsRemovableXid(vistest,
 1352 HeapTupleHeaderGetRawXmax(tuple));
 1353}
static bool HeapTupleHeaderXminInvalid(const HeapTupleHeaderData *tup)
static bool HeapTupleHeaderXminCommitted(const HeapTupleHeaderData *tup)
#define InvalidOid

References Assert, GlobalVisTestIsRemovableXid(), HEAP_XMAX_COMMITTED, HEAP_XMAX_INVALID, HEAP_XMAX_IS_LOCKED_ONLY(), HEAP_XMAX_IS_MULTI, HeapTupleHeaderGetRawXmax(), HeapTupleHeaderXminCommitted(), HeapTupleHeaderXminInvalid(), InvalidOid, ItemPointerIsValid(), HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleData::t_self, and HeapTupleData::t_tableOid.

Referenced by heap_hot_search_buffer().

◆ HeapTupleSatisfiesMVCCBatch()

int HeapTupleSatisfiesMVCCBatch ( Snapshot  snapshot,
Buffer  buffer,
int  ntups,
BatchMVCCState batchmvcc,
OffsetNumber vistuples_dense 
)
extern

Definition at line 1617 of file heapam_visibility.c.

1621{
1622 int nvis = 0;
1623
1624 Assert(IsMVCCSnapshot(snapshot));
1625
1626 for (int i = 0; i < ntups; i++)
1627 {
1628 bool valid;
1629 HeapTuple tup = &batchmvcc->tuples[i];
1630
1631 valid = HeapTupleSatisfiesMVCC(tup, snapshot, buffer);
1632 batchmvcc->visible[i] = valid;
1633
1634 if (likely(valid))
1635 {
1636 vistuples_dense[nvis] = tup->t_self.ip_posid;
1637 nvis++;
1638 }
1639 }
1640
1641 return nvis;
1642}
static bool HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot, Buffer buffer)

References Assert, fb(), HeapTupleSatisfiesMVCC(), i, IsMVCCSnapshot, and likely.

Referenced by page_collect_tuples().

◆ HeapTupleSatisfiesUpdate()

TM_Result HeapTupleSatisfiesUpdate ( HeapTuple  htup,
CommandId  curcid,
Buffer  buffer 
)
extern

Definition at line 440 of file heapam_visibility.c.

442{
443 HeapTupleHeader tuple = htup->t_data;
444
446 Assert(htup->t_tableOid != InvalidOid);
447
 449 {
 450 if (HeapTupleHeaderXminInvalid(tuple))
 451 return TM_Invisible;
452
453 else if (!HeapTupleCleanMoved(tuple, buffer))
454 return TM_Invisible;
 455 else if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmin(tuple)))
 456 {
457 if (HeapTupleHeaderGetCmin(tuple) >= curcid)
458 return TM_Invisible; /* inserted after scan started */
459
460 if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */
461 return TM_Ok;
462
464 {
465 TransactionId xmax;
466
467 xmax = HeapTupleHeaderGetRawXmax(tuple);
468
469 /*
470 * Careful here: even though this tuple was created by our own
471 * transaction, it might be locked by other transactions, if
472 * the original version was key-share locked when we updated
473 * it.
474 */
475
476 if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
477 {
478 if (MultiXactIdIsRunning(xmax, true))
479 return TM_BeingModified;
480 else
481 return TM_Ok;
482 }
483
484 /*
485 * If the locker is gone, then there is nothing of interest
486 * left in this Xmax; otherwise, report the tuple as
487 * locked/updated.
488 */
489 if (!TransactionIdIsInProgress(xmax))
490 return TM_Ok;
491 return TM_BeingModified;
492 }
493
494 if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
495 {
496 TransactionId xmax;
497
498 xmax = HeapTupleGetUpdateXid(tuple);
499
500 /* not LOCKED_ONLY, so it has to have an xmax */
502
503 /* deleting subtransaction must have aborted */
505 {
507 false))
508 return TM_BeingModified;
509 return TM_Ok;
510 }
511 else
512 {
513 if (HeapTupleHeaderGetCmax(tuple) >= curcid)
514 return TM_SelfModified; /* updated after scan started */
515 else
516 return TM_Invisible; /* updated before scan started */
517 }
518 }
519
521 {
522 /* deleting subtransaction must have aborted */
523 SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
525 return TM_Ok;
526 }
527
528 if (HeapTupleHeaderGetCmax(tuple) >= curcid)
529 return TM_SelfModified; /* updated after scan started */
530 else
531 return TM_Invisible; /* updated before scan started */
532 }
534 return TM_Invisible;
536 SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
538 else
539 {
540 /* it must have aborted or crashed */
541 SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
543 return TM_Invisible;
544 }
545 }
546
547 /* by here, the inserting transaction has committed */
548
549 if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid or aborted */
550 return TM_Ok;
551
552 if (tuple->t_infomask & HEAP_XMAX_COMMITTED)
553 {
555 return TM_Ok;
556 if (!ItemPointerEquals(&htup->t_self, &tuple->t_ctid))
557 return TM_Updated; /* updated by other */
558 else
559 return TM_Deleted; /* deleted by other */
560 }
561
562 if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
563 {
564 TransactionId xmax;
565
567 return TM_Ok;
568
570 {
572 return TM_BeingModified;
573
575 return TM_Ok;
576 }
577
578 xmax = HeapTupleGetUpdateXid(tuple);
579 if (!TransactionIdIsValid(xmax))
580 {
582 return TM_BeingModified;
583 }
584
585 /* not LOCKED_ONLY, so it has to have an xmax */
587
589 {
590 if (HeapTupleHeaderGetCmax(tuple) >= curcid)
591 return TM_SelfModified; /* updated after scan started */
592 else
593 return TM_Invisible; /* updated before scan started */
594 }
595
597 return TM_BeingModified;
598
599 if (TransactionIdDidCommit(xmax))
600 {
601 if (!ItemPointerEquals(&htup->t_self, &tuple->t_ctid))
602 return TM_Updated;
603 else
604 return TM_Deleted;
605 }
606
607 /*
608 * By here, the update in the Xmax is either aborted or crashed, but
609 * what about the other members?
610 */
611
613 {
614 /*
615 * There's no member, even just a locker, alive anymore, so we can
616 * mark the Xmax as invalid.
617 */
618 SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
620 return TM_Ok;
621 }
622 else
623 {
624 /* There are lockers running */
625 return TM_BeingModified;
626 }
627 }
628
630 {
632 return TM_BeingModified;
633 if (HeapTupleHeaderGetCmax(tuple) >= curcid)
634 return TM_SelfModified; /* updated after scan started */
635 else
636 return TM_Invisible; /* updated before scan started */
637 }
638
640 return TM_BeingModified;
641
643 {
644 /* it must have aborted or crashed */
645 SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
647 return TM_Ok;
648 }
649
650 /* xmax transaction committed */
651
653 {
654 SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
656 return TM_Ok;
657 }
658
659 SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED,
661 if (!ItemPointerEquals(&htup->t_self, &tuple->t_ctid))
662 return TM_Updated; /* updated by other */
663 else
664 return TM_Deleted; /* deleted by other */
665}
CommandId HeapTupleHeaderGetCmin(const HeapTupleHeaderData *tup)
Definition combocid.c:104
static void SetHintBits(HeapTupleHeader tuple, Buffer buffer, uint16 infomask, TransactionId xid)
static bool HeapTupleCleanMoved(HeapTupleHeader tuple, Buffer buffer)
#define HEAP_XMIN_COMMITTED
#define HEAP_XMIN_INVALID
bool MultiXactIdIsRunning(MultiXactId multi, bool isLockOnly)
Definition multixact.c:501

References Assert, HEAP_LOCKED_UPGRADED(), HEAP_XMAX_COMMITTED, HEAP_XMAX_INVALID, HEAP_XMAX_IS_LOCKED_ONLY(), HEAP_XMAX_IS_MULTI, HEAP_XMIN_COMMITTED, HEAP_XMIN_INVALID, HeapTupleCleanMoved(), HeapTupleGetUpdateXid(), HeapTupleHeaderGetCmax(), HeapTupleHeaderGetCmin(), HeapTupleHeaderGetRawXmax(), HeapTupleHeaderGetRawXmin(), HeapTupleHeaderXminCommitted(), HeapTupleHeaderXminInvalid(), InvalidOid, InvalidTransactionId, ItemPointerEquals(), ItemPointerIsValid(), MultiXactIdIsRunning(), SetHintBits(), HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleData::t_self, HeapTupleData::t_tableOid, TM_BeingModified, TM_Deleted, TM_Invisible, TM_Ok, TM_SelfModified, TM_Updated, TransactionIdDidCommit(), TransactionIdIsCurrentTransactionId(), TransactionIdIsInProgress(), and TransactionIdIsValid.

Referenced by heap_delete(), heap_inplace_lock(), heap_lock_tuple(), heap_update(), and pgrowlocks().

◆ HeapTupleSatisfiesVacuum()

HTSV_Result HeapTupleSatisfiesVacuum ( HeapTuple  htup,
TransactionId  OldestXmin,
Buffer  buffer 
)
extern

◆ HeapTupleSatisfiesVacuumHorizon()

HTSV_Result HeapTupleSatisfiesVacuumHorizon ( HeapTuple  htup,
Buffer  buffer,
TransactionId dead_after 
)
extern

Definition at line 1076 of file heapam_visibility.c.

1077{
1078 HeapTupleHeader tuple = htup->t_data;
1079
1081 Assert(htup->t_tableOid != InvalidOid);
1083
1085
1086 /*
1087 * Has inserting transaction committed?
1088 *
1089 * If the inserting transaction aborted, then the tuple was never visible
1090 * to any other transaction, so we can delete it immediately.
1091 */
1092 if (!HeapTupleHeaderXminCommitted(tuple))
1093 {
1094 if (HeapTupleHeaderXminInvalid(tuple))
1095 return HEAPTUPLE_DEAD;
1096 else if (!HeapTupleCleanMoved(tuple, buffer))
1097 return HEAPTUPLE_DEAD;
1099 {
1100 if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */
1102 /* only locked? run infomask-only check first, for performance */
1106 /* inserted and then deleted by same xact */
1109 /* deleting subtransaction must have aborted */
1111 }
1113 {
1114 /*
1115 * It'd be possible to discern between INSERT/DELETE in progress
1116 * here by looking at xmax - but that doesn't seem beneficial for
1117 * the majority of callers and even detrimental for some. We'd
1118 * rather have callers look at/wait for xmin than xmax. It's
1119 * always correct to return INSERT_IN_PROGRESS because that's
1120 * what's happening from the view of other backends.
1121 */
1123 }
1125 SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
1127 else
1128 {
1129 /*
1130 * Not in Progress, Not Committed, so either Aborted or crashed
1131 */
1132 SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
1134 return HEAPTUPLE_DEAD;
1135 }
1136
1137 /*
1138 * At this point the xmin is known committed, but we might not have
1139 * been able to set the hint bit yet; so we can no longer Assert that
1140 * it's set.
1141 */
1142 }
1143
1144 /*
1145 * Okay, the inserter committed, so it was good at some point. Now what
1146 * about the deleting transaction?
1147 */
1148 if (tuple->t_infomask & HEAP_XMAX_INVALID)
1149 return HEAPTUPLE_LIVE;
1150
1152 {
1153 /*
1154 * "Deleting" xact really only locked it, so the tuple is live in any
1155 * case. However, we should make sure that either XMAX_COMMITTED or
1156 * XMAX_INVALID gets set once the xact is gone, to reduce the costs of
1157 * examining the tuple for future xacts.
1158 */
1159 if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED))
1160 {
1161 if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
1162 {
1163 /*
1164 * If it's a pre-pg_upgrade tuple, the multixact cannot
1165 * possibly be running; otherwise have to check.
1166 */
1167 if (!HEAP_LOCKED_UPGRADED(tuple->t_infomask) &&
1169 true))
1170 return HEAPTUPLE_LIVE;
1172 }
1173 else
1174 {
1176 return HEAPTUPLE_LIVE;
1177 SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
1179 }
1180 }
1181
1182 /*
1183 * We don't really care whether xmax did commit, abort or crash. We
1184 * know that xmax did lock the tuple, but it did not and will never
1185 * actually update it.
1186 */
1187
1188 return HEAPTUPLE_LIVE;
1189 }
1190
1191 if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
1192 {
1194
1195 /* already checked above */
1197
1198 /* not LOCKED_ONLY, so it has to have an xmax */
1200
1201 if (TransactionIdIsInProgress(xmax))
1203 else if (TransactionIdDidCommit(xmax))
1204 {
1205 /*
1206 * The multixact might still be running due to lockers. Need to
1207 * allow for pruning if below the xid horizon regardless --
1208 * otherwise we could end up with a tuple where the updater has to
1209 * be removed due to the horizon, but is not pruned away. It's
1210 * not a problem to prune that tuple, because any remaining
1211 * lockers will also be present in newer tuple versions.
1212 */
1213 *dead_after = xmax;
1215 }
1216 else if (!MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple), false))
1217 {
1218 /*
1219 * Not in Progress, Not Committed, so either Aborted or crashed.
1220 * Mark the Xmax as invalid.
1221 */
1223 }
1224
1225 return HEAPTUPLE_LIVE;
1226 }
1227
1228 if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED))
1229 {
1233 SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED,
1235 else
1236 {
1237 /*
1238 * Not in Progress, Not Committed, so either Aborted or crashed
1239 */
1240 SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
1242 return HEAPTUPLE_LIVE;
1243 }
1244
1245 /*
1246 * At this point the xmax is known committed, but we might not have
1247 * been able to set the hint bit yet; so we can no longer Assert that
1248 * it's set.
1249 */
1250 }
1251
1252 /*
1253 * Deleter committed, allow caller to check if it was recent enough that
1254 * some open transactions could still see the tuple.
1255 */
1258}

References Assert, fb(), HEAP_LOCKED_UPGRADED(), HEAP_XMAX_COMMITTED, HEAP_XMAX_INVALID, HEAP_XMAX_IS_LOCKED_ONLY(), HEAP_XMAX_IS_MULTI, HEAP_XMIN_COMMITTED, HEAP_XMIN_INVALID, HEAPTUPLE_DEAD, HEAPTUPLE_DELETE_IN_PROGRESS, HEAPTUPLE_INSERT_IN_PROGRESS, HEAPTUPLE_LIVE, HEAPTUPLE_RECENTLY_DEAD, HeapTupleCleanMoved(), HeapTupleGetUpdateXid(), HeapTupleHeaderGetRawXmax(), HeapTupleHeaderGetRawXmin(), HeapTupleHeaderGetUpdateXid(), HeapTupleHeaderIsOnlyLocked(), HeapTupleHeaderXminCommitted(), HeapTupleHeaderXminInvalid(), InvalidOid, InvalidTransactionId, ItemPointerIsValid(), MultiXactIdIsRunning(), SetHintBits(), HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleData::t_self, HeapTupleData::t_tableOid, TransactionIdDidCommit(), TransactionIdIsCurrentTransactionId(), TransactionIdIsInProgress(), and TransactionIdIsValid.

Referenced by heap_page_would_be_all_visible(), heap_prune_satisfies_vacuum(), heapam_scan_analyze_next_tuple(), HeapTupleSatisfiesNonVacuumable(), and HeapTupleSatisfiesVacuum().

◆ HeapTupleSatisfiesVisibility()

bool HeapTupleSatisfiesVisibility ( HeapTuple  htup,
Snapshot  snapshot,
Buffer  buffer 
)
extern

Definition at line 1655 of file heapam_visibility.c.

1656{
1657 switch (snapshot->snapshot_type)
1658 {
1659 case SNAPSHOT_MVCC:
1660 return HeapTupleSatisfiesMVCC(htup, snapshot, buffer);
1661 case SNAPSHOT_SELF:
1662 return HeapTupleSatisfiesSelf(htup, snapshot, buffer);
1663 case SNAPSHOT_ANY:
1664 return HeapTupleSatisfiesAny(htup, snapshot, buffer);
1665 case SNAPSHOT_TOAST:
1666 return HeapTupleSatisfiesToast(htup, snapshot, buffer);
1667 case SNAPSHOT_DIRTY:
 1668 return HeapTupleSatisfiesDirty(htup, snapshot, buffer);
 1669 case SNAPSHOT_HISTORIC_MVCC:
 1670 return HeapTupleSatisfiesHistoricMVCC(htup, snapshot, buffer);
 1671 case SNAPSHOT_NON_VACUUMABLE:
 1672 return HeapTupleSatisfiesNonVacuumable(htup, snapshot, buffer);
1673 }
1674
1675 return false; /* keep compiler quiet */
1676}
static bool HeapTupleSatisfiesAny(HeapTuple htup, Snapshot snapshot, Buffer buffer)
static bool HeapTupleSatisfiesNonVacuumable(HeapTuple htup, Snapshot snapshot, Buffer buffer)
static bool HeapTupleSatisfiesToast(HeapTuple htup, Snapshot snapshot, Buffer buffer)
static bool HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot, Buffer buffer)
static bool HeapTupleSatisfiesSelf(HeapTuple htup, Snapshot snapshot, Buffer buffer)
static bool HeapTupleSatisfiesHistoricMVCC(HeapTuple htup, Snapshot snapshot, Buffer buffer)
@ SNAPSHOT_TOAST
Definition snapshot.h:70
@ SNAPSHOT_SELF
Definition snapshot.h:60
@ SNAPSHOT_NON_VACUUMABLE
Definition snapshot.h:114
@ SNAPSHOT_MVCC
Definition snapshot.h:46
@ SNAPSHOT_ANY
Definition snapshot.h:65
@ SNAPSHOT_HISTORIC_MVCC
Definition snapshot.h:105
@ SNAPSHOT_DIRTY
Definition snapshot.h:98
SnapshotType snapshot_type
Definition snapshot.h:140

References HeapTupleSatisfiesAny(), HeapTupleSatisfiesDirty(), HeapTupleSatisfiesHistoricMVCC(), HeapTupleSatisfiesMVCC(), HeapTupleSatisfiesNonVacuumable(), HeapTupleSatisfiesSelf(), HeapTupleSatisfiesToast(), SNAPSHOT_ANY, SNAPSHOT_DIRTY, SNAPSHOT_HISTORIC_MVCC, SNAPSHOT_MVCC, SNAPSHOT_NON_VACUUMABLE, SNAPSHOT_SELF, SNAPSHOT_TOAST, and SnapshotData::snapshot_type.

Referenced by BitmapHeapScanNextBlock(), heap_delete(), heap_fetch(), heap_get_latest_tid(), heap_hot_search_buffer(), heap_update(), heapam_tuple_satisfies_snapshot(), heapgettup(), pgstat_heap(), SampleHeapTupleVisible(), and ScanSourceDatabasePgClassPage().

◆ HeapTupleSetHintBits()

void HeapTupleSetHintBits ( HeapTupleHeader  tuple,
Buffer  buffer,
uint16  infomask,
TransactionId  xid 
)
extern

Definition at line 141 of file heapam_visibility.c.

143{
144 /*
145 * The uses from heapam.c rely on being able to perform the hint bit
146 * updates, which can only be guaranteed if we are holding an exclusive
147 * lock on the buffer - which all callers are doing.
148 */
150
151 SetHintBits(tuple, buffer, infomask, xid);
152}
bool BufferIsLockedByMeInMode(Buffer buffer, BufferLockMode mode)
Definition bufmgr.c:2998

References Assert, BUFFER_LOCK_EXCLUSIVE, BufferIsLockedByMeInMode(), fb(), and SetHintBits().

Referenced by UpdateXmaxHintBits().

◆ log_heap_prune_and_freeze()

void log_heap_prune_and_freeze ( Relation  relation,
Buffer  buffer,
Buffer  vmbuffer,
uint8  vmflags,
TransactionId  conflict_xid,
bool  cleanup_lock,
PruneReason  reason,
HeapTupleFreeze frozen,
int  nfrozen,
OffsetNumber redirected,
int  nredirected,
OffsetNumber dead,
int  ndead,
OffsetNumber unused,
int  nunused 
)
extern

Definition at line 2179 of file pruneheap.c.

2188{
2191 uint8 info;
2193
2194 /* The following local variables hold data registered in the WAL record: */
2198 xlhp_prune_items dead_items;
2201 bool do_prune = nredirected > 0 || ndead > 0 || nunused > 0;
2203
2205
2206 xlrec.flags = 0;
2208
2209 /*
2210 * We can avoid an FPI of the heap page if the only modification we are
2211 * making to it is to set PD_ALL_VISIBLE and checksums/wal_log_hints are
2212 * disabled. Note that if we explicitly skip an FPI, we must not stamp the
2213 * heap page with this record's LSN. Recovery skips records <= the stamped
2214 * LSN, so this could lead to skipping an earlier FPI needed to repair a
2215 * torn page.
2216 */
2217 if (!do_prune &&
2218 nfrozen == 0 &&
2221
2222 /*
2223 * Prepare data for the buffer. The arrays are not actually in the
2224 * buffer, but we pretend that they are. When XLogInsert stores a full
2225 * page image, the arrays can be omitted.
2226 */
2229
2230 if (do_set_vm)
2231 XLogRegisterBuffer(1, vmbuffer, 0);
2232
2233 if (nfrozen > 0)
2234 {
2235 int nplans;
2236
2238
2239 /*
2240 * Prepare deduplicated representation for use in the WAL record. This
2241 * destructively sorts frozen tuples array in-place.
2242 */
2243 nplans = heap_log_freeze_plan(frozen, nfrozen, plans, frz_offsets);
2244
2245 freeze_plans.nplans = nplans;
2247 offsetof(xlhp_freeze_plans, plans));
2248 XLogRegisterBufData(0, plans,
2249 sizeof(xlhp_freeze_plan) * nplans);
2250 }
2251 if (nredirected > 0)
2252 {
2254
2255 redirect_items.ntargets = nredirected;
2258 XLogRegisterBufData(0, redirected,
2259 sizeof(OffsetNumber[2]) * nredirected);
2260 }
2261 if (ndead > 0)
2262 {
2263 xlrec.flags |= XLHP_HAS_DEAD_ITEMS;
2264
2265 dead_items.ntargets = ndead;
2266 XLogRegisterBufData(0, &dead_items,
2268 XLogRegisterBufData(0, dead,
2269 sizeof(OffsetNumber) * ndead);
2270 }
2271 if (nunused > 0)
2272 {
2274
2275 unused_items.ntargets = nunused;
2278 XLogRegisterBufData(0, unused,
2279 sizeof(OffsetNumber) * nunused);
2280 }
2281 if (nfrozen > 0)
2283 sizeof(OffsetNumber) * nfrozen);
2284
2285 /*
2286 * Prepare the main xl_heap_prune record. We already set the XLHP_HAS_*
2287 * flag above.
2288 */
2290 {
2291 xlrec.flags |= XLHP_VM_ALL_VISIBLE;
2293 xlrec.flags |= XLHP_VM_ALL_FROZEN;
2294 }
2296 xlrec.flags |= XLHP_IS_CATALOG_REL;
2299 if (cleanup_lock)
2300 xlrec.flags |= XLHP_CLEANUP_LOCK;
2301 else
2302 {
2303 Assert(nredirected == 0 && ndead == 0);
2304 /* also, any items in 'unused' must've been LP_DEAD previously */
2305 }
2309
2310 switch (reason)
2311 {
2312 case PRUNE_ON_ACCESS:
2314 break;
2315 case PRUNE_VACUUM_SCAN:
2317 break;
2320 break;
2321 default:
2322 elog(ERROR, "unrecognized prune reason: %d", (int) reason);
2323 break;
2324 }
2325 recptr = XLogInsert(RM_HEAP2_ID, info);
2326
2327 if (do_set_vm)
2328 {
2329 Assert(BufferIsDirty(vmbuffer));
2330 PageSetLSN(BufferGetPage(vmbuffer), recptr);
2331 }
2332
2333 /*
2334 * See comment at the top of the function about regbuf_flags_heap for
2335 * details on when we can advance the page LSN.
2336 */
2337 if (do_prune || nfrozen > 0 || (do_set_vm && XLogHintBitIsNeeded()))
2338 {
2339 Assert(BufferIsDirty(buffer));
2341 }
2342}
#define XLHP_HAS_CONFLICT_HORIZON
#define XLHP_HAS_FREEZE_PLANS
#define XLHP_VM_ALL_VISIBLE
#define SizeOfHeapPrune
#define XLHP_HAS_NOW_UNUSED_ITEMS
#define XLHP_VM_ALL_FROZEN
#define XLHP_HAS_REDIRECTIONS
#define XLOG_HEAP2_PRUNE_VACUUM_SCAN
Definition heapam_xlog.h:61
#define XLOG_HEAP2_PRUNE_ON_ACCESS
Definition heapam_xlog.h:60
#define XLHP_CLEANUP_LOCK
#define XLHP_HAS_DEAD_ITEMS
#define XLOG_HEAP2_PRUNE_VACUUM_CLEANUP
Definition heapam_xlog.h:62
#define XLHP_IS_CATALOG_REL
static int heap_log_freeze_plan(HeapTupleFreeze *tuples, int ntuples, xlhp_freeze_plan *plans_out, OffsetNumber *offsets_out)
Definition pruneheap.c:2100
#define XLogHintBitIsNeeded()
Definition xlog.h:122
#define REGBUF_NO_IMAGE
Definition xloginsert.h:33

References Assert, BufferGetPage(), BufferIsDirty(), data, elog, ERROR, fb(), heap_log_freeze_plan(), MaxHeapTuplesPerPage, xlhp_prune_items::ntargets, PageSetLSN(), PRUNE_ON_ACCESS, PRUNE_VACUUM_CLEANUP, PRUNE_VACUUM_SCAN, REGBUF_NO_IMAGE, REGBUF_STANDARD, RelationIsAccessibleInLogicalDecoding, SizeOfHeapPrune, TransactionIdIsValid, VISIBILITYMAP_ALL_FROZEN, VISIBILITYMAP_ALL_VISIBLE, VISIBILITYMAP_VALID_BITS, XLHP_CLEANUP_LOCK, XLHP_HAS_CONFLICT_HORIZON, XLHP_HAS_DEAD_ITEMS, XLHP_HAS_FREEZE_PLANS, XLHP_HAS_NOW_UNUSED_ITEMS, XLHP_HAS_REDIRECTIONS, XLHP_IS_CATALOG_REL, XLHP_VM_ALL_FROZEN, XLHP_VM_ALL_VISIBLE, XLOG_HEAP2_PRUNE_ON_ACCESS, XLOG_HEAP2_PRUNE_VACUUM_CLEANUP, XLOG_HEAP2_PRUNE_VACUUM_SCAN, XLogBeginInsert(), XLogHintBitIsNeeded, XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by heap_page_prune_and_freeze(), and lazy_vacuum_heap_page().

◆ ReleaseBulkInsertStatePin()

void ReleaseBulkInsertStatePin ( BulkInsertState  bistate)
extern

Definition at line 2104 of file heapam.c.

2105{
2106 if (bistate->current_buf != InvalidBuffer)
2107 ReleaseBuffer(bistate->current_buf);
2108 bistate->current_buf = InvalidBuffer;
2109
2110 /*
2111 * Despite the name, we also reset bulk relation extension state.
2112 * Otherwise we can end up erroring out due to looking for free space in
2113 * ->next_free of one partition, even though ->next_free was set when
2114 * extending another partition. It could obviously also be bad for
2115 * efficiency to look at existing blocks at offsets from another
2116 * partition, even if we don't error out.
2117 */
2118 bistate->next_free = InvalidBlockNumber;
2119 bistate->last_free = InvalidBlockNumber;
2120}

References BulkInsertStateData::current_buf, InvalidBlockNumber, InvalidBuffer, BulkInsertStateData::last_free, BulkInsertStateData::next_free, and ReleaseBuffer().

Referenced by CopyFrom().

◆ ResolveCminCmaxDuringDecoding()

bool ResolveCminCmaxDuringDecoding ( struct HTAB tuplecid_data,
Snapshot  snapshot,
HeapTuple  htup,
Buffer  buffer,
CommandId cmin,
CommandId cmax 
)
extern

Definition at line 5558 of file reorderbuffer.c.

5562{
5565 ForkNumber forkno;
5566 BlockNumber blockno;
5567 bool updated_mapping = false;
5568
5569 /*
5570 * Return unresolved if tuplecid_data is not valid. That's because when
5571 * streaming in-progress transactions we may run into tuples with the CID
5572 * before actually decoding them. Think e.g. about INSERT followed by
5573 * TRUNCATE, where the TRUNCATE may not be decoded yet when applying the
5574 * INSERT. So in such cases, we assume the CID is from the future
5575 * command.
5576 */
5577 if (tuplecid_data == NULL)
5578 return false;
5579
5580 /* be careful about padding */
5581 memset(&key, 0, sizeof(key));
5582
5583 Assert(!BufferIsLocal(buffer));
5584
5585 /*
5586 * get relfilelocator from the buffer, no convenient way to access it
5587 * other than that.
5588 */
5589 BufferGetTag(buffer, &key.rlocator, &forkno, &blockno);
5590
5591 /* tuples can only be in the main fork */
5592 Assert(forkno == MAIN_FORKNUM);
5593 Assert(blockno == ItemPointerGetBlockNumber(&htup->t_self));
5594
5595 ItemPointerCopy(&htup->t_self,
5596 &key.tid);
5597
5598restart:
5601
5602 /*
5603 * failed to find a mapping, check whether the table was rewritten and
5604 * apply mapping if so, but only do that once - there can be no new
5605 * mappings while we are in here since we have to hold a lock on the
5606 * relation.
5607 */
5608 if (ent == NULL && !updated_mapping)
5609 {
5611 /* now check but don't update for a mapping again */
5612 updated_mapping = true;
5613 goto restart;
5614 }
5615 else if (ent == NULL)
5616 return false;
5617
5618 if (cmin)
5619 *cmin = ent->cmin;
5620 if (cmax)
5621 *cmax = ent->cmax;
5622 return true;
5623}
#define BufferIsLocal(buffer)
Definition buf.h:37
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition dynahash.c:952
@ HASH_FIND
Definition hsearch.h:113
static void UpdateLogicalMappings(HTAB *tuplecid_data, Oid relid, Snapshot snapshot)
static HTAB * tuplecid_data
Definition snapmgr.c:163

References Assert, BufferGetTag(), BufferIsLocal, fb(), HASH_FIND, hash_search(), ItemPointerCopy(), ItemPointerGetBlockNumber(), MAIN_FORKNUM, HeapTupleData::t_self, HeapTupleData::t_tableOid, tuplecid_data, and UpdateLogicalMappings().

Referenced by HeapTupleSatisfiesHistoricMVCC().

◆ simple_heap_delete()

void simple_heap_delete ( Relation  relation,
const ItemPointerData tid 
)
extern

Definition at line 3267 of file heapam.c.

3268{
3269 TM_Result result;
3270 TM_FailureData tmfd;
3271
3272 result = heap_delete(relation, tid,
3274 true /* wait for commit */ ,
3275 &tmfd, false /* changingPart */ );
3276 switch (result)
3277 {
3278 case TM_SelfModified:
3279 /* Tuple was already updated in current command? */
3280 elog(ERROR, "tuple already updated by self");
3281 break;
3282
3283 case TM_Ok:
3284 /* done successfully */
3285 break;
3286
3287 case TM_Updated:
3288 elog(ERROR, "tuple concurrently updated");
3289 break;
3290
3291 case TM_Deleted:
3292 elog(ERROR, "tuple concurrently deleted");
3293 break;
3294
3295 default:
3296 elog(ERROR, "unrecognized heap_delete status: %u", result);
3297 break;
3298 }
3299}
TM_Result heap_delete(Relation relation, const ItemPointerData *tid, CommandId cid, Snapshot crosscheck, bool wait, TM_FailureData *tmfd, bool changingPart)
Definition heapam.c:2844

References elog, ERROR, GetCurrentCommandId(), heap_delete(), InvalidSnapshot, TM_Deleted, TM_Ok, TM_SelfModified, and TM_Updated.

Referenced by CatalogTupleDelete(), and toast_delete_datum().

◆ simple_heap_insert()

void simple_heap_insert ( Relation  relation,
HeapTuple  tup 
)
extern

Definition at line 2786 of file heapam.c.

2787{
2788 heap_insert(relation, tup, GetCurrentCommandId(true), 0, NULL);
2789}
void heap_insert(Relation relation, HeapTuple tup, CommandId cid, int options, BulkInsertState bistate)
Definition heapam.c:2142

References fb(), GetCurrentCommandId(), and heap_insert().

Referenced by CatalogTupleInsert(), CatalogTupleInsertWithInfo(), and InsertOneTuple().

◆ simple_heap_update()

void simple_heap_update ( Relation  relation,
const ItemPointerData otid,
HeapTuple  tup,
TU_UpdateIndexes update_indexes 
)
extern

Definition at line 4557 of file heapam.c.

4559{
4560 TM_Result result;
4561 TM_FailureData tmfd;
4562 LockTupleMode lockmode;
4563
4564 result = heap_update(relation, otid, tup,
4566 true /* wait for commit */ ,
4567 &tmfd, &lockmode, update_indexes);
4568 switch (result)
4569 {
4570 case TM_SelfModified:
4571 /* Tuple was already updated in current command? */
4572 elog(ERROR, "tuple already updated by self");
4573 break;
4574
4575 case TM_Ok:
4576 /* done successfully */
4577 break;
4578
4579 case TM_Updated:
4580 elog(ERROR, "tuple concurrently updated");
4581 break;
4582
4583 case TM_Deleted:
4584 elog(ERROR, "tuple concurrently deleted");
4585 break;
4586
4587 default:
4588 elog(ERROR, "unrecognized heap_update status: %u", result);
4589 break;
4590 }
4591}
TM_Result heap_update(Relation relation, const ItemPointerData *otid, HeapTuple newtup, CommandId cid, Snapshot crosscheck, bool wait, TM_FailureData *tmfd, LockTupleMode *lockmode, TU_UpdateIndexes *update_indexes)
Definition heapam.c:3313

References elog, ERROR, fb(), GetCurrentCommandId(), heap_update(), InvalidSnapshot, TM_Deleted, TM_Ok, TM_SelfModified, and TM_Updated.

Referenced by CatalogTupleUpdate(), and CatalogTupleUpdateWithInfo().