PostgreSQL Source Code git master
heapam.h File Reference
#include "access/heapam_xlog.h"
#include "access/relation.h"
#include "access/relscan.h"
#include "access/sdir.h"
#include "access/skey.h"
#include "access/table.h"
#include "access/tableam.h"
#include "nodes/lockoptions.h"
#include "nodes/primnodes.h"
#include "storage/bufpage.h"
#include "storage/dsm.h"
#include "storage/lockdefs.h"
#include "storage/read_stream.h"
#include "storage/shm_toc.h"
#include "utils/relcache.h"
#include "utils/snapshot.h"
(The include dependency graph for heapam.h and the graph of files that directly or indirectly include this file are not reproduced here.)


Data Structures

struct  HeapScanDescData
 
struct  BitmapHeapScanDescData
 
struct  IndexFetchHeapData
 
struct  HeapTupleFreeze
 
struct  HeapPageFreeze
 
struct  PruneFreezeResult
 

Macros

#define HEAP_INSERT_SKIP_FSM   TABLE_INSERT_SKIP_FSM
 
#define HEAP_INSERT_FROZEN   TABLE_INSERT_FROZEN
 
#define HEAP_INSERT_NO_LOGICAL   TABLE_INSERT_NO_LOGICAL
 
#define HEAP_INSERT_SPECULATIVE   0x0010
 
#define HEAP_PAGE_PRUNE_MARK_UNUSED_NOW   (1 << 0)
 
#define HEAP_PAGE_PRUNE_FREEZE   (1 << 1)
 
#define MaxLockTupleMode   LockTupleExclusive
 
#define HEAP_FREEZE_CHECK_XMIN_COMMITTED   0x01
 
#define HEAP_FREEZE_CHECK_XMAX_ABORTED   0x02
 
#define HeapScanIsValid(scan)   PointerIsValid(scan)
 

Typedefs

typedef struct BulkInsertStateData * BulkInsertState
 
typedef struct HeapScanDescData HeapScanDescData
 
typedef struct HeapScanDescData * HeapScanDesc
 
typedef struct BitmapHeapScanDescData BitmapHeapScanDescData
 
typedef struct BitmapHeapScanDescData * BitmapHeapScanDesc
 
typedef struct IndexFetchHeapData IndexFetchHeapData
 
typedef struct HeapTupleFreeze HeapTupleFreeze
 
typedef struct HeapPageFreeze HeapPageFreeze
 
typedef struct PruneFreezeResult PruneFreezeResult
 

Enumerations

enum  HTSV_Result {
  HEAPTUPLE_DEAD , HEAPTUPLE_LIVE , HEAPTUPLE_RECENTLY_DEAD , HEAPTUPLE_INSERT_IN_PROGRESS ,
  HEAPTUPLE_DELETE_IN_PROGRESS
}
 
enum  PruneReason { PRUNE_ON_ACCESS , PRUNE_VACUUM_SCAN , PRUNE_VACUUM_CLEANUP }
 

Functions

TableScanDesc heap_beginscan (Relation relation, Snapshot snapshot, int nkeys, ScanKey key, ParallelTableScanDesc parallel_scan, uint32 flags)
 
void heap_setscanlimits (TableScanDesc sscan, BlockNumber startBlk, BlockNumber numBlks)
 
void heap_prepare_pagescan (TableScanDesc sscan)
 
void heap_rescan (TableScanDesc sscan, ScanKey key, bool set_params, bool allow_strat, bool allow_sync, bool allow_pagemode)
 
void heap_endscan (TableScanDesc sscan)
 
HeapTuple heap_getnext (TableScanDesc sscan, ScanDirection direction)
 
bool heap_getnextslot (TableScanDesc sscan, ScanDirection direction, struct TupleTableSlot *slot)
 
void heap_set_tidrange (TableScanDesc sscan, ItemPointer mintid, ItemPointer maxtid)
 
bool heap_getnextslot_tidrange (TableScanDesc sscan, ScanDirection direction, TupleTableSlot *slot)
 
bool heap_fetch (Relation relation, Snapshot snapshot, HeapTuple tuple, Buffer *userbuf, bool keep_buf)
 
bool heap_hot_search_buffer (ItemPointer tid, Relation relation, Buffer buffer, Snapshot snapshot, HeapTuple heapTuple, bool *all_dead, bool first_call)
 
void heap_get_latest_tid (TableScanDesc sscan, ItemPointer tid)
 
BulkInsertState GetBulkInsertState (void)
 
void FreeBulkInsertState (BulkInsertState)
 
void ReleaseBulkInsertStatePin (BulkInsertState bistate)
 
void heap_insert (Relation relation, HeapTuple tup, CommandId cid, int options, BulkInsertState bistate)
 
void heap_multi_insert (Relation relation, struct TupleTableSlot **slots, int ntuples, CommandId cid, int options, BulkInsertState bistate)
 
TM_Result heap_delete (Relation relation, ItemPointer tid, CommandId cid, Snapshot crosscheck, bool wait, struct TM_FailureData *tmfd, bool changingPart)
 
void heap_finish_speculative (Relation relation, ItemPointer tid)
 
void heap_abort_speculative (Relation relation, ItemPointer tid)
 
TM_Result heap_update (Relation relation, ItemPointer otid, HeapTuple newtup, CommandId cid, Snapshot crosscheck, bool wait, struct TM_FailureData *tmfd, LockTupleMode *lockmode, TU_UpdateIndexes *update_indexes)
 
TM_Result heap_lock_tuple (Relation relation, HeapTuple tuple, CommandId cid, LockTupleMode mode, LockWaitPolicy wait_policy, bool follow_updates, Buffer *buffer, struct TM_FailureData *tmfd)
 
bool heap_inplace_lock (Relation relation, HeapTuple oldtup_ptr, Buffer buffer, void(*release_callback)(void *), void *arg)
 
void heap_inplace_update_and_unlock (Relation relation, HeapTuple oldtup, HeapTuple tuple, Buffer buffer)
 
void heap_inplace_unlock (Relation relation, HeapTuple oldtup, Buffer buffer)
 
bool heap_prepare_freeze_tuple (HeapTupleHeader tuple, const struct VacuumCutoffs *cutoffs, HeapPageFreeze *pagefrz, HeapTupleFreeze *frz, bool *totally_frozen)
 
void heap_pre_freeze_checks (Buffer buffer, HeapTupleFreeze *tuples, int ntuples)
 
void heap_freeze_prepared_tuples (Buffer buffer, HeapTupleFreeze *tuples, int ntuples)
 
bool heap_freeze_tuple (HeapTupleHeader tuple, TransactionId relfrozenxid, TransactionId relminmxid, TransactionId FreezeLimit, TransactionId MultiXactCutoff)
 
bool heap_tuple_should_freeze (HeapTupleHeader tuple, const struct VacuumCutoffs *cutoffs, TransactionId *NoFreezePageRelfrozenXid, MultiXactId *NoFreezePageRelminMxid)
 
bool heap_tuple_needs_eventual_freeze (HeapTupleHeader tuple)
 
void simple_heap_insert (Relation relation, HeapTuple tup)
 
void simple_heap_delete (Relation relation, ItemPointer tid)
 
void simple_heap_update (Relation relation, ItemPointer otid, HeapTuple tup, TU_UpdateIndexes *update_indexes)
 
TransactionId heap_index_delete_tuples (Relation rel, TM_IndexDeleteOp *delstate)
 
void heap_page_prune_opt (Relation relation, Buffer buffer)
 
void heap_page_prune_and_freeze (Relation relation, Buffer buffer, struct GlobalVisState *vistest, int options, struct VacuumCutoffs *cutoffs, PruneFreezeResult *presult, PruneReason reason, OffsetNumber *off_loc, TransactionId *new_relfrozen_xid, MultiXactId *new_relmin_mxid)
 
void heap_page_prune_execute (Buffer buffer, bool lp_truncate_only, OffsetNumber *redirected, int nredirected, OffsetNumber *nowdead, int ndead, OffsetNumber *nowunused, int nunused)
 
void heap_get_root_tuples (Page page, OffsetNumber *root_offsets)
 
void log_heap_prune_and_freeze (Relation relation, Buffer buffer, TransactionId conflict_xid, bool cleanup_lock, PruneReason reason, HeapTupleFreeze *frozen, int nfrozen, OffsetNumber *redirected, int nredirected, OffsetNumber *dead, int ndead, OffsetNumber *unused, int nunused)
 
void heap_vacuum_rel (Relation rel, struct VacuumParams *params, BufferAccessStrategy bstrategy)
 
bool HeapTupleSatisfiesVisibility (HeapTuple htup, Snapshot snapshot, Buffer buffer)
 
TM_Result HeapTupleSatisfiesUpdate (HeapTuple htup, CommandId curcid, Buffer buffer)
 
HTSV_Result HeapTupleSatisfiesVacuum (HeapTuple htup, TransactionId OldestXmin, Buffer buffer)
 
HTSV_Result HeapTupleSatisfiesVacuumHorizon (HeapTuple htup, Buffer buffer, TransactionId *dead_after)
 
void HeapTupleSetHintBits (HeapTupleHeader tuple, Buffer buffer, uint16 infomask, TransactionId xid)
 
bool HeapTupleHeaderIsOnlyLocked (HeapTupleHeader tuple)
 
bool HeapTupleIsSurelyDead (HeapTuple htup, struct GlobalVisState *vistest)
 
bool ResolveCminCmaxDuringDecoding (struct HTAB *tuplecid_data, Snapshot snapshot, HeapTuple htup, Buffer buffer, CommandId *cmin, CommandId *cmax)
 
void HeapCheckForSerializableConflictOut (bool visible, Relation relation, HeapTuple tuple, Buffer buffer, Snapshot snapshot)
 
static void heap_execute_freeze_tuple (HeapTupleHeader tuple, HeapTupleFreeze *frz)
 

Macro Definition Documentation

◆ HEAP_FREEZE_CHECK_XMAX_ABORTED

#define HEAP_FREEZE_CHECK_XMAX_ABORTED   0x02

Definition at line 136 of file heapam.h.

◆ HEAP_FREEZE_CHECK_XMIN_COMMITTED

#define HEAP_FREEZE_CHECK_XMIN_COMMITTED   0x01

Definition at line 135 of file heapam.h.

◆ HEAP_INSERT_FROZEN

#define HEAP_INSERT_FROZEN   TABLE_INSERT_FROZEN

Definition at line 37 of file heapam.h.

◆ HEAP_INSERT_NO_LOGICAL

#define HEAP_INSERT_NO_LOGICAL   TABLE_INSERT_NO_LOGICAL

Definition at line 38 of file heapam.h.

◆ HEAP_INSERT_SKIP_FSM

#define HEAP_INSERT_SKIP_FSM   TABLE_INSERT_SKIP_FSM

Definition at line 36 of file heapam.h.

◆ HEAP_INSERT_SPECULATIVE

#define HEAP_INSERT_SPECULATIVE   0x0010

Definition at line 39 of file heapam.h.
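
For illustration only (this usage example is not part of the header): the HEAP_INSERT_* values are bit flags that callers OR together into the "options" argument of heap_insert() or heap_multi_insert(). A minimal sketch, assuming "rel" and "tup" were obtained elsewhere (e.g. via table_open() and heap_form_tuple()):

    /* Hedged sketch: OR'ing insert option flags into heap_insert();
     * "rel" and "tup" are assumed to exist already. */
    int     options = HEAP_INSERT_SKIP_FSM | HEAP_INSERT_FROZEN;

    heap_insert(rel, tup, GetCurrentCommandId(true), options, NULL);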

◆ HEAP_PAGE_PRUNE_FREEZE

#define HEAP_PAGE_PRUNE_FREEZE   (1 << 1)

Definition at line 43 of file heapam.h.

◆ HEAP_PAGE_PRUNE_MARK_UNUSED_NOW

#define HEAP_PAGE_PRUNE_MARK_UNUSED_NOW   (1 << 0)

Definition at line 42 of file heapam.h.

◆ HeapScanIsValid

#define HeapScanIsValid(scan)   PointerIsValid(scan)

Definition at line 286 of file heapam.h.

◆ MaxLockTupleMode

#define MaxLockTupleMode   LockTupleExclusive

Definition at line 49 of file heapam.h.

Typedef Documentation

◆ BitmapHeapScanDesc

typedef struct BitmapHeapScanDescData* BitmapHeapScanDesc

Definition at line 108 of file heapam.h.

◆ BitmapHeapScanDescData

◆ BulkInsertState

typedef struct BulkInsertStateData* BulkInsertState

Definition at line 45 of file heapam.h.

◆ HeapPageFreeze

◆ HeapScanDesc

typedef struct HeapScanDescData* HeapScanDesc

Definition at line 100 of file heapam.h.

◆ HeapScanDescData

◆ HeapTupleFreeze

◆ IndexFetchHeapData

◆ PruneFreezeResult

Enumeration Type Documentation

◆ HTSV_Result

Enumerator
HEAPTUPLE_DEAD 
HEAPTUPLE_LIVE 
HEAPTUPLE_RECENTLY_DEAD 
HEAPTUPLE_INSERT_IN_PROGRESS 
HEAPTUPLE_DELETE_IN_PROGRESS 

Definition at line 122 of file heapam.h.

122 typedef enum
123{
124 HEAPTUPLE_DEAD, /* tuple is dead and deletable */
125 HEAPTUPLE_LIVE, /* tuple is live (committed, no deleter) */
126 HEAPTUPLE_RECENTLY_DEAD, /* tuple is dead, but not deletable yet */
127 HEAPTUPLE_INSERT_IN_PROGRESS, /* inserting xact is still in progress */
128 HEAPTUPLE_DELETE_IN_PROGRESS, /* deleting xact is still in progress */
129 } HTSV_Result;
HTSV_Result
Definition: heapam.h:123
@ HEAPTUPLE_RECENTLY_DEAD
Definition: heapam.h:126
@ HEAPTUPLE_INSERT_IN_PROGRESS
Definition: heapam.h:127
@ HEAPTUPLE_LIVE
Definition: heapam.h:125
@ HEAPTUPLE_DELETE_IN_PROGRESS
Definition: heapam.h:128
@ HEAPTUPLE_DEAD
Definition: heapam.h:124
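
As an illustrative (non-authoritative) sketch of how this enum is consumed, a vacuum-style caller typically branches on the value returned by HeapTupleSatisfiesVacuum(), declared later on this page; "tuple", "OldestXmin" and "buf" are assumed to be supplied by the caller:

    /* Hedged sketch: acting on an HTSV_Result from HeapTupleSatisfiesVacuum(). */
    switch (HeapTupleSatisfiesVacuum(tuple, OldestXmin, buf))
    {
        case HEAPTUPLE_DEAD:
            /* dead to everyone: the item is a candidate for removal */
            break;
        case HEAPTUPLE_RECENTLY_DEAD:
            /* dead, but possibly still visible to old snapshots: keep it */
            break;
        case HEAPTUPLE_LIVE:
        case HEAPTUPLE_INSERT_IN_PROGRESS:
        case HEAPTUPLE_DELETE_IN_PROGRESS:
            /* must be kept; in-progress cases may be reconsidered later */
            break;
    }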

◆ PruneReason

Enumerator
PRUNE_ON_ACCESS 
PRUNE_VACUUM_SCAN 
PRUNE_VACUUM_CLEANUP 

Definition at line 266 of file heapam.h.

266 typedef enum
267{
268 PRUNE_ON_ACCESS, /* on-access pruning */
269 PRUNE_VACUUM_SCAN, /* VACUUM 1st heap pass */
270 PRUNE_VACUUM_CLEANUP, /* VACUUM 2nd heap pass */
271 } PruneReason;
PruneReason
Definition: heapam.h:267
@ PRUNE_VACUUM_CLEANUP
Definition: heapam.h:270
@ PRUNE_ON_ACCESS
Definition: heapam.h:268
@ PRUNE_VACUUM_SCAN
Definition: heapam.h:269

Function Documentation

◆ FreeBulkInsertState()

void FreeBulkInsertState(BulkInsertState bistate)

Definition at line 2006 of file heapam.c.

2007{
2008 if (bistate->current_buf != InvalidBuffer)
2009 ReleaseBuffer(bistate->current_buf);
2010 FreeAccessStrategy(bistate->strategy);
2011 pfree(bistate);
2012}
#define InvalidBuffer
Definition: buf.h:25
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:5303
void FreeAccessStrategy(BufferAccessStrategy strategy)
Definition: freelist.c:723
void pfree(void *pointer)
Definition: mcxt.c:2147
BufferAccessStrategy strategy
Definition: hio.h:31
Buffer current_buf
Definition: hio.h:32

References BulkInsertStateData::current_buf, FreeAccessStrategy(), InvalidBuffer, pfree(), ReleaseBuffer(), and BulkInsertStateData::strategy.

Referenced by ATRewriteTable(), CopyFrom(), CopyMultiInsertBufferCleanup(), intorel_shutdown(), and transientrel_shutdown().

◆ GetBulkInsertState()

BulkInsertState GetBulkInsertState(void)

Definition at line 1989 of file heapam.c.

1990{
1991 BulkInsertState bistate;
1992
1993 bistate = (BulkInsertState) palloc(sizeof(BulkInsertStateData));
1994 bistate->strategy = GetAccessStrategy(BAS_BULKWRITE);
1995 bistate->current_buf = InvalidBuffer;
1996 bistate->next_free = InvalidBlockNumber;
1997 bistate->last_free = InvalidBlockNumber;
1998 bistate->already_extended_by = 0;
1999 return bistate;
2000}
#define InvalidBlockNumber
Definition: block.h:33
@ BAS_BULKWRITE
Definition: bufmgr.h:39
BufferAccessStrategy GetAccessStrategy(BufferAccessStrategyType btype)
Definition: freelist.c:541
struct BulkInsertStateData * BulkInsertState
Definition: heapam.h:45
void * palloc(Size size)
Definition: mcxt.c:1940
BlockNumber last_free
Definition: hio.h:49
uint32 already_extended_by
Definition: hio.h:50
BlockNumber next_free
Definition: hio.h:48

References BulkInsertStateData::already_extended_by, BAS_BULKWRITE, BulkInsertStateData::current_buf, GetAccessStrategy(), InvalidBlockNumber, InvalidBuffer, BulkInsertStateData::last_free, BulkInsertStateData::next_free, palloc(), and BulkInsertStateData::strategy.

Referenced by ATRewriteTable(), CopyFrom(), CopyMultiInsertBufferInit(), intorel_startup(), and transientrel_startup().
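
As a usage illustration (not taken from this page), the returned state is normally threaded through repeated heap_insert() calls and released afterwards. A minimal hypothetical sketch, with "rel", "tuples" and "ntuples" assumed to come from the caller:

    /* Hedged sketch of the BulkInsertState lifecycle around heap_insert(). */
    BulkInsertState bistate = GetBulkInsertState();

    for (int i = 0; i < ntuples; i++)
        heap_insert(rel, tuples[i], GetCurrentCommandId(true), 0, bistate);

    FreeBulkInsertState(bistate);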

◆ heap_abort_speculative()

void heap_abort_speculative(Relation relation, ItemPointer tid)

Definition at line 6139 of file heapam.c.

6140{
6142 ItemId lp;
6143 HeapTupleData tp;
6144 Page page;
6145 BlockNumber block;
6146 Buffer buffer;
6147
6149
6150 block = ItemPointerGetBlockNumber(tid);
6151 buffer = ReadBuffer(relation, block);
6152 page = BufferGetPage(buffer);
6153
6155
6156 /*
6157 * Page can't be all visible, we just inserted into it, and are still
6158 * running.
6159 */
6160 Assert(!PageIsAllVisible(page));
6161
6164
6165 tp.t_tableOid = RelationGetRelid(relation);
6166 tp.t_data = (HeapTupleHeader) PageGetItem(page, lp);
6167 tp.t_len = ItemIdGetLength(lp);
6168 tp.t_self = *tid;
6169
6170 /*
6171 * Sanity check that the tuple really is a speculatively inserted tuple,
6172 * inserted by us.
6173 */
6174 if (tp.t_data->t_choice.t_heap.t_xmin != xid)
6175 elog(ERROR, "attempted to kill a tuple inserted by another transaction");
6176 if (!(IsToastRelation(relation) || HeapTupleHeaderIsSpeculative(tp.t_data)))
6177 elog(ERROR, "attempted to kill a non-speculative tuple");
6179
6180 /*
6181 * No need to check for serializable conflicts here. There is never a
6182 * need for a combo CID, either. No need to extract replica identity, or
6183 * do anything special with infomask bits.
6184 */
6185
6187
6188 /*
6189 * The tuple will become DEAD immediately. Flag that this page is a
6190 * candidate for pruning by setting xmin to TransactionXmin. While not
6191 * immediately prunable, it is the oldest xid we can cheaply determine
6192 * that's safe against wraparound / being older than the table's
6193 * relfrozenxid. To defend against the unlikely case of a new relation
6194 * having a newer relfrozenxid than our TransactionXmin, use relfrozenxid
6195 * if so (vacuum can't subsequently move relfrozenxid to beyond
6196 * TransactionXmin, so there's no race here).
6197 */
6199 {
6200 TransactionId relfrozenxid = relation->rd_rel->relfrozenxid;
6201 TransactionId prune_xid;
6202
6203 if (TransactionIdPrecedes(TransactionXmin, relfrozenxid))
6204 prune_xid = relfrozenxid;
6205 else
6206 prune_xid = TransactionXmin;
6207 PageSetPrunable(page, prune_xid);
6208 }
6209
6210 /* store transaction information of xact deleting the tuple */
6212 tp.t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED;
6213
6214 /*
6215 * Set the tuple header xmin to InvalidTransactionId. This makes the
6216 * tuple immediately invisible everyone. (In particular, to any
6217 * transactions waiting on the speculative token, woken up later.)
6218 */
6220
6221 /* Clear the speculative insertion token too */
6222 tp.t_data->t_ctid = tp.t_self;
6223
6224 MarkBufferDirty(buffer);
6225
6226 /*
6227 * XLOG stuff
6228 *
6229 * The WAL records generated here match heap_delete(). The same recovery
6230 * routines are used.
6231 */
6232 if (RelationNeedsWAL(relation))
6233 {
6234 xl_heap_delete xlrec;
6235 XLogRecPtr recptr;
6236
6237 xlrec.flags = XLH_DELETE_IS_SUPER;
6239 tp.t_data->t_infomask2);
6241 xlrec.xmax = xid;
6242
6246
6247 /* No replica identity & replication origin logged */
6248
6249 recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_DELETE);
6250
6251 PageSetLSN(page, recptr);
6252 }
6253
6255
6257
6258 if (HeapTupleHasExternal(&tp))
6259 {
6260 Assert(!IsToastRelation(relation));
6261 heap_toast_delete(relation, &tp, true);
6262 }
6263
6264 /*
6265 * Never need to mark tuple for invalidation, since catalogs don't support
6266 * speculative insertion
6267 */
6268
6269 /* Now we can release the buffer */
6270 ReleaseBuffer(buffer);
6271
6272 /* count deletion, as we counted the insertion too */
6273 pgstat_count_heap_delete(relation);
6274}
uint32 BlockNumber
Definition: block.h:31
int Buffer
Definition: buf.h:23
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:2945
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:5537
Buffer ReadBuffer(Relation reln, BlockNumber blockNum)
Definition: bufmgr.c:751
#define BUFFER_LOCK_UNLOCK
Definition: bufmgr.h:196
static Page BufferGetPage(Buffer buffer)
Definition: bufmgr.h:414
#define BUFFER_LOCK_EXCLUSIVE
Definition: bufmgr.h:198
static bool PageIsAllVisible(const PageData *page)
Definition: bufpage.h:429
static Item PageGetItem(const PageData *page, const ItemIdData *itemId)
Definition: bufpage.h:354
static ItemId PageGetItemId(Page page, OffsetNumber offsetNumber)
Definition: bufpage.h:244
static void PageSetLSN(Page page, XLogRecPtr lsn)
Definition: bufpage.h:391
PageData * Page
Definition: bufpage.h:82
#define PageSetPrunable(page, xid)
Definition: bufpage.h:447
uint32 TransactionId
Definition: c.h:623
bool IsToastRelation(Relation relation)
Definition: catalog.c:175
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:226
Assert(PointerIsAligned(start, uint64))
static uint8 compute_infobits(uint16 infomask, uint16 infomask2)
Definition: heapam.c:2700
#define XLOG_HEAP_DELETE
Definition: heapam_xlog.h:34
#define SizeOfHeapDelete
Definition: heapam_xlog.h:121
#define XLH_DELETE_IS_SUPER
Definition: heapam_xlog.h:105
void heap_toast_delete(Relation rel, HeapTuple oldtup, bool is_speculative)
Definition: heaptoast.c:43
HeapTupleHeaderData * HeapTupleHeader
Definition: htup.h:23
static bool HeapTupleHasExternal(const HeapTupleData *tuple)
Definition: htup_details.h:762
#define HEAP_XMAX_BITS
Definition: htup_details.h:281
static bool HeapTupleHeaderIsHeapOnly(const HeapTupleHeaderData *tup)
Definition: htup_details.h:555
#define HEAP_MOVED
Definition: htup_details.h:213
static bool HeapTupleHeaderIsSpeculative(const HeapTupleHeaderData *tup)
Definition: htup_details.h:461
static void HeapTupleHeaderSetXmin(HeapTupleHeaderData *tup, TransactionId xid)
Definition: htup_details.h:331
#define ItemIdGetLength(itemId)
Definition: itemid.h:59
#define ItemIdIsNormal(itemId)
Definition: itemid.h:99
static OffsetNumber ItemPointerGetOffsetNumber(const ItemPointerData *pointer)
Definition: itemptr.h:124
static BlockNumber ItemPointerGetBlockNumber(const ItemPointerData *pointer)
Definition: itemptr.h:103
static bool ItemPointerIsValid(const ItemPointerData *pointer)
Definition: itemptr.h:83
#define START_CRIT_SECTION()
Definition: miscadmin.h:150
#define END_CRIT_SECTION()
Definition: miscadmin.h:152
void pgstat_count_heap_delete(Relation rel)
#define RelationGetRelid(relation)
Definition: rel.h:516
#define RelationNeedsWAL(relation)
Definition: rel.h:639
TransactionId TransactionXmin
Definition: snapmgr.c:158
ItemPointerData t_self
Definition: htup.h:65
uint32 t_len
Definition: htup.h:64
HeapTupleHeader t_data
Definition: htup.h:68
Oid t_tableOid
Definition: htup.h:66
TransactionId t_xmin
Definition: htup_details.h:124
union HeapTupleHeaderData::@47 t_choice
ItemPointerData t_ctid
Definition: htup_details.h:161
HeapTupleFields t_heap
Definition: htup_details.h:157
Form_pg_class rd_rel
Definition: rel.h:111
TransactionId xmax
Definition: heapam_xlog.h:115
OffsetNumber offnum
Definition: heapam_xlog.h:116
uint8 infobits_set
Definition: heapam_xlog.h:117
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:280
#define InvalidTransactionId
Definition: transam.h:31
#define TransactionIdIsValid(xid)
Definition: transam.h:41
TransactionId GetCurrentTransactionId(void)
Definition: xact.c:454
uint64 XLogRecPtr
Definition: xlogdefs.h:21
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:474
void XLogRegisterData(const void *data, uint32 len)
Definition: xloginsert.c:364
void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
Definition: xloginsert.c:242
void XLogBeginInsert(void)
Definition: xloginsert.c:149
#define REGBUF_STANDARD
Definition: xloginsert.h:35

References Assert(), BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetPage(), compute_infobits(), elog, END_CRIT_SECTION, ERROR, xl_heap_delete::flags, GetCurrentTransactionId(), HEAP_MOVED, heap_toast_delete(), HEAP_XMAX_BITS, HeapTupleHasExternal(), HeapTupleHeaderIsHeapOnly(), HeapTupleHeaderIsSpeculative(), HeapTupleHeaderSetXmin(), xl_heap_delete::infobits_set, InvalidTransactionId, IsToastRelation(), ItemIdGetLength, ItemIdIsNormal, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), ItemPointerIsValid(), LockBuffer(), MarkBufferDirty(), xl_heap_delete::offnum, PageGetItem(), PageGetItemId(), PageIsAllVisible(), PageSetLSN(), PageSetPrunable, pgstat_count_heap_delete(), RelationData::rd_rel, ReadBuffer(), REGBUF_STANDARD, RelationGetRelid, RelationNeedsWAL, ReleaseBuffer(), SizeOfHeapDelete, START_CRIT_SECTION, HeapTupleHeaderData::t_choice, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleHeaderData::t_heap, HeapTupleHeaderData::t_infomask, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, HeapTupleFields::t_xmin, TransactionIdIsValid, TransactionIdPrecedes(), TransactionXmin, XLH_DELETE_IS_SUPER, XLOG_HEAP_DELETE, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), XLogRegisterData(), and xl_heap_delete::xmax.

Referenced by heapam_tuple_complete_speculative(), and toast_delete_datum().

◆ heap_beginscan()

TableScanDesc heap_beginscan(Relation relation, Snapshot snapshot, int nkeys, ScanKey key, ParallelTableScanDesc parallel_scan, uint32 flags)

Definition at line 1079 of file heapam.c.

1083{
1084 HeapScanDesc scan;
1085
1086 /*
1087 * increment relation ref count while scanning relation
1088 *
1089 * This is just to make really sure the relcache entry won't go away while
1090 * the scan has a pointer to it. Caller should be holding the rel open
1091 * anyway, so this is redundant in all normal scenarios...
1092 */
1094
1095 /*
1096 * allocate and initialize scan descriptor
1097 */
1098 if (flags & SO_TYPE_BITMAPSCAN)
1099 {
1101
1102 /*
1103 * Bitmap Heap scans do not have any fields that a normal Heap Scan
1104 * does not have, so no special initializations required here.
1105 */
1106 scan = (HeapScanDesc) bscan;
1107 }
1108 else
1109 scan = (HeapScanDesc) palloc(sizeof(HeapScanDescData));
1110
1111 scan->rs_base.rs_rd = relation;
1112 scan->rs_base.rs_snapshot = snapshot;
1113 scan->rs_base.rs_nkeys = nkeys;
1114 scan->rs_base.rs_flags = flags;
1115 scan->rs_base.rs_parallel = parallel_scan;
1116 scan->rs_strategy = NULL; /* set in initscan */
1117 scan->rs_cbuf = InvalidBuffer;
1118
1119 /*
1120 * Disable page-at-a-time mode if it's not a MVCC-safe snapshot.
1121 */
1122 if (!(snapshot && IsMVCCSnapshot(snapshot)))
1123 scan->rs_base.rs_flags &= ~SO_ALLOW_PAGEMODE;
1124
1125 /*
1126 * For seqscan and sample scans in a serializable transaction, acquire a
1127 * predicate lock on the entire relation. This is required not only to
1128 * lock all the matching tuples, but also to conflict with new insertions
1129 * into the table. In an indexscan, we take page locks on the index pages
1130 * covering the range specified in the scan qual, but in a heap scan there
1131 * is nothing more fine-grained to lock. A bitmap scan is a different
1132 * story, there we have already scanned the index and locked the index
1133 * pages covering the predicate. But in that case we still have to lock
1134 * any matching heap tuples. For sample scan we could optimize the locking
1135 * to be at least page-level granularity, but we'd need to add per-tuple
1136 * locking for that.
1137 */
1139 {
1140 /*
1141 * Ensure a missing snapshot is noticed reliably, even if the
1142 * isolation mode means predicate locking isn't performed (and
1143 * therefore the snapshot isn't used here).
1144 */
1145 Assert(snapshot);
1146 PredicateLockRelation(relation, snapshot);
1147 }
1148
1149 /* we only need to set this up once */
1150 scan->rs_ctup.t_tableOid = RelationGetRelid(relation);
1151
1152 /*
1153 * Allocate memory to keep track of page allocation for parallel workers
1154 * when doing a parallel scan.
1155 */
1156 if (parallel_scan != NULL)
1158 else
1159 scan->rs_parallelworkerdata = NULL;
1160
1161 /*
1162 * we do this here instead of in initscan() because heap_rescan also calls
1163 * initscan() and we don't want to allocate memory again
1164 */
1165 if (nkeys > 0)
1166 scan->rs_base.rs_key = (ScanKey) palloc(sizeof(ScanKeyData) * nkeys);
1167 else
1168 scan->rs_base.rs_key = NULL;
1169
1170 initscan(scan, key, false);
1171
1172 scan->rs_read_stream = NULL;
1173
1174 /*
1175 * Set up a read stream for sequential scans and TID range scans. This
1176 * should be done after initscan() because initscan() allocates the
1177 * BufferAccessStrategy object passed to the read stream API.
1178 */
1179 if (scan->rs_base.rs_flags & SO_TYPE_SEQSCAN ||
1181 {
1183
1184 if (scan->rs_base.rs_parallel)
1186 else
1188
1189 /* ---
1190 * It is safe to use batchmode as the only locks taken by `cb`
1191 * are never taken while waiting for IO:
1192 * - SyncScanLock is used in the non-parallel case
1193 * - in the parallel case, only spinlocks and atomics are used
1194 * ---
1195 */
1198 scan->rs_strategy,
1199 scan->rs_base.rs_rd,
1201 cb,
1202 scan,
1203 0);
1204 }
1205 else if (scan->rs_base.rs_flags & SO_TYPE_BITMAPSCAN)
1206 {
1209 scan->rs_strategy,
1210 scan->rs_base.rs_rd,
1213 scan,
1214 sizeof(TBMIterateResult));
1215 }
1216
1217
1218 return (TableScanDesc) scan;
1219}
static BlockNumber heap_scan_stream_read_next_parallel(ReadStream *stream, void *callback_private_data, void *per_buffer_data)
Definition: heapam.c:226
static BlockNumber heap_scan_stream_read_next_serial(ReadStream *stream, void *callback_private_data, void *per_buffer_data)
Definition: heapam.c:264
static BlockNumber bitmapheap_stream_read_next(ReadStream *pgsr, void *private_data, void *per_buffer_data)
Definition: heapam.c:289
static void initscan(HeapScanDesc scan, ScanKey key, bool keep_startblock)
Definition: heapam.c:329
struct HeapScanDescData * HeapScanDesc
Definition: heapam.h:100
void PredicateLockRelation(Relation relation, Snapshot snapshot)
Definition: predicate.c:2576
ReadStream * read_stream_begin_relation(int flags, BufferAccessStrategy strategy, Relation rel, ForkNumber forknum, ReadStreamBlockNumberCB callback, void *callback_private_data, size_t per_buffer_data_size)
Definition: read_stream.c:716
#define READ_STREAM_USE_BATCHING
Definition: read_stream.h:64
BlockNumber(* ReadStreamBlockNumberCB)(ReadStream *stream, void *callback_private_data, void *per_buffer_data)
Definition: read_stream.h:77
#define READ_STREAM_DEFAULT
Definition: read_stream.h:21
#define READ_STREAM_SEQUENTIAL
Definition: read_stream.h:36
void RelationIncrementReferenceCount(Relation rel)
Definition: relcache.c:2171
@ MAIN_FORKNUM
Definition: relpath.h:58
ScanKeyData * ScanKey
Definition: skey.h:75
#define IsMVCCSnapshot(snapshot)
Definition: snapmgr.h:55
BufferAccessStrategy rs_strategy
Definition: heapam.h:71
Buffer rs_cbuf
Definition: heapam.h:68
ParallelBlockTableScanWorkerData * rs_parallelworkerdata
Definition: heapam.h:93
HeapTupleData rs_ctup
Definition: heapam.h:73
ReadStream * rs_read_stream
Definition: heapam.h:76
TableScanDescData rs_base
Definition: heapam.h:56
Relation rs_rd
Definition: relscan.h:36
uint32 rs_flags
Definition: relscan.h:64
struct ScanKeyData * rs_key
Definition: relscan.h:39
struct SnapshotData * rs_snapshot
Definition: relscan.h:37
struct ParallelTableScanDescData * rs_parallel
Definition: relscan.h:66
@ SO_TYPE_TIDRANGESCAN
Definition: tableam.h:52
@ SO_TYPE_SAMPLESCAN
Definition: tableam.h:50
@ SO_TYPE_SEQSCAN
Definition: tableam.h:48
@ SO_TYPE_BITMAPSCAN
Definition: tableam.h:49

References Assert(), bitmapheap_stream_read_next(), heap_scan_stream_read_next_parallel(), heap_scan_stream_read_next_serial(), initscan(), InvalidBuffer, IsMVCCSnapshot, sort-test::key, MAIN_FORKNUM, palloc(), PredicateLockRelation(), read_stream_begin_relation(), READ_STREAM_DEFAULT, READ_STREAM_SEQUENTIAL, READ_STREAM_USE_BATCHING, RelationGetRelid, RelationIncrementReferenceCount(), HeapScanDescData::rs_base, HeapScanDescData::rs_cbuf, HeapScanDescData::rs_ctup, TableScanDescData::rs_flags, TableScanDescData::rs_key, TableScanDescData::rs_nkeys, TableScanDescData::rs_parallel, HeapScanDescData::rs_parallelworkerdata, TableScanDescData::rs_rd, HeapScanDescData::rs_read_stream, TableScanDescData::rs_snapshot, HeapScanDescData::rs_strategy, SO_TYPE_BITMAPSCAN, SO_TYPE_SAMPLESCAN, SO_TYPE_SEQSCAN, SO_TYPE_TIDRANGESCAN, and HeapTupleData::t_tableOid.
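
For orientation, a sequential scan built directly on this function is usually paired with heap_getnext() and heap_endscan(), both documented on this page. A minimal hypothetical sketch, assuming "rel" is already open and an active snapshot is available:

    /* Hedged sketch of a plain forward sequential scan over "rel". */
    TableScanDesc scan = heap_beginscan(rel, GetActiveSnapshot(), 0, NULL, NULL,
                                        SO_TYPE_SEQSCAN | SO_ALLOW_STRAT |
                                        SO_ALLOW_SYNC | SO_ALLOW_PAGEMODE);
    HeapTuple   tup;

    while ((tup = heap_getnext(scan, ForwardScanDirection)) != NULL)
    {
        /* inspect tup here; it is only valid while the scan holds its buffer pin */
    }

    heap_endscan(scan);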

◆ heap_delete()

TM_Result heap_delete(Relation relation, ItemPointer tid, CommandId cid, Snapshot crosscheck, bool wait, struct TM_FailureData *tmfd, bool changingPart)

Definition at line 2745 of file heapam.c.

2748{
2749 TM_Result result;
2751 ItemId lp;
2752 HeapTupleData tp;
2753 Page page;
2754 BlockNumber block;
2755 Buffer buffer;
2756 Buffer vmbuffer = InvalidBuffer;
2757 TransactionId new_xmax;
2758 uint16 new_infomask,
2759 new_infomask2;
2760 bool have_tuple_lock = false;
2761 bool iscombo;
2762 bool all_visible_cleared = false;
2763 HeapTuple old_key_tuple = NULL; /* replica identity of the tuple */
2764 bool old_key_copied = false;
2765
2767
2768 /*
2769 * Forbid this during a parallel operation, lest it allocate a combo CID.
2770 * Other workers might need that combo CID for visibility checks, and we
2771 * have no provision for broadcasting it to them.
2772 */
2773 if (IsInParallelMode())
2774 ereport(ERROR,
2775 (errcode(ERRCODE_INVALID_TRANSACTION_STATE),
2776 errmsg("cannot delete tuples during a parallel operation")));
2777
2778 block = ItemPointerGetBlockNumber(tid);
2779 buffer = ReadBuffer(relation, block);
2780 page = BufferGetPage(buffer);
2781
2782 /*
2783 * Before locking the buffer, pin the visibility map page if it appears to
2784 * be necessary. Since we haven't got the lock yet, someone else might be
2785 * in the middle of changing this, so we'll need to recheck after we have
2786 * the lock.
2787 */
2788 if (PageIsAllVisible(page))
2789 visibilitymap_pin(relation, block, &vmbuffer);
2790
2792
2795
2796 tp.t_tableOid = RelationGetRelid(relation);
2797 tp.t_data = (HeapTupleHeader) PageGetItem(page, lp);
2798 tp.t_len = ItemIdGetLength(lp);
2799 tp.t_self = *tid;
2800
2801l1:
2802
2803 /*
2804 * If we didn't pin the visibility map page and the page has become all
2805 * visible while we were busy locking the buffer, we'll have to unlock and
2806 * re-lock, to avoid holding the buffer lock across an I/O. That's a bit
2807 * unfortunate, but hopefully shouldn't happen often.
2808 */
2809 if (vmbuffer == InvalidBuffer && PageIsAllVisible(page))
2810 {
2812 visibilitymap_pin(relation, block, &vmbuffer);
2814 }
2815
2816 result = HeapTupleSatisfiesUpdate(&tp, cid, buffer);
2817
2818 if (result == TM_Invisible)
2819 {
2820 UnlockReleaseBuffer(buffer);
2821 ereport(ERROR,
2822 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
2823 errmsg("attempted to delete invisible tuple")));
2824 }
2825 else if (result == TM_BeingModified && wait)
2826 {
2827 TransactionId xwait;
2828 uint16 infomask;
2829
2830 /* must copy state data before unlocking buffer */
2832 infomask = tp.t_data->t_infomask;
2833
2834 /*
2835 * Sleep until concurrent transaction ends -- except when there's a
2836 * single locker and it's our own transaction. Note we don't care
2837 * which lock mode the locker has, because we need the strongest one.
2838 *
2839 * Before sleeping, we need to acquire tuple lock to establish our
2840 * priority for the tuple (see heap_lock_tuple). LockTuple will
2841 * release us when we are next-in-line for the tuple.
2842 *
2843 * If we are forced to "start over" below, we keep the tuple lock;
2844 * this arranges that we stay at the head of the line while rechecking
2845 * tuple state.
2846 */
2847 if (infomask & HEAP_XMAX_IS_MULTI)
2848 {
2849 bool current_is_member = false;
2850
2851 if (DoesMultiXactIdConflict((MultiXactId) xwait, infomask,
2852 LockTupleExclusive, &current_is_member))
2853 {
2855
2856 /*
2857 * Acquire the lock, if necessary (but skip it when we're
2858 * requesting a lock and already have one; avoids deadlock).
2859 */
2860 if (!current_is_member)
2862 LockWaitBlock, &have_tuple_lock);
2863
2864 /* wait for multixact */
2866 relation, &(tp.t_self), XLTW_Delete,
2867 NULL);
2869
2870 /*
2871 * If xwait had just locked the tuple then some other xact
2872 * could update this tuple before we get to this point. Check
2873 * for xmax change, and start over if so.
2874 *
2875 * We also must start over if we didn't pin the VM page, and
2876 * the page has become all visible.
2877 */
2878 if ((vmbuffer == InvalidBuffer && PageIsAllVisible(page)) ||
2879 xmax_infomask_changed(tp.t_data->t_infomask, infomask) ||
2881 xwait))
2882 goto l1;
2883 }
2884
2885 /*
2886 * You might think the multixact is necessarily done here, but not
2887 * so: it could have surviving members, namely our own xact or
2888 * other subxacts of this backend. It is legal for us to delete
2889 * the tuple in either case, however (the latter case is
2890 * essentially a situation of upgrading our former shared lock to
2891 * exclusive). We don't bother changing the on-disk hint bits
2892 * since we are about to overwrite the xmax altogether.
2893 */
2894 }
2895 else if (!TransactionIdIsCurrentTransactionId(xwait))
2896 {
2897 /*
2898 * Wait for regular transaction to end; but first, acquire tuple
2899 * lock.
2900 */
2903 LockWaitBlock, &have_tuple_lock);
2904 XactLockTableWait(xwait, relation, &(tp.t_self), XLTW_Delete);
2906
2907 /*
2908 * xwait is done, but if xwait had just locked the tuple then some
2909 * other xact could update this tuple before we get to this point.
2910 * Check for xmax change, and start over if so.
2911 *
2912 * We also must start over if we didn't pin the VM page, and the
2913 * page has become all visible.
2914 */
2915 if ((vmbuffer == InvalidBuffer && PageIsAllVisible(page)) ||
2916 xmax_infomask_changed(tp.t_data->t_infomask, infomask) ||
2918 xwait))
2919 goto l1;
2920
2921 /* Otherwise check if it committed or aborted */
2922 UpdateXmaxHintBits(tp.t_data, buffer, xwait);
2923 }
2924
2925 /*
2926 * We may overwrite if previous xmax aborted, or if it committed but
2927 * only locked the tuple without updating it.
2928 */
2929 if ((tp.t_data->t_infomask & HEAP_XMAX_INVALID) ||
2932 result = TM_Ok;
2933 else if (!ItemPointerEquals(&tp.t_self, &tp.t_data->t_ctid))
2934 result = TM_Updated;
2935 else
2936 result = TM_Deleted;
2937 }
2938
2939 /* sanity check the result HeapTupleSatisfiesUpdate() and the logic above */
2940 if (result != TM_Ok)
2941 {
2942 Assert(result == TM_SelfModified ||
2943 result == TM_Updated ||
2944 result == TM_Deleted ||
2945 result == TM_BeingModified);
2947 Assert(result != TM_Updated ||
2949 }
2950
2951 if (crosscheck != InvalidSnapshot && result == TM_Ok)
2952 {
2953 /* Perform additional check for transaction-snapshot mode RI updates */
2954 if (!HeapTupleSatisfiesVisibility(&tp, crosscheck, buffer))
2955 result = TM_Updated;
2956 }
2957
2958 if (result != TM_Ok)
2959 {
2960 tmfd->ctid = tp.t_data->t_ctid;
2962 if (result == TM_SelfModified)
2964 else
2965 tmfd->cmax = InvalidCommandId;
2966 UnlockReleaseBuffer(buffer);
2967 if (have_tuple_lock)
2969 if (vmbuffer != InvalidBuffer)
2970 ReleaseBuffer(vmbuffer);
2971 return result;
2972 }
2973
2974 /*
2975 * We're about to do the actual delete -- check for conflict first, to
2976 * avoid possibly having to roll back work we've just done.
2977 *
2978 * This is safe without a recheck as long as there is no possibility of
2979 * another process scanning the page between this check and the delete
2980 * being visible to the scan (i.e., an exclusive buffer content lock is
2981 * continuously held from this point until the tuple delete is visible).
2982 */
2984
2985 /* replace cid with a combo CID if necessary */
2986 HeapTupleHeaderAdjustCmax(tp.t_data, &cid, &iscombo);
2987
2988 /*
2989 * Compute replica identity tuple before entering the critical section so
2990 * we don't PANIC upon a memory allocation failure.
2991 */
2992 old_key_tuple = ExtractReplicaIdentity(relation, &tp, true, &old_key_copied);
2993
2994 /*
2995 * If this is the first possibly-multixact-able operation in the current
2996 * transaction, set my per-backend OldestMemberMXactId setting. We can be
2997 * certain that the transaction will never become a member of any older
2998 * MultiXactIds than that. (We have to do this even if we end up just
2999 * using our own TransactionId below, since some other backend could
3000 * incorporate our XID into a MultiXact immediately afterwards.)
3001 */
3003
3006 xid, LockTupleExclusive, true,
3007 &new_xmax, &new_infomask, &new_infomask2);
3008
3010
3011 /*
3012 * If this transaction commits, the tuple will become DEAD sooner or
3013 * later. Set flag that this page is a candidate for pruning once our xid
3014 * falls below the OldestXmin horizon. If the transaction finally aborts,
3015 * the subsequent page pruning will be a no-op and the hint will be
3016 * cleared.
3017 */
3018 PageSetPrunable(page, xid);
3019
3020 if (PageIsAllVisible(page))
3021 {
3022 all_visible_cleared = true;
3023 PageClearAllVisible(page);
3024 visibilitymap_clear(relation, BufferGetBlockNumber(buffer),
3025 vmbuffer, VISIBILITYMAP_VALID_BITS);
3026 }
3027
3028 /* store transaction information of xact deleting the tuple */
3030 tp.t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED;
3031 tp.t_data->t_infomask |= new_infomask;
3032 tp.t_data->t_infomask2 |= new_infomask2;
3034 HeapTupleHeaderSetXmax(tp.t_data, new_xmax);
3035 HeapTupleHeaderSetCmax(tp.t_data, cid, iscombo);
3036 /* Make sure there is no forward chain link in t_ctid */
3037 tp.t_data->t_ctid = tp.t_self;
3038
3039 /* Signal that this is actually a move into another partition */
3040 if (changingPart)
3042
3043 MarkBufferDirty(buffer);
3044
3045 /*
3046 * XLOG stuff
3047 *
3048 * NB: heap_abort_speculative() uses the same xlog record and replay
3049 * routines.
3050 */
3051 if (RelationNeedsWAL(relation))
3052 {
3053 xl_heap_delete xlrec;
3054 xl_heap_header xlhdr;
3055 XLogRecPtr recptr;
3056
3057 /*
3058 * For logical decode we need combo CIDs to properly decode the
3059 * catalog
3060 */
3062 log_heap_new_cid(relation, &tp);
3063
3064 xlrec.flags = 0;
3065 if (all_visible_cleared)
3067 if (changingPart)
3070 tp.t_data->t_infomask2);
3072 xlrec.xmax = new_xmax;
3073
3074 if (old_key_tuple != NULL)
3075 {
3076 if (relation->rd_rel->relreplident == REPLICA_IDENTITY_FULL)
3078 else
3080 }
3081
3084
3086
3087 /*
3088 * Log replica identity of the deleted tuple if there is one
3089 */
3090 if (old_key_tuple != NULL)
3091 {
3092 xlhdr.t_infomask2 = old_key_tuple->t_data->t_infomask2;
3093 xlhdr.t_infomask = old_key_tuple->t_data->t_infomask;
3094 xlhdr.t_hoff = old_key_tuple->t_data->t_hoff;
3095
3097 XLogRegisterData((char *) old_key_tuple->t_data
3099 old_key_tuple->t_len
3101 }
3102
3103 /* filtering by origin on a row level is much more efficient */
3105
3106 recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_DELETE);
3107
3108 PageSetLSN(page, recptr);
3109 }
3110
3112
3114
3115 if (vmbuffer != InvalidBuffer)
3116 ReleaseBuffer(vmbuffer);
3117
3118 /*
3119 * If the tuple has toasted out-of-line attributes, we need to delete
3120 * those items too. We have to do this before releasing the buffer
3121 * because we need to look at the contents of the tuple, but it's OK to
3122 * release the content lock on the buffer first.
3123 */
3124 if (relation->rd_rel->relkind != RELKIND_RELATION &&
3125 relation->rd_rel->relkind != RELKIND_MATVIEW)
3126 {
3127 /* toast table entries should never be recursively toasted */
3129 }
3130 else if (HeapTupleHasExternal(&tp))
3131 heap_toast_delete(relation, &tp, false);
3132
3133 /*
3134 * Mark tuple for invalidation from system caches at next command
3135 * boundary. We have to do this before releasing the buffer because we
3136 * need to look at the contents of the tuple.
3137 */
3138 CacheInvalidateHeapTuple(relation, &tp, NULL);
3139
3140 /* Now we can release the buffer */
3141 ReleaseBuffer(buffer);
3142
3143 /*
3144 * Release the lmgr tuple lock, if we had it.
3145 */
3146 if (have_tuple_lock)
3148
3149 pgstat_count_heap_delete(relation);
3150
3151 if (old_key_tuple != NULL && old_key_copied)
3152 heap_freetuple(old_key_tuple);
3153
3154 return TM_Ok;
3155}
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:4161
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:5320
static void PageClearAllVisible(Page page)
Definition: bufpage.h:439
#define InvalidCommandId
Definition: c.h:640
TransactionId MultiXactId
Definition: c.h:633
uint16_t uint16
Definition: c.h:501
void HeapTupleHeaderAdjustCmax(const HeapTupleHeaderData *tup, CommandId *cmax, bool *iscombo)
Definition: combocid.c:153
CommandId HeapTupleHeaderGetCmax(const HeapTupleHeaderData *tup)
Definition: combocid.c:118
int errcode(int sqlerrcode)
Definition: elog.c:854
int errmsg(const char *fmt,...)
Definition: elog.c:1071
#define ereport(elevel,...)
Definition: elog.h:149
static bool DoesMultiXactIdConflict(MultiXactId multi, uint16 infomask, LockTupleMode lockmode, bool *current_is_member)
Definition: heapam.c:7551
static XLogRecPtr log_heap_new_cid(Relation relation, HeapTuple tup)
Definition: heapam.c:9016
static void compute_new_xmax_infomask(TransactionId xmax, uint16 old_infomask, uint16 old_infomask2, TransactionId add_to_xmax, LockTupleMode mode, bool is_update, TransactionId *result_xmax, uint16 *result_infomask, uint16 *result_infomask2)
Definition: heapam.c:5290
static HeapTuple ExtractReplicaIdentity(Relation relation, HeapTuple tp, bool key_required, bool *copy)
Definition: heapam.c:9097
static bool heap_acquire_tuplock(Relation relation, ItemPointer tid, LockTupleMode mode, LockWaitPolicy wait_policy, bool *have_tuple_lock)
Definition: heapam.c:5241
static void MultiXactIdWait(MultiXactId multi, MultiXactStatus status, uint16 infomask, Relation rel, ItemPointer ctid, XLTW_Oper oper, int *remaining)
Definition: heapam.c:7729
static bool xmax_infomask_changed(uint16 new_infomask, uint16 old_infomask)
Definition: heapam.c:2722
#define UnlockTupleTuplock(rel, tup, mode)
Definition: heapam.c:164
static void UpdateXmaxHintBits(HeapTupleHeader tuple, Buffer buffer, TransactionId xid)
Definition: heapam.c:1967
bool HeapTupleSatisfiesVisibility(HeapTuple htup, Snapshot snapshot, Buffer buffer)
bool HeapTupleHeaderIsOnlyLocked(HeapTupleHeader tuple)
TM_Result HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid, Buffer buffer)
#define XLH_DELETE_CONTAINS_OLD_KEY
Definition: heapam_xlog.h:104
#define XLH_DELETE_ALL_VISIBLE_CLEARED
Definition: heapam_xlog.h:102
#define SizeOfHeapHeader
Definition: heapam_xlog.h:157
#define XLH_DELETE_IS_PARTITION_MOVE
Definition: heapam_xlog.h:106
#define XLH_DELETE_CONTAINS_OLD_TUPLE
Definition: heapam_xlog.h:103
void heap_freetuple(HeapTuple htup)
Definition: heaptuple.c:1435
#define SizeofHeapTupleHeader
Definition: htup_details.h:185
static bool HEAP_XMAX_IS_LOCKED_ONLY(uint16 infomask)
Definition: htup_details.h:226
static void HeapTupleHeaderSetCmax(HeapTupleHeaderData *tup, CommandId cid, bool iscombo)
Definition: htup_details.h:431
static void HeapTupleHeaderClearHotUpdated(HeapTupleHeaderData *tup)
Definition: htup_details.h:549
static TransactionId HeapTupleHeaderGetRawXmax(const HeapTupleHeaderData *tup)
Definition: htup_details.h:377
#define HEAP_XMAX_IS_MULTI
Definition: htup_details.h:209
#define HEAP_XMAX_INVALID
Definition: htup_details.h:208
static TransactionId HeapTupleHeaderGetUpdateXid(const HeapTupleHeaderData *tup)
Definition: htup_details.h:397
static void HeapTupleHeaderSetMovedPartitions(HeapTupleHeaderData *tup)
Definition: htup_details.h:486
static void HeapTupleHeaderSetXmax(HeapTupleHeaderData *tup, TransactionId xid)
Definition: htup_details.h:383
void CacheInvalidateHeapTuple(Relation relation, HeapTuple tuple, HeapTuple newtuple)
Definition: inval.c:1561
bool ItemPointerEquals(ItemPointer pointer1, ItemPointer pointer2)
Definition: itemptr.c:35
void XactLockTableWait(TransactionId xid, Relation rel, ItemPointer ctid, XLTW_Oper oper)
Definition: lmgr.c:663
@ XLTW_Delete
Definition: lmgr.h:28
@ LockWaitBlock
Definition: lockoptions.h:39
@ LockTupleExclusive
Definition: lockoptions.h:58
void MultiXactIdSetOldestMember(void)
Definition: multixact.c:673
@ MultiXactStatusUpdate
Definition: multixact.h:46
void CheckForSerializableConflictIn(Relation relation, ItemPointer tid, BlockNumber blkno)
Definition: predicate.c:4336
#define RelationIsAccessibleInLogicalDecoding(relation)
Definition: rel.h:695
#define InvalidSnapshot
Definition: snapshot.h:119
TransactionId xmax
Definition: tableam.h:144
CommandId cmax
Definition: tableam.h:145
ItemPointerData ctid
Definition: tableam.h:143
uint16 t_infomask
Definition: heapam_xlog.h:153
uint16 t_infomask2
Definition: heapam_xlog.h:152
TM_Result
Definition: tableam.h:72
@ TM_Ok
Definition: tableam.h:77
@ TM_BeingModified
Definition: tableam.h:99
@ TM_Deleted
Definition: tableam.h:92
@ TM_Updated
Definition: tableam.h:89
@ TM_SelfModified
Definition: tableam.h:83
@ TM_Invisible
Definition: tableam.h:80
#define TransactionIdEquals(id1, id2)
Definition: transam.h:43
bool visibilitymap_clear(Relation rel, BlockNumber heapBlk, Buffer vmbuf, uint8 flags)
void visibilitymap_pin(Relation rel, BlockNumber heapBlk, Buffer *vmbuf)
#define VISIBILITYMAP_VALID_BITS
bool TransactionIdIsCurrentTransactionId(TransactionId xid)
Definition: xact.c:941
bool IsInParallelMode(void)
Definition: xact.c:1089
#define XLOG_INCLUDE_ORIGIN
Definition: xlog.h:154
void XLogSetRecordFlags(uint8 flags)
Definition: xloginsert.c:456

References Assert(), BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage(), CacheInvalidateHeapTuple(), CheckForSerializableConflictIn(), TM_FailureData::cmax, compute_infobits(), compute_new_xmax_infomask(), TM_FailureData::ctid, DoesMultiXactIdConflict(), END_CRIT_SECTION, ereport, errcode(), errmsg(), ERROR, ExtractReplicaIdentity(), xl_heap_delete::flags, GetCurrentTransactionId(), heap_acquire_tuplock(), heap_freetuple(), HEAP_MOVED, heap_toast_delete(), HEAP_XMAX_BITS, HEAP_XMAX_INVALID, HEAP_XMAX_IS_LOCKED_ONLY(), HEAP_XMAX_IS_MULTI, HeapTupleHasExternal(), HeapTupleHeaderAdjustCmax(), HeapTupleHeaderClearHotUpdated(), HeapTupleHeaderGetCmax(), HeapTupleHeaderGetRawXmax(), HeapTupleHeaderGetUpdateXid(), HeapTupleHeaderIsOnlyLocked(), HeapTupleHeaderSetCmax(), HeapTupleHeaderSetMovedPartitions(), HeapTupleHeaderSetXmax(), HeapTupleSatisfiesUpdate(), HeapTupleSatisfiesVisibility(), xl_heap_delete::infobits_set, InvalidBuffer, InvalidCommandId, InvalidSnapshot, IsInParallelMode(), ItemIdGetLength, ItemIdIsNormal, ItemPointerEquals(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), ItemPointerIsValid(), LockBuffer(), LockTupleExclusive, LockWaitBlock, log_heap_new_cid(), MarkBufferDirty(), MultiXactIdSetOldestMember(), MultiXactIdWait(), MultiXactStatusUpdate, xl_heap_delete::offnum, PageClearAllVisible(), PageGetItem(), PageGetItemId(), PageIsAllVisible(), PageSetLSN(), PageSetPrunable, pgstat_count_heap_delete(), RelationData::rd_rel, ReadBuffer(), REGBUF_STANDARD, RelationGetRelid, RelationIsAccessibleInLogicalDecoding, RelationNeedsWAL, ReleaseBuffer(), SizeOfHeapDelete, SizeOfHeapHeader, SizeofHeapTupleHeader, START_CRIT_SECTION, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, xl_heap_header::t_hoff, HeapTupleHeaderData::t_hoff, xl_heap_header::t_infomask, HeapTupleHeaderData::t_infomask, xl_heap_header::t_infomask2, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TM_BeingModified, TM_Deleted, TM_Invisible, TM_Ok, TM_SelfModified, TM_Updated, TransactionIdEquals, TransactionIdIsCurrentTransactionId(), UnlockReleaseBuffer(), UnlockTupleTuplock, UpdateXmaxHintBits(), visibilitymap_clear(), visibilitymap_pin(), VISIBILITYMAP_VALID_BITS, XactLockTableWait(), XLH_DELETE_ALL_VISIBLE_CLEARED, XLH_DELETE_CONTAINS_OLD_KEY, XLH_DELETE_CONTAINS_OLD_TUPLE, XLH_DELETE_IS_PARTITION_MOVE, XLOG_HEAP_DELETE, XLOG_INCLUDE_ORIGIN, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), XLogRegisterData(), XLogSetRecordFlags(), XLTW_Delete, xl_heap_delete::xmax, TM_FailureData::xmax, and xmax_infomask_changed().

Referenced by heapam_tuple_delete(), and simple_heap_delete().
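
As an illustration of the return protocol (not part of the header), callers inspect the TM_Result and, on failure, the TM_FailureData. A minimal hypothetical sketch, with "rel", "tid" and the current command id assumed to be provided by the caller:

    /* Hedged sketch: reacting to heap_delete()'s TM_Result. */
    TM_FailureData tmfd;
    TM_Result   result;

    result = heap_delete(rel, tid, GetCurrentCommandId(true), InvalidSnapshot,
                         true /* wait */ , &tmfd, false /* changingPart */ );

    switch (result)
    {
        case TM_Ok:
            break;              /* deleted */
        case TM_SelfModified:
            /* already modified by this command; tmfd.cmax says by which */
            break;
        case TM_Updated:
        case TM_Deleted:
            /* concurrently updated or deleted; tmfd.ctid / tmfd.xmax describe it */
            break;
        default:
            elog(ERROR, "unexpected heap_delete result: %d", (int) result);
    }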

◆ heap_endscan()

void heap_endscan(TableScanDesc sscan)

Definition at line 1275 of file heapam.c.

1276{
1277 HeapScanDesc scan = (HeapScanDesc) sscan;
1278
1279 /* Note: no locking manipulations needed */
1280
1281 /*
1282 * unpin scan buffers
1283 */
1284 if (BufferIsValid(scan->rs_cbuf))
1285 ReleaseBuffer(scan->rs_cbuf);
1286
1287 /*
1288 * Must free the read stream before freeing the BufferAccessStrategy.
1289 */
1290 if (scan->rs_read_stream)
1291 read_stream_end(scan->rs_read_stream);
1292 
1293 /*
1294 * decrement relation reference count and free scan descriptor storage
1295 */
1296 RelationDecrementReferenceCount(scan->rs_base.rs_rd);
1297 
1298 if (scan->rs_base.rs_key)
1299 pfree(scan->rs_base.rs_key);
1300
1301 if (scan->rs_strategy != NULL)
1302 FreeAccessStrategy(scan->rs_strategy);
1303 
1304 if (scan->rs_parallelworkerdata != NULL)
1305 pfree(scan->rs_parallelworkerdata);
1306 
1307 if (scan->rs_base.rs_flags & SO_TEMP_SNAPSHOT)
1308 UnregisterSnapshot(scan->rs_base.rs_snapshot);
1309 
1310 pfree(scan);
1311}
static bool BufferIsValid(Buffer bufnum)
Definition: bufmgr.h:365
void read_stream_end(ReadStream *stream)
Definition: read_stream.c:1055
void RelationDecrementReferenceCount(Relation rel)
Definition: relcache.c:2184
void UnregisterSnapshot(Snapshot snapshot)
Definition: snapmgr.c:853
@ SO_TEMP_SNAPSHOT
Definition: tableam.h:64

References BufferIsValid(), FreeAccessStrategy(), pfree(), read_stream_end(), RelationDecrementReferenceCount(), ReleaseBuffer(), HeapScanDescData::rs_base, HeapScanDescData::rs_cbuf, TableScanDescData::rs_flags, TableScanDescData::rs_key, HeapScanDescData::rs_parallelworkerdata, TableScanDescData::rs_rd, HeapScanDescData::rs_read_stream, TableScanDescData::rs_snapshot, HeapScanDescData::rs_strategy, SO_TEMP_SNAPSHOT, and UnregisterSnapshot().

◆ heap_execute_freeze_tuple()

static inline void heap_execute_freeze_tuple(HeapTupleHeader tuple, HeapTupleFreeze *frz)

Definition at line 441 of file heapam.h.

442{
443 HeapTupleHeaderSetXmax(tuple, frz->xmax);
444
445 if (frz->frzflags & XLH_FREEZE_XVAC)
446 HeapTupleHeaderSetXvac(tuple, FrozenTransactionId);
447 
448 if (frz->frzflags & XLH_INVALID_XVAC)
449 HeapTupleHeaderSetXvac(tuple, InvalidTransactionId);
450 
451 tuple->t_infomask = frz->t_infomask;
452 tuple->t_infomask2 = frz->t_infomask2;
453}
#define XLH_INVALID_XVAC
Definition: heapam_xlog.h:340
#define XLH_FREEZE_XVAC
Definition: heapam_xlog.h:339
static void HeapTupleHeaderSetXvac(HeapTupleHeaderData *tup, TransactionId xid)
Definition: htup_details.h:451
uint8 frzflags
Definition: heapam.h:145
uint16 t_infomask2
Definition: heapam.h:143
TransactionId xmax
Definition: heapam.h:142
uint16 t_infomask
Definition: heapam.h:144
#define FrozenTransactionId
Definition: transam.h:33

References FrozenTransactionId, HeapTupleFreeze::frzflags, HeapTupleHeaderSetXmax(), HeapTupleHeaderSetXvac(), InvalidTransactionId, HeapTupleFreeze::t_infomask, HeapTupleHeaderData::t_infomask, HeapTupleFreeze::t_infomask2, HeapTupleHeaderData::t_infomask2, XLH_FREEZE_XVAC, XLH_INVALID_XVAC, and HeapTupleFreeze::xmax.

Referenced by heap_freeze_prepared_tuples(), heap_freeze_tuple(), and heap_xlog_prune_freeze().

◆ heap_fetch()

bool heap_fetch(Relation relation, Snapshot snapshot, HeapTuple tuple, Buffer *userbuf, bool keep_buf)

Definition at line 1573 of file heapam.c.

1578{
1579 ItemPointer tid = &(tuple->t_self);
1580 ItemId lp;
1581 Buffer buffer;
1582 Page page;
1583 OffsetNumber offnum;
1584 bool valid;
1585
1586 /*
1587 * Fetch and pin the appropriate page of the relation.
1588 */
1589 buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
1590
1591 /*
1592 * Need share lock on buffer to examine tuple commit status.
1593 */
1595 page = BufferGetPage(buffer);
1596
1597 /*
1598 * We'd better check for out-of-range offnum in case of VACUUM since the
1599 * TID was obtained.
1600 */
1601 offnum = ItemPointerGetOffsetNumber(tid);
1602 if (offnum < FirstOffsetNumber || offnum > PageGetMaxOffsetNumber(page))
1603 {
1605 ReleaseBuffer(buffer);
1606 *userbuf = InvalidBuffer;
1607 tuple->t_data = NULL;
1608 return false;
1609 }
1610
1611 /*
1612 * get the item line pointer corresponding to the requested tid
1613 */
1614 lp = PageGetItemId(page, offnum);
1615
1616 /*
1617 * Must check for deleted tuple.
1618 */
1619 if (!ItemIdIsNormal(lp))
1620 {
1622 ReleaseBuffer(buffer);
1623 *userbuf = InvalidBuffer;
1624 tuple->t_data = NULL;
1625 return false;
1626 }
1627
1628 /*
1629 * fill in *tuple fields
1630 */
1631 tuple->t_data = (HeapTupleHeader) PageGetItem(page, lp);
1632 tuple->t_len = ItemIdGetLength(lp);
1633 tuple->t_tableOid = RelationGetRelid(relation);
1634
1635 /*
1636 * check tuple visibility, then release lock
1637 */
1638 valid = HeapTupleSatisfiesVisibility(tuple, snapshot, buffer);
1639
1640 if (valid)
1641 PredicateLockTID(relation, &(tuple->t_self), snapshot,
1643
1644 HeapCheckForSerializableConflictOut(valid, relation, tuple, buffer, snapshot);
1645
1647
1648 if (valid)
1649 {
1650 /*
1651 * All checks passed, so return the tuple as valid. Caller is now
1652 * responsible for releasing the buffer.
1653 */
1654 *userbuf = buffer;
1655
1656 return true;
1657 }
1658
1659 /* Tuple failed time qual, but maybe caller wants to see it anyway. */
1660 if (keep_buf)
1661 *userbuf = buffer;
1662 else
1663 {
1664 ReleaseBuffer(buffer);
1665 *userbuf = InvalidBuffer;
1666 tuple->t_data = NULL;
1667 }
1668
1669 return false;
1670}
#define BUFFER_LOCK_SHARE
Definition: bufmgr.h:197
static OffsetNumber PageGetMaxOffsetNumber(const PageData *page)
Definition: bufpage.h:372
void HeapCheckForSerializableConflictOut(bool visible, Relation relation, HeapTuple tuple, Buffer buffer, Snapshot snapshot)
Definition: heapam.c:9201
static TransactionId HeapTupleHeaderGetXmin(const HeapTupleHeaderData *tup)
Definition: htup_details.h:324
uint16 OffsetNumber
Definition: off.h:24
void PredicateLockTID(Relation relation, ItemPointer tid, Snapshot snapshot, TransactionId tuple_xid)
Definition: predicate.c:2621

References BUFFER_LOCK_SHARE, BUFFER_LOCK_UNLOCK, BufferGetPage(), HeapCheckForSerializableConflictOut(), HeapTupleHeaderGetXmin(), HeapTupleSatisfiesVisibility(), InvalidBuffer, ItemIdGetLength, ItemIdIsNormal, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), LockBuffer(), PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PredicateLockTID(), ReadBuffer(), RelationGetRelid, ReleaseBuffer(), HeapTupleData::t_data, HeapTupleData::t_len, HeapTupleData::t_self, and HeapTupleData::t_tableOid.

Referenced by heap_lock_updated_tuple_rec(), heapam_fetch_row_version(), and heapam_tuple_lock().
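
For context, the caller supplies the TID in tuple->t_self and, on success, becomes responsible for the returned buffer pin (as the function body above notes). A minimal hypothetical sketch, assuming "rel", "snapshot" and "tid" come from the caller:

    /* Hedged sketch of a heap_fetch() round trip. */
    HeapTupleData tuple;
    Buffer      buf;

    tuple.t_self = *tid;        /* TID to look up */
    if (heap_fetch(rel, snapshot, &tuple, &buf, false))
    {
        /* tuple.t_data is valid while we hold the pin on buf */
        ReleaseBuffer(buf);
    }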

◆ heap_finish_speculative()

void heap_finish_speculative(Relation relation, ItemPointer tid)

Definition at line 6052 of file heapam.c.

6053{
6054 Buffer buffer;
6055 Page page;
6056 OffsetNumber offnum;
6057 ItemId lp = NULL;
6058 HeapTupleHeader htup;
6059
6060 buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
6062 page = (Page) BufferGetPage(buffer);
6063
6064 offnum = ItemPointerGetOffsetNumber(tid);
6065 if (PageGetMaxOffsetNumber(page) >= offnum)
6066 lp = PageGetItemId(page, offnum);
6067
6068 if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
6069 elog(ERROR, "invalid lp");
6070
6071 htup = (HeapTupleHeader) PageGetItem(page, lp);
6072
6073 /* NO EREPORT(ERROR) from here till changes are logged */
6075
6077
6078 MarkBufferDirty(buffer);
6079
6080 /*
6081 * Replace the speculative insertion token with a real t_ctid, pointing to
6082 * itself like it does on regular tuples.
6083 */
6084 htup->t_ctid = *tid;
6085
6086 /* XLOG stuff */
6087 if (RelationNeedsWAL(relation))
6088 {
6089 xl_heap_confirm xlrec;
6090 XLogRecPtr recptr;
6091
6093
6095
6096 /* We want the same filtering on this as on a plain insert */
6098
6101
6102 recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_CONFIRM);
6103
6104 PageSetLSN(page, recptr);
6105 }
6106
6108
6109 UnlockReleaseBuffer(buffer);
6110}
#define SizeOfHeapConfirm
Definition: heapam_xlog.h:423
#define XLOG_HEAP_CONFIRM
Definition: heapam_xlog.h:38
OffsetNumber offnum
Definition: heapam_xlog.h:420

References Assert(), BUFFER_LOCK_EXCLUSIVE, BufferGetPage(), elog, END_CRIT_SECTION, ERROR, HeapTupleHeaderIsSpeculative(), ItemIdIsNormal, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), LockBuffer(), MarkBufferDirty(), xl_heap_confirm::offnum, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PageSetLSN(), ReadBuffer(), REGBUF_STANDARD, RelationNeedsWAL, SizeOfHeapConfirm, START_CRIT_SECTION, HeapTupleHeaderData::t_ctid, UnlockReleaseBuffer(), XLOG_HEAP_CONFIRM, XLOG_INCLUDE_ORIGIN, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), XLogRegisterData(), and XLogSetRecordFlags().

Referenced by heapam_tuple_complete_speculative().
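As a hedged illustration of where heap_finish_speculative() sits in the speculative-insertion flow (loosely following the heapam_tuple_insert_speculative()/heapam_tuple_complete_speculative() callers); speculative_insert_sketch and the conflict flag are assumptions, not PostgreSQL APIs.

#include "postgres.h"
#include "access/heapam.h"
#include "access/htup_details.h"
#include "access/xact.h"
#include "storage/lmgr.h"

/* Hypothetical sketch: speculatively insert "tup" into "rel", then either
 * confirm it or super-delete it depending on "conflict". */
static void
speculative_insert_sketch(Relation rel, HeapTuple tup, bool conflict)
{
	uint32		spec_token;

	spec_token = SpeculativeInsertionLockAcquire(GetCurrentTransactionId());
	HeapTupleHeaderSetSpeculativeToken(tup->t_data, spec_token);

	heap_insert(rel, tup, GetCurrentCommandId(true),
				HEAP_INSERT_SPECULATIVE, NULL);

	/* ... insert index entries and probe for conflicting rows here ... */

	if (!conflict)
		heap_finish_speculative(rel, &tup->t_self);	/* token -> real t_ctid */
	else
		heap_abort_speculative(rel, &tup->t_self);

	SpeculativeInsertionLockRelease(GetCurrentTransactionId());
}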

◆ heap_freeze_prepared_tuples()

void heap_freeze_prepared_tuples ( Buffer  buffer,
HeapTupleFreeze tuples,
int  ntuples 
)

Definition at line 7336 of file heapam.c.

7337{
7338 Page page = BufferGetPage(buffer);
7339
7340 for (int i = 0; i < ntuples; i++)
7341 {
7342 HeapTupleFreeze *frz = tuples + i;
7343 ItemId itemid = PageGetItemId(page, frz->offset);
7344 HeapTupleHeader htup;
7345
7346 htup = (HeapTupleHeader) PageGetItem(page, itemid);
7347 heap_execute_freeze_tuple(htup, frz);
7348 }
7349}
static void heap_execute_freeze_tuple(HeapTupleHeader tuple, HeapTupleFreeze *frz)
Definition: heapam.h:441
int i
Definition: isn.c:77
OffsetNumber offset
Definition: heapam.h:150

References BufferGetPage(), heap_execute_freeze_tuple(), i, HeapTupleFreeze::offset, PageGetItem(), and PageGetItemId().

Referenced by heap_page_prune_and_freeze().

◆ heap_freeze_tuple()

bool heap_freeze_tuple ( HeapTupleHeader  tuple,
TransactionId  relfrozenxid,
TransactionId  relminmxid,
TransactionId  FreezeLimit,
TransactionId  MultiXactCutoff 
)

Definition at line 7358 of file heapam.c.

7361{
7362 HeapTupleFreeze frz;
7363 bool do_freeze;
7364 bool totally_frozen;
7365 struct VacuumCutoffs cutoffs;
7366 HeapPageFreeze pagefrz;
7367
7368 cutoffs.relfrozenxid = relfrozenxid;
7369 cutoffs.relminmxid = relminmxid;
7370 cutoffs.OldestXmin = FreezeLimit;
7371 cutoffs.OldestMxact = MultiXactCutoff;
7372 cutoffs.FreezeLimit = FreezeLimit;
7373 cutoffs.MultiXactCutoff = MultiXactCutoff;
7374
7375 pagefrz.freeze_required = true;
7376 pagefrz.FreezePageRelfrozenXid = FreezeLimit;
7377 pagefrz.FreezePageRelminMxid = MultiXactCutoff;
7378 pagefrz.NoFreezePageRelfrozenXid = FreezeLimit;
7379 pagefrz.NoFreezePageRelminMxid = MultiXactCutoff;
7380
7381 do_freeze = heap_prepare_freeze_tuple(tuple, &cutoffs,
7382 &pagefrz, &frz, &totally_frozen);
7383
7384 /*
7385 * Note that because this is not a WAL-logged operation, we don't need to
7386 * fill in the offset in the freeze record.
7387 */
7388
7389 if (do_freeze)
7390 heap_execute_freeze_tuple(tuple, &frz);
7391 return do_freeze;
7392}
bool heap_prepare_freeze_tuple(HeapTupleHeader tuple, const struct VacuumCutoffs *cutoffs, HeapPageFreeze *pagefrz, HeapTupleFreeze *frz, bool *totally_frozen)
Definition: heapam.c:7010
TransactionId FreezeLimit
Definition: vacuum.h:284
TransactionId relfrozenxid
Definition: vacuum.h:258
MultiXactId relminmxid
Definition: vacuum.h:259
MultiXactId MultiXactCutoff
Definition: vacuum.h:285

References VacuumCutoffs::FreezeLimit, heap_execute_freeze_tuple(), heap_prepare_freeze_tuple(), VacuumCutoffs::MultiXactCutoff, VacuumCutoffs::OldestMxact, VacuumCutoffs::OldestXmin, VacuumCutoffs::relfrozenxid, and VacuumCutoffs::relminmxid.

Referenced by rewrite_heap_tuple().
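A hedged usage sketch: as in its rewrite_heap_tuple() caller, the function is applied to a locally palloc'd tuple copy rather than to a tuple in a shared buffer. The helper name and the cutoff arguments are assumptions supplied by the caller.

#include "postgres.h"
#include "access/heapam.h"

/* Hypothetical sketch: freeze a copied tuple header in place if the caller's
 * cutoffs require it.  Returns true when the header was modified. */
static bool
maybe_freeze_copied_tuple(HeapTuple copied,
						  TransactionId relfrozenxid, MultiXactId relminmxid,
						  TransactionId freeze_xid, MultiXactId multi_cutoff)
{
	/* Not WAL-logged; only valid for tuples that live in local memory. */
	return heap_freeze_tuple(copied->t_data, relfrozenxid, relminmxid,
							 freeze_xid, multi_cutoff);
}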

◆ heap_get_latest_tid()

void heap_get_latest_tid ( TableScanDesc  sscan,
ItemPointer  tid 
)

Definition at line 1845 of file heapam.c.

1847{
1848 Relation relation = sscan->rs_rd;
1849 Snapshot snapshot = sscan->rs_snapshot;
1850 ItemPointerData ctid;
1851 TransactionId priorXmax;
1852
1853 /*
1854 * table_tuple_get_latest_tid() verified that the passed in tid is valid.
1855 * Assume that t_ctid links are valid however - there shouldn't be invalid
1856 * ones in the table.
1857 */
1858 Assert(ItemPointerIsValid(tid));
1859
1860 /*
1861 * Loop to chase down t_ctid links. At top of loop, ctid is the tuple we
1862 * need to examine, and *tid is the TID we will return if ctid turns out
1863 * to be bogus.
1864 *
1865 * Note that we will loop until we reach the end of the t_ctid chain.
1866 * Depending on the snapshot passed, there might be at most one visible
1867 * version of the row, but we don't try to optimize for that.
1868 */
1869 ctid = *tid;
1870 priorXmax = InvalidTransactionId; /* cannot check first XMIN */
1871 for (;;)
1872 {
1873 Buffer buffer;
1874 Page page;
1875 OffsetNumber offnum;
1876 ItemId lp;
1877 HeapTupleData tp;
1878 bool valid;
1879
1880 /*
1881 * Read, pin, and lock the page.
1882 */
1883 buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(&ctid));
1884 LockBuffer(buffer, BUFFER_LOCK_SHARE);
1885 page = BufferGetPage(buffer);
1886
1887 /*
1888 * Check for bogus item number. This is not treated as an error
1889 * condition because it can happen while following a t_ctid link. We
1890 * just assume that the prior tid is OK and return it unchanged.
1891 */
1892 offnum = ItemPointerGetOffsetNumber(&ctid);
1893 if (offnum < FirstOffsetNumber || offnum > PageGetMaxOffsetNumber(page))
1894 {
1895 UnlockReleaseBuffer(buffer);
1896 break;
1897 }
1898 lp = PageGetItemId(page, offnum);
1899 if (!ItemIdIsNormal(lp))
1900 {
1901 UnlockReleaseBuffer(buffer);
1902 break;
1903 }
1904
1905 /* OK to access the tuple */
1906 tp.t_self = ctid;
1907 tp.t_data = (HeapTupleHeader) PageGetItem(page, lp);
1908 tp.t_len = ItemIdGetLength(lp);
1909 tp.t_tableOid = RelationGetRelid(relation);
1910
1911 /*
1912 * After following a t_ctid link, we might arrive at an unrelated
1913 * tuple. Check for XMIN match.
1914 */
1915 if (TransactionIdIsValid(priorXmax) &&
1916 !TransactionIdEquals(priorXmax, HeapTupleHeaderGetXmin(tp.t_data)))
1917 {
1918 UnlockReleaseBuffer(buffer);
1919 break;
1920 }
1921
1922 /*
1923 * Check tuple visibility; if visible, set it as the new result
1924 * candidate.
1925 */
1926 valid = HeapTupleSatisfiesVisibility(&tp, snapshot, buffer);
1927 HeapCheckForSerializableConflictOut(valid, relation, &tp, buffer, snapshot);
1928 if (valid)
1929 *tid = ctid;
1930
1931 /*
1932 * If there's a valid t_ctid link, follow it, else we're done.
1933 */
1934 if ((tp.t_data->t_infomask & HEAP_XMAX_INVALID) ||
1935 HeapTupleHeaderIsOnlyLocked(tp.t_data) ||
1936 HeapTupleHeaderIndicatesMovedPartitions(tp.t_data) ||
1937 ItemPointerEquals(&tp.t_self, &tp.t_data->t_ctid))
1938 {
1939 UnlockReleaseBuffer(buffer);
1940 break;
1941 }
1942
1943 ctid = tp.t_data->t_ctid;
1944 priorXmax = HeapTupleHeaderGetUpdateXid(tp.t_data);
1945 UnlockReleaseBuffer(buffer);
1946 } /* end of loop */
1947}
static bool HeapTupleHeaderIndicatesMovedPartitions(const HeapTupleHeaderData *tup)
Definition: htup_details.h:480

References Assert(), BUFFER_LOCK_SHARE, BufferGetPage(), HEAP_XMAX_INVALID, HeapCheckForSerializableConflictOut(), HeapTupleHeaderGetUpdateXid(), HeapTupleHeaderGetXmin(), HeapTupleHeaderIndicatesMovedPartitions(), HeapTupleHeaderIsOnlyLocked(), HeapTupleSatisfiesVisibility(), InvalidTransactionId, ItemIdGetLength, ItemIdIsNormal, ItemPointerEquals(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), ItemPointerIsValid(), LockBuffer(), PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), ReadBuffer(), RelationGetRelid, TableScanDescData::rs_rd, TableScanDescData::rs_snapshot, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TransactionIdEquals, TransactionIdIsValid, and UnlockReleaseBuffer().
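A hedged sketch of resolving a TID to the newest visible version of its row; production code normally goes through table_tuple_get_latest_tid() instead, and latest_visible_tid plus its scan flags are assumptions.

#include "postgres.h"
#include "access/heapam.h"
#include "access/tableam.h"
#include "utils/snapmgr.h"

/* Hypothetical sketch: follow the update chain starting at "tid" and return
 * the latest version visible to the active snapshot. */
static ItemPointerData
latest_visible_tid(Relation rel, ItemPointerData tid)
{
	TableScanDesc scan;

	scan = heap_beginscan(rel, GetActiveSnapshot(), 0, NULL, NULL,
						  SO_TYPE_TIDSCAN);
	heap_get_latest_tid(scan, &tid);	/* updates tid in place */
	heap_endscan(scan);
	return tid;
}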

◆ heap_get_root_tuples()

void heap_get_root_tuples ( Page  page,
OffsetNumber root_offsets 
)

Definition at line 1785 of file pruneheap.c.

1786{
1787 OffsetNumber offnum,
1788 maxoff;
1789
1790 MemSet(root_offsets, InvalidOffsetNumber,
1791 MaxHeapTuplesPerPage * sizeof(OffsetNumber));
1792
1793 maxoff = PageGetMaxOffsetNumber(page);
1794 for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum = OffsetNumberNext(offnum))
1795 {
1796 ItemId lp = PageGetItemId(page, offnum);
1797 HeapTupleHeader htup;
1798 OffsetNumber nextoffnum;
1799 TransactionId priorXmax;
1800
1801 /* skip unused and dead items */
1802 if (!ItemIdIsUsed(lp) || ItemIdIsDead(lp))
1803 continue;
1804
1805 if (ItemIdIsNormal(lp))
1806 {
1807 htup = (HeapTupleHeader) PageGetItem(page, lp);
1808
1809 /*
1810 * Check if this tuple is part of a HOT-chain rooted at some other
1811 * tuple. If so, skip it for now; we'll process it when we find
1812 * its root.
1813 */
1814 if (HeapTupleHeaderIsHeapOnly(htup))
1815 continue;
1816
1817 /*
1818 * This is either a plain tuple or the root of a HOT-chain.
1819 * Remember it in the mapping.
1820 */
1821 root_offsets[offnum - 1] = offnum;
1822
1823 /* If it's not the start of a HOT-chain, we're done with it */
1824 if (!HeapTupleHeaderIsHotUpdated(htup))
1825 continue;
1826
1827 /* Set up to scan the HOT-chain */
1828 nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
1829 priorXmax = HeapTupleHeaderGetUpdateXid(htup);
1830 }
1831 else
1832 {
1833 /* Must be a redirect item. We do not set its root_offsets entry */
1834 Assert(ItemIdIsRedirected(lp));
1835 /* Set up to scan the HOT-chain */
1836 nextoffnum = ItemIdGetRedirect(lp);
1837 priorXmax = InvalidTransactionId;
1838 }
1839
1840 /*
1841 * Now follow the HOT-chain and collect other tuples in the chain.
1842 *
1843 * Note: Even though this is a nested loop, the complexity of the
1844 * function is O(N) because a tuple in the page should be visited not
1845 * more than twice, once in the outer loop and once in HOT-chain
1846 * chases.
1847 */
1848 for (;;)
1849 {
1850 /* Sanity check (pure paranoia) */
1851 if (offnum < FirstOffsetNumber)
1852 break;
1853
1854 /*
1855 * An offset past the end of page's line pointer array is possible
1856 * when the array was truncated
1857 */
1858 if (offnum > maxoff)
1859 break;
1860
1861 lp = PageGetItemId(page, nextoffnum);
1862
1863 /* Check for broken chains */
1864 if (!ItemIdIsNormal(lp))
1865 break;
1866
1867 htup = (HeapTupleHeader) PageGetItem(page, lp);
1868
1869 if (TransactionIdIsValid(priorXmax) &&
1870 !TransactionIdEquals(priorXmax, HeapTupleHeaderGetXmin(htup)))
1871 break;
1872
1873 /* Remember the root line pointer for this item */
1874 root_offsets[nextoffnum - 1] = offnum;
1875
1876 /* Advance to next chain member, if any */
1877 if (!HeapTupleHeaderIsHotUpdated(htup))
1878 break;
1879
1880 /* HOT implies it can't have moved to different partition */
1881 Assert(!HeapTupleHeaderIndicatesMovedPartitions(htup));
1882
1883 nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
1884 priorXmax = HeapTupleHeaderGetUpdateXid(htup);
1885 }
1886 }
1887}
#define MemSet(start, val, len)
Definition: c.h:991
static bool HeapTupleHeaderIsHotUpdated(const HeapTupleHeaderData *tup)
Definition: htup_details.h:534
#define MaxHeapTuplesPerPage
Definition: htup_details.h:624
#define ItemIdGetRedirect(itemId)
Definition: itemid.h:78
#define ItemIdIsDead(itemId)
Definition: itemid.h:113
#define ItemIdIsUsed(itemId)
Definition: itemid.h:92
#define ItemIdIsRedirected(itemId)
Definition: itemid.h:106
#define InvalidOffsetNumber
Definition: off.h:26
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
#define FirstOffsetNumber
Definition: off.h:27

References Assert(), FirstOffsetNumber, HeapTupleHeaderGetUpdateXid(), HeapTupleHeaderGetXmin(), HeapTupleHeaderIndicatesMovedPartitions(), HeapTupleHeaderIsHeapOnly(), HeapTupleHeaderIsHotUpdated(), InvalidOffsetNumber, InvalidTransactionId, ItemIdGetRedirect, ItemIdIsDead, ItemIdIsNormal, ItemIdIsRedirected, ItemIdIsUsed, ItemPointerGetOffsetNumber(), MaxHeapTuplesPerPage, MemSet, OffsetNumberNext, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), HeapTupleHeaderData::t_ctid, TransactionIdEquals, and TransactionIdIsValid.

Referenced by heapam_index_build_range_scan(), and heapam_index_validate_scan().
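A hedged consumer-side sketch of the root_offsets mapping, in the spirit of the index build/validate callers above; hot_root_of is a hypothetical helper, and the caller is assumed to hold at least a share lock on the buffer.

#include "postgres.h"
#include "access/heapam.h"
#include "access/htup_details.h"
#include "storage/bufmgr.h"

/* Hypothetical sketch: return the root line pointer of the HOT chain that
 * contains "offnum", or InvalidOffsetNumber if none was recorded. */
static OffsetNumber
hot_root_of(Buffer buffer, OffsetNumber offnum)
{
	OffsetNumber root_offsets[MaxHeapTuplesPerPage];

	heap_get_root_tuples(BufferGetPage(buffer), root_offsets);
	return root_offsets[offnum - 1];	/* array is indexed by offnum - 1 */
}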

◆ heap_getnext()

HeapTuple heap_getnext ( TableScanDesc  sscan,
ScanDirection  direction 
)

Definition at line 1314 of file heapam.c.

1315{
1316 HeapScanDesc scan = (HeapScanDesc) sscan;
1317
1318 /*
1319 * This is still widely used directly, without going through table AM, so
1320 * add a safety check. It's possible we should, at a later point,
1321 * downgrade this to an assert. The reason for checking the AM routine,
1322 * rather than the AM oid, is that this allows to write regression tests
1323 * that create another AM reusing the heap handler.
1324 */
1325 if (unlikely(sscan->rs_rd->rd_tableam != GetHeapamTableAmRoutine()))
1326 ereport(ERROR,
1327 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1328 errmsg_internal("only heap AM is supported")));
1329
1330 /*
1331 * We don't expect direct calls to heap_getnext with valid CheckXidAlive
1332 * for catalog or regular tables. See detailed comments in xact.c where
1333 * these variables are declared. Normally we have such a check at tableam
1334 * level API but this is called from many places so we need to ensure it
1335 * here.
1336 */
1337 if (unlikely(TransactionIdIsValid(CheckXidAlive) && !bsysscan))
1338 elog(ERROR, "unexpected heap_getnext call during logical decoding");
1339
1340 /* Note: no locking manipulations needed */
1341
1342 if (scan->rs_base.rs_flags & SO_ALLOW_PAGEMODE)
1343 heapgettup_pagemode(scan, direction,
1344 scan->rs_base.rs_nkeys, scan->rs_base.rs_key);
1345 else
1346 heapgettup(scan, direction,
1347 scan->rs_base.rs_nkeys, scan->rs_base.rs_key);
1348
1349 if (scan->rs_ctup.t_data == NULL)
1350 return NULL;
1351
1352 /*
1353 * if we get here it means we have a new current scan tuple, so point to
1354 * the proper return buffer and return the tuple.
1355 */
1356
1357 pgstat_count_heap_getnext(scan->rs_base.rs_rd);
1358
1359 return &scan->rs_ctup;
1360}
#define unlikely(x)
Definition: c.h:347
int errmsg_internal(const char *fmt,...)
Definition: elog.c:1158
static void heapgettup(HeapScanDesc scan, ScanDirection dir, int nkeys, ScanKey key)
Definition: heapam.c:875
static void heapgettup_pagemode(HeapScanDesc scan, ScanDirection dir, int nkeys, ScanKey key)
Definition: heapam.c:985
const TableAmRoutine * GetHeapamTableAmRoutine(void)
#define pgstat_count_heap_getnext(rel)
Definition: pgstat.h:684
const struct TableAmRoutine * rd_tableam
Definition: rel.h:189
@ SO_ALLOW_PAGEMODE
Definition: tableam.h:61
bool bsysscan
Definition: xact.c:100
TransactionId CheckXidAlive
Definition: xact.c:99

References bsysscan, CheckXidAlive, elog, ereport, errcode(), errmsg_internal(), ERROR, GetHeapamTableAmRoutine(), heapgettup(), heapgettup_pagemode(), pgstat_count_heap_getnext, RelationData::rd_tableam, HeapScanDescData::rs_base, HeapScanDescData::rs_ctup, TableScanDescData::rs_flags, TableScanDescData::rs_key, TableScanDescData::rs_nkeys, TableScanDescData::rs_rd, SO_ALLOW_PAGEMODE, HeapTupleData::t_data, TransactionIdIsValid, and unlikely.

Referenced by AlterTableMoveAll(), AlterTableSpaceOptions(), check_db_file_conflict(), CreateDatabaseUsingFileCopy(), do_autovacuum(), DropSetting(), DropTableSpace(), find_typed_table_dependencies(), get_all_vacuum_rels(), get_database_list(), get_subscription_list(), get_tables_to_cluster(), get_tablespace_name(), get_tablespace_oid(), GetAllTablesPublicationRelations(), getRelationsInNamespace(), GetSchemaPublicationRelations(), heapam_index_build_range_scan(), heapam_index_validate_scan(), objectsInSchemaToOids(), pgrowlocks(), pgstat_heap(), populate_typ_list(), ReindexMultipleTables(), remove_dbtablespaces(), RemoveSubscriptionRel(), RenameTableSpace(), ThereIsAtLeastOneRole(), and vac_truncate_clog().
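The callers listed above all follow the same shape. A hedged, self-contained sketch of that loop follows; scan_whole_heap and process_tuple are hypothetical names, and the locking and snapshot handling reflect the common pattern rather than any single caller.

#include "postgres.h"
#include "access/heapam.h"
#include "access/table.h"
#include "access/tableam.h"
#include "utils/snapmgr.h"

/* Hypothetical sketch: visit every tuple of "relid" visible to a fresh
 * snapshot. */
static void
scan_whole_heap(Oid relid, void (*process_tuple) (HeapTuple tup))
{
	Relation	rel = table_open(relid, AccessShareLock);
	Snapshot	snapshot = RegisterSnapshot(GetLatestSnapshot());
	TableScanDesc scan = heap_beginscan(rel, snapshot, 0, NULL, NULL,
										SO_TYPE_SEQSCAN | SO_ALLOW_PAGEMODE);
	HeapTuple	tuple;

	while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
		process_tuple(tuple);	/* tuple points into the scan's current buffer */

	heap_endscan(scan);
	UnregisterSnapshot(snapshot);
	table_close(rel, AccessShareLock);
}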

◆ heap_getnextslot()

bool heap_getnextslot ( TableScanDesc  sscan,
ScanDirection  direction,
struct TupleTableSlot slot 
)

Definition at line 1363 of file heapam.c.

1364{
1365 HeapScanDesc scan = (HeapScanDesc) sscan;
1366
1367 /* Note: no locking manipulations needed */
1368
1369 if (sscan->rs_flags & SO_ALLOW_PAGEMODE)
1370 heapgettup_pagemode(scan, direction, sscan->rs_nkeys, sscan->rs_key);
1371 else
1372 heapgettup(scan, direction, sscan->rs_nkeys, sscan->rs_key);
1373
1374 if (scan->rs_ctup.t_data == NULL)
1375 {
1376 ExecClearTuple(slot);
1377 return false;
1378 }
1379
1380 /*
1381 * if we get here it means we have a new current scan tuple, so point to
1382 * the proper return buffer and return the tuple.
1383 */
1384
1385 pgstat_count_heap_getnext(scan->rs_base.rs_rd);
1386
1387 ExecStoreBufferHeapTuple(&scan->rs_ctup, slot,
1388 scan->rs_cbuf);
1389 return true;
1390}
TupleTableSlot * ExecStoreBufferHeapTuple(HeapTuple tuple, TupleTableSlot *slot, Buffer buffer)
Definition: execTuples.c:1581
static TupleTableSlot * ExecClearTuple(TupleTableSlot *slot)
Definition: tuptable.h:458

References ExecClearTuple(), ExecStoreBufferHeapTuple(), heapgettup(), heapgettup_pagemode(), pgstat_count_heap_getnext, HeapScanDescData::rs_base, HeapScanDescData::rs_cbuf, HeapScanDescData::rs_ctup, TableScanDescData::rs_flags, TableScanDescData::rs_key, TableScanDescData::rs_nkeys, TableScanDescData::rs_rd, SO_ALLOW_PAGEMODE, and HeapTupleData::t_data.
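A hedged slot-based variant of the scan loop shown under heap_getnext(); count_visible_tuples is a hypothetical helper, and the scan is assumed to have been created the same way as in that sketch.

#include "postgres.h"
#include "access/heapam.h"
#include "access/tableam.h"
#include "executor/tuptable.h"

/* Hypothetical sketch: count the tuples a previously created heap scan
 * returns into a table slot. */
static int
count_visible_tuples(Relation rel, TableScanDesc scan)
{
	TupleTableSlot *slot = table_slot_create(rel, NULL);
	int			ntuples = 0;

	while (heap_getnextslot(scan, ForwardScanDirection, slot))
		ntuples++;				/* slot holds a pinned buffer tuple here */

	ExecDropSingleTupleTableSlot(slot);
	return ntuples;
}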

◆ heap_getnextslot_tidrange()

bool heap_getnextslot_tidrange ( TableScanDesc  sscan,
ScanDirection  direction,
TupleTableSlot slot 
)

Definition at line 1466 of file heapam.c.

1468{
1469 HeapScanDesc scan = (HeapScanDesc) sscan;
1470 ItemPointer mintid = &sscan->st.tidrange.rs_mintid;
1471 ItemPointer maxtid = &sscan->st.tidrange.rs_maxtid;
1472
1473 /* Note: no locking manipulations needed */
1474 for (;;)
1475 {
1476 if (sscan->rs_flags & SO_ALLOW_PAGEMODE)
1477 heapgettup_pagemode(scan, direction, sscan->rs_nkeys, sscan->rs_key);
1478 else
1479 heapgettup(scan, direction, sscan->rs_nkeys, sscan->rs_key);
1480
1481 if (scan->rs_ctup.t_data == NULL)
1482 {
1483 ExecClearTuple(slot);
1484 return false;
1485 }
1486
1487 /*
1488 * heap_set_tidrange will have used heap_setscanlimits to limit the
1489 * range of pages we scan to only ones that can contain the TID range
1490 * we're scanning for. Here we must filter out any tuples from these
1491 * pages that are outside of that range.
1492 */
1493 if (ItemPointerCompare(&scan->rs_ctup.t_self, mintid) < 0)
1494 {
1495 ExecClearTuple(slot);
1496
1497 /*
1498 * When scanning backwards, the TIDs will be in descending order.
1499 * Future tuples in this direction will be lower still, so we can
1500 * just return false to indicate there will be no more tuples.
1501 */
1502 if (ScanDirectionIsBackward(direction))
1503 return false;
1504
1505 continue;
1506 }
1507
1508 /*
1509 * Likewise for the final page, we must filter out TIDs greater than
1510 * maxtid.
1511 */
1512 if (ItemPointerCompare(&scan->rs_ctup.t_self, maxtid) > 0)
1513 {
1514 ExecClearTuple(slot);
1515
1516 /*
1517 * When scanning forward, the TIDs will be in ascending order.
1518 * Future tuples in this direction will be higher still, so we can
1519 * just return false to indicate there will be no more tuples.
1520 */
1521 if (ScanDirectionIsForward(direction))
1522 return false;
1523 continue;
1524 }
1525
1526 break;
1527 }
1528
1529 /*
1530 * if we get here it means we have a new current scan tuple, so point to
1531 * the proper return buffer and return the tuple.
1532 */
1533 pgstat_count_heap_getnext(scan->rs_base.rs_rd);
1534
1535 ExecStoreBufferHeapTuple(&scan->rs_ctup, slot, scan->rs_cbuf);
1536 return true;
1537}
int32 ItemPointerCompare(ItemPointer arg1, ItemPointer arg2)
Definition: itemptr.c:51
#define ScanDirectionIsForward(direction)
Definition: sdir.h:64
#define ScanDirectionIsBackward(direction)
Definition: sdir.h:50
ItemPointerData rs_mintid
Definition: relscan.h:55
union TableScanDescData::@49 st
ItemPointerData rs_maxtid
Definition: relscan.h:56
struct TableScanDescData::@49::@50 tidrange

References ExecClearTuple(), ExecStoreBufferHeapTuple(), heapgettup(), heapgettup_pagemode(), ItemPointerCompare(), pgstat_count_heap_getnext, HeapScanDescData::rs_base, HeapScanDescData::rs_cbuf, HeapScanDescData::rs_ctup, TableScanDescData::rs_flags, TableScanDescData::rs_key, TableScanDescData::rs_maxtid, TableScanDescData::rs_mintid, TableScanDescData::rs_nkeys, TableScanDescData::rs_rd, ScanDirectionIsBackward, ScanDirectionIsForward, SO_ALLOW_PAGEMODE, TableScanDescData::st, HeapTupleData::t_data, HeapTupleData::t_self, and TableScanDescData::tidrange.
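A hedged sketch of pairing heap_set_tidrange() with this function; the scan is assumed to have been opened with the SO_TYPE_TIDRANGESCAN flag, and scan_tid_range is a hypothetical helper.

#include "postgres.h"
#include "access/heapam.h"
#include "access/tableam.h"
#include "executor/tuptable.h"

/* Hypothetical sketch: iterate only over tuples whose TIDs fall within
 * [mintid, maxtid]. */
static void
scan_tid_range(TableScanDesc scan, TupleTableSlot *slot,
			   ItemPointer mintid, ItemPointer maxtid)
{
	heap_set_tidrange(scan, mintid, maxtid);	/* narrows the block range */

	while (heap_getnextslot_tidrange(scan, ForwardScanDirection, slot))
	{
		/* slot->tts_tid now lies within the requested range */
	}
}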

◆ heap_hot_search_buffer()

bool heap_hot_search_buffer ( ItemPointer  tid,
Relation  relation,
Buffer  buffer,
Snapshot  snapshot,
HeapTuple  heapTuple,
bool *  all_dead,
bool  first_call 
)

Definition at line 1693 of file heapam.c.

1696{
1697 Page page = BufferGetPage(buffer);
1698 TransactionId prev_xmax = InvalidTransactionId;
1699 BlockNumber blkno;
1700 OffsetNumber offnum;
1701 bool at_chain_start;
1702 bool valid;
1703 bool skip;
1704 GlobalVisState *vistest = NULL;
1705
1706 /* If this is not the first call, previous call returned a (live!) tuple */
1707 if (all_dead)
1708 *all_dead = first_call;
1709
1710 blkno = ItemPointerGetBlockNumber(tid);
1711 offnum = ItemPointerGetOffsetNumber(tid);
1712 at_chain_start = first_call;
1713 skip = !first_call;
1714
1715 /* XXX: we should assert that a snapshot is pushed or registered */
1716 Assert(TransactionIdIsValid(RecentXmin));
1717 Assert(BufferGetBlockNumber(buffer) == blkno);
1718
1719 /* Scan through possible multiple members of HOT-chain */
1720 for (;;)
1721 {
1722 ItemId lp;
1723
1724 /* check for bogus TID */
1725 if (offnum < FirstOffsetNumber || offnum > PageGetMaxOffsetNumber(page))
1726 break;
1727
1728 lp = PageGetItemId(page, offnum);
1729
1730 /* check for unused, dead, or redirected items */
1731 if (!ItemIdIsNormal(lp))
1732 {
1733 /* We should only see a redirect at start of chain */
1734 if (ItemIdIsRedirected(lp) && at_chain_start)
1735 {
1736 /* Follow the redirect */
1737 offnum = ItemIdGetRedirect(lp);
1738 at_chain_start = false;
1739 continue;
1740 }
1741 /* else must be end of chain */
1742 break;
1743 }
1744
1745 /*
1746 * Update heapTuple to point to the element of the HOT chain we're
1747 * currently investigating. Having t_self set correctly is important
1748 * because the SSI checks and the *Satisfies routine for historical
1749 * MVCC snapshots need the correct tid to decide about the visibility.
1750 */
1751 heapTuple->t_data = (HeapTupleHeader) PageGetItem(page, lp);
1752 heapTuple->t_len = ItemIdGetLength(lp);
1753 heapTuple->t_tableOid = RelationGetRelid(relation);
1754 ItemPointerSet(&heapTuple->t_self, blkno, offnum);
1755
1756 /*
1757 * Shouldn't see a HEAP_ONLY tuple at chain start.
1758 */
1759 if (at_chain_start && HeapTupleIsHeapOnly(heapTuple))
1760 break;
1761
1762 /*
1763 * The xmin should match the previous xmax value, else chain is
1764 * broken.
1765 */
1766 if (TransactionIdIsValid(prev_xmax) &&
1767 !TransactionIdEquals(prev_xmax,
1768 HeapTupleHeaderGetXmin(heapTuple->t_data)))
1769 break;
1770
1771 /*
1772 * When first_call is true (and thus, skip is initially false) we'll
1773 * return the first tuple we find. But on later passes, heapTuple
1774 * will initially be pointing to the tuple we returned last time.
1775 * Returning it again would be incorrect (and would loop forever), so
1776 * we skip it and return the next match we find.
1777 */
1778 if (!skip)
1779 {
1780 /* If it's visible per the snapshot, we must return it */
1781 valid = HeapTupleSatisfiesVisibility(heapTuple, snapshot, buffer);
1782 HeapCheckForSerializableConflictOut(valid, relation, heapTuple,
1783 buffer, snapshot);
1784
1785 if (valid)
1786 {
1787 ItemPointerSetOffsetNumber(tid, offnum);
1788 PredicateLockTID(relation, &heapTuple->t_self, snapshot,
1789 HeapTupleHeaderGetXmin(heapTuple->t_data));
1790 if (all_dead)
1791 *all_dead = false;
1792 return true;
1793 }
1794 }
1795 skip = false;
1796
1797 /*
1798 * If we can't see it, maybe no one else can either. At caller
1799 * request, check whether all chain members are dead to all
1800 * transactions.
1801 *
1802 * Note: if you change the criterion here for what is "dead", fix the
1803 * planner's get_actual_variable_range() function to match.
1804 */
1805 if (all_dead && *all_dead)
1806 {
1807 if (!vistest)
1808 vistest = GlobalVisTestFor(relation);
1809
1810 if (!HeapTupleIsSurelyDead(heapTuple, vistest))
1811 *all_dead = false;
1812 }
1813
1814 /*
1815 * Check to see if HOT chain continues past this tuple; if so fetch
1816 * the next offnum and loop around.
1817 */
1818 if (HeapTupleIsHotUpdated(heapTuple))
1819 {
1820 Assert(ItemPointerGetBlockNumber(&heapTuple->t_data->t_ctid) ==
1821 blkno);
1822 offnum = ItemPointerGetOffsetNumber(&heapTuple->t_data->t_ctid);
1823 at_chain_start = false;
1824 prev_xmax = HeapTupleHeaderGetUpdateXid(heapTuple->t_data);
1825 }
1826 else
1827 break; /* end of chain */
1828 }
1829
1830 return false;
1831}
bool HeapTupleIsSurelyDead(HeapTuple htup, GlobalVisState *vistest)
static bool HeapTupleIsHotUpdated(const HeapTupleData *tuple)
Definition: htup_details.h:768
static bool HeapTupleIsHeapOnly(const HeapTupleData *tuple)
Definition: htup_details.h:786
static void ItemPointerSet(ItemPointerData *pointer, BlockNumber blockNumber, OffsetNumber offNum)
Definition: itemptr.h:135
static void ItemPointerSetOffsetNumber(ItemPointerData *pointer, OffsetNumber offsetNumber)
Definition: itemptr.h:158
static const struct exclude_list_item skip[]
Definition: pg_checksums.c:107
GlobalVisState * GlobalVisTestFor(Relation rel)
Definition: procarray.c:4107
TransactionId RecentXmin
Definition: snapmgr.c:159

References Assert(), BufferGetBlockNumber(), BufferGetPage(), GlobalVisTestFor(), HeapCheckForSerializableConflictOut(), HeapTupleHeaderGetUpdateXid(), HeapTupleHeaderGetXmin(), HeapTupleIsHeapOnly(), HeapTupleIsHotUpdated(), HeapTupleIsSurelyDead(), HeapTupleSatisfiesVisibility(), InvalidTransactionId, ItemIdGetLength, ItemIdGetRedirect, ItemIdIsNormal, ItemIdIsRedirected, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), ItemPointerSet(), ItemPointerSetOffsetNumber(), PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PredicateLockTID(), RecentXmin, RelationGetRelid, skip, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TransactionIdEquals, and TransactionIdIsValid.

Referenced by BitmapHeapScanNextBlock(), heap_index_delete_tuples(), and heapam_index_fetch_tuple().
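A hedged sketch modeled on the index-fetch caller above: lock the already-pinned buffer in share mode, probe the HOT chain rooted at *tid, and report whether a visible member was found. fetch_hot_member is a hypothetical name.

#include "postgres.h"
#include "access/heapam.h"
#include "storage/bufmgr.h"

/* Hypothetical sketch: find the HOT-chain member at *tid visible to
 * "snapshot".  On success *tid is updated to that member's offset. */
static bool
fetch_hot_member(Relation rel, Buffer buffer, ItemPointer tid,
				 Snapshot snapshot, HeapTuple tuple, bool *all_dead)
{
	bool		found;

	LockBuffer(buffer, BUFFER_LOCK_SHARE);
	found = heap_hot_search_buffer(tid, rel, buffer, snapshot,
								   tuple, all_dead, true /* first_call */ );
	LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
	return found;
}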

◆ heap_index_delete_tuples()

TransactionId heap_index_delete_tuples ( Relation  rel,
TM_IndexDeleteOp delstate 
)

Definition at line 8074 of file heapam.c.

8075{
8076 /* Initial assumption is that earlier pruning took care of conflict */
8077 TransactionId snapshotConflictHorizon = InvalidTransactionId;
8078 BlockNumber blkno = InvalidBlockNumber;
8079 Buffer buf = InvalidBuffer;
8080 Page page = NULL;
8081 OffsetNumber maxoff = InvalidOffsetNumber;
8082 TransactionId priorXmax;
8083#ifdef USE_PREFETCH
8084 IndexDeletePrefetchState prefetch_state;
8085 int prefetch_distance;
8086#endif
8087 SnapshotData SnapshotNonVacuumable;
8088 int finalndeltids = 0,
8089 nblocksaccessed = 0;
8090
8091 /* State that's only used in bottom-up index deletion case */
8092 int nblocksfavorable = 0;
8093 int curtargetfreespace = delstate->bottomupfreespace,
8094 lastfreespace = 0,
8095 actualfreespace = 0;
8096 bool bottomup_final_block = false;
8097
8098 InitNonVacuumableSnapshot(SnapshotNonVacuumable, GlobalVisTestFor(rel));
8099
8100 /* Sort caller's deltids array by TID for further processing */
8101 index_delete_sort(delstate);
8102
8103 /*
8104 * Bottom-up case: resort deltids array in an order attuned to where the
8105 * greatest number of promising TIDs are to be found, and determine how
8106 * many blocks from the start of sorted array should be considered
8107 * favorable. This will also shrink the deltids array in order to
8108 * eliminate completely unfavorable blocks up front.
8109 */
8110 if (delstate->bottomup)
8111 nblocksfavorable = bottomup_sort_and_shrink(delstate);
8112
8113#ifdef USE_PREFETCH
8114 /* Initialize prefetch state. */
8115 prefetch_state.cur_hblkno = InvalidBlockNumber;
8116 prefetch_state.next_item = 0;
8117 prefetch_state.ndeltids = delstate->ndeltids;
8118 prefetch_state.deltids = delstate->deltids;
8119
8120 /*
8121 * Determine the prefetch distance that we will attempt to maintain.
8122 *
8123 * Since the caller holds a buffer lock somewhere in rel, we'd better make
8124 * sure that isn't a catalog relation before we call code that does
8125 * syscache lookups, to avoid risk of deadlock.
8126 */
8127 if (IsCatalogRelation(rel))
8128 prefetch_distance = maintenance_io_concurrency;
8129 else
8130 prefetch_distance =
8131 get_tablespace_maintenance_io_concurrency(rel->rd_rel->reltablespace);
8132
8133 /* Cap initial prefetch distance for bottom-up deletion caller */
8134 if (delstate->bottomup)
8135 {
8136 Assert(nblocksfavorable >= 1);
8137 Assert(nblocksfavorable <= BOTTOMUP_MAX_NBLOCKS);
8138 prefetch_distance = Min(prefetch_distance, nblocksfavorable);
8139 }
8140
8141 /* Start prefetching. */
8142 index_delete_prefetch_buffer(rel, &prefetch_state, prefetch_distance);
8143#endif
8144
8145 /* Iterate over deltids, determine which to delete, check their horizon */
8146 Assert(delstate->ndeltids > 0);
8147 for (int i = 0; i < delstate->ndeltids; i++)
8148 {
8149 TM_IndexDelete *ideltid = &delstate->deltids[i];
8150 TM_IndexStatus *istatus = delstate->status + ideltid->id;
8151 ItemPointer htid = &ideltid->tid;
8152 OffsetNumber offnum;
8153
8154 /*
8155 * Read buffer, and perform required extra steps each time a new block
8156 * is encountered. Avoid refetching if it's the same block as the one
8157 * from the last htid.
8158 */
8159 if (blkno == InvalidBlockNumber ||
8160 ItemPointerGetBlockNumber(htid) != blkno)
8161 {
8162 /*
8163 * Consider giving up early for bottom-up index deletion caller
8164 * first. (Only prefetch next-next block afterwards, when it
8165 * becomes clear that we're at least going to access the next
8166 * block in line.)
8167 *
8168 * Sometimes the first block frees so much space for bottom-up
8169 * caller that the deletion process can end without accessing any
8170 * more blocks. It is usually necessary to access 2 or 3 blocks
8171 * per bottom-up deletion operation, though.
8172 */
8173 if (delstate->bottomup)
8174 {
8175 /*
8176 * We often allow caller to delete a few additional items
8177 * whose entries we reached after the point that space target
8178 * from caller was satisfied. The cost of accessing the page
8179 * was already paid at that point, so it made sense to finish
8180 * it off. When that happened, we finalize everything here
8181 * (by finishing off the whole bottom-up deletion operation
8182 * without needlessly paying the cost of accessing any more
8183 * blocks).
8184 */
8185 if (bottomup_final_block)
8186 break;
8187
8188 /*
8189 * Give up when we didn't enable our caller to free any
8190 * additional space as a result of processing the page that we
8191 * just finished up with. This rule is the main way in which
8192 * we keep the cost of bottom-up deletion under control.
8193 */
8194 if (nblocksaccessed >= 1 && actualfreespace == lastfreespace)
8195 break;
8196 lastfreespace = actualfreespace; /* for next time */
8197
8198 /*
8199 * Deletion operation (which is bottom-up) will definitely
8200 * access the next block in line. Prepare for that now.
8201 *
8202 * Decay target free space so that we don't hang on for too
8203 * long with a marginal case. (Space target is only truly
8204 * helpful when it allows us to recognize that we don't need
8205 * to access more than 1 or 2 blocks to satisfy caller due to
8206 * agreeable workload characteristics.)
8207 *
8208 * We are a bit more patient when we encounter contiguous
8209 * blocks, though: these are treated as favorable blocks. The
8210 * decay process is only applied when the next block in line
8211 * is not a favorable/contiguous block. This is not an
8212 * exception to the general rule; we still insist on finding
8213 * at least one deletable item per block accessed. See
8214 * bottomup_nblocksfavorable() for full details of the theory
8215 * behind favorable blocks and heap block locality in general.
8216 *
8217 * Note: The first block in line is always treated as a
8218 * favorable block, so the earliest possible point that the
8219 * decay can be applied is just before we access the second
8220 * block in line. The Assert() verifies this for us.
8221 */
8222 Assert(nblocksaccessed > 0 || nblocksfavorable > 0);
8223 if (nblocksfavorable > 0)
8224 nblocksfavorable--;
8225 else
8226 curtargetfreespace /= 2;
8227 }
8228
8229 /* release old buffer */
8230 if (BufferIsValid(buf))
8231 UnlockReleaseBuffer(buf);
8232
8233 blkno = ItemPointerGetBlockNumber(htid);
8234 buf = ReadBuffer(rel, blkno);
8235 nblocksaccessed++;
8236 Assert(!delstate->bottomup ||
8237 nblocksaccessed <= BOTTOMUP_MAX_NBLOCKS);
8238
8239#ifdef USE_PREFETCH
8240
8241 /*
8242 * To maintain the prefetch distance, prefetch one more page for
8243 * each page we read.
8244 */
8245 index_delete_prefetch_buffer(rel, &prefetch_state, 1);
8246#endif
8247
8248 LockBuffer(buf, BUFFER_LOCK_SHARE);
8249
8250 page = BufferGetPage(buf);
8251 maxoff = PageGetMaxOffsetNumber(page);
8252 }
8253
8254 /*
8255 * In passing, detect index corruption involving an index page with a
8256 * TID that points to a location in the heap that couldn't possibly be
8257 * correct. We only do this with actual TIDs from caller's index page
8258 * (not items reached by traversing through a HOT chain).
8259 */
8260 index_delete_check_htid(delstate, page, maxoff, htid, istatus);
8261
8262 if (istatus->knowndeletable)
8263 Assert(!delstate->bottomup && !istatus->promising);
8264 else
8265 {
8266 ItemPointerData tmp = *htid;
8267 HeapTupleData heapTuple;
8268
8269 /* Are any tuples from this HOT chain non-vacuumable? */
8270 if (heap_hot_search_buffer(&tmp, rel, buf, &SnapshotNonVacuumable,
8271 &heapTuple, NULL, true))
8272 continue; /* can't delete entry */
8273
8274 /* Caller will delete, since whole HOT chain is vacuumable */
8275 istatus->knowndeletable = true;
8276
8277 /* Maintain index free space info for bottom-up deletion case */
8278 if (delstate->bottomup)
8279 {
8280 Assert(istatus->freespace > 0);
8281 actualfreespace += istatus->freespace;
8282 if (actualfreespace >= curtargetfreespace)
8283 bottomup_final_block = true;
8284 }
8285 }
8286
8287 /*
8288 * Maintain snapshotConflictHorizon value for deletion operation as a
8289 * whole by advancing current value using heap tuple headers. This is
8290 * loosely based on the logic for pruning a HOT chain.
8291 */
8292 offnum = ItemPointerGetOffsetNumber(htid);
8293 priorXmax = InvalidTransactionId; /* cannot check first XMIN */
8294 for (;;)
8295 {
8296 ItemId lp;
8297 HeapTupleHeader htup;
8298
8299 /* Sanity check (pure paranoia) */
8300 if (offnum < FirstOffsetNumber)
8301 break;
8302
8303 /*
8304 * An offset past the end of page's line pointer array is possible
8305 * when the array was truncated
8306 */
8307 if (offnum > maxoff)
8308 break;
8309
8310 lp = PageGetItemId(page, offnum);
8311 if (ItemIdIsRedirected(lp))
8312 {
8313 offnum = ItemIdGetRedirect(lp);
8314 continue;
8315 }
8316
8317 /*
8318 * We'll often encounter LP_DEAD line pointers (especially with an
8319 * entry marked knowndeletable by our caller up front). No heap
8320 * tuple headers get examined for an htid that leads us to an
8321 * LP_DEAD item. This is okay because the earlier pruning
8322 * operation that made the line pointer LP_DEAD in the first place
8323 * must have considered the original tuple header as part of
8324 * generating its own snapshotConflictHorizon value.
8325 *
8326 * Relying on XLOG_HEAP2_PRUNE_VACUUM_SCAN records like this is
8327 * the same strategy that index vacuuming uses in all cases. Index
8328 * VACUUM WAL records don't even have a snapshotConflictHorizon
8329 * field of their own for this reason.
8330 */
8331 if (!ItemIdIsNormal(lp))
8332 break;
8333
8334 htup = (HeapTupleHeader) PageGetItem(page, lp);
8335
8336 /*
8337 * Check the tuple XMIN against prior XMAX, if any
8338 */
8339 if (TransactionIdIsValid(priorXmax) &&
8340 !TransactionIdEquals(HeapTupleHeaderGetXmin(htup), priorXmax))
8341 break;
8342
8343 HeapTupleHeaderAdvanceConflictHorizon(htup,
8344 &snapshotConflictHorizon);
8345
8346 /*
8347 * If the tuple is not HOT-updated, then we are at the end of this
8348 * HOT-chain. No need to visit later tuples from the same update
8349 * chain (they get their own index entries) -- just move on to
8350 * next htid from index AM caller.
8351 */
8352 if (!HeapTupleHeaderIsHotUpdated(htup))
8353 break;
8354
8355 /* Advance to next HOT chain member */
8356 Assert(ItemPointerGetBlockNumber(&htup->t_ctid) == blkno);
8357 offnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
8358 priorXmax = HeapTupleHeaderGetUpdateXid(htup);
8359 }
8360
8361 /* Enable further/final shrinking of deltids for caller */
8362 finalndeltids = i + 1;
8363 }
8364
8365 UnlockReleaseBuffer(buf);
8366
8367 /*
8368 * Shrink deltids array to exclude non-deletable entries at the end. This
8369 * is not just a minor optimization. Final deltids array size might be
8370 * zero for a bottom-up caller. Index AM is explicitly allowed to rely on
8371 * ndeltids being zero in all cases with zero total deletable entries.
8372 */
8373 Assert(finalndeltids > 0 || delstate->bottomup);
8374 delstate->ndeltids = finalndeltids;
8375
8376 return snapshotConflictHorizon;
8377}
int maintenance_io_concurrency
Definition: bufmgr.c:159
#define Min(x, y)
Definition: c.h:975
bool IsCatalogRelation(Relation relation)
Definition: catalog.c:103
static int bottomup_sort_and_shrink(TM_IndexDeleteOp *delstate)
Definition: heapam.c:8631
void HeapTupleHeaderAdvanceConflictHorizon(HeapTupleHeader tuple, TransactionId *snapshotConflictHorizon)
Definition: heapam.c:7929
static void index_delete_check_htid(TM_IndexDeleteOp *delstate, Page page, OffsetNumber maxoff, ItemPointer htid, TM_IndexStatus *istatus)
Definition: heapam.c:8014
#define BOTTOMUP_MAX_NBLOCKS
Definition: heapam.c:184
bool heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer, Snapshot snapshot, HeapTuple heapTuple, bool *all_dead, bool first_call)
Definition: heapam.c:1693
static void index_delete_sort(TM_IndexDeleteOp *delstate)
Definition: heapam.c:8419
static char * buf
Definition: pg_test_fsync.c:72
#define InitNonVacuumableSnapshot(snapshotdata, vistestp)
Definition: snapmgr.h:50
int get_tablespace_maintenance_io_concurrency(Oid spcid)
Definition: spccache.c:229
TM_IndexStatus * status
Definition: tableam.h:248
int bottomupfreespace
Definition: tableam.h:243
TM_IndexDelete * deltids
Definition: tableam.h:247
ItemPointerData tid
Definition: tableam.h:206
bool knowndeletable
Definition: tableam.h:213
bool promising
Definition: tableam.h:216
int16 freespace
Definition: tableam.h:217

References Assert(), TM_IndexDeleteOp::bottomup, BOTTOMUP_MAX_NBLOCKS, bottomup_sort_and_shrink(), TM_IndexDeleteOp::bottomupfreespace, buf, BUFFER_LOCK_SHARE, BufferGetPage(), BufferIsValid(), TM_IndexDeleteOp::deltids, FirstOffsetNumber, TM_IndexStatus::freespace, get_tablespace_maintenance_io_concurrency(), GlobalVisTestFor(), heap_hot_search_buffer(), HeapTupleHeaderAdvanceConflictHorizon(), HeapTupleHeaderGetUpdateXid(), HeapTupleHeaderGetXmin(), HeapTupleHeaderIsHotUpdated(), i, TM_IndexDelete::id, index_delete_check_htid(), index_delete_sort(), InitNonVacuumableSnapshot, InvalidBlockNumber, InvalidBuffer, InvalidOffsetNumber, InvalidTransactionId, IsCatalogRelation(), ItemIdGetRedirect, ItemIdIsNormal, ItemIdIsRedirected, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), TM_IndexStatus::knowndeletable, LockBuffer(), maintenance_io_concurrency, Min, TM_IndexDeleteOp::ndeltids, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), TM_IndexStatus::promising, RelationData::rd_rel, ReadBuffer(), TM_IndexDeleteOp::status, HeapTupleHeaderData::t_ctid, TM_IndexDelete::tid, TransactionIdEquals, TransactionIdIsValid, and UnlockReleaseBuffer().

◆ heap_inplace_lock()

bool heap_inplace_lock ( Relation  relation,
HeapTuple  oldtup_ptr,
Buffer  buffer,
void(*)(void *)  release_callback,
void *  arg 
)

Definition at line 6318 of file heapam.c.

6321{
6322 HeapTupleData oldtup = *oldtup_ptr; /* minimize diff vs. heap_update() */
6323 TM_Result result;
6324 bool ret;
6325
6326#ifdef USE_ASSERT_CHECKING
6327 if (RelationGetRelid(relation) == RelationRelationId)
6328 check_inplace_rel_lock(oldtup_ptr);
6329#endif
6330
6331 Assert(BufferIsValid(buffer));
6332
6333 /*
6334 * Construct shared cache inval if necessary. Because we pass a tuple
6335 * version without our own inplace changes or inplace changes other
6336 * sessions complete while we wait for locks, inplace update mustn't
6337 * change catcache lookup keys. But we aren't bothering with index
6338 * updates either, so that's true a fortiori. After LockBuffer(), it
6339 * would be too late, because this might reach a
6340 * CatalogCacheInitializeCache() that locks "buffer".
6341 */
6342 CacheInvalidateHeapTupleInplace(relation, oldtup_ptr, NULL);
6343
6344 LockTuple(relation, &oldtup.t_self, InplaceUpdateTupleLock);
6345 LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
6346
6347 /*----------
6348 * Interpret HeapTupleSatisfiesUpdate() like heap_update() does, except:
6349 *
6350 * - wait unconditionally
6351 * - already locked tuple above, since inplace needs that unconditionally
6352 * - don't recheck header after wait: simpler to defer to next iteration
6353 * - don't try to continue even if the updater aborts: likewise
6354 * - no crosscheck
6355 */
6356 result = HeapTupleSatisfiesUpdate(&oldtup, GetCurrentCommandId(false),
6357 buffer);
6358
6359 if (result == TM_Invisible)
6360 {
6361 /* no known way this can happen */
6362 ereport(ERROR,
6363 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
6364 errmsg_internal("attempted to overwrite invisible tuple")));
6365 }
6366 else if (result == TM_SelfModified)
6367 {
6368 /*
6369 * CREATE INDEX might reach this if an expression is silly enough to
6370 * call e.g. SELECT ... FROM pg_class FOR SHARE. C code of other SQL
6371 * statements might get here after a heap_update() of the same row, in
6372 * the absence of an intervening CommandCounterIncrement().
6373 */
6374 ereport(ERROR,
6375 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
6376 errmsg("tuple to be updated was already modified by an operation triggered by the current command")));
6377 }
6378 else if (result == TM_BeingModified)
6379 {
6380 TransactionId xwait;
6381 uint16 infomask;
6382
6383 xwait = HeapTupleHeaderGetRawXmax(oldtup.t_data);
6384 infomask = oldtup.t_data->t_infomask;
6385
6386 if (infomask & HEAP_XMAX_IS_MULTI)
6387 {
6388 LockTupleMode lockmode = LockTupleNoKeyExclusive;
6389 MultiXactStatus mxact_status = MultiXactStatusNoKeyUpdate;
6390 int remain;
6391
6392 if (DoesMultiXactIdConflict((MultiXactId) xwait, infomask,
6393 lockmode, NULL))
6394 {
6395 LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
6396 release_callback(arg);
6397 ret = false;
6398 MultiXactIdWait((MultiXactId) xwait, mxact_status, infomask,
6399 relation, &oldtup.t_self, XLTW_Update,
6400 &remain);
6401 }
6402 else
6403 ret = true;
6404 }
6405 else if (TransactionIdIsCurrentTransactionId(xwait))
6406 ret = true;
6407 else if (HEAP_XMAX_IS_KEYSHR_LOCKED(infomask))
6408 ret = true;
6409 else
6410 {
6411 LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
6412 release_callback(arg);
6413 ret = false;
6414 XactLockTableWait(xwait, relation, &oldtup.t_self,
6415 XLTW_Update);
6416 }
6417 }
6418 else
6419 {
6420 ret = (result == TM_Ok);
6421 if (!ret)
6422 {
6423 LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
6424 release_callback(arg);
6425 }
6426 }
6427
6428 /*
6429 * GetCatalogSnapshot() relies on invalidation messages to know when to
6430 * take a new snapshot. COMMIT of xwait is responsible for sending the
6431 * invalidation. We're not acquiring heavyweight locks sufficient to
6432 * block if not yet sent, so we must take a new snapshot to ensure a later
6433 * attempt has a fair chance. While we don't need this if xwait aborted,
6434 * don't bother optimizing that.
6435 */
6436 if (!ret)
6437 {
6438 UnlockTuple(relation, &oldtup.t_self, InplaceUpdateTupleLock);
6439 ForgetInplace_Inval();
6440 InvalidateCatalogSnapshot();
6441 }
6442 return ret;
6443}
static bool HEAP_XMAX_IS_KEYSHR_LOCKED(int16 infomask)
Definition: htup_details.h:275
void CacheInvalidateHeapTupleInplace(Relation relation, HeapTuple tuple, HeapTuple newtuple)
Definition: inval.c:1578
void ForgetInplace_Inval(void)
Definition: inval.c:1279
void LockTuple(Relation relation, ItemPointer tid, LOCKMODE lockmode)
Definition: lmgr.c:562
void UnlockTuple(Relation relation, ItemPointer tid, LOCKMODE lockmode)
Definition: lmgr.c:601
@ XLTW_Update
Definition: lmgr.h:27
#define InplaceUpdateTupleLock
Definition: lockdefs.h:48
LockTupleMode
Definition: lockoptions.h:50
@ LockTupleNoKeyExclusive
Definition: lockoptions.h:56
MultiXactStatus
Definition: multixact.h:38
@ MultiXactStatusNoKeyUpdate
Definition: multixact.h:44
void * arg
void InvalidateCatalogSnapshot(void)
Definition: snapmgr.c:443
CommandId GetCurrentCommandId(bool used)
Definition: xact.c:829

References arg, Assert(), BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferIsValid(), CacheInvalidateHeapTupleInplace(), DoesMultiXactIdConflict(), ereport, errcode(), errmsg(), errmsg_internal(), ERROR, ForgetInplace_Inval(), GetCurrentCommandId(), HEAP_XMAX_IS_KEYSHR_LOCKED(), HEAP_XMAX_IS_MULTI, HeapTupleHeaderGetRawXmax(), HeapTupleSatisfiesUpdate(), InplaceUpdateTupleLock, InvalidateCatalogSnapshot(), LockBuffer(), LockTuple(), LockTupleNoKeyExclusive, MultiXactIdWait(), MultiXactStatusNoKeyUpdate, RelationGetRelid, HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleData::t_self, TM_BeingModified, TM_Invisible, TM_Ok, TM_SelfModified, TransactionIdIsCurrentTransactionId(), UnlockTuple(), XactLockTableWait(), and XLTW_Update.

Referenced by systable_inplace_update_begin().
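A hedged sketch of the lock/update/unlock protocol documented here and under heap_inplace_update_and_unlock() below; the real user is the systable_inplace_update_begin()/_finish() machinery. try_inplace_update, release_resources and state are hypothetical names.

#include "postgres.h"
#include "access/heapam.h"
#include "storage/bufmgr.h"

/* Hypothetical callback: tear down whatever located the tuple (for example,
 * end the catalog scan) before heap_inplace_lock() sleeps on a concurrent
 * updater. */
static void
release_resources(void *state)
{
	/* ... caller-specific cleanup ... */
}

/* Hypothetical sketch: apply a same-length in-place update, or report that
 * the caller must retry its lookup from scratch. */
static bool
try_inplace_update(Relation rel, HeapTuple oldtup, HeapTuple newtup,
				   Buffer buffer, void *state)
{
	if (!heap_inplace_lock(rel, oldtup, buffer, release_resources, state))
		return false;			/* resources already released; retry */

	/* ... copy oldtup and mutate the copy into newtup (same t_len) ... */

	heap_inplace_update_and_unlock(rel, oldtup, newtup, buffer);
	return true;
}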

◆ heap_inplace_unlock()

void heap_inplace_unlock ( Relation  relation,
HeapTuple  oldtup,
Buffer  buffer 
)

◆ heap_inplace_update_and_unlock()

void heap_inplace_update_and_unlock ( Relation  relation,
HeapTuple  oldtup,
HeapTuple  tuple,
Buffer  buffer 
)

Definition at line 6454 of file heapam.c.

6457{
6458 HeapTupleHeader htup = oldtup->t_data;
6459 uint32 oldlen;
6460 uint32 newlen;
6461 char *dst;
6462 char *src;
6463 int nmsgs = 0;
6464 SharedInvalidationMessage *invalMessages = NULL;
6465 bool RelcacheInitFileInval = false;
6466
6467 Assert(ItemPointerEquals(&oldtup->t_self, &tuple->t_self));
6468 oldlen = oldtup->t_len - htup->t_hoff;
6469 newlen = tuple->t_len - tuple->t_data->t_hoff;
6470 if (oldlen != newlen || htup->t_hoff != tuple->t_data->t_hoff)
6471 elog(ERROR, "wrong tuple length");
6472
6473 dst = (char *) htup + htup->t_hoff;
6474 src = (char *) tuple->t_data + tuple->t_data->t_hoff;
6475
6476 /* Like RecordTransactionCommit(), log only if needed */
6478 nmsgs = inplaceGetInvalidationMessages(&invalMessages,
6479 &RelcacheInitFileInval);
6480
6481 /*
6482 * Unlink relcache init files as needed. If unlinking, acquire
6483 * RelCacheInitLock until after associated invalidations. By doing this
6484 * in advance, if we checkpoint and then crash between inplace
6485 * XLogInsert() and inval, we don't rely on StartupXLOG() ->
6486 * RelationCacheInitFileRemove(). That uses elevel==LOG, so replay would
6487 * neglect to PANIC on EIO.
6488 */
6489 PreInplace_Inval();
6490
6491 /*----------
6492 * NO EREPORT(ERROR) from here till changes are complete
6493 *
6494 * Our buffer lock won't stop a reader having already pinned and checked
6495 * visibility for this tuple. Hence, we write WAL first, then mutate the
6496 * buffer. Like in MarkBufferDirtyHint() or RecordTransactionCommit(),
6497 * checkpoint delay makes that acceptable. With the usual order of
6498 * changes, a crash after memcpy() and before XLogInsert() could allow
6499 * datfrozenxid to overtake relfrozenxid:
6500 *
6501 * ["D" is a VACUUM (ONLY_DATABASE_STATS)]
6502 * ["R" is a VACUUM tbl]
6503 * D: vac_update_datfrozenxid() -> systable_beginscan(pg_class)
6504 * D: systable_getnext() returns pg_class tuple of tbl
6505 * R: memcpy() into pg_class tuple of tbl
6506 * D: raise pg_database.datfrozenxid, XLogInsert(), finish
6507 * [crash]
6508 * [recovery restores datfrozenxid w/o relfrozenxid]
6509 *
6510 * Like in MarkBufferDirtyHint() subroutine XLogSaveBufferForHint(), copy
6511 * the buffer to the stack before logging. Here, that facilitates a FPI
6512 * of the post-mutation block before we accept other sessions seeing it.
6513 */
6514 Assert((MyProc->delayChkptFlags & DELAY_CHKPT_START) == 0);
6515 MyProc->delayChkptFlags |= DELAY_CHKPT_START;
6516 START_CRIT_SECTION();
6517
6518 /* XLOG stuff */
6519 if (RelationNeedsWAL(relation))
6520 {
6521 xl_heap_inplace xlrec;
6522 PGAlignedBlock copied_buffer;
6523 char *origdata = (char *) BufferGetBlock(buffer);
6524 Page page = BufferGetPage(buffer);
6525 uint16 lower = ((PageHeader) page)->pd_lower;
6526 uint16 upper = ((PageHeader) page)->pd_upper;
6527 uintptr_t dst_offset_in_block;
6528 RelFileLocator rlocator;
6529 ForkNumber forkno;
6530 BlockNumber blkno;
6531 XLogRecPtr recptr;
6532
6533 xlrec.offnum = ItemPointerGetOffsetNumber(&tuple->t_self);
6534 xlrec.dbId = MyDatabaseId;
6535 xlrec.tsId = MyDatabaseTableSpace;
6536 xlrec.relcacheInitFileInval = RelcacheInitFileInval;
6537 xlrec.nmsgs = nmsgs;
6538
6539 XLogBeginInsert();
6540 XLogRegisterData(&xlrec, MinSizeOfHeapInplace);
6541 if (nmsgs != 0)
6542 XLogRegisterData(invalMessages,
6543 nmsgs * sizeof(SharedInvalidationMessage));
6544
6545 /* register block matching what buffer will look like after changes */
6546 memcpy(copied_buffer.data, origdata, lower);
6547 memcpy(copied_buffer.data + upper, origdata + upper, BLCKSZ - upper);
6548 dst_offset_in_block = dst - origdata;
6549 memcpy(copied_buffer.data + dst_offset_in_block, src, newlen);
6550 BufferGetTag(buffer, &rlocator, &forkno, &blkno);
6551 Assert(forkno == MAIN_FORKNUM);
6552 XLogRegisterBlock(0, &rlocator, forkno, blkno, copied_buffer.data,
6553 REGBUF_STANDARD);
6554 XLogRegisterBufData(0, src, newlen);
6555
6556 /* inplace updates aren't decoded atm, don't log the origin */
6557
6558 recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_INPLACE);
6559
6560 PageSetLSN(page, recptr);
6561 }
6562
6563 memcpy(dst, src, newlen);
6564
6565 MarkBufferDirty(buffer);
6566
6567 LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
6568
6569 /*
6570 * Send invalidations to shared queue. SearchSysCacheLocked1() assumes we
6571 * do this before UnlockTuple().
6572 *
6573 * If we're mutating a tuple visible only to this transaction, there's an
6574 * equivalent transactional inval from the action that created the tuple,
6575 * and this inval is superfluous.
6576 */
6577 AtInplace_Inval();
6578
6579 MyProc->delayChkptFlags &= ~DELAY_CHKPT_START;
6580 END_CRIT_SECTION();
6581 UnlockTuple(relation, &tuple->t_self, InplaceUpdateTupleLock);
6582
6583 AcceptInvalidationMessages(); /* local processing of just-sent inval */
6584
6585 /*
6586 * Queue a transactional inval. The immediate invalidation we just sent
6587 * is the only one known to be necessary. To reduce risk from the
6588 * transition to immediate invalidation, continue sending a transactional
6589 * invalidation like we've long done. Third-party code might rely on it.
6590 */
6591 if (!IsBootstrapProcessingMode())
6592 CacheInvalidateHeapTuple(relation, tuple, NULL);
6593}
void BufferGetTag(Buffer buffer, RelFileLocator *rlocator, ForkNumber *forknum, BlockNumber *blknum)
Definition: bufmgr.c:4182
static Block BufferGetBlock(Buffer buffer)
Definition: bufmgr.h:381
PageHeaderData * PageHeader
Definition: bufpage.h:174
uint32_t uint32
Definition: c.h:502
Oid MyDatabaseTableSpace
Definition: globals.c:97
Oid MyDatabaseId
Definition: globals.c:95
#define MinSizeOfHeapInplace
Definition: heapam_xlog.h:436
#define XLOG_HEAP_INPLACE
Definition: heapam_xlog.h:40
void AcceptInvalidationMessages(void)
Definition: inval.c:929
int inplaceGetInvalidationMessages(SharedInvalidationMessage **msgs, bool *RelcacheInitFileInval)
Definition: inval.c:1081
void PreInplace_Inval(void)
Definition: inval.c:1243
void AtInplace_Inval(void)
Definition: inval.c:1256
#define IsBootstrapProcessingMode()
Definition: miscadmin.h:477
Datum lower(PG_FUNCTION_ARGS)
Definition: oracle_compat.c:49
Datum upper(PG_FUNCTION_ARGS)
Definition: oracle_compat.c:80
#define DELAY_CHKPT_START
Definition: proc.h:120
ForkNumber
Definition: relpath.h:56
PGPROC * MyProc
Definition: proc.c:67
int delayChkptFlags
Definition: proc.h:241
OffsetNumber offnum
Definition: heapam_xlog.h:428
bool relcacheInitFileInval
Definition: heapam_xlog.h:431
char data[BLCKSZ]
Definition: c.h:1090
#define XLogStandbyInfoActive()
Definition: xlog.h:123
void XLogRegisterBufData(uint8 block_id, const void *data, uint32 len)
Definition: xloginsert.c:405
void XLogRegisterBlock(uint8 block_id, RelFileLocator *rlocator, ForkNumber forknum, BlockNumber blknum, const PageData *page, uint8 flags)
Definition: xloginsert.c:309

References AcceptInvalidationMessages(), Assert(), AtInplace_Inval(), BUFFER_LOCK_UNLOCK, BufferGetBlock(), BufferGetPage(), BufferGetTag(), CacheInvalidateHeapTuple(), PGAlignedBlock::data, xl_heap_inplace::dbId, DELAY_CHKPT_START, PGPROC::delayChkptFlags, elog, END_CRIT_SECTION, ERROR, inplaceGetInvalidationMessages(), InplaceUpdateTupleLock, IsBootstrapProcessingMode, ItemPointerEquals(), ItemPointerGetOffsetNumber(), LockBuffer(), lower(), MAIN_FORKNUM, MarkBufferDirty(), MinSizeOfHeapInplace, MyDatabaseId, MyDatabaseTableSpace, MyProc, xl_heap_inplace::nmsgs, xl_heap_inplace::offnum, PageSetLSN(), PreInplace_Inval(), REGBUF_STANDARD, RelationNeedsWAL, xl_heap_inplace::relcacheInitFileInval, START_CRIT_SECTION, HeapTupleData::t_data, HeapTupleHeaderData::t_hoff, HeapTupleData::t_len, HeapTupleData::t_self, xl_heap_inplace::tsId, UnlockTuple(), upper(), XLOG_HEAP_INPLACE, XLogBeginInsert(), XLogInsert(), XLogRegisterBlock(), XLogRegisterBufData(), XLogRegisterData(), and XLogStandbyInfoActive.

Referenced by systable_inplace_update_finish().

◆ heap_insert()

void heap_insert ( Relation  relation,
HeapTuple  tup,
CommandId  cid,
int  options,
BulkInsertState  bistate 
)

Definition at line 2056 of file heapam.c.

2058{
2059 TransactionId xid = GetCurrentTransactionId();
2060 HeapTuple heaptup;
2061 Buffer buffer;
2062 Buffer vmbuffer = InvalidBuffer;
2063 bool all_visible_cleared = false;
2064
2065 /* Cheap, simplistic check that the tuple matches the rel's rowtype. */
2066 Assert(HeapTupleHeaderGetNatts(tup->t_data) <=
2067 RelationGetNumberOfAttributes(relation));
2068
2069 /*
2070 * Fill in tuple header fields and toast the tuple if necessary.
2071 *
2072 * Note: below this point, heaptup is the data we actually intend to store
2073 * into the relation; tup is the caller's original untoasted data.
2074 */
2075 heaptup = heap_prepare_insert(relation, tup, xid, cid, options);
2076
2077 /*
2078 * Find buffer to insert this tuple into. If the page is all visible,
2079 * this will also pin the requisite visibility map page.
2080 */
2081 buffer = RelationGetBufferForTuple(relation, heaptup->t_len,
2082 InvalidBuffer, options, bistate,
2083 &vmbuffer, NULL,
2084 0);
2085
2086 /*
2087 * We're about to do the actual insert -- but check for conflict first, to
2088 * avoid possibly having to roll back work we've just done.
2089 *
2090 * This is safe without a recheck as long as there is no possibility of
2091 * another process scanning the page between this check and the insert
2092 * being visible to the scan (i.e., an exclusive buffer content lock is
2093 * continuously held from this point until the tuple insert is visible).
2094 *
2095 * For a heap insert, we only need to check for table-level SSI locks. Our
2096 * new tuple can't possibly conflict with existing tuple locks, and heap
2097 * page locks are only consolidated versions of tuple locks; they do not
2098 * lock "gaps" as index page locks do. So we don't need to specify a
2099 * buffer when making the call, which makes for a faster check.
2100 */
2101 CheckForSerializableConflictIn(relation, NULL, InvalidBlockNumber);
2102
2103 /* NO EREPORT(ERROR) from here till changes are logged */
2104 START_CRIT_SECTION();
2105
2106 RelationPutHeapTuple(relation, buffer, heaptup,
2107 (options & HEAP_INSERT_SPECULATIVE) != 0);
2108
2109 if (PageIsAllVisible(BufferGetPage(buffer)))
2110 {
2111 all_visible_cleared = true;
2112 PageClearAllVisible(BufferGetPage(buffer));
2113 visibilitymap_clear(relation,
2114 ItemPointerGetBlockNumber(&(heaptup->t_self)),
2115 vmbuffer, VISIBILITYMAP_VALID_BITS);
2116 }
2117
2118 /*
2119 * XXX Should we set PageSetPrunable on this page ?
2120 *
2121 * The inserting transaction may eventually abort thus making this tuple
2122 * DEAD and hence available for pruning. Though we don't want to optimize
2123 * for aborts, if no other tuple in this page is UPDATEd/DELETEd, the
2124 * aborted tuple will never be pruned until next vacuum is triggered.
2125 *
2126 * If you do add PageSetPrunable here, add it in heap_xlog_insert too.
2127 */
2128
2129 MarkBufferDirty(buffer);
2130
2131 /* XLOG stuff */
2132 if (RelationNeedsWAL(relation))
2133 {
2134 xl_heap_insert xlrec;
2135 xl_heap_header xlhdr;
2136 XLogRecPtr recptr;
2137 Page page = BufferGetPage(buffer);
2138 uint8 info = XLOG_HEAP_INSERT;
2139 int bufflags = 0;
2140
2141 /*
2142 * If this is a catalog, we need to transmit combo CIDs to properly
2143 * decode, so log that as well.
2144 */
2145 if (RelationIsAccessibleInLogicalDecoding(relation))
2146 log_heap_new_cid(relation, heaptup);
2147
2148 /*
2149 * If this is the single and first tuple on page, we can reinit the
2150 * page instead of restoring the whole thing. Set flag, and hide
2151 * buffer references from XLogInsert.
2152 */
2153 if (ItemPointerGetOffsetNumber(&(heaptup->t_self)) == FirstOffsetNumber &&
2154 PageGetMaxOffsetNumber(page) == FirstOffsetNumber)
2155 {
2156 info |= XLOG_HEAP_INIT_PAGE;
2157 bufflags |= REGBUF_WILL_INIT;
2158 }
2159
2160 xlrec.offnum = ItemPointerGetOffsetNumber(&heaptup->t_self);
2161 xlrec.flags = 0;
2162 if (all_visible_cleared)
2163 xlrec.flags |= XLH_INSERT_ALL_VISIBLE_CLEARED;
2164 if (options & HEAP_INSERT_SPECULATIVE)
2165 xlrec.flags |= XLH_INSERT_IS_SPECULATIVE;
2166 Assert(ItemPointerGetBlockNumber(&heaptup->t_self) == BufferGetBlockNumber(buffer));
2167
2168 /*
2169 * For logical decoding, we need the tuple even if we're doing a full
2170 * page write, so make sure it's included even if we take a full-page
2171 * image. (XXX We could alternatively store a pointer into the FPW).
2172 */
2173 if (RelationIsLogicallyLogged(relation) &&
2174 !(options & HEAP_INSERT_NO_LOGICAL))
2175 {
2176 xlrec.flags |= XLH_INSERT_CONTAINS_NEW_TUPLE;
2177 bufflags |= REGBUF_KEEP_DATA;
2178
2179 if (IsToastRelation(relation))
2180 xlrec.flags |= XLH_INSERT_ON_TOAST_RELATION;
2181 }
2182
2183 XLogBeginInsert();
2184 XLogRegisterData(&xlrec, SizeOfHeapInsert);
2185
2186 xlhdr.t_infomask2 = heaptup->t_data->t_infomask2;
2187 xlhdr.t_infomask = heaptup->t_data->t_infomask;
2188 xlhdr.t_hoff = heaptup->t_data->t_hoff;
2189
2190 /*
2191 * note we mark xlhdr as belonging to buffer; if XLogInsert decides to
2192 * write the whole page to the xlog, we don't need to store
2193 * xl_heap_header in the xlog.
2194 */
2195 XLogRegisterBuffer(0, buffer, REGBUF_STANDARD | bufflags);
2196 XLogRegisterBufData(0, &xlhdr, SizeOfHeapHeader);
2197 /* PG73FORMAT: write bitmap [+ padding] [+ oid] + data */
2198 XLogRegisterBufData(0,
2199 (char *) heaptup->t_data + SizeofHeapTupleHeader,
2200 heaptup->t_len - SizeofHeapTupleHeader);
2201
2202 /* filtering by origin on a row level is much more efficient */
2203 XLogSetRecordFlags(XLOG_INCLUDE_ORIGIN);
2204
2205 recptr = XLogInsert(RM_HEAP_ID, info);
2206
2207 PageSetLSN(page, recptr);
2208 }
2209
2210 END_CRIT_SECTION();
2211
2212 UnlockReleaseBuffer(buffer);
2213 if (vmbuffer != InvalidBuffer)
2214 ReleaseBuffer(vmbuffer);
2215
2216 /*
2217 * If tuple is cachable, mark it for invalidation from the caches in case
2218 * we abort. Note it is OK to do this after releasing the buffer, because
2219 * the heaptup data structure is all in local memory, not in the shared
2220 * buffer.
2221 */
2222 CacheInvalidateHeapTuple(relation, heaptup, NULL);
2223
2224 /* Note: speculative insertions are counted too, even if aborted later */
2225 pgstat_count_heap_insert(relation, 1);
2226
2227 /*
2228 * If heaptup is a private copy, release it. Don't forget to copy t_self
2229 * back to the caller's image, too.
2230 */
2231 if (heaptup != tup)
2232 {
2233 tup->t_self = heaptup->t_self;
2234 heap_freetuple(heaptup);
2235 }
2236}
uint8_t uint8
Definition: c.h:500
static HeapTuple heap_prepare_insert(Relation relation, HeapTuple tup, TransactionId xid, CommandId cid, int options)
Definition: heapam.c:2245
#define HEAP_INSERT_SPECULATIVE
Definition: heapam.h:39
#define HEAP_INSERT_NO_LOGICAL
Definition: heapam.h:38
#define XLH_INSERT_ON_TOAST_RELATION
Definition: heapam_xlog.h:76
#define XLH_INSERT_IS_SPECULATIVE
Definition: heapam_xlog.h:74
#define XLH_INSERT_ALL_VISIBLE_CLEARED
Definition: heapam_xlog.h:72
#define XLOG_HEAP_INSERT
Definition: heapam_xlog.h:33
#define SizeOfHeapInsert
Definition: heapam_xlog.h:168
#define XLH_INSERT_CONTAINS_NEW_TUPLE
Definition: heapam_xlog.h:75
#define XLOG_HEAP_INIT_PAGE
Definition: heapam_xlog.h:47
void RelationPutHeapTuple(Relation relation, Buffer buffer, HeapTuple tuple, bool token)
Definition: hio.c:35
Buffer RelationGetBufferForTuple(Relation relation, Size len, Buffer otherBuffer, int options, BulkInsertState bistate, Buffer *vmbuffer, Buffer *vmbuffer_other, int num_pages)
Definition: hio.c:502
#define HeapTupleHeaderGetNatts(tup)
Definition: htup_details.h:577
void pgstat_count_heap_insert(Relation rel, PgStat_Counter n)
#define RelationIsLogicallyLogged(relation)
Definition: rel.h:712
#define RelationGetNumberOfAttributes(relation)
Definition: rel.h:522
OffsetNumber offnum
Definition: heapam_xlog.h:162
#define REGBUF_KEEP_DATA
Definition: xloginsert.h:36
#define REGBUF_WILL_INIT
Definition: xloginsert.h:34

References Assert(), BufferGetBlockNumber(), BufferGetPage(), CacheInvalidateHeapTuple(), CheckForSerializableConflictIn(), END_CRIT_SECTION, FirstOffsetNumber, xl_heap_insert::flags, GetCurrentTransactionId(), heap_freetuple(), HEAP_INSERT_NO_LOGICAL, HEAP_INSERT_SPECULATIVE, heap_prepare_insert(), HeapTupleHeaderGetNatts, InvalidBlockNumber, InvalidBuffer, IsToastRelation(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), log_heap_new_cid(), MarkBufferDirty(), xl_heap_insert::offnum, PageClearAllVisible(), PageGetMaxOffsetNumber(), PageIsAllVisible(), PageSetLSN(), pgstat_count_heap_insert(), REGBUF_KEEP_DATA, REGBUF_STANDARD, REGBUF_WILL_INIT, RelationGetBufferForTuple(), RelationGetNumberOfAttributes, RelationIsAccessibleInLogicalDecoding, RelationIsLogicallyLogged, RelationNeedsWAL, RelationPutHeapTuple(), ReleaseBuffer(), SizeOfHeapHeader, SizeOfHeapInsert, SizeofHeapTupleHeader, START_CRIT_SECTION, HeapTupleData::t_data, xl_heap_header::t_hoff, HeapTupleHeaderData::t_hoff, xl_heap_header::t_infomask, HeapTupleHeaderData::t_infomask, xl_heap_header::t_infomask2, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, UnlockReleaseBuffer(), visibilitymap_clear(), VISIBILITYMAP_VALID_BITS, XLH_INSERT_ALL_VISIBLE_CLEARED, XLH_INSERT_CONTAINS_NEW_TUPLE, XLH_INSERT_IS_SPECULATIVE, XLH_INSERT_ON_TOAST_RELATION, XLOG_HEAP_INIT_PAGE, XLOG_HEAP_INSERT, XLOG_INCLUDE_ORIGIN, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), XLogRegisterData(), and XLogSetRecordFlags().

Referenced by heapam_tuple_insert(), heapam_tuple_insert_speculative(), simple_heap_insert(), and toast_save_datum().
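A minimal usage sketch may help put the listing above in context. The fragment below is illustrative only: insert_one_tuple() and the way the caller obtains relid and tup are hypothetical, while heap_insert(), table_open()/table_close(), GetCurrentCommandId() and the HEAP_INSERT_* option bits are the real interfaces declared in the headers shown on this page. In-core code normally reaches heap_insert() through the table AM (for example via simple_heap_insert() or heapam_tuple_insert()) rather than calling it directly.

#include "postgres.h"
#include "access/heapam.h"
#include "access/table.h"
#include "access/xact.h"

/* Hypothetical sketch: insert one pre-built HeapTuple with heap_insert().
 * Index maintenance, triggers and permission checks (normally handled by
 * the executor / table AM layer) are intentionally omitted. */
static void
insert_one_tuple(Oid relid, HeapTuple tup)
{
    Relation    rel = table_open(relid, RowExclusiveLock);
    CommandId   cid = GetCurrentCommandId(true);

    /* options = 0: use the FSM, WAL-log normally, allow logical decoding */
    heap_insert(rel, tup, cid, 0, NULL);

    /* heap_insert() has set tup->t_self to the TID of the new tuple */
    table_close(rel, RowExclusiveLock);
}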

◆ heap_lock_tuple()

TM_Result heap_lock_tuple ( Relation  relation,
HeapTuple  tuple,
CommandId  cid,
LockTupleMode  mode,
LockWaitPolicy  wait_policy,
bool  follow_updates,
Buffer buffer,
struct TM_FailureData tmfd 
)

Definition at line 4543 of file heapam.c.

4547{
4548 TM_Result result;
4549 ItemPointer tid = &(tuple->t_self);
4550 ItemId lp;
4551 Page page;
4552 Buffer vmbuffer = InvalidBuffer;
4553 BlockNumber block;
4554 TransactionId xid,
4555 xmax;
4556 uint16 old_infomask,
4557 new_infomask,
4558 new_infomask2;
4559 bool first_time = true;
4560 bool skip_tuple_lock = false;
4561 bool have_tuple_lock = false;
4562 bool cleared_all_frozen = false;
4563
4564 *buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
4565 block = ItemPointerGetBlockNumber(tid);
4566
4567 /*
4568 * Before locking the buffer, pin the visibility map page if it appears to
4569 * be necessary. Since we haven't got the lock yet, someone else might be
4570 * in the middle of changing this, so we'll need to recheck after we have
4571 * the lock.
4572 */
4573 if (PageIsAllVisible(BufferGetPage(*buffer)))
4574 visibilitymap_pin(relation, block, &vmbuffer);
4575
4576 LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4577
4578 page = BufferGetPage(*buffer);
4579 lp = PageGetItemId(page, ItemPointerGetOffsetNumber(tid));
4580 Assert(ItemIdIsNormal(lp));
4581
4582 tuple->t_data = (HeapTupleHeader) PageGetItem(page, lp);
4583 tuple->t_len = ItemIdGetLength(lp);
4584 tuple->t_tableOid = RelationGetRelid(relation);
4585
4586l3:
4587 result = HeapTupleSatisfiesUpdate(tuple, cid, *buffer);
4588
4589 if (result == TM_Invisible)
4590 {
4591 /*
4592 * This is possible, but only when locking a tuple for ON CONFLICT
4593 * UPDATE. We return this value here rather than throwing an error in
4594 * order to give that case the opportunity to throw a more specific
4595 * error.
4596 */
4597 result = TM_Invisible;
4598 goto out_locked;
4599 }
4600 else if (result == TM_BeingModified ||
4601 result == TM_Updated ||
4602 result == TM_Deleted)
4603 {
4604 TransactionId xwait;
4605 uint16 infomask;
4606 uint16 infomask2;
4607 bool require_sleep;
4608 ItemPointerData t_ctid;
4609
4610 /* must copy state data before unlocking buffer */
4611 xwait = HeapTupleHeaderGetRawXmax(tuple->t_data);
4612 infomask = tuple->t_data->t_infomask;
4613 infomask2 = tuple->t_data->t_infomask2;
4614 ItemPointerCopy(&tuple->t_data->t_ctid, &t_ctid);
4615
4616 LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
4617
4618 /*
4619 * If any subtransaction of the current top transaction already holds
4620 * a lock as strong as or stronger than what we're requesting, we
4621 * effectively hold the desired lock already. We *must* succeed
4622 * without trying to take the tuple lock, else we will deadlock
4623 * against anyone wanting to acquire a stronger lock.
4624 *
4625 * Note we only do this the first time we loop on the HTSU result;
4626 * there is no point in testing in subsequent passes, because
4627 * evidently our own transaction cannot have acquired a new lock after
4628 * the first time we checked.
4629 */
4630 if (first_time)
4631 {
4632 first_time = false;
4633
4634 if (infomask & HEAP_XMAX_IS_MULTI)
4635 {
4636 int i;
4637 int nmembers;
4638 MultiXactMember *members;
4639
4640 /*
4641 * We don't need to allow old multixacts here; if that had
4642 * been the case, HeapTupleSatisfiesUpdate would have returned
4643 * MayBeUpdated and we wouldn't be here.
4644 */
4645 nmembers =
4646 GetMultiXactIdMembers(xwait, &members, false,
4647 HEAP_XMAX_IS_LOCKED_ONLY(infomask));
4648
4649 for (i = 0; i < nmembers; i++)
4650 {
4651 /* only consider members of our own transaction */
4652 if (!TransactionIdIsCurrentTransactionId(members[i].xid))
4653 continue;
4654
4655 if (TUPLOCK_from_mxstatus(members[i].status) >= mode)
4656 {
4657 pfree(members);
4658 result = TM_Ok;
4659 goto out_unlocked;
4660 }
4661 else
4662 {
4663 /*
4664 * Disable acquisition of the heavyweight tuple lock.
4665 * Otherwise, when promoting a weaker lock, we might
4666 * deadlock with another locker that has acquired the
4667 * heavyweight tuple lock and is waiting for our
4668 * transaction to finish.
4669 *
4670 * Note that in this case we still need to wait for
4671 * the multixact if required, to avoid acquiring
4672 * conflicting locks.
4673 */
4674 skip_tuple_lock = true;
4675 }
4676 }
4677
4678 if (members)
4679 pfree(members);
4680 }
4681 else if (TransactionIdIsCurrentTransactionId(xwait))
4682 {
4683 switch (mode)
4684 {
4685 case LockTupleKeyShare:
4686 Assert(HEAP_XMAX_IS_KEYSHR_LOCKED(infomask) ||
4687 HEAP_XMAX_IS_SHR_LOCKED(infomask) ||
4688 HEAP_XMAX_IS_EXCL_LOCKED(infomask));
4689 result = TM_Ok;
4690 goto out_unlocked;
4691 case LockTupleShare:
4692 if (HEAP_XMAX_IS_SHR_LOCKED(infomask) ||
4693 HEAP_XMAX_IS_EXCL_LOCKED(infomask))
4694 {
4695 result = TM_Ok;
4696 goto out_unlocked;
4697 }
4698 break;
4699 case LockTupleNoKeyExclusive:
4700 if (HEAP_XMAX_IS_EXCL_LOCKED(infomask))
4701 {
4702 result = TM_Ok;
4703 goto out_unlocked;
4704 }
4705 break;
4706 case LockTupleExclusive:
4707 if (HEAP_XMAX_IS_EXCL_LOCKED(infomask) &&
4708 infomask2 & HEAP_KEYS_UPDATED)
4709 {
4710 result = TM_Ok;
4711 goto out_unlocked;
4712 }
4713 break;
4714 }
4715 }
4716 }
4717
4718 /*
4719 * Initially assume that we will have to wait for the locking
4720 * transaction(s) to finish. We check various cases below in which
4721 * this can be turned off.
4722 */
4723 require_sleep = true;
4724 if (mode == LockTupleKeyShare)
4725 {
4726 /*
4727 * If we're requesting KeyShare, and there's no update present, we
4728 * don't need to wait. Even if there is an update, we can still
4729 * continue if the key hasn't been modified.
4730 *
4731 * However, if there are updates, we need to walk the update chain
4732 * to mark future versions of the row as locked, too. That way,
4733 * if somebody deletes that future version, we're protected
4734 * against the key going away. This locking of future versions
4735 * could block momentarily, if a concurrent transaction is
4736 * deleting a key; or it could return a value to the effect that
4737 * the transaction deleting the key has already committed. So we
4738 * do this before re-locking the buffer; otherwise this would be
4739 * prone to deadlocks.
4740 *
4741 * Note that the TID we're locking was grabbed before we unlocked
4742 * the buffer. For it to change while we're not looking, the
4743 * other properties we're testing for below after re-locking the
4744 * buffer would also change, in which case we would restart this
4745 * loop above.
4746 */
4747 if (!(infomask2 & HEAP_KEYS_UPDATED))
4748 {
4749 bool updated;
4750
4751 updated = !HEAP_XMAX_IS_LOCKED_ONLY(infomask);
4752
4753 /*
4754 * If there are updates, follow the update chain; bail out if
4755 * that cannot be done.
4756 */
4757 if (follow_updates && updated)
4758 {
4759 TM_Result res;
4760
4761 res = heap_lock_updated_tuple(relation, tuple, &t_ctid,
4762 GetCurrentTransactionId(),
4763 mode);
4764 if (res != TM_Ok)
4765 {
4766 result = res;
4767 /* recovery code expects to have buffer lock held */
4768 LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4769 goto failed;
4770 }
4771 }
4772
4774
4775 /*
4776 * Make sure it's still an appropriate lock, else start over.
4777 * Also, if it wasn't updated before we released the lock, but
4778 * is updated now, we start over too; the reason is that we
4779 * now need to follow the update chain to lock the new
4780 * versions.
4781 */
4782 if (!HeapTupleHeaderIsOnlyLocked(tuple->t_data) &&
4783 ((tuple->t_data->t_infomask2 & HEAP_KEYS_UPDATED) ||
4784 !updated))
4785 goto l3;
4786
4787 /* Things look okay, so we can skip sleeping */
4788 require_sleep = false;
4789
4790 /*
4791 * Note we allow Xmax to change here; other updaters/lockers
4792 * could have modified it before we grabbed the buffer lock.
4793 * However, this is not a problem, because with the recheck we
4794 * just did we ensure that they still don't conflict with the
4795 * lock we want.
4796 */
4797 }
4798 }
4799 else if (mode == LockTupleShare)
4800 {
4801 /*
4802 * If we're requesting Share, we can similarly avoid sleeping if
4803 * there's no update and no exclusive lock present.
4804 */
4805 if (HEAP_XMAX_IS_LOCKED_ONLY(infomask) &&
4806 !HEAP_XMAX_IS_EXCL_LOCKED(infomask))
4807 {
4809
4810 /*
4811 * Make sure it's still an appropriate lock, else start over.
4812 * See above about allowing xmax to change.
4813 */
4814 if (!HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_data->t_infomask) ||
4815 HEAP_XMAX_IS_EXCL_LOCKED(tuple->t_data->t_infomask))
4816 goto l3;
4817 require_sleep = false;
4818 }
4819 }
4820 else if (mode == LockTupleNoKeyExclusive)
4821 {
4822 /*
4823 * If we're requesting NoKeyExclusive, we might also be able to
4824 * avoid sleeping; just ensure that there no conflicting lock
4825 * already acquired.
4826 */
4827 if (infomask & HEAP_XMAX_IS_MULTI)
4828 {
4829 if (!DoesMultiXactIdConflict((MultiXactId) xwait, infomask,
4830 mode, NULL))
4831 {
4832 /*
4833 * No conflict, but if the xmax changed under us in the
4834 * meantime, start over.
4835 */
4837 if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) ||
4838 !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data),
4839 xwait))
4840 goto l3;
4841
4842 /* otherwise, we're good */
4843 require_sleep = false;
4844 }
4845 }
4846 else if (HEAP_XMAX_IS_KEYSHR_LOCKED(infomask))
4847 {
4849
4850 /* if the xmax changed in the meantime, start over */
4851 if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) ||
4852 !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data),
4853 xwait))
4854 goto l3;
4855 /* otherwise, we're good */
4856 require_sleep = false;
4857 }
4858 }
4859
4860 /*
4861 * As a check independent from those above, we can also avoid sleeping
4862 * if the current transaction is the sole locker of the tuple. Note
4863 * that the strength of the lock already held is irrelevant; this is
4864 * not about recording the lock in Xmax (which will be done regardless
4865 * of this optimization, below). Also, note that the cases where we
4866 * hold a lock stronger than we are requesting are already handled
4867 * above by not doing anything.
4868 *
4869 * Note we only deal with the non-multixact case here; MultiXactIdWait
4870 * is well equipped to deal with this situation on its own.
4871 */
4872 if (require_sleep && !(infomask & HEAP_XMAX_IS_MULTI) &&
4873 TransactionIdIsCurrentTransactionId(xwait))
4874 {
4875 /* ... but if the xmax changed in the meantime, start over */
4877 if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) ||
4878 !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data),
4879 xwait))
4880 goto l3;
4882 require_sleep = false;
4883 }
4884
4885 /*
4886 * Time to sleep on the other transaction/multixact, if necessary.
4887 *
4888 * If the other transaction is an update/delete that's already
4889 * committed, then sleeping cannot possibly do any good: if we're
4890 * required to sleep, get out to raise an error instead.
4891 *
4892 * By here, we either have already acquired the buffer exclusive lock,
4893 * or we must wait for the locking transaction or multixact; so below
4894 * we ensure that we grab buffer lock after the sleep.
4895 */
4896 if (require_sleep && (result == TM_Updated || result == TM_Deleted))
4897 {
4898 LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4899 goto failed;
4900 }
4901 else if (require_sleep)
4902 {
4903 /*
4904 * Acquire tuple lock to establish our priority for the tuple, or
4905 * die trying. LockTuple will release us when we are next-in-line
4906 * for the tuple. We must do this even if we are share-locking,
4907 * but not if we already have a weaker lock on the tuple.
4908 *
4909 * If we are forced to "start over" below, we keep the tuple lock;
4910 * this arranges that we stay at the head of the line while
4911 * rechecking tuple state.
4912 */
4913 if (!skip_tuple_lock &&
4914 !heap_acquire_tuplock(relation, tid, mode, wait_policy,
4915 &have_tuple_lock))
4916 {
4917 /*
4918 * This can only happen if wait_policy is Skip and the lock
4919 * couldn't be obtained.
4920 */
4921 result = TM_WouldBlock;
4922 /* recovery code expects to have buffer lock held */
4923 LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4924 goto failed;
4925 }
4926
4927 if (infomask & HEAP_XMAX_IS_MULTI)
4928 {
4930
4931 /* We only ever lock tuples, never update them */
4932 if (status >= MultiXactStatusNoKeyUpdate)
4933 elog(ERROR, "invalid lock mode in heap_lock_tuple");
4934
4935 /* wait for multixact to end, or die trying */
4936 switch (wait_policy)
4937 {
4938 case LockWaitBlock:
4939 MultiXactIdWait((MultiXactId) xwait, status, infomask,
4940 relation, &tuple->t_self, XLTW_Lock, NULL);
4941 break;
4942 case LockWaitSkip:
4943 if (!ConditionalMultiXactIdWait((MultiXactId) xwait,
4944 status, infomask, relation,
4945 NULL, false))
4946 {
4947 result = TM_WouldBlock;
4948 /* recovery code expects to have buffer lock held */
4949 LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4950 goto failed;
4951 }
4952 break;
4953 case LockWaitError:
4954 if (!ConditionalMultiXactIdWait((MultiXactId) xwait,
4955 status, infomask, relation,
4956 NULL, log_lock_failure))
4957 ereport(ERROR,
4958 (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
4959 errmsg("could not obtain lock on row in relation \"%s\"",
4960 RelationGetRelationName(relation))));
4961
4962 break;
4963 }
4964
4965 /*
4966 * Of course, the multixact might not be done here: if we're
4967 * requesting a light lock mode, other transactions with light
4968 * locks could still be alive, as well as locks owned by our
4969 * own xact or other subxacts of this backend. We need to
4970 * preserve the surviving MultiXact members. Note that it
4971 * isn't absolutely necessary in the latter case, but doing so
4972 * is simpler.
4973 */
4974 }
4975 else
4976 {
4977 /* wait for regular transaction to end, or die trying */
4978 switch (wait_policy)
4979 {
4980 case LockWaitBlock:
4981 XactLockTableWait(xwait, relation, &tuple->t_self,
4982 XLTW_Lock);
4983 break;
4984 case LockWaitSkip:
4985 if (!ConditionalXactLockTableWait(xwait, false))
4986 {
4987 result = TM_WouldBlock;
4988 /* recovery code expects to have buffer lock held */
4989 LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4990 goto failed;
4991 }
4992 break;
4993 case LockWaitError:
4995 ereport(ERROR,
4996 (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
4997 errmsg("could not obtain lock on row in relation \"%s\"",
4998 RelationGetRelationName(relation))));
4999 break;
5000 }
5001 }
5002
5003 /* if there are updates, follow the update chain */
5004 if (follow_updates && !HEAP_XMAX_IS_LOCKED_ONLY(infomask))
5005 {
5006 TM_Result res;
5007
5008 res = heap_lock_updated_tuple(relation, tuple, &t_ctid,
5009 GetCurrentTransactionId(),
5010 mode);
5011 if (res != TM_Ok)
5012 {
5013 result = res;
5014 /* recovery code expects to have buffer lock held */
5015 LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
5016 goto failed;
5017 }
5018 }
5019
5021
5022 /*
5023 * xwait is done, but if xwait had just locked the tuple then some
5024 * other xact could update this tuple before we get to this point.
5025 * Check for xmax change, and start over if so.
5026 */
5027 if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) ||
5028 !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data),
5029 xwait))
5030 goto l3;
5031
5032 if (!(infomask & HEAP_XMAX_IS_MULTI))
5033 {
5034 /*
5035 * Otherwise check if it committed or aborted. Note we cannot
5036 * be here if the tuple was only locked by somebody who didn't
5037 * conflict with us; that would have been handled above. So
5038 * that transaction must necessarily be gone by now. But
5039 * don't check for this in the multixact case, because some
5040 * locker transactions might still be running.
5041 */
5042 UpdateXmaxHintBits(tuple->t_data, *buffer, xwait);
5043 }
5044 }
5045
5046 /* By here, we're certain that we hold buffer exclusive lock again */
5047
5048 /*
5049 * We may lock if previous xmax aborted, or if it committed but only
5050 * locked the tuple without updating it; or if we didn't have to wait
5051 * at all for whatever reason.
5052 */
5053 if (!require_sleep ||
5054 (tuple->t_data->t_infomask & HEAP_XMAX_INVALID) ||
5055 HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_data->t_infomask) ||
5056 HeapTupleHeaderIsOnlyLocked(tuple->t_data))
5057 result = TM_Ok;
5058 else if (!ItemPointerEquals(&tuple->t_self, &tuple->t_data->t_ctid))
5059 result = TM_Updated;
5060 else
5061 result = TM_Deleted;
5062 }
5063
5064failed:
5065 if (result != TM_Ok)
5066 {
5067 Assert(result == TM_SelfModified || result == TM_Updated ||
5068 result == TM_Deleted || result == TM_WouldBlock);
5069
5070 /*
5071 * When locking a tuple under LockWaitSkip semantics and we fail with
5072 * TM_WouldBlock above, it's possible for concurrent transactions to
5073 * release the lock and set HEAP_XMAX_INVALID in the meantime. So
5074 * this assert is slightly different from the equivalent one in
5075 * heap_delete and heap_update.
5076 */
5077 Assert((result == TM_WouldBlock) ||
5078 !(tuple->t_data->t_infomask & HEAP_XMAX_INVALID));
5079 Assert(result != TM_Updated ||
5080 !ItemPointerEquals(&tuple->t_self, &tuple->t_data->t_ctid));
5081 tmfd->ctid = tuple->t_data->t_ctid;
5082 tmfd->xmax = HeapTupleHeaderGetUpdateXid(tuple->t_data);
5083 if (result == TM_SelfModified)
5084 tmfd->cmax = HeapTupleHeaderGetCmax(tuple->t_data);
5085 else
5086 tmfd->cmax = InvalidCommandId;
5087 goto out_locked;
5088 }
5089
5090 /*
5091 * If we didn't pin the visibility map page and the page has become all
5092 * visible while we were busy locking the buffer, or during some
5093 * subsequent window during which we had it unlocked, we'll have to unlock
5094 * and re-lock, to avoid holding the buffer lock across I/O. That's a bit
5095 * unfortunate, especially since we'll now have to recheck whether the
5096 * tuple has been locked or updated under us, but hopefully it won't
5097 * happen very often.
5098 */
5099 if (vmbuffer == InvalidBuffer && PageIsAllVisible(page))
5100 {
5101 LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
5102 visibilitymap_pin(relation, block, &vmbuffer);
5103 LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
5104 goto l3;
5105 }
5106
5107 xmax = HeapTupleHeaderGetRawXmax(tuple->t_data);
5108 old_infomask = tuple->t_data->t_infomask;
5109
5110 /*
5111 * If this is the first possibly-multixact-able operation in the current
5112 * transaction, set my per-backend OldestMemberMXactId setting. We can be
5113 * certain that the transaction will never become a member of any older
5114 * MultiXactIds than that. (We have to do this even if we end up just
5115 * using our own TransactionId below, since some other backend could
5116 * incorporate our XID into a MultiXact immediately afterwards.)
5117 */
5118 MultiXactIdSetOldestMember();
5119
5120 /*
5121 * Compute the new xmax and infomask to store into the tuple. Note we do
5122 * not modify the tuple just yet, because that would leave it in the wrong
5123 * state if multixact.c elogs.
5124 */
5125 compute_new_xmax_infomask(xmax, old_infomask, tuple->t_data->t_infomask2,
5126 GetCurrentTransactionId(), mode, false,
5127 &xid, &new_infomask, &new_infomask2);
5128
5129 START_CRIT_SECTION();
5130
5131 /*
5132 * Store transaction information of xact locking the tuple.
5133 *
5134 * Note: Cmax is meaningless in this context, so don't set it; this avoids
5135 * possibly generating a useless combo CID. Moreover, if we're locking a
5136 * previously updated tuple, it's important to preserve the Cmax.
5137 *
5138 * Also reset the HOT UPDATE bit, but only if there's no update; otherwise
5139 * we would break the HOT chain.
5140 */
5141 tuple->t_data->t_infomask &= ~HEAP_XMAX_BITS;
5142 tuple->t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED;
5143 tuple->t_data->t_infomask |= new_infomask;
5144 tuple->t_data->t_infomask2 |= new_infomask2;
5145 if (HEAP_XMAX_IS_LOCKED_ONLY(new_infomask))
5146 HeapTupleHeaderClearHotUpdated(tuple->t_data);
5147 HeapTupleHeaderSetXmax(tuple->t_data, xid);
5148
5149 /*
5150 * Make sure there is no forward chain link in t_ctid. Note that in the
5151 * cases where the tuple has been updated, we must not overwrite t_ctid,
5152 * because it was set by the updater. Moreover, if the tuple has been
5153 * updated, we need to follow the update chain to lock the new versions of
5154 * the tuple as well.
5155 */
5156 if (HEAP_XMAX_IS_LOCKED_ONLY(new_infomask))
5157 tuple->t_data->t_ctid = *tid;
5158
5159 /* Clear only the all-frozen bit on visibility map if needed */
5160 if (PageIsAllVisible(page) &&
5161 visibilitymap_clear(relation, block, vmbuffer,
5162 VISIBILITYMAP_ALL_FROZEN))
5163 cleared_all_frozen = true;
5164
5165
5166 MarkBufferDirty(*buffer);
5167
5168 /*
5169 * XLOG stuff. You might think that we don't need an XLOG record because
5170 * there is no state change worth restoring after a crash. You would be
5171 * wrong however: we have just written either a TransactionId or a
5172 * MultiXactId that may never have been seen on disk before, and we need
5173 * to make sure that there are XLOG entries covering those ID numbers.
5174 * Else the same IDs might be re-used after a crash, which would be
5175 * disastrous if this page made it to disk before the crash. Essentially
5176 * we have to enforce the WAL log-before-data rule even in this case.
5177 * (Also, in a PITR log-shipping or 2PC environment, we have to have XLOG
5178 * entries for everything anyway.)
5179 */
5180 if (RelationNeedsWAL(relation))
5181 {
5182 xl_heap_lock xlrec;
5183 XLogRecPtr recptr;
5184
5187
5188 xlrec.offnum = ItemPointerGetOffsetNumber(&tuple->t_self);
5189 xlrec.xmax = xid;
5190 xlrec.infobits_set = compute_infobits(new_infomask,
5191 tuple->t_data->t_infomask2);
5192 xlrec.flags = cleared_all_frozen ? XLH_LOCK_ALL_FROZEN_CLEARED : 0;
5193 XLogRegisterData(&xlrec, SizeOfHeapLock);
5194
5195 /* we don't decode row locks atm, so no need to log the origin */
5196
5197 recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_LOCK);
5198
5199 PageSetLSN(page, recptr);
5200 }
5201
5202 END_CRIT_SECTION();
5203
5204 result = TM_Ok;
5205
5206out_locked:
5207 LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
5208
5209out_unlocked:
5210 if (BufferIsValid(vmbuffer))
5211 ReleaseBuffer(vmbuffer);
5212
5213 /*
5214 * Don't update the visibility map here. Locking a tuple doesn't change
5215 * visibility info.
5216 */
5217
5218 /*
5219 * Now that we have successfully marked the tuple as locked, we can
5220 * release the lmgr tuple lock, if we had it.
5221 */
5222 if (have_tuple_lock)
5223 UnlockTupleTuplock(relation, tid, mode);
5224
5225 return result;
5226}
#define TUPLOCK_from_mxstatus(status)
Definition: heapam.c:213
static bool ConditionalMultiXactIdWait(MultiXactId multi, MultiXactStatus status, uint16 infomask, Relation rel, int *remaining, bool logLockFailure)
Definition: heapam.c:7751
static TM_Result heap_lock_updated_tuple(Relation rel, HeapTuple tuple, ItemPointer ctid, TransactionId xid, LockTupleMode mode)
Definition: heapam.c:6007
static MultiXactStatus get_mxact_status_for_lock(LockTupleMode mode, bool is_update)
Definition: heapam.c:4495
#define XLH_LOCK_ALL_FROZEN_CLEARED
Definition: heapam_xlog.h:393
#define XLOG_HEAP_LOCK
Definition: heapam_xlog.h:39
#define SizeOfHeapLock
Definition: heapam_xlog.h:404
static bool HEAP_XMAX_IS_SHR_LOCKED(int16 infomask)
Definition: htup_details.h:263
#define HEAP_KEYS_UPDATED
Definition: htup_details.h:289
static bool HEAP_XMAX_IS_EXCL_LOCKED(int16 infomask)
Definition: htup_details.h:269
static void ItemPointerCopy(const ItemPointerData *fromPointer, ItemPointerData *toPointer)
Definition: itemptr.h:172
bool ConditionalXactLockTableWait(TransactionId xid, bool logLockFailure)
Definition: lmgr.c:736
@ XLTW_Lock
Definition: lmgr.h:29
bool log_lock_failure
Definition: lock.c:54
@ LockWaitSkip
Definition: lockoptions.h:41
@ LockWaitError
Definition: lockoptions.h:43
@ LockTupleShare
Definition: lockoptions.h:54
@ LockTupleKeyShare
Definition: lockoptions.h:52
int GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **members, bool from_pgupgrade, bool isLockOnly)
Definition: multixact.c:1299
static PgChecksumMode mode
Definition: pg_checksums.c:55
#define RelationGetRelationName(relation)
Definition: rel.h:550
uint8 infobits_set
Definition: heapam_xlog.h:400
OffsetNumber offnum
Definition: heapam_xlog.h:399
TransactionId xmax
Definition: heapam_xlog.h:398
@ TM_WouldBlock
Definition: tableam.h:102
#define VISIBILITYMAP_ALL_FROZEN

References Assert(), BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetPage(), BufferIsValid(), TM_FailureData::cmax, compute_infobits(), compute_new_xmax_infomask(), ConditionalMultiXactIdWait(), ConditionalXactLockTableWait(), TM_FailureData::ctid, DoesMultiXactIdConflict(), elog, END_CRIT_SECTION, ereport, errcode(), errmsg(), ERROR, xl_heap_lock::flags, get_mxact_status_for_lock(), GetCurrentTransactionId(), GetMultiXactIdMembers(), heap_acquire_tuplock(), HEAP_KEYS_UPDATED, heap_lock_updated_tuple(), HEAP_XMAX_INVALID, HEAP_XMAX_IS_EXCL_LOCKED(), HEAP_XMAX_IS_KEYSHR_LOCKED(), HEAP_XMAX_IS_LOCKED_ONLY(), HEAP_XMAX_IS_MULTI, HEAP_XMAX_IS_SHR_LOCKED(), HeapTupleHeaderClearHotUpdated(), HeapTupleHeaderGetCmax(), HeapTupleHeaderGetRawXmax(), HeapTupleHeaderGetUpdateXid(), HeapTupleHeaderIsOnlyLocked(), HeapTupleHeaderSetXmax(), HeapTupleSatisfiesUpdate(), i, xl_heap_lock::infobits_set, InvalidBuffer, InvalidCommandId, ItemIdGetLength, ItemIdIsNormal, ItemPointerCopy(), ItemPointerEquals(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), LockBuffer(), LockTupleExclusive, LockTupleKeyShare, LockTupleNoKeyExclusive, LockTupleShare, LockWaitBlock, LockWaitError, LockWaitSkip, log_lock_failure, MarkBufferDirty(), mode, MultiXactIdSetOldestMember(), MultiXactIdWait(), MultiXactStatusNoKeyUpdate, xl_heap_lock::offnum, PageGetItem(), PageGetItemId(), PageIsAllVisible(), PageSetLSN(), pfree(), ReadBuffer(), REGBUF_STANDARD, RelationGetRelationName, RelationGetRelid, RelationNeedsWAL, ReleaseBuffer(), SizeOfHeapLock, START_CRIT_SECTION, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TM_BeingModified, TM_Deleted, TM_Invisible, TM_Ok, TM_SelfModified, TM_Updated, TM_WouldBlock, TransactionIdEquals, TransactionIdIsCurrentTransactionId(), TUPLOCK_from_mxstatus, UnlockTupleTuplock, UpdateXmaxHintBits(), VISIBILITYMAP_ALL_FROZEN, visibilitymap_clear(), visibilitymap_pin(), XactLockTableWait(), XLH_LOCK_ALL_FROZEN_CLEARED, XLOG_HEAP_LOCK, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), XLogRegisterData(), XLTW_Lock, xl_heap_lock::xmax, TM_FailureData::xmax, and xmax_infomask_changed().

Referenced by heapam_tuple_lock().
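The TM_Result / TM_FailureData protocol above is easiest to follow from the caller's side. The sketch below is loosely modeled on what heapam_tuple_lock() has to do and is an illustration only: lock_row_for_update() and its error strategy are hypothetical, while heap_lock_tuple(), LockTupleExclusive, LockWaitBlock and ReleaseBuffer() are the real interfaces. Note that heap_lock_tuple() returns with the buffer pinned but not locked, and with tuple->t_data pointing into that buffer.

#include "postgres.h"
#include "access/heapam.h"
#include "access/xact.h"
#include "storage/bufmgr.h"

/* Hypothetical sketch: lock the row at 'tid' FOR UPDATE, waiting if necessary.
 * Real callers also run EvalPlanQual rechecks when TM_Updated is returned. */
static void
lock_row_for_update(Relation rel, ItemPointer tid)
{
    HeapTupleData   tuple;
    Buffer          buffer;
    TM_FailureData  tmfd;
    TM_Result       res;

    tuple.t_self = *tid;
    res = heap_lock_tuple(rel, &tuple,
                          GetCurrentCommandId(true),
                          LockTupleExclusive,
                          LockWaitBlock,
                          true,             /* follow_updates */
                          &buffer, &tmfd);

    switch (res)
    {
        case TM_Ok:
            break;              /* row version in 'buffer' is now locked */
        case TM_Updated:
        case TM_Deleted:
            /* tmfd.ctid and tmfd.xmax describe the conflicting version */
            elog(ERROR, "row was concurrently modified");
            break;
        default:
            elog(ERROR, "unexpected heap_lock_tuple result: %d", res);
    }

    ReleaseBuffer(buffer);      /* drop the pin heap_lock_tuple left behind */
}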

◆ heap_multi_insert()

void heap_multi_insert ( Relation  relation,
struct TupleTableSlot **  slots,
int  ntuples,
CommandId  cid,
int  options,
BulkInsertState  bistate 
)

Definition at line 2325 of file heapam.c.

2327{
2328 TransactionId xid = GetCurrentTransactionId();
2329 HeapTuple *heaptuples;
2330 int i;
2331 int ndone;
2332 PGAlignedBlock scratch;
2333 Page page;
2334 Buffer vmbuffer = InvalidBuffer;
2335 bool needwal;
2336 Size saveFreeSpace;
2337 bool need_tuple_data = RelationIsLogicallyLogged(relation);
2338 bool need_cids = RelationIsAccessibleInLogicalDecoding(relation);
2339 bool starting_with_empty_page = false;
2340 int npages = 0;
2341 int npages_used = 0;
2342
2343 /* currently not needed (thus unsupported) for heap_multi_insert() */
2344 Assert(!(options & HEAP_INSERT_SPECULATIVE));
2345
2346 needwal = RelationNeedsWAL(relation);
2347 saveFreeSpace = RelationGetTargetPageFreeSpace(relation,
2348 HEAP_DEFAULT_FILLFACTOR);
2349
2350 /* Toast and set header data in all the slots */
2351 heaptuples = palloc(ntuples * sizeof(HeapTuple));
2352 for (i = 0; i < ntuples; i++)
2353 {
2354 HeapTuple tuple;
2355
2356 tuple = ExecFetchSlotHeapTuple(slots[i], true, NULL);
2357 slots[i]->tts_tableOid = RelationGetRelid(relation);
2358 tuple->t_tableOid = slots[i]->tts_tableOid;
2359 heaptuples[i] = heap_prepare_insert(relation, tuple, xid, cid,
2360 options);
2361 }
2362
2363 /*
2364 * We're about to do the actual inserts -- but check for conflict first,
2365 * to minimize the possibility of having to roll back work we've just
2366 * done.
2367 *
2368 * A check here does not definitively prevent a serialization anomaly;
2369 * that check MUST be done at least past the point of acquiring an
2370 * exclusive buffer content lock on every buffer that will be affected,
2371 * and MAY be done after all inserts are reflected in the buffers and
2372 * those locks are released; otherwise there is a race condition. Since
2373 * multiple buffers can be locked and unlocked in the loop below, and it
2374 * would not be feasible to identify and lock all of those buffers before
2375 * the loop, we must do a final check at the end.
2376 *
2377 * The check here could be omitted with no loss of correctness; it is
2378 * present strictly as an optimization.
2379 *
2380 * For heap inserts, we only need to check for table-level SSI locks. Our
2381 * new tuples can't possibly conflict with existing tuple locks, and heap
2382 * page locks are only consolidated versions of tuple locks; they do not
2383 * lock "gaps" as index page locks do. So we don't need to specify a
2384 * buffer when making the call, which makes for a faster check.
2385 */
2386 CheckForSerializableConflictIn(relation, NULL, InvalidBlockNumber);
2387
2388 ndone = 0;
2389 while (ndone < ntuples)
2390 {
2391 Buffer buffer;
2392 bool all_visible_cleared = false;
2393 bool all_frozen_set = false;
2394 int nthispage;
2395
2396 CHECK_FOR_INTERRUPTS();
2397
2398 /*
2399 * Compute number of pages needed to fit the to-be-inserted tuples in
2400 * the worst case. This will be used to determine how much to extend
2401 * the relation by in RelationGetBufferForTuple(), if needed. If we
2402 * filled a prior page from scratch, we can just update our last
2403 * computation, but if we started with a partially filled page,
2404 * recompute from scratch, the number of potentially required pages
2405 * can vary due to tuples needing to fit onto the page, page headers
2406 * etc.
2407 */
2408 if (ndone == 0 || !starting_with_empty_page)
2409 {
2410 npages = heap_multi_insert_pages(heaptuples, ndone, ntuples,
2411 saveFreeSpace);
2412 npages_used = 0;
2413 }
2414 else
2415 npages_used++;
2416
2417 /*
2418 * Find buffer where at least the next tuple will fit. If the page is
2419 * all-visible, this will also pin the requisite visibility map page.
2420 *
2421 * Also pin visibility map page if COPY FREEZE inserts tuples into an
2422 * empty page. See all_frozen_set below.
2423 */
2424 buffer = RelationGetBufferForTuple(relation, heaptuples[ndone]->t_len,
2425 InvalidBuffer, options, bistate,
2426 &vmbuffer, NULL,
2427 npages - npages_used);
2428 page = BufferGetPage(buffer);
2429
2430 starting_with_empty_page = PageGetMaxOffsetNumber(page) == 0;
2431
2432 if (starting_with_empty_page && (options & HEAP_INSERT_FROZEN))
2433 all_frozen_set = true;
2434
2435 /* NO EREPORT(ERROR) from here till changes are logged */
2436 START_CRIT_SECTION();
2437
2438 /*
2439 * RelationGetBufferForTuple has ensured that the first tuple fits.
2440 * Put that on the page, and then as many other tuples as fit.
2441 */
2442 RelationPutHeapTuple(relation, buffer, heaptuples[ndone], false);
2443
2444 /*
2445 * For logical decoding we need combo CIDs to properly decode the
2446 * catalog.
2447 */
2448 if (needwal && need_cids)
2449 log_heap_new_cid(relation, heaptuples[ndone]);
2450
2451 for (nthispage = 1; ndone + nthispage < ntuples; nthispage++)
2452 {
2453 HeapTuple heaptup = heaptuples[ndone + nthispage];
2454
2455 if (PageGetHeapFreeSpace(page) < MAXALIGN(heaptup->t_len) + saveFreeSpace)
2456 break;
2457
2458 RelationPutHeapTuple(relation, buffer, heaptup, false);
2459
2460 /*
2461 * For logical decoding we need combo CIDs to properly decode the
2462 * catalog.
2463 */
2464 if (needwal && need_cids)
2465 log_heap_new_cid(relation, heaptup);
2466 }
2467
2468 /*
2469 * If the page is all visible, need to clear that, unless we're only
2470 * going to add further frozen rows to it.
2471 *
2472 * If we're only adding already frozen rows to a previously empty
2473 * page, mark it as all-visible.
2474 */
2475 if (PageIsAllVisible(page) && !(options & HEAP_INSERT_FROZEN))
2476 {
2477 all_visible_cleared = true;
2478 PageClearAllVisible(page);
2479 visibilitymap_clear(relation,
2480 BufferGetBlockNumber(buffer),
2481 vmbuffer, VISIBILITYMAP_VALID_BITS);
2482 }
2483 else if (all_frozen_set)
2484 PageSetAllVisible(page);
2485
2486 /*
2487 * XXX Should we set PageSetPrunable on this page ? See heap_insert()
2488 */
2489
2490 MarkBufferDirty(buffer);
2491
2492 /* XLOG stuff */
2493 if (needwal)
2494 {
2495 XLogRecPtr recptr;
2496 xl_heap_multi_insert *xlrec;
2497 uint8 info = XLOG_HEAP2_MULTI_INSERT;
2498 char *tupledata;
2499 int totaldatalen;
2500 char *scratchptr = scratch.data;
2501 bool init;
2502 int bufflags = 0;
2503
2504 /*
2505 * If the page was previously empty, we can reinit the page
2506 * instead of restoring the whole thing.
2507 */
2508 init = starting_with_empty_page;
2509
2510 /* allocate xl_heap_multi_insert struct from the scratch area */
2511 xlrec = (xl_heap_multi_insert *) scratchptr;
2512 scratchptr += SizeOfHeapMultiInsert;
2513
2514 /*
2515 * Allocate offsets array. Unless we're reinitializing the page,
2516 * in that case the tuples are stored in order starting at
2517 * FirstOffsetNumber and we don't need to store the offsets
2518 * explicitly.
2519 */
2520 if (!init)
2521 scratchptr += nthispage * sizeof(OffsetNumber);
2522
2523 /* the rest of the scratch space is used for tuple data */
2524 tupledata = scratchptr;
2525
2526 /* check that the mutually exclusive flags are not both set */
2527 Assert(!(all_visible_cleared && all_frozen_set));
2528
2529 xlrec->flags = 0;
2530 if (all_visible_cleared)
2531 xlrec->flags = XLH_INSERT_ALL_VISIBLE_CLEARED;
2532 if (all_frozen_set)
2533 xlrec->flags = XLH_INSERT_ALL_FROZEN_SET;
2534
2535 xlrec->ntuples = nthispage;
2536
2537 /*
2538 * Write out an xl_multi_insert_tuple and the tuple data itself
2539 * for each tuple.
2540 */
2541 for (i = 0; i < nthispage; i++)
2542 {
2543 HeapTuple heaptup = heaptuples[ndone + i];
2544 xl_multi_insert_tuple *tuphdr;
2545 int datalen;
2546
2547 if (!init)
2548 xlrec->offsets[i] = ItemPointerGetOffsetNumber(&heaptup->t_self);
2549 /* xl_multi_insert_tuple needs two-byte alignment. */
2550 tuphdr = (xl_multi_insert_tuple *) SHORTALIGN(scratchptr);
2551 scratchptr = ((char *) tuphdr) + SizeOfMultiInsertTuple;
2552
2553 tuphdr->t_infomask2 = heaptup->t_data->t_infomask2;
2554 tuphdr->t_infomask = heaptup->t_data->t_infomask;
2555 tuphdr->t_hoff = heaptup->t_data->t_hoff;
2556
2557 /* write bitmap [+ padding] [+ oid] + data */
2558 datalen = heaptup->t_len - SizeofHeapTupleHeader;
2559 memcpy(scratchptr,
2560 (char *) heaptup->t_data + SizeofHeapTupleHeader,
2561 datalen);
2562 tuphdr->datalen = datalen;
2563 scratchptr += datalen;
2564 }
2565 totaldatalen = scratchptr - tupledata;
2566 Assert((scratchptr - scratch.data) < BLCKSZ);
2567
2568 if (need_tuple_data)
2569 xlrec->flags |= XLH_INSERT_CONTAINS_NEW_TUPLE;
2570
2571 /*
2572 * Signal that this is the last xl_heap_multi_insert record
2573 * emitted by this call to heap_multi_insert(). Needed for logical
2574 * decoding so it knows when to cleanup temporary data.
2575 */
2576 if (ndone + nthispage == ntuples)
2577 xlrec->flags |= XLH_INSERT_LAST_IN_MULTI;
2578
2579 if (init)
2580 {
2581 info |= XLOG_HEAP_INIT_PAGE;
2582 bufflags |= REGBUF_WILL_INIT;
2583 }
2584
2585 /*
2586 * If we're doing logical decoding, include the new tuple data
2587 * even if we take a full-page image of the page.
2588 */
2589 if (need_tuple_data)
2590 bufflags |= REGBUF_KEEP_DATA;
2591
2592 XLogBeginInsert();
2593 XLogRegisterData(xlrec, tupledata - scratch.data);
2594 XLogRegisterBuffer(0, buffer, REGBUF_STANDARD | bufflags);
2595
2596 XLogRegisterBufData(0, tupledata, totaldatalen);
2597
2598 /* filtering by origin on a row level is much more efficient */
2599 XLogSetRecordFlags(XLOG_INCLUDE_ORIGIN);
2600
2601 recptr = XLogInsert(RM_HEAP2_ID, info);
2602
2603 PageSetLSN(page, recptr);
2604 }
2605
2606 END_CRIT_SECTION();
2607
2608 /*
2609 * If we've frozen everything on the page, update the visibilitymap.
2610 * We're already holding pin on the vmbuffer.
2611 */
2612 if (all_frozen_set)
2613 {
2614 Assert(PageIsAllVisible(page));
2615 Assert(visibilitymap_pin_ok(BufferGetBlockNumber(buffer), vmbuffer));
2616
2617 /*
2618 * It's fine to use InvalidTransactionId here - this is only used
2619 * when HEAP_INSERT_FROZEN is specified, which intentionally
2620 * violates visibility rules.
2621 */
2622 visibilitymap_set(relation, BufferGetBlockNumber(buffer), buffer,
2623 InvalidXLogRecPtr, vmbuffer,
2624 InvalidTransactionId,
2625 VISIBILITYMAP_ALL_VISIBLE | VISIBILITYMAP_ALL_FROZEN);
2626 }
2627
2628 UnlockReleaseBuffer(buffer);
2629 ndone += nthispage;
2630
2631 /*
2632 * NB: Only release vmbuffer after inserting all tuples - it's fairly
2633 * likely that we'll insert into subsequent heap pages that are likely
2634 * to use the same vm page.
2635 */
2636 }
2637
2638 /* We're done with inserting all tuples, so release the last vmbuffer. */
2639 if (vmbuffer != InvalidBuffer)
2640 ReleaseBuffer(vmbuffer);
2641
2642 /*
2643 * We're done with the actual inserts. Check for conflicts again, to
2644 * ensure that all rw-conflicts in to these inserts are detected. Without
2645 * this final check, a sequential scan of the heap may have locked the
2646 * table after the "before" check, missing one opportunity to detect the
2647 * conflict, and then scanned the table before the new tuples were there,
2648 * missing the other chance to detect the conflict.
2649 *
2650 * For heap inserts, we only need to check for table-level SSI locks. Our
2651 * new tuples can't possibly conflict with existing tuple locks, and heap
2652 * page locks are only consolidated versions of tuple locks; they do not
2653 * lock "gaps" as index page locks do. So we don't need to specify a
2654 * buffer when making the call.
2655 */
2656 CheckForSerializableConflictIn(relation, NULL, InvalidBlockNumber);
2657
2658 /*
2659 * If tuples are cachable, mark them for invalidation from the caches in
2660 * case we abort. Note it is OK to do this after releasing the buffer,
2661 * because the heaptuples data structure is all in local memory, not in
2662 * the shared buffer.
2663 */
2664 if (IsCatalogRelation(relation))
2665 {
2666 for (i = 0; i < ntuples; i++)
2667 CacheInvalidateHeapTuple(relation, heaptuples[i], NULL);
2668 }
2669
2670 /* copy t_self fields back to the caller's slots */
2671 for (i = 0; i < ntuples; i++)
2672 slots[i]->tts_tid = heaptuples[i]->t_self;
2673
2674 pgstat_count_heap_insert(relation, ntuples);
2675}
Size PageGetHeapFreeSpace(const PageData *page)
Definition: bufpage.c:990
static void PageSetAllVisible(Page page)
Definition: bufpage.h:434
#define MAXALIGN(LEN)
Definition: c.h:782
#define SHORTALIGN(LEN)
Definition: c.h:778
size_t Size
Definition: c.h:576
HeapTuple ExecFetchSlotHeapTuple(TupleTableSlot *slot, bool materialize, bool *shouldFree)
Definition: execTuples.c:1833
static int heap_multi_insert_pages(HeapTuple *heaptuples, int done, int ntuples, Size saveFreeSpace)
Definition: heapam.c:2293
#define HEAP_INSERT_FROZEN
Definition: heapam.h:37
#define SizeOfHeapMultiInsert
Definition: heapam_xlog.h:188
#define XLOG_HEAP2_MULTI_INSERT
Definition: heapam_xlog.h:64
#define XLH_INSERT_LAST_IN_MULTI
Definition: heapam_xlog.h:73
#define XLH_INSERT_ALL_FROZEN_SET
Definition: heapam_xlog.h:79
#define SizeOfMultiInsertTuple
Definition: heapam_xlog.h:199
int init
Definition: isn.c:79
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:123
#define RelationGetTargetPageFreeSpace(relation, defaultff)
Definition: rel.h:389
#define HEAP_DEFAULT_FILLFACTOR
Definition: rel.h:360
Oid tts_tableOid
Definition: tuptable.h:130
OffsetNumber offsets[FLEXIBLE_ARRAY_MEMBER]
Definition: heapam_xlog.h:185
bool visibilitymap_pin_ok(BlockNumber heapBlk, Buffer vmbuf)
uint8 visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf, XLogRecPtr recptr, Buffer vmBuf, TransactionId cutoff_xid, uint8 flags)
#define VISIBILITYMAP_ALL_VISIBLE
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28

References Assert(), BufferGetBlockNumber(), BufferGetPage(), CacheInvalidateHeapTuple(), CHECK_FOR_INTERRUPTS, CheckForSerializableConflictIn(), PGAlignedBlock::data, xl_multi_insert_tuple::datalen, END_CRIT_SECTION, ExecFetchSlotHeapTuple(), xl_heap_multi_insert::flags, GetCurrentTransactionId(), HEAP_DEFAULT_FILLFACTOR, HEAP_INSERT_FROZEN, HEAP_INSERT_NO_LOGICAL, heap_multi_insert_pages(), heap_prepare_insert(), i, init, InvalidBlockNumber, InvalidBuffer, InvalidTransactionId, InvalidXLogRecPtr, IsCatalogRelation(), ItemPointerGetOffsetNumber(), log_heap_new_cid(), MarkBufferDirty(), MAXALIGN, xl_heap_multi_insert::ntuples, xl_heap_multi_insert::offsets, PageClearAllVisible(), PageGetHeapFreeSpace(), PageGetMaxOffsetNumber(), PageIsAllVisible(), PageSetAllVisible(), PageSetLSN(), palloc(), pgstat_count_heap_insert(), REGBUF_KEEP_DATA, REGBUF_STANDARD, REGBUF_WILL_INIT, RelationGetBufferForTuple(), RelationGetRelid, RelationGetTargetPageFreeSpace, RelationIsAccessibleInLogicalDecoding, RelationIsLogicallyLogged, RelationNeedsWAL, RelationPutHeapTuple(), ReleaseBuffer(), SHORTALIGN, SizeOfHeapMultiInsert, SizeofHeapTupleHeader, SizeOfMultiInsertTuple, START_CRIT_SECTION, HeapTupleData::t_data, xl_multi_insert_tuple::t_hoff, HeapTupleHeaderData::t_hoff, xl_multi_insert_tuple::t_infomask, HeapTupleHeaderData::t_infomask, xl_multi_insert_tuple::t_infomask2, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TupleTableSlot::tts_tableOid, UnlockReleaseBuffer(), VISIBILITYMAP_ALL_FROZEN, VISIBILITYMAP_ALL_VISIBLE, visibilitymap_clear(), visibilitymap_pin_ok(), visibilitymap_set(), VISIBILITYMAP_VALID_BITS, XLH_INSERT_ALL_FROZEN_SET, XLH_INSERT_ALL_VISIBLE_CLEARED, XLH_INSERT_CONTAINS_NEW_TUPLE, XLH_INSERT_LAST_IN_MULTI, XLOG_HEAP2_MULTI_INSERT, XLOG_HEAP_INIT_PAGE, XLOG_INCLUDE_ORIGIN, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), XLogRegisterData(), and XLogSetRecordFlags().

Referenced by CatalogTuplesMultiInsertWithInfo().
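A hedged sketch of driving heap_multi_insert() with a batch of already-materialized slots, along the lines of what the COPY path does through the table AM. insert_batch() is hypothetical; heap_multi_insert(), GetBulkInsertState() and FreeBulkInsertState() are the real interfaces declared in heapam.h. Using a BulkInsertState is optional but keeps bulk loads from flooding shared buffers.

#include "postgres.h"
#include "access/heapam.h"
#include "access/xact.h"
#include "executor/tuptable.h"

/* Hypothetical sketch: insert a batch of filled TupleTableSlots in one call.
 * Real callers (e.g. COPY via the table AM) also take care of per-row index
 * maintenance and triggers. */
static void
insert_batch(Relation rel, TupleTableSlot **slots, int nslots)
{
    CommandId       cid = GetCurrentCommandId(true);
    BulkInsertState bistate = GetBulkInsertState();

    heap_multi_insert(rel, slots, nslots, cid, 0, bistate);

    /* slots[i]->tts_tid now holds the TID assigned to each new tuple */
    FreeBulkInsertState(bistate);
}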

◆ heap_page_prune_and_freeze()

void heap_page_prune_and_freeze ( Relation  relation,
Buffer  buffer,
struct GlobalVisState vistest,
int  options,
struct VacuumCutoffs cutoffs,
PruneFreezeResult presult,
PruneReason  reason,
OffsetNumber off_loc,
TransactionId new_relfrozen_xid,
MultiXactId new_relmin_mxid 
)

Definition at line 350 of file pruneheap.c.
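Before the listing, a hedged sketch of how a VACUUM-style caller might drive this function; prune_and_freeze_page() and its parameter plumbing are hypothetical simplifications of what vacuumlazy.c actually does, while the argument order, the HEAP_PAGE_PRUNE_FREEZE option and the PruneFreezeResult fields match the declarations on this page. The buffer is assumed to be pinned and exclusively locked by the caller.

#include "postgres.h"
#include "access/heapam.h"

/* Hypothetical sketch: prune and opportunistically freeze one heap page. */
static void
prune_and_freeze_page(Relation rel, Buffer buf,
                      struct GlobalVisState *vistest,
                      struct VacuumCutoffs *cutoffs,
                      TransactionId *new_relfrozen_xid,
                      MultiXactId *new_relmin_mxid,
                      OffsetNumber *vacuum_error_offset)
{
    PruneFreezeResult presult;

    heap_page_prune_and_freeze(rel, buf, vistest,
                               HEAP_PAGE_PRUNE_FREEZE,
                               cutoffs, &presult,
                               PRUNE_VACUUM_SCAN,
                               vacuum_error_offset,
                               new_relfrozen_xid, new_relmin_mxid);

    /* presult.ndeleted, presult.nfrozen, presult.nnewlpdead and
     * presult.live_tuples feed the caller's statistics and its decision
     * about setting visibility-map bits. */
}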

359{
360 Page page = BufferGetPage(buffer);
361 BlockNumber blockno = BufferGetBlockNumber(buffer);
362 OffsetNumber offnum,
363 maxoff;
364 PruneState prstate;
365 HeapTupleData tup;
366 bool do_freeze;
367 bool do_prune;
368 bool do_hint;
369 bool hint_bit_fpi;
370 int64 fpi_before = pgWalUsage.wal_fpi;
371
372 /* Copy parameters to prstate */
373 prstate.vistest = vistest;
374 prstate.mark_unused_now = (options & HEAP_PAGE_PRUNE_MARK_UNUSED_NOW) != 0;
375 prstate.freeze = (options & HEAP_PAGE_PRUNE_FREEZE) != 0;
376 prstate.cutoffs = cutoffs;
377
378 /*
379 * Our strategy is to scan the page and make lists of items to change,
380 * then apply the changes within a critical section. This keeps as much
381 * logic as possible out of the critical section, and also ensures that
382 * WAL replay will work the same as the normal case.
383 *
384 * First, initialize the new pd_prune_xid value to zero (indicating no
385 * prunable tuples). If we find any tuples which may soon become
386 * prunable, we will save the lowest relevant XID in new_prune_xid. Also
387 * initialize the rest of our working state.
388 */
389 prstate.new_prune_xid = InvalidTransactionId;
390 prstate.latest_xid_removed = InvalidTransactionId;
391 prstate.nredirected = prstate.ndead = prstate.nunused = prstate.nfrozen = 0;
392 prstate.nroot_items = 0;
393 prstate.nheaponly_items = 0;
394
395 /* initialize page freezing working state */
396 prstate.pagefrz.freeze_required = false;
397 if (prstate.freeze)
398 {
399 Assert(new_relfrozen_xid && new_relmin_mxid);
400 prstate.pagefrz.FreezePageRelfrozenXid = *new_relfrozen_xid;
401 prstate.pagefrz.NoFreezePageRelfrozenXid = *new_relfrozen_xid;
402 prstate.pagefrz.FreezePageRelminMxid = *new_relmin_mxid;
403 prstate.pagefrz.NoFreezePageRelminMxid = *new_relmin_mxid;
404 }
405 else
406 {
407 Assert(new_relfrozen_xid == NULL && new_relmin_mxid == NULL);
412 }
413
414 prstate.ndeleted = 0;
415 prstate.live_tuples = 0;
416 prstate.recently_dead_tuples = 0;
417 prstate.hastup = false;
418 prstate.lpdead_items = 0;
419 prstate.deadoffsets = presult->deadoffsets;
420
421 /*
422 * Caller may update the VM after we're done. We can keep track of
423 * whether the page will be all-visible and all-frozen after pruning and
424 * freezing to help the caller to do that.
425 *
426 * Currently, only VACUUM sets the VM bits. To save the effort, only do
427 * the bookkeeping if the caller needs it. Currently, that's tied to
428 * HEAP_PAGE_PRUNE_FREEZE, but it could be a separate flag if you wanted
429 * to update the VM bits without also freezing or freeze without also
430 * setting the VM bits.
431 *
432 * In addition to telling the caller whether it can set the VM bit, we
433 * also use 'all_visible' and 'all_frozen' for our own decision-making. If
434 * the whole page would become frozen, we consider opportunistically
435 * freezing tuples. We will not be able to freeze the whole page if there
436 * are tuples present that are not visible to everyone or if there are
437 * dead tuples which are not yet removable. However, dead tuples which
438 * will be removed by the end of vacuuming should not preclude us from
439 * opportunistically freezing. Because of that, we do not clear
440 * all_visible when we see LP_DEAD items. We fix that at the end of the
441 * function, when we return the value to the caller, so that the caller
442 * doesn't set the VM bit incorrectly.
443 */
444 if (prstate.freeze)
445 {
446 prstate.all_visible = true;
447 prstate.all_frozen = true;
448 }
449 else
450 {
451 /*
452 * Initializing to false allows skipping the work to update them in
453 * heap_prune_record_unchanged_lp_normal().
454 */
455 prstate.all_visible = false;
456 prstate.all_frozen = false;
457 }
458
459 /*
460 * The visibility cutoff xid is the newest xmin of live tuples on the
461 * page. In the common case, this will be set as the conflict horizon the
462 * caller can use for updating the VM. If, at the end of freezing and
463 * pruning, the page is all-frozen, there is no possibility that any
464 * running transaction on the standby does not see tuples on the page as
465 * all-visible, so the conflict horizon remains InvalidTransactionId.
466 */
467 prstate.visibility_cutoff_xid = InvalidTransactionId;
468
469 maxoff = PageGetMaxOffsetNumber(page);
470 tup.t_tableOid = RelationGetRelid(relation);
471
472 /*
473 * Determine HTSV for all tuples, and queue them up for processing as HOT
474 * chain roots or as heap-only items.
475 *
476 * Determining HTSV only once for each tuple is required for correctness,
477 * to deal with cases where running HTSV twice could result in different
478 * results. For example, RECENTLY_DEAD can turn to DEAD if another
479 * checked item causes GlobalVisTestIsRemovableFullXid() to update the
480 * horizon, or INSERT_IN_PROGRESS can change to DEAD if the inserting
481 * transaction aborts.
482 *
483 * It's also good for performance. Most commonly tuples within a page are
484 * stored at decreasing offsets (while the items are stored at increasing
485 * offsets). When processing all tuples on a page this leads to reading
486 * memory at decreasing offsets within a page, with a variable stride.
487 * That's hard for CPU prefetchers to deal with. Processing the items in
488 * reverse order (and thus the tuples in increasing order) increases
489 * prefetching efficiency significantly / decreases the number of cache
490 * misses.
491 */
492 for (offnum = maxoff;
493 offnum >= FirstOffsetNumber;
494 offnum = OffsetNumberPrev(offnum))
495 {
496 ItemId itemid = PageGetItemId(page, offnum);
497 HeapTupleHeader htup;
498
499 /*
500 * Set the offset number so that we can display it along with any
501 * error that occurred while processing this tuple.
502 */
503 *off_loc = offnum;
504
505 prstate.processed[offnum] = false;
506 prstate.htsv[offnum] = -1;
507
508 /* Nothing to do if slot doesn't contain a tuple */
509 if (!ItemIdIsUsed(itemid))
510 {
511 heap_prune_record_unchanged_lp_unused(page, &prstate, offnum);
512 continue;
513 }
514
515 if (ItemIdIsDead(itemid))
516 {
517 /*
518 * If the caller set mark_unused_now true, we can set dead line
519 * pointers LP_UNUSED now.
520 */
521 if (unlikely(prstate.mark_unused_now))
522 heap_prune_record_unused(&prstate, offnum, false);
523 else
524 heap_prune_record_unchanged_lp_dead(page, &prstate, offnum);
525 continue;
526 }
527
528 if (ItemIdIsRedirected(itemid))
529 {
530 /* This is the start of a HOT chain */
531 prstate.root_items[prstate.nroot_items++] = offnum;
532 continue;
533 }
534
535 Assert(ItemIdIsNormal(itemid));
536
537 /*
538 * Get the tuple's visibility status and queue it up for processing.
539 */
540 htup = (HeapTupleHeader) PageGetItem(page, itemid);
541 tup.t_data = htup;
542 tup.t_len = ItemIdGetLength(itemid);
543 ItemPointerSet(&tup.t_self, blockno, offnum);
544
545 prstate.htsv[offnum] = heap_prune_satisfies_vacuum(&prstate, &tup,
546 buffer);
547
548 if (!HeapTupleHeaderIsHeapOnly(htup))
549 prstate.root_items[prstate.nroot_items++] = offnum;
550 else
551 prstate.heaponly_items[prstate.nheaponly_items++] = offnum;
552 }
553
554 /*
555 * If checksums are enabled, heap_prune_satisfies_vacuum() may have caused
556 * an FPI to be emitted.
557 */
558 hint_bit_fpi = fpi_before != pgWalUsage.wal_fpi;
559
560 /*
561 * Process HOT chains.
562 *
563 * We added the items to the array starting from 'maxoff', so by
564 * processing the array in reverse order, we process the items in
565 * ascending offset number order. The order doesn't matter for
566 * correctness, but some quick micro-benchmarking suggests that this is
567 * faster. (Earlier PostgreSQL versions, which scanned all the items on
568 * the page instead of using the root_items array, also did it in
569 * ascending offset number order.)
570 */
571 for (int i = prstate.nroot_items - 1; i >= 0; i--)
572 {
573 offnum = prstate.root_items[i];
574
575 /* Ignore items already processed as part of an earlier chain */
576 if (prstate.processed[offnum])
577 continue;
578
579 /* see preceding loop */
580 *off_loc = offnum;
581
582 /* Process this item or chain of items */
583 heap_prune_chain(page, blockno, maxoff, offnum, &prstate);
584 }
585
586 /*
587 * Process any heap-only tuples that were not already processed as part of
588 * a HOT chain.
589 */
590 for (int i = prstate.nheaponly_items - 1; i >= 0; i--)
591 {
592 offnum = prstate.heaponly_items[i];
593
594 if (prstate.processed[offnum])
595 continue;
596
597 /* see preceding loop */
598 *off_loc = offnum;
599
600 /*
601 * If the tuple is DEAD and doesn't chain to anything else, mark it
602 * unused. (If it does chain, we can only remove it as part of
603 * pruning its chain.)
604 *
605 * We need this primarily to handle aborted HOT updates, that is,
606 * XMIN_INVALID heap-only tuples. Those might not be linked to by any
607 * chain, since the parent tuple might be re-updated before any
608 * pruning occurs. So we have to be able to reap them separately from
609 * chain-pruning. (Note that HeapTupleHeaderIsHotUpdated will never
610 * return true for an XMIN_INVALID tuple, so this code will work even
611 * when there were sequential updates within the aborted transaction.)
612 */
613 if (prstate.htsv[offnum] == HEAPTUPLE_DEAD)
614 {
615 ItemId itemid = PageGetItemId(page, offnum);
616 HeapTupleHeader htup = (HeapTupleHeader) PageGetItem(page, itemid);
617
618 if (likely(!HeapTupleHeaderIsHotUpdated(htup)))
619 {
620 HeapTupleHeaderAdvanceConflictHorizon(htup,
621 &prstate.latest_xid_removed);
622 heap_prune_record_unused(&prstate, offnum, true);
623 }
624 else
625 {
626 /*
627 * This tuple should've been processed and removed as part of
628 * a HOT chain, so something's wrong. To preserve evidence,
629 * we don't dare to remove it. We cannot leave behind a DEAD
630 * tuple either, because that will cause VACUUM to error out.
631 * Throwing an error with a distinct error message seems like
632 * the least bad option.
633 */
634 elog(ERROR, "dead heap-only tuple (%u, %d) is not linked to from any HOT chain",
635 blockno, offnum);
636 }
637 }
638 else
639 heap_prune_record_unchanged_lp_normal(page, &prstate, offnum);
640 }
641
642 /* We should now have processed every tuple exactly once */
643#ifdef USE_ASSERT_CHECKING
644 for (offnum = FirstOffsetNumber;
645 offnum <= maxoff;
646 offnum = OffsetNumberNext(offnum))
647 {
648 *off_loc = offnum;
649
650 Assert(prstate.processed[offnum]);
651 }
652#endif
653
654 /* Clear the offset information once we have processed the given page. */
655 *off_loc = InvalidOffsetNumber;
656
657 do_prune = prstate.nredirected > 0 ||
658 prstate.ndead > 0 ||
659 prstate.nunused > 0;
660
661 /*
662 * Even if we don't prune anything, if we found a new value for the
663 * pd_prune_xid field or the page was marked full, we will update the hint
664 * bit.
665 */
666 do_hint = ((PageHeader) page)->pd_prune_xid != prstate.new_prune_xid ||
667 PageIsFull(page);
668
669 /*
670 * Decide if we want to go ahead with freezing according to the freeze
671 * plans we prepared, or not.
672 */
673 do_freeze = false;
674 if (prstate.freeze)
675 {
676 if (prstate.pagefrz.freeze_required)
677 {
678 /*
679 * heap_prepare_freeze_tuple indicated that at least one XID/MXID
680 * from before FreezeLimit/MultiXactCutoff is present. Must
681 * freeze to advance relfrozenxid/relminmxid.
682 */
683 do_freeze = true;
684 }
685 else
686 {
687 /*
688 * Opportunistically freeze the page if we are generating an FPI
689 * anyway and if doing so means that we can set the page
690 * all-frozen afterwards (might not happen until VACUUM's final
691 * heap pass).
692 *
693 * XXX: Previously, we knew if pruning emitted an FPI by checking
694 * pgWalUsage.wal_fpi before and after pruning. Once the freeze
695 * and prune records were combined, this heuristic couldn't be
696 * used anymore. The opportunistic freeze heuristic must be
697 * improved; however, for now, try to approximate the old logic.
698 */
699 if (prstate.all_visible && prstate.all_frozen && prstate.nfrozen > 0)
700 {
701 /*
702 * Freezing would make the page all-frozen. Have already
703 * emitted an FPI or will do so anyway?
704 */
705 if (RelationNeedsWAL(relation))
706 {
707 if (hint_bit_fpi)
708 do_freeze = true;
709 else if (do_prune)
710 {
711 if (XLogCheckBufferNeedsBackup(buffer))
712 do_freeze = true;
713 }
714 else if (do_hint)
715 {
717 do_freeze = true;
718 }
719 }
720 }
721 }
722 }
723
724 if (do_freeze)
725 {
726 /*
727 * Validate the tuples we will be freezing before entering the
728 * critical section.
729 */
730 heap_pre_freeze_checks(buffer, prstate.frozen, prstate.nfrozen);
731 }
732 else if (prstate.nfrozen > 0)
733 {
734 /*
735 * The page contained some tuples that were not already frozen, and we
736 * chose not to freeze them now. The page won't be all-frozen then.
737 */
739
740 prstate.all_frozen = false;
741 prstate.nfrozen = 0; /* avoid miscounts in instrumentation */
742 }
743 else
744 {
745 /*
746 * We have no freeze plans to execute. The page might already be
747 * all-frozen (perhaps only following pruning), though. Such pages
748 * can be marked all-frozen in the VM by our caller, even though none
749 * of its tuples were newly frozen here.
750 */
751 }
752
753 /* Any error while applying the changes is critical */
 754 START_CRIT_SECTION();
 755
756 if (do_hint)
757 {
758 /*
759 * Update the page's pd_prune_xid field to either zero, or the lowest
760 * XID of any soon-prunable tuple.
761 */
762 ((PageHeader) page)->pd_prune_xid = prstate.new_prune_xid;
763
764 /*
765 * Also clear the "page is full" flag, since there's no point in
766 * repeating the prune/defrag process until something else happens to
767 * the page.
768 */
769 PageClearFull(page);
770
771 /*
772 * If that's all we had to do to the page, this is a non-WAL-logged
773 * hint. If we are going to freeze or prune the page, we will mark
774 * the buffer dirty below.
775 */
776 if (!do_freeze && !do_prune)
777 MarkBufferDirtyHint(buffer, true);
778 }
779
780 if (do_prune || do_freeze)
781 {
782 /* Apply the planned item changes and repair page fragmentation. */
783 if (do_prune)
784 {
785 heap_page_prune_execute(buffer, false,
786 prstate.redirected, prstate.nredirected,
787 prstate.nowdead, prstate.ndead,
788 prstate.nowunused, prstate.nunused);
789 }
790
791 if (do_freeze)
792 heap_freeze_prepared_tuples(buffer, prstate.frozen, prstate.nfrozen);
793
794 MarkBufferDirty(buffer);
795
796 /*
797 * Emit a WAL XLOG_HEAP2_PRUNE_FREEZE record showing what we did
798 */
799 if (RelationNeedsWAL(relation))
800 {
801 /*
802 * The snapshotConflictHorizon for the whole record should be the
803 * most conservative of all the horizons calculated for any of the
804 * possible modifications. If this record will prune tuples, any
 805 * transactions on the standby with xids older than the youngest xmax of the
806 * most recently removed tuple this record will prune will
807 * conflict. If this record will freeze tuples, any transactions
808 * on the standby with xids older than the youngest tuple this
809 * record will freeze will conflict.
810 */
811 TransactionId frz_conflict_horizon = InvalidTransactionId;
812 TransactionId conflict_xid;
813
814 /*
815 * We can use the visibility_cutoff_xid as our cutoff for
816 * conflicts when the whole page is eligible to become all-frozen
817 * in the VM once we're done with it. Otherwise we generate a
818 * conservative cutoff by stepping back from OldestXmin.
819 */
820 if (do_freeze)
821 {
822 if (prstate.all_visible && prstate.all_frozen)
823 frz_conflict_horizon = prstate.visibility_cutoff_xid;
824 else
825 {
826 /* Avoids false conflicts when hot_standby_feedback in use */
827 frz_conflict_horizon = prstate.cutoffs->OldestXmin;
828 TransactionIdRetreat(frz_conflict_horizon);
829 }
830 }
831
832 if (TransactionIdFollows(frz_conflict_horizon, prstate.latest_xid_removed))
833 conflict_xid = frz_conflict_horizon;
834 else
835 conflict_xid = prstate.latest_xid_removed;
836
837 log_heap_prune_and_freeze(relation, buffer,
838 conflict_xid,
839 true, reason,
840 prstate.frozen, prstate.nfrozen,
841 prstate.redirected, prstate.nredirected,
842 prstate.nowdead, prstate.ndead,
843 prstate.nowunused, prstate.nunused);
844 }
845 }
846
 847 END_CRIT_SECTION();
 848
849 /* Copy information back for caller */
850 presult->ndeleted = prstate.ndeleted;
851 presult->nnewlpdead = prstate.ndead;
852 presult->nfrozen = prstate.nfrozen;
853 presult->live_tuples = prstate.live_tuples;
 854 presult->recently_dead_tuples = prstate.recently_dead_tuples;
 855
856 /*
857 * It was convenient to ignore LP_DEAD items in all_visible earlier on to
858 * make the choice of whether or not to freeze the page unaffected by the
859 * short-term presence of LP_DEAD items. These LP_DEAD items were
860 * effectively assumed to be LP_UNUSED items in the making. It doesn't
861 * matter which vacuum heap pass (initial pass or final pass) ends up
862 * setting the page all-frozen, as long as the ongoing VACUUM does it.
863 *
864 * Now that freezing has been finalized, unset all_visible if there are
865 * any LP_DEAD items on the page. It needs to reflect the present state
866 * of the page, as expected by our caller.
867 */
868 if (prstate.all_visible && prstate.lpdead_items == 0)
869 {
870 presult->all_visible = prstate.all_visible;
871 presult->all_frozen = prstate.all_frozen;
872 }
873 else
874 {
875 presult->all_visible = false;
876 presult->all_frozen = false;
877 }
878
879 presult->hastup = prstate.hastup;
880
881 /*
882 * For callers planning to update the visibility map, the conflict horizon
883 * for that record must be the newest xmin on the page. However, if the
884 * page is completely frozen, there can be no conflict and the
885 * vm_conflict_horizon should remain InvalidTransactionId. This includes
886 * the case that we just froze all the tuples; the prune-freeze record
887 * included the conflict XID already so the caller doesn't need it.
888 */
889 if (presult->all_frozen)
 890 presult->vm_conflict_horizon = InvalidTransactionId;
 891 else
 892 presult->vm_conflict_horizon = prstate.visibility_cutoff_xid;
 893
894 presult->lpdead_items = prstate.lpdead_items;
895 /* the presult->deadoffsets array was already filled in */
896
897 if (prstate.freeze)
898 {
899 if (presult->nfrozen > 0)
900 {
901 *new_relfrozen_xid = prstate.pagefrz.FreezePageRelfrozenXid;
902 *new_relmin_mxid = prstate.pagefrz.FreezePageRelminMxid;
903 }
904 else
905 {
906 *new_relfrozen_xid = prstate.pagefrz.NoFreezePageRelfrozenXid;
907 *new_relmin_mxid = prstate.pagefrz.NoFreezePageRelminMxid;
908 }
909 }
910}
void MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
Definition: bufmgr.c:5367
static void PageClearFull(Page page)
Definition: bufpage.h:423
static bool PageIsFull(const PageData *page)
Definition: bufpage.h:413
#define likely(x)
Definition: c.h:346
int64_t int64
Definition: c.h:499
void heap_freeze_prepared_tuples(Buffer buffer, HeapTupleFreeze *tuples, int ntuples)
Definition: heapam.c:7336
void heap_pre_freeze_checks(Buffer buffer, HeapTupleFreeze *tuples, int ntuples)
Definition: heapam.c:7283
#define HEAP_PAGE_PRUNE_FREEZE
Definition: heapam.h:43
#define HEAP_PAGE_PRUNE_MARK_UNUSED_NOW
Definition: heapam.h:42
WalUsage pgWalUsage
Definition: instrument.c:22
#define InvalidMultiXactId
Definition: multixact.h:24
#define OffsetNumberPrev(offsetNumber)
Definition: off.h:54
static void heap_prune_chain(Page page, BlockNumber blockno, OffsetNumber maxoff, OffsetNumber rootoffnum, PruneState *prstate)
Definition: pruneheap.c:999
static void heap_prune_record_unchanged_lp_dead(Page page, PruneState *prstate, OffsetNumber offnum)
Definition: pruneheap.c:1508
static void heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum, bool was_normal)
Definition: pruneheap.c:1297
static void heap_prune_record_unchanged_lp_normal(Page page, PruneState *prstate, OffsetNumber offnum)
Definition: pruneheap.c:1330
void log_heap_prune_and_freeze(Relation relation, Buffer buffer, TransactionId conflict_xid, bool cleanup_lock, PruneReason reason, HeapTupleFreeze *frozen, int nfrozen, OffsetNumber *redirected, int nredirected, OffsetNumber *dead, int ndead, OffsetNumber *unused, int nunused)
Definition: pruneheap.c:2053
static void heap_prune_record_unchanged_lp_unused(Page page, PruneState *prstate, OffsetNumber offnum)
Definition: pruneheap.c:1319
static HTSV_Result heap_prune_satisfies_vacuum(PruneState *prstate, HeapTuple tup, Buffer buffer)
Definition: pruneheap.c:917
void heap_page_prune_execute(Buffer buffer, bool lp_truncate_only, OffsetNumber *redirected, int nredirected, OffsetNumber *nowdead, int ndead, OffsetNumber *nowunused, int nunused)
Definition: pruneheap.c:1561
MultiXactId NoFreezePageRelminMxid
Definition: heapam.h:218
TransactionId FreezePageRelfrozenXid
Definition: heapam.h:206
bool freeze_required
Definition: heapam.h:180
MultiXactId FreezePageRelminMxid
Definition: heapam.h:207
TransactionId NoFreezePageRelfrozenXid
Definition: heapam.h:217
int recently_dead_tuples
Definition: heapam.h:233
TransactionId vm_conflict_horizon
Definition: heapam.h:248
OffsetNumber deadoffsets[MaxHeapTuplesPerPage]
Definition: heapam.h:262
bool all_visible
Definition: heapam.h:246
HeapPageFreeze pagefrz
Definition: pruneheap.c:103
bool all_visible
Definition: pruneheap.c:150
int ndead
Definition: pruneheap.c:55
bool processed[MaxHeapTuplesPerPage+1]
Definition: pruneheap.c:86
OffsetNumber heaponly_items[MaxHeapTuplesPerPage]
Definition: pruneheap.c:78
TransactionId new_prune_xid
Definition: pruneheap.c:52
bool hastup
Definition: pruneheap.c:122
int recently_dead_tuples
Definition: pruneheap.c:119
OffsetNumber nowdead[MaxHeapTuplesPerPage]
Definition: pruneheap.c:60
int nroot_items
Definition: pruneheap.c:75
OffsetNumber nowunused[MaxHeapTuplesPerPage]
Definition: pruneheap.c:61
int nheaponly_items
Definition: pruneheap.c:77
bool mark_unused_now
Definition: pruneheap.c:43
int live_tuples
Definition: pruneheap.c:118
TransactionId visibility_cutoff_xid
Definition: pruneheap.c:152
bool all_frozen
Definition: pruneheap.c:151
GlobalVisState * vistest
Definition: pruneheap.c:41
struct VacuumCutoffs * cutoffs
Definition: pruneheap.c:46
HeapTupleFreeze frozen[MaxHeapTuplesPerPage]
Definition: pruneheap.c:62
int lpdead_items
Definition: pruneheap.c:128
int nfrozen
Definition: pruneheap.c:57
OffsetNumber redirected[MaxHeapTuplesPerPage *2]
Definition: pruneheap.c:59
int ndeleted
Definition: pruneheap.c:115
bool freeze
Definition: pruneheap.c:45
int nredirected
Definition: pruneheap.c:54
int8 htsv[MaxHeapTuplesPerPage+1]
Definition: pruneheap.c:98
TransactionId latest_xid_removed
Definition: pruneheap.c:53
int nunused
Definition: pruneheap.c:56
OffsetNumber root_items[MaxHeapTuplesPerPage]
Definition: pruneheap.c:76
OffsetNumber * deadoffsets
Definition: pruneheap.c:129
TransactionId OldestXmin
Definition: vacuum.h:274
int64 wal_fpi
Definition: instrument.h:54
bool TransactionIdFollows(TransactionId id1, TransactionId id2)
Definition: transam.c:314
#define TransactionIdRetreat(dest)
Definition: transam.h:141
#define XLogHintBitIsNeeded()
Definition: xlog.h:120
bool XLogCheckBufferNeedsBackup(Buffer buffer)
Definition: xloginsert.c:1027

References PruneState::all_frozen, PruneFreezeResult::all_frozen, PruneState::all_visible, PruneFreezeResult::all_visible, Assert(), BufferGetBlockNumber(), BufferGetPage(), PruneState::cutoffs, PruneState::deadoffsets, PruneFreezeResult::deadoffsets, elog, END_CRIT_SECTION, ERROR, FirstOffsetNumber, PruneState::freeze, HeapPageFreeze::freeze_required, HeapPageFreeze::FreezePageRelfrozenXid, HeapPageFreeze::FreezePageRelminMxid, PruneState::frozen, PruneState::hastup, PruneFreezeResult::hastup, heap_freeze_prepared_tuples(), heap_page_prune_execute(), HEAP_PAGE_PRUNE_FREEZE, HEAP_PAGE_PRUNE_MARK_UNUSED_NOW, heap_pre_freeze_checks(), heap_prune_chain(), heap_prune_record_unchanged_lp_dead(), heap_prune_record_unchanged_lp_normal(), heap_prune_record_unchanged_lp_unused(), heap_prune_record_unused(), heap_prune_satisfies_vacuum(), PruneState::heaponly_items, HEAPTUPLE_DEAD, HeapTupleHeaderAdvanceConflictHorizon(), HeapTupleHeaderIsHeapOnly(), HeapTupleHeaderIsHotUpdated(), PruneState::htsv, i, InvalidMultiXactId, InvalidOffsetNumber, InvalidTransactionId, ItemIdGetLength, ItemIdIsDead, ItemIdIsNormal, ItemIdIsRedirected, ItemIdIsUsed, ItemPointerSet(), PruneState::latest_xid_removed, likely, PruneState::live_tuples, PruneFreezeResult::live_tuples, log_heap_prune_and_freeze(), PruneState::lpdead_items, PruneFreezeResult::lpdead_items, PruneState::mark_unused_now, MarkBufferDirty(), MarkBufferDirtyHint(), PruneState::ndead, PruneState::ndeleted, PruneFreezeResult::ndeleted, PruneState::new_prune_xid, PruneState::nfrozen, PruneFreezeResult::nfrozen, PruneState::nheaponly_items, PruneFreezeResult::nnewlpdead, HeapPageFreeze::NoFreezePageRelfrozenXid, HeapPageFreeze::NoFreezePageRelminMxid, PruneState::nowdead, PruneState::nowunused, PruneState::nredirected, PruneState::nroot_items, PruneState::nunused, OffsetNumberNext, OffsetNumberPrev, VacuumCutoffs::OldestXmin, PageClearFull(), PruneState::pagefrz, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PageIsFull(), pgWalUsage, PruneState::processed, PruneState::recently_dead_tuples, PruneFreezeResult::recently_dead_tuples, PruneState::redirected, RelationGetRelid, RelationNeedsWAL, PruneState::root_items, START_CRIT_SECTION, HeapTupleData::t_data, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TransactionIdFollows(), TransactionIdRetreat, unlikely, PruneState::visibility_cutoff_xid, PruneState::vistest, PruneFreezeResult::vm_conflict_horizon, WalUsage::wal_fpi, XLogCheckBufferNeedsBackup(), and XLogHintBitIsNeeded.

Referenced by heap_page_prune_opt(), and lazy_scan_prune().
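
As a rough illustration of the call pattern above (not code from the PostgreSQL tree), a VACUUM-style caller that already holds a cleanup lock on the target buffer might drive this function as sketched below. The helper name prune_one_page and the surrounding state are hypothetical, the options value is chosen only for illustration, and the parameters mirror the signature shown above.

#include "postgres.h"
#include "access/heapam.h"
#include "commands/vacuum.h"
#include "utils/snapmgr.h"

static void
prune_one_page(Relation rel, Buffer buf, GlobalVisState *vistest,
               struct VacuumCutoffs *cutoffs,
               TransactionId *new_relfrozen_xid, MultiXactId *new_relmin_mxid)
{
    PruneFreezeResult presult;
    OffsetNumber off_loc;       /* offset reported by error context callbacks */

    /* Prune and, where possible, freeze; the trackers are updated for us. */
    heap_page_prune_and_freeze(rel, buf, vistest, HEAP_PAGE_PRUNE_FREEZE,
                               cutoffs, &presult, PRUNE_VACUUM_SCAN, &off_loc,
                               new_relfrozen_xid, new_relmin_mxid);

    elog(DEBUG2, "deleted %d, froze %d, %d LP_DEAD items remain",
         presult.ndeleted, presult.nfrozen, presult.lpdead_items);
}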

◆ heap_page_prune_execute()

void heap_page_prune_execute ( Buffer  buffer,
bool  lp_truncate_only,
OffsetNumber redirected,
int  nredirected,
OffsetNumber nowdead,
int  ndead,
OffsetNumber nowunused,
int  nunused 
)

Definition at line 1561 of file pruneheap.c.

1565{
1566 Page page = (Page) BufferGetPage(buffer);
1567 OffsetNumber *offnum;
1568 HeapTupleHeader htup PG_USED_FOR_ASSERTS_ONLY;
1569
1570 /* Shouldn't be called unless there's something to do */
1571 Assert(nredirected > 0 || ndead > 0 || nunused > 0);
1572
1573 /* If 'lp_truncate_only', we can only remove already-dead line pointers */
1574 Assert(!lp_truncate_only || (nredirected == 0 && ndead == 0));
1575
1576 /* Update all redirected line pointers */
1577 offnum = redirected;
1578 for (int i = 0; i < nredirected; i++)
1579 {
1580 OffsetNumber fromoff = *offnum++;
1581 OffsetNumber tooff = *offnum++;
1582 ItemId fromlp = PageGetItemId(page, fromoff);
1583 ItemId tolp PG_USED_FOR_ASSERTS_ONLY;
1584
1585#ifdef USE_ASSERT_CHECKING
1586
1587 /*
1588 * Any existing item that we set as an LP_REDIRECT (any 'from' item)
1589 * must be the first item from a HOT chain. If the item has tuple
1590 * storage then it can't be a heap-only tuple. Otherwise we are just
1591 * maintaining an existing LP_REDIRECT from an existing HOT chain that
1592 * has been pruned at least once before now.
1593 */
1594 if (!ItemIdIsRedirected(fromlp))
1595 {
1596 Assert(ItemIdHasStorage(fromlp) && ItemIdIsNormal(fromlp));
1597
1598 htup = (HeapTupleHeader) PageGetItem(page, fromlp);
1599 Assert(!HeapTupleHeaderIsHeapOnly(htup));
1600 }
1601 else
1602 {
1603 /* We shouldn't need to redundantly set the redirect */
1604 Assert(ItemIdGetRedirect(fromlp) != tooff);
1605 }
1606
1607 /*
1608 * The item that we're about to set as an LP_REDIRECT (the 'from'
1609 * item) will point to an existing item (the 'to' item) that is
1610 * already a heap-only tuple. There can be at most one LP_REDIRECT
1611 * item per HOT chain.
1612 *
1613 * We need to keep around an LP_REDIRECT item (after original
1614 * non-heap-only root tuple gets pruned away) so that it's always
1615 * possible for VACUUM to easily figure out what TID to delete from
1616 * indexes when an entire HOT chain becomes dead. A heap-only tuple
1617 * can never become LP_DEAD; an LP_REDIRECT item or a regular heap
1618 * tuple can.
1619 *
1620 * This check may miss problems, e.g. the target of a redirect could
1621 * be marked as unused subsequently. The page_verify_redirects() check
1622 * below will catch such problems.
1623 */
1624 tolp = PageGetItemId(page, tooff);
1625 Assert(ItemIdHasStorage(tolp) && ItemIdIsNormal(tolp));
1626 htup = (HeapTupleHeader) PageGetItem(page, tolp);
1627 Assert(HeapTupleHeaderIsHeapOnly(htup));
1628#endif
1629
1630 ItemIdSetRedirect(fromlp, tooff);
1631 }
1632
1633 /* Update all now-dead line pointers */
1634 offnum = nowdead;
1635 for (int i = 0; i < ndead; i++)
1636 {
1637 OffsetNumber off = *offnum++;
1638 ItemId lp = PageGetItemId(page, off);
1639
1640#ifdef USE_ASSERT_CHECKING
1641
1642 /*
1643 * An LP_DEAD line pointer must be left behind when the original item
1644 * (which is dead to everybody) could still be referenced by a TID in
1645 * an index. This should never be necessary with any individual
1646 * heap-only tuple item, though. (It's not clear how much of a problem
1647 * that would be, but there is no reason to allow it.)
1648 */
1649 if (ItemIdHasStorage(lp))
1650 {
1652 htup = (HeapTupleHeader) PageGetItem(page, lp);
1654 }
1655 else
1656 {
1657 /* Whole HOT chain becomes dead */
1659 }
1660#endif
1661
1662 ItemIdSetDead(lp);
1663 }
1664
1665 /* Update all now-unused line pointers */
1666 offnum = nowunused;
1667 for (int i = 0; i < nunused; i++)
1668 {
1669 OffsetNumber off = *offnum++;
1670 ItemId lp = PageGetItemId(page, off);
1671
1672#ifdef USE_ASSERT_CHECKING
1673
1674 if (lp_truncate_only)
1675 {
1676 /* Setting LP_DEAD to LP_UNUSED in vacuum's second pass */
1678 }
1679 else
1680 {
1681 /*
1682 * When heap_page_prune_and_freeze() was called, mark_unused_now
1683 * may have been passed as true, which allows would-be LP_DEAD
1684 * items to be made LP_UNUSED instead. This is only possible if
1685 * the relation has no indexes. If there are any dead items, then
1686 * mark_unused_now was not true and every item being marked
1687 * LP_UNUSED must refer to a heap-only tuple.
1688 */
1689 if (ndead > 0)
1690 {
1692 htup = (HeapTupleHeader) PageGetItem(page, lp);
1694 }
1695 else
1696 Assert(ItemIdIsUsed(lp));
1697 }
1698
1699#endif
1700
1701 ItemIdSetUnused(lp);
1702 }
1703
1704 if (lp_truncate_only)
1705 PageTruncateLinePointerArray(page);
1706 else
1707 {
1708 /*
1709 * Finally, repair any fragmentation, and update the page's hint bit
1710 * about whether it has free pointers.
1711 */
1712 PageRepairFragmentation(page);
1713
1714 /*
1715 * Now that the page has been modified, assert that redirect items
1716 * still point to valid targets.
1717 */
1718 page_verify_redirects(page);
1719 }
1720}
void PageRepairFragmentation(Page page)
Definition: bufpage.c:698
void PageTruncateLinePointerArray(Page page)
Definition: bufpage.c:834
#define PG_USED_FOR_ASSERTS_ONLY
Definition: c.h:224
#define ItemIdSetRedirect(itemId, link)
Definition: itemid.h:152
#define ItemIdSetDead(itemId)
Definition: itemid.h:164
#define ItemIdSetUnused(itemId)
Definition: itemid.h:128
#define ItemIdHasStorage(itemId)
Definition: itemid.h:120
static void page_verify_redirects(Page page)
Definition: pruneheap.c:1737

References Assert(), BufferGetPage(), HeapTupleHeaderIsHeapOnly(), i, ItemIdGetRedirect, ItemIdHasStorage, ItemIdIsDead, ItemIdIsNormal, ItemIdIsRedirected, ItemIdIsUsed, ItemIdSetDead, ItemIdSetRedirect, ItemIdSetUnused, page_verify_redirects(), PageGetItem(), PageGetItemId(), PageRepairFragmentation(), PageTruncateLinePointerArray(), and PG_USED_FOR_ASSERTS_ONLY.

Referenced by heap_page_prune_and_freeze(), and heap_xlog_prune_freeze().
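
Note that the redirected array is consumed as (from, to) offset pairs, so it holds 2 * nredirected entries. The fragment below is purely illustrative (the helper name is hypothetical); it assumes the caller holds an exclusive cleanup lock on the buffer, is inside a critical section as heap_page_prune_and_freeze() is, and that the page really does have a HOT chain rooted at offset 1 whose surviving member is the heap-only tuple at offset 3.

#include "postgres.h"
#include "access/heapam.h"
#include "storage/off.h"

static void
redirect_one_item(Buffer buffer)
{
    /* One planned redirect: line pointer 1 becomes LP_REDIRECT pointing at 3 */
    OffsetNumber redirected[2] = {1, 3};    /* laid out as a (from, to) pair */

    heap_page_prune_execute(buffer, false,
                            redirected, 1,  /* nredirected counts pairs */
                            NULL, 0,        /* no new LP_DEAD items */
                            NULL, 0);       /* no new LP_UNUSED items */
}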

◆ heap_page_prune_opt()

void heap_page_prune_opt ( Relation  relation,
Buffer  buffer 
)

Definition at line 193 of file pruneheap.c.

194{
195 Page page = BufferGetPage(buffer);
196 TransactionId prune_xid;
197 GlobalVisState *vistest;
198 Size minfree;
199
200 /*
201 * We can't write WAL in recovery mode, so there's no point trying to
202 * clean the page. The primary will likely issue a cleaning WAL record
203 * soon anyway, so this is no particular loss.
204 */
205 if (RecoveryInProgress())
206 return;
207
208 /*
 209 * First check whether there's any chance there's something to prune;
210 * determining the appropriate horizon is a waste if there's no prune_xid
211 * (i.e. no updates/deletes left potentially dead tuples around).
212 */
213 prune_xid = ((PageHeader) page)->pd_prune_xid;
214 if (!TransactionIdIsValid(prune_xid))
215 return;
216
217 /*
218 * Check whether prune_xid indicates that there may be dead rows that can
219 * be cleaned up.
220 */
221 vistest = GlobalVisTestFor(relation);
222
223 if (!GlobalVisTestIsRemovableXid(vistest, prune_xid))
224 return;
225
226 /*
227 * We prune when a previous UPDATE failed to find enough space on the page
228 * for a new tuple version, or when free space falls below the relation's
229 * fill-factor target (but not less than 10%).
230 *
231 * Checking free space here is questionable since we aren't holding any
232 * lock on the buffer; in the worst case we could get a bogus answer. It's
233 * unlikely to be *seriously* wrong, though, since reading either pd_lower
234 * or pd_upper is probably atomic. Avoiding taking a lock seems more
235 * important than sometimes getting a wrong answer in what is after all
236 * just a heuristic estimate.
237 */
238 minfree = RelationGetTargetPageFreeSpace(relation,
 239 HEAP_DEFAULT_FILLFACTOR);
 240 minfree = Max(minfree, BLCKSZ / 10);
241
242 if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
243 {
244 /* OK, try to get exclusive buffer lock */
 245 if (!ConditionalLockBufferForCleanup(buffer))
 246 return;
247
248 /*
249 * Now that we have buffer lock, get accurate information about the
250 * page's free space, and recheck the heuristic about whether to
251 * prune.
252 */
253 if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
254 {
255 OffsetNumber dummy_off_loc;
256 PruneFreezeResult presult;
257
258 /*
259 * For now, pass mark_unused_now as false regardless of whether or
260 * not the relation has indexes, since we cannot safely determine
261 * that during on-access pruning with the current implementation.
262 */
263 heap_page_prune_and_freeze(relation, buffer, vistest, 0,
264 NULL, &presult, PRUNE_ON_ACCESS, &dummy_off_loc, NULL, NULL);
265
266 /*
267 * Report the number of tuples reclaimed to pgstats. This is
268 * presult.ndeleted minus the number of newly-LP_DEAD-set items.
269 *
270 * We derive the number of dead tuples like this to avoid totally
271 * forgetting about items that were set to LP_DEAD, since they
272 * still need to be cleaned up by VACUUM. We only want to count
273 * heap-only tuples that just became LP_UNUSED in our report,
 275 * which don't need such cleanup.
275 *
276 * VACUUM doesn't have to compensate in the same way when it
277 * tracks ndeleted, since it will set the same LP_DEAD items to
278 * LP_UNUSED separately.
279 */
280 if (presult.ndeleted > presult.nnewlpdead)
 281 pgstat_update_heap_dead_tuples(relation,
 282 presult.ndeleted - presult.nnewlpdead);
283 }
284
285 /* And release buffer lock */
 286 LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
 287
288 /*
289 * We avoid reuse of any free space created on the page by unrelated
290 * UPDATEs/INSERTs by opting to not update the FSM at this point. The
291 * free space should be reused by UPDATEs to *this* page.
292 */
293 }
294}
bool ConditionalLockBufferForCleanup(Buffer buffer)
Definition: bufmgr.c:5785
#define Max(x, y)
Definition: c.h:969
void pgstat_update_heap_dead_tuples(Relation rel, int delta)
bool GlobalVisTestIsRemovableXid(GlobalVisState *state, TransactionId xid)
Definition: procarray.c:4264
void heap_page_prune_and_freeze(Relation relation, Buffer buffer, GlobalVisState *vistest, int options, struct VacuumCutoffs *cutoffs, PruneFreezeResult *presult, PruneReason reason, OffsetNumber *off_loc, TransactionId *new_relfrozen_xid, MultiXactId *new_relmin_mxid)
Definition: pruneheap.c:350
bool RecoveryInProgress(void)
Definition: xlog.c:6522

References BUFFER_LOCK_UNLOCK, BufferGetPage(), ConditionalLockBufferForCleanup(), GlobalVisTestFor(), GlobalVisTestIsRemovableXid(), HEAP_DEFAULT_FILLFACTOR, heap_page_prune_and_freeze(), LockBuffer(), Max, PruneFreezeResult::ndeleted, PruneFreezeResult::nnewlpdead, PageGetHeapFreeSpace(), PageIsFull(), pgstat_update_heap_dead_tuples(), PRUNE_ON_ACCESS, RecoveryInProgress(), RelationGetTargetPageFreeSpace, and TransactionIdIsValid.

Referenced by BitmapHeapScanNextBlock(), heap_prepare_pagescan(), and heapam_index_fetch_tuple().
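
A minimal sketch of the on-access usage pattern seen in the callers above (illustrative only; the helper name, rel and blkno are assumptions): the page is pinned but not yet content-locked when opportunistic pruning is attempted, matching what heap_prepare_pagescan() does.

#include "postgres.h"
#include "access/heapam.h"
#include "storage/bufmgr.h"

static void
read_page_with_opportunistic_prune(Relation rel, BlockNumber blkno)
{
    Buffer      buf = ReadBuffer(rel, blkno);

    /* May prune if the page looks worth it; otherwise a cheap no-op. */
    heap_page_prune_opt(rel, buf);

    LockBuffer(buf, BUFFER_LOCK_SHARE);
    /* ... examine tuples while holding the content lock ... */
    LockBuffer(buf, BUFFER_LOCK_UNLOCK);
    ReleaseBuffer(buf);
}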

◆ heap_pre_freeze_checks()

void heap_pre_freeze_checks ( Buffer  buffer,
HeapTupleFreeze tuples,
int  ntuples 
)

Definition at line 7283 of file heapam.c.

7285{
7286 Page page = BufferGetPage(buffer);
7287
7288 for (int i = 0; i < ntuples; i++)
7289 {
7290 HeapTupleFreeze *frz = tuples + i;
7291 ItemId itemid = PageGetItemId(page, frz->offset);
7292 HeapTupleHeader htup;
7293
7294 htup = (HeapTupleHeader) PageGetItem(page, itemid);
7295
7296 /* Deliberately avoid relying on tuple hint bits here */
7298 {
7300
7302 if (unlikely(!TransactionIdDidCommit(xmin)))
7303 ereport(ERROR,
7305 errmsg_internal("uncommitted xmin %u needs to be frozen",
7306 xmin)));
7307 }
7308
7309 /*
7310 * TransactionIdDidAbort won't work reliably in the presence of XIDs
7311 * left behind by transactions that were in progress during a crash,
7312 * so we can only check that xmax didn't commit
7313 */
7315 {
7317
7320 ereport(ERROR,
7322 errmsg_internal("cannot freeze committed xmax %u",
7323 xmax)));
7324 }
7325 }
7326}
#define HEAP_FREEZE_CHECK_XMAX_ABORTED
Definition: heapam.h:136
#define HEAP_FREEZE_CHECK_XMIN_COMMITTED
Definition: heapam.h:135
static bool HeapTupleHeaderXminFrozen(const HeapTupleHeaderData *tup)
Definition: htup_details.h:350
static TransactionId HeapTupleHeaderGetRawXmin(const HeapTupleHeaderData *tup)
Definition: htup_details.h:318
#define ERRCODE_DATA_CORRUPTED
Definition: pg_basebackup.c:41
uint8 checkflags
Definition: heapam.h:148
bool TransactionIdDidCommit(TransactionId transactionId)
Definition: transam.c:126
#define TransactionIdIsNormal(xid)
Definition: transam.h:42

References Assert(), BufferGetPage(), HeapTupleFreeze::checkflags, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errmsg_internal(), ERROR, HEAP_FREEZE_CHECK_XMAX_ABORTED, HEAP_FREEZE_CHECK_XMIN_COMMITTED, HeapTupleHeaderGetRawXmax(), HeapTupleHeaderGetRawXmin(), HeapTupleHeaderXminFrozen(), i, HeapTupleFreeze::offset, PageGetItem(), PageGetItemId(), TransactionIdDidCommit(), TransactionIdIsNormal, and unlikely.

Referenced by heap_page_prune_and_freeze().
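
The expected call pattern mirrors heap_page_prune_and_freeze() above: validate the prepared freeze plans while errors are still recoverable, then apply them once failure is no longer an option. A condensed sketch follows (WAL logging elided; the helper name is hypothetical).

#include "postgres.h"
#include "access/heapam.h"
#include "miscadmin.h"
#include "storage/bufmgr.h"

static void
apply_freeze_plans(Buffer buffer, HeapTupleFreeze *frozen, int nfrozen)
{
    /* Sanity-check the plans before entering the critical section. */
    heap_pre_freeze_checks(buffer, frozen, nfrozen);

    START_CRIT_SECTION();
    heap_freeze_prepared_tuples(buffer, frozen, nfrozen);
    MarkBufferDirty(buffer);
    /* A real caller would emit an XLOG_HEAP2_PRUNE_FREEZE record here. */
    END_CRIT_SECTION();
}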

◆ heap_prepare_freeze_tuple()

bool heap_prepare_freeze_tuple ( HeapTupleHeader  tuple,
const struct VacuumCutoffs cutoffs,
HeapPageFreeze pagefrz,
HeapTupleFreeze frz,
bool *  totally_frozen 
)

Definition at line 7010 of file heapam.c.

7014{
7015 bool xmin_already_frozen = false,
7016 xmax_already_frozen = false;
7017 bool freeze_xmin = false,
7018 replace_xvac = false,
7019 replace_xmax = false,
7020 freeze_xmax = false;
7021 TransactionId xid;
7022
7023 frz->xmax = HeapTupleHeaderGetRawXmax(tuple);
7024 frz->t_infomask2 = tuple->t_infomask2;
7025 frz->t_infomask = tuple->t_infomask;
7026 frz->frzflags = 0;
7027 frz->checkflags = 0;
7028
7029 /*
7030 * Process xmin, while keeping track of whether it's already frozen, or
7031 * will become frozen iff our freeze plan is executed by caller (could be
7032 * neither).
7033 */
7034 xid = HeapTupleHeaderGetXmin(tuple);
7035 if (!TransactionIdIsNormal(xid))
7036 xmin_already_frozen = true;
7037 else
7038 {
7039 if (TransactionIdPrecedes(xid, cutoffs->relfrozenxid))
7040 ereport(ERROR,
7042 errmsg_internal("found xmin %u from before relfrozenxid %u",
7043 xid, cutoffs->relfrozenxid)));
7044
7045 /* Will set freeze_xmin flags in freeze plan below */
7046 freeze_xmin = TransactionIdPrecedes(xid, cutoffs->OldestXmin);
7047
7048 /* Verify that xmin committed if and when freeze plan is executed */
7049 if (freeze_xmin)
7050 frz->checkflags |= HEAP_FREEZE_CHECK_XMIN_COMMITTED;
7051 }
7052
7053 /*
7054 * Old-style VACUUM FULL is gone, but we have to process xvac for as long
7055 * as we support having MOVED_OFF/MOVED_IN tuples in the database
7056 */
7057 xid = HeapTupleHeaderGetXvac(tuple);
7058 if (TransactionIdIsNormal(xid))
7059 {
7061 Assert(TransactionIdPrecedes(xid, cutoffs->OldestXmin));
7062
7063 /*
7064 * For Xvac, we always freeze proactively. This allows totally_frozen
7065 * tracking to ignore xvac.
7066 */
7067 replace_xvac = pagefrz->freeze_required = true;
7068
7069 /* Will set replace_xvac flags in freeze plan below */
7070 }
7071
7072 /* Now process xmax */
7073 xid = frz->xmax;
7074 if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
7075 {
7076 /* Raw xmax is a MultiXactId */
7077 TransactionId newxmax;
7078 uint16 flags;
7079
7080 /*
7081 * We will either remove xmax completely (in the "freeze_xmax" path),
7082 * process xmax by replacing it (in the "replace_xmax" path), or
7083 * perform no-op xmax processing. The only constraint is that the
7084 * FreezeLimit/MultiXactCutoff postcondition must never be violated.
7085 */
7086 newxmax = FreezeMultiXactId(xid, tuple->t_infomask, cutoffs,
7087 &flags, pagefrz);
7088
7089 if (flags & FRM_NOOP)
7090 {
7091 /*
7092 * xmax is a MultiXactId, and nothing about it changes for now.
7093 * This is the only case where 'freeze_required' won't have been
7094 * set for us by FreezeMultiXactId, as well as the only case where
7095 * neither freeze_xmax nor replace_xmax are set (given a multi).
7096 *
7097 * This is a no-op, but the call to FreezeMultiXactId might have
7098 * ratcheted back NewRelfrozenXid and/or NewRelminMxid trackers
7099 * for us (the "freeze page" variants, specifically). That'll
7100 * make it safe for our caller to freeze the page later on, while
7101 * leaving this particular xmax undisturbed.
7102 *
7103 * FreezeMultiXactId is _not_ responsible for the "no freeze"
7104 * NewRelfrozenXid/NewRelminMxid trackers, though -- that's our
7105 * job. A call to heap_tuple_should_freeze for this same tuple
7106 * will take place below if 'freeze_required' isn't set already.
7107 * (This repeats work from FreezeMultiXactId, but allows "no
7108 * freeze" tracker maintenance to happen in only one place.)
7109 */
7110 Assert(!MultiXactIdPrecedes(newxmax, cutoffs->MultiXactCutoff));
7111 Assert(MultiXactIdIsValid(newxmax) && xid == newxmax);
7112 }
7113 else if (flags & FRM_RETURN_IS_XID)
7114 {
7115 /*
7116 * xmax will become an updater Xid (original MultiXact's updater
7117 * member Xid will be carried forward as a simple Xid in Xmax).
7118 */
7119 Assert(!TransactionIdPrecedes(newxmax, cutoffs->OldestXmin));
7120
7121 /*
7122 * NB -- some of these transformations are only valid because we
7123 * know the return Xid is a tuple updater (i.e. not merely a
7124 * locker.) Also note that the only reason we don't explicitly
7125 * worry about HEAP_KEYS_UPDATED is because it lives in
7126 * t_infomask2 rather than t_infomask.
7127 */
7128 frz->t_infomask &= ~HEAP_XMAX_BITS;
7129 frz->xmax = newxmax;
7130 if (flags & FRM_MARK_COMMITTED)
7131 frz->t_infomask |= HEAP_XMAX_COMMITTED;
7132 replace_xmax = true;
7133 }
7134 else if (flags & FRM_RETURN_IS_MULTI)
7135 {
7136 uint16 newbits;
7137 uint16 newbits2;
7138
7139 /*
7140 * xmax is an old MultiXactId that we have to replace with a new
7141 * MultiXactId, to carry forward two or more original member XIDs.
7142 */
7143 Assert(!MultiXactIdPrecedes(newxmax, cutoffs->OldestMxact));
7144
7145 /*
7146 * We can't use GetMultiXactIdHintBits directly on the new multi
7147 * here; that routine initializes the masks to all zeroes, which
7148 * would lose other bits we need. Doing it this way ensures all
7149 * unrelated bits remain untouched.
7150 */
7151 frz->t_infomask &= ~HEAP_XMAX_BITS;
7152 frz->t_infomask2 &= ~HEAP_KEYS_UPDATED;
7153 GetMultiXactIdHintBits(newxmax, &newbits, &newbits2);
7154 frz->t_infomask |= newbits;
7155 frz->t_infomask2 |= newbits2;
7156 frz->xmax = newxmax;
7157 replace_xmax = true;
7158 }
7159 else
7160 {
7161 /*
7162 * Freeze plan for tuple "freezes xmax" in the strictest sense:
7163 * it'll leave nothing in xmax (neither an Xid nor a MultiXactId).
7164 */
7165 Assert(flags & FRM_INVALIDATE_XMAX);
7166 Assert(!TransactionIdIsValid(newxmax));
7167
7168 /* Will set freeze_xmax flags in freeze plan below */
7169 freeze_xmax = true;
7170 }
7171
7172 /* MultiXactId processing forces freezing (barring FRM_NOOP case) */
7173 Assert(pagefrz->freeze_required || (!freeze_xmax && !replace_xmax));
7174 }
7175 else if (TransactionIdIsNormal(xid))
7176 {
7177 /* Raw xmax is normal XID */
7178 if (TransactionIdPrecedes(xid, cutoffs->relfrozenxid))
7179 ereport(ERROR,
7181 errmsg_internal("found xmax %u from before relfrozenxid %u",
7182 xid, cutoffs->relfrozenxid)));
7183
7184 /* Will set freeze_xmax flags in freeze plan below */
7185 freeze_xmax = TransactionIdPrecedes(xid, cutoffs->OldestXmin);
7186
7187 /*
7188 * Verify that xmax aborted if and when freeze plan is executed,
7189 * provided it's from an update. (A lock-only xmax can be removed
7190 * independent of this, since the lock is released at xact end.)
7191 */
7192 if (freeze_xmax && !HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
7193 frz->checkflags |= HEAP_FREEZE_CHECK_XMAX_ABORTED;
7194 }
7195 else if (!TransactionIdIsValid(xid))
7196 {
7197 /* Raw xmax is InvalidTransactionId XID */
7198 Assert((tuple->t_infomask & HEAP_XMAX_IS_MULTI) == 0);
7199 xmax_already_frozen = true;
7200 }
7201 else
7202 ereport(ERROR,
7204 errmsg_internal("found raw xmax %u (infomask 0x%04x) not invalid and not multi",
7205 xid, tuple->t_infomask)));
7206
7207 if (freeze_xmin)
7208 {
7209 Assert(!xmin_already_frozen);
7210
7211 frz->t_infomask |= HEAP_XMIN_FROZEN;
7212 }
7213 if (replace_xvac)
7214 {
7215 /*
7216 * If a MOVED_OFF tuple is not dead, the xvac transaction must have
7217 * failed; whereas a non-dead MOVED_IN tuple must mean the xvac
7218 * transaction succeeded.
7219 */
7220 Assert(pagefrz->freeze_required);
7221 if (tuple->t_infomask & HEAP_MOVED_OFF)
7222 frz->frzflags |= XLH_INVALID_XVAC;
7223 else
7224 frz->frzflags |= XLH_FREEZE_XVAC;
7225 }
7226 if (replace_xmax)
7227 {
7228 Assert(!xmax_already_frozen && !freeze_xmax);
7229 Assert(pagefrz->freeze_required);
7230
7231 /* Already set replace_xmax flags in freeze plan earlier */
7232 }
7233 if (freeze_xmax)
7234 {
7235 Assert(!xmax_already_frozen && !replace_xmax);
7236
7237 frz->xmax = InvalidTransactionId;
7238
7239 /*
7240 * The tuple might be marked either XMAX_INVALID or XMAX_COMMITTED +
7241 * LOCKED. Normalize to INVALID just to be sure no one gets confused.
7242 * Also get rid of the HEAP_KEYS_UPDATED bit.
7243 */
7244 frz->t_infomask &= ~HEAP_XMAX_BITS;
7245 frz->t_infomask |= HEAP_XMAX_INVALID;
7246 frz->t_infomask2 &= ~HEAP_HOT_UPDATED;
7247 frz->t_infomask2 &= ~HEAP_KEYS_UPDATED;
7248 }
7249
7250 /*
7251 * Determine if this tuple is already totally frozen, or will become
7252 * totally frozen (provided caller executes freeze plans for the page)
7253 */
7254 *totally_frozen = ((freeze_xmin || xmin_already_frozen) &&
7255 (freeze_xmax || xmax_already_frozen));
7256
7257 if (!pagefrz->freeze_required && !(xmin_already_frozen &&
7258 xmax_already_frozen))
7259 {
7260 /*
7261 * So far no previous tuple from the page made freezing mandatory.
7262 * Does this tuple force caller to freeze the entire page?
7263 */
7264 pagefrz->freeze_required =
7265 heap_tuple_should_freeze(tuple, cutoffs,
7266 &pagefrz->NoFreezePageRelfrozenXid,
7267 &pagefrz->NoFreezePageRelminMxid);
7268 }
7269
7270 /* Tell caller if this tuple has a usable freeze plan set in *frz */
7271 return freeze_xmin || replace_xvac || replace_xmax || freeze_xmax;
7272}
static void GetMultiXactIdHintBits(MultiXactId multi, uint16 *new_infomask, uint16 *new_infomask2)
Definition: heapam.c:7402
#define FRM_RETURN_IS_XID
Definition: heapam.c:6609
static TransactionId FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, const struct VacuumCutoffs *cutoffs, uint16 *flags, HeapPageFreeze *pagefrz)
Definition: heapam.c:6660
bool heap_tuple_should_freeze(HeapTupleHeader tuple, const struct VacuumCutoffs *cutoffs, TransactionId *NoFreezePageRelfrozenXid, MultiXactId *NoFreezePageRelminMxid)
Definition: heapam.c:7821
#define FRM_MARK_COMMITTED
Definition: heapam.c:6611
#define FRM_NOOP
Definition: heapam.c:6607
#define FRM_RETURN_IS_MULTI
Definition: heapam.c:6610
#define FRM_INVALIDATE_XMAX
Definition: heapam.c:6608
#define HEAP_MOVED_OFF
Definition: htup_details.h:211
#define HEAP_XMIN_FROZEN
Definition: htup_details.h:206
static TransactionId HeapTupleHeaderGetXvac(const HeapTupleHeaderData *tup)
Definition: htup_details.h:442
#define HEAP_XMAX_COMMITTED
Definition: htup_details.h:207
bool MultiXactIdPrecedes(MultiXactId multi1, MultiXactId multi2)
Definition: multixact.c:3317
#define MultiXactIdIsValid(multi)
Definition: multixact.h:28
MultiXactId OldestMxact
Definition: vacuum.h:275
bool TransactionIdPrecedesOrEquals(TransactionId id1, TransactionId id2)
Definition: transam.c:299

References Assert(), HeapTupleFreeze::checkflags, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errmsg_internal(), ERROR, HeapPageFreeze::freeze_required, FreezeMultiXactId(), FRM_INVALIDATE_XMAX, FRM_MARK_COMMITTED, FRM_NOOP, FRM_RETURN_IS_MULTI, FRM_RETURN_IS_XID, HeapTupleFreeze::frzflags, GetMultiXactIdHintBits(), HEAP_FREEZE_CHECK_XMAX_ABORTED, HEAP_FREEZE_CHECK_XMIN_COMMITTED, HEAP_MOVED_OFF, heap_tuple_should_freeze(), HEAP_XMAX_COMMITTED, HEAP_XMAX_INVALID, HEAP_XMAX_IS_LOCKED_ONLY(), HEAP_XMAX_IS_MULTI, HEAP_XMIN_FROZEN, HeapTupleHeaderGetRawXmax(), HeapTupleHeaderGetXmin(), HeapTupleHeaderGetXvac(), InvalidTransactionId, VacuumCutoffs::MultiXactCutoff, MultiXactIdIsValid, MultiXactIdPrecedes(), HeapPageFreeze::NoFreezePageRelfrozenXid, HeapPageFreeze::NoFreezePageRelminMxid, VacuumCutoffs::OldestMxact, VacuumCutoffs::OldestXmin, VacuumCutoffs::relfrozenxid, HeapTupleFreeze::t_infomask, HeapTupleHeaderData::t_infomask, HeapTupleFreeze::t_infomask2, HeapTupleHeaderData::t_infomask2, TransactionIdIsNormal, TransactionIdIsValid, TransactionIdPrecedes(), TransactionIdPrecedesOrEquals(), XLH_FREEZE_XVAC, XLH_INVALID_XVAC, and HeapTupleFreeze::xmax.

Referenced by heap_freeze_tuple(), and heap_prune_record_unchanged_lp_normal().
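
A sketch of how a page's worth of freeze plans can be collected with this function, in the style of the pruning code earlier on this page. The helper name and parameters are assumptions; cutoffs and pagefrz are expected to be set up by the caller, and only tuples for which the function returns true get an entry in the frozen[] array.

#include "postgres.h"
#include "access/heapam.h"
#include "access/htup_details.h"
#include "commands/vacuum.h"
#include "storage/bufpage.h"
#include "storage/off.h"

static int
collect_freeze_plans(Page page, OffsetNumber maxoff,
                     const struct VacuumCutoffs *cutoffs,
                     HeapPageFreeze *pagefrz,
                     HeapTupleFreeze *frozen)   /* MaxHeapTuplesPerPage entries */
{
    int         nfrozen = 0;

    for (OffsetNumber offnum = FirstOffsetNumber; offnum <= maxoff;
         offnum = OffsetNumberNext(offnum))
    {
        ItemId      lp = PageGetItemId(page, offnum);
        HeapTupleHeader htup;
        bool        totally_frozen;

        if (!ItemIdIsNormal(lp))
            continue;
        htup = (HeapTupleHeader) PageGetItem(page, lp);

        /* Returns true when the tuple has a usable freeze plan in *frz */
        if (heap_prepare_freeze_tuple(htup, cutoffs, pagefrz,
                                      &frozen[nfrozen], &totally_frozen))
            frozen[nfrozen++].offset = offnum;
    }

    return nfrozen;
}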

◆ heap_prepare_pagescan()

void heap_prepare_pagescan ( TableScanDesc  sscan)

Definition at line 531 of file heapam.c.

532{
533 HeapScanDesc scan = (HeapScanDesc) sscan;
534 Buffer buffer = scan->rs_cbuf;
535 BlockNumber block = scan->rs_cblock;
536 Snapshot snapshot;
537 Page page;
538 int lines;
539 bool all_visible;
540 bool check_serializable;
541
542 Assert(BufferGetBlockNumber(buffer) == block);
543
544 /* ensure we're not accidentally being used when not in pagemode */
 545 Assert(scan->rs_base.rs_flags & SO_ALLOW_PAGEMODE);
 546 snapshot = scan->rs_base.rs_snapshot;
547
548 /*
549 * Prune and repair fragmentation for the whole page, if possible.
550 */
551 heap_page_prune_opt(scan->rs_base.rs_rd, buffer);
552
553 /*
554 * We must hold share lock on the buffer content while examining tuple
555 * visibility. Afterwards, however, the tuples we have found to be
556 * visible are guaranteed good as long as we hold the buffer pin.
557 */
 558 LockBuffer(buffer, BUFFER_LOCK_SHARE);
 559
560 page = BufferGetPage(buffer);
561 lines = PageGetMaxOffsetNumber(page);
562
563 /*
564 * If the all-visible flag indicates that all tuples on the page are
565 * visible to everyone, we can skip the per-tuple visibility tests.
566 *
567 * Note: In hot standby, a tuple that's already visible to all
568 * transactions on the primary might still be invisible to a read-only
569 * transaction in the standby. We partly handle this problem by tracking
570 * the minimum xmin of visible tuples as the cut-off XID while marking a
571 * page all-visible on the primary and WAL log that along with the
572 * visibility map SET operation. In hot standby, we wait for (or abort)
 573 * all transactions that might not see one or more tuples on
 574 * the page. That's how index-only scans work fine in hot standby. A
 575 * crucial difference between index-only scans and heap scans is that the
 576 * index-only scan completely relies on the visibility map whereas a heap
577 * scan looks at the page-level PD_ALL_VISIBLE flag. We are not sure if
578 * the page-level flag can be trusted in the same way, because it might
579 * get propagated somehow without being explicitly WAL-logged, e.g. via a
580 * full page write. Until we can prove that beyond doubt, let's check each
581 * tuple for visibility the hard way.
582 */
583 all_visible = PageIsAllVisible(page) && !snapshot->takenDuringRecovery;
584 check_serializable =
 585 CheckForSerializableConflictOutNeeded(scan->rs_base.rs_rd, snapshot);
 586
587 /*
588 * We call page_collect_tuples() with constant arguments, to get the
589 * compiler to constant fold the constant arguments. Separate calls with
590 * constant arguments, rather than variables, are needed on several
591 * compilers to actually perform constant folding.
592 */
593 if (likely(all_visible))
594 {
595 if (likely(!check_serializable))
596 scan->rs_ntuples = page_collect_tuples(scan, snapshot, page, buffer,
597 block, lines, true, false);
598 else
599 scan->rs_ntuples = page_collect_tuples(scan, snapshot, page, buffer,
600 block, lines, true, true);
601 }
602 else
603 {
604 if (likely(!check_serializable))
605 scan->rs_ntuples = page_collect_tuples(scan, snapshot, page, buffer,
606 block, lines, false, false);
607 else
608 scan->rs_ntuples = page_collect_tuples(scan, snapshot, page, buffer,
609 block, lines, false, true);
610 }
611
 612 LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
 613}
static pg_attribute_always_inline int page_collect_tuples(HeapScanDesc scan, Snapshot snapshot, Page page, Buffer buffer, BlockNumber block, int lines, bool all_visible, bool check_serializable)
Definition: heapam.c:481
bool CheckForSerializableConflictOutNeeded(Relation relation, Snapshot snapshot)
Definition: predicate.c:3991
void heap_page_prune_opt(Relation relation, Buffer buffer)
Definition: pruneheap.c:193
uint32 rs_ntuples
Definition: heapam.h:97
BlockNumber rs_cblock
Definition: heapam.h:67
bool takenDuringRecovery
Definition: snapshot.h:180

References Assert(), BUFFER_LOCK_SHARE, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage(), CheckForSerializableConflictOutNeeded(), heap_page_prune_opt(), likely, LockBuffer(), page_collect_tuples(), PageGetMaxOffsetNumber(), PageIsAllVisible(), HeapScanDescData::rs_base, HeapScanDescData::rs_cblock, HeapScanDescData::rs_cbuf, TableScanDescData::rs_flags, HeapScanDescData::rs_ntuples, TableScanDescData::rs_rd, TableScanDescData::rs_snapshot, SO_ALLOW_PAGEMODE, and SnapshotData::takenDuringRecovery.

Referenced by heapam_scan_sample_next_block(), and heapgettup_pagemode().

◆ heap_rescan()

void heap_rescan ( TableScanDesc  sscan,
ScanKey  key,
bool  set_params,
bool  allow_strat,
bool  allow_sync,
bool  allow_pagemode 
)

Definition at line 1222 of file heapam.c.

1224{
1225 HeapScanDesc scan = (HeapScanDesc) sscan;
1226
1227 if (set_params)
1228 {
1229 if (allow_strat)
1231 else
1232 scan->rs_base.rs_flags &= ~SO_ALLOW_STRAT;
1233
1234 if (allow_sync)
1236 else
1237 scan->rs_base.rs_flags &= ~SO_ALLOW_SYNC;
1238
1239 if (allow_pagemode && scan->rs_base.rs_snapshot &&
1242 else
1244 }
1245
1246 /*
1247 * unpin scan buffers
1248 */
1249 if (BufferIsValid(scan->rs_cbuf))
1250 {
1251 ReleaseBuffer(scan->rs_cbuf);
1252 scan->rs_cbuf = InvalidBuffer;
1253 }
1254
1255 /*
1256 * SO_TYPE_BITMAPSCAN would be cleaned up here, but it does not hold any
1257 * additional data vs a normal HeapScan
1258 */
1259
1260 /*
1261 * The read stream is reset on rescan. This must be done before
1262 * initscan(), as some state referred to by read_stream_reset() is reset
1263 * in initscan().
1264 */
1265 if (scan->rs_read_stream)
1267
1268 /*
1269 * reinitialize scan descriptor
1270 */
1271 initscan(scan, key, true);
1272}
void read_stream_reset(ReadStream *stream)
Definition: read_stream.c:1010
@ SO_ALLOW_STRAT
Definition: tableam.h:57
@ SO_ALLOW_SYNC
Definition: tableam.h:59

References BufferIsValid(), initscan(), InvalidBuffer, IsMVCCSnapshot, sort-test::key, read_stream_reset(), ReleaseBuffer(), HeapScanDescData::rs_base, HeapScanDescData::rs_cbuf, TableScanDescData::rs_flags, HeapScanDescData::rs_read_stream, TableScanDescData::rs_snapshot, SO_ALLOW_PAGEMODE, SO_ALLOW_STRAT, and SO_ALLOW_SYNC.

◆ heap_set_tidrange()

void heap_set_tidrange ( TableScanDesc  sscan,
ItemPointer  mintid,
ItemPointer  maxtid 
)

Definition at line 1393 of file heapam.c.

1395{
1396 HeapScanDesc scan = (HeapScanDesc) sscan;
1397 BlockNumber startBlk;
1398 BlockNumber numBlks;
1399 ItemPointerData highestItem;
1400 ItemPointerData lowestItem;
1401
1402 /*
1403 * For relations without any pages, we can simply leave the TID range
1404 * unset. There will be no tuples to scan, therefore no tuples outside
1405 * the given TID range.
1406 */
1407 if (scan->rs_nblocks == 0)
1408 return;
1409
1410 /*
1411 * Set up some ItemPointers which point to the first and last possible
1412 * tuples in the heap.
1413 */
1414 ItemPointerSet(&highestItem, scan->rs_nblocks - 1, MaxOffsetNumber);
1415 ItemPointerSet(&lowestItem, 0, FirstOffsetNumber);
1416
1417 /*
1418 * If the given maximum TID is below the highest possible TID in the
1419 * relation, then restrict the range to that, otherwise we scan to the end
1420 * of the relation.
1421 */
1422 if (ItemPointerCompare(maxtid, &highestItem) < 0)
1423 ItemPointerCopy(maxtid, &highestItem);
1424
1425 /*
1426 * If the given minimum TID is above the lowest possible TID in the
1427 * relation, then restrict the range to only scan for TIDs above that.
1428 */
1429 if (ItemPointerCompare(mintid, &lowestItem) > 0)
1430 ItemPointerCopy(mintid, &lowestItem);
1431
1432 /*
1433 * Check for an empty range and protect against would-be negative results
1434 * from the numBlks calculation below.
1435 */
1436 if (ItemPointerCompare(&highestItem, &lowestItem) < 0)
1437 {
1438 /* Set an empty range of blocks to scan */
1439 heap_setscanlimits(sscan, 0, 0);
1440 return;
1441 }
1442
1443 /*
1444 * Calculate the first block and the number of blocks we must scan. We
1445 * could be more aggressive here and perform some more validation to try
1446 * and further narrow the scope of blocks to scan by checking if the
1447 * lowestItem has an offset above MaxOffsetNumber. In this case, we could
1448 * advance startBlk by one. Likewise, if highestItem has an offset of 0
1449 * we could scan one fewer blocks. However, such an optimization does not
1450 * seem worth troubling over, currently.
1451 */
1452 startBlk = ItemPointerGetBlockNumberNoCheck(&lowestItem);
1453
1454 numBlks = ItemPointerGetBlockNumberNoCheck(&highestItem) -
1455 ItemPointerGetBlockNumberNoCheck(&lowestItem) + 1;
1456
1457 /* Set the start block and number of blocks to scan */
1458 heap_setscanlimits(sscan, startBlk, numBlks);
1459
1460 /* Finally, set the TID range in sscan */
1461 ItemPointerCopy(&lowestItem, &sscan->st.tidrange.rs_mintid);
1462 ItemPointerCopy(&highestItem, &sscan->st.tidrange.rs_maxtid);
1463}
void heap_setscanlimits(TableScanDesc sscan, BlockNumber startBlk, BlockNumber numBlks)
Definition: heapam.c:459
static BlockNumber ItemPointerGetBlockNumberNoCheck(const ItemPointerData *pointer)
Definition: itemptr.h:93
#define MaxOffsetNumber
Definition: off.h:28
BlockNumber rs_nblocks
Definition: heapam.h:59

References FirstOffsetNumber, heap_setscanlimits(), ItemPointerCompare(), ItemPointerCopy(), ItemPointerGetBlockNumberNoCheck(), ItemPointerSet(), MaxOffsetNumber, TableScanDescData::rs_maxtid, TableScanDescData::rs_mintid, HeapScanDescData::rs_nblocks, TableScanDescData::st, and TableScanDescData::tidrange.
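
A sketch of restricting an already-started scan to a block range of TIDs and walking it. This is illustrative only: it assumes the scan was opened in a way that permits TID-range scanning, that slot is a compatible TupleTableSlot, and the helper name is hypothetical.

#include "postgres.h"
#include "access/heapam.h"
#include "access/sdir.h"
#include "executor/tuptable.h"
#include "storage/itemptr.h"
#include "storage/off.h"

static void
scan_blocks_4_to_7(TableScanDesc sscan, TupleTableSlot *slot)
{
    ItemPointerData mintid;
    ItemPointerData maxtid;

    /* Lowest possible TID in block 4, highest possible TID in block 7 */
    ItemPointerSet(&mintid, 4, FirstOffsetNumber);
    ItemPointerSet(&maxtid, 7, MaxOffsetNumber);

    heap_set_tidrange(sscan, &mintid, &maxtid);

    while (heap_getnextslot_tidrange(sscan, ForwardScanDirection, slot))
    {
        /* process one tuple whose TID falls inside the requested range */
    }
}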

◆ heap_setscanlimits()

void heap_setscanlimits ( TableScanDesc  sscan,
BlockNumber  startBlk,
BlockNumber  numBlks 
)

Definition at line 459 of file heapam.c.

460{
461 HeapScanDesc scan = (HeapScanDesc) sscan;
462
463 Assert(!scan->rs_inited); /* else too late to change */
464 /* else rs_startblock is significant */
 465 Assert(!(scan->rs_base.rs_flags & SO_ALLOW_SYNC));
 466
467 /* Check startBlk is valid (but allow case of zero blocks...) */
468 Assert(startBlk == 0 || startBlk < scan->rs_nblocks);
469
470 scan->rs_startblock = startBlk;
471 scan->rs_numblocks = numBlks;
472}
bool rs_inited
Definition: heapam.h:65
BlockNumber rs_startblock
Definition: heapam.h:60
BlockNumber rs_numblocks
Definition: heapam.h:61

References Assert(), HeapScanDescData::rs_base, TableScanDescData::rs_flags, HeapScanDescData::rs_inited, HeapScanDescData::rs_numblocks, HeapScanDescData::rs_startblock, and SO_ALLOW_SYNC.

Referenced by heap_set_tidrange(), and heapam_index_build_range_scan().
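
For example (illustrative only; the helper name is hypothetical), a caller that wants to scan just blocks 100..131 would set the limits right after beginning the scan and before fetching the first tuple, since the limits cannot be changed once the scan has started.

#include "postgres.h"
#include "access/heapam.h"

static void
limit_scan_to_block_range(TableScanDesc sscan)
{
    /* Must run before the scan returns its first tuple. */
    heap_setscanlimits(sscan, 100, 32);     /* blocks 100..131 */
}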

◆ heap_tuple_needs_eventual_freeze()

bool heap_tuple_needs_eventual_freeze ( HeapTupleHeader  tuple)

Definition at line 7766 of file heapam.c.

7767{
7768 TransactionId xid;
7769
7770 /*
7771 * If xmin is a normal transaction ID, this tuple is definitely not
7772 * frozen.
7773 */
7774 xid = HeapTupleHeaderGetXmin(tuple);
7775 if (TransactionIdIsNormal(xid))
7776 return true;
7777
7778 /*
7779 * If xmax is a valid xact or multixact, this tuple is also not frozen.
7780 */
7781 if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
7782 {
7783 MultiXactId multi;
7784
7785 multi = HeapTupleHeaderGetRawXmax(tuple);
7786 if (MultiXactIdIsValid(multi))
7787 return true;
7788 }
7789 else
7790 {
7791 xid = HeapTupleHeaderGetRawXmax(tuple);
7792 if (TransactionIdIsNormal(xid))
7793 return true;
7794 }
7795
7796 if (tuple->t_infomask & HEAP_MOVED)
7797 {
7798 xid = HeapTupleHeaderGetXvac(tuple);
7799 if (TransactionIdIsNormal(xid))
7800 return true;
7801 }
7802
7803 return false;
7804}

References HEAP_MOVED, HEAP_XMAX_IS_MULTI, HeapTupleHeaderGetRawXmax(), HeapTupleHeaderGetXmin(), HeapTupleHeaderGetXvac(), MultiXactIdIsValid, HeapTupleHeaderData::t_infomask, and TransactionIdIsNormal.

Referenced by collect_corrupt_items(), and heap_page_is_all_visible().
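
A sketch in the spirit of heap_page_is_all_visible(): deciding whether a page that is already all-visible could also be marked all-frozen in the visibility map. The helper name and parameters are hypothetical.

#include "postgres.h"
#include "access/heapam.h"
#include "access/htup_details.h"
#include "storage/bufpage.h"
#include "storage/off.h"

static bool
page_is_all_frozen(Page page)
{
    OffsetNumber maxoff = PageGetMaxOffsetNumber(page);

    for (OffsetNumber offnum = FirstOffsetNumber; offnum <= maxoff;
         offnum = OffsetNumberNext(offnum))
    {
        ItemId      lp = PageGetItemId(page, offnum);

        if (!ItemIdIsNormal(lp))
            continue;

        /* Any remaining normal XID or valid multixact means VACUUM must revisit. */
        if (heap_tuple_needs_eventual_freeze((HeapTupleHeader) PageGetItem(page, lp)))
            return false;
    }

    return true;
}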

◆ heap_tuple_should_freeze()

bool heap_tuple_should_freeze ( HeapTupleHeader  tuple,
const struct VacuumCutoffs cutoffs,
TransactionId NoFreezePageRelfrozenXid,
MultiXactId NoFreezePageRelminMxid 
)

Definition at line 7821 of file heapam.c.

7825{
7826 TransactionId xid;
7827 MultiXactId multi;
7828 bool freeze = false;
7829
7830 /* First deal with xmin */
7831 xid = HeapTupleHeaderGetXmin(tuple);
7832 if (TransactionIdIsNormal(xid))
7833 {
7835 if (TransactionIdPrecedes(xid, *NoFreezePageRelfrozenXid))
7836 *NoFreezePageRelfrozenXid = xid;
7837 if (TransactionIdPrecedes(xid, cutoffs->FreezeLimit))
7838 freeze = true;
7839 }
7840
7841 /* Now deal with xmax */
7842 xid = InvalidTransactionId;
7843 multi = InvalidMultiXactId;
7844 if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
7845 multi = HeapTupleHeaderGetRawXmax(tuple);
7846 else
7847 xid = HeapTupleHeaderGetRawXmax(tuple);
7848
7849 if (TransactionIdIsNormal(xid))
7850 {
7852 /* xmax is a non-permanent XID */
7853 if (TransactionIdPrecedes(xid, *NoFreezePageRelfrozenXid))
7854 *NoFreezePageRelfrozenXid = xid;
7855 if (TransactionIdPrecedes(xid, cutoffs->FreezeLimit))
7856 freeze = true;
7857 }
7858 else if (!MultiXactIdIsValid(multi))
7859 {
7860 /* xmax is a permanent XID or invalid MultiXactId/XID */
7861 }
7862 else if (HEAP_LOCKED_UPGRADED(tuple->t_infomask))
7863 {
7864 /* xmax is a pg_upgrade'd MultiXact, which can't have updater XID */
7865 if (MultiXactIdPrecedes(multi, *NoFreezePageRelminMxid))
7866 *NoFreezePageRelminMxid = multi;
7867 /* heap_prepare_freeze_tuple always freezes pg_upgrade'd xmax */
7868 freeze = true;
7869 }
7870 else
7871 {
7872 /* xmax is a MultiXactId that may have an updater XID */
7873 MultiXactMember *members;
7874 int nmembers;
7875
7877 if (MultiXactIdPrecedes(multi, *NoFreezePageRelminMxid))
7878 *NoFreezePageRelminMxid = multi;
7879 if (MultiXactIdPrecedes(multi, cutoffs->MultiXactCutoff))
7880 freeze = true;
7881
7882 /* need to check whether any member of the mxact is old */
7883 nmembers = GetMultiXactIdMembers(multi, &members, false,
7884 HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask));
7885
7886 for (int i = 0; i < nmembers; i++)
7887 {
7888 xid = members[i].xid;
7890 if (TransactionIdPrecedes(xid, *NoFreezePageRelfrozenXid))
7891 *NoFreezePageRelfrozenXid = xid;
7892 if (TransactionIdPrecedes(xid, cutoffs->FreezeLimit))
7893 freeze = true;
7894 }
7895 if (nmembers > 0)
7896 pfree(members);
7897 }
7898
7899 if (tuple->t_infomask & HEAP_MOVED)
7900 {
7901 xid = HeapTupleHeaderGetXvac(tuple);
7902 if (TransactionIdIsNormal(xid))
7903 {
7905 if (TransactionIdPrecedes(xid, *NoFreezePageRelfrozenXid))
7906 *NoFreezePageRelfrozenXid = xid;
7907 /* heap_prepare_freeze_tuple forces xvac freezing */
7908 freeze = true;
7909 }
7910 }
7911
7912 return freeze;
7913}
static bool HEAP_LOCKED_UPGRADED(uint16 infomask)
Definition: htup_details.h:251
bool MultiXactIdPrecedesOrEquals(MultiXactId multi1, MultiXactId multi2)
Definition: multixact.c:3331
TransactionId xid
Definition: multixact.h:58

References Assert(), VacuumCutoffs::FreezeLimit, GetMultiXactIdMembers(), HEAP_LOCKED_UPGRADED(), HEAP_MOVED, HEAP_XMAX_IS_LOCKED_ONLY(), HEAP_XMAX_IS_MULTI, HeapTupleHeaderGetRawXmax(), HeapTupleHeaderGetXmin(), HeapTupleHeaderGetXvac(), i, InvalidMultiXactId, InvalidTransactionId, VacuumCutoffs::MultiXactCutoff, MultiXactIdIsValid, MultiXactIdPrecedes(), MultiXactIdPrecedesOrEquals(), pfree(), VacuumCutoffs::relfrozenxid, VacuumCutoffs::relminmxid, HeapTupleHeaderData::t_infomask, TransactionIdIsNormal, TransactionIdPrecedes(), TransactionIdPrecedesOrEquals(), and MultiXactMember::xid.

Referenced by heap_prepare_freeze_tuple(), and lazy_scan_noprune().
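
A sketch of the lazy_scan_noprune()-style usage: even when the caller decides not to freeze, it still consults this function per tuple so its relfrozenxid/relminmxid trackers are ratcheted back correctly. The helper name and the assumption that the trackers were initialized to the caller's current candidate values are hypothetical.

#include "postgres.h"
#include "access/heapam.h"
#include "access/htup_details.h"
#include "commands/vacuum.h"
#include "storage/bufpage.h"
#include "storage/off.h"

static bool
page_has_tuples_requiring_freeze(Page page, OffsetNumber maxoff,
                                 const struct VacuumCutoffs *cutoffs,
                                 TransactionId *no_freeze_relfrozenxid,
                                 MultiXactId *no_freeze_relminmxid)
{
    bool        should_freeze = false;

    for (OffsetNumber offnum = FirstOffsetNumber; offnum <= maxoff;
         offnum = OffsetNumberNext(offnum))
    {
        ItemId      lp = PageGetItemId(page, offnum);

        if (!ItemIdIsNormal(lp))
            continue;

        /* Trackers only ever move backwards; the result is sticky. */
        should_freeze |= heap_tuple_should_freeze((HeapTupleHeader) PageGetItem(page, lp),
                                                  cutoffs,
                                                  no_freeze_relfrozenxid,
                                                  no_freeze_relminmxid);
    }

    return should_freeze;
}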

◆ heap_update()

TM_Result heap_update ( Relation  relation,
ItemPointer  otid,
HeapTuple  newtup,
CommandId  cid,
Snapshot  crosscheck,
bool  wait,
struct TM_FailureData tmfd,
LockTupleMode lockmode,
TU_UpdateIndexes update_indexes 
)

Definition at line 3212 of file heapam.c.

3216{
3217 TM_Result result;
3219 Bitmapset *hot_attrs;
3220 Bitmapset *sum_attrs;
3221 Bitmapset *key_attrs;
3222 Bitmapset *id_attrs;
3223 Bitmapset *interesting_attrs;
3224 Bitmapset *modified_attrs;
3225 ItemId lp;
3226 HeapTupleData oldtup;
3227 HeapTuple heaptup;
3228 HeapTuple old_key_tuple = NULL;
3229 bool old_key_copied = false;
3230 Page page;
3231 BlockNumber block;
3232 MultiXactStatus mxact_status;
3233 Buffer buffer,
3234 newbuf,
3235 vmbuffer = InvalidBuffer,
3236 vmbuffer_new = InvalidBuffer;
3237 bool need_toast;
3238 Size newtupsize,
3239 pagefree;
3240 bool have_tuple_lock = false;
3241 bool iscombo;
3242 bool use_hot_update = false;
3243 bool summarized_update = false;
3244 bool key_intact;
3245 bool all_visible_cleared = false;
3246 bool all_visible_cleared_new = false;
3247 bool checked_lockers;
3248 bool locker_remains;
3249 bool id_has_external = false;
3250 TransactionId xmax_new_tuple,
3251 xmax_old_tuple;
3252 uint16 infomask_old_tuple,
3253 infomask2_old_tuple,
3254 infomask_new_tuple,
3255 infomask2_new_tuple;
3256
3258
3259 /* Cheap, simplistic check that the tuple matches the rel's rowtype. */
3262
3263 /*
3264 * Forbid this during a parallel operation, lest it allocate a combo CID.
3265 * Other workers might need that combo CID for visibility checks, and we
3266 * have no provision for broadcasting it to them.
3267 */
3268 if (IsInParallelMode())
3269 ereport(ERROR,
3270 (errcode(ERRCODE_INVALID_TRANSACTION_STATE),
3271 errmsg("cannot update tuples during a parallel operation")));
3272
3273#ifdef USE_ASSERT_CHECKING
3274 check_lock_if_inplace_updateable_rel(relation, otid, newtup);
3275#endif
3276
3277 /*
3278 * Fetch the list of attributes to be checked for various operations.
3279 *
3280 * For HOT considerations, this is wasted effort if we fail to update or
3281 * have to put the new tuple on a different page. But we must compute the
3282 * list before obtaining buffer lock --- in the worst case, if we are
3283 * doing an update on one of the relevant system catalogs, we could
3284 * deadlock if we try to fetch the list later. In any case, the relcache
3285 * caches the data so this is usually pretty cheap.
3286 *
3287 * We also need columns used by the replica identity and columns that are
3288 * considered the "key" of rows in the table.
3289 *
3290 * Note that we get copies of each bitmap, so we need not worry about
3291 * relcache flush happening midway through.
3292 */
3293 hot_attrs = RelationGetIndexAttrBitmap(relation,
3295 sum_attrs = RelationGetIndexAttrBitmap(relation,
3298 id_attrs = RelationGetIndexAttrBitmap(relation,
3300 interesting_attrs = NULL;
3301 interesting_attrs = bms_add_members(interesting_attrs, hot_attrs);
3302 interesting_attrs = bms_add_members(interesting_attrs, sum_attrs);
3303 interesting_attrs = bms_add_members(interesting_attrs, key_attrs);
3304 interesting_attrs = bms_add_members(interesting_attrs, id_attrs);
3305
3306 block = ItemPointerGetBlockNumber(otid);
3307 INJECTION_POINT("heap_update-before-pin");
3308 buffer = ReadBuffer(relation, block);
3309 page = BufferGetPage(buffer);
3310
3311 /*
3312 * Before locking the buffer, pin the visibility map page if it appears to
3313 * be necessary. Since we haven't got the lock yet, someone else might be
3314 * in the middle of changing this, so we'll need to recheck after we have
3315 * the lock.
3316 */
3317 if (PageIsAllVisible(page))
3318 visibilitymap_pin(relation, block, &vmbuffer);
3319
3321
3322 lp = PageGetItemId(page, ItemPointerGetOffsetNumber(otid));
3323
3324 /*
3325 * Usually, a buffer pin and/or snapshot blocks pruning of otid, ensuring
3326 * we see LP_NORMAL here. When the otid origin is a syscache, we may have
3327 * neither a pin nor a snapshot. Hence, we may see other LP_ states, each
3328 * of which indicates concurrent pruning.
3329 *
3330 * Failing with TM_Updated would be most accurate. However, unlike other
3331 * TM_Updated scenarios, we don't know the successor ctid in LP_UNUSED and
3332 * LP_DEAD cases. While the distinction between TM_Updated and TM_Deleted
3333 * does matter to SQL statements UPDATE and MERGE, those SQL statements
3334 * hold a snapshot that ensures LP_NORMAL. Hence, the choice between
3335 * TM_Updated and TM_Deleted affects only the wording of error messages.
3336 * Settle on TM_Deleted, for two reasons. First, it avoids complicating
3337 * the specification of when tmfd->ctid is valid. Second, it creates
3338 * error log evidence that we took this branch.
3339 *
3340 * Since it's possible to see LP_UNUSED at otid, it's also possible to see
3341 * LP_NORMAL for a tuple that replaced LP_UNUSED. If it's a tuple for an
3342 * unrelated row, we'll fail with "duplicate key value violates unique".
3343 * XXX if otid is the live, newer version of the newtup row, we'll discard
3344 * changes originating in versions of this catalog row after the version
3345 * the caller got from syscache. See syscache-update-pruned.spec.
3346 */
3347 if (!ItemIdIsNormal(lp))
3348 {
3350
3351 UnlockReleaseBuffer(buffer);
3352 Assert(!have_tuple_lock);
3353 if (vmbuffer != InvalidBuffer)
3354 ReleaseBuffer(vmbuffer);
3355 tmfd->ctid = *otid;
3356 tmfd->xmax = InvalidTransactionId;
3357 tmfd->cmax = InvalidCommandId;
3358 *update_indexes = TU_None;
3359
3360 bms_free(hot_attrs);
3361 bms_free(sum_attrs);
3362 bms_free(key_attrs);
3363 bms_free(id_attrs);
3364 /* modified_attrs not yet initialized */
3365 bms_free(interesting_attrs);
3366 return TM_Deleted;
3367 }
3368
3369 /*
3370 * Fill in enough data in oldtup for HeapDetermineColumnsInfo to work
3371 * properly.
3372 */
3373 oldtup.t_tableOid = RelationGetRelid(relation);
3374 oldtup.t_data = (HeapTupleHeader) PageGetItem(page, lp);
3375 oldtup.t_len = ItemIdGetLength(lp);
3376 oldtup.t_self = *otid;
3377
3378 /* the new tuple is ready, except for this: */
3379 newtup->t_tableOid = RelationGetRelid(relation);
3380
3381 /*
3382 * Determine columns modified by the update. Additionally, identify
3383 * whether any of the unmodified replica identity key attributes in the
3384 * old tuple is externally stored or not. This is required because for
3385 * such attributes the flattened value won't be WAL logged as part of the
3386 * new tuple so we must include it as part of the old_key_tuple. See
3387 * ExtractReplicaIdentity.
3388 */
3389 modified_attrs = HeapDetermineColumnsInfo(relation, interesting_attrs,
3390 id_attrs, &oldtup,
3391 newtup, &id_has_external);
3392
3393 /*
3394 * If we're not updating any "key" column, we can grab a weaker lock type.
3395 * This allows for more concurrency when we are running simultaneously
3396 * with foreign key checks.
3397 *
3398 * Note that if a column gets detoasted while executing the update, but
3399 * the value ends up being the same, this test will fail and we will use
3400 * the stronger lock. This is acceptable; the important case to optimize
3401 * is updates that don't manipulate key columns, not those that
3402 * serendipitously arrive at the same key values.
3403 */
3404 if (!bms_overlap(modified_attrs, key_attrs))
3405 {
3406 *lockmode = LockTupleNoKeyExclusive;
3407 mxact_status = MultiXactStatusNoKeyUpdate;
3408 key_intact = true;
3409
3410 /*
3411 * If this is the first possibly-multixact-able operation in the
3412 * current transaction, set my per-backend OldestMemberMXactId
3413 * setting. We can be certain that the transaction will never become a
3414 * member of any older MultiXactIds than that. (We have to do this
3415 * even if we end up just using our own TransactionId below, since
3416 * some other backend could incorporate our XID into a MultiXact
3417 * immediately afterwards.)
3418 */
3419 MultiXactIdSetOldestMember();
3420 }
3421 else
3422 {
3423 *lockmode = LockTupleExclusive;
3424 mxact_status = MultiXactStatusUpdate;
3425 key_intact = false;
3426 }
3427
3428 /*
3429 * Note: beyond this point, use oldtup not otid to refer to old tuple.
3430 * otid may very well point at newtup->t_self, which we will overwrite
3431 * with the new tuple's location, so there's great risk of confusion if we
3432 * use otid anymore.
3433 */
3434
3435l2:
3436 checked_lockers = false;
3437 locker_remains = false;
3438 result = HeapTupleSatisfiesUpdate(&oldtup, cid, buffer);
3439
3440 /* see below about the "no wait" case */
3441 Assert(result != TM_BeingModified || wait);
3442
3443 if (result == TM_Invisible)
3444 {
3445 UnlockReleaseBuffer(buffer);
3446 ereport(ERROR,
3447 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
3448 errmsg("attempted to update invisible tuple")));
3449 }
3450 else if (result == TM_BeingModified && wait)
3451 {
3452 TransactionId xwait;
3453 uint16 infomask;
3454 bool can_continue = false;
3455
3456 /*
3457 * XXX note that we don't consider the "no wait" case here. This
3458 * isn't a problem currently because no caller uses that case, but it
3459 * should be fixed if such a caller is introduced. It wasn't a
3460 * problem previously because this code would always wait, but now
3461 * that some tuple locks do not conflict with one of the lock modes we
3462 * use, it is possible that this case is interesting to handle
3463 * specially.
3464 *
3465 * This may cause failures with third-party code that calls
3466 * heap_update directly.
3467 */
3468
3469 /* must copy state data before unlocking buffer */
3470 xwait = HeapTupleHeaderGetRawXmax(oldtup.t_data);
3471 infomask = oldtup.t_data->t_infomask;
3472
3473 /*
3474 * Now we have to do something about the existing locker. If it's a
3475 * multi, sleep on it; we might be awakened before it is completely
3476 * gone (or even not sleep at all in some cases); we need to preserve
3477 * it as locker, unless it is gone completely.
3478 *
3479 * If it's not a multi, we need to check for sleeping conditions
3480 * before actually going to sleep. If the update doesn't conflict
3481 * with the locks, we just continue without sleeping (but making sure
3482 * it is preserved).
3483 *
3484 * Before sleeping, we need to acquire tuple lock to establish our
3485 * priority for the tuple (see heap_lock_tuple). LockTuple will
3486 * release us when we are next-in-line for the tuple. Note we must
3487 * not acquire the tuple lock until we're sure we're going to sleep;
3488 * otherwise we're open for race conditions with other transactions
3489 * holding the tuple lock which sleep on us.
3490 *
3491 * If we are forced to "start over" below, we keep the tuple lock;
3492 * this arranges that we stay at the head of the line while rechecking
3493 * tuple state.
3494 */
3495 if (infomask & HEAP_XMAX_IS_MULTI)
3496 {
3497 TransactionId update_xact;
3498 int remain;
3499 bool current_is_member = false;
3500
3501 if (DoesMultiXactIdConflict((MultiXactId) xwait, infomask,
3502 *lockmode, &current_is_member))
3503 {
3504 LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
3505
3506 /*
3507 * Acquire the lock, if necessary (but skip it when we're
3508 * requesting a lock and already have one; avoids deadlock).
3509 */
3510 if (!current_is_member)
3511 heap_acquire_tuplock(relation, &(oldtup.t_self), *lockmode,
3512 LockWaitBlock, &have_tuple_lock);
3513
3514 /* wait for multixact */
3515 MultiXactIdWait((MultiXactId) xwait, mxact_status, infomask,
3516 relation, &oldtup.t_self, XLTW_Update,
3517 &remain);
3518 checked_lockers = true;
3519 locker_remains = remain != 0;
3520 LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
3521
3522 /*
3523 * If xwait had just locked the tuple then some other xact
3524 * could update this tuple before we get to this point. Check
3525 * for xmax change, and start over if so.
3526 */
3527 if (xmax_infomask_changed(oldtup.t_data->t_infomask,
3528 infomask) ||
3529 !TransactionIdEquals(HeapTupleHeaderGetRawXmax(oldtup.t_data),
3530 xwait))
3531 goto l2;
3532 }
3533
3534 /*
3535 * Note that the multixact may not be done by now. It could have
3536 * surviving members; our own xact or other subxacts of this
3537 * backend, and also any other concurrent transaction that locked
3538 * the tuple with LockTupleKeyShare if we only got
3539 * LockTupleNoKeyExclusive. If this is the case, we have to be
3540 * careful to mark the updated tuple with the surviving members in
3541 * Xmax.
3542 *
3543 * Note that there could have been another update in the
3544 * MultiXact. In that case, we need to check whether it committed
3545 * or aborted. If it aborted we are safe to update it again;
3546 * otherwise there is an update conflict, and we have to return
3547 * TableTuple{Deleted, Updated} below.
3548 *
3549 * In the LockTupleExclusive case, we still need to preserve the
3550 * surviving members: those would include the tuple locks we had
3551 * before this one, which are important to keep in case this
3552 * subxact aborts.
3553 */
3554 if (!HEAP_XMAX_IS_LOCKED_ONLY(oldtup.t_data->t_infomask))
3555 update_xact = HeapTupleGetUpdateXid(oldtup.t_data);
3556 else
3557 update_xact = InvalidTransactionId;
3558
3559 /*
3560 * There was no UPDATE in the MultiXact; or it aborted. No
3561 * TransactionIdIsInProgress() call needed here, since we called
3562 * MultiXactIdWait() above.
3563 */
3564 if (!TransactionIdIsValid(update_xact) ||
3565 TransactionIdDidAbort(update_xact))
3566 can_continue = true;
3567 }
3568 else if (TransactionIdIsCurrentTransactionId(xwait))
3569 {
3570 /*
3571 * The only locker is ourselves; we can avoid grabbing the tuple
3572 * lock here, but must preserve our locking information.
3573 */
3574 checked_lockers = true;
3575 locker_remains = true;
3576 can_continue = true;
3577 }
3578 else if (HEAP_XMAX_IS_KEYSHR_LOCKED(infomask) && key_intact)
3579 {
3580 /*
3581 * If it's just a key-share locker, and we're not changing the key
3582 * columns, we don't need to wait for it to end; but we need to
3583 * preserve it as locker.
3584 */
3585 checked_lockers = true;
3586 locker_remains = true;
3587 can_continue = true;
3588 }
3589 else
3590 {
3591 /*
3592 * Wait for regular transaction to end; but first, acquire tuple
3593 * lock.
3594 */
3595 LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
3596 heap_acquire_tuplock(relation, &(oldtup.t_self), *lockmode,
3597 LockWaitBlock, &have_tuple_lock);
3598 XactLockTableWait(xwait, relation, &oldtup.t_self,
3599 XLTW_Update);
3600 checked_lockers = true;
3601 LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
3602
3603 /*
3604 * xwait is done, but if xwait had just locked the tuple then some
3605 * other xact could update this tuple before we get to this point.
3606 * Check for xmax change, and start over if so.
3607 */
3608 if (xmax_infomask_changed(oldtup.t_data->t_infomask, infomask) ||
3609 !TransactionIdEquals(xwait,
3610 HeapTupleHeaderGetRawXmax(oldtup.t_data)))
3611 goto l2;
3612
3613 /* Otherwise check if it committed or aborted */
3614 UpdateXmaxHintBits(oldtup.t_data, buffer, xwait);
3615 if (oldtup.t_data->t_infomask & HEAP_XMAX_INVALID)
3616 can_continue = true;
3617 }
3618
3619 if (can_continue)
3620 result = TM_Ok;
3621 else if (!ItemPointerEquals(&oldtup.t_self, &oldtup.t_data->t_ctid))
3622 result = TM_Updated;
3623 else
3624 result = TM_Deleted;
3625 }
3626
3627 /* Sanity check the result HeapTupleSatisfiesUpdate() and the logic above */
3628 if (result != TM_Ok)
3629 {
3630 Assert(result == TM_SelfModified ||
3631 result == TM_Updated ||
3632 result == TM_Deleted ||
3633 result == TM_BeingModified);
3634 Assert(!(oldtup.t_data->t_infomask & HEAP_XMAX_INVALID));
3635 Assert(result != TM_Updated ||
3636 !ItemPointerEquals(&oldtup.t_self, &oldtup.t_data->t_ctid));
3637 }
3638
3639 if (crosscheck != InvalidSnapshot && result == TM_Ok)
3640 {
3641 /* Perform additional check for transaction-snapshot mode RI updates */
3642 if (!HeapTupleSatisfiesVisibility(&oldtup, crosscheck, buffer))
3643 result = TM_Updated;
3644 }
3645
3646 if (result != TM_Ok)
3647 {
3648 tmfd->ctid = oldtup.t_data->t_ctid;
3649 tmfd->xmax = HeapTupleHeaderGetUpdateXid(oldtup.t_data);
3650 if (result == TM_SelfModified)
3651 tmfd->cmax = HeapTupleHeaderGetCmax(oldtup.t_data);
3652 else
3653 tmfd->cmax = InvalidCommandId;
3654 UnlockReleaseBuffer(buffer);
3655 if (have_tuple_lock)
3656 UnlockTupleTuplock(relation, &(oldtup.t_self), *lockmode);
3657 if (vmbuffer != InvalidBuffer)
3658 ReleaseBuffer(vmbuffer);
3659 *update_indexes = TU_None;
3660
3661 bms_free(hot_attrs);
3662 bms_free(sum_attrs);
3663 bms_free(key_attrs);
3664 bms_free(id_attrs);
3665 bms_free(modified_attrs);
3666 bms_free(interesting_attrs);
3667 return result;
3668 }
3669
3670 /*
3671 * If we didn't pin the visibility map page and the page has become all
3672 * visible while we were busy locking the buffer, or during some
3673 * subsequent window during which we had it unlocked, we'll have to unlock
3674 * and re-lock, to avoid holding the buffer lock across an I/O. That's a
3675 * bit unfortunate, especially since we'll now have to recheck whether the
3676 * tuple has been locked or updated under us, but hopefully it won't
3677 * happen very often.
3678 */
3679 if (vmbuffer == InvalidBuffer && PageIsAllVisible(page))
3680 {
3681 LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
3682 visibilitymap_pin(relation, block, &vmbuffer);
3683 LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
3684 goto l2;
3685 }
3686
3687 /* Fill in transaction status data */
3688
3689 /*
3690 * If the tuple we're updating is locked, we need to preserve the locking
3691 * info in the old tuple's Xmax. Prepare a new Xmax value for this.
3692 */
3693 compute_new_xmax_infomask(HeapTupleHeaderGetRawXmax(oldtup.t_data),
3694 oldtup.t_data->t_infomask,
3695 oldtup.t_data->t_infomask2,
3696 xid, *lockmode, true,
3697 &xmax_old_tuple, &infomask_old_tuple,
3698 &infomask2_old_tuple);
3699
3700 /*
3701 * And also prepare an Xmax value for the new copy of the tuple. If there
3702 * was no xmax previously, or there was one but all lockers are now gone,
3703 * then use InvalidTransactionId; otherwise, get the xmax from the old
3704 * tuple. (In rare cases that might also be InvalidTransactionId and yet
3705 * not have the HEAP_XMAX_INVALID bit set; that's fine.)
3706 */
3707 if ((oldtup.t_data->t_infomask & HEAP_XMAX_INVALID) ||
3708 HEAP_LOCKED_UPGRADED(oldtup.t_data->t_infomask) ||
3709 (checked_lockers && !locker_remains))
3710 xmax_new_tuple = InvalidTransactionId;
3711 else
3712 xmax_new_tuple = HeapTupleHeaderGetRawXmax(oldtup.t_data);
3713
3714 if (!TransactionIdIsValid(xmax_new_tuple))
3715 {
3716 infomask_new_tuple = HEAP_XMAX_INVALID;
3717 infomask2_new_tuple = 0;
3718 }
3719 else
3720 {
3721 /*
3722 * If we found a valid Xmax for the new tuple, then the infomask bits
3723 * to use on the new tuple depend on what was there on the old one.
3724 * Note that since we're doing an update, the only possibility is that
3725 * the lockers had FOR KEY SHARE lock.
3726 */
3727 if (oldtup.t_data->t_infomask & HEAP_XMAX_IS_MULTI)
3728 {
3729 GetMultiXactIdHintBits(xmax_new_tuple, &infomask_new_tuple,
3730 &infomask2_new_tuple);
3731 }
3732 else
3733 {
3734 infomask_new_tuple = HEAP_XMAX_KEYSHR_LOCK | HEAP_XMAX_LOCK_ONLY;
3735 infomask2_new_tuple = 0;
3736 }
3737 }
3738
3739 /*
3740 * Prepare the new tuple with the appropriate initial values of Xmin and
3741 * Xmax, as well as initial infomask bits as computed above.
3742 */
3743 newtup->t_data->t_infomask &= ~(HEAP_XACT_MASK);
3744 newtup->t_data->t_infomask2 &= ~(HEAP2_XACT_MASK);
3745 HeapTupleHeaderSetXmin(newtup->t_data, xid);
3746 HeapTupleHeaderSetCmin(newtup->t_data, cid);
3747 newtup->t_data->t_infomask |= HEAP_UPDATED | infomask_new_tuple;
3748 newtup->t_data->t_infomask2 |= infomask2_new_tuple;
3749 HeapTupleHeaderSetXmax(newtup->t_data, xmax_new_tuple);
3750
3751 /*
3752 * Replace cid with a combo CID if necessary. Note that we already put
3753 * the plain cid into the new tuple.
3754 */
3755 HeapTupleHeaderAdjustCmax(oldtup.t_data, &cid, &iscombo);
3756
3757 /*
3758 * If the toaster needs to be activated, OR if the new tuple will not fit
3759 * on the same page as the old, then we need to release the content lock
3760 * (but not the pin!) on the old tuple's buffer while we are off doing
3761 * TOAST and/or table-file-extension work. We must mark the old tuple to
3762 * show that it's locked, else other processes may try to update it
3763 * themselves.
3764 *
3765 * We need to invoke the toaster if there are already any out-of-line
3766 * toasted values present, or if the new tuple is over-threshold.
3767 */
3768 if (relation->rd_rel->relkind != RELKIND_RELATION &&
3769 relation->rd_rel->relkind != RELKIND_MATVIEW)
3770 {
3771 /* toast table entries should never be recursively toasted */
3772 Assert(!HeapTupleHasExternal(&oldtup));
3773 Assert(!HeapTupleHasExternal(newtup));
3774 need_toast = false;
3775 }
3776 else
3777 need_toast = (HeapTupleHasExternal(&oldtup) ||
3778 HeapTupleHasExternal(newtup) ||
3779 newtup->t_len > TOAST_TUPLE_THRESHOLD);
3780
3781 pagefree = PageGetHeapFreeSpace(page);
3782
3783 newtupsize = MAXALIGN(newtup->t_len);
3784
3785 if (need_toast || newtupsize > pagefree)
3786 {
3787 TransactionId xmax_lock_old_tuple;
3788 uint16 infomask_lock_old_tuple,
3789 infomask2_lock_old_tuple;
3790 bool cleared_all_frozen = false;
3791
3792 /*
3793 * To prevent concurrent sessions from updating the tuple, we have to
3794 * temporarily mark it locked, while we release the page-level lock.
3795 *
3796 * To satisfy the rule that any xid potentially appearing in a buffer
3797 * written out to disk, we unfortunately have to WAL log this
3798 * temporary modification. We can reuse xl_heap_lock for this
3799 * purpose. If we crash/error before following through with the
3800 * actual update, xmax will be of an aborted transaction, allowing
3801 * other sessions to proceed.
3802 */
3803
3804 /*
3805 * Compute xmax / infomask appropriate for locking the tuple. This has
3806 * to be done separately from the combo that's going to be used for
3807 * updating, because the potentially created multixact would otherwise
3808 * be wrong.
3809 */
3810 compute_new_xmax_infomask(HeapTupleHeaderGetRawXmax(oldtup.t_data),
3811 oldtup.t_data->t_infomask,
3812 oldtup.t_data->t_infomask2,
3813 xid, *lockmode, false,
3814 &xmax_lock_old_tuple, &infomask_lock_old_tuple,
3815 &infomask2_lock_old_tuple);
3816
3817 Assert(HEAP_XMAX_IS_LOCKED_ONLY(infomask_lock_old_tuple));
3818
3819 START_CRIT_SECTION();
3820
3821 /* Clear obsolete visibility flags ... */
3822 oldtup.t_data->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
3823 oldtup.t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED;
3824 HeapTupleClearHotUpdated(&oldtup);
3825 /* ... and store info about transaction updating this tuple */
3826 Assert(TransactionIdIsValid(xmax_lock_old_tuple));
3827 HeapTupleHeaderSetXmax(oldtup.t_data, xmax_lock_old_tuple);
3828 oldtup.t_data->t_infomask |= infomask_lock_old_tuple;
3829 oldtup.t_data->t_infomask2 |= infomask2_lock_old_tuple;
3830 HeapTupleHeaderSetCmax(oldtup.t_data, cid, iscombo);
3831
3832 /* temporarily make it look not-updated, but locked */
3833 oldtup.t_data->t_ctid = oldtup.t_self;
3834
3835 /*
3836 * Clear all-frozen bit on visibility map if needed. We could
3837 * immediately reset ALL_VISIBLE, but given that the WAL logging
3838 * overhead would be unchanged, that doesn't seem necessarily
3839 * worthwhile.
3840 */
3841 if (PageIsAllVisible(page) &&
3842 visibilitymap_clear(relation, block, vmbuffer,
3843 VISIBILITYMAP_ALL_FROZEN))
3844 cleared_all_frozen = true;
3845
3846 MarkBufferDirty(buffer);
3847
3848 if (RelationNeedsWAL(relation))
3849 {
3850 xl_heap_lock xlrec;
3851 XLogRecPtr recptr;
3852
3853 XLogBeginInsert();
3854 XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
3855
3856 xlrec.offnum = ItemPointerGetOffsetNumber(&oldtup.t_self);
3857 xlrec.xmax = xmax_lock_old_tuple;
3858 xlrec.infobits_set = compute_infobits(oldtup.t_data->t_infomask,
3859 oldtup.t_data->t_infomask2);
3860 xlrec.flags =
3861 cleared_all_frozen ? XLH_LOCK_ALL_FROZEN_CLEARED : 0;
3862 XLogRegisterData(&xlrec, SizeOfHeapLock);
3863 recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_LOCK);
3864 PageSetLSN(page, recptr);
3865 }
3866
3867 END_CRIT_SECTION();
3868
3869 LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
3870
3871 /*
3872 * Let the toaster do its thing, if needed.
3873 *
3874 * Note: below this point, heaptup is the data we actually intend to
3875 * store into the relation; newtup is the caller's original untoasted
3876 * data.
3877 */
3878 if (need_toast)
3879 {
3880 /* Note we always use WAL and FSM during updates */
3881 heaptup = heap_toast_insert_or_update(relation, newtup, &oldtup, 0);
3882 newtupsize = MAXALIGN(heaptup->t_len);
3883 }
3884 else
3885 heaptup = newtup;
3886
3887 /*
3888 * Now, do we need a new page for the tuple, or not? This is a bit
3889 * tricky since someone else could have added tuples to the page while
3890 * we weren't looking. We have to recheck the available space after
3891 * reacquiring the buffer lock. But don't bother to do that if the
3892 * former amount of free space is still not enough; it's unlikely
3893 * there's more free now than before.
3894 *
3895 * What's more, if we need to get a new page, we will need to acquire
3896 * buffer locks on both old and new pages. To avoid deadlock against
3897 * some other backend trying to get the same two locks in the other
3898 * order, we must be consistent about the order we get the locks in.
3899 * We use the rule "lock the lower-numbered page of the relation
3900 * first". To implement this, we must do RelationGetBufferForTuple
3901 * while not holding the lock on the old page, and we must rely on it
3902 * to get the locks on both pages in the correct order.
3903 *
3904 * Another consideration is that we need visibility map page pin(s) if
3905 * we will have to clear the all-visible flag on either page. If we
3906 * call RelationGetBufferForTuple, we rely on it to acquire any such
3907 * pins; but if we don't, we have to handle that here. Hence we need
3908 * a loop.
3909 */
3910 for (;;)
3911 {
3912 if (newtupsize > pagefree)
3913 {
3914 /* It doesn't fit, must use RelationGetBufferForTuple. */
3915 newbuf = RelationGetBufferForTuple(relation, heaptup->t_len,
3916 buffer, 0, NULL,
3917 &vmbuffer_new, &vmbuffer,
3918 0);
3919 /* We're all done. */
3920 break;
3921 }
3922 /* Acquire VM page pin if needed and we don't have it. */
3923 if (vmbuffer == InvalidBuffer && PageIsAllVisible(page))
3924 visibilitymap_pin(relation, block, &vmbuffer);
3925 /* Re-acquire the lock on the old tuple's page. */
3926 LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
3927 /* Re-check using the up-to-date free space */
3928 pagefree = PageGetHeapFreeSpace(page);
3929 if (newtupsize > pagefree ||
3930 (vmbuffer == InvalidBuffer && PageIsAllVisible(page)))
3931 {
3932 /*
3933 * Rats, it doesn't fit anymore, or somebody just now set the
3934 * all-visible flag. We must now unlock and loop to avoid
3935 * deadlock. Fortunately, this path should seldom be taken.
3936 */
3937 LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
3938 }
3939 else
3940 {
3941 /* We're all done. */
3942 newbuf = buffer;
3943 break;
3944 }
3945 }
3946 }
3947 else
3948 {
3949 /* No TOAST work needed, and it'll fit on same page */
3950 newbuf = buffer;
3951 heaptup = newtup;
3952 }
3953
3954 /*
3955 * We're about to do the actual update -- check for conflict first, to
3956 * avoid possibly having to roll back work we've just done.
3957 *
3958 * This is safe without a recheck as long as there is no possibility of
3959 * another process scanning the pages between this check and the update
3960 * being visible to the scan (i.e., exclusive buffer content lock(s) are
3961 * continuously held from this point until the tuple update is visible).
3962 *
3963 * For the new tuple the only check needed is at the relation level, but
3964 * since both tuples are in the same relation and the check for oldtup
3965 * will include checking the relation level, there is no benefit to a
3966 * separate check for the new tuple.
3967 */
3968 CheckForSerializableConflictIn(relation, &oldtup.t_self,
3969 BufferGetBlockNumber(buffer));
3970
3971 /*
3972 * At this point newbuf and buffer are both pinned and locked, and newbuf
3973 * has enough space for the new tuple. If they are the same buffer, only
3974 * one pin is held.
3975 */
3976
3977 if (newbuf == buffer)
3978 {
3979 /*
3980 * Since the new tuple is going into the same page, we might be able
3981 * to do a HOT update. Check if any of the index columns have been
3982 * changed.
3983 */
3984 if (!bms_overlap(modified_attrs, hot_attrs))
3985 {
3986 use_hot_update = true;
3987
3988 /*
3989 * If none of the columns that are used in hot-blocking indexes
3990 * were updated, we can apply HOT, but we do still need to check
3991 * if we need to update the summarizing indexes, and update those
3992 * indexes if the columns were updated, or we may fail to detect
3993 * e.g. value bound changes in BRIN minmax indexes.
3994 */
3995 if (bms_overlap(modified_attrs, sum_attrs))
3996 summarized_update = true;
3997 }
3998 }
3999 else
4000 {
4001 /* Set a hint that the old page could use prune/defrag */
4002 PageSetFull(page);
4003 }
4004
4005 /*
4006 * Compute replica identity tuple before entering the critical section so
4007 * we don't PANIC upon a memory allocation failure.
4008 * ExtractReplicaIdentity() will return NULL if nothing needs to be
4009 * logged. Pass old key required as true only if the replica identity key
4010 * columns are modified or it has external data.
4011 */
4012 old_key_tuple = ExtractReplicaIdentity(relation, &oldtup,
4013 bms_overlap(modified_attrs, id_attrs) ||
4014 id_has_external,
4015 &old_key_copied);
4016
4017 /* NO EREPORT(ERROR) from here till changes are logged */
4018 START_CRIT_SECTION();
4019
4020 /*
4021 * If this transaction commits, the old tuple will become DEAD sooner or
4022 * later. Set flag that this page is a candidate for pruning once our xid
4023 * falls below the OldestXmin horizon. If the transaction finally aborts,
4024 * the subsequent page pruning will be a no-op and the hint will be
4025 * cleared.
4026 *
4027 * XXX Should we set hint on newbuf as well? If the transaction aborts,
4028 * there would be a prunable tuple in the newbuf; but for now we choose
4029 * not to optimize for aborts. Note that heap_xlog_update must be kept in
4030 * sync if this decision changes.
4031 */
4032 PageSetPrunable(page, xid);
4033
4034 if (use_hot_update)
4035 {
4036 /* Mark the old tuple as HOT-updated */
4037 HeapTupleSetHotUpdated(&oldtup);
4038 /* And mark the new tuple as heap-only */
4039 HeapTupleSetHeapOnly(heaptup);
4040 /* Mark the caller's copy too, in case different from heaptup */
4041 HeapTupleSetHeapOnly(newtup);
4042 }
4043 else
4044 {
4045 /* Make sure tuples are correctly marked as not-HOT */
4046 HeapTupleClearHotUpdated(&oldtup);
4047 HeapTupleClearHeapOnly(heaptup);
4048 HeapTupleClearHeapOnly(newtup);
4049 }
4050
4051 RelationPutHeapTuple(relation, newbuf, heaptup, false); /* insert new tuple */
4052
4053
4054 /* Clear obsolete visibility flags, possibly set by ourselves above... */
4055 oldtup.t_data->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
4056 oldtup.t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED;
4057 /* ... and store info about transaction updating this tuple */
4058 Assert(TransactionIdIsValid(xmax_old_tuple));
4059 HeapTupleHeaderSetXmax(oldtup.t_data, xmax_old_tuple);
4060 oldtup.t_data->t_infomask |= infomask_old_tuple;
4061 oldtup.t_data->t_infomask2 |= infomask2_old_tuple;
4062 HeapTupleHeaderSetCmax(oldtup.t_data, cid, iscombo);
4063
4064 /* record address of new tuple in t_ctid of old one */
4065 oldtup.t_data->t_ctid = heaptup->t_self;
4066
4067 /* clear PD_ALL_VISIBLE flags, reset all visibilitymap bits */
4068 if (PageIsAllVisible(BufferGetPage(buffer)))
4069 {
4070 all_visible_cleared = true;
4071 PageClearAllVisible(BufferGetPage(buffer));
4072 visibilitymap_clear(relation, BufferGetBlockNumber(buffer),
4073 vmbuffer, VISIBILITYMAP_VALID_BITS);
4074 }
4075 if (newbuf != buffer && PageIsAllVisible(BufferGetPage(newbuf)))
4076 {
4077 all_visible_cleared_new = true;
4078 PageClearAllVisible(BufferGetPage(newbuf));
4079 visibilitymap_clear(relation, BufferGetBlockNumber(newbuf),
4080 vmbuffer_new, VISIBILITYMAP_VALID_BITS);
4081 }
4082
4083 if (newbuf != buffer)
4084 MarkBufferDirty(newbuf);
4085 MarkBufferDirty(buffer);
4086
4087 /* XLOG stuff */
4088 if (RelationNeedsWAL(relation))
4089 {
4090 XLogRecPtr recptr;
4091
4092 /*
4093 * For logical decoding we need combo CIDs to properly decode the
4094 * catalog.
4095 */
4096 if (RelationIsAccessibleInLogicalDecoding(relation))
4097 {
4098 log_heap_new_cid(relation, &oldtup);
4099 log_heap_new_cid(relation, heaptup);
4100 }
4101
4102 recptr = log_heap_update(relation, buffer,
4103 newbuf, &oldtup, heaptup,
4104 old_key_tuple,
4105 all_visible_cleared,
4106 all_visible_cleared_new);
4107 if (newbuf != buffer)
4108 {
4109 PageSetLSN(BufferGetPage(newbuf), recptr);
4110 }
4111 PageSetLSN(BufferGetPage(buffer), recptr);
4112 }
4113
4114 END_CRIT_SECTION();
4115
4116 if (newbuf != buffer)
4117 LockBuffer(newbuf, BUFFER_LOCK_UNLOCK);
4118 LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
4119
4120 /*
4121 * Mark old tuple for invalidation from system caches at next command
4122 * boundary, and mark the new tuple for invalidation in case we abort. We
4123 * have to do this before releasing the buffer because oldtup is in the
4124 * buffer. (heaptup is all in local memory, but it's necessary to process
4125 * both tuple versions in one call to inval.c so we can avoid redundant
4126 * sinval messages.)
4127 */
4128 CacheInvalidateHeapTuple(relation, &oldtup, heaptup);
4129
4130 /* Now we can release the buffer(s) */
4131 if (newbuf != buffer)
4132 ReleaseBuffer(newbuf);
4133 ReleaseBuffer(buffer);
4134 if (BufferIsValid(vmbuffer_new))
4135 ReleaseBuffer(vmbuffer_new);
4136 if (BufferIsValid(vmbuffer))
4137 ReleaseBuffer(vmbuffer);
4138
4139 /*
4140 * Release the lmgr tuple lock, if we had it.
4141 */
4142 if (have_tuple_lock)
4143 UnlockTupleTuplock(relation, &(oldtup.t_self), *lockmode);
4144
4145 pgstat_count_heap_update(relation, use_hot_update, newbuf != buffer);
4146
4147 /*
4148 * If heaptup is a private copy, release it. Don't forget to copy t_self
4149 * back to the caller's image, too.
4150 */
4151 if (heaptup != newtup)
4152 {
4153 newtup->t_self = heaptup->t_self;
4154 heap_freetuple(heaptup);
4155 }
4156
4157 /*
4158 * If it is a HOT update, the update may still need to update summarized
4159 * indexes, lest we fail to update those summaries and get incorrect
4160 * results (for example, minmax bounds of the block may change with this
4161 * update).
4162 */
4163 if (use_hot_update)
4164 {
4165 if (summarized_update)
4166 *update_indexes = TU_Summarizing;
4167 else
4168 *update_indexes = TU_None;
4169 }
4170 else
4171 *update_indexes = TU_All;
4172
4173 if (old_key_tuple != NULL && old_key_copied)
4174 heap_freetuple(old_key_tuple);
4175
4176 bms_free(hot_attrs);
4177 bms_free(sum_attrs);
4178 bms_free(key_attrs);
4179 bms_free(id_attrs);
4180 bms_free(modified_attrs);
4181 bms_free(interesting_attrs);
4182
4183 return TM_Ok;
4184}
void bms_free(Bitmapset *a)
Definition: bitmapset.c:239
Bitmapset * bms_add_members(Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:917
bool bms_overlap(const Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:582
static void PageSetFull(Page page)
Definition: bufpage.h:418
static Bitmapset * HeapDetermineColumnsInfo(Relation relation, Bitmapset *interesting_cols, Bitmapset *external_cols, HeapTuple oldtup, HeapTuple newtup, bool *has_external)
Definition: heapam.c:4364
static XLogRecPtr log_heap_update(Relation reln, Buffer oldbuf, Buffer newbuf, HeapTuple oldtup, HeapTuple newtup, HeapTuple old_key_tuple, bool all_visible_cleared, bool new_all_visible_cleared)
Definition: heapam.c:8794
TransactionId HeapTupleGetUpdateXid(const HeapTupleHeaderData *tup)
Definition: heapam.c:7535
HeapTuple heap_toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup, int options)
Definition: heaptoast.c:96
#define TOAST_TUPLE_THRESHOLD
Definition: heaptoast.h:48
static void HeapTupleClearHotUpdated(const HeapTupleData *tuple)
Definition: htup_details.h:780
#define HEAP2_XACT_MASK
Definition: htup_details.h:293
#define HEAP_XMAX_LOCK_ONLY
Definition: htup_details.h:197
static void HeapTupleHeaderSetCmin(HeapTupleHeaderData *tup, CommandId cid)
Definition: htup_details.h:422
static void HeapTupleSetHeapOnly(const HeapTupleData *tuple)
Definition: htup_details.h:792
#define HEAP_XACT_MASK
Definition: htup_details.h:215
static void HeapTupleSetHotUpdated(const HeapTupleData *tuple)
Definition: htup_details.h:774
static void HeapTupleClearHeapOnly(const HeapTupleData *tuple)
Definition: htup_details.h:798
#define HEAP_UPDATED
Definition: htup_details.h:210
#define HEAP_XMAX_KEYSHR_LOCK
Definition: htup_details.h:194
#define INJECTION_POINT(name)
void pgstat_count_heap_update(Relation rel, bool hot, bool newpage)
Bitmapset * RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind)
Definition: relcache.c:5286
@ INDEX_ATTR_BITMAP_KEY
Definition: relcache.h:61
@ INDEX_ATTR_BITMAP_HOT_BLOCKING
Definition: relcache.h:64
@ INDEX_ATTR_BITMAP_SUMMARIZED
Definition: relcache.h:65
@ INDEX_ATTR_BITMAP_IDENTITY_KEY
Definition: relcache.h:63
bool RelationSupportsSysCache(Oid relid)
Definition: syscache.c:770
@ TU_Summarizing
Definition: tableam.h:118
@ TU_All
Definition: tableam.h:115
@ TU_None
Definition: tableam.h:112
bool TransactionIdDidAbort(TransactionId transactionId)
Definition: transam.c:188

References Assert(), bms_add_members(), bms_free(), bms_overlap(), BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage(), BufferIsValid(), CacheInvalidateHeapTuple(), CheckForSerializableConflictIn(), TM_FailureData::cmax, compute_infobits(), compute_new_xmax_infomask(), TM_FailureData::ctid, DoesMultiXactIdConflict(), END_CRIT_SECTION, ereport, errcode(), errmsg(), ERROR, ExtractReplicaIdentity(), xl_heap_lock::flags, GetCurrentTransactionId(), GetMultiXactIdHintBits(), HEAP2_XACT_MASK, heap_acquire_tuplock(), heap_freetuple(), HEAP_LOCKED_UPGRADED(), HEAP_MOVED, heap_toast_insert_or_update(), HEAP_UPDATED, HEAP_XACT_MASK, HEAP_XMAX_BITS, HEAP_XMAX_INVALID, HEAP_XMAX_IS_KEYSHR_LOCKED(), HEAP_XMAX_IS_LOCKED_ONLY(), HEAP_XMAX_IS_MULTI, HEAP_XMAX_KEYSHR_LOCK, HEAP_XMAX_LOCK_ONLY, HeapDetermineColumnsInfo(), HeapTupleClearHeapOnly(), HeapTupleClearHotUpdated(), HeapTupleGetUpdateXid(), HeapTupleHasExternal(), HeapTupleHeaderAdjustCmax(), HeapTupleHeaderGetCmax(), HeapTupleHeaderGetNatts, HeapTupleHeaderGetRawXmax(), HeapTupleHeaderGetUpdateXid(), HeapTupleHeaderSetCmax(), HeapTupleHeaderSetCmin(), HeapTupleHeaderSetXmax(), HeapTupleHeaderSetXmin(), HeapTupleSatisfiesUpdate(), HeapTupleSatisfiesVisibility(), HeapTupleSetHeapOnly(), HeapTupleSetHotUpdated(), INDEX_ATTR_BITMAP_HOT_BLOCKING, INDEX_ATTR_BITMAP_IDENTITY_KEY, INDEX_ATTR_BITMAP_KEY, INDEX_ATTR_BITMAP_SUMMARIZED, xl_heap_lock::infobits_set, INJECTION_POINT, InvalidBuffer, InvalidCommandId, InvalidSnapshot, InvalidTransactionId, IsInParallelMode(), ItemIdGetLength, ItemIdIsNormal, ItemPointerEquals(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), ItemPointerIsValid(), LockBuffer(), LockTupleExclusive, LockTupleNoKeyExclusive, LockWaitBlock, log_heap_new_cid(), log_heap_update(), MarkBufferDirty(), MAXALIGN, MultiXactIdSetOldestMember(), MultiXactIdWait(), MultiXactStatusNoKeyUpdate, MultiXactStatusUpdate, xl_heap_lock::offnum, PageClearAllVisible(), PageGetHeapFreeSpace(), PageGetItem(), PageGetItemId(), PageIsAllVisible(), PageSetFull(), PageSetLSN(), PageSetPrunable, pgstat_count_heap_update(), RelationData::rd_rel, ReadBuffer(), REGBUF_STANDARD, RelationGetBufferForTuple(), RelationGetIndexAttrBitmap(), RelationGetNumberOfAttributes, RelationGetRelid, RelationIsAccessibleInLogicalDecoding, RelationNeedsWAL, RelationPutHeapTuple(), RelationSupportsSysCache(), ReleaseBuffer(), SizeOfHeapLock, START_CRIT_SECTION, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TM_BeingModified, TM_Deleted, TM_Invisible, TM_Ok, TM_SelfModified, TM_Updated, TOAST_TUPLE_THRESHOLD, TransactionIdDidAbort(), TransactionIdEquals, TransactionIdIsCurrentTransactionId(), TransactionIdIsValid, TU_All, TU_None, TU_Summarizing, UnlockReleaseBuffer(), UnlockTupleTuplock, UpdateXmaxHintBits(), VISIBILITYMAP_ALL_FROZEN, visibilitymap_clear(), visibilitymap_pin(), VISIBILITYMAP_VALID_BITS, XactLockTableWait(), XLH_LOCK_ALL_FROZEN_CLEARED, XLOG_HEAP_LOCK, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), XLogRegisterData(), XLTW_Update, xl_heap_lock::xmax, TM_FailureData::xmax, and xmax_infomask_changed().

Referenced by heapam_tuple_update(), and simple_heap_update().
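Both of those callers check the returned TM_Result together with the TM_FailureData and TU_UpdateIndexes outputs filled in above. A minimal, hypothetical caller sketch follows; update_one_tuple() is not a PostgreSQL function, it is modeled loosely on simple_heap_update() and omits the retry/EvalPlanQual handling a real executor path would need:

#include "postgres.h"
#include "access/heapam.h"
#include "access/tableam.h"
#include "access/xact.h"

static void
update_one_tuple(Relation rel, ItemPointer otid, HeapTuple newtup)
{
	TM_Result	result;
	TM_FailureData tmfd;
	LockTupleMode lockmode;
	TU_UpdateIndexes update_indexes;

	result = heap_update(rel, otid, newtup,
						 GetCurrentCommandId(true), InvalidSnapshot,
						 true,	/* wait for any conflicting locker */
						 &tmfd, &lockmode, &update_indexes);

	if (result != TM_Ok)
		elog(ERROR, "tuple concurrently updated or deleted");

	/*
	 * update_indexes reports which index entries are still needed: TU_None
	 * after a plain HOT update, TU_Summarizing when only summarizing (e.g.
	 * BRIN) indexes must be updated, TU_All otherwise.
	 */
	if (update_indexes != TU_None)
	{
		/* insert new index entries here, e.g. via ExecInsertIndexTuples() */
	}
}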

◆ heap_vacuum_rel()

void heap_vacuum_rel ( Relation rel, struct VacuumParams * params, BufferAccessStrategy bstrategy )

Definition at line 615 of file vacuumlazy.c.

617{
618 LVRelState *vacrel;
619 bool verbose,
620 instrument,
621 skipwithvm,
622 frozenxid_updated,
623 minmulti_updated;
624 BlockNumber orig_rel_pages,
625 new_rel_pages,
626 new_rel_allvisible,
627 new_rel_allfrozen;
628 PGRUsage ru0;
629 TimestampTz starttime = 0;
630 PgStat_Counter startreadtime = 0,
631 startwritetime = 0;
632 WalUsage startwalusage = pgWalUsage;
633 BufferUsage startbufferusage = pgBufferUsage;
634 ErrorContextCallback errcallback;
635 char **indnames = NULL;
636
637 verbose = (params->options & VACOPT_VERBOSE) != 0;
638 instrument = (verbose || (AmAutoVacuumWorkerProcess() &&
639 params->log_min_duration >= 0));
640 if (instrument)
641 {
642 pg_rusage_init(&ru0);
643 if (track_io_timing)
644 {
645 startreadtime = pgStatBlockReadTime;
646 startwritetime = pgStatBlockWriteTime;
647 }
648 }
649
650 /* Used for instrumentation and stats report */
651 starttime = GetCurrentTimestamp();
652
653 pgstat_progress_start_command(PROGRESS_COMMAND_VACUUM,
654 RelationGetRelid(rel));
655
656 /*
657 * Setup error traceback support for ereport() first. The idea is to set
658 * up an error context callback to display additional information on any
659 * error during a vacuum. During different phases of vacuum, we update
660 * the state so that the error context callback always display current
661 * information.
662 *
663 * Copy the names of heap rel into local memory for error reporting
664 * purposes, too. It isn't always safe to assume that we can get the name
665 * of each rel. It's convenient for code in lazy_scan_heap to always use
666 * these temp copies.
667 */
668 vacrel = (LVRelState *) palloc0(sizeof(LVRelState));
669 vacrel->dbname = get_database_name(MyDatabaseId);
670 vacrel->relnamespace = get_namespace_name(RelationGetNamespace(rel));
671 vacrel->relname = pstrdup(RelationGetRelationName(rel));
672 vacrel->indname = NULL;
673 vacrel->phase = VACUUM_ERRCB_PHASE_UNKNOWN;
674 vacrel->verbose = verbose;
675 errcallback.callback = vacuum_error_callback;
676 errcallback.arg = vacrel;
677 errcallback.previous = error_context_stack;
678 error_context_stack = &errcallback;
679
680 /* Set up high level stuff about rel and its indexes */
681 vacrel->rel = rel;
682 vac_open_indexes(vacrel->rel, RowExclusiveLock, &vacrel->nindexes,
683 &vacrel->indrels);
684 vacrel->bstrategy = bstrategy;
685 if (instrument && vacrel->nindexes > 0)
686 {
687 /* Copy index names used by instrumentation (not error reporting) */
688 indnames = palloc(sizeof(char *) * vacrel->nindexes);
689 for (int i = 0; i < vacrel->nindexes; i++)
690 indnames[i] = pstrdup(RelationGetRelationName(vacrel->indrels[i]));
691 }
692
693 /*
694 * The index_cleanup param either disables index vacuuming and cleanup or
695 * forces it to go ahead when we would otherwise apply the index bypass
696 * optimization. The default is 'auto', which leaves the final decision
697 * up to lazy_vacuum().
698 *
699 * The truncate param allows user to avoid attempting relation truncation,
700 * though it can't force truncation to happen.
701 */
704 params->truncate != VACOPTVALUE_AUTO);
705
706 /*
707 * While VacuumFailSafeActive is reset to false before calling this, we
708 * still need to reset it here due to recursive calls.
709 */
710 VacuumFailsafeActive = false;
711 vacrel->consider_bypass_optimization = true;
712 vacrel->do_index_vacuuming = true;
713 vacrel->do_index_cleanup = true;
714 vacrel->do_rel_truncate = (params->truncate != VACOPTVALUE_DISABLED);
715 if (params->index_cleanup == VACOPTVALUE_DISABLED)
716 {
717 /* Force disable index vacuuming up-front */
718 vacrel->do_index_vacuuming = false;
719 vacrel->do_index_cleanup = false;
720 }
721 else if (params->index_cleanup == VACOPTVALUE_ENABLED)
722 {
723 /* Force index vacuuming. Note that failsafe can still bypass. */
724 vacrel->consider_bypass_optimization = false;
725 }
726 else
727 {
728 /* Default/auto, make all decisions dynamically */
729 Assert(params->index_cleanup == VACOPTVALUE_AUTO);
730 }
731
732 /* Initialize page counters explicitly (be tidy) */
733 vacrel->scanned_pages = 0;
734 vacrel->eager_scanned_pages = 0;
735 vacrel->removed_pages = 0;
736 vacrel->new_frozen_tuple_pages = 0;
737 vacrel->lpdead_item_pages = 0;
738 vacrel->missed_dead_pages = 0;
739 vacrel->nonempty_pages = 0;
740 /* dead_items_alloc allocates vacrel->dead_items later on */
741
742 /* Allocate/initialize output statistics state */
743 vacrel->new_rel_tuples = 0;
744 vacrel->new_live_tuples = 0;
745 vacrel->indstats = (IndexBulkDeleteResult **)
746 palloc0(vacrel->nindexes * sizeof(IndexBulkDeleteResult *));
747
748 /* Initialize remaining counters (be tidy) */
749 vacrel->num_index_scans = 0;
750 vacrel->tuples_deleted = 0;
751 vacrel->tuples_frozen = 0;
752 vacrel->lpdead_items = 0;
753 vacrel->live_tuples = 0;
754 vacrel->recently_dead_tuples = 0;
755 vacrel->missed_dead_tuples = 0;
756
757 vacrel->vm_new_visible_pages = 0;
758 vacrel->vm_new_visible_frozen_pages = 0;
759 vacrel->vm_new_frozen_pages = 0;
760 vacrel->rel_pages = orig_rel_pages = RelationGetNumberOfBlocks(rel);
761
762 /*
763 * Get cutoffs that determine which deleted tuples are considered DEAD,
764 * not just RECENTLY_DEAD, and which XIDs/MXIDs to freeze. Then determine
765 * the extent of the blocks that we'll scan in lazy_scan_heap. It has to
766 * happen in this order to ensure that the OldestXmin cutoff field works
767 * as an upper bound on the XIDs stored in the pages we'll actually scan
768 * (NewRelfrozenXid tracking must never be allowed to miss unfrozen XIDs).
769 *
770 * Next acquire vistest, a related cutoff that's used in pruning. We use
771 * vistest in combination with OldestXmin to ensure that
772 * heap_page_prune_and_freeze() always removes any deleted tuple whose
773 * xmax is < OldestXmin. lazy_scan_prune must never become confused about
774 * whether a tuple should be frozen or removed. (In the future we might
775 * want to teach lazy_scan_prune to recompute vistest from time to time,
776 * to increase the number of dead tuples it can prune away.)
777 */
778 vacrel->aggressive = vacuum_get_cutoffs(rel, params, &vacrel->cutoffs);
779 vacrel->vistest = GlobalVisTestFor(rel);
780 /* Initialize state used to track oldest extant XID/MXID */
781 vacrel->NewRelfrozenXid = vacrel->cutoffs.OldestXmin;
782 vacrel->NewRelminMxid = vacrel->cutoffs.OldestMxact;
783
784 /*
785 * Initialize state related to tracking all-visible page skipping. This is
786 * very important to determine whether or not it is safe to advance the
787 * relfrozenxid/relminmxid.
788 */
789 vacrel->skippedallvis = false;
790 skipwithvm = true;
791 if (params->options & VACOPT_DISABLE_PAGE_SKIPPING)
792 {
793 /*
794 * Force aggressive mode, and disable skipping blocks using the
795 * visibility map (even those set all-frozen)
796 */
797 vacrel->aggressive = true;
798 skipwithvm = false;
799 }
800
801 vacrel->skipwithvm = skipwithvm;
802
803 /*
804 * Set up eager scan tracking state. This must happen after determining
805 * whether or not the vacuum must be aggressive, because only normal
806 * vacuums use the eager scan algorithm.
807 */
808 heap_vacuum_eager_scan_setup(vacrel, params);
809
810 if (verbose)
811 {
812 if (vacrel->aggressive)
813 ereport(INFO,
814 (errmsg("aggressively vacuuming \"%s.%s.%s\"",
815 vacrel->dbname, vacrel->relnamespace,
816 vacrel->relname)));
817 else
818 ereport(INFO,
819 (errmsg("vacuuming \"%s.%s.%s\"",
820 vacrel->dbname, vacrel->relnamespace,
821 vacrel->relname)));
822 }
823
824 /*
825 * Allocate dead_items memory using dead_items_alloc. This handles
826 * parallel VACUUM initialization as part of allocating shared memory
827 * space used for dead_items. (But do a failsafe precheck first, to
828 * ensure that parallel VACUUM won't be attempted at all when relfrozenxid
829 * is already dangerously old.)
830 */
831 lazy_check_wraparound_failsafe(vacrel);
832 dead_items_alloc(vacrel, params->nworkers);
833
834 /*
835 * Call lazy_scan_heap to perform all required heap pruning, index
836 * vacuuming, and heap vacuuming (plus related processing)
837 */
838 lazy_scan_heap(vacrel);
839
840 /*
841 * Free resources managed by dead_items_alloc. This ends parallel mode in
842 * passing when necessary.
843 */
844 dead_items_cleanup(vacrel);
845 Assert(!IsInParallelMode());
846
847 /*
848 * Update pg_class entries for each of rel's indexes where appropriate.
849 *
850 * Unlike the later update to rel's pg_class entry, this is not critical.
851 * Maintains relpages/reltuples statistics used by the planner only.
852 */
853 if (vacrel->do_index_cleanup)
854 update_relstats_all_indexes(vacrel);
855
856 /* Done with rel's indexes */
857 vac_close_indexes(vacrel->nindexes, vacrel->indrels, NoLock);
858
859 /* Optionally truncate rel */
860 if (should_attempt_truncation(vacrel))
861 lazy_truncate_heap(vacrel);
862
863 /* Pop the error context stack */
864 error_context_stack = errcallback.previous;
865
866 /* Report that we are now doing final cleanup */
867 pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
868 PROGRESS_VACUUM_PHASE_FINAL_CLEANUP);
869
870 /*
871 * Prepare to update rel's pg_class entry.
872 *
873 * Aggressive VACUUMs must always be able to advance relfrozenxid to a
874 * value >= FreezeLimit, and relminmxid to a value >= MultiXactCutoff.
875 * Non-aggressive VACUUMs may advance them by any amount, or not at all.
876 */
877 Assert(vacrel->NewRelfrozenXid == vacrel->cutoffs.OldestXmin ||
878 TransactionIdPrecedesOrEquals(vacrel->aggressive ? vacrel->cutoffs.FreezeLimit :
879 vacrel->cutoffs.relfrozenxid,
880 vacrel->NewRelfrozenXid));
881 Assert(vacrel->NewRelminMxid == vacrel->cutoffs.OldestMxact ||
882 MultiXactIdPrecedesOrEquals(vacrel->aggressive ? vacrel->cutoffs.MultiXactCutoff :
883 vacrel->cutoffs.relminmxid,
884 vacrel->NewRelminMxid));
885 if (vacrel->skippedallvis)
886 {
887 /*
888 * Must keep original relfrozenxid in a non-aggressive VACUUM that
889 * chose to skip an all-visible page range. The state that tracks new
890 * values will have missed unfrozen XIDs from the pages we skipped.
891 */
892 Assert(!vacrel->aggressive);
893 vacrel->NewRelfrozenXid = InvalidTransactionId;
894 vacrel->NewRelminMxid = InvalidMultiXactId;
895 }
896
897 /*
898 * For safety, clamp relallvisible to be not more than what we're setting
899 * pg_class.relpages to
900 */
901 new_rel_pages = vacrel->rel_pages; /* After possible rel truncation */
902 visibilitymap_count(rel, &new_rel_allvisible, &new_rel_allfrozen);
903 if (new_rel_allvisible > new_rel_pages)
904 new_rel_allvisible = new_rel_pages;
905
906 /*
907 * An all-frozen block _must_ be all-visible. As such, clamp the count of
908 * all-frozen blocks to the count of all-visible blocks. This matches the
909 * clamping of relallvisible above.
910 */
911 if (new_rel_allfrozen > new_rel_allvisible)
912 new_rel_allfrozen = new_rel_allvisible;
913
914 /*
915 * Now actually update rel's pg_class entry.
916 *
917 * In principle new_live_tuples could be -1 indicating that we (still)
918 * don't know the tuple count. In practice that can't happen, since we
919 * scan every page that isn't skipped using the visibility map.
920 */
921 vac_update_relstats(rel, new_rel_pages, vacrel->new_live_tuples,
922 new_rel_allvisible, new_rel_allfrozen,
923 vacrel->nindexes > 0,
924 vacrel->NewRelfrozenXid, vacrel->NewRelminMxid,
925 &frozenxid_updated, &minmulti_updated, false);
926
927 /*
928 * Report results to the cumulative stats system, too.
929 *
930 * Deliberately avoid telling the stats system about LP_DEAD items that
931 * remain in the table due to VACUUM bypassing index and heap vacuuming.
932 * ANALYZE will consider the remaining LP_DEAD items to be dead "tuples".
933 * It seems like a good idea to err on the side of not vacuuming again too
934 * soon in cases where the failsafe prevented significant amounts of heap
935 * vacuuming.
936 */
937 pgstat_report_vacuum(RelationGetRelid(rel),
938 rel->rd_rel->relisshared,
939 Max(vacrel->new_live_tuples, 0),
940 vacrel->recently_dead_tuples +
941 vacrel->missed_dead_tuples,
942 starttime);
943 pgstat_progress_end_command();
944
945 if (instrument)
946 {
947 TimestampTz endtime = GetCurrentTimestamp();
948
949 if (verbose || params->log_min_duration == 0 ||
950 TimestampDifferenceExceeds(starttime, endtime,
951 params->log_min_duration))
952 {
953 long secs_dur;
954 int usecs_dur;
955 WalUsage walusage;
956 BufferUsage bufferusage;
957 StringInfoData buf;
958 char *msgfmt;
959 int32 diff;
960 double read_rate = 0,
961 write_rate = 0;
962 int64 total_blks_hit;
963 int64 total_blks_read;
964 int64 total_blks_dirtied;
965
966 TimestampDifference(starttime, endtime, &secs_dur, &usecs_dur);
967 memset(&walusage, 0, sizeof(WalUsage));
968 WalUsageAccumDiff(&walusage, &pgWalUsage, &startwalusage);
969 memset(&bufferusage, 0, sizeof(BufferUsage));
970 BufferUsageAccumDiff(&bufferusage, &pgBufferUsage, &startbufferusage);
971
972 total_blks_hit = bufferusage.shared_blks_hit +
973 bufferusage.local_blks_hit;
974 total_blks_read = bufferusage.shared_blks_read +
975 bufferusage.local_blks_read;
976 total_blks_dirtied = bufferusage.shared_blks_dirtied +
977 bufferusage.local_blks_dirtied;
978
979 initStringInfo(&buf);
980 if (verbose)
981 {
982 /*
983 * Aggressiveness already reported earlier, in dedicated
984 * VACUUM VERBOSE ereport
985 */
986 Assert(!params->is_wraparound);
987 msgfmt = _("finished vacuuming \"%s.%s.%s\": index scans: %d\n");
988 }
989 else if (params->is_wraparound)
990 {
991 /*
992 * While it's possible for a VACUUM to be both is_wraparound
993 * and !aggressive, that's just a corner-case -- is_wraparound
994 * implies aggressive. Produce distinct output for the corner
995 * case all the same, just in case.
996 */
997 if (vacrel->aggressive)
998 msgfmt = _("automatic aggressive vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
999 else
1000 msgfmt = _("automatic vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
1001 }
1002 else
1003 {
1004 if (vacrel->aggressive)
1005 msgfmt = _("automatic aggressive vacuum of table \"%s.%s.%s\": index scans: %d\n");
1006 else
1007 msgfmt = _("automatic vacuum of table \"%s.%s.%s\": index scans: %d\n");
1008 }
1009 appendStringInfo(&buf, msgfmt,
1010 vacrel->dbname,
1011 vacrel->relnamespace,
1012 vacrel->relname,
1013 vacrel->num_index_scans);
1014 appendStringInfo(&buf, _("pages: %u removed, %u remain, %u scanned (%.2f%% of total), %u eagerly scanned\n"),
1015 vacrel->removed_pages,
1016 new_rel_pages,
1017 vacrel->scanned_pages,
1018 orig_rel_pages == 0 ? 100.0 :
1019 100.0 * vacrel->scanned_pages /
1020 orig_rel_pages,
1021 vacrel->eager_scanned_pages);
1022 appendStringInfo(&buf,
1023 _("tuples: %" PRId64 " removed, %" PRId64 " remain, %" PRId64 " are dead but not yet removable\n"),
1024 vacrel->tuples_deleted,
1025 (int64) vacrel->new_rel_tuples,
1026 vacrel->recently_dead_tuples);
1027 if (vacrel->missed_dead_tuples > 0)
1028 appendStringInfo(&buf,
1029 _("tuples missed: %" PRId64 " dead from %u pages not removed due to cleanup lock contention\n"),
1030 vacrel->missed_dead_tuples,
1031 vacrel->missed_dead_pages);
1032 diff = (int32) (ReadNextTransactionId() -
1033 vacrel->cutoffs.OldestXmin);
1034 appendStringInfo(&buf,
1035 _("removable cutoff: %u, which was %d XIDs old when operation ended\n"),
1036 vacrel->cutoffs.OldestXmin, diff);
1037 if (frozenxid_updated)
1038 {
1039 diff = (int32) (vacrel->NewRelfrozenXid -
1040 vacrel->cutoffs.relfrozenxid);
1041 appendStringInfo(&buf,
1042 _("new relfrozenxid: %u, which is %d XIDs ahead of previous value\n"),
1043 vacrel->NewRelfrozenXid, diff);
1044 }
1045 if (minmulti_updated)
1046 {
1047 diff = (int32) (vacrel->NewRelminMxid -
1048 vacrel->cutoffs.relminmxid);
1049 appendStringInfo(&buf,
1050 _("new relminmxid: %u, which is %d MXIDs ahead of previous value\n"),
1051 vacrel->NewRelminMxid, diff);
1052 }
1053 appendStringInfo(&buf, _("frozen: %u pages from table (%.2f%% of total) had %" PRId64 " tuples frozen\n"),
1054 vacrel->new_frozen_tuple_pages,
1055 orig_rel_pages == 0 ? 100.0 :
1056 100.0 * vacrel->new_frozen_tuple_pages /
1057 orig_rel_pages,
1058 vacrel->tuples_frozen);
1059
1060 appendStringInfo(&buf,
1061 _("visibility map: %u pages set all-visible, %u pages set all-frozen (%u were all-visible)\n"),
1062 vacrel->vm_new_visible_pages,
1063 vacrel->vm_new_visible_frozen_pages +
1064 vacrel->vm_new_frozen_pages,
1065 vacrel->vm_new_frozen_pages);
1066 if (vacrel->do_index_vacuuming)
1067 {
1068 if (vacrel->nindexes == 0 || vacrel->num_index_scans == 0)
1069 appendStringInfoString(&buf, _("index scan not needed: "));
1070 else
1071 appendStringInfoString(&buf, _("index scan needed: "));
1072
1073 msgfmt = _("%u pages from table (%.2f%% of total) had %" PRId64 " dead item identifiers removed\n");
1074 }
1075 else
1076 {
1077 if (!VacuumFailsafeActive)
1078 appendStringInfoString(&buf, _("index scan bypassed: "));
1079 else
1080 appendStringInfoString(&buf, _("index scan bypassed by failsafe: "));
1081
1082 msgfmt = _("%u pages from table (%.2f%% of total) have %" PRId64 " dead item identifiers\n");
1083 }
1084 appendStringInfo(&buf, msgfmt,
1085 vacrel->lpdead_item_pages,
1086 orig_rel_pages == 0 ? 100.0 :
1087 100.0 * vacrel->lpdead_item_pages / orig_rel_pages,
1088 vacrel->lpdead_items);
1089 for (int i = 0; i < vacrel->nindexes; i++)
1090 {
1091 IndexBulkDeleteResult *istat = vacrel->indstats[i];
1092
1093 if (!istat)
1094 continue;
1095
1096 appendStringInfo(&buf,
1097 _("index \"%s\": pages: %u in total, %u newly deleted, %u currently deleted, %u reusable\n"),
1098 indnames[i],
1099 istat->num_pages,
1100 istat->pages_newly_deleted,
1101 istat->pages_deleted,
1102 istat->pages_free);
1103 }
1104 if (track_cost_delay_timing)
1105 {
1106 /*
1107 * We bypass the changecount mechanism because this value is
1108 * only updated by the calling process. We also rely on the
1109 * above call to pgstat_progress_end_command() to not clear
1110 * the st_progress_param array.
1111 */
1112 appendStringInfo(&buf, _("delay time: %.3f ms\n"),
1113 (double) MyBEEntry->st_progress_param[PROGRESS_VACUUM_DELAY_TIME] / 1000000.0);
1114 }
1115 if (track_io_timing)
1116 {
1117 double read_ms = (double) (pgStatBlockReadTime - startreadtime) / 1000;
1118 double write_ms = (double) (pgStatBlockWriteTime - startwritetime) / 1000;
1119
1120 appendStringInfo(&buf, _("I/O timings: read: %.3f ms, write: %.3f ms\n"),
1121 read_ms, write_ms);
1122 }
1123 if (secs_dur > 0 || usecs_dur > 0)
1124 {
1125 read_rate = (double) BLCKSZ * total_blks_read /
1126 (1024 * 1024) / (secs_dur + usecs_dur / 1000000.0);
1127 write_rate = (double) BLCKSZ * total_blks_dirtied /
1128 (1024 * 1024) / (secs_dur + usecs_dur / 1000000.0);
1129 }
1130 appendStringInfo(&buf, _("avg read rate: %.3f MB/s, avg write rate: %.3f MB/s\n"),
1131 read_rate, write_rate);
1132 appendStringInfo(&buf,
1133 _("buffer usage: %" PRId64 " hits, %" PRId64 " reads, %" PRId64 " dirtied\n"),
1134 total_blks_hit,
1135 total_blks_read,
1136 total_blks_dirtied);
1137 appendStringInfo(&buf,
1138 _("WAL usage: %" PRId64 " records, %" PRId64 " full page images, %" PRIu64 " bytes, %" PRId64 " buffers full\n"),
1139 walusage.wal_records,
1140 walusage.wal_fpi,
1141 walusage.wal_bytes,
1142 walusage.wal_buffers_full);
1143 appendStringInfo(&buf, _("system usage: %s"), pg_rusage_show(&ru0));
1144
1145 ereport(verbose ? INFO : LOG,
1146 (errmsg_internal("%s", buf.data)));
1147 pfree(buf.data);
1148 }
1149 }
1150
1151 /* Cleanup index statistics and index names */
1152 for (int i = 0; i < vacrel->nindexes; i++)
1153 {
1154 if (vacrel->indstats[i])
1155 pfree(vacrel->indstats[i]);
1156
1157 if (instrument)
1158 pfree(indnames[i]);
1159 }
1160}
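For context, heap_vacuum_rel() is not normally called directly: VACUUM and autovacuum go through vacuum(), which reaches this function via the table AM's relation_vacuum callback (registered by the heap AM in heapam_handler.c), i.e. through table_relation_vacuum(). A hypothetical, heavily simplified sketch of the VacuumParams fields consumed above follows; vacuum_heap_rel_example() is not a PostgreSQL function, and the field values are assumptions chosen only to satisfy the Asserts near lines 702-704 and 729:

#include "postgres.h"
#include "access/heapam.h"
#include "commands/vacuum.h"

static void
vacuum_heap_rel_example(Relation rel, BufferAccessStrategy bstrategy)
{
	VacuumParams params = {0};

	params.options = VACOPT_VACUUM | VACOPT_VERBOSE;	/* VERBOSE drives the INFO report above */
	params.freeze_min_age = -1;				/* -1: use the freeze-age GUC defaults */
	params.freeze_table_age = -1;
	params.multixact_freeze_min_age = -1;
	params.multixact_freeze_table_age = -1;
	params.index_cleanup = VACOPTVALUE_AUTO;	/* let lazy_vacuum() decide (line 729) */
	params.truncate = VACOPTVALUE_ENABLED;		/* must not be AUTO/UNSPECIFIED here (lines 702-704) */
	params.is_wraparound = false;
	params.log_min_duration = -1;			/* no duration-based autovacuum logging */
	/* params.nworkers left at 0; see VacuumParams in vacuum.h for the parallel-worker semantics */

	heap_vacuum_rel(rel, &params, bstrategy);
}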
void TimestampDifference(TimestampTz start_time, TimestampTz stop_time, long *secs, int *microsecs)
Definition: timestamp.c:1721
bool TimestampDifferenceExceeds(TimestampTz start_time, TimestampTz stop_time, int msec)
Definition: timestamp.c:1781
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1645
void pgstat_progress_start_command(ProgressCommandType cmdtype, Oid relid)
void pgstat_progress_update_param(int index, int64 val)
void pgstat_progress_end_command(void)
@ PROGRESS_COMMAND_VACUUM
PgBackendStatus * MyBEEntry
bool track_io_timing
Definition: bufmgr.c:144
#define RelationGetNumberOfBlocks(reln)
Definition: bufmgr.h:280
int32_t int32
Definition: c.h:498
int64 TimestampTz
Definition: timestamp.h:39
char * get_database_name(Oid dbid)
Definition: dbcommands.c:3188
ErrorContextCallback * error_context_stack
Definition: elog.c:95
#define _(x)
Definition: elog.c:91
#define LOG
Definition: elog.h:31
#define INFO
Definition: elog.h:34
int verbose
void WalUsageAccumDiff(WalUsage *dst, const WalUsage *add, const WalUsage *sub)
Definition: instrument.c:287
BufferUsage pgBufferUsage
Definition: instrument.c:20
void BufferUsageAccumDiff(BufferUsage *dst, const BufferUsage *add, const BufferUsage *sub)
Definition: instrument.c:248
#define NoLock
Definition: lockdefs.h:34
#define RowExclusiveLock
Definition: lockdefs.h:38
char * get_namespace_name(Oid nspid)
Definition: lsyscache.c:3506
char * pstrdup(const char *in)
Definition: mcxt.c:2322
void * palloc0(Size size)
Definition: mcxt.c:1970
#define AmAutoVacuumWorkerProcess()
Definition: miscadmin.h:383
const char * pg_rusage_show(const PGRUsage *ru0)
Definition: pg_rusage.c:40
void pg_rusage_init(PGRUsage *ru0)
Definition: pg_rusage.c:27
int64 PgStat_Counter
Definition: pgstat.h:65
PgStat_Counter pgStatBlockReadTime
PgStat_Counter pgStatBlockWriteTime
void pgstat_report_vacuum(Oid tableoid, bool shared, PgStat_Counter livetuples, PgStat_Counter deadtuples, TimestampTz starttime)
#define PROGRESS_VACUUM_PHASE_FINAL_CLEANUP
Definition: progress.h:39
#define PROGRESS_VACUUM_PHASE
Definition: progress.h:21
#define PROGRESS_VACUUM_DELAY_TIME
Definition: progress.h:31
#define RelationGetNamespace(relation)
Definition: rel.h:557
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:145
void appendStringInfoString(StringInfo str, const char *s)
Definition: stringinfo.c:230
void initStringInfo(StringInfo str)
Definition: stringinfo.c:97
int64 shared_blks_dirtied
Definition: instrument.h:28
int64 local_blks_hit
Definition: instrument.h:30
int64 shared_blks_read
Definition: instrument.h:27
int64 local_blks_read
Definition: instrument.h:31
int64 local_blks_dirtied
Definition: instrument.h:32
int64 shared_blks_hit
Definition: instrument.h:26
struct ErrorContextCallback * previous
Definition: elog.h:297
void(* callback)(void *arg)
Definition: elog.h:298
BlockNumber pages_deleted
Definition: genam.h:105
BlockNumber pages_newly_deleted
Definition: genam.h:104
BlockNumber pages_free
Definition: genam.h:106
BlockNumber num_pages
Definition: genam.h:100
bool verbose
Definition: vacuumlazy.c:298
BlockNumber vm_new_frozen_pages
Definition: vacuumlazy.c:337
int nindexes
Definition: vacuumlazy.c:264
int64 tuples_deleted
Definition: vacuumlazy.c:352
BlockNumber nonempty_pages
Definition: vacuumlazy.c:341
bool do_rel_truncate
Definition: vacuumlazy.c:280
BlockNumber scanned_pages
Definition: vacuumlazy.c:314
bool aggressive
Definition: vacuumlazy.c:271
BlockNumber new_frozen_tuple_pages
Definition: vacuumlazy.c:323
GlobalVisState * vistest
Definition: vacuumlazy.c:284
BlockNumber removed_pages
Definition: vacuumlazy.c:322
int num_index_scans
Definition: vacuumlazy.c:350
IndexBulkDeleteResult ** indstats
Definition: vacuumlazy.c:347
double new_live_tuples
Definition: vacuumlazy.c:345
double new_rel_tuples
Definition: vacuumlazy.c:344
TransactionId NewRelfrozenXid
Definition: vacuumlazy.c:286
Relation rel
Definition: vacuumlazy.c:262
bool consider_bypass_optimization
Definition: vacuumlazy.c:275
BlockNumber rel_pages
Definition: vacuumlazy.c:313
int64 recently_dead_tuples
Definition: vacuumlazy.c:356
int64 tuples_frozen
Definition: vacuumlazy.c:353
char * dbname
Definition: vacuumlazy.c:291
BlockNumber missed_dead_pages
Definition: vacuumlazy.c:340
char * relnamespace
Definition: vacuumlazy.c:292
int64 live_tuples
Definition: vacuumlazy.c:355
int64 lpdead_items
Definition: vacuumlazy.c:354
BufferAccessStrategy bstrategy
Definition: vacuumlazy.c:267
bool skippedallvis
Definition: vacuumlazy.c:288
BlockNumber lpdead_item_pages
Definition: vacuumlazy.c:339
BlockNumber eager_scanned_pages
Definition: vacuumlazy.c:320
Relation * indrels
Definition: vacuumlazy.c:263
bool skipwithvm
Definition: vacuumlazy.c:273
bool do_index_cleanup
Definition: vacuumlazy.c:279
MultiXactId NewRelminMxid
Definition: vacuumlazy.c:287
int64 missed_dead_tuples
Definition: vacuumlazy.c:357
struct VacuumCutoffs cutoffs
Definition: vacuumlazy.c:283
BlockNumber vm_new_visible_pages
Definition: vacuumlazy.c:326
char * relname
Definition: vacuumlazy.c:293
VacErrPhase phase
Definition: vacuumlazy.c:297
char * indname
Definition: vacuumlazy.c:294
BlockNumber vm_new_visible_frozen_pages
Definition: vacuumlazy.c:334
bool do_index_vacuuming
Definition: vacuumlazy.c:278
int64 st_progress_param[PGSTAT_NUM_PROGRESS_PARAM]
int nworkers
Definition: vacuum.h:246
VacOptValue truncate
Definition: vacuum.h:231
bits32 options
Definition: vacuum.h:219
bool is_wraparound
Definition: vacuum.h:226
int log_min_duration
Definition: vacuum.h:227
VacOptValue index_cleanup
Definition: vacuum.h:230
int64 wal_buffers_full
Definition: instrument.h:56
uint64 wal_bytes
Definition: instrument.h:55
int64 wal_records
Definition: instrument.h:53
static TransactionId ReadNextTransactionId(void)
Definition: transam.h:315
bool track_cost_delay_timing
Definition: vacuum.c:80
void vac_open_indexes(Relation relation, LOCKMODE lockmode, int *nindexes, Relation **Irel)
Definition: vacuum.c:2338
void vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode)
Definition: vacuum.c:2381
bool vacuum_get_cutoffs(Relation rel, const VacuumParams *params, struct VacuumCutoffs *cutoffs)
Definition: vacuum.c:1102
bool VacuumFailsafeActive
Definition: vacuum.c:108
void vac_update_relstats(Relation relation, BlockNumber num_pages, double num_tuples, BlockNumber num_all_visible_pages, BlockNumber num_all_frozen_pages, bool hasindex, TransactionId frozenxid, MultiXactId minmulti, bool *frozenxid_updated, bool *minmulti_updated, bool in_outer_xact)
Definition: vacuum.c:1428
#define VACOPT_VERBOSE
Definition: vacuum.h:182
@ VACOPTVALUE_AUTO
Definition: vacuum.h:203
@ VACOPTVALUE_ENABLED
Definition: vacuum.h:205
@ VACOPTVALUE_UNSPECIFIED
Definition: vacuum.h:202
@ VACOPTVALUE_DISABLED
Definition: vacuum.h:204
#define VACOPT_DISABLE_PAGE_SKIPPING
Definition: vacuum.h:188
static void dead_items_cleanup(LVRelState *vacrel)
Definition: vacuumlazy.c:3590
static void update_relstats_all_indexes(LVRelState *vacrel)
Definition: vacuumlazy.c:3731
static void vacuum_error_callback(void *arg)
Definition: vacuumlazy.c:3766
static void lazy_truncate_heap(LVRelState *vacrel)
Definition: vacuumlazy.c:3210
static bool should_attempt_truncation(LVRelState *vacrel)
Definition: vacuumlazy.c:3190
@ VACUUM_ERRCB_PHASE_UNKNOWN
Definition: vacuumlazy.c:226
static void lazy_scan_heap(LVRelState *vacrel)
Definition: vacuumlazy.c:1199
static bool lazy_check_wraparound_failsafe(LVRelState *vacrel)
Definition: vacuumlazy.c:2960
static void heap_vacuum_eager_scan_setup(LVRelState *vacrel, VacuumParams *params)
Definition: vacuumlazy.c:488
static void dead_items_alloc(LVRelState *vacrel, int nworkers)
Definition: vacuumlazy.c:3483
void visibilitymap_count(Relation rel, BlockNumber *all_visible, BlockNumber *all_frozen)

References _, LVRelState::aggressive, AmAutoVacuumWorkerProcess, appendStringInfo(), appendStringInfoString(), ErrorContextCallback::arg, Assert(), LVRelState::bstrategy, buf, BufferUsageAccumDiff(), ErrorContextCallback::callback, LVRelState::consider_bypass_optimization, LVRelState::cutoffs, LVRelState::dbname, dead_items_alloc(), dead_items_cleanup(), LVRelState::do_index_cleanup, LVRelState::do_index_vacuuming, LVRelState::do_rel_truncate, LVRelState::eager_scanned_pages, ereport, errmsg(), errmsg_internal(), error_context_stack, VacuumCutoffs::FreezeLimit, get_database_name(), get_namespace_name(), GetCurrentTimestamp(), GlobalVisTestFor(), heap_vacuum_eager_scan_setup(), i, VacuumParams::index_cleanup, LVRelState::indname, LVRelState::indrels, LVRelState::indstats, INFO, initStringInfo(), InvalidMultiXactId, InvalidTransactionId, VacuumParams::is_wraparound, IsInParallelMode(), lazy_check_wraparound_failsafe(), lazy_scan_heap(), lazy_truncate_heap(), LVRelState::live_tuples, BufferUsage::local_blks_dirtied, BufferUsage::local_blks_hit, BufferUsage::local_blks_read, LOG, VacuumParams::log_min_duration, LVRelState::lpdead_item_pages, LVRelState::lpdead_items, Max, LVRelState::missed_dead_pages, LVRelState::missed_dead_tuples, VacuumCutoffs::MultiXactCutoff, MultiXactIdPrecedesOrEquals(), MyBEEntry, MyDatabaseId, LVRelState::new_frozen_tuple_pages, LVRelState::new_live_tuples, LVRelState::new_rel_tuples, LVRelState::NewRelfrozenXid, LVRelState::NewRelminMxid, LVRelState::nindexes, NoLock, LVRelState::nonempty_pages, LVRelState::num_index_scans, IndexBulkDeleteResult::num_pages, VacuumParams::nworkers, VacuumCutoffs::OldestMxact, VacuumCutoffs::OldestXmin, VacuumParams::options, IndexBulkDeleteResult::pages_deleted, IndexBulkDeleteResult::pages_free, IndexBulkDeleteResult::pages_newly_deleted, palloc(), palloc0(), pfree(), pg_rusage_init(), pg_rusage_show(), pgBufferUsage, pgstat_progress_end_command(), pgstat_progress_start_command(), pgstat_progress_update_param(), pgstat_report_vacuum(), pgStatBlockReadTime, pgStatBlockWriteTime, pgWalUsage, LVRelState::phase, ErrorContextCallback::previous, PROGRESS_COMMAND_VACUUM, PROGRESS_VACUUM_DELAY_TIME, PROGRESS_VACUUM_PHASE, PROGRESS_VACUUM_PHASE_FINAL_CLEANUP, pstrdup(), RelationData::rd_rel, ReadNextTransactionId(), LVRelState::recently_dead_tuples, LVRelState::rel, LVRelState::rel_pages, RelationGetNamespace, RelationGetNumberOfBlocks, RelationGetRelationName, RelationGetRelid, VacuumCutoffs::relfrozenxid, VacuumCutoffs::relminmxid, LVRelState::relname, LVRelState::relnamespace, LVRelState::removed_pages, RowExclusiveLock, LVRelState::scanned_pages, BufferUsage::shared_blks_dirtied, BufferUsage::shared_blks_hit, BufferUsage::shared_blks_read, should_attempt_truncation(), LVRelState::skippedallvis, LVRelState::skipwithvm, PgBackendStatus::st_progress_param, TimestampDifference(), TimestampDifferenceExceeds(), track_cost_delay_timing, track_io_timing, TransactionIdPrecedesOrEquals(), VacuumParams::truncate, LVRelState::tuples_deleted, LVRelState::tuples_frozen, update_relstats_all_indexes(), vac_close_indexes(), vac_open_indexes(), vac_update_relstats(), VACOPT_DISABLE_PAGE_SKIPPING, VACOPT_VERBOSE, VACOPTVALUE_AUTO, VACOPTVALUE_DISABLED, VACOPTVALUE_ENABLED, VACOPTVALUE_UNSPECIFIED, VACUUM_ERRCB_PHASE_UNKNOWN, vacuum_error_callback(), vacuum_get_cutoffs(), VacuumFailsafeActive, LVRelState::verbose, verbose, visibilitymap_count(), LVRelState::vistest, LVRelState::vm_new_frozen_pages, LVRelState::vm_new_visible_frozen_pages, 
LVRelState::vm_new_visible_pages, WalUsage::wal_buffers_full, WalUsage::wal_bytes, WalUsage::wal_fpi, WalUsage::wal_records, and WalUsageAccumDiff().

◆ HeapCheckForSerializableConflictOut()

void HeapCheckForSerializableConflictOut ( bool  visible,
Relation  relation,
HeapTuple  tuple,
Buffer  buffer,
Snapshot  snapshot 
)

Definition at line 9201 of file heapam.c.

9204{
9205 TransactionId xid;
9206 HTSV_Result htsvResult;
9207
9208 if (!CheckForSerializableConflictOutNeeded(relation, snapshot))
9209 return;
9210
9211 /*
9212 * Check to see whether the tuple has been written to by a concurrent
9213 * transaction, either to create it not visible to us, or to delete it
9214 * while it is visible to us. The "visible" bool indicates whether the
9215 * tuple is visible to us, while HeapTupleSatisfiesVacuum checks what else
9216 * is going on with it.
9217 *
9218 * In the event of a concurrently inserted tuple that also happens to have
9219 * been concurrently updated (by a separate transaction), the xmin of the
9220 * tuple will be used -- not the updater's xid.
9221 */
9222 htsvResult = HeapTupleSatisfiesVacuum(tuple, TransactionXmin, buffer);
9223 switch (htsvResult)
9224 {
9225 case HEAPTUPLE_LIVE:
9226 if (visible)
9227 return;
9228 xid = HeapTupleHeaderGetXmin(tuple->t_data);
 9229 break;
 9230 case HEAPTUPLE_RECENTLY_DEAD:
 9231 case HEAPTUPLE_DELETE_IN_PROGRESS:
 9232 if (visible)
 9233 xid = HeapTupleHeaderGetUpdateXid(tuple->t_data);
 9234 else
 9235 xid = HeapTupleHeaderGetXmin(tuple->t_data);
 9236
 9237 if (TransactionIdPrecedes(xid, TransactionXmin))
 9238 {
9239 /* This is like the HEAPTUPLE_DEAD case */
9240 Assert(!visible);
9241 return;
9242 }
 9243 break;
 9244 case HEAPTUPLE_INSERT_IN_PROGRESS:
 9245 xid = HeapTupleHeaderGetXmin(tuple->t_data);
9246 break;
9247 case HEAPTUPLE_DEAD:
9248 Assert(!visible);
9249 return;
9250 default:
9251
9252 /*
9253 * The only way to get to this default clause is if a new value is
9254 * added to the enum type without adding it to this switch
9255 * statement. That's a bug, so elog.
9256 */
9257 elog(ERROR, "unrecognized return value from HeapTupleSatisfiesVacuum: %u", htsvResult);
9258
9259 /*
9260 * In spite of having all enum values covered and calling elog on
9261 * this default, some compilers think this is a code path which
9262 * allows xid to be used below without initialization. Silence
9263 * that warning.
9264 */
 9265 xid = InvalidTransactionId;
 9266 }
 9267
 9268 Assert(TransactionIdIsValid(xid));
 9269 Assert(TransactionIdFollowsOrEquals(xid, TransactionXmin));
 9270
9271 /*
9272 * Find top level xid. Bail out if xid is too early to be a conflict, or
9273 * if it's our own xid.
 9274 */
 9275 if (TransactionIdEquals(xid, GetTopTransactionIdIfAny()))
 9276 return;
 9277 xid = SubTransGetTopmostTransaction(xid);
 9278 if (TransactionIdPrecedes(xid, TransactionXmin))
 9279 return;
9280
9281 CheckForSerializableConflictOut(relation, xid, snapshot);
9282}
HTSV_Result HeapTupleSatisfiesVacuum(HeapTuple htup, TransactionId OldestXmin, Buffer buffer)
void CheckForSerializableConflictOut(Relation relation, TransactionId xid, Snapshot snapshot)
Definition: predicate.c:4023
TransactionId SubTransGetTopmostTransaction(TransactionId xid)
Definition: subtrans.c:163
bool TransactionIdFollowsOrEquals(TransactionId id1, TransactionId id2)
Definition: transam.c:329
TransactionId GetTopTransactionIdIfAny(void)
Definition: xact.c:441

References Assert(), CheckForSerializableConflictOut(), CheckForSerializableConflictOutNeeded(), elog, ERROR, GetTopTransactionIdIfAny(), HEAPTUPLE_DEAD, HEAPTUPLE_DELETE_IN_PROGRESS, HEAPTUPLE_INSERT_IN_PROGRESS, HEAPTUPLE_LIVE, HEAPTUPLE_RECENTLY_DEAD, HeapTupleHeaderGetUpdateXid(), HeapTupleHeaderGetXmin(), HeapTupleSatisfiesVacuum(), InvalidTransactionId, SubTransGetTopmostTransaction(), HeapTupleData::t_data, TransactionIdEquals, TransactionIdFollowsOrEquals(), TransactionIdIsValid, TransactionIdPrecedes(), and TransactionXmin.

Referenced by BitmapHeapScanNextBlock(), heap_fetch(), heap_get_latest_tid(), heap_hot_search_buffer(), heapam_scan_sample_next_tuple(), heapgettup(), and page_collect_tuples().
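
As the callers above suggest, this check is normally paired with a visibility test on the same pinned, share-locked buffer. A minimal sketch of that pattern (hypothetical helper, loosely modeled on heap_fetch(); assumes the usual backend includes):

static void
check_fetched_tuple(Relation relation, Snapshot snapshot,
                    HeapTuple tuple, Buffer buffer)
{
    bool        visible;

    /* Decide visibility under the caller's snapshot first... */
    visible = HeapTupleSatisfiesVisibility(tuple, snapshot, buffer);

    /* ...then let SSI record a possible rw-conflict, visible or not. */
    HeapCheckForSerializableConflictOut(visible, relation, tuple,
                                        buffer, snapshot);
}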

◆ HeapTupleHeaderIsOnlyLocked()

bool HeapTupleHeaderIsOnlyLocked ( HeapTupleHeader  tuple)

Definition at line 1529 of file heapam_visibility.c.

1530{
1531 TransactionId xmax;
1532
1533 /* if there's no valid Xmax, then there's obviously no update either */
1534 if (tuple->t_infomask & HEAP_XMAX_INVALID)
1535 return true;
1536
1537 if (tuple->t_infomask & HEAP_XMAX_LOCK_ONLY)
1538 return true;
1539
 1540 /* invalid xmax means no update */
 1541 if (!TransactionIdIsValid(HeapTupleHeaderGetRawXmax(tuple)))
 1542 return true;
1543
1544 /*
1545 * if HEAP_XMAX_LOCK_ONLY is not set and not a multi, then this must
1546 * necessarily have been updated
1547 */
1548 if (!(tuple->t_infomask & HEAP_XMAX_IS_MULTI))
1549 return false;
1550
1551 /* ... but if it's a multi, then perhaps the updating Xid aborted. */
1552 xmax = HeapTupleGetUpdateXid(tuple);
1553
 1554 /* not LOCKED_ONLY, so it has to have an xmax */
 1555 Assert(TransactionIdIsValid(xmax));
 1556
 1557 if (TransactionIdIsCurrentTransactionId(xmax))
 1558 return false;
1559 if (TransactionIdIsInProgress(xmax))
1560 return false;
1561 if (TransactionIdDidCommit(xmax))
1562 return false;
1563
1564 /*
1565 * not current, not in progress, not committed -- must have aborted or
1566 * crashed
1567 */
1568 return true;
1569}
bool TransactionIdIsInProgress(TransactionId xid)
Definition: procarray.c:1402

References Assert(), HEAP_XMAX_INVALID, HEAP_XMAX_IS_MULTI, HEAP_XMAX_LOCK_ONLY, HeapTupleGetUpdateXid(), HeapTupleHeaderGetRawXmax(), HeapTupleHeaderData::t_infomask, TransactionIdDidCommit(), TransactionIdIsCurrentTransactionId(), TransactionIdIsInProgress(), and TransactionIdIsValid.

Referenced by heap_delete(), heap_get_latest_tid(), heap_lock_tuple(), heap_lock_updated_tuple_rec(), HeapTupleSatisfiesVacuumHorizon(), and rewrite_heap_tuple().
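
A hedged usage sketch (hypothetical helper, not from the source): distinguishing a tuple whose xmax is merely a row lock from one that was genuinely updated or deleted.

static bool
xmax_is_real_deleter(HeapTupleHeader tup)
{
    if (tup->t_infomask & HEAP_XMAX_INVALID)
        return false;               /* no xmax at all */
    if (HeapTupleHeaderIsOnlyLocked(tup))
        return false;               /* xmax only locked the row */
    return true;                    /* a genuine (possibly uncommitted) update/delete */
}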

◆ HeapTupleIsSurelyDead()

bool HeapTupleIsSurelyDead ( HeapTuple  htup,
struct GlobalVisState vistest 
)

Definition at line 1474 of file heapam_visibility.c.

1475{
1476 HeapTupleHeader tuple = htup->t_data;
 1477
 1478 Assert(ItemPointerIsValid(&htup->t_self));
 1479 Assert(htup->t_tableOid != InvalidOid);
1480
1481 /*
1482 * If the inserting transaction is marked invalid, then it aborted, and
1483 * the tuple is definitely dead. If it's marked neither committed nor
1484 * invalid, then we assume it's still alive (since the presumption is that
1485 * all relevant hint bits were just set moments ago).
1486 */
1487 if (!HeapTupleHeaderXminCommitted(tuple))
1488 return HeapTupleHeaderXminInvalid(tuple);
1489
1490 /*
1491 * If the inserting transaction committed, but any deleting transaction
1492 * aborted, the tuple is still alive.
1493 */
1494 if (tuple->t_infomask & HEAP_XMAX_INVALID)
1495 return false;
1496
1497 /*
1498 * If the XMAX is just a lock, the tuple is still alive.
 1499 */
 1500 if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
 1501 return false;
1502
1503 /*
1504 * If the Xmax is a MultiXact, it might be dead or alive, but we cannot
1505 * know without checking pg_multixact.
1506 */
1507 if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
1508 return false;
1509
1510 /* If deleter isn't known to have committed, assume it's still running. */
1511 if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED))
1512 return false;
1513
1514 /* Deleter committed, so tuple is dead if the XID is old enough. */
 1515 return GlobalVisTestIsRemovableXid(vistest,
 1516 HeapTupleHeaderGetRawXmax(tuple));
 1517}
static bool HeapTupleHeaderXminInvalid(const HeapTupleHeaderData *tup)
Definition: htup_details.h:343
static bool HeapTupleHeaderXminCommitted(const HeapTupleHeaderData *tup)
Definition: htup_details.h:337
#define InvalidOid
Definition: postgres_ext.h:35

References Assert(), GlobalVisTestIsRemovableXid(), HEAP_XMAX_COMMITTED, HEAP_XMAX_INVALID, HEAP_XMAX_IS_LOCKED_ONLY(), HEAP_XMAX_IS_MULTI, HeapTupleHeaderGetRawXmax(), HeapTupleHeaderXminCommitted(), HeapTupleHeaderXminInvalid(), InvalidOid, ItemPointerIsValid(), HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleData::t_self, and HeapTupleData::t_tableOid.

Referenced by heap_hot_search_buffer().
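
A minimal sketch of how a caller in the style of heap_hot_search_buffer() obtains the GlobalVisState argument (hypothetical helper; assumes the relation is already open):

static bool
tuple_surely_dead(Relation rel, HeapTuple tup)
{
    GlobalVisState *vistest = GlobalVisTestFor(rel);

    /* true only if no snapshot in any backend could still see the tuple */
    return HeapTupleIsSurelyDead(tup, vistest);
}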

◆ HeapTupleSatisfiesUpdate()

TM_Result HeapTupleSatisfiesUpdate ( HeapTuple  htup,
CommandId  curcid,
Buffer  buffer 
)

Definition at line 458 of file heapam_visibility.c.

460{
461 HeapTupleHeader tuple = htup->t_data;
 462
 463 Assert(ItemPointerIsValid(&htup->t_self));
 464 Assert(htup->t_tableOid != InvalidOid);
465
467 {
469 return TM_Invisible;
470
471 /* Used by pre-9.0 binary upgrades */
472 if (tuple->t_infomask & HEAP_MOVED_OFF)
473 {
475
477 return TM_Invisible;
478 if (!TransactionIdIsInProgress(xvac))
479 {
480 if (TransactionIdDidCommit(xvac))
481 {
482 SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
484 return TM_Invisible;
485 }
486 SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
488 }
489 }
490 /* Used by pre-9.0 binary upgrades */
491 else if (tuple->t_infomask & HEAP_MOVED_IN)
492 {
494
496 {
498 return TM_Invisible;
499 if (TransactionIdDidCommit(xvac))
500 SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
502 else
503 {
504 SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
506 return TM_Invisible;
507 }
508 }
509 }
511 {
512 if (HeapTupleHeaderGetCmin(tuple) >= curcid)
513 return TM_Invisible; /* inserted after scan started */
514
515 if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */
516 return TM_Ok;
517
519 {
520 TransactionId xmax;
521
522 xmax = HeapTupleHeaderGetRawXmax(tuple);
523
524 /*
525 * Careful here: even though this tuple was created by our own
526 * transaction, it might be locked by other transactions, if
527 * the original version was key-share locked when we updated
528 * it.
529 */
530
531 if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
532 {
533 if (MultiXactIdIsRunning(xmax, true))
534 return TM_BeingModified;
535 else
536 return TM_Ok;
537 }
538
539 /*
540 * If the locker is gone, then there is nothing of interest
541 * left in this Xmax; otherwise, report the tuple as
542 * locked/updated.
543 */
544 if (!TransactionIdIsInProgress(xmax))
545 return TM_Ok;
546 return TM_BeingModified;
547 }
548
549 if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
550 {
551 TransactionId xmax;
552
553 xmax = HeapTupleGetUpdateXid(tuple);
554
555 /* not LOCKED_ONLY, so it has to have an xmax */
557
558 /* deleting subtransaction must have aborted */
560 {
562 false))
563 return TM_BeingModified;
564 return TM_Ok;
565 }
566 else
567 {
568 if (HeapTupleHeaderGetCmax(tuple) >= curcid)
569 return TM_SelfModified; /* updated after scan started */
570 else
571 return TM_Invisible; /* updated before scan started */
572 }
573 }
574
576 {
577 /* deleting subtransaction must have aborted */
578 SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
580 return TM_Ok;
581 }
582
583 if (HeapTupleHeaderGetCmax(tuple) >= curcid)
584 return TM_SelfModified; /* updated after scan started */
585 else
586 return TM_Invisible; /* updated before scan started */
587 }
589 return TM_Invisible;
591 SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
593 else
594 {
595 /* it must have aborted or crashed */
596 SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
598 return TM_Invisible;
599 }
600 }
601
602 /* by here, the inserting transaction has committed */
603
604 if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid or aborted */
605 return TM_Ok;
606
607 if (tuple->t_infomask & HEAP_XMAX_COMMITTED)
608 {
610 return TM_Ok;
611 if (!ItemPointerEquals(&htup->t_self, &tuple->t_ctid))
612 return TM_Updated; /* updated by other */
613 else
614 return TM_Deleted; /* deleted by other */
615 }
616
617 if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
618 {
619 TransactionId xmax;
620
622 return TM_Ok;
623
625 {
627 return TM_BeingModified;
628
630 return TM_Ok;
631 }
632
633 xmax = HeapTupleGetUpdateXid(tuple);
634 if (!TransactionIdIsValid(xmax))
635 {
637 return TM_BeingModified;
638 }
639
640 /* not LOCKED_ONLY, so it has to have an xmax */
642
644 {
645 if (HeapTupleHeaderGetCmax(tuple) >= curcid)
646 return TM_SelfModified; /* updated after scan started */
647 else
648 return TM_Invisible; /* updated before scan started */
649 }
650
652 return TM_BeingModified;
653
654 if (TransactionIdDidCommit(xmax))
655 {
656 if (!ItemPointerEquals(&htup->t_self, &tuple->t_ctid))
657 return TM_Updated;
658 else
659 return TM_Deleted;
660 }
661
662 /*
663 * By here, the update in the Xmax is either aborted or crashed, but
664 * what about the other members?
665 */
666
668 {
669 /*
670 * There's no member, even just a locker, alive anymore, so we can
671 * mark the Xmax as invalid.
672 */
673 SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
675 return TM_Ok;
676 }
677 else
678 {
679 /* There are lockers running */
680 return TM_BeingModified;
681 }
682 }
683
685 {
687 return TM_BeingModified;
688 if (HeapTupleHeaderGetCmax(tuple) >= curcid)
689 return TM_SelfModified; /* updated after scan started */
690 else
691 return TM_Invisible; /* updated before scan started */
692 }
693
695 return TM_BeingModified;
696
698 {
699 /* it must have aborted or crashed */
700 SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
702 return TM_Ok;
703 }
704
705 /* xmax transaction committed */
706
708 {
709 SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
711 return TM_Ok;
712 }
713
714 SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED,
716 if (!ItemPointerEquals(&htup->t_self, &tuple->t_ctid))
717 return TM_Updated; /* updated by other */
718 else
719 return TM_Deleted; /* deleted by other */
720}
CommandId HeapTupleHeaderGetCmin(const HeapTupleHeaderData *tup)
Definition: combocid.c:104
static void SetHintBits(HeapTupleHeader tuple, Buffer buffer, uint16 infomask, TransactionId xid)
#define HEAP_XMIN_COMMITTED
Definition: htup_details.h:204
#define HEAP_MOVED_IN
Definition: htup_details.h:212
#define HEAP_XMIN_INVALID
Definition: htup_details.h:205
bool MultiXactIdIsRunning(MultiXactId multi, bool isLockOnly)
Definition: multixact.c:599

References Assert(), HEAP_LOCKED_UPGRADED(), HEAP_MOVED_IN, HEAP_MOVED_OFF, HEAP_XMAX_COMMITTED, HEAP_XMAX_INVALID, HEAP_XMAX_IS_LOCKED_ONLY(), HEAP_XMAX_IS_MULTI, HEAP_XMIN_COMMITTED, HEAP_XMIN_INVALID, HeapTupleGetUpdateXid(), HeapTupleHeaderGetCmax(), HeapTupleHeaderGetCmin(), HeapTupleHeaderGetRawXmax(), HeapTupleHeaderGetRawXmin(), HeapTupleHeaderGetXvac(), HeapTupleHeaderXminCommitted(), HeapTupleHeaderXminInvalid(), InvalidOid, InvalidTransactionId, ItemPointerEquals(), ItemPointerIsValid(), MultiXactIdIsRunning(), SetHintBits(), HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleData::t_self, HeapTupleData::t_tableOid, TM_BeingModified, TM_Deleted, TM_Invisible, TM_Ok, TM_SelfModified, TM_Updated, TransactionIdDidCommit(), TransactionIdIsCurrentTransactionId(), TransactionIdIsInProgress(), and TransactionIdIsValid.

Referenced by heap_delete(), heap_inplace_lock(), heap_lock_tuple(), heap_update(), and pgrowlocks().
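
A hedged sketch of how a caller might interpret the result (hypothetical helper; real callers such as heap_update() additionally wait on lockers and re-check once they finish):

static bool
can_modify_tuple_now(HeapTuple tup, CommandId cid, Buffer buf)
{
    switch (HeapTupleSatisfiesUpdate(tup, cid, buf))
    {
        case TM_Ok:
            return true;            /* safe to update or delete */
        case TM_SelfModified:
            return false;           /* already changed by this command */
        case TM_BeingModified:
            return false;           /* would have to wait for the locker */
        default:
            return false;           /* invisible, updated or deleted by others */
    }
}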

◆ HeapTupleSatisfiesVacuum()

HTSV_Result HeapTupleSatisfiesVacuum ( HeapTuple  htup,
TransactionId  OldestXmin,
Buffer  buffer 
)

Definition at line 1171 of file heapam_visibility.c.

 1173{
 1174 TransactionId dead_after = InvalidTransactionId;
 1175 HTSV_Result res;
1176
1177 res = HeapTupleSatisfiesVacuumHorizon(htup, buffer, &dead_after);
1178
1179 if (res == HEAPTUPLE_RECENTLY_DEAD)
1180 {
1181 Assert(TransactionIdIsValid(dead_after));
1182
1183 if (TransactionIdPrecedes(dead_after, OldestXmin))
1184 res = HEAPTUPLE_DEAD;
1185 }
1186 else
1187 Assert(!TransactionIdIsValid(dead_after));
1188
1189 return res;
1190}
HTSV_Result HeapTupleSatisfiesVacuumHorizon(HeapTuple htup, Buffer buffer, TransactionId *dead_after)

References Assert(), HEAPTUPLE_DEAD, HEAPTUPLE_RECENTLY_DEAD, HeapTupleSatisfiesVacuumHorizon(), InvalidTransactionId, TransactionIdIsValid, and TransactionIdPrecedes().

Referenced by heap_page_is_all_visible(), heapam_index_build_range_scan(), heapam_relation_copy_for_cluster(), heapam_scan_analyze_next_tuple(), HeapCheckForSerializableConflictOut(), lazy_scan_noprune(), statapprox_heap(), and tuple_all_visible().
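
A hedged sketch of a typical per-page loop (hypothetical helper, similar in spirit to statapprox_heap(); assumes the buffer is pinned and at least share-locked, and that OldestXmin comes from the vacuum cutoffs or GetOldestNonRemovableTransactionId()):

static int
count_dead_tuples(Relation rel, Buffer buf, TransactionId OldestXmin)
{
    Page        page = BufferGetPage(buf);
    OffsetNumber offnum,
                maxoff = PageGetMaxOffsetNumber(page);
    int         ndead = 0;

    for (offnum = FirstOffsetNumber; offnum <= maxoff;
         offnum = OffsetNumberNext(offnum))
    {
        ItemId      itemid = PageGetItemId(page, offnum);
        HeapTupleData tuple;

        if (!ItemIdIsNormal(itemid))
            continue;               /* skip unused, redirect and dead line pointers */

        tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
        tuple.t_len = ItemIdGetLength(itemid);
        tuple.t_tableOid = RelationGetRelid(rel);
        ItemPointerSet(&tuple.t_self, BufferGetBlockNumber(buf), offnum);

        if (HeapTupleSatisfiesVacuum(&tuple, OldestXmin, buf) == HEAPTUPLE_DEAD)
            ndead++;
    }

    return ndead;
}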

◆ HeapTupleSatisfiesVacuumHorizon()

HTSV_Result HeapTupleSatisfiesVacuumHorizon ( HeapTuple  htup,
Buffer  buffer,
TransactionId dead_after 
)

Definition at line 1205 of file heapam_visibility.c.

1206{
1207 HeapTupleHeader tuple = htup->t_data;
 1208
 1209 Assert(ItemPointerIsValid(&htup->t_self));
 1210 Assert(htup->t_tableOid != InvalidOid);
1211 Assert(dead_after != NULL);
1212
1213 *dead_after = InvalidTransactionId;
1214
1215 /*
1216 * Has inserting transaction committed?
1217 *
1218 * If the inserting transaction aborted, then the tuple was never visible
1219 * to any other transaction, so we can delete it immediately.
1220 */
1221 if (!HeapTupleHeaderXminCommitted(tuple))
1222 {
1223 if (HeapTupleHeaderXminInvalid(tuple))
1224 return HEAPTUPLE_DEAD;
1225 /* Used by pre-9.0 binary upgrades */
1226 else if (tuple->t_infomask & HEAP_MOVED_OFF)
1227 {
1229
1232 if (TransactionIdIsInProgress(xvac))
1234 if (TransactionIdDidCommit(xvac))
1235 {
1236 SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
1238 return HEAPTUPLE_DEAD;
1239 }
1240 SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
1242 }
1243 /* Used by pre-9.0 binary upgrades */
1244 else if (tuple->t_infomask & HEAP_MOVED_IN)
1245 {
1247
1250 if (TransactionIdIsInProgress(xvac))
1252 if (TransactionIdDidCommit(xvac))
1253 SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
1255 else
1256 {
1257 SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
1259 return HEAPTUPLE_DEAD;
1260 }
1261 }
1263 {
1264 if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */
1266 /* only locked? run infomask-only check first, for performance */
1270 /* inserted and then deleted by same xact */
1273 /* deleting subtransaction must have aborted */
1275 }
1277 {
1278 /*
1279 * It'd be possible to discern between INSERT/DELETE in progress
1280 * here by looking at xmax - but that doesn't seem beneficial for
1281 * the majority of callers and even detrimental for some. We'd
1282 * rather have callers look at/wait for xmin than xmax. It's
1283 * always correct to return INSERT_IN_PROGRESS because that's
1284 * what's happening from the view of other backends.
1285 */
1287 }
1289 SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
1291 else
1292 {
1293 /*
1294 * Not in Progress, Not Committed, so either Aborted or crashed
1295 */
1296 SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
1298 return HEAPTUPLE_DEAD;
1299 }
1300
1301 /*
1302 * At this point the xmin is known committed, but we might not have
1303 * been able to set the hint bit yet; so we can no longer Assert that
1304 * it's set.
1305 */
1306 }
1307
1308 /*
1309 * Okay, the inserter committed, so it was good at some point. Now what
1310 * about the deleting transaction?
1311 */
1312 if (tuple->t_infomask & HEAP_XMAX_INVALID)
1313 return HEAPTUPLE_LIVE;
1314
1316 {
1317 /*
1318 * "Deleting" xact really only locked it, so the tuple is live in any
1319 * case. However, we should make sure that either XMAX_COMMITTED or
1320 * XMAX_INVALID gets set once the xact is gone, to reduce the costs of
1321 * examining the tuple for future xacts.
1322 */
1323 if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED))
1324 {
1325 if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
1326 {
1327 /*
1328 * If it's a pre-pg_upgrade tuple, the multixact cannot
1329 * possibly be running; otherwise have to check.
1330 */
1331 if (!HEAP_LOCKED_UPGRADED(tuple->t_infomask) &&
1333 true))
1334 return HEAPTUPLE_LIVE;
1336 }
1337 else
1338 {
1340 return HEAPTUPLE_LIVE;
1341 SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
1343 }
1344 }
1345
1346 /*
1347 * We don't really care whether xmax did commit, abort or crash. We
1348 * know that xmax did lock the tuple, but it did not and will never
1349 * actually update it.
1350 */
1351
1352 return HEAPTUPLE_LIVE;
1353 }
1354
1355 if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
1356 {
1358
1359 /* already checked above */
1361
1362 /* not LOCKED_ONLY, so it has to have an xmax */
1364
1365 if (TransactionIdIsInProgress(xmax))
1367 else if (TransactionIdDidCommit(xmax))
1368 {
1369 /*
1370 * The multixact might still be running due to lockers. Need to
1371 * allow for pruning if below the xid horizon regardless --
1372 * otherwise we could end up with a tuple where the updater has to
1373 * be removed due to the horizon, but is not pruned away. It's
1374 * not a problem to prune that tuple, because any remaining
1375 * lockers will also be present in newer tuple versions.
1376 */
1377 *dead_after = xmax;
1379 }
1380 else if (!MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple), false))
1381 {
1382 /*
1383 * Not in Progress, Not Committed, so either Aborted or crashed.
1384 * Mark the Xmax as invalid.
1385 */
1387 }
1388
1389 return HEAPTUPLE_LIVE;
1390 }
1391
1392 if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED))
1393 {
1397 SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED,
1399 else
1400 {
1401 /*
1402 * Not in Progress, Not Committed, so either Aborted or crashed
1403 */
1404 SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
1406 return HEAPTUPLE_LIVE;
1407 }
1408
1409 /*
1410 * At this point the xmax is known committed, but we might not have
1411 * been able to set the hint bit yet; so we can no longer Assert that
1412 * it's set.
1413 */
1414 }
1415
1416 /*
1417 * Deleter committed, allow caller to check if it was recent enough that
1418 * some open transactions could still see the tuple.
1419 */
 1420 *dead_after = HeapTupleHeaderGetRawXmax(tuple);
 1421 return HEAPTUPLE_RECENTLY_DEAD;
 1422}

References Assert(), HEAP_LOCKED_UPGRADED(), HEAP_MOVED_IN, HEAP_MOVED_OFF, HEAP_XMAX_COMMITTED, HEAP_XMAX_INVALID, HEAP_XMAX_IS_LOCKED_ONLY(), HEAP_XMAX_IS_MULTI, HEAP_XMIN_COMMITTED, HEAP_XMIN_INVALID, HEAPTUPLE_DEAD, HEAPTUPLE_DELETE_IN_PROGRESS, HEAPTUPLE_INSERT_IN_PROGRESS, HEAPTUPLE_LIVE, HEAPTUPLE_RECENTLY_DEAD, HeapTupleGetUpdateXid(), HeapTupleHeaderGetRawXmax(), HeapTupleHeaderGetRawXmin(), HeapTupleHeaderGetUpdateXid(), HeapTupleHeaderGetXvac(), HeapTupleHeaderIsOnlyLocked(), HeapTupleHeaderXminCommitted(), HeapTupleHeaderXminInvalid(), InvalidOid, InvalidTransactionId, ItemPointerIsValid(), MultiXactIdIsRunning(), SetHintBits(), HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleData::t_self, HeapTupleData::t_tableOid, TransactionIdDidCommit(), TransactionIdIsCurrentTransactionId(), TransactionIdIsInProgress(), and TransactionIdIsValid.

Referenced by heap_prune_satisfies_vacuum(), HeapTupleSatisfiesNonVacuumable(), and HeapTupleSatisfiesVacuum().
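
A minimal sketch of the two-step pattern used by pruning (hypothetical helper; heap_prune_satisfies_vacuum() works roughly this way, with a GlobalVisState instead of a fixed OldestXmin):

static HTSV_Result
satisfies_vacuum_by_vistest(GlobalVisState *vistest, HeapTuple tup, Buffer buf)
{
    TransactionId dead_after = InvalidTransactionId;
    HTSV_Result res;

    res = HeapTupleSatisfiesVacuumHorizon(tup, buf, &dead_after);

    /* promote RECENTLY_DEAD to DEAD once the deleter is below the horizon */
    if (res == HEAPTUPLE_RECENTLY_DEAD &&
        GlobalVisTestIsRemovableXid(vistest, dead_after))
        res = HEAPTUPLE_DEAD;

    return res;
}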

◆ HeapTupleSatisfiesVisibility()

bool HeapTupleSatisfiesVisibility ( HeapTuple  htup,
Snapshot  snapshot,
Buffer  buffer 
)

Definition at line 1776 of file heapam_visibility.c.

1777{
1778 switch (snapshot->snapshot_type)
1779 {
1780 case SNAPSHOT_MVCC:
1781 return HeapTupleSatisfiesMVCC(htup, snapshot, buffer);
1782 case SNAPSHOT_SELF:
1783 return HeapTupleSatisfiesSelf(htup, snapshot, buffer);
1784 case SNAPSHOT_ANY:
1785 return HeapTupleSatisfiesAny(htup, snapshot, buffer);
1786 case SNAPSHOT_TOAST:
1787 return HeapTupleSatisfiesToast(htup, snapshot, buffer);
1788 case SNAPSHOT_DIRTY:
 1789 return HeapTupleSatisfiesDirty(htup, snapshot, buffer);
 1790 case SNAPSHOT_HISTORIC_MVCC:
 1791 return HeapTupleSatisfiesHistoricMVCC(htup, snapshot, buffer);
 1792 case SNAPSHOT_NON_VACUUMABLE:
 1793 return HeapTupleSatisfiesNonVacuumable(htup, snapshot, buffer);
1794 }
1795
1796 return false; /* keep compiler quiet */
1797}
static bool HeapTupleSatisfiesAny(HeapTuple htup, Snapshot snapshot, Buffer buffer)
static bool HeapTupleSatisfiesNonVacuumable(HeapTuple htup, Snapshot snapshot, Buffer buffer)
static bool HeapTupleSatisfiesToast(HeapTuple htup, Snapshot snapshot, Buffer buffer)
static bool HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot, Buffer buffer)
static bool HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot, Buffer buffer)
static bool HeapTupleSatisfiesSelf(HeapTuple htup, Snapshot snapshot, Buffer buffer)
static bool HeapTupleSatisfiesHistoricMVCC(HeapTuple htup, Snapshot snapshot, Buffer buffer)
@ SNAPSHOT_TOAST
Definition: snapshot.h:70
@ SNAPSHOT_SELF
Definition: snapshot.h:60
@ SNAPSHOT_NON_VACUUMABLE
Definition: snapshot.h:114
@ SNAPSHOT_MVCC
Definition: snapshot.h:46
@ SNAPSHOT_ANY
Definition: snapshot.h:65
@ SNAPSHOT_HISTORIC_MVCC
Definition: snapshot.h:105
@ SNAPSHOT_DIRTY
Definition: snapshot.h:98
SnapshotType snapshot_type
Definition: snapshot.h:140

References HeapTupleSatisfiesAny(), HeapTupleSatisfiesDirty(), HeapTupleSatisfiesHistoricMVCC(), HeapTupleSatisfiesMVCC(), HeapTupleSatisfiesNonVacuumable(), HeapTupleSatisfiesSelf(), HeapTupleSatisfiesToast(), SNAPSHOT_ANY, SNAPSHOT_DIRTY, SNAPSHOT_HISTORIC_MVCC, SNAPSHOT_MVCC, SNAPSHOT_NON_VACUUMABLE, SNAPSHOT_SELF, SNAPSHOT_TOAST, and SnapshotData::snapshot_type.

Referenced by BitmapHeapScanNextBlock(), heap_delete(), heap_fetch(), heap_get_latest_tid(), heap_hot_search_buffer(), heap_update(), heapam_tuple_satisfies_snapshot(), heapgettup(), page_collect_tuples(), pgstat_heap(), SampleHeapTupleVisible(), and ScanSourceDatabasePgClassPage().
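
A one-line usage sketch (hypothetical helper; the buffer containing the tuple must be pinned and at least share-locked, and an active snapshot must have been pushed):

static bool
tuple_visible_now(HeapTuple tup, Buffer buf)
{
    return HeapTupleSatisfiesVisibility(tup, GetActiveSnapshot(), buf);
}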

◆ HeapTupleSetHintBits()

void HeapTupleSetHintBits ( HeapTupleHeader  tuple,
Buffer  buffer,
uint16  infomask,
TransactionId  xid 
)

Definition at line 141 of file heapam_visibility.c.

143{
144 SetHintBits(tuple, buffer, infomask, xid);
145}

References SetHintBits().

Referenced by UpdateXmaxHintBits().
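
A hedged sketch loosely following UpdateXmaxHintBits() (hypothetical helper): once the deleter's transaction has ended, record its fate on the tuple so later visibility checks can skip the clog lookup.

static void
remember_xmax_outcome(HeapTupleHeader tuple, Buffer buffer, TransactionId xmax)
{
    if (TransactionIdDidCommit(xmax))
        HeapTupleSetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED, xmax);
    else if (!TransactionIdIsInProgress(xmax))  /* extra guard; real callers have already waited */
        HeapTupleSetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
                             InvalidTransactionId);
}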

◆ log_heap_prune_and_freeze()

void log_heap_prune_and_freeze ( Relation  relation,
Buffer  buffer,
TransactionId  conflict_xid,
bool  cleanup_lock,
PruneReason  reason,
HeapTupleFreeze frozen,
int  nfrozen,
OffsetNumber redirected,
int  nredirected,
OffsetNumber dead,
int  ndead,
OffsetNumber unused,
int  nunused 
)

Definition at line 2053 of file pruneheap.c.

2061{
2062 xl_heap_prune xlrec;
2063 XLogRecPtr recptr;
2064 uint8 info;
2065
 2066 /* The following local variables hold data registered in the WAL record: */
 2067 xlhp_freeze_plan plans[MaxHeapTuplesPerPage];
 2068 xlhp_freeze_plans freeze_plans;
 2069 xlhp_prune_items redirect_items;
 2070 xlhp_prune_items dead_items;
 2071 xlhp_prune_items unused_items;
 2072 OffsetNumber frz_offsets[MaxHeapTuplesPerPage];
 2073
2074 xlrec.flags = 0;
2075
2076 /*
2077 * Prepare data for the buffer. The arrays are not actually in the
2078 * buffer, but we pretend that they are. When XLogInsert stores a full
2079 * page image, the arrays can be omitted.
 2080 */
 2081 XLogBeginInsert();
 2082 XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
 2083 if (nfrozen > 0)
2084 {
2085 int nplans;
 2086
 2087 xlrec.flags |= XLHP_HAS_FREEZE_PLANS;
 2088
2089 /*
2090 * Prepare deduplicated representation for use in the WAL record. This
2091 * destructively sorts frozen tuples array in-place.
2092 */
2093 nplans = heap_log_freeze_plan(frozen, nfrozen, plans, frz_offsets);
2094
2095 freeze_plans.nplans = nplans;
2096 XLogRegisterBufData(0, &freeze_plans,
2097 offsetof(xlhp_freeze_plans, plans));
2098 XLogRegisterBufData(0, plans,
2099 sizeof(xlhp_freeze_plan) * nplans);
2100 }
2101 if (nredirected > 0)
 2102 {
 2103 xlrec.flags |= XLHP_HAS_REDIRECTIONS;
 2104
2105 redirect_items.ntargets = nredirected;
2106 XLogRegisterBufData(0, &redirect_items,
2107 offsetof(xlhp_prune_items, data));
2108 XLogRegisterBufData(0, redirected,
2109 sizeof(OffsetNumber[2]) * nredirected);
2110 }
2111 if (ndead > 0)
2112 {
2113 xlrec.flags |= XLHP_HAS_DEAD_ITEMS;
2114
2115 dead_items.ntargets = ndead;
2116 XLogRegisterBufData(0, &dead_items,
2117 offsetof(xlhp_prune_items, data));
2118 XLogRegisterBufData(0, dead,
2119 sizeof(OffsetNumber) * ndead);
2120 }
2121 if (nunused > 0)
 2122 {
 2123 xlrec.flags |= XLHP_HAS_NOW_UNUSED_ITEMS;
 2124
2125 unused_items.ntargets = nunused;
2126 XLogRegisterBufData(0, &unused_items,
2127 offsetof(xlhp_prune_items, data));
2128 XLogRegisterBufData(0, unused,
2129 sizeof(OffsetNumber) * nunused);
2130 }
2131 if (nfrozen > 0)
2132 XLogRegisterBufData(0, frz_offsets,
2133 sizeof(OffsetNumber) * nfrozen);
2134
2135 /*
2136 * Prepare the main xl_heap_prune record. We already set the XLHP_HAS_*
2137 * flag above.
 2138 */
 2139 if (RelationIsAccessibleInLogicalDecoding(relation))
 2140 xlrec.flags |= XLHP_IS_CATALOG_REL;
 2141 if (TransactionIdIsValid(conflict_xid))
 2142 xlrec.flags |= XLHP_HAS_CONFLICT_HORIZON;
 2143 if (cleanup_lock)
2144 xlrec.flags |= XLHP_CLEANUP_LOCK;
2145 else
2146 {
2147 Assert(nredirected == 0 && ndead == 0);
2148 /* also, any items in 'unused' must've been LP_DEAD previously */
 2149 }
 2150 XLogRegisterData(&xlrec, SizeOfHeapPrune);
 2151 if (TransactionIdIsValid(conflict_xid))
2152 XLogRegisterData(&conflict_xid, sizeof(TransactionId));
2153
2154 switch (reason)
2155 {
 2156 case PRUNE_ON_ACCESS:
 2157 info = XLOG_HEAP2_PRUNE_ON_ACCESS;
 2158 break;
 2159 case PRUNE_VACUUM_SCAN:
 2160 info = XLOG_HEAP2_PRUNE_VACUUM_SCAN;
 2161 break;
 2162 case PRUNE_VACUUM_CLEANUP:
 2163 info = XLOG_HEAP2_PRUNE_VACUUM_CLEANUP;
 2164 break;
2165 default:
2166 elog(ERROR, "unrecognized prune reason: %d", (int) reason);
2167 break;
2168 }
2169 recptr = XLogInsert(RM_HEAP2_ID, info);
2170
2171 PageSetLSN(BufferGetPage(buffer), recptr);
2172}
#define XLHP_HAS_CONFLICT_HORIZON
Definition: heapam_xlog.h:317
#define XLHP_HAS_FREEZE_PLANS
Definition: heapam_xlog.h:323
#define SizeOfHeapPrune
Definition: heapam_xlog.h:296
#define XLHP_HAS_NOW_UNUSED_ITEMS
Definition: heapam_xlog.h:332
#define XLHP_HAS_REDIRECTIONS
Definition: heapam_xlog.h:330
#define XLOG_HEAP2_PRUNE_VACUUM_SCAN
Definition: heapam_xlog.h:61
#define XLOG_HEAP2_PRUNE_ON_ACCESS
Definition: heapam_xlog.h:60
#define XLHP_CLEANUP_LOCK
Definition: heapam_xlog.h:309
#define XLHP_HAS_DEAD_ITEMS
Definition: heapam_xlog.h:331
#define XLOG_HEAP2_PRUNE_VACUUM_CLEANUP
Definition: heapam_xlog.h:62
#define XLHP_IS_CATALOG_REL
Definition: heapam_xlog.h:299
const void * data
static int heap_log_freeze_plan(HeapTupleFreeze *tuples, int ntuples, xlhp_freeze_plan *plans_out, OffsetNumber *offsets_out)
Definition: pruneheap.c:1978

References Assert(), BufferGetPage(), data, elog, ERROR, xl_heap_prune::flags, heap_log_freeze_plan(), MaxHeapTuplesPerPage, xlhp_freeze_plans::nplans, xlhp_prune_items::ntargets, PageSetLSN(), PRUNE_ON_ACCESS, PRUNE_VACUUM_CLEANUP, PRUNE_VACUUM_SCAN, REGBUF_STANDARD, RelationIsAccessibleInLogicalDecoding, SizeOfHeapPrune, TransactionIdIsValid, XLHP_CLEANUP_LOCK, XLHP_HAS_CONFLICT_HORIZON, XLHP_HAS_DEAD_ITEMS, XLHP_HAS_FREEZE_PLANS, XLHP_HAS_NOW_UNUSED_ITEMS, XLHP_HAS_REDIRECTIONS, XLHP_IS_CATALOG_REL, XLOG_HEAP2_PRUNE_ON_ACCESS, XLOG_HEAP2_PRUNE_VACUUM_CLEANUP, XLOG_HEAP2_PRUNE_VACUUM_SCAN, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by heap_page_prune_and_freeze(), and lazy_vacuum_heap_page().
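
A hedged sketch of the surrounding WAL protocol (hypothetical wrapper; heap_page_prune_and_freeze() follows this shape): the page is modified and the record emitted inside one critical section, and nothing is logged for unlogged or temporary relations.

static void
log_prune_example(Relation relation, Buffer buffer,
                  TransactionId conflict_xid,
                  HeapTupleFreeze *frozen, int nfrozen,
                  OffsetNumber *redirected, int nredirected,
                  OffsetNumber *dead, int ndead,
                  OffsetNumber *unused, int nunused)
{
    START_CRIT_SECTION();

    /* ... apply the prune/freeze changes to the page here ... */

    MarkBufferDirty(buffer);

    if (RelationNeedsWAL(relation))
        log_heap_prune_and_freeze(relation, buffer,
                                  conflict_xid,
                                  true,     /* pruned under a cleanup lock */
                                  PRUNE_VACUUM_SCAN,
                                  frozen, nfrozen,
                                  redirected, nredirected,
                                  dead, ndead,
                                  unused, nunused);

    END_CRIT_SECTION();
}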

◆ ReleaseBulkInsertStatePin()

void ReleaseBulkInsertStatePin ( BulkInsertState  bistate)

Definition at line 2018 of file heapam.c.

2019{
2020 if (bistate->current_buf != InvalidBuffer)
2021 ReleaseBuffer(bistate->current_buf);
2022 bistate->current_buf = InvalidBuffer;
2023
2024 /*
2025 * Despite the name, we also reset bulk relation extension state.
2026 * Otherwise we can end up erroring out due to looking for free space in
2027 * ->next_free of one partition, even though ->next_free was set when
2028 * extending another partition. It could obviously also be bad for
2029 * efficiency to look at existing blocks at offsets from another
2030 * partition, even if we don't error out.
2031 */
2032 bistate->next_free = InvalidBlockNumber;
2033 bistate->last_free = InvalidBlockNumber;
2034}

References BulkInsertStateData::current_buf, InvalidBlockNumber, InvalidBuffer, BulkInsertStateData::last_free, BulkInsertStateData::next_free, and ReleaseBuffer().

Referenced by CopyFrom().
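
A hedged sketch of the CopyFrom()-style usage (hypothetical helper): one BulkInsertState is reused across many inserts, but its pin and bulk-extension hints are dropped whenever the target relation changes, e.g. when rows start routing to a different partition.

static void
bulk_insert_two_batches(Relation part_a, HeapTuple *rows_a, int n_a,
                        Relation part_b, HeapTuple *rows_b, int n_b)
{
    BulkInsertState bistate = GetBulkInsertState();
    CommandId   cid = GetCurrentCommandId(true);

    for (int i = 0; i < n_a; i++)
        heap_insert(part_a, rows_a[i], cid, 0, bistate);

    /* switching relations: don't carry over the old pin or free-space hints */
    ReleaseBulkInsertStatePin(bistate);

    for (int i = 0; i < n_b; i++)
        heap_insert(part_b, rows_b[i], cid, 0, bistate);

    FreeBulkInsertState(bistate);
}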

◆ ResolveCminCmaxDuringDecoding()

bool ResolveCminCmaxDuringDecoding ( struct HTAB tuplecid_data,
Snapshot  snapshot,
HeapTuple  htup,
Buffer  buffer,
CommandId cmin,
CommandId cmax 
)

Definition at line 5397 of file reorderbuffer.c.

 5401{
 5402 ReorderBufferTupleCidKey key;
 5403 ReorderBufferTupleCidEnt *ent;
 5404 ForkNumber forkno;
5405 BlockNumber blockno;
5406 bool updated_mapping = false;
5407
5408 /*
5409 * Return unresolved if tuplecid_data is not valid. That's because when
5410 * streaming in-progress transactions we may run into tuples with the CID
5411 * before actually decoding them. Think e.g. about INSERT followed by
5412 * TRUNCATE, where the TRUNCATE may not be decoded yet when applying the
5413 * INSERT. So in such cases, we assume the CID is from the future
5414 * command.
5415 */
5416 if (tuplecid_data == NULL)
5417 return false;
5418
5419 /* be careful about padding */
5420 memset(&key, 0, sizeof(key));
5421
5422 Assert(!BufferIsLocal(buffer));
5423
5424 /*
5425 * get relfilelocator from the buffer, no convenient way to access it
5426 * other than that.
5427 */
5428 BufferGetTag(buffer, &key.rlocator, &forkno, &blockno);
5429
5430 /* tuples can only be in the main fork */
5431 Assert(forkno == MAIN_FORKNUM);
5432 Assert(blockno == ItemPointerGetBlockNumber(&htup->t_self));
5433
5434 ItemPointerCopy(&htup->t_self,
5435 &key.tid);
5436
5437restart:
 5438 ent = (ReorderBufferTupleCidEnt *)
 5439 hash_search(tuplecid_data, &key, HASH_FIND, NULL);
 5440
5441 /*
5442 * failed to find a mapping, check whether the table was rewritten and
5443 * apply mapping if so, but only do that once - there can be no new
5444 * mappings while we are in here since we have to hold a lock on the
5445 * relation.
5446 */
5447 if (ent == NULL && !updated_mapping)
 5448 {
 5449 UpdateLogicalMappings(tuplecid_data, htup->t_tableOid, snapshot);
 5450 /* now check but don't update for a mapping again */
5451 updated_mapping = true;
5452 goto restart;
5453 }
5454 else if (ent == NULL)
5455 return false;
5456
5457 if (cmin)
5458 *cmin = ent->cmin;
5459 if (cmax)
5460 *cmax = ent->cmax;
5461 return true;
5462}
#define BufferIsLocal(buffer)
Definition: buf.h:37
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition: dynahash.c:955
@ HASH_FIND
Definition: hsearch.h:113
static void UpdateLogicalMappings(HTAB *tuplecid_data, Oid relid, Snapshot snapshot)
static HTAB * tuplecid_data
Definition: snapmgr.c:162

References Assert(), BufferGetTag(), BufferIsLocal, ReorderBufferTupleCidEnt::cmax, ReorderBufferTupleCidEnt::cmin, HASH_FIND, hash_search(), ItemPointerCopy(), ItemPointerGetBlockNumber(), sort-test::key, MAIN_FORKNUM, HeapTupleData::t_self, HeapTupleData::t_tableOid, tuplecid_data, and UpdateLogicalMappings().

Referenced by HeapTupleSatisfiesHistoricMVCC().

◆ simple_heap_delete()

void simple_heap_delete ( Relation  relation,
ItemPointer  tid 
)

Definition at line 3166 of file heapam.c.

3167{
3168 TM_Result result;
3169 TM_FailureData tmfd;
3170
 3171 result = heap_delete(relation, tid,
 3172 GetCurrentCommandId(true), InvalidSnapshot,
 3173 true /* wait for commit */ ,
3174 &tmfd, false /* changingPart */ );
3175 switch (result)
3176 {
3177 case TM_SelfModified:
3178 /* Tuple was already updated in current command? */
3179 elog(ERROR, "tuple already updated by self");
3180 break;
3181
3182 case TM_Ok:
3183 /* done successfully */
3184 break;
3185
3186 case TM_Updated:
3187 elog(ERROR, "tuple concurrently updated");
3188 break;
3189
3190 case TM_Deleted:
3191 elog(ERROR, "tuple concurrently deleted");
3192 break;
3193
3194 default:
3195 elog(ERROR, "unrecognized heap_delete status: %u", result);
3196 break;
3197 }
3198}
TM_Result heap_delete(Relation relation, ItemPointer tid, CommandId cid, Snapshot crosscheck, bool wait, TM_FailureData *tmfd, bool changingPart)
Definition: heapam.c:2745

References elog, ERROR, GetCurrentCommandId(), heap_delete(), InvalidSnapshot, TM_Deleted, TM_Ok, TM_SelfModified, and TM_Updated.

Referenced by CatalogTupleDelete(), and toast_delete_datum().
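
A hedged sketch of a low-level scan-and-delete loop (hypothetical helper; safe only when no concurrent writers exist, since simple_heap_delete() raises an error on any conflict):

static void
delete_all_rows(Relation rel)
{
    TableScanDesc scan;
    HeapTuple   tup;

    scan = heap_beginscan(rel, GetActiveSnapshot(), 0, NULL, NULL,
                          SO_TYPE_SEQSCAN | SO_ALLOW_STRAT |
                          SO_ALLOW_SYNC | SO_ALLOW_PAGEMODE);

    while ((tup = heap_getnext(scan, ForwardScanDirection)) != NULL)
        simple_heap_delete(rel, &tup->t_self);

    heap_endscan(scan);
}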

◆ simple_heap_insert()

void simple_heap_insert ( Relation  relation,
HeapTuple  tup 
)

Definition at line 2687 of file heapam.c.

2688{
2689 heap_insert(relation, tup, GetCurrentCommandId(true), 0, NULL);
2690}
void heap_insert(Relation relation, HeapTuple tup, CommandId cid, int options, BulkInsertState bistate)
Definition: heapam.c:2056

References GetCurrentCommandId(), and heap_insert().

Referenced by CatalogTupleInsert(), CatalogTupleInsertWithInfo(), and InsertOneTuple().
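
A hedged usage sketch (hypothetical helper): form a tuple from datums and insert it; catalog code layers CatalogTupleInsert() on top of this to also maintain the indexes.

static void
insert_one_row(Relation rel, Datum *values, bool *isnull)
{
    HeapTuple   tup = heap_form_tuple(RelationGetDescr(rel), values, isnull);

    simple_heap_insert(rel, tup);       /* fills tup->t_self with the new TID */

    heap_freetuple(tup);
}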

◆ simple_heap_update()

void simple_heap_update ( Relation  relation,
ItemPointer  otid,
HeapTuple  tup,
TU_UpdateIndexes update_indexes 
)

Definition at line 4454 of file heapam.c.

4456{
4457 TM_Result result;
4458 TM_FailureData tmfd;
4459 LockTupleMode lockmode;
4460
 4461 result = heap_update(relation, otid, tup,
 4462 GetCurrentCommandId(true), InvalidSnapshot,
 4463 true /* wait for commit */ ,
4464 &tmfd, &lockmode, update_indexes);
4465 switch (result)
4466 {
4467 case TM_SelfModified:
4468 /* Tuple was already updated in current command? */
4469 elog(ERROR, "tuple already updated by self");
4470 break;
4471
4472 case TM_Ok:
4473 /* done successfully */
4474 break;
4475
4476 case TM_Updated:
4477 elog(ERROR, "tuple concurrently updated");
4478 break;
4479
4480 case TM_Deleted:
4481 elog(ERROR, "tuple concurrently deleted");
4482 break;
4483
4484 default:
4485 elog(ERROR, "unrecognized heap_update status: %u", result);
4486 break;
4487 }
4488}
TM_Result heap_update(Relation relation, ItemPointer otid, HeapTuple newtup, CommandId cid, Snapshot crosscheck, bool wait, TM_FailureData *tmfd, LockTupleMode *lockmode, TU_UpdateIndexes *update_indexes)
Definition: heapam.c:3212

References elog, ERROR, GetCurrentCommandId(), heap_update(), InvalidSnapshot, TM_Deleted, TM_Ok, TM_SelfModified, and TM_Updated.

Referenced by CatalogTupleUpdate(), and CatalogTupleUpdateWithInfo().
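
A hedged usage sketch (hypothetical helper): install a new version of an existing row; when update_indexes says so, the caller must then insert new index entries, as CatalogTupleUpdate() does internally.

static void
update_one_row(Relation rel, ItemPointer otid, HeapTuple newtup)
{
    TU_UpdateIndexes update_indexes;

    simple_heap_update(rel, otid, newtup, &update_indexes);

    if (update_indexes != TU_None)
    {
        /* caller would re-index the new tuple version here */
    }
}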