PostgreSQL Source Code  git master
heapam.h File Reference
#include "access/relation.h"
#include "access/relscan.h"
#include "access/sdir.h"
#include "access/skey.h"
#include "access/table.h"
#include "access/tableam.h"
#include "nodes/lockoptions.h"
#include "nodes/primnodes.h"
#include "storage/bufpage.h"
#include "storage/dsm.h"
#include "storage/lockdefs.h"
#include "storage/read_stream.h"
#include "storage/shm_toc.h"
#include "utils/relcache.h"
#include "utils/snapshot.h"
Include dependency graph for heapam.h:
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Data Structures

struct  HeapScanDescData
 
struct  IndexFetchHeapData
 
struct  HeapTupleFreeze
 
struct  HeapPageFreeze
 
struct  PruneFreezeResult
 

Macros

#define HEAP_INSERT_SKIP_FSM   TABLE_INSERT_SKIP_FSM
 
#define HEAP_INSERT_FROZEN   TABLE_INSERT_FROZEN
 
#define HEAP_INSERT_NO_LOGICAL   TABLE_INSERT_NO_LOGICAL
 
#define HEAP_INSERT_SPECULATIVE   0x0010
 
#define HEAP_PAGE_PRUNE_MARK_UNUSED_NOW   (1 << 0)
 
#define HEAP_PAGE_PRUNE_FREEZE   (1 << 1)
 
#define MaxLockTupleMode   LockTupleExclusive
 
#define HEAP_FREEZE_CHECK_XMIN_COMMITTED   0x01
 
#define HEAP_FREEZE_CHECK_XMAX_ABORTED   0x02
 
#define HeapScanIsValid(scan)   PointerIsValid(scan)
 

Typedefs

typedef struct BulkInsertStateData *BulkInsertState
 
typedef struct HeapScanDescData HeapScanDescData
 
typedef struct HeapScanDescData *HeapScanDesc
 
typedef struct IndexFetchHeapData IndexFetchHeapData
 
typedef struct HeapTupleFreeze HeapTupleFreeze
 
typedef struct HeapPageFreeze HeapPageFreeze
 
typedef struct PruneFreezeResult PruneFreezeResult
 

Enumerations

enum  HTSV_Result {
  HEAPTUPLE_DEAD , HEAPTUPLE_LIVE , HEAPTUPLE_RECENTLY_DEAD , HEAPTUPLE_INSERT_IN_PROGRESS ,
  HEAPTUPLE_DELETE_IN_PROGRESS
}
 
enum  PruneReason { PRUNE_ON_ACCESS , PRUNE_VACUUM_SCAN , PRUNE_VACUUM_CLEANUP }
 

Functions

TableScanDesc heap_beginscan (Relation relation, Snapshot snapshot, int nkeys, ScanKey key, ParallelTableScanDesc parallel_scan, uint32 flags)
 
void heap_setscanlimits (TableScanDesc sscan, BlockNumber startBlk, BlockNumber numBlks)
 
void heap_prepare_pagescan (TableScanDesc sscan)
 
void heap_rescan (TableScanDesc sscan, ScanKey key, bool set_params, bool allow_strat, bool allow_sync, bool allow_pagemode)
 
void heap_endscan (TableScanDesc sscan)
 
HeapTuple heap_getnext (TableScanDesc sscan, ScanDirection direction)
 
bool heap_getnextslot (TableScanDesc sscan, ScanDirection direction, struct TupleTableSlot *slot)
 
void heap_set_tidrange (TableScanDesc sscan, ItemPointer mintid, ItemPointer maxtid)
 
bool heap_getnextslot_tidrange (TableScanDesc sscan, ScanDirection direction, TupleTableSlot *slot)
 
bool heap_fetch (Relation relation, Snapshot snapshot, HeapTuple tuple, Buffer *userbuf, bool keep_buf)
 
bool heap_hot_search_buffer (ItemPointer tid, Relation relation, Buffer buffer, Snapshot snapshot, HeapTuple heapTuple, bool *all_dead, bool first_call)
 
void heap_get_latest_tid (TableScanDesc sscan, ItemPointer tid)
 
BulkInsertState GetBulkInsertState (void)
 
void FreeBulkInsertState (BulkInsertState)
 
void ReleaseBulkInsertStatePin (BulkInsertState bistate)
 
void heap_insert (Relation relation, HeapTuple tup, CommandId cid, int options, BulkInsertState bistate)
 
void heap_multi_insert (Relation relation, struct TupleTableSlot **slots, int ntuples, CommandId cid, int options, BulkInsertState bistate)
 
TM_Result heap_delete (Relation relation, ItemPointer tid, CommandId cid, Snapshot crosscheck, bool wait, struct TM_FailureData *tmfd, bool changingPart)
 
void heap_finish_speculative (Relation relation, ItemPointer tid)
 
void heap_abort_speculative (Relation relation, ItemPointer tid)
 
TM_Result heap_update (Relation relation, ItemPointer otid, HeapTuple newtup, CommandId cid, Snapshot crosscheck, bool wait, struct TM_FailureData *tmfd, LockTupleMode *lockmode, TU_UpdateIndexes *update_indexes)
 
TM_Result heap_lock_tuple (Relation relation, HeapTuple tuple, CommandId cid, LockTupleMode mode, LockWaitPolicy wait_policy, bool follow_updates, Buffer *buffer, struct TM_FailureData *tmfd)
 
void heap_inplace_update (Relation relation, HeapTuple tuple)
 
bool heap_prepare_freeze_tuple (HeapTupleHeader tuple, const struct VacuumCutoffs *cutoffs, HeapPageFreeze *pagefrz, HeapTupleFreeze *frz, bool *totally_frozen)
 
void heap_pre_freeze_checks (Buffer buffer, HeapTupleFreeze *tuples, int ntuples)
 
void heap_freeze_prepared_tuples (Buffer buffer, HeapTupleFreeze *tuples, int ntuples)
 
bool heap_freeze_tuple (HeapTupleHeader tuple, TransactionId relfrozenxid, TransactionId relminmxid, TransactionId FreezeLimit, TransactionId MultiXactCutoff)
 
bool heap_tuple_should_freeze (HeapTupleHeader tuple, const struct VacuumCutoffs *cutoffs, TransactionId *NoFreezePageRelfrozenXid, MultiXactId *NoFreezePageRelminMxid)
 
bool heap_tuple_needs_eventual_freeze (HeapTupleHeader tuple)
 
void simple_heap_insert (Relation relation, HeapTuple tup)
 
void simple_heap_delete (Relation relation, ItemPointer tid)
 
void simple_heap_update (Relation relation, ItemPointer otid, HeapTuple tup, TU_UpdateIndexes *update_indexes)
 
TransactionId heap_index_delete_tuples (Relation rel, TM_IndexDeleteOp *delstate)
 
void heap_page_prune_opt (Relation relation, Buffer buffer)
 
void heap_page_prune_and_freeze (Relation relation, Buffer buffer, struct GlobalVisState *vistest, int options, struct VacuumCutoffs *cutoffs, PruneFreezeResult *presult, PruneReason reason, OffsetNumber *off_loc, TransactionId *new_relfrozen_xid, MultiXactId *new_relmin_mxid)
 
void heap_page_prune_execute (Buffer buffer, bool lp_truncate_only, OffsetNumber *redirected, int nredirected, OffsetNumber *nowdead, int ndead, OffsetNumber *nowunused, int nunused)
 
void heap_get_root_tuples (Page page, OffsetNumber *root_offsets)
 
void log_heap_prune_and_freeze (Relation relation, Buffer buffer, TransactionId conflict_xid, bool lp_truncate_only, PruneReason reason, HeapTupleFreeze *frozen, int nfrozen, OffsetNumber *redirected, int nredirected, OffsetNumber *dead, int ndead, OffsetNumber *unused, int nunused)
 
void heap_vacuum_rel (Relation rel, struct VacuumParams *params, BufferAccessStrategy bstrategy)
 
bool HeapTupleSatisfiesVisibility (HeapTuple htup, Snapshot snapshot, Buffer buffer)
 
TM_Result HeapTupleSatisfiesUpdate (HeapTuple htup, CommandId curcid, Buffer buffer)
 
HTSV_Result HeapTupleSatisfiesVacuum (HeapTuple htup, TransactionId OldestXmin, Buffer buffer)
 
HTSV_Result HeapTupleSatisfiesVacuumHorizon (HeapTuple htup, Buffer buffer, TransactionId *dead_after)
 
void HeapTupleSetHintBits (HeapTupleHeader tuple, Buffer buffer, uint16 infomask, TransactionId xid)
 
bool HeapTupleHeaderIsOnlyLocked (HeapTupleHeader tuple)
 
bool HeapTupleIsSurelyDead (HeapTuple htup, struct GlobalVisState *vistest)
 
bool ResolveCminCmaxDuringDecoding (struct HTAB *tuplecid_data, Snapshot snapshot, HeapTuple htup, Buffer buffer, CommandId *cmin, CommandId *cmax)
 
void HeapCheckForSerializableConflictOut (bool visible, Relation relation, HeapTuple tuple, Buffer buffer, Snapshot snapshot)
 

Macro Definition Documentation

◆ HEAP_FREEZE_CHECK_XMAX_ABORTED

#define HEAP_FREEZE_CHECK_XMAX_ABORTED   0x02

Definition at line 137 of file heapam.h.

◆ HEAP_FREEZE_CHECK_XMIN_COMMITTED

#define HEAP_FREEZE_CHECK_XMIN_COMMITTED   0x01

Definition at line 136 of file heapam.h.

◆ HEAP_INSERT_FROZEN

#define HEAP_INSERT_FROZEN   TABLE_INSERT_FROZEN

Definition at line 36 of file heapam.h.

◆ HEAP_INSERT_NO_LOGICAL

#define HEAP_INSERT_NO_LOGICAL   TABLE_INSERT_NO_LOGICAL

Definition at line 37 of file heapam.h.

◆ HEAP_INSERT_SKIP_FSM

#define HEAP_INSERT_SKIP_FSM   TABLE_INSERT_SKIP_FSM

Definition at line 35 of file heapam.h.

◆ HEAP_INSERT_SPECULATIVE

#define HEAP_INSERT_SPECULATIVE   0x0010

Definition at line 38 of file heapam.h.

◆ HEAP_PAGE_PRUNE_FREEZE

#define HEAP_PAGE_PRUNE_FREEZE   (1 << 1)

Definition at line 42 of file heapam.h.

◆ HEAP_PAGE_PRUNE_MARK_UNUSED_NOW

#define HEAP_PAGE_PRUNE_MARK_UNUSED_NOW   (1 << 0)

Definition at line 41 of file heapam.h.

◆ HeapScanIsValid

#define HeapScanIsValid(scan)   PointerIsValid(scan)

Definition at line 287 of file heapam.h.

◆ MaxLockTupleMode

#define MaxLockTupleMode   LockTupleExclusive

Definition at line 48 of file heapam.h.
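
The HEAP_INSERT_* macros documented above are bit flags for the options argument of heap_insert() and heap_multi_insert(); apart from HEAP_INSERT_SPECULATIVE they simply alias the corresponding TABLE_INSERT_* flags. A minimal, hedged sketch of combining them (the already opened and locked relation rel and the tuple tup are assumptions of the example, not part of this header):

#include "postgres.h"
#include "access/heapam.h"
#include "access/xact.h"

/*
 * Sketch: insert one tuple the way a table-rewrite style loader might,
 * skipping the free space map and requesting frozen visibility.
 */
static void
insert_frozen_no_fsm(Relation rel, HeapTuple tup)
{
    int         options = HEAP_INSERT_SKIP_FSM | HEAP_INSERT_FROZEN;

    heap_insert(rel, tup, GetCurrentCommandId(true), options, NULL);
}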

Typedef Documentation

◆ BulkInsertState

typedef struct BulkInsertStateData *BulkInsertState

Definition at line 44 of file heapam.h.

◆ HeapPageFreeze

◆ HeapScanDesc

typedef struct HeapScanDescData* HeapScanDesc

Definition at line 109 of file heapam.h.

◆ HeapScanDescData

◆ HeapTupleFreeze

◆ IndexFetchHeapData

◆ PruneFreezeResult

Enumeration Type Documentation

◆ HTSV_Result

Enumerator
HEAPTUPLE_DEAD 
HEAPTUPLE_LIVE 
HEAPTUPLE_RECENTLY_DEAD 
HEAPTUPLE_INSERT_IN_PROGRESS 
HEAPTUPLE_DELETE_IN_PROGRESS 

Definition at line 123 of file heapam.h.

124 {
125  HEAPTUPLE_DEAD, /* tuple is dead and deletable */
126  HEAPTUPLE_LIVE, /* tuple is live (committed, no deleter) */
127  HEAPTUPLE_RECENTLY_DEAD, /* tuple is dead, but not deletable yet */
128  HEAPTUPLE_INSERT_IN_PROGRESS, /* inserting xact is still in progress */
129  HEAPTUPLE_DELETE_IN_PROGRESS, /* deleting xact is still in progress */
130 } HTSV_Result;
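
A minimal sketch of consuming HTSV_Result, here from HeapTupleSatisfiesVacuum() (declared above); the caller-supplied tuple, OldestXmin horizon, and pinned, share-locked buffer are assumptions of the example:

#include "postgres.h"
#include "access/heapam.h"

/* Sketch: classify one tuple; returns true only if it is removable now. */
static bool
tuple_is_removable(HeapTuple htup, TransactionId OldestXmin, Buffer buf)
{
    switch (HeapTupleSatisfiesVacuum(htup, OldestXmin, buf))
    {
        case HEAPTUPLE_DEAD:
            return true;    /* dead and deletable */
        case HEAPTUPLE_RECENTLY_DEAD:
        case HEAPTUPLE_LIVE:
        case HEAPTUPLE_INSERT_IN_PROGRESS:
        case HEAPTUPLE_DELETE_IN_PROGRESS:
            return false;   /* must be kept, at least for now */
    }
    return false;           /* unreachable; keeps the compiler quiet */
}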

◆ PruneReason

Enumerator
PRUNE_ON_ACCESS 
PRUNE_VACUUM_SCAN 
PRUNE_VACUUM_CLEANUP 

Definition at line 267 of file heapam.h.

268 {
269  PRUNE_ON_ACCESS, /* on-access pruning */
270  PRUNE_VACUUM_SCAN, /* VACUUM 1st heap pass */
271  PRUNE_VACUUM_CLEANUP, /* VACUUM 2nd heap pass */
272 } PruneReason;

Function Documentation

◆ FreeBulkInsertState()

void FreeBulkInsertState(BulkInsertState bistate)

Definition at line 1940 of file heapam.c.

1941 {
1942  if (bistate->current_buf != InvalidBuffer)
1943  ReleaseBuffer(bistate->current_buf);
1944  FreeAccessStrategy(bistate->strategy);
1945  pfree(bistate);
1946 }

References BulkInsertStateData::current_buf, FreeAccessStrategy(), InvalidBuffer, pfree(), ReleaseBuffer(), and BulkInsertStateData::strategy.

Referenced by ATRewriteTable(), CopyFrom(), CopyMultiInsertBufferCleanup(), deleteSplitPartitionContext(), intorel_shutdown(), moveMergedTablesRows(), and transientrel_shutdown().

◆ GetBulkInsertState()

BulkInsertState GetBulkInsertState(void)

Definition at line 1923 of file heapam.c.

1924 {
1925  BulkInsertState bistate;
1926 
1927  bistate = (BulkInsertState) palloc(sizeof(BulkInsertStateData));
1929  bistate->current_buf = InvalidBuffer;
1930  bistate->next_free = InvalidBlockNumber;
1931  bistate->last_free = InvalidBlockNumber;
1932  bistate->already_extended_by = 0;
1933  return bistate;
1934 }

References BulkInsertStateData::already_extended_by, BAS_BULKWRITE, BulkInsertStateData::current_buf, GetAccessStrategy(), InvalidBlockNumber, InvalidBuffer, BulkInsertStateData::last_free, BulkInsertStateData::next_free, palloc(), and BulkInsertStateData::strategy.

Referenced by ATRewriteTable(), CopyFrom(), CopyMultiInsertBufferInit(), createSplitPartitionContext(), intorel_startup(), moveMergedTablesRows(), and transientrel_startup().
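
A hedged usage sketch pairing GetBulkInsertState() with heap_insert() and FreeBulkInsertState(), roughly as the COPY-style callers listed above do; the open, suitably locked relation and the tuples array are assumptions of the example:

#include "postgres.h"
#include "access/heapam.h"
#include "access/xact.h"

/* Sketch: insert ntuples heap tuples while reusing one bulk-insert state. */
static void
bulk_load(Relation rel, HeapTuple *tuples, int ntuples)
{
    BulkInsertState bistate = GetBulkInsertState();
    CommandId   cid = GetCurrentCommandId(true);

    for (int i = 0; i < ntuples; i++)
        heap_insert(rel, tuples[i], cid, HEAP_INSERT_SKIP_FSM, bistate);

    /* drops the kept pin and frees the buffer access strategy */
    FreeBulkInsertState(bistate);
}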

◆ heap_abort_speculative()

void heap_abort_speculative(Relation relation, ItemPointer tid)

Definition at line 5902 of file heapam.c.

5903 {
5905  ItemId lp;
5906  HeapTupleData tp;
5907  Page page;
5908  BlockNumber block;
5909  Buffer buffer;
5910  TransactionId prune_xid;
5911 
5912  Assert(ItemPointerIsValid(tid));
5913 
5914  block = ItemPointerGetBlockNumber(tid);
5915  buffer = ReadBuffer(relation, block);
5916  page = BufferGetPage(buffer);
5917 
5919 
5920  /*
5921  * Page can't be all visible, we just inserted into it, and are still
5922  * running.
5923  */
5924  Assert(!PageIsAllVisible(page));
5925 
5926  lp = PageGetItemId(page, ItemPointerGetOffsetNumber(tid));
5927  Assert(ItemIdIsNormal(lp));
5928 
5929  tp.t_tableOid = RelationGetRelid(relation);
5930  tp.t_data = (HeapTupleHeader) PageGetItem(page, lp);
5931  tp.t_len = ItemIdGetLength(lp);
5932  tp.t_self = *tid;
5933 
5934  /*
5935  * Sanity check that the tuple really is a speculatively inserted tuple,
5936  * inserted by us.
5937  */
5938  if (tp.t_data->t_choice.t_heap.t_xmin != xid)
5939  elog(ERROR, "attempted to kill a tuple inserted by another transaction");
5940  if (!(IsToastRelation(relation) || HeapTupleHeaderIsSpeculative(tp.t_data)))
5941  elog(ERROR, "attempted to kill a non-speculative tuple");
5943 
5944  /*
5945  * No need to check for serializable conflicts here. There is never a
5946  * need for a combo CID, either. No need to extract replica identity, or
5947  * do anything special with infomask bits.
5948  */
5949 
5951 
5952  /*
5953  * The tuple will become DEAD immediately. Flag that this page is a
5954  * candidate for pruning by setting xmin to TransactionXmin. While not
5955  * immediately prunable, it is the oldest xid we can cheaply determine
5956  * that's safe against wraparound / being older than the table's
5957  * relfrozenxid. To defend against the unlikely case of a new relation
5958  * having a newer relfrozenxid than our TransactionXmin, use relfrozenxid
5959  * if so (vacuum can't subsequently move relfrozenxid to beyond
5960  * TransactionXmin, so there's no race here).
5961  */
5963  if (TransactionIdPrecedes(TransactionXmin, relation->rd_rel->relfrozenxid))
5964  prune_xid = relation->rd_rel->relfrozenxid;
5965  else
5966  prune_xid = TransactionXmin;
5967  PageSetPrunable(page, prune_xid);
5968 
5969  /* store transaction information of xact deleting the tuple */
5972 
5973  /*
5974  * Set the tuple header xmin to InvalidTransactionId. This makes the
5975  * tuple immediately invisible everyone. (In particular, to any
5976  * transactions waiting on the speculative token, woken up later.)
5977  */
5979 
5980  /* Clear the speculative insertion token too */
5981  tp.t_data->t_ctid = tp.t_self;
5982 
5983  MarkBufferDirty(buffer);
5984 
5985  /*
5986  * XLOG stuff
5987  *
5988  * The WAL records generated here match heap_delete(). The same recovery
5989  * routines are used.
5990  */
5991  if (RelationNeedsWAL(relation))
5992  {
5993  xl_heap_delete xlrec;
5994  XLogRecPtr recptr;
5995 
5996  xlrec.flags = XLH_DELETE_IS_SUPER;
5998  tp.t_data->t_infomask2);
6000  xlrec.xmax = xid;
6001 
6002  XLogBeginInsert();
6003  XLogRegisterData((char *) &xlrec, SizeOfHeapDelete);
6004  XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
6005 
6006  /* No replica identity & replication origin logged */
6007 
6008  recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_DELETE);
6009 
6010  PageSetLSN(page, recptr);
6011  }
6012 
6013  END_CRIT_SECTION();
6014 
6015  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
6016 
6017  if (HeapTupleHasExternal(&tp))
6018  {
6019  Assert(!IsToastRelation(relation));
6020  heap_toast_delete(relation, &tp, true);
6021  }
6022 
6023  /*
6024  * Never need to mark tuple for invalidation, since catalogs don't support
6025  * speculative insertion
6026  */
6027 
6028  /* Now we can release the buffer */
6029  ReleaseBuffer(buffer);
6030 
6031  /* count deletion, as we counted the insertion too */
6032  pgstat_count_heap_delete(relation);
6033 }

References Assert, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetPage(), compute_infobits(), elog, END_CRIT_SECTION, ERROR, xl_heap_delete::flags, GetCurrentTransactionId(), HEAP_KEYS_UPDATED, HEAP_MOVED, heap_toast_delete(), HEAP_XMAX_BITS, HeapTupleHasExternal, HeapTupleHeaderIsHeapOnly, HeapTupleHeaderIsSpeculative, HeapTupleHeaderSetXmin, xl_heap_delete::infobits_set, InvalidTransactionId, IsToastRelation(), ItemIdGetLength, ItemIdIsNormal, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), ItemPointerIsValid(), LockBuffer(), MarkBufferDirty(), xl_heap_delete::offnum, PageGetItem(), PageGetItemId(), PageIsAllVisible(), PageSetLSN(), PageSetPrunable, pgstat_count_heap_delete(), RelationData::rd_rel, ReadBuffer(), REGBUF_STANDARD, RelationGetRelid, RelationNeedsWAL, ReleaseBuffer(), SizeOfHeapDelete, START_CRIT_SECTION, HeapTupleHeaderData::t_choice, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleHeaderData::t_heap, HeapTupleHeaderData::t_infomask, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, HeapTupleFields::t_xmin, TransactionIdIsValid, TransactionIdPrecedes(), TransactionXmin, XLH_DELETE_IS_SUPER, XLOG_HEAP_DELETE, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), XLogRegisterData(), and xl_heap_delete::xmax.

Referenced by heapam_tuple_complete_speculative(), and toast_delete_datum().
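
heap_abort_speculative() and heap_finish_speculative() are the two possible endings of a speculative insertion (INSERT ... ON CONFLICT). A hedged sketch of that lifecycle, assuming the caller has already stored a speculative token in the tuple header (as heapam_tuple_insert_speculative() does) and performs its own conflict check:

#include "postgres.h"
#include "access/heapam.h"
#include "access/xact.h"

/* Sketch: speculatively insert tup, then confirm or back it out. */
static void
speculative_insert(Relation rel, HeapTuple tup, bool conflict_detected)
{
    CommandId   cid = GetCurrentCommandId(true);

    heap_insert(rel, tup, cid, HEAP_INSERT_SPECULATIVE, NULL);

    if (conflict_detected)
        heap_abort_speculative(rel, &tup->t_self);  /* kill our own tuple */
    else
        heap_finish_speculative(rel, &tup->t_self); /* make t_ctid point at itself */
}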

◆ heap_beginscan()

TableScanDesc heap_beginscan(Relation relation, Snapshot snapshot, int nkeys, ScanKey key, ParallelTableScanDesc parallel_scan, uint32 flags)

Definition at line 1037 of file heapam.c.

1041 {
1042  HeapScanDesc scan;
1043 
1044  /*
1045  * increment relation ref count while scanning relation
1046  *
1047  * This is just to make really sure the relcache entry won't go away while
1048  * the scan has a pointer to it. Caller should be holding the rel open
1049  * anyway, so this is redundant in all normal scenarios...
1050  */
1052 
1053  /*
1054  * allocate and initialize scan descriptor
1055  */
1056  scan = (HeapScanDesc) palloc(sizeof(HeapScanDescData));
1057 
1058  scan->rs_base.rs_rd = relation;
1059  scan->rs_base.rs_snapshot = snapshot;
1060  scan->rs_base.rs_nkeys = nkeys;
1061  scan->rs_base.rs_flags = flags;
1062  scan->rs_base.rs_parallel = parallel_scan;
1063  scan->rs_strategy = NULL; /* set in initscan */
1064  scan->rs_vmbuffer = InvalidBuffer;
1065  scan->rs_empty_tuples_pending = 0;
1066 
1067  /*
1068  * Disable page-at-a-time mode if it's not a MVCC-safe snapshot.
1069  */
1070  if (!(snapshot && IsMVCCSnapshot(snapshot)))
1072 
1073  /*
1074  * For seqscan and sample scans in a serializable transaction, acquire a
1075  * predicate lock on the entire relation. This is required not only to
1076  * lock all the matching tuples, but also to conflict with new insertions
1077  * into the table. In an indexscan, we take page locks on the index pages
1078  * covering the range specified in the scan qual, but in a heap scan there
1079  * is nothing more fine-grained to lock. A bitmap scan is a different
1080  * story, there we have already scanned the index and locked the index
1081  * pages covering the predicate. But in that case we still have to lock
1082  * any matching heap tuples. For sample scan we could optimize the locking
1083  * to be at least page-level granularity, but we'd need to add per-tuple
1084  * locking for that.
1085  */
1087  {
1088  /*
1089  * Ensure a missing snapshot is noticed reliably, even if the
1090  * isolation mode means predicate locking isn't performed (and
1091  * therefore the snapshot isn't used here).
1092  */
1093  Assert(snapshot);
1094  PredicateLockRelation(relation, snapshot);
1095  }
1096 
1097  /* we only need to set this up once */
1098  scan->rs_ctup.t_tableOid = RelationGetRelid(relation);
1099 
1100  /*
1101  * Allocate memory to keep track of page allocation for parallel workers
1102  * when doing a parallel scan.
1103  */
1104  if (parallel_scan != NULL)
1106  else
1107  scan->rs_parallelworkerdata = NULL;
1108 
1109  /*
1110  * we do this here instead of in initscan() because heap_rescan also calls
1111  * initscan() and we don't want to allocate memory again
1112  */
1113  if (nkeys > 0)
1114  scan->rs_base.rs_key = (ScanKey) palloc(sizeof(ScanKeyData) * nkeys);
1115  else
1116  scan->rs_base.rs_key = NULL;
1117 
1118  initscan(scan, key, false);
1119 
1120  scan->rs_read_stream = NULL;
1121 
1122  /*
1123  * Set up a read stream for sequential scans and TID range scans. This
1124  * should be done after initscan() because initscan() allocates the
1125  * BufferAccessStrategy object passed to the read stream API.
1126  */
1127  if (scan->rs_base.rs_flags & SO_TYPE_SEQSCAN ||
1129  {
1131 
1132  if (scan->rs_base.rs_parallel)
1134  else
1136 
1138  scan->rs_strategy,
1139  scan->rs_base.rs_rd,
1140  MAIN_FORKNUM,
1141  cb,
1142  scan,
1143  0);
1144  }
1145 
1146 
1147  return (TableScanDesc) scan;
1148 }

References Assert, heap_scan_stream_read_next_parallel(), heap_scan_stream_read_next_serial(), if(), initscan(), InvalidBuffer, IsMVCCSnapshot, sort-test::key, MAIN_FORKNUM, palloc(), PredicateLockRelation(), read_stream_begin_relation(), READ_STREAM_SEQUENTIAL, RelationGetRelid, RelationIncrementReferenceCount(), HeapScanDescData::rs_base, HeapScanDescData::rs_ctup, HeapScanDescData::rs_empty_tuples_pending, TableScanDescData::rs_flags, TableScanDescData::rs_key, TableScanDescData::rs_nkeys, TableScanDescData::rs_parallel, HeapScanDescData::rs_parallelworkerdata, TableScanDescData::rs_rd, HeapScanDescData::rs_read_stream, TableScanDescData::rs_snapshot, HeapScanDescData::rs_strategy, HeapScanDescData::rs_vmbuffer, SO_ALLOW_PAGEMODE, SO_TYPE_SAMPLESCAN, SO_TYPE_SEQSCAN, SO_TYPE_TIDRANGESCAN, and HeapTupleData::t_tableOid.
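
A minimal sequential-scan sketch built from heap_beginscan(), heap_getnext() and heap_endscan(); most backend code goes through the table AM wrappers (table_beginscan() and friends) instead, so this is illustrative only. The open relation and snapshot are assumptions of the example:

#include "postgres.h"
#include "access/heapam.h"
#include "access/tableam.h"

/* Sketch: count the heap tuples visible to the given snapshot. */
static uint64
count_visible_tuples(Relation rel, Snapshot snapshot)
{
    TableScanDesc scan;
    HeapTuple   tuple;
    uint64      ntuples = 0;

    scan = heap_beginscan(rel, snapshot, 0, NULL, NULL,
                          SO_TYPE_SEQSCAN | SO_ALLOW_PAGEMODE);

    while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
        ntuples++;

    heap_endscan(scan);
    return ntuples;
}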

◆ heap_delete()

TM_Result heap_delete(Relation relation, ItemPointer tid, CommandId cid, Snapshot crosscheck, bool wait, struct TM_FailureData *tmfd, bool changingPart)

Definition at line 2679 of file heapam.c.

2682 {
2683  TM_Result result;
2685  ItemId lp;
2686  HeapTupleData tp;
2687  Page page;
2688  BlockNumber block;
2689  Buffer buffer;
2690  Buffer vmbuffer = InvalidBuffer;
2691  TransactionId new_xmax;
2692  uint16 new_infomask,
2693  new_infomask2;
2694  bool have_tuple_lock = false;
2695  bool iscombo;
2696  bool all_visible_cleared = false;
2697  HeapTuple old_key_tuple = NULL; /* replica identity of the tuple */
2698  bool old_key_copied = false;
2699 
2700  Assert(ItemPointerIsValid(tid));
2701 
2702  /*
2703  * Forbid this during a parallel operation, lest it allocate a combo CID.
2704  * Other workers might need that combo CID for visibility checks, and we
2705  * have no provision for broadcasting it to them.
2706  */
2707  if (IsInParallelMode())
2708  ereport(ERROR,
2709  (errcode(ERRCODE_INVALID_TRANSACTION_STATE),
2710  errmsg("cannot delete tuples during a parallel operation")));
2711 
2712  block = ItemPointerGetBlockNumber(tid);
2713  buffer = ReadBuffer(relation, block);
2714  page = BufferGetPage(buffer);
2715 
2716  /*
2717  * Before locking the buffer, pin the visibility map page if it appears to
2718  * be necessary. Since we haven't got the lock yet, someone else might be
2719  * in the middle of changing this, so we'll need to recheck after we have
2720  * the lock.
2721  */
2722  if (PageIsAllVisible(page))
2723  visibilitymap_pin(relation, block, &vmbuffer);
2724 
2726 
2727  lp = PageGetItemId(page, ItemPointerGetOffsetNumber(tid));
2728  Assert(ItemIdIsNormal(lp));
2729 
2730  tp.t_tableOid = RelationGetRelid(relation);
2731  tp.t_data = (HeapTupleHeader) PageGetItem(page, lp);
2732  tp.t_len = ItemIdGetLength(lp);
2733  tp.t_self = *tid;
2734 
2735 l1:
2736 
2737  /*
2738  * If we didn't pin the visibility map page and the page has become all
2739  * visible while we were busy locking the buffer, we'll have to unlock and
2740  * re-lock, to avoid holding the buffer lock across an I/O. That's a bit
2741  * unfortunate, but hopefully shouldn't happen often.
2742  */
2743  if (vmbuffer == InvalidBuffer && PageIsAllVisible(page))
2744  {
2745  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
2746  visibilitymap_pin(relation, block, &vmbuffer);
2748  }
2749 
2750  result = HeapTupleSatisfiesUpdate(&tp, cid, buffer);
2751 
2752  if (result == TM_Invisible)
2753  {
2754  UnlockReleaseBuffer(buffer);
2755  ereport(ERROR,
2756  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
2757  errmsg("attempted to delete invisible tuple")));
2758  }
2759  else if (result == TM_BeingModified && wait)
2760  {
2761  TransactionId xwait;
2762  uint16 infomask;
2763 
2764  /* must copy state data before unlocking buffer */
2765  xwait = HeapTupleHeaderGetRawXmax(tp.t_data);
2766  infomask = tp.t_data->t_infomask;
2767 
2768  /*
2769  * Sleep until concurrent transaction ends -- except when there's a
2770  * single locker and it's our own transaction. Note we don't care
2771  * which lock mode the locker has, because we need the strongest one.
2772  *
2773  * Before sleeping, we need to acquire tuple lock to establish our
2774  * priority for the tuple (see heap_lock_tuple). LockTuple will
2775  * release us when we are next-in-line for the tuple.
2776  *
2777  * If we are forced to "start over" below, we keep the tuple lock;
2778  * this arranges that we stay at the head of the line while rechecking
2779  * tuple state.
2780  */
2781  if (infomask & HEAP_XMAX_IS_MULTI)
2782  {
2783  bool current_is_member = false;
2784 
2785  if (DoesMultiXactIdConflict((MultiXactId) xwait, infomask,
2786  LockTupleExclusive, &current_is_member))
2787  {
2788  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
2789 
2790  /*
2791  * Acquire the lock, if necessary (but skip it when we're
2792  * requesting a lock and already have one; avoids deadlock).
2793  */
2794  if (!current_is_member)
2796  LockWaitBlock, &have_tuple_lock);
2797 
2798  /* wait for multixact */
2800  relation, &(tp.t_self), XLTW_Delete,
2801  NULL);
2803 
2804  /*
2805  * If xwait had just locked the tuple then some other xact
2806  * could update this tuple before we get to this point. Check
2807  * for xmax change, and start over if so.
2808  *
2809  * We also must start over if we didn't pin the VM page, and
2810  * the page has become all visible.
2811  */
2812  if ((vmbuffer == InvalidBuffer && PageIsAllVisible(page)) ||
2813  xmax_infomask_changed(tp.t_data->t_infomask, infomask) ||
2815  xwait))
2816  goto l1;
2817  }
2818 
2819  /*
2820  * You might think the multixact is necessarily done here, but not
2821  * so: it could have surviving members, namely our own xact or
2822  * other subxacts of this backend. It is legal for us to delete
2823  * the tuple in either case, however (the latter case is
2824  * essentially a situation of upgrading our former shared lock to
2825  * exclusive). We don't bother changing the on-disk hint bits
2826  * since we are about to overwrite the xmax altogether.
2827  */
2828  }
2829  else if (!TransactionIdIsCurrentTransactionId(xwait))
2830  {
2831  /*
2832  * Wait for regular transaction to end; but first, acquire tuple
2833  * lock.
2834  */
2835  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
2837  LockWaitBlock, &have_tuple_lock);
2838  XactLockTableWait(xwait, relation, &(tp.t_self), XLTW_Delete);
2840 
2841  /*
2842  * xwait is done, but if xwait had just locked the tuple then some
2843  * other xact could update this tuple before we get to this point.
2844  * Check for xmax change, and start over if so.
2845  *
2846  * We also must start over if we didn't pin the VM page, and the
2847  * page has become all visible.
2848  */
2849  if ((vmbuffer == InvalidBuffer && PageIsAllVisible(page)) ||
2850  xmax_infomask_changed(tp.t_data->t_infomask, infomask) ||
2852  xwait))
2853  goto l1;
2854 
2855  /* Otherwise check if it committed or aborted */
2856  UpdateXmaxHintBits(tp.t_data, buffer, xwait);
2857  }
2858 
2859  /*
2860  * We may overwrite if previous xmax aborted, or if it committed but
2861  * only locked the tuple without updating it.
2862  */
2863  if ((tp.t_data->t_infomask & HEAP_XMAX_INVALID) ||
2866  result = TM_Ok;
2867  else if (!ItemPointerEquals(&tp.t_self, &tp.t_data->t_ctid))
2868  result = TM_Updated;
2869  else
2870  result = TM_Deleted;
2871  }
2872 
2873  /* sanity check the result HeapTupleSatisfiesUpdate() and the logic above */
2874  if (result != TM_Ok)
2875  {
2876  Assert(result == TM_SelfModified ||
2877  result == TM_Updated ||
2878  result == TM_Deleted ||
2879  result == TM_BeingModified);
2881  Assert(result != TM_Updated ||
2882  !ItemPointerEquals(&tp.t_self, &tp.t_data->t_ctid));
2883  }
2884 
2885  if (crosscheck != InvalidSnapshot && result == TM_Ok)
2886  {
2887  /* Perform additional check for transaction-snapshot mode RI updates */
2888  if (!HeapTupleSatisfiesVisibility(&tp, crosscheck, buffer))
2889  result = TM_Updated;
2890  }
2891 
2892  if (result != TM_Ok)
2893  {
2894  tmfd->ctid = tp.t_data->t_ctid;
2896  if (result == TM_SelfModified)
2897  tmfd->cmax = HeapTupleHeaderGetCmax(tp.t_data);
2898  else
2899  tmfd->cmax = InvalidCommandId;
2900  UnlockReleaseBuffer(buffer);
2901  if (have_tuple_lock)
2902  UnlockTupleTuplock(relation, &(tp.t_self), LockTupleExclusive);
2903  if (vmbuffer != InvalidBuffer)
2904  ReleaseBuffer(vmbuffer);
2905  return result;
2906  }
2907 
2908  /*
2909  * We're about to do the actual delete -- check for conflict first, to
2910  * avoid possibly having to roll back work we've just done.
2911  *
2912  * This is safe without a recheck as long as there is no possibility of
2913  * another process scanning the page between this check and the delete
2914  * being visible to the scan (i.e., an exclusive buffer content lock is
2915  * continuously held from this point until the tuple delete is visible).
2916  */
2917  CheckForSerializableConflictIn(relation, tid, BufferGetBlockNumber(buffer));
2918 
2919  /* replace cid with a combo CID if necessary */
2920  HeapTupleHeaderAdjustCmax(tp.t_data, &cid, &iscombo);
2921 
2922  /*
2923  * Compute replica identity tuple before entering the critical section so
2924  * we don't PANIC upon a memory allocation failure.
2925  */
2926  old_key_tuple = ExtractReplicaIdentity(relation, &tp, true, &old_key_copied);
2927 
2928  /*
2929  * If this is the first possibly-multixact-able operation in the current
2930  * transaction, set my per-backend OldestMemberMXactId setting. We can be
2931  * certain that the transaction will never become a member of any older
2932  * MultiXactIds than that. (We have to do this even if we end up just
2933  * using our own TransactionId below, since some other backend could
2934  * incorporate our XID into a MultiXact immediately afterwards.)
2935  */
2937 
2940  xid, LockTupleExclusive, true,
2941  &new_xmax, &new_infomask, &new_infomask2);
2942 
2944 
2945  /*
2946  * If this transaction commits, the tuple will become DEAD sooner or
2947  * later. Set flag that this page is a candidate for pruning once our xid
2948  * falls below the OldestXmin horizon. If the transaction finally aborts,
2949  * the subsequent page pruning will be a no-op and the hint will be
2950  * cleared.
2951  */
2952  PageSetPrunable(page, xid);
2953 
2954  if (PageIsAllVisible(page))
2955  {
2956  all_visible_cleared = true;
2957  PageClearAllVisible(page);
2958  visibilitymap_clear(relation, BufferGetBlockNumber(buffer),
2959  vmbuffer, VISIBILITYMAP_VALID_BITS);
2960  }
2961 
2962  /* store transaction information of xact deleting the tuple */
2965  tp.t_data->t_infomask |= new_infomask;
2966  tp.t_data->t_infomask2 |= new_infomask2;
2968  HeapTupleHeaderSetXmax(tp.t_data, new_xmax);
2969  HeapTupleHeaderSetCmax(tp.t_data, cid, iscombo);
2970  /* Make sure there is no forward chain link in t_ctid */
2971  tp.t_data->t_ctid = tp.t_self;
2972 
2973  /* Signal that this is actually a move into another partition */
2974  if (changingPart)
2976 
2977  MarkBufferDirty(buffer);
2978 
2979  /*
2980  * XLOG stuff
2981  *
2982  * NB: heap_abort_speculative() uses the same xlog record and replay
2983  * routines.
2984  */
2985  if (RelationNeedsWAL(relation))
2986  {
2987  xl_heap_delete xlrec;
2988  xl_heap_header xlhdr;
2989  XLogRecPtr recptr;
2990 
2991  /*
2992  * For logical decode we need combo CIDs to properly decode the
2993  * catalog
2994  */
2996  log_heap_new_cid(relation, &tp);
2997 
2998  xlrec.flags = 0;
2999  if (all_visible_cleared)
3001  if (changingPart)
3004  tp.t_data->t_infomask2);
3006  xlrec.xmax = new_xmax;
3007 
3008  if (old_key_tuple != NULL)
3009  {
3010  if (relation->rd_rel->relreplident == REPLICA_IDENTITY_FULL)
3012  else
3014  }
3015 
3016  XLogBeginInsert();
3017  XLogRegisterData((char *) &xlrec, SizeOfHeapDelete);
3018 
3019  XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
3020 
3021  /*
3022  * Log replica identity of the deleted tuple if there is one
3023  */
3024  if (old_key_tuple != NULL)
3025  {
3026  xlhdr.t_infomask2 = old_key_tuple->t_data->t_infomask2;
3027  xlhdr.t_infomask = old_key_tuple->t_data->t_infomask;
3028  xlhdr.t_hoff = old_key_tuple->t_data->t_hoff;
3029 
3030  XLogRegisterData((char *) &xlhdr, SizeOfHeapHeader);
3031  XLogRegisterData((char *) old_key_tuple->t_data
3033  old_key_tuple->t_len
3035  }
3036 
3037  /* filtering by origin on a row level is much more efficient */
3039 
3040  recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_DELETE);
3041 
3042  PageSetLSN(page, recptr);
3043  }
3044 
3045  END_CRIT_SECTION();
3046 
3047  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
3048 
3049  if (vmbuffer != InvalidBuffer)
3050  ReleaseBuffer(vmbuffer);
3051 
3052  /*
3053  * If the tuple has toasted out-of-line attributes, we need to delete
3054  * those items too. We have to do this before releasing the buffer
3055  * because we need to look at the contents of the tuple, but it's OK to
3056  * release the content lock on the buffer first.
3057  */
3058  if (relation->rd_rel->relkind != RELKIND_RELATION &&
3059  relation->rd_rel->relkind != RELKIND_MATVIEW)
3060  {
3061  /* toast table entries should never be recursively toasted */
3063  }
3064  else if (HeapTupleHasExternal(&tp))
3065  heap_toast_delete(relation, &tp, false);
3066 
3067  /*
3068  * Mark tuple for invalidation from system caches at next command
3069  * boundary. We have to do this before releasing the buffer because we
3070  * need to look at the contents of the tuple.
3071  */
3072  CacheInvalidateHeapTuple(relation, &tp, NULL);
3073 
3074  /* Now we can release the buffer */
3075  ReleaseBuffer(buffer);
3076 
3077  /*
3078  * Release the lmgr tuple lock, if we had it.
3079  */
3080  if (have_tuple_lock)
3081  UnlockTupleTuplock(relation, &(tp.t_self), LockTupleExclusive);
3082 
3083  pgstat_count_heap_delete(relation);
3084 
3085  if (old_key_tuple != NULL && old_key_copied)
3086  heap_freetuple(old_key_tuple);
3087 
3088  return TM_Ok;
3089 }

References Assert, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage(), CacheInvalidateHeapTuple(), CheckForSerializableConflictIn(), TM_FailureData::cmax, compute_infobits(), compute_new_xmax_infomask(), TM_FailureData::ctid, DoesMultiXactIdConflict(), END_CRIT_SECTION, ereport, errcode(), errmsg(), ERROR, ExtractReplicaIdentity(), xl_heap_delete::flags, GetCurrentTransactionId(), heap_acquire_tuplock(), heap_freetuple(), HEAP_KEYS_UPDATED, HEAP_MOVED, heap_toast_delete(), HEAP_XMAX_BITS, HEAP_XMAX_INVALID, HEAP_XMAX_IS_LOCKED_ONLY, HEAP_XMAX_IS_MULTI, HeapTupleHasExternal, HeapTupleHeaderAdjustCmax(), HeapTupleHeaderClearHotUpdated, HeapTupleHeaderGetCmax(), HeapTupleHeaderGetRawXmax, HeapTupleHeaderGetUpdateXid, HeapTupleHeaderIsOnlyLocked(), HeapTupleHeaderSetCmax, HeapTupleHeaderSetMovedPartitions, HeapTupleHeaderSetXmax, HeapTupleSatisfiesUpdate(), HeapTupleSatisfiesVisibility(), xl_heap_delete::infobits_set, InvalidBuffer, InvalidCommandId, InvalidSnapshot, IsInParallelMode(), ItemIdGetLength, ItemIdIsNormal, ItemPointerEquals(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), ItemPointerIsValid(), LockBuffer(), LockTupleExclusive, LockWaitBlock, log_heap_new_cid(), MarkBufferDirty(), MultiXactIdSetOldestMember(), MultiXactIdWait(), MultiXactStatusUpdate, xl_heap_delete::offnum, PageClearAllVisible(), PageGetItem(), PageGetItemId(), PageIsAllVisible(), PageSetLSN(), PageSetPrunable, pgstat_count_heap_delete(), RelationData::rd_rel, ReadBuffer(), REGBUF_STANDARD, RelationGetRelid, RelationIsAccessibleInLogicalDecoding, RelationNeedsWAL, ReleaseBuffer(), SizeOfHeapDelete, SizeOfHeapHeader, SizeofHeapTupleHeader, START_CRIT_SECTION, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, xl_heap_header::t_hoff, HeapTupleHeaderData::t_hoff, xl_heap_header::t_infomask, HeapTupleHeaderData::t_infomask, xl_heap_header::t_infomask2, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TM_BeingModified, TM_Deleted, TM_Invisible, TM_Ok, TM_SelfModified, TM_Updated, TransactionIdEquals, TransactionIdIsCurrentTransactionId(), UnlockReleaseBuffer(), UnlockTupleTuplock, UpdateXmaxHintBits(), visibilitymap_clear(), visibilitymap_pin(), VISIBILITYMAP_VALID_BITS, XactLockTableWait(), XLH_DELETE_ALL_VISIBLE_CLEARED, XLH_DELETE_CONTAINS_OLD_KEY, XLH_DELETE_CONTAINS_OLD_TUPLE, XLH_DELETE_IS_PARTITION_MOVE, XLOG_HEAP_DELETE, XLOG_INCLUDE_ORIGIN, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), XLogRegisterData(), XLogSetRecordFlags(), XLTW_Delete, xl_heap_delete::xmax, TM_FailureData::xmax, and xmax_infomask_changed().

Referenced by heapam_tuple_delete(), and simple_heap_delete().
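
A hedged sketch of calling heap_delete() directly and interpreting its TM_Result; ordinary callers should prefer simple_heap_delete() (declared above), which wraps this pattern and errors out on unexpected results. The valid TID is an assumption of the example:

#include "postgres.h"
#include "access/heapam.h"
#include "access/xact.h"

/* Sketch: delete the tuple at tid, reporting concurrent-change outcomes. */
static void
delete_tuple(Relation rel, ItemPointer tid)
{
    TM_FailureData tmfd;
    TM_Result   result;

    result = heap_delete(rel, tid, GetCurrentCommandId(true),
                         InvalidSnapshot,  /* no crosscheck snapshot */
                         true,             /* wait for concurrent updaters */
                         &tmfd,
                         false);           /* not a partition move */

    switch (result)
    {
        case TM_Ok:
            break;
        case TM_SelfModified:
            elog(ERROR, "tuple already modified by this command (cmax %u)",
                 tmfd.cmax);
            break;
        default:
            elog(ERROR, "tuple concurrently updated or deleted");
    }
}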

◆ heap_endscan()

void heap_endscan(TableScanDesc sscan)

Definition at line 1204 of file heapam.c.

1205 {
1206  HeapScanDesc scan = (HeapScanDesc) sscan;
1207 
1208  /* Note: no locking manipulations needed */
1209 
1210  /*
1211  * unpin scan buffers
1212  */
1213  if (BufferIsValid(scan->rs_cbuf))
1214  ReleaseBuffer(scan->rs_cbuf);
1215 
1216  if (BufferIsValid(scan->rs_vmbuffer))
1217  ReleaseBuffer(scan->rs_vmbuffer);
1218 
1219  Assert(scan->rs_empty_tuples_pending == 0);
1220 
1221  /*
1222  * Must free the read stream before freeing the BufferAccessStrategy.
1223  */
1224  if (scan->rs_read_stream)
1226 
1227  /*
1228  * decrement relation reference count and free scan descriptor storage
1229  */
1231 
1232  if (scan->rs_base.rs_key)
1233  pfree(scan->rs_base.rs_key);
1234 
1235  if (scan->rs_strategy != NULL)
1237 
1238  if (scan->rs_parallelworkerdata != NULL)
1240 
1241  if (scan->rs_base.rs_flags & SO_TEMP_SNAPSHOT)
1243 
1244  pfree(scan);
1245 }

References Assert, BufferIsValid(), FreeAccessStrategy(), pfree(), read_stream_end(), RelationDecrementReferenceCount(), ReleaseBuffer(), HeapScanDescData::rs_base, HeapScanDescData::rs_cbuf, HeapScanDescData::rs_empty_tuples_pending, TableScanDescData::rs_flags, TableScanDescData::rs_key, HeapScanDescData::rs_parallelworkerdata, TableScanDescData::rs_rd, HeapScanDescData::rs_read_stream, TableScanDescData::rs_snapshot, HeapScanDescData::rs_strategy, HeapScanDescData::rs_vmbuffer, SO_TEMP_SNAPSHOT, and UnregisterSnapshot().

◆ heap_fetch()

bool heap_fetch(Relation relation, Snapshot snapshot, HeapTuple tuple, Buffer *userbuf, bool keep_buf)

Definition at line 1507 of file heapam.c.

1512 {
1513  ItemPointer tid = &(tuple->t_self);
1514  ItemId lp;
1515  Buffer buffer;
1516  Page page;
1517  OffsetNumber offnum;
1518  bool valid;
1519 
1520  /*
1521  * Fetch and pin the appropriate page of the relation.
1522  */
1523  buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
1524 
1525  /*
1526  * Need share lock on buffer to examine tuple commit status.
1527  */
1528  LockBuffer(buffer, BUFFER_LOCK_SHARE);
1529  page = BufferGetPage(buffer);
1530 
1531  /*
1532  * We'd better check for out-of-range offnum in case of VACUUM since the
1533  * TID was obtained.
1534  */
1535  offnum = ItemPointerGetOffsetNumber(tid);
1536  if (offnum < FirstOffsetNumber || offnum > PageGetMaxOffsetNumber(page))
1537  {
1538  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
1539  ReleaseBuffer(buffer);
1540  *userbuf = InvalidBuffer;
1541  tuple->t_data = NULL;
1542  return false;
1543  }
1544 
1545  /*
1546  * get the item line pointer corresponding to the requested tid
1547  */
1548  lp = PageGetItemId(page, offnum);
1549 
1550  /*
1551  * Must check for deleted tuple.
1552  */
1553  if (!ItemIdIsNormal(lp))
1554  {
1555  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
1556  ReleaseBuffer(buffer);
1557  *userbuf = InvalidBuffer;
1558  tuple->t_data = NULL;
1559  return false;
1560  }
1561 
1562  /*
1563  * fill in *tuple fields
1564  */
1565  tuple->t_data = (HeapTupleHeader) PageGetItem(page, lp);
1566  tuple->t_len = ItemIdGetLength(lp);
1567  tuple->t_tableOid = RelationGetRelid(relation);
1568 
1569  /*
1570  * check tuple visibility, then release lock
1571  */
1572  valid = HeapTupleSatisfiesVisibility(tuple, snapshot, buffer);
1573 
1574  if (valid)
1575  PredicateLockTID(relation, &(tuple->t_self), snapshot,
1576  HeapTupleHeaderGetXmin(tuple->t_data));
1577 
1578  HeapCheckForSerializableConflictOut(valid, relation, tuple, buffer, snapshot);
1579 
1580  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
1581 
1582  if (valid)
1583  {
1584  /*
1585  * All checks passed, so return the tuple as valid. Caller is now
1586  * responsible for releasing the buffer.
1587  */
1588  *userbuf = buffer;
1589 
1590  return true;
1591  }
1592 
1593  /* Tuple failed time qual, but maybe caller wants to see it anyway. */
1594  if (keep_buf)
1595  *userbuf = buffer;
1596  else
1597  {
1598  ReleaseBuffer(buffer);
1599  *userbuf = InvalidBuffer;
1600  tuple->t_data = NULL;
1601  }
1602 
1603  return false;
1604 }

References BUFFER_LOCK_SHARE, BUFFER_LOCK_UNLOCK, BufferGetPage(), HeapCheckForSerializableConflictOut(), HeapTupleHeaderGetXmin, HeapTupleSatisfiesVisibility(), InvalidBuffer, ItemIdGetLength, ItemIdIsNormal, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), LockBuffer(), PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PredicateLockTID(), ReadBuffer(), RelationGetRelid, ReleaseBuffer(), HeapTupleData::t_data, HeapTupleData::t_len, HeapTupleData::t_self, and HeapTupleData::t_tableOid.

Referenced by heap_lock_updated_tuple_rec(), heapam_fetch_row_version(), and heapam_tuple_lock().
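
A minimal sketch of fetching a single tuple by TID with heap_fetch(); the caller-supplied snapshot and TID are assumptions, and on success the pin on the returned buffer must be released by the caller:

#include "postgres.h"
#include "access/heapam.h"
#include "storage/bufmgr.h"

/* Sketch: return the length of the tuple at tid, or 0 if it is not visible. */
static uint32
fetch_tuple_len(Relation rel, Snapshot snapshot, ItemPointer tid)
{
    HeapTupleData tuple;
    Buffer      buf;
    uint32      len = 0;

    tuple.t_self = *tid;    /* heap_fetch() reads the TID from t_self */
    if (heap_fetch(rel, snapshot, &tuple, &buf, false))
    {
        len = tuple.t_len;
        ReleaseBuffer(buf); /* we own the pin after a successful fetch */
    }
    return len;
}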

◆ heap_finish_speculative()

void heap_finish_speculative(Relation relation, ItemPointer tid)

Definition at line 5815 of file heapam.c.

5816 {
5817  Buffer buffer;
5818  Page page;
5819  OffsetNumber offnum;
5820  ItemId lp = NULL;
5821  HeapTupleHeader htup;
5822 
5823  buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
5825  page = (Page) BufferGetPage(buffer);
5826 
5827  offnum = ItemPointerGetOffsetNumber(tid);
5828  if (PageGetMaxOffsetNumber(page) >= offnum)
5829  lp = PageGetItemId(page, offnum);
5830 
5831  if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
5832  elog(ERROR, "invalid lp");
5833 
5834  htup = (HeapTupleHeader) PageGetItem(page, lp);
5835 
5836  /* NO EREPORT(ERROR) from here till changes are logged */
5838 
5840 
5841  MarkBufferDirty(buffer);
5842 
5843  /*
5844  * Replace the speculative insertion token with a real t_ctid, pointing to
5845  * itself like it does on regular tuples.
5846  */
5847  htup->t_ctid = *tid;
5848 
5849  /* XLOG stuff */
5850  if (RelationNeedsWAL(relation))
5851  {
5852  xl_heap_confirm xlrec;
5853  XLogRecPtr recptr;
5854 
5855  xlrec.offnum = ItemPointerGetOffsetNumber(tid);
5856 
5857  XLogBeginInsert();
5858 
5859  /* We want the same filtering on this as on a plain insert */
5861 
5862  XLogRegisterData((char *) &xlrec, SizeOfHeapConfirm);
5863  XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
5864 
5865  recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_CONFIRM);
5866 
5867  PageSetLSN(page, recptr);
5868  }
5869 
5870  END_CRIT_SECTION();
5871 
5872  UnlockReleaseBuffer(buffer);
5873 }

References Assert, BUFFER_LOCK_EXCLUSIVE, BufferGetPage(), elog, END_CRIT_SECTION, ERROR, HeapTupleHeaderIsSpeculative, ItemIdIsNormal, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), LockBuffer(), MarkBufferDirty(), xl_heap_confirm::offnum, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PageSetLSN(), ReadBuffer(), REGBUF_STANDARD, RelationNeedsWAL, SizeOfHeapConfirm, START_CRIT_SECTION, HeapTupleHeaderData::t_ctid, UnlockReleaseBuffer(), XLOG_HEAP_CONFIRM, XLOG_INCLUDE_ORIGIN, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), XLogRegisterData(), and XLogSetRecordFlags().

Referenced by heapam_tuple_complete_speculative().
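
A condensed sketch of the speculative-insertion protocol this function completes (hypothetical helper; the real callers are heapam_tuple_insert_speculative() and heapam_tuple_complete_speculative(), and the executor's conflict re-check between insertion and confirmation is omitted here):

#include "postgres.h"

#include "access/heapam.h"
#include "access/htup_details.h"
#include "access/xact.h"
#include "storage/lmgr.h"

/* Hypothetical helper: insert tup speculatively, then confirm or back it out. */
static void
insert_speculatively(Relation rel, HeapTuple tup, CommandId cid, bool keep)
{
    uint32      token = SpeculativeInsertionLockAcquire(GetCurrentTransactionId());

    /* Stamp the tuple with the speculative token before it reaches a page. */
    HeapTupleHeaderSetSpeculativeToken(tup->t_data, token);
    heap_insert(rel, tup, cid, HEAP_INSERT_SPECULATIVE, NULL);

    /* ... a real caller re-checks for conflicts here before deciding ... */

    if (keep)
        heap_finish_speculative(rel, &tup->t_self); /* t_ctid becomes a normal self-pointer */
    else
        heap_abort_speculative(rel, &tup->t_self);  /* "super-delete" the tuple */

    SpeculativeInsertionLockRelease(GetCurrentTransactionId());
}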

◆ heap_freeze_prepared_tuples()

void heap_freeze_prepared_tuples ( Buffer  buffer,
HeapTupleFreeze tuples,
int  ntuples 
)

Definition at line 6891 of file heapam.c.

6892 {
6893  Page page = BufferGetPage(buffer);
6894 
6895  for (int i = 0; i < ntuples; i++)
6896  {
6897  HeapTupleFreeze *frz = tuples + i;
6898  ItemId itemid = PageGetItemId(page, frz->offset);
6899  HeapTupleHeader htup;
6900 
6901  htup = (HeapTupleHeader) PageGetItem(page, itemid);
6902  heap_execute_freeze_tuple(htup, frz);
6903  }
6904 }
static void heap_execute_freeze_tuple(HeapTupleHeader tuple, HeapTupleFreeze *frz)
Definition: heapam.c:6815
int i
Definition: isn.c:73
OffsetNumber offset
Definition: heapam.h:151

References BufferGetPage(), heap_execute_freeze_tuple(), i, HeapTupleFreeze::offset, PageGetItem(), and PageGetItemId().

Referenced by heap_page_prune_and_freeze().

◆ heap_freeze_tuple()

bool heap_freeze_tuple ( HeapTupleHeader  tuple,
TransactionId  relfrozenxid,
TransactionId  relminmxid,
TransactionId  FreezeLimit,
TransactionId  MultiXactCutoff 
)

Definition at line 6913 of file heapam.c.

6916 {
6917  HeapTupleFreeze frz;
6918  bool do_freeze;
6919  bool totally_frozen;
6920  struct VacuumCutoffs cutoffs;
6921  HeapPageFreeze pagefrz;
6922 
6923  cutoffs.relfrozenxid = relfrozenxid;
6924  cutoffs.relminmxid = relminmxid;
6925  cutoffs.OldestXmin = FreezeLimit;
6926  cutoffs.OldestMxact = MultiXactCutoff;
6927  cutoffs.FreezeLimit = FreezeLimit;
6928  cutoffs.MultiXactCutoff = MultiXactCutoff;
6929 
6930  pagefrz.freeze_required = true;
6931  pagefrz.FreezePageRelfrozenXid = FreezeLimit;
6932  pagefrz.FreezePageRelminMxid = MultiXactCutoff;
6933  pagefrz.NoFreezePageRelfrozenXid = FreezeLimit;
6934  pagefrz.NoFreezePageRelminMxid = MultiXactCutoff;
6935 
6936  do_freeze = heap_prepare_freeze_tuple(tuple, &cutoffs,
6937  &pagefrz, &frz, &totally_frozen);
6938 
6939  /*
6940  * Note that because this is not a WAL-logged operation, we don't need to
6941  * fill in the offset in the freeze record.
6942  */
6943 
6944  if (do_freeze)
6945  heap_execute_freeze_tuple(tuple, &frz);
6946  return do_freeze;
6947 }
bool heap_prepare_freeze_tuple(HeapTupleHeader tuple, const struct VacuumCutoffs *cutoffs, HeapPageFreeze *pagefrz, HeapTupleFreeze *frz, bool *totally_frozen)
Definition: heapam.c:6541
TransactionId FreezeLimit
Definition: vacuum.h:277
TransactionId relfrozenxid
Definition: vacuum.h:251
MultiXactId relminmxid
Definition: vacuum.h:252
MultiXactId MultiXactCutoff
Definition: vacuum.h:278

References VacuumCutoffs::FreezeLimit, heap_execute_freeze_tuple(), heap_prepare_freeze_tuple(), VacuumCutoffs::MultiXactCutoff, VacuumCutoffs::OldestMxact, VacuumCutoffs::OldestXmin, VacuumCutoffs::relfrozenxid, and VacuumCutoffs::relminmxid.

Referenced by rewrite_heap_tuple().
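
A hedged sketch of the rewrite-style usage (hypothetical helper name): the tuple is assumed to be a local, palloc'd copy bound for a new relfilelocator, so no buffer lock or WAL logging is involved at this point.

#include "postgres.h"

#include "access/heapam.h"

/* Hypothetical helper: freeze an in-memory tuple copy against the given cutoffs. */
static void
maybe_freeze_copied_tuple(HeapTuple copied_tuple,
                          TransactionId relfrozenxid, TransactionId relminmxid,
                          TransactionId freeze_limit, TransactionId multixact_cutoff)
{
    /*
     * heap_freeze_tuple() decides whether the header needs freezing relative
     * to the supplied cutoffs and, if so, rewrites xmin/xmax and the infomask
     * bits in place; it returns true when it changed anything.
     */
    if (heap_freeze_tuple(copied_tuple->t_data,
                          relfrozenxid, relminmxid,
                          freeze_limit, multixact_cutoff))
        elog(DEBUG2, "tuple header was frozen in place");
}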

◆ heap_get_latest_tid()

void heap_get_latest_tid ( TableScanDesc  sscan,
ItemPointer  tid 
)

Definition at line 1779 of file heapam.c.

1781 {
1782  Relation relation = sscan->rs_rd;
1783  Snapshot snapshot = sscan->rs_snapshot;
1784  ItemPointerData ctid;
1785  TransactionId priorXmax;
1786 
1787  /*
1788  * table_tuple_get_latest_tid() verified that the passed in tid is valid.
1789  * Assume that t_ctid links are valid however - there shouldn't be invalid
1790  * ones in the table.
1791  */
1792  Assert(ItemPointerIsValid(tid));
1793 
1794  /*
1795  * Loop to chase down t_ctid links. At top of loop, ctid is the tuple we
1796  * need to examine, and *tid is the TID we will return if ctid turns out
1797  * to be bogus.
1798  *
1799  * Note that we will loop until we reach the end of the t_ctid chain.
1800  * Depending on the snapshot passed, there might be at most one visible
1801  * version of the row, but we don't try to optimize for that.
1802  */
1803  ctid = *tid;
1804  priorXmax = InvalidTransactionId; /* cannot check first XMIN */
1805  for (;;)
1806  {
1807  Buffer buffer;
1808  Page page;
1809  OffsetNumber offnum;
1810  ItemId lp;
1811  HeapTupleData tp;
1812  bool valid;
1813 
1814  /*
1815  * Read, pin, and lock the page.
1816  */
1817  buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(&ctid));
1818  LockBuffer(buffer, BUFFER_LOCK_SHARE);
1819  page = BufferGetPage(buffer);
1820 
1821  /*
1822  * Check for bogus item number. This is not treated as an error
1823  * condition because it can happen while following a t_ctid link. We
1824  * just assume that the prior tid is OK and return it unchanged.
1825  */
1826  offnum = ItemPointerGetOffsetNumber(&ctid);
1827  if (offnum < FirstOffsetNumber || offnum > PageGetMaxOffsetNumber(page))
1828  {
1829  UnlockReleaseBuffer(buffer);
1830  break;
1831  }
1832  lp = PageGetItemId(page, offnum);
1833  if (!ItemIdIsNormal(lp))
1834  {
1835  UnlockReleaseBuffer(buffer);
1836  break;
1837  }
1838 
1839  /* OK to access the tuple */
1840  tp.t_self = ctid;
1841  tp.t_data = (HeapTupleHeader) PageGetItem(page, lp);
1842  tp.t_len = ItemIdGetLength(lp);
1843  tp.t_tableOid = RelationGetRelid(relation);
1844 
1845  /*
1846  * After following a t_ctid link, we might arrive at an unrelated
1847  * tuple. Check for XMIN match.
1848  */
1849  if (TransactionIdIsValid(priorXmax) &&
1850  !TransactionIdEquals(priorXmax, HeapTupleHeaderGetXmin(tp.t_data)))
1851  {
1852  UnlockReleaseBuffer(buffer);
1853  break;
1854  }
1855 
1856  /*
1857  * Check tuple visibility; if visible, set it as the new result
1858  * candidate.
1859  */
1860  valid = HeapTupleSatisfiesVisibility(&tp, snapshot, buffer);
1861  HeapCheckForSerializableConflictOut(valid, relation, &tp, buffer, snapshot);
1862  if (valid)
1863  *tid = ctid;
1864 
1865  /*
1866  * If there's a valid t_ctid link, follow it, else we're done.
1867  */
1868  if ((tp.t_data->t_infomask & HEAP_XMAX_INVALID) ||
1869  HeapTupleHeaderIsOnlyLocked(tp.t_data) ||
1870  HeapTupleHeaderIndicatesMovedPartitions(tp.t_data) ||
1871  ItemPointerEquals(&tp.t_self, &tp.t_data->t_ctid))
1872  {
1873  UnlockReleaseBuffer(buffer);
1874  break;
1875  }
1876 
1877  ctid = tp.t_data->t_ctid;
1878  priorXmax = HeapTupleHeaderGetUpdateXid(tp.t_data);
1879  UnlockReleaseBuffer(buffer);
1880  } /* end of loop */
1881 }
#define HeapTupleHeaderIndicatesMovedPartitions(tup)
Definition: htup_details.h:444

References Assert, BUFFER_LOCK_SHARE, BufferGetPage(), HEAP_XMAX_INVALID, HeapCheckForSerializableConflictOut(), HeapTupleHeaderGetUpdateXid, HeapTupleHeaderGetXmin, HeapTupleHeaderIndicatesMovedPartitions, HeapTupleHeaderIsOnlyLocked(), HeapTupleSatisfiesVisibility(), InvalidTransactionId, ItemIdGetLength, ItemIdIsNormal, ItemPointerEquals(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), ItemPointerIsValid(), LockBuffer(), PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), ReadBuffer(), RelationGetRelid, TableScanDescData::rs_rd, TableScanDescData::rs_snapshot, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TransactionIdEquals, TransactionIdIsValid, and UnlockReleaseBuffer().
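
A minimal sketch of direct use (hypothetical helper; ordinary callers go through table_tuple_get_latest_tid(), which also validates the starting TID before this chain-chasing loop runs):

#include "postgres.h"

#include "access/heapam.h"
#include "access/tableam.h"
#include "utils/snapmgr.h"

/* Hypothetical helper: resolve a TID to the newest version visible to the active snapshot. */
static ItemPointerData
latest_version_tid(Relation rel, ItemPointerData tid)
{
    TableScanDesc scan = table_beginscan_tid(rel, GetActiveSnapshot());

    /* tid must already be valid; it is overwritten with the newest visible version, if any */
    heap_get_latest_tid(scan, &tid);
    table_endscan(scan);
    return tid;
}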

◆ heap_get_root_tuples()

void heap_get_root_tuples ( Page  page,
OffsetNumber root_offsets 
)

Definition at line 1764 of file pruneheap.c.

1765 {
1766  OffsetNumber offnum,
1767  maxoff;
1768 
1769  MemSet(root_offsets, InvalidOffsetNumber,
1770  MaxHeapTuplesPerPage * sizeof(OffsetNumber));
1771 
1772  maxoff = PageGetMaxOffsetNumber(page);
1773  for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum = OffsetNumberNext(offnum))
1774  {
1775  ItemId lp = PageGetItemId(page, offnum);
1776  HeapTupleHeader htup;
1777  OffsetNumber nextoffnum;
1778  TransactionId priorXmax;
1779 
1780  /* skip unused and dead items */
1781  if (!ItemIdIsUsed(lp) || ItemIdIsDead(lp))
1782  continue;
1783 
1784  if (ItemIdIsNormal(lp))
1785  {
1786  htup = (HeapTupleHeader) PageGetItem(page, lp);
1787 
1788  /*
1789  * Check if this tuple is part of a HOT-chain rooted at some other
1790  * tuple. If so, skip it for now; we'll process it when we find
1791  * its root.
1792  */
1793  if (HeapTupleHeaderIsHeapOnly(htup))
1794  continue;
1795 
1796  /*
1797  * This is either a plain tuple or the root of a HOT-chain.
1798  * Remember it in the mapping.
1799  */
1800  root_offsets[offnum - 1] = offnum;
1801 
1802  /* If it's not the start of a HOT-chain, we're done with it */
1803  if (!HeapTupleHeaderIsHotUpdated(htup))
1804  continue;
1805 
1806  /* Set up to scan the HOT-chain */
1807  nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
1808  priorXmax = HeapTupleHeaderGetUpdateXid(htup);
1809  }
1810  else
1811  {
1812  /* Must be a redirect item. We do not set its root_offsets entry */
1813  Assert(ItemIdIsRedirected(lp));
1814  /* Set up to scan the HOT-chain */
1815  nextoffnum = ItemIdGetRedirect(lp);
1816  priorXmax = InvalidTransactionId;
1817  }
1818 
1819  /*
1820  * Now follow the HOT-chain and collect other tuples in the chain.
1821  *
1822  * Note: Even though this is a nested loop, the complexity of the
1823  * function is O(N) because a tuple in the page should be visited not
1824  * more than twice, once in the outer loop and once in HOT-chain
1825  * chases.
1826  */
1827  for (;;)
1828  {
1829  /* Sanity check (pure paranoia) */
1830  if (offnum < FirstOffsetNumber)
1831  break;
1832 
1833  /*
1834  * An offset past the end of page's line pointer array is possible
1835  * when the array was truncated
1836  */
1837  if (offnum > maxoff)
1838  break;
1839 
1840  lp = PageGetItemId(page, nextoffnum);
1841 
1842  /* Check for broken chains */
1843  if (!ItemIdIsNormal(lp))
1844  break;
1845 
1846  htup = (HeapTupleHeader) PageGetItem(page, lp);
1847 
1848  if (TransactionIdIsValid(priorXmax) &&
1849  !TransactionIdEquals(priorXmax, HeapTupleHeaderGetXmin(htup)))
1850  break;
1851 
1852  /* Remember the root line pointer for this item */
1853  root_offsets[nextoffnum - 1] = offnum;
1854 
1855  /* Advance to next chain member, if any */
1856  if (!HeapTupleHeaderIsHotUpdated(htup))
1857  break;
1858 
1859  /* HOT implies it can't have moved to different partition */
1860  Assert(!HeapTupleHeaderIndicatesMovedPartitions(htup));
1861 
1862  nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
1863  priorXmax = HeapTupleHeaderGetUpdateXid(htup);
1864  }
1865  }
1866 }
#define MemSet(start, val, len)
Definition: c.h:1020
#define MaxHeapTuplesPerPage
Definition: htup_details.h:572
#define HeapTupleHeaderIsHotUpdated(tup)
Definition: htup_details.h:482
#define ItemIdGetRedirect(itemId)
Definition: itemid.h:78
#define ItemIdIsDead(itemId)
Definition: itemid.h:113
#define ItemIdIsUsed(itemId)
Definition: itemid.h:92
#define ItemIdIsRedirected(itemId)
Definition: itemid.h:106
#define InvalidOffsetNumber
Definition: off.h:26
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
#define FirstOffsetNumber
Definition: off.h:27

References Assert, FirstOffsetNumber, HeapTupleHeaderGetUpdateXid, HeapTupleHeaderGetXmin, HeapTupleHeaderIndicatesMovedPartitions, HeapTupleHeaderIsHeapOnly, HeapTupleHeaderIsHotUpdated, InvalidOffsetNumber, InvalidTransactionId, ItemIdGetRedirect, ItemIdIsDead, ItemIdIsNormal, ItemIdIsRedirected, ItemIdIsUsed, ItemPointerGetOffsetNumber(), MaxHeapTuplesPerPage, MemSet, OffsetNumberNext, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), HeapTupleHeaderData::t_ctid, TransactionIdEquals, and TransactionIdIsValid.

Referenced by heapam_index_build_range_scan(), and heapam_index_validate_scan().
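
A hedged sketch of typical use (hypothetical helper): the caller pins and share-locks the page, lets heap_get_root_tuples() fill a MaxHeapTuplesPerPage-sized map, and consults it while the lock is still held.

#include "postgres.h"

#include "access/heapam.h"
#include "access/htup_details.h"
#include "storage/bufmgr.h"
#include "storage/bufpage.h"

/* Hypothetical helper: count the line pointers on one page that map to a HOT-chain root. */
static int
count_mapped_items(Relation rel, BlockNumber blkno)
{
    OffsetNumber root_offsets[MaxHeapTuplesPerPage];
    Buffer       buf = ReadBuffer(rel, blkno);
    int          nmapped = 0;

    /* A share lock suffices; the map is only trustworthy while the lock is held. */
    LockBuffer(buf, BUFFER_LOCK_SHARE);
    heap_get_root_tuples(BufferGetPage(buf), root_offsets);

    for (int i = 0; i < MaxHeapTuplesPerPage; i++)
    {
        /* root_offsets[offnum - 1] stays InvalidOffsetNumber for unused/dead/redirect items */
        if (root_offsets[i] != InvalidOffsetNumber)
            nmapped++;
    }
    UnlockReleaseBuffer(buf);
    return nmapped;
}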

◆ heap_getnext()

HeapTuple heap_getnext ( TableScanDesc  sscan,
ScanDirection  direction 
)

Definition at line 1248 of file heapam.c.

1249 {
1250  HeapScanDesc scan = (HeapScanDesc) sscan;
1251 
1252  /*
1253  * This is still widely used directly, without going through table AM, so
1254  * add a safety check. It's possible we should, at a later point,
1255  * downgrade this to an assert. The reason for checking the AM routine,
1256  * rather than the AM oid, is that this allows to write regression tests
1257  * that create another AM reusing the heap handler.
1258  */
1259  if (unlikely(sscan->rs_rd->rd_tableam != GetHeapamTableAmRoutine()))
1260  ereport(ERROR,
1261  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1262  errmsg_internal("only heap AM is supported")));
1263 
1264  /*
1265  * We don't expect direct calls to heap_getnext with valid CheckXidAlive
1266  * for catalog or regular tables. See detailed comments in xact.c where
1267  * these variables are declared. Normally we have such a check at tableam
1268  * level API but this is called from many places so we need to ensure it
1269  * here.
1270  */
1271  if (unlikely(TransactionIdIsValid(CheckXidAlive) && !bsysscan))
1272  elog(ERROR, "unexpected heap_getnext call during logical decoding");
1273 
1274  /* Note: no locking manipulations needed */
1275 
1276  if (scan->rs_base.rs_flags & SO_ALLOW_PAGEMODE)
1277  heapgettup_pagemode(scan, direction,
1278  scan->rs_base.rs_nkeys, scan->rs_base.rs_key);
1279  else
1280  heapgettup(scan, direction,
1281  scan->rs_base.rs_nkeys, scan->rs_base.rs_key);
1282 
1283  if (scan->rs_ctup.t_data == NULL)
1284  return NULL;
1285 
1286  /*
1287  * if we get here it means we have a new current scan tuple, so point to
1288  * the proper return buffer and return the tuple.
1289  */
1290 
1291  pgstat_count_heap_getnext(scan->rs_base.rs_rd);
1292 
1293  return &scan->rs_ctup;
1294 }
#define unlikely(x)
Definition: c.h:311
int errmsg_internal(const char *fmt,...)
Definition: elog.c:1159
static void heapgettup(HeapScanDesc scan, ScanDirection dir, int nkeys, ScanKey key)
Definition: heapam.c:837
static void heapgettup_pagemode(HeapScanDesc scan, ScanDirection dir, int nkeys, ScanKey key)
Definition: heapam.c:947
const TableAmRoutine * GetHeapamTableAmRoutine(void)
#define pgstat_count_heap_getnext(rel)
Definition: pgstat.h:615
const struct TableAmRoutine * rd_tableam
Definition: rel.h:189
bool bsysscan
Definition: xact.c:98
TransactionId CheckXidAlive
Definition: xact.c:97

References bsysscan, CheckXidAlive, elog, ereport, errcode(), errmsg_internal(), ERROR, GetHeapamTableAmRoutine(), heapgettup(), heapgettup_pagemode(), pgstat_count_heap_getnext, RelationData::rd_tableam, HeapScanDescData::rs_base, HeapScanDescData::rs_ctup, TableScanDescData::rs_flags, TableScanDescData::rs_key, TableScanDescData::rs_nkeys, TableScanDescData::rs_rd, SO_ALLOW_PAGEMODE, HeapTupleData::t_data, TransactionIdIsValid, and unlikely.

Referenced by AlterTableMoveAll(), AlterTableSpaceOptions(), check_db_file_conflict(), CreateDatabaseUsingFileCopy(), do_autovacuum(), DropSetting(), DropTableSpace(), find_typed_table_dependencies(), get_all_vacuum_rels(), get_database_list(), get_subscription_list(), get_tables_to_cluster(), get_tablespace_name(), get_tablespace_oid(), GetAllTablesPublicationRelations(), getRelationsInNamespace(), GetSchemaPublicationRelations(), heapam_index_build_range_scan(), heapam_index_validate_scan(), index_update_stats(), objectsInSchemaToOids(), pgrowlocks(), pgstat_heap(), populate_typ_list(), ReindexMultipleTables(), remove_dbtablespaces(), RemoveSubscriptionRel(), RenameTableSpace(), ThereIsAtLeastOneRole(), and vac_truncate_clog().
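
A minimal sketch of the classic scan loop (hypothetical helper; it assumes relid names an ordinary heap-AM table and that a snapshot is active):

#include "postgres.h"

#include "access/heapam.h"
#include "access/table.h"
#include "access/tableam.h"
#include "storage/lockdefs.h"
#include "utils/snapmgr.h"

/* Hypothetical helper: count the tuples visible to the active snapshot. */
static int
count_heap_tuples(Oid relid)
{
    Relation      rel = table_open(relid, AccessShareLock);
    TableScanDesc scan = table_beginscan(rel, GetActiveSnapshot(), 0, NULL);
    HeapTuple     tuple;
    int           ntuples = 0;

    while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
        ntuples++;              /* tuple points into the scan's currently pinned buffer */

    table_endscan(scan);
    table_close(rel, AccessShareLock);
    return ntuples;
}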

◆ heap_getnextslot()

bool heap_getnextslot ( TableScanDesc  sscan,
ScanDirection  direction,
struct TupleTableSlot slot 
)

Definition at line 1297 of file heapam.c.

1298 {
1299  HeapScanDesc scan = (HeapScanDesc) sscan;
1300 
1301  /* Note: no locking manipulations needed */
1302 
1303  if (sscan->rs_flags & SO_ALLOW_PAGEMODE)
1304  heapgettup_pagemode(scan, direction, sscan->rs_nkeys, sscan->rs_key);
1305  else
1306  heapgettup(scan, direction, sscan->rs_nkeys, sscan->rs_key);
1307 
1308  if (scan->rs_ctup.t_data == NULL)
1309  {
1310  ExecClearTuple(slot);
1311  return false;
1312  }
1313 
1314  /*
1315  * if we get here it means we have a new current scan tuple, so point to
1316  * the proper return buffer and return the tuple.
1317  */
1318 
1319  pgstat_count_heap_getnext(scan->rs_base.rs_rd);
1320 
1321  ExecStoreBufferHeapTuple(&scan->rs_ctup, slot,
1322  scan->rs_cbuf);
1323  return true;
1324 }
TupleTableSlot * ExecStoreBufferHeapTuple(HeapTuple tuple, TupleTableSlot *slot, Buffer buffer)
Definition: execTuples.c:1479
static TupleTableSlot * ExecClearTuple(TupleTableSlot *slot)
Definition: tuptable.h:454

References ExecClearTuple(), ExecStoreBufferHeapTuple(), heapgettup(), heapgettup_pagemode(), pgstat_count_heap_getnext, HeapScanDescData::rs_base, HeapScanDescData::rs_cbuf, HeapScanDescData::rs_ctup, TableScanDescData::rs_flags, TableScanDescData::rs_key, TableScanDescData::rs_nkeys, TableScanDescData::rs_rd, SO_ALLOW_PAGEMODE, and HeapTupleData::t_data.
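
A hedged sketch of the slot-based loop (hypothetical helper), which is closer to what the executor does through the table AM interface:

#include "postgres.h"

#include "access/heapam.h"
#include "access/tableam.h"
#include "executor/tuptable.h"
#include "utils/snapmgr.h"

/* Hypothetical helper: walk the relation, materializing each tuple into a slot. */
static void
scan_with_slot(Relation rel)
{
    TableScanDesc   scan = table_beginscan(rel, GetActiveSnapshot(), 0, NULL);
    TupleTableSlot *slot = table_slot_create(rel, NULL);

    while (heap_getnextslot(scan, ForwardScanDirection, slot))
    {
        /* the slot holds a buffer heap tuple and keeps its buffer pinned */
        slot_getallattrs(slot);
    }

    ExecDropSingleTupleTableSlot(slot);
    table_endscan(scan);
}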

◆ heap_getnextslot_tidrange()

bool heap_getnextslot_tidrange ( TableScanDesc  sscan,
ScanDirection  direction,
TupleTableSlot slot 
)

Definition at line 1400 of file heapam.c.

1402 {
1403  HeapScanDesc scan = (HeapScanDesc) sscan;
1404  ItemPointer mintid = &sscan->rs_mintid;
1405  ItemPointer maxtid = &sscan->rs_maxtid;
1406 
1407  /* Note: no locking manipulations needed */
1408  for (;;)
1409  {
1410  if (sscan->rs_flags & SO_ALLOW_PAGEMODE)
1411  heapgettup_pagemode(scan, direction, sscan->rs_nkeys, sscan->rs_key);
1412  else
1413  heapgettup(scan, direction, sscan->rs_nkeys, sscan->rs_key);
1414 
1415  if (scan->rs_ctup.t_data == NULL)
1416  {
1417  ExecClearTuple(slot);
1418  return false;
1419  }
1420 
1421  /*
1422  * heap_set_tidrange will have used heap_setscanlimits to limit the
1423  * range of pages we scan to only ones that can contain the TID range
1424  * we're scanning for. Here we must filter out any tuples from these
1425  * pages that are outside of that range.
1426  */
1427  if (ItemPointerCompare(&scan->rs_ctup.t_self, mintid) < 0)
1428  {
1429  ExecClearTuple(slot);
1430 
1431  /*
1432  * When scanning backwards, the TIDs will be in descending order.
1433  * Future tuples in this direction will be lower still, so we can
1434  * just return false to indicate there will be no more tuples.
1435  */
1436  if (ScanDirectionIsBackward(direction))
1437  return false;
1438 
1439  continue;
1440  }
1441 
1442  /*
1443  * Likewise for the final page, we must filter out TIDs greater than
1444  * maxtid.
1445  */
1446  if (ItemPointerCompare(&scan->rs_ctup.t_self, maxtid) > 0)
1447  {
1448  ExecClearTuple(slot);
1449 
1450  /*
1451  * When scanning forward, the TIDs will be in ascending order.
1452  * Future tuples in this direction will be higher still, so we can
1453  * just return false to indicate there will be no more tuples.
1454  */
1455  if (ScanDirectionIsForward(direction))
1456  return false;
1457  continue;
1458  }
1459 
1460  break;
1461  }
1462 
1463  /*
1464  * if we get here it means we have a new current scan tuple, so point to
1465  * the proper return buffer and return the tuple.
1466  */
1467  pgstat_count_heap_getnext(scan->rs_base.rs_rd);
1468 
1469  ExecStoreBufferHeapTuple(&scan->rs_ctup, slot, scan->rs_cbuf);
1470  return true;
1471 }
int32 ItemPointerCompare(ItemPointer arg1, ItemPointer arg2)
Definition: itemptr.c:51
#define ScanDirectionIsForward(direction)
Definition: sdir.h:64
#define ScanDirectionIsBackward(direction)
Definition: sdir.h:50
ItemPointerData rs_mintid
Definition: relscan.h:40
ItemPointerData rs_maxtid
Definition: relscan.h:41

References ExecClearTuple(), ExecStoreBufferHeapTuple(), heapgettup(), heapgettup_pagemode(), ItemPointerCompare(), pgstat_count_heap_getnext, HeapScanDescData::rs_base, HeapScanDescData::rs_cbuf, HeapScanDescData::rs_ctup, TableScanDescData::rs_flags, TableScanDescData::rs_key, TableScanDescData::rs_maxtid, TableScanDescData::rs_mintid, TableScanDescData::rs_nkeys, TableScanDescData::rs_rd, ScanDirectionIsBackward, ScanDirectionIsForward, SO_ALLOW_PAGEMODE, HeapTupleData::t_data, and HeapTupleData::t_self.
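
A minimal sketch of a TID-range scan (hypothetical helper): table_beginscan_tidrange() is assumed to have installed the range, via heap_set_tidrange()/heap_setscanlimits(), before the first fetch.

#include "postgres.h"

#include "access/heapam.h"
#include "access/tableam.h"
#include "executor/tuptable.h"
#include "utils/snapmgr.h"

/* Hypothetical helper: visit only tuples whose ctid lies within [mintid, maxtid]. */
static void
scan_tid_range(Relation rel, ItemPointer mintid, ItemPointer maxtid)
{
    TableScanDesc   scan = table_beginscan_tidrange(rel, GetActiveSnapshot(),
                                                    mintid, maxtid);
    TupleTableSlot *slot = table_slot_create(rel, NULL);

    while (heap_getnextslot_tidrange(scan, ForwardScanDirection, slot))
    {
        /* only tuples with mintid <= t_self <= maxtid arrive here */
    }

    ExecDropSingleTupleTableSlot(slot);
    table_endscan(scan);
}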

◆ heap_hot_search_buffer()

bool heap_hot_search_buffer ( ItemPointer  tid,
Relation  relation,
Buffer  buffer,
Snapshot  snapshot,
HeapTuple  heapTuple,
bool all_dead,
bool  first_call 
)

Definition at line 1627 of file heapam.c.

1630 {
1631  Page page = BufferGetPage(buffer);
1632  TransactionId prev_xmax = InvalidTransactionId;
1633  BlockNumber blkno;
1634  OffsetNumber offnum;
1635  bool at_chain_start;
1636  bool valid;
1637  bool skip;
1638  GlobalVisState *vistest = NULL;
1639 
1640  /* If this is not the first call, previous call returned a (live!) tuple */
1641  if (all_dead)
1642  *all_dead = first_call;
1643 
1644  blkno = ItemPointerGetBlockNumber(tid);
1645  offnum = ItemPointerGetOffsetNumber(tid);
1646  at_chain_start = first_call;
1647  skip = !first_call;
1648 
1649  /* XXX: we should assert that a snapshot is pushed or registered */
1650  Assert(TransactionIdIsValid(RecentXmin));
1651  Assert(BufferGetBlockNumber(buffer) == blkno);
1652 
1653  /* Scan through possible multiple members of HOT-chain */
1654  for (;;)
1655  {
1656  ItemId lp;
1657 
1658  /* check for bogus TID */
1659  if (offnum < FirstOffsetNumber || offnum > PageGetMaxOffsetNumber(page))
1660  break;
1661 
1662  lp = PageGetItemId(page, offnum);
1663 
1664  /* check for unused, dead, or redirected items */
1665  if (!ItemIdIsNormal(lp))
1666  {
1667  /* We should only see a redirect at start of chain */
1668  if (ItemIdIsRedirected(lp) && at_chain_start)
1669  {
1670  /* Follow the redirect */
1671  offnum = ItemIdGetRedirect(lp);
1672  at_chain_start = false;
1673  continue;
1674  }
1675  /* else must be end of chain */
1676  break;
1677  }
1678 
1679  /*
1680  * Update heapTuple to point to the element of the HOT chain we're
1681  * currently investigating. Having t_self set correctly is important
1682  * because the SSI checks and the *Satisfies routine for historical
1683  * MVCC snapshots need the correct tid to decide about the visibility.
1684  */
1685  heapTuple->t_data = (HeapTupleHeader) PageGetItem(page, lp);
1686  heapTuple->t_len = ItemIdGetLength(lp);
1687  heapTuple->t_tableOid = RelationGetRelid(relation);
1688  ItemPointerSet(&heapTuple->t_self, blkno, offnum);
1689 
1690  /*
1691  * Shouldn't see a HEAP_ONLY tuple at chain start.
1692  */
1693  if (at_chain_start && HeapTupleIsHeapOnly(heapTuple))
1694  break;
1695 
1696  /*
1697  * The xmin should match the previous xmax value, else chain is
1698  * broken.
1699  */
1700  if (TransactionIdIsValid(prev_xmax) &&
1701  !TransactionIdEquals(prev_xmax,
1702  HeapTupleHeaderGetXmin(heapTuple->t_data)))
1703  break;
1704 
1705  /*
1706  * When first_call is true (and thus, skip is initially false) we'll
1707  * return the first tuple we find. But on later passes, heapTuple
1708  * will initially be pointing to the tuple we returned last time.
1709  * Returning it again would be incorrect (and would loop forever), so
1710  * we skip it and return the next match we find.
1711  */
1712  if (!skip)
1713  {
1714  /* If it's visible per the snapshot, we must return it */
1715  valid = HeapTupleSatisfiesVisibility(heapTuple, snapshot, buffer);
1716  HeapCheckForSerializableConflictOut(valid, relation, heapTuple,
1717  buffer, snapshot);
1718 
1719  if (valid)
1720  {
1721  ItemPointerSetOffsetNumber(tid, offnum);
1722  PredicateLockTID(relation, &heapTuple->t_self, snapshot,
1723  HeapTupleHeaderGetXmin(heapTuple->t_data));
1724  if (all_dead)
1725  *all_dead = false;
1726  return true;
1727  }
1728  }
1729  skip = false;
1730 
1731  /*
1732  * If we can't see it, maybe no one else can either. At caller
1733  * request, check whether all chain members are dead to all
1734  * transactions.
1735  *
1736  * Note: if you change the criterion here for what is "dead", fix the
1737  * planner's get_actual_variable_range() function to match.
1738  */
1739  if (all_dead && *all_dead)
1740  {
1741  if (!vistest)
1742  vistest = GlobalVisTestFor(relation);
1743 
1744  if (!HeapTupleIsSurelyDead(heapTuple, vistest))
1745  *all_dead = false;
1746  }
1747 
1748  /*
1749  * Check to see if HOT chain continues past this tuple; if so fetch
1750  * the next offnum and loop around.
1751  */
1752  if (HeapTupleIsHotUpdated(heapTuple))
1753  {
1754  Assert(ItemPointerGetBlockNumber(&heapTuple->t_data->t_ctid) ==
1755  blkno);
1756  offnum = ItemPointerGetOffsetNumber(&heapTuple->t_data->t_ctid);
1757  at_chain_start = false;
1758  prev_xmax = HeapTupleHeaderGetUpdateXid(heapTuple->t_data);
1759  }
1760  else
1761  break; /* end of chain */
1762  }
1763 
1764  return false;
1765 }
bool HeapTupleIsSurelyDead(HeapTuple htup, GlobalVisState *vistest)
#define HeapTupleIsHeapOnly(tuple)
Definition: htup_details.h:683
#define HeapTupleIsHotUpdated(tuple)
Definition: htup_details.h:674
static void ItemPointerSet(ItemPointerData *pointer, BlockNumber blockNumber, OffsetNumber offNum)
Definition: itemptr.h:135
static void ItemPointerSetOffsetNumber(ItemPointerData *pointer, OffsetNumber offsetNumber)
Definition: itemptr.h:158
static const struct exclude_list_item skip[]
Definition: pg_checksums.c:108
GlobalVisState * GlobalVisTestFor(Relation rel)
Definition: procarray.c:4091
TransactionId RecentXmin
Definition: snapmgr.c:99

References Assert, BufferGetBlockNumber(), BufferGetPage(), GlobalVisTestFor(), HeapCheckForSerializableConflictOut(), HeapTupleHeaderGetUpdateXid, HeapTupleHeaderGetXmin, HeapTupleIsHeapOnly, HeapTupleIsHotUpdated, HeapTupleIsSurelyDead(), HeapTupleSatisfiesVisibility(), InvalidTransactionId, ItemIdGetLength, ItemIdGetRedirect, ItemIdIsNormal, ItemIdIsRedirected, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), ItemPointerSet(), ItemPointerSetOffsetNumber(), PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PredicateLockTID(), RecentXmin, RelationGetRelid, skip, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TransactionIdEquals, and TransactionIdIsValid.

Referenced by heap_index_delete_tuples(), heapam_index_fetch_tuple(), and heapam_scan_bitmap_next_block().
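
A condensed sketch of the index-fetch style call (hypothetical helper): the caller supplies the pinned, share-locked buffer and the chain-root TID taken from an index entry; on success, *tid is repointed at the visible chain member.

#include "postgres.h"

#include "access/heapam.h"
#include "storage/bufmgr.h"
#include "storage/itemptr.h"

/* Hypothetical helper: probe the HOT chain rooted at an index TID. */
static bool
hot_chain_has_visible_member(Relation rel, ItemPointer tid, Snapshot snapshot,
                             bool *all_dead)
{
    Buffer        buf = ReadBuffer(rel, ItemPointerGetBlockNumber(tid));
    HeapTupleData heapTuple;
    bool          found;

    LockBuffer(buf, BUFFER_LOCK_SHARE);
    /* first_call = true: start from the chain root named by the index entry */
    found = heap_hot_search_buffer(tid, rel, buf, snapshot, &heapTuple,
                                   all_dead, true);
    UnlockReleaseBuffer(buf);   /* on success, *tid now names the visible member */
    return found;
}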

◆ heap_index_delete_tuples()

TransactionId heap_index_delete_tuples ( Relation  rel,
TM_IndexDeleteOp delstate 
)

Definition at line 7627 of file heapam.c.

7628 {
7629  /* Initial assumption is that earlier pruning took care of conflict */
7630  TransactionId snapshotConflictHorizon = InvalidTransactionId;
7631  BlockNumber blkno = InvalidBlockNumber;
7632  Buffer buf = InvalidBuffer;
7633  Page page = NULL;
7634  OffsetNumber maxoff = InvalidOffsetNumber;
7635  TransactionId priorXmax;
7636 #ifdef USE_PREFETCH
7637  IndexDeletePrefetchState prefetch_state;
7638  int prefetch_distance;
7639 #endif
7640  SnapshotData SnapshotNonVacuumable;
7641  int finalndeltids = 0,
7642  nblocksaccessed = 0;
7643 
7644  /* State that's only used in bottom-up index deletion case */
7645  int nblocksfavorable = 0;
7646  int curtargetfreespace = delstate->bottomupfreespace,
7647  lastfreespace = 0,
7648  actualfreespace = 0;
7649  bool bottomup_final_block = false;
7650 
7651  InitNonVacuumableSnapshot(SnapshotNonVacuumable, GlobalVisTestFor(rel));
7652 
7653  /* Sort caller's deltids array by TID for further processing */
7654  index_delete_sort(delstate);
7655 
7656  /*
7657  * Bottom-up case: resort deltids array in an order attuned to where the
7658  * greatest number of promising TIDs are to be found, and determine how
7659  * many blocks from the start of sorted array should be considered
7660  * favorable. This will also shrink the deltids array in order to
7661  * eliminate completely unfavorable blocks up front.
7662  */
7663  if (delstate->bottomup)
7664  nblocksfavorable = bottomup_sort_and_shrink(delstate);
7665 
7666 #ifdef USE_PREFETCH
7667  /* Initialize prefetch state. */
7668  prefetch_state.cur_hblkno = InvalidBlockNumber;
7669  prefetch_state.next_item = 0;
7670  prefetch_state.ndeltids = delstate->ndeltids;
7671  prefetch_state.deltids = delstate->deltids;
7672 
7673  /*
7674  * Determine the prefetch distance that we will attempt to maintain.
7675  *
7676  * Since the caller holds a buffer lock somewhere in rel, we'd better make
7677  * sure that isn't a catalog relation before we call code that does
7678  * syscache lookups, to avoid risk of deadlock.
7679  */
7680  if (IsCatalogRelation(rel))
7681  prefetch_distance = maintenance_io_concurrency;
7682  else
7683  prefetch_distance =
7684  get_tablespace_maintenance_io_concurrency(rel->rd_rel->reltablespace);
7685 
7686  /* Cap initial prefetch distance for bottom-up deletion caller */
7687  if (delstate->bottomup)
7688  {
7689  Assert(nblocksfavorable >= 1);
7690  Assert(nblocksfavorable <= BOTTOMUP_MAX_NBLOCKS);
7691  prefetch_distance = Min(prefetch_distance, nblocksfavorable);
7692  }
7693 
7694  /* Start prefetching. */
7695  index_delete_prefetch_buffer(rel, &prefetch_state, prefetch_distance);
7696 #endif
7697 
7698  /* Iterate over deltids, determine which to delete, check their horizon */
7699  Assert(delstate->ndeltids > 0);
7700  for (int i = 0; i < delstate->ndeltids; i++)
7701  {
7702  TM_IndexDelete *ideltid = &delstate->deltids[i];
7703  TM_IndexStatus *istatus = delstate->status + ideltid->id;
7704  ItemPointer htid = &ideltid->tid;
7705  OffsetNumber offnum;
7706 
7707  /*
7708  * Read buffer, and perform required extra steps each time a new block
7709  * is encountered. Avoid refetching if it's the same block as the one
7710  * from the last htid.
7711  */
7712  if (blkno == InvalidBlockNumber ||
7713  ItemPointerGetBlockNumber(htid) != blkno)
7714  {
7715  /*
7716  * Consider giving up early for bottom-up index deletion caller
7717  * first. (Only prefetch next-next block afterwards, when it
7718  * becomes clear that we're at least going to access the next
7719  * block in line.)
7720  *
7721  * Sometimes the first block frees so much space for bottom-up
7722  * caller that the deletion process can end without accessing any
7723  * more blocks. It is usually necessary to access 2 or 3 blocks
7724  * per bottom-up deletion operation, though.
7725  */
7726  if (delstate->bottomup)
7727  {
7728  /*
7729  * We often allow caller to delete a few additional items
7730  * whose entries we reached after the point that space target
7731  * from caller was satisfied. The cost of accessing the page
7732  * was already paid at that point, so it made sense to finish
7733  * it off. When that happened, we finalize everything here
7734  * (by finishing off the whole bottom-up deletion operation
7735  * without needlessly paying the cost of accessing any more
7736  * blocks).
7737  */
7738  if (bottomup_final_block)
7739  break;
7740 
7741  /*
7742  * Give up when we didn't enable our caller to free any
7743  * additional space as a result of processing the page that we
7744  * just finished up with. This rule is the main way in which
7745  * we keep the cost of bottom-up deletion under control.
7746  */
7747  if (nblocksaccessed >= 1 && actualfreespace == lastfreespace)
7748  break;
7749  lastfreespace = actualfreespace; /* for next time */
7750 
7751  /*
7752  * Deletion operation (which is bottom-up) will definitely
7753  * access the next block in line. Prepare for that now.
7754  *
7755  * Decay target free space so that we don't hang on for too
7756  * long with a marginal case. (Space target is only truly
7757  * helpful when it allows us to recognize that we don't need
7758  * to access more than 1 or 2 blocks to satisfy caller due to
7759  * agreeable workload characteristics.)
7760  *
7761  * We are a bit more patient when we encounter contiguous
7762  * blocks, though: these are treated as favorable blocks. The
7763  * decay process is only applied when the next block in line
7764  * is not a favorable/contiguous block. This is not an
7765  * exception to the general rule; we still insist on finding
7766  * at least one deletable item per block accessed. See
7767  * bottomup_nblocksfavorable() for full details of the theory
7768  * behind favorable blocks and heap block locality in general.
7769  *
7770  * Note: The first block in line is always treated as a
7771  * favorable block, so the earliest possible point that the
7772  * decay can be applied is just before we access the second
7773  * block in line. The Assert() verifies this for us.
7774  */
7775  Assert(nblocksaccessed > 0 || nblocksfavorable > 0);
7776  if (nblocksfavorable > 0)
7777  nblocksfavorable--;
7778  else
7779  curtargetfreespace /= 2;
7780  }
7781 
7782  /* release old buffer */
7783  if (BufferIsValid(buf))
7784  UnlockReleaseBuffer(buf);
7785 
7786  blkno = ItemPointerGetBlockNumber(htid);
7787  buf = ReadBuffer(rel, blkno);
7788  nblocksaccessed++;
7789  Assert(!delstate->bottomup ||
7790  nblocksaccessed <= BOTTOMUP_MAX_NBLOCKS);
7791 
7792 #ifdef USE_PREFETCH
7793 
7794  /*
7795  * To maintain the prefetch distance, prefetch one more page for
7796  * each page we read.
7797  */
7798  index_delete_prefetch_buffer(rel, &prefetch_state, 1);
7799 #endif
7800 
7801  LockBuffer(buf, BUFFER_LOCK_SHARE);
7802 
7803  page = BufferGetPage(buf);
7804  maxoff = PageGetMaxOffsetNumber(page);
7805  }
7806 
7807  /*
7808  * In passing, detect index corruption involving an index page with a
7809  * TID that points to a location in the heap that couldn't possibly be
7810  * correct. We only do this with actual TIDs from caller's index page
7811  * (not items reached by traversing through a HOT chain).
7812  */
7813  index_delete_check_htid(delstate, page, maxoff, htid, istatus);
7814 
7815  if (istatus->knowndeletable)
7816  Assert(!delstate->bottomup && !istatus->promising);
7817  else
7818  {
7819  ItemPointerData tmp = *htid;
7820  HeapTupleData heapTuple;
7821 
7822  /* Are any tuples from this HOT chain non-vacuumable? */
7823  if (heap_hot_search_buffer(&tmp, rel, buf, &SnapshotNonVacuumable,
7824  &heapTuple, NULL, true))
7825  continue; /* can't delete entry */
7826 
7827  /* Caller will delete, since whole HOT chain is vacuumable */
7828  istatus->knowndeletable = true;
7829 
7830  /* Maintain index free space info for bottom-up deletion case */
7831  if (delstate->bottomup)
7832  {
7833  Assert(istatus->freespace > 0);
7834  actualfreespace += istatus->freespace;
7835  if (actualfreespace >= curtargetfreespace)
7836  bottomup_final_block = true;
7837  }
7838  }
7839 
7840  /*
7841  * Maintain snapshotConflictHorizon value for deletion operation as a
7842  * whole by advancing current value using heap tuple headers. This is
7843  * loosely based on the logic for pruning a HOT chain.
7844  */
7845  offnum = ItemPointerGetOffsetNumber(htid);
7846  priorXmax = InvalidTransactionId; /* cannot check first XMIN */
7847  for (;;)
7848  {
7849  ItemId lp;
7850  HeapTupleHeader htup;
7851 
7852  /* Sanity check (pure paranoia) */
7853  if (offnum < FirstOffsetNumber)
7854  break;
7855 
7856  /*
7857  * An offset past the end of page's line pointer array is possible
7858  * when the array was truncated
7859  */
7860  if (offnum > maxoff)
7861  break;
7862 
7863  lp = PageGetItemId(page, offnum);
7864  if (ItemIdIsRedirected(lp))
7865  {
7866  offnum = ItemIdGetRedirect(lp);
7867  continue;
7868  }
7869 
7870  /*
7871  * We'll often encounter LP_DEAD line pointers (especially with an
7872  * entry marked knowndeletable by our caller up front). No heap
7873  * tuple headers get examined for an htid that leads us to an
7874  * LP_DEAD item. This is okay because the earlier pruning
7875  * operation that made the line pointer LP_DEAD in the first place
7876  * must have considered the original tuple header as part of
7877  * generating its own snapshotConflictHorizon value.
7878  *
7879  * Relying on XLOG_HEAP2_PRUNE_VACUUM_SCAN records like this is
7880  * the same strategy that index vacuuming uses in all cases. Index
7881  * VACUUM WAL records don't even have a snapshotConflictHorizon
7882  * field of their own for this reason.
7883  */
7884  if (!ItemIdIsNormal(lp))
7885  break;
7886 
7887  htup = (HeapTupleHeader) PageGetItem(page, lp);
7888 
7889  /*
7890  * Check the tuple XMIN against prior XMAX, if any
7891  */
7892  if (TransactionIdIsValid(priorXmax) &&
7893  !TransactionIdEquals(HeapTupleHeaderGetXmin(htup), priorXmax))
7894  break;
7895 
7896  HeapTupleHeaderAdvanceConflictHorizon(htup,
7897  &snapshotConflictHorizon);
7898 
7899  /*
7900  * If the tuple is not HOT-updated, then we are at the end of this
7901  * HOT-chain. No need to visit later tuples from the same update
7902  * chain (they get their own index entries) -- just move on to
7903  * next htid from index AM caller.
7904  */
7905  if (!HeapTupleHeaderIsHotUpdated(htup))
7906  break;
7907 
7908  /* Advance to next HOT chain member */
7909  Assert(ItemPointerGetBlockNumber(&htup->t_ctid) == blkno);
7910  offnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
7911  priorXmax = HeapTupleHeaderGetUpdateXid(htup);
7912  }
7913 
7914  /* Enable further/final shrinking of deltids for caller */
7915  finalndeltids = i + 1;
7916  }
7917 
7918  UnlockReleaseBuffer(buf);
7919 
7920  /*
7921  * Shrink deltids array to exclude non-deletable entries at the end. This
7922  * is not just a minor optimization. Final deltids array size might be
7923  * zero for a bottom-up caller. Index AM is explicitly allowed to rely on
7924  * ndeltids being zero in all cases with zero total deletable entries.
7925  */
7926  Assert(finalndeltids > 0 || delstate->bottomup);
7927  delstate->ndeltids = finalndeltids;
7928 
7929  return snapshotConflictHorizon;
7930 }
int maintenance_io_concurrency
Definition: bufmgr.c:157
#define Min(x, y)
Definition: c.h:1004
bool IsCatalogRelation(Relation relation)
Definition: catalog.c:103
static int bottomup_sort_and_shrink(TM_IndexDeleteOp *delstate)
Definition: heapam.c:8185
void HeapTupleHeaderAdvanceConflictHorizon(HeapTupleHeader tuple, TransactionId *snapshotConflictHorizon)
Definition: heapam.c:7482
static void index_delete_check_htid(TM_IndexDeleteOp *delstate, Page page, OffsetNumber maxoff, ItemPointer htid, TM_IndexStatus *istatus)
Definition: heapam.c:7567
#define BOTTOMUP_MAX_NBLOCKS
Definition: heapam.c:189
bool heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer, Snapshot snapshot, HeapTuple heapTuple, bool *all_dead, bool first_call)
Definition: heapam.c:1627
static void index_delete_sort(TM_IndexDeleteOp *delstate)
Definition: heapam.c:7972
static char * buf
Definition: pg_test_fsync.c:73
#define InitNonVacuumableSnapshot(snapshotdata, vistestp)
Definition: snapmgr.h:48
int get_tablespace_maintenance_io_concurrency(Oid spcid)
Definition: spccache.c:229
TM_IndexStatus * status
Definition: tableam.h:255
int bottomupfreespace
Definition: tableam.h:250
TM_IndexDelete * deltids
Definition: tableam.h:254
ItemPointerData tid
Definition: tableam.h:213
bool knowndeletable
Definition: tableam.h:220
bool promising
Definition: tableam.h:223
int16 freespace
Definition: tableam.h:224

References Assert, TM_IndexDeleteOp::bottomup, BOTTOMUP_MAX_NBLOCKS, bottomup_sort_and_shrink(), TM_IndexDeleteOp::bottomupfreespace, buf, BUFFER_LOCK_SHARE, BufferGetPage(), BufferIsValid(), TM_IndexDeleteOp::deltids, FirstOffsetNumber, TM_IndexStatus::freespace, get_tablespace_maintenance_io_concurrency(), GlobalVisTestFor(), heap_hot_search_buffer(), HeapTupleHeaderAdvanceConflictHorizon(), HeapTupleHeaderGetUpdateXid, HeapTupleHeaderGetXmin, HeapTupleHeaderIsHotUpdated, i, TM_IndexDelete::id, index_delete_check_htid(), index_delete_sort(), InitNonVacuumableSnapshot, InvalidBlockNumber, InvalidBuffer, InvalidOffsetNumber, InvalidTransactionId, IsCatalogRelation(), ItemIdGetRedirect, ItemIdIsNormal, ItemIdIsRedirected, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), TM_IndexStatus::knowndeletable, LockBuffer(), maintenance_io_concurrency, Min, TM_IndexDeleteOp::ndeltids, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), TM_IndexStatus::promising, RelationData::rd_rel, ReadBuffer(), TM_IndexDeleteOp::status, HeapTupleHeaderData::t_ctid, TM_IndexDelete::tid, TransactionIdEquals, TransactionIdIsValid, and UnlockReleaseBuffer().
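
A hedged sketch of how an index AM might hand candidate TIDs to this function for simple (non-bottom-up) deletion; the helper and its inputs are hypothetical, and the struct field names follow TM_IndexDeleteOp, TM_IndexDelete and TM_IndexStatus in tableam.h:

#include "postgres.h"

#include "access/tableam.h"
#include "storage/block.h"

/*
 * Hypothetical helper: ask the heap which of the given index-entry TIDs are
 * deletable ("simple" deletion, no bottom-up space target).
 */
static TransactionId
probe_deletable_index_entries(Relation heapRel, Relation indexRel,
                              ItemPointerData *htids, OffsetNumber *idxoffnums,
                              int nitems)
{
    TM_IndexDeleteOp delstate;

    delstate.irel = indexRel;
    delstate.iblknum = InvalidBlockNumber;  /* index block number, used in error reports only */
    delstate.bottomup = false;
    delstate.bottomupfreespace = 0;
    delstate.ndeltids = nitems;
    delstate.deltids = palloc(nitems * sizeof(TM_IndexDelete));
    delstate.status = palloc(nitems * sizeof(TM_IndexStatus));

    for (int i = 0; i < nitems; i++)
    {
        delstate.deltids[i].tid = htids[i];
        delstate.deltids[i].id = i;             /* back-link into status[] */
        delstate.status[i].idxoffnum = idxoffnums[i];
        delstate.status[i].knowndeletable = false;  /* the heap AM fills this in */
        delstate.status[i].promising = false;
        delstate.status[i].freespace = 0;
    }

    /*
     * Dispatches to heap_index_delete_tuples() when heapRel uses the heap AM.
     * On return, delstate.ndeltids and status[].knowndeletable tell the index
     * AM which entries it may remove; the returned XID is the snapshot
     * conflict horizon for the index deletion WAL record.
     */
    return table_index_delete_tuples(heapRel, &delstate);
}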

◆ heap_inplace_update()

void heap_inplace_update ( Relation  relation,
HeapTuple  tuple 
)

Definition at line 6055 of file heapam.c.

6056 {
6057  Buffer buffer;
6058  Page page;
6059  OffsetNumber offnum;
6060  ItemId lp = NULL;
6061  HeapTupleHeader htup;
6062  uint32 oldlen;
6063  uint32 newlen;
6064 
6065  /*
6066  * For now, we don't allow parallel updates. Unlike a regular update,
6067  * this should never create a combo CID, so it might be possible to relax
6068  * this restriction, but not without more thought and testing. It's not
6069  * clear that it would be useful, anyway.
6070  */
6071  if (IsInParallelMode())
6072  ereport(ERROR,
6073  (errcode(ERRCODE_INVALID_TRANSACTION_STATE),
6074  errmsg("cannot update tuples during a parallel operation")));
6075 
6076  buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(&(tuple->t_self)));
6077  LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
6078  page = (Page) BufferGetPage(buffer);
6079 
6080  offnum = ItemPointerGetOffsetNumber(&(tuple->t_self));
6081  if (PageGetMaxOffsetNumber(page) >= offnum)
6082  lp = PageGetItemId(page, offnum);
6083 
6084  if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
6085  elog(ERROR, "invalid lp");
6086 
6087  htup = (HeapTupleHeader) PageGetItem(page, lp);
6088 
6089  oldlen = ItemIdGetLength(lp) - htup->t_hoff;
6090  newlen = tuple->t_len - tuple->t_data->t_hoff;
6091  if (oldlen != newlen || htup->t_hoff != tuple->t_data->t_hoff)
6092  elog(ERROR, "wrong tuple length");
6093 
6094  /* NO EREPORT(ERROR) from here till changes are logged */
6095  START_CRIT_SECTION();
6096 
6097  memcpy((char *) htup + htup->t_hoff,
6098  (char *) tuple->t_data + tuple->t_data->t_hoff,
6099  newlen);
6100 
6101  MarkBufferDirty(buffer);
6102 
6103  /* XLOG stuff */
6104  if (RelationNeedsWAL(relation))
6105  {
6106  xl_heap_inplace xlrec;
6107  XLogRecPtr recptr;
6108 
6109  xlrec.offnum = ItemPointerGetOffsetNumber(&tuple->t_self);
6110 
6111  XLogBeginInsert();
6112  XLogRegisterData((char *) &xlrec, SizeOfHeapInplace);
6113 
6114  XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
6115  XLogRegisterBufData(0, (char *) htup + htup->t_hoff, newlen);
6116 
6117  /* inplace updates aren't decoded atm, don't log the origin */
6118 
6119  recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_INPLACE);
6120 
6121  PageSetLSN(page, recptr);
6122  }
6123 
6124  END_CRIT_SECTION();
6125 
6126  UnlockReleaseBuffer(buffer);
6127 
6128  /*
6129  * Send out shared cache inval if necessary. Note that because we only
6130  * pass the new version of the tuple, this mustn't be used for any
6131  * operations that could change catcache lookup keys. But we aren't
6132  * bothering with index updates either, so that's true a fortiori.
6133  */
6134  if (!IsBootstrapProcessingMode())
6135  CacheInvalidateHeapTuple(relation, tuple, NULL);
6136 }
unsigned int uint32
Definition: c.h:506
#define SizeOfHeapInplace
Definition: heapam_xlog.h:431
#define XLOG_HEAP_INPLACE
Definition: heapam_xlog.h:39
#define IsBootstrapProcessingMode()
Definition: miscadmin.h:454
OffsetNumber offnum
Definition: heapam_xlog.h:427
void XLogRegisterBufData(uint8 block_id, char *data, uint32 len)
Definition: xloginsert.c:405

References BUFFER_LOCK_EXCLUSIVE, BufferGetPage(), CacheInvalidateHeapTuple(), elog, END_CRIT_SECTION, ereport, errcode(), errmsg(), ERROR, IsBootstrapProcessingMode, IsInParallelMode(), ItemIdGetLength, ItemIdIsNormal, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), LockBuffer(), MarkBufferDirty(), xl_heap_inplace::offnum, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PageSetLSN(), ReadBuffer(), REGBUF_STANDARD, RelationNeedsWAL, SizeOfHeapInplace, START_CRIT_SECTION, HeapTupleData::t_data, HeapTupleHeaderData::t_hoff, HeapTupleData::t_len, HeapTupleData::t_self, UnlockReleaseBuffer(), XLOG_HEAP_INPLACE, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by create_toast_table(), dropdb(), EventTriggerOnLogin(), index_update_stats(), vac_update_datfrozenxid(), and vac_update_relstats().
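
A minimal sketch of the catalog-style usage (hypothetical helper, modelled on the relation-statistics updaters): fetch a copy of the row, overwrite only fixed-width, non-key fields, and write it back in place.

#include "postgres.h"

#include "access/heapam.h"
#include "access/htup_details.h"
#include "access/table.h"
#include "catalog/pg_class.h"
#include "storage/lockdefs.h"
#include "utils/syscache.h"

/* Hypothetical helper: overwrite pg_class.relpages for one relation, in place. */
static void
set_relpages_inplace(Oid relid, int32 relpages)
{
    Relation    pg_class = table_open(RelationRelationId, RowExclusiveLock);
    HeapTuple   tup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid));
    Form_pg_class form;

    if (!HeapTupleIsValid(tup))
        elog(ERROR, "cache lookup failed for relation %u", relid);
    form = (Form_pg_class) GETSTRUCT(tup);

    /*
     * Only safe for fixed-width, non-key columns: the rewritten tuple must be
     * exactly as long as the old one, and no catcache lookup key may change.
     */
    form->relpages = relpages;
    heap_inplace_update(pg_class, tup);

    heap_freetuple(tup);
    table_close(pg_class, RowExclusiveLock);
}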

◆ heap_insert()

void heap_insert ( Relation  relation,
HeapTuple  tup,
CommandId  cid,
int  options,
BulkInsertState  bistate 
)

Definition at line 1990 of file heapam.c.

1992 {
1993  TransactionId xid = GetCurrentTransactionId();
1994  HeapTuple heaptup;
1995  Buffer buffer;
1996  Buffer vmbuffer = InvalidBuffer;
1997  bool all_visible_cleared = false;
1998 
1999  /* Cheap, simplistic check that the tuple matches the rel's rowtype. */
2000  Assert(HeapTupleHeaderGetNatts(tup->t_data) <=
2001  RelationGetNumberOfAttributes(relation));
2002 
2003  /*
2004  * Fill in tuple header fields and toast the tuple if necessary.
2005  *
2006  * Note: below this point, heaptup is the data we actually intend to store
2007  * into the relation; tup is the caller's original untoasted data.
2008  */
2009  heaptup = heap_prepare_insert(relation, tup, xid, cid, options);
2010 
2011  /*
2012  * Find buffer to insert this tuple into. If the page is all visible,
2013  * this will also pin the requisite visibility map page.
2014  */
2015  buffer = RelationGetBufferForTuple(relation, heaptup->t_len,
2016  InvalidBuffer, options, bistate,
2017  &vmbuffer, NULL,
2018  0);
2019 
2020  /*
2021  * We're about to do the actual insert -- but check for conflict first, to
2022  * avoid possibly having to roll back work we've just done.
2023  *
2024  * This is safe without a recheck as long as there is no possibility of
2025  * another process scanning the page between this check and the insert
2026  * being visible to the scan (i.e., an exclusive buffer content lock is
2027  * continuously held from this point until the tuple insert is visible).
2028  *
2029  * For a heap insert, we only need to check for table-level SSI locks. Our
2030  * new tuple can't possibly conflict with existing tuple locks, and heap
2031  * page locks are only consolidated versions of tuple locks; they do not
2032  * lock "gaps" as index page locks do. So we don't need to specify a
2033  * buffer when making the call, which makes for a faster check.
2034  */
2035  CheckForSerializableConflictIn(relation, NULL, InvalidBlockNumber);
2036 
2037  /* NO EREPORT(ERROR) from here till changes are logged */
2038  START_CRIT_SECTION();
2039 
2040  RelationPutHeapTuple(relation, buffer, heaptup,
2041  (options & HEAP_INSERT_SPECULATIVE) != 0);
2042 
2043  if (PageIsAllVisible(BufferGetPage(buffer)))
2044  {
2045  all_visible_cleared = true;
2046  PageClearAllVisible(BufferGetPage(buffer));
2047  visibilitymap_clear(relation,
2048  ItemPointerGetBlockNumber(&(heaptup->t_self)),
2049  vmbuffer, VISIBILITYMAP_VALID_BITS);
2050  }
2051 
2052  /*
2053  * XXX Should we set PageSetPrunable on this page ?
2054  *
2055  * The inserting transaction may eventually abort thus making this tuple
2056  * DEAD and hence available for pruning. Though we don't want to optimize
2057  * for aborts, if no other tuple in this page is UPDATEd/DELETEd, the
2058  * aborted tuple will never be pruned until next vacuum is triggered.
2059  *
2060  * If you do add PageSetPrunable here, add it in heap_xlog_insert too.
2061  */
2062 
2063  MarkBufferDirty(buffer);
2064 
2065  /* XLOG stuff */
2066  if (RelationNeedsWAL(relation))
2067  {
2068  xl_heap_insert xlrec;
2069  xl_heap_header xlhdr;
2070  XLogRecPtr recptr;
2071  Page page = BufferGetPage(buffer);
2072  uint8 info = XLOG_HEAP_INSERT;
2073  int bufflags = 0;
2074 
2075  /*
2076  * If this is a catalog, we need to transmit combo CIDs to properly
2077  * decode, so log that as well.
2078  */
2079  if (RelationIsAccessibleInLogicalDecoding(relation))
2080  log_heap_new_cid(relation, heaptup);
2081 
2082  /*
2083  * If this is the single and first tuple on page, we can reinit the
2084  * page instead of restoring the whole thing. Set flag, and hide
2085  * buffer references from XLogInsert.
2086  */
2087  if (ItemPointerGetOffsetNumber(&(heaptup->t_self)) == FirstOffsetNumber &&
2088  PageGetMaxOffsetNumber(page) == FirstOffsetNumber)
2089  {
2090  info |= XLOG_HEAP_INIT_PAGE;
2091  bufflags |= REGBUF_WILL_INIT;
2092  }
2093 
2094  xlrec.offnum = ItemPointerGetOffsetNumber(&heaptup->t_self);
2095  xlrec.flags = 0;
2096  if (all_visible_cleared)
2097  xlrec.flags |= XLH_INSERT_ALL_VISIBLE_CLEARED;
2098  if (options & HEAP_INSERT_SPECULATIVE)
2099  xlrec.flags |= XLH_INSERT_IS_SPECULATIVE;
2100  Assert(ItemPointerGetBlockNumber(&heaptup->t_self) == BufferGetBlockNumber(buffer));
2101 
2102  /*
2103  * For logical decoding, we need the tuple even if we're doing a full
2104  * page write, so make sure it's included even if we take a full-page
2105  * image. (XXX We could alternatively store a pointer into the FPW).
2106  */
2107  if (RelationIsLogicallyLogged(relation) &&
2108  !(options & HEAP_INSERT_NO_LOGICAL))
2109  {
2110  xlrec.flags |= XLH_INSERT_CONTAINS_NEW_TUPLE;
2111  bufflags |= REGBUF_KEEP_DATA;
2112 
2113  if (IsToastRelation(relation))
2114  xlrec.flags |= XLH_INSERT_ON_TOAST_RELATION;
2115  }
2116 
2117  XLogBeginInsert();
2118  XLogRegisterData((char *) &xlrec, SizeOfHeapInsert);
2119 
2120  xlhdr.t_infomask2 = heaptup->t_data->t_infomask2;
2121  xlhdr.t_infomask = heaptup->t_data->t_infomask;
2122  xlhdr.t_hoff = heaptup->t_data->t_hoff;
2123 
2124  /*
2125  * note we mark xlhdr as belonging to buffer; if XLogInsert decides to
2126  * write the whole page to the xlog, we don't need to store
2127  * xl_heap_header in the xlog.
2128  */
2129  XLogRegisterBuffer(0, buffer, REGBUF_STANDARD | bufflags);
2130  XLogRegisterBufData(0, (char *) &xlhdr, SizeOfHeapHeader);
2131  /* PG73FORMAT: write bitmap [+ padding] [+ oid] + data */
2132  XLogRegisterBufData(0,
2133  (char *) heaptup->t_data + SizeofHeapTupleHeader,
2134  heaptup->t_len - SizeofHeapTupleHeader);
2135 
2136  /* filtering by origin on a row level is much more efficient */
2137  XLogSetRecordFlags(XLOG_INCLUDE_ORIGIN);
2138 
2139  recptr = XLogInsert(RM_HEAP_ID, info);
2140 
2141  PageSetLSN(page, recptr);
2142  }
2143 
2144  END_CRIT_SECTION();
2145 
2146  UnlockReleaseBuffer(buffer);
2147  if (vmbuffer != InvalidBuffer)
2148  ReleaseBuffer(vmbuffer);
2149 
2150  /*
2151  * If tuple is cachable, mark it for invalidation from the caches in case
2152  * we abort. Note it is OK to do this after releasing the buffer, because
2153  * the heaptup data structure is all in local memory, not in the shared
2154  * buffer.
2155  */
2156  CacheInvalidateHeapTuple(relation, heaptup, NULL);
2157 
2158  /* Note: speculative insertions are counted too, even if aborted later */
2159  pgstat_count_heap_insert(relation, 1);
2160 
2161  /*
2162  * If heaptup is a private copy, release it. Don't forget to copy t_self
2163  * back to the caller's image, too.
2164  */
2165  if (heaptup != tup)
2166  {
2167  tup->t_self = heaptup->t_self;
2168  heap_freetuple(heaptup);
2169  }
2170 }
unsigned char uint8
Definition: c.h:504
static HeapTuple heap_prepare_insert(Relation relation, HeapTuple tup, TransactionId xid, CommandId cid, int options)
Definition: heapam.c:2179
#define HEAP_INSERT_SPECULATIVE
Definition: heapam.h:38
#define HEAP_INSERT_NO_LOGICAL
Definition: heapam.h:37
#define XLH_INSERT_ON_TOAST_RELATION
Definition: heapam_xlog.h:75
#define XLH_INSERT_IS_SPECULATIVE
Definition: heapam_xlog.h:73
#define XLH_INSERT_ALL_VISIBLE_CLEARED
Definition: heapam_xlog.h:71
#define XLOG_HEAP_INSERT
Definition: heapam_xlog.h:32
#define SizeOfHeapInsert
Definition: heapam_xlog.h:167
#define XLH_INSERT_CONTAINS_NEW_TUPLE
Definition: heapam_xlog.h:74
#define XLOG_HEAP_INIT_PAGE
Definition: heapam_xlog.h:46
void RelationPutHeapTuple(Relation relation, Buffer buffer, HeapTuple tuple, bool token)
Definition: hio.c:35
Buffer RelationGetBufferForTuple(Relation relation, Size len, Buffer otherBuffer, int options, BulkInsertState bistate, Buffer *vmbuffer, Buffer *vmbuffer_other, int num_pages)
Definition: hio.c:502
#define HeapTupleHeaderGetNatts(tup)
Definition: htup_details.h:529
void pgstat_count_heap_insert(Relation rel, PgStat_Counter n)
#define RelationIsLogicallyLogged(relation)
Definition: rel.h:701
#define RelationGetNumberOfAttributes(relation)
Definition: rel.h:511
OffsetNumber offnum
Definition: heapam_xlog.h:161
#define REGBUF_KEEP_DATA
Definition: xloginsert.h:35
#define REGBUF_WILL_INIT
Definition: xloginsert.h:33

References Assert, BufferGetBlockNumber(), BufferGetPage(), CacheInvalidateHeapTuple(), CheckForSerializableConflictIn(), END_CRIT_SECTION, FirstOffsetNumber, xl_heap_insert::flags, GetCurrentTransactionId(), heap_freetuple(), HEAP_INSERT_NO_LOGICAL, HEAP_INSERT_SPECULATIVE, heap_prepare_insert(), HeapTupleHeaderGetNatts, InvalidBlockNumber, InvalidBuffer, IsToastRelation(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), log_heap_new_cid(), MarkBufferDirty(), xl_heap_insert::offnum, PageClearAllVisible(), PageGetMaxOffsetNumber(), PageIsAllVisible(), PageSetLSN(), pgstat_count_heap_insert(), REGBUF_KEEP_DATA, REGBUF_STANDARD, REGBUF_WILL_INIT, RelationGetBufferForTuple(), RelationGetNumberOfAttributes, RelationIsAccessibleInLogicalDecoding, RelationIsLogicallyLogged, RelationNeedsWAL, RelationPutHeapTuple(), ReleaseBuffer(), SizeOfHeapHeader, SizeOfHeapInsert, SizeofHeapTupleHeader, START_CRIT_SECTION, HeapTupleData::t_data, xl_heap_header::t_hoff, HeapTupleHeaderData::t_hoff, xl_heap_header::t_infomask, HeapTupleHeaderData::t_infomask, xl_heap_header::t_infomask2, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, UnlockReleaseBuffer(), visibilitymap_clear(), VISIBILITYMAP_VALID_BITS, XLH_INSERT_ALL_VISIBLE_CLEARED, XLH_INSERT_CONTAINS_NEW_TUPLE, XLH_INSERT_IS_SPECULATIVE, XLH_INSERT_ON_TOAST_RELATION, XLOG_HEAP_INIT_PAGE, XLOG_HEAP_INSERT, XLOG_INCLUDE_ORIGIN, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), XLogRegisterData(), and XLogSetRecordFlags().

Referenced by heapam_tuple_insert(), heapam_tuple_insert_speculative(), simple_heap_insert(), and toast_save_datum().
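
A minimal direct-insert sketch (hypothetical helper; it assumes the relation has exactly two int4 attributes, is already opened and locked, and that the caller performs CommandCounterIncrement() afterwards if it needs to see the new row):

#include "postgres.h"

#include "access/heapam.h"
#include "access/htup_details.h"
#include "access/xact.h"
#include "utils/rel.h"

/* Hypothetical helper: insert one row into a two-int4-column relation. */
static void
insert_two_int_columns(Relation rel, int32 a, int32 b)
{
    Datum       values[2] = {Int32GetDatum(a), Int32GetDatum(b)};
    bool        nulls[2] = {false, false};
    HeapTuple   tup = heap_form_tuple(RelationGetDescr(rel), values, nulls);

    /* options = 0: ordinary WAL-logged, logically decoded insertion */
    heap_insert(rel, tup, GetCurrentCommandId(true), 0, NULL);
    heap_freetuple(tup);
}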

◆ heap_lock_tuple()

TM_Result heap_lock_tuple ( Relation  relation,
HeapTuple  tuple,
CommandId  cid,
LockTupleMode  mode,
LockWaitPolicy  wait_policy,
bool  follow_updates,
Buffer buffer,
struct TM_FailureData tmfd 
)

Definition at line 4306 of file heapam.c.

4310 {
4311  TM_Result result;
4312  ItemPointer tid = &(tuple->t_self);
4313  ItemId lp;
4314  Page page;
4315  Buffer vmbuffer = InvalidBuffer;
4316  BlockNumber block;
4317  TransactionId xid,
4318  xmax;
4319  uint16 old_infomask,
4320  new_infomask,
4321  new_infomask2;
4322  bool first_time = true;
4323  bool skip_tuple_lock = false;
4324  bool have_tuple_lock = false;
4325  bool cleared_all_frozen = false;
4326 
4327  *buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
4328  block = ItemPointerGetBlockNumber(tid);
4329 
4330  /*
4331  * Before locking the buffer, pin the visibility map page if it appears to
4332  * be necessary. Since we haven't got the lock yet, someone else might be
4333  * in the middle of changing this, so we'll need to recheck after we have
4334  * the lock.
4335  */
4336  if (PageIsAllVisible(BufferGetPage(*buffer)))
4337  visibilitymap_pin(relation, block, &vmbuffer);
4338 
4339  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4340 
4341  page = BufferGetPage(*buffer);
4342  lp = PageGetItemId(page, ItemPointerGetOffsetNumber(tid));
4343  Assert(ItemIdIsNormal(lp));
4344 
4345  tuple->t_data = (HeapTupleHeader) PageGetItem(page, lp);
4346  tuple->t_len = ItemIdGetLength(lp);
4347  tuple->t_tableOid = RelationGetRelid(relation);
4348 
4349 l3:
4350  result = HeapTupleSatisfiesUpdate(tuple, cid, *buffer);
4351 
4352  if (result == TM_Invisible)
4353  {
4354  /*
4355  * This is possible, but only when locking a tuple for ON CONFLICT
4356  * UPDATE. We return this value here rather than throwing an error in
4357  * order to give that case the opportunity to throw a more specific
4358  * error.
4359  */
4360  result = TM_Invisible;
4361  goto out_locked;
4362  }
4363  else if (result == TM_BeingModified ||
4364  result == TM_Updated ||
4365  result == TM_Deleted)
4366  {
4367  TransactionId xwait;
4368  uint16 infomask;
4369  uint16 infomask2;
4370  bool require_sleep;
4371  ItemPointerData t_ctid;
4372 
4373  /* must copy state data before unlocking buffer */
4374  xwait = HeapTupleHeaderGetRawXmax(tuple->t_data);
4375  infomask = tuple->t_data->t_infomask;
4376  infomask2 = tuple->t_data->t_infomask2;
4377  ItemPointerCopy(&tuple->t_data->t_ctid, &t_ctid);
4378 
4379  LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
4380 
4381  /*
4382  * If any subtransaction of the current top transaction already holds
4383  * a lock as strong as or stronger than what we're requesting, we
4384  * effectively hold the desired lock already. We *must* succeed
4385  * without trying to take the tuple lock, else we will deadlock
4386  * against anyone wanting to acquire a stronger lock.
4387  *
4388  * Note we only do this the first time we loop on the HTSU result;
4389  * there is no point in testing in subsequent passes, because
4390  * evidently our own transaction cannot have acquired a new lock after
4391  * the first time we checked.
4392  */
4393  if (first_time)
4394  {
4395  first_time = false;
4396 
4397  if (infomask & HEAP_XMAX_IS_MULTI)
4398  {
4399  int i;
4400  int nmembers;
4401  MultiXactMember *members;
4402 
4403  /*
4404  * We don't need to allow old multixacts here; if that had
4405  * been the case, HeapTupleSatisfiesUpdate would have returned
4406  * MayBeUpdated and we wouldn't be here.
4407  */
4408  nmembers =
4409  GetMultiXactIdMembers(xwait, &members, false,
4410  HEAP_XMAX_IS_LOCKED_ONLY(infomask));
4411 
4412  for (i = 0; i < nmembers; i++)
4413  {
4414  /* only consider members of our own transaction */
4415  if (!TransactionIdIsCurrentTransactionId(members[i].xid))
4416  continue;
4417 
4418  if (TUPLOCK_from_mxstatus(members[i].status) >= mode)
4419  {
4420  pfree(members);
4421  result = TM_Ok;
4422  goto out_unlocked;
4423  }
4424  else
4425  {
4426  /*
4427  * Disable acquisition of the heavyweight tuple lock.
4428  * Otherwise, when promoting a weaker lock, we might
4429  * deadlock with another locker that has acquired the
4430  * heavyweight tuple lock and is waiting for our
4431  * transaction to finish.
4432  *
4433  * Note that in this case we still need to wait for
4434  * the multixact if required, to avoid acquiring
4435  * conflicting locks.
4436  */
4437  skip_tuple_lock = true;
4438  }
4439  }
4440 
4441  if (members)
4442  pfree(members);
4443  }
4444  else if (TransactionIdIsCurrentTransactionId(xwait))
4445  {
4446  switch (mode)
4447  {
4448  case LockTupleKeyShare:
4449  Assert(HEAP_XMAX_IS_KEYSHR_LOCKED(infomask) ||
4450  HEAP_XMAX_IS_SHR_LOCKED(infomask) ||
4451  HEAP_XMAX_IS_EXCL_LOCKED(infomask));
4452  result = TM_Ok;
4453  goto out_unlocked;
4454  case LockTupleShare:
4455  if (HEAP_XMAX_IS_SHR_LOCKED(infomask) ||
4456  HEAP_XMAX_IS_EXCL_LOCKED(infomask))
4457  {
4458  result = TM_Ok;
4459  goto out_unlocked;
4460  }
4461  break;
4462  case LockTupleNoKeyExclusive:
4463  if (HEAP_XMAX_IS_EXCL_LOCKED(infomask))
4464  {
4465  result = TM_Ok;
4466  goto out_unlocked;
4467  }
4468  break;
4469  case LockTupleExclusive:
4470  if (HEAP_XMAX_IS_EXCL_LOCKED(infomask) &&
4471  infomask2 & HEAP_KEYS_UPDATED)
4472  {
4473  result = TM_Ok;
4474  goto out_unlocked;
4475  }
4476  break;
4477  }
4478  }
4479  }
4480 
4481  /*
4482  * Initially assume that we will have to wait for the locking
4483  * transaction(s) to finish. We check various cases below in which
4484  * this can be turned off.
4485  */
4486  require_sleep = true;
4487  if (mode == LockTupleKeyShare)
4488  {
4489  /*
4490  * If we're requesting KeyShare, and there's no update present, we
4491  * don't need to wait. Even if there is an update, we can still
4492  * continue if the key hasn't been modified.
4493  *
4494  * However, if there are updates, we need to walk the update chain
4495  * to mark future versions of the row as locked, too. That way,
4496  * if somebody deletes that future version, we're protected
4497  * against the key going away. This locking of future versions
4498  * could block momentarily, if a concurrent transaction is
4499  * deleting a key; or it could return a value to the effect that
4500  * the transaction deleting the key has already committed. So we
4501  * do this before re-locking the buffer; otherwise this would be
4502  * prone to deadlocks.
4503  *
4504  * Note that the TID we're locking was grabbed before we unlocked
4505  * the buffer. For it to change while we're not looking, the
4506  * other properties we're testing for below after re-locking the
4507  * buffer would also change, in which case we would restart this
4508  * loop above.
4509  */
4510  if (!(infomask2 & HEAP_KEYS_UPDATED))
4511  {
4512  bool updated;
4513 
4514  updated = !HEAP_XMAX_IS_LOCKED_ONLY(infomask);
4515 
4516  /*
4517  * If there are updates, follow the update chain; bail out if
4518  * that cannot be done.
4519  */
4520  if (follow_updates && updated)
4521  {
4522  TM_Result res;
4523 
4524  res = heap_lock_updated_tuple(relation, tuple, &t_ctid,
4525  GetCurrentTransactionId(),
4526  mode);
4527  if (res != TM_Ok)
4528  {
4529  result = res;
4530  /* recovery code expects to have buffer lock held */
4531  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4532  goto failed;
4533  }
4534  }
4535 
4536  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4537 
4538  /*
4539  * Make sure it's still an appropriate lock, else start over.
4540  * Also, if it wasn't updated before we released the lock, but
4541  * is updated now, we start over too; the reason is that we
4542  * now need to follow the update chain to lock the new
4543  * versions.
4544  */
4545  if (!HeapTupleHeaderIsOnlyLocked(tuple->t_data) &&
4546  ((tuple->t_data->t_infomask2 & HEAP_KEYS_UPDATED) ||
4547  !updated))
4548  goto l3;
4549 
4550  /* Things look okay, so we can skip sleeping */
4551  require_sleep = false;
4552 
4553  /*
4554  * Note we allow Xmax to change here; other updaters/lockers
4555  * could have modified it before we grabbed the buffer lock.
4556  * However, this is not a problem, because with the recheck we
4557  * just did we ensure that they still don't conflict with the
4558  * lock we want.
4559  */
4560  }
4561  }
4562  else if (mode == LockTupleShare)
4563  {
4564  /*
4565  * If we're requesting Share, we can similarly avoid sleeping if
4566  * there's no update and no exclusive lock present.
4567  */
4568  if (HEAP_XMAX_IS_LOCKED_ONLY(infomask) &&
4569  !HEAP_XMAX_IS_EXCL_LOCKED(infomask))
4570  {
4571  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4572 
4573  /*
4574  * Make sure it's still an appropriate lock, else start over.
4575  * See above about allowing xmax to change.
4576  */
4577  if (!HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_data->t_infomask) ||
4578  HEAP_XMAX_IS_EXCL_LOCKED(tuple->t_data->t_infomask))
4579  goto l3;
4580  require_sleep = false;
4581  }
4582  }
4583  else if (mode == LockTupleNoKeyExclusive)
4584  {
4585  /*
4586  * If we're requesting NoKeyExclusive, we might also be able to
4587  * avoid sleeping; just ensure that there is no conflicting lock
4588  * already acquired.
4589  */
4590  if (infomask & HEAP_XMAX_IS_MULTI)
4591  {
4592  if (!DoesMultiXactIdConflict((MultiXactId) xwait, infomask,
4593  mode, NULL))
4594  {
4595  /*
4596  * No conflict, but if the xmax changed under us in the
4597  * meantime, start over.
4598  */
4599  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4600  if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) ||
4601  !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data),
4602  xwait))
4603  goto l3;
4604 
4605  /* otherwise, we're good */
4606  require_sleep = false;
4607  }
4608  }
4609  else if (HEAP_XMAX_IS_KEYSHR_LOCKED(infomask))
4610  {
4611  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4612 
4613  /* if the xmax changed in the meantime, start over */
4614  if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) ||
4615  !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data),
4616  xwait))
4617  goto l3;
4618  /* otherwise, we're good */
4619  require_sleep = false;
4620  }
4621  }
4622 
4623  /*
4624  * As a check independent from those above, we can also avoid sleeping
4625  * if the current transaction is the sole locker of the tuple. Note
4626  * that the strength of the lock already held is irrelevant; this is
4627  * not about recording the lock in Xmax (which will be done regardless
4628  * of this optimization, below). Also, note that the cases where we
4629  * hold a lock stronger than we are requesting are already handled
4630  * above by not doing anything.
4631  *
4632  * Note we only deal with the non-multixact case here; MultiXactIdWait
4633  * is well equipped to deal with this situation on its own.
4634  */
4635  if (require_sleep && !(infomask & HEAP_XMAX_IS_MULTI) &&
4636  TransactionIdIsCurrentTransactionId(xwait))
4637  {
4638  /* ... but if the xmax changed in the meantime, start over */
4639  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4640  if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) ||
4641  !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data),
4642  xwait))
4643  goto l3;
4645  require_sleep = false;
4646  }
4647 
4648  /*
4649  * Time to sleep on the other transaction/multixact, if necessary.
4650  *
4651  * If the other transaction is an update/delete that's already
4652  * committed, then sleeping cannot possibly do any good: if we're
4653  * required to sleep, get out to raise an error instead.
4654  *
4655  * By here, we either have already acquired the buffer exclusive lock,
4656  * or we must wait for the locking transaction or multixact; so below
4657  * we ensure that we grab buffer lock after the sleep.
4658  */
4659  if (require_sleep && (result == TM_Updated || result == TM_Deleted))
4660  {
4661  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4662  goto failed;
4663  }
4664  else if (require_sleep)
4665  {
4666  /*
4667  * Acquire tuple lock to establish our priority for the tuple, or
4668  * die trying. LockTuple will release us when we are next-in-line
4669  * for the tuple. We must do this even if we are share-locking,
4670  * but not if we already have a weaker lock on the tuple.
4671  *
4672  * If we are forced to "start over" below, we keep the tuple lock;
4673  * this arranges that we stay at the head of the line while
4674  * rechecking tuple state.
4675  */
4676  if (!skip_tuple_lock &&
4677  !heap_acquire_tuplock(relation, tid, mode, wait_policy,
4678  &have_tuple_lock))
4679  {
4680  /*
4681  * This can only happen if wait_policy is Skip and the lock
4682  * couldn't be obtained.
4683  */
4684  result = TM_WouldBlock;
4685  /* recovery code expects to have buffer lock held */
4686  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4687  goto failed;
4688  }
4689 
4690  if (infomask & HEAP_XMAX_IS_MULTI)
4691  {
4692  MultiXactStatus status = get_mxact_status_for_lock(mode, false);
4693 
4694  /* We only ever lock tuples, never update them */
4695  if (status >= MultiXactStatusNoKeyUpdate)
4696  elog(ERROR, "invalid lock mode in heap_lock_tuple");
4697 
4698  /* wait for multixact to end, or die trying */
4699  switch (wait_policy)
4700  {
4701  case LockWaitBlock:
4702  MultiXactIdWait((MultiXactId) xwait, status, infomask,
4703  relation, &tuple->t_self, XLTW_Lock, NULL);
4704  break;
4705  case LockWaitSkip:
4706  if (!ConditionalMultiXactIdWait((MultiXactId) xwait,
4707  status, infomask, relation,
4708  NULL))
4709  {
4710  result = TM_WouldBlock;
4711  /* recovery code expects to have buffer lock held */
4712  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4713  goto failed;
4714  }
4715  break;
4716  case LockWaitError:
4717  if (!ConditionalMultiXactIdWait((MultiXactId) xwait,
4718  status, infomask, relation,
4719  NULL))
4720  ereport(ERROR,
4721  (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
4722  errmsg("could not obtain lock on row in relation \"%s\"",
4723  RelationGetRelationName(relation))));
4724 
4725  break;
4726  }
4727 
4728  /*
4729  * Of course, the multixact might not be done here: if we're
4730  * requesting a light lock mode, other transactions with light
4731  * locks could still be alive, as well as locks owned by our
4732  * own xact or other subxacts of this backend. We need to
4733  * preserve the surviving MultiXact members. Note that it
4734  * isn't absolutely necessary in the latter case, but doing so
4735  * is simpler.
4736  */
4737  }
4738  else
4739  {
4740  /* wait for regular transaction to end, or die trying */
4741  switch (wait_policy)
4742  {
4743  case LockWaitBlock:
4744  XactLockTableWait(xwait, relation, &tuple->t_self,
4745  XLTW_Lock);
4746  break;
4747  case LockWaitSkip:
4748  if (!ConditionalXactLockTableWait(xwait))
4749  {
4750  result = TM_WouldBlock;
4751  /* recovery code expects to have buffer lock held */
4752  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4753  goto failed;
4754  }
4755  break;
4756  case LockWaitError:
4757  if (!ConditionalXactLockTableWait(xwait))
4758  ereport(ERROR,
4759  (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
4760  errmsg("could not obtain lock on row in relation \"%s\"",
4761  RelationGetRelationName(relation))));
4762  break;
4763  }
4764  }
4765 
4766  /* if there are updates, follow the update chain */
4767  if (follow_updates && !HEAP_XMAX_IS_LOCKED_ONLY(infomask))
4768  {
4769  TM_Result res;
4770 
4771  res = heap_lock_updated_tuple(relation, tuple, &t_ctid,
4772  GetCurrentTransactionId(),
4773  mode);
4774  if (res != TM_Ok)
4775  {
4776  result = res;
4777  /* recovery code expects to have buffer lock held */
4778  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4779  goto failed;
4780  }
4781  }
4782 
4783  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4784 
4785  /*
4786  * xwait is done, but if xwait had just locked the tuple then some
4787  * other xact could update this tuple before we get to this point.
4788  * Check for xmax change, and start over if so.
4789  */
4790  if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) ||
4791  !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data),
4792  xwait))
4793  goto l3;
4794 
4795  if (!(infomask & HEAP_XMAX_IS_MULTI))
4796  {
4797  /*
4798  * Otherwise check if it committed or aborted. Note we cannot
4799  * be here if the tuple was only locked by somebody who didn't
4800  * conflict with us; that would have been handled above. So
4801  * that transaction must necessarily be gone by now. But
4802  * don't check for this in the multixact case, because some
4803  * locker transactions might still be running.
4804  */
4805  UpdateXmaxHintBits(tuple->t_data, *buffer, xwait);
4806  }
4807  }
4808 
4809  /* By here, we're certain that we hold buffer exclusive lock again */
4810 
4811  /*
4812  * We may lock if previous xmax aborted, or if it committed but only
4813  * locked the tuple without updating it; or if we didn't have to wait
4814  * at all for whatever reason.
4815  */
4816  if (!require_sleep ||
4817  (tuple->t_data->t_infomask & HEAP_XMAX_INVALID) ||
4818  HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_data->t_infomask) ||
4819  HeapTupleHeaderIsOnlyLocked(tuple->t_data))
4820  result = TM_Ok;
4821  else if (!ItemPointerEquals(&tuple->t_self, &tuple->t_data->t_ctid))
4822  result = TM_Updated;
4823  else
4824  result = TM_Deleted;
4825  }
4826 
4827 failed:
4828  if (result != TM_Ok)
4829  {
4830  Assert(result == TM_SelfModified || result == TM_Updated ||
4831  result == TM_Deleted || result == TM_WouldBlock);
4832 
4833  /*
4834  * When locking a tuple under LockWaitSkip semantics and we fail with
4835  * TM_WouldBlock above, it's possible for concurrent transactions to
4836  * release the lock and set HEAP_XMAX_INVALID in the meantime. So
4837  * this assert is slightly different from the equivalent one in
4838  * heap_delete and heap_update.
4839  */
4840  Assert((result == TM_WouldBlock) ||
4841  !(tuple->t_data->t_infomask & HEAP_XMAX_INVALID));
4842  Assert(result != TM_Updated ||
4843  !ItemPointerEquals(&tuple->t_self, &tuple->t_data->t_ctid));
4844  tmfd->ctid = tuple->t_data->t_ctid;
4845  tmfd->xmax = HeapTupleHeaderGetUpdateXid(tuple->t_data);
4846  if (result == TM_SelfModified)
4847  tmfd->cmax = HeapTupleHeaderGetCmax(tuple->t_data);
4848  else
4849  tmfd->cmax = InvalidCommandId;
4850  goto out_locked;
4851  }
4852 
4853  /*
4854  * If we didn't pin the visibility map page and the page has become all
4855  * visible while we were busy locking the buffer, or during some
4856  * subsequent window during which we had it unlocked, we'll have to unlock
4857  * and re-lock, to avoid holding the buffer lock across I/O. That's a bit
4858  * unfortunate, especially since we'll now have to recheck whether the
4859  * tuple has been locked or updated under us, but hopefully it won't
4860  * happen very often.
4861  */
4862  if (vmbuffer == InvalidBuffer && PageIsAllVisible(page))
4863  {
4864  LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
4865  visibilitymap_pin(relation, block, &vmbuffer);
4866  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4867  goto l3;
4868  }
4869 
4870  xmax = HeapTupleHeaderGetRawXmax(tuple->t_data);
4871  old_infomask = tuple->t_data->t_infomask;
4872 
4873  /*
4874  * If this is the first possibly-multixact-able operation in the current
4875  * transaction, set my per-backend OldestMemberMXactId setting. We can be
4876  * certain that the transaction will never become a member of any older
4877  * MultiXactIds than that. (We have to do this even if we end up just
4878  * using our own TransactionId below, since some other backend could
4879  * incorporate our XID into a MultiXact immediately afterwards.)
4880  */
4881  MultiXactIdSetOldestMember();
4882 
4883  /*
4884  * Compute the new xmax and infomask to store into the tuple. Note we do
4885  * not modify the tuple just yet, because that would leave it in the wrong
4886  * state if multixact.c elogs.
4887  */
4888  compute_new_xmax_infomask(xmax, old_infomask, tuple->t_data->t_infomask2,
4889  GetCurrentTransactionId(), mode, false,
4890  &xid, &new_infomask, &new_infomask2);
4891 
4892  START_CRIT_SECTION();
4893 
4894  /*
4895  * Store transaction information of xact locking the tuple.
4896  *
4897  * Note: Cmax is meaningless in this context, so don't set it; this avoids
4898  * possibly generating a useless combo CID. Moreover, if we're locking a
4899  * previously updated tuple, it's important to preserve the Cmax.
4900  *
4901  * Also reset the HOT UPDATE bit, but only if there's no update; otherwise
4902  * we would break the HOT chain.
4903  */
4904  tuple->t_data->t_infomask &= ~HEAP_XMAX_BITS;
4905  tuple->t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED;
4906  tuple->t_data->t_infomask |= new_infomask;
4907  tuple->t_data->t_infomask2 |= new_infomask2;
4908  if (HEAP_XMAX_IS_LOCKED_ONLY(new_infomask))
4909  HeapTupleHeaderClearHotUpdated(tuple->t_data);
4910  HeapTupleHeaderSetXmax(tuple->t_data, xid);
4911 
4912  /*
4913  * Make sure there is no forward chain link in t_ctid. Note that in the
4914  * cases where the tuple has been updated, we must not overwrite t_ctid,
4915  * because it was set by the updater. Moreover, if the tuple has been
4916  * updated, we need to follow the update chain to lock the new versions of
4917  * the tuple as well.
4918  */
4919  if (HEAP_XMAX_IS_LOCKED_ONLY(new_infomask))
4920  tuple->t_data->t_ctid = *tid;
4921 
4922  /* Clear only the all-frozen bit on visibility map if needed */
4923  if (PageIsAllVisible(page) &&
4924  visibilitymap_clear(relation, block, vmbuffer,
4925  VISIBILITYMAP_ALL_FROZEN))
4926  cleared_all_frozen = true;
4927 
4928 
4929  MarkBufferDirty(*buffer);
4930 
4931  /*
4932  * XLOG stuff. You might think that we don't need an XLOG record because
4933  * there is no state change worth restoring after a crash. You would be
4934  * wrong however: we have just written either a TransactionId or a
4935  * MultiXactId that may never have been seen on disk before, and we need
4936  * to make sure that there are XLOG entries covering those ID numbers.
4937  * Else the same IDs might be re-used after a crash, which would be
4938  * disastrous if this page made it to disk before the crash. Essentially
4939  * we have to enforce the WAL log-before-data rule even in this case.
4940  * (Also, in a PITR log-shipping or 2PC environment, we have to have XLOG
4941  * entries for everything anyway.)
4942  */
4943  if (RelationNeedsWAL(relation))
4944  {
4945  xl_heap_lock xlrec;
4946  XLogRecPtr recptr;
4947 
4948  XLogBeginInsert();
4949  XLogRegisterBuffer(0, *buffer, REGBUF_STANDARD);
4950 
4951  xlrec.offnum = ItemPointerGetOffsetNumber(&tuple->t_self);
4952  xlrec.xmax = xid;
4953  xlrec.infobits_set = compute_infobits(new_infomask,
4954  tuple->t_data->t_infomask2);
4955  xlrec.flags = cleared_all_frozen ? XLH_LOCK_ALL_FROZEN_CLEARED : 0;
4956  XLogRegisterData((char *) &xlrec, SizeOfHeapLock);
4957 
4958  /* we don't decode row locks atm, so no need to log the origin */
4959 
4960  recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_LOCK);
4961 
4962  PageSetLSN(page, recptr);
4963  }
4964 
4965  END_CRIT_SECTION();
4966 
4967  result = TM_Ok;
4968 
4969 out_locked:
4970  LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
4971 
4972 out_unlocked:
4973  if (BufferIsValid(vmbuffer))
4974  ReleaseBuffer(vmbuffer);
4975 
4976  /*
4977  * Don't update the visibility map here. Locking a tuple doesn't change
4978  * visibility info.
4979  */
4980 
4981  /*
4982  * Now that we have successfully marked the tuple as locked, we can
4983  * release the lmgr tuple lock, if we had it.
4984  */
4985  if (have_tuple_lock)
4986  UnlockTupleTuplock(relation, tid, mode);
4987 
4988  return result;
4989 }
#define TUPLOCK_from_mxstatus(status)
Definition: heapam.c:218
static TM_Result heap_lock_updated_tuple(Relation rel, HeapTuple tuple, ItemPointer ctid, TransactionId xid, LockTupleMode mode)
Definition: heapam.c:5770
static bool ConditionalMultiXactIdWait(MultiXactId multi, MultiXactStatus status, uint16 infomask, Relation rel, int *remaining)
Definition: heapam.c:7305
static MultiXactStatus get_mxact_status_for_lock(LockTupleMode mode, bool is_update)
Definition: heapam.c:4258
#define XLH_LOCK_ALL_FROZEN_CLEARED
Definition: heapam_xlog.h:392
#define XLOG_HEAP_LOCK
Definition: heapam_xlog.h:38
#define SizeOfHeapLock
Definition: heapam_xlog.h:403
#define HEAP_XMAX_IS_EXCL_LOCKED(infomask)
Definition: htup_details.h:261
#define HEAP_XMAX_IS_KEYSHR_LOCKED(infomask)
Definition: htup_details.h:263
#define HEAP_XMAX_IS_SHR_LOCKED(infomask)
Definition: htup_details.h:259
static void ItemPointerCopy(const ItemPointerData *fromPointer, ItemPointerData *toPointer)
Definition: itemptr.h:172
bool ConditionalXactLockTableWait(TransactionId xid)
Definition: lmgr.c:740
@ XLTW_Lock
Definition: lmgr.h:29
@ LockWaitSkip
Definition: lockoptions.h:41
@ LockWaitError
Definition: lockoptions.h:43
@ LockTupleNoKeyExclusive
Definition: lockoptions.h:56
@ LockTupleShare
Definition: lockoptions.h:54
@ LockTupleKeyShare
Definition: lockoptions.h:52
int GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **members, bool from_pgupgrade, bool isLockOnly)
Definition: multixact.c:1252
MultiXactStatus
Definition: multixact.h:38
@ MultiXactStatusNoKeyUpdate
Definition: multixact.h:44
static PgChecksumMode mode
Definition: pg_checksums.c:56
#define RelationGetRelationName(relation)
Definition: rel.h:539
uint8 infobits_set
Definition: heapam_xlog.h:399
OffsetNumber offnum
Definition: heapam_xlog.h:398
TransactionId xmax
Definition: heapam_xlog.h:397
@ TM_WouldBlock
Definition: tableam.h:110
#define VISIBILITYMAP_ALL_FROZEN

References Assert, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetPage(), BufferIsValid(), TM_FailureData::cmax, compute_infobits(), compute_new_xmax_infomask(), ConditionalMultiXactIdWait(), ConditionalXactLockTableWait(), TM_FailureData::ctid, DoesMultiXactIdConflict(), elog, END_CRIT_SECTION, ereport, errcode(), errmsg(), ERROR, xl_heap_lock::flags, get_mxact_status_for_lock(), GetCurrentTransactionId(), GetMultiXactIdMembers(), heap_acquire_tuplock(), HEAP_KEYS_UPDATED, heap_lock_updated_tuple(), HEAP_XMAX_BITS, HEAP_XMAX_INVALID, HEAP_XMAX_IS_EXCL_LOCKED, HEAP_XMAX_IS_KEYSHR_LOCKED, HEAP_XMAX_IS_LOCKED_ONLY, HEAP_XMAX_IS_MULTI, HEAP_XMAX_IS_SHR_LOCKED, HeapTupleHeaderClearHotUpdated, HeapTupleHeaderGetCmax(), HeapTupleHeaderGetRawXmax, HeapTupleHeaderGetUpdateXid, HeapTupleHeaderIsOnlyLocked(), HeapTupleHeaderSetXmax, HeapTupleSatisfiesUpdate(), i, xl_heap_lock::infobits_set, InvalidBuffer, InvalidCommandId, ItemIdGetLength, ItemIdIsNormal, ItemPointerCopy(), ItemPointerEquals(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), LockBuffer(), LockTupleExclusive, LockTupleKeyShare, LockTupleNoKeyExclusive, LockTupleShare, LockWaitBlock, LockWaitError, LockWaitSkip, MarkBufferDirty(), mode, MultiXactIdSetOldestMember(), MultiXactIdWait(), MultiXactStatusNoKeyUpdate, xl_heap_lock::offnum, PageGetItem(), PageGetItemId(), PageIsAllVisible(), PageSetLSN(), pfree(), ReadBuffer(), REGBUF_STANDARD, RelationGetRelationName, RelationGetRelid, RelationNeedsWAL, ReleaseBuffer(), res, SizeOfHeapLock, START_CRIT_SECTION, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TM_BeingModified, TM_Deleted, TM_Invisible, TM_Ok, TM_SelfModified, TM_Updated, TM_WouldBlock, TransactionIdEquals, TransactionIdIsCurrentTransactionId(), TUPLOCK_from_mxstatus, UnlockTupleTuplock, UpdateXmaxHintBits(), VISIBILITYMAP_ALL_FROZEN, visibilitymap_clear(), visibilitymap_pin(), XactLockTableWait(), XLH_LOCK_ALL_FROZEN_CLEARED, XLOG_HEAP_LOCK, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), XLogRegisterData(), XLTW_Lock, xl_heap_lock::xmax, TM_FailureData::xmax, and xmax_infomask_changed().

Referenced by heapam_tuple_lock().
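
As a hedged usage sketch (not part of heapam.c): the fragment below locks a single row by TID the way a table-AM callback such as heapam_tuple_lock() might, then drops the pin that heap_lock_tuple() returns. The wrapper name lock_one_row_exclusively() is hypothetical and exists only for illustration.

/*
 * Hedged sketch, not PostgreSQL source: minimal caller of heap_lock_tuple().
 */
#include "postgres.h"

#include "access/heapam.h"
#include "storage/bufmgr.h"

static TM_Result
lock_one_row_exclusively(Relation rel, ItemPointer tid, CommandId cid)
{
    HeapTupleData tuple;
    Buffer      buffer;
    TM_FailureData tmfd;
    TM_Result   result;

    /* heap_lock_tuple() locates the target row through tuple.t_self */
    tuple.t_self = *tid;

    result = heap_lock_tuple(rel, &tuple, cid,
                             LockTupleExclusive,    /* FOR UPDATE strength */
                             LockWaitBlock,         /* sleep until the row is free */
                             true,                  /* follow the update chain */
                             &buffer, &tmfd);

    /* The tuple's buffer comes back pinned (not locked); drop the pin. */
    if (BufferIsValid(buffer))
        ReleaseBuffer(buffer);

    /* On TM_Updated/TM_Deleted, tmfd.ctid and tmfd.xmax describe the conflict. */
    return result;
}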

◆ heap_multi_insert()

void heap_multi_insert ( Relation  relation,
struct TupleTableSlot **  slots,
int  ntuples,
CommandId  cid,
int  options,
BulkInsertState  bistate 
)

Definition at line 2259 of file heapam.c.

2261 {
2262  TransactionId xid = GetCurrentTransactionId();
2263  HeapTuple *heaptuples;
2264  int i;
2265  int ndone;
2266  PGAlignedBlock scratch;
2267  Page page;
2268  Buffer vmbuffer = InvalidBuffer;
2269  bool needwal;
2270  Size saveFreeSpace;
2271  bool need_tuple_data = RelationIsLogicallyLogged(relation);
2272  bool need_cids = RelationIsAccessibleInLogicalDecoding(relation);
2273  bool starting_with_empty_page = false;
2274  int npages = 0;
2275  int npages_used = 0;
2276 
2277  /* currently not needed (thus unsupported) for heap_multi_insert() */
2278  Assert(!(options & HEAP_INSERT_NO_LOGICAL));
2279 
2280  needwal = RelationNeedsWAL(relation);
2281  saveFreeSpace = RelationGetTargetPageFreeSpace(relation,
2282  HEAP_DEFAULT_FILLFACTOR);
2283 
2284  /* Toast and set header data in all the slots */
2285  heaptuples = palloc(ntuples * sizeof(HeapTuple));
2286  for (i = 0; i < ntuples; i++)
2287  {
2288  HeapTuple tuple;
2289 
2290  tuple = ExecFetchSlotHeapTuple(slots[i], true, NULL);
2291  slots[i]->tts_tableOid = RelationGetRelid(relation);
2292  tuple->t_tableOid = slots[i]->tts_tableOid;
2293  heaptuples[i] = heap_prepare_insert(relation, tuple, xid, cid,
2294  options);
2295  }
2296 
2297  /*
2298  * We're about to do the actual inserts -- but check for conflict first,
2299  * to minimize the possibility of having to roll back work we've just
2300  * done.
2301  *
2302  * A check here does not definitively prevent a serialization anomaly;
2303  * that check MUST be done at least past the point of acquiring an
2304  * exclusive buffer content lock on every buffer that will be affected,
2305  * and MAY be done after all inserts are reflected in the buffers and
2306  * those locks are released; otherwise there is a race condition. Since
2307  * multiple buffers can be locked and unlocked in the loop below, and it
2308  * would not be feasible to identify and lock all of those buffers before
2309  * the loop, we must do a final check at the end.
2310  *
2311  * The check here could be omitted with no loss of correctness; it is
2312  * present strictly as an optimization.
2313  *
2314  * For heap inserts, we only need to check for table-level SSI locks. Our
2315  * new tuples can't possibly conflict with existing tuple locks, and heap
2316  * page locks are only consolidated versions of tuple locks; they do not
2317  * lock "gaps" as index page locks do. So we don't need to specify a
2318  * buffer when making the call, which makes for a faster check.
2319  */
2320  CheckForSerializableConflictIn(relation, NULL, InvalidBlockNumber);
2321 
2322  ndone = 0;
2323  while (ndone < ntuples)
2324  {
2325  Buffer buffer;
2326  bool all_visible_cleared = false;
2327  bool all_frozen_set = false;
2328  int nthispage;
2329 
2330  CHECK_FOR_INTERRUPTS();
2331 
2332  /*
2333  * Compute number of pages needed to fit the to-be-inserted tuples in
2334  * the worst case. This will be used to determine how much to extend
2335  * the relation by in RelationGetBufferForTuple(), if needed. If we
2336  * filled a prior page from scratch, we can just update our last
2337  * computation, but if we started with a partially filled page,
2338  * recompute from scratch, since the number of potentially required pages
2339  * can vary due to tuples needing to fit onto the page, page headers
2340  * etc.
2341  */
2342  if (ndone == 0 || !starting_with_empty_page)
2343  {
2344  npages = heap_multi_insert_pages(heaptuples, ndone, ntuples,
2345  saveFreeSpace);
2346  npages_used = 0;
2347  }
2348  else
2349  npages_used++;
2350 
2351  /*
2352  * Find buffer where at least the next tuple will fit. If the page is
2353  * all-visible, this will also pin the requisite visibility map page.
2354  *
2355  * Also pin visibility map page if COPY FREEZE inserts tuples into an
2356  * empty page. See all_frozen_set below.
2357  */
2358  buffer = RelationGetBufferForTuple(relation, heaptuples[ndone]->t_len,
2359  InvalidBuffer, options, bistate,
2360  &vmbuffer, NULL,
2361  npages - npages_used);
2362  page = BufferGetPage(buffer);
2363 
2364  starting_with_empty_page = PageGetMaxOffsetNumber(page) == 0;
2365 
2366  if (starting_with_empty_page && (options & HEAP_INSERT_FROZEN))
2367  all_frozen_set = true;
2368 
2369  /* NO EREPORT(ERROR) from here till changes are logged */
2370  START_CRIT_SECTION();
2371 
2372  /*
2373  * RelationGetBufferForTuple has ensured that the first tuple fits.
2374  * Put that on the page, and then as many other tuples as fit.
2375  */
2376  RelationPutHeapTuple(relation, buffer, heaptuples[ndone], false);
2377 
2378  /*
2379  * For logical decoding we need combo CIDs to properly decode the
2380  * catalog.
2381  */
2382  if (needwal && need_cids)
2383  log_heap_new_cid(relation, heaptuples[ndone]);
2384 
2385  for (nthispage = 1; ndone + nthispage < ntuples; nthispage++)
2386  {
2387  HeapTuple heaptup = heaptuples[ndone + nthispage];
2388 
2389  if (PageGetHeapFreeSpace(page) < MAXALIGN(heaptup->t_len) + saveFreeSpace)
2390  break;
2391 
2392  RelationPutHeapTuple(relation, buffer, heaptup, false);
2393 
2394  /*
2395  * For logical decoding we need combo CIDs to properly decode the
2396  * catalog.
2397  */
2398  if (needwal && need_cids)
2399  log_heap_new_cid(relation, heaptup);
2400  }
2401 
2402  /*
2403  * If the page is all visible, need to clear that, unless we're only
2404  * going to add further frozen rows to it.
2405  *
2406  * If we're only adding already frozen rows to a previously empty
2407  * page, mark it as all-visible.
2408  */
2409  if (PageIsAllVisible(page) && !(options & HEAP_INSERT_FROZEN))
2410  {
2411  all_visible_cleared = true;
2412  PageClearAllVisible(page);
2413  visibilitymap_clear(relation,
2414  BufferGetBlockNumber(buffer),
2415  vmbuffer, VISIBILITYMAP_VALID_BITS);
2416  }
2417  else if (all_frozen_set)
2418  PageSetAllVisible(page);
2419 
2420  /*
2421  * XXX Should we set PageSetPrunable on this page ? See heap_insert()
2422  */
2423 
2424  MarkBufferDirty(buffer);
2425 
2426  /* XLOG stuff */
2427  if (needwal)
2428  {
2429  XLogRecPtr recptr;
2430  xl_heap_multi_insert *xlrec;
2431  uint8 info = XLOG_HEAP2_MULTI_INSERT;
2432  char *tupledata;
2433  int totaldatalen;
2434  char *scratchptr = scratch.data;
2435  bool init;
2436  int bufflags = 0;
2437 
2438  /*
2439  * If the page was previously empty, we can reinit the page
2440  * instead of restoring the whole thing.
2441  */
2442  init = starting_with_empty_page;
2443 
2444  /* allocate xl_heap_multi_insert struct from the scratch area */
2445  xlrec = (xl_heap_multi_insert *) scratchptr;
2446  scratchptr += SizeOfHeapMultiInsert;
2447 
2448  /*
2449  * Allocate offsets array. Unless we're reinitializing the page,
2450  * in that case the tuples are stored in order starting at
2451  * FirstOffsetNumber and we don't need to store the offsets
2452  * explicitly.
2453  */
2454  if (!init)
2455  scratchptr += nthispage * sizeof(OffsetNumber);
2456 
2457  /* the rest of the scratch space is used for tuple data */
2458  tupledata = scratchptr;
2459 
2460  /* check that the mutually exclusive flags are not both set */
2461  Assert(!(all_visible_cleared && all_frozen_set));
2462 
2463  xlrec->flags = 0;
2464  if (all_visible_cleared)
2465  xlrec->flags = XLH_INSERT_ALL_VISIBLE_CLEARED;
2466  if (all_frozen_set)
2467  xlrec->flags = XLH_INSERT_ALL_FROZEN_SET;
2468 
2469  xlrec->ntuples = nthispage;
2470 
2471  /*
2472  * Write out an xl_multi_insert_tuple and the tuple data itself
2473  * for each tuple.
2474  */
2475  for (i = 0; i < nthispage; i++)
2476  {
2477  HeapTuple heaptup = heaptuples[ndone + i];
2478  xl_multi_insert_tuple *tuphdr;
2479  int datalen;
2480 
2481  if (!init)
2482  xlrec->offsets[i] = ItemPointerGetOffsetNumber(&heaptup->t_self);
2483  /* xl_multi_insert_tuple needs two-byte alignment. */
2484  tuphdr = (xl_multi_insert_tuple *) SHORTALIGN(scratchptr);
2485  scratchptr = ((char *) tuphdr) + SizeOfMultiInsertTuple;
2486 
2487  tuphdr->t_infomask2 = heaptup->t_data->t_infomask2;
2488  tuphdr->t_infomask = heaptup->t_data->t_infomask;
2489  tuphdr->t_hoff = heaptup->t_data->t_hoff;
2490 
2491  /* write bitmap [+ padding] [+ oid] + data */
2492  datalen = heaptup->t_len - SizeofHeapTupleHeader;
2493  memcpy(scratchptr,
2494  (char *) heaptup->t_data + SizeofHeapTupleHeader,
2495  datalen);
2496  tuphdr->datalen = datalen;
2497  scratchptr += datalen;
2498  }
2499  totaldatalen = scratchptr - tupledata;
2500  Assert((scratchptr - scratch.data) < BLCKSZ);
2501 
2502  if (need_tuple_data)
2503  xlrec->flags |= XLH_INSERT_CONTAINS_NEW_TUPLE;
2504 
2505  /*
2506  * Signal that this is the last xl_heap_multi_insert record
2507  * emitted by this call to heap_multi_insert(). Needed for logical
2508  * decoding so it knows when to cleanup temporary data.
2509  */
2510  if (ndone + nthispage == ntuples)
2511  xlrec->flags |= XLH_INSERT_LAST_IN_MULTI;
2512 
2513  if (init)
2514  {
2515  info |= XLOG_HEAP_INIT_PAGE;
2516  bufflags |= REGBUF_WILL_INIT;
2517  }
2518 
2519  /*
2520  * If we're doing logical decoding, include the new tuple data
2521  * even if we take a full-page image of the page.
2522  */
2523  if (need_tuple_data)
2524  bufflags |= REGBUF_KEEP_DATA;
2525 
2526  XLogBeginInsert();
2527  XLogRegisterData((char *) xlrec, tupledata - scratch.data);
2528  XLogRegisterBuffer(0, buffer, REGBUF_STANDARD | bufflags);
2529 
2530  XLogRegisterBufData(0, tupledata, totaldatalen);
2531 
2532  /* filtering by origin on a row level is much more efficient */
2533  XLogSetRecordFlags(XLOG_INCLUDE_ORIGIN);
2534 
2535  recptr = XLogInsert(RM_HEAP2_ID, info);
2536 
2537  PageSetLSN(page, recptr);
2538  }
2539 
2540  END_CRIT_SECTION();
2541 
2542  /*
2543  * If we've frozen everything on the page, update the visibilitymap.
2544  * We're already holding pin on the vmbuffer.
2545  */
2546  if (all_frozen_set)
2547  {
2548  Assert(PageIsAllVisible(page));
2549  Assert(visibilitymap_pin_ok(BufferGetBlockNumber(buffer), vmbuffer));
2550 
2551  /*
2552  * It's fine to use InvalidTransactionId here - this is only used
2553  * when HEAP_INSERT_FROZEN is specified, which intentionally
2554  * violates visibility rules.
2555  */
2556  visibilitymap_set(relation, BufferGetBlockNumber(buffer), buffer,
2557  InvalidXLogRecPtr, vmbuffer,
2558  InvalidTransactionId,
2559  VISIBILITYMAP_ALL_VISIBLE | VISIBILITYMAP_ALL_FROZEN);
2560  }
2561 
2562  UnlockReleaseBuffer(buffer);
2563  ndone += nthispage;
2564 
2565  /*
2566  * NB: Only release vmbuffer after inserting all tuples - it's fairly
2567  * likely that we'll insert into subsequent heap pages that are likely
2568  * to use the same vm page.
2569  */
2570  }
2571 
2572  /* We're done with inserting all tuples, so release the last vmbuffer. */
2573  if (vmbuffer != InvalidBuffer)
2574  ReleaseBuffer(vmbuffer);
2575 
2576  /*
2577  * We're done with the actual inserts. Check for conflicts again, to
2578  * ensure that all rw-conflicts in to these inserts are detected. Without
2579  * this final check, a sequential scan of the heap may have locked the
2580  * table after the "before" check, missing one opportunity to detect the
2581  * conflict, and then scanned the table before the new tuples were there,
2582  * missing the other chance to detect the conflict.
2583  *
2584  * For heap inserts, we only need to check for table-level SSI locks. Our
2585  * new tuples can't possibly conflict with existing tuple locks, and heap
2586  * page locks are only consolidated versions of tuple locks; they do not
2587  * lock "gaps" as index page locks do. So we don't need to specify a
2588  * buffer when making the call.
2589  */
2590  CheckForSerializableConflictIn(relation, NULL, InvalidBlockNumber);
2591 
2592  /*
2593  * If tuples are cachable, mark them for invalidation from the caches in
2594  * case we abort. Note it is OK to do this after releasing the buffer,
2595  * because the heaptuples data structure is all in local memory, not in
2596  * the shared buffer.
2597  */
2598  if (IsCatalogRelation(relation))
2599  {
2600  for (i = 0; i < ntuples; i++)
2601  CacheInvalidateHeapTuple(relation, heaptuples[i], NULL);
2602  }
2603 
2604  /* copy t_self fields back to the caller's slots */
2605  for (i = 0; i < ntuples; i++)
2606  slots[i]->tts_tid = heaptuples[i]->t_self;
2607 
2608  pgstat_count_heap_insert(relation, ntuples);
2609 }
Size PageGetHeapFreeSpace(Page page)
Definition: bufpage.c:991
static void PageSetAllVisible(Page page)
Definition: bufpage.h:431
#define MAXALIGN(LEN)
Definition: c.h:811
#define SHORTALIGN(LEN)
Definition: c.h:807
size_t Size
Definition: c.h:605
HeapTuple ExecFetchSlotHeapTuple(TupleTableSlot *slot, bool materialize, bool *shouldFree)
Definition: execTuples.c:1731
static int heap_multi_insert_pages(HeapTuple *heaptuples, int done, int ntuples, Size saveFreeSpace)
Definition: heapam.c:2227
#define HEAP_INSERT_FROZEN
Definition: heapam.h:36
#define SizeOfHeapMultiInsert
Definition: heapam_xlog.h:187
#define XLOG_HEAP2_MULTI_INSERT
Definition: heapam_xlog.h:63
#define XLH_INSERT_LAST_IN_MULTI
Definition: heapam_xlog.h:72
#define XLH_INSERT_ALL_FROZEN_SET
Definition: heapam_xlog.h:78
#define SizeOfMultiInsertTuple
Definition: heapam_xlog.h:198
int init
Definition: isn.c:75
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:122
#define RelationGetTargetPageFreeSpace(relation, defaultff)
Definition: rel.h:378
#define HEAP_DEFAULT_FILLFACTOR
Definition: rel.h:349
Oid tts_tableOid
Definition: tuptable.h:130
OffsetNumber offsets[FLEXIBLE_ARRAY_MEMBER]
Definition: heapam_xlog.h:184
char data[BLCKSZ]
Definition: c.h:1119
void visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf, XLogRecPtr recptr, Buffer vmBuf, TransactionId cutoff_xid, uint8 flags)
bool visibilitymap_pin_ok(BlockNumber heapBlk, Buffer vmbuf)
#define VISIBILITYMAP_ALL_VISIBLE
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28

References Assert, BufferGetBlockNumber(), BufferGetPage(), CacheInvalidateHeapTuple(), CHECK_FOR_INTERRUPTS, CheckForSerializableConflictIn(), PGAlignedBlock::data, xl_multi_insert_tuple::datalen, END_CRIT_SECTION, ExecFetchSlotHeapTuple(), xl_heap_multi_insert::flags, GetCurrentTransactionId(), HEAP_DEFAULT_FILLFACTOR, HEAP_INSERT_FROZEN, HEAP_INSERT_NO_LOGICAL, heap_multi_insert_pages(), heap_prepare_insert(), i, init, InvalidBlockNumber, InvalidBuffer, InvalidTransactionId, InvalidXLogRecPtr, IsCatalogRelation(), ItemPointerGetOffsetNumber(), log_heap_new_cid(), MarkBufferDirty(), MAXALIGN, xl_heap_multi_insert::ntuples, xl_heap_multi_insert::offsets, PageClearAllVisible(), PageGetHeapFreeSpace(), PageGetMaxOffsetNumber(), PageIsAllVisible(), PageSetAllVisible(), PageSetLSN(), palloc(), pgstat_count_heap_insert(), REGBUF_KEEP_DATA, REGBUF_STANDARD, REGBUF_WILL_INIT, RelationGetBufferForTuple(), RelationGetRelid, RelationGetTargetPageFreeSpace, RelationIsAccessibleInLogicalDecoding, RelationIsLogicallyLogged, RelationNeedsWAL, RelationPutHeapTuple(), ReleaseBuffer(), SHORTALIGN, SizeOfHeapMultiInsert, SizeofHeapTupleHeader, SizeOfMultiInsertTuple, START_CRIT_SECTION, HeapTupleData::t_data, xl_multi_insert_tuple::t_hoff, HeapTupleHeaderData::t_hoff, xl_multi_insert_tuple::t_infomask, HeapTupleHeaderData::t_infomask, xl_multi_insert_tuple::t_infomask2, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TupleTableSlot::tts_tableOid, UnlockReleaseBuffer(), VISIBILITYMAP_ALL_FROZEN, VISIBILITYMAP_ALL_VISIBLE, visibilitymap_clear(), visibilitymap_pin_ok(), visibilitymap_set(), VISIBILITYMAP_VALID_BITS, XLH_INSERT_ALL_FROZEN_SET, XLH_INSERT_ALL_VISIBLE_CLEARED, XLH_INSERT_CONTAINS_NEW_TUPLE, XLH_INSERT_LAST_IN_MULTI, XLOG_HEAP2_MULTI_INSERT, XLOG_HEAP_INIT_PAGE, XLOG_INCLUDE_ORIGIN, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), XLogRegisterData(), and XLogSetRecordFlags().

Referenced by CatalogTuplesMultiInsertWithInfo().
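
As a hedged usage sketch (not part of heapam.c): the fragment below pushes an array of already materialized slots through heap_multi_insert() in one call, in the same spirit as CatalogTuplesMultiInsertWithInfo(). The wrapper name multi_insert_slots() is hypothetical.

/*
 * Hedged sketch, not PostgreSQL source: bulk insertion via heap_multi_insert().
 */
#include "postgres.h"

#include "access/heapam.h"
#include "access/xact.h"
#include "executor/tuptable.h"

static void
multi_insert_slots(Relation rel, TupleTableSlot **slots, int nslots)
{
    CommandId   cid = GetCurrentCommandId(true);
    BulkInsertState bistate = GetBulkInsertState();

    /*
     * All slots are inserted under one CommandId; the BulkInsertState keeps
     * the current target block pinned across the internal page-filling loop.
     */
    heap_multi_insert(rel, slots, nslots, cid, 0 /* options */, bistate);

    FreeBulkInsertState(bistate);
}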

◆ heap_page_prune_and_freeze()

void heap_page_prune_and_freeze ( Relation  relation,
Buffer  buffer,
struct GlobalVisState *  vistest,
int  options,
struct VacuumCutoffs *  cutoffs,
PruneFreezeResult *  presult,
PruneReason  reason,
OffsetNumber *  off_loc,
TransactionId *  new_relfrozen_xid,
MultiXactId *  new_relmin_mxid 
)

Definition at line 348 of file pruneheap.c.

357 {
358  Page page = BufferGetPage(buffer);
359  BlockNumber blockno = BufferGetBlockNumber(buffer);
360  OffsetNumber offnum,
361  maxoff;
362  PruneState prstate;
363  HeapTupleData tup;
364  bool do_freeze;
365  bool do_prune;
366  bool do_hint;
367  bool hint_bit_fpi;
368  int64 fpi_before = pgWalUsage.wal_fpi;
369 
370  /* Copy parameters to prstate */
371  prstate.vistest = vistest;
372  prstate.mark_unused_now = (options & HEAP_PAGE_PRUNE_MARK_UNUSED_NOW) != 0;
373  prstate.freeze = (options & HEAP_PAGE_PRUNE_FREEZE) != 0;
374  prstate.cutoffs = cutoffs;
375 
376  /*
377  * Our strategy is to scan the page and make lists of items to change,
378  * then apply the changes within a critical section. This keeps as much
379  * logic as possible out of the critical section, and also ensures that
380  * WAL replay will work the same as the normal case.
381  *
382  * First, initialize the new pd_prune_xid value to zero (indicating no
383  * prunable tuples). If we find any tuples which may soon become
384  * prunable, we will save the lowest relevant XID in new_prune_xid. Also
385  * initialize the rest of our working state.
386  */
387  prstate.new_prune_xid = InvalidTransactionId;
388  prstate.latest_xid_removed = InvalidTransactionId;
389  prstate.nredirected = prstate.ndead = prstate.nunused = prstate.nfrozen = 0;
390  prstate.nroot_items = 0;
391  prstate.nheaponly_items = 0;
392 
393  /* initialize page freezing working state */
394  prstate.pagefrz.freeze_required = false;
395  if (prstate.freeze)
396  {
397  Assert(new_relfrozen_xid && new_relmin_mxid);
398  prstate.pagefrz.FreezePageRelfrozenXid = *new_relfrozen_xid;
399  prstate.pagefrz.NoFreezePageRelfrozenXid = *new_relfrozen_xid;
400  prstate.pagefrz.FreezePageRelminMxid = *new_relmin_mxid;
401  prstate.pagefrz.NoFreezePageRelminMxid = *new_relmin_mxid;
402  }
403  else
404  {
405  Assert(new_relfrozen_xid == NULL && new_relmin_mxid == NULL);
406  prstate.pagefrz.FreezePageRelfrozenXid = InvalidTransactionId;
407  prstate.pagefrz.NoFreezePageRelfrozenXid = InvalidTransactionId;
408  prstate.pagefrz.FreezePageRelminMxid = InvalidMultiXactId;
409  prstate.pagefrz.NoFreezePageRelminMxid = InvalidMultiXactId;
410  }
411 
412  prstate.ndeleted = 0;
413  prstate.live_tuples = 0;
414  prstate.recently_dead_tuples = 0;
415  prstate.hastup = false;
416  prstate.lpdead_items = 0;
417  prstate.deadoffsets = presult->deadoffsets;
418 
419  /*
420  * Caller may update the VM after we're done. We can keep track of
421  * whether the page will be all-visible and all-frozen after pruning and
422  * freezing to help the caller to do that.
423  *
424  * Currently, only VACUUM sets the VM bits. To save the effort, only do
425  * the bookkeeping if the caller needs it. Currently, that's tied to
426  * HEAP_PAGE_PRUNE_FREEZE, but it could be a separate flag if you wanted
427  * to update the VM bits without also freezing or freeze without also
428  * setting the VM bits.
429  *
430  * In addition to telling the caller whether it can set the VM bit, we
431  * also use 'all_visible' and 'all_frozen' for our own decision-making. If
432  * the whole page would become frozen, we consider opportunistically
433  * freezing tuples. We will not be able to freeze the whole page if there
434  * are tuples present that are not visible to everyone or if there are
435  * dead tuples which are not yet removable. However, dead tuples which
436  * will be removed by the end of vacuuming should not preclude us from
437  * opportunistically freezing. Because of that, we do not clear
438  * all_visible when we see LP_DEAD items. We fix that at the end of the
439  * function, when we return the value to the caller, so that the caller
440  * doesn't set the VM bit incorrectly.
441  */
442  if (prstate.freeze)
443  {
444  prstate.all_visible = true;
445  prstate.all_frozen = true;
446  }
447  else
448  {
449  /*
450  * Initializing to false allows skipping the work to update them in
451  * heap_prune_record_unchanged_lp_normal().
452  */
453  prstate.all_visible = false;
454  prstate.all_frozen = false;
455  }
456 
457  /*
458  * The visibility cutoff xid is the newest xmin of live tuples on the
459  * page. In the common case, this will be set as the conflict horizon the
460  * caller can use for updating the VM. If, at the end of freezing and
461  * pruning, the page is all-frozen, there is no possibility that any
462  * running transaction on the standby does not see tuples on the page as
463  * all-visible, so the conflict horizon remains InvalidTransactionId.
464  */
465  prstate.visibility_cutoff_xid = InvalidTransactionId;
466 
467  maxoff = PageGetMaxOffsetNumber(page);
468  tup.t_tableOid = RelationGetRelid(relation);
469 
470  /*
471  * Determine HTSV for all tuples, and queue them up for processing as HOT
472  * chain roots or as heap-only items.
473  *
474  * Determining HTSV only once for each tuple is required for correctness,
475  * to deal with cases where running HTSV twice could result in different
476  * results. For example, RECENTLY_DEAD can turn to DEAD if another
477  * checked item causes GlobalVisTestIsRemovableFullXid() to update the
478  * horizon, or INSERT_IN_PROGRESS can change to DEAD if the inserting
479  * transaction aborts.
480  *
481  * It's also good for performance. Most commonly tuples within a page are
482  * stored at decreasing offsets (while the items are stored at increasing
483  * offsets). When processing all tuples on a page this leads to reading
484  * memory at decreasing offsets within a page, with a variable stride.
485  * That's hard for CPU prefetchers to deal with. Processing the items in
486  * reverse order (and thus the tuples in increasing order) increases
487  * prefetching efficiency significantly / decreases the number of cache
488  * misses.
489  */
490  for (offnum = maxoff;
491  offnum >= FirstOffsetNumber;
492  offnum = OffsetNumberPrev(offnum))
493  {
494  ItemId itemid = PageGetItemId(page, offnum);
495  HeapTupleHeader htup;
496 
497  /*
498  * Set the offset number so that we can display it along with any
499  * error that occurred while processing this tuple.
500  */
501  *off_loc = offnum;
502 
503  prstate.processed[offnum] = false;
504  prstate.htsv[offnum] = -1;
505 
506  /* Nothing to do if slot doesn't contain a tuple */
507  if (!ItemIdIsUsed(itemid))
508  {
509  heap_prune_record_unchanged_lp_unused(page, &prstate, offnum);
510  continue;
511  }
512 
513  if (ItemIdIsDead(itemid))
514  {
515  /*
516  * If the caller set mark_unused_now true, we can set dead line
517  * pointers LP_UNUSED now.
518  */
519  if (unlikely(prstate.mark_unused_now))
520  heap_prune_record_unused(&prstate, offnum, false);
521  else
522  heap_prune_record_unchanged_lp_dead(page, &prstate, offnum);
523  continue;
524  }
525 
526  if (ItemIdIsRedirected(itemid))
527  {
528  /* This is the start of a HOT chain */
529  prstate.root_items[prstate.nroot_items++] = offnum;
530  continue;
531  }
532 
533  Assert(ItemIdIsNormal(itemid));
534 
535  /*
536  * Get the tuple's visibility status and queue it up for processing.
537  */
538  htup = (HeapTupleHeader) PageGetItem(page, itemid);
539  tup.t_data = htup;
540  tup.t_len = ItemIdGetLength(itemid);
541  ItemPointerSet(&tup.t_self, blockno, offnum);
542 
543  prstate.htsv[offnum] = heap_prune_satisfies_vacuum(&prstate, &tup,
544  buffer);
545 
546  if (!HeapTupleHeaderIsHeapOnly(htup))
547  prstate.root_items[prstate.nroot_items++] = offnum;
548  else
549  prstate.heaponly_items[prstate.nheaponly_items++] = offnum;
550  }
551 
552  /*
553  * If checksums are enabled, heap_prune_satisfies_vacuum() may have caused
554  * an FPI to be emitted.
555  */
556  hint_bit_fpi = fpi_before != pgWalUsage.wal_fpi;
557 
558  /*
559  * Process HOT chains.
560  *
561  * We added the items to the array starting from 'maxoff', so by
562  * processing the array in reverse order, we process the items in
563  * ascending offset number order. The order doesn't matter for
564  * correctness, but some quick micro-benchmarking suggests that this is
565  * faster. (Earlier PostgreSQL versions, which scanned all the items on
566  * the page instead of using the root_items array, also did it in
567  * ascending offset number order.)
568  */
569  for (int i = prstate.nroot_items - 1; i >= 0; i--)
570  {
571  offnum = prstate.root_items[i];
572 
573  /* Ignore items already processed as part of an earlier chain */
574  if (prstate.processed[offnum])
575  continue;
576 
577  /* see preceding loop */
578  *off_loc = offnum;
579 
580  /* Process this item or chain of items */
581  heap_prune_chain(page, blockno, maxoff, offnum, &prstate);
582  }
583 
584  /*
585  * Process any heap-only tuples that were not already processed as part of
586  * a HOT chain.
587  */
588  for (int i = prstate.nheaponly_items - 1; i >= 0; i--)
589  {
590  offnum = prstate.heaponly_items[i];
591 
592  if (prstate.processed[offnum])
593  continue;
594 
595  /* see preceding loop */
596  *off_loc = offnum;
597 
598  /*
599  * If the tuple is DEAD and doesn't chain to anything else, mark it
600  * unused. (If it does chain, we can only remove it as part of
601  * pruning its chain.)
602  *
603  * We need this primarily to handle aborted HOT updates, that is,
604  * XMIN_INVALID heap-only tuples. Those might not be linked to by any
605  * chain, since the parent tuple might be re-updated before any
606  * pruning occurs. So we have to be able to reap them separately from
607  * chain-pruning. (Note that HeapTupleHeaderIsHotUpdated will never
608  * return true for an XMIN_INVALID tuple, so this code will work even
609  * when there were sequential updates within the aborted transaction.)
610  */
611  if (prstate.htsv[offnum] == HEAPTUPLE_DEAD)
612  {
613  ItemId itemid = PageGetItemId(page, offnum);
614  HeapTupleHeader htup = (HeapTupleHeader) PageGetItem(page, itemid);
615 
616  if (likely(!HeapTupleHeaderIsHotUpdated(htup)))
617  {
618  HeapTupleHeaderAdvanceConflictHorizon(htup,
619  &prstate.latest_xid_removed);
620  heap_prune_record_unused(&prstate, offnum, true);
621  }
622  else
623  {
624  /*
625  * This tuple should've been processed and removed as part of
626  * a HOT chain, so something's wrong. To preserve evidence,
627  * we don't dare to remove it. We cannot leave behind a DEAD
628  * tuple either, because that will cause VACUUM to error out.
629  * Throwing an error with a distinct error message seems like
630  * the least bad option.
631  */
632  elog(ERROR, "dead heap-only tuple (%u, %d) is not linked to from any HOT chain",
633  blockno, offnum);
634  }
635  }
636  else
637  heap_prune_record_unchanged_lp_normal(page, &prstate, offnum);
638  }
639 
640  /* We should now have processed every tuple exactly once */
641 #ifdef USE_ASSERT_CHECKING
642  for (offnum = FirstOffsetNumber;
643  offnum <= maxoff;
644  offnum = OffsetNumberNext(offnum))
645  {
646  *off_loc = offnum;
647 
648  Assert(prstate.processed[offnum]);
649  }
650 #endif
651 
652  /* Clear the offset information once we have processed the given page. */
653  *off_loc = InvalidOffsetNumber;
654 
655  do_prune = prstate.nredirected > 0 ||
656  prstate.ndead > 0 ||
657  prstate.nunused > 0;
658 
659  /*
660  * Even if we don't prune anything, if we found a new value for the
661  * pd_prune_xid field or the page was marked full, we will update the hint
662  * bit.
663  */
664  do_hint = ((PageHeader) page)->pd_prune_xid != prstate.new_prune_xid ||
665  PageIsFull(page);
666 
667  /*
668  * Decide if we want to go ahead with freezing according to the freeze
669  * plans we prepared, or not.
670  */
671  do_freeze = false;
672  if (prstate.freeze)
673  {
674  if (prstate.pagefrz.freeze_required)
675  {
676  /*
677  * heap_prepare_freeze_tuple indicated that at least one XID/MXID
678  * from before FreezeLimit/MultiXactCutoff is present. Must
679  * freeze to advance relfrozenxid/relminmxid.
680  */
681  do_freeze = true;
682  }
683  else
684  {
685  /*
686  * Opportunistically freeze the page if we are generating an FPI
687  * anyway and if doing so means that we can set the page
688  * all-frozen afterwards (might not happen until VACUUM's final
689  * heap pass).
690  *
691  * XXX: Previously, we knew if pruning emitted an FPI by checking
692  * pgWalUsage.wal_fpi before and after pruning. Once the freeze
693  * and prune records were combined, this heuristic couldn't be
694  * used anymore. The opportunistic freeze heuristic must be
695  * improved; however, for now, try to approximate the old logic.
696  */
697  if (prstate.all_visible && prstate.all_frozen && prstate.nfrozen > 0)
698  {
699  /*
700  * Freezing would make the page all-frozen. Have we already
701  * emitted an FPI, or will we do so anyway?
702  */
703  if (RelationNeedsWAL(relation))
704  {
705  if (hint_bit_fpi)
706  do_freeze = true;
707  else if (do_prune)
708  {
709  if (XLogCheckBufferNeedsBackup(buffer))
710  do_freeze = true;
711  }
712  else if (do_hint)
713  {
714  if (XLogHintBitIsNeeded() &&
715  XLogCheckBufferNeedsBackup(buffer))
716  }
717  }
718  }
719  }
720  }
721 
722  if (do_freeze)
723  {
724  /*
725  * Validate the tuples we will be freezing before entering the
726  * critical section.
727  */
728  heap_pre_freeze_checks(buffer, prstate.frozen, prstate.nfrozen);
729  }
730  else if (prstate.nfrozen > 0)
731  {
732  /*
733  * The page contained some tuples that were not already frozen, and we
734  * chose not to freeze them now. The page won't be all-frozen then.
735  */
736  Assert(!prstate.pagefrz.freeze_required);
737 
738  prstate.all_frozen = false;
739  prstate.nfrozen = 0; /* avoid miscounts in instrumentation */
740  }
741  else
742  {
743  /*
744  * We have no freeze plans to execute. The page might already be
745  * all-frozen (perhaps only following pruning), though. Such pages
746  * can be marked all-frozen in the VM by our caller, even though none
747  * of its tuples were newly frozen here.
748  */
749  }
750 
751  /* Any error while applying the changes is critical */
752  START_CRIT_SECTION();
753 
754  if (do_hint)
755  {
756  /*
757  * Update the page's pd_prune_xid field to either zero, or the lowest
758  * XID of any soon-prunable tuple.
759  */
760  ((PageHeader) page)->pd_prune_xid = prstate.new_prune_xid;
761 
762  /*
763  * Also clear the "page is full" flag, since there's no point in
764  * repeating the prune/defrag process until something else happens to
765  * the page.
766  */
767  PageClearFull(page);
768 
769  /*
770  * If that's all we had to do to the page, this is a non-WAL-logged
771  * hint. If we are going to freeze or prune the page, we will mark
772  * the buffer dirty below.
773  */
774  if (!do_freeze && !do_prune)
775  MarkBufferDirtyHint(buffer, true);
776  }
777 
778  if (do_prune || do_freeze)
779  {
780  /* Apply the planned item changes and repair page fragmentation. */
781  if (do_prune)
782  {
783  heap_page_prune_execute(buffer, false,
784  prstate.redirected, prstate.nredirected,
785  prstate.nowdead, prstate.ndead,
786  prstate.nowunused, prstate.nunused);
787  }
788 
789  if (do_freeze)
790  heap_freeze_prepared_tuples(buffer, prstate.frozen, prstate.nfrozen);
791 
792  MarkBufferDirty(buffer);
793 
794  /*
795  * Emit a WAL XLOG_HEAP2_PRUNE_FREEZE record showing what we did
796  */
797  if (RelationNeedsWAL(relation))
798  {
799  /*
800  * The snapshotConflictHorizon for the whole record should be the
801  * most conservative of all the horizons calculated for any of the
802  * possible modifications. If this record will prune tuples, any
803  * transactions on the standby older than the youngest xmax of the
804  * most recently removed tuple this record will prune will
805  * conflict. If this record will freeze tuples, any transactions
806  * on the standby with xids older than the youngest tuple this
807  * record will freeze will conflict.
808  */
809  TransactionId frz_conflict_horizon = InvalidTransactionId;
810  TransactionId conflict_xid;
811 
812  /*
813  * We can use the visibility_cutoff_xid as our cutoff for
814  * conflicts when the whole page is eligible to become all-frozen
815  * in the VM once we're done with it. Otherwise we generate a
816  * conservative cutoff by stepping back from OldestXmin.
817  */
818  if (do_freeze)
819  {
820  if (prstate.all_visible && prstate.all_frozen)
821  frz_conflict_horizon = prstate.visibility_cutoff_xid;
822  else
823  {
824  /* Avoids false conflicts when hot_standby_feedback in use */
825  frz_conflict_horizon = prstate.cutoffs->OldestXmin;
826  TransactionIdRetreat(frz_conflict_horizon);
827  }
828  }
829 
830  if (TransactionIdFollows(frz_conflict_horizon, prstate.latest_xid_removed))
831  conflict_xid = frz_conflict_horizon;
832  else
833  conflict_xid = prstate.latest_xid_removed;
834 
835  log_heap_prune_and_freeze(relation, buffer,
836  conflict_xid,
837  true, reason,
838  prstate.frozen, prstate.nfrozen,
839  prstate.redirected, prstate.nredirected,
840  prstate.nowdead, prstate.ndead,
841  prstate.nowunused, prstate.nunused);
842  }
843  }
844 
845  END_CRIT_SECTION();
846 
847  /* Copy information back for caller */
848  presult->ndeleted = prstate.ndeleted;
849  presult->nnewlpdead = prstate.ndead;
850  presult->nfrozen = prstate.nfrozen;
851  presult->live_tuples = prstate.live_tuples;
852  presult->recently_dead_tuples = prstate.recently_dead_tuples;
853 
854  /*
855  * It was convenient to ignore LP_DEAD items in all_visible earlier on to
856  * make the choice of whether or not to freeze the page unaffected by the
857  * short-term presence of LP_DEAD items. These LP_DEAD items were
858  * effectively assumed to be LP_UNUSED items in the making. It doesn't
859  * matter which vacuum heap pass (initial pass or final pass) ends up
860  * setting the page all-frozen, as long as the ongoing VACUUM does it.
861  *
862  * Now that freezing has been finalized, unset all_visible if there are
863  * any LP_DEAD items on the page. It needs to reflect the present state
864  * of the page, as expected by our caller.
865  */
866  if (prstate.all_visible && prstate.lpdead_items == 0)
867  {
868  presult->all_visible = prstate.all_visible;
869  presult->all_frozen = prstate.all_frozen;
870  }
871  else
872  {
873  presult->all_visible = false;
874  presult->all_frozen = false;
875  }
876 
877  presult->hastup = prstate.hastup;
878 
879  /*
880  * For callers planning to update the visibility map, the conflict horizon
881  * for that record must be the newest xmin on the page. However, if the
882  * page is completely frozen, there can be no conflict and the
883  * vm_conflict_horizon should remain InvalidTransactionId. This includes
884  * the case that we just froze all the tuples; the prune-freeze record
885  * included the conflict XID already so the caller doesn't need it.
886  */
887  if (presult->all_frozen)
889  else
890  presult->vm_conflict_horizon = prstate.visibility_cutoff_xid;
891 
892  presult->lpdead_items = prstate.lpdead_items;
893  /* the presult->deadoffsets array was already filled in */
894 
895  if (prstate.freeze)
896  {
897  if (presult->nfrozen > 0)
898  {
899  *new_relfrozen_xid = prstate.pagefrz.FreezePageRelfrozenXid;
900  *new_relmin_mxid = prstate.pagefrz.FreezePageRelminMxid;
901  }
902  else
903  {
904  *new_relfrozen_xid = prstate.pagefrz.NoFreezePageRelfrozenXid;
905  *new_relmin_mxid = prstate.pagefrz.NoFreezePageRelminMxid;
906  }
907  }
908 }
void MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
Definition: bufmgr.c:4914
PageHeaderData * PageHeader
Definition: bufpage.h:170
static void PageClearFull(Page page)
Definition: bufpage.h:420
static bool PageIsFull(Page page)
Definition: bufpage.h:410
#define likely(x)
Definition: c.h:310
void heap_freeze_prepared_tuples(Buffer buffer, HeapTupleFreeze *tuples, int ntuples)
Definition: heapam.c:6891
void heap_pre_freeze_checks(Buffer buffer, HeapTupleFreeze *tuples, int ntuples)
Definition: heapam.c:6838
#define HEAP_PAGE_PRUNE_FREEZE
Definition: heapam.h:42
#define HEAP_PAGE_PRUNE_MARK_UNUSED_NOW
Definition: heapam.h:41
WalUsage pgWalUsage
Definition: instrument.c:22
#define InvalidMultiXactId
Definition: multixact.h:24
#define OffsetNumberPrev(offsetNumber)
Definition: off.h:54
static void heap_prune_chain(Page page, BlockNumber blockno, OffsetNumber maxoff, OffsetNumber rootoffnum, PruneState *prstate)
Definition: pruneheap.c:978
static void heap_prune_record_unchanged_lp_dead(Page page, PruneState *prstate, OffsetNumber offnum)
Definition: pruneheap.c:1487
static void heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum, bool was_normal)
Definition: pruneheap.c:1276
static void heap_prune_record_unchanged_lp_normal(Page page, PruneState *prstate, OffsetNumber offnum)
Definition: pruneheap.c:1309
void log_heap_prune_and_freeze(Relation relation, Buffer buffer, TransactionId conflict_xid, bool cleanup_lock, PruneReason reason, HeapTupleFreeze *frozen, int nfrozen, OffsetNumber *redirected, int nredirected, OffsetNumber *dead, int ndead, OffsetNumber *unused, int nunused)
Definition: pruneheap.c:2032
static void heap_prune_record_unchanged_lp_unused(Page page, PruneState *prstate, OffsetNumber offnum)
Definition: pruneheap.c:1298
static HTSV_Result heap_prune_satisfies_vacuum(PruneState *prstate, HeapTuple tup, Buffer buffer)
Definition: pruneheap.c:915
void heap_page_prune_execute(Buffer buffer, bool lp_truncate_only, OffsetNumber *redirected, int nredirected, OffsetNumber *nowdead, int ndead, OffsetNumber *nowunused, int nunused)
Definition: pruneheap.c:1540
MultiXactId NoFreezePageRelminMxid
Definition: heapam.h:219
TransactionId FreezePageRelfrozenXid
Definition: heapam.h:207
bool freeze_required
Definition: heapam.h:181
MultiXactId FreezePageRelminMxid
Definition: heapam.h:208
TransactionId NoFreezePageRelfrozenXid
Definition: heapam.h:218
int recently_dead_tuples
Definition: heapam.h:234
TransactionId vm_conflict_horizon
Definition: heapam.h:249
OffsetNumber deadoffsets[MaxHeapTuplesPerPage]
Definition: heapam.h:263
bool all_visible
Definition: heapam.h:247
HeapPageFreeze pagefrz
Definition: pruneheap.c:103
bool all_visible
Definition: pruneheap.c:150
int ndead
Definition: pruneheap.c:55
bool processed[MaxHeapTuplesPerPage+1]
Definition: pruneheap.c:86
OffsetNumber heaponly_items[MaxHeapTuplesPerPage]
Definition: pruneheap.c:78
TransactionId new_prune_xid
Definition: pruneheap.c:52
bool hastup
Definition: pruneheap.c:122
int recently_dead_tuples
Definition: pruneheap.c:119
OffsetNumber nowdead[MaxHeapTuplesPerPage]
Definition: pruneheap.c:60
int nroot_items
Definition: pruneheap.c:75
OffsetNumber nowunused[MaxHeapTuplesPerPage]
Definition: pruneheap.c:61
int nheaponly_items
Definition: pruneheap.c:77
bool mark_unused_now
Definition: pruneheap.c:43
int live_tuples
Definition: pruneheap.c:118
TransactionId visibility_cutoff_xid
Definition: pruneheap.c:152
bool all_frozen
Definition: pruneheap.c:151
GlobalVisState * vistest
Definition: pruneheap.c:41
struct VacuumCutoffs * cutoffs
Definition: pruneheap.c:46
HeapTupleFreeze frozen[MaxHeapTuplesPerPage]
Definition: pruneheap.c:62
int lpdead_items
Definition: pruneheap.c:128
int nfrozen
Definition: pruneheap.c:57
OffsetNumber redirected[MaxHeapTuplesPerPage *2]
Definition: pruneheap.c:59
int ndeleted
Definition: pruneheap.c:115
bool freeze
Definition: pruneheap.c:45
int nredirected
Definition: pruneheap.c:54
int8 htsv[MaxHeapTuplesPerPage+1]
Definition: pruneheap.c:98
TransactionId latest_xid_removed
Definition: pruneheap.c:53
int nunused
Definition: pruneheap.c:56
OffsetNumber root_items[MaxHeapTuplesPerPage]
Definition: pruneheap.c:76
OffsetNumber * deadoffsets
Definition: pruneheap.c:129
TransactionId OldestXmin
Definition: vacuum.h:267
int64 wal_fpi
Definition: instrument.h:54
bool TransactionIdFollows(TransactionId id1, TransactionId id2)
Definition: transam.c:314
#define TransactionIdRetreat(dest)
Definition: transam.h:141
#define XLogHintBitIsNeeded()
Definition: xlog.h:118
bool XLogCheckBufferNeedsBackup(Buffer buffer)
Definition: xloginsert.c:1027

References PruneState::all_frozen, PruneFreezeResult::all_frozen, PruneState::all_visible, PruneFreezeResult::all_visible, Assert, BufferGetBlockNumber(), BufferGetPage(), PruneState::cutoffs, PruneState::deadoffsets, PruneFreezeResult::deadoffsets, elog, END_CRIT_SECTION, ERROR, FirstOffsetNumber, PruneState::freeze, HeapPageFreeze::freeze_required, HeapPageFreeze::FreezePageRelfrozenXid, HeapPageFreeze::FreezePageRelminMxid, PruneState::frozen, PruneState::hastup, PruneFreezeResult::hastup, heap_freeze_prepared_tuples(), heap_page_prune_execute(), HEAP_PAGE_PRUNE_FREEZE, HEAP_PAGE_PRUNE_MARK_UNUSED_NOW, heap_pre_freeze_checks(), heap_prune_chain(), heap_prune_record_unchanged_lp_dead(), heap_prune_record_unchanged_lp_normal(), heap_prune_record_unchanged_lp_unused(), heap_prune_record_unused(), heap_prune_satisfies_vacuum(), PruneState::heaponly_items, HEAPTUPLE_DEAD, HeapTupleHeaderAdvanceConflictHorizon(), HeapTupleHeaderIsHeapOnly, HeapTupleHeaderIsHotUpdated, PruneState::htsv, i, InvalidMultiXactId, InvalidOffsetNumber, InvalidTransactionId, ItemIdGetLength, ItemIdIsDead, ItemIdIsNormal, ItemIdIsRedirected, ItemIdIsUsed, ItemPointerSet(), PruneState::latest_xid_removed, likely, PruneState::live_tuples, PruneFreezeResult::live_tuples, log_heap_prune_and_freeze(), PruneState::lpdead_items, PruneFreezeResult::lpdead_items, PruneState::mark_unused_now, MarkBufferDirty(), MarkBufferDirtyHint(), PruneState::ndead, PruneState::ndeleted, PruneFreezeResult::ndeleted, PruneState::new_prune_xid, PruneState::nfrozen, PruneFreezeResult::nfrozen, PruneState::nheaponly_items, PruneFreezeResult::nnewlpdead, HeapPageFreeze::NoFreezePageRelfrozenXid, HeapPageFreeze::NoFreezePageRelminMxid, PruneState::nowdead, PruneState::nowunused, PruneState::nredirected, PruneState::nroot_items, PruneState::nunused, OffsetNumberNext, OffsetNumberPrev, VacuumCutoffs::OldestXmin, PageClearFull(), PruneState::pagefrz, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PageIsFull(), pgWalUsage, PruneState::processed, PruneState::recently_dead_tuples, PruneFreezeResult::recently_dead_tuples, PruneState::redirected, RelationGetRelid, RelationNeedsWAL, PruneState::root_items, START_CRIT_SECTION, HeapTupleData::t_data, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TransactionIdFollows(), TransactionIdRetreat, unlikely, PruneState::visibility_cutoff_xid, PruneState::vistest, PruneFreezeResult::vm_conflict_horizon, WalUsage::wal_fpi, XLogCheckBufferNeedsBackup(), and XLogHintBitIsNeeded.

Referenced by heap_page_prune_opt(), and lazy_scan_prune().
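
For orientation, here is a hedged sketch of a VACUUM-style call, loosely modeled on lazy_scan_prune(): the surrounding variables (relation, buffer, vistest, cutoffs) and the seed values for the relfrozenxid/relminmxid trackers are assumptions for illustration, not code taken from the backend. The caller is assumed to hold a cleanup lock on the buffer.

    PruneFreezeResult presult;
    OffsetNumber  off_loc;
    TransactionId new_relfrozen_xid = cutoffs->FreezeLimit;     /* assumed seed value */
    MultiXactId   new_relmin_mxid   = cutoffs->MultiXactCutoff; /* assumed seed value */

    heap_page_prune_and_freeze(relation, buffer, vistest,
                               HEAP_PAGE_PRUNE_FREEZE,
                               cutoffs, &presult, PRUNE_VACUUM_SCAN,
                               &off_loc,
                               &new_relfrozen_xid, &new_relmin_mxid);

    if (presult.all_visible)
    {
        /*
         * Set the visibility-map bit(s) here; when the page is not also
         * all-frozen, presult.vm_conflict_horizon supplies the conflict
         * horizon for that VM record, per the comment above.
         */
    }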

◆ heap_page_prune_execute()

void heap_page_prune_execute ( Buffer  buffer,
bool  lp_truncate_only,
OffsetNumber redirected,
int  nredirected,
OffsetNumber nowdead,
int  ndead,
OffsetNumber nowunused,
int  nunused 
)

Definition at line 1540 of file pruneheap.c.

1544 {
1545  Page page = (Page) BufferGetPage(buffer);
1546  OffsetNumber *offnum;
1547  HeapTupleHeader htup PG_USED_FOR_ASSERTS_ONLY;
1548 
1549  /* Shouldn't be called unless there's something to do */
1550  Assert(nredirected > 0 || ndead > 0 || nunused > 0);
1551 
1552  /* If 'lp_truncate_only', we can only remove already-dead line pointers */
1553  Assert(!lp_truncate_only || (nredirected == 0 && ndead == 0));
1554 
1555  /* Update all redirected line pointers */
1556  offnum = redirected;
1557  for (int i = 0; i < nredirected; i++)
1558  {
1559  OffsetNumber fromoff = *offnum++;
1560  OffsetNumber tooff = *offnum++;
1561  ItemId fromlp = PageGetItemId(page, fromoff);
1562  ItemId tolp PG_USED_FOR_ASSERTS_ONLY;
1563 
1564 #ifdef USE_ASSERT_CHECKING
1565 
1566  /*
1567  * Any existing item that we set as an LP_REDIRECT (any 'from' item)
1568  * must be the first item from a HOT chain. If the item has tuple
1569  * storage then it can't be a heap-only tuple. Otherwise we are just
1570  * maintaining an existing LP_REDIRECT from an existing HOT chain that
1571  * has been pruned at least once before now.
1572  */
1573  if (!ItemIdIsRedirected(fromlp))
1574  {
1575  Assert(ItemIdHasStorage(fromlp) && ItemIdIsNormal(fromlp));
1576 
1577  htup = (HeapTupleHeader) PageGetItem(page, fromlp);
1578  Assert(!HeapTupleHeaderIsHeapOnly(htup));
1579  }
1580  else
1581  {
1582  /* We shouldn't need to redundantly set the redirect */
1583  Assert(ItemIdGetRedirect(fromlp) != tooff);
1584  }
1585 
1586  /*
1587  * The item that we're about to set as an LP_REDIRECT (the 'from'
1588  * item) will point to an existing item (the 'to' item) that is
1589  * already a heap-only tuple. There can be at most one LP_REDIRECT
1590  * item per HOT chain.
1591  *
1592  * We need to keep around an LP_REDIRECT item (after original
1593  * non-heap-only root tuple gets pruned away) so that it's always
1594  * possible for VACUUM to easily figure out what TID to delete from
1595  * indexes when an entire HOT chain becomes dead. A heap-only tuple
1596  * can never become LP_DEAD; an LP_REDIRECT item or a regular heap
1597  * tuple can.
1598  *
1599  * This check may miss problems, e.g. the target of a redirect could
1600  * be marked as unused subsequently. The page_verify_redirects() check
1601  * below will catch such problems.
1602  */
1603  tolp = PageGetItemId(page, tooff);
1604  Assert(ItemIdHasStorage(tolp) && ItemIdIsNormal(tolp));
1605  htup = (HeapTupleHeader) PageGetItem(page, tolp);
1606  Assert(HeapTupleHeaderIsHeapOnly(htup));
1607 #endif
1608 
1609  ItemIdSetRedirect(fromlp, tooff);
1610  }
1611 
1612  /* Update all now-dead line pointers */
1613  offnum = nowdead;
1614  for (int i = 0; i < ndead; i++)
1615  {
1616  OffsetNumber off = *offnum++;
1617  ItemId lp = PageGetItemId(page, off);
1618 
1619 #ifdef USE_ASSERT_CHECKING
1620 
1621  /*
1622  * An LP_DEAD line pointer must be left behind when the original item
1623  * (which is dead to everybody) could still be referenced by a TID in
1624  * an index. This should never be necessary with any individual
1625  * heap-only tuple item, though. (It's not clear how much of a problem
1626  * that would be, but there is no reason to allow it.)
1627  */
1628  if (ItemIdHasStorage(lp))
1629  {
1630  Assert(ItemIdIsNormal(lp));
1631  htup = (HeapTupleHeader) PageGetItem(page, lp);
1632  Assert(HeapTupleHeaderIsHeapOnly(htup));
1633  }
1634  else
1635  {
1636  /* Whole HOT chain becomes dead */
1637  Assert(ItemIdIsRedirected(lp));
1638  }
1639 #endif
1640 
1641  ItemIdSetDead(lp);
1642  }
1643 
1644  /* Update all now-unused line pointers */
1645  offnum = nowunused;
1646  for (int i = 0; i < nunused; i++)
1647  {
1648  OffsetNumber off = *offnum++;
1649  ItemId lp = PageGetItemId(page, off);
1650 
1651 #ifdef USE_ASSERT_CHECKING
1652 
1653  if (lp_truncate_only)
1654  {
1655  /* Setting LP_DEAD to LP_UNUSED in vacuum's second pass */
1656  Assert(ItemIdIsDead(lp) && !ItemIdHasStorage(lp));
1657  }
1658  else
1659  {
1660  /*
1661  * When heap_page_prune_and_freeze() was called, mark_unused_now
1662  * may have been passed as true, which allows would-be LP_DEAD
1663  * items to be made LP_UNUSED instead. This is only possible if
1664  * the relation has no indexes. If there are any dead items, then
1665  * mark_unused_now was not true and every item being marked
1666  * LP_UNUSED must refer to a heap-only tuple.
1667  */
1668  if (ndead > 0)
1669  {
1670  Assert(ItemIdHasStorage(lp) && ItemIdIsNormal(lp));
1671  htup = (HeapTupleHeader) PageGetItem(page, lp);
1672  Assert(HeapTupleHeaderIsHeapOnly(htup));
1673  }
1674  else
1675  Assert(ItemIdIsUsed(lp));
1676  }
1677 
1678 #endif
1679 
1680  ItemIdSetUnused(lp);
1681  }
1682 
1683  if (lp_truncate_only)
1684  PageTruncateLinePointerArray(page);
1685  else
1686  {
1687  /*
1688  * Finally, repair any fragmentation, and update the page's hint bit
1689  * about whether it has free pointers.
1690  */
1691  PageRepairFragmentation(page);
1692 
1693  /*
1694  * Now that the page has been modified, assert that redirect items
1695  * still point to valid targets.
1696  */
1697  page_verify_redirects(page);
1698  }
1699 }
void PageRepairFragmentation(Page page)
Definition: bufpage.c:699
void PageTruncateLinePointerArray(Page page)
Definition: bufpage.c:835
#define PG_USED_FOR_ASSERTS_ONLY
Definition: c.h:182
#define ItemIdSetRedirect(itemId, link)
Definition: itemid.h:152
#define ItemIdSetDead(itemId)
Definition: itemid.h:164
#define ItemIdSetUnused(itemId)
Definition: itemid.h:128
#define ItemIdHasStorage(itemId)
Definition: itemid.h:120
static void page_verify_redirects(Page page)
Definition: pruneheap.c:1716

References Assert, BufferGetPage(), HeapTupleHeaderIsHeapOnly, i, ItemIdGetRedirect, ItemIdHasStorage, ItemIdIsDead, ItemIdIsNormal, ItemIdIsRedirected, ItemIdIsUsed, ItemIdSetDead, ItemIdSetRedirect, ItemIdSetUnused, page_verify_redirects(), PageGetItem(), PageGetItemId(), PageRepairFragmentation(), PageTruncateLinePointerArray(), and PG_USED_FOR_ASSERTS_ONLY.

Referenced by heap_page_prune_and_freeze(), and heap_xlog_prune_freeze().
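
The array arguments mirror the loops above: 'redirected' holds (from, to) offset pairs and nredirected counts pairs, while 'nowdead' and 'nowunused' are flat offset lists. A minimal sketch of the calling convention, with made-up offsets, assuming the caller already holds the buffer cleanup lock and a critical section, as heap_page_prune_and_freeze() does:

    OffsetNumber redirected[] = {1, 3};   /* item 1 becomes an LP_REDIRECT to item 3 */
    OffsetNumber nowdead[]    = {5};      /* item 5 becomes LP_DEAD */
    OffsetNumber nowunused[]  = {7, 8};   /* items 7 and 8 become LP_UNUSED */

    heap_page_prune_execute(buffer, false,      /* not lp_truncate_only */
                            redirected, 1,      /* one (from, to) pair */
                            nowdead, 1,
                            nowunused, 2);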

◆ heap_page_prune_opt()

void heap_page_prune_opt ( Relation  relation,
Buffer  buffer 
)

Definition at line 193 of file pruneheap.c.

194 {
195  Page page = BufferGetPage(buffer);
196  TransactionId prune_xid;
197  GlobalVisState *vistest;
198  Size minfree;
199 
200  /*
201  * We can't write WAL in recovery mode, so there's no point trying to
202  * clean the page. The primary will likely issue a cleaning WAL record
203  * soon anyway, so this is no particular loss.
204  */
205  if (RecoveryInProgress())
206  return;
207 
208  /*
209  * First check whether there's any chance there's something to prune,
210  * determining the appropriate horizon is a waste if there's no prune_xid
211  * (i.e. no updates/deletes left potentially dead tuples around).
212  */
213  prune_xid = ((PageHeader) page)->pd_prune_xid;
214  if (!TransactionIdIsValid(prune_xid))
215  return;
216 
217  /*
218  * Check whether prune_xid indicates that there may be dead rows that can
219  * be cleaned up.
220  */
221  vistest = GlobalVisTestFor(relation);
222 
223  if (!GlobalVisTestIsRemovableXid(vistest, prune_xid))
224  return;
225 
226  /*
227  * We prune when a previous UPDATE failed to find enough space on the page
228  * for a new tuple version, or when free space falls below the relation's
229  * fill-factor target (but not less than 10%).
230  *
231  * Checking free space here is questionable since we aren't holding any
232  * lock on the buffer; in the worst case we could get a bogus answer. It's
233  * unlikely to be *seriously* wrong, though, since reading either pd_lower
234  * or pd_upper is probably atomic. Avoiding taking a lock seems more
235  * important than sometimes getting a wrong answer in what is after all
236  * just a heuristic estimate.
237  */
238  minfree = RelationGetTargetPageFreeSpace(relation,
239  HEAP_DEFAULT_FILLFACTOR);
240  minfree = Max(minfree, BLCKSZ / 10);
241 
242  if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
243  {
244  /* OK, try to get exclusive buffer lock */
245  if (!ConditionalLockBufferForCleanup(buffer))
246  return;
247 
248  /*
249  * Now that we have buffer lock, get accurate information about the
250  * page's free space, and recheck the heuristic about whether to
251  * prune.
252  */
253  if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
254  {
255  OffsetNumber dummy_off_loc;
256  PruneFreezeResult presult;
257 
258  /*
259  * For now, pass mark_unused_now as false regardless of whether or
260  * not the relation has indexes, since we cannot safely determine
261  * that during on-access pruning with the current implementation.
262  */
263  heap_page_prune_and_freeze(relation, buffer, vistest, 0,
264  NULL, &presult, PRUNE_ON_ACCESS, &dummy_off_loc, NULL, NULL);
265 
266  /*
267  * Report the number of tuples reclaimed to pgstats. This is
268  * presult.ndeleted minus the number of newly-LP_DEAD-set items.
269  *
270  * We derive the number of dead tuples like this to avoid totally
271  * forgetting about items that were set to LP_DEAD, since they
272  * still need to be cleaned up by VACUUM. We only want to count
273  * heap-only tuples that just became LP_UNUSED in our report,
274  * which don't.
275  *
276  * VACUUM doesn't have to compensate in the same way when it
277  * tracks ndeleted, since it will set the same LP_DEAD items to
278  * LP_UNUSED separately.
279  */
280  if (presult.ndeleted > presult.nnewlpdead)
281  pgstat_update_heap_dead_tuples(relation,
282  presult.ndeleted - presult.nnewlpdead);
283  }
284 
285  /* And release buffer lock */
286  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
287 
288  /*
289  * We avoid reuse of any free space created on the page by unrelated
290  * UPDATEs/INSERTs by opting to not update the FSM at this point. The
291  * free space should be reused by UPDATEs to *this* page.
292  */
293  }
294 }
bool ConditionalLockBufferForCleanup(Buffer buffer)
Definition: bufmgr.c:5326
#define Max(x, y)
Definition: c.h:998
void pgstat_update_heap_dead_tuples(Relation rel, int delta)
bool GlobalVisTestIsRemovableXid(GlobalVisState *state, TransactionId xid)
Definition: procarray.c:4248
void heap_page_prune_and_freeze(Relation relation, Buffer buffer, GlobalVisState *vistest, int options, struct VacuumCutoffs *cutoffs, PruneFreezeResult *presult, PruneReason reason, OffsetNumber *off_loc, TransactionId *new_relfrozen_xid, MultiXactId *new_relmin_mxid)
Definition: pruneheap.c:348
bool RecoveryInProgress(void)
Definition: xlog.c:6290

References BUFFER_LOCK_UNLOCK, BufferGetPage(), ConditionalLockBufferForCleanup(), GlobalVisTestFor(), GlobalVisTestIsRemovableXid(), HEAP_DEFAULT_FILLFACTOR, heap_page_prune_and_freeze(), LockBuffer(), Max, PruneFreezeResult::ndeleted, PruneFreezeResult::nnewlpdead, PageGetHeapFreeSpace(), PageIsFull(), pgstat_update_heap_dead_tuples(), PRUNE_ON_ACCESS, RecoveryInProgress(), RelationGetTargetPageFreeSpace, and TransactionIdIsValid.

Referenced by heap_prepare_pagescan(), heapam_index_fetch_tuple(), and heapam_scan_bitmap_next_block().
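
A minimal sketch of the on-access pattern used by the callers above: the page is only pinned when heap_page_prune_opt() is called, and the function takes (and releases) the cleanup lock itself if its heuristics say pruning is worthwhile. 'blkno' is a hypothetical block number.

    Buffer  buf = ReadBuffer(relation, blkno);  /* pin only, no lock yet */

    heap_page_prune_opt(relation, buf);         /* may prune, or do nothing */

    LockBuffer(buf, BUFFER_LOCK_SHARE);         /* then examine tuples as usual */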

◆ heap_pre_freeze_checks()

void heap_pre_freeze_checks ( Buffer  buffer,
HeapTupleFreeze tuples,
int  ntuples 
)

Definition at line 6838 of file heapam.c.

6840 {
6841  Page page = BufferGetPage(buffer);
6842 
6843  for (int i = 0; i < ntuples; i++)
6844  {
6845  HeapTupleFreeze *frz = tuples + i;
6846  ItemId itemid = PageGetItemId(page, frz->offset);
6847  HeapTupleHeader htup;
6848 
6849  htup = (HeapTupleHeader) PageGetItem(page, itemid);
6850 
6851  /* Deliberately avoid relying on tuple hint bits here */
6852  if (frz->checkflags & HEAP_FREEZE_CHECK_XMIN_COMMITTED)
6853  {
6854  TransactionId xmin = HeapTupleHeaderGetRawXmin(htup);
6855 
6856  Assert(!HeapTupleHeaderXminFrozen(htup));
6857  if (unlikely(!TransactionIdDidCommit(xmin)))
6858  ereport(ERROR,
6859  (errcode(ERRCODE_DATA_CORRUPTED),
6860  errmsg_internal("uncommitted xmin %u needs to be frozen",
6861  xmin)));
6862  }
6863 
6864  /*
6865  * TransactionIdDidAbort won't work reliably in the presence of XIDs
6866  * left behind by transactions that were in progress during a crash,
6867  * so we can only check that xmax didn't commit
6868  */
6869  if (frz->checkflags & HEAP_FREEZE_CHECK_XMAX_ABORTED)
6870  {
6871  TransactionId xmax = HeapTupleHeaderGetRawXmax(htup);
6872 
6873  Assert(TransactionIdIsNormal(xmax));
6874  if (unlikely(TransactionIdDidCommit(xmax)))
6875  ereport(ERROR,
6876  (errcode(ERRCODE_DATA_CORRUPTED),
6877  errmsg_internal("cannot freeze committed xmax %u",
6878  xmax)));
6879  }
6880  }
6881 }
#define HEAP_FREEZE_CHECK_XMAX_ABORTED
Definition: heapam.h:137
#define HEAP_FREEZE_CHECK_XMIN_COMMITTED
Definition: heapam.h:136
#define HeapTupleHeaderGetRawXmin(tup)
Definition: htup_details.h:304
#define HeapTupleHeaderXminFrozen(tup)
Definition: htup_details.h:331
#define ERRCODE_DATA_CORRUPTED
Definition: pg_basebackup.c:41
uint8 checkflags
Definition: heapam.h:149
bool TransactionIdDidCommit(TransactionId transactionId)
Definition: transam.c:126
#define TransactionIdIsNormal(xid)
Definition: transam.h:42

References Assert, BufferGetPage(), HeapTupleFreeze::checkflags, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errmsg_internal(), ERROR, HEAP_FREEZE_CHECK_XMAX_ABORTED, HEAP_FREEZE_CHECK_XMIN_COMMITTED, HeapTupleHeaderGetRawXmax, HeapTupleHeaderGetRawXmin, HeapTupleHeaderXminFrozen, i, HeapTupleFreeze::offset, PageGetItem(), PageGetItemId(), TransactionIdDidCommit(), TransactionIdIsNormal, and unlikely.

Referenced by heap_page_prune_and_freeze().
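
As heap_page_prune_and_freeze() illustrates, these checks are intended to run before the critical section that actually freezes the tuples. A hedged sketch of that ordering, assuming 'frozen' and 'nfrozen' were produced by heap_prepare_freeze_tuple() for the tuples on the page:

    heap_pre_freeze_checks(buffer, frozen, nfrozen);    /* may ereport(ERROR) */

    START_CRIT_SECTION();
    heap_freeze_prepared_tuples(buffer, frozen, nfrozen);
    MarkBufferDirty(buffer);
    /* WAL-log the change here (e.g. via log_heap_prune_and_freeze()) if needed */
    END_CRIT_SECTION();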

◆ heap_prepare_freeze_tuple()

bool heap_prepare_freeze_tuple ( HeapTupleHeader  tuple,
const struct VacuumCutoffs cutoffs,
HeapPageFreeze pagefrz,
HeapTupleFreeze frz,
bool totally_frozen 
)

Definition at line 6541 of file heapam.c.

6545 {
6546  bool xmin_already_frozen = false,
6547  xmax_already_frozen = false;
6548  bool freeze_xmin = false,
6549  replace_xvac = false,
6550  replace_xmax = false,
6551  freeze_xmax = false;
6552  TransactionId xid;
6553 
6554  frz->xmax = HeapTupleHeaderGetRawXmax(tuple);
6555  frz->t_infomask2 = tuple->t_infomask2;
6556  frz->t_infomask = tuple->t_infomask;
6557  frz->frzflags = 0;
6558  frz->checkflags = 0;
6559 
6560  /*
6561  * Process xmin, while keeping track of whether it's already frozen, or
6562  * will become frozen iff our freeze plan is executed by caller (could be
6563  * neither).
6564  */
6565  xid = HeapTupleHeaderGetXmin(tuple);
6566  if (!TransactionIdIsNormal(xid))
6567  xmin_already_frozen = true;
6568  else
6569  {
6570  if (TransactionIdPrecedes(xid, cutoffs->relfrozenxid))
6571  ereport(ERROR,
6572  (errcode(ERRCODE_DATA_CORRUPTED),
6573  errmsg_internal("found xmin %u from before relfrozenxid %u",
6574  xid, cutoffs->relfrozenxid)));
6575 
6576  /* Will set freeze_xmin flags in freeze plan below */
6577  freeze_xmin = TransactionIdPrecedes(xid, cutoffs->OldestXmin);
6578 
6579  /* Verify that xmin committed if and when freeze plan is executed */
6580  if (freeze_xmin)
6581  frz->checkflags |= HEAP_FREEZE_CHECK_XMIN_COMMITTED;
6582  }
6583 
6584  /*
6585  * Old-style VACUUM FULL is gone, but we have to process xvac for as long
6586  * as we support having MOVED_OFF/MOVED_IN tuples in the database
6587  */
6588  xid = HeapTupleHeaderGetXvac(tuple);
6589  if (TransactionIdIsNormal(xid))
6590  {
6591  Assert(TransactionIdPrecedesOrEquals(cutoffs->relfrozenxid, xid));
6592  Assert(TransactionIdPrecedes(xid, cutoffs->OldestXmin));
6593 
6594  /*
6595  * For Xvac, we always freeze proactively. This allows totally_frozen
6596  * tracking to ignore xvac.
6597  */
6598  replace_xvac = pagefrz->freeze_required = true;
6599 
6600  /* Will set replace_xvac flags in freeze plan below */
6601  }
6602 
6603  /* Now process xmax */
6604  xid = frz->xmax;
6605  if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
6606  {
6607  /* Raw xmax is a MultiXactId */
6608  TransactionId newxmax;
6609  uint16 flags;
6610 
6611  /*
6612  * We will either remove xmax completely (in the "freeze_xmax" path),
6613  * process xmax by replacing it (in the "replace_xmax" path), or
6614  * perform no-op xmax processing. The only constraint is that the
6615  * FreezeLimit/MultiXactCutoff postcondition must never be violated.
6616  */
6617  newxmax = FreezeMultiXactId(xid, tuple->t_infomask, cutoffs,
6618  &flags, pagefrz);
6619 
6620  if (flags & FRM_NOOP)
6621  {
6622  /*
6623  * xmax is a MultiXactId, and nothing about it changes for now.
6624  * This is the only case where 'freeze_required' won't have been
6625  * set for us by FreezeMultiXactId, as well as the only case where
6626  * neither freeze_xmax nor replace_xmax are set (given a multi).
6627  *
6628  * This is a no-op, but the call to FreezeMultiXactId might have
6629  * ratcheted back NewRelfrozenXid and/or NewRelminMxid trackers
6630  * for us (the "freeze page" variants, specifically). That'll
6631  * make it safe for our caller to freeze the page later on, while
6632  * leaving this particular xmax undisturbed.
6633  *
6634  * FreezeMultiXactId is _not_ responsible for the "no freeze"
6635  * NewRelfrozenXid/NewRelminMxid trackers, though -- that's our
6636  * job. A call to heap_tuple_should_freeze for this same tuple
6637  * will take place below if 'freeze_required' isn't set already.
6638  * (This repeats work from FreezeMultiXactId, but allows "no
6639  * freeze" tracker maintenance to happen in only one place.)
6640  */
6641  Assert(!MultiXactIdPrecedes(newxmax, cutoffs->MultiXactCutoff));
6642  Assert(MultiXactIdIsValid(newxmax) && xid == newxmax);
6643  }
6644  else if (flags & FRM_RETURN_IS_XID)
6645  {
6646  /*
6647  * xmax will become an updater Xid (original MultiXact's updater
6648  * member Xid will be carried forward as a simple Xid in Xmax).
6649  */
6650  Assert(!TransactionIdPrecedes(newxmax, cutoffs->OldestXmin));
6651 
6652  /*
6653  * NB -- some of these transformations are only valid because we
6654  * know the return Xid is a tuple updater (i.e. not merely a
6655  * locker.) Also note that the only reason we don't explicitly
6656  * worry about HEAP_KEYS_UPDATED is because it lives in
6657  * t_infomask2 rather than t_infomask.
6658  */
6659  frz->t_infomask &= ~HEAP_XMAX_BITS;
6660  frz->xmax = newxmax;
6661  if (flags & FRM_MARK_COMMITTED)
6662  frz->t_infomask |= HEAP_XMAX_COMMITTED;
6663  replace_xmax = true;
6664  }
6665  else if (flags & FRM_RETURN_IS_MULTI)
6666  {
6667  uint16 newbits;
6668  uint16 newbits2;
6669 
6670  /*
6671  * xmax is an old MultiXactId that we have to replace with a new
6672  * MultiXactId, to carry forward two or more original member XIDs.
6673  */
6674  Assert(!MultiXactIdPrecedes(newxmax, cutoffs->OldestMxact));
6675 
6676  /*
6677  * We can't use GetMultiXactIdHintBits directly on the new multi
6678  * here; that routine initializes the masks to all zeroes, which
6679  * would lose other bits we need. Doing it this way ensures all
6680  * unrelated bits remain untouched.
6681  */
6682  frz->t_infomask &= ~HEAP_XMAX_BITS;
6683  frz->t_infomask2 &= ~HEAP_KEYS_UPDATED;
6684  GetMultiXactIdHintBits(newxmax, &newbits, &newbits2);
6685  frz->t_infomask |= newbits;
6686  frz->t_infomask2 |= newbits2;
6687  frz->xmax = newxmax;
6688  replace_xmax = true;
6689  }
6690  else
6691  {
6692  /*
6693  * Freeze plan for tuple "freezes xmax" in the strictest sense:
6694  * it'll leave nothing in xmax (neither an Xid nor a MultiXactId).
6695  */
6696  Assert(flags & FRM_INVALIDATE_XMAX);
6697  Assert(!TransactionIdIsValid(newxmax));
6698 
6699  /* Will set freeze_xmax flags in freeze plan below */
6700  freeze_xmax = true;
6701  }
6702 
6703  /* MultiXactId processing forces freezing (barring FRM_NOOP case) */
6704  Assert(pagefrz->freeze_required || (!freeze_xmax && !replace_xmax));
6705  }
6706  else if (TransactionIdIsNormal(xid))
6707  {
6708  /* Raw xmax is normal XID */
6709  if (TransactionIdPrecedes(xid, cutoffs->relfrozenxid))
6710  ereport(ERROR,
6711  (errcode(ERRCODE_DATA_CORRUPTED),
6712  errmsg_internal("found xmax %u from before relfrozenxid %u",
6713  xid, cutoffs->relfrozenxid)));
6714 
6715  /* Will set freeze_xmax flags in freeze plan below */
6716  freeze_xmax = TransactionIdPrecedes(xid, cutoffs->OldestXmin);
6717 
6718  /*
6719  * Verify that xmax aborted if and when freeze plan is executed,
6720  * provided it's from an update. (A lock-only xmax can be removed
6721  * independent of this, since the lock is released at xact end.)
6722  */
6723  if (freeze_xmax && !HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
6724  frz->checkflags |= HEAP_FREEZE_CHECK_XMAX_ABORTED;
6725  }
6726  else if (!TransactionIdIsValid(xid))
6727  {
6728  /* Raw xmax is InvalidTransactionId XID */
6729  Assert((tuple->t_infomask & HEAP_XMAX_IS_MULTI) == 0);
6730  xmax_already_frozen = true;
6731  }
6732  else
6733  ereport(ERROR,
6734  (errcode(ERRCODE_DATA_CORRUPTED),
6735  errmsg_internal("found raw xmax %u (infomask 0x%04x) not invalid and not multi",
6736  xid, tuple->t_infomask)));
6737 
6738  if (freeze_xmin)
6739  {
6740  Assert(!xmin_already_frozen);
6741 
6742  frz->t_infomask |= HEAP_XMIN_FROZEN;
6743  }
6744  if (replace_xvac)
6745  {
6746  /*
6747  * If a MOVED_OFF tuple is not dead, the xvac transaction must have
6748  * failed; whereas a non-dead MOVED_IN tuple must mean the xvac
6749  * transaction succeeded.
6750  */
6751  Assert(pagefrz->freeze_required);
6752  if (tuple->t_infomask & HEAP_MOVED_OFF)
6753  frz->frzflags |= XLH_INVALID_XVAC;
6754  else
6755  frz->frzflags |= XLH_FREEZE_XVAC;
6756  }
6757  if (replace_xmax)
6758  {
6759  Assert(!xmax_already_frozen && !freeze_xmax);
6760  Assert(pagefrz->freeze_required);
6761 
6762  /* Already set replace_xmax flags in freeze plan earlier */
6763  }
6764  if (freeze_xmax)
6765  {
6766  Assert(!xmax_already_frozen && !replace_xmax);
6767 
6768  frz->xmax = InvalidTransactionId;
6769 
6770  /*
6771  * The tuple might be marked either XMAX_INVALID or XMAX_COMMITTED +
6772  * LOCKED. Normalize to INVALID just to be sure no one gets confused.
6773  * Also get rid of the HEAP_KEYS_UPDATED bit.
6774  */
6775  frz->t_infomask &= ~HEAP_XMAX_BITS;
6776  frz->t_infomask |= HEAP_XMAX_INVALID;
6777  frz->t_infomask2 &= ~HEAP_HOT_UPDATED;
6778  frz->t_infomask2 &= ~HEAP_KEYS_UPDATED;
6779  }
6780 
6781  /*
6782  * Determine if this tuple is already totally frozen, or will become
6783  * totally frozen (provided caller executes freeze plans for the page)
6784  */
6785  *totally_frozen = ((freeze_xmin || xmin_already_frozen) &&
6786  (freeze_xmax || xmax_already_frozen));
6787 
6788  if (!pagefrz->freeze_required && !(xmin_already_frozen &&
6789  xmax_already_frozen))
6790  {
6791  /*
6792  * So far no previous tuple from the page made freezing mandatory.
6793  * Does this tuple force caller to freeze the entire page?
6794  */
6795  pagefrz->freeze_required =
6796  heap_tuple_should_freeze(tuple, cutoffs,
6797  &pagefrz->NoFreezePageRelfrozenXid,
6798  &pagefrz->NoFreezePageRelminMxid);
6799  }
6800 
6801  /* Tell caller if this tuple has a usable freeze plan set in *frz */
6802  return freeze_xmin || replace_xvac || replace_xmax || freeze_xmax;
6803 }
static void GetMultiXactIdHintBits(MultiXactId multi, uint16 *new_infomask, uint16 *new_infomask2)
Definition: heapam.c:6957
#define FRM_RETURN_IS_XID
Definition: heapam.c:6140
static TransactionId FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, const struct VacuumCutoffs *cutoffs, uint16 *flags, HeapPageFreeze *pagefrz)
Definition: heapam.c:6191
bool heap_tuple_should_freeze(HeapTupleHeader tuple, const struct VacuumCutoffs *cutoffs, TransactionId *NoFreezePageRelfrozenXid, MultiXactId *NoFreezePageRelminMxid)
Definition: heapam.c:7374
#define FRM_MARK_COMMITTED
Definition: heapam.c:6142
#define FRM_NOOP
Definition: heapam.c:6138
#define FRM_RETURN_IS_MULTI
Definition: heapam.c:6141
#define FRM_INVALIDATE_XMAX
Definition: heapam.c:6139
#define XLH_INVALID_XVAC
Definition: heapam_xlog.h:339
#define XLH_FREEZE_XVAC
Definition: heapam_xlog.h:338
#define HEAP_MOVED_OFF
Definition: htup_details.h:211
#define HEAP_XMIN_FROZEN
Definition: htup_details.h:206
#define HEAP_HOT_UPDATED
Definition: htup_details.h:276
#define HeapTupleHeaderGetXvac(tup)
Definition: htup_details.h:411
#define HEAP_XMAX_COMMITTED
Definition: htup_details.h:207
bool MultiXactIdPrecedes(MultiXactId multi1, MultiXactId multi2)
Definition: multixact.c:3260
#define MultiXactIdIsValid(multi)
Definition: multixact.h:28
uint8 frzflags
Definition: heapam.h:146
uint16 t_infomask2
Definition: heapam.h:144
TransactionId xmax
Definition: heapam.h:143
uint16 t_infomask
Definition: heapam.h:145
MultiXactId OldestMxact
Definition: vacuum.h:268
bool TransactionIdPrecedesOrEquals(TransactionId id1, TransactionId id2)
Definition: transam.c:299

References Assert, HeapTupleFreeze::checkflags, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errmsg_internal(), ERROR, HeapPageFreeze::freeze_required, FreezeMultiXactId(), FRM_INVALIDATE_XMAX, FRM_MARK_COMMITTED, FRM_NOOP, FRM_RETURN_IS_MULTI, FRM_RETURN_IS_XID, HeapTupleFreeze::frzflags, GetMultiXactIdHintBits(), HEAP_FREEZE_CHECK_XMAX_ABORTED, HEAP_FREEZE_CHECK_XMIN_COMMITTED, HEAP_HOT_UPDATED, HEAP_KEYS_UPDATED, HEAP_MOVED_OFF, heap_tuple_should_freeze(), HEAP_XMAX_BITS, HEAP_XMAX_COMMITTED, HEAP_XMAX_INVALID, HEAP_XMAX_IS_LOCKED_ONLY, HEAP_XMAX_IS_MULTI, HEAP_XMIN_FROZEN, HeapTupleHeaderGetRawXmax, HeapTupleHeaderGetXmin, HeapTupleHeaderGetXvac, InvalidTransactionId, VacuumCutoffs::MultiXactCutoff, MultiXactIdIsValid, MultiXactIdPrecedes(), HeapPageFreeze::NoFreezePageRelfrozenXid, HeapPageFreeze::NoFreezePageRelminMxid, VacuumCutoffs::OldestMxact, VacuumCutoffs::OldestXmin, VacuumCutoffs::relfrozenxid, HeapTupleFreeze::t_infomask, HeapTupleHeaderData::t_infomask, HeapTupleFreeze::t_infomask2, HeapTupleHeaderData::t_infomask2, TransactionIdIsNormal, TransactionIdIsValid, TransactionIdPrecedes(), TransactionIdPrecedesOrEquals(), XLH_FREEZE_XVAC, XLH_INVALID_XVAC, and HeapTupleFreeze::xmax.

Referenced by heap_freeze_tuple(), and heap_prune_record_unchanged_lp_normal().
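
A hedged sketch of how a caller might accumulate per-tuple freeze plans for one page, loosely following the callers named above; 'page', 'maxoff', 'cutoffs' and the initialization of the pagefrz trackers are assumed to be set up elsewhere.

    HeapTupleFreeze frozen[MaxHeapTuplesPerPage];
    HeapPageFreeze  pagefrz;        /* trackers assumed initialized by the caller */
    int             nfrozen = 0;
    bool            totally_frozen;

    for (OffsetNumber off = FirstOffsetNumber; off <= maxoff; off = OffsetNumberNext(off))
    {
        ItemId          itemid = PageGetItemId(page, off);
        HeapTupleHeader htup;

        if (!ItemIdIsNormal(itemid))
            continue;
        htup = (HeapTupleHeader) PageGetItem(page, itemid);

        if (heap_prepare_freeze_tuple(htup, cutoffs, &pagefrz,
                                      &frozen[nfrozen], &totally_frozen))
            frozen[nfrozen++].offset = off;     /* tuple has a usable freeze plan */
    }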

◆ heap_prepare_pagescan()

void heap_prepare_pagescan ( TableScanDesc  sscan)

Definition at line 493 of file heapam.c.

494 {
495  HeapScanDesc scan = (HeapScanDesc) sscan;
496  Buffer buffer = scan->rs_cbuf;
497  BlockNumber block = scan->rs_cblock;
498  Snapshot snapshot;
499  Page page;
500  int lines;
501  bool all_visible;
502  bool check_serializable;
503 
504  Assert(BufferGetBlockNumber(buffer) == block);
505 
506  /* ensure we're not accidentally being used when not in pagemode */
507  Assert(scan->rs_base.rs_flags & SO_ALLOW_PAGEMODE);
508  snapshot = scan->rs_base.rs_snapshot;
509 
510  /*
511  * Prune and repair fragmentation for the whole page, if possible.
512  */
513  heap_page_prune_opt(scan->rs_base.rs_rd, buffer);
514 
515  /*
516  * We must hold share lock on the buffer content while examining tuple
517  * visibility. Afterwards, however, the tuples we have found to be
518  * visible are guaranteed good as long as we hold the buffer pin.
519  */
520  LockBuffer(buffer, BUFFER_LOCK_SHARE);
521 
522  page = BufferGetPage(buffer);
523  lines = PageGetMaxOffsetNumber(page);
524 
525  /*
526  * If the all-visible flag indicates that all tuples on the page are
527  * visible to everyone, we can skip the per-tuple visibility tests.
528  *
529  * Note: In hot standby, a tuple that's already visible to all
530  * transactions on the primary might still be invisible to a read-only
531  * transaction in the standby. We partly handle this problem by tracking
532  * the minimum xmin of visible tuples as the cut-off XID while marking a
533  * page all-visible on the primary and WAL log that along with the
534  * visibility map SET operation. In hot standby, we wait for (or abort)
535  * all transactions that potentially may not see one or more tuples on
536  * the page. That's how index-only scans work fine in hot standby. A
537  * crucial difference between index-only scans and heap scans is that the
538  * index-only scan completely relies on the visibility map whereas heap
539  * scan looks at the page-level PD_ALL_VISIBLE flag. We are not sure if
540  * the page-level flag can be trusted in the same way, because it might
541  * get propagated somehow without being explicitly WAL-logged, e.g. via a
542  * full page write. Until we can prove that beyond doubt, let's check each
543  * tuple for visibility the hard way.
544  */
545  all_visible = PageIsAllVisible(page) && !snapshot->takenDuringRecovery;
546  check_serializable =
547  CheckForSerializableConflictOutNeeded(scan->rs_base.rs_rd, snapshot);
548 
549  /*
550  * We call page_collect_tuples() with constant arguments, to get the
551  * compiler to constant fold the constant arguments. Separate calls with
552  * constant arguments, rather than variables, are needed on several
553  * compilers to actually perform constant folding.
554  */
555  if (likely(all_visible))
556  {
557  if (likely(!check_serializable))
558  scan->rs_ntuples = page_collect_tuples(scan, snapshot, page, buffer,
559  block, lines, true, false);
560  else
561  scan->rs_ntuples = page_collect_tuples(scan, snapshot, page, buffer,
562  block, lines, true, true);
563  }
564  else
565  {
566  if (likely(!check_serializable))
567  scan->rs_ntuples = page_collect_tuples(scan, snapshot, page, buffer,
568  block, lines, false, false);
569  else
570  scan->rs_ntuples = page_collect_tuples(scan, snapshot, page, buffer,
571  block, lines, false, true);
572  }
573 
574  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
575 }
static pg_attribute_always_inline int page_collect_tuples(HeapScanDesc scan, Snapshot snapshot, Page page, Buffer buffer, BlockNumber block, int lines, bool all_visible, bool check_serializable)
Definition: heapam.c:443
bool CheckForSerializableConflictOutNeeded(Relation relation, Snapshot snapshot)
Definition: predicate.c:3976
void heap_page_prune_opt(Relation relation, Buffer buffer)
Definition: pruneheap.c:193
BlockNumber rs_cblock
Definition: heapam.h:66
bool takenDuringRecovery
Definition: snapshot.h:184

References Assert, BUFFER_LOCK_SHARE, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage(), CheckForSerializableConflictOutNeeded(), heap_page_prune_opt(), likely, LockBuffer(), page_collect_tuples(), PageGetMaxOffsetNumber(), PageIsAllVisible(), HeapScanDescData::rs_base, HeapScanDescData::rs_cblock, HeapScanDescData::rs_cbuf, TableScanDescData::rs_flags, HeapScanDescData::rs_ntuples, TableScanDescData::rs_rd, TableScanDescData::rs_snapshot, SO_ALLOW_PAGEMODE, and SnapshotData::takenDuringRecovery.

Referenced by heapam_scan_sample_next_block(), and heapgettup_pagemode().
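
A sketch of the pagemode contract relied on by heapgettup_pagemode(): once heap_prepare_pagescan() returns, the number of visible tuples on the current block is cached in rs_ntuples, and those tuples can be returned while holding only the buffer pin (the array of their offsets is an implementation detail not shown in this excerpt).

    HeapScanDesc scan = (HeapScanDesc) sscan;

    heap_prepare_pagescan(sscan);               /* prune, lock, collect, unlock */

    for (int i = 0; i < scan->rs_ntuples; i++)
    {
        /* return the i'th visible tuple of scan->rs_cblock; only the pin
         * on scan->rs_cbuf is needed at this point */
    }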

◆ heap_rescan()

void heap_rescan ( TableScanDesc  sscan,
ScanKey  key,
bool  set_params,
bool  allow_strat,
bool  allow_sync,
bool  allow_pagemode 
)

Definition at line 1151 of file heapam.c.

1153 {
1154  HeapScanDesc scan = (HeapScanDesc) sscan;
1155 
1156  if (set_params)
1157  {
1158  if (allow_strat)
1159  scan->rs_base.rs_flags |= SO_ALLOW_STRAT;
1160  else
1161  scan->rs_base.rs_flags &= ~SO_ALLOW_STRAT;
1162 
1163  if (allow_sync)
1164  scan->rs_base.rs_flags |= SO_ALLOW_SYNC;
1165  else
1166  scan->rs_base.rs_flags &= ~SO_ALLOW_SYNC;
1167 
1168  if (allow_pagemode && scan->rs_base.rs_snapshot &&
1169  IsMVCCSnapshot(scan->rs_base.rs_snapshot))
1170  scan->rs_base.rs_flags |= SO_ALLOW_PAGEMODE;
1171  else
1172  scan->rs_base.rs_flags &= ~SO_ALLOW_PAGEMODE;
1173  }
1174 
1175  /*
1176  * unpin scan buffers
1177  */
1178  if (BufferIsValid(scan->rs_cbuf))
1179  ReleaseBuffer(scan->rs_cbuf);
1180 
1181  if (BufferIsValid(scan->rs_vmbuffer))
1182  {
1183  ReleaseBuffer(scan->rs_vmbuffer);
1184  scan->rs_vmbuffer = InvalidBuffer;
1185  }
1186 
1187  Assert(scan->rs_empty_tuples_pending == 0);
1188 
1189  /*
1190  * The read stream is reset on rescan. This must be done before
1191  * initscan(), as some state referred to by read_stream_reset() is reset
1192  * in initscan().
1193  */
1194  if (scan->rs_read_stream)
1195  read_stream_reset(scan->rs_read_stream);
1196 
1197  /*
1198  * reinitialize scan descriptor
1199  */
1200  initscan(scan, key, true);
1201 }
void read_stream_reset(ReadStream *stream)
Definition: read_stream.c:773
@ SO_ALLOW_STRAT
Definition: tableam.h:58
@ SO_ALLOW_SYNC
Definition: tableam.h:60

References Assert, BufferIsValid(), initscan(), InvalidBuffer, IsMVCCSnapshot, sort-test::key, read_stream_reset(), ReleaseBuffer(), HeapScanDescData::rs_base, HeapScanDescData::rs_cbuf, HeapScanDescData::rs_empty_tuples_pending, TableScanDescData::rs_flags, HeapScanDescData::rs_read_stream, TableScanDescData::rs_snapshot, HeapScanDescData::rs_vmbuffer, SO_ALLOW_PAGEMODE, SO_ALLOW_STRAT, and SO_ALLOW_SYNC.
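
A minimal usage sketch: restart an existing scan, keeping page-at-a-time mode but opting out of synchronized-scan participation. The 'key' array is a placeholder for whatever scan keys the scan was begun with.

    heap_rescan(sscan, key,
                true,    /* set_params */
                true,    /* allow_strat */
                false,   /* allow_sync */
                true);   /* allow_pagemode (honored only for MVCC snapshots) */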

◆ heap_set_tidrange()

void heap_set_tidrange ( TableScanDesc  sscan,
ItemPointer  mintid,
ItemPointer  maxtid 
)

Definition at line 1327 of file heapam.c.

1329 {
1330  HeapScanDesc scan = (HeapScanDesc) sscan;
1331  BlockNumber startBlk;
1332  BlockNumber numBlks;
1333  ItemPointerData highestItem;
1334  ItemPointerData lowestItem;
1335 
1336  /*
1337  * For relations without any pages, we can simply leave the TID range
1338  * unset. There will be no tuples to scan, therefore no tuples outside
1339  * the given TID range.
1340  */
1341  if (scan->rs_nblocks == 0)
1342  return;
1343 
1344  /*
1345  * Set up some ItemPointers which point to the first and last possible
1346  * tuples in the heap.
1347  */
1348  ItemPointerSet(&highestItem, scan->rs_nblocks - 1, MaxOffsetNumber);
1349  ItemPointerSet(&lowestItem, 0, FirstOffsetNumber);
1350 
1351  /*
1352  * If the given maximum TID is below the highest possible TID in the
1353  * relation, then restrict the range to that, otherwise we scan to the end
1354  * of the relation.
1355  */
1356  if (ItemPointerCompare(maxtid, &highestItem) < 0)
1357  ItemPointerCopy(maxtid, &highestItem);
1358 
1359  /*
1360  * If the given minimum TID is above the lowest possible TID in the
1361  * relation, then restrict the range to only scan for TIDs above that.
1362  */
1363  if (ItemPointerCompare(mintid, &lowestItem) > 0)
1364  ItemPointerCopy(mintid, &lowestItem);
1365 
1366  /*
1367  * Check for an empty range and protect from would be negative results
1368  * from the numBlks calculation below.
1369  */
1370  if (ItemPointerCompare(&highestItem, &lowestItem) < 0)
1371  {
1372  /* Set an empty range of blocks to scan */
1373  heap_setscanlimits(sscan, 0, 0);
1374  return;
1375  }
1376 
1377  /*
1378  * Calculate the first block and the number of blocks we must scan. We
1379  * could be more aggressive here and perform some more validation to try
1380  * and further narrow the scope of blocks to scan by checking if the
1381  * lowestItem has an offset above MaxOffsetNumber. In this case, we could
1382  * advance startBlk by one. Likewise, if highestItem has an offset of 0
1383  * we could scan one fewer blocks. However, such an optimization does not
1384  * seem worth troubling over, currently.
1385  */
1386  startBlk = ItemPointerGetBlockNumberNoCheck(&lowestItem);
1387 
1388  numBlks = ItemPointerGetBlockNumberNoCheck(&highestItem) -
1389  ItemPointerGetBlockNumberNoCheck(&lowestItem) + 1;
1390 
1391  /* Set the start block and number of blocks to scan */
1392  heap_setscanlimits(sscan, startBlk, numBlks);
1393 
1394  /* Finally, set the TID range in sscan */
1395  ItemPointerCopy(&lowestItem, &sscan->rs_mintid);
1396  ItemPointerCopy(&highestItem, &sscan->rs_maxtid);
1397 }
void heap_setscanlimits(TableScanDesc sscan, BlockNumber startBlk, BlockNumber numBlks)
Definition: heapam.c:421
static BlockNumber ItemPointerGetBlockNumberNoCheck(const ItemPointerData *pointer)
Definition: itemptr.h:93
#define MaxOffsetNumber
Definition: off.h:28
BlockNumber rs_nblocks
Definition: heapam.h:58

References FirstOffsetNumber, heap_setscanlimits(), ItemPointerCompare(), ItemPointerCopy(), ItemPointerGetBlockNumberNoCheck(), ItemPointerSet(), MaxOffsetNumber, TableScanDescData::rs_maxtid, TableScanDescData::rs_mintid, and HeapScanDescData::rs_nblocks.
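
A sketch of setting up a TID range scan; the block and offset values are illustrative only, and the scan descriptor is assumed to have been opened for a TID range scan.

    ItemPointerData mintid;
    ItemPointerData maxtid;

    ItemPointerSet(&mintid, 10, 1);                 /* block 10, offset 1 */
    ItemPointerSet(&maxtid, 20, MaxOffsetNumber);   /* end of block 20 */

    heap_set_tidrange(sscan, &mintid, &maxtid);
    /* tuples would then be fetched with heap_getnextslot_tidrange() */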

◆ heap_setscanlimits()

void heap_setscanlimits ( TableScanDesc  sscan,
BlockNumber  startBlk,
BlockNumber  numBlks 
)

Definition at line 421 of file heapam.c.

422 {
423  HeapScanDesc scan = (HeapScanDesc) sscan;
424 
425  Assert(!scan->rs_inited); /* else too late to change */
426  /* else rs_startblock is significant */
427  Assert(!(scan->rs_base.rs_flags & SO_ALLOW_SYNC));
428 
429  /* Check startBlk is valid (but allow case of zero blocks...) */
430  Assert(startBlk == 0 || startBlk < scan->rs_nblocks);
431 
432  scan->rs_startblock = startBlk;
433  scan->rs_numblocks = numBlks;
434 }
bool rs_inited
Definition: heapam.h:64
BlockNumber rs_startblock
Definition: heapam.h:59
BlockNumber rs_numblocks
Definition: heapam.h:60

References Assert, HeapScanDescData::rs_base, TableScanDescData::rs_flags, HeapScanDescData::rs_inited, HeapScanDescData::rs_numblocks, HeapScanDescData::rs_startblock, and SO_ALLOW_SYNC.

Referenced by heap_set_tidrange(), and heapam_index_build_range_scan().
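
Usage sketch: the limits must be installed after heap_beginscan() but before the first tuple is fetched, and not on a syncscan-enabled scan (see the asserts above). The block numbers are arbitrary.

    heap_setscanlimits(sscan, 100, 16);     /* restrict the scan to blocks 100..115 */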

◆ heap_tuple_needs_eventual_freeze()

bool heap_tuple_needs_eventual_freeze ( HeapTupleHeader  tuple)

Definition at line 7319 of file heapam.c.

7320 {
7321  TransactionId xid;
7322 
7323  /*
7324  * If xmin is a normal transaction ID, this tuple is definitely not
7325  * frozen.
7326  */
7327  xid = HeapTupleHeaderGetXmin(tuple);
7328  if (TransactionIdIsNormal(xid))
7329  return true;
7330 
7331  /*
7332  * If xmax is a valid xact or multixact, this tuple is also not frozen.
7333  */
7334  if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
7335  {
7336  MultiXactId multi;
7337 
7338  multi = HeapTupleHeaderGetRawXmax(tuple);
7339  if (MultiXactIdIsValid(multi))
7340  return true;
7341  }
7342  else
7343  {
7344  xid = HeapTupleHeaderGetRawXmax(tuple);
7345  if (TransactionIdIsNormal(xid))
7346  return true;
7347  }
7348 
7349  if (tuple->t_infomask & HEAP_MOVED)
7350  {
7351  xid = HeapTupleHeaderGetXvac(tuple);
7352  if (TransactionIdIsNormal(xid))
7353  return true;
7354  }
7355 
7356  return false;
7357 }

References HEAP_MOVED, HEAP_XMAX_IS_MULTI, HeapTupleHeaderGetRawXmax, HeapTupleHeaderGetXmin, HeapTupleHeaderGetXvac, MultiXactIdIsValid, HeapTupleHeaderData::t_infomask, and TransactionIdIsNormal.

Referenced by collect_corrupt_items(), and heap_page_is_all_visible().
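
A sketch of the typical use by the callers above, which try to prove a page all-frozen: one tuple that would eventually need freezing is enough to keep the page from being marked all-frozen. 'tuple' and 'all_frozen' are placeholders.

    if (heap_tuple_needs_eventual_freeze(tuple->t_data))
        all_frozen = false;     /* page may still be all-visible, just not all-frozen */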

◆ heap_tuple_should_freeze()

bool heap_tuple_should_freeze ( HeapTupleHeader  tuple,
const struct VacuumCutoffs cutoffs,
TransactionId NoFreezePageRelfrozenXid,
MultiXactId NoFreezePageRelminMxid 
)

Definition at line 7374 of file heapam.c.

7378 {
7379  TransactionId xid;
7380  MultiXactId multi;
7381  bool freeze = false;
7382 
7383  /* First deal with xmin */
7384  xid = HeapTupleHeaderGetXmin(tuple);
7385  if (TransactionIdIsNormal(xid))
7386  {
7387  Assert(TransactionIdPrecedesOrEquals(cutoffs->relfrozenxid, xid));
7388  if (TransactionIdPrecedes(xid, *NoFreezePageRelfrozenXid))
7389  *NoFreezePageRelfrozenXid = xid;
7390  if (TransactionIdPrecedes(xid, cutoffs->FreezeLimit))
7391  freeze = true;
7392  }
7393 
7394  /* Now deal with xmax */
7395  xid = InvalidTransactionId;
7396  multi = InvalidMultiXactId;
7397  if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
7398  multi = HeapTupleHeaderGetRawXmax(tuple);
7399  else
7400  xid = HeapTupleHeaderGetRawXmax(tuple);
7401 
7402  if (TransactionIdIsNormal(xid))
7403  {
7404  Assert(TransactionIdPrecedesOrEquals(cutoffs->relfrozenxid, xid));
7405  /* xmax is a non-permanent XID */
7406  if (TransactionIdPrecedes(xid, *NoFreezePageRelfrozenXid))
7407  *NoFreezePageRelfrozenXid = xid;
7408  if (TransactionIdPrecedes(xid, cutoffs->FreezeLimit))
7409  freeze = true;
7410  }
7411  else if (!MultiXactIdIsValid(multi))
7412  {
7413  /* xmax is a permanent XID or invalid MultiXactId/XID */
7414  }
7415  else if (HEAP_LOCKED_UPGRADED(tuple->t_infomask))
7416  {
7417  /* xmax is a pg_upgrade'd MultiXact, which can't have updater XID */
7418  if (MultiXactIdPrecedes(multi, *NoFreezePageRelminMxid))
7419  *NoFreezePageRelminMxid = multi;
7420  /* heap_prepare_freeze_tuple always freezes pg_upgrade'd xmax */
7421  freeze = true;
7422  }
7423  else
7424  {
7425  /* xmax is a MultiXactId that may have an updater XID */
7426  MultiXactMember *members;
7427  int nmembers;
7428 
7429  Assert(MultiXactIdPrecedesOrEquals(cutoffs->relminmxid, multi));
7430  if (MultiXactIdPrecedes(multi, *NoFreezePageRelminMxid))
7431  *NoFreezePageRelminMxid = multi;
7432  if (MultiXactIdPrecedes(multi, cutoffs->MultiXactCutoff))
7433  freeze = true;
7434 
7435  /* need to check whether any member of the mxact is old */
7436  nmembers = GetMultiXactIdMembers(multi, &members, false,
7437  HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask));
7438 
7439  for (int i = 0; i < nmembers; i++)
7440  {
7441  xid = members[i].xid;
7442  Assert(TransactionIdIsNormal(xid));
7443  if (TransactionIdPrecedes(xid, *NoFreezePageRelfrozenXid))
7444  *NoFreezePageRelfrozenXid = xid;
7445  if (TransactionIdPrecedes(xid, cutoffs->FreezeLimit))
7446  freeze = true;
7447  }
7448  if (nmembers > 0)
7449  pfree(members);
7450  }
7451 
7452  if (tuple->t_infomask & HEAP_MOVED)
7453  {
7454  xid = HeapTupleHeaderGetXvac(tuple);
7455  if (TransactionIdIsNormal(xid))
7456  {
7457  Assert(TransactionIdPrecedesOrEquals(cutoffs->relfrozenxid, xid));
7458  if (TransactionIdPrecedes(xid, *NoFreezePageRelfrozenXid))
7459  *NoFreezePageRelfrozenXid = xid;
7460  /* heap_prepare_freeze_tuple forces xvac freezing */
7461  freeze = true;
7462  }
7463  }
7464 
7465  return freeze;
7466 }
#define HEAP_LOCKED_UPGRADED(infomask)
Definition: htup_details.h:249
bool MultiXactIdPrecedesOrEquals(MultiXactId multi1, MultiXactId multi2)
Definition: multixact.c:3274
TransactionId xid
Definition: multixact.h:58

References Assert, VacuumCutoffs::FreezeLimit, GetMultiXactIdMembers(), HEAP_LOCKED_UPGRADED, HEAP_MOVED, HEAP_XMAX_IS_LOCKED_ONLY, HEAP_XMAX_IS_MULTI, HeapTupleHeaderGetRawXmax, HeapTupleHeaderGetXmin, HeapTupleHeaderGetXvac, i, InvalidMultiXactId, InvalidTransactionId, VacuumCutoffs::MultiXactCutoff, MultiXactIdIsValid, MultiXactIdPrecedes(), MultiXactIdPrecedesOrEquals(), pfree(), VacuumCutoffs::relfrozenxid, VacuumCutoffs::relminmxid, HeapTupleHeaderData::t_infomask, TransactionIdIsNormal, TransactionIdPrecedes(), TransactionIdPrecedesOrEquals(), and MultiXactMember::xid.

Referenced by heap_prepare_freeze_tuple(), and lazy_scan_noprune().
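
A hedged sketch following the lazy_scan_noprune()-style usage named above: the "no freeze" trackers are ratcheted back even when the caller decides not to freeze, so relfrozenxid/relminmxid stay valid either way. The seed values and 'tupleheader' are assumptions for illustration.

    TransactionId NoFreezePageRelfrozenXid = cutoffs->FreezeLimit;     /* assumed seed */
    MultiXactId   NoFreezePageRelminMxid   = cutoffs->MultiXactCutoff; /* assumed seed */

    if (heap_tuple_should_freeze(tupleheader, cutoffs,
                                 &NoFreezePageRelfrozenXid,
                                 &NoFreezePageRelminMxid))
    {
        /* some XID/MXID in the tuple is older than FreezeLimit/MultiXactCutoff,
         * so the page ought to be frozen during this VACUUM */
    }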

◆ heap_update()

TM_Result heap_update ( Relation  relation,
ItemPointer  otid,
HeapTuple  newtup,
CommandId  cid,
Snapshot  crosscheck,
bool  wait,
struct TM_FailureData tmfd,
LockTupleMode lockmode,
TU_UpdateIndexes update_indexes 
)

Definition at line 3146 of file heapam.c.

3150 {
3151  TM_Result result;
3152  TransactionId xid = GetCurrentTransactionId();
3153  Bitmapset *hot_attrs;
3154  Bitmapset *sum_attrs;
3155  Bitmapset *key_attrs;
3156  Bitmapset *id_attrs;
3157  Bitmapset *interesting_attrs;
3158  Bitmapset *modified_attrs;
3159  ItemId lp;
3160  HeapTupleData oldtup;
3161  HeapTuple heaptup;
3162  HeapTuple old_key_tuple = NULL;
3163  bool old_key_copied = false;
3164  Page page;
3165  BlockNumber block;
3166  MultiXactStatus mxact_status;
3167  Buffer buffer,
3168  newbuf,
3169  vmbuffer = InvalidBuffer,
3170  vmbuffer_new = InvalidBuffer;
3171  bool need_toast;
3172  Size newtupsize,
3173  pagefree;
3174  bool have_tuple_lock = false;
3175  bool iscombo;
3176  bool use_hot_update = false;
3177  bool summarized_update = false;
3178  bool key_intact;
3179  bool all_visible_cleared = false;
3180  bool all_visible_cleared_new = false;
3181  bool checked_lockers;
3182  bool locker_remains;
3183  bool id_has_external = false;
3184  TransactionId xmax_new_tuple,
3185  xmax_old_tuple;
3186  uint16 infomask_old_tuple,
3187  infomask2_old_tuple,
3188  infomask_new_tuple,
3189  infomask2_new_tuple;
3190 
3191  Assert(ItemPointerIsValid(otid));
3192 
3193  /* Cheap, simplistic check that the tuple matches the rel's rowtype. */
3194  Assert(HeapTupleHeaderGetNatts(newtup->t_data) <=
3195  RelationGetNumberOfAttributes(relation));
3196 
3197  /*
3198  * Forbid this during a parallel operation, lest it allocate a combo CID.
3199  * Other workers might need that combo CID for visibility checks, and we
3200  * have no provision for broadcasting it to them.
3201  */
3202  if (IsInParallelMode())
3203  ereport(ERROR,
3204  (errcode(ERRCODE_INVALID_TRANSACTION_STATE),
3205  errmsg("cannot update tuples during a parallel operation")));
3206 
3207  /*
3208  * Fetch the list of attributes to be checked for various operations.
3209  *
3210  * For HOT considerations, this is wasted effort if we fail to update or
3211  * have to put the new tuple on a different page. But we must compute the
3212  * list before obtaining buffer lock --- in the worst case, if we are
3213  * doing an update on one of the relevant system catalogs, we could
3214  * deadlock if we try to fetch the list later. In any case, the relcache
3215  * caches the data so this is usually pretty cheap.
3216  *
3217  * We also need columns used by the replica identity and columns that are
3218  * considered the "key" of rows in the table.
3219  *
3220  * Note that we get copies of each bitmap, so we need not worry about
3221  * relcache flush happening midway through.
3222  */
3223  hot_attrs = RelationGetIndexAttrBitmap(relation,
3224  INDEX_ATTR_BITMAP_HOT_BLOCKING);
3225  sum_attrs = RelationGetIndexAttrBitmap(relation,
3226  INDEX_ATTR_BITMAP_SUMMARIZED);
3227  key_attrs = RelationGetIndexAttrBitmap(relation, INDEX_ATTR_BITMAP_KEY);
3228  id_attrs = RelationGetIndexAttrBitmap(relation,
3229  INDEX_ATTR_BITMAP_IDENTITY_KEY);
3230  interesting_attrs = NULL;
3231  interesting_attrs = bms_add_members(interesting_attrs, hot_attrs);
3232  interesting_attrs = bms_add_members(interesting_attrs, sum_attrs);
3233  interesting_attrs = bms_add_members(interesting_attrs, key_attrs);
3234  interesting_attrs = bms_add_members(interesting_attrs, id_attrs);
3235 
3236  block = ItemPointerGetBlockNumber(otid);
3237  buffer = ReadBuffer(relation, block);
3238  page = BufferGetPage(buffer);
3239 
3240  /*
3241  * Before locking the buffer, pin the visibility map page if it appears to
3242  * be necessary. Since we haven't got the lock yet, someone else might be
3243  * in the middle of changing this, so we'll need to recheck after we have
3244  * the lock.
3245  */
3246  if (PageIsAllVisible(page))
3247  visibilitymap_pin(relation, block, &vmbuffer);
3248 
3249  LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
3250 
3251  lp = PageGetItemId(page, ItemPointerGetOffsetNumber(otid));
3252  Assert(ItemIdIsNormal(lp));
3253 
3254  /*
3255  * Fill in enough data in oldtup for HeapDetermineColumnsInfo to work
3256  * properly.
3257  */
3258  oldtup.t_tableOid = RelationGetRelid(relation);
3259  oldtup.t_data = (HeapTupleHeader) PageGetItem(page, lp);
3260  oldtup.t_len = ItemIdGetLength(lp);
3261  oldtup.t_self = *otid;
3262 
3263  /* the new tuple is ready, except for this: */
3264  newtup->t_tableOid = RelationGetRelid(relation);
3265 
3266  /*
3267  * Determine columns modified by the update. Additionally, identify
3268  * whether any of the unmodified replica identity key attributes in the
3269  * old tuple is externally stored or not. This is required because for
3270  * such attributes the flattened value won't be WAL logged as part of the
3271  * new tuple so we must include it as part of the old_key_tuple. See
3272  * ExtractReplicaIdentity.
3273  */
3274  modified_attrs = HeapDetermineColumnsInfo(relation, interesting_attrs,
3275  id_attrs, &oldtup,
3276  newtup, &id_has_external);
3277 
3278  /*
3279  * If we're not updating any "key" column, we can grab a weaker lock type.
3280  * This allows for more concurrency when we are running simultaneously
3281  * with foreign key checks.
3282  *
3283  * Note that if a column gets detoasted while executing the update, but
3284  * the value ends up being the same, this test will fail and we will use
3285  * the stronger lock. This is acceptable; the important case to optimize
3286  * is updates that don't manipulate key columns, not those that
3287  * serendipitously arrive at the same key values.
3288  */
3289  if (!bms_overlap(modified_attrs, key_attrs))
3290  {
3291  *lockmode = LockTupleNoKeyExclusive;
3292  mxact_status = MultiXactStatusNoKeyUpdate;
3293  key_intact = true;
3294 
3295  /*
3296  * If this is the first possibly-multixact-able operation in the
3297  * current transaction, set my per-backend OldestMemberMXactId
3298  * setting. We can be certain that the transaction will never become a
3299  * member of any older MultiXactIds than that. (We have to do this
3300  * even if we end up just using our own TransactionId below, since
3301  * some other backend could incorporate our XID into a MultiXact
3302  * immediately afterwards.)
3303  */
3304  MultiXactIdSetOldestMember();
3305  }
3306  else
3307  {
3308  *lockmode = LockTupleExclusive;
3309  mxact_status = MultiXactStatusUpdate;
3310  key_intact = false;
3311  }
3312 
3313  /*
3314  * Note: beyond this point, use oldtup not otid to refer to old tuple.
3315  * otid may very well point at newtup->t_self, which we will overwrite
3316  * with the new tuple's location, so there's great risk of confusion if we
3317  * use otid anymore.
3318  */
3319 
3320 l2:
3321  checked_lockers = false;
3322  locker_remains = false;
3323  result = HeapTupleSatisfiesUpdate(&oldtup, cid, buffer);
3324 
3325  /* see below about the "no wait" case */
3326  Assert(result != TM_BeingModified || wait);
3327 
3328  if (result == TM_Invisible)
3329  {
3330  UnlockReleaseBuffer(buffer);
3331  ereport(ERROR,
3332  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
3333  errmsg("attempted to update invisible tuple")));
3334  }
3335  else if (result == TM_BeingModified && wait)
3336  {
3337  TransactionId xwait;
3338  uint16 infomask;
3339  bool can_continue = false;
3340 
3341  /*
3342  * XXX note that we don't consider the "no wait" case here. This
3343  * isn't a problem currently because no caller uses that case, but it
3344  * should be fixed if such a caller is introduced. It wasn't a
3345  * problem previously because this code would always wait, but now
3346  * that some tuple locks do not conflict with one of the lock modes we
3347  * use, it is possible that this case is interesting to handle
3348  * specially.
3349  *
3350  * This may cause failures with third-party code that calls
3351  * heap_update directly.
3352  */
3353 
3354  /* must copy state data before unlocking buffer */
3355  xwait = HeapTupleHeaderGetRawXmax(oldtup.t_data);
3356  infomask = oldtup.t_data->t_infomask;
3357 
3358  /*
3359  * Now we have to do something about the existing locker. If it's a
3360  * multi, sleep on it; we might be awakened before it is completely
3361  * gone (or even not sleep at all in some cases); we need to preserve
3362  * it as locker, unless it is gone completely.
3363  *
3364  * If it's not a multi, we need to check for sleeping conditions
3365  * before actually going to sleep. If the update doesn't conflict
3366  * with the locks, we just continue without sleeping (but making sure
3367  * it is preserved).
3368  *
3369  * Before sleeping, we need to acquire tuple lock to establish our
3370  * priority for the tuple (see heap_lock_tuple). LockTuple will
3371  * release us when we are next-in-line for the tuple. Note we must
3372  * not acquire the tuple lock until we're sure we're going to sleep;
3373  * otherwise we're open for race conditions with other transactions
3374  * holding the tuple lock which sleep on us.
3375  *
3376  * If we are forced to "start over" below, we keep the tuple lock;
3377  * this arranges that we stay at the head of the line while rechecking
3378  * tuple state.
3379  */
3380  if (infomask & HEAP_XMAX_IS_MULTI)
3381  {
3382  TransactionId update_xact;
3383  int remain;
3384  bool current_is_member = false;
3385 
3386  if (DoesMultiXactIdConflict((MultiXactId) xwait, infomask,
3387  *lockmode, &current_is_member))
3388  {
3389  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
3390 
3391  /*
3392  * Acquire the lock, if necessary (but skip it when we're
3393  * requesting a lock and already have one; avoids deadlock).
3394  */
3395  if (!current_is_member)
3396  heap_acquire_tuplock(relation, &(oldtup.t_self), *lockmode,
3397  LockWaitBlock, &have_tuple_lock);
3398 
3399  /* wait for multixact */
3400  MultiXactIdWait((MultiXactId) xwait, mxact_status, infomask,
3401  relation, &oldtup.t_self, XLTW_Update,
3402  &remain);
3403  checked_lockers = true;
3404  locker_remains = remain != 0;
3405  LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
3406 
3407  /*
3408  * If xwait had just locked the tuple then some other xact
3409  * could update this tuple before we get to this point. Check
3410  * for xmax change, and start over if so.
3411  */
3412  if (xmax_infomask_changed(oldtup.t_data->t_infomask,
3413  infomask) ||
3414  !TransactionIdEquals(HeapTupleHeaderGetRawXmax(oldtup.t_data),
3415  xwait))
3416  goto l2;
3417  }
3418 
3419  /*
3420  * Note that the multixact may not be done by now. It could have
3421  * surviving members; our own xact or other subxacts of this
3422  * backend, and also any other concurrent transaction that locked
3423  * the tuple with LockTupleKeyShare if we only got
3424  * LockTupleNoKeyExclusive. If this is the case, we have to be
3425  * careful to mark the updated tuple with the surviving members in
3426  * Xmax.
3427  *
3428  * Note that there could have been another update in the
3429  * MultiXact. In that case, we need to check whether it committed
3430  * or aborted. If it aborted we are safe to update it again;
3431  * otherwise there is an update conflict, and we have to return
3432  * TableTuple{Deleted, Updated} below.
3433  *
3434  * In the LockTupleExclusive case, we still need to preserve the
3435  * surviving members: those would include the tuple locks we had
3436  * before this one, which are important to keep in case this
3437  * subxact aborts.
3438  */
3439  if (!HEAP_XMAX_IS_LOCKED_ONLY(oldtup.t_data->t_infomask))
3440  update_xact = HeapTupleGetUpdateXid(oldtup.t_data);
3441  else
3442  update_xact = InvalidTransactionId;
3443 
3444  /*
3445  * There was no UPDATE in the MultiXact; or it aborted. No
3446  * TransactionIdIsInProgress() call needed here, since we called
3447  * MultiXactIdWait() above.
3448  */
3449  if (!TransactionIdIsValid(update_xact) ||
3450  TransactionIdDidAbort(update_xact))
3451  can_continue = true;
3452  }
3453  else if (TransactionIdIsCurrentTransactionId(xwait))
3454  {
3455  /*
3456  * The only locker is ourselves; we can avoid grabbing the tuple
3457  * lock here, but must preserve our locking information.
3458  */
3459  checked_lockers = true;
3460  locker_remains = true;
3461  can_continue = true;
3462  }
3463  else if (HEAP_XMAX_IS_KEYSHR_LOCKED(infomask) && key_intact)
3464  {
3465  /*
3466  * If it's just a key-share locker, and we're not changing the key
3467  * columns, we don't need to wait for it to end; but we need to
3468  * preserve it as locker.
3469  */
3470  checked_lockers = true;
3471  locker_remains = true;
3472  can_continue = true;
3473  }
3474  else
3475  {
3476  /*
3477  * Wait for regular transaction to end; but first, acquire tuple
3478  * lock.
3479  */
3480  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
3481  heap_acquire_tuplock(relation, &(oldtup.t_self), *lockmode,
3482  LockWaitBlock, &have_tuple_lock);
3483  XactLockTableWait(xwait, relation, &oldtup.t_self,
3484  XLTW_Update);
3485  checked_lockers = true;
3486  LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
3487 
3488  /*
3489  * xwait is done, but if xwait had just locked the tuple then some
3490  * other xact could update this tuple before we get to this point.
3491  * Check for xmax change, and start over if so.
3492  */
3493  if (xmax_infomask_changed(oldtup.t_data->t_infomask, infomask) ||
3494  !TransactionIdEquals(xwait,
3495  HeapTupleHeaderGetRawXmax(oldtup.t_data)))
3496  goto l2;
3497 
3498  /* Otherwise check if it committed or aborted */
3499  UpdateXmaxHintBits(oldtup.t_data, buffer, xwait);
3500  if (oldtup.t_data->t_infomask & HEAP_XMAX_INVALID)
3501  can_continue = true;
3502  }
3503 
3504  if (can_continue)
3505  result = TM_Ok;
3506  else if (!ItemPointerEquals(&oldtup.t_self, &oldtup.t_data->t_ctid))
3507  result = TM_Updated;
3508  else
3509  result = TM_Deleted;
3510  }
3511 
3512  /* Sanity check the result of HeapTupleSatisfiesUpdate() and the logic above */
3513  if (result != TM_Ok)
3514  {
3515  Assert(result == TM_SelfModified ||
3516  result == TM_Updated ||
3517  result == TM_Deleted ||
3518  result == TM_BeingModified);
3519  Assert(!(oldtup.t_data->t_infomask & HEAP_XMAX_INVALID));
3520  Assert(result != TM_Updated ||
3521  !ItemPointerEquals(&oldtup.t_self, &oldtup.t_data->t_ctid));
3522  }
3523 
3524  if (crosscheck != InvalidSnapshot && result == TM_Ok)
3525  {
3526  /* Perform additional check for transaction-snapshot mode RI updates */
3527  if (!HeapTupleSatisfiesVisibility(&oldtup, crosscheck, buffer))
3528  result = TM_Updated;
3529  }
3530 
3531  if (result != TM_Ok)
3532  {
3533  tmfd->ctid = oldtup.t_data->t_ctid;
3534  tmfd->xmax = HeapTupleHeaderGetUpdateXid(oldtup.t_data);
3535  if (result == TM_SelfModified)
3536  tmfd->cmax = HeapTupleHeaderGetCmax(oldtup.t_data);
3537  else
3538  tmfd->cmax = InvalidCommandId;
3539  UnlockReleaseBuffer(buffer);
3540  if (have_tuple_lock)
3541  UnlockTupleTuplock(relation, &(oldtup.t_self), *lockmode);
3542  if (vmbuffer != InvalidBuffer)
3543  ReleaseBuffer(vmbuffer);
3544  *update_indexes = TU_None;
3545 
3546  bms_free(hot_attrs);
3547  bms_free(sum_attrs);
3548  bms_free(key_attrs);
3549  bms_free(id_attrs);
3550  bms_free(modified_attrs);
3551  bms_free(interesting_attrs);
3552  return result;
3553  }
3554 
3555  /*
3556  * If we didn't pin the visibility map page and the page has become all
3557  * visible while we were busy locking the buffer, or during some
3558  * subsequent window during which we had it unlocked, we'll have to unlock
3559  * and re-lock, to avoid holding the buffer lock across an I/O. That's a
3560  * bit unfortunate, especially since we'll now have to recheck whether the
3561  * tuple has been locked or updated under us, but hopefully it won't
3562  * happen very often.
3563  */
3564  if (vmbuffer == InvalidBuffer && PageIsAllVisible(page))
3565  {
3566  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
3567  visibilitymap_pin(relation, block, &vmbuffer);
3568  LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
3569  goto l2;
3570  }
3571 
3572  /* Fill in transaction status data */
3573 
3574  /*
3575  * If the tuple we're updating is locked, we need to preserve the locking
3576  * info in the old tuple's Xmax. Prepare a new Xmax value for this.
3577  */
3578  compute_new_xmax_infomask(HeapTupleHeaderGetRawXmax(oldtup.t_data),
3579  oldtup.t_data->t_infomask,
3580  oldtup.t_data->t_infomask2,
3581  xid, *lockmode, true,
3582  &xmax_old_tuple, &infomask_old_tuple,
3583  &infomask2_old_tuple);
3584 
3585  /*
3586  * And also prepare an Xmax value for the new copy of the tuple. If there
3587  * was no xmax previously, or there was one but all lockers are now gone,
3588  * then use InvalidTransactionId; otherwise, get the xmax from the old
3589  * tuple. (In rare cases that might also be InvalidTransactionId and yet
3590  * not have the HEAP_XMAX_INVALID bit set; that's fine.)
3591  */
3592  if ((oldtup.t_data->t_infomask & HEAP_XMAX_INVALID) ||
3593  HEAP_LOCKED_UPGRADED(oldtup.t_data->t_infomask) ||
3594  (checked_lockers && !locker_remains))
3595  xmax_new_tuple = InvalidTransactionId;
3596  else
3597  xmax_new_tuple = HeapTupleHeaderGetRawXmax(oldtup.t_data);
3598 
3599  if (!TransactionIdIsValid(xmax_new_tuple))
3600  {
3601  infomask_new_tuple = HEAP_XMAX_INVALID;
3602  infomask2_new_tuple = 0;
3603  }
3604  else
3605  {
3606  /*
3607  * If we found a valid Xmax for the new tuple, then the infomask bits
3608  * to use on the new tuple depend on what was there on the old one.
3609  * Note that since we're doing an update, the only possibility is that
3610  * the lockers had FOR KEY SHARE lock.
3611  */
3612  if (oldtup.t_data->t_infomask & HEAP_XMAX_IS_MULTI)
3613  {
3614  GetMultiXactIdHintBits(xmax_new_tuple, &infomask_new_tuple,
3615  &infomask2_new_tuple);
3616  }
3617  else
3618  {
3619  infomask_new_tuple = HEAP_XMAX_KEYSHR_LOCK | HEAP_XMAX_LOCK_ONLY;
3620  infomask2_new_tuple = 0;
3621  }
3622  }
3623 
3624  /*
3625  * Prepare the new tuple with the appropriate initial values of Xmin and
3626  * Xmax, as well as initial infomask bits as computed above.
3627  */
3628  newtup->t_data->t_infomask &= ~(HEAP_XACT_MASK);
3629  newtup->t_data->t_infomask2 &= ~(HEAP2_XACT_MASK);
3630  HeapTupleHeaderSetXmin(newtup->t_data, xid);
3631  HeapTupleHeaderSetCmin(newtup->t_data, cid);
3632  newtup->t_data->t_infomask |= HEAP_UPDATED | infomask_new_tuple;
3633  newtup->t_data->t_infomask2 |= infomask2_new_tuple;
3634  HeapTupleHeaderSetXmax(newtup->t_data, xmax_new_tuple);
3635 
3636  /*
3637  * Replace cid with a combo CID if necessary. Note that we already put
3638  * the plain cid into the new tuple.
3639  */
3640  HeapTupleHeaderAdjustCmax(oldtup.t_data, &cid, &iscombo);
3641 
3642  /*
3643  * If the toaster needs to be activated, OR if the new tuple will not fit
3644  * on the same page as the old, then we need to release the content lock
3645  * (but not the pin!) on the old tuple's buffer while we are off doing
3646  * TOAST and/or table-file-extension work. We must mark the old tuple to
3647  * show that it's locked, else other processes may try to update it
3648  * themselves.
3649  *
3650  * We need to invoke the toaster if there are already any out-of-line
3651  * toasted values present, or if the new tuple is over-threshold.
3652  */
3653  if (relation->rd_rel->relkind != RELKIND_RELATION &&
3654  relation->rd_rel->relkind != RELKIND_MATVIEW)
3655  {
3656  /* toast table entries should never be recursively toasted */
3657  Assert(!HeapTupleHasExternal(&oldtup));
3658  Assert(!HeapTupleHasExternal(newtup));
3659  need_toast = false;
3660  }
3661  else
3662  need_toast = (HeapTupleHasExternal(&oldtup) ||
3663  HeapTupleHasExternal(newtup) ||
3664  newtup->t_len > TOAST_TUPLE_THRESHOLD);
3665 
3666  pagefree = PageGetHeapFreeSpace(page);
3667 
3668  newtupsize = MAXALIGN(newtup->t_len);
3669 
3670  if (need_toast || newtupsize > pagefree)
3671  {
3672  TransactionId xmax_lock_old_tuple;
3673  uint16 infomask_lock_old_tuple,
3674  infomask2_lock_old_tuple;
3675  bool cleared_all_frozen = false;
3676 
3677  /*
3678  * To prevent concurrent sessions from updating the tuple, we have to
3679  * temporarily mark it locked, while we release the page-level lock.
3680  *
3681  * To satisfy the rule that any xid potentially appearing in a buffer
3682  * written out to disk must first appear in the WAL, we unfortunately have to WAL log this
3683  * temporary modification. We can reuse xl_heap_lock for this
3684  * purpose. If we crash/error before following through with the
3685  * actual update, xmax will be of an aborted transaction, allowing
3686  * other sessions to proceed.
3687  */
3688 
3689  /*
3690  * Compute xmax / infomask appropriate for locking the tuple. This has
3691  * to be done separately from the combo that's going to be used for
3692  * updating, because the potentially created multixact would otherwise
3693  * be wrong.
3694  */
3695  compute_new_xmax_infomask(HeapTupleHeaderGetRawXmax(oldtup.t_data),
3696  oldtup.t_data->t_infomask,
3697  oldtup.t_data->t_infomask2,
3698  xid, *lockmode, false,
3699  &xmax_lock_old_tuple, &infomask_lock_old_tuple,
3700  &infomask2_lock_old_tuple);
3701 
3702  Assert(HEAP_XMAX_IS_LOCKED_ONLY(infomask_lock_old_tuple));
3703 
3703 
3704  START_CRIT_SECTION();
3705 
3706  /* Clear obsolete visibility flags ... */
3707  oldtup.t_data->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
3708  oldtup.t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED;
3709  HeapTupleClearHotUpdated(&oldtup);
3710  /* ... and store info about transaction updating this tuple */
3711  Assert(TransactionIdIsValid(xmax_lock_old_tuple));
3712  HeapTupleHeaderSetXmax(oldtup.t_data, xmax_lock_old_tuple);
3713  oldtup.t_data->t_infomask |= infomask_lock_old_tuple;
3714  oldtup.t_data->t_infomask2 |= infomask2_lock_old_tuple;
3715  HeapTupleHeaderSetCmax(oldtup.t_data, cid, iscombo);
3716 
3717  /* temporarily make it look not-updated, but locked */
3718  oldtup.t_data->t_ctid = oldtup.t_self;
3719 
3720  /*
3721  * Clear all-frozen bit on visibility map if needed. We could
3722  * immediately reset ALL_VISIBLE, but given that the WAL logging
3723  * overhead would be unchanged, that doesn't seem necessarily
3724  * worthwhile.
3725  */
3726  if (PageIsAllVisible(page) &&
3727  visibilitymap_clear(relation, block, vmbuffer,
3728  VISIBILITYMAP_ALL_FROZEN))
3729  cleared_all_frozen = true;
3730 
3731  MarkBufferDirty(buffer);
3732 
3733  if (RelationNeedsWAL(relation))
3734  {
3735  xl_heap_lock xlrec;
3736  XLogRecPtr recptr;
3737 
3738  XLogBeginInsert();
3739  XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
3740 
3741  xlrec.offnum = ItemPointerGetOffsetNumber(&oldtup.t_self);
3742  xlrec.xmax = xmax_lock_old_tuple;
3743  xlrec.infobits_set = compute_infobits(oldtup.t_data->t_infomask,
3744  oldtup.t_data->t_infomask2);
3745  xlrec.flags =
3746  cleared_all_frozen ? XLH_LOCK_ALL_FROZEN_CLEARED : 0;
3747  XLogRegisterData((char *) &xlrec, SizeOfHeapLock);
3748  recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_LOCK);
3749  PageSetLSN(page, recptr);
3750  }
3751 
3752  END_CRIT_SECTION();
3753 
3754  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
3755 
3756  /*
3757  * Let the toaster do its thing, if needed.
3758  *
3759  * Note: below this point, heaptup is the data we actually intend to
3760  * store into the relation; newtup is the caller's original untoasted
3761  * data.
3762  */
3763  if (need_toast)
3764  {
3765  /* Note we always use WAL and FSM during updates */
3766  heaptup = heap_toast_insert_or_update(relation, newtup, &oldtup, 0);
3767  newtupsize = MAXALIGN(heaptup->t_len);
3768  }
3769  else
3770  heaptup = newtup;
3771 
3772  /*
3773  * Now, do we need a new page for the tuple, or not? This is a bit
3774  * tricky since someone else could have added tuples to the page while
3775  * we weren't looking. We have to recheck the available space after
3776  * reacquiring the buffer lock. But don't bother to do that if the
3777  * former amount of free space is still not enough; it's unlikely
3778  * there's more free now than before.
3779  *
3780  * What's more, if we need to get a new page, we will need to acquire
3781  * buffer locks on both old and new pages. To avoid deadlock against
3782  * some other backend trying to get the same two locks in the other
3783  * order, we must be consistent about the order we get the locks in.
3784  * We use the rule "lock the lower-numbered page of the relation
3785  * first". To implement this, we must do RelationGetBufferForTuple
3786  * while not holding the lock on the old page, and we must rely on it
3787  * to get the locks on both pages in the correct order.
3788  *
3789  * Another consideration is that we need visibility map page pin(s) if
3790  * we will have to clear the all-visible flag on either page. If we
3791  * call RelationGetBufferForTuple, we rely on it to acquire any such
3792  * pins; but if we don't, we have to handle that here. Hence we need
3793  * a loop.
3794  */
3795  for (;;)
3796  {
3797  if (newtupsize > pagefree)
3798  {
3799  /* It doesn't fit, must use RelationGetBufferForTuple. */
3800  newbuf = RelationGetBufferForTuple(relation, heaptup->t_len,
3801  buffer, 0, NULL,
3802  &vmbuffer_new, &vmbuffer,
3803  0);
3804  /* We're all done. */
3805  break;
3806  }
3807  /* Acquire VM page pin if needed and we don't have it. */
3808  if (vmbuffer == InvalidBuffer && PageIsAllVisible(page))
3809  visibilitymap_pin(relation, block, &vmbuffer);
3810  /* Re-acquire the lock on the old tuple's page. */
3811  LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
3812  /* Re-check using the up-to-date free space */
3813  pagefree = PageGetHeapFreeSpace(page);
3814  if (newtupsize > pagefree ||
3815  (vmbuffer == InvalidBuffer && PageIsAllVisible(page)))
3816  {
3817  /*
3818  * Rats, it doesn't fit anymore, or somebody just now set the
3819  * all-visible flag. We must now unlock and loop to avoid
3820  * deadlock. Fortunately, this path should seldom be taken.
3821  */
3822  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
3823  }
3824  else
3825  {
3826  /* We're all done. */
3827  newbuf = buffer;
3828  break;
3829  }
3830  }
3831  }
3832  else
3833  {
3834  /* No TOAST work needed, and it'll fit on same page */
3835  newbuf = buffer;
3836  heaptup = newtup;
3837  }
3838 
3839  /*
3840  * We're about to do the actual update -- check for conflict first, to
3841  * avoid possibly having to roll back work we've just done.
3842  *
3843  * This is safe without a recheck as long as there is no possibility of
3844  * another process scanning the pages between this check and the update
3845  * being visible to the scan (i.e., exclusive buffer content lock(s) are
3846  * continuously held from this point until the tuple update is visible).
3847  *
3848  * For the new tuple the only check needed is at the relation level, but
3849  * since both tuples are in the same relation and the check for oldtup
3850  * will include checking the relation level, there is no benefit to a
3851  * separate check for the new tuple.
3852  */
3853  CheckForSerializableConflictIn(relation, &oldtup.t_self,
3854  BufferGetBlockNumber(buffer));
3855 
3856  /*
3857  * At this point newbuf and buffer are both pinned and locked, and newbuf
3858  * has enough space for the new tuple. If they are the same buffer, only
3859  * one pin is held.
3860  */
3861 
3862  if (newbuf == buffer)
3863  {
3864  /*
3865  * Since the new tuple is going into the same page, we might be able
3866  * to do a HOT update. Check if any of the index columns have been
3867  * changed.
3868  */
3869  if (!bms_overlap(modified_attrs, hot_attrs))
3870  {
3871  use_hot_update = true;
3872 
3873  /*
3874  * If none of the columns that are used in hot-blocking indexes
3875  * were updated, we can apply HOT, but we do still need to check
3876  * if we need to update the summarizing indexes, and update those
3877  * indexes if the columns were updated, or we may fail to detect
3878  * e.g. value bound changes in BRIN minmax indexes.
3879  */
3880  if (bms_overlap(modified_attrs, sum_attrs))
3881  summarized_update = true;
3882  }
3883  }
3884  else
3885  {
3886  /* Set a hint that the old page could use prune/defrag */
3887  PageSetFull(page);
3888  }
3889 
3890  /*
3891  * Compute replica identity tuple before entering the critical section so
3892  * we don't PANIC upon a memory allocation failure.
3893  * ExtractReplicaIdentity() will return NULL if nothing needs to be
3894  * logged. Pass old key required as true only if the replica identity key
3895  * columns are modified or it has external data.
3896  */
3897  old_key_tuple = ExtractReplicaIdentity(relation, &oldtup,
3898  bms_overlap(modified_attrs, id_attrs) ||
3899  id_has_external,
3900  &old_key_copied);
3901 
3902  /* NO EREPORT(ERROR) from here till changes are logged */
3903  START_CRIT_SECTION();
3904 
3905  /*
3906  * If this transaction commits, the old tuple will become DEAD sooner or
3907  * later. Set flag that this page is a candidate for pruning once our xid
3908  * falls below the OldestXmin horizon. If the transaction finally aborts,
3909  * the subsequent page pruning will be a no-op and the hint will be
3910  * cleared.
3911  *
3912  * XXX Should we set hint on newbuf as well? If the transaction aborts,
3913  * there would be a prunable tuple in the newbuf; but for now we choose
3914  * not to optimize for aborts. Note that heap_xlog_update must be kept in
3915  * sync if this decision changes.
3916  */
3917  PageSetPrunable(page, xid);
3918 
3919  if (use_hot_update)
3920  {
3921  /* Mark the old tuple as HOT-updated */
3922  HeapTupleSetHotUpdated(&oldtup);
3923  /* And mark the new tuple as heap-only */
3924  HeapTupleSetHeapOnly(heaptup);
3925  /* Mark the caller's copy too, in case different from heaptup */
3926  HeapTupleSetHeapOnly(newtup);
3927  }
3928  else
3929  {
3930  /* Make sure tuples are correctly marked as not-HOT */
3931  HeapTupleClearHotUpdated(&oldtup);
3932  HeapTupleClearHeapOnly(heaptup);
3933  HeapTupleClearHeapOnly(newtup);
3934  }
3935 
3936  RelationPutHeapTuple(relation, newbuf, heaptup, false); /* insert new tuple */
3937 
3938 
3939  /* Clear obsolete visibility flags, possibly set by ourselves above... */
3940  oldtup.t_data->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
3941  oldtup.t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED;
3942  /* ... and store info about transaction updating this tuple */
3943  Assert(TransactionIdIsValid(xmax_old_tuple));
3944  HeapTupleHeaderSetXmax(oldtup.t_data, xmax_old_tuple);
3945  oldtup.t_data->t_infomask |= infomask_old_tuple;
3946  oldtup.t_data->t_infomask2 |= infomask2_old_tuple;
3947  HeapTupleHeaderSetCmax(oldtup.t_data, cid, iscombo);
3948 
3949  /* record address of new tuple in t_ctid of old one */
3950  oldtup.t_data->t_ctid = heaptup->t_self;
3951 
3952  /* clear PD_ALL_VISIBLE flags, reset all visibilitymap bits */
3953  if (PageIsAllVisible(BufferGetPage(buffer)))
3954  {
3955  all_visible_cleared = true;
3956  PageClearAllVisible(BufferGetPage(buffer));
3957  visibilitymap_clear(relation, BufferGetBlockNumber(buffer),
3958  vmbuffer, VISIBILITYMAP_VALID_BITS);
3959  }
3960  if (newbuf != buffer && PageIsAllVisible(BufferGetPage(newbuf)))
3961  {
3962  all_visible_cleared_new = true;
3963  PageClearAllVisible(BufferGetPage(newbuf));
3964  visibilitymap_clear(relation, BufferGetBlockNumber(newbuf),
3965  vmbuffer_new, VISIBILITYMAP_VALID_BITS);
3966  }
3967 
3968  if (newbuf != buffer)
3969  MarkBufferDirty(newbuf);
3970  MarkBufferDirty(buffer);
3971 
3972  /* XLOG stuff */
3973  if (RelationNeedsWAL(relation))
3974  {
3975  XLogRecPtr recptr;
3976 
3977  /*
3978  * For logical decoding we need combo CIDs to properly decode the
3979  * catalog.
3980  */
3981  if (RelationIsAccessibleInLogicalDecoding(relation))
3982  {
3983  log_heap_new_cid(relation, &oldtup);
3984  log_heap_new_cid(relation, heaptup);
3985  }
3986 
3987  recptr = log_heap_update(relation, buffer,
3988  newbuf, &oldtup, heaptup,
3989  old_key_tuple,
3990  all_visible_cleared,
3991  all_visible_cleared_new);
3992  if (newbuf != buffer)
3993  {
3994  PageSetLSN(BufferGetPage(newbuf), recptr);
3995  }
3996  PageSetLSN(BufferGetPage(buffer), recptr);
3997  }
3998 
3999  END_CRIT_SECTION();
4000 
4001  if (newbuf != buffer)
4002  LockBuffer(newbuf, BUFFER_LOCK_UNLOCK);
4003  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
4004 
4005  /*
4006  * Mark old tuple for invalidation from system caches at next command
4007  * boundary, and mark the new tuple for invalidation in case we abort. We
4008  * have to do this before releasing the buffer because oldtup is in the
4009  * buffer. (heaptup is all in local memory, but it's necessary to process
4010  * both tuple versions in one call to inval.c so we can avoid redundant
4011  * sinval messages.)
4012  */
4013  CacheInvalidateHeapTuple(relation, &oldtup, heaptup);
4014 
4015  /* Now we can release the buffer(s) */
4016  if (newbuf != buffer)
4017  ReleaseBuffer(newbuf);
4018  ReleaseBuffer(buffer);
4019  if (BufferIsValid(vmbuffer_new))
4020  ReleaseBuffer(vmbuffer_new);
4021  if (BufferIsValid(vmbuffer))
4022  ReleaseBuffer(vmbuffer);
4023 
4024  /*
4025  * Release the lmgr tuple lock, if we had it.
4026  */
4027  if (have_tuple_lock)
4028  UnlockTupleTuplock(relation, &(oldtup.t_self), *lockmode);
4029 
4030  pgstat_count_heap_update(relation, use_hot_update, newbuf != buffer);
4031 
4032  /*
4033  * If heaptup is a private copy, release it. Don't forget to copy t_self
4034  * back to the caller's image, too.
4035  */
4036  if (heaptup != newtup)
4037  {
4038  newtup->t_self = heaptup->t_self;
4039  heap_freetuple(heaptup);
4040  }
4041 
4042  /*
4043  * If it is a HOT update, the update may still need to update summarized
4044  * indexes, lest we fail to update those summaries and get incorrect
4045  * results (for example, minmax bounds of the block may change with this
4046  * update).
4047  */
4048  if (use_hot_update)
4049  {
4050  if (summarized_update)
4051  *update_indexes = TU_Summarizing;
4052  else
4053  *update_indexes = TU_None;
4054  }
4055  else
4056  *update_indexes = TU_All;
4057 
4058  if (old_key_tuple != NULL && old_key_copied)
4059  heap_freetuple(old_key_tuple);
4060 
4061  bms_free(hot_attrs);
4062  bms_free(sum_attrs);
4063  bms_free(key_attrs);
4064  bms_free(id_attrs);
4065  bms_free(modified_attrs);
4066  bms_free(interesting_attrs);
4067 
4068  return TM_Ok;
4069 }
void bms_free(Bitmapset *a)
Definition: bitmapset.c:239
Bitmapset * bms_add_members(Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:917
bool bms_overlap(const Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:582
static void PageSetFull(Page page)
Definition: bufpage.h:415
TransactionId HeapTupleGetUpdateXid(HeapTupleHeader tuple)
Definition: heapam.c:7090
static Bitmapset * HeapDetermineColumnsInfo(Relation relation, Bitmapset *interesting_cols, Bitmapset *external_cols, HeapTuple oldtup, HeapTuple newtup, bool *has_external)
Definition: heapam.c:4127
static XLogRecPtr log_heap_update(Relation reln, Buffer oldbuf, Buffer newbuf, HeapTuple oldtup, HeapTuple newtup, HeapTuple old_key_tuple, bool all_visible_cleared, bool new_all_visible_cleared)
Definition: heapam.c:8348
HeapTuple heap_toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup, int options)
Definition: heaptoast.c:96
#define TOAST_TUPLE_THRESHOLD
Definition: heaptoast.h:48
#define HeapTupleSetHotUpdated(tuple)
Definition: htup_details.h:677
#define HEAP2_XACT_MASK
Definition: htup_details.h:279
#define HEAP_XMAX_LOCK_ONLY
Definition: htup_details.h:197
#define HeapTupleHeaderSetCmin(tup, cid)
Definition: htup_details.h:393
#define HEAP_XACT_MASK
Definition: htup_details.h:215
#define HeapTupleSetHeapOnly(tuple)
Definition: htup_details.h:686
#define HeapTupleClearHeapOnly(tuple)
Definition: htup_details.h:689
#define HEAP_UPDATED
Definition: htup_details.h:210
#define HEAP_XMAX_KEYSHR_LOCK
Definition: htup_details.h:194
#define HeapTupleClearHotUpdated(tuple)
Definition: htup_details.h:680
@ XLTW_Update
Definition: lmgr.h:27
void pgstat_count_heap_update(Relation rel, bool hot, bool newpage)
Bitmapset * RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind)
Definition: relcache.c:5231
@ INDEX_ATTR_BITMAP_KEY
Definition: relcache.h:61
@ INDEX_ATTR_BITMAP_HOT_BLOCKING
Definition: relcache.h:64
@ INDEX_ATTR_BITMAP_SUMMARIZED
Definition: relcache.h:65
@ INDEX_ATTR_BITMAP_IDENTITY_KEY
Definition: relcache.h:63
@ TU_Summarizing
Definition: tableam.h:126
@ TU_All
Definition: tableam.h:123
@ TU_None
Definition: tableam.h:120
bool TransactionIdDidAbort(TransactionId transactionId)
Definition: transam.c:188

References Assert, bms_add_members(), bms_free(), bms_overlap(), BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage(), BufferIsValid(), CacheInvalidateHeapTuple(), CheckForSerializableConflictIn(), TM_FailureData::cmax, compute_infobits(), compute_new_xmax_infomask(), TM_FailureData::ctid, DoesMultiXactIdConflict(), END_CRIT_SECTION, ereport, errcode(), errmsg(), ERROR, ExtractReplicaIdentity(), xl_heap_lock::flags, GetCurrentTransactionId(), GetMultiXactIdHintBits(), HEAP2_XACT_MASK, heap_acquire_tuplock(), heap_freetuple(), HEAP_KEYS_UPDATED, HEAP_LOCKED_UPGRADED, HEAP_MOVED, heap_toast_insert_or_update(), HEAP_UPDATED, HEAP_XACT_MASK, HEAP_XMAX_BITS, HEAP_XMAX_INVALID, HEAP_XMAX_IS_KEYSHR_LOCKED, HEAP_XMAX_IS_LOCKED_ONLY, HEAP_XMAX_IS_MULTI, HEAP_XMAX_KEYSHR_LOCK, HEAP_XMAX_LOCK_ONLY, HeapDetermineColumnsInfo(), HeapTupleClearHeapOnly, HeapTupleClearHotUpdated, HeapTupleGetUpdateXid(), HeapTupleHasExternal, HeapTupleHeaderAdjustCmax(), HeapTupleHeaderGetCmax(), HeapTupleHeaderGetNatts, HeapTupleHeaderGetRawXmax, HeapTupleHeaderGetUpdateXid, HeapTupleHeaderSetCmax, HeapTupleHeaderSetCmin, HeapTupleHeaderSetXmax, HeapTupleHeaderSetXmin, HeapTupleSatisfiesUpdate(), HeapTupleSatisfiesVisibility(), HeapTupleSetHeapOnly, HeapTupleSetHotUpdated, INDEX_ATTR_BITMAP_HOT_BLOCKING, INDEX_ATTR_BITMAP_IDENTITY_KEY, INDEX_ATTR_BITMAP_KEY, INDEX_ATTR_BITMAP_SUMMARIZED, xl_heap_lock::infobits_set, InvalidBuffer, InvalidCommandId, InvalidSnapshot, InvalidTransactionId, IsInParallelMode(), ItemIdGetLength, ItemIdIsNormal, ItemPointerEquals(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), ItemPointerIsValid(), LockBuffer(), LockTupleExclusive, LockTupleNoKeyExclusive, LockWaitBlock, log_heap_new_cid(), log_heap_update(), MarkBufferDirty(), MAXALIGN, MultiXactIdSetOldestMember(), MultiXactIdWait(), MultiXactStatusNoKeyUpdate, MultiXactStatusUpdate, xl_heap_lock::offnum, PageClearAllVisible(), PageGetHeapFreeSpace(), PageGetItem(), PageGetItemId(), PageIsAllVisible(), PageSetFull(), PageSetLSN(), PageSetPrunable, pgstat_count_heap_update(), RelationData::rd_rel, ReadBuffer(), REGBUF_STANDARD, RelationGetBufferForTuple(), RelationGetIndexAttrBitmap(), RelationGetNumberOfAttributes, RelationGetRelid, RelationIsAccessibleInLogicalDecoding, RelationNeedsWAL, RelationPutHeapTuple(), ReleaseBuffer(), SizeOfHeapLock, START_CRIT_SECTION, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TM_BeingModified, TM_Deleted, TM_Invisible, TM_Ok, TM_SelfModified, TM_Updated, TOAST_TUPLE_THRESHOLD, TransactionIdDidAbort(), TransactionIdEquals, TransactionIdIsCurrentTransactionId(), TransactionIdIsValid, TU_All, TU_None, TU_Summarizing, UnlockReleaseBuffer(), UnlockTupleTuplock, UpdateXmaxHintBits(), VISIBILITYMAP_ALL_FROZEN, visibilitymap_clear(), visibilitymap_pin(), VISIBILITYMAP_VALID_BITS, XactLockTableWait(), XLH_LOCK_ALL_FROZEN_CLEARED, XLOG_HEAP_LOCK, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), XLogRegisterData(), XLTW_Update, xl_heap_lock::xmax, TM_FailureData::xmax, and xmax_infomask_changed().

Referenced by heapam_tuple_update(), and simple_heap_update().
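
The update path above is normally reached through the table AM layer (heapam_tuple_update()) or simple_heap_update(). As a rough illustration only, the sketch below shows how a backend-internal caller might drive heap_update() directly and act on its TM_Result; the wrapper name update_tuple_sketch and the reduced error handling are assumptions for illustration, not code taken from heapam.c.

#include "postgres.h"
#include "access/heapam.h"
#include "access/tableam.h"
#include "access/xact.h"
#include "storage/itemptr.h"
#include "utils/snapshot.h"

/* Sketch: update the tuple at *otid with newtup, waiting for any concurrent
 * updater, and note whether new index entries are required afterwards. */
static void
update_tuple_sketch(Relation rel, ItemPointer otid, HeapTuple newtup)
{
    TM_Result        result;
    TM_FailureData   tmfd;
    LockTupleMode    lockmode;
    TU_UpdateIndexes update_indexes;

    result = heap_update(rel, otid, newtup,
                         GetCurrentCommandId(true),
                         InvalidSnapshot,   /* no RI crosscheck snapshot */
                         true,              /* wait for concurrent updaters */
                         &tmfd, &lockmode, &update_indexes);

    if (result != TM_Ok)
        elog(ERROR, "tuple concurrently updated or deleted");

    /*
     * On TM_Ok the caller is responsible for inserting new index entries,
     * unless update_indexes came back as TU_None (HOT update).
     */
}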

◆ heap_vacuum_rel()

void heap_vacuum_rel ( Relation  rel,
struct VacuumParams params,
BufferAccessStrategy  bstrategy 
)

Definition at line 295 of file vacuumlazy.c.

297 {
298  LVRelState *vacrel;
299  bool verbose,
300  instrument,
301  skipwithvm,
302  frozenxid_updated,
303  minmulti_updated;
304  BlockNumber orig_rel_pages,
305  new_rel_pages,
306  new_rel_allvisible;
307  PGRUsage ru0;
308  TimestampTz starttime = 0;
309  PgStat_Counter startreadtime = 0,
310  startwritetime = 0;
311  WalUsage startwalusage = pgWalUsage;
312  int64 StartPageHit = VacuumPageHit,
313  StartPageMiss = VacuumPageMiss,
314  StartPageDirty = VacuumPageDirty;
315  ErrorContextCallback errcallback;
316  char **indnames = NULL;
317 
318  verbose = (params->options & VACOPT_VERBOSE) != 0;
319  instrument = (verbose || (AmAutoVacuumWorkerProcess() &&
320  params->log_min_duration >= 0));
321  if (instrument)
322  {
323  pg_rusage_init(&ru0);
324  starttime = GetCurrentTimestamp();
325  if (track_io_timing)
326  {
327  startreadtime = pgStatBlockReadTime;
328  startwritetime = pgStatBlockWriteTime;
329  }
330  }
331 
332  pgstat_progress_start_command(PROGRESS_COMMAND_VACUUM,
333  RelationGetRelid(rel));
334 
335  /*
336  * Setup error traceback support for ereport() first. The idea is to set
337  * up an error context callback to display additional information on any
338  * error during a vacuum. During different phases of vacuum, we update
339  * the state so that the error context callback always display current
340  * information.
341  *
342  * Copy the names of heap rel into local memory for error reporting
343  * purposes, too. It isn't always safe to assume that we can get the name
344  * of each rel. It's convenient for code in lazy_scan_heap to always use
345  * these temp copies.
346  */
347  vacrel = (LVRelState *) palloc0(sizeof(LVRelState));
348  vacrel->dbname = get_database_name(MyDatabaseId);
349  vacrel->relnamespace = get_namespace_name(RelationGetNamespace(rel));
350  vacrel->relname = pstrdup(RelationGetRelationName(rel));
351  vacrel->indname = NULL;
352  vacrel->phase = VACUUM_ERRCB_PHASE_UNKNOWN;
353  vacrel->verbose = verbose;
354  errcallback.callback = vacuum_error_callback;
355  errcallback.arg = vacrel;
356  errcallback.previous = error_context_stack;
357  error_context_stack = &errcallback;
358 
359  /* Set up high level stuff about rel and its indexes */
360  vacrel->rel = rel;
361  vac_open_indexes(vacrel->rel, RowExclusiveLock, &vacrel->nindexes,
362  &vacrel->indrels);
363  vacrel->bstrategy = bstrategy;
364  if (instrument && vacrel->nindexes > 0)
365  {
366  /* Copy index names used by instrumentation (not error reporting) */
367  indnames = palloc(sizeof(char *) * vacrel->nindexes);
368  for (int i = 0; i < vacrel->nindexes; i++)
369  indnames[i] = pstrdup(RelationGetRelationName(vacrel->indrels[i]));
370  }
371 
372  /*
373  * The index_cleanup param either disables index vacuuming and cleanup or
374  * forces it to go ahead when we would otherwise apply the index bypass
375  * optimization. The default is 'auto', which leaves the final decision
376  * up to lazy_vacuum().
377  *
378  * The truncate param allows user to avoid attempting relation truncation,
379  * though it can't force truncation to happen.
380  */
381  Assert(params->index_cleanup != VACOPTVALUE_UNSPECIFIED);
382  Assert(params->truncate != VACOPTVALUE_UNSPECIFIED &&
383  params->truncate != VACOPTVALUE_AUTO);
384 
385  /*
386  * While VacuumFailsafeActive is reset to false before calling this, we
387  * still need to reset it here due to recursive calls.
388  */
389  VacuumFailsafeActive = false;
390  vacrel->consider_bypass_optimization = true;
391  vacrel->do_index_vacuuming = true;
392  vacrel->do_index_cleanup = true;
393  vacrel->do_rel_truncate = (params->truncate != VACOPTVALUE_DISABLED);
394  if (params->index_cleanup == VACOPTVALUE_DISABLED)
395  {
396  /* Force disable index vacuuming up-front */
397  vacrel->do_index_vacuuming = false;
398  vacrel->do_index_cleanup = false;
399  }
400  else if (params->index_cleanup == VACOPTVALUE_ENABLED)
401  {
402  /* Force index vacuuming. Note that failsafe can still bypass. */
403  vacrel->consider_bypass_optimization = false;
404  }
405  else
406  {
407  /* Default/auto, make all decisions dynamically */
408  Assert(params->index_cleanup == VACOPTVALUE_AUTO);
409  }
410 
411  /* Initialize page counters explicitly (be tidy) */
412  vacrel->scanned_pages = 0;
413  vacrel->removed_pages = 0;
414  vacrel->frozen_pages = 0;
415  vacrel->lpdead_item_pages = 0;
416  vacrel->missed_dead_pages = 0;
417  vacrel->nonempty_pages = 0;
418  /* dead_items_alloc allocates vacrel->dead_items later on */
419 
420  /* Allocate/initialize output statistics state */
421  vacrel->new_rel_tuples = 0;
422  vacrel->new_live_tuples = 0;
423  vacrel->indstats = (IndexBulkDeleteResult **)
424  palloc0(vacrel->nindexes * sizeof(IndexBulkDeleteResult *));
425 
426  /* Initialize remaining counters (be tidy) */
427  vacrel->num_index_scans = 0;
428  vacrel->tuples_deleted = 0;
429  vacrel->tuples_frozen = 0;
430  vacrel->lpdead_items = 0;
431  vacrel->live_tuples = 0;
432  vacrel->recently_dead_tuples = 0;
433  vacrel->missed_dead_tuples = 0;
434 
435  /*
436  * Get cutoffs that determine which deleted tuples are considered DEAD,
437  * not just RECENTLY_DEAD, and which XIDs/MXIDs to freeze. Then determine
438  * the extent of the blocks that we'll scan in lazy_scan_heap. It has to
439  * happen in this order to ensure that the OldestXmin cutoff field works
440  * as an upper bound on the XIDs stored in the pages we'll actually scan
441  * (NewRelfrozenXid tracking must never be allowed to miss unfrozen XIDs).
442  *
443  * Next acquire vistest, a related cutoff that's used in pruning. We
444  * expect vistest will always make heap_page_prune_and_freeze() remove any
445  * deleted tuple whose xmax is < OldestXmin. lazy_scan_prune must never
446  * become confused about whether a tuple should be frozen or removed. (In
447  * the future we might want to teach lazy_scan_prune to recompute vistest
448  * from time to time, to increase the number of dead tuples it can prune
449  * away.)
450  */
451  vacrel->aggressive = vacuum_get_cutoffs(rel, params, &vacrel->cutoffs);
452  vacrel->rel_pages = orig_rel_pages = RelationGetNumberOfBlocks(rel);
453  vacrel->vistest = GlobalVisTestFor(rel);
454  /* Initialize state used to track oldest extant XID/MXID */
455  vacrel->NewRelfrozenXid = vacrel->cutoffs.OldestXmin;
456  vacrel->NewRelminMxid = vacrel->cutoffs.OldestMxact;
457  vacrel->skippedallvis = false;
458  skipwithvm = true;
459  if (params->options & VACOPT_DISABLE_PAGE_SKIPPING)
460  {
461  /*
462  * Force aggressive mode, and disable skipping blocks using the
463  * visibility map (even those set all-frozen)
464  */
465  vacrel->aggressive = true;
466  skipwithvm = false;
467  }
468 
469  vacrel->skipwithvm = skipwithvm;
470 
471  if (verbose)
472  {
473  if (vacrel->aggressive)
474  ereport(INFO,
475  (errmsg("aggressively vacuuming \"%s.%s.%s\"",
476  vacrel->dbname, vacrel->relnamespace,
477  vacrel->relname)));
478  else
479  ereport(INFO,
480  (errmsg("vacuuming \"%s.%s.%s\"",
481  vacrel->dbname, vacrel->relnamespace,
482  vacrel->relname)));
483  }
484 
485  /*
486  * Allocate dead_items memory using dead_items_alloc. This handles
487  * parallel VACUUM initialization as part of allocating shared memory
488  * space used for dead_items. (But do a failsafe precheck first, to
489  * ensure that parallel VACUUM won't be attempted at all when relfrozenxid
490  * is already dangerously old.)
491  */
492  lazy_check_wraparound_failsafe(vacrel);
493  dead_items_alloc(vacrel, params->nworkers);
494 
495  /*
496  * Call lazy_scan_heap to perform all required heap pruning, index
497  * vacuuming, and heap vacuuming (plus related processing)
498  */
499  lazy_scan_heap(vacrel);
500 
501  /*
502  * Free resources managed by dead_items_alloc. This ends parallel mode in
503  * passing when necessary.
504  */
505  dead_items_cleanup(vacrel);
506  Assert(!IsInParallelMode());
507 
508  /*
509  * Update pg_class entries for each of rel's indexes where appropriate.
510  *
511  * Unlike the later update to rel's pg_class entry, this is not critical.
512  * Maintains relpages/reltuples statistics used by the planner only.
513  */
514  if (vacrel->do_index_cleanup)
515  update_relstats_all_indexes(vacrel);
516 
517  /* Done with rel's indexes */
518  vac_close_indexes(vacrel->nindexes, vacrel->indrels, NoLock);
519 
520  /* Optionally truncate rel */
521  if (should_attempt_truncation(vacrel))
522  lazy_truncate_heap(vacrel);
523 
524  /* Pop the error context stack */
525  error_context_stack = errcallback.previous;
526 
527  /* Report that we are now doing final cleanup */
528  pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
529  PROGRESS_VACUUM_PHASE_FINAL_CLEANUP);
530 
531  /*
532  * Prepare to update rel's pg_class entry.
533  *
534  * Aggressive VACUUMs must always be able to advance relfrozenxid to a
535  * value >= FreezeLimit, and relminmxid to a value >= MultiXactCutoff.
536  * Non-aggressive VACUUMs may advance them by any amount, or not at all.
537  */
538  Assert(vacrel->NewRelfrozenXid == vacrel->cutoffs.OldestXmin ||
539  TransactionIdPrecedesOrEquals(vacrel->aggressive ? vacrel->cutoffs.FreezeLimit :
540  vacrel->cutoffs.relfrozenxid,
541  vacrel->NewRelfrozenXid));
542  Assert(vacrel->NewRelminMxid == vacrel->cutoffs.OldestMxact ||
543  MultiXactIdPrecedesOrEquals(vacrel->aggressive ? vacrel->cutoffs.MultiXactCutoff :
544  vacrel->cutoffs.relminmxid,
545  vacrel->NewRelminMxid));
546  if (vacrel->skippedallvis)
547  {
548  /*
549  * Must keep original relfrozenxid in a non-aggressive VACUUM that
550  * chose to skip an all-visible page range. The state that tracks new
551  * values will have missed unfrozen XIDs from the pages we skipped.
552  */
553  Assert(!vacrel->aggressive);
554  vacrel->NewRelfrozenXid = InvalidTransactionId;
555  vacrel->NewRelminMxid = InvalidMultiXactId;
556  }
557 
558  /*
559  * For safety, clamp relallvisible to be not more than what we're setting
560  * pg_class.relpages to
561  */
562  new_rel_pages = vacrel->rel_pages; /* After possible rel truncation */
563  visibilitymap_count(rel, &new_rel_allvisible, NULL);
564  if (new_rel_allvisible > new_rel_pages)
565  new_rel_allvisible = new_rel_pages;
566 
567  /*
568  * Now actually update rel's pg_class entry.
569  *
570  * In principle new_live_tuples could be -1 indicating that we (still)
571  * don't know the tuple count. In practice that can't happen, since we
572  * scan every page that isn't skipped using the visibility map.
573  */
574  vac_update_relstats(rel, new_rel_pages, vacrel->new_live_tuples,
575  new_rel_allvisible, vacrel->nindexes > 0,
576  vacrel->NewRelfrozenXid, vacrel->NewRelminMxid,
577  &frozenxid_updated, &minmulti_updated, false);
578 
579  /*
580  * Report results to the cumulative stats system, too.
581  *
582  * Deliberately avoid telling the stats system about LP_DEAD items that
583  * remain in the table due to VACUUM bypassing index and heap vacuuming.
584  * ANALYZE will consider the remaining LP_DEAD items to be dead "tuples".
585  * It seems like a good idea to err on the side of not vacuuming again too
586  * soon in cases where the failsafe prevented significant amounts of heap
587  * vacuuming.
588  */
589  pgstat_report_vacuum(RelationGetRelid(rel),
590  rel->rd_rel->relisshared,
591  Max(vacrel->new_live_tuples, 0),
592  vacrel->recently_dead_tuples +
593  vacrel->missed_dead_tuples);
594  pgstat_progress_end_command();
595 
596  if (instrument)
597  {
598  TimestampTz endtime = GetCurrentTimestamp();
599 
600  if (verbose || params->log_min_duration == 0 ||
601  TimestampDifferenceExceeds(starttime, endtime,
602  params->log_min_duration))
603  {
604  long secs_dur;
605  int usecs_dur;
606  WalUsage walusage;
607  StringInfoData buf;
608  char *msgfmt;
609  int32 diff;
610  int64 PageHitOp = VacuumPageHit - StartPageHit,
611  PageMissOp = VacuumPageMiss - StartPageMiss,
612  PageDirtyOp = VacuumPageDirty - StartPageDirty;
613  double read_rate = 0,
614  write_rate = 0;
615 
616  TimestampDifference(starttime, endtime, &secs_dur, &usecs_dur);
617  memset(&walusage, 0, sizeof(WalUsage));
618  WalUsageAccumDiff(&walusage, &pgWalUsage, &startwalusage);
619 
620  initStringInfo(&buf);
621  if (verbose)
622  {
623  /*
624  * Aggressiveness already reported earlier, in dedicated
625  * VACUUM VERBOSE ereport
626  */
627  Assert(!params->is_wraparound);
628  msgfmt = _("finished vacuuming \"%s.%s.%s\": index scans: %d\n");
629  }
630  else if (params->is_wraparound)
631  {
632  /*
633  * While it's possible for a VACUUM to be both is_wraparound
634  * and !aggressive, that's just a corner-case -- is_wraparound
635  * implies aggressive. Produce distinct output for the corner
636  * case all the same, just in case.
637  */
638  if (vacrel->aggressive)
639  msgfmt = _("automatic aggressive vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
640  else
641  msgfmt = _("automatic vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
642  }
643  else
644  {
645  if (vacrel->aggressive)
646  msgfmt = _("automatic aggressive vacuum of table \"%s.%s.%s\": index scans: %d\n");
647  else
648  msgfmt = _("automatic vacuum of table \"%s.%s.%s\": index scans: %d\n");
649  }
650  appendStringInfo(&buf, msgfmt,
651  vacrel->dbname,
652  vacrel->relnamespace,
653  vacrel->relname,
654  vacrel->num_index_scans);
655  appendStringInfo(&buf, _("pages: %u removed, %u remain, %u scanned (%.2f%% of total)\n"),
656  vacrel->removed_pages,
657  new_rel_pages,
658  vacrel->scanned_pages,
659  orig_rel_pages == 0 ? 100.0 :
660  100.0 * vacrel->scanned_pages / orig_rel_pages);
661  appendStringInfo(&buf,
662  _("tuples: %lld removed, %lld remain, %lld are dead but not yet removable\n"),
663  (long long) vacrel->tuples_deleted,
664  (long long) vacrel->new_rel_tuples,
665  (long long) vacrel->recently_dead_tuples);
666  if (vacrel->missed_dead_tuples > 0)
667  appendStringInfo(&buf,
668  _("tuples missed: %lld dead from %u pages not removed due to cleanup lock contention\n"),
669  (long long) vacrel->missed_dead_tuples,
670  vacrel->missed_dead_pages);
671  diff = (int32) (ReadNextTransactionId() -
672  vacrel->cutoffs.OldestXmin);
673  appendStringInfo(&buf,
674  _("removable cutoff: %u, which was %d XIDs old when operation ended\n"),
675  vacrel->cutoffs.OldestXmin, diff);
676  if (frozenxid_updated)
677  {
678  diff = (int32) (vacrel->NewRelfrozenXid -
679  vacrel->cutoffs.relfrozenxid);
680  appendStringInfo(&buf,
681  _("new relfrozenxid: %u, which is %d XIDs ahead of previous value\n"),
682  vacrel->NewRelfrozenXid, diff);
683  }
684  if (minmulti_updated)
685  {
686  diff = (int32) (vacrel->NewRelminMxid -
687  vacrel->cutoffs.relminmxid);
688  appendStringInfo(&buf,
689  _("new relminmxid: %u, which is %d MXIDs ahead of previous value\n"),
690  vacrel->NewRelminMxid, diff);
691  }
692  appendStringInfo(&buf, _("frozen: %u pages from table (%.2f%% of total) had %lld tuples frozen\n"),
693  vacrel->frozen_pages,
694  orig_rel_pages == 0 ? 100.0 :
695  100.0 * vacrel->frozen_pages / orig_rel_pages,
696  (long long) vacrel->tuples_frozen);
697  if (vacrel->do_index_vacuuming)
698  {
699  if (vacrel->nindexes == 0 || vacrel->num_index_scans == 0)
700  appendStringInfoString(&buf, _("index scan not needed: "));
701  else
702  appendStringInfoString(&buf, _("index scan needed: "));
703 
704  msgfmt = _("%u pages from table (%.2f%% of total) had %lld dead item identifiers removed\n");
705  }
706  else
707  {
708  if (!VacuumFailsafeActive)
709  appendStringInfoString(&buf, _("index scan bypassed: "));
710  else
711  appendStringInfoString(&buf, _("index scan bypassed by failsafe: "));
712 
713  msgfmt = _("%u pages from table (%.2f%% of total) have %lld dead item identifiers\n");
714  }
715  appendStringInfo(&buf, msgfmt,
716  vacrel->lpdead_item_pages,
717  orig_rel_pages == 0 ? 100.0 :
718  100.0 * vacrel->lpdead_item_pages / orig_rel_pages,
719  (long long) vacrel->lpdead_items);
720  for (int i = 0; i < vacrel->nindexes; i++)
721  {
722  IndexBulkDeleteResult *istat = vacrel->indstats[i];
723 
724  if (!istat)
725  continue;
726 
727  appendStringInfo(&buf,
728  _("index \"%s\": pages: %u in total, %u newly deleted, %u currently deleted, %u reusable\n"),
729  indnames[i],
730  istat->num_pages,
731  istat->pages_newly_deleted,
732  istat->pages_deleted,
733  istat->pages_free);
734  }
735  if (track_io_timing)
736  {
737  double read_ms = (double) (pgStatBlockReadTime - startreadtime) / 1000;
738  double write_ms = (double) (pgStatBlockWriteTime - startwritetime) / 1000;
739 
740  appendStringInfo(&buf, _("I/O timings: read: %.3f ms, write: %.3f ms\n"),
741  read_ms, write_ms);
742  }
743  if (secs_dur > 0 || usecs_dur > 0)
744  {
745  read_rate = (double) BLCKSZ * PageMissOp / (1024 * 1024) /
746  (secs_dur + usecs_dur / 1000000.0);
747  write_rate = (double) BLCKSZ * PageDirtyOp / (1024 * 1024) /
748  (secs_dur + usecs_dur / 1000000.0);
749  }
750  appendStringInfo(&buf, _("avg read rate: %.3f MB/s, avg write rate: %.3f MB/s\n"),
751  read_rate, write_rate);
752  appendStringInfo(&buf,
753  _("buffer usage: %lld hits, %lld misses, %lld dirtied\n"),
754  (long long) PageHitOp,
755  (long long) PageMissOp,
756  (long long) PageDirtyOp);
757  appendStringInfo(&buf,
758  _("WAL usage: %lld records, %lld full page images, %llu bytes\n"),
759  (long long) walusage.wal_records,
760  (long long) walusage.wal_fpi,
761  (unsigned long long) walusage.wal_bytes);
762  appendStringInfo(&buf, _("system usage: %s"), pg_rusage_show(&ru0));
763 
764  ereport(verbose ? INFO : LOG,
765  (errmsg_internal("%s", buf.data)));
766  pfree(buf.data);
767  }
768  }
769 
770  /* Cleanup index statistics and index names */
771  for (int i = 0; i < vacrel->nindexes; i++)
772  {
773  if (vacrel->indstats[i])
774  pfree(vacrel->indstats[i]);
775 
776  if (instrument)
777  pfree(indnames[i]);
778  }
779 }
void TimestampDifference(TimestampTz start_time, TimestampTz stop_time, long *secs, int *microsecs)
Definition: timestamp.c:1730
bool TimestampDifferenceExceeds(TimestampTz start_time, TimestampTz stop_time, int msec)
Definition: timestamp.c:1790
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1654
void pgstat_progress_start_command(ProgressCommandType cmdtype, Oid relid)
void pgstat_progress_update_param(int index, int64 val)
void pgstat_progress_end_command(void)
@ PROGRESS_COMMAND_VACUUM
bool track_io_timing
Definition: bufmgr.c:142
#define RelationGetNumberOfBlocks(reln)
Definition: bufmgr.h:281
signed int int32
Definition: c.h:494
int64 TimestampTz
Definition: timestamp.h:39
char * get_database_name(Oid dbid)
Definition: dbcommands.c:3153
ErrorContextCallback * error_context_stack
Definition: elog.c:94
#define _(x)
Definition: elog.c:90
#define LOG
Definition: elog.h:31
#define INFO
Definition: elog.h:34
int64 VacuumPageHit
Definition: globals.c:154
int64 VacuumPageMiss
Definition: globals.c:155
int64 VacuumPageDirty
Definition: globals.c:156
Oid MyDatabaseId
Definition: globals.c:91
int verbose
void WalUsageAccumDiff(WalUsage *dst, const WalUsage *add, const WalUsage *sub)
Definition: instrument.c:286
#define NoLock
Definition: lockdefs.h:34
#define RowExclusiveLock
Definition: lockdefs.h:38
char * get_namespace_name(Oid nspid)
Definition: lsyscache.c:3366
char * pstrdup(const char *in)
Definition: mcxt.c:1695
void * palloc0(Size size)
Definition: mcxt.c:1346
#define AmAutoVacuumWorkerProcess()
Definition: miscadmin.h:375
const char * pg_rusage_show(const PGRUsage *ru0)
Definition: pg_rusage.c:40
void pg_rusage_init(PGRUsage *ru0)
Definition: pg_rusage.c:27
int64 PgStat_Counter
Definition: pgstat.h:89
PgStat_Counter pgStatBlockReadTime
PgStat_Counter pgStatBlockWriteTime
void pgstat_report_vacuum(Oid tableoid, bool shared, PgStat_Counter livetuples, PgStat_Counter deadtuples)
#define PROGRESS_VACUUM_PHASE_FINAL_CLEANUP
Definition: progress.h:37
#define PROGRESS_VACUUM_PHASE
Definition: progress.h:21
#define RelationGetNamespace(relation)
Definition: rel.h:546
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:97
void appendStringInfoString(StringInfo str, const char *s)
Definition: stringinfo.c:182
void initStringInfo(StringInfo str)
Definition: stringinfo.c:59
struct ErrorContextCallback * previous
Definition: elog.h:295
void(* callback)(void *arg)
Definition: elog.h:296
BlockNumber pages_deleted
Definition: genam.h:82
BlockNumber pages_newly_deleted
Definition: genam.h:81
BlockNumber pages_free
Definition: genam.h:83
BlockNumber num_pages
Definition: genam.h:77
bool verbose
Definition: vacuumlazy.c:175
int nindexes
Definition: vacuumlazy.c:141
int64 tuples_deleted
Definition: vacuumlazy.c:207
BlockNumber nonempty_pages
Definition: vacuumlazy.c:196
bool do_rel_truncate
Definition: vacuumlazy.c:157
BlockNumber scanned_pages
Definition: vacuumlazy.c:191
bool aggressive
Definition: vacuumlazy.c:148
GlobalVisState * vistest
Definition: vacuumlazy.c:161
BlockNumber removed_pages
Definition: vacuumlazy.c:192
int num_index_scans
Definition: vacuumlazy.c:205
IndexBulkDeleteResult ** indstats
Definition: vacuumlazy.c:202
double new_live_tuples
Definition: vacuumlazy.c:200
double new_rel_tuples
Definition: vacuumlazy.c:199
TransactionId NewRelfrozenXid
Definition: vacuumlazy.c:163
Relation rel
Definition: vacuumlazy.c:139
bool consider_bypass_optimization
Definition: vacuumlazy.c:152
BlockNumber rel_pages
Definition: vacuumlazy.c:190
int64 recently_dead_tuples
Definition: vacuumlazy.c:211
int64 tuples_frozen
Definition: vacuumlazy.c:208
BlockNumber frozen_pages
Definition: vacuumlazy.c:193
char * dbname
Definition: vacuumlazy.c:168
BlockNumber missed_dead_pages
Definition: vacuumlazy.c:195
char * relnamespace
Definition: vacuumlazy.c:169
int64 live_tuples
Definition: vacuumlazy.c:210
int64 lpdead_items
Definition: vacuumlazy.c:209
BufferAccessStrategy bstrategy
Definition: vacuumlazy.c:144
bool skippedallvis
Definition: vacuumlazy.c:165
BlockNumber lpdead_item_pages
Definition: vacuumlazy.c:194
Relation * indrels
Definition: vacuumlazy.c:140
bool skipwithvm
Definition: vacuumlazy.c:150
bool do_index_cleanup
Definition: vacuumlazy.c:156
MultiXactId NewRelminMxid
Definition: vacuumlazy.c:164
int64 missed_dead_tuples
Definition: vacuumlazy.c:212
struct VacuumCutoffs cutoffs
Definition: vacuumlazy.c:160
char * relname
Definition: vacuumlazy.c:170
VacErrPhase phase
Definition: vacuumlazy.c:174
char * indname
Definition: vacuumlazy.c:171
bool do_index_vacuuming
Definition: vacuumlazy.c:155
int nworkers
Definition: vacuum.h:239
VacOptValue truncate
Definition: vacuum.h:231
bits32 options
Definition: vacuum.h:219
bool is_wraparound
Definition: vacuum.h:226
int log_min_duration
Definition: vacuum.h:227
VacOptValue index_cleanup
Definition: vacuum.h:230
uint64 wal_bytes
Definition: instrument.h:55
int64 wal_records
Definition: instrument.h:53
static TransactionId ReadNextTransactionId(void)
Definition: transam.h:315
void vac_open_indexes(Relation relation, LOCKMODE lockmode, int *nindexes, Relation **Irel)
Definition: vacuum.c:2272
void vac_update_relstats(Relation relation, BlockNumber num_pages, double num_tuples, BlockNumber num_all_visible_pages, bool hasindex, TransactionId frozenxid, MultiXactId minmulti, bool *frozenxid_updated, bool *minmulti_updated, bool in_outer_xact)
Definition: vacuum.c:1398
void vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode)
Definition: vacuum.c:2315
bool vacuum_get_cutoffs(Relation rel, const VacuumParams *params, struct VacuumCutoffs *cutoffs)
Definition: vacuum.c:1072
bool VacuumFailsafeActive
Definition: vacuum.c:96
#define VACOPT_VERBOSE
Definition: vacuum.h:182
@ VACOPTVALUE_AUTO
Definition: vacuum.h:203
@ VACOPTVALUE_ENABLED
Definition: vacuum.h:205
@ VACOPTVALUE_UNSPECIFIED
Definition: vacuum.h:202
@ VACOPTVALUE_DISABLED
Definition: vacuum.h:204
#define VACOPT_DISABLE_PAGE_SKIPPING
Definition: vacuum.h:188
static void dead_items_cleanup(LVRelState *vacrel)
Definition: vacuumlazy.c:2923
static void update_relstats_all_indexes(LVRelState *vacrel)
Definition: vacuumlazy.c:3064
static void vacuum_error_callback(void *arg)
Definition: vacuumlazy.c:3099
static void lazy_truncate_heap(LVRelState *vacrel)
Definition: vacuumlazy.c:2546
static bool should_attempt_truncation(LVRelState *vacrel)
Definition: vacuumlazy.c:2526
@ VACUUM_ERRCB_PHASE_UNKNOWN
Definition: vacuumlazy.c:128
static void lazy_scan_heap(LVRelState *vacrel)
Definition: vacuumlazy.c:818
static bool lazy_check_wraparound_failsafe(LVRelState *vacrel)
Definition: vacuumlazy.c:2296
static void dead_items_alloc(LVRelState *vacrel, int nworkers)
Definition: vacuumlazy.c:2819
void visibilitymap_count(Relation rel, BlockNumber *all_visible, BlockNumber *all_frozen)

References _, LVRelState::aggressive, AmAutoVacuumWorkerProcess, appendStringInfo(), appendStringInfoString(), ErrorContextCallback::arg, Assert, LVRelState::bstrategy, buf, ErrorContextCallback::callback, LVRelState::consider_bypass_optimization, LVRelState::cutoffs, LVRelState::dbname, dead_items_alloc(), dead_items_cleanup(), LVRelState::do_index_cleanup, LVRelState::do_index_vacuuming, LVRelState::do_rel_truncate, ereport, errmsg(), errmsg_internal(), error_context_stack, VacuumCutoffs::FreezeLimit, LVRelState::frozen_pages, get_database_name(), get_namespace_name(), GetCurrentTimestamp(), GlobalVisTestFor(), i, VacuumParams::index_cleanup, LVRelState::indname, LVRelState::indrels, LVRelState::indstats, INFO, initStringInfo(), InvalidMultiXactId, InvalidTransactionId, VacuumParams::is_wraparound, IsInParallelMode(), lazy_check_wraparound_failsafe(), lazy_scan_heap(), lazy_truncate_heap(), LVRelState::live_tuples, LOG, VacuumParams::log_min_duration, LVRelState::lpdead_item_pages, LVRelState::lpdead_items, Max, LVRelState::missed_dead_pages, LVRelState::missed_dead_tuples, VacuumCutoffs::MultiXactCutoff, MultiXactIdPrecedesOrEquals(), MyDatabaseId, LVRelState::new_live_tuples, LVRelState::new_rel_tuples, LVRelState::NewRelfrozenXid, LVRelState::NewRelminMxid, LVRelState::nindexes, NoLock, LVRelState::nonempty_pages, LVRelState::num_index_scans, IndexBulkDeleteResult::num_pages, VacuumParams::nworkers, VacuumCutoffs::OldestMxact, VacuumCutoffs::OldestXmin, VacuumParams::options, IndexBulkDeleteResult::pages_deleted, IndexBulkDeleteResult::pages_free, IndexBulkDeleteResult::pages_newly_deleted, palloc(), palloc0(), pfree(), pg_rusage_init(), pg_rusage_show(), pgstat_progress_end_command(), pgstat_progress_start_command(), pgstat_progress_update_param(), pgstat_report_vacuum(), pgStatBlockReadTime, pgStatBlockWriteTime, pgWalUsage, LVRelState::phase, ErrorContextCallback::previous, PROGRESS_COMMAND_VACUUM, PROGRESS_VACUUM_PHASE, PROGRESS_VACUUM_PHASE_FINAL_CLEANUP, pstrdup(), RelationData::rd_rel, ReadNextTransactionId(), LVRelState::recently_dead_tuples, LVRelState::rel, LVRelState::rel_pages, RelationGetNamespace, RelationGetNumberOfBlocks, RelationGetRelationName, RelationGetRelid, VacuumCutoffs::relfrozenxid, VacuumCutoffs::relminmxid, LVRelState::relname, LVRelState::relnamespace, LVRelState::removed_pages, RowExclusiveLock, LVRelState::scanned_pages, should_attempt_truncation(), LVRelState::skippedallvis, LVRelState::skipwithvm, TimestampDifference(), TimestampDifferenceExceeds(), track_io_timing, TransactionIdPrecedesOrEquals(), VacuumParams::truncate, LVRelState::tuples_deleted, LVRelState::tuples_frozen, update_relstats_all_indexes(), vac_close_indexes(), vac_open_indexes(), vac_update_relstats(), VACOPT_DISABLE_PAGE_SKIPPING, VACOPT_VERBOSE, VACOPTVALUE_AUTO, VACOPTVALUE_DISABLED, VACOPTVALUE_ENABLED, VACOPTVALUE_UNSPECIFIED, VACUUM_ERRCB_PHASE_UNKNOWN, vacuum_error_callback(), vacuum_get_cutoffs(), VacuumFailsafeActive, VacuumPageDirty, VacuumPageHit, VacuumPageMiss, LVRelState::verbose, verbose, visibilitymap_count(), LVRelState::vistest, WalUsage::wal_bytes, WalUsage::wal_fpi, WalUsage::wal_records, and WalUsageAccumDiff().
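
heap_vacuum_rel() is not normally called directly; vacuum_rel() reaches it through the table AM's relation_vacuum callback after resolving the index_cleanup and truncate options (hence the Asserts at lines 381-383 above, which reject VACOPTVALUE_UNSPECIFIED and a still-AUTO truncate setting). The sketch below shows, under those assumptions, the minimum parameter setup such a caller would perform; the wrapper name vacuum_heap_sketch and the particular values chosen are illustrative, not code taken from vacuum.c.

#include "postgres.h"
#include "access/heapam.h"
#include "commands/vacuum.h"
#include "storage/bufmgr.h"

/* Sketch: run a plain (non-FULL) vacuum of one already-open heap relation. */
static void
vacuum_heap_sketch(Relation rel)
{
    VacuumParams params;
    BufferAccessStrategy bstrategy = GetAccessStrategy(BAS_VACUUM);

    memset(&params, 0, sizeof(params));
    params.options = VACOPT_VACUUM;            /* plain lazy vacuum */
    params.index_cleanup = VACOPTVALUE_AUTO;   /* let lazy_vacuum() decide */
    params.truncate = VACOPTVALUE_ENABLED;     /* must not be AUTO here */
    params.freeze_min_age = -1;                /* -1 = use GUC/reloption */
    params.freeze_table_age = -1;
    params.multixact_freeze_min_age = -1;
    params.multixact_freeze_table_age = -1;
    params.log_min_duration = -1;              /* no autovacuum-style logging */

    heap_vacuum_rel(rel, &params, bstrategy);

    FreeAccessStrategy(bstrategy);
}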

◆ HeapCheckForSerializableConflictOut()

void HeapCheckForSerializableConflictOut ( bool  visible,
Relation  relation,
HeapTuple  tuple,
Buffer  buffer,
Snapshot  snapshot 
)

Definition at line 10052 of file heapam.c.

10055 {
10056  TransactionId xid;
10057  HTSV_Result htsvResult;
10058 
10059  if (!CheckForSerializableConflictOutNeeded(relation, snapshot))
10060  return;
10061 
10062  /*
10063  * Check to see whether the tuple has been written to by a concurrent
10064  * transaction, either to create it not visible to us, or to delete it
10065  * while it is visible to us. The "visible" bool indicates whether the
10066  * tuple is visible to us, while HeapTupleSatisfiesVacuum checks what else
10067  * is going on with it.
10068  *
10069  * In the event of a concurrently inserted tuple that also happens to have
10070  * been concurrently updated (by a separate transaction), the xmin of the
10071  * tuple will be used -- not the updater's xid.
10072  */
10073  htsvResult = HeapTupleSatisfiesVacuum(tuple, TransactionXmin, buffer);
10074  switch (htsvResult)
10075  {
10076  case HEAPTUPLE_LIVE:
10077  if (visible)
10078  return;
10079  xid = HeapTupleHeaderGetXmin(tuple->t_data);
10080  break;
10081  case HEAPTUPLE_RECENTLY_DEAD:
10082  case HEAPTUPLE_DELETE_IN_PROGRESS:
10083  if (visible)
10084  xid = HeapTupleHeaderGetUpdateXid(tuple->t_data);
10085  else
10086  xid = HeapTupleHeaderGetXmin(tuple->t_data);
10087 
10088  if (TransactionIdPrecedes(xid, TransactionXmin))
10089  {
10090  /* This is like the HEAPTUPLE_DEAD case */
10091  Assert(!visible);
10092  return;
10093  }
10094  break;
10095  case HEAPTUPLE_INSERT_IN_PROGRESS:
10096  xid = HeapTupleHeaderGetXmin(tuple->t_data);
10097  break;
10098  case HEAPTUPLE_DEAD:
10099  Assert(!visible);
10100  return;
10101  default:
10102 
10103  /*
10104  * The only way to get to this default clause is if a new value is
10105  * added to the enum type without adding it to this switch
10106  * statement. That's a bug, so elog.
10107  */
10108  elog(ERROR, "unrecognized return value from HeapTupleSatisfiesVacuum: %u", htsvResult);
10109 
10110  /*
10111  * In spite of having all enum values covered and calling elog on
10112  * this default, some compilers think this is a code path which
10113  * allows xid to be used below without initialization. Silence
10114  * that warning.
10115  */
10116  xid = InvalidTransactionId;
10117  }
10118 
10119  Assert(TransactionIdIsValid(xid));
10120  Assert(TransactionIdFollowsOrEquals(xid, TransactionXmin));
10121 
10122  /*
10123  * Find top level xid. Bail out if xid is too early to be a conflict, or
10124  * if it's our own xid.
10125  */
10126  if (TransactionIdEquals(xid, GetTopTransactionIdIfAny()))
10127  return;
10128  xid = SubTransGetTopmostTransaction(xid);
10129  if (TransactionIdPrecedes(xid, TransactionXmin))
10130  return;
10131 
10132  CheckForSerializableConflictOut(relation, xid, snapshot);
10133 }
HTSV_Result HeapTupleSatisfiesVacuum(HeapTuple htup, TransactionId OldestXmin, Buffer buffer)
void CheckForSerializableConflictOut(Relation relation, TransactionId xid, Snapshot snapshot)
Definition: predicate.c:4008
TransactionId SubTransGetTopmostTransaction(TransactionId xid)
Definition: subtrans.c:163
bool TransactionIdFollowsOrEquals(TransactionId id1, TransactionId id2)
Definition: transam.c:329
TransactionId GetTopTransactionIdIfAny(void)
Definition: xact.c:438

References Assert, CheckForSerializableConflictOut(), CheckForSerializableConflictOutNeeded(), elog, ERROR, GetTopTransactionIdIfAny(), HEAPTUPLE_DEAD, HEAPTUPLE_DELETE_IN_PROGRESS, HEAPTUPLE_INSERT_IN_PROGRESS, HEAPTUPLE_LIVE, HEAPTUPLE_RECENTLY_DEAD, HeapTupleHeaderGetUpdateXid, HeapTupleHeaderGetXmin, HeapTupleSatisfiesVacuum(), InvalidTransactionId, SubTransGetTopmostTransaction(), HeapTupleData::t_data, TransactionIdEquals, TransactionIdFollowsOrEquals(), TransactionIdIsValid, TransactionIdPrecedes(), and TransactionXmin.

Referenced by heap_fetch(), heap_get_latest_tid(), heap_hot_search_buffer(), heapam_scan_bitmap_next_block(), heapam_scan_sample_next_tuple(), heapgettup(), and page_collect_tuples().
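
The following is a minimal usage sketch, not taken from the PostgreSQL sources: a hypothetical helper that mirrors how the callers listed above combine a visibility test with the serializable-conflict check while holding a pin and at least a share lock on the buffer.

    #include "access/heapam.h"
    #include "storage/bufmgr.h"

    /* Hypothetical helper: classify one tuple and report any SSI rw-conflict. */
    static bool
    fetch_check_visible(Relation rel, Buffer buf, HeapTuple tuple, Snapshot snapshot)
    {
        /* caller holds a pin and at least BUFFER_LOCK_SHARE on buf */
        bool        visible = HeapTupleSatisfiesVisibility(tuple, snapshot, buf);

        /* no-op unless the relation/snapshot pair needs SSI conflict tracking */
        HeapCheckForSerializableConflictOut(visible, rel, tuple, buf, snapshot);

        return visible;
    }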

◆ HeapTupleHeaderIsOnlyLocked()

bool HeapTupleHeaderIsOnlyLocked ( HeapTupleHeader  tuple)

Definition at line 1520 of file heapam_visibility.c.

1521 {
1522  TransactionId xmax;
1523 
1524  /* if there's no valid Xmax, then there's obviously no update either */
1525  if (tuple->t_infomask & HEAP_XMAX_INVALID)
1526  return true;
1527 
1528  if (tuple->t_infomask & HEAP_XMAX_LOCK_ONLY)
1529  return true;
1530 
1531  /* invalid xmax means no update */
1532  if (!TransactionIdIsValid(HeapTupleHeaderGetRawXmax(tuple)))
1533  return true;
1534 
1535  /*
1536  * if HEAP_XMAX_LOCK_ONLY is not set and not a multi, then this must
1537  * necessarily have been updated
1538  */
1539  if (!(tuple->t_infomask & HEAP_XMAX_IS_MULTI))
1540  return false;
1541 
1542  /* ... but if it's a multi, then perhaps the updating Xid aborted. */
1543  xmax = HeapTupleGetUpdateXid(tuple);
1544 
1545  /* not LOCKED_ONLY, so it has to have an xmax */
1546  Assert(TransactionIdIsValid(xmax));
1547 
1548  if (TransactionIdIsCurrentTransactionId(xmax))
1549  return false;
1550  if (TransactionIdIsInProgress(xmax))
1551  return false;
1552  if (TransactionIdDidCommit(xmax))
1553  return false;
1554 
1555  /*
1556  * not current, not in progress, not committed -- must have aborted or
1557  * crashed
1558  */
1559  return true;
1560 }
bool TransactionIdIsInProgress(TransactionId xid)
Definition: procarray.c:1390

References Assert, HEAP_XMAX_INVALID, HEAP_XMAX_IS_MULTI, HEAP_XMAX_LOCK_ONLY, HeapTupleGetUpdateXid(), HeapTupleHeaderGetRawXmax, HeapTupleHeaderData::t_infomask, TransactionIdDidCommit(), TransactionIdIsCurrentTransactionId(), TransactionIdIsInProgress(), and TransactionIdIsValid.

Referenced by heap_delete(), heap_get_latest_tid(), heap_lock_tuple(), heap_lock_updated_tuple_rec(), HeapTupleSatisfiesVacuumHorizon(), and rewrite_heap_tuple().
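
A small, hypothetical sketch (not part of the sources) of the question this routine answers for callers such as heap_get_latest_tid(): whether t_ctid is worth following because xmax represents a surviving updater rather than a mere locker.

    #include "access/heapam.h"
    #include "access/htup_details.h"

    /* Hypothetical helper: is there a newer tuple version to chase via t_ctid? */
    static bool
    tuple_was_actually_updated(HeapTupleHeader tup)
    {
        /* false covers: no valid xmax, lock-only xmax, or an aborted updater */
        return !HeapTupleHeaderIsOnlyLocked(tup);
    }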

◆ HeapTupleIsSurelyDead()

bool HeapTupleIsSurelyDead ( HeapTuple  htup,
struct GlobalVisState vistest 
)

Definition at line 1465 of file heapam_visibility.c.

1466 {
1467  HeapTupleHeader tuple = htup->t_data;
1468 
1469  Assert(ItemPointerIsValid(&htup->t_self));
1470  Assert(htup->t_tableOid != InvalidOid);
1471 
1472  /*
1473  * If the inserting transaction is marked invalid, then it aborted, and
1474  * the tuple is definitely dead. If it's marked neither committed nor
1475  * invalid, then we assume it's still alive (since the presumption is that
1476  * all relevant hint bits were just set moments ago).
1477  */
1478  if (!HeapTupleHeaderXminCommitted(tuple))
1479  return HeapTupleHeaderXminInvalid(tuple);
1480 
1481  /*
1482  * If the inserting transaction committed, but any deleting transaction
1483  * aborted, the tuple is still alive.
1484  */
1485  if (tuple->t_infomask & HEAP_XMAX_INVALID)
1486  return false;
1487 
1488  /*
1489  * If the XMAX is just a lock, the tuple is still alive.
1490  */
1491  if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
1492  return false;
1493 
1494  /*
1495  * If the Xmax is a MultiXact, it might be dead or alive, but we cannot
1496  * know without checking pg_multixact.
1497  */
1498  if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
1499  return false;
1500 
1501  /* If deleter isn't known to have committed, assume it's still running. */
1502  if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED))
1503  return false;
1504 
1505  /* Deleter committed, so tuple is dead if the XID is old enough. */
1506  return GlobalVisTestIsRemovableXid(vistest,
1507  HeapTupleHeaderGetRawXmax(tuple));
1508 }
#define HeapTupleHeaderXminCommitted(tup)
Definition: htup_details.h:320
#define HeapTupleHeaderXminInvalid(tup)
Definition: htup_details.h:325
#define InvalidOid
Definition: postgres_ext.h:36

References Assert, GlobalVisTestIsRemovableXid(), HEAP_XMAX_COMMITTED, HEAP_XMAX_INVALID, HEAP_XMAX_IS_LOCKED_ONLY, HEAP_XMAX_IS_MULTI, HeapTupleHeaderGetRawXmax, HeapTupleHeaderXminCommitted, HeapTupleHeaderXminInvalid, InvalidOid, ItemPointerIsValid(), HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleData::t_self, and HeapTupleData::t_tableOid.

Referenced by heap_hot_search_buffer().
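
A hedged sketch of how this is typically used (compare heap_hot_search_buffer()); the helper name is hypothetical, and GlobalVisTestFor() is assumed to be available via utils/snapmgr.h.

    #include "access/heapam.h"
    #include "utils/snapmgr.h"

    /* Hypothetical helper: hint-bit-only dead check, safe with just a buffer pin. */
    static bool
    tuple_surely_dead(Relation rel, HeapTuple tuple)
    {
        GlobalVisState *vistest = GlobalVisTestFor(rel);

        /* a false result only means "not provably dead from hint bits alone" */
        return HeapTupleIsSurelyDead(tuple, vistest);
    }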

◆ HeapTupleSatisfiesUpdate()

TM_Result HeapTupleSatisfiesUpdate ( HeapTuple  htup,
CommandId  curcid,
Buffer  buffer 
)

Definition at line 458 of file heapam_visibility.c.

460 {
461  HeapTupleHeader tuple = htup->t_data;
462 
463  Assert(ItemPointerIsValid(&htup->t_self));
464  Assert(htup->t_tableOid != InvalidOid);
465 
466  if (!HeapTupleHeaderXminCommitted(tuple))
467  {
468  if (HeapTupleHeaderXminInvalid(tuple))
469  return TM_Invisible;
470 
471  /* Used by pre-9.0 binary upgrades */
472  if (tuple->t_infomask & HEAP_MOVED_OFF)
473  {
474  TransactionId xvac = HeapTupleHeaderGetXvac(tuple);
475 
476  if (TransactionIdIsCurrentTransactionId(xvac))
477  return TM_Invisible;
478  if (!TransactionIdIsInProgress(xvac))
479  {
480  if (TransactionIdDidCommit(xvac))
481  {
482  SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
483  InvalidTransactionId);
484  return TM_Invisible;
485  }
486  SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
487  InvalidTransactionId);
488  }
489  }
490  /* Used by pre-9.0 binary upgrades */
491  else if (tuple->t_infomask & HEAP_MOVED_IN)
492  {
493  TransactionId xvac = HeapTupleHeaderGetXvac(tuple);
494 
495  if (!TransactionIdIsCurrentTransactionId(xvac))
496  {
497  if (TransactionIdIsInProgress(xvac))
498  return TM_Invisible;
499  if (TransactionIdDidCommit(xvac))
500  SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
501  InvalidTransactionId);
502  else
503  {
504  SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
505  InvalidTransactionId);
506  return TM_Invisible;
507  }
508  }
509  }
510  else if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmin(tuple)))
511  {
512  if (HeapTupleHeaderGetCmin(tuple) >= curcid)
513  return TM_Invisible; /* inserted after scan started */
514 
515  if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */
516  return TM_Ok;
517 
518  if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
519  {
520  TransactionId xmax;
521 
522  xmax = HeapTupleHeaderGetRawXmax(tuple);
523 
524  /*
525  * Careful here: even though this tuple was created by our own
526  * transaction, it might be locked by other transactions, if
527  * the original version was key-share locked when we updated
528  * it.
529  */
530 
531  if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
532  {
533  if (MultiXactIdIsRunning(xmax, true))
534  return TM_BeingModified;
535  else
536  return TM_Ok;
537  }
538 
539  /*
540  * If the locker is gone, then there is nothing of interest
541  * left in this Xmax; otherwise, report the tuple as
542  * locked/updated.
543  */
544  if (!TransactionIdIsInProgress(xmax))
545  return TM_Ok;
546  return TM_BeingModified;
547  }
548 
549  if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
550  {
551  TransactionId xmax;
552 
553  xmax = HeapTupleGetUpdateXid(tuple);
554 
555  /* not LOCKED_ONLY, so it has to have an xmax */
556  Assert(TransactionIdIsValid(xmax));
557 
558  /* deleting subtransaction must have aborted */
559  if (!TransactionIdIsCurrentTransactionId(xmax))
560  {
561  if (MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple),
562  false))
563  return TM_BeingModified;
564  return TM_Ok;
565  }
566  else
567  {
568  if (HeapTupleHeaderGetCmax(tuple) >= curcid)
569  return TM_SelfModified; /* updated after scan started */
570  else
571  return TM_Invisible; /* updated before scan started */
572  }
573  }
574 
575  if (!TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple)))
576  {
577  /* deleting subtransaction must have aborted */
578  SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
579  InvalidTransactionId);
580  return TM_Ok;
581  }
582 
583  if (HeapTupleHeaderGetCmax(tuple) >= curcid)
584  return TM_SelfModified; /* updated after scan started */
585  else
586  return TM_Invisible; /* updated before scan started */
587  }
588  else if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmin(tuple)))
589  return TM_Invisible;
590  else if (TransactionIdDidCommit(HeapTupleHeaderGetRawXmin(tuple)))
591  SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
592  HeapTupleHeaderGetRawXmin(tuple));
593  else
594  {
595  /* it must have aborted or crashed */
596  SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
597  InvalidTransactionId);
598  return TM_Invisible;
599  }
600  }
601 
602  /* by here, the inserting transaction has committed */
603 
604  if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid or aborted */
605  return TM_Ok;
606 
607  if (tuple->t_infomask & HEAP_XMAX_COMMITTED)
608  {
609  if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
610  return TM_Ok;
611  if (!ItemPointerEquals(&htup->t_self, &tuple->t_ctid))
612  return TM_Updated; /* updated by other */
613  else
614  return TM_Deleted; /* deleted by other */
615  }
616 
617  if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
618  {
619  TransactionId xmax;
620 
621  if (HEAP_LOCKED_UPGRADED(tuple->t_infomask))
622  return TM_Ok;
623 
624  if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
625  {
626  if (MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple), true))
627  return TM_BeingModified;
628 
629  SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, InvalidTransactionId);
630  return TM_Ok;
631  }
632 
633  xmax = HeapTupleGetUpdateXid(tuple);
634  if (!TransactionIdIsValid(xmax))
635  {
636  if (MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple), false))
637  return TM_BeingModified;
638  }
639 
640  /* not LOCKED_ONLY, so it has to have an xmax */
641  Assert(TransactionIdIsValid(xmax));
642 
643  if (TransactionIdIsCurrentTransactionId(xmax))
644  {
645  if (HeapTupleHeaderGetCmax(tuple) >= curcid)
646  return TM_SelfModified; /* updated after scan started */
647  else
648  return TM_Invisible; /* updated before scan started */
649  }
650 
651  if (TransactionIdIsInProgress(xmax))
652  return TM_BeingModified;
653 
654  if (TransactionIdDidCommit(xmax))
655  {
656  if (!ItemPointerEquals(&htup->t_self, &tuple->t_ctid))
657  return TM_Updated;
658  else
659  return TM_Deleted;
660  }
661 
662  /*
663  * By here, the update in the Xmax is either aborted or crashed, but
664  * what about the other members?
665  */
666 
667  if (!MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple), false))
668  {
669  /*
670  * There's no member, even just a locker, alive anymore, so we can
671  * mark the Xmax as invalid.
672  */
673  SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
674  InvalidTransactionId);
675  return TM_Ok;
676  }
677  else
678  {
679  /* There are lockers running */
680  return TM_BeingModified;
681  }
682  }
683 
684  if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple)))
685  {
686  if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
687  return TM_BeingModified;
688  if (HeapTupleHeaderGetCmax(tuple) >= curcid)
689  return TM_SelfModified; /* updated after scan started */
690  else
691  return TM_Invisible; /* updated before scan started */
692  }
693 
694  if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmax(tuple)))
695  return TM_BeingModified;
696 
697  if (!TransactionIdDidCommit(HeapTupleHeaderGetRawXmax(tuple)))
698  {
699  /* it must have aborted or crashed */
700  SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
701  InvalidTransactionId);
702  return TM_Ok;
703  }
704 
705  /* xmax transaction committed */
706 
707  if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
708  {
709  SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
710  InvalidTransactionId);
711  return TM_Ok;
712  }
713 
714  SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED,
715  HeapTupleHeaderGetRawXmax(tuple));
716  if (!ItemPointerEquals(&htup->t_self, &tuple->t_ctid))
717  return TM_Updated; /* updated by other */
718  else
719  return TM_Deleted; /* deleted by other */
720 }
CommandId HeapTupleHeaderGetCmin(HeapTupleHeader tup)
Definition: combocid.c:104
static void SetHintBits(HeapTupleHeader tuple, Buffer buffer, uint16 infomask, TransactionId xid)
#define HEAP_XMIN_COMMITTED
Definition: htup_details.h:204
#define HEAP_MOVED_IN
Definition: htup_details.h:212
#define HEAP_XMIN_INVALID
Definition: htup_details.h:205
bool MultiXactIdIsRunning(MultiXactId multi, bool isLockOnly)
Definition: multixact.c:557

References Assert, HEAP_LOCKED_UPGRADED, HEAP_MOVED_IN, HEAP_MOVED_OFF, HEAP_XMAX_COMMITTED, HEAP_XMAX_INVALID, HEAP_XMAX_IS_LOCKED_ONLY, HEAP_XMAX_IS_MULTI, HEAP_XMIN_COMMITTED, HEAP_XMIN_INVALID, HeapTupleGetUpdateXid(), HeapTupleHeaderGetCmax(), HeapTupleHeaderGetCmin(), HeapTupleHeaderGetRawXmax, HeapTupleHeaderGetRawXmin, HeapTupleHeaderGetXvac, HeapTupleHeaderXminCommitted, HeapTupleHeaderXminInvalid, InvalidOid, InvalidTransactionId, ItemPointerEquals(), ItemPointerIsValid(), MultiXactIdIsRunning(), SetHintBits(), HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleData::t_self, HeapTupleData::t_tableOid, TM_BeingModified, TM_Deleted, TM_Invisible, TM_Ok, TM_SelfModified, TM_Updated, TransactionIdDidCommit(), TransactionIdIsCurrentTransactionId(), TransactionIdIsInProgress(), and TransactionIdIsValid.

Referenced by heap_delete(), heap_lock_tuple(), heap_update(), and pgrowlocks().
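
A hypothetical sketch (not from the sources) of how a caller interprets the TM_Result while holding an exclusive content lock on the buffer, in the spirit of the callers listed above.

    #include "access/heapam.h"
    #include "access/tableam.h"     /* TM_Result */

    /* Hypothetical helper: can the current command modify this tuple right now? */
    static bool
    tuple_modifiable_now(HeapTuple tuple, CommandId curcid, Buffer buf)
    {
        switch (HeapTupleSatisfiesUpdate(tuple, curcid, buf))
        {
            case TM_Ok:
                return true;        /* proceed with the update/delete/lock */
            case TM_BeingModified:
            case TM_Updated:
            case TM_Deleted:
            case TM_SelfModified:
            case TM_Invisible:
            default:
                return false;       /* wait, follow the chain, or report an error */
        }
    }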

◆ HeapTupleSatisfiesVacuum()

HTSV_Result HeapTupleSatisfiesVacuum ( HeapTuple  htup,
TransactionId  OldestXmin,
Buffer  buffer 
)

Definition at line 1162 of file heapam_visibility.c.

1164 {
1165  TransactionId dead_after = InvalidTransactionId;
1166  HTSV_Result res;
1167 
1168  res = HeapTupleSatisfiesVacuumHorizon(htup, buffer, &dead_after);
1169 
1170  if (res == HEAPTUPLE_RECENTLY_DEAD)
1171  {
1172  Assert(TransactionIdIsValid(dead_after));
1173 
1174  if (TransactionIdPrecedes(dead_after, OldestXmin))
1175  res = HEAPTUPLE_DEAD;
1176  }
1177  else
1178  Assert(!TransactionIdIsValid(dead_after));
1179 
1180  return res;
1181 }
HTSV_Result HeapTupleSatisfiesVacuumHorizon(HeapTuple htup, Buffer buffer, TransactionId *dead_after)

References Assert, HEAPTUPLE_DEAD, HEAPTUPLE_RECENTLY_DEAD, HeapTupleSatisfiesVacuumHorizon(), InvalidTransactionId, res, TransactionIdIsValid, and TransactionIdPrecedes().

Referenced by heap_page_is_all_visible(), heapam_index_build_range_scan(), heapam_relation_copy_for_cluster(), heapam_scan_analyze_next_tuple(), HeapCheckForSerializableConflictOut(), lazy_scan_noprune(), statapprox_heap(), and tuple_all_visible().
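
A hypothetical sketch of the usual calling pattern: compute an OldestXmin horizon for the relation, then classify a tuple taken from a share-locked buffer. GetOldestNonRemovableTransactionId() is assumed here as the horizon source, as in VACUUM-adjacent callers.

    #include "access/heapam.h"
    #include "storage/procarray.h"  /* GetOldestNonRemovableTransactionId() */

    /* Hypothetical helper: could a cleanup pass remove this tuple? */
    static bool
    tuple_is_removable(Relation rel, HeapTuple tuple, Buffer buf)
    {
        TransactionId oldest_xmin = GetOldestNonRemovableTransactionId(rel);

        switch (HeapTupleSatisfiesVacuum(tuple, oldest_xmin, buf))
        {
            case HEAPTUPLE_DEAD:
                return true;
            case HEAPTUPLE_RECENTLY_DEAD:
            case HEAPTUPLE_LIVE:
            case HEAPTUPLE_INSERT_IN_PROGRESS:
            case HEAPTUPLE_DELETE_IN_PROGRESS:
                break;
        }
        return false;               /* keep compiler quiet */
    }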

◆ HeapTupleSatisfiesVacuumHorizon()

HTSV_Result HeapTupleSatisfiesVacuumHorizon ( HeapTuple  htup,
Buffer  buffer,
TransactionId dead_after 
)

Definition at line 1196 of file heapam_visibility.c.

1197 {
1198  HeapTupleHeader tuple = htup->t_data;
1199 
1200  Assert(ItemPointerIsValid(&htup->t_self));
1201  Assert(htup->t_tableOid != InvalidOid);
1202  Assert(dead_after != NULL);
1203 
1204  *dead_after = InvalidTransactionId;
1205 
1206  /*
1207  * Has inserting transaction committed?
1208  *
1209  * If the inserting transaction aborted, then the tuple was never visible
1210  * to any other transaction, so we can delete it immediately.
1211  */
1212  if (!HeapTupleHeaderXminCommitted(tuple))
1213  {
1214  if (HeapTupleHeaderXminInvalid(tuple))
1215  return HEAPTUPLE_DEAD;
1216  /* Used by pre-9.0 binary upgrades */
1217  else if (tuple->t_infomask & HEAP_MOVED_OFF)
1218  {
1219  TransactionId xvac = HeapTupleHeaderGetXvac(tuple);
1220 
1221  if (TransactionIdIsCurrentTransactionId(xvac))
1222  return HEAPTUPLE_DELETE_IN_PROGRESS;
1223  if (TransactionIdIsInProgress(xvac))
1224  return HEAPTUPLE_DELETE_IN_PROGRESS;
1225  if (TransactionIdDidCommit(xvac))
1226  {
1227  SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
1228  InvalidTransactionId);
1229  return HEAPTUPLE_DEAD;
1230  }
1231  SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
1232  InvalidTransactionId);
1233  }
1234  /* Used by pre-9.0 binary upgrades */
1235  else if (tuple->t_infomask & HEAP_MOVED_IN)
1236  {
1237  TransactionId xvac = HeapTupleHeaderGetXvac(tuple);
1238 
1239  if (TransactionIdIsCurrentTransactionId(xvac))
1240  return HEAPTUPLE_INSERT_IN_PROGRESS;
1241  if (TransactionIdIsInProgress(xvac))
1242  return HEAPTUPLE_INSERT_IN_PROGRESS;
1243  if (TransactionIdDidCommit(xvac))
1244  SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
1245  InvalidTransactionId);
1246  else
1247  {
1248  SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
1249  InvalidTransactionId);
1250  return HEAPTUPLE_DEAD;
1251  }
1252  }
1253  else if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmin(tuple)))
1254  {
1255  if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */
1256  return HEAPTUPLE_INSERT_IN_PROGRESS;
1257  /* only locked? run infomask-only check first, for performance */
1258  if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask) ||
1259  HeapTupleHeaderIsOnlyLocked(tuple))
1260  return HEAPTUPLE_INSERT_IN_PROGRESS;
1261  /* inserted and then deleted by same xact */
1262  if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(tuple)))
1263  return HEAPTUPLE_DELETE_IN_PROGRESS;
1264  /* deleting subtransaction must have aborted */
1265  return HEAPTUPLE_INSERT_IN_PROGRESS;
1266  }
1267  else if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmin(tuple)))
1268  {
1269  /*
1270  * It'd be possible to discern between INSERT/DELETE in progress
1271  * here by looking at xmax - but that doesn't seem beneficial for
1272  * the majority of callers and even detrimental for some. We'd
1273  * rather have callers look at/wait for xmin than xmax. It's
1274  * always correct to return INSERT_IN_PROGRESS because that's
1275  * what's happening from the view of other backends.
1276  */
1277  return HEAPTUPLE_INSERT_IN_PROGRESS;
1278  }
1279  else if (TransactionIdDidCommit(HeapTupleHeaderGetRawXmin(tuple)))
1280  SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
1281  HeapTupleHeaderGetRawXmin(tuple));
1282  else
1283  {
1284  /*
1285  * Not in Progress, Not Committed, so either Aborted or crashed
1286  */
1287  SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
1288  InvalidTransactionId);
1289  return HEAPTUPLE_DEAD;
1290  }
1291 
1292  /*
1293  * At this point the xmin is known committed, but we might not have
1294  * been able to set the hint bit yet; so we can no longer Assert that
1295  * it's set.
1296  */
1297  }
1298 
1299  /*
1300  * Okay, the inserter committed, so it was good at some point. Now what
1301  * about the deleting transaction?
1302  */
1303  if (tuple->t_infomask & HEAP_XMAX_INVALID)
1304  return HEAPTUPLE_LIVE;
1305 
1306  if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
1307  {
1308  /*
1309  * "Deleting" xact really only locked it, so the tuple is live in any
1310  * case. However, we should make sure that either XMAX_COMMITTED or
1311  * XMAX_INVALID gets set once the xact is gone, to reduce the costs of
1312  * examining the tuple for future xacts.
1313  */
1314  if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED))
1315  {
1316  if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
1317  {
1318  /*
1319  * If it's a pre-pg_upgrade tuple, the multixact cannot
1320  * possibly be running; otherwise have to check.
1321  */
1322  if (!HEAP_LOCKED_UPGRADED(tuple->t_infomask) &&
1323  MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple),
1324  true))
1325  return HEAPTUPLE_LIVE;
1326  SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, InvalidTransactionId);
1327  }
1328  else
1329  {
1330  if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmax(tuple)))
1331  return HEAPTUPLE_LIVE;
1332  SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
1333  InvalidTransactionId);
1334  }
1335  }
1336 
1337  /*
1338  * We don't really care whether xmax did commit, abort or crash. We
1339  * know that xmax did lock the tuple, but it did not and will never
1340  * actually update it.
1341  */
1342 
1343  return HEAPTUPLE_LIVE;
1344  }
1345 
1346  if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
1347  {
1348  TransactionId xmax = HeapTupleGetUpdateXid(tuple);
1349 
1350  /* already checked above */
1351  Assert(!HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask));
1352 
1353  /* not LOCKED_ONLY, so it has to have an xmax */
1354  Assert(TransactionIdIsValid(xmax));
1355 
1356  if (TransactionIdIsInProgress(xmax))
1357  return HEAPTUPLE_DELETE_IN_PROGRESS;
1358  else if (TransactionIdDidCommit(xmax))
1359  {
1360  /*
1361  * The multixact might still be running due to lockers. Need to
1362  * allow for pruning if below the xid horizon regardless --
1363  * otherwise we could end up with a tuple where the updater has to
1364  * be removed due to the horizon, but is not pruned away. It's
1365  * not a problem to prune that tuple, because any remaining
1366  * lockers will also be present in newer tuple versions.
1367  */
1368  *dead_after = xmax;
1369  return HEAPTUPLE_RECENTLY_DEAD;
1370  }
1371  else if (!MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple), false))
1372  {
1373  /*
1374  * Not in Progress, Not Committed, so either Aborted or crashed.
1375  * Mark the Xmax as invalid.
1376  */
1377  SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, InvalidTransactionId);
1378  }
1379 
1380  return HEAPTUPLE_LIVE;
1381  }
1382 
1383  if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED))
1384  {
1385  if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmax(tuple)))
1386  return HEAPTUPLE_DELETE_IN_PROGRESS;
1387  else if (TransactionIdDidCommit(HeapTupleHeaderGetRawXmax(tuple)))
1388  SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED,
1389  HeapTupleHeaderGetRawXmax(tuple));
1390  else
1391  {
1392  /*
1393  * Not in Progress, Not Committed, so either Aborted or crashed
1394  */
1395  SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
1396  InvalidTransactionId);
1397  return HEAPTUPLE_LIVE;
1398  }
1399 
1400  /*
1401  * At this point the xmax is known committed, but we might not have
1402  * been able to set the hint bit yet; so we can no longer Assert that
1403  * it's set.
1404  */
1405  }
1406 
1407  /*
1408  * Deleter committed, allow caller to check if it was recent enough that
1409  * some open transactions could still see the tuple.
1410  */
1411  *dead_after = HeapTupleHeaderGetRawXmax(tuple);
1412  return HEAPTUPLE_RECENTLY_DEAD;
1413 }

References Assert, HEAP_LOCKED_UPGRADED, HEAP_MOVED_IN, HEAP_MOVED_OFF, HEAP_XMAX_COMMITTED, HEAP_XMAX_INVALID, HEAP_XMAX_IS_LOCKED_ONLY, HEAP_XMAX_IS_MULTI, HEAP_XMIN_COMMITTED, HEAP_XMIN_INVALID, HEAPTUPLE_DEAD, HEAPTUPLE_DELETE_IN_PROGRESS, HEAPTUPLE_INSERT_IN_PROGRESS, HEAPTUPLE_LIVE, HEAPTUPLE_RECENTLY_DEAD, HeapTupleGetUpdateXid(), HeapTupleHeaderGetRawXmax, HeapTupleHeaderGetRawXmin, HeapTupleHeaderGetUpdateXid, HeapTupleHeaderGetXvac, HeapTupleHeaderIsOnlyLocked(), HeapTupleHeaderXminCommitted, HeapTupleHeaderXminInvalid, InvalidOid, InvalidTransactionId, ItemPointerIsValid(), MultiXactIdIsRunning(), SetHintBits(), HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleData::t_self, HeapTupleData::t_tableOid, TransactionIdDidCommit(), TransactionIdIsCurrentTransactionId(), TransactionIdIsInProgress(), and TransactionIdIsValid.

Referenced by heap_prune_satisfies_vacuum(), HeapTupleSatisfiesNonVacuumable(), and HeapTupleSatisfiesVacuum().
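
A hedged sketch of the two-step pattern used by pruning-style callers such as heap_prune_satisfies_vacuum(): take the raw classification, then promote HEAPTUPLE_RECENTLY_DEAD to HEAPTUPLE_DEAD once dead_after falls below a GlobalVisState horizon. The helper name is hypothetical.

    #include "access/heapam.h"
    #include "utils/snapmgr.h"      /* GlobalVisTestIsRemovableXid() */

    static HTSV_Result
    classify_against_vistest(HeapTuple tuple, Buffer buf, GlobalVisState *vistest)
    {
        TransactionId dead_after = InvalidTransactionId;
        HTSV_Result res = HeapTupleSatisfiesVacuumHorizon(tuple, buf, &dead_after);

        /* deleter committed: dead only if it is already below the horizon */
        if (res == HEAPTUPLE_RECENTLY_DEAD &&
            GlobalVisTestIsRemovableXid(vistest, dead_after))
            res = HEAPTUPLE_DEAD;

        return res;
    }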

◆ HeapTupleSatisfiesVisibility()

bool HeapTupleSatisfiesVisibility ( HeapTuple  htup,
Snapshot  snapshot,
Buffer  buffer 
)

Definition at line 1767 of file heapam_visibility.c.

1768 {
1769  switch (snapshot->snapshot_type)
1770  {
1771  case SNAPSHOT_MVCC:
1772  return HeapTupleSatisfiesMVCC(htup, snapshot, buffer);
1773  case SNAPSHOT_SELF:
1774  return HeapTupleSatisfiesSelf(htup, snapshot, buffer);
1775  case SNAPSHOT_ANY:
1776  return HeapTupleSatisfiesAny(htup, snapshot, buffer);
1777  case SNAPSHOT_TOAST:
1778  return HeapTupleSatisfiesToast(htup, snapshot, buffer);
1779  case SNAPSHOT_DIRTY:
1780  return HeapTupleSatisfiesDirty(htup, snapshot, buffer);
1781  case SNAPSHOT_HISTORIC_MVCC:
1782  return HeapTupleSatisfiesHistoricMVCC(htup, snapshot, buffer);
1783  case SNAPSHOT_NON_VACUUMABLE:
1784  return HeapTupleSatisfiesNonVacuumable(htup, snapshot, buffer);
1785  }
1786 
1787  return false; /* keep compiler quiet */
1788 }
static bool HeapTupleSatisfiesAny(HeapTuple htup, Snapshot snapshot, Buffer buffer)
static bool HeapTupleSatisfiesNonVacuumable(HeapTuple htup, Snapshot snapshot, Buffer buffer)
static bool HeapTupleSatisfiesToast(HeapTuple htup, Snapshot snapshot, Buffer buffer)
static bool HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot, Buffer buffer)
static bool HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot, Buffer buffer)
static bool HeapTupleSatisfiesSelf(HeapTuple htup, Snapshot snapshot, Buffer buffer)
static bool HeapTupleSatisfiesHistoricMVCC(HeapTuple htup, Snapshot snapshot, Buffer buffer)
@ SNAPSHOT_TOAST
Definition: snapshot.h:74
@ SNAPSHOT_SELF
Definition: snapshot.h:64
@ SNAPSHOT_NON_VACUUMABLE
Definition: snapshot.h:118
@ SNAPSHOT_MVCC
Definition: snapshot.h:50
@ SNAPSHOT_ANY
Definition: snapshot.h:69
@ SNAPSHOT_HISTORIC_MVCC
Definition: snapshot.h:109
@ SNAPSHOT_DIRTY
Definition: snapshot.h:102
SnapshotType snapshot_type
Definition: snapshot.h:144

References HeapTupleSatisfiesAny(), HeapTupleSatisfiesDirty(), HeapTupleSatisfiesHistoricMVCC(), HeapTupleSatisfiesMVCC(), HeapTupleSatisfiesNonVacuumable(), HeapTupleSatisfiesSelf(), HeapTupleSatisfiesToast(), SNAPSHOT_ANY, SNAPSHOT_DIRTY, SNAPSHOT_HISTORIC_MVCC, SNAPSHOT_MVCC, SNAPSHOT_NON_VACUUMABLE, SNAPSHOT_SELF, SNAPSHOT_TOAST, and SnapshotData::snapshot_type.

Referenced by heap_delete(), heap_fetch(), heap_get_latest_tid(), heap_hot_search_buffer(), heap_update(), heapam_scan_bitmap_next_block(), heapam_tuple_satisfies_snapshot(), heapgettup(), page_collect_tuples(), pgstat_heap(), SampleHeapTupleVisible(), and ScanSourceDatabasePgClassPage().
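
A hypothetical, self-contained sketch (not from the sources) showing the dispatcher in context: walk one heap page under a share lock and count the tuples visible to a given snapshot, roughly in the style of pgstat_heap().

    #include "access/heapam.h"
    #include "storage/bufmgr.h"
    #include "storage/bufpage.h"
    #include "utils/rel.h"

    /* Hypothetical helper: count snapshot-visible tuples on one block. */
    static int
    count_visible_on_page(Relation rel, BlockNumber blkno, Snapshot snapshot)
    {
        Buffer      buf = ReadBuffer(rel, blkno);
        Page        page;
        OffsetNumber off,
                    maxoff;
        int         nvisible = 0;

        LockBuffer(buf, BUFFER_LOCK_SHARE);
        page = BufferGetPage(buf);
        maxoff = PageGetMaxOffsetNumber(page);

        for (off = FirstOffsetNumber; off <= maxoff; off = OffsetNumberNext(off))
        {
            ItemId          lp = PageGetItemId(page, off);
            HeapTupleData   tup;

            if (!ItemIdIsNormal(lp))
                continue;           /* skip unused, dead and redirect line pointers */

            tup.t_data = (HeapTupleHeader) PageGetItem(page, lp);
            tup.t_len = ItemIdGetLength(lp);
            tup.t_tableOid = RelationGetRelid(rel);
            ItemPointerSet(&tup.t_self, blkno, off);

            if (HeapTupleSatisfiesVisibility(&tup, snapshot, buf))
                nvisible++;
        }

        LockBuffer(buf, BUFFER_LOCK_UNLOCK);
        ReleaseBuffer(buf);
        return nvisible;
    }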

◆ HeapTupleSetHintBits()

void HeapTupleSetHintBits ( HeapTupleHeader  tuple,
Buffer  buffer,
uint16  infomask,
TransactionId  xid 
)

Definition at line 141 of file heapam_visibility.c.

143 {
144  SetHintBits(tuple, buffer, infomask, xid);
145 }

References SetHintBits().

Referenced by UpdateXmaxHintBits().

◆ log_heap_prune_and_freeze()

void log_heap_prune_and_freeze ( Relation  relation,
Buffer  buffer,
TransactionId  conflict_xid,
bool  lp_truncate_only,
PruneReason  reason,
HeapTupleFreeze frozen,
int  nfrozen,
OffsetNumber redirected,
int  nredirected,
OffsetNumber dead,
int  ndead,
OffsetNumber unused,
int  nunused 
)

Definition at line 2032 of file pruneheap.c.

2040 {
2041  xl_heap_prune xlrec;
2042  XLogRecPtr recptr;
2043  uint8 info;
2044 
2045  /* The following local variables hold data registered in the WAL record: */
2046  xlhp_freeze_plan plans[MaxHeapTuplesPerPage];
2047  xlhp_freeze_plans freeze_plans;
2048  xlhp_prune_items redirect_items;
2049  xlhp_prune_items dead_items;
2050  xlhp_prune_items unused_items;
2051  OffsetNumber frz_offsets[MaxHeapTuplesPerPage];
2052 
2053  xlrec.flags = 0;
2054 
2055  /*
2056  * Prepare data for the buffer. The arrays are not actually in the
2057  * buffer, but we pretend that they are. When XLogInsert stores a full
2058  * page image, the arrays can be omitted.
2059  */
2060  XLogBeginInsert();
2061  XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
2062  if (nfrozen > 0)
2063  {
2064  int nplans;
2065 
2066  xlrec.flags |= XLHP_HAS_FREEZE_PLANS;
2067 
2068  /*
2069  * Prepare deduplicated representation for use in the WAL record. This
2070  * destructively sorts frozen tuples array in-place.
2071  */
2072  nplans = heap_log_freeze_plan(frozen, nfrozen, plans, frz_offsets);
2073 
2074  freeze_plans.nplans = nplans;
2075  XLogRegisterBufData(0, (char *) &freeze_plans,
2076  offsetof(xlhp_freeze_plans, plans));
2077  XLogRegisterBufData(0, (char *) plans,
2078  sizeof(xlhp_freeze_plan) * nplans);
2079  }
2080  if (nredirected > 0)
2081  {
2082  xlrec.flags |= XLHP_HAS_REDIRECTIONS;
2083 
2084  redirect_items.ntargets = nredirected;
2085  XLogRegisterBufData(0, (char *) &redirect_items,
2086  offsetof(xlhp_prune_items, data));
2087  XLogRegisterBufData(0, (char *) redirected,
2088  sizeof(OffsetNumber[2]) * nredirected);
2089  }
2090  if (ndead > 0)
2091  {
2092  xlrec.flags |= XLHP_HAS_DEAD_ITEMS;
2093 
2094  dead_items.ntargets = ndead;
2095  XLogRegisterBufData(0, (char *) &dead_items,
2096  offsetof(xlhp_prune_items, data));
2097  XLogRegisterBufData(0, (char *) dead,
2098  sizeof(OffsetNumber) * ndead);
2099  }
2100  if (nunused > 0)
2101  {
2102  xlrec.flags |= XLHP_HAS_NOW_UNUSED_ITEMS;
2103 
2104  unused_items.ntargets = nunused;
2105  XLogRegisterBufData(0, (char *) &unused_items,
2106  offsetof(xlhp_prune_items, data));
2107  XLogRegisterBufData(0, (char *) unused,
2108  sizeof(OffsetNumber) * nunused);
2109  }
2110  if (nfrozen > 0)
2111  XLogRegisterBufData(0, (char *) frz_offsets,
2112  sizeof(OffsetNumber) * nfrozen);
2113 
2114  /*
2115  * Prepare the main xl_heap_prune record. We already set the XLPH_HAS_*
2116  * flag above.
2117  */
2118  if (RelationIsAccessibleInLogicalDecoding(relation))
2119  xlrec.flags |= XLHP_IS_CATALOG_REL;
2120  if (TransactionIdIsValid(conflict_xid))
2121  xlrec.flags |= XLHP_HAS_CONFLICT_HORIZON;
2122  if (cleanup_lock)
2123  xlrec.flags |= XLHP_CLEANUP_LOCK;
2124  else
2125  {
2126  Assert(nredirected == 0 && ndead == 0);
2127  /* also, any items in 'unused' must've been LP_DEAD previously */
2128  }
2129  XLogRegisterData((char *) &xlrec, SizeOfHeapPrune);
2130  if (TransactionIdIsValid(conflict_xid))
2131  XLogRegisterData((char *) &conflict_xid, sizeof(TransactionId));
2132 
2133  switch (reason)
2134  {
2135  case PRUNE_ON_ACCESS:
2136  info = XLOG_HEAP2_PRUNE_ON_ACCESS;
2137  break;
2138  case PRUNE_VACUUM_SCAN:
2139  info = XLOG_HEAP2_PRUNE_VACUUM_SCAN;
2140  break;
2141  case PRUNE_VACUUM_CLEANUP:
2142  info = XLOG_HEAP2_PRUNE_VACUUM_CLEANUP;
2143  break;
2144  default:
2145  elog(ERROR, "unrecognized prune reason: %d", (int) reason);
2146  break;
2147  }
2148  recptr = XLogInsert(RM_HEAP2_ID, info);
2149 
2150  PageSetLSN(BufferGetPage(buffer), recptr);
2151 }
#define XLHP_HAS_CONFLICT_HORIZON
Definition: heapam_xlog.h:316
#define XLHP_HAS_FREEZE_PLANS
Definition: heapam_xlog.h:322
#define SizeOfHeapPrune
Definition: heapam_xlog.h:295
#define XLHP_HAS_NOW_UNUSED_ITEMS
Definition: heapam_xlog.h:331
#define XLHP_HAS_REDIRECTIONS
Definition: heapam_xlog.h:329
#define XLOG_HEAP2_PRUNE_VACUUM_SCAN
Definition: heapam_xlog.h:60
#define XLOG_HEAP2_PRUNE_ON_ACCESS
Definition: heapam_xlog.h:59
#define XLHP_CLEANUP_LOCK
Definition: heapam_xlog.h:308
#define XLHP_HAS_DEAD_ITEMS
Definition: heapam_xlog.h:330
#define XLOG_HEAP2_PRUNE_VACUUM_CLEANUP
Definition: heapam_xlog.h:61
#define XLHP_IS_CATALOG_REL
Definition: heapam_xlog.h:298
const void * data
static int heap_log_freeze_plan(HeapTupleFreeze *tuples, int ntuples, xlhp_freeze_plan *plans_out, OffsetNumber *offsets_out)
Definition: pruneheap.c:1957

References Assert, BufferGetPage(), data, elog, ERROR, xl_heap_prune::flags, heap_log_freeze_plan(), MaxHeapTuplesPerPage, xlhp_freeze_plans::nplans, xlhp_prune_items::ntargets, PageSetLSN(), PRUNE_ON_ACCESS, PRUNE_VACUUM_CLEANUP, PRUNE_VACUUM_SCAN, REGBUF_STANDARD, RelationIsAccessibleInLogicalDecoding, SizeOfHeapPrune, TransactionIdIsValid, XLHP_CLEANUP_LOCK, XLHP_HAS_CONFLICT_HORIZON, XLHP_HAS_DEAD_ITEMS, XLHP_HAS_FREEZE_PLANS, XLHP_HAS_NOW_UNUSED_ITEMS, XLHP_HAS_REDIRECTIONS, XLHP_IS_CATALOG_REL, XLOG_HEAP2_PRUNE_ON_ACCESS, XLOG_HEAP2_PRUNE_VACUUM_CLEANUP, XLOG_HEAP2_PRUNE_VACUUM_SCAN, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by heap_page_prune_and_freeze(), and lazy_vacuum_heap_page().
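
A hedged fragment showing only the usual call discipline; it is not from the sources. All lowercase identifiers (relation, buffer, conflict_xid, cleanup_lock, the item arrays and their counts) are assumed to be locals of a surrounding pruning function, and the page changes are assumed to be applied inside the same critical section.

    START_CRIT_SECTION();

    /* ... apply the redirect / dead / unused / freeze changes to the page ... */

    MarkBufferDirty(buffer);

    if (RelationNeedsWAL(relation))
        log_heap_prune_and_freeze(relation, buffer,
                                  conflict_xid,       /* may be InvalidTransactionId */
                                  cleanup_lock,       /* see the flag handling above */
                                  PRUNE_VACUUM_SCAN,  /* selects the WAL record type */
                                  frozen, nfrozen,
                                  redirected, nredirected,
                                  dead, ndead,
                                  unused, nunused);

    END_CRIT_SECTION();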

◆ ReleaseBulkInsertStatePin()

void ReleaseBulkInsertStatePin ( BulkInsertState  bistate)

Definition at line 1952 of file heapam.c.

1953 {
1954  if (bistate->current_buf != InvalidBuffer)
1955  ReleaseBuffer(bistate->current_buf);
1956  bistate->current_buf = InvalidBuffer;
1957 
1958  /*
1959  * Despite the name, we also reset bulk relation extension state.
1960  * Otherwise we can end up erroring out due to looking for free space in
1961  * ->next_free of one partition, even though ->next_free was set when
1962  * extending another partition. It could obviously also be bad for
1963  * efficiency to look at existing blocks at offsets from another
1964  * partition, even if we don't error out.
1965  */
1966  bistate->next_free = InvalidBlockNumber;
1967  bistate->last_free = InvalidBlockNumber;
1968 }

References BulkInsertStateData::current_buf, InvalidBlockNumber, InvalidBuffer, BulkInsertStateData::last_free, BulkInsertStateData::next_free, and ReleaseBuffer().

Referenced by CopyFrom().
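
A hypothetical sketch of the pattern this supports in CopyFrom(): keep one BulkInsertState across many inserts, but drop its pinned buffer (and relation-extension state) whenever the target relation changes.

    #include "access/heapam.h"
    #include "access/xact.h"        /* GetCurrentCommandId() */

    /* Hypothetical helper: insert tuples[i] into targets[i], reusing one bistate. */
    static void
    bulk_insert_rows(Relation *targets, HeapTuple *tuples, int ntuples)
    {
        BulkInsertState bistate = GetBulkInsertState();
        Relation    prev = NULL;

        for (int i = 0; i < ntuples; i++)
        {
            if (prev != NULL && prev != targets[i])
                ReleaseBulkInsertStatePin(bistate);
            prev = targets[i];

            heap_insert(targets[i], tuples[i], GetCurrentCommandId(true),
                        0 /* options */ , bistate);
        }

        FreeBulkInsertState(bistate);
    }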

◆ ResolveCminCmaxDuringDecoding()

bool ResolveCminCmaxDuringDecoding ( struct HTAB tuplecid_data,
Snapshot  snapshot,
HeapTuple  htup,
Buffer  buffer,
CommandId cmin,
CommandId cmax 
)

Definition at line 5245 of file reorderbuffer.c.

5249 {
5250  ReorderBufferTupleCidKey key;
5251  ReorderBufferTupleCidEnt *ent;
5252  ForkNumber forkno;
5253  BlockNumber blockno;
5254  bool updated_mapping = false;
5255 
5256  /*
5257  * Return unresolved if tuplecid_data is not valid. That's because when
5258  * streaming in-progress transactions we may run into tuples with the CID
5259  * before actually decoding them. Think e.g. about INSERT followed by
5260  * TRUNCATE, where the TRUNCATE may not be decoded yet when applying the
5261  * INSERT. So in such cases, we assume the CID is from the future
5262  * command.
5263  */
5264  if (tuplecid_data == NULL)
5265  return false;
5266 
5267  /* be careful about padding */
5268  memset(&key, 0, sizeof(key));
5269 
5270  Assert(!BufferIsLocal(buffer));
5271 
5272  /*
5273  * get relfilelocator from the buffer, no convenient way to access it
5274  * other than that.
5275  */
5276  BufferGetTag(buffer, &key.rlocator, &forkno, &blockno);
5277 
5278  /* tuples can only be in the main fork */
5279  Assert(forkno == MAIN_FORKNUM);
5280  Assert(blockno == ItemPointerGetBlockNumber(&htup->t_self));
5281 
5282  ItemPointerCopy(&htup->t_self,
5283  &key.tid);
5284 
5285 restart:
5286  ent = (ReorderBufferTupleCidEnt *)
5287  hash_search(tuplecid_data, &key, HASH_FIND, NULL);
5288 
5289  /*
5290  * failed to find a mapping, check whether the table was rewritten and
5291  * apply mapping if so, but only do that once - there can be no new
5292  * mappings while we are in here since we have to hold a lock on the
5293  * relation.
5294  */
5295  if (ent == NULL && !updated_mapping)
5296  {
5297  UpdateLogicalMappings(tuplecid_data, htup->t_tableOid, snapshot);
5298  /* now check but don't update for a mapping again */
5299  updated_mapping = true;
5300  goto restart;
5301  }
5302  else if (ent == NULL)
5303  return false;
5304 
5305  if (cmin)
5306  *cmin = ent->cmin;
5307  if (cmax)
5308  *cmax = ent->cmax;
5309  return true;
5310 }
#define BufferIsLocal(buffer)
Definition: buf.h:37
void BufferGetTag(Buffer buffer, RelFileLocator *rlocator, ForkNumber *forknum, BlockNumber *blknum)
Definition: bufmgr.c:3688
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition: dynahash.c:955
@ HASH_FIND
Definition: hsearch.h:113
ForkNumber
Definition: relpath.h:48
static void UpdateLogicalMappings(HTAB *tuplecid_data, Oid relid, Snapshot snapshot)
static HTAB * tuplecid_data
Definition: snapmgr.c:102

References Assert, BufferGetTag(), BufferIsLocal, ReorderBufferTupleCidEnt::cmax, ReorderBufferTupleCidEnt::cmin, HASH_FIND, hash_search(), ItemPointerCopy(), ItemPointerGetBlockNumber(), sort-test::key, MAIN_FORKNUM, HeapTupleData::t_self, HeapTupleData::t_tableOid, tuplecid_data, and UpdateLogicalMappings().

Referenced by HeapTupleSatisfiesHistoricMVCC().

◆ simple_heap_delete()

void simple_heap_delete ( Relation  relation,
ItemPointer  tid 
)

Definition at line 3100 of file heapam.c.

3101 {
3102  TM_Result result;
3103  TM_FailureData tmfd;
3104 
3105  result = heap_delete(relation, tid,
3106  GetCurrentCommandId(true), InvalidSnapshot,
3107  true /* wait for commit */ ,
3108  &tmfd, false /* changingPart */ );
3109  switch (result)
3110  {
3111  case TM_SelfModified:
3112  /* Tuple was already updated in current command? */
3113  elog(ERROR, "tuple already updated by self");
3114  break;
3115 
3116  case TM_Ok:
3117  /* done successfully */
3118  break;
3119 
3120  case TM_Updated:
3121  elog(ERROR, "tuple concurrently updated");
3122  break;
3123 
3124  case TM_Deleted:
3125  elog(ERROR, "tuple concurrently deleted");
3126  break;
3127 
3128  default:
3129  elog(ERROR, "unrecognized heap_delete status: %u", result);
3130  break;
3131  }
3132 }
TM_Result heap_delete(Relation relation, ItemPointer tid, CommandId cid, Snapshot crosscheck, bool wait, TM_FailureData *tmfd, bool changingPart)
Definition: heapam.c:2679
CommandId GetCurrentCommandId(bool used)
Definition: xact.c:826

References elog, ERROR, GetCurrentCommandId(), heap_delete(), InvalidSnapshot, TM_Deleted, TM_Ok, TM_SelfModified, and TM_Updated.

Referenced by CatalogTupleDelete(), and toast_delete_datum().
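
A hypothetical sketch (not from the sources) that deletes every tuple visible to the transaction snapshot; real callers normally go through CatalogTupleDelete() or the table AM layer, but the shape of the call is the same.

    #include "access/heapam.h"
    #include "access/table.h"
    #include "access/tableam.h"
    #include "utils/snapmgr.h"

    /* Hypothetical helper: delete all currently visible rows of a heap table. */
    static void
    delete_all_visible(Oid relid)
    {
        Relation    rel = table_open(relid, RowExclusiveLock);
        TableScanDesc scan = table_beginscan(rel, GetTransactionSnapshot(), 0, NULL);
        HeapTuple   tuple;

        while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
            simple_heap_delete(rel, &tuple->t_self);    /* errors out on concurrent changes */

        table_endscan(scan);
        table_close(rel, RowExclusiveLock);
    }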

◆ simple_heap_insert()

void simple_heap_insert ( Relation  relation,
HeapTuple  tup 
)

Definition at line 2621 of file heapam.c.

2622 {
2623  heap_insert(relation, tup, GetCurrentCommandId(true), 0, NULL);
2624 }
void heap_insert(Relation relation, HeapTuple tup, CommandId cid, int options, BulkInsertState bistate)
Definition: heapam.c:1990

References GetCurrentCommandId(), and heap_insert().

Referenced by CatalogTupleInsert(), CatalogTupleInsertWithInfo(), and InsertOneTuple().
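
A hypothetical sketch: form a tuple and insert it with the current command ID and default options. As with the catalog helpers that call this, no index entries are created here.

    #include "access/heapam.h"
    #include "access/htup_details.h"
    #include "access/table.h"
    #include "utils/rel.h"

    /* Hypothetical helper: build and insert one row into a heap table. */
    static void
    insert_one_row(Oid relid, Datum *values, bool *isnull)
    {
        Relation    rel = table_open(relid, RowExclusiveLock);
        HeapTuple   tup = heap_form_tuple(RelationGetDescr(rel), values, isnull);

        simple_heap_insert(rel, tup);   /* sets tup->t_self to the new TID */

        /* callers needing index entries must add them, as CatalogTupleInsert() does */
        heap_freetuple(tup);
        table_close(rel, RowExclusiveLock);
    }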

◆ simple_heap_update()

void simple_heap_update ( Relation  relation,
ItemPointer  otid,
HeapTuple  tup,
TU_UpdateIndexes update_indexes 
)

Definition at line 4217 of file heapam.c.

4219 {
4220  TM_Result result;
4221  TM_FailureData tmfd;
4222  LockTupleMode lockmode;
4223 
4224  result = heap_update(relation, otid, tup,
4225  GetCurrentCommandId(true), InvalidSnapshot,
4226  true /* wait for commit */ ,
4227  &tmfd, &lockmode, update_indexes);
4228  switch (result)
4229  {
4230  case TM_SelfModified:
4231  /* Tuple was already updated in current command? */
4232  elog(ERROR, "tuple already updated by self");
4233  break;
4234 
4235  case TM_Ok:
4236  /* done successfully */
4237  break;
4238 
4239  case TM_Updated:
4240  elog(ERROR, "tuple concurrently updated");
4241  break;
4242 
4243  case TM_Deleted:
4244  elog(ERROR, "tuple concurrently deleted");
4245  break;
4246 
4247  default:
4248  elog(ERROR, "unrecognized heap_update status: %u", result);
4249  break;
4250  }
4251 }
TM_Result heap_update(Relation relation, ItemPointer otid, HeapTuple newtup, CommandId cid, Snapshot crosscheck, bool wait, TM_FailureData *tmfd, LockTupleMode *lockmode, TU_UpdateIndexes *update_indexes)
Definition: heapam.c:3146
LockTupleMode
Definition: lockoptions.h:50

References elog, ERROR, GetCurrentCommandId(), heap_update(), InvalidSnapshot, TM_Deleted, TM_Ok, TM_SelfModified, and TM_Updated.

Referenced by CatalogTupleUpdate(), and CatalogTupleUpdateWithInfo().
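
A hypothetical sketch of the caller's side of the contract: perform the update, then consult the returned TU_UpdateIndexes value to decide whether (and which) index entries still have to be inserted, as CatalogTupleUpdate() does.

    #include "access/heapam.h"
    #include "access/tableam.h"     /* TU_UpdateIndexes */

    /* Hypothetical helper: replace the tuple at otid with newtup. */
    static void
    overwrite_tuple(Relation rel, ItemPointer otid, HeapTuple newtup)
    {
        TU_UpdateIndexes update_indexes;

        simple_heap_update(rel, otid, newtup, &update_indexes);

        if (update_indexes != TU_None)
        {
            /*
             * TU_All: insert entries into all indexes; TU_Summarizing: only
             * into summarizing indexes (the update was HOT otherwise).
             */
        }
    }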