PostgreSQL Source Code  git master
heapam.h File Reference
#include "access/relation.h"
#include "access/relscan.h"
#include "access/sdir.h"
#include "access/skey.h"
#include "access/table.h"
#include "access/tableam.h"
#include "nodes/lockoptions.h"
#include "nodes/primnodes.h"
#include "storage/bufpage.h"
#include "storage/dsm.h"
#include "storage/lockdefs.h"
#include "storage/read_stream.h"
#include "storage/shm_toc.h"
#include "utils/relcache.h"
#include "utils/snapshot.h"
Include dependency graph for heapam.h: (graph not shown)
This graph shows which files directly or indirectly include this file: (graph not shown)


Data Structures

struct  HeapScanDescData
 
struct  IndexFetchHeapData
 
struct  HeapTupleFreeze
 
struct  HeapPageFreeze
 
struct  PruneFreezeResult
 

Macros

#define HEAP_INSERT_SKIP_FSM   TABLE_INSERT_SKIP_FSM
 
#define HEAP_INSERT_FROZEN   TABLE_INSERT_FROZEN
 
#define HEAP_INSERT_NO_LOGICAL   TABLE_INSERT_NO_LOGICAL
 
#define HEAP_INSERT_SPECULATIVE   0x0010
 
#define HEAP_PAGE_PRUNE_MARK_UNUSED_NOW   (1 << 0)
 
#define HEAP_PAGE_PRUNE_FREEZE   (1 << 1)
 
#define MaxLockTupleMode   LockTupleExclusive
 
#define HEAP_FREEZE_CHECK_XMIN_COMMITTED   0x01
 
#define HEAP_FREEZE_CHECK_XMAX_ABORTED   0x02
 
#define HeapScanIsValid(scan)   PointerIsValid(scan)
 

Typedefs

typedef struct BulkInsertStateData * BulkInsertState
 
typedef struct HeapScanDescData HeapScanDescData
 
typedef struct HeapScanDescData * HeapScanDesc
 
typedef struct IndexFetchHeapData IndexFetchHeapData
 
typedef struct HeapTupleFreeze HeapTupleFreeze
 
typedef struct HeapPageFreeze HeapPageFreeze
 
typedef struct PruneFreezeResult PruneFreezeResult
 

Enumerations

enum  HTSV_Result {
  HEAPTUPLE_DEAD , HEAPTUPLE_LIVE , HEAPTUPLE_RECENTLY_DEAD , HEAPTUPLE_INSERT_IN_PROGRESS ,
  HEAPTUPLE_DELETE_IN_PROGRESS
}
 
enum  PruneReason { PRUNE_ON_ACCESS , PRUNE_VACUUM_SCAN , PRUNE_VACUUM_CLEANUP }
 

Functions

TableScanDesc heap_beginscan (Relation relation, Snapshot snapshot, int nkeys, ScanKey key, ParallelTableScanDesc parallel_scan, uint32 flags)
 
void heap_setscanlimits (TableScanDesc sscan, BlockNumber startBlk, BlockNumber numBlks)
 
void heap_prepare_pagescan (TableScanDesc sscan)
 
void heap_rescan (TableScanDesc sscan, ScanKey key, bool set_params, bool allow_strat, bool allow_sync, bool allow_pagemode)
 
void heap_endscan (TableScanDesc sscan)
 
HeapTuple heap_getnext (TableScanDesc sscan, ScanDirection direction)
 
bool heap_getnextslot (TableScanDesc sscan, ScanDirection direction, struct TupleTableSlot *slot)
 
void heap_set_tidrange (TableScanDesc sscan, ItemPointer mintid, ItemPointer maxtid)
 
bool heap_getnextslot_tidrange (TableScanDesc sscan, ScanDirection direction, TupleTableSlot *slot)
 
bool heap_fetch (Relation relation, Snapshot snapshot, HeapTuple tuple, Buffer *userbuf, bool keep_buf)
 
bool heap_hot_search_buffer (ItemPointer tid, Relation relation, Buffer buffer, Snapshot snapshot, HeapTuple heapTuple, bool *all_dead, bool first_call)
 
void heap_get_latest_tid (TableScanDesc sscan, ItemPointer tid)
 
BulkInsertState GetBulkInsertState (void)
 
void FreeBulkInsertState (BulkInsertState)
 
void ReleaseBulkInsertStatePin (BulkInsertState bistate)
 
void heap_insert (Relation relation, HeapTuple tup, CommandId cid, int options, BulkInsertState bistate)
 
void heap_multi_insert (Relation relation, struct TupleTableSlot **slots, int ntuples, CommandId cid, int options, BulkInsertState bistate)
 
TM_Result heap_delete (Relation relation, ItemPointer tid, CommandId cid, Snapshot crosscheck, bool wait, struct TM_FailureData *tmfd, bool changingPart)
 
void heap_finish_speculative (Relation relation, ItemPointer tid)
 
void heap_abort_speculative (Relation relation, ItemPointer tid)
 
TM_Result heap_update (Relation relation, ItemPointer otid, HeapTuple newtup, CommandId cid, Snapshot crosscheck, bool wait, struct TM_FailureData *tmfd, LockTupleMode *lockmode, TU_UpdateIndexes *update_indexes)
 
TM_Result heap_lock_tuple (Relation relation, HeapTuple tuple, CommandId cid, LockTupleMode mode, LockWaitPolicy wait_policy, bool follow_updates, Buffer *buffer, struct TM_FailureData *tmfd)
 
void heap_inplace_update (Relation relation, HeapTuple tuple)
 
bool heap_prepare_freeze_tuple (HeapTupleHeader tuple, const struct VacuumCutoffs *cutoffs, HeapPageFreeze *pagefrz, HeapTupleFreeze *frz, bool *totally_frozen)
 
void heap_pre_freeze_checks (Buffer buffer, HeapTupleFreeze *tuples, int ntuples)
 
void heap_freeze_prepared_tuples (Buffer buffer, HeapTupleFreeze *tuples, int ntuples)
 
bool heap_freeze_tuple (HeapTupleHeader tuple, TransactionId relfrozenxid, TransactionId relminmxid, TransactionId FreezeLimit, TransactionId MultiXactCutoff)
 
bool heap_tuple_should_freeze (HeapTupleHeader tuple, const struct VacuumCutoffs *cutoffs, TransactionId *NoFreezePageRelfrozenXid, MultiXactId *NoFreezePageRelminMxid)
 
bool heap_tuple_needs_eventual_freeze (HeapTupleHeader tuple)
 
void simple_heap_insert (Relation relation, HeapTuple tup)
 
void simple_heap_delete (Relation relation, ItemPointer tid)
 
void simple_heap_update (Relation relation, ItemPointer otid, HeapTuple tup, TU_UpdateIndexes *update_indexes)
 
TransactionId heap_index_delete_tuples (Relation rel, TM_IndexDeleteOp *delstate)
 
void heap_page_prune_opt (Relation relation, Buffer buffer)
 
void heap_page_prune_and_freeze (Relation relation, Buffer buffer, struct GlobalVisState *vistest, int options, struct VacuumCutoffs *cutoffs, PruneFreezeResult *presult, PruneReason reason, OffsetNumber *off_loc, TransactionId *new_relfrozen_xid, MultiXactId *new_relmin_mxid)
 
void heap_page_prune_execute (Buffer buffer, bool lp_truncate_only, OffsetNumber *redirected, int nredirected, OffsetNumber *nowdead, int ndead, OffsetNumber *nowunused, int nunused)
 
void heap_get_root_tuples (Page page, OffsetNumber *root_offsets)
 
void log_heap_prune_and_freeze (Relation relation, Buffer buffer, TransactionId conflict_xid, bool cleanup_lock, PruneReason reason, HeapTupleFreeze *frozen, int nfrozen, OffsetNumber *redirected, int nredirected, OffsetNumber *dead, int ndead, OffsetNumber *unused, int nunused)
 
void heap_vacuum_rel (Relation rel, struct VacuumParams *params, BufferAccessStrategy bstrategy)
 
bool HeapTupleSatisfiesVisibility (HeapTuple htup, Snapshot snapshot, Buffer buffer)
 
TM_Result HeapTupleSatisfiesUpdate (HeapTuple htup, CommandId curcid, Buffer buffer)
 
HTSV_Result HeapTupleSatisfiesVacuum (HeapTuple htup, TransactionId OldestXmin, Buffer buffer)
 
HTSV_Result HeapTupleSatisfiesVacuumHorizon (HeapTuple htup, Buffer buffer, TransactionId *dead_after)
 
void HeapTupleSetHintBits (HeapTupleHeader tuple, Buffer buffer, uint16 infomask, TransactionId xid)
 
bool HeapTupleHeaderIsOnlyLocked (HeapTupleHeader tuple)
 
bool HeapTupleIsSurelyDead (HeapTuple htup, struct GlobalVisState *vistest)
 
bool ResolveCminCmaxDuringDecoding (struct HTAB *tuplecid_data, Snapshot snapshot, HeapTuple htup, Buffer buffer, CommandId *cmin, CommandId *cmax)
 
void HeapCheckForSerializableConflictOut (bool visible, Relation relation, HeapTuple tuple, Buffer buffer, Snapshot snapshot)
 

Macro Definition Documentation

◆ HEAP_FREEZE_CHECK_XMAX_ABORTED

#define HEAP_FREEZE_CHECK_XMAX_ABORTED   0x02

Definition at line 137 of file heapam.h.

◆ HEAP_FREEZE_CHECK_XMIN_COMMITTED

#define HEAP_FREEZE_CHECK_XMIN_COMMITTED   0x01

Definition at line 136 of file heapam.h.

◆ HEAP_INSERT_FROZEN

#define HEAP_INSERT_FROZEN   TABLE_INSERT_FROZEN

Definition at line 36 of file heapam.h.

◆ HEAP_INSERT_NO_LOGICAL

#define HEAP_INSERT_NO_LOGICAL   TABLE_INSERT_NO_LOGICAL

Definition at line 37 of file heapam.h.

◆ HEAP_INSERT_SKIP_FSM

#define HEAP_INSERT_SKIP_FSM   TABLE_INSERT_SKIP_FSM

Definition at line 35 of file heapam.h.

◆ HEAP_INSERT_SPECULATIVE

#define HEAP_INSERT_SPECULATIVE   0x0010

Definition at line 38 of file heapam.h.

◆ HEAP_PAGE_PRUNE_FREEZE

#define HEAP_PAGE_PRUNE_FREEZE   (1 << 1)

Definition at line 42 of file heapam.h.

◆ HEAP_PAGE_PRUNE_MARK_UNUSED_NOW

#define HEAP_PAGE_PRUNE_MARK_UNUSED_NOW   (1 << 0)

Definition at line 41 of file heapam.h.

◆ HeapScanIsValid

#define HeapScanIsValid(scan)   PointerIsValid(scan)

Definition at line 287 of file heapam.h.

◆ MaxLockTupleMode

#define MaxLockTupleMode   LockTupleExclusive

Definition at line 48 of file heapam.h.

Typedef Documentation

◆ BulkInsertState

Definition at line 44 of file heapam.h.

◆ HeapPageFreeze

◆ HeapScanDesc

typedef struct HeapScanDescData* HeapScanDesc

Definition at line 109 of file heapam.h.

◆ HeapScanDescData

◆ HeapTupleFreeze

◆ IndexFetchHeapData

◆ PruneFreezeResult

Enumeration Type Documentation

◆ HTSV_Result

Enumerator
HEAPTUPLE_DEAD 
HEAPTUPLE_LIVE 
HEAPTUPLE_RECENTLY_DEAD 
HEAPTUPLE_INSERT_IN_PROGRESS 
HEAPTUPLE_DELETE_IN_PROGRESS 

Definition at line 123 of file heapam.h.

124 {
125  HEAPTUPLE_DEAD, /* tuple is dead and deletable */
126  HEAPTUPLE_LIVE, /* tuple is live (committed, no deleter) */
127  HEAPTUPLE_RECENTLY_DEAD, /* tuple is dead, but not deletable yet */
128  HEAPTUPLE_INSERT_IN_PROGRESS, /* inserting xact is still in progress */
129  HEAPTUPLE_DELETE_IN_PROGRESS, /* deleting xact is still in progress */
130 } HTSV_Result;
HTSV_Result
Definition: heapam.h:124
@ HEAPTUPLE_RECENTLY_DEAD
Definition: heapam.h:127
@ HEAPTUPLE_INSERT_IN_PROGRESS
Definition: heapam.h:128
@ HEAPTUPLE_LIVE
Definition: heapam.h:126
@ HEAPTUPLE_DELETE_IN_PROGRESS
Definition: heapam.h:129
@ HEAPTUPLE_DEAD
Definition: heapam.h:125
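
A minimal sketch of how a caller might branch on these values, using HeapTupleSatisfiesVacuum() (declared below). The helper name and the assumption that the caller already holds a pin and lock on the buffer are illustrative only:

#include "postgres.h"
#include "access/heapam.h"

/* Hypothetical helper: decide whether a tuple could be removed right now. */
static bool
tuple_is_removable(HeapTuple htup, TransactionId OldestXmin, Buffer buffer)
{
    switch (HeapTupleSatisfiesVacuum(htup, OldestXmin, buffer))
    {
        case HEAPTUPLE_DEAD:
            return true;        /* dead and safe to remove */
        case HEAPTUPLE_RECENTLY_DEAD:
        case HEAPTUPLE_LIVE:
        case HEAPTUPLE_INSERT_IN_PROGRESS:
        case HEAPTUPLE_DELETE_IN_PROGRESS:
            return false;       /* still needed, at least for now */
    }
    return false;               /* keep the compiler quiet */
}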

◆ PruneReason

Enumerator
PRUNE_ON_ACCESS 
PRUNE_VACUUM_SCAN 
PRUNE_VACUUM_CLEANUP 

Definition at line 267 of file heapam.h.

268 {
269  PRUNE_ON_ACCESS, /* on-access pruning */
270  PRUNE_VACUUM_SCAN, /* VACUUM 1st heap pass */
271  PRUNE_VACUUM_CLEANUP, /* VACUUM 2nd heap pass */
272 } PruneReason;
PruneReason
Definition: heapam.h:268
@ PRUNE_VACUUM_CLEANUP
Definition: heapam.h:271
@ PRUNE_ON_ACCESS
Definition: heapam.h:269
@ PRUNE_VACUUM_SCAN
Definition: heapam.h:270

Function Documentation

◆ FreeBulkInsertState()

void FreeBulkInsertState ( BulkInsertState  bistate)

Definition at line 1944 of file heapam.c.

1945 {
1946  if (bistate->current_buf != InvalidBuffer)
1947  ReleaseBuffer(bistate->current_buf);
1948  FreeAccessStrategy(bistate->strategy);
1949  pfree(bistate);
1950 }
#define InvalidBuffer
Definition: buf.h:25
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:4936
void FreeAccessStrategy(BufferAccessStrategy strategy)
Definition: freelist.c:681
void pfree(void *pointer)
Definition: mcxt.c:1521
BufferAccessStrategy strategy
Definition: hio.h:31
Buffer current_buf
Definition: hio.h:32

References BulkInsertStateData::current_buf, FreeAccessStrategy(), InvalidBuffer, pfree(), ReleaseBuffer(), and BulkInsertStateData::strategy.

Referenced by ATRewriteTable(), CopyFrom(), CopyMultiInsertBufferCleanup(), deleteSplitPartitionContext(), intorel_shutdown(), moveMergedTablesRows(), and transientrel_shutdown().

◆ GetBulkInsertState()

BulkInsertState GetBulkInsertState ( void  )

Definition at line 1927 of file heapam.c.

1928 {
1929  BulkInsertState bistate;
1930 
1931  bistate = (BulkInsertState) palloc(sizeof(BulkInsertStateData));
1932  bistate->strategy = GetAccessStrategy(BAS_BULKWRITE);
1933  bistate->current_buf = InvalidBuffer;
1934  bistate->next_free = InvalidBlockNumber;
1935  bistate->last_free = InvalidBlockNumber;
1936  bistate->already_extended_by = 0;
1937  return bistate;
1938 }
#define InvalidBlockNumber
Definition: block.h:33
@ BAS_BULKWRITE
Definition: bufmgr.h:38
BufferAccessStrategy GetAccessStrategy(BufferAccessStrategyType btype)
Definition: freelist.c:541
struct BulkInsertStateData * BulkInsertState
Definition: heapam.h:44
void * palloc(Size size)
Definition: mcxt.c:1317
BlockNumber last_free
Definition: hio.h:49
uint32 already_extended_by
Definition: hio.h:50
BlockNumber next_free
Definition: hio.h:48

References BulkInsertStateData::already_extended_by, BAS_BULKWRITE, BulkInsertStateData::current_buf, GetAccessStrategy(), InvalidBlockNumber, InvalidBuffer, BulkInsertStateData::last_free, BulkInsertStateData::next_free, palloc(), and BulkInsertStateData::strategy.

Referenced by ATRewriteTable(), CopyFrom(), CopyMultiInsertBufferInit(), createSplitPartitionContext(), intorel_startup(), moveMergedTablesRows(), and transientrel_startup().
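
A minimal usage sketch of the BulkInsertState lifecycle, assuming the relation is already open and suitably locked and the tuples come from elsewhere; the helper name is illustrative, not part of this API:

#include "postgres.h"
#include "access/heapam.h"

static void
bulk_insert_sketch(Relation rel, HeapTuple *tuples, int ntuples, CommandId cid)
{
    BulkInsertState bistate = GetBulkInsertState();

    /* Reuse the same buffer and access strategy across many inserts. */
    for (int i = 0; i < ntuples; i++)
        heap_insert(rel, tuples[i], cid, HEAP_INSERT_SKIP_FSM, bistate);

    FreeBulkInsertState(bistate);   /* releases the pinned buffer and strategy */
}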

◆ heap_abort_speculative()

void heap_abort_speculative ( Relation  relation,
ItemPointer  tid 
)

Definition at line 5906 of file heapam.c.

5907 {
5908  TransactionId xid = GetCurrentTransactionId();
5909  ItemId lp;
5910  HeapTupleData tp;
5911  Page page;
5912  BlockNumber block;
5913  Buffer buffer;
5914 
5915  Assert(ItemPointerIsValid(tid));
5916 
5917  block = ItemPointerGetBlockNumber(tid);
5918  buffer = ReadBuffer(relation, block);
5919  page = BufferGetPage(buffer);
5920 
5921  LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
5922
5923  /*
5924  * Page can't be all visible, we just inserted into it, and are still
5925  * running.
5926  */
5927  Assert(!PageIsAllVisible(page));
5928 
5929  lp = PageGetItemId(page, ItemPointerGetOffsetNumber(tid));
5930  Assert(ItemIdIsNormal(lp));
5931 
5932  tp.t_tableOid = RelationGetRelid(relation);
5933  tp.t_data = (HeapTupleHeader) PageGetItem(page, lp);
5934  tp.t_len = ItemIdGetLength(lp);
5935  tp.t_self = *tid;
5936 
5937  /*
5938  * Sanity check that the tuple really is a speculatively inserted tuple,
5939  * inserted by us.
5940  */
5941  if (tp.t_data->t_choice.t_heap.t_xmin != xid)
5942  elog(ERROR, "attempted to kill a tuple inserted by another transaction");
5943  if (!(IsToastRelation(relation) || HeapTupleHeaderIsSpeculative(tp.t_data)))
5944  elog(ERROR, "attempted to kill a non-speculative tuple");
5945  Assert(!HeapTupleHeaderIsHeapOnly(tp.t_data));
5946
5947  /*
5948  * No need to check for serializable conflicts here. There is never a
5949  * need for a combo CID, either. No need to extract replica identity, or
5950  * do anything special with infomask bits.
5951  */
5952 
5953  START_CRIT_SECTION();
5954
5955  /*
5956  * The tuple will become DEAD immediately. Flag that this page is a
5957  * candidate for pruning by setting xmin to TransactionXmin. While not
5958  * immediately prunable, it is the oldest xid we can cheaply determine
5959  * that's safe against wraparound / being older than the table's
5960  * relfrozenxid. To defend against the unlikely case of a new relation
5961  * having a newer relfrozenxid than our TransactionXmin, use relfrozenxid
5962  * if so (vacuum can't subsequently move relfrozenxid to beyond
5963  * TransactionXmin, so there's no race here).
5964  */
5965  if (TransactionIdIsValid(TransactionXmin))
5966  {
5967  TransactionId relfrozenxid = relation->rd_rel->relfrozenxid;
5968  TransactionId prune_xid;
5969 
5970  if (TransactionIdPrecedes(TransactionXmin, relfrozenxid))
5971  prune_xid = relfrozenxid;
5972  else
5973  prune_xid = TransactionXmin;
5974  PageSetPrunable(page, prune_xid);
5975  }
5976 
5977  /* store transaction information of xact deleting the tuple */
5978  tp.t_data->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
5979  tp.t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED;
5980
5981  /*
5982  * Set the tuple header xmin to InvalidTransactionId. This makes the
5983  * tuple immediately invisible everyone. (In particular, to any
5984  * transactions waiting on the speculative token, woken up later.)
5985  */
5986  HeapTupleHeaderSetXmin(tp.t_data, InvalidTransactionId);
5987
5988  /* Clear the speculative insertion token too */
5989  tp.t_data->t_ctid = tp.t_self;
5990 
5991  MarkBufferDirty(buffer);
5992 
5993  /*
5994  * XLOG stuff
5995  *
5996  * The WAL records generated here match heap_delete(). The same recovery
5997  * routines are used.
5998  */
5999  if (RelationNeedsWAL(relation))
6000  {
6001  xl_heap_delete xlrec;
6002  XLogRecPtr recptr;
6003 
6004  xlrec.flags = XLH_DELETE_IS_SUPER;
6005  xlrec.infobits_set = compute_infobits(tp.t_data->t_infomask,
6006  tp.t_data->t_infomask2);
6007  xlrec.offnum = ItemPointerGetOffsetNumber(&tp.t_self);
6008  xlrec.xmax = xid;
6009 
6010  XLogBeginInsert();
6011  XLogRegisterData((char *) &xlrec, SizeOfHeapDelete);
6012  XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
6013 
6014  /* No replica identity & replication origin logged */
6015 
6016  recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_DELETE);
6017 
6018  PageSetLSN(page, recptr);
6019  }
6020 
6021  END_CRIT_SECTION();
6022 
6023  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
6024 
6025  if (HeapTupleHasExternal(&tp))
6026  {
6027  Assert(!IsToastRelation(relation));
6028  heap_toast_delete(relation, &tp, true);
6029  }
6030 
6031  /*
6032  * Never need to mark tuple for invalidation, since catalogs don't support
6033  * speculative insertion
6034  */
6035 
6036  /* Now we can release the buffer */
6037  ReleaseBuffer(buffer);
6038 
6039  /* count deletion, as we counted the insertion too */
6040  pgstat_count_heap_delete(relation);
6041 }
uint32 BlockNumber
Definition: block.h:31
int Buffer
Definition: buf.h:23
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:2543
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:5171
Buffer ReadBuffer(Relation reln, BlockNumber blockNum)
Definition: bufmgr.c:773
#define BUFFER_LOCK_UNLOCK
Definition: bufmgr.h:189
static Page BufferGetPage(Buffer buffer)
Definition: bufmgr.h:400
#define BUFFER_LOCK_EXCLUSIVE
Definition: bufmgr.h:191
Pointer Page
Definition: bufpage.h:81
static Item PageGetItem(Page page, ItemId itemId)
Definition: bufpage.h:354
static ItemId PageGetItemId(Page page, OffsetNumber offsetNumber)
Definition: bufpage.h:243
static bool PageIsAllVisible(Page page)
Definition: bufpage.h:429
static void PageSetLSN(Page page, XLogRecPtr lsn)
Definition: bufpage.h:391
#define PageSetPrunable(page, xid)
Definition: bufpage.h:447
#define Assert(condition)
Definition: c.h:858
uint32 TransactionId
Definition: c.h:652
bool IsToastRelation(Relation relation)
Definition: catalog.c:166
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:224
static uint8 compute_infobits(uint16 infomask, uint16 infomask2)
Definition: heapam.c:2638
#define XLOG_HEAP_DELETE
Definition: heapam_xlog.h:33
#define SizeOfHeapDelete
Definition: heapam_xlog.h:120
#define XLH_DELETE_IS_SUPER
Definition: heapam_xlog.h:104
void heap_toast_delete(Relation rel, HeapTuple oldtup, bool is_speculative)
Definition: heaptoast.c:43
HeapTupleHeaderData * HeapTupleHeader
Definition: htup.h:23
#define HEAP_KEYS_UPDATED
Definition: htup_details.h:275
#define HeapTupleHeaderIsHeapOnly(tup)
Definition: htup_details.h:499
#define HeapTupleHeaderSetXmin(tup, xid)
Definition: htup_details.h:315
#define HEAP_XMAX_BITS
Definition: htup_details.h:267
#define HeapTupleHasExternal(tuple)
Definition: htup_details.h:671
#define HEAP_MOVED
Definition: htup_details.h:213
#define HeapTupleHeaderIsSpeculative(tup)
Definition: htup_details.h:428
#define ItemIdGetLength(itemId)
Definition: itemid.h:59
#define ItemIdIsNormal(itemId)
Definition: itemid.h:99
static OffsetNumber ItemPointerGetOffsetNumber(const ItemPointerData *pointer)
Definition: itemptr.h:124
static BlockNumber ItemPointerGetBlockNumber(const ItemPointerData *pointer)
Definition: itemptr.h:103
static bool ItemPointerIsValid(const ItemPointerData *pointer)
Definition: itemptr.h:83
#define START_CRIT_SECTION()
Definition: miscadmin.h:149
#define END_CRIT_SECTION()
Definition: miscadmin.h:151
void pgstat_count_heap_delete(Relation rel)
#define RelationGetRelid(relation)
Definition: rel.h:505
#define RelationNeedsWAL(relation)
Definition: rel.h:628
TransactionId TransactionXmin
Definition: snapmgr.c:98
ItemPointerData t_self
Definition: htup.h:65
uint32 t_len
Definition: htup.h:64
HeapTupleHeader t_data
Definition: htup.h:68
Oid t_tableOid
Definition: htup.h:66
TransactionId t_xmin
Definition: htup_details.h:124
ItemPointerData t_ctid
Definition: htup_details.h:161
HeapTupleFields t_heap
Definition: htup_details.h:157
union HeapTupleHeaderData::@48 t_choice
Form_pg_class rd_rel
Definition: rel.h:111
TransactionId xmax
Definition: heapam_xlog.h:114
OffsetNumber offnum
Definition: heapam_xlog.h:115
uint8 infobits_set
Definition: heapam_xlog.h:116
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:280
#define InvalidTransactionId
Definition: transam.h:31
#define TransactionIdIsValid(xid)
Definition: transam.h:41
TransactionId GetCurrentTransactionId(void)
Definition: xact.c:452
uint64 XLogRecPtr
Definition: xlogdefs.h:21
void XLogRegisterData(char *data, uint32 len)
Definition: xloginsert.c:364
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:474
void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
Definition: xloginsert.c:242
void XLogBeginInsert(void)
Definition: xloginsert.c:149
#define REGBUF_STANDARD
Definition: xloginsert.h:34

References Assert, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetPage(), compute_infobits(), elog, END_CRIT_SECTION, ERROR, xl_heap_delete::flags, GetCurrentTransactionId(), HEAP_KEYS_UPDATED, HEAP_MOVED, heap_toast_delete(), HEAP_XMAX_BITS, HeapTupleHasExternal, HeapTupleHeaderIsHeapOnly, HeapTupleHeaderIsSpeculative, HeapTupleHeaderSetXmin, xl_heap_delete::infobits_set, InvalidTransactionId, IsToastRelation(), ItemIdGetLength, ItemIdIsNormal, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), ItemPointerIsValid(), LockBuffer(), MarkBufferDirty(), xl_heap_delete::offnum, PageGetItem(), PageGetItemId(), PageIsAllVisible(), PageSetLSN(), PageSetPrunable, pgstat_count_heap_delete(), RelationData::rd_rel, ReadBuffer(), REGBUF_STANDARD, RelationGetRelid, RelationNeedsWAL, ReleaseBuffer(), SizeOfHeapDelete, START_CRIT_SECTION, HeapTupleHeaderData::t_choice, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleHeaderData::t_heap, HeapTupleHeaderData::t_infomask, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, HeapTupleFields::t_xmin, TransactionIdIsValid, TransactionIdPrecedes(), TransactionXmin, XLH_DELETE_IS_SUPER, XLOG_HEAP_DELETE, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), XLogRegisterData(), and xl_heap_delete::xmax.

Referenced by heapam_tuple_complete_speculative(), and toast_delete_datum().

◆ heap_beginscan()

TableScanDesc heap_beginscan ( Relation  relation,
Snapshot  snapshot,
int  nkeys,
ScanKey  key,
ParallelTableScanDesc  parallel_scan,
uint32  flags 
)

Definition at line 1038 of file heapam.c.

1042 {
1043  HeapScanDesc scan;
1044 
1045  /*
1046  * increment relation ref count while scanning relation
1047  *
1048  * This is just to make really sure the relcache entry won't go away while
1049  * the scan has a pointer to it. Caller should be holding the rel open
1050  * anyway, so this is redundant in all normal scenarios...
1051  */
1052  RelationIncrementReferenceCount(relation);
1053
1054  /*
1055  * allocate and initialize scan descriptor
1056  */
1057  scan = (HeapScanDesc) palloc(sizeof(HeapScanDescData));
1058 
1059  scan->rs_base.rs_rd = relation;
1060  scan->rs_base.rs_snapshot = snapshot;
1061  scan->rs_base.rs_nkeys = nkeys;
1062  scan->rs_base.rs_flags = flags;
1063  scan->rs_base.rs_parallel = parallel_scan;
1064  scan->rs_strategy = NULL; /* set in initscan */
1065  scan->rs_vmbuffer = InvalidBuffer;
1066  scan->rs_empty_tuples_pending = 0;
1067 
1068  /*
1069  * Disable page-at-a-time mode if it's not a MVCC-safe snapshot.
1070  */
1071  if (!(snapshot && IsMVCCSnapshot(snapshot)))
1072  scan->rs_base.rs_flags &= ~SO_ALLOW_PAGEMODE;
1073
1074  /*
1075  * For seqscan and sample scans in a serializable transaction, acquire a
1076  * predicate lock on the entire relation. This is required not only to
1077  * lock all the matching tuples, but also to conflict with new insertions
1078  * into the table. In an indexscan, we take page locks on the index pages
1079  * covering the range specified in the scan qual, but in a heap scan there
1080  * is nothing more fine-grained to lock. A bitmap scan is a different
1081  * story, there we have already scanned the index and locked the index
1082  * pages covering the predicate. But in that case we still have to lock
1083  * any matching heap tuples. For sample scan we could optimize the locking
1084  * to be at least page-level granularity, but we'd need to add per-tuple
1085  * locking for that.
1086  */
1087  if (scan->rs_base.rs_flags & (SO_TYPE_SEQSCAN | SO_TYPE_SAMPLESCAN))
1088  {
1089  /*
1090  * Ensure a missing snapshot is noticed reliably, even if the
1091  * isolation mode means predicate locking isn't performed (and
1092  * therefore the snapshot isn't used here).
1093  */
1094  Assert(snapshot);
1095  PredicateLockRelation(relation, snapshot);
1096  }
1097 
1098  /* we only need to set this up once */
1099  scan->rs_ctup.t_tableOid = RelationGetRelid(relation);
1100 
1101  /*
1102  * Allocate memory to keep track of page allocation for parallel workers
1103  * when doing a parallel scan.
1104  */
1105  if (parallel_scan != NULL)
1106  scan->rs_parallelworkerdata = palloc(sizeof(ParallelBlockTableScanWorkerData));
1107  else
1108  scan->rs_parallelworkerdata = NULL;
1109 
1110  /*
1111  * we do this here instead of in initscan() because heap_rescan also calls
1112  * initscan() and we don't want to allocate memory again
1113  */
1114  if (nkeys > 0)
1115  scan->rs_base.rs_key = (ScanKey) palloc(sizeof(ScanKeyData) * nkeys);
1116  else
1117  scan->rs_base.rs_key = NULL;
1118 
1119  initscan(scan, key, false);
1120 
1121  scan->rs_read_stream = NULL;
1122 
1123  /*
1124  * Set up a read stream for sequential scans and TID range scans. This
1125  * should be done after initscan() because initscan() allocates the
1126  * BufferAccessStrategy object passed to the read stream API.
1127  */
1128  if (scan->rs_base.rs_flags & SO_TYPE_SEQSCAN ||
1129  scan->rs_base.rs_flags & SO_TYPE_TIDRANGESCAN)
1130  {
1131  ReadStreamBlockNumberCB cb;
1132
1133  if (scan->rs_base.rs_parallel)
1134  cb = heap_scan_stream_read_next_parallel;
1135  else
1136  cb = heap_scan_stream_read_next_serial;
1137
1138  scan->rs_read_stream = read_stream_begin_relation(READ_STREAM_SEQUENTIAL,
1139  scan->rs_strategy,
1140  scan->rs_base.rs_rd,
1141  MAIN_FORKNUM,
1142  cb,
1143  scan,
1144  0);
1145  }
1146 
1147 
1148  return (TableScanDesc) scan;
1149 }
static BlockNumber heap_scan_stream_read_next_parallel(ReadStream *stream, void *callback_private_data, void *per_buffer_data)
Definition: heapam.c:232
static BlockNumber heap_scan_stream_read_next_serial(ReadStream *stream, void *callback_private_data, void *per_buffer_data)
Definition: heapam.c:270
static void initscan(HeapScanDesc scan, ScanKey key, bool keep_startblock)
Definition: heapam.c:294
struct HeapScanDescData * HeapScanDesc
Definition: heapam.h:109
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:77
void PredicateLockRelation(Relation relation, Snapshot snapshot)
Definition: predicate.c:2561
ReadStream * read_stream_begin_relation(int flags, BufferAccessStrategy strategy, Relation rel, ForkNumber forknum, ReadStreamBlockNumberCB callback, void *callback_private_data, size_t per_buffer_data_size)
Definition: read_stream.c:566
BlockNumber(* ReadStreamBlockNumberCB)(ReadStream *stream, void *callback_private_data, void *per_buffer_data)
Definition: read_stream.h:49
#define READ_STREAM_SEQUENTIAL
Definition: read_stream.h:36
void RelationIncrementReferenceCount(Relation rel)
Definition: relcache.c:2159
@ MAIN_FORKNUM
Definition: relpath.h:50
ScanKeyData * ScanKey
Definition: skey.h:75
#define IsMVCCSnapshot(snapshot)
Definition: snapmgr.h:62
Buffer rs_vmbuffer
Definition: heapam.h:101
BufferAccessStrategy rs_strategy
Definition: heapam.h:70
ParallelBlockTableScanWorkerData * rs_parallelworkerdata
Definition: heapam.h:92
HeapTupleData rs_ctup
Definition: heapam.h:72
ReadStream * rs_read_stream
Definition: heapam.h:75
int rs_empty_tuples_pending
Definition: heapam.h:102
TableScanDescData rs_base
Definition: heapam.h:55
Relation rs_rd
Definition: relscan.h:34
uint32 rs_flags
Definition: relscan.h:47
struct ScanKeyData * rs_key
Definition: relscan.h:37
struct SnapshotData * rs_snapshot
Definition: relscan.h:35
struct ParallelTableScanDescData * rs_parallel
Definition: relscan.h:49
@ SO_TYPE_TIDRANGESCAN
Definition: tableam.h:53
@ SO_ALLOW_PAGEMODE
Definition: tableam.h:62
@ SO_TYPE_SAMPLESCAN
Definition: tableam.h:51
@ SO_TYPE_SEQSCAN
Definition: tableam.h:49

References Assert, heap_scan_stream_read_next_parallel(), heap_scan_stream_read_next_serial(), if(), initscan(), InvalidBuffer, IsMVCCSnapshot, sort-test::key, MAIN_FORKNUM, palloc(), PredicateLockRelation(), read_stream_begin_relation(), READ_STREAM_SEQUENTIAL, RelationGetRelid, RelationIncrementReferenceCount(), HeapScanDescData::rs_base, HeapScanDescData::rs_ctup, HeapScanDescData::rs_empty_tuples_pending, TableScanDescData::rs_flags, TableScanDescData::rs_key, TableScanDescData::rs_nkeys, TableScanDescData::rs_parallel, HeapScanDescData::rs_parallelworkerdata, TableScanDescData::rs_rd, HeapScanDescData::rs_read_stream, TableScanDescData::rs_snapshot, HeapScanDescData::rs_strategy, HeapScanDescData::rs_vmbuffer, SO_ALLOW_PAGEMODE, SO_TYPE_SAMPLESCAN, SO_TYPE_SEQSCAN, SO_TYPE_TIDRANGESCAN, and HeapTupleData::t_tableOid.
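
In backend code a sequential scan is normally started through the table AM wrapper table_beginscan(), which fills in the SO_* flags; the sketch below calls heap_beginscan() directly for illustration and assumes the caller may read the relation and has an active snapshot pushed:

#include "postgres.h"
#include "access/heapam.h"
#include "access/table.h"
#include "utils/snapmgr.h"

static uint64
count_visible_tuples(Oid relid)
{
    Relation    rel = table_open(relid, AccessShareLock);
    TableScanDesc scan = heap_beginscan(rel, GetActiveSnapshot(), 0, NULL,
                                        NULL, SO_TYPE_SEQSCAN | SO_ALLOW_PAGEMODE);
    HeapTuple   tuple;
    uint64      ntuples = 0;

    while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
        ntuples++;              /* only snapshot-visible tuples are returned */

    heap_endscan(scan);
    table_close(rel, AccessShareLock);
    return ntuples;
}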

◆ heap_delete()

TM_Result heap_delete ( Relation  relation,
ItemPointer  tid,
CommandId  cid,
Snapshot  crosscheck,
bool  wait,
struct TM_FailureData tmfd,
bool  changingPart 
)

Definition at line 2683 of file heapam.c.

2686 {
2687  TM_Result result;
2688  TransactionId xid = GetCurrentTransactionId();
2689  ItemId lp;
2690  HeapTupleData tp;
2691  Page page;
2692  BlockNumber block;
2693  Buffer buffer;
2694  Buffer vmbuffer = InvalidBuffer;
2695  TransactionId new_xmax;
2696  uint16 new_infomask,
2697  new_infomask2;
2698  bool have_tuple_lock = false;
2699  bool iscombo;
2700  bool all_visible_cleared = false;
2701  HeapTuple old_key_tuple = NULL; /* replica identity of the tuple */
2702  bool old_key_copied = false;
2703 
2704  Assert(ItemPointerIsValid(tid));
2705 
2706  /*
2707  * Forbid this during a parallel operation, lest it allocate a combo CID.
2708  * Other workers might need that combo CID for visibility checks, and we
2709  * have no provision for broadcasting it to them.
2710  */
2711  if (IsInParallelMode())
2712  ereport(ERROR,
2713  (errcode(ERRCODE_INVALID_TRANSACTION_STATE),
2714  errmsg("cannot delete tuples during a parallel operation")));
2715 
2716  block = ItemPointerGetBlockNumber(tid);
2717  buffer = ReadBuffer(relation, block);
2718  page = BufferGetPage(buffer);
2719 
2720  /*
2721  * Before locking the buffer, pin the visibility map page if it appears to
2722  * be necessary. Since we haven't got the lock yet, someone else might be
2723  * in the middle of changing this, so we'll need to recheck after we have
2724  * the lock.
2725  */
2726  if (PageIsAllVisible(page))
2727  visibilitymap_pin(relation, block, &vmbuffer);
2728 
2729  LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
2730
2731  lp = PageGetItemId(page, ItemPointerGetOffsetNumber(tid));
2732  Assert(ItemIdIsNormal(lp));
2733 
2734  tp.t_tableOid = RelationGetRelid(relation);
2735  tp.t_data = (HeapTupleHeader) PageGetItem(page, lp);
2736  tp.t_len = ItemIdGetLength(lp);
2737  tp.t_self = *tid;
2738 
2739 l1:
2740 
2741  /*
2742  * If we didn't pin the visibility map page and the page has become all
2743  * visible while we were busy locking the buffer, we'll have to unlock and
2744  * re-lock, to avoid holding the buffer lock across an I/O. That's a bit
2745  * unfortunate, but hopefully shouldn't happen often.
2746  */
2747  if (vmbuffer == InvalidBuffer && PageIsAllVisible(page))
2748  {
2749  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
2750  visibilitymap_pin(relation, block, &vmbuffer);
2751  LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
2752  }
2753 
2754  result = HeapTupleSatisfiesUpdate(&tp, cid, buffer);
2755 
2756  if (result == TM_Invisible)
2757  {
2758  UnlockReleaseBuffer(buffer);
2759  ereport(ERROR,
2760  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
2761  errmsg("attempted to delete invisible tuple")));
2762  }
2763  else if (result == TM_BeingModified && wait)
2764  {
2765  TransactionId xwait;
2766  uint16 infomask;
2767 
2768  /* must copy state data before unlocking buffer */
2769  xwait = HeapTupleHeaderGetRawXmax(tp.t_data);
2770  infomask = tp.t_data->t_infomask;
2771 
2772  /*
2773  * Sleep until concurrent transaction ends -- except when there's a
2774  * single locker and it's our own transaction. Note we don't care
2775  * which lock mode the locker has, because we need the strongest one.
2776  *
2777  * Before sleeping, we need to acquire tuple lock to establish our
2778  * priority for the tuple (see heap_lock_tuple). LockTuple will
2779  * release us when we are next-in-line for the tuple.
2780  *
2781  * If we are forced to "start over" below, we keep the tuple lock;
2782  * this arranges that we stay at the head of the line while rechecking
2783  * tuple state.
2784  */
2785  if (infomask & HEAP_XMAX_IS_MULTI)
2786  {
2787  bool current_is_member = false;
2788 
2789  if (DoesMultiXactIdConflict((MultiXactId) xwait, infomask,
2790  LockTupleExclusive, &current_is_member))
2791  {
2792  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
2793 
2794  /*
2795  * Acquire the lock, if necessary (but skip it when we're
2796  * requesting a lock and already have one; avoids deadlock).
2797  */
2798  if (!current_is_member)
2799  heap_acquire_tuplock(relation, &(tp.t_self), LockTupleExclusive,
2800  LockWaitBlock, &have_tuple_lock);
2801 
2802  /* wait for multixact */
2803  MultiXactIdWait((MultiXactId) xwait, MultiXactStatusUpdate, infomask,
2804  relation, &(tp.t_self), XLTW_Delete,
2805  NULL);
2806  LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
2807
2808  /*
2809  * If xwait had just locked the tuple then some other xact
2810  * could update this tuple before we get to this point. Check
2811  * for xmax change, and start over if so.
2812  *
2813  * We also must start over if we didn't pin the VM page, and
2814  * the page has become all visible.
2815  */
2816  if ((vmbuffer == InvalidBuffer && PageIsAllVisible(page)) ||
2817  xmax_infomask_changed(tp.t_data->t_infomask, infomask) ||
2819  xwait))
2820  goto l1;
2821  }
2822 
2823  /*
2824  * You might think the multixact is necessarily done here, but not
2825  * so: it could have surviving members, namely our own xact or
2826  * other subxacts of this backend. It is legal for us to delete
2827  * the tuple in either case, however (the latter case is
2828  * essentially a situation of upgrading our former shared lock to
2829  * exclusive). We don't bother changing the on-disk hint bits
2830  * since we are about to overwrite the xmax altogether.
2831  */
2832  }
2833  else if (!TransactionIdIsCurrentTransactionId(xwait))
2834  {
2835  /*
2836  * Wait for regular transaction to end; but first, acquire tuple
2837  * lock.
2838  */
2839  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
2840  heap_acquire_tuplock(relation, &(tp.t_self), LockTupleExclusive,
2841  LockWaitBlock, &have_tuple_lock);
2842  XactLockTableWait(xwait, relation, &(tp.t_self), XLTW_Delete);
2843  LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
2844
2845  /*
2846  * xwait is done, but if xwait had just locked the tuple then some
2847  * other xact could update this tuple before we get to this point.
2848  * Check for xmax change, and start over if so.
2849  *
2850  * We also must start over if we didn't pin the VM page, and the
2851  * page has become all visible.
2852  */
2853  if ((vmbuffer == InvalidBuffer && PageIsAllVisible(page)) ||
2854  xmax_infomask_changed(tp.t_data->t_infomask, infomask) ||
2855  !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tp.t_data),
2856  xwait))
2857  goto l1;
2858 
2859  /* Otherwise check if it committed or aborted */
2860  UpdateXmaxHintBits(tp.t_data, buffer, xwait);
2861  }
2862 
2863  /*
2864  * We may overwrite if previous xmax aborted, or if it committed but
2865  * only locked the tuple without updating it.
2866  */
2867  if ((tp.t_data->t_infomask & HEAP_XMAX_INVALID) ||
2868  HEAP_XMAX_IS_LOCKED_ONLY(tp.t_data->t_infomask) ||
2869  HeapTupleHeaderIsOnlyLocked(tp.t_data))
2870  result = TM_Ok;
2871  else if (!ItemPointerEquals(&tp.t_self, &tp.t_data->t_ctid))
2872  result = TM_Updated;
2873  else
2874  result = TM_Deleted;
2875  }
2876 
2877  /* sanity check the result HeapTupleSatisfiesUpdate() and the logic above */
2878  if (result != TM_Ok)
2879  {
2880  Assert(result == TM_SelfModified ||
2881  result == TM_Updated ||
2882  result == TM_Deleted ||
2883  result == TM_BeingModified);
2884  Assert(!(tp.t_data->t_infomask & HEAP_XMAX_INVALID));
2885  Assert(result != TM_Updated ||
2886  !ItemPointerEquals(&tp.t_self, &tp.t_data->t_ctid));
2887  }
2888 
2889  if (crosscheck != InvalidSnapshot && result == TM_Ok)
2890  {
2891  /* Perform additional check for transaction-snapshot mode RI updates */
2892  if (!HeapTupleSatisfiesVisibility(&tp, crosscheck, buffer))
2893  result = TM_Updated;
2894  }
2895 
2896  if (result != TM_Ok)
2897  {
2898  tmfd->ctid = tp.t_data->t_ctid;
2899  tmfd->xmax = HeapTupleHeaderGetUpdateXid(tp.t_data);
2900  if (result == TM_SelfModified)
2901  tmfd->cmax = HeapTupleHeaderGetCmax(tp.t_data);
2902  else
2903  tmfd->cmax = InvalidCommandId;
2904  UnlockReleaseBuffer(buffer);
2905  if (have_tuple_lock)
2906  UnlockTupleTuplock(relation, &(tp.t_self), LockTupleExclusive);
2907  if (vmbuffer != InvalidBuffer)
2908  ReleaseBuffer(vmbuffer);
2909  return result;
2910  }
2911 
2912  /*
2913  * We're about to do the actual delete -- check for conflict first, to
2914  * avoid possibly having to roll back work we've just done.
2915  *
2916  * This is safe without a recheck as long as there is no possibility of
2917  * another process scanning the page between this check and the delete
2918  * being visible to the scan (i.e., an exclusive buffer content lock is
2919  * continuously held from this point until the tuple delete is visible).
2920  */
2921  CheckForSerializableConflictIn(relation, tid, BufferGetBlockNumber(buffer));
2922 
2923  /* replace cid with a combo CID if necessary */
2924  HeapTupleHeaderAdjustCmax(tp.t_data, &cid, &iscombo);
2925 
2926  /*
2927  * Compute replica identity tuple before entering the critical section so
2928  * we don't PANIC upon a memory allocation failure.
2929  */
2930  old_key_tuple = ExtractReplicaIdentity(relation, &tp, true, &old_key_copied);
2931 
2932  /*
2933  * If this is the first possibly-multixact-able operation in the current
2934  * transaction, set my per-backend OldestMemberMXactId setting. We can be
2935  * certain that the transaction will never become a member of any older
2936  * MultiXactIds than that. (We have to do this even if we end up just
2937  * using our own TransactionId below, since some other backend could
2938  * incorporate our XID into a MultiXact immediately afterwards.)
2939  */
2940  MultiXactIdSetOldestMember();
2941
2942  compute_new_xmax_infomask(HeapTupleHeaderGetRawXmax(tp.t_data),
2943  tp.t_data->t_infomask, tp.t_data->t_infomask2,
2944  xid, LockTupleExclusive, true,
2945  &new_xmax, &new_infomask, &new_infomask2);
2946 
2947  START_CRIT_SECTION();
2948
2949  /*
2950  * If this transaction commits, the tuple will become DEAD sooner or
2951  * later. Set flag that this page is a candidate for pruning once our xid
2952  * falls below the OldestXmin horizon. If the transaction finally aborts,
2953  * the subsequent page pruning will be a no-op and the hint will be
2954  * cleared.
2955  */
2956  PageSetPrunable(page, xid);
2957 
2958  if (PageIsAllVisible(page))
2959  {
2960  all_visible_cleared = true;
2961  PageClearAllVisible(page);
2962  visibilitymap_clear(relation, BufferGetBlockNumber(buffer),
2963  vmbuffer, VISIBILITYMAP_VALID_BITS);
2964  }
2965 
2966  /* store transaction information of xact deleting the tuple */
2967  tp.t_data->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
2968  tp.t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED;
2969  tp.t_data->t_infomask |= new_infomask;
2970  tp.t_data->t_infomask2 |= new_infomask2;
2971  HeapTupleHeaderClearHotUpdated(tp.t_data);
2972  HeapTupleHeaderSetXmax(tp.t_data, new_xmax);
2973  HeapTupleHeaderSetCmax(tp.t_data, cid, iscombo);
2974  /* Make sure there is no forward chain link in t_ctid */
2975  tp.t_data->t_ctid = tp.t_self;
2976 
2977  /* Signal that this is actually a move into another partition */
2978  if (changingPart)
2979  HeapTupleHeaderSetMovedPartitions(tp.t_data);
2980
2981  MarkBufferDirty(buffer);
2982 
2983  /*
2984  * XLOG stuff
2985  *
2986  * NB: heap_abort_speculative() uses the same xlog record and replay
2987  * routines.
2988  */
2989  if (RelationNeedsWAL(relation))
2990  {
2991  xl_heap_delete xlrec;
2992  xl_heap_header xlhdr;
2993  XLogRecPtr recptr;
2994 
2995  /*
2996  * For logical decode we need combo CIDs to properly decode the
2997  * catalog
2998  */
2999  if (RelationIsAccessibleInLogicalDecoding(relation))
3000  log_heap_new_cid(relation, &tp);
3001 
3002  xlrec.flags = 0;
3003  if (all_visible_cleared)
3004  xlrec.flags |= XLH_DELETE_ALL_VISIBLE_CLEARED;
3005  if (changingPart)
3006  xlrec.flags |= XLH_DELETE_IS_PARTITION_MOVE;
3007  xlrec.infobits_set = compute_infobits(tp.t_data->t_infomask,
3008  tp.t_data->t_infomask2);
3009  xlrec.offnum = ItemPointerGetOffsetNumber(&tp.t_self);
3010  xlrec.xmax = new_xmax;
3011 
3012  if (old_key_tuple != NULL)
3013  {
3014  if (relation->rd_rel->relreplident == REPLICA_IDENTITY_FULL)
3015  xlrec.flags |= XLH_DELETE_CONTAINS_OLD_TUPLE;
3016  else
3017  xlrec.flags |= XLH_DELETE_CONTAINS_OLD_KEY;
3018  }
3019 
3020  XLogBeginInsert();
3021  XLogRegisterData((char *) &xlrec, SizeOfHeapDelete);
3022 
3023  XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
3024 
3025  /*
3026  * Log replica identity of the deleted tuple if there is one
3027  */
3028  if (old_key_tuple != NULL)
3029  {
3030  xlhdr.t_infomask2 = old_key_tuple->t_data->t_infomask2;
3031  xlhdr.t_infomask = old_key_tuple->t_data->t_infomask;
3032  xlhdr.t_hoff = old_key_tuple->t_data->t_hoff;
3033 
3034  XLogRegisterData((char *) &xlhdr, SizeOfHeapHeader);
3035  XLogRegisterData((char *) old_key_tuple->t_data
3036  + SizeofHeapTupleHeader,
3037  old_key_tuple->t_len
3038  - SizeofHeapTupleHeader);
3039  }
3040 
3041  /* filtering by origin on a row level is much more efficient */
3042  XLogSetRecordFlags(XLOG_INCLUDE_ORIGIN);
3043
3044  recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_DELETE);
3045 
3046  PageSetLSN(page, recptr);
3047  }
3048 
3049  END_CRIT_SECTION();
3050 
3051  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
3052 
3053  if (vmbuffer != InvalidBuffer)
3054  ReleaseBuffer(vmbuffer);
3055 
3056  /*
3057  * If the tuple has toasted out-of-line attributes, we need to delete
3058  * those items too. We have to do this before releasing the buffer
3059  * because we need to look at the contents of the tuple, but it's OK to
3060  * release the content lock on the buffer first.
3061  */
3062  if (relation->rd_rel->relkind != RELKIND_RELATION &&
3063  relation->rd_rel->relkind != RELKIND_MATVIEW)
3064  {
3065  /* toast table entries should never be recursively toasted */
3066  Assert(!HeapTupleHasExternal(&tp));
3067
3068  else if (HeapTupleHasExternal(&tp))
3069  heap_toast_delete(relation, &tp, false);
3070 
3071  /*
3072  * Mark tuple for invalidation from system caches at next command
3073  * boundary. We have to do this before releasing the buffer because we
3074  * need to look at the contents of the tuple.
3075  */
3076  CacheInvalidateHeapTuple(relation, &tp, NULL);
3077 
3078  /* Now we can release the buffer */
3079  ReleaseBuffer(buffer);
3080 
3081  /*
3082  * Release the lmgr tuple lock, if we had it.
3083  */
3084  if (have_tuple_lock)
3085  UnlockTupleTuplock(relation, &(tp.t_self), LockTupleExclusive);
3086 
3087  pgstat_count_heap_delete(relation);
3088 
3089  if (old_key_tuple != NULL && old_key_copied)
3090  heap_freetuple(old_key_tuple);
3091 
3092  return TM_Ok;
3093 }
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:3736
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:4953
static void PageClearAllVisible(Page page)
Definition: bufpage.h:439
#define InvalidCommandId
Definition: c.h:669
unsigned short uint16
Definition: c.h:505
TransactionId MultiXactId
Definition: c.h:662
void HeapTupleHeaderAdjustCmax(HeapTupleHeader tup, CommandId *cmax, bool *iscombo)
Definition: combocid.c:153
CommandId HeapTupleHeaderGetCmax(HeapTupleHeader tup)
Definition: combocid.c:118
int errcode(int sqlerrcode)
Definition: elog.c:853
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define ereport(elevel,...)
Definition: elog.h:149
static bool DoesMultiXactIdConflict(MultiXactId multi, uint16 infomask, LockTupleMode lockmode, bool *current_is_member)
Definition: heapam.c:7115
static XLogRecPtr log_heap_new_cid(Relation relation, HeapTuple tup)
Definition: heapam.c:8579
static void compute_new_xmax_infomask(TransactionId xmax, uint16 old_infomask, uint16 old_infomask2, TransactionId add_to_xmax, LockTupleMode mode, bool is_update, TransactionId *result_xmax, uint16 *result_infomask, uint16 *result_infomask2)
Definition: heapam.c:5057
static HeapTuple ExtractReplicaIdentity(Relation relation, HeapTuple tp, bool key_required, bool *copy)
Definition: heapam.c:8660
static bool heap_acquire_tuplock(Relation relation, ItemPointer tid, LockTupleMode mode, LockWaitPolicy wait_policy, bool *have_tuple_lock)
Definition: heapam.c:5008
static void MultiXactIdWait(MultiXactId multi, MultiXactStatus status, uint16 infomask, Relation rel, ItemPointer ctid, XLTW_Oper oper, int *remaining)
Definition: heapam.c:7292
static bool xmax_infomask_changed(uint16 new_infomask, uint16 old_infomask)
Definition: heapam.c:2660
#define UnlockTupleTuplock(rel, tup, mode)
Definition: heapam.c:170
static void UpdateXmaxHintBits(HeapTupleHeader tuple, Buffer buffer, TransactionId xid)
Definition: heapam.c:1905
bool HeapTupleSatisfiesVisibility(HeapTuple htup, Snapshot snapshot, Buffer buffer)
bool HeapTupleHeaderIsOnlyLocked(HeapTupleHeader tuple)
TM_Result HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid, Buffer buffer)
#define XLH_DELETE_CONTAINS_OLD_KEY
Definition: heapam_xlog.h:103
#define XLH_DELETE_ALL_VISIBLE_CLEARED
Definition: heapam_xlog.h:101
#define SizeOfHeapHeader
Definition: heapam_xlog.h:156
#define XLH_DELETE_IS_PARTITION_MOVE
Definition: heapam_xlog.h:105
#define XLH_DELETE_CONTAINS_OLD_TUPLE
Definition: heapam_xlog.h:102
void heap_freetuple(HeapTuple htup)
Definition: heaptuple.c:1434
#define HEAP_XMAX_IS_LOCKED_ONLY(infomask)
Definition: htup_details.h:227
#define SizeofHeapTupleHeader
Definition: htup_details.h:185
#define HeapTupleHeaderSetXmax(tup, xid)
Definition: htup_details.h:376
#define HeapTupleHeaderClearHotUpdated(tup)
Definition: htup_details.h:494
#define HEAP_XMAX_IS_MULTI
Definition: htup_details.h:209
#define HEAP_XMAX_INVALID
Definition: htup_details.h:208
#define HeapTupleHeaderSetMovedPartitions(tup)
Definition: htup_details.h:447
#define HeapTupleHeaderGetRawXmax(tup)
Definition: htup_details.h:371
#define HeapTupleHeaderGetUpdateXid(tup)
Definition: htup_details.h:361
#define HeapTupleHeaderSetCmax(tup, cid, iscombo)
Definition: htup_details.h:401
void CacheInvalidateHeapTuple(Relation relation, HeapTuple tuple, HeapTuple newtuple)
Definition: inval.c:1204
bool ItemPointerEquals(ItemPointer pointer1, ItemPointer pointer2)
Definition: itemptr.c:35
void XactLockTableWait(TransactionId xid, Relation rel, ItemPointer ctid, XLTW_Oper oper)
Definition: lmgr.c:657
@ XLTW_Delete
Definition: lmgr.h:28
@ LockWaitBlock
Definition: lockoptions.h:39
@ LockTupleExclusive
Definition: lockoptions.h:58
void MultiXactIdSetOldestMember(void)
Definition: multixact.c:672
@ MultiXactStatusUpdate
Definition: multixact.h:46
void CheckForSerializableConflictIn(Relation relation, ItemPointer tid, BlockNumber blkno)
Definition: predicate.c:4321
#define RelationIsAccessibleInLogicalDecoding(relation)
Definition: rel.h:684
#define InvalidSnapshot
Definition: snapshot.h:123
TransactionId xmax
Definition: tableam.h:151
CommandId cmax
Definition: tableam.h:152
ItemPointerData ctid
Definition: tableam.h:150
uint16 t_infomask
Definition: heapam_xlog.h:152
uint16 t_infomask2
Definition: heapam_xlog.h:151
TM_Result
Definition: tableam.h:80
@ TM_Ok
Definition: tableam.h:85
@ TM_BeingModified
Definition: tableam.h:107
@ TM_Deleted
Definition: tableam.h:100
@ TM_Updated
Definition: tableam.h:97
@ TM_SelfModified
Definition: tableam.h:91
@ TM_Invisible
Definition: tableam.h:88
#define TransactionIdEquals(id1, id2)
Definition: transam.h:43
bool visibilitymap_clear(Relation rel, BlockNumber heapBlk, Buffer vmbuf, uint8 flags)
void visibilitymap_pin(Relation rel, BlockNumber heapBlk, Buffer *vmbuf)
#define VISIBILITYMAP_VALID_BITS
bool TransactionIdIsCurrentTransactionId(TransactionId xid)
Definition: xact.c:939
bool IsInParallelMode(void)
Definition: xact.c:1087
#define XLOG_INCLUDE_ORIGIN
Definition: xlog.h:154
void XLogSetRecordFlags(uint8 flags)
Definition: xloginsert.c:456

References Assert, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage(), CacheInvalidateHeapTuple(), CheckForSerializableConflictIn(), TM_FailureData::cmax, compute_infobits(), compute_new_xmax_infomask(), TM_FailureData::ctid, DoesMultiXactIdConflict(), END_CRIT_SECTION, ereport, errcode(), errmsg(), ERROR, ExtractReplicaIdentity(), xl_heap_delete::flags, GetCurrentTransactionId(), heap_acquire_tuplock(), heap_freetuple(), HEAP_KEYS_UPDATED, HEAP_MOVED, heap_toast_delete(), HEAP_XMAX_BITS, HEAP_XMAX_INVALID, HEAP_XMAX_IS_LOCKED_ONLY, HEAP_XMAX_IS_MULTI, HeapTupleHasExternal, HeapTupleHeaderAdjustCmax(), HeapTupleHeaderClearHotUpdated, HeapTupleHeaderGetCmax(), HeapTupleHeaderGetRawXmax, HeapTupleHeaderGetUpdateXid, HeapTupleHeaderIsOnlyLocked(), HeapTupleHeaderSetCmax, HeapTupleHeaderSetMovedPartitions, HeapTupleHeaderSetXmax, HeapTupleSatisfiesUpdate(), HeapTupleSatisfiesVisibility(), xl_heap_delete::infobits_set, InvalidBuffer, InvalidCommandId, InvalidSnapshot, IsInParallelMode(), ItemIdGetLength, ItemIdIsNormal, ItemPointerEquals(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), ItemPointerIsValid(), LockBuffer(), LockTupleExclusive, LockWaitBlock, log_heap_new_cid(), MarkBufferDirty(), MultiXactIdSetOldestMember(), MultiXactIdWait(), MultiXactStatusUpdate, xl_heap_delete::offnum, PageClearAllVisible(), PageGetItem(), PageGetItemId(), PageIsAllVisible(), PageSetLSN(), PageSetPrunable, pgstat_count_heap_delete(), RelationData::rd_rel, ReadBuffer(), REGBUF_STANDARD, RelationGetRelid, RelationIsAccessibleInLogicalDecoding, RelationNeedsWAL, ReleaseBuffer(), SizeOfHeapDelete, SizeOfHeapHeader, SizeofHeapTupleHeader, START_CRIT_SECTION, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, xl_heap_header::t_hoff, HeapTupleHeaderData::t_hoff, xl_heap_header::t_infomask, HeapTupleHeaderData::t_infomask, xl_heap_header::t_infomask2, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TM_BeingModified, TM_Deleted, TM_Invisible, TM_Ok, TM_SelfModified, TM_Updated, TransactionIdEquals, TransactionIdIsCurrentTransactionId(), UnlockReleaseBuffer(), UnlockTupleTuplock, UpdateXmaxHintBits(), visibilitymap_clear(), visibilitymap_pin(), VISIBILITYMAP_VALID_BITS, XactLockTableWait(), XLH_DELETE_ALL_VISIBLE_CLEARED, XLH_DELETE_CONTAINS_OLD_KEY, XLH_DELETE_CONTAINS_OLD_TUPLE, XLH_DELETE_IS_PARTITION_MOVE, XLOG_HEAP_DELETE, XLOG_INCLUDE_ORIGIN, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), XLogRegisterData(), XLogSetRecordFlags(), XLTW_Delete, xl_heap_delete::xmax, TM_FailureData::xmax, and xmax_infomask_changed().

Referenced by heapam_tuple_delete(), and simple_heap_delete().
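
Most callers reach this function through simple_heap_delete() or the table AM layer; the sketch below shows a direct call and the TM_Result handling it implies, assuming the relation is open with a suitable lock and tid points at the target tuple (helper name and error wording are illustrative):

#include "postgres.h"
#include "access/heapam.h"
#include "access/xact.h"

static void
delete_tuple_sketch(Relation rel, ItemPointer tid)
{
    TM_FailureData tmfd;
    TM_Result   result;

    result = heap_delete(rel, tid, GetCurrentCommandId(true),
                         InvalidSnapshot,   /* no crosscheck snapshot */
                         true,              /* wait for concurrent updaters */
                         &tmfd, false);     /* not a partition move */

    switch (result)
    {
        case TM_Ok:
            break;              /* deleted */
        case TM_SelfModified:
        case TM_Updated:
        case TM_Deleted:
            elog(ERROR, "tuple concurrently updated or deleted");
            break;
        default:
            elog(ERROR, "unexpected heap_delete result: %d", result);
    }
}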

◆ heap_endscan()

void heap_endscan ( TableScanDesc  sscan)

Definition at line 1210 of file heapam.c.

1211 {
1212  HeapScanDesc scan = (HeapScanDesc) sscan;
1213 
1214  /* Note: no locking manipulations needed */
1215 
1216  /*
1217  * unpin scan buffers
1218  */
1219  if (BufferIsValid(scan->rs_cbuf))
1220  ReleaseBuffer(scan->rs_cbuf);
1221 
1222  if (BufferIsValid(scan->rs_vmbuffer))
1223  ReleaseBuffer(scan->rs_vmbuffer);
1224 
1225  /*
1226  * Must free the read stream before freeing the BufferAccessStrategy.
1227  */
1228  if (scan->rs_read_stream)
1229  read_stream_end(scan->rs_read_stream);
1230
1231  /*
1232  * decrement relation reference count and free scan descriptor storage
1233  */
1234  RelationDecrementReferenceCount(scan->rs_base.rs_rd);
1235
1236  if (scan->rs_base.rs_key)
1237  pfree(scan->rs_base.rs_key);
1238 
1239  if (scan->rs_strategy != NULL)
1240  FreeAccessStrategy(scan->rs_strategy);
1241
1242  if (scan->rs_parallelworkerdata != NULL)
1243  pfree(scan->rs_parallelworkerdata);
1244
1245  if (scan->rs_base.rs_flags & SO_TEMP_SNAPSHOT)
1246  UnregisterSnapshot(scan->rs_base.rs_snapshot);
1247
1248  pfree(scan);
1249 }
static bool BufferIsValid(Buffer bufnum)
Definition: bufmgr.h:351
void read_stream_end(ReadStream *stream)
Definition: read_stream.c:850
void RelationDecrementReferenceCount(Relation rel)
Definition: relcache.c:2172
void UnregisterSnapshot(Snapshot snapshot)
Definition: snapmgr.c:836
Buffer rs_cbuf
Definition: heapam.h:67
@ SO_TEMP_SNAPSHOT
Definition: tableam.h:65

References BufferIsValid(), FreeAccessStrategy(), pfree(), read_stream_end(), RelationDecrementReferenceCount(), ReleaseBuffer(), HeapScanDescData::rs_base, HeapScanDescData::rs_cbuf, TableScanDescData::rs_flags, TableScanDescData::rs_key, HeapScanDescData::rs_parallelworkerdata, TableScanDescData::rs_rd, HeapScanDescData::rs_read_stream, TableScanDescData::rs_snapshot, HeapScanDescData::rs_strategy, HeapScanDescData::rs_vmbuffer, SO_TEMP_SNAPSHOT, and UnregisterSnapshot().

◆ heap_fetch()

bool heap_fetch ( Relation  relation,
Snapshot  snapshot,
HeapTuple  tuple,
Buffer userbuf,
bool  keep_buf 
)

Definition at line 1511 of file heapam.c.

1516 {
1517  ItemPointer tid = &(tuple->t_self);
1518  ItemId lp;
1519  Buffer buffer;
1520  Page page;
1521  OffsetNumber offnum;
1522  bool valid;
1523 
1524  /*
1525  * Fetch and pin the appropriate page of the relation.
1526  */
1527  buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
1528 
1529  /*
1530  * Need share lock on buffer to examine tuple commit status.
1531  */
1532  LockBuffer(buffer, BUFFER_LOCK_SHARE);
1533  page = BufferGetPage(buffer);
1534 
1535  /*
1536  * We'd better check for out-of-range offnum in case of VACUUM since the
1537  * TID was obtained.
1538  */
1539  offnum = ItemPointerGetOffsetNumber(tid);
1540  if (offnum < FirstOffsetNumber || offnum > PageGetMaxOffsetNumber(page))
1541  {
1542  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
1543  ReleaseBuffer(buffer);
1544  *userbuf = InvalidBuffer;
1545  tuple->t_data = NULL;
1546  return false;
1547  }
1548 
1549  /*
1550  * get the item line pointer corresponding to the requested tid
1551  */
1552  lp = PageGetItemId(page, offnum);
1553 
1554  /*
1555  * Must check for deleted tuple.
1556  */
1557  if (!ItemIdIsNormal(lp))
1558  {
1559  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
1560  ReleaseBuffer(buffer);
1561  *userbuf = InvalidBuffer;
1562  tuple->t_data = NULL;
1563  return false;
1564  }
1565 
1566  /*
1567  * fill in *tuple fields
1568  */
1569  tuple->t_data = (HeapTupleHeader) PageGetItem(page, lp);
1570  tuple->t_len = ItemIdGetLength(lp);
1571  tuple->t_tableOid = RelationGetRelid(relation);
1572 
1573  /*
1574  * check tuple visibility, then release lock
1575  */
1576  valid = HeapTupleSatisfiesVisibility(tuple, snapshot, buffer);
1577 
1578  if (valid)
1579  PredicateLockTID(relation, &(tuple->t_self), snapshot,
1580  HeapTupleHeaderGetXmin(tuple->t_data));
1581 
1582  HeapCheckForSerializableConflictOut(valid, relation, tuple, buffer, snapshot);
1583 
1584  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
1585 
1586  if (valid)
1587  {
1588  /*
1589  * All checks passed, so return the tuple as valid. Caller is now
1590  * responsible for releasing the buffer.
1591  */
1592  *userbuf = buffer;
1593 
1594  return true;
1595  }
1596 
1597  /* Tuple failed time qual, but maybe caller wants to see it anyway. */
1598  if (keep_buf)
1599  *userbuf = buffer;
1600  else
1601  {
1602  ReleaseBuffer(buffer);
1603  *userbuf = InvalidBuffer;
1604  tuple->t_data = NULL;
1605  }
1606 
1607  return false;
1608 }
#define BUFFER_LOCK_SHARE
Definition: bufmgr.h:190
static OffsetNumber PageGetMaxOffsetNumber(Page page)
Definition: bufpage.h:372
void HeapCheckForSerializableConflictOut(bool visible, Relation relation, HeapTuple tuple, Buffer buffer, Snapshot snapshot)
Definition: heapam.c:10061
#define HeapTupleHeaderGetXmin(tup)
Definition: htup_details.h:309
uint16 OffsetNumber
Definition: off.h:24
void PredicateLockTID(Relation relation, ItemPointer tid, Snapshot snapshot, TransactionId tuple_xid)
Definition: predicate.c:2606

References BUFFER_LOCK_SHARE, BUFFER_LOCK_UNLOCK, BufferGetPage(), HeapCheckForSerializableConflictOut(), HeapTupleHeaderGetXmin, HeapTupleSatisfiesVisibility(), InvalidBuffer, ItemIdGetLength, ItemIdIsNormal, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), LockBuffer(), PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PredicateLockTID(), ReadBuffer(), RelationGetRelid, ReleaseBuffer(), HeapTupleData::t_data, HeapTupleData::t_len, HeapTupleData::t_self, and HeapTupleData::t_tableOid.

Referenced by heap_lock_updated_tuple_rec(), heapam_fetch_row_version(), and heapam_tuple_lock().
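
A minimal sketch of fetching one tuple by TID, assuming the relation is already open and locked; per the code above, the caller must release the returned buffer pin itself (helper name is illustrative):

#include "postgres.h"
#include "access/heapam.h"
#include "storage/bufmgr.h"
#include "utils/snapmgr.h"

static bool
fetch_tuple_sketch(Relation rel, ItemPointer tid)
{
    HeapTupleData tuple;
    Buffer      buffer;

    tuple.t_self = *tid;        /* heap_fetch reads the target TID from t_self */
    if (!heap_fetch(rel, GetActiveSnapshot(), &tuple, &buffer, false))
        return false;           /* missing, dead, or not visible */

    /* ... examine tuple.t_data while the buffer pin is held ... */

    ReleaseBuffer(buffer);      /* drop the pin once done with t_data */
    return true;
}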

◆ heap_finish_speculative()

void heap_finish_speculative ( Relation  relation,
ItemPointer  tid 
)

Definition at line 5819 of file heapam.c.

5820 {
5821  Buffer buffer;
5822  Page page;
5823  OffsetNumber offnum;
5824  ItemId lp = NULL;
5825  HeapTupleHeader htup;
5826 
5827  buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
5828  LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
5829  page = (Page) BufferGetPage(buffer);
5830 
5831  offnum = ItemPointerGetOffsetNumber(tid);
5832  if (PageGetMaxOffsetNumber(page) >= offnum)
5833  lp = PageGetItemId(page, offnum);
5834 
5835  if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
5836  elog(ERROR, "invalid lp");
5837 
5838  htup = (HeapTupleHeader) PageGetItem(page, lp);
5839 
5840  /* NO EREPORT(ERROR) from here till changes are logged */
5841  START_CRIT_SECTION();
5842
5843  Assert(HeapTupleHeaderIsSpeculative(htup));
5844
5845  MarkBufferDirty(buffer);
5846 
5847  /*
5848  * Replace the speculative insertion token with a real t_ctid, pointing to
5849  * itself like it does on regular tuples.
5850  */
5851  htup->t_ctid = *tid;
5852 
5853  /* XLOG stuff */
5854  if (RelationNeedsWAL(relation))
5855  {
5856  xl_heap_confirm xlrec;
5857  XLogRecPtr recptr;
5858 
5859  xlrec.offnum = ItemPointerGetOffsetNumber(tid);
5860 
5861  XLogBeginInsert();
5862 
5863  /* We want the same filtering on this as on a plain insert */
5865 
5866  XLogRegisterData((char *) &xlrec, SizeOfHeapConfirm);
5867  XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
5868 
5869  recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_CONFIRM);
5870 
5871  PageSetLSN(page, recptr);
5872  }
5873 
5874  END_CRIT_SECTION();
5875 
5876  UnlockReleaseBuffer(buffer);
5877 }
#define SizeOfHeapConfirm
Definition: heapam_xlog.h:422
#define XLOG_HEAP_CONFIRM
Definition: heapam_xlog.h:37
OffsetNumber offnum
Definition: heapam_xlog.h:419

References Assert, BUFFER_LOCK_EXCLUSIVE, BufferGetPage(), elog, END_CRIT_SECTION, ERROR, HeapTupleHeaderIsSpeculative, ItemIdIsNormal, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), LockBuffer(), MarkBufferDirty(), xl_heap_confirm::offnum, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PageSetLSN(), ReadBuffer(), REGBUF_STANDARD, RelationNeedsWAL, SizeOfHeapConfirm, START_CRIT_SECTION, HeapTupleHeaderData::t_ctid, UnlockReleaseBuffer(), XLOG_HEAP_CONFIRM, XLOG_INCLUDE_ORIGIN, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), XLogRegisterData(), and XLogSetRecordFlags().

Referenced by heapam_tuple_complete_speculative().
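
To place this function in context, here is a hedged caller-side sketch of the speculative-insertion protocol (it is not copied from the PostgreSQL sources; rel, tup, specToken, and no_conflict_found are placeholders for state that the executor and table AM layer normally manage):

    /* Sketch only: specToken would come from the executor's speculative-token
     * machinery, and no_conflict_found is a stand-in condition. */
    HeapTupleHeaderSetSpeculativeToken(tup->t_data, specToken);
    heap_insert(rel, tup, GetCurrentCommandId(true),
                HEAP_INSERT_SPECULATIVE, NULL);

    if (no_conflict_found)
        heap_finish_speculative(rel, &tup->t_self);   /* t_ctid now points at itself */
    else
        heap_abort_speculative(rel, &tup->t_self);    /* kill the speculative tuple */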

◆ heap_freeze_prepared_tuples()

void heap_freeze_prepared_tuples ( Buffer  buffer,
HeapTupleFreeze tuples,
int  ntuples 
)

Definition at line 6900 of file heapam.c.

6901 {
6902  Page page = BufferGetPage(buffer);
6903 
6904  for (int i = 0; i < ntuples; i++)
6905  {
6906  HeapTupleFreeze *frz = tuples + i;
6907  ItemId itemid = PageGetItemId(page, frz->offset);
6908  HeapTupleHeader htup;
6909 
6910  htup = (HeapTupleHeader) PageGetItem(page, itemid);
6911  heap_execute_freeze_tuple(htup, frz);
6912  }
6913 }
static void heap_execute_freeze_tuple(HeapTupleHeader tuple, HeapTupleFreeze *frz)
Definition: heapam.c:6824
int i
Definition: isn.c:73
OffsetNumber offset
Definition: heapam.h:151

References BufferGetPage(), heap_execute_freeze_tuple(), i, HeapTupleFreeze::offset, PageGetItem(), and PageGetItemId().

Referenced by heap_page_prune_and_freeze().

◆ heap_freeze_tuple()

bool heap_freeze_tuple ( HeapTupleHeader  tuple,
TransactionId  relfrozenxid,
TransactionId  relminmxid,
TransactionId  FreezeLimit,
TransactionId  MultiXactCutoff 
)

Definition at line 6922 of file heapam.c.

6925 {
6926  HeapTupleFreeze frz;
6927  bool do_freeze;
6928  bool totally_frozen;
6929  struct VacuumCutoffs cutoffs;
6930  HeapPageFreeze pagefrz;
6931 
6932  cutoffs.relfrozenxid = relfrozenxid;
6933  cutoffs.relminmxid = relminmxid;
6934  cutoffs.OldestXmin = FreezeLimit;
6935  cutoffs.OldestMxact = MultiXactCutoff;
6936  cutoffs.FreezeLimit = FreezeLimit;
6937  cutoffs.MultiXactCutoff = MultiXactCutoff;
6938 
6939  pagefrz.freeze_required = true;
6940  pagefrz.FreezePageRelfrozenXid = FreezeLimit;
6941  pagefrz.FreezePageRelminMxid = MultiXactCutoff;
6942  pagefrz.NoFreezePageRelfrozenXid = FreezeLimit;
6943  pagefrz.NoFreezePageRelminMxid = MultiXactCutoff;
6944 
6945  do_freeze = heap_prepare_freeze_tuple(tuple, &cutoffs,
6946  &pagefrz, &frz, &totally_frozen);
6947 
6948  /*
6949  * Note that because this is not a WAL-logged operation, we don't need to
6950  * fill in the offset in the freeze record.
6951  */
6952 
6953  if (do_freeze)
6954  heap_execute_freeze_tuple(tuple, &frz);
6955  return do_freeze;
6956 }
bool heap_prepare_freeze_tuple(HeapTupleHeader tuple, const struct VacuumCutoffs *cutoffs, HeapPageFreeze *pagefrz, HeapTupleFreeze *frz, bool *totally_frozen)
Definition: heapam.c:6550
TransactionId FreezeLimit
Definition: vacuum.h:277
TransactionId relfrozenxid
Definition: vacuum.h:251
MultiXactId relminmxid
Definition: vacuum.h:252
MultiXactId MultiXactCutoff
Definition: vacuum.h:278

References VacuumCutoffs::FreezeLimit, heap_execute_freeze_tuple(), heap_prepare_freeze_tuple(), VacuumCutoffs::MultiXactCutoff, VacuumCutoffs::OldestMxact, VacuumCutoffs::OldestXmin, VacuumCutoffs::relfrozenxid, and VacuumCutoffs::relminmxid.

Referenced by rewrite_heap_tuple().
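
As a rough illustration of the calling convention (a sketch in the spirit of rewrite_heap_tuple(), not copied from it): the caller passes the relation's pg_class horizons plus its chosen freeze cutoffs, and the tuple header is modified in place when freezing is required. OldHeap, new_tuple, freeze_xid, and cutoff_multi are assumed to be supplied by the caller.

    /* Nothing here is WAL-logged; the caller is responsible for durability. */
    if (heap_freeze_tuple(new_tuple->t_data,
                          OldHeap->rd_rel->relfrozenxid,
                          OldHeap->rd_rel->relminmxid,
                          freeze_xid,
                          cutoff_multi))
    {
        /* xmin/xmax infomask bits were rewritten in new_tuple->t_data */
    }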

◆ heap_get_latest_tid()

void heap_get_latest_tid ( TableScanDesc  sscan,
ItemPointer  tid 
)

Definition at line 1783 of file heapam.c.

1785 {
1786  Relation relation = sscan->rs_rd;
1787  Snapshot snapshot = sscan->rs_snapshot;
1788  ItemPointerData ctid;
1789  TransactionId priorXmax;
1790 
1791  /*
1792  * table_tuple_get_latest_tid() verified that the passed in tid is valid.
1793  * Assume that t_ctid links are valid however - there shouldn't be invalid
1794  * ones in the table.
1795  */
1796  Assert(ItemPointerIsValid(tid));
1797 
1798  /*
1799  * Loop to chase down t_ctid links. At top of loop, ctid is the tuple we
1800  * need to examine, and *tid is the TID we will return if ctid turns out
1801  * to be bogus.
1802  *
1803  * Note that we will loop until we reach the end of the t_ctid chain.
1804  * Depending on the snapshot passed, there might be at most one visible
1805  * version of the row, but we don't try to optimize for that.
1806  */
1807  ctid = *tid;
1808  priorXmax = InvalidTransactionId; /* cannot check first XMIN */
1809  for (;;)
1810  {
1811  Buffer buffer;
1812  Page page;
1813  OffsetNumber offnum;
1814  ItemId lp;
1815  HeapTupleData tp;
1816  bool valid;
1817 
1818  /*
1819  * Read, pin, and lock the page.
1820  */
1821  buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(&ctid));
1822  LockBuffer(buffer, BUFFER_LOCK_SHARE);
1823  page = BufferGetPage(buffer);
1824 
1825  /*
1826  * Check for bogus item number. This is not treated as an error
1827  * condition because it can happen while following a t_ctid link. We
1828  * just assume that the prior tid is OK and return it unchanged.
1829  */
1830  offnum = ItemPointerGetOffsetNumber(&ctid);
1831  if (offnum < FirstOffsetNumber || offnum > PageGetMaxOffsetNumber(page))
1832  {
1833  UnlockReleaseBuffer(buffer);
1834  break;
1835  }
1836  lp = PageGetItemId(page, offnum);
1837  if (!ItemIdIsNormal(lp))
1838  {
1839  UnlockReleaseBuffer(buffer);
1840  break;
1841  }
1842 
1843  /* OK to access the tuple */
1844  tp.t_self = ctid;
1845  tp.t_data = (HeapTupleHeader) PageGetItem(page, lp);
1846  tp.t_len = ItemIdGetLength(lp);
1847  tp.t_tableOid = RelationGetRelid(relation);
1848 
1849  /*
1850  * After following a t_ctid link, we might arrive at an unrelated
1851  * tuple. Check for XMIN match.
1852  */
1853  if (TransactionIdIsValid(priorXmax) &&
1854  !TransactionIdEquals(priorXmax, HeapTupleHeaderGetXmin(tp.t_data)))
1855  {
1856  UnlockReleaseBuffer(buffer);
1857  break;
1858  }
1859 
1860  /*
1861  * Check tuple visibility; if visible, set it as the new result
1862  * candidate.
1863  */
1864  valid = HeapTupleSatisfiesVisibility(&tp, snapshot, buffer);
1865  HeapCheckForSerializableConflictOut(valid, relation, &tp, buffer, snapshot);
1866  if (valid)
1867  *tid = ctid;
1868 
1869  /*
1870  * If there's a valid t_ctid link, follow it, else we're done.
1871  */
1872  if ((tp.t_data->t_infomask & HEAP_XMAX_INVALID) ||
1873  HeapTupleHeaderIsOnlyLocked(tp.t_data) ||
1874  HeapTupleHeaderIndicatesMovedPartitions(tp.t_data) ||
1875  ItemPointerEquals(&tp.t_self, &tp.t_data->t_ctid))
1876  {
1877  UnlockReleaseBuffer(buffer);
1878  break;
1879  }
1880 
1881  ctid = tp.t_data->t_ctid;
1882  priorXmax = HeapTupleHeaderGetUpdateXid(tp.t_data);
1883  UnlockReleaseBuffer(buffer);
1884  } /* end of loop */
1885 }
#define HeapTupleHeaderIndicatesMovedPartitions(tup)
Definition: htup_details.h:444

References Assert, BUFFER_LOCK_SHARE, BufferGetPage(), HEAP_XMAX_INVALID, HeapCheckForSerializableConflictOut(), HeapTupleHeaderGetUpdateXid, HeapTupleHeaderGetXmin, HeapTupleHeaderIndicatesMovedPartitions, HeapTupleHeaderIsOnlyLocked(), HeapTupleSatisfiesVisibility(), InvalidTransactionId, ItemIdGetLength, ItemIdIsNormal, ItemPointerEquals(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), ItemPointerIsValid(), LockBuffer(), PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), ReadBuffer(), RelationGetRelid, TableScanDescData::rs_rd, TableScanDescData::rs_snapshot, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TransactionIdEquals, TransactionIdIsValid, and UnlockReleaseBuffer().

◆ heap_get_root_tuples()

void heap_get_root_tuples ( Page  page,
OffsetNumber root_offsets 
)

Definition at line 1785 of file pruneheap.c.

1786 {
1787  OffsetNumber offnum,
1788  maxoff;
1789 
1790  MemSet(root_offsets, InvalidOffsetNumber,
1791  MaxHeapTuplesPerPage * sizeof(OffsetNumber));
1792 
1793  maxoff = PageGetMaxOffsetNumber(page);
1794  for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum = OffsetNumberNext(offnum))
1795  {
1796  ItemId lp = PageGetItemId(page, offnum);
1797  HeapTupleHeader htup;
1798  OffsetNumber nextoffnum;
1799  TransactionId priorXmax;
1800 
1801  /* skip unused and dead items */
1802  if (!ItemIdIsUsed(lp) || ItemIdIsDead(lp))
1803  continue;
1804 
1805  if (ItemIdIsNormal(lp))
1806  {
1807  htup = (HeapTupleHeader) PageGetItem(page, lp);
1808 
1809  /*
1810  * Check if this tuple is part of a HOT-chain rooted at some other
1811  * tuple. If so, skip it for now; we'll process it when we find
1812  * its root.
1813  */
1814  if (HeapTupleHeaderIsHeapOnly(htup))
1815  continue;
1816 
1817  /*
1818  * This is either a plain tuple or the root of a HOT-chain.
1819  * Remember it in the mapping.
1820  */
1821  root_offsets[offnum - 1] = offnum;
1822 
1823  /* If it's not the start of a HOT-chain, we're done with it */
1824  if (!HeapTupleHeaderIsHotUpdated(htup))
1825  continue;
1826 
1827  /* Set up to scan the HOT-chain */
1828  nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
1829  priorXmax = HeapTupleHeaderGetUpdateXid(htup);
1830  }
1831  else
1832  {
1833  /* Must be a redirect item. We do not set its root_offsets entry */
1834  Assert(ItemIdIsRedirected(lp));
1835  /* Set up to scan the HOT-chain */
1836  nextoffnum = ItemIdGetRedirect(lp);
1837  priorXmax = InvalidTransactionId;
1838  }
1839 
1840  /*
1841  * Now follow the HOT-chain and collect other tuples in the chain.
1842  *
1843  * Note: Even though this is a nested loop, the complexity of the
1844  * function is O(N) because a tuple in the page should be visited not
1845  * more than twice, once in the outer loop and once in HOT-chain
1846  * chases.
1847  */
1848  for (;;)
1849  {
1850  /* Sanity check (pure paranoia) */
1851  if (offnum < FirstOffsetNumber)
1852  break;
1853 
1854  /*
1855  * An offset past the end of page's line pointer array is possible
1856  * when the array was truncated
1857  */
1858  if (offnum > maxoff)
1859  break;
1860 
1861  lp = PageGetItemId(page, nextoffnum);
1862 
1863  /* Check for broken chains */
1864  if (!ItemIdIsNormal(lp))
1865  break;
1866 
1867  htup = (HeapTupleHeader) PageGetItem(page, lp);
1868 
1869  if (TransactionIdIsValid(priorXmax) &&
1870  !TransactionIdEquals(priorXmax, HeapTupleHeaderGetXmin(htup)))
1871  break;
1872 
1873  /* Remember the root line pointer for this item */
1874  root_offsets[nextoffnum - 1] = offnum;
1875 
1876  /* Advance to next chain member, if any */
1877  if (!HeapTupleHeaderIsHotUpdated(htup))
1878  break;
1879 
1880  /* HOT implies it can't have moved to different partition */
1881  Assert(!HeapTupleHeaderIndicatesMovedPartitions(htup));
1882 
1883  nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
1884  priorXmax = HeapTupleHeaderGetUpdateXid(htup);
1885  }
1886  }
1887 }
#define MemSet(start, val, len)
Definition: c.h:1020
#define MaxHeapTuplesPerPage
Definition: htup_details.h:572
#define HeapTupleHeaderIsHotUpdated(tup)
Definition: htup_details.h:482
#define ItemIdGetRedirect(itemId)
Definition: itemid.h:78
#define ItemIdIsDead(itemId)
Definition: itemid.h:113
#define ItemIdIsUsed(itemId)
Definition: itemid.h:92
#define ItemIdIsRedirected(itemId)
Definition: itemid.h:106
#define InvalidOffsetNumber
Definition: off.h:26
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
#define FirstOffsetNumber
Definition: off.h:27

References Assert, FirstOffsetNumber, HeapTupleHeaderGetUpdateXid, HeapTupleHeaderGetXmin, HeapTupleHeaderIndicatesMovedPartitions, HeapTupleHeaderIsHeapOnly, HeapTupleHeaderIsHotUpdated, InvalidOffsetNumber, InvalidTransactionId, ItemIdGetRedirect, ItemIdIsDead, ItemIdIsNormal, ItemIdIsRedirected, ItemIdIsUsed, ItemPointerGetOffsetNumber(), MaxHeapTuplesPerPage, MemSet, OffsetNumberNext, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), HeapTupleHeaderData::t_ctid, TransactionIdEquals, and TransactionIdIsValid.

Referenced by heapam_index_build_range_scan(), and heapam_index_validate_scan().
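
A minimal usage sketch (assumed, not lifted from those callers): with the buffer locked at least in share mode, the caller passes a page-sized offset array and can then map any heap-only tuple back to the root of its HOT chain; page and offnum are placeholders here.

    OffsetNumber root_offsets[MaxHeapTuplesPerPage];
    OffsetNumber root;

    heap_get_root_tuples(page, root_offsets);

    /* For the item at offnum, root_offsets[offnum - 1] holds the offset of its
     * HOT-chain root, or InvalidOffsetNumber if no chain root reaches it. */
    root = root_offsets[offnum - 1];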

◆ heap_getnext()

HeapTuple heap_getnext ( TableScanDesc  sscan,
ScanDirection  direction 
)

Definition at line 1252 of file heapam.c.

1253 {
1254  HeapScanDesc scan = (HeapScanDesc) sscan;
1255 
1256  /*
1257  * This is still widely used directly, without going through table AM, so
1258  * add a safety check. It's possible we should, at a later point,
1259  * downgrade this to an assert. The reason for checking the AM routine,
1260  * rather than the AM oid, is that this allows to write regression tests
1261  * that create another AM reusing the heap handler.
1262  */
1263  if (unlikely(sscan->rs_rd->rd_tableam != GetHeapamTableAmRoutine()))
1264  ereport(ERROR,
1265  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1266  errmsg_internal("only heap AM is supported")));
1267 
1268  /*
1269  * We don't expect direct calls to heap_getnext with valid CheckXidAlive
1270  * for catalog or regular tables. See detailed comments in xact.c where
1271  * these variables are declared. Normally we have such a check at tableam
1272  * level API but this is called from many places so we need to ensure it
1273  * here.
1274  */
1275  if (unlikely(TransactionIdIsValid(CheckXidAlive) && !bsysscan))
1276  elog(ERROR, "unexpected heap_getnext call during logical decoding");
1277 
1278  /* Note: no locking manipulations needed */
1279 
1280  if (scan->rs_base.rs_flags & SO_ALLOW_PAGEMODE)
1281  heapgettup_pagemode(scan, direction,
1282  scan->rs_base.rs_nkeys, scan->rs_base.rs_key);
1283  else
1284  heapgettup(scan, direction,
1285  scan->rs_base.rs_nkeys, scan->rs_base.rs_key);
1286 
1287  if (scan->rs_ctup.t_data == NULL)
1288  return NULL;
1289 
1290  /*
1291  * if we get here it means we have a new current scan tuple, so point to
1292  * the proper return buffer and return the tuple.
1293  */
1294 
1295  pgstat_count_heap_getnext(scan->rs_base.rs_rd);
1296 
1297  return &scan->rs_ctup;
1298 }
#define unlikely(x)
Definition: c.h:311
int errmsg_internal(const char *fmt,...)
Definition: elog.c:1157
static void heapgettup(HeapScanDesc scan, ScanDirection dir, int nkeys, ScanKey key)
Definition: heapam.c:838
static void heapgettup_pagemode(HeapScanDesc scan, ScanDirection dir, int nkeys, ScanKey key)
Definition: heapam.c:948
const TableAmRoutine * GetHeapamTableAmRoutine(void)
#define pgstat_count_heap_getnext(rel)
Definition: pgstat.h:615
const struct TableAmRoutine * rd_tableam
Definition: rel.h:189
bool bsysscan
Definition: xact.c:98
TransactionId CheckXidAlive
Definition: xact.c:97

References bsysscan, CheckXidAlive, elog, ereport, errcode(), errmsg_internal(), ERROR, GetHeapamTableAmRoutine(), heapgettup(), heapgettup_pagemode(), pgstat_count_heap_getnext, RelationData::rd_tableam, HeapScanDescData::rs_base, HeapScanDescData::rs_ctup, TableScanDescData::rs_flags, TableScanDescData::rs_key, TableScanDescData::rs_nkeys, TableScanDescData::rs_rd, SO_ALLOW_PAGEMODE, HeapTupleData::t_data, TransactionIdIsValid, and unlikely.

Referenced by AlterTableMoveAll(), AlterTableSpaceOptions(), check_db_file_conflict(), CreateDatabaseUsingFileCopy(), do_autovacuum(), DropSetting(), DropTableSpace(), find_typed_table_dependencies(), get_all_vacuum_rels(), get_database_list(), get_subscription_list(), get_tables_to_cluster(), get_tablespace_name(), get_tablespace_oid(), GetAllTablesPublicationRelations(), getRelationsInNamespace(), GetSchemaPublicationRelations(), heapam_index_build_range_scan(), heapam_index_validate_scan(), index_update_stats(), objectsInSchemaToOids(), pgrowlocks(), pgstat_heap(), populate_typ_list(), ReindexMultipleTables(), remove_dbtablespaces(), RemoveSubscriptionRel(), RenameTableSpace(), ThereIsAtLeastOneRole(), and vac_truncate_clog().
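
Most of those callers follow the same loop. A hedged sketch of a catalog-style sequential scan (rel, snapshot, and process_tuple are placeholders; ordinary table access would normally go through the table AM wrappers such as table_beginscan() instead):

    TableScanDesc scan;
    HeapTuple     tuple;

    scan = heap_beginscan(rel, snapshot, 0, NULL, NULL,
                          SO_TYPE_SEQSCAN | SO_ALLOW_PAGEMODE);
    while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
    {
        /* tuple points into the scan's current buffer; use heap_copytuple()
         * if it must survive beyond the next heap_getnext() call */
        process_tuple(tuple);
    }
    heap_endscan(scan);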

◆ heap_getnextslot()

bool heap_getnextslot ( TableScanDesc  sscan,
ScanDirection  direction,
struct TupleTableSlot slot 
)

Definition at line 1301 of file heapam.c.

1302 {
1303  HeapScanDesc scan = (HeapScanDesc) sscan;
1304 
1305  /* Note: no locking manipulations needed */
1306 
1307  if (sscan->rs_flags & SO_ALLOW_PAGEMODE)
1308  heapgettup_pagemode(scan, direction, sscan->rs_nkeys, sscan->rs_key);
1309  else
1310  heapgettup(scan, direction, sscan->rs_nkeys, sscan->rs_key);
1311 
1312  if (scan->rs_ctup.t_data == NULL)
1313  {
1314  ExecClearTuple(slot);
1315  return false;
1316  }
1317 
1318  /*
1319  * if we get here it means we have a new current scan tuple, so point to
1320  * the proper return buffer and return the tuple.
1321  */
1322 
1323  pgstat_count_heap_getnext(scan->rs_base.rs_rd);
1324 
1325  ExecStoreBufferHeapTuple(&scan->rs_ctup, slot,
1326  scan->rs_cbuf);
1327  return true;
1328 }
TupleTableSlot * ExecStoreBufferHeapTuple(HeapTuple tuple, TupleTableSlot *slot, Buffer buffer)
Definition: execTuples.c:1479
static TupleTableSlot * ExecClearTuple(TupleTableSlot *slot)
Definition: tuptable.h:454

References ExecClearTuple(), ExecStoreBufferHeapTuple(), heapgettup(), heapgettup_pagemode(), pgstat_count_heap_getnext, HeapScanDescData::rs_base, HeapScanDescData::rs_cbuf, HeapScanDescData::rs_ctup, TableScanDescData::rs_flags, TableScanDescData::rs_key, TableScanDescData::rs_nkeys, TableScanDescData::rs_rd, SO_ALLOW_PAGEMODE, and HeapTupleData::t_data.
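
The slot-based variant of the same loop could be sketched as follows (assumptions: rel and snapshot as above, and a slot obtained from table_slot_create(); this is illustrative, not taken from a specific caller):

    TableScanDesc   scan = heap_beginscan(rel, snapshot, 0, NULL, NULL,
                                          SO_TYPE_SEQSCAN | SO_ALLOW_PAGEMODE);
    TupleTableSlot *slot = table_slot_create(rel, NULL);

    while (heap_getnextslot(scan, ForwardScanDirection, slot))
    {
        /* slot now references a buffer heap tuple pinned by the scan */
    }

    ExecDropSingleTupleTableSlot(slot);
    heap_endscan(scan);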

◆ heap_getnextslot_tidrange()

bool heap_getnextslot_tidrange ( TableScanDesc  sscan,
ScanDirection  direction,
TupleTableSlot slot 
)

Definition at line 1404 of file heapam.c.

1406 {
1407  HeapScanDesc scan = (HeapScanDesc) sscan;
1408  ItemPointer mintid = &sscan->rs_mintid;
1409  ItemPointer maxtid = &sscan->rs_maxtid;
1410 
1411  /* Note: no locking manipulations needed */
1412  for (;;)
1413  {
1414  if (sscan->rs_flags & SO_ALLOW_PAGEMODE)
1415  heapgettup_pagemode(scan, direction, sscan->rs_nkeys, sscan->rs_key);
1416  else
1417  heapgettup(scan, direction, sscan->rs_nkeys, sscan->rs_key);
1418 
1419  if (scan->rs_ctup.t_data == NULL)
1420  {
1421  ExecClearTuple(slot);
1422  return false;
1423  }
1424 
1425  /*
1426  * heap_set_tidrange will have used heap_setscanlimits to limit the
1427  * range of pages we scan to only ones that can contain the TID range
1428  * we're scanning for. Here we must filter out any tuples from these
1429  * pages that are outside of that range.
1430  */
1431  if (ItemPointerCompare(&scan->rs_ctup.t_self, mintid) < 0)
1432  {
1433  ExecClearTuple(slot);
1434 
1435  /*
1436  * When scanning backwards, the TIDs will be in descending order.
1437  * Future tuples in this direction will be lower still, so we can
1438  * just return false to indicate there will be no more tuples.
1439  */
1440  if (ScanDirectionIsBackward(direction))
1441  return false;
1442 
1443  continue;
1444  }
1445 
1446  /*
1447  * Likewise for the final page, we must filter out TIDs greater than
1448  * maxtid.
1449  */
1450  if (ItemPointerCompare(&scan->rs_ctup.t_self, maxtid) > 0)
1451  {
1452  ExecClearTuple(slot);
1453 
1454  /*
1455  * When scanning forward, the TIDs will be in ascending order.
1456  * Future tuples in this direction will be higher still, so we can
1457  * just return false to indicate there will be no more tuples.
1458  */
1459  if (ScanDirectionIsForward(direction))
1460  return false;
1461  continue;
1462  }
1463 
1464  break;
1465  }
1466 
1467  /*
1468  * if we get here it means we have a new current scan tuple, so point to
1469  * the proper return buffer and return the tuple.
1470  */
1471  pgstat_count_heap_getnext(scan->rs_base.rs_rd);
1472 
1473  ExecStoreBufferHeapTuple(&scan->rs_ctup, slot, scan->rs_cbuf);
1474  return true;
1475 }
int32 ItemPointerCompare(ItemPointer arg1, ItemPointer arg2)
Definition: itemptr.c:51
#define ScanDirectionIsForward(direction)
Definition: sdir.h:64
#define ScanDirectionIsBackward(direction)
Definition: sdir.h:50
ItemPointerData rs_mintid
Definition: relscan.h:40
ItemPointerData rs_maxtid
Definition: relscan.h:41

References ExecClearTuple(), ExecStoreBufferHeapTuple(), heapgettup(), heapgettup_pagemode(), ItemPointerCompare(), pgstat_count_heap_getnext, HeapScanDescData::rs_base, HeapScanDescData::rs_cbuf, HeapScanDescData::rs_ctup, TableScanDescData::rs_flags, TableScanDescData::rs_key, TableScanDescData::rs_maxtid, TableScanDescData::rs_mintid, TableScanDescData::rs_nkeys, TableScanDescData::rs_rd, ScanDirectionIsBackward, ScanDirectionIsForward, SO_ALLOW_PAGEMODE, HeapTupleData::t_data, and HeapTupleData::t_self.
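
Combined with heap_set_tidrange(), a scan restricted to one TID interval could look like this sketch (mintid, maxtid, rel, snapshot, and slot are assumed to be set up by the caller; real callers reach this through the table AM's TID-range scan interface):

    TableScanDesc scan = heap_beginscan(rel, snapshot, 0, NULL, NULL,
                                        SO_TYPE_TIDRANGESCAN | SO_ALLOW_PAGEMODE);

    heap_set_tidrange(scan, &mintid, &maxtid);
    while (heap_getnextslot_tidrange(scan, ForwardScanDirection, slot))
    {
        /* every returned tuple's t_self lies within [mintid, maxtid] */
    }
    heap_endscan(scan);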

◆ heap_hot_search_buffer()

bool heap_hot_search_buffer ( ItemPointer  tid,
Relation  relation,
Buffer  buffer,
Snapshot  snapshot,
HeapTuple  heapTuple,
bool all_dead,
bool  first_call 
)

Definition at line 1631 of file heapam.c.

1634 {
1635  Page page = BufferGetPage(buffer);
1636  TransactionId prev_xmax = InvalidTransactionId;
1637  BlockNumber blkno;
1638  OffsetNumber offnum;
1639  bool at_chain_start;
1640  bool valid;
1641  bool skip;
1642  GlobalVisState *vistest = NULL;
1643 
1644  /* If this is not the first call, previous call returned a (live!) tuple */
1645  if (all_dead)
1646  *all_dead = first_call;
1647 
1648  blkno = ItemPointerGetBlockNumber(tid);
1649  offnum = ItemPointerGetOffsetNumber(tid);
1650  at_chain_start = first_call;
1651  skip = !first_call;
1652 
1653  /* XXX: we should assert that a snapshot is pushed or registered */
1654  Assert(TransactionIdIsValid(RecentXmin));
1655  Assert(BufferGetBlockNumber(buffer) == blkno);
1656 
1657  /* Scan through possible multiple members of HOT-chain */
1658  for (;;)
1659  {
1660  ItemId lp;
1661 
1662  /* check for bogus TID */
1663  if (offnum < FirstOffsetNumber || offnum > PageGetMaxOffsetNumber(page))
1664  break;
1665 
1666  lp = PageGetItemId(page, offnum);
1667 
1668  /* check for unused, dead, or redirected items */
1669  if (!ItemIdIsNormal(lp))
1670  {
1671  /* We should only see a redirect at start of chain */
1672  if (ItemIdIsRedirected(lp) && at_chain_start)
1673  {
1674  /* Follow the redirect */
1675  offnum = ItemIdGetRedirect(lp);
1676  at_chain_start = false;
1677  continue;
1678  }
1679  /* else must be end of chain */
1680  break;
1681  }
1682 
1683  /*
1684  * Update heapTuple to point to the element of the HOT chain we're
1685  * currently investigating. Having t_self set correctly is important
1686  * because the SSI checks and the *Satisfies routine for historical
1687  * MVCC snapshots need the correct tid to decide about the visibility.
1688  */
1689  heapTuple->t_data = (HeapTupleHeader) PageGetItem(page, lp);
1690  heapTuple->t_len = ItemIdGetLength(lp);
1691  heapTuple->t_tableOid = RelationGetRelid(relation);
1692  ItemPointerSet(&heapTuple->t_self, blkno, offnum);
1693 
1694  /*
1695  * Shouldn't see a HEAP_ONLY tuple at chain start.
1696  */
1697  if (at_chain_start && HeapTupleIsHeapOnly(heapTuple))
1698  break;
1699 
1700  /*
1701  * The xmin should match the previous xmax value, else chain is
1702  * broken.
1703  */
1704  if (TransactionIdIsValid(prev_xmax) &&
1705  !TransactionIdEquals(prev_xmax,
1706  HeapTupleHeaderGetXmin(heapTuple->t_data)))
1707  break;
1708 
1709  /*
1710  * When first_call is true (and thus, skip is initially false) we'll
1711  * return the first tuple we find. But on later passes, heapTuple
1712  * will initially be pointing to the tuple we returned last time.
1713  * Returning it again would be incorrect (and would loop forever), so
1714  * we skip it and return the next match we find.
1715  */
1716  if (!skip)
1717  {
1718  /* If it's visible per the snapshot, we must return it */
1719  valid = HeapTupleSatisfiesVisibility(heapTuple, snapshot, buffer);
1720  HeapCheckForSerializableConflictOut(valid, relation, heapTuple,
1721  buffer, snapshot);
1722 
1723  if (valid)
1724  {
1725  ItemPointerSetOffsetNumber(tid, offnum);
1726  PredicateLockTID(relation, &heapTuple->t_self, snapshot,
1727  HeapTupleHeaderGetXmin(heapTuple->t_data));
1728  if (all_dead)
1729  *all_dead = false;
1730  return true;
1731  }
1732  }
1733  skip = false;
1734 
1735  /*
1736  * If we can't see it, maybe no one else can either. At caller
1737  * request, check whether all chain members are dead to all
1738  * transactions.
1739  *
1740  * Note: if you change the criterion here for what is "dead", fix the
1741  * planner's get_actual_variable_range() function to match.
1742  */
1743  if (all_dead && *all_dead)
1744  {
1745  if (!vistest)
1746  vistest = GlobalVisTestFor(relation);
1747 
1748  if (!HeapTupleIsSurelyDead(heapTuple, vistest))
1749  *all_dead = false;
1750  }
1751 
1752  /*
1753  * Check to see if HOT chain continues past this tuple; if so fetch
1754  * the next offnum and loop around.
1755  */
1756  if (HeapTupleIsHotUpdated(heapTuple))
1757  {
1758  Assert(ItemPointerGetBlockNumber(&heapTuple->t_data->t_ctid) ==
1759  blkno);
1760  offnum = ItemPointerGetOffsetNumber(&heapTuple->t_data->t_ctid);
1761  at_chain_start = false;
1762  prev_xmax = HeapTupleHeaderGetUpdateXid(heapTuple->t_data);
1763  }
1764  else
1765  break; /* end of chain */
1766  }
1767 
1768  return false;
1769 }
bool HeapTupleIsSurelyDead(HeapTuple htup, GlobalVisState *vistest)
#define HeapTupleIsHeapOnly(tuple)
Definition: htup_details.h:683
#define HeapTupleIsHotUpdated(tuple)
Definition: htup_details.h:674
static void ItemPointerSet(ItemPointerData *pointer, BlockNumber blockNumber, OffsetNumber offNum)
Definition: itemptr.h:135
static void ItemPointerSetOffsetNumber(ItemPointerData *pointer, OffsetNumber offsetNumber)
Definition: itemptr.h:158
static const struct exclude_list_item skip[]
Definition: pg_checksums.c:108
GlobalVisState * GlobalVisTestFor(Relation rel)
Definition: procarray.c:4111
TransactionId RecentXmin
Definition: snapmgr.c:99

References Assert, BufferGetBlockNumber(), BufferGetPage(), GlobalVisTestFor(), HeapCheckForSerializableConflictOut(), HeapTupleHeaderGetUpdateXid, HeapTupleHeaderGetXmin, HeapTupleIsHeapOnly, HeapTupleIsHotUpdated, HeapTupleIsSurelyDead(), HeapTupleSatisfiesVisibility(), InvalidTransactionId, ItemIdGetLength, ItemIdGetRedirect, ItemIdIsNormal, ItemIdIsRedirected, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), ItemPointerSet(), ItemPointerSetOffsetNumber(), PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PredicateLockTID(), RecentXmin, RelationGetRelid, skip, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TransactionIdEquals, and TransactionIdIsValid.

Referenced by heap_index_delete_tuples(), heapam_index_fetch_tuple(), and heapam_scan_bitmap_next_block().

◆ heap_index_delete_tuples()

TransactionId heap_index_delete_tuples ( Relation  rel,
TM_IndexDeleteOp delstate 
)

Definition at line 7636 of file heapam.c.

7637 {
7638  /* Initial assumption is that earlier pruning took care of conflict */
7639  TransactionId snapshotConflictHorizon = InvalidTransactionId;
7640  BlockNumber blkno = InvalidBlockNumber;
7641  Buffer buf = InvalidBuffer;
7642  Page page = NULL;
7643  OffsetNumber maxoff = InvalidOffsetNumber;
7644  TransactionId priorXmax;
7645 #ifdef USE_PREFETCH
7646  IndexDeletePrefetchState prefetch_state;
7647  int prefetch_distance;
7648 #endif
7649  SnapshotData SnapshotNonVacuumable;
7650  int finalndeltids = 0,
7651  nblocksaccessed = 0;
7652 
7653  /* State that's only used in bottom-up index deletion case */
7654  int nblocksfavorable = 0;
7655  int curtargetfreespace = delstate->bottomupfreespace,
7656  lastfreespace = 0,
7657  actualfreespace = 0;
7658  bool bottomup_final_block = false;
7659 
7660  InitNonVacuumableSnapshot(SnapshotNonVacuumable, GlobalVisTestFor(rel));
7661 
7662  /* Sort caller's deltids array by TID for further processing */
7663  index_delete_sort(delstate);
7664 
7665  /*
7666  * Bottom-up case: resort deltids array in an order attuned to where the
7667  * greatest number of promising TIDs are to be found, and determine how
7668  * many blocks from the start of sorted array should be considered
7669  * favorable. This will also shrink the deltids array in order to
7670  * eliminate completely unfavorable blocks up front.
7671  */
7672  if (delstate->bottomup)
7673  nblocksfavorable = bottomup_sort_and_shrink(delstate);
7674 
7675 #ifdef USE_PREFETCH
7676  /* Initialize prefetch state. */
7677  prefetch_state.cur_hblkno = InvalidBlockNumber;
7678  prefetch_state.next_item = 0;
7679  prefetch_state.ndeltids = delstate->ndeltids;
7680  prefetch_state.deltids = delstate->deltids;
7681 
7682  /*
7683  * Determine the prefetch distance that we will attempt to maintain.
7684  *
7685  * Since the caller holds a buffer lock somewhere in rel, we'd better make
7686  * sure that isn't a catalog relation before we call code that does
7687  * syscache lookups, to avoid risk of deadlock.
7688  */
7689  if (IsCatalogRelation(rel))
7690  prefetch_distance = maintenance_io_concurrency;
7691  else
7692  prefetch_distance =
7693  get_tablespace_maintenance_io_concurrency(rel->rd_rel->reltablespace);
7694 
7695  /* Cap initial prefetch distance for bottom-up deletion caller */
7696  if (delstate->bottomup)
7697  {
7698  Assert(nblocksfavorable >= 1);
7699  Assert(nblocksfavorable <= BOTTOMUP_MAX_NBLOCKS);
7700  prefetch_distance = Min(prefetch_distance, nblocksfavorable);
7701  }
7702 
7703  /* Start prefetching. */
7704  index_delete_prefetch_buffer(rel, &prefetch_state, prefetch_distance);
7705 #endif
7706 
7707  /* Iterate over deltids, determine which to delete, check their horizon */
7708  Assert(delstate->ndeltids > 0);
7709  for (int i = 0; i < delstate->ndeltids; i++)
7710  {
7711  TM_IndexDelete *ideltid = &delstate->deltids[i];
7712  TM_IndexStatus *istatus = delstate->status + ideltid->id;
7713  ItemPointer htid = &ideltid->tid;
7714  OffsetNumber offnum;
7715 
7716  /*
7717  * Read buffer, and perform required extra steps each time a new block
7718  * is encountered. Avoid refetching if it's the same block as the one
7719  * from the last htid.
7720  */
7721  if (blkno == InvalidBlockNumber ||
7722  ItemPointerGetBlockNumber(htid) != blkno)
7723  {
7724  /*
7725  * Consider giving up early for bottom-up index deletion caller
7726  * first. (Only prefetch next-next block afterwards, when it
7727  * becomes clear that we're at least going to access the next
7728  * block in line.)
7729  *
7730  * Sometimes the first block frees so much space for bottom-up
7731  * caller that the deletion process can end without accessing any
7732  * more blocks. It is usually necessary to access 2 or 3 blocks
7733  * per bottom-up deletion operation, though.
7734  */
7735  if (delstate->bottomup)
7736  {
7737  /*
7738  * We often allow caller to delete a few additional items
7739  * whose entries we reached after the point that space target
7740  * from caller was satisfied. The cost of accessing the page
7741  * was already paid at that point, so it made sense to finish
7742  * it off. When that happened, we finalize everything here
7743  * (by finishing off the whole bottom-up deletion operation
7744  * without needlessly paying the cost of accessing any more
7745  * blocks).
7746  */
7747  if (bottomup_final_block)
7748  break;
7749 
7750  /*
7751  * Give up when we didn't enable our caller to free any
7752  * additional space as a result of processing the page that we
7753  * just finished up with. This rule is the main way in which
7754  * we keep the cost of bottom-up deletion under control.
7755  */
7756  if (nblocksaccessed >= 1 && actualfreespace == lastfreespace)
7757  break;
7758  lastfreespace = actualfreespace; /* for next time */
7759 
7760  /*
7761  * Deletion operation (which is bottom-up) will definitely
7762  * access the next block in line. Prepare for that now.
7763  *
7764  * Decay target free space so that we don't hang on for too
7765  * long with a marginal case. (Space target is only truly
7766  * helpful when it allows us to recognize that we don't need
7767  * to access more than 1 or 2 blocks to satisfy caller due to
7768  * agreeable workload characteristics.)
7769  *
7770  * We are a bit more patient when we encounter contiguous
7771  * blocks, though: these are treated as favorable blocks. The
7772  * decay process is only applied when the next block in line
7773  * is not a favorable/contiguous block. This is not an
7774  * exception to the general rule; we still insist on finding
7775  * at least one deletable item per block accessed. See
7776  * bottomup_nblocksfavorable() for full details of the theory
7777  * behind favorable blocks and heap block locality in general.
7778  *
7779  * Note: The first block in line is always treated as a
7780  * favorable block, so the earliest possible point that the
7781  * decay can be applied is just before we access the second
7782  * block in line. The Assert() verifies this for us.
7783  */
7784  Assert(nblocksaccessed > 0 || nblocksfavorable > 0);
7785  if (nblocksfavorable > 0)
7786  nblocksfavorable--;
7787  else
7788  curtargetfreespace /= 2;
7789  }
7790 
7791  /* release old buffer */
7792  if (BufferIsValid(buf))
7793  UnlockReleaseBuffer(buf);
7794 
7795  blkno = ItemPointerGetBlockNumber(htid);
7796  buf = ReadBuffer(rel, blkno);
7797  nblocksaccessed++;
7798  Assert(!delstate->bottomup ||
7799  nblocksaccessed <= BOTTOMUP_MAX_NBLOCKS);
7800 
7801 #ifdef USE_PREFETCH
7802 
7803  /*
7804  * To maintain the prefetch distance, prefetch one more page for
7805  * each page we read.
7806  */
7807  index_delete_prefetch_buffer(rel, &prefetch_state, 1);
7808 #endif
7809 
7810  LockBuffer(buf, BUFFER_LOCK_SHARE);
7811 
7812  page = BufferGetPage(buf);
7813  maxoff = PageGetMaxOffsetNumber(page);
7814  }
7815 
7816  /*
7817  * In passing, detect index corruption involving an index page with a
7818  * TID that points to a location in the heap that couldn't possibly be
7819  * correct. We only do this with actual TIDs from caller's index page
7820  * (not items reached by traversing through a HOT chain).
7821  */
7822  index_delete_check_htid(delstate, page, maxoff, htid, istatus);
7823 
7824  if (istatus->knowndeletable)
7825  Assert(!delstate->bottomup && !istatus->promising);
7826  else
7827  {
7828  ItemPointerData tmp = *htid;
7829  HeapTupleData heapTuple;
7830 
7831  /* Are any tuples from this HOT chain non-vacuumable? */
7832  if (heap_hot_search_buffer(&tmp, rel, buf, &SnapshotNonVacuumable,
7833  &heapTuple, NULL, true))
7834  continue; /* can't delete entry */
7835 
7836  /* Caller will delete, since whole HOT chain is vacuumable */
7837  istatus->knowndeletable = true;
7838 
7839  /* Maintain index free space info for bottom-up deletion case */
7840  if (delstate->bottomup)
7841  {
7842  Assert(istatus->freespace > 0);
7843  actualfreespace += istatus->freespace;
7844  if (actualfreespace >= curtargetfreespace)
7845  bottomup_final_block = true;
7846  }
7847  }
7848 
7849  /*
7850  * Maintain snapshotConflictHorizon value for deletion operation as a
7851  * whole by advancing current value using heap tuple headers. This is
7852  * loosely based on the logic for pruning a HOT chain.
7853  */
7854  offnum = ItemPointerGetOffsetNumber(htid);
7855  priorXmax = InvalidTransactionId; /* cannot check first XMIN */
7856  for (;;)
7857  {
7858  ItemId lp;
7859  HeapTupleHeader htup;
7860 
7861  /* Sanity check (pure paranoia) */
7862  if (offnum < FirstOffsetNumber)
7863  break;
7864 
7865  /*
7866  * An offset past the end of page's line pointer array is possible
7867  * when the array was truncated
7868  */
7869  if (offnum > maxoff)
7870  break;
7871 
7872  lp = PageGetItemId(page, offnum);
7873  if (ItemIdIsRedirected(lp))
7874  {
7875  offnum = ItemIdGetRedirect(lp);
7876  continue;
7877  }
7878 
7879  /*
7880  * We'll often encounter LP_DEAD line pointers (especially with an
7881  * entry marked knowndeletable by our caller up front). No heap
7882  * tuple headers get examined for an htid that leads us to an
7883  * LP_DEAD item. This is okay because the earlier pruning
7884  * operation that made the line pointer LP_DEAD in the first place
7885  * must have considered the original tuple header as part of
7886  * generating its own snapshotConflictHorizon value.
7887  *
7888  * Relying on XLOG_HEAP2_PRUNE_VACUUM_SCAN records like this is
7889  * the same strategy that index vacuuming uses in all cases. Index
7890  * VACUUM WAL records don't even have a snapshotConflictHorizon
7891  * field of their own for this reason.
7892  */
7893  if (!ItemIdIsNormal(lp))
7894  break;
7895 
7896  htup = (HeapTupleHeader) PageGetItem(page, lp);
7897 
7898  /*
7899  * Check the tuple XMIN against prior XMAX, if any
7900  */
7901  if (TransactionIdIsValid(priorXmax) &&
7902  !TransactionIdEquals(HeapTupleHeaderGetXmin(htup), priorXmax))
7903  break;
7904 
7905  HeapTupleHeaderAdvanceConflictHorizon(htup,
7906  &snapshotConflictHorizon);
7907 
7908  /*
7909  * If the tuple is not HOT-updated, then we are at the end of this
7910  * HOT-chain. No need to visit later tuples from the same update
7911  * chain (they get their own index entries) -- just move on to
7912  * next htid from index AM caller.
7913  */
7914  if (!HeapTupleHeaderIsHotUpdated(htup))
7915  break;
7916 
7917  /* Advance to next HOT chain member */
7918  Assert(ItemPointerGetBlockNumber(&htup->t_ctid) == blkno);
7919  offnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
7920  priorXmax = HeapTupleHeaderGetUpdateXid(htup);
7921  }
7922 
7923  /* Enable further/final shrinking of deltids for caller */
7924  finalndeltids = i + 1;
7925  }
7926 
7927  UnlockReleaseBuffer(buf);
7928 
7929  /*
7930  * Shrink deltids array to exclude non-deletable entries at the end. This
7931  * is not just a minor optimization. Final deltids array size might be
7932  * zero for a bottom-up caller. Index AM is explicitly allowed to rely on
7933  * ndeltids being zero in all cases with zero total deletable entries.
7934  */
7935  Assert(finalndeltids > 0 || delstate->bottomup);
7936  delstate->ndeltids = finalndeltids;
7937 
7938  return snapshotConflictHorizon;
7939 }
int maintenance_io_concurrency
Definition: bufmgr.c:185
#define Min(x, y)
Definition: c.h:1004
bool IsCatalogRelation(Relation relation)
Definition: catalog.c:103
static int bottomup_sort_and_shrink(TM_IndexDeleteOp *delstate)
Definition: heapam.c:8194
void HeapTupleHeaderAdvanceConflictHorizon(HeapTupleHeader tuple, TransactionId *snapshotConflictHorizon)
Definition: heapam.c:7491
static void index_delete_check_htid(TM_IndexDeleteOp *delstate, Page page, OffsetNumber maxoff, ItemPointer htid, TM_IndexStatus *istatus)
Definition: heapam.c:7576
#define BOTTOMUP_MAX_NBLOCKS
Definition: heapam.c:190
bool heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer, Snapshot snapshot, HeapTuple heapTuple, bool *all_dead, bool first_call)
Definition: heapam.c:1631
static void index_delete_sort(TM_IndexDeleteOp *delstate)
Definition: heapam.c:7981
static char * buf
Definition: pg_test_fsync.c:73
#define InitNonVacuumableSnapshot(snapshotdata, vistestp)
Definition: snapmgr.h:48
int get_tablespace_maintenance_io_concurrency(Oid spcid)
Definition: spccache.c:229
TM_IndexStatus * status
Definition: tableam.h:255
int bottomupfreespace
Definition: tableam.h:250
TM_IndexDelete * deltids
Definition: tableam.h:254
ItemPointerData tid
Definition: tableam.h:213
bool knowndeletable
Definition: tableam.h:220
bool promising
Definition: tableam.h:223
int16 freespace
Definition: tableam.h:224

References Assert, TM_IndexDeleteOp::bottomup, BOTTOMUP_MAX_NBLOCKS, bottomup_sort_and_shrink(), TM_IndexDeleteOp::bottomupfreespace, buf, BUFFER_LOCK_SHARE, BufferGetPage(), BufferIsValid(), TM_IndexDeleteOp::deltids, FirstOffsetNumber, TM_IndexStatus::freespace, get_tablespace_maintenance_io_concurrency(), GlobalVisTestFor(), heap_hot_search_buffer(), HeapTupleHeaderAdvanceConflictHorizon(), HeapTupleHeaderGetUpdateXid, HeapTupleHeaderGetXmin, HeapTupleHeaderIsHotUpdated, i, TM_IndexDelete::id, index_delete_check_htid(), index_delete_sort(), InitNonVacuumableSnapshot, InvalidBlockNumber, InvalidBuffer, InvalidOffsetNumber, InvalidTransactionId, IsCatalogRelation(), ItemIdGetRedirect, ItemIdIsNormal, ItemIdIsRedirected, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), TM_IndexStatus::knowndeletable, LockBuffer(), maintenance_io_concurrency, Min, TM_IndexDeleteOp::ndeltids, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), TM_IndexStatus::promising, RelationData::rd_rel, ReadBuffer(), TM_IndexDeleteOp::status, HeapTupleHeaderData::t_ctid, TM_IndexDelete::tid, TransactionIdEquals, TransactionIdIsValid, and UnlockReleaseBuffer().

◆ heap_inplace_update()

void heap_inplace_update ( Relation  relation,
HeapTuple  tuple 
)

Definition at line 6063 of file heapam.c.

6064 {
6065  Buffer buffer;
6066  Page page;
6067  OffsetNumber offnum;
6068  ItemId lp = NULL;
6069  HeapTupleHeader htup;
6070  uint32 oldlen;
6071  uint32 newlen;
6072 
6073  /*
6074  * For now, we don't allow parallel updates. Unlike a regular update,
6075  * this should never create a combo CID, so it might be possible to relax
6076  * this restriction, but not without more thought and testing. It's not
6077  * clear that it would be useful, anyway.
6078  */
6079  if (IsInParallelMode())
6080  ereport(ERROR,
6081  (errcode(ERRCODE_INVALID_TRANSACTION_STATE),
6082  errmsg("cannot update tuples during a parallel operation")));
6083 
6084  INJECTION_POINT("inplace-before-pin");
6085  buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(&(tuple->t_self)));
6086  LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
6087  page = (Page) BufferGetPage(buffer);
6088 
6089  offnum = ItemPointerGetOffsetNumber(&(tuple->t_self));
6090  if (PageGetMaxOffsetNumber(page) >= offnum)
6091  lp = PageGetItemId(page, offnum);
6092 
6093  if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
6094  elog(ERROR, "invalid lp");
6095 
6096  htup = (HeapTupleHeader) PageGetItem(page, lp);
6097 
6098  oldlen = ItemIdGetLength(lp) - htup->t_hoff;
6099  newlen = tuple->t_len - tuple->t_data->t_hoff;
6100  if (oldlen != newlen || htup->t_hoff != tuple->t_data->t_hoff)
6101  elog(ERROR, "wrong tuple length");
6102 
6103  /* NO EREPORT(ERROR) from here till changes are logged */
6104  START_CRIT_SECTION();
6105 
6106  memcpy((char *) htup + htup->t_hoff,
6107  (char *) tuple->t_data + tuple->t_data->t_hoff,
6108  newlen);
6109 
6110  MarkBufferDirty(buffer);
6111 
6112  /* XLOG stuff */
6113  if (RelationNeedsWAL(relation))
6114  {
6115  xl_heap_inplace xlrec;
6116  XLogRecPtr recptr;
6117 
6118  xlrec.offnum = ItemPointerGetOffsetNumber(&tuple->t_self);
6119 
6120  XLogBeginInsert();
6121  XLogRegisterData((char *) &xlrec, SizeOfHeapInplace);
6122 
6123  XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
6124  XLogRegisterBufData(0, (char *) htup + htup->t_hoff, newlen);
6125 
6126  /* inplace updates aren't decoded atm, don't log the origin */
6127 
6128  recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_INPLACE);
6129 
6130  PageSetLSN(page, recptr);
6131  }
6132 
6133  END_CRIT_SECTION();
6134 
6135  UnlockReleaseBuffer(buffer);
6136 
6137  /*
6138  * Send out shared cache inval if necessary. Note that because we only
6139  * pass the new version of the tuple, this mustn't be used for any
6140  * operations that could change catcache lookup keys. But we aren't
6141  * bothering with index updates either, so that's true a fortiori.
6142  */
6143  if (!IsBootstrapProcessingMode())
6144  CacheInvalidateHeapTuple(relation, tuple, NULL);
6145 }
unsigned int uint32
Definition: c.h:506
#define SizeOfHeapInplace
Definition: heapam_xlog.h:430
#define XLOG_HEAP_INPLACE
Definition: heapam_xlog.h:39
#define INJECTION_POINT(name)
#define IsBootstrapProcessingMode()
Definition: miscadmin.h:454
OffsetNumber offnum
Definition: heapam_xlog.h:427
void XLogRegisterBufData(uint8 block_id, char *data, uint32 len)
Definition: xloginsert.c:405

References BUFFER_LOCK_EXCLUSIVE, BufferGetPage(), CacheInvalidateHeapTuple(), elog, END_CRIT_SECTION, ereport, errcode(), errmsg(), ERROR, INJECTION_POINT, IsBootstrapProcessingMode, IsInParallelMode(), ItemIdGetLength, ItemIdIsNormal, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), LockBuffer(), MarkBufferDirty(), xl_heap_inplace::offnum, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PageSetLSN(), ReadBuffer(), REGBUF_STANDARD, RelationNeedsWAL, SizeOfHeapInplace, START_CRIT_SECTION, HeapTupleData::t_data, HeapTupleHeaderData::t_hoff, HeapTupleData::t_len, HeapTupleData::t_self, UnlockReleaseBuffer(), XLOG_HEAP_INPLACE, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by create_toast_table(), dropdb(), EventTriggerOnLogin(), index_update_stats(), vac_update_datfrozenxid(), and vac_update_relstats().
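
A hedged sketch in the spirit of vac_update_relstats(): fetch a copy of a pg_class tuple, change only fixed-width fields so the length cannot change, and write it back non-transactionally. classRel, ctup, num_pages, and num_tuples are placeholders for caller-provided state.

    Form_pg_class pgcform = (Form_pg_class) GETSTRUCT(ctup);

    pgcform->relpages  = (int32) num_pages;
    pgcform->reltuples = (float4) num_tuples;

    /* overwrites the existing tuple bytes; no new version, no index updates */
    heap_inplace_update(classRel, ctup);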

◆ heap_insert()

void heap_insert ( Relation  relation,
HeapTuple  tup,
CommandId  cid,
int  options,
BulkInsertState  bistate 
)

Definition at line 1994 of file heapam.c.

1996 {
1997  TransactionId xid = GetCurrentTransactionId();
1998  HeapTuple heaptup;
1999  Buffer buffer;
2000  Buffer vmbuffer = InvalidBuffer;
2001  bool all_visible_cleared = false;
2002 
2003  /* Cheap, simplistic check that the tuple matches the rel's rowtype. */
2004  Assert(HeapTupleHeaderGetNatts(tup->t_data) <=
2005  RelationGetNumberOfAttributes(relation));
2006 
2007  /*
2008  * Fill in tuple header fields and toast the tuple if necessary.
2009  *
2010  * Note: below this point, heaptup is the data we actually intend to store
2011  * into the relation; tup is the caller's original untoasted data.
2012  */
2013  heaptup = heap_prepare_insert(relation, tup, xid, cid, options);
2014 
2015  /*
2016  * Find buffer to insert this tuple into. If the page is all visible,
2017  * this will also pin the requisite visibility map page.
2018  */
2019  buffer = RelationGetBufferForTuple(relation, heaptup->t_len,
2020  InvalidBuffer, options, bistate,
2021  &vmbuffer, NULL,
2022  0);
2023 
2024  /*
2025  * We're about to do the actual insert -- but check for conflict first, to
2026  * avoid possibly having to roll back work we've just done.
2027  *
2028  * This is safe without a recheck as long as there is no possibility of
2029  * another process scanning the page between this check and the insert
2030  * being visible to the scan (i.e., an exclusive buffer content lock is
2031  * continuously held from this point until the tuple insert is visible).
2032  *
2033  * For a heap insert, we only need to check for table-level SSI locks. Our
2034  * new tuple can't possibly conflict with existing tuple locks, and heap
2035  * page locks are only consolidated versions of tuple locks; they do not
2036  * lock "gaps" as index page locks do. So we don't need to specify a
2037  * buffer when making the call, which makes for a faster check.
2038  */
2039  CheckForSerializableConflictIn(relation, NULL, InvalidBlockNumber);
2040 
2041  /* NO EREPORT(ERROR) from here till changes are logged */
2042  START_CRIT_SECTION();
2043 
2044  RelationPutHeapTuple(relation, buffer, heaptup,
2045  (options & HEAP_INSERT_SPECULATIVE) != 0);
2046 
2047  if (PageIsAllVisible(BufferGetPage(buffer)))
2048  {
2049  all_visible_cleared = true;
2050  PageClearAllVisible(BufferGetPage(buffer));
2051  visibilitymap_clear(relation,
2052  ItemPointerGetBlockNumber(&(heaptup->t_self)),
2053  vmbuffer, VISIBILITYMAP_VALID_BITS);
2054  }
2055 
2056  /*
2057  * XXX Should we set PageSetPrunable on this page ?
2058  *
2059  * The inserting transaction may eventually abort thus making this tuple
2060  * DEAD and hence available for pruning. Though we don't want to optimize
2061  * for aborts, if no other tuple in this page is UPDATEd/DELETEd, the
2062  * aborted tuple will never be pruned until next vacuum is triggered.
2063  *
2064  * If you do add PageSetPrunable here, add it in heap_xlog_insert too.
2065  */
2066 
2067  MarkBufferDirty(buffer);
2068 
2069  /* XLOG stuff */
2070  if (RelationNeedsWAL(relation))
2071  {
2072  xl_heap_insert xlrec;
2073  xl_heap_header xlhdr;
2074  XLogRecPtr recptr;
2075  Page page = BufferGetPage(buffer);
2076  uint8 info = XLOG_HEAP_INSERT;
2077  int bufflags = 0;
2078 
2079  /*
2080  * If this is a catalog, we need to transmit combo CIDs to properly
2081  * decode, so log that as well.
2082  */
2083  if (RelationIsAccessibleInLogicalDecoding(relation))
2084  log_heap_new_cid(relation, heaptup);
2085 
2086  /*
2087  * If this is the single and first tuple on page, we can reinit the
2088  * page instead of restoring the whole thing. Set flag, and hide
2089  * buffer references from XLogInsert.
2090  */
2091  if (ItemPointerGetOffsetNumber(&(heaptup->t_self)) == FirstOffsetNumber &&
2092  PageGetMaxOffsetNumber(page) == FirstOffsetNumber)
2093  {
2094  info |= XLOG_HEAP_INIT_PAGE;
2095  bufflags |= REGBUF_WILL_INIT;
2096  }
2097 
2098  xlrec.offnum = ItemPointerGetOffsetNumber(&heaptup->t_self);
2099  xlrec.flags = 0;
2100  if (all_visible_cleared)
2101  xlrec.flags |= XLH_INSERT_ALL_VISIBLE_CLEARED;
2102  if (options & HEAP_INSERT_SPECULATIVE)
2103  xlrec.flags |= XLH_INSERT_IS_SPECULATIVE;
2104  Assert(ItemPointerGetBlockNumber(&heaptup->t_self) == BufferGetBlockNumber(buffer));
2105 
2106  /*
2107  * For logical decoding, we need the tuple even if we're doing a full
2108  * page write, so make sure it's included even if we take a full-page
2109  * image. (XXX We could alternatively store a pointer into the FPW).
2110  */
2111  if (RelationIsLogicallyLogged(relation) &&
2112  !(options & HEAP_INSERT_NO_LOGICAL))
2113  {
2114  xlrec.flags |= XLH_INSERT_CONTAINS_NEW_TUPLE;
2115  bufflags |= REGBUF_KEEP_DATA;
2116 
2117  if (IsToastRelation(relation))
2118  xlrec.flags |= XLH_INSERT_ON_TOAST_RELATION;
2119  }
2120 
2121  XLogBeginInsert();
2122  XLogRegisterData((char *) &xlrec, SizeOfHeapInsert);
2123 
2124  xlhdr.t_infomask2 = heaptup->t_data->t_infomask2;
2125  xlhdr.t_infomask = heaptup->t_data->t_infomask;
2126  xlhdr.t_hoff = heaptup->t_data->t_hoff;
2127 
2128  /*
2129  * note we mark xlhdr as belonging to buffer; if XLogInsert decides to
2130  * write the whole page to the xlog, we don't need to store
2131  * xl_heap_header in the xlog.
2132  */
2133  XLogRegisterBuffer(0, buffer, REGBUF_STANDARD | bufflags);
2134  XLogRegisterBufData(0, (char *) &xlhdr, SizeOfHeapHeader);
2135  /* PG73FORMAT: write bitmap [+ padding] [+ oid] + data */
2136  XLogRegisterBufData(0,
2137  (char *) heaptup->t_data + SizeofHeapTupleHeader,
2138  heaptup->t_len - SizeofHeapTupleHeader);
2139 
2140  /* filtering by origin on a row level is much more efficient */
2141  XLogSetRecordFlags(XLOG_INCLUDE_ORIGIN);
2142 
2143  recptr = XLogInsert(RM_HEAP_ID, info);
2144 
2145  PageSetLSN(page, recptr);
2146  }
2147 
2148  END_CRIT_SECTION();
2149 
2150  UnlockReleaseBuffer(buffer);
2151  if (vmbuffer != InvalidBuffer)
2152  ReleaseBuffer(vmbuffer);
2153 
2154  /*
2155  * If tuple is cachable, mark it for invalidation from the caches in case
2156  * we abort. Note it is OK to do this after releasing the buffer, because
2157  * the heaptup data structure is all in local memory, not in the shared
2158  * buffer.
2159  */
2160  CacheInvalidateHeapTuple(relation, heaptup, NULL);
2161 
2162  /* Note: speculative insertions are counted too, even if aborted later */
2163  pgstat_count_heap_insert(relation, 1);
2164 
2165  /*
2166  * If heaptup is a private copy, release it. Don't forget to copy t_self
2167  * back to the caller's image, too.
2168  */
2169  if (heaptup != tup)
2170  {
2171  tup->t_self = heaptup->t_self;
2172  heap_freetuple(heaptup);
2173  }
2174 }
unsigned char uint8
Definition: c.h:504
static HeapTuple heap_prepare_insert(Relation relation, HeapTuple tup, TransactionId xid, CommandId cid, int options)
Definition: heapam.c:2183
#define HEAP_INSERT_SPECULATIVE
Definition: heapam.h:38
#define HEAP_INSERT_NO_LOGICAL
Definition: heapam.h:37
#define XLH_INSERT_ON_TOAST_RELATION
Definition: heapam_xlog.h:75
#define XLH_INSERT_IS_SPECULATIVE
Definition: heapam_xlog.h:73
#define XLH_INSERT_ALL_VISIBLE_CLEARED
Definition: heapam_xlog.h:71
#define XLOG_HEAP_INSERT
Definition: heapam_xlog.h:32
#define SizeOfHeapInsert
Definition: heapam_xlog.h:167
#define XLH_INSERT_CONTAINS_NEW_TUPLE
Definition: heapam_xlog.h:74
#define XLOG_HEAP_INIT_PAGE
Definition: heapam_xlog.h:46
void RelationPutHeapTuple(Relation relation, Buffer buffer, HeapTuple tuple, bool token)
Definition: hio.c:35
Buffer RelationGetBufferForTuple(Relation relation, Size len, Buffer otherBuffer, int options, BulkInsertState bistate, Buffer *vmbuffer, Buffer *vmbuffer_other, int num_pages)
Definition: hio.c:502
#define HeapTupleHeaderGetNatts(tup)
Definition: htup_details.h:529
void pgstat_count_heap_insert(Relation rel, PgStat_Counter n)
#define RelationIsLogicallyLogged(relation)
Definition: rel.h:701
#define RelationGetNumberOfAttributes(relation)
Definition: rel.h:511
OffsetNumber offnum
Definition: heapam_xlog.h:161
#define REGBUF_KEEP_DATA
Definition: xloginsert.h:35
#define REGBUF_WILL_INIT
Definition: xloginsert.h:33

References Assert, BufferGetBlockNumber(), BufferGetPage(), CacheInvalidateHeapTuple(), CheckForSerializableConflictIn(), END_CRIT_SECTION, FirstOffsetNumber, xl_heap_insert::flags, GetCurrentTransactionId(), heap_freetuple(), HEAP_INSERT_NO_LOGICAL, HEAP_INSERT_SPECULATIVE, heap_prepare_insert(), HeapTupleHeaderGetNatts, InvalidBlockNumber, InvalidBuffer, IsToastRelation(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), log_heap_new_cid(), MarkBufferDirty(), xl_heap_insert::offnum, PageClearAllVisible(), PageGetMaxOffsetNumber(), PageIsAllVisible(), PageSetLSN(), pgstat_count_heap_insert(), REGBUF_KEEP_DATA, REGBUF_STANDARD, REGBUF_WILL_INIT, RelationGetBufferForTuple(), RelationGetNumberOfAttributes, RelationIsAccessibleInLogicalDecoding, RelationIsLogicallyLogged, RelationNeedsWAL, RelationPutHeapTuple(), ReleaseBuffer(), SizeOfHeapHeader, SizeOfHeapInsert, SizeofHeapTupleHeader, START_CRIT_SECTION, HeapTupleData::t_data, xl_heap_header::t_hoff, HeapTupleHeaderData::t_hoff, xl_heap_header::t_infomask, HeapTupleHeaderData::t_infomask, xl_heap_header::t_infomask2, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, UnlockReleaseBuffer(), visibilitymap_clear(), VISIBILITYMAP_VALID_BITS, XLH_INSERT_ALL_VISIBLE_CLEARED, XLH_INSERT_CONTAINS_NEW_TUPLE, XLH_INSERT_IS_SPECULATIVE, XLH_INSERT_ON_TOAST_RELATION, XLOG_HEAP_INIT_PAGE, XLOG_HEAP_INSERT, XLOG_INCLUDE_ORIGIN, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), XLogRegisterData(), and XLogSetRecordFlags().

Referenced by heapam_tuple_insert(), heapam_tuple_insert_speculative(), simple_heap_insert(), and toast_save_datum().
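
For a single row the call reduces to the pattern that simple_heap_insert() wraps, while a bulk loader threads a BulkInsertState through repeated calls so the target buffer stays pinned. A hedged sketch (rel and tup are assumed to be prepared by the caller):

    BulkInsertState bistate;

    /* one-off insert with default options */
    heap_insert(rel, tup, GetCurrentCommandId(true), 0, NULL);

    /* bulk path: reuse the same buffer across many rows */
    bistate = GetBulkInsertState();
    heap_insert(rel, tup, GetCurrentCommandId(true),
                HEAP_INSERT_SKIP_FSM, bistate);
    FreeBulkInsertState(bistate);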

◆ heap_lock_tuple()

TM_Result heap_lock_tuple ( Relation  relation,
HeapTuple  tuple,
CommandId  cid,
LockTupleMode  mode,
LockWaitPolicy  wait_policy,
bool  follow_updates,
Buffer buffer,
struct TM_FailureData tmfd 
)

Definition at line 4310 of file heapam.c.

4314 {
4315  TM_Result result;
4316  ItemPointer tid = &(tuple->t_self);
4317  ItemId lp;
4318  Page page;
4319  Buffer vmbuffer = InvalidBuffer;
4320  BlockNumber block;
4321  TransactionId xid,
4322  xmax;
4323  uint16 old_infomask,
4324  new_infomask,
4325  new_infomask2;
4326  bool first_time = true;
4327  bool skip_tuple_lock = false;
4328  bool have_tuple_lock = false;
4329  bool cleared_all_frozen = false;
4330 
4331  *buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
4332  block = ItemPointerGetBlockNumber(tid);
4333 
4334  /*
4335  * Before locking the buffer, pin the visibility map page if it appears to
4336  * be necessary. Since we haven't got the lock yet, someone else might be
4337  * in the middle of changing this, so we'll need to recheck after we have
4338  * the lock.
4339  */
4340  if (PageIsAllVisible(BufferGetPage(*buffer)))
4341  visibilitymap_pin(relation, block, &vmbuffer);
4342 
4343  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4344 
4345  page = BufferGetPage(*buffer);
4346  lp = PageGetItemId(page, ItemPointerGetOffsetNumber(tid));
4347  Assert(ItemIdIsNormal(lp));
4348 
4349  tuple->t_data = (HeapTupleHeader) PageGetItem(page, lp);
4350  tuple->t_len = ItemIdGetLength(lp);
4351  tuple->t_tableOid = RelationGetRelid(relation);
4352 
4353 l3:
4354  result = HeapTupleSatisfiesUpdate(tuple, cid, *buffer);
4355 
4356  if (result == TM_Invisible)
4357  {
4358  /*
4359  * This is possible, but only when locking a tuple for ON CONFLICT
4360  * UPDATE. We return this value here rather than throwing an error in
4361  * order to give that case the opportunity to throw a more specific
4362  * error.
4363  */
4364  result = TM_Invisible;
4365  goto out_locked;
4366  }
4367  else if (result == TM_BeingModified ||
4368  result == TM_Updated ||
4369  result == TM_Deleted)
4370  {
4371  TransactionId xwait;
4372  uint16 infomask;
4373  uint16 infomask2;
4374  bool require_sleep;
4375  ItemPointerData t_ctid;
4376 
4377  /* must copy state data before unlocking buffer */
4378  xwait = HeapTupleHeaderGetRawXmax(tuple->t_data);
4379  infomask = tuple->t_data->t_infomask;
4380  infomask2 = tuple->t_data->t_infomask2;
4381  ItemPointerCopy(&tuple->t_data->t_ctid, &t_ctid);
4382 
4383  LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
4384 
4385  /*
4386  * If any subtransaction of the current top transaction already holds
4387  * a lock as strong as or stronger than what we're requesting, we
4388  * effectively hold the desired lock already. We *must* succeed
4389  * without trying to take the tuple lock, else we will deadlock
4390  * against anyone wanting to acquire a stronger lock.
4391  *
4392  * Note we only do this the first time we loop on the HTSU result;
4393  * there is no point in testing in subsequent passes, because
4394  * evidently our own transaction cannot have acquired a new lock after
4395  * the first time we checked.
4396  */
4397  if (first_time)
4398  {
4399  first_time = false;
4400 
4401  if (infomask & HEAP_XMAX_IS_MULTI)
4402  {
4403  int i;
4404  int nmembers;
4405  MultiXactMember *members;
4406 
4407  /*
4408  * We don't need to allow old multixacts here; if that had
4409  * been the case, HeapTupleSatisfiesUpdate would have returned
4410  * MayBeUpdated and we wouldn't be here.
4411  */
4412  nmembers =
4413  GetMultiXactIdMembers(xwait, &members, false,
4414  HEAP_XMAX_IS_LOCKED_ONLY(infomask));
4415 
4416  for (i = 0; i < nmembers; i++)
4417  {
4418  /* only consider members of our own transaction */
4419  if (!TransactionIdIsCurrentTransactionId(members[i].xid))
4420  continue;
4421 
4422  if (TUPLOCK_from_mxstatus(members[i].status) >= mode)
4423  {
4424  pfree(members);
4425  result = TM_Ok;
4426  goto out_unlocked;
4427  }
4428  else
4429  {
4430  /*
4431  * Disable acquisition of the heavyweight tuple lock.
4432  * Otherwise, when promoting a weaker lock, we might
4433  * deadlock with another locker that has acquired the
4434  * heavyweight tuple lock and is waiting for our
4435  * transaction to finish.
4436  *
4437  * Note that in this case we still need to wait for
4438  * the multixact if required, to avoid acquiring
4439  * conflicting locks.
4440  */
4441  skip_tuple_lock = true;
4442  }
4443  }
4444 
4445  if (members)
4446  pfree(members);
4447  }
4448  else if (TransactionIdIsCurrentTransactionId(xwait))
4449  {
4450  switch (mode)
4451  {
4452  case LockTupleKeyShare:
4453  Assert(HEAP_XMAX_IS_KEYSHR_LOCKED(infomask) ||
4454  HEAP_XMAX_IS_SHR_LOCKED(infomask) ||
4455  HEAP_XMAX_IS_EXCL_LOCKED(infomask));
4456  result = TM_Ok;
4457  goto out_unlocked;
4458  case LockTupleShare:
4459  if (HEAP_XMAX_IS_SHR_LOCKED(infomask) ||
4460  HEAP_XMAX_IS_EXCL_LOCKED(infomask))
4461  {
4462  result = TM_Ok;
4463  goto out_unlocked;
4464  }
4465  break;
4466  case LockTupleNoKeyExclusive:
4467  if (HEAP_XMAX_IS_EXCL_LOCKED(infomask))
4468  {
4469  result = TM_Ok;
4470  goto out_unlocked;
4471  }
4472  break;
4473  case LockTupleExclusive:
4474  if (HEAP_XMAX_IS_EXCL_LOCKED(infomask) &&
4475  infomask2 & HEAP_KEYS_UPDATED)
4476  {
4477  result = TM_Ok;
4478  goto out_unlocked;
4479  }
4480  break;
4481  }
4482  }
4483  }
4484 
4485  /*
4486  * Initially assume that we will have to wait for the locking
4487  * transaction(s) to finish. We check various cases below in which
4488  * this can be turned off.
4489  */
4490  require_sleep = true;
4491  if (mode == LockTupleKeyShare)
4492  {
4493  /*
4494  * If we're requesting KeyShare, and there's no update present, we
4495  * don't need to wait. Even if there is an update, we can still
4496  * continue if the key hasn't been modified.
4497  *
4498  * However, if there are updates, we need to walk the update chain
4499  * to mark future versions of the row as locked, too. That way,
4500  * if somebody deletes that future version, we're protected
4501  * against the key going away. This locking of future versions
4502  * could block momentarily, if a concurrent transaction is
4503  * deleting a key; or it could return a value to the effect that
4504  * the transaction deleting the key has already committed. So we
4505  * do this before re-locking the buffer; otherwise this would be
4506  * prone to deadlocks.
4507  *
4508  * Note that the TID we're locking was grabbed before we unlocked
4509  * the buffer. For it to change while we're not looking, the
4510  * other properties we're testing for below after re-locking the
4511  * buffer would also change, in which case we would restart this
4512  * loop above.
4513  */
4514  if (!(infomask2 & HEAP_KEYS_UPDATED))
4515  {
4516  bool updated;
4517 
4518  updated = !HEAP_XMAX_IS_LOCKED_ONLY(infomask);
4519 
4520  /*
4521  * If there are updates, follow the update chain; bail out if
4522  * that cannot be done.
4523  */
4524  if (follow_updates && updated)
4525  {
4526  TM_Result res;
4527 
4528  res = heap_lock_updated_tuple(relation, tuple, &t_ctid,
4529  GetCurrentTransactionId(),
4530  mode);
4531  if (res != TM_Ok)
4532  {
4533  result = res;
4534  /* recovery code expects to have buffer lock held */
4535  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4536  goto failed;
4537  }
4538  }
4539 
4540  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4541 
4542  /*
4543  * Make sure it's still an appropriate lock, else start over.
4544  * Also, if it wasn't updated before we released the lock, but
4545  * is updated now, we start over too; the reason is that we
4546  * now need to follow the update chain to lock the new
4547  * versions.
4548  */
4549  if (!HeapTupleHeaderIsOnlyLocked(tuple->t_data) &&
4550  ((tuple->t_data->t_infomask2 & HEAP_KEYS_UPDATED) ||
4551  !updated))
4552  goto l3;
4553 
4554  /* Things look okay, so we can skip sleeping */
4555  require_sleep = false;
4556 
4557  /*
4558  * Note we allow Xmax to change here; other updaters/lockers
4559  * could have modified it before we grabbed the buffer lock.
4560  * However, this is not a problem, because with the recheck we
4561  * just did we ensure that they still don't conflict with the
4562  * lock we want.
4563  */
4564  }
4565  }
4566  else if (mode == LockTupleShare)
4567  {
4568  /*
4569  * If we're requesting Share, we can similarly avoid sleeping if
4570  * there's no update and no exclusive lock present.
4571  */
4572  if (HEAP_XMAX_IS_LOCKED_ONLY(infomask) &&
4573  !HEAP_XMAX_IS_EXCL_LOCKED(infomask))
4574  {
4575  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4576 
4577  /*
4578  * Make sure it's still an appropriate lock, else start over.
4579  * See above about allowing xmax to change.
4580  */
4581  if (!HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_data->t_infomask) ||
4582  HEAP_XMAX_IS_EXCL_LOCKED(tuple->t_data->t_infomask))
4583  goto l3;
4584  require_sleep = false;
4585  }
4586  }
4587  else if (mode == LockTupleNoKeyExclusive)
4588  {
4589  /*
4590  * If we're requesting NoKeyExclusive, we might also be able to
4591  * avoid sleeping; just ensure that there is no conflicting lock
4592  * already acquired.
4593  */
4594  if (infomask & HEAP_XMAX_IS_MULTI)
4595  {
4596  if (!DoesMultiXactIdConflict((MultiXactId) xwait, infomask,
4597  mode, NULL))
4598  {
4599  /*
4600  * No conflict, but if the xmax changed under us in the
4601  * meantime, start over.
4602  */
4603  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4604  if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) ||
4605  !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data),
4606  xwait))
4607  goto l3;
4608 
4609  /* otherwise, we're good */
4610  require_sleep = false;
4611  }
4612  }
4613  else if (HEAP_XMAX_IS_KEYSHR_LOCKED(infomask))
4614  {
4615  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4616 
4617  /* if the xmax changed in the meantime, start over */
4618  if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) ||
4619  !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data),
4620  xwait))
4621  goto l3;
4622  /* otherwise, we're good */
4623  require_sleep = false;
4624  }
4625  }
4626 
4627  /*
4628  * As a check independent from those above, we can also avoid sleeping
4629  * if the current transaction is the sole locker of the tuple. Note
4630  * that the strength of the lock already held is irrelevant; this is
4631  * not about recording the lock in Xmax (which will be done regardless
4632  * of this optimization, below). Also, note that the cases where we
4633  * hold a lock stronger than we are requesting are already handled
4634  * above by not doing anything.
4635  *
4636  * Note we only deal with the non-multixact case here; MultiXactIdWait
4637  * is well equipped to deal with this situation on its own.
4638  */
4639  if (require_sleep && !(infomask & HEAP_XMAX_IS_MULTI) &&
4640  TransactionIdIsCurrentTransactionId(xwait))
4641  {
4642  /* ... but if the xmax changed in the meantime, start over */
4643  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4644  if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) ||
4645  !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data),
4646  xwait))
4647  goto l3;
4649  require_sleep = false;
4650  }
4651 
4652  /*
4653  * Time to sleep on the other transaction/multixact, if necessary.
4654  *
4655  * If the other transaction is an update/delete that's already
4656  * committed, then sleeping cannot possibly do any good: if we're
4657  * required to sleep, get out to raise an error instead.
4658  *
4659  * By here, we either have already acquired the buffer exclusive lock,
4660  * or we must wait for the locking transaction or multixact; so below
4661  * we ensure that we grab buffer lock after the sleep.
4662  */
4663  if (require_sleep && (result == TM_Updated || result == TM_Deleted))
4664  {
4665  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4666  goto failed;
4667  }
4668  else if (require_sleep)
4669  {
4670  /*
4671  * Acquire tuple lock to establish our priority for the tuple, or
4672  * die trying. LockTuple will release us when we are next-in-line
4673  * for the tuple. We must do this even if we are share-locking,
4674  * but not if we already have a weaker lock on the tuple.
4675  *
4676  * If we are forced to "start over" below, we keep the tuple lock;
4677  * this arranges that we stay at the head of the line while
4678  * rechecking tuple state.
4679  */
4680  if (!skip_tuple_lock &&
4681  !heap_acquire_tuplock(relation, tid, mode, wait_policy,
4682  &have_tuple_lock))
4683  {
4684  /*
4685  * This can only happen if wait_policy is Skip and the lock
4686  * couldn't be obtained.
4687  */
4688  result = TM_WouldBlock;
4689  /* recovery code expects to have buffer lock held */
4690  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4691  goto failed;
4692  }
4693 
4694  if (infomask & HEAP_XMAX_IS_MULTI)
4695  {
4696  MultiXactStatus status = get_mxact_status_for_lock(mode, false);
4697 
4698  /* We only ever lock tuples, never update them */
4699  if (status >= MultiXactStatusNoKeyUpdate)
4700  elog(ERROR, "invalid lock mode in heap_lock_tuple");
4701 
4702  /* wait for multixact to end, or die trying */
4703  switch (wait_policy)
4704  {
4705  case LockWaitBlock:
4706  MultiXactIdWait((MultiXactId) xwait, status, infomask,
4707  relation, &tuple->t_self, XLTW_Lock, NULL);
4708  break;
4709  case LockWaitSkip:
4710  if (!ConditionalMultiXactIdWait((MultiXactId) xwait,
4711  status, infomask, relation,
4712  NULL))
4713  {
4714  result = TM_WouldBlock;
4715  /* recovery code expects to have buffer lock held */
4716  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4717  goto failed;
4718  }
4719  break;
4720  case LockWaitError:
4721  if (!ConditionalMultiXactIdWait((MultiXactId) xwait,
4722  status, infomask, relation,
4723  NULL))
4724  ereport(ERROR,
4725  (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
4726  errmsg("could not obtain lock on row in relation \"%s\"",
4727  RelationGetRelationName(relation))));
4728 
4729  break;
4730  }
4731 
4732  /*
4733  * Of course, the multixact might not be done here: if we're
4734  * requesting a light lock mode, other transactions with light
4735  * locks could still be alive, as well as locks owned by our
4736  * own xact or other subxacts of this backend. We need to
4737  * preserve the surviving MultiXact members. Note that it
4738  * isn't absolutely necessary in the latter case, but doing so
4739  * is simpler.
4740  */
4741  }
4742  else
4743  {
4744  /* wait for regular transaction to end, or die trying */
4745  switch (wait_policy)
4746  {
4747  case LockWaitBlock:
4748  XactLockTableWait(xwait, relation, &tuple->t_self,
4749  XLTW_Lock);
4750  break;
4751  case LockWaitSkip:
4752  if (!ConditionalXactLockTableWait(xwait))
4753  {
4754  result = TM_WouldBlock;
4755  /* recovery code expects to have buffer lock held */
4756  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4757  goto failed;
4758  }
4759  break;
4760  case LockWaitError:
4761  if (!ConditionalXactLockTableWait(xwait))
4762  ereport(ERROR,
4763  (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
4764  errmsg("could not obtain lock on row in relation \"%s\"",
4765  RelationGetRelationName(relation))));
4766  break;
4767  }
4768  }
4769 
4770  /* if there are updates, follow the update chain */
4771  if (follow_updates && !HEAP_XMAX_IS_LOCKED_ONLY(infomask))
4772  {
4773  TM_Result res;
4774 
4775  res = heap_lock_updated_tuple(relation, tuple, &t_ctid,
4776  GetCurrentTransactionId(),
4777  mode);
4778  if (res != TM_Ok)
4779  {
4780  result = res;
4781  /* recovery code expects to have buffer lock held */
4782  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4783  goto failed;
4784  }
4785  }
4786 
4787  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4788 
4789  /*
4790  * xwait is done, but if xwait had just locked the tuple then some
4791  * other xact could update this tuple before we get to this point.
4792  * Check for xmax change, and start over if so.
4793  */
4794  if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) ||
4795  !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data),
4796  xwait))
4797  goto l3;
4798 
4799  if (!(infomask & HEAP_XMAX_IS_MULTI))
4800  {
4801  /*
4802  * Otherwise check if it committed or aborted. Note we cannot
4803  * be here if the tuple was only locked by somebody who didn't
4804  * conflict with us; that would have been handled above. So
4805  * that transaction must necessarily be gone by now. But
4806  * don't check for this in the multixact case, because some
4807  * locker transactions might still be running.
4808  */
4809  UpdateXmaxHintBits(tuple->t_data, *buffer, xwait);
4810  }
4811  }
4812 
4813  /* By here, we're certain that we hold buffer exclusive lock again */
4814 
4815  /*
4816  * We may lock if previous xmax aborted, or if it committed but only
4817  * locked the tuple without updating it; or if we didn't have to wait
4818  * at all for whatever reason.
4819  */
4820  if (!require_sleep ||
4821  (tuple->t_data->t_infomask & HEAP_XMAX_INVALID) ||
4822  HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_data->t_infomask) ||
4823  HeapTupleHeaderIsOnlyLocked(tuple->t_data))
4824  result = TM_Ok;
4825  else if (!ItemPointerEquals(&tuple->t_self, &tuple->t_data->t_ctid))
4826  result = TM_Updated;
4827  else
4828  result = TM_Deleted;
4829  }
4830 
4831 failed:
4832  if (result != TM_Ok)
4833  {
4834  Assert(result == TM_SelfModified || result == TM_Updated ||
4835  result == TM_Deleted || result == TM_WouldBlock);
4836 
4837  /*
4838  * When locking a tuple under LockWaitSkip semantics and we fail with
4839  * TM_WouldBlock above, it's possible for concurrent transactions to
4840  * release the lock and set HEAP_XMAX_INVALID in the meantime. So
4841  * this assert is slightly different from the equivalent one in
4842  * heap_delete and heap_update.
4843  */
4844  Assert((result == TM_WouldBlock) ||
4845  !(tuple->t_data->t_infomask & HEAP_XMAX_INVALID));
4846  Assert(result != TM_Updated ||
4847  !ItemPointerEquals(&tuple->t_self, &tuple->t_data->t_ctid));
4848  tmfd->ctid = tuple->t_data->t_ctid;
4849  tmfd->xmax = HeapTupleHeaderGetUpdateXid(tuple->t_data);
4850  if (result == TM_SelfModified)
4851  tmfd->cmax = HeapTupleHeaderGetCmax(tuple->t_data);
4852  else
4853  tmfd->cmax = InvalidCommandId;
4854  goto out_locked;
4855  }
4856 
4857  /*
4858  * If we didn't pin the visibility map page and the page has become all
4859  * visible while we were busy locking the buffer, or during some
4860  * subsequent window during which we had it unlocked, we'll have to unlock
4861  * and re-lock, to avoid holding the buffer lock across I/O. That's a bit
4862  * unfortunate, especially since we'll now have to recheck whether the
4863  * tuple has been locked or updated under us, but hopefully it won't
4864  * happen very often.
4865  */
4866  if (vmbuffer == InvalidBuffer && PageIsAllVisible(page))
4867  {
4868  LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
4869  visibilitymap_pin(relation, block, &vmbuffer);
4870  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4871  goto l3;
4872  }
4873 
4874  xmax = HeapTupleHeaderGetRawXmax(tuple->t_data);
4875  old_infomask = tuple->t_data->t_infomask;
4876 
4877  /*
4878  * If this is the first possibly-multixact-able operation in the current
4879  * transaction, set my per-backend OldestMemberMXactId setting. We can be
4880  * certain that the transaction will never become a member of any older
4881  * MultiXactIds than that. (We have to do this even if we end up just
4882  * using our own TransactionId below, since some other backend could
4883  * incorporate our XID into a MultiXact immediately afterwards.)
4884  */
4885  MultiXactIdSetOldestMember();
4886 
4887  /*
4888  * Compute the new xmax and infomask to store into the tuple. Note we do
4889  * not modify the tuple just yet, because that would leave it in the wrong
4890  * state if multixact.c elogs.
4891  */
4892  compute_new_xmax_infomask(xmax, old_infomask, tuple->t_data->t_infomask2,
4893  GetCurrentTransactionId(), mode, false,
4894  &xid, &new_infomask, &new_infomask2);
4895 
4896  START_CRIT_SECTION();
4897 
4898  /*
4899  * Store transaction information of xact locking the tuple.
4900  *
4901  * Note: Cmax is meaningless in this context, so don't set it; this avoids
4902  * possibly generating a useless combo CID. Moreover, if we're locking a
4903  * previously updated tuple, it's important to preserve the Cmax.
4904  *
4905  * Also reset the HOT UPDATE bit, but only if there's no update; otherwise
4906  * we would break the HOT chain.
4907  */
4908  tuple->t_data->t_infomask &= ~HEAP_XMAX_BITS;
4909  tuple->t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED;
4910  tuple->t_data->t_infomask |= new_infomask;
4911  tuple->t_data->t_infomask2 |= new_infomask2;
4912  if (HEAP_XMAX_IS_LOCKED_ONLY(new_infomask))
4913  HeapTupleHeaderClearHotUpdated(tuple->t_data);
4914  HeapTupleHeaderSetXmax(tuple->t_data, xid);
4915 
4916  /*
4917  * Make sure there is no forward chain link in t_ctid. Note that in the
4918  * cases where the tuple has been updated, we must not overwrite t_ctid,
4919  * because it was set by the updater. Moreover, if the tuple has been
4920  * updated, we need to follow the update chain to lock the new versions of
4921  * the tuple as well.
4922  */
4923  if (HEAP_XMAX_IS_LOCKED_ONLY(new_infomask))
4924  tuple->t_data->t_ctid = *tid;
4925 
4926  /* Clear only the all-frozen bit on visibility map if needed */
4927  if (PageIsAllVisible(page) &&
4928  visibilitymap_clear(relation, block, vmbuffer,
4929  VISIBILITYMAP_ALL_FROZEN))
4930  cleared_all_frozen = true;
4931 
4932 
4933  MarkBufferDirty(*buffer);
4934 
4935  /*
4936  * XLOG stuff. You might think that we don't need an XLOG record because
4937  * there is no state change worth restoring after a crash. You would be
4938  * wrong however: we have just written either a TransactionId or a
4939  * MultiXactId that may never have been seen on disk before, and we need
4940  * to make sure that there are XLOG entries covering those ID numbers.
4941  * Else the same IDs might be re-used after a crash, which would be
4942  * disastrous if this page made it to disk before the crash. Essentially
4943  * we have to enforce the WAL log-before-data rule even in this case.
4944  * (Also, in a PITR log-shipping or 2PC environment, we have to have XLOG
4945  * entries for everything anyway.)
4946  */
4947  if (RelationNeedsWAL(relation))
4948  {
4949  xl_heap_lock xlrec;
4950  XLogRecPtr recptr;
4951 
4952  XLogBeginInsert();
4953  XLogRegisterBuffer(0, *buffer, REGBUF_STANDARD);
4954 
4955  xlrec.offnum = ItemPointerGetOffsetNumber(&tuple->t_self);
4956  xlrec.xmax = xid;
4957  xlrec.infobits_set = compute_infobits(new_infomask,
4958  tuple->t_data->t_infomask2);
4959  xlrec.flags = cleared_all_frozen ? XLH_LOCK_ALL_FROZEN_CLEARED : 0;
4960  XLogRegisterData((char *) &xlrec, SizeOfHeapLock);
4961 
4962  /* we don't decode row locks atm, so no need to log the origin */
4963 
4964  recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_LOCK);
4965 
4966  PageSetLSN(page, recptr);
4967  }
4968 
4969  END_CRIT_SECTION();
4970 
4971  result = TM_Ok;
4972 
4973 out_locked:
4974  LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
4975 
4976 out_unlocked:
4977  if (BufferIsValid(vmbuffer))
4978  ReleaseBuffer(vmbuffer);
4979 
4980  /*
4981  * Don't update the visibility map here. Locking a tuple doesn't change
4982  * visibility info.
4983  */
4984 
4985  /*
4986  * Now that we have successfully marked the tuple as locked, we can
4987  * release the lmgr tuple lock, if we had it.
4988  */
4989  if (have_tuple_lock)
4990  UnlockTupleTuplock(relation, tid, mode);
4991 
4992  return result;
4993 }
#define TUPLOCK_from_mxstatus(status)
Definition: heapam.c:219
static TM_Result heap_lock_updated_tuple(Relation rel, HeapTuple tuple, ItemPointer ctid, TransactionId xid, LockTupleMode mode)
Definition: heapam.c:5774
static bool ConditionalMultiXactIdWait(MultiXactId multi, MultiXactStatus status, uint16 infomask, Relation rel, int *remaining)
Definition: heapam.c:7314
static MultiXactStatus get_mxact_status_for_lock(LockTupleMode mode, bool is_update)
Definition: heapam.c:4262
#define XLH_LOCK_ALL_FROZEN_CLEARED
Definition: heapam_xlog.h:392
#define XLOG_HEAP_LOCK
Definition: heapam_xlog.h:38
#define SizeOfHeapLock
Definition: heapam_xlog.h:403
#define HEAP_XMAX_IS_EXCL_LOCKED(infomask)
Definition: htup_details.h:261
#define HEAP_XMAX_IS_KEYSHR_LOCKED(infomask)
Definition: htup_details.h:263
#define HEAP_XMAX_IS_SHR_LOCKED(infomask)
Definition: htup_details.h:259
static void ItemPointerCopy(const ItemPointerData *fromPointer, ItemPointerData *toPointer)
Definition: itemptr.h:172
bool ConditionalXactLockTableWait(TransactionId xid)
Definition: lmgr.c:730
@ XLTW_Lock
Definition: lmgr.h:29
@ LockWaitSkip
Definition: lockoptions.h:41
@ LockWaitError
Definition: lockoptions.h:43
@ LockTupleNoKeyExclusive
Definition: lockoptions.h:56
@ LockTupleShare
Definition: lockoptions.h:54
@ LockTupleKeyShare
Definition: lockoptions.h:52
int GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **members, bool from_pgupgrade, bool isLockOnly)
Definition: multixact.c:1293
MultiXactStatus
Definition: multixact.h:38
@ MultiXactStatusNoKeyUpdate
Definition: multixact.h:44
static PgChecksumMode mode
Definition: pg_checksums.c:56
#define RelationGetRelationName(relation)
Definition: rel.h:539
uint8 infobits_set
Definition: heapam_xlog.h:399
OffsetNumber offnum
Definition: heapam_xlog.h:398
TransactionId xmax
Definition: heapam_xlog.h:397
@ TM_WouldBlock
Definition: tableam.h:110
#define VISIBILITYMAP_ALL_FROZEN

References Assert, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetPage(), BufferIsValid(), TM_FailureData::cmax, compute_infobits(), compute_new_xmax_infomask(), ConditionalMultiXactIdWait(), ConditionalXactLockTableWait(), TM_FailureData::ctid, DoesMultiXactIdConflict(), elog, END_CRIT_SECTION, ereport, errcode(), errmsg(), ERROR, xl_heap_lock::flags, get_mxact_status_for_lock(), GetCurrentTransactionId(), GetMultiXactIdMembers(), heap_acquire_tuplock(), HEAP_KEYS_UPDATED, heap_lock_updated_tuple(), HEAP_XMAX_BITS, HEAP_XMAX_INVALID, HEAP_XMAX_IS_EXCL_LOCKED, HEAP_XMAX_IS_KEYSHR_LOCKED, HEAP_XMAX_IS_LOCKED_ONLY, HEAP_XMAX_IS_MULTI, HEAP_XMAX_IS_SHR_LOCKED, HeapTupleHeaderClearHotUpdated, HeapTupleHeaderGetCmax(), HeapTupleHeaderGetRawXmax, HeapTupleHeaderGetUpdateXid, HeapTupleHeaderIsOnlyLocked(), HeapTupleHeaderSetXmax, HeapTupleSatisfiesUpdate(), i, xl_heap_lock::infobits_set, InvalidBuffer, InvalidCommandId, ItemIdGetLength, ItemIdIsNormal, ItemPointerCopy(), ItemPointerEquals(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), LockBuffer(), LockTupleExclusive, LockTupleKeyShare, LockTupleNoKeyExclusive, LockTupleShare, LockWaitBlock, LockWaitError, LockWaitSkip, MarkBufferDirty(), mode, MultiXactIdSetOldestMember(), MultiXactIdWait(), MultiXactStatusNoKeyUpdate, xl_heap_lock::offnum, PageGetItem(), PageGetItemId(), PageIsAllVisible(), PageSetLSN(), pfree(), ReadBuffer(), REGBUF_STANDARD, RelationGetRelationName, RelationGetRelid, RelationNeedsWAL, ReleaseBuffer(), res, SizeOfHeapLock, START_CRIT_SECTION, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TM_BeingModified, TM_Deleted, TM_Invisible, TM_Ok, TM_SelfModified, TM_Updated, TM_WouldBlock, TransactionIdEquals, TransactionIdIsCurrentTransactionId(), TUPLOCK_from_mxstatus, UnlockTupleTuplock, UpdateXmaxHintBits(), VISIBILITYMAP_ALL_FROZEN, visibilitymap_clear(), visibilitymap_pin(), XactLockTableWait(), XLH_LOCK_ALL_FROZEN_CLEARED, XLOG_HEAP_LOCK, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), XLogRegisterData(), XLTW_Lock, xl_heap_lock::xmax, TM_FailureData::xmax, and xmax_infomask_changed().

Referenced by heapam_tuple_lock().
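
As an illustration of the calling convention followed by heapam_tuple_lock() (this sketch is not part of the PostgreSQL sources, and the helper name lock_one_tuple() is hypothetical): the caller supplies the target TID in tuple->t_self, gets the containing buffer back pinned but unlocked, and examines the TM_Result together with TM_FailureData on failure.

#include "postgres.h"

#include "access/heapam.h"
#include "storage/bufmgr.h"

/*
 * Hypothetical caller sketch: lock the tuple at 'tid' FOR UPDATE, blocking
 * on any conflicting lockers.
 */
static TM_Result
lock_one_tuple(Relation rel, ItemPointer tid, CommandId cid)
{
	HeapTupleData tuple;
	Buffer		buffer;
	TM_FailureData tmfd;
	TM_Result	result;

	/* heap_lock_tuple() locates the target tuple through tuple->t_self */
	tuple.t_self = *tid;
	result = heap_lock_tuple(rel, &tuple, cid,
							 LockTupleExclusive,	/* strongest row lock */
							 LockWaitBlock,			/* sleep on conflicts */
							 true,					/* follow the update chain */
							 &buffer, &tmfd);

	/* The buffer is returned pinned but not locked; drop the pin here. */
	if (BufferIsValid(buffer))
		ReleaseBuffer(buffer);

	/*
	 * TM_Ok: the lock is now recorded in the tuple's xmax/infomask bits.
	 * TM_Updated / TM_Deleted: a committed concurrent update or delete got
	 * there first; tmfd.ctid and tmfd.xmax identify the successor version,
	 * if any.
	 */
	return result;
}

On TM_Updated a real caller normally follows tmfd.ctid to the newer tuple version (the EvalPlanQual path) rather than giving up immediately.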

◆ heap_multi_insert()

void heap_multi_insert ( Relation  relation,
struct TupleTableSlot **  slots,
int  ntuples,
CommandId  cid,
int  options,
BulkInsertState  bistate 
)

Definition at line 2263 of file heapam.c.

2265 {
2266  TransactionId xid = GetCurrentTransactionId();
2267  HeapTuple *heaptuples;
2268  int i;
2269  int ndone;
2270  PGAlignedBlock scratch;
2271  Page page;
2272  Buffer vmbuffer = InvalidBuffer;
2273  bool needwal;
2274  Size saveFreeSpace;
2275  bool need_tuple_data = RelationIsLogicallyLogged(relation);
2276  bool need_cids = RelationIsAccessibleInLogicalDecoding(relation);
2277  bool starting_with_empty_page = false;
2278  int npages = 0;
2279  int npages_used = 0;
2280 
2281  /* currently not needed (thus unsupported) for heap_multi_insert() */
2282  Assert(!(options & HEAP_INSERT_NO_LOGICAL));
2283 
2284  needwal = RelationNeedsWAL(relation);
2285  saveFreeSpace = RelationGetTargetPageFreeSpace(relation,
2286  HEAP_DEFAULT_FILLFACTOR);
2287 
2288  /* Toast and set header data in all the slots */
2289  heaptuples = palloc(ntuples * sizeof(HeapTuple));
2290  for (i = 0; i < ntuples; i++)
2291  {
2292  HeapTuple tuple;
2293 
2294  tuple = ExecFetchSlotHeapTuple(slots[i], true, NULL);
2295  slots[i]->tts_tableOid = RelationGetRelid(relation);
2296  tuple->t_tableOid = slots[i]->tts_tableOid;
2297  heaptuples[i] = heap_prepare_insert(relation, tuple, xid, cid,
2298  options);
2299  }
2300 
2301  /*
2302  * We're about to do the actual inserts -- but check for conflict first,
2303  * to minimize the possibility of having to roll back work we've just
2304  * done.
2305  *
2306  * A check here does not definitively prevent a serialization anomaly;
2307  * that check MUST be done at least past the point of acquiring an
2308  * exclusive buffer content lock on every buffer that will be affected,
2309  * and MAY be done after all inserts are reflected in the buffers and
2310  * those locks are released; otherwise there is a race condition. Since
2311  * multiple buffers can be locked and unlocked in the loop below, and it
2312  * would not be feasible to identify and lock all of those buffers before
2313  * the loop, we must do a final check at the end.
2314  *
2315  * The check here could be omitted with no loss of correctness; it is
2316  * present strictly as an optimization.
2317  *
2318  * For heap inserts, we only need to check for table-level SSI locks. Our
2319  * new tuples can't possibly conflict with existing tuple locks, and heap
2320  * page locks are only consolidated versions of tuple locks; they do not
2321  * lock "gaps" as index page locks do. So we don't need to specify a
2322  * buffer when making the call, which makes for a faster check.
2323  */
2324  CheckForSerializableConflictIn(relation, NULL, InvalidBlockNumber);
2325 
2326  ndone = 0;
2327  while (ndone < ntuples)
2328  {
2329  Buffer buffer;
2330  bool all_visible_cleared = false;
2331  bool all_frozen_set = false;
2332  int nthispage;
2333 
2334  CHECK_FOR_INTERRUPTS();
2335 
2336  /*
2337  * Compute number of pages needed to fit the to-be-inserted tuples in
2338  * the worst case. This will be used to determine how much to extend
2339  * the relation by in RelationGetBufferForTuple(), if needed. If we
2340  * filled a prior page from scratch, we can just update our last
2341  * computation, but if we started with a partially filled page,
2342  * recompute from scratch, the number of potentially required pages
2343  * can vary due to tuples needing to fit onto the page, page headers
2344  * etc.
2345  */
2346  if (ndone == 0 || !starting_with_empty_page)
2347  {
2348  npages = heap_multi_insert_pages(heaptuples, ndone, ntuples,
2349  saveFreeSpace);
2350  npages_used = 0;
2351  }
2352  else
2353  npages_used++;
2354 
2355  /*
2356  * Find buffer where at least the next tuple will fit. If the page is
2357  * all-visible, this will also pin the requisite visibility map page.
2358  *
2359  * Also pin visibility map page if COPY FREEZE inserts tuples into an
2360  * empty page. See all_frozen_set below.
2361  */
2362  buffer = RelationGetBufferForTuple(relation, heaptuples[ndone]->t_len,
2363  InvalidBuffer, options, bistate,
2364  &vmbuffer, NULL,
2365  npages - npages_used);
2366  page = BufferGetPage(buffer);
2367 
2368  starting_with_empty_page = PageGetMaxOffsetNumber(page) == 0;
2369 
2370  if (starting_with_empty_page && (options & HEAP_INSERT_FROZEN))
2371  all_frozen_set = true;
2372 
2373  /* NO EREPORT(ERROR) from here till changes are logged */
2374  START_CRIT_SECTION();
2375 
2376  /*
2377  * RelationGetBufferForTuple has ensured that the first tuple fits.
2378  * Put that on the page, and then as many other tuples as fit.
2379  */
2380  RelationPutHeapTuple(relation, buffer, heaptuples[ndone], false);
2381 
2382  /*
2383  * For logical decoding we need combo CIDs to properly decode the
2384  * catalog.
2385  */
2386  if (needwal && need_cids)
2387  log_heap_new_cid(relation, heaptuples[ndone]);
2388 
2389  for (nthispage = 1; ndone + nthispage < ntuples; nthispage++)
2390  {
2391  HeapTuple heaptup = heaptuples[ndone + nthispage];
2392 
2393  if (PageGetHeapFreeSpace(page) < MAXALIGN(heaptup->t_len) + saveFreeSpace)
2394  break;
2395 
2396  RelationPutHeapTuple(relation, buffer, heaptup, false);
2397 
2398  /*
2399  * For logical decoding we need combo CIDs to properly decode the
2400  * catalog.
2401  */
2402  if (needwal && need_cids)
2403  log_heap_new_cid(relation, heaptup);
2404  }
2405 
2406  /*
2407  * If the page is all visible, need to clear that, unless we're only
2408  * going to add further frozen rows to it.
2409  *
2410  * If we're only adding already frozen rows to a previously empty
2411  * page, mark it as all-visible.
2412  */
2413  if (PageIsAllVisible(page) && !(options & HEAP_INSERT_FROZEN))
2414  {
2415  all_visible_cleared = true;
2416  PageClearAllVisible(page);
2417  visibilitymap_clear(relation,
2418  BufferGetBlockNumber(buffer),
2419  vmbuffer, VISIBILITYMAP_VALID_BITS);
2420  }
2421  else if (all_frozen_set)
2422  PageSetAllVisible(page);
2423 
2424  /*
2425  * XXX Should we set PageSetPrunable on this page ? See heap_insert()
2426  */
2427 
2428  MarkBufferDirty(buffer);
2429 
2430  /* XLOG stuff */
2431  if (needwal)
2432  {
2433  XLogRecPtr recptr;
2434  xl_heap_multi_insert *xlrec;
2435  uint8 info = XLOG_HEAP2_MULTI_INSERT;
2436  char *tupledata;
2437  int totaldatalen;
2438  char *scratchptr = scratch.data;
2439  bool init;
2440  int bufflags = 0;
2441 
2442  /*
2443  * If the page was previously empty, we can reinit the page
2444  * instead of restoring the whole thing.
2445  */
2446  init = starting_with_empty_page;
2447 
2448  /* allocate xl_heap_multi_insert struct from the scratch area */
2449  xlrec = (xl_heap_multi_insert *) scratchptr;
2450  scratchptr += SizeOfHeapMultiInsert;
2451 
2452  /*
2453  * Allocate offsets array. Unless we're reinitializing the page,
2454  * in that case the tuples are stored in order starting at
2455  * FirstOffsetNumber and we don't need to store the offsets
2456  * explicitly.
2457  */
2458  if (!init)
2459  scratchptr += nthispage * sizeof(OffsetNumber);
2460 
2461  /* the rest of the scratch space is used for tuple data */
2462  tupledata = scratchptr;
2463 
2464  /* check that the mutually exclusive flags are not both set */
2465  Assert(!(all_visible_cleared && all_frozen_set));
2466 
2467  xlrec->flags = 0;
2468  if (all_visible_cleared)
2469  xlrec->flags |= XLH_INSERT_ALL_VISIBLE_CLEARED;
2470  if (all_frozen_set)
2471  xlrec->flags |= XLH_INSERT_ALL_FROZEN_SET;
2472 
2473  xlrec->ntuples = nthispage;
2474 
2475  /*
2476  * Write out an xl_multi_insert_tuple and the tuple data itself
2477  * for each tuple.
2478  */
2479  for (i = 0; i < nthispage; i++)
2480  {
2481  HeapTuple heaptup = heaptuples[ndone + i];
2482  xl_multi_insert_tuple *tuphdr;
2483  int datalen;
2484 
2485  if (!init)
2486  xlrec->offsets[i] = ItemPointerGetOffsetNumber(&heaptup->t_self);
2487  /* xl_multi_insert_tuple needs two-byte alignment. */
2488  tuphdr = (xl_multi_insert_tuple *) SHORTALIGN(scratchptr);
2489  scratchptr = ((char *) tuphdr) + SizeOfMultiInsertTuple;
2490 
2491  tuphdr->t_infomask2 = heaptup->t_data->t_infomask2;
2492  tuphdr->t_infomask = heaptup->t_data->t_infomask;
2493  tuphdr->t_hoff = heaptup->t_data->t_hoff;
2494 
2495  /* write bitmap [+ padding] [+ oid] + data */
2496  datalen = heaptup->t_len - SizeofHeapTupleHeader;
2497  memcpy(scratchptr,
2498  (char *) heaptup->t_data + SizeofHeapTupleHeader,
2499  datalen);
2500  tuphdr->datalen = datalen;
2501  scratchptr += datalen;
2502  }
2503  totaldatalen = scratchptr - tupledata;
2504  Assert((scratchptr - scratch.data) < BLCKSZ);
2505 
2506  if (need_tuple_data)
2507  xlrec->flags |= XLH_INSERT_CONTAINS_NEW_TUPLE;
2508 
2509  /*
2510  * Signal that this is the last xl_heap_multi_insert record
2511  * emitted by this call to heap_multi_insert(). Needed for logical
2512  * decoding so it knows when to cleanup temporary data.
2513  */
2514  if (ndone + nthispage == ntuples)
2515  xlrec->flags |= XLH_INSERT_LAST_IN_MULTI;
2516 
2517  if (init)
2518  {
2519  info |= XLOG_HEAP_INIT_PAGE;
2520  bufflags |= REGBUF_WILL_INIT;
2521  }
2522 
2523  /*
2524  * If we're doing logical decoding, include the new tuple data
2525  * even if we take a full-page image of the page.
2526  */
2527  if (need_tuple_data)
2528  bufflags |= REGBUF_KEEP_DATA;
2529 
2530  XLogBeginInsert();
2531  XLogRegisterData((char *) xlrec, tupledata - scratch.data);
2532  XLogRegisterBuffer(0, buffer, REGBUF_STANDARD | bufflags);
2533 
2534  XLogRegisterBufData(0, tupledata, totaldatalen);
2535 
2536  /* filtering by origin on a row level is much more efficient */
2537  XLogSetRecordFlags(XLOG_INCLUDE_ORIGIN);
2538 
2539  recptr = XLogInsert(RM_HEAP2_ID, info);
2540 
2541  PageSetLSN(page, recptr);
2542  }
2543 
2544  END_CRIT_SECTION();
2545 
2546  /*
2547  * If we've frozen everything on the page, update the visibilitymap.
2548  * We're already holding pin on the vmbuffer.
2549  */
2550  if (all_frozen_set)
2551  {
2552  Assert(PageIsAllVisible(page));
2553  Assert(visibilitymap_pin_ok(BufferGetBlockNumber(buffer), vmbuffer));
2554 
2555  /*
2556  * It's fine to use InvalidTransactionId here - this is only used
2557  * when HEAP_INSERT_FROZEN is specified, which intentionally
2558  * violates visibility rules.
2559  */
2560  visibilitymap_set(relation, BufferGetBlockNumber(buffer), buffer,
2561  InvalidXLogRecPtr, vmbuffer,
2562  InvalidTransactionId,
2563  VISIBILITYMAP_ALL_VISIBLE | VISIBILITYMAP_ALL_FROZEN);
2564  }
2565 
2566  UnlockReleaseBuffer(buffer);
2567  ndone += nthispage;
2568 
2569  /*
2570  * NB: Only release vmbuffer after inserting all tuples - it's fairly
2571  * likely that we'll insert into subsequent heap pages that are likely
2572  * to use the same vm page.
2573  */
2574  }
2575 
2576  /* We're done with inserting all tuples, so release the last vmbuffer. */
2577  if (vmbuffer != InvalidBuffer)
2578  ReleaseBuffer(vmbuffer);
2579 
2580  /*
2581  * We're done with the actual inserts. Check for conflicts again, to
2582  * ensure that all rw-conflicts in to these inserts are detected. Without
2583  * this final check, a sequential scan of the heap may have locked the
2584  * table after the "before" check, missing one opportunity to detect the
2585  * conflict, and then scanned the table before the new tuples were there,
2586  * missing the other chance to detect the conflict.
2587  *
2588  * For heap inserts, we only need to check for table-level SSI locks. Our
2589  * new tuples can't possibly conflict with existing tuple locks, and heap
2590  * page locks are only consolidated versions of tuple locks; they do not
2591  * lock "gaps" as index page locks do. So we don't need to specify a
2592  * buffer when making the call.
2593  */
2594  CheckForSerializableConflictIn(relation, NULL, InvalidBlockNumber);
2595 
2596  /*
2597  * If tuples are cachable, mark them for invalidation from the caches in
2598  * case we abort. Note it is OK to do this after releasing the buffer,
2599  * because the heaptuples data structure is all in local memory, not in
2600  * the shared buffer.
2601  */
2602  if (IsCatalogRelation(relation))
2603  {
2604  for (i = 0; i < ntuples; i++)
2605  CacheInvalidateHeapTuple(relation, heaptuples[i], NULL);
2606  }
2607 
2608  /* copy t_self fields back to the caller's slots */
2609  for (i = 0; i < ntuples; i++)
2610  slots[i]->tts_tid = heaptuples[i]->t_self;
2611 
2612  pgstat_count_heap_insert(relation, ntuples);
2613 }
Size PageGetHeapFreeSpace(Page page)
Definition: bufpage.c:991
static void PageSetAllVisible(Page page)
Definition: bufpage.h:434
#define MAXALIGN(LEN)
Definition: c.h:811
#define SHORTALIGN(LEN)
Definition: c.h:807
size_t Size
Definition: c.h:605
HeapTuple ExecFetchSlotHeapTuple(TupleTableSlot *slot, bool materialize, bool *shouldFree)
Definition: execTuples.c:1731
static int heap_multi_insert_pages(HeapTuple *heaptuples, int done, int ntuples, Size saveFreeSpace)
Definition: heapam.c:2231
#define HEAP_INSERT_FROZEN
Definition: heapam.h:36
#define SizeOfHeapMultiInsert
Definition: heapam_xlog.h:187
#define XLOG_HEAP2_MULTI_INSERT
Definition: heapam_xlog.h:63
#define XLH_INSERT_LAST_IN_MULTI
Definition: heapam_xlog.h:72
#define XLH_INSERT_ALL_FROZEN_SET
Definition: heapam_xlog.h:78
#define SizeOfMultiInsertTuple
Definition: heapam_xlog.h:198
int init
Definition: isn.c:75
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:122
#define RelationGetTargetPageFreeSpace(relation, defaultff)
Definition: rel.h:378
#define HEAP_DEFAULT_FILLFACTOR
Definition: rel.h:349
Oid tts_tableOid
Definition: tuptable.h:130
OffsetNumber offsets[FLEXIBLE_ARRAY_MEMBER]
Definition: heapam_xlog.h:184
char data[BLCKSZ]
Definition: c.h:1119
void visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf, XLogRecPtr recptr, Buffer vmBuf, TransactionId cutoff_xid, uint8 flags)
bool visibilitymap_pin_ok(BlockNumber heapBlk, Buffer vmbuf)
#define VISIBILITYMAP_ALL_VISIBLE
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28

References Assert, BufferGetBlockNumber(), BufferGetPage(), CacheInvalidateHeapTuple(), CHECK_FOR_INTERRUPTS, CheckForSerializableConflictIn(), PGAlignedBlock::data, xl_multi_insert_tuple::datalen, END_CRIT_SECTION, ExecFetchSlotHeapTuple(), xl_heap_multi_insert::flags, GetCurrentTransactionId(), HEAP_DEFAULT_FILLFACTOR, HEAP_INSERT_FROZEN, HEAP_INSERT_NO_LOGICAL, heap_multi_insert_pages(), heap_prepare_insert(), i, init, InvalidBlockNumber, InvalidBuffer, InvalidTransactionId, InvalidXLogRecPtr, IsCatalogRelation(), ItemPointerGetOffsetNumber(), log_heap_new_cid(), MarkBufferDirty(), MAXALIGN, xl_heap_multi_insert::ntuples, xl_heap_multi_insert::offsets, PageClearAllVisible(), PageGetHeapFreeSpace(), PageGetMaxOffsetNumber(), PageIsAllVisible(), PageSetAllVisible(), PageSetLSN(), palloc(), pgstat_count_heap_insert(), REGBUF_KEEP_DATA, REGBUF_STANDARD, REGBUF_WILL_INIT, RelationGetBufferForTuple(), RelationGetRelid, RelationGetTargetPageFreeSpace, RelationIsAccessibleInLogicalDecoding, RelationIsLogicallyLogged, RelationNeedsWAL, RelationPutHeapTuple(), ReleaseBuffer(), SHORTALIGN, SizeOfHeapMultiInsert, SizeofHeapTupleHeader, SizeOfMultiInsertTuple, START_CRIT_SECTION, HeapTupleData::t_data, xl_multi_insert_tuple::t_hoff, HeapTupleHeaderData::t_hoff, xl_multi_insert_tuple::t_infomask, HeapTupleHeaderData::t_infomask, xl_multi_insert_tuple::t_infomask2, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TupleTableSlot::tts_tableOid, UnlockReleaseBuffer(), VISIBILITYMAP_ALL_FROZEN, VISIBILITYMAP_ALL_VISIBLE, visibilitymap_clear(), visibilitymap_pin_ok(), visibilitymap_set(), VISIBILITYMAP_VALID_BITS, XLH_INSERT_ALL_FROZEN_SET, XLH_INSERT_ALL_VISIBLE_CLEARED, XLH_INSERT_CONTAINS_NEW_TUPLE, XLH_INSERT_LAST_IN_MULTI, XLOG_HEAP2_MULTI_INSERT, XLOG_HEAP_INIT_PAGE, XLOG_INCLUDE_ORIGIN, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), XLogRegisterData(), and XLogSetRecordFlags().

Referenced by CatalogTuplesMultiInsertWithInfo().
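
As a usage illustration (not taken from the sources; insert_batch() is a hypothetical helper), a caller holding a batch of already-materialized slots can drive heap_multi_insert() with the bulk-insert state declared in heapam.h, which keeps the current target buffer pinned across calls:

#include "postgres.h"

#include "access/heapam.h"
#include "access/xact.h"
#include "executor/tuptable.h"

/*
 * Hypothetical caller sketch: insert a batch of filled slots with a single
 * heap_multi_insert() call.
 */
static void
insert_batch(Relation rel, TupleTableSlot **slots, int nslots)
{
	CommandId	cid = GetCurrentCommandId(true);
	BulkInsertState bistate = GetBulkInsertState();

	/* All tuples share one command id; no special insert options are used. */
	heap_multi_insert(rel, slots, nslots, cid, 0, bistate);

	FreeBulkInsertState(bistate);
}

COPY-style callers typically reuse one BulkInsertState across many such batches, and pass options such as HEAP_INSERT_FROZEN only when the relation was created or truncated in the same transaction.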

◆ heap_page_prune_and_freeze()

void heap_page_prune_and_freeze ( Relation  relation,
Buffer  buffer,
struct GlobalVisState vistest,
int  options,
struct VacuumCutoffs cutoffs,
PruneFreezeResult presult,
PruneReason  reason,
OffsetNumber off_loc,
TransactionId new_relfrozen_xid,
MultiXactId new_relmin_mxid 
)

Definition at line 350 of file pruneheap.c.

359 {
360  Page page = BufferGetPage(buffer);
361  BlockNumber blockno = BufferGetBlockNumber(buffer);
362  OffsetNumber offnum,
363  maxoff;
364  PruneState prstate;
365  HeapTupleData tup;
366  bool do_freeze;
367  bool do_prune;
368  bool do_hint;
369  bool hint_bit_fpi;
370  int64 fpi_before = pgWalUsage.wal_fpi;
371 
372  /* Copy parameters to prstate */
373  prstate.vistest = vistest;
374  prstate.mark_unused_now = (options & HEAP_PAGE_PRUNE_MARK_UNUSED_NOW) != 0;
375  prstate.freeze = (options & HEAP_PAGE_PRUNE_FREEZE) != 0;
376  prstate.cutoffs = cutoffs;
377 
378  /*
379  * Our strategy is to scan the page and make lists of items to change,
380  * then apply the changes within a critical section. This keeps as much
381  * logic as possible out of the critical section, and also ensures that
382  * WAL replay will work the same as the normal case.
383  *
384  * First, initialize the new pd_prune_xid value to zero (indicating no
385  * prunable tuples). If we find any tuples which may soon become
386  * prunable, we will save the lowest relevant XID in new_prune_xid. Also
387  * initialize the rest of our working state.
388  */
389  prstate.new_prune_xid = InvalidTransactionId;
390  prstate.latest_xid_removed = InvalidTransactionId;
391  prstate.nredirected = prstate.ndead = prstate.nunused = prstate.nfrozen = 0;
392  prstate.nroot_items = 0;
393  prstate.nheaponly_items = 0;
394 
395  /* initialize page freezing working state */
396  prstate.pagefrz.freeze_required = false;
397  if (prstate.freeze)
398  {
399  Assert(new_relfrozen_xid && new_relmin_mxid);
400  prstate.pagefrz.FreezePageRelfrozenXid = *new_relfrozen_xid;
401  prstate.pagefrz.NoFreezePageRelfrozenXid = *new_relfrozen_xid;
402  prstate.pagefrz.FreezePageRelminMxid = *new_relmin_mxid;
403  prstate.pagefrz.NoFreezePageRelminMxid = *new_relmin_mxid;
404  }
405  else
406  {
407  Assert(new_relfrozen_xid == NULL && new_relmin_mxid == NULL);
408  prstate.pagefrz.FreezePageRelfrozenXid = InvalidTransactionId;
409  prstate.pagefrz.NoFreezePageRelfrozenXid = InvalidTransactionId;
410  prstate.pagefrz.FreezePageRelminMxid = InvalidMultiXactId;
411  prstate.pagefrz.NoFreezePageRelminMxid = InvalidMultiXactId;
412  }
413 
414  prstate.ndeleted = 0;
415  prstate.live_tuples = 0;
416  prstate.recently_dead_tuples = 0;
417  prstate.hastup = false;
418  prstate.lpdead_items = 0;
419  prstate.deadoffsets = presult->deadoffsets;
420 
421  /*
422  * Caller may update the VM after we're done. We can keep track of
423  * whether the page will be all-visible and all-frozen after pruning and
424  * freezing to help the caller to do that.
425  *
426  * Currently, only VACUUM sets the VM bits. To save the effort, only do
427  * the bookkeeping if the caller needs it. Currently, that's tied to
428  * HEAP_PAGE_PRUNE_FREEZE, but it could be a separate flag if you wanted
429  * to update the VM bits without also freezing or freeze without also
430  * setting the VM bits.
431  *
432  * In addition to telling the caller whether it can set the VM bit, we
433  * also use 'all_visible' and 'all_frozen' for our own decision-making. If
434  * the whole page would become frozen, we consider opportunistically
435  * freezing tuples. We will not be able to freeze the whole page if there
436  * are tuples present that are not visible to everyone or if there are
437  * dead tuples which are not yet removable. However, dead tuples which
438  * will be removed by the end of vacuuming should not preclude us from
439  * opportunistically freezing. Because of that, we do not clear
440  * all_visible when we see LP_DEAD items. We fix that at the end of the
441  * function, when we return the value to the caller, so that the caller
442  * doesn't set the VM bit incorrectly.
443  */
444  if (prstate.freeze)
445  {
446  prstate.all_visible = true;
447  prstate.all_frozen = true;
448  }
449  else
450  {
451  /*
452  * Initializing to false allows skipping the work to update them in
453  * heap_prune_record_unchanged_lp_normal().
454  */
455  prstate.all_visible = false;
456  prstate.all_frozen = false;
457  }
458 
459  /*
460  * The visibility cutoff xid is the newest xmin of live tuples on the
461  * page. In the common case, this will be set as the conflict horizon the
462  * caller can use for updating the VM. If, at the end of freezing and
463  * pruning, the page is all-frozen, there is no possibility that any
464  * running transaction on the standby does not see tuples on the page as
465  * all-visible, so the conflict horizon remains InvalidTransactionId.
466  */
467  prstate.visibility_cutoff_xid = InvalidTransactionId;
468 
469  maxoff = PageGetMaxOffsetNumber(page);
470  tup.t_tableOid = RelationGetRelid(relation);
471 
472  /*
473  * Determine HTSV for all tuples, and queue them up for processing as HOT
474  * chain roots or as heap-only items.
475  *
476  * Determining HTSV only once for each tuple is required for correctness,
477  * to deal with cases where running HTSV twice could result in different
478  * results. For example, RECENTLY_DEAD can turn to DEAD if another
479  * checked item causes GlobalVisTestIsRemovableFullXid() to update the
480  * horizon, or INSERT_IN_PROGRESS can change to DEAD if the inserting
481  * transaction aborts.
482  *
483  * It's also good for performance. Most commonly tuples within a page are
484  * stored at decreasing offsets (while the items are stored at increasing
485  * offsets). When processing all tuples on a page this leads to reading
486  * memory at decreasing offsets within a page, with a variable stride.
487  * That's hard for CPU prefetchers to deal with. Processing the items in
488  * reverse order (and thus the tuples in increasing order) increases
489  * prefetching efficiency significantly / decreases the number of cache
490  * misses.
491  */
492  for (offnum = maxoff;
493  offnum >= FirstOffsetNumber;
494  offnum = OffsetNumberPrev(offnum))
495  {
496  ItemId itemid = PageGetItemId(page, offnum);
497  HeapTupleHeader htup;
498 
499  /*
500  * Set the offset number so that we can display it along with any
501  * error that occurred while processing this tuple.
502  */
503  *off_loc = offnum;
504 
505  prstate.processed[offnum] = false;
506  prstate.htsv[offnum] = -1;
507 
508  /* Nothing to do if slot doesn't contain a tuple */
509  if (!ItemIdIsUsed(itemid))
510  {
511  heap_prune_record_unchanged_lp_unused(page, &prstate, offnum);
512  continue;
513  }
514 
515  if (ItemIdIsDead(itemid))
516  {
517  /*
518  * If the caller set mark_unused_now true, we can set dead line
519  * pointers LP_UNUSED now.
520  */
521  if (unlikely(prstate.mark_unused_now))
522  heap_prune_record_unused(&prstate, offnum, false);
523  else
524  heap_prune_record_unchanged_lp_dead(page, &prstate, offnum);
525  continue;
526  }
527 
528  if (ItemIdIsRedirected(itemid))
529  {
530  /* This is the start of a HOT chain */
531  prstate.root_items[prstate.nroot_items++] = offnum;
532  continue;
533  }
534 
535  Assert(ItemIdIsNormal(itemid));
536 
537  /*
538  * Get the tuple's visibility status and queue it up for processing.
539  */
540  htup = (HeapTupleHeader) PageGetItem(page, itemid);
541  tup.t_data = htup;
542  tup.t_len = ItemIdGetLength(itemid);
543  ItemPointerSet(&tup.t_self, blockno, offnum);
544 
545  prstate.htsv[offnum] = heap_prune_satisfies_vacuum(&prstate, &tup,
546  buffer);
547 
548  if (!HeapTupleHeaderIsHeapOnly(htup))
549  prstate.root_items[prstate.nroot_items++] = offnum;
550  else
551  prstate.heaponly_items[prstate.nheaponly_items++] = offnum;
552  }
553 
554  /*
555  * If checksums are enabled, heap_prune_satisfies_vacuum() may have caused
556  * an FPI to be emitted.
557  */
558  hint_bit_fpi = fpi_before != pgWalUsage.wal_fpi;
559 
560  /*
561  * Process HOT chains.
562  *
563  * We added the items to the array starting from 'maxoff', so by
564  * processing the array in reverse order, we process the items in
565  * ascending offset number order. The order doesn't matter for
566  * correctness, but some quick micro-benchmarking suggests that this is
567  * faster. (Earlier PostgreSQL versions, which scanned all the items on
568  * the page instead of using the root_items array, also did it in
569  * ascending offset number order.)
570  */
571  for (int i = prstate.nroot_items - 1; i >= 0; i--)
572  {
573  offnum = prstate.root_items[i];
574 
575  /* Ignore items already processed as part of an earlier chain */
576  if (prstate.processed[offnum])
577  continue;
578 
579  /* see preceding loop */
580  *off_loc = offnum;
581 
582  /* Process this item or chain of items */
583  heap_prune_chain(page, blockno, maxoff, offnum, &prstate);
584  }
585 
586  /*
587  * Process any heap-only tuples that were not already processed as part of
588  * a HOT chain.
589  */
590  for (int i = prstate.nheaponly_items - 1; i >= 0; i--)
591  {
592  offnum = prstate.heaponly_items[i];
593 
594  if (prstate.processed[offnum])
595  continue;
596 
597  /* see preceding loop */
598  *off_loc = offnum;
599 
600  /*
601  * If the tuple is DEAD and doesn't chain to anything else, mark it
602  * unused. (If it does chain, we can only remove it as part of
603  * pruning its chain.)
604  *
605  * We need this primarily to handle aborted HOT updates, that is,
606  * XMIN_INVALID heap-only tuples. Those might not be linked to by any
607  * chain, since the parent tuple might be re-updated before any
608  * pruning occurs. So we have to be able to reap them separately from
609  * chain-pruning. (Note that HeapTupleHeaderIsHotUpdated will never
610  * return true for an XMIN_INVALID tuple, so this code will work even
611  * when there were sequential updates within the aborted transaction.)
612  */
613  if (prstate.htsv[offnum] == HEAPTUPLE_DEAD)
614  {
615  ItemId itemid = PageGetItemId(page, offnum);
616  HeapTupleHeader htup = (HeapTupleHeader) PageGetItem(page, itemid);
617 
618  if (likely(!HeapTupleHeaderIsHotUpdated(htup)))
619  {
620  HeapTupleHeaderAdvanceConflictHorizon(htup,
621  &prstate.latest_xid_removed);
622  heap_prune_record_unused(&prstate, offnum, true);
623  }
624  else
625  {
626  /*
627  * This tuple should've been processed and removed as part of
628  * a HOT chain, so something's wrong. To preserve evidence,
629  * we don't dare to remove it. We cannot leave behind a DEAD
630  * tuple either, because that will cause VACUUM to error out.
631  * Throwing an error with a distinct error message seems like
632  * the least bad option.
633  */
634  elog(ERROR, "dead heap-only tuple (%u, %d) is not linked to from any HOT chain",
635  blockno, offnum);
636  }
637  }
638  else
639  heap_prune_record_unchanged_lp_normal(page, &prstate, offnum);
640  }
641 
642  /* We should now have processed every tuple exactly once */
643 #ifdef USE_ASSERT_CHECKING
644  for (offnum = FirstOffsetNumber;
645  offnum <= maxoff;
646  offnum = OffsetNumberNext(offnum))
647  {
648  *off_loc = offnum;
649 
650  Assert(prstate.processed[offnum]);
651  }
652 #endif
653 
654  /* Clear the offset information once we have processed the given page. */
655  *off_loc = InvalidOffsetNumber;
656 
657  do_prune = prstate.nredirected > 0 ||
658  prstate.ndead > 0 ||
659  prstate.nunused > 0;
660 
661  /*
662  * Even if we don't prune anything, if we found a new value for the
663  * pd_prune_xid field or the page was marked full, we will update the hint
664  * bit.
665  */
666  do_hint = ((PageHeader) page)->pd_prune_xid != prstate.new_prune_xid ||
667  PageIsFull(page);
668 
669  /*
670  * Decide if we want to go ahead with freezing according to the freeze
671  * plans we prepared, or not.
672  */
673  do_freeze = false;
674  if (prstate.freeze)
675  {
676  if (prstate.pagefrz.freeze_required)
677  {
678  /*
679  * heap_prepare_freeze_tuple indicated that at least one XID/MXID
680  * from before FreezeLimit/MultiXactCutoff is present. Must
681  * freeze to advance relfrozenxid/relminmxid.
682  */
683  do_freeze = true;
684  }
685  else
686  {
687  /*
688  * Opportunistically freeze the page if we are generating an FPI
689  * anyway and if doing so means that we can set the page
690  * all-frozen afterwards (might not happen until VACUUM's final
691  * heap pass).
692  *
693  * XXX: Previously, we knew if pruning emitted an FPI by checking
694  * pgWalUsage.wal_fpi before and after pruning. Once the freeze
695  * and prune records were combined, this heuristic couldn't be
696  * used anymore. The opportunistic freeze heuristic must be
697  * improved; however, for now, try to approximate the old logic.
698  */
699  if (prstate.all_visible && prstate.all_frozen && prstate.nfrozen > 0)
700  {
701  /*
702  * Freezing would make the page all-frozen. Have already
703  * emitted an FPI or will do so anyway?
704  */
705  if (RelationNeedsWAL(relation))
706  {
707  if (hint_bit_fpi)
708  do_freeze = true;
709  else if (do_prune)
710  {
711  if (XLogCheckBufferNeedsBackup(buffer))
712  do_freeze = true;
713  }
714  else if (do_hint)
715  {
716  if (XLogHintBitIsNeeded())
717  do_freeze = true;
718  }
719  }
720  }
721  }
722  }
723 
724  if (do_freeze)
725  {
726  /*
727  * Validate the tuples we will be freezing before entering the
728  * critical section.
729  */
730  heap_pre_freeze_checks(buffer, prstate.frozen, prstate.nfrozen);
731  }
732  else if (prstate.nfrozen > 0)
733  {
734  /*
735  * The page contained some tuples that were not already frozen, and we
736  * chose not to freeze them now. The page won't be all-frozen then.
737  */
738  Assert(!prstate.pagefrz.freeze_required);
739 
740  prstate.all_frozen = false;
741  prstate.nfrozen = 0; /* avoid miscounts in instrumentation */
742  }
743  else
744  {
745  /*
746  * We have no freeze plans to execute. The page might already be
747  * all-frozen (perhaps only following pruning), though. Such pages
748  * can be marked all-frozen in the VM by our caller, even though none
749  * of its tuples were newly frozen here.
750  */
751  }
752 
753  /* Any error while applying the changes is critical */
754  START_CRIT_SECTION();
755 
756  if (do_hint)
757  {
758  /*
759  * Update the page's pd_prune_xid field to either zero, or the lowest
760  * XID of any soon-prunable tuple.
761  */
762  ((PageHeader) page)->pd_prune_xid = prstate.new_prune_xid;
763 
764  /*
765  * Also clear the "page is full" flag, since there's no point in
766  * repeating the prune/defrag process until something else happens to
767  * the page.
768  */
769  PageClearFull(page);
770 
771  /*
772  * If that's all we had to do to the page, this is a non-WAL-logged
773  * hint. If we are going to freeze or prune the page, we will mark
774  * the buffer dirty below.
775  */
776  if (!do_freeze && !do_prune)
777  MarkBufferDirtyHint(buffer, true);
778  }
779 
780  if (do_prune || do_freeze)
781  {
782  /* Apply the planned item changes and repair page fragmentation. */
783  if (do_prune)
784  {
785  heap_page_prune_execute(buffer, false,
786  prstate.redirected, prstate.nredirected,
787  prstate.nowdead, prstate.ndead,
788  prstate.nowunused, prstate.nunused);
789  }
790 
791  if (do_freeze)
792  heap_freeze_prepared_tuples(buffer, prstate.frozen, prstate.nfrozen);
793 
794  MarkBufferDirty(buffer);
795 
796  /*
797  * Emit a WAL XLOG_HEAP2_PRUNE_FREEZE record showing what we did
798  */
799  if (RelationNeedsWAL(relation))
800  {
801  /*
802  * The snapshotConflictHorizon for the whole record should be the
803  * most conservative of all the horizons calculated for any of the
804  * possible modifications. If this record will prune tuples, any
805  * transactions on the standby older than the youngest xmax of the
806  * most recently removed tuple this record will prune will
807  * conflict. If this record will freeze tuples, any transactions
808  * on the standby with xids older than the youngest tuple this
809  * record will freeze will conflict.
810  */
811  TransactionId frz_conflict_horizon = InvalidTransactionId;
812  TransactionId conflict_xid;
813 
814  /*
815  * We can use the visibility_cutoff_xid as our cutoff for
816  * conflicts when the whole page is eligible to become all-frozen
817  * in the VM once we're done with it. Otherwise we generate a
818  * conservative cutoff by stepping back from OldestXmin.
819  */
820  if (do_freeze)
821  {
822  if (prstate.all_visible && prstate.all_frozen)
823  frz_conflict_horizon = prstate.visibility_cutoff_xid;
824  else
825  {
826  /* Avoids false conflicts when hot_standby_feedback in use */
827  frz_conflict_horizon = prstate.cutoffs->OldestXmin;
828  TransactionIdRetreat(frz_conflict_horizon);
829  }
830  }
831 
832  if (TransactionIdFollows(frz_conflict_horizon, prstate.latest_xid_removed))
833  conflict_xid = frz_conflict_horizon;
834  else
835  conflict_xid = prstate.latest_xid_removed;
836 
837  log_heap_prune_and_freeze(relation, buffer,
838  conflict_xid,
839  true, reason,
840  prstate.frozen, prstate.nfrozen,
841  prstate.redirected, prstate.nredirected,
842  prstate.nowdead, prstate.ndead,
843  prstate.nowunused, prstate.nunused);
844  }
845  }
846 
847  END_CRIT_SECTION();
848 
849  /* Copy information back for caller */
850  presult->ndeleted = prstate.ndeleted;
851  presult->nnewlpdead = prstate.ndead;
852  presult->nfrozen = prstate.nfrozen;
853  presult->live_tuples = prstate.live_tuples;
854  presult->recently_dead_tuples = prstate.recently_dead_tuples;
855 
856  /*
857  * It was convenient to ignore LP_DEAD items in all_visible earlier on to
858  * make the choice of whether or not to freeze the page unaffected by the
859  * short-term presence of LP_DEAD items. These LP_DEAD items were
860  * effectively assumed to be LP_UNUSED items in the making. It doesn't
861  * matter which vacuum heap pass (initial pass or final pass) ends up
862  * setting the page all-frozen, as long as the ongoing VACUUM does it.
863  *
864  * Now that freezing has been finalized, unset all_visible if there are
865  * any LP_DEAD items on the page. It needs to reflect the present state
866  * of the page, as expected by our caller.
867  */
868  if (prstate.all_visible && prstate.lpdead_items == 0)
869  {
870  presult->all_visible = prstate.all_visible;
871  presult->all_frozen = prstate.all_frozen;
872  }
873  else
874  {
875  presult->all_visible = false;
876  presult->all_frozen = false;
877  }
878 
879  presult->hastup = prstate.hastup;
880 
881  /*
882  * For callers planning to update the visibility map, the conflict horizon
883  * for that record must be the newest xmin on the page. However, if the
884  * page is completely frozen, there can be no conflict and the
885  * vm_conflict_horizon should remain InvalidTransactionId. This includes
886  * the case that we just froze all the tuples; the prune-freeze record
887  * included the conflict XID already so the caller doesn't need it.
888  */
889  if (presult->all_frozen)
890  presult->vm_conflict_horizon = InvalidTransactionId;
891  else
892  presult->vm_conflict_horizon = prstate.visibility_cutoff_xid;
893 
894  presult->lpdead_items = prstate.lpdead_items;
895  /* the presult->deadoffsets array was already filled in */
896 
897  if (prstate.freeze)
898  {
899  if (presult->nfrozen > 0)
900  {
901  *new_relfrozen_xid = prstate.pagefrz.FreezePageRelfrozenXid;
902  *new_relmin_mxid = prstate.pagefrz.FreezePageRelminMxid;
903  }
904  else
905  {
906  *new_relfrozen_xid = prstate.pagefrz.NoFreezePageRelfrozenXid;
907  *new_relmin_mxid = prstate.pagefrz.NoFreezePageRelminMxid;
908  }
909  }
910 }
void MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
Definition: bufmgr.c:5000
PageHeaderData * PageHeader
Definition: bufpage.h:173
static void PageClearFull(Page page)
Definition: bufpage.h:423
static bool PageIsFull(Page page)
Definition: bufpage.h:413
#define likely(x)
Definition: c.h:310
void heap_freeze_prepared_tuples(Buffer buffer, HeapTupleFreeze *tuples, int ntuples)
Definition: heapam.c:6900
void heap_pre_freeze_checks(Buffer buffer, HeapTupleFreeze *tuples, int ntuples)
Definition: heapam.c:6847
#define HEAP_PAGE_PRUNE_FREEZE
Definition: heapam.h:42
#define HEAP_PAGE_PRUNE_MARK_UNUSED_NOW
Definition: heapam.h:41
WalUsage pgWalUsage
Definition: instrument.c:22
#define InvalidMultiXactId
Definition: multixact.h:24
#define OffsetNumberPrev(offsetNumber)
Definition: off.h:54
static void heap_prune_chain(Page page, BlockNumber blockno, OffsetNumber maxoff, OffsetNumber rootoffnum, PruneState *prstate)
Definition: pruneheap.c:999
static void heap_prune_record_unchanged_lp_dead(Page page, PruneState *prstate, OffsetNumber offnum)
Definition: pruneheap.c:1508
static void heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum, bool was_normal)
Definition: pruneheap.c:1297
static void heap_prune_record_unchanged_lp_normal(Page page, PruneState *prstate, OffsetNumber offnum)
Definition: pruneheap.c:1330
void log_heap_prune_and_freeze(Relation relation, Buffer buffer, TransactionId conflict_xid, bool cleanup_lock, PruneReason reason, HeapTupleFreeze *frozen, int nfrozen, OffsetNumber *redirected, int nredirected, OffsetNumber *dead, int ndead, OffsetNumber *unused, int nunused)
Definition: pruneheap.c:2053
static void heap_prune_record_unchanged_lp_unused(Page page, PruneState *prstate, OffsetNumber offnum)
Definition: pruneheap.c:1319
static HTSV_Result heap_prune_satisfies_vacuum(PruneState *prstate, HeapTuple tup, Buffer buffer)
Definition: pruneheap.c:917
void heap_page_prune_execute(Buffer buffer, bool lp_truncate_only, OffsetNumber *redirected, int nredirected, OffsetNumber *nowdead, int ndead, OffsetNumber *nowunused, int nunused)
Definition: pruneheap.c:1561
MultiXactId NoFreezePageRelminMxid
Definition: heapam.h:219
TransactionId FreezePageRelfrozenXid
Definition: heapam.h:207
bool freeze_required
Definition: heapam.h:181
MultiXactId FreezePageRelminMxid
Definition: heapam.h:208
TransactionId NoFreezePageRelfrozenXid
Definition: heapam.h:218
int recently_dead_tuples
Definition: heapam.h:234
TransactionId vm_conflict_horizon
Definition: heapam.h:249
OffsetNumber deadoffsets[MaxHeapTuplesPerPage]
Definition: heapam.h:263
bool all_visible
Definition: heapam.h:247
HeapPageFreeze pagefrz
Definition: pruneheap.c:103
bool all_visible
Definition: pruneheap.c:150
int ndead
Definition: pruneheap.c:55
bool processed[MaxHeapTuplesPerPage+1]
Definition: pruneheap.c:86
OffsetNumber heaponly_items[MaxHeapTuplesPerPage]
Definition: pruneheap.c:78
TransactionId new_prune_xid
Definition: pruneheap.c:52
bool hastup
Definition: pruneheap.c:122
int recently_dead_tuples
Definition: pruneheap.c:119
OffsetNumber nowdead[MaxHeapTuplesPerPage]
Definition: pruneheap.c:60
int nroot_items
Definition: pruneheap.c:75
OffsetNumber nowunused[MaxHeapTuplesPerPage]
Definition: pruneheap.c:61
int nheaponly_items
Definition: pruneheap.c:77
bool mark_unused_now
Definition: pruneheap.c:43
int live_tuples
Definition: pruneheap.c:118
TransactionId visibility_cutoff_xid
Definition: pruneheap.c:152
bool all_frozen
Definition: pruneheap.c:151
GlobalVisState * vistest
Definition: pruneheap.c:41
struct VacuumCutoffs * cutoffs
Definition: pruneheap.c:46
HeapTupleFreeze frozen[MaxHeapTuplesPerPage]
Definition: pruneheap.c:62
int lpdead_items
Definition: pruneheap.c:128
int nfrozen
Definition: pruneheap.c:57
OffsetNumber redirected[MaxHeapTuplesPerPage *2]
Definition: pruneheap.c:59
int ndeleted
Definition: pruneheap.c:115
bool freeze
Definition: pruneheap.c:45
int nredirected
Definition: pruneheap.c:54
int8 htsv[MaxHeapTuplesPerPage+1]
Definition: pruneheap.c:98
TransactionId latest_xid_removed
Definition: pruneheap.c:53
int nunused
Definition: pruneheap.c:56
OffsetNumber root_items[MaxHeapTuplesPerPage]
Definition: pruneheap.c:76
OffsetNumber * deadoffsets
Definition: pruneheap.c:129
TransactionId OldestXmin
Definition: vacuum.h:267
int64 wal_fpi
Definition: instrument.h:54
bool TransactionIdFollows(TransactionId id1, TransactionId id2)
Definition: transam.c:314
#define TransactionIdRetreat(dest)
Definition: transam.h:141
#define XLogHintBitIsNeeded()
Definition: xlog.h:120
bool XLogCheckBufferNeedsBackup(Buffer buffer)
Definition: xloginsert.c:1027

References PruneState::all_frozen, PruneFreezeResult::all_frozen, PruneState::all_visible, PruneFreezeResult::all_visible, Assert, BufferGetBlockNumber(), BufferGetPage(), PruneState::cutoffs, PruneState::deadoffsets, PruneFreezeResult::deadoffsets, elog, END_CRIT_SECTION, ERROR, FirstOffsetNumber, PruneState::freeze, HeapPageFreeze::freeze_required, HeapPageFreeze::FreezePageRelfrozenXid, HeapPageFreeze::FreezePageRelminMxid, PruneState::frozen, PruneState::hastup, PruneFreezeResult::hastup, heap_freeze_prepared_tuples(), heap_page_prune_execute(), HEAP_PAGE_PRUNE_FREEZE, HEAP_PAGE_PRUNE_MARK_UNUSED_NOW, heap_pre_freeze_checks(), heap_prune_chain(), heap_prune_record_unchanged_lp_dead(), heap_prune_record_unchanged_lp_normal(), heap_prune_record_unchanged_lp_unused(), heap_prune_record_unused(), heap_prune_satisfies_vacuum(), PruneState::heaponly_items, HEAPTUPLE_DEAD, HeapTupleHeaderAdvanceConflictHorizon(), HeapTupleHeaderIsHeapOnly, HeapTupleHeaderIsHotUpdated, PruneState::htsv, i, InvalidMultiXactId, InvalidOffsetNumber, InvalidTransactionId, ItemIdGetLength, ItemIdIsDead, ItemIdIsNormal, ItemIdIsRedirected, ItemIdIsUsed, ItemPointerSet(), PruneState::latest_xid_removed, likely, PruneState::live_tuples, PruneFreezeResult::live_tuples, log_heap_prune_and_freeze(), PruneState::lpdead_items, PruneFreezeResult::lpdead_items, PruneState::mark_unused_now, MarkBufferDirty(), MarkBufferDirtyHint(), PruneState::ndead, PruneState::ndeleted, PruneFreezeResult::ndeleted, PruneState::new_prune_xid, PruneState::nfrozen, PruneFreezeResult::nfrozen, PruneState::nheaponly_items, PruneFreezeResult::nnewlpdead, HeapPageFreeze::NoFreezePageRelfrozenXid, HeapPageFreeze::NoFreezePageRelminMxid, PruneState::nowdead, PruneState::nowunused, PruneState::nredirected, PruneState::nroot_items, PruneState::nunused, OffsetNumberNext, OffsetNumberPrev, VacuumCutoffs::OldestXmin, PageClearFull(), PruneState::pagefrz, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PageIsFull(), pgWalUsage, PruneState::processed, PruneState::recently_dead_tuples, PruneFreezeResult::recently_dead_tuples, PruneState::redirected, RelationGetRelid, RelationNeedsWAL, PruneState::root_items, START_CRIT_SECTION, HeapTupleData::t_data, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TransactionIdFollows(), TransactionIdRetreat, unlikely, PruneState::visibility_cutoff_xid, PruneState::vistest, PruneFreezeResult::vm_conflict_horizon, WalUsage::wal_fpi, XLogCheckBufferNeedsBackup(), and XLogHintBitIsNeeded.

Referenced by heap_page_prune_opt(), and lazy_scan_prune().
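
The tail of the listing shows how the caller-visible result is finalized: the page is only reported all-visible/all-frozen when no LP_DEAD items remain, and an all-frozen page gets no separate VM conflict horizon. A minimal standalone restatement of that decision (plain C; the struct and the use of 0 for an invalid XID are illustrative stand-ins, not PostgreSQL definitions):

#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-ins for the fields copied into PruneFreezeResult */
struct result
{
    bool all_visible;
    bool all_frozen;
    unsigned int vm_conflict_horizon;   /* 0 stands in for InvalidTransactionId */
};

static void
finalize_visibility(struct result *r, bool page_all_visible, bool page_all_frozen,
                    int lpdead_items, unsigned int visibility_cutoff_xid)
{
    /* Only report all-visible/all-frozen if no LP_DEAD items remain */
    if (page_all_visible && lpdead_items == 0)
    {
        r->all_visible = page_all_visible;
        r->all_frozen = page_all_frozen;
    }
    else
    {
        r->all_visible = false;
        r->all_frozen = false;
    }

    /* An all-frozen page needs no VM conflict horizon */
    if (r->all_frozen)
        r->vm_conflict_horizon = 0;
    else
        r->vm_conflict_horizon = visibility_cutoff_xid;
}

int
main(void)
{
    struct result r;

    /* page is all-visible but still carries two LP_DEAD items */
    finalize_visibility(&r, true, false, 2, 1234);
    printf("all_visible=%d all_frozen=%d horizon=%u\n",
           r.all_visible, r.all_frozen, r.vm_conflict_horizon);
    return 0;
}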

◆ heap_page_prune_execute()

void heap_page_prune_execute ( Buffer  buffer,
bool  lp_truncate_only,
OffsetNumber redirected,
int  nredirected,
OffsetNumber nowdead,
int  ndead,
OffsetNumber nowunused,
int  nunused 
)

Definition at line 1561 of file pruneheap.c.

1565 {
1566  Page page = (Page) BufferGetPage(buffer);
1567  OffsetNumber *offnum;
1568  HeapTupleHeader htup PG_USED_FOR_ASSERTS_ONLY;
1569 
1570  /* Shouldn't be called unless there's something to do */
1571  Assert(nredirected > 0 || ndead > 0 || nunused > 0);
1572 
1573  /* If 'lp_truncate_only', we can only remove already-dead line pointers */
1574  Assert(!lp_truncate_only || (nredirected == 0 && ndead == 0));
1575 
1576  /* Update all redirected line pointers */
1577  offnum = redirected;
1578  for (int i = 0; i < nredirected; i++)
1579  {
1580  OffsetNumber fromoff = *offnum++;
1581  OffsetNumber tooff = *offnum++;
1582  ItemId fromlp = PageGetItemId(page, fromoff);
1583  ItemId tolp PG_USED_FOR_ASSERTS_ONLY;
1584 
1585 #ifdef USE_ASSERT_CHECKING
1586 
1587  /*
1588  * Any existing item that we set as an LP_REDIRECT (any 'from' item)
1589  * must be the first item from a HOT chain. If the item has tuple
1590  * storage then it can't be a heap-only tuple. Otherwise we are just
1591  * maintaining an existing LP_REDIRECT from an existing HOT chain that
1592  * has been pruned at least once before now.
1593  */
1594  if (!ItemIdIsRedirected(fromlp))
1595  {
1596  Assert(ItemIdHasStorage(fromlp) && ItemIdIsNormal(fromlp));
1597 
1598  htup = (HeapTupleHeader) PageGetItem(page, fromlp);
1599  Assert(!HeapTupleHeaderIsHeapOnly(htup));
1600  }
1601  else
1602  {
1603  /* We shouldn't need to redundantly set the redirect */
1604  Assert(ItemIdGetRedirect(fromlp) != tooff);
1605  }
1606 
1607  /*
1608  * The item that we're about to set as an LP_REDIRECT (the 'from'
1609  * item) will point to an existing item (the 'to' item) that is
1610  * already a heap-only tuple. There can be at most one LP_REDIRECT
1611  * item per HOT chain.
1612  *
1613  * We need to keep around an LP_REDIRECT item (after original
1614  * non-heap-only root tuple gets pruned away) so that it's always
1615  * possible for VACUUM to easily figure out what TID to delete from
1616  * indexes when an entire HOT chain becomes dead. A heap-only tuple
1617  * can never become LP_DEAD; an LP_REDIRECT item or a regular heap
1618  * tuple can.
1619  *
1620  * This check may miss problems, e.g. the target of a redirect could
1621  * be marked as unused subsequently. The page_verify_redirects() check
1622  * below will catch such problems.
1623  */
1624  tolp = PageGetItemId(page, tooff);
1625  Assert(ItemIdHasStorage(tolp) && ItemIdIsNormal(tolp));
1626  htup = (HeapTupleHeader) PageGetItem(page, tolp);
1627  Assert(HeapTupleHeaderIsHeapOnly(htup));
1628 #endif
1629 
1630  ItemIdSetRedirect(fromlp, tooff);
1631  }
1632 
1633  /* Update all now-dead line pointers */
1634  offnum = nowdead;
1635  for (int i = 0; i < ndead; i++)
1636  {
1637  OffsetNumber off = *offnum++;
1638  ItemId lp = PageGetItemId(page, off);
1639 
1640 #ifdef USE_ASSERT_CHECKING
1641 
1642  /*
1643  * An LP_DEAD line pointer must be left behind when the original item
1644  * (which is dead to everybody) could still be referenced by a TID in
1645  * an index. This should never be necessary with any individual
1646  * heap-only tuple item, though. (It's not clear how much of a problem
1647  * that would be, but there is no reason to allow it.)
1648  */
1649  if (ItemIdHasStorage(lp))
1650  {
1651  Assert(ItemIdIsNormal(lp));
1652  htup = (HeapTupleHeader) PageGetItem(page, lp);
1653  Assert(!HeapTupleHeaderIsHeapOnly(htup));
1654  }
1655  else
1656  {
1657  /* Whole HOT chain becomes dead */
1658  Assert(ItemIdIsRedirected(lp));
1659  }
1660 #endif
1661 
1662  ItemIdSetDead(lp);
1663  }
1664 
1665  /* Update all now-unused line pointers */
1666  offnum = nowunused;
1667  for (int i = 0; i < nunused; i++)
1668  {
1669  OffsetNumber off = *offnum++;
1670  ItemId lp = PageGetItemId(page, off);
1671 
1672 #ifdef USE_ASSERT_CHECKING
1673 
1674  if (lp_truncate_only)
1675  {
1676  /* Setting LP_DEAD to LP_UNUSED in vacuum's second pass */
1677  Assert(ItemIdIsDead(lp) && !ItemIdHasStorage(lp));
1678  }
1679  else
1680  {
1681  /*
1682  * When heap_page_prune_and_freeze() was called, mark_unused_now
1683  * may have been passed as true, which allows would-be LP_DEAD
1684  * items to be made LP_UNUSED instead. This is only possible if
1685  * the relation has no indexes. If there are any dead items, then
1686  * mark_unused_now was not true and every item being marked
1687  * LP_UNUSED must refer to a heap-only tuple.
1688  */
1689  if (ndead > 0)
1690  {
1691  Assert(ItemIdHasStorage(lp) && ItemIdIsNormal(lp));
1692  htup = (HeapTupleHeader) PageGetItem(page, lp);
1693  Assert(HeapTupleHeaderIsHeapOnly(htup));
1694  }
1695  else
1696  Assert(ItemIdIsUsed(lp));
1697  }
1698 
1699 #endif
1700 
1701  ItemIdSetUnused(lp);
1702  }
1703 
1704  if (lp_truncate_only)
1705  PageTruncateLinePointerArray(page);
1706  else
1707  {
1708  /*
1709  * Finally, repair any fragmentation, and update the page's hint bit
1710  * about whether it has free pointers.
1711  */
1712  PageRepairFragmentation(page);
1713 
1714  /*
1715  * Now that the page has been modified, assert that redirect items
1716  * still point to valid targets.
1717  */
1718  page_verify_redirects(page);
1719  }
1720 }
void PageRepairFragmentation(Page page)
Definition: bufpage.c:699
void PageTruncateLinePointerArray(Page page)
Definition: bufpage.c:835
#define PG_USED_FOR_ASSERTS_ONLY
Definition: c.h:182
#define ItemIdSetRedirect(itemId, link)
Definition: itemid.h:152
#define ItemIdSetDead(itemId)
Definition: itemid.h:164
#define ItemIdSetUnused(itemId)
Definition: itemid.h:128
#define ItemIdHasStorage(itemId)
Definition: itemid.h:120
static void page_verify_redirects(Page page)
Definition: pruneheap.c:1737

References Assert, BufferGetPage(), HeapTupleHeaderIsHeapOnly, i, ItemIdGetRedirect, ItemIdHasStorage, ItemIdIsDead, ItemIdIsNormal, ItemIdIsRedirected, ItemIdIsUsed, ItemIdSetDead, ItemIdSetRedirect, ItemIdSetUnused, page_verify_redirects(), PageGetItem(), PageGetItemId(), PageRepairFragmentation(), PageTruncateLinePointerArray(), and PG_USED_FOR_ASSERTS_ONLY.

Referenced by heap_page_prune_and_freeze(), and heap_xlog_prune_freeze().
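
A detail worth calling out from the listing: the redirected array is consumed two entries at a time, as nredirected (from, to) offset pairs, so it holds 2 * nredirected entries, while nowdead and nowunused are flat offset lists. A small standalone sketch of walking such a pair array (OffsetNumber here is a stand-in typedef, not the PostgreSQL one):

#include <stdio.h>

typedef unsigned short OffsetNumber;    /* stand-in for the PostgreSQL typedef */

/* Walk an array holding nredirected (from, to) pairs, as the listing above does. */
static void
walk_redirect_pairs(const OffsetNumber *redirected, int nredirected)
{
    const OffsetNumber *offnum = redirected;

    for (int i = 0; i < nredirected; i++)
    {
        OffsetNumber fromoff = *offnum++;
        OffsetNumber tooff = *offnum++;

        printf("redirect item %u -> %u\n", (unsigned) fromoff, (unsigned) tooff);
    }
}

int
main(void)
{
    /* Two redirects: 1 -> 4 and 2 -> 7; the array length is 2 * nredirected */
    OffsetNumber redirected[] = {1, 4, 2, 7};

    walk_redirect_pairs(redirected, 2);
    return 0;
}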

◆ heap_page_prune_opt()

void heap_page_prune_opt ( Relation  relation,
Buffer  buffer 
)

Definition at line 193 of file pruneheap.c.

194 {
195  Page page = BufferGetPage(buffer);
196  TransactionId prune_xid;
197  GlobalVisState *vistest;
198  Size minfree;
199 
200  /*
201  * We can't write WAL in recovery mode, so there's no point trying to
202  * clean the page. The primary will likely issue a cleaning WAL record
203  * soon anyway, so this is no particular loss.
204  */
205  if (RecoveryInProgress())
206  return;
207 
208  /*
209  * First check whether there's any chance there's something to prune,
210  * determining the appropriate horizon is a waste if there's no prune_xid
211  * (i.e. no updates/deletes left potentially dead tuples around).
212  */
213  prune_xid = ((PageHeader) page)->pd_prune_xid;
214  if (!TransactionIdIsValid(prune_xid))
215  return;
216 
217  /*
218  * Check whether prune_xid indicates that there may be dead rows that can
219  * be cleaned up.
220  */
221  vistest = GlobalVisTestFor(relation);
222 
223  if (!GlobalVisTestIsRemovableXid(vistest, prune_xid))
224  return;
225 
226  /*
227  * We prune when a previous UPDATE failed to find enough space on the page
228  * for a new tuple version, or when free space falls below the relation's
229  * fill-factor target (but not less than 10%).
230  *
231  * Checking free space here is questionable since we aren't holding any
232  * lock on the buffer; in the worst case we could get a bogus answer. It's
233  * unlikely to be *seriously* wrong, though, since reading either pd_lower
234  * or pd_upper is probably atomic. Avoiding taking a lock seems more
235  * important than sometimes getting a wrong answer in what is after all
236  * just a heuristic estimate.
237  */
238  minfree = RelationGetTargetPageFreeSpace(relation,
239  HEAP_DEFAULT_FILLFACTOR);
240  minfree = Max(minfree, BLCKSZ / 10);
241 
242  if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
243  {
244  /* OK, try to get exclusive buffer lock */
245  if (!ConditionalLockBufferForCleanup(buffer))
246  return;
247 
248  /*
249  * Now that we have buffer lock, get accurate information about the
250  * page's free space, and recheck the heuristic about whether to
251  * prune.
252  */
253  if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
254  {
255  OffsetNumber dummy_off_loc;
256  PruneFreezeResult presult;
257 
258  /*
259  * For now, pass mark_unused_now as false regardless of whether or
260  * not the relation has indexes, since we cannot safely determine
261  * that during on-access pruning with the current implementation.
262  */
263  heap_page_prune_and_freeze(relation, buffer, vistest, 0,
264  NULL, &presult, PRUNE_ON_ACCESS, &dummy_off_loc, NULL, NULL);
265 
266  /*
267  * Report the number of tuples reclaimed to pgstats. This is
268  * presult.ndeleted minus the number of newly-LP_DEAD-set items.
269  *
270  * We derive the number of dead tuples like this to avoid totally
271  * forgetting about items that were set to LP_DEAD, since they
272  * still need to be cleaned up by VACUUM. We only want to count
273  * heap-only tuples that just became LP_UNUSED in our report,
274  * which don't.
275  *
276  * VACUUM doesn't have to compensate in the same way when it
277  * tracks ndeleted, since it will set the same LP_DEAD items to
278  * LP_UNUSED separately.
279  */
280  if (presult.ndeleted > presult.nnewlpdead)
281  pgstat_update_heap_dead_tuples(relation,
282  presult.ndeleted - presult.nnewlpdead);
283  }
284 
285  /* And release buffer lock */
286  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
287 
288  /*
289  * We avoid reuse of any free space created on the page by unrelated
290  * UPDATEs/INSERTs by opting to not update the FSM at this point. The
291  * free space should be reused by UPDATEs to *this* page.
292  */
293  }
294 }
bool ConditionalLockBufferForCleanup(Buffer buffer)
Definition: bufmgr.c:5412
#define Max(x, y)
Definition: c.h:998
void pgstat_update_heap_dead_tuples(Relation rel, int delta)
bool GlobalVisTestIsRemovableXid(GlobalVisState *state, TransactionId xid)
Definition: procarray.c:4268
void heap_page_prune_and_freeze(Relation relation, Buffer buffer, GlobalVisState *vistest, int options, struct VacuumCutoffs *cutoffs, PruneFreezeResult *presult, PruneReason reason, OffsetNumber *off_loc, TransactionId *new_relfrozen_xid, MultiXactId *new_relmin_mxid)
Definition: pruneheap.c:350
bool RecoveryInProgress(void)
Definition: xlog.c:6304

References BUFFER_LOCK_UNLOCK, BufferGetPage(), ConditionalLockBufferForCleanup(), GlobalVisTestFor(), GlobalVisTestIsRemovableXid(), HEAP_DEFAULT_FILLFACTOR, heap_page_prune_and_freeze(), LockBuffer(), Max, PruneFreezeResult::ndeleted, PruneFreezeResult::nnewlpdead, PageGetHeapFreeSpace(), PageIsFull(), pgstat_update_heap_dead_tuples(), PRUNE_ON_ACCESS, RecoveryInProgress(), RelationGetTargetPageFreeSpace, and TransactionIdIsValid.

Referenced by heap_prepare_pagescan(), heapam_index_fetch_tuple(), and heapam_scan_bitmap_next_block().
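
The pruning heuristic above fires when the page is marked full or when free space falls below the relation's fillfactor target, and the threshold is never taken smaller than 10% of a block. A standalone sketch of that threshold computation (assuming the common 8192-byte block size; not PostgreSQL code):

#include <stdio.h>

#define BLCKSZ 8192             /* assumed default PostgreSQL block size */

/*
 * Illustrative version of the threshold used above: the relation's
 * fillfactor target free space, but never less than 10% of a block.
 */
static size_t
prune_minfree(size_t target_free_space)
{
    size_t floor = BLCKSZ / 10;

    return target_free_space > floor ? target_free_space : floor;
}

int
main(void)
{
    /* e.g. fillfactor 90 leaves roughly 10% of the page as the target free space */
    printf("minfree (fillfactor 90):  %zu bytes\n", prune_minfree(BLCKSZ / 10));
    /* a fillfactor of 100 would compute 0, but the 10-percent floor still applies */
    printf("minfree (fillfactor 100): %zu bytes\n", prune_minfree(0));
    return 0;
}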

◆ heap_pre_freeze_checks()

void heap_pre_freeze_checks ( Buffer  buffer,
HeapTupleFreeze tuples,
int  ntuples 
)

Definition at line 6847 of file heapam.c.

6849 {
6850  Page page = BufferGetPage(buffer);
6851 
6852  for (int i = 0; i < ntuples; i++)
6853  {
6854  HeapTupleFreeze *frz = tuples + i;
6855  ItemId itemid = PageGetItemId(page, frz->offset);
6856  HeapTupleHeader htup;
6857 
6858  htup = (HeapTupleHeader) PageGetItem(page, itemid);
6859 
6860  /* Deliberately avoid relying on tuple hint bits here */
6861  if (frz->checkflags & HEAP_FREEZE_CHECK_XMIN_COMMITTED)
6862  {
6863  TransactionId xmin = HeapTupleHeaderGetRawXmin(htup);
6864 
6865  Assert(!HeapTupleHeaderXminFrozen(htup));
6866  if (unlikely(!TransactionIdDidCommit(xmin)))
6867  ereport(ERROR,
6869  errmsg_internal("uncommitted xmin %u needs to be frozen",
6870  xmin)));
6871  }
6872 
6873  /*
6874  * TransactionIdDidAbort won't work reliably in the presence of XIDs
6875  * left behind by transactions that were in progress during a crash,
6876  * so we can only check that xmax didn't commit
6877  */
6878  if (frz->checkflags & HEAP_FREEZE_CHECK_XMAX_ABORTED)
6879  {
6880  TransactionId xmax = HeapTupleHeaderGetRawXmax(htup);
6881 
6882  Assert(TransactionIdIsNormal(xmax));
6883  if (unlikely(TransactionIdDidCommit(xmax)))
6884  ereport(ERROR,
6886  errmsg_internal("cannot freeze committed xmax %u",
6887  xmax)));
6888  }
6889  }
6890 }
#define HEAP_FREEZE_CHECK_XMAX_ABORTED
Definition: heapam.h:137
#define HEAP_FREEZE_CHECK_XMIN_COMMITTED
Definition: heapam.h:136
#define HeapTupleHeaderGetRawXmin(tup)
Definition: htup_details.h:304
#define HeapTupleHeaderXminFrozen(tup)
Definition: htup_details.h:331
#define ERRCODE_DATA_CORRUPTED
Definition: pg_basebackup.c:41
uint8 checkflags
Definition: heapam.h:149
bool TransactionIdDidCommit(TransactionId transactionId)
Definition: transam.c:126
#define TransactionIdIsNormal(xid)
Definition: transam.h:42

References Assert, BufferGetPage(), HeapTupleFreeze::checkflags, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errmsg_internal(), ERROR, HEAP_FREEZE_CHECK_XMAX_ABORTED, HEAP_FREEZE_CHECK_XMIN_COMMITTED, HeapTupleHeaderGetRawXmax, HeapTupleHeaderGetRawXmin, HeapTupleHeaderXminFrozen, i, HeapTupleFreeze::offset, PageGetItem(), PageGetItemId(), TransactionIdDidCommit(), TransactionIdIsNormal, and unlikely.

Referenced by heap_page_prune_and_freeze().
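
Each check here is gated on a bit that heap_prepare_freeze_tuple() set in the freeze plan's checkflags. A standalone sketch of that set-then-test pattern (the flag values below are illustrative; the real HEAP_FREEZE_CHECK_* macros are defined in heapam.h):

#include <stdio.h>

typedef unsigned char uint8;

/* Illustrative flag values; the real macros are defined in heapam.h */
#define CHECK_XMIN_COMMITTED (1 << 0)
#define CHECK_XMAX_ABORTED   (1 << 1)

struct freeze_plan
{
    uint8 checkflags;
};

int
main(void)
{
    struct freeze_plan frz = {0};

    /* the plan-building step sets a bit when a check is required ... */
    frz.checkflags |= CHECK_XMIN_COMMITTED;

    /* ... and the pre-freeze step later tests each bit before validating */
    if (frz.checkflags & CHECK_XMIN_COMMITTED)
        printf("would verify that xmin committed\n");
    if (frz.checkflags & CHECK_XMAX_ABORTED)
        printf("would verify that xmax did not commit\n");
    return 0;
}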

◆ heap_prepare_freeze_tuple()

bool heap_prepare_freeze_tuple ( HeapTupleHeader  tuple,
const struct VacuumCutoffs cutoffs,
HeapPageFreeze pagefrz,
HeapTupleFreeze frz,
bool totally_frozen 
)

Definition at line 6550 of file heapam.c.

6554 {
6555  bool xmin_already_frozen = false,
6556  xmax_already_frozen = false;
6557  bool freeze_xmin = false,
6558  replace_xvac = false,
6559  replace_xmax = false,
6560  freeze_xmax = false;
6561  TransactionId xid;
6562 
6563  frz->xmax = HeapTupleHeaderGetRawXmax(tuple);
6564  frz->t_infomask2 = tuple->t_infomask2;
6565  frz->t_infomask = tuple->t_infomask;
6566  frz->frzflags = 0;
6567  frz->checkflags = 0;
6568 
6569  /*
6570  * Process xmin, while keeping track of whether it's already frozen, or
6571  * will become frozen iff our freeze plan is executed by caller (could be
6572  * neither).
6573  */
6574  xid = HeapTupleHeaderGetXmin(tuple);
6575  if (!TransactionIdIsNormal(xid))
6576  xmin_already_frozen = true;
6577  else
6578  {
6579  if (TransactionIdPrecedes(xid, cutoffs->relfrozenxid))
6580  ereport(ERROR,
6582  errmsg_internal("found xmin %u from before relfrozenxid %u",
6583  xid, cutoffs->relfrozenxid)));
6584 
6585  /* Will set freeze_xmin flags in freeze plan below */
6586  freeze_xmin = TransactionIdPrecedes(xid, cutoffs->OldestXmin);
6587 
6588  /* Verify that xmin committed if and when freeze plan is executed */
6589  if (freeze_xmin)
6590  frz->checkflags |= HEAP_FREEZE_CHECK_XMIN_COMMITTED;
6591  }
6592 
6593  /*
6594  * Old-style VACUUM FULL is gone, but we have to process xvac for as long
6595  * as we support having MOVED_OFF/MOVED_IN tuples in the database
6596  */
6597  xid = HeapTupleHeaderGetXvac(tuple);
6598  if (TransactionIdIsNormal(xid))
6599  {
6600  Assert(TransactionIdPrecedesOrEquals(cutoffs->relfrozenxid, xid));
6601  Assert(TransactionIdPrecedes(xid, cutoffs->OldestXmin));
6602 
6603  /*
6604  * For Xvac, we always freeze proactively. This allows totally_frozen
6605  * tracking to ignore xvac.
6606  */
6607  replace_xvac = pagefrz->freeze_required = true;
6608 
6609  /* Will set replace_xvac flags in freeze plan below */
6610  }
6611 
6612  /* Now process xmax */
6613  xid = frz->xmax;
6614  if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
6615  {
6616  /* Raw xmax is a MultiXactId */
6617  TransactionId newxmax;
6618  uint16 flags;
6619 
6620  /*
6621  * We will either remove xmax completely (in the "freeze_xmax" path),
6622  * process xmax by replacing it (in the "replace_xmax" path), or
6623  * perform no-op xmax processing. The only constraint is that the
6624  * FreezeLimit/MultiXactCutoff postcondition must never be violated.
6625  */
6626  newxmax = FreezeMultiXactId(xid, tuple->t_infomask, cutoffs,
6627  &flags, pagefrz);
6628 
6629  if (flags & FRM_NOOP)
6630  {
6631  /*
6632  * xmax is a MultiXactId, and nothing about it changes for now.
6633  * This is the only case where 'freeze_required' won't have been
6634  * set for us by FreezeMultiXactId, as well as the only case where
6635  * neither freeze_xmax nor replace_xmax are set (given a multi).
6636  *
6637  * This is a no-op, but the call to FreezeMultiXactId might have
6638  * ratcheted back NewRelfrozenXid and/or NewRelminMxid trackers
6639  * for us (the "freeze page" variants, specifically). That'll
6640  * make it safe for our caller to freeze the page later on, while
6641  * leaving this particular xmax undisturbed.
6642  *
6643  * FreezeMultiXactId is _not_ responsible for the "no freeze"
6644  * NewRelfrozenXid/NewRelminMxid trackers, though -- that's our
6645  * job. A call to heap_tuple_should_freeze for this same tuple
6646  * will take place below if 'freeze_required' isn't set already.
6647  * (This repeats work from FreezeMultiXactId, but allows "no
6648  * freeze" tracker maintenance to happen in only one place.)
6649  */
6650  Assert(!MultiXactIdPrecedes(newxmax, cutoffs->MultiXactCutoff));
6651  Assert(MultiXactIdIsValid(newxmax) && xid == newxmax);
6652  }
6653  else if (flags & FRM_RETURN_IS_XID)
6654  {
6655  /*
6656  * xmax will become an updater Xid (original MultiXact's updater
6657  * member Xid will be carried forward as a simple Xid in Xmax).
6658  */
6659  Assert(!TransactionIdPrecedes(newxmax, cutoffs->OldestXmin));
6660 
6661  /*
6662  * NB -- some of these transformations are only valid because we
6663  * know the return Xid is a tuple updater (i.e. not merely a
6664  * locker.) Also note that the only reason we don't explicitly
6665  * worry about HEAP_KEYS_UPDATED is because it lives in
6666  * t_infomask2 rather than t_infomask.
6667  */
6668  frz->t_infomask &= ~HEAP_XMAX_BITS;
6669  frz->xmax = newxmax;
6670  if (flags & FRM_MARK_COMMITTED)
6671  frz->t_infomask |= HEAP_XMAX_COMMITTED;
6672  replace_xmax = true;
6673  }
6674  else if (flags & FRM_RETURN_IS_MULTI)
6675  {
6676  uint16 newbits;
6677  uint16 newbits2;
6678 
6679  /*
6680  * xmax is an old MultiXactId that we have to replace with a new
6681  * MultiXactId, to carry forward two or more original member XIDs.
6682  */
6683  Assert(!MultiXactIdPrecedes(newxmax, cutoffs->OldestMxact));
6684 
6685  /*
6686  * We can't use GetMultiXactIdHintBits directly on the new multi
6687  * here; that routine initializes the masks to all zeroes, which
6688  * would lose other bits we need. Doing it this way ensures all
6689  * unrelated bits remain untouched.
6690  */
6691  frz->t_infomask &= ~HEAP_XMAX_BITS;
6692  frz->t_infomask2 &= ~HEAP_KEYS_UPDATED;
6693  GetMultiXactIdHintBits(newxmax, &newbits, &newbits2);
6694  frz->t_infomask |= newbits;
6695  frz->t_infomask2 |= newbits2;
6696  frz->xmax = newxmax;
6697  replace_xmax = true;
6698  }
6699  else
6700  {
6701  /*
6702  * Freeze plan for tuple "freezes xmax" in the strictest sense:
6703  * it'll leave nothing in xmax (neither an Xid nor a MultiXactId).
6704  */
6705  Assert(flags & FRM_INVALIDATE_XMAX);
6706  Assert(!TransactionIdIsValid(newxmax));
6707 
6708  /* Will set freeze_xmax flags in freeze plan below */
6709  freeze_xmax = true;
6710  }
6711 
6712  /* MultiXactId processing forces freezing (barring FRM_NOOP case) */
6713  Assert(pagefrz->freeze_required || (!freeze_xmax && !replace_xmax));
6714  }
6715  else if (TransactionIdIsNormal(xid))
6716  {
6717  /* Raw xmax is normal XID */
6718  if (TransactionIdPrecedes(xid, cutoffs->relfrozenxid))
6719  ereport(ERROR,
6721  errmsg_internal("found xmax %u from before relfrozenxid %u",
6722  xid, cutoffs->relfrozenxid)));
6723 
6724  /* Will set freeze_xmax flags in freeze plan below */
6725  freeze_xmax = TransactionIdPrecedes(xid, cutoffs->OldestXmin);
6726 
6727  /*
6728  * Verify that xmax aborted if and when freeze plan is executed,
6729  * provided it's from an update. (A lock-only xmax can be removed
6730  * independent of this, since the lock is released at xact end.)
6731  */
6732  if (freeze_xmax && !HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
6733  frz->checkflags |= HEAP_FREEZE_CHECK_XMAX_ABORTED;
6734  }
6735  else if (!TransactionIdIsValid(xid))
6736  {
6737  /* Raw xmax is InvalidTransactionId XID */
6738  Assert((tuple->t_infomask & HEAP_XMAX_IS_MULTI) == 0);
6739  xmax_already_frozen = true;
6740  }
6741  else
6742  ereport(ERROR,
6744  errmsg_internal("found raw xmax %u (infomask 0x%04x) not invalid and not multi",
6745  xid, tuple->t_infomask)));
6746 
6747  if (freeze_xmin)
6748  {
6749  Assert(!xmin_already_frozen);
6750 
6751  frz->t_infomask |= HEAP_XMIN_FROZEN;
6752  }
6753  if (replace_xvac)
6754  {
6755  /*
6756  * If a MOVED_OFF tuple is not dead, the xvac transaction must have
6757  * failed; whereas a non-dead MOVED_IN tuple must mean the xvac
6758  * transaction succeeded.
6759  */
6760  Assert(pagefrz->freeze_required);
6761  if (tuple->t_infomask & HEAP_MOVED_OFF)
6762  frz->frzflags |= XLH_INVALID_XVAC;
6763  else
6764  frz->frzflags |= XLH_FREEZE_XVAC;
6765  }
6766  if (replace_xmax)
6767  {
6768  Assert(!xmax_already_frozen && !freeze_xmax);
6769  Assert(pagefrz->freeze_required);
6770 
6771  /* Already set replace_xmax flags in freeze plan earlier */
6772  }
6773  if (freeze_xmax)
6774  {
6775  Assert(!xmax_already_frozen && !replace_xmax);
6776 
6777  frz->xmax = InvalidTransactionId;
6778 
6779  /*
6780  * The tuple might be marked either XMAX_INVALID or XMAX_COMMITTED +
6781  * LOCKED. Normalize to INVALID just to be sure no one gets confused.
6782  * Also get rid of the HEAP_KEYS_UPDATED bit.
6783  */
6784  frz->t_infomask &= ~HEAP_XMAX_BITS;
6785  frz->t_infomask |= HEAP_XMAX_INVALID;
6786  frz->t_infomask2 &= ~HEAP_HOT_UPDATED;
6787  frz->t_infomask2 &= ~HEAP_KEYS_UPDATED;
6788  }
6789 
6790  /*
6791  * Determine if this tuple is already totally frozen, or will become
6792  * totally frozen (provided caller executes freeze plans for the page)
6793  */
6794  *totally_frozen = ((freeze_xmin || xmin_already_frozen) &&
6795  (freeze_xmax || xmax_already_frozen));
6796 
6797  if (!pagefrz->freeze_required && !(xmin_already_frozen &&
6798  xmax_already_frozen))
6799  {
6800  /*
6801  * So far no previous tuple from the page made freezing mandatory.
6802  * Does this tuple force caller to freeze the entire page?
6803  */
6804  pagefrz->freeze_required =
6805  heap_tuple_should_freeze(tuple, cutoffs,
6806  &pagefrz->NoFreezePageRelfrozenXid,
6807  &pagefrz->NoFreezePageRelminMxid);
6808  }
6809 
6810  /* Tell caller if this tuple has a usable freeze plan set in *frz */
6811  return freeze_xmin || replace_xvac || replace_xmax || freeze_xmax;
6812 }
static void GetMultiXactIdHintBits(MultiXactId multi, uint16 *new_infomask, uint16 *new_infomask2)
Definition: heapam.c:6966
#define FRM_RETURN_IS_XID
Definition: heapam.c:6149
static TransactionId FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, const struct VacuumCutoffs *cutoffs, uint16 *flags, HeapPageFreeze *pagefrz)
Definition: heapam.c:6200
bool heap_tuple_should_freeze(HeapTupleHeader tuple, const struct VacuumCutoffs *cutoffs, TransactionId *NoFreezePageRelfrozenXid, MultiXactId *NoFreezePageRelminMxid)
Definition: heapam.c:7383
#define FRM_MARK_COMMITTED
Definition: heapam.c:6151
#define FRM_NOOP
Definition: heapam.c:6147
#define FRM_RETURN_IS_MULTI
Definition: heapam.c:6150
#define FRM_INVALIDATE_XMAX
Definition: heapam.c:6148
#define XLH_INVALID_XVAC
Definition: heapam_xlog.h:339
#define XLH_FREEZE_XVAC
Definition: heapam_xlog.h:338
#define HEAP_MOVED_OFF
Definition: htup_details.h:211
#define HEAP_XMIN_FROZEN
Definition: htup_details.h:206
#define HEAP_HOT_UPDATED
Definition: htup_details.h:276
#define HeapTupleHeaderGetXvac(tup)
Definition: htup_details.h:411
#define HEAP_XMAX_COMMITTED
Definition: htup_details.h:207
bool MultiXactIdPrecedes(MultiXactId multi1, MultiXactId multi2)
Definition: multixact.c:3309
#define MultiXactIdIsValid(multi)
Definition: multixact.h:28
uint8 frzflags
Definition: heapam.h:146
uint16 t_infomask2
Definition: heapam.h:144
TransactionId xmax
Definition: heapam.h:143
uint16 t_infomask
Definition: heapam.h:145
MultiXactId OldestMxact
Definition: vacuum.h:268
bool TransactionIdPrecedesOrEquals(TransactionId id1, TransactionId id2)
Definition: transam.c:299

References Assert, HeapTupleFreeze::checkflags, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errmsg_internal(), ERROR, HeapPageFreeze::freeze_required, FreezeMultiXactId(), FRM_INVALIDATE_XMAX, FRM_MARK_COMMITTED, FRM_NOOP, FRM_RETURN_IS_MULTI, FRM_RETURN_IS_XID, HeapTupleFreeze::frzflags, GetMultiXactIdHintBits(), HEAP_FREEZE_CHECK_XMAX_ABORTED, HEAP_FREEZE_CHECK_XMIN_COMMITTED, HEAP_HOT_UPDATED, HEAP_KEYS_UPDATED, HEAP_MOVED_OFF, heap_tuple_should_freeze(), HEAP_XMAX_BITS, HEAP_XMAX_COMMITTED, HEAP_XMAX_INVALID, HEAP_XMAX_IS_LOCKED_ONLY, HEAP_XMAX_IS_MULTI, HEAP_XMIN_FROZEN, HeapTupleHeaderGetRawXmax, HeapTupleHeaderGetXmin, HeapTupleHeaderGetXvac, InvalidTransactionId, VacuumCutoffs::MultiXactCutoff, MultiXactIdIsValid, MultiXactIdPrecedes(), HeapPageFreeze::NoFreezePageRelfrozenXid, HeapPageFreeze::NoFreezePageRelminMxid, VacuumCutoffs::OldestMxact, VacuumCutoffs::OldestXmin, VacuumCutoffs::relfrozenxid, HeapTupleFreeze::t_infomask, HeapTupleHeaderData::t_infomask, HeapTupleFreeze::t_infomask2, HeapTupleHeaderData::t_infomask2, TransactionIdIsNormal, TransactionIdIsValid, TransactionIdPrecedes(), TransactionIdPrecedesOrEquals(), XLH_FREEZE_XVAC, XLH_INVALID_XVAC, and HeapTupleFreeze::xmax.

Referenced by heap_freeze_tuple(), and heap_prune_record_unchanged_lp_normal().
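
Near the end of the listing, *totally_frozen is derived from four booleans: both xmin and xmax must either already be frozen or be frozen by the plan being prepared. A standalone restatement of that rule with example inputs (illustrative only, not PostgreSQL code):

#include <stdbool.h>
#include <stdio.h>

/*
 * Illustrative restatement of how *totally_frozen is computed: each of xmin
 * and xmax must either already be frozen or be frozen by the prepared plan.
 */
static bool
totally_frozen(bool freeze_xmin, bool xmin_already_frozen,
               bool freeze_xmax, bool xmax_already_frozen)
{
    return (freeze_xmin || xmin_already_frozen) &&
           (freeze_xmax || xmax_already_frozen);
}

int
main(void)
{
    /* xmin will be frozen by the plan, xmax is already invalid/frozen: 1 */
    printf("%d\n", totally_frozen(true, false, false, true));
    /* xmin already frozen, but xmax still carries state to preserve: 0 */
    printf("%d\n", totally_frozen(false, true, false, false));
    return 0;
}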

◆ heap_prepare_pagescan()

void heap_prepare_pagescan ( TableScanDesc  sscan)

Definition at line 494 of file heapam.c.

495 {
496  HeapScanDesc scan = (HeapScanDesc) sscan;
497  Buffer buffer = scan->rs_cbuf;
498  BlockNumber block = scan->rs_cblock;
499  Snapshot snapshot;
500  Page page;
501  int lines;
502  bool all_visible;
503  bool check_serializable;
504 
505  Assert(BufferGetBlockNumber(buffer) == block);
506 
507  /* ensure we're not accidentally being used when not in pagemode */
508  Assert(scan->rs_base.rs_flags & SO_ALLOW_PAGEMODE);
509  snapshot = scan->rs_base.rs_snapshot;
510 
511  /*
512  * Prune and repair fragmentation for the whole page, if possible.
513  */
514  heap_page_prune_opt(scan->rs_base.rs_rd, buffer);
515 
516  /*
517  * We must hold share lock on the buffer content while examining tuple
518  * visibility. Afterwards, however, the tuples we have found to be
519  * visible are guaranteed good as long as we hold the buffer pin.
520  */
521  LockBuffer(buffer, BUFFER_LOCK_SHARE);
522 
523  page = BufferGetPage(buffer);
524  lines = PageGetMaxOffsetNumber(page);
525 
526  /*
527  * If the all-visible flag indicates that all tuples on the page are
528  * visible to everyone, we can skip the per-tuple visibility tests.
529  *
530  * Note: In hot standby, a tuple that's already visible to all
531  * transactions on the primary might still be invisible to a read-only
532  * transaction in the standby. We partly handle this problem by tracking
533  * the minimum xmin of visible tuples as the cut-off XID while marking a
534  * page all-visible on the primary and WAL log that along with the
535  * visibility map SET operation. In hot standby, we wait for (or abort)
536  * all transactions that potentially may not see one or more tuples on
537  * the page. That's how index-only scans work fine in hot standby. A
538  * crucial difference between index-only scans and heap scans is that an
539  * index-only scan relies completely on the visibility map, whereas a heap
540  * scan looks at the page-level PD_ALL_VISIBLE flag. We are not sure if
541  * the page-level flag can be trusted in the same way, because it might
542  * get propagated somehow without being explicitly WAL-logged, e.g. via a
543  * full page write. Until we can prove that beyond doubt, let's check each
544  * tuple for visibility the hard way.
545  */
546  all_visible = PageIsAllVisible(page) && !snapshot->takenDuringRecovery;
547  check_serializable =
548  CheckForSerializableConflictOutNeeded(scan->rs_base.rs_rd, snapshot);
549 
550  /*
551  * We call page_collect_tuples() with constant arguments, to get the
552  * compiler to constant fold the constant arguments. Separate calls with
553  * constant arguments, rather than variables, are needed on several
554  * compilers to actually perform constant folding.
555  */
556  if (likely(all_visible))
557  {
558  if (likely(!check_serializable))
559  scan->rs_ntuples = page_collect_tuples(scan, snapshot, page, buffer,
560  block, lines, true, false);
561  else
562  scan->rs_ntuples = page_collect_tuples(scan, snapshot, page, buffer,
563  block, lines, true, true);
564  }
565  else
566  {
567  if (likely(!check_serializable))
568  scan->rs_ntuples = page_collect_tuples(scan, snapshot, page, buffer,
569  block, lines, false, false);
570  else
571  scan->rs_ntuples = page_collect_tuples(scan, snapshot, page, buffer,
572  block, lines, false, true);
573  }
574 
575  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
576 }
static pg_attribute_always_inline int page_collect_tuples(HeapScanDesc scan, Snapshot snapshot, Page page, Buffer buffer, BlockNumber block, int lines, bool all_visible, bool check_serializable)
Definition: heapam.c:444
bool CheckForSerializableConflictOutNeeded(Relation relation, Snapshot snapshot)
Definition: predicate.c:3976
void heap_page_prune_opt(Relation relation, Buffer buffer)
Definition: pruneheap.c:193
BlockNumber rs_cblock
Definition: heapam.h:66
bool takenDuringRecovery
Definition: snapshot.h:184

References Assert, BUFFER_LOCK_SHARE, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage(), CheckForSerializableConflictOutNeeded(), heap_page_prune_opt(), likely, LockBuffer(), page_collect_tuples(), PageGetMaxOffsetNumber(), PageIsAllVisible(), HeapScanDescData::rs_base, HeapScanDescData::rs_cblock, HeapScanDescData::rs_cbuf, TableScanDescData::rs_flags, HeapScanDescData::rs_ntuples, TableScanDescData::rs_rd, TableScanDescData::rs_snapshot, SO_ALLOW_PAGEMODE, and SnapshotData::takenDuringRecovery.

Referenced by heapam_scan_sample_next_block(), and heapgettup_pagemode().
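
The four nearly identical calls above exist so that page_collect_tuples() is invoked with compile-time-constant booleans, letting the compiler fold away the per-tuple branches in each instantiation. A standalone sketch of that pattern (uses a GCC/Clang always_inline attribute; the helper body is a stand-in, not the real per-tuple visibility work):

#include <stdbool.h>
#include <stdio.h>

/*
 * Always-inline helper called only with constant bool arguments, so each call
 * site becomes a specialized loop with the dead branches removed.
 */
static inline __attribute__((always_inline)) int
collect(int ntuples, bool all_visible, bool check_serializable)
{
    int kept = 0;
    int extra_checks = 0;

    for (int i = 0; i < ntuples; i++)
    {
        /* stand-in for the per-tuple visibility test */
        bool visible = all_visible || (i % 2 == 0);

        if (check_serializable)
            extra_checks++;     /* stand-in for the serializable-conflict check */
        if (visible)
            kept++;
    }
    (void) extra_checks;        /* silence set-but-unused warnings */
    return kept;
}

int
main(void)
{
    bool all_visible = true;
    bool check_serializable = false;
    int n;

    /* Mirror the four constant-argument call sites in the listing above */
    if (all_visible)
        n = check_serializable ? collect(10, true, true) : collect(10, true, false);
    else
        n = check_serializable ? collect(10, false, true) : collect(10, false, false);
    printf("%d tuples kept\n", n);
    return 0;
}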

◆ heap_rescan()

void heap_rescan ( TableScanDesc  sscan,
ScanKey  key,
bool  set_params,
bool  allow_strat,
bool  allow_sync,
bool  allow_pagemode 
)

Definition at line 1152 of file heapam.c.

1154 {
1155  HeapScanDesc scan = (HeapScanDesc) sscan;
1156 
1157  if (set_params)
1158  {
1159  if (allow_strat)
1160  scan->rs_base.rs_flags |= SO_ALLOW_STRAT;
1161  else
1162  scan->rs_base.rs_flags &= ~SO_ALLOW_STRAT;
1163 
1164  if (allow_sync)
1165  scan->rs_base.rs_flags |= SO_ALLOW_SYNC;
1166  else
1167  scan->rs_base.rs_flags &= ~SO_ALLOW_SYNC;
1168 
1169  if (allow_pagemode && scan->rs_base.rs_snapshot &&
1170  IsMVCCSnapshot(scan->rs_base.rs_snapshot))
1171  scan->rs_base.rs_flags |= SO_ALLOW_PAGEMODE;
1172  else
1173  scan->rs_base.rs_flags &= ~SO_ALLOW_PAGEMODE;
1174  }
1175 
1176  /*
1177  * unpin scan buffers
1178  */
1179  if (BufferIsValid(scan->rs_cbuf))
1180  ReleaseBuffer(scan->rs_cbuf);
1181 
1182  if (BufferIsValid(scan->rs_vmbuffer))
1183  {
1184  ReleaseBuffer(scan->rs_vmbuffer);
1185  scan->rs_vmbuffer = InvalidBuffer;
1186  }
1187 
1188  /*
1189  * Reset rs_empty_tuples_pending, a field only used by bitmap heap scan,
1190  * to avoid incorrectly emitting NULL-filled tuples from a previous scan
1191  * on rescan.
1192  */
1193  scan->rs_empty_tuples_pending = 0;
1194 
1195  /*
1196  * The read stream is reset on rescan. This must be done before
1197  * initscan(), as some state referred to by read_stream_reset() is reset
1198  * in initscan().
1199  */
1200  if (scan->rs_read_stream)
1201  read_stream_reset(scan->rs_read_stream);
1202 
1203  /*
1204  * reinitialize scan descriptor
1205  */
1206  initscan(scan, key, true);
1207 }
void read_stream_reset(ReadStream *stream)
Definition: read_stream.c:823
@ SO_ALLOW_STRAT
Definition: tableam.h:58
@ SO_ALLOW_SYNC
Definition: tableam.h:60

References BufferIsValid(), initscan(), InvalidBuffer, IsMVCCSnapshot, sort-test::key, read_stream_reset(), ReleaseBuffer(), HeapScanDescData::rs_base, HeapScanDescData::rs_cbuf, HeapScanDescData::rs_empty_tuples_pending, TableScanDescData::rs_flags, HeapScanDescData::rs_read_stream, TableScanDescData::rs_snapshot, HeapScanDescData::rs_vmbuffer, SO_ALLOW_PAGEMODE, SO_ALLOW_STRAT, and SO_ALLOW_SYNC.
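
When set_params is true, each allow_* boolean simply sets or clears the corresponding scan-flag bit (pagemode additionally requires an MVCC snapshot, which the sketch below omits). A standalone illustration of that pattern with made-up flag values (the real SO_* flags live in tableam.h):

#include <stdbool.h>
#include <stdio.h>

typedef unsigned int uint32;

/* Illustrative flag bits; the real SO_* values are defined in tableam.h */
#define SCAN_ALLOW_STRAT    (1 << 0)
#define SCAN_ALLOW_SYNC     (1 << 1)
#define SCAN_ALLOW_PAGEMODE (1 << 2)

static uint32
apply_rescan_params(uint32 flags, bool allow_strat, bool allow_sync, bool allow_pagemode)
{
    if (allow_strat)
        flags |= SCAN_ALLOW_STRAT;
    else
        flags &= ~SCAN_ALLOW_STRAT;

    if (allow_sync)
        flags |= SCAN_ALLOW_SYNC;
    else
        flags &= ~SCAN_ALLOW_SYNC;

    if (allow_pagemode)
        flags |= SCAN_ALLOW_PAGEMODE;
    else
        flags &= ~SCAN_ALLOW_PAGEMODE;

    return flags;
}

int
main(void)
{
    uint32 flags = SCAN_ALLOW_SYNC;

    flags = apply_rescan_params(flags, true, false, true);
    printf("flags = 0x%x\n", flags);    /* strat + pagemode set, sync cleared */
    return 0;
}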

◆ heap_set_tidrange()

void heap_set_tidrange ( TableScanDesc  sscan,
ItemPointer  mintid,
ItemPointer  maxtid 
)

Definition at line 1331 of file heapam.c.

1333 {
1334  HeapScanDesc scan = (HeapScanDesc) sscan;
1335  BlockNumber startBlk;
1336  BlockNumber numBlks;
1337  ItemPointerData highestItem;
1338  ItemPointerData lowestItem;
1339 
1340  /*
1341  * For relations without any pages, we can simply leave the TID range
1342  * unset. There will be no tuples to scan, therefore no tuples outside
1343  * the given TID range.
1344  */
1345  if (scan->rs_nblocks == 0)
1346  return;
1347 
1348  /*
1349  * Set up some ItemPointers which point to the first and last possible
1350  * tuples in the heap.
1351  */
1352  ItemPointerSet(&highestItem, scan->rs_nblocks - 1, MaxOffsetNumber);
1353  ItemPointerSet(&lowestItem, 0, FirstOffsetNumber);
1354 
1355  /*
1356  * If the given maximum TID is below the highest possible TID in the
1357  * relation, then restrict the range to that, otherwise we scan to the end
1358  * of the relation.
1359  */
1360  if (ItemPointerCompare(maxtid, &highestItem) < 0)
1361  ItemPointerCopy(maxtid, &highestItem);
1362 
1363  /*
1364  * If the given minimum TID is above the lowest possible TID in the
1365  * relation, then restrict the range to only scan for TIDs above that.
1366  */
1367  if (ItemPointerCompare(mintid, &lowestItem) > 0)
1368  ItemPointerCopy(mintid, &lowestItem);
1369 
1370  /*
1371  * Check for an empty range, and protect against would-be negative results
1372  * from the numBlks calculation below.
1373  */
1374  if (ItemPointerCompare(&highestItem, &lowestItem) < 0)
1375  {
1376  /* Set an empty range of blocks to scan */
1377  heap_setscanlimits(sscan, 0, 0);
1378  return;
1379  }
1380 
1381  /*
1382  * Calculate the first block and the number of blocks we must scan. We
1383  * could be more aggressive here and perform some more validation to try
1384  * and further narrow the scope of blocks to scan by checking if the
1385  * lowestItem has an offset above MaxOffsetNumber. In this case, we could
1386  * advance startBlk by one. Likewise, if highestItem has an offset of 0
1387  * we could scan one fewer blocks. However, such an optimization does not
1388  * seem worth troubling over, currently.
1389  */
1390  startBlk = ItemPointerGetBlockNumberNoCheck(&lowestItem);
1391 
1392  numBlks = ItemPointerGetBlockNumberNoCheck(&highestItem) -
1393  ItemPointerGetBlockNumberNoCheck(&lowestItem) + 1;
1394 
1395  /* Set the start block and number of blocks to scan */
1396  heap_setscanlimits(sscan, startBlk, numBlks);
1397 
1398  /* Finally, set the TID range in sscan */
1399  ItemPointerCopy(&lowestItem, &sscan->rs_mintid);
1400  ItemPointerCopy(&highestItem, &sscan->rs_maxtid);
1401 }
void heap_setscanlimits(TableScanDesc sscan, BlockNumber startBlk, BlockNumber numBlks)
Definition: heapam.c:422
static BlockNumber ItemPointerGetBlockNumberNoCheck(const ItemPointerData *pointer)
Definition: itemptr.h:93
#define MaxOffsetNumber
Definition: off.h:28
BlockNumber rs_nblocks
Definition: heapam.h:58

References FirstOffsetNumber, heap_setscanlimits(), ItemPointerCompare(), ItemPointerCopy(), ItemPointerGetBlockNumberNoCheck(), ItemPointerSet(), MaxOffsetNumber, TableScanDescData::rs_maxtid, TableScanDescData::rs_mintid, and HeapScanDescData::rs_nblocks.
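
The block-range arithmetic at the end reduces to scanning every block from the lowest requested TID's block through the highest requested TID's block, inclusive. A standalone restatement (BlockNumber here is a stand-in typedef, not the PostgreSQL one):

#include <stdio.h>

typedef unsigned int BlockNumber;

/*
 * Illustrative version of the range arithmetic above: scan every block
 * between the lowest and highest requested TIDs, inclusive.
 */
static void
tid_range_to_blocks(BlockNumber lowest_blk, BlockNumber highest_blk,
                    BlockNumber *startBlk, BlockNumber *numBlks)
{
    *startBlk = lowest_blk;
    *numBlks = highest_blk - lowest_blk + 1;
}

int
main(void)
{
    BlockNumber startBlk, numBlks;

    /* TIDs spanning blocks 3 through 7 require scanning 5 blocks */
    tid_range_to_blocks(3, 7, &startBlk, &numBlks);
    printf("start=%u, nblocks=%u\n", startBlk, numBlks);
    return 0;
}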

◆ heap_setscanlimits()

void heap_setscanlimits ( TableScanDesc  sscan,
BlockNumber  startBlk,
BlockNumber  numBlks 
)

Definition at line 422 of file heapam.c.

423 {
424  HeapScanDesc scan = (HeapScanDesc) sscan;
425 
426  Assert(!scan->rs_inited); /* else too late to change */
427  /* else rs_startblock is significant */
428  Assert(!(scan->rs_base.rs_flags & SO_ALLOW_SYNC));
429 
430  /* Check startBlk is valid (but allow case of zero blocks...) */
431  Assert(startBlk == 0 || startBlk < scan->rs_nblocks);
432 
433  scan->rs_startblock = startBlk;
434  scan->rs_numblocks = numBlks;
435 }
bool rs_inited
Definition: heapam.h:64
BlockNumber rs_startblock
Definition: heapam.h:59
BlockNumber rs_numblocks
Definition: heapam.h:60

References Assert, HeapScanDescData::rs_base, TableScanDescData::rs_flags, HeapScanDescData::rs_inited, HeapScanDescData::rs_numblocks, HeapScanDescData::rs_startblock, and SO_ALLOW_SYNC.

Referenced by heap_set_tidrange(), and heapam_index_build_range_scan().

◆ heap_tuple_needs_eventual_freeze()

bool heap_tuple_needs_eventual_freeze ( HeapTupleHeader  tuple)

Definition at line 7328 of file heapam.c.

7329 {
7330  TransactionId xid;
7331 
7332  /*
7333  * If xmin is a normal transaction ID, this tuple is definitely not
7334  * frozen.
7335  */
7336  xid = HeapTupleHeaderGetXmin(tuple);
7337  if (TransactionIdIsNormal(xid))
7338  return true;
7339 
7340  /*
7341  * If xmax is a valid xact or multixact, this tuple is also not frozen.
7342  */
7343  if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
7344  {
7345  MultiXactId multi;
7346 
7347  multi = HeapTupleHeaderGetRawXmax(tuple);
7348  if (MultiXactIdIsValid(multi))
7349  return true;
7350  }
7351  else
7352  {
7353  xid = HeapTupleHeaderGetRawXmax(tuple);
7354  if (TransactionIdIsNormal(xid))
7355  return true;
7356  }
7357 
7358  if (tuple->t_infomask & HEAP_MOVED)
7359  {
7360  xid = HeapTupleHeaderGetXvac(tuple);
7361  if (TransactionIdIsNormal(xid))
7362  return true;
7363  }
7364 
7365  return false;
7366 }

References HEAP_MOVED, HEAP_XMAX_IS_MULTI, HeapTupleHeaderGetRawXmax, HeapTupleHeaderGetXmin, HeapTupleHeaderGetXvac, MultiXactIdIsValid, HeapTupleHeaderData::t_infomask, and TransactionIdIsNormal.

Referenced by collect_corrupt_items(), and heap_page_is_all_visible().
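
The function answers "will this tuple ever need freezing?" by returning true as soon as xmin, xmax (whether an XID or a MultiXactId), or a leftover xvac is still a live identifier. A standalone restatement using plain integers, with 0 standing in for an invalid or frozen ID (wraparound and permanent-XID details are deliberately omitted):

#include <stdbool.h>
#include <stdio.h>

/*
 * Illustrative restatement of the checks above: any still-live identifier
 * means the tuple will eventually need freezing.
 */
static bool
needs_eventual_freeze(unsigned int xmin, unsigned int xmax, unsigned int xvac)
{
    if (xmin != 0)              /* xmin is still a normal XID */
        return true;
    if (xmax != 0)              /* xmax is still a valid XID or MultiXactId */
        return true;
    if (xvac != 0)              /* old-style VACUUM FULL xvac is still present */
        return true;
    return false;
}

int
main(void)
{
    printf("%d\n", needs_eventual_freeze(0, 0, 0));     /* fully frozen: 0 */
    printf("%d\n", needs_eventual_freeze(1234, 0, 0));  /* xmin normal:  1 */
    return 0;
}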

◆ heap_tuple_should_freeze()

bool heap_tuple_should_freeze ( HeapTupleHeader  tuple,
const struct VacuumCutoffs cutoffs,
TransactionId NoFreezePageRelfrozenXid,
MultiXactId NoFreezePageRelminMxid 
)

Definition at line 7383 of file heapam.c.

7387 {
7388  TransactionId xid;
7389  MultiXactId multi;
7390  bool freeze = false;
7391 
7392  /* First deal with xmin */
7393  xid = HeapTupleHeaderGetXmin(tuple);
7394  if (TransactionIdIsNormal(xid))
7395  {
7396  Assert(TransactionIdPrecedesOrEquals(cutoffs->relfrozenxid, xid));
7397  if (TransactionIdPrecedes(xid, *NoFreezePageRelfrozenXid))
7398  *NoFreezePageRelfrozenXid = xid;
7399  if (TransactionIdPrecedes(xid, cutoffs->FreezeLimit))
7400  freeze = true;
7401  }
7402 
7403  /* Now deal with xmax */
7404  xid = InvalidTransactionId;
7405  multi = InvalidMultiXactId;
7406  if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
7407  multi = HeapTupleHeaderGetRawXmax(tuple);
7408  else
7409  xid = HeapTupleHeaderGetRawXmax(tuple);
7410 
7411  if (TransactionIdIsNormal(xid))
7412  {
7414  /* xmax is a non-permanent XID */
7415  if (TransactionIdPrecedes(xid, *NoFreezePageRelfrozenXid))
7416  *NoFreezePageRelfrozenXid = xid;
7417  if (TransactionIdPrecedes(xid, cutoffs->FreezeLimit))
7418  freeze = true;
7419  }
7420  else if (!MultiXactIdIsValid(multi))
7421  {
7422  /* xmax is a permanent XID or invalid MultiXactId/XID */
7423  }
7424  else if (HEAP_LOCKED_UPGRADED(tuple->t_infomask))
7425  {
7426  /* xmax is a pg_upgrade'd MultiXact, which can't have updater XID */
7427  if (MultiXactIdPrecedes(multi, *NoFreezePageRelminMxid))
7428  *NoFreezePageRelminMxid = multi;
7429  /* heap_prepare_freeze_tuple always freezes pg_upgrade'd xmax */
7430  freeze = true;
7431  }
7432  else
7433  {
7434  /* xmax is a MultiXactId that may have an updater XID */
7435  MultiXactMember *members;
7436  int nmembers;
7437 
7438  Assert(MultiXactIdPrecedesOrEquals(cutoffs->relminmxid, multi));
7439  if (MultiXactIdPrecedes(multi, *NoFreezePageRelminMxid))
7440  *NoFreezePageRelminMxid = multi;
7441  if (MultiXactIdPrecedes(multi, cutoffs->MultiXactCutoff))
7442  freeze = true;
7443 
7444  /* need to check whether any member of the mxact is old */
7445  nmembers = GetMultiXactIdMembers(multi, &members, false,
7446  HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask));
7447 
7448  for (int i = 0; i < nmembers; i++)
7449  {
7450  xid = members[i].xid;
7451  Assert(TransactionIdIsNormal(xid));
7452  if (TransactionIdPrecedes(xid, *NoFreezePageRelfrozenXid))
7453  *NoFreezePageRelfrozenXid = xid;
7454  if (TransactionIdPrecedes(xid, cutoffs->FreezeLimit))
7455  freeze = true;
7456  }
7457  if (nmembers > 0)
7458  pfree(members);
7459  }
7460 
7461  if (tuple->t_infomask & HEAP_MOVED)
7462  {
7463  xid = HeapTupleHeaderGetXvac(tuple);
7464  if (TransactionIdIsNormal(xid))
7465  {
7466  Assert(TransactionIdPrecedesOrEquals(cutoffs->relfrozenxid, xid));
7467  if (TransactionIdPrecedes(xid, *NoFreezePageRelfrozenXid))
7468  *NoFreezePageRelfrozenXid = xid;
7469  /* heap_prepare_freeze_tuple forces xvac freezing */
7470  freeze = true;
7471  }
7472  }
7473 
7474  return freeze;
7475 }
#define HEAP_LOCKED_UPGRADED(infomask)
Definition: htup_details.h:249
bool MultiXactIdPrecedesOrEquals(MultiXactId multi1, MultiXactId multi2)
Definition: multixact.c:3323
TransactionId xid
Definition: multixact.h:58

References Assert, VacuumCutoffs::FreezeLimit, GetMultiXactIdMembers(), HEAP_LOCKED_UPGRADED, HEAP_MOVED, HEAP_XMAX_IS_LOCKED_ONLY, HEAP_XMAX_IS_MULTI, HeapTupleHeaderGetRawXmax, HeapTupleHeaderGetXmin, HeapTupleHeaderGetXvac, i, InvalidMultiXactId, InvalidTransactionId, VacuumCutoffs::MultiXactCutoff, MultiXactIdIsValid, MultiXactIdPrecedes(), MultiXactIdPrecedesOrEquals(), pfree(), VacuumCutoffs::relfrozenxid, VacuumCutoffs::relminmxid, HeapTupleHeaderData::t_infomask, TransactionIdIsNormal, TransactionIdPrecedes(), TransactionIdPrecedesOrEquals(), and MultiXactMember::xid.

Referenced by heap_prepare_freeze_tuple(), and lazy_scan_noprune().
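
Throughout the function, every XID encountered ratchets the caller's "no freeze" relfrozenxid tracker backwards, and anything older than FreezeLimit forces freezing. A standalone sketch of that ratcheting (plain unsigned comparisons for illustration; the real code uses wraparound-aware TransactionIdPrecedes()):

#include <stdbool.h>
#include <stdio.h>

/*
 * Illustrative "no freeze" tracker ratchet: each XID seen pulls the tracker
 * back if it is older, and an XID older than the freeze limit forces freezing.
 */
static bool
track_xid(unsigned int xid, unsigned int freeze_limit,
          unsigned int *no_freeze_relfrozenxid)
{
    bool freeze = false;

    if (xid < *no_freeze_relfrozenxid)
        *no_freeze_relfrozenxid = xid;
    if (xid < freeze_limit)
        freeze = true;
    return freeze;
}

int
main(void)
{
    unsigned int tracker = 5000;        /* candidate new relfrozenxid so far */
    bool freeze = false;

    freeze |= track_xid(4200, 4000, &tracker);  /* ratchets tracker to 4200 */
    freeze |= track_xid(3500, 4000, &tracker);  /* older than limit: must freeze */
    printf("tracker=%u freeze=%d\n", tracker, freeze);
    return 0;
}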

◆ heap_update()

TM_Result heap_update ( Relation  relation,
ItemPointer  otid,
HeapTuple  newtup,
CommandId  cid,
Snapshot  crosscheck,
bool  wait,
struct TM_FailureData tmfd,
LockTupleMode lockmode,
TU_UpdateIndexes update_indexes 
)

Definition at line 3150 of file heapam.c.

3154 {
3155  TM_Result result;
3156  TransactionId xid = GetCurrentTransactionId();
3157  Bitmapset *hot_attrs;
3158  Bitmapset *sum_attrs;
3159  Bitmapset *key_attrs;
3160  Bitmapset *id_attrs;
3161  Bitmapset *interesting_attrs;
3162  Bitmapset *modified_attrs;
3163  ItemId lp;
3164  HeapTupleData oldtup;
3165  HeapTuple heaptup;
3166  HeapTuple old_key_tuple = NULL;
3167  bool old_key_copied = false;
3168  Page page;
3169  BlockNumber block;
3170  MultiXactStatus mxact_status;
3171  Buffer buffer,
3172  newbuf,
3173  vmbuffer = InvalidBuffer,
3174  vmbuffer_new = InvalidBuffer;
3175  bool need_toast;
3176  Size newtupsize,
3177  pagefree;
3178  bool have_tuple_lock = false;
3179  bool iscombo;
3180  bool use_hot_update = false;
3181  bool summarized_update = false;
3182  bool key_intact;
3183  bool all_visible_cleared = false;
3184  bool all_visible_cleared_new = false;
3185  bool checked_lockers;
3186  bool locker_remains;
3187  bool id_has_external = false;
3188  TransactionId xmax_new_tuple,
3189  xmax_old_tuple;
3190  uint16 infomask_old_tuple,
3191  infomask2_old_tuple,
3192  infomask_new_tuple,
3193  infomask2_new_tuple;
3194 
3195  Assert(ItemPointerIsValid(otid));
3196 
3197  /* Cheap, simplistic check that the tuple matches the rel's rowtype. */
3198  Assert(HeapTupleHeaderGetNatts(newtup->t_data) <=
3199  RelationGetNumberOfAttributes(relation));
3200 
3201  /*
3202  * Forbid this during a parallel operation, lest it allocate a combo CID.
3203  * Other workers might need that combo CID for visibility checks, and we
3204  * have no provision for broadcasting it to them.
3205  */
3206  if (IsInParallelMode())
3207  ereport(ERROR,
3208  (errcode(ERRCODE_INVALID_TRANSACTION_STATE),
3209  errmsg("cannot update tuples during a parallel operation")));
3210 
3211  /*
3212  * Fetch the list of attributes to be checked for various operations.
3213  *
3214  * For HOT considerations, this is wasted effort if we fail to update or
3215  * have to put the new tuple on a different page. But we must compute the
3216  * list before obtaining buffer lock --- in the worst case, if we are
3217  * doing an update on one of the relevant system catalogs, we could
3218  * deadlock if we try to fetch the list later. In any case, the relcache
3219  * caches the data so this is usually pretty cheap.
3220  *
3221  * We also need columns used by the replica identity and columns that are
3222  * considered the "key" of rows in the table.
3223  *
3224  * Note that we get copies of each bitmap, so we need not worry about
3225  * relcache flush happening midway through.
3226  */
3227  hot_attrs = RelationGetIndexAttrBitmap(relation,
3228  INDEX_ATTR_BITMAP_HOT_BLOCKING);
3229  sum_attrs = RelationGetIndexAttrBitmap(relation,
3230  INDEX_ATTR_BITMAP_SUMMARIZED);
3231  key_attrs = RelationGetIndexAttrBitmap(relation, INDEX_ATTR_BITMAP_KEY);
3232  id_attrs = RelationGetIndexAttrBitmap(relation,
3233  INDEX_ATTR_BITMAP_IDENTITY_KEY);
3234  interesting_attrs = NULL;
3235  interesting_attrs = bms_add_members(interesting_attrs, hot_attrs);
3236  interesting_attrs = bms_add_members(interesting_attrs, sum_attrs);
3237  interesting_attrs = bms_add_members(interesting_attrs, key_attrs);
3238  interesting_attrs = bms_add_members(interesting_attrs, id_attrs);
3239 
3240  block = ItemPointerGetBlockNumber(otid);
3241  buffer = ReadBuffer(relation, block);
3242  page = BufferGetPage(buffer);
3243 
3244  /*
3245  * Before locking the buffer, pin the visibility map page if it appears to
3246  * be necessary. Since we haven't got the lock yet, someone else might be
3247  * in the middle of changing this, so we'll need to recheck after we have
3248  * the lock.
3249  */
3250  if (PageIsAllVisible(page))
3251  visibilitymap_pin(relation, block, &vmbuffer);
3252 
3253  LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
3254 
3255  lp = PageGetItemId(page, ItemPointerGetOffsetNumber(otid));
3256  Assert(ItemIdIsNormal(lp));
3257 
3258  /*
3259  * Fill in enough data in oldtup for HeapDetermineColumnsInfo to work
3260  * properly.
3261  */
3262  oldtup.t_tableOid = RelationGetRelid(relation);
3263  oldtup.t_data = (HeapTupleHeader) PageGetItem(page, lp);
3264  oldtup.t_len = ItemIdGetLength(lp);
3265  oldtup.t_self = *otid;
3266 
3267  /* the new tuple is ready, except for this: */
3268  newtup->t_tableOid = RelationGetRelid(relation);
3269 
3270  /*
3271  * Determine columns modified by the update. Additionally, identify
3272  * whether any of the unmodified replica identity key attributes in the
3273  * old tuple is externally stored or not. This is required because for
3274  * such attributes the flattened value won't be WAL logged as part of the
3275  * new tuple so we must include it as part of the old_key_tuple. See
3276  * ExtractReplicaIdentity.
3277  */
3278  modified_attrs = HeapDetermineColumnsInfo(relation, interesting_attrs,
3279  id_attrs, &oldtup,
3280  newtup, &id_has_external);
3281 
3282  /*
3283  * If we're not updating any "key" column, we can grab a weaker lock type.
3284  * This allows for more concurrency when we are running simultaneously
3285  * with foreign key checks.
3286  *
3287  * Note that if a column gets detoasted while executing the update, but
3288  * the value ends up being the same, this test will fail and we will use
3289  * the stronger lock. This is acceptable; the important case to optimize
3290  * is updates that don't manipulate key columns, not those that
3291  * serendipitously arrive at the same key values.
3292  */
3293  if (!bms_overlap(modified_attrs, key_attrs))
3294  {
3295  *lockmode = LockTupleNoKeyExclusive;
3296  mxact_status = MultiXactStatusNoKeyUpdate;
3297  key_intact = true;
3298 
3299  /*
3300  * If this is the first possibly-multixact-able operation in the
3301  * current transaction, set my per-backend OldestMemberMXactId
3302  * setting. We can be certain that the transaction will never become a
3303  * member of any older MultiXactIds than that. (We have to do this
3304  * even if we end up just using our own TransactionId below, since
3305  * some other backend could incorporate our XID into a MultiXact
3306  * immediately afterwards.)
3307  */
3308  MultiXactIdSetOldestMember();
3309  }
3310  else
3311  {
3312  *lockmode = LockTupleExclusive;
3313  mxact_status = MultiXactStatusUpdate;
3314  key_intact = false;
3315  }
3316 
3317  /*
3318  * Note: beyond this point, use oldtup not otid to refer to old tuple.
3319  * otid may very well point at newtup->t_self, which we will overwrite
3320  * with the new tuple's location, so there's great risk of confusion if we
3321  * use otid anymore.
3322  */
3323 
3324 l2:
3325  checked_lockers = false;
3326  locker_remains = false;
3327  result = HeapTupleSatisfiesUpdate(&oldtup, cid, buffer);
3328 
3329  /* see below about the "no wait" case */
3330  Assert(result != TM_BeingModified || wait);
3331 
3332  if (result == TM_Invisible)
3333  {
3334  UnlockReleaseBuffer(buffer);
3335  ereport(ERROR,
3336  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
3337  errmsg("attempted to update invisible tuple")));
3338  }
3339  else if (result == TM_BeingModified && wait)
3340  {
3341  TransactionId xwait;
3342  uint16 infomask;
3343  bool can_continue = false;
3344 
3345  /*
3346  * XXX note that we don't consider the "no wait" case here. This
3347  * isn't a problem currently because no caller uses that case, but it
3348  * should be fixed if such a caller is introduced. It wasn't a
3349  * problem previously because this code would always wait, but now
3350  * that some tuple locks do not conflict with one of the lock modes we
3351  * use, it is possible that this case is interesting to handle
3352  * specially.
3353  *
3354  * This may cause failures with third-party code that calls
3355  * heap_update directly.
3356  */
3357 
3358  /* must copy state data before unlocking buffer */
3359  xwait = HeapTupleHeaderGetRawXmax(oldtup.t_data);
3360  infomask = oldtup.t_data->t_infomask;
3361 
3362  /*
3363  * Now we have to do something about the existing locker. If it's a
3364  * multi, sleep on it; we might be awakened before it is completely
3365  * gone (or even not sleep at all in some cases); we need to preserve
3366  * it as locker, unless it is gone completely.
3367  *
3368  * If it's not a multi, we need to check for sleeping conditions
3369  * before actually going to sleep. If the update doesn't conflict
3370  * with the locks, we just continue without sleeping (but making sure
3371  * it is preserved).
3372  *
3373  * Before sleeping, we need to acquire tuple lock to establish our
3374  * priority for the tuple (see heap_lock_tuple). LockTuple will
3375  * release us when we are next-in-line for the tuple. Note we must
3376  * not acquire the tuple lock until we're sure we're going to sleep;
3377  * otherwise we're open for race conditions with other transactions
3378  * holding the tuple lock which sleep on us.
3379  *
3380  * If we are forced to "start over" below, we keep the tuple lock;
3381  * this arranges that we stay at the head of the line while rechecking
3382  * tuple state.
3383  */
3384  if (infomask & HEAP_XMAX_IS_MULTI)
3385  {
3386  TransactionId update_xact;
3387  int remain;
3388  bool current_is_member = false;
3389 
3390  if (DoesMultiXactIdConflict((MultiXactId) xwait, infomask,
3391  *lockmode, &current_is_member))
3392  {
3393  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
3394 
3395  /*
3396  * Acquire the lock, if necessary (but skip it when we're
3397  * requesting a lock and already have one; avoids deadlock).
3398  */
3399  if (!current_is_member)
3400  heap_acquire_tuplock(relation, &(oldtup.t_self), *lockmode,
3401  LockWaitBlock, &have_tuple_lock);
3402 
3403  /* wait for multixact */
3404  MultiXactIdWait((MultiXactId) xwait, mxact_status, infomask,
3405  relation, &oldtup.t_self, XLTW_Update,
3406  &remain);
3407  checked_lockers = true;
3408  locker_remains = remain != 0;
3410 
3411  /*
3412  * If xwait had just locked the tuple then some other xact
3413  * could update this tuple before we get to this point. Check
3414  * for xmax change, and start over if so.
3415  */
3416  if (xmax_infomask_changed(oldtup.t_data->t_infomask,
3417  infomask) ||
3418  !TransactionIdEquals(HeapTupleHeaderGetRawXmax(oldtup.t_data),
3419  xwait))
3420  goto l2;
3421  }
3422 
3423  /*
3424  * Note that the multixact may not be done by now. It could have
3425  * surviving members; our own xact or other subxacts of this
3426  * backend, and also any other concurrent transaction that locked
3427  * the tuple with LockTupleKeyShare if we only got
3428  * LockTupleNoKeyExclusive. If this is the case, we have to be
3429  * careful to mark the updated tuple with the surviving members in
3430  * Xmax.
3431  *
3432  * Note that there could have been another update in the
3433  * MultiXact. In that case, we need to check whether it committed
3434  * or aborted. If it aborted we are safe to update it again;
3435  * otherwise there is an update conflict, and we have to return
3436  * TableTuple{Deleted, Updated} below.
3437  *
3438  * In the LockTupleExclusive case, we still need to preserve the
3439  * surviving members: those would include the tuple locks we had
3440  * before this one, which are important to keep in case this
3441  * subxact aborts.
3442  */
3443  if (!HEAP_XMAX_IS_LOCKED_ONLY(oldtup.t_data->t_infomask))
3444  update_xact = HeapTupleGetUpdateXid(oldtup.t_data);
3445  else
3446  update_xact = InvalidTransactionId;
3447 
3448  /*
3449  * There was no UPDATE in the MultiXact; or it aborted. No
3450  * TransactionIdIsInProgress() call needed here, since we called
3451  * MultiXactIdWait() above.
3452  */
3453  if (!TransactionIdIsValid(update_xact) ||
3454  TransactionIdDidAbort(update_xact))
3455  can_continue = true;
3456  }
3457  else if (TransactionIdIsCurrentTransactionId(xwait))
3458  {
3459  /*
3460  * The only locker is ourselves; we can avoid grabbing the tuple
3461  * lock here, but must preserve our locking information.
3462  */
3463  checked_lockers = true;
3464  locker_remains = true;
3465  can_continue = true;
3466  }
3467  else if (HEAP_XMAX_IS_KEYSHR_LOCKED(infomask) && key_intact)
3468  {
3469  /*
3470  * If it's just a key-share locker, and we're not changing the key
3471  * columns, we don't need to wait for it to end; but we need to
3472  * preserve it as locker.
3473  */
3474  checked_lockers = true;
3475  locker_remains = true;
3476  can_continue = true;
3477  }
3478  else
3479  {
3480  /*
3481  * Wait for regular transaction to end; but first, acquire tuple
3482  * lock.
3483  */
3484  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
3485  heap_acquire_tuplock(relation, &(oldtup.t_self), *lockmode,
3486  LockWaitBlock, &have_tuple_lock);
3487  XactLockTableWait(xwait, relation, &oldtup.t_self,
3488  XLTW_Update);
3489  checked_lockers = true;
3490  LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
3491 
3492  /*
3493  * xwait is done, but if xwait had just locked the tuple then some
3494  * other xact could update this tuple before we get to this point.
3495  * Check for xmax change, and start over if so.
3496  */
3497  if (xmax_infomask_changed(oldtup.t_data->t_infomask, infomask) ||
3498  !TransactionIdEquals(xwait,
3499  HeapTupleHeaderGetRawXmax(oldtup.t_data)))
3500  goto l2;
3501 
3502  /* Otherwise check if it committed or aborted */
3503  UpdateXmaxHintBits(oldtup.t_data, buffer, xwait);
3504  if (oldtup.t_data->t_infomask & HEAP_XMAX_INVALID)
3505  can_continue = true;
3506  }
3507 
3508  if (can_continue)
3509  result = TM_Ok;
3510  else if (!ItemPointerEquals(&oldtup.t_self, &oldtup.t_data->t_ctid))
3511  result = TM_Updated;
3512  else
3513  result = TM_Deleted;
3514  }
3515 
3516  /* Sanity check the result of HeapTupleSatisfiesUpdate() and the logic above */
3517  if (result != TM_Ok)
3518  {
3519  Assert(result == TM_SelfModified ||
3520  result == TM_Updated ||
3521  result == TM_Deleted ||
3522  result == TM_BeingModified);
3523  Assert(!(oldtup.t_data->t_infomask & HEAP_XMAX_INVALID));
3524  Assert(result != TM_Updated ||
3525  !ItemPointerEquals(&oldtup.t_self, &oldtup.t_data->t_ctid));
3526  }
3527 
3528  if (crosscheck != InvalidSnapshot && result == TM_Ok)
3529  {
3530  /* Perform additional check for transaction-snapshot mode RI updates */
3531  if (!HeapTupleSatisfiesVisibility(&oldtup, crosscheck, buffer))
3532  result = TM_Updated;
3533  }
3534 
3535  if (result != TM_Ok)
3536  {
3537  tmfd->ctid = oldtup.t_data->t_ctid;
3538  tmfd->xmax = HeapTupleHeaderGetUpdateXid(oldtup.t_data);
3539  if (result == TM_SelfModified)
3540  tmfd->cmax = HeapTupleHeaderGetCmax(oldtup.t_data);
3541  else
3542  tmfd->cmax = InvalidCommandId;
3543  UnlockReleaseBuffer(buffer);
3544  if (have_tuple_lock)
3545  UnlockTupleTuplock(relation, &(oldtup.t_self), *lockmode);
3546  if (vmbuffer != InvalidBuffer)
3547  ReleaseBuffer(vmbuffer);
3548  *update_indexes = TU_None;
3549 
3550  bms_free(hot_attrs);
3551  bms_free(sum_attrs);
3552  bms_free(key_attrs);
3553  bms_free(id_attrs);
3554  bms_free(modified_attrs);
3555  bms_free(interesting_attrs);
3556  return result;
3557  }
3558 
3559  /*
3560  * If we didn't pin the visibility map page and the page has become all
3561  * visible while we were busy locking the buffer, or during some
3562  * subsequent window during which we had it unlocked, we'll have to unlock
3563  * and re-lock, to avoid holding the buffer lock across an I/O. That's a
3564  * bit unfortunate, especially since we'll now have to recheck whether the
3565  * tuple has been locked or updated under us, but hopefully it won't
3566  * happen very often.
3567  */
3568  if (vmbuffer == InvalidBuffer && PageIsAllVisible(page))
3569  {
3570  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
3571  visibilitymap_pin(relation, block, &vmbuffer);
3572  LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
3573  goto l2;
3574  }
3575 
3576  /* Fill in transaction status data */
3577 
3578  /*
3579  * If the tuple we're updating is locked, we need to preserve the locking
3580  * info in the old tuple's Xmax. Prepare a new Xmax value for this.
3581  */
3582  compute_new_xmax_infomask(HeapTupleHeaderGetRawXmax(oldtup.t_data),
3583  oldtup.t_data->t_infomask,
3584  oldtup.t_data->t_infomask2,
3585  xid, *lockmode, true,
3586  &xmax_old_tuple, &infomask_old_tuple,
3587  &infomask2_old_tuple);
3588 
3589  /*
3590  * And also prepare an Xmax value for the new copy of the tuple. If there
3591  * was no xmax previously, or there was one but all lockers are now gone,
3592  * then use InvalidTransactionId; otherwise, get the xmax from the old
3593  * tuple. (In rare cases that might also be InvalidTransactionId and yet
3594  * not have the HEAP_XMAX_INVALID bit set; that's fine.)
3595  */
3596  if ((oldtup.t_data->t_infomask & HEAP_XMAX_INVALID) ||
3597  HEAP_LOCKED_UPGRADED(oldtup.t_data->t_infomask) ||
3598  (checked_lockers && !locker_remains))
3599  xmax_new_tuple = InvalidTransactionId;
3600  else
3601  xmax_new_tuple = HeapTupleHeaderGetRawXmax(oldtup.t_data);
3602 
3603  if (!TransactionIdIsValid(xmax_new_tuple))
3604  {
3605  infomask_new_tuple = HEAP_XMAX_INVALID;
3606  infomask2_new_tuple = 0;
3607  }
3608  else
3609  {
3610  /*
3611  * If we found a valid Xmax for the new tuple, then the infomask bits
3612  * to use on the new tuple depend on what was there on the old one.
3613  * Note that since we're doing an update, the only possibility is that
3614  * the lockers had FOR KEY SHARE lock.
3615  */
3616  if (oldtup.t_data->t_infomask & HEAP_XMAX_IS_MULTI)
3617  {
3618  GetMultiXactIdHintBits(xmax_new_tuple, &infomask_new_tuple,
3619  &infomask2_new_tuple);
3620  }
3621  else
3622  {
3623  infomask_new_tuple = HEAP_XMAX_KEYSHR_LOCK | HEAP_XMAX_LOCK_ONLY;
3624  infomask2_new_tuple = 0;
3625  }
3626  }
3627 
3628  /*
3629  * Prepare the new tuple with the appropriate initial values of Xmin and
3630  * Xmax, as well as initial infomask bits as computed above.
3631  */
3632  newtup->t_data->t_infomask &= ~(HEAP_XACT_MASK);
3633  newtup->t_data->t_infomask2 &= ~(HEAP2_XACT_MASK);
3634  HeapTupleHeaderSetXmin(newtup->t_data, xid);
3635  HeapTupleHeaderSetCmin(newtup->t_data, cid);
3636  newtup->t_data->t_infomask |= HEAP_UPDATED | infomask_new_tuple;
3637  newtup->t_data->t_infomask2 |= infomask2_new_tuple;
3638  HeapTupleHeaderSetXmax(newtup->t_data, xmax_new_tuple);
3639 
3640  /*
3641  * Replace cid with a combo CID if necessary. Note that we already put
3642  * the plain cid into the new tuple.
3643  */
3644  HeapTupleHeaderAdjustCmax(oldtup.t_data, &cid, &iscombo);
3645 
3646  /*
3647  * If the toaster needs to be activated, OR if the new tuple will not fit
3648  * on the same page as the old, then we need to release the content lock
3649  * (but not the pin!) on the old tuple's buffer while we are off doing
3650  * TOAST and/or table-file-extension work. We must mark the old tuple to
3651  * show that it's locked, else other processes may try to update it
3652  * themselves.
3653  *
3654  * We need to invoke the toaster if there are already any out-of-line
3655  * toasted values present, or if the new tuple is over-threshold.
3656  */
3657  if (relation->rd_rel->relkind != RELKIND_RELATION &&
3658  relation->rd_rel->relkind != RELKIND_MATVIEW)
3659  {
3660  /* toast table entries should never be recursively toasted */
3661  Assert(!HeapTupleHasExternal(&oldtup));
3662  Assert(!HeapTupleHasExternal(newtup));
3663  need_toast = false;
3664  }
3665  else
3666  need_toast = (HeapTupleHasExternal(&oldtup) ||
3667  HeapTupleHasExternal(newtup) ||
3668  newtup->t_len > TOAST_TUPLE_THRESHOLD);
3669 
3670  pagefree = PageGetHeapFreeSpace(page);
3671 
3672  newtupsize = MAXALIGN(newtup->t_len);
3673 
3674  if (need_toast || newtupsize > pagefree)
3675  {
3676  TransactionId xmax_lock_old_tuple;
3677  uint16 infomask_lock_old_tuple,
3678  infomask2_lock_old_tuple;
3679  bool cleared_all_frozen = false;
3680 
3681  /*
3682  * To prevent concurrent sessions from updating the tuple, we have to
3683  * temporarily mark it locked, while we release the page-level lock.
3684  *
3685  * To satisfy the rule that any xid potentially appearing in a buffer
3686  * written out to disk must be WAL-logged first, we unfortunately have to WAL log this
3687  * temporary modification. We can reuse xl_heap_lock for this
3688  * purpose. If we crash/error before following through with the
3689  * actual update, xmax will be of an aborted transaction, allowing
3690  * other sessions to proceed.
3691  */
3692 
3693  /*
3694  * Compute xmax / infomask appropriate for locking the tuple. This has
3695  * to be done separately from the combo that's going to be used for
3696  * updating, because the potentially created multixact would otherwise
3697  * be wrong.
3698  */
3699  compute_new_xmax_infomask(HeapTupleHeaderGetRawXmax(oldtup.t_data),
3700  oldtup.t_data->t_infomask,
3701  oldtup.t_data->t_infomask2,
3702  xid, *lockmode, false,
3703  &xmax_lock_old_tuple, &infomask_lock_old_tuple,
3704  &infomask2_lock_old_tuple);
3705 
3706  Assert(HEAP_XMAX_IS_LOCKED_ONLY(infomask_lock_old_tuple));
3707 
3708  START_CRIT_SECTION();
3709 
3710  /* Clear obsolete visibility flags ... */
3711  oldtup.t_data->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
3712  oldtup.t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED;
3713  HeapTupleClearHotUpdated(&oldtup);
3714  /* ... and store info about transaction updating this tuple */
3715  Assert(TransactionIdIsValid(xmax_lock_old_tuple));
3716  HeapTupleHeaderSetXmax(oldtup.t_data, xmax_lock_old_tuple);
3717  oldtup.t_data->t_infomask |= infomask_lock_old_tuple;
3718  oldtup.t_data->t_infomask2 |= infomask2_lock_old_tuple;
3719  HeapTupleHeaderSetCmax(oldtup.t_data, cid, iscombo);
3720 
3721  /* temporarily make it look not-updated, but locked */
3722  oldtup.t_data->t_ctid = oldtup.t_self;
3723 
3724  /*
3725  * Clear all-frozen bit on visibility map if needed. We could
3726  * immediately reset ALL_VISIBLE, but given that the WAL logging
3727  * overhead would be unchanged, that doesn't seem necessarily
3728  * worthwhile.
3729  */
3730  if (PageIsAllVisible(page) &&
3731  visibilitymap_clear(relation, block, vmbuffer,
3732  VISIBILITYMAP_ALL_FROZEN))
3733  cleared_all_frozen = true;
3734 
3735  MarkBufferDirty(buffer);
3736 
3737  if (RelationNeedsWAL(relation))
3738  {
3739  xl_heap_lock xlrec;
3740  XLogRecPtr recptr;
3741 
3742  XLogBeginInsert();
3743  XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
3744 
3745  xlrec.offnum = ItemPointerGetOffsetNumber(&oldtup.t_self);
3746  xlrec.xmax = xmax_lock_old_tuple;
3747  xlrec.infobits_set = compute_infobits(oldtup.t_data->t_infomask,
3748  oldtup.t_data->t_infomask2);
3749  xlrec.flags =
3750  cleared_all_frozen ? XLH_LOCK_ALL_FROZEN_CLEARED : 0;
3751  XLogRegisterData((char *) &xlrec, SizeOfHeapLock);
3752  recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_LOCK);
3753  PageSetLSN(page, recptr);
3754  }
3755 
3756  END_CRIT_SECTION();
3757 
3758  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
3759 
3760  /*
3761  * Let the toaster do its thing, if needed.
3762  *
3763  * Note: below this point, heaptup is the data we actually intend to
3764  * store into the relation; newtup is the caller's original untoasted
3765  * data.
3766  */
3767  if (need_toast)
3768  {
3769  /* Note we always use WAL and FSM during updates */
3770  heaptup = heap_toast_insert_or_update(relation, newtup, &oldtup, 0);
3771  newtupsize = MAXALIGN(heaptup->t_len);
3772  }
3773  else
3774  heaptup = newtup;
3775 
3776  /*
3777  * Now, do we need a new page for the tuple, or not? This is a bit
3778  * tricky since someone else could have added tuples to the page while
3779  * we weren't looking. We have to recheck the available space after
3780  * reacquiring the buffer lock. But don't bother to do that if the
3781  * former amount of free space is still not enough; it's unlikely
3782  * there's more free now than before.
3783  *
3784  * What's more, if we need to get a new page, we will need to acquire
3785  * buffer locks on both old and new pages. To avoid deadlock against
3786  * some other backend trying to get the same two locks in the other
3787  * order, we must be consistent about the order we get the locks in.
3788  * We use the rule "lock the lower-numbered page of the relation
3789  * first". To implement this, we must do RelationGetBufferForTuple
3790  * while not holding the lock on the old page, and we must rely on it
3791  * to get the locks on both pages in the correct order.
3792  *
3793  * Another consideration is that we need visibility map page pin(s) if
3794  * we will have to clear the all-visible flag on either page. If we
3795  * call RelationGetBufferForTuple, we rely on it to acquire any such
3796  * pins; but if we don't, we have to handle that here. Hence we need
3797  * a loop.
3798  */
3799  for (;;)
3800  {
3801  if (newtupsize > pagefree)
3802  {
3803  /* It doesn't fit, must use RelationGetBufferForTuple. */
3804  newbuf = RelationGetBufferForTuple(relation, heaptup->t_len,
3805  buffer, 0, NULL,
3806  &vmbuffer_new, &vmbuffer,
3807  0);
3808  /* We're all done. */
3809  break;
3810  }
3811  /* Acquire VM page pin if needed and we don't have it. */
3812  if (vmbuffer == InvalidBuffer && PageIsAllVisible(page))
3813  visibilitymap_pin(relation, block, &vmbuffer);
3814  /* Re-acquire the lock on the old tuple's page. */
3815  LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
3816  /* Re-check using the up-to-date free space */
3817  pagefree = PageGetHeapFreeSpace(page);
3818  if (newtupsize > pagefree ||
3819  (vmbuffer == InvalidBuffer && PageIsAllVisible(page)))
3820  {
3821  /*
3822  * Rats, it doesn't fit anymore, or somebody just now set the
3823  * all-visible flag. We must now unlock and loop to avoid
3824  * deadlock. Fortunately, this path should seldom be taken.
3825  */
3826  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
3827  }
3828  else
3829  {
3830  /* We're all done. */
3831  newbuf = buffer;
3832  break;
3833  }
3834  }
3835  }
3836  else
3837  {
3838  /* No TOAST work needed, and it'll fit on same page */
3839  newbuf = buffer;
3840  heaptup = newtup;
3841  }
3842 
3843  /*
3844  * We're about to do the actual update -- check for conflict first, to
3845  * avoid possibly having to roll back work we've just done.
3846  *
3847  * This is safe without a recheck as long as there is no possibility of
3848  * another process scanning the pages between this check and the update
3849  * being visible to the scan (i.e., exclusive buffer content lock(s) are
3850  * continuously held from this point until the tuple update is visible).
3851  *
3852  * For the new tuple the only check needed is at the relation level, but
3853  * since both tuples are in the same relation and the check for oldtup
3854  * will include checking the relation level, there is no benefit to a
3855  * separate check for the new tuple.
3856  */
3857  CheckForSerializableConflictIn(relation, &oldtup.t_self,
3858  BufferGetBlockNumber(buffer));
3859 
3860  /*
3861  * At this point newbuf and buffer are both pinned and locked, and newbuf
3862  * has enough space for the new tuple. If they are the same buffer, only
3863  * one pin is held.
3864  */
3865 
3866  if (newbuf == buffer)
3867  {
3868  /*
3869  * Since the new tuple is going into the same page, we might be able
3870  * to do a HOT update. Check if any of the index columns have been
3871  * changed.
3872  */
3873  if (!bms_overlap(modified_attrs, hot_attrs))
3874  {
3875  use_hot_update = true;
3876 
3877  /*
3878  * If none of the columns that are used in hot-blocking indexes
3879  * were updated, we can apply HOT, but we do still need to check
3880  * if we need to update the summarizing indexes, and update those
3881  * indexes if the columns were updated, or we may fail to detect
3882  * e.g. value bound changes in BRIN minmax indexes.
3883  */
3884  if (bms_overlap(modified_attrs, sum_attrs))
3885  summarized_update = true;
3886  }
3887  }
3888  else
3889  {
3890  /* Set a hint that the old page could use prune/defrag */
3891  PageSetFull(page);
3892  }
3893 
3894  /*
3895  * Compute replica identity tuple before entering the critical section so
3896  * we don't PANIC upon a memory allocation failure.
3897  * ExtractReplicaIdentity() will return NULL if nothing needs to be
3898  * logged. Pass old key required as true only if the replica identity key
3899  * columns are modified or it has external data.
3900  */
3901  old_key_tuple = ExtractReplicaIdentity(relation, &oldtup,
3902  bms_overlap(modified_attrs, id_attrs) ||
3903  id_has_external,
3904  &old_key_copied);
3905 
3906  /* NO EREPORT(ERROR) from here till changes are logged */
3907  START_CRIT_SECTION();
3908 
3909  /*
3910  * If this transaction commits, the old tuple will become DEAD sooner or
3911  * later. Set flag that this page is a candidate for pruning once our xid
3912  * falls below the OldestXmin horizon. If the transaction finally aborts,
3913  * the subsequent page pruning will be a no-op and the hint will be
3914  * cleared.
3915  *
3916  * XXX Should we set hint on newbuf as well? If the transaction aborts,
3917  * there would be a prunable tuple in the newbuf; but for now we choose
3918  * not to optimize for aborts. Note that heap_xlog_update must be kept in
3919  * sync if this decision changes.
3920  */
3921  PageSetPrunable(page, xid);
3922 
3923  if (use_hot_update)
3924  {
3925  /* Mark the old tuple as HOT-updated */
3926  HeapTupleSetHotUpdated(&oldtup);
3927  /* And mark the new tuple as heap-only */
3928  HeapTupleSetHeapOnly(heaptup);
3929  /* Mark the caller's copy too, in case different from heaptup */
3930  HeapTupleSetHeapOnly(newtup);
3931  }
3932  else
3933  {
3934  /* Make sure tuples are correctly marked as not-HOT */
3935  HeapTupleClearHotUpdated(&oldtup);
3936  HeapTupleClearHeapOnly(heaptup);
3937  HeapTupleClearHeapOnly(newtup);
3938  }
3939 
3940  RelationPutHeapTuple(relation, newbuf, heaptup, false); /* insert new tuple */
3941 
3942 
3943  /* Clear obsolete visibility flags, possibly set by ourselves above... */
3944  oldtup.t_data->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
3945  oldtup.t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED;
3946  /* ... and store info about transaction updating this tuple */
3947  Assert(TransactionIdIsValid(xmax_old_tuple));
3948  HeapTupleHeaderSetXmax(oldtup.t_data, xmax_old_tuple);
3949  oldtup.t_data->t_infomask |= infomask_old_tuple;
3950  oldtup.t_data->t_infomask2 |= infomask2_old_tuple;
3951  HeapTupleHeaderSetCmax(oldtup.t_data, cid, iscombo);
3952 
3953  /* record address of new tuple in t_ctid of old one */
3954  oldtup.t_data->t_ctid = heaptup->t_self;
3955 
3956  /* clear PD_ALL_VISIBLE flags, reset all visibilitymap bits */
3957  if (PageIsAllVisible(BufferGetPage(buffer)))
3958  {
3959  all_visible_cleared = true;
3960  PageClearAllVisible(BufferGetPage(buffer));
3961  visibilitymap_clear(relation, BufferGetBlockNumber(buffer),
3962  vmbuffer, VISIBILITYMAP_VALID_BITS);
3963  }
3964  if (newbuf != buffer && PageIsAllVisible(BufferGetPage(newbuf)))
3965  {
3966  all_visible_cleared_new = true;
3967  PageClearAllVisible(BufferGetPage(newbuf));
3968  visibilitymap_clear(relation, BufferGetBlockNumber(newbuf),
3969  vmbuffer_new, VISIBILITYMAP_VALID_BITS);
3970  }
3971 
3972  if (newbuf != buffer)
3973  MarkBufferDirty(newbuf);
3974  MarkBufferDirty(buffer);
3975 
3976  /* XLOG stuff */
3977  if (RelationNeedsWAL(relation))
3978  {
3979  XLogRecPtr recptr;
3980 
3981  /*
3982  * For logical decoding we need combo CIDs to properly decode the
3983  * catalog.
3984  */
3985  if (RelationIsAccessibleInLogicalDecoding(relation))
3986  {
3987  log_heap_new_cid(relation, &oldtup);
3988  log_heap_new_cid(relation, heaptup);
3989  }
3990 
3991  recptr = log_heap_update(relation, buffer,
3992  newbuf, &oldtup, heaptup,
3993  old_key_tuple,
3994  all_visible_cleared,
3995  all_visible_cleared_new);
3996  if (newbuf != buffer)
3997  {
3998  PageSetLSN(BufferGetPage(newbuf), recptr);
3999  }
4000  PageSetLSN(BufferGetPage(buffer), recptr);
4001  }
4002 
4003  END_CRIT_SECTION();
4004 
4005  if (newbuf != buffer)
4006  LockBuffer(newbuf, BUFFER_LOCK_UNLOCK);
4007  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
4008 
4009  /*
4010  * Mark old tuple for invalidation from system caches at next command
4011  * boundary, and mark the new tuple for invalidation in case we abort. We
4012  * have to do this before releasing the buffer because oldtup is in the
4013  * buffer. (heaptup is all in local memory, but it's necessary to process
4014  * both tuple versions in one call to inval.c so we can avoid redundant
4015  * sinval messages.)
4016  */
4017  CacheInvalidateHeapTuple(relation, &oldtup, heaptup);
4018 
4019  /* Now we can release the buffer(s) */
4020  if (newbuf != buffer)
4021  ReleaseBuffer(newbuf);
4022  ReleaseBuffer(buffer);
4023  if (BufferIsValid(vmbuffer_new))
4024  ReleaseBuffer(vmbuffer_new);
4025  if (BufferIsValid(vmbuffer))
4026  ReleaseBuffer(vmbuffer);
4027 
4028  /*
4029  * Release the lmgr tuple lock, if we had it.
4030  */
4031  if (have_tuple_lock)
4032  UnlockTupleTuplock(relation, &(oldtup.t_self), *lockmode);
4033 
4034  pgstat_count_heap_update(relation, use_hot_update, newbuf != buffer);
4035 
4036  /*
4037  * If heaptup is a private copy, release it. Don't forget to copy t_self
4038  * back to the caller's image, too.
4039  */
4040  if (heaptup != newtup)
4041  {
4042  newtup->t_self = heaptup->t_self;
4043  heap_freetuple(heaptup);
4044  }
4045 
4046  /*
4047  * If it is a HOT update, the update may still need to update summarized
4048  * indexes, lest we fail to update those summaries and get incorrect
4049  * results (for example, minmax bounds of the block may change with this
4050  * update).
4051  */
4052  if (use_hot_update)
4053  {
4054  if (summarized_update)
4055  *update_indexes = TU_Summarizing;
4056  else
4057  *update_indexes = TU_None;
4058  }
4059  else
4060  *update_indexes = TU_All;
4061 
4062  if (old_key_tuple != NULL && old_key_copied)
4063  heap_freetuple(old_key_tuple);
4064 
4065  bms_free(hot_attrs);
4066  bms_free(sum_attrs);
4067  bms_free(key_attrs);
4068  bms_free(id_attrs);
4069  bms_free(modified_attrs);
4070  bms_free(interesting_attrs);
4071 
4072  return TM_Ok;
4073 }
void bms_free(Bitmapset *a)
Definition: bitmapset.c:239
Bitmapset * bms_add_members(Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:917
bool bms_overlap(const Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:582
static void PageSetFull(Page page)
Definition: bufpage.h:418
TransactionId HeapTupleGetUpdateXid(HeapTupleHeader tuple)
Definition: heapam.c:7099
static Bitmapset * HeapDetermineColumnsInfo(Relation relation, Bitmapset *interesting_cols, Bitmapset *external_cols, HeapTuple oldtup, HeapTuple newtup, bool *has_external)
Definition: heapam.c:4131
static XLogRecPtr log_heap_update(Relation reln, Buffer oldbuf, Buffer newbuf, HeapTuple oldtup, HeapTuple newtup, HeapTuple old_key_tuple, bool all_visible_cleared, bool new_all_visible_cleared)
Definition: heapam.c:8357
HeapTuple heap_toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup, int options)
Definition: heaptoast.c:96
#define TOAST_TUPLE_THRESHOLD
Definition: heaptoast.h:48
#define HeapTupleSetHotUpdated(tuple)
Definition: htup_details.h:677
#define HEAP2_XACT_MASK
Definition: htup_details.h:279
#define HEAP_XMAX_LOCK_ONLY
Definition: htup_details.h:197
#define HeapTupleHeaderSetCmin(tup, cid)
Definition: htup_details.h:393
#define HEAP_XACT_MASK
Definition: htup_details.h:215
#define HeapTupleSetHeapOnly(tuple)
Definition: htup_details.h:686
#define HeapTupleClearHeapOnly(tuple)
Definition: htup_details.h:689
#define HEAP_UPDATED
Definition: htup_details.h:210
#define HEAP_XMAX_KEYSHR_LOCK
Definition: htup_details.h:194
#define HeapTupleClearHotUpdated(tuple)
Definition: htup_details.h:680
@ XLTW_Update
Definition: lmgr.h:27
void pgstat_count_heap_update(Relation rel, bool hot, bool newpage)
Bitmapset * RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind)
Definition: relcache.c:5244
@ INDEX_ATTR_BITMAP_KEY
Definition: relcache.h:61
@ INDEX_ATTR_BITMAP_HOT_BLOCKING
Definition: relcache.h:64
@ INDEX_ATTR_BITMAP_SUMMARIZED
Definition: relcache.h:65
@ INDEX_ATTR_BITMAP_IDENTITY_KEY
Definition: relcache.h:63
@ TU_Summarizing
Definition: tableam.h:126
@ TU_All
Definition: tableam.h:123
@ TU_None
Definition: tableam.h:120
bool TransactionIdDidAbort(TransactionId transactionId)
Definition: transam.c:188

References Assert, bms_add_members(), bms_free(), bms_overlap(), BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage(), BufferIsValid(), CacheInvalidateHeapTuple(), CheckForSerializableConflictIn(), TM_FailureData::cmax, compute_infobits(), compute_new_xmax_infomask(), TM_FailureData::ctid, DoesMultiXactIdConflict(), END_CRIT_SECTION, ereport, errcode(), errmsg(), ERROR, ExtractReplicaIdentity(), xl_heap_lock::flags, GetCurrentTransactionId(), GetMultiXactIdHintBits(), HEAP2_XACT_MASK, heap_acquire_tuplock(), heap_freetuple(), HEAP_KEYS_UPDATED, HEAP_LOCKED_UPGRADED, HEAP_MOVED, heap_toast_insert_or_update(), HEAP_UPDATED, HEAP_XACT_MASK, HEAP_XMAX_BITS, HEAP_XMAX_INVALID, HEAP_XMAX_IS_KEYSHR_LOCKED, HEAP_XMAX_IS_LOCKED_ONLY, HEAP_XMAX_IS_MULTI, HEAP_XMAX_KEYSHR_LOCK, HEAP_XMAX_LOCK_ONLY, HeapDetermineColumnsInfo(), HeapTupleClearHeapOnly, HeapTupleClearHotUpdated, HeapTupleGetUpdateXid(), HeapTupleHasExternal, HeapTupleHeaderAdjustCmax(), HeapTupleHeaderGetCmax(), HeapTupleHeaderGetNatts, HeapTupleHeaderGetRawXmax, HeapTupleHeaderGetUpdateXid, HeapTupleHeaderSetCmax, HeapTupleHeaderSetCmin, HeapTupleHeaderSetXmax, HeapTupleHeaderSetXmin, HeapTupleSatisfiesUpdate(), HeapTupleSatisfiesVisibility(), HeapTupleSetHeapOnly, HeapTupleSetHotUpdated, INDEX_ATTR_BITMAP_HOT_BLOCKING, INDEX_ATTR_BITMAP_IDENTITY_KEY, INDEX_ATTR_BITMAP_KEY, INDEX_ATTR_BITMAP_SUMMARIZED, xl_heap_lock::infobits_set, InvalidBuffer, InvalidCommandId, InvalidSnapshot, InvalidTransactionId, IsInParallelMode(), ItemIdGetLength, ItemIdIsNormal, ItemPointerEquals(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), ItemPointerIsValid(), LockBuffer(), LockTupleExclusive, LockTupleNoKeyExclusive, LockWaitBlock, log_heap_new_cid(), log_heap_update(), MarkBufferDirty(), MAXALIGN, MultiXactIdSetOldestMember(), MultiXactIdWait(), MultiXactStatusNoKeyUpdate, MultiXactStatusUpdate, xl_heap_lock::offnum, PageClearAllVisible(), PageGetHeapFreeSpace(), PageGetItem(), PageGetItemId(), PageIsAllVisible(), PageSetFull(), PageSetLSN(), PageSetPrunable, pgstat_count_heap_update(), RelationData::rd_rel, ReadBuffer(), REGBUF_STANDARD, RelationGetBufferForTuple(), RelationGetIndexAttrBitmap(), RelationGetNumberOfAttributes, RelationGetRelid, RelationIsAccessibleInLogicalDecoding, RelationNeedsWAL, RelationPutHeapTuple(), ReleaseBuffer(), SizeOfHeapLock, START_CRIT_SECTION, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TM_BeingModified, TM_Deleted, TM_Invisible, TM_Ok, TM_SelfModified, TM_Updated, TOAST_TUPLE_THRESHOLD, TransactionIdDidAbort(), TransactionIdEquals, TransactionIdIsCurrentTransactionId(), TransactionIdIsValid, TU_All, TU_None, TU_Summarizing, UnlockReleaseBuffer(), UnlockTupleTuplock, UpdateXmaxHintBits(), VISIBILITYMAP_ALL_FROZEN, visibilitymap_clear(), visibilitymap_pin(), VISIBILITYMAP_VALID_BITS, XactLockTableWait(), XLH_LOCK_ALL_FROZEN_CLEARED, XLOG_HEAP_LOCK, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), XLogRegisterData(), XLTW_Update, xl_heap_lock::xmax, TM_FailureData::xmax, and xmax_infomask_changed().

Referenced by heapam_tuple_update(), and simple_heap_update().
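
The sketch below is a minimal, hypothetical backend-side caller (not part of heapam.c), shown only to illustrate the contract documented above: heap_update() returns a TM_Result, reports failure details through TM_FailureData, and tells the caller via the TU_UpdateIndexes out-parameter whether no, only summarizing, or all index entries must be inserted afterwards. The helper name update_one_tuple and the bare-bones error handling are assumptions for illustration.

#include "postgres.h"

#include "access/heapam.h"
#include "access/tableam.h"
#include "access/xact.h"
#include "utils/snapmgr.h"

static void
update_one_tuple(Relation rel, ItemPointer otid, HeapTuple newtup)
{
	TM_Result	result;
	TM_FailureData tmfd;
	LockTupleMode lockmode;
	TU_UpdateIndexes update_indexes;

	result = heap_update(rel, otid, newtup,
						 GetCurrentCommandId(true),
						 InvalidSnapshot,	/* no crosscheck snapshot */
						 true,				/* wait for concurrent updaters */
						 &tmfd, &lockmode, &update_indexes);

	if (result != TM_Ok)
		elog(ERROR, "tuple concurrently updated or deleted");

	/*
	 * TU_None: HOT update, no index entries needed.
	 * TU_Summarizing: HOT update, but summarizing indexes (e.g. BRIN)
	 * still need new entries.  TU_All: insert entries into all indexes.
	 */
	if (update_indexes != TU_None)
	{
		/* a real caller would insert index entries here, e.g. via ExecInsertIndexTuples() */
	}
}

In the core executor this handling happens in heapam_tuple_update() and its callers; the sketch only mirrors that flow.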

◆ heap_vacuum_rel()

void heap_vacuum_rel ( Relation  rel,
struct VacuumParams *  params,
BufferAccessStrategy  bstrategy 
)

Definition at line 295 of file vacuumlazy.c.

297 {
298  LVRelState *vacrel;
299  bool verbose,
300  instrument,
301  skipwithvm,
302  frozenxid_updated,
303  minmulti_updated;
304  BlockNumber orig_rel_pages,
305  new_rel_pages,
306  new_rel_allvisible;
307  PGRUsage ru0;
308  TimestampTz starttime = 0;
309  PgStat_Counter startreadtime = 0,
310  startwritetime = 0;
311  WalUsage startwalusage = pgWalUsage;
312  BufferUsage startbufferusage = pgBufferUsage;
313  ErrorContextCallback errcallback;
314  char **indnames = NULL;
315 
316  verbose = (params->options & VACOPT_VERBOSE) != 0;
317  instrument = (verbose || (AmAutoVacuumWorkerProcess() &&
318  params->log_min_duration >= 0));
319  if (instrument)
320  {
321  pg_rusage_init(&ru0);
322  starttime = GetCurrentTimestamp();
323  if (track_io_timing)
324  {
325  startreadtime = pgStatBlockReadTime;
326  startwritetime = pgStatBlockWriteTime;
327  }
328  }
329 
330  pgstat_progress_start_command(PROGRESS_COMMAND_VACUUM,
331  RelationGetRelid(rel));
332 
333  /*
334  * Setup error traceback support for ereport() first. The idea is to set
335  * up an error context callback to display additional information on any
336  * error during a vacuum. During different phases of vacuum, we update
337  * the state so that the error context callback always displays current
338  * information.
339  *
340  * Copy the names of heap rel into local memory for error reporting
341  * purposes, too. It isn't always safe to assume that we can get the name
342  * of each rel. It's convenient for code in lazy_scan_heap to always use
343  * these temp copies.
344  */
345  vacrel = (LVRelState *) palloc0(sizeof(LVRelState));
346  vacrel->dbname = get_database_name(MyDatabaseId);
347  vacrel->relnamespace = get_namespace_name(RelationGetNamespace(rel));
348  vacrel->relname = pstrdup(RelationGetRelationName(rel));
349  vacrel->indname = NULL;
350  vacrel->phase = VACUUM_ERRCB_PHASE_UNKNOWN;
351  vacrel->verbose = verbose;
352  errcallback.callback = vacuum_error_callback;
353  errcallback.arg = vacrel;
354  errcallback.previous = error_context_stack;
355  error_context_stack = &errcallback;
356 
357  /* Set up high level stuff about rel and its indexes */
358  vacrel->rel = rel;
359  vac_open_indexes(vacrel->rel, RowExclusiveLock, &vacrel->nindexes,
360  &vacrel->indrels);
361  vacrel->bstrategy = bstrategy;
362  if (instrument && vacrel->nindexes > 0)
363  {
364  /* Copy index names used by instrumentation (not error reporting) */
365  indnames = palloc(sizeof(char *) * vacrel->nindexes);
366  for (int i = 0; i < vacrel->nindexes; i++)
367  indnames[i] = pstrdup(RelationGetRelationName(vacrel->indrels[i]));
368  }
369 
370  /*
371  * The index_cleanup param either disables index vacuuming and cleanup or
372  * forces it to go ahead when we would otherwise apply the index bypass
373  * optimization. The default is 'auto', which leaves the final decision
374  * up to lazy_vacuum().
375  *
376  * The truncate param allows user to avoid attempting relation truncation,
377  * though it can't force truncation to happen.
378  */
379  Assert(params->index_cleanup != VACOPTVALUE_UNSPECIFIED);
380  Assert(params->truncate != VACOPTVALUE_UNSPECIFIED &&
381  params->truncate != VACOPTVALUE_AUTO);
382 
383  /*
384  * While VacuumFailSafeActive is reset to false before calling this, we
385  * still need to reset it here due to recursive calls.
386  */
387  VacuumFailsafeActive = false;
388  vacrel->consider_bypass_optimization = true;
389  vacrel->do_index_vacuuming = true;
390  vacrel->do_index_cleanup = true;
391  vacrel->do_rel_truncate = (params->truncate != VACOPTVALUE_DISABLED);
392  if (params->index_cleanup == VACOPTVALUE_DISABLED)
393  {
394  /* Force disable index vacuuming up-front */
395  vacrel->do_index_vacuuming = false;
396  vacrel->do_index_cleanup = false;
397  }
398  else if (params->index_cleanup == VACOPTVALUE_ENABLED)
399  {
400  /* Force index vacuuming. Note that failsafe can still bypass. */
401  vacrel->consider_bypass_optimization = false;
402  }
403  else
404  {
405  /* Default/auto, make all decisions dynamically */
406  Assert(params->index_cleanup == VACOPTVALUE_AUTO);
407  }
408 
409  /* Initialize page counters explicitly (be tidy) */
410  vacrel->scanned_pages = 0;
411  vacrel->removed_pages = 0;
412  vacrel->frozen_pages = 0;
413  vacrel->lpdead_item_pages = 0;
414  vacrel->missed_dead_pages = 0;
415  vacrel->nonempty_pages = 0;
416  /* dead_items_alloc allocates vacrel->dead_items later on */
417 
418  /* Allocate/initialize output statistics state */
419  vacrel->new_rel_tuples = 0;
420  vacrel->new_live_tuples = 0;
421  vacrel->indstats = (IndexBulkDeleteResult **)
422  palloc0(vacrel->nindexes * sizeof(IndexBulkDeleteResult *));
423 
424  /* Initialize remaining counters (be tidy) */
425  vacrel->num_index_scans = 0;
426  vacrel->tuples_deleted = 0;
427  vacrel->tuples_frozen = 0;
428  vacrel->lpdead_items = 0;
429  vacrel->live_tuples = 0;
430  vacrel->recently_dead_tuples = 0;
431  vacrel->missed_dead_tuples = 0;
432 
433  /*
434  * Get cutoffs that determine which deleted tuples are considered DEAD,
435  * not just RECENTLY_DEAD, and which XIDs/MXIDs to freeze. Then determine
436  * the extent of the blocks that we'll scan in lazy_scan_heap. It has to
437  * happen in this order to ensure that the OldestXmin cutoff field works
438  * as an upper bound on the XIDs stored in the pages we'll actually scan
439  * (NewRelfrozenXid tracking must never be allowed to miss unfrozen XIDs).
440  *
441  * Next acquire vistest, a related cutoff that's used in pruning. We use
442  * vistest in combination with OldestXmin to ensure that
443  * heap_page_prune_and_freeze() always removes any deleted tuple whose
444  * xmax is < OldestXmin. lazy_scan_prune must never become confused about
445  * whether a tuple should be frozen or removed. (In the future we might
446  * want to teach lazy_scan_prune to recompute vistest from time to time,
447  * to increase the number of dead tuples it can prune away.)
448  */
449  vacrel->aggressive = vacuum_get_cutoffs(rel, params, &vacrel->cutoffs);
450  vacrel->rel_pages = orig_rel_pages = RelationGetNumberOfBlocks(rel);
451  vacrel->vistest = GlobalVisTestFor(rel);
452  /* Initialize state used to track oldest extant XID/MXID */
453  vacrel->NewRelfrozenXid = vacrel->cutoffs.OldestXmin;
454  vacrel->NewRelminMxid = vacrel->cutoffs.OldestMxact;
455  vacrel->skippedallvis = false;
456  skipwithvm = true;
457  if (params->options & VACOPT_DISABLE_PAGE_SKIPPING)
458  {
459  /*
460  * Force aggressive mode, and disable skipping blocks using the
461  * visibility map (even those set all-frozen)
462  */
463  vacrel->aggressive = true;
464  skipwithvm = false;
465  }
466 
467  vacrel->skipwithvm = skipwithvm;
468 
469  if (verbose)
470  {
471  if (vacrel->aggressive)
472  ereport(INFO,
473  (errmsg("aggressively vacuuming \"%s.%s.%s\"",
474  vacrel->dbname, vacrel->relnamespace,
475  vacrel->relname)));
476  else
477  ereport(INFO,
478  (errmsg("vacuuming \"%s.%s.%s\"",
479  vacrel->dbname, vacrel->relnamespace,
480  vacrel->relname)));
481  }
482 
483  /*
484  * Allocate dead_items memory using dead_items_alloc. This handles
485  * parallel VACUUM initialization as part of allocating shared memory
486  * space used for dead_items. (But do a failsafe precheck first, to
487  * ensure that parallel VACUUM won't be attempted at all when relfrozenxid
488  * is already dangerously old.)
489  */
490  lazy_check_wraparound_failsafe(vacrel);
491  dead_items_alloc(vacrel, params->nworkers);
492 
493  /*
494  * Call lazy_scan_heap to perform all required heap pruning, index
495  * vacuuming, and heap vacuuming (plus related processing)
496  */
497  lazy_scan_heap(vacrel);
498 
499  /*
500  * Free resources managed by dead_items_alloc. This ends parallel mode in
501  * passing when necessary.
502  */
503  dead_items_cleanup(vacrel);
504  Assert(!IsInParallelMode());
505 
506  /*
507  * Update pg_class entries for each of rel's indexes where appropriate.
508  *
509  * Unlike the later update to rel's pg_class entry, this is not critical.
510  * Maintains relpages/reltuples statistics used by the planner only.
511  */
512  if (vacrel->do_index_cleanup)
513  update_relstats_all_indexes(vacrel);
514 
515  /* Done with rel's indexes */
516  vac_close_indexes(vacrel->nindexes, vacrel->indrels, NoLock);
517 
518  /* Optionally truncate rel */
519  if (should_attempt_truncation(vacrel))
520  lazy_truncate_heap(vacrel);
521 
522  /* Pop the error context stack */
523  error_context_stack = errcallback.previous;
524 
525  /* Report that we are now doing final cleanup */
526  pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
527  PROGRESS_VACUUM_PHASE_FINAL_CLEANUP);
528 
529  /*
530  * Prepare to update rel's pg_class entry.
531  *
532  * Aggressive VACUUMs must always be able to advance relfrozenxid to a
533  * value >= FreezeLimit, and relminmxid to a value >= MultiXactCutoff.
534  * Non-aggressive VACUUMs may advance them by any amount, or not at all.
535  */
536  Assert(vacrel->NewRelfrozenXid == vacrel->cutoffs.OldestXmin ||
537  TransactionIdPrecedesOrEquals(vacrel->aggressive ? vacrel->cutoffs.FreezeLimit :
538  vacrel->cutoffs.relfrozenxid,
539  vacrel->NewRelfrozenXid));
540  Assert(vacrel->NewRelminMxid == vacrel->cutoffs.OldestMxact ||
541  MultiXactIdPrecedesOrEquals(vacrel->aggressive ? vacrel->cutoffs.MultiXactCutoff :
542  vacrel->cutoffs.relminmxid,
543  vacrel->NewRelminMxid));
544  if (vacrel->skippedallvis)
545  {
546  /*
547  * Must keep original relfrozenxid in a non-aggressive VACUUM that
548  * chose to skip an all-visible page range. The state that tracks new
549  * values will have missed unfrozen XIDs from the pages we skipped.
550  */
551  Assert(!vacrel->aggressive);
552  vacrel->NewRelfrozenXid = InvalidTransactionId;
553  vacrel->NewRelminMxid = InvalidMultiXactId;
554 
555 
556  /*
557  * For safety, clamp relallvisible to be not more than what we're setting
558  * pg_class.relpages to
559  */
560  new_rel_pages = vacrel->rel_pages; /* After possible rel truncation */
561  visibilitymap_count(rel, &new_rel_allvisible, NULL);
562  if (new_rel_allvisible > new_rel_pages)
563  new_rel_allvisible = new_rel_pages;
564 
565  /*
566  * Now actually update rel's pg_class entry.
567  *
568  * In principle new_live_tuples could be -1 indicating that we (still)
569  * don't know the tuple count. In practice that can't happen, since we
570  * scan every page that isn't skipped using the visibility map.
571  */
572  vac_update_relstats(rel, new_rel_pages, vacrel->new_live_tuples,
573  new_rel_allvisible, vacrel->nindexes > 0,
574  vacrel->NewRelfrozenXid, vacrel->NewRelminMxid,
575  &frozenxid_updated, &minmulti_updated, false);
576 
577  /*
578  * Report results to the cumulative stats system, too.
579  *
580  * Deliberately avoid telling the stats system about LP_DEAD items that
581  * remain in the table due to VACUUM bypassing index and heap vacuuming.
582  * ANALYZE will consider the remaining LP_DEAD items to be dead "tuples".
583  * It seems like a good idea to err on the side of not vacuuming again too
584  * soon in cases where the failsafe prevented significant amounts of heap
585  * vacuuming.
586  */
587  pgstat_report_vacuum(RelationGetRelid(rel),
588  rel->rd_rel->relisshared,
589  Max(vacrel->new_live_tuples, 0),
590  vacrel->recently_dead_tuples +
591  vacrel->missed_dead_tuples);
592  pgstat_progress_end_command();
593 
594  if (instrument)
595  {
596  TimestampTz endtime = GetCurrentTimestamp();
597 
598  if (verbose || params->log_min_duration == 0 ||
599  TimestampDifferenceExceeds(starttime, endtime,
600  params->log_min_duration))
601  {
602  long secs_dur;
603  int usecs_dur;
604  WalUsage walusage;
605  BufferUsage bufferusage;
606  StringInfoData buf;
607  char *msgfmt;
608  int32 diff;
609  double read_rate = 0,
610  write_rate = 0;
611 
612  TimestampDifference(starttime, endtime, &secs_dur, &usecs_dur);
613  memset(&walusage, 0, sizeof(WalUsage));
614  WalUsageAccumDiff(&walusage, &pgWalUsage, &startwalusage);
615  memset(&bufferusage, 0, sizeof(BufferUsage));
616  BufferUsageAccumDiff(&bufferusage, &pgBufferUsage, &startbufferusage);
617 
618  initStringInfo(&buf);
619  if (verbose)
620  {
621  /*
622  * Aggressiveness already reported earlier, in dedicated
623  * VACUUM VERBOSE ereport
624  */
625  Assert(!params->is_wraparound);
626  msgfmt = _("finished vacuuming \"%s.%s.%s\": index scans: %d\n");
627  }
628  else if (params->is_wraparound)
629  {
630  /*
631  * While it's possible for a VACUUM to be both is_wraparound
632  * and !aggressive, that's just a corner-case -- is_wraparound
633  * implies aggressive. Produce distinct output for the corner
634  * case all the same, just in case.
635  */
636  if (vacrel->aggressive)
637  msgfmt = _("automatic aggressive vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
638  else
639  msgfmt = _("automatic vacuum to prevent wraparound of table \"%s.%s.%s\": index scans: %d\n");
640  }
641  else
642  {
643  if (vacrel->aggressive)
644  msgfmt = _("automatic aggressive vacuum of table \"%s.%s.%s\": index scans: %d\n");
645  else
646  msgfmt = _("automatic vacuum of table \"%s.%s.%s\": index scans: %d\n");
647  }
648  appendStringInfo(&buf, msgfmt,
649  vacrel->dbname,
650  vacrel->relnamespace,
651  vacrel->relname,
652  vacrel->num_index_scans);
653  appendStringInfo(&buf, _("pages: %u removed, %u remain, %u scanned (%.2f%% of total)\n"),
654  vacrel->removed_pages,
655  new_rel_pages,
656  vacrel->scanned_pages,
657  orig_rel_pages == 0 ? 100.0 :
658  100.0 * vacrel->scanned_pages / orig_rel_pages);
659  appendStringInfo(&buf,
660  _("tuples: %lld removed, %lld remain, %lld are dead but not yet removable\n"),
661  (long long) vacrel->tuples_deleted,
662  (long long) vacrel->new_rel_tuples,
663  (long long) vacrel->recently_dead_tuples);
664  if (vacrel->missed_dead_tuples > 0)
665  appendStringInfo(&buf,
666  _("tuples missed: %lld dead from %u pages not removed due to cleanup lock contention\n"),
667  (long long) vacrel->missed_dead_tuples,
668  vacrel->missed_dead_pages);
669  diff = (int32) (ReadNextTransactionId() -
670  vacrel->cutoffs.OldestXmin);
671  appendStringInfo(&buf,
672  _("removable cutoff: %u, which was %d XIDs old when operation ended\n"),
673  vacrel->cutoffs.OldestXmin, diff);
674  if (frozenxid_updated)
675  {
676  diff = (int32) (vacrel->NewRelfrozenXid -
677  vacrel->cutoffs.relfrozenxid);
678  appendStringInfo(&buf,
679  _("new relfrozenxid: %u, which is %d XIDs ahead of previous value\n"),
680  vacrel->NewRelfrozenXid, diff);
681  }
682  if (minmulti_updated)
683  {
684  diff = (int32) (vacrel->NewRelminMxid -
685  vacrel->cutoffs.relminmxid);
686  appendStringInfo(&buf,
687  _("new relminmxid: %u, which is %d MXIDs ahead of previous value\n"),
688  vacrel->NewRelminMxid, diff);
689  }
690  appendStringInfo(&buf, _("frozen: %u pages from table (%.2f%% of total) had %lld tuples frozen\n"),
691  vacrel->frozen_pages,
692  orig_rel_pages == 0 ? 100.0 :
693  100.0 * vacrel->frozen_pages / orig_rel_pages,
694  (long long) vacrel->tuples_frozen);
695  if (vacrel->do_index_vacuuming)
696  {
697  if (vacrel->nindexes == 0 || vacrel->num_index_scans == 0)
698  appendStringInfoString(&buf, _("index scan not needed: "));
699  else
700  appendStringInfoString(&buf, _("index scan needed: "));
701 
702  msgfmt = _("%u pages from table (%.2f%% of total) had %lld dead item identifiers removed\n");
703  }
704  else
705  {
706  if (!VacuumFailsafeActive)
707  appendStringInfoString(&buf, _("index scan bypassed: "));
708  else
709  appendStringInfoString(&buf, _("index scan bypassed by failsafe: "));
710 
711  msgfmt = _("%u pages from table (%.2f%% of total) have %lld dead item identifiers\n");
712  }
713  appendStringInfo(&buf, msgfmt,
714  vacrel->lpdead_item_pages,
715  orig_rel_pages == 0 ? 100.0 :
716  100.0 * vacrel->lpdead_item_pages / orig_rel_pages,
717  (long long) vacrel->lpdead_items);
718  for (int i = 0; i < vacrel->nindexes; i++)
719  {
720  IndexBulkDeleteResult *istat = vacrel->indstats[i];
721 
722  if (!istat)
723  continue;
724 
725  appendStringInfo(&buf,
726  _("index \"%s\": pages: %u in total, %u newly deleted, %u currently deleted, %u reusable\n"),
727  indnames[i],
728  istat->num_pages,
729  istat->pages_newly_deleted,
730  istat->pages_deleted,
731  istat->pages_free);
732  }
733  if (track_io_timing)
734  {
735  double read_ms = (double) (pgStatBlockReadTime - startreadtime) / 1000;
736  double write_ms = (double) (pgStatBlockWriteTime - startwritetime) / 1000;
737 
738  appendStringInfo(&buf, _("I/O timings: read: %.3f ms, write: %.3f ms\n"),
739  read_ms, write_ms);
740  }
741  if (secs_dur > 0 || usecs_dur > 0)
742  {
743  read_rate = (double) BLCKSZ * (bufferusage.shared_blks_read + bufferusage.local_blks_read) /
744  (1024 * 1024) / (secs_dur + usecs_dur / 1000000.0);
745  write_rate = (double) BLCKSZ * (bufferusage.shared_blks_dirtied + bufferusage.local_blks_dirtied) /
746  (1024 * 1024) / (secs_dur + usecs_dur / 1000000.0);
747  }
748  appendStringInfo(&buf, _("avg read rate: %.3f MB/s, avg write rate: %.3f MB/s\n"),
749  read_rate, write_rate);
750  appendStringInfo(&buf,
751  _("buffer usage: %lld hits, %lld misses, %lld dirtied\n"),
752  (long long) (bufferusage.shared_blks_hit + bufferusage.local_blks_hit),
753  (long long) (bufferusage.shared_blks_read + bufferusage.local_blks_read),
754  (long long) (bufferusage.shared_blks_dirtied + bufferusage.local_blks_dirtied));
755  appendStringInfo(&buf,
756  _("WAL usage: %lld records, %lld full page images, %llu bytes\n"),
757  (long long) walusage.wal_records,
758  (long long) walusage.wal_fpi,
759  (unsigned long long) walusage.wal_bytes);
760  appendStringInfo(&buf, _("system usage: %s"), pg_rusage_show(&ru0));
761 
762  ereport(verbose ? INFO : LOG,
763  (errmsg_internal("%s", buf.data)));
764  pfree(buf.data);
765  }
766  }
767 
768  /* Cleanup index statistics and index names */
769  for (int i = 0; i < vacrel->nindexes; i++)
770  {
771  if (vacrel->indstats[i])
772  pfree(vacrel->indstats[i]);
773 
774  if (instrument)
775  pfree(indnames[i]);
776  }
777 }
void TimestampDifference(TimestampTz start_time, TimestampTz stop_time, long *secs, int *microsecs)
Definition: timestamp.c:1731
bool TimestampDifferenceExceeds(TimestampTz start_time, TimestampTz stop_time, int msec)
Definition: timestamp.c:1791
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1655
void pgstat_progress_start_command(ProgressCommandType cmdtype, Oid relid)
void pgstat_progress_update_param(int index, int64 val)
void pgstat_progress_end_command(void)
@ PROGRESS_COMMAND_VACUUM
bool track_io_timing
Definition: bufmgr.c:170
#define RelationGetNumberOfBlocks(reln)
Definition: bufmgr.h:273
signed int int32
Definition: c.h:494
int64 TimestampTz
Definition: timestamp.h:39
char * get_database_name(Oid dbid)
Definition: dbcommands.c:3166
ErrorContextCallback * error_context_stack
Definition: elog.c:94
#define _(x)
Definition: elog.c:90
#define LOG
Definition: elog.h:31
#define INFO
Definition: elog.h:34
Oid MyDatabaseId
Definition: globals.c:92
int verbose
void WalUsageAccumDiff(WalUsage *dst, const WalUsage *add, const WalUsage *sub)
Definition: instrument.c:286
BufferUsage pgBufferUsage
Definition: instrument.c:20
void BufferUsageAccumDiff(BufferUsage *dst, const BufferUsage *add, const BufferUsage *sub)
Definition: instrument.c:248
#define NoLock
Definition: lockdefs.h:34
#define RowExclusiveLock
Definition: lockdefs.h:38
char * get_namespace_name(Oid nspid)
Definition: lsyscache.c:3366
char * pstrdup(const char *in)
Definition: mcxt.c:1696
void * palloc0(Size size)
Definition: mcxt.c:1347
#define AmAutoVacuumWorkerProcess()
Definition: miscadmin.h:375
const char * pg_rusage_show(const PGRUsage *ru0)
Definition: pg_rusage.c:40
void pg_rusage_init(PGRUsage *ru0)
Definition: pg_rusage.c:27
int64 PgStat_Counter
Definition: pgstat.h:89
PgStat_Counter pgStatBlockReadTime
PgStat_Counter pgStatBlockWriteTime
void pgstat_report_vacuum(Oid tableoid, bool shared, PgStat_Counter livetuples, PgStat_Counter deadtuples)
#define PROGRESS_VACUUM_PHASE_FINAL_CLEANUP
Definition: progress.h:38
#define PROGRESS_VACUUM_PHASE
Definition: progress.h:21
#define RelationGetNamespace(relation)
Definition: rel.h:546
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:97
void appendStringInfoString(StringInfo str, const char *s)
Definition: stringinfo.c:182
void initStringInfo(StringInfo str)
Definition: stringinfo.c:59
int64 shared_blks_dirtied
Definition: instrument.h:28
int64 local_blks_hit
Definition: instrument.h:30
int64 shared_blks_read
Definition: instrument.h:27
int64 local_blks_read
Definition: instrument.h:31
int64 local_blks_dirtied
Definition: instrument.h:32
int64 shared_blks_hit
Definition: instrument.h:26
struct ErrorContextCallback * previous
Definition: elog.h:295
void(* callback)(void *arg)
Definition: elog.h:296
BlockNumber pages_deleted
Definition: genam.h:82
BlockNumber pages_newly_deleted
Definition: genam.h:81
BlockNumber pages_free
Definition: genam.h:83
BlockNumber num_pages
Definition: genam.h:77
bool verbose
Definition: vacuumlazy.c:175
int nindexes
Definition: vacuumlazy.c:141
int64 tuples_deleted
Definition: vacuumlazy.c:207
BlockNumber nonempty_pages
Definition: vacuumlazy.c:196
bool do_rel_truncate
Definition: vacuumlazy.c:157
BlockNumber scanned_pages
Definition: vacuumlazy.c:191
bool aggressive
Definition: vacuumlazy.c:148
GlobalVisState * vistest
Definition: vacuumlazy.c:161
BlockNumber removed_pages
Definition: vacuumlazy.c:192
int num_index_scans
Definition: vacuumlazy.c:205
IndexBulkDeleteResult ** indstats
Definition: vacuumlazy.c:202
double new_live_tuples
Definition: vacuumlazy.c:200
double new_rel_tuples
Definition: vacuumlazy.c:199
TransactionId NewRelfrozenXid
Definition: vacuumlazy.c:163
Relation rel
Definition: vacuumlazy.c:139
bool consider_bypass_optimization
Definition: vacuumlazy.c:152
BlockNumber rel_pages
Definition: vacuumlazy.c:190
int64 recently_dead_tuples
Definition: vacuumlazy.c:211
int64 tuples_frozen
Definition: vacuumlazy.c:208
BlockNumber frozen_pages
Definition: vacuumlazy.c:193
char * dbname
Definition: vacuumlazy.c:168
BlockNumber missed_dead_pages
Definition: vacuumlazy.c:195
char * relnamespace
Definition: vacuumlazy.c:169
int64 live_tuples
Definition: vacuumlazy.c:210
int64 lpdead_items
Definition: vacuumlazy.c:209
BufferAccessStrategy bstrategy
Definition: vacuumlazy.c:144
bool skippedallvis
Definition: vacuumlazy.c:165
BlockNumber lpdead_item_pages
Definition: vacuumlazy.c:194
Relation * indrels
Definition: vacuumlazy.c:140
bool skipwithvm
Definition: vacuumlazy.c:150
bool do_index_cleanup
Definition: vacuumlazy.c:156
MultiXactId NewRelminMxid
Definition: vacuumlazy.c:164
int64 missed_dead_tuples
Definition: vacuumlazy.c:212
struct VacuumCutoffs cutoffs
Definition: vacuumlazy.c:160
char * relname
Definition: vacuumlazy.c:170
VacErrPhase phase
Definition: vacuumlazy.c:174
char * indname
Definition: vacuumlazy.c:171
bool do_index_vacuuming
Definition: vacuumlazy.c:155
int nworkers
Definition: vacuum.h:239
VacOptValue truncate
Definition: vacuum.h:231
bits32 options
Definition: vacuum.h:219
bool is_wraparound
Definition: vacuum.h:226
int log_min_duration
Definition: vacuum.h:227
VacOptValue index_cleanup
Definition: vacuum.h:230
uint64 wal_bytes
Definition: instrument.h:55
int64 wal_records
Definition: instrument.h:53
static TransactionId ReadNextTransactionId(void)
Definition: transam.h:315
void vac_open_indexes(Relation relation, LOCKMODE lockmode, int *nindexes, Relation **Irel)
Definition: vacuum.c:2276
void vac_update_relstats(Relation relation, BlockNumber num_pages, double num_tuples, BlockNumber num_all_visible_pages, bool hasindex, TransactionId frozenxid, MultiXactId minmulti, bool *frozenxid_updated, bool *minmulti_updated, bool in_outer_xact)
Definition: vacuum.c:1398
void vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode)
Definition: vacuum.c:2319
bool vacuum_get_cutoffs(Relation rel, const VacuumParams *params, struct VacuumCutoffs *cutoffs)
Definition: vacuum.c:1072
bool VacuumFailsafeActive
Definition: vacuum.c:96
#define VACOPT_VERBOSE
Definition: vacuum.h:182
@ VACOPTVALUE_AUTO
Definition: vacuum.h:203
@ VACOPTVALUE_ENABLED
Definition: vacuum.h:205
@ VACOPTVALUE_UNSPECIFIED
Definition: vacuum.h:202
@ VACOPTVALUE_DISABLED
Definition: vacuum.h:204
#define VACOPT_DISABLE_PAGE_SKIPPING
Definition: vacuum.h:188
static void dead_items_cleanup(LVRelState *vacrel)
Definition: vacuumlazy.c:2932
static void update_relstats_all_indexes(LVRelState *vacrel)
Definition: vacuumlazy.c:3073
static void vacuum_error_callback(void *arg)
Definition: vacuumlazy.c:3108
static void lazy_truncate_heap(LVRelState *vacrel)
Definition: vacuumlazy.c:2549
static bool should_attempt_truncation(LVRelState *vacrel)
Definition: vacuumlazy.c:2529
@ VACUUM_ERRCB_PHASE_UNKNOWN
Definition: vacuumlazy.c:128
static void lazy_scan_heap(LVRelState *vacrel)
Definition: vacuumlazy.c:816
static bool lazy_check_wraparound_failsafe(LVRelState *vacrel)
Definition: vacuumlazy.c:2299
static void dead_items_alloc(LVRelState *vacrel, int nworkers)
Definition: vacuumlazy.c:2822
void visibilitymap_count(Relation rel, BlockNumber *all_visible, BlockNumber *all_frozen)

References _, LVRelState::aggressive, AmAutoVacuumWorkerProcess, appendStringInfo(), appendStringInfoString(), ErrorContextCallback::arg, Assert, LVRelState::bstrategy, buf, BufferUsageAccumDiff(), ErrorContextCallback::callback, LVRelState::consider_bypass_optimization, LVRelState::cutoffs, LVRelState::dbname, dead_items_alloc(), dead_items_cleanup(), LVRelState::do_index_cleanup, LVRelState::do_index_vacuuming, LVRelState::do_rel_truncate, ereport, errmsg(), errmsg_internal(), error_context_stack, VacuumCutoffs::FreezeLimit, LVRelState::frozen_pages, get_database_name(), get_namespace_name(), GetCurrentTimestamp(), GlobalVisTestFor(), i, VacuumParams::index_cleanup, LVRelState::indname, LVRelState::indrels, LVRelState::indstats, INFO, initStringInfo(), InvalidMultiXactId, InvalidTransactionId, VacuumParams::is_wraparound, IsInParallelMode(), lazy_check_wraparound_failsafe(), lazy_scan_heap(), lazy_truncate_heap(), LVRelState::live_tuples, BufferUsage::local_blks_dirtied, BufferUsage::local_blks_hit, BufferUsage::local_blks_read, LOG, VacuumParams::log_min_duration, LVRelState::lpdead_item_pages, LVRelState::lpdead_items, Max, LVRelState::missed_dead_pages, LVRelState::missed_dead_tuples, VacuumCutoffs::MultiXactCutoff, MultiXactIdPrecedesOrEquals(), MyDatabaseId, LVRelState::new_live_tuples, LVRelState::new_rel_tuples, LVRelState::NewRelfrozenXid, LVRelState::NewRelminMxid, LVRelState::nindexes, NoLock, LVRelState::nonempty_pages, LVRelState::num_index_scans, IndexBulkDeleteResult::num_pages, VacuumParams::nworkers, VacuumCutoffs::OldestMxact, VacuumCutoffs::OldestXmin, VacuumParams::options, IndexBulkDeleteResult::pages_deleted, IndexBulkDeleteResult::pages_free, IndexBulkDeleteResult::pages_newly_deleted, palloc(), palloc0(), pfree(), pg_rusage_init(), pg_rusage_show(), pgBufferUsage, pgstat_progress_end_command(), pgstat_progress_start_command(), pgstat_progress_update_param(), pgstat_report_vacuum(), pgStatBlockReadTime, pgStatBlockWriteTime, pgWalUsage, LVRelState::phase, ErrorContextCallback::previous, PROGRESS_COMMAND_VACUUM, PROGRESS_VACUUM_PHASE, PROGRESS_VACUUM_PHASE_FINAL_CLEANUP, pstrdup(), RelationData::rd_rel, ReadNextTransactionId(), LVRelState::recently_dead_tuples, LVRelState::rel, LVRelState::rel_pages, RelationGetNamespace, RelationGetNumberOfBlocks, RelationGetRelationName, RelationGetRelid, VacuumCutoffs::relfrozenxid, VacuumCutoffs::relminmxid, LVRelState::relname, LVRelState::relnamespace, LVRelState::removed_pages, RowExclusiveLock, LVRelState::scanned_pages, BufferUsage::shared_blks_dirtied, BufferUsage::shared_blks_hit, BufferUsage::shared_blks_read, should_attempt_truncation(), LVRelState::skippedallvis, LVRelState::skipwithvm, TimestampDifference(), TimestampDifferenceExceeds(), track_io_timing, TransactionIdPrecedesOrEquals(), VacuumParams::truncate, LVRelState::tuples_deleted, LVRelState::tuples_frozen, update_relstats_all_indexes(), vac_close_indexes(), vac_open_indexes(), vac_update_relstats(), VACOPT_DISABLE_PAGE_SKIPPING, VACOPT_VERBOSE, VACOPTVALUE_AUTO, VACOPTVALUE_DISABLED, VACOPTVALUE_ENABLED, VACOPTVALUE_UNSPECIFIED, VACUUM_ERRCB_PHASE_UNKNOWN, vacuum_error_callback(), vacuum_get_cutoffs(), VacuumFailsafeActive, LVRelState::verbose, verbose, visibilitymap_count(), LVRelState::vistest, WalUsage::wal_bytes, WalUsage::wal_fpi, WalUsage::wal_records, and WalUsageAccumDiff().
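
The following is a minimal, purely illustrative sketch of how heap_vacuum_rel() is reached and which VacuumParams fields it consumes. It is not how VACUUM or autovacuum actually drive the function: they go through vacuum()/vacuum_rel(), which handle transaction control, locking, and option resolution. The helper name vacuum_heap_rel_sketch, the direct use of the table AM wrapper, and the particular parameter values are assumptions for illustration; note that heap_vacuum_rel() expects truncate to already be resolved to ENABLED or DISABLED (not AUTO), while index_cleanup may stay AUTO and is then decided by lazy_vacuum().

#include "postgres.h"

#include "access/tableam.h"
#include "commands/vacuum.h"
#include "storage/bufmgr.h"

static void
vacuum_heap_rel_sketch(Relation rel)
{
	VacuumParams params;
	BufferAccessStrategy bstrategy = GetAccessStrategy(BAS_VACUUM);

	MemSet(&params, 0, sizeof(params));
	params.options = VACOPT_VACUUM | VACOPT_VERBOSE;
	params.index_cleanup = VACOPTVALUE_AUTO;	/* final decision left to lazy_vacuum() */
	params.truncate = VACOPTVALUE_ENABLED;		/* must not be AUTO at this level */
	params.freeze_min_age = -1;					/* -1 means use reloptions/GUC defaults */
	params.freeze_table_age = -1;
	params.multixact_freeze_min_age = -1;
	params.multixact_freeze_table_age = -1;
	params.is_wraparound = false;
	params.log_min_duration = -1;				/* no autovacuum-style logging */
	params.nworkers = -1;						/* no parallel index vacuuming */

	/* For a heap table the table AM dispatches this to heap_vacuum_rel(). */
	table_relation_vacuum(rel, &params, bstrategy);

	FreeAccessStrategy(bstrategy);
}
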

◆ HeapCheckForSerializableConflictOut()

void HeapCheckForSerializableConflictOut ( bool  visible,
Relation  relation,
HeapTuple  tuple,
Buffer  buffer,
Snapshot  snapshot 
)

Definition at line 10061 of file heapam.c.

10064 {
10065  TransactionId xid;
10066  HTSV_Result htsvResult;
10067 
10068  if (!CheckForSerializableConflictOutNeeded(relation, snapshot))
10069  return;
10070 
10071  /*
10072  * Check to see whether the tuple has been written to by a concurrent
10073  * transaction, either to create it not visible to us, or to delete it
10074  * while it is visible to us. The "visible" bool indicates whether the
10075  * tuple is visible to us, while HeapTupleSatisfiesVacuum checks what else
10076  * is going on with it.
10077  *
10078  * In the event of a concurrently inserted tuple that also happens to have
10079  * been concurrently updated (by a separate transaction), the xmin of the
10080  * tuple will be used -- not the updater's xid.
10081  */
10082  htsvResult = HeapTupleSatisfiesVacuum(tuple, TransactionXmin, buffer);
10083  switch (htsvResult)
10084  {
10085  case HEAPTUPLE_LIVE:
10086  if (visible)
10087  return;
10088  xid = HeapTupleHeaderGetXmin(tuple->t_data);
10089  break;
10090  case HEAPTUPLE_RECENTLY_DEAD:
10091  case HEAPTUPLE_DELETE_IN_PROGRESS:
10092  if (visible)
10093  xid = HeapTupleHeaderGetUpdateXid(tuple->t_data);
10094  else
10095  xid = HeapTupleHeaderGetXmin(tuple->t_data);
10096 
10097  if (TransactionIdPrecedes(xid, TransactionXmin))
10098  {
10099  /* This is like the HEAPTUPLE_DEAD case */
10100  Assert(!visible);
10101  return;
10102  }
10103  break;
10104  case HEAPTUPLE_INSERT_IN_PROGRESS:
10105  xid = HeapTupleHeaderGetXmin(tuple->t_data);
10106  break;
10107  case HEAPTUPLE_DEAD:
10108  Assert(!visible);
10109  return;
10110  default:
10111 
10112  /*
10113  * The only way to get to this default clause is if a new value is
10114  * added to the enum type without adding it to this switch
10115  * statement. That's a bug, so elog.
10116  */
10117  elog(ERROR, "unrecognized return value from HeapTupleSatisfiesVacuum: %u", htsvResult);
10118 
10119  /*
10120  * In spite of having all enum values covered and calling elog on
10121  * this default, some compilers think this is a code path which
10122  * allows xid to be used below without initialization. Silence
10123  * that warning.
10124  */
10125  xid = InvalidTransactionId;
10126  }
10127 
10128  Assert(TransactionIdIsValid(xid));
10129  Assert(TransactionIdFollowsOrEquals(xid, TransactionXmin));
10130 
10131  /*
10132  * Find top level xid. Bail out if xid is too early to be a conflict, or
10133  * if it's our own xid.
10134  */
10135  if (TransactionIdEquals(xid, GetTopTransactionIdIfAny()))
10136  return;
10137  xid = SubTransGetTopmostTransaction(xid);
10138  if (TransactionIdPrecedes(xid, TransactionXmin))
10139  return;
10140 
10141  CheckForSerializableConflictOut(relation, xid, snapshot);
10142 }
HTSV_Result HeapTupleSatisfiesVacuum(HeapTuple htup, TransactionId OldestXmin, Buffer buffer)
void CheckForSerializableConflictOut(Relation relation, TransactionId xid, Snapshot snapshot)
Definition: predicate.c:4008
TransactionId SubTransGetTopmostTransaction(TransactionId xid)
Definition: subtrans.c:163
bool TransactionIdFollowsOrEquals(TransactionId id1, TransactionId id2)
Definition: transam.c:329
TransactionId GetTopTransactionIdIfAny(void)
Definition: xact.c:439

References Assert, CheckForSerializableConflictOut(), CheckForSerializableConflictOutNeeded(), elog, ERROR, GetTopTransactionIdIfAny(), HEAPTUPLE_DEAD, HEAPTUPLE_DELETE_IN_PROGRESS, HEAPTUPLE_INSERT_IN_PROGRESS, HEAPTUPLE_LIVE, HEAPTUPLE_RECENTLY_DEAD, HeapTupleHeaderGetUpdateXid, HeapTupleHeaderGetXmin, HeapTupleSatisfiesVacuum(), InvalidTransactionId, SubTransGetTopmostTransaction(), HeapTupleData::t_data, TransactionIdEquals, TransactionIdFollowsOrEquals(), TransactionIdIsValid, TransactionIdPrecedes(), and TransactionXmin.

Referenced by heap_fetch(), heap_get_latest_tid(), heap_hot_search_buffer(), heapam_scan_bitmap_next_block(), heapam_scan_sample_next_tuple(), heapgettup(), and page_collect_tuples().
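
A usage sketch (not PostgreSQL source; the helper name is hypothetical): callers such as heap_fetch() decide plain MVCC visibility first and only then report the tuple to the SSI machinery, while still holding a pin and content lock on the buffer.

#include "postgres.h"
#include "access/heapam.h"
#include "storage/bufmgr.h"
#include "utils/snapmgr.h"

/* Hypothetical helper: visibility test plus serializable-conflict check. */
static bool
fetch_visible_with_ssi_check(Relation rel, Snapshot snapshot,
                             HeapTuple tuple, Buffer buffer)
{
    /* caller must hold at least a share lock on the buffer */
    bool        visible = HeapTupleSatisfiesVisibility(tuple, snapshot, buffer);

    /* an invisible tuple written by a concurrent xact can still conflict */
    HeapCheckForSerializableConflictOut(visible, rel, tuple, buffer, snapshot);

    return visible;
}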

◆ HeapTupleHeaderIsOnlyLocked()

bool HeapTupleHeaderIsOnlyLocked ( HeapTupleHeader  tuple)

Definition at line 1520 of file heapam_visibility.c.

1521 {
1522  TransactionId xmax;
1523 
1524  /* if there's no valid Xmax, then there's obviously no update either */
1525  if (tuple->t_infomask & HEAP_XMAX_INVALID)
1526  return true;
1527 
1528  if (tuple->t_infomask & HEAP_XMAX_LOCK_ONLY)
1529  return true;
1530 
1531  /* invalid xmax means no update */
1532  if (!TransactionIdIsValid(HeapTupleHeaderGetRawXmax(tuple)))
1533  return true;
1534 
1535  /*
1536  * if HEAP_XMAX_LOCK_ONLY is not set and not a multi, then this must
1537  * necessarily have been updated
1538  */
1539  if (!(tuple->t_infomask & HEAP_XMAX_IS_MULTI))
1540  return false;
1541 
1542  /* ... but if it's a multi, then perhaps the updating Xid aborted. */
1543  xmax = HeapTupleGetUpdateXid(tuple);
1544 
1545  /* not LOCKED_ONLY, so it has to have an xmax */
1546  Assert(TransactionIdIsValid(xmax));
1547 
1548  if (TransactionIdIsCurrentTransactionId(xmax))
1549  return false;
1550  if (TransactionIdIsInProgress(xmax))
1551  return false;
1552  if (TransactionIdDidCommit(xmax))
1553  return false;
1554 
1555  /*
1556  * not current, not in progress, not committed -- must have aborted or
1557  * crashed
1558  */
1559  return true;
1560 }
bool TransactionIdIsInProgress(TransactionId xid)
Definition: procarray.c:1402

References Assert, HEAP_XMAX_INVALID, HEAP_XMAX_IS_MULTI, HEAP_XMAX_LOCK_ONLY, HeapTupleGetUpdateXid(), HeapTupleHeaderGetRawXmax, HeapTupleHeaderData::t_infomask, TransactionIdDidCommit(), TransactionIdIsCurrentTransactionId(), TransactionIdIsInProgress(), and TransactionIdIsValid.

Referenced by heap_delete(), heap_get_latest_tid(), heap_lock_tuple(), heap_lock_updated_tuple_rec(), HeapTupleSatisfiesVacuumHorizon(), and rewrite_heap_tuple().
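
A usage sketch (hypothetical helper name): distinguishing a tuple whose xmax merely locked it, or whose updater aborted, from one with an update that still matters, as heap_lock_tuple() and heap_delete() must.

#include "postgres.h"
#include "access/heapam.h"
#include "access/htup_details.h"

/* Returns true if xmax is set and represents more than row locks. */
static bool
xmax_is_effective_update(HeapTupleHeader hdr)
{
    if (hdr->t_infomask & HEAP_XMAX_INVALID)
        return false;           /* no deleter or locker at all */

    /* true from HeapTupleHeaderIsOnlyLocked() => locks only, or updater aborted */
    return !HeapTupleHeaderIsOnlyLocked(hdr);
}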

◆ HeapTupleIsSurelyDead()

bool HeapTupleIsSurelyDead ( HeapTuple  htup,
struct GlobalVisState vistest 
)

Definition at line 1465 of file heapam_visibility.c.

1466 {
1467  HeapTupleHeader tuple = htup->t_data;
1468 
1469  Assert(ItemPointerIsValid(&htup->t_self));
1470  Assert(htup->t_tableOid != InvalidOid);
1471 
1472  /*
1473  * If the inserting transaction is marked invalid, then it aborted, and
1474  * the tuple is definitely dead. If it's marked neither committed nor
1475  * invalid, then we assume it's still alive (since the presumption is that
1476  * all relevant hint bits were just set moments ago).
1477  */
1478  if (!HeapTupleHeaderXminCommitted(tuple))
1479  return HeapTupleHeaderXminInvalid(tuple);
1480 
1481  /*
1482  * If the inserting transaction committed, but any deleting transaction
1483  * aborted, the tuple is still alive.
1484  */
1485  if (tuple->t_infomask & HEAP_XMAX_INVALID)
1486  return false;
1487 
1488  /*
1489  * If the XMAX is just a lock, the tuple is still alive.
1490  */
1491  if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
1492  return false;
1493 
1494  /*
1495  * If the Xmax is a MultiXact, it might be dead or alive, but we cannot
1496  * know without checking pg_multixact.
1497  */
1498  if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
1499  return false;
1500 
1501  /* If deleter isn't known to have committed, assume it's still running. */
1502  if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED))
1503  return false;
1504 
1505  /* Deleter committed, so tuple is dead if the XID is old enough. */
1506  return GlobalVisTestIsRemovableXid(vistest,
1507  HeapTupleHeaderGetRawXmax(tuple));
1508 }
#define HeapTupleHeaderXminCommitted(tup)
Definition: htup_details.h:320
#define HeapTupleHeaderXminInvalid(tup)
Definition: htup_details.h:325
#define InvalidOid
Definition: postgres_ext.h:36

References Assert, GlobalVisTestIsRemovableXid(), HEAP_XMAX_COMMITTED, HEAP_XMAX_INVALID, HEAP_XMAX_IS_LOCKED_ONLY, HEAP_XMAX_IS_MULTI, HeapTupleHeaderGetRawXmax, HeapTupleHeaderXminCommitted, HeapTupleHeaderXminInvalid, InvalidOid, ItemPointerIsValid(), HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleData::t_self, and HeapTupleData::t_tableOid.

Referenced by heap_hot_search_buffer().
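
A usage sketch, assuming backend context: heap_hot_search_buffer() uses this as a cheap, hint-bit-free test while maintaining its all_dead flag. The wrapper below (hypothetical name) obtains the GlobalVisState for the table being scanned.

#include "postgres.h"
#include "access/heapam.h"
#include "utils/snapmgr.h"

/* Conservative "definitely dead for everyone" test; never sets hint bits. */
static bool
tuple_surely_dead(Relation rel, HeapTuple tup)
{
    GlobalVisState *vistest = GlobalVisTestFor(rel);

    return HeapTupleIsSurelyDead(tup, vistest);
}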

◆ HeapTupleSatisfiesUpdate()

TM_Result HeapTupleSatisfiesUpdate ( HeapTuple  htup,
CommandId  curcid,
Buffer  buffer 
)

Definition at line 458 of file heapam_visibility.c.

460 {
461  HeapTupleHeader tuple = htup->t_data;
462 
463  Assert(ItemPointerIsValid(&htup->t_self));
464  Assert(htup->t_tableOid != InvalidOid);
465 
466  if (!HeapTupleHeaderXminCommitted(tuple))
467  {
468  if (HeapTupleHeaderXminInvalid(tuple))
469  return TM_Invisible;
470 
471  /* Used by pre-9.0 binary upgrades */
472  if (tuple->t_infomask & HEAP_MOVED_OFF)
473  {
474  TransactionId xvac = HeapTupleHeaderGetXvac(tuple);
475 
476  if (TransactionIdIsCurrentTransactionId(xvac))
477  return TM_Invisible;
478  if (!TransactionIdIsInProgress(xvac))
479  {
480  if (TransactionIdDidCommit(xvac))
481  {
482  SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
483  InvalidTransactionId);
484  return TM_Invisible;
485  }
486  SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
487  InvalidTransactionId);
488  }
489  }
490  /* Used by pre-9.0 binary upgrades */
491  else if (tuple->t_infomask & HEAP_MOVED_IN)
492  {
493  TransactionId xvac = HeapTupleHeaderGetXvac(tuple);
494 
495  if (!TransactionIdIsCurrentTransactionId(xvac))
496  {
497  if (TransactionIdIsInProgress(xvac))
498  return TM_Invisible;
499  if (TransactionIdDidCommit(xvac))
500  SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
501  InvalidTransactionId);
502  else
503  {
504  SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
505  InvalidTransactionId);
506  return TM_Invisible;
507  }
508  }
509  }
510  else if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmin(tuple)))
511  {
512  if (HeapTupleHeaderGetCmin(tuple) >= curcid)
513  return TM_Invisible; /* inserted after scan started */
514 
515  if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */
516  return TM_Ok;
517 
518  if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
519  {
520  TransactionId xmax;
521 
522  xmax = HeapTupleHeaderGetRawXmax(tuple);
523 
524  /*
525  * Careful here: even though this tuple was created by our own
526  * transaction, it might be locked by other transactions, if
527  * the original version was key-share locked when we updated
528  * it.
529  */
530 
531  if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
532  {
533  if (MultiXactIdIsRunning(xmax, true))
534  return TM_BeingModified;
535  else
536  return TM_Ok;
537  }
538 
539  /*
540  * If the locker is gone, then there is nothing of interest
541  * left in this Xmax; otherwise, report the tuple as
542  * locked/updated.
543  */
544  if (!TransactionIdIsInProgress(xmax))
545  return TM_Ok;
546  return TM_BeingModified;
547  }
548 
549  if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
550  {
551  TransactionId xmax;
552 
553  xmax = HeapTupleGetUpdateXid(tuple);
554 
555  /* not LOCKED_ONLY, so it has to have an xmax */
556  Assert(TransactionIdIsValid(xmax));
557 
558  /* deleting subtransaction must have aborted */
559  if (!TransactionIdIsCurrentTransactionId(xmax))
560  {
561  if (MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple),
562  false))
563  return TM_BeingModified;
564  return TM_Ok;
565  }
566  else
567  {
568  if (HeapTupleHeaderGetCmax(tuple) >= curcid)
569  return TM_SelfModified; /* updated after scan started */
570  else
571  return TM_Invisible; /* updated before scan started */
572  }
573  }
574 
575  if (!TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple)))
576  {
577  /* deleting subtransaction must have aborted */
578  SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
579  InvalidTransactionId);
580  return TM_Ok;
581  }
582 
583  if (HeapTupleHeaderGetCmax(tuple) >= curcid)
584  return TM_SelfModified; /* updated after scan started */
585  else
586  return TM_Invisible; /* updated before scan started */
587  }
588  else if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmin(tuple)))
589  return TM_Invisible;
590  else if (TransactionIdDidCommit(HeapTupleHeaderGetRawXmin(tuple)))
591  SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
592  HeapTupleHeaderGetRawXmin(tuple));
593  else
594  {
595  /* it must have aborted or crashed */
596  SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
597  InvalidTransactionId);
598  return TM_Invisible;
599  }
600  }
601 
602  /* by here, the inserting transaction has committed */
603 
604  if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid or aborted */
605  return TM_Ok;
606 
607  if (tuple->t_infomask & HEAP_XMAX_COMMITTED)
608  {
609  if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
610  return TM_Ok;
611  if (!ItemPointerEquals(&htup->t_self, &tuple->t_ctid))
612  return TM_Updated; /* updated by other */
613  else
614  return TM_Deleted; /* deleted by other */
615  }
616 
617  if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
618  {
619  TransactionId xmax;
620 
621  if (HEAP_LOCKED_UPGRADED(tuple->t_infomask))
622  return TM_Ok;
623 
624  if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
625  {
626  if (MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple), true))
627  return TM_BeingModified;
628 
629  SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, InvalidTransactionId);
630  return TM_Ok;
631  }
632 
633  xmax = HeapTupleGetUpdateXid(tuple);
634  if (!TransactionIdIsValid(xmax))
635  {
636  if (MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple), false))
637  return TM_BeingModified;
638  }
639 
640  /* not LOCKED_ONLY, so it has to have an xmax */
641  Assert(TransactionIdIsValid(xmax));
642 
643  if (TransactionIdIsCurrentTransactionId(xmax))
644  {
645  if (HeapTupleHeaderGetCmax(tuple) >= curcid)
646  return TM_SelfModified; /* updated after scan started */
647  else
648  return TM_Invisible; /* updated before scan started */
649  }
650 
651  if (MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple), false))
652  return TM_BeingModified;
653 
654  if (TransactionIdDidCommit(xmax))
655  {
656  if (!ItemPointerEquals(&htup->t_self, &tuple->t_ctid))
657  return TM_Updated;
658  else
659  return TM_Deleted;
660  }
661 
662  /*
663  * By here, the update in the Xmax is either aborted or crashed, but
664  * what about the other members?
665  */
666 
667  if (!MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple), false))
668  {
669  /*
670  * There's no member, even just a locker, alive anymore, so we can
671  * mark the Xmax as invalid.
672  */
673  SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
674  InvalidTransactionId);
675  return TM_Ok;
676  }
677  else
678  {
679  /* There are lockers running */
680  return TM_BeingModified;
681  }
682  }
683 
684  if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple)))
685  {
686  if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
687  return TM_BeingModified;
688  if (HeapTupleHeaderGetCmax(tuple) >= curcid)
689  return TM_SelfModified; /* updated after scan started */
690  else
691  return TM_Invisible; /* updated before scan started */
692  }
693 
694  if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmax(tuple)))
695  return TM_BeingModified;
696 
697  if (!TransactionIdDidCommit(HeapTupleHeaderGetRawXmax(tuple)))
698  {
699  /* it must have aborted or crashed */
700  SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
701  InvalidTransactionId);
702  return TM_Ok;
703  }
704 
705  /* xmax transaction committed */
706 
707  if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
708  {
709  SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
710  InvalidTransactionId);
711  return TM_Ok;
712  }
713 
714  SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED,
715  HeapTupleHeaderGetRawXmax(tuple));
716  if (!ItemPointerEquals(&htup->t_self, &tuple->t_ctid))
717  return TM_Updated; /* updated by other */
718  else
719  return TM_Deleted; /* deleted by other */
720 }
CommandId HeapTupleHeaderGetCmin(HeapTupleHeader tup)
Definition: combocid.c:104
static void SetHintBits(HeapTupleHeader tuple, Buffer buffer, uint16 infomask, TransactionId xid)
#define HEAP_XMIN_COMMITTED
Definition: htup_details.h:204
#define HEAP_MOVED_IN
Definition: htup_details.h:212
#define HEAP_XMIN_INVALID
Definition: htup_details.h:205
bool MultiXactIdIsRunning(MultiXactId multi, bool isLockOnly)
Definition: multixact.c:598

References Assert, HEAP_LOCKED_UPGRADED, HEAP_MOVED_IN, HEAP_MOVED_OFF, HEAP_XMAX_COMMITTED, HEAP_XMAX_INVALID, HEAP_XMAX_IS_LOCKED_ONLY, HEAP_XMAX_IS_MULTI, HEAP_XMIN_COMMITTED, HEAP_XMIN_INVALID, HeapTupleGetUpdateXid(), HeapTupleHeaderGetCmax(), HeapTupleHeaderGetCmin(), HeapTupleHeaderGetRawXmax, HeapTupleHeaderGetRawXmin, HeapTupleHeaderGetXvac, HeapTupleHeaderXminCommitted, HeapTupleHeaderXminInvalid, InvalidOid, InvalidTransactionId, ItemPointerEquals(), ItemPointerIsValid(), MultiXactIdIsRunning(), SetHintBits(), HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleData::t_self, HeapTupleData::t_tableOid, TM_BeingModified, TM_Deleted, TM_Invisible, TM_Ok, TM_SelfModified, TM_Updated, TransactionIdDidCommit(), TransactionIdIsCurrentTransactionId(), TransactionIdIsInProgress(), and TransactionIdIsValid.

Referenced by heap_delete(), heap_lock_tuple(), heap_update(), and pgrowlocks().
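
A usage sketch (hypothetical helper): mapping the TM_Result to a human-readable classification, roughly the way pgrowlocks() reports lock state. The caller is assumed to hold a pin and content lock on the buffer containing the tuple.

#include "postgres.h"
#include "access/heapam.h"
#include "access/tableam.h"
#include "access/xact.h"

static const char *
classify_for_update(HeapTuple tup, Buffer buffer)
{
    switch (HeapTupleSatisfiesUpdate(tup, GetCurrentCommandId(false), buffer))
    {
        case TM_Ok:
            return "updatable";
        case TM_Invisible:
            return "not visible to this command";
        case TM_SelfModified:
            return "already modified by this command";
        case TM_BeingModified:
            return "locked or being updated by another transaction";
        case TM_Updated:
        case TM_Deleted:
            return "removed or replaced by a committed transaction";
        default:
            return "unexpected result";
    }
}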

◆ HeapTupleSatisfiesVacuum()

HTSV_Result HeapTupleSatisfiesVacuum ( HeapTuple  htup,
TransactionId  OldestXmin,
Buffer  buffer 
)

Definition at line 1162 of file heapam_visibility.c.

1164 {
1165  TransactionId dead_after = InvalidTransactionId;
1166  HTSV_Result res;
1167 
1168  res = HeapTupleSatisfiesVacuumHorizon(htup, buffer, &dead_after);
1169 
1170  if (res == HEAPTUPLE_RECENTLY_DEAD)
1171  {
1172  Assert(TransactionIdIsValid(dead_after));
1173 
1174  if (TransactionIdPrecedes(dead_after, OldestXmin))
1175  res = HEAPTUPLE_DEAD;
1176  }
1177  else
1178  Assert(!TransactionIdIsValid(dead_after));
1179 
1180  return res;
1181 }
HTSV_Result HeapTupleSatisfiesVacuumHorizon(HeapTuple htup, Buffer buffer, TransactionId *dead_after)

References Assert, HEAPTUPLE_DEAD, HEAPTUPLE_RECENTLY_DEAD, HeapTupleSatisfiesVacuumHorizon(), InvalidTransactionId, res, TransactionIdIsValid, and TransactionIdPrecedes().

Referenced by heap_page_is_all_visible(), heapam_index_build_range_scan(), heapam_relation_copy_for_cluster(), heapam_scan_analyze_next_tuple(), HeapCheckForSerializableConflictOut(), lazy_scan_noprune(), statapprox_heap(), and tuple_all_visible().
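
A usage sketch (hypothetical function, not how VACUUM itself is structured): counting tuples on a locked heap page that are already removable under a given OldestXmin horizon.

#include "postgres.h"
#include "access/heapam.h"
#include "access/htup_details.h"
#include "storage/bufmgr.h"
#include "storage/bufpage.h"
#include "utils/rel.h"

static int
count_removable_tuples(Relation rel, Buffer buffer, TransactionId oldest_xmin)
{
    Page        page = BufferGetPage(buffer);   /* buffer is pinned + locked */
    OffsetNumber offnum,
                maxoff = PageGetMaxOffsetNumber(page);
    int         ndead = 0;

    for (offnum = FirstOffsetNumber; offnum <= maxoff;
         offnum = OffsetNumberNext(offnum))
    {
        ItemId      itemid = PageGetItemId(page, offnum);
        HeapTupleData tup;

        if (!ItemIdIsNormal(itemid))
            continue;           /* unused, redirect or LP_DEAD stub */

        tup.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
        tup.t_len = ItemIdGetLength(itemid);
        tup.t_tableOid = RelationGetRelid(rel);
        ItemPointerSet(&tup.t_self, BufferGetBlockNumber(buffer), offnum);

        if (HeapTupleSatisfiesVacuum(&tup, oldest_xmin, buffer) == HEAPTUPLE_DEAD)
            ndead++;
    }

    return ndead;
}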

◆ HeapTupleSatisfiesVacuumHorizon()

HTSV_Result HeapTupleSatisfiesVacuumHorizon ( HeapTuple  htup,
Buffer  buffer,
TransactionId dead_after 
)

Definition at line 1196 of file heapam_visibility.c.

1197 {
1198  HeapTupleHeader tuple = htup->t_data;
1199 
1200  Assert(ItemPointerIsValid(&htup->t_self));
1201  Assert(htup->t_tableOid != InvalidOid);
1202  Assert(dead_after != NULL);
1203 
1204  *dead_after = InvalidTransactionId;
1205 
1206  /*
1207  * Has inserting transaction committed?
1208  *
1209  * If the inserting transaction aborted, then the tuple was never visible
1210  * to any other transaction, so we can delete it immediately.
1211  */
1212  if (!HeapTupleHeaderXminCommitted(tuple))
1213  {
1214  if (HeapTupleHeaderXminInvalid(tuple))
1215  return HEAPTUPLE_DEAD;
1216  /* Used by pre-9.0 binary upgrades */
1217  else if (tuple->t_infomask & HEAP_MOVED_OFF)
1218  {
1219  TransactionId xvac = HeapTupleHeaderGetXvac(tuple);
1220 
1221  if (TransactionIdIsCurrentTransactionId(xvac))
1222  return HEAPTUPLE_DELETE_IN_PROGRESS;
1223  if (TransactionIdIsInProgress(xvac))
1224  return HEAPTUPLE_DELETE_IN_PROGRESS;
1225  if (TransactionIdDidCommit(xvac))
1226  {
1227  SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
1228  InvalidTransactionId);
1229  return HEAPTUPLE_DEAD;
1230  }
1231  SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
1232  InvalidTransactionId);
1233  }
1234  /* Used by pre-9.0 binary upgrades */
1235  else if (tuple->t_infomask & HEAP_MOVED_IN)
1236  {
1237  TransactionId xvac = HeapTupleHeaderGetXvac(tuple);
1238 
1239  if (TransactionIdIsCurrentTransactionId(xvac))
1240  return HEAPTUPLE_INSERT_IN_PROGRESS;
1241  if (TransactionIdIsInProgress(xvac))
1242  return HEAPTUPLE_INSERT_IN_PROGRESS;
1243  if (TransactionIdDidCommit(xvac))
1244  SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
1245  InvalidTransactionId);
1246  else
1247  {
1248  SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
1249  InvalidTransactionId);
1250  return HEAPTUPLE_DEAD;
1251  }
1252  }
1253  else if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmin(tuple)))
1254  {
1255  if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */
1256  return HEAPTUPLE_INSERT_IN_PROGRESS;
1257  /* only locked? run infomask-only check first, for performance */
1258  if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask) ||
1259  HeapTupleHeaderIsOnlyLocked(tuple))
1260  return HEAPTUPLE_INSERT_IN_PROGRESS;
1261  /* inserted and then deleted by same xact */
1262  if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(tuple)))
1263  return HEAPTUPLE_DELETE_IN_PROGRESS;
1264  /* deleting subtransaction must have aborted */
1265  return HEAPTUPLE_INSERT_IN_PROGRESS;
1266  }
1267  else if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmin(tuple)))
1268  {
1269  /*
1270  * It'd be possible to discern between INSERT/DELETE in progress
1271  * here by looking at xmax - but that doesn't seem beneficial for
1272  * the majority of callers and even detrimental for some. We'd
1273  * rather have callers look at/wait for xmin than xmax. It's
1274  * always correct to return INSERT_IN_PROGRESS because that's
1275  * what's happening from the view of other backends.
1276  */
1277  return HEAPTUPLE_INSERT_IN_PROGRESS;
1278  }
1279  else if (TransactionIdDidCommit(HeapTupleHeaderGetRawXmin(tuple)))
1280  SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
1281  HeapTupleHeaderGetRawXmin(tuple));
1282  else
1283  {
1284  /*
1285  * Not in Progress, Not Committed, so either Aborted or crashed
1286  */
1287  SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
1288  InvalidTransactionId);
1289  return HEAPTUPLE_DEAD;
1290  }
1291 
1292  /*
1293  * At this point the xmin is known committed, but we might not have
1294  * been able to set the hint bit yet; so we can no longer Assert that
1295  * it's set.
1296  */
1297  }
1298 
1299  /*
1300  * Okay, the inserter committed, so it was good at some point. Now what
1301  * about the deleting transaction?
1302  */
1303  if (tuple->t_infomask & HEAP_XMAX_INVALID)
1304  return HEAPTUPLE_LIVE;
1305 
1306  if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
1307  {
1308  /*
1309  * "Deleting" xact really only locked it, so the tuple is live in any
1310  * case. However, we should make sure that either XMAX_COMMITTED or
1311  * XMAX_INVALID gets set once the xact is gone, to reduce the costs of
1312  * examining the tuple for future xacts.
1313  */
1314  if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED))
1315  {
1316  if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
1317  {
1318  /*
1319  * If it's a pre-pg_upgrade tuple, the multixact cannot
1320  * possibly be running; otherwise have to check.
1321  */
1322  if (!HEAP_LOCKED_UPGRADED(tuple->t_infomask) &&
1323  MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple),
1324  true))
1325  return HEAPTUPLE_LIVE;
1326  SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, InvalidTransactionId);
1327  }
1328  else
1329  {
1330  if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmax(tuple)))
1331  return HEAPTUPLE_LIVE;
1332  SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
1333  InvalidTransactionId);
1334  }
1335  }
1336 
1337  /*
1338  * We don't really care whether xmax did commit, abort or crash. We
1339  * know that xmax did lock the tuple, but it did not and will never
1340  * actually update it.
1341  */
1342 
1343  return HEAPTUPLE_LIVE;
1344  }
1345 
1346  if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
1347  {
1348  TransactionId xmax = HeapTupleGetUpdateXid(tuple);
1349 
1350  /* already checked above */
1351  Assert(!HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask));
1352 
1353  /* not LOCKED_ONLY, so it has to have an xmax */
1354  Assert(TransactionIdIsValid(xmax));
1355 
1356  if (TransactionIdIsInProgress(xmax))
1357  return HEAPTUPLE_DELETE_IN_PROGRESS;
1358  else if (TransactionIdDidCommit(xmax))
1359  {
1360  /*
1361  * The multixact might still be running due to lockers. Need to
1362  * allow for pruning if below the xid horizon regardless --
1363  * otherwise we could end up with a tuple where the updater has to
1364  * be removed due to the horizon, but is not pruned away. It's
1365  * not a problem to prune that tuple, because any remaining
1366  * lockers will also be present in newer tuple versions.
1367  */
1368  *dead_after = xmax;
1369  return HEAPTUPLE_RECENTLY_DEAD;
1370  }
1371  else if (!MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple), false))
1372  {
1373  /*
1374  * Not in Progress, Not Committed, so either Aborted or crashed.
1375  * Mark the Xmax as invalid.
1376  */
1377  SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, InvalidTransactionId);
1378  }
1379 
1380  return HEAPTUPLE_LIVE;
1381  }
1382 
1383  if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED))
1384  {
1385  if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmax(tuple)))
1386  return HEAPTUPLE_DELETE_IN_PROGRESS;
1387  else if (TransactionIdDidCommit(HeapTupleHeaderGetRawXmax(tuple)))
1388  SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED,
1389  HeapTupleHeaderGetRawXmax(tuple));
1390  else
1391  {
1392  /*
1393  * Not in Progress, Not Committed, so either Aborted or crashed
1394  */
1395  SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
1396  InvalidTransactionId);
1397  return HEAPTUPLE_LIVE;
1398  }
1399 
1400  /*
1401  * At this point the xmax is known committed, but we might not have
1402  * been able to set the hint bit yet; so we can no longer Assert that
1403  * it's set.
1404  */
1405  }
1406 
1407  /*
1408  * Deleter committed, allow caller to check if it was recent enough that
1409  * some open transactions could still see the tuple.
1410  */
1411  *dead_after = HeapTupleHeaderGetRawXmax(tuple);
1412  return HEAPTUPLE_RECENTLY_DEAD;
1413 }

References Assert, HEAP_LOCKED_UPGRADED, HEAP_MOVED_IN, HEAP_MOVED_OFF, HEAP_XMAX_COMMITTED, HEAP_XMAX_INVALID, HEAP_XMAX_IS_LOCKED_ONLY, HEAP_XMAX_IS_MULTI, HEAP_XMIN_COMMITTED, HEAP_XMIN_INVALID, HEAPTUPLE_DEAD, HEAPTUPLE_DELETE_IN_PROGRESS, HEAPTUPLE_INSERT_IN_PROGRESS, HEAPTUPLE_LIVE, HEAPTUPLE_RECENTLY_DEAD, HeapTupleGetUpdateXid(), HeapTupleHeaderGetRawXmax, HeapTupleHeaderGetRawXmin, HeapTupleHeaderGetUpdateXid, HeapTupleHeaderGetXvac, HeapTupleHeaderIsOnlyLocked(), HeapTupleHeaderXminCommitted, HeapTupleHeaderXminInvalid, InvalidOid, InvalidTransactionId, ItemPointerIsValid(), MultiXactIdIsRunning(), SetHintBits(), HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleData::t_self, HeapTupleData::t_tableOid, TransactionIdDidCommit(), TransactionIdIsCurrentTransactionId(), TransactionIdIsInProgress(), and TransactionIdIsValid.

Referenced by heap_prune_satisfies_vacuum(), HeapTupleSatisfiesNonVacuumable(), and HeapTupleSatisfiesVacuum().

◆ HeapTupleSatisfiesVisibility()

bool HeapTupleSatisfiesVisibility ( HeapTuple  htup,
Snapshot  snapshot,
Buffer  buffer 
)

Definition at line 1767 of file heapam_visibility.c.

1768 {
1769  switch (snapshot->snapshot_type)
1770  {
1771  case SNAPSHOT_MVCC:
1772  return HeapTupleSatisfiesMVCC(htup, snapshot, buffer);
1773  case SNAPSHOT_SELF:
1774  return HeapTupleSatisfiesSelf(htup, snapshot, buffer);
1775  case SNAPSHOT_ANY:
1776  return HeapTupleSatisfiesAny(htup, snapshot, buffer);
1777  case SNAPSHOT_TOAST:
1778  return HeapTupleSatisfiesToast(htup, snapshot, buffer);
1779  case SNAPSHOT_DIRTY:
1780  return HeapTupleSatisfiesDirty(htup, snapshot, buffer);
1781  case SNAPSHOT_HISTORIC_MVCC:
1782  return HeapTupleSatisfiesHistoricMVCC(htup, snapshot, buffer);
1783  case SNAPSHOT_NON_VACUUMABLE:
1784  return HeapTupleSatisfiesNonVacuumable(htup, snapshot, buffer);
1785  }
1786 
1787  return false; /* keep compiler quiet */
1788 }
static bool HeapTupleSatisfiesAny(HeapTuple htup, Snapshot snapshot, Buffer buffer)
static bool HeapTupleSatisfiesNonVacuumable(HeapTuple htup, Snapshot snapshot, Buffer buffer)
static bool HeapTupleSatisfiesToast(HeapTuple htup, Snapshot snapshot, Buffer buffer)
static bool HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot, Buffer buffer)
static bool HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot, Buffer buffer)
static bool HeapTupleSatisfiesSelf(HeapTuple htup, Snapshot snapshot, Buffer buffer)
static bool HeapTupleSatisfiesHistoricMVCC(HeapTuple htup, Snapshot snapshot, Buffer buffer)
@ SNAPSHOT_TOAST
Definition: snapshot.h:74
@ SNAPSHOT_SELF
Definition: snapshot.h:64
@ SNAPSHOT_NON_VACUUMABLE
Definition: snapshot.h:118
@ SNAPSHOT_MVCC
Definition: snapshot.h:50
@ SNAPSHOT_ANY
Definition: snapshot.h:69
@ SNAPSHOT_HISTORIC_MVCC
Definition: snapshot.h:109
@ SNAPSHOT_DIRTY
Definition: snapshot.h:102
SnapshotType snapshot_type
Definition: snapshot.h:144

References HeapTupleSatisfiesAny(), HeapTupleSatisfiesDirty(), HeapTupleSatisfiesHistoricMVCC(), HeapTupleSatisfiesMVCC(), HeapTupleSatisfiesNonVacuumable(), HeapTupleSatisfiesSelf(), HeapTupleSatisfiesToast(), SNAPSHOT_ANY, SNAPSHOT_DIRTY, SNAPSHOT_HISTORIC_MVCC, SNAPSHOT_MVCC, SNAPSHOT_NON_VACUUMABLE, SNAPSHOT_SELF, SNAPSHOT_TOAST, and SnapshotData::snapshot_type.

Referenced by heap_delete(), heap_fetch(), heap_get_latest_tid(), heap_hot_search_buffer(), heap_update(), heapam_scan_bitmap_next_block(), heapam_tuple_satisfies_snapshot(), heapgettup(), page_collect_tuples(), pgstat_heap(), SampleHeapTupleVisible(), and ScanSourceDatabasePgClassPage().
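
A usage sketch (hypothetical wrapper): testing one tuple against the transaction's active MVCC snapshot. The content lock is taken here because the visibility routines may set hint bits on the page.

#include "postgres.h"
#include "access/heapam.h"
#include "storage/bufmgr.h"
#include "utils/snapmgr.h"

static bool
tuple_visible_to_active_snapshot(HeapTuple tup, Buffer buffer)
{
    Snapshot    snapshot = GetActiveSnapshot(); /* assumes one is pushed */
    bool        visible;

    LockBuffer(buffer, BUFFER_LOCK_SHARE);
    visible = HeapTupleSatisfiesVisibility(tup, snapshot, buffer);
    LockBuffer(buffer, BUFFER_LOCK_UNLOCK);

    return visible;
}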

◆ HeapTupleSetHintBits()

void HeapTupleSetHintBits ( HeapTupleHeader  tuple,
Buffer  buffer,
uint16  infomask,
TransactionId  xid 
)

Definition at line 141 of file heapam_visibility.c.

143 {
144  SetHintBits(tuple, buffer, infomask, xid);
145 }

References SetHintBits().

Referenced by UpdateXmaxHintBits().

◆ log_heap_prune_and_freeze()

void log_heap_prune_and_freeze ( Relation  relation,
Buffer  buffer,
TransactionId  conflict_xid,
bool  cleanup_lock,
PruneReason  reason,
HeapTupleFreeze frozen,
int  nfrozen,
OffsetNumber redirected,
int  nredirected,
OffsetNumber dead,
int  ndead,
OffsetNumber unused,
int  nunused 
)

Definition at line 2053 of file pruneheap.c.

2061 {
2062  xl_heap_prune xlrec;
2063  XLogRecPtr recptr;
2064  uint8 info;
2065 
2066  /* The following local variables hold data registered in the WAL record: */
2067  xlhp_freeze_plan plans[MaxHeapTuplesPerPage];
2068  xlhp_freeze_plans freeze_plans;
2069  xlhp_prune_items redirect_items;
2070  xlhp_prune_items dead_items;
2071  xlhp_prune_items unused_items;
2072  OffsetNumber frz_offsets[MaxHeapTuplesPerPage];
2073 
2074  xlrec.flags = 0;
2075 
2076  /*
2077  * Prepare data for the buffer. The arrays are not actually in the
2078  * buffer, but we pretend that they are. When XLogInsert stores a full
2079  * page image, the arrays can be omitted.
2080  */
2081  XLogBeginInsert();
2082  XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
2083  if (nfrozen > 0)
2084  {
2085  int nplans;
2086 
2087  xlrec.flags |= XLHP_HAS_FREEZE_PLANS;
2088 
2089  /*
2090  * Prepare deduplicated representation for use in the WAL record. This
2091  * destructively sorts frozen tuples array in-place.
2092  */
2093  nplans = heap_log_freeze_plan(frozen, nfrozen, plans, frz_offsets);
2094 
2095  freeze_plans.nplans = nplans;
2096  XLogRegisterBufData(0, (char *) &freeze_plans,
2097  offsetof(xlhp_freeze_plans, plans));
2098  XLogRegisterBufData(0, (char *) plans,
2099  sizeof(xlhp_freeze_plan) * nplans);
2100  }
2101  if (nredirected > 0)
2102  {
2103  xlrec.flags |= XLHP_HAS_REDIRECTIONS;
2104 
2105  redirect_items.ntargets = nredirected;
2106  XLogRegisterBufData(0, (char *) &redirect_items,
2107  offsetof(xlhp_prune_items, data));
2108  XLogRegisterBufData(0, (char *) redirected,
2109  sizeof(OffsetNumber[2]) * nredirected);
2110  }
2111  if (ndead > 0)
2112  {
2113  xlrec.flags |= XLHP_HAS_DEAD_ITEMS;
2114 
2115  dead_items.ntargets = ndead;
2116  XLogRegisterBufData(0, (char *) &dead_items,
2117  offsetof(xlhp_prune_items, data));
2118  XLogRegisterBufData(0, (char *) dead,
2119  sizeof(OffsetNumber) * ndead);
2120  }
2121  if (nunused > 0)
2122  {
2123  xlrec.flags |= XLHP_HAS_NOW_UNUSED_ITEMS;
2124 
2125  unused_items.ntargets = nunused;
2126  XLogRegisterBufData(0, (char *) &unused_items,
2127  offsetof(xlhp_prune_items, data));
2128  XLogRegisterBufData(0, (char *) unused,
2129  sizeof(OffsetNumber) * nunused);
2130  }
2131  if (nfrozen > 0)
2132  XLogRegisterBufData(0, (char *) frz_offsets,
2133  sizeof(OffsetNumber) * nfrozen);
2134 
2135  /*
2136  * Prepare the main xl_heap_prune record. We already set the XLPH_HAS_*
2137  * flag above.
2138  */
2139  if (RelationIsAccessibleInLogicalDecoding(relation))
2140  xlrec.flags |= XLHP_IS_CATALOG_REL;
2141  if (TransactionIdIsValid(conflict_xid))
2142  xlrec.flags |= XLHP_HAS_CONFLICT_HORIZON;
2143  if (cleanup_lock)
2144  xlrec.flags |= XLHP_CLEANUP_LOCK;
2145  else
2146  {
2147  Assert(nredirected == 0 && ndead == 0);
2148  /* also, any items in 'unused' must've been LP_DEAD previously */
2149  }
2150  XLogRegisterData((char *) &xlrec, SizeOfHeapPrune);
2151  if (TransactionIdIsValid(conflict_xid))
2152  XLogRegisterData((char *) &conflict_xid, sizeof(TransactionId));
2153 
2154  switch (reason)
2155  {
2156  case PRUNE_ON_ACCESS:
2157  info = XLOG_HEAP2_PRUNE_ON_ACCESS;
2158  break;
2159  case PRUNE_VACUUM_SCAN:
2160  info = XLOG_HEAP2_PRUNE_VACUUM_SCAN;
2161  break;
2162  case PRUNE_VACUUM_CLEANUP:
2163  info = XLOG_HEAP2_PRUNE_VACUUM_CLEANUP;
2164  break;
2165  default:
2166  elog(ERROR, "unrecognized prune reason: %d", (int) reason);
2167  break;
2168  }
2169  recptr = XLogInsert(RM_HEAP2_ID, info);
2170 
2171  PageSetLSN(BufferGetPage(buffer), recptr);
2172 }
#define XLHP_HAS_CONFLICT_HORIZON
Definition: heapam_xlog.h:316
#define XLHP_HAS_FREEZE_PLANS
Definition: heapam_xlog.h:322
#define SizeOfHeapPrune
Definition: heapam_xlog.h:295
#define XLHP_HAS_NOW_UNUSED_ITEMS
Definition: heapam_xlog.h:331
#define XLHP_HAS_REDIRECTIONS
Definition: heapam_xlog.h:329
#define XLOG_HEAP2_PRUNE_VACUUM_SCAN
Definition: heapam_xlog.h:60
#define XLOG_HEAP2_PRUNE_ON_ACCESS
Definition: heapam_xlog.h:59
#define XLHP_CLEANUP_LOCK
Definition: heapam_xlog.h:308
#define XLHP_HAS_DEAD_ITEMS
Definition: heapam_xlog.h:330
#define XLOG_HEAP2_PRUNE_VACUUM_CLEANUP
Definition: heapam_xlog.h:61
#define XLHP_IS_CATALOG_REL
Definition: heapam_xlog.h:298
const void * data
static int heap_log_freeze_plan(HeapTupleFreeze *tuples, int ntuples, xlhp_freeze_plan *plans_out, OffsetNumber *offsets_out)
Definition: pruneheap.c:1978

References Assert, BufferGetPage(), data, elog, ERROR, xl_heap_prune::flags, heap_log_freeze_plan(), MaxHeapTuplesPerPage, xlhp_freeze_plans::nplans, xlhp_prune_items::ntargets, PageSetLSN(), PRUNE_ON_ACCESS, PRUNE_VACUUM_CLEANUP, PRUNE_VACUUM_SCAN, REGBUF_STANDARD, RelationIsAccessibleInLogicalDecoding, SizeOfHeapPrune, TransactionIdIsValid, XLHP_CLEANUP_LOCK, XLHP_HAS_CONFLICT_HORIZON, XLHP_HAS_DEAD_ITEMS, XLHP_HAS_FREEZE_PLANS, XLHP_HAS_NOW_UNUSED_ITEMS, XLHP_HAS_REDIRECTIONS, XLHP_IS_CATALOG_REL, XLOG_HEAP2_PRUNE_ON_ACCESS, XLOG_HEAP2_PRUNE_VACUUM_CLEANUP, XLOG_HEAP2_PRUNE_VACUUM_SCAN, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by heap_page_prune_and_freeze(), and lazy_vacuum_heap_page().

◆ ReleaseBulkInsertStatePin()

void ReleaseBulkInsertStatePin ( BulkInsertState  bistate)

Definition at line 1956 of file heapam.c.

1957 {
1958  if (bistate->current_buf != InvalidBuffer)
1959  ReleaseBuffer(bistate->current_buf);
1960  bistate->current_buf = InvalidBuffer;
1961 
1962  /*
1963  * Despite the name, we also reset bulk relation extension state.
1964  * Otherwise we can end up erroring out due to looking for free space in
1965  * ->next_free of one partition, even though ->next_free was set when
1966  * extending another partition. It could obviously also be bad for
1967  * efficiency to look at existing blocks at offsets from another
1968  * partition, even if we don't error out.
1969  */
1970  bistate->next_free = InvalidBlockNumber;
1971  bistate->last_free = InvalidBlockNumber;
1972 }

References BulkInsertStateData::current_buf, InvalidBlockNumber, InvalidBuffer, BulkInsertStateData::last_free, BulkInsertStateData::next_free, and ReleaseBuffer().

Referenced by CopyFrom().
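
A usage sketch modeled on CopyFrom(): one BulkInsertState is reused for a whole load, but its pinned buffer and cached block hints are dropped with ReleaseBulkInsertStatePin() whenever the target relation (e.g. a partition) changes. The relation and tuple handling below is simplified and hypothetical.

#include "postgres.h"
#include "access/heapam.h"
#include "access/xact.h"

static void
bulk_load_two_targets(Relation part_a, HeapTuple *tups_a, int ntups_a,
                      Relation part_b, HeapTuple *tups_b, int ntups_b)
{
    BulkInsertState bistate = GetBulkInsertState();
    CommandId   cid = GetCurrentCommandId(true);

    for (int i = 0; i < ntups_a; i++)
        heap_insert(part_a, tups_a[i], cid, 0, bistate);

    /* switching relations: forget the buffer and blocks cached for part_a */
    ReleaseBulkInsertStatePin(bistate);

    for (int i = 0; i < ntups_b; i++)
        heap_insert(part_b, tups_b[i], cid, 0, bistate);

    FreeBulkInsertState(bistate);
}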

◆ ResolveCminCmaxDuringDecoding()

bool ResolveCminCmaxDuringDecoding ( struct HTAB tuplecid_data,
Snapshot  snapshot,
HeapTuple  htup,
Buffer  buffer,
CommandId cmin,
CommandId cmax 
)

Definition at line 5245 of file reorderbuffer.c.

5249 {
5250  ReorderBufferTupleCidKey key;
5251  ReorderBufferTupleCidEnt *ent;
5252  ForkNumber forkno;
5253  BlockNumber blockno;
5254  bool updated_mapping = false;
5255 
5256  /*
5257  * Return unresolved if tuplecid_data is not valid. That's because when
5258  * streaming in-progress transactions we may run into tuples with the CID
5259  * before actually decoding them. Think e.g. about INSERT followed by
5260  * TRUNCATE, where the TRUNCATE may not be decoded yet when applying the
5261  * INSERT. So in such cases, we assume the CID is from the future
5262  * command.
5263  */
5264  if (tuplecid_data == NULL)
5265  return false;
5266 
5267  /* be careful about padding */
5268  memset(&key, 0, sizeof(key));
5269 
5270  Assert(!BufferIsLocal(buffer));
5271 
5272  /*
5273  * get relfilelocator from the buffer, no convenient way to access it
5274  * other than that.
5275  */
5276  BufferGetTag(buffer, &key.rlocator, &forkno, &blockno);
5277 
5278  /* tuples can only be in the main fork */
5279  Assert(forkno == MAIN_FORKNUM);
5280  Assert(blockno == ItemPointerGetBlockNumber(&htup->t_self));
5281 
5282  ItemPointerCopy(&htup->t_self,
5283  &key.tid);
5284 
5285 restart:
5286  ent = (ReorderBufferTupleCidEnt *)
5287  hash_search(tuplecid_data, &key, HASH_FIND, NULL);
5288 
5289  /*
5290  * failed to find a mapping, check whether the table was rewritten and
5291  * apply mapping if so, but only do that once - there can be no new
5292  * mappings while we are in here since we have to hold a lock on the
5293  * relation.
5294  */
5295  if (ent == NULL && !updated_mapping)
5296  {
5297  UpdateLogicalMappings(tuplecid_data, htup->t_tableOid, snapshot);
5298  /* now check but don't update for a mapping again */
5299  updated_mapping = true;
5300  goto restart;
5301  }
5302  else if (ent == NULL)
5303  return false;
5304 
5305  if (cmin)
5306  *cmin = ent->cmin;
5307  if (cmax)
5308  *cmax = ent->cmax;
5309  return true;
5310 }
#define BufferIsLocal(buffer)
Definition: buf.h:37
void BufferGetTag(Buffer buffer, RelFileLocator *rlocator, ForkNumber *forknum, BlockNumber *blknum)
Definition: bufmgr.c:3757
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition: dynahash.c:955
@ HASH_FIND
Definition: hsearch.h:113
ForkNumber
Definition: relpath.h:48
static void UpdateLogicalMappings(HTAB *tuplecid_data, Oid relid, Snapshot snapshot)
static HTAB * tuplecid_data
Definition: snapmgr.c:102

References Assert, BufferGetTag(), BufferIsLocal, ReorderBufferTupleCidEnt::cmax, ReorderBufferTupleCidEnt::cmin, HASH_FIND, hash_search(), ItemPointerCopy(), ItemPointerGetBlockNumber(), sort-test::key, MAIN_FORKNUM, HeapTupleData::t_self, HeapTupleData::t_tableOid, tuplecid_data, and UpdateLogicalMappings().

Referenced by HeapTupleSatisfiesHistoricMVCC().

◆ simple_heap_delete()

void simple_heap_delete ( Relation  relation,
ItemPointer  tid 
)

Definition at line 3104 of file heapam.c.

3105 {
3106  TM_Result result;
3107  TM_FailureData tmfd;
3108 
3109  result = heap_delete(relation, tid,
3110  GetCurrentCommandId(true), InvalidSnapshot,
3111  true /* wait for commit */ ,
3112  &tmfd, false /* changingPart */ );
3113  switch (result)
3114  {
3115  case TM_SelfModified:
3116  /* Tuple was already updated in current command? */
3117  elog(ERROR, "tuple already updated by self");
3118  break;
3119 
3120  case TM_Ok:
3121  /* done successfully */
3122  break;
3123 
3124  case TM_Updated:
3125  elog(ERROR, "tuple concurrently updated");
3126  break;
3127 
3128  case TM_Deleted:
3129  elog(ERROR, "tuple concurrently deleted");
3130  break;
3131 
3132  default:
3133  elog(ERROR, "unrecognized heap_delete status: %u", result);
3134  break;
3135  }
3136 }
TM_Result heap_delete(Relation relation, ItemPointer tid, CommandId cid, Snapshot crosscheck, bool wait, TM_FailureData *tmfd, bool changingPart)
Definition: heapam.c:2683
CommandId GetCurrentCommandId(bool used)
Definition: xact.c:827

References elog, ERROR, GetCurrentCommandId(), heap_delete(), InvalidSnapshot, TM_Deleted, TM_Ok, TM_SelfModified, and TM_Updated.

Referenced by CatalogTupleDelete(), and toast_delete_datum().
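
A usage sketch in the style of CatalogTupleDelete(): deleting one known TID. simple_heap_delete() raises an error itself if the tuple was concurrently updated or deleted, so no TM_Result handling is needed at the call site. The helper name is hypothetical.

#include "postgres.h"
#include "access/heapam.h"
#include "access/table.h"
#include "storage/lockdefs.h"

static void
delete_row_by_tid(Oid relid, ItemPointer tid)
{
    Relation    rel = table_open(relid, RowExclusiveLock);

    simple_heap_delete(rel, tid);

    table_close(rel, RowExclusiveLock);
}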

◆ simple_heap_insert()

void simple_heap_insert ( Relation  relation,
HeapTuple  tup 
)

Definition at line 2625 of file heapam.c.

2626 {
2627  heap_insert(relation, tup, GetCurrentCommandId(true), 0, NULL);
2628 }
void heap_insert(Relation relation, HeapTuple tup, CommandId cid, int options, BulkInsertState bistate)
Definition: heapam.c:1994

References GetCurrentCommandId(), and heap_insert().

Referenced by CatalogTupleInsert(), CatalogTupleInsertWithInfo(), and InsertOneTuple().
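
A usage sketch (hypothetical two-int4 table): forming a tuple and inserting it with simple_heap_insert(). Unlike CatalogTupleInsert(), this performs no index maintenance; tables with indexes need index entries inserted separately.

#include "postgres.h"
#include "access/heapam.h"
#include "access/htup_details.h"
#include "utils/rel.h"

static void
insert_two_int4_row(Relation rel, int32 a, int32 b)
{
    Datum       values[2] = {Int32GetDatum(a), Int32GetDatum(b)};
    bool        nulls[2] = {false, false};
    HeapTuple   tup = heap_form_tuple(RelationGetDescr(rel), values, nulls);

    simple_heap_insert(rel, tup);   /* also sets tup->t_self to the new TID */
    heap_freetuple(tup);
}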

◆ simple_heap_update()

void simple_heap_update ( Relation  relation,
ItemPointer  otid,
HeapTuple  tup,
TU_UpdateIndexes update_indexes 
)

Definition at line 4221 of file heapam.c.

4223 {
4224  TM_Result result;
4225  TM_FailureData tmfd;
4226  LockTupleMode lockmode;
4227 
4228  result = heap_update(relation, otid, tup,
4229  GetCurrentCommandId(true), InvalidSnapshot,
4230  true /* wait for commit */ ,
4231  &tmfd, &lockmode, update_indexes);
4232  switch (result)
4233  {
4234  case TM_SelfModified:
4235  /* Tuple was already updated in current command? */
4236  elog(ERROR, "tuple already updated by self");
4237  break;
4238 
4239  case TM_Ok:
4240  /* done successfully */
4241  break;
4242 
4243  case TM_Updated:
4244  elog(ERROR, "tuple concurrently updated");
4245  break;
4246 
4247  case TM_Deleted:
4248  elog(ERROR, "tuple concurrently deleted");
4249  break;
4250 
4251  default:
4252  elog(ERROR, "unrecognized heap_update status: %u", result);
4253  break;
4254  }
4255 }
TM_Result heap_update(Relation relation, ItemPointer otid, HeapTuple newtup, CommandId cid, Snapshot crosscheck, bool wait, TM_FailureData *tmfd, LockTupleMode *lockmode, TU_UpdateIndexes *update_indexes)
Definition: heapam.c:3150
LockTupleMode
Definition: lockoptions.h:50

References elog, ERROR, GetCurrentCommandId(), heap_update(), InvalidSnapshot, TM_Deleted, TM_Ok, TM_SelfModified, and TM_Updated.

Referenced by CatalogTupleUpdate(), and CatalogTupleUpdateWithInfo().
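
A usage sketch (hypothetical helper) of the fetch-modify-update pattern used by CatalogTupleUpdate(): the modified copy's t_self must identify the existing version, and the returned TU_UpdateIndexes value says whether, and which, index entries must be added for the new version.

#include "postgres.h"
#include "access/heapam.h"
#include "access/tableam.h"

static void
replace_tuple_version(Relation rel, HeapTuple newtup)
{
    TU_UpdateIndexes update_indexes;

    /* newtup->t_self points at the tuple version being replaced */
    simple_heap_update(rel, &newtup->t_self, newtup, &update_indexes);

    if (update_indexes != TU_None)
    {
        /* caller would insert index entries for newtup here (cf. CatalogIndexInsert) */
    }
}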