PostgreSQL Source Code (git master)
heapam.h File Reference
#include "access/relation.h"
#include "access/relscan.h"
#include "access/sdir.h"
#include "access/skey.h"
#include "access/table.h"
#include "access/tableam.h"
#include "nodes/lockoptions.h"
#include "nodes/primnodes.h"
#include "storage/bufpage.h"
#include "storage/dsm.h"
#include "storage/lockdefs.h"
#include "storage/read_stream.h"
#include "storage/shm_toc.h"
#include "utils/relcache.h"
#include "utils/snapshot.h"
Include dependency graph for heapam.h:
This graph shows which files directly or indirectly include this file:


Data Structures

struct  HeapScanDescData
 
struct  IndexFetchHeapData
 
struct  HeapTupleFreeze
 
struct  HeapPageFreeze
 
struct  PruneFreezeResult
 

Macros

#define HEAP_INSERT_SKIP_FSM   TABLE_INSERT_SKIP_FSM
 
#define HEAP_INSERT_FROZEN   TABLE_INSERT_FROZEN
 
#define HEAP_INSERT_NO_LOGICAL   TABLE_INSERT_NO_LOGICAL
 
#define HEAP_INSERT_SPECULATIVE   0x0010
 
#define HEAP_PAGE_PRUNE_MARK_UNUSED_NOW   (1 << 0)
 
#define HEAP_PAGE_PRUNE_FREEZE   (1 << 1)
 
#define MaxLockTupleMode   LockTupleExclusive
 
#define HEAP_FREEZE_CHECK_XMIN_COMMITTED   0x01
 
#define HEAP_FREEZE_CHECK_XMAX_ABORTED   0x02
 
#define HeapScanIsValid(scan)   PointerIsValid(scan)
 

Typedefs

typedef struct BulkInsertStateData * BulkInsertState
 
typedef struct HeapScanDescData HeapScanDescData
 
typedef struct HeapScanDescData * HeapScanDesc
 
typedef struct IndexFetchHeapData IndexFetchHeapData
 
typedef struct HeapTupleFreeze HeapTupleFreeze
 
typedef struct HeapPageFreeze HeapPageFreeze
 
typedef struct PruneFreezeResult PruneFreezeResult
 

Enumerations

enum  HTSV_Result {
  HEAPTUPLE_DEAD , HEAPTUPLE_LIVE , HEAPTUPLE_RECENTLY_DEAD , HEAPTUPLE_INSERT_IN_PROGRESS ,
  HEAPTUPLE_DELETE_IN_PROGRESS
}
 
enum  PruneReason { PRUNE_ON_ACCESS , PRUNE_VACUUM_SCAN , PRUNE_VACUUM_CLEANUP }
 

Functions

TableScanDesc heap_beginscan (Relation relation, Snapshot snapshot, int nkeys, ScanKey key, ParallelTableScanDesc parallel_scan, uint32 flags)
 
void heap_setscanlimits (TableScanDesc sscan, BlockNumber startBlk, BlockNumber numBlks)
 
void heap_prepare_pagescan (TableScanDesc sscan)
 
void heap_rescan (TableScanDesc sscan, ScanKey key, bool set_params, bool allow_strat, bool allow_sync, bool allow_pagemode)
 
void heap_endscan (TableScanDesc sscan)
 
HeapTuple heap_getnext (TableScanDesc sscan, ScanDirection direction)
 
bool heap_getnextslot (TableScanDesc sscan, ScanDirection direction, struct TupleTableSlot *slot)
 
void heap_set_tidrange (TableScanDesc sscan, ItemPointer mintid, ItemPointer maxtid)
 
bool heap_getnextslot_tidrange (TableScanDesc sscan, ScanDirection direction, TupleTableSlot *slot)
 
bool heap_fetch (Relation relation, Snapshot snapshot, HeapTuple tuple, Buffer *userbuf, bool keep_buf)
 
bool heap_hot_search_buffer (ItemPointer tid, Relation relation, Buffer buffer, Snapshot snapshot, HeapTuple heapTuple, bool *all_dead, bool first_call)
 
void heap_get_latest_tid (TableScanDesc sscan, ItemPointer tid)
 
BulkInsertState GetBulkInsertState (void)
 
void FreeBulkInsertState (BulkInsertState)
 
void ReleaseBulkInsertStatePin (BulkInsertState bistate)
 
void heap_insert (Relation relation, HeapTuple tup, CommandId cid, int options, BulkInsertState bistate)
 
void heap_multi_insert (Relation relation, struct TupleTableSlot **slots, int ntuples, CommandId cid, int options, BulkInsertState bistate)
 
TM_Result heap_delete (Relation relation, ItemPointer tid, CommandId cid, Snapshot crosscheck, bool wait, struct TM_FailureData *tmfd, bool changingPart)
 
void heap_finish_speculative (Relation relation, ItemPointer tid)
 
void heap_abort_speculative (Relation relation, ItemPointer tid)
 
TM_Result heap_update (Relation relation, ItemPointer otid, HeapTuple newtup, CommandId cid, Snapshot crosscheck, bool wait, struct TM_FailureData *tmfd, LockTupleMode *lockmode, TU_UpdateIndexes *update_indexes)
 
TM_Result heap_lock_tuple (Relation relation, HeapTuple tuple, CommandId cid, LockTupleMode mode, LockWaitPolicy wait_policy, bool follow_updates, Buffer *buffer, struct TM_FailureData *tmfd)
 
void heap_inplace_update (Relation relation, HeapTuple tuple)
 
bool heap_prepare_freeze_tuple (HeapTupleHeader tuple, const struct VacuumCutoffs *cutoffs, HeapPageFreeze *pagefrz, HeapTupleFreeze *frz, bool *totally_frozen)
 
void heap_pre_freeze_checks (Buffer buffer, HeapTupleFreeze *tuples, int ntuples)
 
void heap_freeze_prepared_tuples (Buffer buffer, HeapTupleFreeze *tuples, int ntuples)
 
bool heap_freeze_tuple (HeapTupleHeader tuple, TransactionId relfrozenxid, TransactionId relminmxid, TransactionId FreezeLimit, TransactionId MultiXactCutoff)
 
bool heap_tuple_should_freeze (HeapTupleHeader tuple, const struct VacuumCutoffs *cutoffs, TransactionId *NoFreezePageRelfrozenXid, MultiXactId *NoFreezePageRelminMxid)
 
bool heap_tuple_needs_eventual_freeze (HeapTupleHeader tuple)
 
void simple_heap_insert (Relation relation, HeapTuple tup)
 
void simple_heap_delete (Relation relation, ItemPointer tid)
 
void simple_heap_update (Relation relation, ItemPointer otid, HeapTuple tup, TU_UpdateIndexes *update_indexes)
 
TransactionId heap_index_delete_tuples (Relation rel, TM_IndexDeleteOp *delstate)
 
void heap_page_prune_opt (Relation relation, Buffer buffer)
 
void heap_page_prune_and_freeze (Relation relation, Buffer buffer, struct GlobalVisState *vistest, int options, struct VacuumCutoffs *cutoffs, PruneFreezeResult *presult, PruneReason reason, OffsetNumber *off_loc, TransactionId *new_relfrozen_xid, MultiXactId *new_relmin_mxid)
 
void heap_page_prune_execute (Buffer buffer, bool lp_truncate_only, OffsetNumber *redirected, int nredirected, OffsetNumber *nowdead, int ndead, OffsetNumber *nowunused, int nunused)
 
void heap_get_root_tuples (Page page, OffsetNumber *root_offsets)
 
void log_heap_prune_and_freeze (Relation relation, Buffer buffer, TransactionId conflict_xid, bool cleanup_lock, PruneReason reason, HeapTupleFreeze *frozen, int nfrozen, OffsetNumber *redirected, int nredirected, OffsetNumber *dead, int ndead, OffsetNumber *unused, int nunused)
 
void heap_vacuum_rel (Relation rel, struct VacuumParams *params, BufferAccessStrategy bstrategy)
 
bool HeapTupleSatisfiesVisibility (HeapTuple htup, Snapshot snapshot, Buffer buffer)
 
TM_Result HeapTupleSatisfiesUpdate (HeapTuple htup, CommandId curcid, Buffer buffer)
 
HTSV_Result HeapTupleSatisfiesVacuum (HeapTuple htup, TransactionId OldestXmin, Buffer buffer)
 
HTSV_Result HeapTupleSatisfiesVacuumHorizon (HeapTuple htup, Buffer buffer, TransactionId *dead_after)
 
void HeapTupleSetHintBits (HeapTupleHeader tuple, Buffer buffer, uint16 infomask, TransactionId xid)
 
bool HeapTupleHeaderIsOnlyLocked (HeapTupleHeader tuple)
 
bool HeapTupleIsSurelyDead (HeapTuple htup, struct GlobalVisState *vistest)
 
bool ResolveCminCmaxDuringDecoding (struct HTAB *tuplecid_data, Snapshot snapshot, HeapTuple htup, Buffer buffer, CommandId *cmin, CommandId *cmax)
 
void HeapCheckForSerializableConflictOut (bool visible, Relation relation, HeapTuple tuple, Buffer buffer, Snapshot snapshot)
 

Macro Definition Documentation

◆ HEAP_FREEZE_CHECK_XMAX_ABORTED

#define HEAP_FREEZE_CHECK_XMAX_ABORTED   0x02

Definition at line 137 of file heapam.h.

◆ HEAP_FREEZE_CHECK_XMIN_COMMITTED

#define HEAP_FREEZE_CHECK_XMIN_COMMITTED   0x01

Definition at line 136 of file heapam.h.

◆ HEAP_INSERT_FROZEN

#define HEAP_INSERT_FROZEN   TABLE_INSERT_FROZEN

Definition at line 36 of file heapam.h.

◆ HEAP_INSERT_NO_LOGICAL

#define HEAP_INSERT_NO_LOGICAL   TABLE_INSERT_NO_LOGICAL

Definition at line 37 of file heapam.h.

◆ HEAP_INSERT_SKIP_FSM

#define HEAP_INSERT_SKIP_FSM   TABLE_INSERT_SKIP_FSM

Definition at line 35 of file heapam.h.

◆ HEAP_INSERT_SPECULATIVE

#define HEAP_INSERT_SPECULATIVE   0x0010

Definition at line 38 of file heapam.h.

◆ HEAP_PAGE_PRUNE_FREEZE

#define HEAP_PAGE_PRUNE_FREEZE   (1 << 1)

Definition at line 42 of file heapam.h.

◆ HEAP_PAGE_PRUNE_MARK_UNUSED_NOW

#define HEAP_PAGE_PRUNE_MARK_UNUSED_NOW   (1 << 0)

Definition at line 41 of file heapam.h.

◆ HeapScanIsValid

#define HeapScanIsValid(scan)   PointerIsValid(scan)

Definition at line 287 of file heapam.h.

◆ MaxLockTupleMode

#define MaxLockTupleMode   LockTupleExclusive

Definition at line 48 of file heapam.h.

Typedef Documentation

◆ BulkInsertState

typedef struct BulkInsertStateData* BulkInsertState

Definition at line 44 of file heapam.h.

◆ HeapPageFreeze

◆ HeapScanDesc

typedef struct HeapScanDescData* HeapScanDesc

Definition at line 109 of file heapam.h.

◆ HeapScanDescData

◆ HeapTupleFreeze

◆ IndexFetchHeapData

◆ PruneFreezeResult

Enumeration Type Documentation

◆ HTSV_Result

Enumerator
HEAPTUPLE_DEAD 
HEAPTUPLE_LIVE 
HEAPTUPLE_RECENTLY_DEAD 
HEAPTUPLE_INSERT_IN_PROGRESS 
HEAPTUPLE_DELETE_IN_PROGRESS 

Definition at line 123 of file heapam.h.

124 {
125  HEAPTUPLE_DEAD, /* tuple is dead and deletable */
126  HEAPTUPLE_LIVE, /* tuple is live (committed, no deleter) */
127  HEAPTUPLE_RECENTLY_DEAD, /* tuple is dead, but not deletable yet */
128  HEAPTUPLE_INSERT_IN_PROGRESS, /* inserting xact is still in progress */
129  HEAPTUPLE_DELETE_IN_PROGRESS, /* deleting xact is still in progress */
130 } HTSV_Result;

◆ PruneReason

Enumerator
PRUNE_ON_ACCESS 
PRUNE_VACUUM_SCAN 
PRUNE_VACUUM_CLEANUP 

Definition at line 267 of file heapam.h.

268 {
269  PRUNE_ON_ACCESS, /* on-access pruning */
270  PRUNE_VACUUM_SCAN, /* VACUUM 1st heap pass */
271  PRUNE_VACUUM_CLEANUP, /* VACUUM 2nd heap pass */
272 } PruneReason;

Function Documentation

◆ FreeBulkInsertState()

void FreeBulkInsertState ( BulkInsertState  bistate)

Definition at line 1943 of file heapam.c.

1944 {
1945  if (bistate->current_buf != InvalidBuffer)
1946  ReleaseBuffer(bistate->current_buf);
1947  FreeAccessStrategy(bistate->strategy);
1948  pfree(bistate);
1949 }

References BulkInsertStateData::current_buf, FreeAccessStrategy(), InvalidBuffer, pfree(), ReleaseBuffer(), and BulkInsertStateData::strategy.

Referenced by ATRewriteTable(), CopyFrom(), CopyMultiInsertBufferCleanup(), deleteSplitPartitionContext(), intorel_shutdown(), moveMergedTablesRows(), and transientrel_shutdown().

◆ GetBulkInsertState()

BulkInsertState GetBulkInsertState ( void  )

Definition at line 1926 of file heapam.c.

1927 {
1928  BulkInsertState bistate;
1929 
1930  bistate = (BulkInsertState) palloc(sizeof(BulkInsertStateData));
1931  bistate->strategy = GetAccessStrategy(BAS_BULKWRITE);
1932  bistate->current_buf = InvalidBuffer;
1933  bistate->next_free = InvalidBlockNumber;
1934  bistate->last_free = InvalidBlockNumber;
1935  bistate->already_extended_by = 0;
1936  return bistate;
1937 }

References BulkInsertStateData::already_extended_by, BAS_BULKWRITE, BulkInsertStateData::current_buf, GetAccessStrategy(), InvalidBlockNumber, InvalidBuffer, BulkInsertStateData::last_free, BulkInsertStateData::next_free, palloc(), and BulkInsertStateData::strategy.

Referenced by ATRewriteTable(), CopyFrom(), CopyMultiInsertBufferInit(), createSplitPartitionContext(), intorel_startup(), moveMergedTablesRows(), and transientrel_startup().
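
For illustration only, a minimal sketch of the intended pairing of GetBulkInsertState(), heap_insert() and FreeBulkInsertState(). The helper name and the pre-formed tuple array are hypothetical, and the caller is assumed to hold a suitable lock on the relation and to run inside a transaction.

#include "postgres.h"

#include "access/heapam.h"
#include "access/xact.h"

/* Hypothetical helper: stream pre-formed tuples into a heap relation. */
static void
bulk_insert_tuples(Relation rel, HeapTuple *tuples, int ntuples)
{
    BulkInsertState bistate = GetBulkInsertState();
    CommandId   cid = GetCurrentCommandId(true);

    for (int i = 0; i < ntuples; i++)
        heap_insert(rel, tuples[i], cid, HEAP_INSERT_SKIP_FSM, bistate);

    /* release the retained buffer pin and the BAS_BULKWRITE strategy */
    FreeBulkInsertState(bistate);
}

Reusing one BulkInsertState across many inserts lets the pinned target buffer and the bulk-write buffer access strategy be shared between calls, which is the pattern used by callers such as CopyFrom() and ATRewriteTable().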

◆ heap_abort_speculative()

void heap_abort_speculative ( Relation  relation,
ItemPointer  tid 
)

Definition at line 5905 of file heapam.c.

5906 {
5908  ItemId lp;
5909  HeapTupleData tp;
5910  Page page;
5911  BlockNumber block;
5912  Buffer buffer;
5913 
5914  Assert(ItemPointerIsValid(tid));
5915 
5916  block = ItemPointerGetBlockNumber(tid);
5917  buffer = ReadBuffer(relation, block);
5918  page = BufferGetPage(buffer);
5919 
5921 
5922  /*
5923  * Page can't be all visible, we just inserted into it, and are still
5924  * running.
5925  */
5926  Assert(!PageIsAllVisible(page));
5927 
5928  lp = PageGetItemId(page, ItemPointerGetOffsetNumber(tid));
5929  Assert(ItemIdIsNormal(lp));
5930 
5931  tp.t_tableOid = RelationGetRelid(relation);
5932  tp.t_data = (HeapTupleHeader) PageGetItem(page, lp);
5933  tp.t_len = ItemIdGetLength(lp);
5934  tp.t_self = *tid;
5935 
5936  /*
5937  * Sanity check that the tuple really is a speculatively inserted tuple,
5938  * inserted by us.
5939  */
5940  if (tp.t_data->t_choice.t_heap.t_xmin != xid)
5941  elog(ERROR, "attempted to kill a tuple inserted by another transaction");
5942  if (!(IsToastRelation(relation) || HeapTupleHeaderIsSpeculative(tp.t_data)))
5943  elog(ERROR, "attempted to kill a non-speculative tuple");
5945 
5946  /*
5947  * No need to check for serializable conflicts here. There is never a
5948  * need for a combo CID, either. No need to extract replica identity, or
5949  * do anything special with infomask bits.
5950  */
5951 
5953 
5954  /*
5955  * The tuple will become DEAD immediately. Flag that this page is a
5956  * candidate for pruning by setting xmin to TransactionXmin. While not
5957  * immediately prunable, it is the oldest xid we can cheaply determine
5958  * that's safe against wraparound / being older than the table's
5959  * relfrozenxid. To defend against the unlikely case of a new relation
5960  * having a newer relfrozenxid than our TransactionXmin, use relfrozenxid
5961  * if so (vacuum can't subsequently move relfrozenxid to beyond
5962  * TransactionXmin, so there's no race here).
5963  */
5965  {
5966  TransactionId relfrozenxid = relation->rd_rel->relfrozenxid;
5967  TransactionId prune_xid;
5968 
5969  if (TransactionIdPrecedes(TransactionXmin, relfrozenxid))
5970  prune_xid = relfrozenxid;
5971  else
5972  prune_xid = TransactionXmin;
5973  PageSetPrunable(page, prune_xid);
5974  }
5975 
5976  /* store transaction information of xact deleting the tuple */
5979 
5980  /*
5981  * Set the tuple header xmin to InvalidTransactionId. This makes the
5982  * tuple immediately invisible to everyone. (In particular, to any
5983  * transactions waiting on the speculative token, woken up later.)
5984  */
5986 
5987  /* Clear the speculative insertion token too */
5988  tp.t_data->t_ctid = tp.t_self;
5989 
5990  MarkBufferDirty(buffer);
5991 
5992  /*
5993  * XLOG stuff
5994  *
5995  * The WAL records generated here match heap_delete(). The same recovery
5996  * routines are used.
5997  */
5998  if (RelationNeedsWAL(relation))
5999  {
6000  xl_heap_delete xlrec;
6001  XLogRecPtr recptr;
6002 
6003  xlrec.flags = XLH_DELETE_IS_SUPER;
6005  tp.t_data->t_infomask2);
6007  xlrec.xmax = xid;
6008 
6009  XLogBeginInsert();
6010  XLogRegisterData((char *) &xlrec, SizeOfHeapDelete);
6011  XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
6012 
6013  /* No replica identity & replication origin logged */
6014 
6015  recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_DELETE);
6016 
6017  PageSetLSN(page, recptr);
6018  }
6019 
6020  END_CRIT_SECTION();
6021 
6022  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
6023 
6024  if (HeapTupleHasExternal(&tp))
6025  {
6026  Assert(!IsToastRelation(relation));
6027  heap_toast_delete(relation, &tp, true);
6028  }
6029 
6030  /*
6031  * Never need to mark tuple for invalidation, since catalogs don't support
6032  * speculative insertion
6033  */
6034 
6035  /* Now we can release the buffer */
6036  ReleaseBuffer(buffer);
6037 
6038  /* count deletion, as we counted the insertion too */
6039  pgstat_count_heap_delete(relation);
6040 }

References Assert, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetPage(), compute_infobits(), elog, END_CRIT_SECTION, ERROR, xl_heap_delete::flags, GetCurrentTransactionId(), HEAP_KEYS_UPDATED, HEAP_MOVED, heap_toast_delete(), HEAP_XMAX_BITS, HeapTupleHasExternal, HeapTupleHeaderIsHeapOnly, HeapTupleHeaderIsSpeculative, HeapTupleHeaderSetXmin, xl_heap_delete::infobits_set, InvalidTransactionId, IsToastRelation(), ItemIdGetLength, ItemIdIsNormal, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), ItemPointerIsValid(), LockBuffer(), MarkBufferDirty(), xl_heap_delete::offnum, PageGetItem(), PageGetItemId(), PageIsAllVisible(), PageSetLSN(), PageSetPrunable, pgstat_count_heap_delete(), RelationData::rd_rel, ReadBuffer(), REGBUF_STANDARD, RelationGetRelid, RelationNeedsWAL, ReleaseBuffer(), SizeOfHeapDelete, START_CRIT_SECTION, HeapTupleHeaderData::t_choice, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleHeaderData::t_heap, HeapTupleHeaderData::t_infomask, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, HeapTupleFields::t_xmin, TransactionIdIsValid, TransactionIdPrecedes(), TransactionXmin, XLH_DELETE_IS_SUPER, XLOG_HEAP_DELETE, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), XLogRegisterData(), and xl_heap_delete::xmax.

Referenced by heapam_tuple_complete_speculative(), and toast_delete_datum().

◆ heap_beginscan()

TableScanDesc heap_beginscan ( Relation  relation,
Snapshot  snapshot,
int  nkeys,
ScanKey  key,
ParallelTableScanDesc  parallel_scan,
uint32  flags 
)

Definition at line 1037 of file heapam.c.

1041 {
1042  HeapScanDesc scan;
1043 
1044  /*
1045  * increment relation ref count while scanning relation
1046  *
1047  * This is just to make really sure the relcache entry won't go away while
1048  * the scan has a pointer to it. Caller should be holding the rel open
1049  * anyway, so this is redundant in all normal scenarios...
1050  */
1052 
1053  /*
1054  * allocate and initialize scan descriptor
1055  */
1056  scan = (HeapScanDesc) palloc(sizeof(HeapScanDescData));
1057 
1058  scan->rs_base.rs_rd = relation;
1059  scan->rs_base.rs_snapshot = snapshot;
1060  scan->rs_base.rs_nkeys = nkeys;
1061  scan->rs_base.rs_flags = flags;
1062  scan->rs_base.rs_parallel = parallel_scan;
1063  scan->rs_strategy = NULL; /* set in initscan */
1064  scan->rs_vmbuffer = InvalidBuffer;
1065  scan->rs_empty_tuples_pending = 0;
1066 
1067  /*
1068  * Disable page-at-a-time mode if it's not a MVCC-safe snapshot.
1069  */
1070  if (!(snapshot && IsMVCCSnapshot(snapshot)))
1072 
1073  /*
1074  * For seqscan and sample scans in a serializable transaction, acquire a
1075  * predicate lock on the entire relation. This is required not only to
1076  * lock all the matching tuples, but also to conflict with new insertions
1077  * into the table. In an indexscan, we take page locks on the index pages
1078  * covering the range specified in the scan qual, but in a heap scan there
1079  * is nothing more fine-grained to lock. A bitmap scan is a different
1080  * story, there we have already scanned the index and locked the index
1081  * pages covering the predicate. But in that case we still have to lock
1082  * any matching heap tuples. For sample scan we could optimize the locking
1083  * to be at least page-level granularity, but we'd need to add per-tuple
1084  * locking for that.
1085  */
1087  {
1088  /*
1089  * Ensure a missing snapshot is noticed reliably, even if the
1090  * isolation mode means predicate locking isn't performed (and
1091  * therefore the snapshot isn't used here).
1092  */
1093  Assert(snapshot);
1094  PredicateLockRelation(relation, snapshot);
1095  }
1096 
1097  /* we only need to set this up once */
1098  scan->rs_ctup.t_tableOid = RelationGetRelid(relation);
1099 
1100  /*
1101  * Allocate memory to keep track of page allocation for parallel workers
1102  * when doing a parallel scan.
1103  */
1104  if (parallel_scan != NULL)
1106  else
1107  scan->rs_parallelworkerdata = NULL;
1108 
1109  /*
1110  * we do this here instead of in initscan() because heap_rescan also calls
1111  * initscan() and we don't want to allocate memory again
1112  */
1113  if (nkeys > 0)
1114  scan->rs_base.rs_key = (ScanKey) palloc(sizeof(ScanKeyData) * nkeys);
1115  else
1116  scan->rs_base.rs_key = NULL;
1117 
1118  initscan(scan, key, false);
1119 
1120  scan->rs_read_stream = NULL;
1121 
1122  /*
1123  * Set up a read stream for sequential scans and TID range scans. This
1124  * should be done after initscan() because initscan() allocates the
1125  * BufferAccessStrategy object passed to the read stream API.
1126  */
1127  if (scan->rs_base.rs_flags & SO_TYPE_SEQSCAN ||
1129  {
1131 
1132  if (scan->rs_base.rs_parallel)
1134  else
1136 
1138  scan->rs_strategy,
1139  scan->rs_base.rs_rd,
1140  MAIN_FORKNUM,
1141  cb,
1142  scan,
1143  0);
1144  }
1145 
1146 
1147  return (TableScanDesc) scan;
1148 }

References Assert, heap_scan_stream_read_next_parallel(), heap_scan_stream_read_next_serial(), initscan(), InvalidBuffer, IsMVCCSnapshot, key, MAIN_FORKNUM, palloc(), PredicateLockRelation(), read_stream_begin_relation(), READ_STREAM_SEQUENTIAL, RelationGetRelid, RelationIncrementReferenceCount(), HeapScanDescData::rs_base, HeapScanDescData::rs_ctup, HeapScanDescData::rs_empty_tuples_pending, TableScanDescData::rs_flags, TableScanDescData::rs_key, TableScanDescData::rs_nkeys, TableScanDescData::rs_parallel, HeapScanDescData::rs_parallelworkerdata, TableScanDescData::rs_rd, HeapScanDescData::rs_read_stream, TableScanDescData::rs_snapshot, HeapScanDescData::rs_strategy, HeapScanDescData::rs_vmbuffer, SO_ALLOW_PAGEMODE, SO_TYPE_SAMPLESCAN, SO_TYPE_SEQSCAN, SO_TYPE_TIDRANGESCAN, and HeapTupleData::t_tableOid.
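
For illustration only, a minimal sketch of a serial sequential scan built from heap_beginscan(), heap_getnext() and heap_endscan(). The helper name, flag choice and snapshot handling are simplified assumptions, not taken from the PostgreSQL sources.

#include "postgres.h"

#include "access/heapam.h"
#include "access/table.h"
#include "utils/snapmgr.h"

/* Hypothetical helper: walk every visible tuple of a relation. */
static void
scan_whole_relation(Oid relid)
{
    Relation    rel = table_open(relid, AccessShareLock);
    TableScanDesc scan;
    HeapTuple   tuple;

    /* no scan keys, no parallel scan descriptor */
    scan = heap_beginscan(rel, GetTransactionSnapshot(), 0, NULL, NULL,
                          SO_TYPE_SEQSCAN | SO_ALLOW_PAGEMODE);

    while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
    {
        /*
         * tuple points into the scan's current buffer; copy it with
         * heap_copytuple() if it must outlive the next heap_getnext() call.
         */
    }

    heap_endscan(scan);
    table_close(rel, AccessShareLock);
}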

◆ heap_delete()

TM_Result heap_delete ( Relation  relation,
ItemPointer  tid,
CommandId  cid,
Snapshot  crosscheck,
bool  wait,
struct TM_FailureData tmfd,
bool  changingPart 
)

Definition at line 2682 of file heapam.c.

2685 {
2686  TM_Result result;
2688  ItemId lp;
2689  HeapTupleData tp;
2690  Page page;
2691  BlockNumber block;
2692  Buffer buffer;
2693  Buffer vmbuffer = InvalidBuffer;
2694  TransactionId new_xmax;
2695  uint16 new_infomask,
2696  new_infomask2;
2697  bool have_tuple_lock = false;
2698  bool iscombo;
2699  bool all_visible_cleared = false;
2700  HeapTuple old_key_tuple = NULL; /* replica identity of the tuple */
2701  bool old_key_copied = false;
2702 
2703  Assert(ItemPointerIsValid(tid));
2704 
2705  /*
2706  * Forbid this during a parallel operation, lest it allocate a combo CID.
2707  * Other workers might need that combo CID for visibility checks, and we
2708  * have no provision for broadcasting it to them.
2709  */
2710  if (IsInParallelMode())
2711  ereport(ERROR,
2712  (errcode(ERRCODE_INVALID_TRANSACTION_STATE),
2713  errmsg("cannot delete tuples during a parallel operation")));
2714 
2715  block = ItemPointerGetBlockNumber(tid);
2716  buffer = ReadBuffer(relation, block);
2717  page = BufferGetPage(buffer);
2718 
2719  /*
2720  * Before locking the buffer, pin the visibility map page if it appears to
2721  * be necessary. Since we haven't got the lock yet, someone else might be
2722  * in the middle of changing this, so we'll need to recheck after we have
2723  * the lock.
2724  */
2725  if (PageIsAllVisible(page))
2726  visibilitymap_pin(relation, block, &vmbuffer);
2727 
2729 
2730  lp = PageGetItemId(page, ItemPointerGetOffsetNumber(tid));
2731  Assert(ItemIdIsNormal(lp));
2732 
2733  tp.t_tableOid = RelationGetRelid(relation);
2734  tp.t_data = (HeapTupleHeader) PageGetItem(page, lp);
2735  tp.t_len = ItemIdGetLength(lp);
2736  tp.t_self = *tid;
2737 
2738 l1:
2739 
2740  /*
2741  * If we didn't pin the visibility map page and the page has become all
2742  * visible while we were busy locking the buffer, we'll have to unlock and
2743  * re-lock, to avoid holding the buffer lock across an I/O. That's a bit
2744  * unfortunate, but hopefully shouldn't happen often.
2745  */
2746  if (vmbuffer == InvalidBuffer && PageIsAllVisible(page))
2747  {
2748  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
2749  visibilitymap_pin(relation, block, &vmbuffer);
2751  }
2752 
2753  result = HeapTupleSatisfiesUpdate(&tp, cid, buffer);
2754 
2755  if (result == TM_Invisible)
2756  {
2757  UnlockReleaseBuffer(buffer);
2758  ereport(ERROR,
2759  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
2760  errmsg("attempted to delete invisible tuple")));
2761  }
2762  else if (result == TM_BeingModified && wait)
2763  {
2764  TransactionId xwait;
2765  uint16 infomask;
2766 
2767  /* must copy state data before unlocking buffer */
2768  xwait = HeapTupleHeaderGetRawXmax(tp.t_data);
2769  infomask = tp.t_data->t_infomask;
2770 
2771  /*
2772  * Sleep until concurrent transaction ends -- except when there's a
2773  * single locker and it's our own transaction. Note we don't care
2774  * which lock mode the locker has, because we need the strongest one.
2775  *
2776  * Before sleeping, we need to acquire tuple lock to establish our
2777  * priority for the tuple (see heap_lock_tuple). LockTuple will
2778  * release us when we are next-in-line for the tuple.
2779  *
2780  * If we are forced to "start over" below, we keep the tuple lock;
2781  * this arranges that we stay at the head of the line while rechecking
2782  * tuple state.
2783  */
2784  if (infomask & HEAP_XMAX_IS_MULTI)
2785  {
2786  bool current_is_member = false;
2787 
2788  if (DoesMultiXactIdConflict((MultiXactId) xwait, infomask,
2789  LockTupleExclusive, &current_is_member))
2790  {
2791  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
2792 
2793  /*
2794  * Acquire the lock, if necessary (but skip it when we're
2795  * requesting a lock and already have one; avoids deadlock).
2796  */
2797  if (!current_is_member)
2799  LockWaitBlock, &have_tuple_lock);
2800 
2801  /* wait for multixact */
2803  relation, &(tp.t_self), XLTW_Delete,
2804  NULL);
2806 
2807  /*
2808  * If xwait had just locked the tuple then some other xact
2809  * could update this tuple before we get to this point. Check
2810  * for xmax change, and start over if so.
2811  *
2812  * We also must start over if we didn't pin the VM page, and
2813  * the page has become all visible.
2814  */
2815  if ((vmbuffer == InvalidBuffer && PageIsAllVisible(page)) ||
2816  xmax_infomask_changed(tp.t_data->t_infomask, infomask) ||
2818  xwait))
2819  goto l1;
2820  }
2821 
2822  /*
2823  * You might think the multixact is necessarily done here, but not
2824  * so: it could have surviving members, namely our own xact or
2825  * other subxacts of this backend. It is legal for us to delete
2826  * the tuple in either case, however (the latter case is
2827  * essentially a situation of upgrading our former shared lock to
2828  * exclusive). We don't bother changing the on-disk hint bits
2829  * since we are about to overwrite the xmax altogether.
2830  */
2831  }
2832  else if (!TransactionIdIsCurrentTransactionId(xwait))
2833  {
2834  /*
2835  * Wait for regular transaction to end; but first, acquire tuple
2836  * lock.
2837  */
2838  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
2840  LockWaitBlock, &have_tuple_lock);
2841  XactLockTableWait(xwait, relation, &(tp.t_self), XLTW_Delete);
2843 
2844  /*
2845  * xwait is done, but if xwait had just locked the tuple then some
2846  * other xact could update this tuple before we get to this point.
2847  * Check for xmax change, and start over if so.
2848  *
2849  * We also must start over if we didn't pin the VM page, and the
2850  * page has become all visible.
2851  */
2852  if ((vmbuffer == InvalidBuffer && PageIsAllVisible(page)) ||
2853  xmax_infomask_changed(tp.t_data->t_infomask, infomask) ||
2855  xwait))
2856  goto l1;
2857 
2858  /* Otherwise check if it committed or aborted */
2859  UpdateXmaxHintBits(tp.t_data, buffer, xwait);
2860  }
2861 
2862  /*
2863  * We may overwrite if previous xmax aborted, or if it committed but
2864  * only locked the tuple without updating it.
2865  */
2866  if ((tp.t_data->t_infomask & HEAP_XMAX_INVALID) ||
2869  result = TM_Ok;
2870  else if (!ItemPointerEquals(&tp.t_self, &tp.t_data->t_ctid))
2871  result = TM_Updated;
2872  else
2873  result = TM_Deleted;
2874  }
2875 
2876  /* sanity check the result HeapTupleSatisfiesUpdate() and the logic above */
2877  if (result != TM_Ok)
2878  {
2879  Assert(result == TM_SelfModified ||
2880  result == TM_Updated ||
2881  result == TM_Deleted ||
2882  result == TM_BeingModified);
2884  Assert(result != TM_Updated ||
2885  !ItemPointerEquals(&tp.t_self, &tp.t_data->t_ctid));
2886  }
2887 
2888  if (crosscheck != InvalidSnapshot && result == TM_Ok)
2889  {
2890  /* Perform additional check for transaction-snapshot mode RI updates */
2891  if (!HeapTupleSatisfiesVisibility(&tp, crosscheck, buffer))
2892  result = TM_Updated;
2893  }
2894 
2895  if (result != TM_Ok)
2896  {
2897  tmfd->ctid = tp.t_data->t_ctid;
2899  if (result == TM_SelfModified)
2900  tmfd->cmax = HeapTupleHeaderGetCmax(tp.t_data);
2901  else
2902  tmfd->cmax = InvalidCommandId;
2903  UnlockReleaseBuffer(buffer);
2904  if (have_tuple_lock)
2905  UnlockTupleTuplock(relation, &(tp.t_self), LockTupleExclusive);
2906  if (vmbuffer != InvalidBuffer)
2907  ReleaseBuffer(vmbuffer);
2908  return result;
2909  }
2910 
2911  /*
2912  * We're about to do the actual delete -- check for conflict first, to
2913  * avoid possibly having to roll back work we've just done.
2914  *
2915  * This is safe without a recheck as long as there is no possibility of
2916  * another process scanning the page between this check and the delete
2917  * being visible to the scan (i.e., an exclusive buffer content lock is
2918  * continuously held from this point until the tuple delete is visible).
2919  */
2920  CheckForSerializableConflictIn(relation, tid, BufferGetBlockNumber(buffer));
2921 
2922  /* replace cid with a combo CID if necessary */
2923  HeapTupleHeaderAdjustCmax(tp.t_data, &cid, &iscombo);
2924 
2925  /*
2926  * Compute replica identity tuple before entering the critical section so
2927  * we don't PANIC upon a memory allocation failure.
2928  */
2929  old_key_tuple = ExtractReplicaIdentity(relation, &tp, true, &old_key_copied);
2930 
2931  /*
2932  * If this is the first possibly-multixact-able operation in the current
2933  * transaction, set my per-backend OldestMemberMXactId setting. We can be
2934  * certain that the transaction will never become a member of any older
2935  * MultiXactIds than that. (We have to do this even if we end up just
2936  * using our own TransactionId below, since some other backend could
2937  * incorporate our XID into a MultiXact immediately afterwards.)
2938  */
2940 
2943  xid, LockTupleExclusive, true,
2944  &new_xmax, &new_infomask, &new_infomask2);
2945 
2947 
2948  /*
2949  * If this transaction commits, the tuple will become DEAD sooner or
2950  * later. Set flag that this page is a candidate for pruning once our xid
2951  * falls below the OldestXmin horizon. If the transaction finally aborts,
2952  * the subsequent page pruning will be a no-op and the hint will be
2953  * cleared.
2954  */
2955  PageSetPrunable(page, xid);
2956 
2957  if (PageIsAllVisible(page))
2958  {
2959  all_visible_cleared = true;
2960  PageClearAllVisible(page);
2961  visibilitymap_clear(relation, BufferGetBlockNumber(buffer),
2962  vmbuffer, VISIBILITYMAP_VALID_BITS);
2963  }
2964 
2965  /* store transaction information of xact deleting the tuple */
2968  tp.t_data->t_infomask |= new_infomask;
2969  tp.t_data->t_infomask2 |= new_infomask2;
2971  HeapTupleHeaderSetXmax(tp.t_data, new_xmax);
2972  HeapTupleHeaderSetCmax(tp.t_data, cid, iscombo);
2973  /* Make sure there is no forward chain link in t_ctid */
2974  tp.t_data->t_ctid = tp.t_self;
2975 
2976  /* Signal that this is actually a move into another partition */
2977  if (changingPart)
2979 
2980  MarkBufferDirty(buffer);
2981 
2982  /*
2983  * XLOG stuff
2984  *
2985  * NB: heap_abort_speculative() uses the same xlog record and replay
2986  * routines.
2987  */
2988  if (RelationNeedsWAL(relation))
2989  {
2990  xl_heap_delete xlrec;
2991  xl_heap_header xlhdr;
2992  XLogRecPtr recptr;
2993 
2994  /*
2995  * For logical decode we need combo CIDs to properly decode the
2996  * catalog
2997  */
2999  log_heap_new_cid(relation, &tp);
3000 
3001  xlrec.flags = 0;
3002  if (all_visible_cleared)
3004  if (changingPart)
3007  tp.t_data->t_infomask2);
3009  xlrec.xmax = new_xmax;
3010 
3011  if (old_key_tuple != NULL)
3012  {
3013  if (relation->rd_rel->relreplident == REPLICA_IDENTITY_FULL)
3015  else
3017  }
3018 
3019  XLogBeginInsert();
3020  XLogRegisterData((char *) &xlrec, SizeOfHeapDelete);
3021 
3022  XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
3023 
3024  /*
3025  * Log replica identity of the deleted tuple if there is one
3026  */
3027  if (old_key_tuple != NULL)
3028  {
3029  xlhdr.t_infomask2 = old_key_tuple->t_data->t_infomask2;
3030  xlhdr.t_infomask = old_key_tuple->t_data->t_infomask;
3031  xlhdr.t_hoff = old_key_tuple->t_data->t_hoff;
3032 
3033  XLogRegisterData((char *) &xlhdr, SizeOfHeapHeader);
3034  XLogRegisterData((char *) old_key_tuple->t_data
3036  old_key_tuple->t_len
3038  }
3039 
3040  /* filtering by origin on a row level is much more efficient */
3042 
3043  recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_DELETE);
3044 
3045  PageSetLSN(page, recptr);
3046  }
3047 
3048  END_CRIT_SECTION();
3049 
3050  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
3051 
3052  if (vmbuffer != InvalidBuffer)
3053  ReleaseBuffer(vmbuffer);
3054 
3055  /*
3056  * If the tuple has toasted out-of-line attributes, we need to delete
3057  * those items too. We have to do this before releasing the buffer
3058  * because we need to look at the contents of the tuple, but it's OK to
3059  * release the content lock on the buffer first.
3060  */
3061  if (relation->rd_rel->relkind != RELKIND_RELATION &&
3062  relation->rd_rel->relkind != RELKIND_MATVIEW)
3063  {
3064  /* toast table entries should never be recursively toasted */
3066  }
3067  else if (HeapTupleHasExternal(&tp))
3068  heap_toast_delete(relation, &tp, false);
3069 
3070  /*
3071  * Mark tuple for invalidation from system caches at next command
3072  * boundary. We have to do this before releasing the buffer because we
3073  * need to look at the contents of the tuple.
3074  */
3075  CacheInvalidateHeapTuple(relation, &tp, NULL);
3076 
3077  /* Now we can release the buffer */
3078  ReleaseBuffer(buffer);
3079 
3080  /*
3081  * Release the lmgr tuple lock, if we had it.
3082  */
3083  if (have_tuple_lock)
3084  UnlockTupleTuplock(relation, &(tp.t_self), LockTupleExclusive);
3085 
3086  pgstat_count_heap_delete(relation);
3087 
3088  if (old_key_tuple != NULL && old_key_copied)
3089  heap_freetuple(old_key_tuple);
3090 
3091  return TM_Ok;
3092 }

References Assert, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage(), CacheInvalidateHeapTuple(), CheckForSerializableConflictIn(), TM_FailureData::cmax, compute_infobits(), compute_new_xmax_infomask(), TM_FailureData::ctid, DoesMultiXactIdConflict(), END_CRIT_SECTION, ereport, errcode(), errmsg(), ERROR, ExtractReplicaIdentity(), xl_heap_delete::flags, GetCurrentTransactionId(), heap_acquire_tuplock(), heap_freetuple(), HEAP_KEYS_UPDATED, HEAP_MOVED, heap_toast_delete(), HEAP_XMAX_BITS, HEAP_XMAX_INVALID, HEAP_XMAX_IS_LOCKED_ONLY, HEAP_XMAX_IS_MULTI, HeapTupleHasExternal, HeapTupleHeaderAdjustCmax(), HeapTupleHeaderClearHotUpdated, HeapTupleHeaderGetCmax(), HeapTupleHeaderGetRawXmax, HeapTupleHeaderGetUpdateXid, HeapTupleHeaderIsOnlyLocked(), HeapTupleHeaderSetCmax, HeapTupleHeaderSetMovedPartitions, HeapTupleHeaderSetXmax, HeapTupleSatisfiesUpdate(), HeapTupleSatisfiesVisibility(), xl_heap_delete::infobits_set, InvalidBuffer, InvalidCommandId, InvalidSnapshot, IsInParallelMode(), ItemIdGetLength, ItemIdIsNormal, ItemPointerEquals(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), ItemPointerIsValid(), LockBuffer(), LockTupleExclusive, LockWaitBlock, log_heap_new_cid(), MarkBufferDirty(), MultiXactIdSetOldestMember(), MultiXactIdWait(), MultiXactStatusUpdate, xl_heap_delete::offnum, PageClearAllVisible(), PageGetItem(), PageGetItemId(), PageIsAllVisible(), PageSetLSN(), PageSetPrunable, pgstat_count_heap_delete(), RelationData::rd_rel, ReadBuffer(), REGBUF_STANDARD, RelationGetRelid, RelationIsAccessibleInLogicalDecoding, RelationNeedsWAL, ReleaseBuffer(), SizeOfHeapDelete, SizeOfHeapHeader, SizeofHeapTupleHeader, START_CRIT_SECTION, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, xl_heap_header::t_hoff, HeapTupleHeaderData::t_hoff, xl_heap_header::t_infomask, HeapTupleHeaderData::t_infomask, xl_heap_header::t_infomask2, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TM_BeingModified, TM_Deleted, TM_Invisible, TM_Ok, TM_SelfModified, TM_Updated, TransactionIdEquals, TransactionIdIsCurrentTransactionId(), UnlockReleaseBuffer(), UnlockTupleTuplock, UpdateXmaxHintBits(), visibilitymap_clear(), visibilitymap_pin(), VISIBILITYMAP_VALID_BITS, XactLockTableWait(), XLH_DELETE_ALL_VISIBLE_CLEARED, XLH_DELETE_CONTAINS_OLD_KEY, XLH_DELETE_CONTAINS_OLD_TUPLE, XLH_DELETE_IS_PARTITION_MOVE, XLOG_HEAP_DELETE, XLOG_INCLUDE_ORIGIN, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), XLogRegisterData(), XLogSetRecordFlags(), XLTW_Delete, xl_heap_delete::xmax, TM_FailureData::xmax, and xmax_infomask_changed().

Referenced by heapam_tuple_delete(), and simple_heap_delete().
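
For illustration only, a minimal sketch of a direct heap_delete() call with explicit handling of the TM_Result outcomes, in the spirit of simple_heap_delete(). The helper name is hypothetical; no crosscheck snapshot is supplied, and the call waits for concurrent updaters.

#include "postgres.h"

#include "access/heapam.h"
#include "access/xact.h"

/* Hypothetical helper: delete one tuple by TID, erroring out on conflicts. */
static void
delete_by_tid(Relation rel, ItemPointer tid)
{
    TM_FailureData tmfd;
    TM_Result   result;

    result = heap_delete(rel, tid,
                         GetCurrentCommandId(true),
                         InvalidSnapshot,   /* no crosscheck snapshot */
                         true,              /* wait for concurrent updaters */
                         &tmfd,
                         false);            /* not moving to another partition */

    switch (result)
    {
        case TM_Ok:
            break;
        case TM_SelfModified:
        case TM_Updated:
        case TM_Deleted:
            elog(ERROR, "tuple concurrently updated or deleted");
            break;
        default:
            elog(ERROR, "unexpected heap_delete result: %d", (int) result);
    }
}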

◆ heap_endscan()

void heap_endscan ( TableScanDesc  sscan)

Definition at line 1209 of file heapam.c.

1210 {
1211  HeapScanDesc scan = (HeapScanDesc) sscan;
1212 
1213  /* Note: no locking manipulations needed */
1214 
1215  /*
1216  * unpin scan buffers
1217  */
1218  if (BufferIsValid(scan->rs_cbuf))
1219  ReleaseBuffer(scan->rs_cbuf);
1220 
1221  if (BufferIsValid(scan->rs_vmbuffer))
1222  ReleaseBuffer(scan->rs_vmbuffer);
1223 
1224  /*
1225  * Must free the read stream before freeing the BufferAccessStrategy.
1226  */
1227  if (scan->rs_read_stream)
1229 
1230  /*
1231  * decrement relation reference count and free scan descriptor storage
1232  */
1234 
1235  if (scan->rs_base.rs_key)
1236  pfree(scan->rs_base.rs_key);
1237 
1238  if (scan->rs_strategy != NULL)
1240 
1241  if (scan->rs_parallelworkerdata != NULL)
1243 
1244  if (scan->rs_base.rs_flags & SO_TEMP_SNAPSHOT)
1246 
1247  pfree(scan);
1248 }

References BufferIsValid(), FreeAccessStrategy(), pfree(), read_stream_end(), RelationDecrementReferenceCount(), ReleaseBuffer(), HeapScanDescData::rs_base, HeapScanDescData::rs_cbuf, TableScanDescData::rs_flags, TableScanDescData::rs_key, HeapScanDescData::rs_parallelworkerdata, TableScanDescData::rs_rd, HeapScanDescData::rs_read_stream, TableScanDescData::rs_snapshot, HeapScanDescData::rs_strategy, HeapScanDescData::rs_vmbuffer, SO_TEMP_SNAPSHOT, and UnregisterSnapshot().

◆ heap_fetch()

bool heap_fetch ( Relation  relation,
Snapshot  snapshot,
HeapTuple  tuple,
Buffer userbuf,
bool  keep_buf 
)

Definition at line 1510 of file heapam.c.

1515 {
1516  ItemPointer tid = &(tuple->t_self);
1517  ItemId lp;
1518  Buffer buffer;
1519  Page page;
1520  OffsetNumber offnum;
1521  bool valid;
1522 
1523  /*
1524  * Fetch and pin the appropriate page of the relation.
1525  */
1526  buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
1527 
1528  /*
1529  * Need share lock on buffer to examine tuple commit status.
1530  */
1531  LockBuffer(buffer, BUFFER_LOCK_SHARE);
1532  page = BufferGetPage(buffer);
1533 
1534  /*
1535  * We'd better check for out-of-range offnum in case of VACUUM since the
1536  * TID was obtained.
1537  */
1538  offnum = ItemPointerGetOffsetNumber(tid);
1539  if (offnum < FirstOffsetNumber || offnum > PageGetMaxOffsetNumber(page))
1540  {
1541  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
1542  ReleaseBuffer(buffer);
1543  *userbuf = InvalidBuffer;
1544  tuple->t_data = NULL;
1545  return false;
1546  }
1547 
1548  /*
1549  * get the item line pointer corresponding to the requested tid
1550  */
1551  lp = PageGetItemId(page, offnum);
1552 
1553  /*
1554  * Must check for deleted tuple.
1555  */
1556  if (!ItemIdIsNormal(lp))
1557  {
1558  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
1559  ReleaseBuffer(buffer);
1560  *userbuf = InvalidBuffer;
1561  tuple->t_data = NULL;
1562  return false;
1563  }
1564 
1565  /*
1566  * fill in *tuple fields
1567  */
1568  tuple->t_data = (HeapTupleHeader) PageGetItem(page, lp);
1569  tuple->t_len = ItemIdGetLength(lp);
1570  tuple->t_tableOid = RelationGetRelid(relation);
1571 
1572  /*
1573  * check tuple visibility, then release lock
1574  */
1575  valid = HeapTupleSatisfiesVisibility(tuple, snapshot, buffer);
1576 
1577  if (valid)
1578  PredicateLockTID(relation, &(tuple->t_self), snapshot,
1579  HeapTupleHeaderGetXmin(tuple->t_data));
1580 
1581  HeapCheckForSerializableConflictOut(valid, relation, tuple, buffer, snapshot);
1582 
1583  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
1584 
1585  if (valid)
1586  {
1587  /*
1588  * All checks passed, so return the tuple as valid. Caller is now
1589  * responsible for releasing the buffer.
1590  */
1591  *userbuf = buffer;
1592 
1593  return true;
1594  }
1595 
1596  /* Tuple failed time qual, but maybe caller wants to see it anyway. */
1597  if (keep_buf)
1598  *userbuf = buffer;
1599  else
1600  {
1601  ReleaseBuffer(buffer);
1602  *userbuf = InvalidBuffer;
1603  tuple->t_data = NULL;
1604  }
1605 
1606  return false;
1607 }

References BUFFER_LOCK_SHARE, BUFFER_LOCK_UNLOCK, BufferGetPage(), HeapCheckForSerializableConflictOut(), HeapTupleHeaderGetXmin, HeapTupleSatisfiesVisibility(), InvalidBuffer, ItemIdGetLength, ItemIdIsNormal, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), LockBuffer(), PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PredicateLockTID(), ReadBuffer(), RelationGetRelid, ReleaseBuffer(), HeapTupleData::t_data, HeapTupleData::t_len, HeapTupleData::t_self, and HeapTupleData::t_tableOid.

Referenced by heap_lock_updated_tuple_rec(), heapam_fetch_row_version(), and heapam_tuple_lock().
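
For illustration only, a minimal sketch of re-fetching a tuple by TID with heap_fetch(). The helper name is hypothetical; note that on success heap_fetch() returns with the containing buffer pinned, so the caller must release it.

#include "postgres.h"

#include "access/heapam.h"
#include "storage/bufmgr.h"

/* Hypothetical helper: is the tuple at *tid visible under the snapshot? */
static bool
fetch_and_check(Relation rel, Snapshot snapshot, ItemPointer tid)
{
    HeapTupleData tuple;
    Buffer      buf;

    tuple.t_self = *tid;
    if (!heap_fetch(rel, snapshot, &tuple, &buf, false))
        return false;           /* gone, or not visible under this snapshot */

    /* ... examine tuple.t_data while the buffer pin is held ... */

    ReleaseBuffer(buf);         /* heap_fetch() left the buffer pinned for us */
    return true;
}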

◆ heap_finish_speculative()

void heap_finish_speculative ( Relation  relation,
ItemPointer  tid 
)

Definition at line 5818 of file heapam.c.

5819 {
5820  Buffer buffer;
5821  Page page;
5822  OffsetNumber offnum;
5823  ItemId lp = NULL;
5824  HeapTupleHeader htup;
5825 
5826  buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
5828  page = (Page) BufferGetPage(buffer);
5829 
5830  offnum = ItemPointerGetOffsetNumber(tid);
5831  if (PageGetMaxOffsetNumber(page) >= offnum)
5832  lp = PageGetItemId(page, offnum);
5833 
5834  if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
5835  elog(ERROR, "invalid lp");
5836 
5837  htup = (HeapTupleHeader) PageGetItem(page, lp);
5838 
5839  /* NO EREPORT(ERROR) from here till changes are logged */
5841 
5843 
5844  MarkBufferDirty(buffer);
5845 
5846  /*
5847  * Replace the speculative insertion token with a real t_ctid, pointing to
5848  * itself like it does on regular tuples.
5849  */
5850  htup->t_ctid = *tid;
5851 
5852  /* XLOG stuff */
5853  if (RelationNeedsWAL(relation))
5854  {
5855  xl_heap_confirm xlrec;
5856  XLogRecPtr recptr;
5857 
5858  xlrec.offnum = ItemPointerGetOffsetNumber(tid);
5859 
5860  XLogBeginInsert();
5861 
5862  /* We want the same filtering on this as on a plain insert */
5864 
5865  XLogRegisterData((char *) &xlrec, SizeOfHeapConfirm);
5866  XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
5867 
5868  recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_CONFIRM);
5869 
5870  PageSetLSN(page, recptr);
5871  }
5872 
5873  END_CRIT_SECTION();
5874 
5875  UnlockReleaseBuffer(buffer);
5876 }

References Assert, BUFFER_LOCK_EXCLUSIVE, BufferGetPage(), elog, END_CRIT_SECTION, ERROR, HeapTupleHeaderIsSpeculative, ItemIdIsNormal, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), LockBuffer(), MarkBufferDirty(), xl_heap_confirm::offnum, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PageSetLSN(), ReadBuffer(), REGBUF_STANDARD, RelationNeedsWAL, SizeOfHeapConfirm, START_CRIT_SECTION, HeapTupleHeaderData::t_ctid, UnlockReleaseBuffer(), XLOG_HEAP_CONFIRM, XLOG_INCLUDE_ORIGIN, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), XLogRegisterData(), and XLogSetRecordFlags().

Referenced by heapam_tuple_complete_speculative().
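
As a hedged sketch (mirroring what heapam_tuple_complete_speculative() does, not copied from it): after an insert made with HEAP_INSERT_SPECULATIVE, the caller either confirms the tuple or super-deletes it; relation, tuple and succeeded are assumed inputs here.

/* Confirm or abort a speculative insertion. */
if (succeeded)
    heap_finish_speculative(relation, &tuple->t_self); /* replace token with real ctid */
else
    heap_abort_speculative(relation, &tuple->t_self);  /* "super-delete" the tuple */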

◆ heap_freeze_prepared_tuples()

void heap_freeze_prepared_tuples ( Buffer  buffer,
HeapTupleFreeze tuples,
int  ntuples 
)

Definition at line 6898 of file heapam.c.

6899 {
6900  Page page = BufferGetPage(buffer);
6901 
6902  for (int i = 0; i < ntuples; i++)
6903  {
6904  HeapTupleFreeze *frz = tuples + i;
6905  ItemId itemid = PageGetItemId(page, frz->offset);
6906  HeapTupleHeader htup;
6907 
6908  htup = (HeapTupleHeader) PageGetItem(page, itemid);
6909  heap_execute_freeze_tuple(htup, frz);
6910  }
6911 }
static void heap_execute_freeze_tuple(HeapTupleHeader tuple, HeapTupleFreeze *frz)
Definition: heapam.c:6822
OffsetNumber offset
Definition: heapam.h:151

References BufferGetPage(), heap_execute_freeze_tuple(), i, HeapTupleFreeze::offset, PageGetItem(), and PageGetItemId().

Referenced by heap_page_prune_and_freeze().

◆ heap_freeze_tuple()

bool heap_freeze_tuple ( HeapTupleHeader  tuple,
TransactionId  relfrozenxid,
TransactionId  relminmxid,
TransactionId  FreezeLimit,
TransactionId  MultiXactCutoff 
)

Definition at line 6920 of file heapam.c.

6923 {
6924  HeapTupleFreeze frz;
6925  bool do_freeze;
6926  bool totally_frozen;
6927  struct VacuumCutoffs cutoffs;
6928  HeapPageFreeze pagefrz;
6929 
6930  cutoffs.relfrozenxid = relfrozenxid;
6931  cutoffs.relminmxid = relminmxid;
6932  cutoffs.OldestXmin = FreezeLimit;
6933  cutoffs.OldestMxact = MultiXactCutoff;
6934  cutoffs.FreezeLimit = FreezeLimit;
6935  cutoffs.MultiXactCutoff = MultiXactCutoff;
6936 
6937  pagefrz.freeze_required = true;
6938  pagefrz.FreezePageRelfrozenXid = FreezeLimit;
6939  pagefrz.FreezePageRelminMxid = MultiXactCutoff;
6940  pagefrz.NoFreezePageRelfrozenXid = FreezeLimit;
6941  pagefrz.NoFreezePageRelminMxid = MultiXactCutoff;
6942 
6943  do_freeze = heap_prepare_freeze_tuple(tuple, &cutoffs,
6944  &pagefrz, &frz, &totally_frozen);
6945 
6946  /*
6947  * Note that because this is not a WAL-logged operation, we don't need to
6948  * fill in the offset in the freeze record.
6949  */
6950 
6951  if (do_freeze)
6952  heap_execute_freeze_tuple(tuple, &frz);
6953  return do_freeze;
6954 }
bool heap_prepare_freeze_tuple(HeapTupleHeader tuple, const struct VacuumCutoffs *cutoffs, HeapPageFreeze *pagefrz, HeapTupleFreeze *frz, bool *totally_frozen)
Definition: heapam.c:6548
TransactionId FreezeLimit
Definition: vacuum.h:277
TransactionId relfrozenxid
Definition: vacuum.h:251
MultiXactId relminmxid
Definition: vacuum.h:252
MultiXactId MultiXactCutoff
Definition: vacuum.h:278

References VacuumCutoffs::FreezeLimit, heap_execute_freeze_tuple(), heap_prepare_freeze_tuple(), VacuumCutoffs::MultiXactCutoff, VacuumCutoffs::OldestMxact, VacuumCutoffs::OldestXmin, VacuumCutoffs::relfrozenxid, and VacuumCutoffs::relminmxid.

Referenced by rewrite_heap_tuple().
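
A minimal usage sketch, assuming a caller like rewrite_heap_tuple() that already has exclusive access to the tuple and has computed the freeze cutoffs; oldtup and cutoffs are assumed inputs:

/* Freeze one tuple header in place if the cutoffs require it. */
bool frozen = heap_freeze_tuple(oldtup->t_data,
                                cutoffs->relfrozenxid, cutoffs->relminmxid,
                                cutoffs->FreezeLimit, cutoffs->MultiXactCutoff);

if (frozen)
    elog(DEBUG2, "tuple header frozen in place");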

◆ heap_get_latest_tid()

void heap_get_latest_tid ( TableScanDesc  sscan,
ItemPointer  tid 
)

Definition at line 1782 of file heapam.c.

1784 {
1785  Relation relation = sscan->rs_rd;
1786  Snapshot snapshot = sscan->rs_snapshot;
1787  ItemPointerData ctid;
1788  TransactionId priorXmax;
1789 
1790  /*
1791  * table_tuple_get_latest_tid() verified that the passed in tid is valid.
1792  * Assume that t_ctid links are valid however - there shouldn't be invalid
1793  * ones in the table.
1794  */
1795  Assert(ItemPointerIsValid(tid));
1796 
1797  /*
1798  * Loop to chase down t_ctid links. At top of loop, ctid is the tuple we
1799  * need to examine, and *tid is the TID we will return if ctid turns out
1800  * to be bogus.
1801  *
1802  * Note that we will loop until we reach the end of the t_ctid chain.
1803  * Depending on the snapshot passed, there might be at most one visible
1804  * version of the row, but we don't try to optimize for that.
1805  */
1806  ctid = *tid;
1807  priorXmax = InvalidTransactionId; /* cannot check first XMIN */
1808  for (;;)
1809  {
1810  Buffer buffer;
1811  Page page;
1812  OffsetNumber offnum;
1813  ItemId lp;
1814  HeapTupleData tp;
1815  bool valid;
1816 
1817  /*
1818  * Read, pin, and lock the page.
1819  */
1820  buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(&ctid));
1821  LockBuffer(buffer, BUFFER_LOCK_SHARE);
1822  page = BufferGetPage(buffer);
1823 
1824  /*
1825  * Check for bogus item number. This is not treated as an error
1826  * condition because it can happen while following a t_ctid link. We
1827  * just assume that the prior tid is OK and return it unchanged.
1828  */
1829  offnum = ItemPointerGetOffsetNumber(&ctid);
1830  if (offnum < FirstOffsetNumber || offnum > PageGetMaxOffsetNumber(page))
1831  {
1832  UnlockReleaseBuffer(buffer);
1833  break;
1834  }
1835  lp = PageGetItemId(page, offnum);
1836  if (!ItemIdIsNormal(lp))
1837  {
1838  UnlockReleaseBuffer(buffer);
1839  break;
1840  }
1841 
1842  /* OK to access the tuple */
1843  tp.t_self = ctid;
1844  tp.t_data = (HeapTupleHeader) PageGetItem(page, lp);
1845  tp.t_len = ItemIdGetLength(lp);
1846  tp.t_tableOid = RelationGetRelid(relation);
1847 
1848  /*
1849  * After following a t_ctid link, we might arrive at an unrelated
1850  * tuple. Check for XMIN match.
1851  */
1852  if (TransactionIdIsValid(priorXmax) &&
1853  !TransactionIdEquals(priorXmax, HeapTupleHeaderGetXmin(tp.t_data)))
1854  {
1855  UnlockReleaseBuffer(buffer);
1856  break;
1857  }
1858 
1859  /*
1860  * Check tuple visibility; if visible, set it as the new result
1861  * candidate.
1862  */
1863  valid = HeapTupleSatisfiesVisibility(&tp, snapshot, buffer);
1864  HeapCheckForSerializableConflictOut(valid, relation, &tp, buffer, snapshot);
1865  if (valid)
1866  *tid = ctid;
1867 
1868  /*
1869  * If there's a valid t_ctid link, follow it, else we're done.
1870  */
1871  if ((tp.t_data->t_infomask & HEAP_XMAX_INVALID) ||
1872  HeapTupleHeaderIsOnlyLocked(tp.t_data) ||
1873  HeapTupleHeaderIndicatesMovedPartitions(tp.t_data) ||
1874  ItemPointerEquals(&tp.t_self, &tp.t_data->t_ctid))
1875  {
1876  UnlockReleaseBuffer(buffer);
1877  break;
1878  }
1879 
1880  ctid = tp.t_data->t_ctid;
1881  priorXmax = HeapTupleHeaderGetUpdateXid(tp.t_data);
1882  UnlockReleaseBuffer(buffer);
1883  } /* end of loop */
1884 }
#define HeapTupleHeaderIndicatesMovedPartitions(tup)
Definition: htup_details.h:444

References Assert, BUFFER_LOCK_SHARE, BufferGetPage(), HEAP_XMAX_INVALID, HeapCheckForSerializableConflictOut(), HeapTupleHeaderGetUpdateXid, HeapTupleHeaderGetXmin, HeapTupleHeaderIndicatesMovedPartitions, HeapTupleHeaderIsOnlyLocked(), HeapTupleSatisfiesVisibility(), InvalidTransactionId, ItemIdGetLength, ItemIdIsNormal, ItemPointerEquals(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), ItemPointerIsValid(), LockBuffer(), PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), ReadBuffer(), RelationGetRelid, TableScanDescData::rs_rd, TableScanDescData::rs_snapshot, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TransactionIdEquals, TransactionIdIsValid, and UnlockReleaseBuffer().
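
Callers normally reach this through table_tuple_get_latest_tid(); the following hedged sketch drives it directly, assuming rel and tid are supplied by the caller (compare the currtid_internal() code path):

Snapshot snapshot = RegisterSnapshot(GetLatestSnapshot());
TableScanDesc scan = table_beginscan_tid(rel, snapshot);

heap_get_latest_tid(scan, tid);     /* *tid is advanced to the latest visible version */

table_endscan(scan);
UnregisterSnapshot(snapshot);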

◆ heap_get_root_tuples()

void heap_get_root_tuples ( Page  page,
OffsetNumber root_offsets 
)

Definition at line 1764 of file pruneheap.c.

1765 {
1766  OffsetNumber offnum,
1767  maxoff;
1768 
1769  MemSet(root_offsets, InvalidOffsetNumber,
1770  MaxHeapTuplesPerPage * sizeof(OffsetNumber));
1771 
1772  maxoff = PageGetMaxOffsetNumber(page);
1773  for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum = OffsetNumberNext(offnum))
1774  {
1775  ItemId lp = PageGetItemId(page, offnum);
1776  HeapTupleHeader htup;
1777  OffsetNumber nextoffnum;
1778  TransactionId priorXmax;
1779 
1780  /* skip unused and dead items */
1781  if (!ItemIdIsUsed(lp) || ItemIdIsDead(lp))
1782  continue;
1783 
1784  if (ItemIdIsNormal(lp))
1785  {
1786  htup = (HeapTupleHeader) PageGetItem(page, lp);
1787 
1788  /*
1789  * Check if this tuple is part of a HOT-chain rooted at some other
1790  * tuple. If so, skip it for now; we'll process it when we find
1791  * its root.
1792  */
1793  if (HeapTupleHeaderIsHeapOnly(htup))
1794  continue;
1795 
1796  /*
1797  * This is either a plain tuple or the root of a HOT-chain.
1798  * Remember it in the mapping.
1799  */
1800  root_offsets[offnum - 1] = offnum;
1801 
1802  /* If it's not the start of a HOT-chain, we're done with it */
1803  if (!HeapTupleHeaderIsHotUpdated(htup))
1804  continue;
1805 
1806  /* Set up to scan the HOT-chain */
1807  nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
1808  priorXmax = HeapTupleHeaderGetUpdateXid(htup);
1809  }
1810  else
1811  {
1812  /* Must be a redirect item. We do not set its root_offsets entry */
1813  Assert(ItemIdIsRedirected(lp));
1814  /* Set up to scan the HOT-chain */
1815  nextoffnum = ItemIdGetRedirect(lp);
1816  priorXmax = InvalidTransactionId;
1817  }
1818 
1819  /*
1820  * Now follow the HOT-chain and collect other tuples in the chain.
1821  *
1822  * Note: Even though this is a nested loop, the complexity of the
1823  * function is O(N) because a tuple in the page should be visited not
1824  * more than twice, once in the outer loop and once in HOT-chain
1825  * chases.
1826  */
1827  for (;;)
1828  {
1829  /* Sanity check (pure paranoia) */
1830  if (offnum < FirstOffsetNumber)
1831  break;
1832 
1833  /*
1834  * An offset past the end of page's line pointer array is possible
1835  * when the array was truncated
1836  */
1837  if (offnum > maxoff)
1838  break;
1839 
1840  lp = PageGetItemId(page, nextoffnum);
1841 
1842  /* Check for broken chains */
1843  if (!ItemIdIsNormal(lp))
1844  break;
1845 
1846  htup = (HeapTupleHeader) PageGetItem(page, lp);
1847 
1848  if (TransactionIdIsValid(priorXmax) &&
1849  !TransactionIdEquals(priorXmax, HeapTupleHeaderGetXmin(htup)))
1850  break;
1851 
1852  /* Remember the root line pointer for this item */
1853  root_offsets[nextoffnum - 1] = offnum;
1854 
1855  /* Advance to next chain member, if any */
1856  if (!HeapTupleHeaderIsHotUpdated(htup))
1857  break;
1858 
1859  /* HOT implies it can't have moved to different partition */
1860  Assert(!HeapTupleHeaderIndicatesMovedPartitions(htup));
1861 
1862  nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
1863  priorXmax = HeapTupleHeaderGetUpdateXid(htup);
1864  }
1865  }
1866 }
#define MemSet(start, val, len)
Definition: c.h:1020
#define MaxHeapTuplesPerPage
Definition: htup_details.h:572
#define HeapTupleHeaderIsHotUpdated(tup)
Definition: htup_details.h:482
#define ItemIdGetRedirect(itemId)
Definition: itemid.h:78
#define ItemIdIsDead(itemId)
Definition: itemid.h:113
#define ItemIdIsUsed(itemId)
Definition: itemid.h:92
#define ItemIdIsRedirected(itemId)
Definition: itemid.h:106
#define InvalidOffsetNumber
Definition: off.h:26
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
#define FirstOffsetNumber
Definition: off.h:27

References Assert, FirstOffsetNumber, HeapTupleHeaderGetUpdateXid, HeapTupleHeaderGetXmin, HeapTupleHeaderIndicatesMovedPartitions, HeapTupleHeaderIsHeapOnly, HeapTupleHeaderIsHotUpdated, InvalidOffsetNumber, InvalidTransactionId, ItemIdGetRedirect, ItemIdIsDead, ItemIdIsNormal, ItemIdIsRedirected, ItemIdIsUsed, ItemPointerGetOffsetNumber(), MaxHeapTuplesPerPage, MemSet, OffsetNumberNext, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), HeapTupleHeaderData::t_ctid, TransactionIdEquals, and TransactionIdIsValid.

Referenced by heapam_index_build_range_scan(), and heapam_index_validate_scan().
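
A minimal sketch of building the HOT root-offset map for one page, assuming buf is a pinned buffer held by the caller (compare heapam_index_build_range_scan()):

OffsetNumber root_offsets[MaxHeapTuplesPerPage];

LockBuffer(buf, BUFFER_LOCK_SHARE);
heap_get_root_tuples(BufferGetPage(buf), root_offsets);
LockBuffer(buf, BUFFER_LOCK_UNLOCK);

/* root_offsets[offnum - 1] is now the root line pointer of offnum's HOT chain,
 * or InvalidOffsetNumber if the item has no entry */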

◆ heap_getnext()

HeapTuple heap_getnext ( TableScanDesc  sscan,
ScanDirection  direction 
)

Definition at line 1251 of file heapam.c.

1252 {
1253  HeapScanDesc scan = (HeapScanDesc) sscan;
1254 
1255  /*
1256  * This is still widely used directly, without going through table AM, so
1257  * add a safety check. It's possible we should, at a later point,
1258  * downgrade this to an assert. The reason for checking the AM routine,
1259  * rather than the AM oid, is that this allows to write regression tests
1260  * that create another AM reusing the heap handler.
1261  */
1262  if (unlikely(sscan->rs_rd->rd_tableam != GetHeapamTableAmRoutine()))
1263  ereport(ERROR,
1264  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1265  errmsg_internal("only heap AM is supported")));
1266 
1267  /*
1268  * We don't expect direct calls to heap_getnext with valid CheckXidAlive
1269  * for catalog or regular tables. See detailed comments in xact.c where
1270  * these variables are declared. Normally we have such a check at tableam
1271  * level API but this is called from many places so we need to ensure it
1272  * here.
1273  */
1274  if (unlikely(TransactionIdIsValid(CheckXidAlive) && !bsysscan))
1275  elog(ERROR, "unexpected heap_getnext call during logical decoding");
1276 
1277  /* Note: no locking manipulations needed */
1278 
1279  if (scan->rs_base.rs_flags & SO_ALLOW_PAGEMODE)
1280  heapgettup_pagemode(scan, direction,
1281  scan->rs_base.rs_nkeys, scan->rs_base.rs_key);
1282  else
1283  heapgettup(scan, direction,
1284  scan->rs_base.rs_nkeys, scan->rs_base.rs_key);
1285 
1286  if (scan->rs_ctup.t_data == NULL)
1287  return NULL;
1288 
1289  /*
1290  * if we get here it means we have a new current scan tuple, so point to
1291  * the proper return buffer and return the tuple.
1292  */
1293 
1294  pgstat_count_heap_getnext(scan->rs_base.rs_rd);
1295 
1296  return &scan->rs_ctup;
1297 }
#define unlikely(x)
Definition: c.h:311
int errmsg_internal(const char *fmt,...)
Definition: elog.c:1157
static void heapgettup(HeapScanDesc scan, ScanDirection dir, int nkeys, ScanKey key)
Definition: heapam.c:837
static void heapgettup_pagemode(HeapScanDesc scan, ScanDirection dir, int nkeys, ScanKey key)
Definition: heapam.c:947
const TableAmRoutine * GetHeapamTableAmRoutine(void)
#define pgstat_count_heap_getnext(rel)
Definition: pgstat.h:615
const struct TableAmRoutine * rd_tableam
Definition: rel.h:189
bool bsysscan
Definition: xact.c:98
TransactionId CheckXidAlive
Definition: xact.c:97

References bsysscan, CheckXidAlive, elog, ereport, errcode(), errmsg_internal(), ERROR, GetHeapamTableAmRoutine(), heapgettup(), heapgettup_pagemode(), pgstat_count_heap_getnext, RelationData::rd_tableam, HeapScanDescData::rs_base, HeapScanDescData::rs_ctup, TableScanDescData::rs_flags, TableScanDescData::rs_key, TableScanDescData::rs_nkeys, TableScanDescData::rs_rd, SO_ALLOW_PAGEMODE, HeapTupleData::t_data, TransactionIdIsValid, and unlikely.

Referenced by AlterTableMoveAll(), AlterTableSpaceOptions(), check_db_file_conflict(), CreateDatabaseUsingFileCopy(), do_autovacuum(), DropSetting(), DropTableSpace(), find_typed_table_dependencies(), get_all_vacuum_rels(), get_database_list(), get_subscription_list(), get_tables_to_cluster(), get_tablespace_name(), get_tablespace_oid(), GetAllTablesPublicationRelations(), getRelationsInNamespace(), GetSchemaPublicationRelations(), heapam_index_build_range_scan(), heapam_index_validate_scan(), index_update_stats(), objectsInSchemaToOids(), pgrowlocks(), pgstat_heap(), populate_typ_list(), ReindexMultipleTables(), remove_dbtablespaces(), RemoveSubscriptionRel(), RenameTableSpace(), ThereIsAtLeastOneRole(), and vac_truncate_clog().
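
A typical calling pattern, sketched here for illustration with an assumed already-open catalog relation rel (compare get_database_list() and the other callers above):

TableScanDesc scan = table_beginscan_catalog(rel, 0, NULL);
HeapTuple tuple;

while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
{
    /* tuple lives in the scan's current buffer; do not pfree it */
}
table_endscan(scan);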

◆ heap_getnextslot()

bool heap_getnextslot ( TableScanDesc  sscan,
ScanDirection  direction,
struct TupleTableSlot slot 
)

Definition at line 1300 of file heapam.c.

1301 {
1302  HeapScanDesc scan = (HeapScanDesc) sscan;
1303 
1304  /* Note: no locking manipulations needed */
1305 
1306  if (sscan->rs_flags & SO_ALLOW_PAGEMODE)
1307  heapgettup_pagemode(scan, direction, sscan->rs_nkeys, sscan->rs_key);
1308  else
1309  heapgettup(scan, direction, sscan->rs_nkeys, sscan->rs_key);
1310 
1311  if (scan->rs_ctup.t_data == NULL)
1312  {
1313  ExecClearTuple(slot);
1314  return false;
1315  }
1316 
1317  /*
1318  * if we get here it means we have a new current scan tuple, so point to
1319  * the proper return buffer and return the tuple.
1320  */
1321 
1322  pgstat_count_heap_getnext(scan->rs_base.rs_rd);
1323 
1324  ExecStoreBufferHeapTuple(&scan->rs_ctup, slot,
1325  scan->rs_cbuf);
1326  return true;
1327 }
TupleTableSlot * ExecStoreBufferHeapTuple(HeapTuple tuple, TupleTableSlot *slot, Buffer buffer)
Definition: execTuples.c:1479
static TupleTableSlot * ExecClearTuple(TupleTableSlot *slot)
Definition: tuptable.h:454

References ExecClearTuple(), ExecStoreBufferHeapTuple(), heapgettup(), heapgettup_pagemode(), pgstat_count_heap_getnext, HeapScanDescData::rs_base, HeapScanDescData::rs_cbuf, HeapScanDescData::rs_ctup, TableScanDescData::rs_flags, TableScanDescData::rs_key, TableScanDescData::rs_nkeys, TableScanDescData::rs_rd, SO_ALLOW_PAGEMODE, and HeapTupleData::t_data.
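
A hedged sketch of the slot-based scan loop, assuming rel and snapshot are supplied by the caller:

TupleTableSlot *slot = table_slot_create(rel, NULL);
TableScanDesc scan = table_beginscan(rel, snapshot, 0, NULL);

while (heap_getnextslot(scan, ForwardScanDirection, slot))
{
    /* the slot holds a buffer heap tuple; inspect it with slot_getattr() etc. */
}
table_endscan(scan);
ExecDropSingleTupleTableSlot(slot);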

◆ heap_getnextslot_tidrange()

bool heap_getnextslot_tidrange ( TableScanDesc  sscan,
ScanDirection  direction,
TupleTableSlot slot 
)

Definition at line 1403 of file heapam.c.

1405 {
1406  HeapScanDesc scan = (HeapScanDesc) sscan;
1407  ItemPointer mintid = &sscan->rs_mintid;
1408  ItemPointer maxtid = &sscan->rs_maxtid;
1409 
1410  /* Note: no locking manipulations needed */
1411  for (;;)
1412  {
1413  if (sscan->rs_flags & SO_ALLOW_PAGEMODE)
1414  heapgettup_pagemode(scan, direction, sscan->rs_nkeys, sscan->rs_key);
1415  else
1416  heapgettup(scan, direction, sscan->rs_nkeys, sscan->rs_key);
1417 
1418  if (scan->rs_ctup.t_data == NULL)
1419  {
1420  ExecClearTuple(slot);
1421  return false;
1422  }
1423 
1424  /*
1425  * heap_set_tidrange will have used heap_setscanlimits to limit the
1426  * range of pages we scan to only ones that can contain the TID range
1427  * we're scanning for. Here we must filter out any tuples from these
1428  * pages that are outside of that range.
1429  */
1430  if (ItemPointerCompare(&scan->rs_ctup.t_self, mintid) < 0)
1431  {
1432  ExecClearTuple(slot);
1433 
1434  /*
1435  * When scanning backwards, the TIDs will be in descending order.
1436  * Future tuples in this direction will be lower still, so we can
1437  * just return false to indicate there will be no more tuples.
1438  */
1439  if (ScanDirectionIsBackward(direction))
1440  return false;
1441 
1442  continue;
1443  }
1444 
1445  /*
1446  * Likewise for the final page, we must filter out TIDs greater than
1447  * maxtid.
1448  */
1449  if (ItemPointerCompare(&scan->rs_ctup.t_self, maxtid) > 0)
1450  {
1451  ExecClearTuple(slot);
1452 
1453  /*
1454  * When scanning forward, the TIDs will be in ascending order.
1455  * Future tuples in this direction will be higher still, so we can
1456  * just return false to indicate there will be no more tuples.
1457  */
1458  if (ScanDirectionIsForward(direction))
1459  return false;
1460  continue;
1461  }
1462 
1463  break;
1464  }
1465 
1466  /*
1467  * if we get here it means we have a new current scan tuple, so point to
1468  * the proper return buffer and return the tuple.
1469  */
1470  pgstat_count_heap_getnext(scan->rs_base.rs_rd);
1471 
1472  ExecStoreBufferHeapTuple(&scan->rs_ctup, slot, scan->rs_cbuf);
1473  return true;
1474 }
int32 ItemPointerCompare(ItemPointer arg1, ItemPointer arg2)
Definition: itemptr.c:51
#define ScanDirectionIsForward(direction)
Definition: sdir.h:64
#define ScanDirectionIsBackward(direction)
Definition: sdir.h:50
ItemPointerData rs_mintid
Definition: relscan.h:40
ItemPointerData rs_maxtid
Definition: relscan.h:41

References ExecClearTuple(), ExecStoreBufferHeapTuple(), heapgettup(), heapgettup_pagemode(), ItemPointerCompare(), pgstat_count_heap_getnext, HeapScanDescData::rs_base, HeapScanDescData::rs_cbuf, HeapScanDescData::rs_ctup, TableScanDescData::rs_flags, TableScanDescData::rs_key, TableScanDescData::rs_maxtid, TableScanDescData::rs_mintid, TableScanDescData::rs_nkeys, TableScanDescData::rs_rd, ScanDirectionIsBackward, ScanDirectionIsForward, SO_ALLOW_PAGEMODE, HeapTupleData::t_data, and HeapTupleData::t_self.
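
A hedged sketch of restricting a scan to a TID range, assuming rel, snapshot, start_block and end_block are supplied by the caller (the executor's TID Range Scan node drives this through the table AM instead of calling it directly):

ItemPointerData mintid;
ItemPointerData maxtid;
TupleTableSlot *slot = table_slot_create(rel, NULL);
TableScanDesc scan;

ItemPointerSet(&mintid, start_block, FirstOffsetNumber);
ItemPointerSet(&maxtid, end_block, MaxOffsetNumber);

scan = table_beginscan_tidrange(rel, snapshot, &mintid, &maxtid);
while (heap_getnextslot_tidrange(scan, ForwardScanDirection, slot))
{
    /* only tuples whose TID lies in [mintid, maxtid] are returned */
}
table_endscan(scan);
ExecDropSingleTupleTableSlot(slot);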

◆ heap_hot_search_buffer()

bool heap_hot_search_buffer ( ItemPointer  tid,
Relation  relation,
Buffer  buffer,
Snapshot  snapshot,
HeapTuple  heapTuple,
bool all_dead,
bool  first_call 
)

Definition at line 1630 of file heapam.c.

1633 {
1634  Page page = BufferGetPage(buffer);
1635  TransactionId prev_xmax = InvalidTransactionId;
1636  BlockNumber blkno;
1637  OffsetNumber offnum;
1638  bool at_chain_start;
1639  bool valid;
1640  bool skip;
1641  GlobalVisState *vistest = NULL;
1642 
1643  /* If this is not the first call, previous call returned a (live!) tuple */
1644  if (all_dead)
1645  *all_dead = first_call;
1646 
1647  blkno = ItemPointerGetBlockNumber(tid);
1648  offnum = ItemPointerGetOffsetNumber(tid);
1649  at_chain_start = first_call;
1650  skip = !first_call;
1651 
1652  /* XXX: we should assert that a snapshot is pushed or registered */
1653  Assert(TransactionIdIsValid(RecentXmin));
1654  Assert(BufferGetBlockNumber(buffer) == blkno);
1655 
1656  /* Scan through possible multiple members of HOT-chain */
1657  for (;;)
1658  {
1659  ItemId lp;
1660 
1661  /* check for bogus TID */
1662  if (offnum < FirstOffsetNumber || offnum > PageGetMaxOffsetNumber(page))
1663  break;
1664 
1665  lp = PageGetItemId(page, offnum);
1666 
1667  /* check for unused, dead, or redirected items */
1668  if (!ItemIdIsNormal(lp))
1669  {
1670  /* We should only see a redirect at start of chain */
1671  if (ItemIdIsRedirected(lp) && at_chain_start)
1672  {
1673  /* Follow the redirect */
1674  offnum = ItemIdGetRedirect(lp);
1675  at_chain_start = false;
1676  continue;
1677  }
1678  /* else must be end of chain */
1679  break;
1680  }
1681 
1682  /*
1683  * Update heapTuple to point to the element of the HOT chain we're
1684  * currently investigating. Having t_self set correctly is important
1685  * because the SSI checks and the *Satisfies routine for historical
1686  * MVCC snapshots need the correct tid to decide about the visibility.
1687  */
1688  heapTuple->t_data = (HeapTupleHeader) PageGetItem(page, lp);
1689  heapTuple->t_len = ItemIdGetLength(lp);
1690  heapTuple->t_tableOid = RelationGetRelid(relation);
1691  ItemPointerSet(&heapTuple->t_self, blkno, offnum);
1692 
1693  /*
1694  * Shouldn't see a HEAP_ONLY tuple at chain start.
1695  */
1696  if (at_chain_start && HeapTupleIsHeapOnly(heapTuple))
1697  break;
1698 
1699  /*
1700  * The xmin should match the previous xmax value, else chain is
1701  * broken.
1702  */
1703  if (TransactionIdIsValid(prev_xmax) &&
1704  !TransactionIdEquals(prev_xmax,
1705  HeapTupleHeaderGetXmin(heapTuple->t_data)))
1706  break;
1707 
1708  /*
1709  * When first_call is true (and thus, skip is initially false) we'll
1710  * return the first tuple we find. But on later passes, heapTuple
1711  * will initially be pointing to the tuple we returned last time.
1712  * Returning it again would be incorrect (and would loop forever), so
1713  * we skip it and return the next match we find.
1714  */
1715  if (!skip)
1716  {
1717  /* If it's visible per the snapshot, we must return it */
1718  valid = HeapTupleSatisfiesVisibility(heapTuple, snapshot, buffer);
1719  HeapCheckForSerializableConflictOut(valid, relation, heapTuple,
1720  buffer, snapshot);
1721 
1722  if (valid)
1723  {
1724  ItemPointerSetOffsetNumber(tid, offnum);
1725  PredicateLockTID(relation, &heapTuple->t_self, snapshot,
1726  HeapTupleHeaderGetXmin(heapTuple->t_data));
1727  if (all_dead)
1728  *all_dead = false;
1729  return true;
1730  }
1731  }
1732  skip = false;
1733 
1734  /*
1735  * If we can't see it, maybe no one else can either. At caller
1736  * request, check whether all chain members are dead to all
1737  * transactions.
1738  *
1739  * Note: if you change the criterion here for what is "dead", fix the
1740  * planner's get_actual_variable_range() function to match.
1741  */
1742  if (all_dead && *all_dead)
1743  {
1744  if (!vistest)
1745  vistest = GlobalVisTestFor(relation);
1746 
1747  if (!HeapTupleIsSurelyDead(heapTuple, vistest))
1748  *all_dead = false;
1749  }
1750 
1751  /*
1752  * Check to see if HOT chain continues past this tuple; if so fetch
1753  * the next offnum and loop around.
1754  */
1755  if (HeapTupleIsHotUpdated(heapTuple))
1756  {
1757  Assert(ItemPointerGetBlockNumber(&heapTuple->t_data->t_ctid) ==
1758  blkno);
1759  offnum = ItemPointerGetOffsetNumber(&heapTuple->t_data->t_ctid);
1760  at_chain_start = false;
1761  prev_xmax = HeapTupleHeaderGetUpdateXid(heapTuple->t_data);
1762  }
1763  else
1764  break; /* end of chain */
1765  }
1766 
1767  return false;
1768 }
bool HeapTupleIsSurelyDead(HeapTuple htup, GlobalVisState *vistest)
#define HeapTupleIsHeapOnly(tuple)
Definition: htup_details.h:683
#define HeapTupleIsHotUpdated(tuple)
Definition: htup_details.h:674
static void ItemPointerSet(ItemPointerData *pointer, BlockNumber blockNumber, OffsetNumber offNum)
Definition: itemptr.h:135
static void ItemPointerSetOffsetNumber(ItemPointerData *pointer, OffsetNumber offsetNumber)
Definition: itemptr.h:158
GlobalVisState * GlobalVisTestFor(Relation rel)
Definition: procarray.c:4094
TransactionId RecentXmin
Definition: snapmgr.c:99

References Assert, BufferGetBlockNumber(), BufferGetPage(), GlobalVisTestFor(), HeapCheckForSerializableConflictOut(), HeapTupleHeaderGetUpdateXid, HeapTupleHeaderGetXmin, HeapTupleIsHeapOnly, HeapTupleIsHotUpdated, HeapTupleIsSurelyDead(), HeapTupleSatisfiesVisibility(), InvalidTransactionId, ItemIdGetLength, ItemIdGetRedirect, ItemIdIsNormal, ItemIdIsRedirected, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), ItemPointerSet(), ItemPointerSetOffsetNumber(), PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PredicateLockTID(), RecentXmin, RelationGetRelid, skip, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TransactionIdEquals, and TransactionIdIsValid.

Referenced by heap_index_delete_tuples(), heapam_index_fetch_tuple(), and heapam_scan_bitmap_next_block().
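
A minimal sketch of resolving an index TID to the visible member of its HOT chain, assuming rel, snapshot and tid are supplied by the caller (compare heapam_index_fetch_tuple()):

Buffer buf = ReadBuffer(rel, ItemPointerGetBlockNumber(tid));
ItemPointerData ctid = *tid;
HeapTupleData heapTuple;
bool all_dead;
bool found;

LockBuffer(buf, BUFFER_LOCK_SHARE);
found = heap_hot_search_buffer(&ctid, rel, buf, snapshot,
                               &heapTuple, &all_dead, true);
LockBuffer(buf, BUFFER_LOCK_UNLOCK);

if (found)
{
    /* ctid now names the visible chain member; heapTuple points into buf */
}
ReleaseBuffer(buf);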

◆ heap_index_delete_tuples()

TransactionId heap_index_delete_tuples ( Relation  rel,
TM_IndexDeleteOp delstate 
)

Definition at line 7634 of file heapam.c.

7635 {
7636  /* Initial assumption is that earlier pruning took care of conflict */
7637  TransactionId snapshotConflictHorizon = InvalidTransactionId;
7638  BlockNumber blkno = InvalidBlockNumber;
7639  Buffer buf = InvalidBuffer;
7640  Page page = NULL;
7641  OffsetNumber maxoff = InvalidOffsetNumber;
7642  TransactionId priorXmax;
7643 #ifdef USE_PREFETCH
7644  IndexDeletePrefetchState prefetch_state;
7645  int prefetch_distance;
7646 #endif
7647  SnapshotData SnapshotNonVacuumable;
7648  int finalndeltids = 0,
7649  nblocksaccessed = 0;
7650 
7651  /* State that's only used in bottom-up index deletion case */
7652  int nblocksfavorable = 0;
7653  int curtargetfreespace = delstate->bottomupfreespace,
7654  lastfreespace = 0,
7655  actualfreespace = 0;
7656  bool bottomup_final_block = false;
7657 
7658  InitNonVacuumableSnapshot(SnapshotNonVacuumable, GlobalVisTestFor(rel));
7659 
7660  /* Sort caller's deltids array by TID for further processing */
7661  index_delete_sort(delstate);
7662 
7663  /*
7664  * Bottom-up case: resort deltids array in an order attuned to where the
7665  * greatest number of promising TIDs are to be found, and determine how
7666  * many blocks from the start of sorted array should be considered
7667  * favorable. This will also shrink the deltids array in order to
7668  * eliminate completely unfavorable blocks up front.
7669  */
7670  if (delstate->bottomup)
7671  nblocksfavorable = bottomup_sort_and_shrink(delstate);
7672 
7673 #ifdef USE_PREFETCH
7674  /* Initialize prefetch state. */
7675  prefetch_state.cur_hblkno = InvalidBlockNumber;
7676  prefetch_state.next_item = 0;
7677  prefetch_state.ndeltids = delstate->ndeltids;
7678  prefetch_state.deltids = delstate->deltids;
7679 
7680  /*
7681  * Determine the prefetch distance that we will attempt to maintain.
7682  *
7683  * Since the caller holds a buffer lock somewhere in rel, we'd better make
7684  * sure that isn't a catalog relation before we call code that does
7685  * syscache lookups, to avoid risk of deadlock.
7686  */
7687  if (IsCatalogRelation(rel))
7688  prefetch_distance = maintenance_io_concurrency;
7689  else
7690  prefetch_distance =
7691  get_tablespace_maintenance_io_concurrency(rel->rd_rel->reltablespace);
7692 
7693  /* Cap initial prefetch distance for bottom-up deletion caller */
7694  if (delstate->bottomup)
7695  {
7696  Assert(nblocksfavorable >= 1);
7697  Assert(nblocksfavorable <= BOTTOMUP_MAX_NBLOCKS);
7698  prefetch_distance = Min(prefetch_distance, nblocksfavorable);
7699  }
7700 
7701  /* Start prefetching. */
7702  index_delete_prefetch_buffer(rel, &prefetch_state, prefetch_distance);
7703 #endif
7704 
7705  /* Iterate over deltids, determine which to delete, check their horizon */
7706  Assert(delstate->ndeltids > 0);
7707  for (int i = 0; i < delstate->ndeltids; i++)
7708  {
7709  TM_IndexDelete *ideltid = &delstate->deltids[i];
7710  TM_IndexStatus *istatus = delstate->status + ideltid->id;
7711  ItemPointer htid = &ideltid->tid;
7712  OffsetNumber offnum;
7713 
7714  /*
7715  * Read buffer, and perform required extra steps each time a new block
7716  * is encountered. Avoid refetching if it's the same block as the one
7717  * from the last htid.
7718  */
7719  if (blkno == InvalidBlockNumber ||
7720  ItemPointerGetBlockNumber(htid) != blkno)
7721  {
7722  /*
7723  * Consider giving up early for bottom-up index deletion caller
7724  * first. (Only prefetch next-next block afterwards, when it
7725  * becomes clear that we're at least going to access the next
7726  * block in line.)
7727  *
7728  * Sometimes the first block frees so much space for bottom-up
7729  * caller that the deletion process can end without accessing any
7730  * more blocks. It is usually necessary to access 2 or 3 blocks
7731  * per bottom-up deletion operation, though.
7732  */
7733  if (delstate->bottomup)
7734  {
7735  /*
7736  * We often allow caller to delete a few additional items
7737  * whose entries we reached after the point that space target
7738  * from caller was satisfied. The cost of accessing the page
7739  * was already paid at that point, so it made sense to finish
7740  * it off. When that happened, we finalize everything here
7741  * (by finishing off the whole bottom-up deletion operation
7742  * without needlessly paying the cost of accessing any more
7743  * blocks).
7744  */
7745  if (bottomup_final_block)
7746  break;
7747 
7748  /*
7749  * Give up when we didn't enable our caller to free any
7750  * additional space as a result of processing the page that we
7751  * just finished up with. This rule is the main way in which
7752  * we keep the cost of bottom-up deletion under control.
7753  */
7754  if (nblocksaccessed >= 1 && actualfreespace == lastfreespace)
7755  break;
7756  lastfreespace = actualfreespace; /* for next time */
7757 
7758  /*
7759  * Deletion operation (which is bottom-up) will definitely
7760  * access the next block in line. Prepare for that now.
7761  *
7762  * Decay target free space so that we don't hang on for too
7763  * long with a marginal case. (Space target is only truly
7764  * helpful when it allows us to recognize that we don't need
7765  * to access more than 1 or 2 blocks to satisfy caller due to
7766  * agreeable workload characteristics.)
7767  *
7768  * We are a bit more patient when we encounter contiguous
7769  * blocks, though: these are treated as favorable blocks. The
7770  * decay process is only applied when the next block in line
7771  * is not a favorable/contiguous block. This is not an
7772  * exception to the general rule; we still insist on finding
7773  * at least one deletable item per block accessed. See
7774  * bottomup_nblocksfavorable() for full details of the theory
7775  * behind favorable blocks and heap block locality in general.
7776  *
7777  * Note: The first block in line is always treated as a
7778  * favorable block, so the earliest possible point that the
7779  * decay can be applied is just before we access the second
7780  * block in line. The Assert() verifies this for us.
7781  */
7782  Assert(nblocksaccessed > 0 || nblocksfavorable > 0);
7783  if (nblocksfavorable > 0)
7784  nblocksfavorable--;
7785  else
7786  curtargetfreespace /= 2;
7787  }
7788 
7789  /* release old buffer */
7790  if (BufferIsValid(buf))
7791  UnlockReleaseBuffer(buf);
7792 
7793  blkno = ItemPointerGetBlockNumber(htid);
7794  buf = ReadBuffer(rel, blkno);
7795  nblocksaccessed++;
7796  Assert(!delstate->bottomup ||
7797  nblocksaccessed <= BOTTOMUP_MAX_NBLOCKS);
7798 
7799 #ifdef USE_PREFETCH
7800 
7801  /*
7802  * To maintain the prefetch distance, prefetch one more page for
7803  * each page we read.
7804  */
7805  index_delete_prefetch_buffer(rel, &prefetch_state, 1);
7806 #endif
7807 
7808  LockBuffer(buf, BUFFER_LOCK_SHARE);
7809 
7810  page = BufferGetPage(buf);
7811  maxoff = PageGetMaxOffsetNumber(page);
7812  }
7813 
7814  /*
7815  * In passing, detect index corruption involving an index page with a
7816  * TID that points to a location in the heap that couldn't possibly be
7817  * correct. We only do this with actual TIDs from caller's index page
7818  * (not items reached by traversing through a HOT chain).
7819  */
7820  index_delete_check_htid(delstate, page, maxoff, htid, istatus);
7821 
7822  if (istatus->knowndeletable)
7823  Assert(!delstate->bottomup && !istatus->promising);
7824  else
7825  {
7826  ItemPointerData tmp = *htid;
7827  HeapTupleData heapTuple;
7828 
7829  /* Are any tuples from this HOT chain non-vacuumable? */
7830  if (heap_hot_search_buffer(&tmp, rel, buf, &SnapshotNonVacuumable,
7831  &heapTuple, NULL, true))
7832  continue; /* can't delete entry */
7833 
7834  /* Caller will delete, since whole HOT chain is vacuumable */
7835  istatus->knowndeletable = true;
7836 
7837  /* Maintain index free space info for bottom-up deletion case */
7838  if (delstate->bottomup)
7839  {
7840  Assert(istatus->freespace > 0);
7841  actualfreespace += istatus->freespace;
7842  if (actualfreespace >= curtargetfreespace)
7843  bottomup_final_block = true;
7844  }
7845  }
7846 
7847  /*
7848  * Maintain snapshotConflictHorizon value for deletion operation as a
7849  * whole by advancing current value using heap tuple headers. This is
7850  * loosely based on the logic for pruning a HOT chain.
7851  */
7852  offnum = ItemPointerGetOffsetNumber(htid);
7853  priorXmax = InvalidTransactionId; /* cannot check first XMIN */
7854  for (;;)
7855  {
7856  ItemId lp;
7857  HeapTupleHeader htup;
7858 
7859  /* Sanity check (pure paranoia) */
7860  if (offnum < FirstOffsetNumber)
7861  break;
7862 
7863  /*
7864  * An offset past the end of page's line pointer array is possible
7865  * when the array was truncated
7866  */
7867  if (offnum > maxoff)
7868  break;
7869 
7870  lp = PageGetItemId(page, offnum);
7871  if (ItemIdIsRedirected(lp))
7872  {
7873  offnum = ItemIdGetRedirect(lp);
7874  continue;
7875  }
7876 
7877  /*
7878  * We'll often encounter LP_DEAD line pointers (especially with an
7879  * entry marked knowndeletable by our caller up front). No heap
7880  * tuple headers get examined for an htid that leads us to an
7881  * LP_DEAD item. This is okay because the earlier pruning
7882  * operation that made the line pointer LP_DEAD in the first place
7883  * must have considered the original tuple header as part of
7884  * generating its own snapshotConflictHorizon value.
7885  *
7886  * Relying on XLOG_HEAP2_PRUNE_VACUUM_SCAN records like this is
7887  * the same strategy that index vacuuming uses in all cases. Index
7888  * VACUUM WAL records don't even have a snapshotConflictHorizon
7889  * field of their own for this reason.
7890  */
7891  if (!ItemIdIsNormal(lp))
7892  break;
7893 
7894  htup = (HeapTupleHeader) PageGetItem(page, lp);
7895 
7896  /*
7897  * Check the tuple XMIN against prior XMAX, if any
7898  */
7899  if (TransactionIdIsValid(priorXmax) &&
7900  !TransactionIdEquals(HeapTupleHeaderGetXmin(htup), priorXmax))
7901  break;
7902 
7903  HeapTupleHeaderAdvanceConflictHorizon(htup,
7904  &snapshotConflictHorizon);
7905 
7906  /*
7907  * If the tuple is not HOT-updated, then we are at the end of this
7908  * HOT-chain. No need to visit later tuples from the same update
7909  * chain (they get their own index entries) -- just move on to
7910  * next htid from index AM caller.
7911  */
7912  if (!HeapTupleHeaderIsHotUpdated(htup))
7913  break;
7914 
7915  /* Advance to next HOT chain member */
7916  Assert(ItemPointerGetBlockNumber(&htup->t_ctid) == blkno);
7917  offnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
7918  priorXmax = HeapTupleHeaderGetUpdateXid(htup);
7919  }
7920 
7921  /* Enable further/final shrinking of deltids for caller */
7922  finalndeltids = i + 1;
7923  }
7924 
7925  UnlockReleaseBuffer(buf);
7926 
7927  /*
7928  * Shrink deltids array to exclude non-deletable entries at the end. This
7929  * is not just a minor optimization. Final deltids array size might be
7930  * zero for a bottom-up caller. Index AM is explicitly allowed to rely on
7931  * ndeltids being zero in all cases with zero total deletable entries.
7932  */
7933  Assert(finalndeltids > 0 || delstate->bottomup);
7934  delstate->ndeltids = finalndeltids;
7935 
7936  return snapshotConflictHorizon;
7937 }
int maintenance_io_concurrency
Definition: bufmgr.c:157
#define Min(x, y)
Definition: c.h:1004
bool IsCatalogRelation(Relation relation)
Definition: catalog.c:103
static int bottomup_sort_and_shrink(TM_IndexDeleteOp *delstate)
Definition: heapam.c:8192
void HeapTupleHeaderAdvanceConflictHorizon(HeapTupleHeader tuple, TransactionId *snapshotConflictHorizon)
Definition: heapam.c:7489
static void index_delete_check_htid(TM_IndexDeleteOp *delstate, Page page, OffsetNumber maxoff, ItemPointer htid, TM_IndexStatus *istatus)
Definition: heapam.c:7574
#define BOTTOMUP_MAX_NBLOCKS
Definition: heapam.c:189
bool heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer, Snapshot snapshot, HeapTuple heapTuple, bool *all_dead, bool first_call)
Definition: heapam.c:1630
static void index_delete_sort(TM_IndexDeleteOp *delstate)
Definition: heapam.c:7979
#define InitNonVacuumableSnapshot(snapshotdata, vistestp)
Definition: snapmgr.h:48
int get_tablespace_maintenance_io_concurrency(Oid spcid)
Definition: spccache.c:229
TM_IndexStatus * status
Definition: tableam.h:255
int bottomupfreespace
Definition: tableam.h:250
TM_IndexDelete * deltids
Definition: tableam.h:254
ItemPointerData tid
Definition: tableam.h:213
bool knowndeletable
Definition: tableam.h:220
bool promising
Definition: tableam.h:223
int16 freespace
Definition: tableam.h:224

References Assert, TM_IndexDeleteOp::bottomup, BOTTOMUP_MAX_NBLOCKS, bottomup_sort_and_shrink(), TM_IndexDeleteOp::bottomupfreespace, buf, BUFFER_LOCK_SHARE, BufferGetPage(), BufferIsValid(), TM_IndexDeleteOp::deltids, FirstOffsetNumber, TM_IndexStatus::freespace, get_tablespace_maintenance_io_concurrency(), GlobalVisTestFor(), heap_hot_search_buffer(), HeapTupleHeaderAdvanceConflictHorizon(), HeapTupleHeaderGetUpdateXid, HeapTupleHeaderGetXmin, HeapTupleHeaderIsHotUpdated, i, TM_IndexDelete::id, index_delete_check_htid(), index_delete_sort(), InitNonVacuumableSnapshot, InvalidBlockNumber, InvalidBuffer, InvalidOffsetNumber, InvalidTransactionId, IsCatalogRelation(), ItemIdGetRedirect, ItemIdIsNormal, ItemIdIsRedirected, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), TM_IndexStatus::knowndeletable, LockBuffer(), maintenance_io_concurrency, Min, TM_IndexDeleteOp::ndeltids, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), TM_IndexStatus::promising, RelationData::rd_rel, ReadBuffer(), TM_IndexDeleteOp::status, HeapTupleHeaderData::t_ctid, TM_IndexDelete::tid, TransactionIdEquals, TransactionIdIsValid, and UnlockReleaseBuffer().

◆ heap_inplace_update()

void heap_inplace_update ( Relation  relation,
HeapTuple  tuple 
)

Definition at line 6062 of file heapam.c.

6063 {
6064  Buffer buffer;
6065  Page page;
6066  OffsetNumber offnum;
6067  ItemId lp = NULL;
6068  HeapTupleHeader htup;
6069  uint32 oldlen;
6070  uint32 newlen;
6071 
6072  /*
6073  * For now, we don't allow parallel updates. Unlike a regular update,
6074  * this should never create a combo CID, so it might be possible to relax
6075  * this restriction, but not without more thought and testing. It's not
6076  * clear that it would be useful, anyway.
6077  */
6078  if (IsInParallelMode())
6079  ereport(ERROR,
6080  (errcode(ERRCODE_INVALID_TRANSACTION_STATE),
6081  errmsg("cannot update tuples during a parallel operation")));
6082 
6083  buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(&(tuple->t_self)));
6084  LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
6085  page = (Page) BufferGetPage(buffer);
6086 
6087  offnum = ItemPointerGetOffsetNumber(&(tuple->t_self));
6088  if (PageGetMaxOffsetNumber(page) >= offnum)
6089  lp = PageGetItemId(page, offnum);
6090 
6091  if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
6092  elog(ERROR, "invalid lp");
6093 
6094  htup = (HeapTupleHeader) PageGetItem(page, lp);
6095 
6096  oldlen = ItemIdGetLength(lp) - htup->t_hoff;
6097  newlen = tuple->t_len - tuple->t_data->t_hoff;
6098  if (oldlen != newlen || htup->t_hoff != tuple->t_data->t_hoff)
6099  elog(ERROR, "wrong tuple length");
6100 
6101  /* NO EREPORT(ERROR) from here till changes are logged */
6102  START_CRIT_SECTION();
6103 
6104  memcpy((char *) htup + htup->t_hoff,
6105  (char *) tuple->t_data + tuple->t_data->t_hoff,
6106  newlen);
6107 
6108  MarkBufferDirty(buffer);
6109 
6110  /* XLOG stuff */
6111  if (RelationNeedsWAL(relation))
6112  {
6113  xl_heap_inplace xlrec;
6114  XLogRecPtr recptr;
6115 
6116  xlrec.offnum = ItemPointerGetOffsetNumber(&tuple->t_self);
6117 
6118  XLogBeginInsert();
6119  XLogRegisterData((char *) &xlrec, SizeOfHeapInplace);
6120 
6121  XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
6122  XLogRegisterBufData(0, (char *) htup + htup->t_hoff, newlen);
6123 
6124  /* inplace updates aren't decoded atm, don't log the origin */
6125 
6126  recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_INPLACE);
6127 
6128  PageSetLSN(page, recptr);
6129  }
6130 
6131  END_CRIT_SECTION();
6132 
6133  UnlockReleaseBuffer(buffer);
6134 
6135  /*
6136  * Send out shared cache inval if necessary. Note that because we only
6137  * pass the new version of the tuple, this mustn't be used for any
6138  * operations that could change catcache lookup keys. But we aren't
6139  * bothering with index updates either, so that's true a fortiori.
6140  */
6141  if (!IsBootstrapProcessingMode())
6142  CacheInvalidateHeapTuple(relation, tuple, NULL);
6143 }
unsigned int uint32
Definition: c.h:506
#define SizeOfHeapInplace
Definition: heapam_xlog.h:431
#define XLOG_HEAP_INPLACE
Definition: heapam_xlog.h:39
#define IsBootstrapProcessingMode()
Definition: miscadmin.h:454
OffsetNumber offnum
Definition: heapam_xlog.h:427
void XLogRegisterBufData(uint8 block_id, char *data, uint32 len)
Definition: xloginsert.c:405

References BUFFER_LOCK_EXCLUSIVE, BufferGetPage(), CacheInvalidateHeapTuple(), elog, END_CRIT_SECTION, ereport, errcode(), errmsg(), ERROR, IsBootstrapProcessingMode, IsInParallelMode(), ItemIdGetLength, ItemIdIsNormal, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), LockBuffer(), MarkBufferDirty(), xl_heap_inplace::offnum, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PageSetLSN(), ReadBuffer(), REGBUF_STANDARD, RelationNeedsWAL, SizeOfHeapInplace, START_CRIT_SECTION, HeapTupleData::t_data, HeapTupleHeaderData::t_hoff, HeapTupleData::t_len, HeapTupleData::t_self, UnlockReleaseBuffer(), XLOG_HEAP_INPLACE, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by create_toast_table(), dropdb(), EventTriggerOnLogin(), index_update_stats(), vac_update_datfrozenxid(), and vac_update_relstats().
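
A hedged sketch of the vac_update_relstats()-style usage: overwrite fixed-width fields of a catalog tuple without changing its length; rel (pg_class), reltup, num_pages and num_tuples are assumed inputs:

HeapTuple ctup = heap_copytuple(reltup);        /* modifiable copy, same length */
Form_pg_class pgcform = (Form_pg_class) GETSTRUCT(ctup);

pgcform->relpages = (int32) num_pages;          /* fixed-width fields only */
pgcform->reltuples = (float4) num_tuples;

heap_inplace_update(rel, ctup);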

◆ heap_insert()

void heap_insert ( Relation  relation,
HeapTuple  tup,
CommandId  cid,
int  options,
BulkInsertState  bistate 
)

Definition at line 1993 of file heapam.c.

1995 {
1996  TransactionId xid = GetCurrentTransactionId();
1997  HeapTuple heaptup;
1998  Buffer buffer;
1999  Buffer vmbuffer = InvalidBuffer;
2000  bool all_visible_cleared = false;
2001 
2002  /* Cheap, simplistic check that the tuple matches the rel's rowtype. */
2003  Assert(HeapTupleHeaderGetNatts(tup->t_data) <=
2004  RelationGetNumberOfAttributes(relation));
2005 
2006  /*
2007  * Fill in tuple header fields and toast the tuple if necessary.
2008  *
2009  * Note: below this point, heaptup is the data we actually intend to store
2010  * into the relation; tup is the caller's original untoasted data.
2011  */
2012  heaptup = heap_prepare_insert(relation, tup, xid, cid, options);
2013 
2014  /*
2015  * Find buffer to insert this tuple into. If the page is all visible,
2016  * this will also pin the requisite visibility map page.
2017  */
2018  buffer = RelationGetBufferForTuple(relation, heaptup->t_len,
2019  InvalidBuffer, options, bistate,
2020  &vmbuffer, NULL,
2021  0);
2022 
2023  /*
2024  * We're about to do the actual insert -- but check for conflict first, to
2025  * avoid possibly having to roll back work we've just done.
2026  *
2027  * This is safe without a recheck as long as there is no possibility of
2028  * another process scanning the page between this check and the insert
2029  * being visible to the scan (i.e., an exclusive buffer content lock is
2030  * continuously held from this point until the tuple insert is visible).
2031  *
2032  * For a heap insert, we only need to check for table-level SSI locks. Our
2033  * new tuple can't possibly conflict with existing tuple locks, and heap
2034  * page locks are only consolidated versions of tuple locks; they do not
2035  * lock "gaps" as index page locks do. So we don't need to specify a
2036  * buffer when making the call, which makes for a faster check.
2037  */
2038  CheckForSerializableConflictIn(relation, NULL, InvalidBlockNumber);
2039 
2040  /* NO EREPORT(ERROR) from here till changes are logged */
2041  START_CRIT_SECTION();
2042 
2043  RelationPutHeapTuple(relation, buffer, heaptup,
2044  (options & HEAP_INSERT_SPECULATIVE) != 0);
2045 
2046  if (PageIsAllVisible(BufferGetPage(buffer)))
2047  {
2048  all_visible_cleared = true;
2050  visibilitymap_clear(relation,
2051  ItemPointerGetBlockNumber(&(heaptup->t_self)),
2052  vmbuffer, VISIBILITYMAP_VALID_BITS);
2053  }
2054 
2055  /*
2056  * XXX Should we set PageSetPrunable on this page ?
2057  *
2058  * The inserting transaction may eventually abort thus making this tuple
2059  * DEAD and hence available for pruning. Though we don't want to optimize
2060  * for aborts, if no other tuple in this page is UPDATEd/DELETEd, the
2061  * aborted tuple will never be pruned until next vacuum is triggered.
2062  *
2063  * If you do add PageSetPrunable here, add it in heap_xlog_insert too.
2064  */
2065 
2066  MarkBufferDirty(buffer);
2067 
2068  /* XLOG stuff */
2069  if (RelationNeedsWAL(relation))
2070  {
2071  xl_heap_insert xlrec;
2072  xl_heap_header xlhdr;
2073  XLogRecPtr recptr;
2074  Page page = BufferGetPage(buffer);
2075  uint8 info = XLOG_HEAP_INSERT;
2076  int bufflags = 0;
2077 
2078  /*
2079  * If this is a catalog, we need to transmit combo CIDs to properly
2080  * decode, so log that as well.
2081  */
2082  if (RelationIsAccessibleInLogicalDecoding(relation))
2083  log_heap_new_cid(relation, heaptup);
2084 
2085  /*
2086  * If this is the single and first tuple on page, we can reinit the
2087  * page instead of restoring the whole thing. Set flag, and hide
2088  * buffer references from XLogInsert.
2089  */
2090  if (ItemPointerGetOffsetNumber(&(heaptup->t_self)) == FirstOffsetNumber &&
2091  PageGetMaxOffsetNumber(page) == FirstOffsetNumber)
2092  {
2093  info |= XLOG_HEAP_INIT_PAGE;
2094  bufflags |= REGBUF_WILL_INIT;
2095  }
2096 
2097  xlrec.offnum = ItemPointerGetOffsetNumber(&heaptup->t_self);
2098  xlrec.flags = 0;
2099  if (all_visible_cleared)
2100  xlrec.flags |= XLH_INSERT_ALL_VISIBLE_CLEARED;
2101  if (options & HEAP_INSERT_SPECULATIVE)
2102  xlrec.flags |= XLH_INSERT_IS_SPECULATIVE;
2103  Assert(ItemPointerGetBlockNumber(&heaptup->t_self) == BufferGetBlockNumber(buffer));
2104 
2105  /*
2106  * For logical decoding, we need the tuple even if we're doing a full
2107  * page write, so make sure it's included even if we take a full-page
2108  * image. (XXX We could alternatively store a pointer into the FPW).
2109  */
2110  if (RelationIsLogicallyLogged(relation) &&
2111  !(options & HEAP_INSERT_NO_LOGICAL))
2112  {
2113  xlrec.flags |= XLH_INSERT_CONTAINS_NEW_TUPLE;
2114  bufflags |= REGBUF_KEEP_DATA;
2115 
2116  if (IsToastRelation(relation))
2117  xlrec.flags |= XLH_INSERT_ON_TOAST_RELATION;
2118  }
2119 
2120  XLogBeginInsert();
2121  XLogRegisterData((char *) &xlrec, SizeOfHeapInsert);
2122 
2123  xlhdr.t_infomask2 = heaptup->t_data->t_infomask2;
2124  xlhdr.t_infomask = heaptup->t_data->t_infomask;
2125  xlhdr.t_hoff = heaptup->t_data->t_hoff;
2126 
2127  /*
2128  * note we mark xlhdr as belonging to buffer; if XLogInsert decides to
2129  * write the whole page to the xlog, we don't need to store
2130  * xl_heap_header in the xlog.
2131  */
2132  XLogRegisterBuffer(0, buffer, REGBUF_STANDARD | bufflags);
2133  XLogRegisterBufData(0, (char *) &xlhdr, SizeOfHeapHeader);
2134  /* PG73FORMAT: write bitmap [+ padding] [+ oid] + data */
2135  XLogRegisterBufData(0,
2136  (char *) heaptup->t_data + SizeofHeapTupleHeader,
2137  heaptup->t_len - SizeofHeapTupleHeader);
2138 
2139  /* filtering by origin on a row level is much more efficient */
2140  XLogSetRecordFlags(XLOG_INCLUDE_ORIGIN);
2141 
2142  recptr = XLogInsert(RM_HEAP_ID, info);
2143 
2144  PageSetLSN(page, recptr);
2145  }
2146 
2147  END_CRIT_SECTION();
2148 
2149  UnlockReleaseBuffer(buffer);
2150  if (vmbuffer != InvalidBuffer)
2151  ReleaseBuffer(vmbuffer);
2152 
2153  /*
2154  * If tuple is cachable, mark it for invalidation from the caches in case
2155  * we abort. Note it is OK to do this after releasing the buffer, because
2156  * the heaptup data structure is all in local memory, not in the shared
2157  * buffer.
2158  */
2159  CacheInvalidateHeapTuple(relation, heaptup, NULL);
2160 
2161  /* Note: speculative insertions are counted too, even if aborted later */
2162  pgstat_count_heap_insert(relation, 1);
2163 
2164  /*
2165  * If heaptup is a private copy, release it. Don't forget to copy t_self
2166  * back to the caller's image, too.
2167  */
2168  if (heaptup != tup)
2169  {
2170  tup->t_self = heaptup->t_self;
2171  heap_freetuple(heaptup);
2172  }
2173 }
unsigned char uint8
Definition: c.h:504
static HeapTuple heap_prepare_insert(Relation relation, HeapTuple tup, TransactionId xid, CommandId cid, int options)
Definition: heapam.c:2182
#define HEAP_INSERT_SPECULATIVE
Definition: heapam.h:38
#define HEAP_INSERT_NO_LOGICAL
Definition: heapam.h:37
#define XLH_INSERT_ON_TOAST_RELATION
Definition: heapam_xlog.h:75
#define XLH_INSERT_IS_SPECULATIVE
Definition: heapam_xlog.h:73
#define XLH_INSERT_ALL_VISIBLE_CLEARED
Definition: heapam_xlog.h:71
#define XLOG_HEAP_INSERT
Definition: heapam_xlog.h:32
#define SizeOfHeapInsert
Definition: heapam_xlog.h:167
#define XLH_INSERT_CONTAINS_NEW_TUPLE
Definition: heapam_xlog.h:74
#define XLOG_HEAP_INIT_PAGE
Definition: heapam_xlog.h:46
void RelationPutHeapTuple(Relation relation, Buffer buffer, HeapTuple tuple, bool token)
Definition: hio.c:35
Buffer RelationGetBufferForTuple(Relation relation, Size len, Buffer otherBuffer, int options, BulkInsertState bistate, Buffer *vmbuffer, Buffer *vmbuffer_other, int num_pages)
Definition: hio.c:502
#define HeapTupleHeaderGetNatts(tup)
Definition: htup_details.h:529
void pgstat_count_heap_insert(Relation rel, PgStat_Counter n)
#define RelationIsLogicallyLogged(relation)
Definition: rel.h:701
#define RelationGetNumberOfAttributes(relation)
Definition: rel.h:511
OffsetNumber offnum
Definition: heapam_xlog.h:161
#define REGBUF_KEEP_DATA
Definition: xloginsert.h:35
#define REGBUF_WILL_INIT
Definition: xloginsert.h:33

References Assert, BufferGetBlockNumber(), BufferGetPage(), CacheInvalidateHeapTuple(), CheckForSerializableConflictIn(), END_CRIT_SECTION, FirstOffsetNumber, xl_heap_insert::flags, GetCurrentTransactionId(), heap_freetuple(), HEAP_INSERT_NO_LOGICAL, HEAP_INSERT_SPECULATIVE, heap_prepare_insert(), HeapTupleHeaderGetNatts, InvalidBlockNumber, InvalidBuffer, IsToastRelation(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), log_heap_new_cid(), MarkBufferDirty(), xl_heap_insert::offnum, PageClearAllVisible(), PageGetMaxOffsetNumber(), PageIsAllVisible(), PageSetLSN(), pgstat_count_heap_insert(), REGBUF_KEEP_DATA, REGBUF_STANDARD, REGBUF_WILL_INIT, RelationGetBufferForTuple(), RelationGetNumberOfAttributes, RelationIsAccessibleInLogicalDecoding, RelationIsLogicallyLogged, RelationNeedsWAL, RelationPutHeapTuple(), ReleaseBuffer(), SizeOfHeapHeader, SizeOfHeapInsert, SizeofHeapTupleHeader, START_CRIT_SECTION, HeapTupleData::t_data, xl_heap_header::t_hoff, HeapTupleHeaderData::t_hoff, xl_heap_header::t_infomask, HeapTupleHeaderData::t_infomask, xl_heap_header::t_infomask2, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, UnlockReleaseBuffer(), visibilitymap_clear(), VISIBILITYMAP_VALID_BITS, XLH_INSERT_ALL_VISIBLE_CLEARED, XLH_INSERT_CONTAINS_NEW_TUPLE, XLH_INSERT_IS_SPECULATIVE, XLH_INSERT_ON_TOAST_RELATION, XLOG_HEAP_INIT_PAGE, XLOG_HEAP_INSERT, XLOG_INCLUDE_ORIGIN, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), XLogRegisterData(), and XLogSetRecordFlags().

Referenced by heapam_tuple_insert(), heapam_tuple_insert_speculative(), simple_heap_insert(), and toast_save_datum().
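
A minimal sketch of a direct, non-speculative insert, assuming rel, values and isnull are supplied by the caller; ordinary DML goes through the table AM (heapam_tuple_insert()) rather than calling this directly:

HeapTuple tup = heap_form_tuple(RelationGetDescr(rel), values, isnull);

heap_insert(rel, tup, GetCurrentCommandId(true), 0 /* options */, NULL /* bistate */);
/* tup->t_self now holds the new TID; index entries and other follow-up
 * work remain the caller's responsibility */
heap_freetuple(tup);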

◆ heap_lock_tuple()

TM_Result heap_lock_tuple ( Relation  relation,
HeapTuple  tuple,
CommandId  cid,
LockTupleMode  mode,
LockWaitPolicy  wait_policy,
bool  follow_updates,
Buffer buffer,
struct TM_FailureData tmfd 
)

Definition at line 4309 of file heapam.c.

4313 {
4314  TM_Result result;
4315  ItemPointer tid = &(tuple->t_self);
4316  ItemId lp;
4317  Page page;
4318  Buffer vmbuffer = InvalidBuffer;
4319  BlockNumber block;
4320  TransactionId xid,
4321  xmax;
4322  uint16 old_infomask,
4323  new_infomask,
4324  new_infomask2;
4325  bool first_time = true;
4326  bool skip_tuple_lock = false;
4327  bool have_tuple_lock = false;
4328  bool cleared_all_frozen = false;
4329 
4330  *buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
4331  block = ItemPointerGetBlockNumber(tid);
4332 
4333  /*
4334  * Before locking the buffer, pin the visibility map page if it appears to
4335  * be necessary. Since we haven't got the lock yet, someone else might be
4336  * in the middle of changing this, so we'll need to recheck after we have
4337  * the lock.
4338  */
4339  if (PageIsAllVisible(BufferGetPage(*buffer)))
4340  visibilitymap_pin(relation, block, &vmbuffer);
4341 
4342  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4343 
4344  page = BufferGetPage(*buffer);
4345  lp = PageGetItemId(page, ItemPointerGetOffsetNumber(tid));
4346  Assert(ItemIdIsNormal(lp));
4347 
4348  tuple->t_data = (HeapTupleHeader) PageGetItem(page, lp);
4349  tuple->t_len = ItemIdGetLength(lp);
4350  tuple->t_tableOid = RelationGetRelid(relation);
4351 
4352 l3:
4353  result = HeapTupleSatisfiesUpdate(tuple, cid, *buffer);
4354 
4355  if (result == TM_Invisible)
4356  {
4357  /*
4358  * This is possible, but only when locking a tuple for ON CONFLICT
4359  * UPDATE. We return this value here rather than throwing an error in
4360  * order to give that case the opportunity to throw a more specific
4361  * error.
4362  */
4363  result = TM_Invisible;
4364  goto out_locked;
4365  }
4366  else if (result == TM_BeingModified ||
4367  result == TM_Updated ||
4368  result == TM_Deleted)
4369  {
4370  TransactionId xwait;
4371  uint16 infomask;
4372  uint16 infomask2;
4373  bool require_sleep;
4374  ItemPointerData t_ctid;
4375 
4376  /* must copy state data before unlocking buffer */
4377  xwait = HeapTupleHeaderGetRawXmax(tuple->t_data);
4378  infomask = tuple->t_data->t_infomask;
4379  infomask2 = tuple->t_data->t_infomask2;
4380  ItemPointerCopy(&tuple->t_data->t_ctid, &t_ctid);
4381 
4382  LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
4383 
4384  /*
4385  * If any subtransaction of the current top transaction already holds
4386  * a lock as strong as or stronger than what we're requesting, we
4387  * effectively hold the desired lock already. We *must* succeed
4388  * without trying to take the tuple lock, else we will deadlock
4389  * against anyone wanting to acquire a stronger lock.
4390  *
4391  * Note we only do this the first time we loop on the HTSU result;
4392  * there is no point in testing in subsequent passes, because
4393  * evidently our own transaction cannot have acquired a new lock after
4394  * the first time we checked.
4395  */
4396  if (first_time)
4397  {
4398  first_time = false;
4399 
4400  if (infomask & HEAP_XMAX_IS_MULTI)
4401  {
4402  int i;
4403  int nmembers;
4404  MultiXactMember *members;
4405 
4406  /*
4407  * We don't need to allow old multixacts here; if that had
4408  * been the case, HeapTupleSatisfiesUpdate would have returned
4409  * MayBeUpdated and we wouldn't be here.
4410  */
4411  nmembers =
4412  GetMultiXactIdMembers(xwait, &members, false,
4413  HEAP_XMAX_IS_LOCKED_ONLY(infomask));
4414 
4415  for (i = 0; i < nmembers; i++)
4416  {
4417  /* only consider members of our own transaction */
4418  if (!TransactionIdIsCurrentTransactionId(members[i].xid))
4419  continue;
4420 
4421  if (TUPLOCK_from_mxstatus(members[i].status) >= mode)
4422  {
4423  pfree(members);
4424  result = TM_Ok;
4425  goto out_unlocked;
4426  }
4427  else
4428  {
4429  /*
4430  * Disable acquisition of the heavyweight tuple lock.
4431  * Otherwise, when promoting a weaker lock, we might
4432  * deadlock with another locker that has acquired the
4433  * heavyweight tuple lock and is waiting for our
4434  * transaction to finish.
4435  *
4436  * Note that in this case we still need to wait for
4437  * the multixact if required, to avoid acquiring
4438  * conflicting locks.
4439  */
4440  skip_tuple_lock = true;
4441  }
4442  }
4443 
4444  if (members)
4445  pfree(members);
4446  }
4447  else if (TransactionIdIsCurrentTransactionId(xwait))
4448  {
4449  switch (mode)
4450  {
4451  case LockTupleKeyShare:
4452  Assert(HEAP_XMAX_IS_KEYSHR_LOCKED(infomask) ||
4453  HEAP_XMAX_IS_SHR_LOCKED(infomask) ||
4454  HEAP_XMAX_IS_EXCL_LOCKED(infomask));
4455  result = TM_Ok;
4456  goto out_unlocked;
4457  case LockTupleShare:
4458  if (HEAP_XMAX_IS_SHR_LOCKED(infomask) ||
4459  HEAP_XMAX_IS_EXCL_LOCKED(infomask))
4460  {
4461  result = TM_Ok;
4462  goto out_unlocked;
4463  }
4464  break;
4465  case LockTupleNoKeyExclusive:
4466  if (HEAP_XMAX_IS_EXCL_LOCKED(infomask))
4467  {
4468  result = TM_Ok;
4469  goto out_unlocked;
4470  }
4471  break;
4472  case LockTupleExclusive:
4473  if (HEAP_XMAX_IS_EXCL_LOCKED(infomask) &&
4474  infomask2 & HEAP_KEYS_UPDATED)
4475  {
4476  result = TM_Ok;
4477  goto out_unlocked;
4478  }
4479  break;
4480  }
4481  }
4482  }
4483 
4484  /*
4485  * Initially assume that we will have to wait for the locking
4486  * transaction(s) to finish. We check various cases below in which
4487  * this can be turned off.
4488  */
4489  require_sleep = true;
4490  if (mode == LockTupleKeyShare)
4491  {
4492  /*
4493  * If we're requesting KeyShare, and there's no update present, we
4494  * don't need to wait. Even if there is an update, we can still
4495  * continue if the key hasn't been modified.
4496  *
4497  * However, if there are updates, we need to walk the update chain
4498  * to mark future versions of the row as locked, too. That way,
4499  * if somebody deletes that future version, we're protected
4500  * against the key going away. This locking of future versions
4501  * could block momentarily, if a concurrent transaction is
4502  * deleting a key; or it could return a value to the effect that
4503  * the transaction deleting the key has already committed. So we
4504  * do this before re-locking the buffer; otherwise this would be
4505  * prone to deadlocks.
4506  *
4507  * Note that the TID we're locking was grabbed before we unlocked
4508  * the buffer. For it to change while we're not looking, the
4509  * other properties we're testing for below after re-locking the
4510  * buffer would also change, in which case we would restart this
4511  * loop above.
4512  */
4513  if (!(infomask2 & HEAP_KEYS_UPDATED))
4514  {
4515  bool updated;
4516 
4517  updated = !HEAP_XMAX_IS_LOCKED_ONLY(infomask);
4518 
4519  /*
4520  * If there are updates, follow the update chain; bail out if
4521  * that cannot be done.
4522  */
4523  if (follow_updates && updated)
4524  {
4525  TM_Result res;
4526 
4527  res = heap_lock_updated_tuple(relation, tuple, &t_ctid,
4528  GetCurrentTransactionId(),
4529  mode);
4530  if (res != TM_Ok)
4531  {
4532  result = res;
4533  /* recovery code expects to have buffer lock held */
4534  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4535  goto failed;
4536  }
4537  }
4538 
4539  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4540 
4541  /*
4542  * Make sure it's still an appropriate lock, else start over.
4543  * Also, if it wasn't updated before we released the lock, but
4544  * is updated now, we start over too; the reason is that we
4545  * now need to follow the update chain to lock the new
4546  * versions.
4547  */
4548  if (!HeapTupleHeaderIsOnlyLocked(tuple->t_data) &&
4549  ((tuple->t_data->t_infomask2 & HEAP_KEYS_UPDATED) ||
4550  !updated))
4551  goto l3;
4552 
4553  /* Things look okay, so we can skip sleeping */
4554  require_sleep = false;
4555 
4556  /*
4557  * Note we allow Xmax to change here; other updaters/lockers
4558  * could have modified it before we grabbed the buffer lock.
4559  * However, this is not a problem, because with the recheck we
4560  * just did we ensure that they still don't conflict with the
4561  * lock we want.
4562  */
4563  }
4564  }
4565  else if (mode == LockTupleShare)
4566  {
4567  /*
4568  * If we're requesting Share, we can similarly avoid sleeping if
4569  * there's no update and no exclusive lock present.
4570  */
4571  if (HEAP_XMAX_IS_LOCKED_ONLY(infomask) &&
4572  !HEAP_XMAX_IS_EXCL_LOCKED(infomask))
4573  {
4574  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4575 
4576  /*
4577  * Make sure it's still an appropriate lock, else start over.
4578  * See above about allowing xmax to change.
4579  */
4580  if (!HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_data->t_infomask) ||
4581  HEAP_XMAX_IS_EXCL_LOCKED(tuple->t_data->t_infomask))
4582  goto l3;
4583  require_sleep = false;
4584  }
4585  }
4586  else if (mode == LockTupleNoKeyExclusive)
4587  {
4588  /*
4589  * If we're requesting NoKeyExclusive, we might also be able to
4590  * avoid sleeping; just ensure that there is no conflicting lock
4591  * already acquired.
4592  */
4593  if (infomask & HEAP_XMAX_IS_MULTI)
4594  {
4595  if (!DoesMultiXactIdConflict((MultiXactId) xwait, infomask,
4596  mode, NULL))
4597  {
4598  /*
4599  * No conflict, but if the xmax changed under us in the
4600  * meantime, start over.
4601  */
4603  if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) ||
4604  !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data),
4605  xwait))
4606  goto l3;
4607 
4608  /* otherwise, we're good */
4609  require_sleep = false;
4610  }
4611  }
4612  else if (HEAP_XMAX_IS_KEYSHR_LOCKED(infomask))
4613  {
4614  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4615 
4616  /* if the xmax changed in the meantime, start over */
4617  if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) ||
4618  !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data),
4619  xwait))
4620  goto l3;
4621  /* otherwise, we're good */
4622  require_sleep = false;
4623  }
4624  }
4625 
4626  /*
4627  * As a check independent from those above, we can also avoid sleeping
4628  * if the current transaction is the sole locker of the tuple. Note
4629  * that the strength of the lock already held is irrelevant; this is
4630  * not about recording the lock in Xmax (which will be done regardless
4631  * of this optimization, below). Also, note that the cases where we
4632  * hold a lock stronger than we are requesting are already handled
4633  * above by not doing anything.
4634  *
4635  * Note we only deal with the non-multixact case here; MultiXactIdWait
4636  * is well equipped to deal with this situation on its own.
4637  */
4638  if (require_sleep && !(infomask & HEAP_XMAX_IS_MULTI) &&
4639  TransactionIdIsCurrentTransactionId(xwait))
4640  {
4641  /* ... but if the xmax changed in the meantime, start over */
4642  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4643  if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) ||
4644  !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data),
4645  xwait))
4646  goto l3;
4647 
4648  require_sleep = false;
4649  }
4650 
4651  /*
4652  * Time to sleep on the other transaction/multixact, if necessary.
4653  *
4654  * If the other transaction is an update/delete that's already
4655  * committed, then sleeping cannot possibly do any good: if we're
4656  * required to sleep, get out to raise an error instead.
4657  *
4658  * By here, we either have already acquired the buffer exclusive lock,
4659  * or we must wait for the locking transaction or multixact; so below
4660  * we ensure that we grab buffer lock after the sleep.
4661  */
4662  if (require_sleep && (result == TM_Updated || result == TM_Deleted))
4663  {
4664  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4665  goto failed;
4666  }
4667  else if (require_sleep)
4668  {
4669  /*
4670  * Acquire tuple lock to establish our priority for the tuple, or
4671  * die trying. LockTuple will release us when we are next-in-line
4672  * for the tuple. We must do this even if we are share-locking,
4673  * but not if we already have a weaker lock on the tuple.
4674  *
4675  * If we are forced to "start over" below, we keep the tuple lock;
4676  * this arranges that we stay at the head of the line while
4677  * rechecking tuple state.
4678  */
4679  if (!skip_tuple_lock &&
4680  !heap_acquire_tuplock(relation, tid, mode, wait_policy,
4681  &have_tuple_lock))
4682  {
4683  /*
4684  * This can only happen if wait_policy is Skip and the lock
4685  * couldn't be obtained.
4686  */
4687  result = TM_WouldBlock;
4688  /* recovery code expects to have buffer lock held */
4689  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4690  goto failed;
4691  }
4692 
4693  if (infomask & HEAP_XMAX_IS_MULTI)
4694  {
4695  MultiXactStatus status = get_mxact_status_for_lock(mode, false);
4696 
4697  /* We only ever lock tuples, never update them */
4698  if (status >= MultiXactStatusNoKeyUpdate)
4699  elog(ERROR, "invalid lock mode in heap_lock_tuple");
4700 
4701  /* wait for multixact to end, or die trying */
4702  switch (wait_policy)
4703  {
4704  case LockWaitBlock:
4705  MultiXactIdWait((MultiXactId) xwait, status, infomask,
4706  relation, &tuple->t_self, XLTW_Lock, NULL);
4707  break;
4708  case LockWaitSkip:
4709  if (!ConditionalMultiXactIdWait((MultiXactId) xwait,
4710  status, infomask, relation,
4711  NULL))
4712  {
4713  result = TM_WouldBlock;
4714  /* recovery code expects to have buffer lock held */
4715  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4716  goto failed;
4717  }
4718  break;
4719  case LockWaitError:
4720  if (!ConditionalMultiXactIdWait((MultiXactId) xwait,
4721  status, infomask, relation,
4722  NULL))
4723  ereport(ERROR,
4724  (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
4725  errmsg("could not obtain lock on row in relation \"%s\"",
4726  RelationGetRelationName(relation))));
4727 
4728  break;
4729  }
4730 
4731  /*
4732  * Of course, the multixact might not be done here: if we're
4733  * requesting a light lock mode, other transactions with light
4734  * locks could still be alive, as well as locks owned by our
4735  * own xact or other subxacts of this backend. We need to
4736  * preserve the surviving MultiXact members. Note that it
4737  * isn't absolutely necessary in the latter case, but doing so
4738  * is simpler.
4739  */
4740  }
4741  else
4742  {
4743  /* wait for regular transaction to end, or die trying */
4744  switch (wait_policy)
4745  {
4746  case LockWaitBlock:
4747  XactLockTableWait(xwait, relation, &tuple->t_self,
4748  XLTW_Lock);
4749  break;
4750  case LockWaitSkip:
4751  if (!ConditionalXactLockTableWait(xwait))
4752  {
4753  result = TM_WouldBlock;
4754  /* recovery code expects to have buffer lock held */
4755  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4756  goto failed;
4757  }
4758  break;
4759  case LockWaitError:
4760  if (!ConditionalXactLockTableWait(xwait))
4761  ereport(ERROR,
4762  (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
4763  errmsg("could not obtain lock on row in relation \"%s\"",
4764  RelationGetRelationName(relation))));
4765  break;
4766  }
4767  }
4768 
4769  /* if there are updates, follow the update chain */
4770  if (follow_updates && !HEAP_XMAX_IS_LOCKED_ONLY(infomask))
4771  {
4772  TM_Result res;
4773 
4774  res = heap_lock_updated_tuple(relation, tuple, &t_ctid,
4775  GetCurrentTransactionId(),
4776  mode);
4777  if (res != TM_Ok)
4778  {
4779  result = res;
4780  /* recovery code expects to have buffer lock held */
4781  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4782  goto failed;
4783  }
4784  }
4785 
4786  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4787 
4788  /*
4789  * xwait is done, but if xwait had just locked the tuple then some
4790  * other xact could update this tuple before we get to this point.
4791  * Check for xmax change, and start over if so.
4792  */
4793  if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) ||
4794  !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data),
4795  xwait))
4796  goto l3;
4797 
4798  if (!(infomask & HEAP_XMAX_IS_MULTI))
4799  {
4800  /*
4801  * Otherwise check if it committed or aborted. Note we cannot
4802  * be here if the tuple was only locked by somebody who didn't
4803  * conflict with us; that would have been handled above. So
4804  * that transaction must necessarily be gone by now. But
4805  * don't check for this in the multixact case, because some
4806  * locker transactions might still be running.
4807  */
4808  UpdateXmaxHintBits(tuple->t_data, *buffer, xwait);
4809  }
4810  }
4811 
4812  /* By here, we're certain that we hold buffer exclusive lock again */
4813 
4814  /*
4815  * We may lock if previous xmax aborted, or if it committed but only
4816  * locked the tuple without updating it; or if we didn't have to wait
4817  * at all for whatever reason.
4818  */
4819  if (!require_sleep ||
4820  (tuple->t_data->t_infomask & HEAP_XMAX_INVALID) ||
4821  HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_data->t_infomask) ||
4822  HeapTupleHeaderIsOnlyLocked(tuple->t_data))
4823  result = TM_Ok;
4824  else if (!ItemPointerEquals(&tuple->t_self, &tuple->t_data->t_ctid))
4825  result = TM_Updated;
4826  else
4827  result = TM_Deleted;
4828  }
4829 
4830 failed:
4831  if (result != TM_Ok)
4832  {
4833  Assert(result == TM_SelfModified || result == TM_Updated ||
4834  result == TM_Deleted || result == TM_WouldBlock);
4835 
4836  /*
4837  * When locking a tuple under LockWaitSkip semantics and we fail with
4838  * TM_WouldBlock above, it's possible for concurrent transactions to
4839  * release the lock and set HEAP_XMAX_INVALID in the meantime. So
4840  * this assert is slightly different from the equivalent one in
4841  * heap_delete and heap_update.
4842  */
4843  Assert((result == TM_WouldBlock) ||
4844  !(tuple->t_data->t_infomask & HEAP_XMAX_INVALID));
4845  Assert(result != TM_Updated ||
4846  !ItemPointerEquals(&tuple->t_self, &tuple->t_data->t_ctid));
4847  tmfd->ctid = tuple->t_data->t_ctid;
4848  tmfd->xmax = HeapTupleHeaderGetUpdateXid(tuple->t_data);
4849  if (result == TM_SelfModified)
4850  tmfd->cmax = HeapTupleHeaderGetCmax(tuple->t_data);
4851  else
4852  tmfd->cmax = InvalidCommandId;
4853  goto out_locked;
4854  }
4855 
4856  /*
4857  * If we didn't pin the visibility map page and the page has become all
4858  * visible while we were busy locking the buffer, or during some
4859  * subsequent window during which we had it unlocked, we'll have to unlock
4860  * and re-lock, to avoid holding the buffer lock across I/O. That's a bit
4861  * unfortunate, especially since we'll now have to recheck whether the
4862  * tuple has been locked or updated under us, but hopefully it won't
4863  * happen very often.
4864  */
4865  if (vmbuffer == InvalidBuffer && PageIsAllVisible(page))
4866  {
4867  LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
4868  visibilitymap_pin(relation, block, &vmbuffer);
4869  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4870  goto l3;
4871  }
4872 
4873  xmax = HeapTupleHeaderGetRawXmax(tuple->t_data);
4874  old_infomask = tuple->t_data->t_infomask;
4875 
4876  /*
4877  * If this is the first possibly-multixact-able operation in the current
4878  * transaction, set my per-backend OldestMemberMXactId setting. We can be
4879  * certain that the transaction will never become a member of any older
4880  * MultiXactIds than that. (We have to do this even if we end up just
4881  * using our own TransactionId below, since some other backend could
4882  * incorporate our XID into a MultiXact immediately afterwards.)
4883  */
4884  MultiXactIdSetOldestMember();
4885 
4886  /*
4887  * Compute the new xmax and infomask to store into the tuple. Note we do
4888  * not modify the tuple just yet, because that would leave it in the wrong
4889  * state if multixact.c elogs.
4890  */
4891  compute_new_xmax_infomask(xmax, old_infomask, tuple->t_data->t_infomask2,
4892  GetCurrentTransactionId(), mode, false,
4893  &xid, &new_infomask, &new_infomask2);
4894 
4895  START_CRIT_SECTION();
4896 
4897  /*
4898  * Store transaction information of xact locking the tuple.
4899  *
4900  * Note: Cmax is meaningless in this context, so don't set it; this avoids
4901  * possibly generating a useless combo CID. Moreover, if we're locking a
4902  * previously updated tuple, it's important to preserve the Cmax.
4903  *
4904  * Also reset the HOT UPDATE bit, but only if there's no update; otherwise
4905  * we would break the HOT chain.
4906  */
4907  tuple->t_data->t_infomask &= ~HEAP_XMAX_BITS;
4908  tuple->t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED;
4909  tuple->t_data->t_infomask |= new_infomask;
4910  tuple->t_data->t_infomask2 |= new_infomask2;
4911  if (HEAP_XMAX_IS_LOCKED_ONLY(new_infomask))
4912  HeapTupleHeaderClearHotUpdated(tuple->t_data);
4913  HeapTupleHeaderSetXmax(tuple->t_data, xid);
4914 
4915  /*
4916  * Make sure there is no forward chain link in t_ctid. Note that in the
4917  * cases where the tuple has been updated, we must not overwrite t_ctid,
4918  * because it was set by the updater. Moreover, if the tuple has been
4919  * updated, we need to follow the update chain to lock the new versions of
4920  * the tuple as well.
4921  */
4922  if (HEAP_XMAX_IS_LOCKED_ONLY(new_infomask))
4923  tuple->t_data->t_ctid = *tid;
4924 
4925  /* Clear only the all-frozen bit on visibility map if needed */
4926  if (PageIsAllVisible(page) &&
4927  visibilitymap_clear(relation, block, vmbuffer,
4928  VISIBILITYMAP_ALL_FROZEN))
4929  cleared_all_frozen = true;
4930 
4931 
4932  MarkBufferDirty(*buffer);
4933 
4934  /*
4935  * XLOG stuff. You might think that we don't need an XLOG record because
4936  * there is no state change worth restoring after a crash. You would be
4937  * wrong however: we have just written either a TransactionId or a
4938  * MultiXactId that may never have been seen on disk before, and we need
4939  * to make sure that there are XLOG entries covering those ID numbers.
4940  * Else the same IDs might be re-used after a crash, which would be
4941  * disastrous if this page made it to disk before the crash. Essentially
4942  * we have to enforce the WAL log-before-data rule even in this case.
4943  * (Also, in a PITR log-shipping or 2PC environment, we have to have XLOG
4944  * entries for everything anyway.)
4945  */
4946  if (RelationNeedsWAL(relation))
4947  {
4948  xl_heap_lock xlrec;
4949  XLogRecPtr recptr;
4950 
4951  XLogBeginInsert();
4952  XLogRegisterBuffer(0, *buffer, REGBUF_STANDARD);
4953 
4954  xlrec.offnum = ItemPointerGetOffsetNumber(&tuple->t_self);
4955  xlrec.xmax = xid;
4956  xlrec.infobits_set = compute_infobits(new_infomask,
4957  tuple->t_data->t_infomask2);
4958  xlrec.flags = cleared_all_frozen ? XLH_LOCK_ALL_FROZEN_CLEARED : 0;
4959  XLogRegisterData((char *) &xlrec, SizeOfHeapLock);
4960 
4961  /* we don't decode row locks atm, so no need to log the origin */
4962 
4963  recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_LOCK);
4964 
4965  PageSetLSN(page, recptr);
4966  }
4967 
4968  END_CRIT_SECTION();
4969 
4970  result = TM_Ok;
4971 
4972 out_locked:
4973  LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
4974 
4975 out_unlocked:
4976  if (BufferIsValid(vmbuffer))
4977  ReleaseBuffer(vmbuffer);
4978 
4979  /*
4980  * Don't update the visibility map here. Locking a tuple doesn't change
4981  * visibility info.
4982  */
4983 
4984  /*
4985  * Now that we have successfully marked the tuple as locked, we can
4986  * release the lmgr tuple lock, if we had it.
4987  */
4988  if (have_tuple_lock)
4989  UnlockTupleTuplock(relation, tid, mode);
4990 
4991  return result;
4992 }
#define TUPLOCK_from_mxstatus(status)
Definition: heapam.c:218
static TM_Result heap_lock_updated_tuple(Relation rel, HeapTuple tuple, ItemPointer ctid, TransactionId xid, LockTupleMode mode)
Definition: heapam.c:5773
static bool ConditionalMultiXactIdWait(MultiXactId multi, MultiXactStatus status, uint16 infomask, Relation rel, int *remaining)
Definition: heapam.c:7312
static MultiXactStatus get_mxact_status_for_lock(LockTupleMode mode, bool is_update)
Definition: heapam.c:4261
#define XLH_LOCK_ALL_FROZEN_CLEARED
Definition: heapam_xlog.h:392
#define XLOG_HEAP_LOCK
Definition: heapam_xlog.h:38
#define SizeOfHeapLock
Definition: heapam_xlog.h:403
#define HEAP_XMAX_IS_EXCL_LOCKED(infomask)
Definition: htup_details.h:261
#define HEAP_XMAX_IS_KEYSHR_LOCKED(infomask)
Definition: htup_details.h:263
#define HEAP_XMAX_IS_SHR_LOCKED(infomask)
Definition: htup_details.h:259
static void ItemPointerCopy(const ItemPointerData *fromPointer, ItemPointerData *toPointer)
Definition: itemptr.h:172
bool ConditionalXactLockTableWait(TransactionId xid)
Definition: lmgr.c:740
@ XLTW_Lock
Definition: lmgr.h:29
@ LockWaitSkip
Definition: lockoptions.h:41
@ LockWaitError
Definition: lockoptions.h:43
@ LockTupleNoKeyExclusive
Definition: lockoptions.h:56
@ LockTupleShare
Definition: lockoptions.h:54
@ LockTupleKeyShare
Definition: lockoptions.h:52
int GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **members, bool from_pgupgrade, bool isLockOnly)
Definition: multixact.c:1252
MultiXactStatus
Definition: multixact.h:38
@ MultiXactStatusNoKeyUpdate
Definition: multixact.h:44
static PgChecksumMode mode
Definition: pg_checksums.c:56
#define RelationGetRelationName(relation)
Definition: rel.h:539
uint8 infobits_set
Definition: heapam_xlog.h:399
OffsetNumber offnum
Definition: heapam_xlog.h:398
TransactionId xmax
Definition: heapam_xlog.h:397
@ TM_WouldBlock
Definition: tableam.h:110
#define VISIBILITYMAP_ALL_FROZEN

References Assert, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetPage(), BufferIsValid(), TM_FailureData::cmax, compute_infobits(), compute_new_xmax_infomask(), ConditionalMultiXactIdWait(), ConditionalXactLockTableWait(), TM_FailureData::ctid, DoesMultiXactIdConflict(), elog, END_CRIT_SECTION, ereport, errcode(), errmsg(), ERROR, xl_heap_lock::flags, get_mxact_status_for_lock(), GetCurrentTransactionId(), GetMultiXactIdMembers(), heap_acquire_tuplock(), HEAP_KEYS_UPDATED, heap_lock_updated_tuple(), HEAP_XMAX_BITS, HEAP_XMAX_INVALID, HEAP_XMAX_IS_EXCL_LOCKED, HEAP_XMAX_IS_KEYSHR_LOCKED, HEAP_XMAX_IS_LOCKED_ONLY, HEAP_XMAX_IS_MULTI, HEAP_XMAX_IS_SHR_LOCKED, HeapTupleHeaderClearHotUpdated, HeapTupleHeaderGetCmax(), HeapTupleHeaderGetRawXmax, HeapTupleHeaderGetUpdateXid, HeapTupleHeaderIsOnlyLocked(), HeapTupleHeaderSetXmax, HeapTupleSatisfiesUpdate(), i, xl_heap_lock::infobits_set, InvalidBuffer, InvalidCommandId, ItemIdGetLength, ItemIdIsNormal, ItemPointerCopy(), ItemPointerEquals(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), LockBuffer(), LockTupleExclusive, LockTupleKeyShare, LockTupleNoKeyExclusive, LockTupleShare, LockWaitBlock, LockWaitError, LockWaitSkip, MarkBufferDirty(), mode, MultiXactIdSetOldestMember(), MultiXactIdWait(), MultiXactStatusNoKeyUpdate, xl_heap_lock::offnum, PageGetItem(), PageGetItemId(), PageIsAllVisible(), PageSetLSN(), pfree(), ReadBuffer(), REGBUF_STANDARD, RelationGetRelationName, RelationGetRelid, RelationNeedsWAL, ReleaseBuffer(), res, SizeOfHeapLock, START_CRIT_SECTION, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TM_BeingModified, TM_Deleted, TM_Invisible, TM_Ok, TM_SelfModified, TM_Updated, TM_WouldBlock, TransactionIdEquals, TransactionIdIsCurrentTransactionId(), TUPLOCK_from_mxstatus, UnlockTupleTuplock, UpdateXmaxHintBits(), VISIBILITYMAP_ALL_FROZEN, visibilitymap_clear(), visibilitymap_pin(), XactLockTableWait(), XLH_LOCK_ALL_FROZEN_CLEARED, XLOG_HEAP_LOCK, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), XLogRegisterData(), XLTW_Lock, xl_heap_lock::xmax, TM_FailureData::xmax, and xmax_infomask_changed().

Referenced by heapam_tuple_lock().
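heap_lock_tuple() returns with the target tuple's buffer pinned (but not locked) and reports concurrent updates through TM_FailureData instead of raising an error, leaving the policy decision to the caller. The sketch below is illustrative only: it assumes a relation already opened with an appropriate lock and a known TID, and the helper name lock_one_row() is hypothetical; in the tree the call is reached through table_tuple_lock() and heapam_tuple_lock().

#include "postgres.h"

#include "access/heapam.h"
#include "access/xact.h"
#include "storage/bufmgr.h"

/* Hypothetical helper, not part of PostgreSQL: lock one row FOR UPDATE. */
static TM_Result
lock_one_row(Relation rel, ItemPointer tid)
{
	HeapTupleData tuple;
	Buffer		buffer;
	TM_FailureData tmfd;
	TM_Result	result;

	/* heap_lock_tuple() locates the row version via tuple.t_self */
	tuple.t_self = *tid;

	result = heap_lock_tuple(rel, &tuple,
							 GetCurrentCommandId(true),
							 LockTupleExclusive,	/* strongest tuple lock */
							 LockWaitBlock, /* sleep for conflicting lockers */
							 true,	/* follow the update chain */
							 &buffer, &tmfd);

	/*
	 * tuple.t_data now points into the returned buffer; examine it before
	 * releasing the pin.  On TM_Updated/TM_Deleted the caller can inspect
	 * tmfd.ctid and tmfd.xmax to decide whether to chase the newer version.
	 */
	ReleaseBuffer(buffer);

	return result;
}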

◆ heap_multi_insert()

void heap_multi_insert ( Relation  relation,
struct TupleTableSlot **  slots,
int  ntuples,
CommandId  cid,
int  options,
BulkInsertState  bistate 
)

Definition at line 2262 of file heapam.c.

2264 {
2265  TransactionId xid = GetCurrentTransactionId();
2266  HeapTuple *heaptuples;
2267  int i;
2268  int ndone;
2269  PGAlignedBlock scratch;
2270  Page page;
2271  Buffer vmbuffer = InvalidBuffer;
2272  bool needwal;
2273  Size saveFreeSpace;
2274  bool need_tuple_data = RelationIsLogicallyLogged(relation);
2275  bool need_cids = RelationIsAccessibleInLogicalDecoding(relation);
2276  bool starting_with_empty_page = false;
2277  int npages = 0;
2278  int npages_used = 0;
2279 
2280  /* currently not needed (thus unsupported) for heap_multi_insert() */
2281  Assert(!(options & HEAP_INSERT_NO_LOGICAL));
2282 
2283  needwal = RelationNeedsWAL(relation);
2284  saveFreeSpace = RelationGetTargetPageFreeSpace(relation,
2285  HEAP_DEFAULT_FILLFACTOR);
2286 
2287  /* Toast and set header data in all the slots */
2288  heaptuples = palloc(ntuples * sizeof(HeapTuple));
2289  for (i = 0; i < ntuples; i++)
2290  {
2291  HeapTuple tuple;
2292 
2293  tuple = ExecFetchSlotHeapTuple(slots[i], true, NULL);
2294  slots[i]->tts_tableOid = RelationGetRelid(relation);
2295  tuple->t_tableOid = slots[i]->tts_tableOid;
2296  heaptuples[i] = heap_prepare_insert(relation, tuple, xid, cid,
2297  options);
2298  }
2299 
2300  /*
2301  * We're about to do the actual inserts -- but check for conflict first,
2302  * to minimize the possibility of having to roll back work we've just
2303  * done.
2304  *
2305  * A check here does not definitively prevent a serialization anomaly;
2306  * that check MUST be done at least past the point of acquiring an
2307  * exclusive buffer content lock on every buffer that will be affected,
2308  * and MAY be done after all inserts are reflected in the buffers and
2309  * those locks are released; otherwise there is a race condition. Since
2310  * multiple buffers can be locked and unlocked in the loop below, and it
2311  * would not be feasible to identify and lock all of those buffers before
2312  * the loop, we must do a final check at the end.
2313  *
2314  * The check here could be omitted with no loss of correctness; it is
2315  * present strictly as an optimization.
2316  *
2317  * For heap inserts, we only need to check for table-level SSI locks. Our
2318  * new tuples can't possibly conflict with existing tuple locks, and heap
2319  * page locks are only consolidated versions of tuple locks; they do not
2320  * lock "gaps" as index page locks do. So we don't need to specify a
2321  * buffer when making the call, which makes for a faster check.
2322  */
2323  CheckForSerializableConflictIn(relation, NULL, InvalidBlockNumber);
2324 
2325  ndone = 0;
2326  while (ndone < ntuples)
2327  {
2328  Buffer buffer;
2329  bool all_visible_cleared = false;
2330  bool all_frozen_set = false;
2331  int nthispage;
2332 
2333  CHECK_FOR_INTERRUPTS();
2334 
2335  /*
2336  * Compute number of pages needed to fit the to-be-inserted tuples in
2337  * the worst case. This will be used to determine how much to extend
2338  * the relation by in RelationGetBufferForTuple(), if needed. If we
2339  * filled a prior page from scratch, we can just update our last
2340  * computation, but if we started with a partially filled page,
2341  * recompute from scratch, the number of potentially required pages
2342  * can vary due to tuples needing to fit onto the page, page headers
2343  * etc.
2344  */
2345  if (ndone == 0 || !starting_with_empty_page)
2346  {
2347  npages = heap_multi_insert_pages(heaptuples, ndone, ntuples,
2348  saveFreeSpace);
2349  npages_used = 0;
2350  }
2351  else
2352  npages_used++;
2353 
2354  /*
2355  * Find buffer where at least the next tuple will fit. If the page is
2356  * all-visible, this will also pin the requisite visibility map page.
2357  *
2358  * Also pin visibility map page if COPY FREEZE inserts tuples into an
2359  * empty page. See all_frozen_set below.
2360  */
2361  buffer = RelationGetBufferForTuple(relation, heaptuples[ndone]->t_len,
2362  InvalidBuffer, options, bistate,
2363  &vmbuffer, NULL,
2364  npages - npages_used);
2365  page = BufferGetPage(buffer);
2366 
2367  starting_with_empty_page = PageGetMaxOffsetNumber(page) == 0;
2368 
2369  if (starting_with_empty_page && (options & HEAP_INSERT_FROZEN))
2370  all_frozen_set = true;
2371 
2372  /* NO EREPORT(ERROR) from here till changes are logged */
2373  START_CRIT_SECTION();
2374 
2375  /*
2376  * RelationGetBufferForTuple has ensured that the first tuple fits.
2377  * Put that on the page, and then as many other tuples as fit.
2378  */
2379  RelationPutHeapTuple(relation, buffer, heaptuples[ndone], false);
2380 
2381  /*
2382  * For logical decoding we need combo CIDs to properly decode the
2383  * catalog.
2384  */
2385  if (needwal && need_cids)
2386  log_heap_new_cid(relation, heaptuples[ndone]);
2387 
2388  for (nthispage = 1; ndone + nthispage < ntuples; nthispage++)
2389  {
2390  HeapTuple heaptup = heaptuples[ndone + nthispage];
2391 
2392  if (PageGetHeapFreeSpace(page) < MAXALIGN(heaptup->t_len) + saveFreeSpace)
2393  break;
2394 
2395  RelationPutHeapTuple(relation, buffer, heaptup, false);
2396 
2397  /*
2398  * For logical decoding we need combo CIDs to properly decode the
2399  * catalog.
2400  */
2401  if (needwal && need_cids)
2402  log_heap_new_cid(relation, heaptup);
2403  }
2404 
2405  /*
2406  * If the page is all visible, need to clear that, unless we're only
2407  * going to add further frozen rows to it.
2408  *
2409  * If we're only adding already frozen rows to a previously empty
2410  * page, mark it as all-visible.
2411  */
2412  if (PageIsAllVisible(page) && !(options & HEAP_INSERT_FROZEN))
2413  {
2414  all_visible_cleared = true;
2415  PageClearAllVisible(page);
2416  visibilitymap_clear(relation,
2417  BufferGetBlockNumber(buffer),
2418  vmbuffer, VISIBILITYMAP_VALID_BITS);
2419  }
2420  else if (all_frozen_set)
2421  PageSetAllVisible(page);
2422 
2423  /*
2424  * XXX Should we set PageSetPrunable on this page ? See heap_insert()
2425  */
2426 
2427  MarkBufferDirty(buffer);
2428 
2429  /* XLOG stuff */
2430  if (needwal)
2431  {
2432  XLogRecPtr recptr;
2433  xl_heap_multi_insert *xlrec;
2434  uint8 info = XLOG_HEAP2_MULTI_INSERT;
2435  char *tupledata;
2436  int totaldatalen;
2437  char *scratchptr = scratch.data;
2438  bool init;
2439  int bufflags = 0;
2440 
2441  /*
2442  * If the page was previously empty, we can reinit the page
2443  * instead of restoring the whole thing.
2444  */
2445  init = starting_with_empty_page;
2446 
2447  /* allocate xl_heap_multi_insert struct from the scratch area */
2448  xlrec = (xl_heap_multi_insert *) scratchptr;
2449  scratchptr += SizeOfHeapMultiInsert;
2450 
2451  /*
2452  * Allocate offsets array. Unless we're reinitializing the page,
2453  * in that case the tuples are stored in order starting at
2454  * FirstOffsetNumber and we don't need to store the offsets
2455  * explicitly.
2456  */
2457  if (!init)
2458  scratchptr += nthispage * sizeof(OffsetNumber);
2459 
2460  /* the rest of the scratch space is used for tuple data */
2461  tupledata = scratchptr;
2462 
2463  /* check that the mutually exclusive flags are not both set */
2464  Assert(!(all_visible_cleared && all_frozen_set));
2465 
2466  xlrec->flags = 0;
2467  if (all_visible_cleared)
2468  xlrec->flags |= XLH_INSERT_ALL_VISIBLE_CLEARED;
2469  if (all_frozen_set)
2470  xlrec->flags |= XLH_INSERT_ALL_FROZEN_SET;
2471 
2472  xlrec->ntuples = nthispage;
2473 
2474  /*
2475  * Write out an xl_multi_insert_tuple and the tuple data itself
2476  * for each tuple.
2477  */
2478  for (i = 0; i < nthispage; i++)
2479  {
2480  HeapTuple heaptup = heaptuples[ndone + i];
2481  xl_multi_insert_tuple *tuphdr;
2482  int datalen;
2483 
2484  if (!init)
2485  xlrec->offsets[i] = ItemPointerGetOffsetNumber(&heaptup->t_self);
2486  /* xl_multi_insert_tuple needs two-byte alignment. */
2487  tuphdr = (xl_multi_insert_tuple *) SHORTALIGN(scratchptr);
2488  scratchptr = ((char *) tuphdr) + SizeOfMultiInsertTuple;
2489 
2490  tuphdr->t_infomask2 = heaptup->t_data->t_infomask2;
2491  tuphdr->t_infomask = heaptup->t_data->t_infomask;
2492  tuphdr->t_hoff = heaptup->t_data->t_hoff;
2493 
2494  /* write bitmap [+ padding] [+ oid] + data */
2495  datalen = heaptup->t_len - SizeofHeapTupleHeader;
2496  memcpy(scratchptr,
2497  (char *) heaptup->t_data + SizeofHeapTupleHeader,
2498  datalen);
2499  tuphdr->datalen = datalen;
2500  scratchptr += datalen;
2501  }
2502  totaldatalen = scratchptr - tupledata;
2503  Assert((scratchptr - scratch.data) < BLCKSZ);
2504 
2505  if (need_tuple_data)
2506  xlrec->flags |= XLH_INSERT_CONTAINS_NEW_TUPLE;
2507 
2508  /*
2509  * Signal that this is the last xl_heap_multi_insert record
2510  * emitted by this call to heap_multi_insert(). Needed for logical
2511  * decoding so it knows when to cleanup temporary data.
2512  */
2513  if (ndone + nthispage == ntuples)
2514  xlrec->flags |= XLH_INSERT_LAST_IN_MULTI;
2515 
2516  if (init)
2517  {
2518  info |= XLOG_HEAP_INIT_PAGE;
2519  bufflags |= REGBUF_WILL_INIT;
2520  }
2521 
2522  /*
2523  * If we're doing logical decoding, include the new tuple data
2524  * even if we take a full-page image of the page.
2525  */
2526  if (need_tuple_data)
2527  bufflags |= REGBUF_KEEP_DATA;
2528 
2529  XLogBeginInsert();
2530  XLogRegisterData((char *) xlrec, tupledata - scratch.data);
2531  XLogRegisterBuffer(0, buffer, REGBUF_STANDARD | bufflags);
2532 
2533  XLogRegisterBufData(0, tupledata, totaldatalen);
2534 
2535  /* filtering by origin on a row level is much more efficient */
2536  XLogSetRecordFlags(XLOG_INCLUDE_ORIGIN);
2537 
2538  recptr = XLogInsert(RM_HEAP2_ID, info);
2539 
2540  PageSetLSN(page, recptr);
2541  }
2542 
2543  END_CRIT_SECTION();
2544 
2545  /*
2546  * If we've frozen everything on the page, update the visibilitymap.
2547  * We're already holding pin on the vmbuffer.
2548  */
2549  if (all_frozen_set)
2550  {
2551  Assert(PageIsAllVisible(page));
2552  Assert(visibilitymap_pin_ok(BufferGetBlockNumber(buffer), vmbuffer));
2553 
2554  /*
2555  * It's fine to use InvalidTransactionId here - this is only used
2556  * when HEAP_INSERT_FROZEN is specified, which intentionally
2557  * violates visibility rules.
2558  */
2559  visibilitymap_set(relation, BufferGetBlockNumber(buffer), buffer,
2560  InvalidXLogRecPtr, vmbuffer,
2561  InvalidTransactionId,
2562  VISIBILITYMAP_ALL_VISIBLE | VISIBILITYMAP_ALL_FROZEN);
2563  }
2564 
2565  UnlockReleaseBuffer(buffer);
2566  ndone += nthispage;
2567 
2568  /*
2569  * NB: Only release vmbuffer after inserting all tuples - it's fairly
2570  * likely that we'll insert into subsequent heap pages that are likely
2571  * to use the same vm page.
2572  */
2573  }
2574 
2575  /* We're done with inserting all tuples, so release the last vmbuffer. */
2576  if (vmbuffer != InvalidBuffer)
2577  ReleaseBuffer(vmbuffer);
2578 
2579  /*
2580  * We're done with the actual inserts. Check for conflicts again, to
2581  * ensure that all rw-conflicts in to these inserts are detected. Without
2582  * this final check, a sequential scan of the heap may have locked the
2583  * table after the "before" check, missing one opportunity to detect the
2584  * conflict, and then scanned the table before the new tuples were there,
2585  * missing the other chance to detect the conflict.
2586  *
2587  * For heap inserts, we only need to check for table-level SSI locks. Our
2588  * new tuples can't possibly conflict with existing tuple locks, and heap
2589  * page locks are only consolidated versions of tuple locks; they do not
2590  * lock "gaps" as index page locks do. So we don't need to specify a
2591  * buffer when making the call.
2592  */
2593  CheckForSerializableConflictIn(relation, NULL, InvalidBlockNumber);
2594 
2595  /*
2596  * If tuples are cachable, mark them for invalidation from the caches in
2597  * case we abort. Note it is OK to do this after releasing the buffer,
2598  * because the heaptuples data structure is all in local memory, not in
2599  * the shared buffer.
2600  */
2601  if (IsCatalogRelation(relation))
2602  {
2603  for (i = 0; i < ntuples; i++)
2604  CacheInvalidateHeapTuple(relation, heaptuples[i], NULL);
2605  }
2606 
2607  /* copy t_self fields back to the caller's slots */
2608  for (i = 0; i < ntuples; i++)
2609  slots[i]->tts_tid = heaptuples[i]->t_self;
2610 
2611  pgstat_count_heap_insert(relation, ntuples);
2612 }
Size PageGetHeapFreeSpace(Page page)
Definition: bufpage.c:991
static void PageSetAllVisible(Page page)
Definition: bufpage.h:431
#define MAXALIGN(LEN)
Definition: c.h:811
#define SHORTALIGN(LEN)
Definition: c.h:807
size_t Size
Definition: c.h:605
HeapTuple ExecFetchSlotHeapTuple(TupleTableSlot *slot, bool materialize, bool *shouldFree)
Definition: execTuples.c:1731
static int heap_multi_insert_pages(HeapTuple *heaptuples, int done, int ntuples, Size saveFreeSpace)
Definition: heapam.c:2230
#define HEAP_INSERT_FROZEN
Definition: heapam.h:36
#define SizeOfHeapMultiInsert
Definition: heapam_xlog.h:187
#define XLOG_HEAP2_MULTI_INSERT
Definition: heapam_xlog.h:63
#define XLH_INSERT_LAST_IN_MULTI
Definition: heapam_xlog.h:72
#define XLH_INSERT_ALL_FROZEN_SET
Definition: heapam_xlog.h:78
#define SizeOfMultiInsertTuple
Definition: heapam_xlog.h:198
int init
Definition: isn.c:75
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:122
#define RelationGetTargetPageFreeSpace(relation, defaultff)
Definition: rel.h:378
#define HEAP_DEFAULT_FILLFACTOR
Definition: rel.h:349
Oid tts_tableOid
Definition: tuptable.h:130
OffsetNumber offsets[FLEXIBLE_ARRAY_MEMBER]
Definition: heapam_xlog.h:184
char data[BLCKSZ]
Definition: c.h:1119
void visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf, XLogRecPtr recptr, Buffer vmBuf, TransactionId cutoff_xid, uint8 flags)
bool visibilitymap_pin_ok(BlockNumber heapBlk, Buffer vmbuf)
#define VISIBILITYMAP_ALL_VISIBLE
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28

References Assert, BufferGetBlockNumber(), BufferGetPage(), CacheInvalidateHeapTuple(), CHECK_FOR_INTERRUPTS, CheckForSerializableConflictIn(), PGAlignedBlock::data, xl_multi_insert_tuple::datalen, END_CRIT_SECTION, ExecFetchSlotHeapTuple(), xl_heap_multi_insert::flags, GetCurrentTransactionId(), HEAP_DEFAULT_FILLFACTOR, HEAP_INSERT_FROZEN, HEAP_INSERT_NO_LOGICAL, heap_multi_insert_pages(), heap_prepare_insert(), i, init, InvalidBlockNumber, InvalidBuffer, InvalidTransactionId, InvalidXLogRecPtr, IsCatalogRelation(), ItemPointerGetOffsetNumber(), log_heap_new_cid(), MarkBufferDirty(), MAXALIGN, xl_heap_multi_insert::ntuples, xl_heap_multi_insert::offsets, PageClearAllVisible(), PageGetHeapFreeSpace(), PageGetMaxOffsetNumber(), PageIsAllVisible(), PageSetAllVisible(), PageSetLSN(), palloc(), pgstat_count_heap_insert(), REGBUF_KEEP_DATA, REGBUF_STANDARD, REGBUF_WILL_INIT, RelationGetBufferForTuple(), RelationGetRelid, RelationGetTargetPageFreeSpace, RelationIsAccessibleInLogicalDecoding, RelationIsLogicallyLogged, RelationNeedsWAL, RelationPutHeapTuple(), ReleaseBuffer(), SHORTALIGN, SizeOfHeapMultiInsert, SizeofHeapTupleHeader, SizeOfMultiInsertTuple, START_CRIT_SECTION, HeapTupleData::t_data, xl_multi_insert_tuple::t_hoff, HeapTupleHeaderData::t_hoff, xl_multi_insert_tuple::t_infomask, HeapTupleHeaderData::t_infomask, xl_multi_insert_tuple::t_infomask2, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TupleTableSlot::tts_tableOid, UnlockReleaseBuffer(), VISIBILITYMAP_ALL_FROZEN, VISIBILITYMAP_ALL_VISIBLE, visibilitymap_clear(), visibilitymap_pin_ok(), visibilitymap_set(), VISIBILITYMAP_VALID_BITS, XLH_INSERT_ALL_FROZEN_SET, XLH_INSERT_ALL_VISIBLE_CLEARED, XLH_INSERT_CONTAINS_NEW_TUPLE, XLH_INSERT_LAST_IN_MULTI, XLOG_HEAP2_MULTI_INSERT, XLOG_HEAP_INIT_PAGE, XLOG_INCLUDE_ORIGIN, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), XLogRegisterData(), and XLogSetRecordFlags().

Referenced by CatalogTuplesMultiInsertWithInfo().
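heap_multi_insert() expects the caller to hand it an array of TupleTableSlots that already contain the tuples to insert; it then toasts and prepares them, fills heap pages in batches, and emits one multi-insert WAL record per filled page. The following is a minimal, hedged sketch of such a caller; the helper name multi_insert_tuples() and the use of per-tuple heap slots are assumptions for illustration, not the in-tree pattern.

#include "postgres.h"

#include "access/heapam.h"
#include "access/xact.h"
#include "executor/tuptable.h"
#include "utils/rel.h"

/* Hypothetical helper: bulk-insert pre-built heap tuples into "rel". */
static void
multi_insert_tuples(Relation rel, HeapTuple *tuples, int ntuples)
{
	TupleTableSlot **slots = palloc(ntuples * sizeof(TupleTableSlot *));
	BulkInsertState bistate = GetBulkInsertState();
	CommandId	cid = GetCurrentCommandId(true);

	for (int i = 0; i < ntuples; i++)
	{
		slots[i] = MakeSingleTupleTableSlot(RelationGetDescr(rel),
											&TTSOpsHeapTuple);
		ExecStoreHeapTuple(tuples[i], slots[i], false);
	}

	/* One call inserts all tuples, filling and WAL-logging pages in batches */
	heap_multi_insert(rel, slots, ntuples, cid, 0 /* options */ , bistate);

	for (int i = 0; i < ntuples; i++)
		ExecDropSingleTupleTableSlot(slots[i]);
	FreeBulkInsertState(bistate);
	pfree(slots);
}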

◆ heap_page_prune_and_freeze()

void heap_page_prune_and_freeze ( Relation  relation,
Buffer  buffer,
struct GlobalVisState vistest,
int  options,
struct VacuumCutoffs cutoffs,
PruneFreezeResult presult,
PruneReason  reason,
OffsetNumber off_loc,
TransactionId *  new_relfrozen_xid,
MultiXactId *  new_relmin_mxid 
)

Definition at line 348 of file pruneheap.c.

357 {
358  Page page = BufferGetPage(buffer);
359  BlockNumber blockno = BufferGetBlockNumber(buffer);
360  OffsetNumber offnum,
361  maxoff;
362  PruneState prstate;
363  HeapTupleData tup;
364  bool do_freeze;
365  bool do_prune;
366  bool do_hint;
367  bool hint_bit_fpi;
368  int64 fpi_before = pgWalUsage.wal_fpi;
369 
370  /* Copy parameters to prstate */
371  prstate.vistest = vistest;
372  prstate.mark_unused_now = (options & HEAP_PAGE_PRUNE_MARK_UNUSED_NOW) != 0;
373  prstate.freeze = (options & HEAP_PAGE_PRUNE_FREEZE) != 0;
374  prstate.cutoffs = cutoffs;
375 
376  /*
377  * Our strategy is to scan the page and make lists of items to change,
378  * then apply the changes within a critical section. This keeps as much
379  * logic as possible out of the critical section, and also ensures that
380  * WAL replay will work the same as the normal case.
381  *
382  * First, initialize the new pd_prune_xid value to zero (indicating no
383  * prunable tuples). If we find any tuples which may soon become
384  * prunable, we will save the lowest relevant XID in new_prune_xid. Also
385  * initialize the rest of our working state.
386  */
387  prstate.new_prune_xid = InvalidTransactionId;
388  prstate.latest_xid_removed = InvalidTransactionId;
389  prstate.nredirected = prstate.ndead = prstate.nunused = prstate.nfrozen = 0;
390  prstate.nroot_items = 0;
391  prstate.nheaponly_items = 0;
392 
393  /* initialize page freezing working state */
394  prstate.pagefrz.freeze_required = false;
395  if (prstate.freeze)
396  {
397  Assert(new_relfrozen_xid && new_relmin_mxid);
398  prstate.pagefrz.FreezePageRelfrozenXid = *new_relfrozen_xid;
399  prstate.pagefrz.NoFreezePageRelfrozenXid = *new_relfrozen_xid;
400  prstate.pagefrz.FreezePageRelminMxid = *new_relmin_mxid;
401  prstate.pagefrz.NoFreezePageRelminMxid = *new_relmin_mxid;
402  }
403  else
404  {
405  Assert(new_relfrozen_xid == NULL && new_relmin_mxid == NULL);
406  prstate.pagefrz.FreezePageRelminMxid = InvalidMultiXactId;
407  prstate.pagefrz.NoFreezePageRelminMxid = InvalidMultiXactId;
408  prstate.pagefrz.FreezePageRelfrozenXid = InvalidTransactionId;
409  prstate.pagefrz.NoFreezePageRelfrozenXid = InvalidTransactionId;
410  }
411 
412  prstate.ndeleted = 0;
413  prstate.live_tuples = 0;
414  prstate.recently_dead_tuples = 0;
415  prstate.hastup = false;
416  prstate.lpdead_items = 0;
417  prstate.deadoffsets = presult->deadoffsets;
418 
419  /*
420  * Caller may update the VM after we're done. We can keep track of
421  * whether the page will be all-visible and all-frozen after pruning and
422  * freezing to help the caller to do that.
423  *
424  * Currently, only VACUUM sets the VM bits. To save the effort, only do
425  * the bookkeeping if the caller needs it. Currently, that's tied to
426  * HEAP_PAGE_PRUNE_FREEZE, but it could be a separate flag if you wanted
427  * to update the VM bits without also freezing or freeze without also
428  * setting the VM bits.
429  *
430  * In addition to telling the caller whether it can set the VM bit, we
431  * also use 'all_visible' and 'all_frozen' for our own decision-making. If
432  * the whole page would become frozen, we consider opportunistically
433  * freezing tuples. We will not be able to freeze the whole page if there
434  * are tuples present that are not visible to everyone or if there are
435  * dead tuples which are not yet removable. However, dead tuples which
436  * will be removed by the end of vacuuming should not preclude us from
437  * opportunistically freezing. Because of that, we do not clear
438  * all_visible when we see LP_DEAD items. We fix that at the end of the
439  * function, when we return the value to the caller, so that the caller
440  * doesn't set the VM bit incorrectly.
441  */
442  if (prstate.freeze)
443  {
444  prstate.all_visible = true;
445  prstate.all_frozen = true;
446  }
447  else
448  {
449  /*
450  * Initializing to false allows skipping the work to update them in
451  * heap_prune_record_unchanged_lp_normal().
452  */
453  prstate.all_visible = false;
454  prstate.all_frozen = false;
455  }
456 
457  /*
458  * The visibility cutoff xid is the newest xmin of live tuples on the
459  * page. In the common case, this will be set as the conflict horizon the
460  * caller can use for updating the VM. If, at the end of freezing and
461  * pruning, the page is all-frozen, there is no possibility that any
462  * running transaction on the standby does not see tuples on the page as
463  * all-visible, so the conflict horizon remains InvalidTransactionId.
464  */
465  prstate.visibility_cutoff_xid = InvalidTransactionId;
466 
467  maxoff = PageGetMaxOffsetNumber(page);
468  tup.t_tableOid = RelationGetRelid(relation);
469 
470  /*
471  * Determine HTSV for all tuples, and queue them up for processing as HOT
472  * chain roots or as heap-only items.
473  *
474  * Determining HTSV only once for each tuple is required for correctness,
475  * to deal with cases where running HTSV twice could result in different
476  * results. For example, RECENTLY_DEAD can turn to DEAD if another
477  * checked item causes GlobalVisTestIsRemovableFullXid() to update the
478  * horizon, or INSERT_IN_PROGRESS can change to DEAD if the inserting
479  * transaction aborts.
480  *
481  * It's also good for performance. Most commonly tuples within a page are
482  * stored at decreasing offsets (while the items are stored at increasing
483  * offsets). When processing all tuples on a page this leads to reading
484  * memory at decreasing offsets within a page, with a variable stride.
485  * That's hard for CPU prefetchers to deal with. Processing the items in
486  * reverse order (and thus the tuples in increasing order) increases
487  * prefetching efficiency significantly / decreases the number of cache
488  * misses.
489  */
490  for (offnum = maxoff;
491  offnum >= FirstOffsetNumber;
492  offnum = OffsetNumberPrev(offnum))
493  {
494  ItemId itemid = PageGetItemId(page, offnum);
495  HeapTupleHeader htup;
496 
497  /*
498  * Set the offset number so that we can display it along with any
499  * error that occurred while processing this tuple.
500  */
501  *off_loc = offnum;
502 
503  prstate.processed[offnum] = false;
504  prstate.htsv[offnum] = -1;
505 
506  /* Nothing to do if slot doesn't contain a tuple */
507  if (!ItemIdIsUsed(itemid))
508  {
509  heap_prune_record_unchanged_lp_unused(page, &prstate, offnum);
510  continue;
511  }
512 
513  if (ItemIdIsDead(itemid))
514  {
515  /*
516  * If the caller set mark_unused_now true, we can set dead line
517  * pointers LP_UNUSED now.
518  */
519  if (unlikely(prstate.mark_unused_now))
520  heap_prune_record_unused(&prstate, offnum, false);
521  else
522  heap_prune_record_unchanged_lp_dead(page, &prstate, offnum);
523  continue;
524  }
525 
526  if (ItemIdIsRedirected(itemid))
527  {
528  /* This is the start of a HOT chain */
529  prstate.root_items[prstate.nroot_items++] = offnum;
530  continue;
531  }
532 
533  Assert(ItemIdIsNormal(itemid));
534 
535  /*
536  * Get the tuple's visibility status and queue it up for processing.
537  */
538  htup = (HeapTupleHeader) PageGetItem(page, itemid);
539  tup.t_data = htup;
540  tup.t_len = ItemIdGetLength(itemid);
541  ItemPointerSet(&tup.t_self, blockno, offnum);
542 
543  prstate.htsv[offnum] = heap_prune_satisfies_vacuum(&prstate, &tup,
544  buffer);
545 
546  if (!HeapTupleHeaderIsHeapOnly(htup))
547  prstate.root_items[prstate.nroot_items++] = offnum;
548  else
549  prstate.heaponly_items[prstate.nheaponly_items++] = offnum;
550  }
551 
552  /*
553  * If checksums are enabled, heap_prune_satisfies_vacuum() may have caused
554  * an FPI to be emitted.
555  */
556  hint_bit_fpi = fpi_before != pgWalUsage.wal_fpi;
557 
558  /*
559  * Process HOT chains.
560  *
561  * We added the items to the array starting from 'maxoff', so by
562  * processing the array in reverse order, we process the items in
563  * ascending offset number order. The order doesn't matter for
564  * correctness, but some quick micro-benchmarking suggests that this is
565  * faster. (Earlier PostgreSQL versions, which scanned all the items on
566  * the page instead of using the root_items array, also did it in
567  * ascending offset number order.)
568  */
569  for (int i = prstate.nroot_items - 1; i >= 0; i--)
570  {
571  offnum = prstate.root_items[i];
572 
573  /* Ignore items already processed as part of an earlier chain */
574  if (prstate.processed[offnum])
575  continue;
576 
577  /* see preceding loop */
578  *off_loc = offnum;
579 
580  /* Process this item or chain of items */
581  heap_prune_chain(page, blockno, maxoff, offnum, &prstate);
582  }
583 
584  /*
585  * Process any heap-only tuples that were not already processed as part of
586  * a HOT chain.
587  */
588  for (int i = prstate.nheaponly_items - 1; i >= 0; i--)
589  {
590  offnum = prstate.heaponly_items[i];
591 
592  if (prstate.processed[offnum])
593  continue;
594 
595  /* see preceding loop */
596  *off_loc = offnum;
597 
598  /*
599  * If the tuple is DEAD and doesn't chain to anything else, mark it
600  * unused. (If it does chain, we can only remove it as part of
601  * pruning its chain.)
602  *
603  * We need this primarily to handle aborted HOT updates, that is,
604  * XMIN_INVALID heap-only tuples. Those might not be linked to by any
605  * chain, since the parent tuple might be re-updated before any
606  * pruning occurs. So we have to be able to reap them separately from
607  * chain-pruning. (Note that HeapTupleHeaderIsHotUpdated will never
608  * return true for an XMIN_INVALID tuple, so this code will work even
609  * when there were sequential updates within the aborted transaction.)
610  */
611  if (prstate.htsv[offnum] == HEAPTUPLE_DEAD)
612  {
613  ItemId itemid = PageGetItemId(page, offnum);
614  HeapTupleHeader htup = (HeapTupleHeader) PageGetItem(page, itemid);
615 
616  if (likely(!HeapTupleHeaderIsHotUpdated(htup)))
617  {
618  HeapTupleHeaderAdvanceConflictHorizon(htup,
619  &prstate.latest_xid_removed);
620  heap_prune_record_unused(&prstate, offnum, true);
621  }
622  else
623  {
624  /*
625  * This tuple should've been processed and removed as part of
626  * a HOT chain, so something's wrong. To preserve evidence,
627  * we don't dare to remove it. We cannot leave behind a DEAD
628  * tuple either, because that will cause VACUUM to error out.
629  * Throwing an error with a distinct error message seems like
630  * the least bad option.
631  */
632  elog(ERROR, "dead heap-only tuple (%u, %d) is not linked to from any HOT chain",
633  blockno, offnum);
634  }
635  }
636  else
637  heap_prune_record_unchanged_lp_normal(page, &prstate, offnum);
638  }
639 
640  /* We should now have processed every tuple exactly once */
641 #ifdef USE_ASSERT_CHECKING
642  for (offnum = FirstOffsetNumber;
643  offnum <= maxoff;
644  offnum = OffsetNumberNext(offnum))
645  {
646  *off_loc = offnum;
647 
648  Assert(prstate.processed[offnum]);
649  }
650 #endif
651 
652  /* Clear the offset information once we have processed the given page. */
653  *off_loc = InvalidOffsetNumber;
654 
655  do_prune = prstate.nredirected > 0 ||
656  prstate.ndead > 0 ||
657  prstate.nunused > 0;
658 
659  /*
660  * Even if we don't prune anything, if we found a new value for the
661  * pd_prune_xid field or the page was marked full, we will update the hint
662  * bit.
663  */
664  do_hint = ((PageHeader) page)->pd_prune_xid != prstate.new_prune_xid ||
665  PageIsFull(page);
666 
667  /*
668  * Decide if we want to go ahead with freezing according to the freeze
669  * plans we prepared, or not.
670  */
671  do_freeze = false;
672  if (prstate.freeze)
673  {
674  if (prstate.pagefrz.freeze_required)
675  {
676  /*
677  * heap_prepare_freeze_tuple indicated that at least one XID/MXID
678  * from before FreezeLimit/MultiXactCutoff is present. Must
679  * freeze to advance relfrozenxid/relminmxid.
680  */
681  do_freeze = true;
682  }
683  else
684  {
685  /*
686  * Opportunistically freeze the page if we are generating an FPI
687  * anyway and if doing so means that we can set the page
688  * all-frozen afterwards (might not happen until VACUUM's final
689  * heap pass).
690  *
691  * XXX: Previously, we knew if pruning emitted an FPI by checking
692  * pgWalUsage.wal_fpi before and after pruning. Once the freeze
693  * and prune records were combined, this heuristic couldn't be
694  * used anymore. The opportunistic freeze heuristic must be
695  * improved; however, for now, try to approximate the old logic.
696  */
697  if (prstate.all_visible && prstate.all_frozen && prstate.nfrozen > 0)
698  {
699  /*
700  * Freezing would make the page all-frozen. Have already
701  * emitted an FPI or will do so anyway?
702  */
703  if (RelationNeedsWAL(relation))
704  {
705  if (hint_bit_fpi)
706  do_freeze = true;
707  else if (do_prune)
708  {
709  if (XLogCheckBufferNeedsBackup(buffer))
710  do_freeze = true;
711  }
712  else if (do_hint)
713  {
714  if (XLogHintBitIsNeeded())
715  do_freeze = true;
716  }
717  }
718  }
719  }
720  }
721 
722  if (do_freeze)
723  {
724  /*
725  * Validate the tuples we will be freezing before entering the
726  * critical section.
727  */
728  heap_pre_freeze_checks(buffer, prstate.frozen, prstate.nfrozen);
729  }
730  else if (prstate.nfrozen > 0)
731  {
732  /*
733  * The page contained some tuples that were not already frozen, and we
734  * chose not to freeze them now. The page won't be all-frozen then.
735  */
736  Assert(!prstate.pagefrz.freeze_required);
737 
738  prstate.all_frozen = false;
739  prstate.nfrozen = 0; /* avoid miscounts in instrumentation */
740  }
741  else
742  {
743  /*
744  * We have no freeze plans to execute. The page might already be
745  * all-frozen (perhaps only following pruning), though. Such pages
746  * can be marked all-frozen in the VM by our caller, even though none
747  * of its tuples were newly frozen here.
748  */
749  }
750 
751  /* Any error while applying the changes is critical */
752  START_CRIT_SECTION();
753 
754  if (do_hint)
755  {
756  /*
757  * Update the page's pd_prune_xid field to either zero, or the lowest
758  * XID of any soon-prunable tuple.
759  */
760  ((PageHeader) page)->pd_prune_xid = prstate.new_prune_xid;
761 
762  /*
763  * Also clear the "page is full" flag, since there's no point in
764  * repeating the prune/defrag process until something else happens to
765  * the page.
766  */
767  PageClearFull(page);
768 
769  /*
770  * If that's all we had to do to the page, this is a non-WAL-logged
771  * hint. If we are going to freeze or prune the page, we will mark
772  * the buffer dirty below.
773  */
774  if (!do_freeze && !do_prune)
775  MarkBufferDirtyHint(buffer, true);
776  }
777 
778  if (do_prune || do_freeze)
779  {
780  /* Apply the planned item changes and repair page fragmentation. */
781  if (do_prune)
782  {
783  heap_page_prune_execute(buffer, false,
784  prstate.redirected, prstate.nredirected,
785  prstate.nowdead, prstate.ndead,
786  prstate.nowunused, prstate.nunused);
787  }
788 
789  if (do_freeze)
790  heap_freeze_prepared_tuples(buffer, prstate.frozen, prstate.nfrozen);
791 
792  MarkBufferDirty(buffer);
793 
794  /*
795  * Emit a WAL XLOG_HEAP2_PRUNE_FREEZE record showing what we did
796  */
797  if (RelationNeedsWAL(relation))
798  {
799  /*
800  * The snapshotConflictHorizon for the whole record should be the
801  * most conservative of all the horizons calculated for any of the
802  * possible modifications. If this record will prune tuples, any
803  * transactions on the standby older than the youngest xmax of the
804  * most recently removed tuple this record will prune will
805  * conflict. If this record will freeze tuples, any transactions
806  * on the standby with xids older than the youngest tuple this
807  * record will freeze will conflict.
808  */
809  TransactionId frz_conflict_horizon = InvalidTransactionId;
810  TransactionId conflict_xid;
811 
812  /*
813  * We can use the visibility_cutoff_xid as our cutoff for
814  * conflicts when the whole page is eligible to become all-frozen
815  * in the VM once we're done with it. Otherwise we generate a
816  * conservative cutoff by stepping back from OldestXmin.
817  */
818  if (do_freeze)
819  {
820  if (prstate.all_visible && prstate.all_frozen)
821  frz_conflict_horizon = prstate.visibility_cutoff_xid;
822  else
823  {
824  /* Avoids false conflicts when hot_standby_feedback in use */
825  frz_conflict_horizon = prstate.cutoffs->OldestXmin;
826  TransactionIdRetreat(frz_conflict_horizon);
827  }
828  }
829 
830  if (TransactionIdFollows(frz_conflict_horizon, prstate.latest_xid_removed))
831  conflict_xid = frz_conflict_horizon;
832  else
833  conflict_xid = prstate.latest_xid_removed;
834 
835  log_heap_prune_and_freeze(relation, buffer,
836  conflict_xid,
837  true, reason,
838  prstate.frozen, prstate.nfrozen,
839  prstate.redirected, prstate.nredirected,
840  prstate.nowdead, prstate.ndead,
841  prstate.nowunused, prstate.nunused);
842  }
843  }
844 
845  END_CRIT_SECTION();
846 
847  /* Copy information back for caller */
848  presult->ndeleted = prstate.ndeleted;
849  presult->nnewlpdead = prstate.ndead;
850  presult->nfrozen = prstate.nfrozen;
851  presult->live_tuples = prstate.live_tuples;
852  presult->recently_dead_tuples = prstate.recently_dead_tuples;
853 
854  /*
855  * It was convenient to ignore LP_DEAD items in all_visible earlier on to
856  * make the choice of whether or not to freeze the page unaffected by the
857  * short-term presence of LP_DEAD items. These LP_DEAD items were
858  * effectively assumed to be LP_UNUSED items in the making. It doesn't
859  * matter which vacuum heap pass (initial pass or final pass) ends up
860  * setting the page all-frozen, as long as the ongoing VACUUM does it.
861  *
862  * Now that freezing has been finalized, unset all_visible if there are
863  * any LP_DEAD items on the page. It needs to reflect the present state
864  * of the page, as expected by our caller.
865  */
866  if (prstate.all_visible && prstate.lpdead_items == 0)
867  {
868  presult->all_visible = prstate.all_visible;
869  presult->all_frozen = prstate.all_frozen;
870  }
871  else
872  {
873  presult->all_visible = false;
874  presult->all_frozen = false;
875  }
876 
877  presult->hastup = prstate.hastup;
878 
879  /*
880  * For callers planning to update the visibility map, the conflict horizon
881  * for that record must be the newest xmin on the page. However, if the
882  * page is completely frozen, there can be no conflict and the
883  * vm_conflict_horizon should remain InvalidTransactionId. This includes
884  * the case that we just froze all the tuples; the prune-freeze record
885  * included the conflict XID already so the caller doesn't need it.
886  */
887  if (presult->all_frozen)
888  presult->vm_conflict_horizon = InvalidTransactionId;
889  else
890  presult->vm_conflict_horizon = prstate.visibility_cutoff_xid;
891 
892  presult->lpdead_items = prstate.lpdead_items;
893  /* the presult->deadoffsets array was already filled in */
894 
895  if (prstate.freeze)
896  {
897  if (presult->nfrozen > 0)
898  {
899  *new_relfrozen_xid = prstate.pagefrz.FreezePageRelfrozenXid;
900  *new_relmin_mxid = prstate.pagefrz.FreezePageRelminMxid;
901  }
902  else
903  {
904  *new_relfrozen_xid = prstate.pagefrz.NoFreezePageRelfrozenXid;
905  *new_relmin_mxid = prstate.pagefrz.NoFreezePageRelminMxid;
906  }
907  }
908 }

References PruneState::all_frozen, PruneFreezeResult::all_frozen, PruneState::all_visible, PruneFreezeResult::all_visible, Assert, BufferGetBlockNumber(), BufferGetPage(), PruneState::cutoffs, PruneState::deadoffsets, PruneFreezeResult::deadoffsets, elog, END_CRIT_SECTION, ERROR, FirstOffsetNumber, PruneState::freeze, HeapPageFreeze::freeze_required, HeapPageFreeze::FreezePageRelfrozenXid, HeapPageFreeze::FreezePageRelminMxid, PruneState::frozen, PruneState::hastup, PruneFreezeResult::hastup, heap_freeze_prepared_tuples(), heap_page_prune_execute(), HEAP_PAGE_PRUNE_FREEZE, HEAP_PAGE_PRUNE_MARK_UNUSED_NOW, heap_pre_freeze_checks(), heap_prune_chain(), heap_prune_record_unchanged_lp_dead(), heap_prune_record_unchanged_lp_normal(), heap_prune_record_unchanged_lp_unused(), heap_prune_record_unused(), heap_prune_satisfies_vacuum(), PruneState::heaponly_items, HEAPTUPLE_DEAD, HeapTupleHeaderAdvanceConflictHorizon(), HeapTupleHeaderIsHeapOnly, HeapTupleHeaderIsHotUpdated, PruneState::htsv, i, InvalidMultiXactId, InvalidOffsetNumber, InvalidTransactionId, ItemIdGetLength, ItemIdIsDead, ItemIdIsNormal, ItemIdIsRedirected, ItemIdIsUsed, ItemPointerSet(), PruneState::latest_xid_removed, likely, PruneState::live_tuples, PruneFreezeResult::live_tuples, log_heap_prune_and_freeze(), PruneState::lpdead_items, PruneFreezeResult::lpdead_items, PruneState::mark_unused_now, MarkBufferDirty(), MarkBufferDirtyHint(), PruneState::ndead, PruneState::ndeleted, PruneFreezeResult::ndeleted, PruneState::new_prune_xid, PruneState::nfrozen, PruneFreezeResult::nfrozen, PruneState::nheaponly_items, PruneFreezeResult::nnewlpdead, HeapPageFreeze::NoFreezePageRelfrozenXid, HeapPageFreeze::NoFreezePageRelminMxid, PruneState::nowdead, PruneState::nowunused, PruneState::nredirected, PruneState::nroot_items, PruneState::nunused, OffsetNumberNext, OffsetNumberPrev, VacuumCutoffs::OldestXmin, PageClearFull(), PruneState::pagefrz, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PageIsFull(), pgWalUsage, PruneState::processed, PruneState::recently_dead_tuples, PruneFreezeResult::recently_dead_tuples, PruneState::redirected, RelationGetRelid, RelationNeedsWAL, PruneState::root_items, START_CRIT_SECTION, HeapTupleData::t_data, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TransactionIdFollows(), TransactionIdRetreat, unlikely, PruneState::visibility_cutoff_xid, PruneState::vistest, PruneFreezeResult::vm_conflict_horizon, WalUsage::wal_fpi, XLogCheckBufferNeedsBackup(), and XLogHintBitIsNeeded.

Referenced by heap_page_prune_opt(), and lazy_scan_prune().
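
To make the calling convention concrete, here is a minimal, hedged sketch loosely following the shape of a lazy_scan_prune()-style call site: it acquires a cleanup lock, calls heap_page_prune_and_freeze() with HEAP_PAGE_PRUNE_FREEZE and PRUNE_VACUUM_SCAN, and then reads back a few PruneFreezeResult fields. The helper name prune_and_freeze_page_sketch and the DEBUG2 message are invented; vistest, cutoffs and the two trackers are assumed to have been set up by the caller as VACUUM normally does.

/*
 * Hedged sketch, not part of the PostgreSQL sources.
 */
#include "postgres.h"

#include "access/heapam.h"
#include "storage/bufmgr.h"

static void
prune_and_freeze_page_sketch(Relation relation, Buffer buffer,
                             struct GlobalVisState *vistest,
                             struct VacuumCutoffs *cutoffs,
                             TransactionId *new_relfrozen_xid,
                             MultiXactId *new_relmin_mxid)
{
    PruneFreezeResult presult;
    OffsetNumber off_loc = InvalidOffsetNumber;

    /* VACUUM's first heap pass prunes under a cleanup lock */
    LockBufferForCleanup(buffer);

    /* Prune the page and opportunistically freeze its tuples */
    heap_page_prune_and_freeze(relation, buffer, vistest,
                               HEAP_PAGE_PRUNE_FREEZE,
                               cutoffs, &presult, PRUNE_VACUUM_SCAN,
                               &off_loc, new_relfrozen_xid, new_relmin_mxid);

    elog(DEBUG2, "pruned %d tuples, froze %d, left %d LP_DEAD items",
         presult.ndeleted, presult.nfrozen, presult.lpdead_items);

    /*
     * presult.deadoffsets[0 .. lpdead_items - 1] would feed index vacuuming,
     * and presult.all_visible / all_frozen / vm_conflict_horizon would drive
     * the visibility-map update (both omitted here).
     */

    LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
}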

◆ heap_page_prune_execute()

void heap_page_prune_execute ( Buffer  buffer,
bool  lp_truncate_only,
OffsetNumber *  redirected,
int  nredirected,
OffsetNumber *  nowdead,
int  ndead,
OffsetNumber *  nowunused,
int  nunused 
)

Definition at line 1540 of file pruneheap.c.

1544 {
1545  Page page = (Page) BufferGetPage(buffer);
1546  OffsetNumber *offnum;
1547  HeapTupleHeader htup PG_USED_FOR_ASSERTS_ONLY;
1548 
1549  /* Shouldn't be called unless there's something to do */
1550  Assert(nredirected > 0 || ndead > 0 || nunused > 0);
1551 
1552  /* If 'lp_truncate_only', we can only remove already-dead line pointers */
1553  Assert(!lp_truncate_only || (nredirected == 0 && ndead == 0));
1554 
1555  /* Update all redirected line pointers */
1556  offnum = redirected;
1557  for (int i = 0; i < nredirected; i++)
1558  {
1559  OffsetNumber fromoff = *offnum++;
1560  OffsetNumber tooff = *offnum++;
1561  ItemId fromlp = PageGetItemId(page, fromoff);
1562  ItemId tolp PG_USED_FOR_ASSERTS_ONLY;
1563 
1564 #ifdef USE_ASSERT_CHECKING
1565 
1566  /*
1567  * Any existing item that we set as an LP_REDIRECT (any 'from' item)
1568  * must be the first item from a HOT chain. If the item has tuple
1569  * storage then it can't be a heap-only tuple. Otherwise we are just
1570  * maintaining an existing LP_REDIRECT from an existing HOT chain that
1571  * has been pruned at least once before now.
1572  */
1573  if (!ItemIdIsRedirected(fromlp))
1574  {
1575  Assert(ItemIdHasStorage(fromlp) && ItemIdIsNormal(fromlp));
1576 
1577  htup = (HeapTupleHeader) PageGetItem(page, fromlp);
1578  Assert(!HeapTupleHeaderIsHeapOnly(htup));
1579  }
1580  else
1581  {
1582  /* We shouldn't need to redundantly set the redirect */
1583  Assert(ItemIdGetRedirect(fromlp) != tooff);
1584  }
1585 
1586  /*
1587  * The item that we're about to set as an LP_REDIRECT (the 'from'
1588  * item) will point to an existing item (the 'to' item) that is
1589  * already a heap-only tuple. There can be at most one LP_REDIRECT
1590  * item per HOT chain.
1591  *
1592  * We need to keep around an LP_REDIRECT item (after original
1593  * non-heap-only root tuple gets pruned away) so that it's always
1594  * possible for VACUUM to easily figure out what TID to delete from
1595  * indexes when an entire HOT chain becomes dead. A heap-only tuple
1596  * can never become LP_DEAD; an LP_REDIRECT item or a regular heap
1597  * tuple can.
1598  *
1599  * This check may miss problems, e.g. the target of a redirect could
1600  * be marked as unused subsequently. The page_verify_redirects() check
1601  * below will catch such problems.
1602  */
1603  tolp = PageGetItemId(page, tooff);
1604  Assert(ItemIdHasStorage(tolp) && ItemIdIsNormal(tolp));
1605  htup = (HeapTupleHeader) PageGetItem(page, tolp);
1606  Assert(HeapTupleHeaderIsHeapOnly(htup));
1607 #endif
1608 
1609  ItemIdSetRedirect(fromlp, tooff);
1610  }
1611 
1612  /* Update all now-dead line pointers */
1613  offnum = nowdead;
1614  for (int i = 0; i < ndead; i++)
1615  {
1616  OffsetNumber off = *offnum++;
1617  ItemId lp = PageGetItemId(page, off);
1618 
1619 #ifdef USE_ASSERT_CHECKING
1620 
1621  /*
1622  * An LP_DEAD line pointer must be left behind when the original item
1623  * (which is dead to everybody) could still be referenced by a TID in
1624  * an index. This should never be necessary with any individual
1625  * heap-only tuple item, though. (It's not clear how much of a problem
1626  * that would be, but there is no reason to allow it.)
1627  */
1628  if (ItemIdHasStorage(lp))
1629  {
1630  Assert(ItemIdIsNormal(lp));
1631  htup = (HeapTupleHeader) PageGetItem(page, lp);
1632  Assert(!HeapTupleHeaderIsHeapOnly(htup));
1633  }
1634  else
1635  {
1636  /* Whole HOT chain becomes dead */
1637  Assert(ItemIdIsRedirected(lp));
1638  }
1639 #endif
1640 
1641  ItemIdSetDead(lp);
1642  }
1643 
1644  /* Update all now-unused line pointers */
1645  offnum = nowunused;
1646  for (int i = 0; i < nunused; i++)
1647  {
1648  OffsetNumber off = *offnum++;
1649  ItemId lp = PageGetItemId(page, off);
1650 
1651 #ifdef USE_ASSERT_CHECKING
1652 
1653  if (lp_truncate_only)
1654  {
1655  /* Setting LP_DEAD to LP_UNUSED in vacuum's second pass */
1656  Assert(ItemIdIsDead(lp) && !ItemIdHasStorage(lp));
1657  }
1658  else
1659  {
1660  /*
1661  * When heap_page_prune_and_freeze() was called, mark_unused_now
1662  * may have been passed as true, which allows would-be LP_DEAD
1663  * items to be made LP_UNUSED instead. This is only possible if
1664  * the relation has no indexes. If there are any dead items, then
1665  * mark_unused_now was not true and every item being marked
1666  * LP_UNUSED must refer to a heap-only tuple.
1667  */
1668  if (ndead > 0)
1669  {
1670  Assert(ItemIdHasStorage(lp) && ItemIdIsNormal(lp));
1671  htup = (HeapTupleHeader) PageGetItem(page, lp);
1672  Assert(HeapTupleHeaderIsHeapOnly(htup));
1673  }
1674  else
1675  Assert(ItemIdIsUsed(lp));
1676  }
1677 
1678 #endif
1679 
1680  ItemIdSetUnused(lp);
1681  }
1682 
1683  if (lp_truncate_only)
1684  PageTruncateLinePointerArray(page);
1685  else
1686  {
1687  /*
1688  * Finally, repair any fragmentation, and update the page's hint bit
1689  * about whether it has free pointers.
1690  */
1691  PageRepairFragmentation(page);
1692 
1693  /*
1694  * Now that the page has been modified, assert that redirect items
1695  * still point to valid targets.
1696  */
1697  page_verify_redirects(page);
1698  }
1699 }

References Assert, BufferGetPage(), HeapTupleHeaderIsHeapOnly, i, ItemIdGetRedirect, ItemIdHasStorage, ItemIdIsDead, ItemIdIsNormal, ItemIdIsRedirected, ItemIdIsUsed, ItemIdSetDead, ItemIdSetRedirect, ItemIdSetUnused, page_verify_redirects(), PageGetItem(), PageGetItemId(), PageRepairFragmentation(), PageTruncateLinePointerArray(), and PG_USED_FOR_ASSERTS_ONLY.

Referenced by heap_page_prune_and_freeze(), and heap_xlog_prune_freeze().
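
The assertions in the listing above encode the expected array layout: 'redirected' is consumed two entries at a time as (from, to) offset pairs, with nredirected counting pairs rather than array elements, while 'nowdead' and 'nowunused' are flat lists of offsets. A hedged sketch of a call, with invented offset values, might look like the following; real callers collect the offsets while walking HOT chains and emit the prune WAL record in the same critical section.

/*
 * Hedged sketch, not part of the PostgreSQL sources: the offsets are made up
 * and the buffer is assumed to be exclusively locked by the caller.
 */
#include "postgres.h"

#include "access/heapam.h"
#include "miscadmin.h"          /* START_CRIT_SECTION / END_CRIT_SECTION */
#include "storage/bufmgr.h"

static void
prune_execute_sketch(Buffer buffer)
{
    /* one (from, to) pair: offset 1 becomes an LP_REDIRECT pointing at 3 */
    OffsetNumber redirected[2] = {1, 3};
    /* offset 4 becomes LP_DEAD, offset 2 becomes LP_UNUSED */
    OffsetNumber nowdead[1] = {4};
    OffsetNumber nowunused[1] = {2};

    START_CRIT_SECTION();

    heap_page_prune_execute(buffer, false,   /* not lp_truncate_only */
                            redirected, 1,   /* nredirected counts pairs */
                            nowdead, 1,
                            nowunused, 1);
    MarkBufferDirty(buffer);

    /* a real caller would emit log_heap_prune_and_freeze() here if needed */

    END_CRIT_SECTION();
}

The critical section matters because the page is modified before the corresponding WAL record is written; keeping both inside one critical section is what makes the change atomic with respect to crashes.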

◆ heap_page_prune_opt()

void heap_page_prune_opt ( Relation  relation,
Buffer  buffer 
)

Definition at line 193 of file pruneheap.c.

194 {
195  Page page = BufferGetPage(buffer);
196  TransactionId prune_xid;
197  GlobalVisState *vistest;
198  Size minfree;
199 
200  /*
201  * We can't write WAL in recovery mode, so there's no point trying to
202  * clean the page. The primary will likely issue a cleaning WAL record
203  * soon anyway, so this is no particular loss.
204  */
205  if (RecoveryInProgress())
206  return;
207 
208  /*
209  * First check whether there's any chance there's something to prune,
210  * determining the appropriate horizon is a waste if there's no prune_xid
211  * (i.e. no updates/deletes left potentially dead tuples around).
212  */
213  prune_xid = ((PageHeader) page)->pd_prune_xid;
214  if (!TransactionIdIsValid(prune_xid))
215  return;
216 
217  /*
218  * Check whether prune_xid indicates that there may be dead rows that can
219  * be cleaned up.
220  */
221  vistest = GlobalVisTestFor(relation);
222 
223  if (!GlobalVisTestIsRemovableXid(vistest, prune_xid))
224  return;
225 
226  /*
227  * We prune when a previous UPDATE failed to find enough space on the page
228  * for a new tuple version, or when free space falls below the relation's
229  * fill-factor target (but not less than 10%).
230  *
231  * Checking free space here is questionable since we aren't holding any
232  * lock on the buffer; in the worst case we could get a bogus answer. It's
233  * unlikely to be *seriously* wrong, though, since reading either pd_lower
234  * or pd_upper is probably atomic. Avoiding taking a lock seems more
235  * important than sometimes getting a wrong answer in what is after all
236  * just a heuristic estimate.
237  */
238  minfree = RelationGetTargetPageFreeSpace(relation,
239  HEAP_DEFAULT_FILLFACTOR);
240  minfree = Max(minfree, BLCKSZ / 10);
241 
242  if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
243  {
244  /* OK, try to get exclusive buffer lock */
245  if (!ConditionalLockBufferForCleanup(buffer))
246  return;
247 
248  /*
249  * Now that we have buffer lock, get accurate information about the
250  * page's free space, and recheck the heuristic about whether to
251  * prune.
252  */
253  if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
254  {
255  OffsetNumber dummy_off_loc;
256  PruneFreezeResult presult;
257 
258  /*
259  * For now, pass mark_unused_now as false regardless of whether or
260  * not the relation has indexes, since we cannot safely determine
261  * that during on-access pruning with the current implementation.
262  */
263  heap_page_prune_and_freeze(relation, buffer, vistest, 0,