PostgreSQL Source Code  git master
heapam.h File Reference
#include "access/relation.h"
#include "access/relscan.h"
#include "access/sdir.h"
#include "access/skey.h"
#include "access/table.h"
#include "access/tableam.h"
#include "nodes/lockoptions.h"
#include "nodes/primnodes.h"
#include "storage/bufpage.h"
#include "storage/dsm.h"
#include "storage/lockdefs.h"
#include "storage/read_stream.h"
#include "storage/shm_toc.h"
#include "utils/relcache.h"
#include "utils/snapshot.h"

Go to the source code of this file.

Data Structures

struct  HeapScanDescData
 
struct  IndexFetchHeapData
 
struct  HeapTupleFreeze
 
struct  HeapPageFreeze
 
struct  PruneFreezeResult
 

Macros

#define HEAP_INSERT_SKIP_FSM   TABLE_INSERT_SKIP_FSM
 
#define HEAP_INSERT_FROZEN   TABLE_INSERT_FROZEN
 
#define HEAP_INSERT_NO_LOGICAL   TABLE_INSERT_NO_LOGICAL
 
#define HEAP_INSERT_SPECULATIVE   0x0010
 
#define HEAP_PAGE_PRUNE_MARK_UNUSED_NOW   (1 << 0)
 
#define HEAP_PAGE_PRUNE_FREEZE   (1 << 1)
 
#define MaxLockTupleMode   LockTupleExclusive
 
#define HEAP_FREEZE_CHECK_XMIN_COMMITTED   0x01
 
#define HEAP_FREEZE_CHECK_XMAX_ABORTED   0x02
 
#define HeapScanIsValid(scan)   PointerIsValid(scan)
 

Typedefs

typedef struct BulkInsertStateData * BulkInsertState
 
typedef struct HeapScanDescData HeapScanDescData
 
typedef struct HeapScanDescData * HeapScanDesc
 
typedef struct IndexFetchHeapData IndexFetchHeapData
 
typedef struct HeapTupleFreeze HeapTupleFreeze
 
typedef struct HeapPageFreeze HeapPageFreeze
 
typedef struct PruneFreezeResult PruneFreezeResult
 

Enumerations

enum  HTSV_Result {
  HEAPTUPLE_DEAD , HEAPTUPLE_LIVE , HEAPTUPLE_RECENTLY_DEAD , HEAPTUPLE_INSERT_IN_PROGRESS ,
  HEAPTUPLE_DELETE_IN_PROGRESS
}
 
enum  PruneReason { PRUNE_ON_ACCESS , PRUNE_VACUUM_SCAN , PRUNE_VACUUM_CLEANUP }
 

Functions

TableScanDesc heap_beginscan (Relation relation, Snapshot snapshot, int nkeys, ScanKey key, ParallelTableScanDesc parallel_scan, uint32 flags)
 
void heap_setscanlimits (TableScanDesc sscan, BlockNumber startBlk, BlockNumber numBlks)
 
void heap_prepare_pagescan (TableScanDesc sscan)
 
void heap_rescan (TableScanDesc sscan, ScanKey key, bool set_params, bool allow_strat, bool allow_sync, bool allow_pagemode)
 
void heap_endscan (TableScanDesc sscan)
 
HeapTuple heap_getnext (TableScanDesc sscan, ScanDirection direction)
 
bool heap_getnextslot (TableScanDesc sscan, ScanDirection direction, struct TupleTableSlot *slot)
 
void heap_set_tidrange (TableScanDesc sscan, ItemPointer mintid, ItemPointer maxtid)
 
bool heap_getnextslot_tidrange (TableScanDesc sscan, ScanDirection direction, TupleTableSlot *slot)
 
bool heap_fetch (Relation relation, Snapshot snapshot, HeapTuple tuple, Buffer *userbuf, bool keep_buf)
 
bool heap_hot_search_buffer (ItemPointer tid, Relation relation, Buffer buffer, Snapshot snapshot, HeapTuple heapTuple, bool *all_dead, bool first_call)
 
void heap_get_latest_tid (TableScanDesc sscan, ItemPointer tid)
 
BulkInsertState GetBulkInsertState (void)
 
void FreeBulkInsertState (BulkInsertState)
 
void ReleaseBulkInsertStatePin (BulkInsertState bistate)
 
void heap_insert (Relation relation, HeapTuple tup, CommandId cid, int options, BulkInsertState bistate)
 
void heap_multi_insert (Relation relation, struct TupleTableSlot **slots, int ntuples, CommandId cid, int options, BulkInsertState bistate)
 
TM_Result heap_delete (Relation relation, ItemPointer tid, CommandId cid, Snapshot crosscheck, bool wait, struct TM_FailureData *tmfd, bool changingPart)
 
void heap_finish_speculative (Relation relation, ItemPointer tid)
 
void heap_abort_speculative (Relation relation, ItemPointer tid)
 
TM_Result heap_update (Relation relation, ItemPointer otid, HeapTuple newtup, CommandId cid, Snapshot crosscheck, bool wait, struct TM_FailureData *tmfd, LockTupleMode *lockmode, TU_UpdateIndexes *update_indexes)
 
TM_Result heap_lock_tuple (Relation relation, HeapTuple tuple, CommandId cid, LockTupleMode mode, LockWaitPolicy wait_policy, bool follow_updates, Buffer *buffer, struct TM_FailureData *tmfd)
 
void heap_inplace_update (Relation relation, HeapTuple tuple)
 
bool heap_prepare_freeze_tuple (HeapTupleHeader tuple, const struct VacuumCutoffs *cutoffs, HeapPageFreeze *pagefrz, HeapTupleFreeze *frz, bool *totally_frozen)
 
void heap_pre_freeze_checks (Buffer buffer, HeapTupleFreeze *tuples, int ntuples)
 
void heap_freeze_prepared_tuples (Buffer buffer, HeapTupleFreeze *tuples, int ntuples)
 
bool heap_freeze_tuple (HeapTupleHeader tuple, TransactionId relfrozenxid, TransactionId relminmxid, TransactionId FreezeLimit, TransactionId MultiXactCutoff)
 
bool heap_tuple_should_freeze (HeapTupleHeader tuple, const struct VacuumCutoffs *cutoffs, TransactionId *NoFreezePageRelfrozenXid, MultiXactId *NoFreezePageRelminMxid)
 
bool heap_tuple_needs_eventual_freeze (HeapTupleHeader tuple)
 
void simple_heap_insert (Relation relation, HeapTuple tup)
 
void simple_heap_delete (Relation relation, ItemPointer tid)
 
void simple_heap_update (Relation relation, ItemPointer otid, HeapTuple tup, TU_UpdateIndexes *update_indexes)
 
TransactionId heap_index_delete_tuples (Relation rel, TM_IndexDeleteOp *delstate)
 
void heap_page_prune_opt (Relation relation, Buffer buffer)
 
void heap_page_prune_and_freeze (Relation relation, Buffer buffer, struct GlobalVisState *vistest, int options, struct VacuumCutoffs *cutoffs, PruneFreezeResult *presult, PruneReason reason, OffsetNumber *off_loc, TransactionId *new_relfrozen_xid, MultiXactId *new_relmin_mxid)
 
void heap_page_prune_execute (Buffer buffer, bool lp_truncate_only, OffsetNumber *redirected, int nredirected, OffsetNumber *nowdead, int ndead, OffsetNumber *nowunused, int nunused)
 
void heap_get_root_tuples (Page page, OffsetNumber *root_offsets)
 
void log_heap_prune_and_freeze (Relation relation, Buffer buffer, TransactionId conflict_xid, bool cleanup_lock, PruneReason reason, HeapTupleFreeze *frozen, int nfrozen, OffsetNumber *redirected, int nredirected, OffsetNumber *dead, int ndead, OffsetNumber *unused, int nunused)
 
void heap_vacuum_rel (Relation rel, struct VacuumParams *params, BufferAccessStrategy bstrategy)
 
bool HeapTupleSatisfiesVisibility (HeapTuple htup, Snapshot snapshot, Buffer buffer)
 
TM_Result HeapTupleSatisfiesUpdate (HeapTuple htup, CommandId curcid, Buffer buffer)
 
HTSV_Result HeapTupleSatisfiesVacuum (HeapTuple htup, TransactionId OldestXmin, Buffer buffer)
 
HTSV_Result HeapTupleSatisfiesVacuumHorizon (HeapTuple htup, Buffer buffer, TransactionId *dead_after)
 
void HeapTupleSetHintBits (HeapTupleHeader tuple, Buffer buffer, uint16 infomask, TransactionId xid)
 
bool HeapTupleHeaderIsOnlyLocked (HeapTupleHeader tuple)
 
bool HeapTupleIsSurelyDead (HeapTuple htup, struct GlobalVisState *vistest)
 
bool ResolveCminCmaxDuringDecoding (struct HTAB *tuplecid_data, Snapshot snapshot, HeapTuple htup, Buffer buffer, CommandId *cmin, CommandId *cmax)
 
void HeapCheckForSerializableConflictOut (bool visible, Relation relation, HeapTuple tuple, Buffer buffer, Snapshot snapshot)
 

Macro Definition Documentation

◆ HEAP_FREEZE_CHECK_XMAX_ABORTED

#define HEAP_FREEZE_CHECK_XMAX_ABORTED   0x02

Definition at line 137 of file heapam.h.

◆ HEAP_FREEZE_CHECK_XMIN_COMMITTED

#define HEAP_FREEZE_CHECK_XMIN_COMMITTED   0x01

Definition at line 136 of file heapam.h.

◆ HEAP_INSERT_FROZEN

#define HEAP_INSERT_FROZEN   TABLE_INSERT_FROZEN

Definition at line 36 of file heapam.h.

◆ HEAP_INSERT_NO_LOGICAL

#define HEAP_INSERT_NO_LOGICAL   TABLE_INSERT_NO_LOGICAL

Definition at line 37 of file heapam.h.

◆ HEAP_INSERT_SKIP_FSM

#define HEAP_INSERT_SKIP_FSM   TABLE_INSERT_SKIP_FSM

Definition at line 35 of file heapam.h.

◆ HEAP_INSERT_SPECULATIVE

#define HEAP_INSERT_SPECULATIVE   0x0010

Definition at line 38 of file heapam.h.

◆ HEAP_PAGE_PRUNE_FREEZE

#define HEAP_PAGE_PRUNE_FREEZE   (1 << 1)

Definition at line 42 of file heapam.h.

◆ HEAP_PAGE_PRUNE_MARK_UNUSED_NOW

#define HEAP_PAGE_PRUNE_MARK_UNUSED_NOW   (1 << 0)

Definition at line 41 of file heapam.h.

◆ HeapScanIsValid

#define HeapScanIsValid (   scan)    PointerIsValid(scan)

Definition at line 287 of file heapam.h.

◆ MaxLockTupleMode

#define MaxLockTupleMode   LockTupleExclusive

Definition at line 48 of file heapam.h.

Typedef Documentation

◆ BulkInsertState

typedef struct BulkInsertStateData* BulkInsertState

Definition at line 44 of file heapam.h.

◆ HeapPageFreeze

◆ HeapScanDesc

typedef struct HeapScanDescData* HeapScanDesc

Definition at line 109 of file heapam.h.

◆ HeapScanDescData

◆ HeapTupleFreeze

◆ IndexFetchHeapData

◆ PruneFreezeResult

Enumeration Type Documentation

◆ HTSV_Result

Enumerator
HEAPTUPLE_DEAD 
HEAPTUPLE_LIVE 
HEAPTUPLE_RECENTLY_DEAD 
HEAPTUPLE_INSERT_IN_PROGRESS 
HEAPTUPLE_DELETE_IN_PROGRESS 

Definition at line 123 of file heapam.h.

124 {
125  HEAPTUPLE_DEAD, /* tuple is dead and deletable */
126  HEAPTUPLE_LIVE, /* tuple is live (committed, no deleter) */
127  HEAPTUPLE_RECENTLY_DEAD, /* tuple is dead, but not deletable yet */
128  HEAPTUPLE_INSERT_IN_PROGRESS, /* inserting xact is still in progress */
129  HEAPTUPLE_DELETE_IN_PROGRESS, /* deleting xact is still in progress */
130 } HTSV_Result;
HTSV_Result
Definition: heapam.h:124
@ HEAPTUPLE_RECENTLY_DEAD
Definition: heapam.h:127
@ HEAPTUPLE_INSERT_IN_PROGRESS
Definition: heapam.h:128
@ HEAPTUPLE_LIVE
Definition: heapam.h:126
@ HEAPTUPLE_DELETE_IN_PROGRESS
Definition: heapam.h:129
@ HEAPTUPLE_DEAD
Definition: heapam.h:125

◆ PruneReason

Enumerator
PRUNE_ON_ACCESS 
PRUNE_VACUUM_SCAN 
PRUNE_VACUUM_CLEANUP 

Definition at line 267 of file heapam.h.

268 {
269  PRUNE_ON_ACCESS, /* on-access pruning */
270  PRUNE_VACUUM_SCAN, /* VACUUM 1st heap pass */
271  PRUNE_VACUUM_CLEANUP, /* VACUUM 2nd heap pass */
272 } PruneReason;
PruneReason
Definition: heapam.h:268
@ PRUNE_VACUUM_CLEANUP
Definition: heapam.h:271
@ PRUNE_ON_ACCESS
Definition: heapam.h:269
@ PRUNE_VACUUM_SCAN
Definition: heapam.h:270

Function Documentation

◆ FreeBulkInsertState()

void FreeBulkInsertState ( BulkInsertState  bistate)

Definition at line 1944 of file heapam.c.

1945 {
1946  if (bistate->current_buf != InvalidBuffer)
1947  ReleaseBuffer(bistate->current_buf);
1948  FreeAccessStrategy(bistate->strategy);
1949  pfree(bistate);
1950 }
#define InvalidBuffer
Definition: buf.h:25
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:4906
void FreeAccessStrategy(BufferAccessStrategy strategy)
Definition: freelist.c:681
void pfree(void *pointer)
Definition: mcxt.c:1521
BufferAccessStrategy strategy
Definition: hio.h:31
Buffer current_buf
Definition: hio.h:32

References BulkInsertStateData::current_buf, FreeAccessStrategy(), InvalidBuffer, pfree(), ReleaseBuffer(), and BulkInsertStateData::strategy.

Referenced by ATRewriteTable(), CopyFrom(), CopyMultiInsertBufferCleanup(), intorel_shutdown(), and transientrel_shutdown().

◆ GetBulkInsertState()

BulkInsertState GetBulkInsertState ( void  )

Definition at line 1927 of file heapam.c.

1928 {
1929  BulkInsertState bistate;
1930 
1931  bistate = (BulkInsertState) palloc(sizeof(BulkInsertStateData));
1932  bistate->strategy = GetAccessStrategy(BAS_BULKWRITE);
1933  bistate->current_buf = InvalidBuffer;
1934  bistate->next_free = InvalidBlockNumber;
1935  bistate->last_free = InvalidBlockNumber;
1936  bistate->already_extended_by = 0;
1937  return bistate;
1938 }
#define InvalidBlockNumber
Definition: block.h:33
@ BAS_BULKWRITE
Definition: bufmgr.h:38
BufferAccessStrategy GetAccessStrategy(BufferAccessStrategyType btype)
Definition: freelist.c:541
struct BulkInsertStateData * BulkInsertState
Definition: heapam.h:44
void * palloc(Size size)
Definition: mcxt.c:1317
BlockNumber last_free
Definition: hio.h:49
uint32 already_extended_by
Definition: hio.h:50
BlockNumber next_free
Definition: hio.h:48

References BulkInsertStateData::already_extended_by, BAS_BULKWRITE, BulkInsertStateData::current_buf, GetAccessStrategy(), InvalidBlockNumber, InvalidBuffer, BulkInsertStateData::last_free, BulkInsertStateData::next_free, palloc(), and BulkInsertStateData::strategy.

Referenced by ATRewriteTable(), CopyFrom(), CopyMultiInsertBufferInit(), intorel_startup(), and transientrel_startup().
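
As a usage illustration (a minimal sketch, not taken from the PostgreSQL sources), a bulk-loading caller typically pairs GetBulkInsertState() with heap_insert() and HEAP_INSERT_SKIP_FSM. load_rows is a hypothetical helper; the caller is assumed to already hold a suitable lock on rel:

    #include "postgres.h"

    #include "access/heapam.h"
    #include "access/xact.h"

    static void
    load_rows(Relation rel, HeapTuple *tuples, int ntuples)
    {
        BulkInsertState bistate = GetBulkInsertState();
        CommandId   cid = GetCurrentCommandId(true);

        for (int i = 0; i < ntuples; i++)
            heap_insert(rel, tuples[i], cid, HEAP_INSERT_SKIP_FSM, bistate);

        FreeBulkInsertState(bistate);
    }

Reusing one BulkInsertState across all insertions is what lets the bulk-write buffer strategy and relation-extension bookkeeping pay off; FreeBulkInsertState() releases the last pinned buffer.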

◆ heap_abort_speculative()

void heap_abort_speculative ( Relation  relation,
ItemPointer  tid 
)

Definition at line 5906 of file heapam.c.

5907 {
5908  TransactionId xid = GetCurrentTransactionId();
5909  ItemId lp;
5910  HeapTupleData tp;
5911  Page page;
5912  BlockNumber block;
5913  Buffer buffer;
5914 
5915  Assert(ItemPointerIsValid(tid));
5916 
5917  block = ItemPointerGetBlockNumber(tid);
5918  buffer = ReadBuffer(relation, block);
5919  page = BufferGetPage(buffer);
5920 
5922 
5923  /*
5924  * Page can't be all visible, we just inserted into it, and are still
5925  * running.
5926  */
5927  Assert(!PageIsAllVisible(page));
5928 
5929  lp = PageGetItemId(page, ItemPointerGetOffsetNumber(tid));
5930  Assert(ItemIdIsNormal(lp));
5931 
5932  tp.t_tableOid = RelationGetRelid(relation);
5933  tp.t_data = (HeapTupleHeader) PageGetItem(page, lp);
5934  tp.t_len = ItemIdGetLength(lp);
5935  tp.t_self = *tid;
5936 
5937  /*
5938  * Sanity check that the tuple really is a speculatively inserted tuple,
5939  * inserted by us.
5940  */
5941  if (tp.t_data->t_choice.t_heap.t_xmin != xid)
5942  elog(ERROR, "attempted to kill a tuple inserted by another transaction");
5943  if (!(IsToastRelation(relation) || HeapTupleHeaderIsSpeculative(tp.t_data)))
5944  elog(ERROR, "attempted to kill a non-speculative tuple");
5946 
5947  /*
5948  * No need to check for serializable conflicts here. There is never a
5949  * need for a combo CID, either. No need to extract replica identity, or
5950  * do anything special with infomask bits.
5951  */
5952 
5954 
5955  /*
5956  * The tuple will become DEAD immediately. Flag that this page is a
5957  * candidate for pruning by setting xmin to TransactionXmin. While not
5958  * immediately prunable, it is the oldest xid we can cheaply determine
5959  * that's safe against wraparound / being older than the table's
5960  * relfrozenxid. To defend against the unlikely case of a new relation
5961  * having a newer relfrozenxid than our TransactionXmin, use relfrozenxid
5962  * if so (vacuum can't subsequently move relfrozenxid to beyond
5963  * TransactionXmin, so there's no race here).
5964  */
5966  {
5967  TransactionId relfrozenxid = relation->rd_rel->relfrozenxid;
5968  TransactionId prune_xid;
5969 
5970  if (TransactionIdPrecedes(TransactionXmin, relfrozenxid))
5971  prune_xid = relfrozenxid;
5972  else
5973  prune_xid = TransactionXmin;
5974  PageSetPrunable(page, prune_xid);
5975  }
5976 
5977  /* store transaction information of xact deleting the tuple */
5980 
5981  /*
5982  * Set the tuple header xmin to InvalidTransactionId. This makes the
5983  * tuple immediately invisible to everyone. (In particular, to any
5984  * transactions waiting on the speculative token, woken up later.)
5985  */
5987 
5988  /* Clear the speculative insertion token too */
5989  tp.t_data->t_ctid = tp.t_self;
5990 
5991  MarkBufferDirty(buffer);
5992 
5993  /*
5994  * XLOG stuff
5995  *
5996  * The WAL records generated here match heap_delete(). The same recovery
5997  * routines are used.
5998  */
5999  if (RelationNeedsWAL(relation))
6000  {
6001  xl_heap_delete xlrec;
6002  XLogRecPtr recptr;
6003 
6004  xlrec.flags = XLH_DELETE_IS_SUPER;
6006  tp.t_data->t_infomask2);
6008  xlrec.xmax = xid;
6009 
6010  XLogBeginInsert();
6011  XLogRegisterData((char *) &xlrec, SizeOfHeapDelete);
6012  XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
6013 
6014  /* No replica identity & replication origin logged */
6015 
6016  recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_DELETE);
6017 
6018  PageSetLSN(page, recptr);
6019  }
6020 
6021  END_CRIT_SECTION();
6022 
6023  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
6024 
6025  if (HeapTupleHasExternal(&tp))
6026  {
6027  Assert(!IsToastRelation(relation));
6028  heap_toast_delete(relation, &tp, true);
6029  }
6030 
6031  /*
6032  * Never need to mark tuple for invalidation, since catalogs don't support
6033  * speculative insertion
6034  */
6035 
6036  /* Now we can release the buffer */
6037  ReleaseBuffer(buffer);
6038 
6039  /* count deletion, as we counted the insertion too */
6040  pgstat_count_heap_delete(relation);
6041 }
uint32 BlockNumber
Definition: block.h:31
int Buffer
Definition: buf.h:23
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:2514
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:5140
Buffer ReadBuffer(Relation reln, BlockNumber blockNum)
Definition: bufmgr.c:746
#define BUFFER_LOCK_UNLOCK
Definition: bufmgr.h:189
static Page BufferGetPage(Buffer buffer)
Definition: bufmgr.h:400
#define BUFFER_LOCK_EXCLUSIVE
Definition: bufmgr.h:191
Pointer Page
Definition: bufpage.h:81
static Item PageGetItem(Page page, ItemId itemId)
Definition: bufpage.h:354
static ItemId PageGetItemId(Page page, OffsetNumber offsetNumber)
Definition: bufpage.h:243
static bool PageIsAllVisible(Page page)
Definition: bufpage.h:429
static void PageSetLSN(Page page, XLogRecPtr lsn)
Definition: bufpage.h:391
#define PageSetPrunable(page, xid)
Definition: bufpage.h:447
#define Assert(condition)
Definition: c.h:858
uint32 TransactionId
Definition: c.h:652
bool IsToastRelation(Relation relation)
Definition: catalog.c:166
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
static uint8 compute_infobits(uint16 infomask, uint16 infomask2)
Definition: heapam.c:2638
#define XLOG_HEAP_DELETE
Definition: heapam_xlog.h:33
#define SizeOfHeapDelete
Definition: heapam_xlog.h:120
#define XLH_DELETE_IS_SUPER
Definition: heapam_xlog.h:104
void heap_toast_delete(Relation rel, HeapTuple oldtup, bool is_speculative)
Definition: heaptoast.c:43
HeapTupleHeaderData * HeapTupleHeader
Definition: htup.h:23
#define HEAP_KEYS_UPDATED
Definition: htup_details.h:275
#define HeapTupleHeaderIsHeapOnly(tup)
Definition: htup_details.h:499
#define HeapTupleHeaderSetXmin(tup, xid)
Definition: htup_details.h:315
#define HEAP_XMAX_BITS
Definition: htup_details.h:267
#define HeapTupleHasExternal(tuple)
Definition: htup_details.h:671
#define HEAP_MOVED
Definition: htup_details.h:213
#define HeapTupleHeaderIsSpeculative(tup)
Definition: htup_details.h:428
#define ItemIdGetLength(itemId)
Definition: itemid.h:59
#define ItemIdIsNormal(itemId)
Definition: itemid.h:99
static OffsetNumber ItemPointerGetOffsetNumber(const ItemPointerData *pointer)
Definition: itemptr.h:124
static BlockNumber ItemPointerGetBlockNumber(const ItemPointerData *pointer)
Definition: itemptr.h:103
static bool ItemPointerIsValid(const ItemPointerData *pointer)
Definition: itemptr.h:83
#define START_CRIT_SECTION()
Definition: miscadmin.h:149
#define END_CRIT_SECTION()
Definition: miscadmin.h:151
void pgstat_count_heap_delete(Relation rel)
#define RelationGetRelid(relation)
Definition: rel.h:505
#define RelationNeedsWAL(relation)
Definition: rel.h:628
TransactionId TransactionXmin
Definition: snapmgr.c:98
ItemPointerData t_self
Definition: htup.h:65
uint32 t_len
Definition: htup.h:64
HeapTupleHeader t_data
Definition: htup.h:68
Oid t_tableOid
Definition: htup.h:66
TransactionId t_xmin
Definition: htup_details.h:124
ItemPointerData t_ctid
Definition: htup_details.h:161
HeapTupleFields t_heap
Definition: htup_details.h:157
union HeapTupleHeaderData::@48 t_choice
Form_pg_class rd_rel
Definition: rel.h:111
TransactionId xmax
Definition: heapam_xlog.h:114
OffsetNumber offnum
Definition: heapam_xlog.h:115
uint8 infobits_set
Definition: heapam_xlog.h:116
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:280
#define InvalidTransactionId
Definition: transam.h:31
#define TransactionIdIsValid(xid)
Definition: transam.h:41
TransactionId GetCurrentTransactionId(void)
Definition: xact.c:453
uint64 XLogRecPtr
Definition: xlogdefs.h:21
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:474
void XLogRegisterData(const char *data, uint32 len)
Definition: xloginsert.c:364
void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
Definition: xloginsert.c:242
void XLogBeginInsert(void)
Definition: xloginsert.c:149
#define REGBUF_STANDARD
Definition: xloginsert.h:34

References Assert, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetPage(), compute_infobits(), elog, END_CRIT_SECTION, ERROR, xl_heap_delete::flags, GetCurrentTransactionId(), HEAP_KEYS_UPDATED, HEAP_MOVED, heap_toast_delete(), HEAP_XMAX_BITS, HeapTupleHasExternal, HeapTupleHeaderIsHeapOnly, HeapTupleHeaderIsSpeculative, HeapTupleHeaderSetXmin, xl_heap_delete::infobits_set, InvalidTransactionId, IsToastRelation(), ItemIdGetLength, ItemIdIsNormal, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), ItemPointerIsValid(), LockBuffer(), MarkBufferDirty(), xl_heap_delete::offnum, PageGetItem(), PageGetItemId(), PageIsAllVisible(), PageSetLSN(), PageSetPrunable, pgstat_count_heap_delete(), RelationData::rd_rel, ReadBuffer(), REGBUF_STANDARD, RelationGetRelid, RelationNeedsWAL, ReleaseBuffer(), SizeOfHeapDelete, START_CRIT_SECTION, HeapTupleHeaderData::t_choice, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleHeaderData::t_heap, HeapTupleHeaderData::t_infomask, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, HeapTupleFields::t_xmin, TransactionIdIsValid, TransactionIdPrecedes(), TransactionXmin, XLH_DELETE_IS_SUPER, XLOG_HEAP_DELETE, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), XLogRegisterData(), and xl_heap_delete::xmax.

Referenced by heapam_tuple_complete_speculative(), and toast_delete_datum().

◆ heap_beginscan()

TableScanDesc heap_beginscan ( Relation  relation,
Snapshot  snapshot,
int  nkeys,
ScanKey  key,
ParallelTableScanDesc  parallel_scan,
uint32  flags 
)

Definition at line 1038 of file heapam.c.

1042 {
1043  HeapScanDesc scan;
1044 
1045  /*
1046  * increment relation ref count while scanning relation
1047  *
1048  * This is just to make really sure the relcache entry won't go away while
1049  * the scan has a pointer to it. Caller should be holding the rel open
1050  * anyway, so this is redundant in all normal scenarios...
1051  */
1053 
1054  /*
1055  * allocate and initialize scan descriptor
1056  */
1057  scan = (HeapScanDesc) palloc(sizeof(HeapScanDescData));
1058 
1059  scan->rs_base.rs_rd = relation;
1060  scan->rs_base.rs_snapshot = snapshot;
1061  scan->rs_base.rs_nkeys = nkeys;
1062  scan->rs_base.rs_flags = flags;
1063  scan->rs_base.rs_parallel = parallel_scan;
1064  scan->rs_strategy = NULL; /* set in initscan */
1065  scan->rs_vmbuffer = InvalidBuffer;
1066  scan->rs_empty_tuples_pending = 0;
1067 
1068  /*
1069  * Disable page-at-a-time mode if it's not a MVCC-safe snapshot.
1070  */
1071  if (!(snapshot && IsMVCCSnapshot(snapshot)))
1073 
1074  /*
1075  * For seqscan and sample scans in a serializable transaction, acquire a
1076  * predicate lock on the entire relation. This is required not only to
1077  * lock all the matching tuples, but also to conflict with new insertions
1078  * into the table. In an indexscan, we take page locks on the index pages
1079  * covering the range specified in the scan qual, but in a heap scan there
1080  * is nothing more fine-grained to lock. A bitmap scan is a different
1081  * story, there we have already scanned the index and locked the index
1082  * pages covering the predicate. But in that case we still have to lock
1083  * any matching heap tuples. For sample scan we could optimize the locking
1084  * to be at least page-level granularity, but we'd need to add per-tuple
1085  * locking for that.
1086  */
1088  {
1089  /*
1090  * Ensure a missing snapshot is noticed reliably, even if the
1091  * isolation mode means predicate locking isn't performed (and
1092  * therefore the snapshot isn't used here).
1093  */
1094  Assert(snapshot);
1095  PredicateLockRelation(relation, snapshot);
1096  }
1097 
1098  /* we only need to set this up once */
1099  scan->rs_ctup.t_tableOid = RelationGetRelid(relation);
1100 
1101  /*
1102  * Allocate memory to keep track of page allocation for parallel workers
1103  * when doing a parallel scan.
1104  */
1105  if (parallel_scan != NULL)
1107  else
1108  scan->rs_parallelworkerdata = NULL;
1109 
1110  /*
1111  * we do this here instead of in initscan() because heap_rescan also calls
1112  * initscan() and we don't want to allocate memory again
1113  */
1114  if (nkeys > 0)
1115  scan->rs_base.rs_key = (ScanKey) palloc(sizeof(ScanKeyData) * nkeys);
1116  else
1117  scan->rs_base.rs_key = NULL;
1118 
1119  initscan(scan, key, false);
1120 
1121  scan->rs_read_stream = NULL;
1122 
1123  /*
1124  * Set up a read stream for sequential scans and TID range scans. This
1125  * should be done after initscan() because initscan() allocates the
1126  * BufferAccessStrategy object passed to the read stream API.
1127  */
1128  if (scan->rs_base.rs_flags & SO_TYPE_SEQSCAN ||
1130  {
1132 
1133  if (scan->rs_base.rs_parallel)
1135  else
1137 
1139  scan->rs_strategy,
1140  scan->rs_base.rs_rd,
1141  MAIN_FORKNUM,
1142  cb,
1143  scan,
1144  0);
1145  }
1146 
1147 
1148  return (TableScanDesc) scan;
1149 }
static BlockNumber heap_scan_stream_read_next_parallel(ReadStream *stream, void *callback_private_data, void *per_buffer_data)
Definition: heapam.c:232
static BlockNumber heap_scan_stream_read_next_serial(ReadStream *stream, void *callback_private_data, void *per_buffer_data)
Definition: heapam.c:270
static void initscan(HeapScanDesc scan, ScanKey key, bool keep_startblock)
Definition: heapam.c:294
struct HeapScanDescData * HeapScanDesc
Definition: heapam.h:109
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:77
void PredicateLockRelation(Relation relation, Snapshot snapshot)
Definition: predicate.c:2561
ReadStream * read_stream_begin_relation(int flags, BufferAccessStrategy strategy, Relation rel, ForkNumber forknum, ReadStreamBlockNumberCB callback, void *callback_private_data, size_t per_buffer_data_size)
Definition: read_stream.c:552
BlockNumber(* ReadStreamBlockNumberCB)(ReadStream *stream, void *callback_private_data, void *per_buffer_data)
Definition: read_stream.h:56
#define READ_STREAM_SEQUENTIAL
Definition: read_stream.h:36
void RelationIncrementReferenceCount(Relation rel)
Definition: relcache.c:2159
@ MAIN_FORKNUM
Definition: relpath.h:58
ScanKeyData * ScanKey
Definition: skey.h:75
#define IsMVCCSnapshot(snapshot)
Definition: snapmgr.h:62
Buffer rs_vmbuffer
Definition: heapam.h:101
BufferAccessStrategy rs_strategy
Definition: heapam.h:70
ParallelBlockTableScanWorkerData * rs_parallelworkerdata
Definition: heapam.h:92
HeapTupleData rs_ctup
Definition: heapam.h:72
ReadStream * rs_read_stream
Definition: heapam.h:75
int rs_empty_tuples_pending
Definition: heapam.h:102
TableScanDescData rs_base
Definition: heapam.h:55
Relation rs_rd
Definition: relscan.h:34
uint32 rs_flags
Definition: relscan.h:47
struct ScanKeyData * rs_key
Definition: relscan.h:37
struct SnapshotData * rs_snapshot
Definition: relscan.h:35
struct ParallelTableScanDescData * rs_parallel
Definition: relscan.h:49
@ SO_TYPE_TIDRANGESCAN
Definition: tableam.h:53
@ SO_ALLOW_PAGEMODE
Definition: tableam.h:62
@ SO_TYPE_SAMPLESCAN
Definition: tableam.h:51
@ SO_TYPE_SEQSCAN
Definition: tableam.h:49

References Assert, heap_scan_stream_read_next_parallel(), heap_scan_stream_read_next_serial(), if(), initscan(), InvalidBuffer, IsMVCCSnapshot, sort-test::key, MAIN_FORKNUM, palloc(), PredicateLockRelation(), read_stream_begin_relation(), READ_STREAM_SEQUENTIAL, RelationGetRelid, RelationIncrementReferenceCount(), HeapScanDescData::rs_base, HeapScanDescData::rs_ctup, HeapScanDescData::rs_empty_tuples_pending, TableScanDescData::rs_flags, TableScanDescData::rs_key, TableScanDescData::rs_nkeys, TableScanDescData::rs_parallel, HeapScanDescData::rs_parallelworkerdata, TableScanDescData::rs_rd, HeapScanDescData::rs_read_stream, TableScanDescData::rs_snapshot, HeapScanDescData::rs_strategy, HeapScanDescData::rs_vmbuffer, SO_ALLOW_PAGEMODE, SO_TYPE_SAMPLESCAN, SO_TYPE_SEQSCAN, SO_TYPE_TIDRANGESCAN, and HeapTupleData::t_tableOid.
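
Ordinary callers reach this through table_beginscan() and the table AM layer, but a heap-level scan loop looks roughly like the hedged sketch below. count_visible_tuples is a hypothetical helper; rel is assumed to be a heap-AM relation already opened and locked by the caller, with an active snapshot set up:

    #include "postgres.h"

    #include "access/heapam.h"
    #include "utils/snapmgr.h"

    static uint64
    count_visible_tuples(Relation rel)
    {
        TableScanDesc scan;
        HeapTuple   tuple;
        uint64      ntuples = 0;

        /* sequential scan; page-at-a-time visibility checking allowed */
        scan = heap_beginscan(rel, GetActiveSnapshot(), 0, NULL, NULL,
                              SO_TYPE_SEQSCAN | SO_ALLOW_PAGEMODE);

        while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
            ntuples++;

        heap_endscan(scan);
        return ntuples;
    }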

◆ heap_delete()

TM_Result heap_delete ( Relation  relation,
ItemPointer  tid,
CommandId  cid,
Snapshot  crosscheck,
bool  wait,
struct TM_FailureData tmfd,
bool  changingPart 
)

Definition at line 2683 of file heapam.c.

2686 {
2687  TM_Result result;
2688  TransactionId xid = GetCurrentTransactionId();
2689  ItemId lp;
2690  HeapTupleData tp;
2691  Page page;
2692  BlockNumber block;
2693  Buffer buffer;
2694  Buffer vmbuffer = InvalidBuffer;
2695  TransactionId new_xmax;
2696  uint16 new_infomask,
2697  new_infomask2;
2698  bool have_tuple_lock = false;
2699  bool iscombo;
2700  bool all_visible_cleared = false;
2701  HeapTuple old_key_tuple = NULL; /* replica identity of the tuple */
2702  bool old_key_copied = false;
2703 
2704  Assert(ItemPointerIsValid(tid));
2705 
2706  /*
2707  * Forbid this during a parallel operation, lest it allocate a combo CID.
2708  * Other workers might need that combo CID for visibility checks, and we
2709  * have no provision for broadcasting it to them.
2710  */
2711  if (IsInParallelMode())
2712  ereport(ERROR,
2713  (errcode(ERRCODE_INVALID_TRANSACTION_STATE),
2714  errmsg("cannot delete tuples during a parallel operation")));
2715 
2716  block = ItemPointerGetBlockNumber(tid);
2717  buffer = ReadBuffer(relation, block);
2718  page = BufferGetPage(buffer);
2719 
2720  /*
2721  * Before locking the buffer, pin the visibility map page if it appears to
2722  * be necessary. Since we haven't got the lock yet, someone else might be
2723  * in the middle of changing this, so we'll need to recheck after we have
2724  * the lock.
2725  */
2726  if (PageIsAllVisible(page))
2727  visibilitymap_pin(relation, block, &vmbuffer);
2728 
2730 
2731  lp = PageGetItemId(page, ItemPointerGetOffsetNumber(tid));
2732  Assert(ItemIdIsNormal(lp));
2733 
2734  tp.t_tableOid = RelationGetRelid(relation);
2735  tp.t_data = (HeapTupleHeader) PageGetItem(page, lp);
2736  tp.t_len = ItemIdGetLength(lp);
2737  tp.t_self = *tid;
2738 
2739 l1:
2740 
2741  /*
2742  * If we didn't pin the visibility map page and the page has become all
2743  * visible while we were busy locking the buffer, we'll have to unlock and
2744  * re-lock, to avoid holding the buffer lock across an I/O. That's a bit
2745  * unfortunate, but hopefully shouldn't happen often.
2746  */
2747  if (vmbuffer == InvalidBuffer && PageIsAllVisible(page))
2748  {
2749  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
2750  visibilitymap_pin(relation, block, &vmbuffer);
2752  }
2753 
2754  result = HeapTupleSatisfiesUpdate(&tp, cid, buffer);
2755 
2756  if (result == TM_Invisible)
2757  {
2758  UnlockReleaseBuffer(buffer);
2759  ereport(ERROR,
2760  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
2761  errmsg("attempted to delete invisible tuple")));
2762  }
2763  else if (result == TM_BeingModified && wait)
2764  {
2765  TransactionId xwait;
2766  uint16 infomask;
2767 
2768  /* must copy state data before unlocking buffer */
2769  xwait = HeapTupleHeaderGetRawXmax(tp.t_data);
2770  infomask = tp.t_data->t_infomask;
2771 
2772  /*
2773  * Sleep until concurrent transaction ends -- except when there's a
2774  * single locker and it's our own transaction. Note we don't care
2775  * which lock mode the locker has, because we need the strongest one.
2776  *
2777  * Before sleeping, we need to acquire tuple lock to establish our
2778  * priority for the tuple (see heap_lock_tuple). LockTuple will
2779  * release us when we are next-in-line for the tuple.
2780  *
2781  * If we are forced to "start over" below, we keep the tuple lock;
2782  * this arranges that we stay at the head of the line while rechecking
2783  * tuple state.
2784  */
2785  if (infomask & HEAP_XMAX_IS_MULTI)
2786  {
2787  bool current_is_member = false;
2788 
2789  if (DoesMultiXactIdConflict((MultiXactId) xwait, infomask,
2790  LockTupleExclusive, &current_is_member))
2791  {
2792  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
2793 
2794  /*
2795  * Acquire the lock, if necessary (but skip it when we're
2796  * requesting a lock and already have one; avoids deadlock).
2797  */
2798  if (!current_is_member)
2800  LockWaitBlock, &have_tuple_lock);
2801 
2802  /* wait for multixact */
2804  relation, &(tp.t_self), XLTW_Delete,
2805  NULL);
2807 
2808  /*
2809  * If xwait had just locked the tuple then some other xact
2810  * could update this tuple before we get to this point. Check
2811  * for xmax change, and start over if so.
2812  *
2813  * We also must start over if we didn't pin the VM page, and
2814  * the page has become all visible.
2815  */
2816  if ((vmbuffer == InvalidBuffer && PageIsAllVisible(page)) ||
2817  xmax_infomask_changed(tp.t_data->t_infomask, infomask) ||
2819  xwait))
2820  goto l1;
2821  }
2822 
2823  /*
2824  * You might think the multixact is necessarily done here, but not
2825  * so: it could have surviving members, namely our own xact or
2826  * other subxacts of this backend. It is legal for us to delete
2827  * the tuple in either case, however (the latter case is
2828  * essentially a situation of upgrading our former shared lock to
2829  * exclusive). We don't bother changing the on-disk hint bits
2830  * since we are about to overwrite the xmax altogether.
2831  */
2832  }
2833  else if (!TransactionIdIsCurrentTransactionId(xwait))
2834  {
2835  /*
2836  * Wait for regular transaction to end; but first, acquire tuple
2837  * lock.
2838  */
2839  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
2841  LockWaitBlock, &have_tuple_lock);
2842  XactLockTableWait(xwait, relation, &(tp.t_self), XLTW_Delete);
2844 
2845  /*
2846  * xwait is done, but if xwait had just locked the tuple then some
2847  * other xact could update this tuple before we get to this point.
2848  * Check for xmax change, and start over if so.
2849  *
2850  * We also must start over if we didn't pin the VM page, and the
2851  * page has become all visible.
2852  */
2853  if ((vmbuffer == InvalidBuffer && PageIsAllVisible(page)) ||
2854  xmax_infomask_changed(tp.t_data->t_infomask, infomask) ||
2856  xwait))
2857  goto l1;
2858 
2859  /* Otherwise check if it committed or aborted */
2860  UpdateXmaxHintBits(tp.t_data, buffer, xwait);
2861  }
2862 
2863  /*
2864  * We may overwrite if previous xmax aborted, or if it committed but
2865  * only locked the tuple without updating it.
2866  */
2867  if ((tp.t_data->t_infomask & HEAP_XMAX_INVALID) ||
2870  result = TM_Ok;
2871  else if (!ItemPointerEquals(&tp.t_self, &tp.t_data->t_ctid))
2872  result = TM_Updated;
2873  else
2874  result = TM_Deleted;
2875  }
2876 
2877  /* sanity check the result HeapTupleSatisfiesUpdate() and the logic above */
2878  if (result != TM_Ok)
2879  {
2880  Assert(result == TM_SelfModified ||
2881  result == TM_Updated ||
2882  result == TM_Deleted ||
2883  result == TM_BeingModified);
2885  Assert(result != TM_Updated ||
2886  !ItemPointerEquals(&tp.t_self, &tp.t_data->t_ctid));
2887  }
2888 
2889  if (crosscheck != InvalidSnapshot && result == TM_Ok)
2890  {
2891  /* Perform additional check for transaction-snapshot mode RI updates */
2892  if (!HeapTupleSatisfiesVisibility(&tp, crosscheck, buffer))
2893  result = TM_Updated;
2894  }
2895 
2896  if (result != TM_Ok)
2897  {
2898  tmfd->ctid = tp.t_data->t_ctid;
2900  if (result == TM_SelfModified)
2901  tmfd->cmax = HeapTupleHeaderGetCmax(tp.t_data);
2902  else
2903  tmfd->cmax = InvalidCommandId;
2904  UnlockReleaseBuffer(buffer);
2905  if (have_tuple_lock)
2906  UnlockTupleTuplock(relation, &(tp.t_self), LockTupleExclusive);
2907  if (vmbuffer != InvalidBuffer)
2908  ReleaseBuffer(vmbuffer);
2909  return result;
2910  }
2911 
2912  /*
2913  * We're about to do the actual delete -- check for conflict first, to
2914  * avoid possibly having to roll back work we've just done.
2915  *
2916  * This is safe without a recheck as long as there is no possibility of
2917  * another process scanning the page between this check and the delete
2918  * being visible to the scan (i.e., an exclusive buffer content lock is
2919  * continuously held from this point until the tuple delete is visible).
2920  */
2921  CheckForSerializableConflictIn(relation, tid, BufferGetBlockNumber(buffer));
2922 
2923  /* replace cid with a combo CID if necessary */
2924  HeapTupleHeaderAdjustCmax(tp.t_data, &cid, &iscombo);
2925 
2926  /*
2927  * Compute replica identity tuple before entering the critical section so
2928  * we don't PANIC upon a memory allocation failure.
2929  */
2930  old_key_tuple = ExtractReplicaIdentity(relation, &tp, true, &old_key_copied);
2931 
2932  /*
2933  * If this is the first possibly-multixact-able operation in the current
2934  * transaction, set my per-backend OldestMemberMXactId setting. We can be
2935  * certain that the transaction will never become a member of any older
2936  * MultiXactIds than that. (We have to do this even if we end up just
2937  * using our own TransactionId below, since some other backend could
2938  * incorporate our XID into a MultiXact immediately afterwards.)
2939  */
2941 
2944  xid, LockTupleExclusive, true,
2945  &new_xmax, &new_infomask, &new_infomask2);
2946 
2948 
2949  /*
2950  * If this transaction commits, the tuple will become DEAD sooner or
2951  * later. Set flag that this page is a candidate for pruning once our xid
2952  * falls below the OldestXmin horizon. If the transaction finally aborts,
2953  * the subsequent page pruning will be a no-op and the hint will be
2954  * cleared.
2955  */
2956  PageSetPrunable(page, xid);
2957 
2958  if (PageIsAllVisible(page))
2959  {
2960  all_visible_cleared = true;
2961  PageClearAllVisible(page);
2962  visibilitymap_clear(relation, BufferGetBlockNumber(buffer),
2963  vmbuffer, VISIBILITYMAP_VALID_BITS);
2964  }
2965 
2966  /* store transaction information of xact deleting the tuple */
2969  tp.t_data->t_infomask |= new_infomask;
2970  tp.t_data->t_infomask2 |= new_infomask2;
2972  HeapTupleHeaderSetXmax(tp.t_data, new_xmax);
2973  HeapTupleHeaderSetCmax(tp.t_data, cid, iscombo);
2974  /* Make sure there is no forward chain link in t_ctid */
2975  tp.t_data->t_ctid = tp.t_self;
2976 
2977  /* Signal that this is actually a move into another partition */
2978  if (changingPart)
2980 
2981  MarkBufferDirty(buffer);
2982 
2983  /*
2984  * XLOG stuff
2985  *
2986  * NB: heap_abort_speculative() uses the same xlog record and replay
2987  * routines.
2988  */
2989  if (RelationNeedsWAL(relation))
2990  {
2991  xl_heap_delete xlrec;
2992  xl_heap_header xlhdr;
2993  XLogRecPtr recptr;
2994 
2995  /*
2996  * For logical decode we need combo CIDs to properly decode the
2997  * catalog
2998  */
3000  log_heap_new_cid(relation, &tp);
3001 
3002  xlrec.flags = 0;
3003  if (all_visible_cleared)
3005  if (changingPart)
3008  tp.t_data->t_infomask2);
3010  xlrec.xmax = new_xmax;
3011 
3012  if (old_key_tuple != NULL)
3013  {
3014  if (relation->rd_rel->relreplident == REPLICA_IDENTITY_FULL)
3016  else
3018  }
3019 
3020  XLogBeginInsert();
3021  XLogRegisterData((char *) &xlrec, SizeOfHeapDelete);
3022 
3023  XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
3024 
3025  /*
3026  * Log replica identity of the deleted tuple if there is one
3027  */
3028  if (old_key_tuple != NULL)
3029  {
3030  xlhdr.t_infomask2 = old_key_tuple->t_data->t_infomask2;
3031  xlhdr.t_infomask = old_key_tuple->t_data->t_infomask;
3032  xlhdr.t_hoff = old_key_tuple->t_data->t_hoff;
3033 
3034  XLogRegisterData((char *) &xlhdr, SizeOfHeapHeader);
3035  XLogRegisterData((char *) old_key_tuple->t_data
3037  old_key_tuple->t_len
3039  }
3040 
3041  /* filtering by origin on a row level is much more efficient */
3043 
3044  recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_DELETE);
3045 
3046  PageSetLSN(page, recptr);
3047  }
3048 
3049  END_CRIT_SECTION();
3050 
3051  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
3052 
3053  if (vmbuffer != InvalidBuffer)
3054  ReleaseBuffer(vmbuffer);
3055 
3056  /*
3057  * If the tuple has toasted out-of-line attributes, we need to delete
3058  * those items too. We have to do this before releasing the buffer
3059  * because we need to look at the contents of the tuple, but it's OK to
3060  * release the content lock on the buffer first.
3061  */
3062  if (relation->rd_rel->relkind != RELKIND_RELATION &&
3063  relation->rd_rel->relkind != RELKIND_MATVIEW)
3064  {
3065  /* toast table entries should never be recursively toasted */
3067  }
3068  else if (HeapTupleHasExternal(&tp))
3069  heap_toast_delete(relation, &tp, false);
3070 
3071  /*
3072  * Mark tuple for invalidation from system caches at next command
3073  * boundary. We have to do this before releasing the buffer because we
3074  * need to look at the contents of the tuple.
3075  */
3076  CacheInvalidateHeapTuple(relation, &tp, NULL);
3077 
3078  /* Now we can release the buffer */
3079  ReleaseBuffer(buffer);
3080 
3081  /*
3082  * Release the lmgr tuple lock, if we had it.
3083  */
3084  if (have_tuple_lock)
3085  UnlockTupleTuplock(relation, &(tp.t_self), LockTupleExclusive);
3086 
3087  pgstat_count_heap_delete(relation);
3088 
3089  if (old_key_tuple != NULL && old_key_copied)
3090  heap_freetuple(old_key_tuple);
3091 
3092  return TM_Ok;
3093 }
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:3706
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:4923
static void PageClearAllVisible(Page page)
Definition: bufpage.h:439
#define InvalidCommandId
Definition: c.h:669
unsigned short uint16
Definition: c.h:505
TransactionId MultiXactId
Definition: c.h:662
void HeapTupleHeaderAdjustCmax(HeapTupleHeader tup, CommandId *cmax, bool *iscombo)
Definition: combocid.c:153
CommandId HeapTupleHeaderGetCmax(HeapTupleHeader tup)
Definition: combocid.c:118
int errcode(int sqlerrcode)
Definition: elog.c:853
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define ereport(elevel,...)
Definition: elog.h:149
static bool DoesMultiXactIdConflict(MultiXactId multi, uint16 infomask, LockTupleMode lockmode, bool *current_is_member)
Definition: heapam.c:7115
static XLogRecPtr log_heap_new_cid(Relation relation, HeapTuple tup)
Definition: heapam.c:8579
static void compute_new_xmax_infomask(TransactionId xmax, uint16 old_infomask, uint16 old_infomask2, TransactionId add_to_xmax, LockTupleMode mode, bool is_update, TransactionId *result_xmax, uint16 *result_infomask, uint16 *result_infomask2)
Definition: heapam.c:5057
static HeapTuple ExtractReplicaIdentity(Relation relation, HeapTuple tp, bool key_required, bool *copy)
Definition: heapam.c:8660
static bool heap_acquire_tuplock(Relation relation, ItemPointer tid, LockTupleMode mode, LockWaitPolicy wait_policy, bool *have_tuple_lock)
Definition: heapam.c:5008
static void MultiXactIdWait(MultiXactId multi, MultiXactStatus status, uint16 infomask, Relation rel, ItemPointer ctid, XLTW_Oper oper, int *remaining)
Definition: heapam.c:7292
static bool xmax_infomask_changed(uint16 new_infomask, uint16 old_infomask)
Definition: heapam.c:2660
#define UnlockTupleTuplock(rel, tup, mode)
Definition: heapam.c:170
static void UpdateXmaxHintBits(HeapTupleHeader tuple, Buffer buffer, TransactionId xid)
Definition: heapam.c:1905
bool HeapTupleSatisfiesVisibility(HeapTuple htup, Snapshot snapshot, Buffer buffer)
bool HeapTupleHeaderIsOnlyLocked(HeapTupleHeader tuple)
TM_Result HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid, Buffer buffer)
#define XLH_DELETE_CONTAINS_OLD_KEY
Definition: heapam_xlog.h:103
#define XLH_DELETE_ALL_VISIBLE_CLEARED
Definition: heapam_xlog.h:101
#define SizeOfHeapHeader
Definition: heapam_xlog.h:156
#define XLH_DELETE_IS_PARTITION_MOVE
Definition: heapam_xlog.h:105
#define XLH_DELETE_CONTAINS_OLD_TUPLE
Definition: heapam_xlog.h:102
void heap_freetuple(HeapTuple htup)
Definition: heaptuple.c:1434
#define HEAP_XMAX_IS_LOCKED_ONLY(infomask)
Definition: htup_details.h:227
#define SizeofHeapTupleHeader
Definition: htup_details.h:185
#define HeapTupleHeaderSetXmax(tup, xid)
Definition: htup_details.h:376
#define HeapTupleHeaderClearHotUpdated(tup)
Definition: htup_details.h:494
#define HEAP_XMAX_IS_MULTI
Definition: htup_details.h:209
#define HEAP_XMAX_INVALID
Definition: htup_details.h:208
#define HeapTupleHeaderSetMovedPartitions(tup)
Definition: htup_details.h:447
#define HeapTupleHeaderGetRawXmax(tup)
Definition: htup_details.h:371
#define HeapTupleHeaderGetUpdateXid(tup)
Definition: htup_details.h:361
#define HeapTupleHeaderSetCmax(tup, cid, iscombo)
Definition: htup_details.h:401
void CacheInvalidateHeapTuple(Relation relation, HeapTuple tuple, HeapTuple newtuple)
Definition: inval.c:1204
bool ItemPointerEquals(ItemPointer pointer1, ItemPointer pointer2)
Definition: itemptr.c:35
void XactLockTableWait(TransactionId xid, Relation rel, ItemPointer ctid, XLTW_Oper oper)
Definition: lmgr.c:657
@ XLTW_Delete
Definition: lmgr.h:28
@ LockWaitBlock
Definition: lockoptions.h:39
@ LockTupleExclusive
Definition: lockoptions.h:58
void MultiXactIdSetOldestMember(void)
Definition: multixact.c:673
@ MultiXactStatusUpdate
Definition: multixact.h:46
void CheckForSerializableConflictIn(Relation relation, ItemPointer tid, BlockNumber blkno)
Definition: predicate.c:4321
#define RelationIsAccessibleInLogicalDecoding(relation)
Definition: rel.h:684
#define InvalidSnapshot
Definition: snapshot.h:123
TransactionId xmax
Definition: tableam.h:151
CommandId cmax
Definition: tableam.h:152
ItemPointerData ctid
Definition: tableam.h:150
uint16 t_infomask
Definition: heapam_xlog.h:152
uint16 t_infomask2
Definition: heapam_xlog.h:151
TM_Result
Definition: tableam.h:80
@ TM_Ok
Definition: tableam.h:85
@ TM_BeingModified
Definition: tableam.h:107
@ TM_Deleted
Definition: tableam.h:100
@ TM_Updated
Definition: tableam.h:97
@ TM_SelfModified
Definition: tableam.h:91
@ TM_Invisible
Definition: tableam.h:88
#define TransactionIdEquals(id1, id2)
Definition: transam.h:43
bool visibilitymap_clear(Relation rel, BlockNumber heapBlk, Buffer vmbuf, uint8 flags)
void visibilitymap_pin(Relation rel, BlockNumber heapBlk, Buffer *vmbuf)
#define VISIBILITYMAP_VALID_BITS
bool TransactionIdIsCurrentTransactionId(TransactionId xid)
Definition: xact.c:940
bool IsInParallelMode(void)
Definition: xact.c:1088
#define XLOG_INCLUDE_ORIGIN
Definition: xlog.h:154
void XLogSetRecordFlags(uint8 flags)
Definition: xloginsert.c:456

References Assert, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage(), CacheInvalidateHeapTuple(), CheckForSerializableConflictIn(), TM_FailureData::cmax, compute_infobits(), compute_new_xmax_infomask(), TM_FailureData::ctid, DoesMultiXactIdConflict(), END_CRIT_SECTION, ereport, errcode(), errmsg(), ERROR, ExtractReplicaIdentity(), xl_heap_delete::flags, GetCurrentTransactionId(), heap_acquire_tuplock(), heap_freetuple(), HEAP_KEYS_UPDATED, HEAP_MOVED, heap_toast_delete(), HEAP_XMAX_BITS, HEAP_XMAX_INVALID, HEAP_XMAX_IS_LOCKED_ONLY, HEAP_XMAX_IS_MULTI, HeapTupleHasExternal, HeapTupleHeaderAdjustCmax(), HeapTupleHeaderClearHotUpdated, HeapTupleHeaderGetCmax(), HeapTupleHeaderGetRawXmax, HeapTupleHeaderGetUpdateXid, HeapTupleHeaderIsOnlyLocked(), HeapTupleHeaderSetCmax, HeapTupleHeaderSetMovedPartitions, HeapTupleHeaderSetXmax, HeapTupleSatisfiesUpdate(), HeapTupleSatisfiesVisibility(), xl_heap_delete::infobits_set, InvalidBuffer, InvalidCommandId, InvalidSnapshot, IsInParallelMode(), ItemIdGetLength, ItemIdIsNormal, ItemPointerEquals(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), ItemPointerIsValid(), LockBuffer(), LockTupleExclusive, LockWaitBlock, log_heap_new_cid(), MarkBufferDirty(), MultiXactIdSetOldestMember(), MultiXactIdWait(), MultiXactStatusUpdate, xl_heap_delete::offnum, PageClearAllVisible(), PageGetItem(), PageGetItemId(), PageIsAllVisible(), PageSetLSN(), PageSetPrunable, pgstat_count_heap_delete(), RelationData::rd_rel, ReadBuffer(), REGBUF_STANDARD, RelationGetRelid, RelationIsAccessibleInLogicalDecoding, RelationNeedsWAL, ReleaseBuffer(), SizeOfHeapDelete, SizeOfHeapHeader, SizeofHeapTupleHeader, START_CRIT_SECTION, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, xl_heap_header::t_hoff, HeapTupleHeaderData::t_hoff, xl_heap_header::t_infomask, HeapTupleHeaderData::t_infomask, xl_heap_header::t_infomask2, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TM_BeingModified, TM_Deleted, TM_Invisible, TM_Ok, TM_SelfModified, TM_Updated, TransactionIdEquals, TransactionIdIsCurrentTransactionId(), UnlockReleaseBuffer(), UnlockTupleTuplock, UpdateXmaxHintBits(), visibilitymap_clear(), visibilitymap_pin(), VISIBILITYMAP_VALID_BITS, XactLockTableWait(), XLH_DELETE_ALL_VISIBLE_CLEARED, XLH_DELETE_CONTAINS_OLD_KEY, XLH_DELETE_CONTAINS_OLD_TUPLE, XLH_DELETE_IS_PARTITION_MOVE, XLOG_HEAP_DELETE, XLOG_INCLUDE_ORIGIN, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), XLogRegisterData(), XLogSetRecordFlags(), XLTW_Delete, xl_heap_delete::xmax, TM_FailureData::xmax, and xmax_infomask_changed().

Referenced by heapam_tuple_delete(), and simple_heap_delete().
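
A hedged sketch of a plain caller, close in spirit to what simple_heap_delete() does; delete_tuple_or_error is a hypothetical helper and error handling is reduced to elog():

    #include "postgres.h"

    #include "access/heapam.h"
    #include "access/xact.h"

    static void
    delete_tuple_or_error(Relation rel, ItemPointer tid)
    {
        TM_FailureData tmfd;
        TM_Result   result;

        result = heap_delete(rel, tid, GetCurrentCommandId(true),
                             InvalidSnapshot, true /* wait */ ,
                             &tmfd, false /* changingPart */ );

        switch (result)
        {
            case TM_Ok:
                break;
            case TM_SelfModified:
                elog(ERROR, "tuple already deleted by this command");
                break;
            default:
                elog(ERROR, "tuple concurrently updated or deleted");
                break;
        }
    }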

◆ heap_endscan()

void heap_endscan ( TableScanDesc  sscan)

Definition at line 1210 of file heapam.c.

1211 {
1212  HeapScanDesc scan = (HeapScanDesc) sscan;
1213 
1214  /* Note: no locking manipulations needed */
1215 
1216  /*
1217  * unpin scan buffers
1218  */
1219  if (BufferIsValid(scan->rs_cbuf))
1220  ReleaseBuffer(scan->rs_cbuf);
1221 
1222  if (BufferIsValid(scan->rs_vmbuffer))
1223  ReleaseBuffer(scan->rs_vmbuffer);
1224 
1225  /*
1226  * Must free the read stream before freeing the BufferAccessStrategy.
1227  */
1228  if (scan->rs_read_stream)
1230 
1231  /*
1232  * decrement relation reference count and free scan descriptor storage
1233  */
1235 
1236  if (scan->rs_base.rs_key)
1237  pfree(scan->rs_base.rs_key);
1238 
1239  if (scan->rs_strategy != NULL)
1241 
1242  if (scan->rs_parallelworkerdata != NULL)
1244 
1245  if (scan->rs_base.rs_flags & SO_TEMP_SNAPSHOT)
1247 
1248  pfree(scan);
1249 }
static bool BufferIsValid(Buffer bufnum)
Definition: bufmgr.h:351
void read_stream_end(ReadStream *stream)
Definition: read_stream.c:833
void RelationDecrementReferenceCount(Relation rel)
Definition: relcache.c:2172
void UnregisterSnapshot(Snapshot snapshot)
Definition: snapmgr.c:836
Buffer rs_cbuf
Definition: heapam.h:67
@ SO_TEMP_SNAPSHOT
Definition: tableam.h:65

References BufferIsValid(), FreeAccessStrategy(), pfree(), read_stream_end(), RelationDecrementReferenceCount(), ReleaseBuffer(), HeapScanDescData::rs_base, HeapScanDescData::rs_cbuf, TableScanDescData::rs_flags, TableScanDescData::rs_key, HeapScanDescData::rs_parallelworkerdata, TableScanDescData::rs_rd, HeapScanDescData::rs_read_stream, TableScanDescData::rs_snapshot, HeapScanDescData::rs_strategy, HeapScanDescData::rs_vmbuffer, SO_TEMP_SNAPSHOT, and UnregisterSnapshot().

◆ heap_fetch()

bool heap_fetch ( Relation  relation,
Snapshot  snapshot,
HeapTuple  tuple,
Buffer userbuf,
bool  keep_buf 
)

Definition at line 1511 of file heapam.c.

1516 {
1517  ItemPointer tid = &(tuple->t_self);
1518  ItemId lp;
1519  Buffer buffer;
1520  Page page;
1521  OffsetNumber offnum;
1522  bool valid;
1523 
1524  /*
1525  * Fetch and pin the appropriate page of the relation.
1526  */
1527  buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
1528 
1529  /*
1530  * Need share lock on buffer to examine tuple commit status.
1531  */
1532  LockBuffer(buffer, BUFFER_LOCK_SHARE);
1533  page = BufferGetPage(buffer);
1534 
1535  /*
1536  * We'd better check for out-of-range offnum in case of VACUUM since the
1537  * TID was obtained.
1538  */
1539  offnum = ItemPointerGetOffsetNumber(tid);
1540  if (offnum < FirstOffsetNumber || offnum > PageGetMaxOffsetNumber(page))
1541  {
1542  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
1543  ReleaseBuffer(buffer);
1544  *userbuf = InvalidBuffer;
1545  tuple->t_data = NULL;
1546  return false;
1547  }
1548 
1549  /*
1550  * get the item line pointer corresponding to the requested tid
1551  */
1552  lp = PageGetItemId(page, offnum);
1553 
1554  /*
1555  * Must check for deleted tuple.
1556  */
1557  if (!ItemIdIsNormal(lp))
1558  {
1559  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
1560  ReleaseBuffer(buffer);
1561  *userbuf = InvalidBuffer;
1562  tuple->t_data = NULL;
1563  return false;
1564  }
1565 
1566  /*
1567  * fill in *tuple fields
1568  */
1569  tuple->t_data = (HeapTupleHeader) PageGetItem(page, lp);
1570  tuple->t_len = ItemIdGetLength(lp);
1571  tuple->t_tableOid = RelationGetRelid(relation);
1572 
1573  /*
1574  * check tuple visibility, then release lock
1575  */
1576  valid = HeapTupleSatisfiesVisibility(tuple, snapshot, buffer);
1577 
1578  if (valid)
1579  PredicateLockTID(relation, &(tuple->t_self), snapshot,
1580  HeapTupleHeaderGetXmin(tuple->t_data));
1581 
1582  HeapCheckForSerializableConflictOut(valid, relation, tuple, buffer, snapshot);
1583 
1584  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
1585 
1586  if (valid)
1587  {
1588  /*
1589  * All checks passed, so return the tuple as valid. Caller is now
1590  * responsible for releasing the buffer.
1591  */
1592  *userbuf = buffer;
1593 
1594  return true;
1595  }
1596 
1597  /* Tuple failed time qual, but maybe caller wants to see it anyway. */
1598  if (keep_buf)
1599  *userbuf = buffer;
1600  else
1601  {
1602  ReleaseBuffer(buffer);
1603  *userbuf = InvalidBuffer;
1604  tuple->t_data = NULL;
1605  }
1606 
1607  return false;
1608 }
#define BUFFER_LOCK_SHARE
Definition: bufmgr.h:190
static OffsetNumber PageGetMaxOffsetNumber(Page page)
Definition: bufpage.h:372
void HeapCheckForSerializableConflictOut(bool visible, Relation relation, HeapTuple tuple, Buffer buffer, Snapshot snapshot)
Definition: heapam.c:10061
#define HeapTupleHeaderGetXmin(tup)
Definition: htup_details.h:309
uint16 OffsetNumber
Definition: off.h:24
void PredicateLockTID(Relation relation, ItemPointer tid, Snapshot snapshot, TransactionId tuple_xid)
Definition: predicate.c:2606

References BUFFER_LOCK_SHARE, BUFFER_LOCK_UNLOCK, BufferGetPage(), HeapCheckForSerializableConflictOut(), HeapTupleHeaderGetXmin, HeapTupleSatisfiesVisibility(), InvalidBuffer, ItemIdGetLength, ItemIdIsNormal, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), LockBuffer(), PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PredicateLockTID(), ReadBuffer(), RelationGetRelid, ReleaseBuffer(), HeapTupleData::t_data, HeapTupleData::t_len, HeapTupleData::t_self, and HeapTupleData::t_tableOid.

Referenced by heap_lock_updated_tuple_rec(), heapam_fetch_row_version(), and heapam_tuple_lock().
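
A minimal usage sketch (not from the sources): the caller fills tuple.t_self with the TID of interest, and on success must eventually release the buffer pin returned in *userbuf. fetch_and_inspect is a hypothetical helper:

    #include "postgres.h"

    #include "access/heapam.h"
    #include "storage/bufmgr.h"

    static bool
    fetch_and_inspect(Relation rel, Snapshot snapshot, ItemPointer tid)
    {
        HeapTupleData tuple;
        Buffer      buf;

        tuple.t_self = *tid;
        if (!heap_fetch(rel, snapshot, &tuple, &buf, false))
            return false;       /* missing or invisible; no pin is held */

        /* ... examine tuple.t_data while the pin on buf is held ... */

        ReleaseBuffer(buf);
        return true;
    }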

◆ heap_finish_speculative()

void heap_finish_speculative ( Relation  relation,
ItemPointer  tid 
)

Definition at line 5819 of file heapam.c.

5820 {
5821  Buffer buffer;
5822  Page page;
5823  OffsetNumber offnum;
5824  ItemId lp = NULL;
5825  HeapTupleHeader htup;
5826 
5827  buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
5829  page = (Page) BufferGetPage(buffer);
5830 
5831  offnum = ItemPointerGetOffsetNumber(tid);
5832  if (PageGetMaxOffsetNumber(page) >= offnum)
5833  lp = PageGetItemId(page, offnum);
5834 
5835  if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
5836  elog(ERROR, "invalid lp");
5837 
5838  htup = (HeapTupleHeader) PageGetItem(page, lp);
5839 
5840  /* NO EREPORT(ERROR) from here till changes are logged */
5842 
5844 
5845  MarkBufferDirty(buffer);
5846 
5847  /*
5848  * Replace the speculative insertion token with a real t_ctid, pointing to
5849  * itself like it does on regular tuples.
5850  */
5851  htup->t_ctid = *tid;
5852 
5853  /* XLOG stuff */
5854  if (RelationNeedsWAL(relation))
5855  {
5856  xl_heap_confirm xlrec;
5857  XLogRecPtr recptr;
5858 
5859  xlrec.offnum = ItemPointerGetOffsetNumber(tid);
5860 
5861  XLogBeginInsert();
5862 
5863  /* We want the same filtering on this as on a plain insert */
5865 
5866  XLogRegisterData((char *) &xlrec, SizeOfHeapConfirm);
5867  XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
5868 
5869  recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_CONFIRM);
5870 
5871  PageSetLSN(page, recptr);
5872  }
5873 
5874  END_CRIT_SECTION();
5875 
5876  UnlockReleaseBuffer(buffer);
5877 }
#define SizeOfHeapConfirm
Definition: heapam_xlog.h:422
#define XLOG_HEAP_CONFIRM
Definition: heapam_xlog.h:37
OffsetNumber offnum
Definition: heapam_xlog.h:419

References Assert, BUFFER_LOCK_EXCLUSIVE, BufferGetPage(), elog, END_CRIT_SECTION, ERROR, HeapTupleHeaderIsSpeculative, ItemIdIsNormal, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), LockBuffer(), MarkBufferDirty(), xl_heap_confirm::offnum, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PageSetLSN(), ReadBuffer(), REGBUF_STANDARD, RelationNeedsWAL, SizeOfHeapConfirm, START_CRIT_SECTION, HeapTupleHeaderData::t_ctid, UnlockReleaseBuffer(), XLOG_HEAP_CONFIRM, XLOG_INCLUDE_ORIGIN, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), XLogRegisterData(), and XLogSetRecordFlags().

Referenced by heapam_tuple_complete_speculative().
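
A minimal sketch of how the speculative-insertion protocol fits together, loosely modeled on heapam_tuple_insert_speculative() and heapam_tuple_complete_speculative(); the wrapper function and its control flow are illustrative assumptions, not code taken from heapam.c.

/* Hypothetical sketch of the speculative-insertion (ON CONFLICT) protocol. */
#include "postgres.h"

#include "access/heapam.h"
#include "access/htup_details.h"
#include "access/xact.h"
#include "storage/lmgr.h"

static void
speculative_insert_sketch(Relation rel, HeapTuple tup, bool no_conflict)
{
    uint32      specToken;

    /* Acquire a speculative-insertion token and stamp it into t_ctid */
    specToken = SpeculativeInsertionLockAcquire(GetCurrentTransactionId());
    HeapTupleHeaderSetSpeculativeToken(tup->t_data, specToken);

    /* Insert while t_ctid still carries the token rather than a real TID */
    heap_insert(rel, tup, GetCurrentCommandId(true),
                HEAP_INSERT_SPECULATIVE, NULL);

    if (no_conflict)
        heap_finish_speculative(rel, &tup->t_self); /* t_ctid now points to itself */
    else
        heap_abort_speculative(rel, &tup->t_self);  /* "super-delete" the inserted tuple */

    SpeculativeInsertionLockRelease(GetCurrentTransactionId());
}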

◆ heap_freeze_prepared_tuples()

void heap_freeze_prepared_tuples ( Buffer  buffer,
HeapTupleFreeze tuples,
int  ntuples 
)

Definition at line 6900 of file heapam.c.

6901 {
6902  Page page = BufferGetPage(buffer);
6903 
6904  for (int i = 0; i < ntuples; i++)
6905  {
6906  HeapTupleFreeze *frz = tuples + i;
6907  ItemId itemid = PageGetItemId(page, frz->offset);
6908  HeapTupleHeader htup;
6909 
6910  htup = (HeapTupleHeader) PageGetItem(page, itemid);
6911  heap_execute_freeze_tuple(htup, frz);
6912  }
6913 }
static void heap_execute_freeze_tuple(HeapTupleHeader tuple, HeapTupleFreeze *frz)
Definition: heapam.c:6824
int i
Definition: isn.c:73
OffsetNumber offset
Definition: heapam.h:151

References BufferGetPage(), heap_execute_freeze_tuple(), i, HeapTupleFreeze::offset, PageGetItem(), and PageGetItemId().

Referenced by heap_page_prune_and_freeze().

◆ heap_freeze_tuple()

bool heap_freeze_tuple ( HeapTupleHeader  tuple,
TransactionId  relfrozenxid,
TransactionId  relminmxid,
TransactionId  FreezeLimit,
TransactionId  MultiXactCutoff 
)

Definition at line 6922 of file heapam.c.

6925 {
6926  HeapTupleFreeze frz;
6927  bool do_freeze;
6928  bool totally_frozen;
6929  struct VacuumCutoffs cutoffs;
6930  HeapPageFreeze pagefrz;
6931 
6932  cutoffs.relfrozenxid = relfrozenxid;
6933  cutoffs.relminmxid = relminmxid;
6934  cutoffs.OldestXmin = FreezeLimit;
6935  cutoffs.OldestMxact = MultiXactCutoff;
6936  cutoffs.FreezeLimit = FreezeLimit;
6937  cutoffs.MultiXactCutoff = MultiXactCutoff;
6938 
6939  pagefrz.freeze_required = true;
6940  pagefrz.FreezePageRelfrozenXid = FreezeLimit;
6941  pagefrz.FreezePageRelminMxid = MultiXactCutoff;
6942  pagefrz.NoFreezePageRelfrozenXid = FreezeLimit;
6943  pagefrz.NoFreezePageRelminMxid = MultiXactCutoff;
6944 
6945  do_freeze = heap_prepare_freeze_tuple(tuple, &cutoffs,
6946  &pagefrz, &frz, &totally_frozen);
6947 
6948  /*
6949  * Note that because this is not a WAL-logged operation, we don't need to
6950  * fill in the offset in the freeze record.
6951  */
6952 
6953  if (do_freeze)
6954  heap_execute_freeze_tuple(tuple, &frz);
6955  return do_freeze;
6956 }
bool heap_prepare_freeze_tuple(HeapTupleHeader tuple, const struct VacuumCutoffs *cutoffs, HeapPageFreeze *pagefrz, HeapTupleFreeze *frz, bool *totally_frozen)
Definition: heapam.c:6550
TransactionId FreezeLimit
Definition: vacuum.h:277
TransactionId relfrozenxid
Definition: vacuum.h:251
MultiXactId relminmxid
Definition: vacuum.h:252
MultiXactId MultiXactCutoff
Definition: vacuum.h:278

References VacuumCutoffs::FreezeLimit, heap_execute_freeze_tuple(), heap_prepare_freeze_tuple(), VacuumCutoffs::MultiXactCutoff, VacuumCutoffs::OldestMxact, VacuumCutoffs::OldestXmin, VacuumCutoffs::relfrozenxid, and VacuumCutoffs::relminmxid.

Referenced by rewrite_heap_tuple().
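
A hedged sketch of the intended calling pattern: freeze a locally palloc'd tuple copy, as rewrite_heap_tuple() does during CLUSTER, using cutoff values the caller has already computed. The function and variable names here are assumptions.

#include "postgres.h"

#include "access/heapam.h"

static void
freeze_copied_tuple_sketch(HeapTuple newtup,
                           TransactionId relfrozenxid, MultiXactId relminmxid,
                           TransactionId freeze_xid, MultiXactId cutoff_multi)
{
    /*
     * The tuple copy is not on a shared buffer, so no buffer lock or WAL
     * logging is involved; the header is simply rewritten in place when
     * the cutoffs say freezing is required.
     */
    if (heap_freeze_tuple(newtup->t_data,
                          relfrozenxid, relminmxid,
                          freeze_xid, cutoff_multi))
    {
        /* xmin/xmax/infomask bits were adjusted in the local copy */
    }
}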

◆ heap_get_latest_tid()

void heap_get_latest_tid ( TableScanDesc  sscan,
ItemPointer  tid 
)

Definition at line 1783 of file heapam.c.

1785 {
1786  Relation relation = sscan->rs_rd;
1787  Snapshot snapshot = sscan->rs_snapshot;
1788  ItemPointerData ctid;
1789  TransactionId priorXmax;
1790 
1791  /*
1792  * table_tuple_get_latest_tid() verified that the passed in tid is valid.
1793  * Assume that t_ctid links are valid however - there shouldn't be invalid
1794  * ones in the table.
1795  */
1796  Assert(ItemPointerIsValid(tid));
1797 
1798  /*
1799  * Loop to chase down t_ctid links. At top of loop, ctid is the tuple we
1800  * need to examine, and *tid is the TID we will return if ctid turns out
1801  * to be bogus.
1802  *
1803  * Note that we will loop until we reach the end of the t_ctid chain.
1804  * Depending on the snapshot passed, there might be at most one visible
1805  * version of the row, but we don't try to optimize for that.
1806  */
1807  ctid = *tid;
1808  priorXmax = InvalidTransactionId; /* cannot check first XMIN */
1809  for (;;)
1810  {
1811  Buffer buffer;
1812  Page page;
1813  OffsetNumber offnum;
1814  ItemId lp;
1815  HeapTupleData tp;
1816  bool valid;
1817 
1818  /*
1819  * Read, pin, and lock the page.
1820  */
1821  buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(&ctid));
1822  LockBuffer(buffer, BUFFER_LOCK_SHARE);
1823  page = BufferGetPage(buffer);
1824 
1825  /*
1826  * Check for bogus item number. This is not treated as an error
1827  * condition because it can happen while following a t_ctid link. We
1828  * just assume that the prior tid is OK and return it unchanged.
1829  */
1830  offnum = ItemPointerGetOffsetNumber(&ctid);
1831  if (offnum < FirstOffsetNumber || offnum > PageGetMaxOffsetNumber(page))
1832  {
1833  UnlockReleaseBuffer(buffer);
1834  break;
1835  }
1836  lp = PageGetItemId(page, offnum);
1837  if (!ItemIdIsNormal(lp))
1838  {
1839  UnlockReleaseBuffer(buffer);
1840  break;
1841  }
1842 
1843  /* OK to access the tuple */
1844  tp.t_self = ctid;
1845  tp.t_data = (HeapTupleHeader) PageGetItem(page, lp);
1846  tp.t_len = ItemIdGetLength(lp);
1847  tp.t_tableOid = RelationGetRelid(relation);
1848 
1849  /*
1850  * After following a t_ctid link, we might arrive at an unrelated
1851  * tuple. Check for XMIN match.
1852  */
1853  if (TransactionIdIsValid(priorXmax) &&
1854  !TransactionIdEquals(priorXmax, HeapTupleHeaderGetXmin(tp.t_data)))
1855  {
1856  UnlockReleaseBuffer(buffer);
1857  break;
1858  }
1859 
1860  /*
1861  * Check tuple visibility; if visible, set it as the new result
1862  * candidate.
1863  */
1864  valid = HeapTupleSatisfiesVisibility(&tp, snapshot, buffer);
1865  HeapCheckForSerializableConflictOut(valid, relation, &tp, buffer, snapshot);
1866  if (valid)
1867  *tid = ctid;
1868 
1869  /*
1870  * If there's a valid t_ctid link, follow it, else we're done.
1871  */
1872  if ((tp.t_data->t_infomask & HEAP_XMAX_INVALID) ||
1873  HeapTupleHeaderIsOnlyLocked(tp.t_data) ||
1874  HeapTupleHeaderIndicatesMovedPartitions(tp.t_data) ||
1875  ItemPointerEquals(&tp.t_self, &tp.t_data->t_ctid))
1876  {
1877  UnlockReleaseBuffer(buffer);
1878  break;
1879  }
1880 
1881  ctid = tp.t_data->t_ctid;
1882  priorXmax = HeapTupleHeaderGetUpdateXid(tp.t_data);
1883  UnlockReleaseBuffer(buffer);
1884  } /* end of loop */
1885 }
#define HeapTupleHeaderIndicatesMovedPartitions(tup)
Definition: htup_details.h:444

References Assert, BUFFER_LOCK_SHARE, BufferGetPage(), HEAP_XMAX_INVALID, HeapCheckForSerializableConflictOut(), HeapTupleHeaderGetUpdateXid, HeapTupleHeaderGetXmin, HeapTupleHeaderIndicatesMovedPartitions, HeapTupleHeaderIsOnlyLocked(), HeapTupleSatisfiesVisibility(), InvalidTransactionId, ItemIdGetLength, ItemIdIsNormal, ItemPointerEquals(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), ItemPointerIsValid(), LockBuffer(), PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), ReadBuffer(), RelationGetRelid, TableScanDescData::rs_rd, TableScanDescData::rs_snapshot, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TransactionIdEquals, TransactionIdIsValid, and UnlockReleaseBuffer().
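
A sketch (assumptions marked) of reaching this function through the table AM layer, which is how callers such as the currtid functions use it; table_beginscan_tid() and an already-active snapshot are assumed here.

#include "postgres.h"

#include "access/tableam.h"
#include "storage/itemptr.h"
#include "utils/snapmgr.h"

static void
latest_tid_sketch(Relation rel, ItemPointer tid)
{
    TableScanDesc scan = table_beginscan_tid(rel, GetActiveSnapshot());

    /* On return, *tid names the newest visible version in the update chain */
    table_tuple_get_latest_tid(scan, tid);

    table_endscan(scan);
}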

◆ heap_get_root_tuples()

void heap_get_root_tuples ( Page  page,
OffsetNumber root_offsets 
)

Definition at line 1785 of file pruneheap.c.

1786 {
1787  OffsetNumber offnum,
1788  maxoff;
1789 
1790  MemSet(root_offsets, InvalidOffsetNumber,
1791  MaxHeapTuplesPerPage * sizeof(OffsetNumber));
1792 
1793  maxoff = PageGetMaxOffsetNumber(page);
1794  for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum = OffsetNumberNext(offnum))
1795  {
1796  ItemId lp = PageGetItemId(page, offnum);
1797  HeapTupleHeader htup;
1798  OffsetNumber nextoffnum;
1799  TransactionId priorXmax;
1800 
1801  /* skip unused and dead items */
1802  if (!ItemIdIsUsed(lp) || ItemIdIsDead(lp))
1803  continue;
1804 
1805  if (ItemIdIsNormal(lp))
1806  {
1807  htup = (HeapTupleHeader) PageGetItem(page, lp);
1808 
1809  /*
1810  * Check if this tuple is part of a HOT-chain rooted at some other
1811  * tuple. If so, skip it for now; we'll process it when we find
1812  * its root.
1813  */
1814  if (HeapTupleHeaderIsHeapOnly(htup))
1815  continue;
1816 
1817  /*
1818  * This is either a plain tuple or the root of a HOT-chain.
1819  * Remember it in the mapping.
1820  */
1821  root_offsets[offnum - 1] = offnum;
1822 
1823  /* If it's not the start of a HOT-chain, we're done with it */
1824  if (!HeapTupleHeaderIsHotUpdated(htup))
1825  continue;
1826 
1827  /* Set up to scan the HOT-chain */
1828  nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
1829  priorXmax = HeapTupleHeaderGetUpdateXid(htup);
1830  }
1831  else
1832  {
1833  /* Must be a redirect item. We do not set its root_offsets entry */
1834  Assert(ItemIdIsRedirected(lp));
1835  /* Set up to scan the HOT-chain */
1836  nextoffnum = ItemIdGetRedirect(lp);
1837  priorXmax = InvalidTransactionId;
1838  }
1839 
1840  /*
1841  * Now follow the HOT-chain and collect other tuples in the chain.
1842  *
1843  * Note: Even though this is a nested loop, the complexity of the
1844  * function is O(N) because a tuple in the page should be visited not
1845  * more than twice, once in the outer loop and once in HOT-chain
1846  * chases.
1847  */
1848  for (;;)
1849  {
1850  /* Sanity check (pure paranoia) */
1851  if (offnum < FirstOffsetNumber)
1852  break;
1853 
1854  /*
1855  * An offset past the end of page's line pointer array is possible
1856  * when the array was truncated
1857  */
1858  if (offnum > maxoff)
1859  break;
1860 
1861  lp = PageGetItemId(page, nextoffnum);
1862 
1863  /* Check for broken chains */
1864  if (!ItemIdIsNormal(lp))
1865  break;
1866 
1867  htup = (HeapTupleHeader) PageGetItem(page, lp);
1868 
1869  if (TransactionIdIsValid(priorXmax) &&
1870  !TransactionIdEquals(priorXmax, HeapTupleHeaderGetXmin(htup)))
1871  break;
1872 
1873  /* Remember the root line pointer for this item */
1874  root_offsets[nextoffnum - 1] = offnum;
1875 
1876  /* Advance to next chain member, if any */
1877  if (!HeapTupleHeaderIsHotUpdated(htup))
1878  break;
1879 
1880  /* HOT implies it can't have moved to different partition */
1881  Assert(!HeapTupleHeaderIndicatesMovedPartitions(htup));
1882 
1883  nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
1884  priorXmax = HeapTupleHeaderGetUpdateXid(htup);
1885  }
1886  }
1887 }
#define MemSet(start, val, len)
Definition: c.h:1020
#define MaxHeapTuplesPerPage
Definition: htup_details.h:572
#define HeapTupleHeaderIsHotUpdated(tup)
Definition: htup_details.h:482
#define ItemIdGetRedirect(itemId)
Definition: itemid.h:78
#define ItemIdIsDead(itemId)
Definition: itemid.h:113
#define ItemIdIsUsed(itemId)
Definition: itemid.h:92
#define ItemIdIsRedirected(itemId)
Definition: itemid.h:106
#define InvalidOffsetNumber
Definition: off.h:26
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
#define FirstOffsetNumber
Definition: off.h:27

References Assert, FirstOffsetNumber, HeapTupleHeaderGetUpdateXid, HeapTupleHeaderGetXmin, HeapTupleHeaderIndicatesMovedPartitions, HeapTupleHeaderIsHeapOnly, HeapTupleHeaderIsHotUpdated, InvalidOffsetNumber, InvalidTransactionId, ItemIdGetRedirect, ItemIdIsDead, ItemIdIsNormal, ItemIdIsRedirected, ItemIdIsUsed, ItemPointerGetOffsetNumber(), MaxHeapTuplesPerPage, MemSet, OffsetNumberNext, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), HeapTupleHeaderData::t_ctid, TransactionIdEquals, and TransactionIdIsValid.

Referenced by heapam_index_build_range_scan(), and heapam_index_validate_scan().
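
A short sketch of the intended usage, following the index-build callers above: the caller already holds a lock on the buffer and supplies an array of MaxHeapTuplesPerPage offsets. The wrapper function name is an assumption.

#include "postgres.h"

#include "access/heapam.h"
#include "access/htup_details.h"
#include "storage/bufmgr.h"

static void
collect_root_offsets_sketch(Buffer buf)
{
    OffsetNumber root_offsets[MaxHeapTuplesPerPage];
    Page        page = BufferGetPage(buf);

    /*
     * Afterwards, root_offsets[off - 1] is the root line pointer of the
     * HOT chain containing line pointer 'off', or InvalidOffsetNumber if
     * 'off' was not recorded as part of any chain.
     */
    heap_get_root_tuples(page, root_offsets);
}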

◆ heap_getnext()

HeapTuple heap_getnext ( TableScanDesc  sscan,
ScanDirection  direction 
)

Definition at line 1252 of file heapam.c.

1253 {
1254  HeapScanDesc scan = (HeapScanDesc) sscan;
1255 
1256  /*
1257  * This is still widely used directly, without going through table AM, so
1258  * add a safety check. It's possible we should, at a later point,
1259  * downgrade this to an assert. The reason for checking the AM routine,
1260  * rather than the AM oid, is that this allows to write regression tests
1261  * that create another AM reusing the heap handler.
1262  */
1263  if (unlikely(sscan->rs_rd->rd_tableam != GetHeapamTableAmRoutine()))
1264  ereport(ERROR,
1265  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1266  errmsg_internal("only heap AM is supported")));
1267 
1268  /*
1269  * We don't expect direct calls to heap_getnext with valid CheckXidAlive
1270  * for catalog or regular tables. See detailed comments in xact.c where
1271  * these variables are declared. Normally we have such a check at tableam
1272  * level API but this is called from many places so we need to ensure it
1273  * here.
1274  */
1275  if (unlikely(TransactionIdIsValid(CheckXidAlive) && !bsysscan))
1276  elog(ERROR, "unexpected heap_getnext call during logical decoding");
1277 
1278  /* Note: no locking manipulations needed */
1279 
1280  if (scan->rs_base.rs_flags & SO_ALLOW_PAGEMODE)
1281  heapgettup_pagemode(scan, direction,
1282  scan->rs_base.rs_nkeys, scan->rs_base.rs_key);
1283  else
1284  heapgettup(scan, direction,
1285  scan->rs_base.rs_nkeys, scan->rs_base.rs_key);
1286 
1287  if (scan->rs_ctup.t_data == NULL)
1288  return NULL;
1289 
1290  /*
1291  * if we get here it means we have a new current scan tuple, so point to
1292  * the proper return buffer and return the tuple.
1293  */
1294 
1295  pgstat_count_heap_getnext(scan->rs_base.rs_rd);
1296 
1297  return &scan->rs_ctup;
1298 }
#define unlikely(x)
Definition: c.h:311
int errmsg_internal(const char *fmt,...)
Definition: elog.c:1157
static void heapgettup(HeapScanDesc scan, ScanDirection dir, int nkeys, ScanKey key)
Definition: heapam.c:838
static void heapgettup_pagemode(HeapScanDesc scan, ScanDirection dir, int nkeys, ScanKey key)
Definition: heapam.c:948
const TableAmRoutine * GetHeapamTableAmRoutine(void)
#define pgstat_count_heap_getnext(rel)
Definition: pgstat.h:648
const struct TableAmRoutine * rd_tableam
Definition: rel.h:189
bool bsysscan
Definition: xact.c:99
TransactionId CheckXidAlive
Definition: xact.c:98

References bsysscan, CheckXidAlive, elog, ereport, errcode(), errmsg_internal(), ERROR, GetHeapamTableAmRoutine(), heapgettup(), heapgettup_pagemode(), pgstat_count_heap_getnext, RelationData::rd_tableam, HeapScanDescData::rs_base, HeapScanDescData::rs_ctup, TableScanDescData::rs_flags, TableScanDescData::rs_key, TableScanDescData::rs_nkeys, TableScanDescData::rs_rd, SO_ALLOW_PAGEMODE, HeapTupleData::t_data, TransactionIdIsValid, and unlikely.

Referenced by AlterTableMoveAll(), AlterTableSpaceOptions(), check_db_file_conflict(), CreateDatabaseUsingFileCopy(), do_autovacuum(), DropSetting(), DropTableSpace(), find_typed_table_dependencies(), get_all_vacuum_rels(), get_database_list(), get_subscription_list(), get_tables_to_cluster(), get_tablespace_name(), get_tablespace_oid(), GetAllTablesPublicationRelations(), getRelationsInNamespace(), GetSchemaPublicationRelations(), heapam_index_build_range_scan(), heapam_index_validate_scan(), index_update_stats(), objectsInSchemaToOids(), pgrowlocks(), pgstat_heap(), populate_typ_list(), ReindexMultipleTables(), remove_dbtablespaces(), RemoveSubscriptionRel(), RenameTableSpace(), ThereIsAtLeastOneRole(), and vac_truncate_clog().
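
A minimal sketch of the direct scan loop used by many of the callers listed above (the catalog-scan variant); the relation OID and per-tuple processing are placeholders.

#include "postgres.h"

#include "access/heapam.h"
#include "access/table.h"
#include "access/tableam.h"

static void
scan_relation_sketch(Oid relid)
{
    Relation    rel = table_open(relid, AccessShareLock);
    TableScanDesc scan = table_beginscan_catalog(rel, 0, NULL);
    HeapTuple   tuple;

    while (HeapTupleIsValid(tuple = heap_getnext(scan, ForwardScanDirection)))
    {
        /* examine tuple->t_data here */
    }

    table_endscan(scan);
    table_close(rel, AccessShareLock);
}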

◆ heap_getnextslot()

bool heap_getnextslot ( TableScanDesc  sscan,
ScanDirection  direction,
struct TupleTableSlot slot 
)

Definition at line 1301 of file heapam.c.

1302 {
1303  HeapScanDesc scan = (HeapScanDesc) sscan;
1304 
1305  /* Note: no locking manipulations needed */
1306 
1307  if (sscan->rs_flags & SO_ALLOW_PAGEMODE)
1308  heapgettup_pagemode(scan, direction, sscan->rs_nkeys, sscan->rs_key);
1309  else
1310  heapgettup(scan, direction, sscan->rs_nkeys, sscan->rs_key);
1311 
1312  if (scan->rs_ctup.t_data == NULL)
1313  {
1314  ExecClearTuple(slot);
1315  return false;
1316  }
1317 
1318  /*
1319  * if we get here it means we have a new current scan tuple, so point to
1320  * the proper return buffer and return the tuple.
1321  */
1322 
1323  pgstat_count_heap_getnext(scan->rs_base.rs_rd);
1324 
1325  ExecStoreBufferHeapTuple(&scan->rs_ctup, slot,
1326  scan->rs_cbuf);
1327  return true;
1328 }
TupleTableSlot * ExecStoreBufferHeapTuple(HeapTuple tuple, TupleTableSlot *slot, Buffer buffer)
Definition: execTuples.c:1479
static TupleTableSlot * ExecClearTuple(TupleTableSlot *slot)
Definition: tuptable.h:454

References ExecClearTuple(), ExecStoreBufferHeapTuple(), heapgettup(), heapgettup_pagemode(), pgstat_count_heap_getnext, HeapScanDescData::rs_base, HeapScanDescData::rs_cbuf, HeapScanDescData::rs_ctup, TableScanDescData::rs_flags, TableScanDescData::rs_key, TableScanDescData::rs_nkeys, TableScanDescData::rs_rd, SO_ALLOW_PAGEMODE, and HeapTupleData::t_data.
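
A sketch of the slot-based variant (not taken from the source); ordinary callers go through table_scan_getnextslot(), and calling heap_getnextslot() directly is only valid for heap relations. An already-active snapshot is assumed.

#include "postgres.h"

#include "access/heapam.h"
#include "access/tableam.h"
#include "executor/tuptable.h"
#include "utils/snapmgr.h"

static void
scan_with_slot_sketch(Relation rel)
{
    TupleTableSlot *slot = table_slot_create(rel, NULL);
    TableScanDesc scan = table_beginscan(rel, GetActiveSnapshot(), 0, NULL);

    while (heap_getnextslot(scan, ForwardScanDirection, slot))
    {
        /* slot now holds a pinned buffer tuple; process it here */
    }

    table_endscan(scan);
    ExecDropSingleTupleTableSlot(slot);
}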

◆ heap_getnextslot_tidrange()

bool heap_getnextslot_tidrange ( TableScanDesc  sscan,
ScanDirection  direction,
TupleTableSlot slot 
)

Definition at line 1404 of file heapam.c.

1406 {
1407  HeapScanDesc scan = (HeapScanDesc) sscan;
1408  ItemPointer mintid = &sscan->rs_mintid;
1409  ItemPointer maxtid = &sscan->rs_maxtid;
1410 
1411  /* Note: no locking manipulations needed */
1412  for (;;)
1413  {
1414  if (sscan->rs_flags & SO_ALLOW_PAGEMODE)
1415  heapgettup_pagemode(scan, direction, sscan->rs_nkeys, sscan->rs_key);
1416  else
1417  heapgettup(scan, direction, sscan->rs_nkeys, sscan->rs_key);
1418 
1419  if (scan->rs_ctup.t_data == NULL)
1420  {
1421  ExecClearTuple(slot);
1422  return false;
1423  }
1424 
1425  /*
1426  * heap_set_tidrange will have used heap_setscanlimits to limit the
1427  * range of pages we scan to only ones that can contain the TID range
1428  * we're scanning for. Here we must filter out any tuples from these
1429  * pages that are outside of that range.
1430  */
1431  if (ItemPointerCompare(&scan->rs_ctup.t_self, mintid) < 0)
1432  {
1433  ExecClearTuple(slot);
1434 
1435  /*
1436  * When scanning backwards, the TIDs will be in descending order.
1437  * Future tuples in this direction will be lower still, so we can
1438  * just return false to indicate there will be no more tuples.
1439  */
1440  if (ScanDirectionIsBackward(direction))
1441  return false;
1442 
1443  continue;
1444  }
1445 
1446  /*
1447  * Likewise for the final page, we must filter out TIDs greater than
1448  * maxtid.
1449  */
1450  if (ItemPointerCompare(&scan->rs_ctup.t_self, maxtid) > 0)
1451  {
1452  ExecClearTuple(slot);
1453 
1454  /*
1455  * When scanning forward, the TIDs will be in ascending order.
1456  * Future tuples in this direction will be higher still, so we can
1457  * just return false to indicate there will be no more tuples.
1458  */
1459  if (ScanDirectionIsForward(direction))
1460  return false;
1461  continue;
1462  }
1463 
1464  break;
1465  }
1466 
1467  /*
1468  * if we get here it means we have a new current scan tuple, so point to
1469  * the proper return buffer and return the tuple.
1470  */
1471  pgstat_count_heap_getnext(scan->rs_base.rs_rd);
1472 
1473  ExecStoreBufferHeapTuple(&scan->rs_ctup, slot, scan->rs_cbuf);
1474  return true;
1475 }
int32 ItemPointerCompare(ItemPointer arg1, ItemPointer arg2)
Definition: itemptr.c:51
#define ScanDirectionIsForward(direction)
Definition: sdir.h:64
#define ScanDirectionIsBackward(direction)
Definition: sdir.h:50
ItemPointerData rs_mintid
Definition: relscan.h:40
ItemPointerData rs_maxtid
Definition: relscan.h:41

References ExecClearTuple(), ExecStoreBufferHeapTuple(), heapgettup(), heapgettup_pagemode(), ItemPointerCompare(), pgstat_count_heap_getnext, HeapScanDescData::rs_base, HeapScanDescData::rs_cbuf, HeapScanDescData::rs_ctup, TableScanDescData::rs_flags, TableScanDescData::rs_key, TableScanDescData::rs_maxtid, TableScanDescData::rs_mintid, TableScanDescData::rs_nkeys, TableScanDescData::rs_rd, ScanDirectionIsBackward, ScanDirectionIsForward, SO_ALLOW_PAGEMODE, HeapTupleData::t_data, and HeapTupleData::t_self.
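
A sketch (assumptions noted) of a TID-range scan done directly with the heap routines, mirroring what the TID Range Scan executor node does through the table AM wrappers; an active snapshot is assumed.

#include "postgres.h"

#include "access/heapam.h"
#include "access/tableam.h"
#include "executor/tuptable.h"
#include "storage/itemptr.h"
#include "utils/snapmgr.h"

static void
tidrange_scan_sketch(Relation rel, ItemPointer mintid, ItemPointer maxtid)
{
    TupleTableSlot *slot = table_slot_create(rel, NULL);
    TableScanDesc scan = table_beginscan(rel, GetActiveSnapshot(), 0, NULL);

    /* Restrict the scanned block range and record the exact TID bounds */
    heap_set_tidrange(scan, mintid, maxtid);

    while (heap_getnextslot_tidrange(scan, ForwardScanDirection, slot))
    {
        /* every tuple returned satisfies mintid <= ctid <= maxtid */
    }

    table_endscan(scan);
    ExecDropSingleTupleTableSlot(slot);
}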

◆ heap_hot_search_buffer()

bool heap_hot_search_buffer ( ItemPointer  tid,
Relation  relation,
Buffer  buffer,
Snapshot  snapshot,
HeapTuple  heapTuple,
bool all_dead,
bool  first_call 
)

Definition at line 1631 of file heapam.c.

1634 {
1635  Page page = BufferGetPage(buffer);
1636  TransactionId prev_xmax = InvalidTransactionId;
1637  BlockNumber blkno;
1638  OffsetNumber offnum;
1639  bool at_chain_start;
1640  bool valid;
1641  bool skip;
1642  GlobalVisState *vistest = NULL;
1643 
1644  /* If this is not the first call, previous call returned a (live!) tuple */
1645  if (all_dead)
1646  *all_dead = first_call;
1647 
1648  blkno = ItemPointerGetBlockNumber(tid);
1649  offnum = ItemPointerGetOffsetNumber(tid);
1650  at_chain_start = first_call;
1651  skip = !first_call;
1652 
1653  /* XXX: we should assert that a snapshot is pushed or registered */
1654  Assert(TransactionIdIsValid(RecentXmin));
1655  Assert(BufferGetBlockNumber(buffer) == blkno);
1656 
1657  /* Scan through possible multiple members of HOT-chain */
1658  for (;;)
1659  {
1660  ItemId lp;
1661 
1662  /* check for bogus TID */
1663  if (offnum < FirstOffsetNumber || offnum > PageGetMaxOffsetNumber(page))
1664  break;
1665 
1666  lp = PageGetItemId(page, offnum);
1667 
1668  /* check for unused, dead, or redirected items */
1669  if (!ItemIdIsNormal(lp))
1670  {
1671  /* We should only see a redirect at start of chain */
1672  if (ItemIdIsRedirected(lp) && at_chain_start)
1673  {
1674  /* Follow the redirect */
1675  offnum = ItemIdGetRedirect(lp);
1676  at_chain_start = false;
1677  continue;
1678  }
1679  /* else must be end of chain */
1680  break;
1681  }
1682 
1683  /*
1684  * Update heapTuple to point to the element of the HOT chain we're
1685  * currently investigating. Having t_self set correctly is important
1686  * because the SSI checks and the *Satisfies routine for historical
1687  * MVCC snapshots need the correct tid to decide about the visibility.
1688  */
1689  heapTuple->t_data = (HeapTupleHeader) PageGetItem(page, lp);
1690  heapTuple->t_len = ItemIdGetLength(lp);
1691  heapTuple->t_tableOid = RelationGetRelid(relation);
1692  ItemPointerSet(&heapTuple->t_self, blkno, offnum);
1693 
1694  /*
1695  * Shouldn't see a HEAP_ONLY tuple at chain start.
1696  */
1697  if (at_chain_start && HeapTupleIsHeapOnly(heapTuple))
1698  break;
1699 
1700  /*
1701  * The xmin should match the previous xmax value, else chain is
1702  * broken.
1703  */
1704  if (TransactionIdIsValid(prev_xmax) &&
1705  !TransactionIdEquals(prev_xmax,
1706  HeapTupleHeaderGetXmin(heapTuple->t_data)))
1707  break;
1708 
1709  /*
1710  * When first_call is true (and thus, skip is initially false) we'll
1711  * return the first tuple we find. But on later passes, heapTuple
1712  * will initially be pointing to the tuple we returned last time.
1713  * Returning it again would be incorrect (and would loop forever), so
1714  * we skip it and return the next match we find.
1715  */
1716  if (!skip)
1717  {
1718  /* If it's visible per the snapshot, we must return it */
1719  valid = HeapTupleSatisfiesVisibility(heapTuple, snapshot, buffer);
1720  HeapCheckForSerializableConflictOut(valid, relation, heapTuple,
1721  buffer, snapshot);
1722 
1723  if (valid)
1724  {
1725  ItemPointerSetOffsetNumber(tid, offnum);
1726  PredicateLockTID(relation, &heapTuple->t_self, snapshot,
1727  HeapTupleHeaderGetXmin(heapTuple->t_data));
1728  if (all_dead)
1729  *all_dead = false;
1730  return true;
1731  }
1732  }
1733  skip = false;
1734 
1735  /*
1736  * If we can't see it, maybe no one else can either. At caller
1737  * request, check whether all chain members are dead to all
1738  * transactions.
1739  *
1740  * Note: if you change the criterion here for what is "dead", fix the
1741  * planner's get_actual_variable_range() function to match.
1742  */
1743  if (all_dead && *all_dead)
1744  {
1745  if (!vistest)
1746  vistest = GlobalVisTestFor(relation);
1747 
1748  if (!HeapTupleIsSurelyDead(heapTuple, vistest))
1749  *all_dead = false;
1750  }
1751 
1752  /*
1753  * Check to see if HOT chain continues past this tuple; if so fetch
1754  * the next offnum and loop around.
1755  */
1756  if (HeapTupleIsHotUpdated(heapTuple))
1757  {
1758  Assert(ItemPointerGetBlockNumber(&heapTuple->t_data->t_ctid) ==
1759  blkno);
1760  offnum = ItemPointerGetOffsetNumber(&heapTuple->t_data->t_ctid);
1761  at_chain_start = false;
1762  prev_xmax = HeapTupleHeaderGetUpdateXid(heapTuple->t_data);
1763  }
1764  else
1765  break; /* end of chain */
1766  }
1767 
1768  return false;
1769 }
bool HeapTupleIsSurelyDead(HeapTuple htup, GlobalVisState *vistest)
#define HeapTupleIsHeapOnly(tuple)
Definition: htup_details.h:683
#define HeapTupleIsHotUpdated(tuple)
Definition: htup_details.h:674
static void ItemPointerSet(ItemPointerData *pointer, BlockNumber blockNumber, OffsetNumber offNum)
Definition: itemptr.h:135
static void ItemPointerSetOffsetNumber(ItemPointerData *pointer, OffsetNumber offsetNumber)
Definition: itemptr.h:158
static const struct exclude_list_item skip[]
Definition: pg_checksums.c:108
GlobalVisState * GlobalVisTestFor(Relation rel)
Definition: procarray.c:4111
TransactionId RecentXmin
Definition: snapmgr.c:99

References Assert, BufferGetBlockNumber(), BufferGetPage(), GlobalVisTestFor(), HeapCheckForSerializableConflictOut(), HeapTupleHeaderGetUpdateXid, HeapTupleHeaderGetXmin, HeapTupleIsHeapOnly, HeapTupleIsHotUpdated, HeapTupleIsSurelyDead(), HeapTupleSatisfiesVisibility(), InvalidTransactionId, ItemIdGetLength, ItemIdGetRedirect, ItemIdIsNormal, ItemIdIsRedirected, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), ItemPointerSet(), ItemPointerSetOffsetNumber(), PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PredicateLockTID(), RecentXmin, RelationGetRelid, skip, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TransactionIdEquals, and TransactionIdIsValid.

Referenced by heap_index_delete_tuples(), heapam_index_fetch_tuple(), and heapam_scan_bitmap_next_block().
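
A minimal sketch of resolving an index TID to the visible member of its HOT chain, roughly what heapam_index_fetch_tuple() does; the buffer management shown here is simplified and the function name is an assumption.

#include "postgres.h"

#include "access/heapam.h"
#include "storage/bufmgr.h"
#include "storage/itemptr.h"

static bool
fetch_hot_member_sketch(Relation rel, ItemPointer tid, Snapshot snapshot,
                        HeapTupleData *tuple, bool *all_dead)
{
    Buffer      buf = ReadBuffer(rel, ItemPointerGetBlockNumber(tid));
    bool        found;

    LockBuffer(buf, BUFFER_LOCK_SHARE);

    /* first_call = true: start from the chain root named by the index entry */
    found = heap_hot_search_buffer(tid, rel, buf, snapshot,
                                   tuple, all_dead, true);

    LockBuffer(buf, BUFFER_LOCK_UNLOCK);
    ReleaseBuffer(buf);

    return found;
}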

◆ heap_index_delete_tuples()

TransactionId heap_index_delete_tuples ( Relation  rel,
TM_IndexDeleteOp delstate 
)

Definition at line 7636 of file heapam.c.

7637 {
7638  /* Initial assumption is that earlier pruning took care of conflict */
7639  TransactionId snapshotConflictHorizon = InvalidTransactionId;
7640  BlockNumber blkno = InvalidBlockNumber;
7641  Buffer buf = InvalidBuffer;
7642  Page page = NULL;
7643  OffsetNumber maxoff = InvalidOffsetNumber;
7644  TransactionId priorXmax;
7645 #ifdef USE_PREFETCH
7646  IndexDeletePrefetchState prefetch_state;
7647  int prefetch_distance;
7648 #endif
7649  SnapshotData SnapshotNonVacuumable;
7650  int finalndeltids = 0,
7651  nblocksaccessed = 0;
7652 
7653  /* State that's only used in bottom-up index deletion case */
7654  int nblocksfavorable = 0;
7655  int curtargetfreespace = delstate->bottomupfreespace,
7656  lastfreespace = 0,
7657  actualfreespace = 0;
7658  bool bottomup_final_block = false;
7659 
7660  InitNonVacuumableSnapshot(SnapshotNonVacuumable, GlobalVisTestFor(rel));
7661 
7662  /* Sort caller's deltids array by TID for further processing */
7663  index_delete_sort(delstate);
7664 
7665  /*
7666  * Bottom-up case: resort deltids array in an order attuned to where the
7667  * greatest number of promising TIDs are to be found, and determine how
7668  * many blocks from the start of sorted array should be considered
7669  * favorable. This will also shrink the deltids array in order to
7670  * eliminate completely unfavorable blocks up front.
7671  */
7672  if (delstate->bottomup)
7673  nblocksfavorable = bottomup_sort_and_shrink(delstate);
7674 
7675 #ifdef USE_PREFETCH
7676  /* Initialize prefetch state. */
7677  prefetch_state.cur_hblkno = InvalidBlockNumber;
7678  prefetch_state.next_item = 0;
7679  prefetch_state.ndeltids = delstate->ndeltids;
7680  prefetch_state.deltids = delstate->deltids;
7681 
7682  /*
7683  * Determine the prefetch distance that we will attempt to maintain.
7684  *
7685  * Since the caller holds a buffer lock somewhere in rel, we'd better make
7686  * sure that isn't a catalog relation before we call code that does
7687  * syscache lookups, to avoid risk of deadlock.
7688  */
7689  if (IsCatalogRelation(rel))
7690  prefetch_distance = maintenance_io_concurrency;
7691  else
7692  prefetch_distance =
7693  get_tablespace_maintenance_io_concurrency(rel->rd_rel->reltablespace);
7694 
7695  /* Cap initial prefetch distance for bottom-up deletion caller */
7696  if (delstate->bottomup)
7697  {
7698  Assert(nblocksfavorable >= 1);
7699  Assert(nblocksfavorable <= BOTTOMUP_MAX_NBLOCKS);
7700  prefetch_distance = Min(prefetch_distance, nblocksfavorable);
7701  }
7702 
7703  /* Start prefetching. */
7704  index_delete_prefetch_buffer(rel, &prefetch_state, prefetch_distance);
7705 #endif
7706 
7707  /* Iterate over deltids, determine which to delete, check their horizon */
7708  Assert(delstate->ndeltids > 0);
7709  for (int i = 0; i < delstate->ndeltids; i++)
7710  {
7711  TM_IndexDelete *ideltid = &delstate->deltids[i];
7712  TM_IndexStatus *istatus = delstate->status + ideltid->id;
7713  ItemPointer htid = &ideltid->tid;
7714  OffsetNumber offnum;
7715 
7716  /*
7717  * Read buffer, and perform required extra steps each time a new block
7718  * is encountered. Avoid refetching if it's the same block as the one
7719  * from the last htid.
7720  */
7721  if (blkno == InvalidBlockNumber ||
7722  ItemPointerGetBlockNumber(htid) != blkno)
7723  {
7724  /*
7725  * Consider giving up early for bottom-up index deletion caller
7726  * first. (Only prefetch next-next block afterwards, when it
7727  * becomes clear that we're at least going to access the next
7728  * block in line.)
7729  *
7730  * Sometimes the first block frees so much space for bottom-up
7731  * caller that the deletion process can end without accessing any
7732  * more blocks. It is usually necessary to access 2 or 3 blocks
7733  * per bottom-up deletion operation, though.
7734  */
7735  if (delstate->bottomup)
7736  {
7737  /*
7738  * We often allow caller to delete a few additional items
7739  * whose entries we reached after the point that space target
7740  * from caller was satisfied. The cost of accessing the page
7741  * was already paid at that point, so it made sense to finish
7742  * it off. When that happened, we finalize everything here
7743  * (by finishing off the whole bottom-up deletion operation
7744  * without needlessly paying the cost of accessing any more
7745  * blocks).
7746  */
7747  if (bottomup_final_block)
7748  break;
7749 
7750  /*
7751  * Give up when we didn't enable our caller to free any
7752  * additional space as a result of processing the page that we
7753  * just finished up with. This rule is the main way in which
7754  * we keep the cost of bottom-up deletion under control.
7755  */
7756  if (nblocksaccessed >= 1 && actualfreespace == lastfreespace)
7757  break;
7758  lastfreespace = actualfreespace; /* for next time */
7759 
7760  /*
7761  * Deletion operation (which is bottom-up) will definitely
7762  * access the next block in line. Prepare for that now.
7763  *
7764  * Decay target free space so that we don't hang on for too
7765  * long with a marginal case. (Space target is only truly
7766  * helpful when it allows us to recognize that we don't need
7767  * to access more than 1 or 2 blocks to satisfy caller due to
7768  * agreeable workload characteristics.)
7769  *
7770  * We are a bit more patient when we encounter contiguous
7771  * blocks, though: these are treated as favorable blocks. The
7772  * decay process is only applied when the next block in line
7773  * is not a favorable/contiguous block. This is not an
7774  * exception to the general rule; we still insist on finding
7775  * at least one deletable item per block accessed. See
7776  * bottomup_nblocksfavorable() for full details of the theory
7777  * behind favorable blocks and heap block locality in general.
7778  *
7779  * Note: The first block in line is always treated as a
7780  * favorable block, so the earliest possible point that the
7781  * decay can be applied is just before we access the second
7782  * block in line. The Assert() verifies this for us.
7783  */
7784  Assert(nblocksaccessed > 0 || nblocksfavorable > 0);
7785  if (nblocksfavorable > 0)
7786  nblocksfavorable--;
7787  else
7788  curtargetfreespace /= 2;
7789  }
7790 
7791  /* release old buffer */
7792  if (BufferIsValid(buf))
7793  UnlockReleaseBuffer(buf);
7794 
7795  blkno = ItemPointerGetBlockNumber(htid);
7796  buf = ReadBuffer(rel, blkno);
7797  nblocksaccessed++;
7798  Assert(!delstate->bottomup ||
7799  nblocksaccessed <= BOTTOMUP_MAX_NBLOCKS);
7800 
7801 #ifdef USE_PREFETCH
7802 
7803  /*
7804  * To maintain the prefetch distance, prefetch one more page for
7805  * each page we read.
7806  */
7807  index_delete_prefetch_buffer(rel, &prefetch_state, 1);
7808 #endif
7809 
7810  LockBuffer(buf, BUFFER_LOCK_SHARE);
7811 
7812  page = BufferGetPage(buf);
7813  maxoff = PageGetMaxOffsetNumber(page);
7814  }
7815 
7816  /*
7817  * In passing, detect index corruption involving an index page with a
7818  * TID that points to a location in the heap that couldn't possibly be
7819  * correct. We only do this with actual TIDs from caller's index page
7820  * (not items reached by traversing through a HOT chain).
7821  */
7822  index_delete_check_htid(delstate, page, maxoff, htid, istatus);
7823 
7824  if (istatus->knowndeletable)
7825  Assert(!delstate->bottomup && !istatus->promising);
7826  else
7827  {
7828  ItemPointerData tmp = *htid;
7829  HeapTupleData heapTuple;
7830 
7831  /* Are any tuples from this HOT chain non-vacuumable? */
7832  if (heap_hot_search_buffer(&tmp, rel, buf, &SnapshotNonVacuumable,
7833  &heapTuple, NULL, true))
7834  continue; /* can't delete entry */
7835 
7836  /* Caller will delete, since whole HOT chain is vacuumable */
7837  istatus->knowndeletable = true;
7838 
7839  /* Maintain index free space info for bottom-up deletion case */
7840  if (delstate->bottomup)
7841  {
7842  Assert(istatus->freespace > 0);
7843  actualfreespace += istatus->freespace;
7844  if (actualfreespace >= curtargetfreespace)
7845  bottomup_final_block = true;
7846  }
7847  }
7848 
7849  /*
7850  * Maintain snapshotConflictHorizon value for deletion operation as a
7851  * whole by advancing current value using heap tuple headers. This is
7852  * loosely based on the logic for pruning a HOT chain.
7853  */
7854  offnum = ItemPointerGetOffsetNumber(htid);
7855  priorXmax = InvalidTransactionId; /* cannot check first XMIN */
7856  for (;;)
7857  {
7858  ItemId lp;
7859  HeapTupleHeader htup;
7860 
7861  /* Sanity check (pure paranoia) */
7862  if (offnum < FirstOffsetNumber)
7863  break;
7864 
7865  /*
7866  * An offset past the end of page's line pointer array is possible
7867  * when the array was truncated
7868  */
7869  if (offnum > maxoff)
7870  break;
7871 
7872  lp = PageGetItemId(page, offnum);
7873  if (ItemIdIsRedirected(lp))
7874  {
7875  offnum = ItemIdGetRedirect(lp);
7876  continue;
7877  }
7878 
7879  /*
7880  * We'll often encounter LP_DEAD line pointers (especially with an
7881  * entry marked knowndeletable by our caller up front). No heap
7882  * tuple headers get examined for an htid that leads us to an
7883  * LP_DEAD item. This is okay because the earlier pruning
7884  * operation that made the line pointer LP_DEAD in the first place
7885  * must have considered the original tuple header as part of
7886  * generating its own snapshotConflictHorizon value.
7887  *
7888  * Relying on XLOG_HEAP2_PRUNE_VACUUM_SCAN records like this is
7889  * the same strategy that index vacuuming uses in all cases. Index
7890  * VACUUM WAL records don't even have a snapshotConflictHorizon
7891  * field of their own for this reason.
7892  */
7893  if (!ItemIdIsNormal(lp))
7894  break;
7895 
7896  htup = (HeapTupleHeader) PageGetItem(page, lp);
7897 
7898  /*
7899  * Check the tuple XMIN against prior XMAX, if any
7900  */
7901  if (TransactionIdIsValid(priorXmax) &&
7902  !TransactionIdEquals(HeapTupleHeaderGetXmin(htup), priorXmax))
7903  break;
7904 
7905  HeapTupleHeaderAdvanceConflictHorizon(htup,
7906  &snapshotConflictHorizon);
7907 
7908  /*
7909  * If the tuple is not HOT-updated, then we are at the end of this
7910  * HOT-chain. No need to visit later tuples from the same update
7911  * chain (they get their own index entries) -- just move on to
7912  * next htid from index AM caller.
7913  */
7914  if (!HeapTupleHeaderIsHotUpdated(htup))
7915  break;
7916 
7917  /* Advance to next HOT chain member */
7918  Assert(ItemPointerGetBlockNumber(&htup->t_ctid) == blkno);
7919  offnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
7920  priorXmax = HeapTupleHeaderGetUpdateXid(htup);
7921  }
7922 
7923  /* Enable further/final shrinking of deltids for caller */
7924  finalndeltids = i + 1;
7925  }
7926 
7927  UnlockReleaseBuffer(buf);
7928 
7929  /*
7930  * Shrink deltids array to exclude non-deletable entries at the end. This
7931  * is not just a minor optimization. Final deltids array size might be
7932  * zero for a bottom-up caller. Index AM is explicitly allowed to rely on
7933  * ndeltids being zero in all cases with zero total deletable entries.
7934  */
7935  Assert(finalndeltids > 0 || delstate->bottomup);
7936  delstate->ndeltids = finalndeltids;
7937 
7938  return snapshotConflictHorizon;
7939 }
int maintenance_io_concurrency
Definition: bufmgr.c:158
#define Min(x, y)
Definition: c.h:1004
bool IsCatalogRelation(Relation relation)
Definition: catalog.c:103
static int bottomup_sort_and_shrink(TM_IndexDeleteOp *delstate)
Definition: heapam.c:8194
void HeapTupleHeaderAdvanceConflictHorizon(HeapTupleHeader tuple, TransactionId *snapshotConflictHorizon)
Definition: heapam.c:7491
static void index_delete_check_htid(TM_IndexDeleteOp *delstate, Page page, OffsetNumber maxoff, ItemPointer htid, TM_IndexStatus *istatus)
Definition: heapam.c:7576
#define BOTTOMUP_MAX_NBLOCKS
Definition: heapam.c:190
bool heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer, Snapshot snapshot, HeapTuple heapTuple, bool *all_dead, bool first_call)
Definition: heapam.c:1631
static void index_delete_sort(TM_IndexDeleteOp *delstate)
Definition: heapam.c:7981
static char * buf
Definition: pg_test_fsync.c:73
#define InitNonVacuumableSnapshot(snapshotdata, vistestp)
Definition: snapmgr.h:48
int get_tablespace_maintenance_io_concurrency(Oid spcid)
Definition: spccache.c:229
TM_IndexStatus * status
Definition: tableam.h:255
int bottomupfreespace
Definition: tableam.h:250
TM_IndexDelete * deltids
Definition: tableam.h:254
ItemPointerData tid
Definition: tableam.h:213
bool knowndeletable
Definition: tableam.h:220
bool promising
Definition: tableam.h:223
int16 freespace
Definition: tableam.h:224

References Assert, TM_IndexDeleteOp::bottomup, BOTTOMUP_MAX_NBLOCKS, bottomup_sort_and_shrink(), TM_IndexDeleteOp::bottomupfreespace, buf, BUFFER_LOCK_SHARE, BufferGetPage(), BufferIsValid(), TM_IndexDeleteOp::deltids, FirstOffsetNumber, TM_IndexStatus::freespace, get_tablespace_maintenance_io_concurrency(), GlobalVisTestFor(), heap_hot_search_buffer(), HeapTupleHeaderAdvanceConflictHorizon(), HeapTupleHeaderGetUpdateXid, HeapTupleHeaderGetXmin, HeapTupleHeaderIsHotUpdated, i, TM_IndexDelete::id, index_delete_check_htid(), index_delete_sort(), InitNonVacuumableSnapshot, InvalidBlockNumber, InvalidBuffer, InvalidOffsetNumber, InvalidTransactionId, IsCatalogRelation(), ItemIdGetRedirect, ItemIdIsNormal, ItemIdIsRedirected, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), TM_IndexStatus::knowndeletable, LockBuffer(), maintenance_io_concurrency, Min, TM_IndexDeleteOp::ndeltids, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), TM_IndexStatus::promising, RelationData::rd_rel, ReadBuffer(), TM_IndexDeleteOp::status, HeapTupleHeaderData::t_ctid, TM_IndexDelete::tid, TransactionIdEquals, TransactionIdIsValid, and UnlockReleaseBuffer().

◆ heap_inplace_update()

void heap_inplace_update ( Relation  relation,
HeapTuple  tuple 
)

Definition at line 6063 of file heapam.c.

6064 {
6065  Buffer buffer;
6066  Page page;
6067  OffsetNumber offnum;
6068  ItemId lp = NULL;
6069  HeapTupleHeader htup;
6070  uint32 oldlen;
6071  uint32 newlen;
6072 
6073  /*
6074  * For now, we don't allow parallel updates. Unlike a regular update,
6075  * this should never create a combo CID, so it might be possible to relax
6076  * this restriction, but not without more thought and testing. It's not
6077  * clear that it would be useful, anyway.
6078  */
6079  if (IsInParallelMode())
6080  ereport(ERROR,
6081  (errcode(ERRCODE_INVALID_TRANSACTION_STATE),
6082  errmsg("cannot update tuples during a parallel operation")));
6083 
6084  INJECTION_POINT("inplace-before-pin");
6085  buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(&(tuple->t_self)));
6086  LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
6087  page = (Page) BufferGetPage(buffer);
6088 
6089  offnum = ItemPointerGetOffsetNumber(&(tuple->t_self));
6090  if (PageGetMaxOffsetNumber(page) >= offnum)
6091  lp = PageGetItemId(page, offnum);
6092 
6093  if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
6094  elog(ERROR, "invalid lp");
6095 
6096  htup = (HeapTupleHeader) PageGetItem(page, lp);
6097 
6098  oldlen = ItemIdGetLength(lp) - htup->t_hoff;
6099  newlen = tuple->t_len - tuple->t_data->t_hoff;
6100  if (oldlen != newlen || htup->t_hoff != tuple->t_data->t_hoff)
6101  elog(ERROR, "wrong tuple length");
6102 
6103  /* NO EREPORT(ERROR) from here till changes are logged */
6104  START_CRIT_SECTION();
6105 
6106  memcpy((char *) htup + htup->t_hoff,
6107  (char *) tuple->t_data + tuple->t_data->t_hoff,
6108  newlen);
6109 
6110  MarkBufferDirty(buffer);
6111 
6112  /* XLOG stuff */
6113  if (RelationNeedsWAL(relation))
6114  {
6115  xl_heap_inplace xlrec;
6116  XLogRecPtr recptr;
6117 
6118  xlrec.offnum = ItemPointerGetOffsetNumber(&tuple->t_self);
6119 
6120  XLogBeginInsert();
6121  XLogRegisterData((char *) &xlrec, SizeOfHeapInplace);
6122 
6123  XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
6124  XLogRegisterBufData(0, (char *) htup + htup->t_hoff, newlen);
6125 
6126  /* inplace updates aren't decoded atm, don't log the origin */
6127 
6128  recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_INPLACE);
6129 
6130  PageSetLSN(page, recptr);
6131  }
6132 
6133  END_CRIT_SECTION();
6134 
6135  UnlockReleaseBuffer(buffer);
6136 
6137  /*
6138  * Send out shared cache inval if necessary. Note that because we only
6139  * pass the new version of the tuple, this mustn't be used for any
6140  * operations that could change catcache lookup keys. But we aren't
6141  * bothering with index updates either, so that's true a fortiori.
6142  */
6143  if (!IsBootstrapProcessingMode())
6144  CacheInvalidateHeapTuple(relation, tuple, NULL);
6145 }
unsigned int uint32
Definition: c.h:506
#define SizeOfHeapInplace
Definition: heapam_xlog.h:430
#define XLOG_HEAP_INPLACE
Definition: heapam_xlog.h:39
#define INJECTION_POINT(name)
#define IsBootstrapProcessingMode()
Definition: miscadmin.h:451
OffsetNumber offnum
Definition: heapam_xlog.h:427
void XLogRegisterBufData(uint8 block_id, const char *data, uint32 len)
Definition: xloginsert.c:405

References BUFFER_LOCK_EXCLUSIVE, BufferGetPage(), CacheInvalidateHeapTuple(), elog, END_CRIT_SECTION, ereport, errcode(), errmsg(), ERROR, INJECTION_POINT, IsBootstrapProcessingMode, IsInParallelMode(), ItemIdGetLength, ItemIdIsNormal, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), LockBuffer(), MarkBufferDirty(), xl_heap_inplace::offnum, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PageSetLSN(), ReadBuffer(), REGBUF_STANDARD, RelationNeedsWAL, SizeOfHeapInplace, START_CRIT_SECTION, HeapTupleData::t_data, HeapTupleHeaderData::t_hoff, HeapTupleData::t_len, HeapTupleData::t_self, UnlockReleaseBuffer(), XLOG_HEAP_INPLACE, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by create_toast_table(), dropdb(), EventTriggerOnLogin(), index_update_stats(), vac_update_datfrozenxid(), and vac_update_relstats().
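
A sketch of the traditional non-transactional update pattern used by the callers above (compare vac_update_relstats()); the particular field updated here is illustrative only.

#include "postgres.h"

#include "access/heapam.h"
#include "access/htup_details.h"
#include "access/table.h"
#include "catalog/pg_class.h"
#include "utils/syscache.h"

static void
set_relpages_inplace_sketch(Oid relid, int32 num_pages)
{
    Relation    rd = table_open(RelationRelationId, RowExclusiveLock);
    HeapTuple   ctup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid));
    Form_pg_class pgcform;

    if (!HeapTupleIsValid(ctup))
        elog(ERROR, "pg_class entry for relation %u vanished", relid);

    /* Modify the local copy; the on-disk tuple length must not change */
    pgcform = (Form_pg_class) GETSTRUCT(ctup);
    pgcform->relpages = num_pages;

    /* Overwrite the existing heap tuple in place (WAL-logged, non-transactional) */
    heap_inplace_update(rd, ctup);

    heap_freetuple(ctup);
    table_close(rd, RowExclusiveLock);
}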

◆ heap_insert()

void heap_insert ( Relation  relation,
HeapTuple  tup,
CommandId  cid,
int  options,
BulkInsertState  bistate 
)

Definition at line 1994 of file heapam.c.

1996 {
1997  TransactionId xid = GetCurrentTransactionId();
1998  HeapTuple heaptup;
1999  Buffer buffer;
2000  Buffer vmbuffer = InvalidBuffer;
2001  bool all_visible_cleared = false;
2002 
2003  /* Cheap, simplistic check that the tuple matches the rel's rowtype. */
2004  Assert(HeapTupleHeaderGetNatts(tup->t_data) <=
2005  RelationGetNumberOfAttributes(relation));
2006 
2007  /*
2008  * Fill in tuple header fields and toast the tuple if necessary.
2009  *
2010  * Note: below this point, heaptup is the data we actually intend to store
2011  * into the relation; tup is the caller's original untoasted data.
2012  */
2013  heaptup = heap_prepare_insert(relation, tup, xid, cid, options);
2014 
2015  /*
2016  * Find buffer to insert this tuple into. If the page is all visible,
2017  * this will also pin the requisite visibility map page.
2018  */
2019  buffer = RelationGetBufferForTuple(relation, heaptup->t_len,
2020  InvalidBuffer, options, bistate,
2021  &vmbuffer, NULL,
2022  0);
2023 
2024  /*
2025  * We're about to do the actual insert -- but check for conflict first, to
2026  * avoid possibly having to roll back work we've just done.
2027  *
2028  * This is safe without a recheck as long as there is no possibility of
2029  * another process scanning the page between this check and the insert
2030  * being visible to the scan (i.e., an exclusive buffer content lock is
2031  * continuously held from this point until the tuple insert is visible).
2032  *
2033  * For a heap insert, we only need to check for table-level SSI locks. Our
2034  * new tuple can't possibly conflict with existing tuple locks, and heap
2035  * page locks are only consolidated versions of tuple locks; they do not
2036  * lock "gaps" as index page locks do. So we don't need to specify a
2037  * buffer when making the call, which makes for a faster check.
2038  */
2039  CheckForSerializableConflictIn(relation, NULL, InvalidBlockNumber);
2040 
2041  /* NO EREPORT(ERROR) from here till changes are logged */
2042  START_CRIT_SECTION();
2043 
2044  RelationPutHeapTuple(relation, buffer, heaptup,
2045  (options & HEAP_INSERT_SPECULATIVE) != 0);
2046 
2047  if (PageIsAllVisible(BufferGetPage(buffer)))
2048  {
2049  all_visible_cleared = true;
2050  PageClearAllVisible(BufferGetPage(buffer));
2051  visibilitymap_clear(relation,
2052  ItemPointerGetBlockNumber(&(heaptup->t_self)),
2053  vmbuffer, VISIBILITYMAP_VALID_BITS);
2054  }
2055 
2056  /*
2057  * XXX Should we set PageSetPrunable on this page ?
2058  *
2059  * The inserting transaction may eventually abort thus making this tuple
2060  * DEAD and hence available for pruning. Though we don't want to optimize
2061  * for aborts, if no other tuple in this page is UPDATEd/DELETEd, the
2062  * aborted tuple will never be pruned until next vacuum is triggered.
2063  *
2064  * If you do add PageSetPrunable here, add it in heap_xlog_insert too.
2065  */
2066 
2067  MarkBufferDirty(buffer);
2068 
2069  /* XLOG stuff */
2070  if (RelationNeedsWAL(relation))
2071  {
2072  xl_heap_insert xlrec;
2073  xl_heap_header xlhdr;
2074  XLogRecPtr recptr;
2075  Page page = BufferGetPage(buffer);
2076  uint8 info = XLOG_HEAP_INSERT;
2077  int bufflags = 0;
2078 
2079  /*
2080  * If this is a catalog, we need to transmit combo CIDs to properly
2081  * decode, so log that as well.
2082  */
2083  if (RelationIsAccessibleInLogicalDecoding(relation))
2084  log_heap_new_cid(relation, heaptup);
2085 
2086  /*
2087  * If this is the single and first tuple on page, we can reinit the
2088  * page instead of restoring the whole thing. Set flag, and hide
2089  * buffer references from XLogInsert.
2090  */
2091  if (ItemPointerGetOffsetNumber(&(heaptup->t_self)) == FirstOffsetNumber &&
2092  PageGetMaxOffsetNumber(page) == FirstOffsetNumber)
2093  {
2094  info |= XLOG_HEAP_INIT_PAGE;
2095  bufflags |= REGBUF_WILL_INIT;
2096  }
2097 
2098  xlrec.offnum = ItemPointerGetOffsetNumber(&heaptup->t_self);
2099  xlrec.flags = 0;
2100  if (all_visible_cleared)
2101  xlrec.flags |= XLH_INSERT_ALL_VISIBLE_CLEARED;
2102  if (options & HEAP_INSERT_SPECULATIVE)
2103  xlrec.flags |= XLH_INSERT_IS_SPECULATIVE;
2104  Assert(ItemPointerGetBlockNumber(&heaptup->t_self) == BufferGetBlockNumber(buffer));
2105 
2106  /*
2107  * For logical decoding, we need the tuple even if we're doing a full
2108  * page write, so make sure it's included even if we take a full-page
2109  * image. (XXX We could alternatively store a pointer into the FPW).
2110  */
2111  if (RelationIsLogicallyLogged(relation) &&
2112  !(options & HEAP_INSERT_NO_LOGICAL))
2113  {
2114  xlrec.flags |= XLH_INSERT_CONTAINS_NEW_TUPLE;
2115  bufflags |= REGBUF_KEEP_DATA;
2116 
2117  if (IsToastRelation(relation))
2118  xlrec.flags |= XLH_INSERT_ON_TOAST_RELATION;
2119  }
2120 
2121  XLogBeginInsert();
2122  XLogRegisterData((char *) &xlrec, SizeOfHeapInsert);
2123 
2124  xlhdr.t_infomask2 = heaptup->t_data->t_infomask2;
2125  xlhdr.t_infomask = heaptup->t_data->t_infomask;
2126  xlhdr.t_hoff = heaptup->t_data->t_hoff;
2127 
2128  /*
2129  * note we mark xlhdr as belonging to buffer; if XLogInsert decides to
2130  * write the whole page to the xlog, we don't need to store
2131  * xl_heap_header in the xlog.
2132  */
2133  XLogRegisterBuffer(0, buffer, REGBUF_STANDARD | bufflags);
2134  XLogRegisterBufData(0, (char *) &xlhdr, SizeOfHeapHeader);
2135  /* PG73FORMAT: write bitmap [+ padding] [+ oid] + data */
2136  XLogRegisterBufData(0,
2137  (char *) heaptup->t_data + SizeofHeapTupleHeader,
2138  heaptup->t_len - SizeofHeapTupleHeader);
2139 
2140  /* filtering by origin on a row level is much more efficient */
2141  XLogSetRecordFlags(XLOG_INCLUDE_ORIGIN);
2142 
2143  recptr = XLogInsert(RM_HEAP_ID, info);
2144 
2145  PageSetLSN(page, recptr);
2146  }
2147 
2148  END_CRIT_SECTION();
2149 
2150  UnlockReleaseBuffer(buffer);
2151  if (vmbuffer != InvalidBuffer)
2152  ReleaseBuffer(vmbuffer);
2153 
2154  /*
2155  * If tuple is cachable, mark it for invalidation from the caches in case
2156  * we abort. Note it is OK to do this after releasing the buffer, because
2157  * the heaptup data structure is all in local memory, not in the shared
2158  * buffer.
2159  */
2160  CacheInvalidateHeapTuple(relation, heaptup, NULL);
2161 
2162  /* Note: speculative insertions are counted too, even if aborted later */
2163  pgstat_count_heap_insert(relation, 1);
2164 
2165  /*
2166  * If heaptup is a private copy, release it. Don't forget to copy t_self
2167  * back to the caller's image, too.
2168  */
2169  if (heaptup != tup)
2170  {
2171  tup->t_self = heaptup->t_self;
2172  heap_freetuple(heaptup);
2173  }
2174 }
unsigned char uint8
Definition: c.h:504
static HeapTuple heap_prepare_insert(Relation relation, HeapTuple tup, TransactionId xid, CommandId cid, int options)
Definition: heapam.c:2183
#define HEAP_INSERT_SPECULATIVE
Definition: heapam.h:38
#define HEAP_INSERT_NO_LOGICAL
Definition: heapam.h:37
#define XLH_INSERT_ON_TOAST_RELATION
Definition: heapam_xlog.h:75
#define XLH_INSERT_IS_SPECULATIVE
Definition: heapam_xlog.h:73
#define XLH_INSERT_ALL_VISIBLE_CLEARED
Definition: heapam_xlog.h:71
#define XLOG_HEAP_INSERT
Definition: heapam_xlog.h:32
#define SizeOfHeapInsert
Definition: heapam_xlog.h:167
#define XLH_INSERT_CONTAINS_NEW_TUPLE
Definition: heapam_xlog.h:74
#define XLOG_HEAP_INIT_PAGE
Definition: heapam_xlog.h:46
void RelationPutHeapTuple(Relation relation, Buffer buffer, HeapTuple tuple, bool token)
Definition: hio.c:35
Buffer RelationGetBufferForTuple(Relation relation, Size len, Buffer otherBuffer, int options, BulkInsertState bistate, Buffer *vmbuffer, Buffer *vmbuffer_other, int num_pages)
Definition: hio.c:502
#define HeapTupleHeaderGetNatts(tup)
Definition: htup_details.h:529
void pgstat_count_heap_insert(Relation rel, PgStat_Counter n)
#define RelationIsLogicallyLogged(relation)
Definition: rel.h:701
#define RelationGetNumberOfAttributes(relation)
Definition: rel.h:511
OffsetNumber offnum
Definition: heapam_xlog.h:161
#define REGBUF_KEEP_DATA
Definition: xloginsert.h:35
#define REGBUF_WILL_INIT
Definition: xloginsert.h:33

References Assert, BufferGetBlockNumber(), BufferGetPage(), CacheInvalidateHeapTuple(), CheckForSerializableConflictIn(), END_CRIT_SECTION, FirstOffsetNumber, xl_heap_insert::flags, GetCurrentTransactionId(), heap_freetuple(), HEAP_INSERT_NO_LOGICAL, HEAP_INSERT_SPECULATIVE, heap_prepare_insert(), HeapTupleHeaderGetNatts, InvalidBlockNumber, InvalidBuffer, IsToastRelation(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), log_heap_new_cid(), MarkBufferDirty(), xl_heap_insert::offnum, PageClearAllVisible(), PageGetMaxOffsetNumber(), PageIsAllVisible(), PageSetLSN(), pgstat_count_heap_insert(), REGBUF_KEEP_DATA, REGBUF_STANDARD, REGBUF_WILL_INIT, RelationGetBufferForTuple(), RelationGetNumberOfAttributes, RelationIsAccessibleInLogicalDecoding, RelationIsLogicallyLogged, RelationNeedsWAL, RelationPutHeapTuple(), ReleaseBuffer(), SizeOfHeapHeader, SizeOfHeapInsert, SizeofHeapTupleHeader, START_CRIT_SECTION, HeapTupleData::t_data, xl_heap_header::t_hoff, HeapTupleHeaderData::t_hoff, xl_heap_header::t_infomask, HeapTupleHeaderData::t_infomask, xl_heap_header::t_infomask2, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, UnlockReleaseBuffer(), visibilitymap_clear(), VISIBILITYMAP_VALID_BITS, XLH_INSERT_ALL_VISIBLE_CLEARED, XLH_INSERT_CONTAINS_NEW_TUPLE, XLH_INSERT_IS_SPECULATIVE, XLH_INSERT_ON_TOAST_RELATION, XLOG_HEAP_INIT_PAGE, XLOG_HEAP_INSERT, XLOG_INCLUDE_ORIGIN, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), XLogRegisterData(), and XLogSetRecordFlags().

Referenced by heapam_tuple_insert(), heapam_tuple_insert_speculative(), simple_heap_insert(), and toast_save_datum().
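
A sketch (illustrative, not from the source) of forming a tuple and inserting it with heap_insert(); most internal callers use simple_heap_insert() or go through the table AM instead.

#include "postgres.h"

#include "access/heapam.h"
#include "access/htup_details.h"
#include "access/xact.h"
#include "utils/rel.h"

static void
insert_one_row_sketch(Relation rel, Datum *values, bool *isnull)
{
    HeapTuple   tup = heap_form_tuple(RelationGetDescr(rel), values, isnull);

    /*
     * cid stamps the command for visibility purposes; options = 0 requests
     * ordinary FSM use, WAL logging, and logical decoding of the tuple.
     */
    heap_insert(rel, tup, GetCurrentCommandId(true), 0, NULL);

    /* heap_insert() copied the assigned t_self back into tup */
    heap_freetuple(tup);
}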

◆ heap_lock_tuple()

TM_Result heap_lock_tuple ( Relation  relation,
HeapTuple  tuple,
CommandId  cid,
LockTupleMode  mode,
LockWaitPolicy  wait_policy,
bool  follow_updates,
Buffer buffer,
struct TM_FailureData tmfd 
)

Definition at line 4310 of file heapam.c.

4314 {
4315  TM_Result result;
4316  ItemPointer tid = &(tuple->t_self);
4317  ItemId lp;
4318  Page page;
4319  Buffer vmbuffer = InvalidBuffer;
4320  BlockNumber block;
4321  TransactionId xid,
4322  xmax;
4323  uint16 old_infomask,
4324  new_infomask,
4325  new_infomask2;
4326  bool first_time = true;
4327  bool skip_tuple_lock = false;
4328  bool have_tuple_lock = false;
4329  bool cleared_all_frozen = false;
4330 
4331  *buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
4332  block = ItemPointerGetBlockNumber(tid);
4333 
4334  /*
4335  * Before locking the buffer, pin the visibility map page if it appears to
4336  * be necessary. Since we haven't got the lock yet, someone else might be
4337  * in the middle of changing this, so we'll need to recheck after we have
4338  * the lock.
4339  */
4340  if (PageIsAllVisible(BufferGetPage(*buffer)))
4341  visibilitymap_pin(relation, block, &vmbuffer);
4342 
4343  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4344 
4345  page = BufferGetPage(*buffer);
4346  lp = PageGetItemId(page, ItemPointerGetOffsetNumber(tid));
4347  Assert(ItemIdIsNormal(lp));
4348 
4349  tuple->t_data = (HeapTupleHeader) PageGetItem(page, lp);
4350  tuple->t_len = ItemIdGetLength(lp);
4351  tuple->t_tableOid = RelationGetRelid(relation);
4352 
4353 l3:
4354  result = HeapTupleSatisfiesUpdate(tuple, cid, *buffer);
4355 
4356  if (result == TM_Invisible)
4357  {
4358  /*
4359  * This is possible, but only when locking a tuple for ON CONFLICT
4360  * UPDATE. We return this value here rather than throwing an error in
4361  * order to give that case the opportunity to throw a more specific
4362  * error.
4363  */
4364  result = TM_Invisible;
4365  goto out_locked;
4366  }
4367  else if (result == TM_BeingModified ||
4368  result == TM_Updated ||
4369  result == TM_Deleted)
4370  {
4371  TransactionId xwait;
4372  uint16 infomask;
4373  uint16 infomask2;
4374  bool require_sleep;
4375  ItemPointerData t_ctid;
4376 
4377  /* must copy state data before unlocking buffer */
4378  xwait = HeapTupleHeaderGetRawXmax(tuple->t_data);
4379  infomask = tuple->t_data->t_infomask;
4380  infomask2 = tuple->t_data->t_infomask2;
4381  ItemPointerCopy(&tuple->t_data->t_ctid, &t_ctid);
4382 
4383  LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
4384 
4385  /*
4386  * If any subtransaction of the current top transaction already holds
4387  * a lock as strong as or stronger than what we're requesting, we
4388  * effectively hold the desired lock already. We *must* succeed
4389  * without trying to take the tuple lock, else we will deadlock
4390  * against anyone wanting to acquire a stronger lock.
4391  *
4392  * Note we only do this the first time we loop on the HTSU result;
4393  * there is no point in testing in subsequent passes, because
4394  * evidently our own transaction cannot have acquired a new lock after
4395  * the first time we checked.
4396  */
4397  if (first_time)
4398  {
4399  first_time = false;
4400 
4401  if (infomask & HEAP_XMAX_IS_MULTI)
4402  {
4403  int i;
4404  int nmembers;
4405  MultiXactMember *members;
4406 
4407  /*
4408  * We don't need to allow old multixacts here; if that had
4409  * been the case, HeapTupleSatisfiesUpdate would have returned
4410  * MayBeUpdated and we wouldn't be here.
4411  */
4412  nmembers =
4413  GetMultiXactIdMembers(xwait, &members, false,
4414  HEAP_XMAX_IS_LOCKED_ONLY(infomask));
4415 
4416  for (i = 0; i < nmembers; i++)
4417  {
4418  /* only consider members of our own transaction */
4419  if (!TransactionIdIsCurrentTransactionId(members[i].xid))
4420  continue;
4421 
4422  if (TUPLOCK_from_mxstatus(members[i].status) >= mode)
4423  {
4424  pfree(members);
4425  result = TM_Ok;
4426  goto out_unlocked;
4427  }
4428  else
4429  {
4430  /*
4431  * Disable acquisition of the heavyweight tuple lock.
4432  * Otherwise, when promoting a weaker lock, we might
4433  * deadlock with another locker that has acquired the
4434  * heavyweight tuple lock and is waiting for our
4435  * transaction to finish.
4436  *
4437  * Note that in this case we still need to wait for
4438  * the multixact if required, to avoid acquiring
4439  * conflicting locks.
4440  */
4441  skip_tuple_lock = true;
4442  }
4443  }
4444 
4445  if (members)
4446  pfree(members);
4447  }
4448  else if (TransactionIdIsCurrentTransactionId(xwait))
4449  {
4450  switch (mode)
4451  {
4452  case LockTupleKeyShare:
4453  Assert(HEAP_XMAX_IS_KEYSHR_LOCKED(infomask) ||
4454  HEAP_XMAX_IS_SHR_LOCKED(infomask) ||
4455  HEAP_XMAX_IS_EXCL_LOCKED(infomask));
4456  result = TM_Ok;
4457  goto out_unlocked;
4458  case LockTupleShare:
4459  if (HEAP_XMAX_IS_SHR_LOCKED(infomask) ||
4460  HEAP_XMAX_IS_EXCL_LOCKED(infomask))
4461  {
4462  result = TM_Ok;
4463  goto out_unlocked;
4464  }
4465  break;
4466  case LockTupleNoKeyExclusive:
4467  if (HEAP_XMAX_IS_EXCL_LOCKED(infomask))
4468  {
4469  result = TM_Ok;
4470  goto out_unlocked;
4471  }
4472  break;
4473  case LockTupleExclusive:
4474  if (HEAP_XMAX_IS_EXCL_LOCKED(infomask) &&
4475  infomask2 & HEAP_KEYS_UPDATED)
4476  {
4477  result = TM_Ok;
4478  goto out_unlocked;
4479  }
4480  break;
4481  }
4482  }
4483  }
4484 
4485  /*
4486  * Initially assume that we will have to wait for the locking
4487  * transaction(s) to finish. We check various cases below in which
4488  * this can be turned off.
4489  */
4490  require_sleep = true;
4491  if (mode == LockTupleKeyShare)
4492  {
4493  /*
4494  * If we're requesting KeyShare, and there's no update present, we
4495  * don't need to wait. Even if there is an update, we can still
4496  * continue if the key hasn't been modified.
4497  *
4498  * However, if there are updates, we need to walk the update chain
4499  * to mark future versions of the row as locked, too. That way,
4500  * if somebody deletes that future version, we're protected
4501  * against the key going away. This locking of future versions
4502  * could block momentarily, if a concurrent transaction is
4503  * deleting a key; or it could return a value to the effect that
4504  * the transaction deleting the key has already committed. So we
4505  * do this before re-locking the buffer; otherwise this would be
4506  * prone to deadlocks.
4507  *
4508  * Note that the TID we're locking was grabbed before we unlocked
4509  * the buffer. For it to change while we're not looking, the
4510  * other properties we're testing for below after re-locking the
4511  * buffer would also change, in which case we would restart this
4512  * loop above.
4513  */
4514  if (!(infomask2 & HEAP_KEYS_UPDATED))
4515  {
4516  bool updated;
4517 
4518  updated = !HEAP_XMAX_IS_LOCKED_ONLY(infomask);
4519 
4520  /*
4521  * If there are updates, follow the update chain; bail out if
4522  * that cannot be done.
4523  */
4524  if (follow_updates && updated)
4525  {
4526  TM_Result res;
4527 
4528  res = heap_lock_updated_tuple(relation, tuple, &t_ctid,
4529  GetCurrentTransactionId(),
4530  mode);
4531  if (res != TM_Ok)
4532  {
4533  result = res;
4534  /* recovery code expects to have buffer lock held */
4535  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4536  goto failed;
4537  }
4538  }
4539 
4540  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4541 
4542  /*
4543  * Make sure it's still an appropriate lock, else start over.
4544  * Also, if it wasn't updated before we released the lock, but
4545  * is updated now, we start over too; the reason is that we
4546  * now need to follow the update chain to lock the new
4547  * versions.
4548  */
4549  if (!HeapTupleHeaderIsOnlyLocked(tuple->t_data) &&
4550  ((tuple->t_data->t_infomask2 & HEAP_KEYS_UPDATED) ||
4551  !updated))
4552  goto l3;
4553 
4554  /* Things look okay, so we can skip sleeping */
4555  require_sleep = false;
4556 
4557  /*
4558  * Note we allow Xmax to change here; other updaters/lockers
4559  * could have modified it before we grabbed the buffer lock.
4560  * However, this is not a problem, because with the recheck we
4561  * just did we ensure that they still don't conflict with the
4562  * lock we want.
4563  */
4564  }
4565  }
4566  else if (mode == LockTupleShare)
4567  {
4568  /*
4569  * If we're requesting Share, we can similarly avoid sleeping if
4570  * there's no update and no exclusive lock present.
4571  */
4572  if (HEAP_XMAX_IS_LOCKED_ONLY(infomask) &&
4573  !HEAP_XMAX_IS_EXCL_LOCKED(infomask))
4574  {
4575  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4576 
4577  /*
4578  * Make sure it's still an appropriate lock, else start over.
4579  * See above about allowing xmax to change.
4580  */
4581  if (!HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_data->t_infomask) ||
4582  HEAP_XMAX_IS_EXCL_LOCKED(tuple->t_data->t_infomask))
4583  goto l3;
4584  require_sleep = false;
4585  }
4586  }
4587  else if (mode == LockTupleNoKeyExclusive)
4588  {
4589  /*
4590  * If we're requesting NoKeyExclusive, we might also be able to
4591  * avoid sleeping; just ensure that there is no conflicting lock
4592  * already acquired.
4593  */
4594  if (infomask & HEAP_XMAX_IS_MULTI)
4595  {
4596  if (!DoesMultiXactIdConflict((MultiXactId) xwait, infomask,
4597  mode, NULL))
4598  {
4599  /*
4600  * No conflict, but if the xmax changed under us in the
4601  * meantime, start over.
4602  */
4604  if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) ||
4605  !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data),
4606  xwait))
4607  goto l3;
4608 
4609  /* otherwise, we're good */
4610  require_sleep = false;
4611  }
4612  }
4613  else if (HEAP_XMAX_IS_KEYSHR_LOCKED(infomask))
4614  {
4615  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4616 
4617  /* if the xmax changed in the meantime, start over */
4618  if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) ||
4619  !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data),
4620  xwait))
4621  goto l3;
4622  /* otherwise, we're good */
4623  require_sleep = false;
4624  }
4625  }
4626 
4627  /*
4628  * As a check independent from those above, we can also avoid sleeping
4629  * if the current transaction is the sole locker of the tuple. Note
4630  * that the strength of the lock already held is irrelevant; this is
4631  * not about recording the lock in Xmax (which will be done regardless
4632  * of this optimization, below). Also, note that the cases where we
4633  * hold a lock stronger than we are requesting are already handled
4634  * above by not doing anything.
4635  *
4636  * Note we only deal with the non-multixact case here; MultiXactIdWait
4637  * is well equipped to deal with this situation on its own.
4638  */
4639  if (require_sleep && !(infomask & HEAP_XMAX_IS_MULTI) &&
4640  TransactionIdIsCurrentTransactionId(xwait))
4641  {
4642  /* ... but if the xmax changed in the meantime, start over */
4644  if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) ||
4645  !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data),
4646  xwait))
4647  goto l3;
4649  require_sleep = false;
4650  }
4651 
4652  /*
4653  * Time to sleep on the other transaction/multixact, if necessary.
4654  *
4655  * If the other transaction is an update/delete that's already
4656  * committed, then sleeping cannot possibly do any good: if we're
4657  * required to sleep, get out to raise an error instead.
4658  *
4659  * By here, we either have already acquired the buffer exclusive lock,
4660  * or we must wait for the locking transaction or multixact; so below
4661  * we ensure that we grab buffer lock after the sleep.
4662  */
4663  if (require_sleep && (result == TM_Updated || result == TM_Deleted))
4664  {
4665  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4666  goto failed;
4667  }
4668  else if (require_sleep)
4669  {
4670  /*
4671  * Acquire tuple lock to establish our priority for the tuple, or
4672  * die trying. LockTuple will release us when we are next-in-line
4673  * for the tuple. We must do this even if we are share-locking,
4674  * but not if we already have a weaker lock on the tuple.
4675  *
4676  * If we are forced to "start over" below, we keep the tuple lock;
4677  * this arranges that we stay at the head of the line while
4678  * rechecking tuple state.
4679  */
4680  if (!skip_tuple_lock &&
4681  !heap_acquire_tuplock(relation, tid, mode, wait_policy,
4682  &have_tuple_lock))
4683  {
4684  /*
4685  * This can only happen if wait_policy is Skip and the lock
4686  * couldn't be obtained.
4687  */
4688  result = TM_WouldBlock;
4689  /* recovery code expects to have buffer lock held */
4690  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4691  goto failed;
4692  }
4693 
4694  if (infomask & HEAP_XMAX_IS_MULTI)
4695  {
4696  MultiXactStatus status = get_mxact_status_for_lock(mode, false);
4697 
4698  /* We only ever lock tuples, never update them */
4699  if (status >= MultiXactStatusNoKeyUpdate)
4700  elog(ERROR, "invalid lock mode in heap_lock_tuple");
4701 
4702  /* wait for multixact to end, or die trying */
4703  switch (wait_policy)
4704  {
4705  case LockWaitBlock:
4706  MultiXactIdWait((MultiXactId) xwait, status, infomask,
4707  relation, &tuple->t_self, XLTW_Lock, NULL);
4708  break;
4709  case LockWaitSkip:
4710  if (!ConditionalMultiXactIdWait((MultiXactId) xwait,
4711  status, infomask, relation,
4712  NULL))
4713  {
4714  result = TM_WouldBlock;
4715  /* recovery code expects to have buffer lock held */
4716  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4717  goto failed;
4718  }
4719  break;
4720  case LockWaitError:
4721  if (!ConditionalMultiXactIdWait((MultiXactId) xwait,
4722  status, infomask, relation,
4723  NULL))
4724  ereport(ERROR,
4725  (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
4726  errmsg("could not obtain lock on row in relation \"%s\"",
4727  RelationGetRelationName(relation))));
4728 
4729  break;
4730  }
4731 
4732  /*
4733  * Of course, the multixact might not be done here: if we're
4734  * requesting a light lock mode, other transactions with light
4735  * locks could still be alive, as well as locks owned by our
4736  * own xact or other subxacts of this backend. We need to
4737  * preserve the surviving MultiXact members. Note that it
4738  * isn't absolutely necessary in the latter case, but doing so
4739  * is simpler.
4740  */
4741  }
4742  else
4743  {
4744  /* wait for regular transaction to end, or die trying */
4745  switch (wait_policy)
4746  {
4747  case LockWaitBlock:
4748  XactLockTableWait(xwait, relation, &tuple->t_self,
4749  XLTW_Lock);
4750  break;
4751  case LockWaitSkip:
4752  if (!ConditionalXactLockTableWait(xwait))
4753  {
4754  result = TM_WouldBlock;
4755  /* recovery code expects to have buffer lock held */
4756  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4757  goto failed;
4758  }
4759  break;
4760  case LockWaitError:
4761  if (!ConditionalXactLockTableWait(xwait))
4762  ereport(ERROR,
4763  (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
4764  errmsg("could not obtain lock on row in relation \"%s\"",
4765  RelationGetRelationName(relation))));
4766  break;
4767  }
4768  }
4769 
4770  /* if there are updates, follow the update chain */
4771  if (follow_updates && !HEAP_XMAX_IS_LOCKED_ONLY(infomask))
4772  {
4773  TM_Result res;
4774 
4775  res = heap_lock_updated_tuple(relation, tuple, &t_ctid,
4776  GetCurrentTransactionId(),
4777  mode);
4778  if (res != TM_Ok)
4779  {
4780  result = res;
4781  /* recovery code expects to have buffer lock held */
4782  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4783  goto failed;
4784  }
4785  }
4786 
4787  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4788 
4789  /*
4790  * xwait is done, but if xwait had just locked the tuple then some
4791  * other xact could update this tuple before we get to this point.
4792  * Check for xmax change, and start over if so.
4793  */
4794  if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) ||
4795  !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data),
4796  xwait))
4797  goto l3;
4798 
4799  if (!(infomask & HEAP_XMAX_IS_MULTI))
4800  {
4801  /*
4802  * Otherwise check if it committed or aborted. Note we cannot
4803  * be here if the tuple was only locked by somebody who didn't
4804  * conflict with us; that would have been handled above. So
4805  * that transaction must necessarily be gone by now. But
4806  * don't check for this in the multixact case, because some
4807  * locker transactions might still be running.
4808  */
4809  UpdateXmaxHintBits(tuple->t_data, *buffer, xwait);
4810  }
4811  }
4812 
4813  /* By here, we're certain that we hold buffer exclusive lock again */
4814 
4815  /*
4816  * We may lock if previous xmax aborted, or if it committed but only
4817  * locked the tuple without updating it; or if we didn't have to wait
4818  * at all for whatever reason.
4819  */
4820  if (!require_sleep ||
4821  (tuple->t_data->t_infomask & HEAP_XMAX_INVALID) ||
4822  HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_data->t_infomask) ||
4823  HeapTupleHeaderIsOnlyLocked(tuple->t_data))
4824  result = TM_Ok;
4825  else if (!ItemPointerEquals(&tuple->t_self, &tuple->t_data->t_ctid))
4826  result = TM_Updated;
4827  else
4828  result = TM_Deleted;
4829  }
4830 
4831 failed:
4832  if (result != TM_Ok)
4833  {
4834  Assert(result == TM_SelfModified || result == TM_Updated ||
4835  result == TM_Deleted || result == TM_WouldBlock);
4836 
4837  /*
4838  * When locking a tuple under LockWaitSkip semantics and we fail with
4839  * TM_WouldBlock above, it's possible for concurrent transactions to
4840  * release the lock and set HEAP_XMAX_INVALID in the meantime. So
4841  * this assert is slightly different from the equivalent one in
4842  * heap_delete and heap_update.
4843  */
4844  Assert((result == TM_WouldBlock) ||
4845  !(tuple->t_data->t_infomask & HEAP_XMAX_INVALID));
4846  Assert(result != TM_Updated ||
4847  !ItemPointerEquals(&tuple->t_self, &tuple->t_data->t_ctid));
4848  tmfd->ctid = tuple->t_data->t_ctid;
4849  tmfd->xmax = HeapTupleHeaderGetUpdateXid(tuple->t_data);
4850  if (result == TM_SelfModified)
4851  tmfd->cmax = HeapTupleHeaderGetCmax(tuple->t_data);
4852  else
4853  tmfd->cmax = InvalidCommandId;
4854  goto out_locked;
4855  }
4856 
4857  /*
4858  * If we didn't pin the visibility map page and the page has become all
4859  * visible while we were busy locking the buffer, or during some
4860  * subsequent window during which we had it unlocked, we'll have to unlock
4861  * and re-lock, to avoid holding the buffer lock across I/O. That's a bit
4862  * unfortunate, especially since we'll now have to recheck whether the
4863  * tuple has been locked or updated under us, but hopefully it won't
4864  * happen very often.
4865  */
4866  if (vmbuffer == InvalidBuffer && PageIsAllVisible(page))
4867  {
4868  LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
4869  visibilitymap_pin(relation, block, &vmbuffer);
4870  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4871  goto l3;
4872  }
4873 
4874  xmax = HeapTupleHeaderGetRawXmax(tuple->t_data);
4875  old_infomask = tuple->t_data->t_infomask;
4876 
4877  /*
4878  * If this is the first possibly-multixact-able operation in the current
4879  * transaction, set my per-backend OldestMemberMXactId setting. We can be
4880  * certain that the transaction will never become a member of any older
4881  * MultiXactIds than that. (We have to do this even if we end up just
4882  * using our own TransactionId below, since some other backend could
4883  * incorporate our XID into a MultiXact immediately afterwards.)
4884  */
4885  MultiXactIdSetOldestMember();
4886 
4887  /*
4888  * Compute the new xmax and infomask to store into the tuple. Note we do
4889  * not modify the tuple just yet, because that would leave it in the wrong
4890  * state if multixact.c elogs.
4891  */
4892  compute_new_xmax_infomask(xmax, old_infomask, tuple->t_data->t_infomask2,
4893  GetCurrentTransactionId(), mode, false,
4894  &xid, &new_infomask, &new_infomask2);
4895 
4896  START_CRIT_SECTION();
4897 
4898  /*
4899  * Store transaction information of xact locking the tuple.
4900  *
4901  * Note: Cmax is meaningless in this context, so don't set it; this avoids
4902  * possibly generating a useless combo CID. Moreover, if we're locking a
4903  * previously updated tuple, it's important to preserve the Cmax.
4904  *
4905  * Also reset the HOT UPDATE bit, but only if there's no update; otherwise
4906  * we would break the HOT chain.
4907  */
4908  tuple->t_data->t_infomask &= ~HEAP_XMAX_BITS;
4909  tuple->t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED;
4910  tuple->t_data->t_infomask |= new_infomask;
4911  tuple->t_data->t_infomask2 |= new_infomask2;
4912  if (HEAP_XMAX_IS_LOCKED_ONLY(new_infomask))
4913  HeapTupleHeaderClearHotUpdated(tuple->t_data);
4914  HeapTupleHeaderSetXmax(tuple->t_data, xid);
4915 
4916  /*
4917  * Make sure there is no forward chain link in t_ctid. Note that in the
4918  * cases where the tuple has been updated, we must not overwrite t_ctid,
4919  * because it was set by the updater. Moreover, if the tuple has been
4920  * updated, we need to follow the update chain to lock the new versions of
4921  * the tuple as well.
4922  */
4923  if (HEAP_XMAX_IS_LOCKED_ONLY(new_infomask))
4924  tuple->t_data->t_ctid = *tid;
4925 
4926  /* Clear only the all-frozen bit on visibility map if needed */
4927  if (PageIsAllVisible(page) &&
4928  visibilitymap_clear(relation, block, vmbuffer,
4929  VISIBILITYMAP_ALL_FROZEN))
4930  cleared_all_frozen = true;
4931 
4932 
4933  MarkBufferDirty(*buffer);
4934 
4935  /*
4936  * XLOG stuff. You might think that we don't need an XLOG record because
4937  * there is no state change worth restoring after a crash. You would be
4938  * wrong however: we have just written either a TransactionId or a
4939  * MultiXactId that may never have been seen on disk before, and we need
4940  * to make sure that there are XLOG entries covering those ID numbers.
4941  * Else the same IDs might be re-used after a crash, which would be
4942  * disastrous if this page made it to disk before the crash. Essentially
4943  * we have to enforce the WAL log-before-data rule even in this case.
4944  * (Also, in a PITR log-shipping or 2PC environment, we have to have XLOG
4945  * entries for everything anyway.)
4946  */
4947  if (RelationNeedsWAL(relation))
4948  {
4949  xl_heap_lock xlrec;
4950  XLogRecPtr recptr;
4951 
4952  XLogBeginInsert();
4953  XLogRegisterBuffer(0, *buffer, REGBUF_STANDARD);
4954 
4955  xlrec.offnum = ItemPointerGetOffsetNumber(&tuple->t_self);
4956  xlrec.xmax = xid;
4957  xlrec.infobits_set = compute_infobits(new_infomask,
4958  tuple->t_data->t_infomask2);
4959  xlrec.flags = cleared_all_frozen ? XLH_LOCK_ALL_FROZEN_CLEARED : 0;
4960  XLogRegisterData((char *) &xlrec, SizeOfHeapLock);
4961 
4962  /* we don't decode row locks atm, so no need to log the origin */
4963 
4964  recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_LOCK);
4965 
4966  PageSetLSN(page, recptr);
4967  }
4968 
4969  END_CRIT_SECTION();
4970 
4971  result = TM_Ok;
4972 
4973 out_locked:
4974  LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
4975 
4976 out_unlocked:
4977  if (BufferIsValid(vmbuffer))
4978  ReleaseBuffer(vmbuffer);
4979 
4980  /*
4981  * Don't update the visibility map here. Locking a tuple doesn't change
4982  * visibility info.
4983  */
4984 
4985  /*
4986  * Now that we have successfully marked the tuple as locked, we can
4987  * release the lmgr tuple lock, if we had it.
4988  */
4989  if (have_tuple_lock)
4990  UnlockTupleTuplock(relation, tid, mode);
4991 
4992  return result;
4993 }
#define TUPLOCK_from_mxstatus(status)
Definition: heapam.c:219
static TM_Result heap_lock_updated_tuple(Relation rel, HeapTuple tuple, ItemPointer ctid, TransactionId xid, LockTupleMode mode)
Definition: heapam.c:5774
static bool ConditionalMultiXactIdWait(MultiXactId multi, MultiXactStatus status, uint16 infomask, Relation rel, int *remaining)
Definition: heapam.c:7314
static MultiXactStatus get_mxact_status_for_lock(LockTupleMode mode, bool is_update)
Definition: heapam.c:4262
#define XLH_LOCK_ALL_FROZEN_CLEARED
Definition: heapam_xlog.h:392
#define XLOG_HEAP_LOCK
Definition: heapam_xlog.h:38
#define SizeOfHeapLock
Definition: heapam_xlog.h:403
#define HEAP_XMAX_IS_EXCL_LOCKED(infomask)
Definition: htup_details.h:261
#define HEAP_XMAX_IS_KEYSHR_LOCKED(infomask)
Definition: htup_details.h:263
#define HEAP_XMAX_IS_SHR_LOCKED(infomask)
Definition: htup_details.h:259
static void ItemPointerCopy(const ItemPointerData *fromPointer, ItemPointerData *toPointer)
Definition: itemptr.h:172
bool ConditionalXactLockTableWait(TransactionId xid)
Definition: lmgr.c:730
@ XLTW_Lock
Definition: lmgr.h:29
@ LockWaitSkip
Definition: lockoptions.h:41
@ LockWaitError
Definition: lockoptions.h:43
@ LockTupleNoKeyExclusive
Definition: lockoptions.h:56
@ LockTupleShare
Definition: lockoptions.h:54
@ LockTupleKeyShare
Definition: lockoptions.h:52
int GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **members, bool from_pgupgrade, bool isLockOnly)
Definition: multixact.c:1299
MultiXactStatus
Definition: multixact.h:38
@ MultiXactStatusNoKeyUpdate
Definition: multixact.h:44
static PgChecksumMode mode
Definition: pg_checksums.c:56
#define RelationGetRelationName(relation)
Definition: rel.h:539
uint8 infobits_set
Definition: heapam_xlog.h:399
OffsetNumber offnum
Definition: heapam_xlog.h:398
TransactionId xmax
Definition: heapam_xlog.h:397
@ TM_WouldBlock
Definition: tableam.h:110
#define VISIBILITYMAP_ALL_FROZEN

References Assert, BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetPage(), BufferIsValid(), TM_FailureData::cmax, compute_infobits(), compute_new_xmax_infomask(), ConditionalMultiXactIdWait(), ConditionalXactLockTableWait(), TM_FailureData::ctid, DoesMultiXactIdConflict(), elog, END_CRIT_SECTION, ereport, errcode(), errmsg(), ERROR, xl_heap_lock::flags, get_mxact_status_for_lock(), GetCurrentTransactionId(), GetMultiXactIdMembers(), heap_acquire_tuplock(), HEAP_KEYS_UPDATED, heap_lock_updated_tuple(), HEAP_XMAX_BITS, HEAP_XMAX_INVALID, HEAP_XMAX_IS_EXCL_LOCKED, HEAP_XMAX_IS_KEYSHR_LOCKED, HEAP_XMAX_IS_LOCKED_ONLY, HEAP_XMAX_IS_MULTI, HEAP_XMAX_IS_SHR_LOCKED, HeapTupleHeaderClearHotUpdated, HeapTupleHeaderGetCmax(), HeapTupleHeaderGetRawXmax, HeapTupleHeaderGetUpdateXid, HeapTupleHeaderIsOnlyLocked(), HeapTupleHeaderSetXmax, HeapTupleSatisfiesUpdate(), i, xl_heap_lock::infobits_set, InvalidBuffer, InvalidCommandId, ItemIdGetLength, ItemIdIsNormal, ItemPointerCopy(), ItemPointerEquals(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), LockBuffer(), LockTupleExclusive, LockTupleKeyShare, LockTupleNoKeyExclusive, LockTupleShare, LockWaitBlock, LockWaitError, LockWaitSkip, MarkBufferDirty(), mode, MultiXactIdSetOldestMember(), MultiXactIdWait(), MultiXactStatusNoKeyUpdate, xl_heap_lock::offnum, PageGetItem(), PageGetItemId(), PageIsAllVisible(), PageSetLSN(), pfree(), ReadBuffer(), REGBUF_STANDARD, RelationGetRelationName, RelationGetRelid, RelationNeedsWAL, ReleaseBuffer(), res, SizeOfHeapLock, START_CRIT_SECTION, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TM_BeingModified, TM_Deleted, TM_Invisible, TM_Ok, TM_SelfModified, TM_Updated, TM_WouldBlock, TransactionIdEquals, TransactionIdIsCurrentTransactionId(), TUPLOCK_from_mxstatus, UnlockTupleTuplock, UpdateXmaxHintBits(), VISIBILITYMAP_ALL_FROZEN, visibilitymap_clear(), visibilitymap_pin(), XactLockTableWait(), XLH_LOCK_ALL_FROZEN_CLEARED, XLOG_HEAP_LOCK, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), XLogRegisterData(), XLTW_Lock, xl_heap_lock::xmax, TM_FailureData::xmax, and xmax_infomask_changed().

Referenced by heapam_tuple_lock().
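The sketch below is not taken from the PostgreSQL sources; it only illustrates how a caller in the style of heapam_tuple_lock() might drive this routine, assuming the heap_lock_tuple() prototype declared in heapam.h. The helper name and the simplified error handling are hypothetical.

#include "postgres.h"

#include "access/heapam.h"
#include "access/xact.h"
#include "storage/bufmgr.h"

/* Hypothetical helper: lock one tuple version, or error out. */
static void
lock_one_tuple_example(Relation rel, ItemPointer tid)
{
	HeapTupleData tuple;
	Buffer		buffer;
	TM_FailureData tmfd;
	TM_Result	result;

	/* heap_lock_tuple() locates the target tuple through tuple.t_self */
	tuple.t_self = *tid;

	result = heap_lock_tuple(rel, &tuple,
							 GetCurrentCommandId(true),
							 LockTupleExclusive,	/* strongest row-level lock */
							 LockWaitBlock, /* sleep until the lock is free */
							 true,	/* follow_updates */
							 &buffer, &tmfd);

	/* the tuple's buffer is returned pinned in all cases */
	ReleaseBuffer(buffer);

	if (result != TM_Ok)
		elog(ERROR, "could not lock tuple: TM_Result %d", (int) result);
}

On a non-TM_Ok return, a real caller would typically examine the fields filled in at the failed: label above (tmfd.ctid, tmfd.xmax, tmfd.cmax) to decide how to proceed.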

◆ heap_multi_insert()

void heap_multi_insert ( Relation  relation,
struct TupleTableSlot **  slots,
int  ntuples,
CommandId  cid,
int  options,
BulkInsertState  bistate 
)

Definition at line 2263 of file heapam.c.

2265 {
2266  TransactionId xid = GetCurrentTransactionId();
2267  HeapTuple *heaptuples;
2268  int i;
2269  int ndone;
2270  PGAlignedBlock scratch;
2271  Page page;
2272  Buffer vmbuffer = InvalidBuffer;
2273  bool needwal;
2274  Size saveFreeSpace;
2275  bool need_tuple_data = RelationIsLogicallyLogged(relation);
2276  bool need_cids = RelationIsAccessibleInLogicalDecoding(relation);
2277  bool starting_with_empty_page = false;
2278  int npages = 0;
2279  int npages_used = 0;
2280 
2281  /* currently not needed (thus unsupported) for heap_multi_insert() */
2282  Assert(!(options & HEAP_INSERT_NO_LOGICAL));
2283 
2284  needwal = RelationNeedsWAL(relation);
2285  saveFreeSpace = RelationGetTargetPageFreeSpace(relation,
2286  HEAP_DEFAULT_FILLFACTOR);
2287 
2288  /* Toast and set header data in all the slots */
2289  heaptuples = palloc(ntuples * sizeof(HeapTuple));
2290  for (i = 0; i < ntuples; i++)
2291  {
2292  HeapTuple tuple;
2293 
2294  tuple = ExecFetchSlotHeapTuple(slots[i], true, NULL);
2295  slots[i]->tts_tableOid = RelationGetRelid(relation);
2296  tuple->t_tableOid = slots[i]->tts_tableOid;
2297  heaptuples[i] = heap_prepare_insert(relation, tuple, xid, cid,
2298  options);
2299  }
2300 
2301  /*
2302  * We're about to do the actual inserts -- but check for conflict first,
2303  * to minimize the possibility of having to roll back work we've just
2304  * done.
2305  *
2306  * A check here does not definitively prevent a serialization anomaly;
2307  * that check MUST be done at least past the point of acquiring an
2308  * exclusive buffer content lock on every buffer that will be affected,
2309  * and MAY be done after all inserts are reflected in the buffers and
2310  * those locks are released; otherwise there is a race condition. Since
2311  * multiple buffers can be locked and unlocked in the loop below, and it
2312  * would not be feasible to identify and lock all of those buffers before
2313  * the loop, we must do a final check at the end.
2314  *
2315  * The check here could be omitted with no loss of correctness; it is
2316  * present strictly as an optimization.
2317  *
2318  * For heap inserts, we only need to check for table-level SSI locks. Our
2319  * new tuples can't possibly conflict with existing tuple locks, and heap
2320  * page locks are only consolidated versions of tuple locks; they do not
2321  * lock "gaps" as index page locks do. So we don't need to specify a
2322  * buffer when making the call, which makes for a faster check.
2323  */
2324  CheckForSerializableConflictIn(relation, NULL, InvalidBlockNumber);
2325 
2326  ndone = 0;
2327  while (ndone < ntuples)
2328  {
2329  Buffer buffer;
2330  bool all_visible_cleared = false;
2331  bool all_frozen_set = false;
2332  int nthispage;
2333 
2334  CHECK_FOR_INTERRUPTS();
2335 
2336  /*
2337  * Compute number of pages needed to fit the to-be-inserted tuples in
2338  * the worst case. This will be used to determine how much to extend
2339  * the relation by in RelationGetBufferForTuple(), if needed. If we
2340  * filled a prior page from scratch, we can just update our last
2341  * computation, but if we started with a partially filled page,
2342  * recompute from scratch, the number of potentially required pages
2343  * can vary due to tuples needing to fit onto the page, page headers
2344  * etc.
2345  */
2346  if (ndone == 0 || !starting_with_empty_page)
2347  {
2348  npages = heap_multi_insert_pages(heaptuples, ndone, ntuples,
2349  saveFreeSpace);
2350  npages_used = 0;
2351  }
2352  else
2353  npages_used++;
2354 
2355  /*
2356  * Find buffer where at least the next tuple will fit. If the page is
2357  * all-visible, this will also pin the requisite visibility map page.
2358  *
2359  * Also pin visibility map page if COPY FREEZE inserts tuples into an
2360  * empty page. See all_frozen_set below.
2361  */
2362  buffer = RelationGetBufferForTuple(relation, heaptuples[ndone]->t_len,
2363  InvalidBuffer, options, bistate,
2364  &vmbuffer, NULL,
2365  npages - npages_used);
2366  page = BufferGetPage(buffer);
2367 
2368  starting_with_empty_page = PageGetMaxOffsetNumber(page) == 0;
2369 
2370  if (starting_with_empty_page && (options & HEAP_INSERT_FROZEN))
2371  all_frozen_set = true;
2372 
2373  /* NO EREPORT(ERROR) from here till changes are logged */
2374  START_CRIT_SECTION();
2375 
2376  /*
2377  * RelationGetBufferForTuple has ensured that the first tuple fits.
2378  * Put that on the page, and then as many other tuples as fit.
2379  */
2380  RelationPutHeapTuple(relation, buffer, heaptuples[ndone], false);
2381 
2382  /*
2383  * For logical decoding we need combo CIDs to properly decode the
2384  * catalog.
2385  */
2386  if (needwal && need_cids)
2387  log_heap_new_cid(relation, heaptuples[ndone]);
2388 
2389  for (nthispage = 1; ndone + nthispage < ntuples; nthispage++)
2390  {
2391  HeapTuple heaptup = heaptuples[ndone + nthispage];
2392 
2393  if (PageGetHeapFreeSpace(page) < MAXALIGN(heaptup->t_len) + saveFreeSpace)
2394  break;
2395 
2396  RelationPutHeapTuple(relation, buffer, heaptup, false);
2397 
2398  /*
2399  * For logical decoding we need combo CIDs to properly decode the
2400  * catalog.
2401  */
2402  if (needwal && need_cids)
2403  log_heap_new_cid(relation, heaptup);
2404  }
2405 
2406  /*
2407  * If the page is all visible, need to clear that, unless we're only
2408  * going to add further frozen rows to it.
2409  *
2410  * If we're only adding already frozen rows to a previously empty
2411  * page, mark it as all-visible.
2412  */
2413  if (PageIsAllVisible(page) && !(options & HEAP_INSERT_FROZEN))
2414  {
2415  all_visible_cleared = true;
2416  PageClearAllVisible(page);
2417  visibilitymap_clear(relation,
2418  BufferGetBlockNumber(buffer),
2419  vmbuffer, VISIBILITYMAP_VALID_BITS);
2420  }
2421  else if (all_frozen_set)
2422  PageSetAllVisible(page);
2423 
2424  /*
2425  * XXX Should we set PageSetPrunable on this page ? See heap_insert()
2426  */
2427 
2428  MarkBufferDirty(buffer);
2429 
2430  /* XLOG stuff */
2431  if (needwal)
2432  {
2433  XLogRecPtr recptr;
2434  xl_heap_multi_insert *xlrec;
2435  uint8 info = XLOG_HEAP2_MULTI_INSERT;
2436  char *tupledata;
2437  int totaldatalen;
2438  char *scratchptr = scratch.data;
2439  bool init;
2440  int bufflags = 0;
2441 
2442  /*
2443  * If the page was previously empty, we can reinit the page
2444  * instead of restoring the whole thing.
2445  */
2446  init = starting_with_empty_page;
2447 
2448  /* allocate xl_heap_multi_insert struct from the scratch area */
2449  xlrec = (xl_heap_multi_insert *) scratchptr;
2450  scratchptr += SizeOfHeapMultiInsert;
2451 
2452  /*
2453  * Allocate offsets array. Unless we're reinitializing the page,
2454  * in that case the tuples are stored in order starting at
2455  * FirstOffsetNumber and we don't need to store the offsets
2456  * explicitly.
2457  */
2458  if (!init)
2459  scratchptr += nthispage * sizeof(OffsetNumber);
2460 
2461  /* the rest of the scratch space is used for tuple data */
2462  tupledata = scratchptr;
2463 
2464  /* check that the mutually exclusive flags are not both set */
2465  Assert(!(all_visible_cleared && all_frozen_set));
2466 
2467  xlrec->flags = 0;
2468  if (all_visible_cleared)
2469  xlrec->flags = XLH_INSERT_ALL_VISIBLE_CLEARED;
2470  if (all_frozen_set)
2471  xlrec->flags = XLH_INSERT_ALL_FROZEN_SET;
2472 
2473  xlrec->ntuples = nthispage;
2474 
2475  /*
2476  * Write out an xl_multi_insert_tuple and the tuple data itself
2477  * for each tuple.
2478  */
2479  for (i = 0; i < nthispage; i++)
2480  {
2481  HeapTuple heaptup = heaptuples[ndone + i];
2482  xl_multi_insert_tuple *tuphdr;
2483  int datalen;
2484 
2485  if (!init)
2486  xlrec->offsets[i] = ItemPointerGetOffsetNumber(&heaptup->t_self);
2487  /* xl_multi_insert_tuple needs two-byte alignment. */
2488  tuphdr = (xl_multi_insert_tuple *) SHORTALIGN(scratchptr);
2489  scratchptr = ((char *) tuphdr) + SizeOfMultiInsertTuple;
2490 
2491  tuphdr->t_infomask2 = heaptup->t_data->t_infomask2;
2492  tuphdr->t_infomask = heaptup->t_data->t_infomask;
2493  tuphdr->t_hoff = heaptup->t_data->t_hoff;
2494 
2495  /* write bitmap [+ padding] [+ oid] + data */
2496  datalen = heaptup->t_len - SizeofHeapTupleHeader;
2497  memcpy(scratchptr,
2498  (char *) heaptup->t_data + SizeofHeapTupleHeader,
2499  datalen);
2500  tuphdr->datalen = datalen;
2501  scratchptr += datalen;
2502  }
2503  totaldatalen = scratchptr - tupledata;
2504  Assert((scratchptr - scratch.data) < BLCKSZ);
2505 
2506  if (need_tuple_data)
2507  xlrec->flags |= XLH_INSERT_CONTAINS_NEW_TUPLE;
2508 
2509  /*
2510  * Signal that this is the last xl_heap_multi_insert record
2511  * emitted by this call to heap_multi_insert(). Needed for logical
2512  * decoding so it knows when to cleanup temporary data.
2513  */
2514  if (ndone + nthispage == ntuples)
2515  xlrec->flags |= XLH_INSERT_LAST_IN_MULTI;
2516 
2517  if (init)
2518  {
2519  info |= XLOG_HEAP_INIT_PAGE;
2520  bufflags |= REGBUF_WILL_INIT;
2521  }
2522 
2523  /*
2524  * If we're doing logical decoding, include the new tuple data
2525  * even if we take a full-page image of the page.
2526  */
2527  if (need_tuple_data)
2528  bufflags |= REGBUF_KEEP_DATA;
2529 
2530  XLogBeginInsert();
2531  XLogRegisterData((char *) xlrec, tupledata - scratch.data);
2532  XLogRegisterBuffer(0, buffer, REGBUF_STANDARD | bufflags);
2533 
2534  XLogRegisterBufData(0, tupledata, totaldatalen);
2535 
2536  /* filtering by origin on a row level is much more efficient */
2537  XLogSetRecordFlags(XLOG_INCLUDE_ORIGIN);
2538 
2539  recptr = XLogInsert(RM_HEAP2_ID, info);
2540 
2541  PageSetLSN(page, recptr);
2542  }
2543 
2544  END_CRIT_SECTION();
2545 
2546  /*
2547  * If we've frozen everything on the page, update the visibilitymap.
2548  * We're already holding pin on the vmbuffer.
2549  */
2550  if (all_frozen_set)
2551  {
2552  Assert(PageIsAllVisible(page));
2553  Assert(visibilitymap_pin_ok(BufferGetBlockNumber(buffer), vmbuffer));
2554 
2555  /*
2556  * It's fine to use InvalidTransactionId here - this is only used
2557  * when HEAP_INSERT_FROZEN is specified, which intentionally
2558  * violates visibility rules.
2559  */
2560  visibilitymap_set(relation, BufferGetBlockNumber(buffer), buffer,
2561  InvalidXLogRecPtr, vmbuffer,
2562  InvalidTransactionId,
2563  VISIBILITYMAP_ALL_VISIBLE | VISIBILITYMAP_ALL_FROZEN);
2564  }
2565 
2566  UnlockReleaseBuffer(buffer);
2567  ndone += nthispage;
2568 
2569  /*
2570  * NB: Only release vmbuffer after inserting all tuples - it's fairly
2571  * likely that we'll insert into subsequent heap pages that are likely
2572  * to use the same vm page.
2573  */
2574  }
2575 
2576  /* We're done with inserting all tuples, so release the last vmbuffer. */
2577  if (vmbuffer != InvalidBuffer)
2578  ReleaseBuffer(vmbuffer);
2579 
2580  /*
2581  * We're done with the actual inserts. Check for conflicts again, to
2582  * ensure that all rw-conflicts in to these inserts are detected. Without
2583  * this final check, a sequential scan of the heap may have locked the
2584  * table after the "before" check, missing one opportunity to detect the
2585  * conflict, and then scanned the table before the new tuples were there,
2586  * missing the other chance to detect the conflict.
2587  *
2588  * For heap inserts, we only need to check for table-level SSI locks. Our
2589  * new tuples can't possibly conflict with existing tuple locks, and heap
2590  * page locks are only consolidated versions of tuple locks; they do not
2591  * lock "gaps" as index page locks do. So we don't need to specify a
2592  * buffer when making the call.
2593  */
2594  CheckForSerializableConflictIn(relation, NULL, InvalidBlockNumber);
2595 
2596  /*
2597  * If tuples are cachable, mark them for invalidation from the caches in
2598  * case we abort. Note it is OK to do this after releasing the buffer,
2599  * because the heaptuples data structure is all in local memory, not in
2600  * the shared buffer.
2601  */
2602  if (IsCatalogRelation(relation))
2603  {
2604  for (i = 0; i < ntuples; i++)
2605  CacheInvalidateHeapTuple(relation, heaptuples[i], NULL);
2606  }
2607 
2608  /* copy t_self fields back to the caller's slots */
2609  for (i = 0; i < ntuples; i++)
2610  slots[i]->tts_tid = heaptuples[i]->t_self;
2611 
2612  pgstat_count_heap_insert(relation, ntuples);
2613 }
Size PageGetHeapFreeSpace(Page page)
Definition: bufpage.c:991
static void PageSetAllVisible(Page page)
Definition: bufpage.h:434
#define MAXALIGN(LEN)
Definition: c.h:811
#define SHORTALIGN(LEN)
Definition: c.h:807
size_t Size
Definition: c.h:605
HeapTuple ExecFetchSlotHeapTuple(TupleTableSlot *slot, bool materialize, bool *shouldFree)
Definition: execTuples.c:1731
static int heap_multi_insert_pages(HeapTuple *heaptuples, int done, int ntuples, Size saveFreeSpace)
Definition: heapam.c:2231
#define HEAP_INSERT_FROZEN
Definition: heapam.h:36
#define SizeOfHeapMultiInsert
Definition: heapam_xlog.h:187
#define XLOG_HEAP2_MULTI_INSERT
Definition: heapam_xlog.h:63
#define XLH_INSERT_LAST_IN_MULTI
Definition: heapam_xlog.h:72
#define XLH_INSERT_ALL_FROZEN_SET
Definition: heapam_xlog.h:78
#define SizeOfMultiInsertTuple
Definition: heapam_xlog.h:198
int init
Definition: isn.c:75
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:122
#define RelationGetTargetPageFreeSpace(relation, defaultff)
Definition: rel.h:378
#define HEAP_DEFAULT_FILLFACTOR
Definition: rel.h:349
Oid tts_tableOid
Definition: tuptable.h:130
OffsetNumber offsets[FLEXIBLE_ARRAY_MEMBER]
Definition: heapam_xlog.h:184
char data[BLCKSZ]
Definition: c.h:1119
void visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf, XLogRecPtr recptr, Buffer vmBuf, TransactionId cutoff_xid, uint8 flags)
bool visibilitymap_pin_ok(BlockNumber heapBlk, Buffer vmbuf)
#define VISIBILITYMAP_ALL_VISIBLE
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28

References Assert, BufferGetBlockNumber(), BufferGetPage(), CacheInvalidateHeapTuple(), CHECK_FOR_INTERRUPTS, CheckForSerializableConflictIn(), PGAlignedBlock::data, xl_multi_insert_tuple::datalen, END_CRIT_SECTION, ExecFetchSlotHeapTuple(), xl_heap_multi_insert::flags, GetCurrentTransactionId(), HEAP_DEFAULT_FILLFACTOR, HEAP_INSERT_FROZEN, HEAP_INSERT_NO_LOGICAL, heap_multi_insert_pages(), heap_prepare_insert(), i, init, InvalidBlockNumber, InvalidBuffer, InvalidTransactionId, InvalidXLogRecPtr, IsCatalogRelation(), ItemPointerGetOffsetNumber(), log_heap_new_cid(), MarkBufferDirty(), MAXALIGN, xl_heap_multi_insert::ntuples, xl_heap_multi_insert::offsets, PageClearAllVisible(), PageGetHeapFreeSpace(), PageGetMaxOffsetNumber(), PageIsAllVisible(), PageSetAllVisible(), PageSetLSN(), palloc(), pgstat_count_heap_insert(), REGBUF_KEEP_DATA, REGBUF_STANDARD, REGBUF_WILL_INIT, RelationGetBufferForTuple(), RelationGetRelid, RelationGetTargetPageFreeSpace, RelationIsAccessibleInLogicalDecoding, RelationIsLogicallyLogged, RelationNeedsWAL, RelationPutHeapTuple(), ReleaseBuffer(), SHORTALIGN, SizeOfHeapMultiInsert, SizeofHeapTupleHeader, SizeOfMultiInsertTuple, START_CRIT_SECTION, HeapTupleData::t_data, xl_multi_insert_tuple::t_hoff, HeapTupleHeaderData::t_hoff, xl_multi_insert_tuple::t_infomask, HeapTupleHeaderData::t_infomask, xl_multi_insert_tuple::t_infomask2, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TupleTableSlot::tts_tableOid, UnlockReleaseBuffer(), VISIBILITYMAP_ALL_FROZEN, VISIBILITYMAP_ALL_VISIBLE, visibilitymap_clear(), visibilitymap_pin_ok(), visibilitymap_set(), VISIBILITYMAP_VALID_BITS, XLH_INSERT_ALL_FROZEN_SET, XLH_INSERT_ALL_VISIBLE_CLEARED, XLH_INSERT_CONTAINS_NEW_TUPLE, XLH_INSERT_LAST_IN_MULTI, XLOG_HEAP2_MULTI_INSERT, XLOG_HEAP_INIT_PAGE, XLOG_INCLUDE_ORIGIN, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), XLogRegisterData(), and XLogSetRecordFlags().

Referenced by CatalogTuplesMultiInsertWithInfo().
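As a usage illustration, here is a minimal sketch (not from the PostgreSQL sources) of batching rows through heap_multi_insert(), following the signature documented above. The slot array is assumed to have been materialized by the caller, and the helper name is hypothetical.

#include "postgres.h"

#include "access/heapam.h"
#include "access/xact.h"
#include "executor/tuptable.h"

/* Hypothetical helper: insert a batch of already-filled slots. */
static void
multi_insert_example(Relation rel, TupleTableSlot **slots, int nslots)
{
	CommandId	cid = GetCurrentCommandId(true);
	BulkInsertState bistate = GetBulkInsertState();

	/* inserts all nslots tuples, spreading them over as many pages as needed */
	heap_multi_insert(rel, slots, nslots, cid, 0 /* options */ , bistate);

	FreeBulkInsertState(bistate);

	/* afterwards, slots[i]->tts_tid holds the TID assigned to each new tuple */
}

Callers that insert only a single batch can pass NULL for bistate; reusing a BulkInsertState mainly helps consecutive batches keep working on the same target buffer.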

◆ heap_page_prune_and_freeze()

void heap_page_prune_and_freeze ( Relation  relation,
Buffer  buffer,
struct GlobalVisState vistest,
int  options,
struct VacuumCutoffs cutoffs,
PruneFreezeResult presult,
PruneReason  reason,
OffsetNumber off_loc,
TransactionId new_relfrozen_xid,
MultiXactId new_relmin_mxid 
)

Definition at line 350 of file pruneheap.c.

359 {
360  Page page = BufferGetPage(buffer);
361  BlockNumber blockno = BufferGetBlockNumber(buffer);
362  OffsetNumber offnum,
363  maxoff;
364  PruneState prstate;
365  HeapTupleData tup;
366  bool do_freeze;
367  bool do_prune;
368  bool do_hint;
369  bool hint_bit_fpi;
370  int64 fpi_before = pgWalUsage.wal_fpi;
371 
372  /* Copy parameters to prstate */
373  prstate.vistest = vistest;
374  prstate.mark_unused_now = (options & HEAP_PAGE_PRUNE_MARK_UNUSED_NOW) != 0;
375  prstate.freeze = (options & HEAP_PAGE_PRUNE_FREEZE) != 0;
376  prstate.cutoffs = cutoffs;
377 
378  /*
379  * Our strategy is to scan the page and make lists of items to change,
380  * then apply the changes within a critical section. This keeps as much
381  * logic as possible out of the critical section, and also ensures that
382  * WAL replay will work the same as the normal case.
383  *
384  * First, initialize the new pd_prune_xid value to zero (indicating no
385  * prunable tuples). If we find any tuples which may soon become
386  * prunable, we will save the lowest relevant XID in new_prune_xid. Also
387  * initialize the rest of our working state.
388  */
389  prstate.new_prune_xid = InvalidTransactionId;
390  prstate.latest_xid_removed = InvalidTransactionId;
391  prstate.nredirected = prstate.ndead = prstate.nunused = prstate.nfrozen = 0;
392  prstate.nroot_items = 0;
393  prstate.nheaponly_items = 0;
394 
395  /* initialize page freezing working state */
396  prstate.pagefrz.freeze_required = false;
397  if (prstate.freeze)
398  {
399  Assert(new_relfrozen_xid && new_relmin_mxid);
400  prstate.pagefrz.FreezePageRelfrozenXid = *new_relfrozen_xid;
401  prstate.pagefrz.NoFreezePageRelfrozenXid = *new_relfrozen_xid;
402  prstate.pagefrz.FreezePageRelminMxid = *new_relmin_mxid;
403  prstate.pagefrz.NoFreezePageRelminMxid = *new_relmin_mxid;
404  }
405  else
406  {
407  Assert(new_relfrozen_xid == NULL && new_relmin_mxid == NULL);
408  prstate.pagefrz.FreezePageRelfrozenXid = InvalidTransactionId;
409  prstate.pagefrz.NoFreezePageRelfrozenXid = InvalidTransactionId;
410  prstate.pagefrz.FreezePageRelminMxid = InvalidMultiXactId;
411  prstate.pagefrz.NoFreezePageRelminMxid = InvalidMultiXactId;
412  }
413 
414  prstate.ndeleted = 0;
415  prstate.live_tuples = 0;
416  prstate.recently_dead_tuples = 0;
417  prstate.hastup = false;
418  prstate.lpdead_items = 0;
419  prstate.deadoffsets = presult->deadoffsets;
420 
421  /*
422  * Caller may update the VM after we're done. We can keep track of
423  * whether the page will be all-visible and all-frozen after pruning and
424  * freezing to help the caller to do that.
425  *
426  * Currently, only VACUUM sets the VM bits. To save the effort, only do
427  * the bookkeeping if the caller needs it. Currently, that's tied to
428  * HEAP_PAGE_PRUNE_FREEZE, but it could be a separate flag if you wanted
429  * to update the VM bits without also freezing or freeze without also
430  * setting the VM bits.
431  *
432  * In addition to telling the caller whether it can set the VM bit, we
433  * also use 'all_visible' and 'all_frozen' for our own decision-making. If
434  * the whole page would become frozen, we consider opportunistically
435  * freezing tuples. We will not be able to freeze the whole page if there
436  * are tuples present that are not visible to everyone or if there are
437  * dead tuples which are not yet removable. However, dead tuples which
438  * will be removed by the end of vacuuming should not preclude us from
439  * opportunistically freezing. Because of that, we do not clear
440  * all_visible when we see LP_DEAD items. We fix that at the end of the
441  * function, when we return the value to the caller, so that the caller
442  * doesn't set the VM bit incorrectly.
443  */
444  if (prstate.freeze)
445  {
446  prstate.all_visible = true;
447  prstate.all_frozen = true;
448  }
449  else
450  {
451  /*
452  * Initializing to false allows skipping the work to update them in
453  * heap_prune_record_unchanged_lp_normal().
454  */
455  prstate.all_visible = false;
456  prstate.all_frozen = false;
457  }
458 
459  /*
460  * The visibility cutoff xid is the newest xmin of live tuples on the
461  * page. In the common case, this will be set as the conflict horizon the
462  * caller can use for updating the VM. If, at the end of freezing and
463  * pruning, the page is all-frozen, there is no possibility that any
464  * running transaction on the standby does not see tuples on the page as
465  * all-visible, so the conflict horizon remains InvalidTransactionId.
466  */
467  prstate.visibility_cutoff_xid = InvalidTransactionId;
468 
469  maxoff = PageGetMaxOffsetNumber(page);
470  tup.t_tableOid = RelationGetRelid(relation);
471 
472  /*
473  * Determine HTSV for all tuples, and queue them up for processing as HOT
474  * chain roots or as heap-only items.
475  *
476  * Determining HTSV only once for each tuple is required for correctness,
477  * to deal with cases where running HTSV twice could result in different
478  * results. For example, RECENTLY_DEAD can turn to DEAD if another
479  * checked item causes GlobalVisTestIsRemovableFullXid() to update the
480  * horizon, or INSERT_IN_PROGRESS can change to DEAD if the inserting
481  * transaction aborts.
482  *
483  * It's also good for performance. Most commonly tuples within a page are
484  * stored at decreasing offsets (while the items are stored at increasing
485  * offsets). When processing all tuples on a page this leads to reading
486  * memory at decreasing offsets within a page, with a variable stride.
487  * That's hard for CPU prefetchers to deal with. Processing the items in
488  * reverse order (and thus the tuples in increasing order) increases
489  * prefetching efficiency significantly / decreases the number of cache
490  * misses.
491  */
492  for (offnum = maxoff;
493  offnum >= FirstOffsetNumber;
494  offnum = OffsetNumberPrev(offnum))
495  {
496  ItemId itemid = PageGetItemId(page, offnum);
497  HeapTupleHeader htup;
498 
499  /*
500  * Set the offset number so that we can display it along with any
501  * error that occurred while processing this tuple.
502  */
503  *off_loc = offnum;
504 
505  prstate.processed[offnum] = false;
506  prstate.htsv[offnum] = -1;
507 
508  /* Nothing to do if slot doesn't contain a tuple */
509  if (!ItemIdIsUsed(itemid))
510  {
511  heap_prune_record_unchanged_lp_unused(page, &prstate, offnum);
512  continue;
513  }
514 
515  if (ItemIdIsDead(itemid))
516  {
517  /*
518  * If the caller set mark_unused_now true, we can set dead line
519  * pointers LP_UNUSED now.
520  */
521  if (unlikely(prstate.mark_unused_now))
522  heap_prune_record_unused(&prstate, offnum, false);
523  else
524  heap_prune_record_unchanged_lp_dead(page, &prstate, offnum);
525  continue;
526  }
527 
528  if (ItemIdIsRedirected(itemid))
529  {
530  /* This is the start of a HOT chain */
531  prstate.root_items[prstate.nroot_items++] = offnum;
532  continue;
533  }
534 
535  Assert(ItemIdIsNormal(itemid));
536 
537  /*
538  * Get the tuple's visibility status and queue it up for processing.
539  */
540  htup = (HeapTupleHeader) PageGetItem(page, itemid);
541  tup.t_data = htup;
542  tup.t_len = ItemIdGetLength(itemid);
543  ItemPointerSet(&tup.t_self, blockno, offnum);
544 
545  prstate.htsv[offnum] = heap_prune_satisfies_vacuum(&prstate, &tup,
546  buffer);
547 
548  if (!HeapTupleHeaderIsHeapOnly(htup))
549  prstate.root_items[prstate.nroot_items++] = offnum;
550  else
551  prstate.heaponly_items[prstate.nheaponly_items++] = offnum;
552  }
553 
554  /*
555  * If checksums are enabled, heap_prune_satisfies_vacuum() may have caused
556  * an FPI to be emitted.
557  */
558  hint_bit_fpi = fpi_before != pgWalUsage.wal_fpi;
559 
560  /*
561  * Process HOT chains.
562  *
563  * We added the items to the array starting from 'maxoff', so by
564  * processing the array in reverse order, we process the items in
565  * ascending offset number order. The order doesn't matter for
566  * correctness, but some quick micro-benchmarking suggests that this is
567  * faster. (Earlier PostgreSQL versions, which scanned all the items on
568  * the page instead of using the root_items array, also did it in
569  * ascending offset number order.)
570  */
571  for (int i = prstate.nroot_items - 1; i >= 0; i--)
572  {
573  offnum = prstate.root_items[i];
574 
575  /* Ignore items already processed as part of an earlier chain */
576  if (prstate.processed[offnum])
577  continue;
578 
579  /* see preceding loop */
580  *off_loc = offnum;
581 
582  /* Process this item or chain of items */
583  heap_prune_chain(page, blockno, maxoff, offnum, &prstate);
584  }
585 
586  /*
587  * Process any heap-only tuples that were not already processed as part of
588  * a HOT chain.
589  */
590  for (int i = prstate.nheaponly_items - 1; i >= 0; i--)
591  {
592  offnum = prstate.heaponly_items[i];
593 
594  if (prstate.processed[offnum])
595  continue;
596 
597  /* see preceding loop */
598  *off_loc = offnum;
599 
600  /*
601  * If the tuple is DEAD and doesn't chain to anything else, mark it
602  * unused. (If it does chain, we can only remove it as part of
603  * pruning its chain.)
604  *
605  * We need this primarily to handle aborted HOT updates, that is,
606  * XMIN_INVALID heap-only tuples. Those might not be linked to by any
607  * chain, since the parent tuple might be re-updated before any
608  * pruning occurs. So we have to be able to reap them separately from
609  * chain-pruning. (Note that HeapTupleHeaderIsHotUpdated will never
610  * return true for an XMIN_INVALID tuple, so this code will work even
611  * when there were sequential updates within the aborted transaction.)
612  */
613  if (prstate.htsv[offnum] == HEAPTUPLE_DEAD)
614  {
615  ItemId itemid = PageGetItemId(page, offnum);
616  HeapTupleHeader htup = (HeapTupleHeader) PageGetItem(page, itemid);
617 
619  {
621  &prstate.latest_xid_removed);
622  heap_prune_record_unused(&prstate, offnum, true);
623  }
624  else
625  {
626  /*
627  * This tuple should've been processed and removed as part of
628  * a HOT chain, so something's wrong. To preserve evidence,
629  * we don't dare to remove it. We cannot leave behind a DEAD
630  * tuple either, because that will cause VACUUM to error out.
631  * Throwing an error with a distinct error message seems like
632  * the least bad option.
633  */
634  elog(ERROR, "dead heap-only tuple (%u, %d) is not linked to from any HOT chain",
635  blockno, offnum);
636  }
637  }
638  else
639  heap_prune_record_unchanged_lp_normal(page, &prstate, offnum);
640  }
641 
642  /* We should now have processed every tuple exactly once */
643 #ifdef USE_ASSERT_CHECKING
644  for (offnum = FirstOffsetNumber;
645  offnum <= maxoff;
646  offnum = OffsetNumberNext(offnum))
647  {
648  *off_loc = offnum;
649 
650  Assert(prstate.processed[offnum]);
651  }
652 #endif
653 
654  /* Clear the offset information once we have processed the given page. */
655  *off_loc = InvalidOffsetNumber;
656 
657  do_prune = prstate.nredirected > 0 ||
658  prstate.ndead > 0 ||
659  prstate.nunused > 0;
660 
661  /*
662  * Even if we don't prune anything, if we found a new value for the
663  * pd_prune_xid field or the page was marked full, we will update the hint
664  * bit.
665  */
666  do_hint = ((PageHeader) page)->pd_prune_xid != prstate.new_prune_xid ||
667  PageIsFull(page);
668 
669  /*
670  * Decide if we want to go ahead with freezing according to the freeze
671  * plans we prepared, or not.
672  */
673  do_freeze = false;
674  if (prstate.freeze)
675  {
676  if (prstate.pagefrz.freeze_required)
677  {
678  /*
679  * heap_prepare_freeze_tuple indicated that at least one XID/MXID
680  * from before FreezeLimit/MultiXactCutoff is present. Must
681  * freeze to advance relfrozenxid/relminmxid.
682  */
683  do_freeze = true;
684  }
685  else
686  {
687  /*
688  * Opportunistically freeze the page if we are generating an FPI
689  * anyway and if doing so means that we can set the page
690  * all-frozen afterwards (might not happen until VACUUM's final
691  * heap pass).
692  *
693  * XXX: Previously, we knew if pruning emitted an FPI by checking
694  * pgWalUsage.wal_fpi before and after pruning. Once the freeze
695  * and prune records were combined, this heuristic couldn't be
696  * used anymore. The opportunistic freeze heuristic must be
697  * improved; however, for now, try to approximate the old logic.
698  */
699  if (prstate.all_visible && prstate.all_frozen && prstate.nfrozen > 0)
700  {
701  /*
702  * Freezing would make the page all-frozen. Have already
703  * emitted an FPI or will do so anyway?
704  */
705  if (RelationNeedsWAL(relation))
706  {
707  if (hint_bit_fpi)
708  do_freeze = true;
709  else if (do_prune)
710  {
711  if (XLogCheckBufferNeedsBackup(buffer))
712  do_freeze = true;
713  }
714  else if (do_hint)
715  {
716  if (XLogHintBitIsNeeded())
717  do_freeze = true;
718  }
719  }
720  }
721  }
722  }
723 
724  if (do_freeze)
725  {
726  /*
727  * Validate the tuples we will be freezing before entering the
728  * critical section.
729  */
730  heap_pre_freeze_checks(buffer, prstate.frozen, prstate.nfrozen);
731  }
732  else if (prstate.nfrozen > 0)
733  {
734  /*
735  * The page contained some tuples that were not already frozen, and we
736  * chose not to freeze them now. The page won't be all-frozen then.
737  */
738  Assert(!prstate.pagefrz.freeze_required);
739 
740  prstate.all_frozen = false;
741  prstate.nfrozen = 0; /* avoid miscounts in instrumentation */
742  }
743  else
744  {
745  /*
746  * We have no freeze plans to execute. The page might already be
747  * all-frozen (perhaps only following pruning), though. Such pages
748  * can be marked all-frozen in the VM by our caller, even though none
749  * of its tuples were newly frozen here.
750  */
751  }
752 
753  /* Any error while applying the changes is critical */
754  START_CRIT_SECTION();
755 
756  if (do_hint)
757  {
758  /*
759  * Update the page's pd_prune_xid field to either zero, or the lowest
760  * XID of any soon-prunable tuple.
761  */
762  ((PageHeader) page)->pd_prune_xid = prstate.new_prune_xid;
763 
764  /*
765  * Also clear the "page is full" flag, since there's no point in
766  * repeating the prune/defrag process until something else happens to
767  * the page.
768  */
769  PageClearFull(page);
770 
771  /*
772  * If that's all we had to do to the page, this is a non-WAL-logged
773  * hint. If we are going to freeze or prune the page, we will mark
774  * the buffer dirty below.
775  */
776  if (!do_freeze && !do_prune)
777  MarkBufferDirtyHint(buffer, true);
778  }
779 
780  if (do_prune || do_freeze)
781  {
782  /* Apply the planned item changes and repair page fragmentation. */
783  if (do_prune)
784  {
785  heap_page_prune_execute(buffer, false,
786  prstate.redirected, prstate.nredirected,
787  prstate.nowdead, prstate.ndead,
788  prstate.nowunused, prstate.nunused);
789  }
790 
791  if (do_freeze)
792  heap_freeze_prepared_tuples(buffer, prstate.frozen, prstate.nfrozen);
793 
794  MarkBufferDirty(buffer);
795 
796  /*
797  * Emit a WAL XLOG_HEAP2_PRUNE_FREEZE record showing what we did
798  */
799  if (RelationNeedsWAL(relation))
800  {
801  /*
802  * The snapshotConflictHorizon for the whole record should be the
803  * most conservative of all the horizons calculated for any of the
804  * possible modifications. If this record will prune tuples, any
805  * transactions on the standby older than the youngest xmax of the
806  * most recently removed tuple this record will prune will
807  * conflict. If this record will freeze tuples, any transactions
808  * on the standby with xids older than the youngest tuple this
809  * record will freeze will conflict.
810  */
811  TransactionId frz_conflict_horizon = InvalidTransactionId;
812  TransactionId conflict_xid;
813 
814  /*
815  * We can use the visibility_cutoff_xid as our cutoff for
816  * conflicts when the whole page is eligible to become all-frozen
817  * in the VM once we're done with it. Otherwise we generate a
818  * conservative cutoff by stepping back from OldestXmin.
819  */
820  if (do_freeze)
821  {
822  if (prstate.all_visible && prstate.all_frozen)
823  frz_conflict_horizon = prstate.visibility_cutoff_xid;
824  else
825  {
826  /* Avoids false conflicts when hot_standby_feedback in use */
827  frz_conflict_horizon = prstate.cutoffs->OldestXmin;
828  TransactionIdRetreat(frz_conflict_horizon);
829  }
830  }
831 
832  if (TransactionIdFollows(frz_conflict_horizon, prstate.latest_xid_removed))
833  conflict_xid = frz_conflict_horizon;
834  else
835  conflict_xid = prstate.latest_xid_removed;
836 
837  log_heap_prune_and_freeze(relation, buffer,
838  conflict_xid,
839  true, reason,
840  prstate.frozen, prstate.nfrozen,
841  prstate.redirected, prstate.nredirected,
842  prstate.nowdead, prstate.ndead,
843  prstate.nowunused, prstate.nunused);
844  }
845  }
846 
846 
847  END_CRIT_SECTION();
848 
849  /* Copy information back for caller */
850  presult->ndeleted = prstate.ndeleted;
851  presult->nnewlpdead = prstate.ndead;
852  presult->nfrozen = prstate.nfrozen;
853  presult->live_tuples = prstate.live_tuples;
854  presult->recently_dead_tuples = prstate.recently_dead_tuples;
855 
856  /*
857  * It was convenient to ignore LP_DEAD items in all_visible earlier on to
858  * make the choice of whether or not to freeze the page unaffected by the
859  * short-term presence of LP_DEAD items. These LP_DEAD items were
860  * effectively assumed to be LP_UNUSED items in the making. It doesn't
861  * matter which vacuum heap pass (initial pass or final pass) ends up
862  * setting the page all-frozen, as long as the ongoing VACUUM does it.
863  *
864  * Now that freezing has been finalized, unset all_visible if there are
865  * any LP_DEAD items on the page. It needs to reflect the present state
866  * of the page, as expected by our caller.
867  */
868  if (prstate.all_visible && prstate.lpdead_items == 0)
869  {
870  presult->all_visible = prstate.all_visible;
871  presult->all_frozen = prstate.all_frozen;
872  }
873  else
874  {
875  presult->all_visible = false;
876  presult->all_frozen = false;
877  }
878 
879  presult->hastup = prstate.hastup;
880 
881  /*
882  * For callers planning to update the visibility map, the conflict horizon
883  * for that record must be the newest xmin on the page. However, if the
884  * page is completely frozen, there can be no conflict and the
885  * vm_conflict_horizon should remain InvalidTransactionId. This includes
886  * the case that we just froze all the tuples; the prune-freeze record
887  * included the conflict XID already so the caller doesn't need it.
888  */
889  if (presult->all_frozen)
890  presult->vm_conflict_horizon = InvalidTransactionId;
891  else
892  presult->vm_conflict_horizon = prstate.visibility_cutoff_xid;
893 
894  presult->lpdead_items = prstate.lpdead_items;
895  /* the presult->deadoffsets array was already filled in */
896 
897  if (prstate.freeze)
898  {
899  if (presult->nfrozen > 0)
900  {
901  *new_relfrozen_xid = prstate.pagefrz.FreezePageRelfrozenXid;
902  *new_relmin_mxid = prstate.pagefrz.FreezePageRelminMxid;
903  }
904  else
905  {
906  *new_relfrozen_xid = prstate.pagefrz.NoFreezePageRelfrozenXid;
907  *new_relmin_mxid = prstate.pagefrz.NoFreezePageRelminMxid;
908  }
909  }
910 }
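For orientation, the following sketch (not from the PostgreSQL sources) shows the shape of a VACUUM-style call to heap_page_prune_and_freeze(), using the parameter order documented above. It assumes the caller already holds a cleanup lock on the buffer, has a populated struct VacuumCutoffs, and maintains the relfrozenxid/relminmxid trackers it intends to advance; the helper name is hypothetical.

#include "postgres.h"

#include "access/heapam.h"
#include "commands/vacuum.h"
#include "storage/off.h"

/* Hypothetical helper: prune and opportunistically freeze one heap page. */
static void
prune_and_freeze_example(Relation rel, Buffer buf,
						 struct GlobalVisState *vistest,
						 struct VacuumCutoffs *cutoffs,
						 TransactionId *new_relfrozen_xid,
						 MultiXactId *new_relmin_mxid)
{
	PruneFreezeResult presult;
	OffsetNumber off_loc = InvalidOffsetNumber; /* error-context offset */

	/* request freezing as well as pruning, as VACUUM's first heap pass does */
	heap_page_prune_and_freeze(rel, buf, vistest,
							   HEAP_PAGE_PRUNE_FREEZE,
							   cutoffs, &presult, PRUNE_VACUUM_SCAN,
							   &off_loc,
							   new_relfrozen_xid, new_relmin_mxid);

	/*
	 * presult now reports ndeleted, nfrozen, lpdead_items, and whether the
	 * page may be marked all-visible/all-frozen in the visibility map.
	 */
}

When HEAP_PAGE_PRUNE_FREEZE is not requested, new_relfrozen_xid and new_relmin_mxid must be NULL, per the assertion near the top of the function.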
void MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
Definition: bufmgr.c:4970
PageHeaderData * PageHeader
Definition: bufpage.h:173
static void PageClearFull(Page page)
Definition: bufpage.h:423
static bool PageIsFull(Page page)
Definition: bufpage.h:413
#define likely(x)
Definition: c.h:310
void heap_freeze_prepared_tuples(Buffer buffer, HeapTupleFreeze *tuples, int ntuples)
Definition: heapam.c:6900
void heap_pre_freeze_checks(Buffer buffer, HeapTupleFreeze *tuples, int ntuples)
Definition: heapam.c:6847
#define HEAP_PAGE_PRUNE_FREEZE
Definition: heapam.h:42
#define HEAP_PAGE_PRUNE_MARK_UNUSED_NOW
Definition: heapam.h:41
WalUsage pgWalUsage
Definition: instrument.c:22
#define InvalidMultiXactId
Definition: multixact.h:24
#define OffsetNumberPrev(offsetNumber)
Definition: off.h:54
static void heap_prune_chain(Page page, BlockNumber blockno, OffsetNumber maxoff, OffsetNumber rootoffnum, PruneState *prstate)
Definition: pruneheap.c:999
static void heap_prune_record_unchanged_lp_dead(Page page, PruneState *prstate, OffsetNumber offnum)
Definition: pruneheap.c:1508
static void heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum, bool was_normal)
Definition: pruneheap.c:1297
static void heap_prune_record_unchanged_lp_normal(Page page, PruneState *prstate, OffsetNumber offnum)
Definition: pruneheap.c:1330
void log_heap_prune_and_freeze(Relation relation, Buffer buffer, TransactionId conflict_xid, bool cleanup_lock, PruneReason reason, HeapTupleFreeze *frozen, int nfrozen, OffsetNumber *redirected, int nredirected, OffsetNumber *dead, int ndead, OffsetNumber *unused, int nunused)
Definition: pruneheap.c:2053
static void heap_prune_record_unchanged_lp_unused(Page page, PruneState *prstate, OffsetNumber offnum)
Definition: pruneheap.c:1319
static HTSV_Result heap_prune_satisfies_vacuum(PruneState *prstate, HeapTuple tup, Buffer buffer)
Definition: pruneheap.c:917
void heap_page_prune_execute(Buffer buffer, bool lp_truncate_only, OffsetNumber *redirected, int nredirected, OffsetNumber *nowdead, int ndead, OffsetNumber *nowunused, int nunused)
Definition: pruneheap.c:1561
MultiXactId NoFreezePageRelminMxid
Definition: heapam.h:219
TransactionId FreezePageRelfrozenXid
Definition: heapam.h:207
bool freeze_required
Definition: heapam.h:181
MultiXactId FreezePageRelminMxid
Definition: heapam.h:208
TransactionId NoFreezePageRelfrozenXid
Definition: heapam.h:218
int recently_dead_tuples
Definition: heapam.h:234
TransactionId vm_conflict_horizon
Definition: heapam.h:249
OffsetNumber deadoffsets[MaxHeapTuplesPerPage]
Definition: heapam.h:263
bool all_visible
Definition: heapam.h:247
HeapPageFreeze pagefrz
Definition: pruneheap.c:103
bool all_visible
Definition: pruneheap.c:150
int ndead
Definition: pruneheap.c:55
bool processed[MaxHeapTuplesPerPage+1]
Definition: pruneheap.c:86
OffsetNumber heaponly_items[MaxHeapTuplesPerPage]
Definition: pruneheap.c:78
TransactionId new_prune_xid
Definition: pruneheap.c:52
bool hastup
Definition: pruneheap.c:122
int recently_dead_tuples
Definition: pruneheap.c:119
OffsetNumber nowdead[MaxHeapTuplesPerPage]
Definition: pruneheap.c:60
int nroot_items
Definition: pruneheap.c:75
OffsetNumber nowunused[MaxHeapTuplesPerPage]
Definition: pruneheap.c:61
int nheaponly_items
Definition: pruneheap.c:77
bool mark_unused_now
Definition: pruneheap.c:43
int live_tuples
Definition: pruneheap.c:118
TransactionId visibility_cutoff_xid
Definition: pruneheap.c:152
bool all_frozen
Definition: pruneheap.c:151
GlobalVisState * vistest
Definition: pruneheap.c:41
struct VacuumCutoffs * cutoffs
Definition: pruneheap.c:46
HeapTupleFreeze frozen[MaxHeapTuplesPerPage]
Definition: pruneheap.c:62
int lpdead_items
Definition: pruneheap.c:128
int nfrozen
Definition: pruneheap.c:57
OffsetNumber redirected[MaxHeapTuplesPerPage * 2]
Definition: pruneheap.c:59
int ndeleted
Definition: pruneheap.c:115
bool freeze
Definition: pruneheap.c:45
int nredirected
Definition: pruneheap.c:54
int8 htsv[MaxHeapTuplesPerPage+1]
Definition: pruneheap.c:98
TransactionId latest_xid_removed
Definition: pruneheap.c:53
int nunused
Definition: pruneheap.c:56
OffsetNumber root_items[MaxHeapTuplesPerPage]
Definition: pruneheap.c:76
OffsetNumber * deadoffsets
Definition: pruneheap.c:129
TransactionId OldestXmin
Definition: vacuum.h:267
int64 wal_fpi
Definition: instrument.h:54
bool TransactionIdFollows(TransactionId id1, TransactionId id2)
Definition: transam.c:314
#define TransactionIdRetreat(dest)
Definition: transam.h:141
#define XLogHintBitIsNeeded()
Definition: xlog.h:120
bool XLogCheckBufferNeedsBackup(Buffer buffer)
Definition: xloginsert.c:1027

References PruneState::all_frozen, PruneFreezeResult::all_frozen, PruneState::all_visible, PruneFreezeResult::all_visible, Assert, BufferGetBlockNumber(), BufferGetPage(), PruneState::cutoffs, PruneState::deadoffsets, PruneFreezeResult::deadoffsets, elog, END_CRIT_SECTION, ERROR, FirstOffsetNumber, PruneState::freeze, HeapPageFreeze::freeze_required, HeapPageFreeze::FreezePageRelfrozenXid, HeapPageFreeze::FreezePageRelminMxid, PruneState::frozen, PruneState::hastup, PruneFreezeResult::hastup, heap_freeze_prepared_tuples(), heap_page_prune_execute(), HEAP_PAGE_PRUNE_FREEZE, HEAP_PAGE_PRUNE_MARK_UNUSED_NOW, heap_pre_freeze_checks(), heap_prune_chain(), heap_prune_record_unchanged_lp_dead(), heap_prune_record_unchanged_lp_normal(), heap_prune_record_unchanged_lp_unused(), heap_prune_record_unused(), heap_prune_satisfies_vacuum(), PruneState::heaponly_items, HEAPTUPLE_DEAD, HeapTupleHeaderAdvanceConflictHorizon(), HeapTupleHeaderIsHeapOnly, HeapTupleHeaderIsHotUpdated, PruneState::htsv, i, InvalidMultiXactId, InvalidOffsetNumber, InvalidTransactionId, ItemIdGetLength, ItemIdIsDead, ItemIdIsNormal, ItemIdIsRedirected, ItemIdIsUsed, ItemPointerSet(), PruneState::latest_xid_removed, likely, PruneState::live_tuples, PruneFreezeResult::live_tuples, log_heap_prune_and_freeze(), PruneState::lpdead_items, PruneFreezeResult::lpdead_items, PruneState::mark_unused_now, MarkBufferDirty(), MarkBufferDirtyHint(), PruneState::ndead, PruneState::ndeleted, PruneFreezeResult::ndeleted, PruneState::new_prune_xid, PruneState::nfrozen, PruneFreezeResult::nfrozen, PruneState::nheaponly_items, PruneFreezeResult::nnewlpdead, HeapPageFreeze::NoFreezePageRelfrozenXid, HeapPageFreeze::NoFreezePageRelminMxid, PruneState::nowdead, PruneState::nowunused, PruneState::nredirected, PruneState::nroot_items, PruneState::nunused, OffsetNumberNext, OffsetNumberPrev, VacuumCutoffs::OldestXmin, PageClearFull(), PruneState::pagefrz, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PageIsFull(), pgWalUsage, PruneState::processed, PruneState::recently_dead_tuples, PruneFreezeResult::recently_dead_tuples, PruneState::redirected, RelationGetRelid, RelationNeedsWAL, PruneState::root_items, START_CRIT_SECTION, HeapTupleData::t_data, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TransactionIdFollows(), TransactionIdRetreat, unlikely, PruneState::visibility_cutoff_xid, PruneState::vistest, PruneFreezeResult::vm_conflict_horizon, WalUsage::wal_fpi, XLogCheckBufferNeedsBackup(), and XLogHintBitIsNeeded.

Referenced by heap_page_prune_opt(), and lazy_scan_prune().
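The following is not part of pruneheap.c; it is a minimal sketch, loosely modeled on what lazy_scan_prune() does with these results, of how a caller might translate the PruneFreezeResult fields filled in above into visibility-map flags. VISIBILITYMAP_ALL_VISIBLE and VISIBILITYMAP_ALL_FROZEN are the real flag macros from access/visibilitymap.h; the helper name and its structure are illustrative assumptions.

#include "postgres.h"
#include "access/heapam.h"
#include "access/visibilitymap.h"

/* Hypothetical helper: map a filled-in PruneFreezeResult to VM flags. */
static uint8
vmbits_after_prune(const PruneFreezeResult *presult)
{
    uint8       flags = 0;

    /*
     * all_visible is only reported as true when no LP_DEAD items remain on
     * the page, so it can be used for the visibility map directly.
     */
    if (presult->all_visible)
        flags |= VISIBILITYMAP_ALL_VISIBLE;

    /*
     * When all_frozen is set, vm_conflict_horizon is InvalidTransactionId:
     * any conflict XID that was needed has already been carried by the
     * prune/freeze WAL record, so the all-frozen bit adds no new conflicts.
     */
    if (presult->all_frozen)
        flags |= VISIBILITYMAP_ALL_FROZEN;

    return flags;
}

A real caller would additionally consume presult->lpdead_items and presult->deadoffsets to schedule index vacuuming, and fold the returned *new_relfrozen_xid and *new_relmin_mxid values into its relfrozenxid/relminmxid tracking, broadly as lazy_scan_prune() does.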

◆ heap_page_prune_execute()

void heap_page_prune_execute ( Buffer  buffer,
bool  lp_truncate_only,
OffsetNumber *  redirected,
int  nredirected,
OffsetNumber *  nowdead,
int  ndead,
OffsetNumber *  nowunused,
int  nunused 
)

Definition at line 1561 of file pruneheap.c.

1565 {
1566  Page page = (Page) BufferGetPage(buffer);
1567  OffsetNumber *offnum;
1568  HeapTupleHeader htup PG_USED_FOR_ASSERTS_ONLY;
1569 
1570  /* Shouldn't be called unless there's something to do */
1571  Assert(nredirected > 0 || ndead > 0 || nunused > 0);
1572 
1573  /* If 'lp_truncate_only', we can only remove already-dead line pointers */
1574  Assert(!lp_truncate_only || (nredirected == 0 && ndead == 0));
1575 
1576  /* Update all redirected line pointers */
1577  offnum = redirected;
1578  for (int i = 0; i < nredirected; i++)
1579  {
1580  OffsetNumber fromoff = *offnum++;
1581  OffsetNumber tooff = *offnum++;
1582  ItemId fromlp = PageGetItemId(page, fromoff);
1583  ItemId tolp PG_USED_FOR_ASSERTS_ONLY;
1584 
1585 #ifdef USE_ASSERT_CHECKING
1586 
1587  /*
1588  * Any existing item that we set as an LP_REDIRECT (any 'from' item)
1589  * must be the first item from a HOT chain. If the item has tuple
1590  * storage then it can't be a heap-only tuple. Otherwise we are just
1591  * maintaining an existing LP_REDIRECT from an existing HOT chain that
1592  * has been pruned at least once before now.
1593  */
1594  if (!ItemIdIsRedirected(fromlp))
1595  {
1596  Assert(ItemIdHasStorage(fromlp) && ItemIdIsNormal(fromlp));
1597 
1598  htup = (HeapTupleHeader) PageGetItem(page, fromlp);
1599  Assert(!HeapTupleHeaderIsHeapOnly(htup));
1600  }
1601  else
1602  {
1603  /* We shouldn't need to redundantly set the redirect */
1604  Assert(ItemIdGetRedirect(fromlp) != tooff);
1605  }
1606 
1607  /*
1608  * The item that we're about to set as an LP_REDIRECT (the 'from'
1609  * item) will point to an existing item (the 'to' item) that is
1610  * already a heap-only tuple. There can be at most one LP_REDIRECT
1611  * item per HOT chain.
1612  *
1613  * We need to keep around an LP_REDIRECT item (after original
1614  * non-heap-only root tuple gets pruned away) so that it's always
1615  * possible for VACUUM to easily figure out what TID to delete from
1616  * indexes when an entire HOT chain becomes dead. A heap-only tuple
1617  * can never become LP_DEAD; an LP_REDIRECT item or a regular heap
1618  * tuple can.
1619  *
1620  * This check may miss problems, e.g. the target of a redirect could
1621  * be marked as unused subsequently. The page_verify_redirects() check
1622  * below will catch such problems.
1623  */
1624  tolp = PageGetItemId(page, tooff);
1625  Assert(ItemIdHasStorage(tolp) && ItemIdIsNormal(tolp));
1626  htup = (HeapTupleHeader) PageGetItem(page, tolp);
1627  Assert(HeapTupleHeaderIsHeapOnly(htup));
1628 #endif
1629 
1630  ItemIdSetRedirect(fromlp, tooff);
1631  }
1632 
1633  /* Update all now-dead line pointers */
1634  offnum = nowdead;
1635  for (int i = 0; i < ndead; i++)
1636  {
1637  OffsetNumber off = *offnum++;
1638  ItemId lp = PageGetItemId(page, off);
1639 
1640 #ifdef USE_ASSERT_CHECKING
1641 
1642  /*
1643  * An LP_DEAD line pointer must be left behind when the original item
1644  * (which is dead to everybody) could still be referenced by a TID in
1645  * an index. This should never be necessary with any individual
1646  * heap-only tuple item, though. (It's not clear how much of a problem
1647  * that would be, but there is no reason to allow it.)
1648  */
1649  if (ItemIdHasStorage(lp))
1650  {
1651  Assert(ItemIdIsNormal(lp));
1652  htup = (HeapTupleHeader) PageGetItem(page, lp);
1653  Assert(!HeapTupleHeaderIsHeapOnly(htup));
1654  }
1655  else
1656  {
1657  /* Whole HOT chain becomes dead */
1658  Assert(ItemIdIsRedirected(lp));
1659  }
1660 #endif
1661 
1662  ItemIdSetDead(lp);
1663  }
1664 
1665  /* Update all now-unused line pointers */
1666  offnum = nowunused;
1667  for (int i = 0; i < nunused; i++)
1668  {
1669  OffsetNumber off = *offnum++;
1670  ItemId lp = PageGetItemId(page, off);
1671 
1672 #ifdef USE_ASSERT_CHECKING
1673 
1674  if (lp_truncate_only)
1675  {
1676  /* Setting LP_DEAD to LP_UNUSED in vacuum's second pass */
1677  Assert(ItemIdIsDead(lp) && !ItemIdHasStorage(lp));
1678  }
1679  else
1680  {
1681  /*
1682  * When heap_page_prune_and_freeze() was called, mark_unused_now
1683  * may have been passed as true, which allows would-be LP_DEAD
1684  * items to be made LP_UNUSED instead. This is only possible if
1685  * the relation has no indexes. If there are any dead items, then
1686  * mark_unused_now was not true and every item being marked
1687  * LP_UNUSED must refer to a heap-only tuple.
1688  */
1689  if (ndead > 0)
1690  {
1691  Assert(ItemIdHasStorage(lp) && ItemIdIsNormal(lp));
1692  htup = (HeapTupleHeader) PageGetItem(page, lp);
1693  Assert(HeapTupleHeaderIsHeapOnly(htup));
1694  }
1695  else
1696  Assert(ItemIdIsUsed(lp));
1697  }
1698 
1699 #endif
1700 
1701  ItemIdSetUnused(lp);
1702  }
1703 
1704  if (lp_truncate_only)
1705  PageTruncateLinePointerArray(page);
1706  else
1707  {
1708  /*
1709  * Finally, repair any fragmentation, and update the page's hint bit
1710  * about whether it has free pointers.
1711  */
1712  PageRepairFragmentation(page);
1713 
1714  /*
1715  * Now that the page has been modified, assert that redirect items
1716  * still point to valid targets.
1717  */
1718  page_verify_redirects(page);
1719  }
1720 }
void PageRepairFragmentation(Page page)
Definition: bufpage.c:699
void PageTruncateLinePointerArray(Page page)
Definition: bufpage.c:835
#define PG_USED_FOR_ASSERTS_ONLY
Definition: c.h:182
#define ItemIdSetRedirect(itemId, link)
Definition: itemid.h:152
#define ItemIdSetDead(itemId)
Definition: itemid.h:164
#define ItemIdSetUnused(itemId)
Definition: itemid.h:128
#define ItemIdHasStorage(itemId)
Definition: itemid.h:120
static void page_verify_redirects(Page page)
Definition: pruneheap.c:1737

References Assert, BufferGetPage(), HeapTupleHeaderIsHeapOnly, i, ItemIdGetRedirect, ItemIdHasStorage, ItemIdIsDead, ItemIdIsNormal, ItemIdIsRedirected, ItemIdIsUsed, ItemIdSetDead, ItemIdSetRedirect, ItemIdSetUnused, page_verify_redirects(), PageGetItem(), PageGetItemId(), PageRepairFragmentation(), PageTruncateLinePointerArray(), and PG_USED_FOR_ASSERTS_ONLY.

Referenced by heap_page_prune_and_freeze(), and heap_xlog_prune_freeze().
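For orientation when reading the loops above, here is an illustrative call, not taken from the server sources, showing how the offset arrays are laid out: redirected holds (from, to) pairs and therefore carries 2 * nredirected entries, while nowdead and nowunused are flat offset lists. The offsets below are invented and would only satisfy the assertions on a page whose items really look like that; the actual callers are heap_page_prune_and_freeze(), which builds these arrays in its PruneState, and heap_xlog_prune_freeze(), which reads them from the WAL record.

#include "postgres.h"
#include "access/heapam.h"
#include "storage/off.h"

/*
 * Illustrative only: redirect the HOT chain rooted at offset 1 to the
 * heap-only tuple at offset 3, mark offset 5 LP_DEAD, and mark the
 * heap-only tuple at offset 7 LP_UNUSED.  The caller is assumed to hold a
 * cleanup lock on the buffer and to be inside the critical section that
 * also arranges the corresponding WAL record, as heap_page_prune_and_freeze()
 * does; none of that machinery is shown here.
 */
static void
prune_execute_sketch(Buffer buffer)
{
    OffsetNumber redirected[] = {1, 3};     /* one (from, to) pair */
    OffsetNumber nowdead[] = {5};
    OffsetNumber nowunused[] = {7};

    heap_page_prune_execute(buffer, false,  /* lp_truncate_only = false */
                            redirected, 1,
                            nowdead, 1,
                            nowunused, 1);
}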

◆ heap_page_prune_opt()

void heap_page_prune_opt ( Relation  relation,
Buffer  buffer 
)

Definition at line 193 of file pruneheap.c.

194 {
195  Page page = BufferGetPage(buffer);
196  TransactionId prune_xid;
197  GlobalVisState *vistest;
198  Size minfree;
199 
200  /*
201  * We can't write WAL in recovery mode, so there's no point trying to
202  * clean the page. The primary will likely issue a cleaning WAL record
203  * soon anyway, so this is no particular loss.
204  */
205  if (RecoveryInProgress())
206  return;
207 
208  /*
209  * First check whether there's any chance there's something to prune,
210  * determining the appropriate horizon is a waste if there's no prune_xid
211  * (i.e. no updates/deletes left potentially dead tuples around).
212  */
213  prune_xid = ((PageHeader) page)->pd_prune_xid;
214  if (!TransactionIdIsValid(prune_xid))
215  return;
216 
217  /*
218  * Check whether prune_xid indicates that there may be dead rows that can
219  * be cleaned up.
220  */
221  vistest = GlobalVisTestFor(relation);
222 
223  if (!GlobalVisTestIsRemovableXid(vistest, prune_xid))
224  return;
225 
226  /*
227  * We prune when a previous UPDATE failed to find enough space on the page
228  * for a new tuple version, or when free space falls below the relation's
229  * fill-factor target (but not less than 10%).
230  *
231  * Checking free space here is questionable since we aren't holding any
232  * lock on the buffer; in the worst case we could get a bogus answer. It's
233  * unlikely to be *seriously* wrong, though, since reading either pd_lower
234  * or pd_upper is probably atomic. Avoiding taking a lock seems more
235  * important than sometimes getting a wrong answer in what is after all
236  * just a heuristic estimate.
237  */
 238  minfree = RelationGetTargetPageFreeSpace(relation,
 239  HEAP_DEFAULT_FILLFACTOR);
 240  minfree = Max(minfree, BLCKSZ / 10);
241 
242  if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
243  {
244  /* OK, try to get exclusive buffer lock */
245  if (!ConditionalLockBufferForCleanup(buffer))
246  return;
247 
248  /*
249  * Now that we have buffer lock, get accurate information about the
250  * page's free space, and recheck the heuristic about whether to
251  * prune.
252  */
253  if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
254  {
255  OffsetNumber dummy_off_loc;
256  PruneFreezeResult presult;
257 
258  /*
259  * For now, pass mark_unused_now as false regardless of whether or
260  * not the relation has indexes, since we cannot safely determine
261  * that during on-access pruning with the current implementation.
262