PostgreSQL Source Code  git master
heapam.h File Reference
#include "access/relation.h"
#include "access/relscan.h"
#include "access/sdir.h"
#include "access/skey.h"
#include "access/table.h"
#include "access/tableam.h"
#include "nodes/lockoptions.h"
#include "nodes/primnodes.h"
#include "storage/bufpage.h"
#include "storage/dsm.h"
#include "storage/lockdefs.h"
#include "storage/shm_toc.h"
#include "utils/relcache.h"
#include "utils/snapshot.h"
Include dependency graph for heapam.h:
This graph shows which files directly or indirectly include this file:


Data Structures

struct  HeapScanDescData
 
struct  IndexFetchHeapData
 
struct  HeapTupleFreeze
 
struct  HeapPageFreeze
 
struct  PruneResult
 

Macros

#define HEAP_INSERT_SKIP_FSM   TABLE_INSERT_SKIP_FSM
 
#define HEAP_INSERT_FROZEN   TABLE_INSERT_FROZEN
 
#define HEAP_INSERT_NO_LOGICAL   TABLE_INSERT_NO_LOGICAL
 
#define HEAP_INSERT_SPECULATIVE   0x0010
 
#define MaxLockTupleMode   LockTupleExclusive
 
#define HEAP_FREEZE_CHECK_XMIN_COMMITTED   0x01
 
#define HEAP_FREEZE_CHECK_XMAX_ABORTED   0x02
 
#define HeapScanIsValid(scan)   PointerIsValid(scan)
 

Typedefs

typedef struct BulkInsertStateData *BulkInsertState
 
typedef struct HeapScanDescData HeapScanDescData
 
typedef struct HeapScanDescData *HeapScanDesc
 
typedef struct IndexFetchHeapData IndexFetchHeapData
 
typedef struct HeapTupleFreeze HeapTupleFreeze
 
typedef struct HeapPageFreeze HeapPageFreeze
 
typedef struct PruneResult PruneResult
 

Enumerations

enum  HTSV_Result {
  HEAPTUPLE_DEAD , HEAPTUPLE_LIVE , HEAPTUPLE_RECENTLY_DEAD , HEAPTUPLE_INSERT_IN_PROGRESS ,
  HEAPTUPLE_DELETE_IN_PROGRESS
}
 

Functions

static HTSV_Result htsv_get_valid_status (int status)
 
TableScanDesc heap_beginscan (Relation relation, Snapshot snapshot, int nkeys, ScanKey key, ParallelTableScanDesc parallel_scan, uint32 flags)
 
void heap_setscanlimits (TableScanDesc sscan, BlockNumber startBlk, BlockNumber numBlks)
 
void heapgetpage (TableScanDesc sscan, BlockNumber block)
 
void heap_rescan (TableScanDesc sscan, ScanKey key, bool set_params, bool allow_strat, bool allow_sync, bool allow_pagemode)
 
void heap_endscan (TableScanDesc sscan)
 
HeapTuple heap_getnext (TableScanDesc sscan, ScanDirection direction)
 
bool heap_getnextslot (TableScanDesc sscan, ScanDirection direction, struct TupleTableSlot *slot)
 
void heap_set_tidrange (TableScanDesc sscan, ItemPointer mintid, ItemPointer maxtid)
 
bool heap_getnextslot_tidrange (TableScanDesc sscan, ScanDirection direction, TupleTableSlot *slot)
 
bool heap_fetch (Relation relation, Snapshot snapshot, HeapTuple tuple, Buffer *userbuf, bool keep_buf)
 
bool heap_hot_search_buffer (ItemPointer tid, Relation relation, Buffer buffer, Snapshot snapshot, HeapTuple heapTuple, bool *all_dead, bool first_call)
 
void heap_get_latest_tid (TableScanDesc sscan, ItemPointer tid)
 
BulkInsertState GetBulkInsertState (void)
 
void FreeBulkInsertState (BulkInsertState)
 
void ReleaseBulkInsertStatePin (BulkInsertState bistate)
 
void heap_insert (Relation relation, HeapTuple tup, CommandId cid, int options, BulkInsertState bistate)
 
void heap_multi_insert (Relation relation, struct TupleTableSlot **slots, int ntuples, CommandId cid, int options, BulkInsertState bistate)
 
TM_Result heap_delete (Relation relation, ItemPointer tid, CommandId cid, Snapshot crosscheck, bool wait, struct TM_FailureData *tmfd, bool changingPart)
 
void heap_finish_speculative (Relation relation, ItemPointer tid)
 
void heap_abort_speculative (Relation relation, ItemPointer tid)
 
TM_Result heap_update (Relation relation, ItemPointer otid, HeapTuple newtup, CommandId cid, Snapshot crosscheck, bool wait, struct TM_FailureData *tmfd, LockTupleMode *lockmode, TU_UpdateIndexes *update_indexes)
 
TM_Result heap_lock_tuple (Relation relation, HeapTuple tuple, CommandId cid, LockTupleMode mode, LockWaitPolicy wait_policy, bool follow_updates, Buffer *buffer, struct TM_FailureData *tmfd)
 
void heap_inplace_update (Relation relation, HeapTuple tuple)
 
bool heap_prepare_freeze_tuple (HeapTupleHeader tuple, const struct VacuumCutoffs *cutoffs, HeapPageFreeze *pagefrz, HeapTupleFreeze *frz, bool *totally_frozen)
 
void heap_freeze_execute_prepared (Relation rel, Buffer buffer, TransactionId snapshotConflictHorizon, HeapTupleFreeze *tuples, int ntuples)
 
bool heap_freeze_tuple (HeapTupleHeader tuple, TransactionId relfrozenxid, TransactionId relminmxid, TransactionId FreezeLimit, TransactionId MultiXactCutoff)
 
bool heap_tuple_should_freeze (HeapTupleHeader tuple, const struct VacuumCutoffs *cutoffs, TransactionId *NoFreezePageRelfrozenXid, MultiXactId *NoFreezePageRelminMxid)
 
bool heap_tuple_needs_eventual_freeze (HeapTupleHeader tuple)
 
void simple_heap_insert (Relation relation, HeapTuple tup)
 
void simple_heap_delete (Relation relation, ItemPointer tid)
 
void simple_heap_update (Relation relation, ItemPointer otid, HeapTuple tup, TU_UpdateIndexes *update_indexes)
 
TransactionId heap_index_delete_tuples (Relation rel, TM_IndexDeleteOp *delstate)
 
void heap_page_prune_opt (Relation relation, Buffer buffer)
 
void heap_page_prune (Relation relation, Buffer buffer, struct GlobalVisState *vistest, bool mark_unused_now, PruneResult *presult, OffsetNumber *off_loc)
 
void heap_page_prune_execute (Buffer buffer, OffsetNumber *redirected, int nredirected, OffsetNumber *nowdead, int ndead, OffsetNumber *nowunused, int nunused)
 
void heap_get_root_tuples (Page page, OffsetNumber *root_offsets)
 
void heap_vacuum_rel (Relation rel, struct VacuumParams *params, BufferAccessStrategy bstrategy)
 
bool HeapTupleSatisfiesVisibility (HeapTuple htup, Snapshot snapshot, Buffer buffer)
 
TM_Result HeapTupleSatisfiesUpdate (HeapTuple htup, CommandId curcid, Buffer buffer)
 
HTSV_Result HeapTupleSatisfiesVacuum (HeapTuple htup, TransactionId OldestXmin, Buffer buffer)
 
HTSV_Result HeapTupleSatisfiesVacuumHorizon (HeapTuple htup, Buffer buffer, TransactionId *dead_after)
 
void HeapTupleSetHintBits (HeapTupleHeader tuple, Buffer buffer, uint16 infomask, TransactionId xid)
 
bool HeapTupleHeaderIsOnlyLocked (HeapTupleHeader tuple)
 
bool HeapTupleIsSurelyDead (HeapTuple htup, struct GlobalVisState *vistest)
 
bool ResolveCminCmaxDuringDecoding (struct HTAB *tuplecid_data, Snapshot snapshot, HeapTuple htup, Buffer buffer, CommandId *cmin, CommandId *cmax)
 
void HeapCheckForSerializableConflictOut (bool visible, Relation relation, HeapTuple tuple, Buffer buffer, Snapshot snapshot)
 

Macro Definition Documentation

◆ HEAP_FREEZE_CHECK_XMAX_ABORTED

#define HEAP_FREEZE_CHECK_XMAX_ABORTED   0x02

Definition at line 108 of file heapam.h.

◆ HEAP_FREEZE_CHECK_XMIN_COMMITTED

#define HEAP_FREEZE_CHECK_XMIN_COMMITTED   0x01

Definition at line 107 of file heapam.h.

◆ HEAP_INSERT_FROZEN

#define HEAP_INSERT_FROZEN   TABLE_INSERT_FROZEN

Definition at line 35 of file heapam.h.

◆ HEAP_INSERT_NO_LOGICAL

#define HEAP_INSERT_NO_LOGICAL   TABLE_INSERT_NO_LOGICAL

Definition at line 36 of file heapam.h.

◆ HEAP_INSERT_SKIP_FSM

#define HEAP_INSERT_SKIP_FSM   TABLE_INSERT_SKIP_FSM

Definition at line 34 of file heapam.h.

◆ HEAP_INSERT_SPECULATIVE

#define HEAP_INSERT_SPECULATIVE   0x0010

Definition at line 37 of file heapam.h.
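
The HEAP_INSERT_* values are bit flags that get OR'ed together into the options argument of heap_insert() and heap_multi_insert(). A minimal usage sketch (not part of the header; the helper name insert_frozen_tuple is hypothetical, and the caller is assumed to hold the relation open and locked):

#include "postgres.h"
#include "access/heapam.h"
#include "access/xact.h"

/* Hypothetical helper: insert a tuple pre-marked frozen and excluded from
 * logical decoding, as table-rewrite style callers may do. */
static void
insert_frozen_tuple(Relation rel, HeapTuple tup)
{
	int			options = HEAP_INSERT_FROZEN | HEAP_INSERT_NO_LOGICAL;

	/* NULL BulkInsertState: use the FSM and shared buffers normally */
	heap_insert(rel, tup, GetCurrentCommandId(true), options, NULL);
}

Note that, per the TABLE_INSERT_FROZEN contract in tableam.h, HEAP_INSERT_FROZEN is only safe for relation storage created in the current subtransaction.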

◆ HeapScanIsValid

#define HeapScanIsValid(scan)   PointerIsValid(scan)

Definition at line 241 of file heapam.h.

◆ MaxLockTupleMode

#define MaxLockTupleMode   LockTupleExclusive

Definition at line 43 of file heapam.h.

Typedef Documentation

◆ BulkInsertState

typedef struct BulkInsertStateData* BulkInsertState

Definition at line 39 of file heapam.h.

◆ HeapPageFreeze

◆ HeapScanDesc

typedef struct HeapScanDescData* HeapScanDesc

Definition at line 80 of file heapam.h.

◆ HeapScanDescData

◆ HeapTupleFreeze

◆ IndexFetchHeapData

◆ PruneResult

typedef struct PruneResult PruneResult

Enumeration Type Documentation

◆ HTSV_Result

Enumerator
HEAPTUPLE_DEAD 
HEAPTUPLE_LIVE 
HEAPTUPLE_RECENTLY_DEAD 
HEAPTUPLE_INSERT_IN_PROGRESS 
HEAPTUPLE_DELETE_IN_PROGRESS 

Definition at line 94 of file heapam.h.

95 {
96  HEAPTUPLE_DEAD, /* tuple is dead and deletable */
97  HEAPTUPLE_LIVE, /* tuple is live (committed, no deleter) */
98  HEAPTUPLE_RECENTLY_DEAD, /* tuple is dead, but not deletable yet */
99  HEAPTUPLE_INSERT_IN_PROGRESS, /* inserting xact is still in progress */
100  HEAPTUPLE_DELETE_IN_PROGRESS, /* deleting xact is still in progress */
101 } HTSV_Result;
HTSV_Result
Definition: heapam.h:95
@ HEAPTUPLE_RECENTLY_DEAD
Definition: heapam.h:98
@ HEAPTUPLE_INSERT_IN_PROGRESS
Definition: heapam.h:99
@ HEAPTUPLE_LIVE
Definition: heapam.h:97
@ HEAPTUPLE_DELETE_IN_PROGRESS
Definition: heapam.h:100
@ HEAPTUPLE_DEAD
Definition: heapam.h:96
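
HTSV_Result is the classification returned by HeapTupleSatisfiesVacuum() and HeapTupleSatisfiesVacuumHorizon(), both documented below. A minimal sketch of acting on it, assuming the caller holds at least a share lock on the tuple's buffer and supplies a suitable OldestXmin cutoff (the helper name tuple_is_removable is hypothetical):

#include "postgres.h"
#include "access/heapam.h"

/* Hypothetical helper: true only when VACUUM could remove the tuple now. */
static bool
tuple_is_removable(HeapTuple htup, TransactionId OldestXmin, Buffer buffer)
{
	switch (HeapTupleSatisfiesVacuum(htup, OldestXmin, buffer))
	{
		case HEAPTUPLE_DEAD:
			return true;		/* dead and deletable */
		case HEAPTUPLE_RECENTLY_DEAD:	/* dead, but possibly still visible */
		case HEAPTUPLE_LIVE:
		case HEAPTUPLE_INSERT_IN_PROGRESS:
		case HEAPTUPLE_DELETE_IN_PROGRESS:
			return false;
	}
	return false;				/* keep the compiler quiet */
}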

Function Documentation

◆ FreeBulkInsertState()

void FreeBulkInsertState ( BulkInsertState  bistate)

Definition at line 1778 of file heapam.c.

1779 {
1780  if (bistate->current_buf != InvalidBuffer)
1781  ReleaseBuffer(bistate->current_buf);
1782  FreeAccessStrategy(bistate->strategy);
1783  pfree(bistate);
1784 }
#define InvalidBuffer
Definition: buf.h:25
void ReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:4561
void FreeAccessStrategy(BufferAccessStrategy strategy)
Definition: freelist.c:639
void pfree(void *pointer)
Definition: mcxt.c:1431
BufferAccessStrategy strategy
Definition: hio.h:31
Buffer current_buf
Definition: hio.h:32

References BulkInsertStateData::current_buf, FreeAccessStrategy(), InvalidBuffer, pfree(), ReleaseBuffer(), and BulkInsertStateData::strategy.

Referenced by ATRewriteTable(), CopyFrom(), CopyMultiInsertBufferCleanup(), intorel_shutdown(), and transientrel_shutdown().

◆ GetBulkInsertState()

BulkInsertState GetBulkInsertState ( void  )

Definition at line 1761 of file heapam.c.

1762 {
1763  BulkInsertState bistate;
1764 
1765  bistate = (BulkInsertState) palloc(sizeof(BulkInsertStateData));
1766  bistate->strategy = GetAccessStrategy(BAS_BULKWRITE);
1767  bistate->current_buf = InvalidBuffer;
1768  bistate->next_free = InvalidBlockNumber;
1769  bistate->last_free = InvalidBlockNumber;
1770  bistate->already_extended_by = 0;
1771  return bistate;
1772 }
#define InvalidBlockNumber
Definition: block.h:33
@ BAS_BULKWRITE
Definition: bufmgr.h:37
BufferAccessStrategy GetAccessStrategy(BufferAccessStrategyType btype)
Definition: freelist.c:541
struct BulkInsertStateData * BulkInsertState
Definition: heapam.h:39
void * palloc(Size size)
Definition: mcxt.c:1201
BlockNumber last_free
Definition: hio.h:49
uint32 already_extended_by
Definition: hio.h:50
BlockNumber next_free
Definition: hio.h:48

References BulkInsertStateData::already_extended_by, BAS_BULKWRITE, BulkInsertStateData::current_buf, GetAccessStrategy(), InvalidBlockNumber, InvalidBuffer, BulkInsertStateData::last_free, BulkInsertStateData::next_free, palloc(), and BulkInsertStateData::strategy.

Referenced by ATRewriteTable(), CopyFrom(), CopyMultiInsertBufferInit(), intorel_startup(), and transientrel_startup().
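
Taken together, GetBulkInsertState(), heap_insert() and FreeBulkInsertState() form the bulk-loading pattern used by callers such as CopyFrom() and ATRewriteTable(). A minimal sketch, assuming the relation and tuples are supplied by the caller (the helper name bulk_insert_tuples is hypothetical):

#include "postgres.h"
#include "access/heapam.h"
#include "access/xact.h"

/* Hypothetical helper: load many tuples while reusing one ring-buffer
 * access strategy and keeping the current target buffer pinned. */
static void
bulk_insert_tuples(Relation rel, HeapTuple *tuples, int ntuples)
{
	BulkInsertState bistate = GetBulkInsertState();
	CommandId	cid = GetCurrentCommandId(true);

	for (int i = 0; i < ntuples; i++)
		heap_insert(rel, tuples[i], cid, HEAP_INSERT_SKIP_FSM, bistate);

	FreeBulkInsertState(bistate);	/* drops the kept pin and the strategy */
}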

◆ heap_abort_speculative()

void heap_abort_speculative ( Relation  relation,
ItemPointer  tid 
)

Definition at line 5740 of file heapam.c.

5741 {
5742  TransactionId xid = GetCurrentTransactionId();
5743  ItemId lp;
5744  HeapTupleData tp;
5745  Page page;
5746  BlockNumber block;
5747  Buffer buffer;
5748  TransactionId prune_xid;
5749 
5750  Assert(ItemPointerIsValid(tid));
5751 
5752  block = ItemPointerGetBlockNumber(tid);
5753  buffer = ReadBuffer(relation, block);
5754  page = BufferGetPage(buffer);
5755 
5756  LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
5757 
5758  /*
5759  * Page can't be all visible, we just inserted into it, and are still
5760  * running.
5761  */
5762  Assert(!PageIsAllVisible(page));
5763 
5764  lp = PageGetItemId(page, ItemPointerGetOffsetNumber(tid));
5765  Assert(ItemIdIsNormal(lp));
5766 
5767  tp.t_tableOid = RelationGetRelid(relation);
5768  tp.t_data = (HeapTupleHeader) PageGetItem(page, lp);
5769  tp.t_len = ItemIdGetLength(lp);
5770  tp.t_self = *tid;
5771 
5772  /*
5773  * Sanity check that the tuple really is a speculatively inserted tuple,
5774  * inserted by us.
5775  */
5776  if (tp.t_data->t_choice.t_heap.t_xmin != xid)
5777  elog(ERROR, "attempted to kill a tuple inserted by another transaction");
5778  if (!(IsToastRelation(relation) || HeapTupleHeaderIsSpeculative(tp.t_data)))
5779  elog(ERROR, "attempted to kill a non-speculative tuple");
5780  Assert(!HeapTupleHeaderIsHeapOnly(tp.t_data));
5781 
5782  /*
5783  * No need to check for serializable conflicts here. There is never a
5784  * need for a combo CID, either. No need to extract replica identity, or
5785  * do anything special with infomask bits.
5786  */
5787 
5788  START_CRIT_SECTION();
5789 
5790  /*
5791  * The tuple will become DEAD immediately. Flag that this page is a
5792  * candidate for pruning by setting xmin to TransactionXmin. While not
5793  * immediately prunable, it is the oldest xid we can cheaply determine
5794  * that's safe against wraparound / being older than the table's
5795  * relfrozenxid. To defend against the unlikely case of a new relation
5796  * having a newer relfrozenxid than our TransactionXmin, use relfrozenxid
5797  * if so (vacuum can't subsequently move relfrozenxid to beyond
5798  * TransactionXmin, so there's no race here).
5799  */
5800  Assert(TransactionIdIsValid(TransactionXmin));
5801  if (TransactionIdPrecedes(TransactionXmin, relation->rd_rel->relfrozenxid))
5802  prune_xid = relation->rd_rel->relfrozenxid;
5803  else
5804  prune_xid = TransactionXmin;
5805  PageSetPrunable(page, prune_xid);
5806 
5807  /* store transaction information of xact deleting the tuple */
5808  tp.t_data->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
5809  tp.t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED;
5810 
5811  /*
5812  * Set the tuple header xmin to InvalidTransactionId. This makes the
5813  * tuple immediately invisible everyone. (In particular, to any
5814  * transactions waiting on the speculative token, woken up later.)
5815  */
5816  HeapTupleHeaderSetXmin(tp.t_data, InvalidTransactionId);
5817 
5818  /* Clear the speculative insertion token too */
5819  tp.t_data->t_ctid = tp.t_self;
5820 
5821  MarkBufferDirty(buffer);
5822 
5823  /*
5824  * XLOG stuff
5825  *
5826  * The WAL records generated here match heap_delete(). The same recovery
5827  * routines are used.
5828  */
5829  if (RelationNeedsWAL(relation))
5830  {
5831  xl_heap_delete xlrec;
5832  XLogRecPtr recptr;
5833 
5834  xlrec.flags = XLH_DELETE_IS_SUPER;
5835  xlrec.infobits_set = compute_infobits(tp.t_data->t_infomask,
5836  tp.t_data->t_infomask2);
5837  xlrec.offnum = ItemPointerGetOffsetNumber(&tp.t_self);
5838  xlrec.xmax = xid;
5839 
5840  XLogBeginInsert();
5841  XLogRegisterData((char *) &xlrec, SizeOfHeapDelete);
5842  XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
5843 
5844  /* No replica identity & replication origin logged */
5845 
5846  recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_DELETE);
5847 
5848  PageSetLSN(page, recptr);
5849  }
5850 
5851  END_CRIT_SECTION();
5852 
5853  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
5854 
5855  if (HeapTupleHasExternal(&tp))
5856  {
5857  Assert(!IsToastRelation(relation));
5858  heap_toast_delete(relation, &tp, true);
5859  }
5860 
5861  /*
5862  * Never need to mark tuple for invalidation, since catalogs don't support
5863  * speculative insertion
5864  */
5865 
5866  /* Now we can release the buffer */
5867  ReleaseBuffer(buffer);
5868 
5869  /* count deletion, as we counted the insertion too */
5870  pgstat_count_heap_delete(relation);
5871 }
uint32 BlockNumber
Definition: block.h:31
int Buffer
Definition: buf.h:23
void MarkBufferDirty(Buffer buffer)
Definition: bufmgr.c:2190
void LockBuffer(Buffer buffer, int mode)
Definition: bufmgr.c:4796
Buffer ReadBuffer(Relation reln, BlockNumber blockNum)
Definition: bufmgr.c:735
#define BUFFER_LOCK_UNLOCK
Definition: bufmgr.h:157
static Page BufferGetPage(Buffer buffer)
Definition: bufmgr.h:350
#define BUFFER_LOCK_EXCLUSIVE
Definition: bufmgr.h:159
Pointer Page
Definition: bufpage.h:78
static Item PageGetItem(Page page, ItemId itemId)
Definition: bufpage.h:351
static ItemId PageGetItemId(Page page, OffsetNumber offsetNumber)
Definition: bufpage.h:240
static bool PageIsAllVisible(Page page)
Definition: bufpage.h:426
static void PageSetLSN(Page page, XLogRecPtr lsn)
Definition: bufpage.h:388
#define PageSetPrunable(page, xid)
Definition: bufpage.h:444
uint32 TransactionId
Definition: c.h:641
bool IsToastRelation(Relation relation)
Definition: catalog.c:147
#define ERROR
Definition: elog.h:39
static uint8 compute_infobits(uint16 infomask, uint16 infomask2)
Definition: heapam.c:2472
#define XLOG_HEAP_DELETE
Definition: heapam_xlog.h:33
#define SizeOfHeapDelete
Definition: heapam_xlog.h:115
#define XLH_DELETE_IS_SUPER
Definition: heapam_xlog.h:99
void heap_toast_delete(Relation rel, HeapTuple oldtup, bool is_speculative)
Definition: heaptoast.c:43
HeapTupleHeaderData * HeapTupleHeader
Definition: htup.h:23
#define HEAP_KEYS_UPDATED
Definition: htup_details.h:275
#define HeapTupleHeaderIsHeapOnly(tup)
Definition: htup_details.h:499
#define HeapTupleHeaderSetXmin(tup, xid)
Definition: htup_details.h:315
#define HEAP_XMAX_BITS
Definition: htup_details.h:267
#define HeapTupleHasExternal(tuple)
Definition: htup_details.h:671
#define HEAP_MOVED
Definition: htup_details.h:213
#define HeapTupleHeaderIsSpeculative(tup)
Definition: htup_details.h:428
#define ItemIdGetLength(itemId)
Definition: itemid.h:59
#define ItemIdIsNormal(itemId)
Definition: itemid.h:99
static OffsetNumber ItemPointerGetOffsetNumber(const ItemPointerData *pointer)
Definition: itemptr.h:124
static BlockNumber ItemPointerGetBlockNumber(const ItemPointerData *pointer)
Definition: itemptr.h:103
static bool ItemPointerIsValid(const ItemPointerData *pointer)
Definition: itemptr.h:83
Assert(fmt[strlen(fmt) - 1] !='\n')
#define START_CRIT_SECTION()
Definition: miscadmin.h:149
#define END_CRIT_SECTION()
Definition: miscadmin.h:151
void pgstat_count_heap_delete(Relation rel)
#define RelationGetRelid(relation)
Definition: rel.h:504
#define RelationNeedsWAL(relation)
Definition: rel.h:627
TransactionId TransactionXmin
Definition: snapmgr.c:105
ItemPointerData t_self
Definition: htup.h:65
uint32 t_len
Definition: htup.h:64
HeapTupleHeader t_data
Definition: htup.h:68
Oid t_tableOid
Definition: htup.h:66
TransactionId t_xmin
Definition: htup_details.h:124
union HeapTupleHeaderData::@45 t_choice
ItemPointerData t_ctid
Definition: htup_details.h:161
HeapTupleFields t_heap
Definition: htup_details.h:157
Form_pg_class rd_rel
Definition: rel.h:111
TransactionId xmax
Definition: heapam_xlog.h:109
OffsetNumber offnum
Definition: heapam_xlog.h:110
uint8 infobits_set
Definition: heapam_xlog.h:111
bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.c:280
#define InvalidTransactionId
Definition: transam.h:31
#define TransactionIdIsValid(xid)
Definition: transam.h:41
TransactionId GetCurrentTransactionId(void)
Definition: xact.c:445
uint64 XLogRecPtr
Definition: xlogdefs.h:21
void XLogRegisterData(char *data, uint32 len)
Definition: xloginsert.c:365
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:475
void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
Definition: xloginsert.c:243
void XLogBeginInsert(void)
Definition: xloginsert.c:150
#define REGBUF_STANDARD
Definition: xloginsert.h:34

References Assert(), BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetPage(), compute_infobits(), elog(), END_CRIT_SECTION, ERROR, xl_heap_delete::flags, GetCurrentTransactionId(), HEAP_KEYS_UPDATED, HEAP_MOVED, heap_toast_delete(), HEAP_XMAX_BITS, HeapTupleHasExternal, HeapTupleHeaderIsHeapOnly, HeapTupleHeaderIsSpeculative, HeapTupleHeaderSetXmin, xl_heap_delete::infobits_set, InvalidTransactionId, IsToastRelation(), ItemIdGetLength, ItemIdIsNormal, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), ItemPointerIsValid(), LockBuffer(), MarkBufferDirty(), xl_heap_delete::offnum, PageGetItem(), PageGetItemId(), PageIsAllVisible(), PageSetLSN(), PageSetPrunable, pgstat_count_heap_delete(), RelationData::rd_rel, ReadBuffer(), REGBUF_STANDARD, RelationGetRelid, RelationNeedsWAL, ReleaseBuffer(), SizeOfHeapDelete, START_CRIT_SECTION, HeapTupleHeaderData::t_choice, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleHeaderData::t_heap, HeapTupleHeaderData::t_infomask, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, HeapTupleFields::t_xmin, TransactionIdIsValid, TransactionIdPrecedes(), TransactionXmin, XLH_DELETE_IS_SUPER, XLOG_HEAP_DELETE, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), XLogRegisterData(), and xl_heap_delete::xmax.

Referenced by heapam_tuple_complete_speculative(), and toast_delete_datum().

◆ heap_beginscan()

TableScanDesc heap_beginscan ( Relation  relation,
Snapshot  snapshot,
int  nkeys,
ScanKey  key,
ParallelTableScanDesc  parallel_scan,
uint32  flags 
)

Definition at line 931 of file heapam.c.

935 {
936  HeapScanDesc scan;
937 
938  /*
939  * increment relation ref count while scanning relation
940  *
941  * This is just to make really sure the relcache entry won't go away while
942  * the scan has a pointer to it. Caller should be holding the rel open
943  * anyway, so this is redundant in all normal scenarios...
944  */
946 
947  /*
948  * allocate and initialize scan descriptor
949  */
950  scan = (HeapScanDesc) palloc(sizeof(HeapScanDescData));
951 
952  scan->rs_base.rs_rd = relation;
953  scan->rs_base.rs_snapshot = snapshot;
954  scan->rs_base.rs_nkeys = nkeys;
955  scan->rs_base.rs_flags = flags;
956  scan->rs_base.rs_parallel = parallel_scan;
957  scan->rs_strategy = NULL; /* set in initscan */
958 
959  /*
960  * Disable page-at-a-time mode if it's not a MVCC-safe snapshot.
961  */
962  if (!(snapshot && IsMVCCSnapshot(snapshot)))
964 
965  /*
966  * For seqscan and sample scans in a serializable transaction, acquire a
967  * predicate lock on the entire relation. This is required not only to
968  * lock all the matching tuples, but also to conflict with new insertions
969  * into the table. In an indexscan, we take page locks on the index pages
970  * covering the range specified in the scan qual, but in a heap scan there
971  * is nothing more fine-grained to lock. A bitmap scan is a different
972  * story, there we have already scanned the index and locked the index
973  * pages covering the predicate. But in that case we still have to lock
974  * any matching heap tuples. For sample scan we could optimize the locking
975  * to be at least page-level granularity, but we'd need to add per-tuple
976  * locking for that.
977  */
979  {
980  /*
981  * Ensure a missing snapshot is noticed reliably, even if the
982  * isolation mode means predicate locking isn't performed (and
983  * therefore the snapshot isn't used here).
984  */
985  Assert(snapshot);
986  PredicateLockRelation(relation, snapshot);
987  }
988 
989  /* we only need to set this up once */
990  scan->rs_ctup.t_tableOid = RelationGetRelid(relation);
991 
992  /*
993  * Allocate memory to keep track of page allocation for parallel workers
994  * when doing a parallel scan.
995  */
996  if (parallel_scan != NULL)
998  else
999  scan->rs_parallelworkerdata = NULL;
1000 
1001  /*
1002  * we do this here instead of in initscan() because heap_rescan also calls
1003  * initscan() and we don't want to allocate memory again
1004  */
1005  if (nkeys > 0)
1006  scan->rs_base.rs_key = (ScanKey) palloc(sizeof(ScanKeyData) * nkeys);
1007  else
1008  scan->rs_base.rs_key = NULL;
1009 
1010  initscan(scan, key, false);
1011 
1012  return (TableScanDesc) scan;
1013 }
static void initscan(HeapScanDesc scan, ScanKey key, bool keep_startblock)
Definition: heapam.c:233
struct HeapScanDescData * HeapScanDesc
Definition: heapam.h:80
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:77
void PredicateLockRelation(Relation relation, Snapshot snapshot)
Definition: predicate.c:2546
void RelationIncrementReferenceCount(Relation rel)
Definition: relcache.c:2155
ScanKeyData * ScanKey
Definition: skey.h:75
#define IsMVCCSnapshot(snapshot)
Definition: snapmgr.h:62
BufferAccessStrategy rs_strategy
Definition: heapam.h:65
ParallelBlockTableScanWorkerData * rs_parallelworkerdata
Definition: heapam.h:73
HeapTupleData rs_ctup
Definition: heapam.h:67
TableScanDescData rs_base
Definition: heapam.h:50
Relation rs_rd
Definition: relscan.h:34
uint32 rs_flags
Definition: relscan.h:47
struct ScanKeyData * rs_key
Definition: relscan.h:37
struct SnapshotData * rs_snapshot
Definition: relscan.h:35
struct ParallelTableScanDescData * rs_parallel
Definition: relscan.h:49
@ SO_ALLOW_PAGEMODE
Definition: tableam.h:61
@ SO_TYPE_SAMPLESCAN
Definition: tableam.h:50
@ SO_TYPE_SEQSCAN
Definition: tableam.h:48

References Assert(), if(), initscan(), IsMVCCSnapshot, sort-test::key, palloc(), PredicateLockRelation(), RelationGetRelid, RelationIncrementReferenceCount(), HeapScanDescData::rs_base, HeapScanDescData::rs_ctup, TableScanDescData::rs_flags, TableScanDescData::rs_key, TableScanDescData::rs_nkeys, TableScanDescData::rs_parallel, HeapScanDescData::rs_parallelworkerdata, TableScanDescData::rs_rd, TableScanDescData::rs_snapshot, HeapScanDescData::rs_strategy, SO_ALLOW_PAGEMODE, SO_TYPE_SAMPLESCAN, SO_TYPE_SEQSCAN, and HeapTupleData::t_tableOid.
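
A minimal sketch of the begin/next/end scan cycle, assuming the caller has already opened and locked the relation and wants the active snapshot (the helper name count_heap_tuples is hypothetical):

#include "postgres.h"
#include "access/heapam.h"
#include "access/tableam.h"
#include "utils/snapmgr.h"

/* Hypothetical helper: count heap tuples visible to the active snapshot. */
static uint64
count_heap_tuples(Relation rel)
{
	TableScanDesc scan;
	HeapTuple	tuple;
	uint64		ntuples = 0;

	scan = heap_beginscan(rel, GetActiveSnapshot(), 0, NULL, NULL,
						  SO_TYPE_SEQSCAN | SO_ALLOW_STRAT |
						  SO_ALLOW_SYNC | SO_ALLOW_PAGEMODE);

	while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
		ntuples++;

	heap_endscan(scan);
	return ntuples;
}

Most callers reach this function through the generic table AM wrappers (table_beginscan() and friends) rather than calling it directly.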

◆ heap_delete()

TM_Result heap_delete ( Relation  relation,
ItemPointer  tid,
CommandId  cid,
Snapshot  crosscheck,
bool  wait,
struct TM_FailureData tmfd,
bool  changingPart 
)

Definition at line 2517 of file heapam.c.

2520 {
2521  TM_Result result;
2523  ItemId lp;
2524  HeapTupleData tp;
2525  Page page;
2526  BlockNumber block;
2527  Buffer buffer;
2528  Buffer vmbuffer = InvalidBuffer;
2529  TransactionId new_xmax;
2530  uint16 new_infomask,
2531  new_infomask2;
2532  bool have_tuple_lock = false;
2533  bool iscombo;
2534  bool all_visible_cleared = false;
2535  HeapTuple old_key_tuple = NULL; /* replica identity of the tuple */
2536  bool old_key_copied = false;
2537 
2538  Assert(ItemPointerIsValid(tid));
2539 
2540  /*
2541  * Forbid this during a parallel operation, lest it allocate a combo CID.
2542  * Other workers might need that combo CID for visibility checks, and we
2543  * have no provision for broadcasting it to them.
2544  */
2545  if (IsInParallelMode())
2546  ereport(ERROR,
2547  (errcode(ERRCODE_INVALID_TRANSACTION_STATE),
2548  errmsg("cannot delete tuples during a parallel operation")));
2549 
2550  block = ItemPointerGetBlockNumber(tid);
2551  buffer = ReadBuffer(relation, block);
2552  page = BufferGetPage(buffer);
2553 
2554  /*
2555  * Before locking the buffer, pin the visibility map page if it appears to
2556  * be necessary. Since we haven't got the lock yet, someone else might be
2557  * in the middle of changing this, so we'll need to recheck after we have
2558  * the lock.
2559  */
2560  if (PageIsAllVisible(page))
2561  visibilitymap_pin(relation, block, &vmbuffer);
2562 
2564 
2565  lp = PageGetItemId(page, ItemPointerGetOffsetNumber(tid));
2566  Assert(ItemIdIsNormal(lp));
2567 
2568  tp.t_tableOid = RelationGetRelid(relation);
2569  tp.t_data = (HeapTupleHeader) PageGetItem(page, lp);
2570  tp.t_len = ItemIdGetLength(lp);
2571  tp.t_self = *tid;
2572 
2573 l1:
2574 
2575  /*
2576  * If we didn't pin the visibility map page and the page has become all
2577  * visible while we were busy locking the buffer, we'll have to unlock and
2578  * re-lock, to avoid holding the buffer lock across an I/O. That's a bit
2579  * unfortunate, but hopefully shouldn't happen often.
2580  */
2581  if (vmbuffer == InvalidBuffer && PageIsAllVisible(page))
2582  {
2583  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
2584  visibilitymap_pin(relation, block, &vmbuffer);
2586  }
2587 
2588  result = HeapTupleSatisfiesUpdate(&tp, cid, buffer);
2589 
2590  if (result == TM_Invisible)
2591  {
2592  UnlockReleaseBuffer(buffer);
2593  ereport(ERROR,
2594  (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
2595  errmsg("attempted to delete invisible tuple")));
2596  }
2597  else if (result == TM_BeingModified && wait)
2598  {
2599  TransactionId xwait;
2600  uint16 infomask;
2601 
2602  /* must copy state data before unlocking buffer */
2603  xwait = HeapTupleHeaderGetRawXmax(tp.t_data);
2604  infomask = tp.t_data->t_infomask;
2605 
2606  /*
2607  * Sleep until concurrent transaction ends -- except when there's a
2608  * single locker and it's our own transaction. Note we don't care
2609  * which lock mode the locker has, because we need the strongest one.
2610  *
2611  * Before sleeping, we need to acquire tuple lock to establish our
2612  * priority for the tuple (see heap_lock_tuple). LockTuple will
2613  * release us when we are next-in-line for the tuple.
2614  *
2615  * If we are forced to "start over" below, we keep the tuple lock;
2616  * this arranges that we stay at the head of the line while rechecking
2617  * tuple state.
2618  */
2619  if (infomask & HEAP_XMAX_IS_MULTI)
2620  {
2621  bool current_is_member = false;
2622 
2623  if (DoesMultiXactIdConflict((MultiXactId) xwait, infomask,
2624  LockTupleExclusive, &current_is_member))
2625  {
2626  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
2627 
2628  /*
2629  * Acquire the lock, if necessary (but skip it when we're
2630  * requesting a lock and already have one; avoids deadlock).
2631  */
2632  if (!current_is_member)
2634  LockWaitBlock, &have_tuple_lock);
2635 
2636  /* wait for multixact */
2638  relation, &(tp.t_self), XLTW_Delete,
2639  NULL);
2641 
2642  /*
2643  * If xwait had just locked the tuple then some other xact
2644  * could update this tuple before we get to this point. Check
2645  * for xmax change, and start over if so.
2646  *
2647  * We also must start over if we didn't pin the VM page, and
2648  * the page has become all visible.
2649  */
2650  if ((vmbuffer == InvalidBuffer && PageIsAllVisible(page)) ||
2651  xmax_infomask_changed(tp.t_data->t_infomask, infomask) ||
2653  xwait))
2654  goto l1;
2655  }
2656 
2657  /*
2658  * You might think the multixact is necessarily done here, but not
2659  * so: it could have surviving members, namely our own xact or
2660  * other subxacts of this backend. It is legal for us to delete
2661  * the tuple in either case, however (the latter case is
2662  * essentially a situation of upgrading our former shared lock to
2663  * exclusive). We don't bother changing the on-disk hint bits
2664  * since we are about to overwrite the xmax altogether.
2665  */
2666  }
2667  else if (!TransactionIdIsCurrentTransactionId(xwait))
2668  {
2669  /*
2670  * Wait for regular transaction to end; but first, acquire tuple
2671  * lock.
2672  */
2673  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
2675  LockWaitBlock, &have_tuple_lock);
2676  XactLockTableWait(xwait, relation, &(tp.t_self), XLTW_Delete);
2678 
2679  /*
2680  * xwait is done, but if xwait had just locked the tuple then some
2681  * other xact could update this tuple before we get to this point.
2682  * Check for xmax change, and start over if so.
2683  *
2684  * We also must start over if we didn't pin the VM page, and the
2685  * page has become all visible.
2686  */
2687  if ((vmbuffer == InvalidBuffer && PageIsAllVisible(page)) ||
2688  xmax_infomask_changed(tp.t_data->t_infomask, infomask) ||
2690  xwait))
2691  goto l1;
2692 
2693  /* Otherwise check if it committed or aborted */
2694  UpdateXmaxHintBits(tp.t_data, buffer, xwait);
2695  }
2696 
2697  /*
2698  * We may overwrite if previous xmax aborted, or if it committed but
2699  * only locked the tuple without updating it.
2700  */
2701  if ((tp.t_data->t_infomask & HEAP_XMAX_INVALID) ||
2704  result = TM_Ok;
2705  else if (!ItemPointerEquals(&tp.t_self, &tp.t_data->t_ctid))
2706  result = TM_Updated;
2707  else
2708  result = TM_Deleted;
2709  }
2710 
2711  /* sanity check the result HeapTupleSatisfiesUpdate() and the logic above */
2712  if (result != TM_Ok)
2713  {
2714  Assert(result == TM_SelfModified ||
2715  result == TM_Updated ||
2716  result == TM_Deleted ||
2717  result == TM_BeingModified);
2719  Assert(result != TM_Updated ||
2720  !ItemPointerEquals(&tp.t_self, &tp.t_data->t_ctid));
2721  }
2722 
2723  if (crosscheck != InvalidSnapshot && result == TM_Ok)
2724  {
2725  /* Perform additional check for transaction-snapshot mode RI updates */
2726  if (!HeapTupleSatisfiesVisibility(&tp, crosscheck, buffer))
2727  result = TM_Updated;
2728  }
2729 
2730  if (result != TM_Ok)
2731  {
2732  tmfd->ctid = tp.t_data->t_ctid;
2734  if (result == TM_SelfModified)
2735  tmfd->cmax = HeapTupleHeaderGetCmax(tp.t_data);
2736  else
2737  tmfd->cmax = InvalidCommandId;
2738  UnlockReleaseBuffer(buffer);
2739  if (have_tuple_lock)
2740  UnlockTupleTuplock(relation, &(tp.t_self), LockTupleExclusive);
2741  if (vmbuffer != InvalidBuffer)
2742  ReleaseBuffer(vmbuffer);
2743  return result;
2744  }
2745 
2746  /*
2747  * We're about to do the actual delete -- check for conflict first, to
2748  * avoid possibly having to roll back work we've just done.
2749  *
2750  * This is safe without a recheck as long as there is no possibility of
2751  * another process scanning the page between this check and the delete
2752  * being visible to the scan (i.e., an exclusive buffer content lock is
2753  * continuously held from this point until the tuple delete is visible).
2754  */
2755  CheckForSerializableConflictIn(relation, tid, BufferGetBlockNumber(buffer));
2756 
2757  /* replace cid with a combo CID if necessary */
2758  HeapTupleHeaderAdjustCmax(tp.t_data, &cid, &iscombo);
2759 
2760  /*
2761  * Compute replica identity tuple before entering the critical section so
2762  * we don't PANIC upon a memory allocation failure.
2763  */
2764  old_key_tuple = ExtractReplicaIdentity(relation, &tp, true, &old_key_copied);
2765 
2766  /*
2767  * If this is the first possibly-multixact-able operation in the current
2768  * transaction, set my per-backend OldestMemberMXactId setting. We can be
2769  * certain that the transaction will never become a member of any older
2770  * MultiXactIds than that. (We have to do this even if we end up just
2771  * using our own TransactionId below, since some other backend could
2772  * incorporate our XID into a MultiXact immediately afterwards.)
2773  */
2775 
2778  xid, LockTupleExclusive, true,
2779  &new_xmax, &new_infomask, &new_infomask2);
2780 
2782 
2783  /*
2784  * If this transaction commits, the tuple will become DEAD sooner or
2785  * later. Set flag that this page is a candidate for pruning once our xid
2786  * falls below the OldestXmin horizon. If the transaction finally aborts,
2787  * the subsequent page pruning will be a no-op and the hint will be
2788  * cleared.
2789  */
2790  PageSetPrunable(page, xid);
2791 
2792  if (PageIsAllVisible(page))
2793  {
2794  all_visible_cleared = true;
2795  PageClearAllVisible(page);
2796  visibilitymap_clear(relation, BufferGetBlockNumber(buffer),
2797  vmbuffer, VISIBILITYMAP_VALID_BITS);
2798  }
2799 
2800  /* store transaction information of xact deleting the tuple */
2803  tp.t_data->t_infomask |= new_infomask;
2804  tp.t_data->t_infomask2 |= new_infomask2;
2806  HeapTupleHeaderSetXmax(tp.t_data, new_xmax);
2807  HeapTupleHeaderSetCmax(tp.t_data, cid, iscombo);
2808  /* Make sure there is no forward chain link in t_ctid */
2809  tp.t_data->t_ctid = tp.t_self;
2810 
2811  /* Signal that this is actually a move into another partition */
2812  if (changingPart)
2814 
2815  MarkBufferDirty(buffer);
2816 
2817  /*
2818  * XLOG stuff
2819  *
2820  * NB: heap_abort_speculative() uses the same xlog record and replay
2821  * routines.
2822  */
2823  if (RelationNeedsWAL(relation))
2824  {
2825  xl_heap_delete xlrec;
2826  xl_heap_header xlhdr;
2827  XLogRecPtr recptr;
2828 
2829  /*
2830  * For logical decode we need combo CIDs to properly decode the
2831  * catalog
2832  */
2834  log_heap_new_cid(relation, &tp);
2835 
2836  xlrec.flags = 0;
2837  if (all_visible_cleared)
2839  if (changingPart)
2842  tp.t_data->t_infomask2);
2844  xlrec.xmax = new_xmax;
2845 
2846  if (old_key_tuple != NULL)
2847  {
2848  if (relation->rd_rel->relreplident == REPLICA_IDENTITY_FULL)
2850  else
2852  }
2853 
2854  XLogBeginInsert();
2855  XLogRegisterData((char *) &xlrec, SizeOfHeapDelete);
2856 
2857  XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
2858 
2859  /*
2860  * Log replica identity of the deleted tuple if there is one
2861  */
2862  if (old_key_tuple != NULL)
2863  {
2864  xlhdr.t_infomask2 = old_key_tuple->t_data->t_infomask2;
2865  xlhdr.t_infomask = old_key_tuple->t_data->t_infomask;
2866  xlhdr.t_hoff = old_key_tuple->t_data->t_hoff;
2867 
2868  XLogRegisterData((char *) &xlhdr, SizeOfHeapHeader);
2869  XLogRegisterData((char *) old_key_tuple->t_data
2871  old_key_tuple->t_len
2873  }
2874 
2875  /* filtering by origin on a row level is much more efficient */
2877 
2878  recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_DELETE);
2879 
2880  PageSetLSN(page, recptr);
2881  }
2882 
2883  END_CRIT_SECTION();
2884 
2885  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
2886 
2887  if (vmbuffer != InvalidBuffer)
2888  ReleaseBuffer(vmbuffer);
2889 
2890  /*
2891  * If the tuple has toasted out-of-line attributes, we need to delete
2892  * those items too. We have to do this before releasing the buffer
2893  * because we need to look at the contents of the tuple, but it's OK to
2894  * release the content lock on the buffer first.
2895  */
2896  if (relation->rd_rel->relkind != RELKIND_RELATION &&
2897  relation->rd_rel->relkind != RELKIND_MATVIEW)
2898  {
2899  /* toast table entries should never be recursively toasted */
2901  }
2902  else if (HeapTupleHasExternal(&tp))
2903  heap_toast_delete(relation, &tp, false);
2904 
2905  /*
2906  * Mark tuple for invalidation from system caches at next command
2907  * boundary. We have to do this before releasing the buffer because we
2908  * need to look at the contents of the tuple.
2909  */
2910  CacheInvalidateHeapTuple(relation, &tp, NULL);
2911 
2912  /* Now we can release the buffer */
2913  ReleaseBuffer(buffer);
2914 
2915  /*
2916  * Release the lmgr tuple lock, if we had it.
2917  */
2918  if (have_tuple_lock)
2919  UnlockTupleTuplock(relation, &(tp.t_self), LockTupleExclusive);
2920 
2921  pgstat_count_heap_delete(relation);
2922 
2923  if (old_key_tuple != NULL && old_key_copied)
2924  heap_freetuple(old_key_tuple);
2925 
2926  return TM_Ok;
2927 }
BlockNumber BufferGetBlockNumber(Buffer buffer)
Definition: bufmgr.c:3378
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:4578
static void PageClearAllVisible(Page page)
Definition: bufpage.h:436
#define InvalidCommandId
Definition: c.h:658
unsigned short uint16
Definition: c.h:494
TransactionId MultiXactId
Definition: c.h:651
void HeapTupleHeaderAdjustCmax(HeapTupleHeader tup, CommandId *cmax, bool *iscombo)
Definition: combocid.c:153
CommandId HeapTupleHeaderGetCmax(HeapTupleHeader tup)
Definition: combocid.c:118
int errcode(int sqlerrcode)
Definition: elog.c:860
int errmsg(const char *fmt,...)
Definition: elog.c:1075
#define ereport(elevel,...)
Definition: elog.h:149
static bool DoesMultiXactIdConflict(MultiXactId multi, uint16 infomask, LockTupleMode lockmode, bool *current_is_member)
Definition: heapam.c:7126
static XLogRecPtr log_heap_new_cid(Relation relation, HeapTuple tup)
Definition: heapam.c:8590
static void compute_new_xmax_infomask(TransactionId xmax, uint16 old_infomask, uint16 old_infomask2, TransactionId add_to_xmax, LockTupleMode mode, bool is_update, TransactionId *result_xmax, uint16 *result_infomask, uint16 *result_infomask2)
Definition: heapam.c:4891
static HeapTuple ExtractReplicaIdentity(Relation relation, HeapTuple tp, bool key_required, bool *copy)
Definition: heapam.c:8671
static bool heap_acquire_tuplock(Relation relation, ItemPointer tid, LockTupleMode mode, LockWaitPolicy wait_policy, bool *have_tuple_lock)
Definition: heapam.c:4842
static void MultiXactIdWait(MultiXactId multi, MultiXactStatus status, uint16 infomask, Relation rel, ItemPointer ctid, XLTW_Oper oper, int *remaining)
Definition: heapam.c:7303
static bool xmax_infomask_changed(uint16 new_infomask, uint16 old_infomask)
Definition: heapam.c:2494
#define UnlockTupleTuplock(rel, tup, mode)
Definition: heapam.c:171
static void UpdateXmaxHintBits(HeapTupleHeader tuple, Buffer buffer, TransactionId xid)
Definition: heapam.c:1739
bool HeapTupleSatisfiesVisibility(HeapTuple htup, Snapshot snapshot, Buffer buffer)
bool HeapTupleHeaderIsOnlyLocked(HeapTupleHeader tuple)
TM_Result HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid, Buffer buffer)
#define XLH_DELETE_CONTAINS_OLD_KEY
Definition: heapam_xlog.h:98
#define XLH_DELETE_ALL_VISIBLE_CLEARED
Definition: heapam_xlog.h:96
#define SizeOfHeapHeader
Definition: heapam_xlog.h:151
#define XLH_DELETE_IS_PARTITION_MOVE
Definition: heapam_xlog.h:100
#define XLH_DELETE_CONTAINS_OLD_TUPLE
Definition: heapam_xlog.h:97
void heap_freetuple(HeapTuple htup)
Definition: heaptuple.c:1435
#define HEAP_XMAX_IS_LOCKED_ONLY(infomask)
Definition: htup_details.h:227
#define SizeofHeapTupleHeader
Definition: htup_details.h:185
#define HeapTupleHeaderSetXmax(tup, xid)
Definition: htup_details.h:376
#define HeapTupleHeaderClearHotUpdated(tup)
Definition: htup_details.h:494
#define HEAP_XMAX_IS_MULTI
Definition: htup_details.h:209
#define HEAP_XMAX_INVALID
Definition: htup_details.h:208
#define HeapTupleHeaderSetMovedPartitions(tup)
Definition: htup_details.h:447
#define HeapTupleHeaderGetRawXmax(tup)
Definition: htup_details.h:371
#define HeapTupleHeaderGetUpdateXid(tup)
Definition: htup_details.h:361
#define HeapTupleHeaderSetCmax(tup, cid, iscombo)
Definition: htup_details.h:401
void CacheInvalidateHeapTuple(Relation relation, HeapTuple tuple, HeapTuple newtuple)
Definition: inval.c:1205
bool ItemPointerEquals(ItemPointer pointer1, ItemPointer pointer2)
Definition: itemptr.c:35
void XactLockTableWait(TransactionId xid, Relation rel, ItemPointer ctid, XLTW_Oper oper)
Definition: lmgr.c:668
@ XLTW_Delete
Definition: lmgr.h:28
@ LockWaitBlock
Definition: lockoptions.h:39
@ LockTupleExclusive
Definition: lockoptions.h:58
void MultiXactIdSetOldestMember(void)
Definition: multixact.c:626
@ MultiXactStatusUpdate
Definition: multixact.h:50
void CheckForSerializableConflictIn(Relation relation, ItemPointer tid, BlockNumber blkno)
Definition: predicate.c:4306
#define RelationIsAccessibleInLogicalDecoding(relation)
Definition: rel.h:683
#define InvalidSnapshot
Definition: snapshot.h:123
TransactionId xmax
Definition: tableam.h:143
CommandId cmax
Definition: tableam.h:144
ItemPointerData ctid
Definition: tableam.h:142
uint16 t_infomask
Definition: heapam_xlog.h:147
uint16 t_infomask2
Definition: heapam_xlog.h:146
TM_Result
Definition: tableam.h:72
@ TM_Ok
Definition: tableam.h:77
@ TM_BeingModified
Definition: tableam.h:99
@ TM_Deleted
Definition: tableam.h:92
@ TM_Updated
Definition: tableam.h:89
@ TM_SelfModified
Definition: tableam.h:83
@ TM_Invisible
Definition: tableam.h:80
#define TransactionIdEquals(id1, id2)
Definition: transam.h:43
bool visibilitymap_clear(Relation rel, BlockNumber heapBlk, Buffer vmbuf, uint8 flags)
void visibilitymap_pin(Relation rel, BlockNumber heapBlk, Buffer *vmbuf)
#define VISIBILITYMAP_VALID_BITS
bool TransactionIdIsCurrentTransactionId(TransactionId xid)
Definition: xact.c:926
bool IsInParallelMode(void)
Definition: xact.c:1069
#define XLOG_INCLUDE_ORIGIN
Definition: xlog.h:152
void XLogSetRecordFlags(uint8 flags)
Definition: xloginsert.c:457

References Assert(), BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetBlockNumber(), BufferGetPage(), CacheInvalidateHeapTuple(), CheckForSerializableConflictIn(), TM_FailureData::cmax, compute_infobits(), compute_new_xmax_infomask(), TM_FailureData::ctid, DoesMultiXactIdConflict(), END_CRIT_SECTION, ereport, errcode(), errmsg(), ERROR, ExtractReplicaIdentity(), xl_heap_delete::flags, GetCurrentTransactionId(), heap_acquire_tuplock(), heap_freetuple(), HEAP_KEYS_UPDATED, HEAP_MOVED, heap_toast_delete(), HEAP_XMAX_BITS, HEAP_XMAX_INVALID, HEAP_XMAX_IS_LOCKED_ONLY, HEAP_XMAX_IS_MULTI, HeapTupleHasExternal, HeapTupleHeaderAdjustCmax(), HeapTupleHeaderClearHotUpdated, HeapTupleHeaderGetCmax(), HeapTupleHeaderGetRawXmax, HeapTupleHeaderGetUpdateXid, HeapTupleHeaderIsOnlyLocked(), HeapTupleHeaderSetCmax, HeapTupleHeaderSetMovedPartitions, HeapTupleHeaderSetXmax, HeapTupleSatisfiesUpdate(), HeapTupleSatisfiesVisibility(), xl_heap_delete::infobits_set, InvalidBuffer, InvalidCommandId, InvalidSnapshot, IsInParallelMode(), ItemIdGetLength, ItemIdIsNormal, ItemPointerEquals(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), ItemPointerIsValid(), LockBuffer(), LockTupleExclusive, LockWaitBlock, log_heap_new_cid(), MarkBufferDirty(), MultiXactIdSetOldestMember(), MultiXactIdWait(), MultiXactStatusUpdate, xl_heap_delete::offnum, PageClearAllVisible(), PageGetItem(), PageGetItemId(), PageIsAllVisible(), PageSetLSN(), PageSetPrunable, pgstat_count_heap_delete(), RelationData::rd_rel, ReadBuffer(), REGBUF_STANDARD, RelationGetRelid, RelationIsAccessibleInLogicalDecoding, RelationNeedsWAL, ReleaseBuffer(), SizeOfHeapDelete, SizeOfHeapHeader, SizeofHeapTupleHeader, START_CRIT_SECTION, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, xl_heap_header::t_hoff, HeapTupleHeaderData::t_hoff, xl_heap_header::t_infomask, HeapTupleHeaderData::t_infomask, xl_heap_header::t_infomask2, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TM_BeingModified, TM_Deleted, TM_Invisible, TM_Ok, TM_SelfModified, TM_Updated, TransactionIdEquals, TransactionIdIsCurrentTransactionId(), UnlockReleaseBuffer(), UnlockTupleTuplock, UpdateXmaxHintBits(), visibilitymap_clear(), visibilitymap_pin(), VISIBILITYMAP_VALID_BITS, XactLockTableWait(), XLH_DELETE_ALL_VISIBLE_CLEARED, XLH_DELETE_CONTAINS_OLD_KEY, XLH_DELETE_CONTAINS_OLD_TUPLE, XLH_DELETE_IS_PARTITION_MOVE, XLOG_HEAP_DELETE, XLOG_INCLUDE_ORIGIN, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), XLogRegisterData(), XLogSetRecordFlags(), XLTW_Delete, xl_heap_delete::xmax, TM_FailureData::xmax, and xmax_infomask_changed().

Referenced by heapam_tuple_delete(), and simple_heap_delete().
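
A hedged sketch of calling heap_delete() directly and reacting to the TM_Result; real callers such as heapam_tuple_delete() layer EvalPlanQual handling on top of this, and simple_heap_delete() wraps it for callers that expect no concurrent changes (the helper name delete_tuple_by_tid is hypothetical):

#include "postgres.h"
#include "access/heapam.h"
#include "access/tableam.h"
#include "access/xact.h"

/* Hypothetical helper: delete the tuple at 'tid', reporting conflicts. */
static void
delete_tuple_by_tid(Relation rel, ItemPointer tid)
{
	TM_FailureData tmfd;
	TM_Result	result;

	result = heap_delete(rel, tid, GetCurrentCommandId(true),
						 InvalidSnapshot,	/* no crosscheck snapshot */
						 true,				/* wait for concurrent updaters */
						 &tmfd,
						 false);			/* not a partition move */

	switch (result)
	{
		case TM_Ok:
			break;				/* deleted */
		case TM_SelfModified:
			elog(NOTICE, "tuple was already modified by this command (cmax %u)",
				 tmfd.cmax);
			break;
		case TM_Updated:
		case TM_Deleted:
			elog(NOTICE, "tuple was concurrently updated or deleted");
			break;
		default:
			elog(ERROR, "unexpected heap_delete result: %d", result);
	}
}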

◆ heap_endscan()

void heap_endscan ( TableScanDesc  sscan)

Definition at line 1053 of file heapam.c.

1054 {
1055  HeapScanDesc scan = (HeapScanDesc) sscan;
1056 
1057  /* Note: no locking manipulations needed */
1058 
1059  /*
1060  * unpin scan buffers
1061  */
1062  if (BufferIsValid(scan->rs_cbuf))
1063  ReleaseBuffer(scan->rs_cbuf);
1064 
1065  /*
1066  * decrement relation reference count and free scan descriptor storage
1067  */
1069 
1070  if (scan->rs_base.rs_key)
1071  pfree(scan->rs_base.rs_key);
1072 
1073  if (scan->rs_strategy != NULL)
1075 
1076  if (scan->rs_parallelworkerdata != NULL)
1078 
1079  if (scan->rs_base.rs_flags & SO_TEMP_SNAPSHOT)
1081 
1082  pfree(scan);
1083 }
static bool BufferIsValid(Buffer bufnum)
Definition: bufmgr.h:301
void RelationDecrementReferenceCount(Relation rel)
Definition: relcache.c:2168
void UnregisterSnapshot(Snapshot snapshot)
Definition: snapmgr.c:843
Buffer rs_cbuf
Definition: heapam.h:62
@ SO_TEMP_SNAPSHOT
Definition: tableam.h:64

References BufferIsValid(), FreeAccessStrategy(), pfree(), RelationDecrementReferenceCount(), ReleaseBuffer(), HeapScanDescData::rs_base, HeapScanDescData::rs_cbuf, TableScanDescData::rs_flags, TableScanDescData::rs_key, HeapScanDescData::rs_parallelworkerdata, TableScanDescData::rs_rd, TableScanDescData::rs_snapshot, HeapScanDescData::rs_strategy, SO_TEMP_SNAPSHOT, and UnregisterSnapshot().

◆ heap_fetch()

bool heap_fetch ( Relation  relation,
Snapshot  snapshot,
HeapTuple  tuple,
Buffer userbuf,
bool  keep_buf 
)

Definition at line 1345 of file heapam.c.

1350 {
1351  ItemPointer tid = &(tuple->t_self);
1352  ItemId lp;
1353  Buffer buffer;
1354  Page page;
1355  OffsetNumber offnum;
1356  bool valid;
1357 
1358  /*
1359  * Fetch and pin the appropriate page of the relation.
1360  */
1361  buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
1362 
1363  /*
1364  * Need share lock on buffer to examine tuple commit status.
1365  */
1366  LockBuffer(buffer, BUFFER_LOCK_SHARE);
1367  page = BufferGetPage(buffer);
1368 
1369  /*
1370  * We'd better check for out-of-range offnum in case of VACUUM since the
1371  * TID was obtained.
1372  */
1373  offnum = ItemPointerGetOffsetNumber(tid);
1374  if (offnum < FirstOffsetNumber || offnum > PageGetMaxOffsetNumber(page))
1375  {
1376  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
1377  ReleaseBuffer(buffer);
1378  *userbuf = InvalidBuffer;
1379  tuple->t_data = NULL;
1380  return false;
1381  }
1382 
1383  /*
1384  * get the item line pointer corresponding to the requested tid
1385  */
1386  lp = PageGetItemId(page, offnum);
1387 
1388  /*
1389  * Must check for deleted tuple.
1390  */
1391  if (!ItemIdIsNormal(lp))
1392  {
1393  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
1394  ReleaseBuffer(buffer);
1395  *userbuf = InvalidBuffer;
1396  tuple->t_data = NULL;
1397  return false;
1398  }
1399 
1400  /*
1401  * fill in *tuple fields
1402  */
1403  tuple->t_data = (HeapTupleHeader) PageGetItem(page, lp);
1404  tuple->t_len = ItemIdGetLength(lp);
1405  tuple->t_tableOid = RelationGetRelid(relation);
1406 
1407  /*
1408  * check tuple visibility, then release lock
1409  */
1410  valid = HeapTupleSatisfiesVisibility(tuple, snapshot, buffer);
1411 
1412  if (valid)
1413  PredicateLockTID(relation, &(tuple->t_self), snapshot,
1414  HeapTupleHeaderGetXmin(tuple->t_data));
1415 
1416  HeapCheckForSerializableConflictOut(valid, relation, tuple, buffer, snapshot);
1417 
1418  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
1419 
1420  if (valid)
1421  {
1422  /*
1423  * All checks passed, so return the tuple as valid. Caller is now
1424  * responsible for releasing the buffer.
1425  */
1426  *userbuf = buffer;
1427 
1428  return true;
1429  }
1430 
1431  /* Tuple failed time qual, but maybe caller wants to see it anyway. */
1432  if (keep_buf)
1433  *userbuf = buffer;
1434  else
1435  {
1436  ReleaseBuffer(buffer);
1437  *userbuf = InvalidBuffer;
1438  tuple->t_data = NULL;
1439  }
1440 
1441  return false;
1442 }
#define BUFFER_LOCK_SHARE
Definition: bufmgr.h:158
static OffsetNumber PageGetMaxOffsetNumber(Page page)
Definition: bufpage.h:369
void HeapCheckForSerializableConflictOut(bool visible, Relation relation, HeapTuple tuple, Buffer buffer, Snapshot snapshot)
Definition: heapam.c:10157
#define HeapTupleHeaderGetXmin(tup)
Definition: htup_details.h:309
uint16 OffsetNumber
Definition: off.h:24
void PredicateLockTID(Relation relation, ItemPointer tid, Snapshot snapshot, TransactionId tuple_xid)
Definition: predicate.c:2591

References BUFFER_LOCK_SHARE, BUFFER_LOCK_UNLOCK, BufferGetPage(), HeapCheckForSerializableConflictOut(), HeapTupleHeaderGetXmin, HeapTupleSatisfiesVisibility(), InvalidBuffer, ItemIdGetLength, ItemIdIsNormal, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), LockBuffer(), PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PredicateLockTID(), ReadBuffer(), RelationGetRelid, ReleaseBuffer(), HeapTupleData::t_data, HeapTupleData::t_len, HeapTupleData::t_self, and HeapTupleData::t_tableOid.

Referenced by heap_lock_updated_tuple_rec(), heapam_fetch_row_version(), and heapam_tuple_lock().
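
A minimal sketch of fetching a single tuple version by TID and copying it out before the buffer pin is dropped, assuming the caller supplies the relation and TID and wants the active snapshot (the helper name fetch_tuple_copy is hypothetical):

#include "postgres.h"
#include "access/heapam.h"
#include "access/htup_details.h"
#include "storage/bufmgr.h"
#include "utils/snapmgr.h"

/* Hypothetical helper: return a palloc'd copy of the visible tuple at
 * 'tid', or NULL if no visible version exists there. */
static HeapTuple
fetch_tuple_copy(Relation rel, ItemPointer tid)
{
	HeapTupleData tuple;
	Buffer		buffer;
	HeapTuple	copy = NULL;

	tuple.t_self = *tid;		/* heap_fetch() reads the TID from t_self */
	if (heap_fetch(rel, GetActiveSnapshot(), &tuple, &buffer, false))
	{
		copy = heap_copytuple(&tuple);	/* copy before releasing the pin */
		ReleaseBuffer(buffer);
	}
	return copy;
}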

◆ heap_finish_speculative()

void heap_finish_speculative ( Relation  relation,
ItemPointer  tid 
)

Definition at line 5653 of file heapam.c.

5654 {
5655  Buffer buffer;
5656  Page page;
5657  OffsetNumber offnum;
5658  ItemId lp = NULL;
5659  HeapTupleHeader htup;
5660 
5661  buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
5662  LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
5663  page = (Page) BufferGetPage(buffer);
5664 
5665  offnum = ItemPointerGetOffsetNumber(tid);
5666  if (PageGetMaxOffsetNumber(page) >= offnum)
5667  lp = PageGetItemId(page, offnum);
5668 
5669  if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
5670  elog(ERROR, "invalid lp");
5671 
5672  htup = (HeapTupleHeader) PageGetItem(page, lp);
5673 
5674  /* NO EREPORT(ERROR) from here till changes are logged */
5675  START_CRIT_SECTION();
5676 
5677  Assert(HeapTupleHeaderIsSpeculative(htup));
5678 
5679  MarkBufferDirty(buffer);
5680 
5681  /*
5682  * Replace the speculative insertion token with a real t_ctid, pointing to
5683  * itself like it does on regular tuples.
5684  */
5685  htup->t_ctid = *tid;
5686 
5687  /* XLOG stuff */
5688  if (RelationNeedsWAL(relation))
5689  {
5690  xl_heap_confirm xlrec;
5691  XLogRecPtr recptr;
5692 
5693  xlrec.offnum = ItemPointerGetOffsetNumber(tid);
5694 
5695  XLogBeginInsert();
5696 
5697  /* We want the same filtering on this as on a plain insert */
5698  XLogSetRecordFlags(XLOG_INCLUDE_ORIGIN);
5699 
5700  XLogRegisterData((char *) &xlrec, SizeOfHeapConfirm);
5701  XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
5702 
5703  recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_CONFIRM);
5704 
5705  PageSetLSN(page, recptr);
5706  }
5707 
5708  END_CRIT_SECTION();
5709 
5710  UnlockReleaseBuffer(buffer);
5711 }
#define SizeOfHeapConfirm
Definition: heapam_xlog.h:307
#define XLOG_HEAP_CONFIRM
Definition: heapam_xlog.h:37
OffsetNumber offnum
Definition: heapam_xlog.h:304

References Assert(), BUFFER_LOCK_EXCLUSIVE, BufferGetPage(), elog(), END_CRIT_SECTION, ERROR, HeapTupleHeaderIsSpeculative, ItemIdIsNormal, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), LockBuffer(), MarkBufferDirty(), xl_heap_confirm::offnum, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PageSetLSN(), ReadBuffer(), REGBUF_STANDARD, RelationNeedsWAL, SizeOfHeapConfirm, START_CRIT_SECTION, HeapTupleHeaderData::t_ctid, UnlockReleaseBuffer(), XLOG_HEAP_CONFIRM, XLOG_INCLUDE_ORIGIN, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), XLogRegisterData(), and XLogSetRecordFlags().

Referenced by heapam_tuple_complete_speculative().
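
A hedged outline of the speculative-insertion protocol that heap_finish_speculative() and heap_abort_speculative() complete, as driven by INSERT ... ON CONFLICT through the table AM layer; the conflict decision and speculative-token bookkeeping are simplified here (the helper name speculative_insert_sketch and its boolean argument are hypothetical):

#include "postgres.h"
#include "access/heapam.h"
#include "access/xact.h"

/* Hypothetical outline: insert speculatively, then confirm or abort.
 * The table AM normally stamps a speculative token into the tuple header
 * first, via HeapTupleHeaderSetSpeculativeToken(). */
static void
speculative_insert_sketch(Relation rel, HeapTuple tup, bool no_conflict_found)
{
	heap_insert(rel, tup, GetCurrentCommandId(true),
				HEAP_INSERT_SPECULATIVE, NULL);

	if (no_conflict_found)
		heap_finish_speculative(rel, &tup->t_self);	/* make it permanent */
	else
		heap_abort_speculative(rel, &tup->t_self);	/* kill it, waking waiters */
}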

◆ heap_freeze_execute_prepared()

void heap_freeze_execute_prepared ( Relation  rel,
Buffer  buffer,
TransactionId  snapshotConflictHorizon,
HeapTupleFreeze tuples,
int  ntuples 
)

Definition at line 6682 of file heapam.c.

6685 {
6686  Page page = BufferGetPage(buffer);
6687 
6688  Assert(ntuples > 0);
6689 
6690  /*
6691  * Perform xmin/xmax XID status sanity checks before critical section.
6692  *
6693  * heap_prepare_freeze_tuple doesn't perform these checks directly because
6694  * pg_xact lookups are relatively expensive. They shouldn't be repeated
6695  * by successive VACUUMs that each decide against freezing the same page.
6696  */
6697  for (int i = 0; i < ntuples; i++)
6698  {
6699  HeapTupleFreeze *frz = tuples + i;
6700  ItemId itemid = PageGetItemId(page, frz->offset);
6701  HeapTupleHeader htup;
6702 
6703  htup = (HeapTupleHeader) PageGetItem(page, itemid);
6704 
6705  /* Deliberately avoid relying on tuple hint bits here */
6707  {
6709 
6711  if (unlikely(!TransactionIdDidCommit(xmin)))
6712  ereport(ERROR,
6714  errmsg_internal("uncommitted xmin %u needs to be frozen",
6715  xmin)));
6716  }
6717 
6718  /*
6719  * TransactionIdDidAbort won't work reliably in the presence of XIDs
6720  * left behind by transactions that were in progress during a crash,
6721  * so we can only check that xmax didn't commit
6722  */
6724  {
6726 
6728  if (unlikely(TransactionIdDidCommit(xmax)))
6729  ereport(ERROR,
6731  errmsg_internal("cannot freeze committed xmax %u",
6732  xmax)));
6733  }
6734  }
6735 
6737 
6738  for (int i = 0; i < ntuples; i++)
6739  {
6740  HeapTupleFreeze *frz = tuples + i;
6741  ItemId itemid = PageGetItemId(page, frz->offset);
6742  HeapTupleHeader htup;
6743 
6744  htup = (HeapTupleHeader) PageGetItem(page, itemid);
6745  heap_execute_freeze_tuple(htup, frz);
6746  }
6747 
6748  MarkBufferDirty(buffer);
6749 
6750  /* Now WAL-log freezing if necessary */
6751  if (RelationNeedsWAL(rel))
6752  {
6755  int nplans;
6756  xl_heap_freeze_page xlrec;
6757  XLogRecPtr recptr;
6758 
6759  /* Prepare deduplicated representation for use in WAL record */
6760  nplans = heap_log_freeze_plan(tuples, ntuples, plans, offsets);
6761 
6762  xlrec.snapshotConflictHorizon = snapshotConflictHorizon;
6764  xlrec.nplans = nplans;
6765 
6766  XLogBeginInsert();
6767  XLogRegisterData((char *) &xlrec, SizeOfHeapFreezePage);
6768 
6769  /*
6770  * The freeze plan array and offset array are not actually in the
6771  * buffer, but pretend that they are. When XLogInsert stores the
6772  * whole buffer, the arrays need not be stored too.
6773  */
6774  XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
6775  XLogRegisterBufData(0, (char *) plans,
6776  nplans * sizeof(xl_heap_freeze_plan));
6777  XLogRegisterBufData(0, (char *) offsets,
6778  ntuples * sizeof(OffsetNumber));
6779 
6780  recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_FREEZE_PAGE);
6781 
6782  PageSetLSN(page, recptr);
6783  }
6784 
6785  END_CRIT_SECTION();
6786 }
#define unlikely(x)
Definition: c.h:300
int errmsg_internal(const char *fmt,...)
Definition: elog.c:1162
static int heap_log_freeze_plan(HeapTupleFreeze *tuples, int ntuples, xl_heap_freeze_plan *plans_out, OffsetNumber *offsets_out)
Definition: heapam.c:6876
static void heap_execute_freeze_tuple(HeapTupleHeader tuple, HeapTupleFreeze *frz)
Definition: heapam.c:6653
#define HEAP_FREEZE_CHECK_XMAX_ABORTED
Definition: heapam.h:108
#define HEAP_FREEZE_CHECK_XMIN_COMMITTED
Definition: heapam.h:107
#define SizeOfHeapFreezePage
Definition: heapam_xlog.h:357
#define XLOG_HEAP2_FREEZE_PAGE
Definition: heapam_xlog.h:56
#define HeapTupleHeaderGetRawXmin(tup)
Definition: htup_details.h:304
#define HeapTupleHeaderXminFrozen(tup)
Definition: htup_details.h:331
#define MaxHeapTuplesPerPage
Definition: htup_details.h:572
#define ERRCODE_DATA_CORRUPTED
Definition: pg_basebackup.c:41
OffsetNumber offset
Definition: heapam.h:122
uint8 checkflags
Definition: heapam.h:120
TransactionId snapshotConflictHorizon
Definition: heapam_xlog.h:347
bool TransactionIdDidCommit(TransactionId transactionId)
Definition: transam.c:126
#define TransactionIdIsNormal(xid)
Definition: transam.h:42
void XLogRegisterBufData(uint8 block_id, char *data, uint32 len)
Definition: xloginsert.c:406

References Assert(), BufferGetPage(), HeapTupleFreeze::checkflags, END_CRIT_SECTION, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errmsg_internal(), ERROR, heap_execute_freeze_tuple(), HEAP_FREEZE_CHECK_XMAX_ABORTED, HEAP_FREEZE_CHECK_XMIN_COMMITTED, heap_log_freeze_plan(), HeapTupleHeaderGetRawXmax, HeapTupleHeaderGetRawXmin, HeapTupleHeaderXminFrozen, i, xl_heap_freeze_page::isCatalogRel, MarkBufferDirty(), MaxHeapTuplesPerPage, xl_heap_freeze_page::nplans, HeapTupleFreeze::offset, PageGetItem(), PageGetItemId(), PageSetLSN(), REGBUF_STANDARD, RelationIsAccessibleInLogicalDecoding, RelationNeedsWAL, SizeOfHeapFreezePage, xl_heap_freeze_page::snapshotConflictHorizon, START_CRIT_SECTION, TransactionIdDidCommit(), TransactionIdIsNormal, unlikely, XLOG_HEAP2_FREEZE_PAGE, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by lazy_scan_prune().

◆ heap_freeze_tuple()

bool heap_freeze_tuple ( HeapTupleHeader  tuple,
TransactionId  relfrozenxid,
TransactionId  relminmxid,
TransactionId  FreezeLimit,
TransactionId  MultiXactCutoff 
)

Definition at line 6933 of file heapam.c.

6936 {
6937  HeapTupleFreeze frz;
6938  bool do_freeze;
6939  bool totally_frozen;
6940  struct VacuumCutoffs cutoffs;
6941  HeapPageFreeze pagefrz;
6942 
6943  cutoffs.relfrozenxid = relfrozenxid;
6944  cutoffs.relminmxid = relminmxid;
6945  cutoffs.OldestXmin = FreezeLimit;
6946  cutoffs.OldestMxact = MultiXactCutoff;
6947  cutoffs.FreezeLimit = FreezeLimit;
6948  cutoffs.MultiXactCutoff = MultiXactCutoff;
6949 
6950  pagefrz.freeze_required = true;
6951  pagefrz.FreezePageRelfrozenXid = FreezeLimit;
6952  pagefrz.FreezePageRelminMxid = MultiXactCutoff;
6953  pagefrz.NoFreezePageRelfrozenXid = FreezeLimit;
6954  pagefrz.NoFreezePageRelminMxid = MultiXactCutoff;
6955 
6956  do_freeze = heap_prepare_freeze_tuple(tuple, &cutoffs,
6957  &pagefrz, &frz, &totally_frozen);
6958 
6959  /*
6960  * Note that because this is not a WAL-logged operation, we don't need to
6961  * fill in the offset in the freeze record.
6962  */
6963 
6964  if (do_freeze)
6965  heap_execute_freeze_tuple(tuple, &frz);
6966  return do_freeze;
6967 }
bool heap_prepare_freeze_tuple(HeapTupleHeader tuple, const struct VacuumCutoffs *cutoffs, HeapPageFreeze *pagefrz, HeapTupleFreeze *frz, bool *totally_frozen)
Definition: heapam.c:6379
TransactionId FreezeLimit
Definition: vacuum.h:275
TransactionId relfrozenxid
Definition: vacuum.h:249
MultiXactId relminmxid
Definition: vacuum.h:250
MultiXactId MultiXactCutoff
Definition: vacuum.h:276

References VacuumCutoffs::FreezeLimit, heap_execute_freeze_tuple(), heap_prepare_freeze_tuple(), VacuumCutoffs::MultiXactCutoff, VacuumCutoffs::OldestMxact, VacuumCutoffs::OldestXmin, VacuumCutoffs::relfrozenxid, and VacuumCutoffs::relminmxid.

Referenced by rewrite_heap_tuple().
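
A minimal usage sketch, modeled on the rewrite_heap_tuple() caller listed above: the tuple header is an in-memory copy, and the cutoff variables (freeze_xid, cutoff_multi) are hypothetical names standing in for the caller's precomputed freeze limits.

    /* Sketch: freeze an in-memory tuple copy against the old relation's limits. */
    if (heap_freeze_tuple(newtup->t_data,
                          oldrel->rd_rel->relfrozenxid,
                          oldrel->rd_rel->relminmxid,
                          freeze_xid,          /* assumed FreezeLimit cutoff */
                          cutoff_multi))       /* assumed MultiXactCutoff */
    {
        /* header was rewritten in place; no WAL is generated by this path */
    }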

◆ heap_get_latest_tid()

void heap_get_latest_tid ( TableScanDesc  sscan,
ItemPointer  tid 
)

Definition at line 1617 of file heapam.c.

1619 {
1620  Relation relation = sscan->rs_rd;
1621  Snapshot snapshot = sscan->rs_snapshot;
1622  ItemPointerData ctid;
1623  TransactionId priorXmax;
1624 
1625  /*
1626  * table_tuple_get_latest_tid() verified that the passed in tid is valid.
1627  * Assume that t_ctid links are valid however - there shouldn't be invalid
1628  * ones in the table.
1629  */
1630  Assert(ItemPointerIsValid(tid));
1631 
1632  /*
1633  * Loop to chase down t_ctid links. At top of loop, ctid is the tuple we
1634  * need to examine, and *tid is the TID we will return if ctid turns out
1635  * to be bogus.
1636  *
1637  * Note that we will loop until we reach the end of the t_ctid chain.
1638  * Depending on the snapshot passed, there might be at most one visible
1639  * version of the row, but we don't try to optimize for that.
1640  */
1641  ctid = *tid;
1642  priorXmax = InvalidTransactionId; /* cannot check first XMIN */
1643  for (;;)
1644  {
1645  Buffer buffer;
1646  Page page;
1647  OffsetNumber offnum;
1648  ItemId lp;
1649  HeapTupleData tp;
1650  bool valid;
1651 
1652  /*
1653  * Read, pin, and lock the page.
1654  */
1655  buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(&ctid));
1656  LockBuffer(buffer, BUFFER_LOCK_SHARE);
1657  page = BufferGetPage(buffer);
1658 
1659  /*
1660  * Check for bogus item number. This is not treated as an error
1661  * condition because it can happen while following a t_ctid link. We
1662  * just assume that the prior tid is OK and return it unchanged.
1663  */
1664  offnum = ItemPointerGetOffsetNumber(&ctid);
1665  if (offnum < FirstOffsetNumber || offnum > PageGetMaxOffsetNumber(page))
1666  {
1667  UnlockReleaseBuffer(buffer);
1668  break;
1669  }
1670  lp = PageGetItemId(page, offnum);
1671  if (!ItemIdIsNormal(lp))
1672  {
1673  UnlockReleaseBuffer(buffer);
1674  break;
1675  }
1676 
1677  /* OK to access the tuple */
1678  tp.t_self = ctid;
1679  tp.t_data = (HeapTupleHeader) PageGetItem(page, lp);
1680  tp.t_len = ItemIdGetLength(lp);
1681  tp.t_tableOid = RelationGetRelid(relation);
1682 
1683  /*
1684  * After following a t_ctid link, we might arrive at an unrelated
1685  * tuple. Check for XMIN match.
1686  */
1687  if (TransactionIdIsValid(priorXmax) &&
1688  !TransactionIdEquals(priorXmax, HeapTupleHeaderGetXmin(tp.t_data)))
1689  {
1690  UnlockReleaseBuffer(buffer);
1691  break;
1692  }
1693 
1694  /*
1695  * Check tuple visibility; if visible, set it as the new result
1696  * candidate.
1697  */
1698  valid = HeapTupleSatisfiesVisibility(&tp, snapshot, buffer);
1699  HeapCheckForSerializableConflictOut(valid, relation, &tp, buffer, snapshot);
1700  if (valid)
1701  *tid = ctid;
1702 
1703  /*
1704  * If there's a valid t_ctid link, follow it, else we're done.
1705  */
1706  if ((tp.t_data->t_infomask & HEAP_XMAX_INVALID) ||
1707  HeapTupleHeaderIsOnlyLocked(tp.t_data) ||
1708  HeapTupleHeaderIndicatesMovedPartitions(tp.t_data) ||
1709  ItemPointerEquals(&tp.t_self, &tp.t_data->t_ctid))
1710  {
1711  UnlockReleaseBuffer(buffer);
1712  break;
1713  }
1714 
1715  ctid = tp.t_data->t_ctid;
1716  priorXmax = HeapTupleHeaderGetUpdateXid(tp.t_data);
1717  UnlockReleaseBuffer(buffer);
1718  } /* end of loop */
1719 }
#define HeapTupleHeaderIndicatesMovedPartitions(tup)
Definition: htup_details.h:444

References Assert(), BUFFER_LOCK_SHARE, BufferGetPage(), HEAP_XMAX_INVALID, HeapCheckForSerializableConflictOut(), HeapTupleHeaderGetUpdateXid, HeapTupleHeaderGetXmin, HeapTupleHeaderIndicatesMovedPartitions, HeapTupleHeaderIsOnlyLocked(), HeapTupleSatisfiesVisibility(), InvalidTransactionId, ItemIdGetLength, ItemIdIsNormal, ItemPointerEquals(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), ItemPointerIsValid(), LockBuffer(), PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), ReadBuffer(), RelationGetRelid, TableScanDescData::rs_rd, TableScanDescData::rs_snapshot, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TransactionIdEquals, TransactionIdIsValid, and UnlockReleaseBuffer().
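
Core code reaches this function through table_tuple_get_latest_tid(); a minimal direct-use sketch (assuming a backend context with an active snapshot, and hypothetical blkno/offnum values) looks like this:

    /* Sketch: resolve a TID to the latest visible version in its update chain. */
    TableScanDesc scan = table_beginscan_tid(rel, GetActiveSnapshot());
    ItemPointerData tid;

    ItemPointerSet(&tid, blkno, offnum);   /* hypothetical starting TID */
    heap_get_latest_tid(scan, &tid);       /* tid now names the newest visible version */
    table_endscan(scan);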

◆ heap_get_root_tuples()

void heap_get_root_tuples ( Page  page,
OffsetNumber root_offsets 
)

Definition at line 1050 of file pruneheap.c.

1051 {
1052  OffsetNumber offnum,
1053  maxoff;
1054 
1055  MemSet(root_offsets, InvalidOffsetNumber,
1056  MaxHeapTuplesPerPage * sizeof(OffsetNumber));
1057 
1058  maxoff = PageGetMaxOffsetNumber(page);
1059  for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum = OffsetNumberNext(offnum))
1060  {
1061  ItemId lp = PageGetItemId(page, offnum);
1062  HeapTupleHeader htup;
1063  OffsetNumber nextoffnum;
1064  TransactionId priorXmax;
1065 
1066  /* skip unused and dead items */
1067  if (!ItemIdIsUsed(lp) || ItemIdIsDead(lp))
1068  continue;
1069 
1070  if (ItemIdIsNormal(lp))
1071  {
1072  htup = (HeapTupleHeader) PageGetItem(page, lp);
1073 
1074  /*
1075  * Check if this tuple is part of a HOT-chain rooted at some other
1076  * tuple. If so, skip it for now; we'll process it when we find
1077  * its root.
1078  */
1079  if (HeapTupleHeaderIsHeapOnly(htup))
1080  continue;
1081 
1082  /*
1083  * This is either a plain tuple or the root of a HOT-chain.
1084  * Remember it in the mapping.
1085  */
1086  root_offsets[offnum - 1] = offnum;
1087 
1088  /* If it's not the start of a HOT-chain, we're done with it */
1089  if (!HeapTupleHeaderIsHotUpdated(htup))
1090  continue;
1091 
1092  /* Set up to scan the HOT-chain */
1093  nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
1094  priorXmax = HeapTupleHeaderGetUpdateXid(htup);
1095  }
1096  else
1097  {
1098  /* Must be a redirect item. We do not set its root_offsets entry */
1099  Assert(ItemIdIsRedirected(lp));
1100  /* Set up to scan the HOT-chain */
1101  nextoffnum = ItemIdGetRedirect(lp);
1102  priorXmax = InvalidTransactionId;
1103  }
1104 
1105  /*
1106  * Now follow the HOT-chain and collect other tuples in the chain.
1107  *
1108  * Note: Even though this is a nested loop, the complexity of the
1109  * function is O(N) because a tuple in the page should be visited not
1110  * more than twice, once in the outer loop and once in HOT-chain
1111  * chases.
1112  */
1113  for (;;)
1114  {
1115  /* Sanity check (pure paranoia) */
1116  if (offnum < FirstOffsetNumber)
1117  break;
1118 
1119  /*
1120  * An offset past the end of page's line pointer array is possible
1121  * when the array was truncated
1122  */
1123  if (offnum > maxoff)
1124  break;
1125 
1126  lp = PageGetItemId(page, nextoffnum);
1127 
1128  /* Check for broken chains */
1129  if (!ItemIdIsNormal(lp))
1130  break;
1131 
1132  htup = (HeapTupleHeader) PageGetItem(page, lp);
1133 
1134  if (TransactionIdIsValid(priorXmax) &&
1135  !TransactionIdEquals(priorXmax, HeapTupleHeaderGetXmin(htup)))
1136  break;
1137 
1138  /* Remember the root line pointer for this item */
1139  root_offsets[nextoffnum - 1] = offnum;
1140 
1141  /* Advance to next chain member, if any */
1142  if (!HeapTupleHeaderIsHotUpdated(htup))
1143  break;
1144 
1145  /* HOT implies it can't have moved to different partition */
1146  Assert(!HeapTupleHeaderIndicatesMovedPartitions(htup));
1147 
1148  nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
1149  priorXmax = HeapTupleHeaderGetUpdateXid(htup);
1150  }
1151  }
1152 }
#define MemSet(start, val, len)
Definition: c.h:1009
#define HeapTupleHeaderIsHotUpdated(tup)
Definition: htup_details.h:482
#define ItemIdGetRedirect(itemId)
Definition: itemid.h:78
#define ItemIdIsDead(itemId)
Definition: itemid.h:113
#define ItemIdIsUsed(itemId)
Definition: itemid.h:92
#define ItemIdIsRedirected(itemId)
Definition: itemid.h:106
#define InvalidOffsetNumber
Definition: off.h:26
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
#define FirstOffsetNumber
Definition: off.h:27

References Assert(), FirstOffsetNumber, HeapTupleHeaderGetUpdateXid, HeapTupleHeaderGetXmin, HeapTupleHeaderIndicatesMovedPartitions, HeapTupleHeaderIsHeapOnly, HeapTupleHeaderIsHotUpdated, InvalidOffsetNumber, InvalidTransactionId, ItemIdGetRedirect, ItemIdIsDead, ItemIdIsNormal, ItemIdIsRedirected, ItemIdIsUsed, ItemPointerGetOffsetNumber(), MaxHeapTuplesPerPage, MemSet, OffsetNumberNext, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), HeapTupleHeaderData::t_ctid, TransactionIdEquals, and TransactionIdIsValid.

Referenced by heapam_index_build_range_scan(), and heapam_index_validate_scan().
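
A minimal sketch of the calling convention used by heapam_index_build_range_scan(): the caller holds at least a share lock on the target buffer and supplies a MaxHeapTuplesPerPage-sized output array (buf is assumed to be a pinned buffer for the page of interest).

    /* Sketch: build the offset -> HOT-chain-root mapping for one heap page. */
    OffsetNumber root_offsets[MaxHeapTuplesPerPage];

    LockBuffer(buf, BUFFER_LOCK_SHARE);
    heap_get_root_tuples(BufferGetPage(buf), root_offsets);
    LockBuffer(buf, BUFFER_LOCK_UNLOCK);

    /* root_offsets[offnum - 1] now holds the root line pointer of each tuple;
     * unused, dead and redirect line pointers keep InvalidOffsetNumber */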

◆ heap_getnext()

HeapTuple heap_getnext ( TableScanDesc  sscan,
ScanDirection  direction 
)

Definition at line 1086 of file heapam.c.

1087 {
1088  HeapScanDesc scan = (HeapScanDesc) sscan;
1089 
1090  /*
1091  * This is still widely used directly, without going through table AM, so
1092  * add a safety check. It's possible we should, at a later point,
1093  * downgrade this to an assert. The reason for checking the AM routine,
1094  * rather than the AM oid, is that this allows to write regression tests
1095  * that create another AM reusing the heap handler.
1096  */
1097  if (unlikely(sscan->rs_rd->rd_tableam != GetHeapamTableAmRoutine()))
1098  ereport(ERROR,
1099  (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1100  errmsg_internal("only heap AM is supported")));
1101 
1102  /*
1103  * We don't expect direct calls to heap_getnext with valid CheckXidAlive
1104  * for catalog or regular tables. See detailed comments in xact.c where
1105  * these variables are declared. Normally we have such a check at tableam
1106  * level API but this is called from many places so we need to ensure it
1107  * here.
1108  */
1109  if (unlikely(TransactionIdIsValid(CheckXidAlive) && !bsysscan))
1110  elog(ERROR, "unexpected heap_getnext call during logical decoding");
1111 
1112  /* Note: no locking manipulations needed */
1113 
1114  if (scan->rs_base.rs_flags & SO_ALLOW_PAGEMODE)
1115  heapgettup_pagemode(scan, direction,
1116  scan->rs_base.rs_nkeys, scan->rs_base.rs_key);
1117  else
1118  heapgettup(scan, direction,
1119  scan->rs_base.rs_nkeys, scan->rs_base.rs_key);
1120 
1121  if (scan->rs_ctup.t_data == NULL)
1122  return NULL;
1123 
1124  /*
1125  * if we get here it means we have a new current scan tuple, so point to
1126  * the proper return buffer and return the tuple.
1127  */
1128 
1129  pgstat_count_heap_getnext(scan->rs_base.rs_rd);
1130 
1131  return &scan->rs_ctup;
1132 }
static void heapgettup(HeapScanDesc scan, ScanDirection dir, int nkeys, ScanKey key)
Definition: heapam.c:724
static void heapgettup_pagemode(HeapScanDesc scan, ScanDirection dir, int nkeys, ScanKey key)
Definition: heapam.c:839
const TableAmRoutine * GetHeapamTableAmRoutine(void)
#define pgstat_count_heap_getnext(rel)
Definition: pgstat.h:615
const struct TableAmRoutine * rd_tableam
Definition: rel.h:188
bool bsysscan
Definition: xact.c:100
TransactionId CheckXidAlive
Definition: xact.c:99

References bsysscan, CheckXidAlive, elog(), ereport, errcode(), errmsg_internal(), ERROR, GetHeapamTableAmRoutine(), heapgettup(), heapgettup_pagemode(), pgstat_count_heap_getnext, RelationData::rd_tableam, HeapScanDescData::rs_base, HeapScanDescData::rs_ctup, TableScanDescData::rs_flags, TableScanDescData::rs_key, TableScanDescData::rs_nkeys, TableScanDescData::rs_rd, SO_ALLOW_PAGEMODE, HeapTupleData::t_data, TransactionIdIsValid, and unlikely.

Referenced by AlterTableMoveAll(), AlterTableSpaceOptions(), check_db_file_conflict(), CreateDatabaseUsingFileCopy(), do_autovacuum(), DropSetting(), DropTableSpace(), find_typed_table_dependencies(), get_all_vacuum_rels(), get_database_list(), get_subscription_list(), get_tables_to_cluster(), get_tablespace_name(), get_tablespace_oid(), GetAllTablesPublicationRelations(), getRelationsInNamespace(), GetSchemaPublicationRelations(), heapam_index_build_range_scan(), heapam_index_validate_scan(), index_update_stats(), objectsInSchemaToOids(), pgrowlocks(), pgstat_heap(), populate_typ_list(), ReindexMultipleTables(), remove_dbtablespaces(), RemoveSubscriptionRel(), RenameTableSpace(), ThereIsAtLeastOneRole(), and vac_truncate_clog().
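
Most of the callers above follow the same pattern: open the relation, begin a scan, loop over heap_getnext() until it returns NULL, then end the scan. A minimal sketch, assuming a backend context with an active snapshot (count_live_tuples is a hypothetical helper, not part of the API):

    #include "postgres.h"
    #include "access/heapam.h"
    #include "access/table.h"
    #include "utils/snapmgr.h"

    static uint64
    count_live_tuples(Oid relid)
    {
        Relation      rel = table_open(relid, AccessShareLock);
        TableScanDesc scan = heap_beginscan(rel, GetActiveSnapshot(), 0, NULL, NULL,
                                            SO_TYPE_SEQSCAN | SO_ALLOW_STRAT |
                                            SO_ALLOW_SYNC | SO_ALLOW_PAGEMODE);
        HeapTuple     tuple;
        uint64        ntuples = 0;

        while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
            ntuples++;          /* tuple points into the scan's current buffer */

        heap_endscan(scan);
        table_close(rel, AccessShareLock);
        return ntuples;
    }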

◆ heap_getnextslot()

bool heap_getnextslot ( TableScanDesc  sscan,
ScanDirection  direction,
struct TupleTableSlot slot 
)

Definition at line 1135 of file heapam.c.

1136 {
1137  HeapScanDesc scan = (HeapScanDesc) sscan;
1138 
1139  /* Note: no locking manipulations needed */
1140 
1141  if (sscan->rs_flags & SO_ALLOW_PAGEMODE)
1142  heapgettup_pagemode(scan, direction, sscan->rs_nkeys, sscan->rs_key);
1143  else
1144  heapgettup(scan, direction, sscan->rs_nkeys, sscan->rs_key);
1145 
1146  if (scan->rs_ctup.t_data == NULL)
1147  {
1148  ExecClearTuple(slot);
1149  return false;
1150  }
1151 
1152  /*
1153  * if we get here it means we have a new current scan tuple, so point to
1154  * the proper return buffer and return the tuple.
1155  */
1156 
1157  pgstat_count_heap_getnext(scan->rs_base.rs_rd);
1158 
1159  ExecStoreBufferHeapTuple(&scan->rs_ctup, slot,
1160  scan->rs_cbuf);
1161  return true;
1162 }
TupleTableSlot * ExecStoreBufferHeapTuple(HeapTuple tuple, TupleTableSlot *slot, Buffer buffer)
Definition: execTuples.c:1391
static TupleTableSlot * ExecClearTuple(TupleTableSlot *slot)
Definition: tuptable.h:433

References ExecClearTuple(), ExecStoreBufferHeapTuple(), heapgettup(), heapgettup_pagemode(), pgstat_count_heap_getnext, HeapScanDescData::rs_base, HeapScanDescData::rs_cbuf, HeapScanDescData::rs_ctup, TableScanDescData::rs_flags, TableScanDescData::rs_key, TableScanDescData::rs_nkeys, TableScanDescData::rs_rd, SO_ALLOW_PAGEMODE, and HeapTupleData::t_data.
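
This is the slot-based variant used by the table AM layer (heapam_scan_getnextslot()); a condensed sketch of direct use, assuming rel and an active snapshot as in the previous example:

    TupleTableSlot *slot = table_slot_create(rel, NULL);
    TableScanDesc   scan = heap_beginscan(rel, GetActiveSnapshot(), 0, NULL, NULL,
                                          SO_TYPE_SEQSCAN | SO_ALLOW_PAGEMODE);

    while (heap_getnextslot(scan, ForwardScanDirection, slot))
    {
        /* slot holds a buffer heap tuple pinned by the scan; use slot_getattr()
         * or slot_getallattrs() to examine its columns */
    }

    heap_endscan(scan);
    ExecDropSingleTupleTableSlot(slot);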

◆ heap_getnextslot_tidrange()

bool heap_getnextslot_tidrange ( TableScanDesc  sscan,
ScanDirection  direction,
TupleTableSlot slot 
)

Definition at line 1238 of file heapam.c.

1240 {
1241  HeapScanDesc scan = (HeapScanDesc) sscan;
1242  ItemPointer mintid = &sscan->rs_mintid;
1243  ItemPointer maxtid = &sscan->rs_maxtid;
1244 
1245  /* Note: no locking manipulations needed */
1246  for (;;)
1247  {
1248  if (sscan->rs_flags & SO_ALLOW_PAGEMODE)
1249  heapgettup_pagemode(scan, direction, sscan->rs_nkeys, sscan->rs_key);
1250  else
1251  heapgettup(scan, direction, sscan->rs_nkeys, sscan->rs_key);
1252 
1253  if (scan->rs_ctup.t_data == NULL)
1254  {
1255  ExecClearTuple(slot);
1256  return false;
1257  }
1258 
1259  /*
1260  * heap_set_tidrange will have used heap_setscanlimits to limit the
1261  * range of pages we scan to only ones that can contain the TID range
1262  * we're scanning for. Here we must filter out any tuples from these
1263  * pages that are outside of that range.
1264  */
1265  if (ItemPointerCompare(&scan->rs_ctup.t_self, mintid) < 0)
1266  {
1267  ExecClearTuple(slot);
1268 
1269  /*
1270  * When scanning backwards, the TIDs will be in descending order.
1271  * Future tuples in this direction will be lower still, so we can
1272  * just return false to indicate there will be no more tuples.
1273  */
1274  if (ScanDirectionIsBackward(direction))
1275  return false;
1276 
1277  continue;
1278  }
1279 
1280  /*
1281  * Likewise for the final page, we must filter out TIDs greater than
1282  * maxtid.
1283  */
1284  if (ItemPointerCompare(&scan->rs_ctup.t_self, maxtid) > 0)
1285  {
1286  ExecClearTuple(slot);
1287 
1288  /*
1289  * When scanning forward, the TIDs will be in ascending order.
1290  * Future tuples in this direction will be higher still, so we can
1291  * just return false to indicate there will be no more tuples.
1292  */
1293  if (ScanDirectionIsForward(direction))
1294  return false;
1295  continue;
1296  }
1297 
1298  break;
1299  }
1300 
1301  /*
1302  * if we get here it means we have a new current scan tuple, so point to
1303  * the proper return buffer and return the tuple.
1304  */
1305  pgstat_count_heap_getnext(scan->rs_base.rs_rd);
1306 
1307  ExecStoreBufferHeapTuple(&scan->rs_ctup, slot, scan->rs_cbuf);
1308  return true;
1309 }
int32 ItemPointerCompare(ItemPointer arg1, ItemPointer arg2)
Definition: itemptr.c:51
#define ScanDirectionIsForward(direction)
Definition: sdir.h:64
#define ScanDirectionIsBackward(direction)
Definition: sdir.h:50
ItemPointerData rs_mintid
Definition: relscan.h:40
ItemPointerData rs_maxtid
Definition: relscan.h:41

References ExecClearTuple(), ExecStoreBufferHeapTuple(), heapgettup(), heapgettup_pagemode(), ItemPointerCompare(), pgstat_count_heap_getnext, HeapScanDescData::rs_base, HeapScanDescData::rs_cbuf, HeapScanDescData::rs_ctup, TableScanDescData::rs_flags, TableScanDescData::rs_key, TableScanDescData::rs_maxtid, TableScanDescData::rs_mintid, TableScanDescData::rs_nkeys, TableScanDescData::rs_rd, ScanDirectionIsBackward, ScanDirectionIsForward, SO_ALLOW_PAGEMODE, HeapTupleData::t_data, and HeapTupleData::t_self.
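
A condensed sketch of a TID range scan restricted to block 0, assuming rel and an active snapshot as above (core code normally reaches this through table_beginscan_tidrange() and table_scan_getnextslot_tidrange()):

    ItemPointerData mintid,
                    maxtid;
    TupleTableSlot *slot = table_slot_create(rel, NULL);
    TableScanDesc   scan = heap_beginscan(rel, GetActiveSnapshot(), 0, NULL, NULL,
                                          SO_TYPE_TIDRANGESCAN | SO_ALLOW_PAGEMODE);

    ItemPointerSet(&mintid, 0, FirstOffsetNumber);   /* lowest TID on block 0 */
    ItemPointerSet(&maxtid, 0, MaxOffsetNumber);     /* highest possible TID there */
    heap_set_tidrange(scan, &mintid, &maxtid);

    while (heap_getnextslot_tidrange(scan, ForwardScanDirection, slot))
    {
        /* only tuples with mintid <= ctid <= maxtid are returned */
    }

    heap_endscan(scan);
    ExecDropSingleTupleTableSlot(slot);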

◆ heap_hot_search_buffer()

bool heap_hot_search_buffer ( ItemPointer  tid,
Relation  relation,
Buffer  buffer,
Snapshot  snapshot,
HeapTuple  heapTuple,
bool all_dead,
bool  first_call 
)

Definition at line 1465 of file heapam.c.

1468 {
1469  Page page = BufferGetPage(buffer);
1470  TransactionId prev_xmax = InvalidTransactionId;
1471  BlockNumber blkno;
1472  OffsetNumber offnum;
1473  bool at_chain_start;
1474  bool valid;
1475  bool skip;
1476  GlobalVisState *vistest = NULL;
1477 
1478  /* If this is not the first call, previous call returned a (live!) tuple */
1479  if (all_dead)
1480  *all_dead = first_call;
1481 
1482  blkno = ItemPointerGetBlockNumber(tid);
1483  offnum = ItemPointerGetOffsetNumber(tid);
1484  at_chain_start = first_call;
1485  skip = !first_call;
1486 
1487  /* XXX: we should assert that a snapshot is pushed or registered */
1488  Assert(TransactionIdIsValid(RecentXmin));
1489  Assert(BufferGetBlockNumber(buffer) == blkno);
1490 
1491  /* Scan through possible multiple members of HOT-chain */
1492  for (;;)
1493  {
1494  ItemId lp;
1495 
1496  /* check for bogus TID */
1497  if (offnum < FirstOffsetNumber || offnum > PageGetMaxOffsetNumber(page))
1498  break;
1499 
1500  lp = PageGetItemId(page, offnum);
1501 
1502  /* check for unused, dead, or redirected items */
1503  if (!ItemIdIsNormal(lp))
1504  {
1505  /* We should only see a redirect at start of chain */
1506  if (ItemIdIsRedirected(lp) && at_chain_start)
1507  {
1508  /* Follow the redirect */
1509  offnum = ItemIdGetRedirect(lp);
1510  at_chain_start = false;
1511  continue;
1512  }
1513  /* else must be end of chain */
1514  break;
1515  }
1516 
1517  /*
1518  * Update heapTuple to point to the element of the HOT chain we're
1519  * currently investigating. Having t_self set correctly is important
1520  * because the SSI checks and the *Satisfies routine for historical
1521  * MVCC snapshots need the correct tid to decide about the visibility.
1522  */
1523  heapTuple->t_data = (HeapTupleHeader) PageGetItem(page, lp);
1524  heapTuple->t_len = ItemIdGetLength(lp);
1525  heapTuple->t_tableOid = RelationGetRelid(relation);
1526  ItemPointerSet(&heapTuple->t_self, blkno, offnum);
1527 
1528  /*
1529  * Shouldn't see a HEAP_ONLY tuple at chain start.
1530  */
1531  if (at_chain_start && HeapTupleIsHeapOnly(heapTuple))
1532  break;
1533 
1534  /*
1535  * The xmin should match the previous xmax value, else chain is
1536  * broken.
1537  */
1538  if (TransactionIdIsValid(prev_xmax) &&
1539  !TransactionIdEquals(prev_xmax,
1540  HeapTupleHeaderGetXmin(heapTuple->t_data)))
1541  break;
1542 
1543  /*
1544  * When first_call is true (and thus, skip is initially false) we'll
1545  * return the first tuple we find. But on later passes, heapTuple
1546  * will initially be pointing to the tuple we returned last time.
1547  * Returning it again would be incorrect (and would loop forever), so
1548  * we skip it and return the next match we find.
1549  */
1550  if (!skip)
1551  {
1552  /* If it's visible per the snapshot, we must return it */
1553  valid = HeapTupleSatisfiesVisibility(heapTuple, snapshot, buffer);
1554  HeapCheckForSerializableConflictOut(valid, relation, heapTuple,
1555  buffer, snapshot);
1556 
1557  if (valid)
1558  {
1559  ItemPointerSetOffsetNumber(tid, offnum);
1560  PredicateLockTID(relation, &heapTuple->t_self, snapshot,
1561  HeapTupleHeaderGetXmin(heapTuple->t_data));
1562  if (all_dead)
1563  *all_dead = false;
1564  return true;
1565  }
1566  }
1567  skip = false;
1568 
1569  /*
1570  * If we can't see it, maybe no one else can either. At caller
1571  * request, check whether all chain members are dead to all
1572  * transactions.
1573  *
1574  * Note: if you change the criterion here for what is "dead", fix the
1575  * planner's get_actual_variable_range() function to match.
1576  */
1577  if (all_dead && *all_dead)
1578  {
1579  if (!vistest)
1580  vistest = GlobalVisTestFor(relation);
1581 
1582  if (!HeapTupleIsSurelyDead(heapTuple, vistest))
1583  *all_dead = false;
1584  }
1585 
1586  /*
1587  * Check to see if HOT chain continues past this tuple; if so fetch
1588  * the next offnum and loop around.
1589  */
1590  if (HeapTupleIsHotUpdated(heapTuple))
1591  {
1592  Assert(ItemPointerGetBlockNumber(&heapTuple->t_data->t_ctid) ==
1593  blkno);
1594  offnum = ItemPointerGetOffsetNumber(&heapTuple->t_data->t_ctid);
1595  at_chain_start = false;
1596  prev_xmax = HeapTupleHeaderGetUpdateXid(heapTuple->t_data);
1597  }
1598  else
1599  break; /* end of chain */
1600  }
1601 
1602  return false;
1603 }
bool HeapTupleIsSurelyDead(HeapTuple htup, GlobalVisState *vistest)
#define HeapTupleIsHeapOnly(tuple)
Definition: htup_details.h:683
#define HeapTupleIsHotUpdated(tuple)
Definition: htup_details.h:674
static void ItemPointerSet(ItemPointerData *pointer, BlockNumber blockNumber, OffsetNumber offNum)
Definition: itemptr.h:135
static void ItemPointerSetOffsetNumber(ItemPointerData *pointer, OffsetNumber offsetNumber)
Definition: itemptr.h:158
GlobalVisState * GlobalVisTestFor(Relation rel)
Definition: procarray.c:4018
TransactionId RecentXmin
Definition: snapmgr.c:106

References Assert(), BufferGetBlockNumber(), BufferGetPage(), GlobalVisTestFor(), HeapCheckForSerializableConflictOut(), HeapTupleHeaderGetUpdateXid, HeapTupleHeaderGetXmin, HeapTupleIsHeapOnly, HeapTupleIsHotUpdated, HeapTupleIsSurelyDead(), HeapTupleSatisfiesVisibility(), InvalidTransactionId, ItemIdGetLength, ItemIdGetRedirect, ItemIdIsNormal, ItemIdIsRedirected, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), ItemPointerSet(), ItemPointerSetOffsetNumber(), PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PredicateLockTID(), RecentXmin, RelationGetRelid, skip, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TransactionIdEquals, and TransactionIdIsValid.

Referenced by heap_index_delete_tuples(), heapam_index_fetch_tuple(), and heapam_scan_bitmap_next_block().
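
A minimal sketch of the calling convention used by heapam_index_fetch_tuple(): the caller pins and share-locks the buffer, passes first_call = true for a fresh index TID, and consumes the result before dropping the content lock (rel, tid and snapshot are assumed to come from the caller):

    Buffer          buf = ReadBuffer(rel, ItemPointerGetBlockNumber(tid));
    ItemPointerData ctid = *tid;
    HeapTupleData   heapTuple;
    bool            all_dead = false;
    bool            found;

    LockBuffer(buf, BUFFER_LOCK_SHARE);
    found = heap_hot_search_buffer(&ctid, rel, buf, snapshot,
                                   &heapTuple, &all_dead, true);
    if (found)
    {
        /* heapTuple points into the locked page; copy it or store it in a
         * slot (keeping the pin) before releasing the content lock */
    }
    LockBuffer(buf, BUFFER_LOCK_UNLOCK);
    ReleaseBuffer(buf);

    /* if nothing was returned and all_dead is true, every chain member is
     * dead to all transactions and the index entry may be marked killed */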

◆ heap_index_delete_tuples()

TransactionId heap_index_delete_tuples ( Relation  rel,
TM_IndexDeleteOp delstate 
)

Definition at line 7647 of file heapam.c.

7648 {
7649  /* Initial assumption is that earlier pruning took care of conflict */
7650  TransactionId snapshotConflictHorizon = InvalidTransactionId;
7651  BlockNumber blkno = InvalidBlockNumber;
7652  Buffer buf = InvalidBuffer;
7653  Page page = NULL;
7654  OffsetNumber maxoff = InvalidOffsetNumber;
7655  TransactionId priorXmax;
7656 #ifdef USE_PREFETCH
7657  IndexDeletePrefetchState prefetch_state;
7658  int prefetch_distance;
7659 #endif
7660  SnapshotData SnapshotNonVacuumable;
7661  int finalndeltids = 0,
7662  nblocksaccessed = 0;
7663 
7664  /* State that's only used in bottom-up index deletion case */
7665  int nblocksfavorable = 0;
7666  int curtargetfreespace = delstate->bottomupfreespace,
7667  lastfreespace = 0,
7668  actualfreespace = 0;
7669  bool bottomup_final_block = false;
7670 
7671  InitNonVacuumableSnapshot(SnapshotNonVacuumable, GlobalVisTestFor(rel));
7672 
7673  /* Sort caller's deltids array by TID for further processing */
7674  index_delete_sort(delstate);
7675 
7676  /*
7677  * Bottom-up case: resort deltids array in an order attuned to where the
7678  * greatest number of promising TIDs are to be found, and determine how
7679  * many blocks from the start of sorted array should be considered
7680  * favorable. This will also shrink the deltids array in order to
7681  * eliminate completely unfavorable blocks up front.
7682  */
7683  if (delstate->bottomup)
7684  nblocksfavorable = bottomup_sort_and_shrink(delstate);
7685 
7686 #ifdef USE_PREFETCH
7687  /* Initialize prefetch state. */
7688  prefetch_state.cur_hblkno = InvalidBlockNumber;
7689  prefetch_state.next_item = 0;
7690  prefetch_state.ndeltids = delstate->ndeltids;
7691  prefetch_state.deltids = delstate->deltids;
7692 
7693  /*
7694  * Determine the prefetch distance that we will attempt to maintain.
7695  *
7696  * Since the caller holds a buffer lock somewhere in rel, we'd better make
7697  * sure that isn't a catalog relation before we call code that does
7698  * syscache lookups, to avoid risk of deadlock.
7699  */
7700  if (IsCatalogRelation(rel))
7701  prefetch_distance = maintenance_io_concurrency;
7702  else
7703  prefetch_distance =
7704  get_tablespace_maintenance_io_concurrency(rel->rd_rel->reltablespace);
7705 
7706  /* Cap initial prefetch distance for bottom-up deletion caller */
7707  if (delstate->bottomup)
7708  {
7709  Assert(nblocksfavorable >= 1);
7710  Assert(nblocksfavorable <= BOTTOMUP_MAX_NBLOCKS);
7711  prefetch_distance = Min(prefetch_distance, nblocksfavorable);
7712  }
7713 
7714  /* Start prefetching. */
7715  index_delete_prefetch_buffer(rel, &prefetch_state, prefetch_distance);
7716 #endif
7717 
7718  /* Iterate over deltids, determine which to delete, check their horizon */
7719  Assert(delstate->ndeltids > 0);
7720  for (int i = 0; i < delstate->ndeltids; i++)
7721  {
7722  TM_IndexDelete *ideltid = &delstate->deltids[i];
7723  TM_IndexStatus *istatus = delstate->status + ideltid->id;
7724  ItemPointer htid = &ideltid->tid;
7725  OffsetNumber offnum;
7726 
7727  /*
7728  * Read buffer, and perform required extra steps each time a new block
7729  * is encountered. Avoid refetching if it's the same block as the one
7730  * from the last htid.
7731  */
7732  if (blkno == InvalidBlockNumber ||
7733  ItemPointerGetBlockNumber(htid) != blkno)
7734  {
7735  /*
7736  * Consider giving up early for bottom-up index deletion caller
7737  * first. (Only prefetch next-next block afterwards, when it
7738  * becomes clear that we're at least going to access the next
7739  * block in line.)
7740  *
7741  * Sometimes the first block frees so much space for bottom-up
7742  * caller that the deletion process can end without accessing any
7743  * more blocks. It is usually necessary to access 2 or 3 blocks
7744  * per bottom-up deletion operation, though.
7745  */
7746  if (delstate->bottomup)
7747  {
7748  /*
7749  * We often allow caller to delete a few additional items
7750  * whose entries we reached after the point that space target
7751  * from caller was satisfied. The cost of accessing the page
7752  * was already paid at that point, so it made sense to finish
7753  * it off. When that happened, we finalize everything here
7754  * (by finishing off the whole bottom-up deletion operation
7755  * without needlessly paying the cost of accessing any more
7756  * blocks).
7757  */
7758  if (bottomup_final_block)
7759  break;
7760 
7761  /*
7762  * Give up when we didn't enable our caller to free any
7763  * additional space as a result of processing the page that we
7764  * just finished up with. This rule is the main way in which
7765  * we keep the cost of bottom-up deletion under control.
7766  */
7767  if (nblocksaccessed >= 1 && actualfreespace == lastfreespace)
7768  break;
7769  lastfreespace = actualfreespace; /* for next time */
7770 
7771  /*
7772  * Deletion operation (which is bottom-up) will definitely
7773  * access the next block in line. Prepare for that now.
7774  *
7775  * Decay target free space so that we don't hang on for too
7776  * long with a marginal case. (Space target is only truly
7777  * helpful when it allows us to recognize that we don't need
7778  * to access more than 1 or 2 blocks to satisfy caller due to
7779  * agreeable workload characteristics.)
7780  *
7781  * We are a bit more patient when we encounter contiguous
7782  * blocks, though: these are treated as favorable blocks. The
7783  * decay process is only applied when the next block in line
7784  * is not a favorable/contiguous block. This is not an
7785  * exception to the general rule; we still insist on finding
7786  * at least one deletable item per block accessed. See
7787  * bottomup_nblocksfavorable() for full details of the theory
7788  * behind favorable blocks and heap block locality in general.
7789  *
7790  * Note: The first block in line is always treated as a
7791  * favorable block, so the earliest possible point that the
7792  * decay can be applied is just before we access the second
7793  * block in line. The Assert() verifies this for us.
7794  */
7795  Assert(nblocksaccessed > 0 || nblocksfavorable > 0);
7796  if (nblocksfavorable > 0)
7797  nblocksfavorable--;
7798  else
7799  curtargetfreespace /= 2;
7800  }
7801 
7802  /* release old buffer */
7803  if (BufferIsValid(buf))
7804  UnlockReleaseBuffer(buf);
7805 
7806  blkno = ItemPointerGetBlockNumber(htid);
7807  buf = ReadBuffer(rel, blkno);
7808  nblocksaccessed++;
7809  Assert(!delstate->bottomup ||
7810  nblocksaccessed <= BOTTOMUP_MAX_NBLOCKS);
7811 
7812 #ifdef USE_PREFETCH
7813 
7814  /*
7815  * To maintain the prefetch distance, prefetch one more page for
7816  * each page we read.
7817  */
7818  index_delete_prefetch_buffer(rel, &prefetch_state, 1);
7819 #endif
7820 
7821  LockBuffer(buf, BUFFER_LOCK_SHARE);
7822 
7823  page = BufferGetPage(buf);
7824  maxoff = PageGetMaxOffsetNumber(page);
7825  }
7826 
7827  /*
7828  * In passing, detect index corruption involving an index page with a
7829  * TID that points to a location in the heap that couldn't possibly be
7830  * correct. We only do this with actual TIDs from caller's index page
7831  * (not items reached by traversing through a HOT chain).
7832  */
7833  index_delete_check_htid(delstate, page, maxoff, htid, istatus);
7834 
7835  if (istatus->knowndeletable)
7836  Assert(!delstate->bottomup && !istatus->promising);
7837  else
7838  {
7839  ItemPointerData tmp = *htid;
7840  HeapTupleData heapTuple;
7841 
7842  /* Are any tuples from this HOT chain non-vacuumable? */
7843  if (heap_hot_search_buffer(&tmp, rel, buf, &SnapshotNonVacuumable,
7844  &heapTuple, NULL, true))
7845  continue; /* can't delete entry */
7846 
7847  /* Caller will delete, since whole HOT chain is vacuumable */
7848  istatus->knowndeletable = true;
7849 
7850  /* Maintain index free space info for bottom-up deletion case */
7851  if (delstate->bottomup)
7852  {
7853  Assert(istatus->freespace > 0);
7854  actualfreespace += istatus->freespace;
7855  if (actualfreespace >= curtargetfreespace)
7856  bottomup_final_block = true;
7857  }
7858  }
7859 
7860  /*
7861  * Maintain snapshotConflictHorizon value for deletion operation as a
7862  * whole by advancing current value using heap tuple headers. This is
7863  * loosely based on the logic for pruning a HOT chain.
7864  */
7865  offnum = ItemPointerGetOffsetNumber(htid);
7866  priorXmax = InvalidTransactionId; /* cannot check first XMIN */
7867  for (;;)
7868  {
7869  ItemId lp;
7870  HeapTupleHeader htup;
7871 
7872  /* Sanity check (pure paranoia) */
7873  if (offnum < FirstOffsetNumber)
7874  break;
7875 
7876  /*
7877  * An offset past the end of page's line pointer array is possible
7878  * when the array was truncated
7879  */
7880  if (offnum > maxoff)
7881  break;
7882 
7883  lp = PageGetItemId(page, offnum);
7884  if (ItemIdIsRedirected(lp))
7885  {
7886  offnum = ItemIdGetRedirect(lp);
7887  continue;
7888  }
7889 
7890  /*
7891  * We'll often encounter LP_DEAD line pointers (especially with an
7892  * entry marked knowndeletable by our caller up front). No heap
7893  * tuple headers get examined for an htid that leads us to an
7894  * LP_DEAD item. This is okay because the earlier pruning
7895  * operation that made the line pointer LP_DEAD in the first place
7896  * must have considered the original tuple header as part of
7897  * generating its own snapshotConflictHorizon value.
7898  *
7899  * Relying on XLOG_HEAP2_PRUNE records like this is the same
7900  * strategy that index vacuuming uses in all cases. Index VACUUM
7901  * WAL records don't even have a snapshotConflictHorizon field of
7902  * their own for this reason.
7903  */
7904  if (!ItemIdIsNormal(lp))
7905  break;
7906 
7907  htup = (HeapTupleHeader) PageGetItem(page, lp);
7908 
7909  /*
7910  * Check the tuple XMIN against prior XMAX, if any
7911  */
7912  if (TransactionIdIsValid(priorXmax) &&
7913  !TransactionIdEquals(HeapTupleHeaderGetXmin(htup), priorXmax))
7914  break;
7915 
7916  HeapTupleHeaderAdvanceConflictHorizon(htup,
7917  &snapshotConflictHorizon);
7918 
7919  /*
7920  * If the tuple is not HOT-updated, then we are at the end of this
7921  * HOT-chain. No need to visit later tuples from the same update
7922  * chain (they get their own index entries) -- just move on to
7923  * next htid from index AM caller.
7924  */
7925  if (!HeapTupleHeaderIsHotUpdated(htup))
7926  break;
7927 
7928  /* Advance to next HOT chain member */
7929  Assert(ItemPointerGetBlockNumber(&htup->t_ctid) == blkno);
7930  offnum = ItemPointerGetOffsetNumber(&htup->t_ctid);
7931  priorXmax = HeapTupleHeaderGetUpdateXid(htup);
7932  }
7933 
7934  /* Enable further/final shrinking of deltids for caller */
7935  finalndeltids = i + 1;
7936  }
7937 
7938  UnlockReleaseBuffer(buf);
7939 
7940  /*
7941  * Shrink deltids array to exclude non-deletable entries at the end. This
7942  * is not just a minor optimization. Final deltids array size might be
7943  * zero for a bottom-up caller. Index AM is explicitly allowed to rely on
7944  * ndeltids being zero in all cases with zero total deletable entries.
7945  */
7946  Assert(finalndeltids > 0 || delstate->bottomup);
7947  delstate->ndeltids = finalndeltids;
7948 
7949  return snapshotConflictHorizon;
7950 }
int maintenance_io_concurrency
Definition: bufmgr.c:154
#define Min(x, y)
Definition: c.h:993
bool IsCatalogRelation(Relation relation)
Definition: catalog.c:105
static int bottomup_sort_and_shrink(TM_IndexDeleteOp *delstate)
Definition: heapam.c:8205
void HeapTupleHeaderAdvanceConflictHorizon(HeapTupleHeader tuple, TransactionId *snapshotConflictHorizon)
Definition: heapam.c:7502
static void index_delete_check_htid(TM_IndexDeleteOp *delstate, Page page, OffsetNumber maxoff, ItemPointer htid, TM_IndexStatus *istatus)
Definition: heapam.c:7587
#define BOTTOMUP_MAX_NBLOCKS
Definition: heapam.c:191
bool heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer, Snapshot snapshot, HeapTuple heapTuple, bool *all_dead, bool first_call)
Definition: heapam.c:1465
static void index_delete_sort(TM_IndexDeleteOp *delstate)
Definition: heapam.c:7992
#define InitNonVacuumableSnapshot(snapshotdata, vistestp)
Definition: snapmgr.h:48
int get_tablespace_maintenance_io_concurrency(Oid spcid)
Definition: spccache.c:229
TM_IndexStatus * status
Definition: tableam.h:247
int bottomupfreespace
Definition: tableam.h:242
TM_IndexDelete * deltids
Definition: tableam.h:246
ItemPointerData tid
Definition: tableam.h:205
bool knowndeletable
Definition: tableam.h:212
bool promising
Definition: tableam.h:215
int16 freespace
Definition: tableam.h:216

References Assert(), TM_IndexDeleteOp::bottomup, BOTTOMUP_MAX_NBLOCKS, bottomup_sort_and_shrink(), TM_IndexDeleteOp::bottomupfreespace, buf, BUFFER_LOCK_SHARE, BufferGetPage(), BufferIsValid(), TM_IndexDeleteOp::deltids, FirstOffsetNumber, TM_IndexStatus::freespace, get_tablespace_maintenance_io_concurrency(), GlobalVisTestFor(), heap_hot_search_buffer(), HeapTupleHeaderAdvanceConflictHorizon(), HeapTupleHeaderGetUpdateXid, HeapTupleHeaderGetXmin, HeapTupleHeaderIsHotUpdated, i, TM_IndexDelete::id, index_delete_check_htid(), index_delete_sort(), InitNonVacuumableSnapshot, InvalidBlockNumber, InvalidBuffer, InvalidOffsetNumber, InvalidTransactionId, IsCatalogRelation(), ItemIdGetRedirect, ItemIdIsNormal, ItemIdIsRedirected, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), TM_IndexStatus::knowndeletable, LockBuffer(), maintenance_io_concurrency, Min, TM_IndexDeleteOp::ndeltids, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), TM_IndexStatus::promising, RelationData::rd_rel, ReadBuffer(), TM_IndexDeleteOp::status, HeapTupleHeaderData::t_ctid, TM_IndexDelete::tid, TransactionIdEquals, TransactionIdIsValid, and UnlockReleaseBuffer().
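
Index AMs reach this through table_index_delete_tuples(); a rough sketch of a simple (non-bottom-up) deletion request, loosely following _bt_delitems_delete_check(), where the deltids/status arrays and the other variable names are assumed to have been set up by the index AM beforehand:

    TM_IndexDeleteOp delstate;
    TransactionId    conflict_horizon;

    delstate.irel = indexRel;                      /* target index, for error reports */
    delstate.iblknum = BufferGetBlockNumber(ibuf); /* index page being processed */
    delstate.bottomup = false;                     /* simple deletion */
    delstate.bottomupfreespace = 0;
    delstate.ndeltids = ndeltids;                  /* caller-built parallel arrays */
    delstate.deltids = deltids;
    delstate.status = status;

    conflict_horizon = heap_index_delete_tuples(heapRel, &delstate);

    /* entries whose status[].knowndeletable is now set can be removed from the
     * index page; conflict_horizon goes into the deletion's WAL record */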

◆ heap_inplace_update()

void heap_inplace_update ( Relation  relation,
HeapTuple  tuple 
)

Definition at line 5893 of file heapam.c.

5894 {
5895  Buffer buffer;
5896  Page page;
5897  OffsetNumber offnum;
5898  ItemId lp = NULL;
5899  HeapTupleHeader htup;
5900  uint32 oldlen;
5901  uint32 newlen;
5902 
5903  /*
5904  * For now, we don't allow parallel updates. Unlike a regular update,
5905  * this should never create a combo CID, so it might be possible to relax
5906  * this restriction, but not without more thought and testing. It's not
5907  * clear that it would be useful, anyway.
5908  */
5909  if (IsInParallelMode())
5910  ereport(ERROR,
5911  (errcode(ERRCODE_INVALID_TRANSACTION_STATE),
5912  errmsg("cannot update tuples during a parallel operation")));
5913 
5914  buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(&(tuple->t_self)));
5915  LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
5916  page = (Page) BufferGetPage(buffer);
5917 
5918  offnum = ItemPointerGetOffsetNumber(&(tuple->t_self));
5919  if (PageGetMaxOffsetNumber(page) >= offnum)
5920  lp = PageGetItemId(page, offnum);
5921 
5922  if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
5923  elog(ERROR, "invalid lp");
5924 
5925  htup = (HeapTupleHeader) PageGetItem(page, lp);
5926 
5927  oldlen = ItemIdGetLength(lp) - htup->t_hoff;
5928  newlen = tuple->t_len - tuple->t_data->t_hoff;
5929  if (oldlen != newlen || htup->t_hoff != tuple->t_data->t_hoff)
5930  elog(ERROR, "wrong tuple length");
5931 
5932  /* NO EREPORT(ERROR) from here till changes are logged */
5933  START_CRIT_SECTION();
5934 
5935  memcpy((char *) htup + htup->t_hoff,
5936  (char *) tuple->t_data + tuple->t_data->t_hoff,
5937  newlen);
5938 
5939  MarkBufferDirty(buffer);
5940 
5941  /* XLOG stuff */
5942  if (RelationNeedsWAL(relation))
5943  {
5944  xl_heap_inplace xlrec;
5945  XLogRecPtr recptr;
5946 
5947  xlrec.offnum = ItemPointerGetOffsetNumber(&tuple->t_self);
5948 
5949  XLogBeginInsert();
5950  XLogRegisterData((char *) &xlrec, SizeOfHeapInplace);
5951 
5952  XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
5953  XLogRegisterBufData(0, (char *) htup + htup->t_hoff, newlen);
5954 
5955  /* inplace updates aren't decoded atm, don't log the origin */
5956 
5957  recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_INPLACE);
5958 
5959  PageSetLSN(page, recptr);
5960  }
5961 
5962  END_CRIT_SECTION();
5963 
5964  UnlockReleaseBuffer(buffer);
5965 
5966  /*
5967  * Send out shared cache inval if necessary. Note that because we only
5968  * pass the new version of the tuple, this mustn't be used for any
5969  * operations that could change catcache lookup keys. But we aren't
5970  * bothering with index updates either, so that's true a fortiori.
5971  */
5972  if (!IsBootstrapProcessingMode())
5973  CacheInvalidateHeapTuple(relation, tuple, NULL);
5974 }
unsigned int uint32
Definition: c.h:495
#define SizeOfHeapInplace
Definition: heapam_xlog.h:316
#define XLOG_HEAP_INPLACE
Definition: heapam_xlog.h:39
#define IsBootstrapProcessingMode()
Definition: miscadmin.h:416
OffsetNumber offnum
Definition: heapam_xlog.h:312

References BUFFER_LOCK_EXCLUSIVE, BufferGetPage(), CacheInvalidateHeapTuple(), elog(), END_CRIT_SECTION, ereport, errcode(), errmsg(), ERROR, IsBootstrapProcessingMode, IsInParallelMode(), ItemIdGetLength, ItemIdIsNormal, ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), LockBuffer(), MarkBufferDirty(), xl_heap_inplace::offnum, PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PageSetLSN(), ReadBuffer(), REGBUF_STANDARD, RelationNeedsWAL, SizeOfHeapInplace, START_CRIT_SECTION, HeapTupleData::t_data, HeapTupleHeaderData::t_hoff, HeapTupleData::t_len, HeapTupleData::t_self, UnlockReleaseBuffer(), XLOG_HEAP_INPLACE, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by create_toast_table(), dropdb(), EventTriggerOnLogin(), index_update_stats(), vac_update_datfrozenxid(), and vac_update_relstats().
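
The callers above all follow the vac_update_relstats() pattern: copy the catalog tuple, overwrite fixed-width fields that are not syscache lookup keys, and write the copy back in place. A condensed sketch (relid and new_relpages are hypothetical inputs):

    Relation      rd = table_open(RelationRelationId, RowExclusiveLock);
    HeapTuple     ctup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid));
    Form_pg_class pgcform;

    if (!HeapTupleIsValid(ctup))
        elog(ERROR, "pg_class entry for relid %u vanished during update", relid);
    pgcform = (Form_pg_class) GETSTRUCT(ctup);

    pgcform->relpages = new_relpages;   /* fixed-width field, not a cache key */

    heap_inplace_update(rd, ctup);      /* overwrite in place; no new tuple version */

    heap_freetuple(ctup);
    table_close(rd, RowExclusiveLock);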

◆ heap_insert()

void heap_insert ( Relation  relation,
HeapTuple  tup,
CommandId  cid,
int  options,
BulkInsertState  bistate 
)

Definition at line 1828 of file heapam.c.

1830 {
1831  TransactionId xid = GetCurrentTransactionId();
1832  HeapTuple heaptup;
1833  Buffer buffer;
1834  Buffer vmbuffer = InvalidBuffer;
1835  bool all_visible_cleared = false;
1836 
1837  /* Cheap, simplistic check that the tuple matches the rel's rowtype. */
1838  Assert(HeapTupleHeaderGetNatts(tup->t_data) <=
1839  RelationGetNumberOfAttributes(relation));
1840 
1841  /*
1842  * Fill in tuple header fields and toast the tuple if necessary.
1843  *
1844  * Note: below this point, heaptup is the data we actually intend to store
1845  * into the relation; tup is the caller's original untoasted data.
1846  */
1847  heaptup = heap_prepare_insert(relation, tup, xid, cid, options);
1848 
1849  /*
1850  * Find buffer to insert this tuple into. If the page is all visible,
1851  * this will also pin the requisite visibility map page.
1852  */
1853  buffer = RelationGetBufferForTuple(relation, heaptup->t_len,
1854  InvalidBuffer, options, bistate,
1855  &vmbuffer, NULL,
1856  0);
1857 
1858  /*
1859  * We're about to do the actual insert -- but check for conflict first, to
1860  * avoid possibly having to roll back work we've just done.
1861  *
1862  * This is safe without a recheck as long as there is no possibility of
1863  * another process scanning the page between this check and the insert
1864  * being visible to the scan (i.e., an exclusive buffer content lock is
1865  * continuously held from this point until the tuple insert is visible).
1866  *
1867  * For a heap insert, we only need to check for table-level SSI locks. Our
1868  * new tuple can't possibly conflict with existing tuple locks, and heap
1869  * page locks are only consolidated versions of tuple locks; they do not
1870  * lock "gaps" as index page locks do. So we don't need to specify a
1871  * buffer when making the call, which makes for a faster check.
1872  */
1873  CheckForSerializableConflictIn(relation, NULL, InvalidBlockNumber);
1874 
1875  /* NO EREPORT(ERROR) from here till changes are logged */
1876  START_CRIT_SECTION();
1877 
1878  RelationPutHeapTuple(relation, buffer, heaptup,
1879  (options & HEAP_INSERT_SPECULATIVE) != 0);
1880 
1881  if (PageIsAllVisible(BufferGetPage(buffer)))
1882  {
1883  all_visible_cleared = true;
1885  visibilitymap_clear(relation,
1886  ItemPointerGetBlockNumber(&(heaptup->t_self)),
1887  vmbuffer, VISIBILITYMAP_VALID_BITS);
1888  }
1889 
1890  /*
1891  * XXX Should we set PageSetPrunable on this page ?
1892  *
1893  * The inserting transaction may eventually abort thus making this tuple
1894  * DEAD and hence available for pruning. Though we don't want to optimize
1895  * for aborts, if no other tuple in this page is UPDATEd/DELETEd, the
1896  * aborted tuple will never be pruned until next vacuum is triggered.
1897  *
1898  * If you do add PageSetPrunable here, add it in heap_xlog_insert too.
1899  */
1900 
1901  MarkBufferDirty(buffer);
1902 
1903  /* XLOG stuff */
1904  if (RelationNeedsWAL(relation))
1905  {
1906  xl_heap_insert xlrec;
1907  xl_heap_header xlhdr;
1908  XLogRecPtr recptr;
1909  Page page = BufferGetPage(buffer);
1910  uint8 info = XLOG_HEAP_INSERT;
1911  int bufflags = 0;
1912 
1913  /*
1914  * If this is a catalog, we need to transmit combo CIDs to properly
1915  * decode, so log that as well.
1916  */
1917  if (RelationIsAccessibleInLogicalDecoding(relation))
1918  log_heap_new_cid(relation, heaptup);
1919 
1920  /*
1921  * If this is the single and first tuple on page, we can reinit the
1922  * page instead of restoring the whole thing. Set flag, and hide
1923  * buffer references from XLogInsert.
1924  */
1925  if (ItemPointerGetOffsetNumber(&(heaptup->t_self)) == FirstOffsetNumber &&
1926  PageGetMaxOffsetNumber(page) == FirstOffsetNumber)
1927  {
1928  info |= XLOG_HEAP_INIT_PAGE;
1929  bufflags |= REGBUF_WILL_INIT;
1930  }
1931 
1932  xlrec.offnum = ItemPointerGetOffsetNumber(&heaptup->t_self);
1933  xlrec.flags = 0;
1934  if (all_visible_cleared)
1935  xlrec.flags |= XLH_INSERT_ALL_VISIBLE_CLEARED;
1936  if (options & HEAP_INSERT_SPECULATIVE)
1937  xlrec.flags |= XLH_INSERT_IS_SPECULATIVE;
1938  Assert(ItemPointerGetBlockNumber(&heaptup->t_self) == BufferGetBlockNumber(buffer));
1939 
1940  /*
1941  * For logical decoding, we need the tuple even if we're doing a full
1942  * page write, so make sure it's included even if we take a full-page
1943  * image. (XXX We could alternatively store a pointer into the FPW).
1944  */
1945  if (RelationIsLogicallyLogged(relation) &&
1946  !(options & HEAP_INSERT_NO_LOGICAL))
1947  {
1948  xlrec.flags |= XLH_INSERT_CONTAINS_NEW_TUPLE;
1949  bufflags |= REGBUF_KEEP_DATA;
1950 
1951  if (IsToastRelation(relation))
1952  xlrec.flags |= XLH_INSERT_ON_TOAST_RELATION;
1953  }
1954 
1955  XLogBeginInsert();
1956  XLogRegisterData((char *) &xlrec, SizeOfHeapInsert);
1957 
1958  xlhdr.t_infomask2 = heaptup->t_data->t_infomask2;
1959  xlhdr.t_infomask = heaptup->t_data->t_infomask;
1960  xlhdr.t_hoff = heaptup->t_data->t_hoff;
1961 
1962  /*
1963  * note we mark xlhdr as belonging to buffer; if XLogInsert decides to
1964  * write the whole page to the xlog, we don't need to store
1965  * xl_heap_header in the xlog.
1966  */
1967  XLogRegisterBuffer(0, buffer, REGBUF_STANDARD | bufflags);
1968  XLogRegisterBufData(0, (char *) &xlhdr, SizeOfHeapHeader);
1969  /* PG73FORMAT: write bitmap [+ padding] [+ oid] + data */
1970  XLogRegisterBufData(0,
1971  (char *) heaptup->t_data + SizeofHeapTupleHeader,
1972  heaptup->t_len - SizeofHeapTupleHeader);
1973 
1974  /* filtering by origin on a row level is much more efficient */
1975  XLogSetRecordFlags(XLOG_INCLUDE_ORIGIN);
1976 
1977  recptr = XLogInsert(RM_HEAP_ID, info);
1978 
1979  PageSetLSN(page, recptr);
1980  }
1981 
1982  END_CRIT_SECTION();
1983 
1984  UnlockReleaseBuffer(buffer);
1985  if (vmbuffer != InvalidBuffer)
1986  ReleaseBuffer(vmbuffer);
1987 
1988  /*
1989  * If tuple is cachable, mark it for invalidation from the caches in case
1990  * we abort. Note it is OK to do this after releasing the buffer, because
1991  * the heaptup data structure is all in local memory, not in the shared
1992  * buffer.
1993  */
1994  CacheInvalidateHeapTuple(relation, heaptup, NULL);
1995 
1996  /* Note: speculative insertions are counted too, even if aborted later */
1997  pgstat_count_heap_insert(relation, 1);
1998 
1999  /*
2000  * If heaptup is a private copy, release it. Don't forget to copy t_self
2001  * back to the caller's image, too.
2002  */
2003  if (heaptup != tup)
2004  {
2005  tup->t_self = heaptup->t_self;
2006  heap_freetuple(heaptup);
2007  }
2008 }
unsigned char uint8
Definition: c.h:493
static HeapTuple heap_prepare_insert(Relation relation, HeapTuple tup, TransactionId xid, CommandId cid, int options)
Definition: heapam.c:2017
#define HEAP_INSERT_SPECULATIVE
Definition: heapam.h:37
#define HEAP_INSERT_NO_LOGICAL
Definition: heapam.h:36
#define XLH_INSERT_ON_TOAST_RELATION
Definition: heapam_xlog.h:70
#define XLH_INSERT_IS_SPECULATIVE
Definition: heapam_xlog.h:68
#define XLH_INSERT_ALL_VISIBLE_CLEARED
Definition: heapam_xlog.h:66
#define XLOG_HEAP_INSERT
Definition: heapam_xlog.h:32
#define SizeOfHeapInsert
Definition: heapam_xlog.h:162
#define XLH_INSERT_CONTAINS_NEW_TUPLE
Definition: heapam_xlog.h:69
#define XLOG_HEAP_INIT_PAGE
Definition: heapam_xlog.h:46
void RelationPutHeapTuple(Relation relation, Buffer buffer, HeapTuple tuple, bool token)
Definition: hio.c:36
Buffer RelationGetBufferForTuple(Relation relation, Size len, Buffer otherBuffer, int options, BulkInsertState bistate, Buffer *vmbuffer, Buffer *vmbuffer_other, int num_pages)
Definition: hio.c:503
#define HeapTupleHeaderGetNatts(tup)
Definition: htup_details.h:529
void pgstat_count_heap_insert(Relation rel, PgStat_Counter n)
#define RelationIsLogicallyLogged(relation)
Definition: rel.h:700
#define RelationGetNumberOfAttributes(relation)
Definition: rel.h:510
OffsetNumber offnum
Definition: heapam_xlog.h:156
#define REGBUF_KEEP_DATA
Definition: xloginsert.h:35
#define REGBUF_WILL_INIT
Definition: xloginsert.h:33

References Assert(), BufferGetBlockNumber(), BufferGetPage(), CacheInvalidateHeapTuple(), CheckForSerializableConflictIn(), END_CRIT_SECTION, FirstOffsetNumber, xl_heap_insert::flags, GetCurrentTransactionId(), heap_freetuple(), HEAP_INSERT_NO_LOGICAL, HEAP_INSERT_SPECULATIVE, heap_prepare_insert(), HeapTupleHeaderGetNatts, InvalidBlockNumber, InvalidBuffer, IsToastRelation(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), log_heap_new_cid(), MarkBufferDirty(), xl_heap_insert::offnum, PageClearAllVisible(), PageGetMaxOffsetNumber(), PageIsAllVisible(), PageSetLSN(), pgstat_count_heap_insert(), REGBUF_KEEP_DATA, REGBUF_STANDARD, REGBUF_WILL_INIT, RelationGetBufferForTuple(), RelationGetNumberOfAttributes, RelationIsAccessibleInLogicalDecoding, RelationIsLogicallyLogged, RelationNeedsWAL, RelationPutHeapTuple(), ReleaseBuffer(), SizeOfHeapHeader, SizeOfHeapInsert, SizeofHeapTupleHeader, START_CRIT_SECTION, HeapTupleData::t_data, xl_heap_header::t_hoff, HeapTupleHeaderData::t_hoff, xl_heap_header::t_infomask, HeapTupleHeaderData::t_infomask, xl_heap_header::t_infomask2, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, UnlockReleaseBuffer(), visibilitymap_clear(), VISIBILITYMAP_VALID_BITS, XLH_INSERT_ALL_VISIBLE_CLEARED, XLH_INSERT_CONTAINS_NEW_TUPLE, XLH_INSERT_IS_SPECULATIVE, XLH_INSERT_ON_TOAST_RELATION, XLOG_HEAP_INIT_PAGE, XLOG_HEAP_INSERT, XLOG_INCLUDE_ORIGIN, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), XLogRegisterData(), and XLogSetRecordFlags().

Referenced by heapam_tuple_insert(), heapam_tuple_insert_speculative(), simple_heap_insert(), and toast_save_datum().
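
A condensed sketch of a direct insertion into a two-column relation, assuming a backend context inside a transaction; note that heap_insert() only writes the heap tuple, so callers such as CatalogTupleInsert() or the executor must also insert index entries and fire triggers themselves:

    Relation  rel = table_open(relid, RowExclusiveLock);
    TupleDesc tupdesc = RelationGetDescr(rel);
    Datum     values[2];
    bool      nulls[2] = {false, false};
    HeapTuple tup;

    values[0] = Int32GetDatum(42);                  /* hypothetical column values */
    values[1] = CStringGetTextDatum("hello");

    tup = heap_form_tuple(tupdesc, values, nulls);
    heap_insert(rel, tup, GetCurrentCommandId(true), 0, NULL);

    /* tup->t_self now carries the TID assigned to the new tuple */
    heap_freetuple(tup);
    table_close(rel, RowExclusiveLock);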

◆ heap_lock_tuple()

TM_Result heap_lock_tuple ( Relation  relation,
HeapTuple  tuple,
CommandId  cid,
LockTupleMode  mode,
LockWaitPolicy  wait_policy,
bool  follow_updates,
Buffer buffer,
struct TM_FailureData tmfd 
)

Definition at line 4144 of file heapam.c.

4148 {
4149  TM_Result result;
4150  ItemPointer tid = &(tuple->t_self);
4151  ItemId lp;
4152  Page page;
4153  Buffer vmbuffer = InvalidBuffer;
4154  BlockNumber block;
4155  TransactionId xid,
4156  xmax;
4157  uint16 old_infomask,
4158  new_infomask,
4159  new_infomask2;
4160  bool first_time = true;
4161  bool skip_tuple_lock = false;
4162  bool have_tuple_lock = false;
4163  bool cleared_all_frozen = false;
4164 
4165  *buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
4166  block = ItemPointerGetBlockNumber(tid);
4167 
4168  /*
4169  * Before locking the buffer, pin the visibility map page if it appears to
4170  * be necessary. Since we haven't got the lock yet, someone else might be
4171  * in the middle of changing this, so we'll need to recheck after we have
4172  * the lock.
4173  */
4174  if (PageIsAllVisible(BufferGetPage(*buffer)))
4175  visibilitymap_pin(relation, block, &vmbuffer);
4176 
4177  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4178 
4179  page = BufferGetPage(*buffer);
4180  lp = PageGetItemId(page, ItemPointerGetOffsetNumber(tid));
4181  Assert(ItemIdIsNormal(lp));
4182 
4183  tuple->t_data = (HeapTupleHeader) PageGetItem(page, lp);
4184  tuple->t_len = ItemIdGetLength(lp);
4185  tuple->t_tableOid = RelationGetRelid(relation);
4186 
4187 l3:
4188  result = HeapTupleSatisfiesUpdate(tuple, cid, *buffer);
4189 
4190  if (result == TM_Invisible)
4191  {
4192  /*
4193  * This is possible, but only when locking a tuple for ON CONFLICT
4194  * UPDATE. We return this value here rather than throwing an error in
4195  * order to give that case the opportunity to throw a more specific
4196  * error.
4197  */
4198  result = TM_Invisible;
4199  goto out_locked;
4200  }
4201  else if (result == TM_BeingModified ||
4202  result == TM_Updated ||
4203  result == TM_Deleted)
4204  {
4205  TransactionId xwait;
4206  uint16 infomask;
4207  uint16 infomask2;
4208  bool require_sleep;
4209  ItemPointerData t_ctid;
4210 
4211  /* must copy state data before unlocking buffer */
4212  xwait = HeapTupleHeaderGetRawXmax(tuple->t_data);
4213  infomask = tuple->t_data->t_infomask;
4214  infomask2 = tuple->t_data->t_infomask2;
4215  ItemPointerCopy(&tuple->t_data->t_ctid, &t_ctid);
4216 
4217  LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
4218 
4219  /*
4220  * If any subtransaction of the current top transaction already holds
4221  * a lock as strong as or stronger than what we're requesting, we
4222  * effectively hold the desired lock already. We *must* succeed
4223  * without trying to take the tuple lock, else we will deadlock
4224  * against anyone wanting to acquire a stronger lock.
4225  *
4226  * Note we only do this the first time we loop on the HTSU result;
4227  * there is no point in testing in subsequent passes, because
4228  * evidently our own transaction cannot have acquired a new lock after
4229  * the first time we checked.
4230  */
4231  if (first_time)
4232  {
4233  first_time = false;
4234 
4235  if (infomask & HEAP_XMAX_IS_MULTI)
4236  {
4237  int i;
4238  int nmembers;
4239  MultiXactMember *members;
4240 
4241  /*
4242  * We don't need to allow old multixacts here; if that had
4243  * been the case, HeapTupleSatisfiesUpdate would have returned
4244  * MayBeUpdated and we wouldn't be here.
4245  */
4246  nmembers =
4247  GetMultiXactIdMembers(xwait, &members, false,
4248  HEAP_XMAX_IS_LOCKED_ONLY(infomask));
4249 
4250  for (i = 0; i < nmembers; i++)
4251  {
4252  /* only consider members of our own transaction */
4253  if (!TransactionIdIsCurrentTransactionId(members[i].xid))
4254  continue;
4255 
4256  if (TUPLOCK_from_mxstatus(members[i].status) >= mode)
4257  {
4258  pfree(members);
4259  result = TM_Ok;
4260  goto out_unlocked;
4261  }
4262  else
4263  {
4264  /*
4265  * Disable acquisition of the heavyweight tuple lock.
4266  * Otherwise, when promoting a weaker lock, we might
4267  * deadlock with another locker that has acquired the
4268  * heavyweight tuple lock and is waiting for our
4269  * transaction to finish.
4270  *
4271  * Note that in this case we still need to wait for
4272  * the multixact if required, to avoid acquiring
4273  * conflicting locks.
4274  */
4275  skip_tuple_lock = true;
4276  }
4277  }
4278 
4279  if (members)
4280  pfree(members);
4281  }
4282  else if (TransactionIdIsCurrentTransactionId(xwait))
4283  {
4284  switch (mode)
4285  {
4286  case LockTupleKeyShare:
4287  Assert(HEAP_XMAX_IS_KEYSHR_LOCKED(infomask) ||
4288  HEAP_XMAX_IS_SHR_LOCKED(infomask) ||
4289  HEAP_XMAX_IS_EXCL_LOCKED(infomask));
4290  result = TM_Ok;
4291  goto out_unlocked;
4292  case LockTupleShare:
4293  if (HEAP_XMAX_IS_SHR_LOCKED(infomask) ||
4294  HEAP_XMAX_IS_EXCL_LOCKED(infomask))
4295  {
4296  result = TM_Ok;
4297  goto out_unlocked;
4298  }
4299  break;
4300  case LockTupleNoKeyExclusive:
4301  if (HEAP_XMAX_IS_EXCL_LOCKED(infomask))
4302  {
4303  result = TM_Ok;
4304  goto out_unlocked;
4305  }
4306  break;
4307  case LockTupleExclusive:
4308  if (HEAP_XMAX_IS_EXCL_LOCKED(infomask) &&
4309  infomask2 & HEAP_KEYS_UPDATED)
4310  {
4311  result = TM_Ok;
4312  goto out_unlocked;
4313  }
4314  break;
4315  }
4316  }
4317  }
4318 
4319  /*
4320  * Initially assume that we will have to wait for the locking
4321  * transaction(s) to finish. We check various cases below in which
4322  * this can be turned off.
4323  */
4324  require_sleep = true;
4325  if (mode == LockTupleKeyShare)
4326  {
4327  /*
4328  * If we're requesting KeyShare, and there's no update present, we
4329  * don't need to wait. Even if there is an update, we can still
4330  * continue if the key hasn't been modified.
4331  *
4332  * However, if there are updates, we need to walk the update chain
4333  * to mark future versions of the row as locked, too. That way,
4334  * if somebody deletes that future version, we're protected
4335  * against the key going away. This locking of future versions
4336  * could block momentarily, if a concurrent transaction is
4337  * deleting a key; or it could return a value to the effect that
4338  * the transaction deleting the key has already committed. So we
4339  * do this before re-locking the buffer; otherwise this would be
4340  * prone to deadlocks.
4341  *
4342  * Note that the TID we're locking was grabbed before we unlocked
4343  * the buffer. For it to change while we're not looking, the
4344  * other properties we're testing for below after re-locking the
4345  * buffer would also change, in which case we would restart this
4346  * loop above.
4347  */
4348  if (!(infomask2 & HEAP_KEYS_UPDATED))
4349  {
4350  bool updated;
4351 
4352  updated = !HEAP_XMAX_IS_LOCKED_ONLY(infomask);
4353 
4354  /*
4355  * If there are updates, follow the update chain; bail out if
4356  * that cannot be done.
4357  */
4358  if (follow_updates && updated)
4359  {
4360  TM_Result res;
4361 
4362  res = heap_lock_updated_tuple(relation, tuple, &t_ctid,
4363  GetCurrentTransactionId(),
4364  mode);
4365  if (res != TM_Ok)
4366  {
4367  result = res;
4368  /* recovery code expects to have buffer lock held */
4369  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4370  goto failed;
4371  }
4372  }
4373 
4374  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4375 
4376  /*
4377  * Make sure it's still an appropriate lock, else start over.
4378  * Also, if it wasn't updated before we released the lock, but
4379  * is updated now, we start over too; the reason is that we
4380  * now need to follow the update chain to lock the new
4381  * versions.
4382  */
4383  if (!HeapTupleHeaderIsOnlyLocked(tuple->t_data) &&
4384  ((tuple->t_data->t_infomask2 & HEAP_KEYS_UPDATED) ||
4385  !updated))
4386  goto l3;
4387 
4388  /* Things look okay, so we can skip sleeping */
4389  require_sleep = false;
4390 
4391  /*
4392  * Note we allow Xmax to change here; other updaters/lockers
4393  * could have modified it before we grabbed the buffer lock.
4394  * However, this is not a problem, because with the recheck we
4395  * just did we ensure that they still don't conflict with the
4396  * lock we want.
4397  */
4398  }
4399  }
4400  else if (mode == LockTupleShare)
4401  {
4402  /*
4403  * If we're requesting Share, we can similarly avoid sleeping if
4404  * there's no update and no exclusive lock present.
4405  */
4406  if (HEAP_XMAX_IS_LOCKED_ONLY(infomask) &&
4407  !HEAP_XMAX_IS_EXCL_LOCKED(infomask))
4408  {
4409  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4410 
4411  /*
4412  * Make sure it's still an appropriate lock, else start over.
4413  * See above about allowing xmax to change.
4414  */
4415  if (!HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_data->t_infomask) ||
4416  HEAP_XMAX_IS_EXCL_LOCKED(tuple->t_data->t_infomask))
4417  goto l3;
4418  require_sleep = false;
4419  }
4420  }
4421  else if (mode == LockTupleNoKeyExclusive)
4422  {
4423  /*
4424  * If we're requesting NoKeyExclusive, we might also be able to
4425  * avoid sleeping; just ensure that there no conflicting lock
4426  * already acquired.
4427  */
4428  if (infomask & HEAP_XMAX_IS_MULTI)
4429  {
4430  if (!DoesMultiXactIdConflict((MultiXactId) xwait, infomask,
4431  mode, NULL))
4432  {
4433  /*
4434  * No conflict, but if the xmax changed under us in the
4435  * meantime, start over.
4436  */
4437  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4438  if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) ||
4439  !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data),
4440  xwait))
4441  goto l3;
4442 
4443  /* otherwise, we're good */
4444  require_sleep = false;
4445  }
4446  }
4447  else if (HEAP_XMAX_IS_KEYSHR_LOCKED(infomask))
4448  {
4449  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4450 
4451  /* if the xmax changed in the meantime, start over */
4452  if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) ||
4453  !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data),
4454  xwait))
4455  goto l3;
4456  /* otherwise, we're good */
4457  require_sleep = false;
4458  }
4459  }
4460 
4461  /*
4462  * As a check independent from those above, we can also avoid sleeping
4463  * if the current transaction is the sole locker of the tuple. Note
4464  * that the strength of the lock already held is irrelevant; this is
4465  * not about recording the lock in Xmax (which will be done regardless
4466  * of this optimization, below). Also, note that the cases where we
4467  * hold a lock stronger than we are requesting are already handled
4468  * above by not doing anything.
4469  *
4470  * Note we only deal with the non-multixact case here; MultiXactIdWait
4471  * is well equipped to deal with this situation on its own.
4472  */
4473  if (require_sleep && !(infomask & HEAP_XMAX_IS_MULTI) &&
4474  TransactionIdIsCurrentTransactionId(xwait))
4475  {
4476  /* ... but if the xmax changed in the meantime, start over */
4477  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4478  if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) ||
4479  !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data),
4480  xwait))
4481  goto l3;
4483  require_sleep = false;
4484  }
4485 
4486  /*
4487  * Time to sleep on the other transaction/multixact, if necessary.
4488  *
4489  * If the other transaction is an update/delete that's already
4490  * committed, then sleeping cannot possibly do any good: if we're
4491  * required to sleep, get out to raise an error instead.
4492  *
4493  * By here, we either have already acquired the buffer exclusive lock,
4494  * or we must wait for the locking transaction or multixact; so below
4495  * we ensure that we grab buffer lock after the sleep.
4496  */
4497  if (require_sleep && (result == TM_Updated || result == TM_Deleted))
4498  {
4499  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4500  goto failed;
4501  }
4502  else if (require_sleep)
4503  {
4504  /*
4505  * Acquire tuple lock to establish our priority for the tuple, or
4506  * die trying. LockTuple will release us when we are next-in-line
4507  * for the tuple. We must do this even if we are share-locking,
4508  * but not if we already have a weaker lock on the tuple.
4509  *
4510  * If we are forced to "start over" below, we keep the tuple lock;
4511  * this arranges that we stay at the head of the line while
4512  * rechecking tuple state.
4513  */
4514  if (!skip_tuple_lock &&
4515  !heap_acquire_tuplock(relation, tid, mode, wait_policy,
4516  &have_tuple_lock))
4517  {
4518  /*
4519  * This can only happen if wait_policy is Skip and the lock
4520  * couldn't be obtained.
4521  */
4522  result = TM_WouldBlock;
4523  /* recovery code expects to have buffer lock held */
4524  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4525  goto failed;
4526  }
4527 
4528  if (infomask & HEAP_XMAX_IS_MULTI)
4529  {
4530  MultiXactStatus status = get_mxact_status_for_lock(mode, false);
4531 
4532  /* We only ever lock tuples, never update them */
4533  if (status >= MultiXactStatusNoKeyUpdate)
4534  elog(ERROR, "invalid lock mode in heap_lock_tuple");
4535 
4536  /* wait for multixact to end, or die trying */
4537  switch (wait_policy)
4538  {
4539  case LockWaitBlock:
4540  MultiXactIdWait((MultiXactId) xwait, status, infomask,
4541  relation, &tuple->t_self, XLTW_Lock, NULL);
4542  break;
4543  case LockWaitSkip:
4544  if (!ConditionalMultiXactIdWait((MultiXactId) xwait,
4545  status, infomask, relation,
4546  NULL))
4547  {
4548  result = TM_WouldBlock;
4549  /* recovery code expects to have buffer lock held */
4550  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4551  goto failed;
4552  }
4553  break;
4554  case LockWaitError:
4555  if (!ConditionalMultiXactIdWait((MultiXactId) xwait,
4556  status, infomask, relation,
4557  NULL))
4558  ereport(ERROR,
4559  (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
4560  errmsg("could not obtain lock on row in relation \"%s\"",
4561  RelationGetRelationName(relation))));
4562 
4563  break;
4564  }
4565 
4566  /*
4567  * Of course, the multixact might not be done here: if we're
4568  * requesting a light lock mode, other transactions with light
4569  * locks could still be alive, as well as locks owned by our
4570  * own xact or other subxacts of this backend. We need to
4571  * preserve the surviving MultiXact members. Note that it
4572  * isn't absolutely necessary in the latter case, but doing so
4573  * is simpler.
4574  */
4575  }
4576  else
4577  {
4578  /* wait for regular transaction to end, or die trying */
4579  switch (wait_policy)
4580  {
4581  case LockWaitBlock:
4582  XactLockTableWait(xwait, relation, &tuple->t_self,
4583  XLTW_Lock);
4584  break;
4585  case LockWaitSkip:
4586  if (!ConditionalXactLockTableWait(xwait))
4587  {
4588  result = TM_WouldBlock;
4589  /* recovery code expects to have buffer lock held */
4590  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4591  goto failed;
4592  }
4593  break;
4594  case LockWaitError:
4595  if (!ConditionalXactLockTableWait(xwait))
4596  ereport(ERROR,
4597  (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
4598  errmsg("could not obtain lock on row in relation \"%s\"",
4599  RelationGetRelationName(relation))));
4600  break;
4601  }
4602  }
4603 
4604  /* if there are updates, follow the update chain */
4605  if (follow_updates && !HEAP_XMAX_IS_LOCKED_ONLY(infomask))
4606  {
4607  TM_Result res;
4608 
4609  res = heap_lock_updated_tuple(relation, tuple, &t_ctid,
4610  GetCurrentTransactionId(),
4611  mode);
4612  if (res != TM_Ok)
4613  {
4614  result = res;
4615  /* recovery code expects to have buffer lock held */
4616  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4617  goto failed;
4618  }
4619  }
4620 
4621  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4622 
4623  /*
4624  * xwait is done, but if xwait had just locked the tuple then some
4625  * other xact could update this tuple before we get to this point.
4626  * Check for xmax change, and start over if so.
4627  */
4628  if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) ||
4629  !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data),
4630  xwait))
4631  goto l3;
4632 
4633  if (!(infomask & HEAP_XMAX_IS_MULTI))
4634  {
4635  /*
4636  * Otherwise check if it committed or aborted. Note we cannot
4637  * be here if the tuple was only locked by somebody who didn't
4638  * conflict with us; that would have been handled above. So
4639  * that transaction must necessarily be gone by now. But
4640  * don't check for this in the multixact case, because some
4641  * locker transactions might still be running.
4642  */
4643  UpdateXmaxHintBits(tuple->t_data, *buffer, xwait);
4644  }
4645  }
4646 
4647  /* By here, we're certain that we hold buffer exclusive lock again */
4648 
4649  /*
4650  * We may lock if previous xmax aborted, or if it committed but only
4651  * locked the tuple without updating it; or if we didn't have to wait
4652  * at all for whatever reason.
4653  */
4654  if (!require_sleep ||
4655  (tuple->t_data->t_infomask & HEAP_XMAX_INVALID) ||
4656  HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_data->t_infomask) ||
4657  HeapTupleHeaderIsOnlyLocked(tuple->t_data))
4658  result = TM_Ok;
4659  else if (!ItemPointerEquals(&tuple->t_self, &tuple->t_data->t_ctid))
4660  result = TM_Updated;
4661  else
4662  result = TM_Deleted;
4663  }
4664 
4665 failed:
4666  if (result != TM_Ok)
4667  {
4668  Assert(result == TM_SelfModified || result == TM_Updated ||
4669  result == TM_Deleted || result == TM_WouldBlock);
4670 
4671  /*
4672  * When locking a tuple under LockWaitSkip semantics and we fail with
4673  * TM_WouldBlock above, it's possible for concurrent transactions to
4674  * release the lock and set HEAP_XMAX_INVALID in the meantime. So
4675  * this assert is slightly different from the equivalent one in
4676  * heap_delete and heap_update.
4677  */
4678  Assert((result == TM_WouldBlock) ||
4679  !(tuple->t_data->t_infomask & HEAP_XMAX_INVALID));
4680  Assert(result != TM_Updated ||
4681  !ItemPointerEquals(&tuple->t_self, &tuple->t_data->t_ctid));
4682  tmfd->ctid = tuple->t_data->t_ctid;
4683  tmfd->xmax = HeapTupleHeaderGetUpdateXid(tuple->t_data);
4684  if (result == TM_SelfModified)
4685  tmfd->cmax = HeapTupleHeaderGetCmax(tuple->t_data);
4686  else
4687  tmfd->cmax = InvalidCommandId;
4688  goto out_locked;
4689  }
4690 
4691  /*
4692  * If we didn't pin the visibility map page and the page has become all
4693  * visible while we were busy locking the buffer, or during some
4694  * subsequent window during which we had it unlocked, we'll have to unlock
4695  * and re-lock, to avoid holding the buffer lock across I/O. That's a bit
4696  * unfortunate, especially since we'll now have to recheck whether the
4697  * tuple has been locked or updated under us, but hopefully it won't
4698  * happen very often.
4699  */
4700  if (vmbuffer == InvalidBuffer && PageIsAllVisible(page))
4701  {
4702  LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
4703  visibilitymap_pin(relation, block, &vmbuffer);
4704  LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
4705  goto l3;
4706  }
4707 
4708  xmax = HeapTupleHeaderGetRawXmax(tuple->t_data);
4709  old_infomask = tuple->t_data->t_infomask;
4710 
4711  /*
4712  * If this is the first possibly-multixact-able operation in the current
4713  * transaction, set my per-backend OldestMemberMXactId setting. We can be
4714  * certain that the transaction will never become a member of any older
4715  * MultiXactIds than that. (We have to do this even if we end up just
4716  * using our own TransactionId below, since some other backend could
4717  * incorporate our XID into a MultiXact immediately afterwards.)
4718  */
4719  MultiXactIdSetOldestMember();
4720 
4721  /*
4722  * Compute the new xmax and infomask to store into the tuple. Note we do
4723  * not modify the tuple just yet, because that would leave it in the wrong
4724  * state if multixact.c elogs.
4725  */
4726  compute_new_xmax_infomask(xmax, old_infomask, tuple->t_data->t_infomask2,
4727  GetCurrentTransactionId(), mode, false,
4728  &xid, &new_infomask, &new_infomask2);
4729 
4730  START_CRIT_SECTION();
4731 
4732  /*
4733  * Store transaction information of xact locking the tuple.
4734  *
4735  * Note: Cmax is meaningless in this context, so don't set it; this avoids
4736  * possibly generating a useless combo CID. Moreover, if we're locking a
4737  * previously updated tuple, it's important to preserve the Cmax.
4738  *
4739  * Also reset the HOT UPDATE bit, but only if there's no update; otherwise
4740  * we would break the HOT chain.
4741  */
4742  tuple->t_data->t_infomask &= ~HEAP_XMAX_BITS;
4743  tuple->t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED;
4744  tuple->t_data->t_infomask |= new_infomask;
4745  tuple->t_data->t_infomask2 |= new_infomask2;
4746  if (HEAP_XMAX_IS_LOCKED_ONLY(new_infomask))
4747  HeapTupleHeaderClearHotUpdated(tuple->t_data);
4748  HeapTupleHeaderSetXmax(tuple->t_data, xid);
4749 
4750  /*
4751  * Make sure there is no forward chain link in t_ctid. Note that in the
4752  * cases where the tuple has been updated, we must not overwrite t_ctid,
4753  * because it was set by the updater. Moreover, if the tuple has been
4754  * updated, we need to follow the update chain to lock the new versions of
4755  * the tuple as well.
4756  */
4757  if (HEAP_XMAX_IS_LOCKED_ONLY(new_infomask))
4758  tuple->t_data->t_ctid = *tid;
4759 
4760  /* Clear only the all-frozen bit on visibility map if needed */
4761  if (PageIsAllVisible(page) &&
4762  visibilitymap_clear(relation, block, vmbuffer,
4763  VISIBILITYMAP_ALL_FROZEN))
4764  cleared_all_frozen = true;
4765 
4766 
4767  MarkBufferDirty(*buffer);
4768 
4769  /*
4770  * XLOG stuff. You might think that we don't need an XLOG record because
4771  * there is no state change worth restoring after a crash. You would be
4772  * wrong however: we have just written either a TransactionId or a
4773  * MultiXactId that may never have been seen on disk before, and we need
4774  * to make sure that there are XLOG entries covering those ID numbers.
4775  * Else the same IDs might be re-used after a crash, which would be
4776  * disastrous if this page made it to disk before the crash. Essentially
4777  * we have to enforce the WAL log-before-data rule even in this case.
4778  * (Also, in a PITR log-shipping or 2PC environment, we have to have XLOG
4779  * entries for everything anyway.)
4780  */
4781  if (RelationNeedsWAL(relation))
4782  {
4783  xl_heap_lock xlrec;
4784  XLogRecPtr recptr;
4785 
4786  XLogBeginInsert();
4787  XLogRegisterBuffer(0, *buffer, REGBUF_STANDARD);
4788 
4789  xlrec.offnum = ItemPointerGetOffsetNumber(&tuple->t_self);
4790  xlrec.xmax = xid;
4791  xlrec.infobits_set = compute_infobits(new_infomask,
4792  tuple->t_data->t_infomask2);
4793  xlrec.flags = cleared_all_frozen ? XLH_LOCK_ALL_FROZEN_CLEARED : 0;
4794  XLogRegisterData((char *) &xlrec, SizeOfHeapLock);
4795 
4796  /* we don't decode row locks atm, so no need to log the origin */
4797 
4798  recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_LOCK);
4799 
4800  PageSetLSN(page, recptr);
4801  }
4802 
4803  END_CRIT_SECTION();
4804 
4805  result = TM_Ok;
4806 
4807 out_locked:
4808  LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
4809 
4810 out_unlocked:
4811  if (BufferIsValid(vmbuffer))
4812  ReleaseBuffer(vmbuffer);
4813 
4814  /*
4815  * Don't update the visibility map here. Locking a tuple doesn't change
4816  * visibility info.
4817  */
4818 
4819  /*
4820  * Now that we have successfully marked the tuple as locked, we can
4821  * release the lmgr tuple lock, if we had it.
4822  */
4823  if (have_tuple_lock)
4824  UnlockTupleTuplock(relation, tid, mode);
4825 
4826  return result;
4827 }
#define TUPLOCK_from_mxstatus(status)
Definition: heapam.c:220
static TM_Result heap_lock_updated_tuple(Relation rel, HeapTuple tuple, ItemPointer ctid, TransactionId xid, LockTupleMode mode)
Definition: heapam.c:5608
static bool ConditionalMultiXactIdWait(MultiXactId multi, MultiXactStatus status, uint16 infomask, Relation rel, int *remaining)
Definition: heapam.c:7325
static MultiXactStatus get_mxact_status_for_lock(LockTupleMode mode, bool is_update)
Definition: heapam.c:4096
#define XLH_LOCK_ALL_FROZEN_CLEARED
Definition: heapam_xlog.h:277
#define XLOG_HEAP_LOCK
Definition: heapam_xlog.h:38
#define SizeOfHeapLock
Definition: heapam_xlog.h:288
#define HEAP_XMAX_IS_EXCL_LOCKED(infomask)
Definition: htup_details.h:261
#define HEAP_XMAX_IS_KEYSHR_LOCKED(infomask)
Definition: htup_details.h:263
#define HEAP_XMAX_IS_SHR_LOCKED(infomask)
Definition: htup_details.h:259
static void ItemPointerCopy(const ItemPointerData *fromPointer, ItemPointerData *toPointer)
Definition: itemptr.h:172
bool ConditionalXactLockTableWait(TransactionId xid)
Definition: lmgr.c:741
@ XLTW_Lock
Definition: lmgr.h:29
@ LockWaitSkip
Definition: lockoptions.h:41
@ LockWaitError
Definition: lockoptions.h:43
@ LockTupleNoKeyExclusive
Definition: lockoptions.h:56
@ LockTupleShare
Definition: lockoptions.h:54
@ LockTupleKeyShare
Definition: lockoptions.h:52
int GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **members, bool from_pgupgrade, bool isLockOnly)
Definition: multixact.c:1225
MultiXactStatus
Definition: multixact.h:42
@ MultiXactStatusNoKeyUpdate
Definition: multixact.h:48
static PgChecksumMode mode
Definition: pg_checksums.c:56
#define RelationGetRelationName(relation)
Definition: rel.h:538
uint8 infobits_set
Definition: heapam_xlog.h:284
OffsetNumber offnum
Definition: heapam_xlog.h:283
TransactionId xmax
Definition: heapam_xlog.h:282
@ TM_WouldBlock
Definition: tableam.h:102
#define VISIBILITYMAP_ALL_FROZEN

References Assert(), BUFFER_LOCK_EXCLUSIVE, BUFFER_LOCK_UNLOCK, BufferGetPage(), BufferIsValid(), TM_FailureData::cmax, compute_infobits(), compute_new_xmax_infomask(), ConditionalMultiXactIdWait(), ConditionalXactLockTableWait(), TM_FailureData::ctid, DoesMultiXactIdConflict(), elog(), END_CRIT_SECTION, ereport, errcode(), errmsg(), ERROR, xl_heap_lock::flags, get_mxact_status_for_lock(), GetCurrentTransactionId(), GetMultiXactIdMembers(), heap_acquire_tuplock(), HEAP_KEYS_UPDATED, heap_lock_updated_tuple(), HEAP_XMAX_BITS, HEAP_XMAX_INVALID, HEAP_XMAX_IS_EXCL_LOCKED, HEAP_XMAX_IS_KEYSHR_LOCKED, HEAP_XMAX_IS_LOCKED_ONLY, HEAP_XMAX_IS_MULTI, HEAP_XMAX_IS_SHR_LOCKED, HeapTupleHeaderClearHotUpdated, HeapTupleHeaderGetCmax(), HeapTupleHeaderGetRawXmax, HeapTupleHeaderGetUpdateXid, HeapTupleHeaderIsOnlyLocked(), HeapTupleHeaderSetXmax, HeapTupleSatisfiesUpdate(), i, xl_heap_lock::infobits_set, InvalidBuffer, InvalidCommandId, ItemIdGetLength, ItemIdIsNormal, ItemPointerCopy(), ItemPointerEquals(), ItemPointerGetBlockNumber(), ItemPointerGetOffsetNumber(), LockBuffer(), LockTupleExclusive, LockTupleKeyShare, LockTupleNoKeyExclusive, LockTupleShare, LockWaitBlock, LockWaitError, LockWaitSkip, MarkBufferDirty(), mode, MultiXactIdSetOldestMember(), MultiXactIdWait(), MultiXactStatusNoKeyUpdate, xl_heap_lock::offnum, PageGetItem(), PageGetItemId(), PageIsAllVisible(), PageSetLSN(), pfree(), ReadBuffer(), REGBUF_STANDARD, RelationGetRelationName, RelationGetRelid, RelationNeedsWAL, ReleaseBuffer(), res, SizeOfHeapLock, START_CRIT_SECTION, HeapTupleHeaderData::t_ctid, HeapTupleData::t_data, HeapTupleHeaderData::t_infomask, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TM_BeingModified, TM_Deleted, TM_Invisible, TM_Ok, TM_SelfModified, TM_Updated, TM_WouldBlock, TransactionIdEquals, TransactionIdIsCurrentTransactionId(), TUPLOCK_from_mxstatus, UnlockTupleTuplock, UpdateXmaxHintBits(), VISIBILITYMAP_ALL_FROZEN, visibilitymap_clear(), visibilitymap_pin(), XactLockTableWait(), XLH_LOCK_ALL_FROZEN_CLEARED, XLOG_HEAP_LOCK, XLogBeginInsert(), XLogInsert(), XLogRegisterBuffer(), XLogRegisterData(), XLTW_Lock, xl_heap_lock::xmax, TM_FailureData::xmax, and xmax_infomask_changed().

Referenced by heapam_tuple_lock().
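
The following is a minimal caller sketch, not taken from the PostgreSQL sources: it assumes an already-open relation and a known TID, and loosely mirrors the pattern used by heapam_tuple_lock(). The helper name lock_tuple_for_update and the choice of GetCurrentCommandId(true) for the CommandId are assumptions made for the example.

#include "postgres.h"
#include "access/heapam.h"
#include "access/xact.h"
#include "storage/bufmgr.h"
#include "storage/itemptr.h"

/* Hypothetical helper: lock one tuple FOR UPDATE, blocking until it is available. */
static TM_Result
lock_tuple_for_update(Relation rel, ItemPointer tid)
{
    HeapTupleData tuple;
    Buffer      buffer;
    TM_FailureData tmfd;
    TM_Result   result;

    tuple.t_self = *tid;
    result = heap_lock_tuple(rel, &tuple, GetCurrentCommandId(true),
                             LockTupleExclusive, LockWaitBlock,
                             true /* follow_updates */,
                             &buffer, &tmfd);

    /* heap_lock_tuple returns with the buffer pinned but not locked */
    ReleaseBuffer(buffer);
    return result;
}

On anything other than TM_Ok the caller would inspect tmfd (ctid, xmax, cmax) to decide whether to follow the update chain, retry, or report a serialization failure.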

◆ heap_multi_insert()

void heap_multi_insert ( Relation  relation,
struct TupleTableSlot **  slots,
int  ntuples,
CommandId  cid,
int  options,
BulkInsertState  bistate 
)

Definition at line 2097 of file heapam.c.

2099 {
2100  TransactionId xid = GetCurrentTransactionId();
2101  HeapTuple *heaptuples;
2102  int i;
2103  int ndone;
2104  PGAlignedBlock scratch;
2105  Page page;
2106  Buffer vmbuffer = InvalidBuffer;
2107  bool needwal;
2108  Size saveFreeSpace;
2109  bool need_tuple_data = RelationIsLogicallyLogged(relation);
2110  bool need_cids = RelationIsAccessibleInLogicalDecoding(relation);
2111  bool starting_with_empty_page = false;
2112  int npages = 0;
2113  int npages_used = 0;
2114 
2115  /* currently not needed (thus unsupported) for heap_multi_insert() */
2116  Assert(!(options & HEAP_INSERT_NO_LOGICAL));
2117 
2118  needwal = RelationNeedsWAL(relation);
2119  saveFreeSpace = RelationGetTargetPageFreeSpace(relation,
2120  HEAP_DEFAULT_FILLFACTOR);
2121 
2122  /* Toast and set header data in all the slots */
2123  heaptuples = palloc(ntuples * sizeof(HeapTuple));
2124  for (i = 0; i < ntuples; i++)
2125  {
2126  HeapTuple tuple;
2127 
2128  tuple = ExecFetchSlotHeapTuple(slots[i], true, NULL);
2129  slots[i]->tts_tableOid = RelationGetRelid(relation);
2130  tuple->t_tableOid = slots[i]->tts_tableOid;
2131  heaptuples[i] = heap_prepare_insert(relation, tuple, xid, cid,
2132  options);
2133  }
2134 
2135  /*
2136  * We're about to do the actual inserts -- but check for conflict first,
2137  * to minimize the possibility of having to roll back work we've just
2138  * done.
2139  *
2140  * A check here does not definitively prevent a serialization anomaly;
2141  * that check MUST be done at least past the point of acquiring an
2142  * exclusive buffer content lock on every buffer that will be affected,
2143  * and MAY be done after all inserts are reflected in the buffers and
2144  * those locks are released; otherwise there is a race condition. Since
2145  * multiple buffers can be locked and unlocked in the loop below, and it
2146  * would not be feasible to identify and lock all of those buffers before
2147  * the loop, we must do a final check at the end.
2148  *
2149  * The check here could be omitted with no loss of correctness; it is
2150  * present strictly as an optimization.
2151  *
2152  * For heap inserts, we only need to check for table-level SSI locks. Our
2153  * new tuples can't possibly conflict with existing tuple locks, and heap
2154  * page locks are only consolidated versions of tuple locks; they do not
2155  * lock "gaps" as index page locks do. So we don't need to specify a
2156  * buffer when making the call, which makes for a faster check.
2157  */
2158  CheckForSerializableConflictIn(relation, NULL, InvalidBlockNumber);
2159 
2160  ndone = 0;
2161  while (ndone < ntuples)
2162  {
2163  Buffer buffer;
2164  bool all_visible_cleared = false;
2165  bool all_frozen_set = false;
2166  int nthispage;
2167 
2168  CHECK_FOR_INTERRUPTS();
2169 
2170  /*
2171  * Compute number of pages needed to fit the to-be-inserted tuples in
2172  * the worst case. This will be used to determine how much to extend
2173  * the relation by in RelationGetBufferForTuple(), if needed. If we
2174  * filled a prior page from scratch, we can just update our last
2175  * computation, but if we started with a partially filled page,
2176  * recompute from scratch, the number of potentially required pages
2177  * can vary due to tuples needing to fit onto the page, page headers
2178  * etc.
2179  */
2180  if (ndone == 0 || !starting_with_empty_page)
2181  {
2182  npages = heap_multi_insert_pages(heaptuples, ndone, ntuples,
2183  saveFreeSpace);
2184  npages_used = 0;
2185  }
2186  else
2187  npages_used++;
2188 
2189  /*
2190  * Find buffer where at least the next tuple will fit. If the page is
2191  * all-visible, this will also pin the requisite visibility map page.
2192  *
2193  * Also pin visibility map page if COPY FREEZE inserts tuples into an
2194  * empty page. See all_frozen_set below.
2195  */
2196  buffer = RelationGetBufferForTuple(relation, heaptuples[ndone]->t_len,
2197  InvalidBuffer, options, bistate,
2198  &vmbuffer, NULL,
2199  npages - npages_used);
2200  page = BufferGetPage(buffer);
2201 
2202  starting_with_empty_page = PageGetMaxOffsetNumber(page) == 0;
2203 
2204  if (starting_with_empty_page && (options & HEAP_INSERT_FROZEN))
2205  all_frozen_set = true;
2206 
2207  /* NO EREPORT(ERROR) from here till changes are logged */
2208  START_CRIT_SECTION();
2209 
2210  /*
2211  * RelationGetBufferForTuple has ensured that the first tuple fits.
2212  * Put that on the page, and then as many other tuples as fit.
2213  */
2214  RelationPutHeapTuple(relation, buffer, heaptuples[ndone], false);
2215 
2216  /*
2217  * For logical decoding we need combo CIDs to properly decode the
2218  * catalog.
2219  */
2220  if (needwal && need_cids)
2221  log_heap_new_cid(relation, heaptuples[ndone]);
2222 
2223  for (nthispage = 1; ndone + nthispage < ntuples; nthispage++)
2224  {
2225  HeapTuple heaptup = heaptuples[ndone + nthispage];
2226 
2227  if (PageGetHeapFreeSpace(page) < MAXALIGN(heaptup->t_len) + saveFreeSpace)
2228  break;
2229 
2230  RelationPutHeapTuple(relation, buffer, heaptup, false);
2231 
2232  /*
2233  * For logical decoding we need combo CIDs to properly decode the
2234  * catalog.
2235  */
2236  if (needwal && need_cids)
2237  log_heap_new_cid(relation, heaptup);
2238  }
2239 
2240  /*
2241  * If the page is all visible, need to clear that, unless we're only
2242  * going to add further frozen rows to it.
2243  *
2244  * If we're only adding already frozen rows to a previously empty
2245  * page, mark it as all-visible.
2246  */
2247  if (PageIsAllVisible(page) && !(options & HEAP_INSERT_FROZEN))
2248  {
2249  all_visible_cleared = true;
2250  PageClearAllVisible(page);
2251  visibilitymap_clear(relation,
2252  BufferGetBlockNumber(buffer),
2253  vmbuffer, VISIBILITYMAP_VALID_BITS);
2254  }
2255  else if (all_frozen_set)
2256  PageSetAllVisible(page);
2257 
2258  /*
2259  * XXX Should we set PageSetPrunable on this page ? See heap_insert()
2260  */
2261 
2262  MarkBufferDirty(buffer);
2263 
2264  /* XLOG stuff */
2265  if (needwal)
2266  {
2267  XLogRecPtr recptr;
2268  xl_heap_multi_insert *xlrec;
2269  uint8 info = XLOG_HEAP2_MULTI_INSERT;
2270  char *tupledata;
2271  int totaldatalen;
2272  char *scratchptr = scratch.data;
2273  bool init;
2274  int bufflags = 0;
2275 
2276  /*
2277  * If the page was previously empty, we can reinit the page
2278  * instead of restoring the whole thing.
2279  */
2280  init = starting_with_empty_page;
2281 
2282  /* allocate xl_heap_multi_insert struct from the scratch area */
2283  xlrec = (xl_heap_multi_insert *) scratchptr;
2284  scratchptr += SizeOfHeapMultiInsert;
2285 
2286  /*
2287  * Allocate offsets array. Unless we're reinitializing the page,
2288  * in that case the tuples are stored in order starting at
2289  * FirstOffsetNumber and we don't need to store the offsets
2290  * explicitly.
2291  */
2292  if (!init)
2293  scratchptr += nthispage * sizeof(OffsetNumber);
2294 
2295  /* the rest of the scratch space is used for tuple data */
2296  tupledata = scratchptr;
2297 
2298  /* check that the mutually exclusive flags are not both set */
2299  Assert(!(all_visible_cleared && all_frozen_set));
2300 
2301  xlrec->flags = 0;
2302  if (all_visible_cleared)
2303  xlrec->flags = XLH_INSERT_ALL_VISIBLE_CLEARED;
2304  if (all_frozen_set)
2305  xlrec->flags = XLH_INSERT_ALL_FROZEN_SET;
2306 
2307  xlrec->ntuples = nthispage;
2308 
2309  /*
2310  * Write out an xl_multi_insert_tuple and the tuple data itself
2311  * for each tuple.
2312  */
2313  for (i = 0; i < nthispage; i++)
2314  {
2315  HeapTuple heaptup = heaptuples[ndone + i];
2316  xl_multi_insert_tuple *tuphdr;
2317  int datalen;
2318 
2319  if (!init)
2320  xlrec->offsets[i] = ItemPointerGetOffsetNumber(&heaptup->t_self);
2321  /* xl_multi_insert_tuple needs two-byte alignment. */
2322  tuphdr = (xl_multi_insert_tuple *) SHORTALIGN(scratchptr);
2323  scratchptr = ((char *) tuphdr) + SizeOfMultiInsertTuple;
2324 
2325  tuphdr->t_infomask2 = heaptup->t_data->t_infomask2;
2326  tuphdr->t_infomask = heaptup->t_data->t_infomask;
2327  tuphdr->t_hoff = heaptup->t_data->t_hoff;
2328 
2329  /* write bitmap [+ padding] [+ oid] + data */
2330  datalen = heaptup->t_len - SizeofHeapTupleHeader;
2331  memcpy(scratchptr,
2332  (char *) heaptup->t_data + SizeofHeapTupleHeader,
2333  datalen);
2334  tuphdr->datalen = datalen;
2335  scratchptr += datalen;
2336  }
2337  totaldatalen = scratchptr - tupledata;
2338  Assert((scratchptr - scratch.data) < BLCKSZ);
2339 
2340  if (need_tuple_data)
2341  xlrec->flags |= XLH_INSERT_CONTAINS_NEW_TUPLE;
2342 
2343  /*
2344  * Signal that this is the last xl_heap_multi_insert record
2345  * emitted by this call to heap_multi_insert(). Needed for logical
2346  * decoding so it knows when to cleanup temporary data.
2347  */
2348  if (ndone + nthispage == ntuples)
2349  xlrec->flags |= XLH_INSERT_LAST_IN_MULTI;
2350 
2351  if (init)
2352  {
2353  info |= XLOG_HEAP_INIT_PAGE;
2354  bufflags |= REGBUF_WILL_INIT;
2355  }
2356 
2357  /*
2358  * If we're doing logical decoding, include the new tuple data
2359  * even if we take a full-page image of the page.
2360  */
2361  if (need_tuple_data)
2362  bufflags |= REGBUF_KEEP_DATA;
2363 
2364  XLogBeginInsert();
2365  XLogRegisterData((char *) xlrec, tupledata - scratch.data);
2366  XLogRegisterBuffer(0, buffer, REGBUF_STANDARD | bufflags);
2367 
2368  XLogRegisterBufData(0, tupledata, totaldatalen);
2369 
2370  /* filtering by origin on a row level is much more efficient */
2371  XLogSetRecordFlags(XLOG_INCLUDE_ORIGIN);
2372 
2373  recptr = XLogInsert(RM_HEAP2_ID, info);
2374 
2375  PageSetLSN(page, recptr);
2376  }
2377 
2378  END_CRIT_SECTION();
2379 
2380  /*
2381  * If we've frozen everything on the page, update the visibilitymap.
2382  * We're already holding pin on the vmbuffer.
2383  */
2384  if (all_frozen_set)
2385  {
2386  Assert(PageIsAllVisible(page));
2387  Assert(visibilitymap_pin_ok(BufferGetBlockNumber(buffer), vmbuffer));
2388 
2389  /*
2390  * It's fine to use InvalidTransactionId here - this is only used
2391  * when HEAP_INSERT_FROZEN is specified, which intentionally
2392  * violates visibility rules.
2393  */
2394  visibilitymap_set(relation, BufferGetBlockNumber(buffer), buffer,
2395  InvalidXLogRecPtr, vmbuffer,
2396  InvalidTransactionId,
2397  VISIBILITYMAP_ALL_VISIBLE | VISIBILITYMAP_ALL_FROZEN);
2398  }
2399 
2400  UnlockReleaseBuffer(buffer);
2401  ndone += nthispage;
2402 
2403  /*
2404  * NB: Only release vmbuffer after inserting all tuples - it's fairly
2405  * likely that we'll insert into subsequent heap pages that are likely
2406  * to use the same vm page.
2407  */
2408  }
2409 
2410  /* We're done with inserting all tuples, so release the last vmbuffer. */
2411  if (vmbuffer != InvalidBuffer)
2412  ReleaseBuffer(vmbuffer);
2413 
2414  /*
2415  * We're done with the actual inserts. Check for conflicts again, to
2416  * ensure that all rw-conflicts in to these inserts are detected. Without
2417  * this final check, a sequential scan of the heap may have locked the
2418  * table after the "before" check, missing one opportunity to detect the
2419  * conflict, and then scanned the table before the new tuples were there,
2420  * missing the other chance to detect the conflict.
2421  *
2422  * For heap inserts, we only need to check for table-level SSI locks. Our
2423  * new tuples can't possibly conflict with existing tuple locks, and heap
2424  * page locks are only consolidated versions of tuple locks; they do not
2425  * lock "gaps" as index page locks do. So we don't need to specify a
2426  * buffer when making the call.
2427  */
2428  CheckForSerializableConflictIn(relation, NULL, InvalidBlockNumber);
2429 
2430  /*
2431  * If tuples are cachable, mark them for invalidation from the caches in
2432  * case we abort. Note it is OK to do this after releasing the buffer,
2433  * because the heaptuples data structure is all in local memory, not in
2434  * the shared buffer.
2435  */
2436  if (IsCatalogRelation(relation))
2437  {
2438  for (i = 0; i < ntuples; i++)
2439  CacheInvalidateHeapTuple(relation, heaptuples[i], NULL);
2440  }
2441 
2442  /* copy t_self fields back to the caller's slots */
2443  for (i = 0; i < ntuples; i++)
2444  slots[i]->tts_tid = heaptuples[i]->t_self;
2445 
2446  pgstat_count_heap_insert(relation, ntuples);
2447 }
Size PageGetHeapFreeSpace(Page page)
Definition: bufpage.c:991
static void PageSetAllVisible(Page page)
Definition: bufpage.h:431
#define MAXALIGN(LEN)
Definition: c.h:800
#define SHORTALIGN(LEN)
Definition: c.h:796
size_t Size
Definition: c.h:594
HeapTuple ExecFetchSlotHeapTuple(TupleTableSlot *slot, bool materialize, bool *shouldFree)
Definition: execTuples.c:1643
static int heap_multi_insert_pages(HeapTuple *heaptuples, int done, int ntuples, Size saveFreeSpace)
Definition: heapam.c:2065
#define HEAP_INSERT_FROZEN
Definition: heapam.h:35
#define SizeOfHeapMultiInsert
Definition: heapam_xlog.h:182
#define XLOG_HEAP2_MULTI_INSERT
Definition: heapam_xlog.h:58
#define XLH_INSERT_LAST_IN_MULTI
Definition: heapam_xlog.h:67
#define XLH_INSERT_ALL_FROZEN_SET
Definition: heapam_xlog.h:73
#define SizeOfMultiInsertTuple
Definition: heapam_xlog.h:193
int init
Definition: isn.c:75
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:122
#define RelationGetTargetPageFreeSpace(relation, defaultff)
Definition: rel.h:377
#define HEAP_DEFAULT_FILLFACTOR
Definition: rel.h:348
Oid tts_tableOid
Definition: tuptable.h:130
OffsetNumber offsets[FLEXIBLE_ARRAY_MEMBER]
Definition: heapam_xlog.h:179
char data[BLCKSZ]
Definition: c.h:1108
void visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf, XLogRecPtr recptr, Buffer vmBuf, TransactionId cutoff_xid, uint8 flags)
bool visibilitymap_pin_ok(BlockNumber heapBlk, Buffer vmbuf)
#define VISIBILITYMAP_ALL_VISIBLE
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28

References Assert(), BufferGetBlockNumber(), BufferGetPage(), CacheInvalidateHeapTuple(), CHECK_FOR_INTERRUPTS, CheckForSerializableConflictIn(), PGAlignedBlock::data, xl_multi_insert_tuple::datalen, END_CRIT_SECTION, ExecFetchSlotHeapTuple(), xl_heap_multi_insert::flags, GetCurrentTransactionId(), HEAP_DEFAULT_FILLFACTOR, HEAP_INSERT_FROZEN, HEAP_INSERT_NO_LOGICAL, heap_multi_insert_pages(), heap_prepare_insert(), i, init, InvalidBlockNumber, InvalidBuffer, InvalidTransactionId, InvalidXLogRecPtr, IsCatalogRelation(), ItemPointerGetOffsetNumber(), log_heap_new_cid(), MarkBufferDirty(), MAXALIGN, xl_heap_multi_insert::ntuples, xl_heap_multi_insert::offsets, PageClearAllVisible(), PageGetHeapFreeSpace(), PageGetMaxOffsetNumber(), PageIsAllVisible(), PageSetAllVisible(), PageSetLSN(), palloc(), pgstat_count_heap_insert(), REGBUF_KEEP_DATA, REGBUF_STANDARD, REGBUF_WILL_INIT, RelationGetBufferForTuple(), RelationGetRelid, RelationGetTargetPageFreeSpace, RelationIsAccessibleInLogicalDecoding, RelationIsLogicallyLogged, RelationNeedsWAL, RelationPutHeapTuple(), ReleaseBuffer(), SHORTALIGN, SizeOfHeapMultiInsert, SizeofHeapTupleHeader, SizeOfMultiInsertTuple, START_CRIT_SECTION, HeapTupleData::t_data, xl_multi_insert_tuple::t_hoff, HeapTupleHeaderData::t_hoff, xl_multi_insert_tuple::t_infomask, HeapTupleHeaderData::t_infomask, xl_multi_insert_tuple::t_infomask2, HeapTupleHeaderData::t_infomask2, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, TupleTableSlot::tts_tableOid, UnlockReleaseBuffer(), VISIBILITYMAP_ALL_FROZEN, VISIBILITYMAP_ALL_VISIBLE, visibilitymap_clear(), visibilitymap_pin_ok(), visibilitymap_set(), VISIBILITYMAP_VALID_BITS, XLH_INSERT_ALL_FROZEN_SET, XLH_INSERT_ALL_VISIBLE_CLEARED, XLH_INSERT_CONTAINS_NEW_TUPLE, XLH_INSERT_LAST_IN_MULTI, XLOG_HEAP2_MULTI_INSERT, XLOG_HEAP_INIT_PAGE, XLOG_INCLUDE_ORIGIN, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), XLogRegisterData(), and XLogSetRecordFlags().

Referenced by CatalogTuplesMultiInsertWithInfo().
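
As a usage illustration (a sketch, not code from the backend): given already-materialized slots for an open heap relation, a caller would typically wrap the call in a bulk-insert state, much as CatalogTuplesMultiInsertWithInfo() does. The helper name insert_slots_in_bulk and the use of GetCurrentCommandId(true) and options = 0 are assumptions for the example.

#include "postgres.h"
#include "access/heapam.h"
#include "access/xact.h"
#include "executor/tuptable.h"

/* Hypothetical helper: insert pre-built slots through the bulk-insert path. */
static void
insert_slots_in_bulk(Relation rel, TupleTableSlot **slots, int ntuples)
{
    BulkInsertState bistate = GetBulkInsertState();

    /* options = 0: no skip-FSM, no frozen, no speculative insertion */
    heap_multi_insert(rel, slots, ntuples, GetCurrentCommandId(true),
                      0, bistate);

    FreeBulkInsertState(bistate);
}

The bulk-insert state keeps the current target buffer pinned across calls, which is why heap_multi_insert() also delays releasing the visibility-map buffer until all tuples of one call have been placed.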

◆ heap_page_prune()

void heap_page_prune ( Relation  relation,
Buffer  buffer,
struct GlobalVisState vistest,
bool  mark_unused_now,
PruneResult presult,
OffsetNumber off_loc 
)

Definition at line 216 of file pruneheap.c.

221 {
222  Page page = BufferGetPage(buffer);
223  BlockNumber blockno = BufferGetBlockNumber(buffer);
224  OffsetNumber offnum,
225  maxoff;
226  PruneState prstate;
227  HeapTupleData tup;
228 
229  /*
230  * Our strategy is to scan the page and make lists of items to change,
231  * then apply the changes within a critical section. This keeps as much
232  * logic as possible out of the critical section, and also ensures that
233  * WAL replay will work the same as the normal case.
234  *
235  * First, initialize the new pd_prune_xid value to zero (indicating no
236  * prunable tuples). If we find any tuples which may soon become
237  * prunable, we will save the lowest relevant XID in new_prune_xid. Also
238  * initialize the rest of our working state.
239  */
240  prstate.new_prune_xid = InvalidTransactionId;
241  prstate.rel = relation;
242  prstate.vistest = vistest;
243  prstate.mark_unused_now = mark_unused_now;
244  prstate.snapshotConflictHorizon = InvalidTransactionId;
245  prstate.nredirected = prstate.ndead = prstate.nunused = 0;
246  memset(prstate.marked, 0, sizeof(prstate.marked));
247 
248  /*
249  * presult->htsv is not initialized here because all ntuple spots in the
250  * array will be set either to a valid HTSV_Result value or -1.
251  */
252  presult->ndeleted = 0;
253  presult->nnewlpdead = 0;
254 
255  maxoff = PageGetMaxOffsetNumber(page);
256  tup.t_tableOid = RelationGetRelid(prstate.rel);
257 
258  /*
259  * Determine HTSV for all tuples.
260  *
261  * This is required for correctness to deal with cases where running HTSV
262  * twice could result in different results (e.g. RECENTLY_DEAD can turn to
263  * DEAD if another checked item causes GlobalVisTestIsRemovableFullXid()
264  * to update the horizon, INSERT_IN_PROGRESS can change to DEAD if the
265  * inserting transaction aborts, ...). That in turn could cause
266  * heap_prune_chain() to behave incorrectly if a tuple is reached twice,
267  * once directly via a heap_prune_chain() and once following a HOT chain.
268  *
269  * It's also good for performance. Most commonly tuples within a page are
270  * stored at decreasing offsets (while the items are stored at increasing
271  * offsets). When processing all tuples on a page this leads to reading
272  * memory at decreasing offsets within a page, with a variable stride.
273  * That's hard for CPU prefetchers to deal with. Processing the items in
274  * reverse order (and thus the tuples in increasing order) increases
275  * prefetching efficiency significantly / decreases the number of cache
276  * misses.
277  */
278  for (offnum = maxoff;
279  offnum >= FirstOffsetNumber;
280  offnum = OffsetNumberPrev(offnum))
281  {
282  ItemId itemid = PageGetItemId(page, offnum);
283  HeapTupleHeader htup;
284 
285  /* Nothing to do if slot doesn't contain a tuple */
286  if (!ItemIdIsNormal(itemid))
287  {
288  presult->htsv[offnum] = -1;
289  continue;
290  }
291 
292  htup = (HeapTupleHeader) PageGetItem(page, itemid);
293  tup.t_data = htup;
294  tup.t_len = ItemIdGetLength(itemid);
295  ItemPointerSet(&(tup.t_self), blockno, offnum);
296 
297  /*
298  * Set the offset number so that we can display it along with any
299  * error that occurred while processing this tuple.
300  */
301  if (off_loc)
302  *off_loc = offnum;
303 
304  presult->htsv[offnum] = heap_prune_satisfies_vacuum(&prstate, &tup,
305  buffer);
306  }
307 
308  /* Scan the page */
309  for (offnum = FirstOffsetNumber;
310  offnum <= maxoff;
311  offnum = OffsetNumberNext(offnum))
312  {
313  ItemId itemid;
314 
315  /* Ignore items already processed as part of an earlier chain */
316  if (prstate.marked[offnum])
317  continue;
318 
319  /* see preceding loop */
320  if (off_loc)
321  *off_loc = offnum;
322 
323  /* Nothing to do if slot is empty */
324  itemid = PageGetItemId(page, offnum);
325  if (!ItemIdIsUsed(itemid))
326  continue;
327 
328  /* Process this item or chain of items */
329  presult->ndeleted += heap_prune_chain(buffer, offnum,
330  presult->htsv, &prstate);
331  }
332 
333  /* Clear the offset information once we have processed the given page. */
334  if (off_loc)
335  *off_loc = InvalidOffsetNumber;
336 
337  /* Any error while applying the changes is critical */
338  START_CRIT_SECTION();
339 
340  /* Have we found any prunable items? */
341  if (prstate.nredirected > 0 || prstate.ndead > 0 || prstate.nunused > 0)
342  {
343  /*
344  * Apply the planned item changes, then repair page fragmentation, and
345  * update the page's hint bit about whether it has free line pointers.
346  */
347  heap_page_prune_execute(buffer,
348  prstate.redirected, prstate.nredirected,
349  prstate.nowdead, prstate.ndead,
350  prstate.nowunused, prstate.nunused);
351 
352  /*
353  * Update the page's pd_prune_xid field to either zero, or the lowest
354  * XID of any soon-prunable tuple.
355  */
356  ((PageHeader) page)->pd_prune_xid = prstate.new_prune_xid;
357 
358  /*
359  * Also clear the "page is full" flag, since there's no point in
360  * repeating the prune/defrag process until something else happens to
361  * the page.
362  */
363  PageClearFull(page);
364 
365  MarkBufferDirty(buffer);
366 
367  /*
368  * Emit a WAL XLOG_HEAP2_PRUNE record showing what we did
369  */
370  if (RelationNeedsWAL(relation))
371  {
372  xl_heap_prune xlrec;
373  XLogRecPtr recptr;
374 
375  xlrec.isCatalogRel = RelationIsAccessibleInLogicalDecoding(relation);
376  xlrec.snapshotConflictHorizon = prstate.snapshotConflictHorizon;
377  xlrec.nredirected = prstate.nredirected;
378  xlrec.ndead = prstate.ndead;
379 
380  XLogBeginInsert();
381  XLogRegisterData((char *) &xlrec, SizeOfHeapPrune);
382 
383  XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
384 
385  /*
386  * The OffsetNumber arrays are not actually in the buffer, but we
387  * pretend that they are. When XLogInsert stores the whole
388  * buffer, the offset arrays need not be stored too.
389  */
390  if (prstate.nredirected > 0)
391  XLogRegisterBufData(0, (char *) prstate.redirected,
392  prstate.nredirected *
393  sizeof(OffsetNumber) * 2);
394 
395  if (prstate.ndead > 0)
396  XLogRegisterBufData(0, (char *) prstate.nowdead,
397  prstate.ndead * sizeof(OffsetNumber));
398 
399  if (prstate.nunused > 0)
400  XLogRegisterBufData(0, (char *) prstate.nowunused,
401  prstate.nunused * sizeof(OffsetNumber));
402 
403  recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_PRUNE);
404 
405  PageSetLSN(BufferGetPage(buffer), recptr);
406  }
407  }
408  else
409  {
410  /*
411  * If we didn't prune anything, but have found a new value for the
412  * pd_prune_xid field, update it and mark the buffer dirty. This is
413  * treated as a non-WAL-logged hint.
414  *
415  * Also clear the "page is full" flag if it is set, since there's no
416  * point in repeating the prune/defrag process until something else
417  * happens to the page.
418  */
419  if (((PageHeader) page)->pd_prune_xid != prstate.new_prune_xid ||
420  PageIsFull(page))
421  {
422  ((PageHeader) page)->pd_prune_xid = prstate.new_prune_xid;
423  PageClearFull(page);
424  MarkBufferDirtyHint(buffer, true);
425  }
426  }
427 
428  END_CRIT_SECTION();
429 
430  /* Record number of newly-set-LP_DEAD items for caller */
431  presult->nnewlpdead = prstate.ndead;
432 }
void MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
Definition: bufmgr.c:4625
PageHeaderData * PageHeader
Definition: bufpage.h:170
static void PageClearFull(Page page)
Definition: bufpage.h:420
static bool PageIsFull(Page page)
Definition: bufpage.h:410
#define XLOG_HEAP2_PRUNE
Definition: heapam_xlog.h:54
#define SizeOfHeapPrune
Definition: heapam_xlog.h:253
#define OffsetNumberPrev(offsetNumber)
Definition: off.h:54
static int heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, int8 *htsv, PruneState *prstate)
Definition: pruneheap.c:488
static HTSV_Result heap_prune_satisfies_vacuum(PruneState *prstate, HeapTuple tup, Buffer buffer)
Definition: pruneheap.c:439
void heap_page_prune_execute(Buffer buffer, OffsetNumber *redirected, int nredirected, OffsetNumber *nowdead, int ndead, OffsetNumber *nowunused, int nunused)
Definition: pruneheap.c:840
int nnewlpdead
Definition: heapam.h:200
int ndeleted
Definition: heapam.h:199
int8 htsv[MaxHeapTuplesPerPage+1]
Definition: heapam.h:211
int ndead
Definition: pruneheap.c:44
TransactionId new_prune_xid
Definition: pruneheap.c:41
OffsetNumber nowdead[MaxHeapTuplesPerPage]
Definition: pruneheap.c:48
bool marked[MaxHeapTuplesPerPage+1]
Definition: pruneheap.c:57
OffsetNumber nowunused[MaxHeapTuplesPerPage]
Definition: pruneheap.c:49
bool mark_unused_now
Definition: pruneheap.c:39
GlobalVisState * vistest
Definition: pruneheap.c:37
Relation rel
Definition: pruneheap.c:34
OffsetNumber redirected[MaxHeapTuplesPerPage *2]
Definition: pruneheap.c:47
int nredirected
Definition: pruneheap.c:43
int nunused
Definition: pruneheap.c:45
TransactionId snapshotConflictHorizon
Definition: pruneheap.c:42
TransactionId snapshotConflictHorizon
Definition: heapam_xlog.h:245
uint16 nredirected
Definition: heapam_xlog.h:246

References BufferGetBlockNumber(), BufferGetPage(), END_CRIT_SECTION, FirstOffsetNumber, heap_page_prune_execute(), heap_prune_chain(), heap_prune_satisfies_vacuum(), PruneResult::htsv, InvalidOffsetNumber, InvalidTransactionId, xl_heap_prune::isCatalogRel, ItemIdGetLength, ItemIdIsNormal, ItemIdIsUsed, ItemPointerSet(), PruneState::mark_unused_now, MarkBufferDirty(), MarkBufferDirtyHint(), PruneState::marked, PruneState::ndead, xl_heap_prune::ndead, PruneResult::ndeleted, PruneState::new_prune_xid, PruneResult::nnewlpdead, PruneState::nowdead, PruneState::nowunused, PruneState::nredirected, xl_heap_prune::nredirected, PruneState::nunused, OffsetNumberNext, OffsetNumberPrev, PageClearFull(), PageGetItem(), PageGetItemId(), PageGetMaxOffsetNumber(), PageIsFull(), PageSetLSN(), PruneState::redirected, REGBUF_STANDARD, PruneState::rel, RelationGetRelid, RelationIsAccessibleInLogicalDecoding, RelationNeedsWAL, SizeOfHeapPrune, PruneState::snapshotConflictHorizon, xl_heap_prune::snapshotConflictHorizon, START_CRIT_SECTION, HeapTupleData::t_data, HeapTupleData::t_len, HeapTupleData::t_self, HeapTupleData::t_tableOid, PruneState::vistest, XLOG_HEAP2_PRUNE, XLogBeginInsert(), XLogInsert(), XLogRegisterBufData(), XLogRegisterBuffer(), and XLogRegisterData().

Referenced by heap_page_prune_opt(), and lazy_scan_prune().
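
For orientation, here is a hedged sketch (not backend code) of how a caller that already holds a cleanup lock on the buffer might invoke heap_page_prune(), loosely following lazy_scan_prune(). The helper name prune_one_page is hypothetical, and using GlobalVisTestFor() to pick the visibility horizon is an assumption for the example.

#include "postgres.h"
#include "access/heapam.h"
#include "storage/bufmgr.h"
#include "utils/snapmgr.h"      /* GlobalVisTestFor() */

/* Hypothetical helper: prune one page; caller holds a cleanup lock on buf. */
static int
prune_one_page(Relation rel, Buffer buf)
{
    GlobalVisState *vistest = GlobalVisTestFor(rel);
    PruneResult presult;

    /* mark_unused_now = false: leave LP_DEAD items for a later index pass */
    heap_page_prune(rel, buf, vistest, false, &presult, NULL);

    return presult.ndeleted;    /* tuples removed from the page */
}

Passing NULL for off_loc simply skips the per-offset error-context reporting that VACUUM uses.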

◆ heap_page_prune_execute()

void heap_page_prune_execute ( Buffer  buffer,
OffsetNumber redirected,
int  nredirected,
OffsetNumber nowdead,
int  ndead,
OffsetNumber nowunused,
int  nunused 
)

Definition at line 840 of file pruneheap.c.

844 {
845  Page page = (Page) BufferGetPage(buffer);
846  OffsetNumber *offnum;
847  HeapTupleHeader htup PG_USED_FOR_ASSERTS_ONLY;
848 
849  /* Shouldn't be called unless there's something to do */
850  Assert(nredirected > 0 || ndead > 0 || nunused > 0);
851 
852  /* Update all redirected line pointers */
853  offnum = redirected;
854  for (int i = 0; i < nredirected; i++)
855  {
856  OffsetNumber fromoff = *offnum++;
857  OffsetNumber tooff = *offnum++;
858  ItemId fromlp = PageGetItemId(page, fromoff);
859  ItemId tolp PG_USED_FOR_ASSERTS_ONLY;
860 
861 #ifdef USE_ASSERT_CHECKING
862 
863  /*
864  * Any existing item that we set as an LP_REDIRECT (any 'from' item)
865  * must be the first item from a HOT chain. If the item has tuple
866  * storage then it can't be a heap-only tuple. Otherwise we are just
867  * maintaining an existing LP_REDIRECT from an existing HOT chain that
868  * has been pruned at least once before now.
869  */
870  if (!ItemIdIsRedirected(fromlp))
871  {
872  Assert(ItemIdHasStorage(fromlp) && ItemIdIsNormal(fromlp));
873 
874  htup = (HeapTupleHeader) PageGetItem(page, fromlp);
875  Assert(!HeapTupleHeaderIsHeapOnly(htup));
876  }
877  else
878  {
879  /* We shouldn't need to redundantly set the redirect */
880  Assert(ItemIdGetRedirect(fromlp) != tooff);
881  }
882 
883  /*
884  * The item that we're about to set as an LP_REDIRECT (the 'from'
885  * item) will point to an existing item (the 'to' item) that is
886  * already a heap-only tuple. There can be at most one LP_REDIRECT
887  * item per HOT chain.
888  *
889  * We need to keep around an LP_REDIRECT item (after original
890  * non-heap-only root tuple gets pruned away) so that it's always
891  * possible for VACUUM to easily figure out what TID to delete from
892  * indexes when an entire HOT chain becomes dead. A heap-only tuple
893  * can never become LP_DEAD; an LP_REDIRECT item or a regular heap
894  * tuple can.
895  *
896  * This check may miss problems, e.g. the target of a redirect could
897  * be marked as unused subsequently. The page_verify_redirects() check
898  * below will catch such problems.
899  */
900  tolp = PageGetItemId(page, tooff);
901  Assert(ItemIdHasStorage(tolp) && ItemIdIsNormal(tolp));
902  htup = (HeapTupleHeader) PageGetItem(page, tolp);
903  Assert(HeapTupleHeaderIsHeapOnly(htup));
904 #endif
905 
906  ItemIdSetRedirect(fromlp, tooff);
907  }
908 
909  /* Update all now-dead line pointers */
910  offnum = nowdead;
911  for (int i = 0; i < ndead; i++)
912  {
913  OffsetNumber off = *offnum++;
914  ItemId lp = PageGetItemId(page, off);
915 
916 #ifdef USE_ASSERT_CHECKING
917 
918  /*
919  * An LP_DEAD line pointer must be left behind when the original item
920  * (which is dead to everybody) could still be referenced by a TID in
921  * an index. This should never be necessary with any individual
922  * heap-only tuple item, though. (It's not clear how much of a problem
923  * that would be, but there is no reason to allow it.)
924  */
925  if (ItemIdHasStorage(lp))
926  {
927  Assert(ItemIdIsNormal(lp));
928  htup = (HeapTupleHeader) PageGetItem(page, lp);
929  Assert(HeapTupleHeaderIsHeapOnly(htup));
930  }
931  else
932  {
933  /* Whole HOT chain becomes dead */
934  Assert(ItemIdIsRedirected(lp));
935  }
936 #endif
937 
938  ItemIdSetDead(lp);
939  }
940 
941  /* Update all now-unused line pointers */
942  offnum = nowunused;
943  for (int i = 0; i < nunused; i++)
944  {
945  OffsetNumber off = *offnum++;
946  ItemId lp = PageGetItemId(page, off);
947 
948 #ifdef USE_ASSERT_CHECKING
949 
950  /*
951  * When heap_page_prune() was called, mark_unused_now may have been
952  * passed as true, which allows would-be LP_DEAD items to be made
953  * LP_UNUSED instead. This is only possible if the relation has no
954  * indexes. If there are any dead items, then mark_unused_now was not
955  * true and every item being marked LP_UNUSED must refer to a
956  * heap-only tuple.
957  */
958  if (ndead > 0)
959  {
960  Assert(ItemIdIsNormal(lp));
961  htup = (HeapTupleHeader) PageGetItem(page, lp);
962  Assert(HeapTupleHeaderIsHeapOnly(htup));
963  }
964  else
965  {
966  Assert(ItemIdIsUsed(lp));
967  }
968 
969 #endif
970 
971  ItemIdSetUnused(lp);
972  }
973 
974  /*
975  * Finally, repair any fragmentation, and update the page's hint bit about
976  * whether it has free pointers.
977  */
978  PageRepairFragmentation(page);
979 
980  /*
981  * Now that the page has been modified, assert that redirect items still
982  * point to valid targets.
983  */
984  page_verify_redirects(page);
985 }
void PageRepairFragmentation(Page page)
Definition: bufpage.c:699
#define PG_USED_FOR_ASSERTS_ONLY
Definition: c.h:171
#define ItemIdSetRedirect(itemId, link)
Definition: itemid.h:152
#define ItemIdSetDead(itemId)
Definition: itemid.h:164
#define ItemIdSetUnused(itemId)
Definition: itemid.h:128
#define ItemIdHasStorage(itemId)
Definition: itemid.h:120
static void page_verify_redirects(Page page)
Definition: pruneheap.c:1002

References Assert(), BufferGetPage(), HeapTupleHeaderIsHeapOnly, i, ItemIdGetRedirect, ItemIdHasStorage, ItemIdIsNormal, ItemIdIsRedirected, ItemIdIsUsed, ItemIdSetDead, ItemIdSetRedirect, ItemIdSetUnused, page_verify_redirects(), PageGetItem(), PageGetItemId(), PageRepairFragmentation(), and PG_USED_FOR_ASSERTS_ONLY.

Referenced by heap_page_prune(), and heap_xlog_prune().
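
To make the array layout concrete, here is a hypothetical fragment (not backend code) showing how the offset arrays are passed to heap_page_prune_execute(): the redirected array holds (from, to) pairs, while nowdead and nowunused are flat lists. It assumes the caller already holds a buffer cleanup lock and is inside a critical section, as heap_page_prune() is; the function name apply_example_prune and the literal offsets are made up.

#include "postgres.h"
#include "access/heapam.h"
#include "storage/off.h"

/* Hypothetical fragment: redirect item 2 to item 5, kill item 7, free item 9. */
static void
apply_example_prune(Buffer buf)
{
    OffsetNumber redirected[] = {2, 5};     /* stored as (from, to) pairs */
    OffsetNumber nowdead[]    = {7};
    OffsetNumber nowunused[]  = {9};

    heap_page_prune_execute(buf,
                            redirected, 1,  /* one redirect pair */
                            nowdead, 1,
                            nowunused, 1);
}

This pairwise layout is also why heap_page_prune() registers the redirected array in WAL as nredirected * sizeof(OffsetNumber) * 2 bytes.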

◆ heap_page_prune_opt()

void heap_page_prune_opt ( Relation  relation,
Buffer  buffer 
)

Definition at line 90 of file pruneheap.c.

91 {
92  Page page = BufferGetPage(buffer);
93  TransactionId prune_xid;
94  GlobalVisState *vistest;
95  Size minfree;
96 
97  /*
98  * We can't write WAL in recovery mode, so there's no point trying to
99  * clean the page. The primary will likely issue a cleaning WAL record
100  * soon anyway, so this is no particular loss.
101  */
102  if (RecoveryInProgress())
103  return;
104 
105  /*
106  * First check whether there's any chance there's something to prune,
107  * determining the appropriate horizon is a waste if there's no prune_xid
108  * (i.e. no updates/deletes left potentially dead tuples around).
109  */
110  prune_xid = ((PageHeader) page)->pd_prune_xid;
111  if (!TransactionIdIsValid(prune_xid))
112  return;
113 
114  /*
115  * Check whether prune_xid indicates that there may be dead rows that can
116  * be cleaned up.
117  */
118  vistest = GlobalVisTestFor(relation);
119 
120  if (!GlobalVisTestIsRemovableXid(vistest, prune_xid))
121  return;
122 
123  /*
124  * We prune when a previous UPDATE failed to find enough space on the page
125  * for a new tuple version, or when free space falls below the relation's
126  * fill-factor target (but not less than 10%).
127  *
128  * Checking free space here is questionable since we aren't holding any
129  * lock on the buffer; in the worst case we could get a bogus answer. It's
130  * unlikely to be *seriously* wrong, though, since reading either pd_lower
131  * or pd_upper is probably atomic. Avoiding taking a lock seems more
132  * important than sometimes getting a wrong answer in what is after all
133  * just a heuristic estimate.
134  */
135  minfree = RelationGetTargetPageFreeSpace(relation,
136  HEAP_DEFAULT_FILLFACTOR);
137  minfree = Max(minfree, BLCKSZ / 10);
138 
139  if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
140  {
141  /* OK, try to get exclusive buffer lock */
142  if (!ConditionalLockBufferForCleanup(buffer))
143  return;
144 
145  /*
146  * Now that we have buffer lock, get accurate information about the
147  * page's free space, and recheck the heuristic about whether to
148  * prune.
149  */
150  if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
151  {
152  PruneResult presult;
153 
154  /*
155  * For now, pass mark_unused_now as false regardless of whether or
156  * not the relation has indexes, since we cannot safely determine
157  * that during on-access pruning with the current implementation.
158  */
159  heap_page_prune(relation, buffer, vistest, false,
160  &presult, NULL);
161 
162  /*
163  * Report the number of tuples reclaimed to pgstats. This is
164  * presult.ndeleted minus the number of newly-LP_DEAD-set items.
165  *
166  * We derive the number of dead tuples like this to avoid totally
167  * forgetting about items that were set to LP_DEAD, since they
168  * still need to be cleaned up by VACUUM. We only want to count
169  * heap-only tuples that just became LP_UNUSED in our report,
170  * which don't.
171  *
172  * VACUUM doesn't have to compensate in the same way when it
173  * tracks ndeleted, since it will set the same LP_DEAD items to
174  * LP_UNUSED separately.
175  */
176  if (presult.ndeleted > presult.nnewlpdead)
177  pgstat_update_heap_dead_tuples(relation,
178  presult.ndeleted - presult.nnewlpdead);
179  }
180 
181  /* And release buffer lock */
182  LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
183 
184  /*
185  * We avoid reuse of any free space created on the page by unrelated
186  * UPDATEs/INSERTs by opting to not update the FSM at this point. The
187  * free space should be reused by UPDATEs to *this* page.
188  */
189  }
190 }
bool ConditionalLockBufferForCleanup(Buffer buffer)
Definition: bufmgr.c:5037
#define Max(x, y)
Definition: c.h:987
void pgstat_update_heap_dead_tuples(Relation rel, int delta)
bool GlobalVisTestIsRemovableXid(GlobalVisState *state, TransactionId xid)
Definition: procarray.c:4175
void heap_page_prune(Relation relation, Buffer buffer, GlobalVisState *vistest, bool mark_unused_now, PruneResult *presult, OffsetNumber *off_loc)
Definition: pruneheap.c:216
bool RecoveryInProgress(void)
Definition: xlog.c:6211

References BUFFER_LOCK_UNLOCK, BufferGetPage(), ConditionalLockBufferForCleanup(), GlobalVisTestFor(), GlobalVisTestIsRemovableXid(), HEAP_DEFAULT_FILLFACTOR, heap_page_prune(), LockBuffer(), Max, PruneResult::ndeleted, PruneResult::nnewlpdead, PageGetHeapFreeSpace(), PageIsFull(), pgstat_update_heap_dead_tuples(), RecoveryInProgress(), RelationGetTargetPageFreeSpace, and TransactionIdIsValid.

Referenced by heapam_index_fetch_tuple(), heapam_scan_bitmap_next_block(), and heapgetpage().
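
The heuristic described in the comments above reduces to a simple threshold test: take the relation's fill-factor free-space target, clamp it to at least 10% of the block size, and only attempt pruning when the page is marked full or its free space has dropped below that threshold. The following is a minimal sketch of that check using the same backend routines; the wrapper name page_needs_opportunistic_prune is hypothetical and is not part of heapam.

#include "postgres.h"
#include "access/htup_details.h"	/* HEAP_DEFAULT_FILLFACTOR */
#include "storage/bufpage.h"		/* PageIsFull, PageGetHeapFreeSpace */
#include "utils/rel.h"				/* RelationGetTargetPageFreeSpace */

/* Hypothetical helper mirroring the free-space heuristic shown above. */
static bool
page_needs_opportunistic_prune(Relation relation, Page page)
{
	Size		minfree;

	/* Fill-factor target, but never less than 10% of the block size */
	minfree = RelationGetTargetPageFreeSpace(relation,
											 HEAP_DEFAULT_FILLFACTOR);
	minfree = Max(minfree, BLCKSZ / 10);

	/* Prune only if a prior UPDATE marked the page full or space is tight */
	return PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree;
}

Note that heap_page_prune_opt() applies this test twice: once without any buffer lock (accepting a possibly stale answer) and again after ConditionalLockBufferForCleanup() succeeds, so the actual pruning decision is always based on accurate page state.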

◆ heap_prepare_freeze_tuple()

bool heap_prepare_freeze_tuple ( HeapTupleHeader  tuple,
const struct VacuumCutoffs cutoffs,
HeapPageFreeze pagefrz,
HeapTupleFreeze frz,
bool totally_frozen 
)

Definition at line 6379 of file heapam.c.

6383 {
6384  bool xmin_already_frozen = false,
6385  xmax_already_frozen = false;
6386  bool freeze_xmin = false,
6387  replace_xvac = false,
6388  replace_xmax = false,
6389  freeze_xmax = false;
6390  TransactionId xid;
6391 
6392  frz->xmax = HeapTupleHeaderGetRawXmax(tuple);
6393  frz->t_infomask2 = tuple->t_infomask2;
6394  frz->t_infomask = tuple->t_infomask;
6395  frz->frzflags = 0;
6396  frz->checkflags = 0;
6397 
6398  /*
6399  * Process xmin, while keeping track of whether it's already frozen, or
6400  * will become frozen iff our freeze plan is executed by caller (could be
6401  * neither).
6402  */
6403  xid = HeapTupleHeaderGetXmin(tuple);
6404  if (!TransactionIdIsNormal(xid))
6405  xmin_already_frozen = true;
6406  else
6407  {
6408  if (TransactionIdPrecedes(xid, cutoffs->relfrozenxid))
6409  ereport(ERROR,
6410  (errcode(ERRCODE_DATA_CORRUPTED),
6411  errmsg_internal("found xmin %u from before relfrozenxid %u",
6412  xid, cutoffs->relfrozenxid)));
6413 
6414  /* Will set freeze_xmin flags in freeze plan below */
6415  freeze_xmin = TransactionIdPrecedes(xid, cutoffs->OldestXmin);
6416 
6417  /* Verify that xmin committed if and when freeze plan is executed */
6418  if (freeze_xmin)
6419  frz->checkflags |= HEAP_FREEZE_CHECK_XMIN_COMMITTED;
6420  }
6421 
6422  /*
6423  * Old-style VACUUM FULL is gone, but we have to process xvac for as long
6424  * as we support having MOVED_OFF/MOVED_IN tuples in the database
6425  */
6426  xid = HeapTupleHeaderGetXvac(tuple);
6427  if (TransactionIdIsNormal(xid))
6428  {
6429  Assert(TransactionIdPrecedesOrEquals(cutoffs->relfrozenxid, xid));
6430  Assert(TransactionIdPrecedes(xid, cutoffs->OldestXmin));
6431 
6432  /*
6433  * For Xvac, we always freeze proactively. This allows totally_frozen
6434  * tracking to ignore xvac.
6435  */
6436  replace_xvac = pagefrz->freeze_required = true;
6437 
6438  /* Will set replace_xvac flags in freeze plan below */
6439  }
6440 
6441  /* Now process xmax */
6442  xid = frz->xmax;
6443  if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
6444  {
6445  /* Raw xmax is a MultiXactId */
6446  TransactionId newxmax;
6447  uint16 flags;
6448 
6449  /*
6450  * We will either remove xmax completely (in the "freeze_xmax" path),
6451  * process xmax by replacing it (in the "replace_xmax" path), or
6452  * perform no-op xmax processing. The only constraint is that the
6453  * FreezeLimit/MultiXactCutoff postcondition must never be violated.
6454  */
6455  newxmax = FreezeMultiXactId(xid, tuple->t_infomask, cutoffs,
6456  &flags, pagefrz);
6457 
6458  if (flags & FRM_NOOP)
6459  {
6460  /*
6461  * xmax is a MultiXactId, and nothing about it changes for now.
6462  * This is the only case where 'freeze_required' won't have been
6463  * set for us by FreezeMultiXactId, as well as the only case where
6464  * neither freeze_xmax nor replace_xmax are set (given a multi).
6465  *
6466  * This is a no-op, but the call to FreezeMultiXactId might have
6467  * ratcheted back NewRelfrozenXid and/or NewRelminMxid trackers
6468  * for us (the "freeze page" variants, specifically). That'll
6469  * make it safe for our caller to freeze the page later on, while
6470  * leaving this particular xmax undisturbed.
6471  *
6472  * FreezeMultiXactId is _not_ responsible for the "no freeze"
6473  * NewRelfrozenXid/NewRelminMxid trackers, though -- that's our
6474  * job. A call to heap_tuple_should_freeze for this same tuple
6475  * will take place below if 'freeze_required' isn't set already.
6476  * (This repeats work from FreezeMultiXactId, but allows "no
6477  * freeze" tracker maintenance to happen in only one place.)
6478  */
6479  Assert(!MultiXactIdPrecedes(newxmax, cutoffs->MultiXactCutoff));
6480  Assert(MultiXactIdIsValid(newxmax) && xid == newxmax);
6481  }
6482  else if (flags & FRM_RETURN_IS_XID)
6483  {
6484  /*
6485  * xmax will become an updater Xid (original MultiXact's updater
6486  * member Xid will be carried forward as a simple Xid in Xmax).
6487  */
6488  Assert(!TransactionIdPrecedes(newxmax, cutoffs->OldestXmin));
6489 
6490  /*
6491  * NB -- some of these transformations are only valid because we
6492  * know the return Xid is a tuple updater (i.e. not merely a
6493  * locker.) Also note that the only reason we don't explicitly
6494  * worry about HEAP_KEYS_UPDATED is because it lives in
6495  * t_infomask2 rather than t_infomask.
6496  */
6497  frz->t_infomask &= ~HEAP_XMAX_BITS;
6498  frz->xmax = newxmax;
6499  if (flags & FRM_MARK_COMMITTED)
6500  frz->t_infomask |= HEAP_XMAX_COMMITTED;
6501  replace_xmax = true;
6502  }
6503  else if (flags & FRM_RETURN_IS_MULTI)
6504  {
6505  uint16 newbits;
6506  uint16 newbits2;
6507 
6508  /*
6509  * xmax is an old MultiXactId that we have to replace with a new
6510  * MultiXactId, to carry forward two or more original member XIDs.
6511  */
6512  Assert(!MultiXactIdPrecedes(newxmax, cutoffs->OldestMxact));
6513 
6514  /*
6515  * We can't use GetMultiXactIdHintBits directly on the new multi
6516  * here; that routine initializes the masks to all zeroes, which
6517  * would lose other bits we need. Doing it this way ensures all
6518  * unrelated bits remain untouched.
6519  */
6520  frz->t_infomask &= ~HEAP_XMAX_BITS;
6521  frz->t_infomask2 &= ~HEAP_KEYS_UPDATED;
6522  GetMultiXactIdHintBits(newxmax, &newbits, &newbits2);
6523  frz->t_infomask |= newbits;
6524  frz->t_infomask2 |= newbits2;
6525  frz->xmax = newxmax;
6526  replace_xmax = true;
6527  }
6528  else
6529  {
6530  /*
6531  * Freeze plan for tuple "freezes xmax" in the strictest sense:
6532  * it'll leave nothing in xmax (neither an Xid nor a MultiXactId).
6533  */
6534  Assert(flags & FRM_INVALIDATE_XMAX);
6535  Assert(!TransactionIdIsValid(newxmax));
6536 
6537  /* Will set freeze_xmax flags in freeze plan below */
6538  freeze_xmax = true;
6539  }
6540 
6541  /* MultiXactId processing forces freezing (barring FRM_NOOP case) */
6542  Assert(pagefrz->freeze_required || (!freeze_xmax && !replace_xmax));
6543  }
6544  else if (TransactionIdIsNormal(xid))
6545  {
6546  /* Raw xmax is normal XID */
6547  if (TransactionIdPrecedes(xid, cutoffs->relfrozenxid))
6548  ereport(ERROR,
6549  (errcode(ERRCODE_DATA_CORRUPTED),
6550  errmsg_internal("found xmax %u from before relfrozenxid %u",
6551  xid, cutoffs->relfrozenxid)));
6552 
6553  /* Will set freeze_xmax flags in freeze plan below */
6554  freeze_xmax = TransactionIdPrecedes(xid, cutoffs->OldestXmin);
6555 
6556  /*
6557  * Verify that xmax aborted if and when freeze plan is executed,
6558  * provided it's from an update. (A lock-only xmax can be removed
6559  * independent of this, since the lock is released at xact end.)
6560  */
6561  if (freeze_xmax && !HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
6562  frz->checkflags |= HEAP_FREEZE_CHECK_XMAX_ABORTED;
6563  }
6564  else if (!TransactionIdIsValid(xid))
6565  {
6566  /* Raw xmax is InvalidTransactionId XID */
6567  Assert((tuple->t_infomask & HEAP_XMAX_IS_MULTI) == 0);
6568  xmax_already_frozen = true;
6569  }
6570  else
6571  ereport(ERROR,
6572  (errcode(ERRCODE_DATA_CORRUPTED),
6573  errmsg_internal("found raw xmax %u (infomask 0x%04x) not invalid and not multi",
6574  xid, tuple->t_infomask)));
6575 
6576  if (freeze_xmin)
6577  {
6578  Assert(!xmin_already_frozen);
6579 
6580  frz->t_infomask |= HEAP_XMIN_FROZEN;
6581  }
6582  if (replace_xvac)
6583  {
6584  /*
6585  * If a MOVED_OFF tuple is not dead, the xvac transaction must have
6586  * failed; whereas a non-dead MOVED_IN tuple must mean the xvac
6587  * transaction succeeded.
6588  */
6589  Assert(pagefrz->freeze_required);
6590  if (tuple->t_infomask & HEAP_MOVED_OFF)
6591  frz->frzflags |= XLH_INVALID_XVAC;
6592  else
6593  frz->frzflags |= XLH_FREEZE_XVAC;
6594  }
6595  if (replace_xmax)
6596  {
6597  Assert(!xmax_already_frozen && !freeze_xmax);
6598  Assert(pagefrz->freeze_required);
6599 
6600  /* Already set replace_xmax flags in freeze plan earlier */
6601  }
6602  if (freeze_xmax)
6603  {
6604  Assert(!xmax_already_frozen && !replace_xmax);
6605 
6606  frz->xmax = InvalidTransactionId;
6607 
6608  /*
6609  * The tuple might be marked either XMAX_INVALID or XMAX_COMMITTED +
6610  * LOCKED. Normalize to INVALID just to be sure no one gets confused.
6611  * Also get rid of the HEAP_KEYS_UPDATED bit.
6612  */
6613  frz->t_infomask &= ~HEAP_XMAX_BITS;
6614  frz->t_infomask |= HEAP_XMAX_INVALID;
6615  frz->t_infomask2 &= ~HEAP_HOT_UPDATED;
6616  frz->t_infomask2 &= ~HEAP_KEYS_UPDATED;
6617  }
6618 
6619  /*
6620  * Determine if this tuple is already totally frozen, or will become
6621  * totally frozen (provided caller executes freeze plans for the page)
6622  */
6623  *totally_frozen = ((freeze_xmin || xmin_already_frozen) &&
6624  (freeze_xmax || xmax_already_frozen));
6625 
6626  if (!pagefrz->freeze_required && !(xmin_already_frozen &&
6627  xmax_already_frozen))
6628  {
6629  /*
6630  * So far no previous tuple from the page made freezing mandatory.
6631  * Does this tuple force caller to freeze the entire page?
6632  */
6633  pagefrz->freeze_required =
6634  heap_tuple_should_freeze(tuple, cutoffs,
6635  &pagefrz->NoFreezePageRelfrozenXid,
6636  &pagefrz->NoFreezePageRelminMxid);
6637  }
6638 
6639  /* Tell caller if this tuple has a usable freeze plan set in *frz */
6640  return freeze_xmin || replace_xvac || replace_xmax || freeze_xmax;
6641 }
static void GetMultiXactIdHintBits(MultiXactId multi, uint16 *new_infomask, uint16 *new_infomask2)
Definition: heapam.c:6977
#define FRM_RETURN_IS_XID
Definition: heapam.c:5978
static TransactionId FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, const struct VacuumCutoffs *cutoffs, uint16 *flags, HeapPageFreeze *pagefrz)
Definition: heapam.c:6029
bool heap_tuple_should_freeze(HeapTupleHeader tuple, const struct VacuumCutoffs *cutoffs, TransactionId *NoFreezePageRelfrozenXid, MultiXactId *NoFreezePageRelminMxid)
Definition: heapam.c:7394
#define FRM_MARK_COMMITTED
Definition: heapam.c:5980
#define FRM_NOOP
Definition: heapam.c:5976
#define FRM_RETURN_IS_MULTI
Definition: heapam.c:5979
#define FRM_INVALIDATE_XMAX
Definition: heapam.c:5977
#define XLH_INVALID_XVAC
Definition: heapam_xlog.h:324
#define XLH_FREEZE_XVAC
Definition: heapam_xlog.h:323
#define HEAP_MOVED_OFF
Definition: htup_details.h:211
#define HEAP_XMIN_FROZEN
Definition: htup_details.h:206
#define HEAP_HOT_UPDATED
Definition: htup_details.h:276
#define HeapTupleHeaderGetXvac(tup)
Definition: htup_details.h:411
#define HEAP_XMAX_COMMITTED
Definition: htup_details.h:207
bool MultiXactIdPrecedes(MultiXactId multi1, MultiXactId multi2)
Definition: multixact.c:3163
#define MultiXactIdIsValid(multi)
Definition: multixact.h:28
MultiXactId NoFreezePageRelminMxid
Definition: heapam.h:190
bool freeze_required
Definition: heapam.h:152
TransactionId NoFreezePageRelfrozenXid
Definition: heapam.h:189
uint8 frzflags
Definition: heapam.h:117
uint16 t_infomask2
Definition: heapam.h:115
TransactionId xmax
Definition: heapam.h:114
uint16 t_infomask
Definition: heapam.h:116
TransactionId OldestXmin
Definition: vacuum.h:265
MultiXactId OldestMxact
Definition: vacuum.h:266
bool TransactionIdPrecedesOrEquals(TransactionId id1, TransactionId id2)
Definition: transam.c:299

References Assert(), HeapTupleFreeze::checkflags, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errmsg_internal(), ERROR, HeapPageFreeze::freeze_required, FreezeMultiXactId(), FRM_INVALIDATE_XMAX, FRM_MARK_COMMITTED, FRM_NOOP, FRM_RETURN_IS_MULTI, FRM_RETURN_IS_XID, HeapTupleFreeze::frzflags, GetMultiXactIdHintBits(), HEAP_FREEZE_CHECK_XMAX_ABORTED, HEAP_FREEZE_CHECK_XMIN_COMMITTED, HEAP_HOT_UPDATED, HEAP_KEYS_UPDATED, HEAP_MOVED_OFF, heap_tuple_should_freeze(), HEAP_XMAX_BITS, HEAP_XMAX_COMMITTED, HEAP_XMAX_INVALID, HEAP_XMAX_IS_LOCKED_ONLY, HEAP_XMAX_IS_MULTI, HEAP_XMIN_FROZEN, HeapTupleHeaderGetRawXmax, HeapTupleHeaderGetXmin, HeapTupleHeaderGetXvac, InvalidTransactionId, VacuumCutoffs::MultiXactCutoff, MultiXactIdIsValid, MultiXactIdPrecedes(), HeapPageFreeze::NoFreezePageRelfrozenXid, HeapPageFreeze::NoFreezePageRelminMxid, VacuumCutoffs::OldestMxact, VacuumCutoffs::OldestXmin, VacuumCutoffs::relfrozenxid, HeapTupleFreeze::t_infomask, HeapTupleHeaderData::t_infomask, HeapTupleFreeze::t_infomask2, HeapTupleHeaderData::t_infomask2, TransactionIdIsNormal, TransactionIdIsValid, TransactionIdPrecedes(), TransactionIdPrecedesOrEquals(), XLH_FREEZE_XVAC, XLH_INVALID_XVAC, and HeapTupleFreeze::xmax.

Referenced by heap_freeze_tuple(), and lazy_scan_prune().
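
As the "Referenced by" list indicates, heap_prepare_freeze_tuple() is a planning step: callers accumulate one freeze plan per tuple and only afterwards decide whether the page must (or merely may) be frozen. The sketch below shows the general calling pattern under stated assumptions: it is not the actual lazy_scan_prune() code, the helper name plan_page_freezes and the tuples/offnums inputs are hypothetical, pagefrz is assumed to have been initialized by the caller (freeze_required = false, trackers seeded from the VacuumCutoffs), and WAL logging plus execution of the plans are left out.

#include "postgres.h"
#include "access/heapam.h"			/* heap_prepare_freeze_tuple, HeapTupleFreeze */
#include "commands/vacuum.h"		/* struct VacuumCutoffs */

/*
 * Hypothetical sketch of how a VACUUM-like caller might build freeze plans
 * for one heap page.  'frozen' is the caller's plan array (typically sized
 * MaxHeapTuplesPerPage).  Returns the number of tuples with usable plans.
 */
static int
plan_page_freezes(HeapTupleHeader *tuples, OffsetNumber *offnums, int ntuples,
				  const struct VacuumCutoffs *cutoffs,
				  HeapPageFreeze *pagefrz, HeapTupleFreeze *frozen,
				  bool *all_frozen)
{
	int			nfrozen = 0;

	*all_frozen = true;

	for (int i = 0; i < ntuples; i++)
	{
		bool		totally_frozen;

		if (heap_prepare_freeze_tuple(tuples[i], cutoffs, pagefrz,
									  &frozen[nfrozen], &totally_frozen))
		{
			/* Usable freeze plan; remember which line pointer it applies to */
			frozen[nfrozen].offset = offnums[i];
			nfrozen++;
		}

		if (!totally_frozen)
			*all_frozen = false;
	}

	/*
	 * If pagefrz->freeze_required is now set, the caller must execute the
	 * nfrozen plans (and WAL-log the change); otherwise freezing the page is
	 * optional and the "NoFreeze" relfrozenxid/relminmxid trackers can be
	 * used instead.
	 */
	return nfrozen;
}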

◆ heap_rescan()

void heap_rescan ( TableScanDesc  sscan,
ScanKey  key,
bool  set_params,
bool  allow_strat,
bool  allow_sync,
bool  allow_pagemode 
)

Definition at line 1016 of file heapam.c.

1018 {
1019  HeapScanDesc scan = (HeapScanDesc) sscan;
1020 
1021  if (set_params)
1022  {
1023  if (allow_strat)
1024  scan->rs_base.rs_flags |= SO_ALLOW_STRAT;
1025  else
1026  scan->rs_base.rs_flags &= ~