PostgreSQL Source Code git master
heapam_xlog.h
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * heapam_xlog.h
4 * POSTGRES heap access XLOG definitions.
5 *
6 *
7 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994, Regents of the University of California
9 *
10 * src/include/access/heapam_xlog.h
11 *
12 *-------------------------------------------------------------------------
13 */
14#ifndef HEAPAM_XLOG_H
15#define HEAPAM_XLOG_H
16
17#include "access/htup.h"
18#include "access/xlogreader.h"
19#include "lib/stringinfo.h"
20#include "storage/buf.h"
21#include "storage/bufpage.h"
23#include "storage/sinval.h"
24#include "utils/relcache.h"
25
26
27/*
28 * WAL record definitions for heapam.c's WAL operations
29 *
30 * XLOG allows us to store some information in the high 4 bits of a log
31 * record's xl_info field. We use 3 bits for the opcode and one for the init bit.
32 */
/* Opcodes for records belonging to RM_HEAP_ID; each fits in XLOG_HEAP_OPMASK. */
33#define XLOG_HEAP_INSERT 0x00
34#define XLOG_HEAP_DELETE 0x10
35#define XLOG_HEAP_UPDATE 0x20
36#define XLOG_HEAP_TRUNCATE 0x30
37#define XLOG_HEAP_HOT_UPDATE 0x40
38#define XLOG_HEAP_CONFIRM 0x50
39#define XLOG_HEAP_LOCK 0x60
40#define XLOG_HEAP_INPLACE 0x70
41
/*
 * Mask covering the three opcode bits (bits 4-6 of xl_info). It deliberately
 * excludes the top bit, 0x80, which is XLOG_HEAP_INIT_PAGE.
 */
42#define XLOG_HEAP_OPMASK 0x70
43/*
44 * When we insert the first item on a new page in INSERT, UPDATE, HOT_UPDATE,
45 * or MULTI_INSERT, we can (and we do) restore the entire page in redo.
46 */
47#define XLOG_HEAP_INIT_PAGE 0x80
48/*
49 * We ran out of opcodes, so heapam.c now has a second RmgrId. These opcodes
50 * are associated with RM_HEAP2_ID, but are not logically different from
51 * the ones above associated with RM_HEAP_ID. XLOG_HEAP_OPMASK applies to
52 * these, too.
53 *
54 * There's no difference between XLOG_HEAP2_PRUNE_ON_ACCESS,
55 * XLOG_HEAP2_PRUNE_VACUUM_SCAN and XLOG_HEAP2_PRUNE_VACUUM_CLEANUP records.
56 * They have separate opcodes just for debugging and analysis purposes, to
57 * indicate why the WAL record was emitted.
58 */
59#define XLOG_HEAP2_REWRITE 0x00
60#define XLOG_HEAP2_PRUNE_ON_ACCESS 0x10
61#define XLOG_HEAP2_PRUNE_VACUUM_SCAN 0x20
62#define XLOG_HEAP2_PRUNE_VACUUM_CLEANUP 0x30
63#define XLOG_HEAP2_VISIBLE 0x40
64#define XLOG_HEAP2_MULTI_INSERT 0x50
65#define XLOG_HEAP2_LOCK_UPDATED 0x60
66#define XLOG_HEAP2_NEW_CID 0x70
67
68/*
69 * xl_heap_insert/xl_heap_multi_insert flag values, 8 bits are available.
70 */
71/* PD_ALL_VISIBLE was cleared */
72#define XLH_INSERT_ALL_VISIBLE_CLEARED (1<<0)
73#define XLH_INSERT_LAST_IN_MULTI (1<<1)
74#define XLH_INSERT_IS_SPECULATIVE (1<<2)
75#define XLH_INSERT_CONTAINS_NEW_TUPLE (1<<3)
76#define XLH_INSERT_ON_TOAST_RELATION (1<<4)
77
78/* all_frozen_set always implies all_visible_set */
79#define XLH_INSERT_ALL_FROZEN_SET (1<<5)
80
81/*
82 * xl_heap_update flag values, 8 bits are available.
83 */
84/* PD_ALL_VISIBLE was cleared */
85#define XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED (1<<0)
86/* PD_ALL_VISIBLE was cleared in the 2nd page */
87#define XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED (1<<1)
88#define XLH_UPDATE_CONTAINS_OLD_TUPLE (1<<2)
89#define XLH_UPDATE_CONTAINS_OLD_KEY (1<<3)
90#define XLH_UPDATE_CONTAINS_NEW_TUPLE (1<<4)
91#define XLH_UPDATE_PREFIX_FROM_OLD (1<<5)
92#define XLH_UPDATE_SUFFIX_FROM_OLD (1<<6)
93
94/* convenience macro for checking whether any form of old tuple was logged */
95#define XLH_UPDATE_CONTAINS_OLD \
96 (XLH_UPDATE_CONTAINS_OLD_TUPLE | XLH_UPDATE_CONTAINS_OLD_KEY)
97
98/*
99 * xl_heap_delete flag values, 8 bits are available.
100 */
101/* PD_ALL_VISIBLE was cleared */
102#define XLH_DELETE_ALL_VISIBLE_CLEARED (1<<0)
103#define XLH_DELETE_CONTAINS_OLD_TUPLE (1<<1)
104#define XLH_DELETE_CONTAINS_OLD_KEY (1<<2)
105#define XLH_DELETE_IS_SUPER (1<<3)
106#define XLH_DELETE_IS_PARTITION_MOVE (1<<4)
107
108/* convenience macro for checking whether any form of old tuple was logged */
109#define XLH_DELETE_CONTAINS_OLD \
110 (XLH_DELETE_CONTAINS_OLD_TUPLE | XLH_DELETE_CONTAINS_OLD_KEY)
111
112/* This is what we need to know about delete */
113typedef struct xl_heap_delete
114{
115 TransactionId xmax; /* xmax of the deleted tuple */
116 OffsetNumber offnum; /* deleted tuple's offset */
117 uint8 infobits_set; /* infomask bits */
120
121#define SizeOfHeapDelete (offsetof(xl_heap_delete, flags) + sizeof(uint8))
122
123/*
124 * xl_heap_truncate flag values, 8 bits are available.
125 */
126#define XLH_TRUNCATE_CASCADE (1<<0)
127#define XLH_TRUNCATE_RESTART_SEQS (1<<1)
128
129/*
130 * For truncate we list all truncated relids in an array, followed by all
131 * sequence relids that need to be restarted, if any.
132 * All rels are always within the same database, so we just list dbid once.
133 */
134typedef struct xl_heap_truncate
135{
141
142#define SizeOfHeapTruncate (offsetof(xl_heap_truncate, relids))
143
144/*
145 * We don't store the whole fixed part (HeapTupleHeaderData) of an inserted
146 * or updated tuple in WAL; we can save a few bytes by reconstructing the
147 * fields that are available elsewhere in the WAL record, or perhaps just
148 * plain needn't be reconstructed. These are the fields we must store.
149 */
150typedef struct xl_heap_header
151{
156
157#define SizeOfHeapHeader (offsetof(xl_heap_header, t_hoff) + sizeof(uint8))
158
159/* This is what we need to know about insert */
160typedef struct xl_heap_insert
161{
162 OffsetNumber offnum; /* inserted tuple's offset */
164
165 /* xl_heap_header & TUPLE DATA in backup block 0 */
167
168#define SizeOfHeapInsert (offsetof(xl_heap_insert, flags) + sizeof(uint8))
169
170/*
171 * This is what we need to know about a multi-insert.
172 *
173 * The main data of the record consists of this xl_heap_multi_insert header.
174 * 'offsets' array is omitted if the whole page is reinitialized
175 * (XLOG_HEAP_INIT_PAGE).
176 *
177 * In block 0's data portion, there is an xl_multi_insert_tuple struct,
178 * followed by the tuple data for each tuple. There is padding to align
179 * each xl_multi_insert_tuple struct.
180 */
182{
187
188#define SizeOfHeapMultiInsert offsetof(xl_heap_multi_insert, offsets)
189
191{
192 uint16 datalen; /* size of tuple data that follows */
196 /* TUPLE DATA FOLLOWS AT END OF STRUCT */
198
199#define SizeOfMultiInsertTuple (offsetof(xl_multi_insert_tuple, t_hoff) + sizeof(uint8))
200
201/*
202 * This is what we need to know about update|hot_update
203 *
204 * Backup blk 0: new page
205 *
206 * If XLH_UPDATE_PREFIX_FROM_OLD or XLH_UPDATE_SUFFIX_FROM_OLD flags are set,
207 * the prefix and/or suffix lengths come first, as one or two uint16s.
208 *
209 * After that, xl_heap_header and new tuple data follow. The new tuple
210 * data doesn't include the prefix and suffix, which are copied from the
211 * old tuple on replay.
212 *
213 * If XLH_UPDATE_CONTAINS_NEW_TUPLE flag is given, the tuple data is
214 * included even if a full-page image was taken.
215 *
216 * Backup blk 1: old page, if different. (no data, just a reference to the blk)
217 */
218typedef struct xl_heap_update
219{
220 TransactionId old_xmax; /* xmax of the old tuple */
221 OffsetNumber old_offnum; /* old tuple's offset */
222 uint8 old_infobits_set; /* infomask bits to set on old tuple */
224 TransactionId new_xmax; /* xmax of the new tuple */
225 OffsetNumber new_offnum; /* new tuple's offset */
226
227 /*
228 * If XLH_UPDATE_CONTAINS_OLD_TUPLE or XLH_UPDATE_CONTAINS_OLD_KEY flags
229 * are set, xl_heap_header and tuple data for the old tuple follow.
230 */
232
233#define SizeOfHeapUpdate (offsetof(xl_heap_update, new_offnum) + sizeof(OffsetNumber))
234
235/*
236 * These structures and flags encode VACUUM pruning and freezing and on-access
237 * pruning page modifications.
238 *
239 * xl_heap_prune is the main record. The XLHP_HAS_* flags indicate which
240 * "sub-records" are included and the other XLHP_* flags provide additional
241 * information about the conditions for replay.
242 *
243 * The data for block reference 0 contains "sub-records" depending on which of
244 * the XLHP_HAS_* flags are set. See xlhp_* struct definitions below. The
245 * sub-records appear in the same order as the XLHP_* flags. An example
246 * record with every sub-record included:
247 *
248 *-----------------------------------------------------------------------------
249 * Main data section:
250 *
251 * xl_heap_prune
252 * uint8 flags
253 * TransactionId snapshot_conflict_horizon
254 *
255 * Block 0 data section:
256 *
257 * xlhp_freeze_plans
258 * uint16 nplans
259 * [2 bytes of padding]
260 * xlhp_freeze_plan plans[nplans]
261 *
262 * xlhp_prune_items
263 * uint16 nredirected
264 * OffsetNumber redirected[2 * nredirected]
265 *
266 * xlhp_prune_items
267 * uint16 ndead
268 * OffsetNumber nowdead[ndead]
269 *
270 * xlhp_prune_items
271 * uint16 nunused
272 * OffsetNumber nowunused[nunused]
273 *
274 * OffsetNumber frz_offsets[sum([plan.ntuples for plan in plans])]
275 *-----------------------------------------------------------------------------
276 *
277 * NOTE: because the record data is assembled from many optional parts, we
278 * have to pay close attention to alignment. In the main data section,
279 * 'snapshot_conflict_horizon' is stored unaligned after 'flags', to save
280 * space. In the block 0 data section, the freeze plans appear first, because
281 * they contain TransactionId fields that require 4-byte alignment. All the
282 * other fields require only 2-byte alignment. This is also the reason that
283 * 'frz_offsets' is stored separately from the xlhp_freeze_plan structs.
284 */
285typedef struct xl_heap_prune
286{
289
290 /*
291 * If XLHP_HAS_CONFLICT_HORIZON is set, the conflict horizon XID follows,
292 * unaligned
293 */
295
296#define SizeOfHeapPrune (offsetof(xl_heap_prune, flags) + sizeof(uint8))
297
298/* to handle recovery conflict during logical decoding on standby */
299#define XLHP_IS_CATALOG_REL (1 << 1)
300
301/*
302 * Does replaying the record require a cleanup-lock?
303 *
304 * Pruning, in VACUUM's first pass or when otherwise accessing a page,
305 * requires a cleanup lock. For freezing, and VACUUM's second pass which
306 * marks LP_DEAD line pointers as unused without moving any tuple data, an
307 * ordinary exclusive lock is sufficient.
308 */
309#define XLHP_CLEANUP_LOCK (1 << 2)
310
311/*
312 * If we remove or freeze any entries that contain xids, we need to include a
313 * snapshot conflict horizon. It's used in Hot Standby mode to ensure that
314 * there are no queries running for which the removed tuples are still
315 * visible, or which still consider the frozen XIDs as running.
316 */
317#define XLHP_HAS_CONFLICT_HORIZON (1 << 3)
318
319/*
320 * Indicates that an xlhp_freeze_plans sub-record and one or more
321 * xlhp_freeze_plan sub-records are present.
322 */
323#define XLHP_HAS_FREEZE_PLANS (1 << 4)
324
325/*
326 * XLHP_HAS_REDIRECTIONS, XLHP_HAS_DEAD_ITEMS, and XLHP_HAS_NOW_UNUSED_ITEMS
327 * indicate that xlhp_prune_items sub-records with redirected, dead, and
328 * unused item offsets are present.
329 */
330#define XLHP_HAS_REDIRECTIONS (1 << 5)
331#define XLHP_HAS_DEAD_ITEMS (1 << 6)
332#define XLHP_HAS_NOW_UNUSED_ITEMS (1 << 7)
333
334/*
335 * xlhp_freeze_plan describes how to freeze a group of one or more heap tuples
336 * (appears in xl_heap_prune's xlhp_freeze_plans sub-record)
337 */
338/* 0x01 was XLH_FREEZE_XMIN */
339#define XLH_FREEZE_XVAC 0x02
340#define XLH_INVALID_XVAC 0x04
341
342typedef struct xlhp_freeze_plan
343{
348
349 /* Length of individual page offset numbers array for this plan */
352
353/*
354 * This is what we need to know about a block being frozen during vacuum
355 *
356 * The backup block's data contains an array of xlhp_freeze_plan structs (with
357 * nplans elements). The individual item offsets are located in an array at
358 * the end of the entire record, with one member per tuple to freeze (the sum
359 * of every plan's ntuples). Those offsets are in the same order as the plans.
360 * The REDO routine uses the offsets to freeze the corresponding heap tuples.
361 *
362 * (As of PostgreSQL 17, XLOG_HEAP2_PRUNE_VACUUM_SCAN records replace the
363 * separate XLOG_HEAP2_FREEZE_PAGE records.)
364 */
365typedef struct xlhp_freeze_plans
366{
370
371/*
372 * Generic sub-record type contained in block reference 0 of an xl_heap_prune
373 * record and used for redirect, dead, and unused items if any of
374 * XLHP_HAS_REDIRECTIONS/XLHP_HAS_DEAD_ITEMS/XLHP_HAS_NOW_UNUSED_ITEMS are
375 * set. Note that in the XLHP_HAS_REDIRECTIONS variant, the data actually
376 * holds twice as many OffsetNumbers as the item count in the sub-record.
377 */
378typedef struct xlhp_prune_items
379{
383
384
385/* flags for infobits_set */
386#define XLHL_XMAX_IS_MULTI 0x01
387#define XLHL_XMAX_LOCK_ONLY 0x02
388#define XLHL_XMAX_EXCL_LOCK 0x04
389#define XLHL_XMAX_KEYSHR_LOCK 0x08
390#define XLHL_KEYS_UPDATED 0x10
391
392/* flag bits for xl_heap_lock / xl_heap_lock_updated's flags field */
393#define XLH_LOCK_ALL_FROZEN_CLEARED 0x01
394
395/* This is what we need to know about lock */
396typedef struct xl_heap_lock
397{
398 TransactionId xmax; /* might be a MultiXactId */
399 OffsetNumber offnum; /* locked tuple's offset on page */
400 uint8 infobits_set; /* infomask and infomask2 bits to set */
401 uint8 flags; /* XLH_LOCK_* flag bits */
403
404#define SizeOfHeapLock (offsetof(xl_heap_lock, flags) + sizeof(uint8))
405
406/* This is what we need to know about locking an updated version of a row */
408{
414
415#define SizeOfHeapLockUpdated (offsetof(xl_heap_lock_updated, flags) + sizeof(uint8))
416
417/* This is what we need to know about confirmation of speculative insertion */
418typedef struct xl_heap_confirm
419{
420 OffsetNumber offnum; /* confirmed tuple's offset on page */
422
423#define SizeOfHeapConfirm (offsetof(xl_heap_confirm, offnum) + sizeof(OffsetNumber))
424
425/* This is what we need to know about in-place update */
426typedef struct xl_heap_inplace
427{
428 OffsetNumber offnum; /* updated tuple's offset on page */
429 Oid dbId; /* MyDatabaseId */
430 Oid tsId; /* MyDatabaseTableSpace */
431 bool relcacheInitFileInval; /* invalidate relcache init files */
432 int nmsgs; /* number of shared inval msgs */
435
436#define MinSizeOfHeapInplace (offsetof(xl_heap_inplace, nmsgs) + sizeof(int))
437
438/*
439 * This is what we need to know about setting a visibility map bit
440 *
441 * Backup blk 0: visibility map buffer
442 * Backup blk 1: heap buffer
443 */
444typedef struct xl_heap_visible
445{
449
450#define SizeOfHeapVisible (offsetof(xl_heap_visible, flags) + sizeof(uint8))
451
452typedef struct xl_heap_new_cid
453{
454 /*
455 * store toplevel xid so we don't have to merge cids from different
456 * transactions
457 */
461 CommandId combocid; /* just for debugging */
462
463 /*
464 * Store the relfilelocator/ctid pair to facilitate lookups.
465 */
469
470#define SizeOfHeapNewCid (offsetof(xl_heap_new_cid, target_tid) + sizeof(ItemPointerData))
471
472/* logical rewrite xlog record header */
474{
475 TransactionId mapped_xid; /* xid that might need to see the row */
476 Oid mapped_db; /* DbOid or InvalidOid for shared rels */
477 Oid mapped_rel; /* Oid of the mapped relation */
478 off_t offset; /* How far have we written so far */
479 uint32 num_mappings; /* Number of in-memory mappings */
480 XLogRecPtr start_lsn; /* Insert LSN at begin of rewrite */
482
484 TransactionId *snapshotConflictHorizon);
485
/*
 * Entry points for the two heap RmgrIds: judging by the names, the heap_*
 * functions go with RM_HEAP_ID and the heap2_* functions with RM_HEAP2_ID
 * (confirm against the rmgr table). Per the cross-reference index, heap_redo,
 * heap_mask and heap2_redo are defined in heapam_xlog.c, while the *_desc
 * and *_identify functions are defined in heapdesc.c.
 */
486extern void heap_redo(XLogReaderState *record);
487extern void heap_desc(StringInfo buf, XLogReaderState *record);
488extern const char *heap_identify(uint8 info);
489extern void heap_mask(char *pagedata, BlockNumber blkno);
490extern void heap2_redo(XLogReaderState *record);
491extern void heap2_desc(StringInfo buf, XLogReaderState *record);
492extern const char *heap2_identify(uint8 info);
494
/*
 * log_heap_visible is defined in heapam.c and returns an XLogRecPtr
 * (presumably the LSN of the WAL record it emits -- confirm there).
 *
 * NOTE(review): the parameters take the heap buffer first and the VM buffer
 * second, whereas the xl_heap_visible comment lists the visibility map buffer
 * as backup blk 0 and the heap buffer as backup blk 1. Take care not to
 * confuse the two orderings.
 */
495extern XLogRecPtr log_heap_visible(Relation rel, Buffer heap_buffer,
496 Buffer vm_buffer,
497 TransactionId snapshotConflictHorizon,
498 uint8 vmflags);
499
500/* in heapdesc.c, so it can be shared between frontend/backend code */
502 int *nplans, xlhp_freeze_plan **plans,
503 OffsetNumber **frz_offsets,
504 int *nredirected, OffsetNumber **redirected,
505 int *ndead, OffsetNumber **nowdead,
506 int *nunused, OffsetNumber **nowunused);
507
508#endif /* HEAPAM_XLOG_H */
uint32 BlockNumber
Definition: block.h:31
int Buffer
Definition: buf.h:23
uint8_t uint8
Definition: c.h:486
#define FLEXIBLE_ARRAY_MEMBER
Definition: c.h:420
uint16_t uint16
Definition: c.h:487
uint32_t uint32
Definition: c.h:488
uint32 CommandId
Definition: c.h:623
uint32 TransactionId
Definition: c.h:609
struct xlhp_freeze_plan xlhp_freeze_plan
struct xl_heap_rewrite_mapping xl_heap_rewrite_mapping
XLogRecPtr log_heap_visible(Relation rel, Buffer heap_buffer, Buffer vm_buffer, TransactionId snapshotConflictHorizon, uint8 vmflags)
Definition: heapam.c:8663
struct xl_heap_delete xl_heap_delete
void heap_desc(StringInfo buf, XLogReaderState *record)
Definition: heapdesc.c:184
void heap_redo(XLogReaderState *record)
Definition: heapam_xlog.c:1182
void heap2_desc(StringInfo buf, XLogReaderState *record)
Definition: heapdesc.c:264
void heap_xlog_deserialize_prune_and_freeze(char *cursor, uint8 flags, int *nplans, xlhp_freeze_plan **plans, OffsetNumber **frz_offsets, int *nredirected, OffsetNumber **redirected, int *ndead, OffsetNumber **nowdead, int *nunused, OffsetNumber **nowunused)
Definition: heapdesc.c:105
void heap_mask(char *pagedata, BlockNumber blkno)
Definition: heapam_xlog.c:1267
void HeapTupleHeaderAdvanceConflictHorizon(HeapTupleHeader tuple, TransactionId *snapshotConflictHorizon)
Definition: heapam.c:7832
struct xl_heap_new_cid xl_heap_new_cid
struct xl_heap_lock xl_heap_lock
void heap_xlog_logical_rewrite(XLogReaderState *r)
Definition: rewriteheap.c:1073
const char * heap2_identify(uint8 info)
Definition: heapdesc.c:434
struct xl_heap_prune xl_heap_prune
struct xl_heap_multi_insert xl_heap_multi_insert
struct xl_heap_confirm xl_heap_confirm
struct xl_heap_insert xl_heap_insert
const char * heap_identify(uint8 info)
Definition: heapdesc.c:389
struct xlhp_freeze_plans xlhp_freeze_plans
struct xl_heap_lock_updated xl_heap_lock_updated
struct xl_heap_inplace xl_heap_inplace
struct xlhp_prune_items xlhp_prune_items
void heap2_redo(XLogReaderState *record)
Definition: heapam_xlog.c:1228
struct xl_heap_header xl_heap_header
struct xl_multi_insert_tuple xl_multi_insert_tuple
struct xl_heap_visible xl_heap_visible
struct xl_heap_update xl_heap_update
struct xl_heap_truncate xl_heap_truncate
uint16 OffsetNumber
Definition: off.h:24
static char * buf
Definition: pg_test_fsync.c:72
unsigned int Oid
Definition: postgres_ext.h:32
Definition: type.h:138
OffsetNumber offnum
Definition: heapam_xlog.h:420
TransactionId xmax
Definition: heapam_xlog.h:115
OffsetNumber offnum
Definition: heapam_xlog.h:116
uint8 infobits_set
Definition: heapam_xlog.h:117
uint16 t_infomask
Definition: heapam_xlog.h:153
uint16 t_infomask2
Definition: heapam_xlog.h:152
OffsetNumber offnum
Definition: heapam_xlog.h:428
SharedInvalidationMessage msgs[FLEXIBLE_ARRAY_MEMBER]
Definition: heapam_xlog.h:433
bool relcacheInitFileInval
Definition: heapam_xlog.h:431
OffsetNumber offnum
Definition: heapam_xlog.h:162
TransactionId xmax
Definition: heapam_xlog.h:409
OffsetNumber offnum
Definition: heapam_xlog.h:410
uint8 infobits_set
Definition: heapam_xlog.h:400
OffsetNumber offnum
Definition: heapam_xlog.h:399
TransactionId xmax
Definition: heapam_xlog.h:398
OffsetNumber offsets[FLEXIBLE_ARRAY_MEMBER]
Definition: heapam_xlog.h:185
CommandId cmin
Definition: heapam_xlog.h:459
CommandId combocid
Definition: heapam_xlog.h:461
ItemPointerData target_tid
Definition: heapam_xlog.h:467
TransactionId top_xid
Definition: heapam_xlog.h:458
CommandId cmax
Definition: heapam_xlog.h:460
RelFileLocator target_locator
Definition: heapam_xlog.h:466
TransactionId mapped_xid
Definition: heapam_xlog.h:475
Oid relids[FLEXIBLE_ARRAY_MEMBER]
Definition: heapam_xlog.h:139
TransactionId new_xmax
Definition: heapam_xlog.h:224
uint8 old_infobits_set
Definition: heapam_xlog.h:222
TransactionId old_xmax
Definition: heapam_xlog.h:220
OffsetNumber old_offnum
Definition: heapam_xlog.h:221
OffsetNumber new_offnum
Definition: heapam_xlog.h:225
TransactionId snapshotConflictHorizon
Definition: heapam_xlog.h:446
TransactionId xmax
Definition: heapam_xlog.h:344
xlhp_freeze_plan plans[FLEXIBLE_ARRAY_MEMBER]
Definition: heapam_xlog.h:368
OffsetNumber data[FLEXIBLE_ARRAY_MEMBER]
Definition: heapam_xlog.h:381
uint64 XLogRecPtr
Definition: xlogdefs.h:21