PostgreSQL Source Code  git master
xlogreader.h
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * xlogreader.h
4  * Definitions for the generic XLog reading facility
5  *
6  * Portions Copyright (c) 2013-2024, PostgreSQL Global Development Group
7  *
8  * IDENTIFICATION
9  * src/include/access/xlogreader.h
10  *
11  * NOTES
12  * See the definition of the XLogReaderState struct for instructions on
13  * how to use the XLogReader infrastructure.
14  *
15  * The basic idea is to allocate an XLogReaderState via
16  * XLogReaderAllocate(), position the reader to the first record with
17  * XLogBeginRead() or XLogFindNextRecord(), and call XLogReadRecord()
18  * until it returns NULL.
19  *
20  * Callers supply a page_read callback if they want to call
21  * XLogReadRecord or XLogFindNextRecord; it can be passed in as NULL
22  * otherwise. The WALRead function can be used as a helper to write
23  * page_read callbacks, but it is not mandatory; callers that use it
24  * must supply a segment_open callback. The segment_close callback
25  * must always be supplied.
26  *
27  * After reading a record with XLogReadRecord(), it's decomposed into
28  * the per-block and main data parts, and the parts can be accessed
29  * with the XLogRec* macros and functions. You can also decode a
30  * record that's already constructed in memory, without reading from
31  * disk, by calling the DecodeXLogRecord() function.
32  *-------------------------------------------------------------------------
33  */
34 #ifndef XLOGREADER_H
35 #define XLOGREADER_H
36 
37 #ifndef FRONTEND
38 #include "access/transam.h"
39 #endif
40 
41 #include "access/xlogrecord.h"
42 #include "storage/buf.h"
43 
44 /* WALOpenSegment represents a WAL segment being read. */
45 typedef struct WALOpenSegment
46 {
47  int ws_file; /* segment file descriptor */
48  XLogSegNo ws_segno; /* segment number */
49  TimeLineID ws_tli; /* timeline ID of the currently open file */
51 
52 /* WALSegmentContext carries context information about WAL segments to read */
53 typedef struct WALSegmentContext
54 {
58 
59 typedef struct XLogReaderState XLogReaderState;
60 
61 /* Function type definitions for various xlogreader interactions */
63  XLogRecPtr targetPagePtr,
64  int reqLen,
65  XLogRecPtr targetRecPtr,
66  char *readBuf);
68  XLogSegNo nextSegNo,
69  TimeLineID *tli_p);
71 
72 typedef struct XLogReaderRoutine
73 {
74  /*
75  * Data input callback
76  *
77  * This callback shall read at least reqLen valid bytes of the xlog page
78  * starting at targetPagePtr, and store them in readBuf. The callback
79  * shall return the number of bytes read (never more than XLOG_BLCKSZ), or
80  * -1 on failure. The callback shall sleep, if necessary, to wait for the
81  * requested bytes to become available. The callback will not be invoked
82  * again for the same page unless more than the returned number of bytes
83  * are needed.
84  *
85  * targetRecPtr is the position of the WAL record we're reading. Usually
86  * it is equal to targetPagePtr + reqLen, but sometimes xlogreader needs
87  * to read and verify the page or segment header, before it reads the
88  * actual WAL record it's interested in. In that case, targetRecPtr can
89  * be used to determine which timeline to read the page from.
90  *
91  * The callback shall set ->seg.ws_tli to the TLI of the file the page was
92  * read from.
93  */
95 
96  /*
97  * Callback to open the specified WAL segment for reading. ->seg.ws_file
98  * shall be set to the file descriptor of the opened segment. In case of
99  * failure, an error shall be raised by the callback and it shall not
100  * return.
101  *
102  * "nextSegNo" is the number of the segment to be opened.
103  *
104  * "tli_p" is an input/output argument. WALRead() uses it to pass the
105  * timeline in which the new segment should be found, but the callback can
106  * use it to return the TLI that it actually opened.
107  */
109 
110  /*
111  * WAL segment close callback. ->seg.ws_file shall be set to a negative
112  * number.
113  */
116 
/*
 * XL_ROUTINE(...) expands to the address of an XLogReaderRoutine compound
 * literal whose initializer list is the macro's argument list.
 *
 * The expansion is not wrapped in parentheses, so use it as a complete
 * expression (for example, as a function argument); a postfix operator
 * written directly after the macro call would not apply to the resulting
 * pointer.
 */
117 #define XL_ROUTINE(...) &(XLogReaderRoutine){__VA_ARGS__}
118 
119 typedef struct
120 {
121  /* Is this block ref in use? */
122  bool in_use;
123 
124  /* Identify the block this refers to */
128 
129  /* Prefetching workspace. */
131 
132  /* copy of the fork_flags field from the XLogRecordBlockHeader */
134 
135  /* Information on full-page image, if any */
136  bool has_image; /* has image, even for consistency checking */
137  bool apply_image; /* has image that should be restored */
138  char *bkp_image;
143 
144  /* Buffer holding the rmgr-specific data associated with this block */
145  bool has_data;
146  char *data;
150 
151 /*
152  * The decoded contents of a record. This occupies a contiguous region of
153  * memory, with main_data and blocks[n].data pointing to memory after the
154  * members declared here.
155  */
156 typedef struct DecodedXLogRecord
157 {
158  /* Private member used for resource management. */
159  size_t size; /* total size of decoded record */
160  bool oversized; /* outside the regular decode buffer? */
161  struct DecodedXLogRecord *next; /* decoded record queue link */
162 
163  /* Public members. */
164  XLogRecPtr lsn; /* location */
165  XLogRecPtr next_lsn; /* location of next record */
166  XLogRecord header; /* header */
168  TransactionId toplevel_xid; /* XID of top-level transaction */
169  char *main_data; /* record's main data portion */
170  uint32 main_data_len; /* main data portion's length */
171  int max_block_id; /* highest block_id in use (-1 if none) */
174 
176 {
177  /*
178  * Operational callbacks
179  */
181 
182  /* ----------------------------------------
183  * Public parameters
184  * ----------------------------------------
185  */
186 
187  /*
188  * System identifier of the xlog files we're about to read. Set to zero
189  * (the default value) if unknown or unimportant.
190  */
192 
193  /*
194  * Opaque data for callbacks to use. Not used by XLogReader.
195  */
197 
198  /*
199  * Start and end point of the last record returned by XLogReadRecord().
200  * These are also available as record->lsn and record->next_lsn.
201  *
202  * EndRecPtr is also used as the position to read next. Calling
203  * XLogBeginRead() sets EndRecPtr to the starting position and ReadRecPtr
204  * to invalid.
205  */
206  XLogRecPtr ReadRecPtr; /* start of last record read */
207  XLogRecPtr EndRecPtr; /* end+1 of last record read */
208 
209  /*
210  * Set at the end of recovery: the start point of a partial record at the
211  * end of WAL (InvalidXLogRecPtr if there wasn't one), and the start
212  * location of its first contrecord that went missing.
213  */
216  /* Set when XLP_FIRST_IS_OVERWRITE_CONTRECORD is found */
218 
219 
220  /* ----------------------------------------
221  * Decoded representation of current record
222  *
223  * Use XLogRecGet* functions to investigate the record; these fields
224  * should not be accessed directly.
225  * ----------------------------------------
226  * Start and end point of the last record read and decoded by
227  * XLogReadRecordInternal(). NextRecPtr is also used as the position to
228  * decode next. Calling XLogBeginRead() sets NextRecPtr and EndRecPtr to
229  * the requested starting position.
230  */
231  XLogRecPtr DecodeRecPtr; /* start of last record decoded */
232  XLogRecPtr NextRecPtr; /* end+1 of last record decoded */
233  XLogRecPtr PrevRecPtr; /* start of previous record decoded */
234 
235  /* Last record returned by XLogReadRecord(). */
237 
238  /* ----------------------------------------
239  * private/internal state
240  * ----------------------------------------
241  */
242 
243  /*
244  * Buffer for decoded records. This is a circular buffer, though
245  * individual records can't be split in the middle, so some space is often
246  * wasted at the end. Oversized records that don't fit in this space are
247  * allocated separately.
248  */
251  bool free_decode_buffer; /* need to free? */
252  char *decode_buffer_head; /* data is read from the head */
253  char *decode_buffer_tail; /* new data is written at the tail */
254 
255  /*
256  * Queue of records that have been decoded. This is a linked list that
257  * usually consists of consecutive records in decode_buffer, but may also
258  * contain oversized records allocated with palloc().
259  */
260  DecodedXLogRecord *decode_queue_head; /* oldest decoded record */
261  DecodedXLogRecord *decode_queue_tail; /* newest decoded record */
262 
263  /*
264  * Buffer for currently read page (XLOG_BLCKSZ bytes, valid up to at least
265  * readLen bytes)
266  */
267  char *readBuf;
269 
270  /* last read XLOG position for data currently in readBuf */
274 
275  /*
276  * beginning of prior page read, and its TLI. Doesn't necessarily
277  * correspond to what's in readBuf; used for timeline sanity checks.
278  */
281 
282  /* beginning of the WAL record being read. */
284  /* timeline to read it from, 0 if a lookup is required */
286 
287  /*
288  * Safe point to read to in currTLI if current TLI is historical
289  * (tliSwitchPoint) or InvalidXLogRecPtr if on current timeline.
290  *
291  * Actually set to the start of the segment containing the timeline switch
292  * that ends currTLI's validity, not the LSN of the switch itself, since
293  * we can't assume the old segment will be present.
294  */
296 
297  /*
298  * If currTLI is not the most recent known timeline, the next timeline to
299  * read from when currTLIValidUntil is reached.
300  */
302 
303  /*
304  * Buffer for current ReadRecord result (expandable), used when a record
305  * crosses a page boundary.
306  */
309 
310  /* Buffer to hold error message */
313 
314  /*
315  * Flag to indicate to XLogPageReadCB that it should not block waiting for
316  * data.
317  */
319 };
320 
321 /*
322  * Check if XLogNextRecord() has any more queued records or an error to return.
323  */
324 static inline bool
326 {
327  return (state->decode_queue_head != NULL) || state->errormsg_deferred;
328 }
329 
330 /* Get a new XLogReader */
332  const char *waldir,
333  XLogReaderRoutine *routine,
334  void *private_data);
335 
336 /* Free an XLogReader */
337 extern void XLogReaderFree(XLogReaderState *state);
338 
339 /* Optionally provide a circular decoding buffer to allow readahead. */
341  void *buffer,
342  size_t size);
343 
344 /* Position the XLogReader to given record */
345 extern void XLogBeginRead(XLogReaderState *state, XLogRecPtr RecPtr);
347 
348 /* Return values from XLogPageReadCB. */
349 typedef enum XLogPageReadResult
350 {
351  XLREAD_SUCCESS = 0, /* record is successfully read */
352  XLREAD_FAIL = -1, /* failed during reading a record */
353  XLREAD_WOULDBLOCK = -2, /* nonblocking mode only, no data */
355 
356 /* Read the next XLog record. Returns NULL on end-of-WAL or failure */
358  char **errormsg);
359 
360 /* Consume the next record or error. */
362  char **errormsg);
363 
364 /* Release the previously returned record, if necessary. */
366 
367 /* Try to read ahead, if there is data and space. */
369  bool nonblocking);
370 
371 /* Validate a page */
373  XLogRecPtr recptr, char *phdr);
374 
375 /* Forget error produced by XLogReaderValidatePageHeader(). */
377 
378 /*
379  * Error information from WALRead that both backend and frontend caller can
380  * process. Currently only errors from pg_pread can be reported.
381  */
382 typedef struct WALReadError
383 {
384  int wre_errno; /* errno set by the last pg_pread() */
385  int wre_off; /* Offset we tried to read from. */
386  int wre_req; /* Bytes requested to be read. */
387  int wre_read; /* Bytes read by the last read(). */
388  WALOpenSegment wre_seg; /* Segment we tried to read from. */
390 
391 extern bool WALRead(XLogReaderState *state,
392  char *buf, XLogRecPtr startptr, Size count,
393  TimeLineID tli, WALReadError *errinfo);
394 
395 /* Functions for decoding an XLogRecord */
396 
397 extern size_t DecodeXLogRecordRequiredSpace(size_t xl_tot_len);
399  DecodedXLogRecord *decoded,
400  XLogRecord *record,
401  XLogRecPtr lsn,
402  char **errormsg);
403 
404 /*
405  * Macros that provide access to parts of the record most recently returned by
406  * XLogReadRecord() or XLogNextRecord().
     *
     * Every macro here dereferences (decoder)->record, so "decoder" must point
     * to an object whose "record" member is a valid pointer at the time of use.
     *
     * XLogRecHasBlockRef() expands both of its arguments twice; pass expressions
     * without side effects.
     *
     * XLogRecHasBlockImage(), XLogRecBlockImageApply() and XLogRecHasBlockData()
     * index blocks[] directly and do not compare block_id against max_block_id.
     * NOTE(review): presumably callers are expected to have established that
     * the block reference exists (e.g. via XLogRecHasBlockRef()) first --
     * confirm against the callers.
407  */
408 #define XLogRecGetTotalLen(decoder) ((decoder)->record->header.xl_tot_len)
409 #define XLogRecGetPrev(decoder) ((decoder)->record->header.xl_prev)
410 #define XLogRecGetInfo(decoder) ((decoder)->record->header.xl_info)
411 #define XLogRecGetRmid(decoder) ((decoder)->record->header.xl_rmid)
412 #define XLogRecGetXid(decoder) ((decoder)->record->header.xl_xid)
413 #define XLogRecGetOrigin(decoder) ((decoder)->record->record_origin)
414 #define XLogRecGetTopXid(decoder) ((decoder)->record->toplevel_xid)
415 #define XLogRecGetData(decoder) ((decoder)->record->main_data)
416 #define XLogRecGetDataLen(decoder) ((decoder)->record->main_data_len)
417 #define XLogRecHasAnyBlockRefs(decoder) ((decoder)->record->max_block_id >= 0)
418 #define XLogRecMaxBlockId(decoder) ((decoder)->record->max_block_id)
419 #define XLogRecGetBlock(decoder, i) (&(decoder)->record->blocks[(i)])
420 #define XLogRecHasBlockRef(decoder, block_id) \
421  (((decoder)->record->max_block_id >= (block_id)) && \
422  ((decoder)->record->blocks[block_id].in_use))
423 #define XLogRecHasBlockImage(decoder, block_id) \
424  ((decoder)->record->blocks[block_id].has_image)
425 #define XLogRecBlockImageApply(decoder, block_id) \
426  ((decoder)->record->blocks[block_id].apply_image)
427 #define XLogRecHasBlockData(decoder, block_id) \
428  ((decoder)->record->blocks[block_id].has_data)
429 
430 #ifndef FRONTEND
432 #endif
433 
434 extern bool RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page);
435 extern char *XLogRecGetBlockData(XLogReaderState *record, uint8 block_id, Size *len);
436 extern void XLogRecGetBlockTag(XLogReaderState *record, uint8 block_id,
437  RelFileLocator *rlocator, ForkNumber *forknum,
438  BlockNumber *blknum);
439 extern bool XLogRecGetBlockTagExtended(XLogReaderState *record, uint8 block_id,
440  RelFileLocator *rlocator, ForkNumber *forknum,
441  BlockNumber *blknum,
442  Buffer *prefetch_buffer);
443 
444 #endif /* XLOGREADER_H */
uint32 BlockNumber
Definition: block.h:31
int Buffer
Definition: buf.h:23
unsigned short uint16
Definition: c.h:492
unsigned int uint32
Definition: c.h:493
#define FLEXIBLE_ARRAY_MEMBER
Definition: c.h:385
unsigned char uint8
Definition: c.h:491
uint32 TransactionId
Definition: c.h:639
size_t Size
Definition: c.h:592
#define MAXPGPATH
const void size_t len
static char * buf
Definition: pg_test_fsync.c:73
ForkNumber
Definition: relpath.h:48
static pg_noinline void Size size
Definition: slab.c:607
uint16 hole_length
Definition: xlogreader.h:140
char * bkp_image
Definition: xlogreader.h:138
Buffer prefetch_buffer
Definition: xlogreader.h:130
RelFileLocator rlocator
Definition: xlogreader.h:125
uint16 data_bufsz
Definition: xlogreader.h:148
BlockNumber blkno
Definition: xlogreader.h:127
ForkNumber forknum
Definition: xlogreader.h:126
uint16 hole_offset
Definition: xlogreader.h:139
XLogRecord header
Definition: xlogreader.h:166
XLogRecPtr next_lsn
Definition: xlogreader.h:165
struct DecodedXLogRecord * next
Definition: xlogreader.h:161
TransactionId toplevel_xid
Definition: xlogreader.h:168
uint32 main_data_len
Definition: xlogreader.h:170
RepOriginId record_origin
Definition: xlogreader.h:167
DecodedBkpBlock blocks[FLEXIBLE_ARRAY_MEMBER]
Definition: xlogreader.h:172
XLogRecPtr lsn
Definition: xlogreader.h:164
XLogSegNo ws_segno
Definition: xlogreader.h:48
TimeLineID ws_tli
Definition: xlogreader.h:49
WALOpenSegment wre_seg
Definition: xlogreader.h:388
char ws_dir[MAXPGPATH]
Definition: xlogreader.h:55
WALSegmentCloseCB segment_close
Definition: xlogreader.h:114
WALSegmentOpenCB segment_open
Definition: xlogreader.h:108
XLogPageReadCB page_read
Definition: xlogreader.h:94
XLogRecPtr missingContrecPtr
Definition: xlogreader.h:215
DecodedXLogRecord * record
Definition: xlogreader.h:236
char * errormsg_buf
Definition: xlogreader.h:311
WALSegmentContext segcxt
Definition: xlogreader.h:271
XLogRecPtr PrevRecPtr
Definition: xlogreader.h:233
size_t decode_buffer_size
Definition: xlogreader.h:250
DecodedXLogRecord * decode_queue_head
Definition: xlogreader.h:260
XLogRecPtr EndRecPtr
Definition: xlogreader.h:207
uint32 readRecordBufSize
Definition: xlogreader.h:308
uint64 system_identifier
Definition: xlogreader.h:191
bool free_decode_buffer
Definition: xlogreader.h:251
char * decode_buffer
Definition: xlogreader.h:249
TimeLineID nextTLI
Definition: xlogreader.h:301
XLogRecPtr currTLIValidUntil
Definition: xlogreader.h:295
char * readRecordBuf
Definition: xlogreader.h:307
XLogRecPtr ReadRecPtr
Definition: xlogreader.h:206
XLogRecPtr abortedRecPtr
Definition: xlogreader.h:214
DecodedXLogRecord * decode_queue_tail
Definition: xlogreader.h:261
XLogReaderRoutine routine
Definition: xlogreader.h:180
char * decode_buffer_head
Definition: xlogreader.h:252
bool errormsg_deferred
Definition: xlogreader.h:312
TimeLineID latestPageTLI
Definition: xlogreader.h:280
TimeLineID currTLI
Definition: xlogreader.h:285
XLogRecPtr overwrittenRecPtr
Definition: xlogreader.h:217
XLogRecPtr DecodeRecPtr
Definition: xlogreader.h:231
XLogRecPtr currRecPtr
Definition: xlogreader.h:283
XLogRecPtr latestPagePtr
Definition: xlogreader.h:279
char * decode_buffer_tail
Definition: xlogreader.h:253
WALOpenSegment seg
Definition: xlogreader.h:272
void * private_data
Definition: xlogreader.h:196
XLogRecPtr NextRecPtr
Definition: xlogreader.h:232
Definition: regguts.h:323
int wal_segment_size
Definition: xlog.c:143
uint16 RepOriginId
Definition: xlogdefs.h:65
uint64 XLogRecPtr
Definition: xlogdefs.h:21
uint32 TimeLineID
Definition: xlogdefs.h:59
uint64 XLogSegNo
Definition: xlogdefs.h:48
bool XLogRecGetBlockTagExtended(XLogReaderState *record, uint8 block_id, RelFileLocator *rlocator, ForkNumber *forknum, BlockNumber *blknum, Buffer *prefetch_buffer)
Definition: xlogreader.c:1997
void XLogReaderSetDecodeBuffer(XLogReaderState *state, void *buffer, size_t size)
Definition: xlogreader.c:90
static bool XLogReaderHasQueuedRecordOrError(XLogReaderState *state)
Definition: xlogreader.h:325
struct DecodedXLogRecord DecodedXLogRecord
bool WALRead(XLogReaderState *state, char *buf, XLogRecPtr startptr, Size count, TimeLineID tli, WALReadError *errinfo)
Definition: xlogreader.c:1503
struct WALReadError WALReadError
void(* WALSegmentCloseCB)(XLogReaderState *xlogreader)
Definition: xlogreader.h:70
DecodedXLogRecord * XLogNextRecord(XLogReaderState *state, char **errormsg)
Definition: xlogreader.c:325
void XLogReaderResetError(XLogReaderState *state)
Definition: xlogreader.c:1365
bool XLogReaderValidatePageHeader(XLogReaderState *state, XLogRecPtr recptr, char *phdr)
Definition: xlogreader.c:1224
FullTransactionId XLogRecGetFullXid(XLogReaderState *record)
Definition: xlogreader.c:2167
void XLogReaderFree(XLogReaderState *state)
Definition: xlogreader.c:161
void XLogRecGetBlockTag(XLogReaderState *record, uint8 block_id, RelFileLocator *rlocator, ForkNumber *forknum, BlockNumber *blknum)
Definition: xlogreader.c:1971
DecodedXLogRecord * XLogReadAhead(XLogReaderState *state, bool nonblocking)
Definition: xlogreader.c:966
XLogReaderState * XLogReaderAllocate(int wal_segment_size, const char *waldir, XLogReaderRoutine *routine, void *private_data)
Definition: xlogreader.c:106
bool DecodeXLogRecord(XLogReaderState *state, DecodedXLogRecord *decoded, XLogRecord *record, XLogRecPtr lsn, char **errormsg)
Definition: xlogreader.c:1662
struct WALOpenSegment WALOpenSegment
struct XLogRecord * XLogReadRecord(XLogReaderState *state, char **errormsg)
Definition: xlogreader.c:389
char * XLogRecGetBlockData(XLogReaderState *record, uint8 block_id, Size *len)
Definition: xlogreader.c:2025
size_t DecodeXLogRecordRequiredSpace(size_t xl_tot_len)
Definition: xlogreader.c:1629
XLogPageReadResult
Definition: xlogreader.h:350
@ XLREAD_WOULDBLOCK
Definition: xlogreader.h:353
@ XLREAD_SUCCESS
Definition: xlogreader.h:351
@ XLREAD_FAIL
Definition: xlogreader.h:352
XLogRecPtr XLogFindNextRecord(XLogReaderState *state, XLogRecPtr RecPtr)
Definition: xlogreader.c:1383
void XLogBeginRead(XLogReaderState *state, XLogRecPtr RecPtr)
Definition: xlogreader.c:231
struct WALSegmentContext WALSegmentContext
bool RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page)
Definition: xlogreader.c:2056
void(* WALSegmentOpenCB)(XLogReaderState *xlogreader, XLogSegNo nextSegNo, TimeLineID *tli_p)
Definition: xlogreader.h:67
int(* XLogPageReadCB)(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr, int reqLen, XLogRecPtr targetRecPtr, char *readBuf)
Definition: xlogreader.h:62
struct XLogReaderRoutine XLogReaderRoutine
XLogRecPtr XLogReleasePreviousRecord(XLogReaderState *state)
Definition: xlogreader.c:249
static XLogReaderState * xlogreader
Definition: xlogrecovery.c:188