PostgreSQL Source Code git master
xlogreader.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * xlogreader.c
4 * Generic XLog reading facility
5 *
6 * Portions Copyright (c) 2013-2025, PostgreSQL Global Development Group
7 *
8 * IDENTIFICATION
9 * src/backend/access/transam/xlogreader.c
10 *
11 * NOTES
12 * See xlogreader.h for more notes on this facility.
13 *
14 * This file is compiled as both front-end and backend code, so it
15 * may not use ereport, server-defined static variables, etc.
16 *-------------------------------------------------------------------------
17 */
18#include "postgres.h"
19
20#include <unistd.h>
21#ifdef USE_LZ4
22#include <lz4.h>
23#endif
24#ifdef USE_ZSTD
25#include <zstd.h>
26#endif
27
28#include "access/transam.h"
30#include "access/xlogreader.h"
31#include "access/xlogrecord.h"
32#include "catalog/pg_control.h"
34#include "replication/origin.h"
35
36#ifndef FRONTEND
37#include "pgstat.h"
38#else
39#include "common/logging.h"
40#endif
41
42static void report_invalid_record(XLogReaderState *state, const char *fmt,...)
44static void allocate_recordbuf(XLogReaderState *state, uint32 reclength);
46 int reqLen);
50 XLogRecPtr PrevRecPtr, XLogRecord *record, bool randAccess);
52 XLogRecPtr recptr);
55 int segsize, const char *waldir);
56
57/* size of the buffer allocated for error message. */
58#define MAX_ERRORMSG_LEN 1000
59
60/*
61 * Default size; large enough that typical users of XLogReader won't often need
62 * to use the 'oversized' memory allocation code path.
63 */
64#define DEFAULT_DECODE_BUFFER_SIZE (64 * 1024)
65
66/*
67 * Construct a string in state->errormsg_buf explaining what's wrong with
68 * the current record being read.
69 */
70static void
72{
73 va_list args;
74
75 fmt = _(fmt);
76
78 vsnprintf(state->errormsg_buf, MAX_ERRORMSG_LEN, fmt, args);
79 va_end(args);
80
81 state->errormsg_deferred = true;
82}
83
84/*
85 * Set the size of the decoding buffer. A pointer to a caller supplied memory
86 * region may also be passed in, in which case non-oversized records will be
87 * decoded there.
88 */
89void
91{
92 Assert(state->decode_buffer == NULL);
93
94 state->decode_buffer = buffer;
95 state->decode_buffer_size = size;
96 state->decode_buffer_tail = buffer;
97 state->decode_buffer_head = buffer;
98}
99
100/*
101 * Allocate and initialize a new XLogReader.
102 *
103 * Returns NULL if the xlogreader couldn't be allocated.
104 */
106XLogReaderAllocate(int wal_segment_size, const char *waldir,
107 XLogReaderRoutine *routine, void *private_data)
108{
110
114 if (!state)
115 return NULL;
116
117 /* initialize caller-provided support functions */
118 state->routine = *routine;
119
120 /*
121 * Permanently allocate readBuf. We do it this way, rather than just
122 * making a static array, for two reasons: (1) no need to waste the
123 * storage in most instantiations of the backend; (2) a static char array
124 * isn't guaranteed to have any particular alignment, whereas
125 * palloc_extended() will provide MAXALIGN'd storage.
126 */
127 state->readBuf = (char *) palloc_extended(XLOG_BLCKSZ,
129 if (!state->readBuf)
130 {
131 pfree(state);
132 return NULL;
133 }
134
135 /* Initialize segment info. */
137 waldir);
138
139 /* system_identifier initialized to zeroes above */
140 state->private_data = private_data;
141 /* ReadRecPtr, EndRecPtr and readLen initialized to zeroes above */
142 state->errormsg_buf = palloc_extended(MAX_ERRORMSG_LEN + 1,
144 if (!state->errormsg_buf)
145 {
146 pfree(state->readBuf);
147 pfree(state);
148 return NULL;
149 }
150 state->errormsg_buf[0] = '\0';
151
152 /*
153 * Allocate an initial readRecordBuf of minimal size, which can later be
154 * enlarged if necessary.
155 */
157 return state;
158}
159
160void
162{
163 if (state->seg.ws_file != -1)
164 state->routine.segment_close(state);
165
166 if (state->decode_buffer && state->free_decode_buffer)
167 pfree(state->decode_buffer);
168
169 pfree(state->errormsg_buf);
170 if (state->readRecordBuf)
171 pfree(state->readRecordBuf);
172 pfree(state->readBuf);
173 pfree(state);
174}
175
176/*
177 * Allocate readRecordBuf to fit a record of at least the given length.
178 *
179 * readRecordBufSize is set to the new buffer size.
180 *
181 * To avoid useless small increases, round its size to a multiple of
182 * XLOG_BLCKSZ, and make sure it's at least 5*Max(BLCKSZ, XLOG_BLCKSZ) to start
183 * with. (That is enough for all "normal" records, but very large commit or
184 * abort records might need more space.)
185 *
186 * Note: This routine should *never* be called for xl_tot_len until the header
187 * of the record has been fully validated.
188 */
189static void
191{
192 uint32 newSize = reclength;
193
194 newSize += XLOG_BLCKSZ - (newSize % XLOG_BLCKSZ);
195 newSize = Max(newSize, 5 * Max(BLCKSZ, XLOG_BLCKSZ));
196
197 if (state->readRecordBuf)
198 pfree(state->readRecordBuf);
199 state->readRecordBuf = (char *) palloc(newSize);
200 state->readRecordBufSize = newSize;
201}
202
203/*
204 * Initialize the passed segment structs.
205 */
206static void
208 int segsize, const char *waldir)
209{
210 seg->ws_file = -1;
211 seg->ws_segno = 0;
212 seg->ws_tli = 0;
213
214 segcxt->ws_segsize = segsize;
215 if (waldir)
216 snprintf(segcxt->ws_dir, MAXPGPATH, "%s", waldir);
217}
218
219/*
220 * Begin reading WAL at 'RecPtr'.
221 *
222 * 'RecPtr' should point to the beginning of a valid WAL record. Pointing at
223 * the beginning of a page is also OK, if there is a new record right after
224 * the page header, i.e. not a continuation.
225 *
226 * This does not make any attempt to read the WAL yet, and hence cannot fail.
227 * If the starting address is not correct, the first call to XLogReadRecord()
228 * will error out.
229 */
230void
232{
233 Assert(!XLogRecPtrIsInvalid(RecPtr));
234
236
237 /* Begin at the passed-in record pointer. */
238 state->EndRecPtr = RecPtr;
239 state->NextRecPtr = RecPtr;
240 state->ReadRecPtr = InvalidXLogRecPtr;
241 state->DecodeRecPtr = InvalidXLogRecPtr;
242}
243
244/*
245 * Release the last record that was returned by XLogNextRecord(), if any, to
246 * free up space. Returns the LSN past the end of the record.
247 */
250{
251 DecodedXLogRecord *record;
252 XLogRecPtr next_lsn;
253
254 if (!state->record)
255 return InvalidXLogRecPtr;
256
257 /*
258 * Remove it from the decoded record queue. It must be the oldest item
259 * decoded, decode_queue_head.
260 */
261 record = state->record;
262 next_lsn = record->next_lsn;
263 Assert(record == state->decode_queue_head);
264 state->record = NULL;
265 state->decode_queue_head = record->next;
266
267 /* It might also be the newest item decoded, decode_queue_tail. */
268 if (state->decode_queue_tail == record)
269 state->decode_queue_tail = NULL;
270
271 /* Release the space. */
272 if (unlikely(record->oversized))
273 {
274 /* It's not in the decode buffer, so free it to release space. */
275 pfree(record);
276 }
277 else
278 {
279 /* It must be the head (oldest) record in the decode buffer. */
280 Assert(state->decode_buffer_head == (char *) record);
281
282 /*
283 * We need to update head to point to the next record that is in the
284 * decode buffer, if any, being careful to skip oversized ones
285 * (they're not in the decode buffer).
286 */
287 record = record->next;
288 while (unlikely(record && record->oversized))
289 record = record->next;
290
291 if (record)
292 {
293 /* Adjust head to release space up to the next record. */
294 state->decode_buffer_head = (char *) record;
295 }
296 else
297 {
298 /*
299 * Otherwise we might as well just reset head and tail to the
300 * start of the buffer space, because we're empty. This means
301 * we'll keep overwriting the same piece of memory if we're not
302 * doing any prefetching.
303 */
304 state->decode_buffer_head = state->decode_buffer;
305 state->decode_buffer_tail = state->decode_buffer;
306 }
307 }
308
309 return next_lsn;
310}
311
312/*
313 * Attempt to read an XLOG record.
314 *
315 * XLogBeginRead() or XLogFindNextRecord() and then XLogReadAhead() must be
316 * called before the first call to XLogNextRecord(). This function returns
317 * records and errors that were put into an internal queue by XLogReadAhead().
318 *
319 * On success, a record is returned.
320 *
321 * The returned record (or *errormsg) points to an internal buffer that's
322 * valid until the next call to XLogNextRecord.
323 */
326{
327 /* Release the last record returned by XLogNextRecord(). */
329
330 if (state->decode_queue_head == NULL)
331 {
332 *errormsg = NULL;
333 if (state->errormsg_deferred)
334 {
335 if (state->errormsg_buf[0] != '\0')
336 *errormsg = state->errormsg_buf;
337 state->errormsg_deferred = false;
338 }
339
340 /*
341 * state->EndRecPtr is expected to have been set by the last call to
342 * XLogBeginRead() or XLogNextRecord(), and is the location of the
343 * error.
344 */
345 Assert(!XLogRecPtrIsInvalid(state->EndRecPtr));
346
347 return NULL;
348 }
349
350 /*
351 * Record this as the most recent record returned, so that we'll release
352 * it next time. This also exposes it to the traditional
353 * XLogRecXXX(xlogreader) macros, which work with the decoder rather than
354 * the record for historical reasons.
355 */
356 state->record = state->decode_queue_head;
357
358 /*
359 * Update the pointers to the beginning and one-past-the-end of this
360 * record, again for the benefit of historical code that expected the
361 * decoder to track this rather than accessing these fields of the record
362 * itself.
363 */
364 state->ReadRecPtr = state->record->lsn;
365 state->EndRecPtr = state->record->next_lsn;
366
367 *errormsg = NULL;
368
369 return state->record;
370}
371
372/*
373 * Attempt to read an XLOG record.
374 *
375 * XLogBeginRead() or XLogFindNextRecord() must be called before the first call
376 * to XLogReadRecord().
377 *
378 * If the page_read callback fails to read the requested data, NULL is
379 * returned. The callback is expected to have reported the error; errormsg
380 * is set to NULL.
381 *
382 * If the reading fails for some other reason, NULL is also returned, and
383 * *errormsg is set to a string with details of the failure.
384 *
385 * The returned pointer (or *errormsg) points to an internal buffer that's
386 * valid until the next call to XLogReadRecord.
387 */
390{
391 DecodedXLogRecord *decoded;
392
393 /*
394 * Release last returned record, if there is one. We need to do this so
395 * that we can check for empty decode queue accurately.
396 */
398
399 /*
400 * Call XLogReadAhead() in blocking mode to make sure there is something
401 * in the queue, though we don't use the result.
402 */
404 XLogReadAhead(state, false /* nonblocking */ );
405
406 /* Consume the head record or error. */
407 decoded = XLogNextRecord(state, errormsg);
408 if (decoded)
409 {
410 /*
411 * This function returns a pointer to the record's header, not the
412 * actual decoded record. The caller will access the decoded record
413 * through the XLogRecGetXXX() macros, which reach the decoded
414 * record as xlogreader->record.
415 */
416 Assert(state->record == decoded);
417 return &decoded->header;
418 }
419
420 return NULL;
421}
422
423/*
424 * Allocate space for a decoded record. The only member of the returned
425 * object that is initialized is the 'oversized' flag, indicating that the
426 * decoded record wouldn't fit in the decode buffer and must eventually be
427 * freed explicitly.
428 *
429 * The caller is responsible for adjusting decode_buffer_tail with the real
430 * size after successfully decoding a record into this space. This way, if
431 * decoding fails, then there is nothing to undo unless the 'oversized' flag
432 * was set and pfree() must be called.
433 *
434 * Return NULL if there is no space in the decode buffer and allow_oversized
435 * is false, or if memory allocation fails for an oversized buffer.
436 */
437static DecodedXLogRecord *
438XLogReadRecordAlloc(XLogReaderState *state, size_t xl_tot_len, bool allow_oversized)
439{
440 size_t required_space = DecodeXLogRecordRequiredSpace(xl_tot_len);
441 DecodedXLogRecord *decoded = NULL;
442
443 /* Allocate a circular decode buffer if we don't have one already. */
444 if (unlikely(state->decode_buffer == NULL))
445 {
446 if (state->decode_buffer_size == 0)
447 state->decode_buffer_size = DEFAULT_DECODE_BUFFER_SIZE;
448 state->decode_buffer = palloc(state->decode_buffer_size);
449 state->decode_buffer_head = state->decode_buffer;
450 state->decode_buffer_tail = state->decode_buffer;
451 state->free_decode_buffer = true;
452 }
453
454 /* Try to allocate space in the circular decode buffer. */
455 if (state->decode_buffer_tail >= state->decode_buffer_head)
456 {
457 /* Empty, or tail is to the right of head. */
458 if (required_space <=
459 state->decode_buffer_size -
460 (state->decode_buffer_tail - state->decode_buffer))
461 {
462 /*-
463 * There is space between tail and end.
464 *
465 * +-----+--------------------+-----+
466 * | |////////////////////|here!|
467 * +-----+--------------------+-----+
468 * ^ ^
469 * | |
470 * h t
471 */
472 decoded = (DecodedXLogRecord *) state->decode_buffer_tail;
473 decoded->oversized = false;
474 return decoded;
475 }
476 else if (required_space <
477 state->decode_buffer_head - state->decode_buffer)
478 {
479 /*-
480 * There is space between start and head.
481 *
482 * +-----+--------------------+-----+
483 * |here!|////////////////////| |
484 * +-----+--------------------+-----+
485 * ^ ^
486 * | |
487 * h t
488 */
489 decoded = (DecodedXLogRecord *) state->decode_buffer;
490 decoded->oversized = false;
491 return decoded;
492 }
493 }
494 else
495 {
496 /* Tail is to the left of head. */
497 if (required_space <
498 state->decode_buffer_head - state->decode_buffer_tail)
499 {
500 /*-
501 * There is space between tail and head.
502 *
503 * +-----+--------------------+-----+
504 * |/////|here! |/////|
505 * +-----+--------------------+-----+
506 * ^ ^
507 * | |
508 * t h
509 */
510 decoded = (DecodedXLogRecord *) state->decode_buffer_tail;
511 decoded->oversized = false;
512 return decoded;
513 }
514 }
515
516 /* Not enough space in the decode buffer. Are we allowed to allocate? */
517 if (allow_oversized)
518 {
519 decoded = palloc(required_space);
520 decoded->oversized = true;
521 return decoded;
522 }
523
524 return NULL;
525}
526
529{
530 XLogRecPtr RecPtr;
531 XLogRecord *record;
532 XLogRecPtr targetPagePtr;
533 bool randAccess;
534 uint32 len,
535 total_len;
536 uint32 targetRecOff;
537 uint32 pageHeaderSize;
538 bool assembled;
539 bool gotheader;
540 int readOff;
541 DecodedXLogRecord *decoded;
542 char *errormsg; /* not used */
543
544 /*
545 * randAccess indicates whether to verify the previous-record pointer of
546 * the record we're reading. We only do this if we're reading
547 * sequentially, which is what we initially assume.
548 */
549 randAccess = false;
550
551 /* reset error state */
552 state->errormsg_buf[0] = '\0';
553 decoded = NULL;
554
555 state->abortedRecPtr = InvalidXLogRecPtr;
556 state->missingContrecPtr = InvalidXLogRecPtr;
557
558 RecPtr = state->NextRecPtr;
559
560 if (state->DecodeRecPtr != InvalidXLogRecPtr)
561 {
562 /* read the record after the one we just read */
563
564 /*
565 * NextRecPtr is pointing to end+1 of the previous WAL record. If
566 * we're at a page boundary, no more records can fit on the current
567 * page. We must skip over the page header, but we can't do that until
568 * we've read in the page, since the header size is variable.
569 */
570 }
571 else
572 {
573 /*
574 * Caller supplied a position to start at.
575 *
576 * In this case, NextRecPtr should already be pointing either to a
577 * valid record starting position or alternatively to the beginning of
578 * a page. See the header comments for XLogBeginRead.
579 */
580 Assert(RecPtr % XLOG_BLCKSZ == 0 || XRecOffIsValid(RecPtr));
581 randAccess = true;
582 }
583
584restart:
585 state->nonblocking = nonblocking;
586 state->currRecPtr = RecPtr;
587 assembled = false;
588
589 targetPagePtr = RecPtr - (RecPtr % XLOG_BLCKSZ);
590 targetRecOff = RecPtr % XLOG_BLCKSZ;
591
592 /*
593 * Read the page containing the record into state->readBuf. Request enough
594 * bytes to cover the whole record header, or at least the part of it that
595 * fits on the same page.
596 */
597 readOff = ReadPageInternal(state, targetPagePtr,
598 Min(targetRecOff + SizeOfXLogRecord, XLOG_BLCKSZ));
600 return XLREAD_WOULDBLOCK;
601 else if (readOff < 0)
602 goto err;
603
604 /*
605 * ReadPageInternal always returns at least the page header, so we can
606 * examine it now.
607 */
608 pageHeaderSize = XLogPageHeaderSize((XLogPageHeader) state->readBuf);
609 if (targetRecOff == 0)
610 {
611 /*
612 * At page start, so skip over page header.
613 */
614 RecPtr += pageHeaderSize;
615 targetRecOff = pageHeaderSize;
616 }
617 else if (targetRecOff < pageHeaderSize)
618 {
619 report_invalid_record(state, "invalid record offset at %X/%X: expected at least %u, got %u",
620 LSN_FORMAT_ARGS(RecPtr),
621 pageHeaderSize, targetRecOff);
622 goto err;
623 }
624
625 if ((((XLogPageHeader) state->readBuf)->xlp_info & XLP_FIRST_IS_CONTRECORD) &&
626 targetRecOff == pageHeaderSize)
627 {
628 report_invalid_record(state, "contrecord is requested by %X/%X",
629 LSN_FORMAT_ARGS(RecPtr));
630 goto err;
631 }
632
633 /* ReadPageInternal has verified the page header */
634 Assert(pageHeaderSize <= readOff);
635
636 /*
637 * Read the record length.
638 *
639 * NB: Even though we use an XLogRecord pointer here, the whole record
640 * header might not fit on this page. xl_tot_len is the first field of the
641 * struct, so it must be on this page (the records are MAXALIGNed), but we
642 * cannot access any other fields until we've verified that we got the
643 * whole header.
644 */
645 record = (XLogRecord *) (state->readBuf + RecPtr % XLOG_BLCKSZ);
646 total_len = record->xl_tot_len;
647
648 /*
649 * If the whole record header is on this page, validate it immediately.
650 * Otherwise do just a basic sanity check on xl_tot_len, and validate the
651 * rest of the header after reading it from the next page. The xl_tot_len
652 * check is necessary here to ensure that we enter the "Need to reassemble
653 * record" code path below; otherwise we might fail to apply
654 * ValidXLogRecordHeader at all.
655 */
656 if (targetRecOff <= XLOG_BLCKSZ - SizeOfXLogRecord)
657 {
658 if (!ValidXLogRecordHeader(state, RecPtr, state->DecodeRecPtr, record,
659 randAccess))
660 goto err;
661 gotheader = true;
662 }
663 else
664 {
665 /* There may be no next page if it's too small. */
666 if (total_len < SizeOfXLogRecord)
667 {
669 "invalid record length at %X/%X: expected at least %u, got %u",
670 LSN_FORMAT_ARGS(RecPtr),
671 (uint32) SizeOfXLogRecord, total_len);
672 goto err;
673 }
674 /* We'll validate the header once we have the next page. */
675 gotheader = false;
676 }
677
678 /*
679 * Try to find space to decode this record, if we can do so without
680 * calling palloc. If we can't, we'll try again below after we've
681 * validated that total_len isn't garbage bytes from a recycled WAL page.
682 */
683 decoded = XLogReadRecordAlloc(state,
684 total_len,
685 false /* allow_oversized */ );
686 if (decoded == NULL && nonblocking)
687 {
688 /*
689 * There is no space in the circular decode buffer, and the caller is
690 * only reading ahead. The caller should consume existing records to
691 * make space.
692 */
693 return XLREAD_WOULDBLOCK;
694 }
695
696 len = XLOG_BLCKSZ - RecPtr % XLOG_BLCKSZ;
697 if (total_len > len)
698 {
699 /* Need to reassemble record */
700 char *contdata;
701 XLogPageHeader pageHeader;
702 char *buffer;
703 uint32 gotlen;
704
705 assembled = true;
706
707 /*
708 * We always have space for a couple of pages, enough to validate a
709 * boundary-spanning record header.
710 */
711 Assert(state->readRecordBufSize >= XLOG_BLCKSZ * 2);
712 Assert(state->readRecordBufSize >= len);
713
714 /* Copy the first fragment of the record from the first page. */
715 memcpy(state->readRecordBuf,
716 state->readBuf + RecPtr % XLOG_BLCKSZ, len);
717 buffer = state->readRecordBuf + len;
718 gotlen = len;
719
720 do
721 {
722 /* Calculate pointer to beginning of next page */
723 targetPagePtr += XLOG_BLCKSZ;
724
725 /* Wait for the next page to become available */
726 readOff = ReadPageInternal(state, targetPagePtr,
727 Min(total_len - gotlen + SizeOfXLogShortPHD,
728 XLOG_BLCKSZ));
729
731 return XLREAD_WOULDBLOCK;
732 else if (readOff < 0)
733 goto err;
734
736
737 pageHeader = (XLogPageHeader) state->readBuf;
738
739 /*
740 * If we were expecting a continuation record and got an
741 * "overwrite contrecord" flag, that means the continuation record
742 * was overwritten with a different record. Restart the read by
743 * assuming the address to read is the location where we found
744 * this flag; but keep track of the LSN of the record we were
745 * reading, for later verification.
746 */
748 {
749 state->overwrittenRecPtr = RecPtr;
750 RecPtr = targetPagePtr;
751 goto restart;
752 }
753
754 /* Check that the continuation on next page looks valid */
755 if (!(pageHeader->xlp_info & XLP_FIRST_IS_CONTRECORD))
756 {
758 "there is no contrecord flag at %X/%X",
759 LSN_FORMAT_ARGS(RecPtr));
760 goto err;
761 }
762
763 /*
764 * Cross-check that xlp_rem_len agrees with how much of the record
765 * we expect there to be left.
766 */
767 if (pageHeader->xlp_rem_len == 0 ||
768 total_len != (pageHeader->xlp_rem_len + gotlen))
769 {
771 "invalid contrecord length %u (expected %lld) at %X/%X",
772 pageHeader->xlp_rem_len,
773 ((long long) total_len) - gotlen,
774 LSN_FORMAT_ARGS(RecPtr));
775 goto err;
776 }
777
778 /* Append the continuation from this page to the buffer */
779 pageHeaderSize = XLogPageHeaderSize(pageHeader);
780
781 if (readOff < pageHeaderSize)
782 readOff = ReadPageInternal(state, targetPagePtr,
783 pageHeaderSize);
784
785 Assert(pageHeaderSize <= readOff);
786
787 contdata = (char *) state->readBuf + pageHeaderSize;
788 len = XLOG_BLCKSZ - pageHeaderSize;
789 if (pageHeader->xlp_rem_len < len)
790 len = pageHeader->xlp_rem_len;
791
792 if (readOff < pageHeaderSize + len)
793 readOff = ReadPageInternal(state, targetPagePtr,
794 pageHeaderSize + len);
795
796 memcpy(buffer, (char *) contdata, len);
797 buffer += len;
798 gotlen += len;
799
800 /* If we just reassembled the record header, validate it. */
801 if (!gotheader)
802 {
803 record = (XLogRecord *) state->readRecordBuf;
804 if (!ValidXLogRecordHeader(state, RecPtr, state->DecodeRecPtr,
805 record, randAccess))
806 goto err;
807 gotheader = true;
808 }
809
810 /*
811 * We might need a bigger buffer. We have validated the record
812 * header, in the case that it split over a page boundary. We've
813 * also cross-checked total_len against xlp_rem_len on the second
814 * page, and verified xlp_pageaddr on both.
815 */
816 if (total_len > state->readRecordBufSize)
817 {
818 char save_copy[XLOG_BLCKSZ * 2];
819
820 /*
821 * Save and restore the data we already had. It can't be more
822 * than two pages.
823 */
824 Assert(gotlen <= lengthof(save_copy));
825 Assert(gotlen <= state->readRecordBufSize);
826 memcpy(save_copy, state->readRecordBuf, gotlen);
827 allocate_recordbuf(state, total_len);
828 memcpy(state->readRecordBuf, save_copy, gotlen);
829 buffer = state->readRecordBuf + gotlen;
830 }
831 } while (gotlen < total_len);
832 Assert(gotheader);
833
834 record = (XLogRecord *) state->readRecordBuf;
835 if (!ValidXLogRecord(state, record, RecPtr))
836 goto err;
837
838 pageHeaderSize = XLogPageHeaderSize((XLogPageHeader) state->readBuf);
839 state->DecodeRecPtr = RecPtr;
840 state->NextRecPtr = targetPagePtr + pageHeaderSize
841 + MAXALIGN(pageHeader->xlp_rem_len);
842 }
843 else
844 {
845 /* Wait for the record data to become available */
846 readOff = ReadPageInternal(state, targetPagePtr,
847 Min(targetRecOff + total_len, XLOG_BLCKSZ));
849 return XLREAD_WOULDBLOCK;
850 else if (readOff < 0)
851 goto err;
852
853 /* Record does not cross a page boundary */
854 if (!ValidXLogRecord(state, record, RecPtr))
855 goto err;
856
857 state->NextRecPtr = RecPtr + MAXALIGN(total_len);
858
859 state->DecodeRecPtr = RecPtr;
860 }
861
862 /*
863 * Special processing if it's an XLOG SWITCH record
864 */
865 if (record->xl_rmid == RM_XLOG_ID &&
866 (record->xl_info & ~XLR_INFO_MASK) == XLOG_SWITCH)
867 {
868 /* Pretend it extends to end of segment */
869 state->NextRecPtr += state->segcxt.ws_segsize - 1;
870 state->NextRecPtr -= XLogSegmentOffset(state->NextRecPtr, state->segcxt.ws_segsize);
871 }
872
873 /*
874 * If we got here without a DecodedXLogRecord, it means we needed to
875 * validate total_len before trusting it, but by now we've done that.
876 */
877 if (decoded == NULL)
878 {
879 Assert(!nonblocking);
880 decoded = XLogReadRecordAlloc(state,
881 total_len,
882 true /* allow_oversized */ );
883 /* allocation should always happen under allow_oversized */
884 Assert(decoded != NULL);
885 }
886
887 if (DecodeXLogRecord(state, decoded, record, RecPtr, &errormsg))
888 {
889 /* Record the location of the next record. */
890 decoded->next_lsn = state->NextRecPtr;
891
892 /*
893 * If it's in the decode buffer, mark the decode buffer space as
894 * occupied.
895 */
896 if (!decoded->oversized)
897 {
898 /* The new decode buffer head must be MAXALIGNed. */
899 Assert(decoded->size == MAXALIGN(decoded->size));
900 if ((char *) decoded == state->decode_buffer)
901 state->decode_buffer_tail = state->decode_buffer + decoded->size;
902 else
903 state->decode_buffer_tail += decoded->size;
904 }
905
906 /* Insert it into the queue of decoded records. */
907 Assert(state->decode_queue_tail != decoded);
908 if (state->decode_queue_tail)
909 state->decode_queue_tail->next = decoded;
910 state->decode_queue_tail = decoded;
911 if (!state->decode_queue_head)
912 state->decode_queue_head = decoded;
913 return XLREAD_SUCCESS;
914 }
915
916err:
917 if (assembled)
918 {
919 /*
920 * We get here when a record that spans multiple pages needs to be
921 * assembled, but something went wrong -- perhaps a contrecord piece
922 * was lost. If caller is WAL replay, it will know where the aborted
923 * record was and where to direct followup WAL to be written, marking
924 * the next piece with XLP_FIRST_IS_OVERWRITE_CONTRECORD, which will
925 * in turn signal downstream WAL consumers that the broken WAL record
926 * is to be ignored.
927 */
928 state->abortedRecPtr = RecPtr;
929 state->missingContrecPtr = targetPagePtr;
930
931 /*
932 * If we got here without reporting an error, make sure an error is
933 * queued so that XLogPrefetcherReadRecord() doesn't bring us back a
934 * second time and clobber the above state.
935 */
936 state->errormsg_deferred = true;
937 }
938
939 if (decoded && decoded->oversized)
940 pfree(decoded);
941
942 /*
943 * Invalidate the read state. We might read from a different source after
944 * failure.
945 */
947
948 /*
949 * If an error was written to errormsg_buf, it'll be returned to the
950 * caller of XLogReadRecord() after all successfully decoded records from
951 * the read queue.
952 */
953
954 return XLREAD_FAIL;
955}
956
957/*
958 * Try to decode the next available record, and return it. The record will
959 * also be returned to XLogNextRecord(), which must be called to 'consume'
960 * each record.
961 *
962 * If nonblocking is true, may return NULL due to lack of data or WAL decoding
963 * space.
964 */
967{
968 XLogPageReadResult result;
969
970 if (state->errormsg_deferred)
971 return NULL;
972
973 result = XLogDecodeNextRecord(state, nonblocking);
974 if (result == XLREAD_SUCCESS)
975 {
976 Assert(state->decode_queue_tail != NULL);
977 return state->decode_queue_tail;
978 }
979
980 return NULL;
981}
982
983/*
984 * Read a single xlog page including at least [pageptr, reqLen] of valid data
985 * via the page_read() callback.
986 *
987 * Returns XLREAD_FAIL if the required page cannot be read for some
988 * reason; errormsg_buf is set in that case (unless the error occurs in the
989 * page_read callback).
990 *
991 * Returns XLREAD_WOULDBLOCK if the requested data can't be read without
992 * waiting. This can be returned only if the installed page_read callback
993 * respects the state->nonblocking flag, and cannot read the requested data
994 * immediately.
995 *
996 * We fetch the page from a reader-local cache if we know we have the required
997 * data and if there hasn't been any error since caching the data.
998 */
999static int
1001{
1002 int readLen;
1003 uint32 targetPageOff;
1004 XLogSegNo targetSegNo;
1005 XLogPageHeader hdr;
1006
1007 Assert((pageptr % XLOG_BLCKSZ) == 0);
1008
1009 XLByteToSeg(pageptr, targetSegNo, state->segcxt.ws_segsize);
1010 targetPageOff = XLogSegmentOffset(pageptr, state->segcxt.ws_segsize);
1011
1012 /* check whether we have all the requested data already */
1013 if (targetSegNo == state->seg.ws_segno &&
1014 targetPageOff == state->segoff && reqLen <= state->readLen)
1015 return state->readLen;
1016
1017 /*
1018 * Invalidate contents of internal buffer before read attempt. Just set
1019 * the length to 0, rather than a full XLogReaderInvalReadState(), so we
1020 * don't forget the segment we last successfully read.
1021 */
1022 state->readLen = 0;
1023
1024 /*
1025 * Data is not in our buffer.
1026 *
1027 * Every time we actually read the segment, even if we looked at parts of
1028 * it before, we need to do verification as the page_read callback might
1029 * now be rereading data from a different source.
1030 *
1031 * Whenever switching to a new WAL segment, we read the first page of the
1032 * file and validate its header, even if that's not where the target
1033 * record is. This is so that we can check the additional identification
1034 * info that is present in the first page's "long" header.
1035 */
1036 if (targetSegNo != state->seg.ws_segno && targetPageOff != 0)
1037 {
1038 XLogRecPtr targetSegmentPtr = pageptr - targetPageOff;
1039
1040 readLen = state->routine.page_read(state, targetSegmentPtr, XLOG_BLCKSZ,
1041 state->currRecPtr,
1042 state->readBuf);
1044 return XLREAD_WOULDBLOCK;
1045 else if (readLen < 0)
1046 goto err;
1047
1048 /* we can be sure to have enough WAL available, we scrolled back */
1049 Assert(readLen == XLOG_BLCKSZ);
1050
1051 if (!XLogReaderValidatePageHeader(state, targetSegmentPtr,
1052 state->readBuf))
1053 goto err;
1054 }
1055
1056 /*
1057 * First, read the requested data length, but at least a short page header
1058 * so that we can validate it.
1059 */
1060 readLen = state->routine.page_read(state, pageptr, Max(reqLen, SizeOfXLogShortPHD),
1061 state->currRecPtr,
1062 state->readBuf);
1064 return XLREAD_WOULDBLOCK;
1065 else if (readLen < 0)
1066 goto err;
1067
1068 Assert(readLen <= XLOG_BLCKSZ);
1069
1070 /* Do we have enough data to check the header length? */
1072 goto err;
1073
1074 Assert(readLen >= reqLen);
1075
1076 hdr = (XLogPageHeader) state->readBuf;
1077
1078 /* still not enough */
1080 {
1081 readLen = state->routine.page_read(state, pageptr, XLogPageHeaderSize(hdr),
1082 state->currRecPtr,
1083 state->readBuf);
1085 return XLREAD_WOULDBLOCK;
1086 else if (readLen < 0)
1087 goto err;
1088 }
1089
1090 /*
1091 * Now that we know we have the full header, validate it.
1092 */
1093 if (!XLogReaderValidatePageHeader(state, pageptr, (char *) hdr))
1094 goto err;
1095
1096 /* update read state information */
1097 state->seg.ws_segno = targetSegNo;
1098 state->segoff = targetPageOff;
1099 state->readLen = readLen;
1100
1101 return readLen;
1102
1103err:
1105
1106 return XLREAD_FAIL;
1107}
1108
1109/*
1110 * Invalidate the xlogreader's read state to force a re-read.
1111 */
1112static void
1114{
1115 state->seg.ws_segno = 0;
1116 state->segoff = 0;
1117 state->readLen = 0;
1118}
1119
1120/*
1121 * Validate an XLOG record header.
1122 *
1123 * This is just a convenience subroutine to avoid duplicated code in
1124 * XLogReadRecord. It's not intended for use from anywhere else.
 *
 * 'RecPtr' is the address of the record being checked, 'PrevRecPtr' the
 * address of the record read before it, and 'record' the header to check.
 * 'randAccess' selects how strictly the prev-link is verified: when true,
 * xl_prev only has to be lower than RecPtr; when false, it has to equal
 * PrevRecPtr exactly.
 *
 * Returns true if the total length, the resource manager ID and the
 * prev-link all look sane, false as soon as one of them does not.
 *
 * NOTE(review): listing lines 1127, 1133, 1136, 1141, 1154 and 1170 are
 * absent from this extract (the function name line and pieces of the
 * error-reporting calls, presumably report_invalid_record()).
1125 */
1126static bool
1128 XLogRecPtr PrevRecPtr, XLogRecord *record,
1129 bool randAccess)
1130{
 /* A record can never be shorter than its own fixed-size header. */
1131 if (record->xl_tot_len < SizeOfXLogRecord)
1132 {
1134 "invalid record length at %X/%X: expected at least %u, got %u",
1135 LSN_FORMAT_ARGS(RecPtr),
1137 return false;
1138 }
1139 if (!RmgrIdIsValid(record->xl_rmid))
1140 {
1142 "invalid resource manager ID %u at %X/%X",
1143 record->xl_rmid, LSN_FORMAT_ARGS(RecPtr));
1144 return false;
1145 }
1146 if (randAccess)
1147 {
1148 /*
1149 * We can't exactly verify the prev-link, but surely it should be less
1150 * than the record's own address.
1151 */
1152 if (!(record->xl_prev < RecPtr))
1153 {
1155 "record with incorrect prev-link %X/%X at %X/%X",
1156 LSN_FORMAT_ARGS(record->xl_prev),
1157 LSN_FORMAT_ARGS(RecPtr));
1158 return false;
1159 }
1160 }
1161 else
1162 {
1163 /*
1164 * Record's prev-link should exactly match our previous location. This
1165 * check guards against torn WAL pages where a stale but valid-looking
1166 * WAL record starts on a sector boundary.
1167 */
1168 if (record->xl_prev != PrevRecPtr)
1169 {
1171 "record with incorrect prev-link %X/%X at %X/%X",
1172 LSN_FORMAT_ARGS(record->xl_prev),
1173 LSN_FORMAT_ARGS(RecPtr));
1174 return false;
1175 }
1176 }
1177
1178 return true;
1179}
1180
1181
1182/*
1183 * CRC-check an XLOG record. We do not believe the contents of an XLOG
1184 * record (other than to the minimal extent of computing the amount of
1185 * data to read in) until we've checked the CRCs.
1186 *
1187 * We assume all of the record (that is, xl_tot_len bytes) has been read
1188 * into memory at *record. Also, ValidXLogRecordHeader() has accepted the
1189 * record's header, which means in particular that xl_tot_len is at least
1190 * SizeOfXLogRecord.
 *
 * Returns true if the checksum computed over the record equals
 * record->xl_crc, false otherwise.  'recptr' is only used in the error
 * message.
 *
 * NOTE(review): listing lines 1193, 1197, 1200 and 1208 are absent from
 * this extract; the initialization of 'crc' is presumably on one of them.
1191 */
1192static bool
1194{
1195 pg_crc32c crc;
1196
1198
1199 /* Calculate the CRC */
 /* First the payload that follows the fixed-size header ... */
1201 COMP_CRC32C(crc, ((char *) record) + SizeOfXLogRecord, record->xl_tot_len - SizeOfXLogRecord);
1202 /* include the record header last */
 /* ... then the header itself, stopping just before the xl_crc field. */
1203 COMP_CRC32C(crc, (char *) record, offsetof(XLogRecord, xl_crc));
1204 FIN_CRC32C(crc);
1205
1206 if (!EQ_CRC32C(record->xl_crc, crc))
1207 {
1209 "incorrect resource manager data checksum in record at %X/%X",
1210 LSN_FORMAT_ARGS(recptr));
1211 return false;
1212 }
1213
1214 return true;
1215}
1216
1217/*
1218 * Validate a page header.
1219 *
1220 * Check if 'phdr' is valid as the header of the XLog page at position
1221 * 'recptr'.
 *
 * 'recptr' must be a multiple of XLOG_BLCKSZ (asserted below).  The checks
 * are, in order: the magic number, unknown info flag bits, the long-header
 * fields (system identifier, segment size, XLOG_BLCKSZ) or the absence of a
 * long header on the first page of a segment, the page address, and the
 * ordering of timeline IDs.
 *
 * Returns true if every check passes, in which case state->latestPagePtr
 * and state->latestPageTLI are updated to describe this page.  Returns
 * false as soon as one check fails, leaving those two fields unchanged.
 *
 * NOTE(review): listing lines 1224, 1242, 1257, 1273, 1281, 1287, 1299,
 * 1319, 1321 and 1345 are absent from this extract (the function name line
 * and pieces of the error-reporting calls).
1222 */
1223bool
1225 char *phdr)
1226{
1227 XLogSegNo segno;
1228 int32 offset;
1229 XLogPageHeader hdr = (XLogPageHeader) phdr;
1230
1231 Assert((recptr % XLOG_BLCKSZ) == 0);
1232
 /* Segment number and in-segment offset are only used in error messages. */
1233 XLByteToSeg(recptr, segno, state->segcxt.ws_segsize);
1234 offset = XLogSegmentOffset(recptr, state->segcxt.ws_segsize);
1235
1236 if (hdr->xlp_magic != XLOG_PAGE_MAGIC)
1237 {
1238 char fname[MAXFNAMELEN];
1239
1240 XLogFileName(fname, state->seg.ws_tli, segno, state->segcxt.ws_segsize);
1241
1243 "invalid magic number %04X in WAL segment %s, LSN %X/%X, offset %u",
1244 hdr->xlp_magic,
1245 fname,
1246 LSN_FORMAT_ARGS(recptr),
1247 offset);
1248 return false;
1249 }
1250
 /* Reject any flag bit that is not part of XLP_ALL_FLAGS. */
1251 if ((hdr->xlp_info & ~XLP_ALL_FLAGS) != 0)
1252 {
1253 char fname[MAXFNAMELEN];
1254
1255 XLogFileName(fname, state->seg.ws_tli, segno, state->segcxt.ws_segsize);
1256
1258 "invalid info bits %04X in WAL segment %s, LSN %X/%X, offset %u",
1259 hdr->xlp_info,
1260 fname,
1261 LSN_FORMAT_ARGS(recptr),
1262 offset);
1263 return false;
1264 }
1265
1266 if (hdr->xlp_info & XLP_LONG_HEADER)
1267 {
1268 XLogLongPageHeader longhdr = (XLogLongPageHeader) hdr;
1269
 /*
 * The system identifier is only compared when the reader was given
 * a non-zero one.
 */
1270 if (state->system_identifier &&
1271 longhdr->xlp_sysid != state->system_identifier)
1272 {
1274 "WAL file is from different database system: WAL file database system identifier is %llu, pg_control database system identifier is %llu",
1275 (unsigned long long) longhdr->xlp_sysid,
1276 (unsigned long long) state->system_identifier);
1277 return false;
1278 }
1279 else if (longhdr->xlp_seg_size != state->segcxt.ws_segsize)
1280 {
1282 "WAL file is from different database system: incorrect segment size in page header");
1283 return false;
1284 }
1285 else if (longhdr->xlp_xlog_blcksz != XLOG_BLCKSZ)
1286 {
1288 "WAL file is from different database system: incorrect XLOG_BLCKSZ in page header");
1289 return false;
1290 }
1291 }
1292 else if (offset == 0)
1293 {
1294 char fname[MAXFNAMELEN];
1295
1296 XLogFileName(fname, state->seg.ws_tli, segno, state->segcxt.ws_segsize);
1297
1298 /* hmm, first page of file doesn't have a long header? */
1300 "invalid info bits %04X in WAL segment %s, LSN %X/%X, offset %u",
1301 hdr->xlp_info,
1302 fname,
1303 LSN_FORMAT_ARGS(recptr),
1304 offset);
1305 return false;
1306 }
1307
1308 /*
1309 * Check that the address on the page agrees with what we expected. This
1310 * check typically fails when an old WAL segment is recycled, and hasn't
1311 * been overwritten with new data yet.
1312 */
1313 if (hdr->xlp_pageaddr != recptr)
1314 {
1315 char fname[MAXFNAMELEN];
1316
1317 XLogFileName(fname, state->seg.ws_tli, segno, state->segcxt.ws_segsize);
1318
1320 "unexpected pageaddr %X/%X in WAL segment %s, LSN %X/%X, offset %u",
1322 fname,
1323 LSN_FORMAT_ARGS(recptr),
1324 offset);
1325 return false;
1326 }
1327
1328 /*
1329 * Since child timelines are always assigned a TLI greater than their
1330 * immediate parent's TLI, we should never see TLI go backwards across
1331 * successive pages of a consistent WAL sequence.
1332 *
1333 * Sometimes we re-read a segment that's already been (partially) read. So
1334 * we only verify TLIs for pages that are later than the last remembered
1335 * LSN.
1336 */
1337 if (recptr > state->latestPagePtr)
1338 {
1339 if (hdr->xlp_tli < state->latestPageTLI)
1340 {
1341 char fname[MAXFNAMELEN];
1342
1343 XLogFileName(fname, state->seg.ws_tli, segno, state->segcxt.ws_segsize);
1344
1346 "out-of-sequence timeline ID %u (after %u) in WAL segment %s, LSN %X/%X, offset %u",
1347 hdr->xlp_tli,
1348 state->latestPageTLI,
1349 fname,
1350 LSN_FORMAT_ARGS(recptr),
1351 offset);
1352 return false;
1353 }
1354 }
 /* Remember this page as the latest one validated. */
1355 state->latestPagePtr = recptr;
1356 state->latestPageTLI = hdr->xlp_tli;
1357
1358 return true;
1359}
1360
1361/*
1362 * Forget about an error produced by XLogReaderValidatePageHeader().
 *
 * Empties the error message buffer of 'state' and clears the
 * errormsg_deferred flag, so no pending message remains.
 *
 * NOTE(review): listing line 1365 (the function name and parameter list) is
 * absent from this extract.
1363 */
1364void
1366{
1367 state->errormsg_buf[0] = '\0';
1368 state->errormsg_deferred = false;
1369}
1370
1371/*
1372 * Find the first record with an lsn >= RecPtr.
1373 *
1374 * This is different from XLogBeginRead() in that RecPtr doesn't need to point
1375 * to a valid record boundary. Useful for checking whether RecPtr is a valid
1376 * xlog address for reading, and to find the first valid address after some
1377 * address when dumping records for debugging purposes.
1378 *
1379 * This positions the reader, like XLogBeginRead(), so that the next call to
1380 * XLogReadRecord() will read the next valid record.
 *
 * Returns the start address of the record found.  Returns InvalidXLogRecPtr
 * if a page could not be read, or if XLogReadRecord() stopped returning
 * records before one starting at or after RecPtr was seen.
 *
 * As a side effect, state->nonblocking is forced to false.
 *
 * NOTE(review): listing lines 1382-1383 (return type and function name),
 * 1386 (presumably the declaration of 'found') and 1486 (a statement on the
 * error path) are absent from this extract.
1381 */
1384{
1385 XLogRecPtr tmpRecPtr;
1387 XLogPageHeader header;
1388 char *errormsg;
1389
1390 Assert(!XLogRecPtrIsInvalid(RecPtr));
1391
1392 /* Make sure ReadPageInternal() can't return XLREAD_WOULDBLOCK. */
1393 state->nonblocking = false;
1394
1395 /*
1396 * skip over potential continuation data, keeping in mind that it may span
1397 * multiple pages
1398 */
1399 tmpRecPtr = RecPtr;
1400 while (true)
1401 {
1402 XLogRecPtr targetPagePtr;
1403 int targetRecOff;
1404 uint32 pageHeaderSize;
1405 int readLen;
1406
1407 /*
1408 * Compute targetRecOff. It should typically be equal or greater than
1409 * short page-header since a valid record can't start anywhere before
1410 * that, except when caller has explicitly specified the offset that
1411 * falls somewhere there or when we are skipping multi-page
1412 * continuation record. It doesn't matter though because
1413 * ReadPageInternal() is prepared to handle that and will read at
1414 * least short page-header worth of data
1415 */
1416 targetRecOff = tmpRecPtr % XLOG_BLCKSZ;
1417
1418 /* scroll back to page boundary */
1419 targetPagePtr = tmpRecPtr - targetRecOff;
1420
1421 /* Read the page containing the record */
1422 readLen = ReadPageInternal(state, targetPagePtr, targetRecOff);
1423 if (readLen < 0)
1424 goto err;
1425
1426 header = (XLogPageHeader) state->readBuf;
1427
1428 pageHeaderSize = XLogPageHeaderSize(header);
1429
1430 /* make sure we have enough data for the page header */
1431 readLen = ReadPageInternal(state, targetPagePtr, pageHeaderSize);
1432 if (readLen < 0)
1433 goto err;
1434
1435 /* skip over potential continuation data */
1436 if (header->xlp_info & XLP_FIRST_IS_CONTRECORD)
1437 {
1438 /*
1439 * If the length of the remaining continuation data is more than
1440 * what can fit in this page, the continuation record crosses over
1441 * this page. Read the next page and try again. xlp_rem_len in the
1442 * next page header will contain the remaining length of the
1443 * continuation data
1444 *
1445 * Note that record headers are MAXALIGN'ed
1446 */
1447 if (MAXALIGN(header->xlp_rem_len) >= (XLOG_BLCKSZ - pageHeaderSize))
1448 tmpRecPtr = targetPagePtr + XLOG_BLCKSZ;
1449 else
1450 {
1451 /*
1452 * The previous continuation record ends in this page. Set
1453 * tmpRecPtr to point to the first valid record
1454 */
1455 tmpRecPtr = targetPagePtr + pageHeaderSize
1456 + MAXALIGN(header->xlp_rem_len);
1457 break;
1458 }
1459 }
1460 else
1461 {
 /* No continuation data: the first record follows the header. */
1462 tmpRecPtr = targetPagePtr + pageHeaderSize;
1463 break;
1464 }
1465 }
1466
1467 /*
1468 * we know now that tmpRecPtr is an address pointing to a valid XLogRecord
1469 * because either we're at the first record after the beginning of a page
1470 * or we just jumped over the remaining data of a continuation.
1471 */
1472 XLogBeginRead(state, tmpRecPtr);
1473 while (XLogReadRecord(state, &errormsg) != NULL)
1474 {
1475 /* past the record we've found, break out */
1476 if (RecPtr <= state->ReadRecPtr)
1477 {
1478 /* Rewind the reader to the beginning of the last record. */
1479 found = state->ReadRecPtr;
1480 XLogBeginRead(state, found);
1481 return found;
1482 }
1483 }
1484
 /* Reached on read failure or when the records ran out before RecPtr. */
1485err:
1487
1488 return InvalidXLogRecPtr;
1489}
1490
1491/*
1492 * Helper function to ease writing of XLogReaderRoutine->page_read callbacks.
1493 * If this function is used, caller must supply a segment_open callback in
1494 * 'state', as that is used here.
1495 *
1496 * Read 'count' bytes into 'buf', starting at location 'startptr', from WAL
1497 * fetched from timeline 'tli'.
1498 *
1499 * Returns true if succeeded, false if an error occurs, in which case
1500 * 'errinfo' receives error details.
 *
 * The request is served in a loop: each iteration reads at most up to the
 * end of the current segment, and a short read simply advances the position
 * and tries again.  A read that returns zero or a negative value is treated
 * as an error.  state->seg is updated whenever a different segment is
 * opened.
 *
 * NOTE(review): listing lines 1503 (the function name line) and 1563 (a
 * backend-only statement after the read) are absent from this extract.
1501 */
1502bool
1504 char *buf, XLogRecPtr startptr, Size count, TimeLineID tli,
1505 WALReadError *errinfo)
1506{
1507 char *p;
1508 XLogRecPtr recptr;
1509 Size nbytes;
1510
1511 p = buf;
1512 recptr = startptr;
1513 nbytes = count;
1514
1515 while (nbytes > 0)
1516 {
1517 uint32 startoff;
1518 int segbytes;
1519 int readbytes;
1520
1521 startoff = XLogSegmentOffset(recptr, state->segcxt.ws_segsize);
1522
1523 /*
1524 * If the data we want is not in a segment we have open, close what we
1525 * have (if anything) and open the next one, using the caller's
1526 * provided segment_open callback.
1527 */
1528 if (state->seg.ws_file < 0 ||
1529 !XLByteInSeg(recptr, state->seg.ws_segno, state->segcxt.ws_segsize) ||
1530 tli != state->seg.ws_tli)
1531 {
1532 XLogSegNo nextSegNo;
1533
1534 if (state->seg.ws_file >= 0)
1535 state->routine.segment_close(state);
1536
1537 XLByteToSeg(recptr, nextSegNo, state->segcxt.ws_segsize);
 /*
 * 'tli' is passed by address, so the callback is able to change it;
 * whatever value it holds afterwards is recorded below.
 */
1538 state->routine.segment_open(state, nextSegNo, &tli);
1539
1540 /* This shouldn't happen -- indicates a bug in segment_open */
1541 Assert(state->seg.ws_file >= 0);
1542
1543 /* Update the current segment info. */
1544 state->seg.ws_tli = tli;
1545 state->seg.ws_segno = nextSegNo;
1546 }
1547
1548 /* How many bytes are within this segment? */
1549 if (nbytes > (state->segcxt.ws_segsize - startoff))
1550 segbytes = state->segcxt.ws_segsize - startoff;
1551 else
1552 segbytes = nbytes;
1553
1554#ifndef FRONTEND
1555 pgstat_report_wait_start(WAIT_EVENT_WAL_READ);
1556#endif
1557
1558 /* Reset errno first; eases reporting non-errno-affecting errors */
1559 errno = 0;
1560 readbytes = pg_pread(state->seg.ws_file, p, segbytes, (off_t) startoff);
1561
1562#ifndef FRONTEND
1564#endif
1565
 /* Hand the failure details (including a copy of state->seg) back. */
1566 if (readbytes <= 0)
1567 {
1568 errinfo->wre_errno = errno;
1569 errinfo->wre_req = segbytes;
1570 errinfo->wre_read = readbytes;
1571 errinfo->wre_off = startoff;
1572 errinfo->wre_seg = state->seg;
1573 return false;
1574 }
1575
1576 /* Update state for read */
1577 recptr += readbytes;
1578 nbytes -= readbytes;
1579 p += readbytes;
1580 }
1581
1582 return true;
1583}
1584
1585/* ----------------------------------------
1586 * Functions for decoding the data and block references in a record.
1587 * ----------------------------------------
1588 */
1589
1590/*
1591 * Private function to reset the state, forgetting all decoded records, if we
1592 * are asked to move to a new read position.
 *
 * Walks the decode queue from its head, freeing with pfree() only the
 * entries flagged as oversized.  It then empties the queue, clears
 * state->record, rewinds the decode buffer head and tail to the start of
 * the buffer, and clears any pending error message.
 *
 * NOTE(review): listing lines 1595 (the function name line) and 1597
 * (presumably the declaration of 'r') are absent from this extract.
1593 */
1594static void
1596{
1598
1599 /* Reset the decoded record queue, freeing any oversized records. */
1600 while ((r = state->decode_queue_head) != NULL)
1601 {
 /* Unlink the entry before it is possibly freed. */
1602 state->decode_queue_head = r->next;
1603 if (r->oversized)
1604 pfree(r);
1605 }
1606 state->decode_queue_tail = NULL;
1607 state->decode_queue_head = NULL;
1608 state->record = NULL;
1609
1610 /* Reset the decode buffer to empty. */
1611 state->decode_buffer_tail = state->decode_buffer;
1612 state->decode_buffer_head = state->decode_buffer;
1613
1614 /* Clear error state. */
1615 state->errormsg_buf[0] = '\0';
1616 state->errormsg_deferred = false;
1617}
1618
1619/*
1620 * Compute the maximum possible amount of space, including alignment
1621 * padding, that could be required to decode a record, given xl_tot_len
1622 * from the record's header. This is the amount of output buffer space
1623 * that we need to decode a record, though we might not finish up using it all.
1624 *
1625 * This computation is pessimistic and assumes the maximum possible number of
1626 * blocks, due to lack of better information.
 *
 * The result is the sum of: the fixed part of DecodedXLogRecord, a blocks
 * array with XLR_MAX_BLOCK_ID + 1 entries, xl_tot_len bytes of raw data,
 * and up to MAXIMUM_ALIGNOF - 1 bytes of padding before the main data,
 * before each block's data, and at the end.
 *
 * NOTE(review): listing line 1629 (the function name and parameter list) is
 * absent from this extract.
1627 */
1628size_t
1630{
1631 size_t size = 0;
1632
1633 /* Account for the fixed size part of the decoded record struct. */
1634 size += offsetof(DecodedXLogRecord, blocks[0]);
1635 /* Account for the flexible blocks array of maximum possible size. */
1636 size += sizeof(DecodedBkpBlock) * (XLR_MAX_BLOCK_ID + 1);
1637 /* Account for all the raw main and block data. */
1638 size += xl_tot_len;
1639 /* We might insert padding before main_data. */
1640 size += (MAXIMUM_ALIGNOF - 1);
1641 /* We might insert padding before each block's data. */
1642 size += (MAXIMUM_ALIGNOF - 1) * (XLR_MAX_BLOCK_ID + 1);
1643 /* We might insert padding at the end. */
1644 size += (MAXIMUM_ALIGNOF - 1);
1645
1646 return size;
1647}
1648
1649/*
1650 * Decode a record. "decoded" must point to a MAXALIGNed memory area that has
1651 * space for at least DecodeXLogRecordRequiredSpace(record) bytes. On
1652 * success, decoded->size contains the actual space occupied by the decoded
1653 * record, which may turn out to be less.
1654 *
1655 * Only decoded->oversized member must be initialized already, and will not be
1656 * modified. Other members will be initialized as required.
1657 *
1658 * On error, a human-readable error message is returned in *errormsg, and
1659 * the return value is false.
 *
 * Decoding runs in two passes.  The first pass walks the fragment headers
 * that follow the fixed-size record header, filling in decoded->blocks[]
 * and cross-checking the flags against the lengths while adding up the
 * payload size in 'datatotal'.  The second pass copies the block images,
 * block data and main data into the space after the blocks array; block
 * data and main data are MAXALIGNed, images are not.
 *
 * 'lsn' is stored in decoded->lsn.  Error messages refer to
 * state->ReadRecPtr.
 *
 * NOTE(review): this extract lacks listing lines 1662, 1682, 1690-1691,
 * 1697, 1748, 1766, 1772, 1779, 1795, 1815, 1831, 1845, 1857, 1860, 1869,
 * 1876, 1888, 1949 and 1955 (the function name line, the declaration and
 * initialization of 'remaining', and pieces of several statements).  They
 * are not reconstructed here.
1660 */
1661bool
1663 DecodedXLogRecord *decoded,
1664 XLogRecord *record,
1665 XLogRecPtr lsn,
1666 char **errormsg)
1667{
1668 /*
1669 * read next _size bytes from record buffer, but check for overrun first.
1670 */
1671#define COPY_HEADER_FIELD(_dst, _size) \
1672 do { \
1673 if (remaining < _size) \
1674 goto shortdata_err; \
1675 memcpy(_dst, ptr, _size); \
1676 ptr += _size; \
1677 remaining -= _size; \
1678 } while(0)
1679
1680 char *ptr;
1681 char *out;
1683 uint32 datatotal;
1684 RelFileLocator *rlocator = NULL;
1685 uint8 block_id;
1686
1687 decoded->header = *record;
1688 decoded->lsn = lsn;
1689 decoded->next = NULL;
1692 decoded->main_data = NULL;
1693 decoded->main_data_len = 0;
1694 decoded->max_block_id = -1;
1695 ptr = (char *) record;
1696 ptr += SizeOfXLogRecord;
1698
1699 /* Decode the headers */
1700 datatotal = 0;
1701 while (remaining > datatotal)
1702 {
1703 COPY_HEADER_FIELD(&block_id, sizeof(uint8));
1704
1705 if (block_id == XLR_BLOCK_ID_DATA_SHORT)
1706 {
1707 /* XLogRecordDataHeaderShort */
1708 uint8 main_data_len;
1709
1710 COPY_HEADER_FIELD(&main_data_len, sizeof(uint8));
1711
1712 decoded->main_data_len = main_data_len;
1713 datatotal += main_data_len;
1714 break; /* by convention, the main data fragment is
1715 * always last */
1716 }
1717 else if (block_id == XLR_BLOCK_ID_DATA_LONG)
1718 {
1719 /* XLogRecordDataHeaderLong */
1720 uint32 main_data_len;
1721
1722 COPY_HEADER_FIELD(&main_data_len, sizeof(uint32));
1723 decoded->main_data_len = main_data_len;
1724 datatotal += main_data_len;
1725 break; /* by convention, the main data fragment is
1726 * always last */
1727 }
1728 else if (block_id == XLR_BLOCK_ID_ORIGIN)
1729 {
1730 COPY_HEADER_FIELD(&decoded->record_origin, sizeof(RepOriginId));
1731 }
1732 else if (block_id == XLR_BLOCK_ID_TOPLEVEL_XID)
1733 {
1734 COPY_HEADER_FIELD(&decoded->toplevel_xid, sizeof(TransactionId));
1735 }
1736 else if (block_id <= XLR_MAX_BLOCK_ID)
1737 {
1738 /* XLogRecordBlockHeader */
1739 DecodedBkpBlock *blk;
1740 uint8 fork_flags;
1741
1742 /* mark any intervening block IDs as not in use */
1743 for (int i = decoded->max_block_id + 1; i < block_id; ++i)
1744 decoded->blocks[i].in_use = false;
1745
 /* Block IDs must appear in strictly increasing order. */
1746 if (block_id <= decoded->max_block_id)
1747 {
1749 "out-of-order block_id %u at %X/%X",
1750 block_id,
1751 LSN_FORMAT_ARGS(state->ReadRecPtr));
1752 goto err;
1753 }
1754 decoded->max_block_id = block_id;
1755
1756 blk = &decoded->blocks[block_id];
1757 blk->in_use = true;
1758 blk->apply_image = false;
1759
1760 COPY_HEADER_FIELD(&fork_flags, sizeof(uint8));
1761 blk->forknum = fork_flags & BKPBLOCK_FORK_MASK;
1762 blk->flags = fork_flags;
1763 blk->has_image = ((fork_flags & BKPBLOCK_HAS_IMAGE) != 0);
1764 blk->has_data = ((fork_flags & BKPBLOCK_HAS_DATA) != 0);
1765
1767
1768 COPY_HEADER_FIELD(&blk->data_len, sizeof(uint16));
1769 /* cross-check that the HAS_DATA flag is set iff data_length > 0 */
1770 if (blk->has_data && blk->data_len == 0)
1771 {
1773 "BKPBLOCK_HAS_DATA set, but no data included at %X/%X",
1774 LSN_FORMAT_ARGS(state->ReadRecPtr));
1775 goto err;
1776 }
1777 if (!blk->has_data && blk->data_len != 0)
1778 {
1780 "BKPBLOCK_HAS_DATA not set, but data length is %u at %X/%X",
1781 (unsigned int) blk->data_len,
1782 LSN_FORMAT_ARGS(state->ReadRecPtr));
1783 goto err;
1784 }
1785 datatotal += blk->data_len;
1786
1787 if (blk->has_image)
1788 {
1789 COPY_HEADER_FIELD(&blk->bimg_len, sizeof(uint16));
1790 COPY_HEADER_FIELD(&blk->hole_offset, sizeof(uint16));
1791 COPY_HEADER_FIELD(&blk->bimg_info, sizeof(uint8));
1792
1793 blk->apply_image = ((blk->bimg_info & BKPIMAGE_APPLY) != 0);
1794
1796 {
1797 if (blk->bimg_info & BKPIMAGE_HAS_HOLE)
1798 COPY_HEADER_FIELD(&blk->hole_length, sizeof(uint16));
1799 else
1800 blk->hole_length = 0;
1801 }
1802 else
1803 blk->hole_length = BLCKSZ - blk->bimg_len;
1804 datatotal += blk->bimg_len;
1805
1806 /*
1807 * cross-check that hole_offset > 0, hole_length > 0 and
1808 * bimg_len < BLCKSZ if the HAS_HOLE flag is set.
1809 */
1810 if ((blk->bimg_info & BKPIMAGE_HAS_HOLE) &&
1811 (blk->hole_offset == 0 ||
1812 blk->hole_length == 0 ||
1813 blk->bimg_len == BLCKSZ))
1814 {
1816 "BKPIMAGE_HAS_HOLE set, but hole offset %u length %u block image length %u at %X/%X",
1817 (unsigned int) blk->hole_offset,
1818 (unsigned int) blk->hole_length,
1819 (unsigned int) blk->bimg_len,
1820 LSN_FORMAT_ARGS(state->ReadRecPtr));
1821 goto err;
1822 }
1823
1824 /*
1825 * cross-check that hole_offset == 0 and hole_length == 0 if
1826 * the HAS_HOLE flag is not set.
1827 */
1828 if (!(blk->bimg_info & BKPIMAGE_HAS_HOLE) &&
1829 (blk->hole_offset != 0 || blk->hole_length != 0))
1830 {
1832 "BKPIMAGE_HAS_HOLE not set, but hole offset %u length %u at %X/%X",
1833 (unsigned int) blk->hole_offset,
1834 (unsigned int) blk->hole_length,
1835 LSN_FORMAT_ARGS(state->ReadRecPtr));
1836 goto err;
1837 }
1838
1839 /*
1840 * Cross-check that bimg_len < BLCKSZ if it is compressed.
1841 */
1842 if (BKPIMAGE_COMPRESSED(blk->bimg_info) &&
1843 blk->bimg_len == BLCKSZ)
1844 {
1846 "BKPIMAGE_COMPRESSED set, but block image length %u at %X/%X",
1847 (unsigned int) blk->bimg_len,
1848 LSN_FORMAT_ARGS(state->ReadRecPtr));
1849 goto err;
1850 }
1851
1852 /*
1853 * cross-check that bimg_len = BLCKSZ if neither HAS_HOLE is
1854 * set nor COMPRESSED().  The message reports bimg_len, the
 * value this check actually tested, not the block data length.
1855 */
1856 if (!(blk->bimg_info & BKPIMAGE_HAS_HOLE) &&
1858 blk->bimg_len != BLCKSZ)
1859 {
1861 "neither BKPIMAGE_HAS_HOLE nor BKPIMAGE_COMPRESSED set, but block image length is %u at %X/%X",
1862 (unsigned int) blk->bimg_len,
1863 LSN_FORMAT_ARGS(state->ReadRecPtr));
1864 goto err;
1865 }
1866 }
1867 if (!(fork_flags & BKPBLOCK_SAME_REL))
1868 {
 /* Remember this block's relation for later SAME_REL blocks. */
1870 rlocator = &blk->rlocator;
1871 }
1872 else
1873 {
1874 if (rlocator == NULL)
1875 {
1877 "BKPBLOCK_SAME_REL set but no previous rel at %X/%X",
1878 LSN_FORMAT_ARGS(state->ReadRecPtr));
1879 goto err;
1880 }
1881
1882 blk->rlocator = *rlocator;
1883 }
1884 COPY_HEADER_FIELD(&blk->blkno, sizeof(BlockNumber));
1885 }
1886 else
1887 {
1889 "invalid block_id %u at %X/%X",
1890 block_id, LSN_FORMAT_ARGS(state->ReadRecPtr));
1891 goto err;
1892 }
1893 }
1894
 /* The headers must account for exactly the bytes that are left. */
1895 if (remaining != datatotal)
1896 goto shortdata_err;
1897
1898 /*
1899 * Ok, we've parsed the fragment headers, and verified that the total
1900 * length of the payload in the fragments is equal to the amount of data
1901 * left. Copy the data of each fragment to contiguous space after the
1902 * blocks array, inserting alignment padding before the data fragments so
1903 * they can be cast to struct pointers by REDO routines.
1904 */
1905 out = ((char *) decoded) +
1906 offsetof(DecodedXLogRecord, blocks) +
1907 sizeof(decoded->blocks[0]) * (decoded->max_block_id + 1);
1908
1909 /* block data first */
1910 for (block_id = 0; block_id <= decoded->max_block_id; block_id++)
1911 {
1912 DecodedBkpBlock *blk = &decoded->blocks[block_id];
1913
1914 if (!blk->in_use)
1915 continue;
1916
1917 Assert(blk->has_image || !blk->apply_image);
1918
1919 if (blk->has_image)
1920 {
1921 /* no need to align image */
1922 blk->bkp_image = out;
1923 memcpy(out, ptr, blk->bimg_len);
1924 ptr += blk->bimg_len;
1925 out += blk->bimg_len;
1926 }
1927 if (blk->has_data)
1928 {
1929 out = (char *) MAXALIGN(out);
1930 blk->data = out;
1931 memcpy(blk->data, ptr, blk->data_len);
1932 ptr += blk->data_len;
1933 out += blk->data_len;
1934 }
1935 }
1936
1937 /* and finally, the main data */
1938 if (decoded->main_data_len > 0)
1939 {
1940 out = (char *) MAXALIGN(out);
1941 decoded->main_data = out;
1942 memcpy(decoded->main_data, ptr, decoded->main_data_len);
1943 ptr += decoded->main_data_len;
1944 out += decoded->main_data_len;
1945 }
1946
1947 /* Report the actual size we used. */
1948 decoded->size = MAXALIGN(out - (char *) decoded);
1950 decoded->size);
1951
1952 return true;
1953
 /* Length mismatch: report it, then fall through to the common exit. */
1954shortdata_err:
1956 "record with invalid length at %X/%X",
1957 LSN_FORMAT_ARGS(state->ReadRecPtr));
1958err:
1959 *errormsg = state->errormsg_buf;
1960
1961 return false;
1962}
1963
1964/*
1965 * Returns information about the block that a block reference refers to.
1966 *
1967 * This is like XLogRecGetBlockTagExtended, except that the block reference
1968 * must exist and there's no access to prefetch_buffer.
 *
 * The outputs 'rlocator', 'forknum' and 'blknum' are forwarded unchanged to
 * XLogRecGetBlockTagExtended(), with NULL passed for the prefetch buffer.
 * If that call reports that the block reference is missing, this function
 * raises elog(ERROR) in backend builds and calls pg_fatal() in frontend
 * builds.
 *
 * NOTE(review): listing line 1971 (the function name line) is absent from
 * this extract.
1969 */
1970void
1972 RelFileLocator *rlocator, ForkNumber *forknum,
1973 BlockNumber *blknum)
1974{
1975 if (!XLogRecGetBlockTagExtended(record, block_id, rlocator, forknum,
1976 blknum, NULL))
1977 {
1978#ifndef FRONTEND
1979 elog(ERROR, "could not locate backup block with ID %d in WAL record",
1980 block_id);
1981#else
1982 pg_fatal("could not locate backup block with ID %d in WAL record",
1983 block_id);
1984#endif
1985 }
1986}
1987
1988/*
1989 * Returns information about the block that a block reference refers to,
1990 * optionally including the buffer that the block may already be in.
1991 *
1992 * If the WAL record contains a block reference with the given ID, *rlocator,
1993 * *forknum, *blknum and *prefetch_buffer are filled in (if not NULL), and
1994 * returns true. Otherwise returns false.
 *
 * When false is returned, none of the output pointers has been written to.
 *
 * NOTE(review): listing line 1997 (the function name line) is absent from
 * this extract.
1995 */
1996bool
1998 RelFileLocator *rlocator, ForkNumber *forknum,
1999 BlockNumber *blknum,
2000 Buffer *prefetch_buffer)
2001{
2002 DecodedBkpBlock *bkpb;
2003
2004 if (!XLogRecHasBlockRef(record, block_id))
2005 return false;
2006
 /* Each output is optional; copy only what the caller asked for. */
2007 bkpb = &record->record->blocks[block_id];
2008 if (rlocator)
2009 *rlocator = bkpb->rlocator;
2010 if (forknum)
2011 *forknum = bkpb->forknum;
2012 if (blknum)
2013 *blknum = bkpb->blkno;
2014 if (prefetch_buffer)
2015 *prefetch_buffer = bkpb->prefetch_buffer;
2016 return true;
2017}
2018
2019/*
2020 * Returns the data associated with a block reference, or NULL if there is
2021 * no data (e.g. because a full-page image was taken instead). The returned
2022 * pointer points to a MAXALIGNed buffer.
 *
 * If 'len' is not NULL and the block reference exists, *len receives the
 * data length, or 0 when the block carries no data.
 *
 * Note that when the block reference itself does not exist (block_id is
 * beyond max_block_id, or the slot is not in use), NULL is returned and
 * *len is left untouched.
 *
 * NOTE(review): listing line 2025 (the function name and parameter list) is
 * absent from this extract.
2023 */
2024char *
2026{
2027 DecodedBkpBlock *bkpb;
2028
 /* No such block reference: bail out without writing *len. */
2029 if (block_id > record->record->max_block_id ||
2030 !record->record->blocks[block_id].in_use)
2031 return NULL;
2032
2033 bkpb = &record->record->blocks[block_id];
2034
2035 if (!bkpb->has_data)
2036 {
2037 if (len)
2038 *len = 0;
2039 return NULL;
2040 }
2041 else
2042 {
2043 if (len)
2044 *len = bkpb->data_len;
2045 return bkpb->data;
2046 }
2047}
2048
2049/*
2050 * Restore a full-page image from a backup block attached to an XLOG record.
2051 *
2052 * Returns true if a full-page image is restored, and false on failure with
2053 * an error to be consumed by the caller.
2054 */
2055bool
2056RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page)
2057{
2058 DecodedBkpBlock *bkpb;
2059 char *ptr;
2060 PGAlignedBlock tmp;
2061
2062 if (block_id > record->record->max_block_id ||
2063 !record->record->blocks[block_id].in_use)
2064 {
2065 report_invalid_record(record,
2066 "could not restore image at %X/%X with invalid block %d specified",
2067 LSN_FORMAT_ARGS(record->ReadRecPtr),
2068 block_id);
2069 return false;
2070 }
2071 if (!record->record->blocks[block_id].has_image)
2072 {
2073 report_invalid_record(record, "could not restore image at %X/%X with invalid state, block %d",
2074 LSN_FORMAT_ARGS(record->ReadRecPtr),
2075 block_id);
2076 return false;
2077 }
2078
2079 bkpb = &record->record->blocks[block_id];
2080 ptr = bkpb->bkp_image;
2081
2082 if (BKPIMAGE_COMPRESSED(bkpb->bimg_info))
2083 {
2084 /* If a backup block image is compressed, decompress it */
2085 bool decomp_success = true;
2086
2087 if ((bkpb->bimg_info & BKPIMAGE_COMPRESS_PGLZ) != 0)
2088 {
2089 if (pglz_decompress(ptr, bkpb->bimg_len, tmp.data,
2090 BLCKSZ - bkpb->hole_length, true) < 0)
2091 decomp_success = false;
2092 }
2093 else if ((bkpb->bimg_info & BKPIMAGE_COMPRESS_LZ4) != 0)
2094 {
2095#ifdef USE_LZ4
2096 if (LZ4_decompress_safe(ptr, tmp.data,
2097 bkpb->bimg_len, BLCKSZ - bkpb->hole_length) <= 0)
2098 decomp_success = false;
2099#else
2100 report_invalid_record(record, "could not restore image at %X/%X compressed with %s not supported by build, block %d",
2101 LSN_FORMAT_ARGS(record->ReadRecPtr),
2102 "LZ4",
2103 block_id);
2104 return false;
2105#endif
2106 }
2107 else if ((bkpb->bimg_info & BKPIMAGE_COMPRESS_ZSTD) != 0)
2108 {
2109#ifdef USE_ZSTD
2110 size_t decomp_result = ZSTD_decompress(tmp.data,
2111 BLCKSZ - bkpb->hole_length,
2112 ptr, bkpb->bimg_len);
2113
2114 if (ZSTD_isError(decomp_result))
2115 decomp_success = false;
2116#else
2117 report_invalid_record(record, "could not restore image at %X/%X compressed with %s not supported by build, block %d",
2118 LSN_FORMAT_ARGS(record->ReadRecPtr),
2119 "zstd",
2120 block_id);
2121 return false;
2122#endif
2123 }
2124 else
2125 {
2126 report_invalid_record(record, "could not restore image at %X/%X compressed with unknown method, block %d",
2127 LSN_FORMAT_ARGS(record->ReadRecPtr),
2128 block_id);
2129 return false;
2130 }
2131
2132 if (!decomp_success)
2133 {
2134 report_invalid_record(record, "could not decompress image at %X/%X, block %d",
2135 LSN_FORMAT_ARGS(record->ReadRecPtr),
2136 block_id);
2137 return false;
2138 }
2139
2140 ptr = tmp.data;
2141 }
2142
2143 /* generate page, taking into account hole if necessary */
2144 if (bkpb->hole_length == 0)
2145 {
2146 memcpy(page, ptr, BLCKSZ);
2147 }
2148 else
2149 {
2150 memcpy(page, ptr, bkpb->hole_offset);
2151 /* must zero-fill the hole */
2152 MemSet(page + bkpb->hole_offset, 0, bkpb->hole_length);
2153 memcpy(page + (bkpb->hole_offset + bkpb->hole_length),
2154 ptr + bkpb->hole_offset,
2155 BLCKSZ - (bkpb->hole_offset + bkpb->hole_length));
2156 }
2157
2158 return true;
2159}
2160
2161#ifndef FRONTEND
2162
2163/*
2164 * Extract the FullTransactionId from a WAL record.
 *
 * The record's 32-bit xid (from XLogRecGetXid()) is combined with an epoch.
 * If the xid is numerically greater than 'next_xid', the epoch is
 * decremented by one before use.
 *
 * NOTE(review): listing lines 2166-2167 (return type and function name),
 * 2177, 2180-2181 (presumably where 'next_xid' and 'epoch' are assigned)
 * and 2190 (presumably the return statement) are absent from this extract,
 * so the source of those values cannot be confirmed from here.
2165 */
2168{
2169 TransactionId xid,
2170 next_xid;
2171 uint32 epoch;
2172
2173 /*
2174 * This function is only safe during replay, because it depends on the
2175 * replay state. See AdvanceNextFullTransactionIdPastXid() for more.
2176 */
2178
2179 xid = XLogRecGetXid(record);
2182
2183 /*
2184 * If xid is numerically greater than next_xid, it has to be from the last
2185 * epoch, i.e. the one preceding next_xid's epoch.
2186 */
2187 if (unlikely(xid > next_xid))
2188 --epoch;
2189
2191}
2192
2193#endif
uint32 BlockNumber
Definition: block.h:31
int Buffer
Definition: buf.h:23
#define InvalidBuffer
Definition: buf.h:25
#define Min(x, y)
Definition: c.h:961
#define MAXALIGN(LEN)
Definition: c.h:768
uint8_t uint8
Definition: c.h:486
#define Max(x, y)
Definition: c.h:955
#define Assert(condition)
Definition: c.h:815
#define pg_attribute_printf(f, a)
Definition: c.h:213
int32_t int32
Definition: c.h:484
uint16_t uint16
Definition: c.h:487
#define unlikely(x)
Definition: c.h:333
uint32_t uint32
Definition: c.h:488
#define lengthof(array)
Definition: c.h:745
#define MemSet(start, val, len)
Definition: c.h:977
uint32 TransactionId
Definition: c.h:609
size_t Size
Definition: c.h:562
#define _(x)
Definition: elog.c:90
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
void err(int eval, const char *fmt,...)
Definition: err.c:43
#define MCXT_ALLOC_ZERO
Definition: fe_memutils.h:30
#define MCXT_ALLOC_NO_OOM
Definition: fe_memutils.h:29
bool IsUnderPostmaster
Definition: globals.c:119
int remaining
Definition: informix.c:692
int i
Definition: isn.c:72
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:76
static void const char * fmt
va_end(args)
va_start(args, fmt)
void pfree(void *pointer)
Definition: mcxt.c:1521
void * palloc(Size size)
Definition: mcxt.c:1317
void * palloc_extended(Size size, int flags)
Definition: mcxt.c:1368
#define AmStartupProcess()
Definition: miscadmin.h:388
#define InvalidRepOriginId
Definition: origin.h:33
#define pg_fatal(...)
#define MAXPGPATH
#define XLOG_SWITCH
Definition: pg_control.h:72
uint32 pg_crc32c
Definition: pg_crc32c.h:38
#define COMP_CRC32C(crc, data, len)
Definition: pg_crc32c.h:98
#define EQ_CRC32C(c1, c2)
Definition: pg_crc32c.h:42
#define INIT_CRC32C(crc)
Definition: pg_crc32c.h:41
#define FIN_CRC32C(crc)
Definition: pg_crc32c.h:103
const void size_t len
return crc
int32 pglz_decompress(const char *source, int32 slen, char *dest, int32 rawsize, bool check_complete)
static char * buf
Definition: pg_test_fsync.c:72
#define vsnprintf
Definition: port.h:237
#define pg_pread
Definition: port.h:225
#define snprintf
Definition: port.h:238
ForkNumber
Definition: relpath.h:56
#define RmgrIdIsValid(rmid)
Definition: rmgr.h:53
static pg_noinline void Size size
Definition: slab.c:607
uint16 hole_length
Definition: xlogreader.h:140
char * bkp_image
Definition: xlogreader.h:138
Buffer prefetch_buffer
Definition: xlogreader.h:130
RelFileLocator rlocator
Definition: xlogreader.h:125
BlockNumber blkno
Definition: xlogreader.h:127
ForkNumber forknum
Definition: xlogreader.h:126
uint16 hole_offset
Definition: xlogreader.h:139
XLogRecord header
Definition: xlogreader.h:166
XLogRecPtr next_lsn
Definition: xlogreader.h:165
struct DecodedXLogRecord * next
Definition: xlogreader.h:161
TransactionId toplevel_xid
Definition: xlogreader.h:168
uint32 main_data_len
Definition: xlogreader.h:170
RepOriginId record_origin
Definition: xlogreader.h:167
DecodedBkpBlock blocks[FLEXIBLE_ARRAY_MEMBER]
Definition: xlogreader.h:172
XLogRecPtr lsn
Definition: xlogreader.h:164
FullTransactionId nextXid
Definition: transam.h:220
XLogSegNo ws_segno
Definition: xlogreader.h:48
TimeLineID ws_tli
Definition: xlogreader.h:49
WALOpenSegment wre_seg
Definition: xlogreader.h:388
char ws_dir[MAXPGPATH]
Definition: xlogreader.h:55
TimeLineID xlp_tli
Definition: xlog_internal.h:40
XLogRecPtr xlp_pageaddr
Definition: xlog_internal.h:41
DecodedXLogRecord * record
Definition: xlogreader.h:236
XLogRecPtr ReadRecPtr
Definition: xlogreader.h:206
XLogRecPtr xl_prev
Definition: xlogrecord.h:45
pg_crc32c xl_crc
Definition: xlogrecord.h:49
uint8 xl_info
Definition: xlogrecord.h:46
uint32 xl_tot_len
Definition: xlogrecord.h:43
RmgrId xl_rmid
Definition: xlogrecord.h:47
Definition: regguts.h:323
struct state * next
Definition: regguts.h:332
#define InvalidTransactionId
Definition: transam.h:31
#define EpochFromFullTransactionId(x)
Definition: transam.h:47
#define XidFromFullTransactionId(x)
Definition: transam.h:48
static FullTransactionId FullTransactionIdFromEpochAndXid(uint32 epoch, TransactionId xid)
Definition: transam.h:71
char data[BLCKSZ]
Definition: c.h:1076
TransamVariablesData * TransamVariables
Definition: varsup.c:34
static void pgstat_report_wait_start(uint32 wait_event_info)
Definition: wait_event.h:85
static void pgstat_report_wait_end(void)
Definition: wait_event.h:101
static const unsigned __int64 epoch
int wal_segment_size
Definition: xlog.c:143
#define XLP_FIRST_IS_CONTRECORD
Definition: xlog_internal.h:74
XLogLongPageHeaderData * XLogLongPageHeader
Definition: xlog_internal.h:71
#define XLP_FIRST_IS_OVERWRITE_CONTRECORD
Definition: xlog_internal.h:80
#define XLogSegmentOffset(xlogptr, wal_segsz_bytes)
#define MAXFNAMELEN
XLogPageHeaderData * XLogPageHeader
Definition: xlog_internal.h:54
#define XLP_LONG_HEADER
Definition: xlog_internal.h:76
#define XLP_ALL_FLAGS
Definition: xlog_internal.h:82
#define XLOG_PAGE_MAGIC
Definition: xlog_internal.h:34
#define XLByteToSeg(xlrp, logSegNo, wal_segsz_bytes)
#define XRecOffIsValid(xlrp)
#define SizeOfXLogShortPHD
Definition: xlog_internal.h:52
static void XLogFileName(char *fname, TimeLineID tli, XLogSegNo logSegNo, int wal_segsz_bytes)
#define XLogPageHeaderSize(hdr)
Definition: xlog_internal.h:84
#define XLByteInSeg(xlrp, logSegNo, wal_segsz_bytes)
#define LSN_FORMAT_ARGS(lsn)
Definition: xlogdefs.h:43
#define XLogRecPtrIsInvalid(r)
Definition: xlogdefs.h:29
uint16 RepOriginId
Definition: xlogdefs.h:65
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28
uint32 TimeLineID
Definition: xlogdefs.h:59
uint64 XLogSegNo
Definition: xlogdefs.h:48
bool XLogRecGetBlockTagExtended(XLogReaderState *record, uint8 block_id, RelFileLocator *rlocator, ForkNumber *forknum, BlockNumber *blknum, Buffer *prefetch_buffer)
Definition: xlogreader.c:1997
static XLogPageReadResult XLogDecodeNextRecord(XLogReaderState *state, bool nonblocking)
Definition: xlogreader.c:528
XLogReaderState * XLogReaderAllocate(int wal_segment_size, const char *waldir, XLogReaderRoutine *routine, void *private_data)
Definition: xlogreader.c:106
void XLogReaderSetDecodeBuffer(XLogReaderState *state, void *buffer, size_t size)
Definition: xlogreader.c:90
DecodedXLogRecord * XLogReadAhead(XLogReaderState *state, bool nonblocking)
Definition: xlogreader.c:966
static void WALOpenSegmentInit(WALOpenSegment *seg, WALSegmentContext *segcxt, int segsize, const char *waldir)
Definition: xlogreader.c:207
char * XLogRecGetBlockData(XLogReaderState *record, uint8 block_id, Size *len)
Definition: xlogreader.c:2025
static int ReadPageInternal(XLogReaderState *state, XLogRecPtr pageptr, int reqLen)
Definition: xlogreader.c:1000
DecodedXLogRecord * XLogNextRecord(XLogReaderState *state, char **errormsg)
Definition: xlogreader.c:325
static void report_invalid_record(XLogReaderState *state, const char *fmt,...) pg_attribute_printf(2
Definition: xlogreader.c:71
static void static void allocate_recordbuf(XLogReaderState *state, uint32 reclength)
Definition: xlogreader.c:190
bool WALRead(XLogReaderState *state, char *buf, XLogRecPtr startptr, Size count, TimeLineID tli, WALReadError *errinfo)
Definition: xlogreader.c:1503
#define MAX_ERRORMSG_LEN
Definition: xlogreader.c:58
XLogRecord * XLogReadRecord(XLogReaderState *state, char **errormsg)
Definition: xlogreader.c:389
void XLogReaderResetError(XLogReaderState *state)
Definition: xlogreader.c:1365
static void XLogReaderInvalReadState(XLogReaderState *state)
Definition: xlogreader.c:1113
#define COPY_HEADER_FIELD(_dst, _size)
bool XLogReaderValidatePageHeader(XLogReaderState *state, XLogRecPtr recptr, char *phdr)
Definition: xlogreader.c:1224
FullTransactionId XLogRecGetFullXid(XLogReaderState *record)
Definition: xlogreader.c:2167
void XLogReaderFree(XLogReaderState *state)
Definition: xlogreader.c:161
void XLogRecGetBlockTag(XLogReaderState *record, uint8 block_id, RelFileLocator *rlocator, ForkNumber *forknum, BlockNumber *blknum)
Definition: xlogreader.c:1971
static void ResetDecoder(XLogReaderState *state)
Definition: xlogreader.c:1595
bool DecodeXLogRecord(XLogReaderState *state, DecodedXLogRecord *decoded, XLogRecord *record, XLogRecPtr lsn, char **errormsg)
Definition: xlogreader.c:1662
static bool ValidXLogRecord(XLogReaderState *state, XLogRecord *record, XLogRecPtr recptr)
Definition: xlogreader.c:1193
#define DEFAULT_DECODE_BUFFER_SIZE
Definition: xlogreader.c:64
size_t DecodeXLogRecordRequiredSpace(size_t xl_tot_len)
Definition: xlogreader.c:1629
static DecodedXLogRecord * XLogReadRecordAlloc(XLogReaderState *state, size_t xl_tot_len, bool allow_oversized)
Definition: xlogreader.c:438
XLogRecPtr XLogFindNextRecord(XLogReaderState *state, XLogRecPtr RecPtr)
Definition: xlogreader.c:1383
void XLogBeginRead(XLogReaderState *state, XLogRecPtr RecPtr)
Definition: xlogreader.c:231
bool RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page)
Definition: xlogreader.c:2056
XLogRecPtr XLogReleasePreviousRecord(XLogReaderState *state)
Definition: xlogreader.c:249
static bool ValidXLogRecordHeader(XLogReaderState *state, XLogRecPtr RecPtr, XLogRecPtr PrevRecPtr, XLogRecord *record, bool randAccess)
Definition: xlogreader.c:1127
static bool XLogReaderHasQueuedRecordOrError(XLogReaderState *state)
Definition: xlogreader.h:325
#define XLogRecGetXid(decoder)
Definition: xlogreader.h:412
XLogPageReadResult
Definition: xlogreader.h:350
@ XLREAD_WOULDBLOCK
Definition: xlogreader.h:353
@ XLREAD_SUCCESS
Definition: xlogreader.h:351
@ XLREAD_FAIL
Definition: xlogreader.h:352
#define XLogRecHasBlockRef(decoder, block_id)
Definition: xlogreader.h:420
#define BKPIMAGE_COMPRESS_ZSTD
Definition: xlogrecord.h:162
#define BKPBLOCK_FORK_MASK
Definition: xlogrecord.h:195
#define BKPBLOCK_HAS_DATA
Definition: xlogrecord.h:198
#define BKPIMAGE_APPLY
Definition: xlogrecord.h:158
#define BKPIMAGE_HAS_HOLE
Definition: xlogrecord.h:157
#define XLR_BLOCK_ID_DATA_LONG
Definition: xlogrecord.h:242
#define BKPIMAGE_COMPRESS_LZ4
Definition: xlogrecord.h:161
#define BKPIMAGE_COMPRESSED(info)
Definition: xlogrecord.h:164
#define XLR_BLOCK_ID_TOPLEVEL_XID
Definition: xlogrecord.h:244
#define XLR_BLOCK_ID_DATA_SHORT
Definition: xlogrecord.h:241
#define XLR_MAX_BLOCK_ID
Definition: xlogrecord.h:239
#define XLR_INFO_MASK
Definition: xlogrecord.h:62
#define BKPBLOCK_SAME_REL
Definition: xlogrecord.h:200
#define BKPIMAGE_COMPRESS_PGLZ
Definition: xlogrecord.h:160
#define XLR_BLOCK_ID_ORIGIN
Definition: xlogrecord.h:243
#define SizeOfXLogRecord
Definition: xlogrecord.h:55
#define BKPBLOCK_HAS_IMAGE
Definition: xlogrecord.h:197
static uint32 readOff
Definition: xlogrecovery.c:232
static uint32 readLen
Definition: xlogrecovery.c:233