PostgreSQL Source Code  git master
xlogreader.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * xlogreader.c
4  * Generic XLog reading facility
5  *
6  * Portions Copyright (c) 2013-2024, PostgreSQL Global Development Group
7  *
8  * IDENTIFICATION
9  * src/backend/access/transam/xlogreader.c
10  *
11  * NOTES
12  * See xlogreader.h for more notes on this facility.
13  *
14  * This file is compiled as both front-end and backend code, so it
15  * may not use ereport, server-defined static variables, etc.
16  *-------------------------------------------------------------------------
17  */
18 #include "postgres.h"
19 
20 #include <unistd.h>
21 #ifdef USE_LZ4
22 #include <lz4.h>
23 #endif
24 #ifdef USE_ZSTD
25 #include <zstd.h>
26 #endif
27 
28 #include "access/transam.h"
29 #include "access/xlog_internal.h"
30 #include "access/xlogreader.h"
31 #include "access/xlogrecord.h"
32 #include "catalog/pg_control.h"
33 #include "common/pg_lzcompress.h"
34 #include "replication/origin.h"
35 
36 #ifndef FRONTEND
37 #include "miscadmin.h"
38 #include "pgstat.h"
39 #include "utils/memutils.h"
40 #else
41 #include "common/logging.h"
42 #endif
43 
44 static void report_invalid_record(XLogReaderState *state, const char *fmt,...)
45  pg_attribute_printf(2, 3);
46 static void allocate_recordbuf(XLogReaderState *state, uint32 reclength);
48  int reqLen);
52  XLogRecPtr PrevRecPtr, XLogRecord *record, bool randAccess);
53 static bool ValidXLogRecord(XLogReaderState *state, XLogRecord *record,
54  XLogRecPtr recptr);
55 static void ResetDecoder(XLogReaderState *state);
56 static void WALOpenSegmentInit(WALOpenSegment *seg, WALSegmentContext *segcxt,
57  int segsize, const char *waldir);
58 
59 /* size of the buffer allocated for error message. */
60 #define MAX_ERRORMSG_LEN 1000
61 
62 /*
63  * Default size; large enough that typical users of XLogReader won't often need
64  * to use the 'oversized' memory allocation code path.
65  */
66 #define DEFAULT_DECODE_BUFFER_SIZE (64 * 1024)
67 
68 /*
69  * Construct a string in state->errormsg_buf explaining what's wrong with
70  * the current record being read.
71  */
72 static void
74 {
75  va_list args;
76 
77  fmt = _(fmt);
78 
79  va_start(args, fmt);
80  vsnprintf(state->errormsg_buf, MAX_ERRORMSG_LEN, fmt, args);
81  va_end(args);
82 
83  state->errormsg_deferred = true;
84 }
85 
86 /*
87  * Set the size of the decoding buffer. A pointer to a caller supplied memory
88  * region may also be passed in, in which case non-oversized records will be
89  * decoded there.
90  */
91 void
92 XLogReaderSetDecodeBuffer(XLogReaderState *state, void *buffer, size_t size)
93 {
94  Assert(state->decode_buffer == NULL);
95 
96  state->decode_buffer = buffer;
97  state->decode_buffer_size = size;
98  state->decode_buffer_tail = buffer;
99  state->decode_buffer_head = buffer;
100 }
101 
102 /*
103  * Allocate and initialize a new XLogReader.
104  *
105  * Returns NULL if the xlogreader couldn't be allocated.
106  */
108 XLogReaderAllocate(int wal_segment_size, const char *waldir,
109  XLogReaderRoutine *routine, void *private_data)
110 {
112 
113  state = (XLogReaderState *)
116  if (!state)
117  return NULL;
118 
119  /* initialize caller-provided support functions */
120  state->routine = *routine;
121 
122  /*
123  * Permanently allocate readBuf. We do it this way, rather than just
124  * making a static array, for two reasons: (1) no need to waste the
125  * storage in most instantiations of the backend; (2) a static char array
126  * isn't guaranteed to have any particular alignment, whereas
127  * palloc_extended() will provide MAXALIGN'd storage.
128  */
129  state->readBuf = (char *) palloc_extended(XLOG_BLCKSZ,
131  if (!state->readBuf)
132  {
133  pfree(state);
134  return NULL;
135  }
136 
137  /* Initialize segment info. */
139  waldir);
140 
141  /* system_identifier initialized to zeroes above */
142  state->private_data = private_data;
143  /* ReadRecPtr, EndRecPtr and readLen initialized to zeroes above */
144  state->errormsg_buf = palloc_extended(MAX_ERRORMSG_LEN + 1,
146  if (!state->errormsg_buf)
147  {
148  pfree(state->readBuf);
149  pfree(state);
150  return NULL;
151  }
152  state->errormsg_buf[0] = '\0';
153 
154  /*
155  * Allocate an initial readRecordBuf of minimal size, which can later be
156  * enlarged if necessary.
157  */
159  return state;
160 }
161 
162 void
164 {
165  if (state->seg.ws_file != -1)
166  state->routine.segment_close(state);
167 
168  if (state->decode_buffer && state->free_decode_buffer)
169  pfree(state->decode_buffer);
170 
171  pfree(state->errormsg_buf);
172  if (state->readRecordBuf)
173  pfree(state->readRecordBuf);
174  pfree(state->readBuf);
175  pfree(state);
176 }
177 
178 /*
179  * Allocate readRecordBuf to fit a record of at least the given length.
180  *
181  * readRecordBufSize is set to the new buffer size.
182  *
183  * To avoid useless small increases, round its size to a multiple of
184  * XLOG_BLCKSZ, and make sure it's at least 5*Max(BLCKSZ, XLOG_BLCKSZ) to start
185  * with. (That is enough for all "normal" records, but very large commit or
186  * abort records might need more space.)
187  *
188  * Note: This routine should *never* be called for xl_tot_len until the header
189  * of the record has been fully validated.
190  */
191 static void
193 {
194  uint32 newSize = reclength;
195 
196  newSize += XLOG_BLCKSZ - (newSize % XLOG_BLCKSZ);
197  newSize = Max(newSize, 5 * Max(BLCKSZ, XLOG_BLCKSZ));
198 
199  if (state->readRecordBuf)
200  pfree(state->readRecordBuf);
201  state->readRecordBuf = (char *) palloc(newSize);
202  state->readRecordBufSize = newSize;
203 }
204 
205 /*
206  * Initialize the passed segment structs.
207  */
208 static void
210  int segsize, const char *waldir)
211 {
212  seg->ws_file = -1;
213  seg->ws_segno = 0;
214  seg->ws_tli = 0;
215 
216  segcxt->ws_segsize = segsize;
217  if (waldir)
218  snprintf(segcxt->ws_dir, MAXPGPATH, "%s", waldir);
219 }
220 
221 /*
222  * Begin reading WAL at 'RecPtr'.
223  *
224  * 'RecPtr' should point to the beginning of a valid WAL record. Pointing at
225  * the beginning of a page is also OK, if there is a new record right after
226  * the page header, i.e. not a continuation.
227  *
228  * This does not make any attempt to read the WAL yet, and hence cannot fail.
229  * If the starting address is not correct, the first call to XLogReadRecord()
230  * will error out.
231  */
232 void
234 {
235  Assert(!XLogRecPtrIsInvalid(RecPtr));
236 
238 
239  /* Begin at the passed-in record pointer. */
240  state->EndRecPtr = RecPtr;
241  state->NextRecPtr = RecPtr;
242  state->ReadRecPtr = InvalidXLogRecPtr;
243  state->DecodeRecPtr = InvalidXLogRecPtr;
244 }
245 
246 /*
247  * Release the last record that was returned by XLogNextRecord(), if any, to
248  * free up space. Returns the LSN past the end of the record.
249  */
252 {
253  DecodedXLogRecord *record;
254  XLogRecPtr next_lsn;
255 
256  if (!state->record)
257  return InvalidXLogRecPtr;
258 
259  /*
260  * Remove it from the decoded record queue. It must be the oldest item
261  * decoded, decode_queue_head.
262  */
263  record = state->record;
264  next_lsn = record->next_lsn;
265  Assert(record == state->decode_queue_head);
266  state->record = NULL;
267  state->decode_queue_head = record->next;
268 
269  /* It might also be the newest item decoded, decode_queue_tail. */
270  if (state->decode_queue_tail == record)
271  state->decode_queue_tail = NULL;
272 
273  /* Release the space. */
274  if (unlikely(record->oversized))
275  {
276  /* It's not in the decode buffer, so free it to release space. */
277  pfree(record);
278  }
279  else
280  {
281  /* It must be the head (oldest) record in the decode buffer. */
282  Assert(state->decode_buffer_head == (char *) record);
283 
284  /*
285  * We need to update head to point to the next record that is in the
286  * decode buffer, if any, being careful to skip oversized ones
287  * (they're not in the decode buffer).
288  */
289  record = record->next;
290  while (unlikely(record && record->oversized))
291  record = record->next;
292 
293  if (record)
294  {
295  /* Adjust head to release space up to the next record. */
296  state->decode_buffer_head = (char *) record;
297  }
298  else
299  {
300  /*
301  * Otherwise we might as well just reset head and tail to the
302  * start of the buffer space, because we're empty. This means
303  * we'll keep overwriting the same piece of memory if we're not
304  * doing any prefetching.
305  */
306  state->decode_buffer_head = state->decode_buffer;
307  state->decode_buffer_tail = state->decode_buffer;
308  }
309  }
310 
311  return next_lsn;
312 }
313 
314 /*
315  * Attempt to read an XLOG record.
316  *
317  * XLogBeginRead() or XLogFindNextRecord() and then XLogReadAhead() must be
318  * called before the first call to XLogNextRecord(). This function returns
319  * records and errors that were put into an internal queue by XLogReadAhead().
320  *
321  * On success, a record is returned.
322  *
323  * The returned record (or *errormsg) points to an internal buffer that's
324  * valid until the next call to XLogNextRecord.
325  */
328 {
329  /* Release the last record returned by XLogNextRecord(). */
331 
332  if (state->decode_queue_head == NULL)
333  {
334  *errormsg = NULL;
335  if (state->errormsg_deferred)
336  {
337  if (state->errormsg_buf[0] != '\0')
338  *errormsg = state->errormsg_buf;
339  state->errormsg_deferred = false;
340  }
341 
342  /*
343  * state->EndRecPtr is expected to have been set by the last call to
344  * XLogBeginRead() or XLogNextRecord(), and is the location of the
345  * error.
346  */
347  Assert(!XLogRecPtrIsInvalid(state->EndRecPtr));
348 
349  return NULL;
350  }
351 
352  /*
353  * Record this as the most recent record returned, so that we'll release
354  * it next time. This also exposes it to the traditional
355  * XLogRecXXX(xlogreader) macros, which work with the decoder rather than
356  * the record for historical reasons.
357  */
358  state->record = state->decode_queue_head;
359 
360  /*
361  * Update the pointers to the beginning and one-past-the-end of this
362  * record, again for the benefit of historical code that expected the
363  * decoder to track this rather than accessing these fields of the record
364  * itself.
365  */
366  state->ReadRecPtr = state->record->lsn;
367  state->EndRecPtr = state->record->next_lsn;
368 
369  *errormsg = NULL;
370 
371  return state->record;
372 }
373 
374 /*
375  * Attempt to read an XLOG record.
376  *
377  * XLogBeginRead() or XLogFindNextRecord() must be called before the first call
378  * to XLogReadRecord().
379  *
380  * If the page_read callback fails to read the requested data, NULL is
381  * returned. The callback is expected to have reported the error; errormsg
382  * is set to NULL.
383  *
384  * If the reading fails for some other reason, NULL is also returned, and
385  * *errormsg is set to a string with details of the failure.
386  *
387  * The returned pointer (or *errormsg) points to an internal buffer that's
388  * valid until the next call to XLogReadRecord.
389  */
390 XLogRecord *
392 {
393  DecodedXLogRecord *decoded;
394 
395  /*
396  * Release last returned record, if there is one. We need to do this so
397  * that we can check for empty decode queue accurately.
398  */
400 
401  /*
402  * Call XLogReadAhead() in blocking mode to make sure there is something
403  * in the queue, though we don't use the result.
404  */
406  XLogReadAhead(state, false /* nonblocking */ );
407 
408  /* Consume the head record or error. */
409  decoded = XLogNextRecord(state, errormsg);
410  if (decoded)
411  {
412  /*
413  * This function returns a pointer to the record's header, not the
414  * actual decoded record. The caller will access the decoded record
415  * through the XLogRecGetXXX() macros, which reach the decoded
416  * record as xlogreader->record.
417  */
418  Assert(state->record == decoded);
419  return &decoded->header;
420  }
421 
422  return NULL;
423 }
424 
425 /*
426  * Allocate space for a decoded record. The only member of the returned
427  * object that is initialized is the 'oversized' flag, indicating that the
428  * decoded record wouldn't fit in the decode buffer and must eventually be
429  * freed explicitly.
430  *
431  * The caller is responsible for adjusting decode_buffer_tail with the real
432  * size after successfully decoding a record into this space. This way, if
433  * decoding fails, then there is nothing to undo unless the 'oversized' flag
434  * was set and pfree() must be called.
435  *
436  * Return NULL if there is no space in the decode buffer and allow_oversized
437  * is false, or if memory allocation fails for an oversized buffer.
438  */
439 static DecodedXLogRecord *
440 XLogReadRecordAlloc(XLogReaderState *state, size_t xl_tot_len, bool allow_oversized)
441 {
442  size_t required_space = DecodeXLogRecordRequiredSpace(xl_tot_len);
443  DecodedXLogRecord *decoded = NULL;
444 
445  /* Allocate a circular decode buffer if we don't have one already. */
446  if (unlikely(state->decode_buffer == NULL))
447  {
448  if (state->decode_buffer_size == 0)
449  state->decode_buffer_size = DEFAULT_DECODE_BUFFER_SIZE;
450  state->decode_buffer = palloc(state->decode_buffer_size);
451  state->decode_buffer_head = state->decode_buffer;
452  state->decode_buffer_tail = state->decode_buffer;
453  state->free_decode_buffer = true;
454  }
455 
456  /* Try to allocate space in the circular decode buffer. */
457  if (state->decode_buffer_tail >= state->decode_buffer_head)
458  {
459  /* Empty, or tail is to the right of head. */
460  if (required_space <=
461  state->decode_buffer_size -
462  (state->decode_buffer_tail - state->decode_buffer))
463  {
464  /*-
465  * There is space between tail and end.
466  *
467  * +-----+--------------------+-----+
468  * | |////////////////////|here!|
469  * +-----+--------------------+-----+
470  * ^ ^
471  * | |
472  * h t
473  */
474  decoded = (DecodedXLogRecord *) state->decode_buffer_tail;
475  decoded->oversized = false;
476  return decoded;
477  }
478  else if (required_space <
479  state->decode_buffer_head - state->decode_buffer)
480  {
481  /*-
482  * There is space between start and head.
483  *
484  * +-----+--------------------+-----+
485  * |here!|////////////////////| |
486  * +-----+--------------------+-----+
487  * ^ ^
488  * | |
489  * h t
490  */
491  decoded = (DecodedXLogRecord *) state->decode_buffer;
492  decoded->oversized = false;
493  return decoded;
494  }
495  }
496  else
497  {
498  /* Tail is to the left of head. */
499  if (required_space <
500  state->decode_buffer_head - state->decode_buffer_tail)
501  {
502  /*-
503  * There is space between tail and head.
504  *
505  * +-----+--------------------+-----+
506  * |/////|here! |/////|
507  * +-----+--------------------+-----+
508  * ^ ^
509  * | |
510  * t h
511  */
512  decoded = (DecodedXLogRecord *) state->decode_buffer_tail;
513  decoded->oversized = false;
514  return decoded;
515  }
516  }
517 
518  /* Not enough space in the decode buffer. Are we allowed to allocate? */
519  if (allow_oversized)
520  {
521  decoded = palloc(required_space);
522  decoded->oversized = true;
523  return decoded;
524  }
525 
526  return NULL;
527 }
528 
529 static XLogPageReadResult
531 {
532  XLogRecPtr RecPtr;
533  XLogRecord *record;
534  XLogRecPtr targetPagePtr;
535  bool randAccess;
536  uint32 len,
537  total_len;
538  uint32 targetRecOff;
539  uint32 pageHeaderSize;
540  bool assembled;
541  bool gotheader;
542  int readOff;
543  DecodedXLogRecord *decoded;
544  char *errormsg; /* not used */
545 
546  /*
547  * randAccess indicates whether to verify the previous-record pointer of
548  * the record we're reading. We only do this if we're reading
549  * sequentially, which is what we initially assume.
550  */
551  randAccess = false;
552 
553  /* reset error state */
554  state->errormsg_buf[0] = '\0';
555  decoded = NULL;
556 
557  state->abortedRecPtr = InvalidXLogRecPtr;
558  state->missingContrecPtr = InvalidXLogRecPtr;
559 
560  RecPtr = state->NextRecPtr;
561 
562  if (state->DecodeRecPtr != InvalidXLogRecPtr)
563  {
564  /* read the record after the one we just read */
565 
566  /*
567  * NextRecPtr is pointing to end+1 of the previous WAL record. If
568  * we're at a page boundary, no more records can fit on the current
569  * page. We must skip over the page header, but we can't do that until
570  * we've read in the page, since the header size is variable.
571  */
572  }
573  else
574  {
575  /*
576  * Caller supplied a position to start at.
577  *
578  * In this case, NextRecPtr should already be pointing either to a
579  * valid record starting position or alternatively to the beginning of
580  * a page. See the header comments for XLogBeginRead.
581  */
582  Assert(RecPtr % XLOG_BLCKSZ == 0 || XRecOffIsValid(RecPtr));
583  randAccess = true;
584  }
585 
586 restart:
587  state->nonblocking = nonblocking;
588  state->currRecPtr = RecPtr;
589  assembled = false;
590 
591  targetPagePtr = RecPtr - (RecPtr % XLOG_BLCKSZ);
592  targetRecOff = RecPtr % XLOG_BLCKSZ;
593 
594  /*
595  * Read the page containing the record into state->readBuf. Request enough
596  * bytes to cover the whole record header, or at least the part of it that
597  * fits on the same page.
598  */
599  readOff = ReadPageInternal(state, targetPagePtr,
600  Min(targetRecOff + SizeOfXLogRecord, XLOG_BLCKSZ));
601  if (readOff == XLREAD_WOULDBLOCK)
602  return XLREAD_WOULDBLOCK;
603  else if (readOff < 0)
604  goto err;
605 
606  /*
607  * ReadPageInternal always returns at least the page header, so we can
608  * examine it now.
609  */
610  pageHeaderSize = XLogPageHeaderSize((XLogPageHeader) state->readBuf);
611  if (targetRecOff == 0)
612  {
613  /*
614  * At page start, so skip over page header.
615  */
616  RecPtr += pageHeaderSize;
617  targetRecOff = pageHeaderSize;
618  }
619  else if (targetRecOff < pageHeaderSize)
620  {
621  report_invalid_record(state, "invalid record offset at %X/%X: expected at least %u, got %u",
622  LSN_FORMAT_ARGS(RecPtr),
623  pageHeaderSize, targetRecOff);
624  goto err;
625  }
626 
627  if ((((XLogPageHeader) state->readBuf)->xlp_info & XLP_FIRST_IS_CONTRECORD) &&
628  targetRecOff == pageHeaderSize)
629  {
630  report_invalid_record(state, "contrecord is requested by %X/%X",
631  LSN_FORMAT_ARGS(RecPtr));
632  goto err;
633  }
634 
635  /* ReadPageInternal has verified the page header */
636  Assert(pageHeaderSize <= readOff);
637 
638  /*
639  * Read the record length.
640  *
641  * NB: Even though we use an XLogRecord pointer here, the whole record
642  * header might not fit on this page. xl_tot_len is the first field of the
643  * struct, so it must be on this page (the records are MAXALIGNed), but we
644  * cannot access any other fields until we've verified that we got the
645  * whole header.
646  */
647  record = (XLogRecord *) (state->readBuf + RecPtr % XLOG_BLCKSZ);
648  total_len = record->xl_tot_len;
649 
650  /*
651  * If the whole record header is on this page, validate it immediately.
652  * Otherwise do just a basic sanity check on xl_tot_len, and validate the
653  * rest of the header after reading it from the next page. The xl_tot_len
654  * check is necessary here to ensure that we enter the "Need to reassemble
655  * record" code path below; otherwise we might fail to apply
656  * ValidXLogRecordHeader at all.
657  */
658  if (targetRecOff <= XLOG_BLCKSZ - SizeOfXLogRecord)
659  {
660  if (!ValidXLogRecordHeader(state, RecPtr, state->DecodeRecPtr, record,
661  randAccess))
662  goto err;
663  gotheader = true;
664  }
665  else
666  {
667  /* There may be no next page if it's too small. */
668  if (total_len < SizeOfXLogRecord)
669  {
671  "invalid record length at %X/%X: expected at least %u, got %u",
672  LSN_FORMAT_ARGS(RecPtr),
673  (uint32) SizeOfXLogRecord, total_len);
674  goto err;
675  }
676  /* We'll validate the header once we have the next page. */
677  gotheader = false;
678  }
679 
680  /*
681  * Try to find space to decode this record, if we can do so without
682  * calling palloc. If we can't, we'll try again below after we've
683  * validated that total_len isn't garbage bytes from a recycled WAL page.
684  */
685  decoded = XLogReadRecordAlloc(state,
686  total_len,
687  false /* allow_oversized */ );
688  if (decoded == NULL && nonblocking)
689  {
690  /*
691  * There is no space in the circular decode buffer, and the caller is
692  * only reading ahead. The caller should consume existing records to
693  * make space.
694  */
695  return XLREAD_WOULDBLOCK;
696  }
697 
698  len = XLOG_BLCKSZ - RecPtr % XLOG_BLCKSZ;
699  if (total_len > len)
700  {
701  /* Need to reassemble record */
702  char *contdata;
703  XLogPageHeader pageHeader;
704  char *buffer;
705  uint32 gotlen;
706 
707  assembled = true;
708 
709  /*
710  * We always have space for a couple of pages, enough to validate a
711  * boundary-spanning record header.
712  */
713  Assert(state->readRecordBufSize >= XLOG_BLCKSZ * 2);
714  Assert(state->readRecordBufSize >= len);
715 
716  /* Copy the first fragment of the record from the first page. */
717  memcpy(state->readRecordBuf,
718  state->readBuf + RecPtr % XLOG_BLCKSZ, len);
719  buffer = state->readRecordBuf + len;
720  gotlen = len;
721 
722  do
723  {
724  /* Calculate pointer to beginning of next page */
725  targetPagePtr += XLOG_BLCKSZ;
726 
727  /* Wait for the next page to become available */
728  readOff = ReadPageInternal(state, targetPagePtr,
729  Min(total_len - gotlen + SizeOfXLogShortPHD,
730  XLOG_BLCKSZ));
731 
732  if (readOff == XLREAD_WOULDBLOCK)
733  return XLREAD_WOULDBLOCK;
734  else if (readOff < 0)
735  goto err;
736 
738 
739  pageHeader = (XLogPageHeader) state->readBuf;
740 
741  /*
742  * If we were expecting a continuation record and got an
743  * "overwrite contrecord" flag, that means the continuation record
744  * was overwritten with a different record. Restart the read by
745  * assuming the address to read is the location where we found
746  * this flag; but keep track of the LSN of the record we were
747  * reading, for later verification.
748  */
750  {
751  state->overwrittenRecPtr = RecPtr;
752  RecPtr = targetPagePtr;
753  goto restart;
754  }
755 
756  /* Check that the continuation on next page looks valid */
757  if (!(pageHeader->xlp_info & XLP_FIRST_IS_CONTRECORD))
758  {
760  "there is no contrecord flag at %X/%X",
761  LSN_FORMAT_ARGS(RecPtr));
762  goto err;
763  }
764 
765  /*
766  * Cross-check that xlp_rem_len agrees with how much of the record
767  * we expect there to be left.
768  */
769  if (pageHeader->xlp_rem_len == 0 ||
770  total_len != (pageHeader->xlp_rem_len + gotlen))
771  {
773  "invalid contrecord length %u (expected %lld) at %X/%X",
774  pageHeader->xlp_rem_len,
775  ((long long) total_len) - gotlen,
776  LSN_FORMAT_ARGS(RecPtr));
777  goto err;
778  }
779 
780  /* Append the continuation from this page to the buffer */
781  pageHeaderSize = XLogPageHeaderSize(pageHeader);
782 
783  if (readOff < pageHeaderSize)
784  readOff = ReadPageInternal(state, targetPagePtr,
785  pageHeaderSize);
786 
787  Assert(pageHeaderSize <= readOff);
788 
789  contdata = (char *) state->readBuf + pageHeaderSize;
790  len = XLOG_BLCKSZ - pageHeaderSize;
791  if (pageHeader->xlp_rem_len < len)
792  len = pageHeader->xlp_rem_len;
793 
794  if (readOff < pageHeaderSize + len)
795  readOff = ReadPageInternal(state, targetPagePtr,
796  pageHeaderSize + len);
797 
798  memcpy(buffer, (char *) contdata, len);
799  buffer += len;
800  gotlen += len;
801 
802  /* If we just reassembled the record header, validate it. */
803  if (!gotheader)
804  {
805  record = (XLogRecord *) state->readRecordBuf;
806  if (!ValidXLogRecordHeader(state, RecPtr, state->DecodeRecPtr,
807  record, randAccess))
808  goto err;
809  gotheader = true;
810  }
811 
812  /*
813  * We might need a bigger buffer. We have validated the record
814  * header, in the case that it split over a page boundary. We've
815  * also cross-checked total_len against xlp_rem_len on the second
816  * page, and verified xlp_pageaddr on both.
817  */
818  if (total_len > state->readRecordBufSize)
819  {
820  char save_copy[XLOG_BLCKSZ * 2];
821 
822  /*
823  * Save and restore the data we already had. It can't be more
824  * than two pages.
825  */
826  Assert(gotlen <= lengthof(save_copy));
827  Assert(gotlen <= state->readRecordBufSize);
828  memcpy(save_copy, state->readRecordBuf, gotlen);
829  allocate_recordbuf(state, total_len);
830  memcpy(state->readRecordBuf, save_copy, gotlen);
831  buffer = state->readRecordBuf + gotlen;
832  }
833  } while (gotlen < total_len);
834  Assert(gotheader);
835 
836  record = (XLogRecord *) state->readRecordBuf;
837  if (!ValidXLogRecord(state, record, RecPtr))
838  goto err;
839 
840  pageHeaderSize = XLogPageHeaderSize((XLogPageHeader) state->readBuf);
841  state->DecodeRecPtr = RecPtr;
842  state->NextRecPtr = targetPagePtr + pageHeaderSize
843  + MAXALIGN(pageHeader->xlp_rem_len);
844  }
845  else
846  {
847  /* Wait for the record data to become available */
848  readOff = ReadPageInternal(state, targetPagePtr,
849  Min(targetRecOff + total_len, XLOG_BLCKSZ));
850  if (readOff == XLREAD_WOULDBLOCK)
851  return XLREAD_WOULDBLOCK;
852  else if (readOff < 0)
853  goto err;
854 
855  /* Record does not cross a page boundary */
856  if (!ValidXLogRecord(state, record, RecPtr))
857  goto err;
858 
859  state->NextRecPtr = RecPtr + MAXALIGN(total_len);
860 
861  state->DecodeRecPtr = RecPtr;
862  }
863 
864  /*
865  * Special processing if it's an XLOG SWITCH record
866  */
867  if (record->xl_rmid == RM_XLOG_ID &&
868  (record->xl_info & ~XLR_INFO_MASK) == XLOG_SWITCH)
869  {
870  /* Pretend it extends to end of segment */
871  state->NextRecPtr += state->segcxt.ws_segsize - 1;
872  state->NextRecPtr -= XLogSegmentOffset(state->NextRecPtr, state->segcxt.ws_segsize);
873  }
874 
875  /*
876  * If we got here without a DecodedXLogRecord, it means we needed to
877  * validate total_len before trusting it, but by now we've done that.
878  */
879  if (decoded == NULL)
880  {
881  Assert(!nonblocking);
882  decoded = XLogReadRecordAlloc(state,
883  total_len,
884  true /* allow_oversized */ );
885  /* allocation should always happen under allow_oversized */
886  Assert(decoded != NULL);
887  }
888 
889  if (DecodeXLogRecord(state, decoded, record, RecPtr, &errormsg))
890  {
891  /* Record the location of the next record. */
892  decoded->next_lsn = state->NextRecPtr;
893 
894  /*
895  * If it's in the decode buffer, mark the decode buffer space as
896  * occupied.
897  */
898  if (!decoded->oversized)
899  {
900  /* The new decode buffer head must be MAXALIGNed. */
901  Assert(decoded->size == MAXALIGN(decoded->size));
902  if ((char *) decoded == state->decode_buffer)
903  state->decode_buffer_tail = state->decode_buffer + decoded->size;
904  else
905  state->decode_buffer_tail += decoded->size;
906  }
907 
908  /* Insert it into the queue of decoded records. */
909  Assert(state->decode_queue_tail != decoded);
910  if (state->decode_queue_tail)
911  state->decode_queue_tail->next = decoded;
912  state->decode_queue_tail = decoded;
913  if (!state->decode_queue_head)
914  state->decode_queue_head = decoded;
915  return XLREAD_SUCCESS;
916  }
917 
918 err:
919  if (assembled)
920  {
921  /*
922  * We get here when a record that spans multiple pages needs to be
923  * assembled, but something went wrong -- perhaps a contrecord piece
924  * was lost. If caller is WAL replay, it will know where the aborted
925  * record was and where to direct followup WAL to be written, marking
926  * the next piece with XLP_FIRST_IS_OVERWRITE_CONTRECORD, which will
927  * in turn signal downstream WAL consumers that the broken WAL record
928  * is to be ignored.
929  */
930  state->abortedRecPtr = RecPtr;
931  state->missingContrecPtr = targetPagePtr;
932 
933  /*
934  * If we got here without reporting an error, make sure an error is
935  * queued so that XLogPrefetcherReadRecord() doesn't bring us back a
936  * second time and clobber the above state.
937  */
938  state->errormsg_deferred = true;
939  }
940 
941  if (decoded && decoded->oversized)
942  pfree(decoded);
943 
944  /*
945  * Invalidate the read state. We might read from a different source after
946  * failure.
947  */
949 
950  /*
951  * If an error was written to errormsg_buf, it'll be returned to the caller
952  * of XLogReadRecord() after all successfully decoded records from the
953  * read queue.
954  */
955 
956  return XLREAD_FAIL;
957 }
958 
959 /*
960  * Try to decode the next available record, and return it. The record will
961  * also be returned to XLogNextRecord(), which must be called to 'consume'
962  * each record.
963  *
964  * If nonblocking is true, may return NULL due to lack of data or WAL decoding
965  * space.
966  */
969 {
970  XLogPageReadResult result;
971 
972  if (state->errormsg_deferred)
973  return NULL;
974 
975  result = XLogDecodeNextRecord(state, nonblocking);
976  if (result == XLREAD_SUCCESS)
977  {
978  Assert(state->decode_queue_tail != NULL);
979  return state->decode_queue_tail;
980  }
981 
982  return NULL;
983 }
984 
985 /*
986  * Read a single xlog page including at least [pageptr, reqLen] of valid data
987  * via the page_read() callback.
988  *
989  * Returns XLREAD_FAIL if the required page cannot be read for some
990  * reason; errormsg_buf is set in that case (unless the error occurs in the
991  * page_read callback).
992  *
993  * Returns XLREAD_WOULDBLOCK if the requested data can't be read without
994  * waiting. This can be returned only if the installed page_read callback
995  * respects the state->nonblocking flag, and cannot read the requested data
996  * immediately.
997  *
998  * We fetch the page from a reader-local cache if we know we have the required
999  * data and if there hasn't been any error since caching the data.
1000  */
1001 static int
/*
 * NOTE(review): listing line 1002 (function name and parameter list) is
 * missing from this extract.  The body reads 'state', 'pageptr' and 'reqLen',
 * and other code in this listing calls ReadPageInternal(state, pageptr,
 * reqLen), so presumably this is that function -- confirm against upstream.
 *
 * Result, as visible below: the number of valid bytes now available in
 * state->readBuf for the page at 'pageptr' (at least 'reqLen'), or
 * XLREAD_WOULDBLOCK if the page_read callback reported that, or XLREAD_FAIL.
 */
1003 {
1004  int readLen;
1005  uint32 targetPageOff;
1006  XLogSegNo targetSegNo;
1007  XLogPageHeader hdr;
1008
 /* callers must pass a page-aligned address */
1009  Assert((pageptr % XLOG_BLCKSZ) == 0);
1010
1011  XLByteToSeg(pageptr, targetSegNo, state->segcxt.ws_segsize);
1012  targetPageOff = XLogSegmentOffset(pageptr, state->segcxt.ws_segsize);
1013
1014  /* check whether we have all the requested data already */
1015  if (targetSegNo == state->seg.ws_segno &&
1016  targetPageOff == state->segoff && reqLen <= state->readLen)
1017  return state->readLen;
1018
1019  /*
1020  * Invalidate contents of internal buffer before read attempt. Just set
1021  * the length to 0, rather than a full XLogReaderInvalReadState(), so we
1022  * don't forget the segment we last successfully read.
1023  */
1024  state->readLen = 0;
1025
1026  /*
1027  * Data is not in our buffer.
1028  *
1029  * Every time we actually read the segment, even if we looked at parts of
1030  * it before, we need to do verification as the page_read callback might
1031  * now be rereading data from a different source.
1032  *
1033  * Whenever switching to a new WAL segment, we read the first page of the
1034  * file and validate its header, even if that's not where the target
1035  * record is. This is so that we can check the additional identification
1036  * info that is present in the first page's "long" header.
1037  */
1038  if (targetSegNo != state->seg.ws_segno && targetPageOff != 0)
1039  {
1040  XLogRecPtr targetSegmentPtr = pageptr - targetPageOff;
1041
1042  readLen = state->routine.page_read(state, targetSegmentPtr, XLOG_BLCKSZ,
1043  state->currRecPtr,
1044  state->readBuf);
1045  if (readLen == XLREAD_WOULDBLOCK)
1046  return XLREAD_WOULDBLOCK;
1047  else if (readLen < 0)
1048  goto err;
1049
1050  /* we can be sure to have enough WAL available, we scrolled back */
1051  Assert(readLen == XLOG_BLCKSZ);
1052
1053  if (!XLogReaderValidatePageHeader(state, targetSegmentPtr,
1054  state->readBuf))
1055  goto err;
1056  }
1057
1058  /*
1059  * First, read the requested data length, but at least a short page header
1060  * so that we can validate it.
1061  */
1062  readLen = state->routine.page_read(state, pageptr, Max(reqLen, SizeOfXLogShortPHD),
1063  state->currRecPtr,
1064  state->readBuf);
1065  if (readLen == XLREAD_WOULDBLOCK)
1066  return XLREAD_WOULDBLOCK;
1067  else if (readLen < 0)
1068  goto err;
1069
1070  Assert(readLen <= XLOG_BLCKSZ);
1071
1072  /* Do we have enough data to check the header length? */
1073  if (readLen <= SizeOfXLogShortPHD)
1074  goto err;
1075
1076  Assert(readLen >= reqLen);
1077
1078  hdr = (XLogPageHeader) state->readBuf;
1079
1080  /* still not enough */
1081  if (readLen < XLogPageHeaderSize(hdr))
1082  {
1083  readLen = state->routine.page_read(state, pageptr, XLogPageHeaderSize(hdr),
1084  state->currRecPtr,
1085  state->readBuf);
1086  if (readLen == XLREAD_WOULDBLOCK)
1087  return XLREAD_WOULDBLOCK;
1088  else if (readLen < 0)
1089  goto err;
1090  }
1091
1092  /*
1093  * Now that we know we have the full header, validate it.
1094  */
1095  if (!XLogReaderValidatePageHeader(state, pageptr, (char *) hdr))
1096  goto err;
1097
1098  /* update read state information */
1099  state->seg.ws_segno = targetSegNo;
1100  state->segoff = targetPageOff;
1101  state->readLen = readLen;
1102
1103  return readLen;
1104
1105 err:
 /*
  * NOTE(review): listing line 1106 is missing from this extract; presumably
  * it invalidates the cached read state before failing -- confirm against
  * upstream.
  */
1107
1108  return XLREAD_FAIL;
1109 }
1110 
1111 /*
1112  * Invalidate the xlogreader's read state to force a re-read.
 *
 * Zeroes the remembered segment number, the offset within the segment and
 * the count of valid bytes in the read buffer.
 *
 * NOTE(review): listing line 1115 (function name and parameter list) is
 * missing from this extract.  The body uses 'state'; a comment elsewhere in
 * this listing refers to XLogReaderInvalReadState(), which is presumably
 * this function -- confirm against upstream.
1113  */
1114 static void
1116 {
1117  state->seg.ws_segno = 0;
1118  state->segoff = 0;
1119  state->readLen = 0;
1120 }
1121 
1122 /*
1123  * Validate an XLOG record header.
1124  *
1125  * This is just a convenience subroutine to avoid duplicated code in
1126  * XLogReadRecord. It's not intended for use from anywhere else.
 *
 * Checks, in order: xl_tot_len is at least SizeOfXLogRecord, xl_rmid is a
 * valid resource manager ID, and xl_prev is plausible.  With 'randAccess'
 * set, xl_prev only has to be below RecPtr; otherwise it must equal
 * PrevRecPtr exactly.  Returns false on the first failed check.
 *
 * NOTE(review): listing line 1129 (function name and leading parameters) is
 * missing from this extract, as are lines 1135, 1143, 1156 and 1172, which
 * presumably open the error-report calls that the orphaned format strings
 * below belong to.  The body uses 'RecPtr', which must come from the missing
 * parameter list -- confirm against upstream.
1127  */
1128 static bool
1130  XLogRecPtr PrevRecPtr, XLogRecord *record,
1131  bool randAccess)
1132 {
1133  if (record->xl_tot_len < SizeOfXLogRecord)
1134  {
1136  "invalid record length at %X/%X: expected at least %u, got %u",
1137  LSN_FORMAT_ARGS(RecPtr),
1138  (uint32) SizeOfXLogRecord, record->xl_tot_len);
1139  return false;
1140  }
1141  if (!RmgrIdIsValid(record->xl_rmid))
1142  {
1144  "invalid resource manager ID %u at %X/%X",
1145  record->xl_rmid, LSN_FORMAT_ARGS(RecPtr));
1146  return false;
1147  }
1148  if (randAccess)
1149  {
1150  /*
1151  * We can't exactly verify the prev-link, but surely it should be less
1152  * than the record's own address.
1153  */
1154  if (!(record->xl_prev < RecPtr))
1155  {
1157  "record with incorrect prev-link %X/%X at %X/%X",
1158  LSN_FORMAT_ARGS(record->xl_prev),
1159  LSN_FORMAT_ARGS(RecPtr));
1160  return false;
1161  }
1162  }
1163  else
1164  {
1165  /*
1166  * Record's prev-link should exactly match our previous location. This
1167  * check guards against torn WAL pages where a stale but valid-looking
1168  * WAL record starts on a sector boundary.
1169  */
1170  if (record->xl_prev != PrevRecPtr)
1171  {
1173  "record with incorrect prev-link %X/%X at %X/%X",
1174  LSN_FORMAT_ARGS(record->xl_prev),
1175  LSN_FORMAT_ARGS(RecPtr));
1176  return false;
1177  }
1178  }
1179
1180  return true;
1181 }
1182 
1183 
1184 /*
1185  * CRC-check an XLOG record. We do not believe the contents of an XLOG
1186  * record (other than to the minimal extent of computing the amount of
1187  * data to read in) until we've checked the CRCs.
1188  *
1189  * We assume all of the record (that is, xl_tot_len bytes) has been read
1190  * into memory at *record. Also, ValidXLogRecordHeader() has accepted the
1191  * record's header, which means in particular that xl_tot_len is at least
1192  * SizeOfXLogRecord.
1193  */
1194 static bool
1196 {
1197  pg_crc32c crc;
1198 
1199  Assert(record->xl_tot_len >= SizeOfXLogRecord);
1200 
1201  /* Calculate the CRC */
1202  INIT_CRC32C(crc);
1203  COMP_CRC32C(crc, ((char *) record) + SizeOfXLogRecord, record->xl_tot_len - SizeOfXLogRecord);
1204  /* include the record header last */
1205  COMP_CRC32C(crc, (char *) record, offsetof(XLogRecord, xl_crc));
1206  FIN_CRC32C(crc);
1207 
1208  if (!EQ_CRC32C(record->xl_crc, crc))
1209  {
1211  "incorrect resource manager data checksum in record at %X/%X",
1212  LSN_FORMAT_ARGS(recptr));
1213  return false;
1214  }
1215 
1216  return true;
1217 }
1218 
1219 /*
1220  * Validate a page header.
1221  *
1222  * Check if 'phdr' is valid as the header of the XLog page at position
1223  * 'recptr'.
 *
 * On success the page address and timeline ID are remembered in
 * state->latestPagePtr / state->latestPageTLI and true is returned.  Any
 * failed check returns false.
 *
 * NOTE(review): listing line 1226 (function name and leading parameters) is
 * missing from this extract; callers in this listing invoke
 * XLogReaderValidatePageHeader(state, <page address>, <buffer>), so
 * presumably this is that function.  Lines 1244, 1259, 1275, 1283, 1289,
 * 1301, 1321 and 1347 are missing as well and presumably open the
 * error-report calls for the orphaned format strings below -- confirm
 * against upstream.
1224  */
1225 bool
1227  char *phdr)
1228 {
1229  XLogSegNo segno;
1230  int32 offset;
1231  XLogPageHeader hdr = (XLogPageHeader) phdr;
1232
1233  Assert((recptr % XLOG_BLCKSZ) == 0);
1234
1235  XLByteToSeg(recptr, segno, state->segcxt.ws_segsize);
1236  offset = XLogSegmentOffset(recptr, state->segcxt.ws_segsize);
1237
1238  if (hdr->xlp_magic != XLOG_PAGE_MAGIC)
1239  {
1240  char fname[MAXFNAMELEN];
1241
1242  XLogFileName(fname, state->seg.ws_tli, segno, state->segcxt.ws_segsize);
1243
1245  "invalid magic number %04X in WAL segment %s, LSN %X/%X, offset %u",
1246  hdr->xlp_magic,
1247  fname,
1248  LSN_FORMAT_ARGS(recptr),
1249  offset);
1250  return false;
1251  }
1252
 /* reject any flag bits outside the known set */
1253  if ((hdr->xlp_info & ~XLP_ALL_FLAGS) != 0)
1254  {
1255  char fname[MAXFNAMELEN];
1256
1257  XLogFileName(fname, state->seg.ws_tli, segno, state->segcxt.ws_segsize);
1258
1260  "invalid info bits %04X in WAL segment %s, LSN %X/%X, offset %u",
1261  hdr->xlp_info,
1262  fname,
1263  LSN_FORMAT_ARGS(recptr),
1264  offset);
1265  return false;
1266  }
1267
1268  if (hdr->xlp_info & XLP_LONG_HEADER)
1269  {
1270  XLogLongPageHeader longhdr = (XLogLongPageHeader) hdr;
1271
 /* the system identifier is only compared when the reader has one set */
1272  if (state->system_identifier &&
1273  longhdr->xlp_sysid != state->system_identifier)
1274  {
1276  "WAL file is from different database system: WAL file database system identifier is %llu, pg_control database system identifier is %llu",
1277  (unsigned long long) longhdr->xlp_sysid,
1278  (unsigned long long) state->system_identifier);
1279  return false;
1280  }
1281  else if (longhdr->xlp_seg_size != state->segcxt.ws_segsize)
1282  {
1284  "WAL file is from different database system: incorrect segment size in page header");
1285  return false;
1286  }
1287  else if (longhdr->xlp_xlog_blcksz != XLOG_BLCKSZ)
1288  {
1290  "WAL file is from different database system: incorrect XLOG_BLCKSZ in page header");
1291  return false;
1292  }
1293  }
1294  else if (offset == 0)
1295  {
1296  char fname[MAXFNAMELEN];
1297
1298  XLogFileName(fname, state->seg.ws_tli, segno, state->segcxt.ws_segsize);
1299
1300  /* hmm, first page of file doesn't have a long header? */
1302  "invalid info bits %04X in WAL segment %s, LSN %X/%X, offset %u",
1303  hdr->xlp_info,
1304  fname,
1305  LSN_FORMAT_ARGS(recptr),
1306  offset);
1307  return false;
1308  }
1309
1310  /*
1311  * Check that the address on the page agrees with what we expected. This
1312  * check typically fails when an old WAL segment is recycled, and hasn't
1313  * been overwritten with new data yet.
1314  */
1315  if (hdr->xlp_pageaddr != recptr)
1316  {
1317  char fname[MAXFNAMELEN];
1318
1319  XLogFileName(fname, state->seg.ws_tli, segno, state->segcxt.ws_segsize);
1320
 /*
  * NOTE(review): besides the call opener (line 1321), line 1323 is missing
  * too; the format string starts with a %X/%X pair for the unexpected page
  * address, so presumably that line supplied hdr->xlp_pageaddr -- confirm.
  */
1322  "unexpected pageaddr %X/%X in WAL segment %s, LSN %X/%X, offset %u",
1324  fname,
1325  LSN_FORMAT_ARGS(recptr),
1326  offset);
1327  return false;
1328  }
1329
1330  /*
1331  * Since child timelines are always assigned a TLI greater than their
1332  * immediate parent's TLI, we should never see TLI go backwards across
1333  * successive pages of a consistent WAL sequence.
1334  *
1335  * Sometimes we re-read a segment that's already been (partially) read. So
1336  * we only verify TLIs for pages that are later than the last remembered
1337  * LSN.
1338  */
1339  if (recptr > state->latestPagePtr)
1340  {
1341  if (hdr->xlp_tli < state->latestPageTLI)
1342  {
1343  char fname[MAXFNAMELEN];
1344
1345  XLogFileName(fname, state->seg.ws_tli, segno, state->segcxt.ws_segsize);
1346
1348  "out-of-sequence timeline ID %u (after %u) in WAL segment %s, LSN %X/%X, offset %u",
1349  hdr->xlp_tli,
1350  state->latestPageTLI,
1351  fname,
1352  LSN_FORMAT_ARGS(recptr),
1353  offset);
1354  return false;
1355  }
1356  }
 /* remember this page as the latest one validated */
1357  state->latestPagePtr = recptr;
1358  state->latestPageTLI = hdr->xlp_tli;
1359
1360  return true;
1361 }
1362 
1363 /*
1364  * Forget about an error produced by XLogReaderValidatePageHeader().
 *
 * Empties the error message buffer and clears the deferred-error flag.
 *
 * NOTE(review): listing line 1367 (function name and parameter list) is
 * missing from this extract; the body only uses 'state'.  Recover the exact
 * signature from the upstream file.
1365  */
1366 void
1368 {
1369  state->errormsg_buf[0] = '\0';
1370  state->errormsg_deferred = false;
1371 }
1372 
1373 /*
1374  * Find the first record with an lsn >= RecPtr.
1375  *
1376  * This is different from XLogBeginRead() in that RecPtr doesn't need to point
1377  * to a valid record boundary. Useful for checking whether RecPtr is a valid
1378  * xlog address for reading, and to find the first valid address after some
1379  * address when dumping records for debugging purposes.
1380  *
1381  * This positions the reader, like XLogBeginRead(), so that the next call to
1382  * XLogReadRecord() will read the next valid record.
 *
 * Returns the start address of the record found, or InvalidXLogRecPtr if a
 * page could not be read or no record at or after RecPtr could be read.
 *
 * NOTE(review): listing line 1385 (function name and parameter list) is
 * missing from this extract; the body uses 'state' and 'RecPtr'.  Recover
 * the exact signature from the upstream file.
1383  */
1384 XLogRecPtr
1386 {
1387  XLogRecPtr tmpRecPtr;
1388  XLogRecPtr found = InvalidXLogRecPtr;
1389  XLogPageHeader header;
1390  char *errormsg;
1391
1392  Assert(!XLogRecPtrIsInvalid(RecPtr));
1393
1394  /* Make sure ReadPageInternal() can't return XLREAD_WOULDBLOCK. */
1395  state->nonblocking = false;
1396
1397  /*
1398  * skip over potential continuation data, keeping in mind that it may span
1399  * multiple pages
1400  */
1401  tmpRecPtr = RecPtr;
1402  while (true)
1403  {
1404  XLogRecPtr targetPagePtr;
1405  int targetRecOff;
1406  uint32 pageHeaderSize;
1407  int readLen;
1408
1409  /*
1410  * Compute targetRecOff. It should typically be equal or greater than
1411  * short page-header since a valid record can't start anywhere before
1412  * that, except when caller has explicitly specified the offset that
1413  * falls somewhere there or when we are skipping multi-page
1414  * continuation record. It doesn't matter though because
1415  * ReadPageInternal() is prepared to handle that and will read at
1416  * least short page-header worth of data
1417  */
1418  targetRecOff = tmpRecPtr % XLOG_BLCKSZ;
1419
1420  /* scroll back to page boundary */
1421  targetPagePtr = tmpRecPtr - targetRecOff;
1422
1423  /* Read the page containing the record */
1424  readLen = ReadPageInternal(state, targetPagePtr, targetRecOff);
1425  if (readLen < 0)
1426  goto err;
1427
1428  header = (XLogPageHeader) state->readBuf;
1429
1430  pageHeaderSize = XLogPageHeaderSize(header);
1431
1432  /* make sure we have enough data for the page header */
1433  readLen = ReadPageInternal(state, targetPagePtr, pageHeaderSize);
1434  if (readLen < 0)
1435  goto err;
1436
1437  /* skip over potential continuation data */
1438  if (header->xlp_info & XLP_FIRST_IS_CONTRECORD)
1439  {
1440  /*
1441  * If the length of the remaining continuation data is more than
1442  * what can fit in this page, the continuation record crosses over
1443  * this page. Read the next page and try again. xlp_rem_len in the
1444  * next page header will contain the remaining length of the
1445  * continuation data
1446  *
1447  * Note that record headers are MAXALIGN'ed
1448  */
1449  if (MAXALIGN(header->xlp_rem_len) >= (XLOG_BLCKSZ - pageHeaderSize))
1450  tmpRecPtr = targetPagePtr + XLOG_BLCKSZ;
1451  else
1452  {
1453  /*
1454  * The previous continuation record ends in this page. Set
1455  * tmpRecPtr to point to the first valid record
1456  */
1457  tmpRecPtr = targetPagePtr + pageHeaderSize
1458  + MAXALIGN(header->xlp_rem_len);
1459  break;
1460  }
1461  }
1462  else
1463  {
 /* no continuation data: the first record starts right after the header */
1464  tmpRecPtr = targetPagePtr + pageHeaderSize;
1465  break;
1466  }
1467  }
1468
1469  /*
1470  * we know now that tmpRecPtr is an address pointing to a valid XLogRecord
1471  * because either we're at the first record after the beginning of a page
1472  * or we just jumped over the remaining data of a continuation.
1473  */
1474  XLogBeginRead(state, tmpRecPtr);
1475  while (XLogReadRecord(state, &errormsg) != NULL)
1476  {
1477  /* past the record we've found, break out */
1478  if (RecPtr <= state->ReadRecPtr)
1479  {
1480  /* Rewind the reader to the beginning of the last record. */
1481  found = state->ReadRecPtr;
1482  XLogBeginRead(state, found);
1483  return found;
1484  }
1485  }
1486
 /* reached either by 'goto err' or when the record loop above ends on NULL */
1487 err:
 /*
  * NOTE(review): listing line 1488 is missing from this extract; presumably
  * it invalidates the reader's cached read state before failing -- confirm
  * against upstream.
  */
1489
1490  return InvalidXLogRecPtr;
1491 }
1492 
1493 /*
1494  * Helper function to ease writing of XLogReaderRoutine->page_read callbacks.
1495  * If this function is used, caller must supply a segment_open callback in
1496  * 'state', as that is used here.
1497  *
1498  * Read 'count' bytes into 'buf', starting at location 'startptr', from WAL
1499  * fetched from timeline 'tli'.
1500  *
1501  * Returns true if succeeded, false if an error occurs, in which case
1502  * 'errinfo' receives error details.
 *
 * The request is served in a loop, so it may cross a segment boundary and a
 * short read is simply continued from where it stopped.
 *
 * NOTE(review): listing line 1505 (function name and first parameter) is
 * missing from this extract; the body uses 'state' in addition to the
 * parameters visible below.  Recover the exact signature from upstream.
1503  */
1504 bool
1506  char *buf, XLogRecPtr startptr, Size count, TimeLineID tli,
1507  WALReadError *errinfo)
1508 {
1509  char *p;
1510  XLogRecPtr recptr;
1511  Size nbytes;
1512
1513  p = buf;
1514  recptr = startptr;
1515  nbytes = count;
1516
1517  while (nbytes > 0)
1518  {
1519  uint32 startoff;
1520  int segbytes;
1521  int readbytes;
1522
1523  startoff = XLogSegmentOffset(recptr, state->segcxt.ws_segsize);
1524
1525  /*
1526  * If the data we want is not in a segment we have open, close what we
1527  * have (if anything) and open the next one, using the caller's
1528  * provided segment_open callback.
1529  */
1530  if (state->seg.ws_file < 0 ||
1531  !XLByteInSeg(recptr, state->seg.ws_segno, state->segcxt.ws_segsize) ||
1532  tli != state->seg.ws_tli)
1533  {
1534  XLogSegNo nextSegNo;
1535
1536  if (state->seg.ws_file >= 0)
1537  state->routine.segment_close(state);
1538
1539  XLByteToSeg(recptr, nextSegNo, state->segcxt.ws_segsize);
 /* 'tli' is passed by address, so the callback may update it */
1540  state->routine.segment_open(state, nextSegNo, &tli);
1541
1542  /* This shouldn't happen -- indicates a bug in segment_open */
1543  Assert(state->seg.ws_file >= 0);
1544
1545  /* Update the current segment info. */
1546  state->seg.ws_tli = tli;
1547  state->seg.ws_segno = nextSegNo;
1548  }
1549
1550  /* How many bytes are within this segment? */
1551  if (nbytes > (state->segcxt.ws_segsize - startoff))
1552  segbytes = state->segcxt.ws_segsize - startoff;
1553  else
1554  segbytes = nbytes;
1555
1556 #ifndef FRONTEND
1557  pgstat_report_wait_start(WAIT_EVENT_WAL_READ);
1558 #endif
1559
1560  /* Reset errno first; eases reporting non-errno-affecting errors */
1561  errno = 0;
1562  readbytes = pg_pread(state->seg.ws_file, p, segbytes, (off_t) startoff);
1563
1564 #ifndef FRONTEND
 /*
  * NOTE(review): listing line 1565 is missing from this extract; presumably
  * it ends the wait event started before the read -- confirm against
  * upstream.
  */
1566 #endif
1567
 /* zero bytes read is treated as a failure, just like a negative result */
1568  if (readbytes <= 0)
1569  {
1570  errinfo->wre_errno = errno;
1571  errinfo->wre_req = segbytes;
1572  errinfo->wre_read = readbytes;
1573  errinfo->wre_off = startoff;
1574  errinfo->wre_seg = state->seg;
1575  return false;
1576  }
1577
1578  /* Update state for read */
1579  recptr += readbytes;
1580  nbytes -= readbytes;
1581  p += readbytes;
1582  }
1583
1584  return true;
1585 }
1586 
1587 /* ----------------------------------------
1588  * Functions for decoding the data and block references in a record.
1589  * ----------------------------------------
1590  */
1591 
1592 /*
1593  * Private function to reset the state, forgetting all decoded records, if we
1594  * are asked to move to a new read position.
1595  */
1596 static void
1598 {
1599  DecodedXLogRecord *r;
1600 
1601  /* Reset the decoded record queue, freeing any oversized records. */
1602  while ((r = state->decode_queue_head) != NULL)
1603  {
1604  state->decode_queue_head = r->next;
1605  if (r->oversized)
1606  pfree(r);
1607  }
1608  state->decode_queue_tail = NULL;
1609  state->decode_queue_head = NULL;
1610  state->record = NULL;
1611 
1612  /* Reset the decode buffer to empty. */
1613  state->decode_buffer_tail = state->decode_buffer;
1614  state->decode_buffer_head = state->decode_buffer;
1615 
1616  /* Clear error state. */
1617  state->errormsg_buf[0] = '\0';
1618  state->errormsg_deferred = false;
1619 }
1620 
1621 /*
1622  * Compute the maximum possible amount of space that could be required to
1623  * decode a record, given xl_tot_len from the record's header. This is the
1624  * amount of output buffer space that we need to decode a record, though we
1625  * might not finish up using it all.
1626  *
1627  * This computation is pessimistic and assumes the maximum possible number of
1628  * blocks, due to lack of better information.
 *
 * NOTE(review): listing line 1631 (function name and parameter list) is
 * missing from this extract.  The body uses 'xl_tot_len', and a comment
 * elsewhere in this listing refers to DecodeXLogRecordRequiredSpace(),
 * which is presumably this function -- confirm against upstream.
1629  */
1630 size_t
1632 {
1633  size_t size = 0;
1634
1635  /* Account for the fixed size part of the decoded record struct. */
1636  size += offsetof(DecodedXLogRecord, blocks[0]);
1637  /* Account for the flexible blocks array of maximum possible size. */
1638  size += sizeof(DecodedBkpBlock) * (XLR_MAX_BLOCK_ID + 1);
1639  /* Account for all the raw main and block data. */
1640  size += xl_tot_len;
1641  /* We might insert padding before main_data. */
1642  size += (MAXIMUM_ALIGNOF - 1);
1643  /* We might insert padding before each block's data. */
1644  size += (MAXIMUM_ALIGNOF - 1) * (XLR_MAX_BLOCK_ID + 1);
1645  /* We might insert padding at the end. */
1646  size += (MAXIMUM_ALIGNOF - 1);
1647
1648  return size;
1649 }
1650 
1651 /*
1652  * Decode a record. "decoded" must point to a MAXALIGNed memory area that has
1653  * space for at least DecodeXLogRecordRequiredSpace(record) bytes. On
1654  * success, decoded->size contains the actual space occupied by the decoded
1655  * record, which may turn out to be less.
1656  *
1657  * Only decoded->oversized member must be initialized already, and will not be
1658  * modified. Other members will be initialized as required.
1659  *
1660  * On error, a human-readable error message is returned in *errormsg, and
1661  * the return value is false.
 *
 * NOTE(review): this extract is missing listing line 1664 (function name and
 * first parameter; the body uses 'state') and every line that opened an
 * error-report call (1750, 1774, 1781, 1817, 1833, 1847, 1862, 1878, 1890,
 * 1957), leaving orphaned format strings below.  Lines 1693, 1699, 1768 and
 * 1951 are missing too; see the notes at those spots.  Recover all of them
 * from the upstream file.
1662  */
1663 bool
1665  DecodedXLogRecord *decoded,
1666  XLogRecord *record,
1667  XLogRecPtr lsn,
1668  char **errormsg)
1669 {
1670  /*
1671  * read next _size bytes from record buffer, but check for overrun first.
1672  */
1673 #define COPY_HEADER_FIELD(_dst, _size) \
1674  do { \
1675  if (remaining < _size) \
1676  goto shortdata_err; \
1677  memcpy(_dst, ptr, _size); \
1678  ptr += _size; \
1679  remaining -= _size; \
1680  } while(0)
1681
1682  char *ptr;
1683  char *out;
1684  uint32 remaining;
1685  uint32 datatotal;
1686  RelFileLocator *rlocator = NULL;
1687  uint8 block_id;
1688
1689  decoded->header = *record;
1690  decoded->lsn = lsn;
1691  decoded->next = NULL;
1692  decoded->record_origin = InvalidRepOriginId;
 /*
  * NOTE(review): listing line 1693 is missing; decoded->toplevel_xid is only
  * assigned in the XLR_BLOCK_ID_TOPLEVEL_XID branch below, so presumably it
  * gets its default here -- confirm.
  */
1694  decoded->main_data = NULL;
1695  decoded->main_data_len = 0;
1696  decoded->max_block_id = -1;
1697  ptr = (char *) record;
1698  ptr += SizeOfXLogRecord;
 /*
  * NOTE(review): listing line 1699 is missing; 'remaining' is never assigned
  * in the visible lines although it is read right below, so presumably it is
  * initialized here to the record length left after the fixed header --
  * confirm.
  */
1700
1701  /* Decode the headers */
1702  datatotal = 0;
1703  while (remaining > datatotal)
1704  {
1705  COPY_HEADER_FIELD(&block_id, sizeof(uint8));
1706
1707  if (block_id == XLR_BLOCK_ID_DATA_SHORT)
1708  {
1709  /* XLogRecordDataHeaderShort */
1710  uint8 main_data_len;
1711
1712  COPY_HEADER_FIELD(&main_data_len, sizeof(uint8));
1713
1714  decoded->main_data_len = main_data_len;
1715  datatotal += main_data_len;
1716  break; /* by convention, the main data fragment is
1717  * always last */
1718  }
1719  else if (block_id == XLR_BLOCK_ID_DATA_LONG)
1720  {
1721  /* XLogRecordDataHeaderLong */
1722  uint32 main_data_len;
1723
1724  COPY_HEADER_FIELD(&main_data_len, sizeof(uint32));
1725  decoded->main_data_len = main_data_len;
1726  datatotal += main_data_len;
1727  break; /* by convention, the main data fragment is
1728  * always last */
1729  }
1730  else if (block_id == XLR_BLOCK_ID_ORIGIN)
1731  {
1732  COPY_HEADER_FIELD(&decoded->record_origin, sizeof(RepOriginId));
1733  }
1734  else if (block_id == XLR_BLOCK_ID_TOPLEVEL_XID)
1735  {
1736  COPY_HEADER_FIELD(&decoded->toplevel_xid, sizeof(TransactionId));
1737  }
1738  else if (block_id <= XLR_MAX_BLOCK_ID)
1739  {
1740  /* XLogRecordBlockHeader */
1741  DecodedBkpBlock *blk;
1742  uint8 fork_flags;
1743
1744  /* mark any intervening block IDs as not in use */
1745  for (int i = decoded->max_block_id + 1; i < block_id; ++i)
1746  decoded->blocks[i].in_use = false;
1747
 /* block references must appear in strictly increasing ID order */
1748  if (block_id <= decoded->max_block_id)
1749  {
1751  "out-of-order block_id %u at %X/%X",
1752  block_id,
1753  LSN_FORMAT_ARGS(state->ReadRecPtr));
1754  goto err;
1755  }
1756  decoded->max_block_id = block_id;
1757
1758  blk = &decoded->blocks[block_id];
1759  blk->in_use = true;
1760  blk->apply_image = false;
1761
1762  COPY_HEADER_FIELD(&fork_flags, sizeof(uint8));
1763  blk->forknum = fork_flags & BKPBLOCK_FORK_MASK;
1764  blk->flags = fork_flags;
1765  blk->has_image = ((fork_flags & BKPBLOCK_HAS_IMAGE) != 0);
1766  blk->has_data = ((fork_flags & BKPBLOCK_HAS_DATA) != 0);
1767
 /*
  * NOTE(review): listing line 1768 is missing; blk->prefetch_buffer is read
  * by XLogRecGetBlockTagExtended() but not set anywhere in the visible
  * lines, so presumably it is initialized here -- confirm.
  */
1769
1770  COPY_HEADER_FIELD(&blk->data_len, sizeof(uint16));
1771  /* cross-check that the HAS_DATA flag is set iff data_length > 0 */
1772  if (blk->has_data && blk->data_len == 0)
1773  {
1775  "BKPBLOCK_HAS_DATA set, but no data included at %X/%X",
1776  LSN_FORMAT_ARGS(state->ReadRecPtr));
1777  goto err;
1778  }
1779  if (!blk->has_data && blk->data_len != 0)
1780  {
1782  "BKPBLOCK_HAS_DATA not set, but data length is %u at %X/%X",
1783  (unsigned int) blk->data_len,
1784  LSN_FORMAT_ARGS(state->ReadRecPtr));
1785  goto err;
1786  }
1787  datatotal += blk->data_len;
1788
1789  if (blk->has_image)
1790  {
1791  COPY_HEADER_FIELD(&blk->bimg_len, sizeof(uint16));
1792  COPY_HEADER_FIELD(&blk->hole_offset, sizeof(uint16));
1793  COPY_HEADER_FIELD(&blk->bimg_info, sizeof(uint8));
1794
1795  blk->apply_image = ((blk->bimg_info & BKPIMAGE_APPLY) != 0);
1796
 /*
  * A compressed image stores the hole length explicitly (only when it has
  * a hole); for an uncompressed image it is derived from the image length.
  */
1797  if (BKPIMAGE_COMPRESSED(blk->bimg_info))
1798  {
1799  if (blk->bimg_info & BKPIMAGE_HAS_HOLE)
1800  COPY_HEADER_FIELD(&blk->hole_length, sizeof(uint16));
1801  else
1802  blk->hole_length = 0;
1803  }
1804  else
1805  blk->hole_length = BLCKSZ - blk->bimg_len;
1806  datatotal += blk->bimg_len;
1807
1808  /*
1809  * cross-check that hole_offset > 0, hole_length > 0 and
1810  * bimg_len < BLCKSZ if the HAS_HOLE flag is set.
1811  */
1812  if ((blk->bimg_info & BKPIMAGE_HAS_HOLE) &&
1813  (blk->hole_offset == 0 ||
1814  blk->hole_length == 0 ||
1815  blk->bimg_len == BLCKSZ))
1816  {
1818  "BKPIMAGE_HAS_HOLE set, but hole offset %u length %u block image length %u at %X/%X",
1819  (unsigned int) blk->hole_offset,
1820  (unsigned int) blk->hole_length,
1821  (unsigned int) blk->bimg_len,
1822  LSN_FORMAT_ARGS(state->ReadRecPtr));
1823  goto err;
1824  }
1825
1826  /*
1827  * cross-check that hole_offset == 0 and hole_length == 0 if
1828  * the HAS_HOLE flag is not set.
1829  */
1830  if (!(blk->bimg_info & BKPIMAGE_HAS_HOLE) &&
1831  (blk->hole_offset != 0 || blk->hole_length != 0))
1832  {
1834  "BKPIMAGE_HAS_HOLE not set, but hole offset %u length %u at %X/%X",
1835  (unsigned int) blk->hole_offset,
1836  (unsigned int) blk->hole_length,
1837  LSN_FORMAT_ARGS(state->ReadRecPtr));
1838  goto err;
1839  }
1840
1841  /*
1842  * Cross-check that bimg_len < BLCKSZ if it is compressed.
1843  */
1844  if (BKPIMAGE_COMPRESSED(blk->bimg_info) &&
1845  blk->bimg_len == BLCKSZ)
1846  {
1848  "BKPIMAGE_COMPRESSED set, but block image length %u at %X/%X",
1849  (unsigned int) blk->bimg_len,
1850  LSN_FORMAT_ARGS(state->ReadRecPtr));
1851  goto err;
1852  }
1853
1854  /*
1855  * cross-check that bimg_len = BLCKSZ if neither HAS_HOLE is
1856  * set nor COMPRESSED().
1857  */
1858  if (!(blk->bimg_info & BKPIMAGE_HAS_HOLE) &&
1859  !BKPIMAGE_COMPRESSED(blk->bimg_info) &&
1860  blk->bimg_len != BLCKSZ)
1861  {
 /*
  * NOTE(review): the message reports a "block image length" and the check
  * above is on blk->bimg_len, yet the value passed is blk->data_len; this
  * looks like it should be blk->bimg_len -- confirm and fix.
  */
1863  "neither BKPIMAGE_HAS_HOLE nor BKPIMAGE_COMPRESSED set, but block image length is %u at %X/%X",
1864  (unsigned int) blk->data_len,
1865  LSN_FORMAT_ARGS(state->ReadRecPtr));
1866  goto err;
1867  }
1868  }
1869  if (!(fork_flags & BKPBLOCK_SAME_REL))
1870  {
1871  COPY_HEADER_FIELD(&blk->rlocator, sizeof(RelFileLocator));
 /* remember it for later blocks that carry BKPBLOCK_SAME_REL */
1872  rlocator = &blk->rlocator;
1873  }
1874  else
1875  {
1876  if (rlocator == NULL)
1877  {
1879  "BKPBLOCK_SAME_REL set but no previous rel at %X/%X",
1880  LSN_FORMAT_ARGS(state->ReadRecPtr));
1881  goto err;
1882  }
1883
1884  blk->rlocator = *rlocator;
1885  }
1886  COPY_HEADER_FIELD(&blk->blkno, sizeof(BlockNumber));
1887  }
1888  else
1889  {
1891  "invalid block_id %u at %X/%X",
1892  block_id, LSN_FORMAT_ARGS(state->ReadRecPtr));
1893  goto err;
1894  }
1895  }
1896
 /* the payload sizes announced by the headers must match what is left */
1897  if (remaining != datatotal)
1898  goto shortdata_err;
1899
1900  /*
1901  * Ok, we've parsed the fragment headers, and verified that the total
1902  * length of the payload in the fragments is equal to the amount of data
1903  * left. Copy the data of each fragment to contiguous space after the
1904  * blocks array, inserting alignment padding before the data fragments so
1905  * they can be cast to struct pointers by REDO routines.
1906  */
1907  out = ((char *) decoded) +
1908  offsetof(DecodedXLogRecord, blocks) +
1909  sizeof(decoded->blocks[0]) * (decoded->max_block_id + 1);
1910
1911  /* block data first */
1912  for (block_id = 0; block_id <= decoded->max_block_id; block_id++)
1913  {
1914  DecodedBkpBlock *blk = &decoded->blocks[block_id];
1915
1916  if (!blk->in_use)
1917  continue;
1918
1919  Assert(blk->has_image || !blk->apply_image);
1920
1921  if (blk->has_image)
1922  {
1923  /* no need to align image */
1924  blk->bkp_image = out;
1925  memcpy(out, ptr, blk->bimg_len);
1926  ptr += blk->bimg_len;
1927  out += blk->bimg_len;
1928  }
1929  if (blk->has_data)
1930  {
1931  out = (char *) MAXALIGN(out);
1932  blk->data = out;
1933  memcpy(blk->data, ptr, blk->data_len);
1934  ptr += blk->data_len;
1935  out += blk->data_len;
1936  }
1937  }
1938
1939  /* and finally, the main data */
1940  if (decoded->main_data_len > 0)
1941  {
1942  out = (char *) MAXALIGN(out);
1943  decoded->main_data = out;
1944  memcpy(decoded->main_data, ptr, decoded->main_data_len);
1945  ptr += decoded->main_data_len;
1946  out += decoded->main_data_len;
1947  }
1948
1949  /* Report the actual size we used. */
1950  decoded->size = MAXALIGN(out - (char *) decoded);
 /*
  * NOTE(review): listing line 1951 is missing; the orphaned "decoded->size);"
  * below is the tail of that statement, presumably a sanity check of the used
  * size against the required-space estimate -- confirm against upstream.
  */
1952  decoded->size);
1953
1954  return true;
1955
 /* falls through into 'err' after reporting the length problem */
1956 shortdata_err:
1958  "record with invalid length at %X/%X",
1959  LSN_FORMAT_ARGS(state->ReadRecPtr));
1960 err:
1961  *errormsg = state->errormsg_buf;
1962
1963  return false;
1964 }
1965 
1966 /*
1967  * Returns information about the block that a block reference refers to.
1968  *
1969  * This is like XLogRecGetBlockTagExtended, except that the block reference
1970  * must exist and there's no access to prefetch_buffer.
 *
 * If the reference does not exist, this does not return normally: backend
 * builds raise elog(ERROR), frontend builds call pg_fatal().
 *
 * NOTE(review): listing line 1973 (function name and leading parameters) is
 * missing from this extract; the body uses 'record' and 'block_id' in
 * addition to the parameters visible below.  Recover the exact signature
 * from the upstream file.
1971  */
1972 void
1974  RelFileLocator *rlocator, ForkNumber *forknum,
1975  BlockNumber *blknum)
1976 {
1977  if (!XLogRecGetBlockTagExtended(record, block_id, rlocator, forknum,
1978  blknum, NULL))
1979  {
1980 #ifndef FRONTEND
1981  elog(ERROR, "could not locate backup block with ID %d in WAL record",
1982  block_id);
1983 #else
1984  pg_fatal("could not locate backup block with ID %d in WAL record",
1985  block_id);
1986 #endif
1987  }
1988 }
1989 
1990 /*
1991  * Returns information about the block that a block reference refers to,
1992  * optionally including the buffer that the block may already be in.
1993  *
1994  * If the WAL record contains a block reference with the given ID, *rlocator,
1995  * *forknum, *blknum and *prefetch_buffer are filled in (if not NULL), and
1996  * returns true. Otherwise returns false.
 *
 * On a false return none of the output arguments are written.
 *
 * NOTE(review): listing line 1999 (function name and leading parameters) is
 * missing from this extract; the body uses 'record' and 'block_id', and a
 * caller in this listing invokes XLogRecGetBlockTagExtended(record, block_id,
 * rlocator, forknum, blknum, NULL), so presumably this is that function --
 * confirm against upstream.
1997  */
1998 bool
2000  RelFileLocator *rlocator, ForkNumber *forknum,
2001  BlockNumber *blknum,
2002  Buffer *prefetch_buffer)
2003 {
2004  DecodedBkpBlock *bkpb;
2005
2006  if (!XLogRecHasBlockRef(record, block_id))
2007  return false;
2008
 /* each output pointer is optional; copy only the fields asked for */
2009  bkpb = &record->record->blocks[block_id];
2010  if (rlocator)
2011  *rlocator = bkpb->rlocator;
2012  if (forknum)
2013  *forknum = bkpb->forknum;
2014  if (blknum)
2015  *blknum = bkpb->blkno;
2016  if (prefetch_buffer)
2017  *prefetch_buffer = bkpb->prefetch_buffer;
2018  return true;
2019 }
2020 
2021 /*
2022  * Returns the data associated with a block reference, or NULL if there is
2023  * no data (e.g. because a full-page image was taken instead). The returned
2024  * pointer points to a MAXALIGNed buffer.
 *
 * If 'len' is not NULL it receives the data length, or 0 when the block has
 * no data.  Note that when 'block_id' is out of range or not in use, NULL is
 * returned and *len is left untouched.
 *
 * NOTE(review): listing line 2027 (function name and parameter list) is
 * missing from this extract; the body uses 'record', 'block_id' and 'len'.
 * Recover the exact signature from the upstream file.
2025  */
2026 char *
2028 {
2029  DecodedBkpBlock *bkpb;
2030
2031  if (block_id > record->record->max_block_id ||
2032  !record->record->blocks[block_id].in_use)
2033  return NULL;
2034
2035  bkpb = &record->record->blocks[block_id];
2036
2037  if (!bkpb->has_data)
2038  {
2039  if (len)
2040  *len = 0;
2041  return NULL;
2042  }
2043  else
2044  {
2045  if (len)
2046  *len = bkpb->data_len;
2047  return bkpb->data;
2048  }
2049 }
2050 
2051 /*
2052  * Restore a full-page image from a backup block attached to an XLOG record.
2053  *
2054  * Returns true if a full-page image is restored, and false on failure with
2055  * an error to be consumed by the caller.
2056  */
2057 bool
2058 RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page)
2059 {
2060  DecodedBkpBlock *bkpb;
2061  char *ptr;
2062  PGAlignedBlock tmp;
2063 
2064  if (block_id > record->record->max_block_id ||
2065  !record->record->blocks[block_id].in_use)
2066  {
2067  report_invalid_record(record,
2068  "could not restore image at %X/%X with invalid block %d specified",
2069  LSN_FORMAT_ARGS(record->ReadRecPtr),
2070  block_id);
2071  return false;
2072  }
2073  if (!record->record->blocks[block_id].has_image)
2074  {
2075  report_invalid_record(record, "could not restore image at %X/%X with invalid state, block %d",
2076  LSN_FORMAT_ARGS(record->ReadRecPtr),
2077  block_id);
2078  return false;
2079  }
2080 
2081  bkpb = &record->record->blocks[block_id];
2082  ptr = bkpb->bkp_image;
2083 
2084  if (BKPIMAGE_COMPRESSED(bkpb->bimg_info))
2085  {
2086  /* If a backup block image is compressed, decompress it */
2087  bool decomp_success = true;
2088 
2089  if ((bkpb->bimg_info & BKPIMAGE_COMPRESS_PGLZ) != 0)
2090  {
2091  if (pglz_decompress(ptr, bkpb->bimg_len, tmp.data,
2092  BLCKSZ - bkpb->hole_length, true) < 0)
2093  decomp_success = false;
2094  }
2095  else if ((bkpb->bimg_info & BKPIMAGE_COMPRESS_LZ4) != 0)
2096  {
2097 #ifdef USE_LZ4
2098  if (LZ4_decompress_safe(ptr, tmp.data,
2099  bkpb->bimg_len, BLCKSZ - bkpb->hole_length) <= 0)
2100  decomp_success = false;
2101 #else
2102  report_invalid_record(record, "could not restore image at %X/%X compressed with %s not supported by build, block %d",
2103  LSN_FORMAT_ARGS(record->ReadRecPtr),
2104  "LZ4",
2105  block_id);
2106  return false;
2107 #endif
2108  }
2109  else if ((bkpb->bimg_info & BKPIMAGE_COMPRESS_ZSTD) != 0)
2110  {
2111 #ifdef USE_ZSTD
2112  size_t decomp_result = ZSTD_decompress(tmp.data,
2113  BLCKSZ - bkpb->hole_length,
2114  ptr, bkpb->bimg_len);
2115 
2116  if (ZSTD_isError(decomp_result))
2117  decomp_success = false;
2118 #else
2119  report_invalid_record(record, "could not restore image at %X/%X compressed with %s not supported by build, block %d",
2120  LSN_FORMAT_ARGS(record->ReadRecPtr),
2121  "zstd",
2122  block_id);
2123  return false;
2124 #endif
2125  }
2126  else
2127  {
2128  report_invalid_record(record, "could not restore image at %X/%X compressed with unknown method, block %d",
2129  LSN_FORMAT_ARGS(record->ReadRecPtr),
2130  block_id);
2131  return false;
2132  }
2133 
2134  if (!decomp_success)
2135  {
2136  report_invalid_record(record, "could not decompress image at %X/%X, block %d",
2137  LSN_FORMAT_ARGS(record->ReadRecPtr),
2138  block_id);
2139  return false;
2140  }
2141 
2142  ptr = tmp.data;
2143  }
2144 
2145  /* generate page, taking into account hole if necessary */
2146  if (bkpb->hole_length == 0)
2147  {
2148  memcpy(page, ptr, BLCKSZ);
2149  }
2150  else
2151  {
2152  memcpy(page, ptr, bkpb->hole_offset);
2153  /* must zero-fill the hole */
2154  MemSet(page + bkpb->hole_offset, 0, bkpb->hole_length);
2155  memcpy(page + (bkpb->hole_offset + bkpb->hole_length),
2156  ptr + bkpb->hole_offset,
2157  BLCKSZ - (bkpb->hole_offset + bkpb->hole_length));
2158  }
2159 
2160  return true;
2161 }
2162 
2163 #ifndef FRONTEND
2164
2165 /*
2166  * Extract the FullTransactionId from a WAL record.
 *
 * Only compiled for the backend (inside #ifndef FRONTEND).
 *
 * NOTE(review): this extract is missing listing lines 2168-2169 (return
 * type, function name and parameter list; the body uses 'record'), line
 * 2179 (presumably the check backing the "only safe during replay" comment),
 * lines 2182-2183 (presumably where 'next_xid' and 'epoch' are set, since
 * neither is assigned in the visible lines) and line 2192 (presumably the
 * return statement).  Recover all of them from the upstream file.
2167  */
2170 {
2171  TransactionId xid,
2172  next_xid;
2173  uint32 epoch;
2174
2175  /*
2176  * This function is only safe during replay, because it depends on the
2177  * replay state. See AdvanceNextFullTransactionIdPastXid() for more.
2178  */
2180
2181  xid = XLogRecGetXid(record);
2184
2185  /*
2186  * If xid is numerically greater than next_xid, it has to be from the last
2187  * epoch.
2188  */
2189  if (unlikely(xid > next_xid))
2190  --epoch;
2191
2193 }
2194
2195 #endif
uint32 BlockNumber
Definition: block.h:31
int Buffer
Definition: buf.h:23
#define InvalidBuffer
Definition: buf.h:25
unsigned short uint16
Definition: c.h:494
unsigned int uint32
Definition: c.h:495
#define Min(x, y)
Definition: c.h:993
#define MAXALIGN(LEN)
Definition: c.h:800
signed int int32
Definition: c.h:483
#define Max(x, y)
Definition: c.h:987
#define pg_attribute_printf(f, a)
Definition: c.h:180
#define unlikely(x)
Definition: c.h:300
#define lengthof(array)
Definition: c.h:777
unsigned char uint8
Definition: c.h:493
#define MemSet(start, val, len)
Definition: c.h:1009
uint32 TransactionId
Definition: c.h:641
size_t Size
Definition: c.h:594
#define _(x)
Definition: elog.c:91
#define ERROR
Definition: elog.h:39
void err(int eval, const char *fmt,...)
Definition: err.c:43
#define MCXT_ALLOC_ZERO
Definition: fe_memutils.h:18
#define MCXT_ALLOC_NO_OOM
Definition: fe_memutils.h:17
bool IsUnderPostmaster
Definition: globals.c:116
int remaining
Definition: informix.c:667
int i
Definition: isn.c:73
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:77
static void const char * fmt
va_end(args)
Assert(fmt[strlen(fmt) - 1] !='\n')
va_start(args, fmt)
void pfree(void *pointer)
Definition: mcxt.c:1431
void * palloc_extended(Size size, int flags)
Definition: mcxt.c:1265
void * palloc(Size size)
Definition: mcxt.c:1201
#define AmStartupProcess()
Definition: miscadmin.h:455
#define InvalidRepOriginId
Definition: origin.h:33
#define pg_fatal(...)
#define MAXPGPATH
#define XLOG_SWITCH
Definition: pg_control.h:71
uint32 pg_crc32c
Definition: pg_crc32c.h:38
#define COMP_CRC32C(crc, data, len)
Definition: pg_crc32c.h:98
#define EQ_CRC32C(c1, c2)
Definition: pg_crc32c.h:42
#define INIT_CRC32C(crc)
Definition: pg_crc32c.h:41
#define FIN_CRC32C(crc)
Definition: pg_crc32c.h:103
const void size_t len
return crc
int32 pglz_decompress(const char *source, int32 slen, char *dest, int32 rawsize, bool check_complete)
static char * buf
Definition: pg_test_fsync.c:73
#define vsnprintf
Definition: port.h:237
#define pg_pread
Definition: port.h:225
#define snprintf
Definition: port.h:238
ForkNumber
Definition: relpath.h:48
#define RmgrIdIsValid(rmid)
Definition: rmgr.h:53
uint16 hole_length
Definition: xlogreader.h:140
char * bkp_image
Definition: xlogreader.h:138
Buffer prefetch_buffer
Definition: xlogreader.h:130
RelFileLocator rlocator
Definition: xlogreader.h:125
BlockNumber blkno
Definition: xlogreader.h:127
ForkNumber forknum
Definition: xlogreader.h:126
uint16 hole_offset
Definition: xlogreader.h:139
XLogRecord header
Definition: xlogreader.h:166
XLogRecPtr next_lsn
Definition: xlogreader.h:165
struct DecodedXLogRecord * next
Definition: xlogreader.h:161
TransactionId toplevel_xid
Definition: xlogreader.h:168
uint32 main_data_len
Definition: xlogreader.h:170
RepOriginId record_origin
Definition: xlogreader.h:167
DecodedBkpBlock blocks[FLEXIBLE_ARRAY_MEMBER]
Definition: xlogreader.h:172
XLogRecPtr lsn
Definition: xlogreader.h:164
FullTransactionId nextXid
Definition: transam.h:220
XLogSegNo ws_segno
Definition: xlogreader.h:48
TimeLineID ws_tli
Definition: xlogreader.h:49
WALOpenSegment wre_seg
Definition: xlogreader.h:388
char ws_dir[MAXPGPATH]
Definition: xlogreader.h:55
TimeLineID xlp_tli
Definition: xlog_internal.h:40
XLogRecPtr xlp_pageaddr
Definition: xlog_internal.h:41
DecodedXLogRecord * record
Definition: xlogreader.h:236
XLogRecPtr ReadRecPtr
Definition: xlogreader.h:206
XLogRecPtr xl_prev
Definition: xlogrecord.h:45
pg_crc32c xl_crc
Definition: xlogrecord.h:49
uint8 xl_info
Definition: xlogrecord.h:46
uint32 xl_tot_len
Definition: xlogrecord.h:43
RmgrId xl_rmid
Definition: xlogrecord.h:47
Definition: regguts.h:323
struct state * next
Definition: regguts.h:332
#define InvalidTransactionId
Definition: transam.h:31
#define EpochFromFullTransactionId(x)
Definition: transam.h:47
#define XidFromFullTransactionId(x)
Definition: transam.h:48
static FullTransactionId FullTransactionIdFromEpochAndXid(uint32 epoch, TransactionId xid)
Definition: transam.h:71
char data[BLCKSZ]
Definition: c.h:1108
TransamVariablesData * TransamVariables
Definition: varsup.c:34
static void pgstat_report_wait_start(uint32 wait_event_info)
Definition: wait_event.h:88
static void pgstat_report_wait_end(void)
Definition: wait_event.h:104
static const unsigned __int64 epoch
int wal_segment_size
Definition: xlog.c:147
#define XLP_FIRST_IS_CONTRECORD
Definition: xlog_internal.h:74
XLogLongPageHeaderData * XLogLongPageHeader
Definition: xlog_internal.h:71
#define XLP_FIRST_IS_OVERWRITE_CONTRECORD
Definition: xlog_internal.h:80
#define XLogSegmentOffset(xlogptr, wal_segsz_bytes)
#define MAXFNAMELEN
XLogPageHeaderData * XLogPageHeader
Definition: xlog_internal.h:54
#define XLP_LONG_HEADER
Definition: xlog_internal.h:76
#define XLP_ALL_FLAGS
Definition: xlog_internal.h:82
#define XLOG_PAGE_MAGIC
Definition: xlog_internal.h:34
#define XLByteToSeg(xlrp, logSegNo, wal_segsz_bytes)
#define XRecOffIsValid(xlrp)
#define SizeOfXLogShortPHD
Definition: xlog_internal.h:52
static void XLogFileName(char *fname, TimeLineID tli, XLogSegNo logSegNo, int wal_segsz_bytes)
#define XLogPageHeaderSize(hdr)
Definition: xlog_internal.h:84
#define XLByteInSeg(xlrp, logSegNo, wal_segsz_bytes)
#define LSN_FORMAT_ARGS(lsn)
Definition: xlogdefs.h:43
#define XLogRecPtrIsInvalid(r)
Definition: xlogdefs.h:29
uint16 RepOriginId
Definition: xlogdefs.h:65
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28
uint32 TimeLineID
Definition: xlogdefs.h:59
uint64 XLogSegNo
Definition: xlogdefs.h:48
bool XLogRecGetBlockTagExtended(XLogReaderState *record, uint8 block_id, RelFileLocator *rlocator, ForkNumber *forknum, BlockNumber *blknum, Buffer *prefetch_buffer)
Definition: xlogreader.c:1999
static XLogPageReadResult XLogDecodeNextRecord(XLogReaderState *state, bool nonblocking)
Definition: xlogreader.c:530
void XLogReaderSetDecodeBuffer(XLogReaderState *state, void *buffer, size_t size)
Definition: xlogreader.c:92
static void WALOpenSegmentInit(WALOpenSegment *seg, WALSegmentContext *segcxt, int segsize, const char *waldir)
Definition: xlogreader.c:209
static int ReadPageInternal(XLogReaderState *state, XLogRecPtr pageptr, int reqLen)
Definition: xlogreader.c:1002
XLogRecord * XLogReadRecord(XLogReaderState *state, char **errormsg)
Definition: xlogreader.c:391
static void report_invalid_record(XLogReaderState *state, const char *fmt,...) pg_attribute_printf(2, 3)
Definition: xlogreader.c:73
static void allocate_recordbuf(XLogReaderState *state, uint32 reclength)
Definition: xlogreader.c:192
bool WALRead(XLogReaderState *state, char *buf, XLogRecPtr startptr, Size count, TimeLineID tli, WALReadError *errinfo)
Definition: xlogreader.c:1505
#define MAX_ERRORMSG_LEN
Definition: xlogreader.c:60
DecodedXLogRecord * XLogNextRecord(XLogReaderState *state, char **errormsg)
Definition: xlogreader.c:327
void XLogReaderResetError(XLogReaderState *state)
Definition: xlogreader.c:1367
static void XLogReaderInvalReadState(XLogReaderState *state)
Definition: xlogreader.c:1115
#define COPY_HEADER_FIELD(_dst, _size)
bool XLogReaderValidatePageHeader(XLogReaderState *state, XLogRecPtr recptr, char *phdr)
Definition: xlogreader.c:1226
FullTransactionId XLogRecGetFullXid(XLogReaderState *record)
Definition: xlogreader.c:2169
void XLogReaderFree(XLogReaderState *state)
Definition: xlogreader.c:163
void XLogRecGetBlockTag(XLogReaderState *record, uint8 block_id, RelFileLocator *rlocator, ForkNumber *forknum, BlockNumber *blknum)
Definition: xlogreader.c:1973
DecodedXLogRecord * XLogReadAhead(XLogReaderState *state, bool nonblocking)
Definition: xlogreader.c:968
XLogReaderState * XLogReaderAllocate(int wal_segment_size, const char *waldir, XLogReaderRoutine *routine, void *private_data)
Definition: xlogreader.c:108
static void ResetDecoder(XLogReaderState *state)
Definition: xlogreader.c:1597
bool DecodeXLogRecord(XLogReaderState *state, DecodedXLogRecord *decoded, XLogRecord *record, XLogRecPtr lsn, char **errormsg)
Definition: xlogreader.c:1664
static bool ValidXLogRecord(XLogReaderState *state, XLogRecord *record, XLogRecPtr recptr)
Definition: xlogreader.c:1195
char * XLogRecGetBlockData(XLogReaderState *record, uint8 block_id, Size *len)
Definition: xlogreader.c:2027
#define DEFAULT_DECODE_BUFFER_SIZE
Definition: xlogreader.c:66
size_t DecodeXLogRecordRequiredSpace(size_t xl_tot_len)
Definition: xlogreader.c:1631
XLogRecPtr XLogFindNextRecord(XLogReaderState *state, XLogRecPtr RecPtr)
Definition: xlogreader.c:1385
void XLogBeginRead(XLogReaderState *state, XLogRecPtr RecPtr)
Definition: xlogreader.c:233
bool RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page)
Definition: xlogreader.c:2058
static DecodedXLogRecord * XLogReadRecordAlloc(XLogReaderState *state, size_t xl_tot_len, bool allow_oversized)
Definition: xlogreader.c:440
XLogRecPtr XLogReleasePreviousRecord(XLogReaderState *state)
Definition: xlogreader.c:251
static bool ValidXLogRecordHeader(XLogReaderState *state, XLogRecPtr RecPtr, XLogRecPtr PrevRecPtr, XLogRecord *record, bool randAccess)
Definition: xlogreader.c:1129
static bool XLogReaderHasQueuedRecordOrError(XLogReaderState *state)
Definition: xlogreader.h:325
#define XLogRecGetXid(decoder)
Definition: xlogreader.h:412
XLogPageReadResult
Definition: xlogreader.h:350
@ XLREAD_WOULDBLOCK
Definition: xlogreader.h:353
@ XLREAD_SUCCESS
Definition: xlogreader.h:351
@ XLREAD_FAIL
Definition: xlogreader.h:352
#define XLogRecHasBlockRef(decoder, block_id)
Definition: xlogreader.h:420
#define BKPIMAGE_COMPRESS_ZSTD
Definition: xlogrecord.h:162
#define BKPBLOCK_FORK_MASK
Definition: xlogrecord.h:195
#define BKPBLOCK_HAS_DATA
Definition: xlogrecord.h:198
#define BKPIMAGE_APPLY
Definition: xlogrecord.h:158
#define BKPIMAGE_HAS_HOLE
Definition: xlogrecord.h:157
#define XLR_BLOCK_ID_DATA_LONG
Definition: xlogrecord.h:242
#define BKPIMAGE_COMPRESS_LZ4
Definition: xlogrecord.h:161
#define BKPIMAGE_COMPRESSED(info)
Definition: xlogrecord.h:164
#define XLR_BLOCK_ID_TOPLEVEL_XID
Definition: xlogrecord.h:244
#define XLR_BLOCK_ID_DATA_SHORT
Definition: xlogrecord.h:241
#define XLR_MAX_BLOCK_ID
Definition: xlogrecord.h:239
#define XLR_INFO_MASK
Definition: xlogrecord.h:62
#define BKPBLOCK_SAME_REL
Definition: xlogrecord.h:200
#define BKPIMAGE_COMPRESS_PGLZ
Definition: xlogrecord.h:160
#define XLR_BLOCK_ID_ORIGIN
Definition: xlogrecord.h:243
#define SizeOfXLogRecord
Definition: xlogrecord.h:55
#define BKPBLOCK_HAS_IMAGE
Definition: xlogrecord.h:197
static uint32 readOff
Definition: xlogrecovery.c:233
static uint32 readLen
Definition: xlogrecovery.c:234