PostgreSQL Source Code  git master
xlogreader.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * xlogreader.c
4  * Generic XLog reading facility
5  *
6  * Portions Copyright (c) 2013-2020, PostgreSQL Global Development Group
7  *
8  * IDENTIFICATION
9  * src/backend/access/transam/xlogreader.c
10  *
11  * NOTES
12  * See xlogreader.h for more notes on this facility.
13  *
14  * This file is compiled as both front-end and backend code, so it
15  * may not use ereport, server-defined static variables, etc.
16  *-------------------------------------------------------------------------
17  */
18 #include "postgres.h"
19 
20 #include <unistd.h>
21 
22 #include "access/transam.h"
23 #include "access/xlog_internal.h"
24 #include "access/xlogreader.h"
25 #include "access/xlogrecord.h"
26 #include "catalog/pg_control.h"
27 #include "common/pg_lzcompress.h"
28 #include "replication/origin.h"
29 
30 #ifndef FRONTEND
31 #include "miscadmin.h"
32 #include "pgstat.h"
33 #include "utils/memutils.h"
34 #endif
35 
36 static void report_invalid_record(XLogReaderState *state, const char *fmt,...)
37  pg_attribute_printf(2, 3);
38 static bool allocate_recordbuf(XLogReaderState *state, uint32 reclength);
39 static int ReadPageInternal(XLogReaderState *state, XLogRecPtr pageptr,
40  int reqLen);
41 static void XLogReaderInvalReadState(XLogReaderState *state);
42 static bool ValidXLogRecordHeader(XLogReaderState *state, XLogRecPtr RecPtr,
43  XLogRecPtr PrevRecPtr, XLogRecord *record, bool randAccess);
44 static bool ValidXLogRecord(XLogReaderState *state, XLogRecord *record,
45  XLogRecPtr recptr);
46 static void ResetDecoder(XLogReaderState *state);
47 
48 /* size of the buffer allocated for error message. */
49 #define MAX_ERRORMSG_LEN 1000
50 
51 /*
52  * Construct a string in state->errormsg_buf explaining what's wrong with
53  * the current record being read.
54  */
55 static void
56 report_invalid_record(XLogReaderState *state, const char *fmt,...)
57 {
58  va_list args;
59 
60  fmt = _(fmt);
61 
62  va_start(args, fmt);
63  vsnprintf(state->errormsg_buf, MAX_ERRORMSG_LEN, fmt, args);
64  va_end(args);
65 }
66 
67 /*
68  * Allocate and initialize a new XLogReader.
69  *
70  * Returns NULL if the xlogreader couldn't be allocated.
71  */
73 XLogReaderAllocate(int wal_segment_size, const char *waldir,
74  XLogPageReadCB pagereadfunc, void *private_data)
75 {
76  XLogReaderState *state;
77 
78  state = (XLogReaderState *)
81  if (!state)
82  return NULL;
83 
84  state->max_block_id = -1;
85 
86  /*
87  * Permanently allocate readBuf. We do it this way, rather than just
88  * making a static array, for two reasons: (1) no need to waste the
89  * storage in most instantiations of the backend; (2) a static char array
90  * isn't guaranteed to have any particular alignment, whereas
91  * palloc_extended() will provide MAXALIGN'd storage.
92  */
93  state->readBuf = (char *) palloc_extended(XLOG_BLCKSZ,
95  if (!state->readBuf)
96  {
97  pfree(state);
98  return NULL;
99  }
100 
101  /* Initialize segment info. */
102  WALOpenSegmentInit(&state->seg, &state->segcxt, wal_segment_size,
103  waldir);
104 
105  state->read_page = pagereadfunc;
106  /* system_identifier initialized to zeroes above */
107  state->private_data = private_data;
108  /* ReadRecPtr, EndRecPtr and readLen initialized to zeroes above */
111  if (!state->errormsg_buf)
112  {
113  pfree(state->readBuf);
114  pfree(state);
115  return NULL;
116  }
117  state->errormsg_buf[0] = '\0';
118 
119  /*
120  * Allocate an initial readRecordBuf of minimal size, which can later be
121  * enlarged if necessary.
122  */
123  if (!allocate_recordbuf(state, 0))
124  {
125  pfree(state->errormsg_buf);
126  pfree(state->readBuf);
127  pfree(state);
128  return NULL;
129  }
130 
131  return state;
132 }
133 
134 void
136 {
137  int block_id;
138 
139  for (block_id = 0; block_id <= XLR_MAX_BLOCK_ID; block_id++)
140  {
141  if (state->blocks[block_id].data)
142  pfree(state->blocks[block_id].data);
143  }
144  if (state->main_data)
145  pfree(state->main_data);
146 
147  pfree(state->errormsg_buf);
148  if (state->readRecordBuf)
149  pfree(state->readRecordBuf);
150  pfree(state->readBuf);
151  pfree(state);
152 }
153 
154 /*
155  * Allocate readRecordBuf to fit a record of at least the given length.
156  * Returns true if successful, false if out of memory.
157  *
158  * readRecordBufSize is set to the new buffer size.
159  *
160  * To avoid useless small increases, round its size to a multiple of
161  * XLOG_BLCKSZ, and make sure it's at least 5*Max(BLCKSZ, XLOG_BLCKSZ) to start
162  * with. (That is enough for all "normal" records, but very large commit or
163  * abort records might need more space.)
164  */
165 static bool
167 {
168  uint32 newSize = reclength;
169 
170  newSize += XLOG_BLCKSZ - (newSize % XLOG_BLCKSZ);
171  newSize = Max(newSize, 5 * Max(BLCKSZ, XLOG_BLCKSZ));
172 
173 #ifndef FRONTEND
174 
175  /*
176  * Note that in much unlucky circumstances, the random data read from a
177  * recycled segment can cause this routine to be called with a size
178  * causing a hard failure at allocation. For a standby, this would cause
179  * the instance to stop suddenly with a hard failure, preventing it to
180  * retry fetching WAL from one of its sources which could allow it to move
181  * on with replay without a manual restart. If the data comes from a past
182  * recycled segment and is still valid, then the allocation may succeed
183  * but record checks are going to fail so this would be short-lived. If
184  * the allocation fails because of a memory shortage, then this is not a
185  * hard failure either per the guarantee given by MCXT_ALLOC_NO_OOM.
186  */
187  if (!AllocSizeIsValid(newSize))
188  return false;
189 
190 #endif
191 
192  if (state->readRecordBuf)
193  pfree(state->readRecordBuf);
194  state->readRecordBuf =
195  (char *) palloc_extended(newSize, MCXT_ALLOC_NO_OOM);
196  if (state->readRecordBuf == NULL)
197  {
198  state->readRecordBufSize = 0;
199  return false;
200  }
201  state->readRecordBufSize = newSize;
202  return true;
203 }
204 
205 /*
206  * Initialize the passed segment structs.
207  */
208 void
210  int segsize, const char *waldir)
211 {
212  seg->ws_file = -1;
213  seg->ws_segno = 0;
214  seg->ws_tli = 0;
215 
216  segcxt->ws_segsize = segsize;
217  if (waldir)
218  snprintf(segcxt->ws_dir, MAXPGPATH, "%s", waldir);
219 }
220 
221 /*
222  * Begin reading WAL at 'RecPtr'.
223  *
224  * 'RecPtr' should point to the beginnning of a valid WAL record. Pointing at
225  * the beginning of a page is also OK, if there is a new record right after
226  * the page header, i.e. not a continuation.
227  *
228  * This does not make any attempt to read the WAL yet, and hence cannot fail.
229  * If the starting address is not correct, the first call to XLogReadRecord()
230  * will error out.
231  */
232 void
234 {
235  Assert(!XLogRecPtrIsInvalid(RecPtr));
236 
237  ResetDecoder(state);
238 
239  /* Begin at the passed-in record pointer. */
240  state->EndRecPtr = RecPtr;
241  state->ReadRecPtr = InvalidXLogRecPtr;
242 }
243 
244 /*
245  * Attempt to read an XLOG record.
246  *
247  * XLogBeginRead() or XLogFindNextRecord() must be called before the first call
248  * to XLogReadRecord().
249  *
250  * If the read_page callback fails to read the requested data, NULL is
251  * returned. The callback is expected to have reported the error; errormsg
252  * is set to NULL.
253  *
254  * If the reading fails for some other reason, NULL is also returned, and
255  * *errormsg is set to a string with details of the failure.
256  *
257  * The returned pointer (or *errormsg) points to an internal buffer that's
258  * valid until the next call to XLogReadRecord.
259  */
260 XLogRecord *
261 XLogReadRecord(XLogReaderState *state, char **errormsg)
262 {
263  XLogRecPtr RecPtr;
264  XLogRecord *record;
265  XLogRecPtr targetPagePtr;
266  bool randAccess;
267  uint32 len,
268  total_len;
269  uint32 targetRecOff;
270  uint32 pageHeaderSize;
271  bool gotheader;
272  int readOff;
273 
274  /*
275  * randAccess indicates whether to verify the previous-record pointer of
276  * the record we're reading. We only do this if we're reading
277  * sequentially, which is what we initially assume.
278  */
279  randAccess = false;
280 
281  /* reset error state */
282  *errormsg = NULL;
283  state->errormsg_buf[0] = '\0';
284 
285  ResetDecoder(state);
286 
287  RecPtr = state->EndRecPtr;
288 
289  if (state->ReadRecPtr != InvalidXLogRecPtr)
290  {
291  /* read the record after the one we just read */
292 
293  /*
294  * EndRecPtr is pointing to end+1 of the previous WAL record. If
295  * we're at a page boundary, no more records can fit on the current
296  * page. We must skip over the page header, but we can't do that until
297  * we've read in the page, since the header size is variable.
298  */
299  }
300  else
301  {
302  /*
303  * Caller supplied a position to start at.
304  *
305  * In this case, EndRecPtr should already be pointing to a valid
306  * record starting position.
307  */
308  Assert(XRecOffIsValid(RecPtr));
309  randAccess = true;
310  }
311 
312  state->currRecPtr = RecPtr;
313 
314  targetPagePtr = RecPtr - (RecPtr % XLOG_BLCKSZ);
315  targetRecOff = RecPtr % XLOG_BLCKSZ;
316 
317  /*
318  * Read the page containing the record into state->readBuf. Request enough
319  * byte to cover the whole record header, or at least the part of it that
320  * fits on the same page.
321  */
322  readOff = ReadPageInternal(state, targetPagePtr,
323  Min(targetRecOff + SizeOfXLogRecord, XLOG_BLCKSZ));
324  if (readOff < 0)
325  goto err;
326 
327  /*
328  * ReadPageInternal always returns at least the page header, so we can
329  * examine it now.
330  */
331  pageHeaderSize = XLogPageHeaderSize((XLogPageHeader) state->readBuf);
332  if (targetRecOff == 0)
333  {
334  /*
335  * At page start, so skip over page header.
336  */
337  RecPtr += pageHeaderSize;
338  targetRecOff = pageHeaderSize;
339  }
340  else if (targetRecOff < pageHeaderSize)
341  {
342  report_invalid_record(state, "invalid record offset at %X/%X",
343  (uint32) (RecPtr >> 32), (uint32) RecPtr);
344  goto err;
345  }
346 
347  if ((((XLogPageHeader) state->readBuf)->xlp_info & XLP_FIRST_IS_CONTRECORD) &&
348  targetRecOff == pageHeaderSize)
349  {
350  report_invalid_record(state, "contrecord is requested by %X/%X",
351  (uint32) (RecPtr >> 32), (uint32) RecPtr);
352  goto err;
353  }
354 
355  /* ReadPageInternal has verified the page header */
356  Assert(pageHeaderSize <= readOff);
357 
358  /*
359  * Read the record length.
360  *
361  * NB: Even though we use an XLogRecord pointer here, the whole record
362  * header might not fit on this page. xl_tot_len is the first field of the
363  * struct, so it must be on this page (the records are MAXALIGNed), but we
364  * cannot access any other fields until we've verified that we got the
365  * whole header.
366  */
367  record = (XLogRecord *) (state->readBuf + RecPtr % XLOG_BLCKSZ);
368  total_len = record->xl_tot_len;
369 
370  /*
371  * If the whole record header is on this page, validate it immediately.
372  * Otherwise do just a basic sanity check on xl_tot_len, and validate the
373  * rest of the header after reading it from the next page. The xl_tot_len
374  * check is necessary here to ensure that we enter the "Need to reassemble
375  * record" code path below; otherwise we might fail to apply
376  * ValidXLogRecordHeader at all.
377  */
378  if (targetRecOff <= XLOG_BLCKSZ - SizeOfXLogRecord)
379  {
380  if (!ValidXLogRecordHeader(state, RecPtr, state->ReadRecPtr, record,
381  randAccess))
382  goto err;
383  gotheader = true;
384  }
385  else
386  {
387  /* XXX: more validation should be done here */
388  if (total_len < SizeOfXLogRecord)
389  {
390  report_invalid_record(state,
391  "invalid record length at %X/%X: wanted %u, got %u",
392  (uint32) (RecPtr >> 32), (uint32) RecPtr,
393  (uint32) SizeOfXLogRecord, total_len);
394  goto err;
395  }
396  gotheader = false;
397  }
398 
399  len = XLOG_BLCKSZ - RecPtr % XLOG_BLCKSZ;
400  if (total_len > len)
401  {
402  /* Need to reassemble record */
403  char *contdata;
404  XLogPageHeader pageHeader;
405  char *buffer;
406  uint32 gotlen;
407 
408  /*
409  * Enlarge readRecordBuf as needed.
410  */
411  if (total_len > state->readRecordBufSize &&
412  !allocate_recordbuf(state, total_len))
413  {
414  /* We treat this as a "bogus data" condition */
415  report_invalid_record(state, "record length %u at %X/%X too long",
416  total_len,
417  (uint32) (RecPtr >> 32), (uint32) RecPtr);
418  goto err;
419  }
420 
421  /* Copy the first fragment of the record from the first page. */
422  memcpy(state->readRecordBuf,
423  state->readBuf + RecPtr % XLOG_BLCKSZ, len);
424  buffer = state->readRecordBuf + len;
425  gotlen = len;
426 
427  do
428  {
429  /* Calculate pointer to beginning of next page */
430  targetPagePtr += XLOG_BLCKSZ;
431 
432  /* Wait for the next page to become available */
433  readOff = ReadPageInternal(state, targetPagePtr,
434  Min(total_len - gotlen + SizeOfXLogShortPHD,
435  XLOG_BLCKSZ));
436 
437  if (readOff < 0)
438  goto err;
439 
440  Assert(SizeOfXLogShortPHD <= readOff);
441 
442  /* Check that the continuation on next page looks valid */
443  pageHeader = (XLogPageHeader) state->readBuf;
444  if (!(pageHeader->xlp_info & XLP_FIRST_IS_CONTRECORD))
445  {
446  report_invalid_record(state,
447  "there is no contrecord flag at %X/%X",
448  (uint32) (RecPtr >> 32), (uint32) RecPtr);
449  goto err;
450  }
451 
452  /*
453  * Cross-check that xlp_rem_len agrees with how much of the record
454  * we expect there to be left.
455  */
456  if (pageHeader->xlp_rem_len == 0 ||
457  total_len != (pageHeader->xlp_rem_len + gotlen))
458  {
459  report_invalid_record(state,
460  "invalid contrecord length %u at %X/%X",
461  pageHeader->xlp_rem_len,
462  (uint32) (RecPtr >> 32), (uint32) RecPtr);
463  goto err;
464  }
465 
466  /* Append the continuation from this page to the buffer */
467  pageHeaderSize = XLogPageHeaderSize(pageHeader);
468 
469  if (readOff < pageHeaderSize)
470  readOff = ReadPageInternal(state, targetPagePtr,
471  pageHeaderSize);
472 
473  Assert(pageHeaderSize <= readOff);
474 
475  contdata = (char *) state->readBuf + pageHeaderSize;
476  len = XLOG_BLCKSZ - pageHeaderSize;
477  if (pageHeader->xlp_rem_len < len)
478  len = pageHeader->xlp_rem_len;
479 
480  if (readOff < pageHeaderSize + len)
481  readOff = ReadPageInternal(state, targetPagePtr,
482  pageHeaderSize + len);
483 
484  memcpy(buffer, (char *) contdata, len);
485  buffer += len;
486  gotlen += len;
487 
488  /* If we just reassembled the record header, validate it. */
489  if (!gotheader)
490  {
491  record = (XLogRecord *) state->readRecordBuf;
492  if (!ValidXLogRecordHeader(state, RecPtr, state->ReadRecPtr,
493  record, randAccess))
494  goto err;
495  gotheader = true;
496  }
497  } while (gotlen < total_len);
498 
499  Assert(gotheader);
500 
501  record = (XLogRecord *) state->readRecordBuf;
502  if (!ValidXLogRecord(state, record, RecPtr))
503  goto err;
504 
505  pageHeaderSize = XLogPageHeaderSize((XLogPageHeader) state->readBuf);
506  state->ReadRecPtr = RecPtr;
507  state->EndRecPtr = targetPagePtr + pageHeaderSize
508  + MAXALIGN(pageHeader->xlp_rem_len);
509  }
510  else
511  {
512  /* Wait for the record data to become available */
513  readOff = ReadPageInternal(state, targetPagePtr,
514  Min(targetRecOff + total_len, XLOG_BLCKSZ));
515  if (readOff < 0)
516  goto err;
517 
518  /* Record does not cross a page boundary */
519  if (!ValidXLogRecord(state, record, RecPtr))
520  goto err;
521 
522  state->EndRecPtr = RecPtr + MAXALIGN(total_len);
523 
524  state->ReadRecPtr = RecPtr;
525  }
526 
527  /*
528  * Special processing if it's an XLOG SWITCH record
529  */
530  if (record->xl_rmid == RM_XLOG_ID &&
531  (record->xl_info & ~XLR_INFO_MASK) == XLOG_SWITCH)
532  {
533  /* Pretend it extends to end of segment */
534  state->EndRecPtr += state->segcxt.ws_segsize - 1;
535  state->EndRecPtr -= XLogSegmentOffset(state->EndRecPtr, state->segcxt.ws_segsize);
536  }
537 
538  if (DecodeXLogRecord(state, record, errormsg))
539  return record;
540  else
541  return NULL;
542 
543 err:
544 
545  /*
546  * Invalidate the read state. We might read from a different source after
547  * failure.
548  */
550 
551  if (state->errormsg_buf[0] != '\0')
552  *errormsg = state->errormsg_buf;
553 
554  return NULL;
555 }
556 
557 /*
558  * Read a single xlog page including at least [pageptr, reqLen] of valid data
559  * via the read_page() callback.
560  *
561  * Returns -1 if the required page cannot be read for some reason; errormsg_buf
562  * is set in that case (unless the error occurs in the read_page callback).
563  *
564  * We fetch the page from a reader-local cache if we know we have the required
565  * data and if there hasn't been any error since caching the data.
566  */
567 static int
568 ReadPageInternal(XLogReaderState *state, XLogRecPtr pageptr, int reqLen)
569 {
570  int readLen;
571  uint32 targetPageOff;
572  XLogSegNo targetSegNo;
573  XLogPageHeader hdr;
574 
575  Assert((pageptr % XLOG_BLCKSZ) == 0);
576 
577  XLByteToSeg(pageptr, targetSegNo, state->segcxt.ws_segsize);
578  targetPageOff = XLogSegmentOffset(pageptr, state->segcxt.ws_segsize);
579 
580  /* check whether we have all the requested data already */
581  if (targetSegNo == state->seg.ws_segno &&
582  targetPageOff == state->segoff && reqLen <= state->readLen)
583  return state->readLen;
584 
585  /*
586  * Data is not in our buffer.
587  *
588  * Every time we actually read the page, even if we looked at parts of it
589  * before, we need to do verification as the read_page callback might now
590  * be rereading data from a different source.
591  *
592  * Whenever switching to a new WAL segment, we read the first page of the
593  * file and validate its header, even if that's not where the target
594  * record is. This is so that we can check the additional identification
595  * info that is present in the first page's "long" header.
596  */
597  if (targetSegNo != state->seg.ws_segno && targetPageOff != 0)
598  {
599  XLogRecPtr targetSegmentPtr = pageptr - targetPageOff;
600 
601  readLen = state->read_page(state, targetSegmentPtr, XLOG_BLCKSZ,
602  state->currRecPtr,
603  state->readBuf);
604  if (readLen < 0)
605  goto err;
606 
607  /* we can be sure to have enough WAL available, we scrolled back */
608  Assert(readLen == XLOG_BLCKSZ);
609 
610  if (!XLogReaderValidatePageHeader(state, targetSegmentPtr,
611  state->readBuf))
612  goto err;
613  }
614 
615  /*
616  * First, read the requested data length, but at least a short page header
617  * so that we can validate it.
618  */
619  readLen = state->read_page(state, pageptr, Max(reqLen, SizeOfXLogShortPHD),
620  state->currRecPtr,
621  state->readBuf);
622  if (readLen < 0)
623  goto err;
624 
625  Assert(readLen <= XLOG_BLCKSZ);
626 
627  /* Do we have enough data to check the header length? */
628  if (readLen <= SizeOfXLogShortPHD)
629  goto err;
630 
631  Assert(readLen >= reqLen);
632 
633  hdr = (XLogPageHeader) state->readBuf;
634 
635  /* still not enough */
636  if (readLen < XLogPageHeaderSize(hdr))
637  {
638  readLen = state->read_page(state, pageptr, XLogPageHeaderSize(hdr),
639  state->currRecPtr,
640  state->readBuf);
641  if (readLen < 0)
642  goto err;
643  }
644 
645  /*
646  * Now that we know we have the full header, validate it.
647  */
648  if (!XLogReaderValidatePageHeader(state, pageptr, (char *) hdr))
649  goto err;
650 
651  /* update read state information */
652  state->seg.ws_segno = targetSegNo;
653  state->segoff = targetPageOff;
654  state->readLen = readLen;
655 
656  return readLen;
657 
658 err:
660  return -1;
661 }
662 
663 /*
664  * Invalidate the xlogreader's read state to force a re-read.
665  */
666 static void
668 {
669  state->seg.ws_segno = 0;
670  state->segoff = 0;
671  state->readLen = 0;
672 }
673 
674 /*
675  * Validate an XLOG record header.
676  *
677  * This is just a convenience subroutine to avoid duplicated code in
678  * XLogReadRecord. It's not intended for use from anywhere else.
679  */
680 static bool
682  XLogRecPtr PrevRecPtr, XLogRecord *record,
683  bool randAccess)
684 {
685  if (record->xl_tot_len < SizeOfXLogRecord)
686  {
687  report_invalid_record(state,
688  "invalid record length at %X/%X: wanted %u, got %u",
689  (uint32) (RecPtr >> 32), (uint32) RecPtr,
690  (uint32) SizeOfXLogRecord, record->xl_tot_len);
691  return false;
692  }
693  if (record->xl_rmid > RM_MAX_ID)
694  {
695  report_invalid_record(state,
696  "invalid resource manager ID %u at %X/%X",
697  record->xl_rmid, (uint32) (RecPtr >> 32),
698  (uint32) RecPtr);
699  return false;
700  }
701  if (randAccess)
702  {
703  /*
704  * We can't exactly verify the prev-link, but surely it should be less
705  * than the record's own address.
706  */
707  if (!(record->xl_prev < RecPtr))
708  {
709  report_invalid_record(state,
710  "record with incorrect prev-link %X/%X at %X/%X",
711  (uint32) (record->xl_prev >> 32),
712  (uint32) record->xl_prev,
713  (uint32) (RecPtr >> 32), (uint32) RecPtr);
714  return false;
715  }
716  }
717  else
718  {
719  /*
720  * Record's prev-link should exactly match our previous location. This
721  * check guards against torn WAL pages where a stale but valid-looking
722  * WAL record starts on a sector boundary.
723  */
724  if (record->xl_prev != PrevRecPtr)
725  {
726  report_invalid_record(state,
727  "record with incorrect prev-link %X/%X at %X/%X",
728  (uint32) (record->xl_prev >> 32),
729  (uint32) record->xl_prev,
730  (uint32) (RecPtr >> 32), (uint32) RecPtr);
731  return false;
732  }
733  }
734 
735  return true;
736 }
737 
738 
739 /*
740  * CRC-check an XLOG record. We do not believe the contents of an XLOG
741  * record (other than to the minimal extent of computing the amount of
742  * data to read in) until we've checked the CRCs.
743  *
744  * We assume all of the record (that is, xl_tot_len bytes) has been read
745  * into memory at *record. Also, ValidXLogRecordHeader() has accepted the
746  * record's header, which means in particular that xl_tot_len is at least
747  * SizeOfXLogRecord.
748  */
749 static bool
751 {
752  pg_crc32c crc;
753 
754  /* Calculate the CRC */
755  INIT_CRC32C(crc);
756  COMP_CRC32C(crc, ((char *) record) + SizeOfXLogRecord, record->xl_tot_len - SizeOfXLogRecord);
757  /* include the record header last */
758  COMP_CRC32C(crc, (char *) record, offsetof(XLogRecord, xl_crc));
759  FIN_CRC32C(crc);
760 
761  if (!EQ_CRC32C(record->xl_crc, crc))
762  {
763  report_invalid_record(state,
764  "incorrect resource manager data checksum in record at %X/%X",
765  (uint32) (recptr >> 32), (uint32) recptr);
766  return false;
767  }
768 
769  return true;
770 }
771 
772 /*
773  * Validate a page header.
774  *
775  * Check if 'phdr' is valid as the header of the XLog page at position
776  * 'recptr'.
777  */
778 bool
780  char *phdr)
781 {
782  XLogRecPtr recaddr;
783  XLogSegNo segno;
784  int32 offset;
785  XLogPageHeader hdr = (XLogPageHeader) phdr;
786 
787  Assert((recptr % XLOG_BLCKSZ) == 0);
788 
789  XLByteToSeg(recptr, segno, state->segcxt.ws_segsize);
790  offset = XLogSegmentOffset(recptr, state->segcxt.ws_segsize);
791 
792  XLogSegNoOffsetToRecPtr(segno, offset, state->segcxt.ws_segsize, recaddr);
793 
794  if (hdr->xlp_magic != XLOG_PAGE_MAGIC)
795  {
796  char fname[MAXFNAMELEN];
797 
798  XLogFileName(fname, state->seg.ws_tli, segno, state->segcxt.ws_segsize);
799 
800  report_invalid_record(state,
801  "invalid magic number %04X in log segment %s, offset %u",
802  hdr->xlp_magic,
803  fname,
804  offset);
805  return false;
806  }
807 
808  if ((hdr->xlp_info & ~XLP_ALL_FLAGS) != 0)
809  {
810  char fname[MAXFNAMELEN];
811 
812  XLogFileName(fname, state->seg.ws_tli, segno, state->segcxt.ws_segsize);
813 
814  report_invalid_record(state,
815  "invalid info bits %04X in log segment %s, offset %u",
816  hdr->xlp_info,
817  fname,
818  offset);
819  return false;
820  }
821 
822  if (hdr->xlp_info & XLP_LONG_HEADER)
823  {
824  XLogLongPageHeader longhdr = (XLogLongPageHeader) hdr;
825 
826  if (state->system_identifier &&
827  longhdr->xlp_sysid != state->system_identifier)
828  {
829  report_invalid_record(state,
830  "WAL file is from different database system: WAL file database system identifier is %llu, pg_control database system identifier is %llu",
831  (unsigned long long) longhdr->xlp_sysid,
832  (unsigned long long) state->system_identifier);
833  return false;
834  }
835  else if (longhdr->xlp_seg_size != state->segcxt.ws_segsize)
836  {
837  report_invalid_record(state,
838  "WAL file is from different database system: incorrect segment size in page header");
839  return false;
840  }
841  else if (longhdr->xlp_xlog_blcksz != XLOG_BLCKSZ)
842  {
843  report_invalid_record(state,
844  "WAL file is from different database system: incorrect XLOG_BLCKSZ in page header");
845  return false;
846  }
847  }
848  else if (offset == 0)
849  {
850  char fname[MAXFNAMELEN];
851 
852  XLogFileName(fname, state->seg.ws_tli, segno, state->segcxt.ws_segsize);
853 
854  /* hmm, first page of file doesn't have a long header? */
855  report_invalid_record(state,
856  "invalid info bits %04X in log segment %s, offset %u",
857  hdr->xlp_info,
858  fname,
859  offset);
860  return false;
861  }
862 
863  /*
864  * Check that the address on the page agrees with what we expected. This
865  * check typically fails when an old WAL segment is recycled, and hasn't
866  * yet been overwritten with new data yet.
867  */
868  if (hdr->xlp_pageaddr != recaddr)
869  {
870  char fname[MAXFNAMELEN];
871 
872  XLogFileName(fname, state->seg.ws_tli, segno, state->segcxt.ws_segsize);
873 
874  report_invalid_record(state,
875  "unexpected pageaddr %X/%X in log segment %s, offset %u",
876  (uint32) (hdr->xlp_pageaddr >> 32), (uint32) hdr->xlp_pageaddr,
877  fname,
878  offset);
879  return false;
880  }
881 
882  /*
883  * Since child timelines are always assigned a TLI greater than their
884  * immediate parent's TLI, we should never see TLI go backwards across
885  * successive pages of a consistent WAL sequence.
886  *
887  * Sometimes we re-read a segment that's already been (partially) read. So
888  * we only verify TLIs for pages that are later than the last remembered
889  * LSN.
890  */
891  if (recptr > state->latestPagePtr)
892  {
893  if (hdr->xlp_tli < state->latestPageTLI)
894  {
895  char fname[MAXFNAMELEN];
896 
897  XLogFileName(fname, state->seg.ws_tli, segno, state->segcxt.ws_segsize);
898 
899  report_invalid_record(state,
900  "out-of-sequence timeline ID %u (after %u) in log segment %s, offset %u",
901  hdr->xlp_tli,
902  state->latestPageTLI,
903  fname,
904  offset);
905  return false;
906  }
907  }
908  state->latestPagePtr = recptr;
909  state->latestPageTLI = hdr->xlp_tli;
910 
911  return true;
912 }
913 
914 #ifdef FRONTEND
915 /*
916  * Functions that are currently not needed in the backend, but are better
917  * implemented inside xlogreader.c because of the internal facilities available
918  * here.
919  */
920 
921 /*
922  * Find the first record with an lsn >= RecPtr.
923  *
924  * This is different from XLogBeginRead() in that RecPtr doesn't need to point
925  * to a valid record boundary. Useful for checking whether RecPtr is a valid
926  * xlog address for reading, and to find the first valid address after some
927  * address when dumping records for debugging purposes.
928  *
929  * This positions the reader, like XLogBeginRead(), so that the next call to
930  * XLogReadRecord() will read the next valid record.
931  */
933 XLogFindNextRecord(XLogReaderState *state, XLogRecPtr RecPtr)
934 {
935  XLogRecPtr tmpRecPtr;
938  char *errormsg;
939 
940  Assert(!XLogRecPtrIsInvalid(RecPtr));
941 
942  /*
943  * skip over potential continuation data, keeping in mind that it may span
944  * multiple pages
945  */
946  tmpRecPtr = RecPtr;
947  while (true)
948  {
949  XLogRecPtr targetPagePtr;
950  int targetRecOff;
951  uint32 pageHeaderSize;
952  int readLen;
953 
954  /*
955  * Compute targetRecOff. It should typically be equal or greater than
956  * short page-header since a valid record can't start anywhere before
957  * that, except when caller has explicitly specified the offset that
958  * falls somewhere there or when we are skipping multi-page
959  * continuation record. It doesn't matter though because
960  * ReadPageInternal() is prepared to handle that and will read at
961  * least short page-header worth of data
962  */
963  targetRecOff = tmpRecPtr % XLOG_BLCKSZ;
964 
965  /* scroll back to page boundary */
966  targetPagePtr = tmpRecPtr - targetRecOff;
967 
968  /* Read the page containing the record */
969  readLen = ReadPageInternal(state, targetPagePtr, targetRecOff);
970  if (readLen < 0)
971  goto err;
972 
973  header = (XLogPageHeader) state->readBuf;
974 
975  pageHeaderSize = XLogPageHeaderSize(header);
976 
977  /* make sure we have enough data for the page header */
978  readLen = ReadPageInternal(state, targetPagePtr, pageHeaderSize);
979  if (readLen < 0)
980  goto err;
981 
982  /* skip over potential continuation data */
983  if (header->xlp_info & XLP_FIRST_IS_CONTRECORD)
984  {
985  /*
986  * If the length of the remaining continuation data is more than
987  * what can fit in this page, the continuation record crosses over
988  * this page. Read the next page and try again. xlp_rem_len in the
989  * next page header will contain the remaining length of the
990  * continuation data
991  *
992  * Note that record headers are MAXALIGN'ed
993  */
994  if (MAXALIGN(header->xlp_rem_len) >= (XLOG_BLCKSZ - pageHeaderSize))
995  tmpRecPtr = targetPagePtr + XLOG_BLCKSZ;
996  else
997  {
998  /*
999  * The previous continuation record ends in this page. Set
1000  * tmpRecPtr to point to the first valid record
1001  */
1002  tmpRecPtr = targetPagePtr + pageHeaderSize
1003  + MAXALIGN(header->xlp_rem_len);
1004  break;
1005  }
1006  }
1007  else
1008  {
1009  tmpRecPtr = targetPagePtr + pageHeaderSize;
1010  break;
1011  }
1012  }
1013 
1014  /*
1015  * we know now that tmpRecPtr is an address pointing to a valid XLogRecord
1016  * because either we're at the first record after the beginning of a page
1017  * or we just jumped over the remaining data of a continuation.
1018  */
1019  XLogBeginRead(state, tmpRecPtr);
1020  while (XLogReadRecord(state, &errormsg) != NULL)
1021  {
1022  /* past the record we've found, break out */
1023  if (RecPtr <= state->ReadRecPtr)
1024  {
1025  /* Rewind the reader to the beginning of the last record. */
1026  found = state->ReadRecPtr;
1027  XLogBeginRead(state, found);
1028  return found;
1029  }
1030  }
1031 
1032 err:
1033  XLogReaderInvalReadState(state);
1034 
1035  return InvalidXLogRecPtr;
1036 }
1037 
1038 #endif /* FRONTEND */
1039 
1040 /*
1041  * Read 'count' bytes into 'buf', starting at location 'startptr', from WAL
1042  * fetched from timeline 'tli'.
1043  *
1044  * 'seg/segcxt' identify the last segment used. 'openSegment' is a callback
1045  * to open the next segment, if necessary.
1046  *
1047  * Returns true if succeeded, false if an error occurs, in which case
1048  * 'errinfo' receives error details.
1049  *
1050  * XXX probably this should be improved to suck data directly from the
1051  * WAL buffers when possible.
1052  */
1053 bool
1055  WALOpenSegment *seg, WALSegmentContext *segcxt,
1056  WALSegmentOpen openSegment, WALReadError *errinfo)
1057 {
1058  char *p;
1059  XLogRecPtr recptr;
1060  Size nbytes;
1061 
1062  p = buf;
1063  recptr = startptr;
1064  nbytes = count;
1065 
1066  while (nbytes > 0)
1067  {
1068  uint32 startoff;
1069  int segbytes;
1070  int readbytes;
1071 
1072  startoff = XLogSegmentOffset(recptr, segcxt->ws_segsize);
1073 
1074  /*
1075  * If the data we want is not in a segment we have open, close what we
1076  * have (if anything) and open the next one, using the caller's
1077  * provided openSegment callback.
1078  */
1079  if (seg->ws_file < 0 ||
1080  !XLByteInSeg(recptr, seg->ws_segno, segcxt->ws_segsize) ||
1081  tli != seg->ws_tli)
1082  {
1083  XLogSegNo nextSegNo;
1084 
1085  if (seg->ws_file >= 0)
1086  close(seg->ws_file);
1087 
1088  XLByteToSeg(recptr, nextSegNo, segcxt->ws_segsize);
1089  seg->ws_file = openSegment(nextSegNo, segcxt, &tli);
1090 
1091  /* Update the current segment info. */
1092  seg->ws_tli = tli;
1093  seg->ws_segno = nextSegNo;
1094  }
1095 
1096  /* How many bytes are within this segment? */
1097  if (nbytes > (segcxt->ws_segsize - startoff))
1098  segbytes = segcxt->ws_segsize - startoff;
1099  else
1100  segbytes = nbytes;
1101 
1102 #ifndef FRONTEND
1104 #endif
1105 
1106  /* Reset errno first; eases reporting non-errno-affecting errors */
1107  errno = 0;
1108  readbytes = pg_pread(seg->ws_file, p, segbytes, (off_t) startoff);
1109 
1110 #ifndef FRONTEND
1112 #endif
1113 
1114  if (readbytes <= 0)
1115  {
1116  errinfo->wre_errno = errno;
1117  errinfo->wre_req = segbytes;
1118  errinfo->wre_read = readbytes;
1119  errinfo->wre_off = startoff;
1120  errinfo->wre_seg = *seg;
1121  return false;
1122  }
1123 
1124  /* Update state for read */
1125  recptr += readbytes;
1126  nbytes -= readbytes;
1127  p += readbytes;
1128  }
1129 
1130  return true;
1131 }
1132 
1133 /* ----------------------------------------
1134  * Functions for decoding the data and block references in a record.
1135  * ----------------------------------------
1136  */
1137 
1138 /* private function to reset the state between records */
1139 static void
1141 {
1142  int block_id;
1143 
1144  state->decoded_record = NULL;
1145 
1146  state->main_data_len = 0;
1147 
1148  for (block_id = 0; block_id <= state->max_block_id; block_id++)
1149  {
1150  state->blocks[block_id].in_use = false;
1151  state->blocks[block_id].has_image = false;
1152  state->blocks[block_id].has_data = false;
1153  state->blocks[block_id].apply_image = false;
1154  }
1155  state->max_block_id = -1;
1156 }
1157 
1158 /*
1159  * Decode the previously read record.
1160  *
1161  * On error, a human-readable error message is returned in *errormsg, and
1162  * the return value is false.
1163  */
1164 bool
1165 DecodeXLogRecord(XLogReaderState *state, XLogRecord *record, char **errormsg)
1166 {
1167  /*
1168  * read next _size bytes from record buffer, but check for overrun first.
1169  */
1170 #define COPY_HEADER_FIELD(_dst, _size) \
1171  do { \
1172  if (remaining < _size) \
1173  goto shortdata_err; \
1174  memcpy(_dst, ptr, _size); \
1175  ptr += _size; \
1176  remaining -= _size; \
1177  } while(0)
1178 
1179  char *ptr;
1180  uint32 remaining;
1181  uint32 datatotal;
1182  RelFileNode *rnode = NULL;
1183  uint8 block_id;
1184 
1185  ResetDecoder(state);
1186 
1187  state->decoded_record = record;
1189 
1190  ptr = (char *) record;
1191  ptr += SizeOfXLogRecord;
1192  remaining = record->xl_tot_len - SizeOfXLogRecord;
1193 
1194  /* Decode the headers */
1195  datatotal = 0;
1196  while (remaining > datatotal)
1197  {
1198  COPY_HEADER_FIELD(&block_id, sizeof(uint8));
1199 
1200  if (block_id == XLR_BLOCK_ID_DATA_SHORT)
1201  {
1202  /* XLogRecordDataHeaderShort */
1203  uint8 main_data_len;
1204 
1205  COPY_HEADER_FIELD(&main_data_len, sizeof(uint8));
1206 
1207  state->main_data_len = main_data_len;
1208  datatotal += main_data_len;
1209  break; /* by convention, the main data fragment is
1210  * always last */
1211  }
1212  else if (block_id == XLR_BLOCK_ID_DATA_LONG)
1213  {
1214  /* XLogRecordDataHeaderLong */
1215  uint32 main_data_len;
1216 
1217  COPY_HEADER_FIELD(&main_data_len, sizeof(uint32));
1218  state->main_data_len = main_data_len;
1219  datatotal += main_data_len;
1220  break; /* by convention, the main data fragment is
1221  * always last */
1222  }
1223  else if (block_id == XLR_BLOCK_ID_ORIGIN)
1224  {
1225  COPY_HEADER_FIELD(&state->record_origin, sizeof(RepOriginId));
1226  }
1227  else if (block_id <= XLR_MAX_BLOCK_ID)
1228  {
1229  /* XLogRecordBlockHeader */
1230  DecodedBkpBlock *blk;
1231  uint8 fork_flags;
1232 
1233  if (block_id <= state->max_block_id)
1234  {
1235  report_invalid_record(state,
1236  "out-of-order block_id %u at %X/%X",
1237  block_id,
1238  (uint32) (state->ReadRecPtr >> 32),
1239  (uint32) state->ReadRecPtr);
1240  goto err;
1241  }
1242  state->max_block_id = block_id;
1243 
1244  blk = &state->blocks[block_id];
1245  blk->in_use = true;
1246  blk->apply_image = false;
1247 
1248  COPY_HEADER_FIELD(&fork_flags, sizeof(uint8));
1249  blk->forknum = fork_flags & BKPBLOCK_FORK_MASK;
1250  blk->flags = fork_flags;
1251  blk->has_image = ((fork_flags & BKPBLOCK_HAS_IMAGE) != 0);
1252  blk->has_data = ((fork_flags & BKPBLOCK_HAS_DATA) != 0);
1253 
1254  COPY_HEADER_FIELD(&blk->data_len, sizeof(uint16));
1255  /* cross-check that the HAS_DATA flag is set iff data_length > 0 */
1256  if (blk->has_data && blk->data_len == 0)
1257  {
1258  report_invalid_record(state,
1259  "BKPBLOCK_HAS_DATA set, but no data included at %X/%X",
1260  (uint32) (state->ReadRecPtr >> 32), (uint32) state->ReadRecPtr);
1261  goto err;
1262  }
1263  if (!blk->has_data && blk->data_len != 0)
1264  {
1265  report_invalid_record(state,
1266  "BKPBLOCK_HAS_DATA not set, but data length is %u at %X/%X",
1267  (unsigned int) blk->data_len,
1268  (uint32) (state->ReadRecPtr >> 32), (uint32) state->ReadRecPtr);
1269  goto err;
1270  }
1271  datatotal += blk->data_len;
1272 
1273  if (blk->has_image)
1274  {
1275  COPY_HEADER_FIELD(&blk->bimg_len, sizeof(uint16));
1276  COPY_HEADER_FIELD(&blk->hole_offset, sizeof(uint16));
1277  COPY_HEADER_FIELD(&blk->bimg_info, sizeof(uint8));
1278 
1279  blk->apply_image = ((blk->bimg_info & BKPIMAGE_APPLY) != 0);
1280 
1281  if (blk->bimg_info & BKPIMAGE_IS_COMPRESSED)
1282  {
1283  if (blk->bimg_info & BKPIMAGE_HAS_HOLE)
1284  COPY_HEADER_FIELD(&blk->hole_length, sizeof(uint16));
1285  else
1286  blk->hole_length = 0;
1287  }
1288  else
1289  blk->hole_length = BLCKSZ - blk->bimg_len;
1290  datatotal += blk->bimg_len;
1291 
1292  /*
1293  * cross-check that hole_offset > 0, hole_length > 0 and
1294  * bimg_len < BLCKSZ if the HAS_HOLE flag is set.
1295  */
1296  if ((blk->bimg_info & BKPIMAGE_HAS_HOLE) &&
1297  (blk->hole_offset == 0 ||
1298  blk->hole_length == 0 ||
1299  blk->bimg_len == BLCKSZ))
1300  {
1301  report_invalid_record(state,
1302  "BKPIMAGE_HAS_HOLE set, but hole offset %u length %u block image length %u at %X/%X",
1303  (unsigned int) blk->hole_offset,
1304  (unsigned int) blk->hole_length,
1305  (unsigned int) blk->bimg_len,
1306  (uint32) (state->ReadRecPtr >> 32), (uint32) state->ReadRecPtr);
1307  goto err;
1308  }
1309 
1310  /*
1311  * cross-check that hole_offset == 0 and hole_length == 0 if
1312  * the HAS_HOLE flag is not set.
1313  */
1314  if (!(blk->bimg_info & BKPIMAGE_HAS_HOLE) &&
1315  (blk->hole_offset != 0 || blk->hole_length != 0))
1316  {
1317  report_invalid_record(state,
1318  "BKPIMAGE_HAS_HOLE not set, but hole offset %u length %u at %X/%X",
1319  (unsigned int) blk->hole_offset,
1320  (unsigned int) blk->hole_length,
1321  (uint32) (state->ReadRecPtr >> 32), (uint32) state->ReadRecPtr);
1322  goto err;
1323  }
1324 
1325  /*
1326  * cross-check that bimg_len < BLCKSZ if the IS_COMPRESSED
1327  * flag is set.
1328  */
1329  if ((blk->bimg_info & BKPIMAGE_IS_COMPRESSED) &&
1330  blk->bimg_len == BLCKSZ)
1331  {
1332  report_invalid_record(state,
1333  "BKPIMAGE_IS_COMPRESSED set, but block image length %u at %X/%X",
1334  (unsigned int) blk->bimg_len,
1335  (uint32) (state->ReadRecPtr >> 32), (uint32) state->ReadRecPtr);
1336  goto err;
1337  }
1338 
1339  /*
1340  * cross-check that bimg_len = BLCKSZ if neither HAS_HOLE nor
1341  * IS_COMPRESSED flag is set.
1342  */
1343  if (!(blk->bimg_info & BKPIMAGE_HAS_HOLE) &&
1344  !(blk->bimg_info & BKPIMAGE_IS_COMPRESSED) &&
1345  blk->bimg_len != BLCKSZ)
1346  {
1347  report_invalid_record(state,
1348  "neither BKPIMAGE_HAS_HOLE nor BKPIMAGE_IS_COMPRESSED set, but block image length is %u at %X/%X",
1349  (unsigned int) blk->data_len,
1350  (uint32) (state->ReadRecPtr >> 32), (uint32) state->ReadRecPtr);
1351  goto err;
1352  }
1353  }
1354  if (!(fork_flags & BKPBLOCK_SAME_REL))
1355  {
1356  COPY_HEADER_FIELD(&blk->rnode, sizeof(RelFileNode));
1357  rnode = &blk->rnode;
1358  }
1359  else
1360  {
1361  if (rnode == NULL)
1362  {
1363  report_invalid_record(state,
1364  "BKPBLOCK_SAME_REL set but no previous rel at %X/%X",
1365  (uint32) (state->ReadRecPtr >> 32), (uint32) state->ReadRecPtr);
1366  goto err;
1367  }
1368 
1369  blk->rnode = *rnode;
1370  }
1371  COPY_HEADER_FIELD(&blk->blkno, sizeof(BlockNumber));
1372  }
1373  else
1374  {
1375  report_invalid_record(state,
1376  "invalid block_id %u at %X/%X",
1377  block_id,
1378  (uint32) (state->ReadRecPtr >> 32),
1379  (uint32) state->ReadRecPtr);
1380  goto err;
1381  }
1382  }
1383 
1384  if (remaining != datatotal)
1385  goto shortdata_err;
1386 
1387  /*
1388  * Ok, we've parsed the fragment headers, and verified that the total
1389  * length of the payload in the fragments is equal to the amount of data
1390  * left. Copy the data of each fragment to a separate buffer.
1391  *
1392  * We could just set up pointers into readRecordBuf, but we want to align
1393  * the data for the convenience of the callers. Backup images are not
1394  * copied, however; they don't need alignment.
1395  */
1396 
1397  /* block data first */
1398  for (block_id = 0; block_id <= state->max_block_id; block_id++)
1399  {
1400  DecodedBkpBlock *blk = &state->blocks[block_id];
1401 
1402  if (!blk->in_use)
1403  continue;
1404 
1405  Assert(blk->has_image || !blk->apply_image);
1406 
1407  if (blk->has_image)
1408  {
1409  blk->bkp_image = ptr;
1410  ptr += blk->bimg_len;
1411  }
1412  if (blk->has_data)
1413  {
1414  if (!blk->data || blk->data_len > blk->data_bufsz)
1415  {
1416  if (blk->data)
1417  pfree(blk->data);
1418 
1419  /*
1420  * Force the initial request to be BLCKSZ so that we don't
1421  * waste time with lots of trips through this stanza as a
1422  * result of WAL compression.
1423  */
1424  blk->data_bufsz = MAXALIGN(Max(blk->data_len, BLCKSZ));
1425  blk->data = palloc(blk->data_bufsz);
1426  }
1427  memcpy(blk->data, ptr, blk->data_len);
1428  ptr += blk->data_len;
1429  }
1430  }
1431 
1432  /* and finally, the main data */
1433  if (state->main_data_len > 0)
1434  {
1435  if (!state->main_data || state->main_data_len > state->main_data_bufsz)
1436  {
1437  if (state->main_data)
1438  pfree(state->main_data);
1439 
1440  /*
1441  * main_data_bufsz must be MAXALIGN'ed. In many xlog record
1442  * types, we omit trailing struct padding on-disk to save a few
1443  * bytes; but compilers may generate accesses to the xlog struct
1444  * that assume that padding bytes are present. If the palloc
1445  * request is not large enough to include such padding bytes then
1446  * we'll get valgrind complaints due to otherwise-harmless fetches
1447  * of the padding bytes.
1448  *
1449  * In addition, force the initial request to be reasonably large
1450  * so that we don't waste time with lots of trips through this
1451  * stanza. BLCKSZ / 2 seems like a good compromise choice.
1452  */
1453  state->main_data_bufsz = MAXALIGN(Max(state->main_data_len,
1454  BLCKSZ / 2));
1455  state->main_data = palloc(state->main_data_bufsz);
1456  }
1457  memcpy(state->main_data, ptr, state->main_data_len);
1458  ptr += state->main_data_len;
1459  }
1460 
1461  return true;
1462 
1463 shortdata_err:
1464  report_invalid_record(state,
1465  "record with invalid length at %X/%X",
1466  (uint32) (state->ReadRecPtr >> 32), (uint32) state->ReadRecPtr);
1467 err:
1468  *errormsg = state->errormsg_buf;
1469 
1470  return false;
1471 }
1472 
1473 /*
1474  * Returns information about the block that a block reference refers to.
1475  *
1476  * If the WAL record contains a block reference with the given ID, *rnode,
1477  * *forknum, and *blknum are filled in (if not NULL), and returns true.
1478  * Otherwise returns false.
1479  */
1480 bool
1482  RelFileNode *rnode, ForkNumber *forknum, BlockNumber *blknum)
1483 {
1484  DecodedBkpBlock *bkpb;
1485 
1486  if (!record->blocks[block_id].in_use)
1487  return false;
1488 
1489  bkpb = &record->blocks[block_id];
1490  if (rnode)
1491  *rnode = bkpb->rnode;
1492  if (forknum)
1493  *forknum = bkpb->forknum;
1494  if (blknum)
1495  *blknum = bkpb->blkno;
1496  return true;
1497 }
1498 
1499 /*
1500  * Returns the data associated with a block reference, or NULL if there is
1501  * no data (e.g. because a full-page image was taken instead). The returned
1502  * pointer points to a MAXALIGNed buffer.
1503  */
1504 char *
1506 {
1507  DecodedBkpBlock *bkpb;
1508 
1509  if (!record->blocks[block_id].in_use)
1510  return NULL;
1511 
1512  bkpb = &record->blocks[block_id];
1513 
1514  if (!bkpb->has_data)
1515  {
1516  if (len)
1517  *len = 0;
1518  return NULL;
1519  }
1520  else
1521  {
1522  if (len)
1523  *len = bkpb->data_len;
1524  return bkpb->data;
1525  }
1526 }
1527 
1528 /*
1529  * Restore a full-page image from a backup block attached to an XLOG record.
1530  *
1531  * Returns the buffer number containing the page.
1532  */
1533 bool
1534 RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page)
1535 {
1536  DecodedBkpBlock *bkpb;
1537  char *ptr;
1538  PGAlignedBlock tmp;
1539 
1540  if (!record->blocks[block_id].in_use)
1541  return false;
1542  if (!record->blocks[block_id].has_image)
1543  return false;
1544 
1545  bkpb = &record->blocks[block_id];
1546  ptr = bkpb->bkp_image;
1547 
1548  if (bkpb->bimg_info & BKPIMAGE_IS_COMPRESSED)
1549  {
1550  /* If a backup block image is compressed, decompress it */
1551  if (pglz_decompress(ptr, bkpb->bimg_len, tmp.data,
1552  BLCKSZ - bkpb->hole_length, true) < 0)
1553  {
1554  report_invalid_record(record, "invalid compressed image at %X/%X, block %d",
1555  (uint32) (record->ReadRecPtr >> 32),
1556  (uint32) record->ReadRecPtr,
1557  block_id);
1558  return false;
1559  }
1560  ptr = tmp.data;
1561  }
1562 
1563  /* generate page, taking into account hole if necessary */
1564  if (bkpb->hole_length == 0)
1565  {
1566  memcpy(page, ptr, BLCKSZ);
1567  }
1568  else
1569  {
1570  memcpy(page, ptr, bkpb->hole_offset);
1571  /* must zero-fill the hole */
1572  MemSet(page + bkpb->hole_offset, 0, bkpb->hole_length);
1573  memcpy(page + (bkpb->hole_offset + bkpb->hole_length),
1574  ptr + bkpb->hole_offset,
1575  BLCKSZ - (bkpb->hole_offset + bkpb->hole_length));
1576  }
1577 
1578  return true;
1579 }
1580 
1581 #ifndef FRONTEND
1582 
1583 /*
1584  * Extract the FullTransactionId from a WAL record.
1585  */
1588 {
1589  TransactionId xid,
1590  next_xid;
1591  uint32 epoch;
1592 
1593  /*
1594  * This function is only safe during replay, because it depends on the
1595  * replay state. See AdvanceNextFullTransactionIdPastXid() for more.
1596  */
1598 
1599  xid = XLogRecGetXid(record);
1602 
1603  /*
1604  * If xid is numerically greater than next_xid, it has to be from the
1605  * last epoch.
1606  */
1607  if (unlikely(xid > next_xid))
1608  --epoch;
1609 
1610  return FullTransactionIdFromEpochAndXid(epoch, xid);
1611 }
1612 
1613 #endif
int remaining
Definition: informix.c:667
WALOpenSegment wre_seg
Definition: xlogreader.h:269
BlockNumber blkno
Definition: xlogreader.h:68
#define INIT_CRC32C(crc)
Definition: pg_crc32c.h:41
#define AmStartupProcess()
Definition: miscadmin.h:410
XLogRecPtr xl_prev
Definition: xlogrecord.h:45
char ws_dir[MAXPGPATH]
Definition: xlogreader.h:47
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28
#define BKPIMAGE_HAS_HOLE
Definition: xlogrecord.h:146
XLogPageReadCB read_page
Definition: xlogreader.h:116
char * readRecordBuf
Definition: xlogreader.h:207
uint32 TimeLineID
Definition: xlogdefs.h:52
int wal_segment_size
Definition: xlog.c:112
uint32 TransactionId
Definition: c.h:513
#define XLogPageHeaderSize(hdr)
Definition: xlog_internal.h:85
#define XLR_BLOCK_ID_DATA_LONG
Definition: xlogrecord.h:224
static XLogRecPtr ReadRecPtr
Definition: xlog.c:828
uint32 pg_crc32c
Definition: pg_crc32c.h:38
#define Min(x, y)
Definition: c.h:920
uint16 hole_offset
Definition: xlogreader.h:77
unsigned char uint8
Definition: c.h:365
uint16 RepOriginId
Definition: xlogdefs.h:58
void * palloc_extended(Size size, int flags)
Definition: mcxt.c:1013
static void report_invalid_record(XLogReaderState *state, const char *fmt,...) pg_attribute_printf(2
Definition: xlogreader.c:56
void WALOpenSegmentInit(WALOpenSegment *seg, WALSegmentContext *segcxt, int segsize, const char *waldir)
Definition: xlogreader.c:209
#define MCXT_ALLOC_NO_OOM
Definition: fe_memutils.h:18
#define MemSet(start, val, len)
Definition: c.h:971
RmgrId xl_rmid
Definition: xlogrecord.h:47
XLogPageHeaderData * XLogPageHeader
Definition: xlog_internal.h:57
uint32 BlockNumber
Definition: block.h:31
FullTransactionId nextFullXid
Definition: transam.h:164
void * private_data
Definition: xlogreader.h:127
uint16 bimg_len
Definition: xlogreader.h:79
bool DecodeXLogRecord(XLogReaderState *state, XLogRecord *record, char **errormsg)
Definition: xlogreader.c:1165
static int ReadPageInternal(XLogReaderState *state, XLogRecPtr pageptr, int reqLen)
Definition: xlogreader.c:568
int32 pglz_decompress(const char *source, int32 slen, char *dest, int32 rawsize, bool check_complete)
#define MAX_ERRORMSG_LEN
Definition: xlogreader.c:49
signed int int32
Definition: c.h:355
bool XLogReaderValidatePageHeader(XLogReaderState *state, XLogRecPtr recptr, char *phdr)
Definition: xlogreader.c:779
#define pg_attribute_printf(f, a)
Definition: c.h:130
XLogRecPtr EndRecPtr
Definition: xlogreader.h:135
#define XidFromFullTransactionId(x)
Definition: transam.h:48
ssize_t pg_pread(int fd, void *buf, size_t nbyte, off_t offset)
Definition: pread.c:27
#define XLByteInSeg(xlrp, logSegNo, wal_segsz_bytes)
XLogLongPageHeaderData * XLogLongPageHeader
Definition: xlog_internal.h:74
int(* WALSegmentOpen)(XLogSegNo nextSegNo, WALSegmentContext *segcxt, TimeLineID *tli_p)
Definition: xlogreader.h:238
WALOpenSegment seg
Definition: xlogreader.h:172
char data[BLCKSZ]
Definition: c.h:1100
unsigned short uint16
Definition: c.h:366
void pfree(void *pointer)
Definition: mcxt.c:1056
XLogRecord * XLogReadRecord(XLogReaderState *state, char **errormsg)
Definition: xlogreader.c:261
XLogRecPtr latestPagePtr
Definition: xlogreader.h:179
static uint32 readOff
Definition: xlog.c:795
uint16 hole_length
Definition: xlogreader.h:78
static void XLogReaderInvalReadState(XLogReaderState *state)
Definition: xlogreader.c:667
uint32 xl_tot_len
Definition: xlogrecord.h:43
#define XLOG_PAGE_MAGIC
Definition: xlog_internal.h:34
uint32 main_data_len
Definition: xlogreader.h:148
#define vsnprintf
Definition: port.h:191
#define MAXPGPATH
static void static bool allocate_recordbuf(XLogReaderState *state, uint32 reclength)
Definition: xlogreader.c:166
#define BKPIMAGE_APPLY
Definition: xlogrecord.h:148
static char * buf
Definition: pg_test_fsync.c:67
bool IsUnderPostmaster
Definition: globals.c:109
uint64 XLogSegNo
Definition: xlogdefs.h:41
bool WALRead(char *buf, XLogRecPtr startptr, Size count, TimeLineID tli, WALOpenSegment *seg, WALSegmentContext *segcxt, WALSegmentOpen openSegment, WALReadError *errinfo)
Definition: xlogreader.c:1054
XLogRecPtr ReadRecPtr
Definition: xlogreader.h:134
XLogRecord * decoded_record
Definition: xlogreader.h:145
XLogSegNo ws_segno
Definition: xlogreader.h:40
VariableCache ShmemVariableCache
Definition: varsup.c:34
#define COPY_HEADER_FIELD(_dst, _size)
void XLogBeginRead(XLogReaderState *state, XLogRecPtr RecPtr)
Definition: xlogreader.c:233
unsigned int uint32
Definition: c.h:367
static void pgstat_report_wait_end(void)
Definition: pgstat.h:1344
#define EQ_CRC32C(c1, c2)
Definition: pg_crc32c.h:42
#define AllocSizeIsValid(size)
Definition: memutils.h:42
ForkNumber
Definition: relpath.h:40
TimeLineID xlp_tli
Definition: xlog_internal.h:40
#define XLR_MAX_BLOCK_ID
Definition: xlogrecord.h:221
XLogRecPtr xlp_pageaddr
Definition: xlog_internal.h:41
#define XLogRecPtrIsInvalid(r)
Definition: xlogdefs.h:29
uint32 readRecordBufSize
Definition: xlogreader.h:208
#define SizeOfXLogRecord
Definition: xlogrecord.h:55
void XLogReaderFree(XLogReaderState *state)
Definition: xlogreader.c:135
#define MAXFNAMELEN
#define RM_MAX_ID
Definition: rmgr.h:33
bool XLogRecGetBlockTag(XLogReaderState *record, uint8 block_id, RelFileNode *rnode, ForkNumber *forknum, BlockNumber *blknum)
Definition: xlogreader.c:1481
#define XLogRecGetXid(decoder)
Definition: xlogreader.h:286
char * XLogRecGetBlockData(XLogReaderState *record, uint8 block_id, Size *len)
Definition: xlogreader.c:1505
#define BKPBLOCK_SAME_REL
Definition: xlogrecord.h:183
#define BKPIMAGE_IS_COMPRESSED
Definition: xlogrecord.h:147
#define BKPBLOCK_HAS_IMAGE
Definition: xlogrecord.h:180
#define XLogSegmentOffset(xlogptr, wal_segsz_bytes)
ForkNumber forknum
Definition: xlogreader.h:67
#define EpochFromFullTransactionId(x)
Definition: transam.h:47
#define XLP_ALL_FLAGS
Definition: xlog_internal.h:83
uint16 data_len
Definition: xlogreader.h:85
XLogRecPtr currRecPtr
Definition: xlogreader.h:183
#define Max(x, y)
Definition: c.h:914
#define XLogSegNoOffsetToRecPtr(segno, offset, wal_segsz_bytes, dest)
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define Assert(condition)
Definition: c.h:738
#define XLR_INFO_MASK
Definition: xlogrecord.h:62
#define XLP_LONG_HEADER
Definition: xlog_internal.h:79
Definition: regguts.h:298
static bool ValidXLogRecordHeader(XLogReaderState *state, XLogRecPtr RecPtr, XLogRecPtr PrevRecPtr, XLogRecord *record, bool randAccess)
Definition: xlogreader.c:681
uint16 data_bufsz
Definition: xlogreader.h:86
#define MCXT_ALLOC_ZERO
Definition: fe_memutils.h:19
#define SizeOfXLogShortPHD
Definition: xlog_internal.h:55
size_t Size
Definition: c.h:466
#define XLogFileName(fname, tli, logSegNo, wal_segsz_bytes)
static void pgstat_report_wait_start(uint32 wait_event_info)
Definition: pgstat.h:1320
uint8 xl_info
Definition: xlogrecord.h:46
FullTransactionId XLogRecGetFullXid(XLogReaderState *record)
Definition: xlogreader.c:1587
#define XLR_BLOCK_ID_ORIGIN
Definition: xlogrecord.h:225
#define XLP_FIRST_IS_CONTRECORD
Definition: xlog_internal.h:77
static FullTransactionId FullTransactionIdFromEpochAndXid(uint32 epoch, TransactionId xid)
Definition: transam.h:65
#define MAXALIGN(LEN)
Definition: c.h:691
#define XLOG_SWITCH
Definition: pg_control.h:71
static void header(const char *fmt,...) pg_attribute_printf(1
Definition: pg_regress.c:208
TimeLineID ws_tli
Definition: xlogreader.h:41
#define InvalidRepOriginId
Definition: origin.h:33
char * bkp_image
Definition: xlogreader.h:76
bool RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page)
Definition: xlogreader.c:1534
#define XLR_BLOCK_ID_DATA_SHORT
Definition: xlogrecord.h:223
XLogReaderState * XLogReaderAllocate(int wal_segment_size, const char *waldir, XLogPageReadCB pagereadfunc, void *private_data)
Definition: xlogreader.c:73
uint32 main_data_bufsz
Definition: xlogreader.h:149
static bool ValidXLogRecord(XLogReaderState *state, XLogRecord *record, XLogRecPtr recptr)
Definition: xlogreader.c:750
#define BKPBLOCK_FORK_MASK
Definition: xlogrecord.h:178
#define XRecOffIsValid(xlrp)
void * palloc(Size size)
Definition: mcxt.c:949
static const unsigned __int64 epoch
Definition: gettimeofday.c:34
#define unlikely(x)
Definition: c.h:206
uint64 system_identifier
Definition: xlogreader.h:122
WALSegmentContext segcxt
Definition: xlogreader.h:171
int(* XLogPageReadCB)(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr, int reqLen, XLogRecPtr targetRecPtr, char *readBuf)
Definition: xlogreader.h:54
char * errormsg_buf
Definition: xlogreader.h:211
#define close(a)
Definition: win32.h:12
char * main_data
Definition: xlogreader.h:147
#define COMP_CRC32C(crc, data, len)
Definition: pg_crc32c.h:89
TimeLineID latestPageTLI
Definition: xlogreader.h:180
RelFileNode rnode
Definition: xlogreader.h:66
#define FIN_CRC32C(crc)
Definition: pg_crc32c.h:94
#define snprintf
Definition: port.h:192
static uint32 readLen
Definition: xlog.c:796
#define _(x)
Definition: elog.c:87
RepOriginId record_origin
Definition: xlogreader.h:151
static void ResetDecoder(XLogReaderState *state)
Definition: xlogreader.c:1140
#define offsetof(type, field)
Definition: c.h:661
DecodedBkpBlock blocks[XLR_MAX_BLOCK_ID+1]
Definition: xlogreader.h:154
static XLogRecPtr startptr
Definition: basebackup.c:116
#define BKPBLOCK_HAS_DATA
Definition: xlogrecord.h:181
#define XLByteToSeg(xlrp, logSegNo, wal_segsz_bytes)