xlogreader.c
1 /*-------------------------------------------------------------------------
2  *
3  * xlogreader.c
4  * Generic XLog reading facility
5  *
6  * Portions Copyright (c) 2013-2020, PostgreSQL Global Development Group
7  *
8  * IDENTIFICATION
9  * src/backend/access/transam/xlogreader.c
10  *
11  * NOTES
12  * See xlogreader.h for more notes on this facility.
13  *
14  * This file is compiled as both front-end and backend code, so it
15  * may not use ereport, server-defined static variables, etc.
16  *-------------------------------------------------------------------------
17  */
18 #include "postgres.h"
19 
20 #include <unistd.h>
21 
22 #include "access/transam.h"
23 #include "access/xlog_internal.h"
24 #include "access/xlogreader.h"
25 #include "access/xlogrecord.h"
26 #include "catalog/pg_control.h"
27 #include "common/pg_lzcompress.h"
28 #include "replication/origin.h"
29 
30 #ifndef FRONTEND
31 #include "miscadmin.h"
32 #include "pgstat.h"
33 #include "utils/memutils.h"
34 #endif
35 
36 static void report_invalid_record(XLogReaderState *state, const char *fmt,...)
37  pg_attribute_printf(2, 3);
38 static bool allocate_recordbuf(XLogReaderState *state, uint32 reclength);
39 static int ReadPageInternal(XLogReaderState *state, XLogRecPtr pageptr,
40  int reqLen);
41 static void XLogReaderInvalReadState(XLogReaderState *state);
42 static bool ValidXLogRecordHeader(XLogReaderState *state, XLogRecPtr RecPtr,
43  XLogRecPtr PrevRecPtr, XLogRecord *record, bool randAccess);
44 static bool ValidXLogRecord(XLogReaderState *state, XLogRecord *record,
45  XLogRecPtr recptr);
46 static void ResetDecoder(XLogReaderState *state);
47 
48 /* size of the buffer allocated for error message. */
49 #define MAX_ERRORMSG_LEN 1000
50 
51 /*
52  * Construct a string in state->errormsg_buf explaining what's wrong with
53  * the current record being read.
54  */
55 static void
56 report_invalid_record(XLogReaderState *state, const char *fmt,...)
57 {
58  va_list args;
59 
60  fmt = _(fmt);
61 
62  va_start(args, fmt);
63  vsnprintf(state->errormsg_buf, MAX_ERRORMSG_LEN, fmt, args);
64  va_end(args);
65 }
66 
67 /*
68  * Allocate and initialize a new XLogReader.
69  *
70  * Returns NULL if the xlogreader couldn't be allocated.
71  */
72 XLogReaderState *
73 XLogReaderAllocate(int wal_segment_size, const char *waldir,
74  XLogReaderRoutine *routine, void *private_data)
75 {
76  XLogReaderState *state;
77 
78  state = (XLogReaderState *)
79  palloc_extended(sizeof(XLogReaderState),
80  MCXT_ALLOC_NO_OOM | MCXT_ALLOC_ZERO);
81  if (!state)
82  return NULL;
83 
84  /* initialize caller-provided support functions */
85  state->routine = *routine;
86 
87  state->max_block_id = -1;
88 
89  /*
90  * Permanently allocate readBuf. We do it this way, rather than just
91  * making a static array, for two reasons: (1) no need to waste the
92  * storage in most instantiations of the backend; (2) a static char array
93  * isn't guaranteed to have any particular alignment, whereas
94  * palloc_extended() will provide MAXALIGN'd storage.
95  */
96  state->readBuf = (char *) palloc_extended(XLOG_BLCKSZ,
97  MCXT_ALLOC_NO_OOM);
98  if (!state->readBuf)
99  {
100  pfree(state);
101  return NULL;
102  }
103 
104  /* Initialize segment info. */
105  WALOpenSegmentInit(&state->seg, &state->segcxt, wal_segment_size,
106  waldir);
107 
108  /* system_identifier initialized to zeroes above */
109  state->private_data = private_data;
110  /* ReadRecPtr, EndRecPtr and readLen initialized to zeroes above */
111  state->errormsg_buf = palloc_extended(MAX_ERRORMSG_LEN + 1,
112  MCXT_ALLOC_NO_OOM);
113  if (!state->errormsg_buf)
114  {
115  pfree(state->readBuf);
116  pfree(state);
117  return NULL;
118  }
119  state->errormsg_buf[0] = '\0';
120 
121  /*
122  * Allocate an initial readRecordBuf of minimal size, which can later be
123  * enlarged if necessary.
124  */
125  if (!allocate_recordbuf(state, 0))
126  {
127  pfree(state->errormsg_buf);
128  pfree(state->readBuf);
129  pfree(state);
130  return NULL;
131  }
132 
133  return state;
134 }
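/*
 * A minimal allocation sketch, assuming caller-provided callbacks named
 * my_page_read, my_segment_open and my_segment_close (hypothetical names)
 * matching the XLogReaderRoutine callback signatures:
 */
extern int	my_page_read(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr,
						 int reqLen, XLogRecPtr targetRecPtr, char *readBuf);
extern void my_segment_open(XLogReaderState *xlogreader, XLogSegNo nextSegNo,
							TimeLineID *tli_p);
extern void my_segment_close(XLogReaderState *xlogreader);

static XLogReaderState *
example_allocate_reader(int wal_segment_size, void *caller_private)
{
	XLogReaderRoutine routine;

	routine.page_read = my_page_read;
	routine.segment_open = my_segment_open;
	routine.segment_close = my_segment_close;

	/* waldir is optional; the segment_open callback decides how to find files */
	return XLogReaderAllocate(wal_segment_size, NULL, &routine,
							  caller_private);	/* NULL on out-of-memory */
}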
135 
136 void
137 XLogReaderFree(XLogReaderState *state)
138 {
139  int block_id;
140 
141  if (state->seg.ws_file != -1)
142  state->routine.segment_close(state);
143 
144  for (block_id = 0; block_id <= XLR_MAX_BLOCK_ID; block_id++)
145  {
146  if (state->blocks[block_id].data)
147  pfree(state->blocks[block_id].data);
148  }
149  if (state->main_data)
150  pfree(state->main_data);
151 
152  pfree(state->errormsg_buf);
153  if (state->readRecordBuf)
154  pfree(state->readRecordBuf);
155  pfree(state->readBuf);
156  pfree(state);
157 }
158 
159 /*
160  * Allocate readRecordBuf to fit a record of at least the given length.
161  * Returns true if successful, false if out of memory.
162  *
163  * readRecordBufSize is set to the new buffer size.
164  *
165  * To avoid useless small increases, round its size to a multiple of
166  * XLOG_BLCKSZ, and make sure it's at least 5*Max(BLCKSZ, XLOG_BLCKSZ) to start
167  * with. (That is enough for all "normal" records, but very large commit or
168  * abort records might need more space.)
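 *
 * For example (illustrative numbers only): with BLCKSZ = XLOG_BLCKSZ = 8192,
 * a 20000-byte request is first rounded up to 24576 bytes, and the
 * 5 * 8192 = 40960 byte floor then applies, so 40960 bytes are allocated.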
169  */
170 static bool
171 allocate_recordbuf(XLogReaderState *state, uint32 reclength)
172 {
173  uint32 newSize = reclength;
174 
175  newSize += XLOG_BLCKSZ - (newSize % XLOG_BLCKSZ);
176  newSize = Max(newSize, 5 * Max(BLCKSZ, XLOG_BLCKSZ));
177 
178 #ifndef FRONTEND
179 
180  /*
181  * Note that in very unlucky circumstances, random data read from a
182  * recycled segment can cause this routine to be called with a size that
183  * would cause a hard failure at allocation. For a standby, stopping
184  * suddenly with such a hard failure would prevent it from retrying to
185  * fetch WAL from one of its sources, which might otherwise allow replay
186  * to move on without a manual restart. If the data comes from a past
187  * recycled segment and is still valid, then the allocation may succeed
188  * but record checks are going to fail, so this would be short-lived. If
189  * the allocation fails because of a memory shortage, then this is not a
190  * hard failure either, per the guarantee given by MCXT_ALLOC_NO_OOM.
191  */
192  if (!AllocSizeIsValid(newSize))
193  return false;
194 
195 #endif
196 
197  if (state->readRecordBuf)
198  pfree(state->readRecordBuf);
199  state->readRecordBuf =
200  (char *) palloc_extended(newSize, MCXT_ALLOC_NO_OOM);
201  if (state->readRecordBuf == NULL)
202  {
203  state->readRecordBufSize = 0;
204  return false;
205  }
206  state->readRecordBufSize = newSize;
207  return true;
208 }
209 
210 /*
211  * Initialize the passed segment structs.
212  */
213 void
214 WALOpenSegmentInit(WALOpenSegment *seg, WALSegmentContext *segcxt,
215  int segsize, const char *waldir)
216 {
217  seg->ws_file = -1;
218  seg->ws_segno = 0;
219  seg->ws_tli = 0;
220 
221  segcxt->ws_segsize = segsize;
222  if (waldir)
223  snprintf(segcxt->ws_dir, MAXPGPATH, "%s", waldir);
224 }
225 
226 /*
227  * Begin reading WAL at 'RecPtr'.
228  *
229  * 'RecPtr' should point to the beginning of a valid WAL record. Pointing at
230  * the beginning of a page is also OK, if there is a new record right after
231  * the page header, i.e. not a continuation.
232  *
233  * This does not make any attempt to read the WAL yet, and hence cannot fail.
234  * If the starting address is not correct, the first call to XLogReadRecord()
235  * will error out.
236  */
237 void
238 XLogBeginRead(XLogReaderState *state, XLogRecPtr RecPtr)
239 {
240  Assert(!XLogRecPtrIsInvalid(RecPtr));
241 
242  ResetDecoder(state);
243 
244  /* Begin at the passed-in record pointer. */
245  state->EndRecPtr = RecPtr;
246  state->ReadRecPtr = InvalidXLogRecPtr;
247 }
248 
249 /*
250  * Attempt to read an XLOG record.
251  *
252  * XLogBeginRead() or XLogFindNextRecord() must be called before the first call
253  * to XLogReadRecord().
254  *
255  * If the page_read callback fails to read the requested data, NULL is
256  * returned. The callback is expected to have reported the error; errormsg
257  * is set to NULL.
258  *
259  * If the reading fails for some other reason, NULL is also returned, and
260  * *errormsg is set to a string with details of the failure.
261  *
262  * The returned pointer (or *errormsg) points to an internal buffer that's
263  * valid until the next call to XLogReadRecord.
264  */
265 XLogRecord *
266 XLogReadRecord(XLogReaderState *state, char **errormsg)
267 {
268  XLogRecPtr RecPtr;
269  XLogRecord *record;
270  XLogRecPtr targetPagePtr;
271  bool randAccess;
272  uint32 len,
273  total_len;
274  uint32 targetRecOff;
275  uint32 pageHeaderSize;
276  bool gotheader;
277  int readOff;
278 
279  /*
280  * randAccess indicates whether to verify the previous-record pointer of
281  * the record we're reading. We only do this if we're reading
282  * sequentially, which is what we initially assume.
283  */
284  randAccess = false;
285 
286  /* reset error state */
287  *errormsg = NULL;
288  state->errormsg_buf[0] = '\0';
289 
290  ResetDecoder(state);
291 
292  RecPtr = state->EndRecPtr;
293 
294  if (state->ReadRecPtr != InvalidXLogRecPtr)
295  {
296  /* read the record after the one we just read */
297 
298  /*
299  * EndRecPtr is pointing to end+1 of the previous WAL record. If
300  * we're at a page boundary, no more records can fit on the current
301  * page. We must skip over the page header, but we can't do that until
302  * we've read in the page, since the header size is variable.
303  */
304  }
305  else
306  {
307  /*
308  * Caller supplied a position to start at.
309  *
310  * In this case, EndRecPtr should already be pointing to a valid
311  * record starting position.
312  */
313  Assert(XRecOffIsValid(RecPtr));
314  randAccess = true;
315  }
316 
317  state->currRecPtr = RecPtr;
318 
319  targetPagePtr = RecPtr - (RecPtr % XLOG_BLCKSZ);
320  targetRecOff = RecPtr % XLOG_BLCKSZ;
321 
322  /*
323  * Read the page containing the record into state->readBuf. Request enough
324  * bytes to cover the whole record header, or at least the part of it that
325  * fits on the same page.
326  */
327  readOff = ReadPageInternal(state, targetPagePtr,
328  Min(targetRecOff + SizeOfXLogRecord, XLOG_BLCKSZ));
329  if (readOff < 0)
330  goto err;
331 
332  /*
333  * ReadPageInternal always returns at least the page header, so we can
334  * examine it now.
335  */
336  pageHeaderSize = XLogPageHeaderSize((XLogPageHeader) state->readBuf);
337  if (targetRecOff == 0)
338  {
339  /*
340  * At page start, so skip over page header.
341  */
342  RecPtr += pageHeaderSize;
343  targetRecOff = pageHeaderSize;
344  }
345  else if (targetRecOff < pageHeaderSize)
346  {
347  report_invalid_record(state, "invalid record offset at %X/%X",
348  (uint32) (RecPtr >> 32), (uint32) RecPtr);
349  goto err;
350  }
351 
352  if ((((XLogPageHeader) state->readBuf)->xlp_info & XLP_FIRST_IS_CONTRECORD) &&
353  targetRecOff == pageHeaderSize)
354  {
355  report_invalid_record(state, "contrecord is requested by %X/%X",
356  (uint32) (RecPtr >> 32), (uint32) RecPtr);
357  goto err;
358  }
359 
360  /* ReadPageInternal has verified the page header */
361  Assert(pageHeaderSize <= readOff);
362 
363  /*
364  * Read the record length.
365  *
366  * NB: Even though we use an XLogRecord pointer here, the whole record
367  * header might not fit on this page. xl_tot_len is the first field of the
368  * struct, so it must be on this page (the records are MAXALIGNed), but we
369  * cannot access any other fields until we've verified that we got the
370  * whole header.
371  */
372  record = (XLogRecord *) (state->readBuf + RecPtr % XLOG_BLCKSZ);
373  total_len = record->xl_tot_len;
374 
375  /*
376  * If the whole record header is on this page, validate it immediately.
377  * Otherwise do just a basic sanity check on xl_tot_len, and validate the
378  * rest of the header after reading it from the next page. The xl_tot_len
379  * check is necessary here to ensure that we enter the "Need to reassemble
380  * record" code path below; otherwise we might fail to apply
381  * ValidXLogRecordHeader at all.
382  */
383  if (targetRecOff <= XLOG_BLCKSZ - SizeOfXLogRecord)
384  {
385  if (!ValidXLogRecordHeader(state, RecPtr, state->ReadRecPtr, record,
386  randAccess))
387  goto err;
388  gotheader = true;
389  }
390  else
391  {
392  /* XXX: more validation should be done here */
393  if (total_len < SizeOfXLogRecord)
394  {
395  report_invalid_record(state,
396  "invalid record length at %X/%X: wanted %u, got %u",
397  (uint32) (RecPtr >> 32), (uint32) RecPtr,
398  (uint32) SizeOfXLogRecord, total_len);
399  goto err;
400  }
401  gotheader = false;
402  }
403 
404  len = XLOG_BLCKSZ - RecPtr % XLOG_BLCKSZ;
405  if (total_len > len)
406  {
407  /* Need to reassemble record */
408  char *contdata;
409  XLogPageHeader pageHeader;
410  char *buffer;
411  uint32 gotlen;
412 
413  /*
414  * Enlarge readRecordBuf as needed.
415  */
416  if (total_len > state->readRecordBufSize &&
417  !allocate_recordbuf(state, total_len))
418  {
419  /* We treat this as a "bogus data" condition */
420  report_invalid_record(state, "record length %u at %X/%X too long",
421  total_len,
422  (uint32) (RecPtr >> 32), (uint32) RecPtr);
423  goto err;
424  }
425 
426  /* Copy the first fragment of the record from the first page. */
427  memcpy(state->readRecordBuf,
428  state->readBuf + RecPtr % XLOG_BLCKSZ, len);
429  buffer = state->readRecordBuf + len;
430  gotlen = len;
431 
432  do
433  {
434  /* Calculate pointer to beginning of next page */
435  targetPagePtr += XLOG_BLCKSZ;
436 
437  /* Wait for the next page to become available */
438  readOff = ReadPageInternal(state, targetPagePtr,
439  Min(total_len - gotlen + SizeOfXLogShortPHD,
440  XLOG_BLCKSZ));
441 
442  if (readOff < 0)
443  goto err;
444 
445  Assert(SizeOfXLogShortPHD <= readOff);
446 
447  /* Check that the continuation on next page looks valid */
448  pageHeader = (XLogPageHeader) state->readBuf;
449  if (!(pageHeader->xlp_info & XLP_FIRST_IS_CONTRECORD))
450  {
451  report_invalid_record(state,
452  "there is no contrecord flag at %X/%X",
453  (uint32) (RecPtr >> 32), (uint32) RecPtr);
454  goto err;
455  }
456 
457  /*
458  * Cross-check that xlp_rem_len agrees with how much of the record
459  * we expect there to be left.
460  */
461  if (pageHeader->xlp_rem_len == 0 ||
462  total_len != (pageHeader->xlp_rem_len + gotlen))
463  {
464  report_invalid_record(state,
465  "invalid contrecord length %u at %X/%X",
466  pageHeader->xlp_rem_len,
467  (uint32) (RecPtr >> 32), (uint32) RecPtr);
468  goto err;
469  }
470 
471  /* Append the continuation from this page to the buffer */
472  pageHeaderSize = XLogPageHeaderSize(pageHeader);
473 
474  if (readOff < pageHeaderSize)
475  readOff = ReadPageInternal(state, targetPagePtr,
476  pageHeaderSize);
477 
478  Assert(pageHeaderSize <= readOff);
479 
480  contdata = (char *) state->readBuf + pageHeaderSize;
481  len = XLOG_BLCKSZ - pageHeaderSize;
482  if (pageHeader->xlp_rem_len < len)
483  len = pageHeader->xlp_rem_len;
484 
485  if (readOff < pageHeaderSize + len)
486  readOff = ReadPageInternal(state, targetPagePtr,
487  pageHeaderSize + len);
488 
489  memcpy(buffer, (char *) contdata, len);
490  buffer += len;
491  gotlen += len;
492 
493  /* If we just reassembled the record header, validate it. */
494  if (!gotheader)
495  {
496  record = (XLogRecord *) state->readRecordBuf;
497  if (!ValidXLogRecordHeader(state, RecPtr, state->ReadRecPtr,
498  record, randAccess))
499  goto err;
500  gotheader = true;
501  }
502  } while (gotlen < total_len);
503 
504  Assert(gotheader);
505 
506  record = (XLogRecord *) state->readRecordBuf;
507  if (!ValidXLogRecord(state, record, RecPtr))
508  goto err;
509 
510  pageHeaderSize = XLogPageHeaderSize((XLogPageHeader) state->readBuf);
511  state->ReadRecPtr = RecPtr;
512  state->EndRecPtr = targetPagePtr + pageHeaderSize
513  + MAXALIGN(pageHeader->xlp_rem_len);
514  }
515  else
516  {
517  /* Wait for the record data to become available */
518  readOff = ReadPageInternal(state, targetPagePtr,
519  Min(targetRecOff + total_len, XLOG_BLCKSZ));
520  if (readOff < 0)
521  goto err;
522 
523  /* Record does not cross a page boundary */
524  if (!ValidXLogRecord(state, record, RecPtr))
525  goto err;
526 
527  state->EndRecPtr = RecPtr + MAXALIGN(total_len);
528 
529  state->ReadRecPtr = RecPtr;
530  }
531 
532  /*
533  * Special processing if it's an XLOG SWITCH record
534  */
535  if (record->xl_rmid == RM_XLOG_ID &&
536  (record->xl_info & ~XLR_INFO_MASK) == XLOG_SWITCH)
537  {
538  /* Pretend it extends to end of segment */
539  state->EndRecPtr += state->segcxt.ws_segsize - 1;
540  state->EndRecPtr -= XLogSegmentOffset(state->EndRecPtr, state->segcxt.ws_segsize);
541  }
542 
543  if (DecodeXLogRecord(state, record, errormsg))
544  return record;
545  else
546  return NULL;
547 
548 err:
549 
550  /*
551  * Invalidate the read state. We might read from a different source after
552  * failure.
553  */
554  XLogReaderInvalReadState(state);
555 
556  if (state->errormsg_buf[0] != '\0')
557  *errormsg = state->errormsg_buf;
558 
559  return NULL;
560 }
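/*
 * A minimal sketch of the intended calling pattern, assuming "reader" came
 * from XLogReaderAllocate() and "start_lsn" points at a valid record start:
 */
static void
example_read_loop(XLogReaderState *reader, XLogRecPtr start_lsn)
{
	XLogRecord *record;
	char	   *errormsg;

	XLogBeginRead(reader, start_lsn);
	while ((record = XLogReadRecord(reader, &errormsg)) != NULL)
	{
		/*
		 * The record has been decoded; its payload can be examined through
		 * the block-reference helpers further down in this file.
		 */
	}

	/* NULL return: end of readable WAL, or an error with a message */
	if (errormsg != NULL)
		fprintf(stderr, "could not read WAL at %X/%X: %s\n",
				(uint32) (reader->EndRecPtr >> 32),
				(uint32) reader->EndRecPtr, errormsg);
}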
561 
562 /*
563  * Read a single xlog page including at least [pageptr, reqLen] of valid data
564  * via the page_read() callback.
565  *
566  * Returns -1 if the required page cannot be read for some reason; errormsg_buf
567  * is set in that case (unless the error occurs in the page_read callback).
568  *
569  * We fetch the page from a reader-local cache if we know we have the required
570  * data and if there hasn't been any error since caching the data.
571  */
572 static int
573 ReadPageInternal(XLogReaderState *state, XLogRecPtr pageptr, int reqLen)
574 {
575  int readLen;
576  uint32 targetPageOff;
577  XLogSegNo targetSegNo;
578  XLogPageHeader hdr;
579 
580  Assert((pageptr % XLOG_BLCKSZ) == 0);
581 
582  XLByteToSeg(pageptr, targetSegNo, state->segcxt.ws_segsize);
583  targetPageOff = XLogSegmentOffset(pageptr, state->segcxt.ws_segsize);
584 
585  /* check whether we have all the requested data already */
586  if (targetSegNo == state->seg.ws_segno &&
587  targetPageOff == state->segoff && reqLen <= state->readLen)
588  return state->readLen;
589 
590  /*
591  * Data is not in our buffer.
592  *
593  * Every time we actually read the segment, even if we looked at parts of
594  * it before, we need to do verification as the page_read callback might
595  * now be rereading data from a different source.
596  *
597  * Whenever switching to a new WAL segment, we read the first page of the
598  * file and validate its header, even if that's not where the target
599  * record is. This is so that we can check the additional identification
600  * info that is present in the first page's "long" header.
601  */
602  if (targetSegNo != state->seg.ws_segno && targetPageOff != 0)
603  {
604  XLogRecPtr targetSegmentPtr = pageptr - targetPageOff;
605 
606  readLen = state->routine.page_read(state, targetSegmentPtr, XLOG_BLCKSZ,
607  state->currRecPtr,
608  state->readBuf);
609  if (readLen < 0)
610  goto err;
611 
612  /* we can be sure to have enough WAL available, since we scrolled back */
613  Assert(readLen == XLOG_BLCKSZ);
614 
615  if (!XLogReaderValidatePageHeader(state, targetSegmentPtr,
616  state->readBuf))
617  goto err;
618  }
619 
620  /*
621  * First, read the requested data length, but at least a short page header
622  * so that we can validate it.
623  */
624  readLen = state->routine.page_read(state, pageptr, Max(reqLen, SizeOfXLogShortPHD),
625  state->currRecPtr,
626  state->readBuf);
627  if (readLen < 0)
628  goto err;
629 
630  Assert(readLen <= XLOG_BLCKSZ);
631 
632  /* Do we have enough data to check the header length? */
633  if (readLen <= SizeOfXLogShortPHD)
634  goto err;
635 
636  Assert(readLen >= reqLen);
637 
638  hdr = (XLogPageHeader) state->readBuf;
639 
640  /* still not enough */
641  if (readLen < XLogPageHeaderSize(hdr))
642  {
643  readLen = state->routine.page_read(state, pageptr, XLogPageHeaderSize(hdr),
644  state->currRecPtr,
645  state->readBuf);
646  if (readLen < 0)
647  goto err;
648  }
649 
650  /*
651  * Now that we know we have the full header, validate it.
652  */
653  if (!XLogReaderValidatePageHeader(state, pageptr, (char *) hdr))
654  goto err;
655 
656  /* update read state information */
657  state->seg.ws_segno = targetSegNo;
658  state->segoff = targetPageOff;
659  state->readLen = readLen;
660 
661  return readLen;
662 
663 err:
664  XLogReaderInvalReadState(state);
665  return -1;
666 }
667 
668 /*
669  * Invalidate the xlogreader's read state to force a re-read.
670  */
671 static void
672 XLogReaderInvalReadState(XLogReaderState *state)
673 {
674  state->seg.ws_segno = 0;
675  state->segoff = 0;
676  state->readLen = 0;
677 }
678 
679 /*
680  * Validate an XLOG record header.
681  *
682  * This is just a convenience subroutine to avoid duplicated code in
683  * XLogReadRecord. It's not intended for use from anywhere else.
684  */
685 static bool
686 ValidXLogRecordHeader(XLogReaderState *state, XLogRecPtr RecPtr,
687  XLogRecPtr PrevRecPtr, XLogRecord *record,
688  bool randAccess)
689 {
690  if (record->xl_tot_len < SizeOfXLogRecord)
691  {
692  report_invalid_record(state,
693  "invalid record length at %X/%X: wanted %u, got %u",
694  (uint32) (RecPtr >> 32), (uint32) RecPtr,
695  (uint32) SizeOfXLogRecord, record->xl_tot_len);
696  return false;
697  }
698  if (record->xl_rmid > RM_MAX_ID)
699  {
700  report_invalid_record(state,
701  "invalid resource manager ID %u at %X/%X",
702  record->xl_rmid, (uint32) (RecPtr >> 32),
703  (uint32) RecPtr);
704  return false;
705  }
706  if (randAccess)
707  {
708  /*
709  * We can't exactly verify the prev-link, but surely it should be less
710  * than the record's own address.
711  */
712  if (!(record->xl_prev < RecPtr))
713  {
714  report_invalid_record(state,
715  "record with incorrect prev-link %X/%X at %X/%X",
716  (uint32) (record->xl_prev >> 32),
717  (uint32) record->xl_prev,
718  (uint32) (RecPtr >> 32), (uint32) RecPtr);
719  return false;
720  }
721  }
722  else
723  {
724  /*
725  * Record's prev-link should exactly match our previous location. This
726  * check guards against torn WAL pages where a stale but valid-looking
727  * WAL record starts on a sector boundary.
728  */
729  if (record->xl_prev != PrevRecPtr)
730  {
731  report_invalid_record(state,
732  "record with incorrect prev-link %X/%X at %X/%X",
733  (uint32) (record->xl_prev >> 32),
734  (uint32) record->xl_prev,
735  (uint32) (RecPtr >> 32), (uint32) RecPtr);
736  return false;
737  }
738  }
739 
740  return true;
741 }
742 
743 
744 /*
745  * CRC-check an XLOG record. We do not believe the contents of an XLOG
746  * record (other than to the minimal extent of computing the amount of
747  * data to read in) until we've checked the CRCs.
748  *
749  * We assume all of the record (that is, xl_tot_len bytes) has been read
750  * into memory at *record. Also, ValidXLogRecordHeader() has accepted the
751  * record's header, which means in particular that xl_tot_len is at least
752  * SizeOfXLogRecord.
753  */
754 static bool
755 ValidXLogRecord(XLogReaderState *state, XLogRecord *record, XLogRecPtr recptr)
756 {
757  pg_crc32c crc;
758 
759  /* Calculate the CRC */
760  INIT_CRC32C(crc);
761  COMP_CRC32C(crc, ((char *) record) + SizeOfXLogRecord, record->xl_tot_len - SizeOfXLogRecord);
762  /* include the record header last */
763  COMP_CRC32C(crc, (char *) record, offsetof(XLogRecord, xl_crc));
764  FIN_CRC32C(crc);
765 
766  if (!EQ_CRC32C(record->xl_crc, crc))
767  {
768  report_invalid_record(state,
769  "incorrect resource manager data checksum in record at %X/%X",
770  (uint32) (recptr >> 32), (uint32) recptr);
771  return false;
772  }
773 
774  return true;
775 }
776 
777 /*
778  * Validate a page header.
779  *
780  * Check if 'phdr' is valid as the header of the XLog page at position
781  * 'recptr'.
782  */
783 bool
784 XLogReaderValidatePageHeader(XLogReaderState *state, XLogRecPtr recptr,
785  char *phdr)
786 {
787  XLogRecPtr recaddr;
788  XLogSegNo segno;
789  int32 offset;
790  XLogPageHeader hdr = (XLogPageHeader) phdr;
791 
792  Assert((recptr % XLOG_BLCKSZ) == 0);
793 
794  XLByteToSeg(recptr, segno, state->segcxt.ws_segsize);
795  offset = XLogSegmentOffset(recptr, state->segcxt.ws_segsize);
796 
797  XLogSegNoOffsetToRecPtr(segno, offset, state->segcxt.ws_segsize, recaddr);
798 
799  if (hdr->xlp_magic != XLOG_PAGE_MAGIC)
800  {
801  char fname[MAXFNAMELEN];
802 
803  XLogFileName(fname, state->seg.ws_tli, segno, state->segcxt.ws_segsize);
804 
805  report_invalid_record(state,
806  "invalid magic number %04X in log segment %s, offset %u",
807  hdr->xlp_magic,
808  fname,
809  offset);
810  return false;
811  }
812 
813  if ((hdr->xlp_info & ~XLP_ALL_FLAGS) != 0)
814  {
815  char fname[MAXFNAMELEN];
816 
817  XLogFileName(fname, state->seg.ws_tli, segno, state->segcxt.ws_segsize);
818 
819  report_invalid_record(state,
820  "invalid info bits %04X in log segment %s, offset %u",
821  hdr->xlp_info,
822  fname,
823  offset);
824  return false;
825  }
826 
827  if (hdr->xlp_info & XLP_LONG_HEADER)
828  {
829  XLogLongPageHeader longhdr = (XLogLongPageHeader) hdr;
830 
831  if (state->system_identifier &&
832  longhdr->xlp_sysid != state->system_identifier)
833  {
834  report_invalid_record(state,
835  "WAL file is from different database system: WAL file database system identifier is %llu, pg_control database system identifier is %llu",
836  (unsigned long long) longhdr->xlp_sysid,
837  (unsigned long long) state->system_identifier);
838  return false;
839  }
840  else if (longhdr->xlp_seg_size != state->segcxt.ws_segsize)
841  {
842  report_invalid_record(state,
843  "WAL file is from different database system: incorrect segment size in page header");
844  return false;
845  }
846  else if (longhdr->xlp_xlog_blcksz != XLOG_BLCKSZ)
847  {
848  report_invalid_record(state,
849  "WAL file is from different database system: incorrect XLOG_BLCKSZ in page header");
850  return false;
851  }
852  }
853  else if (offset == 0)
854  {
855  char fname[MAXFNAMELEN];
856 
857  XLogFileName(fname, state->seg.ws_tli, segno, state->segcxt.ws_segsize);
858 
859  /* hmm, first page of file doesn't have a long header? */
860  report_invalid_record(state,
861  "invalid info bits %04X in log segment %s, offset %u",
862  hdr->xlp_info,
863  fname,
864  offset);
865  return false;
866  }
867 
868  /*
869  * Check that the address on the page agrees with what we expected. This
870  * check typically fails when an old WAL segment is recycled, and hasn't
871  * yet been overwritten with new data.
872  */
873  if (hdr->xlp_pageaddr != recaddr)
874  {
875  char fname[MAXFNAMELEN];
876 
877  XLogFileName(fname, state->seg.ws_tli, segno, state->segcxt.ws_segsize);
878 
879  report_invalid_record(state,
880  "unexpected pageaddr %X/%X in log segment %s, offset %u",
881  (uint32) (hdr->xlp_pageaddr >> 32), (uint32) hdr->xlp_pageaddr,
882  fname,
883  offset);
884  return false;
885  }
886 
887  /*
888  * Since child timelines are always assigned a TLI greater than their
889  * immediate parent's TLI, we should never see TLI go backwards across
890  * successive pages of a consistent WAL sequence.
891  *
892  * Sometimes we re-read a segment that's already been (partially) read. So
893  * we only verify TLIs for pages that are later than the last remembered
894  * LSN.
895  */
896  if (recptr > state->latestPagePtr)
897  {
898  if (hdr->xlp_tli < state->latestPageTLI)
899  {
900  char fname[MAXFNAMELEN];
901 
902  XLogFileName(fname, state->seg.ws_tli, segno, state->segcxt.ws_segsize);
903 
904  report_invalid_record(state,
905  "out-of-sequence timeline ID %u (after %u) in log segment %s, offset %u",
906  hdr->xlp_tli,
907  state->latestPageTLI,
908  fname,
909  offset);
910  return false;
911  }
912  }
913  state->latestPagePtr = recptr;
914  state->latestPageTLI = hdr->xlp_tli;
915 
916  return true;
917 }
918 
919 #ifdef FRONTEND
920 /*
921  * Functions that are currently not needed in the backend, but are better
922  * implemented inside xlogreader.c because of the internal facilities available
923  * here.
924  */
925 
926 /*
927  * Find the first record with an lsn >= RecPtr.
928  *
929  * This is different from XLogBeginRead() in that RecPtr doesn't need to point
930  * to a valid record boundary. Useful for checking whether RecPtr is a valid
931  * xlog address for reading, and to find the first valid address after some
932  * address when dumping records for debugging purposes.
933  *
934  * This positions the reader, like XLogBeginRead(), so that the next call to
935  * XLogReadRecord() will read the next valid record.
936  */
937 XLogRecPtr
938 XLogFindNextRecord(XLogReaderState *state, XLogRecPtr RecPtr)
939 {
940  XLogRecPtr tmpRecPtr;
941  XLogRecPtr found = InvalidXLogRecPtr;
942  XLogPageHeader header;
943  char *errormsg;
944 
945  Assert(!XLogRecPtrIsInvalid(RecPtr));
946 
947  /*
948  * skip over potential continuation data, keeping in mind that it may span
949  * multiple pages
950  */
951  tmpRecPtr = RecPtr;
952  while (true)
953  {
954  XLogRecPtr targetPagePtr;
955  int targetRecOff;
956  uint32 pageHeaderSize;
957  int readLen;
958 
959  /*
960  * Compute targetRecOff. It should typically be equal to or greater than
961  * the short page header, since a valid record can't start anywhere before
962  * that, except when the caller has explicitly specified an offset that
963  * falls within the header, or when we are skipping a multi-page
964  * continuation record. It doesn't matter though, because
965  * ReadPageInternal() is prepared to handle that and will read at
966  * least a short page header's worth of data.
967  */
968  targetRecOff = tmpRecPtr % XLOG_BLCKSZ;
969 
970  /* scroll back to page boundary */
971  targetPagePtr = tmpRecPtr - targetRecOff;
972 
973  /* Read the page containing the record */
974  readLen = ReadPageInternal(state, targetPagePtr, targetRecOff);
975  if (readLen < 0)
976  goto err;
977 
978  header = (XLogPageHeader) state->readBuf;
979 
980  pageHeaderSize = XLogPageHeaderSize(header);
981 
982  /* make sure we have enough data for the page header */
983  readLen = ReadPageInternal(state, targetPagePtr, pageHeaderSize);
984  if (readLen < 0)
985  goto err;
986 
987  /* skip over potential continuation data */
988  if (header->xlp_info & XLP_FIRST_IS_CONTRECORD)
989  {
990  /*
991  * If the length of the remaining continuation data is more than
992  * what can fit in this page, the continuation record crosses over
993  * this page. Read the next page and try again. xlp_rem_len in the
994  * next page header will contain the remaining length of the
995  * continuation data
996  *
997  * Note that record headers are MAXALIGN'ed
998  */
999  if (MAXALIGN(header->xlp_rem_len) >= (XLOG_BLCKSZ - pageHeaderSize))
1000  tmpRecPtr = targetPagePtr + XLOG_BLCKSZ;
1001  else
1002  {
1003  /*
1004  * The previous continuation record ends in this page. Set
1005  * tmpRecPtr to point to the first valid record
1006  */
1007  tmpRecPtr = targetPagePtr + pageHeaderSize
1008  + MAXALIGN(header->xlp_rem_len);
1009  break;
1010  }
1011  }
1012  else
1013  {
1014  tmpRecPtr = targetPagePtr + pageHeaderSize;
1015  break;
1016  }
1017  }
1018 
1019  /*
1020  * we know now that tmpRecPtr is an address pointing to a valid XLogRecord
1021  * because either we're at the first record after the beginning of a page
1022  * or we just jumped over the remaining data of a continuation.
1023  */
1024  XLogBeginRead(state, tmpRecPtr);
1025  while (XLogReadRecord(state, &errormsg) != NULL)
1026  {
1027  /* past the record we've found, break out */
1028  if (RecPtr <= state->ReadRecPtr)
1029  {
1030  /* Rewind the reader to the beginning of the last record. */
1031  found = state->ReadRecPtr;
1032  XLogBeginRead(state, found);
1033  return found;
1034  }
1035  }
1036 
1037 err:
1038  XLogReaderInvalReadState(state);
1039 
1040  return InvalidXLogRecPtr;
1041 }
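/*
 * A minimal sketch of how a frontend tool might use this, assuming "reader"
 * came from XLogReaderAllocate() and "start_lsn" is a caller-supplied LSN
 * that may fall inside a record:
 */
static XLogRecPtr
example_align_start(XLogReaderState *reader, XLogRecPtr start_lsn)
{
	XLogRecPtr	first_record;

	/* Find, and position the reader at, the first record >= start_lsn. */
	first_record = XLogFindNextRecord(reader, start_lsn);
	if (XLogRecPtrIsInvalid(first_record))
		return InvalidXLogRecPtr;	/* no valid record found there */

	/* The reader is positioned; XLogReadRecord() can be called next. */
	return first_record;
}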
1042 
1043 #endif /* FRONTEND */
1044 
1045 /*
1046  * Helper function to ease writing of XLogReaderRoutine->page_read callbacks.
1047  * If this function is used, caller must supply a segment_open callback in
1048  * 'state', as that is used here.
1049  *
1050  * Read 'count' bytes into 'buf', starting at location 'startptr', from WAL
1051  * fetched from timeline 'tli'.
1052  *
1053  * Returns true on success, false if an error occurs, in which case
1054  * 'errinfo' receives error details.
1055  *
1056  * XXX probably this should be improved to suck data directly from the
1057  * WAL buffers when possible.
1058  */
1059 bool
1060 WALRead(XLogReaderState *state,
1061  char *buf, XLogRecPtr startptr, Size count, TimeLineID tli,
1062  WALReadError *errinfo)
1063 {
1064  char *p;
1065  XLogRecPtr recptr;
1066  Size nbytes;
1067 
1068  p = buf;
1069  recptr = startptr;
1070  nbytes = count;
1071 
1072  while (nbytes > 0)
1073  {
1074  uint32 startoff;
1075  int segbytes;
1076  int readbytes;
1077 
1078  startoff = XLogSegmentOffset(recptr, state->segcxt.ws_segsize);
1079 
1080  /*
1081  * If the data we want is not in a segment we have open, close what we
1082  * have (if anything) and open the next one, using the caller's
1083  * provided segment_open callback.
1084  */
1085  if (state->seg.ws_file < 0 ||
1086  !XLByteInSeg(recptr, state->seg.ws_segno, state->segcxt.ws_segsize) ||
1087  tli != state->seg.ws_tli)
1088  {
1089  XLogSegNo nextSegNo;
1090 
1091  if (state->seg.ws_file >= 0)
1092  state->routine.segment_close(state);
1093 
1094  XLByteToSeg(recptr, nextSegNo, state->segcxt.ws_segsize);
1095  state->routine.segment_open(state, nextSegNo, &tli);
1096 
1097  /* This shouldn't happen -- indicates a bug in segment_open */
1098  Assert(state->seg.ws_file >= 0);
1099 
1100  /* Update the current segment info. */
1101  state->seg.ws_tli = tli;
1102  state->seg.ws_segno = nextSegNo;
1103  }
1104 
1105  /* How many bytes are within this segment? */
1106  if (nbytes > (state->segcxt.ws_segsize - startoff))
1107  segbytes = state->segcxt.ws_segsize - startoff;
1108  else
1109  segbytes = nbytes;
1110 
1111 #ifndef FRONTEND
1112  pgstat_report_wait_start(WAIT_EVENT_WAL_READ);
1113 #endif
1114 
1115  /* Reset errno first; eases reporting non-errno-affecting errors */
1116  errno = 0;
1117  readbytes = pg_pread(state->seg.ws_file, p, segbytes, (off_t) startoff);
1118 
1119 #ifndef FRONTEND
1120  pgstat_report_wait_end();
1121 #endif
1122 
1123  if (readbytes <= 0)
1124  {
1125  errinfo->wre_errno = errno;
1126  errinfo->wre_req = segbytes;
1127  errinfo->wre_read = readbytes;
1128  errinfo->wre_off = startoff;
1129  errinfo->wre_seg = state->seg;
1130  return false;
1131  }
1132 
1133  /* Update state for read */
1134  recptr += readbytes;
1135  nbytes -= readbytes;
1136  p += readbytes;
1137  }
1138 
1139  return true;
1140 }
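/*
 * A minimal page_read callback sketch built on WALRead(), assuming the
 * timeline to read from is stashed in private_data by the caller (that
 * convention, and the lack of end-of-WAL handling, are simplifications):
 */
static int
example_page_read(XLogReaderState *state, XLogRecPtr targetPagePtr,
				  int reqLen, XLogRecPtr targetRecPtr, char *readBuf)
{
	TimeLineID	tli = *((TimeLineID *) state->private_data);
	WALReadError errinfo;

	/*
	 * Read a full page; a real callback must also make sure it never reads
	 * past the end of valid WAL, and should report errinfo on failure.
	 */
	if (!WALRead(state, readBuf, targetPagePtr, XLOG_BLCKSZ, tli, &errinfo))
		return -1;

	return XLOG_BLCKSZ;
}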
1141 
1142 /* ----------------------------------------
1143  * Functions for decoding the data and block references in a record.
1144  * ----------------------------------------
1145  */
1146 
1147 /* private function to reset the state between records */
1148 static void
1149 ResetDecoder(XLogReaderState *state)
1150 {
1151  int block_id;
1152 
1153  state->decoded_record = NULL;
1154 
1155  state->main_data_len = 0;
1156 
1157  for (block_id = 0; block_id <= state->max_block_id; block_id++)
1158  {
1159  state->blocks[block_id].in_use = false;
1160  state->blocks[block_id].has_image = false;
1161  state->blocks[block_id].has_data = false;
1162  state->blocks[block_id].apply_image = false;
1163  }
1164  state->max_block_id = -1;
1165 }
1166 
1167 /*
1168  * Decode the previously read record.
1169  *
1170  * On error, a human-readable error message is returned in *errormsg, and
1171  * the return value is false.
1172  */
1173 bool
1174 DecodeXLogRecord(XLogReaderState *state, XLogRecord *record, char **errormsg)
1175 {
1176  /*
1177  * read next _size bytes from record buffer, but check for overrun first.
1178  */
1179 #define COPY_HEADER_FIELD(_dst, _size) \
1180  do { \
1181  if (remaining < _size) \
1182  goto shortdata_err; \
1183  memcpy(_dst, ptr, _size); \
1184  ptr += _size; \
1185  remaining -= _size; \
1186  } while(0)
1187 
1188  char *ptr;
1189  uint32 remaining;
1190  uint32 datatotal;
1191  RelFileNode *rnode = NULL;
1192  uint8 block_id;
1193 
1194  ResetDecoder(state);
1195 
1196  state->decoded_record = record;
1197  state->record_origin = InvalidRepOriginId;
1198 
1199  ptr = (char *) record;
1200  ptr += SizeOfXLogRecord;
1201  remaining = record->xl_tot_len - SizeOfXLogRecord;
1202 
1203  /* Decode the headers */
1204  datatotal = 0;
1205  while (remaining > datatotal)
1206  {
1207  COPY_HEADER_FIELD(&block_id, sizeof(uint8));
1208 
1209  if (block_id == XLR_BLOCK_ID_DATA_SHORT)
1210  {
1211  /* XLogRecordDataHeaderShort */
1212  uint8 main_data_len;
1213 
1214  COPY_HEADER_FIELD(&main_data_len, sizeof(uint8));
1215 
1216  state->main_data_len = main_data_len;
1217  datatotal += main_data_len;
1218  break; /* by convention, the main data fragment is
1219  * always last */
1220  }
1221  else if (block_id == XLR_BLOCK_ID_DATA_LONG)
1222  {
1223  /* XLogRecordDataHeaderLong */
1224  uint32 main_data_len;
1225 
1226  COPY_HEADER_FIELD(&main_data_len, sizeof(uint32));
1227  state->main_data_len = main_data_len;
1228  datatotal += main_data_len;
1229  break; /* by convention, the main data fragment is
1230  * always last */
1231  }
1232  else if (block_id == XLR_BLOCK_ID_ORIGIN)
1233  {
1234  COPY_HEADER_FIELD(&state->record_origin, sizeof(RepOriginId));
1235  }
1236  else if (block_id <= XLR_MAX_BLOCK_ID)
1237  {
1238  /* XLogRecordBlockHeader */
1239  DecodedBkpBlock *blk;
1240  uint8 fork_flags;
1241 
1242  if (block_id <= state->max_block_id)
1243  {
1244  report_invalid_record(state,
1245  "out-of-order block_id %u at %X/%X",
1246  block_id,
1247  (uint32) (state->ReadRecPtr >> 32),
1248  (uint32) state->ReadRecPtr);
1249  goto err;
1250  }
1251  state->max_block_id = block_id;
1252 
1253  blk = &state->blocks[block_id];
1254  blk->in_use = true;
1255  blk->apply_image = false;
1256 
1257  COPY_HEADER_FIELD(&fork_flags, sizeof(uint8));
1258  blk->forknum = fork_flags & BKPBLOCK_FORK_MASK;
1259  blk->flags = fork_flags;
1260  blk->has_image = ((fork_flags & BKPBLOCK_HAS_IMAGE) != 0);
1261  blk->has_data = ((fork_flags & BKPBLOCK_HAS_DATA) != 0);
1262 
1263  COPY_HEADER_FIELD(&blk->data_len, sizeof(uint16));
1264  /* cross-check that the HAS_DATA flag is set iff data_length > 0 */
1265  if (blk->has_data && blk->data_len == 0)
1266  {
1267  report_invalid_record(state,
1268  "BKPBLOCK_HAS_DATA set, but no data included at %X/%X",
1269  (uint32) (state->ReadRecPtr >> 32), (uint32) state->ReadRecPtr);
1270  goto err;
1271  }
1272  if (!blk->has_data && blk->data_len != 0)
1273  {
1274  report_invalid_record(state,
1275  "BKPBLOCK_HAS_DATA not set, but data length is %u at %X/%X",
1276  (unsigned int) blk->data_len,
1277  (uint32) (state->ReadRecPtr >> 32), (uint32) state->ReadRecPtr);
1278  goto err;
1279  }
1280  datatotal += blk->data_len;
1281 
1282  if (blk->has_image)
1283  {
1284  COPY_HEADER_FIELD(&blk->bimg_len, sizeof(uint16));
1285  COPY_HEADER_FIELD(&blk->hole_offset, sizeof(uint16));
1286  COPY_HEADER_FIELD(&blk->bimg_info, sizeof(uint8));
1287 
1288  blk->apply_image = ((blk->bimg_info & BKPIMAGE_APPLY) != 0);
1289 
1290  if (blk->bimg_info & BKPIMAGE_IS_COMPRESSED)
1291  {
1292  if (blk->bimg_info & BKPIMAGE_HAS_HOLE)
1293  COPY_HEADER_FIELD(&blk->hole_length, sizeof(uint16));
1294  else
1295  blk->hole_length = 0;
1296  }
1297  else
1298  blk->hole_length = BLCKSZ - blk->bimg_len;
1299  datatotal += blk->bimg_len;
1300 
1301  /*
1302  * cross-check that hole_offset > 0, hole_length > 0 and
1303  * bimg_len < BLCKSZ if the HAS_HOLE flag is set.
1304  */
1305  if ((blk->bimg_info & BKPIMAGE_HAS_HOLE) &&
1306  (blk->hole_offset == 0 ||
1307  blk->hole_length == 0 ||
1308  blk->bimg_len == BLCKSZ))
1309  {
1310  report_invalid_record(state,
1311  "BKPIMAGE_HAS_HOLE set, but hole offset %u length %u block image length %u at %X/%X",
1312  (unsigned int) blk->hole_offset,
1313  (unsigned int) blk->hole_length,
1314  (unsigned int) blk->bimg_len,
1315  (uint32) (state->ReadRecPtr >> 32), (uint32) state->ReadRecPtr);
1316  goto err;
1317  }
1318 
1319  /*
1320  * cross-check that hole_offset == 0 and hole_length == 0 if
1321  * the HAS_HOLE flag is not set.
1322  */
1323  if (!(blk->bimg_info & BKPIMAGE_HAS_HOLE) &&
1324  (blk->hole_offset != 0 || blk->hole_length != 0))
1325  {
1326  report_invalid_record(state,
1327  "BKPIMAGE_HAS_HOLE not set, but hole offset %u length %u at %X/%X",
1328  (unsigned int) blk->hole_offset,
1329  (unsigned int) blk->hole_length,
1330  (uint32) (state->ReadRecPtr >> 32), (uint32) state->ReadRecPtr);
1331  goto err;
1332  }
1333 
1334  /*
1335  * cross-check that bimg_len < BLCKSZ if the IS_COMPRESSED
1336  * flag is set.
1337  */
1338  if ((blk->bimg_info & BKPIMAGE_IS_COMPRESSED) &&
1339  blk->bimg_len == BLCKSZ)
1340  {
1341  report_invalid_record(state,
1342  "BKPIMAGE_IS_COMPRESSED set, but block image length %u at %X/%X",
1343  (unsigned int) blk->bimg_len,
1344  (uint32) (state->ReadRecPtr >> 32), (uint32) state->ReadRecPtr);
1345  goto err;
1346  }
1347 
1348  /*
1349  * cross-check that bimg_len = BLCKSZ if neither HAS_HOLE nor
1350  * IS_COMPRESSED flag is set.
1351  */
1352  if (!(blk->bimg_info & BKPIMAGE_HAS_HOLE) &&
1353  !(blk->bimg_info & BKPIMAGE_IS_COMPRESSED) &&
1354  blk->bimg_len != BLCKSZ)
1355  {
1356  report_invalid_record(state,
1357  "neither BKPIMAGE_HAS_HOLE nor BKPIMAGE_IS_COMPRESSED set, but block image length is %u at %X/%X",
1358  (unsigned int) blk->data_len,
1359  (uint32) (state->ReadRecPtr >> 32), (uint32) state->ReadRecPtr);
1360  goto err;
1361  }
1362  }
1363  if (!(fork_flags & BKPBLOCK_SAME_REL))
1364  {
1365  COPY_HEADER_FIELD(&blk->rnode, sizeof(RelFileNode));
1366  rnode = &blk->rnode;
1367  }
1368  else
1369  {
1370  if (rnode == NULL)
1371  {
1372  report_invalid_record(state,
1373  "BKPBLOCK_SAME_REL set but no previous rel at %X/%X",
1374  (uint32) (state->ReadRecPtr >> 32), (uint32) state->ReadRecPtr);
1375  goto err;
1376  }
1377 
1378  blk->rnode = *rnode;
1379  }
1380  COPY_HEADER_FIELD(&blk->blkno, sizeof(BlockNumber));
1381  }
1382  else
1383  {
1384  report_invalid_record(state,
1385  "invalid block_id %u at %X/%X",
1386  block_id,
1387  (uint32) (state->ReadRecPtr >> 32),
1388  (uint32) state->ReadRecPtr);
1389  goto err;
1390  }
1391  }
1392 
1393  if (remaining != datatotal)
1394  goto shortdata_err;
1395 
1396  /*
1397  * Ok, we've parsed the fragment headers, and verified that the total
1398  * length of the payload in the fragments is equal to the amount of data
1399  * left. Copy the data of each fragment to a separate buffer.
1400  *
1401  * We could just set up pointers into readRecordBuf, but we want to align
1402  * the data for the convenience of the callers. Backup images are not
1403  * copied, however; they don't need alignment.
1404  */
1405 
1406  /* block data first */
1407  for (block_id = 0; block_id <= state->max_block_id; block_id++)
1408  {
1409  DecodedBkpBlock *blk = &state->blocks[block_id];
1410 
1411  if (!blk->in_use)
1412  continue;
1413 
1414  Assert(blk->has_image || !blk->apply_image);
1415 
1416  if (blk->has_image)
1417  {
1418  blk->bkp_image = ptr;
1419  ptr += blk->bimg_len;
1420  }
1421  if (blk->has_data)
1422  {
1423  if (!blk->data || blk->data_len > blk->data_bufsz)
1424  {
1425  if (blk->data)
1426  pfree(blk->data);
1427 
1428  /*
1429  * Force the initial request to be BLCKSZ so that we don't
1430  * waste time with lots of trips through this stanza as a
1431  * result of WAL compression.
1432  */
1433  blk->data_bufsz = MAXALIGN(Max(blk->data_len, BLCKSZ));
1434  blk->data = palloc(blk->data_bufsz);
1435  }
1436  memcpy(blk->data, ptr, blk->data_len);
1437  ptr += blk->data_len;
1438  }
1439  }
1440 
1441  /* and finally, the main data */
1442  if (state->main_data_len > 0)
1443  {
1444  if (!state->main_data || state->main_data_len > state->main_data_bufsz)
1445  {
1446  if (state->main_data)
1447  pfree(state->main_data);
1448 
1449  /*
1450  * main_data_bufsz must be MAXALIGN'ed. In many xlog record
1451  * types, we omit trailing struct padding on-disk to save a few
1452  * bytes; but compilers may generate accesses to the xlog struct
1453  * that assume that padding bytes are present. If the palloc
1454  * request is not large enough to include such padding bytes then
1455  * we'll get valgrind complaints due to otherwise-harmless fetches
1456  * of the padding bytes.
1457  *
1458  * In addition, force the initial request to be reasonably large
1459  * so that we don't waste time with lots of trips through this
1460  * stanza. BLCKSZ / 2 seems like a good compromise choice.
1461  */
1462  state->main_data_bufsz = MAXALIGN(Max(state->main_data_len,
1463  BLCKSZ / 2));
1464  state->main_data = palloc(state->main_data_bufsz);
1465  }
1466  memcpy(state->main_data, ptr, state->main_data_len);
1467  ptr += state->main_data_len;
1468  }
1469 
1470  return true;
1471 
1472 shortdata_err:
1473  report_invalid_record(state,
1474  "record with invalid length at %X/%X",
1475  (uint32) (state->ReadRecPtr >> 32), (uint32) state->ReadRecPtr);
1476 err:
1477  *errormsg = state->errormsg_buf;
1478 
1479  return false;
1480 }
1481 
1482 /*
1483  * Returns information about the block that a block reference refers to.
1484  *
1485  * If the WAL record contains a block reference with the given ID, *rnode,
1486  * *forknum, and *blknum are filled in (if not NULL), and returns true.
1487  * Otherwise returns false.
1488  */
1489 bool
1490 XLogRecGetBlockTag(XLogReaderState *record, uint8 block_id,
1491  RelFileNode *rnode, ForkNumber *forknum, BlockNumber *blknum)
1492 {
1493  DecodedBkpBlock *bkpb;
1494 
1495  if (!record->blocks[block_id].in_use)
1496  return false;
1497 
1498  bkpb = &record->blocks[block_id];
1499  if (rnode)
1500  *rnode = bkpb->rnode;
1501  if (forknum)
1502  *forknum = bkpb->forknum;
1503  if (blknum)
1504  *blknum = bkpb->blkno;
1505  return true;
1506 }
1507 
1508 /*
1509  * Returns the data associated with a block reference, or NULL if there is
1510  * no data (e.g. because a full-page image was taken instead). The returned
1511  * pointer points to a MAXALIGNed buffer.
1512  */
1513 char *
1514 XLogRecGetBlockData(XLogReaderState *record, uint8 block_id, Size *len)
1515 {
1516  DecodedBkpBlock *bkpb;
1517 
1518  if (!record->blocks[block_id].in_use)
1519  return NULL;
1520 
1521  bkpb = &record->blocks[block_id];
1522 
1523  if (!bkpb->has_data)
1524  {
1525  if (len)
1526  *len = 0;
1527  return NULL;
1528  }
1529  else
1530  {
1531  if (len)
1532  *len = bkpb->data_len;
1533  return bkpb->data;
1534  }
1535 }
1536 
1537 /*
1538  * Restore a full-page image from a backup block attached to an XLOG record.
1539  *
1540  * Returns true if a full-page image is restored into *page, false otherwise.
1541  */
1542 bool
1543 RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page)
1544 {
1545  DecodedBkpBlock *bkpb;
1546  char *ptr;
1547  PGAlignedBlock tmp;
1548 
1549  if (!record->blocks[block_id].in_use)
1550  return false;
1551  if (!record->blocks[block_id].has_image)
1552  return false;
1553 
1554  bkpb = &record->blocks[block_id];
1555  ptr = bkpb->bkp_image;
1556 
1557  if (bkpb->bimg_info & BKPIMAGE_IS_COMPRESSED)
1558  {
1559  /* If a backup block image is compressed, decompress it */
1560  if (pglz_decompress(ptr, bkpb->bimg_len, tmp.data,
1561  BLCKSZ - bkpb->hole_length, true) < 0)
1562  {
1563  report_invalid_record(record, "invalid compressed image at %X/%X, block %d",
1564  (uint32) (record->ReadRecPtr >> 32),
1565  (uint32) record->ReadRecPtr,
1566  block_id);
1567  return false;
1568  }
1569  ptr = tmp.data;
1570  }
1571 
1572  /* generate page, taking into account hole if necessary */
1573  if (bkpb->hole_length == 0)
1574  {
1575  memcpy(page, ptr, BLCKSZ);
1576  }
1577  else
1578  {
1579  memcpy(page, ptr, bkpb->hole_offset);
1580  /* must zero-fill the hole */
1581  MemSet(page + bkpb->hole_offset, 0, bkpb->hole_length);
1582  memcpy(page + (bkpb->hole_offset + bkpb->hole_length),
1583  ptr + bkpb->hole_offset,
1584  BLCKSZ - (bkpb->hole_offset + bkpb->hole_length));
1585  }
1586 
1587  return true;
1588 }
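/*
 * A short sketch of how redo-style consumers typically walk the block
 * references of the record most recently returned by XLogReadRecord()
 * ("page" is assumed to be a caller-supplied BLCKSZ-sized buffer):
 */
static void
example_walk_blocks(XLogReaderState *record, char *page)
{
	int			block_id;

	for (block_id = 0; block_id <= record->max_block_id; block_id++)
	{
		RelFileNode rnode;
		ForkNumber	forknum;
		BlockNumber blkno;
		Size		datalen;
		char	   *data;

		if (!XLogRecGetBlockTag(record, block_id, &rnode, &forknum, &blkno))
			continue;			/* this block reference is not in use */

		if (record->blocks[block_id].has_image)
			(void) RestoreBlockImage(record, block_id, page);

		data = XLogRecGetBlockData(record, block_id, &datalen);
		/* ... apply "data" and/or "page" to the target block here ... */
		(void) data;
	}
}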
1589 
1590 #ifndef FRONTEND
1591 
1592 /*
1593  * Extract the FullTransactionId from a WAL record.
1594  */
1595 FullTransactionId
1596 XLogRecGetFullXid(XLogReaderState *record)
1597 {
1598  TransactionId xid,
1599  next_xid;
1600  uint32 epoch;
1601 
1602  /*
1603  * This function is only safe during replay, because it depends on the
1604  * replay state. See AdvanceNextFullTransactionIdPastXid() for more.
1605  */
1606  Assert(AmStartupProcess() || !IsUnderPostmaster);
1607 
1608  xid = XLogRecGetXid(record);
1609  next_xid = XidFromFullTransactionId(ShmemVariableCache->nextFullXid);
1610  epoch = EpochFromFullTransactionId(ShmemVariableCache->nextFullXid);
1611 
1612  /*
1613  * If xid is numerically greater than next_xid, it has to be from the last
1614  * epoch.
1615  */
1616  if (unlikely(xid > next_xid))
1617  --epoch;
1618 
1619  return FullTransactionIdFromEpochAndXid(epoch, xid);
1620 }
1621 
1622 #endif