PostgreSQL Source Code  git master
pg_rewind.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * pg_rewind.c
4  * Synchronizes a PostgreSQL data directory to a new timeline
5  *
6  * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
7  *
8  *-------------------------------------------------------------------------
9  */
10 #include "postgres_fe.h"
11 
12 #include <sys/stat.h>
13 #include <fcntl.h>
14 #include <time.h>
15 #include <unistd.h>
16 
17 #include "access/timeline.h"
18 #include "access/xlog_internal.h"
19 #include "catalog/catversion.h"
20 #include "catalog/pg_control.h"
22 #include "common/file_perm.h"
23 #include "common/file_utils.h"
25 #include "common/string.h"
26 #include "fe_utils/recovery_gen.h"
27 #include "fetch.h"
28 #include "file_ops.h"
29 #include "filemap.h"
30 #include "getopt_long.h"
31 #include "pg_rewind.h"
32 #include "storage/bufpage.h"
33 
34 static void usage(const char *progname);
35 
36 static void createBackupLabel(XLogRecPtr startpoint, TimeLineID starttli,
37  XLogRecPtr checkpointloc);
38 
39 static void digestControlFile(ControlFileData *ControlFile, char *source,
40  size_t size);
41 static void syncTargetDirectory(void);
42 static void getRestoreCommand(const char *argv0);
43 static void sanityChecks(void);
44 static void findCommonAncestorTimeline(XLogRecPtr *recptr, int *tliIndex);
45 static void ensureCleanShutdown(const char *argv0);
46 static void disconnect_atexit(void);
47 
50 
51 const char *progname;
53 
54 /* Configuration options */
55 char *datadir_target = NULL;
56 char *datadir_source = NULL;
57 char *connstr_source = NULL;
58 char *restore_command = NULL;
59 
60 static bool debug = false;
61 bool showprogress = false;
62 bool dry_run = false;
63 bool do_sync = true;
64 bool restore_wal = false;
65 
66 /* Target history */
69 
70 /* Progress counters */
71 uint64 fetch_size;
72 uint64 fetch_done;
73 
74 
75 static void
76 usage(const char *progname)
77 {
78  printf(_("%s resynchronizes a PostgreSQL cluster with another copy of the cluster.\n\n"), progname);
79  printf(_("Usage:\n %s [OPTION]...\n\n"), progname);
80  printf(_("Options:\n"));
81  printf(_(" -c, --restore-target-wal use restore_command in target configuration to\n"
82  " retrieve WAL files from archives\n"));
83  printf(_(" -D, --target-pgdata=DIRECTORY existing data directory to modify\n"));
84  printf(_(" --source-pgdata=DIRECTORY source data directory to synchronize with\n"));
85  printf(_(" --source-server=CONNSTR source server to synchronize with\n"));
86  printf(_(" -n, --dry-run stop before modifying anything\n"));
87  printf(_(" -N, --no-sync do not wait for changes to be written\n"
88  " safely to disk\n"));
89  printf(_(" -P, --progress write progress messages\n"));
90  printf(_(" -R, --write-recovery-conf write configuration for replication\n"
91  " (requires --source-server)\n"));
92  printf(_(" --debug write a lot of debug messages\n"));
93  printf(_(" --no-ensure-shutdown do not automatically fix unclean shutdown\n"));
94  printf(_(" -V, --version output version information, then exit\n"));
95  printf(_(" -?, --help show this help, then exit\n"));
96  printf(_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
97  printf(_("%s home page: <%s>\n"), PACKAGE_NAME, PACKAGE_URL);
98 }
99 
100 
101 int
102 main(int argc, char **argv)
103 {
104  static struct option long_options[] = {
105  {"help", no_argument, NULL, '?'},
106  {"target-pgdata", required_argument, NULL, 'D'},
107  {"write-recovery-conf", no_argument, NULL, 'R'},
108  {"source-pgdata", required_argument, NULL, 1},
109  {"source-server", required_argument, NULL, 2},
110  {"no-ensure-shutdown", no_argument, NULL, 4},
111  {"version", no_argument, NULL, 'V'},
112  {"restore-target-wal", no_argument, NULL, 'c'},
113  {"dry-run", no_argument, NULL, 'n'},
114  {"no-sync", no_argument, NULL, 'N'},
115  {"progress", no_argument, NULL, 'P'},
116  {"debug", no_argument, NULL, 3},
117  {NULL, 0, NULL, 0}
118  };
119  int option_index;
120  int c;
121  XLogRecPtr divergerec;
122  int lastcommontliIndex;
123  XLogRecPtr chkptrec;
124  TimeLineID chkpttli;
125  XLogRecPtr chkptredo;
126  size_t size;
127  char *buffer;
128  bool no_ensure_shutdown = false;
129  bool rewind_needed;
130  XLogRecPtr endrec;
131  TimeLineID endtli;
132  ControlFileData ControlFile_new;
133  bool writerecoveryconf = false;
134 
135  pg_logging_init(argv[0]);
136  set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_rewind"));
137  progname = get_progname(argv[0]);
138 
139  /* Process command-line arguments */
140  if (argc > 1)
141  {
142  if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
143  {
144  usage(progname);
145  exit(0);
146  }
147  if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
148  {
149  puts("pg_rewind (PostgreSQL) " PG_VERSION);
150  exit(0);
151  }
152  }
153 
154  while ((c = getopt_long(argc, argv, "cD:nNPR", long_options, &option_index)) != -1)
155  {
156  switch (c)
157  {
158  case '?':
159  fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
160  exit(1);
161 
162  case 'c':
163  restore_wal = true;
164  break;
165 
166  case 'P':
167  showprogress = true;
168  break;
169 
170  case 'n':
171  dry_run = true;
172  break;
173 
174  case 'N':
175  do_sync = false;
176  break;
177 
178  case 'R':
179  writerecoveryconf = true;
180  break;
181 
182  case 3:
183  debug = true;
185  break;
186 
187  case 'D': /* -D or --target-pgdata */
189  break;
190 
191  case 1: /* --source-pgdata */
193  break;
194 
195  case 2: /* --source-server */
197  break;
198 
199  case 4:
200  no_ensure_shutdown = true;
201  break;
202  }
203  }
204 
205  if (datadir_source == NULL && connstr_source == NULL)
206  {
207  pg_log_error("no source specified (--source-pgdata or --source-server)");
208  fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
209  exit(1);
210  }
211 
212  if (datadir_source != NULL && connstr_source != NULL)
213  {
214  pg_log_error("only one of --source-pgdata or --source-server can be specified");
215  fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
216  exit(1);
217  }
218 
219  if (datadir_target == NULL)
220  {
221  pg_log_error("no target data directory specified (--target-pgdata)");
222  fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
223  exit(1);
224  }
225 
226  if (writerecoveryconf && connstr_source == NULL)
227  {
228  pg_log_error("no source server information (--source-server) specified for --write-recovery-conf");
229  fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
230  exit(1);
231  }
232 
233  if (optind < argc)
234  {
235  pg_log_error("too many command-line arguments (first is \"%s\")",
236  argv[optind]);
237  fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
238  exit(1);
239  }
240 
241  /*
242  * Don't allow pg_rewind to be run as root, to avoid overwriting the
243  * ownership of files in the data directory. We need only check for root
244  * -- any other user won't have sufficient permissions to modify files in
245  * the data directory.
246  */
247 #ifndef WIN32
248  if (geteuid() == 0)
249  {
250  pg_log_error("cannot be executed by \"root\"");
251  fprintf(stderr, _("You must run %s as the PostgreSQL superuser.\n"),
252  progname);
253  exit(1);
254  }
255 #endif
256 
258 
259  /* Set mask based on PGDATA permissions */
261  {
262  pg_log_error("could not read permissions of directory \"%s\": %m",
264  exit(1);
265  }
266 
267  umask(pg_mode_mask);
268 
269  getRestoreCommand(argv[0]);
270 
271  atexit(disconnect_atexit);
272 
273  /* Connect to remote server */
274  if (connstr_source)
276 
277  /*
278  * Ok, we have all the options and we're ready to start. Read in all the
279  * information we need from both clusters.
280  */
281  buffer = slurpFile(datadir_target, "global/pg_control", &size);
282  digestControlFile(&ControlFile_target, buffer, size);
283  pg_free(buffer);
284 
285  /*
286  * If the target instance was not cleanly shut down, start and stop the
287  * target cluster once in single-user mode to enforce recovery to finish,
288  * ensuring that the cluster can be used by pg_rewind. Note that if
289  * no_ensure_shutdown is specified, pg_rewind ignores this step, and users
290  * need to make sure by themselves that the target cluster is in a clean
291  * state.
292  */
293  if (!no_ensure_shutdown &&
294  ControlFile_target.state != DB_SHUTDOWNED &&
295  ControlFile_target.state != DB_SHUTDOWNED_IN_RECOVERY)
296  {
297  ensureCleanShutdown(argv[0]);
298 
299  buffer = slurpFile(datadir_target, "global/pg_control", &size);
300  digestControlFile(&ControlFile_target, buffer, size);
301  pg_free(buffer);
302  }
303 
304  buffer = fetchFile("global/pg_control", &size);
305  digestControlFile(&ControlFile_source, buffer, size);
306  pg_free(buffer);
307 
308  sanityChecks();
309 
310  /*
311  * If both clusters are already on the same timeline, there's nothing to
312  * do.
313  */
314  if (ControlFile_target.checkPointCopy.ThisTimeLineID == ControlFile_source.checkPointCopy.ThisTimeLineID)
315  {
316  pg_log_info("source and target cluster are on the same timeline");
317  rewind_needed = false;
318  }
319  else
320  {
321  findCommonAncestorTimeline(&divergerec, &lastcommontliIndex);
322  pg_log_info("servers diverged at WAL location %X/%X on timeline %u",
323  (uint32) (divergerec >> 32), (uint32) divergerec,
324  targetHistory[lastcommontliIndex].tli);
325 
326  /*
327  * Check for the possibility that the target is in fact a direct
328  * ancestor of the source. In that case, there is no divergent history
329  * in the target that needs rewinding.
330  */
331  if (ControlFile_target.checkPoint >= divergerec)
332  {
333  rewind_needed = true;
334  }
335  else
336  {
337  XLogRecPtr chkptendrec;
338 
339  /* Read the checkpoint record on the target to see where it ends. */
340  chkptendrec = readOneRecord(datadir_target,
341  ControlFile_target.checkPoint,
342  targetNentries - 1,
344 
345  /*
346  * If the histories diverged exactly at the end of the shutdown
347  * checkpoint record on the target, there are no WAL records in
348  * the target that don't belong in the source's history, and no
349  * rewind is needed.
350  */
351  if (chkptendrec == divergerec)
352  rewind_needed = false;
353  else
354  rewind_needed = true;
355  }
356  }
357 
358  if (!rewind_needed)
359  {
360  pg_log_info("no rewind required");
361  if (writerecoveryconf && !dry_run)
364  exit(0);
365  }
366 
367  findLastCheckpoint(datadir_target, divergerec, lastcommontliIndex,
368  &chkptrec, &chkpttli, &chkptredo, restore_command);
369  pg_log_info("rewinding from last common checkpoint at %X/%X on timeline %u",
370  (uint32) (chkptrec >> 32), (uint32) chkptrec,
371  chkpttli);
372 
373  /*
374  * Build the filemap, by comparing the source and target data directories.
375  */
376  filemap_create();
377  if (showprogress)
378  pg_log_info("reading source file list");
380  if (showprogress)
381  pg_log_info("reading target file list");
383 
384  /*
385  * Read the target WAL from last checkpoint before the point of fork, to
386  * extract all the pages that were modified on the target cluster after
387  * the fork. We can stop reading after reaching the final shutdown record.
388  * XXX: If we supported rewinding a server that was not shut down cleanly,
389  * we would need to replay until the end of WAL here.
390  */
391  if (showprogress)
392  pg_log_info("reading WAL in target");
393  extractPageMap(datadir_target, chkptrec, lastcommontliIndex,
394  ControlFile_target.checkPoint, restore_command);
396 
397  if (showprogress)
399 
400  /* this is too verbose even for verbose mode */
401  if (debug)
402  print_filemap();
403 
404  /*
405  * Ok, we're ready to start copying things over.
406  */
407  if (showprogress)
408  {
409  pg_log_info("need to copy %lu MB (total source directory size is %lu MB)",
410  (unsigned long) (filemap->fetch_size / (1024 * 1024)),
411  (unsigned long) (filemap->total_size / (1024 * 1024)));
412 
414  fetch_done = 0;
415  }
416 
417  /*
418  * This is the point of no return. Once we start copying things, we have
419  * modified the target directory and there is no turning back!
420  */
421 
422  executeFileMap();
423 
424  progress_report(true);
425  printf("\n");
426 
427  if (showprogress)
428  pg_log_info("creating backup label and updating control file");
429  createBackupLabel(chkptredo, chkpttli, chkptrec);
430 
431  /*
432  * Update control file of target. Make it ready to perform archive
433  * recovery when restarting.
434  *
435  * minRecoveryPoint is set to the current WAL insert location in the
436  * source server. Like in an online backup, it's important that we recover
437  * all the WAL that was generated while we copied the files over.
438  */
439  memcpy(&ControlFile_new, &ControlFile_source, sizeof(ControlFileData));
440 
441  if (connstr_source)
442  {
444  endtli = ControlFile_source.checkPointCopy.ThisTimeLineID;
445  }
446  else
447  {
448  endrec = ControlFile_source.checkPoint;
449  endtli = ControlFile_source.checkPointCopy.ThisTimeLineID;
450  }
451  ControlFile_new.minRecoveryPoint = endrec;
452  ControlFile_new.minRecoveryPointTLI = endtli;
453  ControlFile_new.state = DB_IN_ARCHIVE_RECOVERY;
454  if (!dry_run)
455  update_controlfile(datadir_target, &ControlFile_new, do_sync);
456 
457  if (showprogress)
458  pg_log_info("syncing target data directory");
460 
461  if (writerecoveryconf && !dry_run)
464 
465  pg_log_info("Done!");
466 
467  return 0;
468 }
469 
470 static void
472 {
473  /* TODO Check that there's no backup_label in either cluster */
474 
475  /* Check system_identifier match */
476  if (ControlFile_target.system_identifier != ControlFile_source.system_identifier)
477  pg_fatal("source and target clusters are from different systems");
478 
479  /* check version */
480  if (ControlFile_target.pg_control_version != PG_CONTROL_VERSION ||
481  ControlFile_source.pg_control_version != PG_CONTROL_VERSION ||
482  ControlFile_target.catalog_version_no != CATALOG_VERSION_NO ||
483  ControlFile_source.catalog_version_no != CATALOG_VERSION_NO)
484  {
485  pg_fatal("clusters are not compatible with this version of pg_rewind");
486  }
487 
488  /*
489  * Target cluster need to use checksums or hint bit wal-logging, this to
490  * prevent from data corruption that could occur because of hint bits.
491  */
492  if (ControlFile_target.data_checksum_version != PG_DATA_CHECKSUM_VERSION &&
493  !ControlFile_target.wal_log_hints)
494  {
495  pg_fatal("target server needs to use either data checksums or \"wal_log_hints = on\"");
496  }
497 
498  /*
499  * Target cluster better not be running. This doesn't guard against
500  * someone starting the cluster concurrently. Also, this is probably more
501  * strict than necessary; it's OK if the target node was not shut down
502  * cleanly, as long as it isn't running at the moment.
503  */
504  if (ControlFile_target.state != DB_SHUTDOWNED &&
505  ControlFile_target.state != DB_SHUTDOWNED_IN_RECOVERY)
506  pg_fatal("target server must be shut down cleanly");
507 
508  /*
509  * When the source is a data directory, also require that the source
510  * server is shut down. There isn't any very strong reason for this
511  * limitation, but better safe than sorry.
512  */
513  if (datadir_source &&
514  ControlFile_source.state != DB_SHUTDOWNED &&
515  ControlFile_source.state != DB_SHUTDOWNED_IN_RECOVERY)
516  pg_fatal("source data directory must be shut down cleanly");
517 }
518 
519 /*
520  * Print a progress report based on the fetch_size and fetch_done variables.
521  *
522  * Progress report is written at maximum once per second, unless the
523  * force parameter is set to true.
524  */
525 void
526 progress_report(bool force)
527 {
528  static pg_time_t last_progress_report = 0;
529  int percent;
530  char fetch_done_str[32];
531  char fetch_size_str[32];
532  pg_time_t now;
533 
534  if (!showprogress)
535  return;
536 
537  now = time(NULL);
538  if (now == last_progress_report && !force)
539  return; /* Max once per second */
540 
541  last_progress_report = now;
542  percent = fetch_size ? (int) ((fetch_done) * 100 / fetch_size) : 0;
543 
544  /*
545  * Avoid overflowing past 100% or the full size. This may make the total
546  * size number change as we approach the end of the backup (the estimate
547  * will always be wrong if WAL is included), but that's better than having
548  * the done column be bigger than the total.
549  */
550  if (percent > 100)
551  percent = 100;
552  if (fetch_done > fetch_size)
554 
555  /*
556  * Separate step to keep platform-dependent format code out of
557  * translatable strings. And we only test for INT64_FORMAT availability
558  * in snprintf, not fprintf.
559  */
560  snprintf(fetch_done_str, sizeof(fetch_done_str), INT64_FORMAT,
561  fetch_done / 1024);
562  snprintf(fetch_size_str, sizeof(fetch_size_str), INT64_FORMAT,
563  fetch_size / 1024);
564 
565  fprintf(stderr, _("%*s/%s kB (%d%%) copied"),
566  (int) strlen(fetch_size_str), fetch_done_str, fetch_size_str,
567  percent);
568  if (isatty(fileno(stderr)))
569  fprintf(stderr, "\r");
570  else
571  fprintf(stderr, "\n");
572 }
573 
574 /*
575  * Find minimum from two WAL locations assuming InvalidXLogRecPtr means
576  * infinity as src/include/access/timeline.h states. This routine should
577  * be used only when comparing WAL locations related to history files.
578  */
579 static XLogRecPtr
581 {
582  if (XLogRecPtrIsInvalid(a))
583  return b;
584  else if (XLogRecPtrIsInvalid(b))
585  return a;
586  else
587  return Min(a, b);
588 }
589 
590 /*
591  * Retrieve timeline history for given control file which should behold
592  * either source or target.
593  */
594 static TimeLineHistoryEntry *
595 getTimelineHistory(ControlFileData *controlFile, int *nentries)
596 {
597  TimeLineHistoryEntry *history;
598  TimeLineID tli;
599 
600  tli = controlFile->checkPointCopy.ThisTimeLineID;
601 
602  /*
603  * Timeline 1 does not have a history file, so there is no need to check
604  * and fake an entry with infinite start and end positions.
605  */
606  if (tli == 1)
607  {
608  history = (TimeLineHistoryEntry *) pg_malloc(sizeof(TimeLineHistoryEntry));
609  history->tli = tli;
610  history->begin = history->end = InvalidXLogRecPtr;
611  *nentries = 1;
612  }
613  else
614  {
615  char path[MAXPGPATH];
616  char *histfile;
617 
618  TLHistoryFilePath(path, tli);
619 
620  /* Get history file from appropriate source */
621  if (controlFile == &ControlFile_source)
622  histfile = fetchFile(path, NULL);
623  else if (controlFile == &ControlFile_target)
624  histfile = slurpFile(datadir_target, path, NULL);
625  else
626  pg_fatal("invalid control file");
627 
628  history = rewind_parseTimeLineHistory(histfile, tli, nentries);
629  pg_free(histfile);
630  }
631 
632  if (debug)
633  {
634  int i;
635 
636  if (controlFile == &ControlFile_source)
637  pg_log_debug("Source timeline history:");
638  else if (controlFile == &ControlFile_target)
639  pg_log_debug("Target timeline history:");
640  else
641  Assert(false);
642 
643  /*
644  * Print the target timeline history.
645  */
646  for (i = 0; i < targetNentries; i++)
647  {
648  TimeLineHistoryEntry *entry;
649 
650  entry = &history[i];
651  pg_log_debug("%d: %X/%X - %X/%X", entry->tli,
652  (uint32) (entry->begin >> 32), (uint32) (entry->begin),
653  (uint32) (entry->end >> 32), (uint32) (entry->end));
654  }
655  }
656 
657  return history;
658 }
659 
660 /*
661  * Determine the TLI of the last common timeline in the timeline history of the
662  * two clusters. targetHistory is filled with target timeline history and
663  * targetNentries is number of items in targetHistory. *tliIndex is set to the
664  * index of last common timeline in targetHistory array, and *recptr is set to
665  * the position where the timeline history diverged (ie. the first WAL record
666  * that's not the same in both clusters).
667  *
668  * Control files of both clusters must be read into ControlFile_target/source
669  * before calling this routine.
670  */
671 static void
672 findCommonAncestorTimeline(XLogRecPtr *recptr, int *tliIndex)
673 {
674  TimeLineHistoryEntry *sourceHistory;
675  int sourceNentries;
676  int i,
677  n;
678 
679  /* Retrieve timelines for both source and target */
680  sourceHistory = getTimelineHistory(&ControlFile_source, &sourceNentries);
681  targetHistory = getTimelineHistory(&ControlFile_target, &targetNentries);
682 
683  /*
684  * Trace the history forward, until we hit the timeline diverge. It may
685  * still be possible that the source and target nodes used the same
686  * timeline number in their history but with different start position
687  * depending on the history files that each node has fetched in previous
688  * recovery processes. Hence check the start position of the new timeline
689  * as well and move down by one extra timeline entry if they do not match.
690  */
691  n = Min(sourceNentries, targetNentries);
692  for (i = 0; i < n; i++)
693  {
694  if (sourceHistory[i].tli != targetHistory[i].tli ||
695  sourceHistory[i].begin != targetHistory[i].begin)
696  break;
697  }
698 
699  if (i > 0)
700  {
701  i--;
702  *recptr = MinXLogRecPtr(sourceHistory[i].end, targetHistory[i].end);
703  *tliIndex = i;
704 
705  pg_free(sourceHistory);
706  return;
707  }
708  else
709  {
710  pg_fatal("could not find common ancestor of the source and target cluster's timelines");
711  }
712 }
713 
714 
715 /*
716  * Create a backup_label file that forces recovery to begin at the last common
717  * checkpoint.
718  */
719 static void
720 createBackupLabel(XLogRecPtr startpoint, TimeLineID starttli, XLogRecPtr checkpointloc)
721 {
722  XLogSegNo startsegno;
723  time_t stamp_time;
724  char strfbuf[128];
725  char xlogfilename[MAXFNAMELEN];
726  struct tm *tmp;
727  char buf[1000];
728  int len;
729 
730  XLByteToSeg(startpoint, startsegno, WalSegSz);
731  XLogFileName(xlogfilename, starttli, startsegno, WalSegSz);
732 
733  /*
734  * Construct backup label file
735  */
736  stamp_time = time(NULL);
737  tmp = localtime(&stamp_time);
738  strftime(strfbuf, sizeof(strfbuf), "%Y-%m-%d %H:%M:%S %Z", tmp);
739 
740  len = snprintf(buf, sizeof(buf),
741  "START WAL LOCATION: %X/%X (file %s)\n"
742  "CHECKPOINT LOCATION: %X/%X\n"
743  "BACKUP METHOD: pg_rewind\n"
744  "BACKUP FROM: standby\n"
745  "START TIME: %s\n",
746  /* omit LABEL: line */
747  (uint32) (startpoint >> 32), (uint32) startpoint, xlogfilename,
748  (uint32) (checkpointloc >> 32), (uint32) checkpointloc,
749  strfbuf);
750  if (len >= sizeof(buf))
751  pg_fatal("backup label buffer too small"); /* shouldn't happen */
752 
753  /* TODO: move old file out of the way, if any. */
754  open_target_file("backup_label", true); /* BACKUP_LABEL_FILE */
755  write_target_range(buf, 0, len);
757 }
758 
759 /*
760  * Check CRC of control file
761  */
762 static void
764 {
765  pg_crc32c crc;
766 
767  /* Calculate CRC */
768  INIT_CRC32C(crc);
769  COMP_CRC32C(crc, (char *) ControlFile, offsetof(ControlFileData, crc));
770  FIN_CRC32C(crc);
771 
772  /* And simply compare it */
773  if (!EQ_CRC32C(crc, ControlFile->crc))
774  pg_fatal("unexpected control file CRC");
775 }
776 
777 /*
778  * Verify control file contents in the buffer src, and copy it to *ControlFile.
779  */
780 static void
782 {
783  if (size != PG_CONTROL_FILE_SIZE)
784  pg_fatal("unexpected control file size %d, expected %d",
785  (int) size, PG_CONTROL_FILE_SIZE);
786 
787  memcpy(ControlFile, src, sizeof(ControlFileData));
788 
789  /* set and validate WalSegSz */
790  WalSegSz = ControlFile->xlog_seg_size;
791 
793  pg_fatal(ngettext("WAL segment size must be a power of two between 1 MB and 1 GB, but the control file specifies %d byte",
794  "WAL segment size must be a power of two between 1 MB and 1 GB, but the control file specifies %d bytes",
795  WalSegSz),
796  WalSegSz);
797 
798  /* Additional checks on control file */
799  checkControlFile(ControlFile);
800 }
801 
802 /*
803  * Sync target data directory to ensure that modifications are safely on disk.
804  *
805  * We do this once, for the whole data directory, for performance reasons. At
806  * the end of pg_rewind's run, the kernel is likely to already have flushed
807  * most dirty buffers to disk. Additionally fsync_pgdata uses a two-pass
808  * approach (only initiating writeback in the first pass), which often reduces
809  * the overall amount of IO noticeably.
810  */
811 static void
813 {
814  if (!do_sync || dry_run)
815  return;
816 
817  fsync_pgdata(datadir_target, PG_VERSION_NUM);
818 }
819 
820 /*
821  * Get value of GUC parameter restore_command from the target cluster.
822  *
823  * This uses a logic based on "postgres -C" to get the value from the
824  * cluster.
825  */
826 static void
828 {
829  int rc;
830  char postgres_exec_path[MAXPGPATH],
831  postgres_cmd[MAXPGPATH],
832  cmd_output[MAXPGPATH];
833 
834  if (!restore_wal)
835  return;
836 
837  /* find postgres executable */
838  rc = find_other_exec(argv0, "postgres",
840  postgres_exec_path);
841 
842  if (rc < 0)
843  {
844  char full_path[MAXPGPATH];
845 
846  if (find_my_exec(argv0, full_path) < 0)
847  strlcpy(full_path, progname, sizeof(full_path));
848 
849  if (rc == -1)
850  pg_log_error("The program \"%s\" is needed by %s but was not found in the\n"
851  "same directory as \"%s\".\n"
852  "Check your installation.",
853  "postgres", progname, full_path);
854  else
855  pg_log_error("The program \"%s\" was found by \"%s\"\n"
856  "but was not the same version as %s.\n"
857  "Check your installation.",
858  "postgres", full_path, progname);
859  exit(1);
860  }
861 
862  /*
863  * Build a command able to retrieve the value of GUC parameter
864  * restore_command, if set.
865  */
866  snprintf(postgres_cmd, sizeof(postgres_cmd),
867  "\"%s\" -D \"%s\" -C restore_command",
868  postgres_exec_path, datadir_target);
869 
870  if (!pipe_read_line(postgres_cmd, cmd_output, sizeof(cmd_output)))
871  exit(1);
872 
873  (void) pg_strip_crlf(cmd_output);
874 
875  if (strcmp(cmd_output, "") == 0)
876  pg_fatal("restore_command is not set in the target cluster");
877 
878  restore_command = pg_strdup(cmd_output);
879 
880  pg_log_debug("using for rewind restore_command = \'%s\'",
882 }
883 
884 
885 /*
886  * Ensure clean shutdown of target instance by launching single-user mode
887  * postgres to do crash recovery.
888  */
889 static void
891 {
892  int ret;
893 #define MAXCMDLEN (2 * MAXPGPATH)
894  char exec_path[MAXPGPATH];
895  char cmd[MAXCMDLEN];
896 
897  /* locate postgres binary */
898  if ((ret = find_other_exec(argv0, "postgres",
900  exec_path)) < 0)
901  {
902  char full_path[MAXPGPATH];
903 
904  if (find_my_exec(argv0, full_path) < 0)
905  strlcpy(full_path, progname, sizeof(full_path));
906 
907  if (ret == -1)
908  pg_fatal("The program \"%s\" is needed by %s but was not found in the\n"
909  "same directory as \"%s\".\n"
910  "Check your installation.",
911  "postgres", progname, full_path);
912  else
913  pg_fatal("The program \"%s\" was found by \"%s\"\n"
914  "but was not the same version as %s.\n"
915  "Check your installation.",
916  "postgres", full_path, progname);
917  }
918 
919  pg_log_info("executing \"%s\" for target server to complete crash recovery",
920  exec_path);
921 
922  /*
923  * Skip processing if requested, but only after ensuring presence of
924  * postgres.
925  */
926  if (dry_run)
927  return;
928 
929  /*
930  * Finally run postgres in single-user mode. There is no need to use
931  * fsync here. This makes the recovery faster, and the target data folder
932  * is synced at the end anyway.
933  */
934  snprintf(cmd, MAXCMDLEN, "\"%s\" --single -F -D \"%s\" template1 < \"%s\"",
935  exec_path, datadir_target, DEVNULL);
936 
937  if (system(cmd) != 0)
938  {
939  pg_log_error("postgres single-user mode in target cluster failed");
940  pg_fatal("Command was: %s", cmd);
941  }
942 }
943 
944 static void
946 {
947  if (conn != NULL)
948  PQfinish(conn);
949 }
void extractPageMap(const char *datadir, XLogRecPtr startpoint, int tliIndex, XLogRecPtr endpoint, const char *restoreCommand)
Definition: parsexlog.c:59
#define INIT_CRC32C(crc)
Definition: pg_crc32c.h:41
#define IsValidWalSegSize(size)
Definition: xlog_internal.h:97
void calculate_totals(void)
Definition: filemap.c:633
int find_other_exec(const char *argv0, const char *target, const char *versionstr, char *retpath)
Definition: exec.c:323
static void syncTargetDirectory(void)
Definition: pg_rewind.c:812
char * datadir_target
Definition: pg_rewind.c:55
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28
void open_target_file(const char *path, bool trunc)
Definition: file_ops.c:42
static char * argv0
Definition: pg_ctl.c:97
void write_target_range(char *buf, off_t begin, size_t size)
Definition: file_ops.c:83
uint32 TimeLineID
Definition: xlogdefs.h:52
int64 pg_time_t
Definition: pgtime.h:23
static bool debug
Definition: pg_rewind.c:60
int pg_strip_crlf(char *str)
Definition: string.c:105
#define MAXCMDLEN
int main(int argc, char **argv)
Definition: pg_rewind.c:102
void * pg_malloc(size_t size)
Definition: fe_memutils.c:47
TimeLineID minRecoveryPointTLI
Definition: pg_control.h:167
TimeLineHistoryEntry * targetHistory
Definition: pg_rewind.c:67
bool dry_run
Definition: pg_rewind.c:62
const char * get_progname(const char *argv0)
Definition: path.c:453
void process_target_file(const char *path, file_type_t type, size_t oldsize, const char *link_target)
Definition: filemap.c:348
#define pg_log_error(...)
Definition: logging.h:79
int getopt_long(int argc, char *const argv[], const char *optstring, const struct option *longopts, int *longindex)
Definition: getopt_long.c:57
void get_restricted_token(void)
uint32 pg_crc32c
Definition: pg_crc32c.h:38
void pg_logging_init(const char *argv0)
Definition: logging.c:81
void WriteRecoveryConfig(PGconn *pgconn, char *target_dir, PQExpBuffer contents)
Definition: recovery_gen.c:117
static pg_time_t last_progress_report
void progress_report(bool force)
Definition: pg_rewind.c:526
#define Min(x, y)
Definition: c.h:920
static void ensureCleanShutdown(const char *argv0)
Definition: pg_rewind.c:890
static XLogRecPtr MinXLogRecPtr(XLogRecPtr a, XLogRecPtr b)
Definition: pg_rewind.c:580
static bool writerecoveryconf
void findLastCheckpoint(const char *datadir, XLogRecPtr forkptr, int tliIndex, XLogRecPtr *lastchkptrec, TimeLineID *lastchkpttli, XLogRecPtr *lastchkptredo, const char *restoreCommand)
Definition: parsexlog.c:154
#define pg_fatal(...)
Definition: pg_rewind.h:41
void PQfinish(PGconn *conn)
Definition: fe-connect.c:4189
#define CATALOG_VERSION_NO
Definition: catversion.h:56
#define PG_CONTROL_VERSION
Definition: pg_control.h:25
uint32 pg_control_version
Definition: pg_control.h:121
#define printf(...)
Definition: port.h:199
TimeLineID tli
Definition: timeline.h:27
CheckPoint checkPointCopy
Definition: pg_control.h:131
void fetchSourceFileList(void)
Definition: fetch.c:28
void filemap_create(void)
Definition: filemap.c:135
#define fprintf
Definition: port.h:197
char * fetchFile(const char *filename, size_t *filesize)
Definition: fetch.c:54
void filemap_finalize(void)
Definition: filemap.c:597
void executeFileMap(void)
Definition: fetch.c:40
uint64 fetch_size
Definition: filemap.h:86
void fsync_pgdata(const char *pg_data, int serverVersion)
Definition: file_utils.c:61
static struct pg_tm tm
Definition: localtime.c:102
bool showprogress
Definition: pg_rewind.c:61
void update_controlfile(const char *DataDir, ControlFileData *ControlFile, bool do_sync)
#define required_argument
Definition: getopt_long.h:25
uint32 xlog_seg_size
Definition: pg_control.h:209
int optind
Definition: getopt.c:50
bool restore_wal
Definition: pg_rewind.c:64
uint64 system_identifier
Definition: pg_control.h:106
uint64 fetch_size
Definition: pg_rewind.c:71
PGconn * conn
Definition: streamutil.c:54
static void checkControlFile(ControlFileData *ControlFile)
Definition: pg_rewind.c:763
#define pg_log_debug(...)
Definition: logging.h:91
XLogRecPtr libpqGetCurrentXlogInsertLocation(void)
Definition: libpq_fetch.c:147
#define MAXPGPATH
int find_my_exec(const char *argv0, char *retpath)
Definition: exec.c:128
uint32 data_checksum_version
Definition: pg_control.h:220
char * c
static void usage(const char *progname)
Definition: pg_rewind.c:76
static char * buf
Definition: pg_test_fsync.c:67
uint64 XLogSegNo
Definition: xlogdefs.h:41
filemap_t * filemap
Definition: filemap.c:23
char * pg_strdup(const char *in)
Definition: fe_memutils.c:85
uint64 total_size
Definition: filemap.h:85
unsigned int uint32
Definition: c.h:367
#define EQ_CRC32C(c1, c2)
Definition: pg_crc32c.h:42
#define DEVNULL
Definition: port.h:124
void print_filemap(void)
Definition: filemap.c:675
static void findCommonAncestorTimeline(XLogRecPtr *recptr, int *tliIndex)
Definition: pg_rewind.c:672
void traverse_datadir(const char *datadir, process_file_callback_t callback)
Definition: copy_fetch.c:33
PQExpBuffer GenerateRecoveryConfig(PGconn *pgconn, char *replication_slot)
Definition: recovery_gen.c:23
#define XLogRecPtrIsInvalid(r)
Definition: xlogdefs.h:29
char * connstr_source
Definition: pg_rewind.c:57
#define MAXFNAMELEN
int WalSegSz
Definition: pg_rewind.c:52
#define no_argument
Definition: getopt_long.h:24
#define ngettext(s, p, n)
Definition: c.h:1145
#define PG_TEXTDOMAIN(domain)
Definition: c.h:1177
static void disconnect_atexit(void)
Definition: pg_rewind.c:945
int targetNentries
Definition: pg_rewind.c:68
char * datadir_source
Definition: pg_rewind.c:56
static ControlFileData * ControlFile
Definition: xlog.c:738
#define PG_BACKEND_VERSIONSTR
Definition: port.h:112
size_t strlcpy(char *dst, const char *src, size_t siz)
Definition: strlcpy.c:45
bool do_sync
Definition: pg_rewind.c:63
XLogRecPtr end
Definition: timeline.h:29
char * restore_command
Definition: pg_rewind.c:58
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define Assert(condition)
Definition: c.h:738
#define XLogFileName(fname, tli, logSegNo, wal_segsz_bytes)
static char * exec_path
Definition: pg_ctl.c:92
XLogRecPtr readOneRecord(const char *datadir, XLogRecPtr ptr, int tliIndex, const char *restoreCommand)
Definition: parsexlog.c:110
uint32 catalog_version_no
Definition: pg_control.h:122
void pg_free(void *ptr)
Definition: fe_memutils.c:105
#define PG_CONTROL_FILE_SIZE
Definition: pg_control.h:248
static TimeLineHistoryEntry * getTimelineHistory(ControlFileData *controlFile, int *nentries)
Definition: pg_rewind.c:595
#define INT64_FORMAT
Definition: c.h:409
static void getRestoreCommand(const char *argv0)
Definition: pg_rewind.c:827
static void sanityChecks(void)
Definition: pg_rewind.c:471
void close_target_file(void)
Definition: file_ops.c:70
#define PG_DATA_CHECKSUM_VERSION
Definition: bufpage.h:200
bool GetDataDirectoryCreatePerm(const char *dataDir)
TimeLineID ThisTimeLineID
Definition: pg_control.h:39
void set_pglocale_pgservice(const char *argv0, const char *app)
Definition: exec.c:434
XLogRecPtr begin
Definition: timeline.h:28
char * optarg
Definition: getopt.c:52
void pg_logging_set_level(enum pg_log_level new_level)
Definition: logging.c:161
int i
TimeLineHistoryEntry * rewind_parseTimeLineHistory(char *buffer, TimeLineID targetTLI, int *nentries)
Definition: timeline.c:29
static ControlFileData ControlFile_target
Definition: pg_rewind.c:48
char * pipe_read_line(char *cmd, char *line, int maxsize)
Definition: exec.c:359
uint64 fetch_done
Definition: pg_rewind.c:72
static ControlFileData ControlFile_source
Definition: pg_rewind.c:49
static void createBackupLabel(XLogRecPtr startpoint, TimeLineID starttli, XLogRecPtr checkpointloc)
Definition: pg_rewind.c:720
static void digestControlFile(ControlFileData *ControlFile, char *source, size_t size)
Definition: pg_rewind.c:781
#define COMP_CRC32C(crc, data, len)
Definition: pg_crc32c.h:89
#define FIN_CRC32C(crc)
Definition: pg_crc32c.h:94
#define snprintf
Definition: port.h:193
#define _(x)
Definition: elog.c:88
int pg_mode_mask
Definition: file_perm.c:25
Datum now(PG_FUNCTION_ARGS)
Definition: timestamp.c:1538
XLogRecPtr checkPoint
Definition: pg_control.h:129
#define offsetof(type, field)
Definition: c.h:661
#define pg_log_info(...)
Definition: logging.h:87
const char * progname
Definition: pg_rewind.c:51
XLogRecPtr minRecoveryPoint
Definition: pg_control.h:166
void libpqConnect(const char *connstr)
Definition: libpq_fetch.c:43
char * slurpFile(const char *datadir, const char *path, size_t *filesize)
Definition: file_ops.c:284
#define TLHistoryFilePath(path, tli)
#define XLByteToSeg(xlrp, logSegNo, wal_segsz_bytes)