PostgreSQL Source Code  git master
pg_rewind.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * pg_rewind.c
4  * Synchronizes a PostgreSQL data directory to a new timeline
5  *
6  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
7  *
8  *-------------------------------------------------------------------------
9  */
#include "postgres_fe.h"

#include <sys/stat.h>
#include <fcntl.h>
#include <time.h>
#include <unistd.h>

#include "pg_rewind.h"
#include "fetch.h"
#include "file_ops.h"
#include "filemap.h"

#include "access/timeline.h"
#include "access/xlog_internal.h"
#include "catalog/catversion.h"
#include "catalog/pg_control.h"
#include "common/controldata_utils.h"
#include "common/file_perm.h"
#include "common/file_utils.h"
#include "common/restricted_token.h"
#include "fe_utils/recovery_gen.h"
#include "getopt_long.h"
#include "storage/bufpage.h"
33 
34 static void usage(const char *progname);
35 
36 static void createBackupLabel(XLogRecPtr startpoint, TimeLineID starttli,
37  XLogRecPtr checkpointloc);
38 
39 static void digestControlFile(ControlFileData *ControlFile, char *source,
40  size_t size);
41 static void syncTargetDirectory(void);
42 static void sanityChecks(void);
43 static void findCommonAncestorTimeline(XLogRecPtr *recptr, int *tliIndex);
44 static void ensureCleanShutdown(const char *argv0);
45 static void disconnect_atexit(void);
46 
49 
50 const char *progname;
52 
53 /* Configuration options */
54 char *datadir_target = NULL;
55 char *datadir_source = NULL;
56 char *connstr_source = NULL;
57 
58 static bool debug = false;
59 bool showprogress = false;
60 bool dry_run = false;
61 bool do_sync = true;
62 
63 /* Target history */
66 
67 /* Progress counters */
68 uint64 fetch_size;
69 uint64 fetch_done;
70 
71 
/*
 * Print the command-line help text to stdout.
 *
 * The option descriptions are kept in a single aligned column so that the
 * output reads as a table.
 */
static void
usage(const char *progname)
{
	printf(_("%s resynchronizes a PostgreSQL cluster with another copy of the cluster.\n\n"), progname);
	printf(_("Usage:\n  %s [OPTION]...\n\n"), progname);
	printf(_("Options:\n"));
	printf(_("  -D, --target-pgdata=DIRECTORY  existing data directory to modify\n"));
	printf(_("      --source-pgdata=DIRECTORY  source data directory to synchronize with\n"));
	printf(_("      --source-server=CONNSTR    source server to synchronize with\n"));
	printf(_("  -R, --write-recovery-conf      write configuration for replication\n"
			 "                                 (requires --source-server)\n"));
	printf(_("  -n, --dry-run                  stop before modifying anything\n"));
	printf(_("  -N, --no-sync                  do not wait for changes to be written\n"
			 "                                 safely to disk\n"));
	printf(_("  -P, --progress                 write progress messages\n"));
	printf(_("      --no-ensure-shutdown       do not automatically fix unclean shutdown\n"));
	printf(_("      --debug                    write a lot of debug messages\n"));
	printf(_("  -V, --version                  output version information, then exit\n"));
	printf(_("  -?, --help                     show this help, then exit\n"));
	printf(_("\nReport bugs to <pgsql-bugs@lists.postgresql.org>.\n"));
}
93 
94 
95 int
96 main(int argc, char **argv)
97 {
98  static struct option long_options[] = {
99  {"help", no_argument, NULL, '?'},
100  {"target-pgdata", required_argument, NULL, 'D'},
101  {"write-recovery-conf", no_argument, NULL, 'R'},
102  {"source-pgdata", required_argument, NULL, 1},
103  {"source-server", required_argument, NULL, 2},
104  {"no-ensure-shutdown", no_argument, NULL, 4},
105  {"version", no_argument, NULL, 'V'},
106  {"dry-run", no_argument, NULL, 'n'},
107  {"no-sync", no_argument, NULL, 'N'},
108  {"progress", no_argument, NULL, 'P'},
109  {"debug", no_argument, NULL, 3},
110  {NULL, 0, NULL, 0}
111  };
112  int option_index;
113  int c;
114  XLogRecPtr divergerec;
115  int lastcommontliIndex;
116  XLogRecPtr chkptrec;
117  TimeLineID chkpttli;
118  XLogRecPtr chkptredo;
119  size_t size;
120  char *buffer;
121  bool no_ensure_shutdown = false;
122  bool rewind_needed;
123  XLogRecPtr endrec;
124  TimeLineID endtli;
125  ControlFileData ControlFile_new;
126  bool writerecoveryconf = false;
127 
128  pg_logging_init(argv[0]);
129  set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_rewind"));
130  progname = get_progname(argv[0]);
131 
132  /* Process command-line arguments */
133  if (argc > 1)
134  {
135  if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
136  {
137  usage(progname);
138  exit(0);
139  }
140  if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
141  {
142  puts("pg_rewind (PostgreSQL) " PG_VERSION);
143  exit(0);
144  }
145  }
146 
147  while ((c = getopt_long(argc, argv, "D:nNPR", long_options, &option_index)) != -1)
148  {
149  switch (c)
150  {
151  case '?':
152  fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
153  exit(1);
154 
155  case 'P':
156  showprogress = true;
157  break;
158 
159  case 'n':
160  dry_run = true;
161  break;
162 
163  case 'N':
164  do_sync = false;
165  break;
166 
167  case 'R':
168  writerecoveryconf = true;
169  break;
170 
171  case 3:
172  debug = true;
174  break;
175 
176  case 'D': /* -D or --target-pgdata */
178  break;
179 
180  case 1: /* --source-pgdata */
182  break;
183 
184  case 2: /* --source-server */
186  break;
187 
188  case 4:
189  no_ensure_shutdown = true;
190  break;
191  }
192  }
193 
194  if (datadir_source == NULL && connstr_source == NULL)
195  {
196  pg_log_error("no source specified (--source-pgdata or --source-server)");
197  fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
198  exit(1);
199  }
200 
201  if (datadir_source != NULL && connstr_source != NULL)
202  {
203  pg_log_error("only one of --source-pgdata or --source-server can be specified");
204  fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
205  exit(1);
206  }
207 
208  if (datadir_target == NULL)
209  {
210  pg_log_error("no target data directory specified (--target-pgdata)");
211  fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
212  exit(1);
213  }
214 
215  if (writerecoveryconf && connstr_source == NULL)
216  {
217  pg_log_error("no source server information (--source--server) specified for --write-recovery-conf");
218  fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
219  exit(1);
220  }
221 
222  if (optind < argc)
223  {
224  pg_log_error("too many command-line arguments (first is \"%s\")",
225  argv[optind]);
226  fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
227  exit(1);
228  }
229 
230  /*
231  * Don't allow pg_rewind to be run as root, to avoid overwriting the
232  * ownership of files in the data directory. We need only check for root
233  * -- any other user won't have sufficient permissions to modify files in
234  * the data directory.
235  */
236 #ifndef WIN32
237  if (geteuid() == 0)
238  {
239  pg_log_error("cannot be executed by \"root\"");
240  fprintf(stderr, _("You must run %s as the PostgreSQL superuser.\n"),
241  progname);
242  exit(1);
243  }
244 #endif
245 
247 
248  /* Set mask based on PGDATA permissions */
250  {
251  pg_log_error("could not read permissions of directory \"%s\": %m",
253  exit(1);
254  }
255 
256  umask(pg_mode_mask);
257 
258  atexit(disconnect_atexit);
259 
260  /* Connect to remote server */
261  if (connstr_source)
263 
264  /*
265  * Ok, we have all the options and we're ready to start. Read in all the
266  * information we need from both clusters.
267  */
268  buffer = slurpFile(datadir_target, "global/pg_control", &size);
269  digestControlFile(&ControlFile_target, buffer, size);
270  pg_free(buffer);
271 
272  /*
273  * If the target instance was not cleanly shut down, start and stop the
274  * target cluster once in single-user mode to enforce recovery to finish,
275  * ensuring that the cluster can be used by pg_rewind. Note that if
276  * no_ensure_shutdown is specified, pg_rewind ignores this step, and users
277  * need to make sure by themselves that the target cluster is in a clean
278  * state.
279  */
280  if (!no_ensure_shutdown &&
281  ControlFile_target.state != DB_SHUTDOWNED &&
282  ControlFile_target.state != DB_SHUTDOWNED_IN_RECOVERY)
283  {
284  ensureCleanShutdown(argv[0]);
285 
286  buffer = slurpFile(datadir_target, "global/pg_control", &size);
287  digestControlFile(&ControlFile_target, buffer, size);
288  pg_free(buffer);
289  }
290 
291  buffer = fetchFile("global/pg_control", &size);
292  digestControlFile(&ControlFile_source, buffer, size);
293  pg_free(buffer);
294 
295  sanityChecks();
296 
297  /*
298  * If both clusters are already on the same timeline, there's nothing to
299  * do.
300  */
301  if (ControlFile_target.checkPointCopy.ThisTimeLineID == ControlFile_source.checkPointCopy.ThisTimeLineID)
302  {
303  pg_log_info("source and target cluster are on the same timeline");
304  rewind_needed = false;
305  }
306  else
307  {
308  findCommonAncestorTimeline(&divergerec, &lastcommontliIndex);
309  pg_log_info("servers diverged at WAL location %X/%X on timeline %u",
310  (uint32) (divergerec >> 32), (uint32) divergerec,
311  targetHistory[lastcommontliIndex].tli);
312 
313  /*
314  * Check for the possibility that the target is in fact a direct
315  * ancestor of the source. In that case, there is no divergent history
316  * in the target that needs rewinding.
317  */
318  if (ControlFile_target.checkPoint >= divergerec)
319  {
320  rewind_needed = true;
321  }
322  else
323  {
324  XLogRecPtr chkptendrec;
325 
326  /* Read the checkpoint record on the target to see where it ends. */
327  chkptendrec = readOneRecord(datadir_target,
328  ControlFile_target.checkPoint,
329  targetNentries - 1);
330 
331  /*
332  * If the histories diverged exactly at the end of the shutdown
333  * checkpoint record on the target, there are no WAL records in
334  * the target that don't belong in the source's history, and no
335  * rewind is needed.
336  */
337  if (chkptendrec == divergerec)
338  rewind_needed = false;
339  else
340  rewind_needed = true;
341  }
342  }
343 
344  if (!rewind_needed)
345  {
346  pg_log_info("no rewind required");
347  if (writerecoveryconf && !dry_run)
350  exit(0);
351  }
352 
354  lastcommontliIndex,
355  &chkptrec, &chkpttli, &chkptredo);
356  pg_log_info("rewinding from last common checkpoint at %X/%X on timeline %u",
357  (uint32) (chkptrec >> 32), (uint32) chkptrec,
358  chkpttli);
359 
360  /*
361  * Build the filemap, by comparing the source and target data directories.
362  */
363  filemap_create();
364  if (showprogress)
365  pg_log_info("reading source file list");
367  if (showprogress)
368  pg_log_info("reading target file list");
370 
371  /*
372  * Read the target WAL from last checkpoint before the point of fork, to
373  * extract all the pages that were modified on the target cluster after
374  * the fork. We can stop reading after reaching the final shutdown record.
375  * XXX: If we supported rewinding a server that was not shut down cleanly,
376  * we would need to replay until the end of WAL here.
377  */
378  if (showprogress)
379  pg_log_info("reading WAL in target");
380  extractPageMap(datadir_target, chkptrec, lastcommontliIndex,
381  ControlFile_target.checkPoint);
383 
384  if (showprogress)
386 
387  /* this is too verbose even for verbose mode */
388  if (debug)
389  print_filemap();
390 
391  /*
392  * Ok, we're ready to start copying things over.
393  */
394  if (showprogress)
395  {
396  pg_log_info("need to copy %lu MB (total source directory size is %lu MB)",
397  (unsigned long) (filemap->fetch_size / (1024 * 1024)),
398  (unsigned long) (filemap->total_size / (1024 * 1024)));
399 
401  fetch_done = 0;
402  }
403 
404  /*
405  * This is the point of no return. Once we start copying things, we have
406  * modified the target directory and there is no turning back!
407  */
408 
409  executeFileMap();
410 
411  progress_report(true);
412  printf("\n");
413 
414  if (showprogress)
415  pg_log_info("creating backup label and updating control file");
416  createBackupLabel(chkptredo, chkpttli, chkptrec);
417 
418  /*
419  * Update control file of target. Make it ready to perform archive
420  * recovery when restarting.
421  *
422  * minRecoveryPoint is set to the current WAL insert location in the
423  * source server. Like in an online backup, it's important that we recover
424  * all the WAL that was generated while we copied the files over.
425  */
426  memcpy(&ControlFile_new, &ControlFile_source, sizeof(ControlFileData));
427 
428  if (connstr_source)
429  {
431  endtli = ControlFile_source.checkPointCopy.ThisTimeLineID;
432  }
433  else
434  {
435  endrec = ControlFile_source.checkPoint;
436  endtli = ControlFile_source.checkPointCopy.ThisTimeLineID;
437  }
438  ControlFile_new.minRecoveryPoint = endrec;
439  ControlFile_new.minRecoveryPointTLI = endtli;
440  ControlFile_new.state = DB_IN_ARCHIVE_RECOVERY;
441  if (!dry_run)
442  update_controlfile(datadir_target, &ControlFile_new, do_sync);
443 
444  if (showprogress)
445  pg_log_info("syncing target data directory");
447 
448  if (writerecoveryconf && !dry_run)
451 
452  pg_log_info("Done!");
453 
454  return 0;
455 }
456 
457 static void
459 {
460  /* TODO Check that there's no backup_label in either cluster */
461 
462  /* Check system_identifier match */
463  if (ControlFile_target.system_identifier != ControlFile_source.system_identifier)
464  pg_fatal("source and target clusters are from different systems");
465 
466  /* check version */
467  if (ControlFile_target.pg_control_version != PG_CONTROL_VERSION ||
468  ControlFile_source.pg_control_version != PG_CONTROL_VERSION ||
469  ControlFile_target.catalog_version_no != CATALOG_VERSION_NO ||
470  ControlFile_source.catalog_version_no != CATALOG_VERSION_NO)
471  {
472  pg_fatal("clusters are not compatible with this version of pg_rewind");
473  }
474 
475  /*
476  * Target cluster need to use checksums or hint bit wal-logging, this to
477  * prevent from data corruption that could occur because of hint bits.
478  */
479  if (ControlFile_target.data_checksum_version != PG_DATA_CHECKSUM_VERSION &&
480  !ControlFile_target.wal_log_hints)
481  {
482  pg_fatal("target server needs to use either data checksums or \"wal_log_hints = on\"");
483  }
484 
485  /*
486  * Target cluster better not be running. This doesn't guard against
487  * someone starting the cluster concurrently. Also, this is probably more
488  * strict than necessary; it's OK if the target node was not shut down
489  * cleanly, as long as it isn't running at the moment.
490  */
491  if (ControlFile_target.state != DB_SHUTDOWNED &&
492  ControlFile_target.state != DB_SHUTDOWNED_IN_RECOVERY)
493  pg_fatal("target server must be shut down cleanly");
494 
495  /*
496  * When the source is a data directory, also require that the source
497  * server is shut down. There isn't any very strong reason for this
498  * limitation, but better safe than sorry.
499  */
500  if (datadir_source &&
501  ControlFile_source.state != DB_SHUTDOWNED &&
502  ControlFile_source.state != DB_SHUTDOWNED_IN_RECOVERY)
503  pg_fatal("source data directory must be shut down cleanly");
504 }
505 
506 /*
507  * Print a progress report based on the fetch_size and fetch_done variables.
508  *
509  * Progress report is written at maximum once per second, unless the
510  * force parameter is set to true.
511  */
512 void
513 progress_report(bool force)
514 {
515  static pg_time_t last_progress_report = 0;
516  int percent;
517  char fetch_done_str[32];
518  char fetch_size_str[32];
519  pg_time_t now;
520 
521  if (!showprogress)
522  return;
523 
524  now = time(NULL);
525  if (now == last_progress_report && !force)
526  return; /* Max once per second */
527 
528  last_progress_report = now;
529  percent = fetch_size ? (int) ((fetch_done) * 100 / fetch_size) : 0;
530 
531  /*
532  * Avoid overflowing past 100% or the full size. This may make the total
533  * size number change as we approach the end of the backup (the estimate
534  * will always be wrong if WAL is included), but that's better than having
535  * the done column be bigger than the total.
536  */
537  if (percent > 100)
538  percent = 100;
539  if (fetch_done > fetch_size)
541 
542  /*
543  * Separate step to keep platform-dependent format code out of
544  * translatable strings. And we only test for INT64_FORMAT availability
545  * in snprintf, not fprintf.
546  */
547  snprintf(fetch_done_str, sizeof(fetch_done_str), INT64_FORMAT,
548  fetch_done / 1024);
549  snprintf(fetch_size_str, sizeof(fetch_size_str), INT64_FORMAT,
550  fetch_size / 1024);
551 
552  fprintf(stderr, _("%*s/%s kB (%d%%) copied"),
553  (int) strlen(fetch_size_str), fetch_done_str, fetch_size_str,
554  percent);
555  if (isatty(fileno(stderr)))
556  fprintf(stderr, "\r");
557  else
558  fprintf(stderr, "\n");
559 }
560 
561 /*
562  * Find minimum from two WAL locations assuming InvalidXLogRecPtr means
563  * infinity as src/include/access/timeline.h states. This routine should
564  * be used only when comparing WAL locations related to history files.
565  */
566 static XLogRecPtr
568 {
569  if (XLogRecPtrIsInvalid(a))
570  return b;
571  else if (XLogRecPtrIsInvalid(b))
572  return a;
573  else
574  return Min(a, b);
575 }
576 
577 /*
578  * Retrieve timeline history for given control file which should behold
579  * either source or target.
580  */
581 static TimeLineHistoryEntry *
582 getTimelineHistory(ControlFileData *controlFile, int *nentries)
583 {
584  TimeLineHistoryEntry *history;
585  TimeLineID tli;
586 
587  tli = controlFile->checkPointCopy.ThisTimeLineID;
588 
589  /*
590  * Timeline 1 does not have a history file, so there is no need to check
591  * and fake an entry with infinite start and end positions.
592  */
593  if (tli == 1)
594  {
595  history = (TimeLineHistoryEntry *) pg_malloc(sizeof(TimeLineHistoryEntry));
596  history->tli = tli;
597  history->begin = history->end = InvalidXLogRecPtr;
598  *nentries = 1;
599  }
600  else
601  {
602  char path[MAXPGPATH];
603  char *histfile;
604 
605  TLHistoryFilePath(path, tli);
606 
607  /* Get history file from appropriate source */
608  if (controlFile == &ControlFile_source)
609  histfile = fetchFile(path, NULL);
610  else if (controlFile == &ControlFile_target)
611  histfile = slurpFile(datadir_target, path, NULL);
612  else
613  pg_fatal("invalid control file");
614 
615  history = rewind_parseTimeLineHistory(histfile, tli, nentries);
616  pg_free(histfile);
617  }
618 
619  if (debug)
620  {
621  int i;
622 
623  if (controlFile == &ControlFile_source)
624  pg_log_debug("Source timeline history:");
625  else if (controlFile == &ControlFile_target)
626  pg_log_debug("Target timeline history:");
627  else
628  Assert(false);
629 
630  /*
631  * Print the target timeline history.
632  */
633  for (i = 0; i < targetNentries; i++)
634  {
635  TimeLineHistoryEntry *entry;
636 
637  entry = &history[i];
638  pg_log_debug("%d: %X/%X - %X/%X", entry->tli,
639  (uint32) (entry->begin >> 32), (uint32) (entry->begin),
640  (uint32) (entry->end >> 32), (uint32) (entry->end));
641  }
642  }
643 
644  return history;
645 }
646 
647 /*
648  * Determine the TLI of the last common timeline in the timeline history of the
649  * two clusters. targetHistory is filled with target timeline history and
650  * targetNentries is number of items in targetHistory. *tliIndex is set to the
651  * index of last common timeline in targetHistory array, and *recptr is set to
652  * the position where the timeline history diverged (ie. the first WAL record
653  * that's not the same in both clusters).
654  *
655  * Control files of both clusters must be read into ControlFile_target/source
656  * before calling this routine.
657  */
658 static void
659 findCommonAncestorTimeline(XLogRecPtr *recptr, int *tliIndex)
660 {
661  TimeLineHistoryEntry *sourceHistory;
662  int sourceNentries;
663  int i,
664  n;
665 
666  /* Retrieve timelines for both source and target */
667  sourceHistory = getTimelineHistory(&ControlFile_source, &sourceNentries);
668  targetHistory = getTimelineHistory(&ControlFile_target, &targetNentries);
669 
670  /*
671  * Trace the history forward, until we hit the timeline diverge. It may
672  * still be possible that the source and target nodes used the same
673  * timeline number in their history but with different start position
674  * depending on the history files that each node has fetched in previous
675  * recovery processes. Hence check the start position of the new timeline
676  * as well and move down by one extra timeline entry if they do not match.
677  */
678  n = Min(sourceNentries, targetNentries);
679  for (i = 0; i < n; i++)
680  {
681  if (sourceHistory[i].tli != targetHistory[i].tli ||
682  sourceHistory[i].begin != targetHistory[i].begin)
683  break;
684  }
685 
686  if (i > 0)
687  {
688  i--;
689  *recptr = MinXLogRecPtr(sourceHistory[i].end, targetHistory[i].end);
690  *tliIndex = i;
691 
692  pg_free(sourceHistory);
693  return;
694  }
695  else
696  {
697  pg_fatal("could not find common ancestor of the source and target cluster's timelines");
698  }
699 }
700 
701 
702 /*
703  * Create a backup_label file that forces recovery to begin at the last common
704  * checkpoint.
705  */
706 static void
707 createBackupLabel(XLogRecPtr startpoint, TimeLineID starttli, XLogRecPtr checkpointloc)
708 {
709  XLogSegNo startsegno;
710  time_t stamp_time;
711  char strfbuf[128];
712  char xlogfilename[MAXFNAMELEN];
713  struct tm *tmp;
714  char buf[1000];
715  int len;
716 
717  XLByteToSeg(startpoint, startsegno, WalSegSz);
718  XLogFileName(xlogfilename, starttli, startsegno, WalSegSz);
719 
720  /*
721  * Construct backup label file
722  */
723  stamp_time = time(NULL);
724  tmp = localtime(&stamp_time);
725  strftime(strfbuf, sizeof(strfbuf), "%Y-%m-%d %H:%M:%S %Z", tmp);
726 
727  len = snprintf(buf, sizeof(buf),
728  "START WAL LOCATION: %X/%X (file %s)\n"
729  "CHECKPOINT LOCATION: %X/%X\n"
730  "BACKUP METHOD: pg_rewind\n"
731  "BACKUP FROM: standby\n"
732  "START TIME: %s\n",
733  /* omit LABEL: line */
734  (uint32) (startpoint >> 32), (uint32) startpoint, xlogfilename,
735  (uint32) (checkpointloc >> 32), (uint32) checkpointloc,
736  strfbuf);
737  if (len >= sizeof(buf))
738  pg_fatal("backup label buffer too small"); /* shouldn't happen */
739 
740  /* TODO: move old file out of the way, if any. */
741  open_target_file("backup_label", true); /* BACKUP_LABEL_FILE */
742  write_target_range(buf, 0, len);
744 }
745 
746 /*
747  * Check CRC of control file
748  */
749 static void
751 {
752  pg_crc32c crc;
753 
754  /* Calculate CRC */
755  INIT_CRC32C(crc);
756  COMP_CRC32C(crc, (char *) ControlFile, offsetof(ControlFileData, crc));
757  FIN_CRC32C(crc);
758 
759  /* And simply compare it */
760  if (!EQ_CRC32C(crc, ControlFile->crc))
761  pg_fatal("unexpected control file CRC");
762 }
763 
764 /*
765  * Verify control file contents in the buffer src, and copy it to *ControlFile.
766  */
767 static void
769 {
770  if (size != PG_CONTROL_FILE_SIZE)
771  pg_fatal("unexpected control file size %d, expected %d",
772  (int) size, PG_CONTROL_FILE_SIZE);
773 
774  memcpy(ControlFile, src, sizeof(ControlFileData));
775 
776  /* set and validate WalSegSz */
777  WalSegSz = ControlFile->xlog_seg_size;
778 
780  pg_fatal(ngettext("WAL segment size must be a power of two between 1 MB and 1 GB, but the control file specifies %d byte",
781  "WAL segment size must be a power of two between 1 MB and 1 GB, but the control file specifies %d bytes",
782  WalSegSz),
783  WalSegSz);
784 
785  /* Additional checks on control file */
786  checkControlFile(ControlFile);
787 }
788 
789 /*
790  * Sync target data directory to ensure that modifications are safely on disk.
791  *
792  * We do this once, for the whole data directory, for performance reasons. At
793  * the end of pg_rewind's run, the kernel is likely to already have flushed
794  * most dirty buffers to disk. Additionally fsync_pgdata uses a two-pass
795  * approach (only initiating writeback in the first pass), which often reduces
796  * the overall amount of IO noticeably.
797  */
798 static void
800 {
801  if (!do_sync || dry_run)
802  return;
803 
804  fsync_pgdata(datadir_target, PG_VERSION_NUM);
805 }
806 
807 /*
808  * Ensure clean shutdown of target instance by launching single-user mode
809  * postgres to do crash recovery.
810  */
811 static void
813 {
814  int ret;
815 #define MAXCMDLEN (2 * MAXPGPATH)
816  char exec_path[MAXPGPATH];
817  char cmd[MAXCMDLEN];
818 
819  /* locate postgres binary */
820  if ((ret = find_other_exec(argv0, "postgres",
822  exec_path)) < 0)
823  {
824  char full_path[MAXPGPATH];
825 
826  if (find_my_exec(argv0, full_path) < 0)
827  strlcpy(full_path, progname, sizeof(full_path));
828 
829  if (ret == -1)
830  pg_fatal("The program \"%s\" is needed by %s but was\n"
831  "not found in the same directory as \"%s\".\n"
832  "Check your installation.",
833  "postgres", progname, full_path);
834  else
835  pg_fatal("The program \"%s\" was found by \"%s\" but was\n"
836  "not the same version as %s.\n"
837  "Check your installation.",
838  "postgres", full_path, progname);
839  }
840 
841  pg_log_info("executing \"%s\" for target server to complete crash recovery",
842  exec_path);
843 
844  /*
845  * Skip processing if requested, but only after ensuring presence of
846  * postgres.
847  */
848  if (dry_run)
849  return;
850 
851  /*
852  * Finally run postgres in single-user mode. There is no need to use
853  * fsync here. This makes the recovery faster, and the target data folder
854  * is synced at the end anyway.
855  */
856  snprintf(cmd, MAXCMDLEN, "\"%s\" --single -F -D \"%s\" template1 < \"%s\"",
857  exec_path, datadir_target, DEVNULL);
858 
859  if (system(cmd) != 0)
860  {
861  pg_log_error("postgres single-user mode of target instance failed");
862  pg_fatal("Command was: %s", cmd);
863  }
864 }
865 
866 static void
868 {
869  if (conn != NULL)
870  PQfinish(conn);
871 }
#define INIT_CRC32C(crc)
Definition: pg_crc32c.h:41
#define IsValidWalSegSize(size)
Definition: xlog_internal.h:97
void calculate_totals(void)
Definition: filemap.c:607
int find_other_exec(const char *argv0, const char *target, const char *versionstr, char *retpath)
Definition: exec.c:324
static void syncTargetDirectory(void)
Definition: pg_rewind.c:799
char * datadir_target
Definition: pg_rewind.c:54
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28
void open_target_file(const char *path, bool trunc)
Definition: file_ops.c:42
static char * argv0
Definition: pg_ctl.c:97
void write_target_range(char *buf, off_t begin, size_t size)
Definition: file_ops.c:83
uint32 TimeLineID
Definition: xlogdefs.h:52
int64 pg_time_t
Definition: pgtime.h:23
static bool debug
Definition: pg_rewind.c:58
#define MAXCMDLEN
int main(int argc, char **argv)
Definition: pg_rewind.c:96
void * pg_malloc(size_t size)
Definition: fe_memutils.c:47
TimeLineID minRecoveryPointTLI
Definition: pg_control.h:167
TimeLineHistoryEntry * targetHistory
Definition: pg_rewind.c:64
bool dry_run
Definition: pg_rewind.c:60
const char * get_progname(const char *argv0)
Definition: path.c:453
void process_target_file(const char *path, file_type_t type, size_t oldsize, const char *link_target)
Definition: filemap.c:327
#define pg_log_error(...)
Definition: logging.h:79
int getopt_long(int argc, char *const argv[], const char *optstring, const struct option *longopts, int *longindex)
Definition: getopt_long.c:57
void get_restricted_token(void)
uint32 pg_crc32c
Definition: pg_crc32c.h:38
void pg_logging_init(const char *argv0)
Definition: logging.c:39
void WriteRecoveryConfig(PGconn *pgconn, char *target_dir, PQExpBuffer contents)
Definition: recovery_gen.c:118
static pg_time_t last_progress_report
Definition: pg_basebackup.c:97
void progress_report(bool force)
Definition: pg_rewind.c:513
#define Min(x, y)
Definition: c.h:904
static void ensureCleanShutdown(const char *argv0)
Definition: pg_rewind.c:812
void extractPageMap(const char *datadir, XLogRecPtr startpoint, int tliIndex, XLogRecPtr endpoint)
Definition: parsexlog.c:59
static XLogRecPtr MinXLogRecPtr(XLogRecPtr a, XLogRecPtr b)
Definition: pg_rewind.c:567
static bool writerecoveryconf
Definition: pg_basebackup.c:94
#define pg_fatal(...)
Definition: pg_rewind.h:43
void PQfinish(PGconn *conn)
Definition: fe-connect.c:4098
#define CATALOG_VERSION_NO
Definition: catversion.h:56
#define PG_CONTROL_VERSION
Definition: pg_control.h:25
uint32 pg_control_version
Definition: pg_control.h:121
#define printf(...)
Definition: port.h:198
TimeLineID tli
Definition: timeline.h:27
CheckPoint checkPointCopy
Definition: pg_control.h:131
void fetchSourceFileList(void)
Definition: fetch.c:28
void filemap_create(void)
Definition: filemap.c:114
#define fprintf
Definition: port.h:196
char * fetchFile(const char *filename, size_t *filesize)
Definition: fetch.c:54
void filemap_finalize(void)
Definition: filemap.c:571
void executeFileMap(void)
Definition: fetch.c:40
uint64 fetch_size
Definition: filemap.h:87
void fsync_pgdata(const char *pg_data, int serverVersion)
Definition: file_utils.c:58
static struct pg_tm tm
Definition: localtime.c:108
bool showprogress
Definition: pg_rewind.c:59
void update_controlfile(const char *DataDir, ControlFileData *ControlFile, bool do_sync)
#define required_argument
Definition: getopt_long.h:25
uint32 xlog_seg_size
Definition: pg_control.h:209
int optind
Definition: getopt.c:50
uint64 system_identifier
Definition: pg_control.h:106
uint64 fetch_size
Definition: pg_rewind.c:68
PGconn * conn
Definition: streamutil.c:56
static void checkControlFile(ControlFileData *ControlFile)
Definition: pg_rewind.c:750
#define pg_log_debug(...)
Definition: logging.h:91
XLogRecPtr libpqGetCurrentXlogInsertLocation(void)
Definition: libpq_fetch.c:148
#define MAXPGPATH
int find_my_exec(const char *argv0, char *retpath)
Definition: exec.c:129
uint32 data_checksum_version
Definition: pg_control.h:222
char * c
static void usage(const char *progname)
Definition: pg_rewind.c:73
static char * buf
Definition: pg_test_fsync.c:68
XLogRecPtr readOneRecord(const char *datadir, XLogRecPtr ptr, int tliIndex)
Definition: parsexlog.c:111
uint64 XLogSegNo
Definition: xlogdefs.h:41
filemap_t * filemap
Definition: filemap.c:24
char * pg_strdup(const char *in)
Definition: fe_memutils.c:85
uint64 total_size
Definition: filemap.h:86
unsigned int uint32
Definition: c.h:358
#define EQ_CRC32C(c1, c2)
Definition: pg_crc32c.h:42
#define DEVNULL
Definition: port.h:123
void print_filemap(void)
Definition: filemap.c:649
static void findCommonAncestorTimeline(XLogRecPtr *recptr, int *tliIndex)
Definition: pg_rewind.c:659
void traverse_datadir(const char *datadir, process_file_callback_t callback)
Definition: copy_fetch.c:33
PQExpBuffer GenerateRecoveryConfig(PGconn *pgconn, char *replication_slot)
Definition: recovery_gen.c:24
#define XLogRecPtrIsInvalid(r)
Definition: xlogdefs.h:29
char * connstr_source
Definition: pg_rewind.c:56
#define MAXFNAMELEN
int WalSegSz
Definition: pg_rewind.c:51
#define no_argument
Definition: getopt_long.h:24
#define ngettext(s, p, n)
Definition: c.h:1103
#define PG_TEXTDOMAIN(domain)
Definition: c.h:1135
static void disconnect_atexit(void)
Definition: pg_rewind.c:867
int targetNentries
Definition: pg_rewind.c:65
char * datadir_source
Definition: pg_rewind.c:55
static ControlFileData * ControlFile
Definition: xlog.c:721
#define PG_BACKEND_VERSIONSTR
Definition: port.h:111
size_t strlcpy(char *dst, const char *src, size_t siz)
Definition: strlcpy.c:45
bool do_sync
Definition: pg_rewind.c:61
XLogRecPtr end
Definition: timeline.h:29
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define Assert(condition)
Definition: c.h:732
#define XLogFileName(fname, tli, logSegNo, wal_segsz_bytes)
static char * exec_path
Definition: pg_ctl.c:92
uint32 catalog_version_no
Definition: pg_control.h:122
void pg_free(void *ptr)
Definition: fe_memutils.c:105
#define PG_CONTROL_FILE_SIZE
Definition: pg_control.h:250
static TimeLineHistoryEntry * getTimelineHistory(ControlFileData *controlFile, int *nentries)
Definition: pg_rewind.c:582
#define INT64_FORMAT
Definition: c.h:400
static void sanityChecks(void)
Definition: pg_rewind.c:458
void close_target_file(void)
Definition: file_ops.c:70
#define PG_DATA_CHECKSUM_VERSION
Definition: bufpage.h:200
bool GetDataDirectoryCreatePerm(const char *dataDir)
TimeLineID ThisTimeLineID
Definition: pg_control.h:39
void set_pglocale_pgservice(const char *argv0, const char *app)
Definition: exec.c:565
void findLastCheckpoint(const char *datadir, XLogRecPtr forkptr, int tliIndex, XLogRecPtr *lastchkptrec, TimeLineID *lastchkpttli, XLogRecPtr *lastchkptredo)
Definition: parsexlog.c:151
XLogRecPtr begin
Definition: timeline.h:28
char * optarg
Definition: getopt.c:52
void pg_logging_set_level(enum pg_log_level new_level)
Definition: logging.c:108
int i
TimeLineHistoryEntry * rewind_parseTimeLineHistory(char *buffer, TimeLineID targetTLI, int *nentries)
Definition: timeline.c:30
static ControlFileData ControlFile_target
Definition: pg_rewind.c:47
uint64 fetch_done
Definition: pg_rewind.c:69
static ControlFileData ControlFile_source
Definition: pg_rewind.c:48
static void createBackupLabel(XLogRecPtr startpoint, TimeLineID starttli, XLogRecPtr checkpointloc)
Definition: pg_rewind.c:707
static void digestControlFile(ControlFileData *ControlFile, char *source, size_t size)
Definition: pg_rewind.c:768
#define COMP_CRC32C(crc, data, len)
Definition: pg_crc32c.h:89
#define FIN_CRC32C(crc)
Definition: pg_crc32c.h:94
#define snprintf
Definition: port.h:192
#define _(x)
Definition: elog.c:84
int pg_mode_mask
Definition: file_perm.c:25
Datum now(PG_FUNCTION_ARGS)
Definition: timestamp.c:1547
XLogRecPtr checkPoint
Definition: pg_control.h:129
#define offsetof(type, field)
Definition: c.h:655
#define pg_log_info(...)
Definition: logging.h:87
const char * progname
Definition: pg_rewind.c:50
XLogRecPtr minRecoveryPoint
Definition: pg_control.h:166
void libpqConnect(const char *connstr)
Definition: libpq_fetch.c:44
char * slurpFile(const char *datadir, const char *path, size_t *filesize)
Definition: file_ops.c:284
#define TLHistoryFilePath(path, tli)
#define XLByteToSeg(xlrp, logSegNo, wal_segsz_bytes)