PostgreSQL Source Code  git master
pg_rewind.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * pg_rewind.c
4  * Synchronizes a PostgreSQL data directory to a new timeline
5  *
6  * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
7  *
8  *-------------------------------------------------------------------------
9  */
10 #include "postgres_fe.h"
11 
12 #include <sys/stat.h>
13 #include <fcntl.h>
14 #include <time.h>
15 #include <unistd.h>
16 
17 #include "access/timeline.h"
18 #include "access/xlog_internal.h"
19 #include "catalog/catversion.h"
20 #include "catalog/pg_control.h"
22 #include "common/file_perm.h"
24 #include "common/string.h"
25 #include "fe_utils/recovery_gen.h"
26 #include "file_ops.h"
27 #include "filemap.h"
28 #include "getopt_long.h"
29 #include "pg_rewind.h"
30 #include "rewind_source.h"
31 #include "storage/bufpage.h"
32 
33 static void usage(const char *progname);
34 
35 static void perform_rewind(filemap_t *filemap, rewind_source *source,
36  XLogRecPtr chkptrec,
37  TimeLineID chkpttli,
38  XLogRecPtr chkptredo);
39 
40 static void createBackupLabel(XLogRecPtr startpoint, TimeLineID starttli,
41  XLogRecPtr checkpointloc);
42 
44  const char *content, size_t size);
45 static void getRestoreCommand(const char *argv0);
46 static void sanityChecks(void);
47 static void findCommonAncestorTimeline(XLogRecPtr *recptr, int *tliIndex);
48 static void ensureCleanShutdown(const char *argv0);
49 static void disconnect_atexit(void);
50 
54 
55 const char *progname;
57 
58 /* Configuration options */
59 char *datadir_target = NULL;
60 char *datadir_source = NULL;
61 char *connstr_source = NULL;
62 char *restore_command = NULL;
63 
64 static bool debug = false;
65 bool showprogress = false;
66 bool dry_run = false;
67 bool do_sync = true;
68 bool restore_wal = false;
69 
70 /* Target history */
73 
74 /* Progress counters */
75 uint64 fetch_size;
76 uint64 fetch_done;
77 
78 static PGconn *conn;
80 
81 static void
82 usage(const char *progname)
83 {
84  printf(_("%s resynchronizes a PostgreSQL cluster with another copy of the cluster.\n\n"), progname);
85  printf(_("Usage:\n %s [OPTION]...\n\n"), progname);
86  printf(_("Options:\n"));
87  printf(_(" -c, --restore-target-wal use restore_command in target configuration to\n"
88  " retrieve WAL files from archives\n"));
89  printf(_(" -D, --target-pgdata=DIRECTORY existing data directory to modify\n"));
90  printf(_(" --source-pgdata=DIRECTORY source data directory to synchronize with\n"));
91  printf(_(" --source-server=CONNSTR source server to synchronize with\n"));
92  printf(_(" -n, --dry-run stop before modifying anything\n"));
93  printf(_(" -N, --no-sync do not wait for changes to be written\n"
94  " safely to disk\n"));
95  printf(_(" -P, --progress write progress messages\n"));
96  printf(_(" -R, --write-recovery-conf write configuration for replication\n"
97  " (requires --source-server)\n"));
98  printf(_(" --debug write a lot of debug messages\n"));
99  printf(_(" --no-ensure-shutdown do not automatically fix unclean shutdown\n"));
100  printf(_(" -V, --version output version information, then exit\n"));
101  printf(_(" -?, --help show this help, then exit\n"));
102  printf(_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
103  printf(_("%s home page: <%s>\n"), PACKAGE_NAME, PACKAGE_URL);
104 }
105 
106 
107 int
108 main(int argc, char **argv)
109 {
110  static struct option long_options[] = {
111  {"help", no_argument, NULL, '?'},
112  {"target-pgdata", required_argument, NULL, 'D'},
113  {"write-recovery-conf", no_argument, NULL, 'R'},
114  {"source-pgdata", required_argument, NULL, 1},
115  {"source-server", required_argument, NULL, 2},
116  {"no-ensure-shutdown", no_argument, NULL, 4},
117  {"version", no_argument, NULL, 'V'},
118  {"restore-target-wal", no_argument, NULL, 'c'},
119  {"dry-run", no_argument, NULL, 'n'},
120  {"no-sync", no_argument, NULL, 'N'},
121  {"progress", no_argument, NULL, 'P'},
122  {"debug", no_argument, NULL, 3},
123  {NULL, 0, NULL, 0}
124  };
125  int option_index;
126  int c;
127  XLogRecPtr divergerec;
128  int lastcommontliIndex;
129  XLogRecPtr chkptrec;
130  TimeLineID chkpttli;
131  XLogRecPtr chkptredo;
132  XLogRecPtr target_wal_endrec;
133  size_t size;
134  char *buffer;
135  bool no_ensure_shutdown = false;
136  bool rewind_needed;
137  bool writerecoveryconf = false;
138  filemap_t *filemap;
139 
140  pg_logging_init(argv[0]);
141  set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_rewind"));
142  progname = get_progname(argv[0]);
143 
144  /* Process command-line arguments */
145  if (argc > 1)
146  {
147  if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
148  {
149  usage(progname);
150  exit(0);
151  }
152  if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
153  {
154  puts("pg_rewind (PostgreSQL) " PG_VERSION);
155  exit(0);
156  }
157  }
158 
159  while ((c = getopt_long(argc, argv, "cD:nNPR", long_options, &option_index)) != -1)
160  {
161  switch (c)
162  {
163  case '?':
164  fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
165  exit(1);
166 
167  case 'c':
168  restore_wal = true;
169  break;
170 
171  case 'P':
172  showprogress = true;
173  break;
174 
175  case 'n':
176  dry_run = true;
177  break;
178 
179  case 'N':
180  do_sync = false;
181  break;
182 
183  case 'R':
184  writerecoveryconf = true;
185  break;
186 
187  case 3:
188  debug = true;
190  break;
191 
192  case 'D': /* -D or --target-pgdata */
194  break;
195 
196  case 1: /* --source-pgdata */
198  break;
199 
200  case 2: /* --source-server */
202  break;
203 
204  case 4:
205  no_ensure_shutdown = true;
206  break;
207  }
208  }
209 
210  if (datadir_source == NULL && connstr_source == NULL)
211  {
212  pg_log_error("no source specified (--source-pgdata or --source-server)");
213  fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
214  exit(1);
215  }
216 
217  if (datadir_source != NULL && connstr_source != NULL)
218  {
219  pg_log_error("only one of --source-pgdata or --source-server can be specified");
220  fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
221  exit(1);
222  }
223 
224  if (datadir_target == NULL)
225  {
226  pg_log_error("no target data directory specified (--target-pgdata)");
227  fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
228  exit(1);
229  }
230 
231  if (writerecoveryconf && connstr_source == NULL)
232  {
233  pg_log_error("no source server information (--source-server) specified for --write-recovery-conf");
234  fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
235  exit(1);
236  }
237 
238  if (optind < argc)
239  {
240  pg_log_error("too many command-line arguments (first is \"%s\")",
241  argv[optind]);
242  fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
243  exit(1);
244  }
245 
246  /*
247  * Don't allow pg_rewind to be run as root, to avoid overwriting the
248  * ownership of files in the data directory. We need only check for root
249  * -- any other user won't have sufficient permissions to modify files in
250  * the data directory.
251  */
252 #ifndef WIN32
253  if (geteuid() == 0)
254  {
255  pg_log_error("cannot be executed by \"root\"");
256  fprintf(stderr, _("You must run %s as the PostgreSQL superuser.\n"),
257  progname);
258  exit(1);
259  }
260 #endif
261 
263 
264  /* Set mask based on PGDATA permissions */
266  {
267  pg_log_error("could not read permissions of directory \"%s\": %m",
269  exit(1);
270  }
271 
272  umask(pg_mode_mask);
273 
274  getRestoreCommand(argv[0]);
275 
276  atexit(disconnect_atexit);
277 
278  /*
279  * Ok, we have all the options and we're ready to start. First, connect to
280  * remote server.
281  */
282  if (connstr_source)
283  {
284  conn = PQconnectdb(connstr_source);
285 
286  if (PQstatus(conn) == CONNECTION_BAD)
287  pg_fatal("%s", PQerrorMessage(conn));
288 
289  if (showprogress)
290  pg_log_info("connected to server");
291 
292  source = init_libpq_source(conn);
293  }
294  else
296 
297  /*
298  * Check the status of the target instance.
299  *
300  * If the target instance was not cleanly shut down, start and stop the
301  * target cluster once in single-user mode to enforce recovery to finish,
302  * ensuring that the cluster can be used by pg_rewind. Note that if
303  * no_ensure_shutdown is specified, pg_rewind ignores this step, and users
304  * need to make sure by themselves that the target cluster is in a clean
305  * state.
306  */
307  buffer = slurpFile(datadir_target, "global/pg_control", &size);
308  digestControlFile(&ControlFile_target, buffer, size);
309  pg_free(buffer);
310 
311  if (!no_ensure_shutdown &&
312  ControlFile_target.state != DB_SHUTDOWNED &&
313  ControlFile_target.state != DB_SHUTDOWNED_IN_RECOVERY)
314  {
315  ensureCleanShutdown(argv[0]);
316 
317  buffer = slurpFile(datadir_target, "global/pg_control", &size);
318  digestControlFile(&ControlFile_target, buffer, size);
319  pg_free(buffer);
320  }
321 
322  buffer = source->fetch_file(source, "global/pg_control", &size);
323  digestControlFile(&ControlFile_source, buffer, size);
324  pg_free(buffer);
325 
326  sanityChecks();
327 
328  /*
329  * Find the common ancestor timeline between the clusters.
330  *
331  * If both clusters are already on the same timeline, there's nothing to
332  * do.
333  */
334  if (ControlFile_target.checkPointCopy.ThisTimeLineID ==
335  ControlFile_source.checkPointCopy.ThisTimeLineID)
336  {
337  pg_log_info("source and target cluster are on the same timeline");
338  rewind_needed = false;
339  target_wal_endrec = 0;
340  }
341  else
342  {
343  XLogRecPtr chkptendrec;
344 
345  findCommonAncestorTimeline(&divergerec, &lastcommontliIndex);
346  pg_log_info("servers diverged at WAL location %X/%X on timeline %u",
347  (uint32) (divergerec >> 32), (uint32) divergerec,
348  targetHistory[lastcommontliIndex].tli);
349 
350  /*
351  * Determine the end-of-WAL on the target.
352  *
353  * The WAL ends at the last shutdown checkpoint, or at
354  * minRecoveryPoint if it was a standby. (If we supported rewinding a
355  * server that was not shut down cleanly, we would need to replay
356  * until we reach the first invalid record, like crash recovery does.)
357  */
358 
359  /* read the checkpoint record on the target to see where it ends. */
360  chkptendrec = readOneRecord(datadir_target,
361  ControlFile_target.checkPoint,
362  targetNentries - 1,
364 
365  if (ControlFile_target.minRecoveryPoint > chkptendrec)
366  {
367  target_wal_endrec = ControlFile_target.minRecoveryPoint;
368  }
369  else
370  {
371  target_wal_endrec = chkptendrec;
372  }
373 
374  /*
375  * Check for the possibility that the target is in fact a direct
376  * ancestor of the source. In that case, there is no divergent history
377  * in the target that needs rewinding.
378  */
379  if (target_wal_endrec > divergerec)
380  {
381  rewind_needed = true;
382  }
383  else
384  {
385  /* the last common checkpoint record must be part of target WAL */
386  Assert(target_wal_endrec == divergerec);
387 
388  rewind_needed = false;
389  }
390  }
391 
392  if (!rewind_needed)
393  {
394  pg_log_info("no rewind required");
395  if (writerecoveryconf && !dry_run)
397  GenerateRecoveryConfig(conn, NULL));
398  exit(0);
399  }
400 
401  findLastCheckpoint(datadir_target, divergerec, lastcommontliIndex,
402  &chkptrec, &chkpttli, &chkptredo, restore_command);
403  pg_log_info("rewinding from last common checkpoint at %X/%X on timeline %u",
404  (uint32) (chkptrec >> 32), (uint32) chkptrec,
405  chkpttli);
406 
407  /* Initialize the hash table to track the status of each file */
408  filehash_init();
409 
410  /*
411  * Collect information about all files in both data directories.
412  */
413  if (showprogress)
414  pg_log_info("reading source file list");
415  source->traverse_files(source, &process_source_file);
416 
417  if (showprogress)
418  pg_log_info("reading target file list");
420 
421  /*
422  * Read the target WAL from last checkpoint before the point of fork, to
423  * extract all the pages that were modified on the target cluster after
424  * the fork.
425  */
426  if (showprogress)
427  pg_log_info("reading WAL in target");
428  extractPageMap(datadir_target, chkptrec, lastcommontliIndex,
429  target_wal_endrec, restore_command);
430 
431  /*
432  * We have collected all information we need from both systems. Decide
433  * what to do with each file.
434  */
435  filemap = decide_file_actions();
436  if (showprogress)
437  calculate_totals(filemap);
438 
439  /* this is too verbose even for verbose mode */
440  if (debug)
441  print_filemap(filemap);
442 
443  /*
444  * Ok, we're ready to start copying things over.
445  */
446  if (showprogress)
447  {
448  pg_log_info("need to copy %lu MB (total source directory size is %lu MB)",
449  (unsigned long) (filemap->fetch_size / (1024 * 1024)),
450  (unsigned long) (filemap->total_size / (1024 * 1024)));
451 
452  fetch_size = filemap->fetch_size;
453  fetch_done = 0;
454  }
455 
456  /*
457  * We have now collected all the information we need from both systems,
458  * and we are ready to start modifying the target directory.
459  *
460  * This is the point of no return. Once we start copying things, there is
461  * no turning back!
462  */
463  perform_rewind(filemap, source, chkptrec, chkpttli, chkptredo);
464 
465  if (showprogress)
466  pg_log_info("syncing target data directory");
467  sync_target_dir();
468 
469  /* Also update the standby configuration, if requested. */
470  if (writerecoveryconf && !dry_run)
472  GenerateRecoveryConfig(conn, NULL));
473 
474  /* don't need the source connection anymore */
475  source->destroy(source);
476  if (conn)
477  {
478  PQfinish(conn);
479  conn = NULL;
480  }
481 
482  pg_log_info("Done!");
483 
484  return 0;
485 }
486 
487 /*
488  * Perform the rewind.
489  *
490  * We have already collected all the information we need from the
491  * target and the source.
492  */
493 static void
495  XLogRecPtr chkptrec,
496  TimeLineID chkpttli,
497  XLogRecPtr chkptredo)
498 {
499  XLogRecPtr endrec;
500  TimeLineID endtli;
501  ControlFileData ControlFile_new;
502  size_t size;
503  char *buffer;
504 
505  /*
506  * Execute the actions in the file map, fetching data from the source
507  * system as needed.
508  */
509  for (int i = 0; i < filemap->nentries; i++)
510  {
511  file_entry_t *entry = filemap->entries[i];
512 
513  /*
514  * If this is a relation file, copy the modified blocks.
515  *
516  * This is in addition to any other changes.
517  */
518  if (entry->target_pages_to_overwrite.bitmapsize > 0)
519  {
521  BlockNumber blkno;
522  off_t offset;
523 
525  while (datapagemap_next(iter, &blkno))
526  {
527  offset = blkno * BLCKSZ;
528  source->queue_fetch_range(source, entry->path, offset, BLCKSZ);
529  }
530  pg_free(iter);
531  }
532 
533  switch (entry->action)
534  {
535  case FILE_ACTION_NONE:
536  /* nothing else to do */
537  break;
538 
539  case FILE_ACTION_COPY:
540  /* Truncate the old file out of the way, if any */
541  open_target_file(entry->path, true);
542  source->queue_fetch_range(source, entry->path,
543  0, entry->source_size);
544  break;
545 
547  truncate_target_file(entry->path, entry->source_size);
548  break;
549 
551  source->queue_fetch_range(source, entry->path,
552  entry->target_size,
553  entry->source_size - entry->target_size);
554  break;
555 
556  case FILE_ACTION_REMOVE:
557  remove_target(entry);
558  break;
559 
560  case FILE_ACTION_CREATE:
561  create_target(entry);
562  break;
563 
565  pg_fatal("no action decided for \"%s\"", entry->path);
566  break;
567  }
568  }
569 
570  /* Complete any remaining range-fetches that we queued up above. */
571  source->finish_fetch(source);
572 
574 
575  progress_report(true);
576 
577  /*
578  * Fetch the control file from the source last. This ensures that the
579  * minRecoveryPoint is up-to-date.
580  */
581  buffer = source->fetch_file(source, "global/pg_control", &size);
582  digestControlFile(&ControlFile_source_after, buffer, size);
583  pg_free(buffer);
584 
585  /*
586  * Sanity check: If the source is a local system, the control file should
587  * not have changed since we started.
588  *
589  * XXX: We assume it hasn't been modified, but actually, what could go
590  * wrong? The logic handles a libpq source that's modified concurrently,
591  * why not a local datadir?
592  */
593  if (datadir_source &&
594  memcmp(&ControlFile_source, &ControlFile_source_after,
595  sizeof(ControlFileData)) != 0)
596  {
597  pg_fatal("source system was modified while pg_rewind was running");
598  }
599 
600  if (showprogress)
601  pg_log_info("creating backup label and updating control file");
602 
603  /*
604  * Create a backup label file, to tell the target where to begin the WAL
605  * replay. Normally, from the last common checkpoint between the source
606  * and the target. But if the source is a standby server, it's possible
607  * that the last common checkpoint is *after* the standby's restartpoint.
608  * That implies that the source server has applied the checkpoint record,
609  * but hasn't performed a corresponding restartpoint yet. Make sure we
610  * start at the restartpoint's redo point in that case.
611  *
612  * Use the old version of the source's control file for this. The server
613  * might have finished the restartpoint after we started copying files,
614  * but we must begin from the redo point at the time that we started copying.
615  */
616  if (ControlFile_source.checkPointCopy.redo < chkptredo)
617  {
618  chkptredo = ControlFile_source.checkPointCopy.redo;
619  chkpttli = ControlFile_source.checkPointCopy.ThisTimeLineID;
620  chkptrec = ControlFile_source.checkPoint;
621  }
622  createBackupLabel(chkptredo, chkpttli, chkptrec);
623 
624  /*
625  * Update control file of target, to tell the target how far it must
626  * replay the WAL (minRecoveryPoint).
627  */
628  if (connstr_source)
629  {
630  /*
631  * The source is a live server. Like in an online backup, it's
632  * important that we recover all the WAL that was generated while we
633  * were copying files.
634  */
635  if (ControlFile_source_after.state == DB_IN_ARCHIVE_RECOVERY)
636  {
637  /*
638  * Source is a standby server. We must replay to its
639  * minRecoveryPoint.
640  */
641  endrec = ControlFile_source_after.minRecoveryPoint;
642  endtli = ControlFile_source_after.minRecoveryPointTLI;
643  }
644  else
645  {
646  /*
647  * Source is a production, non-standby, server. We must replay to
648  * the last WAL insert location.
649  */
650  if (ControlFile_source_after.state != DB_IN_PRODUCTION)
651  pg_fatal("source system was in unexpected state at end of rewind");
652 
653  endrec = source->get_current_wal_insert_lsn(source);
654  endtli = ControlFile_source_after.checkPointCopy.ThisTimeLineID;
655  }
656  }
657  else
658  {
659  /*
660  * Source is a local data directory. It should've shut down cleanly,
661  * and we must replay to the latest shutdown checkpoint.
662  */
663  endrec = ControlFile_source_after.checkPoint;
664  endtli = ControlFile_source_after.checkPointCopy.ThisTimeLineID;
665  }
666 
667  memcpy(&ControlFile_new, &ControlFile_source_after, sizeof(ControlFileData));
668  ControlFile_new.minRecoveryPoint = endrec;
669  ControlFile_new.minRecoveryPointTLI = endtli;
670  ControlFile_new.state = DB_IN_ARCHIVE_RECOVERY;
671  if (!dry_run)
672  update_controlfile(datadir_target, &ControlFile_new, do_sync);
673 }
674 
675 static void
677 {
678  /* TODO Check that there's no backup_label in either cluster */
679 
680  /* Check system_identifier match */
681  if (ControlFile_target.system_identifier != ControlFile_source.system_identifier)
682  pg_fatal("source and target clusters are from different systems");
683 
684  /* check version */
685  if (ControlFile_target.pg_control_version != PG_CONTROL_VERSION ||
686  ControlFile_source.pg_control_version != PG_CONTROL_VERSION ||
687  ControlFile_target.catalog_version_no != CATALOG_VERSION_NO ||
688  ControlFile_source.catalog_version_no != CATALOG_VERSION_NO)
689  {
690  pg_fatal("clusters are not compatible with this version of pg_rewind");
691  }
692 
693  /*
694  * Target cluster need to use checksums or hint bit wal-logging, this to
695  * prevent from data corruption that could occur because of hint bits.
696  */
697  if (ControlFile_target.data_checksum_version != PG_DATA_CHECKSUM_VERSION &&
698  !ControlFile_target.wal_log_hints)
699  {
700  pg_fatal("target server needs to use either data checksums or \"wal_log_hints = on\"");
701  }
702 
703  /*
704  * Target cluster better not be running. This doesn't guard against
705  * someone starting the cluster concurrently. Also, this is probably more
706  * strict than necessary; it's OK if the target node was not shut down
707  * cleanly, as long as it isn't running at the moment.
708  */
709  if (ControlFile_target.state != DB_SHUTDOWNED &&
710  ControlFile_target.state != DB_SHUTDOWNED_IN_RECOVERY)
711  pg_fatal("target server must be shut down cleanly");
712 
713  /*
714  * When the source is a data directory, also require that the source
715  * server is shut down. There isn't any very strong reason for this
716  * limitation, but better safe than sorry.
717  */
718  if (datadir_source &&
719  ControlFile_source.state != DB_SHUTDOWNED &&
720  ControlFile_source.state != DB_SHUTDOWNED_IN_RECOVERY)
721  pg_fatal("source data directory must be shut down cleanly");
722 }
723 
724 /*
725  * Print a progress report based on the fetch_size and fetch_done variables.
726  *
727  * Progress report is written at maximum once per second, except that the
728  * last progress report is always printed.
729  *
730  * If finished is set to true, this is the last progress report. The cursor
731  * is moved to the next line.
732  */
733 void
734 progress_report(bool finished)
735 {
736  static pg_time_t last_progress_report = 0;
737  int percent;
738  char fetch_done_str[32];
739  char fetch_size_str[32];
740  pg_time_t now;
741 
742  if (!showprogress)
743  return;
744 
745  now = time(NULL);
746  if (now == last_progress_report && !finished)
747  return; /* Max once per second */
748 
749  last_progress_report = now;
750  percent = fetch_size ? (int) ((fetch_done) * 100 / fetch_size) : 0;
751 
752  /*
753  * Avoid overflowing past 100% or the full size. This may make the total
754  * size number change as we approach the end of the backup (the estimate
755  * will always be wrong if WAL is included), but that's better than having
756  * the done column be bigger than the total.
757  */
758  if (percent > 100)
759  percent = 100;
760  if (fetch_done > fetch_size)
762 
763  /*
764  * Separate step to keep platform-dependent format code out of
765  * translatable strings. And we only test for INT64_FORMAT availability
766  * in snprintf, not fprintf.
767  */
768  snprintf(fetch_done_str, sizeof(fetch_done_str), INT64_FORMAT,
769  fetch_done / 1024);
770  snprintf(fetch_size_str, sizeof(fetch_size_str), INT64_FORMAT,
771  fetch_size / 1024);
772 
773  fprintf(stderr, _("%*s/%s kB (%d%%) copied"),
774  (int) strlen(fetch_size_str), fetch_done_str, fetch_size_str,
775  percent);
776 
777  /*
778  * Stay on the same line if reporting to a terminal and we're not done
779  * yet.
780  */
781  fputc((!finished && isatty(fileno(stderr))) ? '\r' : '\n', stderr);
782 }
783 
784 /*
785  * Find minimum from two WAL locations assuming InvalidXLogRecPtr means
786  * infinity as src/include/access/timeline.h states. This routine should
787  * be used only when comparing WAL locations related to history files.
788  */
789 static XLogRecPtr
791 {
792  if (XLogRecPtrIsInvalid(a))
793  return b;
794  else if (XLogRecPtrIsInvalid(b))
795  return a;
796  else
797  return Min(a, b);
798 }
799 
800 /*
801  * Retrieve timeline history for given control file which should behold
802  * either source or target.
803  */
804 static TimeLineHistoryEntry *
805 getTimelineHistory(ControlFileData *controlFile, int *nentries)
806 {
807  TimeLineHistoryEntry *history;
808  TimeLineID tli;
809 
810  tli = controlFile->checkPointCopy.ThisTimeLineID;
811 
812  /*
813  * Timeline 1 does not have a history file, so there is no need to check
814  * and fake an entry with infinite start and end positions.
815  */
816  if (tli == 1)
817  {
818  history = (TimeLineHistoryEntry *) pg_malloc(sizeof(TimeLineHistoryEntry));
819  history->tli = tli;
820  history->begin = history->end = InvalidXLogRecPtr;
821  *nentries = 1;
822  }
823  else
824  {
825  char path[MAXPGPATH];
826  char *histfile;
827 
828  TLHistoryFilePath(path, tli);
829 
830  /* Get history file from appropriate source */
831  if (controlFile == &ControlFile_source)
832  histfile = source->fetch_file(source, path, NULL);
833  else if (controlFile == &ControlFile_target)
834  histfile = slurpFile(datadir_target, path, NULL);
835  else
836  pg_fatal("invalid control file");
837 
838  history = rewind_parseTimeLineHistory(histfile, tli, nentries);
839  pg_free(histfile);
840  }
841 
842  if (debug)
843  {
844  int i;
845 
846  if (controlFile == &ControlFile_source)
847  pg_log_debug("Source timeline history:");
848  else if (controlFile == &ControlFile_target)
849  pg_log_debug("Target timeline history:");
850  else
851  Assert(false);
852 
853  /*
854  * Print the target timeline history.
855  */
856  for (i = 0; i < targetNentries; i++)
857  {
858  TimeLineHistoryEntry *entry;
859 
860  entry = &history[i];
861  pg_log_debug("%d: %X/%X - %X/%X", entry->tli,
862  (uint32) (entry->begin >> 32), (uint32) (entry->begin),
863  (uint32) (entry->end >> 32), (uint32) (entry->end));
864  }
865  }
866 
867  return history;
868 }
869 
870 /*
871  * Determine the TLI of the last common timeline in the timeline history of the
872  * two clusters. targetHistory is filled with target timeline history and
873  * targetNentries is number of items in targetHistory. *tliIndex is set to the
874  * index of last common timeline in targetHistory array, and *recptr is set to
875  * the position where the timeline history diverged (ie. the first WAL record
876  * that's not the same in both clusters).
877  *
878  * Control files of both clusters must be read into ControlFile_target/source
879  * before calling this routine.
880  */
881 static void
882 findCommonAncestorTimeline(XLogRecPtr *recptr, int *tliIndex)
883 {
884  TimeLineHistoryEntry *sourceHistory;
885  int sourceNentries;
886  int i,
887  n;
888 
889  /* Retrieve timelines for both source and target */
890  sourceHistory = getTimelineHistory(&ControlFile_source, &sourceNentries);
891  targetHistory = getTimelineHistory(&ControlFile_target, &targetNentries);
892 
893  /*
894  * Trace the history forward, until we hit the timeline diverge. It may
895  * still be possible that the source and target nodes used the same
896  * timeline number in their history but with different start position
897  * depending on the history files that each node has fetched in previous
898  * recovery processes. Hence check the start position of the new timeline
899  * as well and move down by one extra timeline entry if they do not match.
900  */
901  n = Min(sourceNentries, targetNentries);
902  for (i = 0; i < n; i++)
903  {
904  if (sourceHistory[i].tli != targetHistory[i].tli ||
905  sourceHistory[i].begin != targetHistory[i].begin)
906  break;
907  }
908 
909  if (i > 0)
910  {
911  i--;
912  *recptr = MinXLogRecPtr(sourceHistory[i].end, targetHistory[i].end);
913  *tliIndex = i;
914 
915  pg_free(sourceHistory);
916  return;
917  }
918  else
919  {
920  pg_fatal("could not find common ancestor of the source and target cluster's timelines");
921  }
922 }
923 
924 
925 /*
926  * Create a backup_label file that forces recovery to begin at the last common
927  * checkpoint.
928  */
929 static void
930 createBackupLabel(XLogRecPtr startpoint, TimeLineID starttli, XLogRecPtr checkpointloc)
931 {
932  XLogSegNo startsegno;
933  time_t stamp_time;
934  char strfbuf[128];
935  char xlogfilename[MAXFNAMELEN];
936  struct tm *tmp;
937  char buf[1000];
938  int len;
939 
940  XLByteToSeg(startpoint, startsegno, WalSegSz);
941  XLogFileName(xlogfilename, starttli, startsegno, WalSegSz);
942 
943  /*
944  * Construct backup label file
945  */
946  stamp_time = time(NULL);
947  tmp = localtime(&stamp_time);
948  strftime(strfbuf, sizeof(strfbuf), "%Y-%m-%d %H:%M:%S %Z", tmp);
949 
950  len = snprintf(buf, sizeof(buf),
951  "START WAL LOCATION: %X/%X (file %s)\n"
952  "CHECKPOINT LOCATION: %X/%X\n"
953  "BACKUP METHOD: pg_rewind\n"
954  "BACKUP FROM: standby\n"
955  "START TIME: %s\n",
956  /* omit LABEL: line */
957  (uint32) (startpoint >> 32), (uint32) startpoint, xlogfilename,
958  (uint32) (checkpointloc >> 32), (uint32) checkpointloc,
959  strfbuf);
960  if (len >= sizeof(buf))
961  pg_fatal("backup label buffer too small"); /* shouldn't happen */
962 
963  /* TODO: move old file out of the way, if any. */
964  open_target_file("backup_label", true); /* BACKUP_LABEL_FILE */
965  write_target_range(buf, 0, len);
967 }
968 
969 /*
970  * Check CRC of control file
971  */
972 static void
974 {
975  pg_crc32c crc;
976 
977  /* Calculate CRC */
978  INIT_CRC32C(crc);
979  COMP_CRC32C(crc, (char *) ControlFile, offsetof(ControlFileData, crc));
980  FIN_CRC32C(crc);
981 
982  /* And simply compare it */
983  if (!EQ_CRC32C(crc, ControlFile->crc))
984  pg_fatal("unexpected control file CRC");
985 }
986 
987 /*
988  * Verify control file contents in the buffer 'content', and copy it to
989  * *ControlFile.
990  */
991 static void
993  size_t size)
994 {
995  if (size != PG_CONTROL_FILE_SIZE)
996  pg_fatal("unexpected control file size %d, expected %d",
997  (int) size, PG_CONTROL_FILE_SIZE);
998 
999  memcpy(ControlFile, content, sizeof(ControlFileData));
1000 
1001  /* set and validate WalSegSz */
1002  WalSegSz = ControlFile->xlog_seg_size;
1003 
1005  pg_fatal(ngettext("WAL segment size must be a power of two between 1 MB and 1 GB, but the control file specifies %d byte",
1006  "WAL segment size must be a power of two between 1 MB and 1 GB, but the control file specifies %d bytes",
1007  WalSegSz),
1008  WalSegSz);
1009 
1010  /* Additional checks on control file */
1011  checkControlFile(ControlFile);
1012 }
1013 
1014 /*
1015  * Get value of GUC parameter restore_command from the target cluster.
1016  *
1017  * This uses a logic based on "postgres -C" to get the value from the
1018  * cluster.
1019  */
1020 static void
1022 {
1023  int rc;
1024  char postgres_exec_path[MAXPGPATH],
1025  postgres_cmd[MAXPGPATH],
1026  cmd_output[MAXPGPATH];
1027 
1028  if (!restore_wal)
1029  return;
1030 
1031  /* find postgres executable */
1032  rc = find_other_exec(argv0, "postgres",
1034  postgres_exec_path);
1035 
1036  if (rc < 0)
1037  {
1038  char full_path[MAXPGPATH];
1039 
1040  if (find_my_exec(argv0, full_path) < 0)
1041  strlcpy(full_path, progname, sizeof(full_path));
1042 
1043  if (rc == -1)
1044  pg_log_error("The program \"%s\" is needed by %s but was not found in the\n"
1045  "same directory as \"%s\".\n"
1046  "Check your installation.",
1047  "postgres", progname, full_path);
1048  else
1049  pg_log_error("The program \"%s\" was found by \"%s\"\n"
1050  "but was not the same version as %s.\n"
1051  "Check your installation.",
1052  "postgres", full_path, progname);
1053  exit(1);
1054  }
1055 
1056  /*
1057  * Build a command able to retrieve the value of GUC parameter
1058  * restore_command, if set.
1059  */
1060  snprintf(postgres_cmd, sizeof(postgres_cmd),
1061  "\"%s\" -D \"%s\" -C restore_command",
1062  postgres_exec_path, datadir_target);
1063 
1064  if (!pipe_read_line(postgres_cmd, cmd_output, sizeof(cmd_output)))
1065  exit(1);
1066 
1067  (void) pg_strip_crlf(cmd_output);
1068 
1069  if (strcmp(cmd_output, "") == 0)
1070  pg_fatal("restore_command is not set in the target cluster");
1071 
1072  restore_command = pg_strdup(cmd_output);
1073 
1074  pg_log_debug("using for rewind restore_command = \'%s\'",
1075  restore_command);
1076 }
1077 
1078 
1079 /*
1080  * Ensure clean shutdown of target instance by launching single-user mode
1081  * postgres to do crash recovery.
1082  */
1083 static void
1085 {
1086  int ret;
1087 #define MAXCMDLEN (2 * MAXPGPATH)
1088  char exec_path[MAXPGPATH];
1089  char cmd[MAXCMDLEN];
1090 
1091  /* locate postgres binary */
1092  if ((ret = find_other_exec(argv0, "postgres",
1094  exec_path)) < 0)
1095  {
1096  char full_path[MAXPGPATH];
1097 
1098  if (find_my_exec(argv0, full_path) < 0)
1099  strlcpy(full_path, progname, sizeof(full_path));
1100 
1101  if (ret == -1)
1102  pg_fatal("The program \"%s\" is needed by %s but was not found in the\n"
1103  "same directory as \"%s\".\n"
1104  "Check your installation.",
1105  "postgres", progname, full_path);
1106  else
1107  pg_fatal("The program \"%s\" was found by \"%s\"\n"
1108  "but was not the same version as %s.\n"
1109  "Check your installation.",
1110  "postgres", full_path, progname);
1111  }
1112 
1113  pg_log_info("executing \"%s\" for target server to complete crash recovery",
1114  exec_path);
1115 
1116  /*
1117  * Skip processing if requested, but only after ensuring presence of
1118  * postgres.
1119  */
1120  if (dry_run)
1121  return;
1122 
1123  /*
1124  * Finally run postgres in single-user mode. There is no need to use
1125  * fsync here. This makes the recovery faster, and the target data folder
1126  * is synced at the end anyway.
1127  */
1128  snprintf(cmd, MAXCMDLEN, "\"%s\" --single -F -D \"%s\" template1 < \"%s\"",
1129  exec_path, datadir_target, DEVNULL);
1130 
1131  if (system(cmd) != 0)
1132  {
1133  pg_log_error("postgres single-user mode in target cluster failed");
1134  pg_fatal("Command was: %s", cmd);
1135  }
1136 }
1137 
1138 static void
1140 {
1141  if (conn != NULL)
1142  PQfinish(conn);
1143 }
void extractPageMap(const char *datadir, XLogRecPtr startpoint, int tliIndex, XLogRecPtr endpoint, const char *restoreCommand)
Definition: parsexlog.c:63
#define INIT_CRC32C(crc)
Definition: pg_crc32c.h:41
#define IsValidWalSegSize(size)
Definition: xlog_internal.h:94
int find_other_exec(const char *argv0, const char *target, const char *versionstr, char *retpath)
Definition: exec.c:323
char * datadir_target
Definition: pg_rewind.c:59
char * PQerrorMessage(const PGconn *conn)
Definition: fe-connect.c:6669
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28
void open_target_file(const char *path, bool trunc)
Definition: file_ops.c:47
static char * argv0
Definition: pg_ctl.c:97
void(* finish_fetch)(struct rewind_source *)
Definition: rewind_source.h:53
static void perform_rewind(filemap_t *filemap, rewind_source *source, XLogRecPtr chkptrec, TimeLineID chkpttli, XLogRecPtr chkptredo)
Definition: pg_rewind.c:494
void write_target_range(char *buf, off_t begin, size_t size)
Definition: file_ops.c:88
uint32 TimeLineID
Definition: xlogdefs.h:52
int64 pg_time_t
Definition: pgtime.h:23
static bool debug
Definition: pg_rewind.c:64
int pg_strip_crlf(char *str)
Definition: string.c:105
#define MAXCMDLEN
int main(int argc, char **argv)
Definition: pg_rewind.c:108
void * pg_malloc(size_t size)
Definition: fe_memutils.c:47
TimeLineID minRecoveryPointTLI
Definition: pg_control.h:167
TimeLineHistoryEntry * targetHistory
Definition: pg_rewind.c:71
bool dry_run
Definition: pg_rewind.c:66
const char * get_progname(const char *argv0)
Definition: path.c:453
#define pg_log_error(...)
Definition: logging.h:80
rewind_source * init_local_source(const char *datadir)
Definition: local_source.c:38
int getopt_long(int argc, char *const argv[], const char *optstring, const struct option *longopts, int *longindex)
Definition: getopt_long.c:57
void get_restricted_token(void)
uint32 pg_crc32c
Definition: pg_crc32c.h:38
void traverse_datadir(const char *datadir, process_file_callback_t callback)
Definition: file_ops.c:362
void pg_logging_init(const char *argv0)
Definition: logging.c:81
void WriteRecoveryConfig(PGconn *pgconn, char *target_dir, PQExpBuffer contents)
Definition: recovery_gen.c:117
static pg_time_t last_progress_report
#define Min(x, y)
Definition: c.h:982
void(* traverse_files)(struct rewind_source *, process_file_callback_t callback)
Definition: rewind_source.h:29
static void ensureCleanShutdown(const char *argv0)
Definition: pg_rewind.c:1084
static XLogRecPtr MinXLogRecPtr(XLogRecPtr a, XLogRecPtr b)
Definition: pg_rewind.c:790
char *(* fetch_file)(struct rewind_source *, const char *path, size_t *filesize)
Definition: rewind_source.h:37
static bool writerecoveryconf
void findLastCheckpoint(const char *datadir, XLogRecPtr forkptr, int tliIndex, XLogRecPtr *lastchkptrec, TimeLineID *lastchkpttli, XLogRecPtr *lastchkptredo, const char *restoreCommand)
Definition: parsexlog.c:164
#define pg_fatal(...)
Definition: pg_rewind.h:37
void PQfinish(PGconn *conn)
Definition: fe-connect.c:4174
#define CATALOG_VERSION_NO
Definition: catversion.h:56
#define PG_CONTROL_VERSION
Definition: pg_control.h:25
uint32 pg_control_version
Definition: pg_control.h:121
#define printf(...)
Definition: port.h:221
uint32 BlockNumber
Definition: block.h:31
TimeLineID tli
Definition: timeline.h:27
CheckPoint checkPointCopy
Definition: pg_control.h:131
#define fprintf
Definition: port.h:219
void filehash_init(void)
Definition: filemap.c:169
void process_target_file(const char *path, file_type_t type, size_t size, const char *link_target)
Definition: filemap.c:255
static ControlFileData ControlFile_source_after
Definition: pg_rewind.c:53
uint64 fetch_size
Definition: filemap.h:93
void truncate_target_file(const char *path, off_t newsize)
Definition: file_ops.c:206
static struct pg_tm tm
Definition: localtime.c:102
size_t target_size
Definition: filemap.h:61
bool showprogress
Definition: pg_rewind.c:65
void update_controlfile(const char *DataDir, ControlFileData *ControlFile, bool do_sync)
#define required_argument
Definition: getopt_long.h:25
uint32 xlog_seg_size
Definition: pg_control.h:209
int optind
Definition: getopt.c:50
rewind_source * init_libpq_source(PGconn *conn)
Definition: libpq_source.c:81
bool restore_wal
Definition: pg_rewind.c:68
uint64 system_identifier
Definition: pg_control.h:106
uint64 fetch_size
Definition: pg_rewind.c:75
bool datapagemap_next(datapagemap_iterator_t *iter, BlockNumber *blkno)
Definition: datapagemap.c:87
static void checkControlFile(ControlFileData *ControlFile)
Definition: pg_rewind.c:973
#define pg_log_debug(...)
Definition: logging.h:92
XLogRecPtr(* get_current_wal_insert_lsn)(struct rewind_source *)
Definition: rewind_source.h:58
#define MAXPGPATH
int find_my_exec(const char *argv0, char *retpath)
Definition: exec.c:128
file_action_t action
Definition: filemap.h:81
uint32 data_checksum_version
Definition: pg_control.h:220
char * c
static void usage(const char *progname)
Definition: pg_rewind.c:82
static char * buf
Definition: pg_test_fsync.c:68
uint64 XLogSegNo
Definition: xlogdefs.h:41
size_t source_size
Definition: filemap.h:75
char * pg_strdup(const char *in)
Definition: fe_memutils.c:85
uint64 total_size
Definition: filemap.h:92
unsigned int uint32
Definition: c.h:429
#define EQ_CRC32C(c1, c2)
Definition: pg_crc32c.h:42
#define DEVNULL
Definition: port.h:146
const char * path
Definition: filemap.h:53
void(* queue_fetch_range)(struct rewind_source *, const char *path, off_t offset, size_t len)
Definition: rewind_source.h:47
filemap_t * decide_file_actions(void)
Definition: filemap.c:789
static void findCommonAncestorTimeline(XLogRecPtr *recptr, int *tliIndex)
Definition: pg_rewind.c:882
PQExpBuffer GenerateRecoveryConfig(PGconn *pgconn, char *replication_slot)
Definition: recovery_gen.c:23
#define XLogRecPtrIsInvalid(r)
Definition: xlogdefs.h:29
char * connstr_source
Definition: pg_rewind.c:61
#define MAXFNAMELEN
int WalSegSz
Definition: pg_rewind.c:56
#define no_argument
Definition: getopt_long.h:24
#define ngettext(s, p, n)
Definition: c.h:1178
#define PG_TEXTDOMAIN(domain)
Definition: c.h:1211
void print_filemap(filemap_t *filemap)
Definition: filemap.c:480
static void disconnect_atexit(void)
Definition: pg_rewind.c:1139
int targetNentries
Definition: pg_rewind.c:72
datapagemap_t target_pages_to_overwrite
Definition: filemap.h:68
void sync_target_dir(void)
Definition: file_ops.c:294
void(* destroy)(struct rewind_source *)
Definition: rewind_source.h:63
char * datadir_source
Definition: pg_rewind.c:60
static ControlFileData * ControlFile
Definition: xlog.c:737
int bitmapsize
Definition: datapagemap.h:18
#define PG_BACKEND_VERSIONSTR
Definition: port.h:134
void calculate_totals(filemap_t *filemap)
Definition: filemap.c:439
size_t strlcpy(char *dst, const char *src, size_t siz)
Definition: strlcpy.c:45
bool do_sync
Definition: pg_rewind.c:67
XLogRecPtr end
Definition: timeline.h:29
char * restore_command
Definition: pg_rewind.c:62
void remove_target(file_entry_t *entry)
Definition: file_ops.c:130
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define Assert(condition)
Definition: c.h:800
static rewind_source * source
Definition: pg_rewind.c:79
#define XLogFileName(fname, tli, logSegNo, wal_segsz_bytes)
static char * exec_path
Definition: pg_ctl.c:92
void pg_logging_increase_verbosity(void)
Definition: logging.c:174
XLogRecPtr readOneRecord(const char *datadir, XLogRecPtr ptr, int tliIndex, const char *restoreCommand)
Definition: parsexlog.c:120
void progress_report(bool finished)
Definition: pg_rewind.c:734
uint32 catalog_version_no
Definition: pg_control.h:122
datapagemap_iterator_t * datapagemap_iterate(datapagemap_t *map)
Definition: datapagemap.c:75
void pg_free(void *ptr)
Definition: fe_memutils.c:105
#define PG_CONTROL_FILE_SIZE
Definition: pg_control.h:248
static TimeLineHistoryEntry * getTimelineHistory(ControlFileData *controlFile, int *nentries)
Definition: pg_rewind.c:805
#define INT64_FORMAT
Definition: c.h:471
void process_source_file(const char *path, file_type_t type, size_t size, const char *link_target)
Definition: filemap.c:219
static void getRestoreCommand(const char *argv0)
Definition: pg_rewind.c:1021
static void sanityChecks(void)
Definition: pg_rewind.c:676
static PGconn * conn
Definition: pg_rewind.c:78
void close_target_file(void)
Definition: file_ops.c:75
#define PG_DATA_CHECKSUM_VERSION
Definition: bufpage.h:200
bool GetDataDirectoryCreatePerm(const char *dataDir)
int nentries
Definition: filemap.h:95
TimeLineID ThisTimeLineID
Definition: pg_control.h:39
void set_pglocale_pgservice(const char *argv0, const char *app)
Definition: exec.c:434
XLogRecPtr begin
Definition: timeline.h:28
char * optarg
Definition: getopt.c:52
int i
Definition: filemap.h:49
TimeLineHistoryEntry * rewind_parseTimeLineHistory(char *buffer, TimeLineID targetTLI, int *nentries)
Definition: timeline.c:29
static void digestControlFile(ControlFileData *ControlFile, const char *content, size_t size)
Definition: pg_rewind.c:992
file_entry_t * entries[FLEXIBLE_ARRAY_MEMBER]
Definition: filemap.h:96
static ControlFileData ControlFile_target
Definition: pg_rewind.c:51
char * pipe_read_line(char *cmd, char *line, int maxsize)
Definition: exec.c:359
void create_target(file_entry_t *entry)
Definition: file_ops.c:156
uint64 fetch_done
Definition: pg_rewind.c:76
static ControlFileData ControlFile_source
Definition: pg_rewind.c:52
static void createBackupLabel(XLogRecPtr startpoint, TimeLineID starttli, XLogRecPtr checkpointloc)
Definition: pg_rewind.c:930
ConnStatusType PQstatus(const PGconn *conn)
Definition: fe-connect.c:6616
#define COMP_CRC32C(crc, data, len)
Definition: pg_crc32c.h:89
#define FIN_CRC32C(crc)
Definition: pg_crc32c.h:94
#define snprintf
Definition: port.h:215
#define _(x)
Definition: elog.c:88
int pg_mode_mask
Definition: file_perm.c:25
Datum now(PG_FUNCTION_ARGS)
Definition: timestamp.c:1542
XLogRecPtr checkPoint
Definition: pg_control.h:129
XLogRecPtr redo
Definition: pg_control.h:37
#define offsetof(type, field)
Definition: c.h:723
#define pg_log_info(...)
Definition: logging.h:88
PGconn * PQconnectdb(const char *conninfo)
Definition: fe-connect.c:703
const char * progname
Definition: pg_rewind.c:55
XLogRecPtr minRecoveryPoint
Definition: pg_control.h:166
char * slurpFile(const char *datadir, const char *path, size_t *filesize)
Definition: file_ops.c:314
#define TLHistoryFilePath(path, tli)
#define XLByteToSeg(xlrp, logSegNo, wal_segsz_bytes)