PostgreSQL Source Code git master
pg_rewind.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * pg_rewind.c
4 * Synchronizes a PostgreSQL data directory to a new timeline
5 *
6 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
7 *
8 *-------------------------------------------------------------------------
9 */
10#include "postgres_fe.h"
11
12#include <sys/stat.h>
13#include <fcntl.h>
14#include <time.h>
15#include <unistd.h>
16
17#include "access/timeline.h"
19#include "catalog/catversion.h"
20#include "catalog/pg_control.h"
22#include "common/file_perm.h"
24#include "common/string.h"
28#include "file_ops.h"
29#include "filemap.h"
30#include "getopt_long.h"
31#include "pg_rewind.h"
32#include "rewind_source.h"
33#include "storage/bufpage.h"
34
/*
 * Forward declarations of the file-local helpers defined further down.
 *
 * NOTE(review): the embedded listing numbers skip 45 and 51 below. The two
 * parameter lists that start at 46 and 52 have lost their first line (return
 * type and function name) -- restore them from upstream before building.
 */
35static void usage(const char *progname);
36
37static void perform_rewind(filemap_t *filemap, rewind_source *source,
38 XLogRecPtr chkptrec,
39 TimeLineID chkpttli,
40 XLogRecPtr chkptredo);
41
42static void createBackupLabel(XLogRecPtr startpoint, TimeLineID starttli,
43 XLogRecPtr checkpointloc);
44
46 const char *content, size_t size);
47static void getRestoreCommand(const char *argv0);
48static void sanityChecks(void);
49static TimeLineHistoryEntry *getTimelineHistory(TimeLineID tli, bool is_source,
50 int *nentries);
52 int a_nentries,
53 TimeLineHistoryEntry *b_history,
54 int b_nentries,
55 XLogRecPtr *recptr, int *tliIndex);
56static void ensureCleanShutdown(const char *argv0);
57static void disconnect_atexit(void);
58
/*
 * NOTE(review): numbers 59-61 and 64 are skipped here. ControlFile_target,
 * ControlFile_source, ControlFile_source_after and WalSegSz are used later in
 * this file but are not declared anywhere in the visible text.
 */
62
/* Program name; assigned in main() from get_progname(argv[0]). */
63static const char *progname;
65
66/* Configuration options */
/* Target data directory (--target-pgdata); main() refuses to run if NULL. */
67char *datadir_target = NULL;
/*
 * Source selection: main() requires exactly one of these to be non-NULL.
 * connstr_source corresponds to --source-server (see the -R check in main()).
 */
68static char *datadir_source = NULL;
69static char *connstr_source = NULL;
/* Output of "postgres -C restore_command"; only fetched when restore_wal is set. */
70static char *restore_command = NULL;
/* When non-NULL, passed to postgres as "-c config_file=..." */
71static char *config_file = NULL;
72
/* Set by --debug (option code 3); enables the filemap and timeline dumps. */
73static bool debug = false;
/* Set by -P / --progress. */
74bool showprogress = false;
/* Set by -n / --dry-run. */
75bool dry_run = false;
/* Cleared by -N / --no-sync. */
76bool do_sync = true;
/* Set by -c / --restore-target-wal. */
77static bool restore_wal = false;
/* NOTE(review): number 78 is skipped; presumably the sync-method setting -- confirm. */
79
80/* Target history */
/*
 * NOTE(review): numbers 81-82 are skipped. targetHistory and targetNentries
 * are used in main() but are not declared in the visible text.
 */
83
84/* Progress counters */
/*
 * NOTE(review): numbers 85-86 are skipped. fetch_size and fetch_done are used
 * in main() and progress_report() but are not declared in the visible text.
 */
87
/*
 * libpq connection; closed with PQfinish() at the end of main() and in the
 * atexit handler. NOTE(review): number 89 is skipped; "source" is used
 * throughout the file but is not declared in the visible text.
 */
88static PGconn *conn;
90
/*
 * Print the command-line help text to standard output: a one-line
 * description, the usage line, the option list, the bug-report address and
 * the project home page.
 *
 * progname is interpolated into the description and usage lines. The
 * parameter shadows the file-scope variable of the same name.
 *
 * NOTE(review): the option/description columns inside the literals below show
 * only single spaces in this listing; the original column alignment may have
 * been collapsed -- compare against upstream before relying on the layout.
 */
91static void
92usage(const char *progname)
93{
94 printf(_("%s resynchronizes a PostgreSQL cluster with another copy of the cluster.\n\n"), progname);
95 printf(_("Usage:\n %s [OPTION]...\n\n"), progname);
96 printf(_("Options:\n"));
97 printf(_(" -c, --restore-target-wal use \"restore_command\" in target configuration to\n"
98 " retrieve WAL files from archives\n"));
99 printf(_(" -D, --target-pgdata=DIRECTORY existing data directory to modify\n"));
100 printf(_(" --source-pgdata=DIRECTORY source data directory to synchronize with\n"));
101 printf(_(" --source-server=CONNSTR source server to synchronize with\n"));
102 printf(_(" -n, --dry-run stop before modifying anything\n"));
103 printf(_(" -N, --no-sync do not wait for changes to be written\n"
104 " safely to disk\n"));
105 printf(_(" -P, --progress write progress messages\n"));
106 printf(_(" -R, --write-recovery-conf write configuration for replication\n"
107 " (requires --source-server)\n"));
108 printf(_(" --config-file=FILENAME use specified main server configuration\n"
109 " file when running target cluster\n"));
110 printf(_(" --debug write a lot of debug messages\n"));
111 printf(_(" --no-ensure-shutdown do not automatically fix unclean shutdown\n"));
112 printf(_(" --sync-method=METHOD set method for syncing files to disk\n"));
113 printf(_(" -V, --version output version information, then exit\n"));
114 printf(_(" -?, --help show this help, then exit\n"));
 /* Footer: where to report bugs and where the project lives. */
115 printf(_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
116 printf(_("%s home page: <%s>\n"), PACKAGE_NAME, PACKAGE_URL);
117}
118
119
/*
 * Program entry point.
 *
 * Parses and validates the command line, refuses to run as root (non-Windows),
 * then compares the source and target timelines. When the target has WAL past
 * the point of divergence, it locates the last common checkpoint, builds the
 * file map, and calls perform_rewind(). Returns 0 on success; every error path
 * visible here ends in exit(1) or pg_fatal().
 *
 * NOTE(review): this listing has dropped many lines inside this function (the
 * embedded numbers skip, for example, 166, 202, 206, 210, 214, 222, 226, 289,
 * 292, 294, 308, 310-311, 316, 319, 332, 336-337, 342, 347, 360-361, 364-365,
 * 393, 418, 420, 452-453, 467, 474, 478, 526, 529-530 and 534). The text below
 * is therefore not compilable as it stands; the inline notes mark the gaps
 * that change what the visible code appears to do.
 */
120int
121main(int argc, char **argv)
122{
 /* Long-only options use small integer codes (1-6) instead of a letter. */
123 static struct option long_options[] = {
124 {"help", no_argument, NULL, '?'},
125 {"target-pgdata", required_argument, NULL, 'D'},
126 {"write-recovery-conf", no_argument, NULL, 'R'},
127 {"source-pgdata", required_argument, NULL, 1},
128 {"source-server", required_argument, NULL, 2},
129 {"no-ensure-shutdown", no_argument, NULL, 4},
130 {"config-file", required_argument, NULL, 5},
131 {"version", no_argument, NULL, 'V'},
132 {"restore-target-wal", no_argument, NULL, 'c'},
133 {"dry-run", no_argument, NULL, 'n'},
134 {"no-sync", no_argument, NULL, 'N'},
135 {"progress", no_argument, NULL, 'P'},
136 {"debug", no_argument, NULL, 3},
137 {"sync-method", required_argument, NULL, 6},
138 {NULL, 0, NULL, 0}
139 };
140 int option_index;
141 int c;
142 XLogRecPtr divergerec;
143 int lastcommontliIndex;
144 XLogRecPtr chkptrec;
145 TimeLineID chkpttli;
146 XLogRecPtr chkptredo;
147 TimeLineID source_tli;
148 TimeLineID target_tli;
149 XLogRecPtr target_wal_endrec;
150 size_t size;
151 char *buffer;
152 bool no_ensure_shutdown = false;
153 bool rewind_needed;
154 bool writerecoveryconf = false;
155 filemap_t *filemap;
156
157 pg_logging_init(argv[0]);
158 set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_rewind"));
159 progname = get_progname(argv[0]);
160
161 /* Process command-line arguments */
 /* --help and --version are honoured only as the very first argument. */
162 if (argc > 1)
163 {
164 if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
165 {
 /* NOTE(review): number 166 is skipped; as listed, this branch exits without printing anything. */
167 exit(0);
168 }
169 if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
170 {
171 puts("pg_rewind (PostgreSQL) " PG_VERSION);
172 exit(0);
173 }
174 }
175
176 while ((c = getopt_long(argc, argv, "cD:nNPR", long_options, &option_index)) != -1)
177 {
178 switch (c)
179 {
180 case 'c':
181 restore_wal = true;
182 break;
183
184 case 'P':
185 showprogress = true;
186 break;
187
188 case 'n':
189 dry_run = true;
190 break;
191
192 case 'N':
193 do_sync = false;
194 break;
195
196 case 'R':
197 writerecoveryconf = true;
198 break;
199
200 case 3:
201 debug = true;
 /* NOTE(review): number 202 is skipped here. */
203 break;
204
 /*
  * NOTE(review): the bodies of cases 'D', 1, 2 and 5 are empty in this
  * listing (numbers 206, 210, 214 and 222 are skipped), so nothing
  * visible stores optarg for these options.
  */
205 case 'D': /* -D or --target-pgdata */
207 break;
208
209 case 1: /* --source-pgdata */
211 break;
212
213 case 2: /* --source-server */
215 break;
216
217 case 4:
218 no_ensure_shutdown = true;
219 break;
220
221 case 5:
223 break;
224
225 case 6:
 /* NOTE(review): number 226 is skipped; only the exit(1) of what is presumably an error path remains. */
227 exit(1);
228 break;
229
230 default:
231 /* getopt_long already emitted a complaint */
232 pg_log_error_hint("Try \"%s --help\" for more information.", progname);
233 exit(1);
234 }
235 }
236
 /* A source is mandatory, and the two ways of naming one are mutually exclusive. */
237 if (datadir_source == NULL && connstr_source == NULL)
238 {
239 pg_log_error("no source specified (--source-pgdata or --source-server)");
240 pg_log_error_hint("Try \"%s --help\" for more information.", progname);
241 exit(1);
242 }
243
244 if (datadir_source != NULL && connstr_source != NULL)
245 {
246 pg_log_error("only one of --source-pgdata or --source-server can be specified");
247 pg_log_error_hint("Try \"%s --help\" for more information.", progname);
248 exit(1);
249 }
250
251 if (datadir_target == NULL)
252 {
253 pg_log_error("no target data directory specified (--target-pgdata)");
254 pg_log_error_hint("Try \"%s --help\" for more information.", progname);
255 exit(1);
256 }
257
 /* -R needs a connection string to write into the recovery configuration. */
258 if (writerecoveryconf && connstr_source == NULL)
259 {
260 pg_log_error("no source server information (--source-server) specified for --write-recovery-conf");
261 pg_log_error_hint("Try \"%s --help\" for more information.", progname);
262 exit(1);
263 }
264
 /* Anything left after option parsing is an unexpected positional argument. */
265 if (optind < argc)
266 {
267 pg_log_error("too many command-line arguments (first is \"%s\")",
268 argv[optind]);
269 pg_log_error_hint("Try \"%s --help\" for more information.", progname);
270 exit(1);
271 }
272
273 /*
274 * Don't allow pg_rewind to be run as root, to avoid overwriting the
275 * ownership of files in the data directory. We need only check for root
276 * -- any other user won't have sufficient permissions to modify files in
277 * the data directory.
278 */
279#ifndef WIN32
280 if (geteuid() == 0)
281 {
282 pg_log_error("cannot be executed by \"root\"");
283 pg_log_error_hint("You must run %s as the PostgreSQL superuser.",
284 progname);
285 exit(1);
286 }
287#endif
288
290
291 /* Set mask based on PGDATA permissions */
 /*
  * NOTE(review): numbers 289, 292 and 294 are skipped; the pg_fatal() call
  * below has lost both its guarding condition and its final argument.
  */
293 pg_fatal("could not read permissions of directory \"%s\": %m",
295
296 umask(pg_mode_mask);
297
298 getRestoreCommand(argv[0]);
299
 /* Make sure the libpq connection is closed on any exit path. */
300 atexit(disconnect_atexit);
301
302 /*
303 * Ok, we have all the options and we're ready to start. First, connect to
304 * remote server.
305 */
 /*
  * NOTE(review): numbers 308, 310-311, 316 and 319 are skipped; the
  * statements that open the connection and initialise "source" for either
  * kind of source are not visible, and the "else" below has no body.
  */
306 if (connstr_source)
307 {
309
312
313 if (showprogress)
314 pg_log_info("connected to server");
315
317 }
318 else
320
321 /*
322 * Check the status of the target instance.
323 *
324 * If the target instance was not cleanly shut down, start and stop the
325 * target cluster once in single-user mode to enforce recovery to finish,
326 * ensuring that the cluster can be used by pg_rewind. Note that if
327 * no_ensure_shutdown is specified, pg_rewind ignores this step, and users
328 * need to make sure by themselves that the target cluster is in a clean
329 * state.
330 */
 /*
  * NOTE(review): numbers 332, 336-337, 342 and 347 are skipped; each
  * control-file buffer read below is freed with no visible consumer, and
  * the "if" condition is cut off after "&&".
  */
331 buffer = slurpFile(datadir_target, "global/pg_control", &size);
333 pg_free(buffer);
334
335 if (!no_ensure_shutdown &&
338 {
339 ensureCleanShutdown(argv[0]);
340
341 buffer = slurpFile(datadir_target, "global/pg_control", &size);
343 pg_free(buffer);
344 }
345
346 buffer = source->fetch_file(source, "global/pg_control", &size);
348 pg_free(buffer);
349
350 sanityChecks();
351
352 /*
353 * Usually, the TLI can be found in the latest checkpoint record. But if
354 * the source server is just being promoted (or it's a standby that's
355 * following a primary that's just being promoted), and the checkpoint
356 * requested by the promotion hasn't completed yet, the latest timeline is
357 * in minRecoveryPoint. So we check which is later, the TLI of the
358 * minRecoveryPoint or the latest checkpoint.
359 */
 /*
  * NOTE(review): numbers 360-361 and 364-365 are skipped; no visible
  * statement assigns source_tli or target_tli before they are compared.
  */
362
363 /* Similarly for the target. */
366
367 /*
368 * Find the common ancestor timeline between the clusters.
369 *
370 * If both clusters are already on the same timeline, there's nothing to
371 * do.
372 */
373 if (target_tli == source_tli)
374 {
375 pg_log_info("source and target cluster are on the same timeline");
376 rewind_needed = false;
377 target_wal_endrec = 0;
378 }
379 else
380 {
381 XLogRecPtr chkptendrec;
382 TimeLineHistoryEntry *sourceHistory;
383 int sourceNentries;
384
385 /*
386 * Retrieve timelines for both source and target, and find the point
387 * where they diverged.
388 */
389 sourceHistory = getTimelineHistory(source_tli, true, &sourceNentries);
390 targetHistory = getTimelineHistory(target_tli, false, &targetNentries);
391
 /* NOTE(review): number 393 is skipped; the middle arguments of this call are missing. */
392 findCommonAncestorTimeline(sourceHistory, sourceNentries,
394 &divergerec, &lastcommontliIndex);
395
396 pg_log_info("servers diverged at WAL location %X/%X on timeline %u",
397 LSN_FORMAT_ARGS(divergerec),
398 targetHistory[lastcommontliIndex].tli);
399
400 /*
401 * Don't need the source history anymore. The target history is still
402 * needed by the routines in parsexlog.c, when we read the target WAL.
403 */
404 pfree(sourceHistory);
405
406
407 /*
408 * Determine the end-of-WAL on the target.
409 *
410 * The WAL ends at the last shutdown checkpoint, or at
411 * minRecoveryPoint if it was a standby. (If we supported rewinding a
412 * server that was not shut down cleanly, we would need to replay
413 * until we reach the first invalid record, like crash recovery does.)
414 */
415
416 /* read the checkpoint record on the target to see where it ends. */
 /* NOTE(review): numbers 418 and 420 are skipped; this call is missing arguments and its terminator. */
417 chkptendrec = readOneRecord(datadir_target,
419 targetNentries - 1,
421
 /* End of target WAL is whichever is later: minRecoveryPoint or the checkpoint's end. */
422 if (ControlFile_target.minRecoveryPoint > chkptendrec)
423 {
424 target_wal_endrec = ControlFile_target.minRecoveryPoint;
425 }
426 else
427 {
428 target_wal_endrec = chkptendrec;
429 }
430
431 /*
432 * Check for the possibility that the target is in fact a direct
433 * ancestor of the source. In that case, there is no divergent history
434 * in the target that needs rewinding.
435 */
436 if (target_wal_endrec > divergerec)
437 {
438 rewind_needed = true;
439 }
440 else
441 {
442 /* the last common checkpoint record must be part of target WAL */
443 Assert(target_wal_endrec == divergerec);
444
445 rewind_needed = false;
446 }
447 }
448
449 if (!rewind_needed)
450 {
451 pg_log_info("no rewind required");
 /* NOTE(review): numbers 452-453 are skipped; only the tail of a call taking GenerateRecoveryConfig() remains. */
454 GenerateRecoveryConfig(conn, NULL, NULL));
455 exit(0);
456 }
457
458 /* Initialize hashtable that tracks WAL files protected from removal */
459 keepwal_init();
460
461 findLastCheckpoint(datadir_target, divergerec, lastcommontliIndex,
462 &chkptrec, &chkpttli, &chkptredo, restore_command);
463 pg_log_info("rewinding from last common checkpoint at %X/%X on timeline %u",
464 LSN_FORMAT_ARGS(chkptrec), chkpttli);
465
466 /* Initialize the hash table to track the status of each file */
 /* NOTE(review): numbers 467, 474 and 478 are skipped; the init and both file-list traversals are not visible. */
468
469 /*
470 * Collect information about all files in both data directories.
471 */
472 if (showprogress)
473 pg_log_info("reading source file list");
475
476 if (showprogress)
477 pg_log_info("reading target file list");
479
480 /*
481 * Read the target WAL from last checkpoint before the point of fork, to
482 * extract all the pages that were modified on the target cluster after
483 * the fork.
484 */
485 if (showprogress)
486 pg_log_info("reading WAL in target");
487 extractPageMap(datadir_target, chkptrec, lastcommontliIndex,
488 target_wal_endrec, restore_command);
489
490 /*
491 * We have collected all information we need from both systems. Decide
492 * what to do with each file.
493 */
494 filemap = decide_file_actions();
495 if (showprogress)
496 calculate_totals(filemap);
497
498 /* this is too verbose even for verbose mode */
499 if (debug)
500 print_filemap(filemap);
501
502 /*
503 * Ok, we're ready to start copying things over.
504 */
505 if (showprogress)
506 {
507 pg_log_info("need to copy %lu MB (total source directory size is %lu MB)",
508 (unsigned long) (filemap->fetch_size / (1024 * 1024)),
509 (unsigned long) (filemap->total_size / (1024 * 1024)));
510
 /* Seed the counters that progress_report() reads. */
511 fetch_size = filemap->fetch_size;
512 fetch_done = 0;
513 }
514
515 /*
516 * We have now collected all the information we need from both systems,
517 * and we are ready to start modifying the target directory.
518 *
519 * This is the point of no return. Once we start copying things, there is
520 * no turning back!
521 */
522 perform_rewind(filemap, source, chkptrec, chkpttli, chkptredo);
523
524 if (showprogress)
525 pg_log_info("syncing target data directory");
 /* NOTE(review): number 526 is skipped; the sync call announced by the message above is not visible. */
527
528 /* Also update the standby configuration, if requested. */
 /* NOTE(review): numbers 529-530 are skipped; only the tail of the call remains. */
531 GenerateRecoveryConfig(conn, NULL, NULL));
532
533 /* don't need the source connection anymore */
 /* Clearing conn keeps the atexit handler from calling PQfinish() a second time. */
535 if (conn)
536 {
537 PQfinish(conn);
538 conn = NULL;
539 }
540
541 pg_log_info("Done!");
542
543 return 0;
544}
545
546/*
547 * Perform the rewind.
548 *
549 * We have already collected all the information we need from the
550 * target and the source.
551 */
/*
 * Walks every entry of the file map, queues fetches of modified blocks and
 * applies the per-file action, then re-reads the source control file, writes
 * a backup label, and (unless dry_run) stores an updated control file in the
 * target with state DB_IN_ARCHIVE_RECOVERY and minRecoveryPoint set to endrec.
 *
 * chkptrec, chkpttli and chkptredo are passed on to createBackupLabel().
 *
 * NOTE(review): number 553 is skipped, so the line carrying the function name
 * and its first parameters is missing; the prototype near the top of the file
 * suggests perform_rewind(filemap_t *filemap, rewind_source *source, ...).
 * Further gaps (577, 579, 583, 599, 602, 606-607, 612, 616, 620, 627, 629,
 * 638, 650, 674-676, 691, 697-698, 706, 709-711, 720-721) remove conditions,
 * case labels and the assignments to endrec/endtli. Restore from upstream
 * before building.
 */
552static void
554 XLogRecPtr chkptrec,
555 TimeLineID chkpttli,
556 XLogRecPtr chkptredo)
557{
558 XLogRecPtr endrec;
559 TimeLineID endtli;
560 ControlFileData ControlFile_new;
561 size_t size;
562 char *buffer;
563
564 /*
565 * Execute the actions in the file map, fetching data from the source
566 * system as needed.
567 */
568 for (int i = 0; i < filemap->nentries; i++)
569 {
570 file_entry_t *entry = filemap->entries[i];
571
572 /*
573 * If this is a relation file, copy the modified blocks.
574 *
575 * This is in addition to any other changes.
576 */
 /* NOTE(review): the guarding condition and the declaration/initialisation of "iter" are not visible. */
578 {
580 BlockNumber blkno;
581 off_t offset;
582
584 while (datapagemap_next(iter, &blkno))
585 {
 /* Each modified block is fetched as one BLCKSZ-sized range at its byte offset. */
586 offset = blkno * BLCKSZ;
587 source->queue_fetch_range(source, entry->path, offset, BLCKSZ);
588 }
589 pg_free(iter);
590 }
591
592 switch (entry->action)
593 {
594 case FILE_ACTION_NONE:
595 /* nothing else to do */
596 break;
597
598 case FILE_ACTION_COPY:
 /* NOTE(review): number 599 is skipped; the copy action's statement is missing. */
600 break;
601
 /*
  * NOTE(review): the case labels for the remaining actions (numbers
  * 602, 606-607, 612, 616, 620) are missing, so the statements below
  * appear unlabelled in this listing.
  */
603 truncate_target_file(entry->path, entry->source_size);
604 break;
605
608 entry->target_size,
609 entry->source_size - entry->target_size);
610 break;
611
613 remove_target(entry);
614 break;
615
617 create_target(entry);
618 break;
619
621 pg_fatal("no action decided for file \"%s\"", entry->path);
622 break;
623 }
624 }
625
626 /* Complete any remaining range-fetches that we queued up above. */
 /* NOTE(review): numbers 627 and 629 are skipped; the call that completes the fetches is not visible. */
628
630
 /* Emit the final progress line. */
631 progress_report(true);
632
633 /*
634 * Fetch the control file from the source last. This ensures that the
635 * minRecoveryPoint is up-to-date.
636 */
637 buffer = source->fetch_file(source, "global/pg_control", &size);
 /* NOTE(review): number 638 is skipped; the buffer is freed with no visible consumer. */
639 pg_free(buffer);
640
641 /*
642 * Sanity check: If the source is a local system, the control file should
643 * not have changed since we started.
644 *
645 * XXX: We assume it hasn't been modified, but actually, what could go
646 * wrong? The logic handles a libpq source that's modified concurrently,
647 * why not a local datadir?
648 */
 /* NOTE(review): number 650 is skipped; the start of the comparison call is missing. */
649 if (datadir_source &&
651 sizeof(ControlFileData)) != 0)
652 {
653 pg_fatal("source system was modified while pg_rewind was running");
654 }
655
656 if (showprogress)
657 pg_log_info("creating backup label and updating control file");
658
659 /*
660 * Create a backup label file, to tell the target where to begin the WAL
661 * replay. Normally, from the last common checkpoint between the source
662 * and the target. But if the source is a standby server, it's possible
663 * that the last common checkpoint is *after* the standby's restartpoint.
664 * That implies that the source server has applied the checkpoint record,
665 * but hasn't performed a corresponding restartpoint yet. Make sure we
666 * start at the restartpoint's redo point in that case.
667 *
668 * Use the old version of the source's control file for this. The server
669 * might have finished the restartpoint after we started copying files,
670 * but we must begin from the redo point at the time that we started copying.
671 */
672 if (ControlFile_source.checkPointCopy.redo < chkptredo)
673 {
 /* NOTE(review): numbers 674-676 are skipped; the body that adjusts the checkpoint values is missing. */
677 }
678 createBackupLabel(chkptredo, chkpttli, chkptrec);
679
680 /*
681 * Update control file of target, to tell the target how far it must
682 * replay the WAL (minRecoveryPoint).
683 */
 /*
  * NOTE(review): none of the assignments to endrec and endtli are visible
  * in the branches below, although both are read after the if/else.
  */
684 if (connstr_source)
685 {
686 /*
687 * The source is a live server. Like in an online backup, it's
688 * important that we recover all the WAL that was generated while we
689 * were copying files.
690 */
692 {
693 /*
694 * Source is a standby server. We must replay to its
695 * minRecoveryPoint.
696 */
699 }
700 else
701 {
702 /*
703 * Source is a production, non-standby, server. We must replay to
704 * the last WAL insert location.
705 */
707 pg_fatal("source system was in unexpected state at end of rewind");
708
712 }
713 }
714 else
715 {
716 /*
717 * Source is a local data directory. It should've shut down cleanly,
718 * and we must replay to the latest shutdown checkpoint.
719 */
722 }
723
 /* Start from the source's latest control file and override the recovery fields. */
724 memcpy(&ControlFile_new, &ControlFile_source_after, sizeof(ControlFileData));
725 ControlFile_new.minRecoveryPoint = endrec;
726 ControlFile_new.minRecoveryPointTLI = endtli;
727 ControlFile_new.state = DB_IN_ARCHIVE_RECOVERY;
728 if (!dry_run)
729 update_controlfile(datadir_target, &ControlFile_new, do_sync);
730}
731
/*
 * Pre-flight compatibility checks; each failing check ends in pg_fatal().
 *
 * NOTE(review): number 733 is skipped, so the line with the function name is
 * missing; presumably this is sanityChecks(void), which main() calls and which
 * is declared near the top of the file -- confirm against upstream. Numbers
 * 738, 742-745, 754-755, 766-767 and 776-777 are also skipped, so every
 * pg_fatal() below has lost its guarding condition and appears unconditional
 * in this listing.
 */
732static void
734{
735 /* TODO Check that there's no backup_label in either cluster */
736
737 /* Check system_identifier match */
739 pg_fatal("source and target clusters are from different systems");
740
741 /* check version */
746 {
747 pg_fatal("clusters are not compatible with this version of pg_rewind");
748 }
749
750 /*
751 * The target cluster needs to use data checksums or hint-bit WAL-logging,
752 * to prevent data corruption that could occur because of hint bits.
753 */
756 {
757 pg_fatal("target server needs to use either data checksums or \"wal_log_hints = on\"");
758 }
759
760 /*
761 * Target cluster better not be running. This doesn't guard against
762 * someone starting the cluster concurrently. Also, this is probably more
763 * strict than necessary; it's OK if the target node was not shut down
764 * cleanly, as long as it isn't running at the moment.
765 */
768 pg_fatal("target server must be shut down cleanly");
769
770 /*
771 * When the source is a data directory, also require that the source
772 * server is shut down. There isn't any very strong reason for this
773 * limitation, but better safe than sorry.
774 */
775 if (datadir_source &&
778 pg_fatal("source data directory must be shut down cleanly");
779}
780
781/*
782 * Print a progress report based on the fetch_size and fetch_done variables.
783 *
784 * Progress report is written at maximum once per second, except that the
785 * last progress report is always printed.
786 *
787 * If finished is set to true, this is the last progress report. The cursor
788 * is moved to the next line.
789 */
/*
 * Writes one "<done>/<total> kB (<percent>%) copied" line to stderr.
 * Does nothing unless showprogress is set.
 *
 * finished: true for the final report, which is always printed and always
 * ends with a newline.
 *
 * NOTE(review): numbers 793, 797, 806 and 817-818 are skipped. "now" and
 * "last_progress_report" are used below without a visible declaration, and
 * nothing visible updates last_progress_report after a report is printed.
 */
790void
791progress_report(bool finished)
792{
794 int percent;
795 char fetch_done_str[32];
796 char fetch_size_str[32];
798
799 if (!showprogress)
800 return;
801
802 now = time(NULL);
803 if (now == last_progress_report && !finished)
804 return; /* Max once per second */
805
 /* Guard against division by zero when nothing needs to be fetched. */
807 percent = fetch_size ? (int) ((fetch_done) * 100 / fetch_size) : 0;
808
809 /*
810 * Avoid overflowing past 100% or the full size. This may make the total
811 * size number change as we approach the end of the backup (the estimate
812 * will always be wrong if WAL is included), but that's better than having
813 * the done column be bigger than the total.
814 */
815 if (percent > 100)
816 percent = 100;
 /* NOTE(review): only the percent clamp is visible; the "full size" clamp mentioned above is not. */
819
 /* Both counters are shown in kB. */
820 snprintf(fetch_done_str, sizeof(fetch_done_str), UINT64_FORMAT,
821 fetch_done / 1024);
822 snprintf(fetch_size_str, sizeof(fetch_size_str), UINT64_FORMAT,
823 fetch_size / 1024);
824
 /* Pad the "done" figure to the width of the "total" figure so the column stays aligned. */
825 fprintf(stderr, _("%*s/%s kB (%d%%) copied"),
826 (int) strlen(fetch_size_str), fetch_done_str, fetch_size_str,
827 percent);
828
829 /*
830 * Stay on the same line if reporting to a terminal and we're not done
831 * yet.
832 */
833 fputc((!finished && isatty(fileno(stderr))) ? '\r' : '\n', stderr);
834}
835
836/*
837 * Find minimum from two WAL locations assuming InvalidXLogRecPtr means
838 * infinity as src/include/access/timeline.h states. This routine should
839 * be used only when comparing WAL locations related to history files.
840 */
/*
 * Returns the smaller of two WAL locations; the visible branches return the
 * other operand when one of them is invalid, and Min(a, b) otherwise.
 *
 * NOTE(review): numbers 842 and 844 are skipped, so the line with the function
 * name and parameters and the first "if" condition are missing. The only
 * caller in this file uses the name MinXLogRecPtr with two XLogRecPtr
 * arguments; the missing condition is presumably the invalid-check on "a",
 * mirroring the visible one on "b" -- confirm against upstream.
 */
841static XLogRecPtr
843{
845 return b;
846 else if (XLogRecPtrIsInvalid(b))
847 return a;
848 else
849 return Min(a, b);
850}
851
852/*
853 * Retrieve timeline history for the source or target system.
854 */
/*
 * tli:       timeline whose history is wanted.
 * is_source: true reads the history file through source->fetch_file(),
 *            false reads it from datadir_target with slurpFile().
 * nentries:  out parameter, set to the number of entries returned.
 *
 * Returns the history array. With debug set, every entry is also logged.
 *
 * NOTE(review): number 855 is skipped, so the return-type line is missing;
 * the prototype near the top of the file gives "static TimeLineHistoryEntry *".
 * Numbers 866 and 900 are skipped as well: "history" is dereferenced in the
 * tli == 1 branch without a visible allocation, and "entry" is used in the
 * debug loop without a visible declaration.
 */
856getTimelineHistory(TimeLineID tli, bool is_source, int *nentries)
857{
858 TimeLineHistoryEntry *history;
859
860 /*
861 * Timeline 1 does not have a history file, so there is no need to check
862 * and fake an entry with infinite start and end positions.
863 */
864 if (tli == 1)
865 {
867 history->tli = tli;
868 history->begin = history->end = InvalidXLogRecPtr;
869 *nentries = 1;
870 }
871 else
872 {
873 char path[MAXPGPATH];
874 char *histfile;
875
876 TLHistoryFilePath(path, tli);
877
878 /* Get history file from appropriate source */
879 if (is_source)
880 histfile = source->fetch_file(source, path, NULL);
881 else
882 histfile = slurpFile(datadir_target, path, NULL);
883
 /* The raw file text is only needed for parsing; free it right after. */
884 history = rewind_parseTimeLineHistory(histfile, tli, nentries);
885 pg_free(histfile);
886 }
887
888 /* In debugging mode, print what we read */
889 if (debug)
890 {
891 int i;
892
893 if (is_source)
894 pg_log_debug("Source timeline history:");
895 else
896 pg_log_debug("Target timeline history:");
897
898 for (i = 0; i < *nentries; i++)
899 {
901
902 entry = &history[i];
903 pg_log_debug("%u: %X/%X - %X/%X", entry->tli,
904 LSN_FORMAT_ARGS(entry->begin),
905 LSN_FORMAT_ARGS(entry->end));
906 }
907 }
908
909 return history;
910}
911
912/*
913 * Determine the TLI of the last common timeline in the timeline history of
914 * two clusters. *tliIndex is set to the index of last common timeline in
915 * the arrays, and *recptr is set to the position where the timeline history
916 * diverged (ie. the first WAL record that's not the same in both clusters).
917 */
/*
 * Compares two timeline histories entry by entry and reports the last entry
 * on which they agree (same tli and same begin position).
 *
 * On success *tliIndex is that entry's index and *recptr is the smaller of
 * the two histories' end positions for it (via MinXLogRecPtr). If not even
 * the first entries match, the function ends in pg_fatal().
 *
 * NOTE(review): number 919 is skipped, so the line with the function name and
 * the first two parameters is missing. The body reads a_history and
 * a_nentries, and main() calls findCommonAncestorTimeline() -- restore the
 * line from upstream before building.
 */
918static void
920 TimeLineHistoryEntry *b_history, int b_nentries,
921 XLogRecPtr *recptr, int *tliIndex)
922{
923 int i,
924 n;
925
926 /*
927 * Trace the history forward, until we hit the timeline diverge. It may
928 * still be possible that the source and target nodes used the same
929 * timeline number in their history but with different start position
930 * depending on the history files that each node has fetched in previous
931 * recovery processes. Hence check the start position of the new timeline
932 * as well and move down by one extra timeline entry if they do not match.
933 */
 /* Only the common prefix length of the two arrays can be compared. */
934 n = Min(a_nentries, b_nentries);
935 for (i = 0; i < n; i++)
936 {
937 if (a_history[i].tli != b_history[i].tli ||
938 a_history[i].begin != b_history[i].begin)
939 break;
940 }
941
 /* i is now the count of matching entries; step back to the last match. */
942 if (i > 0)
943 {
944 i--;
945 *recptr = MinXLogRecPtr(a_history[i].end, b_history[i].end);
946 *tliIndex = i;
947 return;
948 }
949 else
950 {
951 pg_fatal("could not find common ancestor of the source and target cluster's timelines");
952 }
953}
954
955
956/*
957 * Create a backup_label file that forces recovery to begin at the last common
958 * checkpoint.
959 */
/*
 * startpoint:    WAL location printed as START WAL LOCATION; also used to
 *                derive the WAL segment file name.
 * starttli:      timeline used when building that file name.
 * checkpointloc: WAL location printed as CHECKPOINT LOCATION.
 *
 * The label text is formatted into a fixed 1000-byte buffer; truncation ends
 * in pg_fatal(). The start time is the current local time.
 *
 * NOTE(review): numbers 996-997 are skipped; after open_target_file() nothing
 * visible writes "buf" to the file or closes it.
 */
960static void
961createBackupLabel(XLogRecPtr startpoint, TimeLineID starttli, XLogRecPtr checkpointloc)
962{
963 XLogSegNo startsegno;
964 time_t stamp_time;
965 char strfbuf[128];
966 char xlogfilename[MAXFNAMELEN];
967 struct tm *tmp;
968 char buf[1000];
969 int len;
970
 /* Work out which WAL segment file contains startpoint. */
971 XLByteToSeg(startpoint, startsegno, WalSegSz);
972 XLogFileName(xlogfilename, starttli, startsegno, WalSegSz);
973
974 /*
975 * Construct backup label file
976 */
977 stamp_time = time(NULL);
978 tmp = localtime(&stamp_time);
979 strftime(strfbuf, sizeof(strfbuf), "%Y-%m-%d %H:%M:%S %Z", tmp);
980
981 len = snprintf(buf, sizeof(buf),
982 "START WAL LOCATION: %X/%X (file %s)\n"
983 "CHECKPOINT LOCATION: %X/%X\n"
984 "BACKUP METHOD: pg_rewind\n"
985 "BACKUP FROM: standby\n"
986 "START TIME: %s\n",
987 /* omit LABEL: line */
988 LSN_FORMAT_ARGS(startpoint), xlogfilename,
989 LSN_FORMAT_ARGS(checkpointloc),
990 strfbuf);
 /* snprintf returns the untruncated length, so >= sizeof(buf) means the text did not fit. */
991 if (len >= sizeof(buf))
992 pg_fatal("backup label buffer too small"); /* shouldn't happen */
993
994 /* TODO: move old file out of the way, if any. */
995 open_target_file("backup_label", true); /* BACKUP_LABEL_FILE */
998}
999
1000/*
1001 * Check CRC of control file
1002 */
/*
 * Computes a CRC-32C over the control file bytes that precede its crc field
 * and ends in pg_fatal() when it differs from the stored ControlFile->crc.
 *
 * NOTE(review): numbers 1004 and 1009 are skipped, so the line with the
 * function name and parameter is missing, and so is the statement that
 * initialises "crc" before COMP_CRC32C() accumulates into it. The name is
 * presumably checkControlFile(ControlFileData *ControlFile) -- confirm
 * against upstream.
 */
1003static void
1005{
1006 pg_crc32c crc;
1007
1008 /* Calculate CRC */
1010 COMP_CRC32C(crc, (char *) ControlFile, offsetof(ControlFileData, crc));
1011 FIN_CRC32C(crc);
1012
1013 /* And simply compare it */
1014 if (!EQ_CRC32C(crc, ControlFile->crc))
1015 pg_fatal("unexpected control file CRC");
1016}
1017
1018/*
1019 * Verify control file contents in the buffer 'content', and copy it to
1020 * *ControlFile.
1021 */
/*
 * Copies sizeof(ControlFileData) bytes from the raw buffer into *ControlFile
 * and validates the WAL segment size; an invalid size is reported and the
 * program exits with status 1.
 *
 * NOTE(review): numbers 1023, 1026, 1033, 1035 and 1046 are skipped. The
 * first signature line is missing (the orphaned prototype near the top of the
 * file ends with "const char *content, size_t size"), the size check has lost
 * its condition, WalSegSz is never visibly assigned, the validity test before
 * the error block is missing, and so is the "additional checks" call.
 * Presumably this is digestControlFile() -- confirm against upstream.
 */
1022static void
1024 size_t size)
1025{
1027 pg_fatal("unexpected control file size %d, expected %d",
1028 (int) size, PG_CONTROL_FILE_SIZE);
1029
1030 memcpy(ControlFile, content, sizeof(ControlFileData));
1031
1032 /* set and validate WalSegSz */
1034
1036 {
 /* ngettext() picks the singular or plural message based on WalSegSz. */
1037 pg_log_error(ngettext("invalid WAL segment size in control file (%d byte)",
1038 "invalid WAL segment size in control file (%d bytes)",
1039 WalSegSz),
1040 WalSegSz);
1041 pg_log_error_detail("The WAL segment size must be a power of two between 1 MB and 1 GB.");
1042 exit(1);
1043 }
1044
1045 /* Additional checks on control file */
1047}
1048
1049/*
1050 * Get value of GUC parameter restore_command from the target cluster.
1051 *
1052 * This uses a logic based on "postgres -C" to get the value from the
1053 * cluster.
1054 */
/*
 * Does nothing unless restore_wal is set. Otherwise locates the "postgres"
 * executable, builds the shell command
 *   <postgres> -D <datadir_target> [-c config_file=<config_file>] -C restore_command
 * and stores the line it prints in the file-scope restore_command. A missing
 * or mismatched postgres binary, an unreadable value or an empty value all end
 * in pg_fatal().
 *
 * NOTE(review): numbers 1056, 1067, 1112 and 1118 are skipped. The line with
 * the function name is missing (the body uses "argv0"; the prototype near the
 * top of the file is getRestoreCommand(const char *argv0)), the version-string
 * argument of find_other_exec() is missing, a statement after the NULL check
 * is missing, and the pg_log_debug() call near the end is cut off.
 */
1055static void
1057{
1058 int rc;
1059 char postgres_exec_path[MAXPGPATH];
1060 PQExpBuffer postgres_cmd;
1061
1062 if (!restore_wal)
1063 return;
1064
1065 /* find postgres executable */
1066 rc = find_other_exec(argv0, "postgres",
1068 postgres_exec_path);
1069
1070 if (rc < 0)
1071 {
1072 char full_path[MAXPGPATH];
1073
 /* Fall back to the bare program name if our own path cannot be resolved. */
1074 if (find_my_exec(argv0, full_path) < 0)
1075 strlcpy(full_path, progname, sizeof(full_path));
1076
 /* -1 is reported as "not found"; any other negative value as a version mismatch. */
1077 if (rc == -1)
1078 pg_fatal("program \"%s\" is needed by %s but was not found in the same directory as \"%s\"",
1079 "postgres", progname, full_path);
1080 else
1081 pg_fatal("program \"%s\" was found by \"%s\" but was not the same version as %s",
1082 "postgres", full_path, progname);
1083 }
1084
1085 /*
1086 * Build a command able to retrieve the value of GUC parameter
1087 * restore_command, if set.
1088 */
1089 postgres_cmd = createPQExpBuffer();
1090
1091 /* path to postgres, properly quoted */
1092 appendShellString(postgres_cmd, postgres_exec_path);
1093
1094 /* add -D switch, with properly quoted data directory */
1095 appendPQExpBufferStr(postgres_cmd, " -D ");
1096 appendShellString(postgres_cmd, datadir_target);
1097
1098 /* add custom configuration file only if requested */
1099 if (config_file != NULL)
1100 {
1101 appendPQExpBufferStr(postgres_cmd, " -c config_file=");
1102 appendShellString(postgres_cmd, config_file);
1103 }
1104
1105 /* add -C switch, for restore_command */
1106 appendPQExpBufferStr(postgres_cmd, " -C restore_command");
1107
1108 restore_command = pipe_read_line(postgres_cmd->data);
1109 if (restore_command == NULL)
1110 pg_fatal("could not read \"restore_command\" from target cluster");
1111
1113
1114 if (strcmp(restore_command, "") == 0)
1115 pg_fatal("\"restore_command\" is not set in the target cluster");
1116
1117 pg_log_debug("using for rewind \"restore_command = \'%s\'\"",
1119
1120 destroyPQExpBuffer(postgres_cmd);
1121}
1122
1123
1124/*
1125 * Ensure clean shutdown of target instance by launching single-user mode
1126 * postgres to do crash recovery.
1127 */
/*
 * Locates the "postgres" executable and, unless dry_run is set, runs
 *   <postgres> --single -F -D <datadir_target> [-c config_file=<config_file>] template1 < DEVNULL
 * through system(). A non-zero result is logged together with the command and
 * the program exits with status 1.
 *
 * NOTE(review): numbers 1129 and 1137 are skipped. The line with the function
 * name is missing (the body uses "argv0"; the prototype near the top of the
 * file is ensureCleanShutdown(const char *argv0)), and so is the
 * version-string argument of find_other_exec().
 */
1128static void
1130{
1131 int ret;
1132 char exec_path[MAXPGPATH];
1133 PQExpBuffer postgres_cmd;
1134
1135 /* locate postgres binary */
1136 if ((ret = find_other_exec(argv0, "postgres",
1138 exec_path)) < 0)
1139 {
1140 char full_path[MAXPGPATH];
1141
 /* Fall back to the bare program name if our own path cannot be resolved. */
1142 if (find_my_exec(argv0, full_path) < 0)
1143 strlcpy(full_path, progname, sizeof(full_path));
1144
 /* -1 is reported as "not found"; any other negative value as a version mismatch. */
1145 if (ret == -1)
1146 pg_fatal("program \"%s\" is needed by %s but was not found in the same directory as \"%s\"",
1147 "postgres", progname, full_path);
1148 else
1149 pg_fatal("program \"%s\" was found by \"%s\" but was not the same version as %s",
1150 "postgres", full_path, progname);
1151 }
1152
1153 pg_log_info("executing \"%s\" for target server to complete crash recovery",
1154 exec_path);
1155
1156 /*
1157 * Skip processing if requested, but only after ensuring presence of
1158 * postgres.
1159 */
1160 if (dry_run)
1161 return;
1162
1163 /*
1164 * Finally run postgres in single-user mode. There is no need to use
1165 * fsync here. This makes the recovery faster, and the target data folder
1166 * is synced at the end anyway.
1167 */
1168 postgres_cmd = createPQExpBuffer();
1169
1170 /* path to postgres, properly quoted */
1171 appendShellString(postgres_cmd, exec_path);
1172
1173 /* add set of options with properly quoted data directory */
1174 appendPQExpBufferStr(postgres_cmd, " --single -F -D ");
1175 appendShellString(postgres_cmd, datadir_target);
1176
1177 /* add custom configuration file only if requested */
1178 if (config_file != NULL)
1179 {
1180 appendPQExpBufferStr(postgres_cmd, " -c config_file=");
1181 appendShellString(postgres_cmd, config_file);
1182 }
1183
1184 /* finish with the database name, and a properly quoted redirection */
1185 appendPQExpBufferStr(postgres_cmd, " template1 < ");
1186 appendShellString(postgres_cmd, DEVNULL);
1187
 /* Flush our own buffered output before the child process writes to the same streams. */
1188 fflush(NULL);
1189 if (system(postgres_cmd->data) != 0)
1190 {
1191 pg_log_error("postgres single-user mode in target cluster failed");
1192 pg_log_error_detail("Command was: %s", postgres_cmd->data);
1193 exit(1);
1194 }
1195
1196 destroyPQExpBuffer(postgres_cmd);
1197}
1198
/*
 * Closes the libpq connection if one is still open.
 *
 * NOTE(review): number 1200 is skipped, so the line with the function name is
 * missing. Presumably this is disconnect_atexit(void), which main() registers
 * with atexit() -- confirm against upstream.
 */
1199static void
1201{
1202 if (conn != NULL)
1203 PQfinish(conn);
1204}
Datum now(PG_FUNCTION_ARGS)
Definition: timestamp.c:1608
uint32 BlockNumber
Definition: block.h:31
#define PG_DATA_CHECKSUM_VERSION
Definition: bufpage.h:207
#define Min(x, y)
Definition: c.h:961
#define ngettext(s, p, n)
Definition: c.h:1138
#define Max(x, y)
Definition: c.h:955
#define Assert(condition)
Definition: c.h:815
#define PG_TEXTDOMAIN(domain)
Definition: c.h:1171
#define UINT64_FORMAT
Definition: c.h:507
uint64_t uint64
Definition: c.h:489
#define CATALOG_VERSION_NO
Definition: catversion.h:60
int find_my_exec(const char *argv0, char *retpath)
Definition: exec.c:160
char * pipe_read_line(char *cmd)
Definition: exec.c:352
void set_pglocale_pgservice(const char *argv0, const char *app)
Definition: exec.c:429
int find_other_exec(const char *argv0, const char *target, const char *versionstr, char *retpath)
Definition: exec.c:310
void update_controlfile(const char *DataDir, ControlFileData *ControlFile, bool do_sync)
#define fprintf(file, fmt, msg)
Definition: cubescan.l:21
bool datapagemap_next(datapagemap_iterator_t *iter, BlockNumber *blkno)
Definition: datapagemap.c:87
datapagemap_iterator_t * datapagemap_iterate(datapagemap_t *map)
Definition: datapagemap.c:75
#define _(x)
Definition: elog.c:90
PGconn * PQconnectdb(const char *conninfo)
Definition: fe-connect.c:753
ConnStatusType PQstatus(const PGconn *conn)
Definition: fe-connect.c:7205
void PQfinish(PGconn *conn)
Definition: fe-connect.c:4939
char * PQerrorMessage(const PGconn *conn)
Definition: fe-connect.c:7268
void * pg_malloc(size_t size)
Definition: fe_memutils.c:47
char * pg_strdup(const char *in)
Definition: fe_memutils.c:85
void pg_free(void *ptr)
Definition: fe_memutils.c:105
void traverse_datadir(const char *datadir, process_file_callback_t callback)
Definition: file_ops.c:362
char * slurpFile(const char *datadir, const char *path, size_t *filesize)
Definition: file_ops.c:314
void close_target_file(void)
Definition: file_ops.c:75
void truncate_target_file(const char *path, off_t newsize)
Definition: file_ops.c:206
void remove_target(file_entry_t *entry)
Definition: file_ops.c:130
void sync_target_dir(void)
Definition: file_ops.c:294
void create_target(file_entry_t *entry)
Definition: file_ops.c:156
void open_target_file(const char *path, bool trunc)
Definition: file_ops.c:47
void write_target_range(char *buf, off_t begin, size_t size)
Definition: file_ops.c:88
int pg_mode_mask
Definition: file_perm.c:25
bool GetDataDirectoryCreatePerm(const char *dataDir)
DataDirSyncMethod
Definition: file_utils.h:28
@ DATA_DIR_SYNC_METHOD_FSYNC
Definition: file_utils.h:29
void filehash_init(void)
Definition: filemap.c:196
void process_source_file(const char *path, file_type_t type, size_t size, const char *link_target)
Definition: filemap.c:279
void print_filemap(filemap_t *filemap)
Definition: filemap.c:540
void keepwal_init(void)
Definition: filemap.c:242
void process_target_file(const char *path, file_type_t type, size_t size, const char *link_target)
Definition: filemap.c:315
filemap_t * decide_file_actions(void)
Definition: filemap.c:861
void calculate_totals(filemap_t *filemap)
Definition: filemap.c:499
@ FILE_ACTION_REMOVE
Definition: filemap.h:27
@ FILE_ACTION_COPY
Definition: filemap.h:21
@ FILE_ACTION_NONE
Definition: filemap.h:24
@ FILE_ACTION_COPY_TAIL
Definition: filemap.h:22
@ FILE_ACTION_UNDECIDED
Definition: filemap.h:18
@ FILE_ACTION_TRUNCATE
Definition: filemap.h:26
@ FILE_ACTION_CREATE
Definition: filemap.h:20
int getopt_long(int argc, char *const argv[], const char *optstring, const struct option *longopts, int *longindex)
Definition: getopt_long.c:60
#define no_argument
Definition: getopt_long.h:25
#define required_argument
Definition: getopt_long.h:26
int b
Definition: isn.c:69
int a
Definition: isn.c:68
int i
Definition: isn.c:72
@ CONNECTION_BAD
Definition: libpq-fe.h:82
static void const char fflush(stdout)
exit(1)
rewind_source * init_libpq_source(PGconn *conn)
Definition: libpq_source.c:81
rewind_source * init_local_source(const char *datadir)
Definition: local_source.c:38
static struct pg_tm tm
Definition: localtime.c:104
void pg_logging_increase_verbosity(void)
Definition: logging.c:185
void pg_logging_init(const char *argv0)
Definition: logging.c:83
#define pg_log_error(...)
Definition: logging.h:106
#define pg_log_error_hint(...)
Definition: logging.h:112
#define pg_log_info(...)
Definition: logging.h:124
#define pg_log_error_detail(...)
Definition: logging.h:109
#define pg_log_debug(...)
Definition: logging.h:133
void pfree(void *pointer)
Definition: mcxt.c:1521
bool parse_sync_method(const char *optarg, DataDirSyncMethod *sync_method)
Definition: option_utils.c:90
void extractPageMap(const char *datadir, XLogRecPtr startpoint, int tliIndex, XLogRecPtr endpoint, const char *restoreCommand)
Definition: parsexlog.c:66
void findLastCheckpoint(const char *datadir, XLogRecPtr forkptr, int tliIndex, XLogRecPtr *lastchkptrec, TimeLineID *lastchkpttli, XLogRecPtr *lastchkptredo, const char *restoreCommand)
Definition: parsexlog.c:168
XLogRecPtr readOneRecord(const char *datadir, XLogRecPtr ptr, int tliIndex, const char *restoreCommand)
Definition: parsexlog.c:124
static pg_time_t last_progress_report
Definition: pg_amcheck.c:147
#define pg_fatal(...)
static bool writerecoveryconf
#define MAXPGPATH
#define PG_CONTROL_VERSION
Definition: pg_control.h:25
@ DB_IN_PRODUCTION
Definition: pg_control.h:97
@ DB_IN_ARCHIVE_RECOVERY
Definition: pg_control.h:96
@ DB_SHUTDOWNED_IN_RECOVERY
Definition: pg_control.h:93
@ DB_SHUTDOWNED
Definition: pg_control.h:92
#define PG_CONTROL_FILE_SIZE
Definition: pg_control.h:250
uint32 pg_crc32c
Definition: pg_crc32c.h:38
#define COMP_CRC32C(crc, data, len)
Definition: pg_crc32c.h:98
#define EQ_CRC32C(c1, c2)
Definition: pg_crc32c.h:42
#define INIT_CRC32C(crc)
Definition: pg_crc32c.h:41
#define FIN_CRC32C(crc)
Definition: pg_crc32c.h:103
const void size_t len
return crc
static char * argv0
Definition: pg_ctl.c:93
static char * exec_path
Definition: pg_ctl.c:88
PGDLLIMPORT int optind
Definition: getopt.c:51
PGDLLIMPORT char * optarg
Definition: getopt.c:53
static ControlFileData ControlFile_source
Definition: pg_rewind.c:60
static void createBackupLabel(XLogRecPtr startpoint, TimeLineID starttli, XLogRecPtr checkpointloc)
Definition: pg_rewind.c:961
static void usage(const char *progname)
Definition: pg_rewind.c:92
static void sanityChecks(void)
Definition: pg_rewind.c:733
static char * datadir_source
Definition: pg_rewind.c:68
static void findCommonAncestorTimeline(TimeLineHistoryEntry *a_history, int a_nentries, TimeLineHistoryEntry *b_history, int b_nentries, XLogRecPtr *recptr, int *tliIndex)
Definition: pg_rewind.c:919
static ControlFileData ControlFile_source_after
Definition: pg_rewind.c:61
int WalSegSz
Definition: pg_rewind.c:64
static char * restore_command
Definition: pg_rewind.c:70
static bool debug
Definition: pg_rewind.c:73
int main(int argc, char **argv)
Definition: pg_rewind.c:121
static XLogRecPtr MinXLogRecPtr(XLogRecPtr a, XLogRecPtr b)
Definition: pg_rewind.c:842
static void ensureCleanShutdown(const char *argv0)
Definition: pg_rewind.c:1129
TimeLineHistoryEntry * targetHistory
Definition: pg_rewind.c:81
static rewind_source * source
Definition: pg_rewind.c:89
static ControlFileData ControlFile_target
Definition: pg_rewind.c:59
void progress_report(bool finished)
Definition: pg_rewind.c:791
static TimeLineHistoryEntry * getTimelineHistory(TimeLineID tli, bool is_source, int *nentries)
Definition: pg_rewind.c:856
static void digestControlFile(ControlFileData *ControlFile, const char *content, size_t size)
Definition: pg_rewind.c:1023
static char * connstr_source
Definition: pg_rewind.c:69
static void checkControlFile(ControlFileData *ControlFile)
Definition: pg_rewind.c:1004
static void getRestoreCommand(const char *argv0)
Definition: pg_rewind.c:1056
char * datadir_target
Definition: pg_rewind.c:67
bool do_sync
Definition: pg_rewind.c:76
static bool restore_wal
Definition: pg_rewind.c:77
uint64 fetch_done
Definition: pg_rewind.c:86
int targetNentries
Definition: pg_rewind.c:82
uint64 fetch_size
Definition: pg_rewind.c:85
static char * config_file
Definition: pg_rewind.c:71
bool dry_run
Definition: pg_rewind.c:75
DataDirSyncMethod sync_method
Definition: pg_rewind.c:78
bool showprogress
Definition: pg_rewind.c:74
static const char * progname
Definition: pg_rewind.c:63
static void perform_rewind(filemap_t *filemap, rewind_source *source, XLogRecPtr chkptrec, TimeLineID chkpttli, XLogRecPtr chkptredo)
Definition: pg_rewind.c:553
static void disconnect_atexit(void)
Definition: pg_rewind.c:1200
static PGconn * conn
Definition: pg_rewind.c:88
TimeLineHistoryEntry * rewind_parseTimeLineHistory(char *buffer, TimeLineID targetTLI, int *nentries)
Definition: timeline.c:28
static char * buf
Definition: pg_test_fsync.c:72
int64 pg_time_t
Definition: pgtime.h:23
#define snprintf
Definition: port.h:238
#define DEVNULL
Definition: port.h:160
#define PG_BACKEND_VERSIONSTR
Definition: port.h:143
const char * get_progname(const char *argv0)
Definition: path.c:575
#define printf(...)
Definition: port.h:244
size_t strlcpy(char *dst, const char *src, size_t siz)
Definition: strlcpy.c:45
PQExpBuffer createPQExpBuffer(void)
Definition: pqexpbuffer.c:72
void destroyPQExpBuffer(PQExpBuffer str)
Definition: pqexpbuffer.c:114
void appendPQExpBufferStr(PQExpBuffer str, const char *data)
Definition: pqexpbuffer.c:367
char * c
void WriteRecoveryConfig(PGconn *pgconn, const char *target_dir, PQExpBuffer contents)
Definition: recovery_gen.c:124
PQExpBuffer GenerateRecoveryConfig(PGconn *pgconn, const char *replication_slot, char *dbname)
Definition: recovery_gen.c:27
void get_restricted_token(void)
static pg_noinline void Size size
Definition: slab.c:607
int pg_strip_crlf(char *str)
Definition: string.c:154
void appendShellString(PQExpBuffer buf, const char *str)
Definition: string_utils.c:429
TimeLineID ThisTimeLineID
Definition: pg_control.h:39
XLogRecPtr redo
Definition: pg_control.h:37
uint32 pg_control_version
Definition: pg_control.h:125
uint32 xlog_seg_size
Definition: pg_control.h:211
CheckPoint checkPointCopy
Definition: pg_control.h:135
XLogRecPtr minRecoveryPoint
Definition: pg_control.h:168
uint32 data_checksum_version
Definition: pg_control.h:222
XLogRecPtr checkPoint
Definition: pg_control.h:133
uint64 system_identifier
Definition: pg_control.h:110
uint32 catalog_version_no
Definition: pg_control.h:126
TimeLineID minRecoveryPointTLI
Definition: pg_control.h:169
pg_crc32c crc
Definition: pg_control.h:232
XLogRecPtr begin
Definition: timeline.h:28
TimeLineID tli
Definition: timeline.h:27
XLogRecPtr end
Definition: timeline.h:29
int bitmapsize
Definition: datapagemap.h:17
Definition: filemap.h:50
datapagemap_t target_pages_to_overwrite
Definition: filemap.h:68
const char * path
Definition: filemap.h:53
size_t source_size
Definition: filemap.h:75
size_t target_size
Definition: filemap.h:61
file_action_t action
Definition: filemap.h:81
file_entry_t * entries[FLEXIBLE_ARRAY_MEMBER]
Definition: filemap.h:96
int nentries
Definition: filemap.h:95
uint64 total_size
Definition: filemap.h:92
uint64 fetch_size
Definition: filemap.h:93
void(* queue_fetch_file)(struct rewind_source *, const char *path, size_t len)
Definition: rewind_source.h:60
void(* traverse_files)(struct rewind_source *, process_file_callback_t callback)
Definition: rewind_source.h:29
void(* finish_fetch)(struct rewind_source *)
Definition: rewind_source.h:66
XLogRecPtr(* get_current_wal_insert_lsn)(struct rewind_source *)
Definition: rewind_source.h:71
void(* queue_fetch_range)(struct rewind_source *, const char *path, off_t offset, size_t len)
Definition: rewind_source.h:47
char *(* fetch_file)(struct rewind_source *, const char *path, size_t *filesize)
Definition: rewind_source.h:37
void(* destroy)(struct rewind_source *)
Definition: rewind_source.h:76
static ControlFileData * ControlFile
Definition: xlog.c:574
#define IsValidWalSegSize(size)
Definition: xlog_internal.h:96
#define MAXFNAMELEN
#define XLByteToSeg(xlrp, logSegNo, wal_segsz_bytes)
static void XLogFileName(char *fname, TimeLineID tli, XLogSegNo logSegNo, int wal_segsz_bytes)
static void TLHistoryFilePath(char *path, TimeLineID tli)
#define LSN_FORMAT_ARGS(lsn)
Definition: xlogdefs.h:43
#define XLogRecPtrIsInvalid(r)
Definition: xlogdefs.h:29
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28
uint32 TimeLineID
Definition: xlogdefs.h:59
uint64 XLogSegNo
Definition: xlogdefs.h:48