PostgreSQL Source Code  git master
pg_verifybackup.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * pg_verifybackup.c
4  * Verify a backup against a backup manifest.
5  *
6  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  * src/bin/pg_verifybackup/pg_verifybackup.c
10  *
11  *-------------------------------------------------------------------------
12  */
13 
14 #include "postgres_fe.h"
15 
16 #include <dirent.h>
17 #include <fcntl.h>
18 #include <sys/stat.h>
19 #include <time.h>
20 
22 #include "common/hashfn_unstable.h"
23 #include "common/logging.h"
24 #include "common/parse_manifest.h"
25 #include "fe_utils/simple_list.h"
26 #include "getopt_long.h"
27 #include "pgtime.h"
28 
29 /*
30  * For efficiency, we'd like our hash table containing information about the
31  * manifest to start out with approximately the correct number of entries.
32  * There's no way to know the exact number of entries without reading the whole
33  * file, but we can get an estimate by dividing the file size by the estimated
34  * number of bytes per line.
35  *
36  * This could be off by about a factor of two in either direction, because the
37  * checksum algorithm has a big impact on the line lengths; e.g. a SHA512
38  * checksum is 128 hex bytes, whereas a CRC-32C value is only 8, and there
39  * might be no checksum at all.
40  */
41 #define ESTIMATED_BYTES_PER_MANIFEST_LINE 100
42 
43 /*
44  * How many bytes should we try to read from a file at once?
45  */
46 #define READ_CHUNK_SIZE (128 * 1024)
47 
48 /*
49  * Each file described by the manifest file is parsed to produce an object
50  * like this.
51  */
52 typedef struct manifest_file
53 {
54  uint32 status; /* hash status */
55  char *pathname;
56  size_t size;
58  int checksum_length;
60  bool matched;
61  bool bad;
63 
/*
 * Decide whether the checksum of manifest entry 'm' should be verified:
 * the entry must have been matched, must not already be marked bad, and
 * must carry a checksum type other than CHECKSUM_TYPE_NONE.
 *
 * Note that 'm' is evaluated more than once, so callers should pass an
 * expression without side effects.
 */
#define should_verify_checksum(m) \
	(((m)->matched) && !((m)->bad) && (((m)->checksum_type) != CHECKSUM_TYPE_NONE))
66 
67 /*
68  * Define a hash table which we can use to store information about the files
69  * mentioned in the backup manifest.
70  */
71 #define SH_PREFIX manifest_files
72 #define SH_ELEMENT_TYPE manifest_file
73 #define SH_KEY_TYPE char *
74 #define SH_KEY pathname
75 #define SH_HASH_KEY(tb, key) hash_string(key)
76 #define SH_EQUAL(tb, a, b) (strcmp(a, b) == 0)
77 #define SH_SCOPE static inline
78 #define SH_RAW_ALLOCATOR pg_malloc0
79 #define SH_DECLARE
80 #define SH_DEFINE
81 #include "lib/simplehash.h"
82 
83 /*
84  * Each WAL range described by the manifest file is parsed to produce an
85  * object like this.
86  */
87 typedef struct manifest_wal_range
88 {
92  struct manifest_wal_range *next;
93  struct manifest_wal_range *prev;
95 
96 /*
97  * All the data parsed from a backup_manifest file.
98  */
99 typedef struct manifest_data
100 {
101  int version;
102  uint64 system_identifier;
103  manifest_files_hash *files;
107 
108 /*
109  * All of the context information we need while checking a backup manifest.
110  */
111 typedef struct verifier_context
112 {
119 
120 static manifest_data *parse_manifest_file(char *manifest_path);
122  int manifest_version);
124  uint64 manifest_system_identifier);
126  char *pathname, size_t size,
127  pg_checksum_type checksum_type,
128  int checksum_length,
129  uint8 *checksum_payload);
131  TimeLineID tli,
132  XLogRecPtr start_lsn,
133  XLogRecPtr end_lsn);
135  const char *fmt,...)
137 
139  char *relpath, char *fullpath);
141  char *relpath, char *fullpath);
142 static void verify_control_file(const char *controlpath,
143  uint64 manifest_system_identifier);
147  manifest_file *m, char *fullpath,
148  uint8 *buffer);
150  char *pg_waldump_path,
151  char *wal_directory);
152 
154  const char *pg_restrict fmt,...)
155  pg_attribute_printf(2, 3);
156 static void report_fatal_error(const char *pg_restrict fmt,...)
159 
160 static void progress_report(bool finished);
161 static void usage(void);
162 
163 static const char *progname;
164 
165 /* options */
166 static bool show_progress = false;
167 static bool skip_checksums = false;
168 
169 /* Progress indicators */
170 static uint64 total_size = 0;
171 static uint64 done_size = 0;
172 
173 /*
174  * Main entry point.
175  */
176 int
177 main(int argc, char **argv)
178 {
179  static struct option long_options[] = {
180  {"exit-on-error", no_argument, NULL, 'e'},
181  {"ignore", required_argument, NULL, 'i'},
182  {"manifest-path", required_argument, NULL, 'm'},
183  {"no-parse-wal", no_argument, NULL, 'n'},
184  {"progress", no_argument, NULL, 'P'},
185  {"quiet", no_argument, NULL, 'q'},
186  {"skip-checksums", no_argument, NULL, 's'},
187  {"wal-directory", required_argument, NULL, 'w'},
188  {NULL, 0, NULL, 0}
189  };
190 
191  int c;
193  char *manifest_path = NULL;
194  bool no_parse_wal = false;
195  bool quiet = false;
196  char *wal_directory = NULL;
197  char *pg_waldump_path = NULL;
198 
199  pg_logging_init(argv[0]);
200  set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_verifybackup"));
201  progname = get_progname(argv[0]);
202 
203  memset(&context, 0, sizeof(context));
204 
205  if (argc > 1)
206  {
207  if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
208  {
209  usage();
210  exit(0);
211  }
212  if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
213  {
214  puts("pg_verifybackup (PostgreSQL) " PG_VERSION);
215  exit(0);
216  }
217  }
218 
219  /*
220  * Skip certain files in the toplevel directory.
221  *
222  * Ignore the backup_manifest file, because it's not included in the
223  * backup manifest.
224  *
225  * Ignore the pg_wal directory, because those files are not included in
226  * the backup manifest either, since they are fetched separately from the
227  * backup itself, and verified via a separate mechanism.
228  *
229  * Ignore postgresql.auto.conf, recovery.signal, and standby.signal,
230  * because we expect that those files may sometimes be created or changed
231  * as part of the backup process. For example, pg_basebackup -R will
232  * modify postgresql.auto.conf and create standby.signal.
233  */
234  simple_string_list_append(&context.ignore_list, "backup_manifest");
235  simple_string_list_append(&context.ignore_list, "pg_wal");
236  simple_string_list_append(&context.ignore_list, "postgresql.auto.conf");
237  simple_string_list_append(&context.ignore_list, "recovery.signal");
238  simple_string_list_append(&context.ignore_list, "standby.signal");
239 
240  while ((c = getopt_long(argc, argv, "ei:m:nPqsw:", long_options, NULL)) != -1)
241  {
242  switch (c)
243  {
244  case 'e':
245  context.exit_on_error = true;
246  break;
247  case 'i':
248  {
249  char *arg = pstrdup(optarg);
250 
252  simple_string_list_append(&context.ignore_list, arg);
253  break;
254  }
255  case 'm':
256  manifest_path = pstrdup(optarg);
257  canonicalize_path(manifest_path);
258  break;
259  case 'n':
260  no_parse_wal = true;
261  break;
262  case 'P':
263  show_progress = true;
264  break;
265  case 'q':
266  quiet = true;
267  break;
268  case 's':
269  skip_checksums = true;
270  break;
271  case 'w':
272  wal_directory = pstrdup(optarg);
273  canonicalize_path(wal_directory);
274  break;
275  default:
276  /* getopt_long already emitted a complaint */
277  pg_log_error_hint("Try \"%s --help\" for more information.", progname);
278  exit(1);
279  }
280  }
281 
282  /* Get backup directory name */
283  if (optind >= argc)
284  {
285  pg_log_error("no backup directory specified");
286  pg_log_error_hint("Try \"%s --help\" for more information.", progname);
287  exit(1);
288  }
289  context.backup_directory = pstrdup(argv[optind++]);
290  canonicalize_path(context.backup_directory);
291 
292  /* Complain if any arguments remain */
293  if (optind < argc)
294  {
295  pg_log_error("too many command-line arguments (first is \"%s\")",
296  argv[optind]);
297  pg_log_error_hint("Try \"%s --help\" for more information.", progname);
298  exit(1);
299  }
300 
301  /* Complain if the specified arguments conflict */
302  if (show_progress && quiet)
303  pg_fatal("cannot specify both %s and %s",
304  "-P/--progress", "-q/--quiet");
305 
306  /* Unless --no-parse-wal was specified, we will need pg_waldump. */
307  if (!no_parse_wal)
308  {
309  int ret;
310 
311  pg_waldump_path = pg_malloc(MAXPGPATH);
312  ret = find_other_exec(argv[0], "pg_waldump",
313  "pg_waldump (PostgreSQL) " PG_VERSION "\n",
314  pg_waldump_path);
315  if (ret < 0)
316  {
317  char full_path[MAXPGPATH];
318 
319  if (find_my_exec(argv[0], full_path) < 0)
320  strlcpy(full_path, progname, sizeof(full_path));
321 
322  if (ret == -1)
323  pg_fatal("program \"%s\" is needed by %s but was not found in the same directory as \"%s\"",
324  "pg_waldump", "pg_verifybackup", full_path);
325  else
326  pg_fatal("program \"%s\" was found by \"%s\" but was not the same version as %s",
327  "pg_waldump", full_path, "pg_verifybackup");
328  }
329  }
330 
331  /* By default, look for the manifest in the backup directory. */
332  if (manifest_path == NULL)
333  manifest_path = psprintf("%s/backup_manifest",
334  context.backup_directory);
335 
336  /* By default, look for the WAL in the backup directory, too. */
337  if (wal_directory == NULL)
338  wal_directory = psprintf("%s/pg_wal", context.backup_directory);
339 
340  /*
341  * Try to read the manifest. We treat any errors encountered while parsing
342  * the manifest as fatal; there doesn't seem to be much point in trying to
343  * verify the backup directory against a corrupted manifest.
344  */
345  context.manifest = parse_manifest_file(manifest_path);
346 
347  /*
348  * Now scan the files in the backup directory. At this stage, we verify
349  * that every file on disk is present in the manifest and that the sizes
350  * match. We also set the "matched" flag on every manifest entry that
351  * corresponds to a file on disk.
352  */
353  verify_backup_directory(&context, NULL, context.backup_directory);
354 
355  /*
356  * The "matched" flag should now be set on every entry in the hash table.
357  * Any entries for which the bit is not set are files mentioned in the
358  * manifest that don't exist on disk.
359  */
361 
362  /*
363  * Now do the expensive work of verifying file checksums, unless we were
364  * told to skip it.
365  */
366  if (!skip_checksums)
368 
369  /*
370  * Try to parse the required ranges of WAL records, unless we were told
371  * not to do so.
372  */
373  if (!no_parse_wal)
374  parse_required_wal(&context, pg_waldump_path, wal_directory);
375 
376  /*
377  * If everything looks OK, tell the user this, unless we were asked to
378  * work quietly.
379  */
380  if (!context.saw_any_error && !quiet)
381  printf(_("backup successfully verified\n"));
382 
383  return context.saw_any_error ? 1 : 0;
384 }
385 
386 /*
387  * Parse a manifest file and return a data structure describing the contents.
388  */
389 static manifest_data *
390 parse_manifest_file(char *manifest_path)
391 {
392  int fd;
393  struct stat statbuf;
394  off_t estimate;
395  uint32 initial_size;
396  manifest_files_hash *ht;
397  char *buffer;
398  int rc;
400  manifest_data *result;
401 
402  int chunk_size = READ_CHUNK_SIZE;
403 
404  /* Open the manifest file. */
405  if ((fd = open(manifest_path, O_RDONLY | PG_BINARY, 0)) < 0)
406  report_fatal_error("could not open file \"%s\": %m", manifest_path);
407 
408  /* Figure out how big the manifest is. */
409  if (fstat(fd, &statbuf) != 0)
410  report_fatal_error("could not stat file \"%s\": %m", manifest_path);
411 
412  /* Guess how large to make the hash table based on the manifest size. */
413  estimate = statbuf.st_size / ESTIMATED_BYTES_PER_MANIFEST_LINE;
414  initial_size = Min(PG_UINT32_MAX, Max(estimate, 256));
415 
416  /* Create the hash table. */
417  ht = manifest_files_create(initial_size, NULL);
418 
419  result = pg_malloc0(sizeof(manifest_data));
420  result->files = ht;
421  context.private_data = result;
422  context.version_cb = verifybackup_version_cb;
423  context.system_identifier_cb = verifybackup_system_identifier;
424  context.per_file_cb = verifybackup_per_file_cb;
425  context.per_wal_range_cb = verifybackup_per_wal_range_cb;
426  context.error_cb = report_manifest_error;
427 
428  /*
429  * Parse the file, in chunks if necessary.
430  */
431  if (statbuf.st_size <= chunk_size)
432  {
433  buffer = pg_malloc(statbuf.st_size);
434  rc = read(fd, buffer, statbuf.st_size);
435  if (rc != statbuf.st_size)
436  {
437  if (rc < 0)
438  pg_fatal("could not read file \"%s\": %m", manifest_path);
439  else
440  pg_fatal("could not read file \"%s\": read %d of %lld",
441  manifest_path, rc, (long long int) statbuf.st_size);
442  }
443 
444  /* Close the manifest file. */
445  close(fd);
446 
447  /* Parse the manifest. */
448  json_parse_manifest(&context, buffer, statbuf.st_size);
449  }
450  else
451  {
452  int bytes_left = statbuf.st_size;
454 
456 
457  buffer = pg_malloc(chunk_size + 1);
458 
459  while (bytes_left > 0)
460  {
461  int bytes_to_read = chunk_size;
462 
463  /*
464  * Make sure that the last chunk is sufficiently large. (i.e. at
465  * least half the chunk size) so that it will contain fully the
466  * piece at the end with the checksum.
467  */
468  if (bytes_left < chunk_size)
469  bytes_to_read = bytes_left;
470  else if (bytes_left < 2 * chunk_size)
471  bytes_to_read = bytes_left / 2;
472  rc = read(fd, buffer, bytes_to_read);
473  if (rc != bytes_to_read)
474  {
475  if (rc < 0)
476  pg_fatal("could not read file \"%s\": %m", manifest_path);
477  else
478  pg_fatal("could not read file \"%s\": read %lld of %lld",
479  manifest_path,
480  (long long int) (statbuf.st_size + rc - bytes_left),
481  (long long int) statbuf.st_size);
482  }
483  bytes_left -= rc;
484  json_parse_manifest_incremental_chunk(inc_state, buffer, rc,
485  bytes_left == 0);
486  }
487 
488  /* Release the incremental state memory */
490 
491  close(fd);
492  }
493 
494  /* Done with the buffer. */
495  pfree(buffer);
496 
497  return result;
498 }
499 
500 /*
501  * Report an error while parsing the manifest.
502  *
503  * We consider all such errors to be fatal errors. The manifest parser
504  * expects this function not to return.
505  */
506 static void
508 {
509  va_list ap;
510 
511  va_start(ap, fmt);
513  va_end(ap);
514 
515  exit(1);
516 }
517 
518 /*
519  * Record details extracted from the backup manifest.
520  */
521 static void
523  int manifest_version)
524 {
525  manifest_data *manifest = context->private_data;
526 
527  /* Validation will be at the later stage */
528  manifest->version = manifest_version;
529 }
530 
531 /*
532  * Record details extracted from the backup manifest.
533  */
534 static void
536  uint64 manifest_system_identifier)
537 {
538  manifest_data *manifest = context->private_data;
539 
540  /* Validation will be at the later stage */
541  manifest->system_identifier = manifest_system_identifier;
542 }
543 
544 /*
545  * Record details extracted from the backup manifest for one file.
546  */
547 static void
549  char *pathname, size_t size,
550  pg_checksum_type checksum_type,
551  int checksum_length, uint8 *checksum_payload)
552 {
553  manifest_data *manifest = context->private_data;
554  manifest_files_hash *ht = manifest->files;
555  manifest_file *m;
556  bool found;
557 
558  /* Make a new entry in the hash table for this file. */
559  m = manifest_files_insert(ht, pathname, &found);
560  if (found)
561  report_fatal_error("duplicate path name in backup manifest: \"%s\"",
562  pathname);
563 
564  /* Initialize the entry. */
565  m->size = size;
566  m->checksum_type = checksum_type;
567  m->checksum_length = checksum_length;
568  m->checksum_payload = checksum_payload;
569  m->matched = false;
570  m->bad = false;
571 }
572 
573 /*
574  * Record details extracted from the backup manifest for one WAL range.
575  */
576 static void
578  TimeLineID tli,
579  XLogRecPtr start_lsn, XLogRecPtr end_lsn)
580 {
581  manifest_data *manifest = context->private_data;
583 
584  /* Allocate and initialize a struct describing this WAL range. */
585  range = palloc(sizeof(manifest_wal_range));
586  range->tli = tli;
587  range->start_lsn = start_lsn;
588  range->end_lsn = end_lsn;
589  range->prev = manifest->last_wal_range;
590  range->next = NULL;
591 
592  /* Add it to the end of the list. */
593  if (manifest->first_wal_range == NULL)
594  manifest->first_wal_range = range;
595  else
596  manifest->last_wal_range->next = range;
597  manifest->last_wal_range = range;
598 }
599 
600 /*
601  * Verify one directory.
602  *
603  * 'relpath' is NULL if we are to verify the top-level backup directory,
604  * and otherwise the relative path to the directory that is to be verified.
605  *
606  * 'fullpath' is the backup directory with 'relpath' appended; i.e. the actual
607  * filesystem path at which it can be found.
608  */
609 static void
611  char *fullpath)
612 {
613  DIR *dir;
614  struct dirent *dirent;
615 
616  dir = opendir(fullpath);
617  if (dir == NULL)
618  {
619  /*
620  * If even the toplevel backup directory cannot be found, treat this
621  * as a fatal error.
622  */
623  if (relpath == NULL)
624  report_fatal_error("could not open directory \"%s\": %m", fullpath);
625 
626  /*
627  * Otherwise, treat this as a non-fatal error, but ignore any further
628  * errors related to this path and anything beneath it.
629  */
631  "could not open directory \"%s\": %m", fullpath);
632  simple_string_list_append(&context->ignore_list, relpath);
633 
634  return;
635  }
636 
637  while (errno = 0, (dirent = readdir(dir)) != NULL)
638  {
639  char *filename = dirent->d_name;
640  char *newfullpath = psprintf("%s/%s", fullpath, filename);
641  char *newrelpath;
642 
643  /* Skip "." and ".." */
644  if (filename[0] == '.' && (filename[1] == '\0'
645  || strcmp(filename, "..") == 0))
646  continue;
647 
648  if (relpath == NULL)
649  newrelpath = pstrdup(filename);
650  else
651  newrelpath = psprintf("%s/%s", relpath, filename);
652 
653  if (!should_ignore_relpath(context, newrelpath))
654  verify_backup_file(context, newrelpath, newfullpath);
655 
656  pfree(newfullpath);
657  pfree(newrelpath);
658  }
659 
660  if (closedir(dir))
661  {
663  "could not close directory \"%s\": %m", fullpath);
664  return;
665  }
666 }
667 
668 /*
669  * Verify one file (which might actually be a directory or a symlink).
670  *
671  * The arguments to this function have the same meaning as the arguments to
672  * verify_backup_directory.
673  */
674 static void
676 {
677  struct stat sb;
678  manifest_file *m;
679 
680  if (stat(fullpath, &sb) != 0)
681  {
683  "could not stat file or directory \"%s\": %m",
684  relpath);
685 
686  /*
687  * Suppress further errors related to this path name and, if it's a
688  * directory, anything underneath it.
689  */
690  simple_string_list_append(&context->ignore_list, relpath);
691 
692  return;
693  }
694 
695  /* If it's a directory, just recurse. */
696  if (S_ISDIR(sb.st_mode))
697  {
699  return;
700  }
701 
702  /* If it's not a directory, it should be a plain file. */
703  if (!S_ISREG(sb.st_mode))
704  {
706  "\"%s\" is not a file or directory",
707  relpath);
708  return;
709  }
710 
711  /* Check whether there's an entry in the manifest hash. */
712  m = manifest_files_lookup(context->manifest->files, relpath);
713  if (m == NULL)
714  {
716  "\"%s\" is present on disk but not in the manifest",
717  relpath);
718  return;
719  }
720 
721  /* Flag this entry as having been encountered in the filesystem. */
722  m->matched = true;
723 
724  /* Check that the size matches. */
725  if (m->size != sb.st_size)
726  {
728  "\"%s\" has size %lld on disk but size %zu in the manifest",
729  relpath, (long long int) sb.st_size, m->size);
730  m->bad = true;
731  }
732 
733  /*
734  * Validate the manifest system identifier, not available in manifest
735  * version 1.
736  */
737  if (context->manifest->version != 1 &&
738  strcmp(relpath, "global/pg_control") == 0)
739  verify_control_file(fullpath, context->manifest->system_identifier);
740 
741  /* Update statistics for progress report, if necessary */
743  total_size += m->size;
744 
745  /*
746  * We don't verify checksums at this stage. We first finish verifying that
747  * we have the expected set of files with the expected sizes, and only
748  * afterwards verify the checksums. That's because computing checksums may
749  * take a while, and we'd like to report more obvious problems quickly.
750  */
751 }
752 
753 /*
754  * Sanity check control file and validate system identifier against manifest
755  * system identifier.
756  */
757 static void
758 verify_control_file(const char *controlpath, uint64 manifest_system_identifier)
759 {
760  ControlFileData *control_file;
761  bool crc_ok;
762 
763  pg_log_debug("reading \"%s\"", controlpath);
764  control_file = get_controlfile_by_exact_path(controlpath, &crc_ok);
765 
766  /* Control file contents not meaningful if CRC is bad. */
767  if (!crc_ok)
768  report_fatal_error("%s: CRC is incorrect", controlpath);
769 
770  /* Can't interpret control file if not current version. */
771  if (control_file->pg_control_version != PG_CONTROL_VERSION)
772  report_fatal_error("%s: unexpected control file version",
773  controlpath);
774 
775  /* System identifiers should match. */
776  if (manifest_system_identifier != control_file->system_identifier)
777  report_fatal_error("%s: manifest system identifier is %llu, but control file has %llu",
778  controlpath,
779  (unsigned long long) manifest_system_identifier,
780  (unsigned long long) control_file->system_identifier);
781 
782  /* Release memory. */
783  pfree(control_file);
784 }
785 
786 /*
787  * Scan the hash table for entries where the 'matched' flag is not set; report
788  * that such files are present in the manifest but not on disk.
789  */
790 static void
792 {
793  manifest_data *manifest = context->manifest;
794  manifest_files_iterator it;
795  manifest_file *m;
796 
797  manifest_files_start_iterate(manifest->files, &it);
798  while ((m = manifest_files_iterate(manifest->files, &it)) != NULL)
801  "\"%s\" is present in the manifest but not on disk",
802  m->pathname);
803 }
804 
805 /*
806  * Verify checksums for hash table entries that are otherwise unproblematic.
807  * If we've already reported some problem related to a hash table entry, or
808  * if it has no checksum, just skip it.
809  */
810 static void
812 {
813  manifest_data *manifest = context->manifest;
814  manifest_files_iterator it;
815  manifest_file *m;
816  uint8 *buffer;
817 
818  progress_report(false);
819 
820  buffer = pg_malloc(READ_CHUNK_SIZE * sizeof(uint8));
821 
822  manifest_files_start_iterate(manifest->files, &it);
823  while ((m = manifest_files_iterate(manifest->files, &it)) != NULL)
824  {
825  if (should_verify_checksum(m) &&
827  {
828  char *fullpath;
829 
830  /* Compute the full pathname to the target file. */
831  fullpath = psprintf("%s/%s", context->backup_directory,
832  m->pathname);
833 
834  /* Do the actual checksum verification. */
835  verify_file_checksum(context, m, fullpath, buffer);
836 
837  /* Avoid leaking memory. */
838  pfree(fullpath);
839  }
840  }
841 
842  pfree(buffer);
843 
844  progress_report(true);
845 }
846 
847 /*
848  * Verify the checksum of a single file.
849  */
850 static void
852  char *fullpath, uint8 *buffer)
853 {
854  pg_checksum_context checksum_ctx;
855  char *relpath = m->pathname;
856  int fd;
857  int rc;
858  size_t bytes_read = 0;
859  uint8 checksumbuf[PG_CHECKSUM_MAX_LENGTH];
860  int checksumlen;
861 
862  /* Open the target file. */
863  if ((fd = open(fullpath, O_RDONLY | PG_BINARY, 0)) < 0)
864  {
865  report_backup_error(context, "could not open file \"%s\": %m",
866  relpath);
867  return;
868  }
869 
870  /* Initialize checksum context. */
871  if (pg_checksum_init(&checksum_ctx, m->checksum_type) < 0)
872  {
873  report_backup_error(context, "could not initialize checksum of file \"%s\"",
874  relpath);
875  close(fd);
876  return;
877  }
878 
879  /* Read the file chunk by chunk, updating the checksum as we go. */
880  while ((rc = read(fd, buffer, READ_CHUNK_SIZE)) > 0)
881  {
882  bytes_read += rc;
883  if (pg_checksum_update(&checksum_ctx, buffer, rc) < 0)
884  {
885  report_backup_error(context, "could not update checksum of file \"%s\"",
886  relpath);
887  close(fd);
888  return;
889  }
890 
891  /* Report progress */
892  done_size += rc;
893  progress_report(false);
894  }
895  if (rc < 0)
896  report_backup_error(context, "could not read file \"%s\": %m",
897  relpath);
898 
899  /* Close the file. */
900  if (close(fd) != 0)
901  {
902  report_backup_error(context, "could not close file \"%s\": %m",
903  relpath);
904  return;
905  }
906 
907  /* If we didn't manage to read the whole file, bail out now. */
908  if (rc < 0)
909  return;
910 
911  /*
912  * Double-check that we read the expected number of bytes from the file.
913  * Normally, a file size mismatch would be caught in verify_backup_file
914  * and this check would never be reached, but this provides additional
915  * safety and clarity in the event of concurrent modifications or
916  * filesystem misbehavior.
917  */
918  if (bytes_read != m->size)
919  {
921  "file \"%s\" should contain %zu bytes, but read %zu bytes",
922  relpath, m->size, bytes_read);
923  return;
924  }
925 
926  /* Get the final checksum. */
927  checksumlen = pg_checksum_final(&checksum_ctx, checksumbuf);
928  if (checksumlen < 0)
929  {
931  "could not finalize checksum of file \"%s\"",
932  relpath);
933  return;
934  }
935 
936  /* And check it against the manifest. */
937  if (checksumlen != m->checksum_length)
939  "file \"%s\" has checksum of length %d, but expected %d",
940  relpath, m->checksum_length, checksumlen);
941  else if (memcmp(checksumbuf, m->checksum_payload, checksumlen) != 0)
943  "checksum mismatch for file \"%s\"",
944  relpath);
945 }
946 
947 /*
948  * Attempt to parse the WAL files required to restore from backup using
949  * pg_waldump.
950  */
951 static void
953  char *wal_directory)
954 {
955  manifest_data *manifest = context->manifest;
956  manifest_wal_range *this_wal_range = manifest->first_wal_range;
957 
958  while (this_wal_range != NULL)
959  {
960  char *pg_waldump_cmd;
961 
962  pg_waldump_cmd = psprintf("\"%s\" --quiet --path=\"%s\" --timeline=%u --start=%X/%X --end=%X/%X\n",
963  pg_waldump_path, wal_directory, this_wal_range->tli,
964  LSN_FORMAT_ARGS(this_wal_range->start_lsn),
965  LSN_FORMAT_ARGS(this_wal_range->end_lsn));
966  fflush(NULL);
967  if (system(pg_waldump_cmd) != 0)
969  "WAL parsing failed for timeline %u",
970  this_wal_range->tli);
971 
972  this_wal_range = this_wal_range->next;
973  }
974 }
975 
976 /*
977  * Report a problem with the backup.
978  *
979  * Update the context to indicate that we saw an error, and exit if the
980  * context says we should.
981  */
982 static void
983 report_backup_error(verifier_context *context, const char *pg_restrict fmt,...)
984 {
985  va_list ap;
986 
987  va_start(ap, fmt);
989  va_end(ap);
990 
991  context->saw_any_error = true;
992  if (context->exit_on_error)
993  exit(1);
994 }
995 
996 /*
997  * Report a fatal error and exit
998  */
999 static void
1000 report_fatal_error(const char *pg_restrict fmt,...)
1001 {
1002  va_list ap;
1003 
1004  va_start(ap, fmt);
1006  va_end(ap);
1007 
1008  exit(1);
1009 }
1010 
1011 /*
1012  * Is the specified relative path, or some prefix of it, listed in the set
1013  * of paths to ignore?
1014  *
1015  * Note that by "prefix" we mean a parent directory; for this purpose,
1016  * "aa/bb" is not a prefix of "aa/bbb", but it is a prefix of "aa/bb/cc".
1017  */
1018 static bool
1020 {
1021  SimpleStringListCell *cell;
1022 
1023  for (cell = context->ignore_list.head; cell != NULL; cell = cell->next)
1024  {
1025  char *r = relpath;
1026  char *v = cell->val;
1027 
1028  while (*v != '\0' && *r == *v)
1029  ++r, ++v;
1030 
1031  if (*v == '\0' && (*r == '\0' || *r == '/'))
1032  return true;
1033  }
1034 
1035  return false;
1036 }
1037 
1038 /*
1039  * Print a progress report based on the global variables.
1040  *
1041  * Progress report is written at maximum once per second, unless the finished
1042  * parameter is set to true.
1043  *
1044  * If finished is set to true, this is the last progress report. The cursor
1045  * is moved to the next line.
1046  */
1047 static void
1048 progress_report(bool finished)
1049 {
1050  static pg_time_t last_progress_report = 0;
1051  pg_time_t now;
1052  int percent_size = 0;
1053  char totalsize_str[32];
1054  char donesize_str[32];
1055 
1056  if (!show_progress)
1057  return;
1058 
1059  now = time(NULL);
1060  if (now == last_progress_report && !finished)
1061  return; /* Max once per second */
1062 
1064  percent_size = total_size ? (int) ((done_size * 100 / total_size)) : 0;
1065 
1066  snprintf(totalsize_str, sizeof(totalsize_str), UINT64_FORMAT,
1067  total_size / 1024);
1068  snprintf(donesize_str, sizeof(donesize_str), UINT64_FORMAT,
1069  done_size / 1024);
1070 
1071  fprintf(stderr,
1072  _("%*s/%s kB (%d%%) verified"),
1073  (int) strlen(totalsize_str),
1074  donesize_str, totalsize_str, percent_size);
1075 
1076  /*
1077  * Stay on the same line if reporting to a terminal and we're not done
1078  * yet.
1079  */
1080  fputc((!finished && isatty(fileno(stderr))) ? '\r' : '\n', stderr);
1081 }
1082 
/*
 * Print out usage information.
 *
 * The help text is written to standard output.  This function returns
 * normally; main() calls exit(0) after it when --help or -? is given.
 */
static void
usage(void)
{
	printf(_("%s verifies a backup against the backup manifest.\n\n"), progname);
	printf(_("Usage:\n %s [OPTION]... BACKUPDIR\n\n"), progname);
	printf(_("Options:\n"));
	printf(_(" -e, --exit-on-error exit immediately on error\n"));
	printf(_(" -i, --ignore=RELATIVE_PATH ignore indicated path\n"));
	printf(_(" -m, --manifest-path=PATH use specified path for manifest\n"));
	printf(_(" -n, --no-parse-wal do not try to parse WAL files\n"));
	printf(_(" -P, --progress show progress information\n"));
	printf(_(" -q, --quiet do not print any output, except for errors\n"));
	printf(_(" -s, --skip-checksums skip checksum verification\n"));
	printf(_(" -w, --wal-directory=PATH use specified path for WAL files\n"));
	printf(_(" -V, --version output version information, then exit\n"));
	printf(_(" -?, --help show this help, then exit\n"));
	/* Bug-report address and project home page come from build-time macros. */
	printf(_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
	printf(_("%s home page: <%s>\n"), PACKAGE_NAME, PACKAGE_URL);
}
Datum now(PG_FUNCTION_ARGS)
Definition: timestamp.c:1618
unsigned int uint32
Definition: c.h:506
#define Min(x, y)
Definition: c.h:1004
#define PG_UINT32_MAX
Definition: c.h:590
#define Max(x, y)
Definition: c.h:998
#define PG_TEXTDOMAIN(domain)
Definition: c.h:1214
#define PG_BINARY
Definition: c.h:1273
#define pg_attribute_printf(f, a)
Definition: c.h:191
#define gettext(x)
Definition: c.h:1179
#define UINT64_FORMAT
Definition: c.h:549
unsigned char uint8
Definition: c.h:504
int pg_checksum_final(pg_checksum_context *context, uint8 *output)
int pg_checksum_update(pg_checksum_context *context, const uint8 *input, size_t len)
int pg_checksum_init(pg_checksum_context *context, pg_checksum_type type)
#define PG_CHECKSUM_MAX_LENGTH
pg_checksum_type
int find_my_exec(const char *argv0, char *retpath)
Definition: exec.c:160
void set_pglocale_pgservice(const char *argv0, const char *app)
Definition: exec.c:448
int find_other_exec(const char *argv0, const char *target, const char *versionstr, char *retpath)
Definition: exec.c:329
ControlFileData * get_controlfile_by_exact_path(const char *ControlFilePath, bool *crc_ok_p)
int closedir(DIR *)
Definition: dirent.c:127
struct dirent * readdir(DIR *)
Definition: dirent.c:78
DIR * opendir(const char *)
Definition: dirent.c:33
#define _(x)
Definition: elog.c:90
void * pg_malloc0(size_t size)
Definition: fe_memutils.c:53
void * pg_malloc(size_t size)
Definition: fe_memutils.c:47
int getopt_long(int argc, char *const argv[], const char *optstring, const struct option *longopts, int *longindex)
Definition: getopt_long.c:60
#define no_argument
Definition: getopt_long.h:24
#define required_argument
Definition: getopt_long.h:25
#define close(a)
Definition: win32.h:12
#define read(a, b, c)
Definition: win32.h:13
return false
Definition: isn.c:131
static void const char * fmt
static void const char fflush(stdout)
va_end(args)
exit(1)
va_start(args, fmt)
void pg_logging_init(const char *argv0)
Definition: logging.c:83
void pg_log_generic_v(enum pg_log_level level, enum pg_log_part part, const char *pg_restrict fmt, va_list ap)
Definition: logging.c:216
#define pg_log_error(...)
Definition: logging.h:106
#define pg_log_error_hint(...)
Definition: logging.h:112
@ PG_LOG_PRIMARY
Definition: logging.h:67
@ PG_LOG_ERROR
Definition: logging.h:43
#define pg_log_debug(...)
Definition: logging.h:133
char * pstrdup(const char *in)
Definition: mcxt.c:1695
void pfree(void *pointer)
Definition: mcxt.c:1520
void * palloc(Size size)
Definition: mcxt.c:1316
void json_parse_manifest_incremental_chunk(JsonManifestParseIncrementalState *incstate, char *chunk, int size, bool is_last)
JsonManifestParseIncrementalState * json_parse_manifest_incremental_init(JsonManifestParseContext *context)
void json_parse_manifest(JsonManifestParseContext *context, char *buffer, size_t size)
void json_parse_manifest_incremental_shutdown(JsonManifestParseIncrementalState *incstate)
static pg_time_t last_progress_report
Definition: pg_amcheck.c:146
void * arg
#define pg_fatal(...)
static bool manifest
#define MAXPGPATH
#define PG_CONTROL_VERSION
Definition: pg_control.h:25
static char * filename
Definition: pg_dumpall.c:119
PGDLLIMPORT int optind
Definition: getopt.c:50
PGDLLIMPORT char * optarg
Definition: getopt.c:52
static void verify_backup_checksums(verifier_context *context)
static uint64 done_size
static manifest_data * parse_manifest_file(char *manifest_path)
struct verifier_context verifier_context
static void verifybackup_version_cb(JsonManifestParseContext *context, int manifest_version)
static void verifybackup_per_file_cb(JsonManifestParseContext *context, char *pathname, size_t size, pg_checksum_type checksum_type, int checksum_length, uint8 *checksum_payload)
static void verifybackup_system_identifier(JsonManifestParseContext *context, uint64 manifest_system_identifier)
int main(int argc, char **argv)
static void verifybackup_per_wal_range_cb(JsonManifestParseContext *context, TimeLineID tli, XLogRecPtr start_lsn, XLogRecPtr end_lsn)
static uint64 total_size
struct manifest_wal_range manifest_wal_range
static void report_extra_backup_files(verifier_context *context)
#define ESTIMATED_BYTES_PER_MANIFEST_LINE
static bool skip_checksums
static void progress_report(bool finished)
struct manifest_file manifest_file
static void pg_attribute_noreturn()
#define READ_CHUNK_SIZE
static bool should_ignore_relpath(verifier_context *context, char *relpath)
#define should_verify_checksum(m)
static bool show_progress
static void verify_backup_file(verifier_context *context, char *relpath, char *fullpath)
static void report_manifest_error(JsonManifestParseContext *context, const char *fmt,...) pg_attribute_printf(2
static void parse_required_wal(verifier_context *context, char *pg_waldump_path, char *wal_directory)
static void static void report_fatal_error(const char *pg_restrict fmt,...) pg_attribute_printf(1
static const char * progname
static void verify_backup_directory(verifier_context *context, char *relpath, char *fullpath)
static void usage(void)
static void verify_file_checksum(verifier_context *context, manifest_file *m, char *fullpath, uint8 *buffer)
static void verify_control_file(const char *controlpath, uint64 manifest_system_identifier)
static void report_backup_error(verifier_context *context, const char *pg_restrict fmt,...) pg_attribute_printf(2
struct manifest_data manifest_data
int64 pg_time_t
Definition: pgtime.h:23
void canonicalize_path(char *path)
Definition: path.c:264
const char * get_progname(const char *argv0)
Definition: path.c:574
#define snprintf
Definition: port.h:238
#define fprintf
Definition: port.h:242
#define printf(...)
Definition: port.h:244
size_t strlcpy(char *dst, const char *src, size_t siz)
Definition: strlcpy.c:45
char * c
static int fd(const char *x, int i)
Definition: preproc-init.c:105
char * psprintf(const char *fmt,...)
Definition: psprintf.c:46
tree context
Definition: radixtree.h:1833
static struct cvec * range(struct vars *v, chr a, chr b, int cases)
Definition: regc_locale.c:412
#define relpath(rlocator, forknum)
Definition: relpath.h:94
void simple_string_list_append(SimpleStringList *list, const char *val)
Definition: simple_list.c:63
static pg_noinline void Size size
Definition: slab.c:607
uint32 pg_control_version
Definition: pg_control.h:124
uint64 system_identifier
Definition: pg_control.h:109
Definition: dirent.c:26
char val[FLEXIBLE_ARRAY_MEMBER]
Definition: simple_list.h:37
struct SimpleStringListCell * next
Definition: simple_list.h:34
Definition: dirent.h:10
char d_name[MAX_PATH]
Definition: dirent.h:15
manifest_wal_range * last_wal_range
Definition: load_manifest.h:61
manifest_wal_range * first_wal_range
Definition: load_manifest.h:60
uint64 system_identifier
Definition: load_manifest.h:58
manifest_files_hash * files
Definition: load_manifest.h:59
uint8 * checksum_payload
Definition: load_manifest.h:29
pg_checksum_type checksum_type
Definition: load_manifest.h:27
XLogRecPtr end_lsn
Definition: load_manifest.h:48
struct manifest_wal_range * next
Definition: load_manifest.h:49
struct manifest_wal_range * prev
Definition: load_manifest.h:50
XLogRecPtr start_lsn
Definition: load_manifest.h:47
__int64 st_size
Definition: win32_port.h:273
unsigned short st_mode
Definition: win32_port.h:268
SimpleStringList ignore_list
manifest_data * manifest
#define stat
Definition: win32_port.h:284
#define S_ISDIR(m)
Definition: win32_port.h:325
#define fstat
Definition: win32_port.h:283
#define S_ISREG(m)
Definition: win32_port.h:328
#define LSN_FORMAT_ARGS(lsn)
Definition: xlogdefs.h:43
uint64 XLogRecPtr
Definition: xlogdefs.h:21
uint32 TimeLineID
Definition: xlogdefs.h:59