PostgreSQL Source Code git master
Loading...
Searching...
No Matches
pg_verifybackup.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * pg_verifybackup.c
4 * Verify a backup against a backup manifest.
5 *
6 * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
8 *
9 * src/bin/pg_verifybackup/pg_verifybackup.c
10 *
11 *-------------------------------------------------------------------------
12 */
13
14#include "postgres_fe.h"
15
16#include <dirent.h>
17#include <fcntl.h>
18#include <limits.h>
19#include <sys/stat.h>
20#include <time.h>
21
23#include "common/logging.h"
26#include "getopt_long.h"
27#include "pg_verifybackup.h"
28#include "pgtime.h"
29
30/*
31 * For efficiency, we'd like our hash table containing information about the
32 * manifest to start out with approximately the correct number of entries.
33 * There's no way to know the exact number of entries without reading the whole
34 * file, but we can get an estimate by dividing the file size by the estimated
35 * number of bytes per line.
36 *
37 * This could be off by about a factor of two in either direction, because the
38 * checksum algorithm has a big impact on the line lengths; e.g. a SHA512
39 * checksum is 128 hex bytes, whereas a CRC-32C value is only 8, and there
40 * might be no checksum at all.
41 */
42#define ESTIMATED_BYTES_PER_MANIFEST_LINE 100
43
44/*
45 * How many bytes should we try to read from a file at once?
46 */
47#define READ_CHUNK_SIZE (128 * 1024)
48
49/*
50 * Tar file information needed for content verification.
51 */
58
61 int manifest_version);
63 uint64 manifest_system_identifier);
65 const char *pathname, uint64 size,
66 pg_checksum_type checksum_type,
67 int checksum_length,
68 uint8 *checksum_payload);
70 TimeLineID tli,
71 XLogRecPtr start_lsn,
72 XLogRecPtr end_lsn);
74 const char *fmt,...)
76
78 char **base_archive_path,
79 char **wal_archive_path);
81 char *relpath, char *fullpath,
82 DIR *dir);
84 char *fullpath);
86 uint64 manifest_system_identifier);
88 char *fullpath, SimplePtrList *tarfiles,
89 char **base_archive_path,
90 char **wal_archive_path);
92 char *fullpath, astreamer *streamer);
96 manifest_file *m, char *fullpath,
97 uint8 *buffer);
99 char *pg_waldump_path,
100 char *wal_path);
102 char *archive_name,
103 Oid tblspc_oid,
104 pg_compress_algorithm compress_algo);
105
106static void progress_report(bool finished);
107static void usage(void);
108
110
111/* is progress reporting enabled? */
113
114/* Progress indicators */
117
118/*
119 * Main entry point.
120 */
121int
122main(int argc, char **argv)
123{
124 static struct option long_options[] = {
125 {"exit-on-error", no_argument, NULL, 'e'},
126 {"ignore", required_argument, NULL, 'i'},
127 {"manifest-path", required_argument, NULL, 'm'},
128 {"format", required_argument, NULL, 'F'},
129 {"no-parse-wal", no_argument, NULL, 'n'},
130 {"progress", no_argument, NULL, 'P'},
131 {"quiet", no_argument, NULL, 'q'},
132 {"skip-checksums", no_argument, NULL, 's'},
133 {"wal-path", required_argument, NULL, 'w'},
134 {"wal-directory", required_argument, NULL, 'w'}, /* deprecated */
135 {NULL, 0, NULL, 0}
136 };
137
138 int c;
139 verifier_context context;
140 char *manifest_path = NULL;
141 bool no_parse_wal = false;
142 bool quiet = false;
143 char *wal_path = NULL;
144 char *base_archive_path = NULL;
145 char *wal_archive_path = NULL;
146 char *pg_waldump_path = NULL;
147 DIR *dir;
148
149 pg_logging_init(argv[0]);
150 set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_verifybackup"));
151 progname = get_progname(argv[0]);
152
153 memset(&context, 0, sizeof(context));
154
155 if (argc > 1)
156 {
157 if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
158 {
159 usage();
160 exit(0);
161 }
162 if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
163 {
164 puts("pg_verifybackup (PostgreSQL) " PG_VERSION);
165 exit(0);
166 }
167 }
168
169 /*
170 * Skip certain files in the toplevel directory.
171 *
172 * Ignore the backup_manifest file, because it's not included in the
173 * backup manifest.
174 *
175 * Ignore the pg_wal directory, because those files are not included in
176 * the backup manifest either, since they are fetched separately from the
177 * backup itself, and verified via a separate mechanism.
178 *
179 * Ignore postgresql.auto.conf, recovery.signal, and standby.signal,
180 * because we expect that those files may sometimes be created or changed
181 * as part of the backup process. For example, pg_basebackup -R will
182 * modify postgresql.auto.conf and create standby.signal.
183 */
184 simple_string_list_append(&context.ignore_list, "backup_manifest");
185 simple_string_list_append(&context.ignore_list, "pg_wal");
186 simple_string_list_append(&context.ignore_list, "postgresql.auto.conf");
187 simple_string_list_append(&context.ignore_list, "recovery.signal");
188 simple_string_list_append(&context.ignore_list, "standby.signal");
189
190 while ((c = getopt_long(argc, argv, "eF:i:m:nPqsw:", long_options, NULL)) != -1)
191 {
192 switch (c)
193 {
194 case 'e':
195 context.exit_on_error = true;
196 break;
197 case 'i':
198 {
199 char *arg = pstrdup(optarg);
200
202 simple_string_list_append(&context.ignore_list, arg);
203 break;
204 }
205 case 'm':
208 break;
209 case 'F':
210 if (strcmp(optarg, "p") == 0 || strcmp(optarg, "plain") == 0)
211 context.format = 'p';
212 else if (strcmp(optarg, "t") == 0 || strcmp(optarg, "tar") == 0)
213 context.format = 't';
214 else
215 pg_fatal("invalid backup format \"%s\", must be \"plain\" or \"tar\"",
216 optarg);
217 break;
218 case 'n':
219 no_parse_wal = true;
220 break;
221 case 'P':
222 show_progress = true;
223 break;
224 case 'q':
225 quiet = true;
226 break;
227 case 's':
228 context.skip_checksums = true;
229 break;
230 case 'w':
233 break;
234 default:
235 /* getopt_long already emitted a complaint */
236 pg_log_error_hint("Try \"%s --help\" for more information.", progname);
237 exit(1);
238 }
239 }
240
241 /* Get backup directory name */
242 if (optind >= argc)
243 {
244 pg_log_error("no backup directory specified");
245 pg_log_error_hint("Try \"%s --help\" for more information.", progname);
246 exit(1);
247 }
248 context.backup_directory = pstrdup(argv[optind++]);
249 canonicalize_path(context.backup_directory);
250
251 /* Complain if any arguments remain */
252 if (optind < argc)
253 {
254 pg_log_error("too many command-line arguments (first is \"%s\")",
255 argv[optind]);
256 pg_log_error_hint("Try \"%s --help\" for more information.", progname);
257 exit(1);
258 }
259
260 /* Complain if the specified arguments conflict */
261 if (show_progress && quiet)
262 pg_fatal("cannot specify both %s and %s",
263 "-P/--progress", "-q/--quiet");
264
265 /* Unless --no-parse-wal was specified, we will need pg_waldump. */
266 if (!no_parse_wal)
267 {
268 int ret;
269
271 ret = find_other_exec(argv[0], "pg_waldump",
272 "pg_waldump (PostgreSQL) " PG_VERSION "\n",
274 if (ret < 0)
275 {
276 char full_path[MAXPGPATH];
277
278 if (find_my_exec(argv[0], full_path) < 0)
280
281 if (ret == -1)
282 pg_fatal("program \"%s\" is needed by %s but was not found in the same directory as \"%s\"",
283 "pg_waldump", "pg_verifybackup", full_path);
284 else
285 pg_fatal("program \"%s\" was found by \"%s\" but was not the same version as %s",
286 "pg_waldump", full_path, "pg_verifybackup");
287 }
288 }
289
290 /* By default, look for the manifest in the backup directory. */
291 if (manifest_path == NULL)
292 manifest_path = psprintf("%s/backup_manifest",
293 context.backup_directory);
294
295 /*
296 * Try to read the manifest. We treat any errors encountered while parsing
297 * the manifest as fatal; there doesn't seem to be much point in trying to
298 * verify the backup directory against a corrupted manifest.
299 */
300 context.manifest = parse_manifest_file(manifest_path);
301
302 /*
303 * If the backup directory cannot be found, treat this as a fatal error.
304 */
305 dir = opendir(context.backup_directory);
306 if (dir == NULL)
307 report_fatal_error("could not open directory \"%s\": %m",
308 context.backup_directory);
309
310 /*
311 * At this point, we know that the backup directory exists, so it's now
312 * reasonable to check for files immediately inside it. Thus, before going
313 * further, if the user did not specify the backup format, check for
314 * PG_VERSION to distinguish between tar and plain format.
315 */
316 if (context.format == '\0')
317 {
318 struct stat sb;
319 char *path;
320
321 path = psprintf("%s/%s", context.backup_directory, "PG_VERSION");
322 if (stat(path, &sb) == 0)
323 context.format = 'p';
324 else if (errno != ENOENT)
325 {
326 pg_log_error("could not stat file \"%s\": %m", path);
327 exit(1);
328 }
329 else
330 {
331 /* No PG_VERSION, so assume tar format. */
332 context.format = 't';
333 }
334 pfree(path);
335 }
336
337 /*
338 * Perform the type of verification appropriate for the backup format.
339 * This will close 'dir'.
340 */
341 if (context.format == 'p')
342 verify_plain_backup_directory(&context, NULL, context.backup_directory,
343 dir);
344 else
346
347 /*
348 * The "matched" flag should now be set on every entry in the hash table.
349 * Any entries for which the bit is not set are files mentioned in the
350 * manifest that don't exist on disk (or in the relevant tar files).
351 */
353
354 /*
355 * If this is a tar-format backup, checksums were already verified above;
356 * but if it's a plain-format backup, we postpone it until this point,
357 * since the earlier checks can be performed just by knowing which files
358 * are present, without needing to read all of them.
359 */
360 if (context.format == 'p' && !context.skip_checksums)
361 verify_backup_checksums(&context);
362
363 /*
364 * By default, WAL files are expected to be found in the backup directory
365 * for plain-format backups. In the case of tar-format backups, if a
366 * separate WAL archive is not found, the WAL files are most likely
367 * included within the main data directory archive.
368 */
369 if (wal_path == NULL)
370 {
371 if (context.format == 'p')
372 wal_path = psprintf("%s/pg_wal", context.backup_directory);
373 else if (wal_archive_path)
375 else if (base_archive_path)
377 else
378 {
379 pg_log_error("WAL archive not found");
380 pg_log_error_hint("Specify the correct path using the option -w/--wal-path. "
381 "Or you must use -n/--no-parse-wal when verifying a tar-format backup.");
382 exit(1);
383 }
384 }
385
386 /*
387 * Try to parse the required ranges of WAL records, unless we were told
388 * not to do so.
389 */
390 if (!no_parse_wal)
392
393 /*
394 * If everything looks OK, tell the user this, unless we were asked to
395 * work quietly.
396 */
397 if (!context.saw_any_error && !quiet)
398 printf(_("backup successfully verified\n"));
399
400 return context.saw_any_error ? 1 : 0;
401}
402
403/*
404 * Parse a manifest file and return a data structure describing the contents.
405 */
406static manifest_data *
408{
409 int fd;
410 struct stat statbuf;
411 off_t estimate;
414 char *buffer;
415 int rc;
417 manifest_data *result;
418
419 int chunk_size = READ_CHUNK_SIZE;
420
421 /* Open the manifest file. */
422 if ((fd = open(manifest_path, O_RDONLY | PG_BINARY, 0)) < 0)
423 report_fatal_error("could not open file \"%s\": %m", manifest_path);
424
425 /* Figure out how big the manifest is. */
426 if (fstat(fd, &statbuf) != 0)
427 report_fatal_error("could not stat file \"%s\": %m", manifest_path);
428
429 /* Guess how large to make the hash table based on the manifest size. */
430 estimate = statbuf.st_size / ESTIMATED_BYTES_PER_MANIFEST_LINE;
431 initial_size = Min(PG_UINT32_MAX, Max(estimate, 256));
432
433 /* Create the hash table. */
435
437 result->files = ht;
438 context.private_data = result;
444
445 /*
446 * Parse the file, in chunks if necessary.
447 */
448 if (statbuf.st_size <= chunk_size)
449 {
450 buffer = pg_malloc(statbuf.st_size);
451 rc = read(fd, buffer, statbuf.st_size);
452 if (rc != statbuf.st_size)
453 {
454 if (rc < 0)
455 pg_fatal("could not read file \"%s\": %m", manifest_path);
456 else
457 pg_fatal("could not read file \"%s\": read %d of %lld",
458 manifest_path, rc, (long long int) statbuf.st_size);
459 }
460
461 /* Close the manifest file. */
462 close(fd);
463
464 /* Parse the manifest. */
465 json_parse_manifest(&context, buffer, statbuf.st_size);
466 }
467 else
468 {
469 int bytes_left = statbuf.st_size;
471
472 inc_state = json_parse_manifest_incremental_init(&context);
473
474 buffer = pg_malloc(chunk_size + 1);
475
476 while (bytes_left > 0)
477 {
478 int bytes_to_read = chunk_size;
479
480 /*
481 * Make sure that the last chunk is sufficiently large (i.e. at
482 * least half the chunk size) so that it fully contains the piece
483 * at the end with the checksum.
484 */
485 if (bytes_left < chunk_size)
487 else if (bytes_left < 2 * chunk_size)
489 rc = read(fd, buffer, bytes_to_read);
490 if (rc != bytes_to_read)
491 {
492 if (rc < 0)
493 pg_fatal("could not read file \"%s\": %m", manifest_path);
494 else
495 pg_fatal("could not read file \"%s\": read %lld of %lld",
497 (long long int) (statbuf.st_size + rc - bytes_left),
498 (long long int) statbuf.st_size);
499 }
500 bytes_left -= rc;
501 json_parse_manifest_incremental_chunk(inc_state, buffer, rc,
502 bytes_left == 0);
503 }
504
505 /* Release the incremental state memory */
507
508 close(fd);
509 }
510
511 /* Done with the buffer. */
512 pfree(buffer);
513
514 return result;
515}
516
517/*
518 * Report an error while parsing the manifest.
519 *
520 * We consider all such errors to be fatal errors. The manifest parser
521 * expects this function not to return.
522 */
523static void
525{
526 va_list ap;
527
528 va_start(ap, fmt);
530 va_end(ap);
531
532 exit(1);
533}
534
535/*
536 * Record the manifest version extracted from the backup manifest.
537 */
538static void
540 int manifest_version)
541{
543
544 /* Validation will be at the later stage */
545 manifest->version = manifest_version;
546}
547
548/*
549 * Record the system identifier extracted from the backup manifest.
550 */
551static void
553 uint64 manifest_system_identifier)
554{
556
557 /* Validation will be at the later stage */
558 manifest->system_identifier = manifest_system_identifier;
559}
560
561/*
562 * Record details extracted from the backup manifest for one file.
563 */
564static void
566 const char *pathname, uint64 size,
567 pg_checksum_type checksum_type,
568 int checksum_length, uint8 *checksum_payload)
569{
572 manifest_file *m;
573 bool found;
574
575 /* Make a new entry in the hash table for this file. */
576 m = manifest_files_insert(ht, pathname, &found);
577 if (found)
578 report_fatal_error("duplicate path name in backup manifest: \"%s\"",
579 pathname);
580
581 /* Initialize the entry. */
582 m->size = size;
583 m->checksum_type = checksum_type;
584 m->checksum_length = checksum_length;
585 m->checksum_payload = checksum_payload;
586 m->matched = false;
587 m->bad = false;
588}
589
590/*
591 * Record details extracted from the backup manifest for one WAL range.
592 */
593static void
595 TimeLineID tli,
596 XLogRecPtr start_lsn, XLogRecPtr end_lsn)
597{
600
601 /* Allocate and initialize a struct describing this WAL range. */
603 range->tli = tli;
604 range->start_lsn = start_lsn;
605 range->end_lsn = end_lsn;
606 range->prev = manifest->last_wal_range;
607 range->next = NULL;
608
609 /* Add it to the end of the list. */
610 if (manifest->first_wal_range == NULL)
611 manifest->first_wal_range = range;
612 else
613 manifest->last_wal_range->next = range;
614 manifest->last_wal_range = range;
615}
616
617/*
618 * Verify one directory of a plain-format backup.
619 *
620 * 'relpath' is NULL if we are to verify the top-level backup directory,
621 * and otherwise the relative path to the directory that is to be verified.
622 *
623 * 'fullpath' is the backup directory with 'relpath' appended; i.e. the actual
624 * filesystem path at which it can be found.
625 *
626 * 'dir' is an open directory handle, or NULL if the caller wants us to
627 * open it. If the caller chooses to pass a handle, we'll close it when
628 * we're done with it.
629 */
630static void
632 char *fullpath, DIR *dir)
633{
634 struct dirent *dirent;
635
636 /* Open the directory unless the caller did it. */
637 if (dir == NULL && ((dir = opendir(fullpath)) == NULL))
638 {
639 report_backup_error(context,
640 "could not open directory \"%s\": %m", fullpath);
642
643 return;
644 }
645
646 while (errno = 0, (dirent = readdir(dir)) != NULL)
647 {
648 char *filename = dirent->d_name;
649 char *newfullpath = psprintf("%s/%s", fullpath, filename);
650 char *newrelpath;
651
652 /* Skip "." and ".." */
653 if (filename[0] == '.' && (filename[1] == '\0'
654 || strcmp(filename, "..") == 0))
655 continue;
656
657 if (relpath == NULL)
659 else
660 newrelpath = psprintf("%s/%s", relpath, filename);
661
662 if (!should_ignore_relpath(context, newrelpath))
664
667 }
668
669 if (closedir(dir))
670 {
671 report_backup_error(context,
672 "could not close directory \"%s\": %m", fullpath);
673 return;
674 }
675}
676
677/*
678 * Verify one file (which might actually be a directory or a symlink).
679 *
680 * The arguments to this function have the same meaning as the similarly named
681 * arguments to verify_plain_backup_directory.
682 */
683static void
685 char *fullpath)
686{
687 struct stat sb;
688 manifest_file *m;
689
690 if (stat(fullpath, &sb) != 0)
691 {
692 report_backup_error(context,
693 "could not stat file or directory \"%s\": %m",
694 relpath);
695
696 /*
697 * Suppress further errors related to this path name and, if it's a
698 * directory, anything underneath it.
699 */
701
702 return;
703 }
704
705 /* If it's a directory, just recurse. */
706 if (S_ISDIR(sb.st_mode))
707 {
708 verify_plain_backup_directory(context, relpath, fullpath, NULL);
709 return;
710 }
711
712 /* If it's not a directory, it should be a regular file. */
713 if (!S_ISREG(sb.st_mode))
714 {
715 report_backup_error(context,
716 "\"%s\" is not a regular file or directory",
717 relpath);
718 return;
719 }
720
721 /* Check whether there's an entry in the manifest hash. */
723 if (m == NULL)
724 {
725 report_backup_error(context,
726 "\"%s\" is present on disk but not in the manifest",
727 relpath);
728 return;
729 }
730
731 /* Flag this entry as having been encountered in the filesystem. */
732 m->matched = true;
733
734 /* Check that the size matches. */
735 if (m->size != sb.st_size)
736 {
737 report_backup_error(context,
738 "\"%s\" has size %llu on disk but size %llu in the manifest",
739 relpath, (unsigned long long) sb.st_size,
740 (unsigned long long) m->size);
741 m->bad = true;
742 }
743
744 /*
745 * Validate the manifest system identifier, not available in manifest
746 * version 1.
747 */
748 if (context->manifest->version != 1 &&
751
752 /* Update statistics for progress report, if necessary */
753 if (show_progress && !context->skip_checksums &&
755 total_size += m->size;
756
757 /*
758 * We don't verify checksums at this stage. We first finish verifying that
759 * we have the expected set of files with the expected sizes, and only
760 * afterwards verify the checksums. That's because computing checksums may
761 * take a while, and we'd like to report more obvious problems quickly.
762 */
763}
764
765/*
766 * Sanity check control file and validate system identifier against manifest
767 * system identifier.
768 */
769static void
770verify_control_file(const char *controlpath, uint64 manifest_system_identifier)
771{
772 ControlFileData *control_file;
773 bool crc_ok;
774
775 pg_log_debug("reading \"%s\"", controlpath);
777
778 /* Control file contents not meaningful if CRC is bad. */
779 if (!crc_ok)
780 report_fatal_error("%s: CRC is incorrect", controlpath);
781
782 /* Can't interpret control file if not current version. */
783 if (control_file->pg_control_version != PG_CONTROL_VERSION)
784 report_fatal_error("%s: unexpected control file version",
786
787 /* System identifiers should match. */
788 if (manifest_system_identifier != control_file->system_identifier)
789 report_fatal_error("%s: manifest system identifier is %" PRIu64 ", but control file has %" PRIu64,
791 manifest_system_identifier,
792 control_file->system_identifier);
793
794 /* Release memory. */
795 pfree(control_file);
796}
797
798/*
799 * Verify tar backup.
800 *
801 * The caller should pass a handle to the target directory, which we will
802 * close when we're done with it.
803 */
804static void
806 char **wal_archive_path)
807{
808 struct dirent *dirent;
810 SimplePtrListCell *cell;
811
812 Assert(context->format != 'p');
813
814 progress_report(false);
815
816 /* First pass: scan the directory for tar files. */
817 while (errno = 0, (dirent = readdir(dir)) != NULL)
818 {
819 char *filename = dirent->d_name;
820
821 /* Skip "." and ".." */
822 if (filename[0] == '.' && (filename[1] == '\0'
823 || strcmp(filename, "..") == 0))
824 continue;
825
826 /*
827 * Unless it's something we should ignore, perform prechecks and add
828 * it to the list.
829 */
830 if (!should_ignore_relpath(context, filename))
831 {
832 char *fullpath;
833
834 fullpath = psprintf("%s/%s", context->backup_directory, filename);
835 precheck_tar_backup_file(context, filename, fullpath, &tarfiles,
837 pfree(fullpath);
838 }
839 }
840
841 if (closedir(dir))
842 {
843 report_backup_error(context,
844 "could not close directory \"%s\": %m",
845 context->backup_directory);
846 return;
847 }
848
849 /* Second pass: Perform the final verification of the tar contents. */
850 for (cell = tarfiles.head; cell != NULL; cell = cell->next)
851 {
852 tar_file *tar = (tar_file *) cell->ptr;
853 astreamer *streamer;
854 char *fullpath;
855
856 /*
857 * Prepares the archive streamer stack according to the tar
858 * compression format.
859 */
860 streamer = create_archive_verifier(context,
861 tar->relpath,
862 tar->tblspc_oid,
863 tar->compress_algorithm);
864
865 /* Compute the full pathname to the target file. */
866 fullpath = psprintf("%s/%s", context->backup_directory,
867 tar->relpath);
868
869 /* Invoke the streamer for reading, decompressing, and verifying. */
870 verify_tar_file(context, tar->relpath, fullpath, streamer);
871
872 /* Cleanup. */
873 pfree(tar->relpath);
874 pfree(tar);
875 pfree(fullpath);
876
877 astreamer_finalize(streamer);
878 astreamer_free(streamer);
879 }
881
882 progress_report(true);
883}
884
885/*
886 * Preparatory steps for verifying files in tar format backups.
887 *
888 * Carries out basic validation of the tar format backup file, detects the
889 * compression type, and appends that information to the tarfiles list. An
890 * error will be reported if the tar file is inaccessible, or if the file type,
891 * name, or compression type is not as expected.
892 *
893 * The arguments to this function are mostly the same as those of
894 * verify_plain_backup_file. The additional arguments output a list of valid
895 * tar files, along with the full paths to the main archive and the WAL
896 * directory archive.
897 */
898static void
900 char *fullpath, SimplePtrList *tarfiles,
901 char **base_archive_path, char **wal_archive_path)
902{
903 struct stat sb;
904 Oid tblspc_oid = InvalidOid;
905 pg_compress_algorithm compress_algorithm;
906 tar_file *tar;
907 char *suffix = NULL;
908 bool is_base_archive = false;
909 bool is_wal_archive = false;
910
911 /* Should be tar format backup */
912 Assert(context->format == 't');
913
914 /* Get file information */
915 if (stat(fullpath, &sb) != 0)
916 {
917 report_backup_error(context,
918 "could not stat file or directory \"%s\": %m",
919 relpath);
920 return;
921 }
922
923 /* In a tar format backup, we expect only regular files. */
924 if (!S_ISREG(sb.st_mode))
925 {
926 report_backup_error(context,
927 "file \"%s\" is not a regular file",
928 relpath);
929 return;
930 }
931
932 /*
933 * We expect tar files for backing up the main directory, tablespace, and
934 * pg_wal directory.
935 *
936 * pg_basebackup writes the main data directory to an archive file named
937 * base.tar, the pg_wal directory to pg_wal.tar, and the tablespace
938 * directory to <tablespaceoid>.tar, each followed by a compression type
939 * extension such as .gz, .lz4, or .zst.
940 */
941 if (strncmp("base", relpath, 4) == 0)
942 {
943 suffix = relpath + 4;
944 is_base_archive = true;
945 }
946 else if (strncmp("pg_wal", relpath, 6) == 0)
947 {
948 suffix = relpath + 6;
949 is_wal_archive = true;
950 }
951 else
952 {
953 /* Expected a <tablespaceoid>.tar file here. */
954 uint64 num = strtoul(relpath, &suffix, 10);
955
956 /*
957 * Report an error if the result is 0 (strtoul also returns 0 when it consumes
958 * no digits; 'suffix' is never NULL) or if the value is too large to be a valid OID.
959 */
960 if (suffix == NULL || num <= 0 || num > OID_MAX)
961 {
962 report_backup_error(context,
963 "file \"%s\" is not expected in a tar format backup",
964 relpath);
965 return;
966 }
967 tblspc_oid = (Oid) num;
968 }
969
970 /* Now, check the compression type of the tar */
971 if (!parse_tar_compress_algorithm(suffix, &compress_algorithm))
972 {
973 report_backup_error(context,
974 "file \"%s\" is not expected in a tar format backup",
975 relpath);
976 return;
977 }
978
979 /*
980 * Ignore WALs, as reading and verification will be handled through
981 * pg_waldump.
982 */
983 if (is_wal_archive)
984 {
985 *wal_archive_path = pstrdup(fullpath);
986 return;
987 }
988 else if (is_base_archive)
989 *base_archive_path = pstrdup(fullpath);
990
991 /*
992 * Append the information to the list for complete verification at a later
993 * stage.
994 */
996 tar->relpath = pstrdup(relpath);
997 tar->tblspc_oid = tblspc_oid;
998 tar->compress_algorithm = compress_algorithm;
999
1001
1002 /* Update statistics for progress report, if necessary */
1003 if (show_progress)
1004 total_size += sb.st_size;
1005}
1006
1007/*
1008 * Verification of a single tar file content.
1009 *
1010 * It reads a given tar archive in predefined chunks and passes it to the
1011 * streamer, which initiates routines for decompression (if necessary) and then
1012 * verifies each member within the tar file.
1013 */
1014static void
1015verify_tar_file(verifier_context *context, char *relpath, char *fullpath,
1016 astreamer *streamer)
1017{
1018 int fd;
1019 int rc;
1020 char *buffer;
1021
1022 pg_log_debug("reading \"%s\"", fullpath);
1023
1024 /* Open the target file. */
1025 if ((fd = open(fullpath, O_RDONLY | PG_BINARY, 0)) < 0)
1026 {
1027 report_backup_error(context, "could not open file \"%s\": %m",
1028 relpath);
1029 return;
1030 }
1031
1032 buffer = pg_malloc(READ_CHUNK_SIZE * sizeof(uint8));
1033
1034 /* Perform the reads */
1035 while ((rc = read(fd, buffer, READ_CHUNK_SIZE)) > 0)
1036 {
1037 astreamer_content(streamer, NULL, buffer, rc, ASTREAMER_UNKNOWN);
1038
1039 /* Report progress */
1040 done_size += rc;
1041 progress_report(false);
1042 }
1043
1044 pg_free(buffer);
1045
1046 if (rc < 0)
1047 report_backup_error(context, "could not read file \"%s\": %m",
1048 relpath);
1049
1050 /* Close the file. */
1051 if (close(fd) != 0)
1052 report_backup_error(context, "could not close file \"%s\": %m",
1053 relpath);
1054}
1055
1056/*
1057 * Scan the hash table for entries where the 'matched' flag is not set; report
1058 * that such files are present in the manifest but not on disk.
1059 */
1060static void
1062{
1063 manifest_data *manifest = context->manifest;
1065 manifest_file *m;
1066
1068 while ((m = manifest_files_iterate(manifest->files, &it)) != NULL)
1069 if (!m->matched && !should_ignore_relpath(context, m->pathname))
1070 report_backup_error(context,
1071 "\"%s\" is present in the manifest but not on disk",
1072 m->pathname);
1073}
1074
1075/*
1076 * Verify checksums for hash table entries that are otherwise unproblematic.
1077 * If we've already reported some problem related to a hash table entry, or
1078 * if it has no checksum, just skip it.
1079 */
1080static void
1082{
1083 manifest_data *manifest = context->manifest;
1085 manifest_file *m;
1086 uint8 *buffer;
1087
1088 progress_report(false);
1089
1091
1093 while ((m = manifest_files_iterate(manifest->files, &it)) != NULL)
1094 {
1095 if (should_verify_checksum(m) &&
1096 !should_ignore_relpath(context, m->pathname))
1097 {
1098 char *fullpath;
1099
1100 /* Compute the full pathname to the target file. */
1101 fullpath = psprintf("%s/%s", context->backup_directory,
1102 m->pathname);
1103
1104 /* Do the actual checksum verification. */
1105 verify_file_checksum(context, m, fullpath, buffer);
1106
1107 /* Avoid leaking memory. */
1108 pfree(fullpath);
1109 }
1110 }
1111
1112 pfree(buffer);
1113
1114 progress_report(true);
1115}
1116
1117/*
1118 * Verify the checksum of a single file.
1119 */
1120static void
1122 char *fullpath, uint8 *buffer)
1123{
1124 pg_checksum_context checksum_ctx;
1125 const char *relpath = m->pathname;
1126 int fd;
1127 int rc;
1128 uint64 bytes_read = 0;
1130 int checksumlen;
1131
1132 /* Open the target file. */
1133 if ((fd = open(fullpath, O_RDONLY | PG_BINARY, 0)) < 0)
1134 {
1135 report_backup_error(context, "could not open file \"%s\": %m",
1136 relpath);
1137 return;
1138 }
1139
1140 /* Initialize checksum context. */
1141 if (pg_checksum_init(&checksum_ctx, m->checksum_type) < 0)
1142 {
1143 report_backup_error(context, "could not initialize checksum of file \"%s\"",
1144 relpath);
1145 close(fd);
1146 return;
1147 }
1148
1149 /* Read the file chunk by chunk, updating the checksum as we go. */
1150 while ((rc = read(fd, buffer, READ_CHUNK_SIZE)) > 0)
1151 {
1152 bytes_read += rc;
1153 if (pg_checksum_update(&checksum_ctx, buffer, rc) < 0)
1154 {
1155 report_backup_error(context, "could not update checksum of file \"%s\"",
1156 relpath);
1157 close(fd);
1158 return;
1159 }
1160
1161 /* Report progress */
1162 done_size += rc;
1163 progress_report(false);
1164 }
1165 if (rc < 0)
1166 report_backup_error(context, "could not read file \"%s\": %m",
1167 relpath);
1168
1169 /* Close the file. */
1170 if (close(fd) != 0)
1171 {
1172 report_backup_error(context, "could not close file \"%s\": %m",
1173 relpath);
1174 return;
1175 }
1176
1177 /* If we didn't manage to read the whole file, bail out now. */
1178 if (rc < 0)
1179 return;
1180
1181 /*
1182 * Double-check that we read the expected number of bytes from the file.
1183 * Normally, mismatches would be caught in verify_plain_backup_file and
1184 * this check would never be reached, but this provides additional safety
1185 * and clarity in the event of concurrent modifications or filesystem
1186 * misbehavior.
1187 */
1188 if (bytes_read != m->size)
1189 {
1190 report_backup_error(context,
1191 "file \"%s\" should contain %" PRIu64 " bytes, but read %" PRIu64,
1192 relpath, m->size, bytes_read);
1193 return;
1194 }
1195
1196 /* Get the final checksum. */
1197 checksumlen = pg_checksum_final(&checksum_ctx, checksumbuf);
1198 if (checksumlen < 0)
1199 {
1200 report_backup_error(context,
1201 "could not finalize checksum of file \"%s\"",
1202 relpath);
1203 return;
1204 }
1205
1206 /* And check it against the manifest. */
1207 if (checksumlen != m->checksum_length)
1208 report_backup_error(context,
1209 "file \"%s\" has checksum of length %d, but expected %d",
1211 else if (memcmp(checksumbuf, m->checksum_payload, checksumlen) != 0)
1212 report_backup_error(context,
1213 "checksum mismatch for file \"%s\"",
1214 relpath);
1215}
1216
1217/*
1218 * Attempt to parse the WAL files required to restore from backup using
1219 * pg_waldump.
1220 */
1221static void
1223 char *wal_path)
1224{
1225 manifest_data *manifest = context->manifest;
1226 manifest_wal_range *this_wal_range = manifest->first_wal_range;
1227
1228 while (this_wal_range != NULL)
1229 {
1230 char *pg_waldump_cmd;
1231
1232 pg_waldump_cmd = psprintf("\"%s\" --quiet --path=\"%s\" --timeline=%u --start=%X/%08X --end=%X/%08X\n",
1234 LSN_FORMAT_ARGS(this_wal_range->start_lsn),
1236 fflush(NULL);
1237 if (system(pg_waldump_cmd) != 0)
1238 report_backup_error(context,
1239 "WAL parsing failed for timeline %u",
1240 this_wal_range->tli);
1241
1243 }
1244}
1245
1246/*
1247 * Report a problem with the backup.
1248 *
1249 * Update the context to indicate that we saw an error, and exit if the
1250 * context says we should.
1251 */
1252void
1254{
1255 va_list ap;
1256
1257 va_start(ap, fmt);
1259 va_end(ap);
1260
1261 context->saw_any_error = true;
1262 if (context->exit_on_error)
1263 exit(1);
1264}
1265
1266/*
1267 * Report a fatal error and exit
1268 */
1269void
1271{
1272 va_list ap;
1273
1274 va_start(ap, fmt);
1276 va_end(ap);
1277
1278 exit(1);
1279}
1280
1281/*
1282 * Is the specified relative path, or some prefix of it, listed in the set
1283 * of paths to ignore?
1284 *
1285 * Note that by "prefix" we mean a parent directory; for this purpose,
1286 * "aa/bb" is not a prefix of "aa/bbb", but it is a prefix of "aa/bb/cc".
1287 */
1288bool
1290{
1292
1293 for (cell = context->ignore_list.head; cell != NULL; cell = cell->next)
1294 {
1295 const char *r = relpath;
1296 char *v = cell->val;
1297
1298 while (*v != '\0' && *r == *v)
1299 ++r, ++v;
1300
1301 if (*v == '\0' && (*r == '\0' || *r == '/'))
1302 return true;
1303 }
1304
1305 return false;
1306}
1307
1308/*
1309 * Create a chain of archive streamers appropriate for verifying a given
1310 * archive.
1311 */
1312static astreamer *
1313create_archive_verifier(verifier_context *context, char *archive_name,
1314 Oid tblspc_oid, pg_compress_algorithm compress_algo)
1315{
1316 astreamer *streamer = NULL;
1317
1318 /* Should be here only for tar backup */
1319 Assert(context->format == 't');
1320
1321 /* Last step is the actual verification. */
1322 streamer = astreamer_verify_content_new(streamer, context, archive_name,
1323 tblspc_oid);
1324
1325 /* Before that we must parse the tar file. */
1326 streamer = astreamer_tar_parser_new(streamer);
1327
1328 /* Before that we must decompress, if archive is compressed. */
1329 if (compress_algo == PG_COMPRESSION_GZIP)
1330 streamer = astreamer_gzip_decompressor_new(streamer);
1331 else if (compress_algo == PG_COMPRESSION_LZ4)
1332 streamer = astreamer_lz4_decompressor_new(streamer);
1333 else if (compress_algo == PG_COMPRESSION_ZSTD)
1334 streamer = astreamer_zstd_decompressor_new(streamer);
1335
1336 return streamer;
1337}
1338
1339/*
1340 * Print a progress report based on the global variables.
1341 *
1342 * Progress report is written at maximum once per second, unless the finished
1343 * parameter is set to true.
1344 *
1345 * If finished is set to true, this is the last progress report. The cursor
1346 * is moved to the next line.
1347 */
1348static void
1349progress_report(bool finished)
1350{
1352 pg_time_t now;
1353 int percent_size = 0;
1354 char totalsize_str[32];
1355 char donesize_str[32];
1356
1357 if (!show_progress)
1358 return;
1359
1360 now = time(NULL);
1361 if (now == last_progress_report && !finished)
1362 return; /* Max once per second */
1363
1365 percent_size = total_size ? (int) ((done_size * 100 / total_size)) : 0;
1366
1368 total_size / 1024);
1370 done_size / 1024);
1371
1373 _("%*s/%s kB (%d%%) verified"),
1374 (int) strlen(totalsize_str),
1376
1377 /*
1378 * Stay on the same line if reporting to a terminal and we're not done
1379 * yet.
1380 */
1381 fputc((!finished && isatty(fileno(stderr))) ? '\r' : '\n', stderr);
1382}
1383
1384/*
1385 * Print out usage information and exit.
1386 */
1387static void
1389{
1390 printf(_("%s verifies a backup against the backup manifest.\n\n"), progname);
1391 printf(_("Usage:\n %s [OPTION]... BACKUPDIR\n\n"), progname);
1392 printf(_("Options:\n"));
1393 printf(_(" -e, --exit-on-error exit immediately on error\n"));
1394 printf(_(" -F, --format=p|t backup format (plain, tar)\n"));
1395 printf(_(" -i, --ignore=RELATIVE_PATH ignore indicated path\n"));
1396 printf(_(" -m, --manifest-path=PATH use specified path for manifest\n"));
1397 printf(_(" -n, --no-parse-wal do not try to parse WAL files\n"));
1398 printf(_(" -P, --progress show progress information\n"));
1399 printf(_(" -q, --quiet do not print any output, except for errors\n"));
1400 printf(_(" -s, --skip-checksums skip checksum verification\n"));
1401 printf(_(" -w, --wal-path=PATH use specified path for WAL files\n"));
1402 printf(_(" -V, --version output version information, then exit\n"));
1403 printf(_(" -?, --help show this help, then exit\n"));
1404 printf(_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
1405 printf(_("%s home page: <%s>\n"), PACKAGE_NAME, PACKAGE_URL);
1406}
static void astreamer_free(astreamer *streamer)
Definition astreamer.h:153
static void astreamer_content(astreamer *streamer, astreamer_member *member, const char *data, int len, astreamer_archive_context context)
Definition astreamer.h:135
static void astreamer_finalize(astreamer *streamer)
Definition astreamer.h:145
@ ASTREAMER_UNKNOWN
Definition astreamer.h:64
astreamer * astreamer_gzip_decompressor_new(astreamer *next)
astreamer * astreamer_lz4_decompressor_new(astreamer *next)
astreamer * astreamer_tar_parser_new(astreamer *next)
astreamer * astreamer_verify_content_new(astreamer *next, verifier_context *context, char *archive_name, Oid tblspc_oid)
astreamer * astreamer_zstd_decompressor_new(astreamer *next)
Datum now(PG_FUNCTION_ARGS)
Definition timestamp.c:1600
#define Min(x, y)
Definition c.h:1093
uint8_t uint8
Definition c.h:616
#define PG_UINT32_MAX
Definition c.h:676
#define pg_noreturn
Definition c.h:184
#define Max(x, y)
Definition c.h:1087
#define Assert(condition)
Definition c.h:945
#define PG_TEXTDOMAIN(domain)
Definition c.h:1305
#define PG_BINARY
Definition c.h:1376
#define pg_attribute_printf(f, a)
Definition c.h:262
#define gettext(x)
Definition c.h:1270
#define UINT64_FORMAT
Definition c.h:637
uint64_t uint64
Definition c.h:619
uint32_t uint32
Definition c.h:618
int pg_checksum_final(pg_checksum_context *context, uint8 *output)
int pg_checksum_update(pg_checksum_context *context, const uint8 *input, size_t len)
int pg_checksum_init(pg_checksum_context *context, pg_checksum_type type)
#define PG_CHECKSUM_MAX_LENGTH
pg_checksum_type
int find_my_exec(const char *argv0, char *retpath)
Definition exec.c:161
void set_pglocale_pgservice(const char *argv0, const char *app)
Definition exec.c:430
int find_other_exec(const char *argv0, const char *target, const char *versionstr, char *retpath)
Definition exec.c:311
int main(void)
bool parse_tar_compress_algorithm(const char *fname, pg_compress_algorithm *algorithm)
Definition compression.c:49
pg_compress_algorithm
Definition compression.h:22
@ PG_COMPRESSION_GZIP
Definition compression.h:24
@ PG_COMPRESSION_LZ4
Definition compression.h:25
@ PG_COMPRESSION_ZSTD
Definition compression.h:26
ControlFileData * get_controlfile_by_exact_path(const char *ControlFilePath, bool *crc_ok_p)
#define fprintf(file, fmt, msg)
Definition cubescan.l:21
int closedir(DIR *)
Definition dirent.c:127
struct dirent * readdir(DIR *)
Definition dirent.c:78
DIR * opendir(const char *)
Definition dirent.c:33
Datum arg
Definition elog.c:1322
#define _(x)
Definition elog.c:95
void * pg_malloc(size_t size)
Definition fe_memutils.c:47
void pg_free(void *ptr)
#define pg_malloc_array(type, count)
Definition fe_memutils.h:56
#define palloc_object(type)
Definition fe_memutils.h:74
#define pg_malloc0_object(type)
Definition fe_memutils.h:51
#define pg_malloc_object(type)
Definition fe_memutils.h:50
int getopt_long(int argc, char *const argv[], const char *optstring, const struct option *longopts, int *longindex)
Definition getopt_long.c:60
#define no_argument
Definition getopt_long.h:25
#define required_argument
Definition getopt_long.h:26
#define false
#define close(a)
Definition win32.h:12
#define read(a, b, c)
Definition win32.h:13
void pg_logging_init(const char *argv0)
Definition logging.c:83
void pg_log_generic_v(enum pg_log_level level, enum pg_log_part part, const char *pg_restrict fmt, va_list ap)
Definition logging.c:219
#define pg_log_error(...)
Definition logging.h:106
#define pg_log_error_hint(...)
Definition logging.h:112
@ PG_LOG_PRIMARY
Definition logging.h:67
@ PG_LOG_ERROR
Definition logging.h:43
#define pg_log_debug(...)
Definition logging.h:133
char * pstrdup(const char *in)
Definition mcxt.c:1781
void pfree(void *pointer)
Definition mcxt.c:1616
void json_parse_manifest(JsonManifestParseContext *context, const char *buffer, size_t size)
JsonManifestParseIncrementalState * json_parse_manifest_incremental_init(JsonManifestParseContext *context)
void json_parse_manifest_incremental_shutdown(JsonManifestParseIncrementalState *incstate)
void json_parse_manifest_incremental_chunk(JsonManifestParseIncrementalState *incstate, const char *chunk, size_t size, bool is_last)
static pg_time_t last_progress_report
Definition pg_amcheck.c:147
#define pg_fatal(...)
static bool manifest
#define MAXPGPATH
#define PG_CONTROL_VERSION
Definition pg_control.h:25
static char * filename
Definition pg_dumpall.c:133
PGDLLIMPORT int optind
Definition getopt.c:51
PGDLLIMPORT char * optarg
Definition getopt.c:53
static void verify_backup_checksums(verifier_context *context)
static uint64 done_size
static manifest_data * parse_manifest_file(char *manifest_path)
static void verify_tar_file(verifier_context *context, char *relpath, char *fullpath, astreamer *streamer)
void report_fatal_error(const char *pg_restrict fmt,...)
static void verifybackup_version_cb(JsonManifestParseContext *context, int manifest_version)
static void verifybackup_system_identifier(JsonManifestParseContext *context, uint64 manifest_system_identifier)
bool should_ignore_relpath(verifier_context *context, const char *relpath)
static void verifybackup_per_wal_range_cb(JsonManifestParseContext *context, TimeLineID tli, XLogRecPtr start_lsn, XLogRecPtr end_lsn)
static uint64 total_size
static void verify_plain_backup_directory(verifier_context *context, char *relpath, char *fullpath, DIR *dir)
static void report_extra_backup_files(verifier_context *context)
#define ESTIMATED_BYTES_PER_MANIFEST_LINE
static void precheck_tar_backup_file(verifier_context *context, char *relpath, char *fullpath, SimplePtrList *tarfiles, char **base_archive_path, char **wal_archive_path)
static void progress_report(bool finished)
static void parse_required_wal(verifier_context *context, char *pg_waldump_path, char *wal_path)
#define READ_CHUNK_SIZE
static astreamer * create_archive_verifier(verifier_context *context, char *archive_name, Oid tblspc_oid, pg_compress_algorithm compress_algo)
static bool show_progress
static pg_noreturn void report_manifest_error(JsonManifestParseContext *context, const char *fmt,...) pg_attribute_printf(2
void report_backup_error(verifier_context *context, const char *pg_restrict fmt,...)
static pg_noreturn void static void verify_tar_backup(verifier_context *context, DIR *dir, char **base_archive_path, char **wal_archive_path)
static const char * progname
static void usage(void)
static void verify_plain_backup_file(verifier_context *context, char *relpath, char *fullpath)
static void verify_file_checksum(verifier_context *context, manifest_file *m, char *fullpath, uint8 *buffer)
static void verify_control_file(const char *controlpath, uint64 manifest_system_identifier)
static void verifybackup_per_file_cb(JsonManifestParseContext *context, const char *pathname, uint64 size, pg_checksum_type checksum_type, int checksum_length, uint8 *checksum_payload)
#define should_verify_checksum(m)
int64 pg_time_t
Definition pgtime.h:23
void canonicalize_path(char *path)
Definition path.c:337
#define snprintf
Definition port.h:260
const char * get_progname(const char *argv0)
Definition path.c:652
#define printf(...)
Definition port.h:266
size_t strlcpy(char *dst, const char *src, size_t siz)
Definition strlcpy.c:45
#define InvalidOid
unsigned int Oid
#define OID_MAX
char * c
static int fd(const char *x, int i)
static int fb(int x)
char * psprintf(const char *fmt,...)
Definition psprintf.c:43
static struct cvec * range(struct vars *v, chr a, chr b, int cases)
#define relpath(rlocator, forknum)
Definition relpath.h:150
void simple_ptr_list_destroy(SimplePtrList *list)
void simple_string_list_append(SimpleStringList *list, const char *val)
Definition simple_list.c:63
void simple_ptr_list_append(SimplePtrList *list, void *ptr)
uint32 pg_control_version
Definition pg_control.h:127
uint64 system_identifier
Definition pg_control.h:112
Definition dirent.c:26
json_manifest_per_wal_range_callback per_wal_range_cb
json_manifest_system_identifier_callback system_identifier_cb
json_manifest_error_callback error_cb
json_manifest_per_file_callback per_file_cb
json_manifest_version_callback version_cb
struct SimplePtrListCell * next
Definition simple_list.h:48
char val[FLEXIBLE_ARRAY_MEMBER]
Definition simple_list.h:37
struct SimpleStringListCell * next
Definition simple_list.h:34
SimpleStringListCell * head
Definition simple_list.h:42
char d_name[MAX_PATH]
Definition dirent.h:15
uint64 system_identifier
manifest_files_hash * files
uint8 * checksum_payload
pg_checksum_type checksum_type
const char * pathname
char * relpath
pg_compress_algorithm compress_algorithm
SimpleStringList ignore_list
manifest_data * manifest
#define stat
Definition win32_port.h:74
#define S_ISDIR(m)
Definition win32_port.h:315
#define fstat
Definition win32_port.h:73
#define S_ISREG(m)
Definition win32_port.h:318
#define XLOG_CONTROL_FILE
#define LSN_FORMAT_ARGS(lsn)
Definition xlogdefs.h:47
uint64 XLogRecPtr
Definition xlogdefs.h:21
uint32 TimeLineID
Definition xlogdefs.h:63