PostgreSQL Source Code  git master
astreamer_verify.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * astreamer_verify.c
4  *
5  * Archive streamer for verification of a tar format backup (including
6  * compressed tar format backups).
7  *
8  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
9  *
10  * src/bin/pg_verifybackup/astreamer_verify.c
11  *
12  *-------------------------------------------------------------------------
13  */
14 
15 #include "postgres_fe.h"
16 
17 #include "catalog/pg_control.h"
18 #include "pg_verifybackup.h"
19 
20 typedef struct astreamer_verify
21 {
22  /* These fields don't change once initialized. */
25  char *archive_name;
27 
28  /* These fields change for each archive member. */
37 
38 static void astreamer_verify_content(astreamer *streamer,
39  astreamer_member *member,
40  const char *data, int len,
42 static void astreamer_verify_finalize(astreamer *streamer);
43 static void astreamer_verify_free(astreamer *streamer);
44 
45 static void member_verify_header(astreamer *streamer, astreamer_member *member);
46 static void member_compute_checksum(astreamer *streamer,
47  astreamer_member *member,
48  const char *data, int len);
49 static void member_verify_checksum(astreamer *streamer);
50 static void member_copy_control_data(astreamer *streamer,
51  astreamer_member *member,
52  const char *data, int len);
53 static void member_verify_control_data(astreamer *streamer);
54 static void member_reset_info(astreamer *streamer);
55 
58  .finalize = astreamer_verify_finalize,
59  .free = astreamer_verify_free
60 };
61 
62 /*
63  * Create an astreamer that can verify a tar file.
64  */
65 astreamer *
67  char *archive_name, Oid tblspc_oid)
68 {
69  astreamer_verify *streamer;
70 
71  streamer = palloc0(sizeof(astreamer_verify));
72  *((const astreamer_ops **) &streamer->base.bbs_ops) =
74 
75  streamer->base.bbs_next = next;
76  streamer->context = context;
77  streamer->archive_name = archive_name;
78  streamer->tblspc_oid = tblspc_oid;
79 
80  if (!context->skip_checksums)
81  streamer->checksum_ctx = pg_malloc(sizeof(pg_checksum_context));
82 
83  return &streamer->base;
84 }
85 
86 /*
87  * Main entry point of the archive streamer for verifying tar members.
88  */
89 static void
91  const char *data, int len,
93 {
94  astreamer_verify *mystreamer = (astreamer_verify *) streamer;
95 
97 
98  switch (context)
99  {
101  /* Initial setup plus decide which checks to perform. */
102  member_verify_header(streamer, member);
103  break;
104 
106  /* Incremental work required to verify file contents. */
107  if (mystreamer->verify_checksum)
108  member_compute_checksum(streamer, member, data, len);
109  if (mystreamer->verify_control_data)
110  member_copy_control_data(streamer, member, data, len);
111  break;
112 
114  /* Now we've got all the file data. */
115  if (mystreamer->verify_checksum)
116  member_verify_checksum(streamer);
117  if (mystreamer->verify_control_data)
118  member_verify_control_data(streamer);
119 
120  /* Reset for next archive member. */
121  member_reset_info(streamer);
122  break;
123 
125  break;
126 
127  default:
128  /* Shouldn't happen. */
129  pg_fatal("unexpected state while parsing tar file");
130  }
131 }
132 
133 /*
134  * End-of-stream processing for a astreamer_verify stream.
135  */
136 static void
138 {
139  Assert(streamer->bbs_next == NULL);
140 }
141 
142 /*
143  * Free memory associated with a astreamer_verify stream.
144  */
145 static void
147 {
148  astreamer_verify *mystreamer = (astreamer_verify *) streamer;
149 
150  if (mystreamer->checksum_ctx)
151  pfree(mystreamer->checksum_ctx);
152 
153  pfree(streamer);
154 }
155 
156 /*
157  * Prepare to validate the next archive member.
158  */
159 static void
161 {
162  astreamer_verify *mystreamer = (astreamer_verify *) streamer;
163  manifest_file *m;
164  char pathname[MAXPGPATH];
165 
166  /* We are only interested in normal files. */
167  if (member->is_directory || member->is_link)
168  return;
169 
170  /*
171  * The backup manifest stores a relative path to the base directory for
172  * files belonging to a tablespace, while the tablespace backup tar
173  * archive does not include this path.
174  *
175  * The pathname taken from the tar file could contain '.' or '..'
176  * references, which we want to remove, so apply canonicalize_path(). It
177  * could also be an absolute pathname, which we want to treat as a
178  * relative path, so prepend "./" if we're not adding a tablespace prefix
179  * to make sure that canonicalize_path() does what we want.
180  */
181  if (OidIsValid(mystreamer->tblspc_oid))
182  snprintf(pathname, MAXPGPATH, "%s/%u/%s",
183  "pg_tblspc", mystreamer->tblspc_oid, member->pathname);
184  else
185  snprintf(pathname, MAXPGPATH, "./%s", member->pathname);
186  canonicalize_path(pathname);
187 
188  /* Ignore any files that are listed in the ignore list. */
189  if (should_ignore_relpath(mystreamer->context, pathname))
190  return;
191 
192  /* Check whether there's an entry in the manifest hash. */
193  m = manifest_files_lookup(mystreamer->context->manifest->files, pathname);
194  if (m == NULL)
195  {
196  report_backup_error(mystreamer->context,
197  "\"%s\" is present in \"%s\" but not in the manifest",
198  member->pathname, mystreamer->archive_name);
199  return;
200  }
201  mystreamer->mfile = m;
202 
203  /* Flag this entry as having been encountered in a tar archive. */
204  m->matched = true;
205 
206  /* Check that the size matches. */
207  if (m->size != member->size)
208  {
209  report_backup_error(mystreamer->context,
210  "\"%s\" has size %llu in \"%s\" but size %llu in the manifest",
211  member->pathname,
212  (unsigned long long) member->size,
213  mystreamer->archive_name,
214  (unsigned long long) m->size);
215  m->bad = true;
216  return;
217  }
218 
219  /*
220  * Decide whether we're going to verify the checksum for this file, and
221  * whether we're going to perform the additional validation that we do
222  * only for the control file.
223  */
224  mystreamer->verify_checksum =
225  (!mystreamer->context->skip_checksums && should_verify_checksum(m));
226  mystreamer->verify_control_data =
227  mystreamer->context->manifest->version != 1 &&
228  !m->bad && strcmp(m->pathname, "global/pg_control") == 0;
229 
230  /* If we're going to verify the checksum, initial a checksum context. */
231  if (mystreamer->verify_checksum &&
232  pg_checksum_init(mystreamer->checksum_ctx, m->checksum_type) < 0)
233  {
234  report_backup_error(mystreamer->context,
235  "%s: could not initialize checksum of file \"%s\"",
236  mystreamer->archive_name, m->pathname);
237 
238  /*
239  * Checksum verification cannot be performed without proper context
240  * initialization.
241  */
242  mystreamer->verify_checksum = false;
243  }
244 }
245 
246 /*
247  * Computes the checksum incrementally for the received file content.
248  *
249  * Should have a correctly initialized checksum_ctx, which will be used for
250  * incremental checksum computation.
251  */
252 static void
254  const char *data, int len)
255 {
256  astreamer_verify *mystreamer = (astreamer_verify *) streamer;
257  pg_checksum_context *checksum_ctx = mystreamer->checksum_ctx;
258  manifest_file *m = mystreamer->mfile;
259 
260  Assert(mystreamer->verify_checksum);
261  Assert(m->checksum_type == checksum_ctx->type);
262 
263  /*
264  * Update the total count of computed checksum bytes so that we can
265  * cross-check against the file size.
266  */
267  mystreamer->checksum_bytes += len;
268 
269  /* Feed these bytes to the checksum calculation. */
270  if (pg_checksum_update(checksum_ctx, (uint8 *) data, len) < 0)
271  {
272  report_backup_error(mystreamer->context,
273  "could not update checksum of file \"%s\"",
274  m->pathname);
275  mystreamer->verify_checksum = false;
276  }
277 }
278 
279 /*
280  * Perform the final computation and checksum verification after the entire
281  * file content has been processed.
282  */
283 static void
285 {
286  astreamer_verify *mystreamer = (astreamer_verify *) streamer;
287  manifest_file *m = mystreamer->mfile;
288  uint8 checksumbuf[PG_CHECKSUM_MAX_LENGTH];
289  int checksumlen;
290 
291  Assert(mystreamer->verify_checksum);
292 
293  /*
294  * It's unclear how this could fail, but let's check anyway to be safe.
295  */
296  if (mystreamer->checksum_bytes != m->size)
297  {
298  report_backup_error(mystreamer->context,
299  "file \"%s\" in \"%s\" should contain %llu bytes, but read %llu bytes",
300  m->pathname, mystreamer->archive_name,
301  (unsigned long long) m->size,
302  (unsigned long long) mystreamer->checksum_bytes);
303  return;
304  }
305 
306  /* Get the final checksum. */
307  checksumlen = pg_checksum_final(mystreamer->checksum_ctx, checksumbuf);
308  if (checksumlen < 0)
309  {
310  report_backup_error(mystreamer->context,
311  "could not finalize checksum of file \"%s\"",
312  m->pathname);
313  return;
314  }
315 
316  /* And check it against the manifest. */
317  if (checksumlen != m->checksum_length)
318  report_backup_error(mystreamer->context,
319  "file \"%s\" in \"%s\" has checksum of length %d, but expected %d",
320  m->pathname, mystreamer->archive_name,
321  m->checksum_length, checksumlen);
322  else if (memcmp(checksumbuf, m->checksum_payload, checksumlen) != 0)
323  report_backup_error(mystreamer->context,
324  "checksum mismatch for file \"%s\" in \"%s\"",
325  m->pathname, mystreamer->archive_name);
326 }
327 
328 /*
329  * Stores the pg_control file contents into a local buffer; we need the entire
330  * control file data for verification.
331  */
332 static void
334  const char *data, int len)
335 {
336  astreamer_verify *mystreamer = (astreamer_verify *) streamer;
337 
338  /* Should be here only for control file */
339  Assert(mystreamer->verify_control_data);
340 
341  /*
342  * Copy the new data into the control file buffer, but do not overrun the
343  * buffer. Note that the on-disk length of the control file is expected to
344  * be PG_CONTROL_FILE_SIZE, but the part that fits in our buffer is
345  * shorter, just sizeof(ControlFileData).
346  */
347  if (mystreamer->control_file_bytes < sizeof(ControlFileData))
348  {
349  size_t remaining;
350 
351  remaining = sizeof(ControlFileData) - mystreamer->control_file_bytes;
352  memcpy(((char *) &mystreamer->control_file)
353  + mystreamer->control_file_bytes,
354  data, Min((size_t) len, remaining));
355  }
356 
357  /* Remember how many bytes we saw, even if we didn't buffer them. */
358  mystreamer->control_file_bytes += len;
359 }
360 
361 /*
362  * Performs the CRC calculation of pg_control data and then calls the routines
363  * that execute the final verification of the control file information.
364  */
365 static void
367 {
368  astreamer_verify *mystreamer = (astreamer_verify *) streamer;
369  manifest_data *manifest = mystreamer->context->manifest;
370  pg_crc32c crc;
371 
372  /* Should be here only for control file */
373  Assert(strcmp(mystreamer->mfile->pathname, "global/pg_control") == 0);
374  Assert(mystreamer->verify_control_data);
375 
376  /*
377  * If the control file is not the right length, that's a big problem.
378  *
379  * NB: There is a theoretical overflow risk here from casting to int, but
380  * it isn't likely to be a real problem and this enables us to match the
381  * same format string that pg_rewind uses for this case. Perhaps both this
382  * and pg_rewind should use an unsigned 64-bit value, but for now we don't
383  * worry about it.
384  */
385  if (mystreamer->control_file_bytes != PG_CONTROL_FILE_SIZE)
386  report_fatal_error("unexpected control file size %d, expected %d",
387  (int) mystreamer->control_file_bytes,
389 
390  /* Compute the CRC. */
391  INIT_CRC32C(crc);
392  COMP_CRC32C(crc, &mystreamer->control_file,
393  offsetof(ControlFileData, crc));
394  FIN_CRC32C(crc);
395 
396  /* Control file contents not meaningful if CRC is bad. */
397  if (!EQ_CRC32C(crc, mystreamer->control_file.crc))
398  report_fatal_error("%s: %s: CRC is incorrect",
399  mystreamer->archive_name,
400  mystreamer->mfile->pathname);
401 
402  /* Can't interpret control file if not current version. */
404  report_fatal_error("%s: %s: unexpected control file version",
405  mystreamer->archive_name,
406  mystreamer->mfile->pathname);
407 
408  /* System identifiers should match. */
409  if (manifest->system_identifier !=
410  mystreamer->control_file.system_identifier)
411  report_fatal_error("%s: %s: manifest system identifier is %llu, but control file has %llu",
412  mystreamer->archive_name,
413  mystreamer->mfile->pathname,
414  (unsigned long long) manifest->system_identifier,
415  (unsigned long long) mystreamer->control_file.system_identifier);
416 }
417 
418 /*
419  * Reset flags and free memory allocations for member file verification.
420  */
421 static void
423 {
424  astreamer_verify *mystreamer = (astreamer_verify *) streamer;
425 
426  mystreamer->mfile = NULL;
427  mystreamer->verify_checksum = false;
428  mystreamer->verify_control_data = false;
429  mystreamer->checksum_bytes = 0;
430  mystreamer->control_file_bytes = 0;
431 }
astreamer_archive_context
Definition: astreamer.h:63
@ ASTREAMER_MEMBER_HEADER
Definition: astreamer.h:65
@ ASTREAMER_MEMBER_CONTENTS
Definition: astreamer.h:66
@ ASTREAMER_MEMBER_TRAILER
Definition: astreamer.h:67
@ ASTREAMER_ARCHIVE_TRAILER
Definition: astreamer.h:68
@ ASTREAMER_UNKNOWN
Definition: astreamer.h:64
static void member_copy_control_data(astreamer *streamer, astreamer_member *member, const char *data, int len)
static void member_verify_header(astreamer *streamer, astreamer_member *member)
static void astreamer_verify_finalize(astreamer *streamer)
astreamer * astreamer_verify_content_new(astreamer *next, verifier_context *context, char *archive_name, Oid tblspc_oid)
static void member_compute_checksum(astreamer *streamer, astreamer_member *member, const char *data, int len)
static void astreamer_verify_content(astreamer *streamer, astreamer_member *member, const char *data, int len, astreamer_archive_context context)
static void member_reset_info(astreamer *streamer)
struct astreamer_verify astreamer_verify
static void member_verify_control_data(astreamer *streamer)
static const astreamer_ops astreamer_verify_ops
static void astreamer_verify_free(astreamer *streamer)
static void member_verify_checksum(astreamer *streamer)
static int32 next
Definition: blutils.c:222
#define Min(x, y)
Definition: c.h:995
#define Assert(condition)
Definition: c.h:849
unsigned char uint8
Definition: c.h:504
#define OidIsValid(objectId)
Definition: c.h:766
int pg_checksum_final(pg_checksum_context *context, uint8 *output)
int pg_checksum_update(pg_checksum_context *context, const uint8 *input, size_t len)
int pg_checksum_init(pg_checksum_context *context, pg_checksum_type type)
#define PG_CHECKSUM_MAX_LENGTH
void * pg_malloc(size_t size)
Definition: fe_memutils.c:47
int remaining
Definition: informix.c:692
void pfree(void *pointer)
Definition: mcxt.c:1521
void * palloc0(Size size)
Definition: mcxt.c:1347
#define pg_fatal(...)
static bool manifest
#define MAXPGPATH
struct ControlFileData ControlFileData
#define PG_CONTROL_VERSION
Definition: pg_control.h:25
#define PG_CONTROL_FILE_SIZE
Definition: pg_control.h:250
uint32 pg_crc32c
Definition: pg_crc32c.h:38
#define COMP_CRC32C(crc, data, len)
Definition: pg_crc32c.h:98
#define EQ_CRC32C(c1, c2)
Definition: pg_crc32c.h:42
#define INIT_CRC32C(crc)
Definition: pg_crc32c.h:41
#define FIN_CRC32C(crc)
Definition: pg_crc32c.h:103
const void size_t len
const void * data
return crc
void report_fatal_error(const char *pg_restrict fmt,...)
bool should_ignore_relpath(verifier_context *context, const char *relpath)
void report_backup_error(verifier_context *context, const char *pg_restrict fmt,...)
#define should_verify_checksum(m)
void canonicalize_path(char *path)
Definition: path.c:265
#define snprintf
Definition: port.h:238
unsigned int Oid
Definition: postgres_ext.h:31
tree context
Definition: radixtree.h:1835
uint32 pg_control_version
Definition: pg_control.h:125
uint64 system_identifier
Definition: pg_control.h:110
pg_crc32c crc
Definition: pg_control.h:232
char pathname[MAXPGPATH]
Definition: astreamer.h:81
pgoff_t size
Definition: astreamer.h:82
void(* content)(astreamer *streamer, astreamer_member *member, const char *data, int len, astreamer_archive_context context)
Definition: astreamer.h:126
verifier_context * context
ControlFileData control_file
manifest_file * mfile
pg_checksum_context * checksum_ctx
const astreamer_ops * bbs_ops
Definition: astreamer.h:109
astreamer * bbs_next
Definition: astreamer.h:110
manifest_files_hash * files
Definition: load_manifest.h:59
uint8 * checksum_payload
Definition: load_manifest.h:29
pg_checksum_type checksum_type
Definition: load_manifest.h:27
const char * pathname
Definition: load_manifest.h:25
pg_checksum_type type
manifest_data * manifest