PostgreSQL Source Code git master
astreamer_verify.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * astreamer_verify.c
4 *
5 * Archive streamer for verification of a tar format backup (including
6 * compressed tar format backups).
7 *
8 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
9 *
10 * src/bin/pg_verifybackup/astreamer_verify.c
11 *
12 *-------------------------------------------------------------------------
13 */
14
15#include "postgres_fe.h"
16
17#include "catalog/pg_control.h"
18#include "pg_verifybackup.h"
19
20typedef struct astreamer_verify
21{
22 /* These fields don't change once initialized. */
27
28 /* These fields change for each archive member. */
37
38static void astreamer_verify_content(astreamer *streamer,
39 astreamer_member *member,
40 const char *data, int len,
42static void astreamer_verify_finalize(astreamer *streamer);
43static void astreamer_verify_free(astreamer *streamer);
44
45static void member_verify_header(astreamer *streamer, astreamer_member *member);
46static void member_compute_checksum(astreamer *streamer,
47 astreamer_member *member,
48 const char *data, int len);
49static void member_verify_checksum(astreamer *streamer);
50static void member_copy_control_data(astreamer *streamer,
51 astreamer_member *member,
52 const char *data, int len);
53static void member_verify_control_data(astreamer *streamer);
54static void member_reset_info(astreamer *streamer);
55
58 .finalize = astreamer_verify_finalize,
60};
61
62/*
63 * Create an astreamer that can verify a tar file.
64 */
67 char *archive_name, Oid tblspc_oid)
68{
69 astreamer_verify *streamer;
70
71 streamer = palloc0(sizeof(astreamer_verify));
72 *((const astreamer_ops **) &streamer->base.bbs_ops) =
74
75 streamer->base.bbs_next = next;
76 streamer->context = context;
77 streamer->archive_name = archive_name;
78 streamer->tblspc_oid = tblspc_oid;
79
80 if (!context->skip_checksums)
81 streamer->checksum_ctx = pg_malloc(sizeof(pg_checksum_context));
82
83 return &streamer->base;
84}
85
86/*
87 * Main entry point of the archive streamer for verifying tar members.
88 */
89static void
91 const char *data, int len,
93{
94 astreamer_verify *mystreamer = (astreamer_verify *) streamer;
95
96 Assert(context != ASTREAMER_UNKNOWN);
97
98 switch (context)
99 {
101 /* Initial setup plus decide which checks to perform. */
102 member_verify_header(streamer, member);
103 break;
104
106 /* Incremental work required to verify file contents. */
107 if (mystreamer->verify_checksum)
108 member_compute_checksum(streamer, member, data, len);
109 if (mystreamer->verify_control_data)
110 member_copy_control_data(streamer, member, data, len);
111 break;
112
114 /* Now we've got all the file data. */
115 if (mystreamer->verify_checksum)
116 member_verify_checksum(streamer);
117 if (mystreamer->verify_control_data)
119
120 /* Reset for next archive member. */
121 member_reset_info(streamer);
122 break;
123
125 break;
126
127 default:
128 /* Shouldn't happen. */
129 pg_fatal("unexpected state while parsing tar file");
130 }
131}
132
133/*
134 * End-of-stream processing for a astreamer_verify stream.
135 */
136static void
138{
139 Assert(streamer->bbs_next == NULL);
140}
141
142/*
143 * Free memory associated with a astreamer_verify stream.
144 */
145static void
147{
148 astreamer_verify *mystreamer = (astreamer_verify *) streamer;
149
150 if (mystreamer->checksum_ctx)
151 pfree(mystreamer->checksum_ctx);
152
153 pfree(streamer);
154}
155
156/*
157 * Prepare to validate the next archive member.
158 */
159static void
161{
162 astreamer_verify *mystreamer = (astreamer_verify *) streamer;
163 manifest_file *m;
164 char pathname[MAXPGPATH];
165
166 /* We are only interested in normal files. */
167 if (member->is_directory || member->is_link)
168 return;
169
170 /*
171 * The backup manifest stores a relative path to the base directory for
172 * files belonging to a tablespace, while the tablespace backup tar
173 * archive does not include this path.
174 *
175 * The pathname taken from the tar file could contain '.' or '..'
176 * references, which we want to remove, so apply canonicalize_path(). It
177 * could also be an absolute pathname, which we want to treat as a
178 * relative path, so prepend "./" if we're not adding a tablespace prefix
179 * to make sure that canonicalize_path() does what we want.
180 */
181 if (OidIsValid(mystreamer->tblspc_oid))
182 snprintf(pathname, MAXPGPATH, "%s/%u/%s",
183 "pg_tblspc", mystreamer->tblspc_oid, member->pathname);
184 else
185 snprintf(pathname, MAXPGPATH, "./%s", member->pathname);
186 canonicalize_path(pathname);
187
188 /* Ignore any files that are listed in the ignore list. */
189 if (should_ignore_relpath(mystreamer->context, pathname))
190 return;
191
192 /* Check whether there's an entry in the manifest hash. */
193 m = manifest_files_lookup(mystreamer->context->manifest->files, pathname);
194 if (m == NULL)
195 {
196 report_backup_error(mystreamer->context,
197 "\"%s\" is present in \"%s\" but not in the manifest",
198 member->pathname, mystreamer->archive_name);
199 return;
200 }
201 mystreamer->mfile = m;
202
203 /* Flag this entry as having been encountered in a tar archive. */
204 m->matched = true;
205
206 /* Check that the size matches. */
207 if (m->size != member->size)
208 {
209 report_backup_error(mystreamer->context,
210 "\"%s\" has size %llu in \"%s\" but size %llu in the manifest",
211 member->pathname,
212 (unsigned long long) member->size,
213 mystreamer->archive_name,
214 (unsigned long long) m->size);
215 m->bad = true;
216 return;
217 }
218
219 /*
220 * Decide whether we're going to verify the checksum for this file, and
221 * whether we're going to perform the additional validation that we do
222 * only for the control file.
223 */
224 mystreamer->verify_checksum =
225 (!mystreamer->context->skip_checksums && should_verify_checksum(m));
226 mystreamer->verify_control_data =
227 mystreamer->context->manifest->version != 1 &&
228 !m->bad && strcmp(m->pathname, "global/pg_control") == 0;
229
230 /* If we're going to verify the checksum, initial a checksum context. */
231 if (mystreamer->verify_checksum &&
232 pg_checksum_init(mystreamer->checksum_ctx, m->checksum_type) < 0)
233 {
234 report_backup_error(mystreamer->context,
235 "%s: could not initialize checksum of file \"%s\"",
236 mystreamer->archive_name, m->pathname);
237
238 /*
239 * Checksum verification cannot be performed without proper context
240 * initialization.
241 */
242 mystreamer->verify_checksum = false;
243 }
244}
245
246/*
247 * Computes the checksum incrementally for the received file content.
248 *
249 * Should have a correctly initialized checksum_ctx, which will be used for
250 * incremental checksum computation.
251 */
252static void
254 const char *data, int len)
255{
256 astreamer_verify *mystreamer = (astreamer_verify *) streamer;
257 pg_checksum_context *checksum_ctx = mystreamer->checksum_ctx;
258 manifest_file *m = mystreamer->mfile;
259
260 Assert(mystreamer->verify_checksum);
261 Assert(m->checksum_type == checksum_ctx->type);
262
263 /*
264 * Update the total count of computed checksum bytes so that we can
265 * cross-check against the file size.
266 */
267 mystreamer->checksum_bytes += len;
268
269 /* Feed these bytes to the checksum calculation. */
270 if (pg_checksum_update(checksum_ctx, (uint8 *) data, len) < 0)
271 {
272 report_backup_error(mystreamer->context,
273 "could not update checksum of file \"%s\"",
274 m->pathname);
275 mystreamer->verify_checksum = false;
276 }
277}
278
279/*
280 * Perform the final computation and checksum verification after the entire
281 * file content has been processed.
282 */
283static void
285{
286 astreamer_verify *mystreamer = (astreamer_verify *) streamer;
287 manifest_file *m = mystreamer->mfile;
288 uint8 checksumbuf[PG_CHECKSUM_MAX_LENGTH];
289 int checksumlen;
290
291 Assert(mystreamer->verify_checksum);
292
293 /*
294 * It's unclear how this could fail, but let's check anyway to be safe.
295 */
296 if (mystreamer->checksum_bytes != m->size)
297 {
298 report_backup_error(mystreamer->context,
299 "file \"%s\" in \"%s\" should contain %llu bytes, but read %llu bytes",
300 m->pathname, mystreamer->archive_name,
301 (unsigned long long) m->size,
302 (unsigned long long) mystreamer->checksum_bytes);
303 return;
304 }
305
306 /* Get the final checksum. */
307 checksumlen = pg_checksum_final(mystreamer->checksum_ctx, checksumbuf);
308 if (checksumlen < 0)
309 {
310 report_backup_error(mystreamer->context,
311 "could not finalize checksum of file \"%s\"",
312 m->pathname);
313 return;
314 }
315
316 /* And check it against the manifest. */
317 if (checksumlen != m->checksum_length)
318 report_backup_error(mystreamer->context,
319 "file \"%s\" in \"%s\" has checksum of length %d, but expected %d",
320 m->pathname, mystreamer->archive_name,
321 m->checksum_length, checksumlen);
322 else if (memcmp(checksumbuf, m->checksum_payload, checksumlen) != 0)
323 report_backup_error(mystreamer->context,
324 "checksum mismatch for file \"%s\" in \"%s\"",
325 m->pathname, mystreamer->archive_name);
326}
327
328/*
329 * Stores the pg_control file contents into a local buffer; we need the entire
330 * control file data for verification.
331 */
332static void
334 const char *data, int len)
335{
336 astreamer_verify *mystreamer = (astreamer_verify *) streamer;
337
338 /* Should be here only for control file */
339 Assert(mystreamer->verify_control_data);
340
341 /*
342 * Copy the new data into the control file buffer, but do not overrun the
343 * buffer. Note that the on-disk length of the control file is expected to
344 * be PG_CONTROL_FILE_SIZE, but the part that fits in our buffer is
345 * shorter, just sizeof(ControlFileData).
346 */
347 if (mystreamer->control_file_bytes < sizeof(ControlFileData))
348 {
349 size_t remaining;
350
351 remaining = sizeof(ControlFileData) - mystreamer->control_file_bytes;
352 memcpy(((char *) &mystreamer->control_file)
353 + mystreamer->control_file_bytes,
354 data, Min((size_t) len, remaining));
355 }
356
357 /* Remember how many bytes we saw, even if we didn't buffer them. */
358 mystreamer->control_file_bytes += len;
359}
360
361/*
362 * Performs the CRC calculation of pg_control data and then calls the routines
363 * that execute the final verification of the control file information.
364 */
365static void
367{
368 astreamer_verify *mystreamer = (astreamer_verify *) streamer;
369 manifest_data *manifest = mystreamer->context->manifest;
371
372 /* Should be here only for control file */
373 Assert(strcmp(mystreamer->mfile->pathname, "global/pg_control") == 0);
374 Assert(mystreamer->verify_control_data);
375
376 /*
377 * If the control file is not the right length, that's a big problem.
378 *
379 * NB: There is a theoretical overflow risk here from casting to int, but
380 * it isn't likely to be a real problem and this enables us to match the
381 * same format string that pg_rewind uses for this case. Perhaps both this
382 * and pg_rewind should use an unsigned 64-bit value, but for now we don't
383 * worry about it.
384 */
385 if (mystreamer->control_file_bytes != PG_CONTROL_FILE_SIZE)
386 report_fatal_error("unexpected control file size %d, expected %d",
387 (int) mystreamer->control_file_bytes,
389
390 /* Compute the CRC. */
392 COMP_CRC32C(crc, &mystreamer->control_file,
393 offsetof(ControlFileData, crc));
395
396 /* Control file contents not meaningful if CRC is bad. */
397 if (!EQ_CRC32C(crc, mystreamer->control_file.crc))
398 report_fatal_error("%s: %s: CRC is incorrect",
399 mystreamer->archive_name,
400 mystreamer->mfile->pathname);
401
402 /* Can't interpret control file if not current version. */
404 report_fatal_error("%s: %s: unexpected control file version",
405 mystreamer->archive_name,
406 mystreamer->mfile->pathname);
407
408 /* System identifiers should match. */
409 if (manifest->system_identifier !=
411 report_fatal_error("%s: %s: manifest system identifier is %llu, but control file has %llu",
412 mystreamer->archive_name,
413 mystreamer->mfile->pathname,
414 (unsigned long long) manifest->system_identifier,
415 (unsigned long long) mystreamer->control_file.system_identifier);
416}
417
418/*
419 * Reset flags and free memory allocations for member file verification.
420 */
421static void
423{
424 astreamer_verify *mystreamer = (astreamer_verify *) streamer;
425
426 mystreamer->mfile = NULL;
427 mystreamer->verify_checksum = false;
428 mystreamer->verify_control_data = false;
429 mystreamer->checksum_bytes = 0;
430 mystreamer->control_file_bytes = 0;
431}
astreamer_archive_context
Definition: astreamer.h:63
@ ASTREAMER_MEMBER_HEADER
Definition: astreamer.h:65
@ ASTREAMER_MEMBER_CONTENTS
Definition: astreamer.h:66
@ ASTREAMER_MEMBER_TRAILER
Definition: astreamer.h:67
@ ASTREAMER_ARCHIVE_TRAILER
Definition: astreamer.h:68
@ ASTREAMER_UNKNOWN
Definition: astreamer.h:64
static void member_copy_control_data(astreamer *streamer, astreamer_member *member, const char *data, int len)
static void member_verify_header(astreamer *streamer, astreamer_member *member)
static void astreamer_verify_finalize(astreamer *streamer)
static void member_compute_checksum(astreamer *streamer, astreamer_member *member, const char *data, int len)
static void astreamer_verify_content(astreamer *streamer, astreamer_member *member, const char *data, int len, astreamer_archive_context context)
astreamer * astreamer_verify_content_new(astreamer *next, verifier_context *context, char *archive_name, Oid tblspc_oid)
static void member_reset_info(astreamer *streamer)
struct astreamer_verify astreamer_verify
static void member_verify_control_data(astreamer *streamer)
static const astreamer_ops astreamer_verify_ops
static void astreamer_verify_free(astreamer *streamer)
static void member_verify_checksum(astreamer *streamer)
static int32 next
Definition: blutils.c:221
#define Min(x, y)
Definition: c.h:961
uint8_t uint8
Definition: c.h:486
#define Assert(condition)
Definition: c.h:815
uint64_t uint64
Definition: c.h:489
#define OidIsValid(objectId)
Definition: c.h:732
int pg_checksum_final(pg_checksum_context *context, uint8 *output)
int pg_checksum_update(pg_checksum_context *context, const uint8 *input, size_t len)
int pg_checksum_init(pg_checksum_context *context, pg_checksum_type type)
#define PG_CHECKSUM_MAX_LENGTH
void * pg_malloc(size_t size)
Definition: fe_memutils.c:47
int remaining
Definition: informix.c:692
void pfree(void *pointer)
Definition: mcxt.c:1521
void * palloc0(Size size)
Definition: mcxt.c:1347
#define pg_fatal(...)
static bool manifest
#define MAXPGPATH
struct ControlFileData ControlFileData
#define PG_CONTROL_VERSION
Definition: pg_control.h:25
#define PG_CONTROL_FILE_SIZE
Definition: pg_control.h:250
uint32 pg_crc32c
Definition: pg_crc32c.h:38
#define COMP_CRC32C(crc, data, len)
Definition: pg_crc32c.h:98
#define EQ_CRC32C(c1, c2)
Definition: pg_crc32c.h:42
#define INIT_CRC32C(crc)
Definition: pg_crc32c.h:41
#define FIN_CRC32C(crc)
Definition: pg_crc32c.h:103
const void size_t len
const void * data
return crc
void report_fatal_error(const char *pg_restrict fmt,...)
bool should_ignore_relpath(verifier_context *context, const char *relpath)
void report_backup_error(verifier_context *context, const char *pg_restrict fmt,...)
#define should_verify_checksum(m)
void canonicalize_path(char *path)
Definition: path.c:337
#define snprintf
Definition: port.h:239
unsigned int Oid
Definition: postgres_ext.h:32
uint32 pg_control_version
Definition: pg_control.h:125
uint64 system_identifier
Definition: pg_control.h:110
pg_crc32c crc
Definition: pg_control.h:232
char pathname[MAXPGPATH]
Definition: astreamer.h:81
pgoff_t size
Definition: astreamer.h:82
void(* content)(astreamer *streamer, astreamer_member *member, const char *data, int len, astreamer_archive_context context)
Definition: astreamer.h:126
verifier_context * context
ControlFileData control_file
manifest_file * mfile
pg_checksum_context * checksum_ctx
const astreamer_ops * bbs_ops
Definition: astreamer.h:109
astreamer * bbs_next
Definition: astreamer.h:110
manifest_files_hash * files
Definition: load_manifest.h:59
uint8 * checksum_payload
Definition: load_manifest.h:29
pg_checksum_type checksum_type
Definition: load_manifest.h:27
const char * pathname
Definition: load_manifest.h:25
pg_checksum_type type
manifest_data * manifest