PostgreSQL Source Code git master
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
astreamer_verify.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * astreamer_verify.c
4 *
5 * Archive streamer for verification of a tar format backup (including
6 * compressed tar format backups).
7 *
8 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
9 *
10 * src/bin/pg_verifybackup/astreamer_verify.c
11 *
12 *-------------------------------------------------------------------------
13 */
14
15#include "postgres_fe.h"
16
18#include "catalog/pg_control.h"
19#include "pg_verifybackup.h"
20
21typedef struct astreamer_verify
22{
23 /* These fields don't change once initialized. */
28
29 /* These fields change for each archive member. */
38
39static void astreamer_verify_content(astreamer *streamer,
40 astreamer_member *member,
41 const char *data, int len,
43static void astreamer_verify_finalize(astreamer *streamer);
44static void astreamer_verify_free(astreamer *streamer);
45
46static void member_verify_header(astreamer *streamer, astreamer_member *member);
47static void member_compute_checksum(astreamer *streamer,
48 astreamer_member *member,
49 const char *data, int len);
50static void member_verify_checksum(astreamer *streamer);
51static void member_copy_control_data(astreamer *streamer,
52 astreamer_member *member,
53 const char *data, int len);
54static void member_verify_control_data(astreamer *streamer);
55static void member_reset_info(astreamer *streamer);
56
59 .finalize = astreamer_verify_finalize,
61};
62
63/*
64 * Create an astreamer that can verify a tar file.
65 */
68 char *archive_name, Oid tblspc_oid)
69{
70 astreamer_verify *streamer;
71
72 streamer = palloc0(sizeof(astreamer_verify));
73 *((const astreamer_ops **) &streamer->base.bbs_ops) =
75
76 streamer->base.bbs_next = next;
77 streamer->context = context;
78 streamer->archive_name = archive_name;
79 streamer->tblspc_oid = tblspc_oid;
80
81 if (!context->skip_checksums)
82 streamer->checksum_ctx = pg_malloc(sizeof(pg_checksum_context));
83
84 return &streamer->base;
85}
86
87/*
88 * Main entry point of the archive streamer for verifying tar members.
89 */
90static void
92 const char *data, int len,
94{
95 astreamer_verify *mystreamer = (astreamer_verify *) streamer;
96
97 Assert(context != ASTREAMER_UNKNOWN);
98
99 switch (context)
100 {
102 /* Initial setup plus decide which checks to perform. */
103 member_verify_header(streamer, member);
104 break;
105
107 /* Incremental work required to verify file contents. */
108 if (mystreamer->verify_checksum)
109 member_compute_checksum(streamer, member, data, len);
110 if (mystreamer->verify_control_data)
111 member_copy_control_data(streamer, member, data, len);
112 break;
113
115 /* Now we've got all the file data. */
116 if (mystreamer->verify_checksum)
117 member_verify_checksum(streamer);
118 if (mystreamer->verify_control_data)
120
121 /* Reset for next archive member. */
122 member_reset_info(streamer);
123 break;
124
126 break;
127
128 default:
129 /* Shouldn't happen. */
130 pg_fatal("unexpected state while parsing tar file");
131 }
132}
133
134/*
135 * End-of-stream processing for a astreamer_verify stream.
136 */
137static void
139{
140 Assert(streamer->bbs_next == NULL);
141}
142
143/*
144 * Free memory associated with a astreamer_verify stream.
145 */
146static void
148{
149 astreamer_verify *mystreamer = (astreamer_verify *) streamer;
150
151 if (mystreamer->checksum_ctx)
152 pfree(mystreamer->checksum_ctx);
153
154 pfree(streamer);
155}
156
157/*
158 * Prepare to validate the next archive member.
159 */
160static void
162{
163 astreamer_verify *mystreamer = (astreamer_verify *) streamer;
164 manifest_file *m;
165 char pathname[MAXPGPATH];
166
167 /* We are only interested in normal files. */
168 if (member->is_directory || member->is_link)
169 return;
170
171 /*
172 * The backup manifest stores a relative path to the base directory for
173 * files belonging to a tablespace, while the tablespace backup tar
174 * archive does not include this path.
175 *
176 * The pathname taken from the tar file could contain '.' or '..'
177 * references, which we want to remove, so apply canonicalize_path(). It
178 * could also be an absolute pathname, which we want to treat as a
179 * relative path, so prepend "./" if we're not adding a tablespace prefix
180 * to make sure that canonicalize_path() does what we want.
181 */
182 if (OidIsValid(mystreamer->tblspc_oid))
183 snprintf(pathname, MAXPGPATH, "%s/%u/%s",
184 "pg_tblspc", mystreamer->tblspc_oid, member->pathname);
185 else
186 snprintf(pathname, MAXPGPATH, "./%s", member->pathname);
187 canonicalize_path(pathname);
188
189 /* Ignore any files that are listed in the ignore list. */
190 if (should_ignore_relpath(mystreamer->context, pathname))
191 return;
192
193 /* Check whether there's an entry in the manifest hash. */
194 m = manifest_files_lookup(mystreamer->context->manifest->files, pathname);
195 if (m == NULL)
196 {
197 report_backup_error(mystreamer->context,
198 "\"%s\" is present in \"%s\" but not in the manifest",
199 member->pathname, mystreamer->archive_name);
200 return;
201 }
202 mystreamer->mfile = m;
203
204 /* Flag this entry as having been encountered in a tar archive. */
205 m->matched = true;
206
207 /* Check that the size matches. */
208 if (m->size != member->size)
209 {
210 report_backup_error(mystreamer->context,
211 "\"%s\" has size %llu in \"%s\" but size %" PRIu64 " in the manifest",
212 member->pathname,
213 (unsigned long long) member->size,
214 mystreamer->archive_name,
215 m->size);
216 m->bad = true;
217 return;
218 }
219
220 /*
221 * Decide whether we're going to verify the checksum for this file, and
222 * whether we're going to perform the additional validation that we do
223 * only for the control file.
224 */
225 mystreamer->verify_checksum =
226 (!mystreamer->context->skip_checksums && should_verify_checksum(m));
227 mystreamer->verify_control_data =
228 mystreamer->context->manifest->version != 1 &&
229 !m->bad && strcmp(m->pathname, XLOG_CONTROL_FILE) == 0;
230
231 /* If we're going to verify the checksum, initial a checksum context. */
232 if (mystreamer->verify_checksum &&
233 pg_checksum_init(mystreamer->checksum_ctx, m->checksum_type) < 0)
234 {
235 report_backup_error(mystreamer->context,
236 "%s: could not initialize checksum of file \"%s\"",
237 mystreamer->archive_name, m->pathname);
238
239 /*
240 * Checksum verification cannot be performed without proper context
241 * initialization.
242 */
243 mystreamer->verify_checksum = false;
244 }
245}
246
247/*
248 * Computes the checksum incrementally for the received file content.
249 *
250 * Should have a correctly initialized checksum_ctx, which will be used for
251 * incremental checksum computation.
252 */
253static void
255 const char *data, int len)
256{
257 astreamer_verify *mystreamer = (astreamer_verify *) streamer;
258 pg_checksum_context *checksum_ctx = mystreamer->checksum_ctx;
259 manifest_file *m = mystreamer->mfile;
260
261 Assert(mystreamer->verify_checksum);
262 Assert(m->checksum_type == checksum_ctx->type);
263
264 /*
265 * Update the total count of computed checksum bytes so that we can
266 * cross-check against the file size.
267 */
268 mystreamer->checksum_bytes += len;
269
270 /* Feed these bytes to the checksum calculation. */
271 if (pg_checksum_update(checksum_ctx, (uint8 *) data, len) < 0)
272 {
273 report_backup_error(mystreamer->context,
274 "could not update checksum of file \"%s\"",
275 m->pathname);
276 mystreamer->verify_checksum = false;
277 }
278}
279
280/*
281 * Perform the final computation and checksum verification after the entire
282 * file content has been processed.
283 */
284static void
286{
287 astreamer_verify *mystreamer = (astreamer_verify *) streamer;
288 manifest_file *m = mystreamer->mfile;
289 uint8 checksumbuf[PG_CHECKSUM_MAX_LENGTH];
290 int checksumlen;
291
292 Assert(mystreamer->verify_checksum);
293
294 /*
295 * It's unclear how this could fail, but let's check anyway to be safe.
296 */
297 if (mystreamer->checksum_bytes != m->size)
298 {
299 report_backup_error(mystreamer->context,
300 "file \"%s\" in \"%s\" should contain %" PRIu64 " bytes, but read %" PRIu64 " bytes",
301 m->pathname, mystreamer->archive_name,
302 m->size,
303 mystreamer->checksum_bytes);
304 return;
305 }
306
307 /* Get the final checksum. */
308 checksumlen = pg_checksum_final(mystreamer->checksum_ctx, checksumbuf);
309 if (checksumlen < 0)
310 {
311 report_backup_error(mystreamer->context,
312 "could not finalize checksum of file \"%s\"",
313 m->pathname);
314 return;
315 }
316
317 /* And check it against the manifest. */
318 if (checksumlen != m->checksum_length)
319 report_backup_error(mystreamer->context,
320 "file \"%s\" in \"%s\" has checksum of length %d, but expected %d",
321 m->pathname, mystreamer->archive_name,
322 m->checksum_length, checksumlen);
323 else if (memcmp(checksumbuf, m->checksum_payload, checksumlen) != 0)
324 report_backup_error(mystreamer->context,
325 "checksum mismatch for file \"%s\" in \"%s\"",
326 m->pathname, mystreamer->archive_name);
327}
328
329/*
330 * Stores the pg_control file contents into a local buffer; we need the entire
331 * control file data for verification.
332 */
333static void
335 const char *data, int len)
336{
337 astreamer_verify *mystreamer = (astreamer_verify *) streamer;
338
339 /* Should be here only for control file */
340 Assert(mystreamer->verify_control_data);
341
342 /*
343 * Copy the new data into the control file buffer, but do not overrun the
344 * buffer. Note that the on-disk length of the control file is expected to
345 * be PG_CONTROL_FILE_SIZE, but the part that fits in our buffer is
346 * shorter, just sizeof(ControlFileData).
347 */
348 if (mystreamer->control_file_bytes < sizeof(ControlFileData))
349 {
350 size_t remaining;
351
352 remaining = sizeof(ControlFileData) - mystreamer->control_file_bytes;
353 memcpy(((char *) &mystreamer->control_file)
354 + mystreamer->control_file_bytes,
355 data, Min((size_t) len, remaining));
356 }
357
358 /* Remember how many bytes we saw, even if we didn't buffer them. */
359 mystreamer->control_file_bytes += len;
360}
361
362/*
363 * Performs the CRC calculation of pg_control data and then calls the routines
364 * that execute the final verification of the control file information.
365 */
366static void
368{
369 astreamer_verify *mystreamer = (astreamer_verify *) streamer;
370 manifest_data *manifest = mystreamer->context->manifest;
372
373 /* Should be here only for control file */
374 Assert(strcmp(mystreamer->mfile->pathname, XLOG_CONTROL_FILE) == 0);
375 Assert(mystreamer->verify_control_data);
376
377 /*
378 * If the control file is not the right length, that's a big problem.
379 *
380 * NB: There is a theoretical overflow risk here from casting to int, but
381 * it isn't likely to be a real problem and this enables us to match the
382 * same format string that pg_rewind uses for this case. Perhaps both this
383 * and pg_rewind should use an unsigned 64-bit value, but for now we don't
384 * worry about it.
385 */
386 if (mystreamer->control_file_bytes != PG_CONTROL_FILE_SIZE)
387 report_fatal_error("unexpected control file size %d, expected %d",
388 (int) mystreamer->control_file_bytes,
390
391 /* Compute the CRC. */
393 COMP_CRC32C(crc, &mystreamer->control_file,
394 offsetof(ControlFileData, crc));
396
397 /* Control file contents not meaningful if CRC is bad. */
398 if (!EQ_CRC32C(crc, mystreamer->control_file.crc))
399 report_fatal_error("%s: %s: CRC is incorrect",
400 mystreamer->archive_name,
401 mystreamer->mfile->pathname);
402
403 /* Can't interpret control file if not current version. */
405 report_fatal_error("%s: %s: unexpected control file version",
406 mystreamer->archive_name,
407 mystreamer->mfile->pathname);
408
409 /* System identifiers should match. */
410 if (manifest->system_identifier !=
412 report_fatal_error("%s: %s: manifest system identifier is %" PRIu64 ", but control file has %" PRIu64,
413 mystreamer->archive_name,
414 mystreamer->mfile->pathname,
415 manifest->system_identifier,
416 mystreamer->control_file.system_identifier);
417}
418
419/*
420 * Reset flags and free memory allocations for member file verification.
421 */
422static void
424{
425 astreamer_verify *mystreamer = (astreamer_verify *) streamer;
426
427 mystreamer->mfile = NULL;
428 mystreamer->verify_checksum = false;
429 mystreamer->verify_control_data = false;
430 mystreamer->checksum_bytes = 0;
431 mystreamer->control_file_bytes = 0;
432}
astreamer_archive_context
Definition: astreamer.h:63
@ ASTREAMER_MEMBER_HEADER
Definition: astreamer.h:65
@ ASTREAMER_MEMBER_CONTENTS
Definition: astreamer.h:66
@ ASTREAMER_MEMBER_TRAILER
Definition: astreamer.h:67
@ ASTREAMER_ARCHIVE_TRAILER
Definition: astreamer.h:68
@ ASTREAMER_UNKNOWN
Definition: astreamer.h:64
static void member_copy_control_data(astreamer *streamer, astreamer_member *member, const char *data, int len)
static void member_verify_header(astreamer *streamer, astreamer_member *member)
static void astreamer_verify_finalize(astreamer *streamer)
static void member_compute_checksum(astreamer *streamer, astreamer_member *member, const char *data, int len)
static void astreamer_verify_content(astreamer *streamer, astreamer_member *member, const char *data, int len, astreamer_archive_context context)
astreamer * astreamer_verify_content_new(astreamer *next, verifier_context *context, char *archive_name, Oid tblspc_oid)
static void member_reset_info(astreamer *streamer)
struct astreamer_verify astreamer_verify
static void member_verify_control_data(astreamer *streamer)
static const astreamer_ops astreamer_verify_ops
static void astreamer_verify_free(astreamer *streamer)
static void member_verify_checksum(astreamer *streamer)
static int32 next
Definition: blutils.c:224
#define Min(x, y)
Definition: c.h:975
uint8_t uint8
Definition: c.h:500
uint64_t uint64
Definition: c.h:503
#define OidIsValid(objectId)
Definition: c.h:746
int pg_checksum_final(pg_checksum_context *context, uint8 *output)
int pg_checksum_update(pg_checksum_context *context, const uint8 *input, size_t len)
int pg_checksum_init(pg_checksum_context *context, pg_checksum_type type)
#define PG_CHECKSUM_MAX_LENGTH
void * pg_malloc(size_t size)
Definition: fe_memutils.c:47
Assert(PointerIsAligned(start, uint64))
int remaining
Definition: informix.c:692
void pfree(void *pointer)
Definition: mcxt.c:2150
void * palloc0(Size size)
Definition: mcxt.c:1973
#define pg_fatal(...)
static bool manifest
#define MAXPGPATH
struct ControlFileData ControlFileData
#define PG_CONTROL_VERSION
Definition: pg_control.h:25
#define PG_CONTROL_FILE_SIZE
Definition: pg_control.h:256
uint32 pg_crc32c
Definition: pg_crc32c.h:38
#define COMP_CRC32C(crc, data, len)
Definition: pg_crc32c.h:153
#define EQ_CRC32C(c1, c2)
Definition: pg_crc32c.h:42
#define INIT_CRC32C(crc)
Definition: pg_crc32c.h:41
#define FIN_CRC32C(crc)
Definition: pg_crc32c.h:158
const void size_t len
const void * data
return crc
void report_fatal_error(const char *pg_restrict fmt,...)
bool should_ignore_relpath(verifier_context *context, const char *relpath)
void report_backup_error(verifier_context *context, const char *pg_restrict fmt,...)
#define should_verify_checksum(m)
void canonicalize_path(char *path)
Definition: path.c:337
#define snprintf
Definition: port.h:239
unsigned int Oid
Definition: postgres_ext.h:30
uint32 pg_control_version
Definition: pg_control.h:125
uint64 system_identifier
Definition: pg_control.h:110
pg_crc32c crc
Definition: pg_control.h:238
char pathname[MAXPGPATH]
Definition: astreamer.h:81
pgoff_t size
Definition: astreamer.h:82
void(* content)(astreamer *streamer, astreamer_member *member, const char *data, int len, astreamer_archive_context context)
Definition: astreamer.h:126
verifier_context * context
ControlFileData control_file
manifest_file * mfile
pg_checksum_context * checksum_ctx
const astreamer_ops * bbs_ops
Definition: astreamer.h:109
astreamer * bbs_next
Definition: astreamer.h:110
manifest_files_hash * files
Definition: load_manifest.h:59
uint8 * checksum_payload
Definition: load_manifest.h:29
pg_checksum_type checksum_type
Definition: load_manifest.h:27
const char * pathname
Definition: load_manifest.h:25
pg_checksum_type type
manifest_data * manifest
#define XLOG_CONTROL_FILE