PostgreSQL Source Code git master
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
load_manifest.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * Load data from a backup manifest into memory.
4 *
5 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
6 * Portions Copyright (c) 1994, Regents of the University of California
7 *
8 * src/bin/pg_combinebackup/load_manifest.c
9 *
10 *-------------------------------------------------------------------------
11 */
12
13#include "postgres_fe.h"
14
15#include <sys/stat.h>
16#include <unistd.h>
17
19#include "common/logging.h"
21#include "load_manifest.h"
22
23/*
24 * For efficiency, we'd like our hash table containing information about the
25 * manifest to start out with approximately the correct number of entries.
26 * There's no way to know the exact number of entries without reading the whole
27 * file, but we can get an estimate by dividing the file size by the estimated
28 * number of bytes per line.
29 *
30 * This could be off by about a factor of two in either direction, because the
31 * checksum algorithm has a big impact on the line lengths; e.g. a SHA512
32 * checksum is 128 hex bytes, whereas a CRC-32C value is only 8, and there
33 * might be no checksum at all.
34 */
35#define ESTIMATED_BYTES_PER_MANIFEST_LINE 100
36
37/*
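38 * Size of each chunk of JSON read from the manifest file when it is parsed
39 * incrementally. A manifest no larger than this is read and parsed in one go.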
40 */
41#define READ_CHUNK_SIZE (128 * 1024)
42
43/*
44 * Define a hash table which we can use to store information about the files
45 * mentioned in the backup manifest.
46 */
47#define SH_PREFIX manifest_files
/*
 * The names used later in this file (manifest_files_hash,
 * manifest_files_create, manifest_files_insert) all carry the prefix above.
 *
 * Each table entry is a manifest_file, keyed by its pathname member.
 */
48#define SH_ELEMENT_TYPE manifest_file
49#define SH_KEY_TYPE const char *
50#define SH_KEY pathname
/* Keys are C strings: hashed with hash_string() and compared with strcmp(). */
51#define SH_HASH_KEY(tb, key) hash_string(key)
52#define SH_EQUAL(tb, a, b) (strcmp(a, b) == 0)
53#define SH_SCOPE extern
/* pg_malloc0 is the allocator handed to the hash table template. */
54#define SH_RAW_ALLOCATOR pg_malloc0
55#define SH_DEFINE
56#include "lib/simplehash.h"
57
59 int manifest_version);
61 uint64 manifest_system_identifier);
63 const char *pathname, uint64 size,
64 pg_checksum_type checksum_type,
65 int checksum_length,
66 uint8 *checksum_payload);
68 TimeLineID tli,
69 XLogRecPtr start_lsn,
70 XLogRecPtr end_lsn);
72 const char *fmt,...)
74
75/*
76 * Loads the backup_manifest file of each backup directory in an array and
77 * produces a parallel array of manifest_data objects.
78 *
79 * NB: Since load_backup_manifest() can return NULL, the resulting array could
80 * contain NULL entries.
81 */
83load_backup_manifests(int n_backups, char **backup_directories)
84{
85 manifest_data **result;
86 int i;
87
 /* One pointer slot per backup directory, filled in the same order. */
88 result = pg_malloc(sizeof(manifest_data *) * n_backups);
89 for (i = 0; i < n_backups; ++i)
90 result[i] = load_backup_manifest(backup_directories[i]);
91
 /* The array is handed back as is; nothing is freed in this function. */
92 return result;
93}
94
95/*
96 * Parse the backup_manifest file in the named backup directory. Construct a
97 * hash table with information about all the files it mentions, and a linked
98 * list of all the WAL ranges it mentions.
99 *
100 * If the backup_manifest file simply doesn't exist, logs a warning and returns
101 * NULL. Any other error, or any error parsing the contents of the file, is
102 * fatal.
103 */
105load_backup_manifest(char *backup_directory)
106{
107 char pathname[MAXPGPATH];
108 int fd;
109 struct stat statbuf;
110 off_t estimate;
111 uint32 initial_size;
112 manifest_files_hash *ht;
113 char *buffer;
114 int rc;
116 manifest_data *result;
 /*
  * chunk_size is both the size of each incremental read and the threshold
  * at or below which the whole file is read in a single call.
  */
117 int chunk_size = READ_CHUNK_SIZE;
118
119 /* Open the manifest file. */
120 snprintf(pathname, MAXPGPATH, "%s/backup_manifest", backup_directory);
121 if ((fd = open(pathname, O_RDONLY | PG_BINARY, 0)) < 0)
122 {
 /* A missing manifest only earns a warning; the caller receives NULL. */
123 if (errno == ENOENT)
124 {
125 pg_log_warning("file \"%s\" does not exist", pathname);
126 return NULL;
127 }
128 pg_fatal("could not open file \"%s\": %m", pathname);
129 }
130
131 /* Figure out how big the manifest is. */
132 if (fstat(fd, &statbuf) != 0)
133 pg_fatal("could not stat file \"%s\": %m", pathname);
134
135 /* Guess how large to make the hash table based on the manifest size. */
136 estimate = statbuf.st_size / ESTIMATED_BYTES_PER_MANIFEST_LINE;
 /* Never start below 256 entries, and clamp to what fits in a uint32. */
137 initial_size = Min(PG_UINT32_MAX, Max(estimate, 256));
138
139 /* Create the hash table. */
140 ht = manifest_files_create(initial_size, NULL);
141
 /* Start from a zero-filled manifest_data and attach the hash table to it. */
142 result = pg_malloc0(sizeof(manifest_data));
143 result->files = ht;
 /*
  * NOTE(review): context is used here, but its declaration and any further
  * setup of its members are not visible in this listing. Presumably the
  * parser callbacks reach result through private_data; confirm against the
  * full source.
  */
144 context.private_data = result;
150
151 /*
152 * Parse the file, in chunks if necessary.
153 */
154 if (statbuf.st_size <= chunk_size)
155 {
 /* Small file: read it completely, then parse the buffer in one call. */
156 buffer = pg_malloc(statbuf.st_size);
157 rc = read(fd, buffer, statbuf.st_size);
 /* A short read is reported as a fatal error; the read is not retried. */
158 if (rc != statbuf.st_size)
159 {
160 if (rc < 0)
161 pg_fatal("could not read file \"%s\": %m", pathname);
162 else
163 pg_fatal("could not read file \"%s\": read %d of %lld",
164 pathname, rc, (long long int) statbuf.st_size);
165 }
166
167 /* Close the manifest file. */
168 close(fd);
169
170 /* Parse the manifest. */
171 json_parse_manifest(&context, buffer, statbuf.st_size);
172 }
173 else
174 {
 /*
  * NOTE(review): bytes_left is an int, while st_size may be wider (it is
  * cast to long long int for printing below). Confirm that manifests
  * larger than INT_MAX bytes are not a concern.
  */
175 int bytes_left = statbuf.st_size;
177
 /* NOTE(review): the declaration of inc_state is not visible in this listing. */
178 inc_state = json_parse_manifest_incremental_init(&context);
179
 /*
  * NOTE(review): one byte more than the largest read is allocated; the
  * reason for the extra byte is not visible here.
  */
180 buffer = pg_malloc(chunk_size + 1);
181
182 while (bytes_left > 0)
183 {
184 int bytes_to_read = chunk_size;
185
186 /*
187 * Make sure that the last chunk is sufficiently large (i.e. at
188 * least half the chunk size) so that it will fully contain the
189 * piece at the end with the checksum.
 *
 * While two or more full chunks remain, a full chunk is read. Once
 * less than two chunks remain, only half of the remainder is read,
 * so the read that finally empties the file is never smaller than
 * half a chunk.
190 */
191 if (bytes_left < chunk_size)
192 bytes_to_read = bytes_left;
193 else if (bytes_left < 2 * chunk_size)
194 bytes_to_read = bytes_left / 2;
195 rc = read(fd, buffer, bytes_to_read);
196 if (rc != bytes_to_read)
197 {
198 if (rc < 0)
199 pg_fatal("could not read file \"%s\": %m", pathname);
200 else
 /*
  * st_size - bytes_left is what earlier iterations consumed; adding rc
  * gives the total number of bytes read including this short read.
  */
201 pg_fatal("could not read file \"%s\": read %lld of %lld",
202 pathname,
203 (long long int) (statbuf.st_size + rc - bytes_left),
204 (long long int) statbuf.st_size);
205 }
206 bytes_left -= rc;
 /* The last argument flags the final chunk, i.e. nothing is left to read. */
207 json_parse_manifest_incremental_chunk(inc_state, buffer, rc, bytes_left == 0);
208 }
209
210 /* Release the incremental state memory */
 /*
  * NOTE(review): no statement follows the comment above in this listing.
  * Confirm against the full source that the incremental state is released
  * here.
  */
212
213 close(fd);
214 }
215
216 /* All done. */
 /* buffer was allocated by whichever branch ran above. */
217 pfree(buffer);
218 return result;
219}
220
221/*
222 * Report an error while parsing the manifest.
223 *
224 * We consider all such errors to be fatal errors. The manifest parser
225 * expects this function not to return.
226 */
227static void
229{
230 va_list ap;
231
 /*
  * NOTE(review): the function signature and the statement that consumes ap
  * between va_start and va_end are not visible in this listing.
  */
232 va_start(ap, fmt);
234 va_end(ap);
235
 /* Exit with status 1 so that control never returns to the parser. */
236 exit(1);
237}
238
239/*
240 * This callback validates the manifest version number for incremental backup.
241 */
242static void
244 int manifest_version)
245{
246 /* Incremental backups supported on manifest version 2 or later */
 /* Only version 1 is rejected here; any other value passes this callback. */
247 if (manifest_version == 1)
248 pg_fatal("backup manifest version 1 does not support incremental backup");
249}
250
251/*
252 * Record system identifier extracted from the backup manifest.
253 */
254static void
256 uint64 manifest_system_identifier)
257{
 /* NOTE(review): the declaration of manifest is not visible in this listing. */
259
260 /* Validation is done at a later stage */
 /* The value is stored unchanged; nothing is compared in this callback. */
261 manifest->system_identifier = manifest_system_identifier;
262}
263
264/*
265 * Record details extracted from the backup manifest for one file.
266 */
267static void
269 const char *pathname, uint64 size,
270 pg_checksum_type checksum_type,
271 int checksum_length, uint8 *checksum_payload)
272{
 /* NOTE(review): the declaration of manifest is not visible in this listing. */
274 manifest_file *m;
275 bool found;
276
277 /* Make a new entry in the hash table for this file. */
278 m = manifest_files_insert(manifest->files, pathname, &found);
 /* found being set means this pathname was already present: fatal. */
279 if (found)
280 pg_fatal("duplicate path name in backup manifest: \"%s\"", pathname);
281
282 /* Initialize the entry. */
283 m->size = size;
284 m->checksum_type = checksum_type;
285 m->checksum_length = checksum_length;
 /* The pointer itself is stored; the bytes it points to are not copied. */
286 m->checksum_payload = checksum_payload;
287}
288
289/*
290 * Record details extracted from the backup manifest for one WAL range.
291 */
292static void
294 TimeLineID tli,
295 XLogRecPtr start_lsn, XLogRecPtr end_lsn)
296{
 /*
  * NOTE(review): the declarations of manifest and range, and the statement
  * that allocates range, are not visible in this listing.
  */
299
300 /* Allocate and initialize a struct describing this WAL range. */
302 range->tli = tli;
303 range->start_lsn = start_lsn;
304 range->end_lsn = end_lsn;
 /* The new node links back to whatever the tail was before this call. */
305 range->prev = manifest->last_wal_range;
306 range->next = NULL;
307
308 /* Add it to the end of the list. */
 /* A NULL head means the list is empty, so range becomes the head too. */
309 if (manifest->first_wal_range == NULL)
310 manifest->first_wal_range = range;
311 else
312 manifest->last_wal_range->next = range;
 /* In both cases range is the new tail. */
313 manifest->last_wal_range = range;
314}
#define Min(x, y)
Definition: c.h:975
uint8_t uint8
Definition: c.h:500
#define PG_UINT32_MAX
Definition: c.h:561
#define pg_noreturn
Definition: c.h:165
#define Max(x, y)
Definition: c.h:969
#define PG_BINARY
Definition: c.h:1244
#define pg_attribute_printf(f, a)
Definition: c.h:233
#define gettext(x)
Definition: c.h:1150
uint64_t uint64
Definition: c.h:503
uint32_t uint32
Definition: c.h:502
pg_checksum_type
void * pg_malloc(size_t size)
Definition: fe_memutils.c:47
void * pg_malloc0(size_t size)
Definition: fe_memutils.c:53
#define close(a)
Definition: win32.h:12
#define read(a, b, c)
Definition: win32.h:13
int i
Definition: isn.c:72
#define ESTIMATED_BYTES_PER_MANIFEST_LINE
Definition: load_manifest.c:35
static pg_noreturn void manifest_data ** load_backup_manifests(int n_backups, char **backup_directories)
Definition: load_manifest.c:83
manifest_data * load_backup_manifest(char *backup_directory)
#define READ_CHUNK_SIZE
Definition: load_manifest.c:41
static void combinebackup_per_file_cb(JsonManifestParseContext *context, const char *pathname, uint64 size, pg_checksum_type checksum_type, int checksum_length, uint8 *checksum_payload)
static pg_noreturn void report_manifest_error(JsonManifestParseContext *context, const char *fmt,...) pg_attribute_printf(2
static void combinebackup_version_cb(JsonManifestParseContext *context, int manifest_version)
static void combinebackup_per_wal_range_cb(JsonManifestParseContext *context, TimeLineID tli, XLogRecPtr start_lsn, XLogRecPtr end_lsn)
static void combinebackup_system_identifier_cb(JsonManifestParseContext *context, uint64 manifest_system_identifier)
void pg_log_generic_v(enum pg_log_level level, enum pg_log_part part, const char *pg_restrict fmt, va_list ap)
Definition: logging.c:219
@ PG_LOG_PRIMARY
Definition: logging.h:67
@ PG_LOG_ERROR
Definition: logging.h:43
void pfree(void *pointer)
Definition: mcxt.c:1524
void * palloc(Size size)
Definition: mcxt.c:1317
void json_parse_manifest(JsonManifestParseContext *context, const char *buffer, size_t size)
JsonManifestParseIncrementalState * json_parse_manifest_incremental_init(JsonManifestParseContext *context)
void json_parse_manifest_incremental_shutdown(JsonManifestParseIncrementalState *incstate)
void json_parse_manifest_incremental_chunk(JsonManifestParseIncrementalState *incstate, const char *chunk, size_t size, bool is_last)
#define pg_fatal(...)
static bool manifest
#define MAXPGPATH
#define pg_log_warning(...)
Definition: pgfnames.c:24
#define snprintf
Definition: port.h:239
static int fd(const char *x, int i)
Definition: preproc-init.c:105
static struct cvec * range(struct vars *v, chr a, chr b, int cases)
Definition: regc_locale.c:412
json_manifest_per_wal_range_callback per_wal_range_cb
json_manifest_system_identifier_callback system_identifier_cb
json_manifest_error_callback error_cb
json_manifest_per_file_callback per_file_cb
json_manifest_version_callback version_cb
manifest_files_hash * files
Definition: load_manifest.h:59
uint8 * checksum_payload
Definition: load_manifest.h:29
pg_checksum_type checksum_type
Definition: load_manifest.h:27
__int64 st_size
Definition: win32_port.h:263
#define fstat
Definition: win32_port.h:273
uint64 XLogRecPtr
Definition: xlogdefs.h:21
uint32 TimeLineID
Definition: xlogdefs.h:59