PostgreSQL Source Code  git master
load_manifest.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * Load data from a backup manifest into memory.
4  *
5  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
6  * Portions Copyright (c) 1994, Regents of the University of California
7  *
8  * src/bin/pg_combinebackup/load_manifest.c
9  *
10  *-------------------------------------------------------------------------
11  */
12 
13 #include "postgres_fe.h"
14 
15 #include <sys/stat.h>
16 #include <unistd.h>
17 
18 #include "common/hashfn_unstable.h"
19 #include "common/logging.h"
20 #include "common/parse_manifest.h"
21 #include "load_manifest.h"
22 
/*
 * For efficiency, we'd like our hash table containing information about the
 * manifest to start out with approximately the correct number of entries.
 * There's no way to know the exact number of entries without reading the whole
 * file, but we can get an estimate by dividing the file size by the estimated
 * number of bytes per line.
 *
 * This could be off by about a factor of two in either direction, because the
 * checksum algorithm has a big impact on the line lengths; e.g. a SHA512
 * checksum is 128 hex bytes, whereas a CRC-32C value is only 8, and there
 * might be no checksum at all.
 *
 * The estimate only sizes the initial hash table; it does not limit how many
 * entries the manifest may contain.
 */
#define ESTIMATED_BYTES_PER_MANIFEST_LINE	100
36 
/*
 * Size, in bytes, of each chunk of JSON read from the manifest file when it
 * is parsed incrementally.  A manifest no larger than this is instead read
 * and parsed in a single step.
 */
#define READ_CHUNK_SIZE (128 * 1024)
42 
43 /*
44  * Define a hash table which we can use to store information about the files
45  * mentioned in the backup manifest.
46  */
47 #define SH_PREFIX manifest_files
48 #define SH_ELEMENT_TYPE manifest_file
49 #define SH_KEY_TYPE char *
50 #define SH_KEY pathname
51 #define SH_HASH_KEY(tb, key) hash_string(key)
52 #define SH_EQUAL(tb, a, b) (strcmp(a, b) == 0)
53 #define SH_SCOPE extern
54 #define SH_RAW_ALLOCATOR pg_malloc0
55 #define SH_DEFINE
56 #include "lib/simplehash.h"
57 
59  int manifest_version);
61  uint64 manifest_system_identifier);
63  char *pathname, size_t size,
64  pg_checksum_type checksum_type,
65  int checksum_length,
66  uint8 *checksum_payload);
68  TimeLineID tli,
69  XLogRecPtr start_lsn,
70  XLogRecPtr end_lsn);
72  const char *fmt,...)
74 
75 /*
76  * Load backup_manifest files from an array of backups and produces an array
77  * of manifest_data objects.
78  *
79  * NB: Since load_backup_manifest() can return NULL, the resulting array could
80  * contain NULL entries.
81  */
83 load_backup_manifests(int n_backups, char **backup_directories)
84 {
85  manifest_data **result;
86  int i;
87 
88  result = pg_malloc(sizeof(manifest_data *) * n_backups);
89  for (i = 0; i < n_backups; ++i)
90  result[i] = load_backup_manifest(backup_directories[i]);
91 
92  return result;
93 }
94 
95 /*
96  * Parse the backup_manifest file in the named backup directory. Construct a
97  * hash table with information about all the files it mentions, and a linked
98  * list of all the WAL ranges it mentions.
99  *
100  * If the backup_manifest file simply doesn't exist, logs a warning and returns
101  * NULL. Any other error, or any error parsing the contents of the file, is
102  * fatal.
103  */
105 load_backup_manifest(char *backup_directory)
106 {
107  char pathname[MAXPGPATH];
108  int fd;
109  struct stat statbuf;
110  off_t estimate;
111  uint32 initial_size;
112  manifest_files_hash *ht;
113  char *buffer;
114  int rc;
116  manifest_data *result;
117  int chunk_size = READ_CHUNK_SIZE;
118 
119  /* Open the manifest file. */
120  snprintf(pathname, MAXPGPATH, "%s/backup_manifest", backup_directory);
121  if ((fd = open(pathname, O_RDONLY | PG_BINARY, 0)) < 0)
122  {
123  if (errno == ENOENT)
124  {
125  pg_log_warning("\"%s\" does not exist", pathname);
126  return NULL;
127  }
128  pg_fatal("could not open file \"%s\": %m", pathname);
129  }
130 
131  /* Figure out how big the manifest is. */
132  if (fstat(fd, &statbuf) != 0)
133  pg_fatal("could not stat file \"%s\": %m", pathname);
134 
135  /* Guess how large to make the hash table based on the manifest size. */
136  estimate = statbuf.st_size / ESTIMATED_BYTES_PER_MANIFEST_LINE;
137  initial_size = Min(PG_UINT32_MAX, Max(estimate, 256));
138 
139  /* Create the hash table. */
140  ht = manifest_files_create(initial_size, NULL);
141 
142  result = pg_malloc0(sizeof(manifest_data));
143  result->files = ht;
144  context.private_data = result;
145  context.version_cb = combinebackup_version_cb;
146  context.system_identifier_cb = combinebackup_system_identifier_cb;
147  context.per_file_cb = combinebackup_per_file_cb;
148  context.per_wal_range_cb = combinebackup_per_wal_range_cb;
149  context.error_cb = report_manifest_error;
150 
151  /*
152  * Parse the file, in chunks if necessary.
153  */
154  if (statbuf.st_size <= chunk_size)
155  {
156  buffer = pg_malloc(statbuf.st_size);
157  rc = read(fd, buffer, statbuf.st_size);
158  if (rc != statbuf.st_size)
159  {
160  if (rc < 0)
161  pg_fatal("could not read file \"%s\": %m", pathname);
162  else
163  pg_fatal("could not read file \"%s\": read %d of %lld",
164  pathname, rc, (long long int) statbuf.st_size);
165  }
166 
167  /* Close the manifest file. */
168  close(fd);
169 
170  /* Parse the manifest. */
171  json_parse_manifest(&context, buffer, statbuf.st_size);
172  }
173  else
174  {
175  int bytes_left = statbuf.st_size;
177 
179 
180  buffer = pg_malloc(chunk_size + 1);
181 
182  while (bytes_left > 0)
183  {
184  int bytes_to_read = chunk_size;
185 
186  /*
187  * Make sure that the last chunk is sufficiently large. (i.e. at
188  * least half the chunk size) so that it will contain fully the
189  * piece at the end with the checksum.
190  */
191  if (bytes_left < chunk_size)
192  bytes_to_read = bytes_left;
193  else if (bytes_left < 2 * chunk_size)
194  bytes_to_read = bytes_left / 2;
195  rc = read(fd, buffer, bytes_to_read);
196  if (rc != bytes_to_read)
197  {
198  if (rc < 0)
199  pg_fatal("could not read file \"%s\": %m", pathname);
200  else
201  pg_fatal("could not read file \"%s\": read %lld of %lld",
202  pathname,
203  (long long int) (statbuf.st_size + rc - bytes_left),
204  (long long int) statbuf.st_size);
205  }
206  bytes_left -= rc;
208  inc_state, buffer, rc, bytes_left == 0);
209  }
210 
211  /* Release the incremental state memory */
213 
214  close(fd);
215  }
216 
217  /* All done. */
218  pfree(buffer);
219  return result;
220 }
221 
222 /*
223  * Report an error while parsing the manifest.
224  *
225  * We consider all such errors to be fatal errors. The manifest parser
226  * expects this function not to return.
227  */
228 static void
230 {
231  va_list ap;
232 
233  va_start(ap, fmt);
235  va_end(ap);
236 
237  exit(1);
238 }
239 
240 /*
241  * This callback to validate the manifest version number for incremental backup.
242  */
243 static void
245  int manifest_version)
246 {
247  /* Incremental backups supported on manifest version 2 or later */
248  if (manifest_version == 1)
249  pg_fatal("backup manifest version 1 does not support incremental backup");
250 }
251 
252 /*
253  * Record system identifier extracted from the backup manifest.
254  */
255 static void
257  uint64 manifest_system_identifier)
258 {
259  manifest_data *manifest = context->private_data;
260 
261  /* Validation will be at the later stage */
262  manifest->system_identifier = manifest_system_identifier;
263 }
264 
265 /*
266  * Record details extracted from the backup manifest for one file.
267  */
268 static void
270  char *pathname, size_t size,
271  pg_checksum_type checksum_type,
272  int checksum_length, uint8 *checksum_payload)
273 {
274  manifest_data *manifest = context->private_data;
275  manifest_file *m;
276  bool found;
277 
278  /* Make a new entry in the hash table for this file. */
279  m = manifest_files_insert(manifest->files, pathname, &found);
280  if (found)
281  pg_fatal("duplicate path name in backup manifest: \"%s\"", pathname);
282 
283  /* Initialize the entry. */
284  m->size = size;
285  m->checksum_type = checksum_type;
286  m->checksum_length = checksum_length;
287  m->checksum_payload = checksum_payload;
288 }
289 
290 /*
291  * Record details extracted from the backup manifest for one WAL range.
292  */
293 static void
295  TimeLineID tli,
296  XLogRecPtr start_lsn, XLogRecPtr end_lsn)
297 {
298  manifest_data *manifest = context->private_data;
300 
301  /* Allocate and initialize a struct describing this WAL range. */
302  range = palloc(sizeof(manifest_wal_range));
303  range->tli = tli;
304  range->start_lsn = start_lsn;
305  range->end_lsn = end_lsn;
306  range->prev = manifest->last_wal_range;
307  range->next = NULL;
308 
309  /* Add it to the end of the list. */
310  if (manifest->first_wal_range == NULL)
311  manifest->first_wal_range = range;
312  else
313  manifest->last_wal_range->next = range;
314  manifest->last_wal_range = range;
315 }
unsigned int uint32
Definition: c.h:506
#define Min(x, y)
Definition: c.h:1004
#define PG_UINT32_MAX
Definition: c.h:590
#define Max(x, y)
Definition: c.h:998
#define PG_BINARY
Definition: c.h:1273
#define pg_attribute_printf(f, a)
Definition: c.h:191
#define gettext(x)
Definition: c.h:1179
unsigned char uint8
Definition: c.h:504
pg_checksum_type
void * pg_malloc0(size_t size)
Definition: fe_memutils.c:53
void * pg_malloc(size_t size)
Definition: fe_memutils.c:47
#define close(a)
Definition: win32.h:12
#define read(a, b, c)
Definition: win32.h:13
int i
Definition: isn.c:73
static void const char * fmt
va_end(args)
exit(1)
va_start(args, fmt)
static void combinebackup_per_file_cb(JsonManifestParseContext *context, char *pathname, size_t size, pg_checksum_type checksum_type, int checksum_length, uint8 *checksum_payload)
#define ESTIMATED_BYTES_PER_MANIFEST_LINE
Definition: load_manifest.c:35
static void pg_attribute_noreturn()
#define READ_CHUNK_SIZE
Definition: load_manifest.c:41
static void report_manifest_error(JsonManifestParseContext *context, const char *fmt,...) pg_attribute_printf(2
static void combinebackup_version_cb(JsonManifestParseContext *context, int manifest_version)
manifest_data ** load_backup_manifests(int n_backups, char **backup_directories)
Definition: load_manifest.c:83
static void combinebackup_per_wal_range_cb(JsonManifestParseContext *context, TimeLineID tli, XLogRecPtr start_lsn, XLogRecPtr end_lsn)
manifest_data * load_backup_manifest(char *backup_directory)
static void combinebackup_system_identifier_cb(JsonManifestParseContext *context, uint64 manifest_system_identifier)
void pg_log_generic_v(enum pg_log_level level, enum pg_log_part part, const char *pg_restrict fmt, va_list ap)
Definition: logging.c:216
@ PG_LOG_PRIMARY
Definition: logging.h:67
@ PG_LOG_ERROR
Definition: logging.h:43
void pfree(void *pointer)
Definition: mcxt.c:1520
void * palloc(Size size)
Definition: mcxt.c:1316
void json_parse_manifest_incremental_chunk(JsonManifestParseIncrementalState *incstate, char *chunk, int size, bool is_last)
JsonManifestParseIncrementalState * json_parse_manifest_incremental_init(JsonManifestParseContext *context)
void json_parse_manifest(JsonManifestParseContext *context, char *buffer, size_t size)
void json_parse_manifest_incremental_shutdown(JsonManifestParseIncrementalState *incstate)
#define pg_fatal(...)
static bool manifest
#define MAXPGPATH
#define pg_log_warning(...)
Definition: pgfnames.c:24
#define snprintf
Definition: port.h:238
static int fd(const char *x, int i)
Definition: preproc-init.c:105
tree context
Definition: radixtree.h:1833
static struct cvec * range(struct vars *v, chr a, chr b, int cases)
Definition: regc_locale.c:412
static pg_noinline void Size size
Definition: slab.c:607
manifest_files_hash * files
Definition: load_manifest.h:59
uint8 * checksum_payload
Definition: load_manifest.h:29
pg_checksum_type checksum_type
Definition: load_manifest.h:27
__int64 st_size
Definition: win32_port.h:273
#define fstat
Definition: win32_port.h:283
uint64 XLogRecPtr
Definition: xlogdefs.h:21
uint32 TimeLineID
Definition: xlogdefs.h:59