PostgreSQL Source Code  git master
parse_manifest.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * parse_manifest.c
4  * Parse a backup manifest in JSON format.
5  *
6  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  * src/bin/pg_verifybackup/parse_manifest.c
10  *
11  *-------------------------------------------------------------------------
12  */
13 
14 #include "postgres_fe.h"
15 
16 #include "parse_manifest.h"
17 #include "common/jsonapi.h"
18 
19 /*
20  * Semantic states for JSON manifest parsing.
21  */
22 typedef enum
23 {
39 
40 /*
41  * Possible fields for one file as described by the manifest.
42  */
43 typedef enum
44 {
52 
53 /*
54  * Possible fields for one WAL range as described by the manifest.
55  */
56 typedef enum
57 {
62 
63 /*
64  * Internal state used while decoding the JSON-format backup manifest.
65  */
66 typedef struct
67 {
70 
71  /* These fields are used for parsing objects in the list of files. */
73  char *pathname;
75  char *size;
76  char *algorithm;
78  char *checksum;
79 
80  /* These fields are used for parsing objects in the list of WAL ranges. */
82  char *timeline;
83  char *start_lsn;
84  char *end_lsn;
85 
86  /* Miscellaneous other stuff. */
90 
96  bool isnull);
98  JsonTokenType tokentype);
102  char *buffer, size_t size);
104  char *msg);
105 
106 static int hexdecode_char(char c);
107 static bool hexdecode_string(uint8 *result, char *input, int nbytes);
108 static bool parse_xlogrecptr(XLogRecPtr *result, char *input);
109 
110 /*
111  * Main entrypoint to parse a JSON-format backup manifest.
112  *
113  * Caller should set up the parsing context and then invoke this function.
114  * For each file whose information is extracted from the manifest,
115  * context->perfile_cb is invoked. In case of trouble, context->error_cb is
116  * invoked and is expected not to return.
117  */
118 void
120  size_t size)
121 {
122  JsonLexContext *lex;
123  JsonParseErrorType json_error;
124  JsonSemAction sem;
126 
127  /* Set up our private parsing context. */
128  parse.context = context;
130  parse.saw_version_field = false;
131 
132  /* Create a JSON lexing context. */
133  lex = makeJsonLexContextCstringLen(buffer, size, PG_UTF8, true);
134 
135  /* Set up semantic actions. */
136  sem.semstate = &parse;
142  sem.object_field_end = NULL;
143  sem.array_element_start = NULL;
144  sem.array_element_end = NULL;
146 
147  /* Run the actual JSON parser. */
148  json_error = pg_parse_json(lex, &sem);
149  if (json_error != JSON_SUCCESS)
150  json_manifest_parse_failure(context, "parsing failed");
151  if (parse.state != JM_EXPECT_EOF)
152  json_manifest_parse_failure(context, "manifest ended unexpectedly");
153 
154  /* Verify the manifest checksum. */
155  verify_manifest_checksum(&parse, buffer, size);
156 }
157 
158 /*
159  * Invoked at the start of each object in the JSON document.
160  *
161  * The document as a whole is expected to be an object; each file and each
162  * WAL range is also expected to be an object. If we're anywhere else in the
163  * document, it's an error.
164  */
165 static JsonParseErrorType
167 {
169 
170  switch (parse->state)
171  {
174  break;
177  parse->pathname = NULL;
178  parse->encoded_pathname = NULL;
179  parse->size = NULL;
180  parse->algorithm = NULL;
181  parse->checksum = NULL;
182  break;
185  parse->timeline = NULL;
186  parse->start_lsn = NULL;
187  parse->end_lsn = NULL;
188  break;
189  default:
191  "unexpected object start");
192  break;
193  }
194 
195  return JSON_SUCCESS;
196 }
197 
198 /*
199  * Invoked at the end of each object in the JSON document.
200  *
201  * The possible cases here are the same as for json_manifest_object_start.
202  * There's nothing special to do at the end of the document, but when we
203  * reach the end of an object representing a particular file or WAL range,
204  * we must call json_manifest_finalize_file() to save the associated details.
205  */
206 static JsonParseErrorType
208 {
210 
211  switch (parse->state)
212  {
214  parse->state = JM_EXPECT_EOF;
215  break;
218  parse->state = JM_EXPECT_FILES_NEXT;
219  break;
223  break;
224  default:
226  "unexpected object end");
227  break;
228  }
229 
230  return JSON_SUCCESS;
231 }
232 
233 /*
234  * Invoked at the start of each array in the JSON document.
235  *
236  * Within the toplevel object, the value associated with the "Files" key
237  * should be an array. Similarly for the "WAL-Ranges" key. No other arrays
238  * are expected.
239  */
240 static JsonParseErrorType
242 {
244 
245  switch (parse->state)
246  {
248  parse->state = JM_EXPECT_FILES_NEXT;
249  break;
252  break;
253  default:
255  "unexpected array start");
256  break;
257  }
258 
259  return JSON_SUCCESS;
260 }
261 
262 /*
263  * Invoked at the end of each array in the JSON document.
264  *
265  * The cases here are analogous to those in json_manifest_array_start.
266  */
267 static JsonParseErrorType
269 {
271 
272  switch (parse->state)
273  {
277  break;
278  default:
280  "unexpected array end");
281  break;
282  }
283 
284  return JSON_SUCCESS;
285 }
286 
287 /*
288  * Invoked at the start of each object field in the JSON document.
289  */
290 static JsonParseErrorType
291 json_manifest_object_field_start(void *state, char *fname, bool isnull)
292 {
294 
295  switch (parse->state)
296  {
298 
299  /*
300  * Inside toplevel object. The version indicator should always be
301  * the first field.
302  */
303  if (!parse->saw_version_field)
304  {
305  if (strcmp(fname, "PostgreSQL-Backup-Manifest-Version") != 0)
307  "expected version indicator");
309  parse->saw_version_field = true;
310  break;
311  }
312 
313  /* Is this the list of files? */
314  if (strcmp(fname, "Files") == 0)
315  {
316  parse->state = JM_EXPECT_FILES_START;
317  break;
318  }
319 
320  /* Is this the list of WAL ranges? */
321  if (strcmp(fname, "WAL-Ranges") == 0)
322  {
324  break;
325  }
326 
327  /* Is this the manifest checksum? */
328  if (strcmp(fname, "Manifest-Checksum") == 0)
329  {
331  break;
332  }
333 
334  /* It's not a field we recognize. */
336  "unrecognized top-level field");
337  break;
338 
340  /* Inside object for one file; which key have we got? */
341  if (strcmp(fname, "Path") == 0)
342  parse->file_field = JMFF_PATH;
343  else if (strcmp(fname, "Encoded-Path") == 0)
344  parse->file_field = JMFF_ENCODED_PATH;
345  else if (strcmp(fname, "Size") == 0)
346  parse->file_field = JMFF_SIZE;
347  else if (strcmp(fname, "Last-Modified") == 0)
348  parse->file_field = JMFF_LAST_MODIFIED;
349  else if (strcmp(fname, "Checksum-Algorithm") == 0)
350  parse->file_field = JMFF_CHECKSUM_ALGORITHM;
351  else if (strcmp(fname, "Checksum") == 0)
352  parse->file_field = JMFF_CHECKSUM;
353  else
355  "unexpected file field");
357  break;
358 
360  /* Inside object for one file; which key have we got? */
361  if (strcmp(fname, "Timeline") == 0)
362  parse->wal_range_field = JMWRF_TIMELINE;
363  else if (strcmp(fname, "Start-LSN") == 0)
364  parse->wal_range_field = JMWRF_START_LSN;
365  else if (strcmp(fname, "End-LSN") == 0)
366  parse->wal_range_field = JMWRF_END_LSN;
367  else
369  "unexpected WAL range field");
371  break;
372 
373  default:
375  "unexpected object field");
376  break;
377  }
378 
379  return JSON_SUCCESS;
380 }
381 
382 /*
383  * Invoked at the start of each scalar in the JSON document.
384  *
385  * Object field names don't reach this code; those are handled by
386  * json_manifest_object_field_start. When we're inside of the object for
387  * a particular file or WAL range, that function will have noticed the name
388  * of the field, and we'll get the corresponding value here. When we're in
389  * the toplevel object, the parse state itself tells us which field this is.
390  *
391  * In all cases except for PostgreSQL-Backup-Manifest-Version, which we
392  * can just check on the spot, the goal here is just to save the value in
393  * the parse state for later use. We don't actually do anything until we
394  * reach either the end of the object representing this file, or the end
395  * of the manifest, as the case may be.
396  */
397 static JsonParseErrorType
399 {
401 
402  switch (parse->state)
403  {
405  if (strcmp(token, "1") != 0)
407  "unexpected manifest version");
409  break;
410 
412  switch (parse->file_field)
413  {
414  case JMFF_PATH:
415  parse->pathname = token;
416  break;
417  case JMFF_ENCODED_PATH:
418  parse->encoded_pathname = token;
419  break;
420  case JMFF_SIZE:
421  parse->size = token;
422  break;
423  case JMFF_LAST_MODIFIED:
424  pfree(token); /* unused */
425  break;
427  parse->algorithm = token;
428  break;
429  case JMFF_CHECKSUM:
430  parse->checksum = token;
431  break;
432  }
434  break;
435 
437  switch (parse->wal_range_field)
438  {
439  case JMWRF_TIMELINE:
440  parse->timeline = token;
441  break;
442  case JMWRF_START_LSN:
443  parse->start_lsn = token;
444  break;
445  case JMWRF_END_LSN:
446  parse->end_lsn = token;
447  break;
448  }
450  break;
451 
453  parse->state = JM_EXPECT_TOPLEVEL_END;
454  parse->manifest_checksum = token;
455  break;
456 
457  default:
458  json_manifest_parse_failure(parse->context, "unexpected scalar");
459  break;
460  }
461 
462  return JSON_SUCCESS;
463 }
464 
465 /*
466  * Do additional parsing and sanity-checking of the details gathered for one
467  * file, and invoke the per-file callback so that the caller gets those
468  * details. This happens for each file when the corresponding JSON object is
469  * completely parsed.
470  */
471 static void
473 {
474  JsonManifestParseContext *context = parse->context;
475  size_t size;
476  char *ep;
477  int checksum_string_length;
478  pg_checksum_type checksum_type;
479  int checksum_length;
480  uint8 *checksum_payload;
481 
482  /* Pathname and size are required. */
483  if (parse->pathname == NULL && parse->encoded_pathname == NULL)
484  json_manifest_parse_failure(parse->context, "missing path name");
485  if (parse->pathname != NULL && parse->encoded_pathname != NULL)
487  "both path name and encoded path name");
488  if (parse->size == NULL)
489  json_manifest_parse_failure(parse->context, "missing size");
490  if (parse->algorithm == NULL && parse->checksum != NULL)
492  "checksum without algorithm");
493 
494  /* Decode encoded pathname, if that's what we have. */
495  if (parse->encoded_pathname != NULL)
496  {
497  int encoded_length = strlen(parse->encoded_pathname);
498  int raw_length = encoded_length / 2;
499 
500  parse->pathname = palloc(raw_length + 1);
501  if (encoded_length % 2 != 0 ||
502  !hexdecode_string((uint8 *) parse->pathname,
503  parse->encoded_pathname,
504  raw_length))
506  "could not decode file name");
507  parse->pathname[raw_length] = '\0';
508  pfree(parse->encoded_pathname);
509  parse->encoded_pathname = NULL;
510  }
511 
512  /* Parse size. */
513  size = strtoul(parse->size, &ep, 10);
514  if (*ep)
516  "file size is not an integer");
517 
518  /* Parse the checksum algorithm, if it's present. */
519  if (parse->algorithm == NULL)
520  checksum_type = CHECKSUM_TYPE_NONE;
521  else if (!pg_checksum_parse_type(parse->algorithm, &checksum_type))
522  context->error_cb(context, "unrecognized checksum algorithm: \"%s\"",
523  parse->algorithm);
524 
525  /* Parse the checksum payload, if it's present. */
526  checksum_string_length = parse->checksum == NULL ? 0
527  : strlen(parse->checksum);
528  if (checksum_string_length == 0)
529  {
530  checksum_length = 0;
531  checksum_payload = NULL;
532  }
533  else
534  {
535  checksum_length = checksum_string_length / 2;
536  checksum_payload = palloc(checksum_length);
537  if (checksum_string_length % 2 != 0 ||
538  !hexdecode_string(checksum_payload, parse->checksum,
539  checksum_length))
540  context->error_cb(context,
541  "invalid checksum for file \"%s\": \"%s\"",
542  parse->pathname, parse->checksum);
543  }
544 
545  /* Invoke the callback with the details we've gathered. */
546  context->perfile_cb(context, parse->pathname, size,
547  checksum_type, checksum_length, checksum_payload);
548 
549  /* Free memory we no longer need. */
550  if (parse->size != NULL)
551  {
552  pfree(parse->size);
553  parse->size = NULL;
554  }
555  if (parse->algorithm != NULL)
556  {
557  pfree(parse->algorithm);
558  parse->algorithm = NULL;
559  }
560  if (parse->checksum != NULL)
561  {
562  pfree(parse->checksum);
563  parse->checksum = NULL;
564  }
565 }
566 
567 /*
568  * Do additional parsing and sanity-checking of the details gathered for one
569  * WAL range, and invoke the per-WAL-range callback so that the caller gets
570  * those details. This happens for each WAL range when the corresponding JSON
571  * object is completely parsed.
572  */
573 static void
575 {
576  JsonManifestParseContext *context = parse->context;
577  TimeLineID tli;
578  XLogRecPtr start_lsn,
579  end_lsn;
580  char *ep;
581 
582  /* Make sure all fields are present. */
583  if (parse->timeline == NULL)
584  json_manifest_parse_failure(parse->context, "missing timeline");
585  if (parse->start_lsn == NULL)
586  json_manifest_parse_failure(parse->context, "missing start LSN");
587  if (parse->end_lsn == NULL)
588  json_manifest_parse_failure(parse->context, "missing end LSN");
589 
590  /* Parse timeline. */
591  tli = strtoul(parse->timeline, &ep, 10);
592  if (*ep)
594  "timeline is not an integer");
595  if (!parse_xlogrecptr(&start_lsn, parse->start_lsn))
597  "could not parse start LSN");
598  if (!parse_xlogrecptr(&end_lsn, parse->end_lsn))
600  "could not parse end LSN");
601 
602  /* Invoke the callback with the details we've gathered. */
603  context->perwalrange_cb(context, tli, start_lsn, end_lsn);
604 
605  /* Free memory we no longer need. */
606  if (parse->timeline != NULL)
607  {
608  pfree(parse->timeline);
609  parse->timeline = NULL;
610  }
611  if (parse->start_lsn != NULL)
612  {
613  pfree(parse->start_lsn);
614  parse->start_lsn = NULL;
615  }
616  if (parse->end_lsn != NULL)
617  {
618  pfree(parse->end_lsn);
619  parse->end_lsn = NULL;
620  }
621 }
622 
623 /*
624  * Verify that the manifest checksum is correct.
625  *
626  * The last line of the manifest file is excluded from the manifest checksum,
627  * because the last line is expected to contain the checksum that covers
628  * the rest of the file.
629  */
630 static void
632  size_t size)
633 {
634  JsonManifestParseContext *context = parse->context;
635  size_t i;
636  size_t number_of_newlines = 0;
637  size_t ultimate_newline = 0;
638  size_t penultimate_newline = 0;
639  pg_cryptohash_ctx *manifest_ctx;
640  uint8 manifest_checksum_actual[PG_SHA256_DIGEST_LENGTH];
641  uint8 manifest_checksum_expected[PG_SHA256_DIGEST_LENGTH];
642 
643  /* Find the last two newlines in the file. */
644  for (i = 0; i < size; ++i)
645  {
646  if (buffer[i] == '\n')
647  {
648  ++number_of_newlines;
649  penultimate_newline = ultimate_newline;
650  ultimate_newline = i;
651  }
652  }
653 
654  /*
655  * Make sure that the last newline is right at the end, and that there are
656  * at least two lines total. We need this to be true in order for the
657  * following code, which computes the manifest checksum, to work properly.
658  */
659  if (number_of_newlines < 2)
661  "expected at least 2 lines");
662  if (ultimate_newline != size - 1)
664  "last line not newline-terminated");
665 
666  /* Checksum the rest. */
667  manifest_ctx = pg_cryptohash_create(PG_SHA256);
668  if (manifest_ctx == NULL)
669  context->error_cb(context, "out of memory");
670  if (pg_cryptohash_init(manifest_ctx) < 0)
671  context->error_cb(context, "could not initialize checksum of manifest");
672  if (pg_cryptohash_update(manifest_ctx, (uint8 *) buffer, penultimate_newline + 1) < 0)
673  context->error_cb(context, "could not update checksum of manifest");
674  if (pg_cryptohash_final(manifest_ctx, manifest_checksum_actual,
675  sizeof(manifest_checksum_actual)) < 0)
676  context->error_cb(context, "could not finalize checksum of manifest");
677 
678  /* Now verify it. */
679  if (parse->manifest_checksum == NULL)
680  context->error_cb(parse->context, "manifest has no checksum");
681  if (strlen(parse->manifest_checksum) != PG_SHA256_DIGEST_LENGTH * 2 ||
682  !hexdecode_string(manifest_checksum_expected, parse->manifest_checksum,
684  context->error_cb(context, "invalid manifest checksum: \"%s\"",
685  parse->manifest_checksum);
686  if (memcmp(manifest_checksum_actual, manifest_checksum_expected,
688  context->error_cb(context, "manifest checksum mismatch");
689  pg_cryptohash_free(manifest_ctx);
690 }
691 
692 /*
693  * Report a parse error.
694  *
695  * This is intended to be used for fairly low-level failures that probably
696  * shouldn't occur unless somebody has deliberately constructed a bad manifest,
697  * or unless the server is generating bad manifests due to some bug. msg should
698  * be a short string giving some hint as to what the problem is.
699  */
700 static void
702 {
703  context->error_cb(context, "could not parse backup manifest: %s", msg);
704 }
705 
/*
 * Convert a character which represents a hexadecimal digit to an integer.
 *
 * Both lower-case and upper-case digits are accepted.
 *
 * Returns the digit's value (0..15), or -1 if the character is not a
 * hexadecimal digit.
 */
static int
hexdecode_char(char c)
{
	if (c >= '0' && c <= '9')
		return c - '0';
	if (c >= 'a' && c <= 'f')
		return c - 'a' + 10;
	if (c >= 'A' && c <= 'F')
		return c - 'A' + 10;

	return -1;
}
723 
724 /*
725  * Decode a hex string into a byte string, 2 hex chars per byte.
726  *
727  * Returns false if invalid characters are encountered; otherwise true.
728  */
729 static bool
730 hexdecode_string(uint8 *result, char *input, int nbytes)
731 {
732  int i;
733 
734  for (i = 0; i < nbytes; ++i)
735  {
736  int n1 = hexdecode_char(input[i * 2]);
737  int n2 = hexdecode_char(input[i * 2 + 1]);
738 
739  if (n1 < 0 || n2 < 0)
740  return false;
741  result[i] = n1 * 16 + n2;
742  }
743 
744  return true;
745 }
746 
747 /*
748  * Parse an XLogRecPtr expressed using the usual string format.
749  */
750 static bool
752 {
753  uint32 hi;
754  uint32 lo;
755 
756  if (sscanf(input, "%X/%X", &hi, &lo) != 2)
757  return false;
758  *result = ((uint64) hi) << 32 | lo;
759  return true;
760 }
unsigned int uint32
Definition: c.h:495
unsigned char uint8
Definition: c.h:493
bool pg_checksum_parse_type(char *name, pg_checksum_type *type)
pg_checksum_type
@ CHECKSUM_TYPE_NONE
int pg_cryptohash_update(pg_cryptohash_ctx *ctx, const uint8 *data, size_t len)
Definition: cryptohash.c:136
int pg_cryptohash_init(pg_cryptohash_ctx *ctx)
Definition: cryptohash.c:100
void pg_cryptohash_free(pg_cryptohash_ctx *ctx)
Definition: cryptohash.c:238
pg_cryptohash_ctx * pg_cryptohash_create(pg_cryptohash_type type)
Definition: cryptohash.c:74
int pg_cryptohash_final(pg_cryptohash_ctx *ctx, uint8 *dest, size_t len)
Definition: cryptohash.c:172
@ PG_SHA256
Definition: cryptohash.h:24
#define token
Definition: indent_globs.h:126
FILE * input
int i
Definition: isn.c:73
JsonLexContext * makeJsonLexContextCstringLen(char *json, int len, int encoding, bool need_escapes)
Definition: jsonapi.c:145
JsonParseErrorType pg_parse_json(JsonLexContext *lex, JsonSemAction *sem)
Definition: jsonapi.c:169
JsonParseErrorType
Definition: jsonapi.h:37
@ JSON_SUCCESS
Definition: jsonapi.h:38
JsonTokenType
Definition: jsonapi.h:20
void pfree(void *pointer)
Definition: mcxt.c:1456
void * palloc(Size size)
Definition: mcxt.c:1226
JsonManifestSemanticState
@ JM_EXPECT_TOPLEVEL_END
@ JM_EXPECT_FILES_START
@ JM_EXPECT_TOPLEVEL_START
@ JM_EXPECT_EOF
@ JM_EXPECT_WAL_RANGES_START
@ JM_EXPECT_FILES_NEXT
@ JM_EXPECT_THIS_FILE_FIELD
@ JM_EXPECT_THIS_FILE_VALUE
@ JM_EXPECT_THIS_WAL_RANGE_VALUE
@ JM_EXPECT_VERSION_VALUE
@ JM_EXPECT_MANIFEST_CHECKSUM_VALUE
@ JM_EXPECT_THIS_WAL_RANGE_FIELD
@ JM_EXPECT_TOPLEVEL_FIELD
@ JM_EXPECT_WAL_RANGES_NEXT
static JsonParseErrorType json_manifest_array_start(void *state)
JsonManifestWALRangeField
@ JMWRF_START_LSN
@ JMWRF_TIMELINE
@ JMWRF_END_LSN
static bool parse_xlogrecptr(XLogRecPtr *result, char *input)
static void json_manifest_finalize_wal_range(JsonManifestParseState *parse)
static JsonParseErrorType json_manifest_object_field_start(void *state, char *fname, bool isnull)
static void json_manifest_finalize_file(JsonManifestParseState *parse)
static JsonParseErrorType json_manifest_object_end(void *state)
static JsonParseErrorType json_manifest_object_start(void *state)
static JsonParseErrorType json_manifest_scalar(void *state, char *token, JsonTokenType tokentype)
static void json_manifest_parse_failure(JsonManifestParseContext *context, char *msg)
static JsonParseErrorType json_manifest_array_end(void *state)
JsonManifestFileField
@ JMFF_ENCODED_PATH
@ JMFF_PATH
@ JMFF_LAST_MODIFIED
@ JMFF_CHECKSUM_ALGORITHM
@ JMFF_CHECKSUM
@ JMFF_SIZE
void json_parse_manifest(JsonManifestParseContext *context, char *buffer, size_t size)
static int hexdecode_char(char c)
static bool hexdecode_string(uint8 *result, char *input, int nbytes)
static void verify_manifest_checksum(JsonManifestParseState *parse, char *buffer, size_t size)
@ PG_UTF8
Definition: pg_wchar.h:232
char * c
static struct subre * parse(struct vars *v, int stopper, int type, struct state *init, struct state *final)
Definition: regcomp.c:715
#define PG_SHA256_DIGEST_LENGTH
Definition: sha2.h:23
json_manifest_perwalrange_callback perwalrange_cb
json_manifest_perfile_callback perfile_cb
json_manifest_error_callback error_cb
JsonManifestWALRangeField wal_range_field
JsonManifestParseContext * context
JsonManifestFileField file_field
pg_checksum_type checksum_algorithm
JsonManifestSemanticState state
json_struct_action array_end
Definition: jsonapi.h:118
json_struct_action object_start
Definition: jsonapi.h:115
json_ofield_action object_field_start
Definition: jsonapi.h:119
json_aelem_action array_element_start
Definition: jsonapi.h:121
json_scalar_action scalar
Definition: jsonapi.h:123
void * semstate
Definition: jsonapi.h:114
json_aelem_action array_element_end
Definition: jsonapi.h:122
json_struct_action array_start
Definition: jsonapi.h:117
json_struct_action object_end
Definition: jsonapi.h:116
json_ofield_action object_field_end
Definition: jsonapi.h:120
Definition: regguts.h:323
uint64 XLogRecPtr
Definition: xlogdefs.h:21
uint32 TimeLineID
Definition: xlogdefs.h:59