PostgreSQL Source Code  git master
parse_manifest.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * parse_manifest.c
4  * Parse a backup manifest in JSON format.
5  *
6  * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  * src/bin/pg_verifybackup/parse_manifest.c
10  *
11  *-------------------------------------------------------------------------
12  */
13 
14 #include "postgres_fe.h"
15 
16 #include "parse_manifest.h"
17 #include "common/jsonapi.h"
18 
19 /*
20  * Semantic states for JSON manifest parsing.
21  */
22 typedef enum
23 {
39 
40 /*
41  * Possible fields for one file as described by the manifest.
42  */
43 typedef enum
44 {
52 
53 /*
54  * Possible fields for one WAL range as described by the manifest.
55  */
56 typedef enum
57 {
62 
63 /*
64  * Internal state used while decoding the JSON-format backup manifest.
65  */
66 typedef struct
67 {
70 
71  /* These fields are used for parsing objects in the list of files. */
73  char *pathname;
75  char *size;
76  char *algorithm;
78  char *checksum;
79 
80  /* These fields are used for parsing objects in the list of WAL ranges. */
82  char *timeline;
83  char *start_lsn;
84  char *end_lsn;
85 
86  /* Miscellaneous other stuff. */
90 
91 static void json_manifest_object_start(void *state);
92 static void json_manifest_object_end(void *state);
93 static void json_manifest_array_start(void *state);
94 static void json_manifest_array_end(void *state);
95 static void json_manifest_object_field_start(void *state, char *fname,
96  bool isnull);
97 static void json_manifest_scalar(void *state, char *token,
98  JsonTokenType tokentype);
102  char *buffer, size_t size);
104  char *msg);
105 
106 static int hexdecode_char(char c);
107 static bool hexdecode_string(uint8 *result, char *input, int nbytes);
108 static bool parse_xlogrecptr(XLogRecPtr *result, char *input);
109 
110 /*
111  * Main entrypoint to parse a JSON-format backup manifest.
112  *
113  * Caller should set up the parsing context and then invoke this function.
114  * For each file whose information is extracted from the manifest,
115  * context->perfile_cb is invoked. In case of trouble, context->error_cb is
116  * invoked and is expected not to return.
117  */
118 void
120  size_t size)
121 {
122  JsonLexContext *lex;
123  JsonParseErrorType json_error;
124  JsonSemAction sem;
126 
127  /* Set up our private parsing context. */
128  parse.context = context;
130  parse.saw_version_field = false;
131 
132  /* Create a JSON lexing context. */
133  lex = makeJsonLexContextCstringLen(buffer, size, PG_UTF8, true);
134 
135  /* Set up semantic actions. */
136  sem.semstate = &parse;
142  sem.object_field_end = NULL;
143  sem.array_element_start = NULL;
144  sem.array_element_end = NULL;
146 
147  /* Run the actual JSON parser. */
148  json_error = pg_parse_json(lex, &sem);
149  if (json_error != JSON_SUCCESS)
150  json_manifest_parse_failure(context, json_errdetail(json_error, lex));
151  if (parse.state != JM_EXPECT_EOF)
152  json_manifest_parse_failure(context, "manifest ended unexpectedly");
153 
154  /* Verify the manifest checksum. */
155  verify_manifest_checksum(&parse, buffer, size);
156 }
157 
158 /*
159  * Invoked at the start of each object in the JSON document.
160  *
161  * The document as a whole is expected to be an object; each file and each
162  * WAL range is also expected to be an object. If we're anywhere else in the
163  * document, it's an error.
164  */
165 static void
167 {
168  JsonManifestParseState *parse = state;
169 
170  switch (parse->state)
171  {
174  break;
177  parse->pathname = NULL;
178  parse->encoded_pathname = NULL;
179  parse->size = NULL;
180  parse->algorithm = NULL;
181  parse->checksum = NULL;
182  break;
185  parse->timeline = NULL;
186  parse->start_lsn = NULL;
187  parse->end_lsn = NULL;
188  break;
189  default:
191  "unexpected object start");
192  break;
193  }
194 }
195 
196 /*
197  * Invoked at the end of each object in the JSON document.
198  *
199  * The possible cases here are the same as for json_manifest_object_start.
200  * There's nothing special to do at the end of the document, but when we
201  * reach the end of an object representing a particular file or WAL range,
202  * we must call json_manifest_finalize_file() to save the associated details.
203  */
204 static void
206 {
207  JsonManifestParseState *parse = state;
208 
209  switch (parse->state)
210  {
212  parse->state = JM_EXPECT_EOF;
213  break;
216  parse->state = JM_EXPECT_FILES_NEXT;
217  break;
221  break;
222  default:
224  "unexpected object end");
225  break;
226  }
227 }
228 
229 /*
230  * Invoked at the start of each array in the JSON document.
231  *
232  * Within the toplevel object, the value associated with the "Files" key
233  * should be an array. Similarly for the "WAL-Ranges" key. No other arrays
234  * are expected.
235  */
236 static void
238 {
239  JsonManifestParseState *parse = state;
240 
241  switch (parse->state)
242  {
244  parse->state = JM_EXPECT_FILES_NEXT;
245  break;
248  break;
249  default:
251  "unexpected array start");
252  break;
253  }
254 }
255 
256 /*
257  * Invoked at the end of each array in the JSON document.
258  *
259  * The cases here are analogous to those in json_manifest_array_start.
260  */
261 static void
263 {
264  JsonManifestParseState *parse = state;
265 
266  switch (parse->state)
267  {
271  break;
272  default:
274  "unexpected array end");
275  break;
276  }
277 }
278 
279 /*
280  * Invoked at the start of each object field in the JSON document.
281  */
282 static void
283 json_manifest_object_field_start(void *state, char *fname, bool isnull)
284 {
285  JsonManifestParseState *parse = state;
286 
287  switch (parse->state)
288  {
290 
291  /*
292  * Inside toplevel object. The version indicator should always be
293  * the first field.
294  */
295  if (!parse->saw_version_field)
296  {
297  if (strcmp(fname, "PostgreSQL-Backup-Manifest-Version") != 0)
299  "expected version indicator");
301  parse->saw_version_field = true;
302  break;
303  }
304 
305  /* Is this the list of files? */
306  if (strcmp(fname, "Files") == 0)
307  {
308  parse->state = JM_EXPECT_FILES_START;
309  break;
310  }
311 
312  /* Is this the list of WAL ranges? */
313  if (strcmp(fname, "WAL-Ranges") == 0)
314  {
316  break;
317  }
318 
319  /* Is this the manifest checksum? */
320  if (strcmp(fname, "Manifest-Checksum") == 0)
321  {
323  break;
324  }
325 
326  /* It's not a field we recognize. */
328  "unknown toplevel field");
329  break;
330 
332  /* Inside object for one file; which key have we got? */
333  if (strcmp(fname, "Path") == 0)
334  parse->file_field = JMFF_PATH;
335  else if (strcmp(fname, "Encoded-Path") == 0)
336  parse->file_field = JMFF_ENCODED_PATH;
337  else if (strcmp(fname, "Size") == 0)
338  parse->file_field = JMFF_SIZE;
339  else if (strcmp(fname, "Last-Modified") == 0)
341  else if (strcmp(fname, "Checksum-Algorithm") == 0)
343  else if (strcmp(fname, "Checksum") == 0)
344  parse->file_field = JMFF_CHECKSUM;
345  else
347  "unexpected file field");
349  break;
350 
352  /* Inside object for one WAL range; which key have we got? */
353  if (strcmp(fname, "Timeline") == 0)
355  else if (strcmp(fname, "Start-LSN") == 0)
357  else if (strcmp(fname, "End-LSN") == 0)
359  else
361  "unexpected wal range field");
363  break;
364 
365  default:
367  "unexpected object field");
368  break;
369  }
370 }
371 
372 /*
373  * Invoked at the start of each scalar in the JSON document.
374  *
375  * Object field names don't reach this code; those are handled by
376  * json_manifest_object_field_start. When we're inside of the object for
377  * a particular file or WAL range, that function will have noticed the name
378  * of the field, and we'll get the corresponding value here. When we're in
379  * the toplevel object, the parse state itself tells us which field this is.
380  *
381  * In all cases except for PostgreSQL-Backup-Manifest-Version, which we
382  * can just check on the spot, the goal here is just to save the value in
383  * the parse state for later use. We don't actually do anything until we
384  * reach either the end of the object representing this file, or the end
385  * of the manifest, as the case may be.
386  */
387 static void
388 json_manifest_scalar(void *state, char *token, JsonTokenType tokentype)
389 {
390  JsonManifestParseState *parse = state;
391 
392  switch (parse->state)
393  {
395  if (strcmp(token, "1") != 0)
397  "unexpected manifest version");
399  break;
400 
402  switch (parse->file_field)
403  {
404  case JMFF_PATH:
405  parse->pathname = token;
406  break;
407  case JMFF_ENCODED_PATH:
408  parse->encoded_pathname = token;
409  break;
410  case JMFF_SIZE:
411  parse->size = token;
412  break;
413  case JMFF_LAST_MODIFIED:
414  pfree(token); /* unused */
415  break;
417  parse->algorithm = token;
418  break;
419  case JMFF_CHECKSUM:
420  parse->checksum = token;
421  break;
422  }
424  break;
425 
427  switch (parse->wal_range_field)
428  {
429  case JMWRF_TIMELINE:
430  parse->timeline = token;
431  break;
432  case JMWRF_START_LSN:
433  parse->start_lsn = token;
434  break;
435  case JMWRF_END_LSN:
436  parse->end_lsn = token;
437  break;
438  }
440  break;
441 
443  parse->state = JM_EXPECT_TOPLEVEL_END;
444  parse->manifest_checksum = token;
445  break;
446 
447  default:
448  json_manifest_parse_failure(parse->context, "unexpected scalar");
449  break;
450  }
451 }
452 
453 /*
454  * Do additional parsing and sanity-checking of the details gathered for one
455  * file, and invoke the per-file callback so that the caller gets those
456  * details. This happens for each file when the corresponding JSON object is
457  * completely parsed.
458  */
459 static void
461 {
462  JsonManifestParseContext *context = parse->context;
463  size_t size;
464  char *ep;
465  int checksum_string_length;
466  pg_checksum_type checksum_type;
467  int checksum_length;
468  uint8 *checksum_payload;
469 
470  /* Pathname and size are required. */
471  if (parse->pathname == NULL && parse->encoded_pathname == NULL)
472  json_manifest_parse_failure(parse->context, "missing pathname");
473  if (parse->pathname != NULL && parse->encoded_pathname != NULL)
475  "both pathname and encoded pathname");
476  if (parse->size == NULL)
477  json_manifest_parse_failure(parse->context, "missing size");
478  if (parse->algorithm == NULL && parse->checksum != NULL)
480  "checksum without algorithm");
481 
482  /* Decode encoded pathname, if that's what we have. */
483  if (parse->encoded_pathname != NULL)
484  {
485  int encoded_length = strlen(parse->encoded_pathname);
486  int raw_length = encoded_length / 2;
487 
488  parse->pathname = palloc(raw_length + 1);
489  if (encoded_length % 2 != 0 ||
490  !hexdecode_string((uint8 *) parse->pathname,
491  parse->encoded_pathname,
492  raw_length))
494  "unable to decode filename");
495  parse->pathname[raw_length] = '\0';
496  pfree(parse->encoded_pathname);
497  parse->encoded_pathname = NULL;
498  }
499 
500  /* Parse size. */
501  size = strtoul(parse->size, &ep, 10);
502  if (*ep)
504  "file size is not an integer");
505 
506  /* Parse the checksum algorithm, if it's present. */
507  if (parse->algorithm == NULL)
508  checksum_type = CHECKSUM_TYPE_NONE;
509  else if (!pg_checksum_parse_type(parse->algorithm, &checksum_type))
510  context->error_cb(context, "unrecognized checksum algorithm: \"%s\"",
511  parse->algorithm);
512 
513  /* Parse the checksum payload, if it's present. */
514  checksum_string_length = parse->checksum == NULL ? 0
515  : strlen(parse->checksum);
516  if (checksum_string_length == 0)
517  {
518  checksum_length = 0;
519  checksum_payload = NULL;
520  }
521  else
522  {
523  checksum_length = checksum_string_length / 2;
524  checksum_payload = palloc(checksum_length);
525  if (checksum_string_length % 2 != 0 ||
526  !hexdecode_string(checksum_payload, parse->checksum,
527  checksum_length))
528  context->error_cb(context,
529  "invalid checksum for file \"%s\": \"%s\"",
530  parse->pathname, parse->checksum);
531  }
532 
533  /* Invoke the callback with the details we've gathered. */
534  context->perfile_cb(context, parse->pathname, size,
535  checksum_type, checksum_length, checksum_payload);
536 
537  /* Free memory we no longer need. */
538  if (parse->size != NULL)
539  {
540  pfree(parse->size);
541  parse->size = NULL;
542  }
543  if (parse->algorithm != NULL)
544  {
545  pfree(parse->algorithm);
546  parse->algorithm = NULL;
547  }
548  if (parse->checksum != NULL)
549  {
550  pfree(parse->checksum);
551  parse->checksum = NULL;
552  }
553 }
554 
555 /*
556  * Do additional parsing and sanity-checking of the details gathered for one
557  * WAL range, and invoke the per-WAL-range callback so that the caller gets
558  * those details. This happens for each WAL range when the corresponding JSON
559  * object is completely parsed.
560  */
561 static void
563 {
564  JsonManifestParseContext *context = parse->context;
565  TimeLineID tli;
566  XLogRecPtr start_lsn,
567  end_lsn;
568  char *ep;
569 
570  /* Make sure all fields are present. */
571  if (parse->timeline == NULL)
572  json_manifest_parse_failure(parse->context, "missing timeline");
573  if (parse->start_lsn == NULL)
574  json_manifest_parse_failure(parse->context, "missing start LSN");
575  if (parse->end_lsn == NULL)
576  json_manifest_parse_failure(parse->context, "missing end LSN");
577 
578  /* Parse timeline. */
579  tli = strtoul(parse->timeline, &ep, 10);
580  if (*ep)
582  "timeline is not an integer");
583  if (!parse_xlogrecptr(&start_lsn, parse->start_lsn))
585  "unable to parse start LSN");
586  if (!parse_xlogrecptr(&end_lsn, parse->end_lsn))
588  "unable to parse end LSN");
589 
590  /* Invoke the callback with the details we've gathered. */
591  context->perwalrange_cb(context, tli, start_lsn, end_lsn);
592 
593  /* Free memory we no longer need. */
594  if (parse->timeline != NULL)
595  {
596  pfree(parse->timeline);
597  parse->timeline = NULL;
598  }
599  if (parse->start_lsn != NULL)
600  {
601  pfree(parse->start_lsn);
602  parse->start_lsn = NULL;
603  }
604  if (parse->end_lsn != NULL)
605  {
606  pfree(parse->end_lsn);
607  parse->end_lsn = NULL;
608  }
609 }
610 
611 /*
612  * Verify that the manifest checksum is correct.
613  *
614  * The last line of the manifest file is excluded from the manifest checksum,
615  * because the last line is expected to contain the checksum that covers
616  * the rest of the file.
617  */
618 static void
620  size_t size)
621 {
622  JsonManifestParseContext *context = parse->context;
623  size_t i;
624  size_t number_of_newlines = 0;
625  size_t ultimate_newline = 0;
626  size_t penultimate_newline = 0;
627  pg_sha256_ctx manifest_ctx;
628  uint8 manifest_checksum_actual[PG_SHA256_DIGEST_LENGTH];
629  uint8 manifest_checksum_expected[PG_SHA256_DIGEST_LENGTH];
630 
631  /* Find the last two newlines in the file. */
632  for (i = 0; i < size; ++i)
633  {
634  if (buffer[i] == '\n')
635  {
636  ++number_of_newlines;
637  penultimate_newline = ultimate_newline;
638  ultimate_newline = i;
639  }
640  }
641 
642  /*
643  * Make sure that the last newline is right at the end, and that there are
644  * at least two lines total. We need this to be true in order for the
645  * following code, which computes the manifest checksum, to work properly.
646  */
647  if (number_of_newlines < 2)
649  "expected at least 2 lines");
650  if (ultimate_newline != size - 1)
652  "last line not newline-terminated");
653 
654  /* Checksum the rest. */
655  pg_sha256_init(&manifest_ctx);
656  pg_sha256_update(&manifest_ctx, (uint8 *) buffer, penultimate_newline + 1);
657  pg_sha256_final(&manifest_ctx, manifest_checksum_actual);
658 
659  /* Now verify it. */
660  if (parse->manifest_checksum == NULL)
661  context->error_cb(parse->context, "manifest has no checksum");
662  if (strlen(parse->manifest_checksum) != PG_SHA256_DIGEST_LENGTH * 2 ||
663  !hexdecode_string(manifest_checksum_expected, parse->manifest_checksum,
665  context->error_cb(context, "invalid manifest checksum: \"%s\"",
666  parse->manifest_checksum);
667  if (memcmp(manifest_checksum_actual, manifest_checksum_expected,
669  context->error_cb(context, "manifest checksum mismatch");
670 }
671 
672 /*
673  * Report a parse error.
674  *
675  * This is intended to be used for fairly low-level failures that probably
676  * shouldn't occur unless somebody has deliberately constructed a bad manifest,
677  * or unless the server is generating bad manifests due to some bug. msg should
678  * be a short string giving some hint as to what the problem is.
679  */
680 static void
682 {
683  context->error_cb(context, "could not parse backup manifest: %s", msg);
684 }
685 
686 /*
687  * Convert a character which represents a hexadecimal digit to an integer.
688  *
689  * Returns -1 if the character is not a hexadecimal digit.
690  */
691 static int
693 {
694  if (c >= '0' && c <= '9')
695  return c - '0';
696  if (c >= 'a' && c <= 'f')
697  return c - 'a' + 10;
698  if (c >= 'A' && c <= 'F')
699  return c - 'A' + 10;
700 
701  return -1;
702 }
703 
704 /*
705  * Decode a hex string into a byte string, 2 hex chars per byte.
706  *
707  * Returns false if invalid characters are encountered; otherwise true.
708  */
709 static bool
710 hexdecode_string(uint8 *result, char *input, int nbytes)
711 {
712  int i;
713 
714  for (i = 0; i < nbytes; ++i)
715  {
716  int n1 = hexdecode_char(input[i * 2]);
717  int n2 = hexdecode_char(input[i * 2 + 1]);
718 
719  if (n1 < 0 || n2 < 0)
720  return false;
721  result[i] = n1 * 16 + n2;
722  }
723 
724  return true;
725 }
726 
727 /*
728  * Parse an XLogRecPtr expressed using the usual string format.
729  */
730 static bool
731 parse_xlogrecptr(XLogRecPtr *result, char *input)
732 {
733  uint32 hi;
734  uint32 lo;
735 
736  if (sscanf(input, "%X/%X", &hi, &lo) != 2)
737  return false;
738  *result = ((uint64) hi) << 32 | lo;
739  return true;
740 }
json_struct_action array_end
Definition: jsonapi.h:110
static void json_manifest_scalar(void *state, char *token, JsonTokenType tokentype)
uint32 TimeLineID
Definition: xlogdefs.h:52
json_struct_action object_end
Definition: jsonapi.h:108
json_manifest_error_callback error_cb
JsonManifestParseContext * context
json_struct_action object_start
Definition: jsonapi.h:107
char * json_errdetail(JsonParseErrorType error, JsonLexContext *lex)
Definition: jsonapi.c:1056
#define PG_SHA256_DIGEST_LENGTH
Definition: sha2.h:62
unsigned char uint8
Definition: c.h:365
void json_parse_manifest(JsonManifestParseContext *context, char *buffer, size_t size)
json_scalar_action scalar
Definition: jsonapi.h:115
bool pg_checksum_parse_type(char *name, pg_checksum_type *type)
void pg_sha256_init(pg_sha256_ctx *context)
Definition: sha2.c:268
JsonManifestFileField file_field
void pfree(void *pointer)
Definition: mcxt.c:1056
pg_checksum_type checksum_algorithm
static void verify_manifest_checksum(JsonManifestParseState *parse, char *buffer, size_t size)
JsonLexContext * makeJsonLexContextCstringLen(char *json, int len, int encoding, bool need_escapes)
Definition: jsonapi.c:155
char * c
static void json_manifest_finalize_wal_range(JsonManifestParseState *parse)
JsonManifestFileField
static void json_manifest_parse_failure(JsonManifestParseContext *context, char *msg)
unsigned int uint32
Definition: c.h:367
JsonManifestSemanticState state
pg_checksum_type
JsonParseErrorType pg_parse_json(JsonLexContext *lex, JsonSemAction *sem)
Definition: jsonapi.c:179
json_ofield_action object_field_end
Definition: jsonapi.h:112
JsonManifestSemanticState
static void json_manifest_object_start(void *state)
void pg_sha256_update(pg_sha256_ctx *context, const uint8 *data, size_t len)
Definition: sha2.c:465
static bool hexdecode_string(uint8 *result, char *input, int nbytes)
static void json_manifest_array_start(void *state)
JsonManifestWALRangeField wal_range_field
JsonParseErrorType
Definition: jsonapi.h:36
static int hexdecode_char(char c)
json_aelem_action array_element_start
Definition: jsonapi.h:113
JsonManifestWALRangeField
static bool parse_xlogrecptr(XLogRecPtr *result, char *input)
uint64 XLogRecPtr
Definition: xlogdefs.h:21
json_struct_action array_start
Definition: jsonapi.h:109
Definition: regguts.h:298
void * palloc(Size size)
Definition: mcxt.c:949
void pg_sha256_final(pg_sha256_ctx *context, uint8 *digest)
Definition: sha2.c:566
json_manifest_perwalrange_callback perwalrange_cb
int i
static void json_manifest_object_field_start(void *state, char *fname, bool isnull)
json_ofield_action object_field_start
Definition: jsonapi.h:111
static void json_manifest_object_end(void *state)
static void json_manifest_finalize_file(JsonManifestParseState *parse)
void * semstate
Definition: jsonapi.h:106
static void json_manifest_array_end(void *state)
json_aelem_action array_element_end
Definition: jsonapi.h:114
json_manifest_perfile_callback perfile_cb
static struct subre * parse(struct vars *, int, int, struct state *, struct state *)
Definition: regcomp.c:648
JsonTokenType
Definition: jsonapi.h:19