PostgreSQL Source Code  git master
jsonapi.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * jsonapi.c
4  * JSON parser and lexer interfaces
5  *
6  * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  * IDENTIFICATION
10  * src/common/jsonapi.c
11  *
12  *-------------------------------------------------------------------------
13  */
14 #ifndef FRONTEND
15 #include "postgres.h"
16 #else
17 #include "postgres_fe.h"
18 #endif
19 
20 #include "common/jsonapi.h"
21 #include "mb/pg_wchar.h"
22 
23 #ifndef FRONTEND
24 #include "miscadmin.h"
25 #endif
26 
27 /*
28  * The context of the parser is maintained by the recursive descent
29  * mechanism, but is passed explicitly to the error reporting routine
30  * for better diagnostics.
31  */
32 typedef enum /* contexts of JSON parser */
33 {
34  JSON_PARSE_VALUE, /* expecting a value */
35  JSON_PARSE_STRING, /* expecting a string (for a field name) */
36  JSON_PARSE_ARRAY_START, /* saw '[', expecting value or ']' */
37  JSON_PARSE_ARRAY_NEXT, /* saw array element, expecting ',' or ']' */
38  JSON_PARSE_OBJECT_START, /* saw '{', expecting label or '}' */
39  JSON_PARSE_OBJECT_LABEL, /* saw object label, expecting ':' */
40  JSON_PARSE_OBJECT_NEXT, /* saw object value, expecting ',' or '}' */
41  JSON_PARSE_OBJECT_COMMA, /* saw object ',', expecting next label */
42  JSON_PARSE_END /* saw the end of a document, expect nothing */
44 
46 static inline JsonParseErrorType json_lex_number(JsonLexContext *lex, char *s,
47  bool *num_err, int *total_len);
54 
55 /* the null action object used for pure validation */
57 {
58  NULL, NULL, NULL, NULL, NULL,
59  NULL, NULL, NULL, NULL, NULL
60 };
61 
62 /* Recursive Descent parser support routines */
63 
64 /*
65  * lex_peek
66  *
67  * what is the current look_ahead token?
68 */
69 static inline JsonTokenType
71 {
72  return lex->token_type;
73 }
74 
75 /*
76  * lex_expect
77  *
78  * move the lexer to the next token if the current look_ahead token matches
79  * the parameter token. Otherwise, report an error.
80  */
81 static inline JsonParseErrorType
83 {
84  if (lex_peek(lex) == token)
85  return json_lex(lex);
86  else
87  return report_parse_error(ctx, lex);
88 }
89 
90 /* chars to consider as part of an alphanumeric token; (c) is expanded several times, so it must have no side effects */
91 #define JSON_ALPHANUMERIC_CHAR(c) \
92  (((c) >= 'a' && (c) <= 'z') || \
93  ((c) >= 'A' && (c) <= 'Z') || \
94  ((c) >= '0' && (c) <= '9') || \
95  (c) == '_' || \
96  IS_HIGHBIT_SET(c))
97 
98 /*
99  * Utility function to check if a string is a valid JSON number.
100  *
101  * str is of length len, and need not be null-terminated.
102  */
103 bool
104 IsValidJsonNumber(const char *str, int len)
105 {
106  bool numeric_error;
107  int total_len;
108  JsonLexContext dummy_lex;
109 
110  if (len <= 0)
111  return false;
112 
113  /*
114  * json_lex_number expects a leading '-' to have been eaten already.
115  *
116  * having to cast away the constness of str is ugly, but there's not much
117  * easy alternative.
118  */
119  if (*str == '-')
120  {
121  dummy_lex.input = unconstify(char *, str) + 1;
122  dummy_lex.input_length = len - 1;
123  }
124  else
125  {
126  dummy_lex.input = unconstify(char *, str);
127  dummy_lex.input_length = len;
128  }
129 
130  json_lex_number(&dummy_lex, dummy_lex.input, &numeric_error, &total_len);
131 
132  return (!numeric_error) && (total_len == dummy_lex.input_length);
133 }
134 
135 /*
136  * makeJsonLexContextCstringLen
137  *
138  * lex constructor, with or without StringInfo object for de-escaped lexemes.
139  *
140  * Without is better as it makes the processing faster, so only make one
141  * if really required.
142  */
144 makeJsonLexContextCstringLen(char *json, int len, int encoding, bool need_escapes)
145 {
146  JsonLexContext *lex = palloc0(sizeof(JsonLexContext));
147 
148  lex->input = lex->token_terminator = lex->line_start = json;
149  lex->line_number = 1;
150  lex->input_length = len;
151  lex->input_encoding = encoding;
152  if (need_escapes)
153  lex->strval = makeStringInfo();
154  return lex;
155 }
156 
157 /*
158  * pg_parse_json
159  *
160  * Publicly visible entry point for the JSON parser.
161  *
162  * lex is a lexing context, set up for the json to be processed by calling
163  * makeJsonLexContext(). sem is a structure of function pointers to semantic
164  * action routines to be called at appropriate spots during parsing, and a
165  * pointer to a state object to be passed to those routines.
166  */
169 {
170  JsonTokenType tok;
171  JsonParseErrorType result;
172 
173  /* get the initial token */
174  result = json_lex(lex);
175  if (result != JSON_SUCCESS)
176  return result;
177 
178  tok = lex_peek(lex);
179 
180  /* parse by recursive descent */
181  switch (tok)
182  {
184  result = parse_object(lex, sem);
185  break;
187  result = parse_array(lex, sem);
188  break;
189  default:
190  result = parse_scalar(lex, sem); /* json can be a bare scalar */
191  }
192 
193  if (result == JSON_SUCCESS)
194  result = lex_expect(JSON_PARSE_END, lex, JSON_TOKEN_END);
195 
196  return result;
197 }
198 
199 /*
200  * json_count_array_elements
201  *
202  * Returns number of array elements in lex context at start of array token
203  * until end of array token at same nesting level.
204  *
205  * Designed to be called from array_start routines.
206  */
209 {
210  JsonLexContext copylex;
211  int count;
212  JsonParseErrorType result;
213 
214  /*
215  * It's safe to do this with a shallow copy because the lexical routines
216  * don't scribble on the input. They do scribble on the other pointers
217  * etc, so doing this with a copy makes that safe.
218  */
219  memcpy(&copylex, lex, sizeof(JsonLexContext));
220  copylex.strval = NULL; /* not interested in values here */
221  copylex.lex_level++;
222 
223  count = 0;
224  result = lex_expect(JSON_PARSE_ARRAY_START, &copylex,
226  if (result != JSON_SUCCESS)
227  return result;
228  if (lex_peek(&copylex) != JSON_TOKEN_ARRAY_END)
229  {
230  while (1)
231  {
232  count++;
233  result = parse_array_element(&copylex, &nullSemAction);
234  if (result != JSON_SUCCESS)
235  return result;
236  if (copylex.token_type != JSON_TOKEN_COMMA)
237  break;
238  result = json_lex(&copylex);
239  if (result != JSON_SUCCESS)
240  return result;
241  }
242  }
243  result = lex_expect(JSON_PARSE_ARRAY_NEXT, &copylex,
245  if (result != JSON_SUCCESS)
246  return result;
247 
248  *elements = count;
249  return JSON_SUCCESS;
250 }
251 
252 /*
253  * Recursive Descent parse routines. There is one for each structural
254  * element in a json document:
255  * - scalar (string, number, true, false, null)
256  * - array ( [ ] )
257  * - array element
258  * - object ( { } )
259  * - object field
260  */
261 static inline JsonParseErrorType
263 {
264  char *val = NULL;
265  json_scalar_action sfunc = sem->scalar;
266  JsonTokenType tok = lex_peek(lex);
267  JsonParseErrorType result;
268 
269  /* a scalar must be a string, a number, true, false, or null */
270  if (tok != JSON_TOKEN_STRING && tok != JSON_TOKEN_NUMBER &&
271  tok != JSON_TOKEN_TRUE && tok != JSON_TOKEN_FALSE &&
272  tok != JSON_TOKEN_NULL)
274 
275  /* if no semantic function, just consume the token */
276  if (sfunc == NULL)
277  return json_lex(lex);
278 
279  /* extract the de-escaped string value, or the raw lexeme */
280  if (lex_peek(lex) == JSON_TOKEN_STRING)
281  {
282  if (lex->strval != NULL)
283  val = pstrdup(lex->strval->data);
284  }
285  else
286  {
287  int len = (lex->token_terminator - lex->token_start);
288 
289  val = palloc(len + 1);
290  memcpy(val, lex->token_start, len);
291  val[len] = '\0';
292  }
293 
294  /* consume the token */
295  result = json_lex(lex);
296  if (result != JSON_SUCCESS)
297  return result;
298 
299  /* invoke the callback */
300  (*sfunc) (sem->semstate, val, tok);
301 
302  return JSON_SUCCESS;
303 }
304 
305 static JsonParseErrorType
307 {
308  /*
309  * An object field is "fieldname" : value where value can be a scalar,
310  * object or array. Note: in user-facing docs and error messages, we
311  * generally call a field name a "key".
312  */
313 
314  char *fname = NULL; /* keep compiler quiet */
317  bool isnull;
318  JsonTokenType tok;
319  JsonParseErrorType result;
320 
321  if (lex_peek(lex) != JSON_TOKEN_STRING)
323  if ((ostart != NULL || oend != NULL) && lex->strval != NULL)
324  fname = pstrdup(lex->strval->data);
325  result = json_lex(lex);
326  if (result != JSON_SUCCESS)
327  return result;
328 
330  if (result != JSON_SUCCESS)
331  return result;
332 
333  tok = lex_peek(lex);
334  isnull = tok == JSON_TOKEN_NULL;
335 
336  if (ostart != NULL)
337  (*ostart) (sem->semstate, fname, isnull);
338 
339  switch (tok)
340  {
342  result = parse_object(lex, sem);
343  break;
345  result = parse_array(lex, sem);
346  break;
347  default:
348  result = parse_scalar(lex, sem);
349  }
350  if (result != JSON_SUCCESS)
351  return result;
352 
353  if (oend != NULL)
354  (*oend) (sem->semstate, fname, isnull);
355  return JSON_SUCCESS;
356 }
357 
358 static JsonParseErrorType
360 {
361  /*
362  * an object is a possibly empty sequence of object fields, separated by
363  * commas and surrounded by curly braces.
364  */
365  json_struct_action ostart = sem->object_start;
366  json_struct_action oend = sem->object_end;
367  JsonTokenType tok;
368  JsonParseErrorType result;
369 
370 #ifndef FRONTEND
372 #endif
373 
374  if (ostart != NULL)
375  (*ostart) (sem->semstate);
376 
377  /*
378  * Data inside an object is at a higher nesting level than the object
379  * itself. Note that we increment this after we call the semantic routine
380  * for the object start and restore it before we call the routine for the
381  * object end.
382  */
383  lex->lex_level++;
384 
386  result = json_lex(lex);
387  if (result != JSON_SUCCESS)
388  return result;
389 
390  tok = lex_peek(lex);
391  switch (tok)
392  {
393  case JSON_TOKEN_STRING:
394  result = parse_object_field(lex, sem);
395  while (result == JSON_SUCCESS && lex_peek(lex) == JSON_TOKEN_COMMA)
396  {
397  result = json_lex(lex);
398  if (result != JSON_SUCCESS)
399  break;
400  result = parse_object_field(lex, sem);
401  }
402  break;
404  break;
405  default:
406  /* case of an invalid initial token inside the object */
408  }
409  if (result != JSON_SUCCESS)
410  return result;
411 
413  if (result != JSON_SUCCESS)
414  return result;
415 
416  lex->lex_level--;
417 
418  if (oend != NULL)
419  (*oend) (sem->semstate);
420 
421  return JSON_SUCCESS;
422 }
423 
424 static JsonParseErrorType
426 {
429  JsonTokenType tok = lex_peek(lex);
430  JsonParseErrorType result;
431 
432  bool isnull;
433 
434  isnull = tok == JSON_TOKEN_NULL;
435 
436  if (astart != NULL)
437  (*astart) (sem->semstate, isnull);
438 
439  /* an array element is any object, array or scalar */
440  switch (tok)
441  {
443  result = parse_object(lex, sem);
444  break;
446  result = parse_array(lex, sem);
447  break;
448  default:
449  result = parse_scalar(lex, sem);
450  }
451 
452  if (result != JSON_SUCCESS)
453  return result;
454 
455  if (aend != NULL)
456  (*aend) (sem->semstate, isnull);
457 
458  return JSON_SUCCESS;
459 }
460 
461 static JsonParseErrorType
463 {
464  /*
465  * an array is a possibly empty sequence of array elements, separated by
466  * commas and surrounded by square brackets.
467  */
468  json_struct_action astart = sem->array_start;
469  json_struct_action aend = sem->array_end;
470  JsonParseErrorType result;
471 
472 #ifndef FRONTEND
474 #endif
475 
476  if (astart != NULL)
477  (*astart) (sem->semstate);
478 
479  /*
480  * Data inside an array is at a higher nesting level than the array
481  * itself. Note that we increment this after we call the semantic routine
482  * for the array start and restore it before we call the routine for the
483  * array end.
484  */
485  lex->lex_level++;
486 
488  if (result == JSON_SUCCESS && lex_peek(lex) != JSON_TOKEN_ARRAY_END)
489  {
490  result = parse_array_element(lex, sem);
491 
492  while (result == JSON_SUCCESS && lex_peek(lex) == JSON_TOKEN_COMMA)
493  {
494  result = json_lex(lex);
495  if (result != JSON_SUCCESS)
496  break;
497  result = parse_array_element(lex, sem);
498  }
499  }
500  if (result != JSON_SUCCESS)
501  return result;
502 
504  if (result != JSON_SUCCESS)
505  return result;
506 
507  lex->lex_level--;
508 
509  if (aend != NULL)
510  (*aend) (sem->semstate);
511 
512  return JSON_SUCCESS;
513 }
514 
515 /*
516  * Lex one token from the input stream.
517  */
520 {
521  char *s;
522  int len;
523  JsonParseErrorType result;
524 
525  /* Skip leading whitespace. */
526  s = lex->token_terminator;
527  len = s - lex->input;
528  while (len < lex->input_length &&
529  (*s == ' ' || *s == '\t' || *s == '\n' || *s == '\r'))
530  {
531  if (*s++ == '\n')
532  {
533  ++lex->line_number;
534  lex->line_start = s;
535  }
536  len++;
537  }
538  lex->token_start = s;
539 
540  /* Determine token type. */
541  if (len >= lex->input_length)
542  {
543  lex->token_start = NULL;
545  lex->token_terminator = s;
546  lex->token_type = JSON_TOKEN_END;
547  }
548  else
549  {
550  switch (*s)
551  {
552  /* Single-character token, some kind of punctuation mark. */
553  case '{':
555  lex->token_terminator = s + 1;
557  break;
558  case '}':
560  lex->token_terminator = s + 1;
562  break;
563  case '[':
565  lex->token_terminator = s + 1;
567  break;
568  case ']':
570  lex->token_terminator = s + 1;
572  break;
573  case ',':
575  lex->token_terminator = s + 1;
577  break;
578  case ':':
580  lex->token_terminator = s + 1;
582  break;
583  case '"':
584  /* string */
585  result = json_lex_string(lex);
586  if (result != JSON_SUCCESS)
587  return result;
589  break;
590  case '-':
591  /* Negative number. */
592  result = json_lex_number(lex, s + 1, NULL, NULL);
593  if (result != JSON_SUCCESS)
594  return result;
596  break;
597  case '0':
598  case '1':
599  case '2':
600  case '3':
601  case '4':
602  case '5':
603  case '6':
604  case '7':
605  case '8':
606  case '9':
607  /* Positive number. */
608  result = json_lex_number(lex, s, NULL, NULL);
609  if (result != JSON_SUCCESS)
610  return result;
612  break;
613  default:
614  {
615  char *p;
616 
617  /*
618  * We're not dealing with a string, number, legal
619  * punctuation mark, or end of string. The only legal
620  * tokens we might find here are true, false, and null,
621  * but for error reporting purposes we scan until we see a
622  * non-alphanumeric character. That way, we can report
623  * the whole word as an unexpected token, rather than just
624  * some unintuitive prefix thereof.
625  */
626  for (p = s; p - s < lex->input_length - len && JSON_ALPHANUMERIC_CHAR(*p); p++)
627  /* skip */ ;
628 
629  /*
630  * We got some sort of unexpected punctuation or an
631  * otherwise unexpected character, so just complain about
632  * that one character.
633  */
634  if (p == s)
635  {
637  lex->token_terminator = s + 1;
638  return JSON_INVALID_TOKEN;
639  }
640 
641  /*
642  * We've got a real alphanumeric token here. If it
643  * happens to be true, false, or null, all is well. If
644  * not, error out.
645  */
647  lex->token_terminator = p;
648  if (p - s == 4)
649  {
650  if (memcmp(s, "true", 4) == 0)
652  else if (memcmp(s, "null", 4) == 0)
654  else
655  return JSON_INVALID_TOKEN;
656  }
657  else if (p - s == 5 && memcmp(s, "false", 5) == 0)
659  else
660  return JSON_INVALID_TOKEN;
661  }
662  } /* end of switch */
663  }
664 
665  return JSON_SUCCESS;
666 }
667 
668 /*
669  * The next token in the input stream is known to be a string; lex it.
670  */
671 static inline JsonParseErrorType
673 {
674  char *s;
675  int len;
676  int hi_surrogate = -1;
677 
678  if (lex->strval != NULL)
679  resetStringInfo(lex->strval);
680 
681  Assert(lex->input_length > 0);
682  s = lex->token_start;
683  len = lex->token_start - lex->input;
684  for (;;)
685  {
686  s++;
687  len++;
688  /* Premature end of the string. */
689  if (len >= lex->input_length)
690  {
691  lex->token_terminator = s;
692  return JSON_INVALID_TOKEN;
693  }
694  else if (*s == '"')
695  break;
696  else if ((unsigned char) *s < 32)
697  {
698  /* Per RFC4627, these characters MUST be escaped. */
699  /* Since *s isn't printable, exclude it from the context string */
700  lex->token_terminator = s;
701  return JSON_ESCAPING_REQUIRED;
702  }
703  else if (*s == '\\')
704  {
705  /* OK, we have an escape character. */
706  s++;
707  len++;
708  if (len >= lex->input_length)
709  {
710  lex->token_terminator = s;
711  return JSON_INVALID_TOKEN;
712  }
713  else if (*s == 'u')
714  {
715  int i;
716  int ch = 0;
717 
718  for (i = 1; i <= 4; i++)
719  {
720  s++;
721  len++;
722  if (len >= lex->input_length)
723  {
724  lex->token_terminator = s;
725  return JSON_INVALID_TOKEN;
726  }
727  else if (*s >= '0' && *s <= '9')
728  ch = (ch * 16) + (*s - '0');
729  else if (*s >= 'a' && *s <= 'f')
730  ch = (ch * 16) + (*s - 'a') + 10;
731  else if (*s >= 'A' && *s <= 'F')
732  ch = (ch * 16) + (*s - 'A') + 10;
733  else
734  {
737  }
738  }
739  if (lex->strval != NULL)
740  {
741  /*
742  * Combine surrogate pairs.
743  */
744  if (is_utf16_surrogate_first(ch))
745  {
746  if (hi_surrogate != -1)
748  hi_surrogate = ch;
749  continue;
750  }
751  else if (is_utf16_surrogate_second(ch))
752  {
753  if (hi_surrogate == -1)
755  ch = surrogate_pair_to_codepoint(hi_surrogate, ch);
756  hi_surrogate = -1;
757  }
758 
759  if (hi_surrogate != -1)
761 
762  /*
763  * Reject invalid cases. We can't have a value above
764  * 0xFFFF here (since we only accepted 4 hex digits
765  * above), so no need to test for out-of-range chars.
766  */
767  if (ch == 0)
768  {
769  /* We can't allow this, since our TEXT type doesn't */
771  }
772 
773  /*
774  * Add the represented character to lex->strval. In the
775  * backend, we can let pg_unicode_to_server() handle any
776  * required character set conversion; in frontend, we can
777  * only deal with trivial conversions.
778  *
779  * Note: pg_unicode_to_server() will throw an error for a
780  * conversion failure, rather than returning a failure
781  * indication. That seems OK.
782  */
783 #ifndef FRONTEND
784  {
785  char cbuf[MAX_UNICODE_EQUIVALENT_STRING + 1];
786 
787  pg_unicode_to_server(ch, (unsigned char *) cbuf);
788  appendStringInfoString(lex->strval, cbuf);
789  }
790 #else
791  if (lex->input_encoding == PG_UTF8)
792  {
793  /* OK, we can map the code point to UTF8 easily */
794  char utf8str[5];
795  int utf8len;
796 
797  unicode_to_utf8(ch, (unsigned char *) utf8str);
798  utf8len = pg_utf_mblen((unsigned char *) utf8str);
799  appendBinaryStringInfo(lex->strval, utf8str, utf8len);
800  }
801  else if (ch <= 0x007f)
802  {
803  /* The ASCII range is the same in all encodings */
804  appendStringInfoChar(lex->strval, (char) ch);
805  }
806  else
808 #endif /* FRONTEND */
809  }
810  }
811  else if (lex->strval != NULL)
812  {
813  if (hi_surrogate != -1)
815 
816  switch (*s)
817  {
818  case '"':
819  case '\\':
820  case '/':
821  appendStringInfoChar(lex->strval, *s);
822  break;
823  case 'b':
824  appendStringInfoChar(lex->strval, '\b');
825  break;
826  case 'f':
827  appendStringInfoChar(lex->strval, '\f');
828  break;
829  case 'n':
830  appendStringInfoChar(lex->strval, '\n');
831  break;
832  case 'r':
833  appendStringInfoChar(lex->strval, '\r');
834  break;
835  case 't':
836  appendStringInfoChar(lex->strval, '\t');
837  break;
838  default:
839  /* Not a valid string escape, so signal error. */
840  lex->token_start = s;
842  return JSON_ESCAPING_INVALID;
843  }
844  }
845  else if (strchr("\"\\/bfnrt", *s) == NULL)
846  {
847  /*
848  * Simpler processing if we're not bothered about de-escaping
849  *
850  * It's very tempting to remove the strchr() call here and
851  * replace it with a switch statement, but testing so far has
852  * shown it's not a performance win.
853  */
854  lex->token_start = s;
856  return JSON_ESCAPING_INVALID;
857  }
858  }
859  else if (lex->strval != NULL)
860  {
861  if (hi_surrogate != -1)
863 
864  appendStringInfoChar(lex->strval, *s);
865  }
866  }
867 
868  if (hi_surrogate != -1)
870 
871  /* Hooray, we found the end of the string! */
873  lex->token_terminator = s + 1;
874  return JSON_SUCCESS;
875 }
876 
877 /*
878  * The next token in the input stream is known to be a number; lex it.
879  *
880  * In JSON, a number consists of four parts:
881  *
882  * (1) An optional minus sign ('-').
883  *
884  * (2) Either a single '0', or a string of one or more digits that does not
885  * begin with a '0'.
886  *
887  * (3) An optional decimal part, consisting of a period ('.') followed by
888  * one or more digits. (Note: While this part can be omitted
889  * completely, it's not OK to have only the decimal point without
890  * any digits afterwards.)
891  *
892  * (4) An optional exponent part, consisting of 'e' or 'E', optionally
893  * followed by '+' or '-', followed by one or more digits. (Note:
894  * As with the decimal part, if 'e' or 'E' is present, it must be
895  * followed by at least one digit.)
896  *
897  * The 's' argument to this function points to the ostensible beginning
898  * of part 2 - i.e. the character after any optional minus sign, or the
899  * first character of the string if there is none.
900  *
901  * If num_err is not NULL, we return an error flag to *num_err rather than
902  * raising an error for a badly-formed number. Also, if total_len is not NULL
903  * the distance from lex->input to the token end+1 is returned to *total_len.
904  */
905 static inline JsonParseErrorType
907  bool *num_err, int *total_len)
908 {
909  bool error = false;
910  int len = s - lex->input;
911 
912  /* Part (1): leading sign indicator. */
913  /* Caller already did this for us; so do nothing. */
914 
915  /* Part (2): parse main digit string. */
916  if (len < lex->input_length && *s == '0')
917  {
918  s++;
919  len++;
920  }
921  else if (len < lex->input_length && *s >= '1' && *s <= '9')
922  {
923  do
924  {
925  s++;
926  len++;
927  } while (len < lex->input_length && *s >= '0' && *s <= '9');
928  }
929  else
930  error = true;
931 
932  /* Part (3): parse optional decimal portion. */
933  if (len < lex->input_length && *s == '.')
934  {
935  s++;
936  len++;
937  if (len == lex->input_length || *s < '0' || *s > '9')
938  error = true;
939  else
940  {
941  do
942  {
943  s++;
944  len++;
945  } while (len < lex->input_length && *s >= '0' && *s <= '9');
946  }
947  }
948 
949  /* Part (4): parse optional exponent. */
950  if (len < lex->input_length && (*s == 'e' || *s == 'E'))
951  {
952  s++;
953  len++;
954  if (len < lex->input_length && (*s == '+' || *s == '-'))
955  {
956  s++;
957  len++;
958  }
959  if (len == lex->input_length || *s < '0' || *s > '9')
960  error = true;
961  else
962  {
963  do
964  {
965  s++;
966  len++;
967  } while (len < lex->input_length && *s >= '0' && *s <= '9');
968  }
969  }
970 
971  /*
972  * Check for trailing garbage. As in json_lex(), any alphanumeric stuff
973  * here should be considered part of the token for error-reporting
974  * purposes.
975  */
976  for (; len < lex->input_length && JSON_ALPHANUMERIC_CHAR(*s); s++, len++)
977  error = true;
978 
979  if (total_len != NULL)
980  *total_len = len;
981 
982  if (num_err != NULL)
983  {
984  /* let the caller handle any error */
985  *num_err = error;
986  }
987  else
988  {
989  /* return token endpoint */
991  lex->token_terminator = s;
992  /* handle error if any */
993  if (error)
994  return JSON_INVALID_TOKEN;
995  }
996 
997  return JSON_SUCCESS;
998 }
999 
1000 /*
1001  * Report a parse error.
1002  *
1003  * lex->token_start and lex->token_terminator must identify the current token.
1004  */
1005 static JsonParseErrorType
1007 {
1008  /* Handle case where the input ended prematurely. */
1009  if (lex->token_start == NULL || lex->token_type == JSON_TOKEN_END)
1010  return JSON_EXPECTED_MORE;
1011 
1012  /* Otherwise choose the error type based on the parsing context. */
1013  switch (ctx)
1014  {
1015  case JSON_PARSE_END:
1016  return JSON_EXPECTED_END;
1017  case JSON_PARSE_VALUE:
1018  return JSON_EXPECTED_JSON;
1019  case JSON_PARSE_STRING:
1020  return JSON_EXPECTED_STRING;
1023  case JSON_PARSE_ARRAY_NEXT:
1024  return JSON_EXPECTED_ARRAY_NEXT;
1028  return JSON_EXPECTED_COLON;
1032  return JSON_EXPECTED_STRING;
1033  }
1034 
1035  /*
1036  * We don't use a default: case, so that the compiler will warn about
1037  * unhandled enum values.
1038  */
1039  Assert(false);
1040  return JSON_SUCCESS; /* silence stupider compilers */
1041 }
1042 
1043 
1044 #ifndef FRONTEND
1045 /*
1046  * Extract the current token from a lexing context, for error reporting.
1047  */
1048 static char *
1050 {
1051  int toklen = lex->token_terminator - lex->token_start;
1052  char *token = palloc(toklen + 1);
1053 
1054  memcpy(token, lex->token_start, toklen);
1055  token[toklen] = '\0';
1056  return token;
1057 }
1058 
1059 /*
1060  * Construct a detail message for a JSON error.
1061  *
1062  * Note that the error message generated by this routine may not be
1063  * palloc'd, making it unsafe for frontend code as there is no way to
1064  * know if this can be safely pfree'd or not.
1065  */
1066 char *
1068 {
1069  switch (error)
1070  {
1071  case JSON_SUCCESS:
1072  /* fall through to the error code after switch */
1073  break;
1074  case JSON_ESCAPING_INVALID:
1075  return psprintf(_("Escape sequence \"\\%s\" is invalid."),
1076  extract_token(lex));
1078  return psprintf(_("Character with value 0x%02x must be escaped."),
1079  (unsigned char) *(lex->token_terminator));
1080  case JSON_EXPECTED_END:
1081  return psprintf(_("Expected end of input, but found \"%s\"."),
1082  extract_token(lex));
1084  return psprintf(_("Expected array element or \"]\", but found \"%s\"."),
1085  extract_token(lex));
1087  return psprintf(_("Expected \",\" or \"]\", but found \"%s\"."),
1088  extract_token(lex));
1089  case JSON_EXPECTED_COLON:
1090  return psprintf(_("Expected \":\", but found \"%s\"."),
1091  extract_token(lex));
1092  case JSON_EXPECTED_JSON:
1093  return psprintf(_("Expected JSON value, but found \"%s\"."),
1094  extract_token(lex));
1095  case JSON_EXPECTED_MORE:
1096  return _("The input string ended unexpectedly.");
1098  return psprintf(_("Expected string or \"}\", but found \"%s\"."),
1099  extract_token(lex));
1101  return psprintf(_("Expected \",\" or \"}\", but found \"%s\"."),
1102  extract_token(lex));
1103  case JSON_EXPECTED_STRING:
1104  return psprintf(_("Expected string, but found \"%s\"."),
1105  extract_token(lex));
1106  case JSON_INVALID_TOKEN:
1107  return psprintf(_("Token \"%s\" is invalid."),
1108  extract_token(lex));
1110  return _("\\u0000 cannot be converted to text.");
1112  return _("\"\\u\" must be followed by four hexadecimal digits.");
1114  /* note: this case is only reachable in frontend not backend */
1115  return _("Unicode escape values cannot be used for code point values above 007F when the encoding is not UTF8.");
1117  return _("Unicode high surrogate must not follow a high surrogate.");
1119  return _("Unicode low surrogate must follow a high surrogate.");
1120  }
1121 
1122  /*
1123  * We don't use a default: case, so that the compiler will warn about
1124  * unhandled enum values. But this needs to be here anyway to cover the
1125  * possibility of an incorrect input.
1126  */
1127  elog(ERROR, "unexpected json parse error type: %d", (int) error);
1128  return NULL;
1129 }
1130 #endif
#define unconstify(underlying_type, expr)
Definition: c.h:1240
#define _(x)
Definition: elog.c:89
#define ERROR
Definition: elog.h:33
#define elog(elevel,...)
Definition: elog.h:218
long val
Definition: informix.c:664
int i
Definition: isn.c:73
JsonParseContext
Definition: jsonapi.c:33
@ JSON_PARSE_OBJECT_LABEL
Definition: jsonapi.c:39
@ JSON_PARSE_VALUE
Definition: jsonapi.c:34
@ JSON_PARSE_OBJECT_START
Definition: jsonapi.c:38
@ JSON_PARSE_ARRAY_START
Definition: jsonapi.c:36
@ JSON_PARSE_END
Definition: jsonapi.c:42
@ JSON_PARSE_OBJECT_NEXT
Definition: jsonapi.c:40
@ JSON_PARSE_ARRAY_NEXT
Definition: jsonapi.c:37
@ JSON_PARSE_OBJECT_COMMA
Definition: jsonapi.c:41
@ JSON_PARSE_STRING
Definition: jsonapi.c:35
static JsonParseErrorType json_lex_string(JsonLexContext *lex)
Definition: jsonapi.c:672
#define JSON_ALPHANUMERIC_CHAR(c)
Definition: jsonapi.c:91
static JsonParseErrorType parse_object_field(JsonLexContext *lex, JsonSemAction *sem)
Definition: jsonapi.c:306
JsonLexContext * makeJsonLexContextCstringLen(char *json, int len, int encoding, bool need_escapes)
Definition: jsonapi.c:144
static JsonParseErrorType parse_array_element(JsonLexContext *lex, JsonSemAction *sem)
Definition: jsonapi.c:425
static char * extract_token(JsonLexContext *lex)
Definition: jsonapi.c:1049
static JsonTokenType lex_peek(JsonLexContext *lex)
Definition: jsonapi.c:70
static JsonParseErrorType parse_object(JsonLexContext *lex, JsonSemAction *sem)
Definition: jsonapi.c:359
char * json_errdetail(JsonParseErrorType error, JsonLexContext *lex)
Definition: jsonapi.c:1067
bool IsValidJsonNumber(const char *str, int len)
Definition: jsonapi.c:104
static JsonParseErrorType report_parse_error(JsonParseContext ctx, JsonLexContext *lex)
Definition: jsonapi.c:1006
JsonSemAction nullSemAction
Definition: jsonapi.c:56
static JsonParseErrorType lex_expect(JsonParseContext ctx, JsonLexContext *lex, JsonTokenType token)
Definition: jsonapi.c:82
JsonParseErrorType pg_parse_json(JsonLexContext *lex, JsonSemAction *sem)
Definition: jsonapi.c:168
static JsonParseErrorType parse_array(JsonLexContext *lex, JsonSemAction *sem)
Definition: jsonapi.c:462
static JsonParseErrorType parse_scalar(JsonLexContext *lex, JsonSemAction *sem)
Definition: jsonapi.c:262
JsonParseErrorType json_lex(JsonLexContext *lex)
Definition: jsonapi.c:519
JsonParseErrorType json_count_array_elements(JsonLexContext *lex, int *elements)
Definition: jsonapi.c:208
static JsonParseErrorType json_lex_number(JsonLexContext *lex, char *s, bool *num_err, int *total_len)
Definition: jsonapi.c:906
void(* json_struct_action)(void *state)
Definition: jsonapi.h:87
JsonParseErrorType
Definition: jsonapi.h:37
@ JSON_EXPECTED_ARRAY_FIRST
Definition: jsonapi.h:41
@ JSON_EXPECTED_MORE
Definition: jsonapi.h:46
@ JSON_UNICODE_HIGH_SURROGATE
Definition: jsonapi.h:54
@ JSON_EXPECTED_COLON
Definition: jsonapi.h:43
@ JSON_EXPECTED_OBJECT_FIRST
Definition: jsonapi.h:47
@ JSON_UNICODE_CODE_POINT_ZERO
Definition: jsonapi.h:51
@ JSON_EXPECTED_STRING
Definition: jsonapi.h:49
@ JSON_UNICODE_ESCAPE_FORMAT
Definition: jsonapi.h:52
@ JSON_SUCCESS
Definition: jsonapi.h:38
@ JSON_EXPECTED_OBJECT_NEXT
Definition: jsonapi.h:48
@ JSON_ESCAPING_REQUIRED
Definition: jsonapi.h:40
@ JSON_EXPECTED_JSON
Definition: jsonapi.h:45
@ JSON_INVALID_TOKEN
Definition: jsonapi.h:50
@ JSON_ESCAPING_INVALID
Definition: jsonapi.h:39
@ JSON_EXPECTED_END
Definition: jsonapi.h:44
@ JSON_EXPECTED_ARRAY_NEXT
Definition: jsonapi.h:42
@ JSON_UNICODE_HIGH_ESCAPE
Definition: jsonapi.h:53
@ JSON_UNICODE_LOW_SURROGATE
Definition: jsonapi.h:55
void(* json_ofield_action)(void *state, char *fname, bool isnull)
Definition: jsonapi.h:88
JsonTokenType
Definition: jsonapi.h:20
@ JSON_TOKEN_COMMA
Definition: jsonapi.h:28
@ JSON_TOKEN_FALSE
Definition: jsonapi.h:31
@ JSON_TOKEN_END
Definition: jsonapi.h:33
@ JSON_TOKEN_TRUE
Definition: jsonapi.h:30
@ JSON_TOKEN_OBJECT_END
Definition: jsonapi.h:25
@ JSON_TOKEN_NULL
Definition: jsonapi.h:32
@ JSON_TOKEN_ARRAY_END
Definition: jsonapi.h:27
@ JSON_TOKEN_OBJECT_START
Definition: jsonapi.h:24
@ JSON_TOKEN_NUMBER
Definition: jsonapi.h:23
@ JSON_TOKEN_STRING
Definition: jsonapi.h:22
@ JSON_TOKEN_COLON
Definition: jsonapi.h:29
@ JSON_TOKEN_ARRAY_START
Definition: jsonapi.h:26
void(* json_scalar_action)(void *state, char *token, JsonTokenType tokentype)
Definition: jsonapi.h:90
void(* json_aelem_action)(void *state, bool isnull)
Definition: jsonapi.h:89
Assert(fmt[strlen(fmt) - 1] !='\n')
void pg_unicode_to_server(pg_wchar c, unsigned char *s)
Definition: mbutils.c:864
char * pstrdup(const char *in)
Definition: mcxt.c:1305
void * palloc0(Size size)
Definition: mcxt.c:1099
void * palloc(Size size)
Definition: mcxt.c:1068
const void size_t len
int32 encoding
Definition: pg_database.h:41
@ PG_UTF8
Definition: pg_wchar.h:230
#define MAX_UNICODE_EQUIVALENT_STRING
Definition: pg_wchar.h:327
static pg_wchar surrogate_pair_to_codepoint(pg_wchar first, pg_wchar second)
Definition: pg_wchar.h:541
static bool is_utf16_surrogate_first(pg_wchar c)
Definition: pg_wchar.h:529
static bool is_utf16_surrogate_second(pg_wchar c)
Definition: pg_wchar.h:535
void check_stack_depth(void)
Definition: postgres.c:3500
char * psprintf(const char *fmt,...)
Definition: psprintf.c:46
static void error(void)
Definition: sql-dyntest.c:147
StringInfo makeStringInfo(void)
Definition: stringinfo.c:41
void resetStringInfo(StringInfo str)
Definition: stringinfo.c:75
void appendBinaryStringInfo(StringInfo str, const char *data, int datalen)
Definition: stringinfo.c:227
void appendStringInfoString(StringInfo str, const char *s)
Definition: stringinfo.c:176
void appendStringInfoChar(StringInfo str, char ch)
Definition: stringinfo.c:188
char * input
Definition: jsonapi.h:74
char * token_start
Definition: jsonapi.h:77
int input_encoding
Definition: jsonapi.h:76
StringInfo strval
Definition: jsonapi.h:84
char * token_terminator
Definition: jsonapi.h:78
char * prev_token_terminator
Definition: jsonapi.h:79
char * line_start
Definition: jsonapi.h:83
int lex_level
Definition: jsonapi.h:81
int input_length
Definition: jsonapi.h:75
int line_number
Definition: jsonapi.h:82
JsonTokenType token_type
Definition: jsonapi.h:80
json_struct_action array_end
Definition: jsonapi.h:110
json_struct_action object_start
Definition: jsonapi.h:107
json_ofield_action object_field_start
Definition: jsonapi.h:111
json_aelem_action array_element_start
Definition: jsonapi.h:113
json_scalar_action scalar
Definition: jsonapi.h:115
void * semstate
Definition: jsonapi.h:106
json_aelem_action array_element_end
Definition: jsonapi.h:114
json_struct_action array_start
Definition: jsonapi.h:109
json_struct_action object_end
Definition: jsonapi.h:108
json_ofield_action object_field_end
Definition: jsonapi.h:112
int pg_encoding_mblen_bounded(int encoding, const char *mbstr)
Definition: wchar.c:2141
unsigned char * unicode_to_utf8(pg_wchar c, unsigned char *utf8string)
Definition: wchar.c:483
int pg_utf_mblen(const unsigned char *s)
Definition: wchar.c:549