PostgreSQL Source Code  git master
jsonapi.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * jsonapi.c
4  * JSON parser and lexer interfaces
5  *
6  * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  * IDENTIFICATION
10  * src/common/jsonapi.c
11  *
12  *-------------------------------------------------------------------------
13  */
14 #ifndef FRONTEND
15 #include "postgres.h"
16 #else
17 #include "postgres_fe.h"
18 #endif
19 
20 #include "common/jsonapi.h"
21 #include "mb/pg_wchar.h"
22 #include "port/pg_lfind.h"
23 
24 #ifndef FRONTEND
25 #include "miscadmin.h"
26 #endif
27 
/*
 * The context of the parser is maintained by the recursive descent
 * mechanism, but is passed explicitly to the error reporting routine
 * for better diagnostics.
 *
 * Each value names what the parser was expecting when it looked at the
 * current token; report_parse_error() maps it to a JsonParseErrorType.
 */
typedef enum					/* contexts of JSON parser */
{
	JSON_PARSE_VALUE,			/* expecting a value */
	JSON_PARSE_STRING,			/* expecting a string (for a field name) */
	JSON_PARSE_ARRAY_START,		/* saw '[', expecting value or ']' */
	JSON_PARSE_ARRAY_NEXT,		/* saw array element, expecting ',' or ']' */
	JSON_PARSE_OBJECT_START,	/* saw '{', expecting label or '}' */
	JSON_PARSE_OBJECT_LABEL,	/* saw object label, expecting ':' */
	JSON_PARSE_OBJECT_NEXT,		/* saw object value, expecting ',' or '}' */
	JSON_PARSE_OBJECT_COMMA,	/* saw object ',', expecting next label */
	JSON_PARSE_END				/* saw the end of a document, expect nothing */
} JsonParseContext;
45 
47 static inline JsonParseErrorType json_lex_number(JsonLexContext *lex, char *s,
48  bool *num_err, int *total_len);
55 
56 /* the null action object used for pure validation */
58 {
59  NULL, NULL, NULL, NULL, NULL,
60  NULL, NULL, NULL, NULL, NULL
61 };
62 
63 /* Recursive Descent parser support routines */
64 
65 /*
66  * lex_peek
67  *
68  * what is the current look_ahead token?
69 */
70 static inline JsonTokenType
72 {
73  return lex->token_type;
74 }
75 
76 /*
77  * lex_expect
78  *
79  * move the lexer to the next token if the current look_ahead token matches
80  * the parameter token. Otherwise, report an error.
81  */
82 static inline JsonParseErrorType
84 {
85  if (lex_peek(lex) == token)
86  return json_lex(lex);
87  else
88  return report_parse_error(ctx, lex);
89 }
90 
/*
 * Chars to consider as part of an alphanumeric token: ASCII letters, ASCII
 * digits, underscore, and anything IS_HIGHBIT_SET() accepts (presumably bytes
 * of multibyte characters -- that macro is defined elsewhere).
 *
 * Note: the argument is evaluated several times, so it must not have side
 * effects.
 */
#define JSON_ALPHANUMERIC_CHAR(c)  \
	(((c) >= 'a' && (c) <= 'z') || \
	 ((c) >= 'A' && (c) <= 'Z') || \
	 ((c) >= '0' && (c) <= '9') || \
	 (c) == '_' || \
	 IS_HIGHBIT_SET(c))
98 
99 /*
100  * Utility function to check if a string is a valid JSON number.
101  *
102  * str is of length len, and need not be null-terminated.
103  */
104 bool
105 IsValidJsonNumber(const char *str, int len)
106 {
107  bool numeric_error;
108  int total_len;
109  JsonLexContext dummy_lex;
110 
111  if (len <= 0)
112  return false;
113 
114  /*
115  * json_lex_number expects a leading '-' to have been eaten already.
116  *
117  * having to cast away the constness of str is ugly, but there's not much
118  * easy alternative.
119  */
120  if (*str == '-')
121  {
122  dummy_lex.input = unconstify(char *, str) + 1;
123  dummy_lex.input_length = len - 1;
124  }
125  else
126  {
127  dummy_lex.input = unconstify(char *, str);
128  dummy_lex.input_length = len;
129  }
130 
131  json_lex_number(&dummy_lex, dummy_lex.input, &numeric_error, &total_len);
132 
133  return (!numeric_error) && (total_len == dummy_lex.input_length);
134 }
135 
136 /*
137  * makeJsonLexContextCstringLen
138  *
139  * lex constructor, with or without StringInfo object for de-escaped lexemes.
140  *
141  * Without is better as it makes the processing faster, so only make one
142  * if really required.
143  */
145 makeJsonLexContextCstringLen(char *json, int len, int encoding, bool need_escapes)
146 {
147  JsonLexContext *lex = palloc0(sizeof(JsonLexContext));
148 
149  lex->input = lex->token_terminator = lex->line_start = json;
150  lex->line_number = 1;
151  lex->input_length = len;
152  lex->input_encoding = encoding;
153  if (need_escapes)
154  lex->strval = makeStringInfo();
155  return lex;
156 }
157 
158 /*
159  * pg_parse_json
160  *
161  * Publicly visible entry point for the JSON parser.
162  *
163  * lex is a lexing context, set up for the json to be processed by calling
164  * makeJsonLexContext(). sem is a structure of function pointers to semantic
165  * action routines to be called at appropriate spots during parsing, and a
166  * pointer to a state object to be passed to those routines.
167  */
170 {
171  JsonTokenType tok;
172  JsonParseErrorType result;
173 
174  /* get the initial token */
175  result = json_lex(lex);
176  if (result != JSON_SUCCESS)
177  return result;
178 
179  tok = lex_peek(lex);
180 
181  /* parse by recursive descent */
182  switch (tok)
183  {
185  result = parse_object(lex, sem);
186  break;
188  result = parse_array(lex, sem);
189  break;
190  default:
191  result = parse_scalar(lex, sem); /* json can be a bare scalar */
192  }
193 
194  if (result == JSON_SUCCESS)
195  result = lex_expect(JSON_PARSE_END, lex, JSON_TOKEN_END);
196 
197  return result;
198 }
199 
200 /*
201  * json_count_array_elements
202  *
203  * Returns number of array elements in lex context at start of array token
204  * until end of array token at same nesting level.
205  *
206  * Designed to be called from array_start routines.
207  */
210 {
211  JsonLexContext copylex;
212  int count;
213  JsonParseErrorType result;
214 
215  /*
216  * It's safe to do this with a shallow copy because the lexical routines
217  * don't scribble on the input. They do scribble on the other pointers
218  * etc, so doing this with a copy makes that safe.
219  */
220  memcpy(&copylex, lex, sizeof(JsonLexContext));
221  copylex.strval = NULL; /* not interested in values here */
222  copylex.lex_level++;
223 
224  count = 0;
225  result = lex_expect(JSON_PARSE_ARRAY_START, &copylex,
227  if (result != JSON_SUCCESS)
228  return result;
229  if (lex_peek(&copylex) != JSON_TOKEN_ARRAY_END)
230  {
231  while (1)
232  {
233  count++;
234  result = parse_array_element(&copylex, &nullSemAction);
235  if (result != JSON_SUCCESS)
236  return result;
237  if (copylex.token_type != JSON_TOKEN_COMMA)
238  break;
239  result = json_lex(&copylex);
240  if (result != JSON_SUCCESS)
241  return result;
242  }
243  }
244  result = lex_expect(JSON_PARSE_ARRAY_NEXT, &copylex,
246  if (result != JSON_SUCCESS)
247  return result;
248 
249  *elements = count;
250  return JSON_SUCCESS;
251 }
252 
253 /*
254  * Recursive Descent parse routines. There is one for each structural
255  * element in a json document:
256  * - scalar (string, number, true, false, null)
257  * - array ( [ ] )
258  * - array element
259  * - object ( { } )
260  * - object field
261  */
262 static inline JsonParseErrorType
264 {
265  char *val = NULL;
266  json_scalar_action sfunc = sem->scalar;
267  JsonTokenType tok = lex_peek(lex);
268  JsonParseErrorType result;
269 
270  /* a scalar must be a string, a number, true, false, or null */
271  if (tok != JSON_TOKEN_STRING && tok != JSON_TOKEN_NUMBER &&
272  tok != JSON_TOKEN_TRUE && tok != JSON_TOKEN_FALSE &&
273  tok != JSON_TOKEN_NULL)
275 
276  /* if no semantic function, just consume the token */
277  if (sfunc == NULL)
278  return json_lex(lex);
279 
280  /* extract the de-escaped string value, or the raw lexeme */
281  if (lex_peek(lex) == JSON_TOKEN_STRING)
282  {
283  if (lex->strval != NULL)
284  val = pstrdup(lex->strval->data);
285  }
286  else
287  {
288  int len = (lex->token_terminator - lex->token_start);
289 
290  val = palloc(len + 1);
291  memcpy(val, lex->token_start, len);
292  val[len] = '\0';
293  }
294 
295  /* consume the token */
296  result = json_lex(lex);
297  if (result != JSON_SUCCESS)
298  return result;
299 
300  /* invoke the callback */
301  (*sfunc) (sem->semstate, val, tok);
302 
303  return JSON_SUCCESS;
304 }
305 
306 static JsonParseErrorType
308 {
309  /*
310  * An object field is "fieldname" : value where value can be a scalar,
311  * object or array. Note: in user-facing docs and error messages, we
312  * generally call a field name a "key".
313  */
314 
315  char *fname = NULL; /* keep compiler quiet */
318  bool isnull;
319  JsonTokenType tok;
320  JsonParseErrorType result;
321 
322  if (lex_peek(lex) != JSON_TOKEN_STRING)
324  if ((ostart != NULL || oend != NULL) && lex->strval != NULL)
325  fname = pstrdup(lex->strval->data);
326  result = json_lex(lex);
327  if (result != JSON_SUCCESS)
328  return result;
329 
331  if (result != JSON_SUCCESS)
332  return result;
333 
334  tok = lex_peek(lex);
335  isnull = tok == JSON_TOKEN_NULL;
336 
337  if (ostart != NULL)
338  (*ostart) (sem->semstate, fname, isnull);
339 
340  switch (tok)
341  {
343  result = parse_object(lex, sem);
344  break;
346  result = parse_array(lex, sem);
347  break;
348  default:
349  result = parse_scalar(lex, sem);
350  }
351  if (result != JSON_SUCCESS)
352  return result;
353 
354  if (oend != NULL)
355  (*oend) (sem->semstate, fname, isnull);
356  return JSON_SUCCESS;
357 }
358 
359 static JsonParseErrorType
361 {
362  /*
363  * an object is a possibly empty sequence of object fields, separated by
364  * commas and surrounded by curly braces.
365  */
366  json_struct_action ostart = sem->object_start;
367  json_struct_action oend = sem->object_end;
368  JsonTokenType tok;
369  JsonParseErrorType result;
370 
371 #ifndef FRONTEND
373 #endif
374 
375  if (ostart != NULL)
376  (*ostart) (sem->semstate);
377 
378  /*
379  * Data inside an object is at a higher nesting level than the object
380  * itself. Note that we increment this after we call the semantic routine
381  * for the object start and restore it before we call the routine for the
382  * object end.
383  */
384  lex->lex_level++;
385 
387  result = json_lex(lex);
388  if (result != JSON_SUCCESS)
389  return result;
390 
391  tok = lex_peek(lex);
392  switch (tok)
393  {
394  case JSON_TOKEN_STRING:
395  result = parse_object_field(lex, sem);
396  while (result == JSON_SUCCESS && lex_peek(lex) == JSON_TOKEN_COMMA)
397  {
398  result = json_lex(lex);
399  if (result != JSON_SUCCESS)
400  break;
401  result = parse_object_field(lex, sem);
402  }
403  break;
405  break;
406  default:
407  /* case of an invalid initial token inside the object */
409  }
410  if (result != JSON_SUCCESS)
411  return result;
412 
414  if (result != JSON_SUCCESS)
415  return result;
416 
417  lex->lex_level--;
418 
419  if (oend != NULL)
420  (*oend) (sem->semstate);
421 
422  return JSON_SUCCESS;
423 }
424 
425 static JsonParseErrorType
427 {
430  JsonTokenType tok = lex_peek(lex);
431  JsonParseErrorType result;
432 
433  bool isnull;
434 
435  isnull = tok == JSON_TOKEN_NULL;
436 
437  if (astart != NULL)
438  (*astart) (sem->semstate, isnull);
439 
440  /* an array element is any object, array or scalar */
441  switch (tok)
442  {
444  result = parse_object(lex, sem);
445  break;
447  result = parse_array(lex, sem);
448  break;
449  default:
450  result = parse_scalar(lex, sem);
451  }
452 
453  if (result != JSON_SUCCESS)
454  return result;
455 
456  if (aend != NULL)
457  (*aend) (sem->semstate, isnull);
458 
459  return JSON_SUCCESS;
460 }
461 
462 static JsonParseErrorType
464 {
465  /*
466  * an array is a possibly empty sequence of array elements, separated by
467  * commas and surrounded by square brackets.
468  */
469  json_struct_action astart = sem->array_start;
470  json_struct_action aend = sem->array_end;
471  JsonParseErrorType result;
472 
473 #ifndef FRONTEND
475 #endif
476 
477  if (astart != NULL)
478  (*astart) (sem->semstate);
479 
480  /*
481  * Data inside an array is at a higher nesting level than the array
482  * itself. Note that we increment this after we call the semantic routine
483  * for the array start and restore it before we call the routine for the
484  * array end.
485  */
486  lex->lex_level++;
487 
489  if (result == JSON_SUCCESS && lex_peek(lex) != JSON_TOKEN_ARRAY_END)
490  {
491  result = parse_array_element(lex, sem);
492 
493  while (result == JSON_SUCCESS && lex_peek(lex) == JSON_TOKEN_COMMA)
494  {
495  result = json_lex(lex);
496  if (result != JSON_SUCCESS)
497  break;
498  result = parse_array_element(lex, sem);
499  }
500  }
501  if (result != JSON_SUCCESS)
502  return result;
503 
505  if (result != JSON_SUCCESS)
506  return result;
507 
508  lex->lex_level--;
509 
510  if (aend != NULL)
511  (*aend) (sem->semstate);
512 
513  return JSON_SUCCESS;
514 }
515 
516 /*
517  * Lex one token from the input stream.
518  */
521 {
522  char *s;
523  char *const end = lex->input + lex->input_length;
524  JsonParseErrorType result;
525 
526  /* Skip leading whitespace. */
527  s = lex->token_terminator;
528  while (s < end && (*s == ' ' || *s == '\t' || *s == '\n' || *s == '\r'))
529  {
530  if (*s++ == '\n')
531  {
532  ++lex->line_number;
533  lex->line_start = s;
534  }
535  }
536  lex->token_start = s;
537 
538  /* Determine token type. */
539  if (s >= end)
540  {
541  lex->token_start = NULL;
543  lex->token_terminator = s;
544  lex->token_type = JSON_TOKEN_END;
545  }
546  else
547  {
548  switch (*s)
549  {
550  /* Single-character token, some kind of punctuation mark. */
551  case '{':
553  lex->token_terminator = s + 1;
555  break;
556  case '}':
558  lex->token_terminator = s + 1;
560  break;
561  case '[':
563  lex->token_terminator = s + 1;
565  break;
566  case ']':
568  lex->token_terminator = s + 1;
570  break;
571  case ',':
573  lex->token_terminator = s + 1;
575  break;
576  case ':':
578  lex->token_terminator = s + 1;
580  break;
581  case '"':
582  /* string */
583  result = json_lex_string(lex);
584  if (result != JSON_SUCCESS)
585  return result;
587  break;
588  case '-':
589  /* Negative number. */
590  result = json_lex_number(lex, s + 1, NULL, NULL);
591  if (result != JSON_SUCCESS)
592  return result;
594  break;
595  case '0':
596  case '1':
597  case '2':
598  case '3':
599  case '4':
600  case '5':
601  case '6':
602  case '7':
603  case '8':
604  case '9':
605  /* Positive number. */
606  result = json_lex_number(lex, s, NULL, NULL);
607  if (result != JSON_SUCCESS)
608  return result;
610  break;
611  default:
612  {
613  char *p;
614 
615  /*
616  * We're not dealing with a string, number, legal
617  * punctuation mark, or end of string. The only legal
618  * tokens we might find here are true, false, and null,
619  * but for error reporting purposes we scan until we see a
620  * non-alphanumeric character. That way, we can report
621  * the whole word as an unexpected token, rather than just
622  * some unintuitive prefix thereof.
623  */
624  for (p = s; p < end && JSON_ALPHANUMERIC_CHAR(*p); p++)
625  /* skip */ ;
626 
627  /*
628  * We got some sort of unexpected punctuation or an
629  * otherwise unexpected character, so just complain about
630  * that one character.
631  */
632  if (p == s)
633  {
635  lex->token_terminator = s + 1;
636  return JSON_INVALID_TOKEN;
637  }
638 
639  /*
640  * We've got a real alphanumeric token here. If it
641  * happens to be true, false, or null, all is well. If
642  * not, error out.
643  */
645  lex->token_terminator = p;
646  if (p - s == 4)
647  {
648  if (memcmp(s, "true", 4) == 0)
650  else if (memcmp(s, "null", 4) == 0)
652  else
653  return JSON_INVALID_TOKEN;
654  }
655  else if (p - s == 5 && memcmp(s, "false", 5) == 0)
657  else
658  return JSON_INVALID_TOKEN;
659  }
660  } /* end of switch */
661  }
662 
663  return JSON_SUCCESS;
664 }
665 
666 /*
667  * The next token in the input stream is known to be a string; lex it.
668  */
669 static inline JsonParseErrorType
671 {
672  char *s;
673  char *const end = lex->input + lex->input_length;
674  int hi_surrogate = -1;
675 
676  if (lex->strval != NULL)
677  resetStringInfo(lex->strval);
678 
679  Assert(lex->input_length > 0);
680  s = lex->token_start;
681  for (;;)
682  {
683  s++;
684  /* Premature end of the string. */
685  if (s >= end)
686  {
687  lex->token_terminator = s;
688  return JSON_INVALID_TOKEN;
689  }
690  else if (*s == '"')
691  break;
692  else if (*s == '\\')
693  {
694  /* OK, we have an escape character. */
695  s++;
696  if (s >= end)
697  {
698  lex->token_terminator = s;
699  return JSON_INVALID_TOKEN;
700  }
701  else if (*s == 'u')
702  {
703  int i;
704  int ch = 0;
705 
706  for (i = 1; i <= 4; i++)
707  {
708  s++;
709  if (s >= end)
710  {
711  lex->token_terminator = s;
712  return JSON_INVALID_TOKEN;
713  }
714  else if (*s >= '0' && *s <= '9')
715  ch = (ch * 16) + (*s - '0');
716  else if (*s >= 'a' && *s <= 'f')
717  ch = (ch * 16) + (*s - 'a') + 10;
718  else if (*s >= 'A' && *s <= 'F')
719  ch = (ch * 16) + (*s - 'A') + 10;
720  else
721  {
724  }
725  }
726  if (lex->strval != NULL)
727  {
728  /*
729  * Combine surrogate pairs.
730  */
731  if (is_utf16_surrogate_first(ch))
732  {
733  if (hi_surrogate != -1)
735  hi_surrogate = ch;
736  continue;
737  }
738  else if (is_utf16_surrogate_second(ch))
739  {
740  if (hi_surrogate == -1)
742  ch = surrogate_pair_to_codepoint(hi_surrogate, ch);
743  hi_surrogate = -1;
744  }
745 
746  if (hi_surrogate != -1)
748 
749  /*
750  * Reject invalid cases. We can't have a value above
751  * 0xFFFF here (since we only accepted 4 hex digits
752  * above), so no need to test for out-of-range chars.
753  */
754  if (ch == 0)
755  {
756  /* We can't allow this, since our TEXT type doesn't */
758  }
759 
760  /*
761  * Add the represented character to lex->strval. In the
762  * backend, we can let pg_unicode_to_server() handle any
763  * required character set conversion; in frontend, we can
764  * only deal with trivial conversions.
765  *
766  * Note: pg_unicode_to_server() will throw an error for a
767  * conversion failure, rather than returning a failure
768  * indication. That seems OK.
769  */
770 #ifndef FRONTEND
771  {
772  char cbuf[MAX_UNICODE_EQUIVALENT_STRING + 1];
773 
774  pg_unicode_to_server(ch, (unsigned char *) cbuf);
775  appendStringInfoString(lex->strval, cbuf);
776  }
777 #else
778  if (lex->input_encoding == PG_UTF8)
779  {
780  /* OK, we can map the code point to UTF8 easily */
781  char utf8str[5];
782  int utf8len;
783 
784  unicode_to_utf8(ch, (unsigned char *) utf8str);
785  utf8len = pg_utf_mblen((unsigned char *) utf8str);
786  appendBinaryStringInfo(lex->strval, utf8str, utf8len);
787  }
788  else if (ch <= 0x007f)
789  {
790  /* The ASCII range is the same in all encodings */
791  appendStringInfoChar(lex->strval, (char) ch);
792  }
793  else
795 #endif /* FRONTEND */
796  }
797  }
798  else if (lex->strval != NULL)
799  {
800  if (hi_surrogate != -1)
802 
803  switch (*s)
804  {
805  case '"':
806  case '\\':
807  case '/':
808  appendStringInfoChar(lex->strval, *s);
809  break;
810  case 'b':
811  appendStringInfoChar(lex->strval, '\b');
812  break;
813  case 'f':
814  appendStringInfoChar(lex->strval, '\f');
815  break;
816  case 'n':
817  appendStringInfoChar(lex->strval, '\n');
818  break;
819  case 'r':
820  appendStringInfoChar(lex->strval, '\r');
821  break;
822  case 't':
823  appendStringInfoChar(lex->strval, '\t');
824  break;
825  default:
826  /* Not a valid string escape, so signal error. */
827  lex->token_start = s;
829  return JSON_ESCAPING_INVALID;
830  }
831  }
832  else if (strchr("\"\\/bfnrt", *s) == NULL)
833  {
834  /*
835  * Simpler processing if we're not bothered about de-escaping
836  *
837  * It's very tempting to remove the strchr() call here and
838  * replace it with a switch statement, but testing so far has
839  * shown it's not a performance win.
840  */
841  lex->token_start = s;
843  return JSON_ESCAPING_INVALID;
844  }
845  }
846  else
847  {
848  char *p = s;
849 
850  if (hi_surrogate != -1)
852 
853  /*
854  * Skip to the first byte that requires special handling, so we
855  * can batch calls to appendBinaryStringInfo.
856  */
857  while (p < end - sizeof(Vector8) &&
858  !pg_lfind8('\\', (uint8 *) p, sizeof(Vector8)) &&
859  !pg_lfind8('"', (uint8 *) p, sizeof(Vector8)) &&
860  !pg_lfind8_le(31, (uint8 *) p, sizeof(Vector8)))
861  p += sizeof(Vector8);
862 
863  for (; p < end; p++)
864  {
865  if (*p == '\\' || *p == '"')
866  break;
867  else if ((unsigned char) *p <= 31)
868  {
869  /* Per RFC4627, these characters MUST be escaped. */
870  /*
871  * Since *p isn't printable, exclude it from the context
872  * string
873  */
874  lex->token_terminator = p;
875  return JSON_ESCAPING_REQUIRED;
876  }
877  }
878 
879  if (lex->strval != NULL)
880  appendBinaryStringInfo(lex->strval, s, p - s);
881 
882  /*
883  * s will be incremented at the top of the loop, so set it to just
884  * behind our lookahead position
885  */
886  s = p - 1;
887  }
888  }
889 
890  if (hi_surrogate != -1)
892 
893  /* Hooray, we found the end of the string! */
895  lex->token_terminator = s + 1;
896  return JSON_SUCCESS;
897 }
898 
899 /*
900  * The next token in the input stream is known to be a number; lex it.
901  *
902  * In JSON, a number consists of four parts:
903  *
904  * (1) An optional minus sign ('-').
905  *
906  * (2) Either a single '0', or a string of one or more digits that does not
907  * begin with a '0'.
908  *
909  * (3) An optional decimal part, consisting of a period ('.') followed by
910  * one or more digits. (Note: While this part can be omitted
911  * completely, it's not OK to have only the decimal point without
912  * any digits afterwards.)
913  *
914  * (4) An optional exponent part, consisting of 'e' or 'E', optionally
915  * followed by '+' or '-', followed by one or more digits. (Note:
916  * As with the decimal part, if 'e' or 'E' is present, it must be
917  * followed by at least one digit.)
918  *
919  * The 's' argument to this function points to the ostensible beginning
920  * of part 2 - i.e. the character after any optional minus sign, or the
921  * first character of the string if there is none.
922  *
923  * If num_err is not NULL, we return an error flag to *num_err rather than
924  * raising an error for a badly-formed number. Also, if total_len is not NULL
925  * the distance from lex->input to the token end+1 is returned to *total_len.
926  */
927 static inline JsonParseErrorType
929  bool *num_err, int *total_len)
930 {
931  bool error = false;
932  int len = s - lex->input;
933 
934  /* Part (1): leading sign indicator. */
935  /* Caller already did this for us; so do nothing. */
936 
937  /* Part (2): parse main digit string. */
938  if (len < lex->input_length && *s == '0')
939  {
940  s++;
941  len++;
942  }
943  else if (len < lex->input_length && *s >= '1' && *s <= '9')
944  {
945  do
946  {
947  s++;
948  len++;
949  } while (len < lex->input_length && *s >= '0' && *s <= '9');
950  }
951  else
952  error = true;
953 
954  /* Part (3): parse optional decimal portion. */
955  if (len < lex->input_length && *s == '.')
956  {
957  s++;
958  len++;
959  if (len == lex->input_length || *s < '0' || *s > '9')
960  error = true;
961  else
962  {
963  do
964  {
965  s++;
966  len++;
967  } while (len < lex->input_length && *s >= '0' && *s <= '9');
968  }
969  }
970 
971  /* Part (4): parse optional exponent. */
972  if (len < lex->input_length && (*s == 'e' || *s == 'E'))
973  {
974  s++;
975  len++;
976  if (len < lex->input_length && (*s == '+' || *s == '-'))
977  {
978  s++;
979  len++;
980  }
981  if (len == lex->input_length || *s < '0' || *s > '9')
982  error = true;
983  else
984  {
985  do
986  {
987  s++;
988  len++;
989  } while (len < lex->input_length && *s >= '0' && *s <= '9');
990  }
991  }
992 
993  /*
994  * Check for trailing garbage. As in json_lex(), any alphanumeric stuff
995  * here should be considered part of the token for error-reporting
996  * purposes.
997  */
998  for (; len < lex->input_length && JSON_ALPHANUMERIC_CHAR(*s); s++, len++)
999  error = true;
1000 
1001  if (total_len != NULL)
1002  *total_len = len;
1003 
1004  if (num_err != NULL)
1005  {
1006  /* let the caller handle any error */
1007  *num_err = error;
1008  }
1009  else
1010  {
1011  /* return token endpoint */
1013  lex->token_terminator = s;
1014  /* handle error if any */
1015  if (error)
1016  return JSON_INVALID_TOKEN;
1017  }
1018 
1019  return JSON_SUCCESS;
1020 }
1021 
1022 /*
1023  * Report a parse error.
1024  *
1025  * lex->token_start and lex->token_terminator must identify the current token.
1026  */
1027 static JsonParseErrorType
1029 {
1030  /* Handle case where the input ended prematurely. */
1031  if (lex->token_start == NULL || lex->token_type == JSON_TOKEN_END)
1032  return JSON_EXPECTED_MORE;
1033 
1034  /* Otherwise choose the error type based on the parsing context. */
1035  switch (ctx)
1036  {
1037  case JSON_PARSE_END:
1038  return JSON_EXPECTED_END;
1039  case JSON_PARSE_VALUE:
1040  return JSON_EXPECTED_JSON;
1041  case JSON_PARSE_STRING:
1042  return JSON_EXPECTED_STRING;
1045  case JSON_PARSE_ARRAY_NEXT:
1046  return JSON_EXPECTED_ARRAY_NEXT;
1050  return JSON_EXPECTED_COLON;
1054  return JSON_EXPECTED_STRING;
1055  }
1056 
1057  /*
1058  * We don't use a default: case, so that the compiler will warn about
1059  * unhandled enum values.
1060  */
1061  Assert(false);
1062  return JSON_SUCCESS; /* silence stupider compilers */
1063 }
1064 
1065 
1066 #ifndef FRONTEND
1067 /*
1068  * Extract the current token from a lexing context, for error reporting.
1069  */
1070 static char *
1072 {
1073  int toklen = lex->token_terminator - lex->token_start;
1074  char *token = palloc(toklen + 1);
1075 
1076  memcpy(token, lex->token_start, toklen);
1077  token[toklen] = '\0';
1078  return token;
1079 }
1080 
1081 /*
1082  * Construct an (already translated) detail message for a JSON error.
1083  *
1084  * Note that the error message generated by this routine may not be
1085  * palloc'd, making it unsafe for frontend code as there is no way to
1086  * know if this can be safely pfree'd or not.
1087  */
1088 char *
1090 {
1091  switch (error)
1092  {
1093  case JSON_SUCCESS:
1094  /* fall through to the error code after switch */
1095  break;
1096  case JSON_ESCAPING_INVALID:
1097  return psprintf(_("Escape sequence \"\\%s\" is invalid."),
1098  extract_token(lex));
1100  return psprintf(_("Character with value 0x%02x must be escaped."),
1101  (unsigned char) *(lex->token_terminator));
1102  case JSON_EXPECTED_END:
1103  return psprintf(_("Expected end of input, but found \"%s\"."),
1104  extract_token(lex));
1106  return psprintf(_("Expected array element or \"]\", but found \"%s\"."),
1107  extract_token(lex));
1109  return psprintf(_("Expected \",\" or \"]\", but found \"%s\"."),
1110  extract_token(lex));
1111  case JSON_EXPECTED_COLON:
1112  return psprintf(_("Expected \":\", but found \"%s\"."),
1113  extract_token(lex));
1114  case JSON_EXPECTED_JSON:
1115  return psprintf(_("Expected JSON value, but found \"%s\"."),
1116  extract_token(lex));
1117  case JSON_EXPECTED_MORE:
1118  return _("The input string ended unexpectedly.");
1120  return psprintf(_("Expected string or \"}\", but found \"%s\"."),
1121  extract_token(lex));
1123  return psprintf(_("Expected \",\" or \"}\", but found \"%s\"."),
1124  extract_token(lex));
1125  case JSON_EXPECTED_STRING:
1126  return psprintf(_("Expected string, but found \"%s\"."),
1127  extract_token(lex));
1128  case JSON_INVALID_TOKEN:
1129  return psprintf(_("Token \"%s\" is invalid."),
1130  extract_token(lex));
1132  return _("\\u0000 cannot be converted to text.");
1134  return _("\"\\u\" must be followed by four hexadecimal digits.");
1136  /* note: this case is only reachable in frontend not backend */
1137  return _("Unicode escape values cannot be used for code point values above 007F when the encoding is not UTF8.");
1139  return _("Unicode high surrogate must not follow a high surrogate.");
1141  return _("Unicode low surrogate must follow a high surrogate.");
1142  }
1143 
1144  /*
1145  * We don't use a default: case, so that the compiler will warn about
1146  * unhandled enum values. But this needs to be here anyway to cover the
1147  * possibility of an incorrect input.
1148  */
1149  elog(ERROR, "unexpected json parse error type: %d", (int) error);
1150  return NULL;
1151 }
1152 #endif
#define unconstify(underlying_type, expr)
Definition: c.h:1181
unsigned char uint8
Definition: c.h:440
#define _(x)
Definition: elog.c:90
#define ERROR
Definition: elog.h:35
long val
Definition: informix.c:664
int i
Definition: isn.c:73
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:77
JsonParseContext
Definition: jsonapi.c:34
@ JSON_PARSE_OBJECT_LABEL
Definition: jsonapi.c:40
@ JSON_PARSE_VALUE
Definition: jsonapi.c:35
@ JSON_PARSE_OBJECT_START
Definition: jsonapi.c:39
@ JSON_PARSE_ARRAY_START
Definition: jsonapi.c:37
@ JSON_PARSE_END
Definition: jsonapi.c:43
@ JSON_PARSE_OBJECT_NEXT
Definition: jsonapi.c:41
@ JSON_PARSE_ARRAY_NEXT
Definition: jsonapi.c:38
@ JSON_PARSE_OBJECT_COMMA
Definition: jsonapi.c:42
@ JSON_PARSE_STRING
Definition: jsonapi.c:36
static JsonParseErrorType json_lex_string(JsonLexContext *lex)
Definition: jsonapi.c:670
#define JSON_ALPHANUMERIC_CHAR(c)
Definition: jsonapi.c:92
static JsonParseErrorType parse_object_field(JsonLexContext *lex, JsonSemAction *sem)
Definition: jsonapi.c:307
JsonLexContext * makeJsonLexContextCstringLen(char *json, int len, int encoding, bool need_escapes)
Definition: jsonapi.c:145
static JsonParseErrorType parse_array_element(JsonLexContext *lex, JsonSemAction *sem)
Definition: jsonapi.c:426
static char * extract_token(JsonLexContext *lex)
Definition: jsonapi.c:1071
static JsonTokenType lex_peek(JsonLexContext *lex)
Definition: jsonapi.c:71
static JsonParseErrorType parse_object(JsonLexContext *lex, JsonSemAction *sem)
Definition: jsonapi.c:360
char * json_errdetail(JsonParseErrorType error, JsonLexContext *lex)
Definition: jsonapi.c:1089
bool IsValidJsonNumber(const char *str, int len)
Definition: jsonapi.c:105
static JsonParseErrorType report_parse_error(JsonParseContext ctx, JsonLexContext *lex)
Definition: jsonapi.c:1028
JsonSemAction nullSemAction
Definition: jsonapi.c:57
static JsonParseErrorType lex_expect(JsonParseContext ctx, JsonLexContext *lex, JsonTokenType token)
Definition: jsonapi.c:83
JsonParseErrorType pg_parse_json(JsonLexContext *lex, JsonSemAction *sem)
Definition: jsonapi.c:169
static JsonParseErrorType parse_array(JsonLexContext *lex, JsonSemAction *sem)
Definition: jsonapi.c:463
static JsonParseErrorType parse_scalar(JsonLexContext *lex, JsonSemAction *sem)
Definition: jsonapi.c:263
JsonParseErrorType json_lex(JsonLexContext *lex)
Definition: jsonapi.c:520
JsonParseErrorType json_count_array_elements(JsonLexContext *lex, int *elements)
Definition: jsonapi.c:209
static JsonParseErrorType json_lex_number(JsonLexContext *lex, char *s, bool *num_err, int *total_len)
Definition: jsonapi.c:928
void(* json_struct_action)(void *state)
Definition: jsonapi.h:87
JsonParseErrorType
Definition: jsonapi.h:37
@ JSON_EXPECTED_ARRAY_FIRST
Definition: jsonapi.h:41
@ JSON_EXPECTED_MORE
Definition: jsonapi.h:46
@ JSON_UNICODE_HIGH_SURROGATE
Definition: jsonapi.h:54
@ JSON_EXPECTED_COLON
Definition: jsonapi.h:43
@ JSON_EXPECTED_OBJECT_FIRST
Definition: jsonapi.h:47
@ JSON_UNICODE_CODE_POINT_ZERO
Definition: jsonapi.h:51
@ JSON_EXPECTED_STRING
Definition: jsonapi.h:49
@ JSON_UNICODE_ESCAPE_FORMAT
Definition: jsonapi.h:52
@ JSON_SUCCESS
Definition: jsonapi.h:38
@ JSON_EXPECTED_OBJECT_NEXT
Definition: jsonapi.h:48
@ JSON_ESCAPING_REQUIRED
Definition: jsonapi.h:40
@ JSON_EXPECTED_JSON
Definition: jsonapi.h:45
@ JSON_INVALID_TOKEN
Definition: jsonapi.h:50
@ JSON_ESCAPING_INVALID
Definition: jsonapi.h:39
@ JSON_EXPECTED_END
Definition: jsonapi.h:44
@ JSON_EXPECTED_ARRAY_NEXT
Definition: jsonapi.h:42
@ JSON_UNICODE_HIGH_ESCAPE
Definition: jsonapi.h:53
@ JSON_UNICODE_LOW_SURROGATE
Definition: jsonapi.h:55
void(* json_ofield_action)(void *state, char *fname, bool isnull)
Definition: jsonapi.h:88
JsonTokenType
Definition: jsonapi.h:20
@ JSON_TOKEN_COMMA
Definition: jsonapi.h:28
@ JSON_TOKEN_FALSE
Definition: jsonapi.h:31
@ JSON_TOKEN_END
Definition: jsonapi.h:33
@ JSON_TOKEN_TRUE
Definition: jsonapi.h:30
@ JSON_TOKEN_OBJECT_END
Definition: jsonapi.h:25
@ JSON_TOKEN_NULL
Definition: jsonapi.h:32
@ JSON_TOKEN_ARRAY_END
Definition: jsonapi.h:27
@ JSON_TOKEN_OBJECT_START
Definition: jsonapi.h:24
@ JSON_TOKEN_NUMBER
Definition: jsonapi.h:23
@ JSON_TOKEN_STRING
Definition: jsonapi.h:22
@ JSON_TOKEN_COLON
Definition: jsonapi.h:29
@ JSON_TOKEN_ARRAY_START
Definition: jsonapi.h:26
void(* json_scalar_action)(void *state, char *token, JsonTokenType tokentype)
Definition: jsonapi.h:90
void(* json_aelem_action)(void *state, bool isnull)
Definition: jsonapi.h:89
Assert(fmt[strlen(fmt) - 1] !='\n')
void pg_unicode_to_server(pg_wchar c, unsigned char *s)
Definition: mbutils.c:864
char * pstrdup(const char *in)
Definition: mcxt.c:1483
void * palloc0(Size size)
Definition: mcxt.c:1230
void * palloc(Size size)
Definition: mcxt.c:1199
const void size_t len
int32 encoding
Definition: pg_database.h:41
static bool pg_lfind8_le(uint8 key, uint8 *base, uint32 nelem)
Definition: pg_lfind.h:58
static bool pg_lfind8(uint8 key, uint8 *base, uint32 nelem)
Definition: pg_lfind.h:26
@ PG_UTF8
Definition: pg_wchar.h:232
#define MAX_UNICODE_EQUIVALENT_STRING
Definition: pg_wchar.h:329
static pg_wchar surrogate_pair_to_codepoint(pg_wchar first, pg_wchar second)
Definition: pg_wchar.h:543
static bool is_utf16_surrogate_first(pg_wchar c)
Definition: pg_wchar.h:531
static bool is_utf16_surrogate_second(pg_wchar c)
Definition: pg_wchar.h:537
void check_stack_depth(void)
Definition: postgres.c:3440
char * psprintf(const char *fmt,...)
Definition: psprintf.c:46
uint64 Vector8
Definition: simd.h:60
static void error(void)
Definition: sql-dyntest.c:147
StringInfo makeStringInfo(void)
Definition: stringinfo.c:41
void resetStringInfo(StringInfo str)
Definition: stringinfo.c:75
void appendBinaryStringInfo(StringInfo str, const char *data, int datalen)
Definition: stringinfo.c:227
void appendStringInfoString(StringInfo str, const char *s)
Definition: stringinfo.c:176
void appendStringInfoChar(StringInfo str, char ch)
Definition: stringinfo.c:188
char * input
Definition: jsonapi.h:74
char * token_start
Definition: jsonapi.h:77
int input_encoding
Definition: jsonapi.h:76
StringInfo strval
Definition: jsonapi.h:84
char * token_terminator
Definition: jsonapi.h:78
char * prev_token_terminator
Definition: jsonapi.h:79
char * line_start
Definition: jsonapi.h:83
int lex_level
Definition: jsonapi.h:81
int input_length
Definition: jsonapi.h:75
int line_number
Definition: jsonapi.h:82
JsonTokenType token_type
Definition: jsonapi.h:80
json_struct_action array_end
Definition: jsonapi.h:110
json_struct_action object_start
Definition: jsonapi.h:107
json_ofield_action object_field_start
Definition: jsonapi.h:111
json_aelem_action array_element_start
Definition: jsonapi.h:113
json_scalar_action scalar
Definition: jsonapi.h:115
void * semstate
Definition: jsonapi.h:106
json_aelem_action array_element_end
Definition: jsonapi.h:114
json_struct_action array_start
Definition: jsonapi.h:109
json_struct_action object_end
Definition: jsonapi.h:108
json_ofield_action object_field_end
Definition: jsonapi.h:112
int pg_encoding_mblen_bounded(int encoding, const char *mbstr)
Definition: wchar.c:2142
unsigned char * unicode_to_utf8(pg_wchar c, unsigned char *utf8string)
Definition: wchar.c:483
int pg_utf_mblen(const unsigned char *s)
Definition: wchar.c:549