PostgreSQL Source Code  git master
jsonapi.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * jsonapi.c
4  * JSON parser and lexer interfaces
5  *
6  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  * IDENTIFICATION
10  * src/common/jsonapi.c
11  *
12  *-------------------------------------------------------------------------
13  */
14 #ifndef FRONTEND
15 #include "postgres.h"
16 #else
17 #include "postgres_fe.h"
18 #endif
19 
20 #include "common/jsonapi.h"
21 #include "mb/pg_wchar.h"
22 #include "port/pg_lfind.h"
23 
24 #ifndef FRONTEND
25 #include "miscadmin.h"
26 #endif
27 
28 /*
29  * The context of the parser is maintained by the recursive descent
30  * mechanism, but is passed explicitly to the error reporting routine
31  * for better diagnostics.
32  */
33 typedef enum /* contexts of JSON parser */
34 {
35  JSON_PARSE_VALUE, /* expecting a value */
36  JSON_PARSE_STRING, /* expecting a string (for a field name) */
37  JSON_PARSE_ARRAY_START, /* saw '[', expecting value or ']' */
38  JSON_PARSE_ARRAY_NEXT, /* saw array element, expecting ',' or ']' */
39  JSON_PARSE_OBJECT_START, /* saw '{', expecting label or '}' */
40  JSON_PARSE_OBJECT_LABEL, /* saw object label, expecting ':' */
41  JSON_PARSE_OBJECT_NEXT, /* saw object value, expecting ',' or '}' */
42  JSON_PARSE_OBJECT_COMMA, /* saw object ',', expecting next label */
43  JSON_PARSE_END, /* saw the end of a document, expect nothing */
45 
46 /*
47  * Setup for table-driven parser.
48  * These enums need to be separate from the JsonTokenType and from each other
49  * so we can have all of them on the prediction stack, which consists of
50  * tokens, non-terminals, and semantic action markers.
51  */
52 
54 {
60 };
61 
63 {
75 };
76 
77 /*
78  * struct containing the 3 stacks used in non-recursive parsing,
79  * and the token and value for scalars that need to be preserved
80  * across calls.
81  *
82  * typedef appears in jsonapi.h
83  */
85 {
87  char *prediction;
88  size_t pred_index;
89  /* these two are indexed by lex_level */
90  char **fnames;
91  bool *fnull;
93  char *scalar_val;
94 };
95 
96 /*
97  * struct containing state used when there is a possible partial token at the
98  * end of a json chunk when we are doing incremental parsing.
99  *
100  * typedef appears in jsonapi.h
101  */
103 {
107 };
108 
/*
 * constants and macros used in the nonrecursive parser
 */
#define JSON_NUM_TERMINALS 13
#define JSON_NUM_NONTERMINALS 5
#define JSON_NT_OFFSET JSON_NT_JSON
/*
 * for indexing the table: subtract JSON_NT_OFFSET from a non-terminal so the
 * result can be used as the first index into td_parser_table.
 *
 * The whole expansion is parenthesized so that the macro behaves as a single
 * operand wherever it is used (e.g. "2 * OFS(x)" or "-OFS(x)").
 */
#define OFS(NT) ((NT) - JSON_NT_OFFSET)
/* classify items we get off the stack */
#define IS_SEM(x) ((x) & 0x40)
#define IS_NT(x) ((x) & 0x20)
120 
121 /*
122  * These productions are stored in reverse order right to left so that when
123  * they are pushed on the stack what we expect next is at the top of the stack.
124  */
/* epsilon - an empty production: nothing but a single zero byte */
static char JSON_PROD_EPSILON[] = {'\0'};
126 
127 /* JSON -> string */
129 
130 /* JSON -> number */
132 
133 /* JSON -> 'true' */
135 
136 /* JSON -> 'false' */
138 
139 /* JSON -> 'null' */
141 
142 /* JSON -> '{' KEY_PAIRS '}' */
144 
145 /* JSON -> '[' ARRAY_ELEMENTS ']' */
147 
148 /* ARRAY_ELEMENTS -> JSON MORE_ARRAY_ELEMENTS */
150 
151 /* MORE_ARRAY_ELEMENTS -> ',' JSON MORE_ARRAY_ELEMENTS */
153 
154 /* KEY_PAIRS -> string ':' JSON MORE_KEY_PAIRS */
156 
157 /* MORE_KEY_PAIRS -> ',' string ':' JSON MORE_KEY_PAIRS */
159 
160 /*
161  * Note: there are also epsilon productions for ARRAY_ELEMENTS,
162  * MORE_ARRAY_ELEMENTS, KEY_PAIRS and MORE_KEY_PAIRS
163  * They are all the same as none require any semantic actions.
164  */
165 
166 /*
167  * Table connecting the productions with their director sets of
168  * terminal symbols.
169  * Any combination not specified here represents an error.
170  */
171 
/*
 * One cell of the parser table: a production and its length.
 */
typedef struct
{
    size_t      len;            /* number of bytes of prod to push */
    char       *prod;           /* the production's symbols */
} td_entry;

/*
 * Build a td_entry from a production array.  The length is taken as
 * sizeof(PROD) - 1, i.e. the array's last byte is not counted (presumably a
 * trailing 0 terminator, as in JSON_PROD_EPSILON).
 */
#define TD_ENTRY(PROD) { .len = sizeof(PROD) - 1, .prod = (PROD) }
179 
181 {
182  /* JSON */
190  /* ARRAY_ELEMENTS */
199  /* MORE_ARRAY_ELEMENTS */
202  /* KEY_PAIRS */
205  /* MORE_KEY_PAIRS */
208 };
209 
210 /* the GOAL production. Not stored in the table, but will be the initial contents of the prediction stack */
212 
214 static inline JsonParseErrorType json_lex_number(JsonLexContext *lex, const char *s,
215  bool *num_err, size_t *total_len);
222 
223 /* the null action object used for pure validation */
225 {
226  NULL, NULL, NULL, NULL, NULL,
227  NULL, NULL, NULL, NULL, NULL
228 };
229 
230 /* Parser support routines */
231 
232 /*
233  * lex_peek
234  *
235  * what is the current look_ahead token?
236 */
237 static inline JsonTokenType
239 {
240  return lex->token_type;
241 }
242 
243 /*
244  * lex_expect
245  *
246  * move the lexer to the next token if the current look_ahead token matches
247  * the parameter token. Otherwise, report an error.
248  */
249 static inline JsonParseErrorType
251 {
252  if (lex_peek(lex) == token)
253  return json_lex(lex);
254  else
255  return report_parse_error(ctx, lex);
256 }
257 
/*
 * chars to consider as part of an alphanumeric token: underscore, ASCII
 * digits, ASCII letters, and anything with the high bit set
 */
#define JSON_ALPHANUMERIC_CHAR(c) \
    ((c) == '_' || \
     ((c) >= '0' && (c) <= '9') || \
     ((c) >= 'A' && (c) <= 'Z') || \
     ((c) >= 'a' && (c) <= 'z') || \
     IS_HIGHBIT_SET(c))
265 
266 /*
267  * Utility function to check if a string is a valid JSON number.
268  *
269  * str is of length len, and need not be null-terminated.
270  */
271 bool
272 IsValidJsonNumber(const char *str, size_t len)
273 {
274  bool numeric_error;
275  size_t total_len;
276  JsonLexContext dummy_lex;
277 
278  if (len <= 0)
279  return false;
280 
281  dummy_lex.incremental = false;
282  dummy_lex.inc_state = NULL;
283  dummy_lex.pstack = NULL;
284 
285  /*
286  * json_lex_number expects a leading '-' to have been eaten already.
287  *
288  * having to cast away the constness of str is ugly, but there's not much
289  * easy alternative.
290  */
291  if (*str == '-')
292  {
293  dummy_lex.input = str + 1;
294  dummy_lex.input_length = len - 1;
295  }
296  else
297  {
298  dummy_lex.input = str;
299  dummy_lex.input_length = len;
300  }
301 
302  dummy_lex.token_start = dummy_lex.input;
303 
304  json_lex_number(&dummy_lex, dummy_lex.input, &numeric_error, &total_len);
305 
306  return (!numeric_error) && (total_len == dummy_lex.input_length);
307 }
308 
309 /*
310  * makeJsonLexContextCstringLen
311  * Initialize the given JsonLexContext object, or create one
312  *
313  * If a valid 'lex' pointer is given, it is initialized. This can
314  * be used for stack-allocated structs, saving overhead. If NULL is
315  * given, a new struct is allocated.
316  *
317  * If need_escapes is true, ->strval stores the unescaped lexemes.
318  * Unescaping is expensive, so only request it when necessary.
319  *
320  * If need_escapes is true or lex was given as NULL, then caller is
321  * responsible for freeing the returned struct, either by calling
322  * freeJsonLexContext() or (in backend environment) via memory context
323  * cleanup.
324  */
327  size_t len, int encoding, bool need_escapes)
328 {
329  if (lex == NULL)
330  {
331  lex = palloc0(sizeof(JsonLexContext));
332  lex->flags |= JSONLEX_FREE_STRUCT;
333  }
334  else
335  memset(lex, 0, sizeof(JsonLexContext));
336 
337  lex->errormsg = NULL;
338  lex->input = lex->token_terminator = lex->line_start = json;
339  lex->line_number = 1;
340  lex->input_length = len;
341  lex->input_encoding = encoding;
342  if (need_escapes)
343  {
344  lex->strval = makeStringInfo();
345  lex->flags |= JSONLEX_FREE_STRVAL;
346  }
347 
348  return lex;
349 }
350 
351 
352 /*
353  * makeJsonLexContextIncremental
354  *
355  * Similar to above but set up for use in incremental parsing. That means we
356  * need explicit stacks for predictions, field names and null indicators, but
357  * we don't need the input, that will be handed in bit by bit to the
358  * parse routine. We also need an accumulator for partial tokens in case
359  * the boundary between chunks happens to fall in the middle of a token.
360  */
/* number of entries initially allocated for the fnames and fnull arrays */
#define JS_STACK_CHUNK_SIZE 64

/* more than we need */
#define JS_MAX_PROD_LEN 10

/*
 * Nesting level at which the table-driven parser gives up with
 * JSON_NESTING_TOO_DEEP.  Hard coded for now - this is a REALLY high number.
 */
#define JSON_TD_MAX_STACK 6400
365 
368  bool need_escapes)
369 {
370  if (lex == NULL)
371  {
372  lex = palloc0(sizeof(JsonLexContext));
373  lex->flags |= JSONLEX_FREE_STRUCT;
374  }
375  else
376  memset(lex, 0, sizeof(JsonLexContext));
377 
378  lex->line_number = 1;
379  lex->input_encoding = encoding;
380  lex->incremental = true;
381  lex->inc_state = palloc0(sizeof(JsonIncrementalState));
383  lex->pstack = palloc(sizeof(JsonParserStack));
386  lex->pstack->pred_index = 0;
387  lex->pstack->fnames = palloc(JS_STACK_CHUNK_SIZE * sizeof(char *));
388  lex->pstack->fnull = palloc(JS_STACK_CHUNK_SIZE * sizeof(bool));
389  if (need_escapes)
390  {
391  lex->strval = makeStringInfo();
392  lex->flags |= JSONLEX_FREE_STRVAL;
393  }
394  return lex;
395 }
396 
397 static inline void
399 {
400  lex->lex_level += 1;
401 
402  if (lex->incremental && lex->lex_level >= lex->pstack->stack_size)
403  {
405  lex->pstack->prediction =
406  repalloc(lex->pstack->prediction,
408  if (lex->pstack->fnames)
409  lex->pstack->fnames =
410  repalloc(lex->pstack->fnames,
411  lex->pstack->stack_size * sizeof(char *));
412  if (lex->pstack->fnull)
413  lex->pstack->fnull =
414  repalloc(lex->pstack->fnull, lex->pstack->stack_size * sizeof(bool));
415  }
416 }
417 
418 static inline void
420 {
421  lex->lex_level -= 1;
422 }
423 
424 static inline void
426 {
427  memcpy(pstack->prediction + pstack->pred_index, entry.prod, entry.len);
428  pstack->pred_index += entry.len;
429 }
430 
431 static inline char
433 {
434  Assert(pstack->pred_index > 0);
435  return pstack->prediction[--pstack->pred_index];
436 }
437 
438 static inline char
440 {
441  Assert(pstack->pred_index > 0);
442  return pstack->prediction[pstack->pred_index - 1];
443 }
444 
445 static inline bool
447 {
448  return pstack->pred_index > 0;
449 }
450 
451 static inline void
452 set_fname(JsonLexContext *lex, char *fname)
453 {
454  lex->pstack->fnames[lex->lex_level] = fname;
455 }
456 
457 static inline char *
459 {
460  return lex->pstack->fnames[lex->lex_level];
461 }
462 
463 static inline void
464 set_fnull(JsonLexContext *lex, bool fnull)
465 {
466  lex->pstack->fnull[lex->lex_level] = fnull;
467 }
468 
469 static inline bool
471 {
472  return lex->pstack->fnull[lex->lex_level];
473 }
474 
475 /*
476  * Free memory in a JsonLexContext.
477  *
478  * There's no need for this if a *lex pointer was given when the object was
479  * made, need_escapes was false, and json_errdetail() was not called; or if (in
480  * backend environment) a memory context delete/reset is imminent.
481  */
482 void
484 {
485  if (lex->flags & JSONLEX_FREE_STRVAL)
487 
488  if (lex->errormsg)
490 
491  if (lex->incremental)
492  {
494  pfree(lex->inc_state);
495  pfree(lex->pstack->prediction);
496  pfree(lex->pstack->fnames);
497  pfree(lex->pstack->fnull);
498  pfree(lex->pstack);
499  }
500 
501  if (lex->flags & JSONLEX_FREE_STRUCT)
502  pfree(lex);
503 }
504 
505 /*
506  * pg_parse_json
507  *
508  * Publicly visible entry point for the JSON parser.
509  *
510  * lex is a lexing context, set up for the json to be processed by calling
511  * makeJsonLexContext(). sem is a structure of function pointers to semantic
512  * action routines to be called at appropriate spots during parsing, and a
513  * pointer to a state object to be passed to those routines.
514  *
515  * If FORCE_JSON_PSTACK is defined then the routine will call the non-recursive
516  * JSON parser. This is a useful way to validate that it's doing the right
517  * thing at least for non-incremental cases. If this is on we expect to see
518  * regression diffs relating to error messages about stack depth, but no
519  * other differences.
520  */
523 {
524 #ifdef FORCE_JSON_PSTACK
525 
526  lex->incremental = true;
527  lex->inc_state = palloc0(sizeof(JsonIncrementalState));
528 
529  /*
530  * We don't need partial token processing, there is only one chunk. But we
531  * still need to init the partial token string so that freeJsonLexContext
532  * works.
533  */
535  lex->pstack = palloc(sizeof(JsonParserStack));
538  lex->pstack->pred_index = 0;
539  lex->pstack->fnames = palloc(JS_STACK_CHUNK_SIZE * sizeof(char *));
540  lex->pstack->fnull = palloc(JS_STACK_CHUNK_SIZE * sizeof(bool));
541 
542  return pg_parse_json_incremental(lex, sem, lex->input, lex->input_length, true);
543 
544 #else
545 
546  JsonTokenType tok;
547  JsonParseErrorType result;
548 
549  if (lex->incremental)
551 
552  /* get the initial token */
553  result = json_lex(lex);
554  if (result != JSON_SUCCESS)
555  return result;
556 
557  tok = lex_peek(lex);
558 
559  /* parse by recursive descent */
560  switch (tok)
561  {
563  result = parse_object(lex, sem);
564  break;
566  result = parse_array(lex, sem);
567  break;
568  default:
569  result = parse_scalar(lex, sem); /* json can be a bare scalar */
570  }
571 
572  if (result == JSON_SUCCESS)
573  result = lex_expect(JSON_PARSE_END, lex, JSON_TOKEN_END);
574 
575  return result;
576 #endif
577 }
578 
579 /*
580  * json_count_array_elements
581  *
582  * Returns number of array elements in lex context at start of array token
583  * until end of array token at same nesting level.
584  *
585  * Designed to be called from array_start routines.
586  */
588 json_count_array_elements(JsonLexContext *lex, int *elements)
589 {
590  JsonLexContext copylex;
591  int count;
592  JsonParseErrorType result;
593 
594  /*
595  * It's safe to do this with a shallow copy because the lexical routines
596  * don't scribble on the input. They do scribble on the other pointers
597  * etc, so doing this with a copy makes that safe.
598  */
599  memcpy(&copylex, lex, sizeof(JsonLexContext));
600  copylex.strval = NULL; /* not interested in values here */
601  copylex.lex_level++;
602 
603  count = 0;
604  result = lex_expect(JSON_PARSE_ARRAY_START, &copylex,
606  if (result != JSON_SUCCESS)
607  return result;
608  if (lex_peek(&copylex) != JSON_TOKEN_ARRAY_END)
609  {
610  while (1)
611  {
612  count++;
613  result = parse_array_element(&copylex, &nullSemAction);
614  if (result != JSON_SUCCESS)
615  return result;
616  if (copylex.token_type != JSON_TOKEN_COMMA)
617  break;
618  result = json_lex(&copylex);
619  if (result != JSON_SUCCESS)
620  return result;
621  }
622  }
623  result = lex_expect(JSON_PARSE_ARRAY_NEXT, &copylex,
625  if (result != JSON_SUCCESS)
626  return result;
627 
628  *elements = count;
629  return JSON_SUCCESS;
630 }
631 
632 /*
633  * pg_parse_json_incremental
634  *
635  * Routine for incremental parsing of json. This uses the non-recursive top
636  * down method of the Dragon Book Algorithm 4.3. It's somewhat slower than
637  * the Recursive Descent pattern used above, so we only use it for incremental
638  * parsing of JSON.
639  *
640  * The lexing context needs to be set up by a call to
641  * makeJsonLexContextIncremental(). sem is a structure of function pointers
642  * to semantic action routines, which should function exactly as those used
643  * in the recursive descent parser.
644  *
645  * This routine can be called repeatedly with chunks of JSON. On the final
646  * chunk is_last must be set to true. len is the length of the json chunk,
647  * which does not need to be null terminated.
648  */
651  const JsonSemAction *sem,
652  const char *json,
653  size_t len,
654  bool is_last)
655 {
656  JsonTokenType tok;
657  JsonParseErrorType result;
659  JsonParserStack *pstack = lex->pstack;
660 
661 
662  if (!lex->incremental)
664 
665  lex->input = lex->token_terminator = lex->line_start = json;
666  lex->input_length = len;
667  lex->inc_state->is_last_chunk = is_last;
668 
669  /* get the initial token */
670  result = json_lex(lex);
671  if (result != JSON_SUCCESS)
672  return result;
673 
674  tok = lex_peek(lex);
675 
676  /* use prediction stack for incremental parsing */
677 
678  if (!have_prediction(pstack))
679  {
681 
682  push_prediction(pstack, goal);
683  }
684 
685  while (have_prediction(pstack))
686  {
687  char top = pop_prediction(pstack);
688  td_entry entry;
689 
690  /*
691  * these first two branches are the guts of the Table Driven method
692  */
693  if (top == tok)
694  {
695  /*
696  * tok can only be a terminal symbol, so top must be too. the
697  * token matches the top of the stack, so get the next token.
698  */
699  if (tok < JSON_TOKEN_END)
700  {
701  result = json_lex(lex);
702  if (result != JSON_SUCCESS)
703  return result;
704  tok = lex_peek(lex);
705  }
706  }
707  else if (IS_NT(top) && (entry = td_parser_table[OFS(top)][tok]).prod != NULL)
708  {
709  /*
710  * the token is in the director set for a production of the
711  * non-terminal at the top of the stack, so push the reversed RHS
712  * of the production onto the stack.
713  */
714  push_prediction(pstack, entry);
715  }
716  else if (IS_SEM(top))
717  {
718  /*
719  * top is a semantic action marker, so take action accordingly.
720  * It's important to have these markers in the prediction stack
721  * before any token they might need so we don't advance the token
722  * prematurely. Note in a couple of cases we need to do something
723  * both before and after the token.
724  */
725  switch (top)
726  {
727  case JSON_SEM_OSTART:
728  {
730 
731  if (lex->lex_level >= JSON_TD_MAX_STACK)
732  return JSON_NESTING_TOO_DEEP;
733 
734  if (ostart != NULL)
735  {
736  result = (*ostart) (sem->semstate);
737  if (result != JSON_SUCCESS)
738  return result;
739  }
740  inc_lex_level(lex);
741  }
742  break;
743  case JSON_SEM_OEND:
744  {
746 
747  dec_lex_level(lex);
748  if (oend != NULL)
749  {
750  result = (*oend) (sem->semstate);
751  if (result != JSON_SUCCESS)
752  return result;
753  }
754  }
755  break;
756  case JSON_SEM_ASTART:
757  {
759 
760  if (lex->lex_level >= JSON_TD_MAX_STACK)
761  return JSON_NESTING_TOO_DEEP;
762 
763  if (astart != NULL)
764  {
765  result = (*astart) (sem->semstate);
766  if (result != JSON_SUCCESS)
767  return result;
768  }
769  inc_lex_level(lex);
770  }
771  break;
772  case JSON_SEM_AEND:
773  {
775 
776  dec_lex_level(lex);
777  if (aend != NULL)
778  {
779  result = (*aend) (sem->semstate);
780  if (result != JSON_SUCCESS)
781  return result;
782  }
783  }
784  break;
786  {
787  /*
788  * all we do here is save out the field name. We have
789  * to wait to get past the ':' to see if the next
790  * value is null so we can call the semantic routine
791  */
792  char *fname = NULL;
795 
796  if ((ostart != NULL || oend != NULL) && lex->strval != NULL)
797  {
798  fname = pstrdup(lex->strval->data);
799  }
800  set_fname(lex, fname);
801  }
802  break;
804  {
805  /*
806  * the current token should be the first token of the
807  * value
808  */
809  bool isnull = tok == JSON_TOKEN_NULL;
811 
812  set_fnull(lex, isnull);
813 
814  if (ostart != NULL)
815  {
816  char *fname = get_fname(lex);
817 
818  result = (*ostart) (sem->semstate, fname, isnull);
819  if (result != JSON_SUCCESS)
820  return result;
821  }
822  }
823  break;
824  case JSON_SEM_OFIELD_END:
825  {
827 
828  if (oend != NULL)
829  {
830  char *fname = get_fname(lex);
831  bool isnull = get_fnull(lex);
832 
833  result = (*oend) (sem->semstate, fname, isnull);
834  if (result != JSON_SUCCESS)
835  return result;
836  }
837  }
838  break;
840  {
842  bool isnull = tok == JSON_TOKEN_NULL;
843 
844  set_fnull(lex, isnull);
845 
846  if (astart != NULL)
847  {
848  result = (*astart) (sem->semstate, isnull);
849  if (result != JSON_SUCCESS)
850  return result;
851  }
852  }
853  break;
854  case JSON_SEM_AELEM_END:
855  {
857 
858  if (aend != NULL)
859  {
860  bool isnull = get_fnull(lex);
861 
862  result = (*aend) (sem->semstate, isnull);
863  if (result != JSON_SUCCESS)
864  return result;
865  }
866  }
867  break;
869  {
870  json_scalar_action sfunc = sem->scalar;
871 
872  pstack->scalar_val = NULL;
873 
874  if (sfunc != NULL)
875  {
876  /*
877  * extract the de-escaped string value, or the raw
878  * lexeme
879  */
880  /*
881  * XXX copied from RD parser but looks like a
882  * buglet
883  */
884  if (tok == JSON_TOKEN_STRING)
885  {
886  if (lex->strval != NULL)
887  pstack->scalar_val = pstrdup(lex->strval->data);
888  }
889  else
890  {
891  ptrdiff_t tlen = (lex->token_terminator - lex->token_start);
892 
893  pstack->scalar_val = palloc(tlen + 1);
894  memcpy(pstack->scalar_val, lex->token_start, tlen);
895  pstack->scalar_val[tlen] = '\0';
896  }
897  pstack->scalar_tok = tok;
898  }
899  }
900  break;
902  {
903  /*
904  * We'd like to be able to get rid of this business of
905  * two bits of scalar action, but we can't. It breaks
906  * certain semantic actions which expect that when
907  * called the lexer has consumed the item. See for
908  * example get_scalar() in jsonfuncs.c.
909  */
910  json_scalar_action sfunc = sem->scalar;
911 
912  if (sfunc != NULL)
913  {
914  result = (*sfunc) (sem->semstate, pstack->scalar_val, pstack->scalar_tok);
915  if (result != JSON_SUCCESS)
916  return result;
917  }
918  }
919  break;
920  default:
921  /* should not happen */
922  break;
923  }
924  }
925  else
926  {
927  /*
928  * The token didn't match the stack top if it's a terminal nor a
929  * production for the stack top if it's a non-terminal.
930  *
931  * Various cases here are Asserted to be not possible, as the
932  * token would not appear at the top of the prediction stack
933  * unless the lookahead matched.
934  */
935  switch (top)
936  {
937  case JSON_TOKEN_STRING:
938  if (next_prediction(pstack) == JSON_TOKEN_COLON)
939  ctx = JSON_PARSE_STRING;
940  else
941  {
942  Assert(false);
943  ctx = JSON_PARSE_VALUE;
944  }
945  break;
946  case JSON_TOKEN_NUMBER:
947  case JSON_TOKEN_TRUE:
948  case JSON_TOKEN_FALSE:
949  case JSON_TOKEN_NULL:
952  Assert(false);
953  ctx = JSON_PARSE_VALUE;
954  break;
956  Assert(false);
957  ctx = JSON_PARSE_ARRAY_NEXT;
958  break;
960  Assert(false);
962  break;
963  case JSON_TOKEN_COMMA:
964  Assert(false);
965  if (next_prediction(pstack) == JSON_TOKEN_STRING)
967  else
968  ctx = JSON_PARSE_ARRAY_NEXT;
969  break;
970  case JSON_TOKEN_COLON:
972  break;
973  case JSON_TOKEN_END:
974  ctx = JSON_PARSE_END;
975  break;
977  ctx = JSON_PARSE_ARRAY_NEXT;
978  break;
981  break;
984  break;
985  case JSON_NT_KEY_PAIRS:
987  break;
988  default:
989  ctx = JSON_PARSE_VALUE;
990  }
991  return report_parse_error(ctx, lex);
992  }
993  }
994 
995  return JSON_SUCCESS;
996 }
997 
998 /*
999  * Recursive Descent parse routines. There is one for each structural
1000  * element in a json document:
1001  * - scalar (string, number, true, false, null)
1002  * - array ( [ ] )
1003  * - array element
1004  * - object ( { } )
1005  * - object field
1006  */
1007 static inline JsonParseErrorType
1009 {
1010  char *val = NULL;
1011  json_scalar_action sfunc = sem->scalar;
1012  JsonTokenType tok = lex_peek(lex);
1013  JsonParseErrorType result;
1014 
1015  /* a scalar must be a string, a number, true, false, or null */
1016  if (tok != JSON_TOKEN_STRING && tok != JSON_TOKEN_NUMBER &&
1017  tok != JSON_TOKEN_TRUE && tok != JSON_TOKEN_FALSE &&
1018  tok != JSON_TOKEN_NULL)
1019  return report_parse_error(JSON_PARSE_VALUE, lex);
1020 
1021  /* if no semantic function, just consume the token */
1022  if (sfunc == NULL)
1023  return json_lex(lex);
1024 
1025  /* extract the de-escaped string value, or the raw lexeme */
1026  if (lex_peek(lex) == JSON_TOKEN_STRING)
1027  {
1028  if (lex->strval != NULL)
1029  val = pstrdup(lex->strval->data);
1030  }
1031  else
1032  {
1033  int len = (lex->token_terminator - lex->token_start);
1034 
1035  val = palloc(len + 1);
1036  memcpy(val, lex->token_start, len);
1037  val[len] = '\0';
1038  }
1039 
1040  /* consume the token */
1041  result = json_lex(lex);
1042  if (result != JSON_SUCCESS)
1043  return result;
1044 
1045  /* invoke the callback */
1046  result = (*sfunc) (sem->semstate, val, tok);
1047 
1048  return result;
1049 }
1050 
1053 {
1054  /*
1055  * An object field is "fieldname" : value where value can be a scalar,
1056  * object or array. Note: in user-facing docs and error messages, we
1057  * generally call a field name a "key".
1058  */
1059 
1060  char *fname = NULL; /* keep compiler quiet */
1063  bool isnull;
1064  JsonTokenType tok;
1065  JsonParseErrorType result;
1066 
1067  if (lex_peek(lex) != JSON_TOKEN_STRING)
1069  if ((ostart != NULL || oend != NULL) && lex->strval != NULL)
1070  fname = pstrdup(lex->strval->data);
1071  result = json_lex(lex);
1072  if (result != JSON_SUCCESS)
1073  return result;
1074 
1076  if (result != JSON_SUCCESS)
1077  return result;
1078 
1079  tok = lex_peek(lex);
1080  isnull = tok == JSON_TOKEN_NULL;
1081 
1082  if (ostart != NULL)
1083  {
1084  result = (*ostart) (sem->semstate, fname, isnull);
1085  if (result != JSON_SUCCESS)
1086  return result;
1087  }
1088 
1089  switch (tok)
1090  {
1092  result = parse_object(lex, sem);
1093  break;
1095  result = parse_array(lex, sem);
1096  break;
1097  default:
1098  result = parse_scalar(lex, sem);
1099  }
1100  if (result != JSON_SUCCESS)
1101  return result;
1102 
1103  if (oend != NULL)
1104  {
1105  result = (*oend) (sem->semstate, fname, isnull);
1106  if (result != JSON_SUCCESS)
1107  return result;
1108  }
1109 
1110  return JSON_SUCCESS;
1111 }
1112 
1115 {
1116  /*
1117  * an object is a possibly empty sequence of object fields, separated by
1118  * commas and surrounded by curly braces.
1119  */
1122  JsonTokenType tok;
1123  JsonParseErrorType result;
1124 
1125 #ifndef FRONTEND
1127 #endif
1128 
1129  if (ostart != NULL)
1130  {
1131  result = (*ostart) (sem->semstate);
1132  if (result != JSON_SUCCESS)
1133  return result;
1134  }
1135 
1136  /*
1137  * Data inside an object is at a higher nesting level than the object
1138  * itself. Note that we increment this after we call the semantic routine
1139  * for the object start and restore it before we call the routine for the
1140  * object end.
1141  */
1142  lex->lex_level++;
1143 
1145  result = json_lex(lex);
1146  if (result != JSON_SUCCESS)
1147  return result;
1148 
1149  tok = lex_peek(lex);
1150  switch (tok)
1151  {
1152  case JSON_TOKEN_STRING:
1153  result = parse_object_field(lex, sem);
1154  while (result == JSON_SUCCESS && lex_peek(lex) == JSON_TOKEN_COMMA)
1155  {
1156  result = json_lex(lex);
1157  if (result != JSON_SUCCESS)
1158  break;
1159  result = parse_object_field(lex, sem);
1160  }
1161  break;
1162  case JSON_TOKEN_OBJECT_END:
1163  break;
1164  default:
1165  /* case of an invalid initial token inside the object */
1167  }
1168  if (result != JSON_SUCCESS)
1169  return result;
1170 
1172  if (result != JSON_SUCCESS)
1173  return result;
1174 
1175  lex->lex_level--;
1176 
1177  if (oend != NULL)
1178  {
1179  result = (*oend) (sem->semstate);
1180  if (result != JSON_SUCCESS)
1181  return result;
1182  }
1183 
1184  return JSON_SUCCESS;
1185 }
1186 
1189 {
1192  JsonTokenType tok = lex_peek(lex);
1193  JsonParseErrorType result;
1194  bool isnull;
1195 
1196  isnull = tok == JSON_TOKEN_NULL;
1197 
1198  if (astart != NULL)
1199  {
1200  result = (*astart) (sem->semstate, isnull);
1201  if (result != JSON_SUCCESS)
1202  return result;
1203  }
1204 
1205  /* an array element is any object, array or scalar */
1206  switch (tok)
1207  {
1209  result = parse_object(lex, sem);
1210  break;
1212  result = parse_array(lex, sem);
1213  break;
1214  default:
1215  result = parse_scalar(lex, sem);
1216  }
1217 
1218  if (result != JSON_SUCCESS)
1219  return result;
1220 
1221  if (aend != NULL)
1222  {
1223  result = (*aend) (sem->semstate, isnull);
1224  if (result != JSON_SUCCESS)
1225  return result;
1226  }
1227 
1228  return JSON_SUCCESS;
1229 }
1230 
1233 {
1234  /*
1235  * an array is a possibly empty sequence of array elements, separated by
1236  * commas and surrounded by square brackets.
1237  */
1240  JsonParseErrorType result;
1241 
1242 #ifndef FRONTEND
1244 #endif
1245 
1246  if (astart != NULL)
1247  {
1248  result = (*astart) (sem->semstate);
1249  if (result != JSON_SUCCESS)
1250  return result;
1251  }
1252 
1253  /*
1254  * Data inside an array is at a higher nesting level than the array
1255  * itself. Note that we increment this after we call the semantic routine
1256  * for the array start and restore it before we call the routine for the
1257  * array end.
1258  */
1259  lex->lex_level++;
1260 
1262  if (result == JSON_SUCCESS && lex_peek(lex) != JSON_TOKEN_ARRAY_END)
1263  {
1264  result = parse_array_element(lex, sem);
1265 
1266  while (result == JSON_SUCCESS && lex_peek(lex) == JSON_TOKEN_COMMA)
1267  {
1268  result = json_lex(lex);
1269  if (result != JSON_SUCCESS)
1270  break;
1271  result = parse_array_element(lex, sem);
1272  }
1273  }
1274  if (result != JSON_SUCCESS)
1275  return result;
1276 
1278  if (result != JSON_SUCCESS)
1279  return result;
1280 
1281  lex->lex_level--;
1282 
1283  if (aend != NULL)
1284  {
1285  result = (*aend) (sem->semstate);
1286  if (result != JSON_SUCCESS)
1287  return result;
1288  }
1289 
1290  return JSON_SUCCESS;
1291 }
1292 
1293 /*
1294  * Lex one token from the input stream.
1295  *
1296  * When doing incremental parsing, we can reach the end of the input string
1297  * without having (or knowing we have) a complete token. If it's not the
1298  * final chunk of input, the partial token is then saved to the lex
1299  * structure's ptok StringInfo. On subsequent calls input is appended to this
1300  * buffer until we have something that we think is a complete token,
1301  * which is then lexed using a recursive call to json_lex. Processing then
1302  * continues as normal on subsequent calls.
1303  *
1304  * Note that when doing incremental processing, the lex.prev_token_terminator
1305  * should not be relied on. It could point into a previous input chunk or
1306  * worse.
1307  */
1310 {
1311  const char *s;
1312  const char *const end = lex->input + lex->input_length;
1313  JsonParseErrorType result;
1314 
      /*
       * Incremental mode only: the previous call handed back a token that
       * lived in the partial-token buffer.  Resume scanning at lex->input,
       * which the partial-token path below advances past the bytes it
       * consumed.
       * NOTE(review): this listing skips inner line 1321 inside the block;
       * presumably it also clears the partial-token buffer -- confirm
       * against the upstream file.
       */
1315  if (lex->incremental && lex->inc_state->partial_completed)
1316  {
1317  /*
1318  * We just lexed a completed partial token on the last call, so reset
1319  * everything
1320  */
1322  lex->token_terminator = lex->input;
1323  lex->inc_state->partial_completed = false;
1324  }
1325 
      /* Continue scanning where the previous token ended. */
1326  s = lex->token_terminator;
1327 
1328  if (lex->incremental && lex->inc_state->partial_token.len)
1329  {
1330  /*
1331  * We have a partial token. Extend it and if completed lex it by a
1332  * recursive call
1333  */
1334  StringInfo ptok = &(lex->inc_state->partial_token);
      /* number of bytes of the current chunk consumed into ptok */
1335  size_t added = 0;
1336  bool tok_done = false;
1337  JsonLexContext dummy_lex;
1338  JsonParseErrorType partial_result;
1339 
1340  if (ptok->data[0] == '"')
1341  {
1342  /*
1343  * It's a string. Accumulate characters until we reach an
1344  * unescaped '"'.
1345  */
1346  int escapes = 0;
1347 
      /* Index 0 holds the opening quote, so the scan stops before it. */
1348  for (int i = ptok->len - 1; i > 0; i--)
1349  {
1350  /* count the trailing backslashes on the partial token */
1351  if (ptok->data[i] == '\\')
1352  escapes++;
1353  else
1354  break;
1355  }
1356 
1357  for (size_t i = 0; i < lex->input_length; i++)
1358  {
1359  char c = lex->input[i];
1360 
      /*
       * NOTE(review): inner line 1361 is absent from this listing.
       * Since "added" is bumped right below, presumably c is appended
       * to ptok here -- confirm against the upstream file.
       */
1362  added++;
      /*
       * An even count of immediately preceding backslashes means this
       * quote is not escaped and therefore closes the string.
       */
1363  if (c == '"' && escapes % 2 == 0)
1364  {
1365  tok_done = true;
1366  break;
1367  }
1368  if (c == '\\')
1369  escapes++;
1370  else
1371  escapes = 0;
1372  }
1373  }
1374  else
1375  {
1376  /* not a string */
1377  char c = ptok->data[0];
1378 
1379  if (c == '-' || (c >= '0' && c <= '9'))
1380  {
1381  /* for numbers look for possible numeric continuations */
1382 
1383  bool numend = false;
1384 
      /*
       * The characters below are accepted regardless of position;
       * no grammar check happens here.  The accumulated bytes are
       * lexed by the recursive json_lex() call further down.
       * NOTE(review): '.' is not in this list, so a chunk that starts
       * with the decimal point of a split number ends the scan
       * immediately (numend) -- check whether that is intended.
       */
1385  for (size_t i = 0; i < lex->input_length && !numend; i++)
1386  {
1387  char cc = lex->input[i];
1388 
1389  switch (cc)
1390  {
1391  case '+':
1392  case '-':
1393  case 'e':
1394  case 'E':
1395  case '0':
1396  case '1':
1397  case '2':
1398  case '3':
1399  case '4':
1400  case '5':
1401  case '6':
1402  case '7':
1403  case '8':
1404  case '9':
1405  {
1406  appendStringInfoCharMacro(ptok, cc);
1407  added++;
1408  }
1409  break;
1410  default:
1411  numend = true;
1412  }
1413  }
1414  }
1415 
1416  /*
1417  * Add any remaining alphanumeric chars. This takes care of the
1418  * {null, false, true} literals as well as any trailing
1419  * alphanumeric junk on non-string tokens.
1420  */
      /* Starts at "added", i.e. where the numeric scan above stopped. */
1421  for (size_t i = added; i < lex->input_length; i++)
1422  {
1423  char cc = lex->input[i];
1424 
1425  if (JSON_ALPHANUMERIC_CHAR(cc))
1426  {
1427  appendStringInfoCharMacro(ptok, cc);
1428  added++;
1429  }
1430  else
1431  {
1432  tok_done = true;
1433  break;
1434  }
1435  }
      /*
       * On the final chunk no further input can extend the token, so
       * running out of input also terminates it.
       */
1436  if (added == lex->input_length &&
1437  lex->inc_state->is_last_chunk)
1438  {
1439  tok_done = true;
1440  }
1441  }
1442 
1443  if (!tok_done)
1444  {
1445  /* We should have consumed the whole chunk in this case. */
1446  Assert(added == lex->input_length);
1447 
      /* Token still open and more chunks are expected: ask for more. */
1448  if (!lex->inc_state->is_last_chunk)
1449  return JSON_INCOMPLETE;
1450 
1451  /* json_errdetail() needs access to the accumulated token. */
1452  lex->token_start = ptok->data;
1453  lex->token_terminator = ptok->data + ptok->len;
1454  return JSON_INVALID_TOKEN;
1455  }
1456 
1457  /*
1458  * Everything up to lex->input[added] has been added to the partial
1459  * token, so move the input past it.
1460  */
1461  lex->input += added;
1462  lex->input_length -= added;
1463 
      /*
       * Lex the accumulated bytes through a throwaway context.  It is
       * marked non-incremental, so the recursive call skips this
       * partial-token branch.  Only the fields assigned here are set;
       * dummy_lex is otherwise uninitialized.
       */
1464  dummy_lex.input = dummy_lex.token_terminator =
1465  dummy_lex.line_start = ptok->data;
1466  dummy_lex.line_number = lex->line_number;
1467  dummy_lex.input_length = ptok->len;
1468  dummy_lex.input_encoding = lex->input_encoding;
1469  dummy_lex.incremental = false;
1470  dummy_lex.strval = lex->strval;
1471 
1472  partial_result = json_lex(&dummy_lex);
1473 
1474  /*
1475  * We either have a complete token or an error. In either case we need
1476  * to point to the partial token data for the semantic or error
1477  * routines. If it's not an error we'll readjust on the next call to
1478  * json_lex.
1479  */
1480  lex->token_type = dummy_lex.token_type;
1481  lex->line_number = dummy_lex.line_number;
1482 
1483  /*
1484  * We know the prev_token_terminator must be back in some previous
1485  * piece of input, so we just make it NULL.
1486  */
1487  lex->prev_token_terminator = NULL;
1488 
1489  /*
1490  * Normally token_start would be ptok->data, but it could be later,
1491  * see json_lex_string's handling of invalid escapes.
1492  */
1493  lex->token_start = dummy_lex.token_start;
1494  lex->token_terminator = dummy_lex.token_terminator;
1495  if (partial_result == JSON_SUCCESS)
1496  {
1497  /* make sure we've used all the input */
1498  if (lex->token_terminator - lex->token_start != ptok->len)
1499  {
1500  Assert(false);
1501  return JSON_INVALID_TOKEN;
1502  }
1503 
      /* Tells the next call to run the reset at the top of this function. */
1504  lex->inc_state->partial_completed = true;
1505  }
1506  return partial_result;
1507  /* end of partial token processing */
1508  }
1509 
1510  /* Skip leading whitespace. */
1511  while (s < end && (*s == ' ' || *s == '\t' || *s == '\n' || *s == '\r'))
1512  {
1513  if (*s++ == '\n')
1514  {
1515  ++lex->line_number;
1516  lex->line_start = s;
1517  }
1518  }
1519  lex->token_start = s;
1520 
      /*
       * NOTE(review): the inner numbering of this listing has gaps all
       * through the code below (1525, 1535, 1537, 1540, 1542, ..., 1569,
       * 1576, 1592, 1617, 1625, 1635, 1647).  The statements on those lines
       * are not visible here; in particular no assignment to
       * lex->token_type is shown for the punctuation, string and number
       * cases, and the call that owns the argument list at inner line 1626
       * is missing.  Confirm against the upstream file.
       */
1521  /* Determine token type. */
1522  if (s >= end)
1523  {
      /*
       * A NULL token_start is one of the conditions report_parse_error()
       * tests to detect input that ended prematurely.
       */
1524  lex->token_start = NULL;
1526  lex->token_terminator = s;
1527  lex->token_type = JSON_TOKEN_END;
1528  }
1529  else
1530  {
1531  switch (*s)
1532  {
1533  /* Single-character token, some kind of punctuation mark. */
1534  case '{':
1536  lex->token_terminator = s + 1;
1538  break;
1539  case '}':
1541  lex->token_terminator = s + 1;
1543  break;
1544  case '[':
1546  lex->token_terminator = s + 1;
1548  break;
1549  case ']':
1551  lex->token_terminator = s + 1;
1553  break;
1554  case ',':
1556  lex->token_terminator = s + 1;
1558  break;
1559  case ':':
1561  lex->token_terminator = s + 1;
1563  break;
1564  case '"':
1565  /* string */
1566  result = json_lex_string(lex);
1567  if (result != JSON_SUCCESS)
1568  return result;
1570  break;
1571  case '-':
1572  /* Negative number. */
      /* s + 1: json_lex_number() expects to start after the sign. */
1573  result = json_lex_number(lex, s + 1, NULL, NULL);
1574  if (result != JSON_SUCCESS)
1575  return result;
1577  break;
1578  case '0':
1579  case '1':
1580  case '2':
1581  case '3':
1582  case '4':
1583  case '5':
1584  case '6':
1585  case '7':
1586  case '8':
1587  case '9':
1588  /* Positive number. */
1589  result = json_lex_number(lex, s, NULL, NULL);
1590  if (result != JSON_SUCCESS)
1591  return result;
1593  break;
1594  default:
1595  {
1596  const char *p;
1597 
1598  /*
1599  * We're not dealing with a string, number, legal
1600  * punctuation mark, or end of string. The only legal
1601  * tokens we might find here are true, false, and null,
1602  * but for error reporting purposes we scan until we see a
1603  * non-alphanumeric character. That way, we can report
1604  * the whole word as an unexpected token, rather than just
1605  * some unintuitive prefix thereof.
1606  */
1607  for (p = s; p < end && JSON_ALPHANUMERIC_CHAR(*p); p++)
1608  /* skip */ ;
1609 
1610  /*
1611  * We got some sort of unexpected punctuation or an
1612  * otherwise unexpected character, so just complain about
1613  * that one character.
1614  */
1615  if (p == s)
1616  {
1618  lex->token_terminator = s + 1;
1619  return JSON_INVALID_TOKEN;
1620  }
1621 
      /*
       * The word runs right up to the end of a non-final chunk, so
       * it may continue in the next one: report it as incomplete
       * rather than judging it now.
       */
1622  if (lex->incremental && !lex->inc_state->is_last_chunk &&
1623  p == lex->input + lex->input_length)
1624  {
1626  &(lex->inc_state->partial_token), s, end - s);
1627  return JSON_INCOMPLETE;
1628  }
1629 
1630  /*
1631  * We've got a real alphanumeric token here. If it
1632  * happens to be true, false, or null, all is well. If
1633  * not, error out.
1634  */
1636  lex->token_terminator = p;
1637  if (p - s == 4)
1638  {
1639  if (memcmp(s, "true", 4) == 0)
1640  lex->token_type = JSON_TOKEN_TRUE;
1641  else if (memcmp(s, "null", 4) == 0)
1642  lex->token_type = JSON_TOKEN_NULL;
1643  else
1644  return JSON_INVALID_TOKEN;
1645  }
1646  else if (p - s == 5 && memcmp(s, "false", 5) == 0)
1648  else
1649  return JSON_INVALID_TOKEN;
1650  }
1651  } /* end of switch */
1652  }
1653 
      /*
       * Running out of input is a genuine end of document only on the last
       * chunk; before that the caller is asked to supply more.
       */
1654  if (lex->incremental && lex->token_type == JSON_TOKEN_END && !lex->inc_state->is_last_chunk)
1655  return JSON_INCOMPLETE;
1656  else
1657  return JSON_SUCCESS;
1658 }
1659 
1660 /*
1661  * The next token in the input stream is known to be a string; lex it.
1662  *
1663  * If lex->strval isn't NULL, fill it with the decoded string.
1664  * Set lex->token_terminator to the end of the decoded input, and in
1665  * success cases, transfer its previous value to lex->prev_token_terminator.
1666  * Return JSON_SUCCESS or an error code.
1667  *
1668  * Note: be careful that all error exits advance lex->token_terminator
1669  * to the point after the character we detected the error on.
1670  */
1671 static inline JsonParseErrorType
1673 {
      /*
       * NOTE(review): the inner numbering of this listing has gaps (1672,
       * 1707, 1715, 1725, 1733, 1743, 1750, 1756, 1766, 1780, 1800, 1807,
       * 1839, 1852, 1860, 1902, 1906).  The statements on those lines are
       * not visible here.  Each gap sits where an error exit would be
       * expected, so they presumably invoke the two macros defined below --
       * confirm against the upstream file.
       */
1674  const char *s;
1675  const char *const end = lex->input + lex->input_length;
      /* -1 while no high surrogate is waiting for its low half */
1676  int hi_surrogate = -1;
1677 
1678  /* Convenience macros for error exits */
      /*
       * FAIL_OR_INCOMPLETE_AT_CHAR_START: when lexing incrementally and more
       * chunks are expected, stash everything from token_start to the end of
       * the chunk in the partial-token buffer and return JSON_INCOMPLETE.
       * Otherwise put the terminator at s and return the given code.
       */
1679 #define FAIL_OR_INCOMPLETE_AT_CHAR_START(code) \
1680  do { \
1681  if (lex->incremental && !lex->inc_state->is_last_chunk) \
1682  { \
1683  appendBinaryStringInfo(&lex->inc_state->partial_token, \
1684  lex->token_start, end - lex->token_start); \
1685  return JSON_INCOMPLETE; \
1686  } \
1687  lex->token_terminator = s; \
1688  return code; \
1689  } while (0)
      /*
       * FAIL_AT_CHAR_END: put the terminator just past the character at s,
       * using its length in the input encoding but never beyond "end", then
       * return the given code.
       */
1690 #define FAIL_AT_CHAR_END(code) \
1691  do { \
1692  const char *term = s + pg_encoding_mblen(lex->input_encoding, s); \
1693  lex->token_terminator = (term <= end) ? term : end; \
1694  return code; \
1695  } while (0)
1696 
      /* strval == NULL means the caller does not want the decoded value. */
1697  if (lex->strval != NULL)
1698  resetStringInfo(lex->strval);
1699 
1700  Assert(lex->input_length > 0);
      /* token_start is the opening quote; the loop steps past it first. */
1701  s = lex->token_start;
1702  for (;;)
1703  {
1704  s++;
1705  /* Premature end of the string. */
1706  if (s >= end)
1708  else if (*s == '"')
1709  break;
1710  else if (*s == '\\')
1711  {
1712  /* OK, we have an escape character. */
1713  s++;
1714  if (s >= end)
1716  else if (*s == 'u')
1717  {
1718  int i;
1719  int ch = 0;
1720 
      /* Accumulate exactly four hex digits into ch. */
1721  for (i = 1; i <= 4; i++)
1722  {
1723  s++;
1724  if (s >= end)
1726  else if (*s >= '0' && *s <= '9')
1727  ch = (ch * 16) + (*s - '0');
1728  else if (*s >= 'a' && *s <= 'f')
1729  ch = (ch * 16) + (*s - 'a') + 10;
1730  else if (*s >= 'A' && *s <= 'F')
1731  ch = (ch * 16) + (*s - 'A') + 10;
1732  else
1734  }
1735  if (lex->strval != NULL)
1736  {
1737  /*
1738  * Combine surrogate pairs.
1739  */
1740  if (is_utf16_surrogate_first(ch))
1741  {
1742  if (hi_surrogate != -1)
      /*
       * Remember the high half and go round the loop again;
       * nothing is emitted until the low half arrives.
       */
1744  hi_surrogate = ch;
1745  continue;
1746  }
1747  else if (is_utf16_surrogate_second(ch))
1748  {
1749  if (hi_surrogate == -1)
1751  ch = surrogate_pair_to_codepoint(hi_surrogate, ch);
1752  hi_surrogate = -1;
1753  }
1754 
1755  if (hi_surrogate != -1)
1757 
1758  /*
1759  * Reject invalid cases. We can't have a value above
1760  * 0xFFFF here (since we only accepted 4 hex digits
1761  * above), so no need to test for out-of-range chars.
1762  */
1763  if (ch == 0)
1764  {
1765  /* We can't allow this, since our TEXT type doesn't */
1767  }
1768 
1769  /*
1770  * Add the represented character to lex->strval. In the
1771  * backend, we can let pg_unicode_to_server_noerror()
1772  * handle any required character set conversion; in
1773  * frontend, we can only deal with trivial conversions.
1774  */
1775 #ifndef FRONTEND
1776  {
1777  char cbuf[MAX_UNICODE_EQUIVALENT_STRING + 1];
1778 
1779  if (!pg_unicode_to_server_noerror(ch, (unsigned char *) cbuf))
1781  appendStringInfoString(lex->strval, cbuf);
1782  }
1783 #else
1784  if (lex->input_encoding == PG_UTF8)
1785  {
1786  /* OK, we can map the code point to UTF8 easily */
1787  char utf8str[5];
1788  int utf8len;
1789 
1790  unicode_to_utf8(ch, (unsigned char *) utf8str);
1791  utf8len = pg_utf_mblen((unsigned char *) utf8str);
1792  appendBinaryStringInfo(lex->strval, utf8str, utf8len);
1793  }
1794  else if (ch <= 0x007f)
1795  {
1796  /* The ASCII range is the same in all encodings */
1797  appendStringInfoChar(lex->strval, (char) ch);
1798  }
1799  else
1801 #endif /* FRONTEND */
1802  }
1803  }
1804  else if (lex->strval != NULL)
1805  {
      /* A pending high surrogate may only be followed by another \u. */
1806  if (hi_surrogate != -1)
1808 
1809  switch (*s)
1810  {
1811  case '"':
1812  case '\\':
1813  case '/':
1814  appendStringInfoChar(lex->strval, *s);
1815  break;
1816  case 'b':
1817  appendStringInfoChar(lex->strval, '\b');
1818  break;
1819  case 'f':
1820  appendStringInfoChar(lex->strval, '\f');
1821  break;
1822  case 'n':
1823  appendStringInfoChar(lex->strval, '\n');
1824  break;
1825  case 'r':
1826  appendStringInfoChar(lex->strval, '\r');
1827  break;
1828  case 't':
1829  appendStringInfoChar(lex->strval, '\t');
1830  break;
1831  default:
1832 
1833  /*
1834  * Not a valid string escape, so signal error. We
1835  * adjust token_start so that just the escape sequence
1836  * is reported, not the whole string.
1837  */
1838  lex->token_start = s;
1840  }
1841  }
1842  else if (strchr("\"\\/bfnrt", *s) == NULL)
1843  {
1844  /*
1845  * Simpler processing if we're not bothered about de-escaping
1846  *
1847  * It's very tempting to remove the strchr() call here and
1848  * replace it with a switch statement, but testing so far has
1849  * shown it's not a performance win.
1850  */
1851  lex->token_start = s;
1853  }
1854  }
1855  else
1856  {
1857  const char *p = s;
1858 
1859  if (hi_surrogate != -1)
1861 
1862  /*
1863  * Skip to the first byte that requires special handling, so we
1864  * can batch calls to appendBinaryStringInfo.
1865  */
      /*
       * Coarse pass, sizeof(Vector8) bytes per step.  Presumably the
       * pg_lfind8* helpers report whether the block contains the given
       * byte (or a byte <= 31); the byte-wise loop below then finds the
       * exact position.
       * NOTE(review): "end - sizeof(Vector8)" lies before the start of
       * the input when fewer than sizeof(Vector8) bytes are available --
       * confirm that this is acceptable here.
       */
1866  while (p < end - sizeof(Vector8) &&
1867  !pg_lfind8('\\', (uint8 *) p, sizeof(Vector8)) &&
1868  !pg_lfind8('"', (uint8 *) p, sizeof(Vector8)) &&
1869  !pg_lfind8_le(31, (uint8 *) p, sizeof(Vector8)))
1870  p += sizeof(Vector8);
1871 
1872  for (; p < end; p++)
1873  {
1874  if (*p == '\\' || *p == '"')
1875  break;
1876  else if ((unsigned char) *p <= 31)
1877  {
1878  /* Per RFC4627, these characters MUST be escaped. */
1879  /*
1880  * Since *p isn't printable, exclude it from the context
1881  * string
1882  */
1883  lex->token_terminator = p;
1884  return JSON_ESCAPING_REQUIRED;
1885  }
1886  }
1887 
      /* Copy the whole run of ordinary bytes [s, p) in one call. */
1888  if (lex->strval != NULL)
1889  appendBinaryStringInfo(lex->strval, s, p - s);
1890 
1891  /*
1892  * s will be incremented at the top of the loop, so set it to just
1893  * behind our lookahead position
1894  */
1895  s = p - 1;
1896  }
1897  }
1898 
      /*
       * The closing quote arrived while a high surrogate was still waiting
       * for its low half.
       */
1899  if (hi_surrogate != -1)
1900  {
1901  lex->token_terminator = s + 1;
1903  }
1904 
1905  /* Hooray, we found the end of the string! */
      /* s is on the closing quote; the terminator goes just past it. */
1907  lex->token_terminator = s + 1;
1908  return JSON_SUCCESS;
1909 
1910 #undef FAIL_OR_INCOMPLETE_AT_CHAR_START
1911 #undef FAIL_AT_CHAR_END
1912 }
1913 
1914 /*
1915  * The next token in the input stream is known to be a number; lex it.
1916  *
1917  * In JSON, a number consists of four parts:
1918  *
1919  * (1) An optional minus sign ('-').
1920  *
1921  * (2) Either a single '0', or a string of one or more digits that does not
1922  * begin with a '0'.
1923  *
1924  * (3) An optional decimal part, consisting of a period ('.') followed by
1925  * one or more digits. (Note: While this part can be omitted
1926  * completely, it's not OK to have only the decimal point without
1927  * any digits afterwards.)
1928  *
1929  * (4) An optional exponent part, consisting of 'e' or 'E', optionally
1930  * followed by '+' or '-', followed by one or more digits. (Note:
1931  * As with the decimal part, if 'e' or 'E' is present, it must be
1932  * followed by at least one digit.)
1933  *
1934  * The 's' argument to this function points to the ostensible beginning
1935  * of part 2 - i.e. the character after any optional minus sign, or the
1936  * first character of the string if there is none.
1937  *
1938  * If num_err is not NULL, we return an error flag to *num_err rather than
1939  * raising an error for a badly-formed number. Also, if total_len is not NULL
1940  * the distance from lex->input to the token end+1 is returned to *total_len.
1941  */
1942 static inline JsonParseErrorType
1943 json_lex_number(JsonLexContext *lex, const char *s,
1944  bool *num_err, size_t *total_len)
1945 {
      /*
       * "error" only records that the token is malformed; scanning goes on
       * so that the whole offending token is covered.
       */
1946  bool error = false;
      /*
       * Offset of s from the start of the input.  It is advanced in step
       * with s, and every bounds check below compares it with
       * lex->input_length before *s is read.
       * NOTE(review): len is an int while input_length is listed as size_t,
       * so these are mixed-signedness comparisons -- confirm this is
       * acceptable for the input sizes supported.
       */
1947  int len = s - lex->input;
1948 
1949  /* Part (1): leading sign indicator. */
1950  /* Caller already did this for us; so do nothing. */
1951 
1952  /* Part (2): parse main digit string. */
1953  if (len < lex->input_length && *s == '0')
1954  {
      /* A leading '0' stands alone; further digits are not consumed here. */
1955  s++;
1956  len++;
1957  }
1958  else if (len < lex->input_length && *s >= '1' && *s <= '9')
1959  {
1960  do
1961  {
1962  s++;
1963  len++;
1964  } while (len < lex->input_length && *s >= '0' && *s <= '9');
1965  }
1966  else
1967  error = true;
1968 
1969  /* Part (3): parse optional decimal portion. */
1970  if (len < lex->input_length && *s == '.')
1971  {
1972  s++;
1973  len++;
      /* At least one digit must follow the decimal point. */
1974  if (len == lex->input_length || *s < '0' || *s > '9')
1975  error = true;
1976  else
1977  {
1978  do
1979  {
1980  s++;
1981  len++;
1982  } while (len < lex->input_length && *s >= '0' && *s <= '9');
1983  }
1984  }
1985 
1986  /* Part (4): parse optional exponent. */
1987  if (len < lex->input_length && (*s == 'e' || *s == 'E'))
1988  {
1989  s++;
1990  len++;
1991  if (len < lex->input_length && (*s == '+' || *s == '-'))
1992  {
1993  s++;
1994  len++;
1995  }
      /* At least one digit must follow the exponent marker and sign. */
1996  if (len == lex->input_length || *s < '0' || *s > '9')
1997  error = true;
1998  else
1999  {
2000  do
2001  {
2002  s++;
2003  len++;
2004  } while (len < lex->input_length && *s >= '0' && *s <= '9');
2005  }
2006  }
2007 
2008  /*
2009  * Check for trailing garbage. As in json_lex(), any alphanumeric stuff
2010  * here should be considered part of the token for error-reporting
2011  * purposes.
2012  */
2013  for (; len < lex->input_length && JSON_ALPHANUMERIC_CHAR(*s); s++, len++)
2014  error = true;
2015 
2016  if (total_len != NULL)
2017  *total_len = len;
2018 
      /*
       * Exit 1: the token reaches the end of a non-final chunk, so it may
       * continue in the next one.  JSON_INCOMPLETE is returned even if
       * "error" is already set.
       * NOTE(review): inner line 2022 is absent from this listing; the
       * argument list on the next line presumably belongs to a call that
       * saves the bytes seen so far in the partial-token buffer -- confirm
       * against the upstream file.
       */
2019  if (lex->incremental && !lex->inc_state->is_last_chunk &&
2020  len >= lex->input_length)
2021  {
2023  lex->token_start, s - lex->token_start);
2024  if (num_err != NULL)
2025  *num_err = error;
2026 
2027  return JSON_INCOMPLETE;
2028  }
      /*
       * Exit 2: validation-only use.  The verdict goes to *num_err, no lex
       * field is modified, and JSON_SUCCESS is returned either way.
       */
2029  else if (num_err != NULL)
2030  {
2031  /* let the caller handle any error */
2032  *num_err = error;
2033  }
      /*
       * Exit 3: ordinary lexing.
       * NOTE(review): inner line 2037 is absent from this listing.
       */
2034  else
2035  {
2036  /* return token endpoint */
2038  lex->token_terminator = s;
2039  /* handle error if any */
2040  if (error)
2041  return JSON_INVALID_TOKEN;
2042  }
2043 
2044  return JSON_SUCCESS;
2045 }
2046 
2047 /*
2048  * Report a parse error.
2049  *
2050  * lex->token_start and lex->token_terminator must identify the current token.
2051  */
2054 {
      /*
       * Maps the parser context "ctx" to the error code describing what was
       * expected there.
       * NOTE(review): the inner numbering of this listing has gaps (2052-2053,
       * 2068-2069, 2072-2074, 2076-2078).  The function signature and several
       * case labels/returns are not visible here, so the switch below looks
       * shorter than the set of JsonParseContext values -- confirm against
       * the upstream file.
       */
2055  /* Handle case where the input ended prematurely. */
2056  if (lex->token_start == NULL || lex->token_type == JSON_TOKEN_END)
2057  return JSON_EXPECTED_MORE;
2058 
2059  /* Otherwise choose the error type based on the parsing context. */
2060  switch (ctx)
2061  {
2062  case JSON_PARSE_END:
2063  return JSON_EXPECTED_END;
2064  case JSON_PARSE_VALUE:
2065  return JSON_EXPECTED_JSON;
2066  case JSON_PARSE_STRING:
2067  return JSON_EXPECTED_STRING;
2070  case JSON_PARSE_ARRAY_NEXT:
2071  return JSON_EXPECTED_ARRAY_NEXT;
2075  return JSON_EXPECTED_COLON;
2079  return JSON_EXPECTED_STRING;
2080  }
2081 
2082  /*
2083  * We don't use a default: case, so that the compiler will warn about
2084  * unhandled enum values.
2085  */
      /* Only reached if ctx holds a value not handled by the switch. */
2086  Assert(false);
2087  return JSON_SUCCESS; /* silence stupider compilers */
2088 }
2089 
2090 /*
2091  * Construct an (already translated) detail message for a JSON error.
2092  *
2093  * The returned pointer should not be freed, the allocation is either static
2094  * or owned by the JsonLexContext.
2095  */
2096 char *
2098 {
      /*
       * NOTE(review): the inner numbering of this listing has gaps (2097,
       * 2119, 2129-2130, 2137, 2140, 2151, 2154, 2163, 2165, 2167, 2170,
       * 2179, 2184, 2186, 2188, 2200).  The function name line, several case
       * labels and the start of some calls are not visible here -- confirm
       * against the upstream file.
       */
      /* Reuse the message buffer kept in the lex context, creating it once. */
2099  if (lex->errormsg)
2100  resetStringInfo(lex->errormsg);
2101  else
2102  lex->errormsg = makeStringInfo();
2103 
2104  /*
2105  * A helper for error messages that should print the current token. The
2106  * format must contain exactly one %.*s specifier.
2107  */
      /*
       * The token is not NUL-terminated, so it is printed with an explicit
       * length (terminator minus start) as the %.*s precision.  The macro
       * body already ends in ';', so each use below expands to the call
       * followed by an empty statement.
       */
2108 #define json_token_error(lex, format) \
2109  appendStringInfo((lex)->errormsg, _(format), \
2110  (int) ((lex)->token_terminator - (lex)->token_start), \
2111  (lex)->token_start);
2112 
      /*
       * Cases either return a fixed translated string directly, or append a
       * formatted message to lex->errormsg and break.
       */
2113  switch (error)
2114  {
2115  case JSON_INCOMPLETE:
2116  case JSON_SUCCESS:
2117  /* fall through to the error code after switch */
2118  break;
2120  if (lex->incremental)
2121  return _("Recursive descent parser cannot use incremental lexer.");
2122  else
2123  return _("Incremental parser requires incremental lexer.");
2124  case JSON_NESTING_TOO_DEEP:
      /* The depth limit is spelled out literally in this message. */
2125  return (_("JSON nested too deep, maximum permitted depth is 6400."));
2126  case JSON_ESCAPING_INVALID:
2127  json_token_error(lex, "Escape sequence \"\\%.*s\" is invalid.");
2128  break;
2131  _("Character with value 0x%02x must be escaped."),
2132  (unsigned char) *(lex->token_terminator));
2133  break;
2134  case JSON_EXPECTED_END:
2135  json_token_error(lex, "Expected end of input, but found \"%.*s\".");
2136  break;
2138  json_token_error(lex, "Expected array element or \"]\", but found \"%.*s\".");
2139  break;
2141  json_token_error(lex, "Expected \",\" or \"]\", but found \"%.*s\".");
2142  break;
2143  case JSON_EXPECTED_COLON:
2144  json_token_error(lex, "Expected \":\", but found \"%.*s\".");
2145  break;
2146  case JSON_EXPECTED_JSON:
2147  json_token_error(lex, "Expected JSON value, but found \"%.*s\".");
2148  break;
2149  case JSON_EXPECTED_MORE:
2150  return _("The input string ended unexpectedly.");
2152  json_token_error(lex, "Expected string or \"}\", but found \"%.*s\".");
2153  break;
2155  json_token_error(lex, "Expected \",\" or \"}\", but found \"%.*s\".");
2156  break;
2157  case JSON_EXPECTED_STRING:
2158  json_token_error(lex, "Expected string, but found \"%.*s\".");
2159  break;
2160  case JSON_INVALID_TOKEN:
2161  json_token_error(lex, "Token \"%.*s\" is invalid.");
2162  break;
2164  return _("\\u0000 cannot be converted to text.");
2166  return _("\"\\u\" must be followed by four hexadecimal digits.");
2168  /* note: this case is only reachable in frontend not backend */
2169  return _("Unicode escape values cannot be used for code point values above 007F when the encoding is not UTF8.");
2171 
2172  /*
2173  * Note: this case is only reachable in backend and not frontend.
2174  * #ifdef it away so the frontend doesn't try to link against
2175  * backend functionality.
2176  */
2177 #ifndef FRONTEND
      /*
       * NOTE(review): unlike the other cases this one builds its result
       * with psprintf() rather than returning a literal or lex->errormsg --
       * check how that fits the ownership rule stated for this function.
       */
2178  return psprintf(_("Unicode escape value could not be translated to the server's encoding %s."),
2180 #else
2181  Assert(false);
2182  break;
2183 #endif
2185  return _("Unicode high surrogate must not follow a high surrogate.");
2187  return _("Unicode low surrogate must follow a high surrogate.");
2189  /* fall through to the error code after switch */
2190  break;
2191  }
2192 #undef json_token_error
2193 
2194  /*
2195  * We don't use a default: case, so that the compiler will warn about
2196  * unhandled enum values. But this needs to be here anyway to cover the
2197  * possibility of an incorrect input.
2198  */
      /*
       * An empty buffer means no case above wrote a message (the "break"
       * cases that append nothing, or a value outside the enum), so a
       * generic message carrying the numeric code is produced instead.
       */
2199  if (lex->errormsg->len == 0)
2201  "unexpected json parse error type: %d",
2202  (int) error);
2203 
2204  return lex->errormsg->data;
2205 }
#define Assert(condition)
Definition: c.h:858
unsigned char uint8
Definition: c.h:504
#define _(x)
Definition: elog.c:90
const char * str
#define token
Definition: indent_globs.h:126
long val
Definition: informix.c:689
int i
Definition: isn.c:73
JsonParseErrorType pg_parse_json_incremental(JsonLexContext *lex, const JsonSemAction *sem, const char *json, size_t len, bool is_last)
Definition: jsonapi.c:649
#define JSON_TD_MAX_STACK
Definition: jsonapi.c:363
JsonParseContext
Definition: jsonapi.c:34
@ JSON_PARSE_OBJECT_LABEL
Definition: jsonapi.c:40
@ JSON_PARSE_VALUE
Definition: jsonapi.c:35
@ JSON_PARSE_OBJECT_START
Definition: jsonapi.c:39
@ JSON_PARSE_ARRAY_START
Definition: jsonapi.c:37
@ JSON_PARSE_END
Definition: jsonapi.c:43
@ JSON_PARSE_OBJECT_NEXT
Definition: jsonapi.c:41
@ JSON_PARSE_ARRAY_NEXT
Definition: jsonapi.c:38
@ JSON_PARSE_OBJECT_COMMA
Definition: jsonapi.c:42
@ JSON_PARSE_STRING
Definition: jsonapi.c:36
#define TD_ENTRY(PROD)
Definition: jsonapi.c:178
JsonParserSem
Definition: jsonapi.c:63
@ JSON_SEM_SCALAR_CALL
Definition: jsonapi.c:74
@ JSON_SEM_OSTART
Definition: jsonapi.c:64
@ JSON_SEM_AELEM_START
Definition: jsonapi.c:71
@ JSON_SEM_AELEM_END
Definition: jsonapi.c:72
@ JSON_SEM_SCALAR_INIT
Definition: jsonapi.c:73
@ JSON_SEM_ASTART
Definition: jsonapi.c:66
@ JSON_SEM_OFIELD_INIT
Definition: jsonapi.c:68
@ JSON_SEM_OFIELD_END
Definition: jsonapi.c:70
@ JSON_SEM_OEND
Definition: jsonapi.c:65
@ JSON_SEM_OFIELD_START
Definition: jsonapi.c:69
@ JSON_SEM_AEND
Definition: jsonapi.c:67
static void set_fnull(JsonLexContext *lex, bool fnull)
Definition: jsonapi.c:463
#define JSON_NUM_TERMINALS
Definition: jsonapi.c:112
static char JSON_PROD_MORE_KEY_PAIRS[]
Definition: jsonapi.c:158
bool IsValidJsonNumber(const char *str, size_t len)
Definition: jsonapi.c:272
static JsonParseErrorType json_lex_string(JsonLexContext *lex)
Definition: jsonapi.c:1671
static void inc_lex_level(JsonLexContext *lex)
Definition: jsonapi.c:397
#define JSON_ALPHANUMERIC_CHAR(c)
Definition: jsonapi.c:259
static char JSON_PROD_KEY_PAIRS[]
Definition: jsonapi.c:155
#define JSON_NUM_NONTERMINALS
Definition: jsonapi.c:113
#define JS_MAX_PROD_LEN
Definition: jsonapi.c:362
#define OFS(NT)
Definition: jsonapi.c:116
static char JSON_PROD_SCALAR_STRING[]
Definition: jsonapi.c:128
JsonParseErrorType pg_parse_json(JsonLexContext *lex, const JsonSemAction *sem)
Definition: jsonapi.c:521
static char JSON_PROD_ARRAY_ELEMENTS[]
Definition: jsonapi.c:149
static bool have_prediction(JsonParserStack *pstack)
Definition: jsonapi.c:445
static void set_fname(JsonLexContext *lex, char *fname)
Definition: jsonapi.c:451
static char JSON_PROD_SCALAR_NUMBER[]
Definition: jsonapi.c:131
#define json_token_error(lex, format)
static char next_prediction(JsonParserStack *pstack)
Definition: jsonapi.c:438
static void push_prediction(JsonParserStack *pstack, td_entry entry)
Definition: jsonapi.c:424
#define IS_NT(x)
Definition: jsonapi.c:119
static char JSON_PROD_GOAL[]
Definition: jsonapi.c:211
static JsonTokenType lex_peek(JsonLexContext *lex)
Definition: jsonapi.c:238
static char JSON_PROD_EPSILON[]
Definition: jsonapi.c:125
char * json_errdetail(JsonParseErrorType error, JsonLexContext *lex)
Definition: jsonapi.c:2096
static JsonParseErrorType parse_object(JsonLexContext *lex, const JsonSemAction *sem)
Definition: jsonapi.c:1113
#define JS_STACK_CHUNK_SIZE
Definition: jsonapi.c:361
static char JSON_PROD_SCALAR_NULL[]
Definition: jsonapi.c:140
static JsonParseErrorType report_parse_error(JsonParseContext ctx, JsonLexContext *lex)
Definition: jsonapi.c:2052
static JsonParseErrorType lex_expect(JsonParseContext ctx, JsonLexContext *lex, JsonTokenType token)
Definition: jsonapi.c:250
static JsonParseErrorType json_lex_number(JsonLexContext *lex, const char *s, bool *num_err, size_t *total_len)
Definition: jsonapi.c:1942
static char JSON_PROD_MORE_ARRAY_ELEMENTS[]
Definition: jsonapi.c:152
const JsonSemAction nullSemAction
Definition: jsonapi.c:224
#define IS_SEM(x)
Definition: jsonapi.c:118
static td_entry td_parser_table[JSON_NUM_NONTERMINALS][JSON_NUM_TERMINALS]
Definition: jsonapi.c:180
static JsonParseErrorType parse_scalar(JsonLexContext *lex, const JsonSemAction *sem)
Definition: jsonapi.c:1007
static char * get_fname(JsonLexContext *lex)
Definition: jsonapi.c:457
static char pop_prediction(JsonParserStack *pstack)
Definition: jsonapi.c:431
static JsonParseErrorType parse_object_field(JsonLexContext *lex, const JsonSemAction *sem)
Definition: jsonapi.c:1051
static char JSON_PROD_SCALAR_FALSE[]
Definition: jsonapi.c:137
static bool get_fnull(JsonLexContext *lex)
Definition: jsonapi.c:469
JsonParseErrorType json_lex(JsonLexContext *lex)
Definition: jsonapi.c:1308
static char JSON_PROD_OBJECT[]
Definition: jsonapi.c:143
JsonLexContext * makeJsonLexContextCstringLen(JsonLexContext *lex, const char *json, size_t len, int encoding, bool need_escapes)
Definition: jsonapi.c:326
JsonParseErrorType json_count_array_elements(JsonLexContext *lex, int *elements)
Definition: jsonapi.c:587
static JsonParseErrorType parse_array(JsonLexContext *lex, const JsonSemAction *sem)
Definition: jsonapi.c:1231
static JsonParseErrorType parse_array_element(JsonLexContext *lex, const JsonSemAction *sem)
Definition: jsonapi.c:1187
void freeJsonLexContext(JsonLexContext *lex)
Definition: jsonapi.c:482
JsonNonTerminal
Definition: jsonapi.c:54
@ JSON_NT_JSON
Definition: jsonapi.c:55
@ JSON_NT_MORE_ARRAY_ELEMENTS
Definition: jsonapi.c:57
@ JSON_NT_MORE_KEY_PAIRS
Definition: jsonapi.c:59
@ JSON_NT_ARRAY_ELEMENTS
Definition: jsonapi.c:56
@ JSON_NT_KEY_PAIRS
Definition: jsonapi.c:58
static char JSON_PROD_ARRAY[]
Definition: jsonapi.c:146
#define FAIL_OR_INCOMPLETE_AT_CHAR_START(code)
JsonLexContext * makeJsonLexContextIncremental(JsonLexContext *lex, int encoding, bool need_escapes)
Definition: jsonapi.c:366
static char JSON_PROD_SCALAR_TRUE[]
Definition: jsonapi.c:134
#define FAIL_AT_CHAR_END(code)
static void dec_lex_level(JsonLexContext *lex)
Definition: jsonapi.c:418
JsonParseErrorType(* json_struct_action)(void *state)
Definition: jsonapi.h:109
JsonParseErrorType(* json_aelem_action)(void *state, bool isnull)
Definition: jsonapi.h:111
#define JSONLEX_FREE_STRVAL
Definition: jsonapi.h:88
JsonParseErrorType
Definition: jsonapi.h:37
@ JSON_SEM_ACTION_FAILED
Definition: jsonapi.h:60
@ JSON_EXPECTED_ARRAY_FIRST
Definition: jsonapi.h:44
@ JSON_EXPECTED_MORE
Definition: jsonapi.h:49
@ JSON_UNICODE_HIGH_SURROGATE
Definition: jsonapi.h:58
@ JSON_EXPECTED_COLON
Definition: jsonapi.h:46
@ JSON_EXPECTED_OBJECT_FIRST
Definition: jsonapi.h:50
@ JSON_UNICODE_CODE_POINT_ZERO
Definition: jsonapi.h:54
@ JSON_INVALID_LEXER_TYPE
Definition: jsonapi.h:40
@ JSON_EXPECTED_STRING
Definition: jsonapi.h:52
@ JSON_UNICODE_ESCAPE_FORMAT
Definition: jsonapi.h:55
@ JSON_SUCCESS
Definition: jsonapi.h:38
@ JSON_UNICODE_UNTRANSLATABLE
Definition: jsonapi.h:57
@ JSON_EXPECTED_OBJECT_NEXT
Definition: jsonapi.h:51
@ JSON_ESCAPING_REQUIRED
Definition: jsonapi.h:43
@ JSON_EXPECTED_JSON
Definition: jsonapi.h:48
@ JSON_INVALID_TOKEN
Definition: jsonapi.h:53
@ JSON_ESCAPING_INVALID
Definition: jsonapi.h:42
@ JSON_INCOMPLETE
Definition: jsonapi.h:39
@ JSON_EXPECTED_END
Definition: jsonapi.h:47
@ JSON_EXPECTED_ARRAY_NEXT
Definition: jsonapi.h:45
@ JSON_UNICODE_HIGH_ESCAPE
Definition: jsonapi.h:56
@ JSON_NESTING_TOO_DEEP
Definition: jsonapi.h:41
@ JSON_UNICODE_LOW_SURROGATE
Definition: jsonapi.h:59
JsonParseErrorType(* json_ofield_action)(void *state, char *fname, bool isnull)
Definition: jsonapi.h:110
#define JSONLEX_FREE_STRUCT
Definition: jsonapi.h:87
JsonTokenType
Definition: jsonapi.h:20
@ JSON_TOKEN_COMMA
Definition: jsonapi.h:28
@ JSON_TOKEN_FALSE
Definition: jsonapi.h:31
@ JSON_TOKEN_END
Definition: jsonapi.h:33
@ JSON_TOKEN_TRUE
Definition: jsonapi.h:30
@ JSON_TOKEN_OBJECT_END
Definition: jsonapi.h:25
@ JSON_TOKEN_NULL
Definition: jsonapi.h:32
@ JSON_TOKEN_ARRAY_END
Definition: jsonapi.h:27
@ JSON_TOKEN_OBJECT_START
Definition: jsonapi.h:24
@ JSON_TOKEN_NUMBER
Definition: jsonapi.h:23
@ JSON_TOKEN_STRING
Definition: jsonapi.h:22
@ JSON_TOKEN_COLON
Definition: jsonapi.h:29
@ JSON_TOKEN_ARRAY_START
Definition: jsonapi.h:26
JsonParseErrorType(* json_scalar_action)(void *state, char *token, JsonTokenType tokentype)
Definition: jsonapi.h:112
bool pg_unicode_to_server_noerror(pg_wchar c, unsigned char *s)
Definition: mbutils.c:926
const char * GetDatabaseEncodingName(void)
Definition: mbutils.c:1267
char * pstrdup(const char *in)
Definition: mcxt.c:1696
void pfree(void *pointer)
Definition: mcxt.c:1521
void * palloc0(Size size)
Definition: mcxt.c:1347
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1541
void * palloc(Size size)
Definition: mcxt.c:1317
const void size_t len
int32 encoding
Definition: pg_database.h:41
static bool pg_lfind8_le(uint8 key, uint8 *base, uint32 nelem)
Definition: pg_lfind.h:58
static bool pg_lfind8(uint8 key, uint8 *base, uint32 nelem)
Definition: pg_lfind.h:26
#define pg_utf_mblen
Definition: pg_wchar.h:633
@ PG_UTF8
Definition: pg_wchar.h:232
static unsigned char * unicode_to_utf8(pg_wchar c, unsigned char *utf8string)
Definition: pg_wchar.h:575
#define MAX_UNICODE_EQUIVALENT_STRING
Definition: pg_wchar.h:329
static pg_wchar surrogate_pair_to_codepoint(pg_wchar first, pg_wchar second)
Definition: pg_wchar.h:537
static bool is_utf16_surrogate_first(pg_wchar c)
Definition: pg_wchar.h:525
static bool is_utf16_surrogate_second(pg_wchar c)
Definition: pg_wchar.h:531
void check_stack_depth(void)
Definition: postgres.c:3540
char * c
char * psprintf(const char *fmt,...)
Definition: psprintf.c:46
uint64 Vector8
Definition: simd.h:60
static void error(void)
Definition: sql-dyntest.c:147
void destroyStringInfo(StringInfo str)
Definition: stringinfo.c:361
StringInfo makeStringInfo(void)
Definition: stringinfo.c:41
void resetStringInfo(StringInfo str)
Definition: stringinfo.c:78
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:97
void appendBinaryStringInfo(StringInfo str, const void *data, int datalen)
Definition: stringinfo.c:233
void appendStringInfoString(StringInfo str, const char *s)
Definition: stringinfo.c:182
void appendStringInfoChar(StringInfo str, char ch)
Definition: stringinfo.c:194
void initStringInfo(StringInfo str)
Definition: stringinfo.c:59
#define appendStringInfoCharMacro(str, ch)
Definition: stringinfo.h:204
StringInfoData partial_token
Definition: jsonapi.c:106
bits32 flags
Definition: jsonapi.h:100
int input_encoding
Definition: jsonapi.h:93
StringInfo strval
Definition: jsonapi.h:105
const char * prev_token_terminator
Definition: jsonapi.h:96
const char * input
Definition: jsonapi.h:91
const char * token_start
Definition: jsonapi.h:94
int lex_level
Definition: jsonapi.h:99
JsonParserStack * pstack
Definition: jsonapi.h:103
size_t input_length
Definition: jsonapi.h:92
StringInfo errormsg
Definition: jsonapi.h:106
JsonIncrementalState * inc_state
Definition: jsonapi.h:104
bool incremental
Definition: jsonapi.h:97
const char * line_start
Definition: jsonapi.h:102
int line_number
Definition: jsonapi.h:101
JsonTokenType token_type
Definition: jsonapi.h:98
const char * token_terminator
Definition: jsonapi.h:95
int stack_size
Definition: jsonapi.c:86
bool * fnull
Definition: jsonapi.c:91
JsonTokenType scalar_tok
Definition: jsonapi.c:92
char * prediction
Definition: jsonapi.c:87
size_t pred_index
Definition: jsonapi.c:88
char * scalar_val
Definition: jsonapi.c:93
char ** fnames
Definition: jsonapi.c:90
json_struct_action array_end
Definition: jsonapi.h:138
json_struct_action object_start
Definition: jsonapi.h:135
json_ofield_action object_field_start
Definition: jsonapi.h:139
json_aelem_action array_element_start
Definition: jsonapi.h:141
json_scalar_action scalar
Definition: jsonapi.h:143
void * semstate
Definition: jsonapi.h:134
json_aelem_action array_element_end
Definition: jsonapi.h:142
json_struct_action array_start
Definition: jsonapi.h:137
json_struct_action object_end
Definition: jsonapi.h:136
json_ofield_action object_field_end
Definition: jsonapi.h:140
Definition: jsonapi.c:173
char * prod
Definition: jsonapi.c:175
size_t len
Definition: jsonapi.c:174
static JsonSemAction sem