/* Extraction artifact: doxygen page header for src/common/jsonapi.c (PostgreSQL, git master). */
1 /*-------------------------------------------------------------------------
2  *
3  * jsonapi.c
4  * JSON parser and lexer interfaces
5  *
6  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  * IDENTIFICATION
10  * src/common/jsonapi.c
11  *
12  *-------------------------------------------------------------------------
13  */
14 #ifndef FRONTEND
15 #include "postgres.h"
16 #else
17 #include "postgres_fe.h"
18 #endif
19 
20 #include "common/jsonapi.h"
21 #include "mb/pg_wchar.h"
22 #include "port/pg_lfind.h"
23 
24 #ifdef JSONAPI_USE_PQEXPBUFFER
25 #include "pqexpbuffer.h"
26 #else
27 #include "lib/stringinfo.h"
28 #include "miscadmin.h"
29 #endif
30 
31 /*
32  * By default, we will use palloc/pfree along with StringInfo. In libpq,
33  * use malloc and PQExpBuffer, and return JSON_OUT_OF_MEMORY on out-of-memory.
34  */
#ifdef JSONAPI_USE_PQEXPBUFFER

/* shlib (libpq) build: bare malloc-family allocators; OOM is reported in-band */
#define STRDUP(s) strdup(s)
#define ALLOC(size) malloc(size)
#define ALLOC0(size) calloc(1, size)
#define REALLOC realloc
#define FREE(s) free(s)

#define jsonapi_appendStringInfo appendPQExpBuffer
#define jsonapi_appendBinaryStringInfo appendBinaryPQExpBuffer
#define jsonapi_appendStringInfoChar appendPQExpBufferChar
/* XXX should we add a macro version to PQExpBuffer? */
#define jsonapi_appendStringInfoCharMacro appendPQExpBufferChar
#define jsonapi_makeStringInfo createPQExpBuffer
#define jsonapi_initStringInfo initPQExpBuffer
#define jsonapi_resetStringInfo resetPQExpBuffer
#define jsonapi_termStringInfo termPQExpBuffer
#define jsonapi_destroyStringInfo destroyPQExpBuffer

#else							/* !JSONAPI_USE_PQEXPBUFFER */

/* backend/frontend builds: palloc-family allocators, which throw on OOM */
#define STRDUP(s) pstrdup(s)
#define ALLOC(size) palloc(size)
#define ALLOC0(size) palloc0(size)
#define REALLOC repalloc

#ifdef FRONTEND
#define FREE pfree
#else
/*
 * Backend pfree() doesn't handle NULL pointers like the frontend's does; smooth
 * that over to reduce mental gymnastics. Avoid multiple evaluation of the macro
 * argument to avoid future hair-pulling.
 */
#define FREE(s) do { \
	void *__v = (s); \
	if (__v) \
		pfree(__v); \
} while (0)
#endif

#define jsonapi_appendStringInfo appendStringInfo
#define jsonapi_appendBinaryStringInfo appendBinaryStringInfo
#define jsonapi_appendStringInfoChar appendStringInfoChar
#define jsonapi_appendStringInfoCharMacro appendStringInfoCharMacro
#define jsonapi_makeStringInfo makeStringInfo
#define jsonapi_initStringInfo initStringInfo
#define jsonapi_resetStringInfo resetStringInfo
/* note: releases only the data buffer, not the StringInfo struct itself */
#define jsonapi_termStringInfo(s) pfree((s)->data)
#define jsonapi_destroyStringInfo destroyStringInfo

#endif							/* JSONAPI_USE_PQEXPBUFFER */
87 
88 /*
89  * The context of the parser is maintained by the recursive descent
90  * mechanism, but is passed explicitly to the error reporting routine
91  * for better diagnostics.
92  */
93 typedef enum /* contexts of JSON parser */
94 {
95  JSON_PARSE_VALUE, /* expecting a value */
96  JSON_PARSE_STRING, /* expecting a string (for a field name) */
97  JSON_PARSE_ARRAY_START, /* saw '[', expecting value or ']' */
98  JSON_PARSE_ARRAY_NEXT, /* saw array element, expecting ',' or ']' */
99  JSON_PARSE_OBJECT_START, /* saw '{', expecting label or '}' */
100  JSON_PARSE_OBJECT_LABEL, /* saw object label, expecting ':' */
101  JSON_PARSE_OBJECT_NEXT, /* saw object value, expecting ',' or '}' */
102  JSON_PARSE_OBJECT_COMMA, /* saw object ',', expecting next label */
103  JSON_PARSE_END, /* saw the end of a document, expect nothing */
105 
106 /*
107  * Setup for table-driven parser.
108  * These enums need to be separate from the JsonTokenType and from each other
109  * so we can have all of them on the prediction stack, which consists of
110  * tokens, non-terminals, and semantic action markers.
111  */
112 
114 {
120 };
121 
123 {
135 };
136 
137 /*
138  * struct containing the 3 stacks used in non-recursive parsing,
139  * and the token and value for scalars that need to be preserved
140  * across calls.
141  *
142  * typedef appears in jsonapi.h
143  */
145 {
147  char *prediction;
148  size_t pred_index;
149  /* these two are indexed by lex_level */
150  char **fnames;
151  bool *fnull;
153  char *scalar_val;
154 };
155 
156 /*
157  * struct containing state used when there is a possible partial token at the
158  * end of a json chunk when we are doing incremental parsing.
159  *
160  * typedef appears in jsonapi.h
161  */
163 {
164  bool started;
168 };
169 
170 /*
171  * constants and macros used in the nonrecursive parser
172  */
#define JSON_NUM_TERMINALS 13
#define JSON_NUM_NONTERMINALS 5
#define JSON_NT_OFFSET JSON_NT_JSON
/*
 * for indexing the table: map a non-terminal enum value to a row index.
 * Parenthesize the whole expansion so the macro composes safely in any
 * expression context (previously it expanded to an unparenthesized
 * subtraction).
 */
#define OFS(NT) ((NT) - JSON_NT_OFFSET)
/* classify items we get off the stack */
#define IS_SEM(x) ((x) & 0x40)
#define IS_NT(x)  ((x) & 0x20)
181 
182 /*
183  * These productions are stored in reverse order right to left so that when
184  * they are pushed on the stack what we expect next is at the top of the stack.
185  */
186 static char JSON_PROD_EPSILON[] = {0}; /* epsilon - an empty production */
187 
188 /* JSON -> string */
190 
191 /* JSON -> number */
193 
194 /* JSON -> 'true' */
196 
197 /* JSON -> 'false' */
199 
200 /* JSON -> 'null' */
202 
203 /* JSON -> '{' KEY_PAIRS '}' */
205 
206 /* JSON -> '[' ARRAY_ELEMENTS ']' */
208 
209 /* ARRAY_ELEMENTS -> JSON MORE_ARRAY_ELEMENTS */
211 
212 /* MORE_ARRAY_ELEMENTS -> ',' JSON MORE_ARRAY_ELEMENTS */
214 
215 /* KEY_PAIRS -> string ':' JSON MORE_KEY_PAIRS */
217 
218 /* MORE_KEY_PAIRS -> ',' string ':' JSON MORE_KEY_PAIRS */
220 
221 /*
222  * Note: there are also epsilon productions for ARRAY_ELEMENTS,
223  * MORE_ARRAY_ELEMENTS, KEY_PAIRS and MORE_KEY_PAIRS
224  * They are all the same as none require any semantic actions.
225  */
226 
227 /*
228  * Table connecting the productions with their director sets of
229  * terminal symbols.
230  * Any combination not specified here represents an error.
231  */
232 
/*
 * One entry of the director-set table: a production (stored right-to-left,
 * see above) together with its length, so it can be pushed onto the
 * prediction stack with a single memcpy.
 */
typedef struct
{
	size_t		len;	/* number of bytes in prod */
	char	   *prod;	/* the production's symbols, reversed */
} td_entry;

/* build a td_entry from a static char[] production; -1 drops the NUL */
#define TD_ENTRY(PROD) { sizeof(PROD) - 1, (PROD) }
240 
242 {
243  /* JSON */
251  /* ARRAY_ELEMENTS */
260  /* MORE_ARRAY_ELEMENTS */
263  /* KEY_PAIRS */
266  /* MORE_KEY_PAIRS */
269 };
270 
271 /* the GOAL production. Not stored in the table, but will be the initial contents of the prediction stack */
273 
275 static inline JsonParseErrorType json_lex_number(JsonLexContext *lex, const char *s,
276  bool *num_err, size_t *total_len);
284 static inline void set_fname(JsonLexContext *lex, char *fname);
285 
286 /* the null action object used for pure validation */
288 {
289  NULL, NULL, NULL, NULL, NULL,
290  NULL, NULL, NULL, NULL, NULL
291 };
292 
293 /* sentinels used for out-of-memory conditions */
296 
297 /* Parser support routines */
298 
299 /*
300  * lex_peek
301  *
302  * what is the current look_ahead token?
303 */
304 static inline JsonTokenType
306 {
307  return lex->token_type;
308 }
309 
310 /*
311  * lex_expect
312  *
313  * move the lexer to the next token if the current look_ahead token matches
314  * the parameter token. Otherwise, report an error.
315  */
316 static inline JsonParseErrorType
318 {
319  if (lex_peek(lex) == token)
320  return json_lex(lex);
321  else
322  return report_parse_error(ctx, lex);
323 }
324 
/*
 * chars to consider as part of an alphanumeric token
 *
 * Note: the argument is evaluated multiple times, so callers must pass a
 * side-effect-free expression.  Any byte with the high bit set also counts,
 * presumably so multibyte-encoded characters continue a token — see
 * IS_HIGHBIT_SET in pg_wchar.h to confirm.
 */
#define JSON_ALPHANUMERIC_CHAR(c) \
	(((c) >= 'a' && (c) <= 'z') || \
	 ((c) >= 'A' && (c) <= 'Z') || \
	 ((c) >= '0' && (c) <= '9') || \
	 (c) == '_' || \
	 IS_HIGHBIT_SET(c))
332 
333 /*
334  * Utility function to check if a string is a valid JSON number.
335  *
336  * str is of length len, and need not be null-terminated.
337  */
338 bool
339 IsValidJsonNumber(const char *str, size_t len)
340 {
341  bool numeric_error;
342  size_t total_len;
343  JsonLexContext dummy_lex = {0};
344 
345  if (len <= 0)
346  return false;
347 
348  /*
349  * json_lex_number expects a leading '-' to have been eaten already.
350  *
351  * having to cast away the constness of str is ugly, but there's not much
352  * easy alternative.
353  */
354  if (*str == '-')
355  {
356  dummy_lex.input = str + 1;
357  dummy_lex.input_length = len - 1;
358  }
359  else
360  {
361  dummy_lex.input = str;
362  dummy_lex.input_length = len;
363  }
364 
365  dummy_lex.token_start = dummy_lex.input;
366 
367  json_lex_number(&dummy_lex, dummy_lex.input, &numeric_error, &total_len);
368 
369  return (!numeric_error) && (total_len == dummy_lex.input_length);
370 }
371 
372 /*
373  * makeJsonLexContextCstringLen
374  * Initialize the given JsonLexContext object, or create one
375  *
376  * If a valid 'lex' pointer is given, it is initialized. This can
377  * be used for stack-allocated structs, saving overhead. If NULL is
378  * given, a new struct is allocated.
379  *
380  * If need_escapes is true, ->strval stores the unescaped lexemes.
381  * Unescaping is expensive, so only request it when necessary.
382  *
383  * If need_escapes is true or lex was given as NULL, then caller is
384  * responsible for freeing the returned struct, either by calling
385  * freeJsonLexContext() or (in backend environment) via memory context
386  * cleanup.
387  *
388  * In shlib code, any out-of-memory failures will be deferred to time
389  * of use; this function is guaranteed to return a valid JsonLexContext.
390  */
393  size_t len, int encoding, bool need_escapes)
394 {
395  if (lex == NULL)
396  {
397  lex = ALLOC0(sizeof(JsonLexContext));
398  if (!lex)
399  return &failed_oom;
400  lex->flags |= JSONLEX_FREE_STRUCT;
401  }
402  else
403  memset(lex, 0, sizeof(JsonLexContext));
404 
405  lex->errormsg = NULL;
406  lex->input = lex->token_terminator = lex->line_start = json;
407  lex->line_number = 1;
408  lex->input_length = len;
409  lex->input_encoding = encoding;
410  lex->need_escapes = need_escapes;
411  if (need_escapes)
412  {
413  /*
414  * This call can fail in shlib code. We defer error handling to time
415  * of use (json_lex_string()) since we might not need to parse any
416  * strings anyway.
417  */
419  lex->flags |= JSONLEX_FREE_STRVAL;
420  }
421 
422  return lex;
423 }
424 
425 /*
426  * Allocates the internal bookkeeping structures for incremental parsing. This
427  * can only fail in-band with shlib code.
428  */
/* growth increment (in stack levels) for the incremental parser's stacks */
#define JS_STACK_CHUNK_SIZE 64
/* per-level space reserved in the prediction stack for one production */
#define JS_MAX_PROD_LEN 10		/* more than we need */
#define JSON_TD_MAX_STACK 6400	/* hard coded for now - this is a REALLY high
								 * number */
433 static bool
435 {
436  void *pstack,
437  *prediction,
438  *fnames,
439  *fnull;
440 
441  lex->inc_state = ALLOC0(sizeof(JsonIncrementalState));
442  pstack = ALLOC0(sizeof(JsonParserStack));
443  prediction = ALLOC(JS_STACK_CHUNK_SIZE * JS_MAX_PROD_LEN);
444  fnames = ALLOC(JS_STACK_CHUNK_SIZE * sizeof(char *));
445  fnull = ALLOC(JS_STACK_CHUNK_SIZE * sizeof(bool));
446 
447 #ifdef JSONAPI_USE_PQEXPBUFFER
448  if (!lex->inc_state
449  || !pstack
450  || !prediction
451  || !fnames
452  || !fnull)
453  {
454  FREE(lex->inc_state);
455  FREE(pstack);
456  FREE(prediction);
457  FREE(fnames);
458  FREE(fnull);
459 
460  lex->inc_state = &failed_inc_oom;
461  return false;
462  }
463 #endif
464 
466  lex->pstack = pstack;
468  lex->pstack->prediction = prediction;
469  lex->pstack->fnames = fnames;
470  lex->pstack->fnull = fnull;
471 
472  /*
473  * fnames between 0 and lex_level must always be defined so that
474  * freeJsonLexContext() can handle them safely. inc/dec_lex_level() handle
475  * the rest.
476  */
477  Assert(lex->lex_level == 0);
478  lex->pstack->fnames[0] = NULL;
479 
480  lex->incremental = true;
481  return true;
482 }
483 
484 
485 /*
486  * makeJsonLexContextIncremental
487  *
488  * Similar to above but set up for use in incremental parsing. That means we
489  * need explicit stacks for predictions, field names and null indicators, but
490  * we don't need the input, that will be handed in bit by bit to the
491  * parse routine. We also need an accumulator for partial tokens in case
492  * the boundary between chunks happens to fall in the middle of a token.
493  *
494  * In shlib code, any out-of-memory failures will be deferred to time of use;
495  * this function is guaranteed to return a valid JsonLexContext.
496  */
499  bool need_escapes)
500 {
501  if (lex == NULL)
502  {
503  lex = ALLOC0(sizeof(JsonLexContext));
504  if (!lex)
505  return &failed_oom;
506 
507  lex->flags |= JSONLEX_FREE_STRUCT;
508  }
509  else
510  memset(lex, 0, sizeof(JsonLexContext));
511 
512  lex->line_number = 1;
513  lex->input_encoding = encoding;
514 
515  if (!allocate_incremental_state(lex))
516  {
517  if (lex->flags & JSONLEX_FREE_STRUCT)
518  {
519  FREE(lex);
520  return &failed_oom;
521  }
522 
523  /* lex->inc_state tracks the OOM failure; we can return here. */
524  return lex;
525  }
526 
527  lex->need_escapes = need_escapes;
528  if (need_escapes)
529  {
530  /*
531  * This call can fail in shlib code. We defer error handling to time
532  * of use (json_lex_string()) since we might not need to parse any
533  * strings anyway.
534  */
536  lex->flags |= JSONLEX_FREE_STRVAL;
537  }
538 
539  return lex;
540 }
541 
542 void
543 setJsonLexContextOwnsTokens(JsonLexContext *lex, bool owned_by_context)
544 {
545  if (lex->incremental && lex->inc_state->started)
546  {
547  /*
548  * Switching this flag after parsing has already started is a
549  * programming error.
550  */
551  Assert(false);
552  return;
553  }
554 
555  if (owned_by_context)
557  else
559 }
560 
561 static inline bool
563 {
564  if (lex->incremental && (lex->lex_level + 1) >= lex->pstack->stack_size)
565  {
566  size_t new_stack_size;
567  char *new_prediction;
568  char **new_fnames;
569  bool *new_fnull;
570 
571  new_stack_size = lex->pstack->stack_size + JS_STACK_CHUNK_SIZE;
572 
573  new_prediction = REALLOC(lex->pstack->prediction,
574  new_stack_size * JS_MAX_PROD_LEN);
575 #ifdef JSONAPI_USE_PQEXPBUFFER
576  if (!new_prediction)
577  return false;
578 #endif
579  lex->pstack->prediction = new_prediction;
580 
581  new_fnames = REALLOC(lex->pstack->fnames,
582  new_stack_size * sizeof(char *));
583 #ifdef JSONAPI_USE_PQEXPBUFFER
584  if (!new_fnames)
585  return false;
586 #endif
587  lex->pstack->fnames = new_fnames;
588 
589  new_fnull = REALLOC(lex->pstack->fnull, new_stack_size * sizeof(bool));
590 #ifdef JSONAPI_USE_PQEXPBUFFER
591  if (!new_fnull)
592  return false;
593 #endif
594  lex->pstack->fnull = new_fnull;
595 
596  lex->pstack->stack_size = new_stack_size;
597  }
598 
599  lex->lex_level += 1;
600 
601  if (lex->incremental)
602  {
603  /*
604  * Ensure freeJsonLexContext() remains safe even if no fname is
605  * assigned at this level.
606  */
607  lex->pstack->fnames[lex->lex_level] = NULL;
608  }
609 
610  return true;
611 }
612 
613 static inline void
615 {
616  set_fname(lex, NULL); /* free the current level's fname, if needed */
617  lex->lex_level -= 1;
618 }
619 
620 static inline void
622 {
623  memcpy(pstack->prediction + pstack->pred_index, entry.prod, entry.len);
624  pstack->pred_index += entry.len;
625 }
626 
627 static inline char
629 {
630  Assert(pstack->pred_index > 0);
631  return pstack->prediction[--pstack->pred_index];
632 }
633 
634 static inline char
636 {
637  Assert(pstack->pred_index > 0);
638  return pstack->prediction[pstack->pred_index - 1];
639 }
640 
641 static inline bool
643 {
644  return pstack->pred_index > 0;
645 }
646 
647 static inline void
648 set_fname(JsonLexContext *lex, char *fname)
649 {
650  if (lex->flags & JSONLEX_CTX_OWNS_TOKENS)
651  {
652  /*
653  * Don't leak prior fnames. If one hasn't been assigned yet,
654  * inc_lex_level ensured that it's NULL (and therefore safe to free).
655  */
656  FREE(lex->pstack->fnames[lex->lex_level]);
657  }
658 
659  lex->pstack->fnames[lex->lex_level] = fname;
660 }
661 
662 static inline char *
664 {
665  return lex->pstack->fnames[lex->lex_level];
666 }
667 
668 static inline void
669 set_fnull(JsonLexContext *lex, bool fnull)
670 {
671  lex->pstack->fnull[lex->lex_level] = fnull;
672 }
673 
674 static inline bool
676 {
677  return lex->pstack->fnull[lex->lex_level];
678 }
679 
680 /*
681  * Free memory in a JsonLexContext.
682  *
683  * There's no need for this if a *lex pointer was given when the object was
684  * made, need_escapes was false, and json_errdetail() was not called; or if (in
685  * backend environment) a memory context delete/reset is imminent.
686  */
687 void
689 {
690  static const JsonLexContext empty = {0};
691 
692  if (!lex || lex == &failed_oom)
693  return;
694 
695  if (lex->flags & JSONLEX_FREE_STRVAL)
697 
698  if (lex->errormsg)
700 
701  if (lex->incremental)
702  {
704  FREE(lex->inc_state);
705  FREE(lex->pstack->prediction);
706 
707  if (lex->flags & JSONLEX_CTX_OWNS_TOKENS)
708  {
709  int i;
710 
711  /* Clean up any tokens that were left behind. */
712  for (i = 0; i <= lex->lex_level; i++)
713  FREE(lex->pstack->fnames[i]);
714  }
715 
716  FREE(lex->pstack->fnames);
717  FREE(lex->pstack->fnull);
718  FREE(lex->pstack->scalar_val);
719  FREE(lex->pstack);
720  }
721 
722  if (lex->flags & JSONLEX_FREE_STRUCT)
723  FREE(lex);
724  else
725  *lex = empty;
726 }
727 
728 /*
729  * pg_parse_json
730  *
731  * Publicly visible entry point for the JSON parser.
732  *
733  * lex is a lexing context, set up for the json to be processed by calling
734  * makeJsonLexContext(). sem is a structure of function pointers to semantic
735  * action routines to be called at appropriate spots during parsing, and a
736  * pointer to a state object to be passed to those routines.
737  *
738  * If FORCE_JSON_PSTACK is defined then the routine will call the non-recursive
739  * JSON parser. This is a useful way to validate that it's doing the right
740  * thing at least for non-incremental cases. If this is on we expect to see
741  * regression diffs relating to error messages about stack depth, but no
742  * other differences.
743  */
746 {
747 #ifdef FORCE_JSON_PSTACK
748  /*
749  * We don't need partial token processing, there is only one chunk. But we
750  * still need to init the partial token string so that freeJsonLexContext
751  * works, so perform the full incremental initialization.
752  */
753  if (!allocate_incremental_state(lex))
754  return JSON_OUT_OF_MEMORY;
755 
756  return pg_parse_json_incremental(lex, sem, lex->input, lex->input_length, true);
757 
758 #else
759 
760  JsonTokenType tok;
761  JsonParseErrorType result;
762 
763  if (lex == &failed_oom)
764  return JSON_OUT_OF_MEMORY;
765  if (lex->incremental)
767 
768  /* get the initial token */
769  result = json_lex(lex);
770  if (result != JSON_SUCCESS)
771  return result;
772 
773  tok = lex_peek(lex);
774 
775  /* parse by recursive descent */
776  switch (tok)
777  {
779  result = parse_object(lex, sem);
780  break;
782  result = parse_array(lex, sem);
783  break;
784  default:
785  result = parse_scalar(lex, sem); /* json can be a bare scalar */
786  }
787 
788  if (result == JSON_SUCCESS)
789  result = lex_expect(JSON_PARSE_END, lex, JSON_TOKEN_END);
790 
791  return result;
792 #endif
793 }
794 
795 /*
796  * json_count_array_elements
797  *
798  * Returns number of array elements in lex context at start of array token
799  * until end of array token at same nesting level.
800  *
801  * Designed to be called from array_start routines.
802  */
804 json_count_array_elements(JsonLexContext *lex, int *elements)
805 {
806  JsonLexContext copylex;
807  int count;
808  JsonParseErrorType result;
809 
810  if (lex == &failed_oom)
811  return JSON_OUT_OF_MEMORY;
812 
813  /*
814  * It's safe to do this with a shallow copy because the lexical routines
815  * don't scribble on the input. They do scribble on the other pointers
816  * etc, so doing this with a copy makes that safe.
817  */
818  memcpy(&copylex, lex, sizeof(JsonLexContext));
819  copylex.need_escapes = false; /* not interested in values here */
820  copylex.lex_level++;
821 
822  count = 0;
823  result = lex_expect(JSON_PARSE_ARRAY_START, &copylex,
825  if (result != JSON_SUCCESS)
826  return result;
827  if (lex_peek(&copylex) != JSON_TOKEN_ARRAY_END)
828  {
829  while (1)
830  {
831  count++;
832  result = parse_array_element(&copylex, &nullSemAction);
833  if (result != JSON_SUCCESS)
834  return result;
835  if (copylex.token_type != JSON_TOKEN_COMMA)
836  break;
837  result = json_lex(&copylex);
838  if (result != JSON_SUCCESS)
839  return result;
840  }
841  }
842  result = lex_expect(JSON_PARSE_ARRAY_NEXT, &copylex,
844  if (result != JSON_SUCCESS)
845  return result;
846 
847  *elements = count;
848  return JSON_SUCCESS;
849 }
850 
851 /*
852  * pg_parse_json_incremental
853  *
854  * Routine for incremental parsing of json. This uses the non-recursive top
855  * down method of the Dragon Book Algorithm 4.3. It's somewhat slower than
856  * the Recursive Descent pattern used above, so we only use it for incremental
857  * parsing of JSON.
858  *
859  * The lexing context needs to be set up by a call to
860  * makeJsonLexContextIncremental(). sem is a structure of function pointers
861  * to semantic action routines, which should function exactly as those used
862  * in the recursive descent parser.
863  *
864  * This routine can be called repeatedly with chunks of JSON. On the final
865  * chunk is_last must be set to true. len is the length of the json chunk,
866  * which does not need to be null terminated.
867  */
870  const JsonSemAction *sem,
871  const char *json,
872  size_t len,
873  bool is_last)
874 {
875  JsonTokenType tok;
876  JsonParseErrorType result;
878  JsonParserStack *pstack = lex->pstack;
879 
880  if (lex == &failed_oom || lex->inc_state == &failed_inc_oom)
881  return JSON_OUT_OF_MEMORY;
882  if (!lex->incremental)
884 
885  lex->input = lex->token_terminator = lex->line_start = json;
886  lex->input_length = len;
887  lex->inc_state->is_last_chunk = is_last;
888  lex->inc_state->started = true;
889 
890  /* get the initial token */
891  result = json_lex(lex);
892  if (result != JSON_SUCCESS)
893  return result;
894 
895  tok = lex_peek(lex);
896 
897  /* use prediction stack for incremental parsing */
898 
899  if (!have_prediction(pstack))
900  {
902 
903  push_prediction(pstack, goal);
904  }
905 
906  while (have_prediction(pstack))
907  {
908  char top = pop_prediction(pstack);
909  td_entry entry;
910 
911  /*
912  * these first two branches are the guts of the Table Driven method
913  */
914  if (top == tok)
915  {
916  /*
917  * tok can only be a terminal symbol, so top must be too. the
918  * token matches the top of the stack, so get the next token.
919  */
920  if (tok < JSON_TOKEN_END)
921  {
922  result = json_lex(lex);
923  if (result != JSON_SUCCESS)
924  return result;
925  tok = lex_peek(lex);
926  }
927  }
928  else if (IS_NT(top) && (entry = td_parser_table[OFS(top)][tok]).prod != NULL)
929  {
930  /*
931  * the token is in the director set for a production of the
932  * non-terminal at the top of the stack, so push the reversed RHS
933  * of the production onto the stack.
934  */
935  push_prediction(pstack, entry);
936  }
937  else if (IS_SEM(top))
938  {
939  /*
940  * top is a semantic action marker, so take action accordingly.
941  * It's important to have these markers in the prediction stack
942  * before any token they might need so we don't advance the token
943  * prematurely. Note in a couple of cases we need to do something
944  * both before and after the token.
945  */
946  switch (top)
947  {
948  case JSON_SEM_OSTART:
949  {
951 
952  if (lex->lex_level >= JSON_TD_MAX_STACK)
953  return JSON_NESTING_TOO_DEEP;
954 
955  if (ostart != NULL)
956  {
957  result = (*ostart) (sem->semstate);
958  if (result != JSON_SUCCESS)
959  return result;
960  }
961 
962  if (!inc_lex_level(lex))
963  return JSON_OUT_OF_MEMORY;
964  }
965  break;
966  case JSON_SEM_OEND:
967  {
969 
970  dec_lex_level(lex);
971  if (oend != NULL)
972  {
973  result = (*oend) (sem->semstate);
974  if (result != JSON_SUCCESS)
975  return result;
976  }
977  }
978  break;
979  case JSON_SEM_ASTART:
980  {
982 
983  if (lex->lex_level >= JSON_TD_MAX_STACK)
984  return JSON_NESTING_TOO_DEEP;
985 
986  if (astart != NULL)
987  {
988  result = (*astart) (sem->semstate);
989  if (result != JSON_SUCCESS)
990  return result;
991  }
992 
993  if (!inc_lex_level(lex))
994  return JSON_OUT_OF_MEMORY;
995  }
996  break;
997  case JSON_SEM_AEND:
998  {
1000 
1001  dec_lex_level(lex);
1002  if (aend != NULL)
1003  {
1004  result = (*aend) (sem->semstate);
1005  if (result != JSON_SUCCESS)
1006  return result;
1007  }
1008  }
1009  break;
1010  case JSON_SEM_OFIELD_INIT:
1011  {
1012  /*
1013  * all we do here is save out the field name. We have
1014  * to wait to get past the ':' to see if the next
1015  * value is null so we can call the semantic routine
1016  */
1017  char *fname = NULL;
1020 
1021  if ((ostart != NULL || oend != NULL) && lex->need_escapes)
1022  {
1023  fname = STRDUP(lex->strval->data);
1024  if (fname == NULL)
1025  return JSON_OUT_OF_MEMORY;
1026  }
1027  set_fname(lex, fname);
1028  }
1029  break;
1030  case JSON_SEM_OFIELD_START:
1031  {
1032  /*
1033  * the current token should be the first token of the
1034  * value
1035  */
1036  bool isnull = tok == JSON_TOKEN_NULL;
1038 
1039  set_fnull(lex, isnull);
1040 
1041  if (ostart != NULL)
1042  {
1043  char *fname = get_fname(lex);
1044 
1045  result = (*ostart) (sem->semstate, fname, isnull);
1046  if (result != JSON_SUCCESS)
1047  return result;
1048  }
1049  }
1050  break;
1051  case JSON_SEM_OFIELD_END:
1052  {
1054 
1055  if (oend != NULL)
1056  {
1057  char *fname = get_fname(lex);
1058  bool isnull = get_fnull(lex);
1059 
1060  result = (*oend) (sem->semstate, fname, isnull);
1061  if (result != JSON_SUCCESS)
1062  return result;
1063  }
1064  }
1065  break;
1066  case JSON_SEM_AELEM_START:
1067  {
1069  bool isnull = tok == JSON_TOKEN_NULL;
1070 
1071  set_fnull(lex, isnull);
1072 
1073  if (astart != NULL)
1074  {
1075  result = (*astart) (sem->semstate, isnull);
1076  if (result != JSON_SUCCESS)
1077  return result;
1078  }
1079  }
1080  break;
1081  case JSON_SEM_AELEM_END:
1082  {
1084 
1085  if (aend != NULL)
1086  {
1087  bool isnull = get_fnull(lex);
1088 
1089  result = (*aend) (sem->semstate, isnull);
1090  if (result != JSON_SUCCESS)
1091  return result;
1092  }
1093  }
1094  break;
1095  case JSON_SEM_SCALAR_INIT:
1096  {
1097  json_scalar_action sfunc = sem->scalar;
1098 
1099  pstack->scalar_val = NULL;
1100 
1101  if (sfunc != NULL)
1102  {
1103  /*
1104  * extract the de-escaped string value, or the raw
1105  * lexeme
1106  */
1107  /*
1108  * XXX copied from RD parser but looks like a
1109  * buglet
1110  */
1111  if (tok == JSON_TOKEN_STRING)
1112  {
1113  if (lex->need_escapes)
1114  {
1115  pstack->scalar_val = STRDUP(lex->strval->data);
1116  if (pstack->scalar_val == NULL)
1117  return JSON_OUT_OF_MEMORY;
1118  }
1119  }
1120  else
1121  {
1122  ptrdiff_t tlen = (lex->token_terminator - lex->token_start);
1123 
1124  pstack->scalar_val = ALLOC(tlen + 1);
1125  if (pstack->scalar_val == NULL)
1126  return JSON_OUT_OF_MEMORY;
1127 
1128  memcpy(pstack->scalar_val, lex->token_start, tlen);
1129  pstack->scalar_val[tlen] = '\0';
1130  }
1131  pstack->scalar_tok = tok;
1132  }
1133  }
1134  break;
1135  case JSON_SEM_SCALAR_CALL:
1136  {
1137  /*
1138  * We'd like to be able to get rid of this business of
1139  * two bits of scalar action, but we can't. It breaks
1140  * certain semantic actions which expect that when
1141  * called the lexer has consumed the item. See for
1142  * example get_scalar() in jsonfuncs.c.
1143  */
1144  json_scalar_action sfunc = sem->scalar;
1145 
1146  if (sfunc != NULL)
1147  {
1148  result = (*sfunc) (sem->semstate, pstack->scalar_val, pstack->scalar_tok);
1149 
1150  /*
1151  * Either ownership of the token passed to the
1152  * callback, or we need to free it now. Either
1153  * way, clear our pointer to it so it doesn't get
1154  * freed in the future.
1155  */
1156  if (lex->flags & JSONLEX_CTX_OWNS_TOKENS)
1157  FREE(pstack->scalar_val);
1158  pstack->scalar_val = NULL;
1159 
1160  if (result != JSON_SUCCESS)
1161  return result;
1162  }
1163  }
1164  break;
1165  default:
1166  /* should not happen */
1167  break;
1168  }
1169  }
1170  else
1171  {
1172  /*
1173  * The token didn't match the stack top if it's a terminal nor a
1174  * production for the stack top if it's a non-terminal.
1175  *
1176  * Various cases here are Asserted to be not possible, as the
1177  * token would not appear at the top of the prediction stack
1178  * unless the lookahead matched.
1179  */
1180  switch (top)
1181  {
1182  case JSON_TOKEN_STRING:
1183  if (next_prediction(pstack) == JSON_TOKEN_COLON)
1184  ctx = JSON_PARSE_STRING;
1185  else
1186  {
1187  Assert(false);
1188  ctx = JSON_PARSE_VALUE;
1189  }
1190  break;
1191  case JSON_TOKEN_NUMBER:
1192  case JSON_TOKEN_TRUE:
1193  case JSON_TOKEN_FALSE:
1194  case JSON_TOKEN_NULL:
1197  Assert(false);
1198  ctx = JSON_PARSE_VALUE;
1199  break;
1200  case JSON_TOKEN_ARRAY_END:
1201  Assert(false);
1202  ctx = JSON_PARSE_ARRAY_NEXT;
1203  break;
1204  case JSON_TOKEN_OBJECT_END:
1205  Assert(false);
1206  ctx = JSON_PARSE_OBJECT_NEXT;
1207  break;
1208  case JSON_TOKEN_COMMA:
1209  Assert(false);
1210  if (next_prediction(pstack) == JSON_TOKEN_STRING)
1211  ctx = JSON_PARSE_OBJECT_NEXT;
1212  else
1213  ctx = JSON_PARSE_ARRAY_NEXT;
1214  break;
1215  case JSON_TOKEN_COLON:
1217  break;
1218  case JSON_TOKEN_END:
1219  ctx = JSON_PARSE_END;
1220  break;
1222  ctx = JSON_PARSE_ARRAY_NEXT;
1223  break;
1225  ctx = JSON_PARSE_ARRAY_START;
1226  break;
1228  ctx = JSON_PARSE_OBJECT_NEXT;
1229  break;
1230  case JSON_NT_KEY_PAIRS:
1232  break;
1233  default:
1234  ctx = JSON_PARSE_VALUE;
1235  }
1236  return report_parse_error(ctx, lex);
1237  }
1238  }
1239 
1240  return JSON_SUCCESS;
1241 }
1242 
1243 /*
1244  * Recursive Descent parse routines. There is one for each structural
1245  * element in a json document:
1246  * - scalar (string, number, true, false, null)
1247  * - array ( [ ] )
1248  * - array element
1249  * - object ( { } )
1250  * - object field
1251  */
/*
 * Parse a single JSON scalar token (string, number, true, false, or null)
 * and invoke the semantic scalar callback, if any.
 *
 * NOTE(review): the extraction dropped internal line 1253 here — presumably
 * the signature "parse_scalar(JsonLexContext *lex, const JsonSemAction *sem)";
 * confirm against upstream.
 */
1252 static inline JsonParseErrorType
1254 {
1255  char *val = NULL;
1256  json_scalar_action sfunc = sem->scalar;
1257  JsonTokenType tok = lex_peek(lex);
1258  JsonParseErrorType result;
1259 
1260  /* a scalar must be a string, a number, true, false, or null */
1261  if (tok != JSON_TOKEN_STRING && tok != JSON_TOKEN_NUMBER &&
1262  tok != JSON_TOKEN_TRUE && tok != JSON_TOKEN_FALSE &&
1263  tok != JSON_TOKEN_NULL)
1264  return report_parse_error(JSON_PARSE_VALUE, lex);
1265 
1266  /* if no semantic function, just consume the token */
1267  if (sfunc == NULL)
1268  return json_lex(lex);
1269 
1270  /* extract the de-escaped string value, or the raw lexeme */
1271  if (lex_peek(lex) == JSON_TOKEN_STRING)
1272  {
1273  if (lex->need_escapes)
1274  {
            /* copy out strval: the lexer reuses its buffer on the next token */
1275  val = STRDUP(lex->strval->data);
1276  if (val == NULL)
1277  return JSON_OUT_OF_MEMORY;
1278  }
1279  }
1280  else
1281  {
            /* non-string scalar: duplicate the raw lexeme, NUL-terminated */
1282  int len = (lex->token_terminator - lex->token_start);
1283 
1284  val = ALLOC(len + 1);
1285  if (val == NULL)
1286  return JSON_OUT_OF_MEMORY;
1287 
1288  memcpy(val, lex->token_start, len);
1289  val[len] = '\0';
1290  }
1291 
1292  /* consume the token */
1293  result = json_lex(lex);
1294  if (result != JSON_SUCCESS)
1295  {
1296  FREE(val);
1297  return result;
1298  }
1299 
1300  /* invoke the callback, which may take ownership of val */
1301  result = (*sfunc) (sem->semstate, val, tok);
1302 
    /* with JSONLEX_CTX_OWNS_TOKENS the context, not the callback, frees val */
1303  if (lex->flags & JSONLEX_CTX_OWNS_TOKENS)
1304  FREE(val);
1305 
1306  return result;
1307 }
1308 
/*
 * Parse one "fieldname" : value pair inside an object, invoking the
 * object-field start/end callbacks around the value.
 *
 * NOTE(review): extraction dropped internal lines 1309-1310 (presumably the
 * signature and/or ostart/oend callback locals), 1319-1320, line 1326
 * (presumably the error return for a non-string key), and line 1340
 * (presumably lexing/validating the ':' token) — confirm against upstream.
 */
1311 {
1312  /*
1313  * An object field is "fieldname" : value where value can be a scalar,
1314  * object or array. Note: in user-facing docs and error messages, we
1315  * generally call a field name a "key".
1316  */
1317 
1318  char *fname = NULL;
1321  bool isnull;
1322  JsonTokenType tok;
1323  JsonParseErrorType result;
1324 
1325  if (lex_peek(lex) != JSON_TOKEN_STRING)
    /* copy the key only if a callback will see it and de-escaping is on */
1327  if ((ostart != NULL || oend != NULL) && lex->need_escapes)
1328  {
1329  fname = STRDUP(lex->strval->data);
1330  if (fname == NULL)
1331  return JSON_OUT_OF_MEMORY;
1332  }
1333  result = json_lex(lex);
1334  if (result != JSON_SUCCESS)
1335  {
1336  FREE(fname);
1337  return result;
1338  }
1339 
1341  if (result != JSON_SUCCESS)
1342  {
1343  FREE(fname);
1344  return result;
1345  }
1346 
1347  tok = lex_peek(lex);
1348  isnull = tok == JSON_TOKEN_NULL;
1349 
1350  if (ostart != NULL)
1351  {
1352  result = (*ostart) (sem->semstate, fname, isnull);
1353  if (result != JSON_SUCCESS)
1354  goto ofield_cleanup;
1355  }
1356 
    /* dispatch on the value's first token (dropped case labels at 1359/1362) */
1357  switch (tok)
1358  {
1360  result = parse_object(lex, sem);
1361  break;
1363  result = parse_array(lex, sem);
1364  break;
1365  default:
1366  result = parse_scalar(lex, sem);
1367  }
1368  if (result != JSON_SUCCESS)
1369  goto ofield_cleanup;
1370 
1371  if (oend != NULL)
1372  {
1373  result = (*oend) (sem->semstate, fname, isnull);
1374  if (result != JSON_SUCCESS)
1375  goto ofield_cleanup;
1376  }
1377 
1378 ofield_cleanup:
    /* fname ownership: freed here only when the context owns tokens */
1379  if (lex->flags & JSONLEX_CTX_OWNS_TOKENS)
1380  FREE(fname);
1381  return result;
1382 }
1383 
/*
 * Parse a JSON object: '{' [ field { ',' field } ] '}', calling the
 * object start/end semantic callbacks and tracking nesting depth.
 *
 * NOTE(review): extraction dropped internal lines 1384-1385 (presumably the
 * signature and ostart/oend locals), 1391-1392, 1402 (presumably the stack
 * depth check under !FRONTEND), 1420 (presumably a comment or token check),
 * 1442 (presumably report_parse_error for an invalid initial token), and
 * 1447 (presumably lexing the closing '}') — confirm against upstream.
 */
1386 {
1387  /*
1388  * an object is a possibly empty sequence of object fields, separated by
1389  * commas and surrounded by curly braces.
1390  */
1393  JsonTokenType tok;
1394  JsonParseErrorType result;
1395 
1396 #ifndef FRONTEND
1397 
1398  /*
1399  * TODO: clients need some way to put a bound on stack growth. Parse level
1400  * limits maybe?
1401  */
1403 #endif
1404 
1405  if (ostart != NULL)
1406  {
1407  result = (*ostart) (sem->semstate);
1408  if (result != JSON_SUCCESS)
1409  return result;
1410  }
1411 
1412  /*
1413  * Data inside an object is at a higher nesting level than the object
1414  * itself. Note that we increment this after we call the semantic routine
1415  * for the object start and restore it before we call the routine for the
1416  * object end.
1417  */
1418  lex->lex_level++;
1419 
    /* consume the '{' */
1421  result = json_lex(lex);
1422  if (result != JSON_SUCCESS)
1423  return result;
1424 
1425  tok = lex_peek(lex);
1426  switch (tok)
1427  {
1428  case JSON_TOKEN_STRING:
            /* one or more comma-separated fields */
1429  result = parse_object_field(lex, sem);
1430  while (result == JSON_SUCCESS && lex_peek(lex) == JSON_TOKEN_COMMA)
1431  {
1432  result = json_lex(lex);
1433  if (result != JSON_SUCCESS)
1434  break;
1435  result = parse_object_field(lex, sem);
1436  }
1437  break;
1438  case JSON_TOKEN_OBJECT_END:
            /* empty object */
1439  break;
1440  default:
1441  /* case of an invalid initial token inside the object */
1443  }
1444  if (result != JSON_SUCCESS)
1445  return result;
1446 
1448  if (result != JSON_SUCCESS)
1449  return result;
1450 
1451  lex->lex_level--;
1452 
1453  if (oend != NULL)
1454  {
1455  result = (*oend) (sem->semstate);
1456  if (result != JSON_SUCCESS)
1457  return result;
1458  }
1459 
1460  return JSON_SUCCESS;
1461 }
1462 
/*
 * Parse one element of a JSON array (any object, array, or scalar),
 * bracketed by the array-element start/end callbacks.
 *
 * NOTE(review): extraction dropped internal lines 1463-1464 (presumably the
 * signature), 1466-1467 (presumably astart/aend callback locals), and the
 * case labels at 1484/1487 (presumably JSON_TOKEN_OBJECT_START and
 * JSON_TOKEN_ARRAY_START) — confirm against upstream.
 */
1465 {
1468  JsonTokenType tok = lex_peek(lex);
1469  JsonParseErrorType result;
1470  bool isnull;
1471 
1472  isnull = tok == JSON_TOKEN_NULL;
1473 
1474  if (astart != NULL)
1475  {
1476  result = (*astart) (sem->semstate, isnull);
1477  if (result != JSON_SUCCESS)
1478  return result;
1479  }
1480 
1481  /* an array element is any object, array or scalar */
1482  switch (tok)
1483  {
1485  result = parse_object(lex, sem);
1486  break;
1488  result = parse_array(lex, sem);
1489  break;
1490  default:
1491  result = parse_scalar(lex, sem);
1492  }
1493 
1494  if (result != JSON_SUCCESS)
1495  return result;
1496 
1497  if (aend != NULL)
1498  {
1499  result = (*aend) (sem->semstate, isnull);
1500  if (result != JSON_SUCCESS)
1501  return result;
1502  }
1503 
1504  return JSON_SUCCESS;
1505 }
1506 
/*
 * Parse a JSON array: '[' [ element { ',' element } ] ']', calling the
 * array start/end semantic callbacks and tracking nesting depth.
 *
 * NOTE(review): extraction dropped internal lines 1507-1508 (presumably the
 * signature), 1514-1515 (presumably astart/aend locals), 1519 (presumably
 * the stack depth check under !FRONTEND), 1537 (presumably lexing the '['),
 * and 1553 (presumably lexing the closing ']') — confirm against upstream.
 */
1509 {
1510  /*
1511  * an array is a possibly empty sequence of array elements, separated by
1512  * commas and surrounded by square brackets.
1513  */
1516  JsonParseErrorType result;
1517 
1518 #ifndef FRONTEND
1520 #endif
1521 
1522  if (astart != NULL)
1523  {
1524  result = (*astart) (sem->semstate);
1525  if (result != JSON_SUCCESS)
1526  return result;
1527  }
1528 
1529  /*
1530  * Data inside an array is at a higher nesting level than the array
1531  * itself. Note that we increment this after we call the semantic routine
1532  * for the array start and restore it before we call the routine for the
1533  * array end.
1534  */
1535  lex->lex_level++;
1536 
1538  if (result == JSON_SUCCESS && lex_peek(lex) != JSON_TOKEN_ARRAY_END)
1539  {
        /* non-empty array: parse comma-separated elements */
1540  result = parse_array_element(lex, sem);
1541 
1542  while (result == JSON_SUCCESS && lex_peek(lex) == JSON_TOKEN_COMMA)
1543  {
1544  result = json_lex(lex);
1545  if (result != JSON_SUCCESS)
1546  break;
1547  result = parse_array_element(lex, sem);
1548  }
1549  }
1550  if (result != JSON_SUCCESS)
1551  return result;
1552 
1554  if (result != JSON_SUCCESS)
1555  return result;
1556 
1557  lex->lex_level--;
1558 
1559  if (aend != NULL)
1560  {
1561  result = (*aend) (sem->semstate);
1562  if (result != JSON_SUCCESS)
1563  return result;
1564  }
1565 
1566  return JSON_SUCCESS;
1567 }
1568 
1569 /*
1570  * Lex one token from the input stream.
1571  *
1572  * When doing incremental parsing, we can reach the end of the input string
1573  * without having (or knowing we have) a complete token. If it's not the
1574  * final chunk of input, the partial token is then saved to the lex
1575  * structure's ptok StringInfo. On subsequent calls input is appended to this
1576  * buffer until we have something that we think is a complete token,
1577  * which is then lexed using a recursive call to json_lex. Processing then
1578  * continues as normal on subsequent calls.
1579  *
1580  * Note than when doing incremental processing, the lex.prev_token_terminator
1581  * should not be relied on. It could point into a previous input chunk or
1582  * worse.
1583  */
/*
 * Lex one token from lex->input, setting lex->token_start/token_terminator/
 * token_type.  Handles incremental-parsing partial-token accumulation (see
 * the preceding file comment).
 *
 * NOTE(review): extraction dropped internal lines throughout this function —
 * the signature (1584-1585), the partial-token buffer reset/validity checks
 * (1602, 1609), the partial-token append calls (1649, 1694, 1715), and the
 * lex->token_type assignments after each single-character/string/number case
 * (1814, 1824/1826, 1829/1831, 1834/1836, 1839/1841, 1844/1846, 1849/1851,
 * 1858, 1865, 1881, 1906, 1914, 1923, 1935) — confirm against upstream.
 */
1586 {
1587  const char *s;
1588  const char *const end = lex->input + lex->input_length;
1589  JsonParseErrorType result;
1590 
    /* sentinel contexts signal a previous allocation failure */
1591  if (lex == &failed_oom || lex->inc_state == &failed_inc_oom)
1592  return JSON_OUT_OF_MEMORY;
1593 
1594  if (lex->incremental)
1595  {
1596  if (lex->inc_state->partial_completed)
1597  {
1598  /*
1599  * We just lexed a completed partial token on the last call, so
1600  * reset everything
1601  */
1603  lex->token_terminator = lex->input;
1604  lex->inc_state->partial_completed = false;
1605  }
1606 
1607 #ifdef JSONAPI_USE_PQEXPBUFFER
1608  /* Make sure our partial token buffer is valid before using it below. */
1610  return JSON_OUT_OF_MEMORY;
1611 #endif
1612  }
1613 
1614  s = lex->token_terminator;
1615 
1616  if (lex->incremental && lex->inc_state->partial_token.len)
1617  {
1618  /*
1619  * We have a partial token. Extend it and if completed lex it by a
1620  * recursive call
1621  */
1622  jsonapi_StrValType *ptok = &(lex->inc_state->partial_token);
1623  size_t added = 0;
1624  bool tok_done = false;
1625  JsonLexContext dummy_lex = {0};
1626  JsonParseErrorType partial_result;
1627 
1628  if (ptok->data[0] == '"')
1629  {
1630  /*
1631  * It's a string. Accumulate characters until we reach an
1632  * unescaped '"'.
1633  */
1634  int escapes = 0;
1635 
1636  for (int i = ptok->len - 1; i > 0; i--)
1637  {
1638  /* count the trailing backslashes on the partial token */
1639  if (ptok->data[i] == '\\')
1640  escapes++;
1641  else
1642  break;
1643  }
1644 
1645  for (size_t i = 0; i < lex->input_length; i++)
1646  {
1647  char c = lex->input[i];
1648 
1650  added++;
                /* a '"' preceded by an even number of backslashes ends it */
1651  if (c == '"' && escapes % 2 == 0)
1652  {
1653  tok_done = true;
1654  break;
1655  }
1656  if (c == '\\')
1657  escapes++;
1658  else
1659  escapes = 0;
1660  }
1661  }
1662  else
1663  {
1664  /* not a string */
1665  char c = ptok->data[0];
1666 
1667  if (c == '-' || (c >= '0' && c <= '9'))
1668  {
1669  /* for numbers look for possible numeric continuations */
1670 
1671  bool numend = false;
1672 
1673  for (size_t i = 0; i < lex->input_length && !numend; i++)
1674  {
1675  char cc = lex->input[i];
1676 
1677  switch (cc)
1678  {
1679  case '+':
1680  case '-':
1681  case 'e':
1682  case 'E':
1683  case '0':
1684  case '1':
1685  case '2':
1686  case '3':
1687  case '4':
1688  case '5':
1689  case '6':
1690  case '7':
1691  case '8':
1692  case '9':
1693  {
1695  added++;
1696  }
1697  break;
1698  default:
1699  numend = true;
1700  }
1701  }
1702  }
1703 
1704  /*
1705  * Add any remaining alphanumeric chars. This takes care of the
1706  * {null, false, true} literals as well as any trailing
1707  * alphanumeric junk on non-string tokens.
1708  */
1709  for (size_t i = added; i < lex->input_length; i++)
1710  {
1711  char cc = lex->input[i];
1712 
1713  if (JSON_ALPHANUMERIC_CHAR(cc))
1714  {
1716  added++;
1717  }
1718  else
1719  {
1720  tok_done = true;
1721  break;
1722  }
1723  }
1724  if (added == lex->input_length &&
1725  lex->inc_state->is_last_chunk)
1726  {
1727  tok_done = true;
1728  }
1729  }
1730 
1731  if (!tok_done)
1732  {
1733  /* We should have consumed the whole chunk in this case. */
1734  Assert(added == lex->input_length);
1735 
1736  if (!lex->inc_state->is_last_chunk)
1737  return JSON_INCOMPLETE;
1738 
1739  /* json_errdetail() needs access to the accumulated token. */
1740  lex->token_start = ptok->data;
1741  lex->token_terminator = ptok->data + ptok->len;
1742  return JSON_INVALID_TOKEN;
1743  }
1744 
1745  /*
1746  * Everything up to lex->input[added] has been added to the partial
1747  * token, so move the input past it.
1748  */
1749  lex->input += added;
1750  lex->input_length -= added;
1751 
    /* lex the completed token with a throwaway non-incremental context */
1752  dummy_lex.input = dummy_lex.token_terminator =
1753  dummy_lex.line_start = ptok->data;
1754  dummy_lex.line_number = lex->line_number;
1755  dummy_lex.input_length = ptok->len;
1756  dummy_lex.input_encoding = lex->input_encoding;
1757  dummy_lex.incremental = false;
1758  dummy_lex.need_escapes = lex->need_escapes;
1759  dummy_lex.strval = lex->strval;
1760 
1761  partial_result = json_lex(&dummy_lex);
1762 
1763  /*
1764  * We either have a complete token or an error. In either case we need
1765  * to point to the partial token data for the semantic or error
1766  * routines. If it's not an error we'll readjust on the next call to
1767  * json_lex.
1768  */
1769  lex->token_type = dummy_lex.token_type;
1770  lex->line_number = dummy_lex.line_number;
1771 
1772  /*
1773  * We know the prev_token_terminator must be back in some previous
1774  * piece of input, so we just make it NULL.
1775  */
1776  lex->prev_token_terminator = NULL;
1777 
1778  /*
1779  * Normally token_start would be ptok->data, but it could be later,
1780  * see json_lex_string's handling of invalid escapes.
1781  */
1782  lex->token_start = dummy_lex.token_start;
1783  lex->token_terminator = dummy_lex.token_terminator;
1784  if (partial_result == JSON_SUCCESS)
1785  {
1786  /* make sure we've used all the input */
1787  if (lex->token_terminator - lex->token_start != ptok->len)
1788  {
1789  Assert(false);
1790  return JSON_INVALID_TOKEN;
1791  }
1792 
1793  lex->inc_state->partial_completed = true;
1794  }
1795  return partial_result;
1796  /* end of partial token processing */
1797  }
1798 
1799  /* Skip leading whitespace. */
1800  while (s < end && (*s == ' ' || *s == '\t' || *s == '\n' || *s == '\r'))
1801  {
1802  if (*s++ == '\n')
1803  {
1804  ++lex->line_number;
1805  lex->line_start = s;
1806  }
1807  }
1808  lex->token_start = s;
1809 
1810  /* Determine token type. */
1811  if (s >= end)
1812  {
1813  lex->token_start = NULL;
1815  lex->token_terminator = s;
1816  lex->token_type = JSON_TOKEN_END;
1817  }
1818  else
1819  {
1820  switch (*s)
1821  {
1822  /* Single-character token, some kind of punctuation mark. */
1823  case '{':
1825  lex->token_terminator = s + 1;
1827  break;
1828  case '}':
1830  lex->token_terminator = s + 1;
1832  break;
1833  case '[':
1835  lex->token_terminator = s + 1;
1837  break;
1838  case ']':
1840  lex->token_terminator = s + 1;
1842  break;
1843  case ',':
1845  lex->token_terminator = s + 1;
1847  break;
1848  case ':':
1850  lex->token_terminator = s + 1;
1852  break;
1853  case '"':
1854  /* string */
1855  result = json_lex_string(lex);
1856  if (result != JSON_SUCCESS)
1857  return result;
1859  break;
1860  case '-':
1861  /* Negative number. */
1862  result = json_lex_number(lex, s + 1, NULL, NULL);
1863  if (result != JSON_SUCCESS)
1864  return result;
1866  break;
1867  case '0':
1868  case '1':
1869  case '2':
1870  case '3':
1871  case '4':
1872  case '5':
1873  case '6':
1874  case '7':
1875  case '8':
1876  case '9':
1877  /* Positive number. */
1878  result = json_lex_number(lex, s, NULL, NULL);
1879  if (result != JSON_SUCCESS)
1880  return result;
1882  break;
1883  default:
1884  {
1885  const char *p;
1886 
1887  /*
1888  * We're not dealing with a string, number, legal
1889  * punctuation mark, or end of string. The only legal
1890  * tokens we might find here are true, false, and null,
1891  * but for error reporting purposes we scan until we see a
1892  * non-alphanumeric character. That way, we can report
1893  * the whole word as an unexpected token, rather than just
1894  * some unintuitive prefix thereof.
1895  */
1896  for (p = s; p < end && JSON_ALPHANUMERIC_CHAR(*p); p++)
1897  /* skip */ ;
1898 
1899  /*
1900  * We got some sort of unexpected punctuation or an
1901  * otherwise unexpected character, so just complain about
1902  * that one character.
1903  */
1904  if (p == s)
1905  {
1907  lex->token_terminator = s + 1;
1908  return JSON_INVALID_TOKEN;
1909  }
1910 
                    /* word may continue into the next chunk; wait for more */
1911  if (lex->incremental && !lex->inc_state->is_last_chunk &&
1912  p == lex->input + lex->input_length)
1913  {
1915  return JSON_INCOMPLETE;
1916  }
1917 
1918  /*
1919  * We've got a real alphanumeric token here. If it
1920  * happens to be true, false, or null, all is well. If
1921  * not, error out.
1922  */
1924  lex->token_terminator = p;
1925  if (p - s == 4)
1926  {
1927  if (memcmp(s, "true", 4) == 0)
1928  lex->token_type = JSON_TOKEN_TRUE;
1929  else if (memcmp(s, "null", 4) == 0)
1930  lex->token_type = JSON_TOKEN_NULL;
1931  else
1932  return JSON_INVALID_TOKEN;
1933  }
1934  else if (p - s == 5 && memcmp(s, "false", 5) == 0)
1936  else
1937  return JSON_INVALID_TOKEN;
1938  }
1939  } /* end of switch */
1940  }
1941 
    /* hitting end-of-input mid-stream is not an error when more chunks follow */
1942  if (lex->incremental && lex->token_type == JSON_TOKEN_END && !lex->inc_state->is_last_chunk)
1943  return JSON_INCOMPLETE;
1944  else
1945  return JSON_SUCCESS;
1946 }
1947 
1948 /*
1949  * The next token in the input stream is known to be a string; lex it.
1950  *
1951  * If lex->strval isn't NULL, fill it with the decoded string.
1952  * Set lex->token_terminator to the end of the decoded input, and in
1953  * success cases, transfer its previous value to lex->prev_token_terminator.
1954  * Return JSON_SUCCESS or an error code.
1955  *
1956  * Note: be careful that all error exits advance lex->token_terminator
1957  * to the point after the character we detected the error on.
1958  */
/*
 * Lex a JSON string token (the contract is described in the comment block
 * immediately above).
 *
 * NOTE(review): extraction dropped internal lines here — the signature
 * (1960), the strval reset (1993), and most FAIL_OR_INCOMPLETE_AT_CHAR_START
 * / FAIL_AT_CHAR_END invocations and append calls (2003, 2011, 2021, 2029,
 * 2039, 2046, 2052, 2062, 2076, 2096, 2103, 2110, 2113, 2116, 2119, 2122,
 * 2125, 2135, 2148, 2156, 2198, 2207) — confirm against upstream.
 */
1959 static inline JsonParseErrorType
1961 {
1962  const char *s;
1963  const char *const end = lex->input + lex->input_length;
1964  int hi_surrogate = -1;
1965 
1966  /* Convenience macros for error exits */
1967 #define FAIL_OR_INCOMPLETE_AT_CHAR_START(code) \
1968  do { \
1969  if (lex->incremental && !lex->inc_state->is_last_chunk) \
1970  { \
1971  jsonapi_appendBinaryStringInfo(&lex->inc_state->partial_token, \
1972  lex->token_start, \
1973  end - lex->token_start); \
1974  return JSON_INCOMPLETE; \
1975  } \
1976  lex->token_terminator = s; \
1977  return code; \
1978  } while (0)
1979 #define FAIL_AT_CHAR_END(code) \
1980  do { \
1981  const char *term = s + pg_encoding_mblen(lex->input_encoding, s); \
1982  lex->token_terminator = (term <= end) ? term : end; \
1983  return code; \
1984  } while (0)
1985 
1986  if (lex->need_escapes)
1987  {
1988 #ifdef JSONAPI_USE_PQEXPBUFFER
1989  /* make sure initialization succeeded */
1990  if (lex->strval == NULL)
1991  return JSON_OUT_OF_MEMORY;
1992 #endif
1994  }
1995 
1996  Assert(lex->input_length > 0);
1997  s = lex->token_start;
1998  for (;;)
1999  {
11000  s++;
        /* (line renumbering artifact above: this is the loop's s++) */
2001  /* Premature end of the string. */
2002  if (s >= end)
2004  else if (*s == '"')
2005  break;
2006  else if (*s == '\\')
2007  {
2008  /* OK, we have an escape character. */
2009  s++;
2010  if (s >= end)
2012  else if (*s == 'u')
2013  {
2014  int i;
2015  int ch = 0;
2016 
                /* accumulate exactly four hex digits into ch */
2017  for (i = 1; i <= 4; i++)
2018  {
2019  s++;
2020  if (s >= end)
2022  else if (*s >= '0' && *s <= '9')
2023  ch = (ch * 16) + (*s - '0');
2024  else if (*s >= 'a' && *s <= 'f')
2025  ch = (ch * 16) + (*s - 'a') + 10;
2026  else if (*s >= 'A' && *s <= 'F')
2027  ch = (ch * 16) + (*s - 'A') + 10;
2028  else
2030  }
2031  if (lex->need_escapes)
2032  {
2033  /*
2034  * Combine surrogate pairs.
2035  */
2036  if (is_utf16_surrogate_first(ch))
2037  {
2038  if (hi_surrogate != -1)
2040  hi_surrogate = ch;
2041  continue;
2042  }
2043  else if (is_utf16_surrogate_second(ch))
2044  {
2045  if (hi_surrogate == -1)
2047  ch = surrogate_pair_to_codepoint(hi_surrogate, ch);
2048  hi_surrogate = -1;
2049  }
2050 
2051  if (hi_surrogate != -1)
2053 
2054  /*
2055  * Reject invalid cases. We can't have a value above
2056  * 0xFFFF here (since we only accepted 4 hex digits
2057  * above), so no need to test for out-of-range chars.
2058  */
2059  if (ch == 0)
2060  {
2061  /* We can't allow this, since our TEXT type doesn't */
2063  }
2064 
2065  /*
2066  * Add the represented character to lex->strval. In the
2067  * backend, we can let pg_unicode_to_server_noerror()
2068  * handle any required character set conversion; in
2069  * frontend, we can only deal with trivial conversions.
2070  */
2071 #ifndef FRONTEND
2072  {
2073  char cbuf[MAX_UNICODE_EQUIVALENT_STRING + 1];
2074 
2075  if (!pg_unicode_to_server_noerror(ch, (unsigned char *) cbuf))
2077  appendStringInfoString(lex->strval, cbuf);
2078  }
2079 #else
2080  if (lex->input_encoding == PG_UTF8)
2081  {
2082  /* OK, we can map the code point to UTF8 easily */
2083  char utf8str[5];
2084  int utf8len;
2085 
2086  unicode_to_utf8(ch, (unsigned char *) utf8str);
2087  utf8len = pg_utf_mblen((unsigned char *) utf8str);
2088  jsonapi_appendBinaryStringInfo(lex->strval, utf8str, utf8len);
2089  }
2090  else if (ch <= 0x007f)
2091  {
2092  /* The ASCII range is the same in all encodings */
2093  jsonapi_appendStringInfoChar(lex->strval, (char) ch);
2094  }
2095  else
2097 #endif /* FRONTEND */
2098  }
2099  }
2100  else if (lex->need_escapes)
2101  {
2102  if (hi_surrogate != -1)
2104 
                /* single-character escapes; append cases dropped by extraction */
2105  switch (*s)
2106  {
2107  case '"':
2108  case '\\':
2109  case '/':
2111  break;
2112  case 'b':
2114  break;
2115  case 'f':
2117  break;
2118  case 'n':
2120  break;
2121  case 'r':
2123  break;
2124  case 't':
2126  break;
2127  default:
2128 
2129  /*
2130  * Not a valid string escape, so signal error. We
2131  * adjust token_start so that just the escape sequence
2132  * is reported, not the whole string.
2133  */
2134  lex->token_start = s;
2136  }
2137  }
2138  else if (strchr("\"\\/bfnrt", *s) == NULL)
2139  {
2140  /*
2141  * Simpler processing if we're not bothered about de-escaping
2142  *
2143  * It's very tempting to remove the strchr() call here and
2144  * replace it with a switch statement, but testing so far has
2145  * shown it's not a performance win.
2146  */
2147  lex->token_start = s;
2149  }
2150  }
2151  else
2152  {
2153  const char *p = s;
2154 
2155  if (hi_surrogate != -1)
2157 
2158  /*
2159  * Skip to the first byte that requires special handling, so we
2160  * can batch calls to jsonapi_appendBinaryStringInfo.
2161  */
2162  while (p < end - sizeof(Vector8) &&
2163  !pg_lfind8('\\', (uint8 *) p, sizeof(Vector8)) &&
2164  !pg_lfind8('"', (uint8 *) p, sizeof(Vector8)) &&
2165  !pg_lfind8_le(31, (uint8 *) p, sizeof(Vector8)))
2166  p += sizeof(Vector8);
2167 
2168  for (; p < end; p++)
2169  {
2170  if (*p == '\\' || *p == '"')
2171  break;
2172  else if ((unsigned char) *p <= 31)
2173  {
2174  /* Per RFC4627, these characters MUST be escaped. */
2175  /*
2176  * Since *p isn't printable, exclude it from the context
2177  * string
2178  */
2179  lex->token_terminator = p;
2180  return JSON_ESCAPING_REQUIRED;
2181  }
2182  }
2183 
2184  if (lex->need_escapes)
2185  jsonapi_appendBinaryStringInfo(lex->strval, s, p - s);
2186 
2187  /*
2188  * s will be incremented at the top of the loop, so set it to just
2189  * behind our lookahead position
2190  */
2191  s = p - 1;
2192  }
2193  }
2194 
2195  if (hi_surrogate != -1)
2196  {
2197  lex->token_terminator = s + 1;
2199  }
2200 
2201 #ifdef JSONAPI_USE_PQEXPBUFFER
2202  if (lex->need_escapes && PQExpBufferBroken(lex->strval))
2203  return JSON_OUT_OF_MEMORY;
2204 #endif
2205 
2206  /* Hooray, we found the end of the string! */
2208  lex->token_terminator = s + 1;
2209  return JSON_SUCCESS;
2210 
2211 #undef FAIL_OR_INCOMPLETE_AT_CHAR_START
2212 #undef FAIL_AT_CHAR_END
2213 }
2214 
2215 /*
2216  * The next token in the input stream is known to be a number; lex it.
2217  *
2218  * In JSON, a number consists of four parts:
2219  *
2220  * (1) An optional minus sign ('-').
2221  *
2222  * (2) Either a single '0', or a string of one or more digits that does not
2223  * begin with a '0'.
2224  *
2225  * (3) An optional decimal part, consisting of a period ('.') followed by
2226  * one or more digits. (Note: While this part can be omitted
2227  * completely, it's not OK to have only the decimal point without
2228  * any digits afterwards.)
2229  *
2230  * (4) An optional exponent part, consisting of 'e' or 'E', optionally
2231  * followed by '+' or '-', followed by one or more digits. (Note:
2232  * As with the decimal part, if 'e' or 'E' is present, it must be
2233  * followed by at least one digit.)
2234  *
2235  * The 's' argument to this function points to the ostensible beginning
2236  * of part 2 - i.e. the character after any optional minus sign, or the
2237  * first character of the string if there is none.
2238  *
2239  * If num_err is not NULL, we return an error flag to *num_err rather than
2240  * raising an error for a badly-formed number. Also, if total_len is not NULL
2241  * the distance from lex->input to the token end+1 is returned to *total_len.
2242  */
/*
 * Lex a JSON number token (grammar described in the comment block above).
 *
 * NOTE(review): extraction dropped internal line 2323 (presumably the start
 * of the jsonapi_appendBinaryStringInfo(&lex->inc_state->partial_token, ...)
 * call whose continuation is visible below) and line 2338 (presumably
 * saving prev_token_terminator) — confirm against upstream.
 */
2243 static inline JsonParseErrorType
2244 json_lex_number(JsonLexContext *lex, const char *s,
2245  bool *num_err, size_t *total_len)
2246 {
2247  bool error = false;
2248  int len = s - lex->input;
2249 
2250  /* Part (1): leading sign indicator. */
2251  /* Caller already did this for us; so do nothing. */
2252 
2253  /* Part (2): parse main digit string. */
2254  if (len < lex->input_length && *s == '0')
2255  {
2256  s++;
2257  len++;
2258  }
2259  else if (len < lex->input_length && *s >= '1' && *s <= '9')
2260  {
2261  do
2262  {
2263  s++;
2264  len++;
2265  } while (len < lex->input_length && *s >= '0' && *s <= '9');
2266  }
2267  else
2268  error = true;
2269 
2270  /* Part (3): parse optional decimal portion. */
2271  if (len < lex->input_length && *s == '.')
2272  {
2273  s++;
2274  len++;
        /* a '.' must be followed by at least one digit */
2275  if (len == lex->input_length || *s < '0' || *s > '9')
2276  error = true;
2277  else
2278  {
2279  do
2280  {
2281  s++;
2282  len++;
2283  } while (len < lex->input_length && *s >= '0' && *s <= '9');
2284  }
2285  }
2286 
2287  /* Part (4): parse optional exponent. */
2288  if (len < lex->input_length && (*s == 'e' || *s == 'E'))
2289  {
2290  s++;
2291  len++;
2292  if (len < lex->input_length && (*s == '+' || *s == '-'))
2293  {
2294  s++;
2295  len++;
2296  }
        /* 'e'/'E' (and optional sign) must be followed by at least one digit */
2297  if (len == lex->input_length || *s < '0' || *s > '9')
2298  error = true;
2299  else
2300  {
2301  do
2302  {
2303  s++;
2304  len++;
2305  } while (len < lex->input_length && *s >= '0' && *s <= '9');
2306  }
2307  }
2308 
2309  /*
2310  * Check for trailing garbage. As in json_lex(), any alphanumeric stuff
2311  * here should be considered part of the token for error-reporting
2312  * purposes.
2313  */
2314  for (; len < lex->input_length && JSON_ALPHANUMERIC_CHAR(*s); s++, len++)
2315  error = true;
2316 
2317  if (total_len != NULL)
2318  *total_len = len;
2319 
    /* number may continue into the next chunk: stash it as a partial token */
2320  if (lex->incremental && !lex->inc_state->is_last_chunk &&
2321  len >= lex->input_length)
2322  {
2324  lex->token_start, s - lex->token_start);
2325  if (num_err != NULL)
2326  *num_err = error;
2327 
2328  return JSON_INCOMPLETE;
2329  }
2330  else if (num_err != NULL)
2331  {
2332  /* let the caller handle any error */
2333  *num_err = error;
2334  }
2335  else
2336  {
2337  /* return token endpoint */
2339  lex->token_terminator = s;
2340  /* handle error if any */
2341  if (error)
2342  return JSON_INVALID_TOKEN;
2343  }
2344 
2345  return JSON_SUCCESS;
2346 }
2347 
2348 /*
2349  * Report a parse error.
2350  *
2351  * lex->token_start and lex->token_terminator must identify the current token.
2352  */
/*
 * Map the current parse context to the appropriate JsonParseErrorType.
 *
 * NOTE(review): extraction dropped internal lines 2353-2354 (presumably the
 * signature "report_parse_error(JsonParseContext ctx, JsonLexContext *lex)")
 * and the case labels at 2369-2370, 2373-2375, 2377-2379 (presumably the
 * ARRAY_START, OBJECT_LABEL/COLON, and OBJECT_START/NEXT/COMMA contexts) —
 * confirm against upstream.
 */
2355 {
2356  /* Handle case where the input ended prematurely. */
2357  if (lex->token_start == NULL || lex->token_type == JSON_TOKEN_END)
2358  return JSON_EXPECTED_MORE;
2359 
2360  /* Otherwise choose the error type based on the parsing context. */
2361  switch (ctx)
2362  {
2363  case JSON_PARSE_END:
2364  return JSON_EXPECTED_END;
2365  case JSON_PARSE_VALUE:
2366  return JSON_EXPECTED_JSON;
2367  case JSON_PARSE_STRING:
2368  return JSON_EXPECTED_STRING;
2371  case JSON_PARSE_ARRAY_NEXT:
2372  return JSON_EXPECTED_ARRAY_NEXT;
2376  return JSON_EXPECTED_COLON;
2380  return JSON_EXPECTED_STRING;
2381  }
2382 
2383  /*
2384  * We don't use a default: case, so that the compiler will warn about
2385  * unhandled enum values.
2386  */
2387  Assert(false);
2388  return JSON_SUCCESS; /* silence stupider compilers */
2389 }
2390 
2391 /*
2392  * Construct an (already translated) detail message for a JSON error.
2393  *
2394  * The returned pointer should not be freed, the allocation is either static
2395  * or owned by the JsonLexContext.
2396  */
/*
 * Build the translated detail message for a JSON parse error (ownership
 * contract described in the comment block above).
 *
 * NOTE(review): extraction dropped internal lines — the function name line
 * (2398), the errormsg reset/creation (2407, 2409), and several case labels
 * (2426, 2436-2437, 2444, 2447, 2458, 2461, 2473, 2475, 2477, 2480,
 * 2489, 2494, 2496, 2498, 2512) — confirm against upstream.
 */
2397 char *
2399 {
2400  if (error == JSON_OUT_OF_MEMORY || lex == &failed_oom)
2401  {
2402  /* Short circuit. Allocating anything for this case is unhelpful. */
2403  return _("out of memory");
2404  }
2405 
2406  if (lex->errormsg)
2408  else
2410 
2411  /*
2412  * A helper for error messages that should print the current token. The
2413  * format must contain exactly one %.*s specifier.
2414  */
2415 #define json_token_error(lex, format) \
2416  jsonapi_appendStringInfo((lex)->errormsg, _(format), \
2417  (int) ((lex)->token_terminator - (lex)->token_start), \
2418  (lex)->token_start);
2419 
2420  switch (error)
2421  {
2422  case JSON_INCOMPLETE:
2423  case JSON_SUCCESS:
2424  /* fall through to the error code after switch */
2425  break;
2427  if (lex->incremental)
2428  return _("Recursive descent parser cannot use incremental lexer.");
2429  else
2430  return _("Incremental parser requires incremental lexer.");
2431  case JSON_NESTING_TOO_DEEP:
2432  return (_("JSON nested too deep, maximum permitted depth is 6400."));
2433  case JSON_ESCAPING_INVALID:
2434  json_token_error(lex, "Escape sequence \"\\%.*s\" is invalid.");
2435  break;
2438  _("Character with value 0x%02x must be escaped."),
2439  (unsigned char) *(lex->token_terminator));
2440  break;
2441  case JSON_EXPECTED_END:
2442  json_token_error(lex, "Expected end of input, but found \"%.*s\".");
2443  break;
2445  json_token_error(lex, "Expected array element or \"]\", but found \"%.*s\".");
2446  break;
2448  json_token_error(lex, "Expected \",\" or \"]\", but found \"%.*s\".");
2449  break;
2450  case JSON_EXPECTED_COLON:
2451  json_token_error(lex, "Expected \":\", but found \"%.*s\".");
2452  break;
2453  case JSON_EXPECTED_JSON:
2454  json_token_error(lex, "Expected JSON value, but found \"%.*s\".");
2455  break;
2456  case JSON_EXPECTED_MORE:
2457  return _("The input string ended unexpectedly.");
2459  json_token_error(lex, "Expected string or \"}\", but found \"%.*s\".");
2460  break;
2462  json_token_error(lex, "Expected \",\" or \"}\", but found \"%.*s\".");
2463  break;
2464  case JSON_EXPECTED_STRING:
2465  json_token_error(lex, "Expected string, but found \"%.*s\".");
2466  break;
2467  case JSON_INVALID_TOKEN:
2468  json_token_error(lex, "Token \"%.*s\" is invalid.");
2469  break;
2470  case JSON_OUT_OF_MEMORY:
2471  /* should have been handled above; use the error path */
2472  break;
2474  return _("\\u0000 cannot be converted to text.");
2476  return _("\"\\u\" must be followed by four hexadecimal digits.");
2478  /* note: this case is only reachable in frontend not backend */
2479  return _("Unicode escape values cannot be used for code point values above 007F when the encoding is not UTF8.");
2481 
2482  /*
2483  * Note: this case is only reachable in backend and not frontend.
2484  * #ifdef it away so the frontend doesn't try to link against
2485  * backend functionality.
2486  */
2487 #ifndef FRONTEND
2488  return psprintf(_("Unicode escape value could not be translated to the server's encoding %s."),
2490 #else
2491  Assert(false);
2492  break;
2493 #endif
2495  return _("Unicode high surrogate must not follow a high surrogate.");
2497  return _("Unicode low surrogate must follow a high surrogate.");
2499  /* fall through to the error code after switch */
2500  break;
2501  }
2502 #undef json_token_error
2503 
2504  /* Note that lex->errormsg can be NULL in shlib code. */
2505  if (lex->errormsg && lex->errormsg->len == 0)
2506  {
2507  /*
2508  * We don't use a default: case, so that the compiler will warn about
2509  * unhandled enum values. But this needs to be here anyway to cover
2510  * the possibility of an incorrect input.
2511  */
2513  "unexpected json parse error type: %d",
2514  (int) error);
2515  }
2516 
2517 #ifdef JSONAPI_USE_PQEXPBUFFER
2518  if (PQExpBufferBroken(lex->errormsg))
2519  return _("out of memory while constructing error description");
2520 #endif
2521 
2522  return lex->errormsg->data;
2523 }
uint8_t uint8
Definition: c.h:483
#define Assert(condition)
Definition: c.h:812
#define _(x)
Definition: elog.c:90
const char * str
#define token
Definition: indent_globs.h:126
long val
Definition: informix.c:689
int i
Definition: isn.c:72
JsonParseErrorType pg_parse_json_incremental(JsonLexContext *lex, const JsonSemAction *sem, const char *json, size_t len, bool is_last)
Definition: jsonapi.c:868
#define JSON_TD_MAX_STACK
Definition: jsonapi.c:431
JsonParseContext
Definition: jsonapi.c:94
@ JSON_PARSE_OBJECT_LABEL
Definition: jsonapi.c:100
@ JSON_PARSE_VALUE
Definition: jsonapi.c:95
@ JSON_PARSE_OBJECT_START
Definition: jsonapi.c:99
@ JSON_PARSE_ARRAY_START
Definition: jsonapi.c:97
@ JSON_PARSE_END
Definition: jsonapi.c:103
@ JSON_PARSE_OBJECT_NEXT
Definition: jsonapi.c:101
@ JSON_PARSE_ARRAY_NEXT
Definition: jsonapi.c:98
@ JSON_PARSE_OBJECT_COMMA
Definition: jsonapi.c:102
@ JSON_PARSE_STRING
Definition: jsonapi.c:96
#define TD_ENTRY(PROD)
Definition: jsonapi.c:239
JsonParserSem
Definition: jsonapi.c:123
@ JSON_SEM_SCALAR_CALL
Definition: jsonapi.c:134
@ JSON_SEM_OSTART
Definition: jsonapi.c:124
@ JSON_SEM_AELEM_START
Definition: jsonapi.c:131
@ JSON_SEM_AELEM_END
Definition: jsonapi.c:132
@ JSON_SEM_SCALAR_INIT
Definition: jsonapi.c:133
@ JSON_SEM_ASTART
Definition: jsonapi.c:126
@ JSON_SEM_OFIELD_INIT
Definition: jsonapi.c:128
@ JSON_SEM_OFIELD_END
Definition: jsonapi.c:130
@ JSON_SEM_OEND
Definition: jsonapi.c:125
@ JSON_SEM_OFIELD_START
Definition: jsonapi.c:129
@ JSON_SEM_AEND
Definition: jsonapi.c:127
static void set_fnull(JsonLexContext *lex, bool fnull)
Definition: jsonapi.c:668
#define JSON_NUM_TERMINALS
Definition: jsonapi.c:173
static char JSON_PROD_MORE_KEY_PAIRS[]
Definition: jsonapi.c:219
bool IsValidJsonNumber(const char *str, size_t len)
Definition: jsonapi.c:339
#define jsonapi_destroyStringInfo
Definition: jsonapi.c:84
static JsonParseErrorType json_lex_string(JsonLexContext *lex)
Definition: jsonapi.c:1959
#define REALLOC
Definition: jsonapi.c:59
#define JSON_ALPHANUMERIC_CHAR(c)
Definition: jsonapi.c:326
static char JSON_PROD_KEY_PAIRS[]
Definition: jsonapi.c:216
#define JSON_NUM_NONTERMINALS
Definition: jsonapi.c:174
#define JS_MAX_PROD_LEN
Definition: jsonapi.c:430
#define OFS(NT)
Definition: jsonapi.c:177
static char JSON_PROD_SCALAR_STRING[]
Definition: jsonapi.c:189
JsonParseErrorType pg_parse_json(JsonLexContext *lex, const JsonSemAction *sem)
Definition: jsonapi.c:744
static bool inc_lex_level(JsonLexContext *lex)
Definition: jsonapi.c:561
static char JSON_PROD_ARRAY_ELEMENTS[]
Definition: jsonapi.c:210
static bool have_prediction(JsonParserStack *pstack)
Definition: jsonapi.c:641
#define ALLOC0(size)
Definition: jsonapi.c:58
static void set_fname(JsonLexContext *lex, char *fname)
Definition: jsonapi.c:647
static char JSON_PROD_SCALAR_NUMBER[]
Definition: jsonapi.c:192
#define json_token_error(lex, format)
static char next_prediction(JsonParserStack *pstack)
Definition: jsonapi.c:634
static void push_prediction(JsonParserStack *pstack, td_entry entry)
Definition: jsonapi.c:620
#define IS_NT(x)
Definition: jsonapi.c:180
static JsonLexContext failed_oom
Definition: jsonapi.c:294
#define jsonapi_appendStringInfoCharMacro
Definition: jsonapi.c:79
static char JSON_PROD_GOAL[]
Definition: jsonapi.c:272
#define jsonapi_makeStringInfo
Definition: jsonapi.c:80
static JsonTokenType lex_peek(JsonLexContext *lex)
Definition: jsonapi.c:305
static char JSON_PROD_EPSILON[]
Definition: jsonapi.c:186
char * json_errdetail(JsonParseErrorType error, JsonLexContext *lex)
Definition: jsonapi.c:2397
static JsonParseErrorType parse_object(JsonLexContext *lex, const JsonSemAction *sem)
Definition: jsonapi.c:1384
#define STRDUP(s)
Definition: jsonapi.c:56
#define jsonapi_initStringInfo
Definition: jsonapi.c:81
#define JS_STACK_CHUNK_SIZE
Definition: jsonapi.c:429
void setJsonLexContextOwnsTokens(JsonLexContext *lex, bool owned_by_context)
Definition: jsonapi.c:542
static char JSON_PROD_SCALAR_NULL[]
Definition: jsonapi.c:201
static bool allocate_incremental_state(JsonLexContext *lex)
Definition: jsonapi.c:433
#define jsonapi_resetStringInfo
Definition: jsonapi.c:82
static JsonParseErrorType report_parse_error(JsonParseContext ctx, JsonLexContext *lex)
Definition: jsonapi.c:2353
static JsonParseErrorType lex_expect(JsonParseContext ctx, JsonLexContext *lex, JsonTokenType token)
Definition: jsonapi.c:317
static JsonIncrementalState failed_inc_oom
Definition: jsonapi.c:295
static JsonParseErrorType json_lex_number(JsonLexContext *lex, const char *s, bool *num_err, size_t *total_len)
Definition: jsonapi.c:2243
static char JSON_PROD_MORE_ARRAY_ELEMENTS[]
Definition: jsonapi.c:213
const JsonSemAction nullSemAction
Definition: jsonapi.c:287
#define IS_SEM(x)
Definition: jsonapi.c:179
static td_entry td_parser_table[JSON_NUM_NONTERMINALS][JSON_NUM_TERMINALS]
Definition: jsonapi.c:241
static JsonParseErrorType parse_scalar(JsonLexContext *lex, const JsonSemAction *sem)
Definition: jsonapi.c:1252
static char * get_fname(JsonLexContext *lex)
Definition: jsonapi.c:662
static char pop_prediction(JsonParserStack *pstack)
Definition: jsonapi.c:627
static JsonParseErrorType parse_object_field(JsonLexContext *lex, const JsonSemAction *sem)
Definition: jsonapi.c:1309
#define jsonapi_termStringInfo(s)
Definition: jsonapi.c:83
#define ALLOC(size)
Definition: jsonapi.c:57
#define jsonapi_appendBinaryStringInfo
Definition: jsonapi.c:77
static char JSON_PROD_SCALAR_FALSE[]
Definition: jsonapi.c:198
static bool get_fnull(JsonLexContext *lex)
Definition: jsonapi.c:674
JsonParseErrorType json_lex(JsonLexContext *lex)
Definition: jsonapi.c:1584
#define jsonapi_appendStringInfoChar
Definition: jsonapi.c:78
static char JSON_PROD_OBJECT[]
Definition: jsonapi.c:204
#define jsonapi_appendStringInfo
Definition: jsonapi.c:76
#define FREE(s)
Definition: jsonapi.c:69
JsonLexContext * makeJsonLexContextCstringLen(JsonLexContext *lex, const char *json, size_t len, int encoding, bool need_escapes)
Definition: jsonapi.c:392
JsonParseErrorType json_count_array_elements(JsonLexContext *lex, int *elements)
Definition: jsonapi.c:803
static JsonParseErrorType parse_array(JsonLexContext *lex, const JsonSemAction *sem)
Definition: jsonapi.c:1507
static JsonParseErrorType parse_array_element(JsonLexContext *lex, const JsonSemAction *sem)
Definition: jsonapi.c:1463
void freeJsonLexContext(JsonLexContext *lex)
Definition: jsonapi.c:687
JsonNonTerminal
Definition: jsonapi.c:114
@ JSON_NT_JSON
Definition: jsonapi.c:115
@ JSON_NT_MORE_ARRAY_ELEMENTS
Definition: jsonapi.c:117
@ JSON_NT_MORE_KEY_PAIRS
Definition: jsonapi.c:119
@ JSON_NT_ARRAY_ELEMENTS
Definition: jsonapi.c:116
@ JSON_NT_KEY_PAIRS
Definition: jsonapi.c:118
static char JSON_PROD_ARRAY[]
Definition: jsonapi.c:207
#define FAIL_OR_INCOMPLETE_AT_CHAR_START(code)
JsonLexContext * makeJsonLexContextIncremental(JsonLexContext *lex, int encoding, bool need_escapes)
Definition: jsonapi.c:497
static char JSON_PROD_SCALAR_TRUE[]
Definition: jsonapi.c:195
#define FAIL_AT_CHAR_END(code)
static void dec_lex_level(JsonLexContext *lex)
Definition: jsonapi.c:613
JsonParseErrorType(* json_struct_action)(void *state)
Definition: jsonapi.h:121
JsonParseErrorType(* json_aelem_action)(void *state, bool isnull)
Definition: jsonapi.h:123
#define JSONLEX_FREE_STRVAL
Definition: jsonapi.h:98
#define jsonapi_StrValType
Definition: jsonapi.h:73
JsonParseErrorType
Definition: jsonapi.h:35
@ JSON_OUT_OF_MEMORY
Definition: jsonapi.h:52
@ JSON_SEM_ACTION_FAILED
Definition: jsonapi.h:59
@ JSON_EXPECTED_ARRAY_FIRST
Definition: jsonapi.h:42
@ JSON_EXPECTED_MORE
Definition: jsonapi.h:47
@ JSON_UNICODE_HIGH_SURROGATE
Definition: jsonapi.h:57
@ JSON_EXPECTED_COLON
Definition: jsonapi.h:44
@ JSON_EXPECTED_OBJECT_FIRST
Definition: jsonapi.h:48
@ JSON_UNICODE_CODE_POINT_ZERO
Definition: jsonapi.h:53
@ JSON_INVALID_LEXER_TYPE
Definition: jsonapi.h:38
@ JSON_EXPECTED_STRING
Definition: jsonapi.h:50
@ JSON_UNICODE_ESCAPE_FORMAT
Definition: jsonapi.h:54
@ JSON_SUCCESS
Definition: jsonapi.h:36
@ JSON_UNICODE_UNTRANSLATABLE
Definition: jsonapi.h:56
@ JSON_EXPECTED_OBJECT_NEXT
Definition: jsonapi.h:49
@ JSON_ESCAPING_REQUIRED
Definition: jsonapi.h:41
@ JSON_EXPECTED_JSON
Definition: jsonapi.h:46
@ JSON_INVALID_TOKEN
Definition: jsonapi.h:51
@ JSON_ESCAPING_INVALID
Definition: jsonapi.h:40
@ JSON_INCOMPLETE
Definition: jsonapi.h:37
@ JSON_EXPECTED_END
Definition: jsonapi.h:45
@ JSON_EXPECTED_ARRAY_NEXT
Definition: jsonapi.h:43
@ JSON_UNICODE_HIGH_ESCAPE
Definition: jsonapi.h:55
@ JSON_NESTING_TOO_DEEP
Definition: jsonapi.h:39
@ JSON_UNICODE_LOW_SURROGATE
Definition: jsonapi.h:58
JsonParseErrorType(* json_ofield_action)(void *state, char *fname, bool isnull)
Definition: jsonapi.h:122
#define JSONLEX_FREE_STRUCT
Definition: jsonapi.h:97
JsonTokenType
Definition: jsonapi.h:18
@ JSON_TOKEN_COMMA
Definition: jsonapi.h:26
@ JSON_TOKEN_FALSE
Definition: jsonapi.h:29
@ JSON_TOKEN_END
Definition: jsonapi.h:31
@ JSON_TOKEN_TRUE
Definition: jsonapi.h:28
@ JSON_TOKEN_OBJECT_END
Definition: jsonapi.h:23
@ JSON_TOKEN_NULL
Definition: jsonapi.h:30
@ JSON_TOKEN_ARRAY_END
Definition: jsonapi.h:25
@ JSON_TOKEN_OBJECT_START
Definition: jsonapi.h:22
@ JSON_TOKEN_NUMBER
Definition: jsonapi.h:21
@ JSON_TOKEN_STRING
Definition: jsonapi.h:20
@ JSON_TOKEN_COLON
Definition: jsonapi.h:27
@ JSON_TOKEN_ARRAY_START
Definition: jsonapi.h:24
#define JSONLEX_CTX_OWNS_TOKENS
Definition: jsonapi.h:99
JsonParseErrorType(* json_scalar_action)(void *state, char *token, JsonTokenType tokentype)
Definition: jsonapi.h:124
bool pg_unicode_to_server_noerror(pg_wchar c, unsigned char *s)
Definition: mbutils.c:926
const char * GetDatabaseEncodingName(void)
Definition: mbutils.c:1267
const void size_t len
int32 encoding
Definition: pg_database.h:41
static bool pg_lfind8_le(uint8 key, uint8 *base, uint32 nelem)
Definition: pg_lfind.h:58
static bool pg_lfind8(uint8 key, uint8 *base, uint32 nelem)
Definition: pg_lfind.h:26
#define pg_utf_mblen
Definition: pg_wchar.h:633
@ PG_UTF8
Definition: pg_wchar.h:232
static unsigned char * unicode_to_utf8(pg_wchar c, unsigned char *utf8string)
Definition: pg_wchar.h:575
#define MAX_UNICODE_EQUIVALENT_STRING
Definition: pg_wchar.h:329
static pg_wchar surrogate_pair_to_codepoint(pg_wchar first, pg_wchar second)
Definition: pg_wchar.h:537
static bool is_utf16_surrogate_first(pg_wchar c)
Definition: pg_wchar.h:525
static bool is_utf16_surrogate_second(pg_wchar c)
Definition: pg_wchar.h:531
void check_stack_depth(void)
Definition: postgres.c:3574
#define PQExpBufferBroken(str)
Definition: pqexpbuffer.h:59
#define PQExpBufferDataBroken(buf)
Definition: pqexpbuffer.h:67
char * c
char * psprintf(const char *fmt,...)
Definition: psprintf.c:43
uint64 Vector8
Definition: simd.h:60
static void error(void)
Definition: sql-dyntest.c:147
void appendStringInfoString(StringInfo str, const char *s)
Definition: stringinfo.c:179
jsonapi_StrValType partial_token
Definition: jsonapi.c:167
bits32 flags
Definition: jsonapi.h:111
int input_encoding
Definition: jsonapi.h:104
const char * prev_token_terminator
Definition: jsonapi.h:107
struct jsonapi_StrValType * strval
Definition: jsonapi.h:117
bool need_escapes
Definition: jsonapi.h:116
struct jsonapi_StrValType * errormsg
Definition: jsonapi.h:118
const char * input
Definition: jsonapi.h:102
const char * token_start
Definition: jsonapi.h:105
JsonParserStack * pstack
Definition: jsonapi.h:114
size_t input_length
Definition: jsonapi.h:103
JsonIncrementalState * inc_state
Definition: jsonapi.h:115
bool incremental
Definition: jsonapi.h:108
const char * line_start
Definition: jsonapi.h:113
int line_number
Definition: jsonapi.h:112
JsonTokenType token_type
Definition: jsonapi.h:109
const char * token_terminator
Definition: jsonapi.h:106
bool * fnull
Definition: jsonapi.c:151
JsonTokenType scalar_tok
Definition: jsonapi.c:152
char * prediction
Definition: jsonapi.c:147
size_t pred_index
Definition: jsonapi.c:148
char * scalar_val
Definition: jsonapi.c:153
char ** fnames
Definition: jsonapi.c:150
json_struct_action array_end
Definition: jsonapi.h:151
json_struct_action object_start
Definition: jsonapi.h:148
json_ofield_action object_field_start
Definition: jsonapi.h:152
json_aelem_action array_element_start
Definition: jsonapi.h:154
json_scalar_action scalar
Definition: jsonapi.h:156
void * semstate
Definition: jsonapi.h:147
json_aelem_action array_element_end
Definition: jsonapi.h:155
json_struct_action array_start
Definition: jsonapi.h:150
json_struct_action object_end
Definition: jsonapi.h:149
json_ofield_action object_field_end
Definition: jsonapi.h:153
Definition: jsonapi.c:234
char * prod
Definition: jsonapi.c:236
size_t len
Definition: jsonapi.c:235
static JsonSemAction sem