PostgreSQL Source Code  git master
json.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * json.c
4  * JSON data type support.
5  *
6  * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  * IDENTIFICATION
10  * src/backend/utils/adt/json.c
11  *
12  *-------------------------------------------------------------------------
13  */
14 #include "postgres.h"
15 
16 #include "access/htup_details.h"
17 #include "access/transam.h"
18 #include "catalog/pg_type.h"
19 #include "executor/spi.h"
20 #include "funcapi.h"
21 #include "lib/stringinfo.h"
22 #include "libpq/pqformat.h"
23 #include "mb/pg_wchar.h"
24 #include "miscadmin.h"
25 #include "parser/parse_coerce.h"
26 #include "utils/array.h"
27 #include "utils/builtins.h"
28 #include "utils/date.h"
29 #include "utils/datetime.h"
30 #include "utils/lsyscache.h"
31 #include "utils/json.h"
32 #include "utils/jsonapi.h"
33 #include "utils/typcache.h"
34 #include "utils/syscache.h"
35 
36 /*
37  * The context of the parser is maintained by the recursive descent
38  * mechanism, but is passed explicitly to the error reporting routine
39  * for better diagnostics.
40  */
41 typedef enum /* contexts of JSON parser */
42 {
43  JSON_PARSE_VALUE, /* expecting a value */
44  JSON_PARSE_STRING, /* expecting a string (for a field name) */
45  JSON_PARSE_ARRAY_START, /* saw '[', expecting value or ']' */
46  JSON_PARSE_ARRAY_NEXT, /* saw array element, expecting ',' or ']' */
47  JSON_PARSE_OBJECT_START, /* saw '{', expecting label or '}' */
48  JSON_PARSE_OBJECT_LABEL, /* saw object label, expecting ':' */
49  JSON_PARSE_OBJECT_NEXT, /* saw object value, expecting ',' or '}' */
50  JSON_PARSE_OBJECT_COMMA, /* saw object ',', expecting next label */
51  JSON_PARSE_END /* saw the end of a document, expect nothing */
53 
54 typedef enum /* type categories for datum_to_json */
55 {
56  JSONTYPE_NULL, /* null, so we didn't bother to identify */
57  JSONTYPE_BOOL, /* boolean (built-in types only) */
58  JSONTYPE_NUMERIC, /* numeric (ditto) */
59  JSONTYPE_DATE, /* we use special formatting for datetimes */
62  JSONTYPE_JSON, /* JSON itself (and JSONB) */
63  JSONTYPE_ARRAY, /* array */
64  JSONTYPE_COMPOSITE, /* composite */
65  JSONTYPE_CAST, /* something with an explicit cast to JSON */
66  JSONTYPE_OTHER /* all else */
68 
69 typedef struct JsonAggState
70 {
76 } JsonAggState;
77 
78 static inline void json_lex(JsonLexContext *lex);
79 static inline void json_lex_string(JsonLexContext *lex);
80 static inline void json_lex_number(JsonLexContext *lex, char *s,
81  bool *num_err, int *total_len);
82 static inline void parse_scalar(JsonLexContext *lex, JsonSemAction *sem);
83 static void parse_object_field(JsonLexContext *lex, JsonSemAction *sem);
84 static void parse_object(JsonLexContext *lex, JsonSemAction *sem);
85 static void parse_array_element(JsonLexContext *lex, JsonSemAction *sem);
86 static void parse_array(JsonLexContext *lex, JsonSemAction *sem);
88 static void report_invalid_token(JsonLexContext *lex);
89 static int report_json_context(JsonLexContext *lex);
90 static char *extract_mb_char(char *s);
91 static void composite_to_json(Datum composite, StringInfo result,
92  bool use_line_feeds);
93 static void array_dim_to_json(StringInfo result, int dim, int ndims, int *dims,
94  Datum *vals, bool *nulls, int *valcount,
95  JsonTypeCategory tcategory, Oid outfuncoid,
96  bool use_line_feeds);
97 static void array_to_json_internal(Datum array, StringInfo result,
98  bool use_line_feeds);
99 static void json_categorize_type(Oid typoid,
100  JsonTypeCategory *tcategory,
101  Oid *outfuncoid);
102 static void datum_to_json(Datum val, bool is_null, StringInfo result,
103  JsonTypeCategory tcategory, Oid outfuncoid,
104  bool key_scalar);
105 static void add_json(Datum val, bool is_null, StringInfo result,
106  Oid val_type, bool key_scalar);
107 static text *catenate_stringinfo_string(StringInfo buffer, const char *addon);
108 
109 /* the null action object used for pure validation */
111 {
112  NULL, NULL, NULL, NULL, NULL,
113  NULL, NULL, NULL, NULL, NULL
114 };
115 
116 /* Recursive Descent parser support routines */
117 
118 /*
119  * lex_peek
120  *
121  * what is the current look_ahead token?
122 */
123 static inline JsonTokenType
125 {
126  return lex->token_type;
127 }
128 
129 /*
130  * lex_accept
131  *
132  * accept the look_ahead token and move the lexer to the next token if the
133  * look_ahead token matches the token parameter. In that case, and if required,
134  * also hand back the de-escaped lexeme.
135  *
136  * returns true if the token matched, false otherwise.
137  */
138 static inline bool
139 lex_accept(JsonLexContext *lex, JsonTokenType token, char **lexeme)
140 {
141  if (lex->token_type == token)
142  {
143  if (lexeme != NULL)
144  {
145  if (lex->token_type == JSON_TOKEN_STRING)
146  {
147  if (lex->strval != NULL)
148  *lexeme = pstrdup(lex->strval->data);
149  }
150  else
151  {
152  int len = (lex->token_terminator - lex->token_start);
153  char *tokstr = palloc(len + 1);
154 
155  memcpy(tokstr, lex->token_start, len);
156  tokstr[len] = '\0';
157  *lexeme = tokstr;
158  }
159  }
160  json_lex(lex);
161  return true;
162  }
163  return false;
164 }
165 
166 /*
167  * lex_expect
168  *
169  * move the lexer to the next token if the current look_ahead token matches
170  * the parameter token. Otherwise, report an error.
171  */
172 static inline void
174 {
175  if (!lex_accept(lex, token, NULL))
176  report_parse_error(ctx, lex);
177 }
178 
/*
 * JSON_ALPHANUMERIC_CHAR
 *
 * True if c should be treated as part of an alphanumeric token: an ASCII
 * letter or digit, an underscore, or any character for which
 * IS_HIGHBIT_SET() is true.
 *
 * Note: the argument is evaluated more than once, so do not pass an
 * expression that has side effects.
 */
#define JSON_ALPHANUMERIC_CHAR(c)  \
	(IS_HIGHBIT_SET(c) || \
	 (c) == '_' || \
	 ((c) >= '0' && (c) <= '9') || \
	 ((c) >= 'A' && (c) <= 'Z') || \
	 ((c) >= 'a' && (c) <= 'z'))
186 
187 /*
188  * Utility function to check if a string is a valid JSON number.
189  *
190  * str is of length len, and need not be null-terminated.
191  */
192 bool
193 IsValidJsonNumber(const char *str, int len)
194 {
195  bool numeric_error;
196  int total_len;
197  JsonLexContext dummy_lex;
198 
199  if (len <= 0)
200  return false;
201 
202  /*
203  * json_lex_number expects a leading '-' to have been eaten already.
204  *
205  * having to cast away the constness of str is ugly, but there's not much
206  * easy alternative.
207  */
208  if (*str == '-')
209  {
210  dummy_lex.input = (char *) str + 1;
211  dummy_lex.input_length = len - 1;
212  }
213  else
214  {
215  dummy_lex.input = (char *) str;
216  dummy_lex.input_length = len;
217  }
218 
219  json_lex_number(&dummy_lex, dummy_lex.input, &numeric_error, &total_len);
220 
221  return (!numeric_error) && (total_len == dummy_lex.input_length);
222 }
223 
224 /*
225  * Input.
226  */
227 Datum
229 {
230  char *json = PG_GETARG_CSTRING(0);
231  text *result = cstring_to_text(json);
232  JsonLexContext *lex;
233 
234  /* validate it */
235  lex = makeJsonLexContext(result, false);
236  pg_parse_json(lex, &nullSemAction);
237 
238  /* Internal representation is the same as text, for now */
239  PG_RETURN_TEXT_P(result);
240 }
241 
242 /*
243  * Output.
244  */
245 Datum
247 {
248  /* we needn't detoast because text_to_cstring will handle that */
249  Datum txt = PG_GETARG_DATUM(0);
250 
252 }
253 
254 /*
255  * Binary send.
256  */
257 Datum
259 {
260  text *t = PG_GETARG_TEXT_PP(0);
262 
263  pq_begintypsend(&buf);
266 }
267 
268 /*
269  * Binary receive.
270  */
271 Datum
273 {
275  char *str;
276  int nbytes;
277  JsonLexContext *lex;
278 
279  str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
280 
281  /* Validate it. */
282  lex = makeJsonLexContextCstringLen(str, nbytes, false);
283  pg_parse_json(lex, &nullSemAction);
284 
286 }
287 
288 /*
289  * makeJsonLexContext
290  *
291  * lex constructor, with or without StringInfo object
292  * for de-escaped lexemes.
293  *
294  * Without is better as it makes the processing faster, so only make one
295  * if really required.
296  *
297  * If you already have the json as a text* value, use the first of these
298  * functions, otherwise use makeJsonLexContextCstringLen().
299  */
301 makeJsonLexContext(text *json, bool need_escapes)
302 {
304  VARSIZE_ANY_EXHDR(json),
305  need_escapes);
306 }
307 
309 makeJsonLexContextCstringLen(char *json, int len, bool need_escapes)
310 {
311  JsonLexContext *lex = palloc0(sizeof(JsonLexContext));
312 
313  lex->input = lex->token_terminator = lex->line_start = json;
314  lex->line_number = 1;
315  lex->input_length = len;
316  if (need_escapes)
317  lex->strval = makeStringInfo();
318  return lex;
319 }
320 
321 /*
322  * pg_parse_json
323  *
324  * Publicly visible entry point for the JSON parser.
325  *
326  * lex is a lexing context, set up for the json to be processed by calling
327  * makeJsonLexContext(). sem is a structure of function pointers to semantic
328  * action routines to be called at appropriate spots during parsing, and a
329  * pointer to a state object to be passed to those routines.
330  */
331 void
333 {
334  JsonTokenType tok;
335 
336  /* get the initial token */
337  json_lex(lex);
338 
339  tok = lex_peek(lex);
340 
341  /* parse by recursive descent */
342  switch (tok)
343  {
345  parse_object(lex, sem);
346  break;
348  parse_array(lex, sem);
349  break;
350  default:
351  parse_scalar(lex, sem); /* json can be a bare scalar */
352  }
353 
355 
356 }
357 
358 /*
359  * json_count_array_elements
360  *
361  * Returns number of array elements in lex context at start of array token
362  * until end of array token at same nesting level.
363  *
364  * Designed to be called from array_start routines.
365  */
366 int
368 {
369  JsonLexContext copylex;
370  int count;
371 
372  /*
373  * It's safe to do this with a shallow copy because the lexical routines
374  * don't scribble on the input. They do scribble on the other pointers
375  * etc, so doing this with a copy makes that safe.
376  */
377  memcpy(&copylex, lex, sizeof(JsonLexContext));
378  copylex.strval = NULL; /* not interested in values here */
379  copylex.lex_level++;
380 
381  count = 0;
383  if (lex_peek(&copylex) != JSON_TOKEN_ARRAY_END)
384  {
385  do
386  {
387  count++;
388  parse_array_element(&copylex, &nullSemAction);
389  }
390  while (lex_accept(&copylex, JSON_TOKEN_COMMA, NULL));
391  }
393 
394  return count;
395 }
396 
397 /*
398  * Recursive Descent parse routines. There is one for each structural
399  * element in a json document:
400  * - scalar (string, number, true, false, null)
401  * - array ( [ ] )
402  * - array element
403  * - object ( { } )
404  * - object field
405  */
406 static inline void
408 {
409  char *val = NULL;
410  json_scalar_action sfunc = sem->scalar;
411  char **valaddr;
412  JsonTokenType tok = lex_peek(lex);
413 
414  valaddr = sfunc == NULL ? NULL : &val;
415 
416  /* a scalar must be a string, a number, true, false, or null */
417  switch (tok)
418  {
419  case JSON_TOKEN_TRUE:
420  lex_accept(lex, JSON_TOKEN_TRUE, valaddr);
421  break;
422  case JSON_TOKEN_FALSE:
423  lex_accept(lex, JSON_TOKEN_FALSE, valaddr);
424  break;
425  case JSON_TOKEN_NULL:
426  lex_accept(lex, JSON_TOKEN_NULL, valaddr);
427  break;
428  case JSON_TOKEN_NUMBER:
429  lex_accept(lex, JSON_TOKEN_NUMBER, valaddr);
430  break;
431  case JSON_TOKEN_STRING:
432  lex_accept(lex, JSON_TOKEN_STRING, valaddr);
433  break;
434  default:
436  }
437 
438  if (sfunc != NULL)
439  (*sfunc) (sem->semstate, val, tok);
440 }
441 
442 static void
444 {
445  /*
446  * An object field is "fieldname" : value where value can be a scalar,
447  * object or array. Note: in user-facing docs and error messages, we
448  * generally call a field name a "key".
449  */
450 
451  char *fname = NULL; /* keep compiler quiet */
454  bool isnull;
455  char **fnameaddr = NULL;
456  JsonTokenType tok;
457 
458  if (ostart != NULL || oend != NULL)
459  fnameaddr = &fname;
460 
461  if (!lex_accept(lex, JSON_TOKEN_STRING, fnameaddr))
463 
465 
466  tok = lex_peek(lex);
467  isnull = tok == JSON_TOKEN_NULL;
468 
469  if (ostart != NULL)
470  (*ostart) (sem->semstate, fname, isnull);
471 
472  switch (tok)
473  {
475  parse_object(lex, sem);
476  break;
478  parse_array(lex, sem);
479  break;
480  default:
481  parse_scalar(lex, sem);
482  }
483 
484  if (oend != NULL)
485  (*oend) (sem->semstate, fname, isnull);
486 }
487 
488 static void
490 {
491  /*
492  * an object is a possibly empty sequence of object fields, separated by
493  * commas and surrounded by curly braces.
494  */
495  json_struct_action ostart = sem->object_start;
496  json_struct_action oend = sem->object_end;
497  JsonTokenType tok;
498 
500 
501  if (ostart != NULL)
502  (*ostart) (sem->semstate);
503 
504  /*
505  * Data inside an object is at a higher nesting level than the object
506  * itself. Note that we increment this after we call the semantic routine
507  * for the object start and restore it before we call the routine for the
508  * object end.
509  */
510  lex->lex_level++;
511 
512  /* we know this will succeed, just clearing the token */
514 
515  tok = lex_peek(lex);
516  switch (tok)
517  {
518  case JSON_TOKEN_STRING:
519  parse_object_field(lex, sem);
520  while (lex_accept(lex, JSON_TOKEN_COMMA, NULL))
521  parse_object_field(lex, sem);
522  break;
524  break;
525  default:
526  /* case of an invalid initial token inside the object */
528  }
529 
531 
532  lex->lex_level--;
533 
534  if (oend != NULL)
535  (*oend) (sem->semstate);
536 }
537 
538 static void
540 {
543  JsonTokenType tok = lex_peek(lex);
544 
545  bool isnull;
546 
547  isnull = tok == JSON_TOKEN_NULL;
548 
549  if (astart != NULL)
550  (*astart) (sem->semstate, isnull);
551 
552  /* an array element is any object, array or scalar */
553  switch (tok)
554  {
556  parse_object(lex, sem);
557  break;
559  parse_array(lex, sem);
560  break;
561  default:
562  parse_scalar(lex, sem);
563  }
564 
565  if (aend != NULL)
566  (*aend) (sem->semstate, isnull);
567 }
568 
569 static void
571 {
572  /*
573  * an array is a possibly empty sequence of array elements, separated by
574  * commas and surrounded by square brackets.
575  */
576  json_struct_action astart = sem->array_start;
577  json_struct_action aend = sem->array_end;
578 
580 
581  if (astart != NULL)
582  (*astart) (sem->semstate);
583 
584  /*
585  * Data inside an array is at a higher nesting level than the array
586  * itself. Note that we increment this after we call the semantic routine
587  * for the array start and restore it before we call the routine for the
588  * array end.
589  */
590  lex->lex_level++;
591 
593  if (lex_peek(lex) != JSON_TOKEN_ARRAY_END)
594  {
595 
596  parse_array_element(lex, sem);
597 
598  while (lex_accept(lex, JSON_TOKEN_COMMA, NULL))
599  parse_array_element(lex, sem);
600  }
601 
603 
604  lex->lex_level--;
605 
606  if (aend != NULL)
607  (*aend) (sem->semstate);
608 }
609 
610 /*
611  * Lex one token from the input stream.
612  */
613 static inline void
615 {
616  char *s;
617  int len;
618 
619  /* Skip leading whitespace. */
620  s = lex->token_terminator;
621  len = s - lex->input;
622  while (len < lex->input_length &&
623  (*s == ' ' || *s == '\t' || *s == '\n' || *s == '\r'))
624  {
625  if (*s == '\n')
626  ++lex->line_number;
627  ++s;
628  ++len;
629  }
630  lex->token_start = s;
631 
632  /* Determine token type. */
633  if (len >= lex->input_length)
634  {
635  lex->token_start = NULL;
637  lex->token_terminator = s;
638  lex->token_type = JSON_TOKEN_END;
639  }
640  else
641  switch (*s)
642  {
643  /* Single-character token, some kind of punctuation mark. */
644  case '{':
646  lex->token_terminator = s + 1;
648  break;
649  case '}':
651  lex->token_terminator = s + 1;
653  break;
654  case '[':
656  lex->token_terminator = s + 1;
658  break;
659  case ']':
661  lex->token_terminator = s + 1;
663  break;
664  case ',':
666  lex->token_terminator = s + 1;
668  break;
669  case ':':
671  lex->token_terminator = s + 1;
673  break;
674  case '"':
675  /* string */
676  json_lex_string(lex);
678  break;
679  case '-':
680  /* Negative number. */
681  json_lex_number(lex, s + 1, NULL, NULL);
683  break;
684  case '0':
685  case '1':
686  case '2':
687  case '3':
688  case '4':
689  case '5':
690  case '6':
691  case '7':
692  case '8':
693  case '9':
694  /* Positive number. */
695  json_lex_number(lex, s, NULL, NULL);
697  break;
698  default:
699  {
700  char *p;
701 
702  /*
703  * We're not dealing with a string, number, legal
704  * punctuation mark, or end of string. The only legal
705  * tokens we might find here are true, false, and null,
706  * but for error reporting purposes we scan until we see a
707  * non-alphanumeric character. That way, we can report
708  * the whole word as an unexpected token, rather than just
709  * some unintuitive prefix thereof.
710  */
711  for (p = s; p - s < lex->input_length - len && JSON_ALPHANUMERIC_CHAR(*p); p++)
712  /* skip */ ;
713 
714  /*
715  * We got some sort of unexpected punctuation or an
716  * otherwise unexpected character, so just complain about
717  * that one character.
718  */
719  if (p == s)
720  {
722  lex->token_terminator = s + 1;
724  }
725 
726  /*
727  * We've got a real alphanumeric token here. If it
728  * happens to be true, false, or null, all is well. If
729  * not, error out.
730  */
732  lex->token_terminator = p;
733  if (p - s == 4)
734  {
735  if (memcmp(s, "true", 4) == 0)
737  else if (memcmp(s, "null", 4) == 0)
739  else
741  }
742  else if (p - s == 5 && memcmp(s, "false", 5) == 0)
744  else
746 
747  }
748  } /* end of switch */
749 }
750 
751 /*
752  * The next token in the input stream is known to be a string; lex it.
753  */
754 static inline void
756 {
757  char *s;
758  int len;
759  int hi_surrogate = -1;
760 
761  if (lex->strval != NULL)
762  resetStringInfo(lex->strval);
763 
764  Assert(lex->input_length > 0);
765  s = lex->token_start;
766  len = lex->token_start - lex->input;
767  for (;;)
768  {
769  s++;
770  len++;
771  /* Premature end of the string. */
772  if (len >= lex->input_length)
773  {
774  lex->token_terminator = s;
776  }
777  else if (*s == '"')
778  break;
779  else if ((unsigned char) *s < 32)
780  {
781  /* Per RFC4627, these characters MUST be escaped. */
782  /* Since *s isn't printable, exclude it from the context string */
783  lex->token_terminator = s;
784  ereport(ERROR,
785  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
786  errmsg("invalid input syntax for type %s", "json"),
787  errdetail("Character with value 0x%02x must be escaped.",
788  (unsigned char) *s),
789  report_json_context(lex)));
790  }
791  else if (*s == '\\')
792  {
793  /* OK, we have an escape character. */
794  s++;
795  len++;
796  if (len >= lex->input_length)
797  {
798  lex->token_terminator = s;
800  }
801  else if (*s == 'u')
802  {
803  int i;
804  int ch = 0;
805 
806  for (i = 1; i <= 4; i++)
807  {
808  s++;
809  len++;
810  if (len >= lex->input_length)
811  {
812  lex->token_terminator = s;
814  }
815  else if (*s >= '0' && *s <= '9')
816  ch = (ch * 16) + (*s - '0');
817  else if (*s >= 'a' && *s <= 'f')
818  ch = (ch * 16) + (*s - 'a') + 10;
819  else if (*s >= 'A' && *s <= 'F')
820  ch = (ch * 16) + (*s - 'A') + 10;
821  else
822  {
823  lex->token_terminator = s + pg_mblen(s);
824  ereport(ERROR,
825  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
826  errmsg("invalid input syntax for type %s",
827  "json"),
828  errdetail("\"\\u\" must be followed by four hexadecimal digits."),
829  report_json_context(lex)));
830  }
831  }
832  if (lex->strval != NULL)
833  {
834  char utf8str[5];
835  int utf8len;
836 
837  if (ch >= 0xd800 && ch <= 0xdbff)
838  {
839  if (hi_surrogate != -1)
840  ereport(ERROR,
841  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
842  errmsg("invalid input syntax for type %s",
843  "json"),
844  errdetail("Unicode high surrogate must not follow a high surrogate."),
845  report_json_context(lex)));
846  hi_surrogate = (ch & 0x3ff) << 10;
847  continue;
848  }
849  else if (ch >= 0xdc00 && ch <= 0xdfff)
850  {
851  if (hi_surrogate == -1)
852  ereport(ERROR,
853  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
854  errmsg("invalid input syntax for type %s", "json"),
855  errdetail("Unicode low surrogate must follow a high surrogate."),
856  report_json_context(lex)));
857  ch = 0x10000 + hi_surrogate + (ch & 0x3ff);
858  hi_surrogate = -1;
859  }
860 
861  if (hi_surrogate != -1)
862  ereport(ERROR,
863  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
864  errmsg("invalid input syntax for type %s", "json"),
865  errdetail("Unicode low surrogate must follow a high surrogate."),
866  report_json_context(lex)));
867 
868  /*
869  * For UTF8, replace the escape sequence by the actual
870  * utf8 character in lex->strval. Do this also for other
871  * encodings if the escape designates an ASCII character,
872  * otherwise raise an error.
873  */
874 
875  if (ch == 0)
876  {
877  /* We can't allow this, since our TEXT type doesn't */
878  ereport(ERROR,
879  (errcode(ERRCODE_UNTRANSLATABLE_CHARACTER),
880  errmsg("unsupported Unicode escape sequence"),
881  errdetail("\\u0000 cannot be converted to text."),
882  report_json_context(lex)));
883  }
884  else if (GetDatabaseEncoding() == PG_UTF8)
885  {
886  unicode_to_utf8(ch, (unsigned char *) utf8str);
887  utf8len = pg_utf_mblen((unsigned char *) utf8str);
888  appendBinaryStringInfo(lex->strval, utf8str, utf8len);
889  }
890  else if (ch <= 0x007f)
891  {
892  /*
893  * This is the only way to designate things like a
894  * form feed character in JSON, so it's useful in all
895  * encodings.
896  */
897  appendStringInfoChar(lex->strval, (char) ch);
898  }
899  else
900  {
901  ereport(ERROR,
902  (errcode(ERRCODE_UNTRANSLATABLE_CHARACTER),
903  errmsg("unsupported Unicode escape sequence"),
904  errdetail("Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8."),
905  report_json_context(lex)));
906  }
907 
908  }
909  }
910  else if (lex->strval != NULL)
911  {
912  if (hi_surrogate != -1)
913  ereport(ERROR,
914  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
915  errmsg("invalid input syntax for type %s",
916  "json"),
917  errdetail("Unicode low surrogate must follow a high surrogate."),
918  report_json_context(lex)));
919 
920  switch (*s)
921  {
922  case '"':
923  case '\\':
924  case '/':
925  appendStringInfoChar(lex->strval, *s);
926  break;
927  case 'b':
928  appendStringInfoChar(lex->strval, '\b');
929  break;
930  case 'f':
931  appendStringInfoChar(lex->strval, '\f');
932  break;
933  case 'n':
934  appendStringInfoChar(lex->strval, '\n');
935  break;
936  case 'r':
937  appendStringInfoChar(lex->strval, '\r');
938  break;
939  case 't':
940  appendStringInfoChar(lex->strval, '\t');
941  break;
942  default:
943  /* Not a valid string escape, so error out. */
944  lex->token_terminator = s + pg_mblen(s);
945  ereport(ERROR,
946  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
947  errmsg("invalid input syntax for type %s",
948  "json"),
949  errdetail("Escape sequence \"\\%s\" is invalid.",
950  extract_mb_char(s)),
951  report_json_context(lex)));
952  }
953  }
954  else if (strchr("\"\\/bfnrt", *s) == NULL)
955  {
956  /*
957  * Simpler processing if we're not bothered about de-escaping
958  *
959  * It's very tempting to remove the strchr() call here and
960  * replace it with a switch statement, but testing so far has
961  * shown it's not a performance win.
962  */
963  lex->token_terminator = s + pg_mblen(s);
964  ereport(ERROR,
965  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
966  errmsg("invalid input syntax for type %s", "json"),
967  errdetail("Escape sequence \"\\%s\" is invalid.",
968  extract_mb_char(s)),
969  report_json_context(lex)));
970  }
971 
972  }
973  else if (lex->strval != NULL)
974  {
975  if (hi_surrogate != -1)
976  ereport(ERROR,
977  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
978  errmsg("invalid input syntax for type %s", "json"),
979  errdetail("Unicode low surrogate must follow a high surrogate."),
980  report_json_context(lex)));
981 
982  appendStringInfoChar(lex->strval, *s);
983  }
984 
985  }
986 
987  if (hi_surrogate != -1)
988  ereport(ERROR,
989  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
990  errmsg("invalid input syntax for type %s", "json"),
991  errdetail("Unicode low surrogate must follow a high surrogate."),
992  report_json_context(lex)));
993 
994  /* Hooray, we found the end of the string! */
996  lex->token_terminator = s + 1;
997 }
998 
999 /*
1000  * The next token in the input stream is known to be a number; lex it.
1001  *
1002  * In JSON, a number consists of four parts:
1003  *
1004  * (1) An optional minus sign ('-').
1005  *
1006  * (2) Either a single '0', or a string of one or more digits that does not
1007  * begin with a '0'.
1008  *
1009  * (3) An optional decimal part, consisting of a period ('.') followed by
1010  * one or more digits. (Note: While this part can be omitted
1011  * completely, it's not OK to have only the decimal point without
1012  * any digits afterwards.)
1013  *
1014  * (4) An optional exponent part, consisting of 'e' or 'E', optionally
1015  * followed by '+' or '-', followed by one or more digits. (Note:
1016  * As with the decimal part, if 'e' or 'E' is present, it must be
1017  * followed by at least one digit.)
1018  *
1019  * The 's' argument to this function points to the ostensible beginning
1020  * of part 2 - i.e. the character after any optional minus sign, or the
1021  * first character of the string if there is none.
1022  *
1023  * If num_err is not NULL, we return an error flag to *num_err rather than
1024  * raising an error for a badly-formed number. Also, if total_len is not NULL
1025  * the distance from lex->input to the token end+1 is returned to *total_len.
1026  */
1027 static inline void
1029  bool *num_err, int *total_len)
1030 {
1031  bool error = false;
1032  int len = s - lex->input;
1033 
1034  /* Part (1): leading sign indicator. */
1035  /* Caller already did this for us; so do nothing. */
1036 
1037  /* Part (2): parse main digit string. */
1038  if (len < lex->input_length && *s == '0')
1039  {
1040  s++;
1041  len++;
1042  }
1043  else if (len < lex->input_length && *s >= '1' && *s <= '9')
1044  {
1045  do
1046  {
1047  s++;
1048  len++;
1049  } while (len < lex->input_length && *s >= '0' && *s <= '9');
1050  }
1051  else
1052  error = true;
1053 
1054  /* Part (3): parse optional decimal portion. */
1055  if (len < lex->input_length && *s == '.')
1056  {
1057  s++;
1058  len++;
1059  if (len == lex->input_length || *s < '0' || *s > '9')
1060  error = true;
1061  else
1062  {
1063  do
1064  {
1065  s++;
1066  len++;
1067  } while (len < lex->input_length && *s >= '0' && *s <= '9');
1068  }
1069  }
1070 
1071  /* Part (4): parse optional exponent. */
1072  if (len < lex->input_length && (*s == 'e' || *s == 'E'))
1073  {
1074  s++;
1075  len++;
1076  if (len < lex->input_length && (*s == '+' || *s == '-'))
1077  {
1078  s++;
1079  len++;
1080  }
1081  if (len == lex->input_length || *s < '0' || *s > '9')
1082  error = true;
1083  else
1084  {
1085  do
1086  {
1087  s++;
1088  len++;
1089  } while (len < lex->input_length && *s >= '0' && *s <= '9');
1090  }
1091  }
1092 
1093  /*
1094  * Check for trailing garbage. As in json_lex(), any alphanumeric stuff
1095  * here should be considered part of the token for error-reporting
1096  * purposes.
1097  */
1098  for (; len < lex->input_length && JSON_ALPHANUMERIC_CHAR(*s); s++, len++)
1099  error = true;
1100 
1101  if (total_len != NULL)
1102  *total_len = len;
1103 
1104  if (num_err != NULL)
1105  {
1106  /* let the caller handle any error */
1107  *num_err = error;
1108  }
1109  else
1110  {
1111  /* return token endpoint */
1113  lex->token_terminator = s;
1114  /* handle error if any */
1115  if (error)
1116  report_invalid_token(lex);
1117  }
1118 }
1119 
1120 /*
1121  * Report a parse error.
1122  *
1123  * lex->token_start and lex->token_terminator must identify the current token.
1124  */
1125 static void
1127 {
1128  char *token;
1129  int toklen;
1130 
1131  /* Handle case where the input ended prematurely. */
1132  if (lex->token_start == NULL || lex->token_type == JSON_TOKEN_END)
1133  ereport(ERROR,
1134  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1135  errmsg("invalid input syntax for type %s", "json"),
1136  errdetail("The input string ended unexpectedly."),
1137  report_json_context(lex)));
1138 
1139  /* Separate out the current token. */
1140  toklen = lex->token_terminator - lex->token_start;
1141  token = palloc(toklen + 1);
1142  memcpy(token, lex->token_start, toklen);
1143  token[toklen] = '\0';
1144 
1145  /* Complain, with the appropriate detail message. */
1146  if (ctx == JSON_PARSE_END)
1147  ereport(ERROR,
1148  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1149  errmsg("invalid input syntax for type %s", "json"),
1150  errdetail("Expected end of input, but found \"%s\".",
1151  token),
1152  report_json_context(lex)));
1153  else
1154  {
1155  switch (ctx)
1156  {
1157  case JSON_PARSE_VALUE:
1158  ereport(ERROR,
1159  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1160  errmsg("invalid input syntax for type %s", "json"),
1161  errdetail("Expected JSON value, but found \"%s\".",
1162  token),
1163  report_json_context(lex)));
1164  break;
1165  case JSON_PARSE_STRING:
1166  ereport(ERROR,
1167  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1168  errmsg("invalid input syntax for type %s", "json"),
1169  errdetail("Expected string, but found \"%s\".",
1170  token),
1171  report_json_context(lex)));
1172  break;
1174  ereport(ERROR,
1175  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1176  errmsg("invalid input syntax for type %s", "json"),
1177  errdetail("Expected array element or \"]\", but found \"%s\".",
1178  token),
1179  report_json_context(lex)));
1180  break;
1181  case JSON_PARSE_ARRAY_NEXT:
1182  ereport(ERROR,
1183  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1184  errmsg("invalid input syntax for type %s", "json"),
1185  errdetail("Expected \",\" or \"]\", but found \"%s\".",
1186  token),
1187  report_json_context(lex)));
1188  break;
1190  ereport(ERROR,
1191  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1192  errmsg("invalid input syntax for type %s", "json"),
1193  errdetail("Expected string or \"}\", but found \"%s\".",
1194  token),
1195  report_json_context(lex)));
1196  break;
1198  ereport(ERROR,
1199  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1200  errmsg("invalid input syntax for type %s", "json"),
1201  errdetail("Expected \":\", but found \"%s\".",
1202  token),
1203  report_json_context(lex)));
1204  break;
1206  ereport(ERROR,
1207  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1208  errmsg("invalid input syntax for type %s", "json"),
1209  errdetail("Expected \",\" or \"}\", but found \"%s\".",
1210  token),
1211  report_json_context(lex)));
1212  break;
1214  ereport(ERROR,
1215  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1216  errmsg("invalid input syntax for type %s", "json"),
1217  errdetail("Expected string, but found \"%s\".",
1218  token),
1219  report_json_context(lex)));
1220  break;
1221  default:
1222  elog(ERROR, "unexpected json parse state: %d", ctx);
1223  }
1224  }
1225 }
1226 
1227 /*
1228  * Report an invalid input token.
1229  *
1230  * lex->token_start and lex->token_terminator must identify the token.
1231  */
1232 static void
1234 {
1235  char *token;
1236  int toklen;
1237 
1238  /* Separate out the offending token. */
1239  toklen = lex->token_terminator - lex->token_start;
1240  token = palloc(toklen + 1);
1241  memcpy(token, lex->token_start, toklen);
1242  token[toklen] = '\0';
1243 
1244  ereport(ERROR,
1245  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1246  errmsg("invalid input syntax for type %s", "json"),
1247  errdetail("Token \"%s\" is invalid.", token),
1248  report_json_context(lex)));
1249 }
1250 
1251 /*
1252  * Report a CONTEXT line for bogus JSON input.
1253  *
1254  * lex->token_terminator must be set to identify the spot where we detected
1255  * the error. Note that lex->token_start might be NULL, in case we recognized
1256  * error at EOF.
1257  *
1258  * The return value isn't meaningful, but we make it non-void so that this
1259  * can be invoked inside ereport().
1260  */
1261 static int
1263 {
1264  const char *context_start;
1265  const char *context_end;
1266  const char *line_start;
1267  int line_number;
1268  char *ctxt;
1269  int ctxtlen;
1270  const char *prefix;
1271  const char *suffix;
1272 
1273  /* Choose boundaries for the part of the input we will display */
1274  context_start = lex->input;
1275  context_end = lex->token_terminator;
1276  line_start = context_start;
1277  line_number = 1;
1278  for (;;)
1279  {
1280  /* Always advance over newlines */
1281  if (context_start < context_end && *context_start == '\n')
1282  {
1283  context_start++;
1284  line_start = context_start;
1285  line_number++;
1286  continue;
1287  }
1288  /* Otherwise, done as soon as we are close enough to context_end */
1289  if (context_end - context_start < 50)
1290  break;
1291  /* Advance to next multibyte character */
1292  if (IS_HIGHBIT_SET(*context_start))
1293  context_start += pg_mblen(context_start);
1294  else
1295  context_start++;
1296  }
1297 
1298  /*
1299  * We add "..." to indicate that the excerpt doesn't start at the
1300  * beginning of the line ... but if we're within 3 characters of the
1301  * beginning of the line, we might as well just show the whole line.
1302  */
1303  if (context_start - line_start <= 3)
1304  context_start = line_start;
1305 
1306  /* Get a null-terminated copy of the data to present */
1307  ctxtlen = context_end - context_start;
1308  ctxt = palloc(ctxtlen + 1);
1309  memcpy(ctxt, context_start, ctxtlen);
1310  ctxt[ctxtlen] = '\0';
1311 
1312  /*
1313  * Show the context, prefixing "..." if not starting at start of line, and
1314  * suffixing "..." if not ending at end of line.
1315  */
1316  prefix = (context_start > line_start) ? "..." : "";
1317  suffix = (lex->token_type != JSON_TOKEN_END && context_end - lex->input < lex->input_length && *context_end != '\n' && *context_end != '\r') ? "..." : "";
1318 
1319  return errcontext("JSON data, line %d: %s%s%s",
1320  line_number, prefix, ctxt, suffix);
1321 }
1322 
/*
 * Extract a single, possibly multi-byte char from the input string.
 *
 * The character's byte length is obtained from pg_mblen(); the result is a
 * freshly palloc'd, NUL-terminated copy of just that character.
 */
static char *
extract_mb_char(char *s)
{
	char	   *res;
	int			len;

	len = pg_mblen(s);
	res = palloc(len + 1);
	memcpy(res, s, len);
	res[len] = '\0';

	return res;
}
1339 
1340 /*
1341  * Determine how we want to print values of a given type in datum_to_json.
1342  *
1343  * Given the datatype OID, return its JsonTypeCategory, as well as the type's
1344  * output function OID. If the returned category is JSONTYPE_CAST, we
1345  * return the OID of the type->JSON cast function instead.
1346  */
1347 static void
1349  JsonTypeCategory *tcategory,
1350  Oid *outfuncoid)
1351 {
1352  bool typisvarlena;
1353 
1354  /* Look through any domain */
1355  typoid = getBaseType(typoid);
1356 
1357  *outfuncoid = InvalidOid;
1358 
1359  /*
1360  * We need to get the output function for everything except date and
1361  * timestamp types, array and composite types, booleans, and non-builtin
1362  * types where there's a cast to json.
1363  */
1364 
1365  switch (typoid)
1366  {
1367  case BOOLOID:
1368  *tcategory = JSONTYPE_BOOL;
1369  break;
1370 
1371  case INT2OID:
1372  case INT4OID:
1373  case INT8OID:
1374  case FLOAT4OID:
1375  case FLOAT8OID:
1376  case NUMERICOID:
1377  getTypeOutputInfo(typoid, outfuncoid, &typisvarlena);
1378  *tcategory = JSONTYPE_NUMERIC;
1379  break;
1380 
1381  case DATEOID:
1382  *tcategory = JSONTYPE_DATE;
1383  break;
1384 
1385  case TIMESTAMPOID:
1386  *tcategory = JSONTYPE_TIMESTAMP;
1387  break;
1388 
1389  case TIMESTAMPTZOID:
1390  *tcategory = JSONTYPE_TIMESTAMPTZ;
1391  break;
1392 
1393  case JSONOID:
1394  case JSONBOID:
1395  getTypeOutputInfo(typoid, outfuncoid, &typisvarlena);
1396  *tcategory = JSONTYPE_JSON;
1397  break;
1398 
1399  default:
1400  /* Check for arrays and composites */
1401  if (OidIsValid(get_element_type(typoid)) || typoid == ANYARRAYOID
1402  || typoid == RECORDARRAYOID)
1403  *tcategory = JSONTYPE_ARRAY;
1404  else if (type_is_rowtype(typoid)) /* includes RECORDOID */
1405  *tcategory = JSONTYPE_COMPOSITE;
1406  else
1407  {
1408  /* It's probably the general case ... */
1409  *tcategory = JSONTYPE_OTHER;
1410  /* but let's look for a cast to json, if it's not built-in */
1411  if (typoid >= FirstNormalObjectId)
1412  {
1413  Oid castfunc;
1414  CoercionPathType ctype;
1415 
1416  ctype = find_coercion_pathway(JSONOID, typoid,
1418  &castfunc);
1419  if (ctype == COERCION_PATH_FUNC && OidIsValid(castfunc))
1420  {
1421  *tcategory = JSONTYPE_CAST;
1422  *outfuncoid = castfunc;
1423  }
1424  else
1425  {
1426  /* non builtin type with no cast */
1427  getTypeOutputInfo(typoid, outfuncoid, &typisvarlena);
1428  }
1429  }
1430  else
1431  {
1432  /* any other builtin type */
1433  getTypeOutputInfo(typoid, outfuncoid, &typisvarlena);
1434  }
1435  }
1436  break;
1437  }
1438 }
1439 
1440 /*
1441  * Turn a Datum into JSON text, appending the string to "result".
1442  *
1443  * tcategory and outfuncoid are from a previous call to json_categorize_type,
1444  * except that if is_null is true then they can be invalid.
1445  *
1446  * If key_scalar is true, the value is being printed as a key, so insist
1447  * it's of an acceptable type, and force it to be quoted.
1448  */
1449 static void
1450 datum_to_json(Datum val, bool is_null, StringInfo result,
1451  JsonTypeCategory tcategory, Oid outfuncoid,
1452  bool key_scalar)
1453 {
1454  char *outputstr;
1455  text *jsontext;
1456 
1458 
1459  /* callers are expected to ensure that null keys are not passed in */
1460  Assert(!(key_scalar && is_null));
1461 
1462  if (is_null)
1463  {
1464  appendStringInfoString(result, "null");
1465  return;
1466  }
1467 
1468  if (key_scalar &&
1469  (tcategory == JSONTYPE_ARRAY ||
1470  tcategory == JSONTYPE_COMPOSITE ||
1471  tcategory == JSONTYPE_JSON ||
1472  tcategory == JSONTYPE_CAST))
1473  ereport(ERROR,
1474  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1475  errmsg("key value must be scalar, not array, composite, or json")));
1476 
1477  switch (tcategory)
1478  {
1479  case JSONTYPE_ARRAY:
1480  array_to_json_internal(val, result, false);
1481  break;
1482  case JSONTYPE_COMPOSITE:
1483  composite_to_json(val, result, false);
1484  break;
1485  case JSONTYPE_BOOL:
1486  outputstr = DatumGetBool(val) ? "true" : "false";
1487  if (key_scalar)
1488  escape_json(result, outputstr);
1489  else
1490  appendStringInfoString(result, outputstr);
1491  break;
1492  case JSONTYPE_NUMERIC:
1493  outputstr = OidOutputFunctionCall(outfuncoid, val);
1494 
1495  /*
1496  * Don't call escape_json for a non-key if it's a valid JSON
1497  * number.
1498  */
1499  if (!key_scalar && IsValidJsonNumber(outputstr, strlen(outputstr)))
1500  appendStringInfoString(result, outputstr);
1501  else
1502  escape_json(result, outputstr);
1503  pfree(outputstr);
1504  break;
1505  case JSONTYPE_DATE:
1506  {
1507  DateADT date;
1508  struct pg_tm tm;
1509  char buf[MAXDATELEN + 1];
1510 
1511  date = DatumGetDateADT(val);
1512  /* Same as date_out(), but forcing DateStyle */
1513  if (DATE_NOT_FINITE(date))
1514  EncodeSpecialDate(date, buf);
1515  else
1516  {
1518  &(tm.tm_year), &(tm.tm_mon), &(tm.tm_mday));
1519  EncodeDateOnly(&tm, USE_XSD_DATES, buf);
1520  }
1521  appendStringInfo(result, "\"%s\"", buf);
1522  }
1523  break;
1524  case JSONTYPE_TIMESTAMP:
1525  {
1527  struct pg_tm tm;
1528  fsec_t fsec;
1529  char buf[MAXDATELEN + 1];
1530 
1531  timestamp = DatumGetTimestamp(val);
1532  /* Same as timestamp_out(), but forcing DateStyle */
1533  if (TIMESTAMP_NOT_FINITE(timestamp))
1534  EncodeSpecialTimestamp(timestamp, buf);
1535  else if (timestamp2tm(timestamp, NULL, &tm, &fsec, NULL, NULL) == 0)
1536  EncodeDateTime(&tm, fsec, false, 0, NULL, USE_XSD_DATES, buf);
1537  else
1538  ereport(ERROR,
1539  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
1540  errmsg("timestamp out of range")));
1541  appendStringInfo(result, "\"%s\"", buf);
1542  }
1543  break;
1544  case JSONTYPE_TIMESTAMPTZ:
1545  {
1547  struct pg_tm tm;
1548  int tz;
1549  fsec_t fsec;
1550  const char *tzn = NULL;
1551  char buf[MAXDATELEN + 1];
1552 
1553  timestamp = DatumGetTimestampTz(val);
1554  /* Same as timestamptz_out(), but forcing DateStyle */
1555  if (TIMESTAMP_NOT_FINITE(timestamp))
1556  EncodeSpecialTimestamp(timestamp, buf);
1557  else if (timestamp2tm(timestamp, &tz, &tm, &fsec, &tzn, NULL) == 0)
1558  EncodeDateTime(&tm, fsec, true, tz, tzn, USE_XSD_DATES, buf);
1559  else
1560  ereport(ERROR,
1561  (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
1562  errmsg("timestamp out of range")));
1563  appendStringInfo(result, "\"%s\"", buf);
1564  }
1565  break;
1566  case JSONTYPE_JSON:
1567  /* JSON and JSONB output will already be escaped */
1568  outputstr = OidOutputFunctionCall(outfuncoid, val);
1569  appendStringInfoString(result, outputstr);
1570  pfree(outputstr);
1571  break;
1572  case JSONTYPE_CAST:
1573  /* outfuncoid refers to a cast function, not an output function */
1574  jsontext = DatumGetTextPP(OidFunctionCall1(outfuncoid, val));
1575  outputstr = text_to_cstring(jsontext);
1576  appendStringInfoString(result, outputstr);
1577  pfree(outputstr);
1578  pfree(jsontext);
1579  break;
1580  default:
1581  outputstr = OidOutputFunctionCall(outfuncoid, val);
1582  escape_json(result, outputstr);
1583  pfree(outputstr);
1584  break;
1585  }
1586 }
1587 
1588 /*
1589  * Process a single dimension of an array.
1590  * If it's the innermost dimension, output the values, otherwise call
1591  * ourselves recursively to process the next dimension.
1592  */
1593 static void
1594 array_dim_to_json(StringInfo result, int dim, int ndims, int *dims, Datum *vals,
1595  bool *nulls, int *valcount, JsonTypeCategory tcategory,
1596  Oid outfuncoid, bool use_line_feeds)
1597 {
1598  int i;
1599  const char *sep;
1600 
1601  Assert(dim < ndims);
1602 
1603  sep = use_line_feeds ? ",\n " : ",";
1604 
1605  appendStringInfoChar(result, '[');
1606 
1607  for (i = 1; i <= dims[dim]; i++)
1608  {
1609  if (i > 1)
1610  appendStringInfoString(result, sep);
1611 
1612  if (dim + 1 == ndims)
1613  {
1614  datum_to_json(vals[*valcount], nulls[*valcount], result, tcategory,
1615  outfuncoid, false);
1616  (*valcount)++;
1617  }
1618  else
1619  {
1620  /*
1621  * Do we want line feeds on inner dimensions of arrays? For now
1622  * we'll say no.
1623  */
1624  array_dim_to_json(result, dim + 1, ndims, dims, vals, nulls,
1625  valcount, tcategory, outfuncoid, false);
1626  }
1627  }
1628 
1629  appendStringInfoChar(result, ']');
1630 }
1631 
1632 /*
1633  * Turn an array into JSON.
1634  */
1635 static void
1636 array_to_json_internal(Datum array, StringInfo result, bool use_line_feeds)
1637 {
1638  ArrayType *v = DatumGetArrayTypeP(array);
1639  Oid element_type = ARR_ELEMTYPE(v);
1640  int *dim;
1641  int ndim;
1642  int nitems;
1643  int count = 0;
1644  Datum *elements;
1645  bool *nulls;
1646  int16 typlen;
1647  bool typbyval;
1648  char typalign;
1649  JsonTypeCategory tcategory;
1650  Oid outfuncoid;
1651 
1652  ndim = ARR_NDIM(v);
1653  dim = ARR_DIMS(v);
1654  nitems = ArrayGetNItems(ndim, dim);
1655 
1656  if (nitems <= 0)
1657  {
1658  appendStringInfoString(result, "[]");
1659  return;
1660  }
1661 
1662  get_typlenbyvalalign(element_type,
1663  &typlen, &typbyval, &typalign);
1664 
1665  json_categorize_type(element_type,
1666  &tcategory, &outfuncoid);
1667 
1668  deconstruct_array(v, element_type, typlen, typbyval,
1669  typalign, &elements, &nulls,
1670  &nitems);
1671 
1672  array_dim_to_json(result, 0, ndim, dim, elements, nulls, &count, tcategory,
1673  outfuncoid, use_line_feeds);
1674 
1675  pfree(elements);
1676  pfree(nulls);
1677 }
1678 
1679 /*
1680  * Turn a composite / record into JSON.
1681  */
1682 static void
1683 composite_to_json(Datum composite, StringInfo result, bool use_line_feeds)
1684 {
1685  HeapTupleHeader td;
1686  Oid tupType;
1687  int32 tupTypmod;
1688  TupleDesc tupdesc;
1689  HeapTupleData tmptup,
1690  *tuple;
1691  int i;
1692  bool needsep = false;
1693  const char *sep;
1694 
1695  sep = use_line_feeds ? ",\n " : ",";
1696 
1697  td = DatumGetHeapTupleHeader(composite);
1698 
1699  /* Extract rowtype info and find a tupdesc */
1700  tupType = HeapTupleHeaderGetTypeId(td);
1701  tupTypmod = HeapTupleHeaderGetTypMod(td);
1702  tupdesc = lookup_rowtype_tupdesc(tupType, tupTypmod);
1703 
1704  /* Build a temporary HeapTuple control structure */
1705  tmptup.t_len = HeapTupleHeaderGetDatumLength(td);
1706  tmptup.t_data = td;
1707  tuple = &tmptup;
1708 
1709  appendStringInfoChar(result, '{');
1710 
1711  for (i = 0; i < tupdesc->natts; i++)
1712  {
1713  Datum val;
1714  bool isnull;
1715  char *attname;
1716  JsonTypeCategory tcategory;
1717  Oid outfuncoid;
1718  Form_pg_attribute att = TupleDescAttr(tupdesc, i);
1719 
1720  if (att->attisdropped)
1721  continue;
1722 
1723  if (needsep)
1724  appendStringInfoString(result, sep);
1725  needsep = true;
1726 
1727  attname = NameStr(att->attname);
1728  escape_json(result, attname);
1729  appendStringInfoChar(result, ':');
1730 
1731  val = heap_getattr(tuple, i + 1, tupdesc, &isnull);
1732 
1733  if (isnull)
1734  {
1735  tcategory = JSONTYPE_NULL;
1736  outfuncoid = InvalidOid;
1737  }
1738  else
1739  json_categorize_type(att->atttypid, &tcategory, &outfuncoid);
1740 
1741  datum_to_json(val, isnull, result, tcategory, outfuncoid, false);
1742  }
1743 
1744  appendStringInfoChar(result, '}');
1745  ReleaseTupleDesc(tupdesc);
1746 }
1747 
1748 /*
1749  * Append JSON text for "val" to "result".
1750  *
1751  * This is just a thin wrapper around datum_to_json. If the same type will be
1752  * printed many times, avoid using this; better to do the json_categorize_type
1753  * lookups only once.
1754  */
1755 static void
1756 add_json(Datum val, bool is_null, StringInfo result,
1757  Oid val_type, bool key_scalar)
1758 {
1759  JsonTypeCategory tcategory;
1760  Oid outfuncoid;
1761 
1762  if (val_type == InvalidOid)
1763  ereport(ERROR,
1764  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1765  errmsg("could not determine input data type")));
1766 
1767  if (is_null)
1768  {
1769  tcategory = JSONTYPE_NULL;
1770  outfuncoid = InvalidOid;
1771  }
1772  else
1773  json_categorize_type(val_type,
1774  &tcategory, &outfuncoid);
1775 
1776  datum_to_json(val, is_null, result, tcategory, outfuncoid, key_scalar);
1777 }
1778 
1779 /*
1780  * SQL function array_to_json(row)
1781  */
1782 extern Datum
1784 {
1785  Datum array = PG_GETARG_DATUM(0);
1786  StringInfo result;
1787 
1788  result = makeStringInfo();
1789 
1790  array_to_json_internal(array, result, false);
1791 
1793 }
1794 
1795 /*
1796  * SQL function array_to_json(row, prettybool)
1797  */
1798 extern Datum
1800 {
1801  Datum array = PG_GETARG_DATUM(0);
1802  bool use_line_feeds = PG_GETARG_BOOL(1);
1803  StringInfo result;
1804 
1805  result = makeStringInfo();
1806 
1807  array_to_json_internal(array, result, use_line_feeds);
1808 
1810 }
1811 
1812 /*
1813  * SQL function row_to_json(row)
1814  */
1815 extern Datum
1817 {
1818  Datum array = PG_GETARG_DATUM(0);
1819  StringInfo result;
1820 
1821  result = makeStringInfo();
1822 
1823  composite_to_json(array, result, false);
1824 
1826 }
1827 
1828 /*
1829  * SQL function row_to_json(row, prettybool)
1830  */
1831 extern Datum
1833 {
1834  Datum array = PG_GETARG_DATUM(0);
1835  bool use_line_feeds = PG_GETARG_BOOL(1);
1836  StringInfo result;
1837 
1838  result = makeStringInfo();
1839 
1840  composite_to_json(array, result, use_line_feeds);
1841 
1843 }
1844 
1845 /*
1846  * SQL function to_json(anyvalue)
1847  */
1848 Datum
1850 {
1851  Datum val = PG_GETARG_DATUM(0);
1852  Oid val_type = get_fn_expr_argtype(fcinfo->flinfo, 0);
1853  StringInfo result;
1854  JsonTypeCategory tcategory;
1855  Oid outfuncoid;
1856 
1857  if (val_type == InvalidOid)
1858  ereport(ERROR,
1859  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1860  errmsg("could not determine input data type")));
1861 
1862  json_categorize_type(val_type,
1863  &tcategory, &outfuncoid);
1864 
1865  result = makeStringInfo();
1866 
1867  datum_to_json(val, false, result, tcategory, outfuncoid, false);
1868 
1870 }
1871 
1872 /*
1873  * json_agg transition function
1874  *
1875  * aggregate input column as a json array value.
1876  */
1877 Datum
1879 {
1880  MemoryContext aggcontext,
1881  oldcontext;
1883  Datum val;
1884 
1885  if (!AggCheckCallContext(fcinfo, &aggcontext))
1886  {
1887  /* cannot be called directly because of internal-type argument */
1888  elog(ERROR, "json_agg_transfn called in non-aggregate context");
1889  }
1890 
1891  if (PG_ARGISNULL(0))
1892  {
1893  Oid arg_type = get_fn_expr_argtype(fcinfo->flinfo, 1);
1894 
1895  if (arg_type == InvalidOid)
1896  ereport(ERROR,
1897  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1898  errmsg("could not determine input data type")));
1899 
1900  /*
1901  * Make this state object in a context where it will persist for the
1902  * duration of the aggregate call. MemoryContextSwitchTo is only
1903  * needed the first time, as the StringInfo routines make sure they
1904  * use the right context to enlarge the object if necessary.
1905  */
1906  oldcontext = MemoryContextSwitchTo(aggcontext);
1907  state = (JsonAggState *) palloc(sizeof(JsonAggState));
1908  state->str = makeStringInfo();
1909  MemoryContextSwitchTo(oldcontext);
1910 
1911  appendStringInfoChar(state->str, '[');
1912  json_categorize_type(arg_type, &state->val_category,
1913  &state->val_output_func);
1914  }
1915  else
1916  {
1917  state = (JsonAggState *) PG_GETARG_POINTER(0);
1918  appendStringInfoString(state->str, ", ");
1919  }
1920 
1921  /* fast path for NULLs */
1922  if (PG_ARGISNULL(1))
1923  {
1924  datum_to_json((Datum) 0, true, state->str, JSONTYPE_NULL,
1925  InvalidOid, false);
1926  PG_RETURN_POINTER(state);
1927  }
1928 
1929  val = PG_GETARG_DATUM(1);
1930 
1931  /* add some whitespace if structured type and not first item */
1932  if (!PG_ARGISNULL(0) &&
1933  (state->val_category == JSONTYPE_ARRAY ||
1934  state->val_category == JSONTYPE_COMPOSITE))
1935  {
1936  appendStringInfoString(state->str, "\n ");
1937  }
1938 
1939  datum_to_json(val, false, state->str, state->val_category,
1940  state->val_output_func, false);
1941 
1942  /*
1943  * The transition type for json_agg() is declared to be "internal", which
1944  * is a pass-by-value type the same size as a pointer. So we can safely
1945  * pass the JsonAggState pointer through nodeAgg.c's machinations.
1946  */
1947  PG_RETURN_POINTER(state);
1948 }
1949 
1950 /*
1951  * json_agg final function
1952  */
1953 Datum
1955 {
1957 
1958  /* cannot be called directly because of internal-type argument */
1959  Assert(AggCheckCallContext(fcinfo, NULL));
1960 
1961  state = PG_ARGISNULL(0) ?
1962  NULL :
1964 
1965  /* NULL result for no rows in, as is standard with aggregates */
1966  if (state == NULL)
1967  PG_RETURN_NULL();
1968 
1969  /* Else return state with appropriate array terminator added */
1971 }
1972 
1973 /*
1974  * json_object_agg transition function.
1975  *
1976  * aggregate two input columns as a single json object value.
1977  */
1978 Datum
1980 {
1981  MemoryContext aggcontext,
1982  oldcontext;
1984  Datum arg;
1985 
1986  if (!AggCheckCallContext(fcinfo, &aggcontext))
1987  {
1988  /* cannot be called directly because of internal-type argument */
1989  elog(ERROR, "json_object_agg_transfn called in non-aggregate context");
1990  }
1991 
1992  if (PG_ARGISNULL(0))
1993  {
1994  Oid arg_type;
1995 
1996  /*
1997  * Make the StringInfo in a context where it will persist for the
1998  * duration of the aggregate call. Switching context is only needed
1999  * for this initial step, as the StringInfo routines make sure they
2000  * use the right context to enlarge the object if necessary.
2001  */
2002  oldcontext = MemoryContextSwitchTo(aggcontext);
2003  state = (JsonAggState *) palloc(sizeof(JsonAggState));
2004  state->str = makeStringInfo();
2005  MemoryContextSwitchTo(oldcontext);
2006 
2007  arg_type = get_fn_expr_argtype(fcinfo->flinfo, 1);
2008 
2009  if (arg_type == InvalidOid)
2010  ereport(ERROR,
2011  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2012  errmsg("could not determine data type for argument %d", 1)));
2013 
2014  json_categorize_type(arg_type, &state->key_category,
2015  &state->key_output_func);
2016 
2017  arg_type = get_fn_expr_argtype(fcinfo->flinfo, 2);
2018 
2019  if (arg_type == InvalidOid)
2020  ereport(ERROR,
2021  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2022  errmsg("could not determine data type for argument %d", 2)));
2023 
2024  json_categorize_type(arg_type, &state->val_category,
2025  &state->val_output_func);
2026 
2027  appendStringInfoString(state->str, "{ ");
2028  }
2029  else
2030  {
2031  state = (JsonAggState *) PG_GETARG_POINTER(0);
2032  appendStringInfoString(state->str, ", ");
2033  }
2034 
2035  /*
2036  * Note: since json_object_agg() is declared as taking type "any", the
2037  * parser will not do any type conversion on unknown-type literals (that
2038  * is, undecorated strings or NULLs). Such values will arrive here as
2039  * type UNKNOWN, which fortunately does not matter to us, since
2040  * unknownout() works fine.
2041  */
2042 
2043  if (PG_ARGISNULL(1))
2044  ereport(ERROR,
2045  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2046  errmsg("field name must not be null")));
2047 
2048  arg = PG_GETARG_DATUM(1);
2049 
2050  datum_to_json(arg, false, state->str, state->key_category,
2051  state->key_output_func, true);
2052 
2053  appendStringInfoString(state->str, " : ");
2054 
2055  if (PG_ARGISNULL(2))
2056  arg = (Datum) 0;
2057  else
2058  arg = PG_GETARG_DATUM(2);
2059 
2060  datum_to_json(arg, PG_ARGISNULL(2), state->str, state->val_category,
2061  state->val_output_func, false);
2062 
2063  PG_RETURN_POINTER(state);
2064 }
2065 
2066 /*
2067  * json_object_agg final function.
2068  */
2069 Datum
2071 {
2073 
2074  /* cannot be called directly because of internal-type argument */
2075  Assert(AggCheckCallContext(fcinfo, NULL));
2076 
2077  state = PG_ARGISNULL(0) ? NULL : (JsonAggState *) PG_GETARG_POINTER(0);
2078 
2079  /* NULL result for no rows in, as is standard with aggregates */
2080  if (state == NULL)
2081  PG_RETURN_NULL();
2082 
2083  /* Else return state with appropriate object terminator added */
2085 }
2086 
2087 /*
2088  * Helper function for aggregates: return given StringInfo's contents plus
2089  * specified trailing string, as a text datum. We need this because aggregate
2090  * final functions are not allowed to modify the aggregate state.
2091  */
2092 static text *
2094 {
2095  /* custom version of cstring_to_text_with_len */
2096  int buflen = buffer->len;
2097  int addlen = strlen(addon);
2098  text *result = (text *) palloc(buflen + addlen + VARHDRSZ);
2099 
2100  SET_VARSIZE(result, buflen + addlen + VARHDRSZ);
2101  memcpy(VARDATA(result), buffer->data, buflen);
2102  memcpy(VARDATA(result) + buflen, addon, addlen);
2103 
2104  return result;
2105 }
2106 
2107 /*
2108  * SQL function json_build_object(variadic "any")
2109  */
2110 Datum
2112 {
2113  int nargs = PG_NARGS();
2114  int i;
2115  const char *sep = "";
2116  StringInfo result;
2117  Datum *args;
2118  bool *nulls;
2119  Oid *types;
2120 
2121  /* fetch argument values to build the object */
2122  nargs = extract_variadic_args(fcinfo, 0, false, &args, &types, &nulls);
2123 
2124  if (nargs < 0)
2125  PG_RETURN_NULL();
2126 
2127  if (nargs % 2 != 0)
2128  ereport(ERROR,
2129  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2130  errmsg("argument list must have even number of elements"),
2131  errhint("The arguments of json_build_object() must consist of alternating keys and values.")));
2132 
2133  result = makeStringInfo();
2134 
2135  appendStringInfoChar(result, '{');
2136 
2137  for (i = 0; i < nargs; i += 2)
2138  {
2139  appendStringInfoString(result, sep);
2140  sep = ", ";
2141 
2142  /* process key */
2143  if (nulls[i])
2144  ereport(ERROR,
2145  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2146  errmsg("argument %d cannot be null", i + 1),
2147  errhint("Object keys should be text.")));
2148 
2149  add_json(args[i], false, result, types[i], true);
2150 
2151  appendStringInfoString(result, " : ");
2152 
2153  /* process value */
2154  add_json(args[i + 1], nulls[i + 1], result, types[i + 1], false);
2155  }
2156 
2157  appendStringInfoChar(result, '}');
2158 
2160 }
2161 
2162 /*
2163  * degenerate case of json_build_object where it gets 0 arguments.
2164  */
2165 Datum
2167 {
2169 }
2170 
2171 /*
2172  * SQL function json_build_array(variadic "any")
2173  */
2174 Datum
2176 {
2177  int nargs;
2178  int i;
2179  const char *sep = "";
2180  StringInfo result;
2181  Datum *args;
2182  bool *nulls;
2183  Oid *types;
2184 
2185  /* fetch argument values to build the array */
2186  nargs = extract_variadic_args(fcinfo, 0, false, &args, &types, &nulls);
2187 
2188  if (nargs < 0)
2189  PG_RETURN_NULL();
2190 
2191  result = makeStringInfo();
2192 
2193  appendStringInfoChar(result, '[');
2194 
2195  for (i = 0; i < nargs; i++)
2196  {
2197  appendStringInfoString(result, sep);
2198  sep = ", ";
2199  add_json(args[i], nulls[i], result, types[i], false);
2200  }
2201 
2202  appendStringInfoChar(result, ']');
2203 
2205 }
2206 
2207 /*
2208  * degenerate case of json_build_array where it gets 0 arguments.
2209  */
2210 Datum
2212 {
2214 }
2215 
2216 /*
2217  * SQL function json_object(text[])
2218  *
2219  * take a one or two dimensional array of text as key/value pairs
2220  * for a json object.
2221  */
2222 Datum
2224 {
2225  ArrayType *in_array = PG_GETARG_ARRAYTYPE_P(0);
2226  int ndims = ARR_NDIM(in_array);
2227  StringInfoData result;
2228  Datum *in_datums;
2229  bool *in_nulls;
2230  int in_count,
2231  count,
2232  i;
2233  text *rval;
2234  char *v;
2235 
2236  switch (ndims)
2237  {
2238  case 0:
2240  break;
2241 
2242  case 1:
2243  if ((ARR_DIMS(in_array)[0]) % 2)
2244  ereport(ERROR,
2245  (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
2246  errmsg("array must have even number of elements")));
2247  break;
2248 
2249  case 2:
2250  if ((ARR_DIMS(in_array)[1]) != 2)
2251  ereport(ERROR,
2252  (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
2253  errmsg("array must have two columns")));
2254  break;
2255 
2256  default:
2257  ereport(ERROR,
2258  (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
2259  errmsg("wrong number of array subscripts")));
2260  }
2261 
2262  deconstruct_array(in_array,
2263  TEXTOID, -1, false, 'i',
2264  &in_datums, &in_nulls, &in_count);
2265 
2266  count = in_count / 2;
2267 
2268  initStringInfo(&result);
2269 
2270  appendStringInfoChar(&result, '{');
2271 
2272  for (i = 0; i < count; ++i)
2273  {
2274  if (in_nulls[i * 2])
2275  ereport(ERROR,
2276  (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
2277  errmsg("null value not allowed for object key")));
2278 
2279  v = TextDatumGetCString(in_datums[i * 2]);
2280  if (i > 0)
2281  appendStringInfoString(&result, ", ");
2282  escape_json(&result, v);
2283  appendStringInfoString(&result, " : ");
2284  pfree(v);
2285  if (in_nulls[i * 2 + 1])
2286  appendStringInfoString(&result, "null");
2287  else
2288  {
2289  v = TextDatumGetCString(in_datums[i * 2 + 1]);
2290  escape_json(&result, v);
2291  pfree(v);
2292  }
2293  }
2294 
2295  appendStringInfoChar(&result, '}');
2296 
2297  pfree(in_datums);
2298  pfree(in_nulls);
2299 
2300  rval = cstring_to_text_with_len(result.data, result.len);
2301  pfree(result.data);
2302 
2303  PG_RETURN_TEXT_P(rval);
2304 
2305 }
2306 
2307 /*
2308  * SQL function json_object(text[], text[])
2309  *
2310  * take separate key and value arrays of text to construct a json object
2311  * pairwise.
2312  */
2313 Datum
2315 {
2316  ArrayType *key_array = PG_GETARG_ARRAYTYPE_P(0);
2317  ArrayType *val_array = PG_GETARG_ARRAYTYPE_P(1);
2318  int nkdims = ARR_NDIM(key_array);
2319  int nvdims = ARR_NDIM(val_array);
2320  StringInfoData result;
2321  Datum *key_datums,
2322  *val_datums;
2323  bool *key_nulls,
2324  *val_nulls;
2325  int key_count,
2326  val_count,
2327  i;
2328  text *rval;
2329  char *v;
2330 
2331  if (nkdims > 1 || nkdims != nvdims)
2332  ereport(ERROR,
2333  (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
2334  errmsg("wrong number of array subscripts")));
2335 
2336  if (nkdims == 0)
2338 
2339  deconstruct_array(key_array,
2340  TEXTOID, -1, false, 'i',
2341  &key_datums, &key_nulls, &key_count);
2342 
2343  deconstruct_array(val_array,
2344  TEXTOID, -1, false, 'i',
2345  &val_datums, &val_nulls, &val_count);
2346 
2347  if (key_count != val_count)
2348  ereport(ERROR,
2349  (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
2350  errmsg("mismatched array dimensions")));
2351 
2352  initStringInfo(&result);
2353 
2354  appendStringInfoChar(&result, '{');
2355 
2356  for (i = 0; i < key_count; ++i)
2357  {
2358  if (key_nulls[i])
2359  ereport(ERROR,
2360  (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
2361  errmsg("null value not allowed for object key")));
2362 
2363  v = TextDatumGetCString(key_datums[i]);
2364  if (i > 0)
2365  appendStringInfoString(&result, ", ");
2366  escape_json(&result, v);
2367  appendStringInfoString(&result, " : ");
2368  pfree(v);
2369  if (val_nulls[i])
2370  appendStringInfoString(&result, "null");
2371  else
2372  {
2373  v = TextDatumGetCString(val_datums[i]);
2374  escape_json(&result, v);
2375  pfree(v);
2376  }
2377  }
2378 
2379  appendStringInfoChar(&result, '}');
2380 
2381  pfree(key_datums);
2382  pfree(key_nulls);
2383  pfree(val_datums);
2384  pfree(val_nulls);
2385 
2386  rval = cstring_to_text_with_len(result.data, result.len);
2387  pfree(result.data);
2388 
2389  PG_RETURN_TEXT_P(rval);
2390 }
2391 
2392 
2393 /*
2394  * Produce a JSON string literal, properly escaping characters in the text.
2395  */
2396 void
2398 {
2399  const char *p;
2400 
2401  appendStringInfoCharMacro(buf, '"');
2402  for (p = str; *p; p++)
2403  {
2404  switch (*p)
2405  {
2406  case '\b':
2407  appendStringInfoString(buf, "\\b");
2408  break;
2409  case '\f':
2410  appendStringInfoString(buf, "\\f");
2411  break;
2412  case '\n':
2413  appendStringInfoString(buf, "\\n");
2414  break;
2415  case '\r':
2416  appendStringInfoString(buf, "\\r");
2417  break;
2418  case '\t':
2419  appendStringInfoString(buf, "\\t");
2420  break;
2421  case '"':
2422  appendStringInfoString(buf, "\\\"");
2423  break;
2424  case '\\':
2425  appendStringInfoString(buf, "\\\\");
2426  break;
2427  default:
2428  if ((unsigned char) *p < ' ')
2429  appendStringInfo(buf, "\\u%04x", (int) *p);
2430  else
2431  appendStringInfoCharMacro(buf, *p);
2432  break;
2433  }
2434  }
2435  appendStringInfoCharMacro(buf, '"');
2436 }
2437 
2438 /*
2439  * SQL function json_typeof(json) -> text
2440  *
2441  * Returns the type of the outermost JSON value as TEXT. Possible types are
2442  * "object", "array", "string", "number", "boolean", and "null".
2443  *
2444  * Performs a single call to json_lex() to get the first token of the supplied
2445  * value. This initial token uniquely determines the value's type. As our
2446  * input must already have been validated by json_in() or json_recv(), the
2447  * initial token should never be JSON_TOKEN_OBJECT_END, JSON_TOKEN_ARRAY_END,
2448  * JSON_TOKEN_COLON, JSON_TOKEN_COMMA, or JSON_TOKEN_END.
2449  */
2450 Datum
2452 {
2453  text *json;
2454 
2455  JsonLexContext *lex;
2456  JsonTokenType tok;
2457  char *type;
2458 
2459  json = PG_GETARG_TEXT_PP(0);
2460  lex = makeJsonLexContext(json, false);
2461 
2462  /* Lex exactly one token from the input and check its type. */
2463  json_lex(lex);
2464  tok = lex_peek(lex);
2465  switch (tok)
2466  {
2468  type = "object";
2469  break;
2471  type = "array";
2472  break;
2473  case JSON_TOKEN_STRING:
2474  type = "string";
2475  break;
2476  case JSON_TOKEN_NUMBER:
2477  type = "number";
2478  break;
2479  case JSON_TOKEN_TRUE:
2480  case JSON_TOKEN_FALSE:
2481  type = "boolean";
2482  break;
2483  case JSON_TOKEN_NULL:
2484  type = "null";
2485  break;
2486  default:
2487  elog(ERROR, "unexpected json token: %d", tok);
2488  }
2489 
2491 }
#define MAXDATELEN
Definition: datetime.h:203
void EncodeDateOnly(struct pg_tm *tm, int style, char *str)
Definition: datetime.c:3884
json_struct_action array_end
Definition: jsonapi.h:88
#define RECORDARRAYOID
Definition: pg_type.h:682
#define TIMESTAMPTZOID
Definition: pg_type.h:525
signed short int16
Definition: c.h:293
#define PG_RETURN_POINTER(x)
Definition: fmgr.h:321
Datum to_json(PG_FUNCTION_ARGS)
Definition: json.c:1849
static void datum_to_json(Datum val, bool is_null, StringInfo result, JsonTypeCategory tcategory, Oid outfuncoid, bool key_scalar)
Definition: json.c:1450
#define DATEOID
Definition: pg_type.h:511
int line_number
Definition: jsonapi.h:60
#define DatumGetDateADT(X)
Definition: date.h:52
int errhint(const char *fmt,...)
Definition: elog.c:987
void getTypeOutputInfo(Oid type, Oid *typOutput, bool *typIsVarlena)
Definition: lsyscache.c:2665
void(* json_scalar_action)(void *state, char *token, JsonTokenType tokentype)
Definition: jsonapi.h:68
#define VARDATA_ANY(PTR)
Definition: postgres.h:347
#define VARDATA(PTR)
Definition: postgres.h:303
int extract_variadic_args(FunctionCallInfo fcinfo, int variadic_start, bool convert_unknown, Datum **args, Oid **types, bool **nulls)
Definition: funcapi.c:1476
unsigned char * unicode_to_utf8(pg_wchar c, unsigned char *utf8string)
Definition: wchar.c:475
void escape_json(StringInfo buf, const char *str)
Definition: json.c:2397
TupleDesc lookup_rowtype_tupdesc(Oid type_id, int32 typmod)
Definition: typcache.c:1618
static void error(void)
Definition: sql-dyntest.c:147
struct typedefs * types
Definition: ecpg.c:29
json_struct_action object_end
Definition: jsonapi.h:86
Datum json_object_two_arg(PG_FUNCTION_ARGS)
Definition: json.c:2314
int32 DateADT
Definition: date.h:22
#define TEXTOID
Definition: pg_type.h:324
static void json_categorize_type(Oid typoid, JsonTypeCategory *tcategory, Oid *outfuncoid)
Definition: json.c:1348
int64 timestamp
Oid get_element_type(Oid typid)
Definition: lsyscache.c:2517
int64 TimestampTz
Definition: timestamp.h:39
void get_typlenbyvalalign(Oid typid, int16 *typlen, bool *typbyval, char *typalign)
Definition: lsyscache.c:2040
static void composite_to_json(Datum composite, StringInfo result, bool use_line_feeds)
Definition: json.c:1683
#define PG_GETARG_DATUM(n)
Definition: fmgr.h:233
#define TupleDescAttr(tupdesc, i)
Definition: tupdesc.h:90
#define NUMERICOID
Definition: pg_type.h:554
void pq_begintypsend(StringInfo buf)
Definition: pqformat.c:328
#define VARHDRSZ
Definition: c.h:503
#define JSONOID
Definition: pg_type.h:356
char * pstrdup(const char *in)
Definition: mcxt.c:1063
#define DatumGetTextPP(X)
Definition: fmgr.h:256
static void array_dim_to_json(StringInfo result, int dim, int ndims, int *dims, Datum *vals, bool *nulls, int *valcount, JsonTypeCategory tcategory, Oid outfuncoid, bool use_line_feeds)
Definition: json.c:1594
int timestamp2tm(Timestamp dt, int *tzp, struct pg_tm *tm, fsec_t *fsec, const char **tzn, pg_tz *attimezone)
Definition: timestamp.c:1758
Datum json_in(PG_FUNCTION_ARGS)
Definition: json.c:228
StringInfo makeStringInfo(void)
Definition: stringinfo.c:28
StringInfoData * StringInfo
Definition: stringinfo.h:43
JsonTokenType token_type
Definition: jsonapi.h:58
int ArrayGetNItems(int ndim, const int *dims)
Definition: arrayutils.c:75
json_struct_action object_start
Definition: jsonapi.h:85
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
char * prev_token_terminator
Definition: jsonapi.h:57
#define INT4OID
Definition: pg_type.h:316
static void lex_expect(JsonParseContext ctx, JsonLexContext *lex, JsonTokenType token)
Definition: json.c:173
#define JSON_ALPHANUMERIC_CHAR(c)
Definition: json.c:180
static char * extract_mb_char(char *s)
Definition: json.c:1327
Datum json_agg_finalfn(PG_FUNCTION_ARGS)
Definition: json.c:1954
Datum array_to_json_pretty(PG_FUNCTION_ARGS)
Definition: json.c:1799
int errcode(int sqlerrcode)
Definition: elog.c:575
json_scalar_action scalar
Definition: jsonapi.h:93
Oid val_output_func
Definition: json.c:75
void pq_sendtext(StringInfo buf, const char *str, int slen)
Definition: pqformat.c:174
static void json_lex_number(JsonLexContext *lex, char *s, bool *num_err, int *total_len)
Definition: json.c:1028
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:241
long date
Definition: pgtypes_date.h:8
void EncodeSpecialTimestamp(Timestamp dt, char *str)
Definition: timestamp.c:1523
#define PG_GETARG_BOOL(n)
Definition: fmgr.h:239
#define PG_RETURN_BYTEA_P(x)
Definition: fmgr.h:330
struct pg_tm
Definition: pgtime.h:25
unsigned int Oid
Definition: postgres_ext.h:31
static bool lex_accept(JsonLexContext *lex, JsonTokenType token, char **lexeme)
Definition: json.c:139
#define OidIsValid(objectId)
Definition: c.h:586
bytea * pq_endtypsend(StringInfo buf)
Definition: pqformat.c:348
#define DatumGetHeapTupleHeader(X)
Definition: fmgr.h:259
int natts
Definition: tupdesc.h:79
int lex_level
Definition: jsonapi.h:59
CoercionPathType
Definition: parse_coerce.h:24
#define FirstNormalObjectId
Definition: transam.h:94
signed int int32
Definition: c.h:294
struct JsonAggState JsonAggState
#define PG_GETARG_TEXT_PP(n)
Definition: fmgr.h:273
void(* json_ofield_action)(void *state, char *fname, bool isnull)
Definition: jsonapi.h:66
char * line_start
Definition: jsonapi.h:61
HeapTupleHeader t_data
Definition: htup.h:67
#define HeapTupleHeaderGetTypMod(tup)
Definition: htup_details.h:460
static text * catenate_stringinfo_string(StringInfo buffer, const char *addon)
Definition: json.c:2093
#define PG_GETARG_ARRAYTYPE_P(n)
Definition: array.h:248
#define appendStringInfoCharMacro(str, ch)
Definition: stringinfo.h:127
JsonLexContext * makeJsonLexContext(text *json, bool need_escapes)
Definition: json.c:301
#define TIMESTAMP_NOT_FINITE(j)
Definition: timestamp.h:122
int input_length
Definition: jsonapi.h:54
void pfree(void *pointer)
Definition: mcxt.c:936
JsonParseContext
Definition: json.c:41
Datum json_build_object_noargs(PG_FUNCTION_ARGS)
Definition: json.c:2166
#define IS_HIGHBIT_SET(ch)
Definition: c.h:963
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:78
#define TIMESTAMPOID
Definition: pg_type.h:519
#define ERROR
Definition: elog.h:43
static void parse_array(JsonLexContext *lex, JsonSemAction *sem)
Definition: json.c:570
Oid key_output_func
Definition: json.c:73
Oid get_fn_expr_argtype(FmgrInfo *flinfo, int argnum)
Definition: fmgr.c:1904
Datum array_to_json(PG_FUNCTION_ARGS)
Definition: json.c:1783
bool IsValidJsonNumber(const char *str, int len)
Definition: json.c:193
#define ARR_DIMS(a)
Definition: array.h:279
Datum json_build_object(PG_FUNCTION_ARGS)
Definition: json.c:2111
#define DATE_NOT_FINITE(j)
Definition: date.h:42
Datum row_to_json_pretty(PG_FUNCTION_ARGS)
Definition: json.c:1832
Datum json_typeof(PG_FUNCTION_ARGS)
Definition: json.c:2451
void pg_parse_json(JsonLexContext *lex, JsonSemAction *sem)
Definition: json.c:332
#define OidFunctionCall1(functionId, arg1)
Definition: fmgr.h:623
#define INT2OID
Definition: pg_type.h:308
void appendStringInfoString(StringInfo str, const char *s)
Definition: stringinfo.c:157
uint32 t_len
Definition: htup.h:64
int tm_mday
Definition: pgtime.h:30
static char * buf
Definition: pg_test_fsync.c:67
text * cstring_to_text_with_len(const char *s, int len)
Definition: varlena.c:162
StringInfo str
Definition: json.c:71
int tm_mon
Definition: pgtime.h:31
void check_stack_depth(void)
Definition: postgres.c:3154
#define DatumGetTimestampTz(X)
Definition: timestamp.h:28
int json_count_array_elements(JsonLexContext *lex)
Definition: json.c:367
int errdetail(const char *fmt,...)
Definition: elog.c:873
static void parse_array_element(JsonLexContext *lex, JsonSemAction *sem)
Definition: json.c:539
#define DatumGetBool(X)
Definition: postgres.h:399
StringInfo strval
Definition: jsonapi.h:62
#define JSONBOID
Definition: pg_type.h:638
void EncodeDateTime(struct pg_tm *tm, fsec_t fsec, bool print_tz, int tz, const char *tzn, int style, char *str)
Definition: datetime.c:3999
FormData_pg_attribute * Form_pg_attribute
Definition: pg_attribute.h:187
void resetStringInfo(StringInfo str)
Definition: stringinfo.c:62
void EncodeSpecialDate(DateADT dt, char *str)
Definition: date.c:299
int32 fsec_t
Definition: timestamp.h:41
bool type_is_rowtype(Oid typid)
Definition: lsyscache.c:2424
JsonTypeCategory
Definition: json.c:54
static void report_parse_error(JsonParseContext ctx, JsonLexContext *lex)
Definition: json.c:1126
#define ereport(elevel, rest)
Definition: elog.h:122
void j2date(int jd, int *year, int *month, int *day)
Definition: datetime.c:317
json_ofield_action object_field_end
Definition: jsonapi.h:90
JsonTypeCategory key_category
Definition: json.c:72
char * token_start
Definition: jsonapi.h:55
int64 Timestamp
Definition: timestamp.h:38
Datum json_object(PG_FUNCTION_ARGS)
Definition: json.c:2223
char * pq_getmsgtext(StringInfo msg, int rawbytes, int *nbytes)
Definition: pqformat.c:548
static JsonTokenType lex_peek(JsonLexContext *lex)
Definition: json.c:124
void appendStringInfoChar(StringInfo str, char ch)
Definition: stringinfo.c:169
void initStringInfo(StringInfo str)
Definition: stringinfo.c:46
#define heap_getattr(tup, attnum, tupleDesc, isnull)
Definition: htup_details.h:774
Datum json_object_agg_finalfn(PG_FUNCTION_ARGS)
Definition: json.c:2070
#define TextDatumGetCString(d)
Definition: builtins.h:92
#define FLOAT4OID
Definition: pg_type.h:416
#define ANYARRAYOID
Definition: pg_type.h:688
void * palloc0(Size size)
Definition: mcxt.c:864
uintptr_t Datum
Definition: postgres.h:372
char * token_terminator
Definition: jsonapi.h:56
#define PG_RETURN_DATUM(x)
Definition: fmgr.h:313
int GetDatabaseEncoding(void)
Definition: mbutils.c:1004
json_aelem_action array_element_start
Definition: jsonapi.h:91
#define HeapTupleHeaderGetTypeId(tup)
Definition: htup_details.h:450
Datum json_out(PG_FUNCTION_ARGS)
Definition: json.c:246
#define InvalidOid
Definition: postgres_ext.h:36
#define INT8OID
Definition: pg_type.h:304
Datum json_object_agg_transfn(PG_FUNCTION_ARGS)
Definition: json.c:1979
static void add_json(Datum val, bool is_null, StringInfo result, Oid val_type, bool key_scalar)
Definition: json.c:1756
#define PG_RETURN_TEXT_P(x)
Definition: fmgr.h:331
text * cstring_to_text(const char *s)
Definition: varlena.c:150
#define PG_ARGISNULL(n)
Definition: fmgr.h:174
JsonTypeCategory val_category
Definition: json.c:74
static void json_lex(JsonLexContext *lex)
Definition: json.c:614
#define Assert(condition)
Definition: c.h:680
char * input
Definition: jsonapi.h:53
json_struct_action array_start
Definition: jsonapi.h:87
Definition: regguts.h:298
static void report_invalid_token(JsonLexContext *lex)
Definition: json.c:1233
static void parse_scalar(JsonLexContext *lex, JsonSemAction *sem)
Definition: json.c:407
WalTimeSample buffer[LAG_TRACKER_BUFFER_SIZE]
Definition: walsender.c:215
#define PG_RETURN_CSTRING(x)
Definition: fmgr.h:322
static void json_lex_string(JsonLexContext *lex)
Definition: json.c:755
#define FLOAT8OID
Definition: pg_type.h:419
JsonLexContext * makeJsonLexContextCstringLen(char *json, int len, bool need_escapes)
Definition: json.c:309
int AggCheckCallContext(FunctionCallInfo fcinfo, MemoryContext *aggcontext)
Definition: nodeAgg.c:3471
Datum json_recv(PG_FUNCTION_ARGS)
Definition: json.c:272
#define PG_NARGS()
Definition: fmgr.h:168
void(* json_struct_action)(void *state)
Definition: jsonapi.h:65
int pg_mblen(const char *mbstr)
Definition: mbutils.c:760
#define BOOLOID
Definition: pg_type.h:288
#define ARR_NDIM(a)
Definition: array.h:275
static void array_to_json_internal(Datum array, StringInfo result, bool use_line_feeds)
Definition: json.c:1636
Datum json_agg_transfn(PG_FUNCTION_ARGS)
Definition: json.c:1878
void deconstruct_array(ArrayType *array, Oid elmtype, int elmlen, bool elmbyval, char elmalign, Datum **elemsp, bool **nullsp, int *nelemsp)
Definition: arrayfuncs.c:3449
char * text_to_cstring(const text *t)
Definition: varlena.c:183
int pg_utf_mblen(const unsigned char *s)
Definition: wchar.c:541
void(* json_aelem_action)(void *state, bool isnull)
Definition: jsonapi.h:67
Datum json_build_array_noargs(PG_FUNCTION_ARGS)
Definition: json.c:2211
#define USE_XSD_DATES
Definition: miscadmin.h:214
int tm_year
Definition: pgtime.h:32
char * OidOutputFunctionCall(Oid functionId, Datum val)
Definition: fmgr.c:1742
#define VARSIZE_ANY_EXHDR(PTR)
Definition: postgres.h:340
void * palloc(Size size)
Definition: mcxt.c:835
int errmsg(const char *fmt,...)
Definition: elog.c:797
static JsonSemAction nullSemAction
Definition: json.c:110
Datum json_build_array(PG_FUNCTION_ARGS)
Definition: json.c:2175
CoercionPathType find_coercion_pathway(Oid targetTypeId, Oid sourceTypeId, CoercionContext ccontext, Oid *funcid)
int i
#define NameStr(name)
Definition: c.h:557
#define errcontext
Definition: elog.h:164
#define CStringGetTextDatum(s)
Definition: builtins.h:91
void * arg
#define PG_GETARG_CSTRING(n)
Definition: fmgr.h:242
Definition: c.h:497
#define PG_FUNCTION_ARGS
Definition: fmgr.h:158
#define POSTGRES_EPOCH_JDATE
Definition: timestamp.h:163
json_ofield_action object_field_start
Definition: jsonapi.h:89
#define SET_VARSIZE(PTR, len)
Definition: postgres.h:328
static void parse_object_field(JsonLexContext *lex, JsonSemAction *sem)
Definition: json.c:443
#define elog
Definition: elog.h:219
Datum row_to_json(PG_FUNCTION_ARGS)
Definition: json.c:1816
#define ReleaseTupleDesc(tupdesc)
Definition: tupdesc.h:121
Oid getBaseType(Oid typid)
Definition: lsyscache.c:2290
#define ARR_ELEMTYPE(a)
Definition: array.h:277
void * semstate
Definition: jsonapi.h:84
void appendBinaryStringInfo(StringInfo str, const char *data, int datalen)
Definition: stringinfo.c:208
long val
Definition: informix.c:689
static void parse_object(JsonLexContext *lex, JsonSemAction *sem)
Definition: json.c:489
#define PG_RETURN_NULL()
Definition: fmgr.h:305
json_aelem_action array_element_end
Definition: jsonapi.h:92
Datum json_send(PG_FUNCTION_ARGS)
Definition: json.c:258
static int report_json_context(JsonLexContext *lex)
Definition: json.c:1262
JsonTokenType
Definition: jsonapi.h:20
#define DatumGetTimestamp(X)
Definition: timestamp.h:27
#define HeapTupleHeaderGetDatumLength(tup)
Definition: htup_details.h:444
#define DatumGetArrayTypeP(X)
Definition: array.h:246