PostgreSQL Source Code  git master
test_json_parser_incremental.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * test_json_parser_incremental.c
4  * Test program for incremental JSON parser
5  *
6  * Copyright (c) 2024, PostgreSQL Global Development Group
7  *
8  * IDENTIFICATION
9  * src/test/modules/test_json_parser/test_json_parser_incremental.c
10  *
11  * This program tests incremental parsing of json. The input is fed into
12  * the parser in very small chunks. In practice you would normally use
13  * much larger chunks, but doing this makes it more likely that the
14  * full range of increment handling, especially in the lexer, is exercised.
15  * If the "-c SIZE" option is provided, that chunk size is used instead
16  * of the default of 60.
17  *
18  * If the -s flag is given, the program does semantic processing. This should
19  * just mirror back the json, albeit with white space changes.
20  *
21  * The argument specifies the file containing the JSON input.
22  *
23  *-------------------------------------------------------------------------
24  */
25 
26 #include "postgres_fe.h"
27 
28 #include <stdio.h>
29 #include <sys/types.h>
30 #include <sys/stat.h>
31 #include <unistd.h>
32 
33 #include "common/jsonapi.h"
34 #include "common/logging.h"
35 #include "lib/stringinfo.h"
36 #include "mb/pg_wchar.h"
37 #include "pg_getopt.h"
38 
39 #define BUFSIZE 6000
40 #define DEFAULT_CHUNK_SIZE 60
41 
42 typedef struct DoState
43 {
48 
49 static void usage(const char *progname);
50 static void escape_json(StringInfo buf, const char *str);
51 
52 /* semantic action functions for parser */
55 static JsonParseErrorType do_object_field_start(void *state, char *fname, bool isnull);
56 static JsonParseErrorType do_object_field_end(void *state, char *fname, bool isnull);
59 static JsonParseErrorType do_array_element_start(void *state, bool isnull);
60 static JsonParseErrorType do_array_element_end(void *state, bool isnull);
61 static JsonParseErrorType do_scalar(void *state, char *token, JsonTokenType tokentype);
62 
65  .object_end = do_object_end,
66  .object_field_start = do_object_field_start,
67  .object_field_end = do_object_field_end,
68  .array_start = do_array_start,
69  .array_end = do_array_end,
70  .array_element_start = do_array_element_start,
71  .array_element_end = do_array_element_end,
72  .scalar = do_scalar
73 };
74 
75 int
76 main(int argc, char **argv)
77 {
78  char buff[BUFSIZE];
79  FILE *json_file;
80  JsonParseErrorType result;
81  JsonLexContext lex;
82  StringInfoData json;
83  int n_read;
84  size_t chunk_size = DEFAULT_CHUNK_SIZE;
85  struct stat statbuf;
86  off_t bytes_left;
87  JsonSemAction *testsem = &nullSemAction;
88  char *testfile;
89  int c;
90  bool need_strings = false;
91 
92  pg_logging_init(argv[0]);
93 
94  while ((c = getopt(argc, argv, "c:s")) != -1)
95  {
96  switch (c)
97  {
98  case 'c': /* chunksize */
99  sscanf(optarg, "%zu", &chunk_size);
100  if (chunk_size > BUFSIZE)
101  pg_fatal("chunk size cannot exceed %d", BUFSIZE);
102  break;
103  case 's': /* do semantic processing */
104  testsem = &sem;
105  sem.semstate = palloc(sizeof(struct DoState));
106  ((struct DoState *) sem.semstate)->lex = &lex;
107  ((struct DoState *) sem.semstate)->buf = makeStringInfo();
108  need_strings = true;
109  break;
110  }
111  }
112 
113  if (optind < argc)
114  {
115  testfile = pg_strdup(argv[optind]);
116  optind++;
117  }
118  else
119  {
120  usage(argv[0]);
121  exit(1);
122  }
123 
124  makeJsonLexContextIncremental(&lex, PG_UTF8, need_strings);
125  initStringInfo(&json);
126 
127  if ((json_file = fopen(testfile, "r")) == NULL)
128  pg_fatal("error opening input: %m");
129 
130  if (fstat(fileno(json_file), &statbuf) != 0)
131  pg_fatal("error statting input: %m");
132 
133  bytes_left = statbuf.st_size;
134 
135  for (;;)
136  {
137  /* We will break when there's nothing left to read */
138 
139  if (bytes_left < chunk_size)
140  chunk_size = bytes_left;
141 
142  n_read = fread(buff, 1, chunk_size, json_file);
143  if (n_read < chunk_size)
144  pg_fatal("error reading input file: %d", ferror(json_file));
145 
146  appendBinaryStringInfo(&json, buff, n_read);
147 
148  /*
149  * Append some trailing junk to the buffer passed to the parser. This
150  * helps us ensure that the parser does the right thing even if the
151  * chunk isn't terminated with a '\0'.
152  */
153  appendStringInfoString(&json, "1+23 trailing junk");
154  bytes_left -= n_read;
155  if (bytes_left > 0)
156  {
157  result = pg_parse_json_incremental(&lex, testsem,
158  json.data, n_read,
159  false);
160  if (result != JSON_INCOMPLETE)
161  {
162  fprintf(stderr, "%s\n", json_errdetail(result, &lex));
163  exit(1);
164  }
165  resetStringInfo(&json);
166  }
167  else
168  {
169  result = pg_parse_json_incremental(&lex, testsem,
170  json.data, n_read,
171  true);
172  if (result != JSON_SUCCESS)
173  {
174  fprintf(stderr, "%s\n", json_errdetail(result, &lex));
175  exit(1);
176  }
177  if (!need_strings)
178  printf("SUCCESS!\n");
179  break;
180  }
181  }
182  fclose(json_file);
183  exit(0);
184 }
185 
186 /*
187  * The semantic routines here essentially just output the same json, except
188  * for white space. We could pretty print it but there's no need for our
189  * purposes. The result should be able to be fed to any JSON processor
190  * such as jq for validation.
191  */
192 
193 static JsonParseErrorType
195 {
196  DoState *_state = (DoState *) state;
197 
198  printf("{\n");
199  _state->elem_is_first = true;
200 
201  return JSON_SUCCESS;
202 }
203 
204 static JsonParseErrorType
206 {
207  DoState *_state = (DoState *) state;
208 
209  printf("\n}\n");
210  _state->elem_is_first = false;
211 
212  return JSON_SUCCESS;
213 }
214 
215 static JsonParseErrorType
216 do_object_field_start(void *state, char *fname, bool isnull)
217 {
218  DoState *_state = (DoState *) state;
219 
220  if (!_state->elem_is_first)
221  printf(",\n");
222  resetStringInfo(_state->buf);
223  escape_json(_state->buf, fname);
224  printf("%s: ", _state->buf->data);
225  _state->elem_is_first = false;
226 
227  return JSON_SUCCESS;
228 }
229 
230 static JsonParseErrorType
231 do_object_field_end(void *state, char *fname, bool isnull)
232 {
233  /* nothing to do really */
234 
235  return JSON_SUCCESS;
236 }
237 
238 static JsonParseErrorType
240 {
241  DoState *_state = (DoState *) state;
242 
243  printf("[\n");
244  _state->elem_is_first = true;
245 
246  return JSON_SUCCESS;
247 }
248 
249 static JsonParseErrorType
251 {
252  DoState *_state = (DoState *) state;
253 
254  printf("\n]\n");
255  _state->elem_is_first = false;
256 
257  return JSON_SUCCESS;
258 }
259 
260 static JsonParseErrorType
261 do_array_element_start(void *state, bool isnull)
262 {
263  DoState *_state = (DoState *) state;
264 
265  if (!_state->elem_is_first)
266  printf(",\n");
267  _state->elem_is_first = false;
268 
269  return JSON_SUCCESS;
270 }
271 
272 static JsonParseErrorType
273 do_array_element_end(void *state, bool isnull)
274 {
275  /* nothing to do */
276 
277  return JSON_SUCCESS;
278 }
279 
280 static JsonParseErrorType
281 do_scalar(void *state, char *token, JsonTokenType tokentype)
282 {
283  DoState *_state = (DoState *) state;
284 
285  if (tokentype == JSON_TOKEN_STRING)
286  {
287  resetStringInfo(_state->buf);
288  escape_json(_state->buf, token);
289  printf("%s", _state->buf->data);
290  }
291  else
292  printf("%s", token);
293 
294  return JSON_SUCCESS;
295 }
296 
297 
298 /* copied from backend code */
299 static void
301 {
302  const char *p;
303 
305  for (p = str; *p; p++)
306  {
307  switch (*p)
308  {
309  case '\b':
310  appendStringInfoString(buf, "\\b");
311  break;
312  case '\f':
313  appendStringInfoString(buf, "\\f");
314  break;
315  case '\n':
316  appendStringInfoString(buf, "\\n");
317  break;
318  case '\r':
319  appendStringInfoString(buf, "\\r");
320  break;
321  case '\t':
322  appendStringInfoString(buf, "\\t");
323  break;
324  case '"':
325  appendStringInfoString(buf, "\\\"");
326  break;
327  case '\\':
328  appendStringInfoString(buf, "\\\\");
329  break;
330  default:
331  if ((unsigned char) *p < ' ')
332  appendStringInfo(buf, "\\u%04x", (int) *p);
333  else
335  break;
336  }
337  }
339 }
340 
/*
 * Print a short usage summary to stderr.
 *
 * progname is the name to show for the program in the "Usage:" line.
 */
static void
usage(const char *progname)
{
	fprintf(stderr, "Usage: %s [OPTION ...] testfile\n", progname);
	fprintf(stderr, "Options:\n");
	/* The default shown here must be kept in sync with DEFAULT_CHUNK_SIZE. */
	fprintf(stderr, " -c chunksize size of piece fed to parser (default 60)\n");
	fprintf(stderr, " -s do semantic processing\n");
}
char * pg_strdup(const char *in)
Definition: fe_memutils.c:85
const char * str
#define token
Definition: indent_globs.h:126
JsonParseErrorType pg_parse_json_incremental(JsonLexContext *lex, JsonSemAction *sem, char *json, int len, bool is_last)
Definition: jsonapi.c:649
char * json_errdetail(JsonParseErrorType error, JsonLexContext *lex)
Definition: jsonapi.c:2096
JsonSemAction nullSemAction
Definition: jsonapi.c:224
JsonLexContext * makeJsonLexContextIncremental(JsonLexContext *lex, int encoding, bool need_escapes)
Definition: jsonapi.c:366
JsonParseErrorType
Definition: jsonapi.h:37
@ JSON_SUCCESS
Definition: jsonapi.h:38
@ JSON_INCOMPLETE
Definition: jsonapi.h:39
JsonTokenType
Definition: jsonapi.h:20
@ JSON_TOKEN_STRING
Definition: jsonapi.h:22
exit(1)
void pg_logging_init(const char *argv0)
Definition: logging.c:83
const char * progname
Definition: main.c:44
void * palloc(Size size)
Definition: mcxt.c:1316
#define pg_fatal(...)
PGDLLIMPORT int optind
Definition: getopt.c:50
int getopt(int nargc, char *const *nargv, const char *ostr)
Definition: getopt.c:71
PGDLLIMPORT char * optarg
Definition: getopt.c:52
static char * buf
Definition: pg_test_fsync.c:73
@ PG_UTF8
Definition: pg_wchar.h:232
#define fprintf
Definition: port.h:242
#define printf(...)
Definition: port.h:244
char * c
StringInfo makeStringInfo(void)
Definition: stringinfo.c:41
void resetStringInfo(StringInfo str)
Definition: stringinfo.c:78
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:97
void appendBinaryStringInfo(StringInfo str, const void *data, int datalen)
Definition: stringinfo.c:233
void appendStringInfoString(StringInfo str, const char *s)
Definition: stringinfo.c:182
void initStringInfo(StringInfo str)
Definition: stringinfo.c:59
#define appendStringInfoCharMacro(str, ch)
Definition: stringinfo.h:204
JsonLexContext * lex
json_struct_action object_start
Definition: jsonapi.h:135
void * semstate
Definition: jsonapi.h:134
__int64 st_size
Definition: win32_port.h:273
Definition: regguts.h:323
static void usage(const char *progname)
static JsonParseErrorType do_object_field_start(void *state, char *fname, bool isnull)
JsonSemAction sem
int main(int argc, char **argv)
static JsonParseErrorType do_array_element_end(void *state, bool isnull)
struct DoState DoState
static JsonParseErrorType do_array_element_start(void *state, bool isnull)
static JsonParseErrorType do_object_end(void *state)
static JsonParseErrorType do_scalar(void *state, char *token, JsonTokenType tokentype)
#define DEFAULT_CHUNK_SIZE
static JsonParseErrorType do_array_start(void *state)
static JsonParseErrorType do_object_start(void *state)
static JsonParseErrorType do_array_end(void *state)
static JsonParseErrorType do_object_field_end(void *state, char *fname, bool isnull)
static void escape_json(StringInfo buf, const char *str)
#define BUFSIZE
#define fstat
Definition: win32_port.h:283