PostgreSQL Source Code git master
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
test_json_parser_incremental.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * test_json_parser_incremental.c
4 * Test program for incremental JSON parser
5 *
6 * Copyright (c) 2024-2025, PostgreSQL Global Development Group
7 *
8 * IDENTIFICATION
9 * src/test/modules/test_json_parser/test_json_parser_incremental.c
10 *
11 * This program tests incremental parsing of json. The input is fed into
12 * the parser in very small chunks. In practice you would normally use
13 * much larger chunks, but doing this makes it more likely that the
14 * full range of increment handling, especially in the lexer, is exercised.
15 * If the "-c SIZE" option is provided, that chunk size is used instead
16 * of the default of 60.
17 *
18 * If the -s flag is given, the program does semantic processing. This should
19 * just mirror back the json, albeit with white space changes.
20 *
21 * If the -o flag is given, the JSONLEX_CTX_OWNS_TOKENS flag is set. (This can
22 * be used in combination with a leak sanitizer; without the option, the parser
23 * may leak memory with invalid JSON.)
24 *
25 * The argument specifies the file containing the JSON input.
26 *
27 *-------------------------------------------------------------------------
28 */
29
30#include "postgres_fe.h"
31
32#include <stdio.h>
33#include <sys/types.h>
34#include <sys/stat.h>
35#include <unistd.h>
36
37#include "common/jsonapi.h"
38#include "common/logging.h"
39#include "lib/stringinfo.h"
40#include "mb/pg_wchar.h"
41#include "pg_getopt.h"
42
43#define BUFSIZE 6000
44#define DEFAULT_CHUNK_SIZE 60
45
46typedef struct DoState
47{
52
53static void usage(const char *progname);
54static void escape_json(StringInfo buf, const char *str);
55
56/* semantic action functions for parser */
59static JsonParseErrorType do_object_field_start(void *state, char *fname, bool isnull);
60static JsonParseErrorType do_object_field_end(void *state, char *fname, bool isnull);
63static JsonParseErrorType do_array_element_start(void *state, bool isnull);
64static JsonParseErrorType do_array_element_end(void *state, bool isnull);
65static JsonParseErrorType do_scalar(void *state, char *token, JsonTokenType tokentype);
66
69 .object_end = do_object_end,
70 .object_field_start = do_object_field_start,
71 .object_field_end = do_object_field_end,
72 .array_start = do_array_start,
73 .array_end = do_array_end,
74 .array_element_start = do_array_element_start,
75 .array_element_end = do_array_element_end,
76 .scalar = do_scalar
77};
78
79static bool lex_owns_tokens = false;
80
81int
82main(int argc, char **argv)
83{
84 char buff[BUFSIZE];
85 FILE *json_file;
86 JsonParseErrorType result;
87 JsonLexContext *lex;
88 StringInfoData json;
89 int n_read;
90 size_t chunk_size = DEFAULT_CHUNK_SIZE;
91 struct stat statbuf;
92 off_t bytes_left;
93 const JsonSemAction *testsem = &nullSemAction;
94 char *testfile;
95 int c;
96 bool need_strings = false;
97 int ret = 0;
98
99 pg_logging_init(argv[0]);
100
101 lex = calloc(1, sizeof(JsonLexContext));
102 if (!lex)
103 pg_fatal("out of memory");
104
105 while ((c = getopt(argc, argv, "c:os")) != -1)
106 {
107 switch (c)
108 {
109 case 'c': /* chunksize */
110 chunk_size = strtou64(optarg, NULL, 10);
111 if (chunk_size > BUFSIZE)
112 pg_fatal("chunk size cannot exceed %d", BUFSIZE);
113 break;
114 case 'o': /* switch token ownership */
115 lex_owns_tokens = true;
116 break;
117 case 's': /* do semantic processing */
118 testsem = &sem;
119 sem.semstate = palloc(sizeof(struct DoState));
120 ((struct DoState *) sem.semstate)->lex = lex;
121 ((struct DoState *) sem.semstate)->buf = makeStringInfo();
122 need_strings = true;
123 break;
124 }
125 }
126
127 if (optind < argc)
128 {
129 testfile = argv[optind];
130 optind++;
131 }
132 else
133 {
134 usage(argv[0]);
135 exit(1);
136 }
137
140 initStringInfo(&json);
141
142 if ((json_file = fopen(testfile, PG_BINARY_R)) == NULL)
143 pg_fatal("error opening input: %m");
144
145 if (fstat(fileno(json_file), &statbuf) != 0)
146 pg_fatal("error statting input: %m");
147
148 bytes_left = statbuf.st_size;
149
150 for (;;)
151 {
152 /* We will break when there's nothing left to read */
153
154 if (bytes_left < chunk_size)
155 chunk_size = bytes_left;
156
157 n_read = fread(buff, 1, chunk_size, json_file);
158 if (n_read < chunk_size)
159 pg_fatal("error reading input file: %d", ferror(json_file));
160
161 appendBinaryStringInfo(&json, buff, n_read);
162
163 /*
164 * Append some trailing junk to the buffer passed to the parser. This
165 * helps us ensure that the parser does the right thing even if the
166 * chunk isn't terminated with a '\0'.
167 */
168 appendStringInfoString(&json, "1+23 trailing junk");
169 bytes_left -= n_read;
170 if (bytes_left > 0)
171 {
172 result = pg_parse_json_incremental(lex, testsem,
173 json.data, n_read,
174 false);
175 if (result != JSON_INCOMPLETE)
176 {
177 fprintf(stderr, "%s\n", json_errdetail(result, lex));
178 ret = 1;
179 goto cleanup;
180 }
181 resetStringInfo(&json);
182 }
183 else
184 {
185 result = pg_parse_json_incremental(lex, testsem,
186 json.data, n_read,
187 true);
188 if (result != JSON_SUCCESS)
189 {
190 fprintf(stderr, "%s\n", json_errdetail(result, lex));
191 ret = 1;
192 goto cleanup;
193 }
194 if (!need_strings)
195 printf("SUCCESS!\n");
196 break;
197 }
198 }
199
200cleanup:
201 fclose(json_file);
203 free(json.data);
204 free(lex);
205
206 return ret;
207}
208
209/*
210 * The semantic routines here essentially just output the same json, except
211 * for white space. We could pretty print it but there's no need for our
212 * purposes. The result should be able to be fed to any JSON processor
213 * such as jq for validation.
214 */
215
218{
219 DoState *_state = (DoState *) state;
220
221 printf("{\n");
222 _state->elem_is_first = true;
223
224 return JSON_SUCCESS;
225}
226
229{
230 DoState *_state = (DoState *) state;
231
232 printf("\n}\n");
233 _state->elem_is_first = false;
234
235 return JSON_SUCCESS;
236}
237
239do_object_field_start(void *state, char *fname, bool isnull)
240{
241 DoState *_state = (DoState *) state;
242
243 if (!_state->elem_is_first)
244 printf(",\n");
245 resetStringInfo(_state->buf);
246 escape_json(_state->buf, fname);
247 printf("%s: ", _state->buf->data);
248 _state->elem_is_first = false;
249
250 return JSON_SUCCESS;
251}
252
254do_object_field_end(void *state, char *fname, bool isnull)
255{
256 if (!lex_owns_tokens)
257 free(fname);
258
259 return JSON_SUCCESS;
260}
261
264{
265 DoState *_state = (DoState *) state;
266
267 printf("[\n");
268 _state->elem_is_first = true;
269
270 return JSON_SUCCESS;
271}
272
275{
276 DoState *_state = (DoState *) state;
277
278 printf("\n]\n");
279 _state->elem_is_first = false;
280
281 return JSON_SUCCESS;
282}
283
285do_array_element_start(void *state, bool isnull)
286{
287 DoState *_state = (DoState *) state;
288
289 if (!_state->elem_is_first)
290 printf(",\n");
291 _state->elem_is_first = false;
292
293 return JSON_SUCCESS;
294}
295
297do_array_element_end(void *state, bool isnull)
298{
299 /* nothing to do */
300
301 return JSON_SUCCESS;
302}
303
305do_scalar(void *state, char *token, JsonTokenType tokentype)
306{
307 DoState *_state = (DoState *) state;
308
309 if (tokentype == JSON_TOKEN_STRING)
310 {
311 resetStringInfo(_state->buf);
312 escape_json(_state->buf, token);
313 printf("%s", _state->buf->data);
314 }
315 else
316 printf("%s", token);
317
318 if (!lex_owns_tokens)
319 free(token);
320
321 return JSON_SUCCESS;
322}
323
324
325/* copied from backend code */
326static void
328{
329 const char *p;
330
332 for (p = str; *p; p++)
333 {
334 switch (*p)
335 {
336 case '\b':
338 break;
339 case '\f':
341 break;
342 case '\n':
344 break;
345 case '\r':
347 break;
348 case '\t':
350 break;
351 case '"':
353 break;
354 case '\\':
356 break;
357 default:
358 if ((unsigned char) *p < ' ')
359 appendStringInfo(buf, "\\u%04x", (int) *p);
360 else
362 break;
363 }
364 }
366}
367
/*
 * Print a short usage summary for the program to stderr.
 *
 * progname is the name shown in the "Usage:" line.  The default chunk size
 * quoted in the -c description must be kept in sync with DEFAULT_CHUNK_SIZE.
 */
static void
usage(const char *progname)
{
	fprintf(stderr, "Usage: %s [OPTION ...] testfile\n", progname);
	fprintf(stderr, "Options:\n");
	fprintf(stderr, " -c chunksize size of piece fed to parser (default 60)\n");
	fprintf(stderr, " -o set JSONLEX_CTX_OWNS_TOKENS for leak checking\n");
	fprintf(stderr, " -s do semantic processing\n");

}
static void cleanup(void)
Definition: bootstrap.c:713
#define PG_BINARY_R
Definition: c.h:1246
#define fprintf(file, fmt, msg)
Definition: cubescan.l:21
const char * str
#define calloc(a, b)
Definition: header.h:55
#define free(a)
Definition: header.h:65
JsonParseErrorType pg_parse_json_incremental(JsonLexContext *lex, const JsonSemAction *sem, const char *json, size_t len, bool is_last)
Definition: jsonapi.c:868
JsonLexContext * makeJsonLexContextIncremental(JsonLexContext *lex, int encoding, bool need_escapes)
Definition: jsonapi.c:497
void setJsonLexContextOwnsTokens(JsonLexContext *lex, bool owned_by_context)
Definition: jsonapi.c:542
const JsonSemAction nullSemAction
Definition: jsonapi.c:287
char * json_errdetail(JsonParseErrorType error, JsonLexContext *lex)
Definition: jsonapi.c:2401
void freeJsonLexContext(JsonLexContext *lex)
Definition: jsonapi.c:687
JsonParseErrorType
Definition: jsonapi.h:35
@ JSON_SUCCESS
Definition: jsonapi.h:36
@ JSON_INCOMPLETE
Definition: jsonapi.h:37
JsonTokenType
Definition: jsonapi.h:18
@ JSON_TOKEN_STRING
Definition: jsonapi.h:20
void pg_logging_init(const char *argv0)
Definition: logging.c:83
const char * progname
Definition: main.c:44
void * palloc(Size size)
Definition: mcxt.c:1943
#define pg_fatal(...)
PGDLLIMPORT int optind
Definition: getopt.c:51
int getopt(int nargc, char *const *nargv, const char *ostr)
Definition: getopt.c:72
PGDLLIMPORT char * optarg
Definition: getopt.c:53
static char * buf
Definition: pg_test_fsync.c:72
@ PG_UTF8
Definition: pg_wchar.h:232
#define printf(...)
Definition: port.h:245
char * c
StringInfo makeStringInfo(void)
Definition: stringinfo.c:72
void resetStringInfo(StringInfo str)
Definition: stringinfo.c:126
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:145
void appendBinaryStringInfo(StringInfo str, const void *data, int datalen)
Definition: stringinfo.c:281
void appendStringInfoString(StringInfo str, const char *s)
Definition: stringinfo.c:230
void initStringInfo(StringInfo str)
Definition: stringinfo.c:97
#define appendStringInfoCharMacro(str, ch)
Definition: stringinfo.h:231
JsonLexContext * lex
json_struct_action object_start
Definition: jsonapi.h:154
void * semstate
Definition: jsonapi.h:153
__int64 st_size
Definition: win32_port.h:263
Definition: regguts.h:323
static void usage(const char *progname)
static bool lex_owns_tokens
static JsonParseErrorType do_object_field_start(void *state, char *fname, bool isnull)
static JsonSemAction sem
int main(int argc, char **argv)
static JsonParseErrorType do_array_element_end(void *state, bool isnull)
struct DoState DoState
static JsonParseErrorType do_array_element_start(void *state, bool isnull)
static JsonParseErrorType do_object_end(void *state)
static JsonParseErrorType do_scalar(void *state, char *token, JsonTokenType tokentype)
#define DEFAULT_CHUNK_SIZE
static JsonParseErrorType do_array_start(void *state)
static JsonParseErrorType do_object_start(void *state)
static JsonParseErrorType do_array_end(void *state)
static JsonParseErrorType do_object_field_end(void *state, char *fname, bool isnull)
static void escape_json(StringInfo buf, const char *str)
#define BUFSIZE
#define fstat
Definition: win32_port.h:273