PostgreSQL Source Code git master
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
jsonb.h
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * jsonb.h
4 * Declarations for jsonb data type support.
5 *
6 * Copyright (c) 1996-2025, PostgreSQL Global Development Group
7 *
8 * src/include/utils/jsonb.h
9 *
10 *-------------------------------------------------------------------------
11 */
12#ifndef __JSONB_H__
13#define __JSONB_H__
14
15#include "lib/stringinfo.h"
16#include "utils/array.h"
17#include "utils/numeric.h"
18
19/* Tokens used when sequentially processing a jsonb value */
20typedef enum
21{
31
32/* Strategy numbers for GIN index opclasses */
33#define JsonbContainsStrategyNumber 7
34#define JsonbExistsStrategyNumber 9
35#define JsonbExistsAnyStrategyNumber 10
36#define JsonbExistsAllStrategyNumber 11
37#define JsonbJsonpathExistsStrategyNumber 15
38#define JsonbJsonpathPredicateStrategyNumber 16
39
40
41/*
42 * In the standard jsonb_ops GIN opclass for jsonb, we choose to index both
43 * keys and values. The storage format is text. The first byte of the text
44 * string distinguishes whether this is a key (always a string), null value,
45 * boolean value, numeric value, or string value. However, array elements
46 * that are strings are marked as though they were keys; this imprecision
47 * supports the definition of the "exists" operator, which treats array
48 * elements like keys. The remainder of the text string is empty for a null
49 * value, "t" or "f" for a boolean value, a normalized print representation of
50 * a numeric value, or the text of a string value. However, if the length of
51 * this text representation would exceed JGIN_MAXLENGTH bytes, we instead hash
52 * the text representation and store an 8-hex-digit representation of the
53 * uint32 hash value, marking the prefix byte with an additional bit to
54 * distinguish that this has happened. Hashing long strings saves space and
55 * ensures that we won't overrun the maximum entry length for a GIN index.
56 * (But JGIN_MAXLENGTH is quite a bit shorter than GIN's limit. It's chosen
57 * to ensure that the on-disk text datum will have a short varlena header.)
58 * Note that when any hashed item appears in a query, we must recheck index
59 * matches against the heap tuple; currently, this costs nothing because we
60 * must always recheck for other reasons.
61 */
62#define JGINFLAG_KEY 0x01 /* key (or string array element) */
63#define JGINFLAG_NULL 0x02 /* null value */
64#define JGINFLAG_BOOL 0x03 /* boolean value */
65#define JGINFLAG_NUM 0x04 /* numeric value */
66#define JGINFLAG_STR 0x05 /* string value (if not an array element) */
67#define JGINFLAG_HASHED 0x10 /* OR'd into flag if value was hashed */
68#define JGIN_MAXLENGTH 125 /* max length of text part before hashing */
69
70typedef struct JsonbPair JsonbPair;
71typedef struct JsonbValue JsonbValue;
72
73/*
74 * Jsonbs are varlena objects, so must meet the varlena convention that the
75 * first int32 of the object contains the total object size in bytes. Be sure
76 * to use VARSIZE() and SET_VARSIZE() to access it, though!
77 *
78 * Jsonb is the on-disk representation, in contrast to the in-memory JsonbValue
79 * representation. Often, JsonbValues are just shims through which a Jsonb
80 * buffer is accessed, but they can also be deep copied and passed around.
81 *
82 * Jsonb is a tree structure. Each node in the tree consists of a JEntry
83 * header and a variable-length content (possibly of zero size). The JEntry
84 * header indicates what kind of a node it is, e.g. a string or an array,
85 * and provides the length of its variable-length portion.
86 *
87 * The JEntry and the content of a node are not stored physically together.
88 * Instead, the container array or object has an array that holds the JEntrys
89 * of all the child nodes, followed by their variable-length portions.
90 *
91 * The root node is an exception; it has no parent array or object that could
92 * hold its JEntry. Hence, no JEntry header is stored for the root node. It
93 * is implicitly known that the root node must be an array or an object,
94 * so we can get away without the type indicator as long as we can distinguish
95 * the two. For that purpose, both an array and an object begin with a uint32
96 * header field, which contains a JB_FOBJECT or JB_FARRAY flag. When a naked
97 * scalar value needs to be stored as a Jsonb value, what we actually store is
98 * an array with one element, with the flags in the array's header field set
99 * to JB_FSCALAR | JB_FARRAY.
100 *
101 * Overall, the Jsonb struct requires 4-byte alignment. Within the struct,
102 * the variable-length portion of some node types is aligned to a 4-byte
103 * boundary, while others are not. When alignment is needed, the padding is
104 * in the beginning of the node that requires it. For example, if a numeric
105 * node is stored after a string node, so that the numeric node begins at
106 * offset 3, the variable-length portion of the numeric node will begin with
107 * one padding byte so that the actual numeric data is 4-byte aligned.
108 */
109
110/*
111 * JEntry format.
112 *
113 * The least significant 28 bits store either the data length of the entry,
114 * or its end+1 offset from the start of the variable-length portion of the
115 * containing object. The next three bits store the type of the entry, and
116 * the high-order bit tells whether the least significant bits store a length
117 * or an offset.
118 *
119 * The reason for the offset-or-length complication is to compromise between
120 * access speed and data compressibility. In the initial design each JEntry
121 * always stored an offset, but this resulted in JEntry arrays with horrible
122 * compressibility properties, so that TOAST compression of a JSONB did not
123 * work well. Storing only lengths would greatly improve compressibility,
124 * but it makes random access into large arrays expensive (O(N) not O(1)).
125 * So what we do is store an offset in every JB_OFFSET_STRIDE'th JEntry and
126 * a length in the rest. This results in reasonably compressible data (as
127 * long as the stride isn't too small). We may have to examine as many as
128 * JB_OFFSET_STRIDE JEntrys in order to find out the offset or length of any
129 * given item, but that's still O(1) no matter how large the container is.
130 *
131 * We could avoid eating a flag bit for this purpose if we were to store
132 * the stride in the container header, or if we were willing to treat the
133 * stride as an unchangeable constant. Neither of those options is very
134 * attractive though.
135 */
137
138#define JENTRY_OFFLENMASK 0x0FFFFFFF /* low 28 bits: data length or end+1 offset */
139#define JENTRY_TYPEMASK 0x70000000 /* next 3 bits: type of the entry */
140#define JENTRY_HAS_OFF 0x80000000 /* high bit: set if low bits hold an offset */
141
142/* values stored in the type bits (compare against je & JENTRY_TYPEMASK) */
143#define JENTRY_ISSTRING 0x00000000
144#define JENTRY_ISNUMERIC 0x10000000
145#define JENTRY_ISBOOL_FALSE 0x20000000
146#define JENTRY_ISBOOL_TRUE 0x30000000
147#define JENTRY_ISNULL 0x40000000
148#define JENTRY_ISCONTAINER 0x50000000 /* array or object */
149
150/*
 * Access macros for a JEntry value.
 *
 * JBE_OFFLENFLD extracts the offset-or-length field, JBE_HAS_OFF tests the
 * offset flag, and the JBE_ISxxx macros compare the type bits against one of
 * the JENTRY_ISxxx values. Note possible multiple evaluations: JBE_ISBOOL
 * may evaluate its argument twice, so do not pass an expression with side
 * effects.
 */
151#define JBE_OFFLENFLD(je_) ((je_) & JENTRY_OFFLENMASK)
152#define JBE_HAS_OFF(je_) (((je_) & JENTRY_HAS_OFF) != 0)
153#define JBE_ISSTRING(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISSTRING)
154#define JBE_ISNUMERIC(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISNUMERIC)
155#define JBE_ISCONTAINER(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISCONTAINER)
156#define JBE_ISNULL(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISNULL)
157#define JBE_ISBOOL_TRUE(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISBOOL_TRUE)
158#define JBE_ISBOOL_FALSE(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISBOOL_FALSE)
159#define JBE_ISBOOL(je_) (JBE_ISBOOL_TRUE(je_) || JBE_ISBOOL_FALSE(je_))
160
161/*
 * Macro for advancing an offset variable to the next JEntry.
 *
 * If the JEntry has its HAS_OFF bit set, its offset-or-length field is an
 * offset and replaces "offset"; otherwise the field is a length and is added
 * to "offset". "je" is copied into a local first, so it is evaluated only
 * once; "offset" must be a modifiable lvalue.
 */
162#define JBE_ADVANCE_OFFSET(offset, je) \
163 do { \
164 JEntry je_ = (je); \
165 if (JBE_HAS_OFF(je_)) \
166 (offset) = JBE_OFFLENFLD(je_); \
167 else \
168 (offset) += JBE_OFFLENFLD(je_); \
169 } while(0)
170
171/*
172 * We store an offset, not a length, every JB_OFFSET_STRIDE children.
173 * Caution: this macro should only be referenced when creating a JSONB
174 * value. When examining an existing value, pay attention to the HAS_OFF
175 * bits instead. This allows changes in the offset-placement heuristic
176 * without breaking on-disk compatibility.
177 */
178#define JB_OFFSET_STRIDE 32
179
180/*
181 * A jsonb array or object node, within a Jsonb Datum.
182 *
183 * An array has one child for each element, stored in array order.
184 *
185 * An object has two children for each key/value pair. The keys all appear
186 * first, in key sort order; then the values appear, in an order matching the
187 * key order. This arrangement keeps the keys compact in memory, making a
188 * search for a particular key more cache-friendly.
189 */
190typedef struct JsonbContainer
191{
192 uint32 header; /* number of elements or key/value pairs, and
193 * flags */
195
196 /* the data for each child node follows. */
198
199/* flags for the header-field in JsonbContainer */
200#define JB_CMASK 0x0FFFFFFF /* mask for count field (low 28 bits) */
201#define JB_FSCALAR 0x10000000 /* flag bit: raw scalar, stored as a one-element array */
202#define JB_FOBJECT 0x20000000 /* flag bit: container is an object */
203#define JB_FARRAY 0x40000000 /* flag bit: container is an array */
204
205/*
 * convenience macros for accessing a JsonbContainer struct; "jc" must be a
 * pointer to the container, and each macro evaluates it once
 */
206#define JsonContainerSize(jc) ((jc)->header & JB_CMASK)
207#define JsonContainerIsScalar(jc) (((jc)->header & JB_FSCALAR) != 0)
208#define JsonContainerIsObject(jc) (((jc)->header & JB_FOBJECT) != 0)
209#define JsonContainerIsArray(jc) (((jc)->header & JB_FARRAY) != 0)
210
211/* The top-level on-disk format for a jsonb datum. */
212typedef struct
213{
214 int32 vl_len_; /* varlena header (do not touch directly!) */
216} Jsonb;
217
218/*
 * convenience macros for accessing the root container in a Jsonb datum
 *
 * Each one reads the uint32 found at VARDATA(jbp_) and applies JB_CMASK or
 * one of the JB_F* flag bits to it, mirroring the JsonContainer* macros.
 * NOTE(review): presumably that word is the root container's header field,
 * located immediately after the varlena header -- confirm against the Jsonb
 * struct definition.
 */
219#define JB_ROOT_COUNT(jbp_) (*(uint32 *) VARDATA(jbp_) & JB_CMASK)
220#define JB_ROOT_IS_SCALAR(jbp_) ((*(uint32 *) VARDATA(jbp_) & JB_FSCALAR) != 0)
221#define JB_ROOT_IS_OBJECT(jbp_) ((*(uint32 *) VARDATA(jbp_) & JB_FOBJECT) != 0)
222#define JB_ROOT_IS_ARRAY(jbp_) ((*(uint32 *) VARDATA(jbp_) & JB_FARRAY) != 0)
223
224
226{
227 /* Scalar types */
228 jbvNull = 0x0,
232 /* Composite types */
233 jbvArray = 0x10,
235 /* Binary (i.e. struct Jsonb) jbvArray/jbvObject */
237
238 /*
239 * Virtual types.
240 *
241 * These types are used only for in-memory JSON processing and are serialized
242 * into JSON strings when output to json/jsonb.
243 */
245};
246
247/*
248 * JsonbValue: In-memory representation of Jsonb. This is a convenient
249 * deserialized representation that can easily support using the "val"
250 * union across underlying types during manipulation. The Jsonb on-disk
251 * representation has various alignment considerations.
252 */
254{
255 enum jbvType type; /* Influences sort order */
256
257 union
258 {
261 struct
262 {
263 int len;
264 char *val; /* Not necessarily null-terminated */
265 } string; /* String primitive type */
266
267 struct
268 {
271 bool rawScalar; /* Top-level "raw scalar" array? */
272 } array; /* Array container type */
273
274 struct
275 {
276 int nPairs; /* 1 pair, 2 elements */
278 } object; /* Associative container type */
279
280 struct
281 {
282 int len;
284 } binary; /* Array or object, in on-disk format */
285
286 struct
287 {
291 int tz; /* Numeric time zone, in seconds, for
292 * TimestampTz data type */
295};
296
/*
 * IsAJsonbScalar: true if the JsonbValue pointed to by "jsonbval" has a type
 * in the range jbvNull..jbvBool, or has type jbvDatetime. The argument may
 * be evaluated up to three times, so it must not have side effects.
 */
297#define IsAJsonbScalar(jsonbval) (((jsonbval)->type >= jbvNull && \
298 (jsonbval)->type <= jbvBool) || \
299 (jsonbval)->type == jbvDatetime)
300
301/*
302 * Key/value pair within an Object.
303 *
304 * This struct type is only used briefly while constructing a Jsonb; it is
305 * *not* the on-disk representation.
306 *
307 * Pairs with duplicate keys are de-duplicated. We store the originally
308 * observed pair ordering for the purpose of removing duplicates in a
309 * well-defined way (which is "last observed wins").
310 */
312{
313 JsonbValue key; /* Must be a jbvString */
314 JsonbValue value; /* May be of any type */
315 uint32 order; /* Pair's index in original sequence */
316};
317
318/* Conversion state used when parsing Jsonb from text, or for type coercion */
319typedef struct JsonbParseState
320{
324 bool unique_keys; /* Check object key uniqueness */
325 bool skip_nulls; /* Skip null object fields */
327
328/*
329 * JsonbIterator holds details of the type for each iteration. It also stores a
330 * Jsonb varlena buffer, which can be directly accessed in some contexts.
331 */
332typedef enum
333{
340
341typedef struct JsonbIterator
342{
343 /* Container being iterated */
345 uint32 nElems; /* Number of elements in children array (will
346 * be nPairs for objects) */
347 bool isScalar; /* Pseudo-array scalar value? */
348 JEntry *children; /* JEntrys for child nodes */
349 /* Data proper. This points to the beginning of the variable-length data */
351
352 /* Current item in buffer (up to nElems) */
354
355 /* Data offset corresponding to current item */
357
358 /*
359 * If the container is an object, we want to return keys and values
360 * alternately; so curDataOffset points to the current key, and
361 * curValueOffset points to the current value.
362 */
364
365 /* Private state */
367
370
371
372/* Convenience inline functions and macros */
373static inline Jsonb *
375{
376 return (Jsonb *) PG_DETOAST_DATUM(d);
377}
378
379static inline Jsonb *
381{
382 return (Jsonb *) PG_DETOAST_DATUM_COPY(d);
383}
384
385static inline Datum
387{
388 return PointerGetDatum(p);
389}
390
391#define PG_GETARG_JSONB_P(x) DatumGetJsonbP(PG_GETARG_DATUM(x))
392#define PG_GETARG_JSONB_P_COPY(x) DatumGetJsonbPCopy(PG_GETARG_DATUM(x))
393#define PG_RETURN_JSONB_P(x) PG_RETURN_POINTER(x)
394
395/* Support functions */
396extern uint32 getJsonbOffset(const JsonbContainer *jc, int index);
397extern uint32 getJsonbLength(const JsonbContainer *jc, int index);
400 uint32 flags,
401 JsonbValue *key);
403 const char *keyVal, int keyLen,
404 JsonbValue *res);
406 uint32 i);
408 JsonbIteratorToken seq, JsonbValue *jbval);
411 bool skipNested);
412extern void JsonbToJsonbValue(Jsonb *jsonb, JsonbValue *val);
415 JsonbIterator **mContained);
416extern void JsonbHashScalarValue(const JsonbValue *scalarVal, uint32 *hash);
417extern void JsonbHashScalarValueExtended(const JsonbValue *scalarVal,
418 uint64 *hash, uint64 seed);
419
420/* jsonb.c support functions */
421extern char *JsonbToCString(StringInfo out, JsonbContainer *in,
422 int estimated_len);
424 int estimated_len);
425extern char *JsonbUnquote(Jsonb *jb);
426extern bool JsonbExtractScalar(JsonbContainer *jbc, JsonbValue *res);
427extern const char *JsonbTypeName(JsonbValue *val);
428
429extern Datum jsonb_set_element(Jsonb *jb, Datum *path, int path_len,
431extern Datum jsonb_get_element(Jsonb *jb, Datum *path, int npath,
432 bool *isnull, bool as_text);
433extern bool to_jsonb_is_immutable(Oid typoid);
434extern Datum jsonb_build_object_worker(int nargs, const Datum *args, const bool *nulls,
435 const Oid *types, bool absent_on_null,
436 bool unique_keys);
437extern Datum jsonb_build_array_worker(int nargs, const Datum *args, const bool *nulls,
438 const Oid *types, bool absent_on_null);
439
440#endif /* __JSONB_H__ */
#define FLEXIBLE_ARRAY_MEMBER
Definition: c.h:434
int32_t int32
Definition: c.h:498
uint64_t uint64
Definition: c.h:503
uint32_t uint32
Definition: c.h:502
size_t Size
Definition: c.h:576
struct typedefs * types
Definition: ecpg.c:30
#define PG_DETOAST_DATUM_COPY(datum)
Definition: fmgr.h:242
#define PG_DETOAST_DATUM(datum)
Definition: fmgr.h:240
#define newval
long val
Definition: informix.c:689
int b
Definition: isn.c:74
int a
Definition: isn.c:73
int i
Definition: isn.c:77
jbvType
Definition: jsonb.h:226
@ jbvObject
Definition: jsonb.h:234
@ jbvNumeric
Definition: jsonb.h:230
@ jbvBool
Definition: jsonb.h:231
@ jbvArray
Definition: jsonb.h:233
@ jbvBinary
Definition: jsonb.h:236
@ jbvNull
Definition: jsonb.h:228
@ jbvDatetime
Definition: jsonb.h:244
@ jbvString
Definition: jsonb.h:229
JsonbIterState
Definition: jsonb.h:333
@ JBI_OBJECT_VALUE
Definition: jsonb.h:338
@ JBI_ARRAY_START
Definition: jsonb.h:334
@ JBI_ARRAY_ELEM
Definition: jsonb.h:335
@ JBI_OBJECT_START
Definition: jsonb.h:336
@ JBI_OBJECT_KEY
Definition: jsonb.h:337
static Jsonb * DatumGetJsonbPCopy(Datum d)
Definition: jsonb.h:380
char * JsonbUnquote(Jsonb *jb)
Definition: jsonb.c:2229
Datum jsonb_build_array_worker(int nargs, const Datum *args, const bool *nulls, const Oid *types, bool absent_on_null)
Definition: jsonb.c:1210
static Datum JsonbPGetDatum(const Jsonb *p)
Definition: jsonb.h:386
struct JsonbParseState JsonbParseState
Datum jsonb_set_element(Jsonb *jb, Datum *path, int path_len, JsonbValue *newval)
Definition: jsonfuncs.c:1679
JsonbValue * getKeyJsonValueFromContainer(JsonbContainer *container, const char *keyVal, int keyLen, JsonbValue *res)
Definition: jsonb_util.c:405
int compareJsonbContainers(JsonbContainer *a, JsonbContainer *b)
Definition: jsonb_util.c:191
Datum jsonb_get_element(Jsonb *jb, Datum *path, int npath, bool *isnull, bool as_text)
Definition: jsonfuncs.c:1531
uint32 getJsonbLength(const JsonbContainer *jc, int index)
Definition: jsonb_util.c:159
char * JsonbToCString(StringInfo out, JsonbContainer *in, int estimated_len)
Definition: jsonb.c:473
JsonbValue * pushJsonbValue(JsonbParseState **pstate, JsonbIteratorToken seq, JsonbValue *jbval)
Definition: jsonb_util.c:573
JsonbIterator * JsonbIteratorInit(JsonbContainer *container)
Definition: jsonb_util.c:824
const char * JsonbTypeName(JsonbValue *val)
Definition: jsonb.c:180
static Jsonb * DatumGetJsonbP(Datum d)
Definition: jsonb.h:374
void JsonbHashScalarValue(const JsonbValue *scalarVal, uint32 *hash)
Definition: jsonb_util.c:1323
Datum jsonb_build_object_worker(int nargs, const Datum *args, const bool *nulls, const Oid *types, bool absent_on_null, bool unique_keys)
Definition: jsonb.c:1125
void JsonbToJsonbValue(Jsonb *jsonb, JsonbValue *val)
Definition: jsonb_util.c:72
uint32 JEntry
Definition: jsonb.h:136
JsonbValue * findJsonbValueFromContainer(JsonbContainer *container, uint32 flags, JsonbValue *key)
Definition: jsonb_util.c:351
JsonbIteratorToken JsonbIteratorNext(JsonbIterator **it, JsonbValue *val, bool skipNested)
Definition: jsonb_util.c:860
struct JsonbIterator JsonbIterator
uint32 getJsonbOffset(const JsonbContainer *jc, int index)
Definition: jsonb_util.c:134
bool JsonbExtractScalar(JsonbContainer *jbc, JsonbValue *res)
Definition: jsonb.c:1968
void JsonbHashScalarValueExtended(const JsonbValue *scalarVal, uint64 *hash, uint64 seed)
Definition: jsonb_util.c:1366
JsonbValue * getIthJsonbValueFromContainer(JsonbContainer *container, uint32 i)
Definition: jsonb_util.c:475
JsonbIteratorToken
Definition: jsonb.h:21
@ WJB_KEY
Definition: jsonb.h:23
@ WJB_DONE
Definition: jsonb.h:22
@ WJB_END_ARRAY
Definition: jsonb.h:27
@ WJB_VALUE
Definition: jsonb.h:24
@ WJB_END_OBJECT
Definition: jsonb.h:29
@ WJB_ELEM
Definition: jsonb.h:25
@ WJB_BEGIN_OBJECT
Definition: jsonb.h:28
@ WJB_BEGIN_ARRAY
Definition: jsonb.h:26
Jsonb * JsonbValueToJsonb(JsonbValue *val)
Definition: jsonb_util.c:92
char * JsonbToCStringIndent(StringInfo out, JsonbContainer *in, int estimated_len)
Definition: jsonb.c:482
bool JsonbDeepContains(JsonbIterator **val, JsonbIterator **mContained)
Definition: jsonb_util.c:1069
bool to_jsonb_is_immutable(Oid typoid)
Definition: jsonb.c:1049
struct JsonbContainer JsonbContainer
static Datum PointerGetDatum(const void *X)
Definition: postgres.h:327
uintptr_t Datum
Definition: postgres.h:69
unsigned int Oid
Definition: postgres_ext.h:30
static unsigned hash(unsigned *uv, int n)
Definition: rege_dfa.c:715
JEntry children[FLEXIBLE_ARRAY_MEMBER]
Definition: jsonb.h:194
uint32 header
Definition: jsonb.h:192
uint32 curDataOffset
Definition: jsonb.h:356
JsonbIterState state
Definition: jsonb.h:366
JEntry * children
Definition: jsonb.h:348
uint32 nElems
Definition: jsonb.h:345
struct JsonbIterator * parent
Definition: jsonb.h:368
JsonbContainer * container
Definition: jsonb.h:344
bool isScalar
Definition: jsonb.h:347
uint32 curValueOffset
Definition: jsonb.h:363
char * dataProper
Definition: jsonb.h:350
int curIndex
Definition: jsonb.h:353
uint32 order
Definition: jsonb.h:315
JsonbValue key
Definition: jsonb.h:313
JsonbValue value
Definition: jsonb.h:314
bool unique_keys
Definition: jsonb.h:324
struct JsonbParseState * next
Definition: jsonb.h:323
bool skip_nulls
Definition: jsonb.h:325
JsonbValue contVal
Definition: jsonb.h:321
JsonbPair * pairs
Definition: jsonb.h:277
struct JsonbValue::@141::@144 object
Numeric numeric
Definition: jsonb.h:259
Oid typid
Definition: jsonb.h:289
int nElems
Definition: jsonb.h:269
enum jbvType type
Definition: jsonb.h:255
int len
Definition: jsonb.h:263
char * val
Definition: jsonb.h:264
struct JsonbValue::@141::@143 array
JsonbContainer * data
Definition: jsonb.h:283
struct JsonbValue::@141::@145 binary
int32 typmod
Definition: jsonb.h:290
struct JsonbValue::@141::@146 datetime
struct JsonbValue::@141::@142 string
bool boolean
Definition: jsonb.h:260
bool rawScalar
Definition: jsonb.h:271
int tz
Definition: jsonb.h:291
int nPairs
Definition: jsonb.h:276
Datum value
Definition: jsonb.h:288
JsonbValue * elems
Definition: jsonb.h:270
Definition: jsonb.h:213
JsonbContainer root
Definition: jsonb.h:215
int32 vl_len_
Definition: jsonb.h:214
Definition: type.h:96