PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
jsonb.h
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * jsonb.h
4  * Declarations for jsonb data type support.
5  *
6  * Copyright (c) 1996-2017, PostgreSQL Global Development Group
7  *
8  * src/include/utils/jsonb.h
9  *
10  *-------------------------------------------------------------------------
11  */
12 #ifndef __JSONB_H__
13 #define __JSONB_H__
14 
15 #include "lib/stringinfo.h"
16 #include "utils/array.h"
17 #include "utils/numeric.h"
18 
19 /* Tokens used when sequentially processing a jsonb value */
20 typedef enum
21 {
31 
32 /* Strategy numbers for GIN index opclasses (values are not contiguous; 8 is not defined here) */
33 #define JsonbContainsStrategyNumber 7 /* containment; presumably the @> operator -- confirm */
34 #define JsonbExistsStrategyNumber 9 /* "exists"; presumably the ? operator -- confirm */
35 #define JsonbExistsAnyStrategyNumber 10 /* "exists any"; presumably the ?| operator -- confirm */
36 #define JsonbExistsAllStrategyNumber 11 /* "exists all"; presumably the ?& operator -- confirm */
37 
38 /*
39  * In the standard jsonb_ops GIN opclass for jsonb, we choose to index both
40  * keys and values. The storage format is text. The first byte of the text
41  * string distinguishes whether this is a key (always a string), null value,
42  * boolean value, numeric value, or string value. However, array elements
43  * that are strings are marked as though they were keys; this imprecision
44  * supports the definition of the "exists" operator, which treats array
45  * elements like keys. The remainder of the text string is empty for a null
46  * value, "t" or "f" for a boolean value, a normalized print representation of
47  * a numeric value, or the text of a string value. However, if the length of
48  * this text representation would exceed JGIN_MAXLENGTH bytes, we instead hash
49  * the text representation and store an 8-hex-digit representation of the
50  * uint32 hash value, marking the prefix byte with an additional bit to
51  * distinguish that this has happened. Hashing long strings saves space and
52  * ensures that we won't overrun the maximum entry length for a GIN index.
53  * (But JGIN_MAXLENGTH is quite a bit shorter than GIN's limit. It's chosen
54  * to ensure that the on-disk text datum will have a short varlena header.)
55  * Note that when any hashed item appears in a query, we must recheck index
56  * matches against the heap tuple; currently, this costs nothing because we
57  * must always recheck for other reasons.
58  */
59 #define JGINFLAG_KEY 0x01 /* key (or string array element) */
60 #define JGINFLAG_NULL 0x02 /* null value; remainder of the text is empty */
61 #define JGINFLAG_BOOL 0x03 /* boolean value; remainder of the text is "t" or "f" */
62 #define JGINFLAG_NUM 0x04 /* numeric value; remainder is its normalized print representation */
63 #define JGINFLAG_STR 0x05 /* string value (if not an array element) */
64 #define JGINFLAG_HASHED 0x10 /* OR'd into flag if value was hashed; the text is then 8 hex digits of a uint32 hash */
65 #define JGIN_MAXLENGTH 125 /* max length in bytes of text part before hashing */
66 
67 /* Convenience macros for moving between Datum values and Jsonb pointers */
68 #define DatumGetJsonb(d) ((Jsonb *) PG_DETOAST_DATUM(d)) /* applies PG_DETOAST_DATUM, then casts the result to a Jsonb pointer */
69 #define JsonbGetDatum(p) PointerGetDatum(p) /* thin alias for PointerGetDatum */
70 #define PG_GETARG_JSONB(x) DatumGetJsonb(PG_GETARG_DATUM(x)) /* wraps PG_GETARG_DATUM(x) in DatumGetJsonb */
71 #define PG_RETURN_JSONB(x) PG_RETURN_POINTER(x) /* thin alias for PG_RETURN_POINTER */
72 
73 typedef struct JsonbPair JsonbPair; /* forward declaration; the struct body appears later in this header */
74 typedef struct JsonbValue JsonbValue; /* forward declaration; JsonbPair embeds JsonbValue members */
75 
76 /*
77  * Jsonbs are varlena objects, so must meet the varlena convention that the
78  * first int32 of the object contains the total object size in bytes. Be sure
79  * to use VARSIZE() and SET_VARSIZE() to access it, though!
80  *
81  * Jsonb is the on-disk representation, in contrast to the in-memory JsonbValue
82  * representation. Often, JsonbValues are just shims through which a Jsonb
83  * buffer is accessed, but they can also be deep copied and passed around.
84  *
85  * Jsonb is a tree structure. Each node in the tree consists of a JEntry
86  * header and a variable-length content (possibly of zero size). The JEntry
87  * header indicates what kind of a node it is, e.g. a string or an array,
88  * and provides the length of its variable-length portion.
89  *
90  * The JEntry and the content of a node are not stored physically together.
91  * Instead, the container array or object has an array that holds the JEntrys
92  * of all the child nodes, followed by their variable-length portions.
93  *
94  * The root node is an exception; it has no parent array or object that could
95  * hold its JEntry. Hence, no JEntry header is stored for the root node. It
96  * is implicitly known that the root node must be an array or an object,
97  * so we can get away without the type indicator as long as we can distinguish
98  * the two. For that purpose, both an array and an object begin with a uint32
99  * header field, which contains a JB_FOBJECT or JB_FARRAY flag. When a naked
100  * scalar value needs to be stored as a Jsonb value, what we actually store is
101  * an array with one element, with the flags in the array's header field set
102  * to JB_FSCALAR | JB_FARRAY.
103  *
104  * Overall, the Jsonb struct requires 4-byte alignment. Within the struct,
105  * the variable-length portion of some node types is aligned to a 4-byte
106  * boundary, while others are not. When alignment is needed, the padding is
107  * in the beginning of the node that requires it. For example, if a numeric
108  * node is stored after a string node, so that the numeric node begins at
109  * offset 3, the variable-length portion of the numeric node will begin with
110  * one padding byte so that the actual numeric data is 4-byte aligned.
111  */
112 
113 /*
114  * JEntry format.
115  *
116  * The least significant 28 bits store either the data length of the entry,
117  * or its end+1 offset from the start of the variable-length portion of the
118  * containing object. The next three bits store the type of the entry, and
119  * the high-order bit tells whether the least significant bits store a length
120  * or an offset.
121  *
122  * The reason for the offset-or-length complication is to compromise between
123  * access speed and data compressibility. In the initial design each JEntry
124  * always stored an offset, but this resulted in JEntry arrays with horrible
125  * compressibility properties, so that TOAST compression of a JSONB did not
126  * work well. Storing only lengths would greatly improve compressibility,
127  * but it makes random access into large arrays expensive (O(N) not O(1)).
128  * So what we do is store an offset in every JB_OFFSET_STRIDE'th JEntry and
129  * a length in the rest. This results in reasonably compressible data (as
130  * long as the stride isn't too small). We may have to examine as many as
131  * JB_OFFSET_STRIDE JEntrys in order to find out the offset or length of any
132  * given item, but that's still O(1) no matter how large the container is.
133  *
134  * We could avoid eating a flag bit for this purpose if we were to store
135  * the stride in the container header, or if we were willing to treat the
136  * stride as an unchangeable constant. Neither of those options is very
137  * attractive though.
138  */
139 typedef uint32 JEntry;
140 
141 #define JENTRY_OFFLENMASK 0x0FFFFFFF /* low 28 bits: data length or end+1 offset */
142 #define JENTRY_TYPEMASK 0x70000000 /* next three bits: type of the entry */
143 #define JENTRY_HAS_OFF 0x80000000 /* high-order bit: set if the low bits hold an offset, clear if a length */
144 
145 /* values stored in the type bits (compare against an entry masked with JENTRY_TYPEMASK) */
146 #define JENTRY_ISSTRING 0x00000000
147 #define JENTRY_ISNUMERIC 0x10000000
148 #define JENTRY_ISBOOL_FALSE 0x20000000
149 #define JENTRY_ISBOOL_TRUE 0x30000000
150 #define JENTRY_ISNULL 0x40000000
151 #define JENTRY_ISCONTAINER 0x50000000 /* array or object */
152 
153 /* Access macros. Note possible multiple evaluations: JBE_ISBOOL evaluates its argument twice, so avoid side effects there */
154 #define JBE_OFFLENFLD(je_) ((je_) & JENTRY_OFFLENMASK) /* raw length-or-offset field; check JBE_HAS_OFF to tell which */
155 #define JBE_HAS_OFF(je_) (((je_) & JENTRY_HAS_OFF) != 0) /* true if the field holds an offset */
156 #define JBE_ISSTRING(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISSTRING)
157 #define JBE_ISNUMERIC(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISNUMERIC)
158 #define JBE_ISCONTAINER(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISCONTAINER)
159 #define JBE_ISNULL(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISNULL)
160 #define JBE_ISBOOL_TRUE(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISBOOL_TRUE)
161 #define JBE_ISBOOL_FALSE(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISBOOL_FALSE)
162 #define JBE_ISBOOL(je_) (JBE_ISBOOL_TRUE(je_) || JBE_ISBOOL_FALSE(je_)) /* either boolean type value */
163 
164 /* Macro for advancing an offset variable to the next JEntry: offset must be a modifiable lvalue; it is overwritten when je stores an offset and incremented when je stores a length */
165 #define JBE_ADVANCE_OFFSET(offset, je) \
166  do { \
167  JEntry je_ = (je); /* copy so that je is evaluated only once */ \
168  if (JBE_HAS_OFF(je_)) \
169  (offset) = JBE_OFFLENFLD(je_); /* stored value is already an offset */ \
170  else \
171  (offset) += JBE_OFFLENFLD(je_); /* stored value is a length; accumulate it */ \
172  } while(0)
173 
174 /*
175  * We store an offset, not a length, every JB_OFFSET_STRIDE children.
176  * Caution: this macro should only be referenced when creating a JSONB
177  * value. When examining an existing value, pay attention to the HAS_OFF
178  * bits instead. This allows changes in the offset-placement heuristic
179  * without breaking on-disk compatibility.
180  */
181 #define JB_OFFSET_STRIDE 32 /* one child JEntry in every 32 stores an offset instead of a length */
182 
183 /*
184  * A jsonb array or object node, within a Jsonb Datum.
185  *
186  * An array has one child for each element, stored in array order.
187  *
188  * An object has two children for each key/value pair. The keys all appear
189  * first, in key sort order; then the values appear, in an order matching the
190  * key order. This arrangement keeps the keys compact in memory, making a
191  * search for a particular key more cache-friendly.
192  */
193 typedef struct JsonbContainer
194 {
195  uint32 header; /* number of elements or key/value pairs, and
196  * flags */
197  JEntry children[FLEXIBLE_ARRAY_MEMBER];
198 
199  /* the data for each child node follows. */
201 
202 /* flags for the header-field in JsonbContainer */
203 #define JB_CMASK 0x0FFFFFFF /* mask for count field */
204 #define JB_FSCALAR 0x10000000 /* flag bit: one-element array wrapping a naked scalar (set together with JB_FARRAY) */
205 #define JB_FOBJECT 0x20000000 /* flag bit: container is an object */
206 #define JB_FARRAY 0x40000000 /* flag bit: container is an array */
207 
208 /* convenience macros for accessing a JsonbContainer struct; jc is a pointer to the container */
209 #define JsonContainerSize(jc) ((jc)->header & JB_CMASK) /* count field: number of elements or key/value pairs */
210 #define JsonContainerIsScalar(jc) (((jc)->header & JB_FSCALAR) != 0) /* tests the JB_FSCALAR flag */
211 #define JsonContainerIsObject(jc) (((jc)->header & JB_FOBJECT) != 0) /* tests the JB_FOBJECT flag */
212 #define JsonContainerIsArray(jc) (((jc)->header & JB_FARRAY) != 0) /* tests the JB_FARRAY flag */
213 
214 /* The top-level on-disk format for a jsonb datum. */
215 typedef struct
216 {
217  int32 vl_len_; /* varlena header (do not touch directly!) */
219 } Jsonb;
220 
221 /* convenience macros for accessing the root container in a Jsonb datum; each reads the uint32 located at VARDATA(jbp_) and applies the container header mask or flag */
222 #define JB_ROOT_COUNT(jbp_) (*(uint32 *) VARDATA(jbp_) & JB_CMASK)
223 #define JB_ROOT_IS_SCALAR(jbp_) ((*(uint32 *) VARDATA(jbp_) & JB_FSCALAR) != 0)
224 #define JB_ROOT_IS_OBJECT(jbp_) ((*(uint32 *) VARDATA(jbp_) & JB_FOBJECT) != 0)
225 #define JB_ROOT_IS_ARRAY(jbp_) ((*(uint32 *) VARDATA(jbp_) & JB_FARRAY) != 0)
226 
227 
229 {
230  /* Scalar types */
231  jbvNull = 0x0,
235  /* Composite types */
236  jbvArray = 0x10,
238  /* Binary (i.e. struct Jsonb) jbvArray/jbvObject */
240 };
241 
242 /*
243  * JsonbValue: In-memory representation of Jsonb. This is a convenient
244  * deserialized representation, that can easily support using the "val"
245  * union across underlying types during manipulation. The Jsonb on-disk
246  * representation has various alignment considerations.
247  */
249 {
250  enum jbvType type; /* Influences sort order */
251 
252  union
253  {
255  bool boolean;
256  struct
257  {
258  int len;
259  char *val; /* Not necessarily null-terminated */
260  } string; /* String primitive type */
261 
262  struct
263  {
264  int nElems;
266  bool rawScalar; /* Top-level "raw scalar" array? */
267  } array; /* Array container type */
268 
269  struct
270  {
271  int nPairs; /* 1 pair, 2 elements */
273  } object; /* Associative container type */
274 
275  struct
276  {
277  int len;
279  } binary; /* Array or object, in on-disk format */
280  } val;
281 };
282 
283 #define IsAJsonbScalar(jsonbval) ((jsonbval)->type >= jbvNull && \
284  (jsonbval)->type <= jbvBool) /* range test on the type field; evaluates jsonbval twice. Assumes the scalar enumerators run contiguously from jbvNull to jbvBool -- confirm against enum jbvType */
285 
286 /*
287  * Key/value pair within an Object.
288  *
289  * This struct type is only used briefly while constructing a Jsonb; it is
290  * *not* the on-disk representation.
291  *
292  * Pairs with duplicate keys are de-duplicated. We store the originally
293  * observed pair ordering for the purpose of removing duplicates in a
294  * well-defined way (which is "last observed wins").
295  */
296 struct JsonbPair
297 {
298  JsonbValue key; /* Must be a jbvString */
299  JsonbValue value; /* May be of any type */
300  uint32 order; /* Pair's index in original sequence; lets duplicate keys be resolved as "last observed wins" */
301 };
302 
303 /* Conversion state used when parsing Jsonb from text, or for type coercion */
304 typedef struct JsonbParseState
305 {
310 
311 /*
312  * JsonbIterator holds details of the type for each iteration. It also stores a
313  * Jsonb varlena buffer, which can be directly accessed in some contexts.
314  */
315 typedef enum
316 {
323 
324 typedef struct JsonbIterator
325 {
326  /* Container being iterated */
328  uint32 nElems; /* Number of elements in children array (will
329  * be nPairs for objects) */
330  bool isScalar; /* Pseudo-array scalar value? */
331  JEntry *children; /* JEntrys for child nodes */
332  /* Data proper. This points to the beginning of the variable-length data */
333  char *dataProper;
334 
335  /* Current item in buffer (up to nElems) */
336  int curIndex;
337 
338  /* Data offset corresponding to current item */
340 
341  /*
342  * If the container is an object, we want to return keys and values
343  * alternately; so curDataOffset points to the current key, and
344  * curValueOffset points to the current value.
345  */
347 
348  /* Private state */
350 
352 } JsonbIterator;
353 
354 
355 /* Support functions */
356 extern uint32 getJsonbOffset(const JsonbContainer *jc, int index);
357 extern uint32 getJsonbLength(const JsonbContainer *jc, int index);
360  uint32 flags,
361  JsonbValue *key);
363  uint32 i);
364 extern JsonbValue *pushJsonbValue(JsonbParseState **pstate,
365  JsonbIteratorToken seq, JsonbValue *jbVal);
368  bool skipNested);
370 extern bool JsonbDeepContains(JsonbIterator **val,
371  JsonbIterator **mContained);
372 extern void JsonbHashScalarValue(const JsonbValue *scalarVal, uint32 *hash);
373 
374 /* jsonb.c support functions */
375 extern char *JsonbToCString(StringInfo out, JsonbContainer *in,
376  int estimated_len);
377 extern char *JsonbToCStringIndent(StringInfo out, JsonbContainer *in,
378  int estimated_len);
379 
380 
381 #endif /* __JSONB_H__ */
char * JsonbToCStringIndent(StringInfo out, JsonbContainer *in, int estimated_len)
Definition: jsonb.c:437
JsonbIterState state
Definition: jsonb.h:349
uint32 order
Definition: jsonb.h:300
JEntry children[FLEXIBLE_ARRAY_MEMBER]
Definition: jsonb.h:197
JEntry * children
Definition: jsonb.h:331
struct JsonbParseState JsonbParseState
Definition: jsonb.h:215
JsonbIterator * JsonbIteratorInit(JsonbContainer *container)
Definition: jsonb_util.c:718
Numeric numeric
Definition: jsonb.h:254
struct JsonbValue::@115::@116 string
jbvType
Definition: jsonb.h:228
JsonbContainer * data
Definition: jsonb.h:278
char * val
Definition: jsonb.h:259
Definition: jsonb.h:234
int compareJsonbContainers(JsonbContainer *a, JsonbContainer *b)
Definition: jsonb_util.c:178
uint32 getJsonbLength(const JsonbContainer *jc, int index)
Definition: jsonb_util.c:146
Definition: jsonb.h:22
Definition: jsonb.h:231
bool boolean
Definition: jsonb.h:255
char * JsonbToCString(StringInfo out, JsonbContainer *in, int estimated_len)
Definition: jsonb.c:428
struct JsonbContainer JsonbContainer
JsonbValue * findJsonbValueFromContainer(JsonbContainer *sheader, uint32 flags, JsonbValue *key)
Definition: jsonb_util.c:327
struct JsonbParseState * next
Definition: jsonb.h:308
struct JsonbValue::@115::@117 array
int len
Definition: jsonb.h:258
signed int int32
Definition: c.h:256
Definition: type.h:89
JsonbValue * getIthJsonbValueFromContainer(JsonbContainer *sheader, uint32 i)
Definition: jsonb_util.c:419
uint32 curDataOffset
Definition: jsonb.h:339
void JsonbHashScalarValue(const JsonbValue *scalarVal, uint32 *hash)
Definition: jsonb_util.c:1214
bool rawScalar
Definition: jsonb.h:266
struct JsonbIterator JsonbIterator
JsonbContainer * container
Definition: jsonb.h:327
JsonbValue * elems
Definition: jsonb.h:265
unsigned int uint32
Definition: c.h:268
JsonbValue key
Definition: jsonb.h:298
Definition: jsonb.h:23
JsonbIteratorToken
Definition: jsonb.h:20
JsonbPair * pairs
Definition: jsonb.h:272
JsonbContainer root
Definition: jsonb.h:218
uint32 header
Definition: jsonb.h:195
JsonbValue contVal
Definition: jsonb.h:306
bool isScalar
Definition: jsonb.h:330
JsonbValue * pushJsonbValue(JsonbParseState **pstate, JsonbIteratorToken seq, JsonbValue *jbVal)
Definition: jsonb_util.c:517
Jsonb * JsonbValueToJsonb(JsonbValue *val)
Definition: jsonb_util.c:79
JsonbIteratorToken JsonbIteratorNext(JsonbIterator **it, JsonbValue *val, bool skipNested)
Definition: jsonb_util.c:754
int nPairs
Definition: jsonb.h:271
int curIndex
Definition: jsonb.h:336
struct JsonbIterator * parent
Definition: jsonb.h:351
int nElems
Definition: jsonb.h:264
uint32 JEntry
Definition: jsonb.h:139
int32 vl_len_
Definition: jsonb.h:217
size_t Size
Definition: c.h:356
char * dataProper
Definition: jsonb.h:333
bool JsonbDeepContains(JsonbIterator **val, JsonbIterator **mContained)
Definition: jsonb_util.c:963
JsonbValue value
Definition: jsonb.h:299
enum jbvType type
Definition: jsonb.h:250
struct JsonbValue::@115::@118 object
uint32 curValueOffset
Definition: jsonb.h:346
int i
struct JsonbValue::@115::@119 binary
uint32 nElems
Definition: jsonb.h:328
JsonbIterState
Definition: jsonb.h:315
static unsigned hash(unsigned *uv, int n)
Definition: rege_dfa.c:541
long val
Definition: informix.c:689
Definition: jsonb.h:25
uint32 getJsonbOffset(const JsonbContainer *jc, int index)
Definition: jsonb_util.c:121