PostgreSQL Source Code  git master
varatt.h
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * varatt.h
4  * variable-length datatypes (TOAST support)
5  *
6  *
7  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
8  * Portions Copyright (c) 1995, Regents of the University of California
9  *
10  * src/include/varatt.h
11  *
12  *-------------------------------------------------------------------------
13  */
14 
15 #ifndef VARATT_H
16 #define VARATT_H
17 
18 /*
19  * struct varatt_external is a traditional "TOAST pointer", that is, the
20  * information needed to fetch a Datum stored out-of-line in a TOAST table.
21  * The data is compressed if and only if the external size stored in
22  * va_extinfo is less than va_rawsize - VARHDRSZ.
23  *
24  * This struct must not contain any padding, because we sometimes compare
25  * these pointers using memcmp.
26  *
27  * Note that this information is stored unaligned within actual tuples, so
28  * you need to memcpy from the tuple into a local struct variable before
29  * you can look at these fields! (The reason we use memcmp is to avoid
30  * having to do that just to detect equality of two TOAST pointers...)
31  */
32 typedef struct varatt_external
33 {
34  int32 va_rawsize; /* Original data size (includes header) */
35  uint32 va_extinfo; /* External saved size (without header) and
36  * compression method */
37  Oid va_valueid; /* Unique ID of value within TOAST table */
38  Oid va_toastrelid; /* RelID of TOAST table containing it */
40 
/*
 * These macros define the "saved size" portion of va_extinfo.  Its remaining
 * two high-order bits identify the compression method.
 */
#define VARLENA_EXTSIZE_BITS	30
#define VARLENA_EXTSIZE_MASK	((1U << VARLENA_EXTSIZE_BITS) - 1)
47 
/*
 * struct varatt_indirect is a "TOAST pointer" representing an out-of-line
 * Datum that's stored in memory, not in an external toast relation.
 * The creator of such a Datum is entirely responsible that the referenced
 * storage survives for as long as referencing pointer Datums can exist.
 *
 * Note that just as for struct varatt_external, this struct is stored
 * unaligned within any containing tuple.
 */
typedef struct varatt_indirect
{
	struct varlena *pointer;	/* Pointer to in-memory varlena */
} varatt_indirect;
61 
/*
 * struct varatt_expanded is a "TOAST pointer" representing an out-of-line
 * Datum that is stored in memory, in some type-specific, not necessarily
 * physically contiguous format that is convenient for computation not
 * storage.  APIs for this, in particular the definition of struct
 * ExpandedObjectHeader, are in src/include/utils/expandeddatum.h.
 *
 * Note that just as for struct varatt_external, this struct is stored
 * unaligned within any containing tuple.
 */
typedef struct ExpandedObjectHeader ExpandedObjectHeader;

typedef struct varatt_expanded
{
	ExpandedObjectHeader *eohptr;
} varatt_expanded;
78 
/*
 * Type tag for the various sorts of "TOAST pointer" datums.  The peculiar
 * value for VARTAG_ONDISK comes from a requirement for on-disk compatibility
 * with a previous notion that the tag field was the pointer datum's length.
 */
typedef enum vartag_external
{
	VARTAG_INDIRECT = 1,
	VARTAG_EXPANDED_RO = 2,
	VARTAG_EXPANDED_RW = 3,
	VARTAG_ONDISK = 18
} vartag_external;
91 
/*
 * True for either expanded-object tag.  This test relies on
 * VARTAG_EXPANDED_RO and VARTAG_EXPANDED_RW differing only in the
 * low-order bit.
 */
#define VARTAG_IS_EXPANDED(tag) \
	(((tag) & ~1) == VARTAG_EXPANDED_RO)

/*
 * Size of the pointer struct that goes with a given tag.  An unrecognized
 * tag trips AssertMacro and otherwise yields 0.
 */
#define VARTAG_SIZE(tag) \
	((tag) == VARTAG_INDIRECT ? sizeof(varatt_indirect) : \
	 VARTAG_IS_EXPANDED(tag) ? sizeof(varatt_expanded) : \
	 (tag) == VARTAG_ONDISK ? sizeof(varatt_external) : \
	 (AssertMacro(false), 0))
101 
102 /*
103  * These structs describe the header of a varlena object that may have been
104  * TOASTed. Generally, don't reference these structs directly, but use the
105  * macros below.
106  *
107  * We use separate structs for the aligned and unaligned cases because the
108  * compiler might otherwise think it could generate code that assumes
109  * alignment while touching fields of a 1-byte-header varlena.
110  */
111 typedef union
112 {
113  struct /* Normal varlena (4-byte length) */
114  {
116  char va_data[FLEXIBLE_ARRAY_MEMBER];
117  } va_4byte;
118  struct /* Compressed-in-line format */
119  {
120  uint32 va_header;
121  uint32 va_tcinfo; /* Original data size (excludes header) and
122  * compression method; see va_extinfo */
123  char va_data[FLEXIBLE_ARRAY_MEMBER]; /* Compressed data */
124  } va_compressed;
125 } varattrib_4b;
126 
127 typedef struct
128 {
130  char va_data[FLEXIBLE_ARRAY_MEMBER]; /* Data begins here */
131 } varattrib_1b;
132 
133 /* TOAST pointers are a subset of varattrib_1b with an identifying tag byte */
134 typedef struct
135 {
136  uint8 va_header; /* Always 0x80 or 0x01 */
137  uint8 va_tag; /* Type of datum */
138  char va_data[FLEXIBLE_ARRAY_MEMBER]; /* Type-specific data */
140 
/*
 * Bit layouts for varlena headers on big-endian machines:
 *
 * 00xxxxxx 4-byte length word, aligned, uncompressed data (up to 1G)
 * 01xxxxxx 4-byte length word, aligned, *compressed* data (up to 1G)
 * 10000000 1-byte length word, unaligned, TOAST pointer
 * 1xxxxxxx 1-byte length word, unaligned, uncompressed data (up to 126b)
 *
 * Bit layouts for varlena headers on little-endian machines:
 *
 * xxxxxx00 4-byte length word, aligned, uncompressed data (up to 1G)
 * xxxxxx10 4-byte length word, aligned, *compressed* data (up to 1G)
 * 00000001 1-byte length word, unaligned, TOAST pointer
 * xxxxxxx1 1-byte length word, unaligned, uncompressed data (up to 126b)
 *
 * The "xxx" bits are the length field (which includes itself in all cases).
 * In the big-endian case we mask to extract the length, in the little-endian
 * case we shift.  Note that in both cases the flag bits are in the physically
 * first byte.  Also, it is not possible for a 1-byte length word to be zero;
 * this lets us disambiguate alignment padding bytes from the start of an
 * unaligned datum.  (We now *require* pad bytes to be filled with zero!)
 *
 * In TOAST pointers the va_tag field (see varattrib_1b_e) is used to discern
 * the specific type and length of the pointer datum.
 */
166 
/*
 * Endian-dependent macros.  These are considered internal --- use the
 * external macros below instead of using these directly.
 *
 * Note: IS_1B is true for external toast records but VARSIZE_1B will return 0
 * for such records.  Hence you should usually check for IS_EXTERNAL before
 * checking for IS_1B.
 */

#ifdef WORDS_BIGENDIAN

/* Flag bits are the high-order bits of the first byte; length is masked out */
#define VARATT_IS_4B(PTR) \
	((((varattrib_1b *) (PTR))->va_header & 0x80) == 0x00)
#define VARATT_IS_4B_U(PTR) \
	((((varattrib_1b *) (PTR))->va_header & 0xC0) == 0x00)
#define VARATT_IS_4B_C(PTR) \
	((((varattrib_1b *) (PTR))->va_header & 0xC0) == 0x40)
#define VARATT_IS_1B(PTR) \
	((((varattrib_1b *) (PTR))->va_header & 0x80) == 0x80)
#define VARATT_IS_1B_E(PTR) \
	((((varattrib_1b *) (PTR))->va_header) == 0x80)
#define VARATT_NOT_PAD_BYTE(PTR) \
	(*((uint8 *) (PTR)) != 0)

/* VARSIZE_4B() should only be used on known-aligned data */
#define VARSIZE_4B(PTR) \
	(((varattrib_4b *) (PTR))->va_4byte.va_header & 0x3FFFFFFF)
#define VARSIZE_1B(PTR) \
	(((varattrib_1b *) (PTR))->va_header & 0x7F)
#define VARTAG_1B_E(PTR) \
	(((varattrib_1b_e *) (PTR))->va_tag)

#define SET_VARSIZE_4B(PTR,len) \
	(((varattrib_4b *) (PTR))->va_4byte.va_header = (len) & 0x3FFFFFFF)
#define SET_VARSIZE_4B_C(PTR,len) \
	(((varattrib_4b *) (PTR))->va_4byte.va_header = ((len) & 0x3FFFFFFF) | 0x40000000)
#define SET_VARSIZE_1B(PTR,len) \
	(((varattrib_1b *) (PTR))->va_header = (len) | 0x80)
#define SET_VARTAG_1B_E(PTR,tag) \
	(((varattrib_1b_e *) (PTR))->va_header = 0x80, \
	 ((varattrib_1b_e *) (PTR))->va_tag = (tag))

#else							/* !WORDS_BIGENDIAN */

/* Flag bits are the low-order bits of the first byte; length is shifted out */
#define VARATT_IS_4B(PTR) \
	((((varattrib_1b *) (PTR))->va_header & 0x01) == 0x00)
#define VARATT_IS_4B_U(PTR) \
	((((varattrib_1b *) (PTR))->va_header & 0x03) == 0x00)
#define VARATT_IS_4B_C(PTR) \
	((((varattrib_1b *) (PTR))->va_header & 0x03) == 0x02)
#define VARATT_IS_1B(PTR) \
	((((varattrib_1b *) (PTR))->va_header & 0x01) == 0x01)
#define VARATT_IS_1B_E(PTR) \
	((((varattrib_1b *) (PTR))->va_header) == 0x01)
#define VARATT_NOT_PAD_BYTE(PTR) \
	(*((uint8 *) (PTR)) != 0)

/* VARSIZE_4B() should only be used on known-aligned data */
#define VARSIZE_4B(PTR) \
	((((varattrib_4b *) (PTR))->va_4byte.va_header >> 2) & 0x3FFFFFFF)
#define VARSIZE_1B(PTR) \
	((((varattrib_1b *) (PTR))->va_header >> 1) & 0x7F)
#define VARTAG_1B_E(PTR) \
	(((varattrib_1b_e *) (PTR))->va_tag)

#define SET_VARSIZE_4B(PTR,len) \
	(((varattrib_4b *) (PTR))->va_4byte.va_header = (((uint32) (len)) << 2))
#define SET_VARSIZE_4B_C(PTR,len) \
	(((varattrib_4b *) (PTR))->va_4byte.va_header = (((uint32) (len)) << 2) | 0x02)
#define SET_VARSIZE_1B(PTR,len) \
	(((varattrib_1b *) (PTR))->va_header = (((uint8) (len)) << 1) | 0x01)
#define SET_VARTAG_1B_E(PTR,tag) \
	(((varattrib_1b_e *) (PTR))->va_header = 0x01, \
	 ((varattrib_1b_e *) (PTR))->va_tag = (tag))

#endif							/* WORDS_BIGENDIAN */
243 
/* Start of the data area for each of the header layouts */
#define VARDATA_4B(PTR)		(((varattrib_4b *) (PTR))->va_4byte.va_data)
#define VARDATA_4B_C(PTR)	(((varattrib_4b *) (PTR))->va_compressed.va_data)
#define VARDATA_1B(PTR)		(((varattrib_1b *) (PTR))->va_data)
#define VARDATA_1B_E(PTR)	(((varattrib_1b_e *) (PTR))->va_data)
248 
/*
 * Externally visible TOAST macros begin here.
 */

/* Header sizes, i.e. the offset of the data area in each layout */
#define VARHDRSZ_EXTERNAL		offsetof(varattrib_1b_e, va_data)
#define VARHDRSZ_COMPRESSED		offsetof(varattrib_4b, va_compressed.va_data)
#define VARHDRSZ_SHORT			offsetof(varattrib_1b, va_data)

/* Largest total size (header included) that fits a 1-byte length word */
#define VARATT_SHORT_MAX		0x7F
#define VARATT_CAN_MAKE_SHORT(PTR) \
	(VARATT_IS_4B_U(PTR) && \
	 (VARSIZE(PTR) - VARHDRSZ + VARHDRSZ_SHORT) <= VARATT_SHORT_MAX)
/* Total size the datum would have after swapping in the short header */
#define VARATT_CONVERTED_SHORT_SIZE(PTR) \
	(VARSIZE(PTR) - VARHDRSZ + VARHDRSZ_SHORT)
263 
/*
 * In consumers oblivious to data alignment, call PG_DETOAST_DATUM_PACKED(),
 * VARDATA_ANY(), VARSIZE_ANY() and VARSIZE_ANY_EXHDR().  Elsewhere, call
 * PG_DETOAST_DATUM(), VARDATA() and VARSIZE().  Directly fetching an int16,
 * int32 or wider field in the struct representing the datum layout requires
 * aligned data.  memcpy() is alignment-oblivious, as are most operations on
 * datatypes, such as text, whose layout struct contains only char fields.
 *
 * Code assembling a new datum should call VARDATA() and SET_VARSIZE().
 * (Datums begin life untoasted.)
 *
 * Other macros here should usually be used only by tuple assembly/disassembly
 * code and code that specifically wants to work with still-toasted Datums.
 */
#define VARDATA(PTR)				VARDATA_4B(PTR)
#define VARSIZE(PTR)				VARSIZE_4B(PTR)

#define VARSIZE_SHORT(PTR)			VARSIZE_1B(PTR)
#define VARDATA_SHORT(PTR)			VARDATA_1B(PTR)

#define VARTAG_EXTERNAL(PTR)		VARTAG_1B_E(PTR)
#define VARSIZE_EXTERNAL(PTR)		(VARHDRSZ_EXTERNAL + VARTAG_SIZE(VARTAG_EXTERNAL(PTR)))
#define VARDATA_EXTERNAL(PTR)		VARDATA_1B_E(PTR)

#define VARATT_IS_COMPRESSED(PTR)	VARATT_IS_4B_C(PTR)
#define VARATT_IS_EXTERNAL(PTR)		VARATT_IS_1B_E(PTR)
#define VARATT_IS_EXTERNAL_ONDISK(PTR) \
	(VARATT_IS_EXTERNAL(PTR) && VARTAG_EXTERNAL(PTR) == VARTAG_ONDISK)
#define VARATT_IS_EXTERNAL_INDIRECT(PTR) \
	(VARATT_IS_EXTERNAL(PTR) && VARTAG_EXTERNAL(PTR) == VARTAG_INDIRECT)
#define VARATT_IS_EXTERNAL_EXPANDED_RO(PTR) \
	(VARATT_IS_EXTERNAL(PTR) && VARTAG_EXTERNAL(PTR) == VARTAG_EXPANDED_RO)
#define VARATT_IS_EXTERNAL_EXPANDED_RW(PTR) \
	(VARATT_IS_EXTERNAL(PTR) && VARTAG_EXTERNAL(PTR) == VARTAG_EXPANDED_RW)
#define VARATT_IS_EXTERNAL_EXPANDED(PTR) \
	(VARATT_IS_EXTERNAL(PTR) && VARTAG_IS_EXPANDED(VARTAG_EXTERNAL(PTR)))
#define VARATT_IS_EXTERNAL_NON_EXPANDED(PTR) \
	(VARATT_IS_EXTERNAL(PTR) && !VARTAG_IS_EXPANDED(VARTAG_EXTERNAL(PTR)))
#define VARATT_IS_SHORT(PTR)		VARATT_IS_1B(PTR)
#define VARATT_IS_EXTENDED(PTR)		(!VARATT_IS_4B_U(PTR))

#define SET_VARSIZE(PTR, len)				SET_VARSIZE_4B(PTR, len)
#define SET_VARSIZE_SHORT(PTR, len)			SET_VARSIZE_1B(PTR, len)
#define SET_VARSIZE_COMPRESSED(PTR, len)	SET_VARSIZE_4B_C(PTR, len)

#define SET_VARTAG_EXTERNAL(PTR, tag)		SET_VARTAG_1B_E(PTR, tag)
310 
/* Total size of a varlena, header included, whatever its header format */
#define VARSIZE_ANY(PTR) \
	(VARATT_IS_1B_E(PTR) ? VARSIZE_EXTERNAL(PTR) : \
	 (VARATT_IS_1B(PTR) ? VARSIZE_1B(PTR) : \
	  VARSIZE_4B(PTR)))

/* Size of a varlena data, excluding header */
#define VARSIZE_ANY_EXHDR(PTR) \
	(VARATT_IS_1B_E(PTR) ? VARSIZE_EXTERNAL(PTR)-VARHDRSZ_EXTERNAL : \
	 (VARATT_IS_1B(PTR) ? VARSIZE_1B(PTR)-VARHDRSZ_SHORT : \
	  VARSIZE_4B(PTR)-VARHDRSZ))

/* caution: this will not work on an external or compressed-in-line Datum */
/* caution: this will return a possibly unaligned pointer */
#define VARDATA_ANY(PTR) \
	 (VARATT_IS_1B(PTR) ? VARDATA_1B(PTR) : VARDATA_4B(PTR))
326 
/* Decompressed size and compression method of a compressed-in-line Datum */
#define VARDATA_COMPRESSED_GET_EXTSIZE(PTR) \
	(((varattrib_4b *) (PTR))->va_compressed.va_tcinfo & VARLENA_EXTSIZE_MASK)
#define VARDATA_COMPRESSED_GET_COMPRESS_METHOD(PTR) \
	(((varattrib_4b *) (PTR))->va_compressed.va_tcinfo >> VARLENA_EXTSIZE_BITS)

/* Same for external Datums; but note argument is a struct varatt_external */
#define VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer) \
	((toast_pointer).va_extinfo & VARLENA_EXTSIZE_MASK)
#define VARATT_EXTERNAL_GET_COMPRESS_METHOD(toast_pointer) \
	((toast_pointer).va_extinfo >> VARLENA_EXTSIZE_BITS)

/* Pack the saved size and the compression method into va_extinfo */
#define VARATT_EXTERNAL_SET_SIZE_AND_COMPRESS_METHOD(toast_pointer, len, cm) \
	do { \
		Assert((cm) == TOAST_PGLZ_COMPRESSION_ID || \
			   (cm) == TOAST_LZ4_COMPRESSION_ID); \
		((toast_pointer).va_extinfo = \
			(len) | ((uint32) (cm) << VARLENA_EXTSIZE_BITS)); \
	} while (0)
346 
/*
 * Testing whether an externally-stored value is compressed now requires
 * comparing size stored in va_extinfo (the actual length of the external data)
 * to rawsize (the original uncompressed datum's size).  The latter includes
 * VARHDRSZ overhead, the former doesn't.  We never use compression unless it
 * actually saves space, so we expect either equality or less-than.
 */
#define VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer) \
	(VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer) < \
	 (toast_pointer).va_rawsize - VARHDRSZ)
357 
358 #endif
unsigned int uint32
Definition: c.h:506
signed int int32
Definition: c.h:494
#define FLEXIBLE_ARRAY_MEMBER
Definition: c.h:398
unsigned char uint8
Definition: c.h:504
unsigned int Oid
Definition: postgres_ext.h:31
ExpandedObjectHeader * eohptr
Definition: varatt.h:76
uint32 va_extinfo
Definition: varatt.h:35
int32 va_rawsize
Definition: varatt.h:34
Oid va_valueid
Definition: varatt.h:37
Oid va_toastrelid
Definition: varatt.h:38
struct varlena * pointer
Definition: varatt.h:59
uint8 va_tag
Definition: varatt.h:137
uint8 va_header
Definition: varatt.h:136
uint8 va_header
Definition: varatt.h:129
Definition: c.h:687
uint32 va_header
Definition: varatt.h:115
uint32 va_tcinfo
Definition: varatt.h:121
struct varatt_external varatt_external
struct varatt_indirect varatt_indirect
struct varatt_expanded varatt_expanded
vartag_external
Definition: varatt.h:85
@ VARTAG_ONDISK
Definition: varatt.h:89
@ VARTAG_EXPANDED_RW
Definition: varatt.h:88
@ VARTAG_INDIRECT
Definition: varatt.h:86
@ VARTAG_EXPANDED_RO
Definition: varatt.h:87