PostgreSQL Source Code git master
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
datum.c
Go to the documentation of this file.
/*-------------------------------------------------------------------------
 *
 * datum.c
 *    POSTGRES Datum (abstract data type) manipulation routines.
 *
 * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *    src/backend/utils/adt/datum.c
 *
 *-------------------------------------------------------------------------
 */
15
/*
 * In the implementation of these routines we assume the following:
 *
 * A) if a type is "byVal" then all the information is stored in the
 * Datum itself (i.e. no pointers involved!). In this case the
 * length of the type is always greater than zero and not more than
 * "sizeof(Datum)"
 *
 * B) if a type is not "byVal" and it has a fixed length (typlen > 0),
 * then the "Datum" always contains a pointer to a stream of bytes.
 * The number of significant bytes is always equal to the typlen.
 *
 * C) if a type is not "byVal" and has typlen == -1,
 * then the "Datum" always points to a "struct varlena".
 * This varlena structure has information about the actual length of this
 * particular instance of the type and about its value.
 *
 * D) if a type is not "byVal" and has typlen == -2,
 * then the "Datum" always points to a null-terminated C string.
 *
 * Note that we do not treat "toasted" datums specially; therefore what
 * will be copied or compared is the compressed data or toast reference.
 * An exception is made for datumCopy() of an expanded object, however,
 * because most callers expect to get a simple contiguous (and pfree'able)
 * result from datumCopy().  See also datumTransfer().
 */
42
43#include "postgres.h"
44
45#include "access/detoast.h"
46#include "common/hashfn.h"
47#include "fmgr.h"
48#include "utils/datum.h"
49#include "utils/expandeddatum.h"
50#include "utils/fmgrprotos.h"
51
52
53/*-------------------------------------------------------------------------
54 * datumGetSize
55 *
56 * Find the "real" size of a datum, given the datum value,
57 * whether it is a "by value", and the declared type length.
58 * (For TOAST pointer datums, this is the size of the pointer datum.)
59 *
60 * This is essentially an out-of-line version of the att_addlength_datum()
61 * macro in access/tupmacs.h. We do a tad more error checking though.
62 *-------------------------------------------------------------------------
63 */
64Size
65datumGetSize(Datum value, bool typByVal, int typLen)
66{
67 Size size;
68
69 if (typByVal)
70 {
71 /* Pass-by-value types are always fixed-length */
72 Assert(typLen > 0 && typLen <= sizeof(Datum));
73 size = (Size) typLen;
74 }
75 else
76 {
77 if (typLen > 0)
78 {
79 /* Fixed-length pass-by-ref type */
80 size = (Size) typLen;
81 }
82 else if (typLen == -1)
83 {
84 /* It is a varlena datatype */
85 struct varlena *s = (struct varlena *) DatumGetPointer(value);
86
87 if (!PointerIsValid(s))
89 (errcode(ERRCODE_DATA_EXCEPTION),
90 errmsg("invalid Datum pointer")));
91
92 size = (Size) VARSIZE_ANY(s);
93 }
94 else if (typLen == -2)
95 {
96 /* It is a cstring datatype */
97 char *s = (char *) DatumGetPointer(value);
98
99 if (!PointerIsValid(s))
101 (errcode(ERRCODE_DATA_EXCEPTION),
102 errmsg("invalid Datum pointer")));
103
104 size = (Size) (strlen(s) + 1);
105 }
106 else
107 {
108 elog(ERROR, "invalid typLen: %d", typLen);
109 size = 0; /* keep compiler quiet */
110 }
111 }
112
113 return size;
114}
115
116/*-------------------------------------------------------------------------
117 * datumCopy
118 *
119 * Make a copy of a non-NULL datum.
120 *
121 * If the datatype is pass-by-reference, memory is obtained with palloc().
122 *
123 * If the value is a reference to an expanded object, we flatten into memory
124 * obtained with palloc(). We need to copy because one of the main uses of
125 * this function is to copy a datum out of a transient memory context that's
126 * about to be destroyed, and the expanded object is probably in a child
127 * context that will also go away. Moreover, many callers assume that the
128 * result is a single pfree-able chunk.
129 *-------------------------------------------------------------------------
130 */
131Datum
132datumCopy(Datum value, bool typByVal, int typLen)
133{
134 Datum res;
135
136 if (typByVal)
137 res = value;
138 else if (typLen == -1)
139 {
140 /* It is a varlena datatype */
141 struct varlena *vl = (struct varlena *) DatumGetPointer(value);
142
144 {
145 /* Flatten into the caller's memory context */
147 Size resultsize;
148 char *resultptr;
149
150 resultsize = EOH_get_flat_size(eoh);
151 resultptr = (char *) palloc(resultsize);
152 EOH_flatten_into(eoh, resultptr, resultsize);
153 res = PointerGetDatum(resultptr);
154 }
155 else
156 {
157 /* Otherwise, just copy the varlena datum verbatim */
158 Size realSize;
159 char *resultptr;
160
161 realSize = (Size) VARSIZE_ANY(vl);
162 resultptr = (char *) palloc(realSize);
163 memcpy(resultptr, vl, realSize);
164 res = PointerGetDatum(resultptr);
165 }
166 }
167 else
168 {
169 /* Pass by reference, but not varlena, so not toasted */
170 Size realSize;
171 char *resultptr;
172
173 realSize = datumGetSize(value, typByVal, typLen);
174
175 resultptr = (char *) palloc(realSize);
176 memcpy(resultptr, DatumGetPointer(value), realSize);
177 res = PointerGetDatum(resultptr);
178 }
179 return res;
180}
181
182/*-------------------------------------------------------------------------
183 * datumTransfer
184 *
185 * Transfer a non-NULL datum into the current memory context.
186 *
187 * This is equivalent to datumCopy() except when the datum is a read-write
188 * pointer to an expanded object. In that case we merely reparent the object
189 * into the current context, and return its standard R/W pointer (in case the
190 * given one is a transient pointer of shorter lifespan).
191 *-------------------------------------------------------------------------
192 */
193Datum
194datumTransfer(Datum value, bool typByVal, int typLen)
195{
196 if (!typByVal && typLen == -1 &&
199 else
200 value = datumCopy(value, typByVal, typLen);
201 return value;
202}
203
204/*-------------------------------------------------------------------------
205 * datumIsEqual
206 *
207 * Return true if two datums are equal, false otherwise
208 *
209 * NOTE: XXX!
210 * We just compare the bytes of the two values, one by one.
211 * This routine will return false if there are 2 different
212 * representations of the same value (something along the lines
213 * of say the representation of zero in one's complement arithmetic).
214 * Also, it will probably not give the answer you want if either
215 * datum has been "toasted".
216 *
217 * Do not try to make this any smarter than it currently is with respect
218 * to "toasted" datums, because some of the callers could be working in the
219 * context of an aborted transaction.
220 *-------------------------------------------------------------------------
221 */
222bool
223datumIsEqual(Datum value1, Datum value2, bool typByVal, int typLen)
224{
225 bool res;
226
227 if (typByVal)
228 {
229 /*
230 * just compare the two datums. NOTE: just comparing "len" bytes will
231 * not do the work, because we do not know how these bytes are aligned
232 * inside the "Datum". We assume instead that any given datatype is
233 * consistent about how it fills extraneous bits in the Datum.
234 */
235 res = (value1 == value2);
236 }
237 else
238 {
239 Size size1,
240 size2;
241 char *s1,
242 *s2;
243
244 /*
245 * Compare the bytes pointed by the pointers stored in the datums.
246 */
247 size1 = datumGetSize(value1, typByVal, typLen);
248 size2 = datumGetSize(value2, typByVal, typLen);
249 if (size1 != size2)
250 return false;
251 s1 = (char *) DatumGetPointer(value1);
252 s2 = (char *) DatumGetPointer(value2);
253 res = (memcmp(s1, s2, size1) == 0);
254 }
255 return res;
256}
257
258/*-------------------------------------------------------------------------
259 * datum_image_eq
260 *
261 * Compares two datums for identical contents, based on byte images. Return
262 * true if the two datums are equal, false otherwise.
263 *-------------------------------------------------------------------------
264 */
265bool
266datum_image_eq(Datum value1, Datum value2, bool typByVal, int typLen)
267{
268 Size len1,
269 len2;
270 bool result = true;
271
272 if (typByVal)
273 {
274 result = (value1 == value2);
275 }
276 else if (typLen > 0)
277 {
278 result = (memcmp(DatumGetPointer(value1),
279 DatumGetPointer(value2),
280 typLen) == 0);
281 }
282 else if (typLen == -1)
283 {
284 len1 = toast_raw_datum_size(value1);
285 len2 = toast_raw_datum_size(value2);
286 /* No need to de-toast if lengths don't match. */
287 if (len1 != len2)
288 result = false;
289 else
290 {
291 struct varlena *arg1val;
292 struct varlena *arg2val;
293
294 arg1val = PG_DETOAST_DATUM_PACKED(value1);
295 arg2val = PG_DETOAST_DATUM_PACKED(value2);
296
297 result = (memcmp(VARDATA_ANY(arg1val),
298 VARDATA_ANY(arg2val),
299 len1 - VARHDRSZ) == 0);
300
301 /* Only free memory if it's a copy made here. */
302 if ((Pointer) arg1val != (Pointer) value1)
303 pfree(arg1val);
304 if ((Pointer) arg2val != (Pointer) value2)
305 pfree(arg2val);
306 }
307 }
308 else if (typLen == -2)
309 {
310 char *s1,
311 *s2;
312
313 /* Compare cstring datums */
314 s1 = DatumGetCString(value1);
315 s2 = DatumGetCString(value2);
316 len1 = strlen(s1) + 1;
317 len2 = strlen(s2) + 1;
318 if (len1 != len2)
319 return false;
320 result = (memcmp(s1, s2, len1) == 0);
321 }
322 else
323 elog(ERROR, "unexpected typLen: %d", typLen);
324
325 return result;
326}
327
328/*-------------------------------------------------------------------------
329 * datum_image_hash
330 *
331 * Generate a hash value based on the binary representation of 'value'. Most
332 * use cases will want to use the hash function specific to the Datum's type,
333 * however, some corner cases require generating a hash value based on the
334 * actual bits rather than the logical value.
335 *-------------------------------------------------------------------------
336 */
337uint32
338datum_image_hash(Datum value, bool typByVal, int typLen)
339{
340 Size len;
341 uint32 result;
342
343 if (typByVal)
344 result = hash_bytes((unsigned char *) &value, sizeof(Datum));
345 else if (typLen > 0)
346 result = hash_bytes((unsigned char *) DatumGetPointer(value), typLen);
347 else if (typLen == -1)
348 {
349 struct varlena *val;
350
352
354
355 result = hash_bytes((unsigned char *) VARDATA_ANY(val), len - VARHDRSZ);
356
357 /* Only free memory if it's a copy made here. */
358 if ((Pointer) val != (Pointer) value)
359 pfree(val);
360 }
361 else if (typLen == -2)
362 {
363 char *s;
364
366 len = strlen(s) + 1;
367
368 result = hash_bytes((unsigned char *) s, len);
369 }
370 else
371 {
372 elog(ERROR, "unexpected typLen: %d", typLen);
373 result = 0; /* keep compiler quiet */
374 }
375
376 return result;
377}
378
379/*-------------------------------------------------------------------------
380 * btequalimage
381 *
382 * Generic "equalimage" support function.
383 *
384 * B-Tree operator classes whose equality function could safely be replaced by
385 * datum_image_eq() in all cases can use this as their "equalimage" support
386 * function.
387 *
388 * Currently, we unconditionally assume that any B-Tree operator class that
389 * registers btequalimage as its support function 4 must be able to safely use
390 * optimizations like deduplication (i.e. we return true unconditionally). If
391 * it ever proved necessary to rescind support for an operator class, we could
392 * do that in a targeted fashion by doing something with the opcintype
393 * argument.
394 *-------------------------------------------------------------------------
395 */
396Datum
398{
399 /* Oid opcintype = PG_GETARG_OID(0); */
400
401 PG_RETURN_BOOL(true);
402}
403
404/*-------------------------------------------------------------------------
405 * datumEstimateSpace
406 *
407 * Compute the amount of space that datumSerialize will require for a
408 * particular Datum.
409 *-------------------------------------------------------------------------
410 */
411Size
412datumEstimateSpace(Datum value, bool isnull, bool typByVal, int typLen)
413{
414 Size sz = sizeof(int);
415
416 if (!isnull)
417 {
418 /* no need to use add_size, can't overflow */
419 if (typByVal)
420 sz += sizeof(Datum);
421 else if (typLen == -1 &&
423 {
424 /* Expanded objects need to be flattened, see comment below */
426 }
427 else
428 sz += datumGetSize(value, typByVal, typLen);
429 }
430
431 return sz;
432}
433
434/*-------------------------------------------------------------------------
435 * datumSerialize
436 *
437 * Serialize a possibly-NULL datum into caller-provided storage.
438 *
439 * Note: "expanded" objects are flattened so as to produce a self-contained
440 * representation, but other sorts of toast pointers are transferred as-is.
441 * This is because the intended use of this function is to pass the value
442 * to another process within the same database server. The other process
443 * could not access an "expanded" object within this process's memory, but
444 * we assume it can dereference the same TOAST pointers this one can.
445 *
446 * The format is as follows: first, we write a 4-byte header word, which
447 * is either the length of a pass-by-reference datum, -1 for a
448 * pass-by-value datum, or -2 for a NULL. If the value is NULL, nothing
449 * further is written. If it is pass-by-value, sizeof(Datum) bytes
450 * follow. Otherwise, the number of bytes indicated by the header word
451 * follow. The caller is responsible for ensuring that there is enough
452 * storage to store the number of bytes that will be written; use
453 * datumEstimateSpace() to find out how many will be needed.
454 * *start_address is updated to point to the byte immediately following
455 * those written.
456 *-------------------------------------------------------------------------
457 */
458void
459datumSerialize(Datum value, bool isnull, bool typByVal, int typLen,
460 char **start_address)
461{
462 ExpandedObjectHeader *eoh = NULL;
463 int header;
464
465 /* Write header word. */
466 if (isnull)
467 header = -2;
468 else if (typByVal)
469 header = -1;
470 else if (typLen == -1 &&
472 {
473 eoh = DatumGetEOHP(value);
474 header = EOH_get_flat_size(eoh);
475 }
476 else
477 header = datumGetSize(value, typByVal, typLen);
478 memcpy(*start_address, &header, sizeof(int));
479 *start_address += sizeof(int);
480
481 /* If not null, write payload bytes. */
482 if (!isnull)
483 {
484 if (typByVal)
485 {
486 memcpy(*start_address, &value, sizeof(Datum));
487 *start_address += sizeof(Datum);
488 }
489 else if (eoh)
490 {
491 char *tmp;
492
493 /*
494 * EOH_flatten_into expects the target address to be maxaligned,
495 * so we can't store directly to *start_address.
496 */
497 tmp = (char *) palloc(header);
498 EOH_flatten_into(eoh, tmp, header);
499 memcpy(*start_address, tmp, header);
500 *start_address += header;
501
502 /* be tidy. */
503 pfree(tmp);
504 }
505 else
506 {
507 memcpy(*start_address, DatumGetPointer(value), header);
508 *start_address += header;
509 }
510 }
511}
512
513/*-------------------------------------------------------------------------
514 * datumRestore
515 *
516 * Restore a possibly-NULL datum previously serialized by datumSerialize.
517 * *start_address is updated according to the number of bytes consumed.
518 *-------------------------------------------------------------------------
519 */
520Datum
521datumRestore(char **start_address, bool *isnull)
522{
523 int header;
524 void *d;
525
526 /* Read header word. */
527 memcpy(&header, *start_address, sizeof(int));
528 *start_address += sizeof(int);
529
530 /* If this datum is NULL, we can stop here. */
531 if (header == -2)
532 {
533 *isnull = true;
534 return (Datum) 0;
535 }
536
537 /* OK, datum is not null. */
538 *isnull = false;
539
540 /* If this datum is pass-by-value, sizeof(Datum) bytes follow. */
541 if (header == -1)
542 {
543 Datum val;
544
545 memcpy(&val, *start_address, sizeof(Datum));
546 *start_address += sizeof(Datum);
547 return val;
548 }
549
550 /* Pass-by-reference case; copy indicated number of bytes. */
551 Assert(header > 0);
552 d = palloc(header);
553 memcpy(d, *start_address, header);
554 *start_address += header;
555 return PointerGetDatum(d);
556}
char * Pointer
Definition: c.h:476
#define VARHDRSZ
Definition: c.h:646
#define Assert(condition)
Definition: c.h:812
#define PointerIsValid(pointer)
Definition: c.h:717
uint32_t uint32
Definition: c.h:485
size_t Size
Definition: c.h:559
Datum datumCopy(Datum value, bool typByVal, int typLen)
Definition: datum.c:132
Datum btequalimage(PG_FUNCTION_ARGS)
Definition: datum.c:397
Datum datumTransfer(Datum value, bool typByVal, int typLen)
Definition: datum.c:194
Size datumGetSize(Datum value, bool typByVal, int typLen)
Definition: datum.c:65
uint32 datum_image_hash(Datum value, bool typByVal, int typLen)
Definition: datum.c:338
Datum datumRestore(char **start_address, bool *isnull)
Definition: datum.c:521
void datumSerialize(Datum value, bool isnull, bool typByVal, int typLen, char **start_address)
Definition: datum.c:459
Size datumEstimateSpace(Datum value, bool isnull, bool typByVal, int typLen)
Definition: datum.c:412
bool datum_image_eq(Datum value1, Datum value2, bool typByVal, int typLen)
Definition: datum.c:266
bool datumIsEqual(Datum value1, Datum value2, bool typByVal, int typLen)
Definition: datum.c:223
Size toast_raw_datum_size(Datum value)
Definition: detoast.c:545
int errcode(int sqlerrcode)
Definition: elog.c:853
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
#define ereport(elevel,...)
Definition: elog.h:149
ExpandedObjectHeader * DatumGetEOHP(Datum d)
Definition: expandeddatum.c:29
void EOH_flatten_into(ExpandedObjectHeader *eohptr, void *result, Size allocated_size)
Definition: expandeddatum.c:81
Datum TransferExpandedObject(Datum d, MemoryContext new_parent)
Size EOH_get_flat_size(ExpandedObjectHeader *eohptr)
Definition: expandeddatum.c:75
#define PG_DETOAST_DATUM_PACKED(datum)
Definition: fmgr.h:248
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
#define PG_RETURN_BOOL(x)
Definition: fmgr.h:359
uint32 hash_bytes(const unsigned char *k, int keylen)
Definition: hashfn.c:146
static struct @161 value
long val
Definition: informix.c:689
void pfree(void *pointer)
Definition: mcxt.c:1521
void * palloc(Size size)
Definition: mcxt.c:1317
MemoryContext CurrentMemoryContext
Definition: mcxt.c:143
const void size_t len
static Datum PointerGetDatum(const void *X)
Definition: postgres.h:322
uintptr_t Datum
Definition: postgres.h:64
static char * DatumGetCString(Datum X)
Definition: postgres.h:335
static Pointer DatumGetPointer(Datum X)
Definition: postgres.h:312
char * s1
char * s2
static pg_noinline void Size size
Definition: slab.c:607
Definition: c.h:641
#define VARATT_IS_EXTERNAL_EXPANDED(PTR)
Definition: varatt.h:298
#define VARSIZE_ANY(PTR)
Definition: varatt.h:311
#define VARATT_IS_EXTERNAL_EXPANDED_RW(PTR)
Definition: varatt.h:296
#define VARDATA_ANY(PTR)
Definition: varatt.h:324