PostgreSQL Source Code  git master
datum.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * datum.c
4  * POSTGRES Datum (abstract data type) manipulation routines.
5  *
6  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/utils/adt/datum.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 
16 /*
17  * In the implementation of these routines we assume the following:
18  *
19  * A) if a type is "byVal" then all the information is stored in the
20  * Datum itself (i.e. no pointers involved!). In this case the
21  * length of the type is always greater than zero and not more than
22  * "sizeof(Datum)"
23  *
24  * B) if a type is not "byVal" and it has a fixed length (typlen > 0),
25  * then the "Datum" always contains a pointer to a stream of bytes.
26  * The number of significant bytes is always equal to the typlen.
27  *
28  * C) if a type is not "byVal" and has typlen == -1,
29  * then the "Datum" always points to a "struct varlena".
30  * This varlena structure has information about the actual length of this
31  * particular instance of the type and about its value.
32  *
33  * D) if a type is not "byVal" and has typlen == -2,
34  * then the "Datum" always points to a null-terminated C string.
35  *
36  * Note that we do not treat "toasted" datums specially; therefore what
37  * will be copied or compared is the compressed data or toast reference.
38  * An exception is made for datumCopy() of an expanded object, however,
39  * because most callers expect to get a simple contiguous (and pfree'able)
40  * result from datumCopy(). See also datumTransfer().
41  */
42 
43 #include "postgres.h"
44 
45 #include "access/detoast.h"
46 #include "common/hashfn.h"
47 #include "fmgr.h"
48 #include "utils/datum.h"
49 #include "utils/expandeddatum.h"
50 #include "utils/fmgrprotos.h"
51 
52 
53 /*-------------------------------------------------------------------------
54  * datumGetSize
55  *
56  * Find the "real" size of a datum, given the datum value,
57  * whether it is a "by value", and the declared type length.
58  * (For TOAST pointer datums, this is the size of the pointer datum.)
59  *
60  * This is essentially an out-of-line version of the att_addlength_datum()
61  * macro in access/tupmacs.h. We do a tad more error checking though.
62  *-------------------------------------------------------------------------
63  */
64 Size
65 datumGetSize(Datum value, bool typByVal, int typLen)
66 {
67  Size size;
68 
69  if (typByVal)
70  {
71  /* Pass-by-value types are always fixed-length */
72  Assert(typLen > 0 && typLen <= sizeof(Datum));
73  size = (Size) typLen;
74  }
75  else
76  {
77  if (typLen > 0)
78  {
79  /* Fixed-length pass-by-ref type */
80  size = (Size) typLen;
81  }
82  else if (typLen == -1)
83  {
84  /* It is a varlena datatype */
85  struct varlena *s = (struct varlena *) DatumGetPointer(value);
86 
87  if (!PointerIsValid(s))
88  ereport(ERROR,
89  (errcode(ERRCODE_DATA_EXCEPTION),
90  errmsg("invalid Datum pointer")));
91 
92  size = (Size) VARSIZE_ANY(s);
93  }
94  else if (typLen == -2)
95  {
96  /* It is a cstring datatype */
97  char *s = (char *) DatumGetPointer(value);
98 
99  if (!PointerIsValid(s))
100  ereport(ERROR,
101  (errcode(ERRCODE_DATA_EXCEPTION),
102  errmsg("invalid Datum pointer")));
103 
104  size = (Size) (strlen(s) + 1);
105  }
106  else
107  {
108  elog(ERROR, "invalid typLen: %d", typLen);
109  size = 0; /* keep compiler quiet */
110  }
111  }
112 
113  return size;
114 }
115 
116 /*-------------------------------------------------------------------------
117  * datumCopy
118  *
119  * Make a copy of a non-NULL datum.
120  *
121  * If the datatype is pass-by-reference, memory is obtained with palloc().
122  *
123  * If the value is a reference to an expanded object, we flatten into memory
124  * obtained with palloc(). We need to copy because one of the main uses of
125  * this function is to copy a datum out of a transient memory context that's
126  * about to be destroyed, and the expanded object is probably in a child
127  * context that will also go away. Moreover, many callers assume that the
128  * result is a single pfree-able chunk.
129  *-------------------------------------------------------------------------
130  */
131 Datum
132 datumCopy(Datum value, bool typByVal, int typLen)
133 {
134  Datum res;
135 
136  if (typByVal)
137  res = value;
138  else if (typLen == -1)
139  {
140  /* It is a varlena datatype */
141  struct varlena *vl = (struct varlena *) DatumGetPointer(value);
142 
144  {
145  /* Flatten into the caller's memory context */
147  Size resultsize;
148  char *resultptr;
149 
150  resultsize = EOH_get_flat_size(eoh);
151  resultptr = (char *) palloc(resultsize);
152  EOH_flatten_into(eoh, (void *) resultptr, resultsize);
153  res = PointerGetDatum(resultptr);
154  }
155  else
156  {
157  /* Otherwise, just copy the varlena datum verbatim */
158  Size realSize;
159  char *resultptr;
160 
161  realSize = (Size) VARSIZE_ANY(vl);
162  resultptr = (char *) palloc(realSize);
163  memcpy(resultptr, vl, realSize);
164  res = PointerGetDatum(resultptr);
165  }
166  }
167  else
168  {
169  /* Pass by reference, but not varlena, so not toasted */
170  Size realSize;
171  char *resultptr;
172 
173  realSize = datumGetSize(value, typByVal, typLen);
174 
175  resultptr = (char *) palloc(realSize);
176  memcpy(resultptr, DatumGetPointer(value), realSize);
177  res = PointerGetDatum(resultptr);
178  }
179  return res;
180 }
181 
182 /*-------------------------------------------------------------------------
183  * datumTransfer
184  *
185  * Transfer a non-NULL datum into the current memory context.
186  *
187  * This is equivalent to datumCopy() except when the datum is a read-write
188  * pointer to an expanded object. In that case we merely reparent the object
189  * into the current context, and return its standard R/W pointer (in case the
190  * given one is a transient pointer of shorter lifespan).
191  *-------------------------------------------------------------------------
192  */
193 Datum
194 datumTransfer(Datum value, bool typByVal, int typLen)
195 {
196  if (!typByVal && typLen == -1 &&
199  else
200  value = datumCopy(value, typByVal, typLen);
201  return value;
202 }
203 
204 /*-------------------------------------------------------------------------
205  * datumIsEqual
206  *
207  * Return true if two datums are equal, false otherwise
208  *
209  * NOTE: XXX!
210  * We just compare the bytes of the two values, one by one.
211  * This routine will return false if there are 2 different
212  * representations of the same value (something along the lines
213  * of say the representation of zero in one's complement arithmetic).
214  * Also, it will probably not give the answer you want if either
215  * datum has been "toasted".
216  *
217  * Do not try to make this any smarter than it currently is with respect
218  * to "toasted" datums, because some of the callers could be working in the
219  * context of an aborted transaction.
220  *-------------------------------------------------------------------------
221  */
222 bool
223 datumIsEqual(Datum value1, Datum value2, bool typByVal, int typLen)
224 {
225  bool res;
226 
227  if (typByVal)
228  {
229  /*
230  * just compare the two datums. NOTE: just comparing "len" bytes will
231  * not do the work, because we do not know how these bytes are aligned
232  * inside the "Datum". We assume instead that any given datatype is
233  * consistent about how it fills extraneous bits in the Datum.
234  */
235  res = (value1 == value2);
236  }
237  else
238  {
239  Size size1,
240  size2;
241  char *s1,
242  *s2;
243 
244  /*
245  * Compare the bytes pointed by the pointers stored in the datums.
246  */
247  size1 = datumGetSize(value1, typByVal, typLen);
248  size2 = datumGetSize(value2, typByVal, typLen);
249  if (size1 != size2)
250  return false;
251  s1 = (char *) DatumGetPointer(value1);
252  s2 = (char *) DatumGetPointer(value2);
253  res = (memcmp(s1, s2, size1) == 0);
254  }
255  return res;
256 }
257 
258 /*-------------------------------------------------------------------------
259  * datum_image_eq
260  *
261  * Compares two datums for identical contents, based on byte images. Return
262  * true if the two datums are equal, false otherwise.
263  *-------------------------------------------------------------------------
264  */
265 bool
266 datum_image_eq(Datum value1, Datum value2, bool typByVal, int typLen)
267 {
268  Size len1,
269  len2;
270  bool result = true;
271 
272  if (typByVal)
273  {
274  result = (value1 == value2);
275  }
276  else if (typLen > 0)
277  {
278  result = (memcmp(DatumGetPointer(value1),
279  DatumGetPointer(value2),
280  typLen) == 0);
281  }
282  else if (typLen == -1)
283  {
284  len1 = toast_raw_datum_size(value1);
285  len2 = toast_raw_datum_size(value2);
286  /* No need to de-toast if lengths don't match. */
287  if (len1 != len2)
288  result = false;
289  else
290  {
291  struct varlena *arg1val;
292  struct varlena *arg2val;
293 
294  arg1val = PG_DETOAST_DATUM_PACKED(value1);
295  arg2val = PG_DETOAST_DATUM_PACKED(value2);
296 
297  result = (memcmp(VARDATA_ANY(arg1val),
298  VARDATA_ANY(arg2val),
299  len1 - VARHDRSZ) == 0);
300 
301  /* Only free memory if it's a copy made here. */
302  if ((Pointer) arg1val != (Pointer) value1)
303  pfree(arg1val);
304  if ((Pointer) arg2val != (Pointer) value2)
305  pfree(arg2val);
306  }
307  }
308  else if (typLen == -2)
309  {
310  char *s1,
311  *s2;
312 
313  /* Compare cstring datums */
314  s1 = DatumGetCString(value1);
315  s2 = DatumGetCString(value2);
316  len1 = strlen(s1) + 1;
317  len2 = strlen(s2) + 1;
318  if (len1 != len2)
319  return false;
320  result = (memcmp(s1, s2, len1) == 0);
321  }
322  else
323  elog(ERROR, "unexpected typLen: %d", typLen);
324 
325  return result;
326 }
327 
328 /*-------------------------------------------------------------------------
329  * datum_image_hash
330  *
331  * Generate a hash value based on the binary representation of 'value'. Most
332  * use cases will want to use the hash function specific to the Datum's type,
333  * however, some corner cases require generating a hash value based on the
334  * actual bits rather than the logical value.
335  *-------------------------------------------------------------------------
336  */
337 uint32
338 datum_image_hash(Datum value, bool typByVal, int typLen)
339 {
340  Size len;
341  uint32 result;
342 
343  if (typByVal)
344  result = hash_bytes((unsigned char *) &value, sizeof(Datum));
345  else if (typLen > 0)
346  result = hash_bytes((unsigned char *) DatumGetPointer(value), typLen);
347  else if (typLen == -1)
348  {
349  struct varlena *val;
350 
352 
354 
355  result = hash_bytes((unsigned char *) VARDATA_ANY(val), len - VARHDRSZ);
356 
357  /* Only free memory if it's a copy made here. */
358  if ((Pointer) val != (Pointer) value)
359  pfree(val);
360  }
361  else if (typLen == -2)
362  {
363  char *s;
364 
365  s = DatumGetCString(value);
366  len = strlen(s) + 1;
367 
368  result = hash_bytes((unsigned char *) s, len);
369  }
370  else
371  {
372  elog(ERROR, "unexpected typLen: %d", typLen);
373  result = 0; /* keep compiler quiet */
374  }
375 
376  return result;
377 }
378 
379 /*-------------------------------------------------------------------------
380  * btequalimage
381  *
382  * Generic "equalimage" support function.
383  *
384  * B-Tree operator classes whose equality function could safely be replaced by
385  * datum_image_eq() in all cases can use this as their "equalimage" support
386  * function.
387  *
388  * Currently, we unconditionally assume that any B-Tree operator class that
389  * registers btequalimage as its support function 4 must be able to safely use
390  * optimizations like deduplication (i.e. we return true unconditionally). If
391  * it ever proved necessary to rescind support for an operator class, we could
392  * do that in a targeted fashion by doing something with the opcintype
393  * argument.
394  *-------------------------------------------------------------------------
395  */
396 Datum
398 {
399  /* Oid opcintype = PG_GETARG_OID(0); */
400 
401  PG_RETURN_BOOL(true);
402 }
403 
404 /*-------------------------------------------------------------------------
405  * datumEstimateSpace
406  *
407  * Compute the amount of space that datumSerialize will require for a
408  * particular Datum.
409  *-------------------------------------------------------------------------
410  */
411 Size
412 datumEstimateSpace(Datum value, bool isnull, bool typByVal, int typLen)
413 {
414  Size sz = sizeof(int);
415 
416  if (!isnull)
417  {
418  /* no need to use add_size, can't overflow */
419  if (typByVal)
420  sz += sizeof(Datum);
421  else if (typLen == -1 &&
423  {
424  /* Expanded objects need to be flattened, see comment below */
426  }
427  else
428  sz += datumGetSize(value, typByVal, typLen);
429  }
430 
431  return sz;
432 }
433 
434 /*-------------------------------------------------------------------------
435  * datumSerialize
436  *
437  * Serialize a possibly-NULL datum into caller-provided storage.
438  *
439  * Note: "expanded" objects are flattened so as to produce a self-contained
440  * representation, but other sorts of toast pointers are transferred as-is.
441  * This is because the intended use of this function is to pass the value
442  * to another process within the same database server. The other process
443  * could not access an "expanded" object within this process's memory, but
444  * we assume it can dereference the same TOAST pointers this one can.
445  *
446  * The format is as follows: first, we write a 4-byte header word, which
447  * is either the length of a pass-by-reference datum, -1 for a
448  * pass-by-value datum, or -2 for a NULL. If the value is NULL, nothing
449  * further is written. If it is pass-by-value, sizeof(Datum) bytes
450  * follow. Otherwise, the number of bytes indicated by the header word
451  * follow. The caller is responsible for ensuring that there is enough
452  * storage to store the number of bytes that will be written; use
453  * datumEstimateSpace() to find out how many will be needed.
454  * *start_address is updated to point to the byte immediately following
455  * those written.
456  *-------------------------------------------------------------------------
457  */
458 void
459 datumSerialize(Datum value, bool isnull, bool typByVal, int typLen,
460  char **start_address)
461 {
462  ExpandedObjectHeader *eoh = NULL;
463  int header;
464 
465  /* Write header word. */
466  if (isnull)
467  header = -2;
468  else if (typByVal)
469  header = -1;
470  else if (typLen == -1 &&
472  {
473  eoh = DatumGetEOHP(value);
474  header = EOH_get_flat_size(eoh);
475  }
476  else
477  header = datumGetSize(value, typByVal, typLen);
478  memcpy(*start_address, &header, sizeof(int));
479  *start_address += sizeof(int);
480 
481  /* If not null, write payload bytes. */
482  if (!isnull)
483  {
484  if (typByVal)
485  {
486  memcpy(*start_address, &value, sizeof(Datum));
487  *start_address += sizeof(Datum);
488  }
489  else if (eoh)
490  {
491  char *tmp;
492 
493  /*
494  * EOH_flatten_into expects the target address to be maxaligned,
495  * so we can't store directly to *start_address.
496  */
497  tmp = (char *) palloc(header);
498  EOH_flatten_into(eoh, (void *) tmp, header);
499  memcpy(*start_address, tmp, header);
500  *start_address += header;
501 
502  /* be tidy. */
503  pfree(tmp);
504  }
505  else
506  {
507  memcpy(*start_address, DatumGetPointer(value), header);
508  *start_address += header;
509  }
510  }
511 }
512 
513 /*-------------------------------------------------------------------------
514  * datumRestore
515  *
516  * Restore a possibly-NULL datum previously serialized by datumSerialize.
517  * *start_address is updated according to the number of bytes consumed.
518  *-------------------------------------------------------------------------
519  */
520 Datum
521 datumRestore(char **start_address, bool *isnull)
522 {
523  int header;
524  void *d;
525 
526  /* Read header word. */
527  memcpy(&header, *start_address, sizeof(int));
528  *start_address += sizeof(int);
529 
530  /* If this datum is NULL, we can stop here. */
531  if (header == -2)
532  {
533  *isnull = true;
534  return (Datum) 0;
535  }
536 
537  /* OK, datum is not null. */
538  *isnull = false;
539 
540  /* If this datum is pass-by-value, sizeof(Datum) bytes follow. */
541  if (header == -1)
542  {
543  Datum val;
544 
545  memcpy(&val, *start_address, sizeof(Datum));
546  *start_address += sizeof(Datum);
547  return val;
548  }
549 
550  /* Pass-by-reference case; copy indicated number of bytes. */
551  Assert(header > 0);
552  d = palloc(header);
553  memcpy(d, *start_address, header);
554  *start_address += header;
555  return PointerGetDatum(d);
556 }
unsigned int uint32
Definition: c.h:493
char * Pointer
Definition: c.h:470
#define VARHDRSZ
Definition: c.h:679
#define PointerIsValid(pointer)
Definition: c.h:750
size_t Size
Definition: c.h:592
Datum datumCopy(Datum value, bool typByVal, int typLen)
Definition: datum.c:132
Datum btequalimage(PG_FUNCTION_ARGS)
Definition: datum.c:397
Datum datumTransfer(Datum value, bool typByVal, int typLen)
Definition: datum.c:194
Size datumGetSize(Datum value, bool typByVal, int typLen)
Definition: datum.c:65
uint32 datum_image_hash(Datum value, bool typByVal, int typLen)
Definition: datum.c:338
Datum datumRestore(char **start_address, bool *isnull)
Definition: datum.c:521
void datumSerialize(Datum value, bool isnull, bool typByVal, int typLen, char **start_address)
Definition: datum.c:459
Size datumEstimateSpace(Datum value, bool isnull, bool typByVal, int typLen)
Definition: datum.c:412
bool datum_image_eq(Datum value1, Datum value2, bool typByVal, int typLen)
Definition: datum.c:266
bool datumIsEqual(Datum value1, Datum value2, bool typByVal, int typLen)
Definition: datum.c:223
Size toast_raw_datum_size(Datum value)
Definition: detoast.c:545
int errcode(int sqlerrcode)
Definition: elog.c:859
int errmsg(const char *fmt,...)
Definition: elog.c:1072
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:224
#define ereport(elevel,...)
Definition: elog.h:149
ExpandedObjectHeader * DatumGetEOHP(Datum d)
Definition: expandeddatum.c:29
void EOH_flatten_into(ExpandedObjectHeader *eohptr, void *result, Size allocated_size)
Definition: expandeddatum.c:81
Datum TransferExpandedObject(Datum d, MemoryContext new_parent)
Size EOH_get_flat_size(ExpandedObjectHeader *eohptr)
Definition: expandeddatum.c:75
#define PG_DETOAST_DATUM_PACKED(datum)
Definition: fmgr.h:248
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
#define PG_RETURN_BOOL(x)
Definition: fmgr.h:359
uint32 hash_bytes(const unsigned char *k, int keylen)
Definition: hashfn.c:146
long val
Definition: informix.c:664
static struct @150 value
Assert(fmt[strlen(fmt) - 1] !='\n')
void pfree(void *pointer)
Definition: mcxt.c:1508
MemoryContext CurrentMemoryContext
Definition: mcxt.c:131
void * palloc(Size size)
Definition: mcxt.c:1304
const void size_t len
static Datum PointerGetDatum(const void *X)
Definition: postgres.h:322
static char * DatumGetCString(Datum X)
Definition: postgres.h:335
uintptr_t Datum
Definition: postgres.h:64
static Pointer DatumGetPointer(Datum X)
Definition: postgres.h:312
char * s1
char * s2
static pg_noinline void Size size
Definition: slab.c:607
Definition: c.h:674
#define VARATT_IS_EXTERNAL_EXPANDED(PTR)
Definition: varatt.h:298
#define VARSIZE_ANY(PTR)
Definition: varatt.h:311
#define VARATT_IS_EXTERNAL_EXPANDED_RW(PTR)
Definition: varatt.h:296
#define VARDATA_ANY(PTR)
Definition: varatt.h:324