PostgreSQL Source Code  git master
uuid.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * uuid.c
4  * Functions for the built-in type "uuid".
5  *
6  * Copyright (c) 2007-2024, PostgreSQL Global Development Group
7  *
8  * IDENTIFICATION
9  * src/backend/utils/adt/uuid.c
10  *
11  *-------------------------------------------------------------------------
12  */
13 
14 #include "postgres.h"
15 
16 #include "common/hashfn.h"
17 #include "lib/hyperloglog.h"
18 #include "libpq/pqformat.h"
19 #include "port/pg_bswap.h"
20 #include "utils/fmgrprotos.h"
21 #include "utils/guc.h"
22 #include "utils/sortsupport.h"
23 #include "utils/timestamp.h"
24 #include "utils/uuid.h"
25 
26 /* sortsupport for uuid */
27 typedef struct
28 {
29  int64 input_count; /* number of non-null values seen */
30  bool estimating; /* true if estimating cardinality */
31 
32  hyperLogLogState abbr_card; /* cardinality estimator */
34 
35 static void string_to_uuid(const char *source, pg_uuid_t *uuid, Node *escontext);
36 static int uuid_internal_cmp(const pg_uuid_t *arg1, const pg_uuid_t *arg2);
37 static int uuid_fast_cmp(Datum x, Datum y, SortSupport ssup);
38 static bool uuid_abbrev_abort(int memtupcount, SortSupport ssup);
39 static Datum uuid_abbrev_convert(Datum original, SortSupport ssup);
40 
41 Datum
43 {
44  char *uuid_str = PG_GETARG_CSTRING(0);
45  pg_uuid_t *uuid;
46 
47  uuid = (pg_uuid_t *) palloc(sizeof(*uuid));
48  string_to_uuid(uuid_str, uuid, fcinfo->context);
49  PG_RETURN_UUID_P(uuid);
50 }
51 
52 Datum
54 {
55  pg_uuid_t *uuid = PG_GETARG_UUID_P(0);
56  static const char hex_chars[] = "0123456789abcdef";
57  char *buf,
58  *p;
59  int i;
60 
61  /* counts for the four hyphens and the zero-terminator */
62  buf = palloc(2 * UUID_LEN + 5);
63  p = buf;
64  for (i = 0; i < UUID_LEN; i++)
65  {
66  int hi;
67  int lo;
68 
69  /*
70  * We print uuid values as a string of 8, 4, 4, 4, and then 12
71  * hexadecimal characters, with each group is separated by a hyphen
72  * ("-"). Therefore, add the hyphens at the appropriate places here.
73  */
74  if (i == 4 || i == 6 || i == 8 || i == 10)
75  *p++ = '-';
76 
77  hi = uuid->data[i] >> 4;
78  lo = uuid->data[i] & 0x0F;
79 
80  *p++ = hex_chars[hi];
81  *p++ = hex_chars[lo];
82  }
83  *p = '\0';
84 
86 }
87 
88 /*
89  * We allow UUIDs as a series of 32 hexadecimal digits with an optional dash
90  * after each group of 4 hexadecimal digits, and optionally surrounded by {}.
91  * (The canonical format 8x-4x-4x-4x-12x, where "nx" means n hexadecimal
92  * digits, is the only one used for output.)
93  */
94 static void
95 string_to_uuid(const char *source, pg_uuid_t *uuid, Node *escontext)
96 {
97  const char *src = source;
98  bool braces = false;
99  int i;
100 
101  if (src[0] == '{')
102  {
103  src++;
104  braces = true;
105  }
106 
107  for (i = 0; i < UUID_LEN; i++)
108  {
109  char str_buf[3];
110 
111  if (src[0] == '\0' || src[1] == '\0')
112  goto syntax_error;
113  memcpy(str_buf, src, 2);
114  if (!isxdigit((unsigned char) str_buf[0]) ||
115  !isxdigit((unsigned char) str_buf[1]))
116  goto syntax_error;
117 
118  str_buf[2] = '\0';
119  uuid->data[i] = (unsigned char) strtoul(str_buf, NULL, 16);
120  src += 2;
121  if (src[0] == '-' && (i % 2) == 1 && i < UUID_LEN - 1)
122  src++;
123  }
124 
125  if (braces)
126  {
127  if (*src != '}')
128  goto syntax_error;
129  src++;
130  }
131 
132  if (*src != '\0')
133  goto syntax_error;
134 
135  return;
136 
138  ereturn(escontext,,
139  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
140  errmsg("invalid input syntax for type %s: \"%s\"",
141  "uuid", source)));
142 }
143 
144 Datum
146 {
148  pg_uuid_t *uuid;
149 
150  uuid = (pg_uuid_t *) palloc(UUID_LEN);
151  memcpy(uuid->data, pq_getmsgbytes(buffer, UUID_LEN), UUID_LEN);
152  PG_RETURN_POINTER(uuid);
153 }
154 
155 Datum
157 {
158  pg_uuid_t *uuid = PG_GETARG_UUID_P(0);
159  StringInfoData buffer;
160 
161  pq_begintypsend(&buffer);
162  pq_sendbytes(&buffer, uuid->data, UUID_LEN);
164 }
165 
166 /* internal uuid compare function */
167 static int
168 uuid_internal_cmp(const pg_uuid_t *arg1, const pg_uuid_t *arg2)
169 {
170  return memcmp(arg1->data, arg2->data, UUID_LEN);
171 }
172 
173 Datum
175 {
176  pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
177  pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
178 
179  PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) < 0);
180 }
181 
182 Datum
184 {
185  pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
186  pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
187 
188  PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) <= 0);
189 }
190 
191 Datum
193 {
194  pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
195  pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
196 
197  PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) == 0);
198 }
199 
200 Datum
202 {
203  pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
204  pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
205 
206  PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) >= 0);
207 }
208 
209 Datum
211 {
212  pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
213  pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
214 
215  PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) > 0);
216 }
217 
218 Datum
220 {
221  pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
222  pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
223 
224  PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) != 0);
225 }
226 
227 /* handler for btree index operator */
228 Datum
230 {
231  pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
232  pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
233 
234  PG_RETURN_INT32(uuid_internal_cmp(arg1, arg2));
235 }
236 
237 /*
238  * Sort support strategy routine
239  */
240 Datum
242 {
244 
245  ssup->comparator = uuid_fast_cmp;
246  ssup->ssup_extra = NULL;
247 
248  if (ssup->abbreviate)
249  {
251  MemoryContext oldcontext;
252 
253  oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
254 
255  uss = palloc(sizeof(uuid_sortsupport_state));
256  uss->input_count = 0;
257  uss->estimating = true;
258  initHyperLogLog(&uss->abbr_card, 10);
259 
260  ssup->ssup_extra = uss;
261 
266 
267  MemoryContextSwitchTo(oldcontext);
268  }
269 
270  PG_RETURN_VOID();
271 }
272 
273 /*
274  * SortSupport comparison func
275  */
276 static int
278 {
279  pg_uuid_t *arg1 = DatumGetUUIDP(x);
280  pg_uuid_t *arg2 = DatumGetUUIDP(y);
281 
282  return uuid_internal_cmp(arg1, arg2);
283 }
284 
285 /*
286  * Callback for estimating effectiveness of abbreviated key optimization.
287  *
288  * We pay no attention to the cardinality of the non-abbreviated data, because
289  * there is no equality fast-path within authoritative uuid comparator.
290  */
291 static bool
292 uuid_abbrev_abort(int memtupcount, SortSupport ssup)
293 {
294  uuid_sortsupport_state *uss = ssup->ssup_extra;
295  double abbr_card;
296 
297  if (memtupcount < 10000 || uss->input_count < 10000 || !uss->estimating)
298  return false;
299 
300  abbr_card = estimateHyperLogLog(&uss->abbr_card);
301 
302  /*
303  * If we have >100k distinct values, then even if we were sorting many
304  * billion rows we'd likely still break even, and the penalty of undoing
305  * that many rows of abbrevs would probably not be worth it. Stop even
306  * counting at that point.
307  */
308  if (abbr_card > 100000.0)
309  {
310 #ifdef TRACE_SORT
311  if (trace_sort)
312  elog(LOG,
313  "uuid_abbrev: estimation ends at cardinality %f"
314  " after " INT64_FORMAT " values (%d rows)",
315  abbr_card, uss->input_count, memtupcount);
316 #endif
317  uss->estimating = false;
318  return false;
319  }
320 
321  /*
322  * Target minimum cardinality is 1 per ~2k of non-null inputs. 0.5 row
323  * fudge factor allows us to abort earlier on genuinely pathological data
324  * where we've had exactly one abbreviated value in the first 2k
325  * (non-null) rows.
326  */
327  if (abbr_card < uss->input_count / 2000.0 + 0.5)
328  {
329 #ifdef TRACE_SORT
330  if (trace_sort)
331  elog(LOG,
332  "uuid_abbrev: aborting abbreviation at cardinality %f"
333  " below threshold %f after " INT64_FORMAT " values (%d rows)",
334  abbr_card, uss->input_count / 2000.0 + 0.5, uss->input_count,
335  memtupcount);
336 #endif
337  return true;
338  }
339 
340 #ifdef TRACE_SORT
341  if (trace_sort)
342  elog(LOG,
343  "uuid_abbrev: cardinality %f after " INT64_FORMAT
344  " values (%d rows)", abbr_card, uss->input_count, memtupcount);
345 #endif
346 
347  return false;
348 }
349 
350 /*
351  * Conversion routine for sortsupport. Converts original uuid representation
352  * to abbreviated key representation. Our encoding strategy is simple -- pack
353  * the first `sizeof(Datum)` bytes of uuid data into a Datum (on little-endian
354  * machines, the bytes are stored in reverse order), and treat it as an
355  * unsigned integer.
356  */
357 static Datum
359 {
360  uuid_sortsupport_state *uss = ssup->ssup_extra;
361  pg_uuid_t *authoritative = DatumGetUUIDP(original);
362  Datum res;
363 
364  memcpy(&res, authoritative->data, sizeof(Datum));
365  uss->input_count += 1;
366 
367  if (uss->estimating)
368  {
369  uint32 tmp;
370 
371 #if SIZEOF_DATUM == 8
372  tmp = (uint32) res ^ (uint32) ((uint64) res >> 32);
373 #else /* SIZEOF_DATUM != 8 */
374  tmp = (uint32) res;
375 #endif
376 
378  }
379 
380  /*
381  * Byteswap on little-endian machines.
382  *
383  * This is needed so that ssup_datum_unsigned_cmp() (an unsigned integer
384  * 3-way comparator) works correctly on all platforms. If we didn't do
385  * this, the comparator would have to call memcmp() with a pair of
386  * pointers to the first byte of each abbreviated key, which is slower.
387  */
388  res = DatumBigEndianToNative(res);
389 
390  return res;
391 }
392 
393 /* hash index support */
394 Datum
396 {
398 
399  return hash_any(key->data, UUID_LEN);
400 }
401 
402 Datum
404 {
406 
407  return hash_any_extended(key->data, UUID_LEN, PG_GETARG_INT64(1));
408 }
409 
410 Datum
412 {
413  pg_uuid_t *uuid = palloc(UUID_LEN);
414 
415  if (!pg_strong_random(uuid, UUID_LEN))
416  ereport(ERROR,
417  (errcode(ERRCODE_INTERNAL_ERROR),
418  errmsg("could not generate random values")));
419 
420  /*
421  * Set magic numbers for a "version 4" (pseudorandom) UUID, see
422  * http://tools.ietf.org/html/rfc4122#section-4.4
423  */
424  uuid->data[6] = (uuid->data[6] & 0x0f) | 0x40; /* time_hi_and_version */
425  uuid->data[8] = (uuid->data[8] & 0x3f) | 0x80; /* clock_seq_hi_and_reserved */
426 
427  PG_RETURN_UUID_P(uuid);
428 }
429 
430 #define UUIDV1_EPOCH_JDATE 2299161 /* == date2j(1582,10,15) */
431 
432 /*
433  * Extract timestamp from UUID.
434  *
435  * Returns null if not RFC 4122 variant or not a version that has a timestamp.
436  */
437 Datum
439 {
440  pg_uuid_t *uuid = PG_GETARG_UUID_P(0);
441  int version;
442  uint64 tms;
443  TimestampTz ts;
444 
445  /* check if RFC 4122 variant */
446  if ((uuid->data[8] & 0xc0) != 0x80)
447  PG_RETURN_NULL();
448 
449  version = uuid->data[6] >> 4;
450 
451  if (version == 1)
452  {
453  tms = ((uint64) uuid->data[0] << 24)
454  + ((uint64) uuid->data[1] << 16)
455  + ((uint64) uuid->data[2] << 8)
456  + ((uint64) uuid->data[3])
457  + ((uint64) uuid->data[4] << 40)
458  + ((uint64) uuid->data[5] << 32)
459  + (((uint64) uuid->data[6] & 0xf) << 56)
460  + ((uint64) uuid->data[7] << 48);
461 
462  /* convert 100-ns intervals to us, then adjust */
463  ts = (TimestampTz) (tms / 10) -
465 
467  }
468 
469  /* not a timestamp-containing UUID version */
470  PG_RETURN_NULL();
471 }
472 
473 /*
474  * Extract UUID version.
475  *
476  * Returns null if not RFC 4122 variant.
477  */
478 Datum
480 {
481  pg_uuid_t *uuid = PG_GETARG_UUID_P(0);
482  uint16 version;
483 
484  /* check if RFC 4122 variant */
485  if ((uuid->data[8] & 0xc0) != 0x80)
486  PG_RETURN_NULL();
487 
488  version = uuid->data[6] >> 4;
489 
490  PG_RETURN_UINT16(version);
491 }
unsigned short uint16
Definition: c.h:505
unsigned int uint32
Definition: c.h:506
#define INT64_FORMAT
Definition: c.h:548
int64 TimestampTz
Definition: timestamp.h:39
#define USECS_PER_SEC
Definition: timestamp.h:134
#define SECS_PER_DAY
Definition: timestamp.h:126
#define POSTGRES_EPOCH_JDATE
Definition: timestamp.h:235
int errcode(int sqlerrcode)
Definition: elog.c:859
int errmsg(const char *fmt,...)
Definition: elog.c:1072
#define LOG
Definition: elog.h:31
#define ereturn(context, dummy_value,...)
Definition: elog.h:276
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:224
#define ereport(elevel,...)
Definition: elog.h:149
#define PG_RETURN_VOID()
Definition: fmgr.h:349
#define PG_RETURN_BYTEA_P(x)
Definition: fmgr.h:371
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:276
#define PG_RETURN_CSTRING(x)
Definition: fmgr.h:362
#define PG_GETARG_CSTRING(n)
Definition: fmgr.h:277
#define PG_RETURN_NULL()
Definition: fmgr.h:345
#define PG_GETARG_INT64(n)
Definition: fmgr.h:283
#define PG_RETURN_INT32(x)
Definition: fmgr.h:354
#define PG_RETURN_UINT16(x)
Definition: fmgr.h:357
#define PG_RETURN_POINTER(x)
Definition: fmgr.h:361
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
#define PG_RETURN_BOOL(x)
Definition: fmgr.h:359
static Datum hash_uint32(uint32 k)
Definition: hashfn.h:43
static Datum hash_any_extended(const unsigned char *k, int keylen, uint64 seed)
Definition: hashfn.h:37
static Datum hash_any(const unsigned char *k, int keylen)
Definition: hashfn.h:31
void initHyperLogLog(hyperLogLogState *cState, uint8 bwidth)
Definition: hyperloglog.c:66
double estimateHyperLogLog(hyperLogLogState *cState)
Definition: hyperloglog.c:186
void addHyperLogLog(hyperLogLogState *cState, uint32 hash)
Definition: hyperloglog.c:167
int y
Definition: isn.c:72
int x
Definition: isn.c:71
int i
Definition: isn.c:73
void * palloc(Size size)
Definition: mcxt.c:1316
static rewind_source * source
Definition: pg_rewind.c:89
static char * buf
Definition: pg_test_fsync.c:73
void syntax_error(const char *source, int lineno, const char *line, const char *command, const char *msg, const char *more, int column)
Definition: pgbench.c:5477
bool pg_strong_random(void *buf, size_t len)
static uint32 DatumGetUInt32(Datum X)
Definition: postgres.h:222
uintptr_t Datum
Definition: postgres.h:64
void pq_sendbytes(StringInfo buf, const void *data, int datalen)
Definition: pqformat.c:126
const char * pq_getmsgbytes(StringInfo msg, int datalen)
Definition: pqformat.c:508
void pq_begintypsend(StringInfo buf)
Definition: pqformat.c:326
bytea * pq_endtypsend(StringInfo buf)
Definition: pqformat.c:346
MemoryContextSwitchTo(old_ctx)
struct SortSupportData * SortSupport
Definition: sortsupport.h:58
StringInfoData * StringInfo
Definition: stringinfo.h:54
Definition: nodes.h:129
int(* comparator)(Datum x, Datum y, SortSupport ssup)
Definition: sortsupport.h:106
Datum(* abbrev_converter)(Datum original, SortSupport ssup)
Definition: sortsupport.h:172
void * ssup_extra
Definition: sortsupport.h:87
MemoryContext ssup_cxt
Definition: sortsupport.h:66
int(* abbrev_full_comparator)(Datum x, Datum y, SortSupport ssup)
Definition: sortsupport.h:191
bool(* abbrev_abort)(int memtupcount, SortSupport ssup)
Definition: sortsupport.h:182
Definition: uuid.h:21
unsigned char data[UUID_LEN]
Definition: uuid.h:22
hyperLogLogState abbr_card
Definition: uuid.c:32
int ssup_datum_unsigned_cmp(Datum x, Datum y, SortSupport ssup)
Definition: tuplesort.c:3177
bool trace_sort
Definition: tuplesort.c:124
#define PG_RETURN_TIMESTAMPTZ(x)
Definition: timestamp.h:68
Datum uuid_send(PG_FUNCTION_ARGS)
Definition: uuid.c:156
static void string_to_uuid(const char *source, pg_uuid_t *uuid, Node *escontext)
Definition: uuid.c:95
static bool uuid_abbrev_abort(int memtupcount, SortSupport ssup)
Definition: uuid.c:292
Datum uuid_lt(PG_FUNCTION_ARGS)
Definition: uuid.c:174
Datum uuid_gt(PG_FUNCTION_ARGS)
Definition: uuid.c:210
Datum gen_random_uuid(PG_FUNCTION_ARGS)
Definition: uuid.c:411
Datum uuid_recv(PG_FUNCTION_ARGS)
Definition: uuid.c:145
Datum uuid_cmp(PG_FUNCTION_ARGS)
Definition: uuid.c:229
#define UUIDV1_EPOCH_JDATE
Definition: uuid.c:430
Datum uuid_le(PG_FUNCTION_ARGS)
Definition: uuid.c:183
Datum uuid_hash(PG_FUNCTION_ARGS)
Definition: uuid.c:395
Datum uuid_extract_version(PG_FUNCTION_ARGS)
Definition: uuid.c:479
Datum uuid_hash_extended(PG_FUNCTION_ARGS)
Definition: uuid.c:403
Datum uuid_out(PG_FUNCTION_ARGS)
Definition: uuid.c:53
Datum uuid_ne(PG_FUNCTION_ARGS)
Definition: uuid.c:219
static int uuid_internal_cmp(const pg_uuid_t *arg1, const pg_uuid_t *arg2)
Definition: uuid.c:168
Datum uuid_ge(PG_FUNCTION_ARGS)
Definition: uuid.c:201
Datum uuid_eq(PG_FUNCTION_ARGS)
Definition: uuid.c:192
static int uuid_fast_cmp(Datum x, Datum y, SortSupport ssup)
Definition: uuid.c:277
Datum uuid_in(PG_FUNCTION_ARGS)
Definition: uuid.c:42
static Datum uuid_abbrev_convert(Datum original, SortSupport ssup)
Definition: uuid.c:358
Datum uuid_extract_timestamp(PG_FUNCTION_ARGS)
Definition: uuid.c:438
Datum uuid_sortsupport(PG_FUNCTION_ARGS)
Definition: uuid.c:241
#define PG_RETURN_UUID_P(X)
Definition: uuid.h:32
#define UUID_LEN
Definition: uuid.h:18
static pg_uuid_t * DatumGetUUIDP(Datum X)
Definition: uuid.h:35
#define PG_GETARG_UUID_P(X)
Definition: uuid.h:40