PostgreSQL Source Code  git master
uuid.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * uuid.c
4  * Functions for the built-in type "uuid".
5  *
6  * Copyright (c) 2007-2022, PostgreSQL Global Development Group
7  *
8  * IDENTIFICATION
9  * src/backend/utils/adt/uuid.c
10  *
11  *-------------------------------------------------------------------------
12  */
13 
14 #include "postgres.h"
15 
16 #include "common/hashfn.h"
17 #include "lib/hyperloglog.h"
18 #include "libpq/pqformat.h"
19 #include "port/pg_bswap.h"
20 #include "utils/builtins.h"
21 #include "utils/guc.h"
22 #include "utils/sortsupport.h"
23 #include "utils/uuid.h"
24 
25 /* sortsupport for uuid */
26 typedef struct
27 {
28  int64 input_count; /* number of non-null values seen */
29  bool estimating; /* true if estimating cardinality */
30 
31  hyperLogLogState abbr_card; /* cardinality estimator */
33 
34 static void string_to_uuid(const char *source, pg_uuid_t *uuid);
35 static int uuid_internal_cmp(const pg_uuid_t *arg1, const pg_uuid_t *arg2);
36 static int uuid_fast_cmp(Datum x, Datum y, SortSupport ssup);
37 static bool uuid_abbrev_abort(int memtupcount, SortSupport ssup);
38 static Datum uuid_abbrev_convert(Datum original, SortSupport ssup);
39 
40 Datum
42 {
43  char *uuid_str = PG_GETARG_CSTRING(0);
44  pg_uuid_t *uuid;
45 
46  uuid = (pg_uuid_t *) palloc(sizeof(*uuid));
47  string_to_uuid(uuid_str, uuid);
48  PG_RETURN_UUID_P(uuid);
49 }
50 
51 Datum
53 {
54  pg_uuid_t *uuid = PG_GETARG_UUID_P(0);
55  static const char hex_chars[] = "0123456789abcdef";
57  int i;
58 
60  for (i = 0; i < UUID_LEN; i++)
61  {
62  int hi;
63  int lo;
64 
65  /*
66  * We print uuid values as a string of 8, 4, 4, 4, and then 12
67  * hexadecimal characters, with each group is separated by a hyphen
68  * ("-"). Therefore, add the hyphens at the appropriate places here.
69  */
70  if (i == 4 || i == 6 || i == 8 || i == 10)
72 
73  hi = uuid->data[i] >> 4;
74  lo = uuid->data[i] & 0x0F;
75 
76  appendStringInfoChar(&buf, hex_chars[hi]);
77  appendStringInfoChar(&buf, hex_chars[lo]);
78  }
79 
80  PG_RETURN_CSTRING(buf.data);
81 }
82 
83 /*
84  * We allow UUIDs as a series of 32 hexadecimal digits with an optional dash
85  * after each group of 4 hexadecimal digits, and optionally surrounded by {}.
86  * (The canonical format 8x-4x-4x-4x-12x, where "nx" means n hexadecimal
87  * digits, is the only one used for output.)
88  */
89 static void
90 string_to_uuid(const char *source, pg_uuid_t *uuid)
91 {
92  const char *src = source;
93  bool braces = false;
94  int i;
95 
96  if (src[0] == '{')
97  {
98  src++;
99  braces = true;
100  }
101 
102  for (i = 0; i < UUID_LEN; i++)
103  {
104  char str_buf[3];
105 
106  if (src[0] == '\0' || src[1] == '\0')
107  goto syntax_error;
108  memcpy(str_buf, src, 2);
109  if (!isxdigit((unsigned char) str_buf[0]) ||
110  !isxdigit((unsigned char) str_buf[1]))
111  goto syntax_error;
112 
113  str_buf[2] = '\0';
114  uuid->data[i] = (unsigned char) strtoul(str_buf, NULL, 16);
115  src += 2;
116  if (src[0] == '-' && (i % 2) == 1 && i < UUID_LEN - 1)
117  src++;
118  }
119 
120  if (braces)
121  {
122  if (*src != '}')
123  goto syntax_error;
124  src++;
125  }
126 
127  if (*src != '\0')
128  goto syntax_error;
129 
130  return;
131 
133  ereport(ERROR,
134  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
135  errmsg("invalid input syntax for type %s: \"%s\"",
136  "uuid", source)));
137 }
138 
139 Datum
141 {
143  pg_uuid_t *uuid;
144 
145  uuid = (pg_uuid_t *) palloc(UUID_LEN);
146  memcpy(uuid->data, pq_getmsgbytes(buffer, UUID_LEN), UUID_LEN);
147  PG_RETURN_POINTER(uuid);
148 }
149 
150 Datum
152 {
153  pg_uuid_t *uuid = PG_GETARG_UUID_P(0);
154  StringInfoData buffer;
155 
156  pq_begintypsend(&buffer);
157  pq_sendbytes(&buffer, (char *) uuid->data, UUID_LEN);
159 }
160 
161 /* internal uuid compare function */
162 static int
163 uuid_internal_cmp(const pg_uuid_t *arg1, const pg_uuid_t *arg2)
164 {
165  return memcmp(arg1->data, arg2->data, UUID_LEN);
166 }
167 
168 Datum
170 {
171  pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
172  pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
173 
174  PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) < 0);
175 }
176 
177 Datum
179 {
180  pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
181  pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
182 
183  PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) <= 0);
184 }
185 
186 Datum
188 {
189  pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
190  pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
191 
192  PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) == 0);
193 }
194 
195 Datum
197 {
198  pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
199  pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
200 
201  PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) >= 0);
202 }
203 
204 Datum
206 {
207  pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
208  pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
209 
210  PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) > 0);
211 }
212 
213 Datum
215 {
216  pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
217  pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
218 
219  PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) != 0);
220 }
221 
222 /* handler for btree index operator */
223 Datum
225 {
226  pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
227  pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
228 
229  PG_RETURN_INT32(uuid_internal_cmp(arg1, arg2));
230 }
231 
232 /*
233  * Sort support strategy routine
234  */
235 Datum
237 {
239 
240  ssup->comparator = uuid_fast_cmp;
241  ssup->ssup_extra = NULL;
242 
243  if (ssup->abbreviate)
244  {
246  MemoryContext oldcontext;
247 
248  oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
249 
250  uss = palloc(sizeof(uuid_sortsupport_state));
251  uss->input_count = 0;
252  uss->estimating = true;
253  initHyperLogLog(&uss->abbr_card, 10);
254 
255  ssup->ssup_extra = uss;
256 
261 
262  MemoryContextSwitchTo(oldcontext);
263  }
264 
265  PG_RETURN_VOID();
266 }
267 
268 /*
269  * SortSupport comparison func
270  */
271 static int
273 {
274  pg_uuid_t *arg1 = DatumGetUUIDP(x);
275  pg_uuid_t *arg2 = DatumGetUUIDP(y);
276 
277  return uuid_internal_cmp(arg1, arg2);
278 }
279 
280 /*
281  * Callback for estimating effectiveness of abbreviated key optimization.
282  *
283  * We pay no attention to the cardinality of the non-abbreviated data, because
284  * there is no equality fast-path within authoritative uuid comparator.
285  */
286 static bool
287 uuid_abbrev_abort(int memtupcount, SortSupport ssup)
288 {
289  uuid_sortsupport_state *uss = ssup->ssup_extra;
290  double abbr_card;
291 
292  if (memtupcount < 10000 || uss->input_count < 10000 || !uss->estimating)
293  return false;
294 
295  abbr_card = estimateHyperLogLog(&uss->abbr_card);
296 
297  /*
298  * If we have >100k distinct values, then even if we were sorting many
299  * billion rows we'd likely still break even, and the penalty of undoing
300  * that many rows of abbrevs would probably not be worth it. Stop even
301  * counting at that point.
302  */
303  if (abbr_card > 100000.0)
304  {
305 #ifdef TRACE_SORT
306  if (trace_sort)
307  elog(LOG,
308  "uuid_abbrev: estimation ends at cardinality %f"
309  " after " INT64_FORMAT " values (%d rows)",
310  abbr_card, uss->input_count, memtupcount);
311 #endif
312  uss->estimating = false;
313  return false;
314  }
315 
316  /*
317  * Target minimum cardinality is 1 per ~2k of non-null inputs. 0.5 row
318  * fudge factor allows us to abort earlier on genuinely pathological data
319  * where we've had exactly one abbreviated value in the first 2k
320  * (non-null) rows.
321  */
322  if (abbr_card < uss->input_count / 2000.0 + 0.5)
323  {
324 #ifdef TRACE_SORT
325  if (trace_sort)
326  elog(LOG,
327  "uuid_abbrev: aborting abbreviation at cardinality %f"
328  " below threshold %f after " INT64_FORMAT " values (%d rows)",
329  abbr_card, uss->input_count / 2000.0 + 0.5, uss->input_count,
330  memtupcount);
331 #endif
332  return true;
333  }
334 
335 #ifdef TRACE_SORT
336  if (trace_sort)
337  elog(LOG,
338  "uuid_abbrev: cardinality %f after " INT64_FORMAT
339  " values (%d rows)", abbr_card, uss->input_count, memtupcount);
340 #endif
341 
342  return false;
343 }
344 
345 /*
346  * Conversion routine for sortsupport. Converts original uuid representation
347  * to abbreviated key representation. Our encoding strategy is simple -- pack
348  * the first `sizeof(Datum)` bytes of uuid data into a Datum (on little-endian
349  * machines, the bytes are stored in reverse order), and treat it as an
350  * unsigned integer.
351  */
352 static Datum
354 {
355  uuid_sortsupport_state *uss = ssup->ssup_extra;
356  pg_uuid_t *authoritative = DatumGetUUIDP(original);
357  Datum res;
358 
359  memcpy(&res, authoritative->data, sizeof(Datum));
360  uss->input_count += 1;
361 
362  if (uss->estimating)
363  {
364  uint32 tmp;
365 
366 #if SIZEOF_DATUM == 8
367  tmp = (uint32) res ^ (uint32) ((uint64) res >> 32);
368 #else /* SIZEOF_DATUM != 8 */
369  tmp = (uint32) res;
370 #endif
371 
373  }
374 
375  /*
376  * Byteswap on little-endian machines.
377  *
378  * This is needed so that ssup_datum_unsigned_cmp() (an unsigned integer
379  * 3-way comparator) works correctly on all platforms. If we didn't do
380  * this, the comparator would have to call memcmp() with a pair of
381  * pointers to the first byte of each abbreviated key, which is slower.
382  */
383  res = DatumBigEndianToNative(res);
384 
385  return res;
386 }
387 
388 /* hash index support */
389 Datum
391 {
393 
394  return hash_any(key->data, UUID_LEN);
395 }
396 
397 Datum
399 {
401 
402  return hash_any_extended(key->data, UUID_LEN, PG_GETARG_INT64(1));
403 }
404 
405 Datum
407 {
408  pg_uuid_t *uuid = palloc(UUID_LEN);
409 
410  if (!pg_strong_random(uuid, UUID_LEN))
411  ereport(ERROR,
412  (errcode(ERRCODE_INTERNAL_ERROR),
413  errmsg("could not generate random values")));
414 
415  /*
416  * Set magic numbers for a "version 4" (pseudorandom) UUID, see
417  * http://tools.ietf.org/html/rfc4122#section-4.4
418  */
419  uuid->data[6] = (uuid->data[6] & 0x0f) | 0x40; /* time_hi_and_version */
420  uuid->data[8] = (uuid->data[8] & 0x3f) | 0x80; /* clock_seq_hi_and_reserved */
421 
422  PG_RETURN_UUID_P(uuid);
423 }
unsigned int uint32
Definition: c.h:441
#define INT64_FORMAT
Definition: c.h:483
int errcode(int sqlerrcode)
Definition: elog.c:693
int errmsg(const char *fmt,...)
Definition: elog.c:904
#define LOG
Definition: elog.h:25
#define ERROR
Definition: elog.h:33
#define elog(elevel,...)
Definition: elog.h:218
#define ereport(elevel,...)
Definition: elog.h:143
#define PG_RETURN_VOID()
Definition: fmgr.h:349
#define PG_RETURN_BYTEA_P(x)
Definition: fmgr.h:371
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:276
#define PG_RETURN_CSTRING(x)
Definition: fmgr.h:362
#define PG_GETARG_CSTRING(n)
Definition: fmgr.h:277
#define PG_GETARG_INT64(n)
Definition: fmgr.h:283
#define PG_RETURN_INT32(x)
Definition: fmgr.h:354
#define PG_RETURN_POINTER(x)
Definition: fmgr.h:361
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
#define PG_RETURN_BOOL(x)
Definition: fmgr.h:359
static Datum hash_uint32(uint32 k)
Definition: hashfn.h:43
static Datum hash_any_extended(const unsigned char *k, int keylen, uint64 seed)
Definition: hashfn.h:37
static Datum hash_any(const unsigned char *k, int keylen)
Definition: hashfn.h:31
void initHyperLogLog(hyperLogLogState *cState, uint8 bwidth)
Definition: hyperloglog.c:66
double estimateHyperLogLog(hyperLogLogState *cState)
Definition: hyperloglog.c:186
void addHyperLogLog(hyperLogLogState *cState, uint32 hash)
Definition: hyperloglog.c:167
int y
Definition: isn.c:72
int x
Definition: isn.c:71
int i
Definition: isn.c:73
void * palloc(Size size)
Definition: mcxt.c:1068
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
static rewind_source * source
Definition: pg_rewind.c:81
static char * buf
Definition: pg_test_fsync.c:67
void syntax_error(const char *source, int lineno, const char *line, const char *command, const char *msg, const char *more, int column)
Definition: pgbench.c:5375
bool pg_strong_random(void *buf, size_t len)
uintptr_t Datum
Definition: postgres.h:411
#define DatumGetUInt32(X)
Definition: postgres.h:530
const char * pq_getmsgbytes(StringInfo msg, int datalen)
Definition: pqformat.c:510
void pq_begintypsend(StringInfo buf)
Definition: pqformat.c:328
void pq_sendbytes(StringInfo buf, const char *data, int datalen)
Definition: pqformat.c:125
bytea * pq_endtypsend(StringInfo buf)
Definition: pqformat.c:348
struct SortSupportData * SortSupport
Definition: sortsupport.h:58
void appendStringInfoChar(StringInfo str, char ch)
Definition: stringinfo.c:188
void initStringInfo(StringInfo str)
Definition: stringinfo.c:59
StringInfoData * StringInfo
Definition: stringinfo.h:44
int(* comparator)(Datum x, Datum y, SortSupport ssup)
Definition: sortsupport.h:106
Datum(* abbrev_converter)(Datum original, SortSupport ssup)
Definition: sortsupport.h:172
void * ssup_extra
Definition: sortsupport.h:87
MemoryContext ssup_cxt
Definition: sortsupport.h:66
int(* abbrev_full_comparator)(Datum x, Datum y, SortSupport ssup)
Definition: sortsupport.h:191
bool(* abbrev_abort)(int memtupcount, SortSupport ssup)
Definition: sortsupport.h:182
Definition: uuid.h:21
unsigned char data[UUID_LEN]
Definition: uuid.h:22
hyperLogLogState abbr_card
Definition: uuid.c:31
int ssup_datum_unsigned_cmp(Datum x, Datum y, SortSupport ssup)
Definition: tuplesort.c:4899
bool trace_sort
Definition: tuplesort.c:144
Datum uuid_send(PG_FUNCTION_ARGS)
Definition: uuid.c:151
static bool uuid_abbrev_abort(int memtupcount, SortSupport ssup)
Definition: uuid.c:287
Datum uuid_lt(PG_FUNCTION_ARGS)
Definition: uuid.c:169
Datum uuid_gt(PG_FUNCTION_ARGS)
Definition: uuid.c:205
Datum gen_random_uuid(PG_FUNCTION_ARGS)
Definition: uuid.c:406
Datum uuid_recv(PG_FUNCTION_ARGS)
Definition: uuid.c:140
Datum uuid_cmp(PG_FUNCTION_ARGS)
Definition: uuid.c:224
Datum uuid_le(PG_FUNCTION_ARGS)
Definition: uuid.c:178
Datum uuid_hash(PG_FUNCTION_ARGS)
Definition: uuid.c:390
Datum uuid_hash_extended(PG_FUNCTION_ARGS)
Definition: uuid.c:398
static void string_to_uuid(const char *source, pg_uuid_t *uuid)
Definition: uuid.c:90
Datum uuid_out(PG_FUNCTION_ARGS)
Definition: uuid.c:52
Datum uuid_ne(PG_FUNCTION_ARGS)
Definition: uuid.c:214
static int uuid_internal_cmp(const pg_uuid_t *arg1, const pg_uuid_t *arg2)
Definition: uuid.c:163
Datum uuid_ge(PG_FUNCTION_ARGS)
Definition: uuid.c:196
Datum uuid_eq(PG_FUNCTION_ARGS)
Definition: uuid.c:187
static int uuid_fast_cmp(Datum x, Datum y, SortSupport ssup)
Definition: uuid.c:272
Datum uuid_in(PG_FUNCTION_ARGS)
Definition: uuid.c:41
static Datum uuid_abbrev_convert(Datum original, SortSupport ssup)
Definition: uuid.c:353
Datum uuid_sortsupport(PG_FUNCTION_ARGS)
Definition: uuid.c:236
#define PG_RETURN_UUID_P(X)
Definition: uuid.h:27
#define UUID_LEN
Definition: uuid.h:18
#define DatumGetUUIDP(X)
Definition: uuid.h:28
#define PG_GETARG_UUID_P(X)
Definition: uuid.h:29