PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
uuid.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * uuid.c
4  * Functions for the built-in type "uuid".
5  *
6  * Copyright (c) 2007-2017, PostgreSQL Global Development Group
7  *
8  * IDENTIFICATION
9  * src/backend/utils/adt/uuid.c
10  *
11  *-------------------------------------------------------------------------
12  */
13 
14 #include "postgres.h"
15 
16 #include "access/hash.h"
17 #include "lib/hyperloglog.h"
18 #include "libpq/pqformat.h"
19 #include "port/pg_bswap.h"
20 #include "utils/builtins.h"
21 #include "utils/guc.h"
22 #include "utils/sortsupport.h"
23 #include "utils/uuid.h"
24 
25 /* sortsupport for uuid */
26 typedef struct
27 {
28  int64 input_count; /* number of non-null values seen */
29  bool estimating; /* true if estimating cardinality */
30 
31  hyperLogLogState abbr_card; /* cardinality estimator */
33 
34 static void string_to_uuid(const char *source, pg_uuid_t *uuid);
35 static int uuid_internal_cmp(const pg_uuid_t *arg1, const pg_uuid_t *arg2);
36 static int uuid_fast_cmp(Datum x, Datum y, SortSupport ssup);
37 static int uuid_cmp_abbrev(Datum x, Datum y, SortSupport ssup);
38 static bool uuid_abbrev_abort(int memtupcount, SortSupport ssup);
39 static Datum uuid_abbrev_convert(Datum original, SortSupport ssup);
40 
41 Datum
43 {
44  char *uuid_str = PG_GETARG_CSTRING(0);
45  pg_uuid_t *uuid;
46 
47  uuid = (pg_uuid_t *) palloc(sizeof(*uuid));
48  string_to_uuid(uuid_str, uuid);
49  PG_RETURN_UUID_P(uuid);
50 }
51 
52 Datum
54 {
55  pg_uuid_t *uuid = PG_GETARG_UUID_P(0);
56  static const char hex_chars[] = "0123456789abcdef";
58  int i;
59 
60  initStringInfo(&buf);
61  for (i = 0; i < UUID_LEN; i++)
62  {
63  int hi;
64  int lo;
65 
66  /*
67  * We print uuid values as a string of 8, 4, 4, 4, and then 12
68  * hexadecimal characters, with each group is separated by a hyphen
69  * ("-"). Therefore, add the hyphens at the appropriate places here.
70  */
71  if (i == 4 || i == 6 || i == 8 || i == 10)
72  appendStringInfoChar(&buf, '-');
73 
74  hi = uuid->data[i] >> 4;
75  lo = uuid->data[i] & 0x0F;
76 
77  appendStringInfoChar(&buf, hex_chars[hi]);
78  appendStringInfoChar(&buf, hex_chars[lo]);
79  }
80 
82 }
83 
84 /*
85  * We allow UUIDs as a series of 32 hexadecimal digits with an optional dash
86  * after each group of 4 hexadecimal digits, and optionally surrounded by {}.
87  * (The canonical format 8x-4x-4x-4x-12x, where "nx" means n hexadecimal
88  * digits, is the only one used for output.)
89  */
90 static void
91 string_to_uuid(const char *source, pg_uuid_t *uuid)
92 {
93  const char *src = source;
94  bool braces = false;
95  int i;
96 
97  if (src[0] == '{')
98  {
99  src++;
100  braces = true;
101  }
102 
103  for (i = 0; i < UUID_LEN; i++)
104  {
105  char str_buf[3];
106 
107  if (src[0] == '\0' || src[1] == '\0')
108  goto syntax_error;
109  memcpy(str_buf, src, 2);
110  if (!isxdigit((unsigned char) str_buf[0]) ||
111  !isxdigit((unsigned char) str_buf[1]))
112  goto syntax_error;
113 
114  str_buf[2] = '\0';
115  uuid->data[i] = (unsigned char) strtoul(str_buf, NULL, 16);
116  src += 2;
117  if (src[0] == '-' && (i % 2) == 1 && i < UUID_LEN - 1)
118  src++;
119  }
120 
121  if (braces)
122  {
123  if (*src != '}')
124  goto syntax_error;
125  src++;
126  }
127 
128  if (*src != '\0')
129  goto syntax_error;
130 
131  return;
132 
134  ereport(ERROR,
135  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
136  errmsg("invalid input syntax for type %s: \"%s\"",
137  "uuid", source)));
138 }
139 
140 Datum
142 {
144  pg_uuid_t *uuid;
145 
146  uuid = (pg_uuid_t *) palloc(UUID_LEN);
147  memcpy(uuid->data, pq_getmsgbytes(buffer, UUID_LEN), UUID_LEN);
148  PG_RETURN_POINTER(uuid);
149 }
150 
151 Datum
153 {
154  pg_uuid_t *uuid = PG_GETARG_UUID_P(0);
156 
157  pq_begintypsend(&buffer);
158  pq_sendbytes(&buffer, (char *) uuid->data, UUID_LEN);
160 }
161 
162 /* internal uuid compare function */
163 static int
164 uuid_internal_cmp(const pg_uuid_t *arg1, const pg_uuid_t *arg2)
165 {
166  return memcmp(arg1->data, arg2->data, UUID_LEN);
167 }
168 
169 Datum
171 {
172  pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
173  pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
174 
175  PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) < 0);
176 }
177 
178 Datum
180 {
181  pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
182  pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
183 
184  PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) <= 0);
185 }
186 
187 Datum
189 {
190  pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
191  pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
192 
193  PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) == 0);
194 }
195 
196 Datum
198 {
199  pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
200  pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
201 
202  PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) >= 0);
203 }
204 
205 Datum
207 {
208  pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
209  pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
210 
211  PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) > 0);
212 }
213 
214 Datum
216 {
217  pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
218  pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
219 
220  PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) != 0);
221 }
222 
223 /* handler for btree index operator */
224 Datum
226 {
227  pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
228  pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
229 
230  PG_RETURN_INT32(uuid_internal_cmp(arg1, arg2));
231 }
232 
233 /*
234  * Sort support strategy routine
235  */
236 Datum
238 {
240 
241  ssup->comparator = uuid_fast_cmp;
242  ssup->ssup_extra = NULL;
243 
244  if (ssup->abbreviate)
245  {
247  MemoryContext oldcontext;
248 
249  oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
250 
251  uss = palloc(sizeof(uuid_sortsupport_state));
252  uss->input_count = 0;
253  uss->estimating = true;
254  initHyperLogLog(&uss->abbr_card, 10);
255 
256  ssup->ssup_extra = uss;
257 
258  ssup->comparator = uuid_cmp_abbrev;
262 
263  MemoryContextSwitchTo(oldcontext);
264  }
265 
266  PG_RETURN_VOID();
267 }
268 
269 /*
270  * SortSupport comparison func
271  */
272 static int
274 {
275  pg_uuid_t *arg1 = DatumGetUUIDP(x);
276  pg_uuid_t *arg2 = DatumGetUUIDP(y);
277 
278  return uuid_internal_cmp(arg1, arg2);
279 }
280 
281 /*
282  * Abbreviated key comparison func
283  */
284 static int
286 {
287  if (x > y)
288  return 1;
289  else if (x == y)
290  return 0;
291  else
292  return -1;
293 }
294 
295 /*
296  * Callback for estimating effectiveness of abbreviated key optimization.
297  *
298  * We pay no attention to the cardinality of the non-abbreviated data, because
299  * there is no equality fast-path within authoritative uuid comparator.
300  */
301 static bool
302 uuid_abbrev_abort(int memtupcount, SortSupport ssup)
303 {
304  uuid_sortsupport_state *uss = ssup->ssup_extra;
305  double abbr_card;
306 
307  if (memtupcount < 10000 || uss->input_count < 10000 || !uss->estimating)
308  return false;
309 
310  abbr_card = estimateHyperLogLog(&uss->abbr_card);
311 
312  /*
313  * If we have >100k distinct values, then even if we were sorting many
314  * billion rows we'd likely still break even, and the penalty of undoing
315  * that many rows of abbrevs would probably not be worth it. Stop even
316  * counting at that point.
317  */
318  if (abbr_card > 100000.0)
319  {
320 #ifdef TRACE_SORT
321  if (trace_sort)
322  elog(LOG,
323  "uuid_abbrev: estimation ends at cardinality %f"
324  " after " INT64_FORMAT " values (%d rows)",
325  abbr_card, uss->input_count, memtupcount);
326 #endif
327  uss->estimating = false;
328  return false;
329  }
330 
331  /*
332  * Target minimum cardinality is 1 per ~2k of non-null inputs. 0.5 row
333  * fudge factor allows us to abort earlier on genuinely pathological data
334  * where we've had exactly one abbreviated value in the first 2k
335  * (non-null) rows.
336  */
337  if (abbr_card < uss->input_count / 2000.0 + 0.5)
338  {
339 #ifdef TRACE_SORT
340  if (trace_sort)
341  elog(LOG,
342  "uuid_abbrev: aborting abbreviation at cardinality %f"
343  " below threshold %f after " INT64_FORMAT " values (%d rows)",
344  abbr_card, uss->input_count / 2000.0 + 0.5, uss->input_count,
345  memtupcount);
346 #endif
347  return true;
348  }
349 
350 #ifdef TRACE_SORT
351  if (trace_sort)
352  elog(LOG,
353  "uuid_abbrev: cardinality %f after " INT64_FORMAT
354  " values (%d rows)", abbr_card, uss->input_count, memtupcount);
355 #endif
356 
357  return false;
358 }
359 
360 /*
361  * Conversion routine for sortsupport. Converts original uuid representation
362  * to abbreviated key representation. Our encoding strategy is simple -- pack
363  * the first `sizeof(Datum)` bytes of uuid data into a Datum (on little-endian
364  * machines, the bytes are stored in reverse order), and treat it as an
365  * unsigned integer.
366  */
367 static Datum
369 {
370  uuid_sortsupport_state *uss = ssup->ssup_extra;
371  pg_uuid_t *authoritative = DatumGetUUIDP(original);
372  Datum res;
373 
374  memcpy(&res, authoritative->data, sizeof(Datum));
375  uss->input_count += 1;
376 
377  if (uss->estimating)
378  {
379  uint32 tmp;
380 
381 #if SIZEOF_DATUM == 8
382  tmp = (uint32) res ^ (uint32) ((uint64) res >> 32);
383 #else /* SIZEOF_DATUM != 8 */
384  tmp = (uint32) res;
385 #endif
386 
388  }
389 
390  /*
391  * Byteswap on little-endian machines.
392  *
393  * This is needed so that uuid_cmp_abbrev() (an unsigned integer 3-way
394  * comparator) works correctly on all platforms. If we didn't do this,
395  * the comparator would have to call memcmp() with a pair of pointers to
396  * the first byte of each abbreviated key, which is slower.
397  */
398  res = DatumBigEndianToNative(res);
399 
400  return res;
401 }
402 
403 /* hash index support */
404 Datum
406 {
407  pg_uuid_t *key = PG_GETARG_UUID_P(0);
408 
409  return hash_any(key->data, UUID_LEN);
410 }
struct SortSupportData * SortSupport
Definition: sortsupport.h:58
int(* comparator)(Datum x, Datum y, SortSupport ssup)
Definition: sortsupport.h:107
#define PG_RETURN_POINTER(x)
Definition: fmgr.h:321
#define DatumGetUInt32(X)
Definition: postgres.h:492
#define PG_GETARG_UUID_P(X)
Definition: uuid.h:29
static int uuid_cmp_abbrev(Datum x, Datum y, SortSupport ssup)
Definition: uuid.c:285
unsigned char data[UUID_LEN]
Definition: uuid.h:22
Datum uuid_hash(PG_FUNCTION_ARGS)
Definition: uuid.c:405
static void string_to_uuid(const char *source, pg_uuid_t *uuid)
Definition: uuid.c:91
Datum uuid_in(PG_FUNCTION_ARGS)
Definition: uuid.c:42
void pq_begintypsend(StringInfo buf)
Definition: pqformat.c:359
#define DatumGetUUIDP(X)
Definition: uuid.h:28
StringInfoData * StringInfo
Definition: stringinfo.h:43
void syntax_error(const char *source, int lineno, const char *line, const char *command, const char *msg, const char *more, int column)
Definition: pgbench.c:2920
Datum uuid_send(PG_FUNCTION_ARGS)
Definition: uuid.c:152
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
#define PG_RETURN_INT32(x)
Definition: fmgr.h:314
int errcode(int sqlerrcode)
Definition: elog.c:575
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:241
#define PG_RETURN_BYTEA_P(x)
Definition: fmgr.h:329
#define LOG
Definition: elog.h:26
bytea * pq_endtypsend(StringInfo buf)
Definition: pqformat.c:379
bool trace_sort
Definition: tuplesort.c:155
const char * pq_getmsgbytes(StringInfo msg, int datalen)
Definition: pqformat.c:550
void initHyperLogLog(hyperLogLogState *cState, uint8 bwidth)
Definition: hyperloglog.c:65
static Datum uuid_abbrev_convert(Datum original, SortSupport ssup)
Definition: uuid.c:368
#define ERROR
Definition: elog.h:43
MemoryContext ssup_cxt
Definition: sortsupport.h:66
static bool uuid_abbrev_abort(int memtupcount, SortSupport ssup)
Definition: uuid.c:302
static char * buf
Definition: pg_test_fsync.c:66
#define DatumBigEndianToNative(x)
Definition: pg_bswap.h:65
unsigned int uint32
Definition: c.h:268
int(* abbrev_full_comparator)(Datum x, Datum y, SortSupport ssup)
Definition: sortsupport.h:192
void * ssup_extra
Definition: sortsupport.h:87
#define ereport(elevel, rest)
Definition: elog.h:122
#define PG_RETURN_UUID_P(X)
Definition: uuid.h:27
void appendStringInfoChar(StringInfo str, char ch)
Definition: stringinfo.c:169
void initStringInfo(StringInfo str)
Definition: stringinfo.c:46
Datum hash_uint32(uint32 k)
Definition: hashfunc.c:512
hyperLogLogState abbr_card
Definition: uuid.c:31
Datum uuid_recv(PG_FUNCTION_ARGS)
Definition: uuid.c:141
#define UUID_LEN
Definition: uuid.h:18
#define PG_RETURN_BOOL(x)
Definition: fmgr.h:319
uintptr_t Datum
Definition: postgres.h:372
double estimateHyperLogLog(hyperLogLogState *cState)
Definition: hyperloglog.c:185
Datum uuid_gt(PG_FUNCTION_ARGS)
Definition: uuid.c:206
#define PG_RETURN_VOID()
Definition: fmgr.h:309
Datum uuid_sortsupport(PG_FUNCTION_ARGS)
Definition: uuid.c:237
#define NULL
Definition: c.h:229
Datum hash_any(register const unsigned char *k, register int keylen)
Definition: hashfunc.c:307
WalTimeSample buffer[LAG_TRACKER_BUFFER_SIZE]
Definition: walsender.c:214
#define PG_RETURN_CSTRING(x)
Definition: fmgr.h:322
static int uuid_internal_cmp(const pg_uuid_t *arg1, const pg_uuid_t *arg2)
Definition: uuid.c:164
void addHyperLogLog(hyperLogLogState *cState, uint32 hash)
Definition: hyperloglog.c:166
#define INT64_FORMAT
Definition: c.h:315
Datum uuid_ge(PG_FUNCTION_ARGS)
Definition: uuid.c:197
Datum uuid_le(PG_FUNCTION_ARGS)
Definition: uuid.c:179
Definition: uuid.h:20
void pq_sendbytes(StringInfo buf, const char *data, int datalen)
Definition: pqformat.c:115
void * palloc(Size size)
Definition: mcxt.c:849
int errmsg(const char *fmt,...)
Definition: elog.c:797
int i
Datum uuid_ne(PG_FUNCTION_ARGS)
Definition: uuid.c:215
#define PG_GETARG_CSTRING(n)
Definition: fmgr.h:242
#define PG_FUNCTION_ARGS
Definition: fmgr.h:158
Datum uuid_cmp(PG_FUNCTION_ARGS)
Definition: uuid.c:225
#define elog
Definition: elog.h:219
bool(* abbrev_abort)(int memtupcount, SortSupport ssup)
Definition: sortsupport.h:183
Datum uuid_lt(PG_FUNCTION_ARGS)
Definition: uuid.c:170
Datum uuid_out(PG_FUNCTION_ARGS)
Definition: uuid.c:53
static int uuid_fast_cmp(Datum x, Datum y, SortSupport ssup)
Definition: uuid.c:273
Datum(* abbrev_converter)(Datum original, SortSupport ssup)
Definition: sortsupport.h:173
Datum uuid_eq(PG_FUNCTION_ARGS)
Definition: uuid.c:188