PostgreSQL Source Code  git master
mac.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * mac.c
4  * PostgreSQL type definitions for 6 byte, EUI-48, MAC addresses.
5  *
6  * Portions Copyright (c) 1998-2024, PostgreSQL Global Development Group
7  *
8  * IDENTIFICATION
9  * src/backend/utils/adt/mac.c
10  *
11  *-------------------------------------------------------------------------
12  */
13 
14 #include "postgres.h"
15 
16 #include "common/hashfn.h"
17 #include "lib/hyperloglog.h"
18 #include "libpq/pqformat.h"
19 #include "port/pg_bswap.h"
20 #include "utils/fmgrprotos.h"
21 #include "utils/guc.h"
22 #include "utils/inet.h"
23 #include "utils/sortsupport.h"
24 
25 
26 /*
27  * Utility macros used for sorting and comparing:
28  */
29 
/* Upper three octets (a, b, c) packed into one unsigned long, a highest. */
#define hibits(addr) \
	(((unsigned long) (addr)->a << 16) | \
	 ((unsigned long) (addr)->b << 8) | \
	 ((unsigned long) (addr)->c))
32 
/* Lower three octets (d, e, f) packed into one unsigned long, d highest. */
#define lobits(addr) \
	(((unsigned long) (addr)->d << 16) | \
	 ((unsigned long) (addr)->e << 8) | \
	 ((unsigned long) (addr)->f))
35 
36 /* sortsupport for macaddr */
37 typedef struct
38 {
39  int64 input_count; /* number of non-null values seen */
40  bool estimating; /* true if estimating cardinality */
41 
42  hyperLogLogState abbr_card; /* cardinality estimator */
44 
46 static int macaddr_fast_cmp(Datum x, Datum y, SortSupport ssup);
47 static bool macaddr_abbrev_abort(int memtupcount, SortSupport ssup);
48 static Datum macaddr_abbrev_convert(Datum original, SortSupport ssup);
49 
50 /*
51  * MAC address reader. Accepts several common notations.
52  */
53 
54 Datum
56 {
57  char *str = PG_GETARG_CSTRING(0);
58  Node *escontext = fcinfo->context;
59  macaddr *result;
60  int a,
61  b,
62  c,
63  d,
64  e,
65  f;
66  char junk[2];
67  int count;
68 
69  /* %1s matches iff there is trailing non-whitespace garbage */
70 
71  count = sscanf(str, "%x:%x:%x:%x:%x:%x%1s",
72  &a, &b, &c, &d, &e, &f, junk);
73  if (count != 6)
74  count = sscanf(str, "%x-%x-%x-%x-%x-%x%1s",
75  &a, &b, &c, &d, &e, &f, junk);
76  if (count != 6)
77  count = sscanf(str, "%2x%2x%2x:%2x%2x%2x%1s",
78  &a, &b, &c, &d, &e, &f, junk);
79  if (count != 6)
80  count = sscanf(str, "%2x%2x%2x-%2x%2x%2x%1s",
81  &a, &b, &c, &d, &e, &f, junk);
82  if (count != 6)
83  count = sscanf(str, "%2x%2x.%2x%2x.%2x%2x%1s",
84  &a, &b, &c, &d, &e, &f, junk);
85  if (count != 6)
86  count = sscanf(str, "%2x%2x-%2x%2x-%2x%2x%1s",
87  &a, &b, &c, &d, &e, &f, junk);
88  if (count != 6)
89  count = sscanf(str, "%2x%2x%2x%2x%2x%2x%1s",
90  &a, &b, &c, &d, &e, &f, junk);
91  if (count != 6)
92  ereturn(escontext, (Datum) 0,
93  (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
94  errmsg("invalid input syntax for type %s: \"%s\"", "macaddr",
95  str)));
96 
97  if ((a < 0) || (a > 255) || (b < 0) || (b > 255) ||
98  (c < 0) || (c > 255) || (d < 0) || (d > 255) ||
99  (e < 0) || (e > 255) || (f < 0) || (f > 255))
100  ereturn(escontext, (Datum) 0,
101  (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
102  errmsg("invalid octet value in \"macaddr\" value: \"%s\"", str)));
103 
104  result = (macaddr *) palloc(sizeof(macaddr));
105 
106  result->a = a;
107  result->b = b;
108  result->c = c;
109  result->d = d;
110  result->e = e;
111  result->f = f;
112 
113  PG_RETURN_MACADDR_P(result);
114 }
115 
116 /*
117  * MAC address output function. Fixed format.
118  */
119 
120 Datum
122 {
123  macaddr *addr = PG_GETARG_MACADDR_P(0);
124  char *result;
125 
126  result = (char *) palloc(32);
127 
128  snprintf(result, 32, "%02x:%02x:%02x:%02x:%02x:%02x",
129  addr->a, addr->b, addr->c, addr->d, addr->e, addr->f);
130 
131  PG_RETURN_CSTRING(result);
132 }
133 
134 /*
135  * macaddr_recv - converts external binary format to macaddr
136  *
137  * The external representation is just the six bytes, MSB first.
138  */
139 Datum
141 {
143  macaddr *addr;
144 
145  addr = (macaddr *) palloc(sizeof(macaddr));
146 
147  addr->a = pq_getmsgbyte(buf);
148  addr->b = pq_getmsgbyte(buf);
149  addr->c = pq_getmsgbyte(buf);
150  addr->d = pq_getmsgbyte(buf);
151  addr->e = pq_getmsgbyte(buf);
152  addr->f = pq_getmsgbyte(buf);
153 
154  PG_RETURN_MACADDR_P(addr);
155 }
156 
157 /*
158  * macaddr_send - converts macaddr to binary format
159  */
160 Datum
162 {
163  macaddr *addr = PG_GETARG_MACADDR_P(0);
165 
167  pq_sendbyte(&buf, addr->a);
168  pq_sendbyte(&buf, addr->b);
169  pq_sendbyte(&buf, addr->c);
170  pq_sendbyte(&buf, addr->d);
171  pq_sendbyte(&buf, addr->e);
172  pq_sendbyte(&buf, addr->f);
174 }
175 
176 
177 /*
178  * Comparison function for sorting:
179  */
180 
181 static int
183 {
184  if (hibits(a1) < hibits(a2))
185  return -1;
186  else if (hibits(a1) > hibits(a2))
187  return 1;
188  else if (lobits(a1) < lobits(a2))
189  return -1;
190  else if (lobits(a1) > lobits(a2))
191  return 1;
192  else
193  return 0;
194 }
195 
196 Datum
198 {
201 
203 }
204 
205 /*
206  * Boolean comparisons.
207  */
208 
209 Datum
211 {
214 
216 }
217 
218 Datum
220 {
223 
225 }
226 
227 Datum
229 {
232 
234 }
235 
236 Datum
238 {
241 
243 }
244 
245 Datum
247 {
250 
252 }
253 
254 Datum
256 {
259 
261 }
262 
263 /*
264  * Support function for hash indexes on macaddr.
265  */
266 Datum
268 {
270 
271  return hash_any((unsigned char *) key, sizeof(macaddr));
272 }
273 
274 Datum
276 {
278 
279  return hash_any_extended((unsigned char *) key, sizeof(macaddr),
280  PG_GETARG_INT64(1));
281 }
282 
283 /*
284  * Arithmetic functions: bitwise NOT, AND, OR.
285  */
286 Datum
288 {
289  macaddr *addr = PG_GETARG_MACADDR_P(0);
290  macaddr *result;
291 
292  result = (macaddr *) palloc(sizeof(macaddr));
293  result->a = ~addr->a;
294  result->b = ~addr->b;
295  result->c = ~addr->c;
296  result->d = ~addr->d;
297  result->e = ~addr->e;
298  result->f = ~addr->f;
299  PG_RETURN_MACADDR_P(result);
300 }
301 
302 Datum
304 {
305  macaddr *addr1 = PG_GETARG_MACADDR_P(0);
306  macaddr *addr2 = PG_GETARG_MACADDR_P(1);
307  macaddr *result;
308 
309  result = (macaddr *) palloc(sizeof(macaddr));
310  result->a = addr1->a & addr2->a;
311  result->b = addr1->b & addr2->b;
312  result->c = addr1->c & addr2->c;
313  result->d = addr1->d & addr2->d;
314  result->e = addr1->e & addr2->e;
315  result->f = addr1->f & addr2->f;
316  PG_RETURN_MACADDR_P(result);
317 }
318 
319 Datum
321 {
322  macaddr *addr1 = PG_GETARG_MACADDR_P(0);
323  macaddr *addr2 = PG_GETARG_MACADDR_P(1);
324  macaddr *result;
325 
326  result = (macaddr *) palloc(sizeof(macaddr));
327  result->a = addr1->a | addr2->a;
328  result->b = addr1->b | addr2->b;
329  result->c = addr1->c | addr2->c;
330  result->d = addr1->d | addr2->d;
331  result->e = addr1->e | addr2->e;
332  result->f = addr1->f | addr2->f;
333  PG_RETURN_MACADDR_P(result);
334 }
335 
336 /*
337  * Truncation function to allow comparing mac manufacturers.
338  * From suggestion by Alex Pilosov <alex@pilosoft.com>
339  */
340 Datum
342 {
343  macaddr *addr = PG_GETARG_MACADDR_P(0);
344  macaddr *result;
345 
346  result = (macaddr *) palloc(sizeof(macaddr));
347 
348  result->a = addr->a;
349  result->b = addr->b;
350  result->c = addr->c;
351  result->d = 0;
352  result->e = 0;
353  result->f = 0;
354 
355  PG_RETURN_MACADDR_P(result);
356 }
357 
358 /*
359  * SortSupport strategy function. Populates a SortSupport struct with the
360  * information necessary to use comparison by abbreviated keys.
361  */
362 Datum
364 {
366 
368  ssup->ssup_extra = NULL;
369 
370  if (ssup->abbreviate)
371  {
373  MemoryContext oldcontext;
374 
375  oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
376 
377  uss = palloc(sizeof(macaddr_sortsupport_state));
378  uss->input_count = 0;
379  uss->estimating = true;
380  initHyperLogLog(&uss->abbr_card, 10);
381 
382  ssup->ssup_extra = uss;
383 
388 
389  MemoryContextSwitchTo(oldcontext);
390  }
391 
392  PG_RETURN_VOID();
393 }
394 
395 /*
396  * SortSupport "traditional" comparison function. Pulls two MAC addresses from
397  * the heap and runs a standard comparison on them.
398  */
399 static int
401 {
402  macaddr *arg1 = DatumGetMacaddrP(x);
403  macaddr *arg2 = DatumGetMacaddrP(y);
404 
405  return macaddr_cmp_internal(arg1, arg2);
406 }
407 
408 /*
409  * Callback for estimating effectiveness of abbreviated key optimization.
410  *
411  * We pay no attention to the cardinality of the non-abbreviated data, because
412  * there is no equality fast-path within authoritative macaddr comparator.
413  */
414 static bool
415 macaddr_abbrev_abort(int memtupcount, SortSupport ssup)
416 {
418  double abbr_card;
419 
420  if (memtupcount < 10000 || uss->input_count < 10000 || !uss->estimating)
421  return false;
422 
423  abbr_card = estimateHyperLogLog(&uss->abbr_card);
424 
425  /*
426  * If we have >100k distinct values, then even if we were sorting many
427  * billion rows we'd likely still break even, and the penalty of undoing
428  * that many rows of abbrevs would probably not be worth it. At this point
429  * we stop counting because we know that we're now fully committed.
430  */
431  if (abbr_card > 100000.0)
432  {
433 #ifdef TRACE_SORT
434  if (trace_sort)
435  elog(LOG,
436  "macaddr_abbrev: estimation ends at cardinality %f"
437  " after " INT64_FORMAT " values (%d rows)",
438  abbr_card, uss->input_count, memtupcount);
439 #endif
440  uss->estimating = false;
441  return false;
442  }
443 
444  /*
445  * Target minimum cardinality is 1 per ~2k of non-null inputs. 0.5 row
446  * fudge factor allows us to abort earlier on genuinely pathological data
447  * where we've had exactly one abbreviated value in the first 2k
448  * (non-null) rows.
449  */
450  if (abbr_card < uss->input_count / 2000.0 + 0.5)
451  {
452 #ifdef TRACE_SORT
453  if (trace_sort)
454  elog(LOG,
455  "macaddr_abbrev: aborting abbreviation at cardinality %f"
456  " below threshold %f after " INT64_FORMAT " values (%d rows)",
457  abbr_card, uss->input_count / 2000.0 + 0.5, uss->input_count,
458  memtupcount);
459 #endif
460  return true;
461  }
462 
463 #ifdef TRACE_SORT
464  if (trace_sort)
465  elog(LOG,
466  "macaddr_abbrev: cardinality %f after " INT64_FORMAT
467  " values (%d rows)", abbr_card, uss->input_count, memtupcount);
468 #endif
469 
470  return false;
471 }
472 
473 /*
474  * SortSupport conversion routine. Converts original macaddr representation
475  * to abbreviated key representation.
476  *
477  * Packs the bytes of a 6-byte MAC address into a Datum and treats it as an
478  * unsigned integer for purposes of comparison. On a 64-bit machine, there
479  * will be two zeroed bytes of padding. The integer is converted to native
480  * endianness to facilitate easy comparison.
481  */
482 static Datum
484 {
486  macaddr *authoritative = DatumGetMacaddrP(original);
487  Datum res;
488 
489  /*
490  * On a 64-bit machine, zero out the 8-byte datum and copy the 6 bytes of
491  * the MAC address in. There will be two bytes of zero padding on the end
492  * of the least significant bits.
493  */
494 #if SIZEOF_DATUM == 8
495  memset(&res, 0, SIZEOF_DATUM);
496  memcpy(&res, authoritative, sizeof(macaddr));
497 #else /* SIZEOF_DATUM != 8 */
498  memcpy(&res, authoritative, SIZEOF_DATUM);
499 #endif
500  uss->input_count += 1;
501 
502  /*
503  * Cardinality estimation. The estimate uses uint32, so on a 64-bit
504  * architecture, XOR the two 32-bit halves together to produce slightly
505  * more entropy. The two zeroed bytes won't have any practical impact on
506  * this operation.
507  */
508  if (uss->estimating)
509  {
510  uint32 tmp;
511 
512 #if SIZEOF_DATUM == 8
513  tmp = (uint32) res ^ (uint32) ((uint64) res >> 32);
514 #else /* SIZEOF_DATUM != 8 */
515  tmp = (uint32) res;
516 #endif
517 
519  }
520 
521  /*
522  * Byteswap on little-endian machines.
523  *
524  * This is needed so that ssup_datum_unsigned_cmp() (an unsigned integer
525  * 3-way comparator) works correctly on all platforms. Without this, the
526  * comparator would have to call memcmp() with a pair of pointers to the
527  * first byte of each abbreviated key, which is slower.
528  */
529  res = DatumBigEndianToNative(res);
530 
531  return res;
532 }
unsigned int uint32
Definition: c.h:493
#define INT64_FORMAT
Definition: c.h:535
int errcode(int sqlerrcode)
Definition: elog.c:859
int errmsg(const char *fmt,...)
Definition: elog.c:1072
#define LOG
Definition: elog.h:31
#define ereturn(context, dummy_value,...)
Definition: elog.h:276
#define elog(elevel,...)
Definition: elog.h:224
#define PG_RETURN_VOID()
Definition: fmgr.h:349
#define PG_RETURN_BYTEA_P(x)
Definition: fmgr.h:371
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:276
#define PG_RETURN_CSTRING(x)
Definition: fmgr.h:362
#define PG_GETARG_CSTRING(n)
Definition: fmgr.h:277
#define PG_GETARG_INT64(n)
Definition: fmgr.h:283
#define PG_RETURN_INT32(x)
Definition: fmgr.h:354
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
#define PG_RETURN_BOOL(x)
Definition: fmgr.h:359
static Datum hash_uint32(uint32 k)
Definition: hashfn.h:43
static Datum hash_any_extended(const unsigned char *k, int keylen, uint64 seed)
Definition: hashfn.h:37
static Datum hash_any(const unsigned char *k, int keylen)
Definition: hashfn.h:31
static const FormData_pg_attribute a1
Definition: heap.c:142
static const FormData_pg_attribute a2
Definition: heap.c:156
void initHyperLogLog(hyperLogLogState *cState, uint8 bwidth)
Definition: hyperloglog.c:66
double estimateHyperLogLog(hyperLogLogState *cState)
Definition: hyperloglog.c:186
void addHyperLogLog(hyperLogLogState *cState, uint32 hash)
Definition: hyperloglog.c:167
int y
Definition: isn.c:72
int b
Definition: isn.c:70
int x
Definition: isn.c:71
int a
Definition: isn.c:69
Datum macaddr_lt(PG_FUNCTION_ARGS)
Definition: mac.c:210
#define hibits(addr)
Definition: mac.c:30
Datum macaddr_cmp(PG_FUNCTION_ARGS)
Definition: mac.c:197
static int macaddr_cmp_internal(macaddr *a1, macaddr *a2)
Definition: mac.c:182
Datum hashmacaddrextended(PG_FUNCTION_ARGS)
Definition: mac.c:275
Datum hashmacaddr(PG_FUNCTION_ARGS)
Definition: mac.c:267
static Datum macaddr_abbrev_convert(Datum original, SortSupport ssup)
Definition: mac.c:483
Datum macaddr_or(PG_FUNCTION_ARGS)
Definition: mac.c:320
Datum macaddr_recv(PG_FUNCTION_ARGS)
Definition: mac.c:140
Datum macaddr_ne(PG_FUNCTION_ARGS)
Definition: mac.c:255
Datum macaddr_trunc(PG_FUNCTION_ARGS)
Definition: mac.c:341
Datum macaddr_eq(PG_FUNCTION_ARGS)
Definition: mac.c:228
Datum macaddr_in(PG_FUNCTION_ARGS)
Definition: mac.c:55
Datum macaddr_not(PG_FUNCTION_ARGS)
Definition: mac.c:287
static bool macaddr_abbrev_abort(int memtupcount, SortSupport ssup)
Definition: mac.c:415
Datum macaddr_and(PG_FUNCTION_ARGS)
Definition: mac.c:303
Datum macaddr_send(PG_FUNCTION_ARGS)
Definition: mac.c:161
Datum macaddr_sortsupport(PG_FUNCTION_ARGS)
Definition: mac.c:363
#define lobits(addr)
Definition: mac.c:33
Datum macaddr_ge(PG_FUNCTION_ARGS)
Definition: mac.c:237
static int macaddr_fast_cmp(Datum x, Datum y, SortSupport ssup)
Definition: mac.c:400
Datum macaddr_le(PG_FUNCTION_ARGS)
Definition: mac.c:219
Datum macaddr_out(PG_FUNCTION_ARGS)
Definition: mac.c:121
Datum macaddr_gt(PG_FUNCTION_ARGS)
Definition: mac.c:246
void * palloc(Size size)
Definition: mcxt.c:1304
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:124
static char * buf
Definition: pg_test_fsync.c:73
#define snprintf
Definition: port.h:238
static uint32 DatumGetUInt32(Datum X)
Definition: postgres.h:222
uintptr_t Datum
Definition: postgres.h:64
#define SIZEOF_DATUM
Definition: postgres.h:81
void pq_begintypsend(StringInfo buf)
Definition: pqformat.c:326
int pq_getmsgbyte(StringInfo msg)
Definition: pqformat.c:399
bytea * pq_endtypsend(StringInfo buf)
Definition: pqformat.c:346
static void pq_sendbyte(StringInfo buf, uint8 byt)
Definition: pqformat.h:160
char * c
e
Definition: preproc-init.c:82
struct SortSupportData * SortSupport
Definition: sortsupport.h:58
StringInfoData * StringInfo
Definition: stringinfo.h:54
Definition: nodes.h:129
int(* comparator)(Datum x, Datum y, SortSupport ssup)
Definition: sortsupport.h:106
Datum(* abbrev_converter)(Datum original, SortSupport ssup)
Definition: sortsupport.h:172
void * ssup_extra
Definition: sortsupport.h:87
MemoryContext ssup_cxt
Definition: sortsupport.h:66
int(* abbrev_full_comparator)(Datum x, Datum y, SortSupport ssup)
Definition: sortsupport.h:191
bool(* abbrev_abort)(int memtupcount, SortSupport ssup)
Definition: sortsupport.h:182
hyperLogLogState abbr_card
Definition: mac.c:42
Definition: inet.h:95
unsigned char e
Definition: inet.h:100
unsigned char b
Definition: inet.h:97
unsigned char f
Definition: inet.h:101
unsigned char c
Definition: inet.h:98
unsigned char a
Definition: inet.h:96
unsigned char d
Definition: inet.h:99
int ssup_datum_unsigned_cmp(Datum x, Datum y, SortSupport ssup)
Definition: tuplesort.c:3171
bool trace_sort
Definition: tuplesort.c:124
#define PG_GETARG_MACADDR_P(n)
Definition: inet.h:158
#define PG_RETURN_MACADDR_P(x)
Definition: inet.h:159
static macaddr * DatumGetMacaddrP(Datum X)
Definition: inet.h:147