PostgreSQL Source Code  git master
hashfunc.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * hashfunc.c
4  * Support functions for hash access method.
5  *
6  * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/access/hash/hashfunc.c
12  *
13  * NOTES
14  * These functions are stored in pg_amproc. For each operator class
15  * defined for hash indexes, they compute the hash value of the argument.
16  *
17  * Additional hash functions appear in /utils/adt/ files for various
18  * specialized datatypes.
19  *
20  * It is expected that every bit of a hash function's 32-bit result is
21  * as random as every other; failure to ensure this is likely to lead
22  * to poor performance of hash joins, for example. In most cases a hash
23  * function should use hash_any() or its variant hash_uint32().
24  *-------------------------------------------------------------------------
25  */
26 
27 #include "postgres.h"
28 
29 #include "access/hash.h"
30 #include "catalog/pg_collation.h"
31 #include "common/hashfn.h"
32 #include "utils/builtins.h"
33 #include "utils/float.h"
34 #include "utils/pg_locale.h"
35 
36 /*
37  * Datatype-specific hash functions.
38  *
39  * These support both hash indexes and hash joins.
40  *
41  * NOTE: some of these are also used by catcache operations, without
42  * any direct connection to hash indexes. Also, the common hash_any
43  * routine is also used by dynahash tables.
44  */
45 
46 /* Note: this is used for both "char" and boolean datatypes */
47 Datum
49 {
50  return hash_uint32((int32) PG_GETARG_CHAR(0));
51 }
52 
53 Datum
55 {
57 }
58 
59 Datum
61 {
62  return hash_uint32((int32) PG_GETARG_INT16(0));
63 }
64 
65 Datum
67 {
69 }
70 
71 Datum
73 {
74  return hash_uint32(PG_GETARG_INT32(0));
75 }
76 
77 Datum
79 {
81 }
82 
83 Datum
85 {
86  /*
87  * The idea here is to produce a hash value compatible with the values
88  * produced by hashint4 and hashint2 for logically equal inputs; this is
89  * necessary to support cross-type hash joins across these input types.
90  * Since all three types are signed, we can xor the high half of the int8
91  * value if the sign is positive, or the complement of the high half when
92  * the sign is negative.
93  */
94  int64 val = PG_GETARG_INT64(0);
95  uint32 lohalf = (uint32) val;
96  uint32 hihalf = (uint32) (val >> 32);
97 
98  lohalf ^= (val >= 0) ? hihalf : ~hihalf;
99 
100  return hash_uint32(lohalf);
101 }
102 
103 Datum
105 {
106  /* Same approach as hashint8 */
107  int64 val = PG_GETARG_INT64(0);
108  uint32 lohalf = (uint32) val;
109  uint32 hihalf = (uint32) (val >> 32);
110 
111  lohalf ^= (val >= 0) ? hihalf : ~hihalf;
112 
113  return hash_uint32_extended(lohalf, PG_GETARG_INT64(1));
114 }
115 
116 Datum
118 {
119  return hash_uint32((uint32) PG_GETARG_OID(0));
120 }
121 
122 Datum
124 {
126 }
127 
128 Datum
130 {
131  return hash_uint32((uint32) PG_GETARG_OID(0));
132 }
133 
134 Datum
136 {
138 }
139 
140 Datum
142 {
144  float8 key8;
145 
146  /*
147  * On IEEE-float machines, minus zero and zero have different bit patterns
148  * but should compare as equal. We must ensure that they have the same
149  * hash value, which is most reliably done this way:
150  */
151  if (key == (float4) 0)
152  PG_RETURN_UINT32(0);
153 
154  /*
155  * To support cross-type hashing of float8 and float4, we want to return
156  * the same hash value hashfloat8 would produce for an equal float8 value.
157  * So, widen the value to float8 and hash that. (We must do this rather
158  * than have hashfloat8 try to narrow its value to float4; that could fail
159  * on overflow.)
160  */
161  key8 = key;
162 
163  /*
164  * Similarly, NaNs can have different bit patterns but they should all
165  * compare as equal. For backwards-compatibility reasons we force them to
166  * have the hash value of a standard float8 NaN. (You'd think we could
167  * replace key with a float4 NaN and then widen it; but on some old
168  * platforms, that way produces a different bit pattern.)
169  */
170  if (isnan(key8))
171  key8 = get_float8_nan();
172 
173  return hash_any((unsigned char *) &key8, sizeof(key8));
174 }
175 
176 Datum
178 {
180  uint64 seed = PG_GETARG_INT64(1);
181  float8 key8;
182 
183  /* Same approach as hashfloat4 */
184  if (key == (float4) 0)
185  PG_RETURN_UINT64(seed);
186  key8 = key;
187  if (isnan(key8))
188  key8 = get_float8_nan();
189 
190  return hash_any_extended((unsigned char *) &key8, sizeof(key8), seed);
191 }
192 
193 Datum
195 {
197 
198  /*
199  * On IEEE-float machines, minus zero and zero have different bit patterns
200  * but should compare as equal. We must ensure that they have the same
201  * hash value, which is most reliably done this way:
202  */
203  if (key == (float8) 0)
204  PG_RETURN_UINT32(0);
205 
206  /*
207  * Similarly, NaNs can have different bit patterns but they should all
208  * compare as equal. For backwards-compatibility reasons we force them to
209  * have the hash value of a standard NaN.
210  */
211  if (isnan(key))
212  key = get_float8_nan();
213 
214  return hash_any((unsigned char *) &key, sizeof(key));
215 }
216 
217 Datum
219 {
221  uint64 seed = PG_GETARG_INT64(1);
222 
223  /* Same approach as hashfloat8 */
224  if (key == (float8) 0)
225  PG_RETURN_UINT64(seed);
226  if (isnan(key))
227  key = get_float8_nan();
228 
229  return hash_any_extended((unsigned char *) &key, sizeof(key), seed);
230 }
231 
232 Datum
234 {
236 
237  return hash_any((unsigned char *) key->values, key->dim1 * sizeof(Oid));
238 }
239 
240 Datum
242 {
244 
245  return hash_any_extended((unsigned char *) key->values,
246  key->dim1 * sizeof(Oid),
247  PG_GETARG_INT64(1));
248 }
249 
250 Datum
252 {
253  char *key = NameStr(*PG_GETARG_NAME(0));
254 
255  return hash_any((unsigned char *) key, strlen(key));
256 }
257 
258 Datum
260 {
261  char *key = NameStr(*PG_GETARG_NAME(0));
262 
263  return hash_any_extended((unsigned char *) key, strlen(key),
264  PG_GETARG_INT64(1));
265 }
266 
267 Datum
269 {
270  text *key = PG_GETARG_TEXT_PP(0);
271  Oid collid = PG_GET_COLLATION();
272  pg_locale_t mylocale = 0;
273  Datum result;
274 
275  if (!collid)
276  ereport(ERROR,
277  (errcode(ERRCODE_INDETERMINATE_COLLATION),
278  errmsg("could not determine which collation to use for string hashing"),
279  errhint("Use the COLLATE clause to set the collation explicitly.")));
280 
281  if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
282  mylocale = pg_newlocale_from_collation(collid);
283 
284  if (!mylocale || mylocale->deterministic)
285  {
286  result = hash_any((unsigned char *) VARDATA_ANY(key),
287  VARSIZE_ANY_EXHDR(key));
288  }
289  else
290  {
291 #ifdef USE_ICU
292  if (mylocale->provider == COLLPROVIDER_ICU)
293  {
294  int32_t ulen = -1;
295  UChar *uchar = NULL;
296  Size bsize;
297  uint8_t *buf;
298 
299  ulen = icu_to_uchar(&uchar, VARDATA_ANY(key), VARSIZE_ANY_EXHDR(key));
300 
301  bsize = ucol_getSortKey(mylocale->info.icu.ucol,
302  uchar, ulen, NULL, 0);
303  buf = palloc(bsize);
304  ucol_getSortKey(mylocale->info.icu.ucol,
305  uchar, ulen, buf, bsize);
306 
307  result = hash_any(buf, bsize);
308 
309  pfree(buf);
310  }
311  else
312 #endif
313  /* shouldn't happen */
314  elog(ERROR, "unsupported collprovider: %c", mylocale->provider);
315  }
316 
317  /* Avoid leaking memory for toasted inputs */
318  PG_FREE_IF_COPY(key, 0);
319 
320  return result;
321 }
322 
323 Datum
325 {
326  text *key = PG_GETARG_TEXT_PP(0);
327  Oid collid = PG_GET_COLLATION();
328  pg_locale_t mylocale = 0;
329  Datum result;
330 
331  if (!collid)
332  ereport(ERROR,
333  (errcode(ERRCODE_INDETERMINATE_COLLATION),
334  errmsg("could not determine which collation to use for string hashing"),
335  errhint("Use the COLLATE clause to set the collation explicitly.")));
336 
337  if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
338  mylocale = pg_newlocale_from_collation(collid);
339 
340  if (!mylocale || mylocale->deterministic)
341  {
342  result = hash_any_extended((unsigned char *) VARDATA_ANY(key),
343  VARSIZE_ANY_EXHDR(key),
344  PG_GETARG_INT64(1));
345  }
346  else
347  {
348 #ifdef USE_ICU
349  if (mylocale->provider == COLLPROVIDER_ICU)
350  {
351  int32_t ulen = -1;
352  UChar *uchar = NULL;
353  Size bsize;
354  uint8_t *buf;
355 
356  ulen = icu_to_uchar(&uchar, VARDATA_ANY(key), VARSIZE_ANY_EXHDR(key));
357 
358  bsize = ucol_getSortKey(mylocale->info.icu.ucol,
359  uchar, ulen, NULL, 0);
360  buf = palloc(bsize);
361  ucol_getSortKey(mylocale->info.icu.ucol,
362  uchar, ulen, buf, bsize);
363 
364  result = hash_any_extended(buf, bsize, PG_GETARG_INT64(1));
365 
366  pfree(buf);
367  }
368  else
369 #endif
370  /* shouldn't happen */
371  elog(ERROR, "unsupported collprovider: %c", mylocale->provider);
372  }
373 
374  PG_FREE_IF_COPY(key, 0);
375 
376  return result;
377 }
378 
379 /*
380  * hashvarlena() can be used for any varlena datatype in which there are
381  * no non-significant bits, ie, distinct bitpatterns never compare as equal.
382  */
383 Datum
385 {
386  struct varlena *key = PG_GETARG_VARLENA_PP(0);
387  Datum result;
388 
389  result = hash_any((unsigned char *) VARDATA_ANY(key),
390  VARSIZE_ANY_EXHDR(key));
391 
392  /* Avoid leaking memory for toasted inputs */
393  PG_FREE_IF_COPY(key, 0);
394 
395  return result;
396 }
397 
398 Datum
400 {
401  struct varlena *key = PG_GETARG_VARLENA_PP(0);
402  Datum result;
403 
404  result = hash_any_extended((unsigned char *) VARDATA_ANY(key),
405  VARSIZE_ANY_EXHDR(key),
406  PG_GETARG_INT64(1));
407 
408  PG_FREE_IF_COPY(key, 0);
409 
410  return result;
411 }
#define PG_GETARG_FLOAT8(n)
Definition: fmgr.h:282
Definition: c.h:660
#define PG_GETARG_INT32(n)
Definition: fmgr.h:269
Datum hashoid(PG_FUNCTION_ARGS)
Definition: hashfunc.c:117
int errhint(const char *fmt,...)
Definition: elog.c:1156
#define VARDATA_ANY(PTR)
Definition: postgres.h:361
Datum hashname(PG_FUNCTION_ARGS)
Definition: hashfunc.c:251
Datum hashint8(PG_FUNCTION_ARGS)
Definition: hashfunc.c:84
Datum hashint2extended(PG_FUNCTION_ARGS)
Definition: hashfunc.c:66
int errcode(int sqlerrcode)
Definition: elog.c:698
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:276
Datum hashint8extended(PG_FUNCTION_ARGS)
Definition: hashfunc.c:104
unsigned int Oid
Definition: postgres_ext.h:31
static Datum hash_any_extended(const unsigned char *k, int keylen, uint64 seed)
Definition: hashfn.h:37
#define PG_RETURN_UINT64(x)
Definition: fmgr.h:369
#define PG_GET_COLLATION()
Definition: fmgr.h:198
Datum hashvarlenaextended(PG_FUNCTION_ARGS)
Definition: hashfunc.c:399
#define PG_RETURN_UINT32(x)
Definition: fmgr.h:355
signed int int32
Definition: c.h:429
#define PG_GETARG_TEXT_PP(n)
Definition: fmgr.h:309
Datum hashenumextended(PG_FUNCTION_ARGS)
Definition: hashfunc.c:135
Datum hashchar(PG_FUNCTION_ARGS)
Definition: hashfunc.c:48
void pfree(void *pointer)
Definition: mcxt.c:1169
Datum hashvarlena(PG_FUNCTION_ARGS)
Definition: hashfunc.c:384
#define ERROR
Definition: elog.h:46
double float8
Definition: c.h:565
bool lc_collate_is_c(Oid collation)
Definition: pg_locale.c:1321
Datum hashtextextended(PG_FUNCTION_ARGS)
Definition: hashfunc.c:324
static float8 get_float8_nan(void)
Definition: float.h:122
Datum hashtext(PG_FUNCTION_ARGS)
Definition: hashfunc.c:268
#define PG_GETARG_VARLENA_PP(n)
Definition: fmgr.h:289
static char * buf
Definition: pg_test_fsync.c:68
#define PG_GETARG_OID(n)
Definition: fmgr.h:275
Oid values[FLEXIBLE_ARRAY_MEMBER]
Definition: c.h:668
int dim1
Definition: c.h:666
unsigned int uint32
Definition: c.h:441
Datum hashoidvectorextended(PG_FUNCTION_ARGS)
Definition: hashfunc.c:241
Datum hashint4(PG_FUNCTION_ARGS)
Definition: hashfunc.c:72
static Datum hash_any(const unsigned char *k, int keylen)
Definition: hashfn.h:31
Datum hashfloat4(PG_FUNCTION_ARGS)
Definition: hashfunc.c:141
#define PG_GETARG_FLOAT4(n)
Definition: fmgr.h:281
Datum hashfloat8(PG_FUNCTION_ARGS)
Definition: hashfunc.c:194
Datum hashint4extended(PG_FUNCTION_ARGS)
Definition: hashfunc.c:78
float float4
Definition: c.h:564
pg_locale_t pg_newlocale_from_collation(Oid collid)
Definition: pg_locale.c:1468
uintptr_t Datum
Definition: postgres.h:411
bool deterministic
Definition: pg_locale.h:85
#define PG_GETARG_INT16(n)
Definition: fmgr.h:271
#define ereport(elevel,...)
Definition: elog.h:157
static Datum hash_uint32(uint32 k)
Definition: hashfn.h:43
Datum hashenum(PG_FUNCTION_ARGS)
Definition: hashfunc.c:129
size_t Size
Definition: c.h:540
#define PG_FREE_IF_COPY(ptr, n)
Definition: fmgr.h:260
Datum hashoidextended(PG_FUNCTION_ARGS)
Definition: hashfunc.c:123
Datum hashint2(PG_FUNCTION_ARGS)
Definition: hashfunc.c:60
#define VARSIZE_ANY_EXHDR(PTR)
Definition: postgres.h:354
void * palloc(Size size)
Definition: mcxt.c:1062
int errmsg(const char *fmt,...)
Definition: elog.c:909
Datum hashcharextended(PG_FUNCTION_ARGS)
Definition: hashfunc.c:54
Datum hashoidvector(PG_FUNCTION_ARGS)
Definition: hashfunc.c:233
#define elog(elevel,...)
Definition: elog.h:232
#define NameStr(name)
Definition: c.h:681
Datum hashfloat4extended(PG_FUNCTION_ARGS)
Definition: hashfunc.c:177
union pg_locale_struct::@142 info
Definition: c.h:621
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
#define PG_GETARG_INT64(n)
Definition: fmgr.h:283
static Datum hash_uint32_extended(uint32 k, uint64 seed)
Definition: hashfn.h:49
long val
Definition: informix.c:664
Datum hashnameextended(PG_FUNCTION_ARGS)
Definition: hashfunc.c:259
#define PG_GETARG_CHAR(n)
Definition: fmgr.h:273
#define PG_GETARG_NAME(n)
Definition: fmgr.h:278
Datum hashfloat8extended(PG_FUNCTION_ARGS)
Definition: hashfunc.c:218