PostgreSQL Source Code  git master
hashfunc.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * hashfunc.c
4  * Support functions for hash access method.
5  *
6  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/access/hash/hashfunc.c
12  *
13  * NOTES
14  * These functions are stored in pg_amproc. For each operator class
15  * defined for hash indexes, they compute the hash value of the argument.
16  *
17  * Additional hash functions appear in /utils/adt/ files for various
18  * specialized datatypes.
19  *
20  * It is expected that every bit of a hash function's 32-bit result is
21  * as random as every other; failure to ensure this is likely to lead
22  * to poor performance of hash joins, for example. In most cases a hash
23  * function should use hash_any() or its variant hash_uint32().
24  *-------------------------------------------------------------------------
25  */
26 
27 #include "postgres.h"
28 
29 #include "access/hash.h"
30 #include "catalog/pg_collation.h"
31 #include "common/hashfn.h"
32 #include "utils/builtins.h"
33 #include "utils/float.h"
34 #include "utils/pg_locale.h"
35 #include "varatt.h"
36 
37 /*
38  * Datatype-specific hash functions.
39  *
40  * These support both hash indexes and hash joins.
41  *
42  * NOTE: some of these are also used by catcache operations, without
43  * any direct connection to hash indexes. Also, the common hash_any
44  * routine is also used by dynahash tables.
45  */
46 
47 /* Note: this is used for both "char" and boolean datatypes */
48 Datum
50 {
51  return hash_uint32((int32) PG_GETARG_CHAR(0));
52 }
53 
54 Datum
56 {
58 }
59 
60 Datum
62 {
63  return hash_uint32((int32) PG_GETARG_INT16(0));
64 }
65 
66 Datum
68 {
70 }
71 
72 Datum
74 {
75  return hash_uint32(PG_GETARG_INT32(0));
76 }
77 
78 Datum
80 {
82 }
83 
84 Datum
86 {
87  /*
88  * The idea here is to produce a hash value compatible with the values
89  * produced by hashint4 and hashint2 for logically equal inputs; this is
90  * necessary to support cross-type hash joins across these input types.
91  * Since all three types are signed, we can xor the high half of the int8
92  * value if the sign is positive, or the complement of the high half when
93  * the sign is negative.
94  */
95  int64 val = PG_GETARG_INT64(0);
96  uint32 lohalf = (uint32) val;
97  uint32 hihalf = (uint32) (val >> 32);
98 
99  lohalf ^= (val >= 0) ? hihalf : ~hihalf;
100 
101  return hash_uint32(lohalf);
102 }
103 
104 Datum
106 {
107  /* Same approach as hashint8 */
108  int64 val = PG_GETARG_INT64(0);
109  uint32 lohalf = (uint32) val;
110  uint32 hihalf = (uint32) (val >> 32);
111 
112  lohalf ^= (val >= 0) ? hihalf : ~hihalf;
113 
114  return hash_uint32_extended(lohalf, PG_GETARG_INT64(1));
115 }
116 
117 Datum
119 {
120  return hash_uint32((uint32) PG_GETARG_OID(0));
121 }
122 
123 Datum
125 {
127 }
128 
129 Datum
131 {
132  return hash_uint32((uint32) PG_GETARG_OID(0));
133 }
134 
135 Datum
137 {
139 }
140 
141 Datum
143 {
145  float8 key8;
146 
147  /*
148  * On IEEE-float machines, minus zero and zero have different bit patterns
149  * but should compare as equal. We must ensure that they have the same
150  * hash value, which is most reliably done this way:
151  */
152  if (key == (float4) 0)
153  PG_RETURN_UINT32(0);
154 
155  /*
156  * To support cross-type hashing of float8 and float4, we want to return
157  * the same hash value hashfloat8 would produce for an equal float8 value.
158  * So, widen the value to float8 and hash that. (We must do this rather
159  * than have hashfloat8 try to narrow its value to float4; that could fail
160  * on overflow.)
161  */
162  key8 = key;
163 
164  /*
165  * Similarly, NaNs can have different bit patterns but they should all
166  * compare as equal. For backwards-compatibility reasons we force them to
167  * have the hash value of a standard float8 NaN. (You'd think we could
168  * replace key with a float4 NaN and then widen it; but on some old
169  * platforms, that way produces a different bit pattern.)
170  */
171  if (isnan(key8))
172  key8 = get_float8_nan();
173 
174  return hash_any((unsigned char *) &key8, sizeof(key8));
175 }
176 
177 Datum
179 {
181  uint64 seed = PG_GETARG_INT64(1);
182  float8 key8;
183 
184  /* Same approach as hashfloat4 */
185  if (key == (float4) 0)
186  PG_RETURN_UINT64(seed);
187  key8 = key;
188  if (isnan(key8))
189  key8 = get_float8_nan();
190 
191  return hash_any_extended((unsigned char *) &key8, sizeof(key8), seed);
192 }
193 
194 Datum
196 {
198 
199  /*
200  * On IEEE-float machines, minus zero and zero have different bit patterns
201  * but should compare as equal. We must ensure that they have the same
202  * hash value, which is most reliably done this way:
203  */
204  if (key == (float8) 0)
205  PG_RETURN_UINT32(0);
206 
207  /*
208  * Similarly, NaNs can have different bit patterns but they should all
209  * compare as equal. For backwards-compatibility reasons we force them to
210  * have the hash value of a standard NaN.
211  */
212  if (isnan(key))
213  key = get_float8_nan();
214 
215  return hash_any((unsigned char *) &key, sizeof(key));
216 }
217 
218 Datum
220 {
222  uint64 seed = PG_GETARG_INT64(1);
223 
224  /* Same approach as hashfloat8 */
225  if (key == (float8) 0)
226  PG_RETURN_UINT64(seed);
227  if (isnan(key))
228  key = get_float8_nan();
229 
230  return hash_any_extended((unsigned char *) &key, sizeof(key), seed);
231 }
232 
233 Datum
235 {
237 
238  return hash_any((unsigned char *) key->values, key->dim1 * sizeof(Oid));
239 }
240 
241 Datum
243 {
245 
246  return hash_any_extended((unsigned char *) key->values,
247  key->dim1 * sizeof(Oid),
248  PG_GETARG_INT64(1));
249 }
250 
251 Datum
253 {
254  char *key = NameStr(*PG_GETARG_NAME(0));
255 
256  return hash_any((unsigned char *) key, strlen(key));
257 }
258 
259 Datum
261 {
262  char *key = NameStr(*PG_GETARG_NAME(0));
263 
264  return hash_any_extended((unsigned char *) key, strlen(key),
265  PG_GETARG_INT64(1));
266 }
267 
268 Datum
270 {
271  text *key = PG_GETARG_TEXT_PP(0);
273  pg_locale_t mylocale = 0;
274  Datum result;
275 
276  if (!collid)
277  ereport(ERROR,
278  (errcode(ERRCODE_INDETERMINATE_COLLATION),
279  errmsg("could not determine which collation to use for string hashing"),
280  errhint("Use the COLLATE clause to set the collation explicitly.")));
281 
282  if (!lc_collate_is_c(collid))
284 
285  if (pg_locale_deterministic(mylocale))
286  {
287  result = hash_any((unsigned char *) VARDATA_ANY(key),
289  }
290  else
291  {
292  Size bsize, rsize;
293  char *buf;
294  const char *keydata = VARDATA_ANY(key);
295  size_t keylen = VARSIZE_ANY_EXHDR(key);
296 
297 
298  bsize = pg_strnxfrm(NULL, 0, keydata, keylen, mylocale);
299  buf = palloc(bsize + 1);
300 
301  rsize = pg_strnxfrm(buf, bsize + 1, keydata, keylen, mylocale);
302  if (rsize != bsize)
303  elog(ERROR, "pg_strnxfrm() returned unexpected result");
304 
305  /*
306  * In principle, there's no reason to include the terminating NUL
307  * character in the hash, but it was done before and the behavior
308  * must be preserved.
309  */
310  result = hash_any((uint8_t *) buf, bsize + 1);
311 
312  pfree(buf);
313  }
314 
315  /* Avoid leaking memory for toasted inputs */
316  PG_FREE_IF_COPY(key, 0);
317 
318  return result;
319 }
320 
321 Datum
323 {
324  text *key = PG_GETARG_TEXT_PP(0);
326  pg_locale_t mylocale = 0;
327  Datum result;
328 
329  if (!collid)
330  ereport(ERROR,
331  (errcode(ERRCODE_INDETERMINATE_COLLATION),
332  errmsg("could not determine which collation to use for string hashing"),
333  errhint("Use the COLLATE clause to set the collation explicitly.")));
334 
335  if (!lc_collate_is_c(collid))
337 
338  if (pg_locale_deterministic(mylocale))
339  {
340  result = hash_any_extended((unsigned char *) VARDATA_ANY(key),
342  PG_GETARG_INT64(1));
343  }
344  else
345  {
346  Size bsize, rsize;
347  char *buf;
348  const char *keydata = VARDATA_ANY(key);
349  size_t keylen = VARSIZE_ANY_EXHDR(key);
350 
351  bsize = pg_strnxfrm(NULL, 0, keydata, keylen, mylocale);
352  buf = palloc(bsize + 1);
353 
354  rsize = pg_strnxfrm(buf, bsize + 1, keydata, keylen, mylocale);
355  if (rsize != bsize)
356  elog(ERROR, "pg_strnxfrm() returned unexpected result");
357 
358  /*
359  * In principle, there's no reason to include the terminating NUL
360  * character in the hash, but it was done before and the behavior
361  * must be preserved.
362  */
363  result = hash_any_extended((uint8_t *) buf, bsize + 1,
364  PG_GETARG_INT64(1));
365 
366  pfree(buf);
367  }
368 
369  PG_FREE_IF_COPY(key, 0);
370 
371  return result;
372 }
373 
374 /*
375  * hashvarlena() can be used for any varlena datatype in which there are
376  * no non-significant bits, ie, distinct bitpatterns never compare as equal.
377  */
378 Datum
380 {
381  struct varlena *key = PG_GETARG_VARLENA_PP(0);
382  Datum result;
383 
384  result = hash_any((unsigned char *) VARDATA_ANY(key),
386 
387  /* Avoid leaking memory for toasted inputs */
388  PG_FREE_IF_COPY(key, 0);
389 
390  return result;
391 }
392 
393 Datum
395 {
396  struct varlena *key = PG_GETARG_VARLENA_PP(0);
397  Datum result;
398 
399  result = hash_any_extended((unsigned char *) VARDATA_ANY(key),
401  PG_GETARG_INT64(1));
402 
403  PG_FREE_IF_COPY(key, 0);
404 
405  return result;
406 }
#define NameStr(name)
Definition: c.h:730
unsigned int uint32
Definition: c.h:490
signed int int32
Definition: c.h:478
double float8
Definition: c.h:614
float float4
Definition: c.h:613
size_t Size
Definition: c.h:589
Oid collid
int errhint(const char *fmt,...)
Definition: elog.c:1316
int errcode(int sqlerrcode)
Definition: elog.c:858
int errmsg(const char *fmt,...)
Definition: elog.c:1069
#define ERROR
Definition: elog.h:39
#define ereport(elevel,...)
Definition: elog.h:149
static float8 get_float8_nan(void)
Definition: float.h:123
#define PG_FREE_IF_COPY(ptr, n)
Definition: fmgr.h:260
#define PG_GETARG_OID(n)
Definition: fmgr.h:275
#define PG_RETURN_UINT32(x)
Definition: fmgr.h:355
#define PG_GETARG_TEXT_PP(n)
Definition: fmgr.h:309
#define PG_GETARG_FLOAT8(n)
Definition: fmgr.h:282
#define PG_GETARG_CHAR(n)
Definition: fmgr.h:273
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:276
#define PG_GETARG_INT64(n)
Definition: fmgr.h:283
#define PG_GETARG_NAME(n)
Definition: fmgr.h:278
#define PG_RETURN_UINT64(x)
Definition: fmgr.h:369
#define PG_GETARG_VARLENA_PP(n)
Definition: fmgr.h:289
#define PG_GETARG_INT32(n)
Definition: fmgr.h:269
#define PG_GETARG_FLOAT4(n)
Definition: fmgr.h:281
#define PG_GET_COLLATION()
Definition: fmgr.h:198
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
#define PG_GETARG_INT16(n)
Definition: fmgr.h:271
static Datum hash_uint32(uint32 k)
Definition: hashfn.h:43
static Datum hash_any_extended(const unsigned char *k, int keylen, uint64 seed)
Definition: hashfn.h:37
static Datum hash_any(const unsigned char *k, int keylen)
Definition: hashfn.h:31
static Datum hash_uint32_extended(uint32 k, uint64 seed)
Definition: hashfn.h:49
Datum hashenum(PG_FUNCTION_ARGS)
Definition: hashfunc.c:130
Datum hashvarlenaextended(PG_FUNCTION_ARGS)
Definition: hashfunc.c:394
Datum hashfloat8extended(PG_FUNCTION_ARGS)
Definition: hashfunc.c:219
Datum hashenumextended(PG_FUNCTION_ARGS)
Definition: hashfunc.c:136
Datum hashtextextended(PG_FUNCTION_ARGS)
Definition: hashfunc.c:322
Datum hashoidvector(PG_FUNCTION_ARGS)
Definition: hashfunc.c:234
Datum hashint8extended(PG_FUNCTION_ARGS)
Definition: hashfunc.c:105
Datum hashint2(PG_FUNCTION_ARGS)
Definition: hashfunc.c:61
Datum hashint2extended(PG_FUNCTION_ARGS)
Definition: hashfunc.c:67
Datum hashfloat4(PG_FUNCTION_ARGS)
Definition: hashfunc.c:142
Datum hashfloat8(PG_FUNCTION_ARGS)
Definition: hashfunc.c:195
Datum hashoidextended(PG_FUNCTION_ARGS)
Definition: hashfunc.c:124
Datum hashnameextended(PG_FUNCTION_ARGS)
Definition: hashfunc.c:260
Datum hashint8(PG_FUNCTION_ARGS)
Definition: hashfunc.c:85
Datum hashname(PG_FUNCTION_ARGS)
Definition: hashfunc.c:252
Datum hashint4extended(PG_FUNCTION_ARGS)
Definition: hashfunc.c:79
Datum hashtext(PG_FUNCTION_ARGS)
Definition: hashfunc.c:269
Datum hashchar(PG_FUNCTION_ARGS)
Definition: hashfunc.c:49
Datum hashoid(PG_FUNCTION_ARGS)
Definition: hashfunc.c:118
Datum hashcharextended(PG_FUNCTION_ARGS)
Definition: hashfunc.c:55
Datum hashfloat4extended(PG_FUNCTION_ARGS)
Definition: hashfunc.c:178
Datum hashvarlena(PG_FUNCTION_ARGS)
Definition: hashfunc.c:379
Datum hashint4(PG_FUNCTION_ARGS)
Definition: hashfunc.c:73
Datum hashoidvectorextended(PG_FUNCTION_ARGS)
Definition: hashfunc.c:242
long val
Definition: informix.c:664
void pfree(void *pointer)
Definition: mcxt.c:1436
void * palloc(Size size)
Definition: mcxt.c:1210
size_t pg_strnxfrm(char *dest, size_t destsize, const char *src, size_t srclen, pg_locale_t locale)
Definition: pg_locale.c:2401
bool lc_collate_is_c(Oid collation)
Definition: pg_locale.c:1322
pg_locale_t pg_newlocale_from_collation(Oid collid)
Definition: pg_locale.c:1543
bool pg_locale_deterministic(pg_locale_t locale)
Definition: pg_locale.c:1519
static char * buf
Definition: pg_test_fsync.c:67
uintptr_t Datum
Definition: postgres.h:64
unsigned int Oid
Definition: postgres_ext.h:31
Definition: c.h:710
Definition: c.h:671
#define VARDATA_ANY(PTR)
Definition: varatt.h:324
#define VARSIZE_ANY_EXHDR(PTR)
Definition: varatt.h:317