PostgreSQL Source Code  git master
hashfunc.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * hashfunc.c
4  * Support functions for hash access method.
5  *
6  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/access/hash/hashfunc.c
12  *
13  * NOTES
14  * These functions are stored in pg_amproc. For each operator class
15  * defined for hash indexes, they compute the hash value of the argument.
16  *
17  * Additional hash functions appear in /utils/adt/ files for various
18  * specialized datatypes.
19  *
20  * It is expected that every bit of a hash function's 32-bit result is
21  * as random as every other; failure to ensure this is likely to lead
22  * to poor performance of hash joins, for example. In most cases a hash
23  * function should use hash_any() or its variant hash_uint32().
24  *-------------------------------------------------------------------------
25  */
26 
27 #include "postgres.h"
28 
29 #include "common/hashfn.h"
30 #include "utils/float.h"
31 #include "utils/fmgrprotos.h"
32 #include "utils/pg_locale.h"
33 #include "varatt.h"
34 
35 /*
36  * Datatype-specific hash functions.
37  *
38  * These support both hash indexes and hash joins.
39  *
40  * NOTE: some of these are also used by catcache operations, without
41  * any direct connection to hash indexes. Also, the common hash_any
42  * routine is also used by dynahash tables.
43  */
44 
45 /* Note: this is used for both "char" and boolean datatypes */
46 Datum
48 {
49  return hash_uint32((int32) PG_GETARG_CHAR(0));
50 }
51 
52 Datum
54 {
56 }
57 
58 Datum
60 {
61  return hash_uint32((int32) PG_GETARG_INT16(0));
62 }
63 
64 Datum
66 {
68 }
69 
70 Datum
72 {
73  return hash_uint32(PG_GETARG_INT32(0));
74 }
75 
76 Datum
78 {
80 }
81 
82 Datum
84 {
85  /*
86  * The idea here is to produce a hash value compatible with the values
87  * produced by hashint4 and hashint2 for logically equal inputs; this is
88  * necessary to support cross-type hash joins across these input types.
89  * Since all three types are signed, we can xor the high half of the int8
90  * value if the sign is positive, or the complement of the high half when
91  * the sign is negative.
92  */
93  int64 val = PG_GETARG_INT64(0);
94  uint32 lohalf = (uint32) val;
95  uint32 hihalf = (uint32) (val >> 32);
96 
97  lohalf ^= (val >= 0) ? hihalf : ~hihalf;
98 
99  return hash_uint32(lohalf);
100 }
101 
102 Datum
104 {
105  /* Same approach as hashint8 */
106  int64 val = PG_GETARG_INT64(0);
107  uint32 lohalf = (uint32) val;
108  uint32 hihalf = (uint32) (val >> 32);
109 
110  lohalf ^= (val >= 0) ? hihalf : ~hihalf;
111 
112  return hash_uint32_extended(lohalf, PG_GETARG_INT64(1));
113 }
114 
115 Datum
117 {
118  return hash_uint32((uint32) PG_GETARG_OID(0));
119 }
120 
121 Datum
123 {
125 }
126 
127 Datum
129 {
130  return hash_uint32((uint32) PG_GETARG_OID(0));
131 }
132 
133 Datum
135 {
137 }
138 
139 Datum
141 {
143  float8 key8;
144 
145  /*
146  * On IEEE-float machines, minus zero and zero have different bit patterns
147  * but should compare as equal. We must ensure that they have the same
148  * hash value, which is most reliably done this way:
149  */
150  if (key == (float4) 0)
151  PG_RETURN_UINT32(0);
152 
153  /*
154  * To support cross-type hashing of float8 and float4, we want to return
155  * the same hash value hashfloat8 would produce for an equal float8 value.
156  * So, widen the value to float8 and hash that. (We must do this rather
157  * than have hashfloat8 try to narrow its value to float4; that could fail
158  * on overflow.)
159  */
160  key8 = key;
161 
162  /*
163  * Similarly, NaNs can have different bit patterns but they should all
164  * compare as equal. For backwards-compatibility reasons we force them to
165  * have the hash value of a standard float8 NaN. (You'd think we could
166  * replace key with a float4 NaN and then widen it; but on some old
167  * platforms, that way produces a different bit pattern.)
168  */
169  if (isnan(key8))
170  key8 = get_float8_nan();
171 
172  return hash_any((unsigned char *) &key8, sizeof(key8));
173 }
174 
175 Datum
177 {
179  uint64 seed = PG_GETARG_INT64(1);
180  float8 key8;
181 
182  /* Same approach as hashfloat4 */
183  if (key == (float4) 0)
184  PG_RETURN_UINT64(seed);
185  key8 = key;
186  if (isnan(key8))
187  key8 = get_float8_nan();
188 
189  return hash_any_extended((unsigned char *) &key8, sizeof(key8), seed);
190 }
191 
192 Datum
194 {
196 
197  /*
198  * On IEEE-float machines, minus zero and zero have different bit patterns
199  * but should compare as equal. We must ensure that they have the same
200  * hash value, which is most reliably done this way:
201  */
202  if (key == (float8) 0)
203  PG_RETURN_UINT32(0);
204 
205  /*
206  * Similarly, NaNs can have different bit patterns but they should all
207  * compare as equal. For backwards-compatibility reasons we force them to
208  * have the hash value of a standard NaN.
209  */
210  if (isnan(key))
211  key = get_float8_nan();
212 
213  return hash_any((unsigned char *) &key, sizeof(key));
214 }
215 
216 Datum
218 {
220  uint64 seed = PG_GETARG_INT64(1);
221 
222  /* Same approach as hashfloat8 */
223  if (key == (float8) 0)
224  PG_RETURN_UINT64(seed);
225  if (isnan(key))
226  key = get_float8_nan();
227 
228  return hash_any_extended((unsigned char *) &key, sizeof(key), seed);
229 }
230 
231 Datum
233 {
235 
236  return hash_any((unsigned char *) key->values, key->dim1 * sizeof(Oid));
237 }
238 
239 Datum
241 {
243 
244  return hash_any_extended((unsigned char *) key->values,
245  key->dim1 * sizeof(Oid),
246  PG_GETARG_INT64(1));
247 }
248 
249 Datum
251 {
252  char *key = NameStr(*PG_GETARG_NAME(0));
253 
254  return hash_any((unsigned char *) key, strlen(key));
255 }
256 
257 Datum
259 {
260  char *key = NameStr(*PG_GETARG_NAME(0));
261 
262  return hash_any_extended((unsigned char *) key, strlen(key),
263  PG_GETARG_INT64(1));
264 }
265 
266 Datum
268 {
269  text *key = PG_GETARG_TEXT_PP(0);
271  pg_locale_t mylocale;
272  Datum result;
273 
274  if (!collid)
275  ereport(ERROR,
276  (errcode(ERRCODE_INDETERMINATE_COLLATION),
277  errmsg("could not determine which collation to use for string hashing"),
278  errhint("Use the COLLATE clause to set the collation explicitly.")));
279 
281 
282  if (mylocale->deterministic)
283  {
284  result = hash_any((unsigned char *) VARDATA_ANY(key),
286  }
287  else
288  {
289  Size bsize,
290  rsize;
291  char *buf;
292  const char *keydata = VARDATA_ANY(key);
293  size_t keylen = VARSIZE_ANY_EXHDR(key);
294 
295 
296  bsize = pg_strnxfrm(NULL, 0, keydata, keylen, mylocale);
297  buf = palloc(bsize + 1);
298 
299  rsize = pg_strnxfrm(buf, bsize + 1, keydata, keylen, mylocale);
300 
301  /* the second call may return a smaller value than the first */
302  if (rsize > bsize)
303  elog(ERROR, "pg_strnxfrm() returned unexpected result");
304 
305  /*
306  * In principle, there's no reason to include the terminating NUL
307  * character in the hash, but it was done before and the behavior must
308  * be preserved.
309  */
310  result = hash_any((uint8_t *) buf, bsize + 1);
311 
312  pfree(buf);
313  }
314 
315  /* Avoid leaking memory for toasted inputs */
316  PG_FREE_IF_COPY(key, 0);
317 
318  return result;
319 }
320 
321 Datum
323 {
324  text *key = PG_GETARG_TEXT_PP(0);
326  pg_locale_t mylocale;
327  Datum result;
328 
329  if (!collid)
330  ereport(ERROR,
331  (errcode(ERRCODE_INDETERMINATE_COLLATION),
332  errmsg("could not determine which collation to use for string hashing"),
333  errhint("Use the COLLATE clause to set the collation explicitly.")));
334 
336 
337  if (mylocale->deterministic)
338  {
339  result = hash_any_extended((unsigned char *) VARDATA_ANY(key),
341  PG_GETARG_INT64(1));
342  }
343  else
344  {
345  Size bsize,
346  rsize;
347  char *buf;
348  const char *keydata = VARDATA_ANY(key);
349  size_t keylen = VARSIZE_ANY_EXHDR(key);
350 
351  bsize = pg_strnxfrm(NULL, 0, keydata, keylen, mylocale);
352  buf = palloc(bsize + 1);
353 
354  rsize = pg_strnxfrm(buf, bsize + 1, keydata, keylen, mylocale);
355 
356  /* the second call may return a smaller value than the first */
357  if (rsize > bsize)
358  elog(ERROR, "pg_strnxfrm() returned unexpected result");
359 
360  /*
361  * In principle, there's no reason to include the terminating NUL
362  * character in the hash, but it was done before and the behavior must
363  * be preserved.
364  */
365  result = hash_any_extended((uint8_t *) buf, bsize + 1,
366  PG_GETARG_INT64(1));
367 
368  pfree(buf);
369  }
370 
371  PG_FREE_IF_COPY(key, 0);
372 
373  return result;
374 }
375 
376 /*
377  * hashvarlena() can be used for any varlena datatype in which there are
378  * no non-significant bits, ie, distinct bitpatterns never compare as equal.
379  *
380  * (However, you need to define an SQL-level wrapper function around it with
381  * the concrete input data type; otherwise hashvalidate() won't accept it.
382  * Moreover, at least for built-in types, a C-level wrapper function is also
383  * recommended; otherwise, the opr_sanity test will get upset.)
384  */
385 Datum
387 {
388  struct varlena *key = PG_GETARG_VARLENA_PP(0);
389  Datum result;
390 
391  result = hash_any((unsigned char *) VARDATA_ANY(key),
393 
394  /* Avoid leaking memory for toasted inputs */
395  PG_FREE_IF_COPY(key, 0);
396 
397  return result;
398 }
399 
400 Datum
402 {
403  struct varlena *key = PG_GETARG_VARLENA_PP(0);
404  Datum result;
405 
406  result = hash_any_extended((unsigned char *) VARDATA_ANY(key),
408  PG_GETARG_INT64(1));
409 
410  PG_FREE_IF_COPY(key, 0);
411 
412  return result;
413 }
414 
415 Datum
417 {
418  return hashvarlena(fcinfo);
419 }
420 
421 Datum
423 {
424  return hashvarlenaextended(fcinfo);
425 }
#define NameStr(name)
Definition: c.h:737
unsigned int uint32
Definition: c.h:506
signed int int32
Definition: c.h:496
double float8
Definition: c.h:621
float float4
Definition: c.h:620
size_t Size
Definition: c.h:596
Oid collid
int errhint(const char *fmt,...)
Definition: elog.c:1317
int errcode(int sqlerrcode)
Definition: elog.c:853
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
#define ereport(elevel,...)
Definition: elog.h:149
static float8 get_float8_nan(void)
Definition: float.h:123
#define PG_FREE_IF_COPY(ptr, n)
Definition: fmgr.h:260
#define PG_GETARG_OID(n)
Definition: fmgr.h:275
#define PG_RETURN_UINT32(x)
Definition: fmgr.h:355
#define PG_GETARG_TEXT_PP(n)
Definition: fmgr.h:309
#define PG_GETARG_FLOAT8(n)
Definition: fmgr.h:282
#define PG_GETARG_CHAR(n)
Definition: fmgr.h:273
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:276
#define PG_GETARG_INT64(n)
Definition: fmgr.h:283
#define PG_GETARG_NAME(n)
Definition: fmgr.h:278
#define PG_RETURN_UINT64(x)
Definition: fmgr.h:369
#define PG_GETARG_VARLENA_PP(n)
Definition: fmgr.h:289
#define PG_GETARG_INT32(n)
Definition: fmgr.h:269
#define PG_GETARG_FLOAT4(n)
Definition: fmgr.h:281
#define PG_GET_COLLATION()
Definition: fmgr.h:198
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
#define PG_GETARG_INT16(n)
Definition: fmgr.h:271
static Datum hash_uint32(uint32 k)
Definition: hashfn.h:43
static Datum hash_any_extended(const unsigned char *k, int keylen, uint64 seed)
Definition: hashfn.h:37
static Datum hash_any(const unsigned char *k, int keylen)
Definition: hashfn.h:31
static Datum hash_uint32_extended(uint32 k, uint64 seed)
Definition: hashfn.h:49
Datum hashenum(PG_FUNCTION_ARGS)
Definition: hashfunc.c:128
Datum hashvarlenaextended(PG_FUNCTION_ARGS)
Definition: hashfunc.c:401
Datum hashfloat8extended(PG_FUNCTION_ARGS)
Definition: hashfunc.c:217
Datum hashenumextended(PG_FUNCTION_ARGS)
Definition: hashfunc.c:134
Datum hashtextextended(PG_FUNCTION_ARGS)
Definition: hashfunc.c:322
Datum hashoidvector(PG_FUNCTION_ARGS)
Definition: hashfunc.c:232
Datum hashint8extended(PG_FUNCTION_ARGS)
Definition: hashfunc.c:103
Datum hashint2(PG_FUNCTION_ARGS)
Definition: hashfunc.c:59
Datum hashint2extended(PG_FUNCTION_ARGS)
Definition: hashfunc.c:65
Datum hashfloat4(PG_FUNCTION_ARGS)
Definition: hashfunc.c:140
Datum hashfloat8(PG_FUNCTION_ARGS)
Definition: hashfunc.c:193
Datum hashoidextended(PG_FUNCTION_ARGS)
Definition: hashfunc.c:122
Datum hashnameextended(PG_FUNCTION_ARGS)
Definition: hashfunc.c:258
Datum hashbytea(PG_FUNCTION_ARGS)
Definition: hashfunc.c:416
Datum hashint8(PG_FUNCTION_ARGS)
Definition: hashfunc.c:83
Datum hashname(PG_FUNCTION_ARGS)
Definition: hashfunc.c:250
Datum hashint4extended(PG_FUNCTION_ARGS)
Definition: hashfunc.c:77
Datum hashtext(PG_FUNCTION_ARGS)
Definition: hashfunc.c:267
Datum hashchar(PG_FUNCTION_ARGS)
Definition: hashfunc.c:47
Datum hashbyteaextended(PG_FUNCTION_ARGS)
Definition: hashfunc.c:422
Datum hashoid(PG_FUNCTION_ARGS)
Definition: hashfunc.c:116
Datum hashcharextended(PG_FUNCTION_ARGS)
Definition: hashfunc.c:53
Datum hashfloat4extended(PG_FUNCTION_ARGS)
Definition: hashfunc.c:176
Datum hashvarlena(PG_FUNCTION_ARGS)
Definition: hashfunc.c:386
Datum hashint4(PG_FUNCTION_ARGS)
Definition: hashfunc.c:71
Datum hashoidvectorextended(PG_FUNCTION_ARGS)
Definition: hashfunc.c:240
long val
Definition: informix.c:689
void pfree(void *pointer)
Definition: mcxt.c:1521
void * palloc(Size size)
Definition: mcxt.c:1317
size_t pg_strnxfrm(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale)
Definition: pg_locale.c:1751
pg_locale_t pg_newlocale_from_collation(Oid collid)
Definition: pg_locale.c:1358
static char * buf
Definition: pg_test_fsync.c:73
uintptr_t Datum
Definition: postgres.h:64
unsigned int Oid
Definition: postgres_ext.h:31
Definition: c.h:717
bool deterministic
Definition: pg_locale.h:82
Definition: c.h:678
#define VARDATA_ANY(PTR)
Definition: varatt.h:324
#define VARSIZE_ANY_EXHDR(PTR)
Definition: varatt.h:317