PostgreSQL Source Code git master
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
hashfunc.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * hashfunc.c
4 * Support functions for hash access method.
5 *
6 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
8 *
9 *
10 * IDENTIFICATION
11 * src/backend/access/hash/hashfunc.c
12 *
13 * NOTES
14 * These functions are stored in pg_amproc. For each operator class
15 * defined for hash indexes, they compute the hash value of the argument.
16 *
17 * Additional hash functions appear in /utils/adt/ files for various
18 * specialized datatypes.
19 *
20 * It is expected that every bit of a hash function's 32-bit result is
21 * as random as every other; failure to ensure this is likely to lead
22 * to poor performance of hash joins, for example. In most cases a hash
23 * function should use hash_any() or its variant hash_uint32().
24 *-------------------------------------------------------------------------
25 */
26
27#include "postgres.h"
28
29#include "common/hashfn.h"
30#include "utils/float.h"
31#include "utils/fmgrprotos.h"
32#include "utils/pg_locale.h"
33#include "varatt.h"
34
35/*
36 * Datatype-specific hash functions.
37 *
38 * These support both hash indexes and hash joins.
39 *
40 * NOTE: some of these are also used by catcache operations, without
41 * any direct connection to hash indexes. Also, the common hash_any
42 * routine is also used by dynahash tables.
43 */
44
45/* Note: this is used for both "char" and boolean datatypes */
48{
49 return hash_uint32((int32) PG_GETARG_CHAR(0));
50}
51
54{
56}
57
60{
62}
63
66{
68}
69
72{
74}
75
78{
80}
81
84{
85 /*
86 * The idea here is to produce a hash value compatible with the values
87 * produced by hashint4 and hashint2 for logically equal inputs; this is
88 * necessary to support cross-type hash joins across these input types.
89 * Since all three types are signed, we can xor the high half of the int8
90 * value if the sign is positive, or the complement of the high half when
91 * the sign is negative.
92 */
94 uint32 lohalf = (uint32) val;
95 uint32 hihalf = (uint32) (val >> 32);
96
97 lohalf ^= (val >= 0) ? hihalf : ~hihalf;
98
99 return hash_uint32(lohalf);
100}
101
102Datum
104{
105 /* Same approach as hashint8 */
107 uint32 lohalf = (uint32) val;
108 uint32 hihalf = (uint32) (val >> 32);
109
110 lohalf ^= (val >= 0) ? hihalf : ~hihalf;
111
112 return hash_uint32_extended(lohalf, PG_GETARG_INT64(1));
113}
114
115Datum
117{
118 return hash_uint32((uint32) PG_GETARG_OID(0));
119}
120
121Datum
123{
125}
126
127Datum
129{
130 return hash_uint32((uint32) PG_GETARG_OID(0));
131}
132
133Datum
135{
137}
138
139Datum
141{
143 float8 key8;
144
145 /*
146 * On IEEE-float machines, minus zero and zero have different bit patterns
147 * but should compare as equal. We must ensure that they have the same
148 * hash value, which is most reliably done this way:
149 */
150 if (key == (float4) 0)
152
153 /*
154 * To support cross-type hashing of float8 and float4, we want to return
155 * the same hash value hashfloat8 would produce for an equal float8 value.
156 * So, widen the value to float8 and hash that. (We must do this rather
157 * than have hashfloat8 try to narrow its value to float4; that could fail
158 * on overflow.)
159 */
160 key8 = key;
161
162 /*
163 * Similarly, NaNs can have different bit patterns but they should all
164 * compare as equal. For backwards-compatibility reasons we force them to
165 * have the hash value of a standard float8 NaN. (You'd think we could
166 * replace key with a float4 NaN and then widen it; but on some old
167 * platforms, that way produces a different bit pattern.)
168 */
169 if (isnan(key8))
170 key8 = get_float8_nan();
171
172 return hash_any((unsigned char *) &key8, sizeof(key8));
173}
174
175Datum
177{
179 uint64 seed = PG_GETARG_INT64(1);
180 float8 key8;
181
182 /* Same approach as hashfloat4 */
183 if (key == (float4) 0)
184 PG_RETURN_UINT64(seed);
185 key8 = key;
186 if (isnan(key8))
187 key8 = get_float8_nan();
188
189 return hash_any_extended((unsigned char *) &key8, sizeof(key8), seed);
190}
191
192Datum
194{
196
197 /*
198 * On IEEE-float machines, minus zero and zero have different bit patterns
199 * but should compare as equal. We must ensure that they have the same
200 * hash value, which is most reliably done this way:
201 */
202 if (key == (float8) 0)
204
205 /*
206 * Similarly, NaNs can have different bit patterns but they should all
207 * compare as equal. For backwards-compatibility reasons we force them to
208 * have the hash value of a standard NaN.
209 */
210 if (isnan(key))
212
213 return hash_any((unsigned char *) &key, sizeof(key));
214}
215
216Datum
218{
220 uint64 seed = PG_GETARG_INT64(1);
221
222 /* Same approach as hashfloat8 */
223 if (key == (float8) 0)
224 PG_RETURN_UINT64(seed);
225 if (isnan(key))
227
228 return hash_any_extended((unsigned char *) &key, sizeof(key), seed);
229}
230
231Datum
233{
235
236 return hash_any((unsigned char *) key->values, key->dim1 * sizeof(Oid));
237}
238
239Datum
241{
243
244 return hash_any_extended((unsigned char *) key->values,
245 key->dim1 * sizeof(Oid),
246 PG_GETARG_INT64(1));
247}
248
249Datum
251{
252 char *key = NameStr(*PG_GETARG_NAME(0));
253
254 return hash_any((unsigned char *) key, strlen(key));
255}
256
257Datum
259{
260 char *key = NameStr(*PG_GETARG_NAME(0));
261
262 return hash_any_extended((unsigned char *) key, strlen(key),
263 PG_GETARG_INT64(1));
264}
265
266Datum
268{
271 pg_locale_t mylocale;
272 Datum result;
273
274 if (!collid)
276 (errcode(ERRCODE_INDETERMINATE_COLLATION),
277 errmsg("could not determine which collation to use for string hashing"),
278 errhint("Use the COLLATE clause to set the collation explicitly.")));
279
281
282 if (mylocale->deterministic)
283 {
284 result = hash_any((unsigned char *) VARDATA_ANY(key),
286 }
287 else
288 {
289 Size bsize,
290 rsize;
291 char *buf;
292 const char *keydata = VARDATA_ANY(key);
293 size_t keylen = VARSIZE_ANY_EXHDR(key);
294
295
296 bsize = pg_strnxfrm(NULL, 0, keydata, keylen, mylocale);
297 buf = palloc(bsize + 1);
298
299 rsize = pg_strnxfrm(buf, bsize + 1, keydata, keylen, mylocale);
300
301 /* the second call may return a smaller value than the first */
302 if (rsize > bsize)
303 elog(ERROR, "pg_strnxfrm() returned unexpected result");
304
305 /*
306 * In principle, there's no reason to include the terminating NUL
307 * character in the hash, but it was done before and the behavior must
308 * be preserved.
309 */
310 result = hash_any((uint8_t *) buf, bsize + 1);
311
312 pfree(buf);
313 }
314
315 /* Avoid leaking memory for toasted inputs */
317
318 return result;
319}
320
321Datum
323{
326 pg_locale_t mylocale;
327 Datum result;
328
329 if (!collid)
331 (errcode(ERRCODE_INDETERMINATE_COLLATION),
332 errmsg("could not determine which collation to use for string hashing"),
333 errhint("Use the COLLATE clause to set the collation explicitly.")));
334
336
337 if (mylocale->deterministic)
338 {
339 result = hash_any_extended((unsigned char *) VARDATA_ANY(key),
341 PG_GETARG_INT64(1));
342 }
343 else
344 {
345 Size bsize,
346 rsize;
347 char *buf;
348 const char *keydata = VARDATA_ANY(key);
349 size_t keylen = VARSIZE_ANY_EXHDR(key);
350
351 bsize = pg_strnxfrm(NULL, 0, keydata, keylen, mylocale);
352 buf = palloc(bsize + 1);
353
354 rsize = pg_strnxfrm(buf, bsize + 1, keydata, keylen, mylocale);
355
356 /* the second call may return a smaller value than the first */
357 if (rsize > bsize)
358 elog(ERROR, "pg_strnxfrm() returned unexpected result");
359
360 /*
361 * In principle, there's no reason to include the terminating NUL
362 * character in the hash, but it was done before and the behavior must
363 * be preserved.
364 */
365 result = hash_any_extended((uint8_t *) buf, bsize + 1,
366 PG_GETARG_INT64(1));
367
368 pfree(buf);
369 }
370
372
373 return result;
374}
375
376/*
377 * hashvarlena() can be used for any varlena datatype in which there are
378 * no non-significant bits, ie, distinct bitpatterns never compare as equal.
379 *
380 * (However, you need to define an SQL-level wrapper function around it with
381 * the concrete input data type; otherwise hashvalidate() won't accept it.
382 * Moreover, at least for built-in types, a C-level wrapper function is also
383 * recommended; otherwise, the opr_sanity test will get upset.)
384 */
385Datum
387{
388 struct varlena *key = PG_GETARG_VARLENA_PP(0);
389 Datum result;
390
391 result = hash_any((unsigned char *) VARDATA_ANY(key),
393
394 /* Avoid leaking memory for toasted inputs */
396
397 return result;
398}
399
400Datum
402{
403 struct varlena *key = PG_GETARG_VARLENA_PP(0);
404 Datum result;
405
406 result = hash_any_extended((unsigned char *) VARDATA_ANY(key),
408 PG_GETARG_INT64(1));
409
411
412 return result;
413}
414
415Datum
417{
418 return hashvarlena(fcinfo);
419}
420
421Datum
423{
424 return hashvarlenaextended(fcinfo);
425}
#define NameStr(name)
Definition: c.h:700
int64_t int64
Definition: c.h:482
double float8
Definition: c.h:584
int32_t int32
Definition: c.h:481
uint64_t uint64
Definition: c.h:486
uint32_t uint32
Definition: c.h:485
float float4
Definition: c.h:583
size_t Size
Definition: c.h:559
Oid collid
int errhint(const char *fmt,...)
Definition: elog.c:1317
int errcode(int sqlerrcode)
Definition: elog.c:853
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
#define ereport(elevel,...)
Definition: elog.h:149
static float8 get_float8_nan(void)
Definition: float.h:123
#define PG_FREE_IF_COPY(ptr, n)
Definition: fmgr.h:260
#define PG_GETARG_OID(n)
Definition: fmgr.h:275
#define PG_RETURN_UINT32(x)
Definition: fmgr.h:355
#define PG_GETARG_TEXT_PP(n)
Definition: fmgr.h:309
#define PG_GETARG_FLOAT8(n)
Definition: fmgr.h:282
#define PG_GETARG_CHAR(n)
Definition: fmgr.h:273
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:276
#define PG_GETARG_INT64(n)
Definition: fmgr.h:283
#define PG_GETARG_NAME(n)
Definition: fmgr.h:278
#define PG_RETURN_UINT64(x)
Definition: fmgr.h:369
#define PG_GETARG_VARLENA_PP(n)
Definition: fmgr.h:289
#define PG_GETARG_INT32(n)
Definition: fmgr.h:269
#define PG_GETARG_FLOAT4(n)
Definition: fmgr.h:281
#define PG_GET_COLLATION()
Definition: fmgr.h:198
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
#define PG_GETARG_INT16(n)
Definition: fmgr.h:271
static Datum hash_uint32(uint32 k)
Definition: hashfn.h:43
static Datum hash_any_extended(const unsigned char *k, int keylen, uint64 seed)
Definition: hashfn.h:37
static Datum hash_any(const unsigned char *k, int keylen)
Definition: hashfn.h:31
static Datum hash_uint32_extended(uint32 k, uint64 seed)
Definition: hashfn.h:49
Datum hashenum(PG_FUNCTION_ARGS)
Definition: hashfunc.c:128
Datum hashvarlenaextended(PG_FUNCTION_ARGS)
Definition: hashfunc.c:401
Datum hashfloat8extended(PG_FUNCTION_ARGS)
Definition: hashfunc.c:217
Datum hashenumextended(PG_FUNCTION_ARGS)
Definition: hashfunc.c:134
Datum hashtextextended(PG_FUNCTION_ARGS)
Definition: hashfunc.c:322
Datum hashoidvector(PG_FUNCTION_ARGS)
Definition: hashfunc.c:232
Datum hashint8extended(PG_FUNCTION_ARGS)
Definition: hashfunc.c:103
Datum hashint2(PG_FUNCTION_ARGS)
Definition: hashfunc.c:59
Datum hashint2extended(PG_FUNCTION_ARGS)
Definition: hashfunc.c:65
Datum hashfloat4(PG_FUNCTION_ARGS)
Definition: hashfunc.c:140
Datum hashfloat8(PG_FUNCTION_ARGS)
Definition: hashfunc.c:193
Datum hashoidextended(PG_FUNCTION_ARGS)
Definition: hashfunc.c:122
Datum hashnameextended(PG_FUNCTION_ARGS)
Definition: hashfunc.c:258
Datum hashbytea(PG_FUNCTION_ARGS)
Definition: hashfunc.c:416
Datum hashint8(PG_FUNCTION_ARGS)
Definition: hashfunc.c:83
Datum hashname(PG_FUNCTION_ARGS)
Definition: hashfunc.c:250
Datum hashint4extended(PG_FUNCTION_ARGS)
Definition: hashfunc.c:77
Datum hashtext(PG_FUNCTION_ARGS)
Definition: hashfunc.c:267
Datum hashchar(PG_FUNCTION_ARGS)
Definition: hashfunc.c:47
Datum hashbyteaextended(PG_FUNCTION_ARGS)
Definition: hashfunc.c:422
Datum hashoid(PG_FUNCTION_ARGS)
Definition: hashfunc.c:116
Datum hashcharextended(PG_FUNCTION_ARGS)
Definition: hashfunc.c:53
Datum hashfloat4extended(PG_FUNCTION_ARGS)
Definition: hashfunc.c:176
Datum hashvarlena(PG_FUNCTION_ARGS)
Definition: hashfunc.c:386
Datum hashint4(PG_FUNCTION_ARGS)
Definition: hashfunc.c:71
Datum hashoidvectorextended(PG_FUNCTION_ARGS)
Definition: hashfunc.c:240
long val
Definition: informix.c:689
void pfree(void *pointer)
Definition: mcxt.c:1521
void * palloc(Size size)
Definition: mcxt.c:1317
size_t pg_strnxfrm(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale)
Definition: pg_locale.c:1680
pg_locale_t pg_newlocale_from_collation(Oid collid)
Definition: pg_locale.c:1341
static char * buf
Definition: pg_test_fsync.c:72
uintptr_t Datum
Definition: postgres.h:64
unsigned int Oid
Definition: postgres_ext.h:31
Definition: c.h:680
bool deterministic
Definition: pg_locale.h:69
Definition: c.h:641
#define VARDATA_ANY(PTR)
Definition: varatt.h:324
#define VARSIZE_ANY_EXHDR(PTR)
Definition: varatt.h:317