PostgreSQL Source Code git master
Loading...
Searching...
No Matches
hashfunc.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * hashfunc.c
4 * Support functions for hash access method.
5 *
6 * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
8 *
9 *
10 * IDENTIFICATION
11 * src/backend/access/hash/hashfunc.c
12 *
13 * NOTES
14 * These functions are stored in pg_amproc. For each operator class
15 * defined for hash indexes, they compute the hash value of the argument.
16 *
17 * Additional hash functions appear in /utils/adt/ files for various
18 * specialized datatypes.
19 *
20 * It is expected that every bit of a hash function's 32-bit result is
21 * as random as every other; failure to ensure this is likely to lead
22 * to poor performance of hash joins, for example. In most cases a hash
23 * function should use hash_any() or its variant hash_uint32().
24 *-------------------------------------------------------------------------
25 */
26
27#include "postgres.h"
28
29#include "common/hashfn.h"
30#include "utils/builtins.h"
31#include "utils/float.h"
32#include "utils/fmgrprotos.h"
33#include "utils/pg_locale.h"
34#include "varatt.h"
35
36/*
37 * Datatype-specific hash functions.
38 *
39 * These support both hash indexes and hash joins.
40 *
41 * NOTE: some of these are also used by catcache operations, without
42 * any direct connection to hash indexes. The common hash_any routine
43 * is likewise used by dynahash tables.
44 */
45
46/* Note: this is used for both "char" and boolean datatypes */
52
58
64
70
76
82
85{
86 /*
87 * The idea here is to produce a hash value compatible with the values
88 * produced by hashint4 and hashint2 for logically equal inputs; this is
89 * necessary to support cross-type hash joins across these input types.
90 * Since all three types are signed, we can xor into the low half either
91 * the high half of the int8 value if the sign is positive, or its
92 * complement when the sign is negative (zero for any int4-range value).
93 */
96 uint32 hihalf = (uint32) (val >> 32);
97
98 lohalf ^= (val >= 0) ? hihalf : ~hihalf;
99
100 return hash_uint32(lohalf);
101}
102
103Datum
105{
106 /* Same approach as hashint8 */
109 uint32 hihalf = (uint32) (val >> 32);
110
111 lohalf ^= (val >= 0) ? hihalf : ~hihalf;
112
114}
115
116Datum
121
122Datum
127
128Datum
133
134Datum
139
140Datum
142{
143 float4 key = PG_GETARG_FLOAT4(0);
144 float8 key8;
145
146 /*
147 * On IEEE-float machines, minus zero and zero have different bit patterns
148 * but should compare as equal. We must ensure that they have the same
149 * hash value, which is most reliably done this way:
150 */
151 if (key == (float4) 0)
153
154 /*
155 * To support cross-type hashing of float8 and float4, we want to return
156 * the same hash value hashfloat8 would produce for an equal float8 value.
157 * So, widen the value to float8 and hash that. (We must do this rather
158 * than have hashfloat8 try to narrow its value to float4; that could fail
159 * on overflow.)
160 */
161 key8 = key;
162
163 /*
164 * Similarly, NaNs can have different bit patterns but they should all
165 * compare as equal. For backwards-compatibility reasons we force them to
166 * have the hash value of a standard float8 NaN. (You'd think we could
167 * replace key with a float4 NaN and then widen it; but on some old
168 * platforms, that way produces a different bit pattern.)
169 */
170 if (isnan(key8))
172
173 return hash_any((unsigned char *) &key8, sizeof(key8));
174}
175
176Datum
178{
179 float4 key = PG_GETARG_FLOAT4(0);
180 uint64 seed = PG_GETARG_INT64(1);
181 float8 key8;
182
183 /* Same approach as hashfloat4 */
184 if (key == (float4) 0)
185 PG_RETURN_UINT64(seed);
186 key8 = key;
187 if (isnan(key8))
189
190 return hash_any_extended((unsigned char *) &key8, sizeof(key8), seed);
191}
192
193Datum
195{
196 float8 key = PG_GETARG_FLOAT8(0);
197
198 /*
199 * On IEEE-float machines, minus zero and zero have different bit patterns
200 * but should compare as equal. We must ensure that they have the same
201 * hash value, which is most reliably done this way:
202 */
203 if (key == (float8) 0)
205
206 /*
207 * Similarly, NaNs can have different bit patterns but they should all
208 * compare as equal. For backwards-compatibility reasons we force them to
209 * have the hash value of a standard NaN.
210 */
211 if (isnan(key))
212 key = get_float8_nan();
213
214 return hash_any((unsigned char *) &key, sizeof(key));
215}
216
217Datum
219{
220 float8 key = PG_GETARG_FLOAT8(0);
221 uint64 seed = PG_GETARG_INT64(1);
222
223 /* Same approach as hashfloat8 */
224 if (key == (float8) 0)
225 PG_RETURN_UINT64(seed);
226 if (isnan(key))
227 key = get_float8_nan();
228
229 return hash_any_extended((unsigned char *) &key, sizeof(key), seed);
230}
231
232Datum
234{
236
238 return hash_any((unsigned char *) key->values, key->dim1 * sizeof(Oid));
239}
240
241Datum
243{
245
247 return hash_any_extended((unsigned char *) key->values,
248 key->dim1 * sizeof(Oid),
249 PG_GETARG_INT64(1));
250}
251
252Datum
254{
255 char *key = NameStr(*PG_GETARG_NAME(0));
256
257 return hash_any((unsigned char *) key, strlen(key));
258}
259
260Datum
262{
263 char *key = NameStr(*PG_GETARG_NAME(0));
264
265 return hash_any_extended((unsigned char *) key, strlen(key),
266 PG_GETARG_INT64(1));
267}
268
269Datum
271{
272 text *key = PG_GETARG_TEXT_PP(0);
275 Datum result;
276
277 if (!collid)
280 errmsg("could not determine which collation to use for string hashing"),
281 errhint("Use the COLLATE clause to set the collation explicitly.")));
282
284
285 if (mylocale->deterministic)
286 {
287 result = hash_any((unsigned char *) VARDATA_ANY(key),
288 VARSIZE_ANY_EXHDR(key));
289 }
290 else
291 {
292 Size bsize,
293 rsize;
294 char *buf;
295 const char *keydata = VARDATA_ANY(key);
296 size_t keylen = VARSIZE_ANY_EXHDR(key);
297
298
299 bsize = pg_strnxfrm(NULL, 0, keydata, keylen, mylocale);
300 buf = palloc(bsize + 1);
301
302 rsize = pg_strnxfrm(buf, bsize + 1, keydata, keylen, mylocale);
303
304 /* the second call may return a smaller value than the first */
305 if (rsize > bsize)
306 elog(ERROR, "pg_strnxfrm() returned unexpected result");
307
308 /*
309 * In principle, there's no reason to include the terminating NUL
310 * character in the hash, but it was done before and the behavior must
311 * be preserved.
312 */
313 result = hash_any((uint8_t *) buf, bsize + 1);
314
315 pfree(buf);
316 }
317
318 /* Avoid leaking memory for toasted inputs */
319 PG_FREE_IF_COPY(key, 0);
320
321 return result;
322}
323
324Datum
326{
327 text *key = PG_GETARG_TEXT_PP(0);
330 Datum result;
331
332 if (!collid)
335 errmsg("could not determine which collation to use for string hashing"),
336 errhint("Use the COLLATE clause to set the collation explicitly.")));
337
339
340 if (mylocale->deterministic)
341 {
342 result = hash_any_extended((unsigned char *) VARDATA_ANY(key),
344 PG_GETARG_INT64(1));
345 }
346 else
347 {
348 Size bsize,
349 rsize;
350 char *buf;
351 const char *keydata = VARDATA_ANY(key);
352 size_t keylen = VARSIZE_ANY_EXHDR(key);
353
354 bsize = pg_strnxfrm(NULL, 0, keydata, keylen, mylocale);
355 buf = palloc(bsize + 1);
356
357 rsize = pg_strnxfrm(buf, bsize + 1, keydata, keylen, mylocale);
358
359 /* the second call may return a smaller value than the first */
360 if (rsize > bsize)
361 elog(ERROR, "pg_strnxfrm() returned unexpected result");
362
363 /*
364 * In principle, there's no reason to include the terminating NUL
365 * character in the hash, but it was done before and the behavior must
366 * be preserved.
367 */
368 result = hash_any_extended((uint8_t *) buf, bsize + 1,
369 PG_GETARG_INT64(1));
370
371 pfree(buf);
372 }
373
374 PG_FREE_IF_COPY(key, 0);
375
376 return result;
377}
378
379/*
380 * hashvarlena() can be used for any varlena datatype in which there are
381 * no non-significant bits, i.e., distinct bit patterns never compare as equal.
382 *
383 * (However, you need to define an SQL-level wrapper function around it with
384 * the concrete input data type; otherwise hashvalidate() won't accept it.
385 * Moreover, at least for built-in types, a C-level wrapper function is also
386 * recommended; otherwise, the opr_sanity test will get upset.)
387 */
388Datum
390{
392 Datum result;
393
394 result = hash_any((unsigned char *) VARDATA_ANY(key),
395 VARSIZE_ANY_EXHDR(key));
396
397 /* Avoid leaking memory for toasted inputs */
398 PG_FREE_IF_COPY(key, 0);
399
400 return result;
401}
402
403Datum
405{
407 Datum result;
408
409 result = hash_any_extended((unsigned char *) VARDATA_ANY(key),
411 PG_GETARG_INT64(1));
412
413 PG_FREE_IF_COPY(key, 0);
414
415 return result;
416}
417
418Datum
420{
421 return hashvarlena(fcinfo);
422}
423
424Datum
#define NameStr(name)
Definition c.h:777
int64_t int64
Definition c.h:555
double float8
Definition c.h:656
int32_t int32
Definition c.h:554
uint64_t uint64
Definition c.h:559
uint32_t uint32
Definition c.h:558
float float4
Definition c.h:655
size_t Size
Definition c.h:631
Oid collid
int errcode(int sqlerrcode)
Definition elog.c:874
int errmsg(const char *fmt,...)
Definition elog.c:1093
int errhint(const char *fmt,...) pg_attribute_printf(1
#define ERROR
Definition elog.h:39
#define elog(elevel,...)
Definition elog.h:226
#define ereport(elevel,...)
Definition elog.h:150
static float8 get_float8_nan(void)
Definition float.h:84
#define PG_FREE_IF_COPY(ptr, n)
Definition fmgr.h:260
#define PG_GETARG_OID(n)
Definition fmgr.h:275
#define PG_RETURN_UINT32(x)
Definition fmgr.h:356
#define PG_GETARG_TEXT_PP(n)
Definition fmgr.h:310
#define PG_GETARG_FLOAT8(n)
Definition fmgr.h:283
#define PG_GETARG_CHAR(n)
Definition fmgr.h:273
#define PG_GETARG_POINTER(n)
Definition fmgr.h:277
#define PG_GETARG_INT64(n)
Definition fmgr.h:284
#define PG_GETARG_NAME(n)
Definition fmgr.h:279
#define PG_RETURN_UINT64(x)
Definition fmgr.h:371
#define PG_GETARG_VARLENA_PP(n)
Definition fmgr.h:290
#define PG_GETARG_INT32(n)
Definition fmgr.h:269
#define PG_GETARG_FLOAT4(n)
Definition fmgr.h:282
#define PG_GET_COLLATION()
Definition fmgr.h:198
#define PG_FUNCTION_ARGS
Definition fmgr.h:193
#define PG_GETARG_INT16(n)
Definition fmgr.h:271
static Datum hash_uint32(uint32 k)
Definition hashfn.h:43
static Datum hash_any_extended(const unsigned char *k, int keylen, uint64 seed)
Definition hashfn.h:37
static Datum hash_any(const unsigned char *k, int keylen)
Definition hashfn.h:31
static Datum hash_uint32_extended(uint32 k, uint64 seed)
Definition hashfn.h:49
Datum hashenum(PG_FUNCTION_ARGS)
Definition hashfunc.c:129
Datum hashvarlenaextended(PG_FUNCTION_ARGS)
Definition hashfunc.c:404
Datum hashfloat8extended(PG_FUNCTION_ARGS)
Definition hashfunc.c:218
Datum hashenumextended(PG_FUNCTION_ARGS)
Definition hashfunc.c:135
Datum hashtextextended(PG_FUNCTION_ARGS)
Definition hashfunc.c:325
Datum hashoidvector(PG_FUNCTION_ARGS)
Definition hashfunc.c:233
Datum hashint8extended(PG_FUNCTION_ARGS)
Definition hashfunc.c:104
Datum hashint2(PG_FUNCTION_ARGS)
Definition hashfunc.c:60
Datum hashint2extended(PG_FUNCTION_ARGS)
Definition hashfunc.c:66
Datum hashfloat4(PG_FUNCTION_ARGS)
Definition hashfunc.c:141
Datum hashfloat8(PG_FUNCTION_ARGS)
Definition hashfunc.c:194
Datum hashoidextended(PG_FUNCTION_ARGS)
Definition hashfunc.c:123
Datum hashnameextended(PG_FUNCTION_ARGS)
Definition hashfunc.c:261
Datum hashbytea(PG_FUNCTION_ARGS)
Definition hashfunc.c:419
Datum hashint8(PG_FUNCTION_ARGS)
Definition hashfunc.c:84
Datum hashname(PG_FUNCTION_ARGS)
Definition hashfunc.c:253
Datum hashint4extended(PG_FUNCTION_ARGS)
Definition hashfunc.c:78
Datum hashtext(PG_FUNCTION_ARGS)
Definition hashfunc.c:270
Datum hashchar(PG_FUNCTION_ARGS)
Definition hashfunc.c:48
Datum hashbyteaextended(PG_FUNCTION_ARGS)
Definition hashfunc.c:425
Datum hashoid(PG_FUNCTION_ARGS)
Definition hashfunc.c:117
Datum hashcharextended(PG_FUNCTION_ARGS)
Definition hashfunc.c:54
Datum hashfloat4extended(PG_FUNCTION_ARGS)
Definition hashfunc.c:177
Datum hashvarlena(PG_FUNCTION_ARGS)
Definition hashfunc.c:389
Datum hashint4(PG_FUNCTION_ARGS)
Definition hashfunc.c:72
Datum hashoidvectorextended(PG_FUNCTION_ARGS)
Definition hashfunc.c:242
long val
Definition informix.c:689
void pfree(void *pointer)
Definition mcxt.c:1616
void * palloc(Size size)
Definition mcxt.c:1387
void check_valid_oidvector(const oidvector *oidArray)
Definition oid.c:118
size_t pg_strnxfrm(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale)
Definition pg_locale.c:1459
pg_locale_t pg_newlocale_from_collation(Oid collid)
Definition pg_locale.c:1189
static char buf[DEFAULT_XLOG_SEG_SIZE]
uint64_t Datum
Definition postgres.h:70
unsigned int Oid
static int fb(int x)
Definition c.h:757
Definition c.h:718
static Size VARSIZE_ANY_EXHDR(const void *PTR)
Definition varatt.h:472
static char * VARDATA_ANY(const void *PTR)
Definition varatt.h:486