PostgreSQL Source Code git master
Loading...
Searching...
No Matches
hashfn_unstable.h
Go to the documentation of this file.
1/*
2 * hashfn_unstable.h
3 *
4 * Building blocks for creating fast inlineable hash functions. The
5 * functions in this file are not guaranteed to be stable between versions,
6 * and may differ by hardware platform. Hence they must not be used in
7 * indexes or other on-disk structures. See hashfn.h if you need stability.
8 *
9 *
10 * Portions Copyright (c) 2024-2026, PostgreSQL Global Development Group
11 *
12 * src/include/common/hashfn_unstable.h
13 */
14#ifndef HASHFN_UNSTABLE_H
15#define HASHFN_UNSTABLE_H
16
17
18/*
19 * fasthash is a modification of code taken from
20 * https://code.google.com/archive/p/fast-hash/source/default/source
21 * under the terms of the MIT license. The original copyright
22 * notice follows:
23 */
24
25/* The MIT License
26
27 Copyright (C) 2012 Zilong Tan (eric.zltan@gmail.com)
28
29 Permission is hereby granted, free of charge, to any person
30 obtaining a copy of this software and associated documentation
31 files (the "Software"), to deal in the Software without
32 restriction, including without limitation the rights to use, copy,
33 modify, merge, publish, distribute, sublicense, and/or sell copies
34 of the Software, and to permit persons to whom the Software is
35 furnished to do so, subject to the following conditions:
36
37 The above copyright notice and this permission notice shall be
38 included in all copies or substantial portions of the Software.
39
40 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
41 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
42 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
43 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
44 BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
45 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
46 CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
47 SOFTWARE.
48*/
49
50/*
51 * fasthash as implemented here has two interfaces:
52 *
53 * 1) Standalone functions that take a single input.
54 *
55 * 2) Incremental interface. This can used for incorporating multiple
56 * inputs. First, initialize the hash state (here with a zero seed):
57 *
58 * fasthash_state hs;
59 * fasthash_init(&hs, 0);
60 *
61 * Next, accumulate input into the hash state.
62 * If the inputs are of types that can be trivially cast to uint64, it's
63 * sufficient to do:
64 *
65 * hs.accum = value1;
66 * fasthash_combine(&hs);
67 * hs.accum = value2;
68 * fasthash_combine(&hs);
69 * ...
70 *
71 * For longer or variable-length input, fasthash_accum() is a more
72 * flexible, but more verbose method. The standalone functions use this
73 * internally, so see fasthash64() for an example of this.
74 *
75 * After all inputs have been mixed in, finalize the hash and optionally
76 * reduce to 32 bits. If all inputs are fixed-length, it's sufficient
77 * to pass zero for the tweak:
78 *
79 * hashcode = fasthash_final32(&hs, 0);
80 *
81 * For variable length input, experimentation has found that SMHasher
82 * fails unless we pass the length for the tweak. When accumulating
83 * multiple varlen values, it's probably safest to calculate a tweak
84 * such that the bits of all individual lengths are present, for example:
85 *
86 * lengths = len1 + (len2 << 10) + (len3 << 20);
87 * hashcode = fasthash_final32(&hs, lengths);
88 *
89 * The incremental interface allows an optimization for NUL-terminated
90 * C strings:
91 *
92 * len = fasthash_accum_cstring(&hs, str);
93 * hashcode = fasthash_final32(&hs, len);
94 *
95 * By computing the length on-the-fly, we can avoid needing a strlen()
96 * call to tell us how many bytes to hash.
97 */
98
99
100typedef struct fasthash_state
101{
102 /* staging area for chunks of input */
104
107
108#define FH_SIZEOF_ACCUM sizeof(uint64)
109
110
111/*
112 * Initialize the hash state.
113 *
114 * 'seed' can be zero.
115 */
116static inline void
118{
119 memset(hs, 0, sizeof(fasthash_state));
120 hs->hash = seed ^ 0x880355f21e6d1965;
121}
122
123/* both the finalizer and part of the combining step */
124static inline uint64
126{
127 h ^= (h >> 23) + tweak;
128 h *= 0x2127599bf4325c37;
129 h ^= h >> 47;
130 return h;
131}
132
133/* combine one chunk of input into the hash */
134static inline void
136{
137 hs->hash ^= fasthash_mix(hs->accum, 0);
138 hs->hash *= 0x880355f21e6d1965;
139}
140
141/* accumulate up to 8 bytes of input and combine it into the hash */
142static inline void
143fasthash_accum(fasthash_state *hs, const char *k, size_t len)
144{
146
148 hs->accum = 0;
149
150 /*
151 * For consistency, bytewise loads must match the platform's endianness.
152 */
153#ifdef WORDS_BIGENDIAN
154 switch (len)
155 {
156 case 8:
157 memcpy(&hs->accum, k, 8);
158 break;
159 case 7:
160 hs->accum |= (uint64) k[6] << 8;
161 /* FALLTHROUGH */
162 case 6:
163 hs->accum |= (uint64) k[5] << 16;
164 /* FALLTHROUGH */
165 case 5:
166 hs->accum |= (uint64) k[4] << 24;
167 /* FALLTHROUGH */
168 case 4:
169 memcpy(&lower_four, k, sizeof(lower_four));
170 hs->accum |= (uint64) lower_four << 32;
171 break;
172 case 3:
173 hs->accum |= (uint64) k[2] << 40;
174 /* FALLTHROUGH */
175 case 2:
176 hs->accum |= (uint64) k[1] << 48;
177 /* FALLTHROUGH */
178 case 1:
179 hs->accum |= (uint64) k[0] << 56;
180 break;
181 case 0:
182 return;
183 }
184#else
185 switch (len)
186 {
187 case 8:
188 memcpy(&hs->accum, k, 8);
189 break;
190 case 7:
191 hs->accum |= (uint64) k[6] << 48;
192 /* FALLTHROUGH */
193 case 6:
194 hs->accum |= (uint64) k[5] << 40;
195 /* FALLTHROUGH */
196 case 5:
197 hs->accum |= (uint64) k[4] << 32;
198 /* FALLTHROUGH */
199 case 4:
200 memcpy(&lower_four, k, sizeof(lower_four));
201 hs->accum |= lower_four;
202 break;
203 case 3:
204 hs->accum |= (uint64) k[2] << 16;
205 /* FALLTHROUGH */
206 case 2:
207 hs->accum |= (uint64) k[1] << 8;
208 /* FALLTHROUGH */
209 case 1:
210 hs->accum |= (uint64) k[0];
211 break;
212 case 0:
213 return;
214 }
215#endif
216
218}
219
220/*
221 * Set high bit in lowest byte where the input is zero, from:
222 * https://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord
223 */
224#define haszero64(v) \
225 (((v) - 0x0101010101010101) & ~(v) & 0x8080808080808080)
226
227/*
228 * all-purpose workhorse for fasthash_accum_cstring
229 */
230static inline size_t
232{
233 const char *const start = str;
234
235 while (*str)
236 {
237 size_t chunk_len = 0;
238
239 while (chunk_len < FH_SIZEOF_ACCUM && str[chunk_len] != '\0')
240 chunk_len++;
241
243 str += chunk_len;
244 }
245
246 return str - start;
247}
248
249/*
250 * specialized workhorse for fasthash_accum_cstring
251 *
252 * With an aligned pointer, we consume the string a word at a time.
253 * Loading the word containing the NUL terminator cannot segfault since
254 * allocation boundaries are suitably aligned. To keep from setting
255 * off alarms with address sanitizers, exclude this function from
256 * such testing.
257 */
259static inline size_t
261{
262 const char *const start = str;
263 size_t remainder;
265
267
268 /*
269 * For every chunk of input, check for zero bytes before mixing into the
270 * hash. The chunk with zeros must contain the NUL terminator.
271 */
272 for (;;)
273 {
274 uint64 chunk = *(const uint64 *) str;
275
277 if (zero_byte_low)
278 break;
279
280 hs->accum = chunk;
283 }
284
285 /* mix in remaining bytes */
287 str += remainder;
288
289 return str - start;
290}
291
292/*
293 * Mix 'str' into the hash state and return the length of the string.
294 */
295static inline size_t
297{
298#if SIZEOF_VOID_P >= 8
299
300 size_t len;
301#ifdef USE_ASSERT_CHECKING
302 size_t len_check;
304
305 memcpy(&hs_check, hs, sizeof(fasthash_state));
307#endif
309 {
311 Assert(len_check == len);
312 Assert(hs_check.hash == hs->hash);
313 return len;
314 }
315#endif /* SIZEOF_VOID_P */
316
317 /*
318 * It's not worth it to try to make the word-at-a-time optimization work
319 * on 32-bit platforms.
320 */
322}
323
324/*
325 * The finalizer
326 *
327 * 'tweak' is intended to be the input length when the caller doesn't know
328 * the length ahead of time, such as for NUL-terminated strings, otherwise
329 * zero.
330 */
331static inline uint64
336
337/*
338 * Reduce a 64-bit hash to a 32-bit hash.
339 *
340 * This optional step provides a bit more additional mixing compared to
341 * just taking the lower 32-bits.
342 */
343static inline uint32
345{
346 /*
347 * Convert the 64-bit hashcode to Fermat residue, which shall retain
348 * information from both the higher and lower parts of hashcode.
349 */
350 return h - (h >> 32);
351}
352
353/* finalize and reduce */
354static inline uint32
359
360
361/* Standalone functions */
362
363/*
364 * The original fasthash64 function, re-implemented using the incremental
365 * interface. Returns the same 64-bit hashcode as the original,
366 * at least on little-endian machines. 'len' controls not only how
367 * many bytes to hash, but also modifies the internal seed.
368 * 'seed' can be zero.
369 */
370static inline uint64
371fasthash64(const char *k, size_t len, uint64 seed)
372{
374
375 fasthash_init(&hs, 0);
376
377 /* re-initialize the seed according to input length */
378 hs.hash = seed ^ (len * 0x880355f21e6d1965);
379
380 while (len >= FH_SIZEOF_ACCUM)
381 {
383 k += FH_SIZEOF_ACCUM;
385 }
386
387 fasthash_accum(&hs, k, len);
388
389 /*
390 * Since we already mixed the input length into the seed, we can just pass
391 * zero here. This matches upstream behavior as well.
392 */
393 return fasthash_final64(&hs, 0);
394}
395
396/* like fasthash64, but returns a 32-bit hashcode */
397static inline uint32
398fasthash32(const char *k, size_t len, uint64 seed)
399{
400 return fasthash_reduce32(fasthash64(k, len, seed));
401}
402
403/*
404 * Convenience function for hashing NUL-terminated strings
405 *
406 * Note: This is faster than, and computes a different result from,
407 * "fasthash32(s, strlen(s))"
408 */
409static inline uint32
410hash_string(const char *s)
411{
413 size_t s_len;
414
415 fasthash_init(&hs, 0);
416
417 /*
418 * Combine string into the hash and save the length for tweaking the final
419 * mix.
420 */
422
423 return fasthash_final32(&hs, s_len);
424}
425
426#endif /* HASHFN_UNSTABLE_H */
#define PointerIsAligned(pointer, type)
Definition c.h:782
#define Assert(condition)
Definition c.h:873
uint64_t uint64
Definition c.h:547
uint32_t uint32
Definition c.h:546
#define haszero64(v)
return str start
static uint32 fasthash_reduce32(uint64 h)
static uint32 fasthash32(const char *k, size_t len, uint64 seed)
#define FH_SIZEOF_ACCUM
pg_attribute_no_sanitize_address() static inline size_t fasthash_accum_cstring_aligned(fasthash_state *hs
const char * str
static size_t fasthash_accum_cstring(fasthash_state *hs, const char *str)
uint64 zero_byte_low
static uint64 fasthash64(const char *k, size_t len, uint64 seed)
static uint64 fasthash_mix(uint64 h, uint64 tweak)
static uint32 fasthash_final32(fasthash_state *hs, uint64 tweak)
static void fasthash_combine(fasthash_state *hs)
static void fasthash_accum(fasthash_state *hs, const char *k, size_t len)
static uint64 fasthash_final64(fasthash_state *hs, uint64 tweak)
static uint32 hash_string(const char *s)
size_t remainder
static size_t fasthash_accum_cstring_unaligned(fasthash_state *hs, const char *str)
static void fasthash_init(fasthash_state *hs, uint64 seed)
const void size_t len
static int fb(int x)