PostgreSQL Source Code git master
Loading...
Searching...
No Matches
hashfn_unstable.h
Go to the documentation of this file.
1/*
2 * hashfn_unstable.h
3 *
4 * Building blocks for creating fast inlineable hash functions. The
5 * functions in this file are not guaranteed to be stable between versions,
6 * and may differ by hardware platform. Hence they must not be used in
7 * indexes or other on-disk structures. See hashfn.h if you need stability.
8 *
9 *
10 * Portions Copyright (c) 2024-2026, PostgreSQL Global Development Group
11 *
12 * src/include/common/hashfn_unstable.h
13 */
14#ifndef HASHFN_UNSTABLE_H
15#define HASHFN_UNSTABLE_H
16
17
18/*
19 * fasthash is a modification of code taken from
20 * https://code.google.com/archive/p/fast-hash/source/default/source
21 * under the terms of the MIT license. The original copyright
22 * notice follows:
23 */
24
25/*
26 * The MIT License
27 *
28 * Copyright (C) 2012 Zilong Tan (eric.zltan@gmail.com)
29 *
30 * Permission is hereby granted, free of charge, to any person
31 * obtaining a copy of this software and associated documentation
32 * files (the "Software"), to deal in the Software without
33 * restriction, including without limitation the rights to use, copy,
34 * modify, merge, publish, distribute, sublicense, and/or sell copies
35 * of the Software, and to permit persons to whom the Software is
36 * furnished to do so, subject to the following conditions:
37 *
38 * The above copyright notice and this permission notice shall be
39 * included in all copies or substantial portions of the Software.
40 *
41 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
42 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
43 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
44 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
45 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
46 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
47 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
48 * SOFTWARE.
49 */
50
51/*
52 * fasthash as implemented here has two interfaces:
53 *
54 * 1) Standalone functions that take a single input.
55 *
 56 * 2) Incremental interface. This can be used for incorporating multiple
57 * inputs. First, initialize the hash state (here with a zero seed):
58 *
59 * fasthash_state hs;
60 * fasthash_init(&hs, 0);
61 *
62 * Next, accumulate input into the hash state.
63 * If the inputs are of types that can be trivially cast to uint64, it's
64 * sufficient to do:
65 *
66 * hs.accum = value1;
67 * fasthash_combine(&hs);
68 * hs.accum = value2;
69 * fasthash_combine(&hs);
70 * ...
71 *
72 * For longer or variable-length input, fasthash_accum() is a more
73 * flexible, but more verbose method. The standalone functions use this
74 * internally, so see fasthash64() for an example of this.
75 *
76 * After all inputs have been mixed in, finalize the hash and optionally
77 * reduce to 32 bits. If all inputs are fixed-length, it's sufficient
78 * to pass zero for the tweak:
79 *
80 * hashcode = fasthash_final32(&hs, 0);
81 *
82 * For variable length input, experimentation has found that SMHasher
83 * fails unless we pass the length for the tweak. When accumulating
84 * multiple varlen values, it's probably safest to calculate a tweak
85 * such that the bits of all individual lengths are present, for example:
86 *
87 * lengths = len1 + (len2 << 10) + (len3 << 20);
88 * hashcode = fasthash_final32(&hs, lengths);
89 *
90 * The incremental interface allows an optimization for NUL-terminated
91 * C strings:
92 *
93 * len = fasthash_accum_cstring(&hs, str);
94 * hashcode = fasthash_final32(&hs, len);
95 *
96 * By computing the length on-the-fly, we can avoid needing a strlen()
97 * call to tell us how many bytes to hash.
98 */
99
100
101typedef struct fasthash_state
102{
103 /* staging area for chunks of input */
105
108
109#define FH_SIZEOF_ACCUM sizeof(uint64)
110
111
112/*
113 * Initialize the hash state.
114 *
115 * 'seed' can be zero.
116 */
117static inline void
119{
120 memset(hs, 0, sizeof(fasthash_state));
121 hs->hash = seed ^ 0x880355f21e6d1965;
122}
123
124/* both the finalizer and part of the combining step */
125static inline uint64
127{
128 h ^= (h >> 23) + tweak;
129 h *= 0x2127599bf4325c37;
130 h ^= h >> 47;
131 return h;
132}
133
134/* combine one chunk of input into the hash */
135static inline void
137{
138 hs->hash ^= fasthash_mix(hs->accum, 0);
139 hs->hash *= 0x880355f21e6d1965;
140}
141
142/* accumulate up to 8 bytes of input and combine it into the hash */
143static inline void
144fasthash_accum(fasthash_state *hs, const char *k, size_t len)
145{
147
149 hs->accum = 0;
150
151 /*
152 * For consistency, bytewise loads must match the platform's endianness.
153 */
154#ifdef WORDS_BIGENDIAN
155 switch (len)
156 {
157 case 8:
158 memcpy(&hs->accum, k, 8);
159 break;
160 case 7:
161 hs->accum |= (uint64) k[6] << 8;
163 case 6:
164 hs->accum |= (uint64) k[5] << 16;
166 case 5:
167 hs->accum |= (uint64) k[4] << 24;
169 case 4:
170 memcpy(&lower_four, k, sizeof(lower_four));
171 hs->accum |= (uint64) lower_four << 32;
172 break;
173 case 3:
174 hs->accum |= (uint64) k[2] << 40;
176 case 2:
177 hs->accum |= (uint64) k[1] << 48;
179 case 1:
180 hs->accum |= (uint64) k[0] << 56;
181 break;
182 case 0:
183 return;
184 }
185#else
186 switch (len)
187 {
188 case 8:
189 memcpy(&hs->accum, k, 8);
190 break;
191 case 7:
192 hs->accum |= (uint64) k[6] << 48;
194 case 6:
195 hs->accum |= (uint64) k[5] << 40;
197 case 5:
198 hs->accum |= (uint64) k[4] << 32;
200 case 4:
201 memcpy(&lower_four, k, sizeof(lower_four));
202 hs->accum |= lower_four;
203 break;
204 case 3:
205 hs->accum |= (uint64) k[2] << 16;
207 case 2:
208 hs->accum |= (uint64) k[1] << 8;
210 case 1:
211 hs->accum |= (uint64) k[0];
212 break;
213 case 0:
214 return;
215 }
216#endif
217
219}
220
/*
 * Set high bit in lowest byte where the input is zero, from:
 * https://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord
 */
#define haszero64(v) \
	(((v) - 0x0101010101010101) & ~(v) & 0x8080808080808080)
227
228/*
229 * all-purpose workhorse for fasthash_accum_cstring
230 */
231static inline size_t
233{
234 const char *const start = str;
235
236 while (*str)
237 {
238 size_t chunk_len = 0;
239
240 while (chunk_len < FH_SIZEOF_ACCUM && str[chunk_len] != '\0')
241 chunk_len++;
242
244 str += chunk_len;
245 }
246
247 return str - start;
248}
249
250/*
251 * specialized workhorse for fasthash_accum_cstring
252 *
253 * With an aligned pointer, we consume the string a word at a time.
254 * Loading the word containing the NUL terminator cannot segfault since
255 * allocation boundaries are suitably aligned. To keep from setting
256 * off alarms with address sanitizers, exclude this function from
257 * such testing.
258 */
260static inline size_t
262{
263 const char *const start = str;
264 size_t remainder;
266
268
269 /*
270 * For every chunk of input, check for zero bytes before mixing into the
271 * hash. The chunk with zeros must contain the NUL terminator.
272 */
273 for (;;)
274 {
275 uint64 chunk = *(const uint64 *) str;
276
277 zero_byte_low = haszero64(chunk);
278 if (zero_byte_low)
279 break;
280
281 hs->accum = chunk;
284 }
285
286 /* mix in remaining bytes */
288 str += remainder;
289
290 return str - start;
291}
292
293/*
294 * Mix 'str' into the hash state and return the length of the string.
295 */
296static inline size_t
298{
299#if SIZEOF_VOID_P >= 8
300
301 size_t len;
302#ifdef USE_ASSERT_CHECKING
303 size_t len_check;
305
306 memcpy(&hs_check, hs, sizeof(fasthash_state));
308#endif
310 {
312 Assert(len_check == len);
313 Assert(hs_check.hash == hs->hash);
314 return len;
315 }
316#endif /* SIZEOF_VOID_P */
317
318 /*
319 * It's not worth it to try to make the word-at-a-time optimization work
320 * on 32-bit platforms.
321 */
323}
324
325/*
326 * The finalizer
327 *
328 * 'tweak' is intended to be the input length when the caller doesn't know
329 * the length ahead of time, such as for NUL-terminated strings, otherwise
330 * zero.
331 */
332static inline uint64
337
338/*
339 * Reduce a 64-bit hash to a 32-bit hash.
340 *
341 * This optional step provides a bit more additional mixing compared to
342 * just taking the lower 32-bits.
343 */
344static inline uint32
346{
347 /*
348 * Convert the 64-bit hashcode to Fermat residue, which shall retain
349 * information from both the higher and lower parts of hashcode.
350 */
351 return h - (h >> 32);
352}
353
354/* finalize and reduce */
355static inline uint32
360
361
362/* Standalone functions */
363
364/*
365 * The original fasthash64 function, re-implemented using the incremental
366 * interface. Returns the same 64-bit hashcode as the original,
367 * at least on little-endian machines. 'len' controls not only how
368 * many bytes to hash, but also modifies the internal seed.
369 * 'seed' can be zero.
370 */
371static inline uint64
372fasthash64(const char *k, size_t len, uint64 seed)
373{
375
376 fasthash_init(&hs, 0);
377
378 /* re-initialize the seed according to input length */
379 hs.hash = seed ^ (len * 0x880355f21e6d1965);
380
381 while (len >= FH_SIZEOF_ACCUM)
382 {
384 k += FH_SIZEOF_ACCUM;
386 }
387
388 fasthash_accum(&hs, k, len);
389
390 /*
391 * Since we already mixed the input length into the seed, we can just pass
392 * zero here. This matches upstream behavior as well.
393 */
394 return fasthash_final64(&hs, 0);
395}
396
397/* like fasthash64, but returns a 32-bit hashcode */
398static inline uint32
399fasthash32(const char *k, size_t len, uint64 seed)
400{
401 return fasthash_reduce32(fasthash64(k, len, seed));
402}
403
404/*
405 * Convenience function for hashing NUL-terminated strings
406 *
407 * Note: This is faster than, and computes a different result from,
408 * "fasthash32(s, strlen(s))"
409 */
410static inline uint32
411hash_string(const char *s)
412{
414 size_t s_len;
415
416 fasthash_init(&hs, 0);
417
418 /*
419 * Combine string into the hash and save the length for tweaking the final
420 * mix.
421 */
423
424 return fasthash_final32(&hs, s_len);
425}
426
427#endif /* HASHFN_UNSTABLE_H */
#define PointerIsAligned(pointer, type)
Definition c.h:852
#define Assert(condition)
Definition c.h:943
uint64_t uint64
Definition c.h:625
uint32_t uint32
Definition c.h:624
#define pg_fallthrough
Definition c.h:161
memcpy(sums, checksumBaseOffsets, sizeof(checksumBaseOffsets))
#define haszero64(v)
return str start
static uint32 fasthash_reduce32(uint64 h)
static uint32 fasthash32(const char *k, size_t len, uint64 seed)
#define FH_SIZEOF_ACCUM
pg_attribute_no_sanitize_address() static inline size_t fasthash_accum_cstring_aligned(fasthash_state *hs
const char * str
static size_t fasthash_accum_cstring(fasthash_state *hs, const char *str)
uint64 zero_byte_low
static uint64 fasthash64(const char *k, size_t len, uint64 seed)
static uint64 fasthash_mix(uint64 h, uint64 tweak)
static uint32 fasthash_final32(fasthash_state *hs, uint64 tweak)
static void fasthash_combine(fasthash_state *hs)
static void fasthash_accum(fasthash_state *hs, const char *k, size_t len)
static uint64 fasthash_final64(fasthash_state *hs, uint64 tweak)
static uint32 hash_string(const char *s)
size_t remainder
static size_t fasthash_accum_cstring_unaligned(fasthash_state *hs, const char *str)
static void fasthash_init(fasthash_state *hs, uint64 seed)
const void size_t len
static int fb(int x)