PostgreSQL Source Code git master
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
hashfn_unstable.h
Go to the documentation of this file.
1/*
2 * hashfn_unstable.h
3 *
4 * Building blocks for creating fast inlineable hash functions. The
5 * functions in this file are not guaranteed to be stable between versions,
6 * and may differ by hardware platform. Hence they must not be used in
7 * indexes or other on-disk structures. See hashfn.h if you need stability.
8 *
9 *
10 * Portions Copyright (c) 2024-2025, PostgreSQL Global Development Group
11 *
12 * src/include/common/hashfn_unstable.h
13 */
14#ifndef HASHFN_UNSTABLE_H
15#define HASHFN_UNSTABLE_H
16
17
18/*
19 * fasthash is a modification of code taken from
20 * https://code.google.com/archive/p/fast-hash/source/default/source
21 * under the terms of the MIT license. The original copyright
22 * notice follows:
23 */
24
25/* The MIT License
26
27 Copyright (C) 2012 Zilong Tan (eric.zltan@gmail.com)
28
29 Permission is hereby granted, free of charge, to any person
30 obtaining a copy of this software and associated documentation
31 files (the "Software"), to deal in the Software without
32 restriction, including without limitation the rights to use, copy,
33 modify, merge, publish, distribute, sublicense, and/or sell copies
34 of the Software, and to permit persons to whom the Software is
35 furnished to do so, subject to the following conditions:
36
37 The above copyright notice and this permission notice shall be
38 included in all copies or substantial portions of the Software.
39
40 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
41 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
42 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
43 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
44 BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
45 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
46 CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
47 SOFTWARE.
48*/
49
50/*
51 * fasthash as implemented here has two interfaces:
52 *
53 * 1) Standalone functions, e.g. fasthash32() for a single value with a
54 * known length. These return the same hash code as the original, at
55 * least on little-endian machines.
56 *
57 * 2) Incremental interface. This can used for incorporating multiple
58 * inputs. First, initialize the hash state (here with a zero seed):
59 *
60 * fasthash_state hs;
61 * fasthash_init(&hs, 0);
62 *
63 * If the inputs are of types that can be trivially cast to uint64, it's
64 * sufficient to do:
65 *
66 * hs.accum = value1;
67 * fasthash_combine(&hs);
68 * hs.accum = value2;
69 * fasthash_combine(&hs);
70 * ...
71 *
72 * For longer or variable-length input, fasthash_accum() is a more
73 * flexible, but more verbose method. The standalone functions use this
74 * internally, so see fasthash64() for an example of this.
75 *
76 * After all inputs have been mixed in, finalize the hash:
77 *
78 * hashcode = fasthash_final32(&hs, 0);
79 *
80 * The incremental interface allows an optimization for NUL-terminated
81 * C strings:
82 *
83 * len = fasthash_accum_cstring(&hs, str);
84 * hashcode = fasthash_final32(&hs, len);
85 *
86 * By handling the terminator on-the-fly, we can avoid needing a strlen()
87 * call to tell us how many bytes to hash. Experimentation has found that
88 * SMHasher fails unless we incorporate the length, so it is passed to
89 * the finalizer as a tweak.
90 */
91
92
93typedef struct fasthash_state
94{
95 /* staging area for chunks of input */
97
100
101#define FH_SIZEOF_ACCUM sizeof(uint64)
102
103
104/*
105 * Initialize the hash state.
106 *
107 * 'seed' can be zero.
108 */
109static inline void
111{
112 memset(hs, 0, sizeof(fasthash_state));
113 hs->hash = seed ^ 0x880355f21e6d1965;
114}
115
116/* both the finalizer and part of the combining step */
117static inline uint64
119{
120 h ^= (h >> 23) + tweak;
121 h *= 0x2127599bf4325c37;
122 h ^= h >> 47;
123 return h;
124}
125
126/* combine one chunk of input into the hash */
127static inline void
129{
130 hs->hash ^= fasthash_mix(hs->accum, 0);
131 hs->hash *= 0x880355f21e6d1965;
132}
133
134/* accumulate up to 8 bytes of input and combine it into the hash */
135static inline void
136fasthash_accum(fasthash_state *hs, const char *k, size_t len)
137{
138 uint32 lower_four;
139
141 hs->accum = 0;
142
143 /*
144 * For consistency, bytewise loads must match the platform's endianness.
145 */
146#ifdef WORDS_BIGENDIAN
147 switch (len)
148 {
149 case 8:
150 memcpy(&hs->accum, k, 8);
151 break;
152 case 7:
153 hs->accum |= (uint64) k[6] << 8;
154 /* FALLTHROUGH */
155 case 6:
156 hs->accum |= (uint64) k[5] << 16;
157 /* FALLTHROUGH */
158 case 5:
159 hs->accum |= (uint64) k[4] << 24;
160 /* FALLTHROUGH */
161 case 4:
162 memcpy(&lower_four, k, sizeof(lower_four));
163 hs->accum |= (uint64) lower_four << 32;
164 break;
165 case 3:
166 hs->accum |= (uint64) k[2] << 40;
167 /* FALLTHROUGH */
168 case 2:
169 hs->accum |= (uint64) k[1] << 48;
170 /* FALLTHROUGH */
171 case 1:
172 hs->accum |= (uint64) k[0] << 56;
173 break;
174 case 0:
175 return;
176 }
177#else
178 switch (len)
179 {
180 case 8:
181 memcpy(&hs->accum, k, 8);
182 break;
183 case 7:
184 hs->accum |= (uint64) k[6] << 48;
185 /* FALLTHROUGH */
186 case 6:
187 hs->accum |= (uint64) k[5] << 40;
188 /* FALLTHROUGH */
189 case 5:
190 hs->accum |= (uint64) k[4] << 32;
191 /* FALLTHROUGH */
192 case 4:
193 memcpy(&lower_four, k, sizeof(lower_four));
194 hs->accum |= lower_four;
195 break;
196 case 3:
197 hs->accum |= (uint64) k[2] << 16;
198 /* FALLTHROUGH */
199 case 2:
200 hs->accum |= (uint64) k[1] << 8;
201 /* FALLTHROUGH */
202 case 1:
203 hs->accum |= (uint64) k[0];
204 break;
205 case 0:
206 return;
207 }
208#endif
209
211}
212
213/*
214 * Set high bit in lowest byte where the input is zero, from:
215 * https://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord
216 */
217#define haszero64(v) \
218 (((v) - 0x0101010101010101) & ~(v) & 0x8080808080808080)
219
220/*
221 * all-purpose workhorse for fasthash_accum_cstring
222 */
223static inline size_t
225{
226 const char *const start = str;
227
228 while (*str)
229 {
230 size_t chunk_len = 0;
231
232 while (chunk_len < FH_SIZEOF_ACCUM && str[chunk_len] != '\0')
233 chunk_len++;
234
235 fasthash_accum(hs, str, chunk_len);
236 str += chunk_len;
237 }
238
239 return str - start;
240}
241
242/*
243 * specialized workhorse for fasthash_accum_cstring
244 *
245 * With an aligned pointer, we consume the string a word at a time.
246 * Loading the word containing the NUL terminator cannot segfault since
247 * allocation boundaries are suitably aligned. To keep from setting
248 * off alarms with address sanitizers, exclude this function from
249 * such testing.
250 */
252static inline size_t
253fasthash_accum_cstring_aligned(fasthash_state *hs, const char *str)
254{
255 const char *const start = str;
256 size_t remainder;
258
260
261 /*
262 * For every chunk of input, check for zero bytes before mixing into the
263 * hash. The chunk with zeros must contain the NUL terminator.
264 */
265 for (;;)
266 {
267 uint64 chunk = *(uint64 *) str;
268
269 zero_byte_low = haszero64(chunk);
270 if (zero_byte_low)
271 break;
272
273 hs->accum = chunk;
276 }
277
278 /* mix in remaining bytes */
280 str += remainder;
281
282 return str - start;
283}
284
285/*
286 * Mix 'str' into the hash state and return the length of the string.
287 */
288static inline size_t
290{
291#if SIZEOF_VOID_P >= 8
292
293 size_t len;
294#ifdef USE_ASSERT_CHECKING
295 size_t len_check;
296 fasthash_state hs_check;
297
298 memcpy(&hs_check, hs, sizeof(fasthash_state));
299 len_check = fasthash_accum_cstring_unaligned(&hs_check, str);
300#endif
302 {
303 len = fasthash_accum_cstring_aligned(hs, str);
304 Assert(len_check == len);
305 Assert(hs_check.hash == hs->hash);
306 return len;
307 }
308#endif /* SIZEOF_VOID_P */
309
310 /*
311 * It's not worth it to try to make the word-at-a-time optimization work
312 * on 32-bit platforms.
313 */
315}
316
317/*
318 * The finalizer
319 *
320 * 'tweak' is intended to be the input length when the caller doesn't know
321 * the length ahead of time, such as for NUL-terminated strings, otherwise
322 * zero.
323 */
324static inline uint64
326{
327 return fasthash_mix(hs->hash, tweak);
328}
329
330/*
331 * Reduce a 64-bit hash to a 32-bit hash.
332 *
333 * This optional step provides a bit more additional mixing compared to
334 * just taking the lower 32-bits.
335 */
336static inline uint32
338{
339 /*
340 * Convert the 64-bit hashcode to Fermat residue, which shall retain
341 * information from both the higher and lower parts of hashcode.
342 */
343 return h - (h >> 32);
344}
345
346/* finalize and reduce */
347static inline uint32
349{
350 return fasthash_reduce32(fasthash_final64(hs, tweak));
351}
352
353/*
354 * The original fasthash64 function, re-implemented using the incremental
355 * interface. Returns a 64-bit hashcode. 'len' controls not only how
356 * many bytes to hash, but also modifies the internal seed.
357 * 'seed' can be zero.
358 */
359static inline uint64
360fasthash64(const char *k, size_t len, uint64 seed)
361{
363
364 fasthash_init(&hs, 0);
365
366 /* re-initialize the seed according to input length */
367 hs.hash = seed ^ (len * 0x880355f21e6d1965);
368
369 while (len >= FH_SIZEOF_ACCUM)
370 {
372 k += FH_SIZEOF_ACCUM;
374 }
375
376 fasthash_accum(&hs, k, len);
377 return fasthash_final64(&hs, 0);
378}
379
380/* like fasthash64, but returns a 32-bit hashcode */
381static inline uint32
382fasthash32(const char *k, size_t len, uint64 seed)
383{
384 return fasthash_reduce32(fasthash64(k, len, seed));
385}
386
387/*
388 * Convenience function for hashing NUL-terminated strings
389 */
390static inline uint32
391hash_string(const char *s)
392{
394 size_t s_len;
395
396 fasthash_init(&hs, 0);
397
398 /*
399 * Combine string into the hash and save the length for tweaking the final
400 * mix.
401 */
402 s_len = fasthash_accum_cstring(&hs, s);
403
404 return fasthash_final32(&hs, s_len);
405}
406
407#endif /* HASHFN_UNSTABLE_H */
#define PointerIsAligned(pointer, type)
Definition: c.h:740
uint64_t uint64
Definition: c.h:503
uint32_t uint32
Definition: c.h:502
Assert(PointerIsAligned(start, uint64))
#define haszero64(v)
return str start
static uint32 fasthash_reduce32(uint64 h)
static uint32 fasthash32(const char *k, size_t len, uint64 seed)
#define FH_SIZEOF_ACCUM
pg_attribute_no_sanitize_address() static inline size_t fasthash_accum_cstring_aligned(fasthash_state *hs
const char * str
static size_t fasthash_accum_cstring(fasthash_state *hs, const char *str)
uint64 zero_byte_low
static uint64 fasthash64(const char *k, size_t len, uint64 seed)
static uint64 fasthash_mix(uint64 h, uint64 tweak)
static uint32 fasthash_final32(fasthash_state *hs, uint64 tweak)
struct fasthash_state fasthash_state
static void fasthash_combine(fasthash_state *hs)
static void fasthash_accum(fasthash_state *hs, const char *k, size_t len)
static uint64 fasthash_final64(fasthash_state *hs, uint64 tweak)
static uint32 hash_string(const char *s)
size_t remainder
static size_t fasthash_accum_cstring_unaligned(fasthash_state *hs, const char *str)
static void fasthash_init(fasthash_state *hs, uint64 seed)
const void size_t len