/*-------------------------------------------------------------------------
 *
 * hashfn.c
 *      Generic hashing functions, and hash functions for use in dynahash.c
 *      hashtables
 *
 *
 * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *      src/common/hashfn.c
 *
 * NOTES
 *      It is expected that every bit of a hash function's 32-bit result is
 *      as random as every other; failure to ensure this is likely to lead
 *      to poor performance of hash tables.  In most cases a hash
 *      function should use hash_bytes() or its variant hash_bytes_uint32(),
 *      or the wrappers hash_any() and hash_uint32 defined in hashfn.h.
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "common/hashfn.h"
#include "port/pg_bitutils.h"


/*
 * This hash function was written by Bob Jenkins
 * (bob_jenkins@burtleburtle.net), and superficially adapted
 * for PostgreSQL by Neil Conway.  For more information on this
 * hash function, see http://burtleburtle.net/bob/hash/doobs.html,
 * or Bob's article in Dr. Dobb's Journal, Sept. 1997.
 *
 * In the current code, we have adopted Bob's 2006 update of his hash
 * function to fetch the data a word at a time when it is suitably aligned.
 * This makes for a useful speedup, at the cost of having to maintain
 * four code paths (aligned vs unaligned, and little-endian vs big-endian).
 * It also uses two separate mixing functions mix() and final(), instead
 * of a slower multi-purpose function.
 */

/* Get a bit mask of the bits set in non-uint32 aligned addresses */
#define UINT32_ALIGN_MASK (sizeof(uint32) - 1)

#define rot(x,k) pg_rotate_left32(x, k)

/*----------
 * mix -- mix 3 32-bit values reversibly.
 *
 * This is reversible, so any information in (a,b,c) before mix() is
 * still in (a,b,c) after mix().
 *
 * If four pairs of (a,b,c) inputs are run through mix(), or through
 * mix() in reverse, there are at least 32 bits of the output that
 * are sometimes the same for one pair and different for another pair.
 * This was tested for:
 * * pairs that differed by one bit, by two bits, in any combination
 *   of top bits of (a,b,c), or in any combination of bottom bits of
 *   (a,b,c).
 * * "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
 *   the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
 *   is commonly produced by subtraction) looks like a single 1-bit
 *   difference.
 * * the base values were pseudorandom, all zero but one bit set, or
 *   all zero plus a counter that starts at zero.
 *
 * This does not achieve avalanche.  There are input bits of (a,b,c)
 * that fail to affect some output bits of (a,b,c), especially of a.  The
 * most thoroughly mixed value is c, but it doesn't really even achieve
 * avalanche in c.
 *
 * This allows some parallelism.  Read-after-writes are good at doubling
 * the number of bits affected, so the goal of mixing pulls in the opposite
 * direction from the goal of parallelism.  I did what I could.  Rotates
 * seem to cost as much as shifts on every machine I could lay my hands on,
 * and rotates are much kinder to the top and bottom bits, so I used rotates.
 *----------
 */
#define mix(a,b,c) \
{ \
    a -= c;  a ^= rot(c, 4);  c += b; \
    b -= a;  b ^= rot(a, 6);  a += c; \
    c -= b;  c ^= rot(b, 8);  b += a; \
    a -= c;  a ^= rot(c,16);  c += b; \
    b -= a;  b ^= rot(a,19);  a += c; \
    c -= b;  c ^= rot(b, 4);  b += a; \
}

/*----------
 * final -- final mixing of 3 32-bit values (a,b,c) into c
 *
 * Pairs of (a,b,c) values differing in only a few bits will usually
 * produce values of c that look totally different.  This was tested for
 * * pairs that differed by one bit, by two bits, in any combination
 *   of top bits of (a,b,c), or in any combination of bottom bits of
 *   (a,b,c).
 * * "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
 *   the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
 *   is commonly produced by subtraction) looks like a single 1-bit
 *   difference.
 * * the base values were pseudorandom, all zero but one bit set, or
 *   all zero plus a counter that starts at zero.
 *
 * The use of separate functions for mix() and final() allows for a
 * substantial performance increase since final() does not need to
 * do well in reverse, but it does need to affect all output bits.
 * mix(), on the other hand, does not need to affect all output
 * bits (affecting 32 bits is enough).  The original hash function had
 * a single mixing operation that had to satisfy both sets of requirements
 * and was slower as a result.
 *----------
 */
#define final(a,b,c) \
{ \
    c ^= b; c -= rot(b,14); \
    a ^= c; a -= rot(c,11); \
    b ^= a; b -= rot(a,25); \
    c ^= b; c -= rot(b,16); \
    a ^= c; a -= rot(c, 4); \
    b ^= a; b -= rot(a,14); \
    c ^= b; c -= rot(b,24); \
}

/*
 * hash_bytes() -- hash a variable-length key into a 32-bit value
 *      k       : the key (the unaligned variable-length array of bytes)
 *      len     : the length of the key, counting by bytes
 *
 * Returns a uint32 value.  Every bit of the key affects every bit of
 * the return value.  Every 1-bit and 2-bit delta achieves avalanche.
 * About 6*len+35 instructions.  The best hash table sizes are powers
 * of 2.  There is no need to do mod a prime (mod is sooo slow!).
 * If you need less than 32 bits, use a bitmask.
 *
 * This procedure must never throw elog(ERROR); the ResourceOwner code
 * relies on this not to fail.
 *
 * Note: we could easily change this function to return a 64-bit hash value
 * by using the final values of both b and c.  b is perhaps a little less
 * well mixed than c, however.
 */
uint32
hash_bytes(const unsigned char *k, int keylen)
{
    uint32      a,
                b,
                c,
                len;

    /* Set up the internal state */
    len = keylen;
    a = b = c = 0x9e3779b9 + len + 3923095;

    /* If the source pointer is word-aligned, we use word-wide fetches */
    if (((uintptr_t) k & UINT32_ALIGN_MASK) == 0)
    {
        /* Code path for aligned source data */
        const uint32 *ka = (const uint32 *) k;

        /* handle most of the key */
        while (len >= 12)
        {
            a += ka[0];
            b += ka[1];
            c += ka[2];
            mix(a, b, c);
            ka += 3;
            len -= 12;
        }

        /* handle the last 11 bytes */
        k = (const unsigned char *) ka;
#ifdef WORDS_BIGENDIAN
        switch (len)
        {
            case 11:
                c += ((uint32) k[10] << 8);
                /* fall through */
            case 10:
                c += ((uint32) k[9] << 16);
                /* fall through */
            case 9:
                c += ((uint32) k[8] << 24);
                /* fall through */
            case 8:
                /* the lowest byte of c is reserved for the length */
                b += ka[1];
                a += ka[0];
                break;
            case 7:
                b += ((uint32) k[6] << 8);
                /* fall through */
            case 6:
                b += ((uint32) k[5] << 16);
                /* fall through */
            case 5:
                b += ((uint32) k[4] << 24);
                /* fall through */
            case 4:
                a += ka[0];
                break;
            case 3:
                a += ((uint32) k[2] << 8);
                /* fall through */
            case 2:
                a += ((uint32) k[1] << 16);
                /* fall through */
            case 1:
                a += ((uint32) k[0] << 24);
                /* case 0: nothing left to add */
        }
#else                           /* !WORDS_BIGENDIAN */
        switch (len)
        {
            case 11:
                c += ((uint32) k[10] << 24);
                /* fall through */
            case 10:
                c += ((uint32) k[9] << 16);
                /* fall through */
            case 9:
                c += ((uint32) k[8] << 8);
                /* fall through */
            case 8:
                /* the lowest byte of c is reserved for the length */
                b += ka[1];
                a += ka[0];
                break;
            case 7:
                b += ((uint32) k[6] << 16);
                /* fall through */
            case 6:
                b += ((uint32) k[5] << 8);
                /* fall through */
            case 5:
                b += k[4];
                /* fall through */
            case 4:
                a += ka[0];
                break;
            case 3:
                a += ((uint32) k[2] << 16);
                /* fall through */
            case 2:
                a += ((uint32) k[1] << 8);
                /* fall through */
            case 1:
                a += k[0];
                /* case 0: nothing left to add */
        }
#endif                          /* WORDS_BIGENDIAN */
    }
    else
    {
        /* Code path for non-aligned source data */

        /* handle most of the key */
        while (len >= 12)
        {
#ifdef WORDS_BIGENDIAN
            a += (k[3] + ((uint32) k[2] << 8) + ((uint32) k[1] << 16) + ((uint32) k[0] << 24));
            b += (k[7] + ((uint32) k[6] << 8) + ((uint32) k[5] << 16) + ((uint32) k[4] << 24));
            c += (k[11] + ((uint32) k[10] << 8) + ((uint32) k[9] << 16) + ((uint32) k[8] << 24));
#else                           /* !WORDS_BIGENDIAN */
            a += (k[0] + ((uint32) k[1] << 8) + ((uint32) k[2] << 16) + ((uint32) k[3] << 24));
            b += (k[4] + ((uint32) k[5] << 8) + ((uint32) k[6] << 16) + ((uint32) k[7] << 24));
            c += (k[8] + ((uint32) k[9] << 8) + ((uint32) k[10] << 16) + ((uint32) k[11] << 24));
#endif                          /* WORDS_BIGENDIAN */
            mix(a, b, c);
            k += 12;
            len -= 12;
        }

        /* handle the last 11 bytes */
#ifdef WORDS_BIGENDIAN
        switch (len)
        {
            case 11:
                c += ((uint32) k[10] << 8);
                /* fall through */
            case 10:
                c += ((uint32) k[9] << 16);
                /* fall through */
            case 9:
                c += ((uint32) k[8] << 24);
                /* fall through */
            case 8:
                /* the lowest byte of c is reserved for the length */
                b += k[7];
                /* fall through */
            case 7:
                b += ((uint32) k[6] << 8);
                /* fall through */
            case 6:
                b += ((uint32) k[5] << 16);
                /* fall through */
            case 5:
                b += ((uint32) k[4] << 24);
                /* fall through */
            case 4:
                a += k[3];
                /* fall through */
            case 3:
                a += ((uint32) k[2] << 8);
                /* fall through */
            case 2:
                a += ((uint32) k[1] << 16);
                /* fall through */
            case 1:
                a += ((uint32) k[0] << 24);
                /* case 0: nothing left to add */
        }
#else                           /* !WORDS_BIGENDIAN */
        switch (len)
        {
            case 11:
                c += ((uint32) k[10] << 24);
                /* fall through */
            case 10:
                c += ((uint32) k[9] << 16);
                /* fall through */
            case 9:
                c += ((uint32) k[8] << 8);
                /* fall through */
            case 8:
                /* the lowest byte of c is reserved for the length */
                b += ((uint32) k[7] << 24);
                /* fall through */
            case 7:
                b += ((uint32) k[6] << 16);
                /* fall through */
            case 6:
                b += ((uint32) k[5] << 8);
                /* fall through */
            case 5:
                b += k[4];
                /* fall through */
            case 4:
                a += ((uint32) k[3] << 24);
                /* fall through */
            case 3:
                a += ((uint32) k[2] << 16);
                /* fall through */
            case 2:
                a += ((uint32) k[1] << 8);
                /* fall through */
            case 1:
                a += k[0];
                /* case 0: nothing left to add */
        }
#endif                          /* WORDS_BIGENDIAN */
    }

    final(a, b, c);

    /* report the result */
    return c;
}
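
/*
 * Usage sketch (illustrative, not part of the upstream file): picking a
 * bucket in a power-of-two sized table with hash_bytes(), masking the hash
 * rather than taking a modulus, as the comment above recommends.  The
 * function name and table size below are made-up values.
 */
#ifdef HASHFN_USAGE_EXAMPLES
static uint32
example_bucket_for_key(const void *key, int keylen)
{
    uint32      nbuckets = 1024;    /* assumed power-of-2 table size */
    uint32      h = hash_bytes((const unsigned char *) key, keylen);

    return h & (nbuckets - 1);  /* mask instead of "h % nbuckets" */
}
#endif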

/*
 * hash_bytes_extended() -- hash into a 64-bit value, using an optional seed
 *      k       : the key (the unaligned variable-length array of bytes)
 *      len     : the length of the key, counting by bytes
 *      seed    : a 64-bit seed (0 means no seed)
 *
 * Returns a uint64 value.  Otherwise similar to hash_bytes.
 */
uint64
hash_bytes_extended(const unsigned char *k, int keylen, uint64 seed)
{
    uint32      a,
                b,
                c,
                len;

    /* Set up the internal state */
    len = keylen;
    a = b = c = 0x9e3779b9 + len + 3923095;

    /* If the seed is non-zero, use it to perturb the internal state. */
    if (seed != 0)
    {
        /*
         * In essence, the seed is treated as part of the data being hashed,
         * but for simplicity, we pretend that it's padded with four bytes of
         * zeroes so that the seed constitutes a 12-byte chunk.
         */
        a += (uint32) (seed >> 32);
        b += (uint32) seed;
        mix(a, b, c);
    }

    /* If the source pointer is word-aligned, we use word-wide fetches */
    if (((uintptr_t) k & UINT32_ALIGN_MASK) == 0)
    {
        /* Code path for aligned source data */
        const uint32 *ka = (const uint32 *) k;

        /* handle most of the key */
        while (len >= 12)
        {
            a += ka[0];
            b += ka[1];
            c += ka[2];
            mix(a, b, c);
            ka += 3;
            len -= 12;
        }

        /* handle the last 11 bytes */
        k = (const unsigned char *) ka;
#ifdef WORDS_BIGENDIAN
        switch (len)
        {
            case 11:
                c += ((uint32) k[10] << 8);
                /* fall through */
            case 10:
                c += ((uint32) k[9] << 16);
                /* fall through */
            case 9:
                c += ((uint32) k[8] << 24);
                /* fall through */
            case 8:
                /* the lowest byte of c is reserved for the length */
                b += ka[1];
                a += ka[0];
                break;
            case 7:
                b += ((uint32) k[6] << 8);
                /* fall through */
            case 6:
                b += ((uint32) k[5] << 16);
                /* fall through */
            case 5:
                b += ((uint32) k[4] << 24);
                /* fall through */
            case 4:
                a += ka[0];
                break;
            case 3:
                a += ((uint32) k[2] << 8);
                /* fall through */
            case 2:
                a += ((uint32) k[1] << 16);
                /* fall through */
            case 1:
                a += ((uint32) k[0] << 24);
                /* case 0: nothing left to add */
        }
#else                           /* !WORDS_BIGENDIAN */
        switch (len)
        {
            case 11:
                c += ((uint32) k[10] << 24);
                /* fall through */
            case 10:
                c += ((uint32) k[9] << 16);
                /* fall through */
            case 9:
                c += ((uint32) k[8] << 8);
                /* fall through */
            case 8:
                /* the lowest byte of c is reserved for the length */
                b += ka[1];
                a += ka[0];
                break;
            case 7:
                b += ((uint32) k[6] << 16);
                /* fall through */
            case 6:
                b += ((uint32) k[5] << 8);
                /* fall through */
            case 5:
                b += k[4];
                /* fall through */
            case 4:
                a += ka[0];
                break;
            case 3:
                a += ((uint32) k[2] << 16);
                /* fall through */
            case 2:
                a += ((uint32) k[1] << 8);
                /* fall through */
            case 1:
                a += k[0];
                /* case 0: nothing left to add */
        }
#endif                          /* WORDS_BIGENDIAN */
    }
    else
    {
        /* Code path for non-aligned source data */

        /* handle most of the key */
        while (len >= 12)
        {
#ifdef WORDS_BIGENDIAN
            a += (k[3] + ((uint32) k[2] << 8) + ((uint32) k[1] << 16) + ((uint32) k[0] << 24));
            b += (k[7] + ((uint32) k[6] << 8) + ((uint32) k[5] << 16) + ((uint32) k[4] << 24));
            c += (k[11] + ((uint32) k[10] << 8) + ((uint32) k[9] << 16) + ((uint32) k[8] << 24));
#else                           /* !WORDS_BIGENDIAN */
            a += (k[0] + ((uint32) k[1] << 8) + ((uint32) k[2] << 16) + ((uint32) k[3] << 24));
            b += (k[4] + ((uint32) k[5] << 8) + ((uint32) k[6] << 16) + ((uint32) k[7] << 24));
            c += (k[8] + ((uint32) k[9] << 8) + ((uint32) k[10] << 16) + ((uint32) k[11] << 24));
#endif                          /* WORDS_BIGENDIAN */
            mix(a, b, c);
            k += 12;
            len -= 12;
        }

        /* handle the last 11 bytes */
#ifdef WORDS_BIGENDIAN
        switch (len)
        {
            case 11:
                c += ((uint32) k[10] << 8);
                /* fall through */
            case 10:
                c += ((uint32) k[9] << 16);
                /* fall through */
            case 9:
                c += ((uint32) k[8] << 24);
                /* fall through */
            case 8:
                /* the lowest byte of c is reserved for the length */
                b += k[7];
                /* fall through */
            case 7:
                b += ((uint32) k[6] << 8);
                /* fall through */
            case 6:
                b += ((uint32) k[5] << 16);
                /* fall through */
            case 5:
                b += ((uint32) k[4] << 24);
                /* fall through */
            case 4:
                a += k[3];
                /* fall through */
            case 3:
                a += ((uint32) k[2] << 8);
                /* fall through */
            case 2:
                a += ((uint32) k[1] << 16);
                /* fall through */
            case 1:
                a += ((uint32) k[0] << 24);
                /* case 0: nothing left to add */
        }
#else                           /* !WORDS_BIGENDIAN */
        switch (len)
        {
            case 11:
                c += ((uint32) k[10] << 24);
                /* fall through */
            case 10:
                c += ((uint32) k[9] << 16);
                /* fall through */
            case 9:
                c += ((uint32) k[8] << 8);
                /* fall through */
            case 8:
                /* the lowest byte of c is reserved for the length */
                b += ((uint32) k[7] << 24);
                /* fall through */
            case 7:
                b += ((uint32) k[6] << 16);
                /* fall through */
            case 6:
                b += ((uint32) k[5] << 8);
                /* fall through */
            case 5:
                b += k[4];
                /* fall through */
            case 4:
                a += ((uint32) k[3] << 24);
                /* fall through */
            case 3:
                a += ((uint32) k[2] << 16);
                /* fall through */
            case 2:
                a += ((uint32) k[1] << 8);
                /* fall through */
            case 1:
                a += k[0];
                /* case 0: nothing left to add */
        }
#endif                          /* WORDS_BIGENDIAN */
    }

    final(a, b, c);

    /* report the result */
    return ((uint64) b << 32) | c;
}
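
/*
 * Usage sketch (illustrative, not part of the upstream file): deriving two
 * independent 64-bit hashes of the same key by varying the seed.  A zero
 * seed gives the unseeded behavior; the nonzero seed below is an arbitrary
 * made-up constant.
 */
#ifdef HASHFN_USAGE_EXAMPLES
static void
example_seeded_hashes(const unsigned char *key, int keylen)
{
    uint64      h0 = hash_bytes_extended(key, keylen, 0);
    uint64      h1 = hash_bytes_extended(key, keylen,
                                         UINT64CONST(0x9e3779b97f4a7c15));

    /* h0 and h1 are effectively unrelated bit patterns for the same key */
    (void) h0;
    (void) h1;
}
#endif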

/*
 * hash_bytes_uint32() -- hash a 32-bit value to a 32-bit value
 *
 * This has the same result as
 *      hash_bytes(&k, sizeof(uint32))
 * but is faster and doesn't force the caller to store k into memory.
 */
uint32
hash_bytes_uint32(uint32 k)
{
    uint32      a,
                b,
                c;

    a = b = c = 0x9e3779b9 + (uint32) sizeof(uint32) + 3923095;
    a += k;

    final(a, b, c);

    /* report the result */
    return c;
}
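
/*
 * Usage sketch (illustrative, not part of the upstream file): the
 * equivalence stated in the comment above, checked with an assertion.
 */
#ifdef HASHFN_USAGE_EXAMPLES
static void
example_uint32_equivalence(uint32 k)
{
    Assert(hash_bytes_uint32(k) ==
           hash_bytes((const unsigned char *) &k, (int) sizeof(uint32)));
}
#endif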

/*
 * hash_bytes_uint32_extended() -- hash 32-bit value to 64-bit value, with seed
 *
 * Like hash_bytes_uint32, this is a convenience function.
 */
uint64
hash_bytes_uint32_extended(uint32 k, uint64 seed)
{
    uint32      a,
                b,
                c;

    a = b = c = 0x9e3779b9 + (uint32) sizeof(uint32) + 3923095;

    if (seed != 0)
    {
        a += (uint32) (seed >> 32);
        b += (uint32) seed;
        mix(a, b, c);
    }

    a += k;

    final(a, b, c);

    /* report the result */
    return ((uint64) b << 32) | c;
}
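
/*
 * Usage sketch (illustrative, not part of the upstream file): as the code
 * above shows, with a zero seed the low 32 bits of the extended variant
 * coincide with hash_bytes_uint32(); a nonzero seed perturbs the state and
 * breaks that correspondence.
 */
#ifdef HASHFN_USAGE_EXAMPLES
static void
example_uint32_extended(uint32 k)
{
    uint64      h = hash_bytes_uint32_extended(k, 0);

    Assert((uint32) h == hash_bytes_uint32(k));
}
#endif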

/*
 * string_hash: hash function for keys that are NUL-terminated strings.
 *
 * NOTE: this is the default hash function if none is specified.
 */
uint32
string_hash(const void *key, Size keysize)
{
    /*
     * If the string exceeds keysize-1 bytes, we want to hash only that many,
     * because when it is copied into the hash table it will be truncated at
     * that length.
     */
    Size        s_len = strlen((const char *) key);

    s_len = Min(s_len, keysize - 1);
    return hash_bytes((const unsigned char *) key, (int) s_len);
}
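
/*
 * Usage sketch (illustrative, not part of the upstream file): because only
 * the first keysize - 1 bytes are hashed, keys that differ only beyond that
 * point hash identically, matching the truncation applied when the key is
 * stored.  The key size and strings below are made-up values.
 */
#ifdef HASHFN_USAGE_EXAMPLES
static void
example_string_truncation(void)
{
    Size        keysize = 8;    /* hashes at most the first 7 bytes */

    Assert(string_hash("abcdefg-one", keysize) ==
           string_hash("abcdefg-two", keysize));
}
#endif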

/*
 * tag_hash: hash function for fixed-size tag values
 */
uint32
tag_hash(const void *key, Size keysize)
{
    return hash_bytes((const unsigned char *) key, (int) keysize);
}

/*
 * uint32_hash: hash function for keys that are uint32 or int32
 *
 * (tag_hash works for this case too, but is slower)
 */
uint32
uint32_hash(const void *key, Size keysize)
{
    Assert(keysize == sizeof(uint32));
    return hash_bytes_uint32(*((const uint32 *) key));
}
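
/*
 * Usage sketch (illustrative, not part of the upstream file): wiring one of
 * the functions above into a backend dynahash table.  This assumes the
 * backend-only utils/hsearch.h API; the entry struct, table name, and sizes
 * are made-up values.  string_hash is the default dynahash hash function,
 * so passing it explicitly is shown only for illustration.
 */
#ifdef HASHFN_USAGE_EXAMPLES
#include "utils/hsearch.h"

typedef struct ExampleEntry
{
    char        name[64];       /* hash key; must be first */
    int         value;
} ExampleEntry;

static HTAB *
example_create_table(void)
{
    HASHCTL     ctl;

    ctl.keysize = sizeof(((ExampleEntry *) 0)->name);
    ctl.entrysize = sizeof(ExampleEntry);
    ctl.hash = string_hash;

    return hash_create("example table", 128, &ctl,
                       HASH_ELEM | HASH_FUNCTION);
}
#endif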