PostgreSQL Source Code  git master
bloomfilter.c File Reference
#include "postgres.h"
#include <math.h>
#include "lib/bloomfilter.h"
#include "port/pg_bitutils.h"
#include "utils/hashutils.h"
Include dependency graph for bloomfilter.c:

Go to the source code of this file.

Data Structures

struct  bloom_filter
 

Macros

#define MAX_HASH_FUNCS   10
 

Functions

static int my_bloom_power (uint64 target_bitset_bits)
 
static int optimal_k (uint64 bitset_bits, int64 total_elems)
 
static void k_hashes (bloom_filter *filter, uint32 *hashes, unsigned char *elem, size_t len)
 
static uint32 mod_m (uint32 a, uint64 m)
 
bloom_filterbloom_create (int64 total_elems, int bloom_work_mem, uint64 seed)
 
void bloom_free (bloom_filter *filter)
 
void bloom_add_element (bloom_filter *filter, unsigned char *elem, size_t len)
 
bool bloom_lacks_element (bloom_filter *filter, unsigned char *elem, size_t len)
 
double bloom_prop_bits_set (bloom_filter *filter)
 

Macro Definition Documentation

◆ MAX_HASH_FUNCS

#define MAX_HASH_FUNCS   10

Definition at line 42 of file bloomfilter.c.

Referenced by bloom_add_element(), bloom_lacks_element(), and optimal_k().

Function Documentation

◆ bloom_add_element()

void bloom_add_element ( bloom_filter filter,
unsigned char *  elem,
size_t  len 
)

Definition at line 136 of file bloomfilter.c.

References bloom_filter::bitset, i, bloom_filter::k_hash_funcs, k_hashes(), and MAX_HASH_FUNCS.

Referenced by bt_target_page_check(), and populate_with_dummy_strings().

137 {
138  uint32 hashes[MAX_HASH_FUNCS];
139  int i;
140 
141  k_hashes(filter, hashes, elem, len);
142 
143  /* Map a bit-wise address to a byte-wise address + bit offset */
144  for (i = 0; i < filter->k_hash_funcs; i++)
145  {
146  filter->bitset[hashes[i] >> 3] |= 1 << (hashes[i] & 7);
147  }
148 }
int k_hash_funcs
Definition: bloomfilter.c:47
unsigned int uint32
Definition: c.h:358
static void k_hashes(bloom_filter *filter, uint32 *hashes, unsigned char *elem, size_t len)
Definition: bloomfilter.c:251
#define MAX_HASH_FUNCS
Definition: bloomfilter.c:42
unsigned char bitset[FLEXIBLE_ARRAY_MEMBER]
Definition: bloomfilter.c:51
int i

◆ bloom_create()

bloom_filter* bloom_create ( int64  total_elems,
int  bloom_work_mem,
uint64  seed 
)

Definition at line 88 of file bloomfilter.c.

References BITS_PER_BYTE, bloom_filter::bitset, bloom_filter::k_hash_funcs, bloom_filter::m, Max, Min, my_bloom_power(), offsetof, optimal_k(), palloc0(), and bloom_filter::seed.

Referenced by bt_check_every_level(), and create_and_test_bloom().

89 {
90  bloom_filter *filter;
91  int bloom_power;
92  uint64 bitset_bytes;
93  uint64 bitset_bits;
94 
95  /*
96  * Aim for two bytes per element; this is sufficient to get a false
97  * positive rate below 1%, independent of the size of the bitset or total
98  * number of elements. Also, if rounding down the size of the bitset to
99  * the next lowest power of two turns out to be a significant drop, the
100  * false positive rate still won't exceed 2% in almost all cases.
101  */
102  bitset_bytes = Min(bloom_work_mem * UINT64CONST(1024), total_elems * 2);
103  bitset_bytes = Max(1024 * 1024, bitset_bytes);
104 
105  /*
106  * Size in bits should be the highest power of two <= target. bitset_bits
107  * is uint64 because PG_UINT32_MAX is 2^32 - 1, not 2^32
108  */
109  bloom_power = my_bloom_power(bitset_bytes * BITS_PER_BYTE);
110  bitset_bits = UINT64CONST(1) << bloom_power;
111  bitset_bytes = bitset_bits / BITS_PER_BYTE;
112 
113  /* Allocate bloom filter with unset bitset */
114  filter = palloc0(offsetof(bloom_filter, bitset) +
115  sizeof(unsigned char) * bitset_bytes);
116  filter->k_hash_funcs = optimal_k(bitset_bits, total_elems);
117  filter->seed = seed;
118  filter->m = bitset_bits;
119 
120  return filter;
121 }
#define BITS_PER_BYTE
#define Min(x, y)
Definition: c.h:904
uint64 seed
Definition: bloomfilter.c:48
static int optimal_k(uint64 bitset_bits, int64 total_elems)
Definition: bloomfilter.c:230
int k_hash_funcs
Definition: bloomfilter.c:47
void * palloc0(Size size)
Definition: mcxt.c:980
#define Max(x, y)
Definition: c.h:898
static int my_bloom_power(uint64 target_bitset_bits)
Definition: bloomfilter.c:211
#define offsetof(type, field)
Definition: c.h:655

◆ bloom_free()

void bloom_free ( bloom_filter filter)

Definition at line 127 of file bloomfilter.c.

References pfree().

Referenced by bt_check_every_level(), and create_and_test_bloom().

128 {
129  pfree(filter);
130 }
void pfree(void *pointer)
Definition: mcxt.c:1056

◆ bloom_lacks_element()

bool bloom_lacks_element ( bloom_filter filter,
unsigned char *  elem,
size_t  len 
)

Definition at line 158 of file bloomfilter.c.

References bloom_filter::bitset, i, bloom_filter::k_hash_funcs, k_hashes(), and MAX_HASH_FUNCS.

Referenced by bt_downlink_missing_check(), bt_tuple_present_callback(), and nfalsepos_for_missing_strings().

159 {
160  uint32 hashes[MAX_HASH_FUNCS];
161  int i;
162 
163  k_hashes(filter, hashes, elem, len);
164 
165  /* Map a bit-wise address to a byte-wise address + bit offset */
166  for (i = 0; i < filter->k_hash_funcs; i++)
167  {
168  if (!(filter->bitset[hashes[i] >> 3] & (1 << (hashes[i] & 7))))
169  return true;
170  }
171 
172  return false;
173 }
int k_hash_funcs
Definition: bloomfilter.c:47
unsigned int uint32
Definition: c.h:358
static void k_hashes(bloom_filter *filter, uint32 *hashes, unsigned char *elem, size_t len)
Definition: bloomfilter.c:251
#define MAX_HASH_FUNCS
Definition: bloomfilter.c:42
unsigned char bitset[FLEXIBLE_ARRAY_MEMBER]
Definition: bloomfilter.c:51
int i

◆ bloom_prop_bits_set()

double bloom_prop_bits_set ( bloom_filter filter)

Definition at line 188 of file bloomfilter.c.

References BITS_PER_BYTE, bloom_filter::bitset, bloom_filter::m, and pg_popcount().

Referenced by bt_check_every_level(), and create_and_test_bloom().

189 {
190  int bitset_bytes = filter->m / BITS_PER_BYTE;
191  uint64 bits_set = pg_popcount((char *) filter->bitset, bitset_bytes);
192 
193  return bits_set / (double) filter->m;
194 }
#define BITS_PER_BYTE
uint64 pg_popcount(const char *buf, int bytes)
Definition: pg_bitutils.c:282
unsigned char bitset[FLEXIBLE_ARRAY_MEMBER]
Definition: bloomfilter.c:51

◆ k_hashes()

static void k_hashes ( bloom_filter filter,
uint32 hashes,
unsigned char *  elem,
size_t  len 
)
static

Definition at line 251 of file bloomfilter.c.

References DatumGetUInt64, hash(), hash_any_extended(), i, bloom_filter::k_hash_funcs, bloom_filter::m, mod_m(), and bloom_filter::seed.

Referenced by bloom_add_element(), and bloom_lacks_element().

252 {
253  uint64 hash;
254  uint32 x,
255  y;
256  uint64 m;
257  int i;
258 
259  /* Use 64-bit hashing to get two independent 32-bit hashes */
260  hash = DatumGetUInt64(hash_any_extended(elem, len, filter->seed));
261  x = (uint32) hash;
262  y = (uint32) (hash >> 32);
263  m = filter->m;
264 
265  x = mod_m(x, m);
266  y = mod_m(y, m);
267 
268  /* Accumulate hashes */
269  hashes[0] = x;
270  for (i = 1; i < filter->k_hash_funcs; i++)
271  {
272  x = mod_m(x + y, m);
273  y = mod_m(y + i, m);
274 
275  hashes[i] = x;
276  }
277 }
Datum hash_any_extended(const unsigned char *k, int keylen, uint64 seed)
Definition: hashfn.c:374
static uint32 mod_m(uint32 a, uint64 m)
Definition: bloomfilter.c:289
uint64 seed
Definition: bloomfilter.c:48
int k_hash_funcs
Definition: bloomfilter.c:47
unsigned int uint32
Definition: c.h:358
#define DatumGetUInt64(X)
Definition: postgres.h:634
int i
static unsigned hash(unsigned *uv, int n)
Definition: rege_dfa.c:541

◆ mod_m()

static uint32 mod_m ( uint32  a,
uint64  m 
)
inlinestatic

Definition at line 289 of file bloomfilter.c.

References Assert, and PG_UINT32_MAX.

Referenced by k_hashes().

290 {
291  Assert(m <= PG_UINT32_MAX + UINT64CONST(1));
292  Assert(((m - 1) & m) == 0);
293 
294  return val & (m - 1);
295 }
#define PG_UINT32_MAX
Definition: c.h:442
#define Assert(condition)
Definition: c.h:732
long val
Definition: informix.c:684

◆ my_bloom_power()

static int my_bloom_power ( uint64  target_bitset_bits)
static

Definition at line 211 of file bloomfilter.c.

Referenced by bloom_create().

212 {
213  int bloom_power = -1;
214 
215  while (target_bitset_bits > 0 && bloom_power < 32)
216  {
217  bloom_power++;
218  target_bitset_bits >>= 1;
219  }
220 
221  return bloom_power;
222 }

◆ optimal_k()

static int optimal_k ( uint64  bitset_bits,
int64  total_elems 
)
static

Definition at line 230 of file bloomfilter.c.

References Max, MAX_HASH_FUNCS, Min, and rint().

Referenced by bloom_create().

231 {
232  int k = rint(log(2.0) * bitset_bits / total_elems);
233 
234  return Max(1, Min(k, MAX_HASH_FUNCS));
235 }
#define Min(x, y)
Definition: c.h:904
double rint(double x)
Definition: rint.c:21
#define Max(x, y)
Definition: c.h:898
#define MAX_HASH_FUNCS
Definition: bloomfilter.c:42