PostgreSQL Source Code  git master
test_bloomfilter.c
Go to the documentation of this file.
1 /*--------------------------------------------------------------------------
2  *
3  * test_bloomfilter.c
4  * Test false positive rate of Bloom filter.
5  *
6  * Copyright (c) 2018-2024, PostgreSQL Global Development Group
7  *
8  * IDENTIFICATION
9  * src/test/modules/test_bloomfilter/test_bloomfilter.c
10  *
11  * -------------------------------------------------------------------------
12  */
13 #include "postgres.h"
14 
15 #include "common/pg_prng.h"
16 #include "fmgr.h"
17 #include "lib/bloomfilter.h"
18 #include "miscadmin.h"
19 
21 
22 /* Fits decimal representation of PG_INT64_MIN + 2 bytes: */
23 #define MAX_ELEMENT_BYTES 21
24 /* False positive rate WARNING threshold (1%): */
25 #define FPOSITIVE_THRESHOLD 0.01
26 
27 
28 /*
29  * Populate an empty Bloom filter with "nelements" dummy strings.
30  */
31 static void
32 populate_with_dummy_strings(bloom_filter *filter, int64 nelements)
33 {
35  int64 i;
36 
37  for (i = 0; i < nelements; i++)
38  {
40 
41  snprintf(element, sizeof(element), "i" INT64_FORMAT, i);
42  bloom_add_element(filter, (unsigned char *) element, strlen(element));
43  }
44 }
45 
46 /*
47  * Returns number of strings that are indicated as probably appearing in Bloom
48  * filter that were in fact never added by populate_with_dummy_strings().
49  * These are false positives.
50  */
51 static int64
53 {
55  int64 nfalsepos = 0;
56  int64 i;
57 
58  for (i = 0; i < nelements; i++)
59  {
61 
62  snprintf(element, sizeof(element), "M" INT64_FORMAT, i);
63  if (!bloom_lacks_element(filter, (unsigned char *) element,
64  strlen(element)))
65  nfalsepos++;
66  }
67 
68  return nfalsepos;
69 }
70 
71 static void
72 create_and_test_bloom(int power, int64 nelements, int callerseed)
73 {
74  int bloom_work_mem;
75  uint64 seed;
76  int64 nfalsepos;
77  bloom_filter *filter;
78 
79  bloom_work_mem = (1L << power) / 8L / 1024L;
80 
81  elog(DEBUG1, "bloom_work_mem (KB): %d", bloom_work_mem);
82 
83  /*
84  * Generate random seed, or use caller's. Seed should always be a
85  * positive value less than or equal to PG_INT32_MAX, to ensure that any
86  * random seed can be recreated through callerseed if the need arises.
87  */
88  seed = callerseed < 0 ? pg_prng_int32p(&pg_global_prng_state) : callerseed;
89 
90  /* Create Bloom filter, populate it, and report on false positive rate */
91  filter = bloom_create(nelements, bloom_work_mem, seed);
92  populate_with_dummy_strings(filter, nelements);
93  nfalsepos = nfalsepos_for_missing_strings(filter, nelements);
94 
95  ereport((nfalsepos > nelements * FPOSITIVE_THRESHOLD) ? WARNING : DEBUG1,
96  (errmsg_internal("seed: " UINT64_FORMAT " false positives: " INT64_FORMAT " (%.6f%%) bitset %.2f%% set",
97  seed, nfalsepos, (double) nfalsepos / nelements,
98  100.0 * bloom_prop_bits_set(filter))));
99 
100  bloom_free(filter);
101 }
102 
104 
105 /*
106  * SQL-callable entry point to perform all tests.
107  *
108  * If a 1% false positive threshold is not met, emits WARNINGs.
109  *
110  * See README for details of arguments.
111  */
112 Datum
114 {
115  int power = PG_GETARG_INT32(0);
116  int64 nelements = PG_GETARG_INT64(1);
117  int seed = PG_GETARG_INT32(2);
118  int tests = PG_GETARG_INT32(3);
119  int i;
120 
121  if (power < 23 || power > 32)
122  elog(ERROR, "power argument must be between 23 and 32 inclusive");
123 
124  if (tests <= 0)
125  elog(ERROR, "invalid number of tests: %d", tests);
126 
127  if (nelements < 0)
128  elog(ERROR, "invalid number of elements: %d", tests);
129 
130  for (i = 0; i < tests; i++)
131  {
132  elog(DEBUG1, "beginning test #%d...", i + 1);
133 
134  create_and_test_bloom(power, nelements, seed);
135  }
136 
137  PG_RETURN_VOID();
138 }
void bloom_free(bloom_filter *filter)
Definition: bloomfilter.c:126
bloom_filter * bloom_create(int64 total_elems, int bloom_work_mem, uint64 seed)
Definition: bloomfilter.c:87
double bloom_prop_bits_set(bloom_filter *filter)
Definition: bloomfilter.c:187
bool bloom_lacks_element(bloom_filter *filter, unsigned char *elem, size_t len)
Definition: bloomfilter.c:157
void bloom_add_element(bloom_filter *filter, unsigned char *elem, size_t len)
Definition: bloomfilter.c:135
#define INT64_FORMAT
Definition: c.h:548
#define UINT64_FORMAT
Definition: c.h:549
int errmsg_internal(const char *fmt,...)
Definition: elog.c:1157
#define WARNING
Definition: elog.h:36
#define DEBUG1
Definition: elog.h:30
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
#define ereport(elevel,...)
Definition: elog.h:149
#define PG_RETURN_VOID()
Definition: fmgr.h:349
#define PG_GETARG_INT64(n)
Definition: fmgr.h:283
#define PG_GETARG_INT32(n)
Definition: fmgr.h:269
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
int i
Definition: isn.c:73
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:122
int32 pg_prng_int32p(pg_prng_state *state)
Definition: pg_prng.c:254
pg_prng_state pg_global_prng_state
Definition: pg_prng.c:34
#define snprintf
Definition: port.h:238
uintptr_t Datum
Definition: postgres.h:64
static chr element(struct vars *v, const chr *startp, const chr *endp)
Definition: regc_locale.c:376
static void populate_with_dummy_strings(bloom_filter *filter, int64 nelements)
static int64 nfalsepos_for_missing_strings(bloom_filter *filter, int64 nelements)
PG_FUNCTION_INFO_V1(test_bloomfilter)
Datum test_bloomfilter(PG_FUNCTION_ARGS)
PG_MODULE_MAGIC
static void create_and_test_bloom(int power, int64 nelements, int callerseed)
#define MAX_ELEMENT_BYTES
#define FPOSITIVE_THRESHOLD