PostgreSQL Source Code  git master
test_bloomfilter.c
Go to the documentation of this file.
1 /*--------------------------------------------------------------------------
2  *
3  * test_bloomfilter.c
4  * Test false positive rate of Bloom filter.
5  *
6  * Copyright (c) 2018-2019, PostgreSQL Global Development Group
7  *
8  * IDENTIFICATION
9  * src/test/modules/test_bloomfilter/test_bloomfilter.c
10  *
11  * -------------------------------------------------------------------------
12  */
13 #include "postgres.h"
14 
15 #include "fmgr.h"
16 #include "lib/bloomfilter.h"
17 #include "miscadmin.h"
18 
20 
21 /* Fits decimal representation of PG_INT64_MIN + 2 bytes: */
22 #define MAX_ELEMENT_BYTES 21
23 /* False positive rate WARNING threshold (1%): */
24 #define FPOSITIVE_THRESHOLD 0.01
25 
26 
27 /*
28  * Populate an empty Bloom filter with "nelements" dummy strings.
29  */
30 static void
31 populate_with_dummy_strings(bloom_filter *filter, int64 nelements)
32 {
34  int64 i;
35 
36  for (i = 0; i < nelements; i++)
37  {
39 
40  snprintf(element, sizeof(element), "i" INT64_FORMAT, i);
41  bloom_add_element(filter, (unsigned char *) element, strlen(element));
42  }
43 }
44 
45 /*
46  * Returns number of strings that are indicated as probably appearing in Bloom
47  * filter that were in fact never added by populate_with_dummy_strings().
48  * These are false positives.
49  */
50 static int64
52 {
54  int64 nfalsepos = 0;
55  int64 i;
56 
57  for (i = 0; i < nelements; i++)
58  {
60 
61  snprintf(element, sizeof(element), "M" INT64_FORMAT, i);
62  if (!bloom_lacks_element(filter, (unsigned char *) element,
63  strlen(element)))
64  nfalsepos++;
65  }
66 
67  return nfalsepos;
68 }
69 
70 static void
71 create_and_test_bloom(int power, int64 nelements, int callerseed)
72 {
73  int bloom_work_mem;
74  uint64 seed;
75  int64 nfalsepos;
76  bloom_filter *filter;
77 
78  bloom_work_mem = (1L << power) / 8L / 1024L;
79 
80  elog(DEBUG1, "bloom_work_mem (KB): %d", bloom_work_mem);
81 
82  /*
83  * Generate random seed, or use caller's. Seed should always be a
84  * positive value less than or equal to PG_INT32_MAX, to ensure that any
85  * random seed can be recreated through callerseed if the need arises.
86  * (Don't assume that RAND_MAX cannot exceed PG_INT32_MAX.)
87  */
88  seed = callerseed < 0 ? random() % PG_INT32_MAX : callerseed;
89 
90  /* Create Bloom filter, populate it, and report on false positive rate */
91  filter = bloom_create(nelements, bloom_work_mem, seed);
92  populate_with_dummy_strings(filter, nelements);
93  nfalsepos = nfalsepos_for_missing_strings(filter, nelements);
94 
95  ereport((nfalsepos > nelements * FPOSITIVE_THRESHOLD) ? WARNING : DEBUG1,
96  (errmsg_internal("seed: " UINT64_FORMAT " false positives: " INT64_FORMAT " (%.6f%%) bitset %.2f%% set",
97  seed, nfalsepos, (double) nfalsepos / nelements,
98  100.0 * bloom_prop_bits_set(filter))));
99 
100  bloom_free(filter);
101 }
102 
104 
105 /*
106  * SQL-callable entry point to perform all tests.
107  *
108  * If a 1% false positive threshold is not met, emits WARNINGs.
109  *
110  * See README for details of arguments.
111  */
112 Datum
114 {
115  int power = PG_GETARG_INT32(0);
116  int64 nelements = PG_GETARG_INT64(1);
117  int seed = PG_GETARG_INT32(2);
118  int tests = PG_GETARG_INT32(3);
119  int i;
120 
121  if (power < 23 || power > 32)
122  elog(ERROR, "power argument must be between 23 and 32 inclusive");
123 
124  if (tests <= 0)
125  elog(ERROR, "invalid number of tests: %d", tests);
126 
127  if (nelements < 0)
128  elog(ERROR, "invalid number of elements: %d", tests);
129 
130  for (i = 0; i < tests; i++)
131  {
132  elog(DEBUG1, "beginning test #%d...", i + 1);
133 
134  create_and_test_bloom(power, nelements, seed);
135  }
136 
137  PG_RETURN_VOID();
138 }
#define PG_GETARG_INT32(n)
Definition: fmgr.h:264
bloom_filter * bloom_create(int64 total_elems, int bloom_work_mem, uint64 seed)
Definition: bloomfilter.c:88
static int64 nfalsepos_for_missing_strings(bloom_filter *filter, int64 nelements)
#define DEBUG1
Definition: elog.h:25
void bloom_add_element(bloom_filter *filter, unsigned char *elem, size_t len)
Definition: bloomfilter.c:136
long random(void)
Definition: random.c:22
static void create_and_test_bloom(int power, int64 nelements, int callerseed)
#define FPOSITIVE_THRESHOLD
#define ERROR
Definition: elog.h:43
#define ereport(elevel, rest)
Definition: elog.h:141
static chr element(struct vars *v, const chr *startp, const chr *endp)
Definition: regc_locale.c:380
void bloom_free(bloom_filter *filter)
Definition: bloomfilter.c:127
#define WARNING
Definition: elog.h:40
uintptr_t Datum
Definition: postgres.h:367
#define PG_RETURN_VOID()
Definition: fmgr.h:339
int errmsg_internal(const char *fmt,...)
Definition: elog.c:814
#define INT64_FORMAT
Definition: c.h:400
double bloom_prop_bits_set(bloom_filter *filter)
Definition: bloomfilter.c:188
static void populate_with_dummy_strings(bloom_filter *filter, int64 nelements)
#define PG_INT32_MAX
Definition: c.h:441
#define elog(elevel,...)
Definition: elog.h:226
Datum test_bloomfilter(PG_FUNCTION_ARGS)
int i
#define PG_FUNCTION_ARGS
Definition: fmgr.h:188
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:99
PG_MODULE_MAGIC
#define PG_GETARG_INT64(n)
Definition: fmgr.h:277
#define snprintf
Definition: port.h:192
#define UINT64_FORMAT
Definition: c.h:401
bool bloom_lacks_element(bloom_filter *filter, unsigned char *elem, size_t len)
Definition: bloomfilter.c:158
PG_FUNCTION_INFO_V1(test_bloomfilter)
#define MAX_ELEMENT_BYTES