PostgreSQL Source Code git master
test_bloomfilter.c
Go to the documentation of this file.
1/*--------------------------------------------------------------------------
2 *
3 * test_bloomfilter.c
4 * Test false positive rate of Bloom filter.
5 *
6 * Copyright (c) 2018-2025, PostgreSQL Global Development Group
7 *
8 * IDENTIFICATION
9 * src/test/modules/test_bloomfilter/test_bloomfilter.c
10 *
11 * -------------------------------------------------------------------------
12 */
13#include "postgres.h"
14
15#include "common/pg_prng.h"
16#include "fmgr.h"
17#include "lib/bloomfilter.h"
18#include "miscadmin.h"
19
21
22/* Fits decimal representation of PG_INT64_MIN + 2 bytes: */
23#define MAX_ELEMENT_BYTES 21
24/* False positive rate WARNING threshold (1%): */
25#define FPOSITIVE_THRESHOLD 0.01
26
27
28/*
29 * Populate an empty Bloom filter with "nelements" dummy strings.
30 */
31static void
33{
35 int64 i;
36
37 for (i = 0; i < nelements; i++)
38 {
40
41 snprintf(element, sizeof(element), "i" INT64_FORMAT, i);
42 bloom_add_element(filter, (unsigned char *) element, strlen(element));
43 }
44}
45
46/*
47 * Returns number of strings that are indicated as probably appearing in Bloom
48 * filter that were in fact never added by populate_with_dummy_strings().
49 * These are false positives.
50 */
51static int64
53{
55 int64 nfalsepos = 0;
56 int64 i;
57
58 for (i = 0; i < nelements; i++)
59 {
61
62 snprintf(element, sizeof(element), "M" INT64_FORMAT, i);
63 if (!bloom_lacks_element(filter, (unsigned char *) element,
64 strlen(element)))
65 nfalsepos++;
66 }
67
68 return nfalsepos;
69}
70
71static void
72create_and_test_bloom(int power, int64 nelements, int callerseed)
73{
74 int bloom_work_mem;
75 uint64 seed;
76 int64 nfalsepos;
77 bloom_filter *filter;
78
79 bloom_work_mem = (1L << power) / 8L / 1024L;
80
81 elog(DEBUG1, "bloom_work_mem (KB): %d", bloom_work_mem);
82
83 /*
84 * Generate random seed, or use caller's. Seed should always be a
85 * positive value less than or equal to PG_INT32_MAX, to ensure that any
86 * random seed can be recreated through callerseed if the need arises.
87 */
88 seed = callerseed < 0 ? pg_prng_int32p(&pg_global_prng_state) : callerseed;
89
90 /* Create Bloom filter, populate it, and report on false positive rate */
91 filter = bloom_create(nelements, bloom_work_mem, seed);
92 populate_with_dummy_strings(filter, nelements);
93 nfalsepos = nfalsepos_for_missing_strings(filter, nelements);
94
95 ereport((nfalsepos > nelements * FPOSITIVE_THRESHOLD) ? WARNING : DEBUG1,
96 (errmsg_internal("seed: " UINT64_FORMAT " false positives: " INT64_FORMAT " (%.6f%%) bitset %.2f%% set",
97 seed, nfalsepos, (double) nfalsepos / nelements,
98 100.0 * bloom_prop_bits_set(filter))));
99
100 bloom_free(filter);
101}
102
104
105/*
106 * SQL-callable entry point to perform all tests.
107 *
108 * If a 1% false positive threshold is not met, emits WARNINGs.
109 *
110 * See README for details of arguments.
111 */
112Datum
114{
115 int power = PG_GETARG_INT32(0);
116 int64 nelements = PG_GETARG_INT64(1);
117 int seed = PG_GETARG_INT32(2);
118 int tests = PG_GETARG_INT32(3);
119 int i;
120
121 if (power < 23 || power > 32)
122 elog(ERROR, "power argument must be between 23 and 32 inclusive");
123
124 if (tests <= 0)
125 elog(ERROR, "invalid number of tests: %d", tests);
126
127 if (nelements < 0)
128 elog(ERROR, "invalid number of elements: %d", tests);
129
130 for (i = 0; i < tests; i++)
131 {
132 elog(DEBUG1, "beginning test #%d...", i + 1);
133
134 create_and_test_bloom(power, nelements, seed);
135 }
136
138}
void bloom_free(bloom_filter *filter)
Definition: bloomfilter.c:126
bloom_filter * bloom_create(int64 total_elems, int bloom_work_mem, uint64 seed)
Definition: bloomfilter.c:87
double bloom_prop_bits_set(bloom_filter *filter)
Definition: bloomfilter.c:187
bool bloom_lacks_element(bloom_filter *filter, unsigned char *elem, size_t len)
Definition: bloomfilter.c:157
void bloom_add_element(bloom_filter *filter, unsigned char *elem, size_t len)
Definition: bloomfilter.c:135
#define INT64_FORMAT
Definition: c.h:506
int64_t int64
Definition: c.h:485
#define UINT64_FORMAT
Definition: c.h:507
uint64_t uint64
Definition: c.h:489
int errmsg_internal(const char *fmt,...)
Definition: elog.c:1157
#define WARNING
Definition: elog.h:36
#define DEBUG1
Definition: elog.h:30
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
#define ereport(elevel,...)
Definition: elog.h:149
#define PG_RETURN_VOID()
Definition: fmgr.h:349
#define PG_GETARG_INT64(n)
Definition: fmgr.h:283
#define PG_GETARG_INT32(n)
Definition: fmgr.h:269
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
int i
Definition: isn.c:72
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:122
int32 pg_prng_int32p(pg_prng_state *state)
Definition: pg_prng.c:254
pg_prng_state pg_global_prng_state
Definition: pg_prng.c:34
#define snprintf
Definition: port.h:238
uintptr_t Datum
Definition: postgres.h:69
static chr element(struct vars *v, const chr *startp, const chr *endp)
Definition: regc_locale.c:376
static void populate_with_dummy_strings(bloom_filter *filter, int64 nelements)
static int64 nfalsepos_for_missing_strings(bloom_filter *filter, int64 nelements)
PG_FUNCTION_INFO_V1(test_bloomfilter)
Datum test_bloomfilter(PG_FUNCTION_ARGS)
PG_MODULE_MAGIC
static void create_and_test_bloom(int power, int64 nelements, int callerseed)
#define MAX_ELEMENT_BYTES
#define FPOSITIVE_THRESHOLD