PostgreSQL Source Code git master
test_integerset.c
Go to the documentation of this file.
1/*--------------------------------------------------------------------------
2 *
3 * test_integerset.c
4 * Test integer set data structure.
5 *
6 * Copyright (c) 2019-2025, PostgreSQL Global Development Group
7 *
8 * IDENTIFICATION
9 * src/test/modules/test_integerset/test_integerset.c
10 *
11 * -------------------------------------------------------------------------
12 */
13#include "postgres.h"
14
15#include "common/pg_prng.h"
16#include "fmgr.h"
17#include "lib/integerset.h"
18#include "utils/memutils.h"
19#include "utils/timestamp.h"
20
21/*
22 * If you enable this, the "pattern" tests will print information about
23 * how long populating, probing, and iterating the test set takes, and
24 * how much memory the test set consumed. That can be used as
25 * micro-benchmark of various operations and input patterns (you might
26 * want to increase the number of values used in each of the test, if
27 * you do that, to reduce noise).
28 *
29 * The information is printed to the server's stderr, mostly because
30 * that's where MemoryContextStats() output goes.
31 */
32static const bool intset_test_stats = false;
33
35
37
38/*
39 * A struct to define a pattern of integers, for use with the test_pattern()
40 * function.
41 */
42typedef struct
43{
44 char *test_name; /* short name of the test, for humans */
45 char *pattern_str; /* a bit pattern */
46 uint64 spacing; /* pattern repeats at this interval */
47 uint64 num_values; /* number of integers to set in total */
48} test_spec;
49
50static const test_spec test_specs[] = {
51 {
52 "all ones", "1111111111",
53 10, 10000000
54 },
55 {
56 "alternating bits", "0101010101",
57 10, 10000000
58 },
59 {
60 "clusters of ten", "1111111111",
61 10000, 10000000
62 },
63 {
64 "clusters of hundred",
65 "1111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111",
66 10000, 100000000
67 },
68 {
69 "one-every-64k", "1",
70 65536, 10000000
71 },
72 {
73 "sparse", "100000000000000000000000000000001",
74 10000000, 10000000
75 },
76 {
77 "single values, distance > 2^32", "1",
78 UINT64CONST(10000000000), 1000000
79 },
80 {
81 "clusters, distance > 2^32", "10101010",
82 UINT64CONST(10000000000), 10000000
83 },
84 {
85 "clusters, distance > 2^60", "10101010",
86 UINT64CONST(2000000000000000000),
87 23 /* can't be much higher than this, or we
88 * overflow uint64 */
89 }
90};
91
92static void test_pattern(const test_spec *spec);
93static void test_empty(void);
94static void test_single_value(uint64 value);
95static void check_with_filler(IntegerSet *intset, uint64 x, uint64 value, uint64 filler_min, uint64 filler_max);
96static void test_single_value_and_filler(uint64 value, uint64 filler_min, uint64 filler_max);
97static void test_huge_distances(void);
98
99/*
100 * SQL-callable entry point to perform all tests.
101 */
102Datum
104{
105 /* Tests for various corner cases */
106 test_empty();
112 test_single_value_and_filler(0, 1000, 2000);
113 test_single_value_and_filler(1, 1000, 2000);
114 test_single_value_and_filler(1, 1000, 2000000);
117
118 /* Test different test patterns, with lots of entries */
119 for (int i = 0; i < lengthof(test_specs); i++)
120 {
122 }
123
125}
126
127/*
128 * Test with a repeating pattern, defined by the 'spec'.
129 */
130static void
132{
134 MemoryContext intset_ctx;
135 MemoryContext old_ctx;
136 TimestampTz starttime;
137 TimestampTz endtime;
138 uint64 n;
139 uint64 last_int;
140 int patternlen;
141 uint64 *pattern_values;
142 uint64 pattern_num_values;
143
144 elog(NOTICE, "testing intset with pattern \"%s\"", spec->test_name);
146 fprintf(stderr, "-----\ntesting intset with pattern \"%s\"\n", spec->test_name);
147
148 /* Pre-process the pattern, creating an array of integers from it. */
149 patternlen = strlen(spec->pattern_str);
150 pattern_values = palloc(patternlen * sizeof(uint64));
151 pattern_num_values = 0;
152 for (int i = 0; i < patternlen; i++)
153 {
154 if (spec->pattern_str[i] == '1')
155 pattern_values[pattern_num_values++] = i;
156 }
157
158 /*
159 * Allocate the integer set.
160 *
161 * Allocate it in a separate memory context, so that we can print its
162 * memory usage easily. (intset_create() creates a memory context of its
163 * own, too, but we don't have direct access to it, so we cannot call
164 * MemoryContextStats() on it directly).
165 */
167 "intset test",
169 MemoryContextSetIdentifier(intset_ctx, spec->test_name);
170 old_ctx = MemoryContextSwitchTo(intset_ctx);
172 MemoryContextSwitchTo(old_ctx);
173
174 /*
175 * Add values to the set.
176 */
177 starttime = GetCurrentTimestamp();
178
179 n = 0;
180 last_int = 0;
181 while (n < spec->num_values)
182 {
183 uint64 x = 0;
184
185 for (int i = 0; i < pattern_num_values && n < spec->num_values; i++)
186 {
187 x = last_int + pattern_values[i];
188
190 n++;
191 }
192 last_int += spec->spacing;
193 }
194
195 endtime = GetCurrentTimestamp();
196
198 fprintf(stderr, "added " UINT64_FORMAT " values in %d ms\n",
199 spec->num_values, (int) (endtime - starttime) / 1000);
200
201 /*
202 * Print stats on the amount of memory used.
203 *
204 * We print the usage reported by intset_memory_usage(), as well as the
205 * stats from the memory context. They should be in the same ballpark,
206 * but it's hard to automate testing that, so if you're making changes to
207 * the implementation, just observe that manually.
208 */
210 {
211 uint64 mem_usage;
212
213 /*
214 * Also print memory usage as reported by intset_memory_usage(). It
215 * should be in the same ballpark as the usage reported by
216 * MemoryContextStats().
217 */
218 mem_usage = intset_memory_usage(intset);
219 fprintf(stderr, "intset_memory_usage() reported " UINT64_FORMAT " (%0.2f bytes / integer)\n",
220 mem_usage, (double) mem_usage / spec->num_values);
221
222 MemoryContextStats(intset_ctx);
223 }
224
225 /* Check that intset_get_num_entries works */
227 if (n != spec->num_values)
228 elog(ERROR, "intset_num_entries returned " UINT64_FORMAT ", expected " UINT64_FORMAT, n, spec->num_values);
229
230 /*
231 * Test random-access probes with intset_is_member()
232 */
233 starttime = GetCurrentTimestamp();
234
235 for (n = 0; n < 100000; n++)
236 {
237 bool b;
238 bool expected;
239 uint64 x;
240
241 /*
242 * Pick next value to probe at random. We limit the probes to the
243 * last integer that we added to the set, plus an arbitrary constant
244 * (1000). There's no point in probing the whole 0 - 2^64 range, if
245 * only a small part of the integer space is used. We would very
246 * rarely hit values that are actually in the set.
247 */
248 x = pg_prng_uint64_range(&pg_global_prng_state, 0, last_int + 1000);
249
250 /* Do we expect this value to be present in the set? */
251 if (x >= last_int)
252 expected = false;
253 else
254 {
255 uint64 idx = x % spec->spacing;
256
257 if (idx >= patternlen)
258 expected = false;
259 else if (spec->pattern_str[idx] == '1')
260 expected = true;
261 else
262 expected = false;
263 }
264
265 /* Is it present according to intset_is_member() ? */
267
268 if (b != expected)
269 elog(ERROR, "mismatch at " UINT64_FORMAT ": %d vs %d", x, b, expected);
270 }
271 endtime = GetCurrentTimestamp();
273 fprintf(stderr, "probed " UINT64_FORMAT " values in %d ms\n",
274 n, (int) (endtime - starttime) / 1000);
275
276 /*
277 * Test iterator
278 */
279 starttime = GetCurrentTimestamp();
280
282 n = 0;
283 last_int = 0;
284 while (n < spec->num_values)
285 {
286 for (int i = 0; i < pattern_num_values && n < spec->num_values; i++)
287 {
288 uint64 expected = last_int + pattern_values[i];
289 uint64 x;
290
292 break;
293
294 if (x != expected)
295 elog(ERROR, "iterate returned wrong value; got " UINT64_FORMAT ", expected " UINT64_FORMAT, x, expected);
296 n++;
297 }
298 last_int += spec->spacing;
299 }
300 endtime = GetCurrentTimestamp();
302 fprintf(stderr, "iterated " UINT64_FORMAT " values in %d ms\n",
303 n, (int) (endtime - starttime) / 1000);
304
305 if (n < spec->num_values)
306 elog(ERROR, "iterator stopped short after " UINT64_FORMAT " entries, expected " UINT64_FORMAT, n, spec->num_values);
307 if (n > spec->num_values)
308 elog(ERROR, "iterator returned " UINT64_FORMAT " entries, " UINT64_FORMAT " was expected", n, spec->num_values);
309
310 MemoryContextDelete(intset_ctx);
311}
312
313/*
314 * Test with a set containing a single integer.
315 */
316static void
318{
320 uint64 x;
321 uint64 num_entries;
322 bool found;
323
324 elog(NOTICE, "testing intset with single value " UINT64_FORMAT, value);
325
326 /* Create the set. */
329
330 /* Test intset_get_num_entries() */
331 num_entries = intset_num_entries(intset);
332 if (num_entries != 1)
333 elog(ERROR, "intset_num_entries returned " UINT64_FORMAT ", expected 1", num_entries);
334
335 /*
336 * Test intset_is_member() at various special values, like 0 and maximum
337 * possible 64-bit integer, as well as the value itself.
338 */
339 if (intset_is_member(intset, 0) != (value == 0))
340 elog(ERROR, "intset_is_member failed for 0");
341 if (intset_is_member(intset, 1) != (value == 1))
342 elog(ERROR, "intset_is_member failed for 1");
344 elog(ERROR, "intset_is_member failed for PG_UINT64_MAX");
345 if (intset_is_member(intset, value) != true)
346 elog(ERROR, "intset_is_member failed for the tested value");
347
348 /*
349 * Test iterator
350 */
352 found = intset_iterate_next(intset, &x);
353 if (!found || x != value)
354 elog(ERROR, "intset_iterate_next failed for " UINT64_FORMAT, x);
355
356 found = intset_iterate_next(intset, &x);
357 if (found)
358 elog(ERROR, "intset_iterate_next failed " UINT64_FORMAT, x);
359}
360
361/*
362 * Test with an integer set that contains:
363 *
364 * - a given single 'value', and
365 * - all integers between 'filler_min' and 'filler_max'.
366 *
367 * This exercises different codepaths than testing just with a single value,
368 * because the implementation buffers newly-added values. If we add just a
369 * single value to the set, we won't test the internal B-tree code at all,
370 * just the code that deals with the buffer.
371 */
372static void
374{
376 uint64 x;
377 bool found;
378 uint64 *iter_expected;
379 uint64 n = 0;
380 uint64 num_entries = 0;
381 uint64 mem_usage;
382
383 elog(NOTICE, "testing intset with value " UINT64_FORMAT ", and all between " UINT64_FORMAT " and " UINT64_FORMAT,
384 value, filler_min, filler_max);
385
387
388 iter_expected = palloc(sizeof(uint64) * (filler_max - filler_min + 1));
389 if (value < filler_min)
390 {
392 iter_expected[n++] = value;
393 }
394
395 for (x = filler_min; x < filler_max; x++)
396 {
398 iter_expected[n++] = x;
399 }
400
401 if (value >= filler_max)
402 {
404 iter_expected[n++] = value;
405 }
406
407 /* Test intset_get_num_entries() */
408 num_entries = intset_num_entries(intset);
409 if (num_entries != n)
410 elog(ERROR, "intset_num_entries returned " UINT64_FORMAT ", expected " UINT64_FORMAT, num_entries, n);
411
412 /*
413 * Test intset_is_member() at various spots, at and around the values that
414 * we expect to be set, as well as 0 and the maximum possible value.
415 */
417 value, filler_min, filler_max);
419 value, filler_min, filler_max);
420 check_with_filler(intset, filler_min - 1,
421 value, filler_min, filler_max);
422 check_with_filler(intset, filler_min,
423 value, filler_min, filler_max);
424 check_with_filler(intset, filler_min + 1,
425 value, filler_min, filler_max);
427 value, filler_min, filler_max);
429 value, filler_min, filler_max);
431 value, filler_min, filler_max);
432 check_with_filler(intset, filler_max - 1,
433 value, filler_min, filler_max);
434 check_with_filler(intset, filler_max,
435 value, filler_min, filler_max);
436 check_with_filler(intset, filler_max + 1,
437 value, filler_min, filler_max);
439 value, filler_min, filler_max);
441 value, filler_min, filler_max);
442
444 for (uint64 i = 0; i < n; i++)
445 {
446 found = intset_iterate_next(intset, &x);
447 if (!found || x != iter_expected[i])
448 elog(ERROR, "intset_iterate_next failed for " UINT64_FORMAT, x);
449 }
450 found = intset_iterate_next(intset, &x);
451 if (found)
452 elog(ERROR, "intset_iterate_next failed " UINT64_FORMAT, x);
453
454 mem_usage = intset_memory_usage(intset);
455 if (mem_usage < 5000 || mem_usage > 500000000)
456 elog(ERROR, "intset_memory_usage() reported suspicious value: " UINT64_FORMAT, mem_usage);
457}
458
459/*
460 * Helper function for test_single_value_and_filler.
461 *
462 * Calls intset_is_member() for value 'x', and checks that the result is what
463 * we expect.
464 */
465static void
467 uint64 value, uint64 filler_min, uint64 filler_max)
468{
469 bool expected;
470 bool actual;
471
472 expected = (x == value || (filler_min <= x && x < filler_max));
473
474 actual = intset_is_member(intset, x);
475
476 if (actual != expected)
477 elog(ERROR, "intset_is_member failed for " UINT64_FORMAT, x);
478}
479
480/*
481 * Test empty set
482 */
483static void
485{
487 uint64 x;
488
489 elog(NOTICE, "testing intset with empty set");
490
492
493 /* Test intset_is_member() */
494 if (intset_is_member(intset, 0) != false)
495 elog(ERROR, "intset_is_member on empty set returned true");
496 if (intset_is_member(intset, 1) != false)
497 elog(ERROR, "intset_is_member on empty set returned true");
499 elog(ERROR, "intset_is_member on empty set returned true");
500
501 /* Test iterator */
504 elog(ERROR, "intset_iterate_next on empty set returned a value (" UINT64_FORMAT ")", x);
505}
506
507/*
508 * Test with integers that are more than 2^60 apart.
509 *
510 * The Simple-8b encoding used by the set implementation can only encode
511 * values up to 2^60. That makes large differences like this interesting
512 * to test.
513 */
514static void
516{
518 uint64 values[1000];
519 int num_values = 0;
520 uint64 val = 0;
521 bool found;
522 uint64 x;
523
524 elog(NOTICE, "testing intset with distances > 2^60 between values");
525
526 val = 0;
527 values[num_values++] = val;
528
529 /* Test differences on both sides of the 2^60 boundary. */
530 val += UINT64CONST(1152921504606846976) - 1; /* 2^60 - 1 */
531 values[num_values++] = val;
532
533 val += UINT64CONST(1152921504606846976) - 1; /* 2^60 - 1 */
534 values[num_values++] = val;
535
536 val += UINT64CONST(1152921504606846976); /* 2^60 */
537 values[num_values++] = val;
538
539 val += UINT64CONST(1152921504606846976); /* 2^60 */
540 values[num_values++] = val;
541
542 val += UINT64CONST(1152921504606846976); /* 2^60 */
543 values[num_values++] = val;
544
545 val += UINT64CONST(1152921504606846976) + 1; /* 2^60 + 1 */
546 values[num_values++] = val;
547
548 val += UINT64CONST(1152921504606846976) + 1; /* 2^60 + 1 */
549 values[num_values++] = val;
550
551 val += UINT64CONST(1152921504606846976) + 1; /* 2^60 + 1 */
552 values[num_values++] = val;
553
554 val += UINT64CONST(1152921504606846976) + 2; /* 2^60 + 2 */
555 values[num_values++] = val;
556
557 val += UINT64CONST(1152921504606846976) + 2; /* 2^60 + 2 */
558 values[num_values++] = val;
559
560 val += UINT64CONST(1152921504606846976); /* 2^60 */
561 values[num_values++] = val;
562
563 /*
564 * We're now very close to 2^64, so can't add large values anymore. But
565 * add more smaller values to the end, to make sure that all the above
566 * values get flushed and packed into the tree structure.
567 */
568 while (num_values < 1000)
569 {
571 values[num_values++] = val;
572 }
573
574 /* Create an IntegerSet using these values */
576 for (int i = 0; i < num_values; i++)
578
579 /*
580 * Test intset_is_member() around each of these values
581 */
582 for (int i = 0; i < num_values; i++)
583 {
584 uint64 y = values[i];
585 bool expected;
586 bool result;
587
588 if (y > 0)
589 {
590 expected = (values[i - 1] == y - 1);
591 result = intset_is_member(intset, y - 1);
592 if (result != expected)
593 elog(ERROR, "intset_is_member failed for " UINT64_FORMAT, y - 1);
594 }
595
596 result = intset_is_member(intset, y);
597 if (result != true)
598 elog(ERROR, "intset_is_member failed for " UINT64_FORMAT, y);
599
600 expected = (i != num_values - 1) ? (values[i + 1] == y + 1) : false;
601 result = intset_is_member(intset, y + 1);
602 if (result != expected)
603 elog(ERROR, "intset_is_member failed for " UINT64_FORMAT, y + 1);
604 }
605
606 /*
607 * Test iterator
608 */
610 for (int i = 0; i < num_values; i++)
611 {
612 found = intset_iterate_next(intset, &x);
613 if (!found || x != values[i])
614 elog(ERROR, "intset_iterate_next failed for " UINT64_FORMAT, x);
615 }
616 found = intset_iterate_next(intset, &x);
617 if (found)
618 elog(ERROR, "intset_iterate_next failed " UINT64_FORMAT, x);
619}
Datum intset(PG_FUNCTION_ARGS)
Definition: _int_op.c:179
Datum idx(PG_FUNCTION_ARGS)
Definition: _int_op.c:259
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1644
static Datum values[MAXATTR]
Definition: bootstrap.c:151
#define UINT64_FORMAT
Definition: c.h:507
uint64_t uint64
Definition: c.h:489
#define lengthof(array)
Definition: c.h:745
#define PG_UINT64_MAX
Definition: c.h:550
#define UINT64CONST(x)
Definition: c.h:503
#define fprintf(file, fmt, msg)
Definition: cubescan.l:21
int64 TimestampTz
Definition: timestamp.h:39
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
#define NOTICE
Definition: elog.h:35
#define PG_RETURN_VOID()
Definition: fmgr.h:349
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
static struct @162 value
long val
Definition: informix.c:689
uint64 intset_memory_usage(IntegerSet *intset)
Definition: integerset.c:358
uint64 intset_num_entries(IntegerSet *intset)
Definition: integerset.c:349
bool intset_is_member(IntegerSet *intset, uint64 x)
Definition: integerset.c:553
void intset_begin_iterate(IntegerSet *intset)
Definition: integerset.c:623
bool intset_iterate_next(IntegerSet *intset, uint64 *next)
Definition: integerset.c:642
IntegerSet * intset_create(void)
Definition: integerset.c:283
void intset_add_member(IntegerSet *intset, uint64 x)
Definition: integerset.c:369
int y
Definition: isn.c:71
int b
Definition: isn.c:69
int x
Definition: isn.c:70
int i
Definition: isn.c:72
void * palloc(Size size)
Definition: mcxt.c:1317
MemoryContext CurrentMemoryContext
Definition: mcxt.c:143
void MemoryContextStats(MemoryContext context)
Definition: mcxt.c:814
void MemoryContextDelete(MemoryContext context)
Definition: mcxt.c:454
void MemoryContextSetIdentifier(MemoryContext context, const char *id)
Definition: mcxt.c:612
#define AllocSetContextCreate
Definition: memutils.h:129
#define ALLOCSET_SMALL_SIZES
Definition: memutils.h:170
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:124
uint64 pg_prng_uint64_range(pg_prng_state *state, uint64 rmin, uint64 rmax)
Definition: pg_prng.c:144
uint32 pg_prng_uint32(pg_prng_state *state)
Definition: pg_prng.c:227
pg_prng_state pg_global_prng_state
Definition: pg_prng.c:34
uintptr_t Datum
Definition: postgres.h:69
uint64 num_values
uint64 spacing
char * pattern_str
char * test_name
static void test_pattern(const test_spec *spec)
static const bool intset_test_stats
Datum test_integerset(PG_FUNCTION_ARGS)
PG_MODULE_MAGIC
static void test_single_value(uint64 value)
PG_FUNCTION_INFO_V1(test_integerset)
static void test_empty(void)
static const test_spec test_specs[]
static void test_single_value_and_filler(uint64 value, uint64 filler_min, uint64 filler_max)
static void check_with_filler(IntegerSet *intset, uint64 x, uint64 value, uint64 filler_min, uint64 filler_max)
static void test_huge_distances(void)