PostgreSQL Source Code  git master
test_integerset.c
Go to the documentation of this file.
1 /*--------------------------------------------------------------------------
2  *
3  * test_integerset.c
4  * Test integer set data structure.
5  *
6  * Copyright (c) 2019-2024, PostgreSQL Global Development Group
7  *
8  * IDENTIFICATION
9  * src/test/modules/test_integerset/test_integerset.c
10  *
11  * -------------------------------------------------------------------------
12  */
13 #include "postgres.h"
14 
15 #include "common/pg_prng.h"
16 #include "fmgr.h"
17 #include "lib/integerset.h"
18 #include "miscadmin.h"
19 #include "nodes/bitmapset.h"
20 #include "storage/block.h"
21 #include "storage/itemptr.h"
22 #include "utils/memutils.h"
23 #include "utils/timestamp.h"
24 
25 /*
26  * If you enable this, the "pattern" tests will print information about
27  * how long populating, probing, and iterating the test set takes, and
28  * how much memory the test set consumed. That can be used as
29  * micro-benchmark of various operations and input patterns (you might
30  * want to increase the number of values used in each of the test, if
31  * you do that, to reduce noise).
32  *
33  * The information is printed to the server's stderr, mostly because
34  * that's where MemoryContextStats() output goes.
35  */
36 static const bool intset_test_stats = false;
37 
39 
41 
42 /*
43  * A struct to define a pattern of integers, for use with the test_pattern()
44  * function.
45  */
46 typedef struct
47 {
48  char *test_name; /* short name of the test, for humans */
49  char *pattern_str; /* a bit pattern */
50  uint64 spacing; /* pattern repeats at this interval */
51  uint64 num_values; /* number of integers to set in total */
52 } test_spec;
53 
54 static const test_spec test_specs[] = {
55  {
56  "all ones", "1111111111",
57  10, 10000000
58  },
59  {
60  "alternating bits", "0101010101",
61  10, 10000000
62  },
63  {
64  "clusters of ten", "1111111111",
65  10000, 10000000
66  },
67  {
68  "clusters of hundred",
69  "1111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111",
70  10000, 100000000
71  },
72  {
73  "one-every-64k", "1",
74  65536, 10000000
75  },
76  {
77  "sparse", "100000000000000000000000000000001",
78  10000000, 10000000
79  },
80  {
81  "single values, distance > 2^32", "1",
82  UINT64CONST(10000000000), 1000000
83  },
84  {
85  "clusters, distance > 2^32", "10101010",
86  UINT64CONST(10000000000), 10000000
87  },
88  {
89  "clusters, distance > 2^60", "10101010",
90  UINT64CONST(2000000000000000000),
91  23 /* can't be much higher than this, or we
92  * overflow uint64 */
93  }
94 };
95 
96 static void test_pattern(const test_spec *spec);
97 static void test_empty(void);
98 static void test_single_value(uint64 value);
99 static void check_with_filler(IntegerSet *intset, uint64 x, uint64 value, uint64 filler_min, uint64 filler_max);
100 static void test_single_value_and_filler(uint64 value, uint64 filler_min, uint64 filler_max);
101 static void test_huge_distances(void);
102 
103 /*
104  * SQL-callable entry point to perform all tests.
105  */
106 Datum
108 {
109  /* Tests for various corner cases */
110  test_empty();
116  test_single_value_and_filler(0, 1000, 2000);
117  test_single_value_and_filler(1, 1000, 2000);
118  test_single_value_and_filler(1, 1000, 2000000);
121 
122  /* Test different test patterns, with lots of entries */
123  for (int i = 0; i < lengthof(test_specs); i++)
124  {
126  }
127 
128  PG_RETURN_VOID();
129 }
130 
131 /*
132  * Test with a repeating pattern, defined by the 'spec'.
133  */
134 static void
136 {
138  MemoryContext intset_ctx;
140  TimestampTz starttime;
141  TimestampTz endtime;
142  uint64 n;
143  uint64 last_int;
144  int patternlen;
145  uint64 *pattern_values;
146  uint64 pattern_num_values;
147 
148  elog(NOTICE, "testing intset with pattern \"%s\"", spec->test_name);
149  if (intset_test_stats)
150  fprintf(stderr, "-----\ntesting intset with pattern \"%s\"\n", spec->test_name);
151 
152  /* Pre-process the pattern, creating an array of integers from it. */
153  patternlen = strlen(spec->pattern_str);
154  pattern_values = palloc(patternlen * sizeof(uint64));
155  pattern_num_values = 0;
156  for (int i = 0; i < patternlen; i++)
157  {
158  if (spec->pattern_str[i] == '1')
159  pattern_values[pattern_num_values++] = i;
160  }
161 
162  /*
163  * Allocate the integer set.
164  *
165  * Allocate it in a separate memory context, so that we can print its
166  * memory usage easily. (intset_create() creates a memory context of its
167  * own, too, but we don't have direct access to it, so we cannot call
168  * MemoryContextStats() on it directly).
169  */
171  "intset test",
173  MemoryContextSetIdentifier(intset_ctx, spec->test_name);
174  old_ctx = MemoryContextSwitchTo(intset_ctx);
175  intset = intset_create();
177 
178  /*
179  * Add values to the set.
180  */
181  starttime = GetCurrentTimestamp();
182 
183  n = 0;
184  last_int = 0;
185  while (n < spec->num_values)
186  {
187  uint64 x = 0;
188 
189  for (int i = 0; i < pattern_num_values && n < spec->num_values; i++)
190  {
191  x = last_int + pattern_values[i];
192 
194  n++;
195  }
196  last_int += spec->spacing;
197  }
198 
199  endtime = GetCurrentTimestamp();
200 
201  if (intset_test_stats)
202  fprintf(stderr, "added " UINT64_FORMAT " values in %d ms\n",
203  spec->num_values, (int) (endtime - starttime) / 1000);
204 
205  /*
206  * Print stats on the amount of memory used.
207  *
208  * We print the usage reported by intset_memory_usage(), as well as the
209  * stats from the memory context. They should be in the same ballpark,
210  * but it's hard to automate testing that, so if you're making changes to
211  * the implementation, just observe that manually.
212  */
213  if (intset_test_stats)
214  {
215  uint64 mem_usage;
216 
217  /*
218  * Also print memory usage as reported by intset_memory_usage(). It
219  * should be in the same ballpark as the usage reported by
220  * MemoryContextStats().
221  */
222  mem_usage = intset_memory_usage(intset);
223  fprintf(stderr, "intset_memory_usage() reported " UINT64_FORMAT " (%0.2f bytes / integer)\n",
224  mem_usage, (double) mem_usage / spec->num_values);
225 
226  MemoryContextStats(intset_ctx);
227  }
228 
229  /* Check that intset_get_num_entries works */
231  if (n != spec->num_values)
232  elog(ERROR, "intset_num_entries returned " UINT64_FORMAT ", expected " UINT64_FORMAT, n, spec->num_values);
233 
234  /*
235  * Test random-access probes with intset_is_member()
236  */
237  starttime = GetCurrentTimestamp();
238 
239  for (n = 0; n < 100000; n++)
240  {
241  bool b;
242  bool expected;
243  uint64 x;
244 
245  /*
246  * Pick next value to probe at random. We limit the probes to the
247  * last integer that we added to the set, plus an arbitrary constant
248  * (1000). There's no point in probing the whole 0 - 2^64 range, if
249  * only a small part of the integer space is used. We would very
250  * rarely hit values that are actually in the set.
251  */
252  x = pg_prng_uint64_range(&pg_global_prng_state, 0, last_int + 1000);
253 
254  /* Do we expect this value to be present in the set? */
255  if (x >= last_int)
256  expected = false;
257  else
258  {
259  uint64 idx = x % spec->spacing;
260 
261  if (idx >= patternlen)
262  expected = false;
263  else if (spec->pattern_str[idx] == '1')
264  expected = true;
265  else
266  expected = false;
267  }
268 
269  /* Is it present according to intset_is_member() ? */
271 
272  if (b != expected)
273  elog(ERROR, "mismatch at " UINT64_FORMAT ": %d vs %d", x, b, expected);
274  }
275  endtime = GetCurrentTimestamp();
276  if (intset_test_stats)
277  fprintf(stderr, "probed " UINT64_FORMAT " values in %d ms\n",
278  n, (int) (endtime - starttime) / 1000);
279 
280  /*
281  * Test iterator
282  */
283  starttime = GetCurrentTimestamp();
284 
286  n = 0;
287  last_int = 0;
288  while (n < spec->num_values)
289  {
290  for (int i = 0; i < pattern_num_values && n < spec->num_values; i++)
291  {
292  uint64 expected = last_int + pattern_values[i];
293  uint64 x;
294 
295  if (!intset_iterate_next(intset, &x))
296  break;
297 
298  if (x != expected)
299  elog(ERROR, "iterate returned wrong value; got " UINT64_FORMAT ", expected " UINT64_FORMAT, x, expected);
300  n++;
301  }
302  last_int += spec->spacing;
303  }
304  endtime = GetCurrentTimestamp();
305  if (intset_test_stats)
306  fprintf(stderr, "iterated " UINT64_FORMAT " values in %d ms\n",
307  n, (int) (endtime - starttime) / 1000);
308 
309  if (n < spec->num_values)
310  elog(ERROR, "iterator stopped short after " UINT64_FORMAT " entries, expected " UINT64_FORMAT, n, spec->num_values);
311  if (n > spec->num_values)
312  elog(ERROR, "iterator returned " UINT64_FORMAT " entries, " UINT64_FORMAT " was expected", n, spec->num_values);
313 
314  MemoryContextDelete(intset_ctx);
315 }
316 
317 /*
318  * Test with a set containing a single integer.
319  */
320 static void
322 {
324  uint64 x;
325  uint64 num_entries;
326  bool found;
327 
328  elog(NOTICE, "testing intset with single value " UINT64_FORMAT, value);
329 
330  /* Create the set. */
331  intset = intset_create();
333 
334  /* Test intset_get_num_entries() */
335  num_entries = intset_num_entries(intset);
336  if (num_entries != 1)
337  elog(ERROR, "intset_num_entries returned " UINT64_FORMAT ", expected 1", num_entries);
338 
339  /*
340  * Test intset_is_member() at various special values, like 0 and maximum
341  * possible 64-bit integer, as well as the value itself.
342  */
343  if (intset_is_member(intset, 0) != (value == 0))
344  elog(ERROR, "intset_is_member failed for 0");
345  if (intset_is_member(intset, 1) != (value == 1))
346  elog(ERROR, "intset_is_member failed for 1");
348  elog(ERROR, "intset_is_member failed for PG_UINT64_MAX");
349  if (intset_is_member(intset, value) != true)
350  elog(ERROR, "intset_is_member failed for the tested value");
351 
352  /*
353  * Test iterator
354  */
356  found = intset_iterate_next(intset, &x);
357  if (!found || x != value)
358  elog(ERROR, "intset_iterate_next failed for " UINT64_FORMAT, x);
359 
360  found = intset_iterate_next(intset, &x);
361  if (found)
362  elog(ERROR, "intset_iterate_next failed " UINT64_FORMAT, x);
363 }
364 
365 /*
366  * Test with an integer set that contains:
367  *
368  * - a given single 'value', and
369  * - all integers between 'filler_min' and 'filler_max'.
370  *
371  * This exercises different codepaths than testing just with a single value,
372  * because the implementation buffers newly-added values. If we add just a
373  * single value to the set, we won't test the internal B-tree code at all,
374  * just the code that deals with the buffer.
375  */
376 static void
377 test_single_value_and_filler(uint64 value, uint64 filler_min, uint64 filler_max)
378 {
380  uint64 x;
381  bool found;
382  uint64 *iter_expected;
383  uint64 n = 0;
384  uint64 num_entries = 0;
385  uint64 mem_usage;
386 
387  elog(NOTICE, "testing intset with value " UINT64_FORMAT ", and all between " UINT64_FORMAT " and " UINT64_FORMAT,
388  value, filler_min, filler_max);
389 
390  intset = intset_create();
391 
392  iter_expected = palloc(sizeof(uint64) * (filler_max - filler_min + 1));
393  if (value < filler_min)
394  {
396  iter_expected[n++] = value;
397  }
398 
399  for (x = filler_min; x < filler_max; x++)
400  {
402  iter_expected[n++] = x;
403  }
404 
405  if (value >= filler_max)
406  {
408  iter_expected[n++] = value;
409  }
410 
411  /* Test intset_get_num_entries() */
412  num_entries = intset_num_entries(intset);
413  if (num_entries != n)
414  elog(ERROR, "intset_num_entries returned " UINT64_FORMAT ", expected " UINT64_FORMAT, num_entries, n);
415 
416  /*
417  * Test intset_is_member() at various spots, at and around the values that
418  * we expect to be set, as well as 0 and the maximum possible value.
419  */
421  value, filler_min, filler_max);
423  value, filler_min, filler_max);
424  check_with_filler(intset, filler_min - 1,
425  value, filler_min, filler_max);
426  check_with_filler(intset, filler_min,
427  value, filler_min, filler_max);
428  check_with_filler(intset, filler_min + 1,
429  value, filler_min, filler_max);
431  value, filler_min, filler_max);
433  value, filler_min, filler_max);
435  value, filler_min, filler_max);
436  check_with_filler(intset, filler_max - 1,
437  value, filler_min, filler_max);
438  check_with_filler(intset, filler_max,
439  value, filler_min, filler_max);
440  check_with_filler(intset, filler_max + 1,
441  value, filler_min, filler_max);
443  value, filler_min, filler_max);
445  value, filler_min, filler_max);
446 
448  for (uint64 i = 0; i < n; i++)
449  {
450  found = intset_iterate_next(intset, &x);
451  if (!found || x != iter_expected[i])
452  elog(ERROR, "intset_iterate_next failed for " UINT64_FORMAT, x);
453  }
454  found = intset_iterate_next(intset, &x);
455  if (found)
456  elog(ERROR, "intset_iterate_next failed " UINT64_FORMAT, x);
457 
458  mem_usage = intset_memory_usage(intset);
459  if (mem_usage < 5000 || mem_usage > 500000000)
460  elog(ERROR, "intset_memory_usage() reported suspicious value: " UINT64_FORMAT, mem_usage);
461 }
462 
463 /*
464  * Helper function for test_single_value_and_filler.
465  *
466  * Calls intset_is_member() for value 'x', and checks that the result is what
467  * we expect.
468  */
469 static void
471  uint64 value, uint64 filler_min, uint64 filler_max)
472 {
473  bool expected;
474  bool actual;
475 
476  expected = (x == value || (filler_min <= x && x < filler_max));
477 
478  actual = intset_is_member(intset, x);
479 
480  if (actual != expected)
481  elog(ERROR, "intset_is_member failed for " UINT64_FORMAT, x);
482 }
483 
484 /*
485  * Test empty set
486  */
487 static void
489 {
491  uint64 x;
492 
493  elog(NOTICE, "testing intset with empty set");
494 
495  intset = intset_create();
496 
497  /* Test intset_is_member() */
498  if (intset_is_member(intset, 0) != false)
499  elog(ERROR, "intset_is_member on empty set returned true");
500  if (intset_is_member(intset, 1) != false)
501  elog(ERROR, "intset_is_member on empty set returned true");
502  if (intset_is_member(intset, PG_UINT64_MAX) != false)
503  elog(ERROR, "intset_is_member on empty set returned true");
504 
505  /* Test iterator */
508  elog(ERROR, "intset_iterate_next on empty set returned a value (" UINT64_FORMAT ")", x);
509 }
510 
511 /*
512  * Test with integers that are more than 2^60 apart.
513  *
514  * The Simple-8b encoding used by the set implementation can only encode
515  * values up to 2^60. That makes large differences like this interesting
516  * to test.
517  */
518 static void
520 {
522  uint64 values[1000];
523  int num_values = 0;
524  uint64 val = 0;
525  bool found;
526  uint64 x;
527 
528  elog(NOTICE, "testing intset with distances > 2^60 between values");
529 
530  val = 0;
531  values[num_values++] = val;
532 
533  /* Test differences on both sides of the 2^60 boundary. */
534  val += UINT64CONST(1152921504606846976) - 1; /* 2^60 - 1 */
535  values[num_values++] = val;
536 
537  val += UINT64CONST(1152921504606846976) - 1; /* 2^60 - 1 */
538  values[num_values++] = val;
539 
540  val += UINT64CONST(1152921504606846976); /* 2^60 */
541  values[num_values++] = val;
542 
543  val += UINT64CONST(1152921504606846976); /* 2^60 */
544  values[num_values++] = val;
545 
546  val += UINT64CONST(1152921504606846976); /* 2^60 */
547  values[num_values++] = val;
548 
549  val += UINT64CONST(1152921504606846976) + 1; /* 2^60 + 1 */
550  values[num_values++] = val;
551 
552  val += UINT64CONST(1152921504606846976) + 1; /* 2^60 + 1 */
553  values[num_values++] = val;
554 
555  val += UINT64CONST(1152921504606846976) + 1; /* 2^60 + 1 */
556  values[num_values++] = val;
557 
558  val += UINT64CONST(1152921504606846976) + 2; /* 2^60 + 2 */
559  values[num_values++] = val;
560 
561  val += UINT64CONST(1152921504606846976) + 2; /* 2^60 + 2 */
562  values[num_values++] = val;
563 
564  val += UINT64CONST(1152921504606846976); /* 2^60 */
565  values[num_values++] = val;
566 
567  /*
568  * We're now very close to 2^64, so can't add large values anymore. But
569  * add more smaller values to the end, to make sure that all the above
570  * values get flushed and packed into the tree structure.
571  */
572  while (num_values < 1000)
573  {
575  values[num_values++] = val;
576  }
577 
578  /* Create an IntegerSet using these values */
579  intset = intset_create();
580  for (int i = 0; i < num_values; i++)
582 
583  /*
584  * Test intset_is_member() around each of these values
585  */
586  for (int i = 0; i < num_values; i++)
587  {
588  uint64 y = values[i];
589  bool expected;
590  bool result;
591 
592  if (y > 0)
593  {
594  expected = (values[i - 1] == y - 1);
595  result = intset_is_member(intset, y - 1);
596  if (result != expected)
597  elog(ERROR, "intset_is_member failed for " UINT64_FORMAT, y - 1);
598  }
599 
600  result = intset_is_member(intset, y);
601  if (result != true)
602  elog(ERROR, "intset_is_member failed for " UINT64_FORMAT, y);
603 
604  expected = (i != num_values - 1) ? (values[i + 1] == y + 1) : false;
605  result = intset_is_member(intset, y + 1);
606  if (result != expected)
607  elog(ERROR, "intset_is_member failed for " UINT64_FORMAT, y + 1);
608  }
609 
610  /*
611  * Test iterator
612  */
614  for (int i = 0; i < num_values; i++)
615  {
616  found = intset_iterate_next(intset, &x);
617  if (!found || x != values[i])
618  elog(ERROR, "intset_iterate_next failed for " UINT64_FORMAT, x);
619  }
620  found = intset_iterate_next(intset, &x);
621  if (found)
622  elog(ERROR, "intset_iterate_next failed " UINT64_FORMAT, x);
623 }
Datum intset(PG_FUNCTION_ARGS)
Definition: _int_op.c:179
Datum idx(PG_FUNCTION_ARGS)
Definition: _int_op.c:259
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1644
static Datum values[MAXATTR]
Definition: bootstrap.c:150
#define UINT64_FORMAT
Definition: c.h:552
#define lengthof(array)
Definition: c.h:791
#define PG_UINT64_MAX
Definition: c.h:596
int64 TimestampTz
Definition: timestamp.h:39
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
#define NOTICE
Definition: elog.h:35
#define PG_RETURN_VOID()
Definition: fmgr.h:349
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
long val
Definition: informix.c:689
static struct @157 value
uint64 intset_memory_usage(IntegerSet *intset)
Definition: integerset.c:359
uint64 intset_num_entries(IntegerSet *intset)
Definition: integerset.c:350
bool intset_is_member(IntegerSet *intset, uint64 x)
Definition: integerset.c:554
IntegerSet * intset_create(void)
Definition: integerset.c:284
void intset_begin_iterate(IntegerSet *intset)
Definition: integerset.c:624
bool intset_iterate_next(IntegerSet *intset, uint64 *next)
Definition: integerset.c:643
void intset_add_member(IntegerSet *intset, uint64 x)
Definition: integerset.c:370
int y
Definition: isn.c:72
int b
Definition: isn.c:70
int x
Definition: isn.c:71
int i
Definition: isn.c:73
MemoryContext CurrentMemoryContext
Definition: mcxt.c:143
void MemoryContextStats(MemoryContext context)
Definition: mcxt.c:814
void MemoryContextDelete(MemoryContext context)
Definition: mcxt.c:454
void * palloc(Size size)
Definition: mcxt.c:1317
void MemoryContextSetIdentifier(MemoryContext context, const char *id)
Definition: mcxt.c:612
#define AllocSetContextCreate
Definition: memutils.h:129
#define ALLOCSET_SMALL_SIZES
Definition: memutils.h:170
uint64 pg_prng_uint64_range(pg_prng_state *state, uint64 rmin, uint64 rmax)
Definition: pg_prng.c:144
uint32 pg_prng_uint32(pg_prng_state *state)
Definition: pg_prng.c:227
pg_prng_state pg_global_prng_state
Definition: pg_prng.c:34
#define fprintf
Definition: port.h:242
uintptr_t Datum
Definition: postgres.h:64
RT_SCOPE RT_RADIX_TREE *MemoryContext old_ctx
Definition: radixtree.h:1824
MemoryContextSwitchTo(old_ctx)
uint64 num_values
uint64 spacing
char * pattern_str
char * test_name
static void test_pattern(const test_spec *spec)
static const bool intset_test_stats
Datum test_integerset(PG_FUNCTION_ARGS)
PG_MODULE_MAGIC
static void test_single_value(uint64 value)
PG_FUNCTION_INFO_V1(test_integerset)
static void test_empty(void)
static const test_spec test_specs[]
static void test_single_value_and_filler(uint64 value, uint64 filler_min, uint64 filler_max)
static void check_with_filler(IntegerSet *intset, uint64 x, uint64 value, uint64 filler_min, uint64 filler_max)
static void test_huge_distances(void)