PostgreSQL Source Code  git master
test_integerset.c
Go to the documentation of this file.
1 /*--------------------------------------------------------------------------
2  *
3  * test_integerset.c
4  * Test integer set data structure.
5  *
6  * Copyright (c) 2019, PostgreSQL Global Development Group
7  *
8  * IDENTIFICATION
9  * src/test/modules/test_integerset/test_integerset.c
10  *
11  * -------------------------------------------------------------------------
12  */
13 #include "postgres.h"
14 
15 #include "fmgr.h"
16 #include "lib/integerset.h"
17 #include "nodes/bitmapset.h"
18 #include "utils/memutils.h"
19 #include "utils/timestamp.h"
20 #include "storage/block.h"
21 #include "storage/itemptr.h"
22 #include "miscadmin.h"
23 
24 /*
25  * If you enable this, the "pattern" tests will print information about
26  * how long populating, probing, and iterating the test set takes, and
27  * how much memory the test set consumed. That can be used as
28  * micro-benchmark of various operations and input patterns (you might
29  * want to increase the number of values used in each of the test, if
30  * you do that, to reduce noise).
31  *
32  * The information is printed to the server's stderr, mostly because
33  * that's where MemoryContextStats() output goes.
34  */
35 static const bool intset_test_stats = false;
36 
38 
40 
41 /*
42  * A struct to define a pattern of integers, for use with the test_pattern()
43  * function.
44  */
45 typedef struct
46 {
47  char *test_name; /* short name of the test, for humans */
48  char *pattern_str; /* a bit pattern */
49  uint64 spacing; /* pattern repeats at this interval */
50  uint64 num_values; /* number of integers to set in total */
51 } test_spec;
52 
53 static const test_spec test_specs[] = {
54  {
55  "all ones", "1111111111",
56  10, 10000000
57  },
58  {
59  "alternating bits", "0101010101",
60  10, 10000000
61  },
62  {
63  "clusters of ten", "1111111111",
64  10000, 10000000
65  },
66  {
67  "clusters of hundred",
68  "1111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111",
69  10000, 100000000
70  },
71  {
72  "one-every-64k", "1",
73  65536, 10000000
74  },
75  {
76  "sparse", "100000000000000000000000000000001",
77  10000000, 10000000
78  },
79  {
80  "single values, distance > 2^32", "1",
81  UINT64CONST(10000000000), 1000000
82  },
83  {
84  "clusters, distance > 2^32", "10101010",
85  UINT64CONST(10000000000), 10000000
86  },
87  {
88  "clusters, distance > 2^60", "10101010",
89  UINT64CONST(2000000000000000000),
90  23 /* can't be much higher than this, or we
91  * overflow uint64 */
92  }
93 };
94 
95 static void test_pattern(const test_spec *spec);
96 static void test_empty(void);
97 static void test_single_value(uint64 value);
98 static void check_with_filler(IntegerSet *intset, uint64 x, uint64 value, uint64 filler_min, uint64 filler_max);
99 static void test_single_value_and_filler(uint64 value, uint64 filler_min, uint64 filler_max);
100 static void test_huge_distances(void);
101 
102 /*
103  * SQL-callable entry point to perform all tests.
104  */
105 Datum
107 {
108  /* Tests for various corner cases */
109  test_empty();
115  test_single_value_and_filler(0, 1000, 2000);
116  test_single_value_and_filler(1, 1000, 2000);
117  test_single_value_and_filler(1, 1000, 2000000);
120 
121  /* Test different test patterns, with lots of entries */
122  for (int i = 0; i < lengthof(test_specs); i++)
123  {
124  test_pattern(&test_specs[i]);
125  }
126 
127  PG_RETURN_VOID();
128 }
129 
130 /*
131  * Test with a repeating pattern, defined by the 'spec'.
132  */
133 static void
135 {
137  MemoryContext intset_ctx;
138  MemoryContext old_ctx;
139  TimestampTz starttime;
140  TimestampTz endtime;
141  uint64 n;
142  uint64 last_int;
143  int patternlen;
144  uint64 *pattern_values;
145  uint64 pattern_num_values;
146 
147  elog(NOTICE, "testing intset with pattern \"%s\"", spec->test_name);
148  if (intset_test_stats)
149  fprintf(stderr, "-----\ntesting intset with pattern \"%s\"\n", spec->test_name);
150 
151  /* Pre-process the pattern, creating an array of integers from it. */
152  patternlen = strlen(spec->pattern_str);
153  pattern_values = palloc(patternlen * sizeof(uint64));
154  pattern_num_values = 0;
155  for (int i = 0; i < patternlen; i++)
156  {
157  if (spec->pattern_str[i] == '1')
158  pattern_values[pattern_num_values++] = i;
159  }
160 
161  /*
162  * Allocate the integer set.
163  *
164  * Allocate it in a separate memory context, so that we can print its
165  * memory usage easily. (intset_create() creates a memory context of its
166  * own, too, but we don't have direct access to it, so we cannot call
167  * MemoryContextStats() on it directly).
168  */
170  "intset test",
172  MemoryContextSetIdentifier(intset_ctx, spec->test_name);
173  old_ctx = MemoryContextSwitchTo(intset_ctx);
174  intset = intset_create();
175  MemoryContextSwitchTo(old_ctx);
176 
177  /*
178  * Add values to the set.
179  */
180  starttime = GetCurrentTimestamp();
181 
182  n = 0;
183  last_int = 0;
184  while (n < spec->num_values)
185  {
186  uint64 x = 0;
187 
188  for (int i = 0; i < pattern_num_values && n < spec->num_values; i++)
189  {
190  x = last_int + pattern_values[i];
191 
192  intset_add_member(intset, x);
193  n++;
194  }
195  last_int += spec->spacing;
196  }
197 
198  endtime = GetCurrentTimestamp();
199 
200  if (intset_test_stats)
201  fprintf(stderr, "added " UINT64_FORMAT " values in %d ms\n",
202  spec->num_values, (int) (endtime - starttime) / 1000);
203 
204  /*
205  * Print stats on the amount of memory used.
206  *
207  * We print the usage reported by intset_memory_usage(), as well as the
208  * stats from the memory context. They should be in the same ballpark,
209  * but it's hard to automate testing that, so if you're making changes to
210  * the implementation, just observe that manually.
211  */
212  if (intset_test_stats)
213  {
214  uint64 mem_usage;
215 
216  /*
217  * Also print memory usage as reported by intset_memory_usage(). It
218  * should be in the same ballpark as the usage reported by
219  * MemoryContextStats().
220  */
221  mem_usage = intset_memory_usage(intset);
222  fprintf(stderr, "intset_memory_usage() reported " UINT64_FORMAT " (%0.2f bytes / integer)\n",
223  mem_usage, (double) mem_usage / spec->num_values);
224 
225  MemoryContextStats(intset_ctx);
226  }
227 
228  /* Check that intset_get_num_entries works */
229  n = intset_num_entries(intset);
230  if (n != spec->num_values)
231  elog(ERROR, "intset_num_entries returned " UINT64_FORMAT ", expected " UINT64_FORMAT, n, spec->num_values);
232 
233  /*
234  * Test random-access probes with intset_is_member()
235  */
236  starttime = GetCurrentTimestamp();
237 
238  for (n = 0; n < 100000; n++)
239  {
240  bool b;
241  bool expected;
242  uint64 x;
243 
244  /*
245  * Pick next value to probe at random. We limit the probes to the
246  * last integer that we added to the set, plus an arbitrary constant
247  * (1000). There's no point in probing the whole 0 - 2^64 range, if
248  * only a small part of the integer space is used. We would very
249  * rarely hit values that are actually in the set.
250  */
251  x = (pg_lrand48() << 31) | pg_lrand48();
252  x = x % (last_int + 1000);
253 
254  /* Do we expect this value to be present in the set? */
255  if (x >= last_int)
256  expected = false;
257  else
258  {
259  uint64 idx = x % spec->spacing;
260 
261  if (idx >= patternlen)
262  expected = false;
263  else if (spec->pattern_str[idx] == '1')
264  expected = true;
265  else
266  expected = false;
267  }
268 
269  /* Is it present according to intset_is_member() ? */
270  b = intset_is_member(intset, x);
271 
272  if (b != expected)
273  elog(ERROR, "mismatch at " UINT64_FORMAT ": %d vs %d", x, b, expected);
274  }
275  endtime = GetCurrentTimestamp();
276  if (intset_test_stats)
277  fprintf(stderr, "probed " UINT64_FORMAT " values in %d ms\n",
278  n, (int) (endtime - starttime) / 1000);
279 
280  /*
281  * Test iterator
282  */
283  starttime = GetCurrentTimestamp();
284 
285  intset_begin_iterate(intset);
286  n = 0;
287  last_int = 0;
288  while (n < spec->num_values)
289  {
290  for (int i = 0; i < pattern_num_values && n < spec->num_values; i++)
291  {
292  uint64 expected = last_int + pattern_values[i];
293  uint64 x;
294 
295  if (!intset_iterate_next(intset, &x))
296  break;
297 
298  if (x != expected)
299  elog(ERROR, "iterate returned wrong value; got " UINT64_FORMAT ", expected " UINT64_FORMAT, x, expected);
300  n++;
301  }
302  last_int += spec->spacing;
303  }
304  endtime = GetCurrentTimestamp();
305  if (intset_test_stats)
306  fprintf(stderr, "iterated " UINT64_FORMAT " values in %d ms\n",
307  n, (int) (endtime - starttime) / 1000);
308 
309  if (n < spec->num_values)
310  elog(ERROR, "iterator stopped short after " UINT64_FORMAT " entries, expected " UINT64_FORMAT, n, spec->num_values);
311  if (n > spec->num_values)
312  elog(ERROR, "iterator returned " UINT64_FORMAT " entries, " UINT64_FORMAT " was expected", n, spec->num_values);
313 
314  MemoryContextDelete(intset_ctx);
315 }
316 
317 /*
318  * Test with a set containing a single integer.
319  */
320 static void
322 {
324  uint64 x;
325  uint64 num_entries;
326  bool found;
327 
328  elog(NOTICE, "testing intset with single value " UINT64_FORMAT, value);
329 
330  /* Create the set. */
331  intset = intset_create();
332  intset_add_member(intset, value);
333 
334  /* Test intset_get_num_entries() */
335  num_entries = intset_num_entries(intset);
336  if (num_entries != 1)
337  elog(ERROR, "intset_num_entries returned " UINT64_FORMAT ", expected 1", num_entries);
338 
339  /*
340  * Test intset_is_member() at various special values, like 0 and maximum
341  * possible 64-bit integer, as well as the value itself.
342  */
343  if (intset_is_member(intset, 0) != (value == 0))
344  elog(ERROR, "intset_is_member failed for 0");
345  if (intset_is_member(intset, 1) != (value == 1))
346  elog(ERROR, "intset_is_member failed for 1");
347  if (intset_is_member(intset, PG_UINT64_MAX) != (value == PG_UINT64_MAX))
348  elog(ERROR, "intset_is_member failed for PG_UINT64_MAX");
349  if (intset_is_member(intset, value) != true)
350  elog(ERROR, "intset_is_member failed for the tested value");
351 
352  /*
353  * Test iterator
354  */
355  intset_begin_iterate(intset);
356  found = intset_iterate_next(intset, &x);
357  if (!found || x != value)
358  elog(ERROR, "intset_iterate_next failed for " UINT64_FORMAT, x);
359 
360  found = intset_iterate_next(intset, &x);
361  if (found)
362  elog(ERROR, "intset_iterate_next failed " UINT64_FORMAT, x);
363 }
364 
365 /*
366  * Test with an integer set that contains:
367  *
368  * - a given single 'value', and
369  * - all integers between 'filler_min' and 'filler_max'.
370  *
371  * This exercises different codepaths than testing just with a single value,
372  * because the implementation buffers newly-added values. If we add just a
373  * single value to the set, we won't test the internal B-tree code at all,
374  * just the code that deals with the buffer.
375  */
376 static void
377 test_single_value_and_filler(uint64 value, uint64 filler_min, uint64 filler_max)
378 {
380  uint64 x;
381  bool found;
382  uint64 *iter_expected;
383  uint64 n = 0;
384  uint64 num_entries = 0;
385  uint64 mem_usage;
386 
387  elog(NOTICE, "testing intset with value " UINT64_FORMAT ", and all between " UINT64_FORMAT " and " UINT64_FORMAT,
388  value, filler_min, filler_max);
389 
390  intset = intset_create();
391 
392  iter_expected = palloc(sizeof(uint64) * (filler_max - filler_min + 1));
393  if (value < filler_min)
394  {
395  intset_add_member(intset, value);
396  iter_expected[n++] = value;
397  }
398 
399  for (x = filler_min; x < filler_max; x++)
400  {
401  intset_add_member(intset, x);
402  iter_expected[n++] = x;
403  }
404 
405  if (value >= filler_max)
406  {
407  intset_add_member(intset, value);
408  iter_expected[n++] = value;
409  }
410 
411  /* Test intset_get_num_entries() */
412  num_entries = intset_num_entries(intset);
413  if (num_entries != n)
414  elog(ERROR, "intset_num_entries returned " UINT64_FORMAT ", expected " UINT64_FORMAT, num_entries, n);
415 
416  /*
417  * Test intset_is_member() at various spots, at and around the values that
418  * we expect to be set, as well as 0 and the maximum possible value.
419  */
420  check_with_filler(intset, 0,
421  value, filler_min, filler_max);
422  check_with_filler(intset, 1,
423  value, filler_min, filler_max);
424  check_with_filler(intset, filler_min - 1,
425  value, filler_min, filler_max);
426  check_with_filler(intset, filler_min,
427  value, filler_min, filler_max);
428  check_with_filler(intset, filler_min + 1,
429  value, filler_min, filler_max);
430  check_with_filler(intset, value - 1,
431  value, filler_min, filler_max);
432  check_with_filler(intset, value,
433  value, filler_min, filler_max);
434  check_with_filler(intset, value + 1,
435  value, filler_min, filler_max);
436  check_with_filler(intset, filler_max - 1,
437  value, filler_min, filler_max);
438  check_with_filler(intset, filler_max,
439  value, filler_min, filler_max);
440  check_with_filler(intset, filler_max + 1,
441  value, filler_min, filler_max);
442  check_with_filler(intset, PG_UINT64_MAX - 1,
443  value, filler_min, filler_max);
445  value, filler_min, filler_max);
446 
447  intset_begin_iterate(intset);
448  for (uint64 i = 0; i < n; i++)
449  {
450  found = intset_iterate_next(intset, &x);
451  if (!found || x != iter_expected[i])
452  elog(ERROR, "intset_iterate_next failed for " UINT64_FORMAT, x);
453  }
454  found = intset_iterate_next(intset, &x);
455  if (found)
456  elog(ERROR, "intset_iterate_next failed " UINT64_FORMAT, x);
457 
458  mem_usage = intset_memory_usage(intset);
459  if (mem_usage < 5000 || mem_usage > 500000000)
460  elog(ERROR, "intset_memory_usage() reported suspicious value: " UINT64_FORMAT, mem_usage);
461 }
462 
463 /*
464  * Helper function for test_single_value_and_filler.
465  *
466  * Calls intset_is_member() for value 'x', and checks that the result is what
467  * we expect.
468  */
469 static void
470 check_with_filler(IntegerSet *intset, uint64 x,
471  uint64 value, uint64 filler_min, uint64 filler_max)
472 {
473  bool expected;
474  bool actual;
475 
476  expected = (x == value || (filler_min <= x && x < filler_max));
477 
478  actual = intset_is_member(intset, x);
479 
480  if (actual != expected)
481  elog(ERROR, "intset_is_member failed for " UINT64_FORMAT, x);
482 }
483 
484 /*
485  * Test empty set
486  */
487 static void
489 {
491  uint64 x;
492 
493  elog(NOTICE, "testing intset with empty set");
494 
495  intset = intset_create();
496 
497  /* Test intset_is_member() */
498  if (intset_is_member(intset, 0) != false)
499  elog(ERROR, "intset_is_member on empty set returned true");
500  if (intset_is_member(intset, 1) != false)
501  elog(ERROR, "intset_is_member on empty set returned true");
502  if (intset_is_member(intset, PG_UINT64_MAX) != false)
503  elog(ERROR, "intset_is_member on empty set returned true");
504 
505  /* Test iterator */
506  intset_begin_iterate(intset);
507  if (intset_iterate_next(intset, &x))
508  elog(ERROR, "intset_iterate_next on empty set returned a value (" UINT64_FORMAT ")", x);
509 }
510 
511 /*
512  * Test with integers that are more than 2^60 apart.
513  *
514  * The Simple-8b encoding used by the set implementation can only encode
515  * values up to 2^60. That makes large differences like this interesting
516  * to test.
517  */
518 static void
520 {
522  uint64 values[1000];
523  int num_values = 0;
524  uint64 val = 0;
525  bool found;
526  uint64 x;
527 
528  elog(NOTICE, "testing intset with distances > 2^60 between values");
529 
530  val = 0;
531  values[num_values++] = val;
532 
533  /* Test differences on both sides of the 2^60 boundary. */
534  val += UINT64CONST(1152921504606846976) - 1; /* 2^60 - 1 */
535  values[num_values++] = val;
536 
537  val += UINT64CONST(1152921504606846976) - 1; /* 2^60 - 1 */
538  values[num_values++] = val;
539 
540  val += UINT64CONST(1152921504606846976); /* 2^60 */
541  values[num_values++] = val;
542 
543  val += UINT64CONST(1152921504606846976); /* 2^60 */
544  values[num_values++] = val;
545 
546  val += UINT64CONST(1152921504606846976); /* 2^60 */
547  values[num_values++] = val;
548 
549  val += UINT64CONST(1152921504606846976) + 1; /* 2^60 + 1 */
550  values[num_values++] = val;
551 
552  val += UINT64CONST(1152921504606846976) + 1; /* 2^60 + 1 */
553  values[num_values++] = val;
554 
555  val += UINT64CONST(1152921504606846976) + 1; /* 2^60 + 1 */
556  values[num_values++] = val;
557 
558  val += UINT64CONST(1152921504606846976) + 2; /* 2^60 + 2 */
559  values[num_values++] = val;
560 
561  val += UINT64CONST(1152921504606846976) + 2; /* 2^60 + 2 */
562  values[num_values++] = val;
563 
564  val += UINT64CONST(1152921504606846976); /* 2^60 */
565  values[num_values++] = val;
566 
567  /*
568  * We're now very close to 2^64, so can't add large values anymore. But
569  * add more smaller values to the end, to make sure that all the above
570  * values get flushed and packed into the tree structure.
571  */
572  while (num_values < 1000)
573  {
574  val += pg_lrand48();
575  values[num_values++] = val;
576  }
577 
578  /* Create an IntegerSet using these values */
579  intset = intset_create();
580  for (int i = 0; i < num_values; i++)
581  intset_add_member(intset, values[i]);
582 
583  /*
584  * Test intset_is_member() around each of these values
585  */
586  for (int i = 0; i < num_values; i++)
587  {
588  uint64 x = values[i];
589  bool expected;
590  bool result;
591 
592  if (x > 0)
593  {
594  expected = (values[i - 1] == x - 1);
595  result = intset_is_member(intset, x - 1);
596  if (result != expected)
597  elog(ERROR, "intset_is_member failed for " UINT64_FORMAT, x - 1);
598  }
599 
600  result = intset_is_member(intset, x);
601  if (result != true)
602  elog(ERROR, "intset_is_member failed for " UINT64_FORMAT, x);
603 
604  expected = (i != num_values - 1) ? (values[i + 1] == x + 1) : false;
605  result = intset_is_member(intset, x + 1);
606  if (result != expected)
607  elog(ERROR, "intset_is_member failed for " UINT64_FORMAT, x + 1);
608  }
609 
610  /*
611  * Test iterator
612  */
613  intset_begin_iterate(intset);
614  for (int i = 0; i < num_values; i++)
615  {
616  found = intset_iterate_next(intset, &x);
617  if (!found || x != values[i])
618  elog(ERROR, "intset_iterate_next failed for " UINT64_FORMAT, x);
619  }
620  found = intset_iterate_next(intset, &x);
621  if (found)
622  elog(ERROR, "intset_iterate_next failed " UINT64_FORMAT, x);
623 }
Datum intset(PG_FUNCTION_ARGS)
Definition: _int_op.c:184
void MemoryContextDelete(MemoryContext context)
Definition: mcxt.c:211
#define AllocSetContextCreate
Definition: memutils.h:169
#define PG_UINT64_MAX
Definition: c.h:445
static const test_spec test_specs[]
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1568
int64 TimestampTz
Definition: timestamp.h:39
char * test_name
IntegerSet * intset_create(void)
Definition: integerset.c:285
#define ALLOCSET_SMALL_SIZES
Definition: memutils.h:201
long pg_lrand48(void)
Definition: erand48.c:100
uint64 intset_memory_usage(IntegerSet *intset)
Definition: integerset.c:360
static struct @144 value
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
bool intset_iterate_next(IntegerSet *intset, uint64 *next)
Definition: integerset.c:646
Datum idx(PG_FUNCTION_ARGS)
Definition: _int_op.c:264
uint64 spacing
#define lengthof(array)
Definition: c.h:662
#define fprintf
Definition: port.h:196
uint64 num_values
static void test_single_value_and_filler(uint64 value, uint64 filler_min, uint64 filler_max)
PG_MODULE_MAGIC
void intset_add_member(IntegerSet *intset, uint64 x)
Definition: integerset.c:371
#define ERROR
Definition: elog.h:43
void MemoryContextStats(MemoryContext context)
Definition: mcxt.c:474
static void test_pattern(const test_spec *spec)
static void test_huge_distances(void)
MemoryContext CurrentMemoryContext
Definition: mcxt.c:38
void intset_begin_iterate(IntegerSet *intset)
Definition: integerset.c:627
static void test_empty(void)
uintptr_t Datum
Definition: postgres.h:367
#define NOTICE
Definition: elog.h:37
#define PG_RETURN_VOID()
Definition: fmgr.h:339
char * pattern_str
static const bool intset_test_stats
static void check_with_filler(IntegerSet *intset, uint64 x, uint64 value, uint64 filler_min, uint64 filler_max)
void MemoryContextSetIdentifier(MemoryContext context, const char *id)
Definition: mcxt.c:329
PG_FUNCTION_INFO_V1(test_integerset)
static Datum values[MAXATTR]
Definition: bootstrap.c:167
void * palloc(Size size)
Definition: mcxt.c:924
Datum test_integerset(PG_FUNCTION_ARGS)
#define elog(elevel,...)
Definition: elog.h:226
int i
#define PG_FUNCTION_ARGS
Definition: fmgr.h:188
bool intset_is_member(IntegerSet *intset, uint64 x)
Definition: integerset.c:555
static void test_single_value(uint64 value)
#define UINT64_FORMAT
Definition: c.h:401
long val
Definition: informix.c:684
uint64 intset_num_entries(IntegerSet *intset)
Definition: integerset.c:351