PostgreSQL Source Code  git master
integerset.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * integerset.c
4  * Data structure to hold a large set of 64-bit integers efficiently
5  *
6  * IntegerSet provides an in-memory data structure to hold a set of
7  * arbitrary 64-bit integers. Internally, the values are stored in a
8  * B-tree, with a special packed representation at the leaf level using
9  * the Simple-8b algorithm, which can pack clusters of nearby values
10  * very tightly.
11  *
12  * Memory consumption depends on the number of values stored, but also
13  * on how far the values are from each other. In the best case, with
14  * long runs of consecutive integers, memory consumption can be as low as
15  * 0.1 bytes per integer. In the worst case, if integers are more than
16  * 2^32 apart, it uses about 8 bytes per integer. In typical use, the
17  * consumption per integer is somewhere between those extremes, depending
18  * on the range of integers stored, and how "clustered" they are.
19  *
20  *
21  * Interface
22  * ---------
23  *
24  * intset_create - Create a new, empty set
25  * intset_add_member - Add an integer to the set
26  * intset_is_member - Test if an integer is in the set
27  * intset_begin_iterate - Begin iterating through all integers in set
28  * intset_iterate_next - Return next set member, if any
29  *
30  * intset_create() creates the set in the current memory context. Subsequent
31  * operations that add to the data structure will continue to allocate from
32  * that same context, even if it's not current anymore.
33  *
34  * Note that there is no function to free an integer set. If you need to do
35  * that, create a dedicated memory context to hold it, and destroy the memory
36  * context instead.
37  *
38  *
39  * Limitations
40  * -----------
41  *
42  * - Values must be added in order. (Random insertions would require
43  * splitting nodes, which hasn't been implemented.)
44  *
45  * - Values cannot be added while iteration is in progress.
46  *
47  * - No support for removing values.
48  *
49  * None of these limitations are fundamental to the data structure, so they
50  * could be lifted if needed, by writing some new code. But the current
51  * users of this facility don't need them.
52  *
53  *
54  * References
55  * ----------
56  *
57  * Simple-8b encoding is based on:
58  *
59  * Vo Ngoc Anh, Alistair Moffat, Index compression using 64-bit words,
60  * Software - Practice & Experience, v.40 n.2, p.131-147, February 2010
61  * (https://doi.org/10.1002/spe.948)
62  *
63  *
64  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
65  * Portions Copyright (c) 1994, Regents of the University of California
66  *
67  * IDENTIFICATION
68  * src/backend/lib/integerset.c
69  *
70  *-------------------------------------------------------------------------
71  */
72 #include "postgres.h"
73 
74 #include "lib/integerset.h"
75 #include "port/pg_bitutils.h"
76 #include "utils/memutils.h"
77 
78 
79 /*
80  * Maximum number of integers that can be encoded in a single Simple-8b
81  * codeword. (Defined here before anything else, so that we can size arrays
82  * using this.)
83  */
84 #define SIMPLE8B_MAX_VALUES_PER_CODEWORD 240
85 
86 /*
87  * Parameters for shape of the in-memory B-tree.
88  *
89  * These set the size of each internal and leaf node. They don't necessarily
90  * need to be the same, because the tree is just an in-memory structure.
91  * With the default 64, each node is about 1 kb.
92  *
93  * If you change these, you must recalculate MAX_TREE_LEVELS, too!
94  */
95 #define MAX_INTERNAL_ITEMS 64
96 #define MAX_LEAF_ITEMS 64
97 
98 /*
99  * Maximum height of the tree.
100  *
101  * MAX_TREE_ITEMS is calculated from the "fan-out" of the B-tree. The
102  * theoretical maximum number of items that we can store in a set is 2^64,
103  * so MAX_TREE_LEVELS should be set so that:
104  *
105  * MAX_LEAF_ITEMS * MAX_INTERNAL_ITEMS ^ (MAX_TREE_LEVELS - 1) >= 2^64.
106  *
107  * In practice, we'll need far fewer levels, because you will run out of
108  * memory long before reaching that number, but let's be conservative.
109  */
110 #define MAX_TREE_LEVELS 11
111 
112 /*
113  * Node structures, for the in-memory B-tree.
114  *
115  * An internal node holds a number of downlink pointers to leaf nodes, or
116  * to internal nodes on a lower level. For each downlink, the key value
117  * corresponding to the lower level node is stored in a sorted array. The
118  * stored key values are low keys. In other words, if the downlink has value
119  * X, then all items stored on that child are >= X.
120  *
121  * Each leaf node holds a number of "items", with a varying number of
122  * integers packed into each item. Each item consists of two 64-bit words:
123  * The first word holds the first integer stored in the item, in plain format.
124  * The second word contains between 0 and 240 more integers, packed using
125  * Simple-8b encoding. By storing the first integer in plain, unpacked,
126  * format, we can use binary search to quickly find an item that holds (or
127  * would hold) a particular integer. And by storing the rest in packed form,
128  * we still get pretty good memory density, if there are clusters of integers
129  * with similar values.
130  *
131  * Each leaf node also has a pointer to the next leaf node, so that the leaf
132  * nodes can be easily walked from beginning to end when iterating.
133  */
134 typedef struct intset_node intset_node;
135 typedef struct intset_leaf_node intset_leaf_node;
137 
138 /* Common structure of both leaf and internal nodes. */
140 {
141  uint16 level; /* tree level of this node */
142  uint16 num_items; /* number of items in this node */
143 };
144 
145 /* Internal node */
147 {
148  /* common header, must match intset_node */
149  uint16 level; /* >= 1 on internal nodes */
151 
152  /*
153  * 'values' is an array of key values, and 'downlinks' are pointers to
154  * lower-level nodes, corresponding to the key values.
155  */
158 };
159 
160 /* Leaf node */
161 typedef struct
162 {
163  uint64 first; /* first integer in this item */
164  uint64 codeword; /* simple8b encoded differences from 'first' */
165 } leaf_item;
166 
167 #define MAX_VALUES_PER_LEAF_ITEM (1 + SIMPLE8B_MAX_VALUES_PER_CODEWORD)
168 
170 {
171  /* common header, must match intset_node */
172  uint16 level; /* 0 on leafs */
174 
175  intset_leaf_node *next; /* right sibling, if any */
176 
178 };
179 
180 /*
181  * We buffer insertions in a simple array, before packing and inserting them
182  * into the B-tree. MAX_BUFFERED_VALUES sets the size of the buffer. The
183  * encoder assumes that it is large enough that we can always fill a leaf
184  * item with buffered new items. In other words, MAX_BUFFERED_VALUES must be
185  * larger than MAX_VALUES_PER_LEAF_ITEM. For efficiency, make it much larger.
186  */
187 #define MAX_BUFFERED_VALUES (MAX_VALUES_PER_LEAF_ITEM * 2)
188 
189 /*
190  * IntegerSet is the top-level object representing the set.
191  *
192  * The integers are stored in an in-memory B-tree structure, plus an array
193  * for newly-added integers. IntegerSet also tracks information about memory
194  * usage, as well as the current position when iterating the set with
195  * intset_begin_iterate / intset_iterate_next.
196  */
198 {
199  /*
200  * 'context' is the memory context holding this integer set and all its
201  * tree nodes.
202  *
203  * 'mem_used' tracks the amount of memory used. We don't do anything with
204  * it in integerset.c itself, but the callers can ask for it with
205  * intset_memory_usage().
206  */
208  uint64 mem_used;
209 
210  uint64 num_entries; /* total # of values in the set */
211  uint64 highest_value; /* highest value stored in this set */
212 
213  /*
214  * B-tree to hold the packed values.
215  *
216  * 'rightmost_nodes' hold pointers to the rightmost node on each level.
217  * rightmost_parent[0] is rightmost leaf, rightmost_parent[1] is its
218  * parent, and so forth, all the way up to the root. These are needed when
219  * adding new values. (Currently, we require that new values are added at
220  * the end.)
221  */
222  int num_levels; /* height of the tree */
223  intset_node *root; /* root node */
225  intset_leaf_node *leftmost_leaf; /* leftmost leaf node */
226 
227  /*
228  * Holding area for new items that haven't been inserted to the tree yet.
229  */
232 
233  /*
234  * Iterator support.
235  *
236  * 'iter_values' is an array of integers ready to be returned to the
237  * caller; 'iter_num_values' is the length of that array, and
238  * 'iter_valueno' is the next index. 'iter_node' and 'iter_itemno' point
239  * to the leaf node, and item within the leaf node, to get the next batch
240  * of values from.
241  *
242  * Normally, 'iter_values' points to 'iter_values_buf', which holds items
243  * decoded from a leaf item. But after we have scanned the whole B-tree,
244  * we iterate through all the unbuffered values, too, by pointing
245  * iter_values to 'buffered_values'.
246  */
247  bool iter_active; /* is iteration in progress? */
248 
249  const uint64 *iter_values;
250  int iter_num_values; /* number of elements in 'iter_values' */
251  int iter_valueno; /* next index into 'iter_values' */
252 
253  intset_leaf_node *iter_node; /* current leaf node */
254  int iter_itemno; /* next item in 'iter_node' to decode */
255 
257 };
258 
259 /*
260  * Prototypes for internal functions.
261  */
262 static void intset_update_upper(IntegerSet *intset, int level,
263  intset_node *child, uint64 child_key);
265 
266 static int intset_binsrch_uint64(uint64 item, uint64 *arr, int arr_elems,
267  bool nextkey);
268 static int intset_binsrch_leaf(uint64 item, leaf_item *arr, int arr_elems,
269  bool nextkey);
270 
271 static uint64 simple8b_encode(const uint64 *ints, int *num_encoded, uint64 base);
272 static int simple8b_decode(uint64 codeword, uint64 *decoded, uint64 base);
273 static bool simple8b_contains(uint64 codeword, uint64 key, uint64 base);
274 
275 
276 /*
277  * Create a new, initially empty, integer set.
278  *
279  * The integer set is created in the current memory context.
280  * We will do all subsequent allocations in the same context, too, regardless
281  * of which memory context is current when new integers are added to the set.
282  */
283 IntegerSet *
285 {
287 
288  intset = (IntegerSet *) palloc(sizeof(IntegerSet));
289  intset->context = CurrentMemoryContext;
290  intset->mem_used = GetMemoryChunkSpace(intset);
291 
292  intset->num_entries = 0;
293  intset->highest_value = 0;
294 
295  intset->num_levels = 0;
296  intset->root = NULL;
297  memset(intset->rightmost_nodes, 0, sizeof(intset->rightmost_nodes));
298  intset->leftmost_leaf = NULL;
299 
300  intset->num_buffered_values = 0;
301 
302  intset->iter_active = false;
303  intset->iter_node = NULL;
304  intset->iter_itemno = 0;
305  intset->iter_valueno = 0;
306  intset->iter_num_values = 0;
307  intset->iter_values = NULL;
308 
309  return intset;
310 }
311 
312 /*
313  * Allocate a new node.
314  */
315 static intset_internal_node *
317 {
319 
321  sizeof(intset_internal_node));
322  intset->mem_used += GetMemoryChunkSpace(n);
323 
324  n->level = 0; /* caller must set */
325  n->num_items = 0;
326 
327  return n;
328 }
329 
330 static intset_leaf_node *
332 {
333  intset_leaf_node *n;
334 
335  n = (intset_leaf_node *) MemoryContextAlloc(intset->context,
336  sizeof(intset_leaf_node));
337  intset->mem_used += GetMemoryChunkSpace(n);
338 
339  n->level = 0;
340  n->num_items = 0;
341  n->next = NULL;
342 
343  return n;
344 }
345 
346 /*
347  * Return the number of entries in the integer set.
348  */
349 uint64
351 {
352  return intset->num_entries;
353 }
354 
355 /*
356  * Return the amount of memory used by the integer set.
357  */
358 uint64
360 {
361  return intset->mem_used;
362 }
363 
364 /*
365  * Add a value to the set.
366  *
367  * Values must be added in order.
368  */
369 void
371 {
372  if (intset->iter_active)
373  elog(ERROR, "cannot add new values to integer set while iteration is in progress");
374 
375  if (x <= intset->highest_value && intset->num_entries > 0)
376  elog(ERROR, "cannot add value to integer set out of order");
377 
378  if (intset->num_buffered_values >= MAX_BUFFERED_VALUES)
379  {
380  /* Time to flush our buffer */
382  Assert(intset->num_buffered_values < MAX_BUFFERED_VALUES);
383  }
384 
385  /* Add it to the buffer of newly-added values */
386  intset->buffered_values[intset->num_buffered_values] = x;
387  intset->num_buffered_values++;
388  intset->num_entries++;
389  intset->highest_value = x;
390 }
391 
392 /*
393  * Take a batch of buffered values, and pack them into the B-tree.
394  */
395 static void
397 {
398  uint64 *values = intset->buffered_values;
399  uint64 num_values = intset->num_buffered_values;
400  int num_packed = 0;
401  intset_leaf_node *leaf;
402 
403  leaf = (intset_leaf_node *) intset->rightmost_nodes[0];
404 
405  /*
406  * If the tree is completely empty, create the first leaf page, which is
407  * also the root.
408  */
409  if (leaf == NULL)
410  {
411  /*
412  * This is the very first item in the set.
413  *
414  * Allocate root node. It's also a leaf.
415  */
417 
418  intset->root = (intset_node *) leaf;
419  intset->leftmost_leaf = leaf;
420  intset->rightmost_nodes[0] = (intset_node *) leaf;
421  intset->num_levels = 1;
422  }
423 
424  /*
425  * If there are less than MAX_VALUES_PER_LEAF_ITEM values in the buffer,
426  * stop. In most cases, we cannot encode that many values in a single
427  * value, but this way, the encoder doesn't have to worry about running
428  * out of input.
429  */
430  while (num_values - num_packed >= MAX_VALUES_PER_LEAF_ITEM)
431  {
432  leaf_item item;
433  int num_encoded;
434 
435  /*
436  * Construct the next leaf item, packing as many buffered values as
437  * possible.
438  */
439  item.first = values[num_packed];
440  item.codeword = simple8b_encode(&values[num_packed + 1],
441  &num_encoded,
442  item.first);
443 
444  /*
445  * Add the item to the node, allocating a new node if the old one is
446  * full.
447  */
448  if (leaf->num_items >= MAX_LEAF_ITEMS)
449  {
450  /* Allocate new leaf and link it to the tree */
451  intset_leaf_node *old_leaf = leaf;
452 
454  old_leaf->next = leaf;
455  intset->rightmost_nodes[0] = (intset_node *) leaf;
456  intset_update_upper(intset, 1, (intset_node *) leaf, item.first);
457  }
458  leaf->items[leaf->num_items++] = item;
459 
460  num_packed += 1 + num_encoded;
461  }
462 
463  /*
464  * Move any remaining buffered values to the beginning of the array.
465  */
466  if (num_packed < intset->num_buffered_values)
467  {
468  memmove(&intset->buffered_values[0],
469  &intset->buffered_values[num_packed],
470  (intset->num_buffered_values - num_packed) * sizeof(uint64));
471  }
472  intset->num_buffered_values -= num_packed;
473 }
474 
475 /*
476  * Insert a downlink into parent node, after creating a new node.
477  *
478  * Recurses if the parent node is full, too.
479  */
480 static void
482  uint64 child_key)
483 {
484  intset_internal_node *parent;
485 
486  Assert(level > 0);
487 
488  /*
489  * Create a new root node, if necessary.
490  */
491  if (level >= intset->num_levels)
492  {
493  intset_node *oldroot = intset->root;
494  uint64 downlink_key;
495 
496  /* MAX_TREE_LEVELS should be more than enough, this shouldn't happen */
497  if (intset->num_levels == MAX_TREE_LEVELS)
498  elog(ERROR, "could not expand integer set, maximum number of levels reached");
499  intset->num_levels++;
500 
501  /*
502  * Get the first value on the old root page, to be used as the
503  * downlink.
504  */
505  if (intset->root->level == 0)
506  downlink_key = ((intset_leaf_node *) oldroot)->items[0].first;
507  else
508  downlink_key = ((intset_internal_node *) oldroot)->values[0];
509 
511  parent->level = level;
512  parent->values[0] = downlink_key;
513  parent->downlinks[0] = oldroot;
514  parent->num_items = 1;
515 
516  intset->root = (intset_node *) parent;
517  intset->rightmost_nodes[level] = (intset_node *) parent;
518  }
519 
520  /*
521  * Place the downlink on the parent page.
522  */
523  parent = (intset_internal_node *) intset->rightmost_nodes[level];
524 
525  if (parent->num_items < MAX_INTERNAL_ITEMS)
526  {
527  parent->values[parent->num_items] = child_key;
528  parent->downlinks[parent->num_items] = child;
529  parent->num_items++;
530  }
531  else
532  {
533  /*
534  * Doesn't fit. Allocate new parent, with the downlink as the first
535  * item on it, and recursively insert the downlink to the new parent
536  * to the grandparent.
537  */
539  parent->level = level;
540  parent->values[0] = child_key;
541  parent->downlinks[0] = child;
542  parent->num_items = 1;
543 
544  intset->rightmost_nodes[level] = (intset_node *) parent;
545 
546  intset_update_upper(intset, level + 1, (intset_node *) parent, child_key);
547  }
548 }
549 
550 /*
551  * Does the set contain the given value?
552  */
553 bool
555 {
556  intset_node *node;
557  intset_leaf_node *leaf;
558  int level;
559  int itemno;
560  leaf_item *item;
561 
562  /*
563  * The value might be in the buffer of newly-added values.
564  */
565  if (intset->num_buffered_values > 0 && x >= intset->buffered_values[0])
566  {
567  itemno = intset_binsrch_uint64(x,
568  intset->buffered_values,
569  intset->num_buffered_values,
570  false);
571  if (itemno >= intset->num_buffered_values)
572  return false;
573  else
574  return (intset->buffered_values[itemno] == x);
575  }
576 
577  /*
578  * Start from the root, and walk down the B-tree to find the right leaf
579  * node.
580  */
581  if (!intset->root)
582  return false;
583  node = intset->root;
584  for (level = intset->num_levels - 1; level > 0; level--)
585  {
587 
588  Assert(node->level == level);
589 
590  itemno = intset_binsrch_uint64(x, n->values, n->num_items, true);
591  if (itemno == 0)
592  return false;
593  node = n->downlinks[itemno - 1];
594  }
595  Assert(node->level == 0);
596  leaf = (intset_leaf_node *) node;
597 
598  /*
599  * Binary search to find the right item on the leaf page
600  */
601  itemno = intset_binsrch_leaf(x, leaf->items, leaf->num_items, true);
602  if (itemno == 0)
603  return false;
604  item = &leaf->items[itemno - 1];
605 
606  /* Is this a match to the first value on the item? */
607  if (item->first == x)
608  return true;
609  Assert(x > item->first);
610 
611  /* Is it in the packed codeword? */
612  if (simple8b_contains(item->codeword, x, item->first))
613  return true;
614 
615  return false;
616 }
617 
618 /*
619  * Begin in-order scan through all the values.
620  *
621  * While the iteration is in-progress, you cannot add new values to the set.
622  */
623 void
625 {
626  /* Note that we allow an iteration to be abandoned midway */
627  intset->iter_active = true;
628  intset->iter_node = intset->leftmost_leaf;
629  intset->iter_itemno = 0;
630  intset->iter_valueno = 0;
631  intset->iter_num_values = 0;
632  intset->iter_values = intset->iter_values_buf;
633 }
634 
635 /*
636  * Returns the next integer, when iterating.
637  *
638  * intset_begin_iterate() must be called first. intset_iterate_next() returns
639  * the next value in the set. Returns true, if there was another value, and
640  * stores the value in *next. Otherwise, returns false.
641  */
642 bool
644 {
645  Assert(intset->iter_active);
646  for (;;)
647  {
648  /* Return next iter_values[] entry if any */
649  if (intset->iter_valueno < intset->iter_num_values)
650  {
651  *next = intset->iter_values[intset->iter_valueno++];
652  return true;
653  }
654 
655  /* Decode next item in current leaf node, if any */
656  if (intset->iter_node &&
657  intset->iter_itemno < intset->iter_node->num_items)
658  {
659  leaf_item *item;
660  int num_decoded;
661 
662  item = &intset->iter_node->items[intset->iter_itemno++];
663 
664  intset->iter_values_buf[0] = item->first;
665  num_decoded = simple8b_decode(item->codeword,
666  &intset->iter_values_buf[1],
667  item->first);
668  intset->iter_num_values = num_decoded + 1;
669  intset->iter_valueno = 0;
670  continue;
671  }
672 
673  /* No more items on this leaf, step to next node */
674  if (intset->iter_node)
675  {
676  intset->iter_node = intset->iter_node->next;
677  intset->iter_itemno = 0;
678  continue;
679  }
680 
681  /*
682  * We have reached the end of the B-tree. But we might still have
683  * some integers in the buffer of newly-added values.
684  */
685  if (intset->iter_values == (const uint64 *) intset->iter_values_buf)
686  {
687  intset->iter_values = intset->buffered_values;
688  intset->iter_num_values = intset->num_buffered_values;
689  intset->iter_valueno = 0;
690  continue;
691  }
692 
693  break;
694  }
695 
696  /* No more results. */
697  intset->iter_active = false;
698  *next = 0; /* prevent uninitialized-variable warnings */
699  return false;
700 }
701 
702 /*
703  * intset_binsrch_uint64() -- search a sorted array of uint64s
704  *
705  * Returns the first position with key equal or less than the given key.
706  * The returned position would be the "insert" location for the given key,
707  * that is, the position where the new key should be inserted to.
708  *
709  * 'nextkey' affects the behavior on equal keys. If true, and there is an
710  * equal key in the array, this returns the position immediately after the
711  * equal key. If false, this returns the position of the equal key itself.
712  */
713 static int
714 intset_binsrch_uint64(uint64 item, uint64 *arr, int arr_elems, bool nextkey)
715 {
716  int low,
717  high,
718  mid;
719 
720  low = 0;
721  high = arr_elems;
722  while (high > low)
723  {
724  mid = low + (high - low) / 2;
725 
726  if (nextkey)
727  {
728  if (item >= arr[mid])
729  low = mid + 1;
730  else
731  high = mid;
732  }
733  else
734  {
735  if (item > arr[mid])
736  low = mid + 1;
737  else
738  high = mid;
739  }
740  }
741 
742  return low;
743 }
744 
745 /* same, but for an array of leaf items */
746 static int
747 intset_binsrch_leaf(uint64 item, leaf_item *arr, int arr_elems, bool nextkey)
748 {
749  int low,
750  high,
751  mid;
752 
753  low = 0;
754  high = arr_elems;
755  while (high > low)
756  {
757  mid = low + (high - low) / 2;
758 
759  if (nextkey)
760  {
761  if (item >= arr[mid].first)
762  low = mid + 1;
763  else
764  high = mid;
765  }
766  else
767  {
768  if (item > arr[mid].first)
769  low = mid + 1;
770  else
771  high = mid;
772  }
773  }
774 
775  return low;
776 }
777 
778 /*
779  * Simple-8b encoding.
780  *
781  * The simple-8b algorithm packs between 1 and 240 integers into 64-bit words,
782  * called "codewords". The number of integers packed into a single codeword
783  * depends on the integers being packed; small integers are encoded using
784  * fewer bits than large integers. A single codeword can store a single
785  * 60-bit integer, or two 30-bit integers, for example.
786  *
787  * Since we're storing a unique, sorted, set of integers, we actually encode
788  * the *differences* between consecutive integers. That way, clusters of
789  * integers that are close to each other are packed efficiently, regardless
790  * of their absolute values.
791  *
792  * In Simple-8b, each codeword consists of a 4-bit selector, which indicates
793  * how many integers are encoded in the codeword, and the encoded integers are
794  * packed into the remaining 60 bits. The selector allows for 16 different
795  * ways of using the remaining 60 bits, called "modes". The number of integers
796  * packed into a single codeword in each mode is listed in the simple8b_modes
797  * table below. For example, consider the following codeword:
798  *
799  * 20-bit integer 20-bit integer 20-bit integer
800  * 1101 00000000000000010010 01111010000100100000 00000000000000010100
801  * ^
802  * selector
803  *
804  * The selector 1101 is 13 in decimal. From the modes table below, we see
805  * that it means that the codeword encodes three 20-bit integers. In decimal,
806  * those integers are 18, 500000 and 20. Because we encode deltas rather than
807  * absolute values, the actual values that they represent are 18, 500018 and
808  * 500038.
809  *
810  * Modes 0 and 1 are a bit special; they encode a run of 240 or 120 zeroes
811  * (which means 240 or 120 consecutive integers, since we're encoding the
812  * deltas between integers), without using the rest of the codeword bits
813  * for anything.
814  *
815  * Simple-8b cannot encode integers larger than 60 bits. Values larger than
816  * that are always stored in the 'first' field of a leaf item, never in the
817  * packed codeword. If there is a sequence of integers that are more than
818  * 2^60 apart, the codeword will go unused on those items. To represent that,
819  * we use a magic EMPTY_CODEWORD codeword value.
820  */
821 static const struct simple8b_mode
822 {
825 } simple8b_modes[17] =
826 
827 {
828  {0, 240}, /* mode 0: 240 zeroes */
829  {0, 120}, /* mode 1: 120 zeroes */
830  {1, 60}, /* mode 2: sixty 1-bit integers */
831  {2, 30}, /* mode 3: thirty 2-bit integers */
832  {3, 20}, /* mode 4: twenty 3-bit integers */
833  {4, 15}, /* mode 5: fifteen 4-bit integers */
834  {5, 12}, /* mode 6: twelve 5-bit integers */
835  {6, 10}, /* mode 7: ten 6-bit integers */
836  {7, 8}, /* mode 8: eight 7-bit integers (four bits
837  * are wasted) */
838  {8, 7}, /* mode 9: seven 8-bit integers (four bits
839  * are wasted) */
840  {10, 6}, /* mode 10: six 10-bit integers */
841  {12, 5}, /* mode 11: five 12-bit integers */
842  {15, 4}, /* mode 12: four 15-bit integers */
843  {20, 3}, /* mode 13: three 20-bit integers */
844  {30, 2}, /* mode 14: two 30-bit integers */
845  {60, 1}, /* mode 15: one 60-bit integer */
846 
847  {0, 0} /* sentinel value */
848 };
849 
850 /*
851  * EMPTY_CODEWORD is a special value, used to indicate "no values".
852  * It is used if the next value is too large to be encoded with Simple-8b.
853  *
854  * This value looks like a mode-0 codeword, but we can distinguish it
855  * because a regular mode-0 codeword would have zeroes in the unused bits.
856  */
857 #define EMPTY_CODEWORD UINT64CONST(0x0FFFFFFFFFFFFFFF)
858 
859 /*
860  * Encode a number of integers into a Simple-8b codeword.
861  *
862  * (What we actually encode are deltas between successive integers.
863  * "base" is the value before ints[0].)
864  *
865  * The input array must contain at least SIMPLE8B_MAX_VALUES_PER_CODEWORD
866  * elements, ensuring that we can produce a full codeword.
867  *
868  * Returns the encoded codeword, and sets *num_encoded to the number of
869  * input integers that were encoded. That can be zero, if the first delta
870  * is too large to be encoded.
871  */
872 static uint64
873 simple8b_encode(const uint64 *ints, int *num_encoded, uint64 base)
874 {
875  int selector;
876  int nints;
877  int bits;
878  uint64 diff;
879  uint64 last_val;
880  uint64 codeword;
881  int i;
882 
883  Assert(ints[0] > base);
884 
885  /*
886  * Select the "mode" to use for this codeword.
887  *
888  * In each iteration, check if the next value can be represented in the
889  * current mode we're considering. If it's too large, then step up the
890  * mode to a wider one, and repeat. If it fits, move on to the next
891  * integer. Repeat until the codeword is full, given the current mode.
892  *
893  * Note that we don't have any way to represent unused slots in the
894  * codeword, so we require each codeword to be "full". It is always
895  * possible to produce a full codeword unless the very first delta is too
896  * large to be encoded. For example, if the first delta is small but the
897  * second is too large to be encoded, we'll end up using the last "mode",
898  * which has nints == 1.
899  */
900  selector = 0;
901  nints = simple8b_modes[0].num_ints;
902  bits = simple8b_modes[0].bits_per_int;
903  diff = ints[0] - base - 1;
904  last_val = ints[0];
905  i = 0; /* number of deltas we have accepted */
906  for (;;)
907  {
908  if (diff >= (UINT64CONST(1) << bits))
909  {
910  /* too large, step up to next mode */
911  selector++;
912  nints = simple8b_modes[selector].num_ints;
913  bits = simple8b_modes[selector].bits_per_int;
914  /* we might already have accepted enough deltas for this mode */
915  if (i >= nints)
916  break;
917  }
918  else
919  {
920  /* accept this delta; then done if codeword is full */
921  i++;
922  if (i >= nints)
923  break;
924  /* examine next delta */
925  Assert(ints[i] > last_val);
926  diff = ints[i] - last_val - 1;
927  last_val = ints[i];
928  }
929  }
930 
931  if (nints == 0)
932  {
933  /*
934  * The first delta is too large to be encoded with Simple-8b.
935  *
936  * If there is at least one not-too-large integer in the input, we
937  * will encode it using mode 15 (or a more compact mode). Hence, we
938  * can only get here if the *first* delta is >= 2^60.
939  */
940  Assert(i == 0);
941  *num_encoded = 0;
942  return EMPTY_CODEWORD;
943  }
944 
945  /*
946  * Encode the integers using the selected mode. Note that we shift them
947  * into the codeword in reverse order, so that they will come out in the
948  * correct order in the decoder.
949  */
950  codeword = 0;
951  if (bits > 0)
952  {
953  for (i = nints - 1; i > 0; i--)
954  {
955  diff = ints[i] - ints[i - 1] - 1;
956  codeword |= diff;
957  codeword <<= bits;
958  }
959  diff = ints[0] - base - 1;
960  codeword |= diff;
961  }
962 
963  /* add selector to the codeword, and return */
964  codeword |= (uint64) selector << 60;
965 
966  *num_encoded = nints;
967  return codeword;
968 }
969 
970 /*
971  * Decode a codeword into an array of integers.
972  * Returns the number of integers decoded.
973  */
974 static int
975 simple8b_decode(uint64 codeword, uint64 *decoded, uint64 base)
976 {
977  int selector = (codeword >> 60);
978  int nints = simple8b_modes[selector].num_ints;
979  int bits = simple8b_modes[selector].bits_per_int;
980  uint64 mask = (UINT64CONST(1) << bits) - 1;
981  uint64 curr_value;
982 
983  if (codeword == EMPTY_CODEWORD)
984  return 0;
985 
986  curr_value = base;
987  for (int i = 0; i < nints; i++)
988  {
989  uint64 diff = codeword & mask;
990 
991  curr_value += 1 + diff;
992  decoded[i] = curr_value;
993  codeword >>= bits;
994  }
995  return nints;
996 }
997 
998 /*
999  * This is very similar to simple8b_decode(), but instead of decoding all
1000  * the values to an array, it just checks if the given "key" is part of
1001  * the codeword.
1002  */
1003 static bool
1004 simple8b_contains(uint64 codeword, uint64 key, uint64 base)
1005 {
1006  int selector = (codeword >> 60);
1007  int nints = simple8b_modes[selector].num_ints;
1008  int bits = simple8b_modes[selector].bits_per_int;
1009 
1010  if (codeword == EMPTY_CODEWORD)
1011  return false;
1012 
1013  if (bits == 0)
1014  {
1015  /* Special handling for 0-bit cases. */
1016  return (key - base) <= nints;
1017  }
1018  else
1019  {
1020  uint64 mask = (UINT64CONST(1) << bits) - 1;
1021  uint64 curr_value;
1022 
1023  curr_value = base;
1024  for (int i = 0; i < nints; i++)
1025  {
1026  uint64 diff = codeword & mask;
1027 
1028  curr_value += 1 + diff;
1029 
1030  if (curr_value >= key)
1031  {
1032  if (curr_value == key)
1033  return true;
1034  else
1035  return false;
1036  }
1037 
1038  codeword >>= bits;
1039  }
1040  }
1041  return false;
1042 }
Datum intset(PG_FUNCTION_ARGS)
Definition: _int_op.c:179
static int32 next
Definition: blutils.c:221
static Datum values[MAXATTR]
Definition: bootstrap.c:152
unsigned short uint16
Definition: c.h:505
#define Assert(condition)
Definition: c.h:858
unsigned char uint8
Definition: c.h:504
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:224
#define MAX_VALUES_PER_LEAF_ITEM
Definition: integerset.c:167
uint64 intset_memory_usage(IntegerSet *intset)
Definition: integerset.c:359
uint64 intset_num_entries(IntegerSet *intset)
Definition: integerset.c:350
static void intset_update_upper(IntegerSet *intset, int level, intset_node *child, uint64 child_key)
Definition: integerset.c:481
bool intset_is_member(IntegerSet *intset, uint64 x)
Definition: integerset.c:554
#define MAX_INTERNAL_ITEMS
Definition: integerset.c:95
#define MAX_TREE_LEVELS
Definition: integerset.c:110
IntegerSet * intset_create(void)
Definition: integerset.c:284
#define MAX_BUFFERED_VALUES
Definition: integerset.c:187
static intset_leaf_node * intset_new_leaf_node(IntegerSet *intset)
Definition: integerset.c:331
void intset_begin_iterate(IntegerSet *intset)
Definition: integerset.c:624
static int intset_binsrch_uint64(uint64 item, uint64 *arr, int arr_elems, bool nextkey)
Definition: integerset.c:714
static void intset_flush_buffered_values(IntegerSet *intset)
Definition: integerset.c:396
static uint64 simple8b_encode(const uint64 *ints, int *num_encoded, uint64 base)
Definition: integerset.c:873
bool intset_iterate_next(IntegerSet *intset, uint64 *next)
Definition: integerset.c:643
#define EMPTY_CODEWORD
Definition: integerset.c:857
static int simple8b_decode(uint64 codeword, uint64 *decoded, uint64 base)
Definition: integerset.c:975
void intset_add_member(IntegerSet *intset, uint64 x)
Definition: integerset.c:370
static const struct simple8b_mode simple8b_modes[17]
static int intset_binsrch_leaf(uint64 item, leaf_item *arr, int arr_elems, bool nextkey)
Definition: integerset.c:747
static intset_internal_node * intset_new_internal_node(IntegerSet *intset)
Definition: integerset.c:316
static bool simple8b_contains(uint64 codeword, uint64 key, uint64 base)
Definition: integerset.c:1004
#define MAX_LEAF_ITEMS
Definition: integerset.c:96
int x
Definition: isn.c:71
int i
Definition: isn.c:73
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:77
Size GetMemoryChunkSpace(void *pointer)
Definition: mcxt.c:721
MemoryContext CurrentMemoryContext
Definition: mcxt.c:143
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:1180
void * palloc(Size size)
Definition: mcxt.c:1316
int num_buffered_values
Definition: integerset.c:231
int iter_num_values
Definition: integerset.c:250
uint64 highest_value
Definition: integerset.c:211
intset_node * root
Definition: integerset.c:223
uint64 mem_used
Definition: integerset.c:208
int iter_itemno
Definition: integerset.c:254
MemoryContext context
Definition: integerset.c:207
intset_node * rightmost_nodes[MAX_TREE_LEVELS]
Definition: integerset.c:224
uint64 iter_values_buf[MAX_VALUES_PER_LEAF_ITEM]
Definition: integerset.c:256
intset_leaf_node * leftmost_leaf
Definition: integerset.c:225
bool iter_active
Definition: integerset.c:247
const uint64 * iter_values
Definition: integerset.c:249
uint64 buffered_values[MAX_BUFFERED_VALUES]
Definition: integerset.c:230
intset_leaf_node * iter_node
Definition: integerset.c:253
int iter_valueno
Definition: integerset.c:251
uint64 num_entries
Definition: integerset.c:210
int num_levels
Definition: integerset.c:222
intset_node * downlinks[MAX_INTERNAL_ITEMS]
Definition: integerset.c:157
uint64 values[MAX_INTERNAL_ITEMS]
Definition: integerset.c:156
leaf_item items[MAX_LEAF_ITEMS]
Definition: integerset.c:177
intset_leaf_node * next
Definition: integerset.c:175
uint16 level
Definition: integerset.c:141
uint16 num_items
Definition: integerset.c:142
uint64 codeword
Definition: integerset.c:164
uint64 first
Definition: integerset.c:163
uint8 bits_per_int
Definition: integerset.c:823
uint8 num_ints
Definition: integerset.c:824