PostgreSQL Source Code  git master
integerset.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * integerset.c
4  * Data structure to hold a large set of 64-bit integers efficiently
5  *
6  * IntegerSet provides an in-memory data structure to hold a set of
7  * arbitrary 64-bit integers. Internally, the values are stored in a
8  * B-tree, with a special packed representation at the leaf level using
9  * the Simple-8b algorithm, which can pack clusters of nearby values
10  * very tightly.
11  *
12  * Memory consumption depends on the number of values stored, but also
13  * on how far the values are from each other. In the best case, with
14  * long runs of consecutive integers, memory consumption can be as low as
15  * 0.1 bytes per integer. In the worst case, if integers are more than
16  * 2^32 apart, it uses about 8 bytes per integer. In typical use, the
17  * consumption per integer is somewhere between those extremes, depending
18  * on the range of integers stored, and how "clustered" they are.
19  *
20  *
21  * Interface
22  * ---------
23  *
24  * intset_create - Create a new, empty set
25  * intset_add_member - Add an integer to the set
26  * intset_is_member - Test if an integer is in the set
27  * intset_begin_iterate - Begin iterating through all integers in set
28  * intset_iterate_next - Return next set member, if any
29  *
30  * intset_create() creates the set in the current memory context. Subsequent
31  * operations that add to the data structure will continue to allocate from
32  * that same context, even if it's not current anymore.
33  *
34  * Note that there is no function to free an integer set. If you need to do
35  * that, create a dedicated memory context to hold it, and destroy the memory
36  * context instead.
37  *
38  *
39  * Limitations
40  * -----------
41  *
42  * - Values must be added in order. (Random insertions would require
43  * splitting nodes, which hasn't been implemented.)
44  *
45  * - Values cannot be added while iteration is in progress.
46  *
47  * - No support for removing values.
48  *
49  * None of these limitations are fundamental to the data structure, so they
50  * could be lifted if needed, by writing some new code. But the current
51  * users of this facility don't need them.
52  *
53  *
54  * References
55  * ----------
56  *
57  * Simple-8b encoding is based on:
58  *
59  * Vo Ngoc Anh, Alistair Moffat, Index compression using 64-bit words,
60  * Software - Practice & Experience, v.40 n.2, p.131-147, February 2010
61  * (https://doi.org/10.1002/spe.948)
62  *
63  *
64  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
65  * Portions Copyright (c) 1994, Regents of the University of California
66  *
67  * IDENTIFICATION
68  * src/backend/lib/integerset.c
69  *
70  *-------------------------------------------------------------------------
71  */
72 #include "postgres.h"
73 
74 #include "access/htup_details.h"
75 #include "lib/integerset.h"
76 #include "port/pg_bitutils.h"
77 #include "utils/memutils.h"
78 
79 
80 /*
81  * Maximum number of integers that can be encoded in a single Simple-8b
82  * codeword. (Defined here before anything else, so that we can size arrays
83  * using this.)
84  */
85 #define SIMPLE8B_MAX_VALUES_PER_CODEWORD 240
86 
87 /*
88  * Parameters for shape of the in-memory B-tree.
89  *
90  * These set the size of each internal and leaf node. They don't necessarily
91  * need to be the same, because the tree is just an in-memory structure.
92  * With the default 64, each node is about 1 kb.
93  *
94  * If you change these, you must recalculate MAX_TREE_LEVELS, too!
95  */
96 #define MAX_INTERNAL_ITEMS 64
97 #define MAX_LEAF_ITEMS 64
98 
99 /*
100  * Maximum height of the tree.
101  *
102  * MAX_TREE_ITEMS is calculated from the "fan-out" of the B-tree. The
103  * theoretical maximum number of items that we can store in a set is 2^64,
104  * so MAX_TREE_LEVELS should be set so that:
105  *
106  * MAX_LEAF_ITEMS * MAX_INTERNAL_ITEMS ^ (MAX_TREE_LEVELS - 1) >= 2^64.
107  *
108  * In practice, we'll need far fewer levels, because you will run out of
109  * memory long before reaching that number, but let's be conservative.
110  */
111 #define MAX_TREE_LEVELS 11
112 
113 /*
114  * Node structures, for the in-memory B-tree.
115  *
116  * An internal node holds a number of downlink pointers to leaf nodes, or
117  * to internal nodes on a lower level. For each downlink, the key value
118  * corresponding to the lower level node is stored in a sorted array. The
119  * stored key values are low keys. In other words, if the downlink has value
120  * X, then all items stored on that child are >= X.
121  *
122  * Each leaf node holds a number of "items", with a varying number of
123  * integers packed into each item. Each item consists of two 64-bit words:
124  * The first word holds the first integer stored in the item, in plain format.
125  * The second word contains between 0 and 240 more integers, packed using
126  * Simple-8b encoding. By storing the first integer in plain, unpacked,
127  * format, we can use binary search to quickly find an item that holds (or
128  * would hold) a particular integer. And by storing the rest in packed form,
129  * we still get pretty good memory density, if there are clusters of integers
130  * with similar values.
131  *
132  * Each leaf node also has a pointer to the next leaf node, so that the leaf
133  * nodes can be easily walked from beginning to end when iterating.
134  */
135 typedef struct intset_node intset_node;
136 typedef struct intset_leaf_node intset_leaf_node;
138 
139 /* Common structure of both leaf and internal nodes. */
141 {
142  uint16 level; /* tree level of this node */
143  uint16 num_items; /* number of items in this node */
144 };
145 
146 /* Internal node */
148 {
149  /* common header, must match intset_node */
150  uint16 level; /* >= 1 on internal nodes */
152 
153  /*
154  * 'values' is an array of key values, and 'downlinks' are pointers to
155  * lower-level nodes, corresponding to the key values.
156  */
159 };
160 
161 /* Leaf node */
162 typedef struct
163 {
164  uint64 first; /* first integer in this item */
165  uint64 codeword; /* simple8b encoded differences from 'first' */
166 } leaf_item;
167 
168 #define MAX_VALUES_PER_LEAF_ITEM (1 + SIMPLE8B_MAX_VALUES_PER_CODEWORD)
169 
171 {
172  /* common header, must match intset_node */
173  uint16 level; /* 0 on leafs */
175 
176  intset_leaf_node *next; /* right sibling, if any */
177 
179 };
180 
181 /*
182  * We buffer insertions in a simple array, before packing and inserting them
183  * into the B-tree. MAX_BUFFERED_VALUES sets the size of the buffer. The
184  * encoder assumes that it is large enough that we can always fill a leaf
185  * item with buffered new items. In other words, MAX_BUFFERED_VALUES must be
186  * larger than MAX_VALUES_PER_LEAF_ITEM. For efficiency, make it much larger.
187  */
188 #define MAX_BUFFERED_VALUES (MAX_VALUES_PER_LEAF_ITEM * 2)
189 
190 /*
191  * IntegerSet is the top-level object representing the set.
192  *
193  * The integers are stored in an in-memory B-tree structure, plus an array
194  * for newly-added integers. IntegerSet also tracks information about memory
195  * usage, as well as the current position when iterating the set with
196  * intset_begin_iterate / intset_iterate_next.
197  */
199 {
200  /*
201  * 'context' is the memory context holding this integer set and all its
202  * tree nodes.
203  *
204  * 'mem_used' tracks the amount of memory used. We don't do anything with
205  * it in integerset.c itself, but the callers can ask for it with
206  * intset_memory_usage().
207  */
209  uint64 mem_used;
210 
211  uint64 num_entries; /* total # of values in the set */
212  uint64 highest_value; /* highest value stored in this set */
213 
214  /*
215  * B-tree to hold the packed values.
216  *
217  * 'rightmost_nodes' hold pointers to the rightmost node on each level.
218  * rightmost_parent[0] is rightmost leaf, rightmost_parent[1] is its
219  * parent, and so forth, all the way up to the root. These are needed when
220  * adding new values. (Currently, we require that new values are added at
221  * the end.)
222  */
223  int num_levels; /* height of the tree */
224  intset_node *root; /* root node */
226  intset_leaf_node *leftmost_leaf; /* leftmost leaf node */
227 
228  /*
229  * Holding area for new items that haven't been inserted to the tree yet.
230  */
233 
234  /*
235  * Iterator support.
236  *
237  * 'iter_values' is an array of integers ready to be returned to the
238  * caller; 'iter_num_values' is the length of that array, and
239  * 'iter_valueno' is the next index. 'iter_node' and 'iter_itemno' point
240  * to the leaf node, and item within the leaf node, to get the next batch
241  * of values from.
242  *
243  * Normally, 'iter_values' points to 'iter_values_buf', which holds items
244  * decoded from a leaf item. But after we have scanned the whole B-tree,
245  * we iterate through all the unbuffered values, too, by pointing
246  * iter_values to 'buffered_values'.
247  */
248  bool iter_active; /* is iteration in progress? */
249 
250  const uint64 *iter_values;
251  int iter_num_values; /* number of elements in 'iter_values' */
252  int iter_valueno; /* next index into 'iter_values' */
253 
254  intset_leaf_node *iter_node; /* current leaf node */
255  int iter_itemno; /* next item in 'iter_node' to decode */
256 
258 };
259 
260 /*
261  * Prototypes for internal functions.
262  */
263 static void intset_update_upper(IntegerSet *intset, int level,
264  intset_node *child, uint64 child_key);
266 
267 static int intset_binsrch_uint64(uint64 item, uint64 *arr, int arr_elems,
268  bool nextkey);
269 static int intset_binsrch_leaf(uint64 item, leaf_item *arr, int arr_elems,
270  bool nextkey);
271 
272 static uint64 simple8b_encode(const uint64 *ints, int *num_encoded, uint64 base);
273 static int simple8b_decode(uint64 codeword, uint64 *decoded, uint64 base);
274 static bool simple8b_contains(uint64 codeword, uint64 key, uint64 base);
275 
276 
277 /*
278  * Create a new, initially empty, integer set.
279  *
280  * The integer set is created in the current memory context.
281  * We will do all subsequent allocations in the same context, too, regardless
282  * of which memory context is current when new integers are added to the set.
283  */
284 IntegerSet *
286 {
288 
289  intset = (IntegerSet *) palloc(sizeof(IntegerSet));
290  intset->context = CurrentMemoryContext;
291  intset->mem_used = GetMemoryChunkSpace(intset);
292 
293  intset->num_entries = 0;
294  intset->highest_value = 0;
295 
296  intset->num_levels = 0;
297  intset->root = NULL;
298  memset(intset->rightmost_nodes, 0, sizeof(intset->rightmost_nodes));
299  intset->leftmost_leaf = NULL;
300 
301  intset->num_buffered_values = 0;
302 
303  intset->iter_active = false;
304  intset->iter_node = NULL;
305  intset->iter_itemno = 0;
306  intset->iter_valueno = 0;
307  intset->iter_num_values = 0;
308  intset->iter_values = NULL;
309 
310  return intset;
311 }
312 
313 /*
314  * Allocate a new node.
315  */
316 static intset_internal_node *
318 {
320 
322  sizeof(intset_internal_node));
323  intset->mem_used += GetMemoryChunkSpace(n);
324 
325  n->level = 0; /* caller must set */
326  n->num_items = 0;
327 
328  return n;
329 }
330 
331 static intset_leaf_node *
333 {
334  intset_leaf_node *n;
335 
336  n = (intset_leaf_node *) MemoryContextAlloc(intset->context,
337  sizeof(intset_leaf_node));
338  intset->mem_used += GetMemoryChunkSpace(n);
339 
340  n->level = 0;
341  n->num_items = 0;
342  n->next = NULL;
343 
344  return n;
345 }
346 
347 /*
348  * Return the number of entries in the integer set.
349  */
350 uint64
352 {
353  return intset->num_entries;
354 }
355 
356 /*
357  * Return the amount of memory used by the integer set.
358  */
359 uint64
361 {
362  return intset->mem_used;
363 }
364 
365 /*
366  * Add a value to the set.
367  *
368  * Values must be added in order.
369  */
370 void
372 {
373  if (intset->iter_active)
374  elog(ERROR, "cannot add new values to integer set while iteration is in progress");
375 
376  if (x <= intset->highest_value && intset->num_entries > 0)
377  elog(ERROR, "cannot add value to integer set out of order");
378 
379  if (intset->num_buffered_values >= MAX_BUFFERED_VALUES)
380  {
381  /* Time to flush our buffer */
383  Assert(intset->num_buffered_values < MAX_BUFFERED_VALUES);
384  }
385 
386  /* Add it to the buffer of newly-added values */
387  intset->buffered_values[intset->num_buffered_values] = x;
388  intset->num_buffered_values++;
389  intset->num_entries++;
390  intset->highest_value = x;
391 }
392 
393 /*
394  * Take a batch of buffered values, and pack them into the B-tree.
395  */
396 static void
398 {
399  uint64 *values = intset->buffered_values;
400  uint64 num_values = intset->num_buffered_values;
401  int num_packed = 0;
402  intset_leaf_node *leaf;
403 
404  leaf = (intset_leaf_node *) intset->rightmost_nodes[0];
405 
406  /*
407  * If the tree is completely empty, create the first leaf page, which is
408  * also the root.
409  */
410  if (leaf == NULL)
411  {
412  /*
413  * This is the very first item in the set.
414  *
415  * Allocate root node. It's also a leaf.
416  */
418 
419  intset->root = (intset_node *) leaf;
420  intset->leftmost_leaf = leaf;
421  intset->rightmost_nodes[0] = (intset_node *) leaf;
422  intset->num_levels = 1;
423  }
424 
425  /*
426  * If there are less than MAX_VALUES_PER_LEAF_ITEM values in the buffer,
427  * stop. In most cases, we cannot encode that many values in a single
428  * value, but this way, the encoder doesn't have to worry about running
429  * out of input.
430  */
431  while (num_values - num_packed >= MAX_VALUES_PER_LEAF_ITEM)
432  {
433  leaf_item item;
434  int num_encoded;
435 
436  /*
437  * Construct the next leaf item, packing as many buffered values as
438  * possible.
439  */
440  item.first = values[num_packed];
441  item.codeword = simple8b_encode(&values[num_packed + 1],
442  &num_encoded,
443  item.first);
444 
445  /*
446  * Add the item to the node, allocating a new node if the old one is
447  * full.
448  */
449  if (leaf->num_items >= MAX_LEAF_ITEMS)
450  {
451  /* Allocate new leaf and link it to the tree */
452  intset_leaf_node *old_leaf = leaf;
453 
455  old_leaf->next = leaf;
456  intset->rightmost_nodes[0] = (intset_node *) leaf;
457  intset_update_upper(intset, 1, (intset_node *) leaf, item.first);
458  }
459  leaf->items[leaf->num_items++] = item;
460 
461  num_packed += 1 + num_encoded;
462  }
463 
464  /*
465  * Move any remaining buffered values to the beginning of the array.
466  */
467  if (num_packed < intset->num_buffered_values)
468  {
469  memmove(&intset->buffered_values[0],
470  &intset->buffered_values[num_packed],
471  (intset->num_buffered_values - num_packed) * sizeof(uint64));
472  }
473  intset->num_buffered_values -= num_packed;
474 }
475 
476 /*
477  * Insert a downlink into parent node, after creating a new node.
478  *
479  * Recurses if the parent node is full, too.
480  */
481 static void
483  uint64 child_key)
484 {
485  intset_internal_node *parent;
486 
487  Assert(level > 0);
488 
489  /*
490  * Create a new root node, if necessary.
491  */
492  if (level >= intset->num_levels)
493  {
494  intset_node *oldroot = intset->root;
495  uint64 downlink_key;
496 
497  /* MAX_TREE_LEVELS should be more than enough, this shouldn't happen */
498  if (intset->num_levels == MAX_TREE_LEVELS)
499  elog(ERROR, "could not expand integer set, maximum number of levels reached");
500  intset->num_levels++;
501 
502  /*
503  * Get the first value on the old root page, to be used as the
504  * downlink.
505  */
506  if (intset->root->level == 0)
507  downlink_key = ((intset_leaf_node *) oldroot)->items[0].first;
508  else
509  downlink_key = ((intset_internal_node *) oldroot)->values[0];
510 
512  parent->level = level;
513  parent->values[0] = downlink_key;
514  parent->downlinks[0] = oldroot;
515  parent->num_items = 1;
516 
517  intset->root = (intset_node *) parent;
518  intset->rightmost_nodes[level] = (intset_node *) parent;
519  }
520 
521  /*
522  * Place the downlink on the parent page.
523  */
524  parent = (intset_internal_node *) intset->rightmost_nodes[level];
525 
526  if (parent->num_items < MAX_INTERNAL_ITEMS)
527  {
528  parent->values[parent->num_items] = child_key;
529  parent->downlinks[parent->num_items] = child;
530  parent->num_items++;
531  }
532  else
533  {
534  /*
535  * Doesn't fit. Allocate new parent, with the downlink as the first
536  * item on it, and recursively insert the downlink to the new parent
537  * to the grandparent.
538  */
540  parent->level = level;
541  parent->values[0] = child_key;
542  parent->downlinks[0] = child;
543  parent->num_items = 1;
544 
545  intset->rightmost_nodes[level] = (intset_node *) parent;
546 
547  intset_update_upper(intset, level + 1, (intset_node *) parent, child_key);
548  }
549 }
550 
551 /*
552  * Does the set contain the given value?
553  */
554 bool
556 {
557  intset_node *node;
558  intset_leaf_node *leaf;
559  int level;
560  int itemno;
561  leaf_item *item;
562 
563  /*
564  * The value might be in the buffer of newly-added values.
565  */
566  if (intset->num_buffered_values > 0 && x >= intset->buffered_values[0])
567  {
568  itemno = intset_binsrch_uint64(x,
569  intset->buffered_values,
570  intset->num_buffered_values,
571  false);
572  if (itemno >= intset->num_buffered_values)
573  return false;
574  else
575  return (intset->buffered_values[itemno] == x);
576  }
577 
578  /*
579  * Start from the root, and walk down the B-tree to find the right leaf
580  * node.
581  */
582  if (!intset->root)
583  return false;
584  node = intset->root;
585  for (level = intset->num_levels - 1; level > 0; level--)
586  {
588 
589  Assert(node->level == level);
590 
591  itemno = intset_binsrch_uint64(x, n->values, n->num_items, true);
592  if (itemno == 0)
593  return false;
594  node = n->downlinks[itemno - 1];
595  }
596  Assert(node->level == 0);
597  leaf = (intset_leaf_node *) node;
598 
599  /*
600  * Binary search to find the right item on the leaf page
601  */
602  itemno = intset_binsrch_leaf(x, leaf->items, leaf->num_items, true);
603  if (itemno == 0)
604  return false;
605  item = &leaf->items[itemno - 1];
606 
607  /* Is this a match to the first value on the item? */
608  if (item->first == x)
609  return true;
610  Assert(x > item->first);
611 
612  /* Is it in the packed codeword? */
613  if (simple8b_contains(item->codeword, x, item->first))
614  return true;
615 
616  return false;
617 }
618 
619 /*
620  * Begin in-order scan through all the values.
621  *
622  * While the iteration is in-progress, you cannot add new values to the set.
623  */
624 void
626 {
627  /* Note that we allow an iteration to be abandoned midway */
628  intset->iter_active = true;
629  intset->iter_node = intset->leftmost_leaf;
630  intset->iter_itemno = 0;
631  intset->iter_valueno = 0;
632  intset->iter_num_values = 0;
633  intset->iter_values = intset->iter_values_buf;
634 }
635 
636 /*
637  * Returns the next integer, when iterating.
638  *
639  * intset_begin_iterate() must be called first. intset_iterate_next() returns
640  * the next value in the set. Returns true, if there was another value, and
641  * stores the value in *next. Otherwise, returns false.
642  */
643 bool
645 {
646  Assert(intset->iter_active);
647  for (;;)
648  {
649  /* Return next iter_values[] entry if any */
650  if (intset->iter_valueno < intset->iter_num_values)
651  {
652  *next = intset->iter_values[intset->iter_valueno++];
653  return true;
654  }
655 
656  /* Decode next item in current leaf node, if any */
657  if (intset->iter_node &&
658  intset->iter_itemno < intset->iter_node->num_items)
659  {
660  leaf_item *item;
661  int num_decoded;
662 
663  item = &intset->iter_node->items[intset->iter_itemno++];
664 
665  intset->iter_values_buf[0] = item->first;
666  num_decoded = simple8b_decode(item->codeword,
667  &intset->iter_values_buf[1],
668  item->first);
669  intset->iter_num_values = num_decoded + 1;
670  intset->iter_valueno = 0;
671  continue;
672  }
673 
674  /* No more items on this leaf, step to next node */
675  if (intset->iter_node)
676  {
677  intset->iter_node = intset->iter_node->next;
678  intset->iter_itemno = 0;
679  continue;
680  }
681 
682  /*
683  * We have reached the end of the B-tree. But we might still have
684  * some integers in the buffer of newly-added values.
685  */
686  if (intset->iter_values == (const uint64 *) intset->iter_values_buf)
687  {
688  intset->iter_values = intset->buffered_values;
689  intset->iter_num_values = intset->num_buffered_values;
690  intset->iter_valueno = 0;
691  continue;
692  }
693 
694  break;
695  }
696 
697  /* No more results. */
698  intset->iter_active = false;
699  *next = 0; /* prevent uninitialized-variable warnings */
700  return false;
701 }
702 
703 /*
704  * intset_binsrch_uint64() -- search a sorted array of uint64s
705  *
706  * Returns the first position with key equal or less than the given key.
707  * The returned position would be the "insert" location for the given key,
708  * that is, the position where the new key should be inserted to.
709  *
710  * 'nextkey' affects the behavior on equal keys. If true, and there is an
711  * equal key in the array, this returns the position immediately after the
712  * equal key. If false, this returns the position of the equal key itself.
713  */
714 static int
715 intset_binsrch_uint64(uint64 item, uint64 *arr, int arr_elems, bool nextkey)
716 {
717  int low,
718  high,
719  mid;
720 
721  low = 0;
722  high = arr_elems;
723  while (high > low)
724  {
725  mid = low + (high - low) / 2;
726 
727  if (nextkey)
728  {
729  if (item >= arr[mid])
730  low = mid + 1;
731  else
732  high = mid;
733  }
734  else
735  {
736  if (item > arr[mid])
737  low = mid + 1;
738  else
739  high = mid;
740  }
741  }
742 
743  return low;
744 }
745 
746 /* same, but for an array of leaf items */
747 static int
748 intset_binsrch_leaf(uint64 item, leaf_item *arr, int arr_elems, bool nextkey)
749 {
750  int low,
751  high,
752  mid;
753 
754  low = 0;
755  high = arr_elems;
756  while (high > low)
757  {
758  mid = low + (high - low) / 2;
759 
760  if (nextkey)
761  {
762  if (item >= arr[mid].first)
763  low = mid + 1;
764  else
765  high = mid;
766  }
767  else
768  {
769  if (item > arr[mid].first)
770  low = mid + 1;
771  else
772  high = mid;
773  }
774  }
775 
776  return low;
777 }
778 
779 /*
780  * Simple-8b encoding.
781  *
782  * The simple-8b algorithm packs between 1 and 240 integers into 64-bit words,
783  * called "codewords". The number of integers packed into a single codeword
784  * depends on the integers being packed; small integers are encoded using
785  * fewer bits than large integers. A single codeword can store a single
786  * 60-bit integer, or two 30-bit integers, for example.
787  *
788  * Since we're storing a unique, sorted, set of integers, we actually encode
789  * the *differences* between consecutive integers. That way, clusters of
790  * integers that are close to each other are packed efficiently, regardless
791  * of their absolute values.
792  *
793  * In Simple-8b, each codeword consists of a 4-bit selector, which indicates
794  * how many integers are encoded in the codeword, and the encoded integers are
795  * packed into the remaining 60 bits. The selector allows for 16 different
796  * ways of using the remaining 60 bits, called "modes". The number of integers
797  * packed into a single codeword in each mode is listed in the simple8b_modes
798  * table below. For example, consider the following codeword:
799  *
800  * 20-bit integer 20-bit integer 20-bit integer
801  * 1101 00000000000000010010 01111010000100100000 00000000000000010100
802  * ^
803  * selector
804  *
805  * The selector 1101 is 13 in decimal. From the modes table below, we see
806  * that it means that the codeword encodes three 20-bit integers. In decimal,
807  * those integers are 18, 500000 and 20. Because we encode deltas rather than
808  * absolute values, the actual values that they represent are 18, 500018 and
809  * 500038.
810  *
811  * Modes 0 and 1 are a bit special; they encode a run of 240 or 120 zeroes
812  * (which means 240 or 120 consecutive integers, since we're encoding the
813  * deltas between integers), without using the rest of the codeword bits
814  * for anything.
815  *
816  * Simple-8b cannot encode integers larger than 60 bits. Values larger than
817  * that are always stored in the 'first' field of a leaf item, never in the
818  * packed codeword. If there is a sequence of integers that are more than
819  * 2^60 apart, the codeword will go unused on those items. To represent that,
820  * we use a magic EMPTY_CODEWORD codeword value.
821  */
822 static const struct simple8b_mode
823 {
826 } simple8b_modes[17] =
827 
828 {
829  {0, 240}, /* mode 0: 240 zeroes */
830  {0, 120}, /* mode 1: 120 zeroes */
831  {1, 60}, /* mode 2: sixty 1-bit integers */
832  {2, 30}, /* mode 3: thirty 2-bit integers */
833  {3, 20}, /* mode 4: twenty 3-bit integers */
834  {4, 15}, /* mode 5: fifteen 4-bit integers */
835  {5, 12}, /* mode 6: twelve 5-bit integers */
836  {6, 10}, /* mode 7: ten 6-bit integers */
837  {7, 8}, /* mode 8: eight 7-bit integers (four bits
838  * are wasted) */
839  {8, 7}, /* mode 9: seven 8-bit integers (four bits
840  * are wasted) */
841  {10, 6}, /* mode 10: six 10-bit integers */
842  {12, 5}, /* mode 11: five 12-bit integers */
843  {15, 4}, /* mode 12: four 15-bit integers */
844  {20, 3}, /* mode 13: three 20-bit integers */
845  {30, 2}, /* mode 14: two 30-bit integers */
846  {60, 1}, /* mode 15: one 60-bit integer */
847 
848  {0, 0} /* sentinel value */
849 };
850 
851 /*
852  * EMPTY_CODEWORD is a special value, used to indicate "no values".
853  * It is used if the next value is too large to be encoded with Simple-8b.
854  *
855  * This value looks like a mode-0 codeword, but we can distinguish it
856  * because a regular mode-0 codeword would have zeroes in the unused bits.
857  */
858 #define EMPTY_CODEWORD UINT64CONST(0x0FFFFFFFFFFFFFFF)
859 
860 /*
861  * Encode a number of integers into a Simple-8b codeword.
862  *
863  * (What we actually encode are deltas between successive integers.
864  * "base" is the value before ints[0].)
865  *
866  * The input array must contain at least SIMPLE8B_MAX_VALUES_PER_CODEWORD
867  * elements, ensuring that we can produce a full codeword.
868  *
869  * Returns the encoded codeword, and sets *num_encoded to the number of
870  * input integers that were encoded. That can be zero, if the first delta
871  * is too large to be encoded.
872  */
873 static uint64
874 simple8b_encode(const uint64 *ints, int *num_encoded, uint64 base)
875 {
876  int selector;
877  int nints;
878  int bits;
879  uint64 diff;
880  uint64 last_val;
881  uint64 codeword;
882  int i;
883 
884  Assert(ints[0] > base);
885 
886  /*
887  * Select the "mode" to use for this codeword.
888  *
889  * In each iteration, check if the next value can be represented in the
890  * current mode we're considering. If it's too large, then step up the
891  * mode to a wider one, and repeat. If it fits, move on to the next
892  * integer. Repeat until the codeword is full, given the current mode.
893  *
894  * Note that we don't have any way to represent unused slots in the
895  * codeword, so we require each codeword to be "full". It is always
896  * possible to produce a full codeword unless the very first delta is too
897  * large to be encoded. For example, if the first delta is small but the
898  * second is too large to be encoded, we'll end up using the last "mode",
899  * which has nints == 1.
900  */
901  selector = 0;
902  nints = simple8b_modes[0].num_ints;
903  bits = simple8b_modes[0].bits_per_int;
904  diff = ints[0] - base - 1;
905  last_val = ints[0];
906  i = 0; /* number of deltas we have accepted */
907  for (;;)
908  {
909  if (diff >= (UINT64CONST(1) << bits))
910  {
911  /* too large, step up to next mode */
912  selector++;
913  nints = simple8b_modes[selector].num_ints;
914  bits = simple8b_modes[selector].bits_per_int;
915  /* we might already have accepted enough deltas for this mode */
916  if (i >= nints)
917  break;
918  }
919  else
920  {
921  /* accept this delta; then done if codeword is full */
922  i++;
923  if (i >= nints)
924  break;
925  /* examine next delta */
926  Assert(ints[i] > last_val);
927  diff = ints[i] - last_val - 1;
928  last_val = ints[i];
929  }
930  }
931 
932  if (nints == 0)
933  {
934  /*
935  * The first delta is too large to be encoded with Simple-8b.
936  *
937  * If there is at least one not-too-large integer in the input, we
938  * will encode it using mode 15 (or a more compact mode). Hence, we
939  * can only get here if the *first* delta is >= 2^60.
940  */
941  Assert(i == 0);
942  *num_encoded = 0;
943  return EMPTY_CODEWORD;
944  }
945 
946  /*
947  * Encode the integers using the selected mode. Note that we shift them
948  * into the codeword in reverse order, so that they will come out in the
949  * correct order in the decoder.
950  */
951  codeword = 0;
952  if (bits > 0)
953  {
954  for (i = nints - 1; i > 0; i--)
955  {
956  diff = ints[i] - ints[i - 1] - 1;
957  codeword |= diff;
958  codeword <<= bits;
959  }
960  diff = ints[0] - base - 1;
961  codeword |= diff;
962  }
963 
964  /* add selector to the codeword, and return */
965  codeword |= (uint64) selector << 60;
966 
967  *num_encoded = nints;
968  return codeword;
969 }
970 
971 /*
972  * Decode a codeword into an array of integers.
973  * Returns the number of integers decoded.
974  */
975 static int
976 simple8b_decode(uint64 codeword, uint64 *decoded, uint64 base)
977 {
978  int selector = (codeword >> 60);
979  int nints = simple8b_modes[selector].num_ints;
980  int bits = simple8b_modes[selector].bits_per_int;
981  uint64 mask = (UINT64CONST(1) << bits) - 1;
982  uint64 curr_value;
983 
984  if (codeword == EMPTY_CODEWORD)
985  return 0;
986 
987  curr_value = base;
988  for (int i = 0; i < nints; i++)
989  {
990  uint64 diff = codeword & mask;
991 
992  curr_value += 1 + diff;
993  decoded[i] = curr_value;
994  codeword >>= bits;
995  }
996  return nints;
997 }
998 
999 /*
1000  * This is very similar to simple8b_decode(), but instead of decoding all
1001  * the values to an array, it just checks if the given "key" is part of
1002  * the codeword.
1003  */
1004 static bool
1005 simple8b_contains(uint64 codeword, uint64 key, uint64 base)
1006 {
1007  int selector = (codeword >> 60);
1008  int nints = simple8b_modes[selector].num_ints;
1009  int bits = simple8b_modes[selector].bits_per_int;
1010 
1011  if (codeword == EMPTY_CODEWORD)
1012  return false;
1013 
1014  if (bits == 0)
1015  {
1016  /* Special handling for 0-bit cases. */
1017  return (key - base) <= nints;
1018  }
1019  else
1020  {
1021  uint64 mask = (UINT64CONST(1) << bits) - 1;
1022  uint64 curr_value;
1023 
1024  curr_value = base;
1025  for (int i = 0; i < nints; i++)
1026  {
1027  uint64 diff = codeword & mask;
1028 
1029  curr_value += 1 + diff;
1030 
1031  if (curr_value >= key)
1032  {
1033  if (curr_value == key)
1034  return true;
1035  else
1036  return false;
1037  }
1038 
1039  codeword >>= bits;
1040  }
1041  }
1042  return false;
1043 }
Datum intset(PG_FUNCTION_ARGS)
Definition: _int_op.c:179
static int32 next
Definition: blutils.c:220
static Datum values[MAXATTR]
Definition: bootstrap.c:156
unsigned short uint16
Definition: c.h:494
unsigned char uint8
Definition: c.h:493
#define ERROR
Definition: elog.h:39
#define MAX_VALUES_PER_LEAF_ITEM
Definition: integerset.c:168
uint64 intset_memory_usage(IntegerSet *intset)
Definition: integerset.c:360
uint64 intset_num_entries(IntegerSet *intset)
Definition: integerset.c:351
static void intset_update_upper(IntegerSet *intset, int level, intset_node *child, uint64 child_key)
Definition: integerset.c:482
bool intset_is_member(IntegerSet *intset, uint64 x)
Definition: integerset.c:555
#define MAX_INTERNAL_ITEMS
Definition: integerset.c:96
#define MAX_TREE_LEVELS
Definition: integerset.c:111
IntegerSet * intset_create(void)
Definition: integerset.c:285
#define MAX_BUFFERED_VALUES
Definition: integerset.c:188
static intset_leaf_node * intset_new_leaf_node(IntegerSet *intset)
Definition: integerset.c:332
void intset_begin_iterate(IntegerSet *intset)
Definition: integerset.c:625
static int intset_binsrch_uint64(uint64 item, uint64 *arr, int arr_elems, bool nextkey)
Definition: integerset.c:715
static void intset_flush_buffered_values(IntegerSet *intset)
Definition: integerset.c:397
static uint64 simple8b_encode(const uint64 *ints, int *num_encoded, uint64 base)
Definition: integerset.c:874
bool intset_iterate_next(IntegerSet *intset, uint64 *next)
Definition: integerset.c:644
#define EMPTY_CODEWORD
Definition: integerset.c:858
static int simple8b_decode(uint64 codeword, uint64 *decoded, uint64 base)
Definition: integerset.c:976
void intset_add_member(IntegerSet *intset, uint64 x)
Definition: integerset.c:371
static const struct simple8b_mode simple8b_modes[17]
static int intset_binsrch_leaf(uint64 item, leaf_item *arr, int arr_elems, bool nextkey)
Definition: integerset.c:748
static intset_internal_node * intset_new_internal_node(IntegerSet *intset)
Definition: integerset.c:317
static bool simple8b_contains(uint64 codeword, uint64 key, uint64 base)
Definition: integerset.c:1005
#define MAX_LEAF_ITEMS
Definition: integerset.c:97
int x
Definition: isn.c:71
int i
Definition: isn.c:73
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:77
Assert(fmt[strlen(fmt) - 1] !='\n')
Size GetMemoryChunkSpace(void *pointer)
Definition: mcxt.c:630
MemoryContext CurrentMemoryContext
Definition: mcxt.c:135
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:1021
void * palloc(Size size)
Definition: mcxt.c:1226
int num_buffered_values
Definition: integerset.c:232
int iter_num_values
Definition: integerset.c:251
uint64 highest_value
Definition: integerset.c:212
intset_node * root
Definition: integerset.c:224
uint64 mem_used
Definition: integerset.c:209
int iter_itemno
Definition: integerset.c:255
MemoryContext context
Definition: integerset.c:208
intset_node * rightmost_nodes[MAX_TREE_LEVELS]
Definition: integerset.c:225
uint64 iter_values_buf[MAX_VALUES_PER_LEAF_ITEM]
Definition: integerset.c:257
intset_leaf_node * leftmost_leaf
Definition: integerset.c:226
bool iter_active
Definition: integerset.c:248
const uint64 * iter_values
Definition: integerset.c:250
uint64 buffered_values[MAX_BUFFERED_VALUES]
Definition: integerset.c:231
intset_leaf_node * iter_node
Definition: integerset.c:254
int iter_valueno
Definition: integerset.c:252
uint64 num_entries
Definition: integerset.c:211
int num_levels
Definition: integerset.c:223
intset_node * downlinks[MAX_INTERNAL_ITEMS]
Definition: integerset.c:158
uint64 values[MAX_INTERNAL_ITEMS]
Definition: integerset.c:157
leaf_item items[MAX_LEAF_ITEMS]
Definition: integerset.c:178
intset_leaf_node * next
Definition: integerset.c:176
uint16 level
Definition: integerset.c:142
uint16 num_items
Definition: integerset.c:143
uint64 codeword
Definition: integerset.c:165
uint64 first
Definition: integerset.c:164
uint8 bits_per_int
Definition: integerset.c:824
uint8 num_ints
Definition: integerset.c:825