PostgreSQL Source Code git master
integerset.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * integerset.c
4 * Data structure to hold a large set of 64-bit integers efficiently
5 *
6 * IntegerSet provides an in-memory data structure to hold a set of
7 * arbitrary 64-bit integers. Internally, the values are stored in a
8 * B-tree, with a special packed representation at the leaf level using
9 * the Simple-8b algorithm, which can pack clusters of nearby values
10 * very tightly.
11 *
12 * Memory consumption depends on the number of values stored, but also
13 * on how far the values are from each other. In the best case, with
14 * long runs of consecutive integers, memory consumption can be as low as
15 * 0.1 bytes per integer. In the worst case, if integers are more than
16 * 2^32 apart, it uses about 8 bytes per integer. In typical use, the
17 * consumption per integer is somewhere between those extremes, depending
18 * on the range of integers stored, and how "clustered" they are.
19 *
20 *
21 * Interface
22 * ---------
23 *
24 * intset_create - Create a new, empty set
25 * intset_add_member - Add an integer to the set
26 * intset_is_member - Test if an integer is in the set
27 * intset_begin_iterate - Begin iterating through all integers in set
28 * intset_iterate_next - Return next set member, if any
29 *
30 * intset_create() creates the set in the current memory context. Subsequent
31 * operations that add to the data structure will continue to allocate from
32 * that same context, even if it's not current anymore.
33 *
34 * Note that there is no function to free an integer set. If you need to do
35 * that, create a dedicated memory context to hold it, and destroy the memory
36 * context instead.
37 *
38 *
39 * Limitations
40 * -----------
41 *
42 * - Values must be added in order. (Random insertions would require
43 * splitting nodes, which hasn't been implemented.)
44 *
45 * - Values cannot be added while iteration is in progress.
46 *
47 * - No support for removing values.
48 *
49 * None of these limitations are fundamental to the data structure, so they
50 * could be lifted if needed, by writing some new code. But the current
51 * users of this facility don't need them.
52 *
53 *
54 * References
55 * ----------
56 *
57 * Simple-8b encoding is based on:
58 *
59 * Vo Ngoc Anh, Alistair Moffat, Index compression using 64-bit words,
60 * Software - Practice & Experience, v.40 n.2, p.131-147, February 2010
61 * (https://doi.org/10.1002/spe.948)
62 *
63 *
64 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
65 * Portions Copyright (c) 1994, Regents of the University of California
66 *
67 * IDENTIFICATION
68 * src/backend/lib/integerset.c
69 *
70 *-------------------------------------------------------------------------
71 */
72#include "postgres.h"
73
74#include "lib/integerset.h"
75#include "utils/memutils.h"
76
77
78/*
79 * Maximum number of integers that can be encoded in a single Simple-8b
80 * codeword. (Defined here before anything else, so that we can size arrays
81 * using this.)
82 */
83#define SIMPLE8B_MAX_VALUES_PER_CODEWORD 240
84
85/*
86 * Parameters for shape of the in-memory B-tree.
87 *
88 * These set the size of each internal and leaf node. They don't necessarily
89 * need to be the same, because the tree is just an in-memory structure.
90 * With the default 64, each node is about 1 kb.
91 *
92 * If you change these, you must recalculate MAX_TREE_LEVELS, too!
93 */
94#define MAX_INTERNAL_ITEMS 64
95#define MAX_LEAF_ITEMS 64
96
97/*
98 * Maximum height of the tree.
99 *
100 * MAX_TREE_ITEMS is calculated from the "fan-out" of the B-tree. The
101 * theoretical maximum number of items that we can store in a set is 2^64,
102 * so MAX_TREE_LEVELS should be set so that:
103 *
104 * MAX_LEAF_ITEMS * MAX_INTERNAL_ITEMS ^ (MAX_TREE_LEVELS - 1) >= 2^64.
105 *
106 * In practice, we'll need far fewer levels, because you will run out of
107 * memory long before reaching that number, but let's be conservative.
108 */
109#define MAX_TREE_LEVELS 11
110
111/*
112 * Node structures, for the in-memory B-tree.
113 *
114 * An internal node holds a number of downlink pointers to leaf nodes, or
115 * to internal nodes on a lower level. For each downlink, the key value
116 * corresponding to the lower level node is stored in a sorted array. The
117 * stored key values are low keys. In other words, if the downlink has value
118 * X, then all items stored on that child are >= X.
119 *
120 * Each leaf node holds a number of "items", with a varying number of
121 * integers packed into each item. Each item consists of two 64-bit words:
122 * The first word holds the first integer stored in the item, in plain format.
123 * The second word contains between 0 and 240 more integers, packed using
124 * Simple-8b encoding. By storing the first integer in plain, unpacked,
125 * format, we can use binary search to quickly find an item that holds (or
126 * would hold) a particular integer. And by storing the rest in packed form,
127 * we still get pretty good memory density, if there are clusters of integers
128 * with similar values.
129 *
130 * Each leaf node also has a pointer to the next leaf node, so that the leaf
131 * nodes can be easily walked from beginning to end when iterating.
132 */
136
137/* Common structure of both leaf and internal nodes. */
139{
140 uint16 level; /* tree level of this node */
141 uint16 num_items; /* number of items in this node */
142};
143
144/* Internal node */
146{
147 /* common header, must match intset_node */
148 uint16 level; /* >= 1 on internal nodes */
150
151 /*
152 * 'values' is an array of key values, and 'downlinks' are pointers to
153 * lower-level nodes, corresponding to the key values.
154 */
157};
158
159/* Leaf node */
160typedef struct
161{
162 uint64 first; /* first integer in this item */
163 uint64 codeword; /* simple8b encoded differences from 'first' */
164} leaf_item;
165
166#define MAX_VALUES_PER_LEAF_ITEM (1 + SIMPLE8B_MAX_VALUES_PER_CODEWORD)
167
169{
170 /* common header, must match intset_node */
171 uint16 level; /* 0 on leafs */
173
174 intset_leaf_node *next; /* right sibling, if any */
175
177};
178
179/*
180 * We buffer insertions in a simple array, before packing and inserting them
181 * into the B-tree. MAX_BUFFERED_VALUES sets the size of the buffer. The
182 * encoder assumes that it is large enough that we can always fill a leaf
183 * item with buffered new items. In other words, MAX_BUFFERED_VALUES must be
184 * larger than MAX_VALUES_PER_LEAF_ITEM. For efficiency, make it much larger.
185 */
186#define MAX_BUFFERED_VALUES (MAX_VALUES_PER_LEAF_ITEM * 2)
187
188/*
189 * IntegerSet is the top-level object representing the set.
190 *
191 * The integers are stored in an in-memory B-tree structure, plus an array
192 * for newly-added integers. IntegerSet also tracks information about memory
193 * usage, as well as the current position when iterating the set with
194 * intset_begin_iterate / intset_iterate_next.
195 */
197{
198 /*
199 * 'context' is the memory context holding this integer set and all its
200 * tree nodes.
201 *
202 * 'mem_used' tracks the amount of memory used. We don't do anything with
203 * it in integerset.c itself, but the callers can ask for it with
204 * intset_memory_usage().
205 */
208
209 uint64 num_entries; /* total # of values in the set */
210 uint64 highest_value; /* highest value stored in this set */
211
212 /*
213 * B-tree to hold the packed values.
214 *
215 * 'rightmost_nodes' hold pointers to the rightmost node on each level.
216 * rightmost_parent[0] is rightmost leaf, rightmost_parent[1] is its
217 * parent, and so forth, all the way up to the root. These are needed when
218 * adding new values. (Currently, we require that new values are added at
219 * the end.)
220 */
221 int num_levels; /* height of the tree */
222 intset_node *root; /* root node */
224 intset_leaf_node *leftmost_leaf; /* leftmost leaf node */
225
226 /*
227 * Holding area for new items that haven't been inserted to the tree yet.
228 */
231
232 /*
233 * Iterator support.
234 *
235 * 'iter_values' is an array of integers ready to be returned to the
236 * caller; 'iter_num_values' is the length of that array, and
237 * 'iter_valueno' is the next index. 'iter_node' and 'iter_itemno' point
238 * to the leaf node, and item within the leaf node, to get the next batch
239 * of values from.
240 *
241 * Normally, 'iter_values' points to 'iter_values_buf', which holds items
242 * decoded from a leaf item. But after we have scanned the whole B-tree,
243 * we iterate through all the unbuffered values, too, by pointing
244 * iter_values to 'buffered_values'.
245 */
246 bool iter_active; /* is iteration in progress? */
247
249 int iter_num_values; /* number of elements in 'iter_values' */
250 int iter_valueno; /* next index into 'iter_values' */
251
252 intset_leaf_node *iter_node; /* current leaf node */
253 int iter_itemno; /* next item in 'iter_node' to decode */
254
256};
257
258/*
259 * Prototypes for internal functions.
260 */
261static void intset_update_upper(IntegerSet *intset, int level,
262 intset_node *child, uint64 child_key);
264
265static int intset_binsrch_uint64(uint64 item, uint64 *arr, int arr_elems,
266 bool nextkey);
267static int intset_binsrch_leaf(uint64 item, leaf_item *arr, int arr_elems,
268 bool nextkey);
269
270static uint64 simple8b_encode(const uint64 *ints, int *num_encoded, uint64 base);
271static int simple8b_decode(uint64 codeword, uint64 *decoded, uint64 base);
272static bool simple8b_contains(uint64 codeword, uint64 key, uint64 base);
273
274
275/*
276 * Create a new, initially empty, integer set.
277 *
278 * The integer set is created in the current memory context.
279 * We will do all subsequent allocations in the same context, too, regardless
280 * of which memory context is current when new integers are added to the set.
281 */
284{
286
287 intset = (IntegerSet *) palloc(sizeof(IntegerSet));
288 intset->context = CurrentMemoryContext;
289 intset->mem_used = GetMemoryChunkSpace(intset);
290
291 intset->num_entries = 0;
292 intset->highest_value = 0;
293
294 intset->num_levels = 0;
295 intset->root = NULL;
296 memset(intset->rightmost_nodes, 0, sizeof(intset->rightmost_nodes));
297 intset->leftmost_leaf = NULL;
298
299 intset->num_buffered_values = 0;
300
301 intset->iter_active = false;
302 intset->iter_node = NULL;
303 intset->iter_itemno = 0;
304 intset->iter_valueno = 0;
305 intset->iter_num_values = 0;
306 intset->iter_values = NULL;
307
308 return intset;
309}
310
311/*
312 * Allocate a new node.
313 */
316{
318
320 sizeof(intset_internal_node));
321 intset->mem_used += GetMemoryChunkSpace(n);
322
323 n->level = 0; /* caller must set */
324 n->num_items = 0;
325
326 return n;
327}
328
329static intset_leaf_node *
331{
333
335 sizeof(intset_leaf_node));
336 intset->mem_used += GetMemoryChunkSpace(n);
337
338 n->level = 0;
339 n->num_items = 0;
340 n->next = NULL;
341
342 return n;
343}
344
345/*
346 * Return the number of entries in the integer set.
347 */
348uint64
350{
351 return intset->num_entries;
352}
353
354/*
355 * Return the amount of memory used by the integer set.
356 */
357uint64
359{
360 return intset->mem_used;
361}
362
363/*
364 * Add a value to the set.
365 *
366 * Values must be added in order.
367 */
368void
370{
371 if (intset->iter_active)
372 elog(ERROR, "cannot add new values to integer set while iteration is in progress");
373
374 if (x <= intset->highest_value && intset->num_entries > 0)
375 elog(ERROR, "cannot add value to integer set out of order");
376
377 if (intset->num_buffered_values >= MAX_BUFFERED_VALUES)
378 {
379 /* Time to flush our buffer */
381 Assert(intset->num_buffered_values < MAX_BUFFERED_VALUES);
382 }
383
384 /* Add it to the buffer of newly-added values */
385 intset->buffered_values[intset->num_buffered_values] = x;
386 intset->num_buffered_values++;
387 intset->num_entries++;
388 intset->highest_value = x;
389}
390
391/*
392 * Take a batch of buffered values, and pack them into the B-tree.
393 */
394static void
396{
397 uint64 *values = intset->buffered_values;
398 uint64 num_values = intset->num_buffered_values;
399 int num_packed = 0;
400 intset_leaf_node *leaf;
401
402 leaf = (intset_leaf_node *) intset->rightmost_nodes[0];
403
404 /*
405 * If the tree is completely empty, create the first leaf page, which is
406 * also the root.
407 */
408 if (leaf == NULL)
409 {
410 /*
411 * This is the very first item in the set.
412 *
413 * Allocate root node. It's also a leaf.
414 */
416
417 intset->root = (intset_node *) leaf;
418 intset->leftmost_leaf = leaf;
419 intset->rightmost_nodes[0] = (intset_node *) leaf;
420 intset->num_levels = 1;
421 }
422
423 /*
424 * If there are less than MAX_VALUES_PER_LEAF_ITEM values in the buffer,
425 * stop. In most cases, we cannot encode that many values in a single
426 * value, but this way, the encoder doesn't have to worry about running
427 * out of input.
428 */
429 while (num_values - num_packed >= MAX_VALUES_PER_LEAF_ITEM)
430 {
431 leaf_item item;
432 int num_encoded;
433
434 /*
435 * Construct the next leaf item, packing as many buffered values as
436 * possible.
437 */
438 item.first = values[num_packed];
439 item.codeword = simple8b_encode(&values[num_packed + 1],
440 &num_encoded,
441 item.first);
442
443 /*
444 * Add the item to the node, allocating a new node if the old one is
445 * full.
446 */
447 if (leaf->num_items >= MAX_LEAF_ITEMS)
448 {
449 /* Allocate new leaf and link it to the tree */
450 intset_leaf_node *old_leaf = leaf;
451
453 old_leaf->next = leaf;
454 intset->rightmost_nodes[0] = (intset_node *) leaf;
455 intset_update_upper(intset, 1, (intset_node *) leaf, item.first);
456 }
457 leaf->items[leaf->num_items++] = item;
458
459 num_packed += 1 + num_encoded;
460 }
461
462 /*
463 * Move any remaining buffered values to the beginning of the array.
464 */
465 if (num_packed < intset->num_buffered_values)
466 {
467 memmove(&intset->buffered_values[0],
468 &intset->buffered_values[num_packed],
469 (intset->num_buffered_values - num_packed) * sizeof(uint64));
470 }
471 intset->num_buffered_values -= num_packed;
472}
473
474/*
475 * Insert a downlink into parent node, after creating a new node.
476 *
477 * Recurses if the parent node is full, too.
478 */
479static void
481 uint64 child_key)
482{
483 intset_internal_node *parent;
484
485 Assert(level > 0);
486
487 /*
488 * Create a new root node, if necessary.
489 */
490 if (level >= intset->num_levels)
491 {
492 intset_node *oldroot = intset->root;
493 uint64 downlink_key;
494
495 /* MAX_TREE_LEVELS should be more than enough, this shouldn't happen */
496 if (intset->num_levels == MAX_TREE_LEVELS)
497 elog(ERROR, "could not expand integer set, maximum number of levels reached");
498 intset->num_levels++;
499
500 /*
501 * Get the first value on the old root page, to be used as the
502 * downlink.
503 */
504 if (intset->root->level == 0)
505 downlink_key = ((intset_leaf_node *) oldroot)->items[0].first;
506 else
507 downlink_key = ((intset_internal_node *) oldroot)->values[0];
508
510 parent->level = level;
511 parent->values[0] = downlink_key;
512 parent->downlinks[0] = oldroot;
513 parent->num_items = 1;
514
515 intset->root = (intset_node *) parent;
516 intset->rightmost_nodes[level] = (intset_node *) parent;
517 }
518
519 /*
520 * Place the downlink on the parent page.
521 */
522 parent = (intset_internal_node *) intset->rightmost_nodes[level];
523
525 {
526 parent->values[parent->num_items] = child_key;
527 parent->downlinks[parent->num_items] = child;
528 parent->num_items++;
529 }
530 else
531 {
532 /*
533 * Doesn't fit. Allocate new parent, with the downlink as the first
534 * item on it, and recursively insert the downlink to the new parent
535 * to the grandparent.
536 */
538 parent->level = level;
539 parent->values[0] = child_key;
540 parent->downlinks[0] = child;
541 parent->num_items = 1;
542
543 intset->rightmost_nodes[level] = (intset_node *) parent;
544
545 intset_update_upper(intset, level + 1, (intset_node *) parent, child_key);
546 }
547}
548
549/*
550 * Does the set contain the given value?
551 */
552bool
554{
555 intset_node *node;
556 intset_leaf_node *leaf;
557 int level;
558 int itemno;
559 leaf_item *item;
560
561 /*
562 * The value might be in the buffer of newly-added values.
563 */
564 if (intset->num_buffered_values > 0 && x >= intset->buffered_values[0])
565 {
566 itemno = intset_binsrch_uint64(x,
567 intset->buffered_values,
568 intset->num_buffered_values,
569 false);
570 if (itemno >= intset->num_buffered_values)
571 return false;
572 else
573 return (intset->buffered_values[itemno] == x);
574 }
575
576 /*
577 * Start from the root, and walk down the B-tree to find the right leaf
578 * node.
579 */
580 if (!intset->root)
581 return false;
582 node = intset->root;
583 for (level = intset->num_levels - 1; level > 0; level--)
584 {
586
587 Assert(node->level == level);
588
589 itemno = intset_binsrch_uint64(x, n->values, n->num_items, true);
590 if (itemno == 0)
591 return false;
592 node = n->downlinks[itemno - 1];
593 }
594 Assert(node->level == 0);
595 leaf = (intset_leaf_node *) node;
596
597 /*
598 * Binary search to find the right item on the leaf page
599 */
600 itemno = intset_binsrch_leaf(x, leaf->items, leaf->num_items, true);
601 if (itemno == 0)
602 return false;
603 item = &leaf->items[itemno - 1];
604
605 /* Is this a match to the first value on the item? */
606 if (item->first == x)
607 return true;
608 Assert(x > item->first);
609
610 /* Is it in the packed codeword? */
611 if (simple8b_contains(item->codeword, x, item->first))
612 return true;
613
614 return false;
615}
616
617/*
618 * Begin in-order scan through all the values.
619 *
620 * While the iteration is in-progress, you cannot add new values to the set.
621 */
622void
624{
625 /* Note that we allow an iteration to be abandoned midway */
626 intset->iter_active = true;
627 intset->iter_node = intset->leftmost_leaf;
628 intset->iter_itemno = 0;
629 intset->iter_valueno = 0;
630 intset->iter_num_values = 0;
631 intset->iter_values = intset->iter_values_buf;
632}
633
634/*
635 * Returns the next integer, when iterating.
636 *
637 * intset_begin_iterate() must be called first. intset_iterate_next() returns
638 * the next value in the set. Returns true, if there was another value, and
639 * stores the value in *next. Otherwise, returns false.
640 */
641bool
643{
644 Assert(intset->iter_active);
645 for (;;)
646 {
647 /* Return next iter_values[] entry if any */
648 if (intset->iter_valueno < intset->iter_num_values)
649 {
650 *next = intset->iter_values[intset->iter_valueno++];
651 return true;
652 }
653
654 /* Decode next item in current leaf node, if any */
655 if (intset->iter_node &&
656 intset->iter_itemno < intset->iter_node->num_items)
657 {
658 leaf_item *item;
659 int num_decoded;
660
661 item = &intset->iter_node->items[intset->iter_itemno++];
662
663 intset->iter_values_buf[0] = item->first;
664 num_decoded = simple8b_decode(item->codeword,
665 &intset->iter_values_buf[1],
666 item->first);
667 intset->iter_num_values = num_decoded + 1;
668 intset->iter_valueno = 0;
669 continue;
670 }
671
672 /* No more items on this leaf, step to next node */
673 if (intset->iter_node)
674 {
675 intset->iter_node = intset->iter_node->next;
676 intset->iter_itemno = 0;
677 continue;
678 }
679
680 /*
681 * We have reached the end of the B-tree. But we might still have
682 * some integers in the buffer of newly-added values.
683 */
684 if (intset->iter_values == (const uint64 *) intset->iter_values_buf)
685 {
686 intset->iter_values = intset->buffered_values;
687 intset->iter_num_values = intset->num_buffered_values;
688 intset->iter_valueno = 0;
689 continue;
690 }
691
692 break;
693 }
694
695 /* No more results. */
696 intset->iter_active = false;
697 *next = 0; /* prevent uninitialized-variable warnings */
698 return false;
699}
700
701/*
702 * intset_binsrch_uint64() -- search a sorted array of uint64s
703 *
704 * Returns the first position with key equal or less than the given key.
705 * The returned position would be the "insert" location for the given key,
706 * that is, the position where the new key should be inserted to.
707 *
708 * 'nextkey' affects the behavior on equal keys. If true, and there is an
709 * equal key in the array, this returns the position immediately after the
710 * equal key. If false, this returns the position of the equal key itself.
711 */
712static int
713intset_binsrch_uint64(uint64 item, uint64 *arr, int arr_elems, bool nextkey)
714{
715 int low,
716 high,
717 mid;
718
719 low = 0;
720 high = arr_elems;
721 while (high > low)
722 {
723 mid = low + (high - low) / 2;
724
725 if (nextkey)
726 {
727 if (item >= arr[mid])
728 low = mid + 1;
729 else
730 high = mid;
731 }
732 else
733 {
734 if (item > arr[mid])
735 low = mid + 1;
736 else
737 high = mid;
738 }
739 }
740
741 return low;
742}
743
744/* same, but for an array of leaf items */
745static int
746intset_binsrch_leaf(uint64 item, leaf_item *arr, int arr_elems, bool nextkey)
747{
748 int low,
749 high,
750 mid;
751
752 low = 0;
753 high = arr_elems;
754 while (high > low)
755 {
756 mid = low + (high - low) / 2;
757
758 if (nextkey)
759 {
760 if (item >= arr[mid].first)
761 low = mid + 1;
762 else
763 high = mid;
764 }
765 else
766 {
767 if (item > arr[mid].first)
768 low = mid + 1;
769 else
770 high = mid;
771 }
772 }
773
774 return low;
775}
776
777/*
778 * Simple-8b encoding.
779 *
780 * The simple-8b algorithm packs between 1 and 240 integers into 64-bit words,
781 * called "codewords". The number of integers packed into a single codeword
782 * depends on the integers being packed; small integers are encoded using
783 * fewer bits than large integers. A single codeword can store a single
784 * 60-bit integer, or two 30-bit integers, for example.
785 *
786 * Since we're storing a unique, sorted, set of integers, we actually encode
787 * the *differences* between consecutive integers. That way, clusters of
788 * integers that are close to each other are packed efficiently, regardless
789 * of their absolute values.
790 *
791 * In Simple-8b, each codeword consists of a 4-bit selector, which indicates
792 * how many integers are encoded in the codeword, and the encoded integers are
793 * packed into the remaining 60 bits. The selector allows for 16 different
794 * ways of using the remaining 60 bits, called "modes". The number of integers
795 * packed into a single codeword in each mode is listed in the simple8b_modes
796 * table below. For example, consider the following codeword:
797 *
798 * 20-bit integer 20-bit integer 20-bit integer
799 * 1101 00000000000000010010 01111010000100100000 00000000000000010100
800 * ^
801 * selector
802 *
803 * The selector 1101 is 13 in decimal. From the modes table below, we see
804 * that it means that the codeword encodes three 20-bit integers. In decimal,
805 * those integers are 18, 500000 and 20. Because we encode deltas rather than
806 * absolute values, the actual values that they represent are 18, 500018 and
807 * 500038.
808 *
809 * Modes 0 and 1 are a bit special; they encode a run of 240 or 120 zeroes
810 * (which means 240 or 120 consecutive integers, since we're encoding the
811 * deltas between integers), without using the rest of the codeword bits
812 * for anything.
813 *
814 * Simple-8b cannot encode integers larger than 60 bits. Values larger than
815 * that are always stored in the 'first' field of a leaf item, never in the
816 * packed codeword. If there is a sequence of integers that are more than
817 * 2^60 apart, the codeword will go unused on those items. To represent that,
818 * we use a magic EMPTY_CODEWORD codeword value.
819 */
820static const struct simple8b_mode
821{
824} simple8b_modes[17] =
825
826{
827 {0, 240}, /* mode 0: 240 zeroes */
828 {0, 120}, /* mode 1: 120 zeroes */
829 {1, 60}, /* mode 2: sixty 1-bit integers */
830 {2, 30}, /* mode 3: thirty 2-bit integers */
831 {3, 20}, /* mode 4: twenty 3-bit integers */
832 {4, 15}, /* mode 5: fifteen 4-bit integers */
833 {5, 12}, /* mode 6: twelve 5-bit integers */
834 {6, 10}, /* mode 7: ten 6-bit integers */
835 {7, 8}, /* mode 8: eight 7-bit integers (four bits
836 * are wasted) */
837 {8, 7}, /* mode 9: seven 8-bit integers (four bits
838 * are wasted) */
839 {10, 6}, /* mode 10: six 10-bit integers */
840 {12, 5}, /* mode 11: five 12-bit integers */
841 {15, 4}, /* mode 12: four 15-bit integers */
842 {20, 3}, /* mode 13: three 20-bit integers */
843 {30, 2}, /* mode 14: two 30-bit integers */
844 {60, 1}, /* mode 15: one 60-bit integer */
845
846 {0, 0} /* sentinel value */
848
849/*
850 * EMPTY_CODEWORD is a special value, used to indicate "no values".
851 * It is used if the next value is too large to be encoded with Simple-8b.
852 *
853 * This value looks like a mode-0 codeword, but we can distinguish it
854 * because a regular mode-0 codeword would have zeroes in the unused bits.
855 */
856#define EMPTY_CODEWORD UINT64CONST(0x0FFFFFFFFFFFFFFF)
857
858/*
859 * Encode a number of integers into a Simple-8b codeword.
860 *
861 * (What we actually encode are deltas between successive integers.
862 * "base" is the value before ints[0].)
863 *
864 * The input array must contain at least SIMPLE8B_MAX_VALUES_PER_CODEWORD
865 * elements, ensuring that we can produce a full codeword.
866 *
867 * Returns the encoded codeword, and sets *num_encoded to the number of
868 * input integers that were encoded. That can be zero, if the first delta
869 * is too large to be encoded.
870 */
871static uint64
872simple8b_encode(const uint64 *ints, int *num_encoded, uint64 base)
873{
874 int selector;
875 int nints;
876 int bits;
877 uint64 diff;
878 uint64 last_val;
879 uint64 codeword;
880 int i;
881
882 Assert(ints[0] > base);
883
884 /*
885 * Select the "mode" to use for this codeword.
886 *
887 * In each iteration, check if the next value can be represented in the
888 * current mode we're considering. If it's too large, then step up the
889 * mode to a wider one, and repeat. If it fits, move on to the next
890 * integer. Repeat until the codeword is full, given the current mode.
891 *
892 * Note that we don't have any way to represent unused slots in the
893 * codeword, so we require each codeword to be "full". It is always
894 * possible to produce a full codeword unless the very first delta is too
895 * large to be encoded. For example, if the first delta is small but the
896 * second is too large to be encoded, we'll end up using the last "mode",
897 * which has nints == 1.
898 */
899 selector = 0;
900 nints = simple8b_modes[0].num_ints;
902 diff = ints[0] - base - 1;
903 last_val = ints[0];
904 i = 0; /* number of deltas we have accepted */
905 for (;;)
906 {
907 if (diff >= (UINT64CONST(1) << bits))
908 {
909 /* too large, step up to next mode */
910 selector++;
911 nints = simple8b_modes[selector].num_ints;
912 bits = simple8b_modes[selector].bits_per_int;
913 /* we might already have accepted enough deltas for this mode */
914 if (i >= nints)
915 break;
916 }
917 else
918 {
919 /* accept this delta; then done if codeword is full */
920 i++;
921 if (i >= nints)
922 break;
923 /* examine next delta */
924 Assert(ints[i] > last_val);
925 diff = ints[i] - last_val - 1;
926 last_val = ints[i];
927 }
928 }
929
930 if (nints == 0)
931 {
932 /*
933 * The first delta is too large to be encoded with Simple-8b.
934 *
935 * If there is at least one not-too-large integer in the input, we
936 * will encode it using mode 15 (or a more compact mode). Hence, we
937 * can only get here if the *first* delta is >= 2^60.
938 */
939 Assert(i == 0);
940 *num_encoded = 0;
941 return EMPTY_CODEWORD;
942 }
943
944 /*
945 * Encode the integers using the selected mode. Note that we shift them
946 * into the codeword in reverse order, so that they will come out in the
947 * correct order in the decoder.
948 */
949 codeword = 0;
950 if (bits > 0)
951 {
952 for (i = nints - 1; i > 0; i--)
953 {
954 diff = ints[i] - ints[i - 1] - 1;
955 codeword |= diff;
956 codeword <<= bits;
957 }
958 diff = ints[0] - base - 1;
959 codeword |= diff;
960 }
961
962 /* add selector to the codeword, and return */
963 codeword |= (uint64) selector << 60;
964
965 *num_encoded = nints;
966 return codeword;
967}
968
969/*
970 * Decode a codeword into an array of integers.
971 * Returns the number of integers decoded.
972 */
973static int
974simple8b_decode(uint64 codeword, uint64 *decoded, uint64 base)
975{
976 int selector = (codeword >> 60);
977 int nints = simple8b_modes[selector].num_ints;
978 int bits = simple8b_modes[selector].bits_per_int;
979 uint64 mask = (UINT64CONST(1) << bits) - 1;
980 uint64 curr_value;
981
982 if (codeword == EMPTY_CODEWORD)
983 return 0;
984
985 curr_value = base;
986 for (int i = 0; i < nints; i++)
987 {
988 uint64 diff = codeword & mask;
989
990 curr_value += 1 + diff;
991 decoded[i] = curr_value;
992 codeword >>= bits;
993 }
994 return nints;
995}
996
997/*
998 * This is very similar to simple8b_decode(), but instead of decoding all
999 * the values to an array, it just checks if the given "key" is part of
1000 * the codeword.
1001 */
1002static bool
1004{
1005 int selector = (codeword >> 60);
1006 int nints = simple8b_modes[selector].num_ints;
1007 int bits = simple8b_modes[selector].bits_per_int;
1008
1009 if (codeword == EMPTY_CODEWORD)
1010 return false;
1011
1012 if (bits == 0)
1013 {
1014 /* Special handling for 0-bit cases. */
1015 return (key - base) <= nints;
1016 }
1017 else
1018 {
1019 uint64 mask = (UINT64CONST(1) << bits) - 1;
1020 uint64 curr_value;
1021
1022 curr_value = base;
1023 for (int i = 0; i < nints; i++)
1024 {
1025 uint64 diff = codeword & mask;
1026
1027 curr_value += 1 + diff;
1028
1029 if (curr_value >= key)
1030 {
1031 if (curr_value == key)
1032 return true;
1033 else
1034 return false;
1035 }
1036
1037 codeword >>= bits;
1038 }
1039 }
1040 return false;
1041}
Datum intset(PG_FUNCTION_ARGS)
Definition: _int_op.c:179
static int32 next
Definition: blutils.c:221
static Datum values[MAXATTR]
Definition: bootstrap.c:151
uint8_t uint8
Definition: c.h:486
#define Assert(condition)
Definition: c.h:815
uint64_t uint64
Definition: c.h:489
uint16_t uint16
Definition: c.h:487
#define UINT64CONST(x)
Definition: c.h:503
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
#define MAX_VALUES_PER_LEAF_ITEM
Definition: integerset.c:166
uint64 intset_memory_usage(IntegerSet *intset)
Definition: integerset.c:358
uint64 intset_num_entries(IntegerSet *intset)
Definition: integerset.c:349
static void intset_update_upper(IntegerSet *intset, int level, intset_node *child, uint64 child_key)
Definition: integerset.c:480
bool intset_is_member(IntegerSet *intset, uint64 x)
Definition: integerset.c:553
#define MAX_INTERNAL_ITEMS
Definition: integerset.c:94
#define MAX_TREE_LEVELS
Definition: integerset.c:109
#define MAX_BUFFERED_VALUES
Definition: integerset.c:186
static intset_internal_node * intset_new_internal_node(IntegerSet *intset)
Definition: integerset.c:315
void intset_begin_iterate(IntegerSet *intset)
Definition: integerset.c:623
static int intset_binsrch_uint64(uint64 item, uint64 *arr, int arr_elems, bool nextkey)
Definition: integerset.c:713
static void intset_flush_buffered_values(IntegerSet *intset)
Definition: integerset.c:395
static uint64 simple8b_encode(const uint64 *ints, int *num_encoded, uint64 base)
Definition: integerset.c:872
bool intset_iterate_next(IntegerSet *intset, uint64 *next)
Definition: integerset.c:642
#define EMPTY_CODEWORD
Definition: integerset.c:856
static int simple8b_decode(uint64 codeword, uint64 *decoded, uint64 base)
Definition: integerset.c:974
IntegerSet * intset_create(void)
Definition: integerset.c:283
void intset_add_member(IntegerSet *intset, uint64 x)
Definition: integerset.c:369
static const struct simple8b_mode simple8b_modes[17]
static int intset_binsrch_leaf(uint64 item, leaf_item *arr, int arr_elems, bool nextkey)
Definition: integerset.c:746
static bool simple8b_contains(uint64 codeword, uint64 key, uint64 base)
Definition: integerset.c:1003
#define MAX_LEAF_ITEMS
Definition: integerset.c:95
static intset_leaf_node * intset_new_leaf_node(IntegerSet *intset)
Definition: integerset.c:330
int x
Definition: isn.c:70
int i
Definition: isn.c:72
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:76
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:1181
Size GetMemoryChunkSpace(void *pointer)
Definition: mcxt.c:721
void * palloc(Size size)
Definition: mcxt.c:1317
MemoryContext CurrentMemoryContext
Definition: mcxt.c:143
int num_buffered_values
Definition: integerset.c:230
int iter_num_values
Definition: integerset.c:249
uint64 highest_value
Definition: integerset.c:210
intset_node * root
Definition: integerset.c:222
uint64 mem_used
Definition: integerset.c:207
int iter_itemno
Definition: integerset.c:253
MemoryContext context
Definition: integerset.c:206
intset_node * rightmost_nodes[MAX_TREE_LEVELS]
Definition: integerset.c:223
uint64 iter_values_buf[MAX_VALUES_PER_LEAF_ITEM]
Definition: integerset.c:255
intset_leaf_node * leftmost_leaf
Definition: integerset.c:224
bool iter_active
Definition: integerset.c:246
const uint64 * iter_values
Definition: integerset.c:248
uint64 buffered_values[MAX_BUFFERED_VALUES]
Definition: integerset.c:229
intset_leaf_node * iter_node
Definition: integerset.c:252
int iter_valueno
Definition: integerset.c:250
uint64 num_entries
Definition: integerset.c:209
int num_levels
Definition: integerset.c:221
intset_node * downlinks[MAX_INTERNAL_ITEMS]
Definition: integerset.c:156
uint64 values[MAX_INTERNAL_ITEMS]
Definition: integerset.c:155
leaf_item items[MAX_LEAF_ITEMS]
Definition: integerset.c:176
intset_leaf_node * next
Definition: integerset.c:174
uint16 level
Definition: integerset.c:140
uint16 num_items
Definition: integerset.c:141
uint64 codeword
Definition: integerset.c:163
uint64 first
Definition: integerset.c:162
uint8 bits_per_int
Definition: integerset.c:822
uint8 num_ints
Definition: integerset.c:823