PostgreSQL Source Code  git master
integerset.c File Reference
#include "postgres.h"
#include "access/htup_details.h"
#include "lib/integerset.h"
#include "port/pg_bitutils.h"
#include "utils/memutils.h"
Include dependency graph for integerset.c:

Go to the source code of this file.

Data Structures

struct  intset_node
 
struct  intset_internal_node
 
struct  leaf_item
 
struct  intset_leaf_node
 
struct  IntegerSet
 
struct  simple8b_mode
 

Macros

#define SIMPLE8B_MAX_VALUES_PER_CODEWORD   240
 
#define MAX_INTERNAL_ITEMS   64
 
#define MAX_LEAF_ITEMS   64
 
#define MAX_TREE_LEVELS   11
 
#define MAX_VALUES_PER_LEAF_ITEM   (1 + SIMPLE8B_MAX_VALUES_PER_CODEWORD)
 
#define MAX_BUFFERED_VALUES   (MAX_VALUES_PER_LEAF_ITEM * 2)
 
#define EMPTY_CODEWORD   UINT64CONST(0x0FFFFFFFFFFFFFFF)
 

Typedefs

typedef struct intset_node intset_node
 
typedef struct intset_leaf_node intset_leaf_node
 
typedef struct intset_internal_node intset_internal_node
 

Functions

static void intset_update_upper (IntegerSet *intset, int level, intset_node *child, uint64 child_key)
 
static void intset_flush_buffered_values (IntegerSet *intset)
 
static int intset_binsrch_uint64 (uint64 item, uint64 *arr, int arr_elems, bool nextkey)
 
static int intset_binsrch_leaf (uint64 item, leaf_item *arr, int arr_elems, bool nextkey)
 
static uint64 simple8b_encode (const uint64 *ints, int *num_encoded, uint64 base)
 
static int simple8b_decode (uint64 codeword, uint64 *decoded, uint64 base)
 
static bool simple8b_contains (uint64 codeword, uint64 key, uint64 base)
 
IntegerSetintset_create (void)
 
static intset_internal_nodeintset_new_internal_node (IntegerSet *intset)
 
static intset_leaf_nodeintset_new_leaf_node (IntegerSet *intset)
 
uint64 intset_num_entries (IntegerSet *intset)
 
uint64 intset_memory_usage (IntegerSet *intset)
 
void intset_add_member (IntegerSet *intset, uint64 x)
 
bool intset_is_member (IntegerSet *intset, uint64 x)
 
void intset_begin_iterate (IntegerSet *intset)
 
bool intset_iterate_next (IntegerSet *intset, uint64 *next)
 

Variables

static const struct simple8b_mode simple8b_modes [17]
 

Macro Definition Documentation

◆ EMPTY_CODEWORD

#define EMPTY_CODEWORD   UINT64CONST(0x0FFFFFFFFFFFFFFF)

Definition at line 858 of file integerset.c.

◆ MAX_BUFFERED_VALUES

#define MAX_BUFFERED_VALUES   (MAX_VALUES_PER_LEAF_ITEM * 2)

Definition at line 188 of file integerset.c.

◆ MAX_INTERNAL_ITEMS

#define MAX_INTERNAL_ITEMS   64

Definition at line 96 of file integerset.c.

◆ MAX_LEAF_ITEMS

#define MAX_LEAF_ITEMS   64

Definition at line 97 of file integerset.c.

◆ MAX_TREE_LEVELS

#define MAX_TREE_LEVELS   11

Definition at line 111 of file integerset.c.

◆ MAX_VALUES_PER_LEAF_ITEM

#define MAX_VALUES_PER_LEAF_ITEM   (1 + SIMPLE8B_MAX_VALUES_PER_CODEWORD)

Definition at line 168 of file integerset.c.

◆ SIMPLE8B_MAX_VALUES_PER_CODEWORD

#define SIMPLE8B_MAX_VALUES_PER_CODEWORD   240

Definition at line 85 of file integerset.c.

Typedef Documentation

◆ intset_internal_node

Definition at line 1 of file integerset.c.

◆ intset_leaf_node

Definition at line 1 of file integerset.c.

◆ intset_node

typedef struct intset_node intset_node

Definition at line 1 of file integerset.c.

Function Documentation

◆ intset_add_member()

void intset_add_member ( IntegerSet intset,
uint64  x 
)

Definition at line 371 of file integerset.c.

372 {
373  if (intset->iter_active)
374  elog(ERROR, "cannot add new values to integer set while iteration is in progress");
375 
376  if (x <= intset->highest_value && intset->num_entries > 0)
377  elog(ERROR, "cannot add value to integer set out of order");
378 
379  if (intset->num_buffered_values >= MAX_BUFFERED_VALUES)
380  {
381  /* Time to flush our buffer */
383  Assert(intset->num_buffered_values < MAX_BUFFERED_VALUES);
384  }
385 
386  /* Add it to the buffer of newly-added values */
387  intset->buffered_values[intset->num_buffered_values] = x;
388  intset->num_buffered_values++;
389  intset->num_entries++;
390  intset->highest_value = x;
391 }
Datum intset(PG_FUNCTION_ARGS)
Definition: _int_op.c:179
#define ERROR
Definition: elog.h:39
#define MAX_BUFFERED_VALUES
Definition: integerset.c:188
static void intset_flush_buffered_values(IntegerSet *intset)
Definition: integerset.c:397
int x
Definition: isn.c:71
Assert(fmt[strlen(fmt) - 1] !='\n')

References Assert(), elog(), ERROR, intset(), intset_flush_buffered_values(), MAX_BUFFERED_VALUES, and x.

Referenced by gistvacuumpage(), test_huge_distances(), test_pattern(), test_single_value(), and test_single_value_and_filler().

◆ intset_begin_iterate()

void intset_begin_iterate ( IntegerSet intset)

Definition at line 625 of file integerset.c.

626 {
627  /* Note that we allow an iteration to be abandoned midway */
628  intset->iter_active = true;
629  intset->iter_node = intset->leftmost_leaf;
630  intset->iter_itemno = 0;
631  intset->iter_valueno = 0;
632  intset->iter_num_values = 0;
633  intset->iter_values = intset->iter_values_buf;
634 }

References intset().

Referenced by gistvacuum_delete_empty_pages(), test_empty(), test_huge_distances(), test_pattern(), test_single_value(), and test_single_value_and_filler().

◆ intset_binsrch_leaf()

static int intset_binsrch_leaf ( uint64  item,
leaf_item arr,
int  arr_elems,
bool  nextkey 
)
static

Definition at line 748 of file integerset.c.

749 {
750  int low,
751  high,
752  mid;
753 
754  low = 0;
755  high = arr_elems;
756  while (high > low)
757  {
758  mid = low + (high - low) / 2;
759 
760  if (nextkey)
761  {
762  if (item >= arr[mid].first)
763  low = mid + 1;
764  else
765  high = mid;
766  }
767  else
768  {
769  if (item > arr[mid].first)
770  low = mid + 1;
771  else
772  high = mid;
773  }
774  }
775 
776  return low;
777 }

Referenced by intset_is_member().

◆ intset_binsrch_uint64()

static int intset_binsrch_uint64 ( uint64  item,
uint64 *  arr,
int  arr_elems,
bool  nextkey 
)
static

Definition at line 715 of file integerset.c.

716 {
717  int low,
718  high,
719  mid;
720 
721  low = 0;
722  high = arr_elems;
723  while (high > low)
724  {
725  mid = low + (high - low) / 2;
726 
727  if (nextkey)
728  {
729  if (item >= arr[mid])
730  low = mid + 1;
731  else
732  high = mid;
733  }
734  else
735  {
736  if (item > arr[mid])
737  low = mid + 1;
738  else
739  high = mid;
740  }
741  }
742 
743  return low;
744 }

Referenced by intset_is_member().

◆ intset_create()

IntegerSet* intset_create ( void  )

Definition at line 285 of file integerset.c.

286 {
288 
289  intset = (IntegerSet *) palloc(sizeof(IntegerSet));
290  intset->context = CurrentMemoryContext;
291  intset->mem_used = GetMemoryChunkSpace(intset);
292 
293  intset->num_entries = 0;
294  intset->highest_value = 0;
295 
296  intset->num_levels = 0;
297  intset->root = NULL;
298  memset(intset->rightmost_nodes, 0, sizeof(intset->rightmost_nodes));
299  intset->leftmost_leaf = NULL;
300 
301  intset->num_buffered_values = 0;
302 
303  intset->iter_active = false;
304  intset->iter_node = NULL;
305  intset->iter_itemno = 0;
306  intset->iter_valueno = 0;
307  intset->iter_num_values = 0;
308  intset->iter_values = NULL;
309 
310  return intset;
311 }
Size GetMemoryChunkSpace(void *pointer)
Definition: mcxt.c:630
MemoryContext CurrentMemoryContext
Definition: mcxt.c:135
void * palloc(Size size)
Definition: mcxt.c:1226

References CurrentMemoryContext, GetMemoryChunkSpace(), intset(), and palloc().

Referenced by gistvacuumscan(), test_empty(), test_huge_distances(), test_pattern(), test_single_value(), and test_single_value_and_filler().

◆ intset_flush_buffered_values()

static void intset_flush_buffered_values ( IntegerSet intset)
static

Definition at line 397 of file integerset.c.

398 {
399  uint64 *values = intset->buffered_values;
400  uint64 num_values = intset->num_buffered_values;
401  int num_packed = 0;
402  intset_leaf_node *leaf;
403 
404  leaf = (intset_leaf_node *) intset->rightmost_nodes[0];
405 
406  /*
407  * If the tree is completely empty, create the first leaf page, which is
408  * also the root.
409  */
410  if (leaf == NULL)
411  {
412  /*
413  * This is the very first item in the set.
414  *
415  * Allocate root node. It's also a leaf.
416  */
418 
419  intset->root = (intset_node *) leaf;
420  intset->leftmost_leaf = leaf;
421  intset->rightmost_nodes[0] = (intset_node *) leaf;
422  intset->num_levels = 1;
423  }
424 
425  /*
426  * If there are less than MAX_VALUES_PER_LEAF_ITEM values in the buffer,
427  * stop. In most cases, we cannot encode that many values in a single
428  * value, but this way, the encoder doesn't have to worry about running
429  * out of input.
430  */
431  while (num_values - num_packed >= MAX_VALUES_PER_LEAF_ITEM)
432  {
433  leaf_item item;
434  int num_encoded;
435 
436  /*
437  * Construct the next leaf item, packing as many buffered values as
438  * possible.
439  */
440  item.first = values[num_packed];
441  item.codeword = simple8b_encode(&values[num_packed + 1],
442  &num_encoded,
443  item.first);
444 
445  /*
446  * Add the item to the node, allocating a new node if the old one is
447  * full.
448  */
449  if (leaf->num_items >= MAX_LEAF_ITEMS)
450  {
451  /* Allocate new leaf and link it to the tree */
452  intset_leaf_node *old_leaf = leaf;
453 
455  old_leaf->next = leaf;
456  intset->rightmost_nodes[0] = (intset_node *) leaf;
457  intset_update_upper(intset, 1, (intset_node *) leaf, item.first);
458  }
459  leaf->items[leaf->num_items++] = item;
460 
461  num_packed += 1 + num_encoded;
462  }
463 
464  /*
465  * Move any remaining buffered values to the beginning of the array.
466  */
467  if (num_packed < intset->num_buffered_values)
468  {
469  memmove(&intset->buffered_values[0],
470  &intset->buffered_values[num_packed],
471  (intset->num_buffered_values - num_packed) * sizeof(uint64));
472  }
473  intset->num_buffered_values -= num_packed;
474 }
static Datum values[MAXATTR]
Definition: bootstrap.c:156
#define MAX_VALUES_PER_LEAF_ITEM
Definition: integerset.c:168
static void intset_update_upper(IntegerSet *intset, int level, intset_node *child, uint64 child_key)
Definition: integerset.c:482
static intset_leaf_node * intset_new_leaf_node(IntegerSet *intset)
Definition: integerset.c:332
static uint64 simple8b_encode(const uint64 *ints, int *num_encoded, uint64 base)
Definition: integerset.c:874
#define MAX_LEAF_ITEMS
Definition: integerset.c:97
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:77
leaf_item items[MAX_LEAF_ITEMS]
Definition: integerset.c:178
intset_leaf_node * next
Definition: integerset.c:176
uint64 codeword
Definition: integerset.c:165
uint64 first
Definition: integerset.c:164

References leaf_item::codeword, leaf_item::first, if(), intset(), intset_new_leaf_node(), intset_update_upper(), intset_leaf_node::items, MAX_LEAF_ITEMS, MAX_VALUES_PER_LEAF_ITEM, intset_leaf_node::next, intset_leaf_node::num_items, simple8b_encode(), and values.

Referenced by intset_add_member().

◆ intset_is_member()

bool intset_is_member ( IntegerSet intset,
uint64  x 
)

Definition at line 555 of file integerset.c.

556 {
557  intset_node *node;
558  intset_leaf_node *leaf;
559  int level;
560  int itemno;
561  leaf_item *item;
562 
563  /*
564  * The value might be in the buffer of newly-added values.
565  */
566  if (intset->num_buffered_values > 0 && x >= intset->buffered_values[0])
567  {
568  itemno = intset_binsrch_uint64(x,
569  intset->buffered_values,
570  intset->num_buffered_values,
571  false);
572  if (itemno >= intset->num_buffered_values)
573  return false;
574  else
575  return (intset->buffered_values[itemno] == x);
576  }
577 
578  /*
579  * Start from the root, and walk down the B-tree to find the right leaf
580  * node.
581  */
582  if (!intset->root)
583  return false;
584  node = intset->root;
585  for (level = intset->num_levels - 1; level > 0; level--)
586  {
588 
589  Assert(node->level == level);
590 
591  itemno = intset_binsrch_uint64(x, n->values, n->num_items, true);
592  if (itemno == 0)
593  return false;
594  node = n->downlinks[itemno - 1];
595  }
596  Assert(node->level == 0);
597  leaf = (intset_leaf_node *) node;
598 
599  /*
600  * Binary search to find the right item on the leaf page
601  */
602  itemno = intset_binsrch_leaf(x, leaf->items, leaf->num_items, true);
603  if (itemno == 0)
604  return false;
605  item = &leaf->items[itemno - 1];
606 
607  /* Is this a match to the first value on the item? */
608  if (item->first == x)
609  return true;
610  Assert(x > item->first);
611 
612  /* Is it in the packed codeword? */
613  if (simple8b_contains(item->codeword, x, item->first))
614  return true;
615 
616  return false;
617 }
static int intset_binsrch_uint64(uint64 item, uint64 *arr, int arr_elems, bool nextkey)
Definition: integerset.c:715
static int intset_binsrch_leaf(uint64 item, leaf_item *arr, int arr_elems, bool nextkey)
Definition: integerset.c:748
static bool simple8b_contains(uint64 codeword, uint64 key, uint64 base)
Definition: integerset.c:1005
intset_node * downlinks[MAX_INTERNAL_ITEMS]
Definition: integerset.c:158
uint64 values[MAX_INTERNAL_ITEMS]
Definition: integerset.c:157
uint16 level
Definition: integerset.c:142

References Assert(), leaf_item::codeword, intset_internal_node::downlinks, leaf_item::first, intset(), intset_binsrch_leaf(), intset_binsrch_uint64(), intset_leaf_node::items, intset_node::level, intset_internal_node::num_items, intset_leaf_node::num_items, simple8b_contains(), intset_internal_node::values, and x.

Referenced by check_with_filler(), gistvacuum_delete_empty_pages(), test_empty(), test_huge_distances(), test_pattern(), and test_single_value().

◆ intset_iterate_next()

bool intset_iterate_next ( IntegerSet intset,
uint64 *  next 
)

Definition at line 644 of file integerset.c.

645 {
646  Assert(intset->iter_active);
647  for (;;)
648  {
649  /* Return next iter_values[] entry if any */
650  if (intset->iter_valueno < intset->iter_num_values)
651  {
652  *next = intset->iter_values[intset->iter_valueno++];
653  return true;
654  }
655 
656  /* Decode next item in current leaf node, if any */
657  if (intset->iter_node &&
658  intset->iter_itemno < intset->iter_node->num_items)
659  {
660  leaf_item *item;
661  int num_decoded;
662 
663  item = &intset->iter_node->items[intset->iter_itemno++];
664 
665  intset->iter_values_buf[0] = item->first;
666  num_decoded = simple8b_decode(item->codeword,
667  &intset->iter_values_buf[1],
668  item->first);
669  intset->iter_num_values = num_decoded + 1;
670  intset->iter_valueno = 0;
671  continue;
672  }
673 
674  /* No more items on this leaf, step to next node */
675  if (intset->iter_node)
676  {
677  intset->iter_node = intset->iter_node->next;
678  intset->iter_itemno = 0;
679  continue;
680  }
681 
682  /*
683  * We have reached the end of the B-tree. But we might still have
684  * some integers in the buffer of newly-added values.
685  */
686  if (intset->iter_values == (const uint64 *) intset->iter_values_buf)
687  {
688  intset->iter_values = intset->buffered_values;
689  intset->iter_num_values = intset->num_buffered_values;
690  intset->iter_valueno = 0;
691  continue;
692  }
693 
694  break;
695  }
696 
697  /* No more results. */
698  intset->iter_active = false;
699  *next = 0; /* prevent uninitialized-variable warnings */
700  return false;
701 }
static int32 next
Definition: blutils.c:219
static int simple8b_decode(uint64 codeword, uint64 *decoded, uint64 base)
Definition: integerset.c:976

References Assert(), leaf_item::codeword, leaf_item::first, intset(), next, and simple8b_decode().

Referenced by gistvacuum_delete_empty_pages(), test_empty(), test_huge_distances(), test_pattern(), test_single_value(), and test_single_value_and_filler().

◆ intset_memory_usage()

uint64 intset_memory_usage ( IntegerSet intset)

Definition at line 360 of file integerset.c.

361 {
362  return intset->mem_used;
363 }

References intset().

Referenced by test_pattern(), and test_single_value_and_filler().

◆ intset_new_internal_node()

static intset_internal_node* intset_new_internal_node ( IntegerSet intset)
static

Definition at line 317 of file integerset.c.

318 {
320 
322  sizeof(intset_internal_node));
323  intset->mem_used += GetMemoryChunkSpace(n);
324 
325  n->level = 0; /* caller must set */
326  n->num_items = 0;
327 
328  return n;
329 }
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:1021

References GetMemoryChunkSpace(), intset(), intset_internal_node::level, MemoryContextAlloc(), and intset_internal_node::num_items.

Referenced by intset_update_upper().

◆ intset_new_leaf_node()

static intset_leaf_node* intset_new_leaf_node ( IntegerSet intset)
static

Definition at line 332 of file integerset.c.

333 {
334  intset_leaf_node *n;
335 
336  n = (intset_leaf_node *) MemoryContextAlloc(intset->context,
337  sizeof(intset_leaf_node));
338  intset->mem_used += GetMemoryChunkSpace(n);
339 
340  n->level = 0;
341  n->num_items = 0;
342  n->next = NULL;
343 
344  return n;
345 }

References GetMemoryChunkSpace(), intset(), intset_leaf_node::level, MemoryContextAlloc(), intset_leaf_node::next, and intset_leaf_node::num_items.

Referenced by intset_flush_buffered_values().

◆ intset_num_entries()

uint64 intset_num_entries ( IntegerSet intset)

Definition at line 351 of file integerset.c.

352 {
353  return intset->num_entries;
354 }

References intset().

Referenced by gistvacuum_delete_empty_pages(), test_pattern(), test_single_value(), and test_single_value_and_filler().

◆ intset_update_upper()

static void intset_update_upper ( IntegerSet intset,
int  level,
intset_node child,
uint64  child_key 
)
static

Definition at line 482 of file integerset.c.

484 {
485  intset_internal_node *parent;
486 
487  Assert(level > 0);
488 
489  /*
490  * Create a new root node, if necessary.
491  */
492  if (level >= intset->num_levels)
493  {
494  intset_node *oldroot = intset->root;
495  uint64 downlink_key;
496 
497  /* MAX_TREE_LEVELS should be more than enough, this shouldn't happen */
498  if (intset->num_levels == MAX_TREE_LEVELS)
499  elog(ERROR, "could not expand integer set, maximum number of levels reached");
500  intset->num_levels++;
501 
502  /*
503  * Get the first value on the old root page, to be used as the
504  * downlink.
505  */
506  if (intset->root->level == 0)
507  downlink_key = ((intset_leaf_node *) oldroot)->items[0].first;
508  else
509  downlink_key = ((intset_internal_node *) oldroot)->values[0];
510 
512  parent->level = level;
513  parent->values[0] = downlink_key;
514  parent->downlinks[0] = oldroot;
515  parent->num_items = 1;
516 
517  intset->root = (intset_node *) parent;
518  intset->rightmost_nodes[level] = (intset_node *) parent;
519  }
520 
521  /*
522  * Place the downlink on the parent page.
523  */
524  parent = (intset_internal_node *) intset->rightmost_nodes[level];
525 
526  if (parent->num_items < MAX_INTERNAL_ITEMS)
527  {
528  parent->values[parent->num_items] = child_key;
529  parent->downlinks[parent->num_items] = child;
530  parent->num_items++;
531  }
532  else
533  {
534  /*
535  * Doesn't fit. Allocate new parent, with the downlink as the first
536  * item on it, and recursively insert the downlink to the new parent
537  * to the grandparent.
538  */
540  parent->level = level;
541  parent->values[0] = child_key;
542  parent->downlinks[0] = child;
543  parent->num_items = 1;
544 
545  intset->rightmost_nodes[level] = (intset_node *) parent;
546 
547  intset_update_upper(intset, level + 1, (intset_node *) parent, child_key);
548  }
549 }
#define MAX_INTERNAL_ITEMS
Definition: integerset.c:96
#define MAX_TREE_LEVELS
Definition: integerset.c:111
static intset_internal_node * intset_new_internal_node(IntegerSet *intset)
Definition: integerset.c:317

References Assert(), intset_internal_node::downlinks, elog(), ERROR, if(), intset(), intset_new_internal_node(), intset_internal_node::level, MAX_INTERNAL_ITEMS, MAX_TREE_LEVELS, intset_internal_node::num_items, and intset_internal_node::values.

Referenced by intset_flush_buffered_values().

◆ simple8b_contains()

static bool simple8b_contains ( uint64  codeword,
uint64  key,
uint64  base 
)
static

Definition at line 1005 of file integerset.c.

1006 {
1007  int selector = (codeword >> 60);
1008  int nints = simple8b_modes[selector].num_ints;
1009  int bits = simple8b_modes[selector].bits_per_int;
1010 
1011  if (codeword == EMPTY_CODEWORD)
1012  return false;
1013 
1014  if (bits == 0)
1015  {
1016  /* Special handling for 0-bit cases. */
1017  return (key - base) <= nints;
1018  }
1019  else
1020  {
1021  uint64 mask = (UINT64CONST(1) << bits) - 1;
1022  uint64 curr_value;
1023 
1024  curr_value = base;
1025  for (int i = 0; i < nints; i++)
1026  {
1027  uint64 diff = codeword & mask;
1028 
1029  curr_value += 1 + diff;
1030 
1031  if (curr_value >= key)
1032  {
1033  if (curr_value == key)
1034  return true;
1035  else
1036  return false;
1037  }
1038 
1039  codeword >>= bits;
1040  }
1041  }
1042  return false;
1043 }
#define EMPTY_CODEWORD
Definition: integerset.c:858
static const struct simple8b_mode simple8b_modes[17]
int i
Definition: isn.c:73
uint8 bits_per_int
Definition: integerset.c:824
uint8 num_ints
Definition: integerset.c:825

References simple8b_mode::bits_per_int, EMPTY_CODEWORD, i, sort-test::key, simple8b_mode::num_ints, and simple8b_modes.

Referenced by intset_is_member().

◆ simple8b_decode()

static int simple8b_decode ( uint64  codeword,
uint64 *  decoded,
uint64  base 
)
static

Definition at line 976 of file integerset.c.

977 {
978  int selector = (codeword >> 60);
979  int nints = simple8b_modes[selector].num_ints;
980  int bits = simple8b_modes[selector].bits_per_int;
981  uint64 mask = (UINT64CONST(1) << bits) - 1;
982  uint64 curr_value;
983 
984  if (codeword == EMPTY_CODEWORD)
985  return 0;
986 
987  curr_value = base;
988  for (int i = 0; i < nints; i++)
989  {
990  uint64 diff = codeword & mask;
991 
992  curr_value += 1 + diff;
993  decoded[i] = curr_value;
994  codeword >>= bits;
995  }
996  return nints;
997 }

References simple8b_mode::bits_per_int, EMPTY_CODEWORD, i, simple8b_mode::num_ints, and simple8b_modes.

Referenced by intset_iterate_next().

◆ simple8b_encode()

static uint64 simple8b_encode ( const uint64 *  ints,
int *  num_encoded,
uint64  base 
)
static

Definition at line 874 of file integerset.c.

875 {
876  int selector;
877  int nints;
878  int bits;
879  uint64 diff;
880  uint64 last_val;
881  uint64 codeword;
882  int i;
883 
884  Assert(ints[0] > base);
885 
886  /*
887  * Select the "mode" to use for this codeword.
888  *
889  * In each iteration, check if the next value can be represented in the
890  * current mode we're considering. If it's too large, then step up the
891  * mode to a wider one, and repeat. If it fits, move on to the next
892  * integer. Repeat until the codeword is full, given the current mode.
893  *
894  * Note that we don't have any way to represent unused slots in the
895  * codeword, so we require each codeword to be "full". It is always
896  * possible to produce a full codeword unless the very first delta is too
897  * large to be encoded. For example, if the first delta is small but the
898  * second is too large to be encoded, we'll end up using the last "mode",
899  * which has nints == 1.
900  */
901  selector = 0;
902  nints = simple8b_modes[0].num_ints;
903  bits = simple8b_modes[0].bits_per_int;
904  diff = ints[0] - base - 1;
905  last_val = ints[0];
906  i = 0; /* number of deltas we have accepted */
907  for (;;)
908  {
909  if (diff >= (UINT64CONST(1) << bits))
910  {
911  /* too large, step up to next mode */
912  selector++;
913  nints = simple8b_modes[selector].num_ints;
914  bits = simple8b_modes[selector].bits_per_int;
915  /* we might already have accepted enough deltas for this mode */
916  if (i >= nints)
917  break;
918  }
919  else
920  {
921  /* accept this delta; then done if codeword is full */
922  i++;
923  if (i >= nints)
924  break;
925  /* examine next delta */
926  Assert(ints[i] > last_val);
927  diff = ints[i] - last_val - 1;
928  last_val = ints[i];
929  }
930  }
931 
932  if (nints == 0)
933  {
934  /*
935  * The first delta is too large to be encoded with Simple-8b.
936  *
937  * If there is at least one not-too-large integer in the input, we
938  * will encode it using mode 15 (or a more compact mode). Hence, we
939  * can only get here if the *first* delta is >= 2^60.
940  */
941  Assert(i == 0);
942  *num_encoded = 0;
943  return EMPTY_CODEWORD;
944  }
945 
946  /*
947  * Encode the integers using the selected mode. Note that we shift them
948  * into the codeword in reverse order, so that they will come out in the
949  * correct order in the decoder.
950  */
951  codeword = 0;
952  if (bits > 0)
953  {
954  for (i = nints - 1; i > 0; i--)
955  {
956  diff = ints[i] - ints[i - 1] - 1;
957  codeword |= diff;
958  codeword <<= bits;
959  }
960  diff = ints[0] - base - 1;
961  codeword |= diff;
962  }
963 
964  /* add selector to the codeword, and return */
965  codeword |= (uint64) selector << 60;
966 
967  *num_encoded = nints;
968  return codeword;
969 }

References Assert(), simple8b_mode::bits_per_int, EMPTY_CODEWORD, i, simple8b_mode::num_ints, and simple8b_modes.

Referenced by intset_flush_buffered_values().

Variable Documentation

◆ simple8b_modes

const struct simple8b_mode simple8b_modes[17]
static
Initial value:
=
{
{0, 240},
{0, 120},
{1, 60},
{2, 30},
{3, 20},
{4, 15},
{5, 12},
{6, 10},
{7, 8},
{8, 7},
{10, 6},
{12, 5},
{15, 4},
{20, 3},
{30, 2},
{60, 1},
{0, 0}
}

Referenced by simple8b_contains(), simple8b_decode(), and simple8b_encode().