PostgreSQL Source Code  git master
integerset.c File Reference
#include "postgres.h"
#include "lib/integerset.h"
#include "port/pg_bitutils.h"
#include "utils/memutils.h"
Include dependency graph for integerset.c:

Go to the source code of this file.

Data Structures

struct  intset_node
 
struct  intset_internal_node
 
struct  leaf_item
 
struct  intset_leaf_node
 
struct  IntegerSet
 
struct  simple8b_mode
 

Macros

#define SIMPLE8B_MAX_VALUES_PER_CODEWORD   240
 
#define MAX_INTERNAL_ITEMS   64
 
#define MAX_LEAF_ITEMS   64
 
#define MAX_TREE_LEVELS   11
 
#define MAX_VALUES_PER_LEAF_ITEM   (1 + SIMPLE8B_MAX_VALUES_PER_CODEWORD)
 
#define MAX_BUFFERED_VALUES   (MAX_VALUES_PER_LEAF_ITEM * 2)
 
#define EMPTY_CODEWORD   UINT64CONST(0x0FFFFFFFFFFFFFFF)
 

Typedefs

typedef struct intset_node intset_node
 
typedef struct intset_leaf_node intset_leaf_node
 
typedef struct intset_internal_node intset_internal_node
 

Functions

static void intset_update_upper (IntegerSet *intset, int level, intset_node *child, uint64 child_key)
 
static void intset_flush_buffered_values (IntegerSet *intset)
 
static int intset_binsrch_uint64 (uint64 item, uint64 *arr, int arr_elems, bool nextkey)
 
static int intset_binsrch_leaf (uint64 item, leaf_item *arr, int arr_elems, bool nextkey)
 
static uint64 simple8b_encode (const uint64 *ints, int *num_encoded, uint64 base)
 
static int simple8b_decode (uint64 codeword, uint64 *decoded, uint64 base)
 
static bool simple8b_contains (uint64 codeword, uint64 key, uint64 base)
 
IntegerSetintset_create (void)
 
static intset_internal_nodeintset_new_internal_node (IntegerSet *intset)
 
static intset_leaf_nodeintset_new_leaf_node (IntegerSet *intset)
 
uint64 intset_num_entries (IntegerSet *intset)
 
uint64 intset_memory_usage (IntegerSet *intset)
 
void intset_add_member (IntegerSet *intset, uint64 x)
 
bool intset_is_member (IntegerSet *intset, uint64 x)
 
void intset_begin_iterate (IntegerSet *intset)
 
bool intset_iterate_next (IntegerSet *intset, uint64 *next)
 

Variables

static const struct simple8b_mode simple8b_modes [17]
 

Macro Definition Documentation

◆ EMPTY_CODEWORD

#define EMPTY_CODEWORD   UINT64CONST(0x0FFFFFFFFFFFFFFF)

Definition at line 857 of file integerset.c.

◆ MAX_BUFFERED_VALUES

#define MAX_BUFFERED_VALUES   (MAX_VALUES_PER_LEAF_ITEM * 2)

Definition at line 187 of file integerset.c.

◆ MAX_INTERNAL_ITEMS

#define MAX_INTERNAL_ITEMS   64

Definition at line 95 of file integerset.c.

◆ MAX_LEAF_ITEMS

#define MAX_LEAF_ITEMS   64

Definition at line 96 of file integerset.c.

◆ MAX_TREE_LEVELS

#define MAX_TREE_LEVELS   11

Definition at line 110 of file integerset.c.

◆ MAX_VALUES_PER_LEAF_ITEM

#define MAX_VALUES_PER_LEAF_ITEM   (1 + SIMPLE8B_MAX_VALUES_PER_CODEWORD)

Definition at line 167 of file integerset.c.

◆ SIMPLE8B_MAX_VALUES_PER_CODEWORD

#define SIMPLE8B_MAX_VALUES_PER_CODEWORD   240

Definition at line 84 of file integerset.c.

Typedef Documentation

◆ intset_internal_node

Definition at line 1 of file integerset.c.

◆ intset_leaf_node

Definition at line 1 of file integerset.c.

◆ intset_node

typedef struct intset_node intset_node

Definition at line 1 of file integerset.c.

Function Documentation

◆ intset_add_member()

void intset_add_member ( IntegerSet intset,
uint64  x 
)

Definition at line 370 of file integerset.c.

371 {
372  if (intset->iter_active)
373  elog(ERROR, "cannot add new values to integer set while iteration is in progress");
374 
375  if (x <= intset->highest_value && intset->num_entries > 0)
376  elog(ERROR, "cannot add value to integer set out of order");
377 
378  if (intset->num_buffered_values >= MAX_BUFFERED_VALUES)
379  {
380  /* Time to flush our buffer */
382  Assert(intset->num_buffered_values < MAX_BUFFERED_VALUES);
383  }
384 
385  /* Add it to the buffer of newly-added values */
386  intset->buffered_values[intset->num_buffered_values] = x;
387  intset->num_buffered_values++;
388  intset->num_entries++;
389  intset->highest_value = x;
390 }
Datum intset(PG_FUNCTION_ARGS)
Definition: _int_op.c:179
#define Assert(condition)
Definition: c.h:861
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
#define MAX_BUFFERED_VALUES
Definition: integerset.c:187
static void intset_flush_buffered_values(IntegerSet *intset)
Definition: integerset.c:396
int x
Definition: isn.c:71

References Assert, elog, ERROR, intset(), intset_flush_buffered_values(), MAX_BUFFERED_VALUES, and x.

Referenced by gistvacuumpage(), test_huge_distances(), test_pattern(), test_single_value(), and test_single_value_and_filler().

◆ intset_begin_iterate()

void intset_begin_iterate ( IntegerSet intset)

Definition at line 624 of file integerset.c.

625 {
626  /* Note that we allow an iteration to be abandoned midway */
627  intset->iter_active = true;
628  intset->iter_node = intset->leftmost_leaf;
629  intset->iter_itemno = 0;
630  intset->iter_valueno = 0;
631  intset->iter_num_values = 0;
632  intset->iter_values = intset->iter_values_buf;
633 }

References intset().

Referenced by gistvacuum_delete_empty_pages(), test_empty(), test_huge_distances(), test_pattern(), test_single_value(), and test_single_value_and_filler().

◆ intset_binsrch_leaf()

static int intset_binsrch_leaf ( uint64  item,
leaf_item arr,
int  arr_elems,
bool  nextkey 
)
static

Definition at line 747 of file integerset.c.

748 {
749  int low,
750  high,
751  mid;
752 
753  low = 0;
754  high = arr_elems;
755  while (high > low)
756  {
757  mid = low + (high - low) / 2;
758 
759  if (nextkey)
760  {
761  if (item >= arr[mid].first)
762  low = mid + 1;
763  else
764  high = mid;
765  }
766  else
767  {
768  if (item > arr[mid].first)
769  low = mid + 1;
770  else
771  high = mid;
772  }
773  }
774 
775  return low;
776 }

Referenced by intset_is_member().

◆ intset_binsrch_uint64()

static int intset_binsrch_uint64 ( uint64  item,
uint64 *  arr,
int  arr_elems,
bool  nextkey 
)
static

Definition at line 714 of file integerset.c.

715 {
716  int low,
717  high,
718  mid;
719 
720  low = 0;
721  high = arr_elems;
722  while (high > low)
723  {
724  mid = low + (high - low) / 2;
725 
726  if (nextkey)
727  {
728  if (item >= arr[mid])
729  low = mid + 1;
730  else
731  high = mid;
732  }
733  else
734  {
735  if (item > arr[mid])
736  low = mid + 1;
737  else
738  high = mid;
739  }
740  }
741 
742  return low;
743 }

Referenced by intset_is_member().

◆ intset_create()

IntegerSet* intset_create ( void  )

Definition at line 284 of file integerset.c.

285 {
287 
288  intset = (IntegerSet *) palloc(sizeof(IntegerSet));
289  intset->context = CurrentMemoryContext;
290  intset->mem_used = GetMemoryChunkSpace(intset);
291 
292  intset->num_entries = 0;
293  intset->highest_value = 0;
294 
295  intset->num_levels = 0;
296  intset->root = NULL;
297  memset(intset->rightmost_nodes, 0, sizeof(intset->rightmost_nodes));
298  intset->leftmost_leaf = NULL;
299 
300  intset->num_buffered_values = 0;
301 
302  intset->iter_active = false;
303  intset->iter_node = NULL;
304  intset->iter_itemno = 0;
305  intset->iter_valueno = 0;
306  intset->iter_num_values = 0;
307  intset->iter_values = NULL;
308 
309  return intset;
310 }
Size GetMemoryChunkSpace(void *pointer)
Definition: mcxt.c:721
MemoryContext CurrentMemoryContext
Definition: mcxt.c:143
void * palloc(Size size)
Definition: mcxt.c:1317

References CurrentMemoryContext, GetMemoryChunkSpace(), intset(), and palloc().

Referenced by gistvacuumscan(), test_empty(), test_huge_distances(), test_pattern(), test_single_value(), and test_single_value_and_filler().

◆ intset_flush_buffered_values()

static void intset_flush_buffered_values ( IntegerSet intset)
static

Definition at line 396 of file integerset.c.

397 {
398  uint64 *values = intset->buffered_values;
399  uint64 num_values = intset->num_buffered_values;
400  int num_packed = 0;
401  intset_leaf_node *leaf;
402 
403  leaf = (intset_leaf_node *) intset->rightmost_nodes[0];
404 
405  /*
406  * If the tree is completely empty, create the first leaf page, which is
407  * also the root.
408  */
409  if (leaf == NULL)
410  {
411  /*
412  * This is the very first item in the set.
413  *
414  * Allocate root node. It's also a leaf.
415  */
417 
418  intset->root = (intset_node *) leaf;
419  intset->leftmost_leaf = leaf;
420  intset->rightmost_nodes[0] = (intset_node *) leaf;
421  intset->num_levels = 1;
422  }
423 
424  /*
425  * If there are less than MAX_VALUES_PER_LEAF_ITEM values in the buffer,
426  * stop. In most cases, we cannot encode that many values in a single
427  * value, but this way, the encoder doesn't have to worry about running
428  * out of input.
429  */
430  while (num_values - num_packed >= MAX_VALUES_PER_LEAF_ITEM)
431  {
432  leaf_item item;
433  int num_encoded;
434 
435  /*
436  * Construct the next leaf item, packing as many buffered values as
437  * possible.
438  */
439  item.first = values[num_packed];
440  item.codeword = simple8b_encode(&values[num_packed + 1],
441  &num_encoded,
442  item.first);
443 
444  /*
445  * Add the item to the node, allocating a new node if the old one is
446  * full.
447  */
448  if (leaf->num_items >= MAX_LEAF_ITEMS)
449  {
450  /* Allocate new leaf and link it to the tree */
451  intset_leaf_node *old_leaf = leaf;
452 
454  old_leaf->next = leaf;
455  intset->rightmost_nodes[0] = (intset_node *) leaf;
456  intset_update_upper(intset, 1, (intset_node *) leaf, item.first);
457  }
458  leaf->items[leaf->num_items++] = item;
459 
460  num_packed += 1 + num_encoded;
461  }
462 
463  /*
464  * Move any remaining buffered values to the beginning of the array.
465  */
466  if (num_packed < intset->num_buffered_values)
467  {
468  memmove(&intset->buffered_values[0],
469  &intset->buffered_values[num_packed],
470  (intset->num_buffered_values - num_packed) * sizeof(uint64));
471  }
472  intset->num_buffered_values -= num_packed;
473 }
static Datum values[MAXATTR]
Definition: bootstrap.c:150
#define MAX_VALUES_PER_LEAF_ITEM
Definition: integerset.c:167
static void intset_update_upper(IntegerSet *intset, int level, intset_node *child, uint64 child_key)
Definition: integerset.c:481
static intset_leaf_node * intset_new_leaf_node(IntegerSet *intset)
Definition: integerset.c:331
static uint64 simple8b_encode(const uint64 *ints, int *num_encoded, uint64 base)
Definition: integerset.c:873
#define MAX_LEAF_ITEMS
Definition: integerset.c:96
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:77
leaf_item items[MAX_LEAF_ITEMS]
Definition: integerset.c:177
intset_leaf_node * next
Definition: integerset.c:175
uint64 codeword
Definition: integerset.c:164
uint64 first
Definition: integerset.c:163

References leaf_item::codeword, leaf_item::first, if(), intset(), intset_new_leaf_node(), intset_update_upper(), intset_leaf_node::items, MAX_LEAF_ITEMS, MAX_VALUES_PER_LEAF_ITEM, intset_leaf_node::next, intset_leaf_node::num_items, simple8b_encode(), and values.

Referenced by intset_add_member().

◆ intset_is_member()

bool intset_is_member ( IntegerSet intset,
uint64  x 
)

Definition at line 554 of file integerset.c.

555 {
556  intset_node *node;
557  intset_leaf_node *leaf;
558  int level;
559  int itemno;
560  leaf_item *item;
561 
562  /*
563  * The value might be in the buffer of newly-added values.
564  */
565  if (intset->num_buffered_values > 0 && x >= intset->buffered_values[0])
566  {
567  itemno = intset_binsrch_uint64(x,
568  intset->buffered_values,
569  intset->num_buffered_values,
570  false);
571  if (itemno >= intset->num_buffered_values)
572  return false;
573  else
574  return (intset->buffered_values[itemno] == x);
575  }
576 
577  /*
578  * Start from the root, and walk down the B-tree to find the right leaf
579  * node.
580  */
581  if (!intset->root)
582  return false;
583  node = intset->root;
584  for (level = intset->num_levels - 1; level > 0; level--)
585  {
587 
588  Assert(node->level == level);
589 
590  itemno = intset_binsrch_uint64(x, n->values, n->num_items, true);
591  if (itemno == 0)
592  return false;
593  node = n->downlinks[itemno - 1];
594  }
595  Assert(node->level == 0);
596  leaf = (intset_leaf_node *) node;
597 
598  /*
599  * Binary search to find the right item on the leaf page
600  */
601  itemno = intset_binsrch_leaf(x, leaf->items, leaf->num_items, true);
602  if (itemno == 0)
603  return false;
604  item = &leaf->items[itemno - 1];
605 
606  /* Is this a match to the first value on the item? */
607  if (item->first == x)
608  return true;
609  Assert(x > item->first);
610 
611  /* Is it in the packed codeword? */
612  if (simple8b_contains(item->codeword, x, item->first))
613  return true;
614 
615  return false;
616 }
static int intset_binsrch_uint64(uint64 item, uint64 *arr, int arr_elems, bool nextkey)
Definition: integerset.c:714
static int intset_binsrch_leaf(uint64 item, leaf_item *arr, int arr_elems, bool nextkey)
Definition: integerset.c:747
static bool simple8b_contains(uint64 codeword, uint64 key, uint64 base)
Definition: integerset.c:1004
intset_node * downlinks[MAX_INTERNAL_ITEMS]
Definition: integerset.c:157
uint64 values[MAX_INTERNAL_ITEMS]
Definition: integerset.c:156
uint16 level
Definition: integerset.c:141

References Assert, leaf_item::codeword, intset_internal_node::downlinks, leaf_item::first, intset(), intset_binsrch_leaf(), intset_binsrch_uint64(), intset_leaf_node::items, intset_node::level, intset_internal_node::num_items, intset_leaf_node::num_items, simple8b_contains(), intset_internal_node::values, and x.

Referenced by check_with_filler(), gistvacuum_delete_empty_pages(), test_empty(), test_huge_distances(), test_pattern(), and test_single_value().

◆ intset_iterate_next()

bool intset_iterate_next ( IntegerSet intset,
uint64 *  next 
)

Definition at line 643 of file integerset.c.

644 {
645  Assert(intset->iter_active);
646  for (;;)
647  {
648  /* Return next iter_values[] entry if any */
649  if (intset->iter_valueno < intset->iter_num_values)
650  {
651  *next = intset->iter_values[intset->iter_valueno++];
652  return true;
653  }
654 
655  /* Decode next item in current leaf node, if any */
656  if (intset->iter_node &&
657  intset->iter_itemno < intset->iter_node->num_items)
658  {
659  leaf_item *item;
660  int num_decoded;
661 
662  item = &intset->iter_node->items[intset->iter_itemno++];
663 
664  intset->iter_values_buf[0] = item->first;
665  num_decoded = simple8b_decode(item->codeword,
666  &intset->iter_values_buf[1],
667  item->first);
668  intset->iter_num_values = num_decoded + 1;
669  intset->iter_valueno = 0;
670  continue;
671  }
672 
673  /* No more items on this leaf, step to next node */
674  if (intset->iter_node)
675  {
676  intset->iter_node = intset->iter_node->next;
677  intset->iter_itemno = 0;
678  continue;
679  }
680 
681  /*
682  * We have reached the end of the B-tree. But we might still have
683  * some integers in the buffer of newly-added values.
684  */
685  if (intset->iter_values == (const uint64 *) intset->iter_values_buf)
686  {
687  intset->iter_values = intset->buffered_values;
688  intset->iter_num_values = intset->num_buffered_values;
689  intset->iter_valueno = 0;
690  continue;
691  }
692 
693  break;
694  }
695 
696  /* No more results. */
697  intset->iter_active = false;
698  *next = 0; /* prevent uninitialized-variable warnings */
699  return false;
700 }
static int32 next
Definition: blutils.c:222
static int simple8b_decode(uint64 codeword, uint64 *decoded, uint64 base)
Definition: integerset.c:975

References Assert, leaf_item::codeword, leaf_item::first, intset(), next, and simple8b_decode().

Referenced by gistvacuum_delete_empty_pages(), test_empty(), test_huge_distances(), test_pattern(), test_single_value(), and test_single_value_and_filler().

◆ intset_memory_usage()

uint64 intset_memory_usage ( IntegerSet intset)

Definition at line 359 of file integerset.c.

360 {
361  return intset->mem_used;
362 }

References intset().

Referenced by test_pattern(), and test_single_value_and_filler().

◆ intset_new_internal_node()

static intset_internal_node* intset_new_internal_node ( IntegerSet intset)
static

Definition at line 316 of file integerset.c.

317 {
319 
321  sizeof(intset_internal_node));
322  intset->mem_used += GetMemoryChunkSpace(n);
323 
324  n->level = 0; /* caller must set */
325  n->num_items = 0;
326 
327  return n;
328 }
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:1181

References GetMemoryChunkSpace(), intset(), intset_internal_node::level, MemoryContextAlloc(), and intset_internal_node::num_items.

Referenced by intset_update_upper().

◆ intset_new_leaf_node()

static intset_leaf_node* intset_new_leaf_node ( IntegerSet intset)
static

Definition at line 331 of file integerset.c.

332 {
333  intset_leaf_node *n;
334 
335  n = (intset_leaf_node *) MemoryContextAlloc(intset->context,
336  sizeof(intset_leaf_node));
337  intset->mem_used += GetMemoryChunkSpace(n);
338 
339  n->level = 0;
340  n->num_items = 0;
341  n->next = NULL;
342 
343  return n;
344 }

References GetMemoryChunkSpace(), intset(), intset_leaf_node::level, MemoryContextAlloc(), intset_leaf_node::next, and intset_leaf_node::num_items.

Referenced by intset_flush_buffered_values().

◆ intset_num_entries()

uint64 intset_num_entries ( IntegerSet intset)

Definition at line 350 of file integerset.c.

351 {
352  return intset->num_entries;
353 }

References intset().

Referenced by gistvacuum_delete_empty_pages(), test_pattern(), test_single_value(), and test_single_value_and_filler().

◆ intset_update_upper()

static void intset_update_upper ( IntegerSet intset,
int  level,
intset_node child,
uint64  child_key 
)
static

Definition at line 481 of file integerset.c.

483 {
484  intset_internal_node *parent;
485 
486  Assert(level > 0);
487 
488  /*
489  * Create a new root node, if necessary.
490  */
491  if (level >= intset->num_levels)
492  {
493  intset_node *oldroot = intset->root;
494  uint64 downlink_key;
495 
496  /* MAX_TREE_LEVELS should be more than enough, this shouldn't happen */
497  if (intset->num_levels == MAX_TREE_LEVELS)
498  elog(ERROR, "could not expand integer set, maximum number of levels reached");
499  intset->num_levels++;
500 
501  /*
502  * Get the first value on the old root page, to be used as the
503  * downlink.
504  */
505  if (intset->root->level == 0)
506  downlink_key = ((intset_leaf_node *) oldroot)->items[0].first;
507  else
508  downlink_key = ((intset_internal_node *) oldroot)->values[0];
509 
511  parent->level = level;
512  parent->values[0] = downlink_key;
513  parent->downlinks[0] = oldroot;
514  parent->num_items = 1;
515 
516  intset->root = (intset_node *) parent;
517  intset->rightmost_nodes[level] = (intset_node *) parent;
518  }
519 
520  /*
521  * Place the downlink on the parent page.
522  */
523  parent = (intset_internal_node *) intset->rightmost_nodes[level];
524 
525  if (parent->num_items < MAX_INTERNAL_ITEMS)
526  {
527  parent->values[parent->num_items] = child_key;
528  parent->downlinks[parent->num_items] = child;
529  parent->num_items++;
530  }
531  else
532  {
533  /*
534  * Doesn't fit. Allocate new parent, with the downlink as the first
535  * item on it, and recursively insert the downlink to the new parent
536  * to the grandparent.
537  */
539  parent->level = level;
540  parent->values[0] = child_key;
541  parent->downlinks[0] = child;
542  parent->num_items = 1;
543 
544  intset->rightmost_nodes[level] = (intset_node *) parent;
545 
546  intset_update_upper(intset, level + 1, (intset_node *) parent, child_key);
547  }
548 }
#define MAX_INTERNAL_ITEMS
Definition: integerset.c:95
#define MAX_TREE_LEVELS
Definition: integerset.c:110
static intset_internal_node * intset_new_internal_node(IntegerSet *intset)
Definition: integerset.c:316

References Assert, intset_internal_node::downlinks, elog, ERROR, if(), intset(), intset_new_internal_node(), intset_internal_node::level, MAX_INTERNAL_ITEMS, MAX_TREE_LEVELS, intset_internal_node::num_items, and intset_internal_node::values.

Referenced by intset_flush_buffered_values().

◆ simple8b_contains()

static bool simple8b_contains ( uint64  codeword,
uint64  key,
uint64  base 
)
static

Definition at line 1004 of file integerset.c.

1005 {
1006  int selector = (codeword >> 60);
1007  int nints = simple8b_modes[selector].num_ints;
1008  int bits = simple8b_modes[selector].bits_per_int;
1009 
1010  if (codeword == EMPTY_CODEWORD)
1011  return false;
1012 
1013  if (bits == 0)
1014  {
1015  /* Special handling for 0-bit cases. */
1016  return (key - base) <= nints;
1017  }
1018  else
1019  {
1020  uint64 mask = (UINT64CONST(1) << bits) - 1;
1021  uint64 curr_value;
1022 
1023  curr_value = base;
1024  for (int i = 0; i < nints; i++)
1025  {
1026  uint64 diff = codeword & mask;
1027 
1028  curr_value += 1 + diff;
1029 
1030  if (curr_value >= key)
1031  {
1032  if (curr_value == key)
1033  return true;
1034  else
1035  return false;
1036  }
1037 
1038  codeword >>= bits;
1039  }
1040  }
1041  return false;
1042 }
#define EMPTY_CODEWORD
Definition: integerset.c:857
static const struct simple8b_mode simple8b_modes[17]
int i
Definition: isn.c:73
uint8 bits_per_int
Definition: integerset.c:823
uint8 num_ints
Definition: integerset.c:824

References simple8b_mode::bits_per_int, EMPTY_CODEWORD, i, sort-test::key, simple8b_mode::num_ints, and simple8b_modes.

Referenced by intset_is_member().

◆ simple8b_decode()

static int simple8b_decode ( uint64  codeword,
uint64 *  decoded,
uint64  base 
)
static

Definition at line 975 of file integerset.c.

976 {
977  int selector = (codeword >> 60);
978  int nints = simple8b_modes[selector].num_ints;
979  int bits = simple8b_modes[selector].bits_per_int;
980  uint64 mask = (UINT64CONST(1) << bits) - 1;
981  uint64 curr_value;
982 
983  if (codeword == EMPTY_CODEWORD)
984  return 0;
985 
986  curr_value = base;
987  for (int i = 0; i < nints; i++)
988  {
989  uint64 diff = codeword & mask;
990 
991  curr_value += 1 + diff;
992  decoded[i] = curr_value;
993  codeword >>= bits;
994  }
995  return nints;
996 }

References simple8b_mode::bits_per_int, EMPTY_CODEWORD, i, simple8b_mode::num_ints, and simple8b_modes.

Referenced by intset_iterate_next().

◆ simple8b_encode()

static uint64 simple8b_encode ( const uint64 *  ints,
int *  num_encoded,
uint64  base 
)
static

Definition at line 873 of file integerset.c.

874 {
875  int selector;
876  int nints;
877  int bits;
878  uint64 diff;
879  uint64 last_val;
880  uint64 codeword;
881  int i;
882 
883  Assert(ints[0] > base);
884 
885  /*
886  * Select the "mode" to use for this codeword.
887  *
888  * In each iteration, check if the next value can be represented in the
889  * current mode we're considering. If it's too large, then step up the
890  * mode to a wider one, and repeat. If it fits, move on to the next
891  * integer. Repeat until the codeword is full, given the current mode.
892  *
893  * Note that we don't have any way to represent unused slots in the
894  * codeword, so we require each codeword to be "full". It is always
895  * possible to produce a full codeword unless the very first delta is too
896  * large to be encoded. For example, if the first delta is small but the
897  * second is too large to be encoded, we'll end up using the last "mode",
898  * which has nints == 1.
899  */
900  selector = 0;
901  nints = simple8b_modes[0].num_ints;
902  bits = simple8b_modes[0].bits_per_int;
903  diff = ints[0] - base - 1;
904  last_val = ints[0];
905  i = 0; /* number of deltas we have accepted */
906  for (;;)
907  {
908  if (diff >= (UINT64CONST(1) << bits))
909  {
910  /* too large, step up to next mode */
911  selector++;
912  nints = simple8b_modes[selector].num_ints;
913  bits = simple8b_modes[selector].bits_per_int;
914  /* we might already have accepted enough deltas for this mode */
915  if (i >= nints)
916  break;
917  }
918  else
919  {
920  /* accept this delta; then done if codeword is full */
921  i++;
922  if (i >= nints)
923  break;
924  /* examine next delta */
925  Assert(ints[i] > last_val);
926  diff = ints[i] - last_val - 1;
927  last_val = ints[i];
928  }
929  }
930 
931  if (nints == 0)
932  {
933  /*
934  * The first delta is too large to be encoded with Simple-8b.
935  *
936  * If there is at least one not-too-large integer in the input, we
937  * will encode it using mode 15 (or a more compact mode). Hence, we
938  * can only get here if the *first* delta is >= 2^60.
939  */
940  Assert(i == 0);
941  *num_encoded = 0;
942  return EMPTY_CODEWORD;
943  }
944 
945  /*
946  * Encode the integers using the selected mode. Note that we shift them
947  * into the codeword in reverse order, so that they will come out in the
948  * correct order in the decoder.
949  */
950  codeword = 0;
951  if (bits > 0)
952  {
953  for (i = nints - 1; i > 0; i--)
954  {
955  diff = ints[i] - ints[i - 1] - 1;
956  codeword |= diff;
957  codeword <<= bits;
958  }
959  diff = ints[0] - base - 1;
960  codeword |= diff;
961  }
962 
963  /* add selector to the codeword, and return */
964  codeword |= (uint64) selector << 60;
965 
966  *num_encoded = nints;
967  return codeword;
968 }

References Assert, simple8b_mode::bits_per_int, EMPTY_CODEWORD, i, simple8b_mode::num_ints, and simple8b_modes.

Referenced by intset_flush_buffered_values().

Variable Documentation

◆ simple8b_modes

const struct simple8b_mode simple8b_modes[17]
static
Initial value:
=
{
{0, 240},
{0, 120},
{1, 60},
{2, 30},
{3, 20},
{4, 15},
{5, 12},
{6, 10},
{7, 8},
{8, 7},
{10, 6},
{12, 5},
{15, 4},
{20, 3},
{30, 2},
{60, 1},
{0, 0}
}

Referenced by simple8b_contains(), simple8b_decode(), and simple8b_encode().