PostgreSQL Source Code  git master
rangetypes_gist.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * rangetypes_gist.c
4  * GiST support for range types.
5  *
6  * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/utils/adt/rangetypes_gist.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16 
17 #include "access/gist.h"
18 #include "access/stratnum.h"
19 #include "utils/datum.h"
20 #include "utils/float.h"
21 #include "utils/fmgrprotos.h"
22 #include "utils/multirangetypes.h"
23 #include "utils/rangetypes.h"
24 
/*
 * Range class properties used to segregate different classes of ranges in
 * GiST.  Each unique combination of properties is a class.  CLS_EMPTY cannot
 * be combined with anything else.
 *
 * CLS_LOWER_INF, CLS_UPPER_INF and CLS_CONTAIN_EMPTY are independent bit
 * flags, so their combinations occupy the values 0..7; CLS_EMPTY is the
 * single extra value 8.  A class value is used directly as an index into
 * arrays of CLS_COUNT elements.
 */
#define CLS_NORMAL			0	/* Ordinary finite range (no bits set) */
#define CLS_LOWER_INF		1	/* Lower bound is infinity */
#define CLS_UPPER_INF		2	/* Upper bound is infinity */
#define CLS_CONTAIN_EMPTY	4	/* Contains underlying empty ranges */
#define CLS_EMPTY			8	/* Special class for empty ranges */

#define CLS_COUNT			9	/* # of classes; includes all combinations of
								 * properties. CLS_EMPTY doesn't combine with
								 * anything else, so it's only 2^3 + 1. */
39 
/*
 * Minimum accepted ratio of split for items of the same class.  If the items
 * are of different classes, we will separate along those lines regardless of
 * the ratio.
 */
#define LIMIT_RATIO  0.3

/*
 * Constants for fixed penalty values.  The penalty function uses small
 * integer multiples of these to rank the cases within one class of new range.
 */
#define INFINITE_BOUND_PENALTY	2.0
#define CONTAIN_EMPTY_PENALTY  1.0
#define DEFAULT_SUBTYPE_DIFF_PENALTY 1.0
51 
52 /*
53  * Per-item data for range_gist_single_sorting_split.
54  */
55 typedef struct
56 {
57  int index;
60 
/*
 * Place on left or right side of split?
 *
 * SPLIT_LEFT must stay zero: callers zero-fill arrays of SplitLR with
 * memset() to default every class to the left side.
 */
typedef enum
{
	SPLIT_LEFT = 0,				/* makes initialization to SPLIT_LEFT easier */
	SPLIT_RIGHT
} SplitLR;
67 
68 /*
69  * Context for range_gist_consider_split.
70  */
71 typedef struct
72 {
73  TypeCacheEntry *typcache; /* typcache for range type */
74  bool has_subtype_diff; /* does it have subtype_diff? */
75  int entries_count; /* total number of entries being split */
76 
77  /* Information about currently selected split follows */
78 
79  bool first; /* true if no split was selected yet */
80 
81  RangeBound *left_upper; /* upper bound of left interval */
82  RangeBound *right_lower; /* lower bound of right interval */
83 
84  float4 ratio; /* split ratio */
85  float4 overlap; /* overlap between left and right predicate */
86  int common_left; /* # common entries destined for each side */
89 
90 /*
91  * Bounds extracted from a non-empty range, for use in
92  * range_gist_double_sorting_split.
93  */
94 typedef struct
95 {
99 
/*
 * Represents information about an entry that can be placed in either group
 * without affecting overlap over selected axis ("common entry").
 */
typedef struct
{
	/* Index of entry in the initial array */
	int			index;
	/* Delta between closeness of range to each of the two groups */
	double		delta;
} CommonEntry;
111 
/*
 * Helper macros to place an entry in the left or right group during split.
 *
 * Note direct access to variables v, typcache, left_range, right_range: the
 * expanding function must have all of them in scope.  The first entry placed
 * on a side becomes that side's range as-is; later entries are folded in with
 * range_super_union().  "off" is appended to the side's offset array and the
 * side's counter is advanced.
 */
#define PLACE_LEFT(range, off)					\
	do {										\
		if (v->spl_nleft > 0)					\
			left_range = range_super_union(typcache, left_range, range); \
		else									\
			left_range = (range);				\
		v->spl_left[v->spl_nleft++] = (off);	\
	} while(0)

#define PLACE_RIGHT(range, off)					\
	do {										\
		if (v->spl_nright > 0)					\
			right_range = range_super_union(typcache, right_range, range); \
		else									\
			right_range = (range);				\
		v->spl_right[v->spl_nright++] = (off);	\
	} while(0)
131 
/*
 * Copy a RangeType datum (hardwires typbyval and typlen for ranges...):
 * datumCopy() is called with typbyval = false and typlen = -1.
 */
#define rangeCopy(r) \
	((RangeType *) DatumGetPointer(datumCopy(PointerGetDatum(r), \
											 false, -1)))
136 
137 static RangeType *range_super_union(TypeCacheEntry *typcache, RangeType *r1,
138  RangeType *r2);
139 static bool range_gist_consistent_int_range(TypeCacheEntry *typcache,
140  StrategyNumber strategy,
141  const RangeType *key,
142  const RangeType *query);
144  StrategyNumber strategy,
145  const RangeType *key,
146  const MultirangeType *query);
148  StrategyNumber strategy,
149  const RangeType *key,
150  Datum query);
152  StrategyNumber strategy,
153  const RangeType *key,
154  const RangeType *query);
156  StrategyNumber strategy,
157  const RangeType *key,
158  const MultirangeType *query);
160  StrategyNumber strategy,
161  const RangeType *key,
162  Datum query);
163 static void range_gist_fallback_split(TypeCacheEntry *typcache,
164  GistEntryVector *entryvec,
165  GIST_SPLITVEC *v);
166 static void range_gist_class_split(TypeCacheEntry *typcache,
167  GistEntryVector *entryvec,
168  GIST_SPLITVEC *v,
169  SplitLR *classes_groups);
170 static void range_gist_single_sorting_split(TypeCacheEntry *typcache,
171  GistEntryVector *entryvec,
172  GIST_SPLITVEC *v,
173  bool use_upper_bound);
174 static void range_gist_double_sorting_split(TypeCacheEntry *typcache,
175  GistEntryVector *entryvec,
176  GIST_SPLITVEC *v);
178  RangeBound *right_lower, int min_left_count,
179  RangeBound *left_upper, int max_left_count);
181 static int single_bound_cmp(const void *a, const void *b, void *arg);
182 static int interval_cmp_lower(const void *a, const void *b, void *arg);
183 static int interval_cmp_upper(const void *a, const void *b, void *arg);
184 static int common_entry_cmp(const void *i1, const void *i2);
185 static float8 call_subtype_diff(TypeCacheEntry *typcache,
186  Datum val1, Datum val2);
187 
188 
189 /* GiST query consistency check */
190 Datum
192 {
193  GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
194  Datum query = PG_GETARG_DATUM(1);
196  bool result;
197  Oid subtype = PG_GETARG_OID(3);
198  bool *recheck = (bool *) PG_GETARG_POINTER(4);
199  RangeType *key = DatumGetRangeTypeP(entry->key);
200  TypeCacheEntry *typcache;
201 
202  /* All operators served by this function are exact */
203  *recheck = false;
204 
205  typcache = range_get_typcache(fcinfo, RangeTypeGetOid(key));
206 
207  /*
208  * Perform consistent checking using function corresponding to key type
209  * (leaf or internal) and query subtype (range, multirange, or element).
210  * Note that invalid subtype means that query type matches key type
211  * (range).
212  */
213  if (GIST_LEAF(entry))
214  {
215  if (!OidIsValid(subtype) || subtype == ANYRANGEOID)
216  result = range_gist_consistent_leaf_range(typcache, strategy, key,
217  DatumGetRangeTypeP(query));
218  else if (subtype == ANYMULTIRANGEOID)
219  result = range_gist_consistent_leaf_multirange(typcache, strategy, key,
220  DatumGetMultirangeTypeP(query));
221  else
222  result = range_gist_consistent_leaf_element(typcache, strategy,
223  key, query);
224  }
225  else
226  {
227  if (!OidIsValid(subtype) || subtype == ANYRANGEOID)
228  result = range_gist_consistent_int_range(typcache, strategy, key,
229  DatumGetRangeTypeP(query));
230  else if (subtype == ANYMULTIRANGEOID)
231  result = range_gist_consistent_int_multirange(typcache, strategy, key,
232  DatumGetMultirangeTypeP(query));
233  else
234  result = range_gist_consistent_int_element(typcache, strategy,
235  key, query);
236  }
237  PG_RETURN_BOOL(result);
238 }
239 
240 /*
241  * GiST compress method for multiranges: multirange is approximated as union
242  * range with no gaps.
243  */
244 Datum
246 {
247  GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
248 
249  if (entry->leafkey)
250  {
252  RangeType *r;
253  TypeCacheEntry *typcache;
254  GISTENTRY *retval = palloc(sizeof(GISTENTRY));
255 
256  typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr));
257  r = multirange_get_union_range(typcache->rngtype, mr);
258 
259  gistentryinit(*retval, RangeTypePGetDatum(r),
260  entry->rel, entry->page, entry->offset, false);
261 
262  PG_RETURN_POINTER(retval);
263  }
264 
265  PG_RETURN_POINTER(entry);
266 }
267 
268 /* GiST query consistency check for multiranges */
269 Datum
271 {
272  GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
273  Datum query = PG_GETARG_DATUM(1);
275  bool result;
276  Oid subtype = PG_GETARG_OID(3);
277  bool *recheck = (bool *) PG_GETARG_POINTER(4);
278  RangeType *key = DatumGetRangeTypeP(entry->key);
279  TypeCacheEntry *typcache;
280 
281  /*
282  * All operators served by this function are inexact because multirange is
283  * approximated by union range with no gaps.
284  */
285  *recheck = true;
286 
287  typcache = range_get_typcache(fcinfo, RangeTypeGetOid(key));
288 
289  /*
290  * Perform consistent checking using function corresponding to key type
291  * (leaf or internal) and query subtype (range, multirange, or element).
292  * Note that invalid subtype means that query type matches key type
293  * (multirange).
294  */
295  if (GIST_LEAF(entry))
296  {
297  if (!OidIsValid(subtype) || subtype == ANYMULTIRANGEOID)
298  result = range_gist_consistent_leaf_multirange(typcache, strategy, key,
299  DatumGetMultirangeTypeP(query));
300  else if (subtype == ANYRANGEOID)
301  result = range_gist_consistent_leaf_range(typcache, strategy, key,
302  DatumGetRangeTypeP(query));
303  else
304  result = range_gist_consistent_leaf_element(typcache, strategy,
305  key, query);
306  }
307  else
308  {
309  if (!OidIsValid(subtype) || subtype == ANYMULTIRANGEOID)
310  result = range_gist_consistent_int_multirange(typcache, strategy, key,
311  DatumGetMultirangeTypeP(query));
312  else if (subtype == ANYRANGEOID)
313  result = range_gist_consistent_int_range(typcache, strategy, key,
314  DatumGetRangeTypeP(query));
315  else
316  result = range_gist_consistent_int_element(typcache, strategy,
317  key, query);
318  }
319  PG_RETURN_BOOL(result);
320 }
321 
322 /* form union range */
323 Datum
325 {
327  GISTENTRY *ent = entryvec->vector;
328  RangeType *result_range;
329  TypeCacheEntry *typcache;
330  int i;
331 
332  result_range = DatumGetRangeTypeP(ent[0].key);
333 
334  typcache = range_get_typcache(fcinfo, RangeTypeGetOid(result_range));
335 
336  for (i = 1; i < entryvec->n; i++)
337  {
338  result_range = range_super_union(typcache, result_range,
339  DatumGetRangeTypeP(ent[i].key));
340  }
341 
342  PG_RETURN_RANGE_P(result_range);
343 }
344 
345 /*
346  * We store ranges as ranges in GiST indexes, so we do not need
347  * compress, decompress, or fetch functions. Note this implies a limit
348  * on the size of range values that can be indexed.
349  */
350 
351 /*
352  * GiST page split penalty function.
353  *
354  * The penalty function has the following goals (in order from most to least
355  * important):
356  * - Keep normal ranges separate
357  * - Avoid broadening the class of the original predicate
358  * - Avoid broadening (as determined by subtype_diff) the original predicate
359  * - Favor adding ranges to narrower original predicates
360  */
361 Datum
363 {
364  GISTENTRY *origentry = (GISTENTRY *) PG_GETARG_POINTER(0);
365  GISTENTRY *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
366  float *penalty = (float *) PG_GETARG_POINTER(2);
367  RangeType *orig = DatumGetRangeTypeP(origentry->key);
368  RangeType *new = DatumGetRangeTypeP(newentry->key);
369  TypeCacheEntry *typcache;
370  bool has_subtype_diff;
371  RangeBound orig_lower,
372  new_lower,
373  orig_upper,
374  new_upper;
375  bool orig_empty,
376  new_empty;
377 
378  if (RangeTypeGetOid(orig) != RangeTypeGetOid(new))
379  elog(ERROR, "range types do not match");
380 
381  typcache = range_get_typcache(fcinfo, RangeTypeGetOid(orig));
382 
383  has_subtype_diff = OidIsValid(typcache->rng_subdiff_finfo.fn_oid);
384 
385  range_deserialize(typcache, orig, &orig_lower, &orig_upper, &orig_empty);
386  range_deserialize(typcache, new, &new_lower, &new_upper, &new_empty);
387 
388  /*
389  * Distinct branches for handling distinct classes of ranges. Note that
390  * penalty values only need to be commensurate within the same class of
391  * new range.
392  */
393  if (new_empty)
394  {
395  /* Handle insertion of empty range */
396  if (orig_empty)
397  {
398  /*
399  * The best case is to insert it to empty original range.
400  * Insertion here means no broadening of original range. Also
401  * original range is the most narrow.
402  */
403  *penalty = 0.0;
404  }
405  else if (RangeIsOrContainsEmpty(orig))
406  {
407  /*
408  * The second case is to insert empty range into range which
409  * contains at least one underlying empty range. There is still
410  * no broadening of original range, but original range is not as
411  * narrow as possible.
412  */
413  *penalty = CONTAIN_EMPTY_PENALTY;
414  }
415  else if (orig_lower.infinite && orig_upper.infinite)
416  {
417  /*
418  * Original range requires broadening. (-inf; +inf) is most far
419  * from normal range in this case.
420  */
421  *penalty = 2 * CONTAIN_EMPTY_PENALTY;
422  }
423  else if (orig_lower.infinite || orig_upper.infinite)
424  {
425  /*
426  * (-inf, x) or (x, +inf) original ranges are closer to normal
427  * ranges, so it's worse to mix it with empty ranges.
428  */
429  *penalty = 3 * CONTAIN_EMPTY_PENALTY;
430  }
431  else
432  {
433  /*
434  * The least preferred case is broadening of normal range.
435  */
436  *penalty = 4 * CONTAIN_EMPTY_PENALTY;
437  }
438  }
439  else if (new_lower.infinite && new_upper.infinite)
440  {
441  /* Handle insertion of (-inf, +inf) range */
442  if (orig_lower.infinite && orig_upper.infinite)
443  {
444  /*
445  * Best case is inserting to (-inf, +inf) original range.
446  */
447  *penalty = 0.0;
448  }
449  else if (orig_lower.infinite || orig_upper.infinite)
450  {
451  /*
452  * When original range is (-inf, x) or (x, +inf) it requires
453  * broadening of original range (extension of one bound to
454  * infinity).
455  */
456  *penalty = INFINITE_BOUND_PENALTY;
457  }
458  else
459  {
460  /*
461  * Insertion to normal original range is least preferred.
462  */
463  *penalty = 2 * INFINITE_BOUND_PENALTY;
464  }
465 
466  if (RangeIsOrContainsEmpty(orig))
467  {
468  /*
469  * Original range is narrower when it doesn't contain empty
470  * ranges. Add additional penalty otherwise.
471  */
472  *penalty += CONTAIN_EMPTY_PENALTY;
473  }
474  }
475  else if (new_lower.infinite)
476  {
477  /* Handle insertion of (-inf, x) range */
478  if (!orig_empty && orig_lower.infinite)
479  {
480  if (orig_upper.infinite)
481  {
482  /*
483  * (-inf, +inf) range won't be extended by insertion of (-inf,
484  * x) range. It's a less desirable case than insertion to
485  * (-inf, y) original range without extension, because in that
486  * case original range is narrower. But we can't express that
487  * in single float value.
488  */
489  *penalty = 0.0;
490  }
491  else
492  {
493  if (range_cmp_bounds(typcache, &new_upper, &orig_upper) > 0)
494  {
495  /*
496  * Get extension of original range using subtype_diff. Use
497  * constant if subtype_diff unavailable.
498  */
499  if (has_subtype_diff)
500  *penalty = call_subtype_diff(typcache,
501  new_upper.val,
502  orig_upper.val);
503  else
504  *penalty = DEFAULT_SUBTYPE_DIFF_PENALTY;
505  }
506  else
507  {
508  /* No extension of original range */
509  *penalty = 0.0;
510  }
511  }
512  }
513  else
514  {
515  /*
516  * If lower bound of original range is not -inf, then extension of
517  * it is infinity.
518  */
519  *penalty = get_float4_infinity();
520  }
521  }
522  else if (new_upper.infinite)
523  {
524  /* Handle insertion of (x, +inf) range */
525  if (!orig_empty && orig_upper.infinite)
526  {
527  if (orig_lower.infinite)
528  {
529  /*
530  * (-inf, +inf) range won't be extended by insertion of (x,
531  * +inf) range. It's a less desirable case than insertion to
532  * (y, +inf) original range without extension, because in that
533  * case original range is narrower. But we can't express that
534  * in single float value.
535  */
536  *penalty = 0.0;
537  }
538  else
539  {
540  if (range_cmp_bounds(typcache, &new_lower, &orig_lower) < 0)
541  {
542  /*
543  * Get extension of original range using subtype_diff. Use
544  * constant if subtype_diff unavailable.
545  */
546  if (has_subtype_diff)
547  *penalty = call_subtype_diff(typcache,
548  orig_lower.val,
549  new_lower.val);
550  else
551  *penalty = DEFAULT_SUBTYPE_DIFF_PENALTY;
552  }
553  else
554  {
555  /* No extension of original range */
556  *penalty = 0.0;
557  }
558  }
559  }
560  else
561  {
562  /*
563  * If upper bound of original range is not +inf, then extension of
564  * it is infinity.
565  */
566  *penalty = get_float4_infinity();
567  }
568  }
569  else
570  {
571  /* Handle insertion of normal (non-empty, non-infinite) range */
572  if (orig_empty || orig_lower.infinite || orig_upper.infinite)
573  {
574  /*
575  * Avoid mixing normal ranges with infinite and empty ranges.
576  */
577  *penalty = get_float4_infinity();
578  }
579  else
580  {
581  /*
582  * Calculate extension of original range by calling subtype_diff.
583  * Use constant if subtype_diff unavailable.
584  */
585  float8 diff = 0.0;
586 
587  if (range_cmp_bounds(typcache, &new_lower, &orig_lower) < 0)
588  {
589  if (has_subtype_diff)
590  diff += call_subtype_diff(typcache,
591  orig_lower.val,
592  new_lower.val);
593  else
595  }
596  if (range_cmp_bounds(typcache, &new_upper, &orig_upper) > 0)
597  {
598  if (has_subtype_diff)
599  diff += call_subtype_diff(typcache,
600  new_upper.val,
601  orig_upper.val);
602  else
604  }
605  *penalty = diff;
606  }
607  }
608 
609  PG_RETURN_POINTER(penalty);
610 }
611 
612 /*
613  * The GiST PickSplit method for ranges
614  *
615  * Primarily, we try to segregate ranges of different classes. If splitting
616  * ranges of the same class, use the appropriate split method for that class.
617  */
618 Datum
620 {
623  TypeCacheEntry *typcache;
624  OffsetNumber i;
625  RangeType *pred_left;
626  int nbytes;
627  OffsetNumber maxoff;
628  int count_in_classes[CLS_COUNT];
629  int j;
630  int non_empty_classes_count = 0;
631  int biggest_class = -1;
632  int biggest_class_count = 0;
633  int total_count;
634 
635  /* use first item to look up range type's info */
636  pred_left = DatumGetRangeTypeP(entryvec->vector[FirstOffsetNumber].key);
637  typcache = range_get_typcache(fcinfo, RangeTypeGetOid(pred_left));
638 
639  maxoff = entryvec->n - 1;
640  nbytes = (maxoff + 1) * sizeof(OffsetNumber);
641  v->spl_left = (OffsetNumber *) palloc(nbytes);
642  v->spl_right = (OffsetNumber *) palloc(nbytes);
643 
644  /*
645  * Get count distribution of range classes.
646  */
647  memset(count_in_classes, 0, sizeof(count_in_classes));
648  for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
649  {
650  RangeType *range = DatumGetRangeTypeP(entryvec->vector[i].key);
651 
652  count_in_classes[get_gist_range_class(range)]++;
653  }
654 
655  /*
656  * Count non-empty classes and find biggest class.
657  */
658  total_count = maxoff;
659  for (j = 0; j < CLS_COUNT; j++)
660  {
661  if (count_in_classes[j] > 0)
662  {
663  if (count_in_classes[j] > biggest_class_count)
664  {
665  biggest_class_count = count_in_classes[j];
666  biggest_class = j;
667  }
668  non_empty_classes_count++;
669  }
670  }
671 
672  Assert(non_empty_classes_count > 0);
673 
674  if (non_empty_classes_count == 1)
675  {
676  /* One non-empty class, so split inside class */
677  if ((biggest_class & ~CLS_CONTAIN_EMPTY) == CLS_NORMAL)
678  {
679  /* double sorting split for normal ranges */
680  range_gist_double_sorting_split(typcache, entryvec, v);
681  }
682  else if ((biggest_class & ~CLS_CONTAIN_EMPTY) == CLS_LOWER_INF)
683  {
684  /* upper bound sorting split for (-inf, x) ranges */
685  range_gist_single_sorting_split(typcache, entryvec, v, true);
686  }
687  else if ((biggest_class & ~CLS_CONTAIN_EMPTY) == CLS_UPPER_INF)
688  {
689  /* lower bound sorting split for (x, +inf) ranges */
690  range_gist_single_sorting_split(typcache, entryvec, v, false);
691  }
692  else
693  {
694  /* trivial split for all (-inf, +inf) or all empty ranges */
695  range_gist_fallback_split(typcache, entryvec, v);
696  }
697  }
698  else
699  {
700  /*
701  * Class based split.
702  *
703  * To which side of the split should each class go? Initialize them
704  * all to go to the left side.
705  */
706  SplitLR classes_groups[CLS_COUNT];
707 
708  memset(classes_groups, 0, sizeof(classes_groups));
709 
710  if (count_in_classes[CLS_NORMAL] > 0)
711  {
712  /* separate normal ranges if any */
713  classes_groups[CLS_NORMAL] = SPLIT_RIGHT;
714  }
715  else
716  {
717  /*----------
718  * Try to split classes in one of two ways:
719  * 1) containing infinities - not containing infinities
720  * 2) containing empty - not containing empty
721  *
722  * Select the way which balances the ranges between left and right
723  * the best. If split in these ways is not possible, there are at
724  * most 3 classes, so just separate biggest class.
725  *----------
726  */
727  int infCount,
728  nonInfCount;
729  int emptyCount,
730  nonEmptyCount;
731 
732  nonInfCount =
733  count_in_classes[CLS_NORMAL] +
734  count_in_classes[CLS_CONTAIN_EMPTY] +
735  count_in_classes[CLS_EMPTY];
736  infCount = total_count - nonInfCount;
737 
738  nonEmptyCount =
739  count_in_classes[CLS_NORMAL] +
740  count_in_classes[CLS_LOWER_INF] +
741  count_in_classes[CLS_UPPER_INF] +
742  count_in_classes[CLS_LOWER_INF | CLS_UPPER_INF];
743  emptyCount = total_count - nonEmptyCount;
744 
745  if (infCount > 0 && nonInfCount > 0 &&
746  (Abs(infCount - nonInfCount) <=
747  Abs(emptyCount - nonEmptyCount)))
748  {
749  classes_groups[CLS_NORMAL] = SPLIT_RIGHT;
750  classes_groups[CLS_CONTAIN_EMPTY] = SPLIT_RIGHT;
751  classes_groups[CLS_EMPTY] = SPLIT_RIGHT;
752  }
753  else if (emptyCount > 0 && nonEmptyCount > 0)
754  {
755  classes_groups[CLS_NORMAL] = SPLIT_RIGHT;
756  classes_groups[CLS_LOWER_INF] = SPLIT_RIGHT;
757  classes_groups[CLS_UPPER_INF] = SPLIT_RIGHT;
758  classes_groups[CLS_LOWER_INF | CLS_UPPER_INF] = SPLIT_RIGHT;
759  }
760  else
761  {
762  /*
763  * Either total_count == emptyCount or total_count ==
764  * infCount.
765  */
766  classes_groups[biggest_class] = SPLIT_RIGHT;
767  }
768  }
769 
770  range_gist_class_split(typcache, entryvec, v, classes_groups);
771  }
772 
774 }
775 
776 /* equality comparator for GiST */
777 Datum
779 {
780  RangeType *r1 = PG_GETARG_RANGE_P(0);
781  RangeType *r2 = PG_GETARG_RANGE_P(1);
782  bool *result = (bool *) PG_GETARG_POINTER(2);
783 
784  /*
785  * range_eq will ignore the RANGE_CONTAIN_EMPTY flag, so we have to check
786  * that for ourselves. More generally, if the entries have been properly
787  * normalized, then unequal flags bytes must mean unequal ranges ... so
788  * let's just test all the flag bits at once.
789  */
790  if (range_get_flags(r1) != range_get_flags(r2))
791  *result = false;
792  else
793  {
794  TypeCacheEntry *typcache;
795 
796  typcache = range_get_typcache(fcinfo, RangeTypeGetOid(r1));
797 
798  *result = range_eq_internal(typcache, r1, r2);
799  }
800 
801  PG_RETURN_POINTER(result);
802 }
803 
804 /*
805  *----------------------------------------------------------
806  * STATIC FUNCTIONS
807  *----------------------------------------------------------
808  */
809 
810 /*
811  * Return the smallest range that contains r1 and r2
812  *
813  * This differs from regular range_union in two critical ways:
814  * 1. It won't throw an error for non-adjacent r1 and r2, but just absorb
815  * the intervening values into the result range.
816  * 2. We track whether any empty range has been union'd into the result,
817  * so that contained_by searches can be indexed. Note that this means
818  * that *all* unions formed within the GiST index must go through here.
819  */
820 static RangeType *
822 {
823  RangeType *result;
824  RangeBound lower1,
825  lower2;
826  RangeBound upper1,
827  upper2;
828  bool empty1,
829  empty2;
830  char flags1,
831  flags2;
832  RangeBound *result_lower;
833  RangeBound *result_upper;
834 
835  range_deserialize(typcache, r1, &lower1, &upper1, &empty1);
836  range_deserialize(typcache, r2, &lower2, &upper2, &empty2);
837  flags1 = range_get_flags(r1);
838  flags2 = range_get_flags(r2);
839 
840  if (empty1)
841  {
842  /* We can return r2 as-is if it already is or contains empty */
843  if (flags2 & (RANGE_EMPTY | RANGE_CONTAIN_EMPTY))
844  return r2;
845  /* Else we'd better copy it (modify-in-place isn't safe) */
846  r2 = rangeCopy(r2);
848  return r2;
849  }
850  if (empty2)
851  {
852  /* We can return r1 as-is if it already is or contains empty */
853  if (flags1 & (RANGE_EMPTY | RANGE_CONTAIN_EMPTY))
854  return r1;
855  /* Else we'd better copy it (modify-in-place isn't safe) */
856  r1 = rangeCopy(r1);
858  return r1;
859  }
860 
861  if (range_cmp_bounds(typcache, &lower1, &lower2) <= 0)
862  result_lower = &lower1;
863  else
864  result_lower = &lower2;
865 
866  if (range_cmp_bounds(typcache, &upper1, &upper2) >= 0)
867  result_upper = &upper1;
868  else
869  result_upper = &upper2;
870 
871  /* optimization to avoid constructing a new range */
872  if (result_lower == &lower1 && result_upper == &upper1 &&
873  ((flags1 & RANGE_CONTAIN_EMPTY) || !(flags2 & RANGE_CONTAIN_EMPTY)))
874  return r1;
875  if (result_lower == &lower2 && result_upper == &upper2 &&
876  ((flags2 & RANGE_CONTAIN_EMPTY) || !(flags1 & RANGE_CONTAIN_EMPTY)))
877  return r2;
878 
879  result = make_range(typcache, result_lower, result_upper, false);
880 
881  if ((flags1 & RANGE_CONTAIN_EMPTY) || (flags2 & RANGE_CONTAIN_EMPTY))
882  range_set_contain_empty(result);
883 
884  return result;
885 }
886 
887 static bool
889  const RangeType *r,
890  const MultirangeType *mr)
891 {
892  RangeBound lower1,
893  upper1,
894  lower2,
895  upper2,
896  tmp;
897  bool empty;
898 
899  if (RangeIsEmpty(r) || MultirangeIsEmpty(mr))
900  return (RangeIsEmpty(r) && MultirangeIsEmpty(mr));
901 
902  range_deserialize(typcache, r, &lower1, &upper1, &empty);
903  Assert(!empty);
904  multirange_get_bounds(typcache, mr, 0, &lower2, &tmp);
905  multirange_get_bounds(typcache, mr, mr->rangeCount - 1, &tmp, &upper2);
906 
907  return (range_cmp_bounds(typcache, &lower1, &lower2) == 0 &&
908  range_cmp_bounds(typcache, &upper1, &upper2) == 0);
909 }
910 
911 /*
912  * GiST consistent test on an index internal page with range query
913  */
914 static bool
916  StrategyNumber strategy,
917  const RangeType *key,
918  const RangeType *query)
919 {
920  switch (strategy)
921  {
922  case RANGESTRAT_BEFORE:
923  if (RangeIsEmpty(key) || RangeIsEmpty(query))
924  return false;
925  return (!range_overright_internal(typcache, key, query));
926  case RANGESTRAT_OVERLEFT:
927  if (RangeIsEmpty(key) || RangeIsEmpty(query))
928  return false;
929  return (!range_after_internal(typcache, key, query));
930  case RANGESTRAT_OVERLAPS:
931  return range_overlaps_internal(typcache, key, query);
933  if (RangeIsEmpty(key) || RangeIsEmpty(query))
934  return false;
935  return (!range_before_internal(typcache, key, query));
936  case RANGESTRAT_AFTER:
937  if (RangeIsEmpty(key) || RangeIsEmpty(query))
938  return false;
939  return (!range_overleft_internal(typcache, key, query));
940  case RANGESTRAT_ADJACENT:
941  if (RangeIsEmpty(key) || RangeIsEmpty(query))
942  return false;
943  if (range_adjacent_internal(typcache, key, query))
944  return true;
945  return range_overlaps_internal(typcache, key, query);
946  case RANGESTRAT_CONTAINS:
947  return range_contains_internal(typcache, key, query);
949 
950  /*
951  * Empty ranges are contained by anything, so if key is or
952  * contains any empty ranges, we must descend into it. Otherwise,
953  * descend only if key overlaps the query.
954  */
955  if (RangeIsOrContainsEmpty(key))
956  return true;
957  return range_overlaps_internal(typcache, key, query);
958  case RANGESTRAT_EQ:
959 
960  /*
961  * If query is empty, descend only if the key is or contains any
962  * empty ranges. Otherwise, descend if key contains query.
963  */
964  if (RangeIsEmpty(query))
965  return RangeIsOrContainsEmpty(key);
966  return range_contains_internal(typcache, key, query);
967  default:
968  elog(ERROR, "unrecognized range strategy: %d", strategy);
969  return false; /* keep compiler quiet */
970  }
971 }
972 
973 /*
974  * GiST consistent test on an index internal page with multirange query
975  */
976 static bool
978  StrategyNumber strategy,
979  const RangeType *key,
980  const MultirangeType *query)
981 {
982  switch (strategy)
983  {
984  case RANGESTRAT_BEFORE:
985  if (RangeIsEmpty(key) || MultirangeIsEmpty(query))
986  return false;
987  return (!range_overright_multirange_internal(typcache, key, query));
988  case RANGESTRAT_OVERLEFT:
989  if (RangeIsEmpty(key) || MultirangeIsEmpty(query))
990  return false;
991  return (!range_after_multirange_internal(typcache, key, query));
992  case RANGESTRAT_OVERLAPS:
993  return range_overlaps_multirange_internal(typcache, key, query);
995  if (RangeIsEmpty(key) || MultirangeIsEmpty(query))
996  return false;
997  return (!range_before_multirange_internal(typcache, key, query));
998  case RANGESTRAT_AFTER:
999  if (RangeIsEmpty(key) || MultirangeIsEmpty(query))
1000  return false;
1001  return (!range_overleft_multirange_internal(typcache, key, query));
1002  case RANGESTRAT_ADJACENT:
1003  if (RangeIsEmpty(key) || MultirangeIsEmpty(query))
1004  return false;
1005  if (range_adjacent_multirange_internal(typcache, key, query))
1006  return true;
1007  return range_overlaps_multirange_internal(typcache, key, query);
1008  case RANGESTRAT_CONTAINS:
1009  return range_contains_multirange_internal(typcache, key, query);
1011 
1012  /*
1013  * Empty ranges are contained by anything, so if key is or
1014  * contains any empty ranges, we must descend into it. Otherwise,
1015  * descend only if key overlaps the query.
1016  */
1017  if (RangeIsOrContainsEmpty(key))
1018  return true;
1019  return range_overlaps_multirange_internal(typcache, key, query);
1020  case RANGESTRAT_EQ:
1021 
1022  /*
1023  * If query is empty, descend only if the key is or contains any
1024  * empty ranges. Otherwise, descend if key contains query.
1025  */
1026  if (MultirangeIsEmpty(query))
1027  return RangeIsOrContainsEmpty(key);
1028  return range_contains_multirange_internal(typcache, key, query);
1029  default:
1030  elog(ERROR, "unrecognized range strategy: %d", strategy);
1031  return false; /* keep compiler quiet */
1032  }
1033 }
1034 
1035 /*
1036  * GiST consistent test on an index internal page with element query
1037  */
1038 static bool
1040  StrategyNumber strategy,
1041  const RangeType *key,
1042  Datum query)
1043 {
1044  switch (strategy)
1045  {
1047  return range_contains_elem_internal(typcache, key, query);
1048  default:
1049  elog(ERROR, "unrecognized range strategy: %d", strategy);
1050  return false; /* keep compiler quiet */
1051  }
1052 }
1053 
1054 /*
1055  * GiST consistent test on an index leaf page with range query
1056  */
1057 static bool
1059  StrategyNumber strategy,
1060  const RangeType *key,
1061  const RangeType *query)
1062 {
1063  switch (strategy)
1064  {
1065  case RANGESTRAT_BEFORE:
1066  return range_before_internal(typcache, key, query);
1067  case RANGESTRAT_OVERLEFT:
1068  return range_overleft_internal(typcache, key, query);
1069  case RANGESTRAT_OVERLAPS:
1070  return range_overlaps_internal(typcache, key, query);
1071  case RANGESTRAT_OVERRIGHT:
1072  return range_overright_internal(typcache, key, query);
1073  case RANGESTRAT_AFTER:
1074  return range_after_internal(typcache, key, query);
1075  case RANGESTRAT_ADJACENT:
1076  return range_adjacent_internal(typcache, key, query);
1077  case RANGESTRAT_CONTAINS:
1078  return range_contains_internal(typcache, key, query);
1080  return range_contained_by_internal(typcache, key, query);
1081  case RANGESTRAT_EQ:
1082  return range_eq_internal(typcache, key, query);
1083  default:
1084  elog(ERROR, "unrecognized range strategy: %d", strategy);
1085  return false; /* keep compiler quiet */
1086  }
1087 }
1088 
1089 /*
1090  * GiST consistent test on an index leaf page with multirange query
1091  */
1092 static bool
1094  StrategyNumber strategy,
1095  const RangeType *key,
1096  const MultirangeType *query)
1097 {
1098  switch (strategy)
1099  {
1100  case RANGESTRAT_BEFORE:
1101  return range_before_multirange_internal(typcache, key, query);
1102  case RANGESTRAT_OVERLEFT:
1103  return range_overleft_multirange_internal(typcache, key, query);
1104  case RANGESTRAT_OVERLAPS:
1105  return range_overlaps_multirange_internal(typcache, key, query);
1106  case RANGESTRAT_OVERRIGHT:
1107  return range_overright_multirange_internal(typcache, key, query);
1108  case RANGESTRAT_AFTER:
1109  return range_after_multirange_internal(typcache, key, query);
1110  case RANGESTRAT_ADJACENT:
1111  return range_adjacent_multirange_internal(typcache, key, query);
1112  case RANGESTRAT_CONTAINS:
1113  return range_contains_multirange_internal(typcache, key, query);
1115  return multirange_contains_range_internal(typcache, query, key);
1116  case RANGESTRAT_EQ:
1117  return multirange_union_range_equal(typcache, key, query);
1118  default:
1119  elog(ERROR, "unrecognized range strategy: %d", strategy);
1120  return false; /* keep compiler quiet */
1121  }
1122 }
1123 
1124 /*
1125  * GiST consistent test on an index leaf page with element query
1126  */
1127 static bool
1129  StrategyNumber strategy,
1130  const RangeType *key,
1131  Datum query)
1132 {
1133  switch (strategy)
1134  {
1136  return range_contains_elem_internal(typcache, key, query);
1137  default:
1138  elog(ERROR, "unrecognized range strategy: %d", strategy);
1139  return false; /* keep compiler quiet */
1140  }
1141 }
1142 
1143 /*
1144  * Trivial split: half of entries will be placed on one page
1145  * and the other half on the other page.
1146  */
1147 static void
1149  GistEntryVector *entryvec,
1150  GIST_SPLITVEC *v)
1151 {
1152  RangeType *left_range = NULL;
1153  RangeType *right_range = NULL;
1154  OffsetNumber i,
1155  maxoff,
1156  split_idx;
1157 
1158  maxoff = entryvec->n - 1;
1159  /* Split entries before this to left page, after to right: */
1160  split_idx = (maxoff - FirstOffsetNumber) / 2 + FirstOffsetNumber;
1161 
1162  v->spl_nleft = 0;
1163  v->spl_nright = 0;
1164  for (i = FirstOffsetNumber; i <= maxoff; i++)
1165  {
1166  RangeType *range = DatumGetRangeTypeP(entryvec->vector[i].key);
1167 
1168  if (i < split_idx)
1169  PLACE_LEFT(range, i);
1170  else
1171  PLACE_RIGHT(range, i);
1172  }
1173 
1174  v->spl_ldatum = RangeTypePGetDatum(left_range);
1175  v->spl_rdatum = RangeTypePGetDatum(right_range);
1176 }
1177 
1178 /*
1179  * Split based on classes of ranges.
1180  *
1181  * See get_gist_range_class for class definitions.
1182  * classes_groups is an array of length CLS_COUNT indicating the side of the
1183  * split to which each class should go.
1184  */
1185 static void
1187  GistEntryVector *entryvec,
1188  GIST_SPLITVEC *v,
1189  SplitLR *classes_groups)
1190 {
1191  RangeType *left_range = NULL;
1192  RangeType *right_range = NULL;
1193  OffsetNumber i,
1194  maxoff;
1195 
1196  maxoff = entryvec->n - 1;
1197 
1198  v->spl_nleft = 0;
1199  v->spl_nright = 0;
1200  for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
1201  {
1202  RangeType *range = DatumGetRangeTypeP(entryvec->vector[i].key);
1203  int class;
1204 
1205  /* Get class of range */
1206  class = get_gist_range_class(range);
1207 
1208  /* Place range to appropriate page */
1209  if (classes_groups[class] == SPLIT_LEFT)
1210  PLACE_LEFT(range, i);
1211  else
1212  {
1213  Assert(classes_groups[class] == SPLIT_RIGHT);
1214  PLACE_RIGHT(range, i);
1215  }
1216  }
1217 
1218  v->spl_ldatum = RangeTypePGetDatum(left_range);
1219  v->spl_rdatum = RangeTypePGetDatum(right_range);
1220 }
1221 
1222 /*
1223  * Sorting based split. First half of entries according to the sort will be
1224  * placed to one page, and second half of entries will be placed to other
1225  * page. use_upper_bound parameter indicates whether to use upper or lower
1226  * bound for sorting.
1227  */
1228 static void
1230  GistEntryVector *entryvec,
1231  GIST_SPLITVEC *v,
1232  bool use_upper_bound)
1233 {
1234  SingleBoundSortItem *sortItems;
1235  RangeType *left_range = NULL;
1236  RangeType *right_range = NULL;
1237  OffsetNumber i,
1238  maxoff,
1239  split_idx;
1240 
1241  maxoff = entryvec->n - 1;
1242 
1243  sortItems = (SingleBoundSortItem *)
1244  palloc(maxoff * sizeof(SingleBoundSortItem));
1245 
1246  /*
1247  * Prepare auxiliary array and sort the values.
1248  */
1249  for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
1250  {
1251  RangeType *range = DatumGetRangeTypeP(entryvec->vector[i].key);
1252  RangeBound bound2;
1253  bool empty;
1254 
1255  sortItems[i - 1].index = i;
1256  /* Put appropriate bound into array */
1257  if (use_upper_bound)
1258  range_deserialize(typcache, range, &bound2,
1259  &sortItems[i - 1].bound, &empty);
1260  else
1261  range_deserialize(typcache, range, &sortItems[i - 1].bound,
1262  &bound2, &empty);
1263  Assert(!empty);
1264  }
1265 
1266  qsort_arg(sortItems, maxoff, sizeof(SingleBoundSortItem),
1267  single_bound_cmp, typcache);
1268 
1269  split_idx = maxoff / 2;
1270 
1271  v->spl_nleft = 0;
1272  v->spl_nright = 0;
1273 
1274  for (i = 0; i < maxoff; i++)
1275  {
1276  int idx = sortItems[i].index;
1277  RangeType *range = DatumGetRangeTypeP(entryvec->vector[idx].key);
1278 
1279  if (i < split_idx)
1280  PLACE_LEFT(range, idx);
1281  else
1282  PLACE_RIGHT(range, idx);
1283  }
1284 
1285  v->spl_ldatum = RangeTypePGetDatum(left_range);
1286  v->spl_rdatum = RangeTypePGetDatum(right_range);
1287 }
1288 
1289 /*
1290  * Double sorting split algorithm.
1291  *
1292  * The algorithm considers dividing ranges into two groups. The first (left)
1293  * group contains general left bound. The second (right) group contains
1294  * general right bound. The challenge is to find upper bound of left group
1295  * and lower bound of right group so that overlap of groups is minimal and
1296  * ratio of distribution is acceptable. Algorithm finds for each lower bound of
1297  * right group minimal upper bound of left group, and for each upper bound of
1298  * left group maximal lower bound of right group. For each found pair
1299  * range_gist_consider_split considers replacement of currently selected
1300  * split with the new one.
1301  *
1302  * After that, all the entries are divided into three groups:
1303  * 1) Entries which should be placed to the left group
1304  * 2) Entries which should be placed to the right group
1305  * 3) "Common entries" which can be placed to either group without affecting
1306  * amount of overlap.
1307  *
1308  * The common ranges are distributed by difference of distance from lower
1309  * bound of common range to lower bound of right group and distance from upper
1310  * bound of common range to upper bound of left group.
1311  *
1312  * For details see:
1313  * "A new double sorting-based node splitting algorithm for R-tree",
1314  * A. Korotkov
1315  * http://syrcose.ispras.ru/2011/files/SYRCoSE2011_Proceedings.pdf#page=36
1316  */
1317 static void
1319  GistEntryVector *entryvec,
1320  GIST_SPLITVEC *v)
1321 {
1322  ConsiderSplitContext context;
1323  OffsetNumber i,
1324  maxoff;
1325  RangeType *range,
1326  *left_range = NULL,
1327  *right_range = NULL;
1328  int common_entries_count;
1329  NonEmptyRange *by_lower,
1330  *by_upper;
1331  CommonEntry *common_entries;
1332  int nentries,
1333  i1,
1334  i2;
1335  RangeBound *right_lower,
1336  *left_upper;
1337 
1338  memset(&context, 0, sizeof(ConsiderSplitContext));
1339  context.typcache = typcache;
1340  context.has_subtype_diff = OidIsValid(typcache->rng_subdiff_finfo.fn_oid);
1341 
1342  maxoff = entryvec->n - 1;
1343  nentries = context.entries_count = maxoff - FirstOffsetNumber + 1;
1344  context.first = true;
1345 
1346  /* Allocate arrays for sorted range bounds */
1347  by_lower = (NonEmptyRange *) palloc(nentries * sizeof(NonEmptyRange));
1348  by_upper = (NonEmptyRange *) palloc(nentries * sizeof(NonEmptyRange));
1349 
1350  /* Fill arrays of bounds */
1351  for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
1352  {
1353  RangeType *range = DatumGetRangeTypeP(entryvec->vector[i].key);
1354  bool empty;
1355 
1356  range_deserialize(typcache, range,
1357  &by_lower[i - FirstOffsetNumber].lower,
1358  &by_lower[i - FirstOffsetNumber].upper,
1359  &empty);
1360  Assert(!empty);
1361  }
1362 
1363  /*
1364  * Make two arrays of range bounds: one sorted by lower bound and another
1365  * sorted by upper bound.
1366  */
1367  memcpy(by_upper, by_lower, nentries * sizeof(NonEmptyRange));
1368  qsort_arg(by_lower, nentries, sizeof(NonEmptyRange),
1369  interval_cmp_lower, typcache);
1370  qsort_arg(by_upper, nentries, sizeof(NonEmptyRange),
1371  interval_cmp_upper, typcache);
1372 
1373  /*----------
1374  * The goal is to form a left and right range, so that every entry
1375  * range is contained by either left or right interval (or both).
1376  *
1377  * For example, with the ranges (0,1), (1,3), (2,3), (2,4):
1378  *
1379  * 0 1 2 3 4
1380  * +-+
1381  * +---+
1382  * +-+
1383  * +---+
1384  *
1385  * The left and right ranges are of the form (0,a) and (b,4).
1386  * We first consider splits where b is the lower bound of an entry.
1387  * We iterate through all entries, and for each b, calculate the
1388  * smallest possible a. Then we consider splits where a is the
1389  * upper bound of an entry, and for each a, calculate the greatest
1390  * possible b.
1391  *
1392  * In the above example, the first loop would consider splits:
1393  * b=0: (0,1)-(0,4)
1394  * b=1: (0,1)-(1,4)
1395  * b=2: (0,3)-(2,4)
1396  *
1397  * And the second loop:
1398  * a=1: (0,1)-(1,4)
1399  * a=3: (0,3)-(2,4)
1400  * a=4: (0,4)-(2,4)
1401  *----------
1402  */
1403 
1404  /*
1405  * Iterate over lower bound of right group, finding smallest possible
1406  * upper bound of left group.
1407  */
1408  i1 = 0;
1409  i2 = 0;
1410  right_lower = &by_lower[i1].lower;
1411  left_upper = &by_upper[i2].lower;
1412  while (true)
1413  {
1414  /*
1415  * Find next lower bound of right group.
1416  */
1417  while (i1 < nentries &&
1418  range_cmp_bounds(typcache, right_lower,
1419  &by_lower[i1].lower) == 0)
1420  {
1421  if (range_cmp_bounds(typcache, &by_lower[i1].upper,
1422  left_upper) > 0)
1423  left_upper = &by_lower[i1].upper;
1424  i1++;
1425  }
1426  if (i1 >= nentries)
1427  break;
1428  right_lower = &by_lower[i1].lower;
1429 
1430  /*
1431  * Find count of ranges which anyway should be placed to the left
1432  * group.
1433  */
1434  while (i2 < nentries &&
1435  range_cmp_bounds(typcache, &by_upper[i2].upper,
1436  left_upper) <= 0)
1437  i2++;
1438 
1439  /*
1440  * Consider found split to see if it's better than what we had.
1441  */
1442  range_gist_consider_split(&context, right_lower, i1, left_upper, i2);
1443  }
1444 
1445  /*
1446  * Iterate over upper bound of left group finding greatest possible lower
1447  * bound of right group.
1448  */
1449  i1 = nentries - 1;
1450  i2 = nentries - 1;
1451  right_lower = &by_lower[i1].upper;
1452  left_upper = &by_upper[i2].upper;
1453  while (true)
1454  {
1455  /*
1456  * Find next upper bound of left group.
1457  */
1458  while (i2 >= 0 &&
1459  range_cmp_bounds(typcache, left_upper,
1460  &by_upper[i2].upper) == 0)
1461  {
1462  if (range_cmp_bounds(typcache, &by_upper[i2].lower,
1463  right_lower) < 0)
1464  right_lower = &by_upper[i2].lower;
1465  i2--;
1466  }
1467  if (i2 < 0)
1468  break;
1469  left_upper = &by_upper[i2].upper;
1470 
1471  /*
1472  * Find count of intervals which anyway should be placed to the right
1473  * group.
1474  */
1475  while (i1 >= 0 &&
1476  range_cmp_bounds(typcache, &by_lower[i1].lower,
1477  right_lower) >= 0)
1478  i1--;
1479 
1480  /*
1481  * Consider found split to see if it's better than what we had.
1482  */
1483  range_gist_consider_split(&context, right_lower, i1 + 1,
1484  left_upper, i2 + 1);
1485  }
1486 
1487  /*
1488  * If we failed to find any acceptable splits, use trivial split.
1489  */
1490  if (context.first)
1491  {
1492  range_gist_fallback_split(typcache, entryvec, v);
1493  return;
1494  }
1495 
1496  /*
1497  * Ok, we have now selected bounds of the groups. Now we have to
1498  * distribute entries themselves. At first we distribute entries which can
1499  * be placed unambiguously and collect "common entries" to array.
1500  */
1501 
1502  /* Allocate vectors for results */
1503  v->spl_left = (OffsetNumber *) palloc(nentries * sizeof(OffsetNumber));
1504  v->spl_right = (OffsetNumber *) palloc(nentries * sizeof(OffsetNumber));
1505  v->spl_nleft = 0;
1506  v->spl_nright = 0;
1507 
1508  /*
1509  * Allocate an array for "common entries" - entries which can be placed to
1510  * either group without affecting overlap along selected axis.
1511  */
1512  common_entries_count = 0;
1513  common_entries = (CommonEntry *) palloc(nentries * sizeof(CommonEntry));
1514 
1515  /*
1516  * Distribute entries which can be distributed unambiguously, and collect
1517  * common entries.
1518  */
1519  for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
1520  {
1521  RangeBound lower,
1522  upper;
1523  bool empty;
1524 
1525  /*
1526  * Get upper and lower bounds along selected axis.
1527  */
1528  range = DatumGetRangeTypeP(entryvec->vector[i].key);
1529 
1530  range_deserialize(typcache, range, &lower, &upper, &empty);
1531 
1532  if (range_cmp_bounds(typcache, &upper, context.left_upper) <= 0)
1533  {
1534  /* Fits in the left group */
1535  if (range_cmp_bounds(typcache, &lower, context.right_lower) >= 0)
1536  {
1537  /* Fits also in the right group, so "common entry" */
1538  common_entries[common_entries_count].index = i;
1539  if (context.has_subtype_diff)
1540  {
1541  /*
1542  * delta = (lower - context.right_lower) -
1543  * (context.left_upper - upper)
1544  */
1545  common_entries[common_entries_count].delta =
1546  call_subtype_diff(typcache,
1547  lower.val,
1548  context.right_lower->val) -
1549  call_subtype_diff(typcache,
1550  context.left_upper->val,
1551  upper.val);
1552  }
1553  else
1554  {
1555  /* Without subtype_diff, take all deltas as zero */
1556  common_entries[common_entries_count].delta = 0;
1557  }
1558  common_entries_count++;
1559  }
1560  else
1561  {
1562  /* Doesn't fit to the right group, so join to the left group */
1563  PLACE_LEFT(range, i);
1564  }
1565  }
1566  else
1567  {
1568  /*
1569  * Each entry should fit on either left or right group. Since this
1570  * entry didn't fit in the left group, it better fit in the right
1571  * group.
1572  */
1573  Assert(range_cmp_bounds(typcache, &lower,
1574  context.right_lower) >= 0);
1575  PLACE_RIGHT(range, i);
1576  }
1577  }
1578 
1579  /*
1580  * Distribute "common entries", if any.
1581  */
1582  if (common_entries_count > 0)
1583  {
1584  /*
1585  * Sort "common entries" by calculated deltas in order to distribute
1586  * the most ambiguous entries first.
1587  */
1588  qsort(common_entries, common_entries_count, sizeof(CommonEntry),
1590 
1591  /*
1592  * Distribute "common entries" between groups according to sorting.
1593  */
1594  for (i = 0; i < common_entries_count; i++)
1595  {
1596  int idx = common_entries[i].index;
1597 
1598  range = DatumGetRangeTypeP(entryvec->vector[idx].key);
1599 
1600  /*
1601  * Check if we have to place this entry in either group to achieve
1602  * LIMIT_RATIO.
1603  */
1604  if (i < context.common_left)
1605  PLACE_LEFT(range, idx);
1606  else
1607  PLACE_RIGHT(range, idx);
1608  }
1609  }
1610 
1611  v->spl_ldatum = PointerGetDatum(left_range);
1612  v->spl_rdatum = PointerGetDatum(right_range);
1613 }
1614 
1615 /*
1616  * Consider replacement of currently selected split with a better one
1617  * during range_gist_double_sorting_split.
1618  */
1619 static void
1621  RangeBound *right_lower, int min_left_count,
1622  RangeBound *left_upper, int max_left_count)
1623 {
1624  int left_count,
1625  right_count;
1626  float4 ratio,
1627  overlap;
1628 
1629  /*
1630  * Calculate entries distribution ratio assuming most uniform distribution
1631  * of common entries.
1632  */
1633  if (min_left_count >= (context->entries_count + 1) / 2)
1634  left_count = min_left_count;
1635  else if (max_left_count <= context->entries_count / 2)
1636  left_count = max_left_count;
1637  else
1638  left_count = context->entries_count / 2;
1639  right_count = context->entries_count - left_count;
1640 
1641  /*
1642  * Ratio of split: quotient between size of smaller group and total
1643  * entries count. This is necessarily 0.5 or less; if it's less than
1644  * LIMIT_RATIO then we will never accept the new split.
1645  */
1646  ratio = ((float4) Min(left_count, right_count)) /
1647  ((float4) context->entries_count);
1648 
1649  if (ratio > LIMIT_RATIO)
1650  {
1651  bool selectthis = false;
1652 
1653  /*
1654  * The ratio is acceptable, so compare current split with previously
1655  * selected one. We search for minimal overlap (allowing negative
1656  * values) and minimal ratio secondarily. If subtype_diff is
1657  * available, it's used for overlap measure. Without subtype_diff we
1658  * use number of "common entries" as an overlap measure.
1659  */
1660  if (context->has_subtype_diff)
1661  overlap = call_subtype_diff(context->typcache,
1662  left_upper->val,
1663  right_lower->val);
1664  else
1665  overlap = max_left_count - min_left_count;
1666 
1667  /* If there is no previous selection, select this split */
1668  if (context->first)
1669  selectthis = true;
1670  else
1671  {
1672  /*
1673  * Choose the new split if it has a smaller overlap, or same
1674  * overlap but better ratio.
1675  */
1676  if (overlap < context->overlap ||
1677  (overlap == context->overlap && ratio > context->ratio))
1678  selectthis = true;
1679  }
1680 
1681  if (selectthis)
1682  {
1683  /* save information about selected split */
1684  context->first = false;
1685  context->ratio = ratio;
1686  context->overlap = overlap;
1687  context->right_lower = right_lower;
1688  context->left_upper = left_upper;
1689  context->common_left = max_left_count - left_count;
1690  context->common_right = left_count - min_left_count;
1691  }
1692  }
1693 }
1694 
1695 /*
1696  * Find class number for range.
1697  *
1698  * The class number is a valid combination of the properties of the
1699  * range. Note: the highest possible number is 8, because CLS_EMPTY
1700  * can't be combined with anything else.
1701  */
1702 static int
1704 {
1705  int classNumber;
1706  char flags;
1707 
1708  flags = range_get_flags(range);
1709  if (flags & RANGE_EMPTY)
1710  {
1711  classNumber = CLS_EMPTY;
1712  }
1713  else
1714  {
1715  classNumber = 0;
1716  if (flags & RANGE_LB_INF)
1717  classNumber |= CLS_LOWER_INF;
1718  if (flags & RANGE_UB_INF)
1719  classNumber |= CLS_UPPER_INF;
1720  if (flags & RANGE_CONTAIN_EMPTY)
1721  classNumber |= CLS_CONTAIN_EMPTY;
1722  }
1723  return classNumber;
1724 }
1725 
1726 /*
1727  * Comparison function for range_gist_single_sorting_split.
1728  */
1729 static int
1730 single_bound_cmp(const void *a, const void *b, void *arg)
1731 {
1734  TypeCacheEntry *typcache = (TypeCacheEntry *) arg;
1735 
1736  return range_cmp_bounds(typcache, &i1->bound, &i2->bound);
1737 }
1738 
1739 /*
1740  * Compare NonEmptyRanges by lower bound.
1741  */
1742 static int
1743 interval_cmp_lower(const void *a, const void *b, void *arg)
1744 {
1745  NonEmptyRange *i1 = (NonEmptyRange *) a;
1746  NonEmptyRange *i2 = (NonEmptyRange *) b;
1747  TypeCacheEntry *typcache = (TypeCacheEntry *) arg;
1748 
1749  return range_cmp_bounds(typcache, &i1->lower, &i2->lower);
1750 }
1751 
1752 /*
1753  * Compare NonEmptyRanges by upper bound.
1754  */
1755 static int
1756 interval_cmp_upper(const void *a, const void *b, void *arg)
1757 {
1758  NonEmptyRange *i1 = (NonEmptyRange *) a;
1759  NonEmptyRange *i2 = (NonEmptyRange *) b;
1760  TypeCacheEntry *typcache = (TypeCacheEntry *) arg;
1761 
1762  return range_cmp_bounds(typcache, &i1->upper, &i2->upper);
1763 }
1764 
1765 /*
1766  * Compare CommonEntrys by their deltas.
1767  */
1768 static int
1769 common_entry_cmp(const void *i1, const void *i2)
1770 {
1771  double delta1 = ((CommonEntry *) i1)->delta;
1772  double delta2 = ((CommonEntry *) i2)->delta;
1773 
1774  if (delta1 < delta2)
1775  return -1;
1776  else if (delta1 > delta2)
1777  return 1;
1778  else
1779  return 0;
1780 }
1781 
1782 /*
1783  * Convenience function to invoke type-specific subtype_diff function.
1784  * Caller must have already checked that there is one for the range type.
1785  */
1786 static float8
1788 {
1789  float8 value;
1790 
1792  typcache->rng_collation,
1793  val1, val2));
1794  /* Cope with buggy subtype_diff function by returning zero */
1795  if (value >= 0.0)
1796  return value;
1797  return 0.0;
1798 }
#define GIST_LEAF(entry)
Definition: gist.h:170
RangeType * make_range(TypeCacheEntry *typcache, RangeBound *lower, RangeBound *upper, bool empty)
Definition: rangetypes.c:1884
Relation rel
Definition: gist.h:161
#define PG_RETURN_POINTER(x)
Definition: fmgr.h:361
RangeBound upper
#define RangeIsEmpty(r)
Definition: rangetypes.h:56
#define CLS_EMPTY
static void range_gist_consider_split(ConsiderSplitContext *context, RangeBound *right_lower, int min_left_count, RangeBound *left_upper, int max_left_count)
bool range_overlaps_internal(TypeCacheEntry *typcache, const RangeType *r1, const RangeType *r2)
Definition: rangetypes.c:823
#define RANGE_EMPTY
Definition: rangetypes.h:38
#define RANGESTRAT_OVERRIGHT
Definition: rangetypes.h:85
static bool range_gist_consistent_int_element(TypeCacheEntry *typcache, StrategyNumber strategy, const RangeType *key, Datum query)
#define CLS_COUNT
TypeCacheEntry * range_get_typcache(FunctionCallInfo fcinfo, Oid rngtypid)
Definition: rangetypes.c:1635
static struct @142 value
#define RANGESTRAT_ADJACENT
Definition: rangetypes.h:87
#define RangeTypeGetOid(r)
Definition: rangetypes.h:35
Datum lower(PG_FUNCTION_ARGS)
Definition: oracle_compat.c:46
#define CONTAIN_EMPTY_PENALTY
static bool range_gist_consistent_int_multirange(TypeCacheEntry *typcache, StrategyNumber strategy, const RangeType *key, const MultirangeType *query)
#define PointerGetDatum(X)
Definition: postgres.h:600
#define PG_GETARG_DATUM(n)
Definition: fmgr.h:268
#define INFINITE_BOUND_PENALTY
RangeBound * right_lower
float8 delta
Definition: gistproc.c:277
RangeBound * left_upper
bool range_after_internal(TypeCacheEntry *typcache, const RangeType *r1, const RangeType *r2)
Definition: rangetypes.c:684
static void range_gist_class_split(TypeCacheEntry *typcache, GistEntryVector *entryvec, GIST_SPLITVEC *v, SplitLR *classes_groups)
#define Min(x, y)
Definition: c.h:986
bool range_contains_elem_internal(TypeCacheEntry *typcache, const RangeType *r, Datum val)
Definition: rangetypes.c:2473
OffsetNumber * spl_left
Definition: gist.h:142
Datum spl_rdatum
Definition: gist.h:149
Datum val
Definition: rangetypes.h:64
static float8 call_subtype_diff(TypeCacheEntry *typcache, Datum val1, Datum val2)
int32 n
Definition: gist.h:235
uint16 StrategyNumber
Definition: stratnum.h:22
char range_get_flags(const RangeType *range)
Definition: rangetypes.c:1855
static bool range_gist_consistent_leaf_element(TypeCacheEntry *typcache, StrategyNumber strategy, const RangeType *key, Datum query)
Datum idx(PG_FUNCTION_ARGS)
Definition: _int_op.c:259
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:276
RangeType * multirange_get_union_range(TypeCacheEntry *rangetyp, const MultirangeType *mr)
Datum upper(PG_FUNCTION_ARGS)
Definition: oracle_compat.c:77
Datum FunctionCall2Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2)
Definition: fmgr.c:1148
int spl_nleft
Definition: gist.h:143
unsigned int Oid
Definition: postgres_ext.h:31
#define RANGE_LB_INF
Definition: rangetypes.h:41
#define OidIsValid(objectId)
Definition: c.h:710
#define RangeTypePGetDatum(X)
Definition: rangetypes.h:75
void range_set_contain_empty(RangeType *range)
Definition: rangetypes.c:1869
#define CLS_CONTAIN_EMPTY
uint16 OffsetNumber
Definition: off.h:24
Page page
Definition: gist.h:162
#define Abs(x)
Definition: c.h:992
Definition: type.h:89
GISTENTRY vector[FLEXIBLE_ARRAY_MEMBER]
Definition: gist.h:236
static bool range_gist_consistent_leaf_multirange(TypeCacheEntry *typcache, StrategyNumber strategy, const RangeType *key, const MultirangeType *query)
FmgrInfo rng_subdiff_finfo
Definition: typcache.h:102
Datum multirange_gist_compress(PG_FUNCTION_ARGS)
#define PLACE_LEFT(range, off)
#define ERROR
Definition: elog.h:46
double float8
Definition: c.h:565
Datum range_gist_same(PG_FUNCTION_ARGS)
#define LIMIT_RATIO
static int get_gist_range_class(RangeType *range)
bool range_overleft_multirange_internal(TypeCacheEntry *rangetyp, const RangeType *r, const MultirangeType *mr)
#define RANGESTRAT_CONTAINS_ELEM
Definition: rangetypes.h:90
int spl_nright
Definition: gist.h:148
RangeBound lower
bool range_overright_multirange_internal(TypeCacheEntry *rangetyp, const RangeType *r, const MultirangeType *mr)
#define RANGESTRAT_BEFORE
Definition: rangetypes.h:82
static bool range_gist_consistent_leaf_range(TypeCacheEntry *typcache, StrategyNumber strategy, const RangeType *key, const RangeType *query)
#define DatumGetMultirangeTypeP(X)
bool range_before_multirange_internal(TypeCacheEntry *rangetyp, const RangeType *r, const MultirangeType *mr)
Datum key
Definition: gist.h:160
TypeCacheEntry * multirange_get_typcache(FunctionCallInfo fcinfo, Oid mltrngtypid)
#define PG_GETARG_OID(n)
Definition: fmgr.h:275
#define FirstOffsetNumber
Definition: off.h:27
bool range_contains_multirange_internal(TypeCacheEntry *rangetyp, const RangeType *r, const MultirangeType *mr)
Datum range_gist_picksplit(PG_FUNCTION_ARGS)
void multirange_get_bounds(TypeCacheEntry *rangetyp, const MultirangeType *multirange, uint32 i, RangeBound *lower, RangeBound *upper)
static bool multirange_union_range_equal(TypeCacheEntry *typcache, const RangeType *r, const MultirangeType *mr)
static struct cvec * range(struct vars *v, chr a, chr b, int cases)
Definition: regc_locale.c:412
SplitLR
#define PG_RETURN_RANGE_P(x)
Definition: rangetypes.h:78
bool leafkey
Definition: gist.h:164
bool range_contains_internal(TypeCacheEntry *typcache, const RangeType *r1, const RangeType *r2)
Definition: rangetypes.c:2432
void qsort_arg(void *base, size_t nel, size_t elsize, qsort_arg_comparator cmp, void *arg)
static RangeType * range_super_union(TypeCacheEntry *typcache, RangeType *r1, RangeType *r2)
float float4
Definition: c.h:564
Datum range_gist_consistent(PG_FUNCTION_ARGS)
#define PLACE_RIGHT(range, off)
#define RANGESTRAT_CONTAINS
Definition: rangetypes.h:88
Datum range_gist_penalty(PG_FUNCTION_ARGS)
#define DatumGetFloat8(X)
Definition: postgres.h:758
#define PG_RETURN_BOOL(x)
Definition: fmgr.h:359
uintptr_t Datum
Definition: postgres.h:411
static void range_gist_fallback_split(TypeCacheEntry *typcache, GistEntryVector *entryvec, GIST_SPLITVEC *v)
#define CLS_LOWER_INF
bool range_overleft_internal(TypeCacheEntry *typcache, const RangeType *r1, const RangeType *r2)
Definition: rangetypes.c:869
Oid fn_oid
Definition: fmgr.h:59
#define RANGE_CONTAIN_EMPTY
Definition: rangetypes.h:45
bool multirange_contains_range_internal(TypeCacheEntry *rangetyp, const MultirangeType *mr, const RangeType *r)
Datum spl_ldatum
Definition: gist.h:144
#define Assert(condition)
Definition: c.h:804
static int single_bound_cmp(const void *a, const void *b, void *arg)
#define gistentryinit(e, k, r, pg, o, l)
Definition: gist.h:244
bool range_before_internal(TypeCacheEntry *typcache, const RangeType *r1, const RangeType *r2)
Definition: rangetypes.c:646
#define CLS_UPPER_INF
bool range_contained_by_internal(TypeCacheEntry *typcache, const RangeType *r1, const RangeType *r2)
Definition: rangetypes.c:2464
#define RANGESTRAT_CONTAINED_BY
Definition: rangetypes.h:89
#define RANGESTRAT_OVERLEFT
Definition: rangetypes.h:83
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
#define RANGESTRAT_EQ
Definition: rangetypes.h:91
#define PG_GETARG_RANGE_P(n)
Definition: rangetypes.h:76
OffsetNumber * spl_right
Definition: gist.h:147
#define DatumGetRangeTypeP(X)
Definition: rangetypes.h:73
bool infinite
Definition: rangetypes.h:65
Oid rng_collation
Definition: typcache.h:99
#define MultirangeIsEmpty(mr)
struct TypeCacheEntry * rngtype
Definition: typcache.h:107
static void range_gist_double_sorting_split(TypeCacheEntry *typcache, GistEntryVector *entryvec, GIST_SPLITVEC *v)
bool range_adjacent_multirange_internal(TypeCacheEntry *rangetyp, const RangeType *r, const MultirangeType *mr)
#define DEFAULT_SUBTYPE_DIFF_PENALTY
#define PG_GETARG_UINT16(n)
Definition: fmgr.h:272
#define RANGE_UB_INF
Definition: rangetypes.h:42
static Ranges * range_deserialize(int maxvalues, SerializedRanges *range)
#define RangeIsOrContainsEmpty(r)
Definition: rangetypes.h:57
#define rangeCopy(r)
Datum multirange_gist_consistent(PG_FUNCTION_ARGS)
bool range_overright_internal(TypeCacheEntry *typcache, const RangeType *r1, const RangeType *r2)
Definition: rangetypes.c:910
void * palloc(Size size)
Definition: mcxt.c:1062
#define RANGESTRAT_OVERLAPS
Definition: rangetypes.h:84
#define elog(elevel,...)
Definition: elog.h:232
int i
static void range_gist_single_sorting_split(TypeCacheEntry *typcache, GistEntryVector *entryvec, GIST_SPLITVEC *v, bool use_upper_bound)
#define CLS_NORMAL
bool range_adjacent_internal(TypeCacheEntry *typcache, const RangeType *r1, const RangeType *r2)
Definition: rangetypes.c:780
void * arg
#define MultirangeTypeGetOid(mr)
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
bool range_overlaps_multirange_internal(TypeCacheEntry *rangetyp, const RangeType *r, const MultirangeType *mr)
#define RANGESTRAT_AFTER
Definition: rangetypes.h:86
static bool range_gist_consistent_int_range(TypeCacheEntry *typcache, StrategyNumber strategy, const RangeType *key, const RangeType *query)
#define qsort(a, b, c, d)
Definition: port.h:504
Datum range_gist_union(PG_FUNCTION_ARGS)
static int interval_cmp_lower(const void *a, const void *b, void *arg)
OffsetNumber offset
Definition: gist.h:163
TypeCacheEntry * typcache
bool range_eq_internal(TypeCacheEntry *typcache, const RangeType *r1, const RangeType *r2)
Definition: rangetypes.c:555
bool range_after_multirange_internal(TypeCacheEntry *rangetyp, const RangeType *r, const MultirangeType *mr)
int range_cmp_bounds(TypeCacheEntry *typcache, const RangeBound *b1, const RangeBound *b2)
Definition: rangetypes.c:1924
static int interval_cmp_upper(const void *a, const void *b, void *arg)
static float4 get_float4_infinity(void)
Definition: float.h:73
static int common_entry_cmp(const void *i1, const void *i2)