PostgreSQL Source Code git master
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
rangetypes_gist.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * rangetypes_gist.c
4 * GiST support for range types.
5 *
6 * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
8 *
9 *
10 * IDENTIFICATION
11 * src/backend/utils/adt/rangetypes_gist.c
12 *
13 *-------------------------------------------------------------------------
14 */
#include "postgres.h"

#include "access/gist.h"
#include "access/stratnum.h"
#include "utils/datum.h"
#include "utils/float.h"
#include "utils/fmgrprotos.h"
#include "utils/multirangetypes.h"
#include "utils/rangetypes.h"
24
/*
 * Range class properties used to segregate different classes of ranges in
 * GiST.  Each unique combination of properties is a class.  CLS_EMPTY cannot
 * be combined with anything else.
 *
 * The first three properties are independent bits, so a class number is the
 * bitwise OR of the properties that apply.
 */
#define CLS_NORMAL			0	/* Ordinary finite range (no bits set) */
#define CLS_LOWER_INF		1	/* Lower bound is infinity */
#define CLS_UPPER_INF		2	/* Upper bound is infinity */
#define CLS_CONTAIN_EMPTY	4	/* Contains underlying empty ranges */
#define CLS_EMPTY			8	/* Special class for empty ranges */

#define CLS_COUNT			9	/* # of classes; includes all combinations of
								 * properties. CLS_EMPTY doesn't combine with
								 * anything else, so it's only 2^3 + 1. */
39
/*
 * Minimum accepted ratio of split for items of the same class.  If the items
 * are of different classes, we will separate along those lines regardless of
 * the ratio.
 */
#define LIMIT_RATIO  0.3

/* Constants for fixed penalty values */
#define INFINITE_BOUND_PENALTY	2.0
#define CONTAIN_EMPTY_PENALTY  1.0
#define DEFAULT_SUBTYPE_DIFF_PENALTY  1.0
51
52/*
53 * Per-item data for range_gist_single_sorting_split.
54 */
55typedef struct
57 int index;
58 RangeBound bound;
/* place on left or right side of split? */
typedef enum
{
	SPLIT_LEFT = 0,				/* makes initialization to SPLIT_LEFT easier */
	SPLIT_RIGHT
} SplitLR;
67
68/*
69 * Context for range_gist_consider_split.
70 */
71typedef struct
73 TypeCacheEntry *typcache; /* typcache for range type */
74 bool has_subtype_diff; /* does it have subtype_diff? */
75 int entries_count; /* total number of entries being split */
76
77 /* Information about currently selected split follows */
78
79 bool first; /* true if no split was selected yet */
81 RangeBound *left_upper; /* upper bound of left interval */
82 RangeBound *right_lower; /* lower bound of right interval */
83
84 float4 ratio; /* split ratio */
85 float4 overlap; /* overlap between left and right predicate */
86 int common_left; /* # common entries destined for each side */
87 int common_right;
89
90/*
91 * Bounds extracted from a non-empty range, for use in
92 * range_gist_double_sorting_split.
93 */
94typedef struct
99
/*
 * Represents information about an entry that can be placed in either group
 * without affecting overlap over selected axis ("common entry").
 */
typedef struct
{
	/* Index of entry in the initial array */
	int			index;
	/* Delta between closeness of range to each of the two groups */
	double		delta;
} CommonEntry;
111
/*
 * Helper macros to place an entry in the left or right group during split.
 *
 * Note direct access to variables v, typcache, left_range, right_range: the
 * caller must have all four in scope.  The first entry placed on a side
 * becomes that side's range as-is; later entries are folded in with
 * range_super_union.  "off" is appended to the side's offset array.
 */
#define PLACE_LEFT(range, off)					\
	do {										\
		if (v->spl_nleft > 0)					\
			left_range = range_super_union(typcache, left_range, range); \
		else									\
			left_range = (range);				\
		v->spl_left[v->spl_nleft++] = (off);	\
	} while(0)

#define PLACE_RIGHT(range, off)					\
	do {										\
		if (v->spl_nright > 0)					\
			right_range = range_super_union(typcache, right_range, range); \
		else									\
			right_range = (range);				\
		v->spl_right[v->spl_nright++] = (off);	\
	} while(0)
/* Copy a RangeType datum (hardwires typbyval and typlen for ranges...) */
#define rangeCopy(r) \
	((RangeType *) DatumGetPointer(datumCopy(PointerGetDatum(r), \
											 false, -1)))
136
138 RangeType *r2);
140 StrategyNumber strategy,
141 const RangeType *key,
142 const RangeType *query);
144 StrategyNumber strategy,
145 const RangeType *key,
146 const MultirangeType *query);
148 StrategyNumber strategy,
149 const RangeType *key,
150 Datum query);
152 StrategyNumber strategy,
153 const RangeType *key,
154 const RangeType *query);
156 StrategyNumber strategy,
157 const RangeType *key,
158 const MultirangeType *query);
160 StrategyNumber strategy,
161 const RangeType *key,
162 Datum query);
163static void range_gist_fallback_split(TypeCacheEntry *typcache,
164 GistEntryVector *entryvec,
165 GIST_SPLITVEC *v);
166static void range_gist_class_split(TypeCacheEntry *typcache,
167 GistEntryVector *entryvec,
168 GIST_SPLITVEC *v,
169 SplitLR *classes_groups);
171 GistEntryVector *entryvec,
172 GIST_SPLITVEC *v,
173 bool use_upper_bound);
175 GistEntryVector *entryvec,
176 GIST_SPLITVEC *v);
178 RangeBound *right_lower, int min_left_count,
179 RangeBound *left_upper, int max_left_count);
181static int single_bound_cmp(const void *a, const void *b, void *arg);
182static int interval_cmp_lower(const void *a, const void *b, void *arg);
183static int interval_cmp_upper(const void *a, const void *b, void *arg);
184static int common_entry_cmp(const void *i1, const void *i2);
186 Datum val1, Datum val2);
187
188
189/* GiST query consistency check */
190Datum
192{
193 GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
194 Datum query = PG_GETARG_DATUM(1);
196 bool result;
197 Oid subtype = PG_GETARG_OID(3);
198 bool *recheck = (bool *) PG_GETARG_POINTER(4);
200 TypeCacheEntry *typcache;
201
202 /* All operators served by this function are exact */
203 *recheck = false;
204
205 typcache = range_get_typcache(fcinfo, RangeTypeGetOid(key));
206
207 /*
208 * Perform consistent checking using function corresponding to key type
209 * (leaf or internal) and query subtype (range, multirange, or element).
210 * Note that invalid subtype means that query type matches key type
211 * (range).
212 */
213 if (GIST_LEAF(entry))
214 {
215 if (!OidIsValid(subtype) || subtype == ANYRANGEOID)
216 result = range_gist_consistent_leaf_range(typcache, strategy, key,
217 DatumGetRangeTypeP(query));
218 else if (subtype == ANYMULTIRANGEOID)
219 result = range_gist_consistent_leaf_multirange(typcache, strategy, key,
221 else
222 result = range_gist_consistent_leaf_element(typcache, strategy,
223 key, query);
224 }
225 else
226 {
227 if (!OidIsValid(subtype) || subtype == ANYRANGEOID)
228 result = range_gist_consistent_int_range(typcache, strategy, key,
229 DatumGetRangeTypeP(query));
230 else if (subtype == ANYMULTIRANGEOID)
231 result = range_gist_consistent_int_multirange(typcache, strategy, key,
233 else
234 result = range_gist_consistent_int_element(typcache, strategy,
235 key, query);
236 }
237 PG_RETURN_BOOL(result);
238}
239
240/*
241 * GiST compress method for multiranges: multirange is approximated as union
242 * range with no gaps.
243 */
244Datum
246{
247 GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
248
249 if (entry->leafkey)
250 {
252 RangeType *r;
253 TypeCacheEntry *typcache;
254 GISTENTRY *retval = palloc(sizeof(GISTENTRY));
255
256 typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr));
257 r = multirange_get_union_range(typcache->rngtype, mr);
258
260 entry->rel, entry->page, entry->offset, false);
261
262 PG_RETURN_POINTER(retval);
263 }
264
265 PG_RETURN_POINTER(entry);
266}
267
268/* GiST query consistency check for multiranges */
269Datum
271{
272 GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
273 Datum query = PG_GETARG_DATUM(1);
275 bool result;
276 Oid subtype = PG_GETARG_OID(3);
277 bool *recheck = (bool *) PG_GETARG_POINTER(4);
279 TypeCacheEntry *typcache;
280
281 /*
282 * All operators served by this function are inexact because multirange is
283 * approximated by union range with no gaps.
284 */
285 *recheck = true;
286
287 typcache = range_get_typcache(fcinfo, RangeTypeGetOid(key));
288
289 /*
290 * Perform consistent checking using function corresponding to key type
291 * (leaf or internal) and query subtype (range, multirange, or element).
292 * Note that invalid subtype means that query type matches key type
293 * (multirange).
294 */
295 if (GIST_LEAF(entry))
296 {
297 if (!OidIsValid(subtype) || subtype == ANYMULTIRANGEOID)
298 result = range_gist_consistent_leaf_multirange(typcache, strategy, key,
300 else if (subtype == ANYRANGEOID)
301 result = range_gist_consistent_leaf_range(typcache, strategy, key,
302 DatumGetRangeTypeP(query));
303 else
304 result = range_gist_consistent_leaf_element(typcache, strategy,
305 key, query);
306 }
307 else
308 {
309 if (!OidIsValid(subtype) || subtype == ANYMULTIRANGEOID)
310 result = range_gist_consistent_int_multirange(typcache, strategy, key,
312 else if (subtype == ANYRANGEOID)
313 result = range_gist_consistent_int_range(typcache, strategy, key,
314 DatumGetRangeTypeP(query));
315 else
316 result = range_gist_consistent_int_element(typcache, strategy,
317 key, query);
318 }
319 PG_RETURN_BOOL(result);
320}
321
322/* form union range */
323Datum
325{
327 GISTENTRY *ent = entryvec->vector;
328 RangeType *result_range;
329 TypeCacheEntry *typcache;
330 int i;
331
332 result_range = DatumGetRangeTypeP(ent[0].key);
333
334 typcache = range_get_typcache(fcinfo, RangeTypeGetOid(result_range));
335
336 for (i = 1; i < entryvec->n; i++)
337 {
338 result_range = range_super_union(typcache, result_range,
339 DatumGetRangeTypeP(ent[i].key));
340 }
341
342 PG_RETURN_RANGE_P(result_range);
343}
344
345/*
346 * We store ranges as ranges in GiST indexes, so we do not need
347 * compress, decompress, or fetch functions. Note this implies a limit
348 * on the size of range values that can be indexed.
349 */
350
351/*
352 * GiST page split penalty function.
353 *
354 * The penalty function has the following goals (in order from most to least
355 * important):
356 * - Keep normal ranges separate
357 * - Avoid broadening the class of the original predicate
358 * - Avoid broadening (as determined by subtype_diff) the original predicate
359 * - Favor adding ranges to narrower original predicates
360 */
361Datum
363{
364 GISTENTRY *origentry = (GISTENTRY *) PG_GETARG_POINTER(0);
365 GISTENTRY *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
366 float *penalty = (float *) PG_GETARG_POINTER(2);
367 RangeType *orig = DatumGetRangeTypeP(origentry->key);
368 RangeType *new = DatumGetRangeTypeP(newentry->key);
369 TypeCacheEntry *typcache;
370 bool has_subtype_diff;
371 RangeBound orig_lower,
372 new_lower,
373 orig_upper,
374 new_upper;
375 bool orig_empty,
376 new_empty;
377
378 if (RangeTypeGetOid(orig) != RangeTypeGetOid(new))
379 elog(ERROR, "range types do not match");
380
381 typcache = range_get_typcache(fcinfo, RangeTypeGetOid(orig));
382
383 has_subtype_diff = OidIsValid(typcache->rng_subdiff_finfo.fn_oid);
384
385 range_deserialize(typcache, orig, &orig_lower, &orig_upper, &orig_empty);
386 range_deserialize(typcache, new, &new_lower, &new_upper, &new_empty);
387
388 /*
389 * Distinct branches for handling distinct classes of ranges. Note that
390 * penalty values only need to be commensurate within the same class of
391 * new range.
392 */
393 if (new_empty)
394 {
395 /* Handle insertion of empty range */
396 if (orig_empty)
397 {
398 /*
399 * The best case is to insert it to empty original range.
400 * Insertion here means no broadening of original range. Also
401 * original range is the most narrow.
402 */
403 *penalty = 0.0;
404 }
405 else if (RangeIsOrContainsEmpty(orig))
406 {
407 /*
408 * The second case is to insert empty range into range which
409 * contains at least one underlying empty range. There is still
410 * no broadening of original range, but original range is not as
411 * narrow as possible.
412 */
413 *penalty = CONTAIN_EMPTY_PENALTY;
414 }
415 else if (orig_lower.infinite && orig_upper.infinite)
416 {
417 /*
418 * Original range requires broadening. (-inf; +inf) is most far
419 * from normal range in this case.
420 */
421 *penalty = 2 * CONTAIN_EMPTY_PENALTY;
422 }
423 else if (orig_lower.infinite || orig_upper.infinite)
424 {
425 /*
426 * (-inf, x) or (x, +inf) original ranges are closer to normal
427 * ranges, so it's worse to mix it with empty ranges.
428 */
429 *penalty = 3 * CONTAIN_EMPTY_PENALTY;
430 }
431 else
432 {
433 /*
434 * The least preferred case is broadening of normal range.
435 */
436 *penalty = 4 * CONTAIN_EMPTY_PENALTY;
437 }
438 }
439 else if (new_lower.infinite && new_upper.infinite)
440 {
441 /* Handle insertion of (-inf, +inf) range */
442 if (orig_lower.infinite && orig_upper.infinite)
443 {
444 /*
445 * Best case is inserting to (-inf, +inf) original range.
446 */
447 *penalty = 0.0;
448 }
449 else if (orig_lower.infinite || orig_upper.infinite)
450 {
451 /*
452 * When original range is (-inf, x) or (x, +inf) it requires
453 * broadening of original range (extension of one bound to
454 * infinity).
455 */
456 *penalty = INFINITE_BOUND_PENALTY;
457 }
458 else
459 {
460 /*
461 * Insertion to normal original range is least preferred.
462 */
463 *penalty = 2 * INFINITE_BOUND_PENALTY;
464 }
465
466 if (RangeIsOrContainsEmpty(orig))
467 {
468 /*
469 * Original range is narrower when it doesn't contain empty
470 * ranges. Add additional penalty otherwise.
471 */
472 *penalty += CONTAIN_EMPTY_PENALTY;
473 }
474 }
475 else if (new_lower.infinite)
476 {
477 /* Handle insertion of (-inf, x) range */
478 if (!orig_empty && orig_lower.infinite)
479 {
480 if (orig_upper.infinite)
481 {
482 /*
483 * (-inf, +inf) range won't be extended by insertion of (-inf,
484 * x) range. It's a less desirable case than insertion to
485 * (-inf, y) original range without extension, because in that
486 * case original range is narrower. But we can't express that
487 * in single float value.
488 */
489 *penalty = 0.0;
490 }
491 else
492 {
493 if (range_cmp_bounds(typcache, &new_upper, &orig_upper) > 0)
494 {
495 /*
496 * Get extension of original range using subtype_diff. Use
497 * constant if subtype_diff unavailable.
498 */
499 if (has_subtype_diff)
500 *penalty = call_subtype_diff(typcache,
501 new_upper.val,
502 orig_upper.val);
503 else
505 }
506 else
507 {
508 /* No extension of original range */
509 *penalty = 0.0;
510 }
511 }
512 }
513 else
514 {
515 /*
516 * If lower bound of original range is not -inf, then extension of
517 * it is infinity.
518 */
519 *penalty = get_float4_infinity();
520 }
521 }
522 else if (new_upper.infinite)
523 {
524 /* Handle insertion of (x, +inf) range */
525 if (!orig_empty && orig_upper.infinite)
526 {
527 if (orig_lower.infinite)
528 {
529 /*
530 * (-inf, +inf) range won't be extended by insertion of (x,
531 * +inf) range. It's a less desirable case than insertion to
532 * (y, +inf) original range without extension, because in that
533 * case original range is narrower. But we can't express that
534 * in single float value.
535 */
536 *penalty = 0.0;
537 }
538 else
539 {
540 if (range_cmp_bounds(typcache, &new_lower, &orig_lower) < 0)
541 {
542 /*
543 * Get extension of original range using subtype_diff. Use
544 * constant if subtype_diff unavailable.
545 */
546 if (has_subtype_diff)
547 *penalty = call_subtype_diff(typcache,
548 orig_lower.val,
549 new_lower.val);
550 else
552 }
553 else
554 {
555 /* No extension of original range */
556 *penalty = 0.0;
557 }
558 }
559 }
560 else
561 {
562 /*
563 * If upper bound of original range is not +inf, then extension of
564 * it is infinity.
565 */
566 *penalty = get_float4_infinity();
567 }
568 }
569 else
570 {
571 /* Handle insertion of normal (non-empty, non-infinite) range */
572 if (orig_empty || orig_lower.infinite || orig_upper.infinite)
573 {
574 /*
575 * Avoid mixing normal ranges with infinite and empty ranges.
576 */
577 *penalty = get_float4_infinity();
578 }
579 else
580 {
581 /*
582 * Calculate extension of original range by calling subtype_diff.
583 * Use constant if subtype_diff unavailable.
584 */
585 float8 diff = 0.0;
586
587 if (range_cmp_bounds(typcache, &new_lower, &orig_lower) < 0)
588 {
589 if (has_subtype_diff)
590 diff += call_subtype_diff(typcache,
591 orig_lower.val,
592 new_lower.val);
593 else
595 }
596 if (range_cmp_bounds(typcache, &new_upper, &orig_upper) > 0)
597 {
598 if (has_subtype_diff)
599 diff += call_subtype_diff(typcache,
600 new_upper.val,
601 orig_upper.val);
602 else
604 }
605 *penalty = diff;
606 }
607 }
608
609 PG_RETURN_POINTER(penalty);
610}
611
612/*
613 * The GiST PickSplit method for ranges
614 *
615 * Primarily, we try to segregate ranges of different classes. If splitting
616 * ranges of the same class, use the appropriate split method for that class.
617 */
618Datum
620{
623 TypeCacheEntry *typcache;
625 RangeType *pred_left;
626 int nbytes;
627 OffsetNumber maxoff;
628 int count_in_classes[CLS_COUNT];
629 int j;
630 int non_empty_classes_count = 0;
631 int biggest_class = -1;
632 int biggest_class_count = 0;
633 int total_count;
634
635 /* use first item to look up range type's info */
636 pred_left = DatumGetRangeTypeP(entryvec->vector[FirstOffsetNumber].key);
637 typcache = range_get_typcache(fcinfo, RangeTypeGetOid(pred_left));
638
639 maxoff = entryvec->n - 1;
640 nbytes = (maxoff + 1) * sizeof(OffsetNumber);
641 v->spl_left = (OffsetNumber *) palloc(nbytes);
642 v->spl_right = (OffsetNumber *) palloc(nbytes);
643
644 /*
645 * Get count distribution of range classes.
646 */
647 memset(count_in_classes, 0, sizeof(count_in_classes));
648 for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
649 {
651
652 count_in_classes[get_gist_range_class(range)]++;
653 }
654
655 /*
656 * Count non-empty classes and find biggest class.
657 */
658 total_count = maxoff;
659 for (j = 0; j < CLS_COUNT; j++)
660 {
661 if (count_in_classes[j] > 0)
662 {
663 if (count_in_classes[j] > biggest_class_count)
664 {
665 biggest_class_count = count_in_classes[j];
666 biggest_class = j;
667 }
668 non_empty_classes_count++;
669 }
670 }
671
672 Assert(non_empty_classes_count > 0);
673
674 if (non_empty_classes_count == 1)
675 {
676 /* One non-empty class, so split inside class */
677 if ((biggest_class & ~CLS_CONTAIN_EMPTY) == CLS_NORMAL)
678 {
679 /* double sorting split for normal ranges */
680 range_gist_double_sorting_split(typcache, entryvec, v);
681 }
682 else if ((biggest_class & ~CLS_CONTAIN_EMPTY) == CLS_LOWER_INF)
683 {
684 /* upper bound sorting split for (-inf, x) ranges */
685 range_gist_single_sorting_split(typcache, entryvec, v, true);
686 }
687 else if ((biggest_class & ~CLS_CONTAIN_EMPTY) == CLS_UPPER_INF)
688 {
689 /* lower bound sorting split for (x, +inf) ranges */
690 range_gist_single_sorting_split(typcache, entryvec, v, false);
691 }
692 else
693 {
694 /* trivial split for all (-inf, +inf) or all empty ranges */
695 range_gist_fallback_split(typcache, entryvec, v);
696 }
697 }
698 else
699 {
700 /*
701 * Class based split.
702 *
703 * To which side of the split should each class go? Initialize them
704 * all to go to the left side.
705 */
706 SplitLR classes_groups[CLS_COUNT];
707
708 memset(classes_groups, 0, sizeof(classes_groups));
709
710 if (count_in_classes[CLS_NORMAL] > 0)
711 {
712 /* separate normal ranges if any */
713 classes_groups[CLS_NORMAL] = SPLIT_RIGHT;
714 }
715 else
716 {
717 /*----------
718 * Try to split classes in one of two ways:
719 * 1) containing infinities - not containing infinities
720 * 2) containing empty - not containing empty
721 *
722 * Select the way which balances the ranges between left and right
723 * the best. If split in these ways is not possible, there are at
724 * most 3 classes, so just separate biggest class.
725 *----------
726 */
727 int infCount,
728 nonInfCount;
729 int emptyCount,
730 nonEmptyCount;
731
732 nonInfCount =
733 count_in_classes[CLS_NORMAL] +
734 count_in_classes[CLS_CONTAIN_EMPTY] +
735 count_in_classes[CLS_EMPTY];
736 infCount = total_count - nonInfCount;
737
738 nonEmptyCount =
739 count_in_classes[CLS_NORMAL] +
740 count_in_classes[CLS_LOWER_INF] +
741 count_in_classes[CLS_UPPER_INF] +
742 count_in_classes[CLS_LOWER_INF | CLS_UPPER_INF];
743 emptyCount = total_count - nonEmptyCount;
744
745 if (infCount > 0 && nonInfCount > 0 &&
746 (abs(infCount - nonInfCount) <=
747 abs(emptyCount - nonEmptyCount)))
748 {
749 classes_groups[CLS_NORMAL] = SPLIT_RIGHT;
750 classes_groups[CLS_CONTAIN_EMPTY] = SPLIT_RIGHT;
751 classes_groups[CLS_EMPTY] = SPLIT_RIGHT;
752 }
753 else if (emptyCount > 0 && nonEmptyCount > 0)
754 {
755 classes_groups[CLS_NORMAL] = SPLIT_RIGHT;
756 classes_groups[CLS_LOWER_INF] = SPLIT_RIGHT;
757 classes_groups[CLS_UPPER_INF] = SPLIT_RIGHT;
758 classes_groups[CLS_LOWER_INF | CLS_UPPER_INF] = SPLIT_RIGHT;
759 }
760 else
761 {
762 /*
763 * Either total_count == emptyCount or total_count ==
764 * infCount.
765 */
766 classes_groups[biggest_class] = SPLIT_RIGHT;
767 }
768 }
769
770 range_gist_class_split(typcache, entryvec, v, classes_groups);
771 }
772
774}
775
776/* equality comparator for GiST */
777Datum
779{
782 bool *result = (bool *) PG_GETARG_POINTER(2);
783
784 /*
785 * range_eq will ignore the RANGE_CONTAIN_EMPTY flag, so we have to check
786 * that for ourselves. More generally, if the entries have been properly
787 * normalized, then unequal flags bytes must mean unequal ranges ... so
788 * let's just test all the flag bits at once.
789 */
790 if (range_get_flags(r1) != range_get_flags(r2))
791 *result = false;
792 else
793 {
794 TypeCacheEntry *typcache;
795
796 typcache = range_get_typcache(fcinfo, RangeTypeGetOid(r1));
797
798 *result = range_eq_internal(typcache, r1, r2);
799 }
800
801 PG_RETURN_POINTER(result);
802}
803
804/*
805 *----------------------------------------------------------
806 * STATIC FUNCTIONS
807 *----------------------------------------------------------
808 */
809
810/*
811 * Return the smallest range that contains r1 and r2
812 *
813 * This differs from regular range_union in two critical ways:
814 * 1. It won't throw an error for non-adjacent r1 and r2, but just absorb
815 * the intervening values into the result range.
816 * 2. We track whether any empty range has been union'd into the result,
817 * so that contained_by searches can be indexed. Note that this means
818 * that *all* unions formed within the GiST index must go through here.
819 */
820static RangeType *
822{
823 RangeType *result;
824 RangeBound lower1,
825 lower2;
826 RangeBound upper1,
827 upper2;
828 bool empty1,
829 empty2;
830 char flags1,
831 flags2;
832 RangeBound *result_lower;
833 RangeBound *result_upper;
834
835 range_deserialize(typcache, r1, &lower1, &upper1, &empty1);
836 range_deserialize(typcache, r2, &lower2, &upper2, &empty2);
837 flags1 = range_get_flags(r1);
838 flags2 = range_get_flags(r2);
839
840 if (empty1)
841 {
842 /* We can return r2 as-is if it already is or contains empty */
843 if (flags2 & (RANGE_EMPTY | RANGE_CONTAIN_EMPTY))
844 return r2;
845 /* Else we'd better copy it (modify-in-place isn't safe) */
846 r2 = rangeCopy(r2);
848 return r2;
849 }
850 if (empty2)
851 {
852 /* We can return r1 as-is if it already is or contains empty */
853 if (flags1 & (RANGE_EMPTY | RANGE_CONTAIN_EMPTY))
854 return r1;
855 /* Else we'd better copy it (modify-in-place isn't safe) */
856 r1 = rangeCopy(r1);
858 return r1;
859 }
860
861 if (range_cmp_bounds(typcache, &lower1, &lower2) <= 0)
862 result_lower = &lower1;
863 else
864 result_lower = &lower2;
865
866 if (range_cmp_bounds(typcache, &upper1, &upper2) >= 0)
867 result_upper = &upper1;
868 else
869 result_upper = &upper2;
870
871 /* optimization to avoid constructing a new range */
872 if (result_lower == &lower1 && result_upper == &upper1 &&
873 ((flags1 & RANGE_CONTAIN_EMPTY) || !(flags2 & RANGE_CONTAIN_EMPTY)))
874 return r1;
875 if (result_lower == &lower2 && result_upper == &upper2 &&
876 ((flags2 & RANGE_CONTAIN_EMPTY) || !(flags1 & RANGE_CONTAIN_EMPTY)))
877 return r2;
878
879 result = make_range(typcache, result_lower, result_upper, false, NULL);
880
881 if ((flags1 & RANGE_CONTAIN_EMPTY) || (flags2 & RANGE_CONTAIN_EMPTY))
883
884 return result;
885}
887static bool
889 const RangeType *r,
890 const MultirangeType *mr)
891{
892 RangeBound lower1,
893 upper1,
894 lower2,
895 upper2,
896 tmp;
897 bool empty;
898
899 if (RangeIsEmpty(r) || MultirangeIsEmpty(mr))
900 return (RangeIsEmpty(r) && MultirangeIsEmpty(mr));
901
902 range_deserialize(typcache, r, &lower1, &upper1, &empty);
903 Assert(!empty);
904 multirange_get_bounds(typcache, mr, 0, &lower2, &tmp);
905 multirange_get_bounds(typcache, mr, mr->rangeCount - 1, &tmp, &upper2);
906
907 return (range_cmp_bounds(typcache, &lower1, &lower2) == 0 &&
908 range_cmp_bounds(typcache, &upper1, &upper2) == 0);
909}
910
911/*
912 * GiST consistent test on an index internal page with range query
913 */
914static bool
916 StrategyNumber strategy,
917 const RangeType *key,
918 const RangeType *query)
919{
920 switch (strategy)
921 {
923 if (RangeIsEmpty(key) || RangeIsEmpty(query))
924 return false;
925 return (!range_overright_internal(typcache, key, query));
927 if (RangeIsEmpty(key) || RangeIsEmpty(query))
928 return false;
929 return (!range_after_internal(typcache, key, query));
931 return range_overlaps_internal(typcache, key, query);
933 if (RangeIsEmpty(key) || RangeIsEmpty(query))
934 return false;
935 return (!range_before_internal(typcache, key, query));
936 case RANGESTRAT_AFTER:
937 if (RangeIsEmpty(key) || RangeIsEmpty(query))
938 return false;
939 return (!range_overleft_internal(typcache, key, query));
941 if (RangeIsEmpty(key) || RangeIsEmpty(query))
942 return false;
943 if (range_adjacent_internal(typcache, key, query))
944 return true;
945 return range_overlaps_internal(typcache, key, query);
947 return range_contains_internal(typcache, key, query);
949
950 /*
951 * Empty ranges are contained by anything, so if key is or
952 * contains any empty ranges, we must descend into it. Otherwise,
953 * descend only if key overlaps the query.
954 */
956 return true;
957 return range_overlaps_internal(typcache, key, query);
958 case RANGESTRAT_EQ:
959
960 /*
961 * If query is empty, descend only if the key is or contains any
962 * empty ranges. Otherwise, descend if key contains query.
963 */
964 if (RangeIsEmpty(query))
966 return range_contains_internal(typcache, key, query);
967 default:
968 elog(ERROR, "unrecognized range strategy: %d", strategy);
969 return false; /* keep compiler quiet */
970 }
971}
972
973/*
974 * GiST consistent test on an index internal page with multirange query
975 */
976static bool
978 StrategyNumber strategy,
979 const RangeType *key,
980 const MultirangeType *query)
981{
982 switch (strategy)
983 {
985 if (RangeIsEmpty(key) || MultirangeIsEmpty(query))
986 return false;
987 return (!range_overright_multirange_internal(typcache, key, query));
989 if (RangeIsEmpty(key) || MultirangeIsEmpty(query))
990 return false;
991 return (!range_after_multirange_internal(typcache, key, query));
993 return range_overlaps_multirange_internal(typcache, key, query);
995 if (RangeIsEmpty(key) || MultirangeIsEmpty(query))
996 return false;
997 return (!range_before_multirange_internal(typcache, key, query));
998 case RANGESTRAT_AFTER:
999 if (RangeIsEmpty(key) || MultirangeIsEmpty(query))
1000 return false;
1001 return (!range_overleft_multirange_internal(typcache, key, query));
1003 if (RangeIsEmpty(key) || MultirangeIsEmpty(query))
1004 return false;
1005 if (range_adjacent_multirange_internal(typcache, key, query))
1006 return true;
1007 return range_overlaps_multirange_internal(typcache, key, query);
1009 return range_contains_multirange_internal(typcache, key, query);
1011
1012 /*
1013 * Empty ranges are contained by anything, so if key is or
1014 * contains any empty ranges, we must descend into it. Otherwise,
1015 * descend only if key overlaps the query.
1016 */
1018 return true;
1019 return range_overlaps_multirange_internal(typcache, key, query);
1020 case RANGESTRAT_EQ:
1021
1022 /*
1023 * If query is empty, descend only if the key is or contains any
1024 * empty ranges. Otherwise, descend if key contains query.
1025 */
1026 if (MultirangeIsEmpty(query))
1028 return range_contains_multirange_internal(typcache, key, query);
1029 default:
1030 elog(ERROR, "unrecognized range strategy: %d", strategy);
1031 return false; /* keep compiler quiet */
1032 }
1033}
1034
1035/*
1036 * GiST consistent test on an index internal page with element query
1038static bool
1040 StrategyNumber strategy,
1041 const RangeType *key,
1042 Datum query)
1043{
1044 switch (strategy)
1045 {
1047 return range_contains_elem_internal(typcache, key, query);
1048 default:
1049 elog(ERROR, "unrecognized range strategy: %d", strategy);
1050 return false; /* keep compiler quiet */
1051 }
1052}
1053
1054/*
1055 * GiST consistent test on an index leaf page with range query
1057static bool
1059 StrategyNumber strategy,
1060 const RangeType *key,
1061 const RangeType *query)
1062{
1063 switch (strategy)
1064 {
1065 case RANGESTRAT_BEFORE:
1066 return range_before_internal(typcache, key, query);
1068 return range_overleft_internal(typcache, key, query);
1070 return range_overlaps_internal(typcache, key, query);
1072 return range_overright_internal(typcache, key, query);
1073 case RANGESTRAT_AFTER:
1074 return range_after_internal(typcache, key, query);
1076 return range_adjacent_internal(typcache, key, query);
1078 return range_contains_internal(typcache, key, query);
1080 return range_contained_by_internal(typcache, key, query);
1081 case RANGESTRAT_EQ:
1082 return range_eq_internal(typcache, key, query);
1083 default:
1084 elog(ERROR, "unrecognized range strategy: %d", strategy);
1085 return false; /* keep compiler quiet */
1086 }
1087}
1088
1089/*
1090 * GiST consistent test on an index leaf page with multirange query
1092static bool
1094 StrategyNumber strategy,
1095 const RangeType *key,
1096 const MultirangeType *query)
1097{
1098 switch (strategy)
1099 {
1100 case RANGESTRAT_BEFORE:
1101 return range_before_multirange_internal(typcache, key, query);
1103 return range_overleft_multirange_internal(typcache, key, query);
1105 return range_overlaps_multirange_internal(typcache, key, query);
1107 return range_overright_multirange_internal(typcache, key, query);
1108 case RANGESTRAT_AFTER:
1109 return range_after_multirange_internal(typcache, key, query);
1111 return range_adjacent_multirange_internal(typcache, key, query);
1113 return range_contains_multirange_internal(typcache, key, query);
1115 return multirange_contains_range_internal(typcache, query, key);
1116 case RANGESTRAT_EQ:
1117 return multirange_union_range_equal(typcache, key, query);
1118 default:
1119 elog(ERROR, "unrecognized range strategy: %d", strategy);
1120 return false; /* keep compiler quiet */
1121 }
1122}
1123
1124/*
1125 * GiST consistent test on an index leaf page with element query
1127static bool
1129 StrategyNumber strategy,
1130 const RangeType *key,
1131 Datum query)
1132{
1133 switch (strategy)
1134 {
1136 return range_contains_elem_internal(typcache, key, query);
1137 default:
1138 elog(ERROR, "unrecognized range strategy: %d", strategy);
1139 return false; /* keep compiler quiet */
1140 }
1141}
1142
1143/*
1144 * Trivial split: half of entries will be placed on one page
1145 * and the other half on the other page.
1147static void
1149 GistEntryVector *entryvec,
1150 GIST_SPLITVEC *v)
1151{
1152 RangeType *left_range = NULL;
1153 RangeType *right_range = NULL;
1155 maxoff,
1156 split_idx;
1157
1158 maxoff = entryvec->n - 1;
1159 /* Split entries before this to left page, after to right: */
1160 split_idx = (maxoff - FirstOffsetNumber) / 2 + FirstOffsetNumber;
1161
1162 v->spl_nleft = 0;
1163 v->spl_nright = 0;
1164 for (i = FirstOffsetNumber; i <= maxoff; i++)
1165 {
1167
1168 if (i < split_idx)
1169 PLACE_LEFT(range, i);
1170 else
1172 }
1173
1174 v->spl_ldatum = RangeTypePGetDatum(left_range);
1175 v->spl_rdatum = RangeTypePGetDatum(right_range);
1176}
1177
1178/*
1179 * Split based on classes of ranges.
1180 *
1181 * See get_gist_range_class for class definitions.
1182 * classes_groups is an array of length CLS_COUNT indicating the side of the
1183 * split to which each class should go.
1185static void
1187 GistEntryVector *entryvec,
1188 GIST_SPLITVEC *v,
1189 SplitLR *classes_groups)
1190{
1191 RangeType *left_range = NULL;
1192 RangeType *right_range = NULL;
1194 maxoff;
1195
1196 maxoff = entryvec->n - 1;
1197
1198 v->spl_nleft = 0;
1199 v->spl_nright = 0;
1200 for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
1201 {
1203 int class;
1204
1205 /* Get class of range */
1206 class = get_gist_range_class(range);
1207
1208 /* Place range to appropriate page */
1209 if (classes_groups[class] == SPLIT_LEFT)
1210 PLACE_LEFT(range, i);
1211 else
1212 {
1213 Assert(classes_groups[class] == SPLIT_RIGHT);
1215 }
1216 }
1217
1218 v->spl_ldatum = RangeTypePGetDatum(left_range);
1219 v->spl_rdatum = RangeTypePGetDatum(right_range);
1220}
1221
1222/*
1223 * Sorting based split. First half of entries according to the sort will be
1224 * placed to one page, and second half of entries will be placed to other
1225 * page. use_upper_bound parameter indicates whether to use upper or lower
1226 * bound for sorting.
1228static void
1230 GistEntryVector *entryvec,
1231 GIST_SPLITVEC *v,
1232 bool use_upper_bound)
1233{
1234 SingleBoundSortItem *sortItems;
1235 RangeType *left_range = NULL;
1236 RangeType *right_range = NULL;
1238 maxoff,
1239 split_idx;
1240
1241 maxoff = entryvec->n - 1;
1242
1243 sortItems = (SingleBoundSortItem *)
1244 palloc(maxoff * sizeof(SingleBoundSortItem));
1245
1246 /*
1247 * Prepare auxiliary array and sort the values.
1248 */
1249 for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
1250 {
1252 RangeBound bound2;
1253 bool empty;
1254
1255 sortItems[i - 1].index = i;
1256 /* Put appropriate bound into array */
1257 if (use_upper_bound)
1258 range_deserialize(typcache, range, &bound2,
1259 &sortItems[i - 1].bound, &empty);
1260 else
1261 range_deserialize(typcache, range, &sortItems[i - 1].bound,
1262 &bound2, &empty);
1263 Assert(!empty);
1264 }
1265
1266 qsort_arg(sortItems, maxoff, sizeof(SingleBoundSortItem),
1267 single_bound_cmp, typcache);
1268
1269 split_idx = maxoff / 2;
1270
1271 v->spl_nleft = 0;
1272 v->spl_nright = 0;
1273
1274 for (i = 0; i < maxoff; i++)
1275 {
1276 int idx = sortItems[i].index;
1278
1279 if (i < split_idx)
1281 else
1283 }
1284
1285 v->spl_ldatum = RangeTypePGetDatum(left_range);
1286 v->spl_rdatum = RangeTypePGetDatum(right_range);
1287}
1288
1289/*
1290 * Double sorting split algorithm.
1291 *
1292 * The algorithm considers dividing ranges into two groups. The first (left)
1293 * group contains general left bound. The second (right) group contains
1294 * general right bound. The challenge is to find upper bound of left group
1295 * and lower bound of right group so that overlap of groups is minimal and
1296 * ratio of distribution is acceptable. Algorithm finds for each lower bound of
1297 * right group minimal upper bound of left group, and for each upper bound of
1298 * left group maximal lower bound of right group. For each found pair
1299 * range_gist_consider_split considers replacement of currently selected
1300 * split with the new one.
1301 *
1302 * After that, all the entries are divided into three groups:
1303 * 1) Entries which should be placed to the left group
1304 * 2) Entries which should be placed to the right group
1305 * 3) "Common entries" which can be placed to either group without affecting
1306 * amount of overlap.
1307 *
1308 * The common ranges are distributed by difference of distance from lower
1309 * bound of common range to lower bound of right group and distance from upper
1310 * bound of common range to upper bound of left group.
1311 *
1312 * For details see:
1313 * "A new double sorting-based node splitting algorithm for R-tree",
1314 * A. Korotkov
1315 * http://syrcose.ispras.ru/2011/files/SYRCoSE2011_Proceedings.pdf#page=36
1317static void
1319 GistEntryVector *entryvec,
1320 GIST_SPLITVEC *v)
1321{
1324 maxoff;
1325 RangeType *left_range = NULL,
1326 *right_range = NULL;
1327 int common_entries_count;
1328 NonEmptyRange *by_lower,
1329 *by_upper;
1330 CommonEntry *common_entries;
1331 int nentries,
1332 i1,
1333 i2;
1334 RangeBound *right_lower,
1335 *left_upper;
1336
1337 memset(&context, 0, sizeof(ConsiderSplitContext));
1338 context.typcache = typcache;
1339 context.has_subtype_diff = OidIsValid(typcache->rng_subdiff_finfo.fn_oid);
1340
1341 maxoff = entryvec->n - 1;
1342 nentries = context.entries_count = maxoff - FirstOffsetNumber + 1;
1343 context.first = true;
1344
1345 /* Allocate arrays for sorted range bounds */
1346 by_lower = (NonEmptyRange *) palloc(nentries * sizeof(NonEmptyRange));
1347 by_upper = (NonEmptyRange *) palloc(nentries * sizeof(NonEmptyRange));
1348
1349 /* Fill arrays of bounds */
1350 for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
1351 {
1353 bool empty;
1354
1355 range_deserialize(typcache, range,
1356 &by_lower[i - FirstOffsetNumber].lower,
1357 &by_lower[i - FirstOffsetNumber].upper,
1358 &empty);
1359 Assert(!empty);
1360 }
1361
1362 /*
1363 * Make two arrays of range bounds: one sorted by lower bound and another
1364 * sorted by upper bound.
1365 */
1366 memcpy(by_upper, by_lower, nentries * sizeof(NonEmptyRange));
1367 qsort_arg(by_lower, nentries, sizeof(NonEmptyRange),
1368 interval_cmp_lower, typcache);
1369 qsort_arg(by_upper, nentries, sizeof(NonEmptyRange),
1370 interval_cmp_upper, typcache);
1371
1372 /*----------
1373 * The goal is to form a left and right range, so that every entry
1374 * range is contained by either left or right interval (or both).
1375 *
1376 * For example, with the ranges (0,1), (1,3), (2,3), (2,4):
1377 *
1378 * 0 1 2 3 4
1379 * +-+
1380 * +---+
1381 * +-+
1382 * +---+
1383 *
1384 * The left and right ranges are of the form (0,a) and (b,4).
1385 * We first consider splits where b is the lower bound of an entry.
1386 * We iterate through all entries, and for each b, calculate the
1387 * smallest possible a. Then we consider splits where a is the
1388 * upper bound of an entry, and for each a, calculate the greatest
1389 * possible b.
1390 *
1391 * In the above example, the first loop would consider splits:
1392 * b=0: (0,1)-(0,4)
1393 * b=1: (0,1)-(1,4)
1394 * b=2: (0,3)-(2,4)
1395 *
1396 * And the second loop:
1397 * a=1: (0,1)-(1,4)
1398 * a=3: (0,3)-(2,4)
1399 * a=4: (0,4)-(2,4)
1400 *----------
1401 */
1402
1403 /*
1404 * Iterate over lower bound of right group, finding smallest possible
1405 * upper bound of left group.
1406 */
1407 i1 = 0;
1408 i2 = 0;
1409 right_lower = &by_lower[i1].lower;
1410 left_upper = &by_upper[i2].lower;
1411 while (true)
1412 {
1413 /*
1414 * Find next lower bound of right group.
1415 */
1416 while (i1 < nentries &&
1417 range_cmp_bounds(typcache, right_lower,
1418 &by_lower[i1].lower) == 0)
1419 {
1420 if (range_cmp_bounds(typcache, &by_lower[i1].upper,
1421 left_upper) > 0)
1422 left_upper = &by_lower[i1].upper;
1423 i1++;
1424 }
1425 if (i1 >= nentries)
1426 break;
1427 right_lower = &by_lower[i1].lower;
1428
1429 /*
1430 * Find count of ranges which anyway should be placed to the left
1431 * group.
1432 */
1433 while (i2 < nentries &&
1434 range_cmp_bounds(typcache, &by_upper[i2].upper,
1435 left_upper) <= 0)
1436 i2++;
1437
1438 /*
1439 * Consider found split to see if it's better than what we had.
1440 */
1441 range_gist_consider_split(&context, right_lower, i1, left_upper, i2);
1442 }
1443
1444 /*
1445 * Iterate over upper bound of left group finding greatest possible lower
1446 * bound of right group.
1447 */
1448 i1 = nentries - 1;
1449 i2 = nentries - 1;
1450 right_lower = &by_lower[i1].upper;
1451 left_upper = &by_upper[i2].upper;
1452 while (true)
1453 {
1454 /*
1455 * Find next upper bound of left group.
1456 */
1457 while (i2 >= 0 &&
1458 range_cmp_bounds(typcache, left_upper,
1459 &by_upper[i2].upper) == 0)
1460 {
1461 if (range_cmp_bounds(typcache, &by_upper[i2].lower,
1462 right_lower) < 0)
1463 right_lower = &by_upper[i2].lower;
1464 i2--;
1465 }
1466 if (i2 < 0)
1467 break;
1468 left_upper = &by_upper[i2].upper;
1469
1470 /*
1471 * Find count of intervals which anyway should be placed to the right
1472 * group.
1473 */
1474 while (i1 >= 0 &&
1475 range_cmp_bounds(typcache, &by_lower[i1].lower,
1476 right_lower) >= 0)
1477 i1--;
1478
1479 /*
1480 * Consider found split to see if it's better than what we had.
1481 */
1482 range_gist_consider_split(&context, right_lower, i1 + 1,
1483 left_upper, i2 + 1);
1484 }
1485
1486 /*
1487 * If we failed to find any acceptable splits, use trivial split.
1488 */
1489 if (context.first)
1490 {
1491 range_gist_fallback_split(typcache, entryvec, v);
1492 return;
1493 }
1494
1495 /*
1496 * Ok, we have now selected bounds of the groups. Now we have to
1497 * distribute entries themselves. At first we distribute entries which can
1498 * be placed unambiguously and collect "common entries" to array.
1499 */
1500
1501 /* Allocate vectors for results */
1502 v->spl_left = (OffsetNumber *) palloc(nentries * sizeof(OffsetNumber));
1503 v->spl_right = (OffsetNumber *) palloc(nentries * sizeof(OffsetNumber));
1504 v->spl_nleft = 0;
1505 v->spl_nright = 0;
1506
1507 /*
1508 * Allocate an array for "common entries" - entries which can be placed to
1509 * either group without affecting overlap along selected axis.
1510 */
1511 common_entries_count = 0;
1512 common_entries = (CommonEntry *) palloc(nentries * sizeof(CommonEntry));
1513
1514 /*
1515 * Distribute entries which can be distributed unambiguously, and collect
1516 * common entries.
1517 */
1518 for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
1519 {
1522 upper;
1523 bool empty;
1524
1525 /*
1526 * Get upper and lower bounds along selected axis.
1527 */
1528 range = DatumGetRangeTypeP(entryvec->vector[i].key);
1529
1530 range_deserialize(typcache, range, &lower, &upper, &empty);
1531
1532 if (range_cmp_bounds(typcache, &upper, context.left_upper) <= 0)
1533 {
1534 /* Fits in the left group */
1535 if (range_cmp_bounds(typcache, &lower, context.right_lower) >= 0)
1536 {
1537 /* Fits also in the right group, so "common entry" */
1538 common_entries[common_entries_count].index = i;
1539 if (context.has_subtype_diff)
1540 {
1541 /*
1542 * delta = (lower - context.right_lower) -
1543 * (context.left_upper - upper)
1544 */
1545 common_entries[common_entries_count].delta =
1546 call_subtype_diff(typcache,
1547 lower.val,
1548 context.right_lower->val) -
1549 call_subtype_diff(typcache,
1550 context.left_upper->val,
1551 upper.val);
1552 }
1553 else
1554 {
1555 /* Without subtype_diff, take all deltas as zero */
1556 common_entries[common_entries_count].delta = 0;
1557 }
1558 common_entries_count++;
1559 }
1560 else
1561 {
1562 /* Doesn't fit to the right group, so join to the left group */
1563 PLACE_LEFT(range, i);
1564 }
1565 }
1566 else
1567 {
1568 /*
1569 * Each entry should fit on either left or right group. Since this
1570 * entry didn't fit in the left group, it better fit in the right
1571 * group.
1572 */
1573 Assert(range_cmp_bounds(typcache, &lower,
1574 context.right_lower) >= 0);
1576 }
1577 }
1578
1579 /*
1580 * Distribute "common entries", if any.
1581 */
1582 if (common_entries_count > 0)
1583 {
1584 /*
1585 * Sort "common entries" by calculated deltas in order to distribute
1586 * the most ambiguous entries first.
1587 */
1588 qsort(common_entries, common_entries_count, sizeof(CommonEntry),
1590
1591 /*
1592 * Distribute "common entries" between groups according to sorting.
1593 */
1594 for (i = 0; i < common_entries_count; i++)
1595 {
1597 int idx = common_entries[i].index;
1598
1599 range = DatumGetRangeTypeP(entryvec->vector[idx].key);
1600
1601 /*
1602 * Check if we have to place this entry in either group to achieve
1603 * LIMIT_RATIO.
1604 */
1605 if (i < context.common_left)
1607 else
1609 }
1610 }
1611
1612 v->spl_ldatum = PointerGetDatum(left_range);
1613 v->spl_rdatum = PointerGetDatum(right_range);
1614}
1615
1616/*
1617 * Consider replacement of currently selected split with a better one
1618 * during range_gist_double_sorting_split.
1620static void
1622 RangeBound *right_lower, int min_left_count,
1623 RangeBound *left_upper, int max_left_count)
1624{
1625 int left_count,
1626 right_count;
1627 float4 ratio,
1628 overlap;
1629
1630 /*
1631 * Calculate entries distribution ratio assuming most uniform distribution
1632 * of common entries.
1633 */
1634 if (min_left_count >= (context->entries_count + 1) / 2)
1635 left_count = min_left_count;
1636 else if (max_left_count <= context->entries_count / 2)
1637 left_count = max_left_count;
1638 else
1639 left_count = context->entries_count / 2;
1640 right_count = context->entries_count - left_count;
1641
1642 /*
1643 * Ratio of split: quotient between size of smaller group and total
1644 * entries count. This is necessarily 0.5 or less; if it's less than
1645 * LIMIT_RATIO then we will never accept the new split.
1646 */
1647 ratio = ((float4) Min(left_count, right_count)) /
1648 ((float4) context->entries_count);
1649
1650 if (ratio > LIMIT_RATIO)
1651 {
1652 bool selectthis = false;
1653
1654 /*
1655 * The ratio is acceptable, so compare current split with previously
1656 * selected one. We search for minimal overlap (allowing negative
1657 * values) and minimal ratio secondarily. If subtype_diff is
1658 * available, it's used for overlap measure. Without subtype_diff we
1659 * use number of "common entries" as an overlap measure.
1660 */
1661 if (context->has_subtype_diff)
1662 overlap = call_subtype_diff(context->typcache,
1663 left_upper->val,
1664 right_lower->val);
1665 else
1666 overlap = max_left_count - min_left_count;
1667
1668 /* If there is no previous selection, select this split */
1669 if (context->first)
1670 selectthis = true;
1671 else
1672 {
1673 /*
1674 * Choose the new split if it has a smaller overlap, or same
1675 * overlap but better ratio.
1676 */
1677 if (overlap < context->overlap ||
1678 (overlap == context->overlap && ratio > context->ratio))
1679 selectthis = true;
1680 }
1681
1682 if (selectthis)
1683 {
1684 /* save information about selected split */
1685 context->first = false;
1686 context->ratio = ratio;
1687 context->overlap = overlap;
1688 context->right_lower = right_lower;
1689 context->left_upper = left_upper;
1690 context->common_left = max_left_count - left_count;
1691 context->common_right = left_count - min_left_count;
1692 }
1693 }
1694}
1695
1696/*
1697 * Find class number for range.
1698 *
1699 * The class number is a valid combination of the properties of the
1700 * range. Note: the highest possible number is 8, because CLS_EMPTY
1701 * can't be combined with anything else.
1703static int
1705{
1706 int classNumber;
1707 char flags;
1708
1709 flags = range_get_flags(range);
1710 if (flags & RANGE_EMPTY)
1711 {
1712 classNumber = CLS_EMPTY;
1713 }
1714 else
1715 {
1716 classNumber = 0;
1717 if (flags & RANGE_LB_INF)
1718 classNumber |= CLS_LOWER_INF;
1719 if (flags & RANGE_UB_INF)
1720 classNumber |= CLS_UPPER_INF;
1721 if (flags & RANGE_CONTAIN_EMPTY)
1722 classNumber |= CLS_CONTAIN_EMPTY;
1723 }
1724 return classNumber;
1725}
1726
1727/*
1728 * Comparison function for range_gist_single_sorting_split.
1730static int
1731single_bound_cmp(const void *a, const void *b, void *arg)
1732{
1735 TypeCacheEntry *typcache = (TypeCacheEntry *) arg;
1736
1737 return range_cmp_bounds(typcache, &i1->bound, &i2->bound);
1738}
1739
1740/*
1741 * Compare NonEmptyRanges by lower bound.
1743static int
1744interval_cmp_lower(const void *a, const void *b, void *arg)
1745{
1746 NonEmptyRange *i1 = (NonEmptyRange *) a;
1747 NonEmptyRange *i2 = (NonEmptyRange *) b;
1748 TypeCacheEntry *typcache = (TypeCacheEntry *) arg;
1749
1750 return range_cmp_bounds(typcache, &i1->lower, &i2->lower);
1751}
1752
1753/*
1754 * Compare NonEmptyRanges by upper bound.
1756static int
1757interval_cmp_upper(const void *a, const void *b, void *arg)
1758{
1759 NonEmptyRange *i1 = (NonEmptyRange *) a;
1760 NonEmptyRange *i2 = (NonEmptyRange *) b;
1761 TypeCacheEntry *typcache = (TypeCacheEntry *) arg;
1762
1763 return range_cmp_bounds(typcache, &i1->upper, &i2->upper);
1764}
1765
1766/*
1767 * Compare CommonEntrys by their deltas.
1769static int
1770common_entry_cmp(const void *i1, const void *i2)
1771{
1772 double delta1 = ((CommonEntry *) i1)->delta;
1773 double delta2 = ((CommonEntry *) i2)->delta;
1774
1775 if (delta1 < delta2)
1776 return -1;
1777 else if (delta1 > delta2)
1778 return 1;
1779 else
1780 return 0;
1781}
1782
1783/*
1784 * Convenience function to invoke type-specific subtype_diff function.
1785 * Caller must have already checked that there is one for the range type.
1787static float8
1788call_subtype_diff(TypeCacheEntry *typcache, Datum val1, Datum val2)
1789{
1790 float8 value;
1791
1793 typcache->rng_collation,
1794 val1, val2));
1795 /* Cope with buggy subtype_diff function by returning zero */
1796 if (value >= 0.0)
1797 return value;
1798 return 0.0;
1799}
Datum idx(PG_FUNCTION_ARGS)
Definition: _int_op.c:259
#define Min(x, y)
Definition: c.h:958
#define Assert(condition)
Definition: c.h:812
double float8
Definition: c.h:584
float float4
Definition: c.h:583
#define OidIsValid(objectId)
Definition: c.h:729
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
static float4 get_float4_infinity(void)
Definition: float.h:74
Datum FunctionCall2Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2)
Definition: fmgr.c:1149
#define PG_GETARG_OID(n)
Definition: fmgr.h:275
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:276
#define PG_GETARG_DATUM(n)
Definition: fmgr.h:268
#define PG_GETARG_UINT16(n)
Definition: fmgr.h:272
#define PG_RETURN_POINTER(x)
Definition: fmgr.h:361
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
#define PG_RETURN_BOOL(x)
Definition: fmgr.h:359
#define GIST_LEAF(entry)
Definition: gist.h:170
#define gistentryinit(e, k, r, pg, o, l)
Definition: gist.h:244
static struct @161 value
int b
Definition: isn.c:69
int a
Definition: isn.c:68
int j
Definition: isn.c:73
int i
Definition: isn.c:72
void * palloc(Size size)
Definition: mcxt.c:1317
void multirange_get_bounds(TypeCacheEntry *rangetyp, const MultirangeType *multirange, uint32 i, RangeBound *lower, RangeBound *upper)
bool range_adjacent_multirange_internal(TypeCacheEntry *rangetyp, const RangeType *r, const MultirangeType *mr)
bool range_after_multirange_internal(TypeCacheEntry *rangetyp, const RangeType *r, const MultirangeType *mr)
bool multirange_contains_range_internal(TypeCacheEntry *rangetyp, const MultirangeType *mr, const RangeType *r)
TypeCacheEntry * multirange_get_typcache(FunctionCallInfo fcinfo, Oid mltrngtypid)
RangeType * multirange_get_union_range(TypeCacheEntry *rangetyp, const MultirangeType *mr)
bool range_before_multirange_internal(TypeCacheEntry *rangetyp, const RangeType *r, const MultirangeType *mr)
bool range_overright_multirange_internal(TypeCacheEntry *rangetyp, const RangeType *r, const MultirangeType *mr)
bool range_contains_multirange_internal(TypeCacheEntry *rangetyp, const RangeType *r, const MultirangeType *mr)
bool range_overlaps_multirange_internal(TypeCacheEntry *rangetyp, const RangeType *r, const MultirangeType *mr)
bool range_overleft_multirange_internal(TypeCacheEntry *rangetyp, const RangeType *r, const MultirangeType *mr)
#define MultirangeIsEmpty(mr)
#define MultirangeTypeGetOid(mr)
static MultirangeType * DatumGetMultirangeTypeP(Datum X)
#define OffsetNumberNext(offsetNumber)
Definition: off.h:52
uint16 OffsetNumber
Definition: off.h:24
#define FirstOffsetNumber
Definition: off.h:27
Datum lower(PG_FUNCTION_ARGS)
Definition: oracle_compat.c:49
Datum upper(PG_FUNCTION_ARGS)
Definition: oracle_compat.c:80
void * arg
void qsort_arg(void *base, size_t nel, size_t elsize, qsort_arg_comparator cmp, void *arg)
#define qsort(a, b, c, d)
Definition: port.h:447
static Datum PointerGetDatum(const void *X)
Definition: postgres.h:322
uintptr_t Datum
Definition: postgres.h:64
static float8 DatumGetFloat8(Datum X)
Definition: postgres.h:494
unsigned int Oid
Definition: postgres_ext.h:31
tree context
Definition: radixtree.h:1837
int range_cmp_bounds(TypeCacheEntry *typcache, const RangeBound *b1, const RangeBound *b2)
Definition: rangetypes.c:2016
bool range_contained_by_internal(TypeCacheEntry *typcache, const RangeType *r1, const RangeType *r2)
Definition: rangetypes.c:2618
RangeType * make_range(TypeCacheEntry *typcache, RangeBound *lower, RangeBound *upper, bool empty, struct Node *escontext)
Definition: rangetypes.c:1952
bool range_contains_internal(TypeCacheEntry *typcache, const RangeType *r1, const RangeType *r2)
Definition: rangetypes.c:2586
bool range_after_internal(TypeCacheEntry *typcache, const RangeType *r1, const RangeType *r2)
Definition: rangetypes.c:702
bool range_overlaps_internal(TypeCacheEntry *typcache, const RangeType *r1, const RangeType *r2)
Definition: rangetypes.c:841
bool range_before_internal(TypeCacheEntry *typcache, const RangeType *r1, const RangeType *r2)
Definition: rangetypes.c:664
bool range_overright_internal(TypeCacheEntry *typcache, const RangeType *r1, const RangeType *r2)
Definition: rangetypes.c:928
bool range_contains_elem_internal(TypeCacheEntry *typcache, const RangeType *r, Datum val)
Definition: rangetypes.c:2627
void range_deserialize(TypeCacheEntry *typcache, const RangeType *range, RangeBound *lower, RangeBound *upper, bool *empty)
Definition: rangetypes.c:1856
bool range_eq_internal(TypeCacheEntry *typcache, const RangeType *r1, const RangeType *r2)
Definition: rangetypes.c:573
bool range_adjacent_internal(TypeCacheEntry *typcache, const RangeType *r1, const RangeType *r2)
Definition: rangetypes.c:798
void range_set_contain_empty(RangeType *range)
Definition: rangetypes.c:1937
TypeCacheEntry * range_get_typcache(FunctionCallInfo fcinfo, Oid rngtypid)
Definition: rangetypes.c:1703
char range_get_flags(const RangeType *range)
Definition: rangetypes.c:1923
bool range_overleft_internal(TypeCacheEntry *typcache, const RangeType *r1, const RangeType *r2)
Definition: rangetypes.c:887
#define RANGESTRAT_OVERLAPS
Definition: rangetypes.h:98
#define RANGESTRAT_AFTER
Definition: rangetypes.h:100
static RangeType * DatumGetRangeTypeP(Datum X)
Definition: rangetypes.h:73
#define RANGESTRAT_BEFORE
Definition: rangetypes.h:96
#define RANGESTRAT_OVERRIGHT
Definition: rangetypes.h:99
#define RANGE_CONTAIN_EMPTY
Definition: rangetypes.h:45
#define RangeIsOrContainsEmpty(r)
Definition: rangetypes.h:56
#define RANGESTRAT_OVERLEFT
Definition: rangetypes.h:97
#define RANGESTRAT_CONTAINED_BY
Definition: rangetypes.h:103
#define RangeIsEmpty(r)
Definition: rangetypes.h:55
static Datum RangeTypePGetDatum(const RangeType *X)
Definition: rangetypes.h:85
#define RANGE_UB_INF
Definition: rangetypes.h:42
#define RANGESTRAT_EQ
Definition: rangetypes.h:105
#define PG_RETURN_RANGE_P(x)
Definition: rangetypes.h:92
#define RANGE_EMPTY
Definition: rangetypes.h:38
#define RANGESTRAT_ADJACENT
Definition: rangetypes.h:101
#define PG_GETARG_RANGE_P(n)
Definition: rangetypes.h:90
#define RANGESTRAT_CONTAINS_ELEM
Definition: rangetypes.h:104
#define RANGESTRAT_CONTAINS
Definition: rangetypes.h:102
#define RANGE_LB_INF
Definition: rangetypes.h:41
#define RangeTypeGetOid(r)
Definition: rangetypes.h:35
static int interval_cmp_lower(const void *a, const void *b, void *arg)
static void range_gist_consider_split(ConsiderSplitContext *context, RangeBound *right_lower, int min_left_count, RangeBound *left_upper, int max_left_count)
#define CLS_CONTAIN_EMPTY
static bool range_gist_consistent_int_element(TypeCacheEntry *typcache, StrategyNumber strategy, const RangeType *key, Datum query)
#define DEFAULT_SUBTYPE_DIFF_PENALTY
static bool multirange_union_range_equal(TypeCacheEntry *typcache, const RangeType *r, const MultirangeType *mr)
Datum multirange_gist_compress(PG_FUNCTION_ARGS)
static int single_bound_cmp(const void *a, const void *b, void *arg)
#define LIMIT_RATIO
Datum range_gist_union(PG_FUNCTION_ARGS)
static bool range_gist_consistent_leaf_multirange(TypeCacheEntry *typcache, StrategyNumber strategy, const RangeType *key, const MultirangeType *query)
#define CLS_UPPER_INF
#define PLACE_RIGHT(range, off)
#define CLS_NORMAL
Datum range_gist_same(PG_FUNCTION_ARGS)
SplitLR
@ SPLIT_RIGHT
@ SPLIT_LEFT
#define rangeCopy(r)
static bool range_gist_consistent_leaf_range(TypeCacheEntry *typcache, StrategyNumber strategy, const RangeType *key, const RangeType *query)
static void range_gist_fallback_split(TypeCacheEntry *typcache, GistEntryVector *entryvec, GIST_SPLITVEC *v)
static bool range_gist_consistent_leaf_element(TypeCacheEntry *typcache, StrategyNumber strategy, const RangeType *key, Datum query)
static void range_gist_double_sorting_split(TypeCacheEntry *typcache, GistEntryVector *entryvec, GIST_SPLITVEC *v)
#define CLS_EMPTY
Datum range_gist_picksplit(PG_FUNCTION_ARGS)
static void range_gist_class_split(TypeCacheEntry *typcache, GistEntryVector *entryvec, GIST_SPLITVEC *v, SplitLR *classes_groups)
#define CONTAIN_EMPTY_PENALTY
#define PLACE_LEFT(range, off)
static int get_gist_range_class(RangeType *range)
static float8 call_subtype_diff(TypeCacheEntry *typcache, Datum val1, Datum val2)
static int common_entry_cmp(const void *i1, const void *i2)
static int interval_cmp_upper(const void *a, const void *b, void *arg)
Datum multirange_gist_consistent(PG_FUNCTION_ARGS)
Datum range_gist_consistent(PG_FUNCTION_ARGS)
static bool range_gist_consistent_int_range(TypeCacheEntry *typcache, StrategyNumber strategy, const RangeType *key, const RangeType *query)
static void range_gist_single_sorting_split(TypeCacheEntry *typcache, GistEntryVector *entryvec, GIST_SPLITVEC *v, bool use_upper_bound)
static RangeType * range_super_union(TypeCacheEntry *typcache, RangeType *r1, RangeType *r2)
static bool range_gist_consistent_int_multirange(TypeCacheEntry *typcache, StrategyNumber strategy, const RangeType *key, const MultirangeType *query)
#define INFINITE_BOUND_PENALTY
Datum range_gist_penalty(PG_FUNCTION_ARGS)
#define CLS_LOWER_INF
#define CLS_COUNT
static struct cvec * range(struct vars *v, chr a, chr b, int cases)
Definition: regc_locale.c:412
uint16 StrategyNumber
Definition: stratnum.h:22
float8 delta
Definition: gistproc.c:276
Oid fn_oid
Definition: fmgr.h:59
OffsetNumber offset
Definition: gist.h:163
Datum key
Definition: gist.h:160
Page page
Definition: gist.h:162
Relation rel
Definition: gist.h:161
bool leafkey
Definition: gist.h:164
int spl_nleft
Definition: gist.h:143
OffsetNumber * spl_right
Definition: gist.h:147
Datum spl_ldatum
Definition: gist.h:144
Datum spl_rdatum
Definition: gist.h:149
int spl_nright
Definition: gist.h:148
OffsetNumber * spl_left
Definition: gist.h:142
GISTENTRY vector[FLEXIBLE_ARRAY_MEMBER]
Definition: gist.h:236
int32 n
Definition: gist.h:235
RangeBound upper
RangeBound lower
bool infinite
Definition: rangetypes.h:64
Datum val
Definition: rangetypes.h:63
Oid rng_collation
Definition: typcache.h:100
struct TypeCacheEntry * rngtype
Definition: typcache.h:108
FmgrInfo rng_subdiff_finfo
Definition: typcache.h:103
Definition: type.h:96