PostgreSQL Source Code git master
brin_minmax_multi.c
Go to the documentation of this file.
1/*
2 * brin_minmax_multi.c
3 * Implementation of Multi Min/Max opclass for BRIN
4 *
5 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
6 * Portions Copyright (c) 1994, Regents of the University of California
7 *
8 *
9 * Implements a variant of minmax opclass, where the summary is composed of
10 * multiple smaller intervals. This allows us to handle outliers, which
11 * usually make the simple minmax opclass inefficient.
12 *
13 * Consider for example page range with simple minmax interval [1000,2000],
14 * and assume a new row gets inserted into the range with value 1000000.
15 * Due to that the interval gets [1000,1000000]. I.e. the minmax interval
16 * got 1000x wider and won't be useful to eliminate scan keys between 2001
17 * and 1000000.
18 *
19 * With minmax-multi opclass, we may have [1000,2000] interval initially,
20 * but after adding the new row we start tracking it as two intervals:
21 *
22 * [1000,2000] and [1000000,1000000]
23 *
24 * This allows us to still eliminate the page range when the scan keys hit
25 * the gap between 2000 and 1000000, making it useful in cases when the
26 * simple minmax opclass gets inefficient.
27 *
28 * The number of intervals tracked per page range is somewhat flexible.
29 * What is restricted is the number of values per page range, and the limit
30 * is currently 32 (see values_per_range reloption). Collapsed intervals
31 * (with equal minimum and maximum value) are stored as a single value,
32 * while regular intervals require two values.
33 *
34 * When the number of values gets too high (by adding new values to the
35 * summary), we merge some of the intervals to free space for more values.
36 * This is done in a greedy way - we simply pick the two closest intervals,
37 * merge them, and repeat this until the number of values to store gets
38 * sufficiently low (below 50% of maximum values, but that is a mostly
39 * arbitrary threshold and may be changed easily).
40 *
41 * To pick the closest intervals we use the "distance" support procedure,
42 * which measures space between two ranges (i.e. the length of an interval).
43 * The computed value may be an approximation - in the worst case we will
44 * merge two ranges that are slightly less optimal at that step, but the
45 * index should still produce correct results.
46 *
47 * Compaction (reducing the number of values) is fairly expensive, as
48 * it requires calling the distance functions, sorting etc. So when building
49 * the summary, we use a significantly larger buffer, and only enforce the
50 * exact limit at the very end. This improves performance, and it also helps
51 * with building better ranges (due to the greedy approach).
52 *
53 *
54 * IDENTIFICATION
55 * src/backend/access/brin/brin_minmax_multi.c
56 */
57#include "postgres.h"
58
59/* needed for PGSQL_AF_INET */
60#include <sys/socket.h>
61
62#include "access/brin.h"
64#include "access/brin_tuple.h"
65#include "access/genam.h"
66#include "access/htup_details.h"
67#include "access/reloptions.h"
68#include "access/stratnum.h"
69#include "catalog/pg_am.h"
70#include "catalog/pg_amop.h"
71#include "catalog/pg_type.h"
72#include "utils/array.h"
73#include "utils/builtins.h"
74#include "utils/date.h"
75#include "utils/datum.h"
76#include "utils/float.h"
77#include "utils/inet.h"
78#include "utils/lsyscache.h"
79#include "utils/memutils.h"
80#include "utils/pg_lsn.h"
81#include "utils/rel.h"
82#include "utils/syscache.h"
83#include "utils/timestamp.h"
84#include "utils/uuid.h"
85
86/*
87 * Additional SQL level support functions
88 *
89 * Procedure numbers must not use values reserved for BRIN itself; see
90 * brin_internal.h.
91 */
#define		MINMAX_MAX_PROCNUMS		1	/* maximum support procs we need */
#define		PROCNUM_DISTANCE		11	/* required, distance between values */

/*
 * Offset to subtract from a procnum to get the matching index in the
 * MinmaxMultiOpaque arrays (has to equal the lowest private procnum).
 */
#define		PROCNUM_BASE			11

/*
 * Insert buffer sizing - the buffer holds 10x the number of values set by
 * the reloption, capped at 8192 so that it does not grow too large. Once
 * the buffer fills up, the number of values is reduced by half.
 */
#define		MINMAX_BUFFER_FACTOR		10
#define		MINMAX_BUFFER_MIN			256
#define		MINMAX_BUFFER_MAX			8192
#define		MINMAX_BUFFER_LOAD_FACTOR	0.5
110
111typedef struct MinmaxMultiOpaque
112{
117
118/*
119 * Storage type for BRIN's minmax reloptions
120 */
121typedef struct MinMaxMultiOptions
122{
123 int32 vl_len_; /* varlena header (do not touch directly!) */
124 int valuesPerRange; /* number of values per range */
126
#define MINMAX_MULTI_DEFAULT_VALUES_PER_PAGE	32

/*
 * Number of values to keep per page range: the valuesPerRange field of the
 * options when options are present and the field is non-zero, and
 * MINMAX_MULTI_DEFAULT_VALUES_PER_PAGE otherwise.
 */
#define MinMaxMultiGetValuesPerRange(opts) \
	((opts) && (((MinMaxMultiOptions *) (opts))->valuesPerRange != 0) ? \
	 ((MinMaxMultiOptions *) (opts))->valuesPerRange : \
	 MINMAX_MULTI_DEFAULT_VALUES_PER_PAGE)

/* true when both arguments are negative, or both are non-negative */
#define SAMESIGN(a,b)	(((a) < 0) == ((b) < 0))
135
136/*
137 * The summary of minmax-multi indexes has two representations - Ranges for
138 * convenient processing, and SerializedRanges for storage in bytea value.
139 *
140 * The Ranges struct stores the boundary values in a single array, but we
141 * treat regular and single-point ranges differently to save space. For
142 * regular ranges (with different boundary values) we have to store both
143 * the lower and upper bound of the range, while for "single-point ranges"
144 * we only need to store a single value.
145 *
146 * The 'values' array stores boundary values for regular ranges first (there
147 * are 2*nranges values to store), and then the nvalues boundary values for
148 * single-point ranges. That is, we have (2*nranges + nvalues) boundary
149 * values in the array.
150 *
151 * +-------------------------+----------------------------------+
152 * | ranges (2 * nranges of) | single point values (nvalues of) |
153 * +-------------------------+----------------------------------+
154 *
155 * This allows us to quickly add new values, and store outliers without
156 * having to widen any of the existing range values.
157 *
158 * 'nsorted' denotes how many of 'nvalues' in the values[] array are sorted.
159 * When nsorted == nvalues, all single point values are sorted.
160 *
161 * We never store more than maxvalues values (as set by values_per_range
162 * reloption). If needed we merge some of the ranges.
163 *
164 * To minimize palloc overhead, we always allocate the full array with
165 * space for maxvalues elements. This should be fine as long as the
166 * maxvalues is reasonably small (64 seems fine), which is the case
167 * thanks to values_per_range reloption being limited to 256.
168 */
169typedef struct Ranges
170{
171 /* Cache information that we need quite often. */
176
177 /* (2*nranges + nvalues) <= maxvalues */
178 int nranges; /* number of ranges in the values[] array */
179 int nsorted; /* number of nvalues which are sorted */
180 int nvalues; /* number of point values in values[] array */
181 int maxvalues; /* number of elements in the values[] array */
182
183 /*
184 * We simply add the values into a large buffer, without any expensive
185 * steps (sorting, deduplication, ...). The buffer is a multiple of the
186 * target number of values, so the compaction happens less often,
187 * amortizing the costs. We keep the actual target and compact to the
188 * requested number of values at the very end, before serializing to
189 * on-disk representation.
190 */
191 /* requested number of values */
193
194 /* values stored for this range - either raw values, or ranges */
197
198/*
199 * On-disk the summary is stored as a bytea value, with a simple header
200 * with basic metadata, followed by the boundary values. It has a varlena
201 * header, so can be treated as varlena directly.
202 *
203 * See brin_range_serialize/brin_range_deserialize for serialization details.
204 */
205typedef struct SerializedRanges
206{
207 /* varlena header (do not touch directly!) */
209
210 /* type of values stored in the data array */
212
213 /* (2*nranges + nvalues) <= maxvalues */
214 int nranges; /* number of ranges in the array (stored) */
215 int nvalues; /* number of values in the data array (all) */
216 int maxvalues; /* maximum number of values (reloption) */
217
218 /* contains the actual data */
221
223
224static Ranges *brin_range_deserialize(int maxvalues,
225 SerializedRanges *serialized);
226
227
228/*
229 * Used to represent ranges expanded to make merging and combining easier.
230 *
231 * Each expanded range is essentially an interval, represented by min/max
232 * values, along with a flag whether it's a collapsed range (in which case
233 * the min and max values are equal). We have the flag to handle by-ref
234 * data types - we can't simply compare the datums, and this saves some
235 * calls to the type-specific comparator function.
236 */
237typedef struct ExpandedRange
238{
239 Datum minval; /* lower boundary */
240 Datum maxval; /* upper boundary */
241 bool collapsed; /* true if minval==maxval */
243
244/*
245 * Represents a distance between two ranges (identified by index into
246 * an array of extended ranges).
247 */
typedef struct DistanceValue
{
	int			index;			/* index into the array of expanded ranges */
	double		value;			/* the distance itself */
} DistanceValue;
253
254
255/* Cache for support and strategy procedures. */
256
258 uint16 procnum);
259
261 uint16 attno, Oid subtype,
262 uint16 strategynum);
263
264typedef struct compare_context
265{
269
270static int compare_values(const void *a, const void *b, void *arg);
271
272
#ifdef USE_ASSERT_CHECKING
/*
 * Assert that each element of values[] compares as "less than" its
 * successor, using the cmp function (which should be the
 * BTLessStrategyNumber operator) and the given collation.
 */
static void
AssertArrayOrder(FmgrInfo *cmp, Oid colloid, Datum *values, int nvalues)
{
	int			pos;

	/* walk all adjacent pairs (values[pos - 1], values[pos]) */
	for (pos = 1; pos < nvalues; pos++)
	{
		Datum		is_less = FunctionCall2Coll(cmp, colloid,
												values[pos - 1], values[pos]);

		Assert(DatumGetBool(is_less));
	}
}
#endif
291
292/*
293 * Comprehensive check of the Ranges structure.
294 */
295static void
296AssertCheckRanges(Ranges *ranges, FmgrInfo *cmpFn, Oid colloid)
297{
298#ifdef USE_ASSERT_CHECKING
299 int i;
300
301 /* some basic sanity checks */
302 Assert(ranges->nranges >= 0);
303 Assert(ranges->nsorted >= 0);
304 Assert(ranges->nvalues >= ranges->nsorted);
305 Assert(ranges->maxvalues >= 2 * ranges->nranges + ranges->nvalues);
306 Assert(ranges->typid != InvalidOid);
307
308 /*
309 * First the ranges - there are 2*nranges boundary values, and the values
310 * have to be strictly ordered (equal values would mean the range is
311 * collapsed, and should be stored as a point). This also guarantees that
312 * the ranges do not overlap.
313 */
314 AssertArrayOrder(cmpFn, colloid, ranges->values, 2 * ranges->nranges);
315
316 /* then the single-point ranges (with nvalues boundary values ) */
317 AssertArrayOrder(cmpFn, colloid, &ranges->values[2 * ranges->nranges],
318 ranges->nsorted);
319
320 /*
321 * Check that none of the values are not covered by ranges (both sorted
322 * and unsorted)
323 */
324 if (ranges->nranges > 0)
325 {
326 for (i = 0; i < ranges->nvalues; i++)
327 {
328 Datum compar;
329 int start,
330 end;
331 Datum minvalue = ranges->values[0];
332 Datum maxvalue = ranges->values[2 * ranges->nranges - 1];
333 Datum value = ranges->values[2 * ranges->nranges + i];
334
335 compar = FunctionCall2Coll(cmpFn, colloid, value, minvalue);
336
337 /*
338 * If the value is smaller than the lower bound in the first range
339 * then it cannot possibly be in any of the ranges.
340 */
341 if (DatumGetBool(compar))
342 continue;
343
344 compar = FunctionCall2Coll(cmpFn, colloid, maxvalue, value);
345
346 /*
347 * Likewise, if the value is larger than the upper bound of the
348 * final range, then it cannot possibly be inside any of the
349 * ranges.
350 */
351 if (DatumGetBool(compar))
352 continue;
353
354 /* bsearch the ranges to see if 'value' fits within any of them */
355 start = 0; /* first range */
356 end = ranges->nranges - 1; /* last range */
357 while (true)
358 {
359 int midpoint = (start + end) / 2;
360
361 /* this means we ran out of ranges in the last step */
362 if (start > end)
363 break;
364
365 /* copy the min/max values from the ranges */
366 minvalue = ranges->values[2 * midpoint];
367 maxvalue = ranges->values[2 * midpoint + 1];
368
369 /*
370 * Is the value smaller than the minval? If yes, we'll recurse
371 * to the left side of range array.
372 */
373 compar = FunctionCall2Coll(cmpFn, colloid, value, minvalue);
374
375 /* smaller than the smallest value in this range */
376 if (DatumGetBool(compar))
377 {
378 end = (midpoint - 1);
379 continue;
380 }
381
382 /*
383 * Is the value greater than the minval? If yes, we'll recurse
384 * to the right side of range array.
385 */
386 compar = FunctionCall2Coll(cmpFn, colloid, maxvalue, value);
387
388 /* larger than the largest value in this range */
389 if (DatumGetBool(compar))
390 {
391 start = (midpoint + 1);
392 continue;
393 }
394
395 /* hey, we found a matching range */
396 Assert(false);
397 }
398 }
399 }
400
401 /* and values in the unsorted part must not be in the sorted part */
402 if (ranges->nsorted > 0)
403 {
404 compare_context cxt;
405
406 cxt.colloid = ranges->colloid;
407 cxt.cmpFn = ranges->cmp;
408
409 for (i = ranges->nsorted; i < ranges->nvalues; i++)
410 {
411 Datum value = ranges->values[2 * ranges->nranges + i];
412
413 Assert(bsearch_arg(&value, &ranges->values[2 * ranges->nranges],
414 ranges->nsorted, sizeof(Datum),
415 compare_values, &cxt) == NULL);
416 }
417 }
418#endif
419}
420
421/*
422 * Check that the expanded ranges (built when reducing the number of ranges
423 * by combining some of them) are correctly sorted and do not overlap.
424 */
425static void
427 Form_pg_attribute attr, ExpandedRange *ranges,
428 int nranges)
429{
430#ifdef USE_ASSERT_CHECKING
431 int i;
432 FmgrInfo *eq;
433 FmgrInfo *lt;
434
435 eq = minmax_multi_get_strategy_procinfo(bdesc, attno, attr->atttypid,
437
438 lt = minmax_multi_get_strategy_procinfo(bdesc, attno, attr->atttypid,
440
441 /*
442 * Each range independently should be valid, i.e. that for the boundary
443 * values (lower <= upper).
444 */
445 for (i = 0; i < nranges; i++)
446 {
447 Datum r;
448 Datum minval = ranges[i].minval;
449 Datum maxval = ranges[i].maxval;
450
451 if (ranges[i].collapsed) /* collapsed: minval == maxval */
452 r = FunctionCall2Coll(eq, colloid, minval, maxval);
453 else /* non-collapsed: minval < maxval */
454 r = FunctionCall2Coll(lt, colloid, minval, maxval);
455
457 }
458
459 /*
460 * And the ranges should be ordered and must not overlap, i.e. upper <
461 * lower for boundaries of consecutive ranges.
462 */
463 for (i = 0; i < nranges - 1; i++)
464 {
465 Datum r;
466 Datum maxval = ranges[i].maxval;
467 Datum minval = ranges[i + 1].minval;
468
469 r = FunctionCall2Coll(lt, colloid, maxval, minval);
470
472 }
473#endif
474}
475
476
477/*
478 * minmax_multi_init
479 * Initialize the deserialized range list, allocate all the memory.
480 *
481 * This is only in-memory representation of the ranges, so we allocate
482 * enough space for the maximum number of values (so as not to have to do
483 * repallocs as the ranges grow).
484 */
485static Ranges *
486minmax_multi_init(int maxvalues)
487{
488 Size len;
489 Ranges *ranges;
490
491 Assert(maxvalues > 0);
492
493 len = offsetof(Ranges, values); /* fixed header */
494 len += maxvalues * sizeof(Datum); /* Datum values */
495
496 ranges = (Ranges *) palloc0(len);
497
498 ranges->maxvalues = maxvalues;
499
500 return ranges;
501}
502
503
504/*
505 * range_deduplicate_values
506 * Deduplicate the part with values in the simple points.
507 *
508 * This is meant to be a cheaper way of reducing the size of the ranges. It
509 * does not touch the ranges, and only sorts the other values - it does not
510 * call the distance functions, which may be quite expensive, etc.
511 *
512 * We do know the values are not duplicate with the ranges, because we check
513 * that before adding a new value. Same for the sorted part of values.
514 */
515static void
517{
518 int i,
519 n;
520 int start;
521 compare_context cxt;
522
523 /*
524 * If there are no unsorted values, we're done (this probably can't
525 * happen, as we're adding values to unsorted part).
526 */
527 if (range->nsorted == range->nvalues)
528 return;
529
530 /* sort the values */
531 cxt.colloid = range->colloid;
532 cxt.cmpFn = range->cmp;
533
534 /* the values start right after the ranges (which are always sorted) */
535 start = 2 * range->nranges;
536
537 /*
538 * XXX This might do a merge sort, to leverage that the first part of the
539 * array is already sorted. If the sorted part is large, it might be quite
540 * a bit faster.
541 */
542 qsort_arg(&range->values[start],
543 range->nvalues, sizeof(Datum),
544 compare_values, &cxt);
545
546 n = 1;
547 for (i = 1; i < range->nvalues; i++)
548 {
549 /* same as preceding value, so store it */
550 if (compare_values(&range->values[start + i - 1],
551 &range->values[start + i],
552 &cxt) == 0)
553 continue;
554
555 range->values[start + n] = range->values[start + i];
556
557 n++;
558 }
559
560 /* now all the values are sorted */
561 range->nvalues = n;
562 range->nsorted = n;
563
564 AssertCheckRanges(range, range->cmp, range->colloid);
565}
566
567
568/*
569 * brin_range_serialize
570 * Serialize the in-memory representation into a compact varlena value.
571 *
572 * Simply copy the header and then also the individual values, as stored
573 * in the in-memory value array.
574 */
575static SerializedRanges *
577{
578 Size len;
579 int nvalues;
580 SerializedRanges *serialized;
581 Oid typid;
582 int typlen;
583 bool typbyval;
584
585 char *ptr;
586
587 /* simple sanity checks */
588 Assert(range->nranges >= 0);
589 Assert(range->nsorted >= 0);
590 Assert(range->nvalues >= 0);
591 Assert(range->maxvalues > 0);
592 Assert(range->target_maxvalues > 0);
593
594 /* at this point the range should be compacted to the target size */
595 Assert(2 * range->nranges + range->nvalues <= range->target_maxvalues);
596
597 Assert(range->target_maxvalues <= range->maxvalues);
598
599 /* range boundaries are always sorted */
600 Assert(range->nvalues >= range->nsorted);
601
602 /* deduplicate values, if there's unsorted part */
604
605 /* see how many Datum values we actually have */
606 nvalues = 2 * range->nranges + range->nvalues;
607
608 typid = range->typid;
609 typbyval = get_typbyval(typid);
610 typlen = get_typlen(typid);
611
612 /* header is always needed */
613 len = offsetof(SerializedRanges, data);
614
615 /*
616 * The space needed depends on data type - for fixed-length data types
617 * (by-value and some by-reference) it's pretty simple, just multiply
618 * (attlen * nvalues) and we're done. For variable-length by-reference
619 * types we need to actually walk all the values and sum the lengths.
620 */
621 if (typlen == -1) /* varlena */
622 {
623 int i;
624
625 for (i = 0; i < nvalues; i++)
626 {
627 len += VARSIZE_ANY(range->values[i]);
628 }
629 }
630 else if (typlen == -2) /* cstring */
631 {
632 int i;
633
634 for (i = 0; i < nvalues; i++)
635 {
636 /* don't forget to include the null terminator ;-) */
637 len += strlen(DatumGetCString(range->values[i])) + 1;
638 }
639 }
640 else /* fixed-length types (even by-reference) */
641 {
642 Assert(typlen > 0);
643 len += nvalues * typlen;
644 }
645
646 /*
647 * Allocate the serialized object, copy the basic information. The
648 * serialized object is a varlena, so update the header.
649 */
650 serialized = (SerializedRanges *) palloc0(len);
651 SET_VARSIZE(serialized, len);
652
653 serialized->typid = typid;
654 serialized->nranges = range->nranges;
655 serialized->nvalues = range->nvalues;
656 serialized->maxvalues = range->target_maxvalues;
657
658 /*
659 * And now copy also the boundary values (like the length calculation this
660 * depends on the particular data type).
661 */
662 ptr = serialized->data; /* start of the serialized data */
663
664 for (int i = 0; i < nvalues; i++)
665 {
666 if (typbyval) /* simple by-value data types */
667 {
668 Datum tmp;
669
670 /*
671 * For byval types, we need to copy just the significant bytes -
672 * we can't use memcpy directly, as that assumes little-endian
673 * behavior. store_att_byval does almost what we need, but it
674 * requires a properly aligned buffer - the output buffer does not
675 * guarantee that. So we simply use a local Datum variable (which
676 * guarantees proper alignment), and then copy the value from it.
677 */
678 store_att_byval(&tmp, range->values[i], typlen);
679
680 memcpy(ptr, &tmp, typlen);
681 ptr += typlen;
682 }
683 else if (typlen > 0) /* fixed-length by-ref types */
684 {
685 memcpy(ptr, DatumGetPointer(range->values[i]), typlen);
686 ptr += typlen;
687 }
688 else if (typlen == -1) /* varlena */
689 {
690 int tmp = VARSIZE_ANY(DatumGetPointer(range->values[i]));
691
692 memcpy(ptr, DatumGetPointer(range->values[i]), tmp);
693 ptr += tmp;
694 }
695 else if (typlen == -2) /* cstring */
696 {
697 int tmp = strlen(DatumGetCString(range->values[i])) + 1;
698
699 memcpy(ptr, DatumGetCString(range->values[i]), tmp);
700 ptr += tmp;
701 }
702
703 /* make sure we haven't overflown the buffer end */
704 Assert(ptr <= ((char *) serialized + len));
705 }
706
707 /* exact size */
708 Assert(ptr == ((char *) serialized + len));
709
710 return serialized;
711}
712
713/*
714 * brin_range_deserialize
715 * Serialize the in-memory representation into a compact varlena value.
716 *
717 * Simply copy the header and then also the individual values, as stored
718 * in the in-memory value array.
719 */
720static Ranges *
721brin_range_deserialize(int maxvalues, SerializedRanges *serialized)
722{
723 int i,
724 nvalues;
725 char *ptr,
726 *dataptr;
727 bool typbyval;
728 int typlen;
729 Size datalen;
730
731 Ranges *range;
732
733 Assert(serialized->nranges >= 0);
734 Assert(serialized->nvalues >= 0);
735 Assert(serialized->maxvalues > 0);
736
737 nvalues = 2 * serialized->nranges + serialized->nvalues;
738
739 Assert(nvalues <= serialized->maxvalues);
740 Assert(serialized->maxvalues <= maxvalues);
741
742 range = minmax_multi_init(maxvalues);
743
744 /* copy the header info */
745 range->nranges = serialized->nranges;
746 range->nvalues = serialized->nvalues;
747 range->nsorted = serialized->nvalues;
748 range->maxvalues = maxvalues;
749 range->target_maxvalues = serialized->maxvalues;
750
751 range->typid = serialized->typid;
752
753 typbyval = get_typbyval(serialized->typid);
754 typlen = get_typlen(serialized->typid);
755
756 /*
757 * And now deconstruct the values into Datum array. We have to copy the
758 * data because the serialized representation ignores alignment, and we
759 * don't want to rely on it being kept around anyway.
760 */
761 ptr = serialized->data;
762
763 /*
764 * We don't want to allocate many pieces, so we just allocate everything
765 * in one chunk. How much space will we need?
766 *
767 * XXX We don't need to copy simple by-value data types.
768 */
769 datalen = 0;
770 dataptr = NULL;
771 for (i = 0; (i < nvalues) && (!typbyval); i++)
772 {
773 if (typlen > 0) /* fixed-length by-ref types */
774 datalen += MAXALIGN(typlen);
775 else if (typlen == -1) /* varlena */
776 {
777 datalen += MAXALIGN(VARSIZE_ANY(ptr));
778 ptr += VARSIZE_ANY(ptr);
779 }
780 else if (typlen == -2) /* cstring */
781 {
782 Size slen = strlen(ptr) + 1;
783
784 datalen += MAXALIGN(slen);
785 ptr += slen;
786 }
787 }
788
789 if (datalen > 0)
790 dataptr = palloc(datalen);
791
792 /*
793 * Restore the source pointer (might have been modified when calculating
794 * the space we need to allocate).
795 */
796 ptr = serialized->data;
797
798 for (i = 0; i < nvalues; i++)
799 {
800 if (typbyval) /* simple by-value data types */
801 {
802 Datum v = 0;
803
804 memcpy(&v, ptr, typlen);
805
806 range->values[i] = fetch_att(&v, true, typlen);
807 ptr += typlen;
808 }
809 else if (typlen > 0) /* fixed-length by-ref types */
810 {
811 range->values[i] = PointerGetDatum(dataptr);
812
813 memcpy(dataptr, ptr, typlen);
814 dataptr += MAXALIGN(typlen);
815
816 ptr += typlen;
817 }
818 else if (typlen == -1) /* varlena */
819 {
820 range->values[i] = PointerGetDatum(dataptr);
821
822 memcpy(dataptr, ptr, VARSIZE_ANY(ptr));
823 dataptr += MAXALIGN(VARSIZE_ANY(ptr));
824 ptr += VARSIZE_ANY(ptr);
825 }
826 else if (typlen == -2) /* cstring */
827 {
828 Size slen = strlen(ptr) + 1;
829
830 range->values[i] = PointerGetDatum(dataptr);
831
832 memcpy(dataptr, ptr, slen);
833 dataptr += MAXALIGN(slen);
834 ptr += slen;
835 }
836
837 /* make sure we haven't overflown the buffer end */
838 Assert(ptr <= ((char *) serialized + VARSIZE_ANY(serialized)));
839 }
840
841 /* should have consumed the whole input value exactly */
842 Assert(ptr == ((char *) serialized + VARSIZE_ANY(serialized)));
843
844 /* return the deserialized value */
845 return range;
846}
847
848/*
849 * compare_expanded_ranges
850 * Compare the expanded ranges - first by minimum, then by maximum.
851 *
852 * We do guarantee that ranges in a single Ranges object do not overlap, so it
853 * may seem strange that we don't order just by minimum. But when merging two
854 * Ranges (which happens in the union function), the ranges may in fact
855 * overlap. So we do compare both.
856 */
857static int
858compare_expanded_ranges(const void *a, const void *b, void *arg)
859{
860 ExpandedRange *ra = (ExpandedRange *) a;
861 ExpandedRange *rb = (ExpandedRange *) b;
862 Datum r;
863
865
866 /* first compare minvals */
867 r = FunctionCall2Coll(cxt->cmpFn, cxt->colloid, ra->minval, rb->minval);
868
869 if (DatumGetBool(r))
870 return -1;
871
872 r = FunctionCall2Coll(cxt->cmpFn, cxt->colloid, rb->minval, ra->minval);
873
874 if (DatumGetBool(r))
875 return 1;
876
877 /* then compare maxvals */
878 r = FunctionCall2Coll(cxt->cmpFn, cxt->colloid, ra->maxval, rb->maxval);
879
880 if (DatumGetBool(r))
881 return -1;
882
883 r = FunctionCall2Coll(cxt->cmpFn, cxt->colloid, rb->maxval, ra->maxval);
884
885 if (DatumGetBool(r))
886 return 1;
887
888 return 0;
889}
890
891/*
892 * compare_values
893 * Compare the values.
894 */
895static int
896compare_values(const void *a, const void *b, void *arg)
897{
898 Datum *da = (Datum *) a;
899 Datum *db = (Datum *) b;
900 Datum r;
901
903
904 r = FunctionCall2Coll(cxt->cmpFn, cxt->colloid, *da, *db);
905
906 if (DatumGetBool(r))
907 return -1;
908
909 r = FunctionCall2Coll(cxt->cmpFn, cxt->colloid, *db, *da);
910
911 if (DatumGetBool(r))
912 return 1;
913
914 return 0;
915}
916
917/*
918 * Check if the new value matches one of the existing ranges.
919 */
920static bool
921has_matching_range(BrinDesc *bdesc, Oid colloid, Ranges *ranges,
922 Datum newval, AttrNumber attno, Oid typid)
923{
924 Datum compar;
925
926 Datum minvalue;
927 Datum maxvalue;
928
929 FmgrInfo *cmpLessFn;
930 FmgrInfo *cmpGreaterFn;
931
932 /* binary search on ranges */
933 int start,
934 end;
935
936 if (ranges->nranges == 0)
937 return false;
938
939 minvalue = ranges->values[0];
940 maxvalue = ranges->values[2 * ranges->nranges - 1];
941
942 /*
943 * Otherwise, need to compare the new value with boundaries of all the
944 * ranges. First check if it's less than the absolute minimum, which is
945 * the first value in the array.
946 */
947 cmpLessFn = minmax_multi_get_strategy_procinfo(bdesc, attno, typid,
949 compar = FunctionCall2Coll(cmpLessFn, colloid, newval, minvalue);
950
951 /* smaller than the smallest value in the range list */
952 if (DatumGetBool(compar))
953 return false;
954
955 /*
956 * And now compare it to the existing maximum (last value in the data
957 * array). But only if we haven't already ruled out a possible match in
958 * the minvalue check.
959 */
960 cmpGreaterFn = minmax_multi_get_strategy_procinfo(bdesc, attno, typid,
962 compar = FunctionCall2Coll(cmpGreaterFn, colloid, newval, maxvalue);
963
964 if (DatumGetBool(compar))
965 return false;
966
967 /*
968 * So we know it's in the general min/max, the question is whether it
969 * falls in one of the ranges or gaps. We'll do a binary search on
970 * individual ranges - for each range we check equality (value falls into
971 * the range), and then check ranges either above or below the current
972 * range.
973 */
974 start = 0; /* first range */
975 end = (ranges->nranges - 1); /* last range */
976 while (true)
977 {
978 int midpoint = (start + end) / 2;
979
980 /* this means we ran out of ranges in the last step */
981 if (start > end)
982 return false;
983
984 /* copy the min/max values from the ranges */
985 minvalue = ranges->values[2 * midpoint];
986 maxvalue = ranges->values[2 * midpoint + 1];
987
988 /*
989 * Is the value smaller than the minval? If yes, we'll recurse to the
990 * left side of range array.
991 */
992 compar = FunctionCall2Coll(cmpLessFn, colloid, newval, minvalue);
993
994 /* smaller than the smallest value in this range */
995 if (DatumGetBool(compar))
996 {
997 end = (midpoint - 1);
998 continue;
999 }
1000
1001 /*
1002 * Is the value greater than the minval? If yes, we'll recurse to the
1003 * right side of range array.
1004 */
1005 compar = FunctionCall2Coll(cmpGreaterFn, colloid, newval, maxvalue);
1006
1007 /* larger than the largest value in this range */
1008 if (DatumGetBool(compar))
1009 {
1010 start = (midpoint + 1);
1011 continue;
1012 }
1013
1014 /* hey, we found a matching range */
1015 return true;
1016 }
1017
1018 return false;
1019}
1020
1021
1022/*
1023 * range_contains_value
1024 * See if the new value is already contained in the range list.
1025 *
1026 * We first inspect the list of intervals. We use a small trick - we check
1027 * the value against min/max of the whole range (min of the first interval,
1028 * max of the last one) first, and only inspect the individual intervals if
1029 * this passes.
1030 *
1031 * If the value matches none of the intervals, we check the exact values.
1032 * We simply loop through them and invoke equality operator on them.
1033 *
1034 * The last parameter (full) determines whether we need to search all the
1035 * values, including the unsorted part. With full=false, the unsorted part
1036 * is not searched, which may produce false negatives and duplicate values
1037 * (in the unsorted part only), but when we're building the range that's
1038 * fine - we'll deduplicate before serialization, and it can only happen
1039 * if there already are unsorted values (so it was already modified).
1040 *
1041 * Serialized ranges don't have any unsorted values, so this can't cause
1042 * false negatives during querying.
1043 */
1044static bool
1046 AttrNumber attno, Form_pg_attribute attr,
1047 Ranges *ranges, Datum newval, bool full)
1048{
1049 int i;
1050 FmgrInfo *cmpEqualFn;
1051 Oid typid = attr->atttypid;
1052
1053 /*
1054 * First inspect the ranges, if there are any. We first check the whole
1055 * range, and only when there's still a chance of getting a match we
1056 * inspect the individual ranges.
1057 */
1058 if (has_matching_range(bdesc, colloid, ranges, newval, attno, typid))
1059 return true;
1060
1061 cmpEqualFn = minmax_multi_get_strategy_procinfo(bdesc, attno, typid,
1063
1064 /*
1065 * There is no matching range, so let's inspect the sorted values.
1066 *
1067 * We do a sequential search for small numbers of values, and binary
1068 * search once we have more than 16 values. This threshold is somewhat
1069 * arbitrary, as it depends on how expensive the comparison function is.
1070 *
1071 * XXX If we use the threshold here, maybe we should do the same thing in
1072 * has_matching_range? Or maybe we should do the bin search all the time?
1073 *
1074 * XXX We could use the same optimization as for ranges, to check if the
1075 * value is between min/max, to maybe rule out all sorted values without
1076 * having to inspect all of them.
1077 */
1078 if (ranges->nsorted >= 16)
1079 {
1080 compare_context cxt;
1081
1082 cxt.colloid = ranges->colloid;
1083 cxt.cmpFn = ranges->cmp;
1084
1085 if (bsearch_arg(&newval, &ranges->values[2 * ranges->nranges],
1086 ranges->nsorted, sizeof(Datum),
1087 compare_values, &cxt) != NULL)
1088 return true;
1089 }
1090 else
1091 {
1092 for (i = 2 * ranges->nranges; i < 2 * ranges->nranges + ranges->nsorted; i++)
1093 {
1094 Datum compar;
1095
1096 compar = FunctionCall2Coll(cmpEqualFn, colloid, newval, ranges->values[i]);
1097
1098 /* found an exact match */
1099 if (DatumGetBool(compar))
1100 return true;
1101 }
1102 }
1103
1104 /* If not asked to inspect the unsorted part, we're done. */
1105 if (!full)
1106 return false;
1107
1108 /* Inspect the unsorted part. */
1109 for (i = 2 * ranges->nranges + ranges->nsorted; i < 2 * ranges->nranges + ranges->nvalues; i++)
1110 {
1111 Datum compar;
1112
1113 compar = FunctionCall2Coll(cmpEqualFn, colloid, newval, ranges->values[i]);
1114
1115 /* found an exact match */
1116 if (DatumGetBool(compar))
1117 return true;
1118 }
1119
1120 /* the value is not covered by this BRIN tuple */
1121 return false;
1122}
1123
1124/*
1125 * Expand ranges from Ranges into ExpandedRange array. This expects the
1126 * eranges to be pre-allocated and with the correct size - there needs to be
1127 * (nranges + nvalues) elements.
1128 *
1129 * The order of expanded ranges is arbitrary. We do expand the ranges first,
1130 * and this part is sorted. But then we expand the values, and this part may
1131 * be unsorted.
1132 */
1133static void
1134fill_expanded_ranges(ExpandedRange *eranges, int neranges, Ranges *ranges)
1135{
1136 int idx;
1137 int i;
1138
1139 /* Check that the output array has the right size. */
1140 Assert(neranges == (ranges->nranges + ranges->nvalues));
1141
1142 idx = 0;
1143 for (i = 0; i < ranges->nranges; i++)
1144 {
1145 eranges[idx].minval = ranges->values[2 * i];
1146 eranges[idx].maxval = ranges->values[2 * i + 1];
1147 eranges[idx].collapsed = false;
1148 idx++;
1149
1150 Assert(idx <= neranges);
1151 }
1152
1153 for (i = 0; i < ranges->nvalues; i++)
1154 {
1155 eranges[idx].minval = ranges->values[2 * ranges->nranges + i];
1156 eranges[idx].maxval = ranges->values[2 * ranges->nranges + i];
1157 eranges[idx].collapsed = true;
1158 idx++;
1159
1160 Assert(idx <= neranges);
1161 }
1162
1163 /* Did we produce the expected number of elements? */
1164 Assert(idx == neranges);
1165
1166 return;
1167}
1168
1169/*
1170 * Sort and deduplicate expanded ranges.
1171 *
1172 * The ranges may contain duplicates - we're simply appending values, without
1173 * checking for duplicates etc. So maybe the deduplication will reduce the
1174 * number of ranges enough, and we won't have to compute the distances etc.
1175 *
1176 * Returns the number of expanded ranges.
1177 */
1178static int
1180 ExpandedRange *eranges, int neranges)
1181{
1182 int n;
1183 int i;
1184 compare_context cxt;
1185
1186 Assert(neranges > 0);
1187
1188 /* sort the values */
1189 cxt.colloid = colloid;
1190 cxt.cmpFn = cmp;
1191
1192 /*
1193 * XXX We do qsort on all the values, but we could also leverage the fact
1194 * that some of the input data is already sorted (all the ranges and maybe
1195 * some of the points) and do merge sort.
1196 */
1197 qsort_arg(eranges, neranges, sizeof(ExpandedRange),
1199
1200 /*
1201 * Deduplicate the ranges - simply compare each range to the preceding
1202 * one, and skip the duplicate ones.
1203 */
1204 n = 1;
1205 for (i = 1; i < neranges; i++)
1206 {
1207 /* if the current range is equal to the preceding one, do nothing */
1208 if (!compare_expanded_ranges(&eranges[i - 1], &eranges[i], &cxt))
1209 continue;
1210
1211 /* otherwise, copy it to n-th place (if not already there) */
1212 if (i != n)
1213 memcpy(&eranges[n], &eranges[i], sizeof(ExpandedRange));
1214
1215 n++;
1216 }
1217
1218 Assert((n > 0) && (n <= neranges));
1219
1220 return n;
1221}
1222
1223/*
1224 * When combining multiple Range values (in union function), some of the
1225 * ranges may overlap. We simply merge the overlapping ranges to fix that.
1226 *
1227 * XXX This assumes the expanded ranges were previously sorted (by minval
1228 * and then maxval). We leverage this when detecting overlap.
1229 */
1230static int
1232 ExpandedRange *eranges, int neranges)
1233{
1234 int idx;
1235
1236 /* Merge ranges (idx) and (idx+1) if they overlap. */
1237 idx = 0;
1238 while (idx < (neranges - 1))
1239 {
1240 Datum r;
1241
1242 /*
1243 * comparing [?,maxval] vs. [minval,?] - the ranges overlap if (minval
1244 * < maxval)
1245 */
1246 r = FunctionCall2Coll(cmp, colloid,
1247 eranges[idx].maxval,
1248 eranges[idx + 1].minval);
1249
1250 /*
1251 * Nope, maxval < minval, so no overlap. And we know the ranges are
1252 * ordered, so there are no more overlaps, because all the remaining
1253 * ranges have greater or equal minval.
1254 */
1255 if (DatumGetBool(r))
1256 {
1257 /* proceed to the next range */
1258 idx += 1;
1259 continue;
1260 }
1261
1262 /*
1263 * So ranges 'idx' and 'idx+1' do overlap, but we don't know if
1264 * 'idx+1' is contained in 'idx', or if they overlap only partially.
1265 * So compare the upper bounds and keep the larger one.
1266 */
1267 r = FunctionCall2Coll(cmp, colloid,
1268 eranges[idx].maxval,
1269 eranges[idx + 1].maxval);
1270
1271 if (DatumGetBool(r))
1272 eranges[idx].maxval = eranges[idx + 1].maxval;
1273
1274 /*
1275 * The range certainly is no longer collapsed (irrespectively of the
1276 * previous state).
1277 */
1278 eranges[idx].collapsed = false;
1279
1280 /*
1281 * Now get rid of the (idx+1) range entirely by shifting the remaining
1282 * ranges by 1. There are neranges elements, and we need to move
1283 * elements from (idx+2). That means the number of elements to move is
1284 * [neranges - (idx+2)].
1285 */
1286 memmove(&eranges[idx + 1], &eranges[idx + 2],
1287 (neranges - (idx + 2)) * sizeof(ExpandedRange));
1288
1289 /*
1290 * Decrease the number of ranges, and repeat (with the same range, as
1291 * it might overlap with additional ranges thanks to the merge).
1292 */
1293 neranges--;
1294 }
1295
1296 return neranges;
1297}
1298
1299/*
1300 * Simple comparator for distance values, comparing the double value.
1301 * This is intentionally sorting the distances in descending order, i.e.
1302 * the longer gaps will be at the front.
1303 */
1304static int
1305compare_distances(const void *a, const void *b)
1306{
1307 DistanceValue *da = (DistanceValue *) a;
1308 DistanceValue *db = (DistanceValue *) b;
1309
1310 if (da->value < db->value)
1311 return 1;
1312 else if (da->value > db->value)
1313 return -1;
1314
1315 return 0;
1316}
1317
1318/*
1319 * Given an array of expanded ranges, compute size of the gaps between each
1320 * range. For neranges there are (neranges-1) gaps.
1321 *
1322 * We simply call the "distance" function to compute the (max-min) for pairs
1323 * of consecutive ranges. The function may be fairly expensive, so we do that
1324 * just once (and then use it to pick as many ranges to merge as possible).
1325 *
1326 * See reduce_expanded_ranges for details.
1327 */
1328static DistanceValue *
1329build_distances(FmgrInfo *distanceFn, Oid colloid,
1330 ExpandedRange *eranges, int neranges)
1331{
1332 int i;
1333 int ndistances;
1334 DistanceValue *distances;
1335
1336 Assert(neranges > 0);
1337
1338 /* If there's only a single range, there's no distance to calculate. */
1339 if (neranges == 1)
1340 return NULL;
1341
1342 ndistances = (neranges - 1);
1343 distances = (DistanceValue *) palloc0(sizeof(DistanceValue) * ndistances);
1344
1345 /*
1346 * Walk through the ranges once and compute the distance between the
1347 * ranges so that we can sort them once.
1348 */
1349 for (i = 0; i < ndistances; i++)
1350 {
1351 Datum a1,
1352 a2,
1353 r;
1354
1355 a1 = eranges[i].maxval;
1356 a2 = eranges[i + 1].minval;
1357
1358 /* compute length of the gap (between max/min) */
1359 r = FunctionCall2Coll(distanceFn, colloid, a1, a2);
1360
1361 /* remember the index of the gap the distance is for */
1362 distances[i].index = i;
1363 distances[i].value = DatumGetFloat8(r);
1364 }
1365
1366 /*
1367 * Sort the distances in descending order, so that the longest gaps are at
1368 * the front.
1369 */
1370 qsort(distances, ndistances, sizeof(DistanceValue), compare_distances);
1371
1372 return distances;
1373}
1374
1375/*
1376 * Builds expanded ranges for the existing ranges (and single-point ranges),
1377 * and also the new value (which did not fit into the array). This expanded
1378 * representation makes the processing a bit easier, as it allows handling
1379 * ranges and points the same way.
1380 *
1381 * We sort and deduplicate the expanded ranges - this is necessary, because
1382 * the points may be unsorted. And moreover the two parts (ranges and
1383 * points) are sorted on their own.
1384 */
1385static ExpandedRange *
1387 int *nranges)
1388{
1389 int neranges;
1390 ExpandedRange *eranges;
1391
1392 /* both ranges and points are expanded into a separate element */
1393 neranges = ranges->nranges + ranges->nvalues;
1394
1395 eranges = (ExpandedRange *) palloc0(neranges * sizeof(ExpandedRange));
1396
1397 /* fill the expanded ranges */
1398 fill_expanded_ranges(eranges, neranges, ranges);
1399
1400 /* sort and deduplicate the expanded ranges */
1401 neranges = sort_expanded_ranges(cmp, colloid, eranges, neranges);
1402
1403 /* remember how many ranges we built */
1404 *nranges = neranges;
1405
1406 return eranges;
1407}
1408
#ifdef USE_ASSERT_CHECKING
/*
 * count_values
 *		Number of boundary values needed to store the expanded ranges.
 *
 * A collapsed (single-point) range is stored as one value, a regular range
 * needs two. Only compiled in assert-enabled builds.
 */
static int
count_values(ExpandedRange *cranges, int ncranges)
{
	int			total = 0;
	int			pos;

	for (pos = 0; pos < ncranges; pos++)
		total += cranges[pos].collapsed ? 1 : 2;

	return total;
}
#endif
1432
1433/*
1434 * reduce_expanded_ranges
1435 * reduce the ranges until the number of values is low enough
1436 *
1437 * Combines ranges until the number of boundary values drops below the
1438 * threshold specified by max_values. This happens by merging enough
1439 * ranges by the distance between them.
1440 *
1441 * Returns the number of result ranges.
1442 *
1443 * We simply use the global min/max and then add boundaries for enough
1444 * largest gaps. Each gap adds 2 values, so we simply use (target/2-1)
1445 * distances. Then we simply sort all the values - each two values are
1446 * a boundary of a range (possibly collapsed).
1447 *
1448 * XXX Some of the ranges may be collapsed (i.e. the min/max values are
1449 * equal), but we ignore that for now. We could repeat the process,
1450 * adding a couple more gaps recursively.
1451 *
1452 * XXX The ranges to merge are selected solely using the distance. But
1453 * that may not be the best strategy, for example when multiple gaps
1454 * are of equal (or very similar) length.
1455 *
1456 * Consider for example points 1, 2, 3, .., 64, which have gaps of the
1457 * same length 1 of course. In that case, we tend to pick the first
1458 * gap of that length, which leads to this:
1459 *
1460 * step 1: [1, 2], 3, 4, 5, .., 64
1461 * step 2: [1, 3], 4, 5, .., 64
1462 * step 3: [1, 4], 5, .., 64
1463 * ...
1464 *
1465 * So in the end we'll have one "large" range and multiple small points.
1466 * That may be fine, but it seems a bit strange and non-optimal. Maybe
1467 * we should consider other things when picking ranges to merge - e.g.
1468 * length of the ranges? Or perhaps randomize the choice of ranges, with
1469 * probability inversely proportional to the distance (the gap lengths
1470 * may be very close, but not exactly the same).
1471 *
1472 * XXX Or maybe we could just handle this by using random value as a
1473 * tie-break, or by adding random noise to the actual distance.
1474 */
1475static int
1477 DistanceValue *distances, int max_values,
1478 FmgrInfo *cmp, Oid colloid)
1479{
1480 int i;
1481 int nvalues;
1482 Datum *values;
1483
1484 compare_context cxt;
1485
1486 /* total number of gaps between ranges */
1487 int ndistances = (neranges - 1);
1488
1489 /* number of gaps to keep */
1490 int keep = (max_values / 2 - 1);
1491
1492 /*
1493 * Maybe we have a sufficiently low number of ranges already?
1494 *
1495 * XXX This should happen before we actually do the expensive stuff like
1496 * sorting, so maybe this should be just an assert.
1497 */
1498 if (keep >= ndistances)
1499 return neranges;
1500
1501 /* sort the values */
1502 cxt.colloid = colloid;
1503 cxt.cmpFn = cmp;
1504
1505 /* allocate space for the boundary values */
1506 nvalues = 0;
1507 values = (Datum *) palloc(sizeof(Datum) * max_values);
1508
1509 /* add the global min/max values, from the first/last range */
1510 values[nvalues++] = eranges[0].minval;
1511 values[nvalues++] = eranges[neranges - 1].maxval;
1512
1513 /* add boundary values for enough gaps */
1514 for (i = 0; i < keep; i++)
1515 {
1516 /* index of the gap between (index) and (index+1) ranges */
1517 int index = distances[i].index;
1518
1519 Assert((index >= 0) && ((index + 1) < neranges));
1520
1521 /* add max from the preceding range, minval from the next one */
1522 values[nvalues++] = eranges[index].maxval;
1523 values[nvalues++] = eranges[index + 1].minval;
1524
1525 Assert(nvalues <= max_values);
1526 }
1527
1528 /* We should have an even number of range values. */
1529 Assert(nvalues % 2 == 0);
1530
1531 /*
1532 * Sort the values using the comparator function, and form ranges from the
1533 * sorted result.
1534 */
1535 qsort_arg(values, nvalues, sizeof(Datum),
1536 compare_values, &cxt);
1537
1538 /* We have nvalues boundary values, which means nvalues/2 ranges. */
1539 for (i = 0; i < (nvalues / 2); i++)
1540 {
1541 eranges[i].minval = values[2 * i];
1542 eranges[i].maxval = values[2 * i + 1];
1543
1544 /* if the boundary values are the same, it's a collapsed range */
1545 eranges[i].collapsed = (compare_values(&values[2 * i],
1546 &values[2 * i + 1],
1547 &cxt) == 0);
1548 }
1549
1550 return (nvalues / 2);
1551}
1552
1553/*
1554 * Store the boundary values from ExpandedRanges back into 'ranges' (using
1555 * only the minimal number of values needed).
1556 */
1557static void
1558store_expanded_ranges(Ranges *ranges, ExpandedRange *eranges, int neranges)
1559{
1560 int i;
1561 int idx = 0;
1562
1563 /* first copy in the regular ranges */
1564 ranges->nranges = 0;
1565 for (i = 0; i < neranges; i++)
1566 {
1567 if (!eranges[i].collapsed)
1568 {
1569 ranges->values[idx++] = eranges[i].minval;
1570 ranges->values[idx++] = eranges[i].maxval;
1571 ranges->nranges++;
1572 }
1573 }
1574
1575 /* now copy in the collapsed ones */
1576 ranges->nvalues = 0;
1577 for (i = 0; i < neranges; i++)
1578 {
1579 if (eranges[i].collapsed)
1580 {
1581 ranges->values[idx++] = eranges[i].minval;
1582 ranges->nvalues++;
1583 }
1584 }
1585
1586 /* all the values are sorted */
1587 ranges->nsorted = ranges->nvalues;
1588
1589 Assert(count_values(eranges, neranges) == 2 * ranges->nranges + ranges->nvalues);
1590 Assert(2 * ranges->nranges + ranges->nvalues <= ranges->maxvalues);
1591}
1592
1593
1594/*
1595 * Consider freeing space in the ranges. Checks if there's space for at least
1596 * one new value, and performs compaction if needed.
1597 *
1598 * Returns true if the value was actually modified.
1599 */
1600static bool
1602 AttrNumber attno, Form_pg_attribute attr,
1603 Ranges *range)
1604{
1605 MemoryContext ctx;
1606 MemoryContext oldctx;
1607
1608 FmgrInfo *cmpFn,
1609 *distanceFn;
1610
1611 /* expanded ranges */
1612 ExpandedRange *eranges;
1613 int neranges;
1614 DistanceValue *distances;
1615
1616 /*
1617 * If there is free space in the buffer, we're done without having to
1618 * modify anything.
1619 */
1620 if (2 * range->nranges + range->nvalues < range->maxvalues)
1621 return false;
1622
1623 /* we'll certainly need the comparator, so just look it up now */
1624 cmpFn = minmax_multi_get_strategy_procinfo(bdesc, attno, attr->atttypid,
1626
1627 /* deduplicate values, if there's an unsorted part */
1629
1630 /*
1631 * Did we free enough space by just the deduplication?
1632 *
1633 * We don't simply check against range->maxvalues again. The deduplication
1634 * might have freed very little space (e.g. just one value), forcing us to
1635 * do deduplication very often. In that case, it's better to do the
1636 * compaction and free more space.
1637 */
1638 if (2 * range->nranges + range->nvalues <= range->maxvalues * MINMAX_BUFFER_LOAD_FACTOR)
1639 return true;
1640
1641 /*
1642 * We need to combine some of the existing ranges, to reduce the number of
1643 * values we have to store.
1644 *
1645 * The distanceFn calls (which may internally call e.g. numeric_le) may
1646 * allocate quite a bit of memory, and we must not leak it (we might have
1647 * to do this repeatedly, even for a single BRIN page range). Otherwise
1648 * we'd have problems e.g. when building new indexes. So we use a memory
1649 * context and make sure we free the memory at the end (so if we call the
1650 * distance function many times, it might be an issue, but meh).
1651 */
1653 "minmax-multi context",
1655
1656 oldctx = MemoryContextSwitchTo(ctx);
1657
1658 /* build the expanded ranges */
1659 eranges = build_expanded_ranges(cmpFn, colloid, range, &neranges);
1660
1661 /* Is the expanded representation of ranges correct? */
1662 AssertCheckExpandedRanges(bdesc, colloid, attno, attr, eranges, neranges);
1663
1664 /* and we'll also need the 'distance' procedure */
1665 distanceFn = minmax_multi_get_procinfo(bdesc, attno, PROCNUM_DISTANCE);
1666
1667 /* build array of gap distances and sort them in descending order */
1668 distances = build_distances(distanceFn, colloid, eranges, neranges);
1669
1670 /*
1671 * Combine ranges until we release at least 50% of the space. This
1672 * threshold is somewhat arbitrary, perhaps needs tuning. We must not use
1673 * too low or high value.
1674 */
1675 neranges = reduce_expanded_ranges(eranges, neranges, distances,
1676 range->maxvalues * MINMAX_BUFFER_LOAD_FACTOR,
1677 cmpFn, colloid);
1678
1679 /* Is the result of reducing expanded ranges correct? */
1680 AssertCheckExpandedRanges(bdesc, colloid, attno, attr, eranges, neranges);
1681
1682 /* Make sure we've sufficiently reduced the number of ranges. */
1683 Assert(count_values(eranges, neranges) <= range->maxvalues * MINMAX_BUFFER_LOAD_FACTOR);
1684
1685 /* decompose the expanded ranges into regular ranges and single values */
1686 store_expanded_ranges(range, eranges, neranges);
1687
1688 MemoryContextSwitchTo(oldctx);
1690
1691 /* Did we break the ranges somehow? */
1692 AssertCheckRanges(range, cmpFn, colloid);
1693
1694 return true;
1695}
1696
1697/*
1698 * range_add_value
1699 * Add the new value to the minmax-multi range.
1700 */
1701static bool
1703 AttrNumber attno, Form_pg_attribute attr,
1704 Ranges *ranges, Datum newval)
1705{
1706 FmgrInfo *cmpFn;
1707 bool modified = false;
1708
1709 /* we'll certainly need the comparator, so just look it up now */
1710 cmpFn = minmax_multi_get_strategy_procinfo(bdesc, attno, attr->atttypid,
1712
1713 /* comprehensive checks of the input ranges */
1714 AssertCheckRanges(ranges, cmpFn, colloid);
1715
1716 /*
1717 * Make sure there's enough free space in the buffer. We only trigger this
1718 * when the buffer is full, which means it had to be modified as we size
1719 * it to be larger than what is stored on disk.
1720 *
1721 * This needs to happen before we check if the value is contained in the
1722 * range, because the value might be in the unsorted part, and we don't
1723 * check that in range_contains_value. The deduplication would then move
1724 * it to the sorted part, and we'd add the value too, which violates the
1725 * rule that we never have duplicates with the ranges or sorted values.
1726 *
1727 * We might also deduplicate and recheck if the value is contained, but
1728 * that seems like overkill. We'd need to deduplicate anyway, so why not
1729 * do it now.
1730 */
1731 modified = ensure_free_space_in_buffer(bdesc, colloid,
1732 attno, attr, ranges);
1733
1734 /*
1735 * Bail out if the value already is covered by the range.
1736 *
1737 * We could also add values until we hit values_per_range, and then do the
1738 * deduplication in a batch, hoping for better efficiency. But that would
1739 * mean we actually modify the range every time, which means having to
1740 * serialize the value, which does palloc, walks the values, copies them,
1741 * etc. Not exactly cheap.
1742 *
1743 * So instead we do the check, which should be fairly cheap - assuming the
1744 * comparator function is not very expensive.
1745 *
1746 * This also implies the values array can't contain duplicate values.
1747 */
1748 if (range_contains_value(bdesc, colloid, attno, attr, ranges, newval, false))
1749 return modified;
1750
1751 /* Make a copy of the value, if needed. */
1752 newval = datumCopy(newval, attr->attbyval, attr->attlen);
1753
1754 /*
1755 * There is free space in the values array (ensured above), so simply
1756 * append the new value at the end, i.e. to the unsorted part.
1757 *
1758 * We do not sort the values here (no insertion sort or similar). The
1759 * unsorted part gets sorted and deduplicated later in a batch, when the
1760 * buffer fills up (see ensure_free_space_in_buffer) or when compacting.
1761 */
1762 ranges->values[2 * ranges->nranges + ranges->nvalues] = newval;
1763 ranges->nvalues++;
1764
1765 /* If we added the first value, we can consider it as sorted. */
1766 if (ranges->nvalues == 1)
1767 ranges->nsorted = 1;
1768
1769 /*
1770 * Check we haven't broken the ordering of boundary values (checks both
1771 * parts, but that doesn't hurt).
1772 */
1773 AssertCheckRanges(ranges, cmpFn, colloid);
1774
1775 /* Check the range contains the value we just added. */
1776 Assert(range_contains_value(bdesc, colloid, attno, attr, ranges, newval, true));
1777
1778 /* yep, we've modified the range */
1779 return true;
1780}
1781
1782/*
1783 * Generate range representation of data collected during "batch mode".
1784 * This is similar to reduce_expanded_ranges, except that we can't assume
1785 * the values are sorted and there may be duplicate values.
1786 */
1787static void
1788compactify_ranges(BrinDesc *bdesc, Ranges *ranges, int max_values)
1789{
1790 FmgrInfo *cmpFn,
1791 *distanceFn;
1792
1793 /* expanded ranges */
1794 ExpandedRange *eranges;
1795 int neranges;
1796 DistanceValue *distances;
1797
1798 MemoryContext ctx;
1799 MemoryContext oldctx;
1800
1801 /*
1802 * Do we need to actually compactify anything?
1803 *
1804 * There are two reasons why compaction may be needed - firstly, there may
1805 * be too many values, or some of the values may be unsorted.
1806 */
1807 if ((ranges->nranges * 2 + ranges->nvalues <= max_values) &&
1808 (ranges->nsorted == ranges->nvalues))
1809 return;
1810
1811 /* we'll certainly need the comparator, so just look it up now */
1812 cmpFn = minmax_multi_get_strategy_procinfo(bdesc, ranges->attno, ranges->typid,
1814
1815 /* and we'll also need the 'distance' procedure */
1816 distanceFn = minmax_multi_get_procinfo(bdesc, ranges->attno, PROCNUM_DISTANCE);
1817
1818 /*
1819 * The distanceFn calls (which may internally call e.g. numeric_le) may
1820 * allocate quite a bit of memory, and we must not leak it. Otherwise,
1821 * we'd have problems e.g. when building indexes. So we create a local
1822 * memory context and make sure we free the memory before leaving this
1823 * function (not after every call).
1824 */
1826 "minmax-multi context",
1828
1829 oldctx = MemoryContextSwitchTo(ctx);
1830
1831 /* build the expanded ranges */
1832 eranges = build_expanded_ranges(cmpFn, ranges->colloid, ranges, &neranges);
1833
1834 /* build array of gap distances and sort them in descending order */
1835 distances = build_distances(distanceFn, ranges->colloid,
1836 eranges, neranges);
1837
1838 /*
1839 * Combine ranges until we get below max_values. We don't use any scale
1840 * factor, because this is used during serialization, and we don't expect
1841 * more tuples to be inserted anytime soon.
1842 */
1843 neranges = reduce_expanded_ranges(eranges, neranges, distances,
1844 max_values, cmpFn, ranges->colloid);
1845
1846 Assert(count_values(eranges, neranges) <= max_values);
1847
1848 /* transform back into regular ranges and single values */
1849 store_expanded_ranges(ranges, eranges, neranges);
1850
1851 /* check all the range invariants */
1852 AssertCheckRanges(ranges, cmpFn, ranges->colloid);
1853
1854 MemoryContextSwitchTo(oldctx);
1856}
1857
1858Datum
1860{
1861 BrinOpcInfo *result;
1862
1863 /*
1864 * opaque->strategy_procinfos is initialized lazily; here it is set to
1865 * all-uninitialized by palloc0 which sets fn_oid to InvalidOid.
1866 */
1867
1868 result = palloc0(MAXALIGN(SizeofBrinOpcInfo(1)) +
1869 sizeof(MinmaxMultiOpaque));
1870 result->oi_nstored = 1;
1871 result->oi_regular_nulls = true;
1872 result->oi_opaque = (MinmaxMultiOpaque *)
1873 MAXALIGN((char *) result + SizeofBrinOpcInfo(1));
1874 result->oi_typcache[0] = lookup_type_cache(PG_BRIN_MINMAX_MULTI_SUMMARYOID, 0);
1875
1876 PG_RETURN_POINTER(result);
1877}
1878
1879/*
1880 * Compute the distance between two float4 values (plain subtraction).
1881 */
1882Datum
1884{
1885 float a1 = PG_GETARG_FLOAT4(0);
1886 float a2 = PG_GETARG_FLOAT4(1);
1887
1888 /* if both values are NaN, then we consider them the same */
1889 if (isnan(a1) && isnan(a2))
1890 PG_RETURN_FLOAT8(0.0);
1891
1892 /* if one value is NaN, use infinite distance */
1893 if (isnan(a1) || isnan(a2))
1895
1896 /*
1897 * We know the values are range boundaries, but the range may be collapsed
1898 * (i.e. single points), with equal values.
1899 */
1900 Assert(a1 <= a2);
1901
1902 PG_RETURN_FLOAT8((double) a2 - (double) a1);
1903}
1904
1905/*
1906 * Compute the distance between two float8 values (plain subtraction).
1907 */
1908Datum
1910{
1911 double a1 = PG_GETARG_FLOAT8(0);
1912 double a2 = PG_GETARG_FLOAT8(1);
1913
1914 /* if both values are NaN, then we consider them the same */
1915 if (isnan(a1) && isnan(a2))
1916 PG_RETURN_FLOAT8(0.0);
1917
1918 /* if one value is NaN, use infinite distance */
1919 if (isnan(a1) || isnan(a2))
1921
1922 /*
1923 * We know the values are range boundaries, but the range may be collapsed
1924 * (i.e. single points), with equal values.
1925 */
1926 Assert(a1 <= a2);
1927
1929}
1930
1931/*
1932 * Compute the distance between two int2 values (plain subtraction).
1933 */
1934Datum
1936{
1939
1940 /*
1941 * We know the values are range boundaries, but the range may be collapsed
1942 * (i.e. single points), with equal values.
1943 */
1944 Assert(a1 <= a2);
1945
1946 PG_RETURN_FLOAT8((double) a2 - (double) a1);
1947}
1948
1949/*
1950 * Compute the distance between two int4 values (plain subtraction).
1951 */
1952Datum
1954{
1957
1958 /*
1959 * We know the values are range boundaries, but the range may be collapsed
1960 * (i.e. single points), with equal values.
1961 */
1962 Assert(a1 <= a2);
1963
1964 PG_RETURN_FLOAT8((double) a2 - (double) a1);
1965}
1966
1967/*
1968 * Compute the distance between two int8 values (plain subtraction).
1969 */
1970Datum
1972{
1975
1976 /*
1977 * We know the values are range boundaries, but the range may be collapsed
1978 * (i.e. single points), with equal values.
1979 */
1980 Assert(a1 <= a2);
1981
1982 PG_RETURN_FLOAT8((double) a2 - (double) a1);
1983}
1984
1985/*
1986 * Compute the distance between two tid values (by mapping them to float8 and
1987 * then subtracting them).
1988 */
1989Datum
1991{
1992 double da1,
1993 da2;
1994
1997
1998 /*
1999 * We know the values are range boundaries, but the range may be collapsed
2000 * (i.e. single points), with equal values.
2001 */
2002 Assert(ItemPointerCompare(pa1, pa2) <= 0);
2003
2004 /*
2005 * We use the no-check variants here, because user-supplied values may
2006 * have (ip_posid == 0). See ItemPointerCompare.
2007 */
2010
2013
2014 PG_RETURN_FLOAT8(da2 - da1);
2015}
2016
2017/*
2018 * Compute the distance between two numeric values (plain subtraction).
2019 */
2020Datum
2022{
2023 Datum d;
2026
2027 /*
2028 * We know the values are range boundaries, but the range may be collapsed
2029 * (i.e. single points), with equal values.
2030 */
2032
2033 d = DirectFunctionCall2(numeric_sub, a2, a1); /* a2 - a1 */
2034
2036}
2037
2038/*
2039 * Compute the approximate distance between two UUID values.
2040 *
2041 * XXX We do not need a perfectly accurate value, so we approximate the
2042 * deltas (which would have to be 128-bit integers) with a 64-bit float.
2043 * The small inaccuracies do not matter in practice, in the worst case
2044 * we'll decide to merge ranges that are not the closest ones.
2045 */
2046Datum
2048{
2049 int i;
2050 float8 delta = 0;
2051
2054
2055 pg_uuid_t *u1 = DatumGetUUIDP(a1);
2056 pg_uuid_t *u2 = DatumGetUUIDP(a2);
2057
2058 /*
2059 * We know the values are range boundaries, but the range may be collapsed
2060 * (i.e. single points), with equal values.
2061 */
2063
2064 /* compute approximate delta as a double precision value */
2065 for (i = UUID_LEN - 1; i >= 0; i--)
2066 {
2067 delta += (int) u2->data[i] - (int) u1->data[i];
2068 delta /= 256;
2069 }
2070
2071 Assert(delta >= 0);
2072
2073 PG_RETURN_FLOAT8(delta);
2074}
2075
2076/*
2077 * Compute the approximate distance between two dates.
2078 */
2079Datum
2081{
2082 float8 delta = 0;
2083 DateADT dateVal1 = PG_GETARG_DATEADT(0);
2084 DateADT dateVal2 = PG_GETARG_DATEADT(1);
2085
2086 delta = (float8) dateVal2 - (float8) dateVal1;
2087
2088 Assert(delta >= 0);
2089
2090 PG_RETURN_FLOAT8(delta);
2091}
2092
2093/*
2094 * Compute the approximate distance between two time (without tz) values.
2095 *
2096 * TimeADT is just an int64, so we simply subtract the values directly.
2097 */
2098Datum
2100{
2101 float8 delta = 0;
2102
2103 TimeADT ta = PG_GETARG_TIMEADT(0);
2104 TimeADT tb = PG_GETARG_TIMEADT(1);
2105
2106 delta = (tb - ta);
2107
2108 Assert(delta >= 0);
2109
2110 PG_RETURN_FLOAT8(delta);
2111}
2112
2113/*
2114 * Compute the approximate distance between two timetz values.
2115 *
2116 * Subtracts the embedded TimeADT (int64) values and adds the zone difference times USECS_PER_SEC.
2117 */
2118Datum
2120{
2121 float8 delta = 0;
2122
2125
2126 delta = (tb->time - ta->time) + (tb->zone - ta->zone) * USECS_PER_SEC;
2127
2128 Assert(delta >= 0);
2129
2130 PG_RETURN_FLOAT8(delta);
2131}
2132
2133/*
2134 * Compute the distance between two timestamp values.
2135 */
2136Datum
2138{
2139 float8 delta = 0;
2140
2143
2144 delta = (float8) dt2 - (float8) dt1;
2145
2146 Assert(delta >= 0);
2147
2148 PG_RETURN_FLOAT8(delta);
2149}
2150
2151/*
2152 * Compute the distance between two interval values.
2153 */
2154Datum
2156{
2157 float8 delta = 0;
2158
2161
2162 int64 dayfraction;
2163 int64 days;
2164
2165 /*
2166 * Delta is (fractional) number of days between the intervals. Assume
2167 * months have 30 days for consistency with interval_cmp_internal. We
2168 * don't need to be exact, in the worst case we'll build a bit less
2169 * efficient ranges. But we should not contradict interval_cmp.
2170 */
2171 dayfraction = (ib->time % USECS_PER_DAY) - (ia->time % USECS_PER_DAY);
2172 days = (ib->time / USECS_PER_DAY) - (ia->time / USECS_PER_DAY);
2173 days += (int64) ib->day - (int64) ia->day;
2174 days += ((int64) ib->month - (int64) ia->month) * INT64CONST(30);
2175
2176 /* convert to double precision */
2177 delta = (double) days + dayfraction / (double) USECS_PER_DAY;
2178
2179 Assert(delta >= 0);
2180
2181 PG_RETURN_FLOAT8(delta);
2182}
2183
2184/*
2185 * Compute the distance between two pg_lsn values.
2186 *
2187 * The two LSN values (arguments 0 and 1) are subtracted directly and the
2188 * difference is converted to float8.
 *
 * NOTE(review): the values are declared as XLogRecPtr, whose definition is
 * not visible here. If that type is unsigned, the subtraction wraps around
 * instead of going negative and the assertion below can never fire - TODO
 * confirm.
2189 */
2190Datum
2192{
2193 float8 delta = 0;
2194
2195 XLogRecPtr lsna = PG_GETARG_LSN(0);
2196 XLogRecPtr lsnb = PG_GETARG_LSN(1);
2197
 /* integer subtraction first, conversion to float8 on assignment */
2198 delta = (lsnb - lsna);
2199
2200 Assert(delta >= 0);
2201
2202 PG_RETURN_FLOAT8(delta);
2203}
2204
2205/*
2206 * Compute the distance between two macaddr values.
2207 *
2208 * mac addresses are treated as 6 unsigned chars, so do the same thing we
2209 * already do for UUID values.
 *
 * The bytes are processed from field f up to field a. After each byte
 * difference is added, the running total is divided by 256, so field a
 * ends up with the largest weight and the result is a fraction below 1.
 *
 * NOTE(review): the declarations of a and b are not visible in this
 * listing; presumably they are arguments 0 and 1, passed in ascending
 * order (see the assertion below).
2210 */
2211Datum
2213{
2214 float8 delta;
2215
2218
 /* start with the byte that gets the smallest weight */
2219 delta = ((float8) b->f - (float8) a->f);
2220 delta /= 256;
2221
2222 delta += ((float8) b->e - (float8) a->e);
2223 delta /= 256;
2224
2225 delta += ((float8) b->d - (float8) a->d);
2226 delta /= 256;
2227
2228 delta += ((float8) b->c - (float8) a->c);
2229 delta /= 256;
2230
2231 delta += ((float8) b->b - (float8) a->b);
2232 delta /= 256;
2233
 /* the byte with the largest weight comes last */
2234 delta += ((float8) b->a - (float8) a->a);
2235 delta /= 256;
2236
2237 Assert(delta >= 0);
2238
2239 PG_RETURN_FLOAT8(delta);
2240}
2241
2242/*
2243 * Compute the distance between two macaddr8 values.
2244 *
2245 * macaddr8 addresses are 8 unsigned chars, so do the same thing we
2246 * already do for UUID values.
 *
 * The bytes are processed from field h up to field a. After each byte
 * difference is added, the running total is divided by 256, so field a
 * ends up with the largest weight and the result is a fraction below 1.
 *
 * NOTE(review): the declarations of a and b are not visible in this
 * listing; presumably they are arguments 0 and 1, passed in ascending
 * order (see the assertion below).
2247 */
2248Datum
2250{
2251 float8 delta;
2252
2255
 /* start with the byte that gets the smallest weight */
2256 delta = ((float8) b->h - (float8) a->h);
2257 delta /= 256;
2258
2259 delta += ((float8) b->g - (float8) a->g);
2260 delta /= 256;
2261
2262 delta += ((float8) b->f - (float8) a->f);
2263 delta /= 256;
2264
2265 delta += ((float8) b->e - (float8) a->e);
2266 delta /= 256;
2267
2268 delta += ((float8) b->d - (float8) a->d);
2269 delta /= 256;
2270
2271 delta += ((float8) b->c - (float8) a->c);
2272 delta /= 256;
2273
2274 delta += ((float8) b->b - (float8) a->b);
2275 delta /= 256;
2276
 /* the byte with the largest weight comes last */
2277 delta += ((float8) b->a - (float8) a->a);
2278 delta /= 256;
2279
2280 Assert(delta >= 0);
2281
2282 PG_RETURN_FLOAT8(delta);
2283}
2284
2285/*
2286 * Compute the distance between two inet values.
2287 *
2288 * The distance is defined as the difference between 32-bit/128-bit values,
2289 * depending on the IP version. The distance is computed by subtracting
2290 * the bytes and normalizing it to [0,1] range for each IP family.
2291 * Addresses from different families are considered to be in maximum
2292 * distance, which is 1.0.
2293 *
2294 * Before the subtraction, each address is masked with its own prefix
 * length (ip_bits), so bits beyond the prefix do not contribute to the
 * distance. The masking is done on private copies of the address bytes,
 * the arguments themselves are not modified.
2295 */
2296Datum
2298{
2299 float8 delta;
2300 int i;
2301 int len;
2302 unsigned char *addra,
2303 *addrb;
2304
2305 inet *ipa = PG_GETARG_INET_PP(0);
2306 inet *ipb = PG_GETARG_INET_PP(1);
2307
2308 int lena,
2309 lenb;
2310
2311 /*
2312 * If the addresses are from different families, consider them to be in
2313 * maximal possible distance (which is 1.0).
2314 */
2315 if (ip_family(ipa) != ip_family(ipb))
2316 PG_RETURN_FLOAT8(1.0);
2317
 /* work on copies, because the masking below modifies the bytes */
2318 addra = (unsigned char *) palloc(ip_addrsize(ipa));
2319 memcpy(addra, ip_addr(ipa), ip_addrsize(ipa));
2320
2321 addrb = (unsigned char *) palloc(ip_addrsize(ipb));
2322 memcpy(addrb, ip_addr(ipb), ip_addrsize(ipb));
2323
2324 /*
2325 * The prefix lengths are needed because we sort the addresses by the
2326 * first address in the range, so A.B.C.D/24 < A.B.C.1 (the first range
2327 * starts at A.B.C.0, which is before A.B.C.1). We don't want to produce
2328 * a negative delta in this case, so the bits past each prefix are
2329 * cleared in the loop below.
2330 *
2331 * lena and lenb are the prefix lengths in bits, len is the address size
2332 * in bytes (the same for both, as the families match).
2333 */
2334 lena = ip_bits(ipa);
2335 lenb = ip_bits(ipb);
2336
2337 len = ip_addrsize(ipa);
2338
2339 /* apply the network mask to both addresses */
2340 for (i = 0; i < len; i++)
2341 {
2342 unsigned char mask;
2343 int nbits;
2344
 /* number of prefix bits that fall into byte i of the first address */
2345 nbits = Max(0, lena - (i * 8));
2346 if (nbits < 8)
2347 {
 /* keep the top nbits bits, zero bits mean the whole byte is cleared */
2348 mask = (0xFF << (8 - nbits));
2349 addra[i] = (addra[i] & mask);
2350 }
2351
 /* and the same for the second address */
2352 nbits = Max(0, lenb - (i * 8));
2353 if (nbits < 8)
2354 {
2355 mask = (0xFF << (8 - nbits));
2356 addrb[i] = (addrb[i] & mask);
2357 }
2358 }
2359
2360 /* Calculate the difference between the addresses. */
2361 delta = 0;
 /* walk from the last byte to the first, so byte 0 gets the largest weight */
2362 for (i = len - 1; i >= 0; i--)
2363 {
2364 unsigned char a = addra[i];
2365 unsigned char b = addrb[i];
2366
2367 delta += (float8) b - (float8) a;
2368 delta /= 256;
2369 }
2370
2371 Assert((delta >= 0) && (delta <= 1));
2372
2373 pfree(addra);
2374 pfree(addrb);
2375
2376 PG_RETURN_FLOAT8(delta);
2377}
2378
/*
 * Turn the in-memory summary passed in src (a pointer to Ranges) into its
 * serialized form and store the result in dst[0].
 *
 * The ranges are first compacted down to ranges->target_maxvalues, which
 * modifies the Ranges struct that src points to.
 *
 * NOTE(review): the signature line and the declaration of s are not
 * visible in this listing.
 */
2379static void
2381{
2382 Ranges *ranges = (Ranges *) DatumGetPointer(src);
2384
2385 /*
2386 * In batch mode, we need to compress the accumulated values to the
2387 * actually requested number of values/ranges.
2388 */
2389 compactify_ranges(bdesc, ranges, ranges->target_maxvalues);
2390
2391 /* At this point everything has to be fully sorted. */
2392 Assert(ranges->nsorted == ranges->nvalues);
2393
2394 s = brin_range_serialize(ranges);
2395 dst[0] = PointerGetDatum(s);
2396}
2397
/*
 * NOTE(review): both the signature line and the body of this function are
 * missing from this listing, so its behavior cannot be documented from
 * here. Presumably this is the helper that brin_minmax_multi_add_value
 * calls to obtain the number of values requested through the opclass
 * options - TODO confirm against the complete source.
 */
2398static int
2400{
2402}
2403
2404/*
2405 * Examine the given index tuple (which contains the partial status of a
2406 * certain page range) by comparing it to the given value that comes from
2407 * another heap tuple. If the new value is outside the min/max range
2408 * specified by the existing tuple values, update the index tuple and return
2409 * true. Otherwise, return false and do not modify in this case.
 *
 * The summary is kept in deserialized form (Ranges) in bv_mem_value of the
 * column. It is either created here (first non-null value, in which case
 * the function always reports a modification), or deserialized from
 * bv_values[0] when no in-memory copy exists yet.
 *
 * NOTE(review): the declarations of newval, isnull and opts, and a few
 * other lines of this function, are not visible in this listing.
2410 */
2411Datum
2413{
2414 BrinDesc *bdesc = (BrinDesc *) PG_GETARG_POINTER(0);
2415 BrinValues *column = (BrinValues *) PG_GETARG_POINTER(1);
2419 Oid colloid = PG_GET_COLLATION();
2420 bool modified = false;
2421 Form_pg_attribute attr;
2422 AttrNumber attno;
2423 Ranges *ranges;
2424 SerializedRanges *serialized = NULL;
2425
2426 Assert(!isnull);
2427
2428 attno = column->bv_attno;
2429 attr = TupleDescAttr(bdesc->bd_tupdesc, attno - 1);
2430
2431 /* use the already deserialized value, if possible */
2432 ranges = (Ranges *) DatumGetPointer(column->bv_mem_value);
2433
2434 /*
2435 * If this is the first non-null value, we need to initialize the range
2436 * list. Otherwise, just extract the existing range list from BrinValues.
2437 *
2438 * When starting with an empty range, we assume this is a batch mode and
2439 * we use a larger buffer. The buffer size is derived from the BRIN range
2440 * size, number of rows per page, with some sensible min/max values. A
2441 * small buffer would be bad for performance, but a large buffer might
2442 * require a lot of memory (because of keeping all the values).
2443 */
2444 if (column->bv_allnulls)
2445 {
2446 MemoryContext oldctx;
2447
2448 int target_maxvalues;
2449 int maxvalues;
2450 BlockNumber pagesPerRange = BrinGetPagesPerRange(bdesc->bd_index);
2451
2452 /* what was specified as a reloption? */
2453 target_maxvalues = brin_minmax_multi_get_values(bdesc, opts);
2454
2455 /*
2456 * Determine the insert buffer size - we use MINMAX_BUFFER_FACTOR times
2457 * the target, capped to the maximum number of values in the heap
2458 * range. This is more than enough, considering the actual number of
2459 * rows per page is likely much lower, but meh.
2460 */
2461 maxvalues = Min(target_maxvalues * MINMAX_BUFFER_FACTOR,
2462 MaxHeapTuplesPerPage * pagesPerRange);
2463
2464 /* but always at least the original value */
2465 maxvalues = Max(maxvalues, target_maxvalues);
2466
2467 /* always cap by MIN/MAX */
2468 maxvalues = Max(maxvalues, MINMAX_BUFFER_MIN);
2469 maxvalues = Min(maxvalues, MINMAX_BUFFER_MAX);
2470
 /* the summary has to be allocated in the context of the column */
2471 oldctx = MemoryContextSwitchTo(column->bv_context);
2472 ranges = minmax_multi_init(maxvalues);
2473 ranges->attno = attno;
2474 ranges->colloid = colloid;
2475 ranges->typid = attr->atttypid;
2476 ranges->target_maxvalues = target_maxvalues;
2477
2478 /* we'll certainly need the comparator, so just look it up now */
2479 ranges->cmp = minmax_multi_get_strategy_procinfo(bdesc, attno, attr->atttypid,
2481
2482 MemoryContextSwitchTo(oldctx);
2483
 /* creating the summary counts as a modification on its own */
2484 column->bv_allnulls = false;
2485 modified = true;
2486
2487 column->bv_mem_value = PointerGetDatum(ranges);
2489 }
2490 else if (!ranges)
2491 {
 /* there is a serialized summary, but no in-memory copy of it yet */
2492 MemoryContext oldctx;
2493
2494 int maxvalues;
2495 BlockNumber pagesPerRange = BrinGetPagesPerRange(bdesc->bd_index);
2496
2497 oldctx = MemoryContextSwitchTo(column->bv_context);
2498
2499 serialized = (SerializedRanges *) PG_DETOAST_DATUM(column->bv_values[0]);
2500
2501 /*
2502 * Determine the insert buffer size - we use MINMAX_BUFFER_FACTOR times
2503 * the target, capped to the maximum number of values in the heap
2504 * range. This is more than enough, considering the actual number of
2505 * rows per page is likely much lower, but meh.
2506 */
2507 maxvalues = Min(serialized->maxvalues * MINMAX_BUFFER_FACTOR,
2508 MaxHeapTuplesPerPage * pagesPerRange);
2509
2510 /* but always at least the original value */
2511 maxvalues = Max(maxvalues, serialized->maxvalues);
2512
2513 /* always cap by MIN/MAX */
2514 maxvalues = Max(maxvalues, MINMAX_BUFFER_MIN);
2515 maxvalues = Min(maxvalues, MINMAX_BUFFER_MAX);
2516
2517 ranges = brin_range_deserialize(maxvalues, serialized);
2518
2519 ranges->attno = attno;
2520 ranges->colloid = colloid;
2521 ranges->typid = attr->atttypid;
2522
2523 /* we'll certainly need the comparator, so just look it up now */
2524 ranges->cmp = minmax_multi_get_strategy_procinfo(bdesc, attno, attr->atttypid,
2526
2527 column->bv_mem_value = PointerGetDatum(ranges);
2529
2530 MemoryContextSwitchTo(oldctx);
2531 }
2532
2533 /*
2534 * Try to add the new value to the range. We need to update the modified
2535 * flag, so that we serialize the updated summary later.
2536 */
2537 modified |= range_add_value(bdesc, colloid, attno, attr, ranges, newval);
2538
2539
2540 PG_RETURN_BOOL(modified);
2541}
2542
2543/*
2544 * Given an index tuple corresponding to a certain page range and a scan key,
2545 * return whether the scan key is consistent with the index tuple's min/max
2546 * values. Return true if so, false otherwise.
 *
 * The serialized summary is deserialized first. Then the ranges (stored as
 * min/max pairs at the start of the values array) are checked, followed by
 * the individual values stored after them. The function returns true as
 * soon as one range or one value satisfies all the scan keys.
 *
 * NOTE(review): the case labels of both switch statements and a few
 * argument lines are not visible in this listing.
2547 */
2548Datum
2550{
2551 BrinDesc *bdesc = (BrinDesc *) PG_GETARG_POINTER(0);
2552 BrinValues *column = (BrinValues *) PG_GETARG_POINTER(1);
2553 ScanKey *keys = (ScanKey *) PG_GETARG_POINTER(2);
2554 int nkeys = PG_GETARG_INT32(3);
2555
2556 Oid colloid = PG_GET_COLLATION(),
2557 subtype;
2558 AttrNumber attno;
2559 Datum value;
2560 FmgrInfo *finfo;
2561 SerializedRanges *serialized;
2562 Ranges *ranges;
2563 int keyno;
2564 int rangeno;
2565 int i;
2566
2567 attno = column->bv_attno;
2568
2569 serialized = (SerializedRanges *) PG_DETOAST_DATUM(column->bv_values[0]);
2570 ranges = brin_range_deserialize(serialized->maxvalues, serialized);
2571
2572 /* inspect the ranges, and for each one evaluate the scan keys */
2573 for (rangeno = 0; rangeno < ranges->nranges; rangeno++)
2574 {
 /* each range takes two consecutive entries: minimum, then maximum */
2575 Datum minval = ranges->values[2 * rangeno];
2576 Datum maxval = ranges->values[2 * rangeno + 1];
2577
2578 /* assume the range is matching, and we'll try to prove otherwise */
2579 bool matching = true;
2580
2581 for (keyno = 0; keyno < nkeys; keyno++)
2582 {
2583 bool matches;
2584 ScanKey key = keys[keyno];
2585
2586 /* NULL keys are handled and filtered-out in bringetbitmap */
2587 Assert(!(key->sk_flags & SK_ISNULL));
2588
2589 attno = key->sk_attno;
2590 subtype = key->sk_subtype;
2591 value = key->sk_argument;
2592 switch (key->sk_strategy)
2593 {
2596 finfo = minmax_multi_get_strategy_procinfo(bdesc, attno, subtype,
2597 key->sk_strategy);
2598 /* compare the range minimum with the scan key value */
2599 matches = DatumGetBool(FunctionCall2Coll(finfo, colloid, minval, value));
2600 break;
2601
2603 {
2604 Datum compar;
2605 FmgrInfo *cmpFn;
2606
2607 /* by default this range does not match */
2608 matches = false;
2609
2610 /*
2611 * The range can only match if the scan key value lies
2612 * between its boundaries. First check the value
2613 * against the minimum of this range, then against
2614 * its maximum.
2615 */
2616 cmpFn = minmax_multi_get_strategy_procinfo(bdesc, attno, subtype,
2618 compar = FunctionCall2Coll(cmpFn, colloid, minval, value);
2619
2620 /* smaller than the smallest value in this range */
2621 if (DatumGetBool(compar))
2622 break;
2623
2624 cmpFn = minmax_multi_get_strategy_procinfo(bdesc, attno, subtype,
2626 compar = FunctionCall2Coll(cmpFn, colloid, maxval, value);
2627
2628 /* larger than the largest value in this range */
2629 if (DatumGetBool(compar))
2630 break;
2631
2632 /*
2633 * We haven't managed to eliminate this range, so
2634 * consider it matching.
2635 */
2636 matches = true;
2637
2638 break;
2639 }
2642 finfo = minmax_multi_get_strategy_procinfo(bdesc, attno, subtype,
2643 key->sk_strategy);
2644 /* compare the range maximum with the scan key value */
2645 matches = DatumGetBool(FunctionCall2Coll(finfo, colloid, maxval, value));
2646 break;
2647
2648 default:
2649 /* shouldn't happen */
2650 elog(ERROR, "invalid strategy number %d", key->sk_strategy);
2651 matches = false;
2652 break;
2653 }
2654
2655 /* the range has to match all the scan keys */
2656 matching &= matches;
2657
2658 /* once we find a non-matching key, we're done */
2659 if (!matching)
2660 break;
2661 }
2662
2663 /*
2664 * have we found a range matching all scan keys? if yes, we're done
2665 */
2666 if (matching)
2667 PG_RETURN_BOOL(true);
2668 }
2669
2670 /*
2671 * And now inspect the values. We don't bother with doing a binary search
2672 * here, because we're dealing with serialized / fully compacted ranges,
2673 * so there should be only very few values.
2674 */
2675 for (i = 0; i < ranges->nvalues; i++)
2676 {
 /* the individual values are stored right after the range boundaries */
2677 Datum val = ranges->values[2 * ranges->nranges + i];
2678
2679 /* assume the value is matching, and we'll try to prove otherwise */
2680 bool matching = true;
2681
2682 for (keyno = 0; keyno < nkeys; keyno++)
2683 {
2684 bool matches;
2685 ScanKey key = keys[keyno];
2686
2687 /*
 * NOTE(review): the loop over ranges above asserts that no key
 * is NULL, while this loop silently skips such keys. The two
 * loops should probably treat NULL keys the same way.
 */
2688 if (key->sk_flags & SK_ISNULL)
2689 continue;
2690
2691 attno = key->sk_attno;
2692 subtype = key->sk_subtype;
2693 value = key->sk_argument;
2694 switch (key->sk_strategy)
2695 {
2701
 /* every supported strategy is evaluated directly on the value */
2702 finfo = minmax_multi_get_strategy_procinfo(bdesc, attno, subtype,
2703 key->sk_strategy);
2704 matches = DatumGetBool(FunctionCall2Coll(finfo, colloid, val, value));
2705 break;
2706
2707 default:
2708 /* shouldn't happen */
2709 elog(ERROR, "invalid strategy number %d", key->sk_strategy);
2710 matches = false;
2711 break;
2712 }
2713
2714 /* the value has to match all the scan keys */
2715 matching &= matches;
2716
2717 /* once we find a non-matching key, we're done */
2718 if (!matching)
2719 break;
2720 }
2721
2722 /* have we found a value matching all scan keys? if yes, we're done */
2723 if (matching)
2724 PG_RETURN_BOOL(true);
2725 }
2726
 /* neither a range nor a value satisfies all the scan keys */
2727 PG_RETURN_BOOL(false);
2728}
2729
2730/*
2731 * Given two BrinValues, update the first of them as a union of the summary
2732 * values contained in both. The second one is untouched.
 *
 * Both summaries are deserialized and expanded into a single array of
 * ranges, which is then sorted, merged where the ranges overlap, reduced
 * to at most maxvalues of the first summary, and finally serialized back
 * into bv_values[0] of the first column.
 *
 * NOTE(review): a few lines of this function (creation and cleanup of the
 * local memory context, the return statement, one argument line) are not
 * visible in this listing.
2733 */
2734Datum
2736{
2737 BrinDesc *bdesc = (BrinDesc *) PG_GETARG_POINTER(0);
2738 BrinValues *col_a = (BrinValues *) PG_GETARG_POINTER(1);
2739 BrinValues *col_b = (BrinValues *) PG_GETARG_POINTER(2);
2740
2741 Oid colloid = PG_GET_COLLATION();
2742 SerializedRanges *serialized_a;
2743 SerializedRanges *serialized_b;
2744 Ranges *ranges_a;
2745 Ranges *ranges_b;
2746 AttrNumber attno;
2747 Form_pg_attribute attr;
2748 ExpandedRange *eranges;
2749 int neranges;
2750 FmgrInfo *cmpFn,
2751 *distanceFn;
2752 DistanceValue *distances;
2753 MemoryContext ctx;
2754 MemoryContext oldctx;
2755
2756 Assert(col_a->bv_attno == col_b->bv_attno);
2757 Assert(!col_a->bv_allnulls && !col_b->bv_allnulls);
2758
2759 attno = col_a->bv_attno;
2760 attr = TupleDescAttr(bdesc->bd_tupdesc, attno - 1);
2761
2762 serialized_a = (SerializedRanges *) PG_DETOAST_DATUM(col_a->bv_values[0]);
2763 serialized_b = (SerializedRanges *) PG_DETOAST_DATUM(col_b->bv_values[0]);
2764
2765 ranges_a = brin_range_deserialize(serialized_a->maxvalues, serialized_a);
2766 ranges_b = brin_range_deserialize(serialized_b->maxvalues, serialized_b);
2767
2768 /* make sure neither of the ranges is NULL */
2769 Assert(ranges_a && ranges_b);
2770
 /* one expanded entry per range and per individual value, from both sides */
2771 neranges = (ranges_a->nranges + ranges_a->nvalues) +
2772 (ranges_b->nranges + ranges_b->nvalues);
2773
2774 /*
2775 * The distanceFn calls (which may internally call e.g. numeric_le) may
2776 * allocate quite a bit of memory, and we must not leak it. Otherwise,
2777 * we'd have problems e.g. when building indexes. So we create a local
2778 * memory context and make sure we free the memory before leaving this
2779 * function (not after every call).
2780 */
2782 "minmax-multi context",
2784
2785 oldctx = MemoryContextSwitchTo(ctx);
2786
2787 /* allocate and fill */
2788 eranges = (ExpandedRange *) palloc0(neranges * sizeof(ExpandedRange));
2789
2790 /* fill the expanded ranges with entries for the first range */
2791 fill_expanded_ranges(eranges, ranges_a->nranges + ranges_a->nvalues,
2792 ranges_a);
2793
2794 /* and now append the entries for the second range */
2795 fill_expanded_ranges(&eranges[ranges_a->nranges + ranges_a->nvalues],
2796 ranges_b->nranges + ranges_b->nvalues,
2797 ranges_b);
2798
2799 cmpFn = minmax_multi_get_strategy_procinfo(bdesc, attno, attr->atttypid,
2801
2802 /* sort the expanded ranges */
2803 neranges = sort_expanded_ranges(cmpFn, colloid, eranges, neranges);
2804
2805 /*
2806 * We've loaded two different lists of expanded ranges, so some of them
2807 * may be overlapping. So walk through them and merge them.
2808 */
2809 neranges = merge_overlapping_ranges(cmpFn, colloid, eranges, neranges);
2810
2811 /* check that the combined ranges are correct (no overlaps, ordering) */
2812 AssertCheckExpandedRanges(bdesc, colloid, attno, attr, eranges, neranges);
2813
2814 /*
2815 * If needed, reduce some of the ranges.
2816 *
2817 * XXX This may be fairly expensive, so maybe we should do it only when
2818 * it's actually needed (when we have too many ranges).
2819 */
2820
2821 /* build array of gap distances and sort them in ascending order */
2822 distanceFn = minmax_multi_get_procinfo(bdesc, attno, PROCNUM_DISTANCE);
2823 distances = build_distances(distanceFn, colloid, eranges, neranges);
2824
2825 /*
2826 * See how many values would be needed to store the current ranges, and if
2827 * needed combine as many of them to get below the threshold. The
2828 * collapsed ranges will be stored as a single value.
2829 *
2830 * XXX This does not apply the load factor, as we don't expect to add more
2831 * values to the range, so we prefer to keep as many ranges as possible.
2832 *
2833 * XXX Can the maxvalues be different in the two ranges? Perhaps we should
2834 * use maximum of those?
2835 */
2836 neranges = reduce_expanded_ranges(eranges, neranges, distances,
2837 ranges_a->maxvalues,
2838 cmpFn, colloid);
2839
2840 /* Is the result of reducing expanded ranges correct? */
2841 AssertCheckExpandedRanges(bdesc, colloid, attno, attr, eranges, neranges);
2842
2843 /* update the first range summary */
2844 store_expanded_ranges(ranges_a, eranges, neranges);
2845
2846 MemoryContextSwitchTo(oldctx);
2848
2849 /* cleanup and update the serialized value */
2850 pfree(serialized_a);
2851 col_a->bv_values[0] = PointerGetDatum(brin_range_serialize(ranges_a));
2852
2854}
2855
2856/*
2857 * Cache and return minmax multi opclass support procedure
2858 *
2859 * Return the procedure corresponding to the given function support number.
2860 * When the procedure is not available, an error is raised (invalid opclass
 * definition), so the visible code never returns NULL.
 *
 * The lookup result is cached in extra_procinfos of the opaque struct,
 * indexed by procnum - PROCNUM_BASE.
 *
 * NOTE(review): the signature line and the first line of the condition
 * that selects between the two branches are not visible in this listing.
2861 */
2862static FmgrInfo *
2864{
2865 MinmaxMultiOpaque *opaque;
2866 uint16 basenum = procnum - PROCNUM_BASE;
2867
2868 /*
2869 * We cache these in the opaque struct, to avoid repetitive syscache
2870 * lookups.
2871 */
2872 opaque = (MinmaxMultiOpaque *) bdesc->bd_info[attno - 1]->oi_opaque;
2873
 /* an invalid fn_oid marks a cache slot that was not filled yet */
2874 if (opaque->extra_procinfos[basenum].fn_oid == InvalidOid)
2875 {
2877 procnum)))
2878 fmgr_info_copy(&opaque->extra_procinfos[basenum],
2879 index_getprocinfo(bdesc->bd_index, attno, procnum),
2880 bdesc->bd_context);
2881 else
2882 ereport(ERROR,
2883 errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
2884 errmsg_internal("invalid opclass definition"),
2885 errdetail_internal("The operator class is missing support function %d for column %d.",
2886 procnum, attno));
2887 }
2888
2889 return &opaque->extra_procinfos[basenum];
2890}
2891
2892/*
2893 * Cache and return the procedure for the given strategy.
2894 *
2895 * The procedure is looked up through the operator registered in the
2896 * opfamily of the indexed column for (attribute type, subtype, strategy),
 * and an error is raised if no such operator exists. The result is cached
 * per strategy in the opaque struct, for one subtype at a time.
 *
 * NOTE(review): the original note here said that this function mirrors
 * minmax_multi_get_strategy_procinfo, which is this function itself.
 * Presumably it was meant to point at the equivalent function of another
 * BRIN opclass - TODO confirm which one and keep the two in sync.
 *
 * NOTE(review): the first line of the signature and the start of the call
 * that fills the cache entry are not visible in this listing.
2897 */
2898static FmgrInfo *
2900 uint16 strategynum)
2901{
2902 MinmaxMultiOpaque *opaque;
2903
2904 Assert(strategynum >= 1 &&
2905 strategynum <= BTMaxStrategyNumber);
2906
2907 opaque = (MinmaxMultiOpaque *) bdesc->bd_info[attno - 1]->oi_opaque;
2908
2909 /*
2910 * We cache the procedures for the previous subtype in the opaque struct,
2911 * to avoid repetitive syscache lookups. If the subtype changed,
2912 * invalidate all the cached entries.
2913 */
2914 if (opaque->cached_subtype != subtype)
2915 {
2916 uint16 i;
2917
2918 for (i = 1; i <= BTMaxStrategyNumber; i++)
2919 opaque->strategy_procinfos[i - 1].fn_oid = InvalidOid;
2920 opaque->cached_subtype = subtype;
2921 }
2922
 /* strategy numbers start at 1, the cache array at 0 */
2923 if (opaque->strategy_procinfos[strategynum - 1].fn_oid == InvalidOid)
2924 {
2925 Form_pg_attribute attr;
2926 HeapTuple tuple;
2927 Oid opfamily,
2928 oprid;
2929
2930 opfamily = bdesc->bd_index->rd_opfamily[attno - 1];
2931 attr = TupleDescAttr(bdesc->bd_tupdesc, attno - 1);
2932 tuple = SearchSysCache4(AMOPSTRATEGY, ObjectIdGetDatum(opfamily),
2933 ObjectIdGetDatum(attr->atttypid),
2934 ObjectIdGetDatum(subtype),
2935 Int16GetDatum(strategynum));
2936 if (!HeapTupleIsValid(tuple))
2937 elog(ERROR, "missing operator %d(%u,%u) in opfamily %u",
2938 strategynum, attr->atttypid, subtype, opfamily);
2939
 /* fetch the operator before the syscache entry is released */
2940 oprid = DatumGetObjectId(SysCacheGetAttrNotNull(AMOPSTRATEGY, tuple,
2941 Anum_pg_amop_amopopr));
2942 ReleaseSysCache(tuple);
2944
2946 &opaque->strategy_procinfos[strategynum - 1],
2947 bdesc->bd_context);
2948 }
2949
2950 return &opaque->strategy_procinfos[strategynum - 1];
2951}
2952
/*
 * Define the options accepted by the opclass.
 *
 * Initializes the local reloptions for a MinMaxMultiOptions struct and
 * registers one integer option, values_per_range, which is stored in the
 * valuesPerRange field.
 *
 * NOTE(review): the signature line, the declaration of relopts, the
 * default/min/max arguments of the option and the return statement are
 * not visible in this listing.
 */
2953Datum
2955{
2957
2958 init_local_reloptions(relopts, sizeof(MinMaxMultiOptions));
2959
2960 add_local_int_reloption(relopts, "values_per_range", "desc",
2962 offsetof(MinMaxMultiOptions, valuesPerRange));
2963
2965}
2966
2967/*
2968 * brin_minmax_multi_summary_in
2969 * - input routine for type brin_minmax_multi_summary.
2970 *
2971 * brin_minmax_multi_summary is only used internally to represent summaries
2972 * in BRIN minmax-multi indexes, so it has no operations of its own, and we
2973 * disallow input too.
 *
 * The function always raises an error and never returns a value.
2974 */
2975Datum
2977{
2978 /*
2979 * brin_minmax_multi_summary stores the data in binary form and parsing
2980 * text input is not needed, so disallow this.
2981 */
2982 ereport(ERROR,
2983 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2984 errmsg("cannot accept a value of type %s", "brin_minmax_multi_summary")));
2985
2986 PG_RETURN_VOID(); /* keep compiler quiet */
2987}
2988
2989
2990/*
2991 * brin_minmax_multi_summary_out
2992 * - output routine for type brin_minmax_multi_summary.
2993 *
2994 * BRIN minmax-multi summaries are serialized into a bytea value, but we
2995 * want to output something nicer that humans can understand.
 *
 * The output starts with the nranges, nvalues and maxvalues counters. It
 * is followed by an array of ranges (each printed as two values separated
 * by three dots) when there is at least one range, and by an array of the
 * individual values when there is at least one value. The values are
 * printed using the output function of the summarized data type.
 *
 * NOTE(review): several lines of this function (the signature, the
 * StringInfo declarations and their initialization, the detoasting of the
 * argument, and parts of the accumArrayResult calls) are not visible in
 * this listing.
2996 */
2997Datum
2999{
3000 int i;
3001 int idx;
3002 SerializedRanges *ranges;
3003 Ranges *ranges_deserialized;
3005 bool isvarlena;
3006 Oid outfunc;
3007 FmgrInfo fmgrinfo;
3008 ArrayBuildState *astate_values = NULL;
3009
3012
3013 /*
3014 * Detoast to get value with full 4B header (can't be stored in a toast
3015 * table, but can use 1B header).
3016 */
3018
3019 /* lookup output func for the type */
3020 getTypeOutputInfo(ranges->typid, &outfunc, &isvarlena);
3021 fmgr_info(outfunc, &fmgrinfo);
3022
3023 /* deserialize the range info into easy-to-process pieces */
3024 ranges_deserialized = brin_range_deserialize(ranges->maxvalues, ranges);
3025
3026 appendStringInfo(&str, "nranges: %d nvalues: %d maxvalues: %d",
3027 ranges_deserialized->nranges,
3028 ranges_deserialized->nvalues,
3029 ranges_deserialized->maxvalues);
3030
3031 /* print the ranges, idx walks through the values array */
3032 idx = 0;
3033 for (i = 0; i < ranges_deserialized->nranges; i++)
3034 {
3035 char *a,
3036 *b;
3037 text *c;
3039
3041
 /* each range consumes two entries: lower boundary, then upper */
3042 a = OutputFunctionCall(&fmgrinfo, ranges_deserialized->values[idx++]);
3043 b = OutputFunctionCall(&fmgrinfo, ranges_deserialized->values[idx++]);
3044
3045 appendStringInfo(&buf, "%s ... %s", a, b);
3046
3047 c = cstring_to_text_with_len(buf.data, buf.len);
3048
3049 astate_values = accumArrayResult(astate_values,
3051 false,
3052 TEXTOID,
3054 }
3055
3056 if (ranges_deserialized->nranges > 0)
3057 {
3058 Oid typoutput;
3059 bool typIsVarlena;
3060 Datum val;
3061 char *extval;
3062
3063 getTypeOutputInfo(ANYARRAYOID, &typoutput, &typIsVarlena);
3064
3065 val = makeArrayResult(astate_values, CurrentMemoryContext);
3066
3067 extval = OidOutputFunctionCall(typoutput, val);
3068
3069 appendStringInfo(&str, " ranges: %s", extval);
3070 }
3071
3072 /* print the individual values, starting with a fresh array state */
3073 astate_values = NULL;
3074
3075 for (i = 0; i < ranges_deserialized->nvalues; i++)
3076 {
3077 Datum a;
3078 text *b;
3079
 /* idx continues right after the last range boundary */
3080 a = FunctionCall1(&fmgrinfo, ranges_deserialized->values[idx++]);
3082
3083 astate_values = accumArrayResult(astate_values,
3085 false,
3086 TEXTOID,
3088 }
3089
3090 if (ranges_deserialized->nvalues > 0)
3091 {
3092 Oid typoutput;
3093 bool typIsVarlena;
3094 Datum val;
3095 char *extval;
3096
3097 getTypeOutputInfo(ANYARRAYOID, &typoutput, &typIsVarlena);
3098
3099 val = makeArrayResult(astate_values, CurrentMemoryContext);
3100
3101 extval = OidOutputFunctionCall(typoutput, val);
3102
3103 appendStringInfo(&str, " values: %s", extval);
3104 }
3105
3106
3108
3109 PG_RETURN_CSTRING(str.data);
3110}
3111
3112/*
3113 * brin_minmax_multi_summary_recv
3114 * - binary input routine for type brin_minmax_multi_summary.
 *
 * Binary input is rejected with the same error as the text input routine,
 * so this function never returns a value.
3115 */
3116Datum
3118{
3119 ereport(ERROR,
3120 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
3121 errmsg("cannot accept a value of type %s", "brin_minmax_multi_summary")));
3122
3123 PG_RETURN_VOID(); /* keep compiler quiet */
3124}
3125
3126/*
3127 * brin_minmax_multi_summary_send
3128 * - binary output routine for type brin_minmax_multi_summary.
3129 *
3130 * BRIN minmax-multi summaries are serialized in a bytea value (although
3131 * the type is named differently), so let's just send that.
 *
 * The call is forwarded to byteasend with the unchanged call info.
3132 */
3133Datum
3135{
3136 return byteasend(fcinfo);
3137}
Datum idx(PG_FUNCTION_ARGS)
Definition: _int_op.c:259
ArrayBuildState * accumArrayResult(ArrayBuildState *astate, Datum dvalue, bool disnull, Oid element_type, MemoryContext rcontext)
Definition: arrayfuncs.c:5350
Datum makeArrayResult(ArrayBuildState *astate, MemoryContext rcontext)
Definition: arrayfuncs.c:5420
int16 AttrNumber
Definition: attnum.h:21
const char *const days[]
Definition: datetime.c:84
Datum numeric_sub(PG_FUNCTION_ARGS)
Definition: numeric.c:3043
Datum numeric_le(PG_FUNCTION_ARGS)
Definition: numeric.c:2608
Datum numeric_float8(PG_FUNCTION_ARGS)
Definition: numeric.c:4746
uint32 BlockNumber
Definition: block.h:31
static Datum values[MAXATTR]
Definition: bootstrap.c:151
#define BrinGetPagesPerRange(relation)
Definition: brin.h:40
#define SizeofBrinOpcInfo(ncols)
Definition: brin_internal.h:41
Datum brin_minmax_multi_distance_float8(PG_FUNCTION_ARGS)
Datum brin_minmax_multi_options(PG_FUNCTION_ARGS)
Datum brin_minmax_multi_union(PG_FUNCTION_ARGS)
Datum brin_minmax_multi_distance_float4(PG_FUNCTION_ARGS)
static ExpandedRange * build_expanded_ranges(FmgrInfo *cmp, Oid colloid, Ranges *ranges, int *nranges)
#define MinMaxMultiGetValuesPerRange(opts)
static void AssertCheckExpandedRanges(BrinDesc *bdesc, Oid colloid, AttrNumber attno, Form_pg_attribute attr, ExpandedRange *ranges, int nranges)
struct DistanceValue DistanceValue
Datum brin_minmax_multi_distance_int8(PG_FUNCTION_ARGS)
Datum brin_minmax_multi_summary_recv(PG_FUNCTION_ARGS)
Datum brin_minmax_multi_summary_out(PG_FUNCTION_ARGS)
static DistanceValue * build_distances(FmgrInfo *distanceFn, Oid colloid, ExpandedRange *eranges, int neranges)
Datum brin_minmax_multi_add_value(PG_FUNCTION_ARGS)
struct SerializedRanges SerializedRanges
Datum brin_minmax_multi_distance_uuid(PG_FUNCTION_ARGS)
Datum brin_minmax_multi_distance_inet(PG_FUNCTION_ARGS)
Datum brin_minmax_multi_consistent(PG_FUNCTION_ARGS)
static void AssertCheckRanges(Ranges *ranges, FmgrInfo *cmpFn, Oid colloid)
static int compare_expanded_ranges(const void *a, const void *b, void *arg)
Datum brin_minmax_multi_distance_time(PG_FUNCTION_ARGS)
Datum brin_minmax_multi_distance_timestamp(PG_FUNCTION_ARGS)
static bool range_add_value(BrinDesc *bdesc, Oid colloid, AttrNumber attno, Form_pg_attribute attr, Ranges *ranges, Datum newval)
static bool ensure_free_space_in_buffer(BrinDesc *bdesc, Oid colloid, AttrNumber attno, Form_pg_attribute attr, Ranges *range)
static int reduce_expanded_ranges(ExpandedRange *eranges, int neranges, DistanceValue *distances, int max_values, FmgrInfo *cmp, Oid colloid)
struct MinMaxMultiOptions MinMaxMultiOptions
struct MinmaxMultiOpaque MinmaxMultiOpaque
static int compare_values(const void *a, const void *b, void *arg)
static void compactify_ranges(BrinDesc *bdesc, Ranges *ranges, int max_values)
#define MINMAX_BUFFER_MAX
Datum brin_minmax_multi_distance_numeric(PG_FUNCTION_ARGS)
#define MINMAX_BUFFER_LOAD_FACTOR
Datum brin_minmax_multi_summary_send(PG_FUNCTION_ARGS)
Datum brin_minmax_multi_distance_pg_lsn(PG_FUNCTION_ARGS)
static int brin_minmax_multi_get_values(BrinDesc *bdesc, MinMaxMultiOptions *opts)
struct compare_context compare_context
static FmgrInfo * minmax_multi_get_procinfo(BrinDesc *bdesc, uint16 attno, uint16 procnum)
struct ExpandedRange ExpandedRange
Datum brin_minmax_multi_distance_macaddr8(PG_FUNCTION_ARGS)
#define MINMAX_MAX_PROCNUMS
static int sort_expanded_ranges(FmgrInfo *cmp, Oid colloid, ExpandedRange *eranges, int neranges)
#define MINMAX_BUFFER_FACTOR
Datum brin_minmax_multi_distance_date(PG_FUNCTION_ARGS)
struct Ranges Ranges
static void range_deduplicate_values(Ranges *range)
static void fill_expanded_ranges(ExpandedRange *eranges, int neranges, Ranges *ranges)
static int merge_overlapping_ranges(FmgrInfo *cmp, Oid colloid, ExpandedRange *eranges, int neranges)
Datum brin_minmax_multi_summary_in(PG_FUNCTION_ARGS)
static void store_expanded_ranges(Ranges *ranges, ExpandedRange *eranges, int neranges)
#define PROCNUM_BASE
static bool has_matching_range(BrinDesc *bdesc, Oid colloid, Ranges *ranges, Datum newval, AttrNumber attno, Oid typid)
Datum brin_minmax_multi_distance_int2(PG_FUNCTION_ARGS)
static bool range_contains_value(BrinDesc *bdesc, Oid colloid, AttrNumber attno, Form_pg_attribute attr, Ranges *ranges, Datum newval, bool full)
Datum brin_minmax_multi_opcinfo(PG_FUNCTION_ARGS)
static int compare_distances(const void *a, const void *b)
Datum brin_minmax_multi_distance_interval(PG_FUNCTION_ARGS)
Datum brin_minmax_multi_distance_timetz(PG_FUNCTION_ARGS)
static void brin_minmax_multi_serialize(BrinDesc *bdesc, Datum src, Datum *dst)
static SerializedRanges * brin_range_serialize(Ranges *range)
Datum brin_minmax_multi_distance_tid(PG_FUNCTION_ARGS)
static Ranges * brin_range_deserialize(int maxvalues, SerializedRanges *serialized)
#define PROCNUM_DISTANCE
Datum brin_minmax_multi_distance_macaddr(PG_FUNCTION_ARGS)
static FmgrInfo * minmax_multi_get_strategy_procinfo(BrinDesc *bdesc, uint16 attno, Oid subtype, uint16 strategynum)
#define MINMAX_MULTI_DEFAULT_VALUES_PER_PAGE
Datum brin_minmax_multi_distance_int4(PG_FUNCTION_ARGS)
static Ranges * minmax_multi_init(int maxvalues)
#define MINMAX_BUFFER_MIN
#define INT64CONST(x)
Definition: c.h:516
#define RegProcedureIsValid(p)
Definition: c.h:748
#define Min(x, y)
Definition: c.h:975
#define MAXALIGN(LEN)
Definition: c.h:782
#define PG_USED_FOR_ASSERTS_ONLY
Definition: c.h:224
#define Max(x, y)
Definition: c.h:969
int64_t int64
Definition: c.h:499
double float8
Definition: c.h:601
#define FLEXIBLE_ARRAY_MEMBER
Definition: c.h:434
int16_t int16
Definition: c.h:497
int32_t int32
Definition: c.h:498
uint16_t uint16
Definition: c.h:501
size_t Size
Definition: c.h:576
int64 Timestamp
Definition: timestamp.h:38
#define USECS_PER_DAY
Definition: timestamp.h:131
#define USECS_PER_SEC
Definition: timestamp.h:134
#define PG_GETARG_TIMEADT(n)
Definition: date.h:90
int32 DateADT
Definition: date.h:23
int64 TimeADT
Definition: date.h:25
#define PG_GETARG_TIMETZADT_P(n)
Definition: date.h:91
#define PG_GETARG_DATEADT(n)
Definition: date.h:89
Datum datumCopy(Datum value, bool typByVal, int typLen)
Definition: datum.c:132
int errmsg_internal(const char *fmt,...)
Definition: elog.c:1157
int errdetail_internal(const char *fmt,...)
Definition: elog.c:1230
int errcode(int sqlerrcode)
Definition: elog.c:853
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
#define ereport(elevel,...)
Definition: elog.h:149
static float8 get_float8_infinity(void)
Definition: float.h:94
Datum FunctionCall2Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2)
Definition: fmgr.c:1149
void fmgr_info(Oid functionId, FmgrInfo *finfo)
Definition: fmgr.c:127
char * OidOutputFunctionCall(Oid functionId, Datum val)
Definition: fmgr.c:1763
void fmgr_info_cxt(Oid functionId, FmgrInfo *finfo, MemoryContext mcxt)
Definition: fmgr.c:137
char * OutputFunctionCall(FmgrInfo *flinfo, Datum val)
Definition: fmgr.c:1683
void fmgr_info_copy(FmgrInfo *dstinfo, FmgrInfo *srcinfo, MemoryContext destcxt)
Definition: fmgr.c:580
#define PG_RETURN_VOID()
Definition: fmgr.h:349
#define DirectFunctionCall2(func, arg1, arg2)
Definition: fmgr.h:643
#define PG_GETARG_FLOAT8(n)
Definition: fmgr.h:282
#define PG_RETURN_FLOAT8(x)
Definition: fmgr.h:367
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:276
#define PG_RETURN_CSTRING(x)
Definition: fmgr.h:362
#define DirectFunctionCall1(func, arg1)
Definition: fmgr.h:641
#define PG_GETARG_DATUM(n)
Definition: fmgr.h:268
#define PG_GETARG_INT64(n)
Definition: fmgr.h:283
#define PG_GET_OPCLASS_OPTIONS()
Definition: fmgr.h:342
#define PG_DETOAST_DATUM(datum)
Definition: fmgr.h:240
#define FunctionCall1(flinfo, arg1)
Definition: fmgr.h:659
#define PG_GETARG_INT32(n)
Definition: fmgr.h:269
#define PG_GETARG_FLOAT4(n)
Definition: fmgr.h:281
#define PG_RETURN_POINTER(x)
Definition: fmgr.h:361
#define PG_GET_COLLATION()
Definition: fmgr.h:198
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
#define PG_RETURN_BOOL(x)
Definition: fmgr.h:359
#define PG_GETARG_INT16(n)
Definition: fmgr.h:271
#define newval
Assert(PointerIsAligned(start, uint64))
return str start
const char * str
static const FormData_pg_attribute a1
Definition: heap.c:144
static const FormData_pg_attribute a2
Definition: heap.c:157
#define HeapTupleIsValid(tuple)
Definition: htup.h:78
#define MaxHeapTuplesPerPage
Definition: htup_details.h:624
FmgrInfo * index_getprocinfo(Relation irel, AttrNumber attnum, uint16 procnum)
Definition: indexam.c:906
RegProcedure index_getprocid(Relation irel, AttrNumber attnum, uint16 procnum)
Definition: indexam.c:872
long val
Definition: informix.c:689
static struct @165 value
int b
Definition: isn.c:71
int a
Definition: isn.c:70
int i
Definition: isn.c:74
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:78
int32 ItemPointerCompare(ItemPointer arg1, ItemPointer arg2)
Definition: itemptr.c:51
static OffsetNumber ItemPointerGetOffsetNumberNoCheck(const ItemPointerData *pointer)
Definition: itemptr.h:114
static BlockNumber ItemPointerGetBlockNumberNoCheck(const ItemPointerData *pointer)
Definition: itemptr.h:93
ItemPointerData * ItemPointer
Definition: itemptr.h:49
void getTypeOutputInfo(Oid type, Oid *typOutput, bool *typIsVarlena)
Definition: lsyscache.c:2990
RegProcedure get_opcode(Oid opno)
Definition: lsyscache.c:1368
int16 get_typlen(Oid typid)
Definition: lsyscache.c:2280
bool get_typbyval(Oid typid)
Definition: lsyscache.c:2305
void pfree(void *pointer)
Definition: mcxt.c:1524
void * palloc0(Size size)
Definition: mcxt.c:1347
void * palloc(Size size)
Definition: mcxt.c:1317
MemoryContext CurrentMemoryContext
Definition: mcxt.c:143
void MemoryContextDelete(MemoryContext context)
Definition: mcxt.c:454
#define AllocSetContextCreate
Definition: memutils.h:129
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:160
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:124
Oid oprid(Operator op)
Definition: parse_oper.c:238
static AmcheckOptions opts
Definition: pg_amcheck.c:112
FormData_pg_attribute * Form_pg_attribute
Definition: pg_attribute.h:200
void * arg
const void size_t len
const void * data
#define PG_GETARG_LSN(n)
Definition: pg_lsn.h:33
static char * buf
Definition: pg_test_fsync.c:72
void * bsearch_arg(const void *key, const void *base0, size_t nmemb, size_t size, int(*compar)(const void *, const void *, void *), void *arg)
Definition: bsearch_arg.c:55
void qsort_arg(void *base, size_t nel, size_t elsize, qsort_arg_comparator cmp, void *arg)
#define qsort(a, b, c, d)
Definition: port.h:475
static bool DatumGetBool(Datum X)
Definition: postgres.h:95
static Datum PointerGetDatum(const void *X)
Definition: postgres.h:327
uintptr_t Datum
Definition: postgres.h:69
static Oid DatumGetObjectId(Datum X)
Definition: postgres.h:247
static Datum Int16GetDatum(int16 X)
Definition: postgres.h:177
static float8 DatumGetFloat8(Datum X)
Definition: postgres.h:499
static Datum ObjectIdGetDatum(Oid X)
Definition: postgres.h:257
static char * DatumGetCString(Datum X)
Definition: postgres.h:340
static Pointer DatumGetPointer(Datum X)
Definition: postgres.h:317
#define InvalidOid
Definition: postgres_ext.h:37
unsigned int Oid
Definition: postgres_ext.h:32
char * c
static int cmp(const chr *x, const chr *y, size_t len)
Definition: regc_locale.c:743
static struct cvec * range(struct vars *v, chr a, chr b, int cases)
Definition: regc_locale.c:412
void init_local_reloptions(local_relopts *relopts, Size relopt_struct_size)
Definition: reloptions.c:753
void add_local_int_reloption(local_relopts *relopts, const char *name, const char *desc, int default_val, int min_val, int max_val, int offset)
Definition: reloptions.c:937
#define SK_ISNULL
Definition: skey.h:115
#define BTGreaterStrategyNumber
Definition: stratnum.h:33
#define BTMaxStrategyNumber
Definition: stratnum.h:35
#define BTLessStrategyNumber
Definition: stratnum.h:29
#define BTEqualStrategyNumber
Definition: stratnum.h:31
#define BTLessEqualStrategyNumber
Definition: stratnum.h:30
#define BTGreaterEqualStrategyNumber
Definition: stratnum.h:32
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:145
void appendStringInfoChar(StringInfo str, char ch)
Definition: stringinfo.c:242
void initStringInfo(StringInfo str)
Definition: stringinfo.c:97
TupleDesc bd_tupdesc
Definition: brin_internal.h:53
BrinOpcInfo * bd_info[FLEXIBLE_ARRAY_MEMBER]
Definition: brin_internal.h:62
Relation bd_index
Definition: brin_internal.h:50
MemoryContext bd_context
Definition: brin_internal.h:47
TypeCacheEntry * oi_typcache[FLEXIBLE_ARRAY_MEMBER]
Definition: brin_internal.h:37
uint16 oi_nstored
Definition: brin_internal.h:28
bool oi_regular_nulls
Definition: brin_internal.h:31
void * oi_opaque
Definition: brin_internal.h:34
MemoryContext bv_context
Definition: brin_tuple.h:36
Datum bv_mem_value
Definition: brin_tuple.h:35
brin_serialize_callback_type bv_serialize
Definition: brin_tuple.h:37
Datum * bv_values
Definition: brin_tuple.h:34
AttrNumber bv_attno
Definition: brin_tuple.h:31
bool bv_allnulls
Definition: brin_tuple.h:33
Definition: fmgr.h:57
Oid fn_oid
Definition: fmgr.h:59
int32 day
Definition: timestamp.h:51
int32 month
Definition: timestamp.h:52
TimeOffset time
Definition: timestamp.h:49
FmgrInfo extra_procinfos[MINMAX_MAX_PROCNUMS]
FmgrInfo strategy_procinfos[BTMaxStrategyNumber]
int target_maxvalues
AttrNumber attno
FmgrInfo * cmp
Datum values[FLEXIBLE_ARRAY_MEMBER]
Oid * rd_opfamily
Definition: rel.h:207
char data[FLEXIBLE_ARRAY_MEMBER]
Definition: date.h:28
TimeADT time
Definition: date.h:29
int32 zone
Definition: date.h:30
int nranges
Definition: regguts.h:283
Definition: type.h:96
Definition: inet.h:53
Definition: inet.h:108
Definition: inet.h:95
Definition: uuid.h:21
unsigned char data[UUID_LEN]
Definition: uuid.h:22
Definition: c.h:658
void ReleaseSysCache(HeapTuple tuple)
Definition: syscache.c:269
HeapTuple SearchSysCache4(int cacheId, Datum key1, Datum key2, Datum key3, Datum key4)
Definition: syscache.c:254
Datum SysCacheGetAttrNotNull(int cacheId, HeapTuple tup, AttrNumber attributeNumber)
Definition: syscache.c:631
static FormData_pg_attribute * TupleDescAttr(TupleDesc tupdesc, int i)
Definition: tupdesc.h:154
static Datum fetch_att(const void *T, bool attbyval, int attlen)
Definition: tupmacs.h:53
static void store_att_byval(void *T, Datum newdatum, int attlen)
Definition: tupmacs.h:211
TypeCacheEntry * lookup_type_cache(Oid type_id, int flags)
Definition: typcache.c:386
#define PG_GETARG_MACADDR_P(n)
Definition: inet.h:158
#define PG_GETARG_MACADDR8_P(n)
Definition: inet.h:174
#define ip_addr(inetptr)
Definition: inet.h:77
#define PG_GETARG_INET_PP(n)
Definition: inet.h:134
#define ip_family(inetptr)
Definition: inet.h:71
#define ip_addrsize(inetptr)
Definition: inet.h:80
#define ip_bits(inetptr)
Definition: inet.h:74
#define PG_GETARG_TIMESTAMP(n)
Definition: timestamp.h:63
#define PG_GETARG_INTERVAL_P(n)
Definition: timestamp.h:65
Datum uuid_le(PG_FUNCTION_ARGS)
Definition: uuid.c:215
static pg_uuid_t * DatumGetUUIDP(Datum X)
Definition: uuid.h:35
#define UUID_LEN
Definition: uuid.h:18
#define VARSIZE_ANY(PTR)
Definition: varatt.h:311
#define SET_VARSIZE(PTR, len)
Definition: varatt.h:305
text * cstring_to_text_with_len(const char *s, int len)
Definition: varlena.c:204
text * cstring_to_text(const char *s)
Definition: varlena.c:192
Datum byteasend(PG_FUNCTION_ARGS)
Definition: varlena.c:498
uint64 XLogRecPtr
Definition: xlogdefs.h:21