PostgreSQL Source Code git master
rangetypes_typanalyze.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * rangetypes_typanalyze.c
4 * Functions for gathering statistics from range columns
5 *
6 * For a range type column, histograms of lower and upper bounds, and
7 * the fraction of NULL and empty ranges are collected.
8 *
9 * Both histograms have the same length, and they are combined into a
10 * single array of ranges. This has the same shape as the histogram that
11 * std_typanalyze would collect, but the values are different. Each range
12 * in the array is a valid range, even though the lower and upper bounds
13 * come from different tuples. In theory, the standard scalar selectivity
14 * functions could be used with the combined histogram.
15 *
16 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
17 * Portions Copyright (c) 1994, Regents of the University of California
18 *
19 *
20 * IDENTIFICATION
21 * src/backend/utils/adt/rangetypes_typanalyze.c
22 *
23 *-------------------------------------------------------------------------
24 */
25#include "postgres.h"
26
27#include "catalog/pg_operator.h"
28#include "commands/vacuum.h"
29#include "utils/float.h"
30#include "utils/fmgrprotos.h"
31#include "utils/lsyscache.h"
33#include "utils/rangetypes.h"
34#include "varatt.h"
35
36static int float8_qsort_cmp(const void *a1, const void *a2, void *arg);
37static int range_bound_qsort_cmp(const void *a1, const void *a2, void *arg);
38static void compute_range_stats(VacAttrStats *stats,
39 AnalyzeAttrFetchFunc fetchfunc, int samplerows,
40 double totalrows);
41
42/*
43 * range_typanalyze -- typanalyze function for range columns
44 */
47{
49 TypeCacheEntry *typcache;
50
51 /* Get information about range type; note column might be a domain */
52 typcache = range_get_typcache(fcinfo, getBaseType(stats->attrtypid));
53
54 if (stats->attstattarget < 0)
56
58 stats->extra_data = typcache;
59 /* same as in std_typanalyze */
60 stats->minrows = 300 * stats->attstattarget;
61
62 PG_RETURN_BOOL(true);
63}
64
65/*
66 * multirange_typanalyze -- typanalyze function for multirange columns
67 *
68 * We do the same analysis as for ranges, but on the smallest range that
69 * completely includes the multirange.
70 */
73{
75 TypeCacheEntry *typcache;
76
77 /* Get information about multirange type; note column might be a domain */
78 typcache = multirange_get_typcache(fcinfo, getBaseType(stats->attrtypid));
79
80 if (stats->attstattarget < 0)
82
84 stats->extra_data = typcache;
85 /* same as in std_typanalyze */
86 stats->minrows = 300 * stats->attstattarget;
87
88 PG_RETURN_BOOL(true);
89}
90
91/*
92 * Comparison function for sorting float8s, used for range lengths.
93 */
94static int
95float8_qsort_cmp(const void *a1, const void *a2, void *arg)
96{
97 const float8 *f1 = (const float8 *) a1;
98 const float8 *f2 = (const float8 *) a2;
99
100 if (*f1 < *f2)
101 return -1;
102 else if (*f1 == *f2)
103 return 0;
104 else
105 return 1;
106}
107
108/*
109 * Comparison function for sorting RangeBounds.
110 */
111static int
112range_bound_qsort_cmp(const void *a1, const void *a2, void *arg)
113{
114 RangeBound *b1 = (RangeBound *) a1;
115 RangeBound *b2 = (RangeBound *) a2;
116 TypeCacheEntry *typcache = (TypeCacheEntry *) arg;
117
118 return range_cmp_bounds(typcache, b1, b2);
119}
120
121/*
122 * compute_range_stats() -- compute statistics for a range column
123 */
124static void
126 int samplerows, double totalrows)
127{
128 TypeCacheEntry *typcache = (TypeCacheEntry *) stats->extra_data;
129 TypeCacheEntry *mltrng_typcache = NULL;
130 bool has_subdiff;
131 int null_cnt = 0;
132 int non_null_cnt = 0;
133 int non_empty_cnt = 0;
134 int empty_cnt = 0;
135 int range_no;
136 int slot_idx;
137 int num_bins = stats->attstattarget;
138 int num_hist;
139 float8 *lengths;
140 RangeBound *lowers,
141 *uppers;
142 double total_width = 0;
143
144 if (typcache->typtype == TYPTYPE_MULTIRANGE)
145 {
146 mltrng_typcache = typcache;
147 typcache = typcache->rngtype;
148 }
149 else
150 Assert(typcache->typtype == TYPTYPE_RANGE);
151 has_subdiff = OidIsValid(typcache->rng_subdiff_finfo.fn_oid);
152
153 /* Allocate memory to hold range bounds and lengths of the sample ranges. */
154 lowers = (RangeBound *) palloc(sizeof(RangeBound) * samplerows);
155 uppers = (RangeBound *) palloc(sizeof(RangeBound) * samplerows);
156 lengths = (float8 *) palloc(sizeof(float8) * samplerows);
157
158 /* Loop over the sample ranges. */
159 for (range_no = 0; range_no < samplerows; range_no++)
160 {
161 Datum value;
162 bool isnull,
163 empty;
164 MultirangeType *multirange;
167 upper;
168 float8 length;
169
171
172 value = fetchfunc(stats, range_no, &isnull);
173 if (isnull)
174 {
175 /* range is null, just count that */
176 null_cnt++;
177 continue;
178 }
179
180 /*
181 * XXX: should we ignore wide values, like std_typanalyze does, to
182 * avoid bloating the statistics table?
183 */
184 total_width += VARSIZE_ANY(DatumGetPointer(value));
185
186 /* Get range and deserialize it for further analysis. */
187 if (mltrng_typcache != NULL)
188 {
189 /* Treat multiranges like a big range without gaps. */
190 multirange = DatumGetMultirangeTypeP(value);
191 if (!MultirangeIsEmpty(multirange))
192 {
193 RangeBound tmp;
194
195 multirange_get_bounds(typcache, multirange, 0,
196 &lower, &tmp);
197 multirange_get_bounds(typcache, multirange,
198 multirange->rangeCount - 1,
199 &tmp, &upper);
200 empty = false;
201 }
202 else
203 {
204 empty = true;
205 }
206 }
207 else
208 {
210 range_deserialize(typcache, range, &lower, &upper, &empty);
211 }
212
213 if (!empty)
214 {
215 /* Remember bounds and length for further usage in histograms */
216 lowers[non_empty_cnt] = lower;
217 uppers[non_empty_cnt] = upper;
218
219 if (lower.infinite || upper.infinite)
220 {
221 /* Length of any kind of an infinite range is infinite */
222 length = get_float8_infinity();
223 }
224 else if (has_subdiff)
225 {
226 /*
227 * For an ordinary range, use subdiff function between upper
228 * and lower bound values.
229 */
231 typcache->rng_collation,
232 upper.val, lower.val));
233 }
234 else
235 {
236 /* Use default value of 1.0 if no subdiff is available. */
237 length = 1.0;
238 }
239 lengths[non_empty_cnt] = length;
240
241 non_empty_cnt++;
242 }
243 else
244 empty_cnt++;
245
246 non_null_cnt++;
247 }
248
249 slot_idx = 0;
250
251 /* We can only compute real stats if we found some non-null values. */
252 if (non_null_cnt > 0)
253 {
254 Datum *bound_hist_values;
255 Datum *length_hist_values;
256 int pos,
257 posfrac,
258 delta,
259 deltafrac,
260 i;
261 MemoryContext old_cxt;
262 float4 *emptyfrac;
263
264 stats->stats_valid = true;
265 /* Do the simple null-frac and width stats */
266 stats->stanullfrac = (double) null_cnt / (double) samplerows;
267 stats->stawidth = total_width / (double) non_null_cnt;
268
269 /* Estimate that non-null values are unique */
270 stats->stadistinct = -1.0 * (1.0 - stats->stanullfrac);
271
272 /* Must copy the target values into anl_context */
273 old_cxt = MemoryContextSwitchTo(stats->anl_context);
274
275 /*
276 * Generate a bounds histogram slot entry if there are at least two
277 * values.
278 */
279 if (non_empty_cnt >= 2)
280 {
281 /* Sort bound values */
282 qsort_interruptible(lowers, non_empty_cnt, sizeof(RangeBound),
283 range_bound_qsort_cmp, typcache);
284 qsort_interruptible(uppers, non_empty_cnt, sizeof(RangeBound),
285 range_bound_qsort_cmp, typcache);
286
287 num_hist = non_empty_cnt;
288 if (num_hist > num_bins)
289 num_hist = num_bins + 1;
290
291 bound_hist_values = (Datum *) palloc(num_hist * sizeof(Datum));
292
293 /*
294 * The object of this loop is to construct ranges from first and
295 * last entries in lowers[] and uppers[] along with evenly-spaced
296 * values in between. So the i'th value is a range of lowers[(i *
297 * (nvals - 1)) / (num_hist - 1)] and uppers[(i * (nvals - 1)) /
298 * (num_hist - 1)]. But computing that subscript directly risks
299 * integer overflow when the stats target is more than a couple
300 * thousand. Instead we add (nvals - 1) / (num_hist - 1) to pos
301 * at each step, tracking the integral and fractional parts of the
302 * sum separately.
303 */
304 delta = (non_empty_cnt - 1) / (num_hist - 1);
305 deltafrac = (non_empty_cnt - 1) % (num_hist - 1);
306 pos = posfrac = 0;
307
308 for (i = 0; i < num_hist; i++)
309 {
310 bound_hist_values[i] = PointerGetDatum(range_serialize(typcache,
311 &lowers[pos],
312 &uppers[pos],
313 false,
314 NULL));
315 pos += delta;
316 posfrac += deltafrac;
317 if (posfrac >= (num_hist - 1))
318 {
319 /* fractional part exceeds 1, carry to integer part */
320 pos++;
321 posfrac -= (num_hist - 1);
322 }
323 }
324
325 stats->stakind[slot_idx] = STATISTIC_KIND_BOUNDS_HISTOGRAM;
326 stats->stavalues[slot_idx] = bound_hist_values;
327 stats->numvalues[slot_idx] = num_hist;
328
329 /* Store ranges even if we're analyzing a multirange column */
330 stats->statypid[slot_idx] = typcache->type_id;
331 stats->statyplen[slot_idx] = typcache->typlen;
332 stats->statypbyval[slot_idx] = typcache->typbyval;
333 stats->statypalign[slot_idx] = typcache->typalign;
334
335 slot_idx++;
336 }
337
338 /*
339 * Generate a length histogram slot entry if there are at least two
340 * values.
341 */
342 if (non_empty_cnt >= 2)
343 {
344 /*
345 * Ascending sort of range lengths for further filling of
346 * histogram
347 */
348 qsort_interruptible(lengths, non_empty_cnt, sizeof(float8),
349 float8_qsort_cmp, NULL);
350
351 num_hist = non_empty_cnt;
352 if (num_hist > num_bins)
353 num_hist = num_bins + 1;
354
355 length_hist_values = (Datum *) palloc(num_hist * sizeof(Datum));
356
357 /*
358 * The object of this loop is to copy the first and last lengths[]
359 * entries along with evenly-spaced values in between. So the i'th
360 * value is lengths[(i * (nvals - 1)) / (num_hist - 1)]. But
361 * computing that subscript directly risks integer overflow when
362 * the stats target is more than a couple thousand. Instead we
363 * add (nvals - 1) / (num_hist - 1) to pos at each step, tracking
364 * the integral and fractional parts of the sum separately.
365 */
366 delta = (non_empty_cnt - 1) / (num_hist - 1);
367 deltafrac = (non_empty_cnt - 1) % (num_hist - 1);
368 pos = posfrac = 0;
369
370 for (i = 0; i < num_hist; i++)
371 {
372 length_hist_values[i] = Float8GetDatum(lengths[pos]);
373 pos += delta;
374 posfrac += deltafrac;
375 if (posfrac >= (num_hist - 1))
376 {
377 /* fractional part exceeds 1, carry to integer part */
378 pos++;
379 posfrac -= (num_hist - 1);
380 }
381 }
382 }
383 else
384 {
385 /*
386 * Even when we don't create the histogram, store an empty array
387 * to mean "no histogram". We can't just leave stavalues NULL,
388 * because get_attstatsslot() errors if you ask for stavalues, and
389 * it's NULL. We'll still store the empty fraction in stanumbers.
390 */
391 length_hist_values = palloc(0);
392 num_hist = 0;
393 }
394 stats->staop[slot_idx] = Float8LessOperator;
395 stats->stacoll[slot_idx] = InvalidOid;
396 stats->stavalues[slot_idx] = length_hist_values;
397 stats->numvalues[slot_idx] = num_hist;
398 stats->statypid[slot_idx] = FLOAT8OID;
399 stats->statyplen[slot_idx] = sizeof(float8);
400 stats->statypbyval[slot_idx] = FLOAT8PASSBYVAL;
401 stats->statypalign[slot_idx] = 'd';
402
403 /* Store the fraction of empty ranges */
404 emptyfrac = (float4 *) palloc(sizeof(float4));
405 *emptyfrac = ((double) empty_cnt) / ((double) non_null_cnt);
406 stats->stanumbers[slot_idx] = emptyfrac;
407 stats->numnumbers[slot_idx] = 1;
408
409 stats->stakind[slot_idx] = STATISTIC_KIND_RANGE_LENGTH_HISTOGRAM;
410 slot_idx++;
411
412 MemoryContextSwitchTo(old_cxt);
413 }
414 else if (null_cnt > 0)
415 {
416 /* We found only nulls; assume the column is entirely null */
417 stats->stats_valid = true;
418 stats->stanullfrac = 1.0;
419 stats->stawidth = 0; /* "unknown" */
420 stats->stadistinct = 0.0; /* "unknown" */
421 }
422
423 /*
424 * We don't need to bother cleaning up any of our temporary palloc's. The
425 * hashtable should also go away, as it used a child memory context.
426 */
427}
#define Assert(condition)
Definition: c.h:815
double float8
Definition: c.h:587
#define FLOAT8PASSBYVAL
Definition: c.h:592
float float4
Definition: c.h:586
#define OidIsValid(objectId)
Definition: c.h:732
int default_statistics_target
Definition: analyze.c:71
static float8 get_float8_infinity(void)
Definition: float.h:94
Datum FunctionCall2Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2)
Definition: fmgr.c:1149
Datum Float8GetDatum(float8 X)
Definition: fmgr.c:1816
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:276
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
#define PG_RETURN_BOOL(x)
Definition: fmgr.h:359
static const FormData_pg_attribute a1
Definition: heap.c:143
static const FormData_pg_attribute a2
Definition: heap.c:156
static struct @162 value
int i
Definition: isn.c:72
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:76
Oid getBaseType(Oid typid)
Definition: lsyscache.c:2521
void * palloc(Size size)
Definition: mcxt.c:1317
void multirange_get_bounds(TypeCacheEntry *rangetyp, const MultirangeType *multirange, uint32 i, RangeBound *lower, RangeBound *upper)
TypeCacheEntry * multirange_get_typcache(FunctionCallInfo fcinfo, Oid mltrngtypid)
#define MultirangeIsEmpty(mr)
static MultirangeType * DatumGetMultirangeTypeP(Datum X)
Datum lower(PG_FUNCTION_ARGS)
Definition: oracle_compat.c:49
Datum upper(PG_FUNCTION_ARGS)
Definition: oracle_compat.c:80
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:124
void * arg
void qsort_interruptible(void *base, size_t nel, size_t elsize, qsort_arg_comparator cmp, void *arg)
static Datum PointerGetDatum(const void *X)
Definition: postgres.h:327
uintptr_t Datum
Definition: postgres.h:69
static float8 DatumGetFloat8(Datum X)
Definition: postgres.h:499
static Pointer DatumGetPointer(Datum X)
Definition: postgres.h:317
#define InvalidOid
Definition: postgres_ext.h:37
int range_cmp_bounds(TypeCacheEntry *typcache, const RangeBound *b1, const RangeBound *b2)
Definition: rangetypes.c:2016
RangeType * range_serialize(TypeCacheEntry *typcache, RangeBound *lower, RangeBound *upper, bool empty, struct Node *escontext)
Definition: rangetypes.c:1727
void range_deserialize(TypeCacheEntry *typcache, const RangeType *range, RangeBound *lower, RangeBound *upper, bool *empty)
Definition: rangetypes.c:1856
TypeCacheEntry * range_get_typcache(FunctionCallInfo fcinfo, Oid rngtypid)
Definition: rangetypes.c:1703
static RangeType * DatumGetRangeTypeP(Datum X)
Definition: rangetypes.h:73
Datum multirange_typanalyze(PG_FUNCTION_ARGS)
static int float8_qsort_cmp(const void *a1, const void *a2, void *arg)
static int range_bound_qsort_cmp(const void *a1, const void *a2, void *arg)
Datum range_typanalyze(PG_FUNCTION_ARGS)
static void compute_range_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc, int samplerows, double totalrows)
static struct cvec * range(struct vars *v, chr a, chr b, int cases)
Definition: regc_locale.c:412
int f1[ARRAY_SIZE]
Definition: sql-declare.c:113
int f2[ARRAY_SIZE]
Definition: sql-declare.c:116
Oid fn_oid
Definition: fmgr.h:59
Oid rng_collation
Definition: typcache.h:100
char typalign
Definition: typcache.h:41
char typtype
Definition: typcache.h:43
struct TypeCacheEntry * rngtype
Definition: typcache.h:108
FmgrInfo rng_subdiff_finfo
Definition: typcache.h:103
bool typbyval
Definition: typcache.h:40
int16 typlen
Definition: typcache.h:39
bool stats_valid
Definition: vacuum.h:144
float4 stanullfrac
Definition: vacuum.h:145
int16 stakind[STATISTIC_NUM_SLOTS]
Definition: vacuum.h:148
MemoryContext anl_context
Definition: vacuum.h:130
Oid statypid[STATISTIC_NUM_SLOTS]
Definition: vacuum.h:162
Oid staop[STATISTIC_NUM_SLOTS]
Definition: vacuum.h:149
Oid stacoll[STATISTIC_NUM_SLOTS]
Definition: vacuum.h:150
char statypalign[STATISTIC_NUM_SLOTS]
Definition: vacuum.h:165
float4 * stanumbers[STATISTIC_NUM_SLOTS]
Definition: vacuum.h:152
Oid attrtypid
Definition: vacuum.h:126
int minrows
Definition: vacuum.h:137
int attstattarget
Definition: vacuum.h:125
int32 stawidth
Definition: vacuum.h:146
void * extra_data
Definition: vacuum.h:138
bool statypbyval[STATISTIC_NUM_SLOTS]
Definition: vacuum.h:164
int16 statyplen[STATISTIC_NUM_SLOTS]
Definition: vacuum.h:163
int numvalues[STATISTIC_NUM_SLOTS]
Definition: vacuum.h:153
Datum * stavalues[STATISTIC_NUM_SLOTS]
Definition: vacuum.h:154
float4 stadistinct
Definition: vacuum.h:147
int numnumbers[STATISTIC_NUM_SLOTS]
Definition: vacuum.h:151
AnalyzeAttrComputeStatsFunc compute_stats
Definition: vacuum.h:136
void vacuum_delay_point(void)
Definition: vacuum.c:2361
Datum(* AnalyzeAttrFetchFunc)(VacAttrStatsP stats, int rownum, bool *isNull)
Definition: vacuum.h:108
#define VARSIZE_ANY(PTR)
Definition: varatt.h:311