PostgreSQL Source Code  git master
rangetypes_typanalyze.c File Reference
#include "postgres.h"
#include "catalog/pg_operator.h"
#include "commands/vacuum.h"
#include "utils/float.h"
#include "utils/fmgrprotos.h"
#include "utils/lsyscache.h"
#include "utils/rangetypes.h"
#include "utils/multirangetypes.h"
Include dependency graph for rangetypes_typanalyze.c:

Go to the source code of this file.

Functions

static int float8_qsort_cmp (const void *a1, const void *a2)
 
static int range_bound_qsort_cmp (const void *a1, const void *a2, void *arg)
 
static void compute_range_stats (VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc, int samplerows, double totalrows)
 
Datum range_typanalyze (PG_FUNCTION_ARGS)
 
Datum multirange_typanalyze (PG_FUNCTION_ARGS)
 

Function Documentation

◆ compute_range_stats()

static void compute_range_stats ( VacAttrStats stats,
AnalyzeAttrFetchFunc  fetchfunc,
int  samplerows,
double  totalrows 
)
static

Definition at line 126 of file rangetypes_typanalyze.c.

References VacAttrStats::anl_context, Assert, VacAttrStats::attr, DatumGetFloat8, DatumGetMultirangeTypeP, DatumGetPointer, DatumGetRangeTypeP, VacAttrStats::extra_data, float8_qsort_cmp(), Float8GetDatum(), FLOAT8PASSBYVAL, FmgrInfo::fn_oid, FunctionCall2Coll(), get_float8_infinity(), i, RangeBound::infinite, InvalidOid, lower(), MemoryContextSwitchTo(), multirange_get_bounds(), MultirangeIsEmpty, VacAttrStats::numnumbers, VacAttrStats::numvalues, OidIsValid, palloc(), PointerGetDatum, qsort, qsort_arg(), range(), range_bound_qsort_cmp(), range_deserialize(), range_serialize(), MultirangeType::rangeCount, TypeCacheEntry::rng_collation, TypeCacheEntry::rng_subdiff_finfo, TypeCacheEntry::rngtype, VacAttrStats::stacoll, VacAttrStats::stadistinct, VacAttrStats::stakind, VacAttrStats::stanullfrac, VacAttrStats::stanumbers, VacAttrStats::staop, VacAttrStats::stats_valid, VacAttrStats::statypalign, VacAttrStats::statypbyval, VacAttrStats::statypid, VacAttrStats::statyplen, VacAttrStats::stavalues, VacAttrStats::stawidth, TypeCacheEntry::typalign, TypeCacheEntry::typbyval, TypeCacheEntry::type_id, TypeCacheEntry::typlen, TypeCacheEntry::typtype, upper(), vacuum_delay_point(), RangeBound::val, value, and VARSIZE_ANY.

Referenced by multirange_typanalyze(), and range_typanalyze().

128 {
129  TypeCacheEntry *typcache = (TypeCacheEntry *) stats->extra_data;
130  TypeCacheEntry *mltrng_typcache = NULL;
131  bool has_subdiff;
132  int null_cnt = 0;
133  int non_null_cnt = 0;
134  int non_empty_cnt = 0;
135  int empty_cnt = 0;
136  int range_no;
137  int slot_idx;
138  int num_bins = stats->attr->attstattarget;
139  int num_hist;
140  float8 *lengths;
141  RangeBound *lowers,
142  *uppers;
143  double total_width = 0;
144 
145  if (typcache->typtype == TYPTYPE_MULTIRANGE)
146  {
147  mltrng_typcache = typcache;
148  typcache = typcache->rngtype;
149  }
150  else
151  Assert(typcache->typtype == TYPTYPE_RANGE);
152  has_subdiff = OidIsValid(typcache->rng_subdiff_finfo.fn_oid);
153 
154  /* Allocate memory to hold range bounds and lengths of the sample ranges. */
155  lowers = (RangeBound *) palloc(sizeof(RangeBound) * samplerows);
156  uppers = (RangeBound *) palloc(sizeof(RangeBound) * samplerows);
157  lengths = (float8 *) palloc(sizeof(float8) * samplerows);
158 
159  /* Loop over the sample ranges. */
160  for (range_no = 0; range_no < samplerows; range_no++)
161  {
162  Datum value;
163  bool isnull,
164  empty;
165  MultirangeType *multirange;
166  RangeType *range;
168  upper;
169  float8 length;
170 
172 
173  value = fetchfunc(stats, range_no, &isnull);
174  if (isnull)
175  {
176  /* range is null, just count that */
177  null_cnt++;
178  continue;
179  }
180 
181  /*
182  * XXX: should we ignore wide values, like std_typanalyze does, to
183  * avoid bloating the statistics table?
184  */
185  total_width += VARSIZE_ANY(DatumGetPointer(value));
186 
187  /* Get range and deserialize it for further analysis. */
188  if (mltrng_typcache != NULL)
189  {
190  /* Treat multiranges like a big range without gaps. */
191  multirange = DatumGetMultirangeTypeP(value);
192  if (!MultirangeIsEmpty(multirange))
193  {
194  RangeBound tmp;
195 
196  multirange_get_bounds(typcache, multirange, 0,
197  &lower, &tmp);
198  multirange_get_bounds(typcache, multirange,
199  multirange->rangeCount - 1,
200  &tmp, &upper);
201  empty = false;
202  }
203  else
204  {
205  empty = true;
206  }
207  }
208  else
209  {
210  range = DatumGetRangeTypeP(value);
211  range_deserialize(typcache, range, &lower, &upper, &empty);
212  }
213 
214  if (!empty)
215  {
216  /* Remember bounds and length for further usage in histograms */
217  lowers[non_empty_cnt] = lower;
218  uppers[non_empty_cnt] = upper;
219 
220  if (lower.infinite || upper.infinite)
221  {
222  /* Length of any kind of an infinite range is infinite */
223  length = get_float8_infinity();
224  }
225  else if (has_subdiff)
226  {
227  /*
228  * For an ordinary range, use subdiff function between upper
229  * and lower bound values.
230  */
232  typcache->rng_collation,
233  upper.val, lower.val));
234  }
235  else
236  {
237  /* Use default value of 1.0 if no subdiff is available. */
238  length = 1.0;
239  }
240  lengths[non_empty_cnt] = length;
241 
242  non_empty_cnt++;
243  }
244  else
245  empty_cnt++;
246 
247  non_null_cnt++;
248  }
249 
250  slot_idx = 0;
251 
252  /* We can only compute real stats if we found some non-null values. */
253  if (non_null_cnt > 0)
254  {
255  Datum *bound_hist_values;
256  Datum *length_hist_values;
257  int pos,
258  posfrac,
259  delta,
260  deltafrac,
261  i;
262  MemoryContext old_cxt;
263  float4 *emptyfrac;
264 
265  stats->stats_valid = true;
266  /* Do the simple null-frac and width stats */
267  stats->stanullfrac = (double) null_cnt / (double) samplerows;
268  stats->stawidth = total_width / (double) non_null_cnt;
269 
270  /* Estimate that non-null values are unique */
271  stats->stadistinct = -1.0 * (1.0 - stats->stanullfrac);
272 
273  /* Must copy the target values into anl_context */
274  old_cxt = MemoryContextSwitchTo(stats->anl_context);
275 
276  /*
277  * Generate a bounds histogram slot entry if there are at least two
278  * values.
279  */
280  if (non_empty_cnt >= 2)
281  {
282  /* Sort bound values */
283  qsort_arg(lowers, non_empty_cnt, sizeof(RangeBound),
284  range_bound_qsort_cmp, typcache);
285  qsort_arg(uppers, non_empty_cnt, sizeof(RangeBound),
286  range_bound_qsort_cmp, typcache);
287 
288  num_hist = non_empty_cnt;
289  if (num_hist > num_bins)
290  num_hist = num_bins + 1;
291 
292  bound_hist_values = (Datum *) palloc(num_hist * sizeof(Datum));
293 
294  /*
295  * The object of this loop is to construct ranges from first and
296  * last entries in lowers[] and uppers[] along with evenly-spaced
297  * values in between. So the i'th value is a range of lowers[(i *
298  * (nvals - 1)) / (num_hist - 1)] and uppers[(i * (nvals - 1)) /
299  * (num_hist - 1)]. But computing that subscript directly risks
300  * integer overflow when the stats target is more than a couple
301  * thousand. Instead we add (nvals - 1) / (num_hist - 1) to pos
302  * at each step, tracking the integral and fractional parts of the
303  * sum separately.
304  */
305  delta = (non_empty_cnt - 1) / (num_hist - 1);
306  deltafrac = (non_empty_cnt - 1) % (num_hist - 1);
307  pos = posfrac = 0;
308 
309  for (i = 0; i < num_hist; i++)
310  {
311  bound_hist_values[i] = PointerGetDatum(range_serialize(typcache,
312  &lowers[pos],
313  &uppers[pos],
314  false));
315  pos += delta;
316  posfrac += deltafrac;
317  if (posfrac >= (num_hist - 1))
318  {
319  /* fractional part exceeds 1, carry to integer part */
320  pos++;
321  posfrac -= (num_hist - 1);
322  }
323  }
324 
325  stats->stakind[slot_idx] = STATISTIC_KIND_BOUNDS_HISTOGRAM;
326  stats->stavalues[slot_idx] = bound_hist_values;
327  stats->numvalues[slot_idx] = num_hist;
328 
329  /* Store ranges even if we're analyzing a multirange column */
330  stats->statypid[slot_idx] = typcache->type_id;
331  stats->statyplen[slot_idx] = typcache->typlen;
332  stats->statypbyval[slot_idx] = typcache->typbyval;
333  stats->statypalign[slot_idx] = typcache->typalign;
334 
335  slot_idx++;
336  }
337 
338  /*
339  * Generate a length histogram slot entry if there are at least two
340  * values.
341  */
342  if (non_empty_cnt >= 2)
343  {
344  /*
345  * Ascending sort of range lengths for further filling of
346  * histogram
347  */
348  qsort(lengths, non_empty_cnt, sizeof(float8), float8_qsort_cmp);
349 
350  num_hist = non_empty_cnt;
351  if (num_hist > num_bins)
352  num_hist = num_bins + 1;
353 
354  length_hist_values = (Datum *) palloc(num_hist * sizeof(Datum));
355 
356  /*
357  * The object of this loop is to copy the first and last lengths[]
358  * entries along with evenly-spaced values in between. So the i'th
359  * value is lengths[(i * (nvals - 1)) / (num_hist - 1)]. But
360  * computing that subscript directly risks integer overflow when
361  * the stats target is more than a couple thousand. Instead we
362  * add (nvals - 1) / (num_hist - 1) to pos at each step, tracking
363  * the integral and fractional parts of the sum separately.
364  */
365  delta = (non_empty_cnt - 1) / (num_hist - 1);
366  deltafrac = (non_empty_cnt - 1) % (num_hist - 1);
367  pos = posfrac = 0;
368 
369  for (i = 0; i < num_hist; i++)
370  {
371  length_hist_values[i] = Float8GetDatum(lengths[pos]);
372  pos += delta;
373  posfrac += deltafrac;
374  if (posfrac >= (num_hist - 1))
375  {
376  /* fractional part exceeds 1, carry to integer part */
377  pos++;
378  posfrac -= (num_hist - 1);
379  }
380  }
381  }
382  else
383  {
384  /*
385  * Even when we don't create the histogram, store an empty array
386  * to mean "no histogram". We can't just leave stavalues NULL,
387  * because get_attstatsslot() errors if you ask for stavalues, and
388  * it's NULL. We'll still store the empty fraction in stanumbers.
389  */
390  length_hist_values = palloc(0);
391  num_hist = 0;
392  }
393  stats->staop[slot_idx] = Float8LessOperator;
394  stats->stacoll[slot_idx] = InvalidOid;
395  stats->stavalues[slot_idx] = length_hist_values;
396  stats->numvalues[slot_idx] = num_hist;
397  stats->statypid[slot_idx] = FLOAT8OID;
398  stats->statyplen[slot_idx] = sizeof(float8);
399  stats->statypbyval[slot_idx] = FLOAT8PASSBYVAL;
400  stats->statypalign[slot_idx] = 'd';
401 
402  /* Store the fraction of empty ranges */
403  emptyfrac = (float4 *) palloc(sizeof(float4));
404  *emptyfrac = ((double) empty_cnt) / ((double) non_null_cnt);
405  stats->stanumbers[slot_idx] = emptyfrac;
406  stats->numnumbers[slot_idx] = 1;
407 
408  stats->stakind[slot_idx] = STATISTIC_KIND_RANGE_LENGTH_HISTOGRAM;
409  slot_idx++;
410 
411  MemoryContextSwitchTo(old_cxt);
412  }
413  else if (null_cnt > 0)
414  {
415  /* We found only nulls; assume the column is entirely null */
416  stats->stats_valid = true;
417  stats->stanullfrac = 1.0;
418  stats->stawidth = 0; /* "unknown" */
419  stats->stadistinct = 0.0; /* "unknown" */
420  }
421 
422  /*
423  * We don't need to bother cleaning up any of our temporary palloc's. The
424  * hashtable should also go away, as it used a child memory context.
425  */
426 }
static float8 get_float8_infinity(void)
Definition: float.h:93
Datum lower(PG_FUNCTION_ARGS)
Definition: oracle_compat.c:46
#define PointerGetDatum(X)
Definition: postgres.h:600
Datum * stavalues[STATISTIC_NUM_SLOTS]
Definition: vacuum.h:152
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
Datum val
Definition: rangetypes.h:64
bool statypbyval[STATISTIC_NUM_SLOTS]
Definition: vacuum.h:162
Datum upper(PG_FUNCTION_ARGS)
Definition: oracle_compat.c:77
Datum FunctionCall2Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2)
Definition: fmgr.c:1148
static int range_bound_qsort_cmp(const void *a1, const void *a2, void *arg)
int16 typlen
Definition: typcache.h:39
bool typbyval
Definition: typcache.h:40
#define OidIsValid(objectId)
Definition: c.h:710
char statypalign[STATISTIC_NUM_SLOTS]
Definition: vacuum.h:163
Form_pg_attribute attr
Definition: vacuum.h:123
Datum Float8GetDatum(float8 X)
Definition: fmgr.c:1706
static int float8_qsort_cmp(const void *a1, const void *a2)
FmgrInfo rng_subdiff_finfo
Definition: typcache.h:102
double float8
Definition: c.h:565
int32 stawidth
Definition: vacuum.h:144
Oid stacoll[STATISTIC_NUM_SLOTS]
Definition: vacuum.h:148
#define DatumGetMultirangeTypeP(X)
int numnumbers[STATISTIC_NUM_SLOTS]
Definition: vacuum.h:149
void multirange_get_bounds(TypeCacheEntry *rangetyp, const MultirangeType *multirange, uint32 i, RangeBound *lower, RangeBound *upper)
static struct cvec * range(struct vars *v, chr a, chr b, int cases)
Definition: regc_locale.c:412
float4 stanullfrac
Definition: vacuum.h:143
void qsort_arg(void *base, size_t nel, size_t elsize, qsort_arg_comparator cmp, void *arg)
Oid staop[STATISTIC_NUM_SLOTS]
Definition: vacuum.h:147
bool stats_valid
Definition: vacuum.h:142
float float4
Definition: c.h:564
#define DatumGetFloat8(X)
Definition: postgres.h:758
uintptr_t Datum
Definition: postgres.h:411
int16 stakind[STATISTIC_NUM_SLOTS]
Definition: vacuum.h:146
#define VARSIZE_ANY(PTR)
Definition: postgres.h:348
Oid statypid[STATISTIC_NUM_SLOTS]
Definition: vacuum.h:160
#define InvalidOid
Definition: postgres_ext.h:36
static struct @143 value
Oid fn_oid
Definition: fmgr.h:59
char typtype
Definition: typcache.h:43
#define FLOAT8PASSBYVAL
Definition: c.h:570
#define Assert(condition)
Definition: c.h:804
float4 * stanumbers[STATISTIC_NUM_SLOTS]
Definition: vacuum.h:150
#define DatumGetRangeTypeP(X)
Definition: rangetypes.h:73
bool infinite
Definition: rangetypes.h:65
Oid rng_collation
Definition: typcache.h:99
#define MultirangeIsEmpty(mr)
struct TypeCacheEntry * rngtype
Definition: typcache.h:107
MemoryContext anl_context
Definition: vacuum.h:128
#define DatumGetPointer(X)
Definition: postgres.h:593
static Ranges * range_deserialize(int maxvalues, SerializedRanges *range)
int numvalues[STATISTIC_NUM_SLOTS]
Definition: vacuum.h:151
char typalign
Definition: typcache.h:41
void * palloc(Size size)
Definition: mcxt.c:1062
static SerializedRanges * range_serialize(Ranges *range)
int16 statyplen[STATISTIC_NUM_SLOTS]
Definition: vacuum.h:161
int i
void * extra_data
Definition: vacuum.h:136
#define qsort(a, b, c, d)
Definition: port.h:504
void vacuum_delay_point(void)
Definition: vacuum.c:2149
float4 stadistinct
Definition: vacuum.h:145

◆ float8_qsort_cmp()

static int float8_qsort_cmp ( const void *  a1,
const void *  a2 
)
static

Definition at line 96 of file rangetypes_typanalyze.c.

References f1, and f2.

Referenced by compute_range_stats().

97 {
98  const float8 *f1 = (const float8 *) a1;
99  const float8 *f2 = (const float8 *) a2;
100 
101  if (*f1 < *f2)
102  return -1;
103  else if (*f1 == *f2)
104  return 0;
105  else
106  return 1;
107 }
static const FormData_pg_attribute a2
Definition: heap.c:167
double float8
Definition: c.h:565
int f2[ARRAY_SIZE]
Definition: sql-declare.c:116
int f1[ARRAY_SIZE]
Definition: sql-declare.c:113
static const FormData_pg_attribute a1
Definition: heap.c:153

◆ multirange_typanalyze()

Datum multirange_typanalyze ( PG_FUNCTION_ARGS  )

Definition at line 72 of file rangetypes_typanalyze.c.

References VacAttrStats::attr, VacAttrStats::attrtypid, compute_range_stats(), VacAttrStats::compute_stats, default_statistics_target, VacAttrStats::extra_data, getBaseType(), VacAttrStats::minrows, multirange_get_typcache(), PG_GETARG_POINTER, and PG_RETURN_BOOL.

73 {
75  TypeCacheEntry *typcache;
76  Form_pg_attribute attr = stats->attr;
77 
78  /* Get information about multirange type; note column might be a domain */
79  typcache = multirange_get_typcache(fcinfo, getBaseType(stats->attrtypid));
80 
81  if (attr->attstattarget < 0)
82  attr->attstattarget = default_statistics_target;
83 
85  stats->extra_data = typcache;
86  /* same as in std_typanalyze */
87  stats->minrows = 300 * attr->attstattarget;
88 
89  PG_RETURN_BOOL(true);
90 }
int minrows
Definition: vacuum.h:135
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:276
Form_pg_attribute attr
Definition: vacuum.h:123
Oid attrtypid
Definition: vacuum.h:124
TypeCacheEntry * multirange_get_typcache(FunctionCallInfo fcinfo, Oid mltrngtypid)
FormData_pg_attribute * Form_pg_attribute
Definition: pg_attribute.h:207
#define PG_RETURN_BOOL(x)
Definition: fmgr.h:359
static void compute_range_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc, int samplerows, double totalrows)
AnalyzeAttrComputeStatsFunc compute_stats
Definition: vacuum.h:134
void * extra_data
Definition: vacuum.h:136
Oid getBaseType(Oid typid)
Definition: lsyscache.c:2468
int default_statistics_target
Definition: analyze.c:82

◆ range_bound_qsort_cmp()

static int range_bound_qsort_cmp ( const void *  a1,
const void *  a2,
void *  arg 
)
static

Definition at line 113 of file rangetypes_typanalyze.c.

References range_cmp_bounds().

Referenced by compute_range_stats().

114 {
115  RangeBound *b1 = (RangeBound *) a1;
116  RangeBound *b2 = (RangeBound *) a2;
117  TypeCacheEntry *typcache = (TypeCacheEntry *) arg;
118 
119  return range_cmp_bounds(typcache, b1, b2);
120 }
static const FormData_pg_attribute a2
Definition: heap.c:167
void * arg
static const FormData_pg_attribute a1
Definition: heap.c:153
int range_cmp_bounds(TypeCacheEntry *typcache, const RangeBound *b1, const RangeBound *b2)
Definition: rangetypes.c:1924

◆ range_typanalyze()

Datum range_typanalyze ( PG_FUNCTION_ARGS  )

Definition at line 45 of file rangetypes_typanalyze.c.

References VacAttrStats::attr, VacAttrStats::attrtypid, compute_range_stats(), VacAttrStats::compute_stats, default_statistics_target, VacAttrStats::extra_data, getBaseType(), VacAttrStats::minrows, PG_GETARG_POINTER, PG_RETURN_BOOL, and range_get_typcache().

46 {
48  TypeCacheEntry *typcache;
49  Form_pg_attribute attr = stats->attr;
50 
51  /* Get information about range type; note column might be a domain */
52  typcache = range_get_typcache(fcinfo, getBaseType(stats->attrtypid));
53 
54  if (attr->attstattarget < 0)
55  attr->attstattarget = default_statistics_target;
56 
58  stats->extra_data = typcache;
59  /* same as in std_typanalyze */
60  stats->minrows = 300 * attr->attstattarget;
61 
62  PG_RETURN_BOOL(true);
63 }
int minrows
Definition: vacuum.h:135
TypeCacheEntry * range_get_typcache(FunctionCallInfo fcinfo, Oid rngtypid)
Definition: rangetypes.c:1635
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:276
Form_pg_attribute attr
Definition: vacuum.h:123
Oid attrtypid
Definition: vacuum.h:124
FormData_pg_attribute * Form_pg_attribute
Definition: pg_attribute.h:207
#define PG_RETURN_BOOL(x)
Definition: fmgr.h:359
static void compute_range_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc, int samplerows, double totalrows)
AnalyzeAttrComputeStatsFunc compute_stats
Definition: vacuum.h:134
void * extra_data
Definition: vacuum.h:136
Oid getBaseType(Oid typid)
Definition: lsyscache.c:2468
int default_statistics_target
Definition: analyze.c:82