PostgreSQL Source Code  git master
rangetypes_typanalyze.c File Reference
#include "postgres.h"
#include "catalog/pg_operator.h"
#include "commands/vacuum.h"
#include "utils/float.h"
#include "utils/fmgrprotos.h"
#include "utils/lsyscache.h"
#include "utils/rangetypes.h"
#include "utils/multirangetypes.h"
#include "varatt.h"
Include dependency graph for rangetypes_typanalyze.c:

Go to the source code of this file.

Functions

static int float8_qsort_cmp (const void *a1, const void *a2, void *arg)
 
static int range_bound_qsort_cmp (const void *a1, const void *a2, void *arg)
 
static void compute_range_stats (VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc, int samplerows, double totalrows)
 
Datum range_typanalyze (PG_FUNCTION_ARGS)
 
Datum multirange_typanalyze (PG_FUNCTION_ARGS)
 

Function Documentation

◆ compute_range_stats()

static void compute_range_stats ( VacAttrStats *  stats,
AnalyzeAttrFetchFunc  fetchfunc,
int  samplerows,
double  totalrows 
)
static

Definition at line 127 of file rangetypes_typanalyze.c.

129 {
130  TypeCacheEntry *typcache = (TypeCacheEntry *) stats->extra_data;
131  TypeCacheEntry *mltrng_typcache = NULL;
132  bool has_subdiff;
133  int null_cnt = 0;
134  int non_null_cnt = 0;
135  int non_empty_cnt = 0;
136  int empty_cnt = 0;
137  int range_no;
138  int slot_idx;
139  int num_bins = stats->attr->attstattarget;
140  int num_hist;
141  float8 *lengths;
142  RangeBound *lowers,
143  *uppers;
144  double total_width = 0;
145 
146  if (typcache->typtype == TYPTYPE_MULTIRANGE)
147  {
148  mltrng_typcache = typcache;
149  typcache = typcache->rngtype;
150  }
151  else
152  Assert(typcache->typtype == TYPTYPE_RANGE);
153  has_subdiff = OidIsValid(typcache->rng_subdiff_finfo.fn_oid);
154 
155  /* Allocate memory to hold range bounds and lengths of the sample ranges. */
156  lowers = (RangeBound *) palloc(sizeof(RangeBound) * samplerows);
157  uppers = (RangeBound *) palloc(sizeof(RangeBound) * samplerows);
158  lengths = (float8 *) palloc(sizeof(float8) * samplerows);
159 
160  /* Loop over the sample ranges. */
161  for (range_no = 0; range_no < samplerows; range_no++)
162  {
163  Datum value;
164  bool isnull,
165  empty;
166  MultirangeType *multirange;
167  RangeType *range;
169  upper;
170  float8 length;
171 
173 
174  value = fetchfunc(stats, range_no, &isnull);
175  if (isnull)
176  {
177  /* range is null, just count that */
178  null_cnt++;
179  continue;
180  }
181 
182  /*
183  * XXX: should we ignore wide values, like std_typanalyze does, to
184  * avoid bloating the statistics table?
185  */
186  total_width += VARSIZE_ANY(DatumGetPointer(value));
187 
188  /* Get range and deserialize it for further analysis. */
189  if (mltrng_typcache != NULL)
190  {
191  /* Treat multiranges like a big range without gaps. */
192  multirange = DatumGetMultirangeTypeP(value);
193  if (!MultirangeIsEmpty(multirange))
194  {
195  RangeBound tmp;
196 
197  multirange_get_bounds(typcache, multirange, 0,
198  &lower, &tmp);
199  multirange_get_bounds(typcache, multirange,
200  multirange->rangeCount - 1,
201  &tmp, &upper);
202  empty = false;
203  }
204  else
205  {
206  empty = true;
207  }
208  }
209  else
210  {
212  range_deserialize(typcache, range, &lower, &upper, &empty);
213  }
214 
215  if (!empty)
216  {
217  /* Remember bounds and length for further usage in histograms */
218  lowers[non_empty_cnt] = lower;
219  uppers[non_empty_cnt] = upper;
220 
221  if (lower.infinite || upper.infinite)
222  {
223  /* Length of any kind of an infinite range is infinite */
224  length = get_float8_infinity();
225  }
226  else if (has_subdiff)
227  {
228  /*
229  * For an ordinary range, use subdiff function between upper
230  * and lower bound values.
231  */
233  typcache->rng_collation,
234  upper.val, lower.val));
235  }
236  else
237  {
238  /* Use default value of 1.0 if no subdiff is available. */
239  length = 1.0;
240  }
241  lengths[non_empty_cnt] = length;
242 
243  non_empty_cnt++;
244  }
245  else
246  empty_cnt++;
247 
248  non_null_cnt++;
249  }
250 
251  slot_idx = 0;
252 
253  /* We can only compute real stats if we found some non-null values. */
254  if (non_null_cnt > 0)
255  {
256  Datum *bound_hist_values;
257  Datum *length_hist_values;
258  int pos,
259  posfrac,
260  delta,
261  deltafrac,
262  i;
263  MemoryContext old_cxt;
264  float4 *emptyfrac;
265 
266  stats->stats_valid = true;
267  /* Do the simple null-frac and width stats */
268  stats->stanullfrac = (double) null_cnt / (double) samplerows;
269  stats->stawidth = total_width / (double) non_null_cnt;
270 
271  /* Estimate that non-null values are unique */
272  stats->stadistinct = -1.0 * (1.0 - stats->stanullfrac);
273 
274  /* Must copy the target values into anl_context */
275  old_cxt = MemoryContextSwitchTo(stats->anl_context);
276 
277  /*
278  * Generate a bounds histogram slot entry if there are at least two
279  * values.
280  */
281  if (non_empty_cnt >= 2)
282  {
283  /* Sort bound values */
284  qsort_interruptible(lowers, non_empty_cnt, sizeof(RangeBound),
285  range_bound_qsort_cmp, typcache);
286  qsort_interruptible(uppers, non_empty_cnt, sizeof(RangeBound),
287  range_bound_qsort_cmp, typcache);
288 
289  num_hist = non_empty_cnt;
290  if (num_hist > num_bins)
291  num_hist = num_bins + 1;
292 
293  bound_hist_values = (Datum *) palloc(num_hist * sizeof(Datum));
294 
295  /*
296  * The object of this loop is to construct ranges from first and
297  * last entries in lowers[] and uppers[] along with evenly-spaced
298  * values in between. So the i'th value is a range of lowers[(i *
299  * (nvals - 1)) / (num_hist - 1)] and uppers[(i * (nvals - 1)) /
300  * (num_hist - 1)]. But computing that subscript directly risks
301  * integer overflow when the stats target is more than a couple
302  * thousand. Instead we add (nvals - 1) / (num_hist - 1) to pos
303  * at each step, tracking the integral and fractional parts of the
304  * sum separately.
305  */
306  delta = (non_empty_cnt - 1) / (num_hist - 1);
307  deltafrac = (non_empty_cnt - 1) % (num_hist - 1);
308  pos = posfrac = 0;
309 
310  for (i = 0; i < num_hist; i++)
311  {
312  bound_hist_values[i] = PointerGetDatum(range_serialize(typcache,
313  &lowers[pos],
314  &uppers[pos],
315  false,
316  NULL));
317  pos += delta;
318  posfrac += deltafrac;
319  if (posfrac >= (num_hist - 1))
320  {
321  /* fractional part exceeds 1, carry to integer part */
322  pos++;
323  posfrac -= (num_hist - 1);
324  }
325  }
326 
327  stats->stakind[slot_idx] = STATISTIC_KIND_BOUNDS_HISTOGRAM;
328  stats->stavalues[slot_idx] = bound_hist_values;
329  stats->numvalues[slot_idx] = num_hist;
330 
331  /* Store ranges even if we're analyzing a multirange column */
332  stats->statypid[slot_idx] = typcache->type_id;
333  stats->statyplen[slot_idx] = typcache->typlen;
334  stats->statypbyval[slot_idx] = typcache->typbyval;
335  stats->statypalign[slot_idx] = typcache->typalign;
336 
337  slot_idx++;
338  }
339 
340  /*
341  * Generate a length histogram slot entry if there are at least two
342  * values.
343  */
344  if (non_empty_cnt >= 2)
345  {
346  /*
347  * Ascending sort of range lengths for further filling of
348  * histogram
349  */
350  qsort_interruptible(lengths, non_empty_cnt, sizeof(float8),
351  float8_qsort_cmp, NULL);
352 
353  num_hist = non_empty_cnt;
354  if (num_hist > num_bins)
355  num_hist = num_bins + 1;
356 
357  length_hist_values = (Datum *) palloc(num_hist * sizeof(Datum));
358 
359  /*
360  * The object of this loop is to copy the first and last lengths[]
361  * entries along with evenly-spaced values in between. So the i'th
362  * value is lengths[(i * (nvals - 1)) / (num_hist - 1)]. But
363  * computing that subscript directly risks integer overflow when
364  * the stats target is more than a couple thousand. Instead we
365  * add (nvals - 1) / (num_hist - 1) to pos at each step, tracking
366  * the integral and fractional parts of the sum separately.
367  */
368  delta = (non_empty_cnt - 1) / (num_hist - 1);
369  deltafrac = (non_empty_cnt - 1) % (num_hist - 1);
370  pos = posfrac = 0;
371 
372  for (i = 0; i < num_hist; i++)
373  {
374  length_hist_values[i] = Float8GetDatum(lengths[pos]);
375  pos += delta;
376  posfrac += deltafrac;
377  if (posfrac >= (num_hist - 1))
378  {
379  /* fractional part exceeds 1, carry to integer part */
380  pos++;
381  posfrac -= (num_hist - 1);
382  }
383  }
384  }
385  else
386  {
387  /*
388  * Even when we don't create the histogram, store an empty array
389  * to mean "no histogram". We can't just leave stavalues NULL,
390  * because get_attstatsslot() errors if you ask for stavalues, and
391  * it's NULL. We'll still store the empty fraction in stanumbers.
392  */
393  length_hist_values = palloc(0);
394  num_hist = 0;
395  }
396  stats->staop[slot_idx] = Float8LessOperator;
397  stats->stacoll[slot_idx] = InvalidOid;
398  stats->stavalues[slot_idx] = length_hist_values;
399  stats->numvalues[slot_idx] = num_hist;
400  stats->statypid[slot_idx] = FLOAT8OID;
401  stats->statyplen[slot_idx] = sizeof(float8);
402  stats->statypbyval[slot_idx] = FLOAT8PASSBYVAL;
403  stats->statypalign[slot_idx] = 'd';
404 
405  /* Store the fraction of empty ranges */
406  emptyfrac = (float4 *) palloc(sizeof(float4));
407  *emptyfrac = ((double) empty_cnt) / ((double) non_null_cnt);
408  stats->stanumbers[slot_idx] = emptyfrac;
409  stats->numnumbers[slot_idx] = 1;
410 
411  stats->stakind[slot_idx] = STATISTIC_KIND_RANGE_LENGTH_HISTOGRAM;
412  slot_idx++;
413 
414  MemoryContextSwitchTo(old_cxt);
415  }
416  else if (null_cnt > 0)
417  {
418  /* We found only nulls; assume the column is entirely null */
419  stats->stats_valid = true;
420  stats->stanullfrac = 1.0;
421  stats->stawidth = 0; /* "unknown" */
422  stats->stadistinct = 0.0; /* "unknown" */
423  }
424 
425  /*
426  * We don't need to bother cleaning up any of our temporary palloc's. The
427  * hashtable should also go away, as it used a child memory context.
428  */
429 }
double float8
Definition: c.h:614
#define FLOAT8PASSBYVAL
Definition: c.h:619
float float4
Definition: c.h:613
#define OidIsValid(objectId)
Definition: c.h:759
static float8 get_float8_infinity(void)
Definition: float.h:94
Datum FunctionCall2Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2)
Definition: fmgr.c:1120
Datum Float8GetDatum(float8 X)
Definition: fmgr.c:1787
static struct @147 value
int i
Definition: isn.c:73
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:77
Assert(fmt[strlen(fmt) - 1] !='\n')
void * palloc(Size size)
Definition: mcxt.c:1226
void multirange_get_bounds(TypeCacheEntry *rangetyp, const MultirangeType *multirange, uint32 i, RangeBound *lower, RangeBound *upper)
#define MultirangeIsEmpty(mr)
static MultirangeType * DatumGetMultirangeTypeP(Datum X)
Datum lower(PG_FUNCTION_ARGS)
Definition: oracle_compat.c:49
Datum upper(PG_FUNCTION_ARGS)
Definition: oracle_compat.c:80
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:138
void qsort_interruptible(void *base, size_t nel, size_t elsize, qsort_arg_comparator cmp, void *arg)
static Datum PointerGetDatum(const void *X)
Definition: postgres.h:322
uintptr_t Datum
Definition: postgres.h:64
static float8 DatumGetFloat8(Datum X)
Definition: postgres.h:494
static Pointer DatumGetPointer(Datum X)
Definition: postgres.h:312
#define InvalidOid
Definition: postgres_ext.h:36
void range_deserialize(TypeCacheEntry *typcache, const RangeType *range, RangeBound *lower, RangeBound *upper, bool *empty)
Definition: rangetypes.c:1849
RangeType * range_serialize(TypeCacheEntry *typcache, RangeBound *lower, RangeBound *upper, bool empty, struct Node *escontext)
Definition: rangetypes.c:1720
static RangeType * DatumGetRangeTypeP(Datum X)
Definition: rangetypes.h:73
static int float8_qsort_cmp(const void *a1, const void *a2, void *arg)
static int range_bound_qsort_cmp(const void *a1, const void *a2, void *arg)
static struct cvec * range(struct vars *v, chr a, chr b, int cases)
Definition: regc_locale.c:412
Oid fn_oid
Definition: fmgr.h:59
Oid rng_collation
Definition: typcache.h:99
char typalign
Definition: typcache.h:41
char typtype
Definition: typcache.h:43
struct TypeCacheEntry * rngtype
Definition: typcache.h:107
FmgrInfo rng_subdiff_finfo
Definition: typcache.h:102
bool typbyval
Definition: typcache.h:40
int16 typlen
Definition: typcache.h:39
bool stats_valid
Definition: vacuum.h:147
float4 stanullfrac
Definition: vacuum.h:148
int16 stakind[STATISTIC_NUM_SLOTS]
Definition: vacuum.h:151
MemoryContext anl_context
Definition: vacuum.h:133
Oid statypid[STATISTIC_NUM_SLOTS]
Definition: vacuum.h:165
Oid staop[STATISTIC_NUM_SLOTS]
Definition: vacuum.h:152
Oid stacoll[STATISTIC_NUM_SLOTS]
Definition: vacuum.h:153
char statypalign[STATISTIC_NUM_SLOTS]
Definition: vacuum.h:168
float4 * stanumbers[STATISTIC_NUM_SLOTS]
Definition: vacuum.h:155
Form_pg_attribute attr
Definition: vacuum.h:128
int32 stawidth
Definition: vacuum.h:149
void * extra_data
Definition: vacuum.h:141
bool statypbyval[STATISTIC_NUM_SLOTS]
Definition: vacuum.h:167
int16 statyplen[STATISTIC_NUM_SLOTS]
Definition: vacuum.h:166
int numvalues[STATISTIC_NUM_SLOTS]
Definition: vacuum.h:156
Datum * stavalues[STATISTIC_NUM_SLOTS]
Definition: vacuum.h:157
float4 stadistinct
Definition: vacuum.h:150
int numnumbers[STATISTIC_NUM_SLOTS]
Definition: vacuum.h:154
void vacuum_delay_point(void)
Definition: vacuum.c:2326
#define VARSIZE_ANY(PTR)
Definition: varatt.h:311

References VacAttrStats::anl_context, Assert(), VacAttrStats::attr, DatumGetFloat8(), DatumGetMultirangeTypeP(), DatumGetPointer(), DatumGetRangeTypeP(), VacAttrStats::extra_data, float8_qsort_cmp(), Float8GetDatum(), FLOAT8PASSBYVAL, FmgrInfo::fn_oid, FunctionCall2Coll(), get_float8_infinity(), i, if(), InvalidOid, lower(), MemoryContextSwitchTo(), multirange_get_bounds(), MultirangeIsEmpty, VacAttrStats::numnumbers, VacAttrStats::numvalues, OidIsValid, palloc(), PointerGetDatum(), qsort_interruptible(), range(), range_bound_qsort_cmp(), range_deserialize(), range_serialize(), MultirangeType::rangeCount, TypeCacheEntry::rng_collation, TypeCacheEntry::rng_subdiff_finfo, TypeCacheEntry::rngtype, VacAttrStats::stacoll, VacAttrStats::stadistinct, VacAttrStats::stakind, VacAttrStats::stanullfrac, VacAttrStats::stanumbers, VacAttrStats::staop, VacAttrStats::stats_valid, VacAttrStats::statypalign, VacAttrStats::statypbyval, VacAttrStats::statypid, VacAttrStats::statyplen, VacAttrStats::stavalues, VacAttrStats::stawidth, TypeCacheEntry::typalign, TypeCacheEntry::typbyval, TypeCacheEntry::type_id, TypeCacheEntry::typlen, TypeCacheEntry::typtype, upper(), vacuum_delay_point(), value, and VARSIZE_ANY.

Referenced by multirange_typanalyze(), and range_typanalyze().

◆ float8_qsort_cmp()

static int float8_qsort_cmp ( const void *  a1,
const void *  a2,
void *  arg 
)
static

Definition at line 97 of file rangetypes_typanalyze.c.

98 {
99  const float8 *f1 = (const float8 *) a1;
100  const float8 *f2 = (const float8 *) a2;
101 
102  if (*f1 < *f2)
103  return -1;
104  else if (*f1 == *f2)
105  return 0;
106  else
107  return 1;
108 }
static const FormData_pg_attribute a1
Definition: heap.c:141
static const FormData_pg_attribute a2
Definition: heap.c:155
int f1[ARRAY_SIZE]
Definition: sql-declare.c:113
int f2[ARRAY_SIZE]
Definition: sql-declare.c:116

References a1, a2, f1, and f2.

Referenced by compute_range_stats().

◆ multirange_typanalyze()

Datum multirange_typanalyze ( PG_FUNCTION_ARGS  )

Definition at line 73 of file rangetypes_typanalyze.c.

74 {
76  TypeCacheEntry *typcache;
77  Form_pg_attribute attr = stats->attr;
78 
79  /* Get information about multirange type; note column might be a domain */
80  typcache = multirange_get_typcache(fcinfo, getBaseType(stats->attrtypid));
81 
82  if (attr->attstattarget < 0)
83  attr->attstattarget = default_statistics_target;
84 
86  stats->extra_data = typcache;
87  /* same as in std_typanalyze */
88  stats->minrows = 300 * attr->attstattarget;
89 
90  PG_RETURN_BOOL(true);
91 }
int default_statistics_target
Definition: analyze.c:83
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:276
#define PG_RETURN_BOOL(x)
Definition: fmgr.h:359
Oid getBaseType(Oid typid)
Definition: lsyscache.c:2479
TypeCacheEntry * multirange_get_typcache(FunctionCallInfo fcinfo, Oid mltrngtypid)
FormData_pg_attribute * Form_pg_attribute
Definition: pg_attribute.h:209
static void compute_range_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc, int samplerows, double totalrows)
Oid attrtypid
Definition: vacuum.h:129
int minrows
Definition: vacuum.h:140
AnalyzeAttrComputeStatsFunc compute_stats
Definition: vacuum.h:139

References VacAttrStats::attr, VacAttrStats::attrtypid, compute_range_stats(), VacAttrStats::compute_stats, default_statistics_target, VacAttrStats::extra_data, getBaseType(), VacAttrStats::minrows, multirange_get_typcache(), PG_GETARG_POINTER, and PG_RETURN_BOOL.

◆ range_bound_qsort_cmp()

static int range_bound_qsort_cmp ( const void *  a1,
const void *  a2,
void *  arg 
)
static

Definition at line 114 of file rangetypes_typanalyze.c.

115 {
116  RangeBound *b1 = (RangeBound *) a1;
117  RangeBound *b2 = (RangeBound *) a2;
118  TypeCacheEntry *typcache = (TypeCacheEntry *) arg;
119 
120  return range_cmp_bounds(typcache, b1, b2);
121 }
void * arg
int range_cmp_bounds(TypeCacheEntry *typcache, const RangeBound *b1, const RangeBound *b2)
Definition: rangetypes.c:2009

References a1, a2, arg, and range_cmp_bounds().

Referenced by compute_range_stats().

◆ range_typanalyze()

Datum range_typanalyze ( PG_FUNCTION_ARGS  )

Definition at line 46 of file rangetypes_typanalyze.c.

47 {
49  TypeCacheEntry *typcache;
50  Form_pg_attribute attr = stats->attr;
51 
52  /* Get information about range type; note column might be a domain */
53  typcache = range_get_typcache(fcinfo, getBaseType(stats->attrtypid));
54 
55  if (attr->attstattarget < 0)
56  attr->attstattarget = default_statistics_target;
57 
59  stats->extra_data = typcache;
60  /* same as in std_typanalyze */
61  stats->minrows = 300 * attr->attstattarget;
62 
63  PG_RETURN_BOOL(true);
64 }
TypeCacheEntry * range_get_typcache(FunctionCallInfo fcinfo, Oid rngtypid)
Definition: rangetypes.c:1696

References VacAttrStats::attr, VacAttrStats::attrtypid, compute_range_stats(), VacAttrStats::compute_stats, default_statistics_target, VacAttrStats::extra_data, getBaseType(), VacAttrStats::minrows, PG_GETARG_POINTER, PG_RETURN_BOOL, and range_get_typcache().