PostgreSQL Source Code  git master
rangetypes_typanalyze.c File Reference
#include "postgres.h"
#include "catalog/pg_operator.h"
#include "commands/vacuum.h"
#include "utils/float.h"
#include "utils/fmgrprotos.h"
#include "utils/lsyscache.h"
#include "utils/rangetypes.h"
Include dependency graph for rangetypes_typanalyze.c:

Go to the source code of this file.

Functions

static int float8_qsort_cmp (const void *a1, const void *a2)
 
static int range_bound_qsort_cmp (const void *a1, const void *a2, void *arg)
 
static void compute_range_stats (VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc, int samplerows, double totalrows)
 
Datum range_typanalyze (PG_FUNCTION_ARGS)
 

Function Documentation

◆ compute_range_stats()

static void compute_range_stats ( VacAttrStats *  stats,
AnalyzeAttrFetchFunc  fetchfunc,
int  samplerows,
double  totalrows 
)
static

Definition at line 97 of file rangetypes_typanalyze.c.

References VacAttrStats::anl_context, VacAttrStats::attr, DatumGetFloat8, DatumGetPointer, DatumGetRangeTypeP, VacAttrStats::extra_data, float8_qsort_cmp(), Float8GetDatum(), FmgrInfo::fn_oid, FunctionCall2Coll(), get_float8_infinity(), i, RangeBound::infinite, InvalidOid, lower(), MemoryContextSwitchTo(), VacAttrStats::numnumbers, VacAttrStats::numvalues, OidIsValid, palloc(), PointerGetDatum, qsort, qsort_arg(), range(), range_bound_qsort_cmp(), range_deserialize(), range_serialize(), TypeCacheEntry::rng_collation, TypeCacheEntry::rng_subdiff_finfo, VacAttrStats::stacoll, VacAttrStats::stadistinct, VacAttrStats::stakind, VacAttrStats::stanullfrac, VacAttrStats::stanumbers, VacAttrStats::staop, VacAttrStats::stats_valid, VacAttrStats::statypalign, VacAttrStats::statypbyval, VacAttrStats::statypid, VacAttrStats::statyplen, VacAttrStats::stavalues, VacAttrStats::stawidth, upper(), vacuum_delay_point(), RangeBound::val, value, and VARSIZE_ANY.

Referenced by range_typanalyze().

99 {
100  TypeCacheEntry *typcache = (TypeCacheEntry *) stats->extra_data;
101  bool has_subdiff = OidIsValid(typcache->rng_subdiff_finfo.fn_oid);
102  int null_cnt = 0;
103  int non_null_cnt = 0;
104  int non_empty_cnt = 0;
105  int empty_cnt = 0;
106  int range_no;
107  int slot_idx;
108  int num_bins = stats->attr->attstattarget;
109  int num_hist;
110  float8 *lengths;
111  RangeBound *lowers,
112  *uppers;
113  double total_width = 0;
114 
115  /* Allocate memory to hold range bounds and lengths of the sample ranges. */
116  lowers = (RangeBound *) palloc(sizeof(RangeBound) * samplerows);
117  uppers = (RangeBound *) palloc(sizeof(RangeBound) * samplerows);
118  lengths = (float8 *) palloc(sizeof(float8) * samplerows);
119 
120  /* Loop over the sample ranges. */
121  for (range_no = 0; range_no < samplerows; range_no++)
122  {
123  Datum value;
124  bool isnull,
125  empty;
126  RangeType *range;
127  RangeBound lower,
128  upper;
129  float8 length;
130 
131  vacuum_delay_point();
132 
133  value = fetchfunc(stats, range_no, &isnull);
134  if (isnull)
135  {
136  /* range is null, just count that */
137  null_cnt++;
138  continue;
139  }
140 
141  /*
142  * XXX: should we ignore wide values, like std_typanalyze does, to
143  * avoid bloating the statistics table?
144  */
145  total_width += VARSIZE_ANY(DatumGetPointer(value));
146 
147  /* Get range and deserialize it for further analysis. */
148  range = DatumGetRangeTypeP(value);
149  range_deserialize(typcache, range, &lower, &upper, &empty);
150 
151  if (!empty)
152  {
153  /* Remember bounds and length for further usage in histograms */
154  lowers[non_empty_cnt] = lower;
155  uppers[non_empty_cnt] = upper;
156 
157  if (lower.infinite || upper.infinite)
158  {
159  /* Length of any kind of an infinite range is infinite */
160  length = get_float8_infinity();
161  }
162  else if (has_subdiff)
163  {
164  /*
165  * For an ordinary range, use subdiff function between upper
166  * and lower bound values.
167  */
168  length = DatumGetFloat8(FunctionCall2Coll(
169  &typcache->rng_subdiff_finfo,
170  typcache->rng_collation,
171  upper.val, lower.val));
172  }
173  else
174  {
175  /* Use default value of 1.0 if no subdiff is available. */
176  length = 1.0;
177  }
178  lengths[non_empty_cnt] = length;
179 
180  non_empty_cnt++;
181  }
182  else
183  empty_cnt++;
184 
185  non_null_cnt++;
186  }
187 
188  slot_idx = 0;
189 
190  /* We can only compute real stats if we found some non-null values. */
191  if (non_null_cnt > 0)
192  {
193  Datum *bound_hist_values;
194  Datum *length_hist_values;
195  int pos,
196  posfrac,
197  delta,
198  deltafrac,
199  i;
200  MemoryContext old_cxt;
201  float4 *emptyfrac;
202 
203  stats->stats_valid = true;
204  /* Do the simple null-frac and width stats */
205  stats->stanullfrac = (double) null_cnt / (double) samplerows;
206  stats->stawidth = total_width / (double) non_null_cnt;
207 
208  /* Estimate that non-null values are unique */
209  stats->stadistinct = -1.0 * (1.0 - stats->stanullfrac);
210 
211  /* Must copy the target values into anl_context */
212  old_cxt = MemoryContextSwitchTo(stats->anl_context);
213 
214  /*
215  * Generate a bounds histogram slot entry if there are at least two
216  * values.
217  */
218  if (non_empty_cnt >= 2)
219  {
220  /* Sort bound values */
221  qsort_arg(lowers, non_empty_cnt, sizeof(RangeBound),
222  range_bound_qsort_cmp, typcache);
223  qsort_arg(uppers, non_empty_cnt, sizeof(RangeBound),
224  range_bound_qsort_cmp, typcache);
225 
226  num_hist = non_empty_cnt;
227  if (num_hist > num_bins)
228  num_hist = num_bins + 1;
229 
230  bound_hist_values = (Datum *) palloc(num_hist * sizeof(Datum));
231 
232  /*
233  * The object of this loop is to construct ranges from first and
234  * last entries in lowers[] and uppers[] along with evenly-spaced
235  * values in between. So the i'th value is a range of lowers[(i *
236  * (nvals - 1)) / (num_hist - 1)] and uppers[(i * (nvals - 1)) /
237  * (num_hist - 1)]. But computing that subscript directly risks
238  * integer overflow when the stats target is more than a couple
239  * thousand. Instead we add (nvals - 1) / (num_hist - 1) to pos
240  * at each step, tracking the integral and fractional parts of the
241  * sum separately.
242  */
243  delta = (non_empty_cnt - 1) / (num_hist - 1);
244  deltafrac = (non_empty_cnt - 1) % (num_hist - 1);
245  pos = posfrac = 0;
246 
247  for (i = 0; i < num_hist; i++)
248  {
249  bound_hist_values[i] = PointerGetDatum(range_serialize(
250  typcache, &lowers[pos], &uppers[pos], false));
251  pos += delta;
252  posfrac += deltafrac;
253  if (posfrac >= (num_hist - 1))
254  {
255  /* fractional part exceeds 1, carry to integer part */
256  pos++;
257  posfrac -= (num_hist - 1);
258  }
259  }
260 
261  stats->stakind[slot_idx] = STATISTIC_KIND_BOUNDS_HISTOGRAM;
262  stats->stavalues[slot_idx] = bound_hist_values;
263  stats->numvalues[slot_idx] = num_hist;
264  slot_idx++;
265  }
266 
267  /*
268  * Generate a length histogram slot entry if there are at least two
269  * values.
270  */
271  if (non_empty_cnt >= 2)
272  {
273  /*
274  * Ascending sort of range lengths for further filling of
275  * histogram
276  */
277  qsort(lengths, non_empty_cnt, sizeof(float8), float8_qsort_cmp);
278 
279  num_hist = non_empty_cnt;
280  if (num_hist > num_bins)
281  num_hist = num_bins + 1;
282 
283  length_hist_values = (Datum *) palloc(num_hist * sizeof(Datum));
284 
285  /*
286  * The object of this loop is to copy the first and last lengths[]
287  * entries along with evenly-spaced values in between. So the i'th
288  * value is lengths[(i * (nvals - 1)) / (num_hist - 1)]. But
289  * computing that subscript directly risks integer overflow when
290  * the stats target is more than a couple thousand. Instead we
291  * add (nvals - 1) / (num_hist - 1) to pos at each step, tracking
292  * the integral and fractional parts of the sum separately.
293  */
294  delta = (non_empty_cnt - 1) / (num_hist - 1);
295  deltafrac = (non_empty_cnt - 1) % (num_hist - 1);
296  pos = posfrac = 0;
297 
298  for (i = 0; i < num_hist; i++)
299  {
300  length_hist_values[i] = Float8GetDatum(lengths[pos]);
301  pos += delta;
302  posfrac += deltafrac;
303  if (posfrac >= (num_hist - 1))
304  {
305  /* fractional part exceeds 1, carry to integer part */
306  pos++;
307  posfrac -= (num_hist - 1);
308  }
309  }
310  }
311  else
312  {
313  /*
314  * Even when we don't create the histogram, store an empty array
315  * to mean "no histogram". We can't just leave stavalues NULL,
316  * because get_attstatsslot() errors if you ask for stavalues, and
317  * it's NULL. We'll still store the empty fraction in stanumbers.
318  */
319  length_hist_values = palloc(0);
320  num_hist = 0;
321  }
322  stats->staop[slot_idx] = Float8LessOperator;
323  stats->stacoll[slot_idx] = InvalidOid;
324  stats->stavalues[slot_idx] = length_hist_values;
325  stats->numvalues[slot_idx] = num_hist;
326  stats->statypid[slot_idx] = FLOAT8OID;
327  stats->statyplen[slot_idx] = sizeof(float8);
328 #ifdef USE_FLOAT8_BYVAL
329  stats->statypbyval[slot_idx] = true;
330 #else
331  stats->statypbyval[slot_idx] = false;
332 #endif
333  stats->statypalign[slot_idx] = 'd';
334 
335  /* Store the fraction of empty ranges */
336  emptyfrac = (float4 *) palloc(sizeof(float4));
337  *emptyfrac = ((double) empty_cnt) / ((double) non_null_cnt);
338  stats->stanumbers[slot_idx] = emptyfrac;
339  stats->numnumbers[slot_idx] = 1;
340 
341  stats->stakind[slot_idx] = STATISTIC_KIND_RANGE_LENGTH_HISTOGRAM;
342  slot_idx++;
343 
344  MemoryContextSwitchTo(old_cxt);
345  }
346  else if (null_cnt > 0)
347  {
348  /* We found only nulls; assume the column is entirely null */
349  stats->stats_valid = true;
350  stats->stanullfrac = 1.0;
351  stats->stawidth = 0; /* "unknown" */
352  stats->stadistinct = 0.0; /* "unknown" */
353  }
354 
355  /*
356  * We don't need to bother cleaning up any of our temporary palloc's. The
357  * hashtable should also go away, as it used a child memory context.
358  */
359 }
static float8 get_float8_infinity(void)
Definition: float.h:90
Datum lower(PG_FUNCTION_ARGS)
Definition: oracle_compat.c:44
#define PointerGetDatum(X)
Definition: postgres.h:556
Datum * stavalues[STATISTIC_NUM_SLOTS]
Definition: vacuum.h:114
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
Datum val
Definition: rangetypes.h:62
static struct @145 value
bool statypbyval[STATISTIC_NUM_SLOTS]
Definition: vacuum.h:124
Datum upper(PG_FUNCTION_ARGS)
Definition: oracle_compat.c:75
Datum FunctionCall2Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2)
Definition: fmgr.c:1150
static int range_bound_qsort_cmp(const void *a1, const void *a2, void *arg)
#define OidIsValid(objectId)
Definition: c.h:638
char statypalign[STATISTIC_NUM_SLOTS]
Definition: vacuum.h:125
Form_pg_attribute attr
Definition: vacuum.h:85
Datum Float8GetDatum(float8 X)
Definition: fmgr.c:1723
static int float8_qsort_cmp(const void *a1, const void *a2)
FmgrInfo rng_subdiff_finfo
Definition: typcache.h:99
double float8
Definition: c.h:491
int32 stawidth
Definition: vacuum.h:106
Oid stacoll[STATISTIC_NUM_SLOTS]
Definition: vacuum.h:110
int numnumbers[STATISTIC_NUM_SLOTS]
Definition: vacuum.h:111
static struct cvec * range(struct vars *v, chr a, chr b, int cases)
Definition: regc_locale.c:416
RangeType * range_serialize(TypeCacheEntry *typcache, RangeBound *lower, RangeBound *upper, bool empty)
Definition: rangetypes.c:1570
float4 stanullfrac
Definition: vacuum.h:105
void qsort_arg(void *base, size_t nel, size_t elsize, qsort_arg_comparator cmp, void *arg)
Definition: qsort_arg.c:113
void range_deserialize(TypeCacheEntry *typcache, RangeType *range, RangeBound *lower, RangeBound *upper, bool *empty)
Definition: rangetypes.c:1699
Oid staop[STATISTIC_NUM_SLOTS]
Definition: vacuum.h:109
bool stats_valid
Definition: vacuum.h:104
float float4
Definition: c.h:490
#define DatumGetFloat8(X)
Definition: postgres.h:728
uintptr_t Datum
Definition: postgres.h:367
int16 stakind[STATISTIC_NUM_SLOTS]
Definition: vacuum.h:108
#define VARSIZE_ANY(PTR)
Definition: postgres.h:335
Oid statypid[STATISTIC_NUM_SLOTS]
Definition: vacuum.h:122
#define InvalidOid
Definition: postgres_ext.h:36
Oid fn_oid
Definition: fmgr.h:59
float4 * stanumbers[STATISTIC_NUM_SLOTS]
Definition: vacuum.h:112
#define DatumGetRangeTypeP(X)
Definition: rangetypes.h:71
bool infinite
Definition: rangetypes.h:63
Oid rng_collation
Definition: typcache.h:96
MemoryContext anl_context
Definition: vacuum.h:90
#define DatumGetPointer(X)
Definition: postgres.h:549
int numvalues[STATISTIC_NUM_SLOTS]
Definition: vacuum.h:113
void * palloc(Size size)
Definition: mcxt.c:949
int16 statyplen[STATISTIC_NUM_SLOTS]
Definition: vacuum.h:123
int i
void * extra_data
Definition: vacuum.h:98
#define qsort(a, b, c, d)
Definition: port.h:492
void vacuum_delay_point(void)
Definition: vacuum.c:1946
float4 stadistinct
Definition: vacuum.h:107

◆ float8_qsort_cmp()

static int float8_qsort_cmp ( const void *  a1,
const void *  a2 
)
static

Definition at line 67 of file rangetypes_typanalyze.c.

Referenced by compute_range_stats().

/*
 * qsort() comparator ordering float8 values ascending; compute_range_stats()
 * uses it to sort the array of sampled range lengths before building the
 * length histogram.
 */
68 {
69  const float8 *f1 = (const float8 *) a1;
70  const float8 *f2 = (const float8 *) a2;
71 
72  if (*f1 < *f2)
73  return -1;
74  else if (*f1 == *f2)
75  return 0;
76  else
77  return 1; /* also taken when neither test holds (e.g. a NaN operand) -- NOTE(review): confirm lengths can never be NaN */
78 }
static const FormData_pg_attribute a2
Definition: heap.c:166
double float8
Definition: c.h:491
static const FormData_pg_attribute a1
Definition: heap.c:152

◆ range_bound_qsort_cmp()

static int range_bound_qsort_cmp ( const void *  a1,
const void *  a2,
void *  arg 
)
static

Definition at line 84 of file rangetypes_typanalyze.c.

References range_cmp_bounds().

Referenced by compute_range_stats().

/*
 * qsort_arg() comparator for arrays of RangeBound.  The opaque "arg" is the
 * TypeCacheEntry that range_cmp_bounds() takes as its first parameter; the
 * ordering itself is entirely delegated to range_cmp_bounds().
 */
85 {
86  RangeBound *b1 = (RangeBound *) a1; /* cast drops const: range_cmp_bounds() takes non-const pointers */
87  RangeBound *b2 = (RangeBound *) a2;
88  TypeCacheEntry *typcache = (TypeCacheEntry *) arg;
89 
90  return range_cmp_bounds(typcache, b1, b2);
91 }
int range_cmp_bounds(TypeCacheEntry *typcache, RangeBound *b1, RangeBound *b2)
Definition: rangetypes.c:1835
static const FormData_pg_attribute a2
Definition: heap.c:166
void * arg
static const FormData_pg_attribute a1
Definition: heap.c:152

◆ range_typanalyze()

Datum range_typanalyze ( PG_FUNCTION_ARGS  )

Definition at line 43 of file rangetypes_typanalyze.c.

References VacAttrStats::attr, VacAttrStats::attrtypid, compute_range_stats(), VacAttrStats::compute_stats, default_statistics_target, VacAttrStats::extra_data, getBaseType(), VacAttrStats::minrows, PG_GETARG_POINTER, PG_RETURN_BOOL, and range_get_typcache().

44 {
45  VacAttrStats *stats = (VacAttrStats *) PG_GETARG_POINTER(0);
46  TypeCacheEntry *typcache;
47  Form_pg_attribute attr = stats->attr;
48 
49  /* Get information about range type; note column might be a domain */
50  typcache = range_get_typcache(fcinfo, getBaseType(stats->attrtypid));
51 
52  if (attr->attstattarget < 0)
53  attr->attstattarget = default_statistics_target;
54 
55  stats->compute_stats = compute_range_stats;
56  stats->extra_data = typcache;
57  /* same as in std_typanalyze */
58  stats->minrows = 300 * attr->attstattarget;
59 
60  PG_RETURN_BOOL(true);
61 }
int minrows
Definition: vacuum.h:97
TypeCacheEntry * range_get_typcache(FunctionCallInfo fcinfo, Oid rngtypid)
Definition: rangetypes.c:1546
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:271
Form_pg_attribute attr
Definition: vacuum.h:85
Oid attrtypid
Definition: vacuum.h:86
FormData_pg_attribute * Form_pg_attribute
Definition: pg_attribute.h:200
#define PG_RETURN_BOOL(x)
Definition: fmgr.h:349
static void compute_range_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc, int samplerows, double totalrows)
AnalyzeAttrComputeStatsFunc compute_stats
Definition: vacuum.h:96
void * extra_data
Definition: vacuum.h:98
Oid getBaseType(Oid typid)
Definition: lsyscache.c:2299
int default_statistics_target
Definition: analyze.c:80