148 double total_width = 0;
176 bucket_width = (num_mcelem + 10) * 1000 / 7;
188 lexemes_tab =
hash_create(
"Analyzed lexemes table",
198 for (vector_no = 0; vector_no < samplerows; vector_no++)
209 value = fetchfunc(stats, vector_no, &isnull);
237 lexemesptr =
STRPTR(vector);
238 curentryptr =
ARRPTR(vector);
239 for (
j = 0;
j < vector->
size;
j++)
267 item->
delta = b_current - 1;
277 if (lexeme_no % bucket_width == 0)
293 if (null_cnt < samplerows)
295 int nonnull_cnt = samplerows - null_cnt;
306 stats->
stanullfrac = (double) null_cnt / (
double) samplerows;
307 stats->
stawidth = total_width / (double) nonnull_cnt;
320 cutoff_freq = 9 * lexeme_no / bucket_width;
333 sort_table[track_len++] = item;
341 elog(
DEBUG3,
"tsvector_stats: target # mces = %d, bucket width = %d, "
342 "# lexemes = %d, hashtable size = %d, usable entries = %d",
343 num_mcelem, bucket_width, lexeme_no,
i, track_len);
350 if (num_mcelem < track_len)
355 minfreq = sort_table[num_mcelem - 1]->
frequency;
358 num_mcelem = track_len;
364 Datum *mcelem_values;
405 for (
i = 0;
i < num_mcelem;
i++)
412 mcelem_freqs[
i] = (double) titem->
frequency / (
double) nonnull_cnt;
414 mcelem_freqs[
i++] = (double) minfreq / (
double) nonnull_cnt;
415 mcelem_freqs[
i] = (double) maxfreq / (
double) nonnull_cnt;
418 stats->
stakind[0] = STATISTIC_KIND_MCELEM;
419 stats->
staop[0] = TextEqualOperator;
420 stats->
stacoll[0] = DEFAULT_COLLATION_OID;
523 return (*t2)->
frequency - (*t1)->frequency;
#define Assert(condition)
int default_statistics_target
static dshash_hash hash_key(dshash_table *hash_table, const void *key)
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
HTAB * hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags)
long hash_get_num_entries(HTAB *hashp)
void * hash_seq_search(HASH_SEQ_STATUS *status)
void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
#define PG_GETARG_POINTER(n)
#define PG_RETURN_BOOL(x)
static Datum hash_any(const unsigned char *k, int keylen)
void pfree(void *pointer)
MemoryContext CurrentMemoryContext
void qsort_interruptible(void *base, size_t nel, size_t elsize, qsort_arg_comparator cmp, void *arg)
static uint32 DatumGetUInt32(Datum X)
static Datum PointerGetDatum(const void *X)
static Pointer DatumGetPointer(Datum X)
MemoryContextSwitchTo(old_ctx)
int16 stakind[STATISTIC_NUM_SLOTS]
MemoryContext anl_context
Oid statypid[STATISTIC_NUM_SLOTS]
Oid staop[STATISTIC_NUM_SLOTS]
Oid stacoll[STATISTIC_NUM_SLOTS]
char statypalign[STATISTIC_NUM_SLOTS]
float4 * stanumbers[STATISTIC_NUM_SLOTS]
bool statypbyval[STATISTIC_NUM_SLOTS]
int16 statyplen[STATISTIC_NUM_SLOTS]
int numvalues[STATISTIC_NUM_SLOTS]
Datum * stavalues[STATISTIC_NUM_SLOTS]
int numnumbers[STATISTIC_NUM_SLOTS]
AnalyzeAttrComputeStatsFunc compute_stats
static int trackitem_compare_frequencies_desc(const void *e1, const void *e2, void *arg)
static void prune_lexemes_hashtable(HTAB *lexemes_tab, int b_current)
static int trackitem_compare_lexemes(const void *e1, const void *e2, void *arg)
static int lexeme_compare(const void *key1, const void *key2)
static int lexeme_match(const void *key1, const void *key2, Size keysize)
Datum ts_typanalyze(PG_FUNCTION_ARGS)
static void compute_tsvector_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc, int samplerows, double totalrows)
static uint32 lexeme_hash(const void *key, Size keysize)
static TSVector DatumGetTSVector(Datum X)
static Datum TSVectorGetDatum(const TSVectorData *X)
void vacuum_delay_point(void)
Datum(* AnalyzeAttrFetchFunc)(VacAttrStatsP stats, int rownum, bool *isNull)
text * cstring_to_text_with_len(const char *s, int len)