PostgreSQL Source Code  git master
tsrank.c File Reference
#include "postgres.h"
#include <limits.h>
#include <math.h>
#include "miscadmin.h"
#include "tsearch/ts_utils.h"
#include "utils/array.h"
#include "utils/fmgrprotos.h"
Include dependency graph for tsrank.c:

Go to the source code of this file.

Data Structures

struct  DocRepresentation
 
struct  QueryRepresentationOperand
 
struct  QueryRepresentation
 
struct  CoverExt
 

Macros

#define NUM_WEIGHTS   4
 
#define wpos(wep)   ( w[ WEP_GETWEIGHT(wep) ] )
 
#define RANK_NO_NORM   0x00
 
#define RANK_NORM_LOGLENGTH   0x01
 
#define RANK_NORM_LENGTH   0x02
 
#define RANK_NORM_EXTDIST   0x04
 
#define RANK_NORM_UNIQ   0x08
 
#define RANK_NORM_LOGUNIQ   0x10
 
#define RANK_NORM_RDIVRPLUS1   0x20
 
#define DEF_NORM_METHOD   RANK_NO_NORM
 
#define WordECompareQueryItem(e, q, p, i, m)
 
#define MAXQROPOS   MAXENTRYPOS
 
#define QR_GET_OPERAND_DATA(q, v)    ( (q)->operandData + (((QueryItem*)(v)) - GETQUERY((q)->query)) )
 

Functions

static float calc_rank_or (const float *w, TSVector t, TSQuery q)
 
static float calc_rank_and (const float *w, TSVector t, TSQuery q)
 
static float4 word_distance (int32 w)
 
static int cnt_length (TSVector t)
 
static WordEntryfind_wordentry (TSVector t, TSQuery q, QueryOperand *item, int32 *nitem)
 
static int compareQueryOperand (const void *a, const void *b, void *arg)
 
static QueryOperand ** SortAndUniqItems (TSQuery q, int *size)
 
static float calc_rank (const float *w, TSVector t, TSQuery q, int32 method)
 
static void getWeights (ArrayType *win, float *ws)
 
Datum ts_rank_wttf (PG_FUNCTION_ARGS)
 
Datum ts_rank_wtt (PG_FUNCTION_ARGS)
 
Datum ts_rank_ttf (PG_FUNCTION_ARGS)
 
Datum ts_rank_tt (PG_FUNCTION_ARGS)
 
static int compareDocR (const void *va, const void *vb)
 
static TSTernaryValue checkcondition_QueryOperand (void *checkval, QueryOperand *val, ExecPhraseData *data)
 
static void resetQueryRepresentation (QueryRepresentation *qr, bool reverseinsert)
 
static void fillQueryRepresentationData (QueryRepresentation *qr, DocRepresentation *entry)
 
static bool Cover (DocRepresentation *doc, int len, QueryRepresentation *qr, CoverExt *ext)
 
static DocRepresentationget_docrep (TSVector txt, QueryRepresentation *qr, int *doclen)
 
static float4 calc_rank_cd (const float4 *arrdata, TSVector txt, TSQuery query, int method)
 
Datum ts_rankcd_wttf (PG_FUNCTION_ARGS)
 
Datum ts_rankcd_wtt (PG_FUNCTION_ARGS)
 
Datum ts_rankcd_ttf (PG_FUNCTION_ARGS)
 
Datum ts_rankcd_tt (PG_FUNCTION_ARGS)
 

Variables

static const float default_weights [NUM_WEIGHTS] = {0.1f, 0.2f, 0.4f, 1.0f}
 

Macro Definition Documentation

◆ DEF_NORM_METHOD

#define DEF_NORM_METHOD   RANK_NO_NORM

Definition at line 36 of file tsrank.c.

◆ MAXQROPOS

#define MAXQROPOS   MAXENTRYPOS

Definition at line 545 of file tsrank.c.

◆ NUM_WEIGHTS

#define NUM_WEIGHTS   4

Definition at line 24 of file tsrank.c.

◆ QR_GET_OPERAND_DATA

#define QR_GET_OPERAND_DATA (   q,
 
)     ( (q)->operandData + (((QueryItem*)(v)) - GETQUERY((q)->query)) )

Definition at line 561 of file tsrank.c.

◆ RANK_NO_NORM

#define RANK_NO_NORM   0x00

Definition at line 29 of file tsrank.c.

◆ RANK_NORM_EXTDIST

#define RANK_NORM_EXTDIST   0x04

Definition at line 32 of file tsrank.c.

◆ RANK_NORM_LENGTH

#define RANK_NORM_LENGTH   0x02

Definition at line 31 of file tsrank.c.

◆ RANK_NORM_LOGLENGTH

#define RANK_NORM_LOGLENGTH   0x01

Definition at line 30 of file tsrank.c.

◆ RANK_NORM_LOGUNIQ

#define RANK_NORM_LOGUNIQ   0x10

Definition at line 34 of file tsrank.c.

◆ RANK_NORM_RDIVRPLUS1

#define RANK_NORM_RDIVRPLUS1   0x20

Definition at line 35 of file tsrank.c.

◆ RANK_NORM_UNIQ

#define RANK_NORM_UNIQ   0x08

Definition at line 33 of file tsrank.c.

◆ WordECompareQueryItem

#define WordECompareQueryItem (   e,
  q,
  p,
  i,
 
)
Value:
tsCompareString((q) + (i)->distance, (i)->length, \
(e) + (p)->pos, (p)->len, (m))
int i
Definition: isn.c:73
const void size_t len
e
Definition: preproc-init.c:82
int32 tsCompareString(char *a, int lena, char *b, int lenb, bool prefix)
Definition: tsvector_op.c:1152

Definition at line 76 of file tsrank.c.

◆ wpos

#define wpos (   wep)    ( w[ WEP_GETWEIGHT(wep) ] )

Definition at line 27 of file tsrank.c.

Function Documentation

◆ calc_rank()

static float calc_rank ( const float *  w,
TSVector  t,
TSQuery  q,
int32  method 
)
static

Definition at line 358 of file tsrank.c.

359 {
360  QueryItem *item = GETQUERY(q);
361  float res = 0.0;
362  int len;
363 
364  if (!t->size || !q->size)
365  return 0.0;
366 
367  /* XXX: What about NOT? */
368  res = (item->type == QI_OPR && (item->qoperator.oper == OP_AND ||
369  item->qoperator.oper == OP_PHRASE)) ?
370  calc_rank_and(w, t, q) :
371  calc_rank_or(w, t, q);
372 
373  if (res < 0)
374  res = 1e-20f;
375 
376  if ((method & RANK_NORM_LOGLENGTH) && t->size > 0)
377  res /= log((double) (cnt_length(t) + 1)) / log(2.0);
378 
379  if (method & RANK_NORM_LENGTH)
380  {
381  len = cnt_length(t);
382  if (len > 0)
383  res /= (float) len;
384  }
385 
386  /* RANK_NORM_EXTDIST not applicable */
387 
388  if ((method & RANK_NORM_UNIQ) && t->size > 0)
389  res /= (float) (t->size);
390 
391  if ((method & RANK_NORM_LOGUNIQ) && t->size > 0)
392  res /= log((double) (t->size + 1)) / log(2.0);
393 
394  if (method & RANK_NORM_RDIVRPLUS1)
395  res /= (res + 1);
396 
397  return res;
398 }
#define GETQUERY(x)
Definition: _int.h:157
int32 size
Definition: ts_type.h:221
int32 size
Definition: ts_type.h:93
#define QI_OPR
Definition: ts_type.h:150
#define OP_AND
Definition: ts_type.h:180
#define OP_PHRASE
Definition: ts_type.h:182
#define RANK_NORM_RDIVRPLUS1
Definition: tsrank.c:35
#define RANK_NORM_UNIQ
Definition: tsrank.c:33
#define RANK_NORM_LOGLENGTH
Definition: tsrank.c:30
#define RANK_NORM_LENGTH
Definition: tsrank.c:31
static int cnt_length(TSVector t)
Definition: tsrank.c:54
static float calc_rank_or(const float *w, TSVector t, TSQuery q)
Definition: tsrank.c:284
static float calc_rank_and(const float *w, TSVector t, TSQuery q)
Definition: tsrank.c:201
#define RANK_NORM_LOGUNIQ
Definition: tsrank.c:34
QueryOperator qoperator
Definition: ts_type.h:209
QueryItemType type
Definition: ts_type.h:208

References calc_rank_and(), calc_rank_or(), cnt_length(), GETQUERY, len, OP_AND, OP_PHRASE, QueryOperator::oper, QI_OPR, QueryItem::qoperator, RANK_NORM_LENGTH, RANK_NORM_LOGLENGTH, RANK_NORM_LOGUNIQ, RANK_NORM_RDIVRPLUS1, RANK_NORM_UNIQ, res, TSVectorData::size, TSQueryData::size, and QueryItem::type.

Referenced by ts_rank_tt(), ts_rank_ttf(), ts_rank_wtt(), and ts_rank_wttf().

◆ calc_rank_and()

static float calc_rank_and ( const float *  w,
TSVector  t,
TSQuery  q 
)
static

Definition at line 201 of file tsrank.c.

202 {
203  WordEntryPosVector **pos;
204  WordEntryPosVector1 posnull;
205  WordEntryPosVector *POSNULL;
206  int i,
207  k,
208  l,
209  p;
210  WordEntry *entry,
211  *firstentry;
212  WordEntryPos *post,
213  *ct;
214  int32 dimt,
215  lenct,
216  dist,
217  nitem;
218  float res = -1.0;
219  QueryOperand **item;
220  int size = q->size;
221 
222  item = SortAndUniqItems(q, &size);
223  if (size < 2)
224  {
225  pfree(item);
226  return calc_rank_or(w, t, q);
227  }
228  pos = (WordEntryPosVector **) palloc0(sizeof(WordEntryPosVector *) * q->size);
229 
230  /* A dummy WordEntryPos array to use when haspos is false */
231  posnull.npos = 1;
232  posnull.pos[0] = 0;
233  WEP_SETPOS(posnull.pos[0], MAXENTRYPOS - 1);
234  POSNULL = (WordEntryPosVector *) &posnull;
235 
236  for (i = 0; i < size; i++)
237  {
238  firstentry = entry = find_wordentry(t, q, item[i], &nitem);
239  if (!entry)
240  continue;
241 
242  while (entry - firstentry < nitem)
243  {
244  if (entry->haspos)
245  pos[i] = _POSVECPTR(t, entry);
246  else
247  pos[i] = POSNULL;
248 
249  dimt = pos[i]->npos;
250  post = pos[i]->pos;
251  for (k = 0; k < i; k++)
252  {
253  if (!pos[k])
254  continue;
255  lenct = pos[k]->npos;
256  ct = pos[k]->pos;
257  for (l = 0; l < dimt; l++)
258  {
259  for (p = 0; p < lenct; p++)
260  {
261  dist = abs((int) WEP_GETPOS(post[l]) - (int) WEP_GETPOS(ct[p]));
262  if (dist || (dist == 0 && (pos[i] == POSNULL || pos[k] == POSNULL)))
263  {
264  float curw;
265 
266  if (!dist)
267  dist = MAXENTRYPOS;
268  curw = sqrt(wpos(post[l]) * wpos(ct[p]) * word_distance(dist));
269  res = (res < 0) ? curw : 1.0 - (1.0 - res) * (1.0 - curw);
270  }
271  }
272  }
273  }
274 
275  entry++;
276  }
277  }
278  pfree(pos);
279  pfree(item);
280  return res;
281 }
signed int int32
Definition: c.h:497
void pfree(void *pointer)
Definition: mcxt.c:1521
void * palloc0(Size size)
Definition: mcxt.c:1347
static pg_noinline void Size size
Definition: slab.c:607
WordEntryPos pos[1]
Definition: ts_type.h:75
WordEntryPos pos[FLEXIBLE_ARRAY_MEMBER]
Definition: ts_type.h:68
uint32 haspos
Definition: ts_type.h:44
#define WEP_GETPOS(x)
Definition: ts_type.h:80
#define _POSVECPTR(x, e)
Definition: ts_type.h:109
#define MAXENTRYPOS
Definition: ts_type.h:85
#define WEP_SETPOS(x, v)
Definition: ts_type.h:83
uint16 WordEntryPos
Definition: ts_type.h:63
#define wpos(wep)
Definition: tsrank.c:27
static float4 word_distance(int32 w)
Definition: tsrank.c:45
static WordEntry * find_wordentry(TSVector t, TSQuery q, QueryOperand *item, int32 *nitem)
Definition: tsrank.c:87
static QueryOperand ** SortAndUniqItems(TSQuery q, int *size)
Definition: tsrank.c:155

References _POSVECPTR, calc_rank_or(), find_wordentry(), WordEntry::haspos, i, MAXENTRYPOS, WordEntryPosVector::npos, WordEntryPosVector1::npos, palloc0(), pfree(), WordEntryPosVector1::pos, WordEntryPosVector::pos, res, size, TSQueryData::size, SortAndUniqItems(), WEP_GETPOS, WEP_SETPOS, word_distance(), and wpos.

Referenced by calc_rank().

◆ calc_rank_cd()

static float4 calc_rank_cd ( const float4 arrdata,
TSVector  txt,
TSQuery  query,
int  method 
)
static

Definition at line 855 of file tsrank.c.

856 {
857  DocRepresentation *doc;
858  int len,
859  i,
860  doclen = 0;
861  CoverExt ext;
862  double Wdoc = 0.0;
863  double invws[NUM_WEIGHTS];
864  double SumDist = 0.0,
865  PrevExtPos = 0.0;
866  int NExtent = 0;
868 
869 
870  for (i = 0; i < NUM_WEIGHTS; i++)
871  {
872  invws[i] = ((double) ((arrdata[i] >= 0) ? arrdata[i] : default_weights[i]));
873  if (invws[i] > 1.0)
874  ereport(ERROR,
875  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
876  errmsg("weight out of range")));
877  invws[i] = 1.0 / invws[i];
878  }
879 
880  qr.query = query;
882  palloc0(sizeof(QueryRepresentationOperand) * query->size);
883 
884  doc = get_docrep(txt, &qr, &doclen);
885  if (!doc)
886  {
887  pfree(qr.operandData);
888  return 0.0;
889  }
890 
891  MemSet(&ext, 0, sizeof(CoverExt));
892  while (Cover(doc, doclen, &qr, &ext))
893  {
894  double Cpos = 0.0;
895  double InvSum = 0.0;
896  double CurExtPos;
897  int nNoise;
898  DocRepresentation *ptr = ext.begin;
899 
900  while (ptr <= ext.end)
901  {
902  InvSum += invws[WEP_GETWEIGHT(ptr->pos)];
903  ptr++;
904  }
905 
906  Cpos = ((double) (ext.end - ext.begin + 1)) / InvSum;
907 
908  /*
909  * if doc are big enough then ext.q may be equal to ext.p due to limit
910  * of positional information. In this case we approximate number of
911  * noise word as half cover's length
912  */
913  nNoise = (ext.q - ext.p) - (ext.end - ext.begin);
914  if (nNoise < 0)
915  nNoise = (ext.end - ext.begin) / 2;
916  Wdoc += Cpos / ((double) (1 + nNoise));
917 
918  CurExtPos = ((double) (ext.q + ext.p)) / 2.0;
919  if (NExtent > 0 && CurExtPos > PrevExtPos /* prevent division by
920  * zero in a case of
921  * multiple lexize */ )
922  SumDist += 1.0 / (CurExtPos - PrevExtPos);
923 
924  PrevExtPos = CurExtPos;
925  NExtent++;
926  }
927 
928  if ((method & RANK_NORM_LOGLENGTH) && txt->size > 0)
929  Wdoc /= log((double) (cnt_length(txt) + 1));
930 
931  if (method & RANK_NORM_LENGTH)
932  {
933  len = cnt_length(txt);
934  if (len > 0)
935  Wdoc /= (double) len;
936  }
937 
938  if ((method & RANK_NORM_EXTDIST) && NExtent > 0 && SumDist > 0)
939  Wdoc /= ((double) NExtent) / SumDist;
940 
941  if ((method & RANK_NORM_UNIQ) && txt->size > 0)
942  Wdoc /= (double) (txt->size);
943 
944  if ((method & RANK_NORM_LOGUNIQ) && txt->size > 0)
945  Wdoc /= log((double) (txt->size + 1)) / log(2.0);
946 
947  if (method & RANK_NORM_RDIVRPLUS1)
948  Wdoc /= (Wdoc + 1);
949 
950  pfree(doc);
951 
952  pfree(qr.operandData);
953 
954  return (float4) Wdoc;
955 }
float float4
Definition: c.h:632
#define MemSet(start, val, len)
Definition: c.h:1023
int errcode(int sqlerrcode)
Definition: elog.c:853
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define ERROR
Definition: elog.h:39
#define ereport(elevel,...)
Definition: elog.h:149
int p
Definition: tsrank.c:591
DocRepresentation * begin
Definition: tsrank.c:593
DocRepresentation * end
Definition: tsrank.c:594
int q
Definition: tsrank.c:592
WordEntryPos pos
Definition: tsrank.c:520
QueryRepresentationOperand * operandData
Definition: tsrank.c:558
#define WEP_GETWEIGHT(x)
Definition: ts_type.h:79
static DocRepresentation * get_docrep(TSVector txt, QueryRepresentation *qr, int *doclen)
Definition: tsrank.c:732
static const float default_weights[NUM_WEIGHTS]
Definition: tsrank.c:25
#define NUM_WEIGHTS
Definition: tsrank.c:24
#define RANK_NORM_EXTDIST
Definition: tsrank.c:32
static bool Cover(DocRepresentation *doc, int len, QueryRepresentation *qr, CoverExt *ext)
Definition: tsrank.c:651

References CoverExt::begin, cnt_length(), Cover(), default_weights, CoverExt::end, ereport, errcode(), errmsg(), ERROR, get_docrep(), i, len, MemSet, NUM_WEIGHTS, QueryRepresentation::operandData, CoverExt::p, palloc0(), pfree(), DocRepresentation::pos, CoverExt::q, QueryRepresentation::query, RANK_NORM_EXTDIST, RANK_NORM_LENGTH, RANK_NORM_LOGLENGTH, RANK_NORM_LOGUNIQ, RANK_NORM_RDIVRPLUS1, RANK_NORM_UNIQ, TSVectorData::size, TSQueryData::size, and WEP_GETWEIGHT.

Referenced by ts_rankcd_tt(), ts_rankcd_ttf(), ts_rankcd_wtt(), and ts_rankcd_wttf().

◆ calc_rank_or()

static float calc_rank_or ( const float *  w,
TSVector  t,
TSQuery  q 
)
static

Definition at line 284 of file tsrank.c.

285 {
286  WordEntry *entry,
287  *firstentry;
288  WordEntryPosVector1 posnull;
289  WordEntryPos *post;
290  int32 dimt,
291  j,
292  i,
293  nitem;
294  float res = 0.0;
295  QueryOperand **item;
296  int size = q->size;
297 
298  /* A dummy WordEntryPos array to use when haspos is false */
299  posnull.npos = 1;
300  posnull.pos[0] = 0;
301 
302  item = SortAndUniqItems(q, &size);
303 
304  for (i = 0; i < size; i++)
305  {
306  float resj,
307  wjm;
308  int32 jm;
309 
310  firstentry = entry = find_wordentry(t, q, item[i], &nitem);
311  if (!entry)
312  continue;
313 
314  while (entry - firstentry < nitem)
315  {
316  if (entry->haspos)
317  {
318  dimt = POSDATALEN(t, entry);
319  post = POSDATAPTR(t, entry);
320  }
321  else
322  {
323  dimt = posnull.npos;
324  post = posnull.pos;
325  }
326 
327  resj = 0.0;
328  wjm = -1.0;
329  jm = 0;
330  for (j = 0; j < dimt; j++)
331  {
332  resj = resj + wpos(post[j]) / ((j + 1) * (j + 1));
333  if (wpos(post[j]) > wjm)
334  {
335  wjm = wpos(post[j]);
336  jm = j;
337  }
338  }
339 /*
340  limit (sum(1/i^2),i=1,inf) = pi^2/6
341  resj = sum(wi/i^2),i=1,noccurrence,
342  wi - should be sorted desc,
343  don't sort for now, just choose maximum weight. This should be corrected
344  Oleg Bartunov
345 */
346  res = res + (wjm + resj - wjm / ((jm + 1) * (jm + 1))) / 1.64493406685;
347 
348  entry++;
349  }
350  }
351  if (size > 0)
352  res = res / size;
353  pfree(item);
354  return res;
355 }
int j
Definition: isn.c:74
#define POSDATALEN(x, e)
Definition: ts_type.h:110
#define POSDATAPTR(x, e)
Definition: ts_type.h:111

References find_wordentry(), WordEntry::haspos, i, j, WordEntryPosVector1::npos, pfree(), WordEntryPosVector1::pos, POSDATALEN, POSDATAPTR, res, size, TSQueryData::size, SortAndUniqItems(), and wpos.

Referenced by calc_rank(), and calc_rank_and().

◆ checkcondition_QueryOperand()

static TSTernaryValue checkcondition_QueryOperand ( void *  checkval,
QueryOperand val,
ExecPhraseData data 
)
static

Definition at line 568 of file tsrank.c.

570 {
571  QueryRepresentation *qr = (QueryRepresentation *) checkval;
573 
574  if (!opData->operandexists)
575  return TS_NO;
576 
577  if (data)
578  {
579  data->npos = opData->npos;
580  data->pos = opData->pos;
581  if (opData->reverseinsert)
582  data->pos += MAXQROPOS - opData->npos;
583  }
584 
585  return TS_YES;
586 }
long val
Definition: informix.c:689
const void * data
WordEntryPos pos[MAXQROPOS]
Definition: tsrank.c:552
@ TS_NO
Definition: ts_utils.h:134
@ TS_YES
Definition: ts_utils.h:135
#define MAXQROPOS
Definition: tsrank.c:545
#define QR_GET_OPERAND_DATA(q, v)
Definition: tsrank.c:561

References data, MAXQROPOS, QueryRepresentationOperand::npos, QueryRepresentationOperand::operandexists, QueryRepresentationOperand::pos, QR_GET_OPERAND_DATA, QueryRepresentationOperand::reverseinsert, TS_NO, TS_YES, and val.

Referenced by Cover().

◆ cnt_length()

static int cnt_length ( TSVector  t)
static

Definition at line 54 of file tsrank.c.

55 {
56  WordEntry *ptr = ARRPTR(t),
57  *end = (WordEntry *) STRPTR(t);
58  int len = 0;
59 
60  while (ptr < end)
61  {
62  int clen = POSDATALEN(t, ptr);
63 
64  if (clen == 0)
65  len += 1;
66  else
67  len += clen;
68 
69  ptr++;
70  }
71 
72  return len;
73 }
#define ARRPTR(x)
Definition: cube.c:25
#define STRPTR(x)
Definition: hstore.h:76

References ARRPTR, len, POSDATALEN, and STRPTR.

Referenced by calc_rank(), and calc_rank_cd().

◆ compareDocR()

static int compareDocR ( const void *  va,
const void *  vb 
)
static

Definition at line 524 of file tsrank.c.

525 {
526  const DocRepresentation *a = (const DocRepresentation *) va;
527  const DocRepresentation *b = (const DocRepresentation *) vb;
528 
529  if (WEP_GETPOS(a->pos) == WEP_GETPOS(b->pos))
530  {
531  if (WEP_GETWEIGHT(a->pos) == WEP_GETWEIGHT(b->pos))
532  {
533  if (a->data.map.entry == b->data.map.entry)
534  return 0;
535 
536  return (a->data.map.entry > b->data.map.entry) ? 1 : -1;
537  }
538 
539  return (WEP_GETWEIGHT(a->pos) > WEP_GETWEIGHT(b->pos)) ? 1 : -1;
540  }
541 
542  return (WEP_GETPOS(a->pos) > WEP_GETPOS(b->pos)) ? 1 : -1;
543 }
int b
Definition: isn.c:70
int a
Definition: isn.c:69

References a, b, WEP_GETPOS, and WEP_GETWEIGHT.

Referenced by get_docrep().

◆ compareQueryOperand()

static int compareQueryOperand ( const void *  a,
const void *  b,
void *  arg 
)
static

Definition at line 136 of file tsrank.c.

137 {
138  char *operand = (char *) arg;
139  QueryOperand *qa = (*(QueryOperand *const *) a);
140  QueryOperand *qb = (*(QueryOperand *const *) b);
141 
142  return tsCompareString(operand + qa->distance, qa->length,
143  operand + qb->distance, qb->length,
144  false);
145 }
void * arg
uint32 distance
Definition: ts_type.h:172
uint32 length
Definition: ts_type.h:171

References a, arg, b, QueryOperand::distance, QueryOperand::length, and tsCompareString().

Referenced by SortAndUniqItems().

◆ Cover()

static bool Cover ( DocRepresentation doc,
int  len,
QueryRepresentation qr,
CoverExt ext 
)
static

Definition at line 651 of file tsrank.c.

652 {
653  DocRepresentation *ptr;
654  int lastpos = ext->pos;
655  bool found = false;
656 
657  /*
658  * since this function recurses, it could be driven to stack overflow.
659  * (though any decent compiler will optimize away the tail-recursion.
660  */
662 
663  resetQueryRepresentation(qr, false);
664 
665  ext->p = INT_MAX;
666  ext->q = 0;
667  ptr = doc + ext->pos;
668 
669  /* find upper bound of cover from current position, move up */
670  while (ptr - doc < len)
671  {
673 
674  if (TS_execute(GETQUERY(qr->query), (void *) qr,
676  {
677  if (WEP_GETPOS(ptr->pos) > ext->q)
678  {
679  ext->q = WEP_GETPOS(ptr->pos);
680  ext->end = ptr;
681  lastpos = ptr - doc;
682  found = true;
683  }
684  break;
685  }
686  ptr++;
687  }
688 
689  if (!found)
690  return false;
691 
692  resetQueryRepresentation(qr, true);
693 
694  ptr = doc + lastpos;
695 
696  /* find lower bound of cover from found upper bound, move down */
697  while (ptr >= doc + ext->pos)
698  {
699  /*
700  * we scan doc from right to left, so pos info in reverse order!
701  */
703 
704  if (TS_execute(GETQUERY(qr->query), (void *) qr,
706  {
707  if (WEP_GETPOS(ptr->pos) < ext->p)
708  {
709  ext->begin = ptr;
710  ext->p = WEP_GETPOS(ptr->pos);
711  }
712  break;
713  }
714  ptr--;
715  }
716 
717  if (ext->p <= ext->q)
718  {
719  /*
720  * set position for next try to next lexeme after beginning of found
721  * cover
722  */
723  ext->pos = (ptr - doc) + 1;
724  return true;
725  }
726 
727  ext->pos++;
728  return Cover(doc, len, qr, ext);
729 }
void check_stack_depth(void)
Definition: postgres.c:3564
int pos
Definition: tsrank.c:590
#define TS_EXEC_EMPTY
Definition: ts_utils.h:188
static TSTernaryValue checkcondition_QueryOperand(void *checkval, QueryOperand *val, ExecPhraseData *data)
Definition: tsrank.c:568
static void resetQueryRepresentation(QueryRepresentation *qr, bool reverseinsert)
Definition: tsrank.c:598
static void fillQueryRepresentationData(QueryRepresentation *qr, DocRepresentation *entry)
Definition: tsrank.c:611
bool TS_execute(QueryItem *curitem, void *arg, uint32 flags, TSExecuteCallback chkcond)
Definition: tsvector_op.c:1854

References CoverExt::begin, check_stack_depth(), checkcondition_QueryOperand(), CoverExt::end, fillQueryRepresentationData(), GETQUERY, len, CoverExt::p, DocRepresentation::pos, CoverExt::pos, CoverExt::q, QueryRepresentation::query, resetQueryRepresentation(), TS_EXEC_EMPTY, TS_execute(), and WEP_GETPOS.

Referenced by calc_rank_cd().

◆ fillQueryRepresentationData()

static void fillQueryRepresentationData ( QueryRepresentation qr,
DocRepresentation entry 
)
static

Definition at line 611 of file tsrank.c.

612 {
613  int i;
614  int lastPos;
616 
617  for (i = 0; i < entry->data.query.nitem; i++)
618  {
619  if (entry->data.query.items[i]->type != QI_VAL)
620  continue;
621 
622  opData = QR_GET_OPERAND_DATA(qr, entry->data.query.items[i]);
623 
624  opData->operandexists = true;
625 
626  if (opData->npos == 0)
627  {
628  lastPos = (opData->reverseinsert) ? (MAXQROPOS - 1) : 0;
629  opData->pos[lastPos] = entry->pos;
630  opData->npos++;
631  continue;
632  }
633 
634  lastPos = opData->reverseinsert ?
635  (MAXQROPOS - opData->npos) :
636  (opData->npos - 1);
637 
638  if (WEP_GETPOS(opData->pos[lastPos]) != WEP_GETPOS(entry->pos))
639  {
640  lastPos = opData->reverseinsert ?
641  (MAXQROPOS - 1 - opData->npos) :
642  (opData->npos);
643 
644  opData->pos[lastPos] = entry->pos;
645  opData->npos++;
646  }
647  }
648 }
struct DocRepresentation::@30::@31 query
union DocRepresentation::@30 data
#define QI_VAL
Definition: ts_type.h:149

References DocRepresentation::data, i, MAXQROPOS, QueryRepresentationOperand::npos, QueryRepresentationOperand::operandexists, DocRepresentation::pos, QueryRepresentationOperand::pos, QI_VAL, QR_GET_OPERAND_DATA, DocRepresentation::query, QueryRepresentationOperand::reverseinsert, and WEP_GETPOS.

Referenced by Cover().

◆ find_wordentry()

static WordEntry* find_wordentry ( TSVector  t,
TSQuery  q,
QueryOperand item,
int32 nitem 
)
static

Definition at line 87 of file tsrank.c.

88 {
89  WordEntry *StopLow = ARRPTR(t);
90  WordEntry *StopHigh = (WordEntry *) STRPTR(t);
91  WordEntry *StopMiddle = StopHigh;
92  int difference;
93 
94  *nitem = 0;
95 
96  /* Loop invariant: StopLow <= item < StopHigh */
97  while (StopLow < StopHigh)
98  {
99  StopMiddle = StopLow + (StopHigh - StopLow) / 2;
100  difference = WordECompareQueryItem(STRPTR(t), GETOPERAND(q), StopMiddle, item, false);
101  if (difference == 0)
102  {
103  StopHigh = StopMiddle;
104  *nitem = 1;
105  break;
106  }
107  else if (difference > 0)
108  StopLow = StopMiddle + 1;
109  else
110  StopHigh = StopMiddle;
111  }
112 
113  if (item->prefix)
114  {
115  if (StopLow >= StopHigh)
116  StopMiddle = StopHigh;
117 
118  *nitem = 0;
119 
120  while (StopMiddle < (WordEntry *) STRPTR(t) &&
121  WordECompareQueryItem(STRPTR(t), GETOPERAND(q), StopMiddle, item, true) == 0)
122  {
123  (*nitem)++;
124  StopMiddle++;
125  }
126  }
127 
128  return (*nitem > 0) ? StopHigh : NULL;
129 }
Datum difference(PG_FUNCTION_ARGS)
#define GETOPERAND(x)
Definition: ltree.h:165
bool prefix
Definition: ts_type.h:163
#define WordECompareQueryItem(e, q, p, i, m)
Definition: tsrank.c:76

References ARRPTR, difference(), GETOPERAND, QueryOperand::prefix, STRPTR, and WordECompareQueryItem.

Referenced by calc_rank_and(), calc_rank_or(), and get_docrep().

◆ get_docrep()

static DocRepresentation* get_docrep ( TSVector  txt,
QueryRepresentation qr,
int *  doclen 
)
static

Definition at line 732 of file tsrank.c.

733 {
734  QueryItem *item = GETQUERY(qr->query);
735  WordEntry *entry,
736  *firstentry;
737  WordEntryPos *post;
738  int32 dimt, /* number of 'post' items */
739  j,
740  i,
741  nitem;
742  int len = qr->query->size * 4,
743  cur = 0;
744  DocRepresentation *doc;
745 
746  doc = (DocRepresentation *) palloc(sizeof(DocRepresentation) * len);
747 
748  /*
749  * Iterate through query to make DocRepresentation for words and it's
750  * entries satisfied by query
751  */
752  for (i = 0; i < qr->query->size; i++)
753  {
754  QueryOperand *curoperand;
755 
756  if (item[i].type != QI_VAL)
757  continue;
758 
759  curoperand = &item[i].qoperand;
760 
761  firstentry = entry = find_wordentry(txt, qr->query, curoperand, &nitem);
762  if (!entry)
763  continue;
764 
765  /* iterations over entries in tsvector */
766  while (entry - firstentry < nitem)
767  {
768  if (entry->haspos)
769  {
770  dimt = POSDATALEN(txt, entry);
771  post = POSDATAPTR(txt, entry);
772  }
773  else
774  {
775  /* ignore words without positions */
776  entry++;
777  continue;
778  }
779 
780  while (cur + dimt >= len)
781  {
782  len *= 2;
783  doc = (DocRepresentation *) repalloc(doc, sizeof(DocRepresentation) * len);
784  }
785 
786  /* iterations over entry's positions */
787  for (j = 0; j < dimt; j++)
788  {
789  if (curoperand->weight == 0 ||
790  curoperand->weight & (1 << WEP_GETWEIGHT(post[j])))
791  {
792  doc[cur].pos = post[j];
793  doc[cur].data.map.entry = entry;
794  doc[cur].data.map.item = (QueryItem *) curoperand;
795  cur++;
796  }
797  }
798 
799  entry++;
800  }
801  }
802 
803  if (cur > 0)
804  {
805  DocRepresentation *rptr = doc + 1,
806  *wptr = doc,
807  storage;
808 
809  /*
810  * Sort representation in ascending order by pos and entry
811  */
812  qsort(doc, cur, sizeof(DocRepresentation), compareDocR);
813 
814  /*
815  * Join QueryItem per WordEntry and it's position
816  */
817  storage.pos = doc->pos;
818  storage.data.query.items = palloc(sizeof(QueryItem *) * qr->query->size);
819  storage.data.query.items[0] = doc->data.map.item;
820  storage.data.query.nitem = 1;
821 
822  while (rptr - doc < cur)
823  {
824  if (rptr->pos == (rptr - 1)->pos &&
825  rptr->data.map.entry == (rptr - 1)->data.map.entry)
826  {
827  storage.data.query.items[storage.data.query.nitem] = rptr->data.map.item;
828  storage.data.query.nitem++;
829  }
830  else
831  {
832  *wptr = storage;
833  wptr++;
834  storage.pos = rptr->pos;
835  storage.data.query.items = palloc(sizeof(QueryItem *) * qr->query->size);
836  storage.data.query.items[0] = rptr->data.map.item;
837  storage.data.query.nitem = 1;
838  }
839 
840  rptr++;
841  }
842 
843  *wptr = storage;
844  wptr++;
845 
846  *doclen = wptr - doc;
847  return doc;
848  }
849 
850  pfree(doc);
851  return NULL;
852 }
struct cursor * cur
Definition: ecpg.c:28
#define storage
Definition: indent_codes.h:68
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1541
void * palloc(Size size)
Definition: mcxt.c:1317
#define qsort(a, b, c, d)
Definition: port.h:447
struct DocRepresentation::@30::@32 map
uint8 weight
Definition: ts_type.h:159
static int compareDocR(const void *va, const void *vb)
Definition: tsrank.c:524
QueryOperand qoperand
Definition: ts_type.h:210
const char * type

References compareDocR(), cur, DocRepresentation::data, find_wordentry(), GETQUERY, WordEntry::haspos, i, j, len, DocRepresentation::map, palloc(), pfree(), DocRepresentation::pos, POSDATALEN, POSDATAPTR, QI_VAL, QueryItem::qoperand, qsort, QueryRepresentation::query, repalloc(), TSQueryData::size, storage, type, QueryOperand::weight, and WEP_GETWEIGHT.

Referenced by calc_rank_cd().

◆ getWeights()

static void getWeights ( ArrayType win,
float *  ws 
)
static

Definition at line 405 of file tsrank.c.

406 {
407  int i;
408  float4 *arrdata;
409 
410  Assert(win != NULL);
411 
412  if (ARR_NDIM(win) != 1)
413  ereport(ERROR,
414  (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
415  errmsg("array of weight must be one-dimensional")));
416 
417  if (ArrayGetNItems(ARR_NDIM(win), ARR_DIMS(win)) < NUM_WEIGHTS)
418  ereport(ERROR,
419  (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
420  errmsg("array of weight is too short")));
421 
422  if (array_contains_nulls(win))
423  ereport(ERROR,
424  (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
425  errmsg("array of weight must not contain nulls")));
426 
427  arrdata = (float4 *) ARR_DATA_PTR(win);
428  for (i = 0; i < NUM_WEIGHTS; i++)
429  {
430  ws[i] = (arrdata[i] >= 0) ? arrdata[i] : default_weights[i];
431  if (ws[i] > 1.0)
432  ereport(ERROR,
433  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
434  errmsg("weight out of range")));
435  }
436 }
#define ARR_NDIM(a)
Definition: array.h:290
#define ARR_DATA_PTR(a)
Definition: array.h:322
#define ARR_DIMS(a)
Definition: array.h:294
bool array_contains_nulls(ArrayType *array)
Definition: arrayfuncs.c:3755
int ArrayGetNItems(int ndim, const int *dims)
Definition: arrayutils.c:57
#define Assert(condition)
Definition: c.h:861

References ARR_DATA_PTR, ARR_DIMS, ARR_NDIM, array_contains_nulls(), ArrayGetNItems(), Assert, default_weights, ereport, errcode(), errmsg(), ERROR, i, and NUM_WEIGHTS.

Referenced by ts_rank_wtt(), ts_rank_wttf(), ts_rankcd_wtt(), and ts_rankcd_wttf().

◆ resetQueryRepresentation()

static void resetQueryRepresentation ( QueryRepresentation qr,
bool  reverseinsert 
)
static

Definition at line 598 of file tsrank.c.

599 {
600  int i;
601 
602  for (i = 0; i < qr->query->size; i++)
603  {
604  qr->operandData[i].operandexists = false;
605  qr->operandData[i].reverseinsert = reverseinsert;
606  qr->operandData[i].npos = 0;
607  }
608 }

References i, QueryRepresentationOperand::npos, QueryRepresentation::operandData, QueryRepresentationOperand::operandexists, QueryRepresentation::query, QueryRepresentationOperand::reverseinsert, and TSQueryData::size.

Referenced by Cover().

◆ SortAndUniqItems()

static QueryOperand** SortAndUniqItems ( TSQuery  q,
int *  size 
)
static

Definition at line 155 of file tsrank.c.

156 {
157  char *operand = GETOPERAND(q);
158  QueryItem *item = GETQUERY(q);
159  QueryOperand **res,
160  **ptr,
161  **prevptr;
162 
163  ptr = res = (QueryOperand **) palloc(sizeof(QueryOperand *) * *size);
164 
165  /* Collect all operands from the tree to res */
166  while ((*size)--)
167  {
168  if (item->type == QI_VAL)
169  {
170  *ptr = (QueryOperand *) item;
171  ptr++;
172  }
173  item++;
174  }
175 
176  *size = ptr - res;
177  if (*size < 2)
178  return res;
179 
180  qsort_arg(res, *size, sizeof(QueryOperand *), compareQueryOperand, operand);
181 
182  ptr = res + 1;
183  prevptr = res;
184 
185  /* remove duplicates */
186  while (ptr - res < *size)
187  {
188  if (compareQueryOperand((void *) ptr, (void *) prevptr, (void *) operand) != 0)
189  {
190  prevptr++;
191  *prevptr = *ptr;
192  }
193  ptr++;
194  }
195 
196  *size = prevptr + 1 - res;
197  return res;
198 }
void qsort_arg(void *base, size_t nel, size_t elsize, qsort_arg_comparator cmp, void *arg)
static int compareQueryOperand(const void *a, const void *b, void *arg)
Definition: tsrank.c:136

References compareQueryOperand(), GETOPERAND, GETQUERY, palloc(), QI_VAL, qsort_arg(), res, size, and QueryItem::type.

Referenced by calc_rank_and(), and calc_rank_or().

◆ ts_rank_tt()

Datum ts_rank_tt ( PG_FUNCTION_ARGS  )

Definition at line 491 of file tsrank.c.

492 {
493  TSVector txt = PG_GETARG_TSVECTOR(0);
494  TSQuery query = PG_GETARG_TSQUERY(1);
495  float res;
496 
498 
499  PG_FREE_IF_COPY(txt, 0);
500  PG_FREE_IF_COPY(query, 1);
502 }
#define PG_FREE_IF_COPY(ptr, n)
Definition: fmgr.h:260
#define PG_RETURN_FLOAT4(x)
Definition: fmgr.h:366
#define PG_GETARG_TSVECTOR(n)
Definition: ts_type.h:135
#define PG_GETARG_TSQUERY(n)
Definition: ts_type.h:266
static float calc_rank(const float *w, TSVector t, TSQuery q, int32 method)
Definition: tsrank.c:358
#define DEF_NORM_METHOD
Definition: tsrank.c:36

References calc_rank(), DEF_NORM_METHOD, default_weights, PG_FREE_IF_COPY, PG_GETARG_TSQUERY, PG_GETARG_TSVECTOR, PG_RETURN_FLOAT4, and res.

◆ ts_rank_ttf()

Datum ts_rank_ttf ( PG_FUNCTION_ARGS  )

Definition at line 476 of file tsrank.c.

477 {
478  TSVector txt = PG_GETARG_TSVECTOR(0);
479  TSQuery query = PG_GETARG_TSQUERY(1);
480  int method = PG_GETARG_INT32(2);
481  float res;
482 
483  res = calc_rank(default_weights, txt, query, method);
484 
485  PG_FREE_IF_COPY(txt, 0);
486  PG_FREE_IF_COPY(query, 1);
488 }
#define PG_GETARG_INT32(n)
Definition: fmgr.h:269

References calc_rank(), default_weights, PG_FREE_IF_COPY, PG_GETARG_INT32, PG_GETARG_TSQUERY, PG_GETARG_TSVECTOR, PG_RETURN_FLOAT4, and res.

◆ ts_rank_wtt()

Datum ts_rank_wtt ( PG_FUNCTION_ARGS  )

Definition at line 458 of file tsrank.c.

459 {
461  TSVector txt = PG_GETARG_TSVECTOR(1);
462  TSQuery query = PG_GETARG_TSQUERY(2);
463  float weights[NUM_WEIGHTS];
464  float res;
465 
466  getWeights(win, weights);
467  res = calc_rank(weights, txt, query, DEF_NORM_METHOD);
468 
469  PG_FREE_IF_COPY(win, 0);
470  PG_FREE_IF_COPY(txt, 1);
471  PG_FREE_IF_COPY(query, 2);
473 }
#define PG_GETARG_DATUM(n)
Definition: fmgr.h:268
#define PG_DETOAST_DATUM(datum)
Definition: fmgr.h:240
static void getWeights(ArrayType *win, float *ws)
Definition: tsrank.c:405

References calc_rank(), DEF_NORM_METHOD, getWeights(), NUM_WEIGHTS, PG_DETOAST_DATUM, PG_FREE_IF_COPY, PG_GETARG_DATUM, PG_GETARG_TSQUERY, PG_GETARG_TSVECTOR, PG_RETURN_FLOAT4, and res.

◆ ts_rank_wttf()

Datum ts_rank_wttf ( PG_FUNCTION_ARGS  )

Definition at line 439 of file tsrank.c.

440 {
442  TSVector txt = PG_GETARG_TSVECTOR(1);
443  TSQuery query = PG_GETARG_TSQUERY(2);
444  int method = PG_GETARG_INT32(3);
445  float weights[NUM_WEIGHTS];
446  float res;
447 
448  getWeights(win, weights);
449  res = calc_rank(weights, txt, query, method);
450 
451  PG_FREE_IF_COPY(win, 0);
452  PG_FREE_IF_COPY(txt, 1);
453  PG_FREE_IF_COPY(query, 2);
455 }

References calc_rank(), getWeights(), NUM_WEIGHTS, PG_DETOAST_DATUM, PG_FREE_IF_COPY, PG_GETARG_DATUM, PG_GETARG_INT32, PG_GETARG_TSQUERY, PG_GETARG_TSVECTOR, PG_RETURN_FLOAT4, and res.

◆ ts_rankcd_tt()

Datum ts_rankcd_tt ( PG_FUNCTION_ARGS  )

Definition at line 1010 of file tsrank.c.

1011 {
1012  TSVector txt = PG_GETARG_TSVECTOR(0);
1013  TSQuery query = PG_GETARG_TSQUERY(1);
1014  float res;
1015 
1017 
1018  PG_FREE_IF_COPY(txt, 0);
1019  PG_FREE_IF_COPY(query, 1);
1021 }
static float4 calc_rank_cd(const float4 *arrdata, TSVector txt, TSQuery query, int method)
Definition: tsrank.c:855

References calc_rank_cd(), DEF_NORM_METHOD, default_weights, PG_FREE_IF_COPY, PG_GETARG_TSQUERY, PG_GETARG_TSVECTOR, PG_RETURN_FLOAT4, and res.

◆ ts_rankcd_ttf()

Datum ts_rankcd_ttf ( PG_FUNCTION_ARGS  )

Definition at line 995 of file tsrank.c.

996 {
997  TSVector txt = PG_GETARG_TSVECTOR(0);
998  TSQuery query = PG_GETARG_TSQUERY(1);
999  int method = PG_GETARG_INT32(2);
1000  float res;
1001 
1002  res = calc_rank_cd(default_weights, txt, query, method);
1003 
1004  PG_FREE_IF_COPY(txt, 0);
1005  PG_FREE_IF_COPY(query, 1);
1007 }

References calc_rank_cd(), default_weights, PG_FREE_IF_COPY, PG_GETARG_INT32, PG_GETARG_TSQUERY, PG_GETARG_TSVECTOR, PG_RETURN_FLOAT4, and res.

◆ ts_rankcd_wtt()

Datum ts_rankcd_wtt ( PG_FUNCTION_ARGS  )

Definition at line 977 of file tsrank.c.

978 {
980  TSVector txt = PG_GETARG_TSVECTOR(1);
981  TSQuery query = PG_GETARG_TSQUERY(2);
982  float weights[NUM_WEIGHTS];
983  float res;
984 
985  getWeights(win, weights);
986  res = calc_rank_cd(weights, txt, query, DEF_NORM_METHOD);
987 
988  PG_FREE_IF_COPY(win, 0);
989  PG_FREE_IF_COPY(txt, 1);
990  PG_FREE_IF_COPY(query, 2);
992 }

References calc_rank_cd(), DEF_NORM_METHOD, getWeights(), NUM_WEIGHTS, PG_DETOAST_DATUM, PG_FREE_IF_COPY, PG_GETARG_DATUM, PG_GETARG_TSQUERY, PG_GETARG_TSVECTOR, PG_RETURN_FLOAT4, and res.

◆ ts_rankcd_wttf()

Datum ts_rankcd_wttf ( PG_FUNCTION_ARGS  )

Definition at line 958 of file tsrank.c.

959 {
961  TSVector txt = PG_GETARG_TSVECTOR(1);
962  TSQuery query = PG_GETARG_TSQUERY(2);
963  int method = PG_GETARG_INT32(3);
964  float weights[NUM_WEIGHTS];
965  float res;
966 
967  getWeights(win, weights);
968  res = calc_rank_cd(weights, txt, query, method);
969 
970  PG_FREE_IF_COPY(win, 0);
971  PG_FREE_IF_COPY(txt, 1);
972  PG_FREE_IF_COPY(query, 2);
974 }

References calc_rank_cd(), getWeights(), NUM_WEIGHTS, PG_DETOAST_DATUM, PG_FREE_IF_COPY, PG_GETARG_DATUM, PG_GETARG_INT32, PG_GETARG_TSQUERY, PG_GETARG_TSVECTOR, PG_RETURN_FLOAT4, and res.

◆ word_distance()

static float4 word_distance ( int32  w)
static

Definition at line 45 of file tsrank.c.

46 {
47  if (w > 100)
48  return 1e-30f;
49 
50  return 1.0 / (1.005 + 0.05 * exp(((float4) w) / 1.5 - 2));
51 }

Referenced by calc_rank_and().

Variable Documentation

◆ default_weights

const float default_weights[NUM_WEIGHTS] = {0.1f, 0.2f, 0.4f, 1.0f}
static

Definition at line 25 of file tsrank.c.

Referenced by calc_rank_cd(), getWeights(), ts_rank_tt(), ts_rank_ttf(), ts_rankcd_tt(), and ts_rankcd_ttf().