PostgreSQL Source Code  git master
tsrank.c File Reference
#include "postgres.h"
#include <limits.h>
#include <math.h>
#include "miscadmin.h"
#include "tsearch/ts_utils.h"
#include "utils/array.h"
#include "utils/fmgrprotos.h"
Include dependency graph for tsrank.c:

Go to the source code of this file.

Data Structures

struct  DocRepresentation
 
struct  QueryRepresentationOperand
 
struct  QueryRepresentation
 
struct  CoverExt
 

Macros

#define wpos(wep)   ( w[ WEP_GETWEIGHT(wep) ] )
 
#define RANK_NO_NORM   0x00
 
#define RANK_NORM_LOGLENGTH   0x01
 
#define RANK_NORM_LENGTH   0x02
 
#define RANK_NORM_EXTDIST   0x04
 
#define RANK_NORM_UNIQ   0x08
 
#define RANK_NORM_LOGUNIQ   0x10
 
#define RANK_NORM_RDIVRPLUS1   0x20
 
#define DEF_NORM_METHOD   RANK_NO_NORM
 
#define WordECompareQueryItem(e, q, p, i, m)
 
#define MAXQROPOS   MAXENTRYPOS
 
#define QR_GET_OPERAND_DATA(q, v)    ( (q)->operandData + (((QueryItem*)(v)) - GETQUERY((q)->query)) )
 

Functions

static float calc_rank_or (const float *w, TSVector t, TSQuery q)
 
static float calc_rank_and (const float *w, TSVector t, TSQuery q)
 
static float4 word_distance (int32 w)
 
static int cnt_length (TSVector t)
 
static WordEntry * find_wordentry (TSVector t, TSQuery q, QueryOperand *item, int32 *nitem)
 
static int compareQueryOperand (const void *a, const void *b, void *arg)
 
static QueryOperand ** SortAndUniqItems (TSQuery q, int *size)
 
static float calc_rank (const float *w, TSVector t, TSQuery q, int32 method)
 
static const float * getWeights (ArrayType *win)
 
Datum ts_rank_wttf (PG_FUNCTION_ARGS)
 
Datum ts_rank_wtt (PG_FUNCTION_ARGS)
 
Datum ts_rank_ttf (PG_FUNCTION_ARGS)
 
Datum ts_rank_tt (PG_FUNCTION_ARGS)
 
static int compareDocR (const void *va, const void *vb)
 
static TSTernaryValue checkcondition_QueryOperand (void *checkval, QueryOperand *val, ExecPhraseData *data)
 
static void resetQueryRepresentation (QueryRepresentation *qr, bool reverseinsert)
 
static void fillQueryRepresentationData (QueryRepresentation *qr, DocRepresentation *entry)
 
static bool Cover (DocRepresentation *doc, int len, QueryRepresentation *qr, CoverExt *ext)
 
static DocRepresentation * get_docrep (TSVector txt, QueryRepresentation *qr, int *doclen)
 
static float4 calc_rank_cd (const float4 *arrdata, TSVector txt, TSQuery query, int method)
 
Datum ts_rankcd_wttf (PG_FUNCTION_ARGS)
 
Datum ts_rankcd_wtt (PG_FUNCTION_ARGS)
 
Datum ts_rankcd_ttf (PG_FUNCTION_ARGS)
 
Datum ts_rankcd_tt (PG_FUNCTION_ARGS)
 

Variables

static const float weights [] = {0.1f, 0.2f, 0.4f, 1.0f}
 

Macro Definition Documentation

◆ DEF_NORM_METHOD

#define DEF_NORM_METHOD   RANK_NO_NORM

Definition at line 35 of file tsrank.c.

◆ MAXQROPOS

#define MAXQROPOS   MAXENTRYPOS

Definition at line 540 of file tsrank.c.

◆ QR_GET_OPERAND_DATA

#define QR_GET_OPERAND_DATA (   q,
  v 
)     ( (q)->operandData + (((QueryItem*)(v)) - GETQUERY((q)->query)) )

Definition at line 556 of file tsrank.c.

◆ RANK_NO_NORM

#define RANK_NO_NORM   0x00

Definition at line 28 of file tsrank.c.

◆ RANK_NORM_EXTDIST

#define RANK_NORM_EXTDIST   0x04

Definition at line 31 of file tsrank.c.

◆ RANK_NORM_LENGTH

#define RANK_NORM_LENGTH   0x02

Definition at line 30 of file tsrank.c.

◆ RANK_NORM_LOGLENGTH

#define RANK_NORM_LOGLENGTH   0x01

Definition at line 29 of file tsrank.c.

◆ RANK_NORM_LOGUNIQ

#define RANK_NORM_LOGUNIQ   0x10

Definition at line 33 of file tsrank.c.

◆ RANK_NORM_RDIVRPLUS1

#define RANK_NORM_RDIVRPLUS1   0x20

Definition at line 34 of file tsrank.c.

◆ RANK_NORM_UNIQ

#define RANK_NORM_UNIQ   0x08

Definition at line 32 of file tsrank.c.

◆ WordECompareQueryItem

#define WordECompareQueryItem (   e,
  q,
  p,
  i,
  m 
)
Value:
tsCompareString((q) + (i)->distance, (i)->length, \
(e) + (p)->pos, (p)->len, (m))
int i
Definition: isn.c:73
const void size_t len
e
Definition: preproc-init.c:82
int32 tsCompareString(char *a, int lena, char *b, int lenb, bool prefix)
Definition: tsvector_op.c:1152

Definition at line 75 of file tsrank.c.

◆ wpos

#define wpos (   wep)    ( w[ WEP_GETWEIGHT(wep) ] )

Definition at line 26 of file tsrank.c.

Function Documentation

◆ calc_rank()

static float calc_rank ( const float *  w,
TSVector  t,
TSQuery  q,
int32  method 
)
static

Definition at line 357 of file tsrank.c.

358 {
359  QueryItem *item = GETQUERY(q);
360  float res = 0.0;
361  int len;
362 
363  if (!t->size || !q->size)
364  return 0.0;
365 
366  /* XXX: What about NOT? */
367  res = (item->type == QI_OPR && (item->qoperator.oper == OP_AND ||
368  item->qoperator.oper == OP_PHRASE)) ?
369  calc_rank_and(w, t, q) :
370  calc_rank_or(w, t, q);
371 
372  if (res < 0)
373  res = 1e-20f;
374 
375  if ((method & RANK_NORM_LOGLENGTH) && t->size > 0)
376  res /= log((double) (cnt_length(t) + 1)) / log(2.0);
377 
378  if (method & RANK_NORM_LENGTH)
379  {
380  len = cnt_length(t);
381  if (len > 0)
382  res /= (float) len;
383  }
384 
385  /* RANK_NORM_EXTDIST not applicable */
386 
387  if ((method & RANK_NORM_UNIQ) && t->size > 0)
388  res /= (float) (t->size);
389 
390  if ((method & RANK_NORM_LOGUNIQ) && t->size > 0)
391  res /= log((double) (t->size + 1)) / log(2.0);
392 
393  if (method & RANK_NORM_RDIVRPLUS1)
394  res /= (res + 1);
395 
396  return res;
397 }
#define GETQUERY(x)
Definition: _int.h:157
int32 size
Definition: ts_type.h:221
int32 size
Definition: ts_type.h:93
#define QI_OPR
Definition: ts_type.h:150
#define OP_AND
Definition: ts_type.h:180
#define OP_PHRASE
Definition: ts_type.h:182
#define RANK_NORM_RDIVRPLUS1
Definition: tsrank.c:34
#define RANK_NORM_UNIQ
Definition: tsrank.c:32
#define RANK_NORM_LOGLENGTH
Definition: tsrank.c:29
#define RANK_NORM_LENGTH
Definition: tsrank.c:30
static int cnt_length(TSVector t)
Definition: tsrank.c:53
static float calc_rank_or(const float *w, TSVector t, TSQuery q)
Definition: tsrank.c:283
static float calc_rank_and(const float *w, TSVector t, TSQuery q)
Definition: tsrank.c:200
#define RANK_NORM_LOGUNIQ
Definition: tsrank.c:33
QueryOperator qoperator
Definition: ts_type.h:209
QueryItemType type
Definition: ts_type.h:208

References calc_rank_and(), calc_rank_or(), cnt_length(), GETQUERY, len, OP_AND, OP_PHRASE, QueryOperator::oper, QI_OPR, QueryItem::qoperator, RANK_NORM_LENGTH, RANK_NORM_LOGLENGTH, RANK_NORM_LOGUNIQ, RANK_NORM_RDIVRPLUS1, RANK_NORM_UNIQ, res, TSVectorData::size, TSQueryData::size, and QueryItem::type.

Referenced by ts_rank_tt(), ts_rank_ttf(), ts_rank_wtt(), and ts_rank_wttf().

◆ calc_rank_and()

static float calc_rank_and ( const float *  w,
TSVector  t,
TSQuery  q 
)
static

Definition at line 200 of file tsrank.c.

201 {
202  WordEntryPosVector **pos;
203  WordEntryPosVector1 posnull;
204  WordEntryPosVector *POSNULL;
205  int i,
206  k,
207  l,
208  p;
209  WordEntry *entry,
210  *firstentry;
211  WordEntryPos *post,
212  *ct;
213  int32 dimt,
214  lenct,
215  dist,
216  nitem;
217  float res = -1.0;
218  QueryOperand **item;
219  int size = q->size;
220 
221  item = SortAndUniqItems(q, &size);
222  if (size < 2)
223  {
224  pfree(item);
225  return calc_rank_or(w, t, q);
226  }
227  pos = (WordEntryPosVector **) palloc0(sizeof(WordEntryPosVector *) * q->size);
228 
229  /* A dummy WordEntryPos array to use when haspos is false */
230  posnull.npos = 1;
231  posnull.pos[0] = 0;
232  WEP_SETPOS(posnull.pos[0], MAXENTRYPOS - 1);
233  POSNULL = (WordEntryPosVector *) &posnull;
234 
235  for (i = 0; i < size; i++)
236  {
237  firstentry = entry = find_wordentry(t, q, item[i], &nitem);
238  if (!entry)
239  continue;
240 
241  while (entry - firstentry < nitem)
242  {
243  if (entry->haspos)
244  pos[i] = _POSVECPTR(t, entry);
245  else
246  pos[i] = POSNULL;
247 
248  dimt = pos[i]->npos;
249  post = pos[i]->pos;
250  for (k = 0; k < i; k++)
251  {
252  if (!pos[k])
253  continue;
254  lenct = pos[k]->npos;
255  ct = pos[k]->pos;
256  for (l = 0; l < dimt; l++)
257  {
258  for (p = 0; p < lenct; p++)
259  {
260  dist = abs((int) WEP_GETPOS(post[l]) - (int) WEP_GETPOS(ct[p]));
261  if (dist || (dist == 0 && (pos[i] == POSNULL || pos[k] == POSNULL)))
262  {
263  float curw;
264 
265  if (!dist)
266  dist = MAXENTRYPOS;
267  curw = sqrt(wpos(post[l]) * wpos(ct[p]) * word_distance(dist));
268  res = (res < 0) ? curw : 1.0 - (1.0 - res) * (1.0 - curw);
269  }
270  }
271  }
272  }
273 
274  entry++;
275  }
276  }
277  pfree(pos);
278  pfree(item);
279  return res;
280 }
signed int int32
Definition: c.h:494
void pfree(void *pointer)
Definition: mcxt.c:1520
void * palloc0(Size size)
Definition: mcxt.c:1346
static pg_noinline void Size size
Definition: slab.c:607
WordEntryPos pos[1]
Definition: ts_type.h:75
WordEntryPos pos[FLEXIBLE_ARRAY_MEMBER]
Definition: ts_type.h:68
uint32 haspos
Definition: ts_type.h:44
#define WEP_GETPOS(x)
Definition: ts_type.h:80
#define _POSVECPTR(x, e)
Definition: ts_type.h:109
#define MAXENTRYPOS
Definition: ts_type.h:85
#define WEP_SETPOS(x, v)
Definition: ts_type.h:83
uint16 WordEntryPos
Definition: ts_type.h:63
#define wpos(wep)
Definition: tsrank.c:26
static float4 word_distance(int32 w)
Definition: tsrank.c:44
static WordEntry * find_wordentry(TSVector t, TSQuery q, QueryOperand *item, int32 *nitem)
Definition: tsrank.c:86
static QueryOperand ** SortAndUniqItems(TSQuery q, int *size)
Definition: tsrank.c:154

References _POSVECPTR, calc_rank_or(), find_wordentry(), WordEntry::haspos, i, MAXENTRYPOS, WordEntryPosVector::npos, WordEntryPosVector1::npos, palloc0(), pfree(), WordEntryPosVector1::pos, WordEntryPosVector::pos, res, size, TSQueryData::size, SortAndUniqItems(), WEP_GETPOS, WEP_SETPOS, word_distance(), and wpos.

Referenced by calc_rank().

◆ calc_rank_cd()

static float4 calc_rank_cd ( const float4 *  arrdata,
TSVector  txt,
TSQuery  query,
int  method 
)
static

Definition at line 850 of file tsrank.c.

851 {
852  DocRepresentation *doc;
853  int len,
854  i,
855  doclen = 0;
856  CoverExt ext;
857  double Wdoc = 0.0;
858  double invws[lengthof(weights)];
859  double SumDist = 0.0,
860  PrevExtPos = 0.0;
861  int NExtent = 0;
862  QueryRepresentation qr;
863 
864 
865  for (i = 0; i < lengthof(weights); i++)
866  {
867  invws[i] = ((double) ((arrdata[i] >= 0) ? arrdata[i] : weights[i]));
868  if (invws[i] > 1.0)
869  ereport(ERROR,
870  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
871  errmsg("weight out of range")));
872  invws[i] = 1.0 / invws[i];
873  }
874 
875  qr.query = query;
876  qr.operandData = (QueryRepresentationOperand *)
877  palloc0(sizeof(QueryRepresentationOperand) * query->size);
878 
879  doc = get_docrep(txt, &qr, &doclen);
880  if (!doc)
881  {
882  pfree(qr.operandData);
883  return 0.0;
884  }
885 
886  MemSet(&ext, 0, sizeof(CoverExt));
887  while (Cover(doc, doclen, &qr, &ext))
888  {
889  double Cpos = 0.0;
890  double InvSum = 0.0;
891  double CurExtPos;
892  int nNoise;
893  DocRepresentation *ptr = ext.begin;
894 
895  while (ptr <= ext.end)
896  {
897  InvSum += invws[WEP_GETWEIGHT(ptr->pos)];
898  ptr++;
899  }
900 
901  Cpos = ((double) (ext.end - ext.begin + 1)) / InvSum;
902 
903  /*
904  * if doc are big enough then ext.q may be equal to ext.p due to limit
905  * of positional information. In this case we approximate number of
906  * noise word as half cover's length
907  */
908  nNoise = (ext.q - ext.p) - (ext.end - ext.begin);
909  if (nNoise < 0)
910  nNoise = (ext.end - ext.begin) / 2;
911  Wdoc += Cpos / ((double) (1 + nNoise));
912 
913  CurExtPos = ((double) (ext.q + ext.p)) / 2.0;
914  if (NExtent > 0 && CurExtPos > PrevExtPos /* prevent division by
915  * zero in a case of
916  * multiple lexize */ )
917  SumDist += 1.0 / (CurExtPos - PrevExtPos);
918 
919  PrevExtPos = CurExtPos;
920  NExtent++;
921  }
922 
923  if ((method & RANK_NORM_LOGLENGTH) && txt->size > 0)
924  Wdoc /= log((double) (cnt_length(txt) + 1));
925 
926  if (method & RANK_NORM_LENGTH)
927  {
928  len = cnt_length(txt);
929  if (len > 0)
930  Wdoc /= (double) len;
931  }
932 
933  if ((method & RANK_NORM_EXTDIST) && NExtent > 0 && SumDist > 0)
934  Wdoc /= ((double) NExtent) / SumDist;
935 
936  if ((method & RANK_NORM_UNIQ) && txt->size > 0)
937  Wdoc /= (double) (txt->size);
938 
939  if ((method & RANK_NORM_LOGUNIQ) && txt->size > 0)
940  Wdoc /= log((double) (txt->size + 1)) / log(2.0);
941 
942  if (method & RANK_NORM_RDIVRPLUS1)
943  Wdoc /= (Wdoc + 1);
944 
945  pfree(doc);
946 
947  pfree(qr.operandData);
948 
949  return (float4) Wdoc;
950 }
#define lengthof(array)
Definition: c.h:788
float float4
Definition: c.h:629
#define MemSet(start, val, len)
Definition: c.h:1020
int errcode(int sqlerrcode)
Definition: elog.c:859
int errmsg(const char *fmt,...)
Definition: elog.c:1072
#define ERROR
Definition: elog.h:39
#define ereport(elevel,...)
Definition: elog.h:149
int p
Definition: tsrank.c:586
DocRepresentation * begin
Definition: tsrank.c:588
DocRepresentation * end
Definition: tsrank.c:589
int q
Definition: tsrank.c:587
WordEntryPos pos
Definition: tsrank.c:515
QueryRepresentationOperand * operandData
Definition: tsrank.c:553
#define WEP_GETWEIGHT(x)
Definition: ts_type.h:79
static DocRepresentation * get_docrep(TSVector txt, QueryRepresentation *qr, int *doclen)
Definition: tsrank.c:727
static const float weights[]
Definition: tsrank.c:24
#define RANK_NORM_EXTDIST
Definition: tsrank.c:31
static bool Cover(DocRepresentation *doc, int len, QueryRepresentation *qr, CoverExt *ext)
Definition: tsrank.c:646

References CoverExt::begin, cnt_length(), Cover(), CoverExt::end, ereport, errcode(), errmsg(), ERROR, get_docrep(), i, len, lengthof, MemSet, QueryRepresentation::operandData, CoverExt::p, palloc0(), pfree(), DocRepresentation::pos, CoverExt::q, QueryRepresentation::query, RANK_NORM_EXTDIST, RANK_NORM_LENGTH, RANK_NORM_LOGLENGTH, RANK_NORM_LOGUNIQ, RANK_NORM_RDIVRPLUS1, RANK_NORM_UNIQ, TSVectorData::size, TSQueryData::size, weights, and WEP_GETWEIGHT.

Referenced by ts_rankcd_tt(), ts_rankcd_ttf(), ts_rankcd_wtt(), and ts_rankcd_wttf().

◆ calc_rank_or()

static float calc_rank_or ( const float *  w,
TSVector  t,
TSQuery  q 
)
static

Definition at line 283 of file tsrank.c.

284 {
285  WordEntry *entry,
286  *firstentry;
287  WordEntryPosVector1 posnull;
288  WordEntryPos *post;
289  int32 dimt,
290  j,
291  i,
292  nitem;
293  float res = 0.0;
294  QueryOperand **item;
295  int size = q->size;
296 
297  /* A dummy WordEntryPos array to use when haspos is false */
298  posnull.npos = 1;
299  posnull.pos[0] = 0;
300 
301  item = SortAndUniqItems(q, &size);
302 
303  for (i = 0; i < size; i++)
304  {
305  float resj,
306  wjm;
307  int32 jm;
308 
309  firstentry = entry = find_wordentry(t, q, item[i], &nitem);
310  if (!entry)
311  continue;
312 
313  while (entry - firstentry < nitem)
314  {
315  if (entry->haspos)
316  {
317  dimt = POSDATALEN(t, entry);
318  post = POSDATAPTR(t, entry);
319  }
320  else
321  {
322  dimt = posnull.npos;
323  post = posnull.pos;
324  }
325 
326  resj = 0.0;
327  wjm = -1.0;
328  jm = 0;
329  for (j = 0; j < dimt; j++)
330  {
331  resj = resj + wpos(post[j]) / ((j + 1) * (j + 1));
332  if (wpos(post[j]) > wjm)
333  {
334  wjm = wpos(post[j]);
335  jm = j;
336  }
337  }
338 /*
339  limit (sum(1/i^2),i=1,inf) = pi^2/6
340  resj = sum(wi/i^2),i=1,noccurrence,
341  wi - should be sorted desc,
342  don't sort for now, just choose maximum weight. This should be corrected
343  Oleg Bartunov
344 */
345  res = res + (wjm + resj - wjm / ((jm + 1) * (jm + 1))) / 1.64493406685;
346 
347  entry++;
348  }
349  }
350  if (size > 0)
351  res = res / size;
352  pfree(item);
353  return res;
354 }
int j
Definition: isn.c:74
#define POSDATALEN(x, e)
Definition: ts_type.h:110
#define POSDATAPTR(x, e)
Definition: ts_type.h:111

References find_wordentry(), WordEntry::haspos, i, j, WordEntryPosVector1::npos, pfree(), WordEntryPosVector1::pos, POSDATALEN, POSDATAPTR, res, size, TSQueryData::size, SortAndUniqItems(), and wpos.

Referenced by calc_rank(), and calc_rank_and().

◆ checkcondition_QueryOperand()

static TSTernaryValue checkcondition_QueryOperand ( void *  checkval,
QueryOperand *  val,
ExecPhraseData *  data 
)
static

Definition at line 563 of file tsrank.c.

565 {
566  QueryRepresentation *qr = (QueryRepresentation *) checkval;
567  QueryRepresentationOperand *opData = QR_GET_OPERAND_DATA(qr, val);
568 
569  if (!opData->operandexists)
570  return TS_NO;
571 
572  if (data)
573  {
574  data->npos = opData->npos;
575  data->pos = opData->pos;
576  if (opData->reverseinsert)
577  data->pos += MAXQROPOS - opData->npos;
578  }
579 
580  return TS_YES;
581 }
long val
Definition: informix.c:670
const void * data
WordEntryPos pos[MAXQROPOS]
Definition: tsrank.c:547
@ TS_NO
Definition: ts_utils.h:134
@ TS_YES
Definition: ts_utils.h:135
#define MAXQROPOS
Definition: tsrank.c:540
#define QR_GET_OPERAND_DATA(q, v)
Definition: tsrank.c:556

References data, MAXQROPOS, QueryRepresentationOperand::npos, QueryRepresentationOperand::operandexists, QueryRepresentationOperand::pos, QR_GET_OPERAND_DATA, QueryRepresentationOperand::reverseinsert, TS_NO, TS_YES, and val.

Referenced by Cover().

◆ cnt_length()

static int cnt_length ( TSVector  t)
static

Definition at line 53 of file tsrank.c.

54 {
55  WordEntry *ptr = ARRPTR(t),
56  *end = (WordEntry *) STRPTR(t);
57  int len = 0;
58 
59  while (ptr < end)
60  {
61  int clen = POSDATALEN(t, ptr);
62 
63  if (clen == 0)
64  len += 1;
65  else
66  len += clen;
67 
68  ptr++;
69  }
70 
71  return len;
72 }
#define ARRPTR(x)
Definition: cube.c:25
#define STRPTR(x)
Definition: hstore.h:76

References ARRPTR, len, POSDATALEN, and STRPTR.

Referenced by calc_rank(), and calc_rank_cd().

◆ compareDocR()

static int compareDocR ( const void *  va,
const void *  vb 
)
static

Definition at line 519 of file tsrank.c.

520 {
521  const DocRepresentation *a = (const DocRepresentation *) va;
522  const DocRepresentation *b = (const DocRepresentation *) vb;
523 
524  if (WEP_GETPOS(a->pos) == WEP_GETPOS(b->pos))
525  {
526  if (WEP_GETWEIGHT(a->pos) == WEP_GETWEIGHT(b->pos))
527  {
528  if (a->data.map.entry == b->data.map.entry)
529  return 0;
530 
531  return (a->data.map.entry > b->data.map.entry) ? 1 : -1;
532  }
533 
534  return (WEP_GETWEIGHT(a->pos) > WEP_GETWEIGHT(b->pos)) ? 1 : -1;
535  }
536 
537  return (WEP_GETPOS(a->pos) > WEP_GETPOS(b->pos)) ? 1 : -1;
538 }
int b
Definition: isn.c:70
int a
Definition: isn.c:69

References a, b, WEP_GETPOS, and WEP_GETWEIGHT.

Referenced by get_docrep().

◆ compareQueryOperand()

static int compareQueryOperand ( const void *  a,
const void *  b,
void *  arg 
)
static

Definition at line 135 of file tsrank.c.

136 {
137  char *operand = (char *) arg;
138  QueryOperand *qa = (*(QueryOperand *const *) a);
139  QueryOperand *qb = (*(QueryOperand *const *) b);
140 
141  return tsCompareString(operand + qa->distance, qa->length,
142  operand + qb->distance, qb->length,
143  false);
144 }
void * arg
uint32 distance
Definition: ts_type.h:172
uint32 length
Definition: ts_type.h:171

References a, arg, b, QueryOperand::distance, QueryOperand::length, and tsCompareString().

Referenced by SortAndUniqItems().

◆ Cover()

static bool Cover ( DocRepresentation *  doc,
int  len,
QueryRepresentation *  qr,
CoverExt *  ext 
)
static

Definition at line 646 of file tsrank.c.

647 {
648  DocRepresentation *ptr;
649  int lastpos = ext->pos;
650  bool found = false;
651 
652  /*
653  * since this function recurses, it could be driven to stack overflow.
654  * (though any decent compiler will optimize away the tail-recursion.)
655  */
656  check_stack_depth();
657 
658  resetQueryRepresentation(qr, false);
659 
660  ext->p = INT_MAX;
661  ext->q = 0;
662  ptr = doc + ext->pos;
663 
664  /* find upper bound of cover from current position, move up */
665  while (ptr - doc < len)
666  {
667  fillQueryRepresentationData(qr, ptr);
668 
669  if (TS_execute(GETQUERY(qr->query), (void *) qr,
670  TS_EXEC_EMPTY, checkcondition_QueryOperand))
671  {
672  if (WEP_GETPOS(ptr->pos) > ext->q)
673  {
674  ext->q = WEP_GETPOS(ptr->pos);
675  ext->end = ptr;
676  lastpos = ptr - doc;
677  found = true;
678  }
679  break;
680  }
681  ptr++;
682  }
683 
684  if (!found)
685  return false;
686 
687  resetQueryRepresentation(qr, true);
688 
689  ptr = doc + lastpos;
690 
691  /* find lower bound of cover from found upper bound, move down */
692  while (ptr >= doc + ext->pos)
693  {
694  /*
695  * we scan doc from right to left, so pos info in reverse order!
696  */
697  fillQueryRepresentationData(qr, ptr);
698 
699  if (TS_execute(GETQUERY(qr->query), (void *) qr,
700  TS_EXEC_EMPTY, checkcondition_QueryOperand))
701  {
702  if (WEP_GETPOS(ptr->pos) < ext->p)
703  {
704  ext->begin = ptr;
705  ext->p = WEP_GETPOS(ptr->pos);
706  }
707  break;
708  }
709  ptr--;
710  }
711 
712  if (ext->p <= ext->q)
713  {
714  /*
715  * set position for next try to next lexeme after beginning of found
716  * cover
717  */
718  ext->pos = (ptr - doc) + 1;
719  return true;
720  }
721 
722  ext->pos++;
723  return Cover(doc, len, qr, ext);
724 }
void check_stack_depth(void)
Definition: postgres.c:3531
int pos
Definition: tsrank.c:585
#define TS_EXEC_EMPTY
Definition: ts_utils.h:188
static TSTernaryValue checkcondition_QueryOperand(void *checkval, QueryOperand *val, ExecPhraseData *data)
Definition: tsrank.c:563
static void resetQueryRepresentation(QueryRepresentation *qr, bool reverseinsert)
Definition: tsrank.c:593
static void fillQueryRepresentationData(QueryRepresentation *qr, DocRepresentation *entry)
Definition: tsrank.c:606
bool TS_execute(QueryItem *curitem, void *arg, uint32 flags, TSExecuteCallback chkcond)
Definition: tsvector_op.c:1854

References CoverExt::begin, check_stack_depth(), checkcondition_QueryOperand(), CoverExt::end, fillQueryRepresentationData(), GETQUERY, len, CoverExt::p, DocRepresentation::pos, CoverExt::pos, CoverExt::q, QueryRepresentation::query, resetQueryRepresentation(), TS_EXEC_EMPTY, TS_execute(), and WEP_GETPOS.

Referenced by calc_rank_cd().

◆ fillQueryRepresentationData()

static void fillQueryRepresentationData ( QueryRepresentation *  qr,
DocRepresentation *  entry 
)
static

Definition at line 606 of file tsrank.c.

607 {
608  int i;
609  int lastPos;
610  QueryRepresentationOperand *opData;
611 
612  for (i = 0; i < entry->data.query.nitem; i++)
613  {
614  if (entry->data.query.items[i]->type != QI_VAL)
615  continue;
616 
617  opData = QR_GET_OPERAND_DATA(qr, entry->data.query.items[i]);
618 
619  opData->operandexists = true;
620 
621  if (opData->npos == 0)
622  {
623  lastPos = (opData->reverseinsert) ? (MAXQROPOS - 1) : 0;
624  opData->pos[lastPos] = entry->pos;
625  opData->npos++;
626  continue;
627  }
628 
629  lastPos = opData->reverseinsert ?
630  (MAXQROPOS - opData->npos) :
631  (opData->npos - 1);
632 
633  if (WEP_GETPOS(opData->pos[lastPos]) != WEP_GETPOS(entry->pos))
634  {
635  lastPos = opData->reverseinsert ?
636  (MAXQROPOS - 1 - opData->npos) :
637  (opData->npos);
638 
639  opData->pos[lastPos] = entry->pos;
640  opData->npos++;
641  }
642  }
643 }
struct DocRepresentation::@30::@31 query
union DocRepresentation::@30 data
#define QI_VAL
Definition: ts_type.h:149

References DocRepresentation::data, i, MAXQROPOS, QueryRepresentationOperand::npos, QueryRepresentationOperand::operandexists, DocRepresentation::pos, QueryRepresentationOperand::pos, QI_VAL, QR_GET_OPERAND_DATA, DocRepresentation::query, QueryRepresentationOperand::reverseinsert, and WEP_GETPOS.

Referenced by Cover().

◆ find_wordentry()

static WordEntry* find_wordentry ( TSVector  t,
TSQuery  q,
QueryOperand *  item,
int32 *  nitem 
)
static

Definition at line 86 of file tsrank.c.

87 {
88  WordEntry *StopLow = ARRPTR(t);
89  WordEntry *StopHigh = (WordEntry *) STRPTR(t);
90  WordEntry *StopMiddle = StopHigh;
91  int difference;
92 
93  *nitem = 0;
94 
95  /* Loop invariant: StopLow <= item < StopHigh */
96  while (StopLow < StopHigh)
97  {
98  StopMiddle = StopLow + (StopHigh - StopLow) / 2;
99  difference = WordECompareQueryItem(STRPTR(t), GETOPERAND(q), StopMiddle, item, false);
100  if (difference == 0)
101  {
102  StopHigh = StopMiddle;
103  *nitem = 1;
104  break;
105  }
106  else if (difference > 0)
107  StopLow = StopMiddle + 1;
108  else
109  StopHigh = StopMiddle;
110  }
111 
112  if (item->prefix)
113  {
114  if (StopLow >= StopHigh)
115  StopMiddle = StopHigh;
116 
117  *nitem = 0;
118 
119  while (StopMiddle < (WordEntry *) STRPTR(t) &&
120  WordECompareQueryItem(STRPTR(t), GETOPERAND(q), StopMiddle, item, true) == 0)
121  {
122  (*nitem)++;
123  StopMiddle++;
124  }
125  }
126 
127  return (*nitem > 0) ? StopHigh : NULL;
128 }
Datum difference(PG_FUNCTION_ARGS)
#define GETOPERAND(x)
Definition: ltree.h:165
bool prefix
Definition: ts_type.h:163
#define WordECompareQueryItem(e, q, p, i, m)
Definition: tsrank.c:75

References ARRPTR, difference(), GETOPERAND, QueryOperand::prefix, STRPTR, and WordECompareQueryItem.

Referenced by calc_rank_and(), calc_rank_or(), and get_docrep().

◆ get_docrep()

static DocRepresentation* get_docrep ( TSVector  txt,
QueryRepresentation *  qr,
int *  doclen 
)
static

Definition at line 727 of file tsrank.c.

728 {
729  QueryItem *item = GETQUERY(qr->query);
730  WordEntry *entry,
731  *firstentry;
732  WordEntryPos *post;
733  int32 dimt, /* number of 'post' items */
734  j,
735  i,
736  nitem;
737  int len = qr->query->size * 4,
738  cur = 0;
739  DocRepresentation *doc;
740 
741  doc = (DocRepresentation *) palloc(sizeof(DocRepresentation) * len);
742 
743  /*
744  * Iterate through query to make DocRepresentation for words and it's
745  * entries satisfied by query
746  */
747  for (i = 0; i < qr->query->size; i++)
748  {
749  QueryOperand *curoperand;
750 
751  if (item[i].type != QI_VAL)
752  continue;
753 
754  curoperand = &item[i].qoperand;
755 
756  firstentry = entry = find_wordentry(txt, qr->query, curoperand, &nitem);
757  if (!entry)
758  continue;
759 
760  /* iterations over entries in tsvector */
761  while (entry - firstentry < nitem)
762  {
763  if (entry->haspos)
764  {
765  dimt = POSDATALEN(txt, entry);
766  post = POSDATAPTR(txt, entry);
767  }
768  else
769  {
770  /* ignore words without positions */
771  entry++;
772  continue;
773  }
774 
775  while (cur + dimt >= len)
776  {
777  len *= 2;
778  doc = (DocRepresentation *) repalloc(doc, sizeof(DocRepresentation) * len);
779  }
780 
781  /* iterations over entry's positions */
782  for (j = 0; j < dimt; j++)
783  {
784  if (curoperand->weight == 0 ||
785  curoperand->weight & (1 << WEP_GETWEIGHT(post[j])))
786  {
787  doc[cur].pos = post[j];
788  doc[cur].data.map.entry = entry;
789  doc[cur].data.map.item = (QueryItem *) curoperand;
790  cur++;
791  }
792  }
793 
794  entry++;
795  }
796  }
797 
798  if (cur > 0)
799  {
800  DocRepresentation *rptr = doc + 1,
801  *wptr = doc,
802  storage;
803 
804  /*
805  * Sort representation in ascending order by pos and entry
806  */
807  qsort(doc, cur, sizeof(DocRepresentation), compareDocR);
808 
809  /*
810  * Join QueryItem per WordEntry and it's position
811  */
812  storage.pos = doc->pos;
813  storage.data.query.items = palloc(sizeof(QueryItem *) * qr->query->size);
814  storage.data.query.items[0] = doc->data.map.item;
815  storage.data.query.nitem = 1;
816 
817  while (rptr - doc < cur)
818  {
819  if (rptr->pos == (rptr - 1)->pos &&
820  rptr->data.map.entry == (rptr - 1)->data.map.entry)
821  {
822  storage.data.query.items[storage.data.query.nitem] = rptr->data.map.item;
823  storage.data.query.nitem++;
824  }
825  else
826  {
827  *wptr = storage;
828  wptr++;
829  storage.pos = rptr->pos;
830  storage.data.query.items = palloc(sizeof(QueryItem *) * qr->query->size);
831  storage.data.query.items[0] = rptr->data.map.item;
832  storage.data.query.nitem = 1;
833  }
834 
835  rptr++;
836  }
837 
838  *wptr = storage;
839  wptr++;
840 
841  *doclen = wptr - doc;
842  return doc;
843  }
844 
845  pfree(doc);
846  return NULL;
847 }
struct cursor * cur
Definition: ecpg.c:28
#define storage
Definition: indent_codes.h:68
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1540
void * palloc(Size size)
Definition: mcxt.c:1316
#define qsort(a, b, c, d)
Definition: port.h:449
struct DocRepresentation::@30::@32 map
uint8 weight
Definition: ts_type.h:159
static int compareDocR(const void *va, const void *vb)
Definition: tsrank.c:519
QueryOperand qoperand
Definition: ts_type.h:210
const char * type

References compareDocR(), cur, DocRepresentation::data, find_wordentry(), GETQUERY, WordEntry::haspos, i, j, len, DocRepresentation::map, palloc(), pfree(), DocRepresentation::pos, POSDATALEN, POSDATAPTR, QI_VAL, QueryItem::qoperand, qsort, QueryRepresentation::query, repalloc(), TSQueryData::size, storage, type, QueryOperand::weight, and WEP_GETWEIGHT.

Referenced by calc_rank_cd().

◆ getWeights()

static const float* getWeights ( ArrayType *  win)
static

Definition at line 400 of file tsrank.c.

401 {
402  static float ws[lengthof(weights)];
403  int i;
404  float4 *arrdata;
405 
406  if (win == NULL)
407  return weights;
408 
409  if (ARR_NDIM(win) != 1)
410  ereport(ERROR,
411  (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
412  errmsg("array of weight must be one-dimensional")));
413 
414  if (ArrayGetNItems(ARR_NDIM(win), ARR_DIMS(win)) < lengthof(weights))
415  ereport(ERROR,
416  (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
417  errmsg("array of weight is too short")));
418 
419  if (array_contains_nulls(win))
420  ereport(ERROR,
421  (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
422  errmsg("array of weight must not contain nulls")));
423 
424  arrdata = (float4 *) ARR_DATA_PTR(win);
425  for (i = 0; i < lengthof(weights); i++)
426  {
427  ws[i] = (arrdata[i] >= 0) ? arrdata[i] : weights[i];
428  if (ws[i] > 1.0)
429  ereport(ERROR,
430  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
431  errmsg("weight out of range")));
432  }
433 
434  return ws;
435 }
#define ARR_NDIM(a)
Definition: array.h:290
#define ARR_DATA_PTR(a)
Definition: array.h:322
#define ARR_DIMS(a)
Definition: array.h:294
bool array_contains_nulls(ArrayType *array)
Definition: arrayfuncs.c:3748
int ArrayGetNItems(int ndim, const int *dims)
Definition: arrayutils.c:57

References ARR_DATA_PTR, ARR_DIMS, ARR_NDIM, array_contains_nulls(), ArrayGetNItems(), ereport, errcode(), errmsg(), ERROR, i, lengthof, and weights.

Referenced by ts_rank_tt(), ts_rank_ttf(), ts_rank_wtt(), ts_rank_wttf(), ts_rankcd_tt(), ts_rankcd_ttf(), ts_rankcd_wtt(), and ts_rankcd_wttf().

◆ resetQueryRepresentation()

static void resetQueryRepresentation ( QueryRepresentation *  qr,
bool  reverseinsert 
)
static

Definition at line 593 of file tsrank.c.

594 {
595  int i;
596 
597  for (i = 0; i < qr->query->size; i++)
598  {
599  qr->operandData[i].operandexists = false;
600  qr->operandData[i].reverseinsert = reverseinsert;
601  qr->operandData[i].npos = 0;
602  }
603 }

References i, QueryRepresentationOperand::npos, QueryRepresentation::operandData, QueryRepresentationOperand::operandexists, QueryRepresentation::query, QueryRepresentationOperand::reverseinsert, and TSQueryData::size.

Referenced by Cover().

◆ SortAndUniqItems()

static QueryOperand** SortAndUniqItems ( TSQuery  q,
int *  size 
)
static

Definition at line 154 of file tsrank.c.

155 {
156  char *operand = GETOPERAND(q);
157  QueryItem *item = GETQUERY(q);
158  QueryOperand **res,
159  **ptr,
160  **prevptr;
161 
162  ptr = res = (QueryOperand **) palloc(sizeof(QueryOperand *) * *size);
163 
164  /* Collect all operands from the tree to res */
165  while ((*size)--)
166  {
167  if (item->type == QI_VAL)
168  {
169  *ptr = (QueryOperand *) item;
170  ptr++;
171  }
172  item++;
173  }
174 
175  *size = ptr - res;
176  if (*size < 2)
177  return res;
178 
179  qsort_arg(res, *size, sizeof(QueryOperand *), compareQueryOperand, operand);
180 
181  ptr = res + 1;
182  prevptr = res;
183 
184  /* remove duplicates */
185  while (ptr - res < *size)
186  {
187  if (compareQueryOperand((void *) ptr, (void *) prevptr, (void *) operand) != 0)
188  {
189  prevptr++;
190  *prevptr = *ptr;
191  }
192  ptr++;
193  }
194 
195  *size = prevptr + 1 - res;
196  return res;
197 }
void qsort_arg(void *base, size_t nel, size_t elsize, qsort_arg_comparator cmp, void *arg)
static int compareQueryOperand(const void *a, const void *b, void *arg)
Definition: tsrank.c:135

References compareQueryOperand(), GETOPERAND, GETQUERY, palloc(), QI_VAL, qsort_arg(), res, size, and QueryItem::type.

Referenced by calc_rank_and(), and calc_rank_or().

◆ ts_rank_tt()

Datum ts_rank_tt ( PG_FUNCTION_ARGS  )

Definition at line 486 of file tsrank.c.

487 {
488  TSVector txt = PG_GETARG_TSVECTOR(0);
489  TSQuery query = PG_GETARG_TSQUERY(1);
490  float res;
491 
492  res = calc_rank(getWeights(NULL), txt, query, DEF_NORM_METHOD);
493 
494  PG_FREE_IF_COPY(txt, 0);
495  PG_FREE_IF_COPY(query, 1);
496  PG_RETURN_FLOAT4(res);
497 }
#define PG_FREE_IF_COPY(ptr, n)
Definition: fmgr.h:260
#define PG_RETURN_FLOAT4(x)
Definition: fmgr.h:366
#define PG_GETARG_TSVECTOR(n)
Definition: ts_type.h:135
#define PG_GETARG_TSQUERY(n)
Definition: ts_type.h:266
static float calc_rank(const float *w, TSVector t, TSQuery q, int32 method)
Definition: tsrank.c:357
#define DEF_NORM_METHOD
Definition: tsrank.c:35
static const float * getWeights(ArrayType *win)
Definition: tsrank.c:400

References calc_rank(), DEF_NORM_METHOD, getWeights(), PG_FREE_IF_COPY, PG_GETARG_TSQUERY, PG_GETARG_TSVECTOR, PG_RETURN_FLOAT4, and res.

◆ ts_rank_ttf()

Datum ts_rank_ttf ( PG_FUNCTION_ARGS  )

Definition at line 471 of file tsrank.c.

472 {
473  TSVector txt = PG_GETARG_TSVECTOR(0);
474  TSQuery query = PG_GETARG_TSQUERY(1);
475  int method = PG_GETARG_INT32(2);
476  float res;
477 
478  res = calc_rank(getWeights(NULL), txt, query, method);
479 
480  PG_FREE_IF_COPY(txt, 0);
481  PG_FREE_IF_COPY(query, 1);
482  PG_RETURN_FLOAT4(res);
483 }
#define PG_GETARG_INT32(n)
Definition: fmgr.h:269

References calc_rank(), getWeights(), PG_FREE_IF_COPY, PG_GETARG_INT32, PG_GETARG_TSQUERY, PG_GETARG_TSVECTOR, PG_RETURN_FLOAT4, and res.

◆ ts_rank_wtt()

Datum ts_rank_wtt ( PG_FUNCTION_ARGS  )

Definition at line 455 of file tsrank.c.

456 {
457  ArrayType *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
458  TSVector txt = PG_GETARG_TSVECTOR(1);
459  TSQuery query = PG_GETARG_TSQUERY(2);
460  float res;
461 
462  res = calc_rank(getWeights(win), txt, query, DEF_NORM_METHOD);
463 
464  PG_FREE_IF_COPY(win, 0);
465  PG_FREE_IF_COPY(txt, 1);
466  PG_FREE_IF_COPY(query, 2);
467  PG_RETURN_FLOAT4(res);
468 }
#define PG_GETARG_DATUM(n)
Definition: fmgr.h:268
#define PG_DETOAST_DATUM(datum)
Definition: fmgr.h:240

References calc_rank(), DEF_NORM_METHOD, getWeights(), PG_DETOAST_DATUM, PG_FREE_IF_COPY, PG_GETARG_DATUM, PG_GETARG_TSQUERY, PG_GETARG_TSVECTOR, PG_RETURN_FLOAT4, and res.

◆ ts_rank_wttf()

Datum ts_rank_wttf ( PG_FUNCTION_ARGS  )

Definition at line 438 of file tsrank.c.

439 {
440  ArrayType *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
441  TSVector txt = PG_GETARG_TSVECTOR(1);
442  TSQuery query = PG_GETARG_TSQUERY(2);
443  int method = PG_GETARG_INT32(3);
444  float res;
445 
446  res = calc_rank(getWeights(win), txt, query, method);
447 
448  PG_FREE_IF_COPY(win, 0);
449  PG_FREE_IF_COPY(txt, 1);
450  PG_FREE_IF_COPY(query, 2);
451  PG_RETURN_FLOAT4(res);
452 }

References calc_rank(), getWeights(), PG_DETOAST_DATUM, PG_FREE_IF_COPY, PG_GETARG_DATUM, PG_GETARG_INT32, PG_GETARG_TSQUERY, PG_GETARG_TSVECTOR, PG_RETURN_FLOAT4, and res.

◆ ts_rankcd_tt()

Datum ts_rankcd_tt ( PG_FUNCTION_ARGS  )

Definition at line 1001 of file tsrank.c.

1002 {
1003  TSVector txt = PG_GETARG_TSVECTOR(0);
1004  TSQuery query = PG_GETARG_TSQUERY(1);
1005  float res;
1006 
1007  res = calc_rank_cd(getWeights(NULL), txt, query, DEF_NORM_METHOD);
1008 
1009  PG_FREE_IF_COPY(txt, 0);
1010  PG_FREE_IF_COPY(query, 1);
1011  PG_RETURN_FLOAT4(res);
1012 }
static float4 calc_rank_cd(const float4 *arrdata, TSVector txt, TSQuery query, int method)
Definition: tsrank.c:850

References calc_rank_cd(), DEF_NORM_METHOD, getWeights(), PG_FREE_IF_COPY, PG_GETARG_TSQUERY, PG_GETARG_TSVECTOR, PG_RETURN_FLOAT4, and res.

◆ ts_rankcd_ttf()

Datum ts_rankcd_ttf ( PG_FUNCTION_ARGS  )

Definition at line 986 of file tsrank.c.

987 {
988  TSVector txt = PG_GETARG_TSVECTOR(0);
989  TSQuery query = PG_GETARG_TSQUERY(1);
990  int method = PG_GETARG_INT32(2);
991  float res;
992 
993  res = calc_rank_cd(getWeights(NULL), txt, query, method);
994 
995  PG_FREE_IF_COPY(txt, 0);
996  PG_FREE_IF_COPY(query, 1);
997  PG_RETURN_FLOAT4(res);
998 }

References calc_rank_cd(), getWeights(), PG_FREE_IF_COPY, PG_GETARG_INT32, PG_GETARG_TSQUERY, PG_GETARG_TSVECTOR, PG_RETURN_FLOAT4, and res.

◆ ts_rankcd_wtt()

Datum ts_rankcd_wtt ( PG_FUNCTION_ARGS  )

Definition at line 970 of file tsrank.c.

971 {
972  ArrayType *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
973  TSVector txt = PG_GETARG_TSVECTOR(1);
974  TSQuery query = PG_GETARG_TSQUERY(2);
975  float res;
976 
977  res = calc_rank_cd(getWeights(win), txt, query, DEF_NORM_METHOD);
978 
979  PG_FREE_IF_COPY(win, 0);
980  PG_FREE_IF_COPY(txt, 1);
981  PG_FREE_IF_COPY(query, 2);
982  PG_RETURN_FLOAT4(res);
983 }

References calc_rank_cd(), DEF_NORM_METHOD, getWeights(), PG_DETOAST_DATUM, PG_FREE_IF_COPY, PG_GETARG_DATUM, PG_GETARG_TSQUERY, PG_GETARG_TSVECTOR, PG_RETURN_FLOAT4, and res.

◆ ts_rankcd_wttf()

Datum ts_rankcd_wttf ( PG_FUNCTION_ARGS  )

Definition at line 953 of file tsrank.c.

954 {
955  ArrayType *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
956  TSVector txt = PG_GETARG_TSVECTOR(1);
957  TSQuery query = PG_GETARG_TSQUERY(2);
958  int method = PG_GETARG_INT32(3);
959  float res;
960 
961  res = calc_rank_cd(getWeights(win), txt, query, method);
962 
963  PG_FREE_IF_COPY(win, 0);
964  PG_FREE_IF_COPY(txt, 1);
965  PG_FREE_IF_COPY(query, 2);
966  PG_RETURN_FLOAT4(res);
967 }

References calc_rank_cd(), getWeights(), PG_DETOAST_DATUM, PG_FREE_IF_COPY, PG_GETARG_DATUM, PG_GETARG_INT32, PG_GETARG_TSQUERY, PG_GETARG_TSVECTOR, PG_RETURN_FLOAT4, and res.

◆ word_distance()

static float4 word_distance ( int32  w)
static

Definition at line 44 of file tsrank.c.

45 {
46  if (w > 100)
47  return 1e-30f;
48 
49  return 1.0 / (1.005 + 0.05 * exp(((float4) w) / 1.5 - 2));
50 }

Referenced by calc_rank_and().

Variable Documentation

◆ weights

const float weights[] = {0.1f, 0.2f, 0.4f, 1.0f}
static

Definition at line 24 of file tsrank.c.

Referenced by calc_rank_cd(), getWeights(), tsvector_filter(), and tsvector_unnest().