PostgreSQL Source Code git master
tsrank.c File Reference
#include "postgres.h"
#include <limits.h>
#include <math.h>
#include "miscadmin.h"
#include "tsearch/ts_utils.h"
#include "utils/array.h"
#include "utils/fmgrprotos.h"
Include dependency graph for tsrank.c:

Go to the source code of this file.

Data Structures

struct  DocRepresentation
 
struct  QueryRepresentationOperand
 
struct  QueryRepresentation
 
struct  CoverExt
 

Macros

#define NUM_WEIGHTS   4
 
#define wpos(wep)   ( w[ WEP_GETWEIGHT(wep) ] )
 
#define RANK_NO_NORM   0x00
 
#define RANK_NORM_LOGLENGTH   0x01
 
#define RANK_NORM_LENGTH   0x02
 
#define RANK_NORM_EXTDIST   0x04
 
#define RANK_NORM_UNIQ   0x08
 
#define RANK_NORM_LOGUNIQ   0x10
 
#define RANK_NORM_RDIVRPLUS1   0x20
 
#define DEF_NORM_METHOD   RANK_NO_NORM
 
#define WordECompareQueryItem(e, q, p, i, m)
 
#define MAXQROPOS   MAXENTRYPOS
 
#define QR_GET_OPERAND_DATA(q, v)    ( (q)->operandData + (((QueryItem*)(v)) - GETQUERY((q)->query)) )
 

Functions

static float calc_rank_or (const float *w, TSVector t, TSQuery q)
 
static float calc_rank_and (const float *w, TSVector t, TSQuery q)
 
static float4 word_distance (int32 w)
 
static int cnt_length (TSVector t)
 
static WordEntryfind_wordentry (TSVector t, TSQuery q, QueryOperand *item, int32 *nitem)
 
static int compareQueryOperand (const void *a, const void *b, void *arg)
 
static QueryOperand ** SortAndUniqItems (TSQuery q, int *size)
 
static float calc_rank (const float *w, TSVector t, TSQuery q, int32 method)
 
static void getWeights (ArrayType *win, float *ws)
 
Datum ts_rank_wttf (PG_FUNCTION_ARGS)
 
Datum ts_rank_wtt (PG_FUNCTION_ARGS)
 
Datum ts_rank_ttf (PG_FUNCTION_ARGS)
 
Datum ts_rank_tt (PG_FUNCTION_ARGS)
 
static int compareDocR (const void *va, const void *vb)
 
static TSTernaryValue checkcondition_QueryOperand (void *checkval, QueryOperand *val, ExecPhraseData *data)
 
static void resetQueryRepresentation (QueryRepresentation *qr, bool reverseinsert)
 
static void fillQueryRepresentationData (QueryRepresentation *qr, DocRepresentation *entry)
 
static bool Cover (DocRepresentation *doc, int len, QueryRepresentation *qr, CoverExt *ext)
 
static DocRepresentationget_docrep (TSVector txt, QueryRepresentation *qr, int *doclen)
 
static float4 calc_rank_cd (const float4 *arrdata, TSVector txt, TSQuery query, int method)
 
Datum ts_rankcd_wttf (PG_FUNCTION_ARGS)
 
Datum ts_rankcd_wtt (PG_FUNCTION_ARGS)
 
Datum ts_rankcd_ttf (PG_FUNCTION_ARGS)
 
Datum ts_rankcd_tt (PG_FUNCTION_ARGS)
 

Variables

static const float default_weights [NUM_WEIGHTS] = {0.1f, 0.2f, 0.4f, 1.0f}
 

Macro Definition Documentation

◆ DEF_NORM_METHOD

#define DEF_NORM_METHOD   RANK_NO_NORM

Definition at line 36 of file tsrank.c.

◆ MAXQROPOS

#define MAXQROPOS   MAXENTRYPOS

Definition at line 545 of file tsrank.c.

◆ NUM_WEIGHTS

#define NUM_WEIGHTS   4

Definition at line 24 of file tsrank.c.

◆ QR_GET_OPERAND_DATA

#define QR_GET_OPERAND_DATA (   q,
 
)     ( (q)->operandData + (((QueryItem*)(v)) - GETQUERY((q)->query)) )

Definition at line 561 of file tsrank.c.

◆ RANK_NO_NORM

#define RANK_NO_NORM   0x00

Definition at line 29 of file tsrank.c.

◆ RANK_NORM_EXTDIST

#define RANK_NORM_EXTDIST   0x04

Definition at line 32 of file tsrank.c.

◆ RANK_NORM_LENGTH

#define RANK_NORM_LENGTH   0x02

Definition at line 31 of file tsrank.c.

◆ RANK_NORM_LOGLENGTH

#define RANK_NORM_LOGLENGTH   0x01

Definition at line 30 of file tsrank.c.

◆ RANK_NORM_LOGUNIQ

#define RANK_NORM_LOGUNIQ   0x10

Definition at line 34 of file tsrank.c.

◆ RANK_NORM_RDIVRPLUS1

#define RANK_NORM_RDIVRPLUS1   0x20

Definition at line 35 of file tsrank.c.

◆ RANK_NORM_UNIQ

#define RANK_NORM_UNIQ   0x08

Definition at line 33 of file tsrank.c.

◆ WordECompareQueryItem

#define WordECompareQueryItem (   e,
  q,
  p,
  i,
 
)
Value:
tsCompareString((q) + (i)->distance, (i)->length, \
(e) + (p)->pos, (p)->len, (m))
int i
Definition: isn.c:72
const void size_t len
e
Definition: preproc-init.c:82
int32 tsCompareString(char *a, int lena, char *b, int lenb, bool prefix)
Definition: tsvector_op.c:1152

Definition at line 76 of file tsrank.c.

◆ wpos

#define wpos (   wep)    ( w[ WEP_GETWEIGHT(wep) ] )

Definition at line 27 of file tsrank.c.

Function Documentation

◆ calc_rank()

static float calc_rank ( const float *  w,
TSVector  t,
TSQuery  q,
int32  method 
)
static

Definition at line 358 of file tsrank.c.

359{
360 QueryItem *item = GETQUERY(q);
361 float res = 0.0;
362 int len;
363
364 if (!t->size || !q->size)
365 return 0.0;
366
367 /* XXX: What about NOT? */
368 res = (item->type == QI_OPR && (item->qoperator.oper == OP_AND ||
369 item->qoperator.oper == OP_PHRASE)) ?
370 calc_rank_and(w, t, q) :
371 calc_rank_or(w, t, q);
372
373 if (res < 0)
374 res = 1e-20f;
375
376 if ((method & RANK_NORM_LOGLENGTH) && t->size > 0)
377 res /= log((double) (cnt_length(t) + 1)) / log(2.0);
378
379 if (method & RANK_NORM_LENGTH)
380 {
381 len = cnt_length(t);
382 if (len > 0)
383 res /= (float) len;
384 }
385
386 /* RANK_NORM_EXTDIST not applicable */
387
388 if ((method & RANK_NORM_UNIQ) && t->size > 0)
389 res /= (float) (t->size);
390
391 if ((method & RANK_NORM_LOGUNIQ) && t->size > 0)
392 res /= log((double) (t->size + 1)) / log(2.0);
393
394 if (method & RANK_NORM_RDIVRPLUS1)
395 res /= (res + 1);
396
397 return res;
398}
#define GETQUERY(x)
Definition: _int.h:157
int32 size
Definition: ts_type.h:221
int32 size
Definition: ts_type.h:93
#define QI_OPR
Definition: ts_type.h:150
#define OP_AND
Definition: ts_type.h:180
#define OP_PHRASE
Definition: ts_type.h:182
#define RANK_NORM_RDIVRPLUS1
Definition: tsrank.c:35
#define RANK_NORM_UNIQ
Definition: tsrank.c:33
#define RANK_NORM_LOGLENGTH
Definition: tsrank.c:30
#define RANK_NORM_LENGTH
Definition: tsrank.c:31
static int cnt_length(TSVector t)
Definition: tsrank.c:54
static float calc_rank_or(const float *w, TSVector t, TSQuery q)
Definition: tsrank.c:284
static float calc_rank_and(const float *w, TSVector t, TSQuery q)
Definition: tsrank.c:201
#define RANK_NORM_LOGUNIQ
Definition: tsrank.c:34
QueryOperator qoperator
Definition: ts_type.h:209
QueryItemType type
Definition: ts_type.h:208

References calc_rank_and(), calc_rank_or(), cnt_length(), GETQUERY, len, OP_AND, OP_PHRASE, QueryOperator::oper, QI_OPR, QueryItem::qoperator, RANK_NORM_LENGTH, RANK_NORM_LOGLENGTH, RANK_NORM_LOGUNIQ, RANK_NORM_RDIVRPLUS1, RANK_NORM_UNIQ, res, TSVectorData::size, TSQueryData::size, and QueryItem::type.

Referenced by ts_rank_tt(), ts_rank_ttf(), ts_rank_wtt(), and ts_rank_wttf().

◆ calc_rank_and()

static float calc_rank_and ( const float *  w,
TSVector  t,
TSQuery  q 
)
static

Definition at line 201 of file tsrank.c.

202{
203 WordEntryPosVector **pos;
204 WordEntryPosVector1 posnull;
205 WordEntryPosVector *POSNULL;
206 int i,
207 k,
208 l,
209 p;
210 WordEntry *entry,
211 *firstentry;
212 WordEntryPos *post,
213 *ct;
214 int32 dimt,
215 lenct,
216 dist,
217 nitem;
218 float res = -1.0;
219 QueryOperand **item;
220 int size = q->size;
221
222 item = SortAndUniqItems(q, &size);
223 if (size < 2)
224 {
225 pfree(item);
226 return calc_rank_or(w, t, q);
227 }
228 pos = (WordEntryPosVector **) palloc0(sizeof(WordEntryPosVector *) * q->size);
229
230 /* A dummy WordEntryPos array to use when haspos is false */
231 posnull.npos = 1;
232 posnull.pos[0] = 0;
233 WEP_SETPOS(posnull.pos[0], MAXENTRYPOS - 1);
234 POSNULL = (WordEntryPosVector *) &posnull;
235
236 for (i = 0; i < size; i++)
237 {
238 firstentry = entry = find_wordentry(t, q, item[i], &nitem);
239 if (!entry)
240 continue;
241
242 while (entry - firstentry < nitem)
243 {
244 if (entry->haspos)
245 pos[i] = _POSVECPTR(t, entry);
246 else
247 pos[i] = POSNULL;
248
249 dimt = pos[i]->npos;
250 post = pos[i]->pos;
251 for (k = 0; k < i; k++)
252 {
253 if (!pos[k])
254 continue;
255 lenct = pos[k]->npos;
256 ct = pos[k]->pos;
257 for (l = 0; l < dimt; l++)
258 {
259 for (p = 0; p < lenct; p++)
260 {
261 dist = abs((int) WEP_GETPOS(post[l]) - (int) WEP_GETPOS(ct[p]));
262 if (dist || (dist == 0 && (pos[i] == POSNULL || pos[k] == POSNULL)))
263 {
264 float curw;
265
266 if (!dist)
267 dist = MAXENTRYPOS;
268 curw = sqrt(wpos(post[l]) * wpos(ct[p]) * word_distance(dist));
269 res = (res < 0) ? curw : 1.0 - (1.0 - res) * (1.0 - curw);
270 }
271 }
272 }
273 }
274
275 entry++;
276 }
277 }
278 pfree(pos);
279 pfree(item);
280 return res;
281}
int32_t int32
Definition: c.h:484
void pfree(void *pointer)
Definition: mcxt.c:1521
void * palloc0(Size size)
Definition: mcxt.c:1347
static pg_noinline void Size size
Definition: slab.c:607
WordEntryPos pos[1]
Definition: ts_type.h:75
WordEntryPos pos[FLEXIBLE_ARRAY_MEMBER]
Definition: ts_type.h:68
uint32 haspos
Definition: ts_type.h:44
#define WEP_GETPOS(x)
Definition: ts_type.h:80
#define _POSVECPTR(x, e)
Definition: ts_type.h:109
#define MAXENTRYPOS
Definition: ts_type.h:85
#define WEP_SETPOS(x, v)
Definition: ts_type.h:83
uint16 WordEntryPos
Definition: ts_type.h:63
#define wpos(wep)
Definition: tsrank.c:27
static WordEntry * find_wordentry(TSVector t, TSQuery q, QueryOperand *item, int32 *nitem)
Definition: tsrank.c:87
static float4 word_distance(int32 w)
Definition: tsrank.c:45
static QueryOperand ** SortAndUniqItems(TSQuery q, int *size)
Definition: tsrank.c:155

References _POSVECPTR, calc_rank_or(), find_wordentry(), WordEntry::haspos, i, MAXENTRYPOS, WordEntryPosVector::npos, WordEntryPosVector1::npos, palloc0(), pfree(), WordEntryPosVector1::pos, WordEntryPosVector::pos, res, size, TSQueryData::size, SortAndUniqItems(), WEP_GETPOS, WEP_SETPOS, word_distance(), and wpos.

Referenced by calc_rank().

◆ calc_rank_cd()

static float4 calc_rank_cd ( const float4 arrdata,
TSVector  txt,
TSQuery  query,
int  method 
)
static

Definition at line 855 of file tsrank.c.

856{
858 int len,
859 i,
860 doclen = 0;
861 CoverExt ext;
862 double Wdoc = 0.0;
863 double invws[NUM_WEIGHTS];
864 double SumDist = 0.0,
865 PrevExtPos = 0.0;
866 int NExtent = 0;
868
869
870 for (i = 0; i < NUM_WEIGHTS; i++)
871 {
872 invws[i] = ((double) ((arrdata[i] >= 0) ? arrdata[i] : default_weights[i]));
873 if (invws[i] > 1.0)
875 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
876 errmsg("weight out of range")));
877 invws[i] = 1.0 / invws[i];
878 }
879
880 qr.query = query;
882 palloc0(sizeof(QueryRepresentationOperand) * query->size);
883
884 doc = get_docrep(txt, &qr, &doclen);
885 if (!doc)
886 {
887 pfree(qr.operandData);
888 return 0.0;
889 }
890
891 MemSet(&ext, 0, sizeof(CoverExt));
892 while (Cover(doc, doclen, &qr, &ext))
893 {
894 double Cpos = 0.0;
895 double InvSum = 0.0;
896 double CurExtPos;
897 int nNoise;
898 DocRepresentation *ptr = ext.begin;
899
900 while (ptr <= ext.end)
901 {
902 InvSum += invws[WEP_GETWEIGHT(ptr->pos)];
903 ptr++;
904 }
905
906 Cpos = ((double) (ext.end - ext.begin + 1)) / InvSum;
907
908 /*
909 * if doc are big enough then ext.q may be equal to ext.p due to limit
910 * of positional information. In this case we approximate number of
911 * noise word as half cover's length
912 */
913 nNoise = (ext.q - ext.p) - (ext.end - ext.begin);
914 if (nNoise < 0)
915 nNoise = (ext.end - ext.begin) / 2;
916 Wdoc += Cpos / ((double) (1 + nNoise));
917
918 CurExtPos = ((double) (ext.q + ext.p)) / 2.0;
919 if (NExtent > 0 && CurExtPos > PrevExtPos /* prevent division by
920 * zero in a case of
921 * multiple lexize */ )
922 SumDist += 1.0 / (CurExtPos - PrevExtPos);
923
924 PrevExtPos = CurExtPos;
925 NExtent++;
926 }
927
928 if ((method & RANK_NORM_LOGLENGTH) && txt->size > 0)
929 Wdoc /= log((double) (cnt_length(txt) + 1));
930
931 if (method & RANK_NORM_LENGTH)
932 {
933 len = cnt_length(txt);
934 if (len > 0)
935 Wdoc /= (double) len;
936 }
937
938 if ((method & RANK_NORM_EXTDIST) && NExtent > 0 && SumDist > 0)
939 Wdoc /= ((double) NExtent) / SumDist;
940
941 if ((method & RANK_NORM_UNIQ) && txt->size > 0)
942 Wdoc /= (double) (txt->size);
943
944 if ((method & RANK_NORM_LOGUNIQ) && txt->size > 0)
945 Wdoc /= log((double) (txt->size + 1)) / log(2.0);
946
947 if (method & RANK_NORM_RDIVRPLUS1)
948 Wdoc /= (Wdoc + 1);
949
950 pfree(doc);
951
952 pfree(qr.operandData);
953
954 return (float4) Wdoc;
955}
float float4
Definition: c.h:586
#define MemSet(start, val, len)
Definition: c.h:977
int errcode(int sqlerrcode)
Definition: elog.c:853
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define ERROR
Definition: elog.h:39
#define ereport(elevel,...)
Definition: elog.h:149
int p
Definition: tsrank.c:591
DocRepresentation * begin
Definition: tsrank.c:593
DocRepresentation * end
Definition: tsrank.c:594
int q
Definition: tsrank.c:592
WordEntryPos pos
Definition: tsrank.c:520
QueryRepresentationOperand * operandData
Definition: tsrank.c:558
#define WEP_GETWEIGHT(x)
Definition: ts_type.h:79
static DocRepresentation * get_docrep(TSVector txt, QueryRepresentation *qr, int *doclen)
Definition: tsrank.c:732
static const float default_weights[NUM_WEIGHTS]
Definition: tsrank.c:25
#define NUM_WEIGHTS
Definition: tsrank.c:24
#define RANK_NORM_EXTDIST
Definition: tsrank.c:32
static bool Cover(DocRepresentation *doc, int len, QueryRepresentation *qr, CoverExt *ext)
Definition: tsrank.c:651

References CoverExt::begin, cnt_length(), Cover(), default_weights, CoverExt::end, ereport, errcode(), errmsg(), ERROR, get_docrep(), i, len, MemSet, NUM_WEIGHTS, QueryRepresentation::operandData, CoverExt::p, palloc0(), pfree(), DocRepresentation::pos, CoverExt::q, QueryRepresentation::query, RANK_NORM_EXTDIST, RANK_NORM_LENGTH, RANK_NORM_LOGLENGTH, RANK_NORM_LOGUNIQ, RANK_NORM_RDIVRPLUS1, RANK_NORM_UNIQ, TSVectorData::size, TSQueryData::size, and WEP_GETWEIGHT.

Referenced by ts_rankcd_tt(), ts_rankcd_ttf(), ts_rankcd_wtt(), and ts_rankcd_wttf().

◆ calc_rank_or()

static float calc_rank_or ( const float *  w,
TSVector  t,
TSQuery  q 
)
static

Definition at line 284 of file tsrank.c.

285{
286 WordEntry *entry,
287 *firstentry;
288 WordEntryPosVector1 posnull;
289 WordEntryPos *post;
290 int32 dimt,
291 j,
292 i,
293 nitem;
294 float res = 0.0;
295 QueryOperand **item;
296 int size = q->size;
297
298 /* A dummy WordEntryPos array to use when haspos is false */
299 posnull.npos = 1;
300 posnull.pos[0] = 0;
301
302 item = SortAndUniqItems(q, &size);
303
304 for (i = 0; i < size; i++)
305 {
306 float resj,
307 wjm;
308 int32 jm;
309
310 firstentry = entry = find_wordentry(t, q, item[i], &nitem);
311 if (!entry)
312 continue;
313
314 while (entry - firstentry < nitem)
315 {
316 if (entry->haspos)
317 {
318 dimt = POSDATALEN(t, entry);
319 post = POSDATAPTR(t, entry);
320 }
321 else
322 {
323 dimt = posnull.npos;
324 post = posnull.pos;
325 }
326
327 resj = 0.0;
328 wjm = -1.0;
329 jm = 0;
330 for (j = 0; j < dimt; j++)
331 {
332 resj = resj + wpos(post[j]) / ((j + 1) * (j + 1));
333 if (wpos(post[j]) > wjm)
334 {
335 wjm = wpos(post[j]);
336 jm = j;
337 }
338 }
339/*
340 limit (sum(1/i^2),i=1,inf) = pi^2/6
341 resj = sum(wi/i^2),i=1,noccurrence,
342 wi - should be sorted desc,
343 don't sort for now, just choose maximum weight. This should be corrected
344 Oleg Bartunov
345*/
346 res = res + (wjm + resj - wjm / ((jm + 1) * (jm + 1))) / 1.64493406685;
347
348 entry++;
349 }
350 }
351 if (size > 0)
352 res = res / size;
353 pfree(item);
354 return res;
355}
int j
Definition: isn.c:73
#define POSDATALEN(x, e)
Definition: ts_type.h:110
#define POSDATAPTR(x, e)
Definition: ts_type.h:111

References find_wordentry(), WordEntry::haspos, i, j, WordEntryPosVector1::npos, pfree(), WordEntryPosVector1::pos, POSDATALEN, POSDATAPTR, res, size, TSQueryData::size, SortAndUniqItems(), and wpos.

Referenced by calc_rank(), and calc_rank_and().

◆ checkcondition_QueryOperand()

static TSTernaryValue checkcondition_QueryOperand ( void *  checkval,
QueryOperand val,
ExecPhraseData data 
)
static

Definition at line 568 of file tsrank.c.

570{
571 QueryRepresentation *qr = (QueryRepresentation *) checkval;
573
574 if (!opData->operandexists)
575 return TS_NO;
576
577 if (data)
578 {
579 data->npos = opData->npos;
580 data->pos = opData->pos;
581 if (opData->reverseinsert)
582 data->pos += MAXQROPOS - opData->npos;
583 }
584
585 return TS_YES;
586}
long val
Definition: informix.c:689
const void * data
WordEntryPos pos[MAXQROPOS]
Definition: tsrank.c:552
@ TS_NO
Definition: ts_utils.h:134
@ TS_YES
Definition: ts_utils.h:135
#define MAXQROPOS
Definition: tsrank.c:545
#define QR_GET_OPERAND_DATA(q, v)
Definition: tsrank.c:561

References data, MAXQROPOS, QueryRepresentationOperand::npos, QueryRepresentationOperand::operandexists, QueryRepresentationOperand::pos, QR_GET_OPERAND_DATA, QueryRepresentationOperand::reverseinsert, TS_NO, TS_YES, and val.

Referenced by Cover().

◆ cnt_length()

static int cnt_length ( TSVector  t)
static

Definition at line 54 of file tsrank.c.

55{
56 WordEntry *ptr = ARRPTR(t),
57 *end = (WordEntry *) STRPTR(t);
58 int len = 0;
59
60 while (ptr < end)
61 {
62 int clen = POSDATALEN(t, ptr);
63
64 if (clen == 0)
65 len += 1;
66 else
67 len += clen;
68
69 ptr++;
70 }
71
72 return len;
73}
#define ARRPTR(x)
Definition: cube.c:25
#define STRPTR(x)
Definition: hstore.h:76

References ARRPTR, len, POSDATALEN, and STRPTR.

Referenced by calc_rank(), and calc_rank_cd().

◆ compareDocR()

static int compareDocR ( const void *  va,
const void *  vb 
)
static

Definition at line 524 of file tsrank.c.

525{
526 const DocRepresentation *a = (const DocRepresentation *) va;
527 const DocRepresentation *b = (const DocRepresentation *) vb;
528
529 if (WEP_GETPOS(a->pos) == WEP_GETPOS(b->pos))
530 {
531 if (WEP_GETWEIGHT(a->pos) == WEP_GETWEIGHT(b->pos))
532 {
533 if (a->data.map.entry == b->data.map.entry)
534 return 0;
535
536 return (a->data.map.entry > b->data.map.entry) ? 1 : -1;
537 }
538
539 return (WEP_GETWEIGHT(a->pos) > WEP_GETWEIGHT(b->pos)) ? 1 : -1;
540 }
541
542 return (WEP_GETPOS(a->pos) > WEP_GETPOS(b->pos)) ? 1 : -1;
543}
int b
Definition: isn.c:69
int a
Definition: isn.c:68

References a, b, WEP_GETPOS, and WEP_GETWEIGHT.

Referenced by get_docrep().

◆ compareQueryOperand()

static int compareQueryOperand ( const void *  a,
const void *  b,
void *  arg 
)
static

Definition at line 136 of file tsrank.c.

137{
138 char *operand = (char *) arg;
139 QueryOperand *qa = (*(QueryOperand *const *) a);
140 QueryOperand *qb = (*(QueryOperand *const *) b);
141
142 return tsCompareString(operand + qa->distance, qa->length,
143 operand + qb->distance, qb->length,
144 false);
145}
void * arg
uint32 distance
Definition: ts_type.h:172
uint32 length
Definition: ts_type.h:171

References a, arg, b, QueryOperand::distance, QueryOperand::length, and tsCompareString().

Referenced by SortAndUniqItems().

◆ Cover()

static bool Cover ( DocRepresentation doc,
int  len,
QueryRepresentation qr,
CoverExt ext 
)
static

Definition at line 651 of file tsrank.c.

652{
654 int lastpos = ext->pos;
655 bool found = false;
656
657 /*
658 * since this function recurses, it could be driven to stack overflow.
659 * (though any decent compiler will optimize away the tail-recursion.
660 */
662
663 resetQueryRepresentation(qr, false);
664
665 ext->p = INT_MAX;
666 ext->q = 0;
667 ptr = doc + ext->pos;
668
669 /* find upper bound of cover from current position, move up */
670 while (ptr - doc < len)
671 {
673
674 if (TS_execute(GETQUERY(qr->query), qr,
676 {
677 if (WEP_GETPOS(ptr->pos) > ext->q)
678 {
679 ext->q = WEP_GETPOS(ptr->pos);
680 ext->end = ptr;
681 lastpos = ptr - doc;
682 found = true;
683 }
684 break;
685 }
686 ptr++;
687 }
688
689 if (!found)
690 return false;
691
692 resetQueryRepresentation(qr, true);
693
694 ptr = doc + lastpos;
695
696 /* find lower bound of cover from found upper bound, move down */
697 while (ptr >= doc + ext->pos)
698 {
699 /*
700 * we scan doc from right to left, so pos info in reverse order!
701 */
703
704 if (TS_execute(GETQUERY(qr->query), qr,
706 {
707 if (WEP_GETPOS(ptr->pos) < ext->p)
708 {
709 ext->begin = ptr;
710 ext->p = WEP_GETPOS(ptr->pos);
711 }
712 break;
713 }
714 ptr--;
715 }
716
717 if (ext->p <= ext->q)
718 {
719 /*
720 * set position for next try to next lexeme after beginning of found
721 * cover
722 */
723 ext->pos = (ptr - doc) + 1;
724 return true;
725 }
726
727 ext->pos++;
728 return Cover(doc, len, qr, ext);
729}
void check_stack_depth(void)
Definition: stack_depth.c:95
int pos
Definition: tsrank.c:590
#define TS_EXEC_EMPTY
Definition: ts_utils.h:188
static TSTernaryValue checkcondition_QueryOperand(void *checkval, QueryOperand *val, ExecPhraseData *data)
Definition: tsrank.c:568
static void resetQueryRepresentation(QueryRepresentation *qr, bool reverseinsert)
Definition: tsrank.c:598
static void fillQueryRepresentationData(QueryRepresentation *qr, DocRepresentation *entry)
Definition: tsrank.c:611
bool TS_execute(QueryItem *curitem, void *arg, uint32 flags, TSExecuteCallback chkcond)
Definition: tsvector_op.c:1854

References CoverExt::begin, check_stack_depth(), checkcondition_QueryOperand(), Cover(), CoverExt::end, fillQueryRepresentationData(), GETQUERY, len, CoverExt::p, DocRepresentation::pos, CoverExt::pos, CoverExt::q, QueryRepresentation::query, resetQueryRepresentation(), TS_EXEC_EMPTY, TS_execute(), and WEP_GETPOS.

Referenced by calc_rank_cd(), and Cover().

◆ fillQueryRepresentationData()

static void fillQueryRepresentationData ( QueryRepresentation qr,
DocRepresentation entry 
)
static

Definition at line 611 of file tsrank.c.

612{
613 int i;
614 int lastPos;
616
617 for (i = 0; i < entry->data.query.nitem; i++)
618 {
619 if (entry->data.query.items[i]->type != QI_VAL)
620 continue;
621
622 opData = QR_GET_OPERAND_DATA(qr, entry->data.query.items[i]);
623
624 opData->operandexists = true;
625
626 if (opData->npos == 0)
627 {
628 lastPos = (opData->reverseinsert) ? (MAXQROPOS - 1) : 0;
629 opData->pos[lastPos] = entry->pos;
630 opData->npos++;
631 continue;
632 }
633
634 lastPos = opData->reverseinsert ?
635 (MAXQROPOS - opData->npos) :
636 (opData->npos - 1);
637
638 if (WEP_GETPOS(opData->pos[lastPos]) != WEP_GETPOS(entry->pos))
639 {
640 lastPos = opData->reverseinsert ?
641 (MAXQROPOS - 1 - opData->npos) :
642 (opData->npos);
643
644 opData->pos[lastPos] = entry->pos;
645 opData->npos++;
646 }
647 }
648}
QueryItem ** items
Definition: tsrank.c:510
struct DocRepresentation::@28::@29 query
union DocRepresentation::@28 data
#define QI_VAL
Definition: ts_type.h:149

References DocRepresentation::data, i, DocRepresentation::items, MAXQROPOS, DocRepresentation::nitem, QueryRepresentationOperand::npos, QueryRepresentationOperand::operandexists, DocRepresentation::pos, QueryRepresentationOperand::pos, QI_VAL, QR_GET_OPERAND_DATA, DocRepresentation::query, QueryRepresentationOperand::reverseinsert, QueryItem::type, and WEP_GETPOS.

Referenced by Cover().

◆ find_wordentry()

static WordEntry * find_wordentry ( TSVector  t,
TSQuery  q,
QueryOperand item,
int32 nitem 
)
static

Definition at line 87 of file tsrank.c.

88{
89 WordEntry *StopLow = ARRPTR(t);
90 WordEntry *StopHigh = (WordEntry *) STRPTR(t);
91 WordEntry *StopMiddle = StopHigh;
92 int difference;
93
94 *nitem = 0;
95
96 /* Loop invariant: StopLow <= item < StopHigh */
97 while (StopLow < StopHigh)
98 {
99 StopMiddle = StopLow + (StopHigh - StopLow) / 2;
100 difference = WordECompareQueryItem(STRPTR(t), GETOPERAND(q), StopMiddle, item, false);
101 if (difference == 0)
102 {
103 StopHigh = StopMiddle;
104 *nitem = 1;
105 break;
106 }
107 else if (difference > 0)
108 StopLow = StopMiddle + 1;
109 else
110 StopHigh = StopMiddle;
111 }
112
113 if (item->prefix)
114 {
115 if (StopLow >= StopHigh)
116 StopMiddle = StopHigh;
117
118 *nitem = 0;
119
120 while (StopMiddle < (WordEntry *) STRPTR(t) &&
121 WordECompareQueryItem(STRPTR(t), GETOPERAND(q), StopMiddle, item, true) == 0)
122 {
123 (*nitem)++;
124 StopMiddle++;
125 }
126 }
127
128 return (*nitem > 0) ? StopHigh : NULL;
129}
Datum difference(PG_FUNCTION_ARGS)
#define GETOPERAND(x)
Definition: ltree.h:165
bool prefix
Definition: ts_type.h:163
#define WordECompareQueryItem(e, q, p, i, m)
Definition: tsrank.c:76

References ARRPTR, difference(), GETOPERAND, QueryOperand::prefix, STRPTR, and WordECompareQueryItem.

Referenced by calc_rank_and(), calc_rank_or(), and get_docrep().

◆ get_docrep()

static DocRepresentation * get_docrep ( TSVector  txt,
QueryRepresentation qr,
int *  doclen 
)
static

Definition at line 732 of file tsrank.c.

733{
734 QueryItem *item = GETQUERY(qr->query);
735 WordEntry *entry,
736 *firstentry;
737 WordEntryPos *post;
738 int32 dimt, /* number of 'post' items */
739 j,
740 i,
741 nitem;
742 int len = qr->query->size * 4,
743 cur = 0;
745
746 doc = (DocRepresentation *) palloc(sizeof(DocRepresentation) * len);
747
748 /*
749 * Iterate through query to make DocRepresentation for words and it's
750 * entries satisfied by query
751 */
752 for (i = 0; i < qr->query->size; i++)
753 {
754 QueryOperand *curoperand;
755
756 if (item[i].type != QI_VAL)
757 continue;
758
759 curoperand = &item[i].qoperand;
760
761 firstentry = entry = find_wordentry(txt, qr->query, curoperand, &nitem);
762 if (!entry)
763 continue;
764
765 /* iterations over entries in tsvector */
766 while (entry - firstentry < nitem)
767 {
768 if (entry->haspos)
769 {
770 dimt = POSDATALEN(txt, entry);
771 post = POSDATAPTR(txt, entry);
772 }
773 else
774 {
775 /* ignore words without positions */
776 entry++;
777 continue;
778 }
779
780 while (cur + dimt >= len)
781 {
782 len *= 2;
783 doc = (DocRepresentation *) repalloc(doc, sizeof(DocRepresentation) * len);
784 }
785
786 /* iterations over entry's positions */
787 for (j = 0; j < dimt; j++)
788 {
789 if (curoperand->weight == 0 ||
790 curoperand->weight & (1 << WEP_GETWEIGHT(post[j])))
791 {
792 doc[cur].pos = post[j];
793 doc[cur].data.map.entry = entry;
794 doc[cur].data.map.item = (QueryItem *) curoperand;
795 cur++;
796 }
797 }
798
799 entry++;
800 }
801 }
802
803 if (cur > 0)
804 {
805 DocRepresentation *rptr = doc + 1,
806 *wptr = doc,
807 storage;
808
809 /*
810 * Sort representation in ascending order by pos and entry
811 */
812 qsort(doc, cur, sizeof(DocRepresentation), compareDocR);
813
814 /*
815 * Join QueryItem per WordEntry and it's position
816 */
817 storage.pos = doc->pos;
818 storage.data.query.items = palloc(sizeof(QueryItem *) * qr->query->size);
819 storage.data.query.items[0] = doc->data.map.item;
820 storage.data.query.nitem = 1;
821
822 while (rptr - doc < cur)
823 {
824 if (rptr->pos == (rptr - 1)->pos &&
825 rptr->data.map.entry == (rptr - 1)->data.map.entry)
826 {
827 storage.data.query.items[storage.data.query.nitem] = rptr->data.map.item;
828 storage.data.query.nitem++;
829 }
830 else
831 {
832 *wptr = storage;
833 wptr++;
834 storage.pos = rptr->pos;
835 storage.data.query.items = palloc(sizeof(QueryItem *) * qr->query->size);
836 storage.data.query.items[0] = rptr->data.map.item;
837 storage.data.query.nitem = 1;
838 }
839
840 rptr++;
841 }
842
843 *wptr = storage;
844 wptr++;
845
846 *doclen = wptr - doc;
847 return doc;
848 }
849
850 pfree(doc);
851 return NULL;
852}
struct cursor * cur
Definition: ecpg.c:29
#define storage
Definition: indent_codes.h:68
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1541
void * palloc(Size size)
Definition: mcxt.c:1317
#define qsort(a, b, c, d)
Definition: port.h:474
struct DocRepresentation::@28::@30 map
WordEntry * entry
Definition: tsrank.c:517
QueryItem * item
Definition: tsrank.c:516
uint8 weight
Definition: ts_type.h:159
static int compareDocR(const void *va, const void *vb)
Definition: tsrank.c:524
QueryOperand qoperand
Definition: ts_type.h:210
const char * type

References compareDocR(), cur, DocRepresentation::data, DocRepresentation::entry, find_wordentry(), GETQUERY, WordEntry::haspos, i, DocRepresentation::item, j, len, DocRepresentation::map, palloc(), pfree(), DocRepresentation::pos, POSDATALEN, POSDATAPTR, QI_VAL, QueryItem::qoperand, qsort, QueryRepresentation::query, repalloc(), TSQueryData::size, storage, type, QueryOperand::weight, and WEP_GETWEIGHT.

Referenced by calc_rank_cd().

◆ getWeights()

static void getWeights ( ArrayType win,
float *  ws 
)
static

Definition at line 405 of file tsrank.c.

406{
407 int i;
408 float4 *arrdata;
409
410 Assert(win != NULL);
411
412 if (ARR_NDIM(win) != 1)
414 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
415 errmsg("array of weight must be one-dimensional")));
416
419 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
420 errmsg("array of weight is too short")));
421
422 if (array_contains_nulls(win))
424 (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
425 errmsg("array of weight must not contain nulls")));
426
427 arrdata = (float4 *) ARR_DATA_PTR(win);
428 for (i = 0; i < NUM_WEIGHTS; i++)
429 {
430 ws[i] = (arrdata[i] >= 0) ? arrdata[i] : default_weights[i];
431 if (ws[i] > 1.0)
433 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
434 errmsg("weight out of range")));
435 }
436}
#define ARR_NDIM(a)
Definition: array.h:290
#define ARR_DATA_PTR(a)
Definition: array.h:322
#define ARR_DIMS(a)
Definition: array.h:294
bool array_contains_nulls(ArrayType *array)
Definition: arrayfuncs.c:3767
int ArrayGetNItems(int ndim, const int *dims)
Definition: arrayutils.c:57
#define Assert(condition)
Definition: c.h:815

References ARR_DATA_PTR, ARR_DIMS, ARR_NDIM, array_contains_nulls(), ArrayGetNItems(), Assert, default_weights, ereport, errcode(), errmsg(), ERROR, i, and NUM_WEIGHTS.

Referenced by ts_rank_wtt(), ts_rank_wttf(), ts_rankcd_wtt(), and ts_rankcd_wttf().

◆ resetQueryRepresentation()

static void resetQueryRepresentation ( QueryRepresentation qr,
bool  reverseinsert 
)
static

Definition at line 598 of file tsrank.c.

599{
600 int i;
601
602 for (i = 0; i < qr->query->size; i++)
603 {
604 qr->operandData[i].operandexists = false;
605 qr->operandData[i].reverseinsert = reverseinsert;
606 qr->operandData[i].npos = 0;
607 }
608}

References i, QueryRepresentationOperand::npos, QueryRepresentation::operandData, QueryRepresentationOperand::operandexists, QueryRepresentation::query, QueryRepresentationOperand::reverseinsert, and TSQueryData::size.

Referenced by Cover().

◆ SortAndUniqItems()

static QueryOperand ** SortAndUniqItems ( TSQuery  q,
int *  size 
)
static

Definition at line 155 of file tsrank.c.

156{
157 char *operand = GETOPERAND(q);
158 QueryItem *item = GETQUERY(q);
160 **ptr,
161 **prevptr;
162
163 ptr = res = (QueryOperand **) palloc(sizeof(QueryOperand *) * *size);
164
165 /* Collect all operands from the tree to res */
166 while ((*size)--)
167 {
168 if (item->type == QI_VAL)
169 {
170 *ptr = (QueryOperand *) item;
171 ptr++;
172 }
173 item++;
174 }
175
176 *size = ptr - res;
177 if (*size < 2)
178 return res;
179
180 qsort_arg(res, *size, sizeof(QueryOperand *), compareQueryOperand, operand);
181
182 ptr = res + 1;
183 prevptr = res;
184
185 /* remove duplicates */
186 while (ptr - res < *size)
187 {
188 if (compareQueryOperand(ptr, prevptr, operand) != 0)
189 {
190 prevptr++;
191 *prevptr = *ptr;
192 }
193 ptr++;
194 }
195
196 *size = prevptr + 1 - res;
197 return res;
198}
void qsort_arg(void *base, size_t nel, size_t elsize, qsort_arg_comparator cmp, void *arg)
static int compareQueryOperand(const void *a, const void *b, void *arg)
Definition: tsrank.c:136

References compareQueryOperand(), GETOPERAND, GETQUERY, palloc(), QI_VAL, qsort_arg(), res, size, and QueryItem::type.

Referenced by calc_rank_and(), and calc_rank_or().

◆ ts_rank_tt()

Datum ts_rank_tt ( PG_FUNCTION_ARGS  )

Definition at line 491 of file tsrank.c.

492{
494 TSQuery query = PG_GETARG_TSQUERY(1);
495 float res;
496
498
499 PG_FREE_IF_COPY(txt, 0);
500 PG_FREE_IF_COPY(query, 1);
502}
#define PG_FREE_IF_COPY(ptr, n)
Definition: fmgr.h:260
#define PG_RETURN_FLOAT4(x)
Definition: fmgr.h:366
#define PG_GETARG_TSVECTOR(n)
Definition: ts_type.h:135
#define PG_GETARG_TSQUERY(n)
Definition: ts_type.h:266
static float calc_rank(const float *w, TSVector t, TSQuery q, int32 method)
Definition: tsrank.c:358
#define DEF_NORM_METHOD
Definition: tsrank.c:36

References calc_rank(), DEF_NORM_METHOD, default_weights, PG_FREE_IF_COPY, PG_GETARG_TSQUERY, PG_GETARG_TSVECTOR, PG_RETURN_FLOAT4, and res.

◆ ts_rank_ttf()

Datum ts_rank_ttf ( PG_FUNCTION_ARGS  )

Definition at line 476 of file tsrank.c.

477{
479 TSQuery query = PG_GETARG_TSQUERY(1);
480 int method = PG_GETARG_INT32(2);
481 float res;
482
483 res = calc_rank(default_weights, txt, query, method);
484
485 PG_FREE_IF_COPY(txt, 0);
486 PG_FREE_IF_COPY(query, 1);
488}
#define PG_GETARG_INT32(n)
Definition: fmgr.h:269

References calc_rank(), default_weights, PG_FREE_IF_COPY, PG_GETARG_INT32, PG_GETARG_TSQUERY, PG_GETARG_TSVECTOR, PG_RETURN_FLOAT4, and res.

◆ ts_rank_wtt()

Datum ts_rank_wtt ( PG_FUNCTION_ARGS  )

Definition at line 458 of file tsrank.c.

459{
462 TSQuery query = PG_GETARG_TSQUERY(2);
463 float weights[NUM_WEIGHTS];
464 float res;
465
466 getWeights(win, weights);
467 res = calc_rank(weights, txt, query, DEF_NORM_METHOD);
468
469 PG_FREE_IF_COPY(win, 0);
470 PG_FREE_IF_COPY(txt, 1);
471 PG_FREE_IF_COPY(query, 2);
473}
#define PG_GETARG_DATUM(n)
Definition: fmgr.h:268
#define PG_DETOAST_DATUM(datum)
Definition: fmgr.h:240
static void getWeights(ArrayType *win, float *ws)
Definition: tsrank.c:405

References calc_rank(), DEF_NORM_METHOD, getWeights(), NUM_WEIGHTS, PG_DETOAST_DATUM, PG_FREE_IF_COPY, PG_GETARG_DATUM, PG_GETARG_TSQUERY, PG_GETARG_TSVECTOR, PG_RETURN_FLOAT4, and res.

◆ ts_rank_wttf()

Datum ts_rank_wttf ( PG_FUNCTION_ARGS  )

Definition at line 439 of file tsrank.c.

440{
443 TSQuery query = PG_GETARG_TSQUERY(2);
444 int method = PG_GETARG_INT32(3);
445 float weights[NUM_WEIGHTS];
446 float res;
447
448 getWeights(win, weights);
449 res = calc_rank(weights, txt, query, method);
450
451 PG_FREE_IF_COPY(win, 0);
452 PG_FREE_IF_COPY(txt, 1);
453 PG_FREE_IF_COPY(query, 2);
455}

References calc_rank(), getWeights(), NUM_WEIGHTS, PG_DETOAST_DATUM, PG_FREE_IF_COPY, PG_GETARG_DATUM, PG_GETARG_INT32, PG_GETARG_TSQUERY, PG_GETARG_TSVECTOR, PG_RETURN_FLOAT4, and res.

◆ ts_rankcd_tt()

Datum ts_rankcd_tt ( PG_FUNCTION_ARGS  )

Definition at line 1010 of file tsrank.c.

1011{
1013 TSQuery query = PG_GETARG_TSQUERY(1);
1014 float res;
1015
1017
1018 PG_FREE_IF_COPY(txt, 0);
1019 PG_FREE_IF_COPY(query, 1);
1021}
static float4 calc_rank_cd(const float4 *arrdata, TSVector txt, TSQuery query, int method)
Definition: tsrank.c:855

References calc_rank_cd(), DEF_NORM_METHOD, default_weights, PG_FREE_IF_COPY, PG_GETARG_TSQUERY, PG_GETARG_TSVECTOR, PG_RETURN_FLOAT4, and res.

◆ ts_rankcd_ttf()

Datum ts_rankcd_ttf ( PG_FUNCTION_ARGS  )

Definition at line 995 of file tsrank.c.

996{
998 TSQuery query = PG_GETARG_TSQUERY(1);
999 int method = PG_GETARG_INT32(2);
1000 float res;
1001
1002 res = calc_rank_cd(default_weights, txt, query, method);
1003
1004 PG_FREE_IF_COPY(txt, 0);
1005 PG_FREE_IF_COPY(query, 1);
1007}

References calc_rank_cd(), default_weights, PG_FREE_IF_COPY, PG_GETARG_INT32, PG_GETARG_TSQUERY, PG_GETARG_TSVECTOR, PG_RETURN_FLOAT4, and res.

◆ ts_rankcd_wtt()

Datum ts_rankcd_wtt ( PG_FUNCTION_ARGS  )

Definition at line 977 of file tsrank.c.

978{
981 TSQuery query = PG_GETARG_TSQUERY(2);
982 float weights[NUM_WEIGHTS];
983 float res;
984
985 getWeights(win, weights);
986 res = calc_rank_cd(weights, txt, query, DEF_NORM_METHOD);
987
988 PG_FREE_IF_COPY(win, 0);
989 PG_FREE_IF_COPY(txt, 1);
990 PG_FREE_IF_COPY(query, 2);
992}

References calc_rank_cd(), DEF_NORM_METHOD, getWeights(), NUM_WEIGHTS, PG_DETOAST_DATUM, PG_FREE_IF_COPY, PG_GETARG_DATUM, PG_GETARG_TSQUERY, PG_GETARG_TSVECTOR, PG_RETURN_FLOAT4, and res.

◆ ts_rankcd_wttf()

Datum ts_rankcd_wttf ( PG_FUNCTION_ARGS  )

Definition at line 958 of file tsrank.c.

959{
962 TSQuery query = PG_GETARG_TSQUERY(2);
963 int method = PG_GETARG_INT32(3);
964 float weights[NUM_WEIGHTS];
965 float res;
966
967 getWeights(win, weights);
968 res = calc_rank_cd(weights, txt, query, method);
969
970 PG_FREE_IF_COPY(win, 0);
971 PG_FREE_IF_COPY(txt, 1);
972 PG_FREE_IF_COPY(query, 2);
974}

References calc_rank_cd(), getWeights(), NUM_WEIGHTS, PG_DETOAST_DATUM, PG_FREE_IF_COPY, PG_GETARG_DATUM, PG_GETARG_INT32, PG_GETARG_TSQUERY, PG_GETARG_TSVECTOR, PG_RETURN_FLOAT4, and res.

◆ word_distance()

static float4 word_distance ( int32  w)
static

Definition at line 45 of file tsrank.c.

46{
47 if (w > 100)
48 return 1e-30f;
49
50 return 1.0 / (1.005 + 0.05 * exp(((float4) w) / 1.5 - 2));
51}

Referenced by calc_rank_and().

Variable Documentation

◆ default_weights

const float default_weights[NUM_WEIGHTS] = {0.1f, 0.2f, 0.4f, 1.0f}
static

Definition at line 25 of file tsrank.c.

Referenced by calc_rank_cd(), getWeights(), ts_rank_tt(), ts_rank_ttf(), ts_rankcd_tt(), and ts_rankcd_ttf().