PostgreSQL Source Code git master
tsrank.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * tsrank.c
4 * rank tsvector by tsquery
5 *
6 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
7 *
8 *
9 * IDENTIFICATION
10 * src/backend/utils/adt/tsrank.c
11 *
12 *-------------------------------------------------------------------------
13 */
14#include "postgres.h"
15
16#include <limits.h>
17#include <math.h>
18
19#include "miscadmin.h"
20#include "tsearch/ts_utils.h"
21#include "utils/array.h"
22#include "utils/fmgrprotos.h"
23
24#define NUM_WEIGHTS 4
25static const float default_weights[NUM_WEIGHTS] = {0.1f, 0.2f, 0.4f, 1.0f};
26
27#define wpos(wep) ( w[ WEP_GETWEIGHT(wep) ] )
28
29#define RANK_NO_NORM 0x00
30#define RANK_NORM_LOGLENGTH 0x01
31#define RANK_NORM_LENGTH 0x02
32#define RANK_NORM_EXTDIST 0x04
33#define RANK_NORM_UNIQ 0x08
34#define RANK_NORM_LOGUNIQ 0x10
35#define RANK_NORM_RDIVRPLUS1 0x20
36#define DEF_NORM_METHOD RANK_NO_NORM
37
38static float calc_rank_or(const float *w, TSVector t, TSQuery q);
39static float calc_rank_and(const float *w, TSVector t, TSQuery q);
40
41/*
42 * Returns a weight of a word collocation
43 */
44static float4
46{
47 if (w > 100)
48 return 1e-30f;
49
50 return 1.0 / (1.005 + 0.05 * exp(((float4) w) / 1.5 - 2));
51}
52
53static int
55{
56 WordEntry *ptr = ARRPTR(t),
57 *end = (WordEntry *) STRPTR(t);
58 int len = 0;
59
60 while (ptr < end)
61 {
62 int clen = POSDATALEN(t, ptr);
63
64 if (clen == 0)
65 len += 1;
66 else
67 len += clen;
68
69 ptr++;
70 }
71
72 return len;
73}
74
75
76#define WordECompareQueryItem(e,q,p,i,m) \
77 tsCompareString((q) + (i)->distance, (i)->length, \
78 (e) + (p)->pos, (p)->len, (m))
79
80
81/*
82 * Returns a pointer to a WordEntry's array corresponding to 'item' from
83 * tsvector 't'. 'q' is the TSQuery containing 'item'.
84 * Returns NULL if not found.
85 */
86static WordEntry *
88{
89 WordEntry *StopLow = ARRPTR(t);
90 WordEntry *StopHigh = (WordEntry *) STRPTR(t);
91 WordEntry *StopMiddle = StopHigh;
92 int difference;
93
94 *nitem = 0;
95
96 /* Loop invariant: StopLow <= item < StopHigh */
97 while (StopLow < StopHigh)
98 {
99 StopMiddle = StopLow + (StopHigh - StopLow) / 2;
100 difference = WordECompareQueryItem(STRPTR(t), GETOPERAND(q), StopMiddle, item, false);
101 if (difference == 0)
102 {
103 StopHigh = StopMiddle;
104 *nitem = 1;
105 break;
106 }
107 else if (difference > 0)
108 StopLow = StopMiddle + 1;
109 else
110 StopHigh = StopMiddle;
111 }
112
113 if (item->prefix)
114 {
115 if (StopLow >= StopHigh)
116 StopMiddle = StopHigh;
117
118 *nitem = 0;
119
120 while (StopMiddle < (WordEntry *) STRPTR(t) &&
121 WordECompareQueryItem(STRPTR(t), GETOPERAND(q), StopMiddle, item, true) == 0)
122 {
123 (*nitem)++;
124 StopMiddle++;
125 }
126 }
127
128 return (*nitem > 0) ? StopHigh : NULL;
129}
130
131
132/*
133 * sort QueryOperands by (length, word)
134 */
135static int
136compareQueryOperand(const void *a, const void *b, void *arg)
137{
138 char *operand = (char *) arg;
139 QueryOperand *qa = (*(QueryOperand *const *) a);
140 QueryOperand *qb = (*(QueryOperand *const *) b);
141
142 return tsCompareString(operand + qa->distance, qa->length,
143 operand + qb->distance, qb->length,
144 false);
145}
146
147/*
148 * Returns a sorted, de-duplicated array of QueryOperands in a query.
149 * The returned QueryOperands are pointers to the original QueryOperands
150 * in the query.
151 *
152 * Length of the returned array is stored in *size
153 */
154static QueryOperand **
156{
157 char *operand = GETOPERAND(q);
158 QueryItem *item = GETQUERY(q);
160 **ptr,
161 **prevptr;
162
163 ptr = res = (QueryOperand **) palloc(sizeof(QueryOperand *) * *size);
164
165 /* Collect all operands from the tree to res */
166 while ((*size)--)
167 {
168 if (item->type == QI_VAL)
169 {
170 *ptr = (QueryOperand *) item;
171 ptr++;
172 }
173 item++;
174 }
175
176 *size = ptr - res;
177 if (*size < 2)
178 return res;
179
180 qsort_arg(res, *size, sizeof(QueryOperand *), compareQueryOperand, operand);
181
182 ptr = res + 1;
183 prevptr = res;
184
185 /* remove duplicates */
186 while (ptr - res < *size)
187 {
188 if (compareQueryOperand(ptr, prevptr, operand) != 0)
189 {
190 prevptr++;
191 *prevptr = *ptr;
192 }
193 ptr++;
194 }
195
196 *size = prevptr + 1 - res;
197 return res;
198}
199
200static float
201calc_rank_and(const float *w, TSVector t, TSQuery q)
202{
203 WordEntryPosVector **pos;
204 WordEntryPosVector1 posnull;
205 WordEntryPosVector *POSNULL;
206 int i,
207 k,
208 l,
209 p;
210 WordEntry *entry,
211 *firstentry;
212 WordEntryPos *post,
213 *ct;
214 int32 dimt,
215 lenct,
216 dist,
217 nitem;
218 float res = -1.0;
219 QueryOperand **item;
220 int size = q->size;
221
222 item = SortAndUniqItems(q, &size);
223 if (size < 2)
224 {
225 pfree(item);
226 return calc_rank_or(w, t, q);
227 }
228 pos = (WordEntryPosVector **) palloc0(sizeof(WordEntryPosVector *) * q->size);
229
230 /* A dummy WordEntryPos array to use when haspos is false */
231 posnull.npos = 1;
232 posnull.pos[0] = 0;
233 WEP_SETPOS(posnull.pos[0], MAXENTRYPOS - 1);
234 POSNULL = (WordEntryPosVector *) &posnull;
235
236 for (i = 0; i < size; i++)
237 {
238 firstentry = entry = find_wordentry(t, q, item[i], &nitem);
239 if (!entry)
240 continue;
241
242 while (entry - firstentry < nitem)
243 {
244 if (entry->haspos)
245 pos[i] = _POSVECPTR(t, entry);
246 else
247 pos[i] = POSNULL;
248
249 dimt = pos[i]->npos;
250 post = pos[i]->pos;
251 for (k = 0; k < i; k++)
252 {
253 if (!pos[k])
254 continue;
255 lenct = pos[k]->npos;
256 ct = pos[k]->pos;
257 for (l = 0; l < dimt; l++)
258 {
259 for (p = 0; p < lenct; p++)
260 {
261 dist = abs((int) WEP_GETPOS(post[l]) - (int) WEP_GETPOS(ct[p]));
262 if (dist || (dist == 0 && (pos[i] == POSNULL || pos[k] == POSNULL)))
263 {
264 float curw;
265
266 if (!dist)
267 dist = MAXENTRYPOS;
268 curw = sqrt(wpos(post[l]) * wpos(ct[p]) * word_distance(dist));
269 res = (res < 0) ? curw : 1.0 - (1.0 - res) * (1.0 - curw);
270 }
271 }
272 }
273 }
274
275 entry++;
276 }
277 }
278 pfree(pos);
279 pfree(item);
280 return res;
281}
282
283static float
284calc_rank_or(const float *w, TSVector t, TSQuery q)
285{
286 WordEntry *entry,
287 *firstentry;
288 WordEntryPosVector1 posnull;
289 WordEntryPos *post;
290 int32 dimt,
291 j,
292 i,
293 nitem;
294 float res = 0.0;
295 QueryOperand **item;
296 int size = q->size;
297
298 /* A dummy WordEntryPos array to use when haspos is false */
299 posnull.npos = 1;
300 posnull.pos[0] = 0;
301
302 item = SortAndUniqItems(q, &size);
303
304 for (i = 0; i < size; i++)
305 {
306 float resj,
307 wjm;
308 int32 jm;
309
310 firstentry = entry = find_wordentry(t, q, item[i], &nitem);
311 if (!entry)
312 continue;
313
314 while (entry - firstentry < nitem)
315 {
316 if (entry->haspos)
317 {
318 dimt = POSDATALEN(t, entry);
319 post = POSDATAPTR(t, entry);
320 }
321 else
322 {
323 dimt = posnull.npos;
324 post = posnull.pos;
325 }
326
327 resj = 0.0;
328 wjm = -1.0;
329 jm = 0;
330 for (j = 0; j < dimt; j++)
331 {
332 resj = resj + wpos(post[j]) / ((j + 1) * (j + 1));
333 if (wpos(post[j]) > wjm)
334 {
335 wjm = wpos(post[j]);
336 jm = j;
337 }
338 }
339/*
340 limit (sum(1/i^2),i=1,inf) = pi^2/6
341 resj = sum(wi/i^2),i=1,noccurrence,
342 wi - should be sorted desc,
343 don't sort for now, just choose maximum weight. This should be corrected
344 Oleg Bartunov
345*/
346 res = res + (wjm + resj - wjm / ((jm + 1) * (jm + 1))) / 1.64493406685;
347
348 entry++;
349 }
350 }
351 if (size > 0)
352 res = res / size;
353 pfree(item);
354 return res;
355}
356
357static float
358calc_rank(const float *w, TSVector t, TSQuery q, int32 method)
359{
360 QueryItem *item = GETQUERY(q);
361 float res = 0.0;
362 int len;
363
364 if (!t->size || !q->size)
365 return 0.0;
366
367 /* XXX: What about NOT? */
368 res = (item->type == QI_OPR && (item->qoperator.oper == OP_AND ||
369 item->qoperator.oper == OP_PHRASE)) ?
370 calc_rank_and(w, t, q) :
371 calc_rank_or(w, t, q);
372
373 if (res < 0)
374 res = 1e-20f;
375
376 if ((method & RANK_NORM_LOGLENGTH) && t->size > 0)
377 res /= log((double) (cnt_length(t) + 1)) / log(2.0);
378
379 if (method & RANK_NORM_LENGTH)
380 {
381 len = cnt_length(t);
382 if (len > 0)
383 res /= (float) len;
384 }
385
386 /* RANK_NORM_EXTDIST not applicable */
387
388 if ((method & RANK_NORM_UNIQ) && t->size > 0)
389 res /= (float) (t->size);
390
391 if ((method & RANK_NORM_LOGUNIQ) && t->size > 0)
392 res /= log((double) (t->size + 1)) / log(2.0);
393
394 if (method & RANK_NORM_RDIVRPLUS1)
395 res /= (res + 1);
396
397 return res;
398}
399
400/*
401 * Extract weights from an array. The weights are stored in *ws, which must
402 * have space for NUM_WEIGHTS elements.
403 */
404static void
405getWeights(ArrayType *win, float *ws)
406{
407 int i;
408 float4 *arrdata;
409
410 Assert(win != NULL);
411
412 if (ARR_NDIM(win) != 1)
414 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
415 errmsg("array of weight must be one-dimensional")));
416
419 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
420 errmsg("array of weight is too short")));
421
422 if (array_contains_nulls(win))
424 (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
425 errmsg("array of weight must not contain nulls")));
426
427 arrdata = (float4 *) ARR_DATA_PTR(win);
428 for (i = 0; i < NUM_WEIGHTS; i++)
429 {
430 ws[i] = (arrdata[i] >= 0) ? arrdata[i] : default_weights[i];
431 if (ws[i] > 1.0)
433 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
434 errmsg("weight out of range")));
435 }
436}
437
438Datum
440{
443 TSQuery query = PG_GETARG_TSQUERY(2);
444 int method = PG_GETARG_INT32(3);
445 float weights[NUM_WEIGHTS];
446 float res;
447
448 getWeights(win, weights);
449 res = calc_rank(weights, txt, query, method);
450
451 PG_FREE_IF_COPY(win, 0);
452 PG_FREE_IF_COPY(txt, 1);
453 PG_FREE_IF_COPY(query, 2);
455}
456
457Datum
459{
462 TSQuery query = PG_GETARG_TSQUERY(2);
463 float weights[NUM_WEIGHTS];
464 float res;
465
466 getWeights(win, weights);
467 res = calc_rank(weights, txt, query, DEF_NORM_METHOD);
468
469 PG_FREE_IF_COPY(win, 0);
470 PG_FREE_IF_COPY(txt, 1);
471 PG_FREE_IF_COPY(query, 2);
473}
474
475Datum
477{
479 TSQuery query = PG_GETARG_TSQUERY(1);
480 int method = PG_GETARG_INT32(2);
481 float res;
482
483 res = calc_rank(default_weights, txt, query, method);
484
485 PG_FREE_IF_COPY(txt, 0);
486 PG_FREE_IF_COPY(query, 1);
488}
489
490Datum
492{
494 TSQuery query = PG_GETARG_TSQUERY(1);
495 float res;
496
498
499 PG_FREE_IF_COPY(txt, 0);
500 PG_FREE_IF_COPY(query, 1);
502}
503
504typedef struct
505{
506 union
507 {
508 struct
509 { /* compiled doc representation */
512 } query;
513 struct
514 { /* struct is used for preparing doc
515 * representation */
518 } map;
522
523static int
524compareDocR(const void *va, const void *vb)
525{
526 const DocRepresentation *a = (const DocRepresentation *) va;
527 const DocRepresentation *b = (const DocRepresentation *) vb;
528
529 if (WEP_GETPOS(a->pos) == WEP_GETPOS(b->pos))
530 {
531 if (WEP_GETWEIGHT(a->pos) == WEP_GETWEIGHT(b->pos))
532 {
533 if (a->data.map.entry == b->data.map.entry)
534 return 0;
535
536 return (a->data.map.entry > b->data.map.entry) ? 1 : -1;
537 }
538
539 return (WEP_GETWEIGHT(a->pos) > WEP_GETWEIGHT(b->pos)) ? 1 : -1;
540 }
541
542 return (WEP_GETPOS(a->pos) > WEP_GETPOS(b->pos)) ? 1 : -1;
543}
544
545#define MAXQROPOS MAXENTRYPOS
546typedef struct
547{
549 bool reverseinsert; /* indicates insert order, true means
550 * descending order */
554
555typedef struct
556{
560
561#define QR_GET_OPERAND_DATA(q, v) \
562 ( (q)->operandData + (((QueryItem*)(v)) - GETQUERY((q)->query)) )
563
564/*
565 * TS_execute callback for matching a tsquery operand to QueryRepresentation
566 */
567static TSTernaryValue
570{
571 QueryRepresentation *qr = (QueryRepresentation *) checkval;
573
574 if (!opData->operandexists)
575 return TS_NO;
576
577 if (data)
578 {
579 data->npos = opData->npos;
580 data->pos = opData->pos;
581 if (opData->reverseinsert)
582 data->pos += MAXQROPOS - opData->npos;
583 }
584
585 return TS_YES;
586}
587
588typedef struct
589{
590 int pos;
591 int p;
592 int q;
595} CoverExt;
596
597static void
599{
600 int i;
601
602 for (i = 0; i < qr->query->size; i++)
603 {
604 qr->operandData[i].operandexists = false;
605 qr->operandData[i].reverseinsert = reverseinsert;
606 qr->operandData[i].npos = 0;
607 }
608}
609
610static void
612{
613 int i;
614 int lastPos;
616
617 for (i = 0; i < entry->data.query.nitem; i++)
618 {
619 if (entry->data.query.items[i]->type != QI_VAL)
620 continue;
621
622 opData = QR_GET_OPERAND_DATA(qr, entry->data.query.items[i]);
623
624 opData->operandexists = true;
625
626 if (opData->npos == 0)
627 {
628 lastPos = (opData->reverseinsert) ? (MAXQROPOS - 1) : 0;
629 opData->pos[lastPos] = entry->pos;
630 opData->npos++;
631 continue;
632 }
633
634 lastPos = opData->reverseinsert ?
635 (MAXQROPOS - opData->npos) :
636 (opData->npos - 1);
637
638 if (WEP_GETPOS(opData->pos[lastPos]) != WEP_GETPOS(entry->pos))
639 {
640 lastPos = opData->reverseinsert ?
641 (MAXQROPOS - 1 - opData->npos) :
642 (opData->npos);
643
644 opData->pos[lastPos] = entry->pos;
645 opData->npos++;
646 }
647 }
648}
649
650static bool
652{
654 int lastpos = ext->pos;
655 bool found = false;
656
657 /*
658 * since this function recurses, it could be driven to stack overflow.
659 * (though any decent compiler will optimize away the tail-recursion.
660 */
662
663 resetQueryRepresentation(qr, false);
664
665 ext->p = INT_MAX;
666 ext->q = 0;
667 ptr = doc + ext->pos;
668
669 /* find upper bound of cover from current position, move up */
670 while (ptr - doc < len)
671 {
673
674 if (TS_execute(GETQUERY(qr->query), qr,
676 {
677 if (WEP_GETPOS(ptr->pos) > ext->q)
678 {
679 ext->q = WEP_GETPOS(ptr->pos);
680 ext->end = ptr;
681 lastpos = ptr - doc;
682 found = true;
683 }
684 break;
685 }
686 ptr++;
687 }
688
689 if (!found)
690 return false;
691
692 resetQueryRepresentation(qr, true);
693
694 ptr = doc + lastpos;
695
696 /* find lower bound of cover from found upper bound, move down */
697 while (ptr >= doc + ext->pos)
698 {
699 /*
700 * we scan doc from right to left, so pos info in reverse order!
701 */
703
704 if (TS_execute(GETQUERY(qr->query), qr,
706 {
707 if (WEP_GETPOS(ptr->pos) < ext->p)
708 {
709 ext->begin = ptr;
710 ext->p = WEP_GETPOS(ptr->pos);
711 }
712 break;
713 }
714 ptr--;
715 }
716
717 if (ext->p <= ext->q)
718 {
719 /*
720 * set position for next try to next lexeme after beginning of found
721 * cover
722 */
723 ext->pos = (ptr - doc) + 1;
724 return true;
725 }
726
727 ext->pos++;
728 return Cover(doc, len, qr, ext);
729}
730
731static DocRepresentation *
733{
734 QueryItem *item = GETQUERY(qr->query);
735 WordEntry *entry,
736 *firstentry;
737 WordEntryPos *post;
738 int32 dimt, /* number of 'post' items */
739 j,
740 i,
741 nitem;
742 int len = qr->query->size * 4,
743 cur = 0;
745
746 doc = (DocRepresentation *) palloc(sizeof(DocRepresentation) * len);
747
748 /*
749 * Iterate through query to make DocRepresentation for words and it's
750 * entries satisfied by query
751 */
752 for (i = 0; i < qr->query->size; i++)
753 {
754 QueryOperand *curoperand;
755
756 if (item[i].type != QI_VAL)
757 continue;
758
759 curoperand = &item[i].qoperand;
760
761 firstentry = entry = find_wordentry(txt, qr->query, curoperand, &nitem);
762 if (!entry)
763 continue;
764
765 /* iterations over entries in tsvector */
766 while (entry - firstentry < nitem)
767 {
768 if (entry->haspos)
769 {
770 dimt = POSDATALEN(txt, entry);
771 post = POSDATAPTR(txt, entry);
772 }
773 else
774 {
775 /* ignore words without positions */
776 entry++;
777 continue;
778 }
779
780 while (cur + dimt >= len)
781 {
782 len *= 2;
783 doc = (DocRepresentation *) repalloc(doc, sizeof(DocRepresentation) * len);
784 }
785
786 /* iterations over entry's positions */
787 for (j = 0; j < dimt; j++)
788 {
789 if (curoperand->weight == 0 ||
790 curoperand->weight & (1 << WEP_GETWEIGHT(post[j])))
791 {
792 doc[cur].pos = post[j];
793 doc[cur].data.map.entry = entry;
794 doc[cur].data.map.item = (QueryItem *) curoperand;
795 cur++;
796 }
797 }
798
799 entry++;
800 }
801 }
802
803 if (cur > 0)
804 {
805 DocRepresentation *rptr = doc + 1,
806 *wptr = doc,
807 storage;
808
809 /*
810 * Sort representation in ascending order by pos and entry
811 */
812 qsort(doc, cur, sizeof(DocRepresentation), compareDocR);
813
814 /*
815 * Join QueryItem per WordEntry and its position
816 */
817 storage.pos = doc->pos;
818 storage.data.query.items = palloc(sizeof(QueryItem *) * qr->query->size);
819 storage.data.query.items[0] = doc->data.map.item;
820 storage.data.query.nitem = 1;
821
822 while (rptr - doc < cur)
823 {
824 if (rptr->pos == (rptr - 1)->pos &&
825 rptr->data.map.entry == (rptr - 1)->data.map.entry)
826 {
827 storage.data.query.items[storage.data.query.nitem] = rptr->data.map.item;
828 storage.data.query.nitem++;
829 }
830 else
831 {
832 *wptr = storage;
833 wptr++;
834 storage.pos = rptr->pos;
835 storage.data.query.items = palloc(sizeof(QueryItem *) * qr->query->size);
836 storage.data.query.items[0] = rptr->data.map.item;
837 storage.data.query.nitem = 1;
838 }
839
840 rptr++;
841 }
842
843 *wptr = storage;
844 wptr++;
845
846 *doclen = wptr - doc;
847 return doc;
848 }
849
850 pfree(doc);
851 return NULL;
852}
853
854static float4
855calc_rank_cd(const float4 *arrdata, TSVector txt, TSQuery query, int method)
856{
858 int len,
859 i,
860 doclen = 0;
861 CoverExt ext;
862 double Wdoc = 0.0;
863 double invws[NUM_WEIGHTS];
864 double SumDist = 0.0,
865 PrevExtPos = 0.0;
866 int NExtent = 0;
868
869
870 for (i = 0; i < NUM_WEIGHTS; i++)
871 {
872 invws[i] = ((double) ((arrdata[i] >= 0) ? arrdata[i] : default_weights[i]));
873 if (invws[i] > 1.0)
875 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
876 errmsg("weight out of range")));
877 invws[i] = 1.0 / invws[i];
878 }
879
880 qr.query = query;
882 palloc0(sizeof(QueryRepresentationOperand) * query->size);
883
884 doc = get_docrep(txt, &qr, &doclen);
885 if (!doc)
886 {
887 pfree(qr.operandData);
888 return 0.0;
889 }
890
891 MemSet(&ext, 0, sizeof(CoverExt));
892 while (Cover(doc, doclen, &qr, &ext))
893 {
894 double Cpos = 0.0;
895 double InvSum = 0.0;
896 double CurExtPos;
897 int nNoise;
898 DocRepresentation *ptr = ext.begin;
899
900 while (ptr <= ext.end)
901 {
902 InvSum += invws[WEP_GETWEIGHT(ptr->pos)];
903 ptr++;
904 }
905
906 Cpos = ((double) (ext.end - ext.begin + 1)) / InvSum;
907
908 /*
909 * if doc are big enough then ext.q may be equal to ext.p due to limit
910 * of positional information. In this case we approximate number of
911 * noise word as half cover's length
912 */
913 nNoise = (ext.q - ext.p) - (ext.end - ext.begin);
914 if (nNoise < 0)
915 nNoise = (ext.end - ext.begin) / 2;
916 Wdoc += Cpos / ((double) (1 + nNoise));
917
918 CurExtPos = ((double) (ext.q + ext.p)) / 2.0;
919 if (NExtent > 0 && CurExtPos > PrevExtPos /* prevent division by
920 * zero in a case of
921 * multiple lexize */ )
922 SumDist += 1.0 / (CurExtPos - PrevExtPos);
923
924 PrevExtPos = CurExtPos;
925 NExtent++;
926 }
927
928 if ((method & RANK_NORM_LOGLENGTH) && txt->size > 0)
929 Wdoc /= log((double) (cnt_length(txt) + 1));
930
931 if (method & RANK_NORM_LENGTH)
932 {
933 len = cnt_length(txt);
934 if (len > 0)
935 Wdoc /= (double) len;
936 }
937
938 if ((method & RANK_NORM_EXTDIST) && NExtent > 0 && SumDist > 0)
939 Wdoc /= ((double) NExtent) / SumDist;
940
941 if ((method & RANK_NORM_UNIQ) && txt->size > 0)
942 Wdoc /= (double) (txt->size);
943
944 if ((method & RANK_NORM_LOGUNIQ) && txt->size > 0)
945 Wdoc /= log((double) (txt->size + 1)) / log(2.0);
946
947 if (method & RANK_NORM_RDIVRPLUS1)
948 Wdoc /= (Wdoc + 1);
949
950 pfree(doc);
951
952 pfree(qr.operandData);
953
954 return (float4) Wdoc;
955}
956
957Datum
959{
962 TSQuery query = PG_GETARG_TSQUERY(2);
963 int method = PG_GETARG_INT32(3);
964 float weights[NUM_WEIGHTS];
965 float res;
966
967 getWeights(win, weights);
968 res = calc_rank_cd(weights, txt, query, method);
969
970 PG_FREE_IF_COPY(win, 0);
971 PG_FREE_IF_COPY(txt, 1);
972 PG_FREE_IF_COPY(query, 2);
974}
975
976Datum
978{
981 TSQuery query = PG_GETARG_TSQUERY(2);
982 float weights[NUM_WEIGHTS];
983 float res;
984
985 getWeights(win, weights);
986 res = calc_rank_cd(weights, txt, query, DEF_NORM_METHOD);
987
988 PG_FREE_IF_COPY(win, 0);
989 PG_FREE_IF_COPY(txt, 1);
990 PG_FREE_IF_COPY(query, 2);
992}
993
994Datum
996{
998 TSQuery query = PG_GETARG_TSQUERY(1);
999 int method = PG_GETARG_INT32(2);
1000 float res;
1001
1002 res = calc_rank_cd(default_weights, txt, query, method);
1003
1004 PG_FREE_IF_COPY(txt, 0);
1005 PG_FREE_IF_COPY(query, 1);
1007}
1008
1009Datum
1011{
1013 TSQuery query = PG_GETARG_TSQUERY(1);
1014 float res;
1015
1017
1018 PG_FREE_IF_COPY(txt, 0);
1019 PG_FREE_IF_COPY(query, 1);
1021}
#define GETQUERY(x)
Definition: _int.h:157
#define ARR_NDIM(a)
Definition: array.h:290
#define ARR_DATA_PTR(a)
Definition: array.h:322
#define ARR_DIMS(a)
Definition: array.h:294
bool array_contains_nulls(ArrayType *array)
Definition: arrayfuncs.c:3767
int ArrayGetNItems(int ndim, const int *dims)
Definition: arrayutils.c:57
#define Assert(condition)
Definition: c.h:815
int16_t int16
Definition: c.h:483
int32_t int32
Definition: c.h:484
uint32_t uint32
Definition: c.h:488
float float4
Definition: c.h:586
#define MemSet(start, val, len)
Definition: c.h:977
#define ARRPTR(x)
Definition: cube.c:25
struct cursor * cur
Definition: ecpg.c:29
int errcode(int sqlerrcode)
Definition: elog.c:853
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define ERROR
Definition: elog.h:39
#define ereport(elevel,...)
Definition: elog.h:149
#define PG_FREE_IF_COPY(ptr, n)
Definition: fmgr.h:260
#define PG_GETARG_DATUM(n)
Definition: fmgr.h:268
#define PG_DETOAST_DATUM(datum)
Definition: fmgr.h:240
#define PG_GETARG_INT32(n)
Definition: fmgr.h:269
#define PG_RETURN_FLOAT4(x)
Definition: fmgr.h:366
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
Datum difference(PG_FUNCTION_ARGS)
#define STRPTR(x)
Definition: hstore.h:76
#define storage
Definition: indent_codes.h:68
long val
Definition: informix.c:689
int b
Definition: isn.c:69
int a
Definition: isn.c:68
int j
Definition: isn.c:73
int i
Definition: isn.c:72
#define GETOPERAND(x)
Definition: ltree.h:165
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1541
void pfree(void *pointer)
Definition: mcxt.c:1521
void * palloc0(Size size)
Definition: mcxt.c:1347
void * palloc(Size size)
Definition: mcxt.c:1317
void * arg
const void size_t len
const void * data
void qsort_arg(void *base, size_t nel, size_t elsize, qsort_arg_comparator cmp, void *arg)
#define qsort(a, b, c, d)
Definition: port.h:475
uintptr_t Datum
Definition: postgres.h:69
static pg_noinline void Size size
Definition: slab.c:607
void check_stack_depth(void)
Definition: stack_depth.c:95
int pos
Definition: tsrank.c:590
int p
Definition: tsrank.c:591
DocRepresentation * begin
Definition: tsrank.c:593
DocRepresentation * end
Definition: tsrank.c:594
int q
Definition: tsrank.c:592
QueryItem ** items
Definition: tsrank.c:510
WordEntryPos pos
Definition: tsrank.c:520
struct DocRepresentation::@28::@30 map
struct DocRepresentation::@28::@29 query
WordEntry * entry
Definition: tsrank.c:517
QueryItem * item
Definition: tsrank.c:516
union DocRepresentation::@28 data
bool prefix
Definition: ts_type.h:163
uint8 weight
Definition: ts_type.h:159
uint32 distance
Definition: ts_type.h:172
uint32 length
Definition: ts_type.h:171
WordEntryPos pos[MAXQROPOS]
Definition: tsrank.c:552
QueryRepresentationOperand * operandData
Definition: tsrank.c:558
int32 size
Definition: ts_type.h:221
int32 size
Definition: ts_type.h:93
WordEntryPos pos[1]
Definition: ts_type.h:75
WordEntryPos pos[FLEXIBLE_ARRAY_MEMBER]
Definition: ts_type.h:68
uint32 haspos
Definition: ts_type.h:44
#define PG_GETARG_TSVECTOR(n)
Definition: ts_type.h:135
#define WEP_GETPOS(x)
Definition: ts_type.h:80
#define _POSVECPTR(x, e)
Definition: ts_type.h:109
#define MAXENTRYPOS
Definition: ts_type.h:85
#define WEP_SETPOS(x, v)
Definition: ts_type.h:83
#define POSDATALEN(x, e)
Definition: ts_type.h:110
#define PG_GETARG_TSQUERY(n)
Definition: ts_type.h:266
uint16 WordEntryPos
Definition: ts_type.h:63
#define QI_OPR
Definition: ts_type.h:150
#define QI_VAL
Definition: ts_type.h:149
#define OP_AND
Definition: ts_type.h:180
#define OP_PHRASE
Definition: ts_type.h:182
#define POSDATAPTR(x, e)
Definition: ts_type.h:111
#define WEP_GETWEIGHT(x)
Definition: ts_type.h:79
TSTernaryValue
Definition: ts_utils.h:133
@ TS_NO
Definition: ts_utils.h:134
@ TS_YES
Definition: ts_utils.h:135
#define TS_EXEC_EMPTY
Definition: ts_utils.h:188
#define RANK_NORM_RDIVRPLUS1
Definition: tsrank.c:35
Datum ts_rankcd_wtt(PG_FUNCTION_ARGS)
Definition: tsrank.c:977
static DocRepresentation * get_docrep(TSVector txt, QueryRepresentation *qr, int *doclen)
Definition: tsrank.c:732
Datum ts_rankcd_wttf(PG_FUNCTION_ARGS)
Definition: tsrank.c:958
#define WordECompareQueryItem(e, q, p, i, m)
Definition: tsrank.c:76
static int compareDocR(const void *va, const void *vb)
Definition: tsrank.c:524
#define RANK_NORM_UNIQ
Definition: tsrank.c:33
#define RANK_NORM_LOGLENGTH
Definition: tsrank.c:30
static TSTernaryValue checkcondition_QueryOperand(void *checkval, QueryOperand *val, ExecPhraseData *data)
Definition: tsrank.c:568
#define RANK_NORM_LENGTH
Definition: tsrank.c:31
static void resetQueryRepresentation(QueryRepresentation *qr, bool reverseinsert)
Definition: tsrank.c:598
static float4 calc_rank_cd(const float4 *arrdata, TSVector txt, TSQuery query, int method)
Definition: tsrank.c:855
static float calc_rank(const float *w, TSVector t, TSQuery q, int32 method)
Definition: tsrank.c:358
static int cnt_length(TSVector t)
Definition: tsrank.c:54
#define wpos(wep)
Definition: tsrank.c:27
static WordEntry * find_wordentry(TSVector t, TSQuery q, QueryOperand *item, int32 *nitem)
Definition: tsrank.c:87
static float4 word_distance(int32 w)
Definition: tsrank.c:45
#define MAXQROPOS
Definition: tsrank.c:545
static int compareQueryOperand(const void *a, const void *b, void *arg)
Definition: tsrank.c:136
Datum ts_rank_ttf(PG_FUNCTION_ARGS)
Definition: tsrank.c:476
Datum ts_rank_wttf(PG_FUNCTION_ARGS)
Definition: tsrank.c:439
Datum ts_rank_tt(PG_FUNCTION_ARGS)
Definition: tsrank.c:491
static QueryOperand ** SortAndUniqItems(TSQuery q, int *size)
Definition: tsrank.c:155
#define DEF_NORM_METHOD
Definition: tsrank.c:36
static void getWeights(ArrayType *win, float *ws)
Definition: tsrank.c:405
static float calc_rank_or(const float *w, TSVector t, TSQuery q)
Definition: tsrank.c:284
static void fillQueryRepresentationData(QueryRepresentation *qr, DocRepresentation *entry)
Definition: tsrank.c:611
static const float default_weights[NUM_WEIGHTS]
Definition: tsrank.c:25
#define NUM_WEIGHTS
Definition: tsrank.c:24
Datum ts_rank_wtt(PG_FUNCTION_ARGS)
Definition: tsrank.c:458
#define QR_GET_OPERAND_DATA(q, v)
Definition: tsrank.c:561
Datum ts_rankcd_ttf(PG_FUNCTION_ARGS)
Definition: tsrank.c:995
Datum ts_rankcd_tt(PG_FUNCTION_ARGS)
Definition: tsrank.c:1010
#define RANK_NORM_EXTDIST
Definition: tsrank.c:32
static float calc_rank_and(const float *w, TSVector t, TSQuery q)
Definition: tsrank.c:201
static bool Cover(DocRepresentation *doc, int len, QueryRepresentation *qr, CoverExt *ext)
Definition: tsrank.c:651
#define RANK_NORM_LOGUNIQ
Definition: tsrank.c:34
bool TS_execute(QueryItem *curitem, void *arg, uint32 flags, TSExecuteCallback chkcond)
Definition: tsvector_op.c:1854
int32 tsCompareString(char *a, int lena, char *b, int lenb, bool prefix)
Definition: tsvector_op.c:1152
QueryOperator qoperator
Definition: ts_type.h:209
QueryOperand qoperand
Definition: ts_type.h:210
QueryItemType type
Definition: ts_type.h:208
const char * type