PostgreSQL Source Code  git master
tsrank.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * tsrank.c
4  * rank tsvector by tsquery
5  *
6  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
7  *
8  *
9  * IDENTIFICATION
10  * src/backend/utils/adt/tsrank.c
11  *
12  *-------------------------------------------------------------------------
13  */
14 #include "postgres.h"
15 
16 #include <limits.h>
17 #include <math.h>
18 
19 #include "tsearch/ts_utils.h"
20 #include "utils/array.h"
21 #include "utils/builtins.h"
22 #include "miscadmin.h"
23 
24 
25 static const float weights[] = {0.1f, 0.2f, 0.4f, 1.0f};
26 
27 #define wpos(wep) ( w[ WEP_GETWEIGHT(wep) ] )
28 
29 #define RANK_NO_NORM 0x00
30 #define RANK_NORM_LOGLENGTH 0x01
31 #define RANK_NORM_LENGTH 0x02
32 #define RANK_NORM_EXTDIST 0x04
33 #define RANK_NORM_UNIQ 0x08
34 #define RANK_NORM_LOGUNIQ 0x10
35 #define RANK_NORM_RDIVRPLUS1 0x20
36 #define DEF_NORM_METHOD RANK_NO_NORM
37 
38 static float calc_rank_or(const float *w, TSVector t, TSQuery q);
39 static float calc_rank_and(const float *w, TSVector t, TSQuery q);
40 
41 /*
42  * Returns a weight of a word collocation
43  */
44 static float4
46 {
47  if (w > 100)
48  return 1e-30f;
49 
50  return 1.0 / (1.005 + 0.05 * exp(((float4) w) / 1.5 - 2));
51 }
52 
53 static int
55 {
56  WordEntry *ptr = ARRPTR(t),
57  *end = (WordEntry *) STRPTR(t);
58  int len = 0;
59 
60  while (ptr < end)
61  {
62  int clen = POSDATALEN(t, ptr);
63 
64  if (clen == 0)
65  len += 1;
66  else
67  len += clen;
68 
69  ptr++;
70  }
71 
72  return len;
73 }
74 
75 
76 #define WordECompareQueryItem(e,q,p,i,m) \
77  tsCompareString((q) + (i)->distance, (i)->length, \
78  (e) + (p)->pos, (p)->len, (m))
79 
80 
81 /*
82  * Returns a pointer to a WordEntry's array corresponding to 'item' from
83  * tsvector 't'. 'q' is the TSQuery containing 'item'.
84  * Returns NULL if not found.
85  */
86 static WordEntry *
88 {
89  WordEntry *StopLow = ARRPTR(t);
90  WordEntry *StopHigh = (WordEntry *) STRPTR(t);
91  WordEntry *StopMiddle = StopHigh;
92  int difference;
93 
94  *nitem = 0;
95 
96  /* Loop invariant: StopLow <= item < StopHigh */
97  while (StopLow < StopHigh)
98  {
99  StopMiddle = StopLow + (StopHigh - StopLow) / 2;
100  difference = WordECompareQueryItem(STRPTR(t), GETOPERAND(q), StopMiddle, item, false);
101  if (difference == 0)
102  {
103  StopHigh = StopMiddle;
104  *nitem = 1;
105  break;
106  }
107  else if (difference > 0)
108  StopLow = StopMiddle + 1;
109  else
110  StopHigh = StopMiddle;
111  }
112 
113  if (item->prefix)
114  {
115  if (StopLow >= StopHigh)
116  StopMiddle = StopHigh;
117 
118  *nitem = 0;
119 
120  while (StopMiddle < (WordEntry *) STRPTR(t) &&
121  WordECompareQueryItem(STRPTR(t), GETOPERAND(q), StopMiddle, item, true) == 0)
122  {
123  (*nitem)++;
124  StopMiddle++;
125  }
126  }
127 
128  return (*nitem > 0) ? StopHigh : NULL;
129 }
130 
131 
132 /*
133  * sort QueryOperands by (length, word)
134  */
135 static int
136 compareQueryOperand(const void *a, const void *b, void *arg)
137 {
138  char *operand = (char *) arg;
139  QueryOperand *qa = (*(QueryOperand *const *) a);
140  QueryOperand *qb = (*(QueryOperand *const *) b);
141 
142  return tsCompareString(operand + qa->distance, qa->length,
143  operand + qb->distance, qb->length,
144  false);
145 }
146 
147 /*
148  * Returns a sorted, de-duplicated array of QueryOperands in a query.
149  * The returned QueryOperands are pointers to the original QueryOperands
150  * in the query.
151  *
152  * Length of the returned array is stored in *size
153  */
154 static QueryOperand **
156 {
157  char *operand = GETOPERAND(q);
158  QueryItem *item = GETQUERY(q);
159  QueryOperand **res,
160  **ptr,
161  **prevptr;
162 
163  ptr = res = (QueryOperand **) palloc(sizeof(QueryOperand *) * *size);
164 
165  /* Collect all operands from the tree to res */
166  while ((*size)--)
167  {
168  if (item->type == QI_VAL)
169  {
170  *ptr = (QueryOperand *) item;
171  ptr++;
172  }
173  item++;
174  }
175 
176  *size = ptr - res;
177  if (*size < 2)
178  return res;
179 
180  qsort_arg(res, *size, sizeof(QueryOperand *), compareQueryOperand, (void *) operand);
181 
182  ptr = res + 1;
183  prevptr = res;
184 
185  /* remove duplicates */
186  while (ptr - res < *size)
187  {
188  if (compareQueryOperand((void *) ptr, (void *) prevptr, (void *) operand) != 0)
189  {
190  prevptr++;
191  *prevptr = *ptr;
192  }
193  ptr++;
194  }
195 
196  *size = prevptr + 1 - res;
197  return res;
198 }
199 
200 static float
201 calc_rank_and(const float *w, TSVector t, TSQuery q)
202 {
203  WordEntryPosVector **pos;
204  WordEntryPosVector1 posnull;
205  WordEntryPosVector *POSNULL;
206  int i,
207  k,
208  l,
209  p;
210  WordEntry *entry,
211  *firstentry;
212  WordEntryPos *post,
213  *ct;
214  int32 dimt,
215  lenct,
216  dist,
217  nitem;
218  float res = -1.0;
219  QueryOperand **item;
220  int size = q->size;
221 
222  item = SortAndUniqItems(q, &size);
223  if (size < 2)
224  {
225  pfree(item);
226  return calc_rank_or(w, t, q);
227  }
228  pos = (WordEntryPosVector **) palloc0(sizeof(WordEntryPosVector *) * q->size);
229 
230  /* A dummy WordEntryPos array to use when haspos is false */
231  posnull.npos = 1;
232  posnull.pos[0] = 0;
233  WEP_SETPOS(posnull.pos[0], MAXENTRYPOS - 1);
234  POSNULL = (WordEntryPosVector *) &posnull;
235 
236  for (i = 0; i < size; i++)
237  {
238  firstentry = entry = find_wordentry(t, q, item[i], &nitem);
239  if (!entry)
240  continue;
241 
242  while (entry - firstentry < nitem)
243  {
244  if (entry->haspos)
245  pos[i] = _POSVECPTR(t, entry);
246  else
247  pos[i] = POSNULL;
248 
249  dimt = pos[i]->npos;
250  post = pos[i]->pos;
251  for (k = 0; k < i; k++)
252  {
253  if (!pos[k])
254  continue;
255  lenct = pos[k]->npos;
256  ct = pos[k]->pos;
257  for (l = 0; l < dimt; l++)
258  {
259  for (p = 0; p < lenct; p++)
260  {
261  dist = Abs((int) WEP_GETPOS(post[l]) - (int) WEP_GETPOS(ct[p]));
262  if (dist || (dist == 0 && (pos[i] == POSNULL || pos[k] == POSNULL)))
263  {
264  float curw;
265 
266  if (!dist)
267  dist = MAXENTRYPOS;
268  curw = sqrt(wpos(post[l]) * wpos(ct[p]) * word_distance(dist));
269  res = (res < 0) ? curw : 1.0 - (1.0 - res) * (1.0 - curw);
270  }
271  }
272  }
273  }
274 
275  entry++;
276  }
277  }
278  pfree(pos);
279  pfree(item);
280  return res;
281 }
282 
283 static float
284 calc_rank_or(const float *w, TSVector t, TSQuery q)
285 {
286  WordEntry *entry,
287  *firstentry;
288  WordEntryPosVector1 posnull;
289  WordEntryPos *post;
290  int32 dimt,
291  j,
292  i,
293  nitem;
294  float res = 0.0;
295  QueryOperand **item;
296  int size = q->size;
297 
298  /* A dummy WordEntryPos array to use when haspos is false */
299  posnull.npos = 1;
300  posnull.pos[0] = 0;
301 
302  item = SortAndUniqItems(q, &size);
303 
304  for (i = 0; i < size; i++)
305  {
306  float resj,
307  wjm;
308  int32 jm;
309 
310  firstentry = entry = find_wordentry(t, q, item[i], &nitem);
311  if (!entry)
312  continue;
313 
314  while (entry - firstentry < nitem)
315  {
316  if (entry->haspos)
317  {
318  dimt = POSDATALEN(t, entry);
319  post = POSDATAPTR(t, entry);
320  }
321  else
322  {
323  dimt = posnull.npos;
324  post = posnull.pos;
325  }
326 
327  resj = 0.0;
328  wjm = -1.0;
329  jm = 0;
330  for (j = 0; j < dimt; j++)
331  {
332  resj = resj + wpos(post[j]) / ((j + 1) * (j + 1));
333  if (wpos(post[j]) > wjm)
334  {
335  wjm = wpos(post[j]);
336  jm = j;
337  }
338  }
339 /*
340  limit (sum(i/i^2),i->inf) = pi^2/6
341  resj = sum(wi/i^2),i=1,noccurence,
342  wi - should be sorted desc,
343  don't sort for now, just choose maximum weight. This should be corrected
344  Oleg Bartunov
345 */
346  res = res + (wjm + resj - wjm / ((jm + 1) * (jm + 1))) / 1.64493406685;
347 
348  entry++;
349  }
350  }
351  if (size > 0)
352  res = res / size;
353  pfree(item);
354  return res;
355 }
356 
357 static float
358 calc_rank(const float *w, TSVector t, TSQuery q, int32 method)
359 {
360  QueryItem *item = GETQUERY(q);
361  float res = 0.0;
362  int len;
363 
364  if (!t->size || !q->size)
365  return 0.0;
366 
367  /* XXX: What about NOT? */
368  res = (item->type == QI_OPR && (item->qoperator.oper == OP_AND ||
369  item->qoperator.oper == OP_PHRASE)) ?
370  calc_rank_and(w, t, q) :
371  calc_rank_or(w, t, q);
372 
373  if (res < 0)
374  res = 1e-20f;
375 
376  if ((method & RANK_NORM_LOGLENGTH) && t->size > 0)
377  res /= log((double) (cnt_length(t) + 1)) / log(2.0);
378 
379  if (method & RANK_NORM_LENGTH)
380  {
381  len = cnt_length(t);
382  if (len > 0)
383  res /= (float) len;
384  }
385 
386  /* RANK_NORM_EXTDIST not applicable */
387 
388  if ((method & RANK_NORM_UNIQ) && t->size > 0)
389  res /= (float) (t->size);
390 
391  if ((method & RANK_NORM_LOGUNIQ) && t->size > 0)
392  res /= log((double) (t->size + 1)) / log(2.0);
393 
394  if (method & RANK_NORM_RDIVRPLUS1)
395  res /= (res + 1);
396 
397  return res;
398 }
399 
400 static const float *
402 {
403  static float ws[lengthof(weights)];
404  int i;
405  float4 *arrdata;
406 
407  if (win == NULL)
408  return weights;
409 
410  if (ARR_NDIM(win) != 1)
411  ereport(ERROR,
412  (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
413  errmsg("array of weight must be one-dimensional")));
414 
415  if (ArrayGetNItems(ARR_NDIM(win), ARR_DIMS(win)) < lengthof(weights))
416  ereport(ERROR,
417  (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
418  errmsg("array of weight is too short")));
419 
420  if (array_contains_nulls(win))
421  ereport(ERROR,
422  (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
423  errmsg("array of weight must not contain nulls")));
424 
425  arrdata = (float4 *) ARR_DATA_PTR(win);
426  for (i = 0; i < lengthof(weights); i++)
427  {
428  ws[i] = (arrdata[i] >= 0) ? arrdata[i] : weights[i];
429  if (ws[i] > 1.0)
430  ereport(ERROR,
431  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
432  errmsg("weight out of range")));
433  }
434 
435  return ws;
436 }
437 
438 Datum
440 {
442  TSVector txt = PG_GETARG_TSVECTOR(1);
443  TSQuery query = PG_GETARG_TSQUERY(2);
444  int method = PG_GETARG_INT32(3);
445  float res;
446 
447  res = calc_rank(getWeights(win), txt, query, method);
448 
449  PG_FREE_IF_COPY(win, 0);
450  PG_FREE_IF_COPY(txt, 1);
451  PG_FREE_IF_COPY(query, 2);
452  PG_RETURN_FLOAT4(res);
453 }
454 
455 Datum
457 {
459  TSVector txt = PG_GETARG_TSVECTOR(1);
460  TSQuery query = PG_GETARG_TSQUERY(2);
461  float res;
462 
463  res = calc_rank(getWeights(win), txt, query, DEF_NORM_METHOD);
464 
465  PG_FREE_IF_COPY(win, 0);
466  PG_FREE_IF_COPY(txt, 1);
467  PG_FREE_IF_COPY(query, 2);
468  PG_RETURN_FLOAT4(res);
469 }
470 
471 Datum
473 {
474  TSVector txt = PG_GETARG_TSVECTOR(0);
475  TSQuery query = PG_GETARG_TSQUERY(1);
476  int method = PG_GETARG_INT32(2);
477  float res;
478 
479  res = calc_rank(getWeights(NULL), txt, query, method);
480 
481  PG_FREE_IF_COPY(txt, 0);
482  PG_FREE_IF_COPY(query, 1);
483  PG_RETURN_FLOAT4(res);
484 }
485 
486 Datum
488 {
489  TSVector txt = PG_GETARG_TSVECTOR(0);
490  TSQuery query = PG_GETARG_TSQUERY(1);
491  float res;
492 
493  res = calc_rank(getWeights(NULL), txt, query, DEF_NORM_METHOD);
494 
495  PG_FREE_IF_COPY(txt, 0);
496  PG_FREE_IF_COPY(query, 1);
497  PG_RETURN_FLOAT4(res);
498 }
499 
500 typedef struct
501 {
502  union
503  {
504  struct
505  { /* compiled doc representation */
508  } query;
509  struct
510  { /* struct is used for preparing doc
511  * representation */
514  } map;
515  } data;
518 
519 static int
520 compareDocR(const void *va, const void *vb)
521 {
522  const DocRepresentation *a = (const DocRepresentation *) va;
523  const DocRepresentation *b = (const DocRepresentation *) vb;
524 
525  if (WEP_GETPOS(a->pos) == WEP_GETPOS(b->pos))
526  {
527  if (WEP_GETWEIGHT(a->pos) == WEP_GETWEIGHT(b->pos))
528  {
529  if (a->data.map.entry == b->data.map.entry)
530  return 0;
531 
532  return (a->data.map.entry > b->data.map.entry) ? 1 : -1;
533  }
534 
535  return (WEP_GETWEIGHT(a->pos) > WEP_GETWEIGHT(b->pos)) ? 1 : -1;
536  }
537 
538  return (WEP_GETPOS(a->pos) > WEP_GETPOS(b->pos)) ? 1 : -1;
539 }
540 
541 #define MAXQROPOS MAXENTRYPOS
542 typedef struct
543 {
545  bool reverseinsert; /* indicates insert order, true means
546  * descending order */
550 
551 typedef struct
552 {
556 
557 #define QR_GET_OPERAND_DATA(q, v) \
558  ( (q)->operandData + (((QueryItem*)(v)) - GETQUERY((q)->query)) )
559 
560 static bool
562 {
563  QueryRepresentation *qr = (QueryRepresentation *) checkval;
565 
566  if (!opData->operandexists)
567  return false;
568 
569  if (data)
570  {
571  data->npos = opData->npos;
572  data->pos = opData->pos;
573  if (opData->reverseinsert)
574  data->pos += MAXQROPOS - opData->npos;
575  }
576 
577  return true;
578 }
579 
580 typedef struct
581 {
582  int pos;
583  int p;
584  int q;
587 } CoverExt;
588 
589 static void
591 {
592  int i;
593 
594  for (i = 0; i < qr->query->size; i++)
595  {
596  qr->operandData[i].operandexists = false;
597  qr->operandData[i].reverseinsert = reverseinsert;
598  qr->operandData[i].npos = 0;
599  }
600 }
601 
602 static void
604 {
605  int i;
606  int lastPos;
608 
609  for (i = 0; i < entry->data.query.nitem; i++)
610  {
611  if (entry->data.query.items[i]->type != QI_VAL)
612  continue;
613 
614  opData = QR_GET_OPERAND_DATA(qr, entry->data.query.items[i]);
615 
616  opData->operandexists = true;
617 
618  if (opData->npos == 0)
619  {
620  lastPos = (opData->reverseinsert) ? (MAXQROPOS - 1) : 0;
621  opData->pos[lastPos] = entry->pos;
622  opData->npos++;
623  continue;
624  }
625 
626  lastPos = opData->reverseinsert ?
627  (MAXQROPOS - opData->npos) :
628  (opData->npos - 1);
629 
630  if (WEP_GETPOS(opData->pos[lastPos]) != WEP_GETPOS(entry->pos))
631  {
632  lastPos = opData->reverseinsert ?
633  (MAXQROPOS - 1 - opData->npos) :
634  (opData->npos);
635 
636  opData->pos[lastPos] = entry->pos;
637  opData->npos++;
638  }
639  }
640 }
641 
642 static bool
644 {
645  DocRepresentation *ptr;
646  int lastpos = ext->pos;
647  bool found = false;
648 
649  /*
650  * since this function recurses, it could be driven to stack overflow.
651  * (though any decent compiler will optimize away the tail-recursion.
652  */
654 
655  resetQueryRepresentation(qr, false);
656 
657  ext->p = INT_MAX;
658  ext->q = 0;
659  ptr = doc + ext->pos;
660 
661  /* find upper bound of cover from current position, move up */
662  while (ptr - doc < len)
663  {
665 
666  if (TS_execute(GETQUERY(qr->query), (void *) qr,
668  {
669  if (WEP_GETPOS(ptr->pos) > ext->q)
670  {
671  ext->q = WEP_GETPOS(ptr->pos);
672  ext->end = ptr;
673  lastpos = ptr - doc;
674  found = true;
675  }
676  break;
677  }
678  ptr++;
679  }
680 
681  if (!found)
682  return false;
683 
684  resetQueryRepresentation(qr, true);
685 
686  ptr = doc + lastpos;
687 
688  /* find lower bound of cover from found upper bound, move down */
689  while (ptr >= doc + ext->pos)
690  {
691  /*
692  * we scan doc from right to left, so pos info in reverse order!
693  */
695 
696  if (TS_execute(GETQUERY(qr->query), (void *) qr,
698  {
699  if (WEP_GETPOS(ptr->pos) < ext->p)
700  {
701  ext->begin = ptr;
702  ext->p = WEP_GETPOS(ptr->pos);
703  }
704  break;
705  }
706  ptr--;
707  }
708 
709  if (ext->p <= ext->q)
710  {
711  /*
712  * set position for next try to next lexeme after beginning of found
713  * cover
714  */
715  ext->pos = (ptr - doc) + 1;
716  return true;
717  }
718 
719  ext->pos++;
720  return Cover(doc, len, qr, ext);
721 }
722 
723 static DocRepresentation *
725 {
726  QueryItem *item = GETQUERY(qr->query);
727  WordEntry *entry,
728  *firstentry;
729  WordEntryPos *post;
730  int32 dimt, /* number of 'post' items */
731  j,
732  i,
733  nitem;
734  int len = qr->query->size * 4,
735  cur = 0;
736  DocRepresentation *doc;
737 
738  doc = (DocRepresentation *) palloc(sizeof(DocRepresentation) * len);
739 
740  /*
741  * Iterate through query to make DocRepresentaion for words and it's
742  * entries satisfied by query
743  */
744  for (i = 0; i < qr->query->size; i++)
745  {
746  QueryOperand *curoperand;
747 
748  if (item[i].type != QI_VAL)
749  continue;
750 
751  curoperand = &item[i].qoperand;
752 
753  firstentry = entry = find_wordentry(txt, qr->query, curoperand, &nitem);
754  if (!entry)
755  continue;
756 
757  /* iterations over entries in tsvector */
758  while (entry - firstentry < nitem)
759  {
760  if (entry->haspos)
761  {
762  dimt = POSDATALEN(txt, entry);
763  post = POSDATAPTR(txt, entry);
764  }
765  else
766  {
767  /* ignore words without positions */
768  entry++;
769  continue;
770  }
771 
772  while (cur + dimt >= len)
773  {
774  len *= 2;
775  doc = (DocRepresentation *) repalloc(doc, sizeof(DocRepresentation) * len);
776  }
777 
778  /* iterations over entry's positions */
779  for (j = 0; j < dimt; j++)
780  {
781  if (curoperand->weight == 0 ||
782  curoperand->weight & (1 << WEP_GETWEIGHT(post[j])))
783  {
784  doc[cur].pos = post[j];
785  doc[cur].data.map.entry = entry;
786  doc[cur].data.map.item = (QueryItem *) curoperand;
787  cur++;
788  }
789  }
790 
791  entry++;
792  }
793  }
794 
795  if (cur > 0)
796  {
797  DocRepresentation *rptr = doc + 1,
798  *wptr = doc,
799  storage;
800 
801  /*
802  * Sort representation in ascending order by pos and entry
803  */
804  qsort((void *) doc, cur, sizeof(DocRepresentation), compareDocR);
805 
806  /*
807  * Join QueryItem per WordEntry and it's position
808  */
809  storage.pos = doc->pos;
810  storage.data.query.items = palloc(sizeof(QueryItem *) * qr->query->size);
811  storage.data.query.items[0] = doc->data.map.item;
812  storage.data.query.nitem = 1;
813 
814  while (rptr - doc < cur)
815  {
816  if (rptr->pos == (rptr - 1)->pos &&
817  rptr->data.map.entry == (rptr - 1)->data.map.entry)
818  {
819  storage.data.query.items[storage.data.query.nitem] = rptr->data.map.item;
820  storage.data.query.nitem++;
821  }
822  else
823  {
824  *wptr = storage;
825  wptr++;
826  storage.pos = rptr->pos;
827  storage.data.query.items = palloc(sizeof(QueryItem *) * qr->query->size);
828  storage.data.query.items[0] = rptr->data.map.item;
829  storage.data.query.nitem = 1;
830  }
831 
832  rptr++;
833  }
834 
835  *wptr = storage;
836  wptr++;
837 
838  *doclen = wptr - doc;
839  return doc;
840  }
841 
842  pfree(doc);
843  return NULL;
844 }
845 
846 static float4
847 calc_rank_cd(const float4 *arrdata, TSVector txt, TSQuery query, int method)
848 {
849  DocRepresentation *doc;
850  int len,
851  i,
852  doclen = 0;
853  CoverExt ext;
854  double Wdoc = 0.0;
855  double invws[lengthof(weights)];
856  double SumDist = 0.0,
857  PrevExtPos = 0.0,
858  CurExtPos = 0.0;
859  int NExtent = 0;
861 
862 
863  for (i = 0; i < lengthof(weights); i++)
864  {
865  invws[i] = ((double) ((arrdata[i] >= 0) ? arrdata[i] : weights[i]));
866  if (invws[i] > 1.0)
867  ereport(ERROR,
868  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
869  errmsg("weight out of range")));
870  invws[i] = 1.0 / invws[i];
871  }
872 
873  qr.query = query;
875  palloc0(sizeof(QueryRepresentationOperand) * query->size);
876 
877  doc = get_docrep(txt, &qr, &doclen);
878  if (!doc)
879  {
880  pfree(qr.operandData);
881  return 0.0;
882  }
883 
884  MemSet(&ext, 0, sizeof(CoverExt));
885  while (Cover(doc, doclen, &qr, &ext))
886  {
887  double Cpos = 0.0;
888  double InvSum = 0.0;
889  int nNoise;
890  DocRepresentation *ptr = ext.begin;
891 
892  while (ptr <= ext.end)
893  {
894  InvSum += invws[WEP_GETWEIGHT(ptr->pos)];
895  ptr++;
896  }
897 
898  Cpos = ((double) (ext.end - ext.begin + 1)) / InvSum;
899 
900  /*
901  * if doc are big enough then ext.q may be equal to ext.p due to limit
902  * of positional information. In this case we approximate number of
903  * noise word as half cover's length
904  */
905  nNoise = (ext.q - ext.p) - (ext.end - ext.begin);
906  if (nNoise < 0)
907  nNoise = (ext.end - ext.begin) / 2;
908  Wdoc += Cpos / ((double) (1 + nNoise));
909 
910  CurExtPos = ((double) (ext.q + ext.p)) / 2.0;
911  if (NExtent > 0 && CurExtPos > PrevExtPos /* prevent division by
912  * zero in a case of
913  * multiple lexize */ )
914  SumDist += 1.0 / (CurExtPos - PrevExtPos);
915 
916  PrevExtPos = CurExtPos;
917  NExtent++;
918  }
919 
920  if ((method & RANK_NORM_LOGLENGTH) && txt->size > 0)
921  Wdoc /= log((double) (cnt_length(txt) + 1));
922 
923  if (method & RANK_NORM_LENGTH)
924  {
925  len = cnt_length(txt);
926  if (len > 0)
927  Wdoc /= (double) len;
928  }
929 
930  if ((method & RANK_NORM_EXTDIST) && NExtent > 0 && SumDist > 0)
931  Wdoc /= ((double) NExtent) / SumDist;
932 
933  if ((method & RANK_NORM_UNIQ) && txt->size > 0)
934  Wdoc /= (double) (txt->size);
935 
936  if ((method & RANK_NORM_LOGUNIQ) && txt->size > 0)
937  Wdoc /= log((double) (txt->size + 1)) / log(2.0);
938 
939  if (method & RANK_NORM_RDIVRPLUS1)
940  Wdoc /= (Wdoc + 1);
941 
942  pfree(doc);
943 
944  pfree(qr.operandData);
945 
946  return (float4) Wdoc;
947 }
948 
949 Datum
951 {
953  TSVector txt = PG_GETARG_TSVECTOR(1);
954  TSQuery query = PG_GETARG_TSQUERY(2);
955  int method = PG_GETARG_INT32(3);
956  float res;
957 
958  res = calc_rank_cd(getWeights(win), txt, query, method);
959 
960  PG_FREE_IF_COPY(win, 0);
961  PG_FREE_IF_COPY(txt, 1);
962  PG_FREE_IF_COPY(query, 2);
963  PG_RETURN_FLOAT4(res);
964 }
965 
966 Datum
968 {
970  TSVector txt = PG_GETARG_TSVECTOR(1);
971  TSQuery query = PG_GETARG_TSQUERY(2);
972  float res;
973 
974  res = calc_rank_cd(getWeights(win), txt, query, DEF_NORM_METHOD);
975 
976  PG_FREE_IF_COPY(win, 0);
977  PG_FREE_IF_COPY(txt, 1);
978  PG_FREE_IF_COPY(query, 2);
979  PG_RETURN_FLOAT4(res);
980 }
981 
982 Datum
984 {
985  TSVector txt = PG_GETARG_TSVECTOR(0);
986  TSQuery query = PG_GETARG_TSQUERY(1);
987  int method = PG_GETARG_INT32(2);
988  float res;
989 
990  res = calc_rank_cd(getWeights(NULL), txt, query, method);
991 
992  PG_FREE_IF_COPY(txt, 0);
993  PG_FREE_IF_COPY(query, 1);
994  PG_RETURN_FLOAT4(res);
995 }
996 
997 Datum
999 {
1000  TSVector txt = PG_GETARG_TSVECTOR(0);
1001  TSQuery query = PG_GETARG_TSQUERY(1);
1002  float res;
1003 
1004  res = calc_rank_cd(getWeights(NULL), txt, query, DEF_NORM_METHOD);
1005 
1006  PG_FREE_IF_COPY(txt, 0);
1007  PG_FREE_IF_COPY(query, 1);
1008  PG_RETURN_FLOAT4(res);
1009 }
uint16 WordEntryPos
Definition: ts_type.h:63
signed short int16
Definition: c.h:283
#define RANK_NORM_RDIVRPLUS1
Definition: tsrank.c:35
bool TS_execute(QueryItem *curitem, void *arg, uint32 flags, TSExecuteCallback chkcond)
Definition: tsvector_op.c:1815
#define DEF_NORM_METHOD
Definition: tsrank.c:36
QueryOperator qoperator
Definition: ts_type.h:196
static float calc_rank_or(const float *w, TSVector t, TSQuery q)
Definition: tsrank.c:284
DocRepresentation * end
Definition: tsrank.c:586
#define PG_GETARG_INT32(n)
Definition: fmgr.h:234
#define RANK_NORM_EXTDIST
Definition: tsrank.c:32
#define PG_RETURN_FLOAT4(x)
Definition: fmgr.h:325
Datum ts_rankcd_tt(PG_FUNCTION_ARGS)
Definition: tsrank.c:998
#define TS_EXEC_CALC_NOT
Definition: ts_utils.h:167
Datum ts_rank_wttf(PG_FUNCTION_ARGS)
Definition: tsrank.c:439
struct DocRepresentation::@33::@35 map
static bool Cover(DocRepresentation *doc, int len, QueryRepresentation *qr, CoverExt *ext)
Definition: tsrank.c:643
#define POSDATALEN(x, e)
Definition: ts_type.h:110
#define PG_GETARG_DATUM(n)
Definition: fmgr.h:233
#define RANK_NORM_LOGLENGTH
Definition: tsrank.c:30
#define PG_GETARG_TSQUERY(n)
Definition: ts_type.h:238
#define QR_GET_OPERAND_DATA(q, v)
Definition: tsrank.c:557
int ArrayGetNItems(int ndim, const int *dims)
Definition: arrayutils.c:75
#define _POSVECPTR(x, e)
Definition: ts_type.h:109
Datum ts_rankcd_wttf(PG_FUNCTION_ARGS)
Definition: tsrank.c:950
QueryItem ** items
Definition: tsrank.c:506
struct cursor * cur
Definition: ecpg.c:28
int errcode(int sqlerrcode)
Definition: elog.c:575
#define MemSet(start, val, len)
Definition: c.h:853
#define QI_VAL
Definition: ts_type.h:134
#define lengthof(array)
Definition: c.h:600
static DocRepresentation * get_docrep(TSVector txt, QueryRepresentation *qr, int *doclen)
Definition: tsrank.c:724
uint32 distance
Definition: ts_type.h:158
Datum ts_rankcd_ttf(PG_FUNCTION_ARGS)
Definition: tsrank.c:983
static int compareDocR(const void *va, const void *vb)
Definition: tsrank.c:520
#define WEP_SETPOS(x, v)
Definition: ts_type.h:83
static void fillQueryRepresentationData(QueryRepresentation *qr, DocRepresentation *entry)
Definition: tsrank.c:603
#define GETQUERY(x)
Definition: _int.h:142
#define RANK_NORM_UNIQ
Definition: tsrank.c:33
signed int int32
Definition: c.h:284
#define GETOPERAND(x)
Definition: ltree.h:118
WordEntryPos pos[1]
Definition: ts_type.h:75
#define POSDATAPTR(x, e)
Definition: ts_type.h:111
#define Abs(x)
Definition: c.h:808
Datum ts_rank_wtt(PG_FUNCTION_ARGS)
Definition: tsrank.c:456
#define OP_AND
Definition: ts_type.h:167
static int cnt_length(TSVector t)
Definition: tsrank.c:54
void pfree(void *pointer)
Definition: mcxt.c:949
#define ERROR
Definition: elog.h:43
int p
Definition: tsrank.c:583
#define ARR_DIMS(a)
Definition: array.h:279
union DocRepresentation::@33 data
uint8 weight
Definition: ts_type.h:146
#define WEP_GETPOS(x)
Definition: ts_type.h:80
int pos
Definition: tsrank.c:582
WordEntryPos pos[FLEXIBLE_ARRAY_MEMBER]
Definition: ts_type.h:68
#define ARR_DATA_PTR(a)
Definition: array.h:307
static int compareQueryOperand(const void *a, const void *b, void *arg)
Definition: tsrank.c:136
void check_stack_depth(void)
Definition: postgres.c:3150
int q
Definition: tsrank.c:584
int32 size
Definition: ts_type.h:93
static float4 word_distance(int32 w)
Definition: tsrank.c:45
WordEntryPos * pos
Definition: ts_utils.h:138
unsigned int uint32
Definition: c.h:296
#define WordECompareQueryItem(e, q, p, i, m)
Definition: tsrank.c:76
static float calc_rank(const float *w, TSVector t, TSQuery q, int32 method)
Definition: tsrank.c:358
#define RANK_NORM_LOGUNIQ
Definition: tsrank.c:34
uint32 haspos
Definition: ts_type.h:44
#define ereport(elevel, rest)
Definition: elog.h:122
#define wpos(wep)
Definition: tsrank.c:27
void qsort_arg(void *base, size_t nel, size_t elsize, qsort_arg_comparator cmp, void *arg)
Definition: qsort_arg.c:113
static QueryOperand ** SortAndUniqItems(TSQuery q, int *size)
Definition: tsrank.c:155
float float4
Definition: c.h:428
#define QI_OPR
Definition: ts_type.h:135
QueryItemType type
Definition: ts_type.h:195
void * palloc0(Size size)
Definition: mcxt.c:877
uintptr_t Datum
Definition: postgres.h:372
Datum difference(PG_FUNCTION_ARGS)
static void resetQueryRepresentation(QueryRepresentation *qr, bool reverseinsert)
Definition: tsrank.c:590
QueryRepresentationOperand * operandData
Definition: tsrank.c:554
Datum ts_rank_ttf(PG_FUNCTION_ARGS)
Definition: tsrank.c:472
Datum ts_rank_tt(PG_FUNCTION_ARGS)
Definition: tsrank.c:487
static float calc_rank_and(const float *w, TSVector t, TSQuery q)
Definition: tsrank.c:201
#define OP_PHRASE
Definition: ts_type.h:169
#define MAXQROPOS
Definition: tsrank.c:541
#define RANK_NORM_LENGTH
Definition: tsrank.c:31
#define PG_FREE_IF_COPY(ptr, n)
Definition: fmgr.h:225
int32 tsCompareString(char *a, int lena, char *b, int lenb, bool prefix)
Definition: tsvector_op.c:1160
WordEntry * entry
Definition: tsrank.c:513
static float4 calc_rank_cd(const float4 *arrdata, TSVector txt, TSQuery query, int method)
Definition: tsrank.c:847
QueryItem * item
Definition: tsrank.c:512
#define ARR_NDIM(a)
Definition: array.h:275
static bool checkcondition_QueryOperand(void *checkval, QueryOperand *val, ExecPhraseData *data)
Definition: tsrank.c:561
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:962
uint32 length
Definition: ts_type.h:158
static WordEntry * find_wordentry(TSVector t, TSQuery q, QueryOperand *item, int32 *nitem)
Definition: tsrank.c:87
e
Definition: preproc-init.c:82
void * palloc(Size size)
Definition: mcxt.c:848
int errmsg(const char *fmt,...)
Definition: elog.c:797
#define STRPTR(x)
Definition: hstore.h:76
int32 size
Definition: ts_type.h:208
int i
void * arg
#define PG_DETOAST_DATUM(datum)
Definition: fmgr.h:205
#define PG_GETARG_TSVECTOR(n)
Definition: ts_type.h:120
#define PG_FUNCTION_ARGS
Definition: fmgr.h:158
bool prefix
Definition: ts_type.h:150
#define ARRPTR(x)
Definition: cube.c:26
#define WEP_GETWEIGHT(x)
Definition: ts_type.h:79
QueryOperand qoperand
Definition: ts_type.h:197
#define qsort(a, b, c, d)
Definition: port.h:408
bool array_contains_nulls(ArrayType *array)
Definition: arrayfuncs.c:3516
#define MAXENTRYPOS
Definition: ts_type.h:85
struct DocRepresentation::@33::@34 query
Datum ts_rankcd_wtt(PG_FUNCTION_ARGS)
Definition: tsrank.c:967
#define TS_EXEC_EMPTY
Definition: ts_utils.h:159
static const float * getWeights(ArrayType *win)
Definition: tsrank.c:401
long val
Definition: informix.c:689
WordEntryPos pos[MAXQROPOS]
Definition: tsrank.c:548
DocRepresentation * begin
Definition: tsrank.c:585
WordEntryPos pos
Definition: tsrank.c:516
static const float weights[]
Definition: tsrank.c:25