PostgreSQL Source Code  git master
tsvector_op.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * tsvector_op.c
4  * operations over tsvector
5  *
6  * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
7  *
8  *
9  * IDENTIFICATION
10  * src/backend/utils/adt/tsvector_op.c
11  *
12  *-------------------------------------------------------------------------
13  */
14 #include "postgres.h"
15 
16 #include <limits.h>
17 
18 #include "access/htup_details.h"
19 #include "catalog/namespace.h"
20 #include "catalog/pg_type.h"
21 #include "commands/trigger.h"
22 #include "executor/spi.h"
23 #include "funcapi.h"
24 #include "lib/qunique.h"
25 #include "mb/pg_wchar.h"
26 #include "miscadmin.h"
27 #include "parser/parse_coerce.h"
28 #include "tsearch/ts_utils.h"
29 #include "utils/array.h"
30 #include "utils/builtins.h"
31 #include "utils/lsyscache.h"
32 #include "utils/regproc.h"
33 #include "utils/rel.h"
34 
35 
36 typedef struct
37 {
40  char *values;
41  char *operand;
42 } CHKVAL;
43 
44 
45 typedef struct StatEntry
46 {
47  uint32 ndoc; /* zero indicates that we were already here
48  * while walking through the tree */
50  struct StatEntry *left;
51  struct StatEntry *right;
55 
56 #define STATENTRYHDRSZ (offsetof(StatEntry, lexeme))
57 
58 typedef struct
59 {
61 
63 
66 
68 } TSVectorStat;
69 
70 
71 static TSTernaryValue TS_execute_recurse(QueryItem *curitem, void *arg,
72  uint32 flags,
73  TSExecuteCallback chkcond);
74 static int tsvector_bsearch(const TSVector tsv, char *lexeme, int lexeme_len);
75 static Datum tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column);
76 
77 
78 /*
79  * Order: haspos, len, word, for all positions (pos, weight)
80  */
81 static int
83 {
84  if (VARSIZE(a) < VARSIZE(b))
85  return -1;
86  else if (VARSIZE(a) > VARSIZE(b))
87  return 1;
88  else if (a->size < b->size)
89  return -1;
90  else if (a->size > b->size)
91  return 1;
92  else
93  {
94  WordEntry *aptr = ARRPTR(a);
95  WordEntry *bptr = ARRPTR(b);
96  int i = 0;
97  int res;
98 
99 
100  for (i = 0; i < a->size; i++)
101  {
102  if (aptr->haspos != bptr->haspos)
103  {
104  return (aptr->haspos > bptr->haspos) ? -1 : 1;
105  }
106  else if ((res = tsCompareString(STRPTR(a) + aptr->pos, aptr->len, STRPTR(b) + bptr->pos, bptr->len, false)) != 0)
107  {
108  return res;
109  }
110  else if (aptr->haspos)
111  {
112  WordEntryPos *ap = POSDATAPTR(a, aptr);
113  WordEntryPos *bp = POSDATAPTR(b, bptr);
114  int j;
115 
116  if (POSDATALEN(a, aptr) != POSDATALEN(b, bptr))
117  return (POSDATALEN(a, aptr) > POSDATALEN(b, bptr)) ? -1 : 1;
118 
119  for (j = 0; j < POSDATALEN(a, aptr); j++)
120  {
121  if (WEP_GETPOS(*ap) != WEP_GETPOS(*bp))
122  {
123  return (WEP_GETPOS(*ap) > WEP_GETPOS(*bp)) ? -1 : 1;
124  }
125  else if (WEP_GETWEIGHT(*ap) != WEP_GETWEIGHT(*bp))
126  {
127  return (WEP_GETWEIGHT(*ap) > WEP_GETWEIGHT(*bp)) ? -1 : 1;
128  }
129  ap++, bp++;
130  }
131  }
132 
133  aptr++;
134  bptr++;
135  }
136  }
137 
138  return 0;
139 }
140 
141 #define TSVECTORCMPFUNC( type, action, ret ) \
142 Datum \
143 tsvector_##type(PG_FUNCTION_ARGS) \
144 { \
145  TSVector a = PG_GETARG_TSVECTOR(0); \
146  TSVector b = PG_GETARG_TSVECTOR(1); \
147  int res = silly_cmp_tsvector(a, b); \
148  PG_FREE_IF_COPY(a,0); \
149  PG_FREE_IF_COPY(b,1); \
150  PG_RETURN_##ret( res action 0 ); \
151 } \
152 /* keep compiler quiet - no extra ; */ \
153 extern int no_such_variable
154 
155 TSVECTORCMPFUNC(lt, <, BOOL);
156 TSVECTORCMPFUNC(le, <=, BOOL);
157 TSVECTORCMPFUNC(eq, ==, BOOL);
158 TSVECTORCMPFUNC(ge, >=, BOOL);
159 TSVECTORCMPFUNC(gt, >, BOOL);
160 TSVECTORCMPFUNC(ne, !=, BOOL);
161 TSVECTORCMPFUNC(cmp, +, INT32);
162 
163 Datum
165 {
167  TSVector out;
168  int i,
169  len = 0;
170  WordEntry *arrin = ARRPTR(in),
171  *arrout;
172  char *cur;
173 
174  for (i = 0; i < in->size; i++)
175  len += arrin[i].len;
176 
177  len = CALCDATASIZE(in->size, len);
178  out = (TSVector) palloc0(len);
179  SET_VARSIZE(out, len);
180  out->size = in->size;
181  arrout = ARRPTR(out);
182  cur = STRPTR(out);
183  for (i = 0; i < in->size; i++)
184  {
185  memcpy(cur, STRPTR(in) + arrin[i].pos, arrin[i].len);
186  arrout[i].haspos = 0;
187  arrout[i].len = arrin[i].len;
188  arrout[i].pos = cur - STRPTR(out);
189  cur += arrout[i].len;
190  }
191 
192  PG_FREE_IF_COPY(in, 0);
193  PG_RETURN_POINTER(out);
194 }
195 
196 Datum
198 {
200  int32 ret = in->size;
201 
202  PG_FREE_IF_COPY(in, 0);
203  PG_RETURN_INT32(ret);
204 }
205 
206 Datum
208 {
210  char cw = PG_GETARG_CHAR(1);
211  TSVector out;
212  int i,
213  j;
214  WordEntry *entry;
215  WordEntryPos *p;
216  int w = 0;
217 
218  switch (cw)
219  {
220  case 'A':
221  case 'a':
222  w = 3;
223  break;
224  case 'B':
225  case 'b':
226  w = 2;
227  break;
228  case 'C':
229  case 'c':
230  w = 1;
231  break;
232  case 'D':
233  case 'd':
234  w = 0;
235  break;
236  default:
237  /* internal error */
238  elog(ERROR, "unrecognized weight: %d", cw);
239  }
240 
241  out = (TSVector) palloc(VARSIZE(in));
242  memcpy(out, in, VARSIZE(in));
243  entry = ARRPTR(out);
244  i = out->size;
245  while (i--)
246  {
247  if ((j = POSDATALEN(out, entry)) != 0)
248  {
249  p = POSDATAPTR(out, entry);
250  while (j--)
251  {
252  WEP_SETWEIGHT(*p, w);
253  p++;
254  }
255  }
256  entry++;
257  }
258 
259  PG_FREE_IF_COPY(in, 0);
260  PG_RETURN_POINTER(out);
261 }
262 
263 /*
264  * setweight(tsin tsvector, char_weight "char", lexemes "text"[])
265  *
266  * Assign weight w to elements of tsin that are listed in lexemes.
267  */
268 Datum
270 {
271  TSVector tsin = PG_GETARG_TSVECTOR(0);
272  char char_weight = PG_GETARG_CHAR(1);
273  ArrayType *lexemes = PG_GETARG_ARRAYTYPE_P(2);
274 
275  TSVector tsout;
276  int i,
277  j,
278  nlexemes,
279  weight;
280  WordEntry *entry;
281  Datum *dlexemes;
282  bool *nulls;
283 
284  switch (char_weight)
285  {
286  case 'A':
287  case 'a':
288  weight = 3;
289  break;
290  case 'B':
291  case 'b':
292  weight = 2;
293  break;
294  case 'C':
295  case 'c':
296  weight = 1;
297  break;
298  case 'D':
299  case 'd':
300  weight = 0;
301  break;
302  default:
303  /* internal error */
304  elog(ERROR, "unrecognized weight: %c", char_weight);
305  }
306 
307  tsout = (TSVector) palloc(VARSIZE(tsin));
308  memcpy(tsout, tsin, VARSIZE(tsin));
309  entry = ARRPTR(tsout);
310 
311  deconstruct_array(lexemes, TEXTOID, -1, false, TYPALIGN_INT,
312  &dlexemes, &nulls, &nlexemes);
313 
314  /*
315  * Assuming that lexemes array is significantly shorter than tsvector we
316  * can iterate through lexemes performing binary search of each lexeme
317  * from lexemes in tsvector.
318  */
319  for (i = 0; i < nlexemes; i++)
320  {
321  char *lex;
322  int lex_len,
323  lex_pos;
324 
325  /* Ignore null array elements, they surely don't match */
326  if (nulls[i])
327  continue;
328 
329  lex = VARDATA(dlexemes[i]);
330  lex_len = VARSIZE(dlexemes[i]) - VARHDRSZ;
331  lex_pos = tsvector_bsearch(tsout, lex, lex_len);
332 
333  if (lex_pos >= 0 && (j = POSDATALEN(tsout, entry + lex_pos)) != 0)
334  {
335  WordEntryPos *p = POSDATAPTR(tsout, entry + lex_pos);
336 
337  while (j--)
338  {
339  WEP_SETWEIGHT(*p, weight);
340  p++;
341  }
342  }
343  }
344 
345  PG_FREE_IF_COPY(tsin, 0);
346  PG_FREE_IF_COPY(lexemes, 2);
347 
348  PG_RETURN_POINTER(tsout);
349 }
350 
/*
 * Compare the lexemes of two WordEntry items in non-prefix mode.
 * base_a/base_b are the string areas the entries' pos offsets refer to.
 */
#define compareEntry(base_a, ent_a, base_b, ent_b) \
	tsCompareString((base_a) + (ent_a)->pos, (ent_a)->len, \
					(base_b) + (ent_b)->pos, (ent_b)->len, \
					false)
355 
356 /*
357  * Add positions from src to dest after offsetting them by maxpos.
358  * Return the number added (might be less than expected due to overflow)
359  */
360 static int32
362  TSVector dest, WordEntry *destptr,
363  int32 maxpos)
364 {
365  uint16 *clen = &_POSVECPTR(dest, destptr)->npos;
366  int i;
367  uint16 slen = POSDATALEN(src, srcptr),
368  startlen;
369  WordEntryPos *spos = POSDATAPTR(src, srcptr),
370  *dpos = POSDATAPTR(dest, destptr);
371 
372  if (!destptr->haspos)
373  *clen = 0;
374 
375  startlen = *clen;
376  for (i = 0;
377  i < slen && *clen < MAXNUMPOS &&
378  (*clen == 0 || WEP_GETPOS(dpos[*clen - 1]) != MAXENTRYPOS - 1);
379  i++)
380  {
381  WEP_SETWEIGHT(dpos[*clen], WEP_GETWEIGHT(spos[i]));
382  WEP_SETPOS(dpos[*clen], LIMITPOS(WEP_GETPOS(spos[i]) + maxpos));
383  (*clen)++;
384  }
385 
386  if (*clen != startlen)
387  destptr->haspos = 1;
388  return *clen - startlen;
389 }
390 
391 /*
392  * Perform binary search of given lexeme in TSVector.
393  * Returns lexeme position in TSVector's entry array or -1 if lexeme wasn't
394  * found.
395  */
396 static int
397 tsvector_bsearch(const TSVector tsv, char *lexeme, int lexeme_len)
398 {
399  WordEntry *arrin = ARRPTR(tsv);
400  int StopLow = 0,
401  StopHigh = tsv->size,
402  StopMiddle,
403  cmp;
404 
405  while (StopLow < StopHigh)
406  {
407  StopMiddle = (StopLow + StopHigh) / 2;
408 
409  cmp = tsCompareString(lexeme, lexeme_len,
410  STRPTR(tsv) + arrin[StopMiddle].pos,
411  arrin[StopMiddle].len,
412  false);
413 
414  if (cmp < 0)
415  StopHigh = StopMiddle;
416  else if (cmp > 0)
417  StopLow = StopMiddle + 1;
418  else /* found it */
419  return StopMiddle;
420  }
421 
422  return -1;
423 }
424 
425 /*
426  * qsort comparator functions
427  */
428 
/*
 * qsort comparator for plain ints: returns -1, 0 or 1 for a < b, a == b,
 * a > b respectively.  Uses comparisons only, so it cannot overflow.
 */
static int
compare_int(const void *va, const void *vb)
{
	const int  *lhs = (const int *) va;
	const int  *rhs = (const int *) vb;

	return (*lhs > *rhs) - (*lhs < *rhs);
}
439 
440 static int
441 compare_text_lexemes(const void *va, const void *vb)
442 {
443  Datum a = *((const Datum *) va);
444  Datum b = *((const Datum *) vb);
445  char *alex = VARDATA_ANY(a);
446  int alex_len = VARSIZE_ANY_EXHDR(a);
447  char *blex = VARDATA_ANY(b);
448  int blex_len = VARSIZE_ANY_EXHDR(b);
449 
450  return tsCompareString(alex, alex_len, blex, blex_len, false);
451 }
452 
453 /*
454  * Internal routine to delete lexemes from TSVector by array of offsets.
455  *
456  * int *indices_to_delete -- array of lexeme offsets to delete (modified here!)
457  * int indices_count -- size of that array
458  *
459  * Returns new TSVector without given lexemes along with their positions
460  * and weights.
461  */
462 static TSVector
463 tsvector_delete_by_indices(TSVector tsv, int *indices_to_delete,
464  int indices_count)
465 {
466  TSVector tsout;
467  WordEntry *arrin = ARRPTR(tsv),
468  *arrout;
469  char *data = STRPTR(tsv),
470  *dataout;
471  int i, /* index in arrin */
472  j, /* index in arrout */
473  k, /* index in indices_to_delete */
474  curoff; /* index in dataout area */
475 
476  /*
477  * Sort the filter array to simplify membership checks below. Also, get
478  * rid of any duplicate entries, so that we can assume that indices_count
479  * is exactly equal to the number of lexemes that will be removed.
480  */
481  if (indices_count > 1)
482  {
483  qsort(indices_to_delete, indices_count, sizeof(int), compare_int);
484  indices_count = qunique(indices_to_delete, indices_count, sizeof(int),
485  compare_int);
486  }
487 
488  /*
489  * Here we overestimate tsout size, since we don't know how much space is
490  * used by the deleted lexeme(s). We will set exact size below.
491  */
492  tsout = (TSVector) palloc0(VARSIZE(tsv));
493 
494  /* This count must be correct because STRPTR(tsout) relies on it. */
495  tsout->size = tsv->size - indices_count;
496 
497  /*
498  * Copy tsv to tsout, skipping lexemes listed in indices_to_delete.
499  */
500  arrout = ARRPTR(tsout);
501  dataout = STRPTR(tsout);
502  curoff = 0;
503  for (i = j = k = 0; i < tsv->size; i++)
504  {
505  /*
506  * If current i is present in indices_to_delete, skip this lexeme.
507  * Since indices_to_delete is already sorted, we only need to check
508  * the current (k'th) entry.
509  */
510  if (k < indices_count && i == indices_to_delete[k])
511  {
512  k++;
513  continue;
514  }
515 
516  /* Copy lexeme and its positions and weights */
517  memcpy(dataout + curoff, data + arrin[i].pos, arrin[i].len);
518  arrout[j].haspos = arrin[i].haspos;
519  arrout[j].len = arrin[i].len;
520  arrout[j].pos = curoff;
521  curoff += arrin[i].len;
522  if (arrin[i].haspos)
523  {
524  int len = POSDATALEN(tsv, arrin + i) * sizeof(WordEntryPos)
525  + sizeof(uint16);
526 
527  curoff = SHORTALIGN(curoff);
528  memcpy(dataout + curoff,
529  STRPTR(tsv) + SHORTALIGN(arrin[i].pos + arrin[i].len),
530  len);
531  curoff += len;
532  }
533 
534  j++;
535  }
536 
537  /*
538  * k should now be exactly equal to indices_count. If it isn't then the
539  * caller provided us with indices outside of [0, tsv->size) range and
540  * estimation of tsout's size is wrong.
541  */
542  Assert(k == indices_count);
543 
544  SET_VARSIZE(tsout, CALCDATASIZE(tsout->size, curoff));
545  return tsout;
546 }
547 
548 /*
549  * Delete given lexeme from tsvector.
550  * Implementation of user-level ts_delete(tsvector, text).
551  */
552 Datum
554 {
555  TSVector tsin = PG_GETARG_TSVECTOR(0),
556  tsout;
557  text *tlexeme = PG_GETARG_TEXT_PP(1);
558  char *lexeme = VARDATA_ANY(tlexeme);
559  int lexeme_len = VARSIZE_ANY_EXHDR(tlexeme),
560  skip_index;
561 
562  if ((skip_index = tsvector_bsearch(tsin, lexeme, lexeme_len)) == -1)
563  PG_RETURN_POINTER(tsin);
564 
565  tsout = tsvector_delete_by_indices(tsin, &skip_index, 1);
566 
567  PG_FREE_IF_COPY(tsin, 0);
568  PG_FREE_IF_COPY(tlexeme, 1);
569  PG_RETURN_POINTER(tsout);
570 }
571 
572 /*
573  * Delete given array of lexemes from tsvector.
574  * Implementation of user-level ts_delete(tsvector, text[]).
575  */
576 Datum
578 {
579  TSVector tsin = PG_GETARG_TSVECTOR(0),
580  tsout;
581  ArrayType *lexemes = PG_GETARG_ARRAYTYPE_P(1);
582  int i,
583  nlex,
584  skip_count,
585  *skip_indices;
586  Datum *dlexemes;
587  bool *nulls;
588 
589  deconstruct_array(lexemes, TEXTOID, -1, false, TYPALIGN_INT,
590  &dlexemes, &nulls, &nlex);
591 
592  /*
593  * In typical use case array of lexemes to delete is relatively small. So
594  * here we optimize things for that scenario: iterate through lexarr
595  * performing binary search of each lexeme from lexarr in tsvector.
596  */
597  skip_indices = palloc0(nlex * sizeof(int));
598  for (i = skip_count = 0; i < nlex; i++)
599  {
600  char *lex;
601  int lex_len,
602  lex_pos;
603 
604  /* Ignore null array elements, they surely don't match */
605  if (nulls[i])
606  continue;
607 
608  lex = VARDATA(dlexemes[i]);
609  lex_len = VARSIZE(dlexemes[i]) - VARHDRSZ;
610  lex_pos = tsvector_bsearch(tsin, lex, lex_len);
611 
612  if (lex_pos >= 0)
613  skip_indices[skip_count++] = lex_pos;
614  }
615 
616  tsout = tsvector_delete_by_indices(tsin, skip_indices, skip_count);
617 
618  pfree(skip_indices);
619  PG_FREE_IF_COPY(tsin, 0);
620  PG_FREE_IF_COPY(lexemes, 1);
621 
622  PG_RETURN_POINTER(tsout);
623 }
624 
625 /*
626  * Expand tsvector as table with following columns:
627  * lexeme: lexeme text
628  * positions: integer array of lexeme positions
629  * weights: char array of weights corresponding to positions
630  */
631 Datum
633 {
634  FuncCallContext *funcctx;
635  TSVector tsin;
636 
637  if (SRF_IS_FIRSTCALL())
638  {
639  MemoryContext oldcontext;
640  TupleDesc tupdesc;
641 
642  funcctx = SRF_FIRSTCALL_INIT();
643  oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
644 
645  tupdesc = CreateTemplateTupleDesc(3);
646  TupleDescInitEntry(tupdesc, (AttrNumber) 1, "lexeme",
647  TEXTOID, -1, 0);
648  TupleDescInitEntry(tupdesc, (AttrNumber) 2, "positions",
649  INT2ARRAYOID, -1, 0);
650  TupleDescInitEntry(tupdesc, (AttrNumber) 3, "weights",
651  TEXTARRAYOID, -1, 0);
652  funcctx->tuple_desc = BlessTupleDesc(tupdesc);
653 
654  funcctx->user_fctx = PG_GETARG_TSVECTOR_COPY(0);
655 
656  MemoryContextSwitchTo(oldcontext);
657  }
658 
659  funcctx = SRF_PERCALL_SETUP();
660  tsin = (TSVector) funcctx->user_fctx;
661 
662  if (funcctx->call_cntr < tsin->size)
663  {
664  WordEntry *arrin = ARRPTR(tsin);
665  char *data = STRPTR(tsin);
666  HeapTuple tuple;
667  int j,
668  i = funcctx->call_cntr;
669  bool nulls[] = {false, false, false};
670  Datum values[3];
671 
672  values[0] = PointerGetDatum(cstring_to_text_with_len(data + arrin[i].pos, arrin[i].len));
673 
674  if (arrin[i].haspos)
675  {
676  WordEntryPosVector *posv;
677  Datum *positions;
678  Datum *weights;
679  char weight;
680 
681  /*
682  * Internally tsvector stores position and weight in the same
683  * uint16 (2 bits for weight, 14 for position). Here we extract
684  * that in two separate arrays.
685  */
686  posv = _POSVECPTR(tsin, arrin + i);
687  positions = palloc(posv->npos * sizeof(Datum));
688  weights = palloc(posv->npos * sizeof(Datum));
689  for (j = 0; j < posv->npos; j++)
690  {
691  positions[j] = Int16GetDatum(WEP_GETPOS(posv->pos[j]));
692  weight = 'D' - WEP_GETWEIGHT(posv->pos[j]);
694  1));
695  }
696 
697  values[1] = PointerGetDatum(construct_array(positions, posv->npos,
698  INT2OID, 2, true, TYPALIGN_SHORT));
700  TEXTOID, -1, false, TYPALIGN_INT));
701  }
702  else
703  {
704  nulls[1] = nulls[2] = true;
705  }
706 
707  tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
708  SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
709  }
710  else
711  {
712  SRF_RETURN_DONE(funcctx);
713  }
714 }
715 
716 /*
717  * Convert tsvector to array of lexemes.
718  */
719 Datum
721 {
722  TSVector tsin = PG_GETARG_TSVECTOR(0);
723  WordEntry *arrin = ARRPTR(tsin);
724  Datum *elements;
725  int i;
726  ArrayType *array;
727 
728  elements = palloc(tsin->size * sizeof(Datum));
729 
730  for (i = 0; i < tsin->size; i++)
731  {
732  elements[i] = PointerGetDatum(cstring_to_text_with_len(STRPTR(tsin) + arrin[i].pos,
733  arrin[i].len));
734  }
735 
736  array = construct_array(elements, tsin->size, TEXTOID, -1, false, TYPALIGN_INT);
737 
738  pfree(elements);
739  PG_FREE_IF_COPY(tsin, 0);
740  PG_RETURN_POINTER(array);
741 }
742 
743 /*
744  * Build tsvector from array of lexemes.
745  */
746 Datum
748 {
750  TSVector tsout;
751  Datum *dlexemes;
752  WordEntry *arrout;
753  bool *nulls;
754  int nitems,
755  i,
756  tslen,
757  datalen = 0;
758  char *cur;
759 
760  deconstruct_array(v, TEXTOID, -1, false, TYPALIGN_INT, &dlexemes, &nulls, &nitems);
761 
762  /*
763  * Reject nulls and zero length strings (maybe we should just ignore them,
764  * instead?)
765  */
766  for (i = 0; i < nitems; i++)
767  {
768  if (nulls[i])
769  ereport(ERROR,
770  (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
771  errmsg("lexeme array may not contain nulls")));
772 
773  if (VARSIZE(dlexemes[i]) - VARHDRSZ == 0)
774  ereport(ERROR,
775  (errcode(ERRCODE_ZERO_LENGTH_CHARACTER_STRING),
776  errmsg("lexeme array may not contain empty strings")));
777  }
778 
779  /* Sort and de-dup, because this is required for a valid tsvector. */
780  if (nitems > 1)
781  {
782  qsort(dlexemes, nitems, sizeof(Datum), compare_text_lexemes);
783  nitems = qunique(dlexemes, nitems, sizeof(Datum),
785  }
786 
787  /* Calculate space needed for surviving lexemes. */
788  for (i = 0; i < nitems; i++)
789  datalen += VARSIZE(dlexemes[i]) - VARHDRSZ;
790  tslen = CALCDATASIZE(nitems, datalen);
791 
792  /* Allocate and fill tsvector. */
793  tsout = (TSVector) palloc0(tslen);
794  SET_VARSIZE(tsout, tslen);
795  tsout->size = nitems;
796 
797  arrout = ARRPTR(tsout);
798  cur = STRPTR(tsout);
799  for (i = 0; i < nitems; i++)
800  {
801  char *lex = VARDATA(dlexemes[i]);
802  int lex_len = VARSIZE(dlexemes[i]) - VARHDRSZ;
803 
804  memcpy(cur, lex, lex_len);
805  arrout[i].haspos = 0;
806  arrout[i].len = lex_len;
807  arrout[i].pos = cur - STRPTR(tsout);
808  cur += lex_len;
809  }
810 
811  PG_FREE_IF_COPY(v, 0);
812  PG_RETURN_POINTER(tsout);
813 }
814 
815 /*
816  * ts_filter(): keep only lexemes with given weights in tsvector.
817  */
818 Datum
820 {
821  TSVector tsin = PG_GETARG_TSVECTOR(0),
822  tsout;
824  WordEntry *arrin = ARRPTR(tsin),
825  *arrout;
826  char *datain = STRPTR(tsin),
827  *dataout;
828  Datum *dweights;
829  bool *nulls;
830  int nweights;
831  int i,
832  j;
833  int cur_pos = 0;
834  char mask = 0;
835 
836  deconstruct_array(weights, CHAROID, 1, true, TYPALIGN_CHAR,
837  &dweights, &nulls, &nweights);
838 
839  for (i = 0; i < nweights; i++)
840  {
841  char char_weight;
842 
843  if (nulls[i])
844  ereport(ERROR,
845  (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
846  errmsg("weight array may not contain nulls")));
847 
848  char_weight = DatumGetChar(dweights[i]);
849  switch (char_weight)
850  {
851  case 'A':
852  case 'a':
853  mask = mask | 8;
854  break;
855  case 'B':
856  case 'b':
857  mask = mask | 4;
858  break;
859  case 'C':
860  case 'c':
861  mask = mask | 2;
862  break;
863  case 'D':
864  case 'd':
865  mask = mask | 1;
866  break;
867  default:
868  ereport(ERROR,
869  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
870  errmsg("unrecognized weight: \"%c\"", char_weight)));
871  }
872  }
873 
874  tsout = (TSVector) palloc0(VARSIZE(tsin));
875  tsout->size = tsin->size;
876  arrout = ARRPTR(tsout);
877  dataout = STRPTR(tsout);
878 
879  for (i = j = 0; i < tsin->size; i++)
880  {
881  WordEntryPosVector *posvin,
882  *posvout;
883  int npos = 0;
884  int k;
885 
886  if (!arrin[i].haspos)
887  continue;
888 
889  posvin = _POSVECPTR(tsin, arrin + i);
890  posvout = (WordEntryPosVector *)
891  (dataout + SHORTALIGN(cur_pos + arrin[i].len));
892 
893  for (k = 0; k < posvin->npos; k++)
894  {
895  if (mask & (1 << WEP_GETWEIGHT(posvin->pos[k])))
896  posvout->pos[npos++] = posvin->pos[k];
897  }
898 
899  /* if no satisfactory positions found, skip lexeme */
900  if (!npos)
901  continue;
902 
903  arrout[j].haspos = true;
904  arrout[j].len = arrin[i].len;
905  arrout[j].pos = cur_pos;
906 
907  memcpy(dataout + cur_pos, datain + arrin[i].pos, arrin[i].len);
908  posvout->npos = npos;
909  cur_pos += SHORTALIGN(arrin[i].len);
910  cur_pos += POSDATALEN(tsout, arrout + j) * sizeof(WordEntryPos) +
911  sizeof(uint16);
912  j++;
913  }
914 
915  tsout->size = j;
916  if (dataout != STRPTR(tsout))
917  memmove(STRPTR(tsout), dataout, cur_pos);
918 
919  SET_VARSIZE(tsout, CALCDATASIZE(tsout->size, cur_pos));
920 
921  PG_FREE_IF_COPY(tsin, 0);
922  PG_RETURN_POINTER(tsout);
923 }
924 
925 Datum
927 {
928  TSVector in1 = PG_GETARG_TSVECTOR(0);
929  TSVector in2 = PG_GETARG_TSVECTOR(1);
930  TSVector out;
931  WordEntry *ptr;
932  WordEntry *ptr1,
933  *ptr2;
934  WordEntryPos *p;
935  int maxpos = 0,
936  i,
937  j,
938  i1,
939  i2,
940  dataoff,
941  output_bytes,
942  output_size;
943  char *data,
944  *data1,
945  *data2;
946 
947  /* Get max position in in1; we'll need this to offset in2's positions */
948  ptr = ARRPTR(in1);
949  i = in1->size;
950  while (i--)
951  {
952  if ((j = POSDATALEN(in1, ptr)) != 0)
953  {
954  p = POSDATAPTR(in1, ptr);
955  while (j--)
956  {
957  if (WEP_GETPOS(*p) > maxpos)
958  maxpos = WEP_GETPOS(*p);
959  p++;
960  }
961  }
962  ptr++;
963  }
964 
965  ptr1 = ARRPTR(in1);
966  ptr2 = ARRPTR(in2);
967  data1 = STRPTR(in1);
968  data2 = STRPTR(in2);
969  i1 = in1->size;
970  i2 = in2->size;
971 
972  /*
973  * Conservative estimate of space needed. We might need all the data in
974  * both inputs, and conceivably add a pad byte before position data for
975  * each item where there was none before.
976  */
977  output_bytes = VARSIZE(in1) + VARSIZE(in2) + i1 + i2;
978 
979  out = (TSVector) palloc0(output_bytes);
980  SET_VARSIZE(out, output_bytes);
981 
982  /*
983  * We must make out->size valid so that STRPTR(out) is sensible. We'll
984  * collapse out any unused space at the end.
985  */
986  out->size = in1->size + in2->size;
987 
988  ptr = ARRPTR(out);
989  data = STRPTR(out);
990  dataoff = 0;
991  while (i1 && i2)
992  {
993  int cmp = compareEntry(data1, ptr1, data2, ptr2);
994 
995  if (cmp < 0)
996  { /* in1 first */
997  ptr->haspos = ptr1->haspos;
998  ptr->len = ptr1->len;
999  memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
1000  ptr->pos = dataoff;
1001  dataoff += ptr1->len;
1002  if (ptr->haspos)
1003  {
1004  dataoff = SHORTALIGN(dataoff);
1005  memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
1006  dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
1007  }
1008 
1009  ptr++;
1010  ptr1++;
1011  i1--;
1012  }
1013  else if (cmp > 0)
1014  { /* in2 first */
1015  ptr->haspos = ptr2->haspos;
1016  ptr->len = ptr2->len;
1017  memcpy(data + dataoff, data2 + ptr2->pos, ptr2->len);
1018  ptr->pos = dataoff;
1019  dataoff += ptr2->len;
1020  if (ptr->haspos)
1021  {
1022  int addlen = add_pos(in2, ptr2, out, ptr, maxpos);
1023 
1024  if (addlen == 0)
1025  ptr->haspos = 0;
1026  else
1027  {
1028  dataoff = SHORTALIGN(dataoff);
1029  dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
1030  }
1031  }
1032 
1033  ptr++;
1034  ptr2++;
1035  i2--;
1036  }
1037  else
1038  {
1039  ptr->haspos = ptr1->haspos | ptr2->haspos;
1040  ptr->len = ptr1->len;
1041  memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
1042  ptr->pos = dataoff;
1043  dataoff += ptr1->len;
1044  if (ptr->haspos)
1045  {
1046  if (ptr1->haspos)
1047  {
1048  dataoff = SHORTALIGN(dataoff);
1049  memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
1050  dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
1051  if (ptr2->haspos)
1052  dataoff += add_pos(in2, ptr2, out, ptr, maxpos) * sizeof(WordEntryPos);
1053  }
1054  else /* must have ptr2->haspos */
1055  {
1056  int addlen = add_pos(in2, ptr2, out, ptr, maxpos);
1057 
1058  if (addlen == 0)
1059  ptr->haspos = 0;
1060  else
1061  {
1062  dataoff = SHORTALIGN(dataoff);
1063  dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
1064  }
1065  }
1066  }
1067 
1068  ptr++;
1069  ptr1++;
1070  ptr2++;
1071  i1--;
1072  i2--;
1073  }
1074  }
1075 
1076  while (i1)
1077  {
1078  ptr->haspos = ptr1->haspos;
1079  ptr->len = ptr1->len;
1080  memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
1081  ptr->pos = dataoff;
1082  dataoff += ptr1->len;
1083  if (ptr->haspos)
1084  {
1085  dataoff = SHORTALIGN(dataoff);
1086  memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
1087  dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
1088  }
1089 
1090  ptr++;
1091  ptr1++;
1092  i1--;
1093  }
1094 
1095  while (i2)
1096  {
1097  ptr->haspos = ptr2->haspos;
1098  ptr->len = ptr2->len;
1099  memcpy(data + dataoff, data2 + ptr2->pos, ptr2->len);
1100  ptr->pos = dataoff;
1101  dataoff += ptr2->len;
1102  if (ptr->haspos)
1103  {
1104  int addlen = add_pos(in2, ptr2, out, ptr, maxpos);
1105 
1106  if (addlen == 0)
1107  ptr->haspos = 0;
1108  else
1109  {
1110  dataoff = SHORTALIGN(dataoff);
1111  dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
1112  }
1113  }
1114 
1115  ptr++;
1116  ptr2++;
1117  i2--;
1118  }
1119 
1120  /*
1121  * Instead of checking each offset individually, we check for overflow of
1122  * pos fields once at the end.
1123  */
1124  if (dataoff > MAXSTRPOS)
1125  ereport(ERROR,
1126  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1127  errmsg("string is too long for tsvector (%d bytes, max %d bytes)", dataoff, MAXSTRPOS)));
1128 
1129  /*
1130  * Adjust sizes (asserting that we didn't overrun the original estimates)
1131  * and collapse out any unused array entries.
1132  */
1133  output_size = ptr - ARRPTR(out);
1134  Assert(output_size <= out->size);
1135  out->size = output_size;
1136  if (data != STRPTR(out))
1137  memmove(STRPTR(out), data, dataoff);
1138  output_bytes = CALCDATASIZE(out->size, dataoff);
1139  Assert(output_bytes <= VARSIZE(out));
1140  SET_VARSIZE(out, output_bytes);
1141 
1142  PG_FREE_IF_COPY(in1, 0);
1143  PG_FREE_IF_COPY(in2, 1);
1144  PG_RETURN_POINTER(out);
1145 }
1146 
1147 /*
1148  * Compare two strings by tsvector rules.
1149  *
1150  * if prefix = true then it returns zero value iff b has prefix a
1151  */
1152 int32
1153 tsCompareString(char *a, int lena, char *b, int lenb, bool prefix)
1154 {
1155  int cmp;
1156 
1157  if (lena == 0)
1158  {
1159  if (prefix)
1160  cmp = 0; /* empty string is prefix of anything */
1161  else
1162  cmp = (lenb > 0) ? -1 : 0;
1163  }
1164  else if (lenb == 0)
1165  {
1166  cmp = (lena > 0) ? 1 : 0;
1167  }
1168  else
1169  {
1170  cmp = memcmp(a, b, Min((unsigned int) lena, (unsigned int) lenb));
1171 
1172  if (prefix)
1173  {
1174  if (cmp == 0 && lena > lenb)
1175  cmp = 1; /* a is longer, so not a prefix of b */
1176  }
1177  else if (cmp == 0 && lena != lenb)
1178  {
1179  cmp = (lena < lenb) ? -1 : 1;
1180  }
1181  }
1182 
1183  return cmp;
1184 }
1185 
1186 /*
1187  * Check weight info or/and fill 'data' with the required positions
1188  */
1189 static TSTernaryValue
1192 {
1193  TSTernaryValue result = TS_NO;
1194 
1195  Assert(data == NULL || data->npos == 0);
1196 
1197  if (entry->haspos)
1198  {
1199  WordEntryPosVector *posvec;
1200 
1201  /*
1202  * We can't use the _POSVECPTR macro here because the pointer to the
1203  * tsvector's lexeme storage is already contained in chkval->values.
1204  */
1205  posvec = (WordEntryPosVector *)
1206  (chkval->values + SHORTALIGN(entry->pos + entry->len));
1207 
1208  if (val->weight && data)
1209  {
1210  WordEntryPos *posvec_iter = posvec->pos;
1211  WordEntryPos *dptr;
1212 
1213  /*
1214  * Filter position information by weights
1215  */
1216  dptr = data->pos = palloc(sizeof(WordEntryPos) * posvec->npos);
1217  data->allocated = true;
1218 
1219  /* Is there a position with a matching weight? */
1220  while (posvec_iter < posvec->pos + posvec->npos)
1221  {
1222  /* If true, append this position to the data->pos */
1223  if (val->weight & (1 << WEP_GETWEIGHT(*posvec_iter)))
1224  {
1225  *dptr = WEP_GETPOS(*posvec_iter);
1226  dptr++;
1227  }
1228 
1229  posvec_iter++;
1230  }
1231 
1232  data->npos = dptr - data->pos;
1233 
1234  if (data->npos > 0)
1235  result = TS_YES;
1236  else
1237  {
1238  pfree(data->pos);
1239  data->pos = NULL;
1240  data->allocated = false;
1241  }
1242  }
1243  else if (val->weight)
1244  {
1245  WordEntryPos *posvec_iter = posvec->pos;
1246 
1247  /* Is there a position with a matching weight? */
1248  while (posvec_iter < posvec->pos + posvec->npos)
1249  {
1250  if (val->weight & (1 << WEP_GETWEIGHT(*posvec_iter)))
1251  {
1252  result = TS_YES;
1253  break; /* no need to go further */
1254  }
1255 
1256  posvec_iter++;
1257  }
1258  }
1259  else if (data)
1260  {
1261  data->npos = posvec->npos;
1262  data->pos = posvec->pos;
1263  data->allocated = false;
1264  result = TS_YES;
1265  }
1266  else
1267  {
1268  /* simplest case: no weight check, positions not needed */
1269  result = TS_YES;
1270  }
1271  }
1272  else
1273  {
1274  /*
1275  * Position info is lacking, so if the caller requires it, we can only
1276  * say that maybe there is a match.
1277  *
1278  * Notice, however, that we *don't* check val->weight here.
1279  * Historically, stripped tsvectors are considered to match queries
1280  * whether or not the query has a weight restriction; that's a little
1281  * dubious but we'll preserve the behavior.
1282  */
1283  if (data)
1284  result = TS_MAYBE;
1285  else
1286  result = TS_YES;
1287  }
1288 
1289  return result;
1290 }
1291 
1292 /*
1293  * TS_execute callback for matching a tsquery operand to plain tsvector data
1294  */
/*
 * NOTE(review): this listing skips its lines 1296, 1302 and 1432, so the
 * line carrying the function name and parameter list, one local declaration,
 * and the last argument of the qunique() call are not visible here.  The
 * body uses "checkval", "val" and "data" as inputs and "res" as the result.
 *
 * Step 1: binary-search the WordEntry range chkval->arrb .. chkval->arre for
 * a lexeme equal to the operand text, and on a hit let checkclass_str()
 * decide the result (and fill *data, if given).
 *
 * Step 2 (only when val->prefix is set and either there is no definite
 * match yet or positions are wanted): scan forward from the search position
 * over all entries the operand is a prefix of, merging their positions.
 */
1295 static TSTernaryValue
1297 {
1298  CHKVAL *chkval = (CHKVAL *) checkval;
1299  WordEntry *StopLow = chkval->arrb;
1300  WordEntry *StopHigh = chkval->arre;
1301  WordEntry *StopMiddle = StopHigh;
1303 
1304  /* Loop invariant: StopLow <= val < StopHigh */
1305  while (StopLow < StopHigh)
1306  {
1307  int difference;
1308 
1309  StopMiddle = StopLow + (StopHigh - StopLow) / 2;
      /* last argument false here; the prefix scan below passes true */
1310  difference = tsCompareString(chkval->operand + val->distance,
1311  val->length,
1312  chkval->values + StopMiddle->pos,
1313  StopMiddle->len,
1314  false);
1315 
1316  if (difference == 0)
1317  {
1318  /* Check weight info & fill 'data' with positions */
1319  res = checkclass_str(chkval, StopMiddle, val, data);
1320  break;
1321  }
1322  else if (difference > 0)
1323  StopLow = StopMiddle + 1;
1324  else
1325  StopHigh = StopMiddle;
1326  }
1327 
1328  /*
1329  * If it's a prefix search, we should also consider lexemes that the
1330  * search term is a prefix of (which will necessarily immediately follow
1331  * the place we found in the above loop). But we can skip them if there
1332  * was a definite match on the exact term AND the caller doesn't need
1333  * position info.
1334  */
1335  if (val->prefix && (res != TS_YES || data))
1336  {
      /* allpos: merged position buffer; npos: used entries; totalpos: capacity */
1337  WordEntryPos *allpos = NULL;
1338  int npos = 0,
1339  totalpos = 0;
1340 
1341  /* adjust start position for corner case */
      /* (the search loop ended without a hit, so start at StopHigh) */
1342  if (StopLow >= StopHigh)
1343  StopMiddle = StopHigh;
1344 
1345  /* we don't try to re-use any data from the initial match */
1346  if (data)
1347  {
1348  if (data->allocated)
1349  pfree(data->pos);
1350  data->pos = NULL;
1351  data->allocated = false;
1352  data->npos = 0;
1353  }
1354  res = TS_NO;
1355 
      /*
       * Walk forward while entries still match the operand as a prefix.
       * Without "data" the loop stops at the first TS_YES; with "data" it
       * keeps going so that every match's positions get collected.
       */
1356  while ((res != TS_YES || data) &&
1357  StopMiddle < chkval->arre &&
1358  tsCompareString(chkval->operand + val->distance,
1359  val->length,
1360  chkval->values + StopMiddle->pos,
1361  StopMiddle->len,
1362  true) == 0)
1363  {
1364  TSTernaryValue subres;
1365 
1366  subres = checkclass_str(chkval, StopMiddle, val, data);
1367 
1368  if (subres != TS_NO)
1369  {
1370  if (data)
1371  {
1372  /*
1373  * We need to join position information
1374  */
1375  if (subres == TS_MAYBE)
1376  {
1377  /*
1378  * No position info for this match, so we must report
1379  * MAYBE overall.
1380  */
1381  res = TS_MAYBE;
1382  /* forget any previous positions */
1383  npos = 0;
1384  /* don't leak storage */
1385  if (allpos)
1386  pfree(allpos);
      /* allpos is not used again: npos == 0 skips the block after the loop */
1387  break;
1388  }
1389 
      /* grow allpos (256 entries at first, then doubling) until this match fits */
1390  while (npos + data->npos > totalpos)
1391  {
1392  if (totalpos == 0)
1393  {
1394  totalpos = 256;
1395  allpos = palloc(sizeof(WordEntryPos) * totalpos);
1396  }
1397  else
1398  {
1399  totalpos *= 2;
1400  allpos = repalloc(allpos, sizeof(WordEntryPos) * totalpos);
1401  }
1402  }
1403 
1404  memcpy(allpos + npos, data->pos, sizeof(WordEntryPos) * data->npos);
1405  npos += data->npos;
1406 
1407  /* don't leak storage from individual matches */
1408  if (data->allocated)
1409  pfree(data->pos);
1410  data->pos = NULL;
1411  data->allocated = false;
1412  /* it's important to reset data->npos before next loop */
1413  data->npos = 0;
1414  }
1415  else
1416  {
1417  /* Don't need positions, just handle YES/MAYBE */
      /* a TS_YES always wins; a TS_MAYBE only replaces TS_NO */
1418  if (subres == TS_YES || res == TS_NO)
1419  res = subres;
1420  }
1421  }
1422 
1423  StopMiddle++;
1424  }
1425 
      /* hand the merged buffer to the caller; it is marked as allocated */
1426  if (data && npos > 0)
1427  {
1428  /* Sort and make unique array of found positions */
1429  data->pos = allpos;
1430  qsort(data->pos, npos, sizeof(WordEntryPos), compareWordEntryPos);
1431  data->npos = qunique(data->pos, npos, sizeof(WordEntryPos),
1433  data->allocated = true;
1434  res = TS_YES;
1435  }
1436  }
1437 
1438  return res;
1439 }
1440 
1441 /*
1442  * Compute output position list for a tsquery operator in phrase mode.
1443  *
1444  * Merge the position lists in Ldata and Rdata as specified by "emit",
1445  * returning the result list into *data. The input position lists must be
1446  * sorted and unique, and the output will be as well.
1447  *
1448  * data: pointer to initially-all-zeroes output struct, or NULL
1449  * Ldata, Rdata: input position lists
1450  * emit: bitmask of TSPO_XXX flags
1451  * Loffset: offset to be added to Ldata positions before comparing/outputting
1452  * Roffset: offset to be added to Rdata positions before comparing/outputting
1453  * max_npos: maximum possible required size of output position array
1454  *
1455  * Loffset and Roffset should not be negative, else we risk trying to output
1456  * negative positions, which won't fit into WordEntryPos.
1457  *
1458  * The result is boolean (TS_YES or TS_NO), but for the caller's convenience
1459  * we return it as TSTernaryValue.
1460  *
1461  * Returns TS_YES if any positions were emitted to *data; or if data is NULL,
1462  * returns TS_YES if any positions would have been emitted.
1463  */
1464 #define TSPO_L_ONLY 0x01 /* emit positions appearing only in L */
1465 #define TSPO_R_ONLY 0x02 /* emit positions appearing only in R */
1466 #define TSPO_BOTH 0x04 /* emit positions appearing in both L&R */
1467 
/*
 * NOTE(review): listing line 1469 (function name and first parameter) is
 * not visible here.  Callers in this file invoke the function as
 * TS_phrase_output(data, &Ldata, &Rdata, emit, Loffset, Roffset, max_npos).
 */
1468 static TSTernaryValue
1470  ExecPhraseData *Ldata,
1471  ExecPhraseData *Rdata,
1472  int emit,
1473  int Loffset,
1474  int Roffset,
1475  int max_npos)
1476 {
1477  int Lindex,
1478  Rindex;
1479 
1480  /* Loop until both inputs are exhausted */
1481  Lindex = Rindex = 0;
1482  while (Lindex < Ldata->npos || Rindex < Rdata->npos)
1483  {
1484  int Lpos,
1485  Rpos;
      /* stays 0 when the current position is not selected by "emit" */
1486  int output_pos = 0;
1487 
1488  /*
1489  * Fetch current values to compare. WEP_GETPOS() is needed because
1490  * ExecPhraseData->data can point to a tsvector's WordEntryPosVector.
1491  */
1492  if (Lindex < Ldata->npos)
1493  Lpos = WEP_GETPOS(Ldata->pos[Lindex]) + Loffset;
1494  else
1495  {
1496  /* L array exhausted, so we're done if R_ONLY isn't set */
1497  if (!(emit & TSPO_R_ONLY))
1498  break;
      /* sentinel: every remaining R position compares as smaller */
1499  Lpos = INT_MAX;
1500  }
1501  if (Rindex < Rdata->npos)
1502  Rpos = WEP_GETPOS(Rdata->pos[Rindex]) + Roffset;
1503  else
1504  {
1505  /* R array exhausted, so we're done if L_ONLY isn't set */
1506  if (!(emit & TSPO_L_ONLY))
1507  break;
      /* sentinel: every remaining L position compares as smaller */
1508  Rpos = INT_MAX;
1509  }
1510 
1511  /* Merge-join the two input lists */
1512  if (Lpos < Rpos)
1513  {
1514  /* Lpos is not matched in Rdata, should we output it? */
1515  if (emit & TSPO_L_ONLY)
1516  output_pos = Lpos;
1517  Lindex++;
1518  }
1519  else if (Lpos == Rpos)
1520  {
1521  /* Lpos and Rpos match ... should we output it? */
1522  if (emit & TSPO_BOTH)
1523  output_pos = Rpos;
1524  Lindex++;
1525  Rindex++;
1526  }
1527  else /* Lpos > Rpos */
1528  {
1529  /* Rpos is not matched in Ldata, should we output it? */
1530  if (emit & TSPO_R_ONLY)
1531  output_pos = Rpos;
1532  Rindex++;
1533  }
1534 
      /* only a positive output_pos is treated as "something to emit" */
1535  if (output_pos > 0)
1536  {
1537  if (data)
1538  {
1539  /* Store position, first allocating output array if needed */
      /* allocation is deferred until the first position is emitted */
1540  if (data->pos == NULL)
1541  {
1542  data->pos = (WordEntryPos *)
1543  palloc(max_npos * sizeof(WordEntryPos));
1544  data->allocated = true;
1545  }
1546  data->pos[data->npos++] = output_pos;
1547  }
1548  else
1549  {
1550  /*
1551  * Exact positions not needed, so return TS_YES as soon as we
1552  * know there is at least one.
1553  */
1554  return TS_YES;
1555  }
1556  }
1557  }
1558 
1559  if (data && data->npos > 0)
1560  {
1561  /* Let's assert we didn't overrun the array */
1562  Assert(data->npos <= max_npos);
1563  return TS_YES;
1564  }
1565  return TS_NO;
1566 }
1567 
1568 /*
1569  * Execute tsquery at or below an OP_PHRASE operator.
1570  *
1571  * This handles tsquery execution at recursion levels where we need to care
1572  * about match locations.
1573  *
1574  * In addition to the same arguments used for TS_execute, the caller may pass
1575  * a preinitialized-to-zeroes ExecPhraseData struct, to be filled with lexeme
1576  * match position info on success. data == NULL if no position data need be
1577  * returned. (In practice, outside callers pass NULL, and only the internal
1578  * recursion cases pass a data pointer.)
1579  * Note: the function assumes data != NULL for operators other than OP_PHRASE.
1580  * This is OK because an outside call always starts from an OP_PHRASE node.
1581  *
1582  * The detailed semantics of the match data, given that the function returned
1583  * TS_YES (successful match), are:
1584  *
1585  * npos > 0, negate = false:
1586  * query is matched at specified position(s) (and only those positions)
1587  * npos > 0, negate = true:
1588  * query is matched at all positions *except* specified position(s)
1589  * npos = 0, negate = true:
1590  * query is matched at all positions
1591  * npos = 0, negate = false:
1592  * disallowed (this should result in TS_NO or TS_MAYBE, as appropriate)
1593  *
1594  * Successful matches also return a "width" value which is the match width in
1595  * lexemes, less one. Hence, "width" is zero for simple one-lexeme matches,
1596  * and is the sum of the phrase operator distances for phrase matches. Note
1597  * that when width > 0, the listed positions represent the ends of matches not
1598  * the starts. (This unintuitive rule is needed to avoid possibly generating
1599  * negative positions, which wouldn't fit into the WordEntryPos arrays.)
1600  *
1601  * If the TSExecuteCallback function reports that an operand is present
1602  * but fails to provide position(s) for it, we will return TS_MAYBE when
1603  * it is possible but not certain that the query is matched.
1604  *
1605  * When the function returns TS_NO or TS_MAYBE, it must return npos = 0,
1606  * negate = false (which is the state initialized by the caller); but the
1607  * "width" output in such cases is undefined.
1608  */
/*
 * NOTE(review): this listing skips its lines 1612, 1623, 1723 and 1829, so
 * the last parameter (used as "data" below), the statement following the
 * stack-overflow comment, and the "emit" argument of two TS_phrase_output()
 * calls are not visible here.
 *
 * In the binary-operator cases the right operand is the item at curitem + 1
 * and the left operand is the item at curitem + curitem->qoperator.left.
 */
1609 static TSTernaryValue
1610 TS_phrase_execute(QueryItem *curitem, void *arg, uint32 flags,
1611  TSExecuteCallback chkcond,
1613 {
1614  ExecPhraseData Ldata,
1615  Rdata;
1616  TSTernaryValue lmatch,
1617  rmatch;
1618  int Loffset,
1619  Roffset,
1620  maxwidth;
1621 
1622  /* since this function recurses, it could be driven to stack overflow */
1624 
      /* leaf operand: the callback decides, and fills *data if it can */
1625  if (curitem->type == QI_VAL)
1626  return chkcond(arg, (QueryOperand *) curitem, data);
1627 
1628  switch (curitem->qoperator.oper)
1629  {
1630  case OP_NOT:
1631 
1632  /*
1633  * We need not touch data->width, since a NOT operation does not
1634  * change the match width.
1635  */
1636  if (flags & TS_EXEC_SKIP_NOT)
1637  {
1638  /* with SKIP_NOT, report NOT as "match everywhere" */
1639  Assert(data->npos == 0 && !data->negate);
1640  data->negate = true;
1641  return TS_YES;
1642  }
      /* the operand writes into the same *data, which is then inverted in place */
1643  switch (TS_phrase_execute(curitem + 1, arg, flags, chkcond, data))
1644  {
1645  case TS_NO:
1646  /* change "match nowhere" to "match everywhere" */
1647  Assert(data->npos == 0 && !data->negate);
1648  data->negate = true;
1649  return TS_YES;
1650  case TS_YES:
1651  if (data->npos > 0)
1652  {
1653  /* we have some positions, invert negate flag */
1654  data->negate = !data->negate;
1655  return TS_YES;
1656  }
1657  else if (data->negate)
1658  {
1659  /* change "match everywhere" to "match nowhere" */
1660  data->negate = false;
1661  return TS_NO;
1662  }
1663  /* Should not get here if result was TS_YES */
1664  Assert(false);
1665  break;
1666  case TS_MAYBE:
1667  /* match positions are, and remain, uncertain */
1668  return TS_MAYBE;
1669  }
1670  break;
1671 
1672  case OP_PHRASE:
1673  case OP_AND:
1674  memset(&Ldata, 0, sizeof(Ldata));
1675  memset(&Rdata, 0, sizeof(Rdata));
1676 
      /* short-circuit: the right operand is not evaluated when the left is TS_NO */
1677  lmatch = TS_phrase_execute(curitem + curitem->qoperator.left,
1678  arg, flags, chkcond, &Ldata);
1679  if (lmatch == TS_NO)
1680  return TS_NO;
1681 
1682  rmatch = TS_phrase_execute(curitem + 1,
1683  arg, flags, chkcond, &Rdata);
1684  if (rmatch == TS_NO)
1685  return TS_NO;
1686 
1687  /*
1688  * If either operand has no position information, then we can't
1689  * return reliable position data, only a MAYBE result.
1690  */
1691  if (lmatch == TS_MAYBE || rmatch == TS_MAYBE)
1692  return TS_MAYBE;
1693 
1694  if (curitem->qoperator.oper == OP_PHRASE)
1695  {
1696  /*
1697  * Compute Loffset and Roffset suitable for phrase match, and
1698  * compute overall width of whole phrase match.
1699  */
1700  Loffset = curitem->qoperator.distance + Rdata.width;
1701  Roffset = 0;
1702  if (data)
1703  data->width = curitem->qoperator.distance +
1704  Ldata.width + Rdata.width;
1705  }
1706  else
1707  {
1708  /*
1709  * For OP_AND, set output width and alignment like OP_OR (see
1710  * comment below)
1711  */
1712  maxwidth = Max(Ldata.width, Rdata.width);
1713  Loffset = maxwidth - Ldata.width;
1714  Roffset = maxwidth - Rdata.width;
1715  if (data)
1716  data->width = maxwidth;
1717  }
1718 
      /* dispatch on the operands' negate flags; max_npos bounds the output */
1719  if (Ldata.negate && Rdata.negate)
1720  {
1721  /* !L & !R: treat as !(L | R) */
1722  (void) TS_phrase_output(data, &Ldata, &Rdata,
1724  Loffset, Roffset,
1725  Ldata.npos + Rdata.npos);
1726  if (data)
1727  data->negate = true;
1728  return TS_YES;
1729  }
1730  else if (Ldata.negate)
1731  {
1732  /* !L & R */
1733  return TS_phrase_output(data, &Ldata, &Rdata,
1734  TSPO_R_ONLY,
1735  Loffset, Roffset,
1736  Rdata.npos);
1737  }
1738  else if (Rdata.negate)
1739  {
1740  /* L & !R */
1741  return TS_phrase_output(data, &Ldata, &Rdata,
1742  TSPO_L_ONLY,
1743  Loffset, Roffset,
1744  Ldata.npos);
1745  }
1746  else
1747  {
1748  /* straight AND */
1749  return TS_phrase_output(data, &Ldata, &Rdata,
1750  TSPO_BOTH,
1751  Loffset, Roffset,
1752  Min(Ldata.npos, Rdata.npos));
1753  }
1754 
1755  case OP_OR:
1756  memset(&Ldata, 0, sizeof(Ldata));
1757  memset(&Rdata, 0, sizeof(Rdata));
1758 
      /* unlike the AND case, both operands are always evaluated here */
1759  lmatch = TS_phrase_execute(curitem + curitem->qoperator.left,
1760  arg, flags, chkcond, &Ldata);
1761  rmatch = TS_phrase_execute(curitem + 1,
1762  arg, flags, chkcond, &Rdata);
1763 
1764  if (lmatch == TS_NO && rmatch == TS_NO)
1765  return TS_NO;
1766 
1767  /*
1768  * If either operand has no position information, then we can't
1769  * return reliable position data, only a MAYBE result.
1770  */
1771  if (lmatch == TS_MAYBE || rmatch == TS_MAYBE)
1772  return TS_MAYBE;
1773 
1774  /*
1775  * Cope with undefined output width from failed submatch. (This
1776  * takes less code than trying to ensure that all failure returns
1777  * set data->width to zero.)
1778  */
1779  if (lmatch == TS_NO)
1780  Ldata.width = 0;
1781  if (rmatch == TS_NO)
1782  Rdata.width = 0;
1783 
1784  /*
1785  * For OP_AND and OP_OR, report the width of the wider of the two
1786  * inputs, and align the narrower input's positions to the right
1787  * end of that width. This rule deals at least somewhat
1788  * reasonably with cases like "x <-> (y | z <-> q)".
1789  */
1790  maxwidth = Max(Ldata.width, Rdata.width);
1791  Loffset = maxwidth - Ldata.width;
1792  Roffset = maxwidth - Rdata.width;
      /* data is dereferenced unconditionally from here on (see header note) */
1793  data->width = maxwidth;
1794 
1795  if (Ldata.negate && Rdata.negate)
1796  {
1797  /* !L | !R: treat as !(L & R) */
1798  (void) TS_phrase_output(data, &Ldata, &Rdata,
1799  TSPO_BOTH,
1800  Loffset, Roffset,
1801  Min(Ldata.npos, Rdata.npos));
1802  data->negate = true;
1803  return TS_YES;
1804  }
1805  else if (Ldata.negate)
1806  {
1807  /* !L | R: treat as !(L & !R) */
1808  (void) TS_phrase_output(data, &Ldata, &Rdata,
1809  TSPO_L_ONLY,
1810  Loffset, Roffset,
1811  Ldata.npos);
1812  data->negate = true;
1813  return TS_YES;
1814  }
1815  else if (Rdata.negate)
1816  {
1817  /* L | !R: treat as !(!L & R) */
1818  (void) TS_phrase_output(data, &Ldata, &Rdata,
1819  TSPO_R_ONLY,
1820  Loffset, Roffset,
1821  Rdata.npos);
1822  data->negate = true;
1823  return TS_YES;
1824  }
1825  else
1826  {
1827  /* straight OR */
1828  return TS_phrase_output(data, &Ldata, &Rdata,
1830  Loffset, Roffset,
1831  Ldata.npos + Rdata.npos);
1832  }
1833 
1834  default:
1835  elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
1836  }
1837 
1838  /* not reachable, but keep compiler quiet */
1839  return TS_NO;
1840 }
1841 
1842 
1843 /*
1844  * Evaluate tsquery boolean expression.
1845  *
1846  * curitem: current tsquery item (initially, the first one)
1847  * arg: opaque value to pass through to callback function
1848  * flags: bitmask of flag bits shown in ts_utils.h
1849  * chkcond: callback function to check whether a primitive value is present
1850  */
/*
 * Returns true for both TS_YES and TS_MAYBE outcomes of the recursion, and
 * false only for TS_NO.
 */
1851 bool
1852 TS_execute(QueryItem *curitem, void *arg, uint32 flags,
1853  TSExecuteCallback chkcond)
1854 {
1855  /*
1856  * If we get TS_MAYBE from the recursion, return true. We could only see
1857  * that result if the caller passed TS_EXEC_PHRASE_NO_POS, so there's no
1858  * need to check again.
1859  */
1860  return TS_execute_recurse(curitem, arg, flags, chkcond) != TS_NO;
1861 }
1862 
1863 /*
1864  * Evaluate tsquery boolean expression.
1865  *
1866  * This is the same as TS_execute except that TS_MAYBE is returned as-is.
1867  */
/*
 * NOTE(review): listing line 1868 (the return-type line) is not visible
 * here.  The body returns the TS_execute_recurse() result unchanged.
 */
1869 TS_execute_ternary(QueryItem *curitem, void *arg, uint32 flags,
1870  TSExecuteCallback chkcond)
1871 {
1872  return TS_execute_recurse(curitem, arg, flags, chkcond);
1873 }
1874 
1875 /*
1876  * TS_execute recursion for operators above any phrase operator. Here we do
1877  * not need to worry about lexeme positions. As soon as we hit an OP_PHRASE
1878  * operator, we pass it off to TS_phrase_execute which does worry.
1879  */
/*
 * NOTE(review): listing lines 1887 and 1890 (the statements following the
 * two comments at the top of the body) are not visible here.
 *
 * The right operand of a binary operator is the item at curitem + 1; the
 * left operand is at curitem + curitem->qoperator.left.
 */
1880 static TSTernaryValue
1881 TS_execute_recurse(QueryItem *curitem, void *arg, uint32 flags,
1882  TSExecuteCallback chkcond)
1883 {
1884  TSTernaryValue lmatch;
1885 
1886  /* since this function recurses, it could be driven to stack overflow */
1888 
1889  /* ... and let's check for query cancel while we're at it */
1891 
1892  if (curitem->type == QI_VAL)
1893  return chkcond(arg, (QueryOperand *) curitem,
1894  NULL /* don't need position info */ );
1895 
1896  switch (curitem->qoperator.oper)
1897  {
1898  case OP_NOT:
      /* with SKIP_NOT the operand is not evaluated at all */
1899  if (flags & TS_EXEC_SKIP_NOT)
1900  return TS_YES;
      /* ternary negation: YES and NO swap, MAYBE stays MAYBE */
1901  switch (TS_execute_recurse(curitem + 1, arg, flags, chkcond))
1902  {
1903  case TS_NO:
1904  return TS_YES;
1905  case TS_YES:
1906  return TS_NO;
1907  case TS_MAYBE:
1908  return TS_MAYBE;
1909  }
1910  break;
1911 
1912  case OP_AND:
1913  lmatch = TS_execute_recurse(curitem + curitem->qoperator.left, arg,
1914  flags, chkcond);
      /* short-circuit: skip the right operand when the left is TS_NO */
1915  if (lmatch == TS_NO)
1916  return TS_NO;
1917  switch (TS_execute_recurse(curitem + 1, arg, flags, chkcond))
1918  {
1919  case TS_NO:
1920  return TS_NO;
1921  case TS_YES:
      /* lmatch is TS_YES or TS_MAYBE at this point */
1922  return lmatch;
1923  case TS_MAYBE:
1924  return TS_MAYBE;
1925  }
1926  break;
1927 
1928  case OP_OR:
1929  lmatch = TS_execute_recurse(curitem + curitem->qoperator.left, arg,
1930  flags, chkcond);
      /* short-circuit: skip the right operand when the left is TS_YES */
1931  if (lmatch == TS_YES)
1932  return TS_YES;
1933  switch (TS_execute_recurse(curitem + 1, arg, flags, chkcond))
1934  {
1935  case TS_NO:
      /* lmatch is TS_NO or TS_MAYBE at this point */
1936  return lmatch;
1937  case TS_YES:
1938  return TS_YES;
1939  case TS_MAYBE:
1940  return TS_MAYBE;
1941  }
1942  break;
1943 
1944  case OP_PHRASE:
1945 
1946  /*
1947  * If we get a MAYBE result, and the caller doesn't want that,
1948  * convert it to NO. It would be more consistent, perhaps, to
1949  * return the result of TS_phrase_execute() verbatim and then
1950  * convert MAYBE results at the top of the recursion. But
1951  * converting at the topmost phrase operator gives results that
1952  * are bug-compatible with the old implementation, so do it like
1953  * this for now.
1954  */
      /* NULL for the last argument: no position data is requested back */
1955  switch (TS_phrase_execute(curitem, arg, flags, chkcond, NULL))
1956  {
1957  case TS_NO:
1958  return TS_NO;
1959  case TS_YES:
1960  return TS_YES;
1961  case TS_MAYBE:
1962  return (flags & TS_EXEC_PHRASE_NO_POS) ? TS_MAYBE : TS_NO;
1963  }
1964  break;
1965 
1966  default:
1967  elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
1968  }
1969 
1970  /* not reachable, but keep compiler quiet */
1971  return TS_NO;
1972 }
1973 
1974 /*
1975  * Detect whether a tsquery boolean expression requires any positive matches
1976  * to values shown in the tsquery.
1977  *
1978  * This is needed to know whether a GIN index search requires full index scan.
1979  * For example, 'x & !y' requires a match of x, so it's sufficient to scan
1980  * entries for x; but 'x | !y' could match rows containing neither x nor y.
1981  */
/*
 * NOTE(review): listing lines 1983 and 1986 (the function-name/parameter
 * line and the statement following the stack-overflow comment) are not
 * visible here.  The body reads its argument as "curitem" and recurses via
 * tsquery_requires_match().
 *
 * Returns true for a value item, false for anything under a NOT; AND/PHRASE
 * need one side to require a match, OR needs both.
 */
1982 bool
1984 {
1985  /* since this function recurses, it could be driven to stack overflow */
1987 
1988  if (curitem->type == QI_VAL)
1989  return true;
1990 
1991  switch (curitem->qoperator.oper)
1992  {
1993  case OP_NOT:
1994 
1995  /*
1996  * Assume there are no required matches underneath a NOT. For
1997  * some cases with nested NOTs, we could prove there's a required
1998  * match, but it seems unlikely to be worth the trouble.
1999  */
2000  return false;
2001 
2002  case OP_PHRASE:
2003 
2004  /*
2005  * Treat OP_PHRASE as OP_AND here
2006  */
      /* intentional fall through into the OP_AND case */
2007  case OP_AND:
2008  /* If either side requires a match, we're good */
2009  if (tsquery_requires_match(curitem + curitem->qoperator.left))
2010  return true;
2011  else
2012  return tsquery_requires_match(curitem + 1);
2013 
2014  case OP_OR:
2015  /* Both sides must require a match */
2016  if (tsquery_requires_match(curitem + curitem->qoperator.left))
2017  return tsquery_requires_match(curitem + 1);
2018  else
2019  return false;
2020 
2021  default:
2022  elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
2023  }
2024 
2025  /* not reachable, but keep compiler quiet */
2026  return false;
2027 }
2028 
2029 /*
2030  * boolean operations
2031  */
/*
 * NOTE(review): listing lines 2033 and 2035 (the function-name line and the
 * first line of the call expression) are not visible here.  What remains
 * shows the two arguments being forwarded in swapped order: argument 1
 * first, then argument 0.
 */
2032 Datum
2034 {
2036  PG_GETARG_DATUM(1),
2037  PG_GETARG_DATUM(0)));
2038 }
2039 
/*
 * Match the tsvector "val" against the tsquery in argument 1 and return the
 * boolean outcome of TS_execute().
 *
 * NOTE(review): listing lines 2041, 2043 and 2063 (the function-name line,
 * the declaration of "val", and the last argument of the TS_execute() call)
 * are not visible here.
 */
2040 Datum
2042 {
2044  TSQuery query = PG_GETARG_TSQUERY(1);
2045  CHKVAL chkval;
2046  bool result;
2047 
2048  /* empty query matches nothing */
2049  if (!query->size)
2050  {
2051  PG_FREE_IF_COPY(val, 0);
2052  PG_FREE_IF_COPY(query, 1);
2053  PG_RETURN_BOOL(false);
2054  }
2055 
      /* describe the vector's entry array and string area, plus the query's operand text */
2056  chkval.arrb = ARRPTR(val);
2057  chkval.arre = chkval.arrb + val->size;
2058  chkval.values = STRPTR(val);
2059  chkval.operand = GETOPERAND(query);
2060  result = TS_execute(GETQUERY(query),
2061  &chkval,
2062  TS_EXEC_EMPTY,
2064 
2065  PG_FREE_IF_COPY(val, 0);
2066  PG_FREE_IF_COPY(query, 1);
2067  PG_RETURN_BOOL(result);
2068 }
2069 
/*
 * NOTE(review): listing lines 2071, 2077, 2079, 2082 and 2089 are not
 * visible here: the function-name line, the first lines of the expressions
 * that produce "vector" and "query" from arguments 0 and 1, the first line
 * of the expression consuming them, and the final return.  Both temporaries
 * are pfree'd before returning.
 */
2070 Datum
2072 {
2073  TSVector vector;
2074  TSQuery query;
2075  bool res;
2076 
2078  PG_GETARG_DATUM(0)));
2080  PG_GETARG_DATUM(1)));
2081 
2083  TSVectorGetDatum(vector),
2084  TSQueryGetDatum(query)));
2085 
2086  pfree(vector);
2087  pfree(query);
2088 
2090 }
2091 
/*
 * NOTE(review): listing lines 2093, 2099, 2102 and 2109 are not visible
 * here: the function-name line, the first line of the expression that
 * produces "vector" from argument 0, the first line of the expression
 * consuming "vector" and "query", and the final return.  The query is taken
 * directly from argument 1; "vector" is pfree'd and the query is released
 * with PG_FREE_IF_COPY.
 */
2092 Datum
2094 {
2095  TSVector vector;
2096  TSQuery query = PG_GETARG_TSQUERY(1);
2097  bool res;
2098 
2100  PG_GETARG_DATUM(0)));
2101 
2103  TSVectorGetDatum(vector),
2104  TSQueryGetDatum(query)));
2105 
2106  pfree(vector);
2107  PG_FREE_IF_COPY(query, 1);
2108 
2110 }
2111 
2112 /*
2113  * ts_stat statistic function support
2114  */
2115 
2116 
2117 /*
2118  * Returns the number of positions in value 'wptr' within tsvector 'txt',
2119  * that have a weight equal to one of the weights in 'weight' bitmask.
2120  */
/*
 * NOTE(review): listing line 2122 (function name and parameter list) is not
 * visible here; the caller in this file invokes it as
 * check_weight(txt, we, stat->weight).
 */
2121 static int
2123 {
2124  int len = POSDATALEN(txt, wptr);
2125  int num = 0;
2126  WordEntryPos *ptr = POSDATAPTR(txt, wptr);
2127 
2128  while (len--)
2129  {
      /* a position counts when the bit numbered by its weight is set in the mask */
2130  if (weight & (1 << WEP_GETWEIGHT(*ptr)))
2131  num++;
2132  ptr++;
2133  }
2134  return num;
2135 }
2136 
/*
 * Compare the lexeme stored in StatEntry "a" with the lexeme of WordEntry "e"
 * inside tsvector "t", using tsCompareString() with its last argument false
 * (presumably selecting a full, non-prefix comparison -- confirm against
 * tsCompareString's definition).
 */
2137 #define compareStatWord(a,e,t) \
2138  tsCompareString((a)->lexeme, (a)->lenlexeme, \
2139  STRPTR(t) + (e)->pos, (e)->len, \
2140  false)
2141 
/*
 * Add the entry at index "off" of tsvector "txt" to the binary tree rooted
 * at stat->root: either create a new StatEntry (allocated in
 * persistentContext) or bump the counters of the existing one.
 *
 * NOTE(review): listing line 2143 (function name and parameter list) is not
 * visible here; callers in this file pass (persistentContext, stat, txt,
 * offset-into-txt).
 */
2142 static void
2144 {
2145  WordEntry *we = ARRPTR(txt) + off;
2146  StatEntry *node = stat->root,
2147  *pnode = NULL;
2148  int n,
2149  res = 0;
2150  uint32 depth = 1;
2151 
      /*
       * n = number of occurrences to credit.  Without a weight filter an
       * entry lacking positions counts once; with a filter it counts zero.
       */
2152  if (stat->weight == 0)
2153  n = (we->haspos) ? POSDATALEN(txt, we) : 1;
2154  else
2155  n = (we->haspos) ? check_weight(txt, we, stat->weight) : 0;
2156 
2157  if (n == 0)
2158  return; /* nothing to insert */
2159 
      /* descend the tree, remembering the parent and the depth reached */
2160  while (node)
2161  {
2162  res = compareStatWord(node, we, txt);
2163 
2164  if (res == 0)
2165  {
2166  break;
2167  }
2168  else
2169  {
2170  pnode = node;
2171  node = (res < 0) ? node->left : node->right;
2172  }
2173  depth++;
2174  }
2175 
      /* track the deepest level seen; ts_setup_firstcall sizes its stack from it */
2176  if (depth > stat->maxdepth)
2177  stat->maxdepth = depth;
2178 
2179  if (node == NULL)
2180  {
      /* not found: allocate header plus lexeme bytes and link under pnode */
2181  node = MemoryContextAlloc(persistentContext, STATENTRYHDRSZ + we->len);
2182  node->left = node->right = NULL;
2183  node->ndoc = 1;
2184  node->nentry = n;
2185  node->lenlexeme = we->len;
2186  memcpy(node->lexeme, STRPTR(txt) + we->pos, node->lenlexeme);
2187 
2188  if (pnode == NULL)
2189  {
2190  stat->root = node;
2191  }
2192  else
2193  {
      /* "res" still holds the comparison against pnode */
2194  if (res < 0)
2195  pnode->left = node;
2196  else
2197  pnode->right = node;
2198  }
2199  }
2200  else
2201  {
      /* found: one more document, n more occurrences */
2202  node->ndoc++;
2203  node->nentry += n;
2204  }
2205 }
2206 
/*
 * Recursively pick entries of "txt" by repeated halving of the range
 * [low, high): insert the midpoints of the lower and upper halves, then
 * recurse into each half.  Presumably this ordering keeps the tree built by
 * insertStatEntry() reasonably balanced for sorted input -- confirm.
 *
 * "offset" shifts range positions to tsvector indexes; positions that fall
 * outside 0 .. txt->size - 1 after shifting are skipped.
 *
 * NOTE(review): listing line 2208 (function name and leading parameters) is
 * not visible here; the recursive calls pass (persistentContext, stat, txt,
 * low, high, offset).
 */
2207 static void
2209  uint32 low, uint32 high, uint32 offset)
2210 {
2211  uint32 pos;
2212  uint32 middle = (low + high) >> 1;
2213 
      /* midpoint of the lower half */
2214  pos = (low + middle) >> 1;
2215  if (low != middle && pos >= offset && pos - offset < txt->size)
2216  insertStatEntry(persistentContext, stat, txt, pos - offset);
      /* midpoint of the upper half */
2217  pos = (high + middle + 1) >> 1;
2218  if (middle + 1 != high && pos >= offset && pos - offset < txt->size)
2219  insertStatEntry(persistentContext, stat, txt, pos - offset);
2220 
2221  if (low != middle)
2222  chooseNextStatEntry(persistentContext, stat, txt, low, middle, offset);
2223  if (high != middle + 1)
2224  chooseNextStatEntry(persistentContext, stat, txt, middle + 1, high, offset);
2225 }
2226 
2227 /*
2228  * This is written like a custom aggregate function, because the
2229  * original plan was to do just that. Unfortunately, an aggregate function
2230  * can't return a set, so that plan was abandoned. If that limitation is
2231  * lifted in the future, ts_stat could be a real aggregate function so that
2232  * you could use it like this:
2233  *
2234  * SELECT ts_stat(vector_column) FROM vector_table;
2235  *
2236  * where vector_column is a tsvector-type column in vector_table.
2237  */
2238 
/*
 * Fold one tsvector into the statistics tree and return the (possibly newly
 * created) TSVectorStat.
 *
 * NOTE(review): listing lines 2240 and 2242 (function name/parameters and
 * the declaration of "txt") are not visible here; the caller in this file
 * invokes ts_accum(persistentContext, stat, data).
 */
2239 static TSVectorStat *
2241 {
2243  uint32 i,
2244  nbit = 0,
2245  offset;
2246 
2247  if (stat == NULL)
2248  { /* Init in first */
2249  stat = MemoryContextAllocZero(persistentContext, sizeof(TSVectorStat));
2250  stat->maxdepth = 1;
2251  }
2252 
2253  /* simple check of correctness */
2254  if (txt == NULL || txt->size == 0)
2255  {
      /* free txt only if it is a separate copy of the input datum */
2256  if (txt && txt != (TSVector) DatumGetPointer(data))
2257  pfree(txt);
2258  return stat;
2259  }
2260 
      /* count the bits needed to represent size - 1 ... */
2261  i = txt->size - 1;
2262  for (; i > 0; i >>= 1)
2263  nbit++;
2264 
      /* ... so nbit becomes the smallest power of two >= txt->size */
2265  nbit = 1 << nbit;
      /* center the txt->size real entries inside the power-of-two range */
2266  offset = (nbit - txt->size) / 2;
2267 
      /* insert the middle entry first, then the rest by recursive halving */
2268  insertStatEntry(persistentContext, stat, txt, (nbit >> 1) - offset);
2269  chooseNextStatEntry(persistentContext, stat, txt, 0, nbit, offset);
2270 
2271  return stat;
2272 }
2273 
/*
 * Prepare a set-returning-function context for walking the statistics tree:
 * store "stat" in funcctx->user_fctx, allocate the traversal stack, position
 * it on the leftmost node, and build the (word, ndoc, nentry) tuple
 * descriptor.  Allocations are made in funcctx->multi_call_memory_ctx.
 *
 * NOTE(review): listing line 2275 (function name and leading parameters) is
 * not visible here; callers in this file invoke it as
 * ts_setup_firstcall(fcinfo, funcctx, stat).
 */
2274 static void
2276  TSVectorStat *stat)
2277 {
2278  TupleDesc tupdesc;
2279  MemoryContext oldcontext;
2280  StatEntry *node;
2281 
2282  funcctx->user_fctx = (void *) stat;
2283 
2284  oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
2285 
      /* one slot per tree level plus one spare; zero-filled */
2286  stat->stack = palloc0(sizeof(StatEntry *) * (stat->maxdepth + 1));
2287  stat->stackpos = 0;
2288 
2289  node = stat->root;
2290  /* find leftmost value */
2291  if (node == NULL)
2292  stat->stack[stat->stackpos] = NULL;
2293  else
2294  for (;;)
2295  {
2296  stat->stack[stat->stackpos] = node;
2297  if (node->left)
2298  {
2299  stat->stackpos++;
2300  node = node->left;
2301  }
2302  else
2303  break;
2304  }
2305  Assert(stat->stackpos <= stat->maxdepth);
2306 
      /* result rows have three columns: word (text), ndoc and nentry (int4) */
2307  tupdesc = CreateTemplateTupleDesc(3);
2308  TupleDescInitEntry(tupdesc, (AttrNumber) 1, "word",
2309  TEXTOID, -1, 0);
2310  TupleDescInitEntry(tupdesc, (AttrNumber) 2, "ndoc",
2311  INT4OID, -1, 0);
2312  TupleDescInitEntry(tupdesc, (AttrNumber) 3, "nentry",
2313  INT4OID, -1, 0);
2314  funcctx->tuple_desc = BlessTupleDesc(tupdesc);
2315  funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
2316 
2317  MemoryContextSwitchTo(oldcontext);
2318 }
2319 
/*
 * Return the next not-yet-reported StatEntry using the explicit stack in
 * "stat", or NULL when the traversal is finished.  A node with ndoc == 0 is
 * treated as already reported (ts_process_call() zeroes ndoc after emitting
 * a row).
 *
 * NOTE(review): listing line 2321 (function name and parameter list) is not
 * visible here; it is invoked in this file as walkStatEntryTree(stat).
 */
2320 static StatEntry *
2322 {
2323  StatEntry *node = stat->stack[stat->stackpos];
2324 
2325  if (node == NULL)
2326  return NULL;
2327 
2328  if (node->ndoc != 0)
2329  {
2330  /* return entry itself: we have already visited the left sublink */
2331  return node;
2332  }
      /*
       * The slot above the current one still holds whatever was last stored
       * there; if that is our right child, the right subtree has presumably
       * been walked already (TODO confirm).
       */
2333  else if (node->right && node->right != stat->stack[stat->stackpos + 1])
2334  {
2335  /* go on right sublink */
2336  stat->stackpos++;
2337  node = node->right;
2338 
2339  /* find leftmost value */
2340  for (;;)
2341  {
2342  stat->stack[stat->stackpos] = node;
2343  if (node->left)
2344  {
2345  stat->stackpos++;
2346  node = node->left;
2347  }
2348  else
2349  break;
2350  }
2351  Assert(stat->stackpos <= stat->maxdepth);
2352  }
2353  else
2354  {
2355  /* we have already returned the left subtree, the node itself and the right subtree */
2356  if (stat->stackpos == 0)
2357  return NULL;
2358 
      /* pop one level and continue from the parent */
2359  stat->stackpos--;
2360  return walkStatEntryTree(stat);
2361  }
2362 
2363  return node;
2364 }
2365 
/*
 * Produce the next result row (word, ndoc, nentry) from the statistics tree
 * stored in funcctx->user_fctx, or (Datum) 0 when no entries remain.
 *
 * NOTE(review): listing line 2367 (function name and parameter list) is not
 * visible here; callers in this file invoke it as ts_process_call(funcctx).
 */
2366 static Datum
2368 {
2369  TSVectorStat *st;
2370  StatEntry *entry;
2371 
2372  st = (TSVectorStat *) funcctx->user_fctx;
2373 
2374  entry = walkStatEntryTree(st);
2375 
2376  if (entry != NULL)
2377  {
2378  Datum result;
2379  char *values[3];
2380  char ndoc[16];
2381  char nentry[16];
2382  HeapTuple tuple;
2383 
      /* the stored lexeme has no terminator, so build a NUL-terminated copy */
2384  values[0] = palloc(entry->lenlexeme + 1);
2385  memcpy(values[0], entry->lexeme, entry->lenlexeme);
2386  (values[0])[entry->lenlexeme] = '\0';
      /*
       * NOTE(review): StatEntry declares ndoc as uint32 but it is formatted
       * with "%d" here -- confirm the intended handling of values above
       * INT_MAX (the output columns are declared INT4OID).
       */
2387  sprintf(ndoc, "%d", entry->ndoc);
2388  values[1] = ndoc;
2389  sprintf(nentry, "%d", entry->nentry);
2390  values[2] = nentry;
2391 
2392  tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
2393  result = HeapTupleGetDatum(tuple);
2394 
2395  pfree(values[0]);
2396 
2397  /* mark entry as already visited */
2398  entry->ndoc = 0;
2399 
2400  return result;
2401  }
2402 
2403  return (Datum) 0;
2404 }
2405 
/*
 * Run the SQL text in "txt" through SPI, require that it yields exactly one
 * tsvector column, and accumulate every non-null value into a TSVectorStat
 * allocated in persistentContext.  "ws", when not NULL, is a string of
 * weight letters (A-D, either case) restricting which positions are counted.
 *
 * NOTE(review): listing lines 2427, 2477, 2483 and 2487 are not visible
 * here: part of the result-shape check, the statement that fetches "data"
 * and "isnull" for row i, and one statement each before the next fetch and
 * before closing the cursor.
 */
2406 static TSVectorStat *
2407 ts_stat_sql(MemoryContext persistentContext, text *txt, text *ws)
2408 {
2409  char *query = text_to_cstring(txt);
2410  TSVectorStat *stat;
2411  bool isnull;
2412  Portal portal;
2413  SPIPlanPtr plan;
2414 
2415  if ((plan = SPI_prepare(query, 0, NULL)) == NULL)
2416  /* internal error */
2417  elog(ERROR, "SPI_prepare(\"%s\") failed", query);
2418 
2419  if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL)
2420  /* internal error */
2421  elog(ERROR, "SPI_cursor_open(\"%s\") failed", query);
2422 
      /* rows are pulled from the cursor in batches of 100 */
2423  SPI_cursor_fetch(portal, true, 100);
2424 
2425  if (SPI_tuptable == NULL ||
2426  SPI_tuptable->tupdesc->natts != 1 ||
2428  TSVECTOROID))
2429  ereport(ERROR,
2430  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2431  errmsg("ts_stat query must return one tsvector column")));
2432 
2433  stat = MemoryContextAllocZero(persistentContext, sizeof(TSVectorStat));
2434  stat->maxdepth = 1;
2435 
2436  if (ws)
2437  {
2438  char *buf;
2439 
      /*
       * Scan the weight string one (possibly multibyte) character at a
       * time; only single-byte characters are examined, anything else is
       * ignored.  A..D set bits 3..0 of stat->weight.
       */
2440  buf = VARDATA_ANY(ws);
2441  while (buf - VARDATA_ANY(ws) < VARSIZE_ANY_EXHDR(ws))
2442  {
2443  if (pg_mblen(buf) == 1)
2444  {
2445  switch (*buf)
2446  {
2447  case 'A':
2448  case 'a':
2449  stat->weight |= 1 << 3;
2450  break;
2451  case 'B':
2452  case 'b':
2453  stat->weight |= 1 << 2;
2454  break;
2455  case 'C':
2456  case 'c':
2457  stat->weight |= 1 << 1;
2458  break;
2459  case 'D':
2460  case 'd':
2461  stat->weight |= 1;
2462  break;
2463  default:
      /* unrecognized letter: leaves the mask unchanged */
2464  stat->weight |= 0;
2465  }
2466  }
2467  buf += pg_mblen(buf);
2468  }
2469  }
2470 
      /* process the current batch, then fetch the next, until a fetch returns no rows */
2471  while (SPI_processed > 0)
2472  {
2473  uint64 i;
2474 
2475  for (i = 0; i < SPI_processed; i++)
2476  {
2478 
2479  if (!isnull)
2480  stat = ts_accum(persistentContext, stat, data);
2481  }
2482 
2484  SPI_cursor_fetch(portal, true, 100);
2485  }
2486 
2488  SPI_cursor_close(portal);
2489  SPI_freeplan(plan);
2490  pfree(query);
2491 
2492  return stat;
2493 }
2494 
/*
 * Set-returning entry point taking one argument (the query text).  On the
 * first call it builds the statistics through ts_stat_sql() with no weight
 * filter (NULL); every call then returns the next row from
 * ts_process_call() until that yields (Datum) 0.
 *
 * NOTE(review): listing line 2496 (the function-name line) is not visible
 * here.
 */
2495 Datum
2497 {
2498  FuncCallContext *funcctx;
2499  Datum result;
2500 
2501  if (SRF_IS_FIRSTCALL())
2502  {
2503  TSVectorStat *stat;
2504  text *txt = PG_GETARG_TEXT_PP(0);
2505 
2506  funcctx = SRF_FIRSTCALL_INIT();
      /* SPI is connected only for the duration of the first-call setup */
2507  SPI_connect();
2508  stat = ts_stat_sql(funcctx->multi_call_memory_ctx, txt, NULL);
2509  PG_FREE_IF_COPY(txt, 0);
2510  ts_setup_firstcall(fcinfo, funcctx, stat);
2511  SPI_finish();
2512  }
2513 
2514  funcctx = SRF_PERCALL_SETUP();
2515  if ((result = ts_process_call(funcctx)) != (Datum) 0)
2516  SRF_RETURN_NEXT(funcctx, result);
2517  SRF_RETURN_DONE(funcctx);
2518 }
2519 
/*
 * Set-returning entry point taking two arguments: the query text and a
 * string of weight letters that is passed to ts_stat_sql() as the filter.
 * Otherwise follows the same first-call / per-call pattern as the
 * one-argument variant above.
 *
 * NOTE(review): listing line 2521 (the function-name line) is not visible
 * here.
 */
2520 Datum
2522 {
2523  FuncCallContext *funcctx;
2524  Datum result;
2525 
2526  if (SRF_IS_FIRSTCALL())
2527  {
2528  TSVectorStat *stat;
2529  text *txt = PG_GETARG_TEXT_PP(0);
2530  text *ws = PG_GETARG_TEXT_PP(1);
2531 
2532  funcctx = SRF_FIRSTCALL_INIT();
      /* SPI is connected only for the duration of the first-call setup */
2533  SPI_connect();
2534  stat = ts_stat_sql(funcctx->multi_call_memory_ctx, txt, ws);
2535  PG_FREE_IF_COPY(txt, 0);
2536  PG_FREE_IF_COPY(ws, 1);
2537  ts_setup_firstcall(fcinfo, funcctx, stat);
2538  SPI_finish();
2539  }
2540 
2541  funcctx = SRF_PERCALL_SETUP();
2542  if ((result = ts_process_call(funcctx)) != (Datum) 0)
2543  SRF_RETURN_NEXT(funcctx, result);
2544  SRF_RETURN_DONE(funcctx);
2545 }
2546 
2547 
2548 /*
2549  * Triggers for automatic update of a tsvector column from text column(s)
2550  *
2551  * Trigger arguments are either
2552  * name of tsvector col, name of tsconfig to use, name(s) of text col(s)
2553  * name of tsvector col, name of regconfig col, name(s) of text col(s)
2554  * ie, tsconfig can either be specified by name, or indirectly as the
2555  * contents of a regconfig field in the row. If the name is used, it must
2556  * be explicitly schema-qualified.
2557  */
2558 Datum
2560 {
2561  return tsvector_update_trigger(fcinfo, false);
2562 }
2563 
2564 Datum
2566 {
2567  return tsvector_update_trigger(fcinfo, true);
2568 }
2569 
2570 static Datum
2572 {
2573  TriggerData *trigdata;
2574  Trigger *trigger;
2575  Relation rel;
2576  HeapTuple rettuple = NULL;
2577  int tsvector_attr_num,
2578  i;
2579  ParsedText prs;
2580  Datum datum;
2581  bool isnull;
2582  text *txt;
2583  Oid cfgId;
2584  bool update_needed;
2585 
2586  /* Check call context */
2587  if (!CALLED_AS_TRIGGER(fcinfo)) /* internal error */
2588  elog(ERROR, "tsvector_update_trigger: not fired by trigger manager");
2589 
2590  trigdata = (TriggerData *) fcinfo->context;
2591  if (!TRIGGER_FIRED_FOR_ROW(trigdata->tg_event))
2592  elog(ERROR, "tsvector_update_trigger: must be fired for row");
2593  if (!TRIGGER_FIRED_BEFORE(trigdata->tg_event))
2594  elog(ERROR, "tsvector_update_trigger: must be fired BEFORE event");
2595 
2596  if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event))
2597  {
2598  rettuple = trigdata->tg_trigtuple;
2599  update_needed = true;
2600  }
2601  else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event))
2602  {
2603  rettuple = trigdata->tg_newtuple;
2604  update_needed = false; /* computed below */
2605  }
2606  else
2607  elog(ERROR, "tsvector_update_trigger: must be fired for INSERT or UPDATE");
2608 
2609  trigger = trigdata->tg_trigger;
2610  rel = trigdata->tg_relation;
2611 
2612  if (trigger->tgnargs < 3)
2613  elog(ERROR, "tsvector_update_trigger: arguments must be tsvector_field, ts_config, text_field1, ...)");
2614 
2615  /* Find the target tsvector column */
2616  tsvector_attr_num = SPI_fnumber(rel->rd_att, trigger->tgargs[0]);
2617  if (tsvector_attr_num == SPI_ERROR_NOATTRIBUTE)
2618  ereport(ERROR,
2619  (errcode(ERRCODE_UNDEFINED_COLUMN),
2620  errmsg("tsvector column \"%s\" does not exist",
2621  trigger->tgargs[0])));
2622  /* This will effectively reject system columns, so no separate test: */
2623  if (!IsBinaryCoercible(SPI_gettypeid(rel->rd_att, tsvector_attr_num),
2624  TSVECTOROID))
2625  ereport(ERROR,
2626  (errcode(ERRCODE_DATATYPE_MISMATCH),
2627  errmsg("column \"%s\" is not of tsvector type",
2628  trigger->tgargs[0])));
2629 
2630  /* Find the configuration to use */
2631  if (config_column)
2632  {
2633  int config_attr_num;
2634 
2635  config_attr_num = SPI_fnumber(rel->rd_att, trigger->tgargs[1]);
2636  if (config_attr_num == SPI_ERROR_NOATTRIBUTE)
2637  ereport(ERROR,
2638  (errcode(ERRCODE_UNDEFINED_COLUMN),
2639  errmsg("configuration column \"%s\" does not exist",
2640  trigger->tgargs[1])));
2641  if (!IsBinaryCoercible(SPI_gettypeid(rel->rd_att, config_attr_num),
2642  REGCONFIGOID))
2643  ereport(ERROR,
2644  (errcode(ERRCODE_DATATYPE_MISMATCH),
2645  errmsg("column \"%s\" is not of regconfig type",
2646  trigger->tgargs[1])));
2647 
2648  datum = SPI_getbinval(rettuple, rel->rd_att, config_attr_num, &isnull);
2649  if (isnull)
2650  ereport(ERROR,
2651  (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
2652  errmsg("configuration column \"%s\" must not be null",
2653  trigger->tgargs[1])));
2654  cfgId = DatumGetObjectId(datum);
2655  }
2656  else
2657  {
2658  List *names;
2659 
2660  names = stringToQualifiedNameList(trigger->tgargs[1]);
2661  /* require a schema so that results are not search path dependent */
2662  if (list_length(names) < 2)
2663  ereport(ERROR,
2664  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2665  errmsg("text search configuration name \"%s\" must be schema-qualified",
2666  trigger->tgargs[1])));
2667  cfgId = get_ts_config_oid(names, false);
2668  }
2669 
2670  /* initialize parse state */
2671  prs.lenwords = 32;
2672  prs.curwords = 0;
2673  prs.pos = 0;
2674  prs.words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs.lenwords);
2675 
2676  /* find all words in indexable column(s) */
2677  for (i = 2; i < trigger->tgnargs; i++)
2678  {
2679  int numattr;
2680 
2681  numattr = SPI_fnumber(rel->rd_att, trigger->tgargs[i]);
2683  ereport(ERROR,
2684  (errcode(ERRCODE_UNDEFINED_COLUMN),
2685  errmsg("column \"%s\" does not exist",
2686  trigger->tgargs[i])));
2687  if (!IsBinaryCoercible(SPI_gettypeid(rel->rd_att, numattr), TEXTOID))
2688  ereport(ERROR,
2689  (errcode(ERRCODE_DATATYPE_MISMATCH),
2690  errmsg("column \"%s\" is not of a character type",
2691  trigger->tgargs[i])));
2692 
2694  update_needed = true;
2695 
2696  datum = SPI_getbinval(rettuple, rel->rd_att, numattr, &isnull);
2697  if (isnull)
2698  continue;
2699 
2700  txt = DatumGetTextPP(datum);
2701 
2702  parsetext(cfgId, &prs, VARDATA_ANY(txt), VARSIZE_ANY_EXHDR(txt));
2703 
2704  if (txt != (text *) DatumGetPointer(datum))
2705  pfree(txt);
2706  }
2707 
2708  if (update_needed)
2709  {
2710  /* make tsvector value */
2711  datum = TSVectorGetDatum(make_tsvector(&prs));
2712  isnull = false;
2713 
2714  /* and insert it into tuple */
2715  rettuple = heap_modify_tuple_by_cols(rettuple, rel->rd_att,
2716  1, &tsvector_attr_num,
2717  &datum, &isnull);
2718 
2719  pfree(DatumGetPointer(datum));
2720  }
2721 
2722  return PointerGetDatum(rettuple);
2723 }
#define GETQUERY(x)
Definition: _int.h:157
#define PG_GETARG_ARRAYTYPE_P(n)
Definition: array.h:256
ArrayType * construct_array(Datum *elems, int nelems, Oid elmtype, int elmlen, bool elmbyval, char elmalign)
Definition: arrayfuncs.c:3319
void deconstruct_array(ArrayType *array, Oid elmtype, int elmlen, bool elmbyval, char elmalign, Datum **elemsp, bool **nullsp, int *nelemsp)
Definition: arrayfuncs.c:3491
int16 AttrNumber
Definition: attnum.h:21
bool bms_is_member(int x, const Bitmapset *a)
Definition: bitmapset.c:427
static Datum values[MAXATTR]
Definition: bootstrap.c:156
int numattr
Definition: bootstrap.c:66
unsigned short uint16
Definition: c.h:440
unsigned int uint32
Definition: c.h:441
signed char int8
Definition: c.h:427
#define Min(x, y)
Definition: c.h:986
signed int int32
Definition: c.h:429
#define Max(x, y)
Definition: c.h:980
#define VARHDRSZ
Definition: c.h:627
#define FLEXIBLE_ARRAY_MEMBER
Definition: c.h:350
#define SHORTALIGN(LEN)
Definition: c.h:753
#define ARRPTR(x)
Definition: cube.c:25
struct cursor * cur
Definition: ecpg.c:28
int errcode(int sqlerrcode)
Definition: elog.c:693
int errmsg(const char *fmt,...)
Definition: elog.c:904
#define ERROR
Definition: elog.h:33
#define elog(elevel,...)
Definition: elog.h:218
#define ereport(elevel,...)
Definition: elog.h:143
TupleDesc BlessTupleDesc(TupleDesc tupdesc)
Definition: execTuples.c:2071
HeapTuple BuildTupleFromCStrings(AttInMetadata *attinmeta, char **values)
Definition: execTuples.c:2135
AttInMetadata * TupleDescGetAttInMetadata(TupleDesc tupdesc)
Definition: execTuples.c:2086
#define PG_FREE_IF_COPY(ptr, n)
Definition: fmgr.h:260
#define PG_GETARG_TEXT_PP(n)
Definition: fmgr.h:309
#define DirectFunctionCall2(func, arg1, arg2)
Definition: fmgr.h:633
#define PG_GETARG_CHAR(n)
Definition: fmgr.h:273
#define DatumGetTextPP(X)
Definition: fmgr.h:292
#define DirectFunctionCall1(func, arg1)
Definition: fmgr.h:631
#define PG_GETARG_DATUM(n)
Definition: fmgr.h:268
#define PG_RETURN_INT32(x)
Definition: fmgr.h:354
#define PG_RETURN_DATUM(x)
Definition: fmgr.h:353
#define PG_RETURN_POINTER(x)
Definition: fmgr.h:361
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
#define PG_RETURN_BOOL(x)
Definition: fmgr.h:359
#define SRF_IS_FIRSTCALL()
Definition: funcapi.h:299
#define SRF_PERCALL_SETUP()
Definition: funcapi.h:303
#define HeapTupleGetDatum(tuple)
Definition: funcapi.h:220
#define SRF_RETURN_NEXT(_funcctx, _result)
Definition: funcapi.h:305
#define SRF_FIRSTCALL_INIT()
Definition: funcapi.h:301
#define SRF_RETURN_DONE(_funcctx)
Definition: funcapi.h:323
Datum difference(PG_FUNCTION_ARGS)
HeapTuple heap_form_tuple(TupleDesc tupleDescriptor, Datum *values, bool *isnull)
Definition: heaptuple.c:1020
HeapTuple heap_modify_tuple_by_cols(HeapTuple tuple, TupleDesc tupleDesc, int nCols, int *replCols, Datum *replValues, bool *replIsnull)
Definition: heaptuple.c:1181
#define CALCDATASIZE(x, lenstr)
Definition: hstore.h:72
#define STRPTR(x)
Definition: hstore.h:76
long val
Definition: informix.c:664
int b
Definition: isn.c:70
int a
Definition: isn.c:69
int j
Definition: isn.c:74
int i
Definition: isn.c:73
Assert(fmt[strlen(fmt) - 1] !='\n')
#define GETOPERAND(x)
Definition: ltree.h:164
int pg_mblen(const char *mbstr)
Definition: mbutils.c:966
void pfree(void *pointer)
Definition: mcxt.c:1175
void * palloc0(Size size)
Definition: mcxt.c:1099
void * MemoryContextAllocZero(MemoryContext context, Size size)
Definition: mcxt.c:906
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1188
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:863
void * palloc(Size size)
Definition: mcxt.c:1068
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:121
Oid get_ts_config_oid(List *names, bool missing_ok)
Definition: namespace.c:2725
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
bool IsBinaryCoercible(Oid srctype, Oid targettype)
void * arg
const void size_t len
const void * data
static int list_length(const List *l)
Definition: pg_list.h:149
static char * buf
Definition: pg_test_fsync.c:67
#define sprintf
Definition: port.h:227
#define qsort(a, b, c, d)
Definition: port.h:495
void check_stack_depth(void)
Definition: postgres.c:3500
#define DatumGetObjectId(X)
Definition: postgres.h:544
uintptr_t Datum
Definition: postgres.h:411
#define DatumGetBool(X)
Definition: postgres.h:437
#define DatumGetPointer(X)
Definition: postgres.h:593
#define VARDATA(PTR)
Definition: postgres.h:315
#define VARDATA_ANY(PTR)
Definition: postgres.h:361
#define SET_VARSIZE(PTR, len)
Definition: postgres.h:342
#define VARSIZE(PTR)
Definition: postgres.h:316
#define DatumGetChar(X)
Definition: postgres.h:453
#define Int16GetDatum(X)
Definition: postgres.h:495
#define PointerGetDatum(X)
Definition: postgres.h:600
#define VARSIZE_ANY_EXHDR(PTR)
Definition: postgres.h:354
unsigned int Oid
Definition: postgres_ext.h:31
static size_t qunique(void *array, size_t elements, size_t width, int(*compare)(const void *, const void *))
Definition: qunique.h:21
static int cmp(const chr *x, const chr *y, size_t len)
Definition: regc_locale.c:747
List * stringToQualifiedNameList(const char *string)
Definition: regproc.c:1877
int SPI_fnumber(TupleDesc tupdesc, const char *fname)
Definition: spi.c:1173
uint64 SPI_processed
Definition: spi.c:45
Oid SPI_gettypeid(TupleDesc tupdesc, int fnumber)
Definition: spi.c:1306
int SPI_freeplan(SPIPlanPtr plan)
Definition: spi.c:1023
SPITupleTable * SPI_tuptable
Definition: spi.c:46
int SPI_connect(void)
Definition: spi.c:95
void SPI_cursor_fetch(Portal portal, bool forward, long count)
Definition: spi.c:1804
int SPI_finish(void)
Definition: spi.c:183
void SPI_freetuptable(SPITupleTable *tuptable)
Definition: spi.c:1384
Portal SPI_cursor_open(const char *name, SPIPlanPtr plan, Datum *Values, const char *Nulls, bool read_only)
Definition: spi.c:1443
SPIPlanPtr SPI_prepare(const char *src, int nargs, Oid *argtypes)
Definition: spi.c:858
void SPI_cursor_close(Portal portal)
Definition: spi.c:1860
Datum SPI_getbinval(HeapTuple tuple, TupleDesc tupdesc, int fnumber, bool *isnull)
Definition: spi.c:1250
#define SPI_ERROR_NOATTRIBUTE
Definition: spi.h:76
int32 * arrb
Definition: _int_bool.c:226
WordEntry * arre
Definition: tsvector_op.c:39
char * values
Definition: tsvector_op.c:40
char * operand
Definition: ltxtquery_op.c:52
int32 * arre
Definition: _int_bool.c:227
WordEntry * arrb
Definition: tsvector_op.c:38
WordEntryPos * pos
Definition: ts_utils.h:162
void * user_fctx
Definition: funcapi.h:82
uint64 call_cntr
Definition: funcapi.h:65
AttInMetadata * attinmeta
Definition: funcapi.h:91
MemoryContext multi_call_memory_ctx
Definition: funcapi.h:101
TupleDesc tuple_desc
Definition: funcapi.h:112
Definition: pg_list.h:51
int32 pos
Definition: ts_utils.h:103
int32 lenwords
Definition: ts_utils.h:101
int32 curwords
Definition: ts_utils.h:102
ParsedWord * words
Definition: ts_utils.h:100
int16 distance
Definition: ts_type.h:181
uint32 left
Definition: ts_type.h:182
TupleDesc rd_att
Definition: rel.h:110
TupleDesc tupdesc
Definition: spi.h:25
HeapTuple * vals
Definition: spi.h:26
uint32 nentry
Definition: tsvector_op.c:49
struct StatEntry * left
Definition: tsvector_op.c:50
char lexeme[FLEXIBLE_ARRAY_MEMBER]
Definition: tsvector_op.c:53
uint32 lenlexeme
Definition: tsvector_op.c:52
uint32 ndoc
Definition: tsvector_op.c:47
struct StatEntry * right
Definition: tsvector_op.c:51
int32 size
Definition: ts_type.h:206
int32 size
Definition: ts_type.h:93
int32 weight
Definition: tsvector_op.c:60
StatEntry * root
Definition: tsvector_op.c:67
uint32 maxdepth
Definition: tsvector_op.c:62
uint32 stackpos
Definition: tsvector_op.c:65
StatEntry ** stack
Definition: tsvector_op.c:64
Relation tg_relation
Definition: trigger.h:35
const Bitmapset * tg_updatedcols
Definition: trigger.h:43
TriggerEvent tg_event
Definition: trigger.h:34
HeapTuple tg_newtuple
Definition: trigger.h:37
Trigger * tg_trigger
Definition: trigger.h:38
HeapTuple tg_trigtuple
Definition: trigger.h:36
int16 tgnargs
Definition: reltrigger.h:38
char ** tgargs
Definition: reltrigger.h:41
WordEntryPos pos[FLEXIBLE_ARRAY_MEMBER]
Definition: ts_type.h:68
uint32 pos
Definition: ts_type.h:46
uint32 haspos
Definition: ts_type.h:44
uint32 len
Definition: ts_type.h:45
Definition: c.h:622
#define FirstLowInvalidHeapAttributeNumber
Definition: sysattr.h:27
Datum to_tsvector(PG_FUNCTION_ARGS)
Definition: to_tsany.c:269
TSVector make_tsvector(ParsedText *prs)
Definition: to_tsany.c:166
Datum plainto_tsquery(PG_FUNCTION_ARGS)
Definition: to_tsany.c:639
#define TRIGGER_FIRED_BEFORE(event)
Definition: trigger.h:128
#define CALLED_AS_TRIGGER(fcinfo)
Definition: trigger.h:26
#define TRIGGER_FIRED_FOR_ROW(event)
Definition: trigger.h:122
#define TRIGGER_FIRED_BY_INSERT(event)
Definition: trigger.h:110
#define TRIGGER_FIRED_BY_UPDATE(event)
Definition: trigger.h:116
void parsetext(Oid cfgId, ParsedText *prs, char *buf, int buflen)
Definition: ts_parse.c:354
#define PG_GETARG_TSVECTOR(n)
Definition: ts_type.h:120
#define WEP_GETPOS(x)
Definition: ts_type.h:80
#define _POSVECPTR(x, e)
Definition: ts_type.h:109
#define MAXENTRYPOS
Definition: ts_type.h:85
#define WEP_SETPOS(x, v)
Definition: ts_type.h:83
#define POSDATALEN(x, e)
Definition: ts_type.h:110
#define TSQueryGetDatum(X)
Definition: ts_type.h:235
#define PG_GETARG_TSQUERY(n)
Definition: ts_type.h:236
uint16 WordEntryPos
Definition: ts_type.h:63
#define MAXNUMPOS
Definition: ts_type.h:86
TSVectorData * TSVector
Definition: ts_type.h:98
#define DatumGetTSVector(X)
Definition: ts_type.h:117
#define PG_GETARG_TSVECTOR_COPY(n)
Definition: ts_type.h:121
#define WEP_SETWEIGHT(x, v)
Definition: ts_type.h:82
#define DatumGetTSQuery(X)
Definition: ts_type.h:233
#define QI_VAL
Definition: ts_type.h:134
#define LIMITPOS(x)
Definition: ts_type.h:87
#define OP_AND
Definition: ts_type.h:165
#define OP_PHRASE
Definition: ts_type.h:167
#define OP_OR
Definition: ts_type.h:166
#define POSDATAPTR(x, e)
Definition: ts_type.h:111
#define OP_NOT
Definition: ts_type.h:164
#define WEP_GETWEIGHT(x)
Definition: ts_type.h:79
#define TSVectorGetDatum(X)
Definition: ts_type.h:119
#define MAXSTRPOS
Definition: ts_type.h:50
#define TS_EXEC_PHRASE_NO_POS
Definition: ts_utils.h:198
TSTernaryValue
Definition: ts_utils.h:129
@ TS_MAYBE
Definition: ts_utils.h:132
@ TS_NO
Definition: ts_utils.h:130
@ TS_YES
Definition: ts_utils.h:131
#define TS_EXEC_EMPTY
Definition: ts_utils.h:184
#define TS_EXEC_SKIP_NOT
Definition: ts_utils.h:191
TSTernaryValue(* TSExecuteCallback)(void *arg, QueryOperand *val, ExecPhraseData *data)
Definition: ts_utils.h:178
static const float weights[]
Definition: tsrank.c:24
int compareWordEntryPos(const void *a, const void *b)
Definition: tsvector.c:33
Datum tsvector_setweight_by_filter(PG_FUNCTION_ARGS)
Definition: tsvector_op.c:269
#define TSPO_BOTH
Definition: tsvector_op.c:1466
static Datum ts_process_call(FuncCallContext *funcctx)
Definition: tsvector_op.c:2367
static TSTernaryValue checkcondition_str(void *checkval, QueryOperand *val, ExecPhraseData *data)
Definition: tsvector_op.c:1296
bool TS_execute(QueryItem *curitem, void *arg, uint32 flags, TSExecuteCallback chkcond)
Definition: tsvector_op.c:1852
Datum ts_match_vq(PG_FUNCTION_ARGS)
Definition: tsvector_op.c:2041
Datum tsvector_update_trigger_byid(PG_FUNCTION_ARGS)
Definition: tsvector_op.c:2559
static int32 add_pos(TSVector src, WordEntry *srcptr, TSVector dest, WordEntry *destptr, int32 maxpos)
Definition: tsvector_op.c:361
Datum tsvector_delete_arr(PG_FUNCTION_ARGS)
Definition: tsvector_op.c:577
#define TSPO_R_ONLY
Definition: tsvector_op.c:1465
Datum array_to_tsvector(PG_FUNCTION_ARGS)
Definition: tsvector_op.c:747
#define STATENTRYHDRSZ
Definition: tsvector_op.c:56
Datum tsvector_filter(PG_FUNCTION_ARGS)
Definition: tsvector_op.c:819
static TSTernaryValue TS_phrase_output(ExecPhraseData *data, ExecPhraseData *Ldata, ExecPhraseData *Rdata, int emit, int Loffset, int Roffset, int max_npos)
Definition: tsvector_op.c:1469
#define compareEntry(pa, a, pb, b)
Definition: tsvector_op.c:351
Datum tsvector_setweight(PG_FUNCTION_ARGS)
Definition: tsvector_op.c:207
#define TSVECTORCMPFUNC(type, action, ret)
Definition: tsvector_op.c:141
static int check_weight(TSVector txt, WordEntry *wptr, int8 weight)
Definition: tsvector_op.c:2122
Datum tsvector_strip(PG_FUNCTION_ARGS)
Definition: tsvector_op.c:164
struct StatEntry StatEntry
Datum tsvector_length(PG_FUNCTION_ARGS)
Definition: tsvector_op.c:197
Datum tsvector_to_array(PG_FUNCTION_ARGS)
Definition: tsvector_op.c:720
Datum ts_match_tq(PG_FUNCTION_ARGS)
Definition: tsvector_op.c:2093
Datum ts_stat1(PG_FUNCTION_ARGS)
Definition: tsvector_op.c:2496
int32 tsCompareString(char *a, int lena, char *b, int lenb, bool prefix)
Definition: tsvector_op.c:1153
Datum tsvector_delete_str(PG_FUNCTION_ARGS)
Definition: tsvector_op.c:553
#define TSPO_L_ONLY
Definition: tsvector_op.c:1464
static Datum tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column)
Definition: tsvector_op.c:2571
Datum ts_match_qv(PG_FUNCTION_ARGS)
Definition: tsvector_op.c:2033
static int silly_cmp_tsvector(const TSVector a, const TSVector b)
Definition: tsvector_op.c:82
static int tsvector_bsearch(const TSVector tsv, char *lexeme, int lexeme_len)
Definition: tsvector_op.c:397
bool tsquery_requires_match(QueryItem *curitem)
Definition: tsvector_op.c:1983
Datum tsvector_concat(PG_FUNCTION_ARGS)
Definition: tsvector_op.c:926
Datum tsvector_update_trigger_bycolumn(PG_FUNCTION_ARGS)
Definition: tsvector_op.c:2565
static TSTernaryValue TS_execute_recurse(QueryItem *curitem, void *arg, uint32 flags, TSExecuteCallback chkcond)
Definition: tsvector_op.c:1881
TSTernaryValue TS_execute_ternary(QueryItem *curitem, void *arg, uint32 flags, TSExecuteCallback chkcond)
Definition: tsvector_op.c:1869
static int compare_int(const void *va, const void *vb)
Definition: tsvector_op.c:430
static StatEntry * walkStatEntryTree(TSVectorStat *stat)
Definition: tsvector_op.c:2321
static void ts_setup_firstcall(FunctionCallInfo fcinfo, FuncCallContext *funcctx, TSVectorStat *stat)
Definition: tsvector_op.c:2275
static void chooseNextStatEntry(MemoryContext persistentContext, TSVectorStat *stat, TSVector txt, uint32 low, uint32 high, uint32 offset)
Definition: tsvector_op.c:2208
Datum ts_match_tt(PG_FUNCTION_ARGS)
Definition: tsvector_op.c:2071
static TSVectorStat * ts_accum(MemoryContext persistentContext, TSVectorStat *stat, Datum data)
Definition: tsvector_op.c:2240
static TSTernaryValue TS_phrase_execute(QueryItem *curitem, void *arg, uint32 flags, TSExecuteCallback chkcond, ExecPhraseData *data)
Definition: tsvector_op.c:1610
static int compare_text_lexemes(const void *va, const void *vb)
Definition: tsvector_op.c:441
static TSTernaryValue checkclass_str(CHKVAL *chkval, WordEntry *entry, QueryOperand *val, ExecPhraseData *data)
Definition: tsvector_op.c:1190
#define compareStatWord(a, e, t)
Definition: tsvector_op.c:2137
Datum tsvector_unnest(PG_FUNCTION_ARGS)
Definition: tsvector_op.c:632
static TSVectorStat * ts_stat_sql(MemoryContext persistentContext, text *txt, text *ws)
Definition: tsvector_op.c:2407
Datum ts_stat2(PG_FUNCTION_ARGS)
Definition: tsvector_op.c:2521
static void insertStatEntry(MemoryContext persistentContext, TSVectorStat *stat, TSVector txt, uint32 off)
Definition: tsvector_op.c:2143
static TSVector tsvector_delete_by_indices(TSVector tsv, int *indices_to_delete, int indices_count)
Definition: tsvector_op.c:463
TupleDesc CreateTemplateTupleDesc(int natts)
Definition: tupdesc.c:45
void TupleDescInitEntry(TupleDesc desc, AttrNumber attributeNumber, const char *attributeName, Oid oidtypeid, int32 typmod, int attdim)
Definition: tupdesc.c:583
QueryOperator qoperator
Definition: ts_type.h:194
QueryItemType type
Definition: ts_type.h:193
char * text_to_cstring(const text *t)
Definition: varlena.c:221
text * cstring_to_text_with_len(const char *s, int len)
Definition: varlena.c:200
#define stat
Definition: win32_port.h:283