PostgreSQL Source Code  git master
tsvector_op.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * tsvector_op.c
4  * operations over tsvector
5  *
6  * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
7  *
8  *
9  * IDENTIFICATION
10  * src/backend/utils/adt/tsvector_op.c
11  *
12  *-------------------------------------------------------------------------
13  */
14 #include "postgres.h"
15 
16 #include <limits.h>
17 
18 #include "access/htup_details.h"
19 #include "catalog/namespace.h"
20 #include "catalog/pg_type.h"
21 #include "commands/trigger.h"
22 #include "executor/spi.h"
23 #include "funcapi.h"
24 #include "lib/qunique.h"
25 #include "mb/pg_wchar.h"
26 #include "miscadmin.h"
27 #include "parser/parse_coerce.h"
28 #include "tsearch/ts_utils.h"
29 #include "utils/array.h"
30 #include "utils/builtins.h"
31 #include "utils/lsyscache.h"
32 #include "utils/regproc.h"
33 #include "utils/rel.h"
34 
35 
36 typedef struct
37 {
40  char *values;
41  char *operand;
42 } CHKVAL;
43 
44 
45 typedef struct StatEntry
46 {
47  uint32 ndoc; /* zero indicates that we were already here
48  * while walking through the tree */
50  struct StatEntry *left;
51  struct StatEntry *right;
54 } StatEntry;
55 
56 #define STATENTRYHDRSZ (offsetof(StatEntry, lexeme))
57 
58 typedef struct
59 {
61 
63 
66 
68 } TSVectorStat;
69 
/* TS_execute requires ternary logic to handle NOT with phrase matches */
typedef enum
{
	TS_NO,						/* definitely no match */
	TS_YES,						/* definitely does match */
	TS_MAYBE					/* can't verify match for lack of pos data */
} TSTernaryValue;
77 
78 
79 static TSTernaryValue TS_execute_recurse(QueryItem *curitem, void *arg,
80  uint32 flags,
81  TSExecuteCallback chkcond);
82 static int tsvector_bsearch(const TSVector tsv, char *lexeme, int lexeme_len);
83 static Datum tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column);
84 
85 
86 /*
87  * Order: haspos, len, word, for all positions (pos, weight)
88  */
89 static int
91 {
92  if (VARSIZE(a) < VARSIZE(b))
93  return -1;
94  else if (VARSIZE(a) > VARSIZE(b))
95  return 1;
96  else if (a->size < b->size)
97  return -1;
98  else if (a->size > b->size)
99  return 1;
100  else
101  {
102  WordEntry *aptr = ARRPTR(a);
103  WordEntry *bptr = ARRPTR(b);
104  int i = 0;
105  int res;
106 
107 
108  for (i = 0; i < a->size; i++)
109  {
110  if (aptr->haspos != bptr->haspos)
111  {
112  return (aptr->haspos > bptr->haspos) ? -1 : 1;
113  }
114  else if ((res = tsCompareString(STRPTR(a) + aptr->pos, aptr->len, STRPTR(b) + bptr->pos, bptr->len, false)) != 0)
115  {
116  return res;
117  }
118  else if (aptr->haspos)
119  {
120  WordEntryPos *ap = POSDATAPTR(a, aptr);
121  WordEntryPos *bp = POSDATAPTR(b, bptr);
122  int j;
123 
124  if (POSDATALEN(a, aptr) != POSDATALEN(b, bptr))
125  return (POSDATALEN(a, aptr) > POSDATALEN(b, bptr)) ? -1 : 1;
126 
127  for (j = 0; j < POSDATALEN(a, aptr); j++)
128  {
129  if (WEP_GETPOS(*ap) != WEP_GETPOS(*bp))
130  {
131  return (WEP_GETPOS(*ap) > WEP_GETPOS(*bp)) ? -1 : 1;
132  }
133  else if (WEP_GETWEIGHT(*ap) != WEP_GETWEIGHT(*bp))
134  {
135  return (WEP_GETWEIGHT(*ap) > WEP_GETWEIGHT(*bp)) ? -1 : 1;
136  }
137  ap++, bp++;
138  }
139  }
140 
141  aptr++;
142  bptr++;
143  }
144  }
145 
146  return 0;
147 }
148 
149 #define TSVECTORCMPFUNC( type, action, ret ) \
150 Datum \
151 tsvector_##type(PG_FUNCTION_ARGS) \
152 { \
153  TSVector a = PG_GETARG_TSVECTOR(0); \
154  TSVector b = PG_GETARG_TSVECTOR(1); \
155  int res = silly_cmp_tsvector(a, b); \
156  PG_FREE_IF_COPY(a,0); \
157  PG_FREE_IF_COPY(b,1); \
158  PG_RETURN_##ret( res action 0 ); \
159 } \
160 /* keep compiler quiet - no extra ; */ \
161 extern int no_such_variable
162 
163 TSVECTORCMPFUNC(lt, <, BOOL);
164 TSVECTORCMPFUNC(le, <=, BOOL);
165 TSVECTORCMPFUNC(eq, ==, BOOL);
166 TSVECTORCMPFUNC(ge, >=, BOOL);
167 TSVECTORCMPFUNC(gt, >, BOOL);
168 TSVECTORCMPFUNC(ne, !=, BOOL);
169 TSVECTORCMPFUNC(cmp, +, INT32);
170 
171 Datum
173 {
175  TSVector out;
176  int i,
177  len = 0;
178  WordEntry *arrin = ARRPTR(in),
179  *arrout;
180  char *cur;
181 
182  for (i = 0; i < in->size; i++)
183  len += arrin[i].len;
184 
185  len = CALCDATASIZE(in->size, len);
186  out = (TSVector) palloc0(len);
187  SET_VARSIZE(out, len);
188  out->size = in->size;
189  arrout = ARRPTR(out);
190  cur = STRPTR(out);
191  for (i = 0; i < in->size; i++)
192  {
193  memcpy(cur, STRPTR(in) + arrin[i].pos, arrin[i].len);
194  arrout[i].haspos = 0;
195  arrout[i].len = arrin[i].len;
196  arrout[i].pos = cur - STRPTR(out);
197  cur += arrout[i].len;
198  }
199 
200  PG_FREE_IF_COPY(in, 0);
201  PG_RETURN_POINTER(out);
202 }
203 
204 Datum
206 {
208  int32 ret = in->size;
209 
210  PG_FREE_IF_COPY(in, 0);
211  PG_RETURN_INT32(ret);
212 }
213 
214 Datum
216 {
218  char cw = PG_GETARG_CHAR(1);
219  TSVector out;
220  int i,
221  j;
222  WordEntry *entry;
223  WordEntryPos *p;
224  int w = 0;
225 
226  switch (cw)
227  {
228  case 'A':
229  case 'a':
230  w = 3;
231  break;
232  case 'B':
233  case 'b':
234  w = 2;
235  break;
236  case 'C':
237  case 'c':
238  w = 1;
239  break;
240  case 'D':
241  case 'd':
242  w = 0;
243  break;
244  default:
245  /* internal error */
246  elog(ERROR, "unrecognized weight: %d", cw);
247  }
248 
249  out = (TSVector) palloc(VARSIZE(in));
250  memcpy(out, in, VARSIZE(in));
251  entry = ARRPTR(out);
252  i = out->size;
253  while (i--)
254  {
255  if ((j = POSDATALEN(out, entry)) != 0)
256  {
257  p = POSDATAPTR(out, entry);
258  while (j--)
259  {
260  WEP_SETWEIGHT(*p, w);
261  p++;
262  }
263  }
264  entry++;
265  }
266 
267  PG_FREE_IF_COPY(in, 0);
268  PG_RETURN_POINTER(out);
269 }
270 
271 /*
272  * setweight(tsin tsvector, char_weight "char", lexemes "text"[])
273  *
274  * Assign weight w to elements of tsin that are listed in lexemes.
275  */
276 Datum
278 {
279  TSVector tsin = PG_GETARG_TSVECTOR(0);
280  char char_weight = PG_GETARG_CHAR(1);
281  ArrayType *lexemes = PG_GETARG_ARRAYTYPE_P(2);
282 
283  TSVector tsout;
284  int i,
285  j,
286  nlexemes,
287  weight;
288  WordEntry *entry;
289  Datum *dlexemes;
290  bool *nulls;
291 
292  switch (char_weight)
293  {
294  case 'A':
295  case 'a':
296  weight = 3;
297  break;
298  case 'B':
299  case 'b':
300  weight = 2;
301  break;
302  case 'C':
303  case 'c':
304  weight = 1;
305  break;
306  case 'D':
307  case 'd':
308  weight = 0;
309  break;
310  default:
311  /* internal error */
312  elog(ERROR, "unrecognized weight: %c", char_weight);
313  }
314 
315  tsout = (TSVector) palloc(VARSIZE(tsin));
316  memcpy(tsout, tsin, VARSIZE(tsin));
317  entry = ARRPTR(tsout);
318 
319  deconstruct_array(lexemes, TEXTOID, -1, false, TYPALIGN_INT,
320  &dlexemes, &nulls, &nlexemes);
321 
322  /*
323  * Assuming that lexemes array is significantly shorter than tsvector we
324  * can iterate through lexemes performing binary search of each lexeme
325  * from lexemes in tsvector.
326  */
327  for (i = 0; i < nlexemes; i++)
328  {
329  char *lex;
330  int lex_len,
331  lex_pos;
332 
333  if (nulls[i])
334  ereport(ERROR,
335  (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
336  errmsg("lexeme array may not contain nulls")));
337 
338  lex = VARDATA(dlexemes[i]);
339  lex_len = VARSIZE(dlexemes[i]) - VARHDRSZ;
340  lex_pos = tsvector_bsearch(tsout, lex, lex_len);
341 
342  if (lex_pos >= 0 && (j = POSDATALEN(tsout, entry + lex_pos)) != 0)
343  {
344  WordEntryPos *p = POSDATAPTR(tsout, entry + lex_pos);
345 
346  while (j--)
347  {
348  WEP_SETWEIGHT(*p, weight);
349  p++;
350  }
351  }
352  }
353 
354  PG_FREE_IF_COPY(tsin, 0);
355  PG_FREE_IF_COPY(lexemes, 2);
356 
357  PG_RETURN_POINTER(tsout);
358 }
359 
/*
 * Compare the lexemes of two WordEntrys (non-prefix comparison).
 * pa/pb are the lexeme storage base pointers that a->pos / b->pos are
 * offsets into.
 */
#define compareEntry(pa, a, pb, b)					\
	tsCompareString((pa) + (a)->pos, (a)->len,		\
					(pb) + (b)->pos, (b)->len,		\
					false)
364 
365 /*
366  * Add positions from src to dest after offsetting them by maxpos.
367  * Return the number added (might be less than expected due to overflow)
368  */
369 static int32
371  TSVector dest, WordEntry *destptr,
372  int32 maxpos)
373 {
374  uint16 *clen = &_POSVECPTR(dest, destptr)->npos;
375  int i;
376  uint16 slen = POSDATALEN(src, srcptr),
377  startlen;
378  WordEntryPos *spos = POSDATAPTR(src, srcptr),
379  *dpos = POSDATAPTR(dest, destptr);
380 
381  if (!destptr->haspos)
382  *clen = 0;
383 
384  startlen = *clen;
385  for (i = 0;
386  i < slen && *clen < MAXNUMPOS &&
387  (*clen == 0 || WEP_GETPOS(dpos[*clen - 1]) != MAXENTRYPOS - 1);
388  i++)
389  {
390  WEP_SETWEIGHT(dpos[*clen], WEP_GETWEIGHT(spos[i]));
391  WEP_SETPOS(dpos[*clen], LIMITPOS(WEP_GETPOS(spos[i]) + maxpos));
392  (*clen)++;
393  }
394 
395  if (*clen != startlen)
396  destptr->haspos = 1;
397  return *clen - startlen;
398 }
399 
400 /*
401  * Perform binary search of given lexeme in TSVector.
402  * Returns lexeme position in TSVector's entry array or -1 if lexeme wasn't
403  * found.
404  */
405 static int
406 tsvector_bsearch(const TSVector tsv, char *lexeme, int lexeme_len)
407 {
408  WordEntry *arrin = ARRPTR(tsv);
409  int StopLow = 0,
410  StopHigh = tsv->size,
411  StopMiddle,
412  cmp;
413 
414  while (StopLow < StopHigh)
415  {
416  StopMiddle = (StopLow + StopHigh) / 2;
417 
418  cmp = tsCompareString(lexeme, lexeme_len,
419  STRPTR(tsv) + arrin[StopMiddle].pos,
420  arrin[StopMiddle].len,
421  false);
422 
423  if (cmp < 0)
424  StopHigh = StopMiddle;
425  else if (cmp > 0)
426  StopLow = StopMiddle + 1;
427  else /* found it */
428  return StopMiddle;
429  }
430 
431  return -1;
432 }
433 
434 /*
435  * qsort comparator functions
436  */
437 
/*
 * qsort comparator for arrays of int: ascending numeric order.
 * Returns -1, 0 or 1.
 */
static int
compare_int(const void *va, const void *vb)
{
	const int  *lhs = (const int *) va;
	const int  *rhs = (const int *) vb;

	if (*lhs < *rhs)
		return -1;
	if (*lhs > *rhs)
		return 1;
	return 0;
}
448 
449 static int
450 compare_text_lexemes(const void *va, const void *vb)
451 {
452  Datum a = *((const Datum *) va);
453  Datum b = *((const Datum *) vb);
454  char *alex = VARDATA_ANY(a);
455  int alex_len = VARSIZE_ANY_EXHDR(a);
456  char *blex = VARDATA_ANY(b);
457  int blex_len = VARSIZE_ANY_EXHDR(b);
458 
459  return tsCompareString(alex, alex_len, blex, blex_len, false);
460 }
461 
462 /*
463  * Internal routine to delete lexemes from TSVector by array of offsets.
464  *
465  * int *indices_to_delete -- array of lexeme offsets to delete (modified here!)
466  * int indices_count -- size of that array
467  *
468  * Returns new TSVector without given lexemes along with their positions
469  * and weights.
470  */
471 static TSVector
472 tsvector_delete_by_indices(TSVector tsv, int *indices_to_delete,
473  int indices_count)
474 {
475  TSVector tsout;
476  WordEntry *arrin = ARRPTR(tsv),
477  *arrout;
478  char *data = STRPTR(tsv),
479  *dataout;
480  int i, /* index in arrin */
481  j, /* index in arrout */
482  k, /* index in indices_to_delete */
483  curoff; /* index in dataout area */
484 
485  /*
486  * Sort the filter array to simplify membership checks below. Also, get
487  * rid of any duplicate entries, so that we can assume that indices_count
488  * is exactly equal to the number of lexemes that will be removed.
489  */
490  if (indices_count > 1)
491  {
492  qsort(indices_to_delete, indices_count, sizeof(int), compare_int);
493  indices_count = qunique(indices_to_delete, indices_count, sizeof(int),
494  compare_int);
495  }
496 
497  /*
498  * Here we overestimate tsout size, since we don't know how much space is
499  * used by the deleted lexeme(s). We will set exact size below.
500  */
501  tsout = (TSVector) palloc0(VARSIZE(tsv));
502 
503  /* This count must be correct because STRPTR(tsout) relies on it. */
504  tsout->size = tsv->size - indices_count;
505 
506  /*
507  * Copy tsv to tsout, skipping lexemes listed in indices_to_delete.
508  */
509  arrout = ARRPTR(tsout);
510  dataout = STRPTR(tsout);
511  curoff = 0;
512  for (i = j = k = 0; i < tsv->size; i++)
513  {
514  /*
515  * If current i is present in indices_to_delete, skip this lexeme.
516  * Since indices_to_delete is already sorted, we only need to check
517  * the current (k'th) entry.
518  */
519  if (k < indices_count && i == indices_to_delete[k])
520  {
521  k++;
522  continue;
523  }
524 
525  /* Copy lexeme and its positions and weights */
526  memcpy(dataout + curoff, data + arrin[i].pos, arrin[i].len);
527  arrout[j].haspos = arrin[i].haspos;
528  arrout[j].len = arrin[i].len;
529  arrout[j].pos = curoff;
530  curoff += arrin[i].len;
531  if (arrin[i].haspos)
532  {
533  int len = POSDATALEN(tsv, arrin + i) * sizeof(WordEntryPos)
534  + sizeof(uint16);
535 
536  curoff = SHORTALIGN(curoff);
537  memcpy(dataout + curoff,
538  STRPTR(tsv) + SHORTALIGN(arrin[i].pos + arrin[i].len),
539  len);
540  curoff += len;
541  }
542 
543  j++;
544  }
545 
546  /*
547  * k should now be exactly equal to indices_count. If it isn't then the
548  * caller provided us with indices outside of [0, tsv->size) range and
549  * estimation of tsout's size is wrong.
550  */
551  Assert(k == indices_count);
552 
553  SET_VARSIZE(tsout, CALCDATASIZE(tsout->size, curoff));
554  return tsout;
555 }
556 
557 /*
558  * Delete given lexeme from tsvector.
559  * Implementation of user-level ts_delete(tsvector, text).
560  */
561 Datum
563 {
564  TSVector tsin = PG_GETARG_TSVECTOR(0),
565  tsout;
566  text *tlexeme = PG_GETARG_TEXT_PP(1);
567  char *lexeme = VARDATA_ANY(tlexeme);
568  int lexeme_len = VARSIZE_ANY_EXHDR(tlexeme),
569  skip_index;
570 
571  if ((skip_index = tsvector_bsearch(tsin, lexeme, lexeme_len)) == -1)
572  PG_RETURN_POINTER(tsin);
573 
574  tsout = tsvector_delete_by_indices(tsin, &skip_index, 1);
575 
576  PG_FREE_IF_COPY(tsin, 0);
577  PG_FREE_IF_COPY(tlexeme, 1);
578  PG_RETURN_POINTER(tsout);
579 }
580 
581 /*
582  * Delete given array of lexemes from tsvector.
583  * Implementation of user-level ts_delete(tsvector, text[]).
584  */
585 Datum
587 {
588  TSVector tsin = PG_GETARG_TSVECTOR(0),
589  tsout;
590  ArrayType *lexemes = PG_GETARG_ARRAYTYPE_P(1);
591  int i,
592  nlex,
593  skip_count,
594  *skip_indices;
595  Datum *dlexemes;
596  bool *nulls;
597 
598  deconstruct_array(lexemes, TEXTOID, -1, false, TYPALIGN_INT,
599  &dlexemes, &nulls, &nlex);
600 
601  /*
602  * In typical use case array of lexemes to delete is relatively small. So
603  * here we optimize things for that scenario: iterate through lexarr
604  * performing binary search of each lexeme from lexarr in tsvector.
605  */
606  skip_indices = palloc0(nlex * sizeof(int));
607  for (i = skip_count = 0; i < nlex; i++)
608  {
609  char *lex;
610  int lex_len,
611  lex_pos;
612 
613  if (nulls[i])
614  ereport(ERROR,
615  (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
616  errmsg("lexeme array may not contain nulls")));
617 
618  lex = VARDATA(dlexemes[i]);
619  lex_len = VARSIZE(dlexemes[i]) - VARHDRSZ;
620  lex_pos = tsvector_bsearch(tsin, lex, lex_len);
621 
622  if (lex_pos >= 0)
623  skip_indices[skip_count++] = lex_pos;
624  }
625 
626  tsout = tsvector_delete_by_indices(tsin, skip_indices, skip_count);
627 
628  pfree(skip_indices);
629  PG_FREE_IF_COPY(tsin, 0);
630  PG_FREE_IF_COPY(lexemes, 1);
631 
632  PG_RETURN_POINTER(tsout);
633 }
634 
635 /*
636  * Expand tsvector as table with following columns:
637  * lexeme: lexeme text
638  * positions: integer array of lexeme positions
639  * weights: char array of weights corresponding to positions
640  */
641 Datum
643 {
644  FuncCallContext *funcctx;
645  TSVector tsin;
646 
647  if (SRF_IS_FIRSTCALL())
648  {
649  MemoryContext oldcontext;
650  TupleDesc tupdesc;
651 
652  funcctx = SRF_FIRSTCALL_INIT();
653  oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
654 
655  tupdesc = CreateTemplateTupleDesc(3);
656  TupleDescInitEntry(tupdesc, (AttrNumber) 1, "lexeme",
657  TEXTOID, -1, 0);
658  TupleDescInitEntry(tupdesc, (AttrNumber) 2, "positions",
659  INT2ARRAYOID, -1, 0);
660  TupleDescInitEntry(tupdesc, (AttrNumber) 3, "weights",
661  TEXTARRAYOID, -1, 0);
662  funcctx->tuple_desc = BlessTupleDesc(tupdesc);
663 
664  funcctx->user_fctx = PG_GETARG_TSVECTOR_COPY(0);
665 
666  MemoryContextSwitchTo(oldcontext);
667  }
668 
669  funcctx = SRF_PERCALL_SETUP();
670  tsin = (TSVector) funcctx->user_fctx;
671 
672  if (funcctx->call_cntr < tsin->size)
673  {
674  WordEntry *arrin = ARRPTR(tsin);
675  char *data = STRPTR(tsin);
676  HeapTuple tuple;
677  int j,
678  i = funcctx->call_cntr;
679  bool nulls[] = {false, false, false};
680  Datum values[3];
681 
682  values[0] = PointerGetDatum(cstring_to_text_with_len(data + arrin[i].pos, arrin[i].len));
683 
684  if (arrin[i].haspos)
685  {
686  WordEntryPosVector *posv;
687  Datum *positions;
688  Datum *weights;
689  char weight;
690 
691  /*
692  * Internally tsvector stores position and weight in the same
693  * uint16 (2 bits for weight, 14 for position). Here we extract
694  * that in two separate arrays.
695  */
696  posv = _POSVECPTR(tsin, arrin + i);
697  positions = palloc(posv->npos * sizeof(Datum));
698  weights = palloc(posv->npos * sizeof(Datum));
699  for (j = 0; j < posv->npos; j++)
700  {
701  positions[j] = Int16GetDatum(WEP_GETPOS(posv->pos[j]));
702  weight = 'D' - WEP_GETWEIGHT(posv->pos[j]);
703  weights[j] = PointerGetDatum(cstring_to_text_with_len(&weight,
704  1));
705  }
706 
707  values[1] = PointerGetDatum(construct_array(positions, posv->npos,
708  INT2OID, 2, true, TYPALIGN_SHORT));
709  values[2] = PointerGetDatum(construct_array(weights, posv->npos,
710  TEXTOID, -1, false, TYPALIGN_INT));
711  }
712  else
713  {
714  nulls[1] = nulls[2] = true;
715  }
716 
717  tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
718  SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
719  }
720  else
721  {
722  SRF_RETURN_DONE(funcctx);
723  }
724 }
725 
726 /*
727  * Convert tsvector to array of lexemes.
728  */
729 Datum
731 {
732  TSVector tsin = PG_GETARG_TSVECTOR(0);
733  WordEntry *arrin = ARRPTR(tsin);
734  Datum *elements;
735  int i;
736  ArrayType *array;
737 
738  elements = palloc(tsin->size * sizeof(Datum));
739 
740  for (i = 0; i < tsin->size; i++)
741  {
742  elements[i] = PointerGetDatum(cstring_to_text_with_len(STRPTR(tsin) + arrin[i].pos,
743  arrin[i].len));
744  }
745 
746  array = construct_array(elements, tsin->size, TEXTOID, -1, false, TYPALIGN_INT);
747 
748  pfree(elements);
749  PG_FREE_IF_COPY(tsin, 0);
750  PG_RETURN_POINTER(array);
751 }
752 
753 /*
754  * Build tsvector from array of lexemes.
755  */
756 Datum
758 {
760  TSVector tsout;
761  Datum *dlexemes;
762  WordEntry *arrout;
763  bool *nulls;
764  int nitems,
765  i,
766  tslen,
767  datalen = 0;
768  char *cur;
769 
770  deconstruct_array(v, TEXTOID, -1, false, TYPALIGN_INT, &dlexemes, &nulls, &nitems);
771 
772  /* Reject nulls (maybe we should just ignore them, instead?) */
773  for (i = 0; i < nitems; i++)
774  {
775  if (nulls[i])
776  ereport(ERROR,
777  (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
778  errmsg("lexeme array may not contain nulls")));
779  }
780 
781  /* Sort and de-dup, because this is required for a valid tsvector. */
782  if (nitems > 1)
783  {
784  qsort(dlexemes, nitems, sizeof(Datum), compare_text_lexemes);
785  nitems = qunique(dlexemes, nitems, sizeof(Datum),
787  }
788 
789  /* Calculate space needed for surviving lexemes. */
790  for (i = 0; i < nitems; i++)
791  datalen += VARSIZE(dlexemes[i]) - VARHDRSZ;
792  tslen = CALCDATASIZE(nitems, datalen);
793 
794  /* Allocate and fill tsvector. */
795  tsout = (TSVector) palloc0(tslen);
796  SET_VARSIZE(tsout, tslen);
797  tsout->size = nitems;
798 
799  arrout = ARRPTR(tsout);
800  cur = STRPTR(tsout);
801  for (i = 0; i < nitems; i++)
802  {
803  char *lex = VARDATA(dlexemes[i]);
804  int lex_len = VARSIZE(dlexemes[i]) - VARHDRSZ;
805 
806  memcpy(cur, lex, lex_len);
807  arrout[i].haspos = 0;
808  arrout[i].len = lex_len;
809  arrout[i].pos = cur - STRPTR(tsout);
810  cur += lex_len;
811  }
812 
813  PG_FREE_IF_COPY(v, 0);
814  PG_RETURN_POINTER(tsout);
815 }
816 
817 /*
818  * ts_filter(): keep only lexemes with given weights in tsvector.
819  */
820 Datum
822 {
823  TSVector tsin = PG_GETARG_TSVECTOR(0),
824  tsout;
826  WordEntry *arrin = ARRPTR(tsin),
827  *arrout;
828  char *datain = STRPTR(tsin),
829  *dataout;
830  Datum *dweights;
831  bool *nulls;
832  int nweights;
833  int i,
834  j;
835  int cur_pos = 0;
836  char mask = 0;
837 
838  deconstruct_array(weights, CHAROID, 1, true, TYPALIGN_CHAR,
839  &dweights, &nulls, &nweights);
840 
841  for (i = 0; i < nweights; i++)
842  {
843  char char_weight;
844 
845  if (nulls[i])
846  ereport(ERROR,
847  (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
848  errmsg("weight array may not contain nulls")));
849 
850  char_weight = DatumGetChar(dweights[i]);
851  switch (char_weight)
852  {
853  case 'A':
854  case 'a':
855  mask = mask | 8;
856  break;
857  case 'B':
858  case 'b':
859  mask = mask | 4;
860  break;
861  case 'C':
862  case 'c':
863  mask = mask | 2;
864  break;
865  case 'D':
866  case 'd':
867  mask = mask | 1;
868  break;
869  default:
870  ereport(ERROR,
871  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
872  errmsg("unrecognized weight: \"%c\"", char_weight)));
873  }
874  }
875 
876  tsout = (TSVector) palloc0(VARSIZE(tsin));
877  tsout->size = tsin->size;
878  arrout = ARRPTR(tsout);
879  dataout = STRPTR(tsout);
880 
881  for (i = j = 0; i < tsin->size; i++)
882  {
883  WordEntryPosVector *posvin,
884  *posvout;
885  int npos = 0;
886  int k;
887 
888  if (!arrin[i].haspos)
889  continue;
890 
891  posvin = _POSVECPTR(tsin, arrin + i);
892  posvout = (WordEntryPosVector *)
893  (dataout + SHORTALIGN(cur_pos + arrin[i].len));
894 
895  for (k = 0; k < posvin->npos; k++)
896  {
897  if (mask & (1 << WEP_GETWEIGHT(posvin->pos[k])))
898  posvout->pos[npos++] = posvin->pos[k];
899  }
900 
901  /* if no satisfactory positions found, skip lexeme */
902  if (!npos)
903  continue;
904 
905  arrout[j].haspos = true;
906  arrout[j].len = arrin[i].len;
907  arrout[j].pos = cur_pos;
908 
909  memcpy(dataout + cur_pos, datain + arrin[i].pos, arrin[i].len);
910  posvout->npos = npos;
911  cur_pos += SHORTALIGN(arrin[i].len);
912  cur_pos += POSDATALEN(tsout, arrout + j) * sizeof(WordEntryPos) +
913  sizeof(uint16);
914  j++;
915  }
916 
917  tsout->size = j;
918  if (dataout != STRPTR(tsout))
919  memmove(STRPTR(tsout), dataout, cur_pos);
920 
921  SET_VARSIZE(tsout, CALCDATASIZE(tsout->size, cur_pos));
922 
923  PG_FREE_IF_COPY(tsin, 0);
924  PG_RETURN_POINTER(tsout);
925 }
926 
927 Datum
929 {
930  TSVector in1 = PG_GETARG_TSVECTOR(0);
931  TSVector in2 = PG_GETARG_TSVECTOR(1);
932  TSVector out;
933  WordEntry *ptr;
934  WordEntry *ptr1,
935  *ptr2;
936  WordEntryPos *p;
937  int maxpos = 0,
938  i,
939  j,
940  i1,
941  i2,
942  dataoff,
943  output_bytes,
944  output_size;
945  char *data,
946  *data1,
947  *data2;
948 
949  /* Get max position in in1; we'll need this to offset in2's positions */
950  ptr = ARRPTR(in1);
951  i = in1->size;
952  while (i--)
953  {
954  if ((j = POSDATALEN(in1, ptr)) != 0)
955  {
956  p = POSDATAPTR(in1, ptr);
957  while (j--)
958  {
959  if (WEP_GETPOS(*p) > maxpos)
960  maxpos = WEP_GETPOS(*p);
961  p++;
962  }
963  }
964  ptr++;
965  }
966 
967  ptr1 = ARRPTR(in1);
968  ptr2 = ARRPTR(in2);
969  data1 = STRPTR(in1);
970  data2 = STRPTR(in2);
971  i1 = in1->size;
972  i2 = in2->size;
973 
974  /*
975  * Conservative estimate of space needed. We might need all the data in
976  * both inputs, and conceivably add a pad byte before position data for
977  * each item where there was none before.
978  */
979  output_bytes = VARSIZE(in1) + VARSIZE(in2) + i1 + i2;
980 
981  out = (TSVector) palloc0(output_bytes);
982  SET_VARSIZE(out, output_bytes);
983 
984  /*
985  * We must make out->size valid so that STRPTR(out) is sensible. We'll
986  * collapse out any unused space at the end.
987  */
988  out->size = in1->size + in2->size;
989 
990  ptr = ARRPTR(out);
991  data = STRPTR(out);
992  dataoff = 0;
993  while (i1 && i2)
994  {
995  int cmp = compareEntry(data1, ptr1, data2, ptr2);
996 
997  if (cmp < 0)
998  { /* in1 first */
999  ptr->haspos = ptr1->haspos;
1000  ptr->len = ptr1->len;
1001  memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
1002  ptr->pos = dataoff;
1003  dataoff += ptr1->len;
1004  if (ptr->haspos)
1005  {
1006  dataoff = SHORTALIGN(dataoff);
1007  memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
1008  dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
1009  }
1010 
1011  ptr++;
1012  ptr1++;
1013  i1--;
1014  }
1015  else if (cmp > 0)
1016  { /* in2 first */
1017  ptr->haspos = ptr2->haspos;
1018  ptr->len = ptr2->len;
1019  memcpy(data + dataoff, data2 + ptr2->pos, ptr2->len);
1020  ptr->pos = dataoff;
1021  dataoff += ptr2->len;
1022  if (ptr->haspos)
1023  {
1024  int addlen = add_pos(in2, ptr2, out, ptr, maxpos);
1025 
1026  if (addlen == 0)
1027  ptr->haspos = 0;
1028  else
1029  {
1030  dataoff = SHORTALIGN(dataoff);
1031  dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
1032  }
1033  }
1034 
1035  ptr++;
1036  ptr2++;
1037  i2--;
1038  }
1039  else
1040  {
1041  ptr->haspos = ptr1->haspos | ptr2->haspos;
1042  ptr->len = ptr1->len;
1043  memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
1044  ptr->pos = dataoff;
1045  dataoff += ptr1->len;
1046  if (ptr->haspos)
1047  {
1048  if (ptr1->haspos)
1049  {
1050  dataoff = SHORTALIGN(dataoff);
1051  memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
1052  dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
1053  if (ptr2->haspos)
1054  dataoff += add_pos(in2, ptr2, out, ptr, maxpos) * sizeof(WordEntryPos);
1055  }
1056  else /* must have ptr2->haspos */
1057  {
1058  int addlen = add_pos(in2, ptr2, out, ptr, maxpos);
1059 
1060  if (addlen == 0)
1061  ptr->haspos = 0;
1062  else
1063  {
1064  dataoff = SHORTALIGN(dataoff);
1065  dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
1066  }
1067  }
1068  }
1069 
1070  ptr++;
1071  ptr1++;
1072  ptr2++;
1073  i1--;
1074  i2--;
1075  }
1076  }
1077 
1078  while (i1)
1079  {
1080  ptr->haspos = ptr1->haspos;
1081  ptr->len = ptr1->len;
1082  memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
1083  ptr->pos = dataoff;
1084  dataoff += ptr1->len;
1085  if (ptr->haspos)
1086  {
1087  dataoff = SHORTALIGN(dataoff);
1088  memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
1089  dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
1090  }
1091 
1092  ptr++;
1093  ptr1++;
1094  i1--;
1095  }
1096 
1097  while (i2)
1098  {
1099  ptr->haspos = ptr2->haspos;
1100  ptr->len = ptr2->len;
1101  memcpy(data + dataoff, data2 + ptr2->pos, ptr2->len);
1102  ptr->pos = dataoff;
1103  dataoff += ptr2->len;
1104  if (ptr->haspos)
1105  {
1106  int addlen = add_pos(in2, ptr2, out, ptr, maxpos);
1107 
1108  if (addlen == 0)
1109  ptr->haspos = 0;
1110  else
1111  {
1112  dataoff = SHORTALIGN(dataoff);
1113  dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
1114  }
1115  }
1116 
1117  ptr++;
1118  ptr2++;
1119  i2--;
1120  }
1121 
1122  /*
1123  * Instead of checking each offset individually, we check for overflow of
1124  * pos fields once at the end.
1125  */
1126  if (dataoff > MAXSTRPOS)
1127  ereport(ERROR,
1128  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1129  errmsg("string is too long for tsvector (%d bytes, max %d bytes)", dataoff, MAXSTRPOS)));
1130 
1131  /*
1132  * Adjust sizes (asserting that we didn't overrun the original estimates)
1133  * and collapse out any unused array entries.
1134  */
1135  output_size = ptr - ARRPTR(out);
1136  Assert(output_size <= out->size);
1137  out->size = output_size;
1138  if (data != STRPTR(out))
1139  memmove(STRPTR(out), data, dataoff);
1140  output_bytes = CALCDATASIZE(out->size, dataoff);
1141  Assert(output_bytes <= VARSIZE(out));
1142  SET_VARSIZE(out, output_bytes);
1143 
1144  PG_FREE_IF_COPY(in1, 0);
1145  PG_FREE_IF_COPY(in2, 1);
1146  PG_RETURN_POINTER(out);
1147 }
1148 
1149 /*
1150  * Compare two strings by tsvector rules.
1151  *
1152  * if prefix = true then it returns zero value iff b has prefix a
1153  */
1154 int32
1155 tsCompareString(char *a, int lena, char *b, int lenb, bool prefix)
1156 {
1157  int cmp;
1158 
1159  if (lena == 0)
1160  {
1161  if (prefix)
1162  cmp = 0; /* empty string is prefix of anything */
1163  else
1164  cmp = (lenb > 0) ? -1 : 0;
1165  }
1166  else if (lenb == 0)
1167  {
1168  cmp = (lena > 0) ? 1 : 0;
1169  }
1170  else
1171  {
1172  cmp = memcmp(a, b, Min(lena, lenb));
1173 
1174  if (prefix)
1175  {
1176  if (cmp == 0 && lena > lenb)
1177  cmp = 1; /* a is longer, so not a prefix of b */
1178  }
1179  else if (cmp == 0 && lena != lenb)
1180  {
1181  cmp = (lena < lenb) ? -1 : 1;
1182  }
1183  }
1184 
1185  return cmp;
1186 }
1187 
1188 /*
1189  * Check weight info or/and fill 'data' with the required positions
1190  */
1191 static bool
1193  ExecPhraseData *data)
1194 {
1195  bool result = false;
1196 
1197  if (entry->haspos && (val->weight || data))
1198  {
1199  WordEntryPosVector *posvec;
1200 
1201  /*
1202  * We can't use the _POSVECPTR macro here because the pointer to the
1203  * tsvector's lexeme storage is already contained in chkval->values.
1204  */
1205  posvec = (WordEntryPosVector *)
1206  (chkval->values + SHORTALIGN(entry->pos + entry->len));
1207 
1208  if (val->weight && data)
1209  {
1210  WordEntryPos *posvec_iter = posvec->pos;
1211  WordEntryPos *dptr;
1212 
1213  /*
1214  * Filter position information by weights
1215  */
1216  dptr = data->pos = palloc(sizeof(WordEntryPos) * posvec->npos);
1217  data->allocated = true;
1218 
1219  /* Is there a position with a matching weight? */
1220  while (posvec_iter < posvec->pos + posvec->npos)
1221  {
1222  /* If true, append this position to the data->pos */
1223  if (val->weight & (1 << WEP_GETWEIGHT(*posvec_iter)))
1224  {
1225  *dptr = WEP_GETPOS(*posvec_iter);
1226  dptr++;
1227  }
1228 
1229  posvec_iter++;
1230  }
1231 
1232  data->npos = dptr - data->pos;
1233 
1234  if (data->npos > 0)
1235  result = true;
1236  }
1237  else if (val->weight)
1238  {
1239  WordEntryPos *posvec_iter = posvec->pos;
1240 
1241  /* Is there a position with a matching weight? */
1242  while (posvec_iter < posvec->pos + posvec->npos)
1243  {
1244  if (val->weight & (1 << WEP_GETWEIGHT(*posvec_iter)))
1245  {
1246  result = true;
1247  break; /* no need to go further */
1248  }
1249 
1250  posvec_iter++;
1251  }
1252  }
1253  else /* data != NULL */
1254  {
1255  data->npos = posvec->npos;
1256  data->pos = posvec->pos;
1257  data->allocated = false;
1258  result = true;
1259  }
1260  }
1261  else
1262  {
1263  result = true;
1264  }
1265 
1266  return result;
1267 }
1268 
1269 /*
1270  * is there value 'val' in array or not ?
1271  */
1272 static bool
1274 {
1275  CHKVAL *chkval = (CHKVAL *) checkval;
1276  WordEntry *StopLow = chkval->arrb;
1277  WordEntry *StopHigh = chkval->arre;
1278  WordEntry *StopMiddle = StopHigh;
1279  bool res = false;
1280 
1281  /* Loop invariant: StopLow <= val < StopHigh */
1282  while (StopLow < StopHigh)
1283  {
1284  int difference;
1285 
1286  StopMiddle = StopLow + (StopHigh - StopLow) / 2;
1287  difference = tsCompareString(chkval->operand + val->distance,
1288  val->length,
1289  chkval->values + StopMiddle->pos,
1290  StopMiddle->len,
1291  false);
1292 
1293  if (difference == 0)
1294  {
1295  /* Check weight info & fill 'data' with positions */
1296  res = checkclass_str(chkval, StopMiddle, val, data);
1297  break;
1298  }
1299  else if (difference > 0)
1300  StopLow = StopMiddle + 1;
1301  else
1302  StopHigh = StopMiddle;
1303  }
1304 
1305  if ((!res || data) && val->prefix)
1306  {
1307  WordEntryPos *allpos = NULL;
1308  int npos = 0,
1309  totalpos = 0;
1310 
1311  /*
1312  * there was a failed exact search, so we should scan further to find
1313  * a prefix match. We also need to do so if caller needs position info
1314  */
1315  if (StopLow >= StopHigh)
1316  StopMiddle = StopHigh;
1317 
1318  while ((!res || data) && StopMiddle < chkval->arre &&
1319  tsCompareString(chkval->operand + val->distance,
1320  val->length,
1321  chkval->values + StopMiddle->pos,
1322  StopMiddle->len,
1323  true) == 0)
1324  {
1325  if (data)
1326  {
1327  /*
1328  * We need to join position information
1329  */
1330  res = checkclass_str(chkval, StopMiddle, val, data);
1331 
1332  if (res)
1333  {
1334  while (npos + data->npos >= totalpos)
1335  {
1336  if (totalpos == 0)
1337  {
1338  totalpos = 256;
1339  allpos = palloc(sizeof(WordEntryPos) * totalpos);
1340  }
1341  else
1342  {
1343  totalpos *= 2;
1344  allpos = repalloc(allpos, sizeof(WordEntryPos) * totalpos);
1345  }
1346  }
1347 
1348  memcpy(allpos + npos, data->pos, sizeof(WordEntryPos) * data->npos);
1349  npos += data->npos;
1350  }
1351  else
1352  {
1353  /* at loop exit, res must be true if we found matches */
1354  res = (npos > 0);
1355  }
1356  }
1357  else
1358  {
1359  res = checkclass_str(chkval, StopMiddle, val, NULL);
1360  }
1361 
1362  StopMiddle++;
1363  }
1364 
1365  if (res && data)
1366  {
1367  /* Sort and make unique array of found positions */
1368  data->pos = allpos;
1369  qsort(data->pos, npos, sizeof(WordEntryPos), compareWordEntryPos);
1370  data->npos = qunique(data->pos, npos, sizeof(WordEntryPos),
1372  data->allocated = true;
1373  }
1374  }
1375 
1376  return res;
1377 }
1378 
1379 /*
1380  * Compute output position list for a tsquery operator in phrase mode.
1381  *
1382  * Merge the position lists in Ldata and Rdata as specified by "emit",
1383  * returning the result list into *data. The input position lists must be
1384  * sorted and unique, and the output will be as well.
1385  *
1386  * data: pointer to initially-all-zeroes output struct, or NULL
1387  * Ldata, Rdata: input position lists
1388  * emit: bitmask of TSPO_XXX flags
1389  * Loffset: offset to be added to Ldata positions before comparing/outputting
1390  * Roffset: offset to be added to Rdata positions before comparing/outputting
1391  * max_npos: maximum possible required size of output position array
1392  *
1393  * Loffset and Roffset should not be negative, else we risk trying to output
1394  * negative positions, which won't fit into WordEntryPos.
1395  *
1396  * The result is boolean (TS_YES or TS_NO), but for the caller's convenience
1397  * we return it as TSTernaryValue.
1398  *
1399  * Returns TS_YES if any positions were emitted to *data; or if data is NULL,
1400  * returns TS_YES if any positions would have been emitted.
1401  */
1402 #define TSPO_L_ONLY 0x01 /* emit positions appearing only in L */
1403 #define TSPO_R_ONLY 0x02 /* emit positions appearing only in R */
1404 #define TSPO_BOTH 0x04 /* emit positions appearing in both L&R */
1405 
1406 static TSTernaryValue
1408  ExecPhraseData *Ldata,
1409  ExecPhraseData *Rdata,
1410  int emit,
1411  int Loffset,
1412  int Roffset,
1413  int max_npos)
1414 {
1415  int Lindex,
1416  Rindex;
1417 
1418  /* Loop until both inputs are exhausted */
1419  Lindex = Rindex = 0;
1420  while (Lindex < Ldata->npos || Rindex < Rdata->npos)
1421  {
1422  int Lpos,
1423  Rpos;
1424  int output_pos = 0;
1425 
1426  /*
1427  * Fetch current values to compare. WEP_GETPOS() is needed because
1428  * ExecPhraseData->data can point to a tsvector's WordEntryPosVector.
1429  */
1430  if (Lindex < Ldata->npos)
1431  Lpos = WEP_GETPOS(Ldata->pos[Lindex]) + Loffset;
1432  else
1433  {
1434  /* L array exhausted, so we're done if R_ONLY isn't set */
1435  if (!(emit & TSPO_R_ONLY))
1436  break;
1437  Lpos = INT_MAX;
1438  }
1439  if (Rindex < Rdata->npos)
1440  Rpos = WEP_GETPOS(Rdata->pos[Rindex]) + Roffset;
1441  else
1442  {
1443  /* R array exhausted, so we're done if L_ONLY isn't set */
1444  if (!(emit & TSPO_L_ONLY))
1445  break;
1446  Rpos = INT_MAX;
1447  }
1448 
1449  /* Merge-join the two input lists */
1450  if (Lpos < Rpos)
1451  {
1452  /* Lpos is not matched in Rdata, should we output it? */
1453  if (emit & TSPO_L_ONLY)
1454  output_pos = Lpos;
1455  Lindex++;
1456  }
1457  else if (Lpos == Rpos)
1458  {
1459  /* Lpos and Rpos match ... should we output it? */
1460  if (emit & TSPO_BOTH)
1461  output_pos = Rpos;
1462  Lindex++;
1463  Rindex++;
1464  }
1465  else /* Lpos > Rpos */
1466  {
1467  /* Rpos is not matched in Ldata, should we output it? */
1468  if (emit & TSPO_R_ONLY)
1469  output_pos = Rpos;
1470  Rindex++;
1471  }
1472 
1473  if (output_pos > 0)
1474  {
1475  if (data)
1476  {
1477  /* Store position, first allocating output array if needed */
1478  if (data->pos == NULL)
1479  {
1480  data->pos = (WordEntryPos *)
1481  palloc(max_npos * sizeof(WordEntryPos));
1482  data->allocated = true;
1483  }
1484  data->pos[data->npos++] = output_pos;
1485  }
1486  else
1487  {
1488  /*
1489  * Exact positions not needed, so return TS_YES as soon as we
1490  * know there is at least one.
1491  */
1492  return TS_YES;
1493  }
1494  }
1495  }
1496 
1497  if (data && data->npos > 0)
1498  {
1499  /* Let's assert we didn't overrun the array */
1500  Assert(data->npos <= max_npos);
1501  return TS_YES;
1502  }
1503  return TS_NO;
1504 }
1505 
1506 /*
1507  * Execute tsquery at or below an OP_PHRASE operator.
1508  *
1509  * This handles tsquery execution at recursion levels where we need to care
1510  * about match locations.
1511  *
1512  * In addition to the same arguments used for TS_execute, the caller may pass
1513  * a preinitialized-to-zeroes ExecPhraseData struct, to be filled with lexeme
1514  * match position info on success. data == NULL if no position data need be
1515  * returned. (In practice, outside callers pass NULL, and only the internal
1516  * recursion cases pass a data pointer.)
1517  * Note: the function assumes data != NULL for operators other than OP_PHRASE.
1518  * This is OK because an outside call always starts from an OP_PHRASE node.
1519  *
1520  * The detailed semantics of the match data, given that the function returned
1521  * TS_YES (successful match), are:
1522  *
1523  * npos > 0, negate = false:
1524  * query is matched at specified position(s) (and only those positions)
1525  * npos > 0, negate = true:
1526  * query is matched at all positions *except* specified position(s)
1527  * npos = 0, negate = true:
1528  * query is matched at all positions
1529  * npos = 0, negate = false:
1530  * disallowed (this should result in TS_NO or TS_MAYBE, as appropriate)
1531  *
1532  * Successful matches also return a "width" value which is the match width in
1533  * lexemes, less one. Hence, "width" is zero for simple one-lexeme matches,
1534  * and is the sum of the phrase operator distances for phrase matches. Note
1535  * that when width > 0, the listed positions represent the ends of matches not
1536  * the starts. (This unintuitive rule is needed to avoid possibly generating
1537  * negative positions, which wouldn't fit into the WordEntryPos arrays.)
1538  *
1539  * If the TSExecuteCallback function reports that an operand is present
1540  * but fails to provide position(s) for it, we will return TS_MAYBE when
1541  * it is possible but not certain that the query is matched.
1542  *
1543  * When the function returns TS_NO or TS_MAYBE, it must return npos = 0,
1544  * negate = false (which is the state initialized by the caller); but the
1545  * "width" output in such cases is undefined.
1546  */
1547 static TSTernaryValue
1548 TS_phrase_execute(QueryItem *curitem, void *arg, uint32 flags,
1549  TSExecuteCallback chkcond,
1550  ExecPhraseData *data)
1551 {
1552  ExecPhraseData Ldata,
1553  Rdata;
1554  TSTernaryValue lmatch,
1555  rmatch;
1556  int Loffset,
1557  Roffset,
1558  maxwidth;
1559 
1560  /* since this function recurses, it could be driven to stack overflow */
1562 
1563  if (curitem->type == QI_VAL)
1564  {
1565  if (!chkcond(arg, (QueryOperand *) curitem, data))
1566  return TS_NO;
1567  if (data->npos > 0 || data->negate)
1568  return TS_YES;
1569  /* If we have no position data, we must return TS_MAYBE */
1570  return TS_MAYBE;
1571  }
1572 
1573  switch (curitem->qoperator.oper)
1574  {
1575  case OP_NOT:
1576 
1577  /*
1578  * We need not touch data->width, since a NOT operation does not
1579  * change the match width.
1580  */
1581  if (!(flags & TS_EXEC_CALC_NOT))
1582  {
1583  /* without CALC_NOT, report NOT as "match everywhere" */
1584  Assert(data->npos == 0 && !data->negate);
1585  data->negate = true;
1586  return TS_YES;
1587  }
1588  switch (TS_phrase_execute(curitem + 1, arg, flags, chkcond, data))
1589  {
1590  case TS_NO:
1591  /* change "match nowhere" to "match everywhere" */
1592  Assert(data->npos == 0 && !data->negate);
1593  data->negate = true;
1594  return TS_YES;
1595  case TS_YES:
1596  if (data->npos > 0)
1597  {
1598  /* we have some positions, invert negate flag */
1599  data->negate = !data->negate;
1600  return TS_YES;
1601  }
1602  else if (data->negate)
1603  {
1604  /* change "match everywhere" to "match nowhere" */
1605  data->negate = false;
1606  return TS_NO;
1607  }
1608  /* Should not get here if result was TS_YES */
1609  Assert(false);
1610  break;
1611  case TS_MAYBE:
1612  /* match positions are, and remain, uncertain */
1613  return TS_MAYBE;
1614  }
1615  break;
1616 
1617  case OP_PHRASE:
1618  case OP_AND:
1619  memset(&Ldata, 0, sizeof(Ldata));
1620  memset(&Rdata, 0, sizeof(Rdata));
1621 
1622  lmatch = TS_phrase_execute(curitem + curitem->qoperator.left,
1623  arg, flags, chkcond, &Ldata);
1624  if (lmatch == TS_NO)
1625  return TS_NO;
1626 
1627  rmatch = TS_phrase_execute(curitem + 1,
1628  arg, flags, chkcond, &Rdata);
1629  if (rmatch == TS_NO)
1630  return TS_NO;
1631 
1632  /*
1633  * If either operand has no position information, then we can't
1634  * return reliable position data, only a MAYBE result.
1635  */
1636  if (lmatch == TS_MAYBE || rmatch == TS_MAYBE)
1637  return TS_MAYBE;
1638 
1639  if (curitem->qoperator.oper == OP_PHRASE)
1640  {
1641  /*
1642  * Compute Loffset and Roffset suitable for phrase match, and
1643  * compute overall width of whole phrase match.
1644  */
1645  Loffset = curitem->qoperator.distance + Rdata.width;
1646  Roffset = 0;
1647  if (data)
1648  data->width = curitem->qoperator.distance +
1649  Ldata.width + Rdata.width;
1650  }
1651  else
1652  {
1653  /*
1654  * For OP_AND, set output width and alignment like OP_OR (see
1655  * comment below)
1656  */
1657  maxwidth = Max(Ldata.width, Rdata.width);
1658  Loffset = maxwidth - Ldata.width;
1659  Roffset = maxwidth - Rdata.width;
1660  if (data)
1661  data->width = maxwidth;
1662  }
1663 
1664  if (Ldata.negate && Rdata.negate)
1665  {
1666  /* !L & !R: treat as !(L | R) */
1667  (void) TS_phrase_output(data, &Ldata, &Rdata,
1669  Loffset, Roffset,
1670  Ldata.npos + Rdata.npos);
1671  if (data)
1672  data->negate = true;
1673  return TS_YES;
1674  }
1675  else if (Ldata.negate)
1676  {
1677  /* !L & R */
1678  return TS_phrase_output(data, &Ldata, &Rdata,
1679  TSPO_R_ONLY,
1680  Loffset, Roffset,
1681  Rdata.npos);
1682  }
1683  else if (Rdata.negate)
1684  {
1685  /* L & !R */
1686  return TS_phrase_output(data, &Ldata, &Rdata,
1687  TSPO_L_ONLY,
1688  Loffset, Roffset,
1689  Ldata.npos);
1690  }
1691  else
1692  {
1693  /* straight AND */
1694  return TS_phrase_output(data, &Ldata, &Rdata,
1695  TSPO_BOTH,
1696  Loffset, Roffset,
1697  Min(Ldata.npos, Rdata.npos));
1698  }
1699 
1700  case OP_OR:
1701  memset(&Ldata, 0, sizeof(Ldata));
1702  memset(&Rdata, 0, sizeof(Rdata));
1703 
1704  lmatch = TS_phrase_execute(curitem + curitem->qoperator.left,
1705  arg, flags, chkcond, &Ldata);
1706  rmatch = TS_phrase_execute(curitem + 1,
1707  arg, flags, chkcond, &Rdata);
1708 
1709  if (lmatch == TS_NO && rmatch == TS_NO)
1710  return TS_NO;
1711 
1712  /*
1713  * If either operand has no position information, then we can't
1714  * return reliable position data, only a MAYBE result.
1715  */
1716  if (lmatch == TS_MAYBE || rmatch == TS_MAYBE)
1717  return TS_MAYBE;
1718 
1719  /*
1720  * Cope with undefined output width from failed submatch. (This
1721  * takes less code than trying to ensure that all failure returns
1722  * set data->width to zero.)
1723  */
1724  if (lmatch == TS_NO)
1725  Ldata.width = 0;
1726  if (rmatch == TS_NO)
1727  Rdata.width = 0;
1728 
1729  /*
1730  * For OP_AND and OP_OR, report the width of the wider of the two
1731  * inputs, and align the narrower input's positions to the right
1732  * end of that width. This rule deals at least somewhat
1733  * reasonably with cases like "x <-> (y | z <-> q)".
1734  */
1735  maxwidth = Max(Ldata.width, Rdata.width);
1736  Loffset = maxwidth - Ldata.width;
1737  Roffset = maxwidth - Rdata.width;
1738  data->width = maxwidth;
1739 
1740  if (Ldata.negate && Rdata.negate)
1741  {
1742  /* !L | !R: treat as !(L & R) */
1743  (void) TS_phrase_output(data, &Ldata, &Rdata,
1744  TSPO_BOTH,
1745  Loffset, Roffset,
1746  Min(Ldata.npos, Rdata.npos));
1747  data->negate = true;
1748  return TS_YES;
1749  }
1750  else if (Ldata.negate)
1751  {
1752  /* !L | R: treat as !(L & !R) */
1753  (void) TS_phrase_output(data, &Ldata, &Rdata,
1754  TSPO_L_ONLY,
1755  Loffset, Roffset,
1756  Ldata.npos);
1757  data->negate = true;
1758  return TS_YES;
1759  }
1760  else if (Rdata.negate)
1761  {
1762  /* L | !R: treat as !(!L & R) */
1763  (void) TS_phrase_output(data, &Ldata, &Rdata,
1764  TSPO_R_ONLY,
1765  Loffset, Roffset,
1766  Rdata.npos);
1767  data->negate = true;
1768  return TS_YES;
1769  }
1770  else
1771  {
1772  /* straight OR */
1773  return TS_phrase_output(data, &Ldata, &Rdata,
1775  Loffset, Roffset,
1776  Ldata.npos + Rdata.npos);
1777  }
1778 
1779  default:
1780  elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
1781  }
1782 
1783  /* not reachable, but keep compiler quiet */
1784  return TS_NO;
1785 }
1786 
1787 
1788 /*
1789  * Evaluate tsquery boolean expression.
1790  *
1791  * curitem: current tsquery item (initially, the first one)
1792  * arg: opaque value to pass through to callback function
1793  * flags: bitmask of flag bits shown in ts_utils.h
1794  * chkcond: callback function to check whether a primitive value is present
1795  */
1796 bool
1797 TS_execute(QueryItem *curitem, void *arg, uint32 flags,
1798  TSExecuteCallback chkcond)
1799 {
1800  /*
1801  * If we get TS_MAYBE from the recursion, return true. We could only see
1802  * that result if the caller passed TS_EXEC_PHRASE_NO_POS, so there's no
1803  * need to check again.
1804  */
1805  return TS_execute_recurse(curitem, arg, flags, chkcond) != TS_NO;
1806 }
1807 
1808 /*
1809  * TS_execute recursion for operators above any phrase operator. Here we do
1810  * not need to worry about lexeme positions. As soon as we hit an OP_PHRASE
1811  * operator, we pass it off to TS_phrase_execute which does worry.
1812  */
1813 static TSTernaryValue
1814 TS_execute_recurse(QueryItem *curitem, void *arg, uint32 flags,
1815  TSExecuteCallback chkcond)
1816 {
1817  TSTernaryValue lmatch;
1818 
1819  /* since this function recurses, it could be driven to stack overflow */
1821 
1822  if (curitem->type == QI_VAL)
1823  return chkcond(arg, (QueryOperand *) curitem,
1824  NULL /* don't need position info */ ) ? TS_YES : TS_NO;
1825 
1826  switch (curitem->qoperator.oper)
1827  {
1828  case OP_NOT:
1829  if (!(flags & TS_EXEC_CALC_NOT))
1830  return TS_YES;
1831  switch (TS_execute_recurse(curitem + 1, arg, flags, chkcond))
1832  {
1833  case TS_NO:
1834  return TS_YES;
1835  case TS_YES:
1836  return TS_NO;
1837  case TS_MAYBE:
1838  return TS_MAYBE;
1839  }
1840  break;
1841 
1842  case OP_AND:
1843  lmatch = TS_execute_recurse(curitem + curitem->qoperator.left, arg,
1844  flags, chkcond);
1845  if (lmatch == TS_NO)
1846  return TS_NO;
1847  switch (TS_execute_recurse(curitem + 1, arg, flags, chkcond))
1848  {
1849  case TS_NO:
1850  return TS_NO;
1851  case TS_YES:
1852  return lmatch;
1853  case TS_MAYBE:
1854  return TS_MAYBE;
1855  }
1856  break;
1857 
1858  case OP_OR:
1859  lmatch = TS_execute_recurse(curitem + curitem->qoperator.left, arg,
1860  flags, chkcond);
1861  if (lmatch == TS_YES)
1862  return TS_YES;
1863  switch (TS_execute_recurse(curitem + 1, arg, flags, chkcond))
1864  {
1865  case TS_NO:
1866  return lmatch;
1867  case TS_YES:
1868  return TS_YES;
1869  case TS_MAYBE:
1870  return TS_MAYBE;
1871  }
1872  break;
1873 
1874  case OP_PHRASE:
1875 
1876  /*
1877  * If we get a MAYBE result, and the caller doesn't want that,
1878  * convert it to NO. It would be more consistent, perhaps, to
1879  * return the result of TS_phrase_execute() verbatim and then
1880  * convert MAYBE results at the top of the recursion. But
1881  * converting at the topmost phrase operator gives results that
1882  * are bug-compatible with the old implementation, so do it like
1883  * this for now.
1884  */
1885  switch (TS_phrase_execute(curitem, arg, flags, chkcond, NULL))
1886  {
1887  case TS_NO:
1888  return TS_NO;
1889  case TS_YES:
1890  return TS_YES;
1891  case TS_MAYBE:
1892  return (flags & TS_EXEC_PHRASE_NO_POS) ? TS_MAYBE : TS_NO;
1893  }
1894  break;
1895 
1896  default:
1897  elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
1898  }
1899 
1900  /* not reachable, but keep compiler quiet */
1901  return TS_NO;
1902 }
1903 
1904 /*
1905  * Detect whether a tsquery boolean expression requires any positive matches
1906  * to values shown in the tsquery.
1907  *
1908  * This is needed to know whether a GIN index search requires full index scan.
1909  * For example, 'x & !y' requires a match of x, so it's sufficient to scan
1910  * entries for x; but 'x | !y' could match rows containing neither x nor y.
1911  */
1912 bool
1914 {
1915  /* since this function recurses, it could be driven to stack overflow */
1917 
1918  if (curitem->type == QI_VAL)
1919  return true;
1920 
1921  switch (curitem->qoperator.oper)
1922  {
1923  case OP_NOT:
1924 
1925  /*
1926  * Assume there are no required matches underneath a NOT. For
1927  * some cases with nested NOTs, we could prove there's a required
1928  * match, but it seems unlikely to be worth the trouble.
1929  */
1930  return false;
1931 
1932  case OP_PHRASE:
1933 
1934  /*
1935  * Treat OP_PHRASE as OP_AND here
1936  */
1937  case OP_AND:
1938  /* If either side requires a match, we're good */
1939  if (tsquery_requires_match(curitem + curitem->qoperator.left))
1940  return true;
1941  else
1942  return tsquery_requires_match(curitem + 1);
1943 
1944  case OP_OR:
1945  /* Both sides must require a match */
1946  if (tsquery_requires_match(curitem + curitem->qoperator.left))
1947  return tsquery_requires_match(curitem + 1);
1948  else
1949  return false;
1950 
1951  default:
1952  elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
1953  }
1954 
1955  /* not reachable, but keep compiler quiet */
1956  return false;
1957 }
1958 
1959 /*
1960  * boolean operations
1961  */
1962 Datum
1964 {
1966  PG_GETARG_DATUM(1),
1967  PG_GETARG_DATUM(0)));
1968 }
1969 
1970 Datum
1972 {
1974  TSQuery query = PG_GETARG_TSQUERY(1);
1975  CHKVAL chkval;
1976  bool result;
1977 
1978  /* empty query matches nothing */
1979  if (!query->size)
1980  {
1981  PG_FREE_IF_COPY(val, 0);
1982  PG_FREE_IF_COPY(query, 1);
1983  PG_RETURN_BOOL(false);
1984  }
1985 
1986  chkval.arrb = ARRPTR(val);
1987  chkval.arre = chkval.arrb + val->size;
1988  chkval.values = STRPTR(val);
1989  chkval.operand = GETOPERAND(query);
1990  result = TS_execute(GETQUERY(query),
1991  &chkval,
1994 
1995  PG_FREE_IF_COPY(val, 0);
1996  PG_FREE_IF_COPY(query, 1);
1997  PG_RETURN_BOOL(result);
1998 }
1999 
2000 Datum
2002 {
2003  TSVector vector;
2004  TSQuery query;
2005  bool res;
2006 
2008  PG_GETARG_DATUM(0)));
2010  PG_GETARG_DATUM(1)));
2011 
2013  TSVectorGetDatum(vector),
2014  TSQueryGetDatum(query)));
2015 
2016  pfree(vector);
2017  pfree(query);
2018 
2019  PG_RETURN_BOOL(res);
2020 }
2021 
2022 Datum
2024 {
2025  TSVector vector;
2026  TSQuery query = PG_GETARG_TSQUERY(1);
2027  bool res;
2028 
2030  PG_GETARG_DATUM(0)));
2031 
2033  TSVectorGetDatum(vector),
2034  TSQueryGetDatum(query)));
2035 
2036  pfree(vector);
2037  PG_FREE_IF_COPY(query, 1);
2038 
2039  PG_RETURN_BOOL(res);
2040 }
2041 
/*
 * ts_stat statistic function support
 */
2045 
2046 
2047 /*
2048  * Returns the number of positions in value 'wptr' within tsvector 'txt',
2049  * that have a weight equal to one of the weights in 'weight' bitmask.
2050  */
2051 static int
2053 {
2054  int len = POSDATALEN(txt, wptr);
2055  int num = 0;
2056  WordEntryPos *ptr = POSDATAPTR(txt, wptr);
2057 
2058  while (len--)
2059  {
2060  if (weight & (1 << WEP_GETWEIGHT(*ptr)))
2061  num++;
2062  ptr++;
2063  }
2064  return num;
2065 }
2066 
/*
 * Compare the lexeme stored in StatEntry 'a' with the lexeme of WordEntry
 * 'e' in tsvector 't' (exact comparison, not prefix).
 */
#define compareStatWord(a,e,t)							\
	tsCompareString((a)->lexeme, (a)->lenlexeme,		\
					STRPTR(t) + (e)->pos, (e)->len,		\
					false)
2071 
2072 static void
2074 {
2075  WordEntry *we = ARRPTR(txt) + off;
2076  StatEntry *node = stat->root,
2077  *pnode = NULL;
2078  int n,
2079  res = 0;
2080  uint32 depth = 1;
2081 
2082  if (stat->weight == 0)
2083  n = (we->haspos) ? POSDATALEN(txt, we) : 1;
2084  else
2085  n = (we->haspos) ? check_weight(txt, we, stat->weight) : 0;
2086 
2087  if (n == 0)
2088  return; /* nothing to insert */
2089 
2090  while (node)
2091  {
2092  res = compareStatWord(node, we, txt);
2093 
2094  if (res == 0)
2095  {
2096  break;
2097  }
2098  else
2099  {
2100  pnode = node;
2101  node = (res < 0) ? node->left : node->right;
2102  }
2103  depth++;
2104  }
2105 
2106  if (depth > stat->maxdepth)
2107  stat->maxdepth = depth;
2108 
2109  if (node == NULL)
2110  {
2111  node = MemoryContextAlloc(persistentContext, STATENTRYHDRSZ + we->len);
2112  node->left = node->right = NULL;
2113  node->ndoc = 1;
2114  node->nentry = n;
2115  node->lenlexeme = we->len;
2116  memcpy(node->lexeme, STRPTR(txt) + we->pos, node->lenlexeme);
2117 
2118  if (pnode == NULL)
2119  {
2120  stat->root = node;
2121  }
2122  else
2123  {
2124  if (res < 0)
2125  pnode->left = node;
2126  else
2127  pnode->right = node;
2128  }
2129 
2130  }
2131  else
2132  {
2133  node->ndoc++;
2134  node->nentry += n;
2135  }
2136 }
2137 
2138 static void
2140  uint32 low, uint32 high, uint32 offset)
2141 {
2142  uint32 pos;
2143  uint32 middle = (low + high) >> 1;
2144 
2145  pos = (low + middle) >> 1;
2146  if (low != middle && pos >= offset && pos - offset < txt->size)
2147  insertStatEntry(persistentContext, stat, txt, pos - offset);
2148  pos = (high + middle + 1) >> 1;
2149  if (middle + 1 != high && pos >= offset && pos - offset < txt->size)
2150  insertStatEntry(persistentContext, stat, txt, pos - offset);
2151 
2152  if (low != middle)
2153  chooseNextStatEntry(persistentContext, stat, txt, low, middle, offset);
2154  if (high != middle + 1)
2155  chooseNextStatEntry(persistentContext, stat, txt, middle + 1, high, offset);
2156 }
2157 
2158 /*
2159  * This is written like a custom aggregate function, because the
2160  * original plan was to do just that. Unfortunately, an aggregate function
2161  * can't return a set, so that plan was abandoned. If that limitation is
2162  * lifted in the future, ts_stat could be a real aggregate function so that
2163  * you could use it like this:
2164  *
2165  * SELECT ts_stat(vector_column) FROM vector_table;
2166  *
2167  * where vector_column is a tsvector-type column in vector_table.
2168  */
2169 
2170 static TSVectorStat *
2171 ts_accum(MemoryContext persistentContext, TSVectorStat *stat, Datum data)
2172 {
2173  TSVector txt = DatumGetTSVector(data);
2174  uint32 i,
2175  nbit = 0,
2176  offset;
2177 
2178  if (stat == NULL)
2179  { /* Init in first */
2180  stat = MemoryContextAllocZero(persistentContext, sizeof(TSVectorStat));
2181  stat->maxdepth = 1;
2182  }
2183 
2184  /* simple check of correctness */
2185  if (txt == NULL || txt->size == 0)
2186  {
2187  if (txt && txt != (TSVector) DatumGetPointer(data))
2188  pfree(txt);
2189  return stat;
2190  }
2191 
2192  i = txt->size - 1;
2193  for (; i > 0; i >>= 1)
2194  nbit++;
2195 
2196  nbit = 1 << nbit;
2197  offset = (nbit - txt->size) / 2;
2198 
2199  insertStatEntry(persistentContext, stat, txt, (nbit >> 1) - offset);
2200  chooseNextStatEntry(persistentContext, stat, txt, 0, nbit, offset);
2201 
2202  return stat;
2203 }
2204 
2205 static void
2207  TSVectorStat *stat)
2208 {
2209  TupleDesc tupdesc;
2210  MemoryContext oldcontext;
2211  StatEntry *node;
2212 
2213  funcctx->user_fctx = (void *) stat;
2214 
2215  oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
2216 
2217  stat->stack = palloc0(sizeof(StatEntry *) * (stat->maxdepth + 1));
2218  stat->stackpos = 0;
2219 
2220  node = stat->root;
2221  /* find leftmost value */
2222  if (node == NULL)
2223  stat->stack[stat->stackpos] = NULL;
2224  else
2225  for (;;)
2226  {
2227  stat->stack[stat->stackpos] = node;
2228  if (node->left)
2229  {
2230  stat->stackpos++;
2231  node = node->left;
2232  }
2233  else
2234  break;
2235  }
2236  Assert(stat->stackpos <= stat->maxdepth);
2237 
2238  tupdesc = CreateTemplateTupleDesc(3);
2239  TupleDescInitEntry(tupdesc, (AttrNumber) 1, "word",
2240  TEXTOID, -1, 0);
2241  TupleDescInitEntry(tupdesc, (AttrNumber) 2, "ndoc",
2242  INT4OID, -1, 0);
2243  TupleDescInitEntry(tupdesc, (AttrNumber) 3, "nentry",
2244  INT4OID, -1, 0);
2245  funcctx->tuple_desc = BlessTupleDesc(tupdesc);
2246  funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
2247 
2248  MemoryContextSwitchTo(oldcontext);
2249 }
2250 
2251 static StatEntry *
2253 {
2254  StatEntry *node = stat->stack[stat->stackpos];
2255 
2256  if (node == NULL)
2257  return NULL;
2258 
2259  if (node->ndoc != 0)
2260  {
2261  /* return entry itself: we already was at left sublink */
2262  return node;
2263  }
2264  else if (node->right && node->right != stat->stack[stat->stackpos + 1])
2265  {
2266  /* go on right sublink */
2267  stat->stackpos++;
2268  node = node->right;
2269 
2270  /* find most-left value */
2271  for (;;)
2272  {
2273  stat->stack[stat->stackpos] = node;
2274  if (node->left)
2275  {
2276  stat->stackpos++;
2277  node = node->left;
2278  }
2279  else
2280  break;
2281  }
2282  Assert(stat->stackpos <= stat->maxdepth);
2283  }
2284  else
2285  {
2286  /* we already return all left subtree, itself and right subtree */
2287  if (stat->stackpos == 0)
2288  return NULL;
2289 
2290  stat->stackpos--;
2291  return walkStatEntryTree(stat);
2292  }
2293 
2294  return node;
2295 }
2296 
2297 static Datum
2299 {
2300  TSVectorStat *st;
2301  StatEntry *entry;
2302 
2303  st = (TSVectorStat *) funcctx->user_fctx;
2304 
2305  entry = walkStatEntryTree(st);
2306 
2307  if (entry != NULL)
2308  {
2309  Datum result;
2310  char *values[3];
2311  char ndoc[16];
2312  char nentry[16];
2313  HeapTuple tuple;
2314 
2315  values[0] = palloc(entry->lenlexeme + 1);
2316  memcpy(values[0], entry->lexeme, entry->lenlexeme);
2317  (values[0])[entry->lenlexeme] = '\0';
2318  sprintf(ndoc, "%d", entry->ndoc);
2319  values[1] = ndoc;
2320  sprintf(nentry, "%d", entry->nentry);
2321  values[2] = nentry;
2322 
2323  tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
2324  result = HeapTupleGetDatum(tuple);
2325 
2326  pfree(values[0]);
2327 
2328  /* mark entry as already visited */
2329  entry->ndoc = 0;
2330 
2331  return result;
2332  }
2333 
2334  return (Datum) 0;
2335 }
2336 
2337 static TSVectorStat *
2338 ts_stat_sql(MemoryContext persistentContext, text *txt, text *ws)
2339 {
2340  char *query = text_to_cstring(txt);
2341  TSVectorStat *stat;
2342  bool isnull;
2343  Portal portal;
2344  SPIPlanPtr plan;
2345 
2346  if ((plan = SPI_prepare(query, 0, NULL)) == NULL)
2347  /* internal error */
2348  elog(ERROR, "SPI_prepare(\"%s\") failed", query);
2349 
2350  if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL)
2351  /* internal error */
2352  elog(ERROR, "SPI_cursor_open(\"%s\") failed", query);
2353 
2354  SPI_cursor_fetch(portal, true, 100);
2355 
2356  if (SPI_tuptable == NULL ||
2357  SPI_tuptable->tupdesc->natts != 1 ||
2359  TSVECTOROID))
2360  ereport(ERROR,
2361  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2362  errmsg("ts_stat query must return one tsvector column")));
2363 
2364  stat = MemoryContextAllocZero(persistentContext, sizeof(TSVectorStat));
2365  stat->maxdepth = 1;
2366 
2367  if (ws)
2368  {
2369  char *buf;
2370 
2371  buf = VARDATA_ANY(ws);
2372  while (buf - VARDATA_ANY(ws) < VARSIZE_ANY_EXHDR(ws))
2373  {
2374  if (pg_mblen(buf) == 1)
2375  {
2376  switch (*buf)
2377  {
2378  case 'A':
2379  case 'a':
2380  stat->weight |= 1 << 3;
2381  break;
2382  case 'B':
2383  case 'b':
2384  stat->weight |= 1 << 2;
2385  break;
2386  case 'C':
2387  case 'c':
2388  stat->weight |= 1 << 1;
2389  break;
2390  case 'D':
2391  case 'd':
2392  stat->weight |= 1;
2393  break;
2394  default:
2395  stat->weight |= 0;
2396  }
2397  }
2398  buf += pg_mblen(buf);
2399  }
2400  }
2401 
2402  while (SPI_processed > 0)
2403  {
2404  uint64 i;
2405 
2406  for (i = 0; i < SPI_processed; i++)
2407  {
2408  Datum data = SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull);
2409 
2410  if (!isnull)
2411  stat = ts_accum(persistentContext, stat, data);
2412  }
2413 
2415  SPI_cursor_fetch(portal, true, 100);
2416  }
2417 
2419  SPI_cursor_close(portal);
2420  SPI_freeplan(plan);
2421  pfree(query);
2422 
2423  return stat;
2424 }
2425 
2426 Datum
2428 {
2429  FuncCallContext *funcctx;
2430  Datum result;
2431 
2432  if (SRF_IS_FIRSTCALL())
2433  {
2434  TSVectorStat *stat;
2435  text *txt = PG_GETARG_TEXT_PP(0);
2436 
2437  funcctx = SRF_FIRSTCALL_INIT();
2438  SPI_connect();
2439  stat = ts_stat_sql(funcctx->multi_call_memory_ctx, txt, NULL);
2440  PG_FREE_IF_COPY(txt, 0);
2441  ts_setup_firstcall(fcinfo, funcctx, stat);
2442  SPI_finish();
2443  }
2444 
2445  funcctx = SRF_PERCALL_SETUP();
2446  if ((result = ts_process_call(funcctx)) != (Datum) 0)
2447  SRF_RETURN_NEXT(funcctx, result);
2448  SRF_RETURN_DONE(funcctx);
2449 }
2450 
2451 Datum
2453 {
2454  FuncCallContext *funcctx;
2455  Datum result;
2456 
2457  if (SRF_IS_FIRSTCALL())
2458  {
2459  TSVectorStat *stat;
2460  text *txt = PG_GETARG_TEXT_PP(0);
2461  text *ws = PG_GETARG_TEXT_PP(1);
2462 
2463  funcctx = SRF_FIRSTCALL_INIT();
2464  SPI_connect();
2465  stat = ts_stat_sql(funcctx->multi_call_memory_ctx, txt, ws);
2466  PG_FREE_IF_COPY(txt, 0);
2467  PG_FREE_IF_COPY(ws, 1);
2468  ts_setup_firstcall(fcinfo, funcctx, stat);
2469  SPI_finish();
2470  }
2471 
2472  funcctx = SRF_PERCALL_SETUP();
2473  if ((result = ts_process_call(funcctx)) != (Datum) 0)
2474  SRF_RETURN_NEXT(funcctx, result);
2475  SRF_RETURN_DONE(funcctx);
2476 }
2477 
2478 
2479 /*
2480  * Triggers for automatic update of a tsvector column from text column(s)
2481  *
2482  * Trigger arguments are either
2483  * name of tsvector col, name of tsconfig to use, name(s) of text col(s)
2484  * name of tsvector col, name of regconfig col, name(s) of text col(s)
2485  * ie, tsconfig can either be specified by name, or indirectly as the
2486  * contents of a regconfig field in the row. If the name is used, it must
2487  * be explicitly schema-qualified.
2488  */
2489 Datum
2491 {
2492  return tsvector_update_trigger(fcinfo, false);
2493 }
2494 
2495 Datum
2497 {
2498  return tsvector_update_trigger(fcinfo, true);
2499 }
2500 
2501 static Datum
2503 {
2504  TriggerData *trigdata;
2505  Trigger *trigger;
2506  Relation rel;
2507  HeapTuple rettuple = NULL;
2508  int tsvector_attr_num,
2509  i;
2510  ParsedText prs;
2511  Datum datum;
2512  bool isnull;
2513  text *txt;
2514  Oid cfgId;
2515  bool update_needed;
2516 
2517  /* Check call context */
2518  if (!CALLED_AS_TRIGGER(fcinfo)) /* internal error */
2519  elog(ERROR, "tsvector_update_trigger: not fired by trigger manager");
2520 
2521  trigdata = (TriggerData *) fcinfo->context;
2522  if (!TRIGGER_FIRED_FOR_ROW(trigdata->tg_event))
2523  elog(ERROR, "tsvector_update_trigger: must be fired for row");
2524  if (!TRIGGER_FIRED_BEFORE(trigdata->tg_event))
2525  elog(ERROR, "tsvector_update_trigger: must be fired BEFORE event");
2526 
2527  if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event))
2528  {
2529  rettuple = trigdata->tg_trigtuple;
2530  update_needed = true;
2531  }
2532  else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event))
2533  {
2534  rettuple = trigdata->tg_newtuple;
2535  update_needed = false; /* computed below */
2536  }
2537  else
2538  elog(ERROR, "tsvector_update_trigger: must be fired for INSERT or UPDATE");
2539 
2540  trigger = trigdata->tg_trigger;
2541  rel = trigdata->tg_relation;
2542 
2543  if (trigger->tgnargs < 3)
2544  elog(ERROR, "tsvector_update_trigger: arguments must be tsvector_field, ts_config, text_field1, ...)");
2545 
2546  /* Find the target tsvector column */
2547  tsvector_attr_num = SPI_fnumber(rel->rd_att, trigger->tgargs[0]);
2548  if (tsvector_attr_num == SPI_ERROR_NOATTRIBUTE)
2549  ereport(ERROR,
2550  (errcode(ERRCODE_UNDEFINED_COLUMN),
2551  errmsg("tsvector column \"%s\" does not exist",
2552  trigger->tgargs[0])));
2553  /* This will effectively reject system columns, so no separate test: */
2554  if (!IsBinaryCoercible(SPI_gettypeid(rel->rd_att, tsvector_attr_num),
2555  TSVECTOROID))
2556  ereport(ERROR,
2557  (errcode(ERRCODE_DATATYPE_MISMATCH),
2558  errmsg("column \"%s\" is not of tsvector type",
2559  trigger->tgargs[0])));
2560 
2561  /* Find the configuration to use */
2562  if (config_column)
2563  {
2564  int config_attr_num;
2565 
2566  config_attr_num = SPI_fnumber(rel->rd_att, trigger->tgargs[1]);
2567  if (config_attr_num == SPI_ERROR_NOATTRIBUTE)
2568  ereport(ERROR,
2569  (errcode(ERRCODE_UNDEFINED_COLUMN),
2570  errmsg("configuration column \"%s\" does not exist",
2571  trigger->tgargs[1])));
2572  if (!IsBinaryCoercible(SPI_gettypeid(rel->rd_att, config_attr_num),
2573  REGCONFIGOID))
2574  ereport(ERROR,
2575  (errcode(ERRCODE_DATATYPE_MISMATCH),
2576  errmsg("column \"%s\" is not of regconfig type",
2577  trigger->tgargs[1])));
2578 
2579  datum = SPI_getbinval(rettuple, rel->rd_att, config_attr_num, &isnull);
2580  if (isnull)
2581  ereport(ERROR,
2582  (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
2583  errmsg("configuration column \"%s\" must not be null",
2584  trigger->tgargs[1])));
2585  cfgId = DatumGetObjectId(datum);
2586  }
2587  else
2588  {
2589  List *names;
2590 
2591  names = stringToQualifiedNameList(trigger->tgargs[1]);
2592  /* require a schema so that results are not search path dependent */
2593  if (list_length(names) < 2)
2594  ereport(ERROR,
2595  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2596  errmsg("text search configuration name \"%s\" must be schema-qualified",
2597  trigger->tgargs[1])));
2598  cfgId = get_ts_config_oid(names, false);
2599  }
2600 
2601  /* initialize parse state */
2602  prs.lenwords = 32;
2603  prs.curwords = 0;
2604  prs.pos = 0;
2605  prs.words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs.lenwords);
2606 
2607  /* find all words in indexable column(s) */
2608  for (i = 2; i < trigger->tgnargs; i++)
2609  {
2610  int numattr;
2611 
2612  numattr = SPI_fnumber(rel->rd_att, trigger->tgargs[i]);
2613  if (numattr == SPI_ERROR_NOATTRIBUTE)
2614  ereport(ERROR,
2615  (errcode(ERRCODE_UNDEFINED_COLUMN),
2616  errmsg("column \"%s\" does not exist",
2617  trigger->tgargs[i])));
2618  if (!IsBinaryCoercible(SPI_gettypeid(rel->rd_att, numattr), TEXTOID))
2619  ereport(ERROR,
2620  (errcode(ERRCODE_DATATYPE_MISMATCH),
2621  errmsg("column \"%s\" is not of a character type",
2622  trigger->tgargs[i])));
2623 
2625  update_needed = true;
2626 
2627  datum = SPI_getbinval(rettuple, rel->rd_att, numattr, &isnull);
2628  if (isnull)
2629  continue;
2630 
2631  txt = DatumGetTextPP(datum);
2632 
2633  parsetext(cfgId, &prs, VARDATA_ANY(txt), VARSIZE_ANY_EXHDR(txt));
2634 
2635  if (txt != (text *) DatumGetPointer(datum))
2636  pfree(txt);
2637  }
2638 
2639  if (update_needed)
2640  {
2641  /* make tsvector value */
2642  datum = TSVectorGetDatum(make_tsvector(&prs));
2643  isnull = false;
2644 
2645  /* and insert it into tuple */
2646  rettuple = heap_modify_tuple_by_cols(rettuple, rel->rd_att,
2647  1, &tsvector_attr_num,
2648  &datum, &isnull);
2649 
2650  pfree(DatumGetPointer(datum));
2651  }
2652 
2653  return PointerGetDatum(rettuple);
2654 }
int SPI_fnumber(TupleDesc tupdesc, const char *fname)
Definition: spi.c:1024
uint16 WordEntryPos
Definition: ts_type.h:63
uint32 nentry
Definition: tsvector_op.c:49
uint64 call_cntr
Definition: funcapi.h:65
#define DatumGetTSQuery(X)
Definition: ts_type.h:235
bool TS_execute(QueryItem *curitem, void *arg, uint32 flags, TSExecuteCallback chkcond)
Definition: tsvector_op.c:1797
#define PG_RETURN_POINTER(x)
Definition: fmgr.h:360
QueryOperator qoperator
Definition: ts_type.h:196
static void chooseNextStatEntry(MemoryContext persistentContext, TSVectorStat *stat, TSVector txt, uint32 low, uint32 high, uint32 offset)
Definition: tsvector_op.c:2139
#define TSPO_R_ONLY
Definition: tsvector_op.c:1403
Oid SPI_gettypeid(TupleDesc tupdesc, int fnumber)
Definition: spi.c:1157
Datum tsvector_unnest(PG_FUNCTION_ARGS)
Definition: tsvector_op.c:642
Datum tsvector_filter(PG_FUNCTION_ARGS)
Definition: tsvector_op.c:821
TSVector make_tsvector(ParsedText *prs)
Definition: to_tsany.c:156
Datum tsvector_length(PG_FUNCTION_ARGS)
Definition: tsvector_op.c:205
#define VARDATA_ANY(PTR)
Definition: postgres.h:348
#define VARDATA(PTR)
Definition: postgres.h:302
#define TS_EXEC_CALC_NOT
Definition: ts_utils.h:183
TupleDesc CreateTemplateTupleDesc(int natts)
Definition: tupdesc.c:44
uint32 stackpos
Definition: tsvector_op.c:65
int numattr
Definition: bootstrap.c:77
int SPI_connect(void)
Definition: spi.c:90
#define POSDATALEN(x, e)
Definition: ts_type.h:110
#define VARSIZE(PTR)
Definition: postgres.h:303
Datum tsvector_delete_str(PG_FUNCTION_ARGS)
Definition: tsvector_op.c:562
Datum ts_match_qv(PG_FUNCTION_ARGS)
Definition: tsvector_op.c:1963
bool allocated
Definition: ts_utils.h:152
#define SRF_IS_FIRSTCALL()
Definition: funcapi.h:293
#define PointerGetDatum(X)
Definition: postgres.h:556
#define PG_GETARG_DATUM(n)
Definition: fmgr.h:268
#define VARHDRSZ
Definition: c.h:561
#define PG_GETARG_TSQUERY(n)
Definition: ts_type.h:238
bool tsquery_requires_match(QueryItem *curitem)
Definition: tsvector_op.c:1913
#define DatumGetObjectId(X)
Definition: postgres.h:500
SPIPlanPtr SPI_prepare(const char *src, int nargs, Oid *argtypes)
Definition: spi.c:747
const Bitmapset * tg_updatedcols
Definition: trigger.h:42
Datum tsvector_concat(PG_FUNCTION_ARGS)
Definition: tsvector_op.c:928
#define DatumGetTextPP(X)
Definition: fmgr.h:291
int SPI_finish(void)
Definition: spi.c:177
StatEntry * root
Definition: tsvector_op.c:67
#define Min(x, y)
Definition: c.h:920
#define _POSVECPTR(x, e)
Definition: ts_type.h:109
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
#define Int16GetDatum(X)
Definition: postgres.h:451
#define PG_RETURN_INT32(x)
Definition: fmgr.h:353
ArrayType * construct_array(Datum *elems, int nelems, Oid elmtype, int elmlen, bool elmbyval, char elmalign)
Definition: arrayfuncs.c:3292
static TSTernaryValue TS_execute_recurse(QueryItem *curitem, void *arg, uint32 flags, TSExecuteCallback chkcond)
Definition: tsvector_op.c:1814
SPITupleTable * SPI_tuptable
Definition: spi.c:46
Datum tsvector_setweight(PG_FUNCTION_ARGS)
Definition: tsvector_op.c:215
#define FLEXIBLE_ARRAY_MEMBER
Definition: c.h:276
uint32 len
Definition: ts_type.h:44
struct cursor * cur
Definition: ecpg.c:28
Datum plainto_tsquery(PG_FUNCTION_ARGS)
Definition: to_tsany.c:617
int errcode(int sqlerrcode)
Definition: elog.c:610
Oid get_ts_config_oid(List *names, bool missing_ok)
Definition: namespace.c:2679
Datum tsvector_update_trigger_bycolumn(PG_FUNCTION_ARGS)
Definition: tsvector_op.c:2496
#define QI_VAL
Definition: ts_type.h:134
#define FirstLowInvalidHeapAttributeNumber
Definition: sysattr.h:27
Portal SPI_cursor_open(const char *name, SPIPlanPtr plan, Datum *Values, const char *Nulls, bool read_only)
Definition: spi.c:1294
HeapTuple heap_form_tuple(TupleDesc tupleDescriptor, Datum *values, bool *isnull)
Definition: heaptuple.c:1020
int32 lenwords
Definition: ts_utils.h:101
Datum ts_stat1(PG_FUNCTION_ARGS)
Definition: tsvector_op.c:2427
#define DirectFunctionCall1(func, arg1)
Definition: fmgr.h:624
uint32 distance
Definition: ts_type.h:158
int16 distance
Definition: ts_type.h:183
#define MAXSTRPOS
Definition: ts_type.h:50
#define TSPO_BOTH
Definition: tsvector_op.c:1404
int compareWordEntryPos(const void *a, const void *b)
Definition: tsvector.c:33
unsigned int Oid
Definition: postgres_ext.h:31
#define OP_OR
Definition: ts_type.h:168
HeapTuple * vals
Definition: spi.h:26
#define WEP_SETPOS(x, v)
Definition: ts_type.h:83
#define SRF_PERCALL_SETUP()
Definition: funcapi.h:297
#define GETQUERY(x)
Definition: _int.h:157
uint64 SPI_processed
Definition: spi.c:45
int32 curwords
Definition: ts_utils.h:102
TupleDesc tuple_desc
Definition: funcapi.h:112
HeapTuple tg_trigtuple
Definition: trigger.h:35
signed int int32
Definition: c.h:355
WordEntry * arrb
Definition: tsvector_op.c:38
HeapTuple BuildTupleFromCStrings(AttInMetadata *attinmeta, char **values)
Definition: execTuples.c:2116
#define GETOPERAND(x)
Definition: ltree.h:151
#define PG_GETARG_TEXT_PP(n)
Definition: fmgr.h:308
#define POSDATAPTR(x, e)
Definition: ts_type.h:111
int32 * arrb
Definition: _int_bool.c:227
#define SRF_RETURN_NEXT(_funcctx, _result)
Definition: funcapi.h:299
#define sprintf
Definition: port.h:195
TSTernaryValue
Definition: tsvector_op.c:71
#define OP_AND
Definition: ts_type.h:167
#define PG_GETARG_ARRAYTYPE_P(n)
Definition: array.h:251
Datum tsvector_delete_arr(PG_FUNCTION_ARGS)
Definition: tsvector_op.c:586
uint32 ndoc
Definition: tsvector_op.c:47
Datum tsvector_setweight_by_filter(PG_FUNCTION_ARGS)
Definition: tsvector_op.c:277
static bool checkcondition_str(void *checkval, QueryOperand *val, ExecPhraseData *data)
Definition: tsvector_op.c:1273
unsigned short uint16
Definition: c.h:366
void pfree(void *pointer)
Definition: mcxt.c:1056
Datum array_to_tsvector(PG_FUNCTION_ARGS)
Definition: tsvector_op.c:757
#define ERROR
Definition: elog.h:43
void parsetext(Oid cfgId, ParsedText *prs, char *buf, int buflen)
Definition: ts_parse.c:354
#define MAXNUMPOS
Definition: ts_type.h:86
Datum ts_match_vq(PG_FUNCTION_ARGS)
Definition: tsvector_op.c:1971
uint8 weight
Definition: ts_type.h:146
Datum SPI_getbinval(HeapTuple tuple, TupleDesc tupdesc, int fnumber, bool *isnull)
Definition: spi.c:1101
#define WEP_GETPOS(x)
Definition: ts_type.h:80
char * operand
Definition: ltxtquery_op.c:52
TupleDesc BlessTupleDesc(TupleDesc tupdesc)
Definition: execTuples.c:2052
WordEntryPos pos[FLEXIBLE_ARRAY_MEMBER]
Definition: ts_type.h:68
#define WEP_SETWEIGHT(x, v)
Definition: ts_type.h:82
static char * buf
Definition: pg_test_fsync.c:67
text * cstring_to_text_with_len(const char *s, int len)
Definition: varlena.c:184
static StatEntry * walkStatEntryTree(TSVectorStat *stat)
Definition: tsvector_op.c:2252
void check_stack_depth(void)
Definition: postgres.c:3312
#define SPI_ERROR_NOATTRIBUTE
Definition: spi.h:47
static TSVectorStat * ts_stat_sql(MemoryContext persistentContext, text *txt, text *ws)
Definition: tsvector_op.c:2338
AttInMetadata * attinmeta
Definition: funcapi.h:91
static int silly_cmp_tsvector(const TSVector a, const TSVector b)
Definition: tsvector_op.c:90
int32 size
Definition: ts_type.h:93
WordEntryPos * pos
Definition: ts_utils.h:154
#define DatumGetBool(X)
Definition: postgres.h:393
unsigned int uint32
Definition: c.h:367
ParsedWord * words
Definition: ts_utils.h:100
#define TSVectorGetDatum(X)
Definition: ts_type.h:119
uint32 lenlexeme
Definition: tsvector_op.c:52
#define DatumGetTSVector(X)
Definition: ts_type.h:117
void TupleDescInitEntry(TupleDesc desc, AttrNumber attributeNumber, const char *attributeName, Oid oidtypeid, int32 typmod, int attdim)
Definition: tupdesc.c:603
uint32 haspos
Definition: ts_type.h:44
#define TSQueryGetDatum(X)
Definition: ts_type.h:237
static void ts_setup_firstcall(FunctionCallInfo fcinfo, FuncCallContext *funcctx, TSVectorStat *stat)
Definition: tsvector_op.c:2206
bool IsBinaryCoercible(Oid srctype, Oid targettype)
static TSVector tsvector_delete_by_indices(TSVector tsv, int *indices_to_delete, int indices_count)
Definition: tsvector_op.c:472
#define TSPO_L_ONLY
Definition: tsvector_op.c:1402
StatEntry ** stack
Definition: tsvector_op.c:64
signed char int8
Definition: c.h:353
#define CALCDATASIZE(x, lenstr)
Definition: hstore.h:72
#define stat(a, b)
Definition: win32_port.h:255
void SPI_freetuptable(SPITupleTable *tuptable)
Definition: spi.c:1235
char ** tgargs
Definition: reltrigger.h:41
bool(* TSExecuteCallback)(void *arg, QueryOperand *val, ExecPhraseData *data)
Definition: ts_utils.h:169
QueryItemType type
Definition: ts_type.h:195
void * palloc0(Size size)
Definition: mcxt.c:980
#define PG_RETURN_BOOL(x)
Definition: fmgr.h:358
static void insertStatEntry(MemoryContext persistentContext, TSVectorStat *stat, TSVector txt, uint32 off)
Definition: tsvector_op.c:2073
uintptr_t Datum
Definition: postgres.h:367
Datum difference(PG_FUNCTION_ARGS)
#define PG_RETURN_DATUM(x)
Definition: fmgr.h:352
TSVectorData * TSVector
Definition: ts_type.h:98
#define DatumGetChar(X)
Definition: postgres.h:409
AttInMetadata * TupleDescGetAttInMetadata(TupleDesc tupdesc)
Definition: execTuples.c:2067
Datum tsvector_strip(PG_FUNCTION_ARGS)
Definition: tsvector_op.c:172
TupleDesc tupdesc
Definition: spi.h:25
Trigger * tg_trigger
Definition: trigger.h:37
TupleDesc rd_att
Definition: rel.h:110
HeapTuple tg_newtuple
Definition: trigger.h:36
void * MemoryContextAllocZero(MemoryContext context, Size size)
Definition: mcxt.c:839
#define ereport(elevel,...)
Definition: elog.h:144
#define Max(x, y)
Definition: c.h:914
Datum tsvector_to_array(PG_FUNCTION_ARGS)
Definition: tsvector_op.c:730
#define CALLED_AS_TRIGGER(fcinfo)
Definition: trigger.h:25
#define Assert(condition)
Definition: c.h:738
uint32 maxdepth
Definition: tsvector_op.c:62
#define OP_PHRASE
Definition: ts_type.h:169
TriggerEvent tg_event
Definition: trigger.h:33
#define TS_EXEC_PHRASE_NO_POS
Definition: ts_utils.h:190
Datum to_tsvector(PG_FUNCTION_ARGS)
Definition: to_tsany.c:259
MemoryContext multi_call_memory_ctx
Definition: funcapi.h:101
static int compare_text_lexemes(const void *va, const void *vb)
Definition: tsvector_op.c:450
struct StatEntry * left
Definition: tsvector_op.c:50
int32 pos
Definition: ts_utils.h:103
static Datum ts_process_call(FuncCallContext *funcctx)
Definition: tsvector_op.c:2298
static int list_length(const List *l)
Definition: pg_list.h:169
static int check_weight(TSVector txt, WordEntry *wptr, int8 weight)
Definition: tsvector_op.c:2052
Datum ts_stat2(PG_FUNCTION_ARGS)
Definition: tsvector_op.c:2452
static size_t qunique(void *array, size_t elements, size_t width, int(*compare)(const void *, const void *))
Definition: qunique.h:21
static int tsvector_bsearch(const TSVector tsv, char *lexeme, int lexeme_len)
Definition: tsvector_op.c:406
#define PG_FREE_IF_COPY(ptr, n)
Definition: fmgr.h:260
int32 tsCompareString(char *a, int lena, char *b, int lenb, bool prefix)
Definition: tsvector_op.c:1155
int pg_mblen(const char *mbstr)
Definition: mbutils.c:907
#define HeapTupleGetDatum(tuple)
Definition: funcapi.h:220
Datum tsvector_update_trigger_byid(PG_FUNCTION_ARGS)
Definition: tsvector_op.c:2490
uint32 pos
Definition: ts_type.h:44
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1069
HeapTuple heap_modify_tuple_by_cols(HeapTuple tuple, TupleDesc tupleDesc, int nCols, int *replCols, Datum *replValues, bool *replIsnull)
Definition: heaptuple.c:1181
uint32 length
Definition: ts_type.h:158
#define STATENTRYHDRSZ
Definition: tsvector_op.c:56
#define DatumGetPointer(X)
Definition: postgres.h:549
#define TRIGGER_FIRED_BEFORE(event)
Definition: trigger.h:135
void deconstruct_array(ArrayType *array, Oid elmtype, int elmlen, bool elmbyval, char elmalign, Datum **elemsp, bool **nullsp, int *nelemsp)
Definition: arrayfuncs.c:3462
uint32 left
Definition: ts_type.h:184
int SPI_freeplan(SPIPlanPtr plan)
Definition: spi.c:874
static TSTernaryValue TS_phrase_execute(QueryItem *curitem, void *arg, uint32 flags, TSExecuteCallback chkcond, ExecPhraseData *data)
Definition: tsvector_op.c:1548
static Datum values[MAXATTR]
Definition: bootstrap.c:167
void SPI_cursor_close(Portal portal)
Definition: spi.c:1705
char * text_to_cstring(const text *t)
Definition: varlena.c:205
char * values
Definition: tsvector_op.c:40
List * stringToQualifiedNameList(const char *string)
Definition: regproc.c:1866
#define TRIGGER_FIRED_BY_INSERT(event)
Definition: trigger.h:117
struct StatEntry StatEntry
void * user_fctx
Definition: funcapi.h:82
#define VARSIZE_ANY_EXHDR(PTR)
Definition: postgres.h:341
void * palloc(Size size)
Definition: mcxt.c:949
int errmsg(const char *fmt,...)
Definition: elog.c:824
int32 * arre
Definition: _int_bool.c:228
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:796
static Datum tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column)
Definition: tsvector_op.c:2502
#define STRPTR(x)
Definition: hstore.h:76
int32 size
Definition: ts_type.h:208
#define elog(elevel,...)
Definition: elog.h:214
int i
int16 tgnargs
Definition: reltrigger.h:38
void * arg
void SPI_cursor_fetch(Portal portal, bool forward, long count)
Definition: spi.c:1649
static int compare_int(const void *va, const void *vb)
Definition: tsvector_op.c:439
#define PG_GETARG_TSVECTOR_COPY(n)
Definition: ts_type.h:121
#define PG_GETARG_TSVECTOR(n)
Definition: ts_type.h:120
Definition: c.h:555
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
static TSVectorStat * ts_accum(MemoryContext persistentContext, TSVectorStat *stat, Datum data)
Definition: tsvector_op.c:2171
#define LIMITPOS(x)
Definition: ts_type.h:87
bool prefix
Definition: ts_type.h:150
static TSTernaryValue TS_phrase_output(ExecPhraseData *data, ExecPhraseData *Ldata, ExecPhraseData *Rdata, int emit, int Loffset, int Roffset, int max_npos)
Definition: tsvector_op.c:1407
WordEntry * arre
Definition: tsvector_op.c:39
#define SET_VARSIZE(PTR, len)
Definition: postgres.h:329
#define ARRPTR(x)
Definition: cube.c:24
#define WEP_GETWEIGHT(x)
Definition: ts_type.h:79
#define qsort(a, b, c, d)
Definition: port.h:479
#define SHORTALIGN(LEN)
Definition: c.h:687
static bool checkclass_str(CHKVAL *chkval, WordEntry *entry, QueryOperand *val, ExecPhraseData *data)
Definition: tsvector_op.c:1192
#define TRIGGER_FIRED_FOR_ROW(event)
Definition: trigger.h:129
#define compareStatWord(a, e, t)
Definition: tsvector_op.c:2067
#define MAXENTRYPOS
Definition: ts_type.h:85
Definition: pg_list.h:50
bool bms_is_member(int x, const Bitmapset *a)
Definition: bitmapset.c:427
#define TRIGGER_FIRED_BY_UPDATE(event)
Definition: trigger.h:123
int16 AttrNumber
Definition: attnum.h:21
long val
Definition: informix.c:664
static int32 add_pos(TSVector src, WordEntry *srcptr, TSVector dest, WordEntry *destptr, int32 maxpos)
Definition: tsvector_op.c:370
#define DirectFunctionCall2(func, arg1, arg2)
Definition: fmgr.h:626
#define PG_GETARG_CHAR(n)
Definition: fmgr.h:273
Datum ts_match_tt(PG_FUNCTION_ARGS)
Definition: tsvector_op.c:2001
Datum ts_match_tq(PG_FUNCTION_ARGS)
Definition: tsvector_op.c:2023
struct StatEntry * right
Definition: tsvector_op.c:51
#define SRF_RETURN_DONE(_funcctx)
Definition: funcapi.h:317
static int cmp(const chr *x, const chr *y, size_t len)
Definition: regc_locale.c:742
#define compareEntry(pa, a, pb, b)
Definition: tsvector_op.c:360
#define TSVECTORCMPFUNC(type, action, ret)
Definition: tsvector_op.c:149
char lexeme[FLEXIBLE_ARRAY_MEMBER]
Definition: tsvector_op.c:53
Relation tg_relation
Definition: trigger.h:34
static const float weights[]
Definition: tsrank.c:24
#define SRF_FIRSTCALL_INIT()
Definition: funcapi.h:295
int32 weight
Definition: tsvector_op.c:60
#define OP_NOT
Definition: ts_type.h:166