PostgreSQL Source Code  git master
tsvector_op.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * tsvector_op.c
4  * operations over tsvector
5  *
6  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
7  *
8  *
9  * IDENTIFICATION
10  * src/backend/utils/adt/tsvector_op.c
11  *
12  *-------------------------------------------------------------------------
13  */
14 #include "postgres.h"
15 
16 #include <limits.h>
17 
18 #include "access/htup_details.h"
19 #include "catalog/namespace.h"
20 #include "catalog/pg_type.h"
21 #include "commands/trigger.h"
22 #include "executor/spi.h"
23 #include "funcapi.h"
24 #include "lib/qunique.h"
25 #include "mb/pg_wchar.h"
26 #include "miscadmin.h"
27 #include "parser/parse_coerce.h"
28 #include "tsearch/ts_utils.h"
29 #include "utils/array.h"
30 #include "utils/builtins.h"
31 #include "utils/lsyscache.h"
32 #include "utils/regproc.h"
33 #include "utils/rel.h"
34 
35 
36 typedef struct
37 {
40  char *values;
41  char *operand;
42 } CHKVAL;
43 
44 
45 typedef struct StatEntry
46 {
47  uint32 ndoc; /* zero indicates that we were already here
48  * while walking through the tree */
50  struct StatEntry *left;
51  struct StatEntry *right;
55 
56 #define STATENTRYHDRSZ (offsetof(StatEntry, lexeme))
57 
58 typedef struct
59 {
61 
63 
66 
68 } TSVectorStat;
69 
70 
71 static TSTernaryValue TS_execute_recurse(QueryItem *curitem, void *arg,
72  uint32 flags,
73  TSExecuteCallback chkcond);
74 static bool TS_execute_locations_recurse(QueryItem *curitem,
75  void *arg,
76  TSExecuteCallback chkcond,
77  List **locations);
78 static int tsvector_bsearch(const TSVector tsv, char *lexeme, int lexeme_len);
79 static Datum tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column);
80 
81 
82 /*
83  * Order: haspos, len, word, for all positions (pos, weight)
84  */
85 static int
87 {
88  if (VARSIZE(a) < VARSIZE(b))
89  return -1;
90  else if (VARSIZE(a) > VARSIZE(b))
91  return 1;
92  else if (a->size < b->size)
93  return -1;
94  else if (a->size > b->size)
95  return 1;
96  else
97  {
98  WordEntry *aptr = ARRPTR(a);
99  WordEntry *bptr = ARRPTR(b);
100  int i = 0;
101  int res;
102 
103 
104  for (i = 0; i < a->size; i++)
105  {
106  if (aptr->haspos != bptr->haspos)
107  {
108  return (aptr->haspos > bptr->haspos) ? -1 : 1;
109  }
110  else if ((res = tsCompareString(STRPTR(a) + aptr->pos, aptr->len, STRPTR(b) + bptr->pos, bptr->len, false)) != 0)
111  {
112  return res;
113  }
114  else if (aptr->haspos)
115  {
116  WordEntryPos *ap = POSDATAPTR(a, aptr);
117  WordEntryPos *bp = POSDATAPTR(b, bptr);
118  int j;
119 
120  if (POSDATALEN(a, aptr) != POSDATALEN(b, bptr))
121  return (POSDATALEN(a, aptr) > POSDATALEN(b, bptr)) ? -1 : 1;
122 
123  for (j = 0; j < POSDATALEN(a, aptr); j++)
124  {
125  if (WEP_GETPOS(*ap) != WEP_GETPOS(*bp))
126  {
127  return (WEP_GETPOS(*ap) > WEP_GETPOS(*bp)) ? -1 : 1;
128  }
129  else if (WEP_GETWEIGHT(*ap) != WEP_GETWEIGHT(*bp))
130  {
131  return (WEP_GETWEIGHT(*ap) > WEP_GETWEIGHT(*bp)) ? -1 : 1;
132  }
133  ap++, bp++;
134  }
135  }
136 
137  aptr++;
138  bptr++;
139  }
140  }
141 
142  return 0;
143 }
144 
145 #define TSVECTORCMPFUNC( type, action, ret ) \
146 Datum \
147 tsvector_##type(PG_FUNCTION_ARGS) \
148 { \
149  TSVector a = PG_GETARG_TSVECTOR(0); \
150  TSVector b = PG_GETARG_TSVECTOR(1); \
151  int res = silly_cmp_tsvector(a, b); \
152  PG_FREE_IF_COPY(a,0); \
153  PG_FREE_IF_COPY(b,1); \
154  PG_RETURN_##ret( res action 0 ); \
155 } \
156 /* keep compiler quiet - no extra ; */ \
157 extern int no_such_variable
158 
159 TSVECTORCMPFUNC(lt, <, BOOL);
160 TSVECTORCMPFUNC(le, <=, BOOL);
161 TSVECTORCMPFUNC(eq, ==, BOOL);
162 TSVECTORCMPFUNC(ge, >=, BOOL);
163 TSVECTORCMPFUNC(gt, >, BOOL);
164 TSVECTORCMPFUNC(ne, !=, BOOL);
165 TSVECTORCMPFUNC(cmp, +, INT32);
166 
167 Datum
169 {
171  TSVector out;
172  int i,
173  len = 0;
174  WordEntry *arrin = ARRPTR(in),
175  *arrout;
176  char *cur;
177 
178  for (i = 0; i < in->size; i++)
179  len += arrin[i].len;
180 
181  len = CALCDATASIZE(in->size, len);
182  out = (TSVector) palloc0(len);
183  SET_VARSIZE(out, len);
184  out->size = in->size;
185  arrout = ARRPTR(out);
186  cur = STRPTR(out);
187  for (i = 0; i < in->size; i++)
188  {
189  memcpy(cur, STRPTR(in) + arrin[i].pos, arrin[i].len);
190  arrout[i].haspos = 0;
191  arrout[i].len = arrin[i].len;
192  arrout[i].pos = cur - STRPTR(out);
193  cur += arrout[i].len;
194  }
195 
196  PG_FREE_IF_COPY(in, 0);
197  PG_RETURN_POINTER(out);
198 }
199 
200 Datum
202 {
204  int32 ret = in->size;
205 
206  PG_FREE_IF_COPY(in, 0);
207  PG_RETURN_INT32(ret);
208 }
209 
210 Datum
212 {
214  char cw = PG_GETARG_CHAR(1);
215  TSVector out;
216  int i,
217  j;
218  WordEntry *entry;
219  WordEntryPos *p;
220  int w = 0;
221 
222  switch (cw)
223  {
224  case 'A':
225  case 'a':
226  w = 3;
227  break;
228  case 'B':
229  case 'b':
230  w = 2;
231  break;
232  case 'C':
233  case 'c':
234  w = 1;
235  break;
236  case 'D':
237  case 'd':
238  w = 0;
239  break;
240  default:
241  /* internal error */
242  elog(ERROR, "unrecognized weight: %d", cw);
243  }
244 
245  out = (TSVector) palloc(VARSIZE(in));
246  memcpy(out, in, VARSIZE(in));
247  entry = ARRPTR(out);
248  i = out->size;
249  while (i--)
250  {
251  if ((j = POSDATALEN(out, entry)) != 0)
252  {
253  p = POSDATAPTR(out, entry);
254  while (j--)
255  {
256  WEP_SETWEIGHT(*p, w);
257  p++;
258  }
259  }
260  entry++;
261  }
262 
263  PG_FREE_IF_COPY(in, 0);
264  PG_RETURN_POINTER(out);
265 }
266 
267 /*
268  * setweight(tsin tsvector, char_weight "char", lexemes "text"[])
269  *
270  * Assign weight w to elements of tsin that are listed in lexemes.
271  */
272 Datum
274 {
275  TSVector tsin = PG_GETARG_TSVECTOR(0);
276  char char_weight = PG_GETARG_CHAR(1);
277  ArrayType *lexemes = PG_GETARG_ARRAYTYPE_P(2);
278 
279  TSVector tsout;
280  int i,
281  j,
282  nlexemes,
283  weight;
284  WordEntry *entry;
285  Datum *dlexemes;
286  bool *nulls;
287 
288  switch (char_weight)
289  {
290  case 'A':
291  case 'a':
292  weight = 3;
293  break;
294  case 'B':
295  case 'b':
296  weight = 2;
297  break;
298  case 'C':
299  case 'c':
300  weight = 1;
301  break;
302  case 'D':
303  case 'd':
304  weight = 0;
305  break;
306  default:
307  /* internal error */
308  elog(ERROR, "unrecognized weight: %c", char_weight);
309  }
310 
311  tsout = (TSVector) palloc(VARSIZE(tsin));
312  memcpy(tsout, tsin, VARSIZE(tsin));
313  entry = ARRPTR(tsout);
314 
315  deconstruct_array_builtin(lexemes, TEXTOID, &dlexemes, &nulls, &nlexemes);
316 
317  /*
318  * Assuming that lexemes array is significantly shorter than tsvector we
319  * can iterate through lexemes performing binary search of each lexeme
320  * from lexemes in tsvector.
321  */
322  for (i = 0; i < nlexemes; i++)
323  {
324  char *lex;
325  int lex_len,
326  lex_pos;
327 
328  /* Ignore null array elements, they surely don't match */
329  if (nulls[i])
330  continue;
331 
332  lex = VARDATA(dlexemes[i]);
333  lex_len = VARSIZE(dlexemes[i]) - VARHDRSZ;
334  lex_pos = tsvector_bsearch(tsout, lex, lex_len);
335 
336  if (lex_pos >= 0 && (j = POSDATALEN(tsout, entry + lex_pos)) != 0)
337  {
338  WordEntryPos *p = POSDATAPTR(tsout, entry + lex_pos);
339 
340  while (j--)
341  {
342  WEP_SETWEIGHT(*p, weight);
343  p++;
344  }
345  }
346  }
347 
348  PG_FREE_IF_COPY(tsin, 0);
349  PG_FREE_IF_COPY(lexemes, 2);
350 
351  PG_RETURN_POINTER(tsout);
352 }
353 
/*
 * Compare the lexemes of two WordEntrys with tsCompareString (non-prefix
 * mode).  base_a/base_b are the character areas that ent_a->pos and
 * ent_b->pos are offsets into.
 */
#define compareEntry(base_a, ent_a, base_b, ent_b) \
	tsCompareString((base_a) + (ent_a)->pos, (ent_a)->len, \
					(base_b) + (ent_b)->pos, (ent_b)->len, \
					false)
358 
359 /*
360  * Add positions from src to dest after offsetting them by maxpos.
361  * Return the number added (might be less than expected due to overflow)
362  */
363 static int32
365  TSVector dest, WordEntry *destptr,
366  int32 maxpos)
367 {
368  uint16 *clen = &_POSVECPTR(dest, destptr)->npos;
369  int i;
370  uint16 slen = POSDATALEN(src, srcptr),
371  startlen;
372  WordEntryPos *spos = POSDATAPTR(src, srcptr),
373  *dpos = POSDATAPTR(dest, destptr);
374 
375  if (!destptr->haspos)
376  *clen = 0;
377 
378  startlen = *clen;
379  for (i = 0;
380  i < slen && *clen < MAXNUMPOS &&
381  (*clen == 0 || WEP_GETPOS(dpos[*clen - 1]) != MAXENTRYPOS - 1);
382  i++)
383  {
384  WEP_SETWEIGHT(dpos[*clen], WEP_GETWEIGHT(spos[i]));
385  WEP_SETPOS(dpos[*clen], LIMITPOS(WEP_GETPOS(spos[i]) + maxpos));
386  (*clen)++;
387  }
388 
389  if (*clen != startlen)
390  destptr->haspos = 1;
391  return *clen - startlen;
392 }
393 
394 /*
395  * Perform binary search of given lexeme in TSVector.
396  * Returns lexeme position in TSVector's entry array or -1 if lexeme wasn't
397  * found.
398  */
399 static int
400 tsvector_bsearch(const TSVector tsv, char *lexeme, int lexeme_len)
401 {
402  WordEntry *arrin = ARRPTR(tsv);
403  int StopLow = 0,
404  StopHigh = tsv->size,
405  StopMiddle,
406  cmp;
407 
408  while (StopLow < StopHigh)
409  {
410  StopMiddle = (StopLow + StopHigh) / 2;
411 
412  cmp = tsCompareString(lexeme, lexeme_len,
413  STRPTR(tsv) + arrin[StopMiddle].pos,
414  arrin[StopMiddle].len,
415  false);
416 
417  if (cmp < 0)
418  StopHigh = StopMiddle;
419  else if (cmp > 0)
420  StopLow = StopMiddle + 1;
421  else /* found it */
422  return StopMiddle;
423  }
424 
425  return -1;
426 }
427 
428 /*
429  * qsort comparator functions
430  */
431 
/*
 * qsort/qunique comparator for plain ints: returns -1, 0 or 1 when the
 * first value is less than, equal to, or greater than the second.
 */
static int
compare_int(const void *va, const void *vb)
{
	const int  *lhs = (const int *) va;
	const int  *rhs = (const int *) vb;

	if (*lhs < *rhs)
		return -1;
	if (*lhs > *rhs)
		return 1;
	return 0;
}
442 
443 static int
444 compare_text_lexemes(const void *va, const void *vb)
445 {
446  Datum a = *((const Datum *) va);
447  Datum b = *((const Datum *) vb);
448  char *alex = VARDATA_ANY(a);
449  int alex_len = VARSIZE_ANY_EXHDR(a);
450  char *blex = VARDATA_ANY(b);
451  int blex_len = VARSIZE_ANY_EXHDR(b);
452 
453  return tsCompareString(alex, alex_len, blex, blex_len, false);
454 }
455 
456 /*
457  * Internal routine to delete lexemes from TSVector by array of offsets.
458  *
459  * int *indices_to_delete -- array of lexeme offsets to delete (modified here!)
460  * int indices_count -- size of that array
461  *
462  * Returns new TSVector without given lexemes along with their positions
463  * and weights.
464  */
465 static TSVector
466 tsvector_delete_by_indices(TSVector tsv, int *indices_to_delete,
467  int indices_count)
468 {
469  TSVector tsout;
470  WordEntry *arrin = ARRPTR(tsv),
471  *arrout;
472  char *data = STRPTR(tsv),
473  *dataout;
474  int i, /* index in arrin */
475  j, /* index in arrout */
476  k, /* index in indices_to_delete */
477  curoff; /* index in dataout area */
478 
479  /*
480  * Sort the filter array to simplify membership checks below. Also, get
481  * rid of any duplicate entries, so that we can assume that indices_count
482  * is exactly equal to the number of lexemes that will be removed.
483  */
484  if (indices_count > 1)
485  {
486  qsort(indices_to_delete, indices_count, sizeof(int), compare_int);
487  indices_count = qunique(indices_to_delete, indices_count, sizeof(int),
488  compare_int);
489  }
490 
491  /*
492  * Here we overestimate tsout size, since we don't know how much space is
493  * used by the deleted lexeme(s). We will set exact size below.
494  */
495  tsout = (TSVector) palloc0(VARSIZE(tsv));
496 
497  /* This count must be correct because STRPTR(tsout) relies on it. */
498  tsout->size = tsv->size - indices_count;
499 
500  /*
501  * Copy tsv to tsout, skipping lexemes listed in indices_to_delete.
502  */
503  arrout = ARRPTR(tsout);
504  dataout = STRPTR(tsout);
505  curoff = 0;
506  for (i = j = k = 0; i < tsv->size; i++)
507  {
508  /*
509  * If current i is present in indices_to_delete, skip this lexeme.
510  * Since indices_to_delete is already sorted, we only need to check
511  * the current (k'th) entry.
512  */
513  if (k < indices_count && i == indices_to_delete[k])
514  {
515  k++;
516  continue;
517  }
518 
519  /* Copy lexeme and its positions and weights */
520  memcpy(dataout + curoff, data + arrin[i].pos, arrin[i].len);
521  arrout[j].haspos = arrin[i].haspos;
522  arrout[j].len = arrin[i].len;
523  arrout[j].pos = curoff;
524  curoff += arrin[i].len;
525  if (arrin[i].haspos)
526  {
527  int len = POSDATALEN(tsv, arrin + i) * sizeof(WordEntryPos)
528  + sizeof(uint16);
529 
530  curoff = SHORTALIGN(curoff);
531  memcpy(dataout + curoff,
532  STRPTR(tsv) + SHORTALIGN(arrin[i].pos + arrin[i].len),
533  len);
534  curoff += len;
535  }
536 
537  j++;
538  }
539 
540  /*
541  * k should now be exactly equal to indices_count. If it isn't then the
542  * caller provided us with indices outside of [0, tsv->size) range and
543  * estimation of tsout's size is wrong.
544  */
545  Assert(k == indices_count);
546 
547  SET_VARSIZE(tsout, CALCDATASIZE(tsout->size, curoff));
548  return tsout;
549 }
550 
551 /*
552  * Delete given lexeme from tsvector.
553  * Implementation of user-level ts_delete(tsvector, text).
554  */
555 Datum
557 {
558  TSVector tsin = PG_GETARG_TSVECTOR(0),
559  tsout;
560  text *tlexeme = PG_GETARG_TEXT_PP(1);
561  char *lexeme = VARDATA_ANY(tlexeme);
562  int lexeme_len = VARSIZE_ANY_EXHDR(tlexeme),
563  skip_index;
564 
565  if ((skip_index = tsvector_bsearch(tsin, lexeme, lexeme_len)) == -1)
566  PG_RETURN_POINTER(tsin);
567 
568  tsout = tsvector_delete_by_indices(tsin, &skip_index, 1);
569 
570  PG_FREE_IF_COPY(tsin, 0);
571  PG_FREE_IF_COPY(tlexeme, 1);
572  PG_RETURN_POINTER(tsout);
573 }
574 
575 /*
576  * Delete given array of lexemes from tsvector.
577  * Implementation of user-level ts_delete(tsvector, text[]).
578  */
579 Datum
581 {
582  TSVector tsin = PG_GETARG_TSVECTOR(0),
583  tsout;
584  ArrayType *lexemes = PG_GETARG_ARRAYTYPE_P(1);
585  int i,
586  nlex,
587  skip_count,
588  *skip_indices;
589  Datum *dlexemes;
590  bool *nulls;
591 
592  deconstruct_array_builtin(lexemes, TEXTOID, &dlexemes, &nulls, &nlex);
593 
594  /*
595  * In typical use case array of lexemes to delete is relatively small. So
596  * here we optimize things for that scenario: iterate through lexarr
597  * performing binary search of each lexeme from lexarr in tsvector.
598  */
599  skip_indices = palloc0(nlex * sizeof(int));
600  for (i = skip_count = 0; i < nlex; i++)
601  {
602  char *lex;
603  int lex_len,
604  lex_pos;
605 
606  /* Ignore null array elements, they surely don't match */
607  if (nulls[i])
608  continue;
609 
610  lex = VARDATA(dlexemes[i]);
611  lex_len = VARSIZE(dlexemes[i]) - VARHDRSZ;
612  lex_pos = tsvector_bsearch(tsin, lex, lex_len);
613 
614  if (lex_pos >= 0)
615  skip_indices[skip_count++] = lex_pos;
616  }
617 
618  tsout = tsvector_delete_by_indices(tsin, skip_indices, skip_count);
619 
620  pfree(skip_indices);
621  PG_FREE_IF_COPY(tsin, 0);
622  PG_FREE_IF_COPY(lexemes, 1);
623 
624  PG_RETURN_POINTER(tsout);
625 }
626 
627 /*
628  * Expand tsvector as table with following columns:
629  * lexeme: lexeme text
630  * positions: integer array of lexeme positions
631  * weights: char array of weights corresponding to positions
632  */
633 Datum
635 {
636  FuncCallContext *funcctx;
637  TSVector tsin;
638 
639  if (SRF_IS_FIRSTCALL())
640  {
641  MemoryContext oldcontext;
642  TupleDesc tupdesc;
643 
644  funcctx = SRF_FIRSTCALL_INIT();
645  oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
646 
647  tupdesc = CreateTemplateTupleDesc(3);
648  TupleDescInitEntry(tupdesc, (AttrNumber) 1, "lexeme",
649  TEXTOID, -1, 0);
650  TupleDescInitEntry(tupdesc, (AttrNumber) 2, "positions",
651  INT2ARRAYOID, -1, 0);
652  TupleDescInitEntry(tupdesc, (AttrNumber) 3, "weights",
653  TEXTARRAYOID, -1, 0);
654  if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
655  elog(ERROR, "return type must be a row type");
656  funcctx->tuple_desc = tupdesc;
657 
658  funcctx->user_fctx = PG_GETARG_TSVECTOR_COPY(0);
659 
660  MemoryContextSwitchTo(oldcontext);
661  }
662 
663  funcctx = SRF_PERCALL_SETUP();
664  tsin = (TSVector) funcctx->user_fctx;
665 
666  if (funcctx->call_cntr < tsin->size)
667  {
668  WordEntry *arrin = ARRPTR(tsin);
669  char *data = STRPTR(tsin);
670  HeapTuple tuple;
671  int j,
672  i = funcctx->call_cntr;
673  bool nulls[] = {false, false, false};
674  Datum values[3];
675 
676  values[0] = PointerGetDatum(cstring_to_text_with_len(data + arrin[i].pos, arrin[i].len));
677 
678  if (arrin[i].haspos)
679  {
680  WordEntryPosVector *posv;
681  Datum *positions;
682  Datum *weights;
683  char weight;
684 
685  /*
686  * Internally tsvector stores position and weight in the same
687  * uint16 (2 bits for weight, 14 for position). Here we extract
688  * that in two separate arrays.
689  */
690  posv = _POSVECPTR(tsin, arrin + i);
691  positions = palloc(posv->npos * sizeof(Datum));
692  weights = palloc(posv->npos * sizeof(Datum));
693  for (j = 0; j < posv->npos; j++)
694  {
695  positions[j] = Int16GetDatum(WEP_GETPOS(posv->pos[j]));
696  weight = 'D' - WEP_GETWEIGHT(posv->pos[j]);
698  1));
699  }
700 
701  values[1] = PointerGetDatum(construct_array_builtin(positions, posv->npos, INT2OID));
703  }
704  else
705  {
706  nulls[1] = nulls[2] = true;
707  }
708 
709  tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
710  SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
711  }
712  else
713  {
714  SRF_RETURN_DONE(funcctx);
715  }
716 }
717 
718 /*
719  * Convert tsvector to array of lexemes.
720  */
721 Datum
723 {
724  TSVector tsin = PG_GETARG_TSVECTOR(0);
725  WordEntry *arrin = ARRPTR(tsin);
726  Datum *elements;
727  int i;
728  ArrayType *array;
729 
730  elements = palloc(tsin->size * sizeof(Datum));
731 
732  for (i = 0; i < tsin->size; i++)
733  {
734  elements[i] = PointerGetDatum(cstring_to_text_with_len(STRPTR(tsin) + arrin[i].pos,
735  arrin[i].len));
736  }
737 
738  array = construct_array_builtin(elements, tsin->size, TEXTOID);
739 
740  pfree(elements);
741  PG_FREE_IF_COPY(tsin, 0);
742  PG_RETURN_POINTER(array);
743 }
744 
745 /*
746  * Build tsvector from array of lexemes.
747  */
748 Datum
750 {
752  TSVector tsout;
753  Datum *dlexemes;
754  WordEntry *arrout;
755  bool *nulls;
756  int nitems,
757  i,
758  tslen,
759  datalen = 0;
760  char *cur;
761 
762  deconstruct_array_builtin(v, TEXTOID, &dlexemes, &nulls, &nitems);
763 
764  /*
765  * Reject nulls and zero length strings (maybe we should just ignore them,
766  * instead?)
767  */
768  for (i = 0; i < nitems; i++)
769  {
770  if (nulls[i])
771  ereport(ERROR,
772  (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
773  errmsg("lexeme array may not contain nulls")));
774 
775  if (VARSIZE(dlexemes[i]) - VARHDRSZ == 0)
776  ereport(ERROR,
777  (errcode(ERRCODE_ZERO_LENGTH_CHARACTER_STRING),
778  errmsg("lexeme array may not contain empty strings")));
779  }
780 
781  /* Sort and de-dup, because this is required for a valid tsvector. */
782  if (nitems > 1)
783  {
784  qsort(dlexemes, nitems, sizeof(Datum), compare_text_lexemes);
785  nitems = qunique(dlexemes, nitems, sizeof(Datum),
787  }
788 
789  /* Calculate space needed for surviving lexemes. */
790  for (i = 0; i < nitems; i++)
791  datalen += VARSIZE(dlexemes[i]) - VARHDRSZ;
792  tslen = CALCDATASIZE(nitems, datalen);
793 
794  /* Allocate and fill tsvector. */
795  tsout = (TSVector) palloc0(tslen);
796  SET_VARSIZE(tsout, tslen);
797  tsout->size = nitems;
798 
799  arrout = ARRPTR(tsout);
800  cur = STRPTR(tsout);
801  for (i = 0; i < nitems; i++)
802  {
803  char *lex = VARDATA(dlexemes[i]);
804  int lex_len = VARSIZE(dlexemes[i]) - VARHDRSZ;
805 
806  memcpy(cur, lex, lex_len);
807  arrout[i].haspos = 0;
808  arrout[i].len = lex_len;
809  arrout[i].pos = cur - STRPTR(tsout);
810  cur += lex_len;
811  }
812 
813  PG_FREE_IF_COPY(v, 0);
814  PG_RETURN_POINTER(tsout);
815 }
816 
817 /*
818  * ts_filter(): keep only lexemes with given weights in tsvector.
819  */
820 Datum
822 {
823  TSVector tsin = PG_GETARG_TSVECTOR(0),
824  tsout;
826  WordEntry *arrin = ARRPTR(tsin),
827  *arrout;
828  char *datain = STRPTR(tsin),
829  *dataout;
830  Datum *dweights;
831  bool *nulls;
832  int nweights;
833  int i,
834  j;
835  int cur_pos = 0;
836  char mask = 0;
837 
838  deconstruct_array_builtin(weights, CHAROID, &dweights, &nulls, &nweights);
839 
840  for (i = 0; i < nweights; i++)
841  {
842  char char_weight;
843 
844  if (nulls[i])
845  ereport(ERROR,
846  (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
847  errmsg("weight array may not contain nulls")));
848 
849  char_weight = DatumGetChar(dweights[i]);
850  switch (char_weight)
851  {
852  case 'A':
853  case 'a':
854  mask = mask | 8;
855  break;
856  case 'B':
857  case 'b':
858  mask = mask | 4;
859  break;
860  case 'C':
861  case 'c':
862  mask = mask | 2;
863  break;
864  case 'D':
865  case 'd':
866  mask = mask | 1;
867  break;
868  default:
869  ereport(ERROR,
870  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
871  errmsg("unrecognized weight: \"%c\"", char_weight)));
872  }
873  }
874 
875  tsout = (TSVector) palloc0(VARSIZE(tsin));
876  tsout->size = tsin->size;
877  arrout = ARRPTR(tsout);
878  dataout = STRPTR(tsout);
879 
880  for (i = j = 0; i < tsin->size; i++)
881  {
882  WordEntryPosVector *posvin,
883  *posvout;
884  int npos = 0;
885  int k;
886 
887  if (!arrin[i].haspos)
888  continue;
889 
890  posvin = _POSVECPTR(tsin, arrin + i);
891  posvout = (WordEntryPosVector *)
892  (dataout + SHORTALIGN(cur_pos + arrin[i].len));
893 
894  for (k = 0; k < posvin->npos; k++)
895  {
896  if (mask & (1 << WEP_GETWEIGHT(posvin->pos[k])))
897  posvout->pos[npos++] = posvin->pos[k];
898  }
899 
900  /* if no satisfactory positions found, skip lexeme */
901  if (!npos)
902  continue;
903 
904  arrout[j].haspos = true;
905  arrout[j].len = arrin[i].len;
906  arrout[j].pos = cur_pos;
907 
908  memcpy(dataout + cur_pos, datain + arrin[i].pos, arrin[i].len);
909  posvout->npos = npos;
910  cur_pos += SHORTALIGN(arrin[i].len);
911  cur_pos += POSDATALEN(tsout, arrout + j) * sizeof(WordEntryPos) +
912  sizeof(uint16);
913  j++;
914  }
915 
916  tsout->size = j;
917  if (dataout != STRPTR(tsout))
918  memmove(STRPTR(tsout), dataout, cur_pos);
919 
920  SET_VARSIZE(tsout, CALCDATASIZE(tsout->size, cur_pos));
921 
922  PG_FREE_IF_COPY(tsin, 0);
923  PG_RETURN_POINTER(tsout);
924 }
925 
926 Datum
928 {
929  TSVector in1 = PG_GETARG_TSVECTOR(0);
930  TSVector in2 = PG_GETARG_TSVECTOR(1);
931  TSVector out;
932  WordEntry *ptr;
933  WordEntry *ptr1,
934  *ptr2;
935  WordEntryPos *p;
936  int maxpos = 0,
937  i,
938  j,
939  i1,
940  i2,
941  dataoff,
942  output_bytes,
943  output_size;
944  char *data,
945  *data1,
946  *data2;
947 
948  /* Get max position in in1; we'll need this to offset in2's positions */
949  ptr = ARRPTR(in1);
950  i = in1->size;
951  while (i--)
952  {
953  if ((j = POSDATALEN(in1, ptr)) != 0)
954  {
955  p = POSDATAPTR(in1, ptr);
956  while (j--)
957  {
958  if (WEP_GETPOS(*p) > maxpos)
959  maxpos = WEP_GETPOS(*p);
960  p++;
961  }
962  }
963  ptr++;
964  }
965 
966  ptr1 = ARRPTR(in1);
967  ptr2 = ARRPTR(in2);
968  data1 = STRPTR(in1);
969  data2 = STRPTR(in2);
970  i1 = in1->size;
971  i2 = in2->size;
972 
973  /*
974  * Conservative estimate of space needed. We might need all the data in
975  * both inputs, and conceivably add a pad byte before position data for
976  * each item where there was none before.
977  */
978  output_bytes = VARSIZE(in1) + VARSIZE(in2) + i1 + i2;
979 
980  out = (TSVector) palloc0(output_bytes);
981  SET_VARSIZE(out, output_bytes);
982 
983  /*
984  * We must make out->size valid so that STRPTR(out) is sensible. We'll
985  * collapse out any unused space at the end.
986  */
987  out->size = in1->size + in2->size;
988 
989  ptr = ARRPTR(out);
990  data = STRPTR(out);
991  dataoff = 0;
992  while (i1 && i2)
993  {
994  int cmp = compareEntry(data1, ptr1, data2, ptr2);
995 
996  if (cmp < 0)
997  { /* in1 first */
998  ptr->haspos = ptr1->haspos;
999  ptr->len = ptr1->len;
1000  memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
1001  ptr->pos = dataoff;
1002  dataoff += ptr1->len;
1003  if (ptr->haspos)
1004  {
1005  dataoff = SHORTALIGN(dataoff);
1006  memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
1007  dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
1008  }
1009 
1010  ptr++;
1011  ptr1++;
1012  i1--;
1013  }
1014  else if (cmp > 0)
1015  { /* in2 first */
1016  ptr->haspos = ptr2->haspos;
1017  ptr->len = ptr2->len;
1018  memcpy(data + dataoff, data2 + ptr2->pos, ptr2->len);
1019  ptr->pos = dataoff;
1020  dataoff += ptr2->len;
1021  if (ptr->haspos)
1022  {
1023  int addlen = add_pos(in2, ptr2, out, ptr, maxpos);
1024 
1025  if (addlen == 0)
1026  ptr->haspos = 0;
1027  else
1028  {
1029  dataoff = SHORTALIGN(dataoff);
1030  dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
1031  }
1032  }
1033 
1034  ptr++;
1035  ptr2++;
1036  i2--;
1037  }
1038  else
1039  {
1040  ptr->haspos = ptr1->haspos | ptr2->haspos;
1041  ptr->len = ptr1->len;
1042  memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
1043  ptr->pos = dataoff;
1044  dataoff += ptr1->len;
1045  if (ptr->haspos)
1046  {
1047  if (ptr1->haspos)
1048  {
1049  dataoff = SHORTALIGN(dataoff);
1050  memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
1051  dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
1052  if (ptr2->haspos)
1053  dataoff += add_pos(in2, ptr2, out, ptr, maxpos) * sizeof(WordEntryPos);
1054  }
1055  else /* must have ptr2->haspos */
1056  {
1057  int addlen = add_pos(in2, ptr2, out, ptr, maxpos);
1058 
1059  if (addlen == 0)
1060  ptr->haspos = 0;
1061  else
1062  {
1063  dataoff = SHORTALIGN(dataoff);
1064  dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
1065  }
1066  }
1067  }
1068 
1069  ptr++;
1070  ptr1++;
1071  ptr2++;
1072  i1--;
1073  i2--;
1074  }
1075  }
1076 
1077  while (i1)
1078  {
1079  ptr->haspos = ptr1->haspos;
1080  ptr->len = ptr1->len;
1081  memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
1082  ptr->pos = dataoff;
1083  dataoff += ptr1->len;
1084  if (ptr->haspos)
1085  {
1086  dataoff = SHORTALIGN(dataoff);
1087  memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
1088  dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
1089  }
1090 
1091  ptr++;
1092  ptr1++;
1093  i1--;
1094  }
1095 
1096  while (i2)
1097  {
1098  ptr->haspos = ptr2->haspos;
1099  ptr->len = ptr2->len;
1100  memcpy(data + dataoff, data2 + ptr2->pos, ptr2->len);
1101  ptr->pos = dataoff;
1102  dataoff += ptr2->len;
1103  if (ptr->haspos)
1104  {
1105  int addlen = add_pos(in2, ptr2, out, ptr, maxpos);
1106 
1107  if (addlen == 0)
1108  ptr->haspos = 0;
1109  else
1110  {
1111  dataoff = SHORTALIGN(dataoff);
1112  dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
1113  }
1114  }
1115 
1116  ptr++;
1117  ptr2++;
1118  i2--;
1119  }
1120 
1121  /*
1122  * Instead of checking each offset individually, we check for overflow of
1123  * pos fields once at the end.
1124  */
1125  if (dataoff > MAXSTRPOS)
1126  ereport(ERROR,
1127  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1128  errmsg("string is too long for tsvector (%d bytes, max %d bytes)", dataoff, MAXSTRPOS)));
1129 
1130  /*
1131  * Adjust sizes (asserting that we didn't overrun the original estimates)
1132  * and collapse out any unused array entries.
1133  */
1134  output_size = ptr - ARRPTR(out);
1135  Assert(output_size <= out->size);
1136  out->size = output_size;
1137  if (data != STRPTR(out))
1138  memmove(STRPTR(out), data, dataoff);
1139  output_bytes = CALCDATASIZE(out->size, dataoff);
1140  Assert(output_bytes <= VARSIZE(out));
1141  SET_VARSIZE(out, output_bytes);
1142 
1143  PG_FREE_IF_COPY(in1, 0);
1144  PG_FREE_IF_COPY(in2, 1);
1145  PG_RETURN_POINTER(out);
1146 }
1147 
1148 /*
1149  * Compare two strings by tsvector rules.
1150  *
1151  * if prefix = true then it returns zero value iff b has prefix a
1152  */
1153 int32
1154 tsCompareString(char *a, int lena, char *b, int lenb, bool prefix)
1155 {
1156  int cmp;
1157 
1158  if (lena == 0)
1159  {
1160  if (prefix)
1161  cmp = 0; /* empty string is prefix of anything */
1162  else
1163  cmp = (lenb > 0) ? -1 : 0;
1164  }
1165  else if (lenb == 0)
1166  {
1167  cmp = (lena > 0) ? 1 : 0;
1168  }
1169  else
1170  {
1171  cmp = memcmp(a, b, Min((unsigned int) lena, (unsigned int) lenb));
1172 
1173  if (prefix)
1174  {
1175  if (cmp == 0 && lena > lenb)
1176  cmp = 1; /* a is longer, so not a prefix of b */
1177  }
1178  else if (cmp == 0 && lena != lenb)
1179  {
1180  cmp = (lena < lenb) ? -1 : 1;
1181  }
1182  }
1183 
1184  return cmp;
1185 }
1186 
1187 /*
1188  * Check weight info or/and fill 'data' with the required positions
1189  */
1190 static TSTernaryValue
1193 {
1194  TSTernaryValue result = TS_NO;
1195 
1196  Assert(data == NULL || data->npos == 0);
1197 
1198  if (entry->haspos)
1199  {
1200  WordEntryPosVector *posvec;
1201 
1202  /*
1203  * We can't use the _POSVECPTR macro here because the pointer to the
1204  * tsvector's lexeme storage is already contained in chkval->values.
1205  */
1206  posvec = (WordEntryPosVector *)
1207  (chkval->values + SHORTALIGN(entry->pos + entry->len));
1208 
1209  if (val->weight && data)
1210  {
1211  WordEntryPos *posvec_iter = posvec->pos;
1212  WordEntryPos *dptr;
1213 
1214  /*
1215  * Filter position information by weights
1216  */
1217  dptr = data->pos = palloc(sizeof(WordEntryPos) * posvec->npos);
1218  data->allocated = true;
1219 
1220  /* Is there a position with a matching weight? */
1221  while (posvec_iter < posvec->pos + posvec->npos)
1222  {
1223  /* If true, append this position to the data->pos */
1224  if (val->weight & (1 << WEP_GETWEIGHT(*posvec_iter)))
1225  {
1226  *dptr = WEP_GETPOS(*posvec_iter);
1227  dptr++;
1228  }
1229 
1230  posvec_iter++;
1231  }
1232 
1233  data->npos = dptr - data->pos;
1234 
1235  if (data->npos > 0)
1236  result = TS_YES;
1237  else
1238  {
1239  pfree(data->pos);
1240  data->pos = NULL;
1241  data->allocated = false;
1242  }
1243  }
1244  else if (val->weight)
1245  {
1246  WordEntryPos *posvec_iter = posvec->pos;
1247 
1248  /* Is there a position with a matching weight? */
1249  while (posvec_iter < posvec->pos + posvec->npos)
1250  {
1251  if (val->weight & (1 << WEP_GETWEIGHT(*posvec_iter)))
1252  {
1253  result = TS_YES;
1254  break; /* no need to go further */
1255  }
1256 
1257  posvec_iter++;
1258  }
1259  }
1260  else if (data)
1261  {
1262  data->npos = posvec->npos;
1263  data->pos = posvec->pos;
1264  data->allocated = false;
1265  result = TS_YES;
1266  }
1267  else
1268  {
1269  /* simplest case: no weight check, positions not needed */
1270  result = TS_YES;
1271  }
1272  }
1273  else
1274  {
1275  /*
1276  * Position info is lacking, so if the caller requires it, we can only
1277  * say that maybe there is a match.
1278  *
1279  * Notice, however, that we *don't* check val->weight here.
1280  * Historically, stripped tsvectors are considered to match queries
1281  * whether or not the query has a weight restriction; that's a little
1282  * dubious but we'll preserve the behavior.
1283  */
1284  if (data)
1285  result = TS_MAYBE;
1286  else
1287  result = TS_YES;
1288  }
1289 
1290  return result;
1291 }
1292 
1293 /*
1294  * TS_execute callback for matching a tsquery operand to plain tsvector data
1295  */
1296 static TSTernaryValue
1298 {
1299  CHKVAL *chkval = (CHKVAL *) checkval;
1300  WordEntry *StopLow = chkval->arrb;
1301  WordEntry *StopHigh = chkval->arre;
1302  WordEntry *StopMiddle = StopHigh;
1304 
1305  /* Loop invariant: StopLow <= val < StopHigh */
1306  while (StopLow < StopHigh)
1307  {
1308  int difference;
1309 
1310  StopMiddle = StopLow + (StopHigh - StopLow) / 2;
1311  difference = tsCompareString(chkval->operand + val->distance,
1312  val->length,
1313  chkval->values + StopMiddle->pos,
1314  StopMiddle->len,
1315  false);
1316 
1317  if (difference == 0)
1318  {
1319  /* Check weight info & fill 'data' with positions */
1320  res = checkclass_str(chkval, StopMiddle, val, data);
1321  break;
1322  }
1323  else if (difference > 0)
1324  StopLow = StopMiddle + 1;
1325  else
1326  StopHigh = StopMiddle;
1327  }
1328 
1329  /*
1330  * If it's a prefix search, we should also consider lexemes that the
1331  * search term is a prefix of (which will necessarily immediately follow
1332  * the place we found in the above loop). But we can skip them if there
1333  * was a definite match on the exact term AND the caller doesn't need
1334  * position info.
1335  */
1336  if (val->prefix && (res != TS_YES || data))
1337  {
1338  WordEntryPos *allpos = NULL;
1339  int npos = 0,
1340  totalpos = 0;
1341 
1342  /* adjust start position for corner case */
1343  if (StopLow >= StopHigh)
1344  StopMiddle = StopHigh;
1345 
1346  /* we don't try to re-use any data from the initial match */
1347  if (data)
1348  {
1349  if (data->allocated)
1350  pfree(data->pos);
1351  data->pos = NULL;
1352  data->allocated = false;
1353  data->npos = 0;
1354  }
1355  res = TS_NO;
1356 
1357  while ((res != TS_YES || data) &&
1358  StopMiddle < chkval->arre &&
1359  tsCompareString(chkval->operand + val->distance,
1360  val->length,
1361  chkval->values + StopMiddle->pos,
1362  StopMiddle->len,
1363  true) == 0)
1364  {
1365  TSTernaryValue subres;
1366 
1367  subres = checkclass_str(chkval, StopMiddle, val, data);
1368 
1369  if (subres != TS_NO)
1370  {
1371  if (data)
1372  {
1373  /*
1374  * We need to join position information
1375  */
1376  if (subres == TS_MAYBE)
1377  {
1378  /*
1379  * No position info for this match, so we must report
1380  * MAYBE overall.
1381  */
1382  res = TS_MAYBE;
1383  /* forget any previous positions */
1384  npos = 0;
1385  /* don't leak storage */
1386  if (allpos)
1387  pfree(allpos);
1388  break;
1389  }
1390 
1391  while (npos + data->npos > totalpos)
1392  {
1393  if (totalpos == 0)
1394  {
1395  totalpos = 256;
1396  allpos = palloc(sizeof(WordEntryPos) * totalpos);
1397  }
1398  else
1399  {
1400  totalpos *= 2;
1401  allpos = repalloc(allpos, sizeof(WordEntryPos) * totalpos);
1402  }
1403  }
1404 
1405  memcpy(allpos + npos, data->pos, sizeof(WordEntryPos) * data->npos);
1406  npos += data->npos;
1407 
1408  /* don't leak storage from individual matches */
1409  if (data->allocated)
1410  pfree(data->pos);
1411  data->pos = NULL;
1412  data->allocated = false;
1413  /* it's important to reset data->npos before next loop */
1414  data->npos = 0;
1415  }
1416  else
1417  {
1418  /* Don't need positions, just handle YES/MAYBE */
1419  if (subres == TS_YES || res == TS_NO)
1420  res = subres;
1421  }
1422  }
1423 
1424  StopMiddle++;
1425  }
1426 
1427  if (data && npos > 0)
1428  {
1429  /* Sort and make unique array of found positions */
1430  data->pos = allpos;
1431  qsort(data->pos, npos, sizeof(WordEntryPos), compareWordEntryPos);
1432  data->npos = qunique(data->pos, npos, sizeof(WordEntryPos),
1434  data->allocated = true;
1435  res = TS_YES;
1436  }
1437  }
1438 
1439  return res;
1440 }
1441 
1442 /*
1443  * Compute output position list for a tsquery operator in phrase mode.
1444  *
1445  * Merge the position lists in Ldata and Rdata as specified by "emit",
1446  * returning the result list into *data. The input position lists must be
1447  * sorted and unique, and the output will be as well.
1448  *
1449  * data: pointer to initially-all-zeroes output struct, or NULL
1450  * Ldata, Rdata: input position lists
1451  * emit: bitmask of TSPO_XXX flags
1452  * Loffset: offset to be added to Ldata positions before comparing/outputting
1453  * Roffset: offset to be added to Rdata positions before comparing/outputting
1454  * max_npos: maximum possible required size of output position array
1455  *
1456  * Loffset and Roffset should not be negative, else we risk trying to output
1457  * negative positions, which won't fit into WordEntryPos.
1458  *
1459  * The result is boolean (TS_YES or TS_NO), but for the caller's convenience
1460  * we return it as TSTernaryValue.
1461  *
1462  * Returns TS_YES if any positions were emitted to *data; or if data is NULL,
1463  * returns TS_YES if any positions would have been emitted.
1464  */
1465 #define TSPO_L_ONLY 0x01 /* emit positions appearing only in L */
1466 #define TSPO_R_ONLY 0x02 /* emit positions appearing only in R */
1467 #define TSPO_BOTH 0x04 /* emit positions appearing in both L&R */
1468 
1469 static TSTernaryValue
1471  ExecPhraseData *Ldata,
1472  ExecPhraseData *Rdata,
1473  int emit,
1474  int Loffset,
1475  int Roffset,
1476  int max_npos)
1477 {
1478  int Lindex,
1479  Rindex;
1480 
1481  /* Loop until both inputs are exhausted */
1482  Lindex = Rindex = 0;
1483  while (Lindex < Ldata->npos || Rindex < Rdata->npos)
1484  {
1485  int Lpos,
1486  Rpos;
1487  int output_pos = 0;
1488 
1489  /*
1490  * Fetch current values to compare. WEP_GETPOS() is needed because
1491  * ExecPhraseData->data can point to a tsvector's WordEntryPosVector.
1492  */
1493  if (Lindex < Ldata->npos)
1494  Lpos = WEP_GETPOS(Ldata->pos[Lindex]) + Loffset;
1495  else
1496  {
1497  /* L array exhausted, so we're done if R_ONLY isn't set */
1498  if (!(emit & TSPO_R_ONLY))
1499  break;
1500  Lpos = INT_MAX;
1501  }
1502  if (Rindex < Rdata->npos)
1503  Rpos = WEP_GETPOS(Rdata->pos[Rindex]) + Roffset;
1504  else
1505  {
1506  /* R array exhausted, so we're done if L_ONLY isn't set */
1507  if (!(emit & TSPO_L_ONLY))
1508  break;
1509  Rpos = INT_MAX;
1510  }
1511 
1512  /* Merge-join the two input lists */
1513  if (Lpos < Rpos)
1514  {
1515  /* Lpos is not matched in Rdata, should we output it? */
1516  if (emit & TSPO_L_ONLY)
1517  output_pos = Lpos;
1518  Lindex++;
1519  }
1520  else if (Lpos == Rpos)
1521  {
1522  /* Lpos and Rpos match ... should we output it? */
1523  if (emit & TSPO_BOTH)
1524  output_pos = Rpos;
1525  Lindex++;
1526  Rindex++;
1527  }
1528  else /* Lpos > Rpos */
1529  {
1530  /* Rpos is not matched in Ldata, should we output it? */
1531  if (emit & TSPO_R_ONLY)
1532  output_pos = Rpos;
1533  Rindex++;
1534  }
1535 
1536  if (output_pos > 0)
1537  {
1538  if (data)
1539  {
1540  /* Store position, first allocating output array if needed */
1541  if (data->pos == NULL)
1542  {
1543  data->pos = (WordEntryPos *)
1544  palloc(max_npos * sizeof(WordEntryPos));
1545  data->allocated = true;
1546  }
1547  data->pos[data->npos++] = output_pos;
1548  }
1549  else
1550  {
1551  /*
1552  * Exact positions not needed, so return TS_YES as soon as we
1553  * know there is at least one.
1554  */
1555  return TS_YES;
1556  }
1557  }
1558  }
1559 
1560  if (data && data->npos > 0)
1561  {
1562  /* Let's assert we didn't overrun the array */
1563  Assert(data->npos <= max_npos);
1564  return TS_YES;
1565  }
1566  return TS_NO;
1567 }
1568 
1569 /*
1570  * Execute tsquery at or below an OP_PHRASE operator.
1571  *
1572  * This handles tsquery execution at recursion levels where we need to care
1573  * about match locations.
1574  *
1575  * In addition to the same arguments used for TS_execute, the caller may pass
1576  * a preinitialized-to-zeroes ExecPhraseData struct, to be filled with lexeme
1577  * match position info on success. data == NULL if no position data need be
1578  * returned.
1579  * Note: the function assumes data != NULL for operators other than OP_PHRASE.
1580  * This is OK because an outside call always starts from an OP_PHRASE node,
1581  * and all internal recursion cases pass data != NULL.
1582  *
1583  * The detailed semantics of the match data, given that the function returned
1584  * TS_YES (successful match), are:
1585  *
1586  * npos > 0, negate = false:
1587  * query is matched at specified position(s) (and only those positions)
1588  * npos > 0, negate = true:
1589  * query is matched at all positions *except* specified position(s)
1590  * npos = 0, negate = true:
1591  * query is matched at all positions
1592  * npos = 0, negate = false:
1593  * disallowed (this should result in TS_NO or TS_MAYBE, as appropriate)
1594  *
1595  * Successful matches also return a "width" value which is the match width in
1596  * lexemes, less one. Hence, "width" is zero for simple one-lexeme matches,
1597  * and is the sum of the phrase operator distances for phrase matches. Note
1598  * that when width > 0, the listed positions represent the ends of matches not
1599  * the starts. (This unintuitive rule is needed to avoid possibly generating
1600  * negative positions, which wouldn't fit into the WordEntryPos arrays.)
1601  *
1602  * If the TSExecuteCallback function reports that an operand is present
1603  * but fails to provide position(s) for it, we will return TS_MAYBE when
1604  * it is possible but not certain that the query is matched.
1605  *
1606  * When the function returns TS_NO or TS_MAYBE, it must return npos = 0,
1607  * negate = false (which is the state initialized by the caller); but the
1608  * "width" output in such cases is undefined.
1609  */
1610 static TSTernaryValue
1611 TS_phrase_execute(QueryItem *curitem, void *arg, uint32 flags,
1612  TSExecuteCallback chkcond,
1614 {
1615  ExecPhraseData Ldata,
1616  Rdata;
1617  TSTernaryValue lmatch,
1618  rmatch;
1619  int Loffset,
1620  Roffset,
1621  maxwidth;
1622 
1623  /* since this function recurses, it could be driven to stack overflow */
1625 
1626  /* ... and let's check for query cancel while we're at it */
1628 
1629  if (curitem->type == QI_VAL)
1630  return chkcond(arg, (QueryOperand *) curitem, data);
1631 
1632  switch (curitem->qoperator.oper)
1633  {
1634  case OP_NOT:
1635 
1636  /*
1637  * We need not touch data->width, since a NOT operation does not
1638  * change the match width.
1639  */
1640  if (flags & TS_EXEC_SKIP_NOT)
1641  {
1642  /* with SKIP_NOT, report NOT as "match everywhere" */
1643  Assert(data->npos == 0 && !data->negate);
1644  data->negate = true;
1645  return TS_YES;
1646  }
1647  switch (TS_phrase_execute(curitem + 1, arg, flags, chkcond, data))
1648  {
1649  case TS_NO:
1650  /* change "match nowhere" to "match everywhere" */
1651  Assert(data->npos == 0 && !data->negate);
1652  data->negate = true;
1653  return TS_YES;
1654  case TS_YES:
1655  if (data->npos > 0)
1656  {
1657  /* we have some positions, invert negate flag */
1658  data->negate = !data->negate;
1659  return TS_YES;
1660  }
1661  else if (data->negate)
1662  {
1663  /* change "match everywhere" to "match nowhere" */
1664  data->negate = false;
1665  return TS_NO;
1666  }
1667  /* Should not get here if result was TS_YES */
1668  Assert(false);
1669  break;
1670  case TS_MAYBE:
1671  /* match positions are, and remain, uncertain */
1672  return TS_MAYBE;
1673  }
1674  break;
1675 
1676  case OP_PHRASE:
1677  case OP_AND:
1678  memset(&Ldata, 0, sizeof(Ldata));
1679  memset(&Rdata, 0, sizeof(Rdata));
1680 
1681  lmatch = TS_phrase_execute(curitem + curitem->qoperator.left,
1682  arg, flags, chkcond, &Ldata);
1683  if (lmatch == TS_NO)
1684  return TS_NO;
1685 
1686  rmatch = TS_phrase_execute(curitem + 1,
1687  arg, flags, chkcond, &Rdata);
1688  if (rmatch == TS_NO)
1689  return TS_NO;
1690 
1691  /*
1692  * If either operand has no position information, then we can't
1693  * return reliable position data, only a MAYBE result.
1694  */
1695  if (lmatch == TS_MAYBE || rmatch == TS_MAYBE)
1696  return TS_MAYBE;
1697 
1698  if (curitem->qoperator.oper == OP_PHRASE)
1699  {
1700  /*
1701  * Compute Loffset and Roffset suitable for phrase match, and
1702  * compute overall width of whole phrase match.
1703  */
1704  Loffset = curitem->qoperator.distance + Rdata.width;
1705  Roffset = 0;
1706  if (data)
1707  data->width = curitem->qoperator.distance +
1708  Ldata.width + Rdata.width;
1709  }
1710  else
1711  {
1712  /*
1713  * For OP_AND, set output width and alignment like OP_OR (see
1714  * comment below)
1715  */
1716  maxwidth = Max(Ldata.width, Rdata.width);
1717  Loffset = maxwidth - Ldata.width;
1718  Roffset = maxwidth - Rdata.width;
1719  if (data)
1720  data->width = maxwidth;
1721  }
1722 
1723  if (Ldata.negate && Rdata.negate)
1724  {
1725  /* !L & !R: treat as !(L | R) */
1726  (void) TS_phrase_output(data, &Ldata, &Rdata,
1728  Loffset, Roffset,
1729  Ldata.npos + Rdata.npos);
1730  if (data)
1731  data->negate = true;
1732  return TS_YES;
1733  }
1734  else if (Ldata.negate)
1735  {
1736  /* !L & R */
1737  return TS_phrase_output(data, &Ldata, &Rdata,
1738  TSPO_R_ONLY,
1739  Loffset, Roffset,
1740  Rdata.npos);
1741  }
1742  else if (Rdata.negate)
1743  {
1744  /* L & !R */
1745  return TS_phrase_output(data, &Ldata, &Rdata,
1746  TSPO_L_ONLY,
1747  Loffset, Roffset,
1748  Ldata.npos);
1749  }
1750  else
1751  {
1752  /* straight AND */
1753  return TS_phrase_output(data, &Ldata, &Rdata,
1754  TSPO_BOTH,
1755  Loffset, Roffset,
1756  Min(Ldata.npos, Rdata.npos));
1757  }
1758 
1759  case OP_OR:
1760  memset(&Ldata, 0, sizeof(Ldata));
1761  memset(&Rdata, 0, sizeof(Rdata));
1762 
1763  lmatch = TS_phrase_execute(curitem + curitem->qoperator.left,
1764  arg, flags, chkcond, &Ldata);
1765  rmatch = TS_phrase_execute(curitem + 1,
1766  arg, flags, chkcond, &Rdata);
1767 
1768  if (lmatch == TS_NO && rmatch == TS_NO)
1769  return TS_NO;
1770 
1771  /*
1772  * If either operand has no position information, then we can't
1773  * return reliable position data, only a MAYBE result.
1774  */
1775  if (lmatch == TS_MAYBE || rmatch == TS_MAYBE)
1776  return TS_MAYBE;
1777 
1778  /*
1779  * Cope with undefined output width from failed submatch. (This
1780  * takes less code than trying to ensure that all failure returns
1781  * set data->width to zero.)
1782  */
1783  if (lmatch == TS_NO)
1784  Ldata.width = 0;
1785  if (rmatch == TS_NO)
1786  Rdata.width = 0;
1787 
1788  /*
1789  * For OP_AND and OP_OR, report the width of the wider of the two
1790  * inputs, and align the narrower input's positions to the right
1791  * end of that width. This rule deals at least somewhat
1792  * reasonably with cases like "x <-> (y | z <-> q)".
1793  */
1794  maxwidth = Max(Ldata.width, Rdata.width);
1795  Loffset = maxwidth - Ldata.width;
1796  Roffset = maxwidth - Rdata.width;
1797  data->width = maxwidth;
1798 
1799  if (Ldata.negate && Rdata.negate)
1800  {
1801  /* !L | !R: treat as !(L & R) */
1802  (void) TS_phrase_output(data, &Ldata, &Rdata,
1803  TSPO_BOTH,
1804  Loffset, Roffset,
1805  Min(Ldata.npos, Rdata.npos));
1806  data->negate = true;
1807  return TS_YES;
1808  }
1809  else if (Ldata.negate)
1810  {
1811  /* !L | R: treat as !(L & !R) */
1812  (void) TS_phrase_output(data, &Ldata, &Rdata,
1813  TSPO_L_ONLY,
1814  Loffset, Roffset,
1815  Ldata.npos);
1816  data->negate = true;
1817  return TS_YES;
1818  }
1819  else if (Rdata.negate)
1820  {
1821  /* L | !R: treat as !(!L & R) */
1822  (void) TS_phrase_output(data, &Ldata, &Rdata,
1823  TSPO_R_ONLY,
1824  Loffset, Roffset,
1825  Rdata.npos);
1826  data->negate = true;
1827  return TS_YES;
1828  }
1829  else
1830  {
1831  /* straight OR */
1832  return TS_phrase_output(data, &Ldata, &Rdata,
1834  Loffset, Roffset,
1835  Ldata.npos + Rdata.npos);
1836  }
1837 
1838  default:
1839  elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
1840  }
1841 
1842  /* not reachable, but keep compiler quiet */
1843  return TS_NO;
1844 }
1845 
1846 
1847 /*
1848  * Evaluate tsquery boolean expression.
1849  *
1850  * curitem: current tsquery item (initially, the first one)
1851  * arg: opaque value to pass through to callback function
1852  * flags: bitmask of flag bits shown in ts_utils.h
1853  * chkcond: callback function to check whether a primitive value is present
1854  */
1855 bool
1856 TS_execute(QueryItem *curitem, void *arg, uint32 flags,
1857  TSExecuteCallback chkcond)
1858 {
1859  /*
1860  * If we get TS_MAYBE from the recursion, return true. We could only see
1861  * that result if the caller passed TS_EXEC_PHRASE_NO_POS, so there's no
1862  * need to check again.
1863  */
1864  return TS_execute_recurse(curitem, arg, flags, chkcond) != TS_NO;
1865 }
1866 
1867 /*
1868  * Evaluate tsquery boolean expression.
1869  *
1870  * This is the same as TS_execute except that TS_MAYBE is returned as-is.
1871  */
1873 TS_execute_ternary(QueryItem *curitem, void *arg, uint32 flags,
1874  TSExecuteCallback chkcond)
1875 {
1876  return TS_execute_recurse(curitem, arg, flags, chkcond);
1877 }
1878 
1879 /*
1880  * TS_execute recursion for operators above any phrase operator. Here we do
1881  * not need to worry about lexeme positions. As soon as we hit an OP_PHRASE
1882  * operator, we pass it off to TS_phrase_execute which does worry.
1883  */
1884 static TSTernaryValue
1885 TS_execute_recurse(QueryItem *curitem, void *arg, uint32 flags,
1886  TSExecuteCallback chkcond)
1887 {
1888  TSTernaryValue lmatch;
1889 
1890  /* since this function recurses, it could be driven to stack overflow */
1892 
1893  /* ... and let's check for query cancel while we're at it */
1895 
1896  if (curitem->type == QI_VAL)
1897  return chkcond(arg, (QueryOperand *) curitem,
1898  NULL /* don't need position info */ );
1899 
1900  switch (curitem->qoperator.oper)
1901  {
1902  case OP_NOT:
1903  if (flags & TS_EXEC_SKIP_NOT)
1904  return TS_YES;
1905  switch (TS_execute_recurse(curitem + 1, arg, flags, chkcond))
1906  {
1907  case TS_NO:
1908  return TS_YES;
1909  case TS_YES:
1910  return TS_NO;
1911  case TS_MAYBE:
1912  return TS_MAYBE;
1913  }
1914  break;
1915 
1916  case OP_AND:
1917  lmatch = TS_execute_recurse(curitem + curitem->qoperator.left, arg,
1918  flags, chkcond);
1919  if (lmatch == TS_NO)
1920  return TS_NO;
1921  switch (TS_execute_recurse(curitem + 1, arg, flags, chkcond))
1922  {
1923  case TS_NO:
1924  return TS_NO;
1925  case TS_YES:
1926  return lmatch;
1927  case TS_MAYBE:
1928  return TS_MAYBE;
1929  }
1930  break;
1931 
1932  case OP_OR:
1933  lmatch = TS_execute_recurse(curitem + curitem->qoperator.left, arg,
1934  flags, chkcond);
1935  if (lmatch == TS_YES)
1936  return TS_YES;
1937  switch (TS_execute_recurse(curitem + 1, arg, flags, chkcond))
1938  {
1939  case TS_NO:
1940  return lmatch;
1941  case TS_YES:
1942  return TS_YES;
1943  case TS_MAYBE:
1944  return TS_MAYBE;
1945  }
1946  break;
1947 
1948  case OP_PHRASE:
1949 
1950  /*
1951  * If we get a MAYBE result, and the caller doesn't want that,
1952  * convert it to NO. It would be more consistent, perhaps, to
1953  * return the result of TS_phrase_execute() verbatim and then
1954  * convert MAYBE results at the top of the recursion. But
1955  * converting at the topmost phrase operator gives results that
1956  * are bug-compatible with the old implementation, so do it like
1957  * this for now.
1958  */
1959  switch (TS_phrase_execute(curitem, arg, flags, chkcond, NULL))
1960  {
1961  case TS_NO:
1962  return TS_NO;
1963  case TS_YES:
1964  return TS_YES;
1965  case TS_MAYBE:
1966  return (flags & TS_EXEC_PHRASE_NO_POS) ? TS_MAYBE : TS_NO;
1967  }
1968  break;
1969 
1970  default:
1971  elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
1972  }
1973 
1974  /* not reachable, but keep compiler quiet */
1975  return TS_NO;
1976 }
1977 
1978 /*
1979  * Evaluate tsquery and report locations of matching terms.
1980  *
1981  * This is like TS_execute except that it returns match locations not just
1982  * success/failure status. The callback function is required to provide
1983  * position data (we report failure if it doesn't).
1984  *
1985  * On successful match, the result is a List of ExecPhraseData structs, one
1986  * for each AND'ed term or phrase operator in the query. Each struct includes
1987  * a sorted array of lexeme positions matching that term. (Recall that for
1988  * phrase operators, the match includes width+1 lexemes, and the recorded
1989  * position is that of the rightmost lexeme.)
1990  *
1991  * OR subexpressions are handled by union'ing their match locations into a
1992  * single List element, which is valid since any of those locations contains
1993  * a match. However, when some of the OR'ed terms are phrase operators, we
1994  * report the maximum width of any of the OR'ed terms, making such cases
1995  * slightly imprecise in the conservative direction. (For example, if the
1996  * tsquery is "(A <-> B) | C", an occurrence of C in the data would be
1997  * reported as though it includes the lexeme to the left of C.)
1998  *
1999  * Locations of NOT subexpressions are not reported. (Obviously, there can
2000  * be no successful NOT matches at top level, or the match would have failed.
2001  * So this amounts to ignoring NOTs underneath ORs.)
2002  *
2003  * The result is NIL if no match, or if position data was not returned.
2004  *
2005  * Arguments are the same as for TS_execute, although flags is currently
2006  * vestigial since none of the defined bits are sensible here.
2007  */
2008 List *
2010  uint32 flags,
2011  TSExecuteCallback chkcond)
2012 {
2013  List *result;
2014 
2015  /* No flags supported, as yet */
2016  Assert(flags == TS_EXEC_EMPTY);
2017  if (TS_execute_locations_recurse(curitem, arg, chkcond, &result))
2018  return result;
2019  return NIL;
2020 }
2021 
2022 /*
2023  * TS_execute_locations recursion for operators above any phrase operator.
2024  * OP_PHRASE subexpressions can be passed off to TS_phrase_execute.
2025  */
2026 static bool
2028  TSExecuteCallback chkcond,
2029  List **locations)
2030 {
2031  bool lmatch,
2032  rmatch;
2033  List *llocations,
2034  *rlocations;
2036 
2037  /* since this function recurses, it could be driven to stack overflow */
2039 
2040  /* ... and let's check for query cancel while we're at it */
2042 
2043  /* Default locations result is empty */
2044  *locations = NIL;
2045 
2046  if (curitem->type == QI_VAL)
2047  {
2049  if (chkcond(arg, (QueryOperand *) curitem, data) == TS_YES)
2050  {
2051  *locations = list_make1(data);
2052  return true;
2053  }
2054  pfree(data);
2055  return false;
2056  }
2057 
2058  switch (curitem->qoperator.oper)
2059  {
2060  case OP_NOT:
2061  if (!TS_execute_locations_recurse(curitem + 1, arg, chkcond,
2062  &llocations))
2063  return true; /* we don't pass back any locations */
2064  return false;
2065 
2066  case OP_AND:
2067  if (!TS_execute_locations_recurse(curitem + curitem->qoperator.left,
2068  arg, chkcond,
2069  &llocations))
2070  return false;
2071  if (!TS_execute_locations_recurse(curitem + 1,
2072  arg, chkcond,
2073  &rlocations))
2074  return false;
2075  *locations = list_concat(llocations, rlocations);
2076  return true;
2077 
2078  case OP_OR:
2079  lmatch = TS_execute_locations_recurse(curitem + curitem->qoperator.left,
2080  arg, chkcond,
2081  &llocations);
2082  rmatch = TS_execute_locations_recurse(curitem + 1,
2083  arg, chkcond,
2084  &rlocations);
2085  if (lmatch || rmatch)
2086  {
2087  /*
2088  * We generate an AND'able location struct from each
2089  * combination of sub-matches, following the disjunctive law
2090  * (A & B) | (C & D) = (A | C) & (A | D) & (B | C) & (B | D).
2091  *
2092  * However, if either input didn't produce locations (i.e., it
2093  * failed or was a NOT), we must just return the other list.
2094  */
2095  if (llocations == NIL)
2096  *locations = rlocations;
2097  else if (rlocations == NIL)
2098  *locations = llocations;
2099  else
2100  {
2101  ListCell *ll;
2102 
2103  foreach(ll, llocations)
2104  {
2105  ExecPhraseData *ldata = (ExecPhraseData *) lfirst(ll);
2106  ListCell *lr;
2107 
2108  foreach(lr, rlocations)
2109  {
2110  ExecPhraseData *rdata = (ExecPhraseData *) lfirst(lr);
2111 
2113  (void) TS_phrase_output(data, ldata, rdata,
2115  0, 0,
2116  ldata->npos + rdata->npos);
2117  /* Report the larger width, as explained above. */
2118  data->width = Max(ldata->width, rdata->width);
2119  *locations = lappend(*locations, data);
2120  }
2121  }
2122  }
2123 
2124  return true;
2125  }
2126  return false;
2127 
2128  case OP_PHRASE:
2129  /* We can hand this off to TS_phrase_execute */
2131  if (TS_phrase_execute(curitem, arg, TS_EXEC_EMPTY, chkcond,
2132  data) == TS_YES)
2133  {
2134  if (!data->negate)
2135  *locations = list_make1(data);
2136  return true;
2137  }
2138  pfree(data);
2139  return false;
2140 
2141  default:
2142  elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
2143  }
2144 
2145  /* not reachable, but keep compiler quiet */
2146  return false;
2147 }
2148 
2149 /*
2150  * Detect whether a tsquery boolean expression requires any positive matches
2151  * to values shown in the tsquery.
2152  *
2153  * This is needed to know whether a GIN index search requires full index scan.
2154  * For example, 'x & !y' requires a match of x, so it's sufficient to scan
2155  * entries for x; but 'x | !y' could match rows containing neither x nor y.
2156  */
2157 bool
2159 {
2160  /* since this function recurses, it could be driven to stack overflow */
2162 
2163  if (curitem->type == QI_VAL)
2164  return true;
2165 
2166  switch (curitem->qoperator.oper)
2167  {
2168  case OP_NOT:
2169 
2170  /*
2171  * Assume there are no required matches underneath a NOT. For
2172  * some cases with nested NOTs, we could prove there's a required
2173  * match, but it seems unlikely to be worth the trouble.
2174  */
2175  return false;
2176 
2177  case OP_PHRASE:
2178 
2179  /*
2180  * Treat OP_PHRASE as OP_AND here
2181  */
2182  case OP_AND:
2183  /* If either side requires a match, we're good */
2184  if (tsquery_requires_match(curitem + curitem->qoperator.left))
2185  return true;
2186  else
2187  return tsquery_requires_match(curitem + 1);
2188 
2189  case OP_OR:
2190  /* Both sides must require a match */
2191  if (tsquery_requires_match(curitem + curitem->qoperator.left))
2192  return tsquery_requires_match(curitem + 1);
2193  else
2194  return false;
2195 
2196  default:
2197  elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
2198  }
2199 
2200  /* not reachable, but keep compiler quiet */
2201  return false;
2202 }
2203 
2204 /*
2205  * boolean operations
2206  */
2207 Datum
2209 {
2211  PG_GETARG_DATUM(1),
2212  PG_GETARG_DATUM(0)));
2213 }
2214 
2215 Datum
2217 {
2219  TSQuery query = PG_GETARG_TSQUERY(1);
2220  CHKVAL chkval;
2221  bool result;
2222 
2223  /* empty query matches nothing */
2224  if (!query->size)
2225  {
2226  PG_FREE_IF_COPY(val, 0);
2227  PG_FREE_IF_COPY(query, 1);
2228  PG_RETURN_BOOL(false);
2229  }
2230 
2231  chkval.arrb = ARRPTR(val);
2232  chkval.arre = chkval.arrb + val->size;
2233  chkval.values = STRPTR(val);
2234  chkval.operand = GETOPERAND(query);
2235  result = TS_execute(GETQUERY(query),
2236  &chkval,
2237  TS_EXEC_EMPTY,
2239 
2240  PG_FREE_IF_COPY(val, 0);
2241  PG_FREE_IF_COPY(query, 1);
2242  PG_RETURN_BOOL(result);
2243 }
2244 
2245 Datum
2247 {
2248  TSVector vector;
2249  TSQuery query;
2250  bool res;
2251 
2253  PG_GETARG_DATUM(0)));
2255  PG_GETARG_DATUM(1)));
2256 
2258  TSVectorGetDatum(vector),
2259  TSQueryGetDatum(query)));
2260 
2261  pfree(vector);
2262  pfree(query);
2263 
2265 }
2266 
2267 Datum
2269 {
2270  TSVector vector;
2271  TSQuery query = PG_GETARG_TSQUERY(1);
2272  bool res;
2273 
2275  PG_GETARG_DATUM(0)));
2276 
2278  TSVectorGetDatum(vector),
2279  TSQueryGetDatum(query)));
2280 
2281  pfree(vector);
2282  PG_FREE_IF_COPY(query, 1);
2283 
2285 }
2286 
2287 /*
2288  * ts_stat statistic function support
2289  */
2290 
2291 
2292 /*
2293  * Returns the number of positions in value 'wptr' within tsvector 'txt',
2294  * that have a weight equal to one of the weights in 'weight' bitmask.
2295  */
2296 static int
2298 {
2299  int len = POSDATALEN(txt, wptr);
2300  int num = 0;
2301  WordEntryPos *ptr = POSDATAPTR(txt, wptr);
2302 
2303  while (len--)
2304  {
2305  if (weight & (1 << WEP_GETWEIGHT(*ptr)))
2306  num++;
2307  ptr++;
2308  }
2309  return num;
2310 }
2311 
2312 #define compareStatWord(a,e,t) \
2313  tsCompareString((a)->lexeme, (a)->lenlexeme, \
2314  STRPTR(t) + (e)->pos, (e)->len, \
2315  false)
2316 
2317 static void
2319 {
2320  WordEntry *we = ARRPTR(txt) + off;
2321  StatEntry *node = stat->root,
2322  *pnode = NULL;
2323  int n,
2324  res = 0;
2325  uint32 depth = 1;
2326 
2327  if (stat->weight == 0)
2328  n = (we->haspos) ? POSDATALEN(txt, we) : 1;
2329  else
2330  n = (we->haspos) ? check_weight(txt, we, stat->weight) : 0;
2331 
2332  if (n == 0)
2333  return; /* nothing to insert */
2334 
2335  while (node)
2336  {
2337  res = compareStatWord(node, we, txt);
2338 
2339  if (res == 0)
2340  {
2341  break;
2342  }
2343  else
2344  {
2345  pnode = node;
2346  node = (res < 0) ? node->left : node->right;
2347  }
2348  depth++;
2349  }
2350 
2351  if (depth > stat->maxdepth)
2352  stat->maxdepth = depth;
2353 
2354  if (node == NULL)
2355  {
2356  node = MemoryContextAlloc(persistentContext, STATENTRYHDRSZ + we->len);
2357  node->left = node->right = NULL;
2358  node->ndoc = 1;
2359  node->nentry = n;
2360  node->lenlexeme = we->len;
2361  memcpy(node->lexeme, STRPTR(txt) + we->pos, node->lenlexeme);
2362 
2363  if (pnode == NULL)
2364  {
2365  stat->root = node;
2366  }
2367  else
2368  {
2369  if (res < 0)
2370  pnode->left = node;
2371  else
2372  pnode->right = node;
2373  }
2374  }
2375  else
2376  {
2377  node->ndoc++;
2378  node->nentry += n;
2379  }
2380 }
2381 
2382 static void
2384  uint32 low, uint32 high, uint32 offset)
2385 {
2386  uint32 pos;
2387  uint32 middle = (low + high) >> 1;
2388 
2389  pos = (low + middle) >> 1;
2390  if (low != middle && pos >= offset && pos - offset < txt->size)
2391  insertStatEntry(persistentContext, stat, txt, pos - offset);
2392  pos = (high + middle + 1) >> 1;
2393  if (middle + 1 != high && pos >= offset && pos - offset < txt->size)
2394  insertStatEntry(persistentContext, stat, txt, pos - offset);
2395 
2396  if (low != middle)
2397  chooseNextStatEntry(persistentContext, stat, txt, low, middle, offset);
2398  if (high != middle + 1)
2399  chooseNextStatEntry(persistentContext, stat, txt, middle + 1, high, offset);
2400 }
2401 
2402 /*
2403  * This is written like a custom aggregate function, because the
2404  * original plan was to do just that. Unfortunately, an aggregate function
2405  * can't return a set, so that plan was abandoned. If that limitation is
2406  * lifted in the future, ts_stat could be a real aggregate function so that
2407  * you could use it like this:
2408  *
2409  * SELECT ts_stat(vector_column) FROM vector_table;
2410  *
2411  * where vector_column is a tsvector-type column in vector_table.
2412  */
2413 
2414 static TSVectorStat *
2416 {
2418  uint32 i,
2419  nbit = 0,
2420  offset;
2421 
2422  if (stat == NULL)
2423  { /* Init in first */
2424  stat = MemoryContextAllocZero(persistentContext, sizeof(TSVectorStat));
2425  stat->maxdepth = 1;
2426  }
2427 
2428  /* simple check of correctness */
2429  if (txt == NULL || txt->size == 0)
2430  {
2431  if (txt && txt != (TSVector) DatumGetPointer(data))
2432  pfree(txt);
2433  return stat;
2434  }
2435 
2436  i = txt->size - 1;
2437  for (; i > 0; i >>= 1)
2438  nbit++;
2439 
2440  nbit = 1 << nbit;
2441  offset = (nbit - txt->size) / 2;
2442 
2443  insertStatEntry(persistentContext, stat, txt, (nbit >> 1) - offset);
2444  chooseNextStatEntry(persistentContext, stat, txt, 0, nbit, offset);
2445 
2446  return stat;
2447 }
2448 
2449 static void
2451  TSVectorStat *stat)
2452 {
2453  TupleDesc tupdesc;
2454  MemoryContext oldcontext;
2455  StatEntry *node;
2456 
2457  funcctx->user_fctx = (void *) stat;
2458 
2459  oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
2460 
2461  stat->stack = palloc0(sizeof(StatEntry *) * (stat->maxdepth + 1));
2462  stat->stackpos = 0;
2463 
2464  node = stat->root;
2465  /* find leftmost value */
2466  if (node == NULL)
2467  stat->stack[stat->stackpos] = NULL;
2468  else
2469  for (;;)
2470  {
2471  stat->stack[stat->stackpos] = node;
2472  if (node->left)
2473  {
2474  stat->stackpos++;
2475  node = node->left;
2476  }
2477  else
2478  break;
2479  }
2480  Assert(stat->stackpos <= stat->maxdepth);
2481 
2482  if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
2483  elog(ERROR, "return type must be a row type");
2484  funcctx->tuple_desc = tupdesc;
2485  funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
2486 
2487  MemoryContextSwitchTo(oldcontext);
2488 }
2489 
2490 static StatEntry *
2492 {
2493  StatEntry *node = stat->stack[stat->stackpos];
2494 
2495  if (node == NULL)
2496  return NULL;
2497 
2498  if (node->ndoc != 0)
2499  {
2500  /* return entry itself: we already was at left sublink */
2501  return node;
2502  }
2503  else if (node->right && node->right != stat->stack[stat->stackpos + 1])
2504  {
2505  /* go on right sublink */
2506  stat->stackpos++;
2507  node = node->right;
2508 
2509  /* find most-left value */
2510  for (;;)
2511  {
2512  stat->stack[stat->stackpos] = node;
2513  if (node->left)
2514  {
2515  stat->stackpos++;
2516  node = node->left;
2517  }
2518  else
2519  break;
2520  }
2521  Assert(stat->stackpos <= stat->maxdepth);
2522  }
2523  else
2524  {
2525  /* we already return all left subtree, itself and right subtree */
2526  if (stat->stackpos == 0)
2527  return NULL;
2528 
2529  stat->stackpos--;
2530  return walkStatEntryTree(stat);
2531  }
2532 
2533  return node;
2534 }
2535 
2536 static Datum
2538 {
2539  TSVectorStat *st;
2540  StatEntry *entry;
2541 
2542  st = (TSVectorStat *) funcctx->user_fctx;
2543 
2544  entry = walkStatEntryTree(st);
2545 
2546  if (entry != NULL)
2547  {
2548  Datum result;
2549  char *values[3];
2550  char ndoc[16];
2551  char nentry[16];
2552  HeapTuple tuple;
2553 
2554  values[0] = palloc(entry->lenlexeme + 1);
2555  memcpy(values[0], entry->lexeme, entry->lenlexeme);
2556  (values[0])[entry->lenlexeme] = '\0';
2557  sprintf(ndoc, "%d", entry->ndoc);
2558  values[1] = ndoc;
2559  sprintf(nentry, "%d", entry->nentry);
2560  values[2] = nentry;
2561 
2562  tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
2563  result = HeapTupleGetDatum(tuple);
2564 
2565  pfree(values[0]);
2566 
2567  /* mark entry as already visited */
2568  entry->ndoc = 0;
2569 
2570  return result;
2571  }
2572 
2573  return (Datum) 0;
2574 }
2575 
2576 static TSVectorStat *
2577 ts_stat_sql(MemoryContext persistentContext, text *txt, text *ws)
2578 {
2579  char *query = text_to_cstring(txt);
2580  TSVectorStat *stat;
2581  bool isnull;
2582  Portal portal;
2583  SPIPlanPtr plan;
2584 
2585  if ((plan = SPI_prepare(query, 0, NULL)) == NULL)
2586  /* internal error */
2587  elog(ERROR, "SPI_prepare(\"%s\") failed", query);
2588 
2589  if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL)
2590  /* internal error */
2591  elog(ERROR, "SPI_cursor_open(\"%s\") failed", query);
2592 
2593  SPI_cursor_fetch(portal, true, 100);
2594 
2595  if (SPI_tuptable == NULL ||
2596  SPI_tuptable->tupdesc->natts != 1 ||
2598  TSVECTOROID))
2599  ereport(ERROR,
2600  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2601  errmsg("ts_stat query must return one tsvector column")));
2602 
2603  stat = MemoryContextAllocZero(persistentContext, sizeof(TSVectorStat));
2604  stat->maxdepth = 1;
2605 
2606  if (ws)
2607  {
2608  char *buf;
2609 
2610  buf = VARDATA_ANY(ws);
2611  while (buf - VARDATA_ANY(ws) < VARSIZE_ANY_EXHDR(ws))
2612  {
2613  if (pg_mblen(buf) == 1)
2614  {
2615  switch (*buf)
2616  {
2617  case 'A':
2618  case 'a':
2619  stat->weight |= 1 << 3;
2620  break;
2621  case 'B':
2622  case 'b':
2623  stat->weight |= 1 << 2;
2624  break;
2625  case 'C':
2626  case 'c':
2627  stat->weight |= 1 << 1;
2628  break;
2629  case 'D':
2630  case 'd':
2631  stat->weight |= 1;
2632  break;
2633  default:
2634  stat->weight |= 0;
2635  }
2636  }
2637  buf += pg_mblen(buf);
2638  }
2639  }
2640 
2641  while (SPI_processed > 0)
2642  {
2643  uint64 i;
2644 
2645  for (i = 0; i < SPI_processed; i++)
2646  {
2648 
2649  if (!isnull)
2650  stat = ts_accum(persistentContext, stat, data);
2651  }
2652 
2654  SPI_cursor_fetch(portal, true, 100);
2655  }
2656 
2658  SPI_cursor_close(portal);
2659  SPI_freeplan(plan);
2660  pfree(query);
2661 
2662  return stat;
2663 }
2664 
2665 Datum
2667 {
2668  FuncCallContext *funcctx;
2669  Datum result;
2670 
2671  if (SRF_IS_FIRSTCALL())
2672  {
2673  TSVectorStat *stat;
2674  text *txt = PG_GETARG_TEXT_PP(0);
2675 
2676  funcctx = SRF_FIRSTCALL_INIT();
2677  SPI_connect();
2678  stat = ts_stat_sql(funcctx->multi_call_memory_ctx, txt, NULL);
2679  PG_FREE_IF_COPY(txt, 0);
2680  ts_setup_firstcall(fcinfo, funcctx, stat);
2681  SPI_finish();
2682  }
2683 
2684  funcctx = SRF_PERCALL_SETUP();
2685  if ((result = ts_process_call(funcctx)) != (Datum) 0)
2686  SRF_RETURN_NEXT(funcctx, result);
2687  SRF_RETURN_DONE(funcctx);
2688 }
2689 
2690 Datum
2692 {
2693  FuncCallContext *funcctx;
2694  Datum result;
2695 
2696  if (SRF_IS_FIRSTCALL())
2697  {
2698  TSVectorStat *stat;
2699  text *txt = PG_GETARG_TEXT_PP(0);
2700  text *ws = PG_GETARG_TEXT_PP(1);
2701 
2702  funcctx = SRF_FIRSTCALL_INIT();
2703  SPI_connect();
2704  stat = ts_stat_sql(funcctx->multi_call_memory_ctx, txt, ws);
2705  PG_FREE_IF_COPY(txt, 0);
2706  PG_FREE_IF_COPY(ws, 1);
2707  ts_setup_firstcall(fcinfo, funcctx, stat);
2708  SPI_finish();
2709  }
2710 
2711  funcctx = SRF_PERCALL_SETUP();
2712  if ((result = ts_process_call(funcctx)) != (Datum) 0)
2713  SRF_RETURN_NEXT(funcctx, result);
2714  SRF_RETURN_DONE(funcctx);
2715 }
2716 
2717 
2718 /*
2719  * Triggers for automatic update of a tsvector column from text column(s)
2720  *
2721  * Trigger arguments are either
2722  * name of tsvector col, name of tsconfig to use, name(s) of text col(s)
2723  * name of tsvector col, name of regconfig col, name(s) of text col(s)
2724  * ie, tsconfig can either be specified by name, or indirectly as the
2725  * contents of a regconfig field in the row. If the name is used, it must
2726  * be explicitly schema-qualified.
2727  */
2728 Datum
2730 {
2731  return tsvector_update_trigger(fcinfo, false);
2732 }
2733 
2734 Datum
2736 {
2737  return tsvector_update_trigger(fcinfo, true);
2738 }
2739 
2740 static Datum
2742 {
2743  TriggerData *trigdata;
2744  Trigger *trigger;
2745  Relation rel;
2746  HeapTuple rettuple = NULL;
2747  int tsvector_attr_num,
2748  i;
2749  ParsedText prs;
2750  Datum datum;
2751  bool isnull;
2752  text *txt;
2753  Oid cfgId;
2754  bool update_needed;
2755 
2756  /* Check call context */
2757  if (!CALLED_AS_TRIGGER(fcinfo)) /* internal error */
2758  elog(ERROR, "tsvector_update_trigger: not fired by trigger manager");
2759 
2760  trigdata = (TriggerData *) fcinfo->context;
2761  if (!TRIGGER_FIRED_FOR_ROW(trigdata->tg_event))
2762  elog(ERROR, "tsvector_update_trigger: must be fired for row");
2763  if (!TRIGGER_FIRED_BEFORE(trigdata->tg_event))
2764  elog(ERROR, "tsvector_update_trigger: must be fired BEFORE event");
2765 
2766  if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event))
2767  {
2768  rettuple = trigdata->tg_trigtuple;
2769  update_needed = true;
2770  }
2771  else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event))
2772  {
2773  rettuple = trigdata->tg_newtuple;
2774  update_needed = false; /* computed below */
2775  }
2776  else
2777  elog(ERROR, "tsvector_update_trigger: must be fired for INSERT or UPDATE");
2778 
2779  trigger = trigdata->tg_trigger;
2780  rel = trigdata->tg_relation;
2781 
2782  if (trigger->tgnargs < 3)
2783  elog(ERROR, "tsvector_update_trigger: arguments must be tsvector_field, ts_config, text_field1, ...)");
2784 
2785  /* Find the target tsvector column */
2786  tsvector_attr_num = SPI_fnumber(rel->rd_att, trigger->tgargs[0]);
2787  if (tsvector_attr_num == SPI_ERROR_NOATTRIBUTE)
2788  ereport(ERROR,
2789  (errcode(ERRCODE_UNDEFINED_COLUMN),
2790  errmsg("tsvector column \"%s\" does not exist",
2791  trigger->tgargs[0])));
2792  /* This will effectively reject system columns, so no separate test: */
2793  if (!IsBinaryCoercible(SPI_gettypeid(rel->rd_att, tsvector_attr_num),
2794  TSVECTOROID))
2795  ereport(ERROR,
2796  (errcode(ERRCODE_DATATYPE_MISMATCH),
2797  errmsg("column \"%s\" is not of tsvector type",
2798  trigger->tgargs[0])));
2799 
2800  /* Find the configuration to use */
2801  if (config_column)
2802  {
2803  int config_attr_num;
2804 
2805  config_attr_num = SPI_fnumber(rel->rd_att, trigger->tgargs[1]);
2806  if (config_attr_num == SPI_ERROR_NOATTRIBUTE)
2807  ereport(ERROR,
2808  (errcode(ERRCODE_UNDEFINED_COLUMN),
2809  errmsg("configuration column \"%s\" does not exist",
2810  trigger->tgargs[1])));
2811  if (!IsBinaryCoercible(SPI_gettypeid(rel->rd_att, config_attr_num),
2812  REGCONFIGOID))
2813  ereport(ERROR,
2814  (errcode(ERRCODE_DATATYPE_MISMATCH),
2815  errmsg("column \"%s\" is not of regconfig type",
2816  trigger->tgargs[1])));
2817 
2818  datum = SPI_getbinval(rettuple, rel->rd_att, config_attr_num, &isnull);
2819  if (isnull)
2820  ereport(ERROR,
2821  (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
2822  errmsg("configuration column \"%s\" must not be null",
2823  trigger->tgargs[1])));
2824  cfgId = DatumGetObjectId(datum);
2825  }
2826  else
2827  {
2828  List *names;
2829 
2830  names = stringToQualifiedNameList(trigger->tgargs[1], NULL);
2831  /* require a schema so that results are not search path dependent */
2832  if (list_length(names) < 2)
2833  ereport(ERROR,
2834  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2835  errmsg("text search configuration name \"%s\" must be schema-qualified",
2836  trigger->tgargs[1])));
2837  cfgId = get_ts_config_oid(names, false);
2838  }
2839 
2840  /* initialize parse state */
2841  prs.lenwords = 32;
2842  prs.curwords = 0;
2843  prs.pos = 0;
2844  prs.words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs.lenwords);
2845 
2846  /* find all words in indexable column(s) */
2847  for (i = 2; i < trigger->tgnargs; i++)
2848  {
2849  int numattr;
2850 
2851  numattr = SPI_fnumber(rel->rd_att, trigger->tgargs[i]);
2853  ereport(ERROR,
2854  (errcode(ERRCODE_UNDEFINED_COLUMN),
2855  errmsg("column \"%s\" does not exist",
2856  trigger->tgargs[i])));
2857  if (!IsBinaryCoercible(SPI_gettypeid(rel->rd_att, numattr), TEXTOID))
2858  ereport(ERROR,
2859  (errcode(ERRCODE_DATATYPE_MISMATCH),
2860  errmsg("column \"%s\" is not of a character type",
2861  trigger->tgargs[i])));
2862 
2864  update_needed = true;
2865 
2866  datum = SPI_getbinval(rettuple, rel->rd_att, numattr, &isnull);
2867  if (isnull)
2868  continue;
2869 
2870  txt = DatumGetTextPP(datum);
2871 
2872  parsetext(cfgId, &prs, VARDATA_ANY(txt), VARSIZE_ANY_EXHDR(txt));
2873 
2874  if (txt != (text *) DatumGetPointer(datum))
2875  pfree(txt);
2876  }
2877 
2878  if (update_needed)
2879  {
2880  /* make tsvector value */
2881  datum = TSVectorGetDatum(make_tsvector(&prs));
2882  isnull = false;
2883 
2884  /* and insert it into tuple */
2885  rettuple = heap_modify_tuple_by_cols(rettuple, rel->rd_att,
2886  1, &tsvector_attr_num,
2887  &datum, &isnull);
2888 
2889  pfree(DatumGetPointer(datum));
2890  }
2891 
2892  return PointerGetDatum(rettuple);
2893 }
#define GETQUERY(x)
Definition: _int.h:157
#define PG_GETARG_ARRAYTYPE_P(n)
Definition: array.h:256
ArrayType * construct_array_builtin(Datum *elems, int nelems, Oid elmtype)
Definition: arrayfuncs.c:3340
void deconstruct_array_builtin(ArrayType *array, Oid elmtype, Datum **elemsp, bool **nullsp, int *nelemsp)
Definition: arrayfuncs.c:3644
int16 AttrNumber
Definition: attnum.h:21
bool bms_is_member(int x, const Bitmapset *a)
Definition: bitmapset.c:460
static Datum values[MAXATTR]
Definition: bootstrap.c:156
int numattr
Definition: bootstrap.c:66
unsigned short uint16
Definition: c.h:494
unsigned int uint32
Definition: c.h:495
signed char int8
Definition: c.h:481
#define Min(x, y)
Definition: c.h:993
signed int int32
Definition: c.h:483
#define Max(x, y)
Definition: c.h:987
#define VARHDRSZ
Definition: c.h:681
#define FLEXIBLE_ARRAY_MEMBER
Definition: c.h:387
#define SHORTALIGN(LEN)
Definition: c.h:796
#define ARRPTR(x)
Definition: cube.c:25
struct cursor * cur
Definition: ecpg.c:28
int errcode(int sqlerrcode)
Definition: elog.c:858
int errmsg(const char *fmt,...)
Definition: elog.c:1069
#define ERROR
Definition: elog.h:39
#define ereport(elevel,...)
Definition: elog.h:149
HeapTuple BuildTupleFromCStrings(AttInMetadata *attinmeta, char **values)
Definition: execTuples.c:2136
AttInMetadata * TupleDescGetAttInMetadata(TupleDesc tupdesc)
Definition: execTuples.c:2087
#define palloc0_object(type)
Definition: fe_memutils.h:63
#define PG_FREE_IF_COPY(ptr, n)
Definition: fmgr.h:260
#define PG_GETARG_TEXT_PP(n)
Definition: fmgr.h:309
#define DirectFunctionCall2(func, arg1, arg2)
Definition: fmgr.h:644
#define PG_GETARG_CHAR(n)
Definition: fmgr.h:273
#define DatumGetTextPP(X)
Definition: fmgr.h:292
#define DirectFunctionCall1(func, arg1)
Definition: fmgr.h:642
#define PG_GETARG_DATUM(n)
Definition: fmgr.h:268
#define PG_RETURN_INT32(x)
Definition: fmgr.h:354
#define PG_RETURN_DATUM(x)
Definition: fmgr.h:353
#define PG_RETURN_POINTER(x)
Definition: fmgr.h:361
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
#define PG_RETURN_BOOL(x)
Definition: fmgr.h:359
TypeFuncClass get_call_result_type(FunctionCallInfo fcinfo, Oid *resultTypeId, TupleDesc *resultTupleDesc)
Definition: funcapi.c:276
#define SRF_IS_FIRSTCALL()
Definition: funcapi.h:304
#define SRF_PERCALL_SETUP()
Definition: funcapi.h:308
@ TYPEFUNC_COMPOSITE
Definition: funcapi.h:149
#define SRF_RETURN_NEXT(_funcctx, _result)
Definition: funcapi.h:310
#define SRF_FIRSTCALL_INIT()
Definition: funcapi.h:306
static Datum HeapTupleGetDatum(const HeapTupleData *tuple)
Definition: funcapi.h:230
#define SRF_RETURN_DONE(_funcctx)
Definition: funcapi.h:328
Datum difference(PG_FUNCTION_ARGS)
HeapTuple heap_form_tuple(TupleDesc tupleDescriptor, Datum *values, bool *isnull)
Definition: heaptuple.c:1108
HeapTuple heap_modify_tuple_by_cols(HeapTuple tuple, TupleDesc tupleDesc, int nCols, int *replCols, Datum *replValues, bool *replIsnull)
Definition: heaptuple.c:1269
#define CALCDATASIZE(x, lenstr)
Definition: hstore.h:72
#define STRPTR(x)
Definition: hstore.h:76
#define nitems(x)
Definition: indent.h:31
long val
Definition: informix.c:664
int b
Definition: isn.c:70
int a
Definition: isn.c:69
int j
Definition: isn.c:74
int i
Definition: isn.c:73
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:77
Assert(fmt[strlen(fmt) - 1] !='\n')
List * lappend(List *list, void *datum)
Definition: list.c:338
List * list_concat(List *list1, const List *list2)
Definition: list.c:560
#define GETOPERAND(x)
Definition: ltree.h:165
int pg_mblen(const char *mbstr)
Definition: mbutils.c:1024
void pfree(void *pointer)
Definition: mcxt.c:1456
void * palloc0(Size size)
Definition: mcxt.c:1257
void * MemoryContextAllocZero(MemoryContext context, Size size)
Definition: mcxt.c:1064
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1476
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:1021
void * palloc(Size size)
Definition: mcxt.c:1226
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:121
Oid get_ts_config_oid(List *names, bool missing_ok)
Definition: namespace.c:2704
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:138
bool IsBinaryCoercible(Oid srctype, Oid targettype)
void * arg
const void size_t len
const void * data
#define lfirst(lc)
Definition: pg_list.h:172
static int list_length(const List *l)
Definition: pg_list.h:152
#define NIL
Definition: pg_list.h:68
#define list_make1(x1)
Definition: pg_list.h:212
#define plan(x)
Definition: pg_regress.c:154
static char * buf
Definition: pg_test_fsync.c:67
#define sprintf
Definition: port.h:240
#define qsort(a, b, c, d)
Definition: port.h:445
void check_stack_depth(void)
Definition: postgres.c:3523
static bool DatumGetBool(Datum X)
Definition: postgres.h:90
static Datum PointerGetDatum(const void *X)
Definition: postgres.h:322
uintptr_t Datum
Definition: postgres.h:64
static Oid DatumGetObjectId(Datum X)
Definition: postgres.h:242
static Datum Int16GetDatum(int16 X)
Definition: postgres.h:172
static Pointer DatumGetPointer(Datum X)
Definition: postgres.h:312
static char DatumGetChar(Datum X)
Definition: postgres.h:112
unsigned int Oid
Definition: postgres_ext.h:31
static size_t qunique(void *array, size_t elements, size_t width, int(*compare)(const void *, const void *))
Definition: qunique.h:21
static int cmp(const chr *x, const chr *y, size_t len)
Definition: regc_locale.c:743
List * stringToQualifiedNameList(const char *string, Node *escontext)
Definition: regproc.c:1777
int SPI_fnumber(TupleDesc tupdesc, const char *fname)
Definition: spi.c:1173
uint64 SPI_processed
Definition: spi.c:45
Oid SPI_gettypeid(TupleDesc tupdesc, int fnumber)
Definition: spi.c:1306
int SPI_freeplan(SPIPlanPtr plan)
Definition: spi.c:1023
SPITupleTable * SPI_tuptable
Definition: spi.c:46
int SPI_connect(void)
Definition: spi.c:95
void SPI_cursor_fetch(Portal portal, bool forward, long count)
Definition: spi.c:1804
int SPI_finish(void)
Definition: spi.c:183
void SPI_freetuptable(SPITupleTable *tuptable)
Definition: spi.c:1384
Portal SPI_cursor_open(const char *name, SPIPlanPtr plan, Datum *Values, const char *Nulls, bool read_only)
Definition: spi.c:1443
SPIPlanPtr SPI_prepare(const char *src, int nargs, Oid *argtypes)
Definition: spi.c:858
void SPI_cursor_close(Portal portal)
Definition: spi.c:1860
Datum SPI_getbinval(HeapTuple tuple, TupleDesc tupdesc, int fnumber, bool *isnull)
Definition: spi.c:1250
#define SPI_ERROR_NOATTRIBUTE
Definition: spi.h:76
int32 * arrb
Definition: _int_bool.c:226
WordEntry * arre
Definition: tsvector_op.c:39
char * values
Definition: tsvector_op.c:40
char * operand
Definition: ltxtquery_op.c:52
int32 * arre
Definition: _int_bool.c:227
WordEntry * arrb
Definition: tsvector_op.c:38
WordEntryPos * pos
Definition: ts_utils.h:166
void * user_fctx
Definition: funcapi.h:82
uint64 call_cntr
Definition: funcapi.h:65
AttInMetadata * attinmeta
Definition: funcapi.h:91
MemoryContext multi_call_memory_ctx
Definition: funcapi.h:101
TupleDesc tuple_desc
Definition: funcapi.h:112
Definition: pg_list.h:54
int32 pos
Definition: ts_utils.h:107
int32 lenwords
Definition: ts_utils.h:105
int32 curwords
Definition: ts_utils.h:106
ParsedWord * words
Definition: ts_utils.h:104
int16 distance
Definition: ts_type.h:196
uint32 left
Definition: ts_type.h:197
TupleDesc rd_att
Definition: rel.h:112
TupleDesc tupdesc
Definition: spi.h:25
HeapTuple * vals
Definition: spi.h:26
uint32 nentry
Definition: tsvector_op.c:49
struct StatEntry * left
Definition: tsvector_op.c:50
char lexeme[FLEXIBLE_ARRAY_MEMBER]
Definition: tsvector_op.c:53
uint32 lenlexeme
Definition: tsvector_op.c:52
uint32 ndoc
Definition: tsvector_op.c:47
struct StatEntry * right
Definition: tsvector_op.c:51
int32 size
Definition: ts_type.h:221
int32 size
Definition: ts_type.h:93
int32 weight
Definition: tsvector_op.c:60
StatEntry * root
Definition: tsvector_op.c:67
uint32 maxdepth
Definition: tsvector_op.c:62
uint32 stackpos
Definition: tsvector_op.c:65
StatEntry ** stack
Definition: tsvector_op.c:64
Relation tg_relation
Definition: trigger.h:35
const Bitmapset * tg_updatedcols
Definition: trigger.h:43
TriggerEvent tg_event
Definition: trigger.h:34
HeapTuple tg_newtuple
Definition: trigger.h:37
Trigger * tg_trigger
Definition: trigger.h:38
HeapTuple tg_trigtuple
Definition: trigger.h:36
int16 tgnargs
Definition: reltrigger.h:38
char ** tgargs
Definition: reltrigger.h:41
WordEntryPos pos[FLEXIBLE_ARRAY_MEMBER]
Definition: ts_type.h:68
uint32 pos
Definition: ts_type.h:46
uint32 haspos
Definition: ts_type.h:44
uint32 len
Definition: ts_type.h:45
Definition: c.h:676
#define FirstLowInvalidHeapAttributeNumber
Definition: sysattr.h:27
Datum to_tsvector(PG_FUNCTION_ARGS)
Definition: to_tsany.c:271
TSVector make_tsvector(ParsedText *prs)
Definition: to_tsany.c:166
Datum plainto_tsquery(PG_FUNCTION_ARGS)
Definition: to_tsany.c:643
#define TRIGGER_FIRED_BEFORE(event)
Definition: trigger.h:128
#define CALLED_AS_TRIGGER(fcinfo)
Definition: trigger.h:26
#define TRIGGER_FIRED_FOR_ROW(event)
Definition: trigger.h:122
#define TRIGGER_FIRED_BY_INSERT(event)
Definition: trigger.h:110
#define TRIGGER_FIRED_BY_UPDATE(event)
Definition: trigger.h:116
void parsetext(Oid cfgId, ParsedText *prs, char *buf, int buflen)
Definition: ts_parse.c:355
#define PG_GETARG_TSVECTOR(n)
Definition: ts_type.h:135
#define WEP_GETPOS(x)
Definition: ts_type.h:80
#define _POSVECPTR(x, e)
Definition: ts_type.h:109
static TSQuery DatumGetTSQuery(Datum X)
Definition: ts_type.h:249
static TSVector DatumGetTSVector(Datum X)
Definition: ts_type.h:118
#define MAXENTRYPOS
Definition: ts_type.h:85
static Datum TSVectorGetDatum(const TSVectorData *X)
Definition: ts_type.h:130
#define WEP_SETPOS(x, v)
Definition: ts_type.h:83
#define POSDATALEN(x, e)
Definition: ts_type.h:110
#define PG_GETARG_TSQUERY(n)
Definition: ts_type.h:266
uint16 WordEntryPos
Definition: ts_type.h:63
#define MAXNUMPOS
Definition: ts_type.h:86
TSVectorData * TSVector
Definition: ts_type.h:98
#define PG_GETARG_TSVECTOR_COPY(n)
Definition: ts_type.h:136
#define WEP_SETWEIGHT(x, v)
Definition: ts_type.h:82
#define QI_VAL
Definition: ts_type.h:149
static Datum TSQueryGetDatum(const TSQueryData *X)
Definition: ts_type.h:261
#define LIMITPOS(x)
Definition: ts_type.h:87
#define OP_AND
Definition: ts_type.h:180
#define OP_PHRASE
Definition: ts_type.h:182
#define OP_OR
Definition: ts_type.h:181
#define POSDATAPTR(x, e)
Definition: ts_type.h:111
#define OP_NOT
Definition: ts_type.h:179
#define WEP_GETWEIGHT(x)
Definition: ts_type.h:79
#define MAXSTRPOS
Definition: ts_type.h:50
#define TS_EXEC_PHRASE_NO_POS
Definition: ts_utils.h:202
TSTernaryValue
Definition: ts_utils.h:133
@ TS_MAYBE
Definition: ts_utils.h:136
@ TS_NO
Definition: ts_utils.h:134
@ TS_YES
Definition: ts_utils.h:135
#define TS_EXEC_EMPTY
Definition: ts_utils.h:188
#define TS_EXEC_SKIP_NOT
Definition: ts_utils.h:195
TSTernaryValue(* TSExecuteCallback)(void *arg, QueryOperand *val, ExecPhraseData *data)
Definition: ts_utils.h:182
static const float weights[]
Definition: tsrank.c:24
int compareWordEntryPos(const void *a, const void *b)
Definition: tsvector.c:35
Datum tsvector_setweight_by_filter(PG_FUNCTION_ARGS)
Definition: tsvector_op.c:273
#define TSPO_BOTH
Definition: tsvector_op.c:1467
static Datum ts_process_call(FuncCallContext *funcctx)
Definition: tsvector_op.c:2537
static TSTernaryValue checkcondition_str(void *checkval, QueryOperand *val, ExecPhraseData *data)
Definition: tsvector_op.c:1297
bool TS_execute(QueryItem *curitem, void *arg, uint32 flags, TSExecuteCallback chkcond)
Definition: tsvector_op.c:1856
Datum ts_match_vq(PG_FUNCTION_ARGS)
Definition: tsvector_op.c:2216
Datum tsvector_update_trigger_byid(PG_FUNCTION_ARGS)
Definition: tsvector_op.c:2729
static int32 add_pos(TSVector src, WordEntry *srcptr, TSVector dest, WordEntry *destptr, int32 maxpos)
Definition: tsvector_op.c:364
Datum tsvector_delete_arr(PG_FUNCTION_ARGS)
Definition: tsvector_op.c:580
#define TSPO_R_ONLY
Definition: tsvector_op.c:1466
Datum array_to_tsvector(PG_FUNCTION_ARGS)
Definition: tsvector_op.c:749
#define STATENTRYHDRSZ
Definition: tsvector_op.c:56
Datum tsvector_filter(PG_FUNCTION_ARGS)
Definition: tsvector_op.c:821
static TSTernaryValue TS_phrase_output(ExecPhraseData *data, ExecPhraseData *Ldata, ExecPhraseData *Rdata, int emit, int Loffset, int Roffset, int max_npos)
Definition: tsvector_op.c:1470
#define compareEntry(pa, a, pb, b)
Definition: tsvector_op.c:354
Datum tsvector_setweight(PG_FUNCTION_ARGS)
Definition: tsvector_op.c:211
#define TSVECTORCMPFUNC(type, action, ret)
Definition: tsvector_op.c:145
static int check_weight(TSVector txt, WordEntry *wptr, int8 weight)
Definition: tsvector_op.c:2297
Datum tsvector_strip(PG_FUNCTION_ARGS)
Definition: tsvector_op.c:168
struct StatEntry StatEntry
Datum tsvector_length(PG_FUNCTION_ARGS)
Definition: tsvector_op.c:201
Datum tsvector_to_array(PG_FUNCTION_ARGS)
Definition: tsvector_op.c:722
Datum ts_match_tq(PG_FUNCTION_ARGS)
Definition: tsvector_op.c:2268
Datum ts_stat1(PG_FUNCTION_ARGS)
Definition: tsvector_op.c:2666
int32 tsCompareString(char *a, int lena, char *b, int lenb, bool prefix)
Definition: tsvector_op.c:1154
Datum tsvector_delete_str(PG_FUNCTION_ARGS)
Definition: tsvector_op.c:556
#define TSPO_L_ONLY
Definition: tsvector_op.c:1465
static Datum tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column)
Definition: tsvector_op.c:2741
Datum ts_match_qv(PG_FUNCTION_ARGS)
Definition: tsvector_op.c:2208
static int silly_cmp_tsvector(const TSVector a, const TSVector b)
Definition: tsvector_op.c:86
static int tsvector_bsearch(const TSVector tsv, char *lexeme, int lexeme_len)
Definition: tsvector_op.c:400
bool tsquery_requires_match(QueryItem *curitem)
Definition: tsvector_op.c:2158
Datum tsvector_concat(PG_FUNCTION_ARGS)
Definition: tsvector_op.c:927
Datum tsvector_update_trigger_bycolumn(PG_FUNCTION_ARGS)
Definition: tsvector_op.c:2735
static bool TS_execute_locations_recurse(QueryItem *curitem, void *arg, TSExecuteCallback chkcond, List **locations)
Definition: tsvector_op.c:2027
static TSTernaryValue TS_execute_recurse(QueryItem *curitem, void *arg, uint32 flags, TSExecuteCallback chkcond)
Definition: tsvector_op.c:1885
TSTernaryValue TS_execute_ternary(QueryItem *curitem, void *arg, uint32 flags, TSExecuteCallback chkcond)
Definition: tsvector_op.c:1873
static int compare_int(const void *va, const void *vb)
Definition: tsvector_op.c:433
static StatEntry * walkStatEntryTree(TSVectorStat *stat)
Definition: tsvector_op.c:2491
static void ts_setup_firstcall(FunctionCallInfo fcinfo, FuncCallContext *funcctx, TSVectorStat *stat)
Definition: tsvector_op.c:2450
static void chooseNextStatEntry(MemoryContext persistentContext, TSVectorStat *stat, TSVector txt, uint32 low, uint32 high, uint32 offset)
Definition: tsvector_op.c:2383
Datum ts_match_tt(PG_FUNCTION_ARGS)
Definition: tsvector_op.c:2246
static TSVectorStat * ts_accum(MemoryContext persistentContext, TSVectorStat *stat, Datum data)
Definition: tsvector_op.c:2415
static TSTernaryValue TS_phrase_execute(QueryItem *curitem, void *arg, uint32 flags, TSExecuteCallback chkcond, ExecPhraseData *data)
Definition: tsvector_op.c:1611
static int compare_text_lexemes(const void *va, const void *vb)
Definition: tsvector_op.c:444
static TSTernaryValue checkclass_str(CHKVAL *chkval, WordEntry *entry, QueryOperand *val, ExecPhraseData *data)
Definition: tsvector_op.c:1191
#define compareStatWord(a, e, t)
Definition: tsvector_op.c:2312
List * TS_execute_locations(QueryItem *curitem, void *arg, uint32 flags, TSExecuteCallback chkcond)
Definition: tsvector_op.c:2009
Datum tsvector_unnest(PG_FUNCTION_ARGS)
Definition: tsvector_op.c:634
static TSVectorStat * ts_stat_sql(MemoryContext persistentContext, text *txt, text *ws)
Definition: tsvector_op.c:2577
Datum ts_stat2(PG_FUNCTION_ARGS)
Definition: tsvector_op.c:2691
static void insertStatEntry(MemoryContext persistentContext, TSVectorStat *stat, TSVector txt, uint32 off)
Definition: tsvector_op.c:2318
static TSVector tsvector_delete_by_indices(TSVector tsv, int *indices_to_delete, int indices_count)
Definition: tsvector_op.c:466
TupleDesc CreateTemplateTupleDesc(int natts)
Definition: tupdesc.c:45
void TupleDescInitEntry(TupleDesc desc, AttrNumber attributeNumber, const char *attributeName, Oid oidtypeid, int32 typmod, int attdim)
Definition: tupdesc.c:583
QueryOperator qoperator
Definition: ts_type.h:209
QueryItemType type
Definition: ts_type.h:208
#define VARDATA(PTR)
Definition: varatt.h:278
#define VARDATA_ANY(PTR)
Definition: varatt.h:324
#define SET_VARSIZE(PTR, len)
Definition: varatt.h:305
#define VARSIZE(PTR)
Definition: varatt.h:279
#define VARSIZE_ANY_EXHDR(PTR)
Definition: varatt.h:317
char * text_to_cstring(const text *t)
Definition: varlena.c:215
text * cstring_to_text_with_len(const char *s, int len)
Definition: varlena.c:194
#define stat
Definition: win32_port.h:284