PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
tsvector_op.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * tsvector_op.c
4  * operations over tsvector
5  *
6  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
7  *
8  *
9  * IDENTIFICATION
10  * src/backend/utils/adt/tsvector_op.c
11  *
12  *-------------------------------------------------------------------------
13  */
14 #include "postgres.h"
15 
16 #include <limits.h>
17 
18 #include "access/htup_details.h"
19 #include "catalog/namespace.h"
20 #include "catalog/pg_type.h"
21 #include "commands/trigger.h"
22 #include "executor/spi.h"
23 #include "funcapi.h"
24 #include "mb/pg_wchar.h"
25 #include "miscadmin.h"
26 #include "parser/parse_coerce.h"
27 #include "tsearch/ts_utils.h"
28 #include "utils/builtins.h"
29 #include "utils/lsyscache.h"
30 #include "utils/regproc.h"
31 #include "utils/rel.h"
32 
33 
34 typedef struct
35 {
38  char *values;
39  char *operand;
40 } CHKVAL;
41 
42 
43 typedef struct StatEntry
44 {
45  uint32 ndoc; /* zero indicates that we were already here
46  * while walking through the tree */
48  struct StatEntry *left;
49  struct StatEntry *right;
51  char lexeme[FLEXIBLE_ARRAY_MEMBER];
52 } StatEntry;
53 
54 #define STATENTRYHDRSZ (offsetof(StatEntry, lexeme))
55 
56 typedef struct
57 {
59 
61 
64 
66 } TSVectorStat;
67 
68 #define STATHDRSIZE (offsetof(TSVectorStat, data))
69 
70 static Datum tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column);
71 static int tsvector_bsearch(const TSVector tsv, char *lexeme, int lexeme_len);
72 
73 /*
74  * Order: haspos, len, word, for all positions (pos, weight)
75  */
76 static int
78 {
79  if (VARSIZE(a) < VARSIZE(b))
80  return -1;
81  else if (VARSIZE(a) > VARSIZE(b))
82  return 1;
83  else if (a->size < b->size)
84  return -1;
85  else if (a->size > b->size)
86  return 1;
87  else
88  {
89  WordEntry *aptr = ARRPTR(a);
90  WordEntry *bptr = ARRPTR(b);
91  int i = 0;
92  int res;
93 
94 
95  for (i = 0; i < a->size; i++)
96  {
97  if (aptr->haspos != bptr->haspos)
98  {
99  return (aptr->haspos > bptr->haspos) ? -1 : 1;
100  }
101  else if ((res = tsCompareString(STRPTR(a) + aptr->pos, aptr->len, STRPTR(b) + bptr->pos, bptr->len, false)) != 0)
102  {
103  return res;
104  }
105  else if (aptr->haspos)
106  {
107  WordEntryPos *ap = POSDATAPTR(a, aptr);
108  WordEntryPos *bp = POSDATAPTR(b, bptr);
109  int j;
110 
111  if (POSDATALEN(a, aptr) != POSDATALEN(b, bptr))
112  return (POSDATALEN(a, aptr) > POSDATALEN(b, bptr)) ? -1 : 1;
113 
114  for (j = 0; j < POSDATALEN(a, aptr); j++)
115  {
116  if (WEP_GETPOS(*ap) != WEP_GETPOS(*bp))
117  {
118  return (WEP_GETPOS(*ap) > WEP_GETPOS(*bp)) ? -1 : 1;
119  }
120  else if (WEP_GETWEIGHT(*ap) != WEP_GETWEIGHT(*bp))
121  {
122  return (WEP_GETWEIGHT(*ap) > WEP_GETWEIGHT(*bp)) ? -1 : 1;
123  }
124  ap++, bp++;
125  }
126  }
127 
128  aptr++;
129  bptr++;
130  }
131  }
132 
133  return 0;
134 }
135 
136 #define TSVECTORCMPFUNC( type, action, ret ) \
137 Datum \
138 tsvector_##type(PG_FUNCTION_ARGS) \
139 { \
140  TSVector a = PG_GETARG_TSVECTOR(0); \
141  TSVector b = PG_GETARG_TSVECTOR(1); \
142  int res = silly_cmp_tsvector(a, b); \
143  PG_FREE_IF_COPY(a,0); \
144  PG_FREE_IF_COPY(b,1); \
145  PG_RETURN_##ret( res action 0 ); \
146 } \
147 /* keep compiler quiet - no extra ; */ \
148 extern int no_such_variable
149 
150 TSVECTORCMPFUNC(lt, <, BOOL);
151 TSVECTORCMPFUNC(le, <=, BOOL);
152 TSVECTORCMPFUNC(eq, ==, BOOL);
153 TSVECTORCMPFUNC(ge, >=, BOOL);
154 TSVECTORCMPFUNC(gt, >, BOOL);
155 TSVECTORCMPFUNC(ne, !=, BOOL);
156 TSVECTORCMPFUNC(cmp, +, INT32);
157 
158 Datum
160 {
162  TSVector out;
163  int i,
164  len = 0;
165  WordEntry *arrin = ARRPTR(in),
166  *arrout;
167  char *cur;
168 
169  for (i = 0; i < in->size; i++)
170  len += arrin[i].len;
171 
172  len = CALCDATASIZE(in->size, len);
173  out = (TSVector) palloc0(len);
174  SET_VARSIZE(out, len);
175  out->size = in->size;
176  arrout = ARRPTR(out);
177  cur = STRPTR(out);
178  for (i = 0; i < in->size; i++)
179  {
180  memcpy(cur, STRPTR(in) + arrin[i].pos, arrin[i].len);
181  arrout[i].haspos = 0;
182  arrout[i].len = arrin[i].len;
183  arrout[i].pos = cur - STRPTR(out);
184  cur += arrout[i].len;
185  }
186 
187  PG_FREE_IF_COPY(in, 0);
188  PG_RETURN_POINTER(out);
189 }
190 
191 Datum
193 {
195  int32 ret = in->size;
196 
197  PG_FREE_IF_COPY(in, 0);
198  PG_RETURN_INT32(ret);
199 }
200 
201 Datum
203 {
205  char cw = PG_GETARG_CHAR(1);
206  TSVector out;
207  int i,
208  j;
209  WordEntry *entry;
210  WordEntryPos *p;
211  int w = 0;
212 
213  switch (cw)
214  {
215  case 'A':
216  case 'a':
217  w = 3;
218  break;
219  case 'B':
220  case 'b':
221  w = 2;
222  break;
223  case 'C':
224  case 'c':
225  w = 1;
226  break;
227  case 'D':
228  case 'd':
229  w = 0;
230  break;
231  default:
232  /* internal error */
233  elog(ERROR, "unrecognized weight: %d", cw);
234  }
235 
236  out = (TSVector) palloc(VARSIZE(in));
237  memcpy(out, in, VARSIZE(in));
238  entry = ARRPTR(out);
239  i = out->size;
240  while (i--)
241  {
242  if ((j = POSDATALEN(out, entry)) != 0)
243  {
244  p = POSDATAPTR(out, entry);
245  while (j--)
246  {
247  WEP_SETWEIGHT(*p, w);
248  p++;
249  }
250  }
251  entry++;
252  }
253 
254  PG_FREE_IF_COPY(in, 0);
255  PG_RETURN_POINTER(out);
256 }
257 
258 /*
259  * setweight(tsin tsvector, char_weight "char", lexemes "text"[])
260  *
261  * Assign weight w to elements of tsin that are listed in lexemes.
262  */
263 Datum
265 {
266  TSVector tsin = PG_GETARG_TSVECTOR(0);
267  char char_weight = PG_GETARG_CHAR(1);
268  ArrayType *lexemes = PG_GETARG_ARRAYTYPE_P(2);
269 
270  TSVector tsout;
271  int i,
272  j,
273  nlexemes,
274  weight;
275  WordEntry *entry;
276  Datum *dlexemes;
277  bool *nulls;
278 
279  switch (char_weight)
280  {
281  case 'A':
282  case 'a':
283  weight = 3;
284  break;
285  case 'B':
286  case 'b':
287  weight = 2;
288  break;
289  case 'C':
290  case 'c':
291  weight = 1;
292  break;
293  case 'D':
294  case 'd':
295  weight = 0;
296  break;
297  default:
298  /* internal error */
299  elog(ERROR, "unrecognized weight: %c", char_weight);
300  }
301 
302  tsout = (TSVector) palloc(VARSIZE(tsin));
303  memcpy(tsout, tsin, VARSIZE(tsin));
304  entry = ARRPTR(tsout);
305 
306  deconstruct_array(lexemes, TEXTOID, -1, false, 'i',
307  &dlexemes, &nulls, &nlexemes);
308 
309  /*
310  * Assuming that lexemes array is significantly shorter than tsvector we
311  * can iterate through lexemes performing binary search of each lexeme
312  * from lexemes in tsvector.
313  */
314  for (i = 0; i < nlexemes; i++)
315  {
316  char *lex;
317  int lex_len,
318  lex_pos;
319 
320  if (nulls[i])
321  ereport(ERROR,
322  (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
323  errmsg("lexeme array may not contain nulls")));
324 
325  lex = VARDATA(dlexemes[i]);
326  lex_len = VARSIZE(dlexemes[i]) - VARHDRSZ;
327  lex_pos = tsvector_bsearch(tsout, lex, lex_len);
328 
329  if (lex_pos >= 0 && (j = POSDATALEN(tsout, entry + lex_pos)) != 0)
330  {
331  WordEntryPos *p = POSDATAPTR(tsout, entry + lex_pos);
332 
333  while (j--)
334  {
335  WEP_SETWEIGHT(*p, weight);
336  p++;
337  }
338  }
339  }
340 
341  PG_FREE_IF_COPY(tsin, 0);
342  PG_FREE_IF_COPY(lexemes, 2);
343 
344  PG_RETURN_POINTER(tsout);
345 }
346 
/*
 * Compare the lexemes of entries a and b (non-prefix comparison).  pa and pb
 * are the base pointers that each entry's pos offset is added to.
 */
#define compareEntry(pa, a, pb, b) \
	tsCompareString((pa) + (a)->pos, (a)->len, (pb) + (b)->pos, (b)->len, false)
351 
352 /*
353  * Add positions from src to dest after offsetting them by maxpos.
354  * Return the number added (might be less than expected due to overflow)
355  */
356 static int32
358  TSVector dest, WordEntry *destptr,
359  int32 maxpos)
360 {
361  uint16 *clen = &_POSVECPTR(dest, destptr)->npos;
362  int i;
363  uint16 slen = POSDATALEN(src, srcptr),
364  startlen;
365  WordEntryPos *spos = POSDATAPTR(src, srcptr),
366  *dpos = POSDATAPTR(dest, destptr);
367 
368  if (!destptr->haspos)
369  *clen = 0;
370 
371  startlen = *clen;
372  for (i = 0;
373  i < slen && *clen < MAXNUMPOS &&
374  (*clen == 0 || WEP_GETPOS(dpos[*clen - 1]) != MAXENTRYPOS - 1);
375  i++)
376  {
377  WEP_SETWEIGHT(dpos[*clen], WEP_GETWEIGHT(spos[i]));
378  WEP_SETPOS(dpos[*clen], LIMITPOS(WEP_GETPOS(spos[i]) + maxpos));
379  (*clen)++;
380  }
381 
382  if (*clen != startlen)
383  destptr->haspos = 1;
384  return *clen - startlen;
385 }
386 
387 /*
388  * Perform binary search of given lexeme in TSVector.
389  * Returns lexeme position in TSVector's entry array or -1 if lexeme wasn't
390  * found.
391  */
392 static int
393 tsvector_bsearch(const TSVector tsv, char *lexeme, int lexeme_len)
394 {
395  WordEntry *arrin = ARRPTR(tsv);
396  int StopLow = 0,
397  StopHigh = tsv->size,
398  StopMiddle,
399  cmp;
400 
401  while (StopLow < StopHigh)
402  {
403  StopMiddle = (StopLow + StopHigh) / 2;
404 
405  cmp = tsCompareString(lexeme, lexeme_len,
406  STRPTR(tsv) + arrin[StopMiddle].pos,
407  arrin[StopMiddle].len,
408  false);
409 
410  if (cmp < 0)
411  StopHigh = StopMiddle;
412  else if (cmp > 0)
413  StopLow = StopMiddle + 1;
414  else /* found it */
415  return StopMiddle;
416  }
417 
418  return -1;
419 }
420 
421 /*
422  * qsort comparator functions
423  */
424 
/*
 * qsort comparator for plain ints, ascending order.
 * Returns -1, 0 or 1; never subtracts, so it cannot overflow.
 */
static int
compare_int(const void *va, const void *vb)
{
	const int	lhs = *(const int *) va;
	const int	rhs = *(const int *) vb;

	if (lhs < rhs)
		return -1;
	if (lhs > rhs)
		return 1;
	return 0;
}
435 
436 static int
437 compare_text_lexemes(const void *va, const void *vb)
438 {
439  Datum a = *((const Datum *) va);
440  Datum b = *((const Datum *) vb);
441  char *alex = VARDATA_ANY(a);
442  int alex_len = VARSIZE_ANY_EXHDR(a);
443  char *blex = VARDATA_ANY(b);
444  int blex_len = VARSIZE_ANY_EXHDR(b);
445 
446  return tsCompareString(alex, alex_len, blex, blex_len, false);
447 }
448 
449 /*
450  * Internal routine to delete lexemes from TSVector by array of offsets.
451  *
452  * int *indices_to_delete -- array of lexeme offsets to delete (modified here!)
453  * int indices_count -- size of that array
454  *
455  * Returns new TSVector without given lexemes along with their positions
456  * and weights.
457  */
458 static TSVector
459 tsvector_delete_by_indices(TSVector tsv, int *indices_to_delete,
460  int indices_count)
461 {
462  TSVector tsout;
463  WordEntry *arrin = ARRPTR(tsv),
464  *arrout;
465  char *data = STRPTR(tsv),
466  *dataout;
467  int i, /* index in arrin */
468  j, /* index in arrout */
469  k, /* index in indices_to_delete */
470  curoff; /* index in dataout area */
471 
472  /*
473  * Sort the filter array to simplify membership checks below. Also, get
474  * rid of any duplicate entries, so that we can assume that indices_count
475  * is exactly equal to the number of lexemes that will be removed.
476  */
477  if (indices_count > 1)
478  {
479  int kp;
480 
481  qsort(indices_to_delete, indices_count, sizeof(int), compare_int);
482  kp = 0;
483  for (k = 1; k < indices_count; k++)
484  {
485  if (indices_to_delete[k] != indices_to_delete[kp])
486  indices_to_delete[++kp] = indices_to_delete[k];
487  }
488  indices_count = ++kp;
489  }
490 
491  /*
492  * Here we overestimate tsout size, since we don't know how much space is
493  * used by the deleted lexeme(s). We will set exact size below.
494  */
495  tsout = (TSVector) palloc0(VARSIZE(tsv));
496 
497  /* This count must be correct because STRPTR(tsout) relies on it. */
498  tsout->size = tsv->size - indices_count;
499 
500  /*
501  * Copy tsv to tsout, skipping lexemes listed in indices_to_delete.
502  */
503  arrout = ARRPTR(tsout);
504  dataout = STRPTR(tsout);
505  curoff = 0;
506  for (i = j = k = 0; i < tsv->size; i++)
507  {
508  /*
509  * If current i is present in indices_to_delete, skip this lexeme.
510  * Since indices_to_delete is already sorted, we only need to check
511  * the current (k'th) entry.
512  */
513  if (k < indices_count && i == indices_to_delete[k])
514  {
515  k++;
516  continue;
517  }
518 
519  /* Copy lexeme and its positions and weights */
520  memcpy(dataout + curoff, data + arrin[i].pos, arrin[i].len);
521  arrout[j].haspos = arrin[i].haspos;
522  arrout[j].len = arrin[i].len;
523  arrout[j].pos = curoff;
524  curoff += arrin[i].len;
525  if (arrin[i].haspos)
526  {
527  int len = POSDATALEN(tsv, arrin + i) * sizeof(WordEntryPos)
528  + sizeof(uint16);
529 
530  curoff = SHORTALIGN(curoff);
531  memcpy(dataout + curoff,
532  STRPTR(tsv) + SHORTALIGN(arrin[i].pos + arrin[i].len),
533  len);
534  curoff += len;
535  }
536 
537  j++;
538  }
539 
540  /*
541  * k should now be exactly equal to indices_count. If it isn't then the
542  * caller provided us with indices outside of [0, tsv->size) range and
543  * estimation of tsout's size is wrong.
544  */
545  Assert(k == indices_count);
546 
547  SET_VARSIZE(tsout, CALCDATASIZE(tsout->size, curoff));
548  return tsout;
549 }
550 
551 /*
552  * Delete given lexeme from tsvector.
553  * Implementation of user-level ts_delete(tsvector, text).
554  */
555 Datum
557 {
558  TSVector tsin = PG_GETARG_TSVECTOR(0),
559  tsout;
560  text *tlexeme = PG_GETARG_TEXT_PP(1);
561  char *lexeme = VARDATA_ANY(tlexeme);
562  int lexeme_len = VARSIZE_ANY_EXHDR(tlexeme),
563  skip_index;
564 
565  if ((skip_index = tsvector_bsearch(tsin, lexeme, lexeme_len)) == -1)
566  PG_RETURN_POINTER(tsin);
567 
568  tsout = tsvector_delete_by_indices(tsin, &skip_index, 1);
569 
570  PG_FREE_IF_COPY(tsin, 0);
571  PG_FREE_IF_COPY(tlexeme, 1);
572  PG_RETURN_POINTER(tsout);
573 }
574 
575 /*
576  * Delete given array of lexemes from tsvector.
577  * Implementation of user-level ts_delete(tsvector, text[]).
578  */
579 Datum
581 {
582  TSVector tsin = PG_GETARG_TSVECTOR(0),
583  tsout;
584  ArrayType *lexemes = PG_GETARG_ARRAYTYPE_P(1);
585  int i,
586  nlex,
587  skip_count,
588  *skip_indices;
589  Datum *dlexemes;
590  bool *nulls;
591 
592  deconstruct_array(lexemes, TEXTOID, -1, false, 'i',
593  &dlexemes, &nulls, &nlex);
594 
595  /*
596  * In typical use case array of lexemes to delete is relatively small. So
597  * here we optimize things for that scenario: iterate through lexarr
598  * performing binary search of each lexeme from lexarr in tsvector.
599  */
600  skip_indices = palloc0(nlex * sizeof(int));
601  for (i = skip_count = 0; i < nlex; i++)
602  {
603  char *lex;
604  int lex_len,
605  lex_pos;
606 
607  if (nulls[i])
608  ereport(ERROR,
609  (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
610  errmsg("lexeme array may not contain nulls")));
611 
612  lex = VARDATA(dlexemes[i]);
613  lex_len = VARSIZE(dlexemes[i]) - VARHDRSZ;
614  lex_pos = tsvector_bsearch(tsin, lex, lex_len);
615 
616  if (lex_pos >= 0)
617  skip_indices[skip_count++] = lex_pos;
618  }
619 
620  tsout = tsvector_delete_by_indices(tsin, skip_indices, skip_count);
621 
622  pfree(skip_indices);
623  PG_FREE_IF_COPY(tsin, 0);
624  PG_FREE_IF_COPY(lexemes, 1);
625 
626  PG_RETURN_POINTER(tsout);
627 }
628 
629 /*
630  * Expand tsvector as table with following columns:
631  * lexeme: lexeme text
632  * positions: integer array of lexeme positions
633  * weights: char array of weights corresponding to positions
634  */
635 Datum
637 {
638  FuncCallContext *funcctx;
639  TSVector tsin;
640 
641  if (SRF_IS_FIRSTCALL())
642  {
643  MemoryContext oldcontext;
644  TupleDesc tupdesc;
645 
646  funcctx = SRF_FIRSTCALL_INIT();
647  oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
648 
649  tupdesc = CreateTemplateTupleDesc(3, false);
650  TupleDescInitEntry(tupdesc, (AttrNumber) 1, "lexeme",
651  TEXTOID, -1, 0);
652  TupleDescInitEntry(tupdesc, (AttrNumber) 2, "positions",
653  INT2ARRAYOID, -1, 0);
654  TupleDescInitEntry(tupdesc, (AttrNumber) 3, "weights",
655  TEXTARRAYOID, -1, 0);
656  funcctx->tuple_desc = BlessTupleDesc(tupdesc);
657 
658  funcctx->user_fctx = PG_GETARG_TSVECTOR_COPY(0);
659 
660  MemoryContextSwitchTo(oldcontext);
661  }
662 
663  funcctx = SRF_PERCALL_SETUP();
664  tsin = (TSVector) funcctx->user_fctx;
665 
666  if (funcctx->call_cntr < tsin->size)
667  {
668  WordEntry *arrin = ARRPTR(tsin);
669  char *data = STRPTR(tsin);
670  HeapTuple tuple;
671  int j,
672  i = funcctx->call_cntr;
673  bool nulls[] = {false, false, false};
674  Datum values[3];
675 
676  values[0] = PointerGetDatum(
677  cstring_to_text_with_len(data + arrin[i].pos, arrin[i].len)
678  );
679 
680  if (arrin[i].haspos)
681  {
682  WordEntryPosVector *posv;
683  Datum *positions;
684  Datum *weights;
685  char weight;
686 
687  /*
688  * Internally tsvector stores position and weight in the same
689  * uint16 (2 bits for weight, 14 for position). Here we extract
690  * that in two separate arrays.
691  */
692  posv = _POSVECPTR(tsin, arrin + i);
693  positions = palloc(posv->npos * sizeof(Datum));
694  weights = palloc(posv->npos * sizeof(Datum));
695  for (j = 0; j < posv->npos; j++)
696  {
697  positions[j] = Int16GetDatum(WEP_GETPOS(posv->pos[j]));
698  weight = 'D' - WEP_GETWEIGHT(posv->pos[j]);
699  weights[j] = PointerGetDatum(
700  cstring_to_text_with_len(&weight, 1)
701  );
702  }
703 
704  values[1] = PointerGetDatum(
705  construct_array(positions, posv->npos, INT2OID, 2, true, 's'));
706  values[2] = PointerGetDatum(
707  construct_array(weights, posv->npos, TEXTOID, -1, false, 'i'));
708  }
709  else
710  {
711  nulls[1] = nulls[2] = true;
712  }
713 
714  tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
715  SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
716  }
717  else
718  {
719  pfree(tsin);
720  SRF_RETURN_DONE(funcctx);
721  }
722 }
723 
724 /*
725  * Convert tsvector to array of lexemes.
726  */
727 Datum
729 {
730  TSVector tsin = PG_GETARG_TSVECTOR(0);
731  WordEntry *arrin = ARRPTR(tsin);
732  Datum *elements;
733  int i;
734  ArrayType *array;
735 
736  elements = palloc(tsin->size * sizeof(Datum));
737 
738  for (i = 0; i < tsin->size; i++)
739  {
740  elements[i] = PointerGetDatum(
741  cstring_to_text_with_len(STRPTR(tsin) + arrin[i].pos, arrin[i].len)
742  );
743  }
744 
745  array = construct_array(elements, tsin->size, TEXTOID, -1, false, 'i');
746 
747  pfree(elements);
748  PG_FREE_IF_COPY(tsin, 0);
749  PG_RETURN_POINTER(array);
750 }
751 
752 /*
753  * Build tsvector from array of lexemes.
754  */
755 Datum
757 {
759  TSVector tsout;
760  Datum *dlexemes;
761  WordEntry *arrout;
762  bool *nulls;
763  int nitems,
764  i,
765  j,
766  tslen,
767  datalen = 0;
768  char *cur;
769 
770  deconstruct_array(v, TEXTOID, -1, false, 'i', &dlexemes, &nulls, &nitems);
771 
772  /* Reject nulls (maybe we should just ignore them, instead?) */
773  for (i = 0; i < nitems; i++)
774  {
775  if (nulls[i])
776  ereport(ERROR,
777  (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
778  errmsg("lexeme array may not contain nulls")));
779  }
780 
781  /* Sort and de-dup, because this is required for a valid tsvector. */
782  if (nitems > 1)
783  {
784  qsort(dlexemes, nitems, sizeof(Datum), compare_text_lexemes);
785  j = 0;
786  for (i = 1; i < nitems; i++)
787  {
788  if (compare_text_lexemes(&dlexemes[j], &dlexemes[i]) < 0)
789  dlexemes[++j] = dlexemes[i];
790  }
791  nitems = ++j;
792  }
793 
794  /* Calculate space needed for surviving lexemes. */
795  for (i = 0; i < nitems; i++)
796  datalen += VARSIZE(dlexemes[i]) - VARHDRSZ;
797  tslen = CALCDATASIZE(nitems, datalen);
798 
799  /* Allocate and fill tsvector. */
800  tsout = (TSVector) palloc0(tslen);
801  SET_VARSIZE(tsout, tslen);
802  tsout->size = nitems;
803 
804  arrout = ARRPTR(tsout);
805  cur = STRPTR(tsout);
806  for (i = 0; i < nitems; i++)
807  {
808  char *lex = VARDATA(dlexemes[i]);
809  int lex_len = VARSIZE(dlexemes[i]) - VARHDRSZ;
810 
811  memcpy(cur, lex, lex_len);
812  arrout[i].haspos = 0;
813  arrout[i].len = lex_len;
814  arrout[i].pos = cur - STRPTR(tsout);
815  cur += lex_len;
816  }
817 
818  PG_FREE_IF_COPY(v, 0);
819  PG_RETURN_POINTER(tsout);
820 }
821 
822 /*
823  * ts_filter(): keep only lexemes with given weights in tsvector.
824  */
825 Datum
827 {
828  TSVector tsin = PG_GETARG_TSVECTOR(0),
829  tsout;
831  WordEntry *arrin = ARRPTR(tsin),
832  *arrout;
833  char *datain = STRPTR(tsin),
834  *dataout;
835  Datum *dweights;
836  bool *nulls;
837  int nweights;
838  int i,
839  j;
840  int cur_pos = 0;
841  char mask = 0;
842 
843  deconstruct_array(weights, CHAROID, 1, true, 'c',
844  &dweights, &nulls, &nweights);
845 
846  for (i = 0; i < nweights; i++)
847  {
848  char char_weight;
849 
850  if (nulls[i])
851  ereport(ERROR,
852  (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
853  errmsg("weight array may not contain nulls")));
854 
855  char_weight = DatumGetChar(dweights[i]);
856  switch (char_weight)
857  {
858  case 'A':
859  case 'a':
860  mask = mask | 8;
861  break;
862  case 'B':
863  case 'b':
864  mask = mask | 4;
865  break;
866  case 'C':
867  case 'c':
868  mask = mask | 2;
869  break;
870  case 'D':
871  case 'd':
872  mask = mask | 1;
873  break;
874  default:
875  ereport(ERROR,
876  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
877  errmsg("unrecognized weight: \"%c\"", char_weight)));
878  }
879  }
880 
881  tsout = (TSVector) palloc0(VARSIZE(tsin));
882  tsout->size = tsin->size;
883  arrout = ARRPTR(tsout);
884  dataout = STRPTR(tsout);
885 
886  for (i = j = 0; i < tsin->size; i++)
887  {
888  WordEntryPosVector *posvin,
889  *posvout;
890  int npos = 0;
891  int k;
892 
893  if (!arrin[i].haspos)
894  continue;
895 
896  posvin = _POSVECPTR(tsin, arrin + i);
897  posvout = (WordEntryPosVector *)
898  (dataout + SHORTALIGN(cur_pos + arrin[i].len));
899 
900  for (k = 0; k < posvin->npos; k++)
901  {
902  if (mask & (1 << WEP_GETWEIGHT(posvin->pos[k])))
903  posvout->pos[npos++] = posvin->pos[k];
904  }
905 
906  /* if no satisfactory positions found, skip lexeme */
907  if (!npos)
908  continue;
909 
910  arrout[j].haspos = true;
911  arrout[j].len = arrin[i].len;
912  arrout[j].pos = cur_pos;
913 
914  memcpy(dataout + cur_pos, datain + arrin[i].pos, arrin[i].len);
915  posvout->npos = npos;
916  cur_pos += SHORTALIGN(arrin[i].len);
917  cur_pos += POSDATALEN(tsout, arrout + j) * sizeof(WordEntryPos) +
918  sizeof(uint16);
919  j++;
920  }
921 
922  tsout->size = j;
923  if (dataout != STRPTR(tsout))
924  memmove(STRPTR(tsout), dataout, cur_pos);
925 
926  SET_VARSIZE(tsout, CALCDATASIZE(tsout->size, cur_pos));
927 
928  PG_FREE_IF_COPY(tsin, 0);
929  PG_RETURN_POINTER(tsout);
930 }
931 
932 Datum
934 {
935  TSVector in1 = PG_GETARG_TSVECTOR(0);
936  TSVector in2 = PG_GETARG_TSVECTOR(1);
937  TSVector out;
938  WordEntry *ptr;
939  WordEntry *ptr1,
940  *ptr2;
941  WordEntryPos *p;
942  int maxpos = 0,
943  i,
944  j,
945  i1,
946  i2,
947  dataoff,
948  output_bytes,
949  output_size;
950  char *data,
951  *data1,
952  *data2;
953 
954  /* Get max position in in1; we'll need this to offset in2's positions */
955  ptr = ARRPTR(in1);
956  i = in1->size;
957  while (i--)
958  {
959  if ((j = POSDATALEN(in1, ptr)) != 0)
960  {
961  p = POSDATAPTR(in1, ptr);
962  while (j--)
963  {
964  if (WEP_GETPOS(*p) > maxpos)
965  maxpos = WEP_GETPOS(*p);
966  p++;
967  }
968  }
969  ptr++;
970  }
971 
972  ptr1 = ARRPTR(in1);
973  ptr2 = ARRPTR(in2);
974  data1 = STRPTR(in1);
975  data2 = STRPTR(in2);
976  i1 = in1->size;
977  i2 = in2->size;
978 
979  /*
980  * Conservative estimate of space needed. We might need all the data in
981  * both inputs, and conceivably add a pad byte before position data for
982  * each item where there was none before.
983  */
984  output_bytes = VARSIZE(in1) + VARSIZE(in2) + i1 + i2;
985 
986  out = (TSVector) palloc0(output_bytes);
987  SET_VARSIZE(out, output_bytes);
988 
989  /*
990  * We must make out->size valid so that STRPTR(out) is sensible. We'll
991  * collapse out any unused space at the end.
992  */
993  out->size = in1->size + in2->size;
994 
995  ptr = ARRPTR(out);
996  data = STRPTR(out);
997  dataoff = 0;
998  while (i1 && i2)
999  {
1000  int cmp = compareEntry(data1, ptr1, data2, ptr2);
1001 
1002  if (cmp < 0)
1003  { /* in1 first */
1004  ptr->haspos = ptr1->haspos;
1005  ptr->len = ptr1->len;
1006  memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
1007  ptr->pos = dataoff;
1008  dataoff += ptr1->len;
1009  if (ptr->haspos)
1010  {
1011  dataoff = SHORTALIGN(dataoff);
1012  memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
1013  dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
1014  }
1015 
1016  ptr++;
1017  ptr1++;
1018  i1--;
1019  }
1020  else if (cmp > 0)
1021  { /* in2 first */
1022  ptr->haspos = ptr2->haspos;
1023  ptr->len = ptr2->len;
1024  memcpy(data + dataoff, data2 + ptr2->pos, ptr2->len);
1025  ptr->pos = dataoff;
1026  dataoff += ptr2->len;
1027  if (ptr->haspos)
1028  {
1029  int addlen = add_pos(in2, ptr2, out, ptr, maxpos);
1030 
1031  if (addlen == 0)
1032  ptr->haspos = 0;
1033  else
1034  {
1035  dataoff = SHORTALIGN(dataoff);
1036  dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
1037  }
1038  }
1039 
1040  ptr++;
1041  ptr2++;
1042  i2--;
1043  }
1044  else
1045  {
1046  ptr->haspos = ptr1->haspos | ptr2->haspos;
1047  ptr->len = ptr1->len;
1048  memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
1049  ptr->pos = dataoff;
1050  dataoff += ptr1->len;
1051  if (ptr->haspos)
1052  {
1053  if (ptr1->haspos)
1054  {
1055  dataoff = SHORTALIGN(dataoff);
1056  memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
1057  dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
1058  if (ptr2->haspos)
1059  dataoff += add_pos(in2, ptr2, out, ptr, maxpos) * sizeof(WordEntryPos);
1060  }
1061  else /* must have ptr2->haspos */
1062  {
1063  int addlen = add_pos(in2, ptr2, out, ptr, maxpos);
1064 
1065  if (addlen == 0)
1066  ptr->haspos = 0;
1067  else
1068  {
1069  dataoff = SHORTALIGN(dataoff);
1070  dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
1071  }
1072  }
1073  }
1074 
1075  ptr++;
1076  ptr1++;
1077  ptr2++;
1078  i1--;
1079  i2--;
1080  }
1081  }
1082 
1083  while (i1)
1084  {
1085  ptr->haspos = ptr1->haspos;
1086  ptr->len = ptr1->len;
1087  memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
1088  ptr->pos = dataoff;
1089  dataoff += ptr1->len;
1090  if (ptr->haspos)
1091  {
1092  dataoff = SHORTALIGN(dataoff);
1093  memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
1094  dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
1095  }
1096 
1097  ptr++;
1098  ptr1++;
1099  i1--;
1100  }
1101 
1102  while (i2)
1103  {
1104  ptr->haspos = ptr2->haspos;
1105  ptr->len = ptr2->len;
1106  memcpy(data + dataoff, data2 + ptr2->pos, ptr2->len);
1107  ptr->pos = dataoff;
1108  dataoff += ptr2->len;
1109  if (ptr->haspos)
1110  {
1111  int addlen = add_pos(in2, ptr2, out, ptr, maxpos);
1112 
1113  if (addlen == 0)
1114  ptr->haspos = 0;
1115  else
1116  {
1117  dataoff = SHORTALIGN(dataoff);
1118  dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
1119  }
1120  }
1121 
1122  ptr++;
1123  ptr2++;
1124  i2--;
1125  }
1126 
1127  /*
1128  * Instead of checking each offset individually, we check for overflow of
1129  * pos fields once at the end.
1130  */
1131  if (dataoff > MAXSTRPOS)
1132  ereport(ERROR,
1133  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1134  errmsg("string is too long for tsvector (%d bytes, max %d bytes)", dataoff, MAXSTRPOS)));
1135 
1136  /*
1137  * Adjust sizes (asserting that we didn't overrun the original estimates)
1138  * and collapse out any unused array entries.
1139  */
1140  output_size = ptr - ARRPTR(out);
1141  Assert(output_size <= out->size);
1142  out->size = output_size;
1143  if (data != STRPTR(out))
1144  memmove(STRPTR(out), data, dataoff);
1145  output_bytes = CALCDATASIZE(out->size, dataoff);
1146  Assert(output_bytes <= VARSIZE(out));
1147  SET_VARSIZE(out, output_bytes);
1148 
1149  PG_FREE_IF_COPY(in1, 0);
1150  PG_FREE_IF_COPY(in2, 1);
1151  PG_RETURN_POINTER(out);
1152 }
1153 
1154 /*
1155  * Compare two strings by tsvector rules.
1156  *
1157  * if isPrefix = true then it returns zero value iff b has prefix a
1158  */
1159 int32
1160 tsCompareString(char *a, int lena, char *b, int lenb, bool prefix)
1161 {
1162  int cmp;
1163 
1164  if (lena == 0)
1165  {
1166  if (prefix)
1167  cmp = 0; /* empty string is prefix of anything */
1168  else
1169  cmp = (lenb > 0) ? -1 : 0;
1170  }
1171  else if (lenb == 0)
1172  {
1173  cmp = (lena > 0) ? 1 : 0;
1174  }
1175  else
1176  {
1177  cmp = memcmp(a, b, Min(lena, lenb));
1178 
1179  if (prefix)
1180  {
1181  if (cmp == 0 && lena > lenb)
1182  cmp = 1; /* a is longer, so not a prefix of b */
1183  }
1184  else if (cmp == 0 && lena != lenb)
1185  {
1186  cmp = (lena < lenb) ? -1 : 1;
1187  }
1188  }
1189 
1190  return cmp;
1191 }
1192 
1193 /*
1194  * Check weight info and/or fill 'data' with the required positions
1195  */
1196 static bool
1198  ExecPhraseData *data)
1199 {
1200  bool result = false;
1201 
1202  if (entry->haspos && (val->weight || data))
1203  {
1204  WordEntryPosVector *posvec;
1205 
1206  /*
1207  * We can't use the _POSVECPTR macro here because the pointer to the
1208  * tsvector's lexeme storage is already contained in chkval->values.
1209  */
1210  posvec = (WordEntryPosVector *)
1211  (chkval->values + SHORTALIGN(entry->pos + entry->len));
1212 
1213  if (val->weight && data)
1214  {
1215  WordEntryPos *posvec_iter = posvec->pos;
1216  WordEntryPos *dptr;
1217 
1218  /*
1219  * Filter position information by weights
1220  */
1221  dptr = data->pos = palloc(sizeof(WordEntryPos) * posvec->npos);
1222  data->allocated = true;
1223 
1224  /* Is there a position with a matching weight? */
1225  while (posvec_iter < posvec->pos + posvec->npos)
1226  {
1227  /* If true, append this position to the data->pos */
1228  if (val->weight & (1 << WEP_GETWEIGHT(*posvec_iter)))
1229  {
1230  *dptr = WEP_GETPOS(*posvec_iter);
1231  dptr++;
1232  }
1233 
1234  posvec_iter++;
1235  }
1236 
1237  data->npos = dptr - data->pos;
1238 
1239  if (data->npos > 0)
1240  result = true;
1241  }
1242  else if (val->weight)
1243  {
1244  WordEntryPos *posvec_iter = posvec->pos;
1245 
1246  /* Is there a position with a matching weight? */
1247  while (posvec_iter < posvec->pos + posvec->npos)
1248  {
1249  if (val->weight & (1 << WEP_GETWEIGHT(*posvec_iter)))
1250  {
1251  result = true;
1252  break; /* no need to go further */
1253  }
1254 
1255  posvec_iter++;
1256  }
1257  }
1258  else /* data != NULL */
1259  {
1260  data->npos = posvec->npos;
1261  data->pos = posvec->pos;
1262  data->allocated = false;
1263  result = true;
1264  }
1265  }
1266  else
1267  {
1268  result = true;
1269  }
1270 
1271  return result;
1272 }
1273 
1274 /*
1275  * Removes duplicate pos entries. We can't use uniquePos() from
1276  * tsvector.c because array might be longer than MAXENTRYPOS
1277  *
1278  * Returns new length.
1279  */
1280 static int
1282 {
1283  WordEntryPos *pos_iter,
1284  *result;
1285 
1286  if (npos <= 1)
1287  return npos;
1288 
1289  qsort((void *) pos, npos, sizeof(WordEntryPos), compareWordEntryPos);
1290 
1291  result = pos;
1292  pos_iter = pos + 1;
1293  while (pos_iter < pos + npos)
1294  {
1295  if (WEP_GETPOS(*pos_iter) != WEP_GETPOS(*result))
1296  {
1297  result++;
1298  *result = WEP_GETPOS(*pos_iter);
1299  }
1300 
1301  pos_iter++;
1302  }
1303 
1304  return result + 1 - pos;
1305 }
1306 
1307 /*
1308  * Is the value 'val' present in the array?
1309  */
1310 static bool
1312 {
1313  CHKVAL *chkval = (CHKVAL *) checkval;
1314  WordEntry *StopLow = chkval->arrb;
1315  WordEntry *StopHigh = chkval->arre;
1316  WordEntry *StopMiddle = StopHigh;
1317  int difference = -1;
1318  bool res = false;
1319 
1320  /* Loop invariant: StopLow <= val < StopHigh */
1321  while (StopLow < StopHigh)
1322  {
1323  StopMiddle = StopLow + (StopHigh - StopLow) / 2;
1324  difference = tsCompareString(chkval->operand + val->distance,
1325  val->length,
1326  chkval->values + StopMiddle->pos,
1327  StopMiddle->len,
1328  false);
1329 
1330  if (difference == 0)
1331  {
1332  /* Check weight info & fill 'data' with positions */
1333  res = checkclass_str(chkval, StopMiddle, val, data);
1334  break;
1335  }
1336  else if (difference > 0)
1337  StopLow = StopMiddle + 1;
1338  else
1339  StopHigh = StopMiddle;
1340  }
1341 
1342  if ((!res || data) && val->prefix)
1343  {
1344  WordEntryPos *allpos = NULL;
1345  int npos = 0,
1346  totalpos = 0;
1347 
1348  /*
1349  * there was a failed exact search, so we should scan further to find
1350  * a prefix match. We also need to do so if caller needs position info
1351  */
1352  if (StopLow >= StopHigh)
1353  StopMiddle = StopHigh;
1354 
1355  while ((!res || data) && StopMiddle < chkval->arre &&
1356  tsCompareString(chkval->operand + val->distance,
1357  val->length,
1358  chkval->values + StopMiddle->pos,
1359  StopMiddle->len,
1360  true) == 0)
1361  {
1362  if (data)
1363  {
1364  /*
1365  * We need to join position information
1366  */
1367  res = checkclass_str(chkval, StopMiddle, val, data);
1368 
1369  if (res)
1370  {
1371  while (npos + data->npos >= totalpos)
1372  {
1373  if (totalpos == 0)
1374  {
1375  totalpos = 256;
1376  allpos = palloc(sizeof(WordEntryPos) * totalpos);
1377  }
1378  else
1379  {
1380  totalpos *= 2;
1381  allpos = repalloc(allpos, sizeof(WordEntryPos) * totalpos);
1382  }
1383  }
1384 
1385  memcpy(allpos + npos, data->pos, sizeof(WordEntryPos) * data->npos);
1386  npos += data->npos;
1387  }
1388  }
1389  else
1390  {
1391  res = checkclass_str(chkval, StopMiddle, val, NULL);
1392  }
1393 
1394  StopMiddle++;
1395  }
1396 
1397  if (res && data)
1398  {
1399  /* Sort and make unique array of found positions */
1400  data->pos = allpos;
1401  data->npos = uniqueLongPos(allpos, npos);
1402  data->allocated = true;
1403  }
1404  }
1405 
1406  return res;
1407 }
1408 
1409 /*
1410  * Compute output position list for a tsquery operator in phrase mode.
1411  *
1412  * Merge the position lists in Ldata and Rdata as specified by "emit",
1413  * returning the result list into *data. The input position lists must be
1414  * sorted and unique, and the output will be as well.
1415  *
1416  * data: pointer to initially-all-zeroes output struct, or NULL
1417  * Ldata, Rdata: input position lists
1418  * emit: bitmask of TSPO_XXX flags
1419  * Loffset: offset to be added to Ldata positions before comparing/outputting
1420  * Roffset: offset to be added to Rdata positions before comparing/outputting
1421  * max_npos: maximum possible required size of output position array
1422  *
1423  * Loffset and Roffset should not be negative, else we risk trying to output
1424  * negative positions, which won't fit into WordEntryPos.
1425  *
1426  * Returns true if any positions were emitted to *data; or if data is NULL,
1427  * returns true if any positions would have been emitted.
1428  */
1429 #define TSPO_L_ONLY 0x01 /* emit positions appearing only in L */
1430 #define TSPO_R_ONLY 0x02 /* emit positions appearing only in R */
1431 #define TSPO_BOTH 0x04 /* emit positions appearing in both L&R */
1432 
1433 static bool
1435  ExecPhraseData *Ldata,
1436  ExecPhraseData *Rdata,
1437  int emit,
1438  int Loffset,
1439  int Roffset,
1440  int max_npos)
1441 {
1442  int Lindex,
1443  Rindex;
1444 
1445  /* Loop until both inputs are exhausted */
1446  Lindex = Rindex = 0;
1447  while (Lindex < Ldata->npos || Rindex < Rdata->npos)
1448  {
1449  int Lpos,
1450  Rpos;
1451  int output_pos = 0;
1452 
1453  /*
1454  * Fetch current values to compare. WEP_GETPOS() is needed because
1455  * ExecPhraseData->data can point to a tsvector's WordEntryPosVector.
1456  */
1457  if (Lindex < Ldata->npos)
1458  Lpos = WEP_GETPOS(Ldata->pos[Lindex]) + Loffset;
1459  else
1460  {
1461  /* L array exhausted, so we're done if R_ONLY isn't set */
1462  if (!(emit & TSPO_R_ONLY))
1463  break;
1464  Lpos = INT_MAX;
1465  }
1466  if (Rindex < Rdata->npos)
1467  Rpos = WEP_GETPOS(Rdata->pos[Rindex]) + Roffset;
1468  else
1469  {
1470  /* R array exhausted, so we're done if L_ONLY isn't set */
1471  if (!(emit & TSPO_L_ONLY))
1472  break;
1473  Rpos = INT_MAX;
1474  }
1475 
1476  /* Merge-join the two input lists */
1477  if (Lpos < Rpos)
1478  {
1479  /* Lpos is not matched in Rdata, should we output it? */
1480  if (emit & TSPO_L_ONLY)
1481  output_pos = Lpos;
1482  Lindex++;
1483  }
1484  else if (Lpos == Rpos)
1485  {
1486  /* Lpos and Rpos match ... should we output it? */
1487  if (emit & TSPO_BOTH)
1488  output_pos = Rpos;
1489  Lindex++;
1490  Rindex++;
1491  }
1492  else /* Lpos > Rpos */
1493  {
1494  /* Rpos is not matched in Ldata, should we output it? */
1495  if (emit & TSPO_R_ONLY)
1496  output_pos = Rpos;
1497  Rindex++;
1498  }
1499 
1500  if (output_pos > 0)
1501  {
1502  if (data)
1503  {
1504  /* Store position, first allocating output array if needed */
1505  if (data->pos == NULL)
1506  {
1507  data->pos = (WordEntryPos *)
1508  palloc(max_npos * sizeof(WordEntryPos));
1509  data->allocated = true;
1510  }
1511  data->pos[data->npos++] = output_pos;
1512  }
1513  else
1514  {
1515  /*
1516  * Exact positions not needed, so return true as soon as we
1517  * know there is at least one.
1518  */
1519  return true;
1520  }
1521  }
1522  }
1523 
1524  if (data && data->npos > 0)
1525  {
1526  /* Let's assert we didn't overrun the array */
1527  Assert(data->npos <= max_npos);
1528  return true;
1529  }
1530  return false;
1531 }
1532 
1533 /*
1534  * Execute tsquery at or below an OP_PHRASE operator.
1535  *
1536  * This handles tsquery execution at recursion levels where we need to care
1537  * about match locations.
1538  *
1539  * In addition to the same arguments used for TS_execute, the caller may pass
1540  * a preinitialized-to-zeroes ExecPhraseData struct, to be filled with lexeme
1541  * match position info on success. data == NULL if no position data need be
1542  * returned. (In practice, outside callers pass NULL, and only the internal
1543  * recursion cases pass a data pointer.)
1544  * Note: the function assumes data != NULL for operators other than OP_PHRASE.
1545  * This is OK because an outside call always starts from an OP_PHRASE node.
1546  *
1547  * The detailed semantics of the match data, given that the function returned
1548  * "true" (successful match, or possible match), are:
1549  *
1550  * npos > 0, negate = false:
1551  * query is matched at specified position(s) (and only those positions)
1552  * npos > 0, negate = true:
1553  * query is matched at all positions *except* specified position(s)
1554  * npos = 0, negate = false:
1555  * query is possibly matched, matching position(s) are unknown
1556  * (this should only be returned when TS_EXEC_PHRASE_NO_POS flag is set)
1557  * npos = 0, negate = true:
1558  * query is matched at all positions
1559  *
1560  * Successful matches also return a "width" value which is the match width in
1561  * lexemes, less one. Hence, "width" is zero for simple one-lexeme matches,
1562  * and is the sum of the phrase operator distances for phrase matches. Note
1563  * that when width > 0, the listed positions represent the ends of matches not
1564  * the starts. (This unintuitive rule is needed to avoid possibly generating
1565  * negative positions, which wouldn't fit into the WordEntryPos arrays.)
1566  *
1567  * When the function returns "false" (no match), it must return npos = 0,
1568  * negate = false (which is the state initialized by the caller); but the
1569  * "width" output in such cases is undefined.
1570  */
1571 static bool
1572 TS_phrase_execute(QueryItem *curitem, void *arg, uint32 flags,
1573  TSExecuteCallback chkcond,
1574  ExecPhraseData *data)
1575 {
1576  ExecPhraseData Ldata,
1577  Rdata;
1578  bool lmatch,
1579  rmatch;
1580  int Loffset,
1581  Roffset,
1582  maxwidth;
1583 
1584  /* since this function recurses, it could be driven to stack overflow */
1586 
1587  if (curitem->type == QI_VAL)
1588  return chkcond(arg, (QueryOperand *) curitem, data);
1589 
1590  switch (curitem->qoperator.oper)
1591  {
1592  case OP_NOT:
1593 
1594  /*
1595  * Because a "true" result with no specific positions is taken as
1596  * uncertain, we need no special care here for !TS_EXEC_CALC_NOT.
1597  * If it's a false positive, the right things happen anyway.
1598  *
1599  * Also, we need not touch data->width, since a NOT operation does
1600  * not change the match width.
1601  */
1602  if (TS_phrase_execute(curitem + 1, arg, flags, chkcond, data))
1603  {
1604  if (data->npos > 0)
1605  {
1606  /* we have some positions, invert negate flag */
1607  data->negate = !data->negate;
1608  return true;
1609  }
1610  else if (data->negate)
1611  {
1612  /* change "match everywhere" to "match nowhere" */
1613  data->negate = false;
1614  return false;
1615  }
1616  /* match positions are, and remain, uncertain */
1617  return true;
1618  }
1619  else
1620  {
1621  /* change "match nowhere" to "match everywhere" */
1622  Assert(data->npos == 0 && !data->negate);
1623  data->negate = true;
1624  return true;
1625  }
1626 
1627  case OP_PHRASE:
1628  case OP_AND:
1629  memset(&Ldata, 0, sizeof(Ldata));
1630  memset(&Rdata, 0, sizeof(Rdata));
1631 
1632  if (!TS_phrase_execute(curitem + curitem->qoperator.left,
1633  arg, flags, chkcond, &Ldata))
1634  return false;
1635 
1636  if (!TS_phrase_execute(curitem + 1,
1637  arg, flags, chkcond, &Rdata))
1638  return false;
1639 
1640  /*
1641  * If either operand has no position information, then we can't
1642  * return position data, only a "possible match" result. "Possible
1643  * match" answers are only wanted when TS_EXEC_PHRASE_NO_POS flag
1644  * is set, otherwise return false.
1645  */
1646  if ((Ldata.npos == 0 && !Ldata.negate) ||
1647  (Rdata.npos == 0 && !Rdata.negate))
1648  return (flags & TS_EXEC_PHRASE_NO_POS) ? true : false;
1649 
1650  if (curitem->qoperator.oper == OP_PHRASE)
1651  {
1652  /*
1653  * Compute Loffset and Roffset suitable for phrase match, and
1654  * compute overall width of whole phrase match.
1655  */
1656  Loffset = curitem->qoperator.distance + Rdata.width;
1657  Roffset = 0;
1658  if (data)
1659  data->width = curitem->qoperator.distance +
1660  Ldata.width + Rdata.width;
1661  }
1662  else
1663  {
1664  /*
1665  * For OP_AND, set output width and alignment like OP_OR (see
1666  * comment below)
1667  */
1668  maxwidth = Max(Ldata.width, Rdata.width);
1669  Loffset = maxwidth - Ldata.width;
1670  Roffset = maxwidth - Rdata.width;
1671  if (data)
1672  data->width = maxwidth;
1673  }
1674 
1675  if (Ldata.negate && Rdata.negate)
1676  {
1677  /* !L & !R: treat as !(L | R) */
1678  (void) TS_phrase_output(data, &Ldata, &Rdata,
1680  Loffset, Roffset,
1681  Ldata.npos + Rdata.npos);
1682  if (data)
1683  data->negate = true;
1684  return true;
1685  }
1686  else if (Ldata.negate)
1687  {
1688  /* !L & R */
1689  return TS_phrase_output(data, &Ldata, &Rdata,
1690  TSPO_R_ONLY,
1691  Loffset, Roffset,
1692  Rdata.npos);
1693  }
1694  else if (Rdata.negate)
1695  {
1696  /* L & !R */
1697  return TS_phrase_output(data, &Ldata, &Rdata,
1698  TSPO_L_ONLY,
1699  Loffset, Roffset,
1700  Ldata.npos);
1701  }
1702  else
1703  {
1704  /* straight AND */
1705  return TS_phrase_output(data, &Ldata, &Rdata,
1706  TSPO_BOTH,
1707  Loffset, Roffset,
1708  Min(Ldata.npos, Rdata.npos));
1709  }
1710 
1711  case OP_OR:
1712  memset(&Ldata, 0, sizeof(Ldata));
1713  memset(&Rdata, 0, sizeof(Rdata));
1714 
1715  lmatch = TS_phrase_execute(curitem + curitem->qoperator.left,
1716  arg, flags, chkcond, &Ldata);
1717  rmatch = TS_phrase_execute(curitem + 1,
1718  arg, flags, chkcond, &Rdata);
1719 
1720  if (!lmatch && !rmatch)
1721  return false;
1722 
1723  /*
1724  * If a valid operand has no position information, then we can't
1725  * return position data, only a "possible match" result. "Possible
1726  * match" answers are only wanted when TS_EXEC_PHRASE_NO_POS flag
1727  * is set, otherwise return false.
1728  */
1729  if ((lmatch && Ldata.npos == 0 && !Ldata.negate) ||
1730  (rmatch && Rdata.npos == 0 && !Rdata.negate))
1731  return (flags & TS_EXEC_PHRASE_NO_POS) ? true : false;
1732 
1733  /*
1734  * Cope with undefined output width from failed submatch. (This
1735  * takes less code than trying to ensure that all failure returns
1736  * set data->width to zero.)
1737  */
1738  if (!lmatch)
1739  Ldata.width = 0;
1740  if (!rmatch)
1741  Rdata.width = 0;
1742 
1743  /*
1744  * For OP_AND and OP_OR, report the width of the wider of the two
1745  * inputs, and align the narrower input's positions to the right
1746  * end of that width. This rule deals at least somewhat
1747  * reasonably with cases like "x <-> (y | z <-> q)".
1748  */
1749  maxwidth = Max(Ldata.width, Rdata.width);
1750  Loffset = maxwidth - Ldata.width;
1751  Roffset = maxwidth - Rdata.width;
1752  data->width = maxwidth;
1753 
1754  if (Ldata.negate && Rdata.negate)
1755  {
1756  /* !L | !R: treat as !(L & R) */
1757  (void) TS_phrase_output(data, &Ldata, &Rdata,
1758  TSPO_BOTH,
1759  Loffset, Roffset,
1760  Min(Ldata.npos, Rdata.npos));
1761  data->negate = true;
1762  return true;
1763  }
1764  else if (Ldata.negate)
1765  {
1766  /* !L | R: treat as !(L & !R) */
1767  (void) TS_phrase_output(data, &Ldata, &Rdata,
1768  TSPO_L_ONLY,
1769  Loffset, Roffset,
1770  Ldata.npos);
1771  data->negate = true;
1772  return true;
1773  }
1774  else if (Rdata.negate)
1775  {
1776  /* L | !R: treat as !(!L & R) */
1777  (void) TS_phrase_output(data, &Ldata, &Rdata,
1778  TSPO_R_ONLY,
1779  Loffset, Roffset,
1780  Rdata.npos);
1781  data->negate = true;
1782  return true;
1783  }
1784  else
1785  {
1786  /* straight OR */
1787  return TS_phrase_output(data, &Ldata, &Rdata,
1789  Loffset, Roffset,
1790  Ldata.npos + Rdata.npos);
1791  }
1792 
1793  default:
1794  elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
1795  }
1796 
1797  /* not reachable, but keep compiler quiet */
1798  return false;
1799 }
1800 
1801 
1802 /*
1803  * Evaluate tsquery boolean expression.
1804  *
1805  * curitem: current tsquery item (initially, the first one)
1806  * arg: opaque value to pass through to callback function
1807  * flags: bitmask of flag bits shown in ts_utils.h
1808  * chkcond: callback function to check whether a primitive value is present
1809  *
1810  * The logic here deals only with operators above any phrase operator, for
1811  * which we do not need to worry about lexeme positions. As soon as we hit an
1812  * OP_PHRASE operator, we pass it off to TS_phrase_execute which does worry.
1813  */
1814 bool
1815 TS_execute(QueryItem *curitem, void *arg, uint32 flags,
1816  TSExecuteCallback chkcond)
1817 {
1818  /* since this function recurses, it could be driven to stack overflow */
1820 
1821  if (curitem->type == QI_VAL)
1822  return chkcond(arg, (QueryOperand *) curitem,
1823  NULL /* we don't need position info */ );
1824 
1825  switch (curitem->qoperator.oper)
1826  {
1827  case OP_NOT:
1828  if (flags & TS_EXEC_CALC_NOT)
1829  return !TS_execute(curitem + 1, arg, flags, chkcond);
1830  else
1831  return true;
1832 
1833  case OP_AND:
1834  if (TS_execute(curitem + curitem->qoperator.left, arg, flags, chkcond))
1835  return TS_execute(curitem + 1, arg, flags, chkcond);
1836  else
1837  return false;
1838 
1839  case OP_OR:
1840  if (TS_execute(curitem + curitem->qoperator.left, arg, flags, chkcond))
1841  return true;
1842  else
1843  return TS_execute(curitem + 1, arg, flags, chkcond);
1844 
1845  case OP_PHRASE:
1846  return TS_phrase_execute(curitem, arg, flags, chkcond, NULL);
1847 
1848  default:
1849  elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
1850  }
1851 
1852  /* not reachable, but keep compiler quiet */
1853  return false;
1854 }
1855 
1856 /*
1857  * Detect whether a tsquery boolean expression requires any positive matches
1858  * to values shown in the tsquery.
1859  *
1860  * This is needed to know whether a GIN index search requires full index scan.
1861  * For example, 'x & !y' requires a match of x, so it's sufficient to scan
1862  * entries for x; but 'x | !y' could match rows containing neither x nor y.
1863  */
1864 bool
1866 {
1867  /* since this function recurses, it could be driven to stack overflow */
1869 
1870  if (curitem->type == QI_VAL)
1871  return true;
1872 
1873  switch (curitem->qoperator.oper)
1874  {
1875  case OP_NOT:
1876 
1877  /*
1878  * Assume there are no required matches underneath a NOT. For
1879  * some cases with nested NOTs, we could prove there's a required
1880  * match, but it seems unlikely to be worth the trouble.
1881  */
1882  return false;
1883 
1884  case OP_PHRASE:
1885 
1886  /*
1887  * Treat OP_PHRASE as OP_AND here
1888  */
1889  case OP_AND:
1890  /* If either side requires a match, we're good */
1891  if (tsquery_requires_match(curitem + curitem->qoperator.left))
1892  return true;
1893  else
1894  return tsquery_requires_match(curitem + 1);
1895 
1896  case OP_OR:
1897  /* Both sides must require a match */
1898  if (tsquery_requires_match(curitem + curitem->qoperator.left))
1899  return tsquery_requires_match(curitem + 1);
1900  else
1901  return false;
1902 
1903  default:
1904  elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
1905  }
1906 
1907  /* not reachable, but keep compiler quiet */
1908  return false;
1909 }
1910 
1911 /*
1912  * boolean operations
1913  */
1914 Datum
1916 {
1918  PG_GETARG_DATUM(1),
1919  PG_GETARG_DATUM(0)));
1920 }
1921 
1922 Datum
1924 {
1926  TSQuery query = PG_GETARG_TSQUERY(1);
1927  CHKVAL chkval;
1928  bool result;
1929 
1930  /* empty query matches nothing */
1931  if (!query->size)
1932  {
1933  PG_FREE_IF_COPY(val, 0);
1934  PG_FREE_IF_COPY(query, 1);
1935  PG_RETURN_BOOL(false);
1936  }
1937 
1938  chkval.arrb = ARRPTR(val);
1939  chkval.arre = chkval.arrb + val->size;
1940  chkval.values = STRPTR(val);
1941  chkval.operand = GETOPERAND(query);
1942  result = TS_execute(GETQUERY(query),
1943  &chkval,
1946 
1947  PG_FREE_IF_COPY(val, 0);
1948  PG_FREE_IF_COPY(query, 1);
1949  PG_RETURN_BOOL(result);
1950 }
1951 
1952 Datum
1954 {
1955  TSVector vector;
1956  TSQuery query;
1957  bool res;
1958 
1960  PG_GETARG_DATUM(0)));
1962  PG_GETARG_DATUM(1)));
1963 
1965  TSVectorGetDatum(vector),
1966  TSQueryGetDatum(query)));
1967 
1968  pfree(vector);
1969  pfree(query);
1970 
1971  PG_RETURN_BOOL(res);
1972 }
1973 
1974 Datum
1976 {
1977  TSVector vector;
1978  TSQuery query = PG_GETARG_TSQUERY(1);
1979  bool res;
1980 
1982  PG_GETARG_DATUM(0)));
1983 
1985  TSVectorGetDatum(vector),
1986  TSQueryGetDatum(query)));
1987 
1988  pfree(vector);
1989  PG_FREE_IF_COPY(query, 1);
1990 
1991  PG_RETURN_BOOL(res);
1992 }
1993 
1994 /*
1995  * ts_stat statistic function support
1996  */
1997 
1998 
1999 /*
2000  * Returns the number of positions in value 'wptr' within tsvector 'txt',
2001  * that have a weight equal to one of the weights in 'weight' bitmask.
2002  */
2003 static int
2005 {
2006  int len = POSDATALEN(txt, wptr);
2007  int num = 0;
2008  WordEntryPos *ptr = POSDATAPTR(txt, wptr);
2009 
2010  while (len--)
2011  {
2012  if (weight & (1 << WEP_GETWEIGHT(*ptr)))
2013  num++;
2014  ptr++;
2015  }
2016  return num;
2017 }
2018 
2019 #define compareStatWord(a,e,t) \
2020  tsCompareString((a)->lexeme, (a)->lenlexeme, \
2021  STRPTR(t) + (e)->pos, (e)->len, \
2022  false)
2023 
2024 static void
2025 insertStatEntry(MemoryContext persistentContext, TSVectorStat *stat, TSVector txt, uint32 off)
2026 {
2027  WordEntry *we = ARRPTR(txt) + off;
2028  StatEntry *node = stat->root,
2029  *pnode = NULL;
2030  int n,
2031  res = 0;
2032  uint32 depth = 1;
2033 
2034  if (stat->weight == 0)
2035  n = (we->haspos) ? POSDATALEN(txt, we) : 1;
2036  else
2037  n = (we->haspos) ? check_weight(txt, we, stat->weight) : 0;
2038 
2039  if (n == 0)
2040  return; /* nothing to insert */
2041 
2042  while (node)
2043  {
2044  res = compareStatWord(node, we, txt);
2045 
2046  if (res == 0)
2047  {
2048  break;
2049  }
2050  else
2051  {
2052  pnode = node;
2053  node = (res < 0) ? node->left : node->right;
2054  }
2055  depth++;
2056  }
2057 
2058  if (depth > stat->maxdepth)
2059  stat->maxdepth = depth;
2060 
2061  if (node == NULL)
2062  {
2063  node = MemoryContextAlloc(persistentContext, STATENTRYHDRSZ + we->len);
2064  node->left = node->right = NULL;
2065  node->ndoc = 1;
2066  node->nentry = n;
2067  node->lenlexeme = we->len;
2068  memcpy(node->lexeme, STRPTR(txt) + we->pos, node->lenlexeme);
2069 
2070  if (pnode == NULL)
2071  {
2072  stat->root = node;
2073  }
2074  else
2075  {
2076  if (res < 0)
2077  pnode->left = node;
2078  else
2079  pnode->right = node;
2080  }
2081 
2082  }
2083  else
2084  {
2085  node->ndoc++;
2086  node->nentry += n;
2087  }
2088 }
2089 
2090 static void
2092  uint32 low, uint32 high, uint32 offset)
2093 {
2094  uint32 pos;
2095  uint32 middle = (low + high) >> 1;
2096 
2097  pos = (low + middle) >> 1;
2098  if (low != middle && pos >= offset && pos - offset < txt->size)
2099  insertStatEntry(persistentContext, stat, txt, pos - offset);
2100  pos = (high + middle + 1) >> 1;
2101  if (middle + 1 != high && pos >= offset && pos - offset < txt->size)
2102  insertStatEntry(persistentContext, stat, txt, pos - offset);
2103 
2104  if (low != middle)
2105  chooseNextStatEntry(persistentContext, stat, txt, low, middle, offset);
2106  if (high != middle + 1)
2107  chooseNextStatEntry(persistentContext, stat, txt, middle + 1, high, offset);
2108 }
2109 
2110 /*
2111  * This is written like a custom aggregate function, because the
2112  * original plan was to do just that. Unfortunately, an aggregate function
2113  * can't return a set, so that plan was abandoned. If that limitation is
2114  * lifted in the future, ts_stat could be a real aggregate function so that
2115  * you could use it like this:
2116  *
2117  * SELECT ts_stat(vector_column) FROM vector_table;
2118  *
2119  * where vector_column is a tsvector-type column in vector_table.
2120  */
2121 
2122 static TSVectorStat *
2123 ts_accum(MemoryContext persistentContext, TSVectorStat *stat, Datum data)
2124 {
2125  TSVector txt = DatumGetTSVector(data);
2126  uint32 i,
2127  nbit = 0,
2128  offset;
2129 
2130  if (stat == NULL)
2131  { /* Init in first */
2132  stat = MemoryContextAllocZero(persistentContext, sizeof(TSVectorStat));
2133  stat->maxdepth = 1;
2134  }
2135 
2136  /* simple check of correctness */
2137  if (txt == NULL || txt->size == 0)
2138  {
2139  if (txt && txt != (TSVector) DatumGetPointer(data))
2140  pfree(txt);
2141  return stat;
2142  }
2143 
2144  i = txt->size - 1;
2145  for (; i > 0; i >>= 1)
2146  nbit++;
2147 
2148  nbit = 1 << nbit;
2149  offset = (nbit - txt->size) / 2;
2150 
2151  insertStatEntry(persistentContext, stat, txt, (nbit >> 1) - offset);
2152  chooseNextStatEntry(persistentContext, stat, txt, 0, nbit, offset);
2153 
2154  return stat;
2155 }
2156 
2157 static void
2159  TSVectorStat *stat)
2160 {
2161  TupleDesc tupdesc;
2162  MemoryContext oldcontext;
2163  StatEntry *node;
2164 
2165  funcctx->user_fctx = (void *) stat;
2166 
2167  oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
2168 
2169  stat->stack = palloc0(sizeof(StatEntry *) * (stat->maxdepth + 1));
2170  stat->stackpos = 0;
2171 
2172  node = stat->root;
2173  /* find leftmost value */
2174  if (node == NULL)
2175  stat->stack[stat->stackpos] = NULL;
2176  else
2177  for (;;)
2178  {
2179  stat->stack[stat->stackpos] = node;
2180  if (node->left)
2181  {
2182  stat->stackpos++;
2183  node = node->left;
2184  }
2185  else
2186  break;
2187  }
2188  Assert(stat->stackpos <= stat->maxdepth);
2189 
2190  tupdesc = CreateTemplateTupleDesc(3, false);
2191  TupleDescInitEntry(tupdesc, (AttrNumber) 1, "word",
2192  TEXTOID, -1, 0);
2193  TupleDescInitEntry(tupdesc, (AttrNumber) 2, "ndoc",
2194  INT4OID, -1, 0);
2195  TupleDescInitEntry(tupdesc, (AttrNumber) 3, "nentry",
2196  INT4OID, -1, 0);
2197  funcctx->tuple_desc = BlessTupleDesc(tupdesc);
2198  funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
2199 
2200  MemoryContextSwitchTo(oldcontext);
2201 }
2202 
2203 static StatEntry *
2205 {
2206  StatEntry *node = stat->stack[stat->stackpos];
2207 
2208  if (node == NULL)
2209  return NULL;
2210 
2211  if (node->ndoc != 0)
2212  {
2213  /* return entry itself: we already was at left sublink */
2214  return node;
2215  }
2216  else if (node->right && node->right != stat->stack[stat->stackpos + 1])
2217  {
2218  /* go on right sublink */
2219  stat->stackpos++;
2220  node = node->right;
2221 
2222  /* find most-left value */
2223  for (;;)
2224  {
2225  stat->stack[stat->stackpos] = node;
2226  if (node->left)
2227  {
2228  stat->stackpos++;
2229  node = node->left;
2230  }
2231  else
2232  break;
2233  }
2234  Assert(stat->stackpos <= stat->maxdepth);
2235  }
2236  else
2237  {
2238  /* we already return all left subtree, itself and right subtree */
2239  if (stat->stackpos == 0)
2240  return NULL;
2241 
2242  stat->stackpos--;
2243  return walkStatEntryTree(stat);
2244  }
2245 
2246  return node;
2247 }
2248 
2249 static Datum
2251 {
2252  TSVectorStat *st;
2253  StatEntry *entry;
2254 
2255  st = (TSVectorStat *) funcctx->user_fctx;
2256 
2257  entry = walkStatEntryTree(st);
2258 
2259  if (entry != NULL)
2260  {
2261  Datum result;
2262  char *values[3];
2263  char ndoc[16];
2264  char nentry[16];
2265  HeapTuple tuple;
2266 
2267  values[0] = palloc(entry->lenlexeme + 1);
2268  memcpy(values[0], entry->lexeme, entry->lenlexeme);
2269  (values[0])[entry->lenlexeme] = '\0';
2270  sprintf(ndoc, "%d", entry->ndoc);
2271  values[1] = ndoc;
2272  sprintf(nentry, "%d", entry->nentry);
2273  values[2] = nentry;
2274 
2275  tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
2276  result = HeapTupleGetDatum(tuple);
2277 
2278  pfree(values[0]);
2279 
2280  /* mark entry as already visited */
2281  entry->ndoc = 0;
2282 
2283  return result;
2284  }
2285 
2286  return (Datum) 0;
2287 }
2288 
2289 static TSVectorStat *
2290 ts_stat_sql(MemoryContext persistentContext, text *txt, text *ws)
2291 {
2292  char *query = text_to_cstring(txt);
2293  TSVectorStat *stat;
2294  bool isnull;
2295  Portal portal;
2296  SPIPlanPtr plan;
2297 
2298  if ((plan = SPI_prepare(query, 0, NULL)) == NULL)
2299  /* internal error */
2300  elog(ERROR, "SPI_prepare(\"%s\") failed", query);
2301 
2302  if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL)
2303  /* internal error */
2304  elog(ERROR, "SPI_cursor_open(\"%s\") failed", query);
2305 
2306  SPI_cursor_fetch(portal, true, 100);
2307 
2308  if (SPI_tuptable == NULL ||
2309  SPI_tuptable->tupdesc->natts != 1 ||
2311  TSVECTOROID))
2312  ereport(ERROR,
2313  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2314  errmsg("ts_stat query must return one tsvector column")));
2315 
2316  stat = MemoryContextAllocZero(persistentContext, sizeof(TSVectorStat));
2317  stat->maxdepth = 1;
2318 
2319  if (ws)
2320  {
2321  char *buf;
2322 
2323  buf = VARDATA_ANY(ws);
2324  while (buf - VARDATA_ANY(ws) < VARSIZE_ANY_EXHDR(ws))
2325  {
2326  if (pg_mblen(buf) == 1)
2327  {
2328  switch (*buf)
2329  {
2330  case 'A':
2331  case 'a':
2332  stat->weight |= 1 << 3;
2333  break;
2334  case 'B':
2335  case 'b':
2336  stat->weight |= 1 << 2;
2337  break;
2338  case 'C':
2339  case 'c':
2340  stat->weight |= 1 << 1;
2341  break;
2342  case 'D':
2343  case 'd':
2344  stat->weight |= 1;
2345  break;
2346  default:
2347  stat->weight |= 0;
2348  }
2349  }
2350  buf += pg_mblen(buf);
2351  }
2352  }
2353 
2354  while (SPI_processed > 0)
2355  {
2356  uint64 i;
2357 
2358  for (i = 0; i < SPI_processed; i++)
2359  {
2360  Datum data = SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull);
2361 
2362  if (!isnull)
2363  stat = ts_accum(persistentContext, stat, data);
2364  }
2365 
2367  SPI_cursor_fetch(portal, true, 100);
2368  }
2369 
2371  SPI_cursor_close(portal);
2372  SPI_freeplan(plan);
2373  pfree(query);
2374 
2375  return stat;
2376 }
2377 
2378 Datum
2380 {
2381  FuncCallContext *funcctx;
2382  Datum result;
2383 
2384  if (SRF_IS_FIRSTCALL())
2385  {
2386  TSVectorStat *stat;
2387  text *txt = PG_GETARG_TEXT_PP(0);
2388 
2389  funcctx = SRF_FIRSTCALL_INIT();
2390  SPI_connect();
2391  stat = ts_stat_sql(funcctx->multi_call_memory_ctx, txt, NULL);
2392  PG_FREE_IF_COPY(txt, 0);
2393  ts_setup_firstcall(fcinfo, funcctx, stat);
2394  SPI_finish();
2395  }
2396 
2397  funcctx = SRF_PERCALL_SETUP();
2398  if ((result = ts_process_call(funcctx)) != (Datum) 0)
2399  SRF_RETURN_NEXT(funcctx, result);
2400  SRF_RETURN_DONE(funcctx);
2401 }
2402 
2403 Datum
2405 {
2406  FuncCallContext *funcctx;
2407  Datum result;
2408 
2409  if (SRF_IS_FIRSTCALL())
2410  {
2411  TSVectorStat *stat;
2412  text *txt = PG_GETARG_TEXT_PP(0);
2413  text *ws = PG_GETARG_TEXT_PP(1);
2414 
2415  funcctx = SRF_FIRSTCALL_INIT();
2416  SPI_connect();
2417  stat = ts_stat_sql(funcctx->multi_call_memory_ctx, txt, ws);
2418  PG_FREE_IF_COPY(txt, 0);
2419  PG_FREE_IF_COPY(ws, 1);
2420  ts_setup_firstcall(fcinfo, funcctx, stat);
2421  SPI_finish();
2422  }
2423 
2424  funcctx = SRF_PERCALL_SETUP();
2425  if ((result = ts_process_call(funcctx)) != (Datum) 0)
2426  SRF_RETURN_NEXT(funcctx, result);
2427  SRF_RETURN_DONE(funcctx);
2428 }
2429 
2430 
2431 /*
2432  * Triggers for automatic update of a tsvector column from text column(s)
2433  *
2434  * Trigger arguments are either
2435  * name of tsvector col, name of tsconfig to use, name(s) of text col(s)
2436  * name of tsvector col, name of regconfig col, name(s) of text col(s)
2437  * ie, tsconfig can either be specified by name, or indirectly as the
2438  * contents of a regconfig field in the row. If the name is used, it must
2439  * be explicitly schema-qualified.
2440  */
2441 Datum
2443 {
2444  return tsvector_update_trigger(fcinfo, false);
2445 }
2446 
2447 Datum
2449 {
2450  return tsvector_update_trigger(fcinfo, true);
2451 }
2452 
2453 static Datum
2455 {
2456  TriggerData *trigdata;
2457  Trigger *trigger;
2458  Relation rel;
2459  HeapTuple rettuple = NULL;
2460  int tsvector_attr_num,
2461  i;
2462  ParsedText prs;
2463  Datum datum;
2464  bool isnull;
2465  text *txt;
2466  Oid cfgId;
2467 
2468  /* Check call context */
2469  if (!CALLED_AS_TRIGGER(fcinfo)) /* internal error */
2470  elog(ERROR, "tsvector_update_trigger: not fired by trigger manager");
2471 
2472  trigdata = (TriggerData *) fcinfo->context;
2473  if (!TRIGGER_FIRED_FOR_ROW(trigdata->tg_event))
2474  elog(ERROR, "tsvector_update_trigger: must be fired for row");
2475  if (!TRIGGER_FIRED_BEFORE(trigdata->tg_event))
2476  elog(ERROR, "tsvector_update_trigger: must be fired BEFORE event");
2477 
2478  if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event))
2479  rettuple = trigdata->tg_trigtuple;
2480  else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event))
2481  rettuple = trigdata->tg_newtuple;
2482  else
2483  elog(ERROR, "tsvector_update_trigger: must be fired for INSERT or UPDATE");
2484 
2485  trigger = trigdata->tg_trigger;
2486  rel = trigdata->tg_relation;
2487 
2488  if (trigger->tgnargs < 3)
2489  elog(ERROR, "tsvector_update_trigger: arguments must be tsvector_field, ts_config, text_field1, ...)");
2490 
2491  /* Find the target tsvector column */
2492  tsvector_attr_num = SPI_fnumber(rel->rd_att, trigger->tgargs[0]);
2493  if (tsvector_attr_num == SPI_ERROR_NOATTRIBUTE)
2494  ereport(ERROR,
2495  (errcode(ERRCODE_UNDEFINED_COLUMN),
2496  errmsg("tsvector column \"%s\" does not exist",
2497  trigger->tgargs[0])));
2498  /* This will effectively reject system columns, so no separate test: */
2499  if (!IsBinaryCoercible(SPI_gettypeid(rel->rd_att, tsvector_attr_num),
2500  TSVECTOROID))
2501  ereport(ERROR,
2502  (errcode(ERRCODE_DATATYPE_MISMATCH),
2503  errmsg("column \"%s\" is not of tsvector type",
2504  trigger->tgargs[0])));
2505 
2506  /* Find the configuration to use */
2507  if (config_column)
2508  {
2509  int config_attr_num;
2510 
2511  config_attr_num = SPI_fnumber(rel->rd_att, trigger->tgargs[1]);
2512  if (config_attr_num == SPI_ERROR_NOATTRIBUTE)
2513  ereport(ERROR,
2514  (errcode(ERRCODE_UNDEFINED_COLUMN),
2515  errmsg("configuration column \"%s\" does not exist",
2516  trigger->tgargs[1])));
2517  if (!IsBinaryCoercible(SPI_gettypeid(rel->rd_att, config_attr_num),
2518  REGCONFIGOID))
2519  ereport(ERROR,
2520  (errcode(ERRCODE_DATATYPE_MISMATCH),
2521  errmsg("column \"%s\" is not of regconfig type",
2522  trigger->tgargs[1])));
2523 
2524  datum = SPI_getbinval(rettuple, rel->rd_att, config_attr_num, &isnull);
2525  if (isnull)
2526  ereport(ERROR,
2527  (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
2528  errmsg("configuration column \"%s\" must not be null",
2529  trigger->tgargs[1])));
2530  cfgId = DatumGetObjectId(datum);
2531  }
2532  else
2533  {
2534  List *names;
2535 
2536  names = stringToQualifiedNameList(trigger->tgargs[1]);
2537  /* require a schema so that results are not search path dependent */
2538  if (list_length(names) < 2)
2539  ereport(ERROR,
2540  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2541  errmsg("text search configuration name \"%s\" must be schema-qualified",
2542  trigger->tgargs[1])));
2543  cfgId = get_ts_config_oid(names, false);
2544  }
2545 
2546  /* initialize parse state */
2547  prs.lenwords = 32;
2548  prs.curwords = 0;
2549  prs.pos = 0;
2550  prs.words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs.lenwords);
2551 
2552  /* find all words in indexable column(s) */
2553  for (i = 2; i < trigger->tgnargs; i++)
2554  {
2555  int numattr;
2556 
2557  numattr = SPI_fnumber(rel->rd_att, trigger->tgargs[i]);
2558  if (numattr == SPI_ERROR_NOATTRIBUTE)
2559  ereport(ERROR,
2560  (errcode(ERRCODE_UNDEFINED_COLUMN),
2561  errmsg("column \"%s\" does not exist",
2562  trigger->tgargs[i])));
2563  if (!IsBinaryCoercible(SPI_gettypeid(rel->rd_att, numattr), TEXTOID))
2564  ereport(ERROR,
2565  (errcode(ERRCODE_DATATYPE_MISMATCH),
2566  errmsg("column \"%s\" is not of a character type",
2567  trigger->tgargs[i])));
2568 
2569  datum = SPI_getbinval(rettuple, rel->rd_att, numattr, &isnull);
2570  if (isnull)
2571  continue;
2572 
2573  txt = DatumGetTextPP(datum);
2574 
2575  parsetext(cfgId, &prs, VARDATA_ANY(txt), VARSIZE_ANY_EXHDR(txt));
2576 
2577  if (txt != (text *) DatumGetPointer(datum))
2578  pfree(txt);
2579  }
2580 
2581  /* make tsvector value */
2582  if (prs.curwords)
2583  {
2584  datum = PointerGetDatum(make_tsvector(&prs));
2585  isnull = false;
2586  rettuple = heap_modify_tuple_by_cols(rettuple, rel->rd_att,
2587  1, &tsvector_attr_num,
2588  &datum, &isnull);
2589  pfree(DatumGetPointer(datum));
2590  }
2591  else
2592  {
2593  TSVector out = palloc(CALCDATASIZE(0, 0));
2594 
2595  SET_VARSIZE(out, CALCDATASIZE(0, 0));
2596  out->size = 0;
2597  datum = PointerGetDatum(out);
2598  isnull = false;
2599  rettuple = heap_modify_tuple_by_cols(rettuple, rel->rd_att,
2600  1, &tsvector_attr_num,
2601  &datum, &isnull);
2602  pfree(prs.words);
2603  }
2604 
2605  return PointerGetDatum(rettuple);
2606 }
int SPI_fnumber(TupleDesc tupdesc, const char *fname)
Definition: spi.c:761
uint16 WordEntryPos
Definition: ts_type.h:63
uint32 nentry
Definition: tsvector_op.c:47
uint64 call_cntr
Definition: funcapi.h:65
#define DatumGetTSQuery(X)
Definition: ts_type.h:244
bool TS_execute(QueryItem *curitem, void *arg, uint32 flags, TSExecuteCallback chkcond)
Definition: tsvector_op.c:1815
#define PG_RETURN_POINTER(x)
Definition: fmgr.h:321
QueryOperator qoperator
Definition: ts_type.h:205
static void chooseNextStatEntry(MemoryContext persistentContext, TSVectorStat *stat, TSVector txt, uint32 low, uint32 high, uint32 offset)
Definition: tsvector_op.c:2091
#define TSPO_R_ONLY
Definition: tsvector_op.c:1430
Oid SPI_gettypeid(TupleDesc tupdesc, int fnumber)
Definition: spi.c:892
Datum tsvector_unnest(PG_FUNCTION_ARGS)
Definition: tsvector_op.c:636
Datum tsvector_filter(PG_FUNCTION_ARGS)
Definition: tsvector_op.c:826
TSVector make_tsvector(ParsedText *prs)
Definition: to_tsany.c:154
Datum tsvector_length(PG_FUNCTION_ARGS)
Definition: tsvector_op.c:192
#define VARDATA_ANY(PTR)
Definition: postgres.h:347
#define VARDATA(PTR)
Definition: postgres.h:303
bool(* TSExecuteCallback)(void *arg, QueryOperand *val, ExecPhraseData *data)
Definition: ts_utils.h:154
#define TS_EXEC_CALC_NOT
Definition: ts_utils.h:168
uint32 stackpos
Definition: tsvector_op.c:63
int numattr
Definition: bootstrap.c:73
static int uniqueLongPos(WordEntryPos *pos, int npos)
Definition: tsvector_op.c:1281
int SPI_connect(void)
Definition: spi.c:84
#define POSDATALEN(x, e)
Definition: ts_type.h:119
#define TEXTOID
Definition: pg_type.h:324
#define VARSIZE(PTR)
Definition: postgres.h:304
Datum tsvector_delete_str(PG_FUNCTION_ARGS)
Definition: tsvector_op.c:556
Datum ts_match_qv(PG_FUNCTION_ARGS)
Definition: tsvector_op.c:1915
bool allocated
Definition: ts_utils.h:137
#define SRF_IS_FIRSTCALL()
Definition: funcapi.h:285
#define PointerGetDatum(X)
Definition: postgres.h:562
#define PG_GETARG_DATUM(n)
Definition: fmgr.h:233
#define VARHDRSZ
Definition: c.h:445
#define PG_GETARG_TSQUERY(n)
Definition: ts_type.h:247
bool tsquery_requires_match(QueryItem *curitem)
Definition: tsvector_op.c:1865
#define DatumGetObjectId(X)
Definition: postgres.h:506
SPIPlanPtr SPI_prepare(const char *src, int nargs, Oid *argtypes)
Definition: spi.c:482
Datum tsvector_concat(PG_FUNCTION_ARGS)
Definition: tsvector_op.c:933
#define DatumGetTextPP(X)
Definition: fmgr.h:256
int SPI_finish(void)
Definition: spi.c:148
StatEntry * root
Definition: tsvector_op.c:65
#define Min(x, y)
Definition: c.h:806
#define _POSVECPTR(x, e)
Definition: ts_type.h:118
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
#define Int16GetDatum(X)
Definition: postgres.h:457
#define PG_RETURN_INT32(x)
Definition: fmgr.h:314
ArrayType * construct_array(Datum *elems, int nelems, Oid elmtype, int elmlen, bool elmbyval, char elmalign)
Definition: arrayfuncs.c:3306
#define INT4OID
Definition: pg_type.h:316
SPITupleTable * SPI_tuptable
Definition: spi.c:41
Datum tsvector_setweight(PG_FUNCTION_ARGS)
Definition: tsvector_op.c:202
uint32 len
Definition: ts_type.h:44
struct cursor * cur
Definition: ecpg.c:28
Datum plainto_tsquery(PG_FUNCTION_ARGS)
Definition: to_tsany.c:553
int errcode(int sqlerrcode)
Definition: elog.c:575
Oid get_ts_config_oid(List *names, bool missing_ok)
Definition: namespace.c:2596
Datum tsvector_update_trigger_bycolumn(PG_FUNCTION_ARGS)
Definition: tsvector_op.c:2448
#define QI_VAL
Definition: ts_type.h:143
return result
Definition: formatting.c:1632
Portal SPI_cursor_open(const char *name, SPIPlanPtr plan, Datum *Values, const char *Nulls, bool read_only)
Definition: spi.c:1029
HeapTuple heap_form_tuple(TupleDesc tupleDescriptor, Datum *values, bool *isnull)
Definition: heaptuple.c:692
int32 lenwords
Definition: ts_utils.h:86
Datum ts_stat1(PG_FUNCTION_ARGS)
Definition: tsvector_op.c:2379
#define DirectFunctionCall1(func, arg1)
Definition: fmgr.h:584
uint32 distance
Definition: ts_type.h:167
int16 distance
Definition: ts_type.h:192
#define MAXSTRPOS
Definition: ts_type.h:50
#define TSPO_BOTH
Definition: tsvector_op.c:1431
int compareWordEntryPos(const void *a, const void *b)
Definition: tsvector.c:33
unsigned int Oid
Definition: postgres_ext.h:31
#define OP_OR
Definition: ts_type.h:177
HeapTuple * vals
Definition: spi.h:28
#define WEP_SETPOS(x, v)
Definition: ts_type.h:83
#define SRF_PERCALL_SETUP()
Definition: funcapi.h:289
int natts
Definition: tupdesc.h:73
#define GETQUERY(x)
Definition: _int.h:142
uint64 SPI_processed
Definition: spi.c:39
int32 curwords
Definition: ts_utils.h:87
TupleDesc tuple_desc
Definition: funcapi.h:120
HeapTuple tg_trigtuple
Definition: trigger.h:35
signed int int32
Definition: c.h:256
WordEntry * arrb
Definition: tsvector_op.c:36
HeapTuple BuildTupleFromCStrings(AttInMetadata *attinmeta, char **values)
Definition: execTuples.c:1115
#define GETOPERAND(x)
Definition: ltree.h:118
#define PG_GETARG_TEXT_PP(n)
Definition: fmgr.h:273
#define POSDATAPTR(x, e)
Definition: ts_type.h:120
static bool TS_phrase_output(ExecPhraseData *data, ExecPhraseData *Ldata, ExecPhraseData *Rdata, int emit, int Loffset, int Roffset, int max_npos)
Definition: tsvector_op.c:1434
int32 * arrb
Definition: _int_bool.c:228
#define SRF_RETURN_NEXT(_funcctx, _result)
Definition: funcapi.h:291
#define OP_AND
Definition: ts_type.h:176
#define PG_GETARG_ARRAYTYPE_P(n)
Definition: array.h:244
Datum tsvector_delete_arr(PG_FUNCTION_ARGS)
Definition: tsvector_op.c:580
uint32 ndoc
Definition: tsvector_op.c:45
Datum tsvector_setweight_by_filter(PG_FUNCTION_ARGS)
Definition: tsvector_op.c:264
static bool checkcondition_str(void *checkval, QueryOperand *val, ExecPhraseData *data)
Definition: tsvector_op.c:1311
unsigned short uint16
Definition: c.h:267
void pfree(void *pointer)
Definition: mcxt.c:950
Datum array_to_tsvector(PG_FUNCTION_ARGS)
Definition: tsvector_op.c:756
#define TEXTARRAYOID
Definition: pg_type.h:470
#define ERROR
Definition: elog.h:43
#define INT2ARRAYOID
Definition: pg_type.h:464
void parsetext(Oid cfgId, ParsedText *prs, char *buf, int buflen)
Definition: ts_parse.c:358
#define MAXNUMPOS
Definition: ts_type.h:86
Datum ts_match_vq(PG_FUNCTION_ARGS)
Definition: tsvector_op.c:1923
uint8 weight
Definition: ts_type.h:155
Datum SPI_getbinval(HeapTuple tuple, TupleDesc tupdesc, int fnumber, bool *isnull)
Definition: spi.c:836
#define WEP_GETPOS(x)
Definition: ts_type.h:80
char * operand
Definition: ltxtquery_op.c:52
TupleDesc BlessTupleDesc(TupleDesc tupdesc)
Definition: execTuples.c:1031
#define INT2OID
Definition: pg_type.h:308
WordEntryPos pos[FLEXIBLE_ARRAY_MEMBER]
Definition: ts_type.h:68
#define WEP_SETWEIGHT(x, v)
Definition: ts_type.h:82
static char * buf
Definition: pg_test_fsync.c:66
#define memmove(d, s, c)
Definition: c.h:1058
text * cstring_to_text_with_len(const char *s, int len)
Definition: varlena.c:161
static StatEntry * walkStatEntryTree(TSVectorStat *stat)
Definition: tsvector_op.c:2204
void check_stack_depth(void)
Definition: postgres.c:3102
#define SPI_ERROR_NOATTRIBUTE
Definition: spi.h:44
static TSVectorStat * ts_stat_sql(MemoryContext persistentContext, text *txt, text *ws)
Definition: tsvector_op.c:2290
AttInMetadata * attinmeta
Definition: funcapi.h:99
static int silly_cmp_tsvector(const TSVector a, const TSVector b)
Definition: tsvector_op.c:77
int32 size
Definition: ts_type.h:102
WordEntryPos * pos
Definition: ts_utils.h:139
#define DatumGetBool(X)
Definition: postgres.h:399
unsigned int uint32
Definition: c.h:268
ParsedWord * words
Definition: ts_utils.h:85
#define TSVectorGetDatum(X)
Definition: ts_type.h:128
uint32 lenlexeme
Definition: tsvector_op.c:50
#define DatumGetTSVector(X)
Definition: ts_type.h:126
void TupleDescInitEntry(TupleDesc desc, AttrNumber attributeNumber, const char *attributeName, Oid oidtypeid, int32 typmod, int attdim)
Definition: tupdesc.c:497
uint32 haspos
Definition: ts_type.h:44
#define ereport(elevel, rest)
Definition: elog.h:122
#define TSQueryGetDatum(X)
Definition: ts_type.h:246
static void ts_setup_firstcall(FunctionCallInfo fcinfo, FuncCallContext *funcctx, TSVectorStat *stat)
Definition: tsvector_op.c:2158
bool IsBinaryCoercible(Oid srctype, Oid targettype)
static TSVector tsvector_delete_by_indices(TSVector tsv, int *indices_to_delete, int indices_count)
Definition: tsvector_op.c:459
#define TSPO_L_ONLY
Definition: tsvector_op.c:1429
StatEntry ** stack
Definition: tsvector_op.c:62
signed char int8
Definition: c.h:254
#define CALCDATASIZE(x, lenstr)
Definition: hstore.h:72
void SPI_freetuptable(SPITupleTable *tuptable)
Definition: spi.c:970
char ** tgargs
Definition: reltrigger.h:40
QueryItemType type
Definition: ts_type.h:204
void * palloc0(Size size)
Definition: mcxt.c:878
#define PG_RETURN_BOOL(x)
Definition: fmgr.h:319
static void insertStatEntry(MemoryContext persistentContext, TSVectorStat *stat, TSVector txt, uint32 off)
Definition: tsvector_op.c:2025
uintptr_t Datum
Definition: postgres.h:372
Datum difference(PG_FUNCTION_ARGS)
#define PG_RETURN_DATUM(x)
Definition: fmgr.h:313
TSVectorData * TSVector
Definition: ts_type.h:107
#define DatumGetChar(X)
Definition: postgres.h:415
AttInMetadata * TupleDescGetAttInMetadata(TupleDesc tupdesc)
Definition: execTuples.c:1068
Datum tsvector_strip(PG_FUNCTION_ARGS)
Definition: tsvector_op.c:159
TupleDesc tupdesc
Definition: spi.h:27
Trigger * tg_trigger
Definition: trigger.h:37
TupleDesc rd_att
Definition: rel.h:115
HeapTuple tg_newtuple
Definition: trigger.h:36
#define CHAROID
Definition: pg_type.h:296
void * MemoryContextAllocZero(MemoryContext context, Size size)
Definition: mcxt.c:742
#define Max(x, y)
Definition: c.h:800
Datum tsvector_to_array(PG_FUNCTION_ARGS)
Definition: tsvector_op.c:728
#define NULL
Definition: c.h:229
#define CALLED_AS_TRIGGER(fcinfo)
Definition: trigger.h:25
#define Assert(condition)
Definition: c.h:675
uint32 maxdepth
Definition: tsvector_op.c:60
#define OP_PHRASE
Definition: ts_type.h:178
TriggerEvent tg_event
Definition: trigger.h:33
#define TS_EXEC_PHRASE_NO_POS
Definition: ts_utils.h:175
Datum to_tsvector(PG_FUNCTION_ARGS)
Definition: to_tsany.c:257
MemoryContext multi_call_memory_ctx
Definition: funcapi.h:109
static int compare_text_lexemes(const void *va, const void *vb)
Definition: tsvector_op.c:437
struct StatEntry * left
Definition: tsvector_op.c:48
int32 pos
Definition: ts_utils.h:88
static Datum ts_process_call(FuncCallContext *funcctx)
Definition: tsvector_op.c:2250
static int list_length(const List *l)
Definition: pg_list.h:89
static int check_weight(TSVector txt, WordEntry *wptr, int8 weight)
Definition: tsvector_op.c:2004
Datum ts_stat2(PG_FUNCTION_ARGS)
Definition: tsvector_op.c:2404
static int tsvector_bsearch(const TSVector tsv, char *lexeme, int lexeme_len)
Definition: tsvector_op.c:393
#define PG_FREE_IF_COPY(ptr, n)
Definition: fmgr.h:225
int32 tsCompareString(char *a, int lena, char *b, int lenb, bool prefix)
Definition: tsvector_op.c:1160
int pg_mblen(const char *mbstr)
Definition: mbutils.c:771
#define HeapTupleGetDatum(tuple)
Definition: funcapi.h:222
Datum tsvector_update_trigger_byid(PG_FUNCTION_ARGS)
Definition: tsvector_op.c:2442
TupleDesc CreateTemplateTupleDesc(int natts, bool hasoid)
Definition: tupdesc.c:41
uint32 pos
Definition: ts_type.h:44
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:963
HeapTuple heap_modify_tuple_by_cols(HeapTuple tuple, TupleDesc tupleDesc, int nCols, int *replCols, Datum *replValues, bool *replIsnull)
Definition: heaptuple.c:862
uint32 length
Definition: ts_type.h:167
#define STATENTRYHDRSZ
Definition: tsvector_op.c:54
#define DatumGetPointer(X)
Definition: postgres.h:555
#define REGCONFIGOID
Definition: pg_type.h:624
#define TRIGGER_FIRED_BEFORE(event)
Definition: trigger.h:88
void deconstruct_array(ArrayType *array, Oid elmtype, int elmlen, bool elmbyval, char elmalign, Datum **elemsp, bool **nullsp, int *nelemsp)
Definition: arrayfuncs.c:3475
uint32 left
Definition: ts_type.h:193
int SPI_freeplan(SPIPlanPtr plan)
Definition: spi.c:609
static Datum values[MAXATTR]
Definition: bootstrap.c:163
void SPI_cursor_close(Portal portal)
Definition: spi.c:1403
char * text_to_cstring(const text *t)
Definition: varlena.c:182
char * values
Definition: tsvector_op.c:38
List * stringToQualifiedNameList(const char *string)
Definition: regproc.c:1686
#define TRIGGER_FIRED_BY_INSERT(event)
Definition: trigger.h:70
struct StatEntry StatEntry
void * user_fctx
Definition: funcapi.h:90
#define VARSIZE_ANY_EXHDR(PTR)
Definition: postgres.h:340
void * palloc(Size size)
Definition: mcxt.c:849
int errmsg(const char *fmt,...)
Definition: elog.c:797
#define TSVECTOROID
Definition: pg_type.h:615
int32 * arre
Definition: _int_bool.c:229
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition: mcxt.c:707
static Datum tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column)
Definition: tsvector_op.c:2454
#define STRPTR(x)
Definition: hstore.h:76
int32 size
Definition: ts_type.h:217
int i
int16 tgnargs
Definition: reltrigger.h:37
void * arg
static bool TS_phrase_execute(QueryItem *curitem, void *arg, uint32 flags, TSExecuteCallback chkcond, ExecPhraseData *data)
Definition: tsvector_op.c:1572
void SPI_cursor_fetch(Portal portal, bool forward, long count)
Definition: spi.c:1347
static int compare_int(const void *va, const void *vb)
Definition: tsvector_op.c:426
#define PG_GETARG_TSVECTOR_COPY(n)
Definition: ts_type.h:130
#define PG_GETARG_TSVECTOR(n)
Definition: ts_type.h:129
Definition: c.h:439
#define PG_FUNCTION_ARGS
Definition: fmgr.h:158
static TSVectorStat * ts_accum(MemoryContext persistentContext, TSVectorStat *stat, Datum data)
Definition: tsvector_op.c:2123
#define LIMITPOS(x)
Definition: ts_type.h:87
bool prefix
Definition: ts_type.h:159
WordEntry * arre
Definition: tsvector_op.c:37
#define SET_VARSIZE(PTR, len)
Definition: postgres.h:328
#define ARRPTR(x)
Definition: cube.c:26
#define elog
Definition: elog.h:219
#define WEP_GETWEIGHT(x)
Definition: ts_type.h:79
#define qsort(a, b, c, d)
Definition: port.h:440
#define SHORTALIGN(LEN)
Definition: c.h:584
static bool checkclass_str(CHKVAL *chkval, WordEntry *entry, QueryOperand *val, ExecPhraseData *data)
Definition: tsvector_op.c:1197
#define TRIGGER_FIRED_FOR_ROW(event)
Definition: trigger.h:82
#define compareStatWord(a, e, t)
Definition: tsvector_op.c:2019
#define MAXENTRYPOS
Definition: ts_type.h:85
typedef BOOL(WINAPI *MINIDUMPWRITEDUMP)(HANDLE hProcess
Definition: pg_list.h:45
#define TRIGGER_FIRED_BY_UPDATE(event)
Definition: trigger.h:76
int16 AttrNumber
Definition: attnum.h:21
long val
Definition: informix.c:689
static int32 add_pos(TSVector src, WordEntry *srcptr, TSVector dest, WordEntry *destptr, int32 maxpos)
Definition: tsvector_op.c:357
#define DirectFunctionCall2(func, arg1, arg2)
Definition: fmgr.h:586
#define PG_GETARG_CHAR(n)
Definition: fmgr.h:238
Datum ts_match_tt(PG_FUNCTION_ARGS)
Definition: tsvector_op.c:1953
Datum ts_match_tq(PG_FUNCTION_ARGS)
Definition: tsvector_op.c:1975
struct StatEntry * right
Definition: tsvector_op.c:49
#define SRF_RETURN_DONE(_funcctx)
Definition: funcapi.h:309
static int cmp(const chr *x, const chr *y, size_t len)
Definition: regc_locale.c:742
#define compareEntry(pa, a, pb, b)
Definition: tsvector_op.c:347
#define TSVECTORCMPFUNC(type, action, ret)
Definition: tsvector_op.c:136
char lexeme[FLEXIBLE_ARRAY_MEMBER]
Definition: tsvector_op.c:51
Relation tg_relation
Definition: trigger.h:34
static const float weights[]
Definition: tsrank.c:25
#define SRF_FIRSTCALL_INIT()
Definition: funcapi.h:287
int32 weight
Definition: tsvector_op.c:58
#define OP_NOT
Definition: ts_type.h:175