/* Listing extracted from the PostgreSQL source code browser (git master): tsvector_op.c */
1/*-------------------------------------------------------------------------
2 *
3 * tsvector_op.c
4 * operations over tsvector
5 *
6 * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
7 *
8 *
9 * IDENTIFICATION
10 * src/backend/utils/adt/tsvector_op.c
11 *
12 *-------------------------------------------------------------------------
13 */
14#include "postgres.h"
15
16#include <limits.h>
17
18#include "access/htup_details.h"
19#include "catalog/namespace.h"
20#include "catalog/pg_type.h"
21#include "commands/trigger.h"
22#include "common/int.h"
23#include "executor/spi.h"
24#include "funcapi.h"
25#include "lib/qunique.h"
26#include "mb/pg_wchar.h"
27#include "miscadmin.h"
28#include "parser/parse_coerce.h"
29#include "tsearch/ts_utils.h"
30#include "utils/array.h"
31#include "utils/builtins.h"
32#include "utils/regproc.h"
33#include "utils/rel.h"
34
35
/*
 * Lookup context used by the tsvector matching code later in this file.
 *
 * NOTE(review): the embedded listing numbers jump from 37 to 40, so two
 * members appear to be missing here; code later in this file reads
 * chkval->arrb and chkval->arre -- confirm against the real source.
 */
36typedef struct
37{
40 char *values; /* base address to which WordEntry "pos" offsets are added */
41 char *operand; /* base address to which a query operand's "distance" is added */
42} CHKVAL;
43
44
/*
 * Node type linked through child pointers (the ndoc comment below refers to
 * "walking through the tree").
 *
 * NOTE(review): the listing is incomplete here -- embedded numbers 49 and
 * 51-54 are absent, so the remaining members (including the "lexeme" member
 * referenced by STATENTRYHDRSZ) and the closing "} StatEntry;" are not
 * visible.  Confirm against the real source.
 */
45typedef struct StatEntry
46{
47 uint32 ndoc; /* zero indicates that we were already here
48 * while walking through the tree */
50 struct StatEntry *left;
55
/* Byte offset of the "lexeme" member, i.e. the size of the fixed header. */
56#define STATENTRYHDRSZ (offsetof(StatEntry, lexeme))
57
69
70
/*
 * Forward declarations of file-local helpers.
 *
 * NOTE(review): embedded numbers 73, 76 and 79 are missing from this
 * listing, so the first two prototypes are not shown in full.
 */
71static TSTernaryValue TS_execute_recurse(QueryItem *curitem, void *arg,
72 uint32 flags,
74static bool TS_execute_locations_recurse(QueryItem *curitem,
75 void *arg,
77 List **locations);
78static int tsvector_bsearch(const TSVectorData *tsv, char *lexeme, int lexeme_len);
80
81
/*
 * Three-way comparison of two tsvectors "a" and "b"; returns a negative,
 * zero or positive value.
 *
 * Comparison proceeds in this order: total datum size (VARSIZE), number of
 * entries, then entry by entry: presence of position data, lexeme text,
 * number of positions, and finally each position and its weight.
 *
 * NOTE(review): embedded numbers 86 and 116-117 are missing from this
 * listing, so the line naming the function and its parameters, and the
 * declarations of "ap" and "bp", are not visible.  The TSVECTORCMPFUNC macro
 * below calls silly_cmp_tsvector(a, b), which is presumably this function.
 */
82/*
83 * Order: haspos, len, word, for all positions (pos, weight)
84 */
85static int
87{
/* Cheap whole-value checks first: datum size, then entry count. */
88 if (VARSIZE(a) < VARSIZE(b))
89 return -1;
90 else if (VARSIZE(a) > VARSIZE(b))
91 return 1;
92 else if (a->size < b->size)
93 return -1;
94 else if (a->size > b->size)
95 return 1;
96 else
97 {
98 const WordEntry *aptr = ARRPTR(a);
99 const WordEntry *bptr = ARRPTR(b);
100 int i = 0;
101 int res;
102
103
/* Both vectors have the same number of entries; walk them in parallel. */
104 for (i = 0; i < a->size; i++)
105 {
106 if (aptr->haspos != bptr->haspos)
107 {
/* the entry that carries positions sorts first */
108 return (aptr->haspos > bptr->haspos) ? -1 : 1;
109 }
110 else if ((res = tsCompareString(STRPTR(a) + aptr->pos, aptr->len, STRPTR(b) + bptr->pos, bptr->len, false)) != 0)
111 {
112 return res;
113 }
114 else if (aptr->haspos)
115 {
118 int j;
119
/* the entry with more positions sorts first */
120 if (POSDATALEN(a, aptr) != POSDATALEN(b, bptr))
121 return (POSDATALEN(a, aptr) > POSDATALEN(b, bptr)) ? -1 : 1;
122
123 for (j = 0; j < POSDATALEN(a, aptr); j++)
124 {
/* a larger position, then a larger weight value, sorts first */
125 if (WEP_GETPOS(*ap) != WEP_GETPOS(*bp))
126 {
127 return (WEP_GETPOS(*ap) > WEP_GETPOS(*bp)) ? -1 : 1;
128 }
129 else if (WEP_GETWEIGHT(*ap) != WEP_GETWEIGHT(*bp))
130 {
131 return (WEP_GETWEIGHT(*ap) > WEP_GETWEIGHT(*bp)) ? -1 : 1;
132 }
133 ap++, bp++;
134 }
135 }
136
137 aptr++;
138 bptr++;
139 }
140 }
141
/* no difference found anywhere */
142 return 0;
143}
144
/*
 * Generate a function named tsvector_<type> with the
 * "Datum f(PG_FUNCTION_ARGS)" signature.
 *
 * The generated function fetches two tsvector arguments, compares them with
 * silly_cmp_tsvector(), calls PG_FREE_IF_COPY on both arguments, and returns
 * the value of "res action 0" through PG_RETURN_<ret>.
 *
 * The trailing extern declaration is there so that an invocation of the
 * macro can be followed by a semicolon (see the comment inside the macro).
 */
145#define TSVECTORCMPFUNC( type, action, ret ) \
146Datum \
147tsvector_##type(PG_FUNCTION_ARGS) \
148{ \
149 TSVector a = PG_GETARG_TSVECTOR(0); \
150 TSVector b = PG_GETARG_TSVECTOR(1); \
151 int res = silly_cmp_tsvector(a, b); \
152 PG_FREE_IF_COPY(a,0); \
153 PG_FREE_IF_COPY(b,1); \
154 PG_RETURN_##ret( res action 0 ); \
155} \
156/* keep compiler quiet - no extra ; */ \
157extern int no_such_variable
158
166
/*
 * Build a copy of the input tsvector that keeps only the lexemes: every
 * output entry is written with haspos = 0 and no position data is copied.
 *
 * NOTE(review): embedded numbers 168, 170 and 197 are missing from this
 * listing, so the function-name line, the declaration of "in" and the
 * final return statement are not visible.
 */
167Datum
169{
171 TSVector out;
172 int i,
173 len = 0;
174 WordEntry *arrin = ARRPTR(in),
175 *arrout;
176 char *cur;
177
/* Sum up the lexeme bytes to size the output. */
178 for (i = 0; i < in->size; i++)
179 len += arrin[i].len;
180
181 len = CALCDATASIZE(in->size, len);
182 out = (TSVector) palloc0(len);
183 SET_VARSIZE(out, len);
/* size is set before ARRPTR/STRPTR are applied to "out" */
184 out->size = in->size;
185 arrout = ARRPTR(out);
186 cur = STRPTR(out);
187 for (i = 0; i < in->size; i++)
188 {
/* lexemes are packed back to back in the output string area */
189 memcpy(cur, STRPTR(in) + arrin[i].pos, arrin[i].len);
190 arrout[i].haspos = 0;
191 arrout[i].len = arrin[i].len;
192 arrout[i].pos = cur - STRPTR(out);
193 cur += arrout[i].len;
194 }
195
196 PG_FREE_IF_COPY(in, 0);
198}
199
/*
 * Return the number of entries (in->size) of the input tsvector as an int32.
 *
 * NOTE(review): embedded numbers 201 and 203 are missing from this listing,
 * so the function-name line and the declaration of "in" are not visible.
 */
200Datum
202{
/* read the count before the argument copy (if any) is released */
204 int32 ret = in->size;
205
206 PG_FREE_IF_COPY(in, 0);
207 PG_RETURN_INT32(ret);
208}
209
/*
 * Copy the input tsvector and assign one weight to every position of every
 * entry in the copy.
 *
 * The weight is given as a character in argument 1 and mapped to a number:
 * 'A'/'a' = 3, 'B'/'b' = 2, 'C'/'c' = 1, 'D'/'d' = 0.  Any other character
 * raises an error via elog(ERROR).
 *
 * NOTE(review): embedded numbers 211, 213 and 264 are missing from this
 * listing, so the function-name line, the declaration of "in" and the
 * final return statement are not visible.
 */
210Datum
212{
214 char cw = PG_GETARG_CHAR(1);
215 TSVector out;
216 int i,
217 j;
218 WordEntry *entry;
219 WordEntryPos *p;
220 int w = 0;
221
222 switch (cw)
223 {
224 case 'A':
225 case 'a':
226 w = 3;
227 break;
228 case 'B':
229 case 'b':
230 w = 2;
231 break;
232 case 'C':
233 case 'c':
234 w = 1;
235 break;
236 case 'D':
237 case 'd':
238 w = 0;
239 break;
240 default:
241 /* internal error */
242 elog(ERROR, "unrecognized weight: %d", cw);
243 }
244
/* Work on a byte-for-byte copy; the input itself is not modified. */
245 out = (TSVector) palloc(VARSIZE(in));
246 memcpy(out, in, VARSIZE(in));
247 entry = ARRPTR(out);
248 i = out->size;
249 while (i--)
250 {
/* entries for which POSDATALEN is zero are left untouched */
251 if ((j = POSDATALEN(out, entry)) != 0)
252 {
253 p = POSDATAPTR(out, entry);
254 while (j--)
255 {
256 WEP_SETWEIGHT(*p, w);
257 p++;
258 }
259 }
260 entry++;
261 }
262
263 PG_FREE_IF_COPY(in, 0);
265}
266
267/*
268 * setweight(tsin tsvector, char_weight "char", lexemes "text"[])
269 *
270 * Assign weight w to elements of tsin that are listed in lexemes.
 *
 * The weight character is mapped to a number: 'A'/'a' = 3, 'B'/'b' = 2,
 * 'C'/'c' = 1, 'D'/'d' = 0; any other character raises an error.
 * NULL elements of the lexemes array are ignored.
 *
 * NOTE(review): this listing is incomplete -- embedded numbers 273, 275,
 * 277, 279, 285, 311-312, 315, 332-334, 348-349 and 351 are missing.  Among
 * other things the function-name line, the declarations/initialization of
 * tsin, tsout, nlexemes and nulls, the computation of lex, lex_len and
 * lex_pos, and the return statement are not visible.
271 */
272Datum
274{
276 char char_weight = PG_GETARG_CHAR(1);
278
280 int i,
281 j,
282 nlexemes,
283 weight;
284 WordEntry *entry;
286 bool *nulls;
287
288 switch (char_weight)
289 {
290 case 'A':
291 case 'a':
292 weight = 3;
293 break;
294 case 'B':
295 case 'b':
296 weight = 2;
297 break;
298 case 'C':
299 case 'c':
300 weight = 1;
301 break;
302 case 'D':
303 case 'd':
304 weight = 0;
305 break;
306 default:
307 /* internal error */
308 elog(ERROR, "unrecognized weight: %c", char_weight);
309 }
310
313 entry = ARRPTR(tsout);
314
316
317 /*
318 * Assuming that lexemes array is significantly shorter than tsvector we
319 * can iterate through lexemes performing binary search of each lexeme
320 * from lexemes in tsvector.
321 */
322 for (i = 0; i < nlexemes; i++)
323 {
324 char *lex;
325 int lex_len,
326 lex_pos;
327
328 /* Ignore null array elements, they surely don't match */
329 if (nulls[i])
330 continue;
331
335
/* a negative lex_pos is treated as "lexeme not present"; entries without positions are skipped */
336 if (lex_pos >= 0 && (j = POSDATALEN(tsout, entry + lex_pos)) != 0)
337 {
338 WordEntryPos *p = POSDATAPTR(tsout, entry + lex_pos);
339
340 while (j--)
341 {
342 WEP_SETWEIGHT(*p, weight);
343 p++;
344 }
345 }
346 }
347
350
352}
353
/*
 * Compare the lexemes of two WordEntry items with tsCompareString() in
 * non-prefix mode.  "pa" and "pb" are the base addresses to which each
 * entry's "pos" offset is added to locate its lexeme bytes.
 */
354#define compareEntry(pa, a, pb, b) \
355 tsCompareString((pa) + (a)->pos, (a)->len, \
356 (pb) + (b)->pos, (b)->len, \
357 false)
358
359/*
360 * Add positions from src to dest after offsetting them by maxpos.
361 * Return the number added (might be less than expected due to overflow)
 *
 * If destptr has no positions yet (haspos is false), its position count is
 * first reset to zero; haspos is set to 1 once at least one position has
 * been added.
 *
 * NOTE(review): embedded numbers 364-366, 370, 372 and 384-385 are missing
 * from this listing, so the function-name/parameter lines, some local
 * declarations (including "slen") and the statements that actually store
 * each position inside the loop are not visible.
362 */
363static int32
367{
368 uint16 *clen = &_POSVECPTR(dest, destptr)->npos;
369 int i;
371 startlen;
373 *dpos = POSDATAPTR(dest, destptr);
374
375 if (!destptr->haspos)
376 *clen = 0;
377
378 startlen = *clen;
/*
 * Stop when the source positions are exhausted, when dest already holds
 * MAXNUMPOS positions, or when the last stored position equals
 * MAXENTRYPOS - 1.
 */
379 for (i = 0;
380 i < slen && *clen < MAXNUMPOS &&
381 (*clen == 0 || WEP_GETPOS(dpos[*clen - 1]) != MAXENTRYPOS - 1);
382 i++)
383 {
386 (*clen)++;
387 }
388
389 if (*clen != startlen)
390 destptr->haspos = 1;
391 return *clen - startlen;
392}
393
394/*
395 * Perform binary search of given lexeme in TSVector.
396 * Returns lexeme position in TSVector's entry array or -1 if lexeme wasn't
397 * found.
 *
 * The search interval is the half-open range [StopLow, StopHigh) over the
 * tsvector's entry array.
 *
 * NOTE(review): embedded numbers 400, 405, 412, 414 and 418 are missing
 * from this listing, so the function-name line, the declaration of
 * StopMiddle, parts of the comparison call and the statement executed when
 * cmp < 0 are not visible.
398 */
399static int
401{
402 const WordEntry *arrin = ARRPTR(tsv);
403 int StopLow = 0,
404 StopHigh = tsv->size,
406 cmp;
407
408 while (StopLow < StopHigh)
409 {
410 StopMiddle = (StopLow + StopHigh) / 2;
411
413 STRPTR(tsv) + arrin[StopMiddle].pos,
415 false);
416
417 if (cmp < 0)
419 else if (cmp > 0)
420 StopLow = StopMiddle + 1;
421 else /* found it */
422 return StopMiddle;
423 }
424
/* interval became empty without an exact match */
425 return -1;
426}
427
428/*
429 * qsort comparator functions
430 */
431
/*
 * qsort-style comparator for arrays of int.
 *
 * Each argument points at one int element; the two values are handed to
 * pg_cmp_s32() and its result is returned unchanged.
 */
static int
compare_int(const void *va, const void *vb)
{
	const int  *lhs = (const int *) va;
	const int  *rhs = (const int *) vb;

	return pg_cmp_s32(*lhs, *rhs);
}
440
/*
 * qsort-style comparator for arrays of Datum: compares two lexemes with
 * tsCompareString() in non-prefix mode.
 *
 * NOTE(review): embedded numbers 446-449 are missing from this listing, so
 * the code deriving alex/alex_len and blex/blex_len from the two Datums is
 * not visible.
 */
441static int
442compare_text_lexemes(const void *va, const void *vb)
443{
444 Datum a = *((const Datum *) va);
445 Datum b = *((const Datum *) vb);
450
451 return tsCompareString(alex, alex_len, blex, blex_len, false);
452}
453
454/*
455 * Internal routine to delete lexemes from TSVector by array of offsets.
456 *
457 * int *indices_to_delete -- array of lexeme offsets to delete (modified here!)
458 * int indices_count -- size of that array
459 *
460 * Returns new TSVector without given lexemes along with their positions
461 * and weights.
 *
 * NOTE(review): this listing is incomplete -- embedded numbers 464,
 * 467-468, 484-486, 493, 501-502, 527-529 and 545 are missing.  The
 * function-name line, the declarations of tsout and arrin, the sort/de-dup
 * calls, the allocation of tsout, the initialization of arrout and dataout,
 * part of the position-data copy and the final size adjustment are not
 * visible.
462 */
463static TSVector
465 int indices_count)
466{
469 *arrout;
470 char *data = STRPTR(tsv),
471 *dataout;
472 int i, /* index in arrin */
473 j, /* index in arrout */
474 k, /* index in indices_to_delete */
475 curoff; /* index in dataout area */
476
477 /*
478 * Sort the filter array to simplify membership checks below. Also, get
479 * rid of any duplicate entries, so that we can assume that indices_count
480 * is exactly equal to the number of lexemes that will be removed.
481 */
482 if (indices_count > 1)
483 {
487 }
488
489 /*
490 * Here we overestimate tsout size, since we don't know how much space is
491 * used by the deleted lexeme(s). We will set exact size below.
492 */
494
495 /* This count must be correct because STRPTR(tsout) relies on it. */
496 tsout->size = tsv->size - indices_count;
497
498 /*
499 * Copy tsv to tsout, skipping lexemes listed in indices_to_delete.
500 */
503 curoff = 0;
504 for (i = j = k = 0; i < tsv->size; i++)
505 {
506 /*
507 * If current i is present in indices_to_delete, skip this lexeme.
508 * Since indices_to_delete is already sorted, we only need to check
509 * the current (k'th) entry.
510 */
511 if (k < indices_count && i == indices_to_delete[k])
512 {
513 k++;
514 continue;
515 }
516
517 /* Copy lexeme and its positions and weights */
518 memcpy(dataout + curoff, data + arrin[i].pos, arrin[i].len);
519 arrout[j].haspos = arrin[i].haspos;
520 arrout[j].len = arrin[i].len;
521 arrout[j].pos = curoff;
522 curoff += arrin[i].len;
523 if (arrin[i].haspos)
524 {
/* position block = uint16 count followed by the WordEntryPos items */
525 int len = POSDATALEN(tsv, arrin + i) * sizeof(WordEntryPos)
526 + sizeof(uint16);
527
530 STRPTR(tsv) + SHORTALIGN(arrin[i].pos + arrin[i].len),
531 len);
532 curoff += len;
533 }
534
535 j++;
536 }
537
538 /*
539 * k should now be exactly equal to indices_count. If it isn't then the
540 * caller provided us with indices outside of [0, tsv->size) range and
541 * estimation of tsout's size is wrong.
542 */
543 Assert(k == indices_count);
544
546 return tsout;
547}
548
549/*
550 * Delete given lexeme from tsvector.
551 * Implementation of user-level ts_delete(tsvector, text).
 *
 * NOTE(review): embedded numbers 554-571 are missing from this listing, so
 * only the return type of this function is visible; its name, parameters
 * and body must be taken from the real source.
552 */
553Datum
572
573/*
574 * Delete given array of lexemes from tsvector.
575 * Implementation of user-level ts_delete(tsvector, text[]).
 *
 * NULL elements of the lexeme array are ignored.
 *
 * NOTE(review): this listing is incomplete -- embedded numbers 578, 580,
 * 582, 585-587, 590, 608-610, 613, 616, 618-620 and 622 are missing.  The
 * function-name line, several declarations (including skip_count and
 * skip_indices), the array deconstruction, the computation of lex, lex_len
 * and lex_pos, the statement executed when a lexeme is found, the call that
 * builds tsout, and the return statement are not visible.
576 */
577Datum
579{
581 tsout;
583 int i,
584 nlex,
588 bool *nulls;
589
591
592 /*
593 * In typical use case array of lexemes to delete is relatively small. So
594 * here we optimize things for that scenario: iterate through lexarr
595 * performing binary search of each lexeme from lexarr in tsvector.
596 */
/* at most one index per array element can be collected */
597 skip_indices = palloc0(nlex * sizeof(int));
598 for (i = skip_count = 0; i < nlex; i++)
599 {
600 char *lex;
601 int lex_len,
602 lex_pos;
603
604 /* Ignore null array elements, they surely don't match */
605 if (nulls[i])
606 continue;
607
611
612 if (lex_pos >= 0)
614 }
615
617
621
623}
624
625/*
626 * Expand tsvector as table with following columns:
627 * lexeme: lexeme text
628 * positions: integer array of lexeme positions
629 * weights: char array of weights corresponding to positions
 *
 * This is a set-returning function: the first call builds the tuple
 * descriptor and stores a copy of the input tsvector in funcctx->user_fctx;
 * each later call handles the entry whose index is funcctx->call_cntr.
 * For entries without position data, the positions and weights columns are
 * NULL.
 *
 * NOTE(review): this listing is incomplete -- embedded numbers 632,
 * 634-635, 642, 662, 667, 675, 679-680, 694, 696, 700-701, 709 and 713 are
 * missing.  The function-name line, the declarations of funcctx, tsin,
 * arrin, posv and positions, the SRF init/per-call setup, the code filling
 * values[], and the SRF return statements are not visible.
630 */
631Datum
633{
636
637 if (SRF_IS_FIRSTCALL())
638 {
639 MemoryContext oldcontext;
640 TupleDesc tupdesc;
641
/* allocations below must outlive this call, hence the context switch */
643 oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
644
645 tupdesc = CreateTemplateTupleDesc(3);
646 TupleDescInitEntry(tupdesc, (AttrNumber) 1, "lexeme",
647 TEXTOID, -1, 0);
648 TupleDescInitEntry(tupdesc, (AttrNumber) 2, "positions",
649 INT2ARRAYOID, -1, 0);
650 TupleDescInitEntry(tupdesc, (AttrNumber) 3, "weights",
651 TEXTARRAYOID, -1, 0);
652 if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
653 elog(ERROR, "return type must be a row type");
654 TupleDescFinalize(tupdesc);
655 funcctx->tuple_desc = tupdesc;
656
657 funcctx->user_fctx = PG_GETARG_TSVECTOR_COPY(0);
658
659 MemoryContextSwitchTo(oldcontext);
660 }
661
663 tsin = (TSVector) funcctx->user_fctx;
664
/* one output row per tsvector entry */
665 if (funcctx->call_cntr < tsin->size)
666 {
668 char *data = STRPTR(tsin);
669 HeapTuple tuple;
670 int j,
671 i = funcctx->call_cntr;
672 bool nulls[] = {false, false, false};
673 Datum values[3];
674
676
677 if (arrin[i].haspos)
678 {
681 Datum *weights;
682 char weight;
683
684 /*
685 * Internally tsvector stores position and weight in the same
686 * uint16 (2 bits for weight, 14 for position). Here we extract
687 * that in two separate arrays.
688 */
689 posv = _POSVECPTR(tsin, arrin + i);
690 positions = palloc(posv->npos * sizeof(Datum));
691 weights = palloc(posv->npos * sizeof(Datum));
692 for (j = 0; j < posv->npos; j++)
693 {
/* numeric weight 0 maps to 'D', 1 to 'C', 2 to 'B', 3 to 'A' */
695 weight = 'D' - WEP_GETWEIGHT(posv->pos[j]);
697 1));
698 }
699
702 }
703 else
704 {
705 nulls[1] = nulls[2] = true;
706 }
707
708 tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
710 }
711 else
712 {
714 }
715}
716
717/*
718 * Convert tsvector to array of lexemes.
 *
 * Builds one Datum per tsvector entry and passes them to
 * construct_array_builtin() with element type TEXTOID; positions and
 * weights are not part of the result.
 *
 * NOTE(review): embedded numbers 721, 723-724, 733 and 740 are missing from
 * this listing, so the function-name line, the declarations of tsin and
 * arrin, the start of the per-element conversion call and one statement
 * before the return are not visible.
719 */
720Datum
722{
725 Datum *elements;
726 int i;
727 ArrayType *array;
728
729 elements = palloc(tsin->size * sizeof(Datum));
730
731 for (i = 0; i < tsin->size; i++)
732 {
734 arrin[i].len));
735 }
736
737 array = construct_array_builtin(elements, tsin->size, TEXTOID);
738
/* the Datum array itself is no longer needed once the array is built */
739 pfree(elements);
741 PG_RETURN_POINTER(array);
742}
743
744/*
745 * Build tsvector from array of lexemes.
 *
 * NULL elements and empty strings are rejected with an error.  The
 * remaining lexemes are sorted and de-duplicated, then stored as entries
 * without position data (haspos = 0).
 *
 * NOTE(review): this listing is incomplete -- embedded numbers 748,
 * 750-753, 761, 770-771, 774-776, 783, 785, 794-795, 798, 803 and 813 are
 * missing.  The function-name line, the declarations of v, tsout, arrout
 * and dlexemes, the array deconstruction, the ereport() openings, the
 * empty-string test, the qsort call, the allocation of tsout, the
 * computation of lex_len and the return statement are not visible.
746 */
747Datum
749{
754 bool *nulls;
755 int nitems,
756 i,
757 tslen,
758 datalen = 0;
759 char *cur;
760
762
763 /*
764 * Reject nulls and zero length strings (maybe we should just ignore them,
765 * instead?)
766 */
767 for (i = 0; i < nitems; i++)
768 {
769 if (nulls[i])
772 errmsg("lexeme array may not contain nulls")));
773
777 errmsg("lexeme array may not contain empty strings")));
778 }
779
780 /* Sort and de-dup, because this is required for a valid tsvector. */
781 if (nitems > 1)
782 {
784 nitems = qunique(dlexemes, nitems, sizeof(Datum),
786 }
787
788 /* Calculate space needed for surviving lexemes. */
789 for (i = 0; i < nitems; i++)
790 datalen += VARSIZE(DatumGetPointer(dlexemes[i])) - VARHDRSZ;
791 tslen = CALCDATASIZE(nitems, datalen);
792
793 /* Allocate and fill tsvector. */
796 tsout->size = nitems;
797
799 cur = STRPTR(tsout);
800 for (i = 0; i < nitems; i++)
801 {
802 char *lex = VARDATA(DatumGetPointer(dlexemes[i]));
804
/* lexemes are packed back to back; pos is the offset within the string area */
805 memcpy(cur, lex, lex_len);
806 arrout[i].haspos = 0;
807 arrout[i].len = lex_len;
808 arrout[i].pos = cur - STRPTR(tsout);
809 cur += lex_len;
810 }
811
812 PG_FREE_IF_COPY(v, 0);
814}
815
816/*
817 * ts_filter(): keep only lexemes with given weights in tsvector.
 *
 * The requested weights are folded into a bit mask: 'A'/'a' = 8,
 * 'B'/'b' = 4, 'C'/'c' = 2, 'D'/'d' = 1.  A position is kept when bit
 * (1 << its weight) is set in the mask.  Entries without position data, and
 * entries for which no position survives, are dropped from the output.
 * NULL weights and unrecognized weight characters raise an error.
 *
 * NOTE(review): this listing is incomplete -- embedded numbers 820, 822,
 * 824-825, 829, 837, 844-845, 848, 868-869, 874, 876-877, 881, 889-891,
 * 907, 909, 917, 919 and 921-922 are missing.  The function-name line, the
 * declarations of tsin, weights, arrin, dweights and posvin, the array
 * deconstruction, the ereport() openings, the extraction of char_weight,
 * the allocation of tsout, the setup of arrout/dataout/posvin/posvout, the
 * lexeme copy, the final compaction and the return statement are not
 * visible.
818 */
819Datum
821{
823 tsout;
826 *arrout;
827 char *datain = STRPTR(tsin),
828 *dataout;
830 bool *nulls;
831 int nweights;
832 int i,
833 j;
834 int cur_pos = 0;
835 char mask = 0;
836
838
/* Translate the requested weight characters into a bit mask. */
839 for (i = 0; i < nweights; i++)
840 {
841 char char_weight;
842
843 if (nulls[i])
846 errmsg("weight array may not contain nulls")));
847
849 switch (char_weight)
850 {
851 case 'A':
852 case 'a':
853 mask = mask | 8;
854 break;
855 case 'B':
856 case 'b':
857 mask = mask | 4;
858 break;
859 case 'C':
860 case 'c':
861 mask = mask | 2;
862 break;
863 case 'D':
864 case 'd':
865 mask = mask | 1;
866 break;
867 default:
870 errmsg("unrecognized weight: \"%c\"", char_weight)));
871 }
872 }
873
/* provisional entry count; replaced by the real count (j) after the loop */
875 tsout->size = tsin->size;
878
/* i walks the input entries, j counts the entries kept in the output */
879 for (i = j = 0; i < tsin->size; i++)
880 {
882 *posvout;
883 int npos = 0;
884 int k;
885
/* entries without position data are never kept */
886 if (!arrin[i].haspos)
887 continue;
888
892
893 for (k = 0; k < posvin->npos; k++)
894 {
895 if (mask & (1 << WEP_GETWEIGHT(posvin->pos[k])))
896 posvout->pos[npos++] = posvin->pos[k];
897 }
898
899 /* if no satisfactory positions found, skip lexeme */
900 if (!npos)
901 continue;
902
903 arrout[j].haspos = true;
904 arrout[j].len = arrin[i].len;
905 arrout[j].pos = cur_pos;
906
908 posvout->npos = npos;
910 cur_pos += POSDATALEN(tsout, arrout + j) * sizeof(WordEntryPos) +
911 sizeof(uint16);
912 j++;
913 }
914
915 tsout->size = j;
916 if (dataout != STRPTR(tsout))
918
920
923}
924
/*
 * Merge two tsvectors (in1, in2) into a newly built tsvector "out".
 *
 * The entry arrays of both inputs are walked in parallel in merge fashion,
 * ordered by the lexeme comparison result "cmp".  Entries taken from in1
 * keep their position data as is; position data taken from in2 goes through
 * add_pos() with "maxpos", the largest position found in in1.  When both
 * inputs contain the same lexeme, a single output entry is produced.
 *
 * An error is raised if the resulting string area exceeds MAXSTRPOS bytes.
 *
 * NOTE(review): this listing is incomplete -- embedded numbers 926,
 * 928-929, 932, 941-942, 979-980, 993, 1004, 1028, 1048, 1062, 1085, 1110,
 * 1126, 1134, 1138 and 1140 are missing.  The function-name line, the
 * declarations of in1, in2, ptr1, output_bytes, output_size and cmp, the
 * allocation of "out", the computation of cmp, the statements preceding
 * each position-data copy (presumably alignment of dataoff), the errcode
 * line and the final size computation are not visible.
 */
925Datum
927{
930 TSVector out;
931 WordEntry *ptr;
933 *ptr2;
934 WordEntryPos *p;
935 int maxpos = 0,
936 i,
937 j,
938 i1,
939 i2,
940 dataoff,
943 char *data,
944 *data1,
945 *data2;
946
947 /* Get max position in in1; we'll need this to offset in2's positions */
948 ptr = ARRPTR(in1);
949 i = in1->size;
950 while (i--)
951 {
952 if ((j = POSDATALEN(in1, ptr)) != 0)
953 {
954 p = POSDATAPTR(in1, ptr);
955 while (j--)
956 {
957 if (WEP_GETPOS(*p) > maxpos)
958 maxpos = WEP_GETPOS(*p);
959 p++;
960 }
961 }
962 ptr++;
963 }
964
/* i1/i2 count the entries still to be consumed from each input */
965 ptr1 = ARRPTR(in1);
966 ptr2 = ARRPTR(in2);
967 data1 = STRPTR(in1);
968 data2 = STRPTR(in2);
969 i1 = in1->size;
970 i2 = in2->size;
971
972 /*
973 * Conservative estimate of space needed. We might need all the data in
974 * both inputs, and conceivably add a pad byte before position data for
975 * each item where there was none before.
976 */
977 output_bytes = VARSIZE(in1) + VARSIZE(in2) + i1 + i2;
978
981
982 /*
983 * We must make out->size valid so that STRPTR(out) is sensible. We'll
984 * collapse out any unused space at the end.
985 */
986 out->size = in1->size + in2->size;
987
988 ptr = ARRPTR(out);
989 data = STRPTR(out);
990 dataoff = 0;
/* Main merge loop: runs while both inputs still have entries. */
991 while (i1 && i2)
992 {
994
995 if (cmp < 0)
996 { /* in1 first */
997 ptr->haspos = ptr1->haspos;
998 ptr->len = ptr1->len;
999 memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
1000 ptr->pos = dataoff;
1001 dataoff += ptr1->len;
1002 if (ptr->haspos)
1003 {
/* in1's position block (uint16 count + positions) is copied unchanged */
1005 memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
1006 dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
1007 }
1008
1009 ptr++;
1010 ptr1++;
1011 i1--;
1012 }
1013 else if (cmp > 0)
1014 { /* in2 first */
1015 ptr->haspos = ptr2->haspos;
1016 ptr->len = ptr2->len;
1017 memcpy(data + dataoff, data2 + ptr2->pos, ptr2->len);
1018 ptr->pos = dataoff;
1019 dataoff += ptr2->len;
1020 if (ptr->haspos)
1021 {
1022 int addlen = add_pos(in2, ptr2, out, ptr, maxpos);
1023
/* if add_pos() stored nothing, the entry ends up without positions */
1024 if (addlen == 0)
1025 ptr->haspos = 0;
1026 else
1027 {
1029 dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
1030 }
1031 }
1032
1033 ptr++;
1034 ptr2++;
1035 i2--;
1036 }
1037 else
1038 {
/* same lexeme in both inputs: emit one entry, combining positions */
1039 ptr->haspos = ptr1->haspos | ptr2->haspos;
1040 ptr->len = ptr1->len;
1041 memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
1042 ptr->pos = dataoff;
1043 dataoff += ptr1->len;
1044 if (ptr->haspos)
1045 {
1046 if (ptr1->haspos)
1047 {
1049 memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
1050 dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
/* the uint16 count was already accounted for above, so only the added positions are counted here */
1051 if (ptr2->haspos)
1052 dataoff += add_pos(in2, ptr2, out, ptr, maxpos) * sizeof(WordEntryPos);
1053 }
1054 else /* must have ptr2->haspos */
1055 {
1056 int addlen = add_pos(in2, ptr2, out, ptr, maxpos);
1057
1058 if (addlen == 0)
1059 ptr->haspos = 0;
1060 else
1061 {
1063 dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
1064 }
1065 }
1066 }
1067
1068 ptr++;
1069 ptr1++;
1070 ptr2++;
1071 i1--;
1072 i2--;
1073 }
1074 }
1075
/* Copy whatever is left of in1 (same handling as the "in1 first" case). */
1076 while (i1)
1077 {
1078 ptr->haspos = ptr1->haspos;
1079 ptr->len = ptr1->len;
1080 memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
1081 ptr->pos = dataoff;
1082 dataoff += ptr1->len;
1083 if (ptr->haspos)
1084 {
1086 memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
1087 dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
1088 }
1089
1090 ptr++;
1091 ptr1++;
1092 i1--;
1093 }
1094
/* Copy whatever is left of in2 (same handling as the "in2 first" case). */
1095 while (i2)
1096 {
1097 ptr->haspos = ptr2->haspos;
1098 ptr->len = ptr2->len;
1099 memcpy(data + dataoff, data2 + ptr2->pos, ptr2->len);
1100 ptr->pos = dataoff;
1101 dataoff += ptr2->len;
1102 if (ptr->haspos)
1103 {
1104 int addlen = add_pos(in2, ptr2, out, ptr, maxpos);
1105
1106 if (addlen == 0)
1107 ptr->haspos = 0;
1108 else
1109 {
1111 dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
1112 }
1113 }
1114
1115 ptr++;
1116 ptr2++;
1117 i2--;
1118 }
1119
1120 /*
1121 * Instead of checking each offset individually, we check for overflow of
1122 * pos fields once at the end.
1123 */
1124 if (dataoff > MAXSTRPOS)
1125 ereport(ERROR,
1127 errmsg("string is too long for tsvector (%d bytes, max %d bytes)", dataoff, MAXSTRPOS)));
1128
1129 /*
1130 * Adjust sizes (asserting that we didn't overrun the original estimates)
1131 * and collapse out any unused array entries.
1132 */
1133 output_size = ptr - ARRPTR(out);
1135 out->size = output_size;
/* shrinking out->size moves STRPTR(out), so the string data may have to be moved; the regions can overlap, hence memmove */
1136 if (data != STRPTR(out))
1137 memmove(STRPTR(out), data, dataoff);
1139 Assert(output_bytes <= VARSIZE(out));
1141
1142 PG_FREE_IF_COPY(in1, 0);
1143 PG_FREE_IF_COPY(in2, 1);
1144 PG_RETURN_POINTER(out);
1145}
1146
1147/*
1148 * Compare two strings by tsvector rules.
1149 *
1150 * if prefix = true then it returns zero value iff b has prefix a
1151 */
1152int32
1153tsCompareString(char *a, int lena, char *b, int lenb, bool prefix)
1154{
1155 int cmp;
1156
1157 if (lena == 0)
1158 {
1159 if (prefix)
1160 cmp = 0; /* empty string is prefix of anything */
1161 else
1162 cmp = (lenb > 0) ? -1 : 0;
1163 }
1164 else if (lenb == 0)
1165 {
1166 cmp = (lena > 0) ? 1 : 0;
1167 }
1168 else
1169 {
1170 cmp = memcmp(a, b, Min((unsigned int) lena, (unsigned int) lenb));
1171
1172 if (prefix)
1173 {
1174 if (cmp == 0 && lena > lenb)
1175 cmp = 1; /* a is longer, so not a prefix of b */
1176 }
1177 else if (cmp == 0 && lena != lenb)
1178 {
1179 cmp = (lena < lenb) ? -1 : 1;
1180 }
1181 }
1182
1183 return cmp;
1184}
1185
1186/*
1187 * Check weight info or/and fill 'data' with the required positions
 *
 * Result for an entry that has position data:
 *  - val->weight set and data given: data receives only the positions whose
 *    weight bit is set in val->weight (freshly allocated); TS_YES if any
 *    position remains, else TS_NO.
 *  - val->weight set, no data: TS_YES as soon as one position has a
 *    matching weight, else TS_NO.
 *  - no weight restriction, data given: data is pointed at the entry's own
 *    position array (not allocated here); TS_YES.
 *  - neither: TS_YES.
 * Result for an entry without position data: TS_MAYBE if data is given,
 * TS_YES otherwise.
 *
 * NOTE(review): embedded numbers 1190-1191, 1199, 1205, 1210-1211, 1225
 * and 1245 are missing from this listing, so the function-name/parameter
 * lines, the declarations of posvec, posvec_iter and dptr, and the
 * statement storing a matching position are not visible.
1188 */
1189static TSTernaryValue
1192{
1193 TSTernaryValue result = TS_NO;
1194
/* callers must hand in an empty position list (or none at all) */
1195 Assert(data == NULL || data->npos == 0);
1196
1197 if (entry->haspos)
1198 {
1200
1201 /*
1202 * We can't use the _POSVECPTR macro here because the pointer to the
1203 * tsvector's lexeme storage is already contained in chkval->values.
1204 */
1206 (chkval->values + SHORTALIGN(entry->pos + entry->len));
1207
1208 if (val->weight && data)
1209 {
1212
1213 /*
1214 * Filter position information by weights
1215 */
/* sized for the worst case: every position passes the filter */
1216 dptr = data->pos = palloc_array(WordEntryPos, posvec->npos);
1217 data->allocated = true;
1218
1219 /* Is there a position with a matching weight? */
1220 while (posvec_iter < posvec->pos + posvec->npos)
1221 {
1222 /* If true, append this position to the data->pos */
1223 if (val->weight & (1 << WEP_GETWEIGHT(*posvec_iter)))
1224 {
1226 dptr++;
1227 }
1228
1229 posvec_iter++;
1230 }
1231
1232 data->npos = dptr - data->pos;
1233
1234 if (data->npos > 0)
1235 result = TS_YES;
1236 else
1237 {
/* nothing matched: release the array and return data to its empty state */
1238 pfree(data->pos);
1239 data->pos = NULL;
1240 data->allocated = false;
1241 }
1242 }
1243 else if (val->weight)
1244 {
1246
1247 /* Is there a position with a matching weight? */
1248 while (posvec_iter < posvec->pos + posvec->npos)
1249 {
1250 if (val->weight & (1 << WEP_GETWEIGHT(*posvec_iter)))
1251 {
1252 result = TS_YES;
1253 break; /* no need to go further */
1254 }
1255
1256 posvec_iter++;
1257 }
1258 }
1259 else if (data)
1260 {
/* no filtering needed: borrow the entry's own array instead of copying */
1261 data->npos = posvec->npos;
1262 data->pos = posvec->pos;
1263 data->allocated = false;
1264 result = TS_YES;
1265 }
1266 else
1267 {
1268 /* simplest case: no weight check, positions not needed */
1269 result = TS_YES;
1270 }
1271 }
1272 else
1273 {
1274 /*
1275 * Position info is lacking, so if the caller requires it, we can only
1276 * say that maybe there is a match.
1277 *
1278 * Notice, however, that we *don't* check val->weight here.
1279 * Historically, stripped tsvectors are considered to match queries
1280 * whether or not the query has a weight restriction; that's a little
1281 * dubious but we'll preserve the behavior.
1282 */
1283 if (data)
1284 result = TS_MAYBE;
1285 else
1286 result = TS_YES;
1287 }
1288
1289 return result;
1290}
1291
1292/*
1293 * TS_execute callback for matching a tsquery operand to plain tsvector data
 *
 * First a binary search over [chkval->arrb, chkval->arre) looks for an
 * exact match of the operand.  If the operand is a prefix operand
 * (val->prefix) and either no definite match was found or the caller wants
 * positions, the entries following the search position are scanned for as
 * long as the operand is a prefix of them, and their positions are
 * collected, sorted and de-duplicated into "data".
 *
 * NOTE(review): this listing is incomplete -- embedded numbers 1296, 1298,
 * 1301, 1319, 1325, 1337, 1343, 1357, 1364, 1366, 1395, 1400 and 1432 are
 * missing.  The function-name/parameter line, the declarations of chkval,
 * StopMiddle, allpos and subres, the calls that evaluate a matching entry,
 * the statement executed when difference < 0, the corner-case adjustment,
 * part of the scan-loop condition, the (re)allocation of allpos and the
 * comparator argument of qunique are not visible.
1294 */
1295static TSTernaryValue
1297{
1299 WordEntry *StopLow = chkval->arrb;
1300 WordEntry *StopHigh = chkval->arre;
1302 TSTernaryValue res = TS_NO;
1303
1304 /* Loop invariant: StopLow <= val < StopHigh */
1305 while (StopLow < StopHigh)
1306 {
1307 int difference;
1308
/* computed this way rather than via the sum of the two pointers */
1309 StopMiddle = StopLow + (StopHigh - StopLow) / 2;
1310 difference = tsCompareString(chkval->operand + val->distance,
1311 val->length,
1312 chkval->values + StopMiddle->pos,
1313 StopMiddle->len,
1314 false);
1315
1316 if (difference == 0)
1317 {
1318 /* Check weight info & fill 'data' with positions */
1320 break;
1321 }
1322 else if (difference > 0)
1323 StopLow = StopMiddle + 1;
1324 else
1326 }
1327
1328 /*
1329 * If it's a prefix search, we should also consider lexemes that the
1330 * search term is a prefix of (which will necessarily immediately follow
1331 * the place we found in the above loop). But we can skip them if there
1332 * was a definite match on the exact term AND the caller doesn't need
1333 * position info.
1334 */
1335 if (val->prefix && (res != TS_YES || data))
1336 {
1338 int npos = 0,
1339 totalpos = 0;
1340
1341 /* adjust start position for corner case */
1342 if (StopLow >= StopHigh)
1344
1345 /* we don't try to re-use any data from the initial match */
1346 if (data)
1347 {
1348 if (data->allocated)
1349 pfree(data->pos);
1350 data->pos = NULL;
1351 data->allocated = false;
1352 data->npos = 0;
1353 }
1354 res = TS_NO;
1355
/* keep scanning while positions are wanted or no definite match exists yet */
1356 while ((res != TS_YES || data) &&
1358 tsCompareString(chkval->operand + val->distance,
1359 val->length,
1360 chkval->values + StopMiddle->pos,
1361 StopMiddle->len,
1362 true) == 0)
1363 {
1365
1367
1368 if (subres != TS_NO)
1369 {
1370 if (data)
1371 {
1372 /*
1373 * We need to join position information
1374 */
1375 if (subres == TS_MAYBE)
1376 {
1377 /*
1378 * No position info for this match, so we must report
1379 * MAYBE overall.
1380 */
1381 res = TS_MAYBE;
1382 /* forget any previous positions */
1383 npos = 0;
1384 /* don't leak storage */
1385 if (allpos)
1386 pfree(allpos);
1387 break;
1388 }
1389
/* grow the capacity (256, then doubling) until the new positions fit */
1390 while (npos + data->npos > totalpos)
1391 {
1392 if (totalpos == 0)
1393 {
1394 totalpos = 256;
1396 }
1397 else
1398 {
1399 totalpos *= 2;
1401 }
1402 }
1403
1404 memcpy(allpos + npos, data->pos, sizeof(WordEntryPos) * data->npos);
1405 npos += data->npos;
1406
1407 /* don't leak storage from individual matches */
1408 if (data->allocated)
1409 pfree(data->pos);
1410 data->pos = NULL;
1411 data->allocated = false;
1412 /* it's important to reset data->npos before next loop */
1413 data->npos = 0;
1414 }
1415 else
1416 {
1417 /* Don't need positions, just handle YES/MAYBE */
1418 if (subres == TS_YES || res == TS_NO)
1419 res = subres;
1420 }
1421 }
1422
1423 StopMiddle++;
1424 }
1425
1426 if (data && npos > 0)
1427 {
1428 /* Sort and make unique array of found positions */
1429 data->pos = allpos;
1430 qsort(data->pos, npos, sizeof(WordEntryPos), compareWordEntryPos);
1431 data->npos = qunique(data->pos, npos, sizeof(WordEntryPos),
1433 data->allocated = true;
1434 res = TS_YES;
1435 }
1436 }
1437
1438 return res;
1439}
1440
1441/*
1442 * Compute output position list for a tsquery operator in phrase mode.
1443 *
1444 * Merge the position lists in Ldata and Rdata as specified by "emit",
1445 * returning the result list into *data. The input position lists must be
1446 * sorted and unique, and the output will be as well.
1447 *
1448 * data: pointer to initially-all-zeroes output struct, or NULL
1449 * Ldata, Rdata: input position lists
1450 * emit: bitmask of TSPO_XXX flags
1451 * Loffset: offset to be added to Ldata positions before comparing/outputting
1452 * Roffset: offset to be added to Rdata positions before comparing/outputting
1453 * max_npos: maximum possible required size of output position array
1454 *
1455 * Loffset and Roffset should not be negative, else we risk trying to output
1456 * negative positions, which won't fit into WordEntryPos.
1457 *
1458 * The result is boolean (TS_YES or TS_NO), but for the caller's convenience
1459 * we return it as TSTernaryValue.
1460 *
1461 * Returns TS_YES if any positions were emitted to *data; or if data is NULL,
1462 * returns TS_YES if any positions would have been emitted.
1463 */
1464#define TSPO_L_ONLY 0x01 /* emit positions appearing only in L */
1465#define TSPO_R_ONLY 0x02 /* emit positions appearing only in R */
1466#define TSPO_BOTH 0x04 /* emit positions appearing in both L&R */
1467
/*
 * Merge-join two position lists; the full contract is given in the comment
 * that precedes the TSPO_XXX definitions.
 *
 * NOTE(review): embedded numbers 1469-1471 are missing from this listing,
 * so the function-name line and the first parameters (the code below uses
 * "data", "Ldata" and "Rdata") are not visible.
 */
1468static TSTernaryValue
1472 int emit,
1473 int Loffset,
1474 int Roffset,
1475 int max_npos)
1476{
1477 int Lindex,
1478 Rindex;
1479
1480 /* Loop until both inputs are exhausted */
1481 Lindex = Rindex = 0;
1482 while (Lindex < Ldata->npos || Rindex < Rdata->npos)
1483 {
1484 int Lpos,
1485 Rpos;
/* stays 0 when nothing is to be emitted in this iteration */
1486 int output_pos = 0;
1487
1488 /*
1489 * Fetch current values to compare. WEP_GETPOS() is needed because
1490 * ExecPhraseData->data can point to a tsvector's WordEntryPosVector.
1491 */
1492 if (Lindex < Ldata->npos)
1493 Lpos = WEP_GETPOS(Ldata->pos[Lindex]) + Loffset;
1494 else
1495 {
1496 /* L array exhausted, so we're done if R_ONLY isn't set */
1497 if (!(emit & TSPO_R_ONLY))
1498 break;
/* INT_MAX acts as a sentinel so the remaining R positions compare as smaller */
1499 Lpos = INT_MAX;
1500 }
1501 if (Rindex < Rdata->npos)
1502 Rpos = WEP_GETPOS(Rdata->pos[Rindex]) + Roffset;
1503 else
1504 {
1505 /* R array exhausted, so we're done if L_ONLY isn't set */
1506 if (!(emit & TSPO_L_ONLY))
1507 break;
1508 Rpos = INT_MAX;
1509 }
1510
1511 /* Merge-join the two input lists */
1512 if (Lpos < Rpos)
1513 {
1514 /* Lpos is not matched in Rdata, should we output it? */
1515 if (emit & TSPO_L_ONLY)
1516 output_pos = Lpos;
1517 Lindex++;
1518 }
1519 else if (Lpos == Rpos)
1520 {
1521 /* Lpos and Rpos match ... should we output it? */
1522 if (emit & TSPO_BOTH)
1523 output_pos = Rpos;
1524 Lindex++;
1525 Rindex++;
1526 }
1527 else /* Lpos > Rpos */
1528 {
1529 /* Rpos is not matched in Ldata, should we output it? */
1530 if (emit & TSPO_R_ONLY)
1531 output_pos = Rpos;
1532 Rindex++;
1533 }
1534
/* only strictly positive positions are emitted */
1535 if (output_pos > 0)
1536 {
1537 if (data)
1538 {
1539 /* Store position, first allocating output array if needed */
1540 if (data->pos == NULL)
1541 {
1542 data->pos = (WordEntryPos *)
1543 palloc(max_npos * sizeof(WordEntryPos));
1544 data->allocated = true;
1545 }
1546 data->pos[data->npos++] = output_pos;
1547 }
1548 else
1549 {
1550 /*
1551 * Exact positions not needed, so return TS_YES as soon as we
1552 * know there is at least one.
1553 */
1554 return TS_YES;
1555 }
1556 }
1557 }
1558
1559 if (data && data->npos > 0)
1560 {
1561 /* Let's assert we didn't overrun the array */
1562 Assert(data->npos <= max_npos);
1563 return TS_YES;
1564 }
1565 return TS_NO;
1566}
1567
1568/*
1569 * Execute tsquery at or below an OP_PHRASE operator.
1570 *
1571 * This handles tsquery execution at recursion levels where we need to care
1572 * about match locations.
1573 *
1574 * In addition to the same arguments used for TS_execute, the caller may pass
1575 * a preinitialized-to-zeroes ExecPhraseData struct, to be filled with lexeme
1576 * match position info on success. data == NULL if no position data need be
1577 * returned.
1578 * Note: the function assumes data != NULL for operators other than OP_PHRASE.
1579 * This is OK because an outside call always starts from an OP_PHRASE node,
1580 * and all internal recursion cases pass data != NULL.
1581 *
1582 * The detailed semantics of the match data, given that the function returned
1583 * TS_YES (successful match), are:
1584 *
1585 * npos > 0, negate = false:
1586 * query is matched at specified position(s) (and only those positions)
1587 * npos > 0, negate = true:
1588 * query is matched at all positions *except* specified position(s)
1589 * npos = 0, negate = true:
1590 * query is matched at all positions
1591 * npos = 0, negate = false:
1592 * disallowed (this should result in TS_NO or TS_MAYBE, as appropriate)
1593 *
1594 * Successful matches also return a "width" value which is the match width in
1595 * lexemes, less one. Hence, "width" is zero for simple one-lexeme matches,
1596 * and is the sum of the phrase operator distances for phrase matches. Note
1597 * that when width > 0, the listed positions represent the ends of matches not
1598 * the starts. (This unintuitive rule is needed to avoid possibly generating
1599 * negative positions, which wouldn't fit into the WordEntryPos arrays.)
1600 *
1601 * If the TSExecuteCallback function reports that an operand is present
1602 * but fails to provide position(s) for it, we will return TS_MAYBE when
1603 * it is possible but not certain that the query is matched.
1604 *
1605 * When the function returns TS_NO or TS_MAYBE, it must return npos = 0,
1606 * negate = false (which is the state initialized by the caller); but the
1607 * "width" output in such cases is undefined.
1608 */
1609static TSTernaryValue
1610TS_phrase_execute(QueryItem *curitem, void *arg, uint32 flags,
1613{
1615 Rdata;
1617 rmatch;
1618 int Loffset,
1619 Roffset,
1620 maxwidth;
1621
1622 /* since this function recurses, it could be driven to stack overflow */
1624
1625 /* ... and let's check for query cancel while we're at it */
1627
1628 if (curitem->type == QI_VAL)
1629 return chkcond(arg, (QueryOperand *) curitem, data);
1630
1631 switch (curitem->qoperator.oper)
1632 {
1633 case OP_NOT:
1634
1635 /*
1636 * We need not touch data->width, since a NOT operation does not
1637 * change the match width.
1638 */
1639 if (flags & TS_EXEC_SKIP_NOT)
1640 {
1641 /* with SKIP_NOT, report NOT as "match everywhere" */
1642 Assert(data->npos == 0 && !data->negate);
1643 data->negate = true;
1644 return TS_YES;
1645 }
1646 switch (TS_phrase_execute(curitem + 1, arg, flags, chkcond, data))
1647 {
1648 case TS_NO:
1649 /* change "match nowhere" to "match everywhere" */
1650 Assert(data->npos == 0 && !data->negate);
1651 data->negate = true;
1652 return TS_YES;
1653 case TS_YES:
1654 if (data->npos > 0)
1655 {
1656 /* we have some positions, invert negate flag */
1657 data->negate = !data->negate;
1658 return TS_YES;
1659 }
1660 else if (data->negate)
1661 {
1662 /* change "match everywhere" to "match nowhere" */
1663 data->negate = false;
1664 return TS_NO;
1665 }
1666 /* Should not get here if result was TS_YES */
1667 Assert(false);
1668 break;
1669 case TS_MAYBE:
1670 /* match positions are, and remain, uncertain */
1671 return TS_MAYBE;
1672 }
1673 break;
1674
1675 case OP_PHRASE:
1676 case OP_AND:
1677 memset(&Ldata, 0, sizeof(Ldata));
1678 memset(&Rdata, 0, sizeof(Rdata));
1679
1680 lmatch = TS_phrase_execute(curitem + curitem->qoperator.left,
1681 arg, flags, chkcond, &Ldata);
1682 if (lmatch == TS_NO)
1683 return TS_NO;
1684
1685 rmatch = TS_phrase_execute(curitem + 1,
1686 arg, flags, chkcond, &Rdata);
1687 if (rmatch == TS_NO)
1688 return TS_NO;
1689
1690 /*
1691 * If either operand has no position information, then we can't
1692 * return reliable position data, only a MAYBE result.
1693 */
1694 if (lmatch == TS_MAYBE || rmatch == TS_MAYBE)
1695 return TS_MAYBE;
1696
1697 if (curitem->qoperator.oper == OP_PHRASE)
1698 {
1699 /*
1700 * Compute Loffset and Roffset suitable for phrase match, and
1701 * compute overall width of whole phrase match.
1702 */
1703 Loffset = curitem->qoperator.distance + Rdata.width;
1704 Roffset = 0;
1705 if (data)
1706 data->width = curitem->qoperator.distance +
1707 Ldata.width + Rdata.width;
1708 }
1709 else
1710 {
1711 /*
1712 * For OP_AND, set output width and alignment like OP_OR (see
1713 * comment below)
1714 */
1715 maxwidth = Max(Ldata.width, Rdata.width);
1716 Loffset = maxwidth - Ldata.width;
1717 Roffset = maxwidth - Rdata.width;
1718 if (data)
1719 data->width = maxwidth;
1720 }
1721
1722 if (Ldata.negate && Rdata.negate)
1723 {
1724 /* !L & !R: treat as !(L | R) */
1728 Ldata.npos + Rdata.npos);
1729 if (data)
1730 data->negate = true;
1731 return TS_YES;
1732 }
1733 else if (Ldata.negate)
1734 {
1735 /* !L & R */
1736 return TS_phrase_output(data, &Ldata, &Rdata,
1739 Rdata.npos);
1740 }
1741 else if (Rdata.negate)
1742 {
1743 /* L & !R */
1744 return TS_phrase_output(data, &Ldata, &Rdata,
1747 Ldata.npos);
1748 }
1749 else
1750 {
1751 /* straight AND */
1752 return TS_phrase_output(data, &Ldata, &Rdata,
1753 TSPO_BOTH,
1755 Min(Ldata.npos, Rdata.npos));
1756 }
1757
1758 case OP_OR:
1759 memset(&Ldata, 0, sizeof(Ldata));
1760 memset(&Rdata, 0, sizeof(Rdata));
1761
1762 lmatch = TS_phrase_execute(curitem + curitem->qoperator.left,
1763 arg, flags, chkcond, &Ldata);
1764 rmatch = TS_phrase_execute(curitem + 1,
1765 arg, flags, chkcond, &Rdata);
1766
1767 if (lmatch == TS_NO && rmatch == TS_NO)
1768 return TS_NO;
1769
1770 /*
1771 * If either operand has no position information, then we can't
1772 * return reliable position data, only a MAYBE result.
1773 */
1774 if (lmatch == TS_MAYBE || rmatch == TS_MAYBE)
1775 return TS_MAYBE;
1776
1777 /*
1778 * Cope with undefined output width from failed submatch. (This
1779 * takes less code than trying to ensure that all failure returns
1780 * set data->width to zero.)
1781 */
1782 if (lmatch == TS_NO)
1783 Ldata.width = 0;
1784 if (rmatch == TS_NO)
1785 Rdata.width = 0;
1786
1787 /*
1788 * For OP_AND and OP_OR, report the width of the wider of the two
1789 * inputs, and align the narrower input's positions to the right
1790 * end of that width. This rule deals at least somewhat
1791 * reasonably with cases like "x <-> (y | z <-> q)".
1792 */
1793 maxwidth = Max(Ldata.width, Rdata.width);
1794 Loffset = maxwidth - Ldata.width;
1795 Roffset = maxwidth - Rdata.width;
1796 data->width = maxwidth;
1797
1798 if (Ldata.negate && Rdata.negate)
1799 {
1800 /* !L | !R: treat as !(L & R) */
1802 TSPO_BOTH,
1804 Min(Ldata.npos, Rdata.npos));
1805 data->negate = true;
1806 return TS_YES;
1807 }
1808 else if (Ldata.negate)
1809 {
1810 /* !L | R: treat as !(L & !R) */
1814 Ldata.npos);
1815 data->negate = true;
1816 return TS_YES;
1817 }
1818 else if (Rdata.negate)
1819 {
1820 /* L | !R: treat as !(!L & R) */
1824 Rdata.npos);
1825 data->negate = true;
1826 return TS_YES;
1827 }
1828 else
1829 {
1830 /* straight OR */
1831 return TS_phrase_output(data, &Ldata, &Rdata,
1834 Ldata.npos + Rdata.npos);
1835 }
1836
1837 default:
1838 elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
1839 }
1840
1841 /* not reachable, but keep compiler quiet */
1842 return TS_NO;
1843}
1844
1845
1846/*
1847 * Evaluate tsquery boolean expression.
1848 *
1849 * curitem: current tsquery item (initially, the first one)
1850 * arg: opaque value to pass through to callback function
1851 * flags: bitmask of flag bits shown in ts_utils.h
1852 * chkcond: callback function to check whether a primitive value is present
1853 */
1854bool
1855TS_execute(QueryItem *curitem, void *arg, uint32 flags,
1857{
1858 /*
1859 * If we get TS_MAYBE from the recursion, return true. We could only see
1860 * that result if the caller passed TS_EXEC_PHRASE_NO_POS, so there's no
1861 * need to check again.
1862 */
1863 return TS_execute_recurse(curitem, arg, flags, chkcond) != TS_NO;
1864}
1865
1866/*
1867 * Evaluate tsquery boolean expression.
1868 *
1869 * This is the same as TS_execute except that TS_MAYBE is returned as-is.
1870 */
1874{
1875 return TS_execute_recurse(curitem, arg, flags, chkcond);
1876}
1877
1878/*
1879 * TS_execute recursion for operators above any phrase operator. Here we do
1880 * not need to worry about lexeme positions. As soon as we hit an OP_PHRASE
1881 * operator, we pass it off to TS_phrase_execute which does worry.
1882 */
1883static TSTernaryValue
1886{
1888
1889 /* since this function recurses, it could be driven to stack overflow */
1891
1892 /* ... and let's check for query cancel while we're at it */
1894
1895 if (curitem->type == QI_VAL)
1896 return chkcond(arg, (QueryOperand *) curitem,
1897 NULL /* don't need position info */ );
1898
1899 switch (curitem->qoperator.oper)
1900 {
1901 case OP_NOT:
1902 if (flags & TS_EXEC_SKIP_NOT)
1903 return TS_YES;
1904 switch (TS_execute_recurse(curitem + 1, arg, flags, chkcond))
1905 {
1906 case TS_NO:
1907 return TS_YES;
1908 case TS_YES:
1909 return TS_NO;
1910 case TS_MAYBE:
1911 return TS_MAYBE;
1912 }
1913 break;
1914
1915 case OP_AND:
1916 lmatch = TS_execute_recurse(curitem + curitem->qoperator.left, arg,
1917 flags, chkcond);
1918 if (lmatch == TS_NO)
1919 return TS_NO;
1920 switch (TS_execute_recurse(curitem + 1, arg, flags, chkcond))
1921 {
1922 case TS_NO:
1923 return TS_NO;
1924 case TS_YES:
1925 return lmatch;
1926 case TS_MAYBE:
1927 return TS_MAYBE;
1928 }
1929 break;
1930
1931 case OP_OR:
1932 lmatch = TS_execute_recurse(curitem + curitem->qoperator.left, arg,
1933 flags, chkcond);
1934 if (lmatch == TS_YES)
1935 return TS_YES;
1936 switch (TS_execute_recurse(curitem + 1, arg, flags, chkcond))
1937 {
1938 case TS_NO:
1939 return lmatch;
1940 case TS_YES:
1941 return TS_YES;
1942 case TS_MAYBE:
1943 return TS_MAYBE;
1944 }
1945 break;
1946
1947 case OP_PHRASE:
1948
1949 /*
1950 * If we get a MAYBE result, and the caller doesn't want that,
1951 * convert it to NO. It would be more consistent, perhaps, to
1952 * return the result of TS_phrase_execute() verbatim and then
1953 * convert MAYBE results at the top of the recursion. But
1954 * converting at the topmost phrase operator gives results that
1955 * are bug-compatible with the old implementation, so do it like
1956 * this for now.
1957 */
1958 switch (TS_phrase_execute(curitem, arg, flags, chkcond, NULL))
1959 {
1960 case TS_NO:
1961 return TS_NO;
1962 case TS_YES:
1963 return TS_YES;
1964 case TS_MAYBE:
1965 return (flags & TS_EXEC_PHRASE_NO_POS) ? TS_MAYBE : TS_NO;
1966 }
1967 break;
1968
1969 default:
1970 elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
1971 }
1972
1973 /* not reachable, but keep compiler quiet */
1974 return TS_NO;
1975}
1976
1977/*
1978 * Evaluate tsquery and report locations of matching terms.
1979 *
1980 * This is like TS_execute except that it returns match locations not just
1981 * success/failure status. The callback function is required to provide
1982 * position data (we report failure if it doesn't).
1983 *
1984 * On successful match, the result is a List of ExecPhraseData structs, one
1985 * for each AND'ed term or phrase operator in the query. Each struct includes
1986 * a sorted array of lexeme positions matching that term. (Recall that for
1987 * phrase operators, the match includes width+1 lexemes, and the recorded
1988 * position is that of the rightmost lexeme.)
1989 *
1990 * OR subexpressions are handled by union'ing their match locations into a
1991 * single List element, which is valid since any of those locations contains
1992 * a match. However, when some of the OR'ed terms are phrase operators, we
1993 * report the maximum width of any of the OR'ed terms, making such cases
1994 * slightly imprecise in the conservative direction. (For example, if the
1995 * tsquery is "(A <-> B) | C", an occurrence of C in the data would be
1996 * reported as though it includes the lexeme to the left of C.)
1997 *
1998 * Locations of NOT subexpressions are not reported. (Obviously, there can
1999 * be no successful NOT matches at top level, or the match would have failed.
2000 * So this amounts to ignoring NOTs underneath ORs.)
2001 *
2002 * The result is NIL if no match, or if position data was not returned.
2003 *
2004 * Arguments are the same as for TS_execute, although flags is currently
2005 * vestigial since none of the defined bits are sensible here.
2006 */
2007List *
2009 uint32 flags,
2011{
2012 List *result;
2013
2014 /* No flags supported, as yet */
2015 Assert(flags == TS_EXEC_EMPTY);
2016 if (TS_execute_locations_recurse(curitem, arg, chkcond, &result))
2017 return result;
2018 return NIL;
2019}
2020
2021/*
2022 * TS_execute_locations recursion for operators above any phrase operator.
2023 * OP_PHRASE subexpressions can be passed off to TS_phrase_execute.
2024 */
2025static bool
2028 List **locations)
2029{
2030 bool lmatch,
2031 rmatch;
2033 *rlocations;
2035
2036 /* since this function recurses, it could be driven to stack overflow */
2038
2039 /* ... and let's check for query cancel while we're at it */
2041
2042 /* Default locations result is empty */
2043 *locations = NIL;
2044
2045 if (curitem->type == QI_VAL)
2046 {
2048 if (chkcond(arg, (QueryOperand *) curitem, data) == TS_YES)
2049 {
2051 return true;
2052 }
2053 pfree(data);
2054 return false;
2055 }
2056
2057 switch (curitem->qoperator.oper)
2058 {
2059 case OP_NOT:
2060 if (!TS_execute_locations_recurse(curitem + 1, arg, chkcond,
2061 &llocations))
2062 return true; /* we don't pass back any locations */
2063 return false;
2064
2065 case OP_AND:
2066 if (!TS_execute_locations_recurse(curitem + curitem->qoperator.left,
2067 arg, chkcond,
2068 &llocations))
2069 return false;
2070 if (!TS_execute_locations_recurse(curitem + 1,
2071 arg, chkcond,
2072 &rlocations))
2073 return false;
2075 return true;
2076
2077 case OP_OR:
2079 arg, chkcond,
2080 &llocations);
2082 arg, chkcond,
2083 &rlocations);
2084 if (lmatch || rmatch)
2085 {
2086 /*
2087 * We generate an AND'able location struct from each
2088 * combination of sub-matches, following the disjunctive law
2089 * (A & B) | (C & D) = (A | C) & (A | D) & (B | C) & (B | D).
2090 *
2091 * However, if either input didn't produce locations (i.e., it
2092 * failed or was a NOT), we must just return the other list.
2093 */
2094 if (llocations == NIL)
2096 else if (rlocations == NIL)
2098 else
2099 {
2100 ListCell *ll;
2101
2102 foreach(ll, llocations)
2103 {
2105 ListCell *lr;
2106
2107 foreach(lr, rlocations)
2108 {
2110
2114 0, 0,
2115 ldata->npos + rdata->npos);
2116 /* Report the larger width, as explained above. */
2117 data->width = Max(ldata->width, rdata->width);
2119 }
2120 }
2121 }
2122
2123 return true;
2124 }
2125 return false;
2126
2127 case OP_PHRASE:
2128 /* We can hand this off to TS_phrase_execute */
2131 data) == TS_YES)
2132 {
2133 if (!data->negate)
2135 return true;
2136 }
2137 pfree(data);
2138 return false;
2139
2140 default:
2141 elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
2142 }
2143
2144 /* not reachable, but keep compiler quiet */
2145 return false;
2146}
2147
2148/*
2149 * Detect whether a tsquery boolean expression requires any positive matches
2150 * to values shown in the tsquery.
2151 *
2152 * This is needed to know whether a GIN index search requires full index scan.
2153 * For example, 'x & !y' requires a match of x, so it's sufficient to scan
2154 * entries for x; but 'x | !y' could match rows containing neither x nor y.
2155 */
2156bool
2158{
2159 /* since this function recurses, it could be driven to stack overflow */
2161
2162 if (curitem->type == QI_VAL)
2163 return true;
2164
2165 switch (curitem->qoperator.oper)
2166 {
2167 case OP_NOT:
2168
2169 /*
2170 * Assume there are no required matches underneath a NOT. For
2171 * some cases with nested NOTs, we could prove there's a required
2172 * match, but it seems unlikely to be worth the trouble.
2173 */
2174 return false;
2175
2176 case OP_PHRASE:
2177
2178 /*
2179 * Treat OP_PHRASE as OP_AND here
2180 */
2181 case OP_AND:
2182 /* If either side requires a match, we're good */
2183 if (tsquery_requires_match(curitem + curitem->qoperator.left))
2184 return true;
2185 else
2186 return tsquery_requires_match(curitem + 1);
2187
2188 case OP_OR:
2189 /* Both sides must require a match */
2190 if (tsquery_requires_match(curitem + curitem->qoperator.left))
2191 return tsquery_requires_match(curitem + 1);
2192 else
2193 return false;
2194
2195 default:
2196 elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
2197 }
2198
2199 /* not reachable, but keep compiler quiet */
2200 return false;
2201}
2202
2203/*
2204 * boolean operations
2205 */
2206Datum
2213
2214Datum
2216{
2218 TSQuery query = PG_GETARG_TSQUERY(1);
2219 CHKVAL chkval;
2220 bool result;
2221
2222 /* empty query matches nothing */
2223 if (!query->size)
2224 {
2225 PG_FREE_IF_COPY(val, 0);
2226 PG_FREE_IF_COPY(query, 1);
2227 PG_RETURN_BOOL(false);
2228 }
2229
2230 chkval.arrb = ARRPTR(val);
2231 chkval.arre = chkval.arrb + val->size;
2232 chkval.values = STRPTR(val);
2233 chkval.operand = GETOPERAND(query);
2234 result = TS_execute(GETQUERY(query),
2235 &chkval,
2238
2239 PG_FREE_IF_COPY(val, 0);
2240 PG_FREE_IF_COPY(query, 1);
2241 PG_RETURN_BOOL(result);
2242}
2243
2244Datum
2246{
2247 TSVector vector;
2248 TSQuery query;
2249 bool res;
2250
2252 PG_GETARG_DATUM(0)));
2254 PG_GETARG_DATUM(1)));
2255
2257 TSVectorGetDatum(vector),
2258 TSQueryGetDatum(query)));
2259
2260 pfree(vector);
2261 pfree(query);
2262
2263 PG_RETURN_BOOL(res);
2264}
2265
2266Datum
2268{
2269 TSVector vector;
2270 TSQuery query = PG_GETARG_TSQUERY(1);
2271 bool res;
2272
2274 PG_GETARG_DATUM(0)));
2275
2277 TSVectorGetDatum(vector),
2278 TSQueryGetDatum(query)));
2279
2280 pfree(vector);
2281 PG_FREE_IF_COPY(query, 1);
2282
2283 PG_RETURN_BOOL(res);
2284}
2285
2286/*
2287 * ts_stat statistic function support
2288 */
2289
2290
2291/*
2292 * Returns the number of positions in value 'wptr' within tsvector 'txt',
2293 * that have a weight equal to one of the weights in 'weight' bitmask.
2294 */
2295static int
2297{
2298 int len = POSDATALEN(txt, wptr);
2299 int num = 0;
2301
2302 while (len--)
2303 {
2304 if (weight & (1 << WEP_GETWEIGHT(*ptr)))
2305 num++;
2306 ptr++;
2307 }
2308 return num;
2309}
2310
2311#define compareStatWord(a,e,t) \
2312 tsCompareString((a)->lexeme, (a)->lenlexeme, \
2313 STRPTR(t) + (e)->pos, (e)->len, \
2314 false)
2315
2316static void
2318{
2319 WordEntry *we = ARRPTR(txt) + off;
2320 StatEntry *node = stat->root,
2321 *pnode = NULL;
2322 int n,
2323 res = 0;
2324 uint32 depth = 1;
2325
2326 if (stat->weight == 0)
2327 n = (we->haspos) ? POSDATALEN(txt, we) : 1;
2328 else
2329 n = (we->haspos) ? check_weight(txt, we, stat->weight) : 0;
2330
2331 if (n == 0)
2332 return; /* nothing to insert */
2333
2334 while (node)
2335 {
2336 res = compareStatWord(node, we, txt);
2337
2338 if (res == 0)
2339 {
2340 break;
2341 }
2342 else
2343 {
2344 pnode = node;
2345 node = (res < 0) ? node->left : node->right;
2346 }
2347 depth++;
2348 }
2349
2350 if (depth > stat->maxdepth)
2351 stat->maxdepth = depth;
2352
2353 if (node == NULL)
2354 {
2356 node->left = node->right = NULL;
2357 node->ndoc = 1;
2358 node->nentry = n;
2359 node->lenlexeme = we->len;
2360 memcpy(node->lexeme, STRPTR(txt) + we->pos, node->lenlexeme);
2361
2362 if (pnode == NULL)
2363 {
2364 stat->root = node;
2365 }
2366 else
2367 {
2368 if (res < 0)
2369 pnode->left = node;
2370 else
2371 pnode->right = node;
2372 }
2373 }
2374 else
2375 {
2376 node->ndoc++;
2377 node->nentry += n;
2378 }
2379}
2380
2381static void
2383 uint32 low, uint32 high, uint32 offset)
2384{
2385 uint32 pos;
2386 uint32 middle = (low + high) >> 1;
2387
2388 pos = (low + middle) >> 1;
2389 if (low != middle && pos >= offset && pos - offset < txt->size)
2390 insertStatEntry(persistentContext, stat, txt, pos - offset);
2391 pos = (high + middle + 1) >> 1;
2392 if (middle + 1 != high && pos >= offset && pos - offset < txt->size)
2393 insertStatEntry(persistentContext, stat, txt, pos - offset);
2394
2395 if (low != middle)
2397 if (high != middle + 1)
2398 chooseNextStatEntry(persistentContext, stat, txt, middle + 1, high, offset);
2399}
2400
2401/*
2402 * This is written like a custom aggregate function, because the
2403 * original plan was to do just that. Unfortunately, an aggregate function
2404 * can't return a set, so that plan was abandoned. If that limitation is
2405 * lifted in the future, ts_stat could be a real aggregate function so that
2406 * you could use it like this:
2407 *
2408 * SELECT ts_stat(vector_column) FROM vector_table;
2409 *
2410 * where vector_column is a tsvector-type column in vector_table.
2411 */
2412
2413static TSVectorStat *
2415{
2417 uint32 i,
2418 nbit = 0,
2419 offset;
2420
2421 if (stat == NULL)
2422 { /* First call: set up the statistics state */
2424 stat->maxdepth = 1;
2425 }
2426
2427 /* simple check of correctness */
2428 if (txt == NULL || txt->size == 0)
2429 {
2430 if (txt && txt != (TSVector) DatumGetPointer(data))
2431 pfree(txt);
2432 return stat;
2433 }
2434
2435 i = txt->size - 1;
2436 for (; i > 0; i >>= 1)
2437 nbit++;
2438
2439 nbit = 1 << nbit;
2440 offset = (nbit - txt->size) / 2;
2441
2442 insertStatEntry(persistentContext, stat, txt, (nbit >> 1) - offset);
2444
2445 return stat;
2446}
2447
2448static void
2451{
2452 TupleDesc tupdesc;
2453 MemoryContext oldcontext;
2454 StatEntry *node;
2455
2456 funcctx->user_fctx = stat;
2457
2458 oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
2459
2460 stat->stack = palloc0_array(StatEntry *, stat->maxdepth + 1);
2461 stat->stackpos = 0;
2462
2463 node = stat->root;
2464 /* find leftmost value */
2465 if (node == NULL)
2466 stat->stack[stat->stackpos] = NULL;
2467 else
2468 for (;;)
2469 {
2470 stat->stack[stat->stackpos] = node;
2471 if (node->left)
2472 {
2473 stat->stackpos++;
2474 node = node->left;
2475 }
2476 else
2477 break;
2478 }
2479 Assert(stat->stackpos <= stat->maxdepth);
2480
2481 if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
2482 elog(ERROR, "return type must be a row type");
2483 funcctx->tuple_desc = tupdesc;
2484 funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
2485
2486 MemoryContextSwitchTo(oldcontext);
2487}
2488
2489static StatEntry *
2491{
2492 StatEntry *node = stat->stack[stat->stackpos];
2493
2494 if (node == NULL)
2495 return NULL;
2496
2497 if (node->ndoc != 0)
2498 {
2499 /* return entry itself: we already was at left sublink */
2500 return node;
2501 }
2502 else if (node->right && node->right != stat->stack[stat->stackpos + 1])
2503 {
2504 /* go on right sublink */
2505 stat->stackpos++;
2506 node = node->right;
2507
2508 /* find most-left value */
2509 for (;;)
2510 {
2511 stat->stack[stat->stackpos] = node;
2512 if (node->left)
2513 {
2514 stat->stackpos++;
2515 node = node->left;
2516 }
2517 else
2518 break;
2519 }
2520 Assert(stat->stackpos <= stat->maxdepth);
2521 }
2522 else
2523 {
2524 /* we already return all left subtree, itself and right subtree */
2525 if (stat->stackpos == 0)
2526 return NULL;
2527
2528 stat->stackpos--;
2529 return walkStatEntryTree(stat);
2530 }
2531
2532 return node;
2533}
2534
2535static Datum
2537{
2538 TSVectorStat *st;
2539 StatEntry *entry;
2540
2541 st = (TSVectorStat *) funcctx->user_fctx;
2542
2543 entry = walkStatEntryTree(st);
2544
2545 if (entry != NULL)
2546 {
2547 Datum result;
2548 char *values[3];
2549 char ndoc[16];
2550 char nentry[16];
2551 HeapTuple tuple;
2552
2553 values[0] = palloc(entry->lenlexeme + 1);
2554 memcpy(values[0], entry->lexeme, entry->lenlexeme);
2555 (values[0])[entry->lenlexeme] = '\0';
2556 sprintf(ndoc, "%d", entry->ndoc);
2557 values[1] = ndoc;
2558 sprintf(nentry, "%d", entry->nentry);
2559 values[2] = nentry;
2560
2561 tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
2562 result = HeapTupleGetDatum(tuple);
2563
2564 pfree(values[0]);
2565
2566 /* mark entry as already visited */
2567 entry->ndoc = 0;
2568
2569 return result;
2570 }
2571
2572 return (Datum) 0;
2573}
2574
2575static TSVectorStat *
2577{
2578 char *query = text_to_cstring(txt);
2580 bool isnull;
2581 Portal portal;
2583
2584 if ((plan = SPI_prepare(query, 0, NULL)) == NULL)
2585 /* internal error */
2586 elog(ERROR, "SPI_prepare(\"%s\") failed", query);
2587
2588 if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL)
2589 /* internal error */
2590 elog(ERROR, "SPI_cursor_open(\"%s\") failed", query);
2591
2592 SPI_cursor_fetch(portal, true, 100);
2593
2594 if (SPI_tuptable == NULL ||
2595 SPI_tuptable->tupdesc->natts != 1 ||
2597 TSVECTOROID))
2598 ereport(ERROR,
2600 errmsg("ts_stat query must return one tsvector column")));
2601
2603 stat->maxdepth = 1;
2604
2605 if (ws)
2606 {
2607 char *buf;
2608 const char *end;
2609
2610 buf = VARDATA_ANY(ws);
2611 end = buf + VARSIZE_ANY_EXHDR(ws);
2612 while (buf < end)
2613 {
2614 int len = pg_mblen_range(buf, end);
2615
2616 if (len == 1)
2617 {
2618 switch (*buf)
2619 {
2620 case 'A':
2621 case 'a':
2622 stat->weight |= 1 << 3;
2623 break;
2624 case 'B':
2625 case 'b':
2626 stat->weight |= 1 << 2;
2627 break;
2628 case 'C':
2629 case 'c':
2630 stat->weight |= 1 << 1;
2631 break;
2632 case 'D':
2633 case 'd':
2634 stat->weight |= 1;
2635 break;
2636 default:
2637 stat->weight |= 0;
2638 }
2639 }
2640 buf += len;
2641 }
2642 }
2643
2644 while (SPI_processed > 0)
2645 {
2646 uint64 i;
2647
2648 for (i = 0; i < SPI_processed; i++)
2649 {
2651
2652 if (!isnull)
2654 }
2655
2657 SPI_cursor_fetch(portal, true, 100);
2658 }
2659
2661 SPI_cursor_close(portal);
2663 pfree(query);
2664
2665 return stat;
2666}
2667
2668Datum
2670{
2672 Datum result;
2673
2674 if (SRF_IS_FIRSTCALL())
2675 {
2678
2680 SPI_connect();
2681 stat = ts_stat_sql(funcctx->multi_call_memory_ctx, txt, NULL);
2682 PG_FREE_IF_COPY(txt, 0);
2684 SPI_finish();
2685 }
2686
2688 if ((result = ts_process_call(funcctx)) != (Datum) 0)
2689 SRF_RETURN_NEXT(funcctx, result);
2691}
2692
2693Datum
2695{
2697 Datum result;
2698
2699 if (SRF_IS_FIRSTCALL())
2700 {
2704
2706 SPI_connect();
2707 stat = ts_stat_sql(funcctx->multi_call_memory_ctx, txt, ws);
2708 PG_FREE_IF_COPY(txt, 0);
2709 PG_FREE_IF_COPY(ws, 1);
2711 SPI_finish();
2712 }
2713
2715 if ((result = ts_process_call(funcctx)) != (Datum) 0)
2716 SRF_RETURN_NEXT(funcctx, result);
2718}
2719
2720
2721/*
2722 * Triggers for automatic update of a tsvector column from text column(s)
2723 *
2724 * Trigger arguments are either
2725 * name of tsvector col, name of tsconfig to use, name(s) of text col(s)
2726 * name of tsvector col, name of regconfig col, name(s) of text col(s)
2727 * ie, tsconfig can either be specified by name, or indirectly as the
2728 * contents of a regconfig field in the row. If the name is used, it must
2729 * be explicitly schema-qualified.
2730 */
2731Datum
2736
2737Datum
2742
2743static Datum
2745{
2746 TriggerData *trigdata;
2748 Relation rel;
2751 i;
2752 ParsedText prs;
2753 Datum datum;
2754 bool isnull;
2755 text *txt;
2756 Oid cfgId;
2757 bool update_needed;
2758
2759 /* Check call context */
2760 if (!CALLED_AS_TRIGGER(fcinfo)) /* internal error */
2761 elog(ERROR, "tsvector_update_trigger: not fired by trigger manager");
2762
2763 trigdata = (TriggerData *) fcinfo->context;
2764 if (!TRIGGER_FIRED_FOR_ROW(trigdata->tg_event))
2765 elog(ERROR, "tsvector_update_trigger: must be fired for row");
2766 if (!TRIGGER_FIRED_BEFORE(trigdata->tg_event))
2767 elog(ERROR, "tsvector_update_trigger: must be fired BEFORE event");
2768
2769 if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event))
2770 {
2771 rettuple = trigdata->tg_trigtuple;
2772 update_needed = true;
2773 }
2774 else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event))
2775 {
2776 rettuple = trigdata->tg_newtuple;
2777 update_needed = false; /* computed below */
2778 }
2779 else
2780 elog(ERROR, "tsvector_update_trigger: must be fired for INSERT or UPDATE");
2781
2782 trigger = trigdata->tg_trigger;
2783 rel = trigdata->tg_relation;
2784
2785 if (trigger->tgnargs < 3)
2786 elog(ERROR, "tsvector_update_trigger: arguments must be tsvector_field, ts_config, text_field1, ...)");
2787
2788 /* Find the target tsvector column */
2789 tsvector_attr_num = SPI_fnumber(rel->rd_att, trigger->tgargs[0]);
2791 ereport(ERROR,
2793 errmsg("tsvector column \"%s\" does not exist",
2794 trigger->tgargs[0])));
2795 /* This will effectively reject system columns, so no separate test: */
2797 TSVECTOROID))
2798 ereport(ERROR,
2800 errmsg("column \"%s\" is not of tsvector type",
2801 trigger->tgargs[0])));
2802
2803 /* Find the configuration to use */
2804 if (config_column)
2805 {
2806 int config_attr_num;
2807
2808 config_attr_num = SPI_fnumber(rel->rd_att, trigger->tgargs[1]);
2810 ereport(ERROR,
2812 errmsg("configuration column \"%s\" does not exist",
2813 trigger->tgargs[1])));
2815 REGCONFIGOID))
2816 ereport(ERROR,
2818 errmsg("column \"%s\" is not of regconfig type",
2819 trigger->tgargs[1])));
2820
2821 datum = SPI_getbinval(rettuple, rel->rd_att, config_attr_num, &isnull);
2822 if (isnull)
2823 ereport(ERROR,
2825 errmsg("configuration column \"%s\" must not be null",
2826 trigger->tgargs[1])));
2827 cfgId = DatumGetObjectId(datum);
2828 }
2829 else
2830 {
2831 List *names;
2832
2833 names = stringToQualifiedNameList(trigger->tgargs[1], NULL);
2834 /* require a schema so that results are not search path dependent */
2835 if (list_length(names) < 2)
2836 ereport(ERROR,
2838 errmsg("text search configuration name \"%s\" must be schema-qualified",
2839 trigger->tgargs[1])));
2840 cfgId = get_ts_config_oid(names, false);
2841 }
2842
2843 /* initialize parse state */
2844 prs.lenwords = 32;
2845 prs.curwords = 0;
2846 prs.pos = 0;
2848
2849 /* find all words in indexable column(s) */
2850 for (i = 2; i < trigger->tgnargs; i++)
2851 {
2852 int numattr;
2853
2854 numattr = SPI_fnumber(rel->rd_att, trigger->tgargs[i]);
2856 ereport(ERROR,
2858 errmsg("column \"%s\" does not exist",
2859 trigger->tgargs[i])));
2861 ereport(ERROR,
2863 errmsg("column \"%s\" is not of a character type",
2864 trigger->tgargs[i])));
2865
2867 update_needed = true;
2868
2869 datum = SPI_getbinval(rettuple, rel->rd_att, numattr, &isnull);
2870 if (isnull)
2871 continue;
2872
2873 txt = DatumGetTextPP(datum);
2874
2876
2877 if (txt != (text *) DatumGetPointer(datum))
2878 pfree(txt);
2879 }
2880
2881 if (update_needed)
2882 {
2883 /* make tsvector value */
2884 datum = TSVectorGetDatum(make_tsvector(&prs));
2885 isnull = false;
2886
2887 /* and insert it into tuple */
2890 &datum, &isnull);
2891
2892 pfree(DatumGetPointer(datum));
2893 }
2894
2895 return PointerGetDatum(rettuple);
2896}
#define GETQUERY(x)
Definition _int.h:157
#define PG_GETARG_ARRAYTYPE_P(n)
Definition array.h:263
ArrayType * construct_array_builtin(Datum *elems, int nelems, Oid elmtype)
void deconstruct_array_builtin(const ArrayType *array, Oid elmtype, Datum **elemsp, bool **nullsp, int *nelemsp)
int16 AttrNumber
Definition attnum.h:21
bool bms_is_member(int x, const Bitmapset *a)
Definition bitmapset.c:510
static Datum values[MAXATTR]
Definition bootstrap.c:188
int numattr
Definition bootstrap.c:65
#define Min(x, y)
Definition c.h:1093
#define Max(x, y)
Definition c.h:1087
#define VARHDRSZ
Definition c.h:783
#define Assert(condition)
Definition c.h:945
#define FLEXIBLE_ARRAY_MEMBER
Definition c.h:552
int8_t int8
Definition c.h:612
#define SHORTALIGN(LEN)
Definition c.h:894
int32_t int32
Definition c.h:614
uint64_t uint64
Definition c.h:619
uint16_t uint16
Definition c.h:617
uint32_t uint32
Definition c.h:618
#define ARRPTR(x)
Definition cube.c:28
struct cursor * cur
Definition ecpg.c:29
Datum arg
Definition elog.c:1322
int errcode(int sqlerrcode)
Definition elog.c:874
#define ERROR
Definition elog.h:39
#define elog(elevel,...)
Definition elog.h:226
#define ereport(elevel,...)
Definition elog.h:150
HeapTuple BuildTupleFromCStrings(AttInMetadata *attinmeta, char **values)
AttInMetadata * TupleDescGetAttInMetadata(TupleDesc tupdesc)
#define repalloc_array(pointer, type, count)
Definition fe_memutils.h:78
#define palloc_array(type, count)
Definition fe_memutils.h:76
#define palloc0_array(type, count)
Definition fe_memutils.h:77
#define palloc0_object(type)
Definition fe_memutils.h:75
#define PG_FREE_IF_COPY(ptr, n)
Definition fmgr.h:260
#define PG_GETARG_TEXT_PP(n)
Definition fmgr.h:310
#define DirectFunctionCall2(func, arg1, arg2)
Definition fmgr.h:686
#define PG_GETARG_CHAR(n)
Definition fmgr.h:273
#define DatumGetTextPP(X)
Definition fmgr.h:293
#define DirectFunctionCall1(func, arg1)
Definition fmgr.h:684
#define PG_GETARG_DATUM(n)
Definition fmgr.h:268
#define PG_RETURN_INT32(x)
Definition fmgr.h:355
#define PG_RETURN_DATUM(x)
Definition fmgr.h:354
#define PG_RETURN_POINTER(x)
Definition fmgr.h:363
#define PG_FUNCTION_ARGS
Definition fmgr.h:193
#define PG_RETURN_BOOL(x)
Definition fmgr.h:360
TypeFuncClass get_call_result_type(FunctionCallInfo fcinfo, Oid *resultTypeId, TupleDesc *resultTupleDesc)
Definition funcapi.c:276
#define SRF_IS_FIRSTCALL()
Definition funcapi.h:304
#define SRF_PERCALL_SETUP()
Definition funcapi.h:308
@ TYPEFUNC_COMPOSITE
Definition funcapi.h:149
#define SRF_RETURN_NEXT(_funcctx, _result)
Definition funcapi.h:310
#define SRF_FIRSTCALL_INIT()
Definition funcapi.h:306
static Datum HeapTupleGetDatum(const HeapTupleData *tuple)
Definition funcapi.h:230
#define SRF_RETURN_DONE(_funcctx)
Definition funcapi.h:328
Datum difference(PG_FUNCTION_ARGS)
HeapTuple heap_modify_tuple_by_cols(HeapTuple tuple, TupleDesc tupleDesc, int nCols, const int *replCols, const Datum *replValues, const bool *replIsnull)
Definition heaptuple.c:1198
HeapTuple heap_form_tuple(TupleDesc tupleDescriptor, const Datum *values, const bool *isnull)
Definition heaptuple.c:1037
#define CALCDATASIZE(x, lenstr)
Definition hstore.h:72
#define STRPTR(x)
Definition hstore.h:76
#define nitems(x)
Definition indent.h:31
long val
Definition informix.c:689
static int pg_cmp_s32(int32 a, int32 b)
Definition int.h:713
int b
Definition isn.c:74
int a
Definition isn.c:73
int j
Definition isn.c:78
int i
Definition isn.c:77
List * lappend(List *list, void *datum)
Definition list.c:339
List * list_concat(List *list1, const List *list2)
Definition list.c:561
#define GETOPERAND(x)
Definition ltree.h:165
int pg_mblen_range(const char *mbstr, const char *end)
Definition mbutils.c:1084
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition mcxt.c:1232
void * MemoryContextAllocZero(MemoryContext context, Size size)
Definition mcxt.c:1266
void pfree(void *pointer)
Definition mcxt.c:1616
void * palloc0(Size size)
Definition mcxt.c:1417
void * palloc(Size size)
Definition mcxt.c:1387
#define CHECK_FOR_INTERRUPTS()
Definition miscadmin.h:123
Oid get_ts_config_oid(List *names, bool missing_ok)
Definition namespace.c:3224
static char * errmsg
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition palloc.h:124
bool IsBinaryCoercible(Oid srctype, Oid targettype)
const void size_t len
const void * data
#define lfirst(lc)
Definition pg_list.h:172
static int list_length(const List *l)
Definition pg_list.h:152
#define NIL
Definition pg_list.h:68
#define list_make1(x1)
Definition pg_list.h:212
#define plan(x)
Definition pg_regress.c:161
static char buf[DEFAULT_XLOG_SEG_SIZE]
#define sprintf
Definition port.h:262
#define qsort(a, b, c, d)
Definition port.h:495
static bool DatumGetBool(Datum X)
Definition postgres.h:100
static Datum PointerGetDatum(const void *X)
Definition postgres.h:342
static Oid DatumGetObjectId(Datum X)
Definition postgres.h:242
static Datum Int16GetDatum(int16 X)
Definition postgres.h:172
uint64_t Datum
Definition postgres.h:70
static Pointer DatumGetPointer(Datum X)
Definition postgres.h:332
static char DatumGetChar(Datum X)
Definition postgres.h:122
unsigned int Oid
static int fb(int x)
static size_t qunique(void *array, size_t elements, size_t width, int(*compare)(const void *, const void *))
Definition qunique.h:21
static int cmp(const chr *x, const chr *y, size_t len)
List * stringToQualifiedNameList(const char *string, Node *escontext)
Definition regproc.c:1922
int SPI_fnumber(TupleDesc tupdesc, const char *fname)
Definition spi.c:1176
uint64 SPI_processed
Definition spi.c:45
Oid SPI_gettypeid(TupleDesc tupdesc, int fnumber)
Definition spi.c:1309
int SPI_freeplan(SPIPlanPtr plan)
Definition spi.c:1026
SPITupleTable * SPI_tuptable
Definition spi.c:46
Portal SPI_cursor_open(const char *name, SPIPlanPtr plan, const Datum *Values, const char *Nulls, bool read_only)
Definition spi.c:1446
int SPI_connect(void)
Definition spi.c:95
void SPI_cursor_fetch(Portal portal, bool forward, long count)
Definition spi.c:1807
int SPI_finish(void)
Definition spi.c:183
void SPI_freetuptable(SPITupleTable *tuptable)
Definition spi.c:1387
SPIPlanPtr SPI_prepare(const char *src, int nargs, Oid *argtypes)
Definition spi.c:861
void SPI_cursor_close(Portal portal)
Definition spi.c:1863
Datum SPI_getbinval(HeapTuple tuple, TupleDesc tupdesc, int fnumber, bool *isnull)
Definition spi.c:1253
#define SPI_ERROR_NOATTRIBUTE
Definition spi.h:76
void check_stack_depth(void)
Definition stack_depth.c:95
WordEntry * arre
Definition tsvector_op.c:39
char * values
Definition tsvector_op.c:40
WordEntry * arrb
Definition tsvector_op.c:38
Definition pg_list.h:54
int32 pos
Definition ts_utils.h:105
int32 lenwords
Definition ts_utils.h:103
int32 curwords
Definition ts_utils.h:104
ParsedWord * words
Definition ts_utils.h:102
int16 distance
Definition ts_type.h:196
uint32 left
Definition ts_type.h:197
TupleDesc rd_att
Definition rel.h:112
TupleDesc tupdesc
Definition spi.h:25
HeapTuple * vals
Definition spi.h:26
uint32 nentry
Definition tsvector_op.c:49
struct StatEntry * left
Definition tsvector_op.c:50
char lexeme[FLEXIBLE_ARRAY_MEMBER]
Definition tsvector_op.c:53
uint32 lenlexeme
Definition tsvector_op.c:52
uint32 ndoc
Definition tsvector_op.c:47
struct StatEntry * right
Definition tsvector_op.c:51
int32 size
Definition ts_type.h:221
int32 size
Definition ts_type.h:93
StatEntry * root
Definition tsvector_op.c:67
uint32 maxdepth
Definition tsvector_op.c:62
uint32 stackpos
Definition tsvector_op.c:65
StatEntry ** stack
Definition tsvector_op.c:64
Relation tg_relation
Definition trigger.h:35
const Bitmapset * tg_updatedcols
Definition trigger.h:43
TriggerEvent tg_event
Definition trigger.h:34
HeapTuple tg_newtuple
Definition trigger.h:37
Trigger * tg_trigger
Definition trigger.h:38
HeapTuple tg_trigtuple
Definition trigger.h:36
WordEntryPos pos[FLEXIBLE_ARRAY_MEMBER]
Definition ts_type.h:68
uint32 pos
Definition ts_type.h:46
uint32 haspos
Definition ts_type.h:44
uint32 len
Definition ts_type.h:45
Definition c.h:778
#define FirstLowInvalidHeapAttributeNumber
Definition sysattr.h:27
Datum to_tsvector(PG_FUNCTION_ARGS)
Definition to_tsany.c:270
TSVector make_tsvector(ParsedText *prs)
Definition to_tsany.c:165
Datum plainto_tsquery(PG_FUNCTION_ARGS)
Definition to_tsany.c:642
#define TRIGGER_FIRED_BEFORE(event)
Definition trigger.h:130
#define CALLED_AS_TRIGGER(fcinfo)
Definition trigger.h:26
#define TRIGGER_FIRED_FOR_ROW(event)
Definition trigger.h:124
#define TRIGGER_FIRED_BY_INSERT(event)
Definition trigger.h:112
#define TRIGGER_FIRED_BY_UPDATE(event)
Definition trigger.h:118
void parsetext(Oid cfgId, ParsedText *prs, char *buf, int buflen)
Definition ts_parse.c:355
#define PG_GETARG_TSVECTOR(n)
Definition ts_type.h:135
#define WEP_GETPOS(x)
Definition ts_type.h:80
#define _POSVECPTR(x, e)
Definition ts_type.h:109
static TSQuery DatumGetTSQuery(Datum X)
Definition ts_type.h:249
static TSVector DatumGetTSVector(Datum X)
Definition ts_type.h:118
#define MAXENTRYPOS
Definition ts_type.h:85
static Datum TSVectorGetDatum(const TSVectorData *X)
Definition ts_type.h:130
#define WEP_SETPOS(x, v)
Definition ts_type.h:83
#define POSDATALEN(x, e)
Definition ts_type.h:110
#define PG_GETARG_TSQUERY(n)
Definition ts_type.h:266
uint16 WordEntryPos
Definition ts_type.h:63
#define MAXNUMPOS
Definition ts_type.h:86
TSVectorData * TSVector
Definition ts_type.h:98
#define PG_GETARG_TSVECTOR_COPY(n)
Definition ts_type.h:136
#define WEP_SETWEIGHT(x, v)
Definition ts_type.h:82
#define QI_VAL
Definition ts_type.h:149
static Datum TSQueryGetDatum(const TSQueryData *X)
Definition ts_type.h:261
#define LIMITPOS(x)
Definition ts_type.h:87
#define OP_AND
Definition ts_type.h:180
#define OP_PHRASE
Definition ts_type.h:182
#define OP_OR
Definition ts_type.h:181
#define POSDATAPTR(x, e)
Definition ts_type.h:111
#define OP_NOT
Definition ts_type.h:179
#define WEP_GETWEIGHT(x)
Definition ts_type.h:79
#define MAXSTRPOS
Definition ts_type.h:50
#define TS_EXEC_PHRASE_NO_POS
Definition ts_utils.h:200
TSTernaryValue
Definition ts_utils.h:131
@ TS_MAYBE
Definition ts_utils.h:134
@ TS_NO
Definition ts_utils.h:132
@ TS_YES
Definition ts_utils.h:133
#define TS_EXEC_EMPTY
Definition ts_utils.h:186
#define TS_EXEC_SKIP_NOT
Definition ts_utils.h:193
TSTernaryValue(* TSExecuteCallback)(void *arg, QueryOperand *val, ExecPhraseData *data)
Definition ts_utils.h:180
int compareWordEntryPos(const void *a, const void *b)
Definition tsvector.c:36
Datum tsvector_setweight_by_filter(PG_FUNCTION_ARGS)
#define TSPO_BOTH
static Datum ts_process_call(FuncCallContext *funcctx)
static TSTernaryValue checkcondition_str(void *checkval, QueryOperand *val, ExecPhraseData *data)
bool TS_execute(QueryItem *curitem, void *arg, uint32 flags, TSExecuteCallback chkcond)
Datum ts_match_vq(PG_FUNCTION_ARGS)
Datum tsvector_update_trigger_byid(PG_FUNCTION_ARGS)
static int32 add_pos(TSVector src, WordEntry *srcptr, TSVector dest, WordEntry *destptr, int32 maxpos)
static TSVectorStat * ts_stat_sql(MemoryContext persistentContext, text *txt, text *ws)
List * TS_execute_locations(QueryItem *curitem, void *arg, uint32 flags, TSExecuteCallback chkcond)
Datum tsvector_delete_arr(PG_FUNCTION_ARGS)
#define TSPO_R_ONLY
Datum array_to_tsvector(PG_FUNCTION_ARGS)
#define STATENTRYHDRSZ
Definition tsvector_op.c:56
Datum tsvector_filter(PG_FUNCTION_ARGS)
static TSTernaryValue TS_phrase_output(ExecPhraseData *data, ExecPhraseData *Ldata, ExecPhraseData *Rdata, int emit, int Loffset, int Roffset, int max_npos)
#define compareEntry(pa, a, pb, b)
Datum tsvector_setweight(PG_FUNCTION_ARGS)
#define TSVECTORCMPFUNC(type, action, ret)
static int check_weight(TSVector txt, WordEntry *wptr, int8 weight)
Datum tsvector_strip(PG_FUNCTION_ARGS)
Datum tsvector_length(PG_FUNCTION_ARGS)
Datum tsvector_to_array(PG_FUNCTION_ARGS)
Datum ts_match_tq(PG_FUNCTION_ARGS)
static int silly_cmp_tsvector(const TSVectorData *a, const TSVectorData *b)
Definition tsvector_op.c:86
Datum ts_stat1(PG_FUNCTION_ARGS)
int32 tsCompareString(char *a, int lena, char *b, int lenb, bool prefix)
Datum tsvector_delete_str(PG_FUNCTION_ARGS)
#define TSPO_L_ONLY
static Datum tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column)
Datum ts_match_qv(PG_FUNCTION_ARGS)
bool tsquery_requires_match(QueryItem *curitem)
Datum tsvector_concat(PG_FUNCTION_ARGS)
Datum tsvector_update_trigger_bycolumn(PG_FUNCTION_ARGS)
static bool TS_execute_locations_recurse(QueryItem *curitem, void *arg, TSExecuteCallback chkcond, List **locations)
static TSTernaryValue TS_execute_recurse(QueryItem *curitem, void *arg, uint32 flags, TSExecuteCallback chkcond)
static TSVectorStat * ts_accum(MemoryContext persistentContext, TSVectorStat *stat, Datum data)
TSTernaryValue TS_execute_ternary(QueryItem *curitem, void *arg, uint32 flags, TSExecuteCallback chkcond)
static int compare_int(const void *va, const void *vb)
static void ts_setup_firstcall(FunctionCallInfo fcinfo, FuncCallContext *funcctx, TSVectorStat *stat)
static void chooseNextStatEntry(MemoryContext persistentContext, TSVectorStat *stat, TSVector txt, uint32 low, uint32 high, uint32 offset)
Datum ts_match_tt(PG_FUNCTION_ARGS)
static TSTernaryValue TS_phrase_execute(QueryItem *curitem, void *arg, uint32 flags, TSExecuteCallback chkcond, ExecPhraseData *data)
static int tsvector_bsearch(const TSVectorData *tsv, char *lexeme, int lexeme_len)
static int compare_text_lexemes(const void *va, const void *vb)
static TSTernaryValue checkclass_str(CHKVAL *chkval, WordEntry *entry, QueryOperand *val, ExecPhraseData *data)
#define compareStatWord(a, e, t)
Datum tsvector_unnest(PG_FUNCTION_ARGS)
static StatEntry * walkStatEntryTree(TSVectorStat *stat)
Datum ts_stat2(PG_FUNCTION_ARGS)
static void insertStatEntry(MemoryContext persistentContext, TSVectorStat *stat, TSVector txt, uint32 off)
static TSVector tsvector_delete_by_indices(TSVector tsv, int *indices_to_delete, int indices_count)
TupleDesc CreateTemplateTupleDesc(int natts)
Definition tupdesc.c:165
void TupleDescFinalize(TupleDesc tupdesc)
Definition tupdesc.c:508
void TupleDescInitEntry(TupleDesc desc, AttrNumber attributeNumber, const char *attributeName, Oid oidtypeid, int32 typmod, int attdim)
Definition tupdesc.c:897
QueryOperator qoperator
Definition ts_type.h:209
QueryItemType type
Definition ts_type.h:208
static Size VARSIZE_ANY_EXHDR(const void *PTR)
Definition varatt.h:472
static Size VARSIZE(const void *PTR)
Definition varatt.h:298
static char * VARDATA(const void *PTR)
Definition varatt.h:305
static char * VARDATA_ANY(const void *PTR)
Definition varatt.h:486
static void SET_VARSIZE(void *PTR, Size len)
Definition varatt.h:432
text * cstring_to_text_with_len(const char *s, int len)
Definition varlena.c:196
char * text_to_cstring(const text *t)
Definition varlena.c:217
#define stat
Definition win32_port.h:74