PostgreSQL Source Code git master
Loading...
Searching...
No Matches
tsvector_op.c File Reference
#include "postgres.h"
#include <limits.h>
#include "access/htup_details.h"
#include "catalog/namespace.h"
#include "catalog/pg_type.h"
#include "commands/trigger.h"
#include "common/int.h"
#include "executor/spi.h"
#include "funcapi.h"
#include "lib/qunique.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "parser/parse_coerce.h"
#include "tsearch/ts_utils.h"
#include "utils/array.h"
#include "utils/builtins.h"
#include "utils/regproc.h"
#include "utils/rel.h"
Include dependency graph for tsvector_op.c:

Go to the source code of this file.

Data Structures

struct  CHKVAL
 
struct  StatEntry
 
struct  TSVectorStat
 

Macros

#define STATENTRYHDRSZ   (offsetof(StatEntry, lexeme))
 
#define TSVECTORCMPFUNC(type, action, ret)
 
#define compareEntry(pa, a, pb, b)
 
#define TSPO_L_ONLY   0x01 /* emit positions appearing only in L */
 
#define TSPO_R_ONLY   0x02 /* emit positions appearing only in R */
 
#define TSPO_BOTH   0x04 /* emit positions appearing in both L&R */
 
#define compareStatWord(a, e, t)
 

Typedefs

typedef struct StatEntry StatEntry
 

Functions

static TSTernaryValue TS_execute_recurse (QueryItem *curitem, void *arg, uint32 flags, TSExecuteCallback chkcond)
 
static bool TS_execute_locations_recurse (QueryItem *curitem, void *arg, TSExecuteCallback chkcond, List **locations)
 
static int tsvector_bsearch (const TSVectorData *tsv, char *lexeme, int lexeme_len)
 
static Datum tsvector_update_trigger (PG_FUNCTION_ARGS, bool config_column)
 
static int silly_cmp_tsvector (const TSVectorData *a, const TSVectorData *b)
 
 TSVECTORCMPFUNC (lt,<, BOOL)
 
 TSVECTORCMPFUNC (le,<=, BOOL)
 
 TSVECTORCMPFUNC (eq,==, BOOL)
 
 TSVECTORCMPFUNC (ge, >=, BOOL)
 
 TSVECTORCMPFUNC (gt, >, BOOL)
 
 TSVECTORCMPFUNC (ne, !=, BOOL)
 
 TSVECTORCMPFUNC (cmp,+, INT32)
 
Datum tsvector_strip (PG_FUNCTION_ARGS)
 
Datum tsvector_length (PG_FUNCTION_ARGS)
 
Datum tsvector_setweight (PG_FUNCTION_ARGS)
 
Datum tsvector_setweight_by_filter (PG_FUNCTION_ARGS)
 
static int32 add_pos (TSVector src, WordEntry *srcptr, TSVector dest, WordEntry *destptr, int32 maxpos)
 
static int compare_int (const void *va, const void *vb)
 
static int compare_text_lexemes (const void *va, const void *vb)
 
static TSVector tsvector_delete_by_indices (TSVector tsv, int *indices_to_delete, int indices_count)
 
Datum tsvector_delete_str (PG_FUNCTION_ARGS)
 
Datum tsvector_delete_arr (PG_FUNCTION_ARGS)
 
Datum tsvector_unnest (PG_FUNCTION_ARGS)
 
Datum tsvector_to_array (PG_FUNCTION_ARGS)
 
Datum array_to_tsvector (PG_FUNCTION_ARGS)
 
Datum tsvector_filter (PG_FUNCTION_ARGS)
 
Datum tsvector_concat (PG_FUNCTION_ARGS)
 
int32 tsCompareString (char *a, int lena, char *b, int lenb, bool prefix)
 
static TSTernaryValue checkclass_str (CHKVAL *chkval, WordEntry *entry, QueryOperand *val, ExecPhraseData *data)
 
static TSTernaryValue checkcondition_str (void *checkval, QueryOperand *val, ExecPhraseData *data)
 
static TSTernaryValue TS_phrase_output (ExecPhraseData *data, ExecPhraseData *Ldata, ExecPhraseData *Rdata, int emit, int Loffset, int Roffset, int max_npos)
 
static TSTernaryValue TS_phrase_execute (QueryItem *curitem, void *arg, uint32 flags, TSExecuteCallback chkcond, ExecPhraseData *data)
 
bool TS_execute (QueryItem *curitem, void *arg, uint32 flags, TSExecuteCallback chkcond)
 
TSTernaryValue TS_execute_ternary (QueryItem *curitem, void *arg, uint32 flags, TSExecuteCallback chkcond)
 
List * TS_execute_locations (QueryItem *curitem, void *arg, uint32 flags, TSExecuteCallback chkcond)
 
bool tsquery_requires_match (QueryItem *curitem)
 
Datum ts_match_qv (PG_FUNCTION_ARGS)
 
Datum ts_match_vq (PG_FUNCTION_ARGS)
 
Datum ts_match_tt (PG_FUNCTION_ARGS)
 
Datum ts_match_tq (PG_FUNCTION_ARGS)
 
static int check_weight (TSVector txt, WordEntry *wptr, int8 weight)
 
static void insertStatEntry (MemoryContext persistentContext, TSVectorStat *stat, TSVector txt, uint32 off)
 
static void chooseNextStatEntry (MemoryContext persistentContext, TSVectorStat *stat, TSVector txt, uint32 low, uint32 high, uint32 offset)
 
static TSVectorStat * ts_accum (MemoryContext persistentContext, TSVectorStat *stat, Datum data)
 
static void ts_setup_firstcall (FunctionCallInfo fcinfo, FuncCallContext *funcctx, TSVectorStat *stat)
 
static StatEntry * walkStatEntryTree (TSVectorStat *stat)
 
static Datum ts_process_call (FuncCallContext *funcctx)
 
static TSVectorStat * ts_stat_sql (MemoryContext persistentContext, text *txt, text *ws)
 
Datum ts_stat1 (PG_FUNCTION_ARGS)
 
Datum ts_stat2 (PG_FUNCTION_ARGS)
 
Datum tsvector_update_trigger_byid (PG_FUNCTION_ARGS)
 
Datum tsvector_update_trigger_bycolumn (PG_FUNCTION_ARGS)
 

Macro Definition Documentation

◆ compareEntry

#define compareEntry (   pa,
  a,
  pb,
  b 
)
Value:
tsCompareString((pa) + (a)->pos, (a)->len, \
(pb) + (b)->pos, (b)->len, \
false)
int b
Definition isn.c:74
int a
Definition isn.c:73
const void size_t len
static int fb(int x)
int32 tsCompareString(char *a, int lena, char *b, int lenb, bool prefix)

Definition at line 354 of file tsvector_op.c.

366{
367 uint16 *clen = &_POSVECPTR(dest, destptr)->npos;
368 int i;
370 startlen;
372 *dpos = POSDATAPTR(dest, destptr);
373
374 if (!destptr->haspos)
375 *clen = 0;
376
377 startlen = *clen;
378 for (i = 0;
379 i < slen && *clen < MAXNUMPOS &&
380 (*clen == 0 || WEP_GETPOS(dpos[*clen - 1]) != MAXENTRYPOS - 1);
381 i++)
382 {
385 (*clen)++;
386 }
387
388 if (*clen != startlen)
389 destptr->haspos = 1;
390 return *clen - startlen;
391}
392
393/*
394 * Perform binary search of given lexeme in TSVector.
395 * Returns lexeme position in TSVector's entry array or -1 if lexeme wasn't
396 * found.
397 */
398static int
399tsvector_bsearch(const TSVectorData *tsv, char *lexeme, int lexeme_len)
400{
401 const WordEntry *arrin = ARRPTR(tsv);
402 int StopLow = 0,
403 StopHigh = tsv->size,
405 cmp;
406
407 while (StopLow < StopHigh)
408 {
409 StopMiddle = (StopLow + StopHigh) / 2;
410
412 STRPTR(tsv) + arrin[StopMiddle].pos,
414 false);
415
416 if (cmp < 0)
418 else if (cmp > 0)
419 StopLow = StopMiddle + 1;
420 else /* found it */
421 return StopMiddle;
422 }
423
424 return -1;
425}
426
427/*
428 * qsort comparator functions
429 */
430
/*
 * qsort comparator for arrays of int: dereferences both element pointers
 * and returns whatever pg_cmp_s32() reports for the pair.
 */
static int
compare_int(const void *va, const void *vb)
{
    const int  *lhs = (const int *) va;
    const int  *rhs = (const int *) vb;

    return pg_cmp_s32(*lhs, *rhs);
}
439
440static int
441compare_text_lexemes(const void *va, const void *vb)
442{
443 Datum a = *((const Datum *) va);
444 Datum b = *((const Datum *) vb);
449
450 return tsCompareString(alex, alex_len, blex, blex_len, false);
451}
452
453/*
454 * Internal routine to delete lexemes from TSVector by array of offsets.
455 *
456 * int *indices_to_delete -- array of lexeme offsets to delete (modified here!)
457 * int indices_count -- size of that array
458 *
459 * Returns new TSVector without given lexemes along with their positions
460 * and weights.
461 */
462static TSVector
464 int indices_count)
465{
468 *arrout;
469 char *data = STRPTR(tsv),
470 *dataout;
471 int i, /* index in arrin */
472 j, /* index in arrout */
473 k, /* index in indices_to_delete */
474 curoff; /* index in dataout area */
475
476 /*
477 * Sort the filter array to simplify membership checks below. Also, get
478 * rid of any duplicate entries, so that we can assume that indices_count
479 * is exactly equal to the number of lexemes that will be removed.
480 */
481 if (indices_count > 1)
482 {
486 }
487
488 /*
489 * Here we overestimate tsout size, since we don't know how much space is
490 * used by the deleted lexeme(s). We will set exact size below.
491 */
493
494 /* This count must be correct because STRPTR(tsout) relies on it. */
495 tsout->size = tsv->size - indices_count;
496
497 /*
498 * Copy tsv to tsout, skipping lexemes listed in indices_to_delete.
499 */
502 curoff = 0;
503 for (i = j = k = 0; i < tsv->size; i++)
504 {
505 /*
506 * If current i is present in indices_to_delete, skip this lexeme.
507 * Since indices_to_delete is already sorted, we only need to check
508 * the current (k'th) entry.
509 */
510 if (k < indices_count && i == indices_to_delete[k])
511 {
512 k++;
513 continue;
514 }
515
516 /* Copy lexeme and its positions and weights */
517 memcpy(dataout + curoff, data + arrin[i].pos, arrin[i].len);
518 arrout[j].haspos = arrin[i].haspos;
519 arrout[j].len = arrin[i].len;
520 arrout[j].pos = curoff;
521 curoff += arrin[i].len;
522 if (arrin[i].haspos)
523 {
524 int len = POSDATALEN(tsv, arrin + i) * sizeof(WordEntryPos)
525 + sizeof(uint16);
526
529 STRPTR(tsv) + SHORTALIGN(arrin[i].pos + arrin[i].len),
530 len);
531 curoff += len;
532 }
533
534 j++;
535 }
536
537 /*
538 * k should now be exactly equal to indices_count. If it isn't then the
539 * caller provided us with indices outside of [0, tsv->size) range and
540 * estimation of tsout's size is wrong.
541 */
542 Assert(k == indices_count);
543
545 return tsout;
546}
547
548/*
549 * Delete given lexeme from tsvector.
550 * Implementation of user-level ts_delete(tsvector, text).
551 */
552Datum
554{
556 tsout;
558 char *lexeme = VARDATA_ANY(tlexeme);
561
562 if ((skip_index = tsvector_bsearch(tsin, lexeme, lexeme_len)) == -1)
564
566
570}
571
572/*
573 * Delete given array of lexemes from tsvector.
574 * Implementation of user-level ts_delete(tsvector, text[]).
575 */
576Datum
578{
580 tsout;
582 int i,
583 nlex,
587 bool *nulls;
588
590
591 /*
592 * In typical use case array of lexemes to delete is relatively small. So
593 * here we optimize things for that scenario: iterate through lexarr
594 * performing binary search of each lexeme from lexarr in tsvector.
595 */
596 skip_indices = palloc0(nlex * sizeof(int));
597 for (i = skip_count = 0; i < nlex; i++)
598 {
599 char *lex;
600 int lex_len,
601 lex_pos;
602
603 /* Ignore null array elements, they surely don't match */
604 if (nulls[i])
605 continue;
606
610
611 if (lex_pos >= 0)
613 }
614
616
620
622}
623
624/*
625 * Expand tsvector as table with following columns:
626 * lexeme: lexeme text
627 * positions: integer array of lexeme positions
628 * weights: char array of weights corresponding to positions
629 */
630Datum
632{
635
636 if (SRF_IS_FIRSTCALL())
637 {
638 MemoryContext oldcontext;
639 TupleDesc tupdesc;
640
642 oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
643
644 tupdesc = CreateTemplateTupleDesc(3);
645 TupleDescInitEntry(tupdesc, (AttrNumber) 1, "lexeme",
646 TEXTOID, -1, 0);
647 TupleDescInitEntry(tupdesc, (AttrNumber) 2, "positions",
648 INT2ARRAYOID, -1, 0);
649 TupleDescInitEntry(tupdesc, (AttrNumber) 3, "weights",
650 TEXTARRAYOID, -1, 0);
651 if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
652 elog(ERROR, "return type must be a row type");
653 funcctx->tuple_desc = tupdesc;
654
655 funcctx->user_fctx = PG_GETARG_TSVECTOR_COPY(0);
656
657 MemoryContextSwitchTo(oldcontext);
658 }
659
661 tsin = (TSVector) funcctx->user_fctx;
662
663 if (funcctx->call_cntr < tsin->size)
664 {
666 char *data = STRPTR(tsin);
667 HeapTuple tuple;
668 int j,
669 i = funcctx->call_cntr;
670 bool nulls[] = {false, false, false};
671 Datum values[3];
672
674
675 if (arrin[i].haspos)
676 {
679 Datum *weights;
680 char weight;
681
682 /*
683 * Internally tsvector stores position and weight in the same
684 * uint16 (2 bits for weight, 14 for position). Here we extract
685 * that in two separate arrays.
686 */
687 posv = _POSVECPTR(tsin, arrin + i);
688 positions = palloc(posv->npos * sizeof(Datum));
689 weights = palloc(posv->npos * sizeof(Datum));
690 for (j = 0; j < posv->npos; j++)
691 {
693 weight = 'D' - WEP_GETWEIGHT(posv->pos[j]);
695 1));
696 }
697
700 }
701 else
702 {
703 nulls[1] = nulls[2] = true;
704 }
705
706 tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
708 }
709 else
710 {
712 }
713}
714
715/*
716 * Convert tsvector to array of lexemes.
717 */
718Datum
720{
723 Datum *elements;
724 int i;
725 ArrayType *array;
726
727 elements = palloc(tsin->size * sizeof(Datum));
728
729 for (i = 0; i < tsin->size; i++)
730 {
732 arrin[i].len));
733 }
734
735 array = construct_array_builtin(elements, tsin->size, TEXTOID);
736
737 pfree(elements);
739 PG_RETURN_POINTER(array);
740}
741
742/*
743 * Build tsvector from array of lexemes.
744 */
745Datum
747{
752 bool *nulls;
753 int nitems,
754 i,
755 tslen,
756 datalen = 0;
757 char *cur;
758
760
761 /*
762 * Reject nulls and zero length strings (maybe we should just ignore them,
763 * instead?)
764 */
765 for (i = 0; i < nitems; i++)
766 {
767 if (nulls[i])
770 errmsg("lexeme array may not contain nulls")));
771
775 errmsg("lexeme array may not contain empty strings")));
776 }
777
778 /* Sort and de-dup, because this is required for a valid tsvector. */
779 if (nitems > 1)
780 {
782 nitems = qunique(dlexemes, nitems, sizeof(Datum),
784 }
785
786 /* Calculate space needed for surviving lexemes. */
787 for (i = 0; i < nitems; i++)
788 datalen += VARSIZE(DatumGetPointer(dlexemes[i])) - VARHDRSZ;
789 tslen = CALCDATASIZE(nitems, datalen);
790
791 /* Allocate and fill tsvector. */
794 tsout->size = nitems;
795
797 cur = STRPTR(tsout);
798 for (i = 0; i < nitems; i++)
799 {
800 char *lex = VARDATA(DatumGetPointer(dlexemes[i]));
802
803 memcpy(cur, lex, lex_len);
804 arrout[i].haspos = 0;
805 arrout[i].len = lex_len;
806 arrout[i].pos = cur - STRPTR(tsout);
807 cur += lex_len;
808 }
809
810 PG_FREE_IF_COPY(v, 0);
812}
813
814/*
815 * ts_filter(): keep only lexemes with given weights in tsvector.
816 */
817Datum
819{
821 tsout;
824 *arrout;
825 char *datain = STRPTR(tsin),
826 *dataout;
828 bool *nulls;
829 int nweights;
830 int i,
831 j;
832 int cur_pos = 0;
833 char mask = 0;
834
836
837 for (i = 0; i < nweights; i++)
838 {
839 char char_weight;
840
841 if (nulls[i])
844 errmsg("weight array may not contain nulls")));
845
847 switch (char_weight)
848 {
849 case 'A':
850 case 'a':
851 mask = mask | 8;
852 break;
853 case 'B':
854 case 'b':
855 mask = mask | 4;
856 break;
857 case 'C':
858 case 'c':
859 mask = mask | 2;
860 break;
861 case 'D':
862 case 'd':
863 mask = mask | 1;
864 break;
865 default:
868 errmsg("unrecognized weight: \"%c\"", char_weight)));
869 }
870 }
871
873 tsout->size = tsin->size;
876
877 for (i = j = 0; i < tsin->size; i++)
878 {
880 *posvout;
881 int npos = 0;
882 int k;
883
884 if (!arrin[i].haspos)
885 continue;
886
890
891 for (k = 0; k < posvin->npos; k++)
892 {
893 if (mask & (1 << WEP_GETWEIGHT(posvin->pos[k])))
894 posvout->pos[npos++] = posvin->pos[k];
895 }
896
897 /* if no satisfactory positions found, skip lexeme */
898 if (!npos)
899 continue;
900
901 arrout[j].haspos = true;
902 arrout[j].len = arrin[i].len;
903 arrout[j].pos = cur_pos;
904
906 posvout->npos = npos;
908 cur_pos += POSDATALEN(tsout, arrout + j) * sizeof(WordEntryPos) +
909 sizeof(uint16);
910 j++;
911 }
912
913 tsout->size = j;
914 if (dataout != STRPTR(tsout))
916
918
921}
922
923Datum
925{
928 TSVector out;
929 WordEntry *ptr;
931 *ptr2;
932 WordEntryPos *p;
933 int maxpos = 0,
934 i,
935 j,
936 i1,
937 i2,
938 dataoff,
941 char *data,
942 *data1,
943 *data2;
944
945 /* Get max position in in1; we'll need this to offset in2's positions */
946 ptr = ARRPTR(in1);
947 i = in1->size;
948 while (i--)
949 {
950 if ((j = POSDATALEN(in1, ptr)) != 0)
951 {
952 p = POSDATAPTR(in1, ptr);
953 while (j--)
954 {
955 if (WEP_GETPOS(*p) > maxpos)
956 maxpos = WEP_GETPOS(*p);
957 p++;
958 }
959 }
960 ptr++;
961 }
962
963 ptr1 = ARRPTR(in1);
964 ptr2 = ARRPTR(in2);
965 data1 = STRPTR(in1);
966 data2 = STRPTR(in2);
967 i1 = in1->size;
968 i2 = in2->size;
969
970 /*
971 * Conservative estimate of space needed. We might need all the data in
972 * both inputs, and conceivably add a pad byte before position data for
973 * each item where there was none before.
974 */
975 output_bytes = VARSIZE(in1) + VARSIZE(in2) + i1 + i2;
976
979
980 /*
981 * We must make out->size valid so that STRPTR(out) is sensible. We'll
982 * collapse out any unused space at the end.
983 */
984 out->size = in1->size + in2->size;
985
986 ptr = ARRPTR(out);
987 data = STRPTR(out);
988 dataoff = 0;
989 while (i1 && i2)
990 {
992
993 if (cmp < 0)
994 { /* in1 first */
995 ptr->haspos = ptr1->haspos;
996 ptr->len = ptr1->len;
997 memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
998 ptr->pos = dataoff;
999 dataoff += ptr1->len;
1000 if (ptr->haspos)
1001 {
1003 memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
1004 dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
1005 }
1006
1007 ptr++;
1008 ptr1++;
1009 i1--;
1010 }
1011 else if (cmp > 0)
1012 { /* in2 first */
1013 ptr->haspos = ptr2->haspos;
1014 ptr->len = ptr2->len;
1015 memcpy(data + dataoff, data2 + ptr2->pos, ptr2->len);
1016 ptr->pos = dataoff;
1017 dataoff += ptr2->len;
1018 if (ptr->haspos)
1019 {
1020 int addlen = add_pos(in2, ptr2, out, ptr, maxpos);
1021
1022 if (addlen == 0)
1023 ptr->haspos = 0;
1024 else
1025 {
1027 dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
1028 }
1029 }
1030
1031 ptr++;
1032 ptr2++;
1033 i2--;
1034 }
1035 else
1036 {
1037 ptr->haspos = ptr1->haspos | ptr2->haspos;
1038 ptr->len = ptr1->len;
1039 memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
1040 ptr->pos = dataoff;
1041 dataoff += ptr1->len;
1042 if (ptr->haspos)
1043 {
1044 if (ptr1->haspos)
1045 {
1047 memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
1048 dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
1049 if (ptr2->haspos)
1050 dataoff += add_pos(in2, ptr2, out, ptr, maxpos) * sizeof(WordEntryPos);
1051 }
1052 else /* must have ptr2->haspos */
1053 {
1054 int addlen = add_pos(in2, ptr2, out, ptr, maxpos);
1055
1056 if (addlen == 0)
1057 ptr->haspos = 0;
1058 else
1059 {
1061 dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
1062 }
1063 }
1064 }
1065
1066 ptr++;
1067 ptr1++;
1068 ptr2++;
1069 i1--;
1070 i2--;
1071 }
1072 }
1073
1074 while (i1)
1075 {
1076 ptr->haspos = ptr1->haspos;
1077 ptr->len = ptr1->len;
1078 memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
1079 ptr->pos = dataoff;
1080 dataoff += ptr1->len;
1081 if (ptr->haspos)
1082 {
1084 memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
1085 dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
1086 }
1087
1088 ptr++;
1089 ptr1++;
1090 i1--;
1091 }
1092
1093 while (i2)
1094 {
1095 ptr->haspos = ptr2->haspos;
1096 ptr->len = ptr2->len;
1097 memcpy(data + dataoff, data2 + ptr2->pos, ptr2->len);
1098 ptr->pos = dataoff;
1099 dataoff += ptr2->len;
1100 if (ptr->haspos)
1101 {
1102 int addlen = add_pos(in2, ptr2, out, ptr, maxpos);
1103
1104 if (addlen == 0)
1105 ptr->haspos = 0;
1106 else
1107 {
1109 dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
1110 }
1111 }
1112
1113 ptr++;
1114 ptr2++;
1115 i2--;
1116 }
1117
1118 /*
1119 * Instead of checking each offset individually, we check for overflow of
1120 * pos fields once at the end.
1121 */
1122 if (dataoff > MAXSTRPOS)
1123 ereport(ERROR,
1125 errmsg("string is too long for tsvector (%d bytes, max %d bytes)", dataoff, MAXSTRPOS)));
1126
1127 /*
1128 * Adjust sizes (asserting that we didn't overrun the original estimates)
1129 * and collapse out any unused array entries.
1130 */
1131 output_size = ptr - ARRPTR(out);
1133 out->size = output_size;
1134 if (data != STRPTR(out))
1135 memmove(STRPTR(out), data, dataoff);
1137 Assert(output_bytes <= VARSIZE(out));
1139
1140 PG_FREE_IF_COPY(in1, 0);
1141 PG_FREE_IF_COPY(in2, 1);
1142 PG_RETURN_POINTER(out);
1143}
1144
1145/*
1146 * Compare two strings by tsvector rules.
1147 *
1148 * if prefix = true then it returns zero value iff b has prefix a
1149 */
1150int32
1151tsCompareString(char *a, int lena, char *b, int lenb, bool prefix)
1152{
1153 int cmp;
1154
1155 if (lena == 0)
1156 {
1157 if (prefix)
1158 cmp = 0; /* empty string is prefix of anything */
1159 else
1160 cmp = (lenb > 0) ? -1 : 0;
1161 }
1162 else if (lenb == 0)
1163 {
1164 cmp = (lena > 0) ? 1 : 0;
1165 }
1166 else
1167 {
1168 cmp = memcmp(a, b, Min((unsigned int) lena, (unsigned int) lenb));
1169
1170 if (prefix)
1171 {
1172 if (cmp == 0 && lena > lenb)
1173 cmp = 1; /* a is longer, so not a prefix of b */
1174 }
1175 else if (cmp == 0 && lena != lenb)
1176 {
1177 cmp = (lena < lenb) ? -1 : 1;
1178 }
1179 }
1180
1181 return cmp;
1182}
1183
1184/*
1185 * Check weight info or/and fill 'data' with the required positions
1186 */
1187static TSTernaryValue
1190{
1191 TSTernaryValue result = TS_NO;
1192
1193 Assert(data == NULL || data->npos == 0);
1194
1195 if (entry->haspos)
1196 {
1198
1199 /*
1200 * We can't use the _POSVECPTR macro here because the pointer to the
1201 * tsvector's lexeme storage is already contained in chkval->values.
1202 */
1204 (chkval->values + SHORTALIGN(entry->pos + entry->len));
1205
1206 if (val->weight && data)
1207 {
1210
1211 /*
1212 * Filter position information by weights
1213 */
1214 dptr = data->pos = palloc_array(WordEntryPos, posvec->npos);
1215 data->allocated = true;
1216
1217 /* Is there a position with a matching weight? */
1218 while (posvec_iter < posvec->pos + posvec->npos)
1219 {
1220 /* If true, append this position to the data->pos */
1221 if (val->weight & (1 << WEP_GETWEIGHT(*posvec_iter)))
1222 {
1224 dptr++;
1225 }
1226
1227 posvec_iter++;
1228 }
1229
1230 data->npos = dptr - data->pos;
1231
1232 if (data->npos > 0)
1233 result = TS_YES;
1234 else
1235 {
1236 pfree(data->pos);
1237 data->pos = NULL;
1238 data->allocated = false;
1239 }
1240 }
1241 else if (val->weight)
1242 {
1244
1245 /* Is there a position with a matching weight? */
1246 while (posvec_iter < posvec->pos + posvec->npos)
1247 {
1248 if (val->weight & (1 << WEP_GETWEIGHT(*posvec_iter)))
1249 {
1250 result = TS_YES;
1251 break; /* no need to go further */
1252 }
1253
1254 posvec_iter++;
1255 }
1256 }
1257 else if (data)
1258 {
1259 data->npos = posvec->npos;
1260 data->pos = posvec->pos;
1261 data->allocated = false;
1262 result = TS_YES;
1263 }
1264 else
1265 {
1266 /* simplest case: no weight check, positions not needed */
1267 result = TS_YES;
1268 }
1269 }
1270 else
1271 {
1272 /*
1273 * Position info is lacking, so if the caller requires it, we can only
1274 * say that maybe there is a match.
1275 *
1276 * Notice, however, that we *don't* check val->weight here.
1277 * Historically, stripped tsvectors are considered to match queries
1278 * whether or not the query has a weight restriction; that's a little
1279 * dubious but we'll preserve the behavior.
1280 */
1281 if (data)
1282 result = TS_MAYBE;
1283 else
1284 result = TS_YES;
1285 }
1286
1287 return result;
1288}
1289
1290/*
1291 * TS_execute callback for matching a tsquery operand to plain tsvector data
1292 */
1293static TSTernaryValue
1295{
1297 WordEntry *StopLow = chkval->arrb;
1298 WordEntry *StopHigh = chkval->arre;
1300 TSTernaryValue res = TS_NO;
1301
1302 /* Loop invariant: StopLow <= val < StopHigh */
1303 while (StopLow < StopHigh)
1304 {
1305 int difference;
1306
1307 StopMiddle = StopLow + (StopHigh - StopLow) / 2;
1308 difference = tsCompareString(chkval->operand + val->distance,
1309 val->length,
1310 chkval->values + StopMiddle->pos,
1311 StopMiddle->len,
1312 false);
1313
1314 if (difference == 0)
1315 {
1316 /* Check weight info & fill 'data' with positions */
1318 break;
1319 }
1320 else if (difference > 0)
1321 StopLow = StopMiddle + 1;
1322 else
1324 }
1325
1326 /*
1327 * If it's a prefix search, we should also consider lexemes that the
1328 * search term is a prefix of (which will necessarily immediately follow
1329 * the place we found in the above loop). But we can skip them if there
1330 * was a definite match on the exact term AND the caller doesn't need
1331 * position info.
1332 */
1333 if (val->prefix && (res != TS_YES || data))
1334 {
1336 int npos = 0,
1337 totalpos = 0;
1338
1339 /* adjust start position for corner case */
1340 if (StopLow >= StopHigh)
1342
1343 /* we don't try to re-use any data from the initial match */
1344 if (data)
1345 {
1346 if (data->allocated)
1347 pfree(data->pos);
1348 data->pos = NULL;
1349 data->allocated = false;
1350 data->npos = 0;
1351 }
1352 res = TS_NO;
1353
1354 while ((res != TS_YES || data) &&
1356 tsCompareString(chkval->operand + val->distance,
1357 val->length,
1358 chkval->values + StopMiddle->pos,
1359 StopMiddle->len,
1360 true) == 0)
1361 {
1363
1365
1366 if (subres != TS_NO)
1367 {
1368 if (data)
1369 {
1370 /*
1371 * We need to join position information
1372 */
1373 if (subres == TS_MAYBE)
1374 {
1375 /*
1376 * No position info for this match, so we must report
1377 * MAYBE overall.
1378 */
1379 res = TS_MAYBE;
1380 /* forget any previous positions */
1381 npos = 0;
1382 /* don't leak storage */
1383 if (allpos)
1384 pfree(allpos);
1385 break;
1386 }
1387
1388 while (npos + data->npos > totalpos)
1389 {
1390 if (totalpos == 0)
1391 {
1392 totalpos = 256;
1394 }
1395 else
1396 {
1397 totalpos *= 2;
1399 }
1400 }
1401
1402 memcpy(allpos + npos, data->pos, sizeof(WordEntryPos) * data->npos);
1403 npos += data->npos;
1404
1405 /* don't leak storage from individual matches */
1406 if (data->allocated)
1407 pfree(data->pos);
1408 data->pos = NULL;
1409 data->allocated = false;
1410 /* it's important to reset data->npos before next loop */
1411 data->npos = 0;
1412 }
1413 else
1414 {
1415 /* Don't need positions, just handle YES/MAYBE */
1416 if (subres == TS_YES || res == TS_NO)
1417 res = subres;
1418 }
1419 }
1420
1421 StopMiddle++;
1422 }
1423
1424 if (data && npos > 0)
1425 {
1426 /* Sort and make unique array of found positions */
1427 data->pos = allpos;
1428 qsort(data->pos, npos, sizeof(WordEntryPos), compareWordEntryPos);
1429 data->npos = qunique(data->pos, npos, sizeof(WordEntryPos),
1431 data->allocated = true;
1432 res = TS_YES;
1433 }
1434 }
1435
1436 return res;
1437}
1438
1439/*
1440 * Compute output position list for a tsquery operator in phrase mode.
1441 *
1442 * Merge the position lists in Ldata and Rdata as specified by "emit",
1443 * returning the result list into *data. The input position lists must be
1444 * sorted and unique, and the output will be as well.
1445 *
1446 * data: pointer to initially-all-zeroes output struct, or NULL
1447 * Ldata, Rdata: input position lists
1448 * emit: bitmask of TSPO_XXX flags
1449 * Loffset: offset to be added to Ldata positions before comparing/outputting
1450 * Roffset: offset to be added to Rdata positions before comparing/outputting
1451 * max_npos: maximum possible required size of output position array
1452 *
1453 * Loffset and Roffset should not be negative, else we risk trying to output
1454 * negative positions, which won't fit into WordEntryPos.
1455 *
1456 * The result is boolean (TS_YES or TS_NO), but for the caller's convenience
1457 * we return it as TSTernaryValue.
1458 *
1459 * Returns TS_YES if any positions were emitted to *data; or if data is NULL,
1460 * returns TS_YES if any positions would have been emitted.
1461 */
1462#define TSPO_L_ONLY 0x01 /* emit positions appearing only in L */
1463#define TSPO_R_ONLY 0x02 /* emit positions appearing only in R */
1464#define TSPO_BOTH 0x04 /* emit positions appearing in both L&R */
1465
1466static TSTernaryValue
1470 int emit,
1471 int Loffset,
1472 int Roffset,
1473 int max_npos)
1474{
1475 int Lindex,
1476 Rindex;
1477
1478 /* Loop until both inputs are exhausted */
1479 Lindex = Rindex = 0;
1480 while (Lindex < Ldata->npos || Rindex < Rdata->npos)
1481 {
1482 int Lpos,
1483 Rpos;
1484 int output_pos = 0;
1485
1486 /*
1487 * Fetch current values to compare. WEP_GETPOS() is needed because
1488 * ExecPhraseData->data can point to a tsvector's WordEntryPosVector.
1489 */
1490 if (Lindex < Ldata->npos)
1491 Lpos = WEP_GETPOS(Ldata->pos[Lindex]) + Loffset;
1492 else
1493 {
1494 /* L array exhausted, so we're done if R_ONLY isn't set */
1495 if (!(emit & TSPO_R_ONLY))
1496 break;
1497 Lpos = INT_MAX;
1498 }
1499 if (Rindex < Rdata->npos)
1500 Rpos = WEP_GETPOS(Rdata->pos[Rindex]) + Roffset;
1501 else
1502 {
1503 /* R array exhausted, so we're done if L_ONLY isn't set */
1504 if (!(emit & TSPO_L_ONLY))
1505 break;
1506 Rpos = INT_MAX;
1507 }
1508
1509 /* Merge-join the two input lists */
1510 if (Lpos < Rpos)
1511 {
1512 /* Lpos is not matched in Rdata, should we output it? */
1513 if (emit & TSPO_L_ONLY)
1514 output_pos = Lpos;
1515 Lindex++;
1516 }
1517 else if (Lpos == Rpos)
1518 {
1519 /* Lpos and Rpos match ... should we output it? */
1520 if (emit & TSPO_BOTH)
1521 output_pos = Rpos;
1522 Lindex++;
1523 Rindex++;
1524 }
1525 else /* Lpos > Rpos */
1526 {
1527 /* Rpos is not matched in Ldata, should we output it? */
1528 if (emit & TSPO_R_ONLY)
1529 output_pos = Rpos;
1530 Rindex++;
1531 }
1532
1533 if (output_pos > 0)
1534 {
1535 if (data)
1536 {
1537 /* Store position, first allocating output array if needed */
1538 if (data->pos == NULL)
1539 {
1540 data->pos = (WordEntryPos *)
1541 palloc(max_npos * sizeof(WordEntryPos));
1542 data->allocated = true;
1543 }
1544 data->pos[data->npos++] = output_pos;
1545 }
1546 else
1547 {
1548 /*
1549 * Exact positions not needed, so return TS_YES as soon as we
1550 * know there is at least one.
1551 */
1552 return TS_YES;
1553 }
1554 }
1555 }
1556
1557 if (data && data->npos > 0)
1558 {
1559 /* Let's assert we didn't overrun the array */
1560 Assert(data->npos <= max_npos);
1561 return TS_YES;
1562 }
1563 return TS_NO;
1564}
1565
1566/*
1567 * Execute tsquery at or below an OP_PHRASE operator.
1568 *
1569 * This handles tsquery execution at recursion levels where we need to care
1570 * about match locations.
1571 *
1572 * In addition to the same arguments used for TS_execute, the caller may pass
1573 * a preinitialized-to-zeroes ExecPhraseData struct, to be filled with lexeme
1574 * match position info on success. data == NULL if no position data need be
1575 * returned.
1576 * Note: the function assumes data != NULL for operators other than OP_PHRASE.
1577 * This is OK because an outside call always starts from an OP_PHRASE node,
1578 * and all internal recursion cases pass data != NULL.
1579 *
1580 * The detailed semantics of the match data, given that the function returned
1581 * TS_YES (successful match), are:
1582 *
1583 * npos > 0, negate = false:
1584 * query is matched at specified position(s) (and only those positions)
1585 * npos > 0, negate = true:
1586 * query is matched at all positions *except* specified position(s)
1587 * npos = 0, negate = true:
1588 * query is matched at all positions
1589 * npos = 0, negate = false:
1590 * disallowed (this should result in TS_NO or TS_MAYBE, as appropriate)
1591 *
1592 * Successful matches also return a "width" value which is the match width in
1593 * lexemes, less one. Hence, "width" is zero for simple one-lexeme matches,
1594 * and is the sum of the phrase operator distances for phrase matches. Note
1595 * that when width > 0, the listed positions represent the ends of matches not
1596 * the starts. (This unintuitive rule is needed to avoid possibly generating
1597 * negative positions, which wouldn't fit into the WordEntryPos arrays.)
1598 *
1599 * If the TSExecuteCallback function reports that an operand is present
1600 * but fails to provide position(s) for it, we will return TS_MAYBE when
1601 * it is possible but not certain that the query is matched.
1602 *
1603 * When the function returns TS_NO or TS_MAYBE, it must return npos = 0,
1604 * negate = false (which is the state initialized by the caller); but the
1605 * "width" output in such cases is undefined.
1606 */
1607static TSTernaryValue
1608TS_phrase_execute(QueryItem *curitem, void *arg, uint32 flags,
1611{
1613 Rdata;
1615 rmatch;
1616 int Loffset,
1617 Roffset,
1618 maxwidth;
1619
 /*
  * NOTE(review): the embedded line numbering of this listing skips values
  * (1609-1610, 1612, 1614, 1621, 1624, ...).  The remaining parameters, the
  * declarations of Ldata and lmatch, the statements announced by the next
  * two comments, and the leading arguments of several calls further down
  * are therefore not visible here.
  */
1620 /* since this function recurses, it could be driven to stack overflow */
1622
1623 /* ... and let's check for query cancel while we're at it */
1625
 /* Leaf operand: the callback decides, and receives "data" for its output */
1626 if (curitem->type == QI_VAL)
1627 return chkcond(arg, (QueryOperand *) curitem, data);
1628
1629 switch (curitem->qoperator.oper)
1630 {
1631 case OP_NOT:
1632
1633 /*
1634 * We need not touch data->width, since a NOT operation does not
1635 * change the match width.
1636 */
1637 if (flags & TS_EXEC_SKIP_NOT)
1638 {
1639 /* with SKIP_NOT, report NOT as "match everywhere" */
1640 Assert(data->npos == 0 && !data->negate);
1641 data->negate = true;
1642 return TS_YES;
1643 }
 /* The operand of NOT is the item stored right after this node */
1644 switch (TS_phrase_execute(curitem + 1, arg, flags, chkcond, data))
1645 {
1646 case TS_NO:
1647 /* change "match nowhere" to "match everywhere" */
1648 Assert(data->npos == 0 && !data->negate);
1649 data->negate = true;
1650 return TS_YES;
1651 case TS_YES:
1652 if (data->npos > 0)
1653 {
1654 /* we have some positions, invert negate flag */
1655 data->negate = !data->negate;
1656 return TS_YES;
1657 }
1658 else if (data->negate)
1659 {
1660 /* change "match everywhere" to "match nowhere" */
1661 data->negate = false;
1662 return TS_NO;
1663 }
1664 /* Should not get here if result was TS_YES */
1665 Assert(false);
1666 break;
1667 case TS_MAYBE:
1668 /* match positions are, and remain, uncertain */
1669 return TS_MAYBE;
1670 }
1671 break;
1672
1673 case OP_PHRASE:
1674 case OP_AND:
 /* Each operand gets its own zeroed result struct */
1675 memset(&Ldata, 0, sizeof(Ldata));
1676 memset(&Rdata, 0, sizeof(Rdata));
1677
 /*
  * The left operand sits at offset qoperator.left from this node, the
  * right operand immediately after it.  A TS_NO from the left operand
  * returns at once, so the right operand is not evaluated in that case.
  */
1678 lmatch = TS_phrase_execute(curitem + curitem->qoperator.left,
1679 arg, flags, chkcond, &Ldata);
1680 if (lmatch == TS_NO)
1681 return TS_NO;
1682
1683 rmatch = TS_phrase_execute(curitem + 1,
1684 arg, flags, chkcond, &Rdata);
1685 if (rmatch == TS_NO)
1686 return TS_NO;
1687
1688 /*
1689 * If either operand has no position information, then we can't
1690 * return reliable position data, only a MAYBE result.
1691 */
1692 if (lmatch == TS_MAYBE || rmatch == TS_MAYBE)
1693 return TS_MAYBE;
1694
1695 if (curitem->qoperator.oper == OP_PHRASE)
1696 {
1697 /*
1698 * Compute Loffset and Roffset suitable for phrase match, and
1699 * compute overall width of whole phrase match.
1700 */
1701 Loffset = curitem->qoperator.distance + Rdata.width;
1702 Roffset = 0;
 /* data may be NULL here (see the function's header comment) */
1703 if (data)
1704 data->width = curitem->qoperator.distance +
1705 Ldata.width + Rdata.width;
1706 }
1707 else
1708 {
1709 /*
1710 * For OP_AND, set output width and alignment like OP_OR (see
1711 * comment below)
1712 */
1713 maxwidth = Max(Ldata.width, Rdata.width);
1714 Loffset = maxwidth - Ldata.width;
1715 Roffset = maxwidth - Rdata.width;
1716 if (data)
1717 data->width = maxwidth;
1718 }
1719
 /* Pick the combination rule from the negate flags of both operands */
1720 if (Ldata.negate && Rdata.negate)
1721 {
1722 /* !L & !R: treat as !(L | R) */
1726 Ldata.npos + Rdata.npos);
1727 if (data)
1728 data->negate = true;
1729 return TS_YES;
1730 }
1731 else if (Ldata.negate)
1732 {
1733 /* !L & R */
1734 return TS_phrase_output(data, &Ldata, &Rdata,
1737 Rdata.npos);
1738 }
1739 else if (Rdata.negate)
1740 {
1741 /* L & !R */
1742 return TS_phrase_output(data, &Ldata, &Rdata,
1745 Ldata.npos);
1746 }
1747 else
1748 {
1749 /* straight AND */
1750 return TS_phrase_output(data, &Ldata, &Rdata,
1751 TSPO_BOTH,
1753 Min(Ldata.npos, Rdata.npos));
1754 }
1755
1756 case OP_OR:
1757 memset(&Ldata, 0, sizeof(Ldata));
1758 memset(&Rdata, 0, sizeof(Rdata));
1759
 /*
  * Unlike the AND/PHRASE case there is no early exit between the two
  * calls: both operands are always evaluated.
  */
1760 lmatch = TS_phrase_execute(curitem + curitem->qoperator.left,
1761 arg, flags, chkcond, &Ldata);
1762 rmatch = TS_phrase_execute(curitem + 1,
1763 arg, flags, chkcond, &Rdata);
1764
1765 if (lmatch == TS_NO && rmatch == TS_NO)
1766 return TS_NO;
1767
1768 /*
1769 * If either operand has no position information, then we can't
1770 * return reliable position data, only a MAYBE result.
1771 */
1772 if (lmatch == TS_MAYBE || rmatch == TS_MAYBE)
1773 return TS_MAYBE;
1774
1775 /*
1776 * Cope with undefined output width from failed submatch. (This
1777 * takes less code than trying to ensure that all failure returns
1778 * set data->width to zero.)
1779 */
1780 if (lmatch == TS_NO)
1781 Ldata.width = 0;
1782 if (rmatch == TS_NO)
1783 Rdata.width = 0;
1784
1785 /*
1786 * For OP_AND and OP_OR, report the width of the wider of the two
1787 * inputs, and align the narrower input's positions to the right
1788 * end of that width. This rule deals at least somewhat
1789 * reasonably with cases like "x <-> (y | z <-> q)".
1790 */
1791 maxwidth = Max(Ldata.width, Rdata.width);
1792 Loffset = maxwidth - Ldata.width;
1793 Roffset = maxwidth - Rdata.width;
 /* data is dereferenced without a NULL test in this branch */
1794 data->width = maxwidth;
1795
1796 if (Ldata.negate && Rdata.negate)
1797 {
1798 /* !L | !R: treat as !(L & R) */
1800 TSPO_BOTH,
1802 Min(Ldata.npos, Rdata.npos));
1803 data->negate = true;
1804 return TS_YES;
1805 }
1806 else if (Ldata.negate)
1807 {
1808 /* !L | R: treat as !(L & !R) */
1812 Ldata.npos);
1813 data->negate = true;
1814 return TS_YES;
1815 }
1816 else if (Rdata.negate)
1817 {
1818 /* L | !R: treat as !(!L & R) */
1822 Rdata.npos);
1823 data->negate = true;
1824 return TS_YES;
1825 }
1826 else
1827 {
1828 /* straight OR */
1829 return TS_phrase_output(data, &Ldata, &Rdata,
1832 Ldata.npos + Rdata.npos);
1833 }
1834
1835 default:
1836 elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
1837 }
1838
1839 /* not reachable, but keep compiler quiet */
1840 return TS_NO;
1841}
1842
1843
1844/*
1845 * Evaluate tsquery boolean expression.
1846 *
1847 * curitem: current tsquery item (initially, the first one)
1848 * arg: opaque value to pass through to callback function
1849 * flags: bitmask of flag bits shown in ts_utils.h
1850 * chkcond: callback function to check whether a primitive value is present
 *
 * Returns false only when TS_execute_recurse() reports TS_NO; both TS_YES
 * and TS_MAYBE are reported to the caller as true.
1851 */
1852bool
1853TS_execute(QueryItem *curitem, void *arg, uint32 flags,
1855{
1856 /*
1857 * If we get TS_MAYBE from the recursion, return true. We could only see
1858 * that result if the caller passed TS_EXEC_PHRASE_NO_POS, so there's no
1859 * need to check again.
1860 */
1861 return TS_execute_recurse(curitem, arg, flags, chkcond) != TS_NO;
1862}
1863
1864/*
1865 * Evaluate tsquery boolean expression.
1866 *
1867 * This is the same as TS_execute except that TS_MAYBE is returned as-is.
 *
 * curitem, arg, flags and chkcond are handed unchanged to
 * TS_execute_recurse(), and its result is returned without any conversion.
1868 */
1870TS_execute_ternary(QueryItem *curitem, void *arg, uint32 flags,
1872{
1873 return TS_execute_recurse(curitem, arg, flags, chkcond);
1874}
1875
1876/*
1877 * TS_execute recursion for operators above any phrase operator. Here we do
1878 * not need to worry about lexeme positions. As soon as we hit an OP_PHRASE
1879 * operator, we pass it off to TS_phrase_execute which does worry.
1880 */
1881static TSTernaryValue
1882TS_execute_recurse(QueryItem *curitem, void *arg, uint32 flags,
1884{
1886
1887 /* since this function recurses, it could be driven to stack overflow */
1889
1890 /* ... and let's check for query cancel while we're at it */
1892
 /* Leaf operand: the callback's answer is the result */
1893 if (curitem->type == QI_VAL)
1894 return chkcond(arg, (QueryOperand *) curitem,
1895 NULL /* don't need position info */ );
1896
1897 switch (curitem->qoperator.oper)
1898 {
1899 case OP_NOT:
 /* With SKIP_NOT the operand is not evaluated at all */
1900 if (flags & TS_EXEC_SKIP_NOT)
1901 return TS_YES;
 /* Otherwise swap YES and NO; MAYBE stays MAYBE */
1902 switch (TS_execute_recurse(curitem + 1, arg, flags, chkcond))
1903 {
1904 case TS_NO:
1905 return TS_YES;
1906 case TS_YES:
1907 return TS_NO;
1908 case TS_MAYBE:
1909 return TS_MAYBE;
1910 }
1911 break;
1912
1913 case OP_AND:
 /* A NO on the left decides the result; the right side is skipped */
1914 lmatch = TS_execute_recurse(curitem + curitem->qoperator.left, arg,
1915 flags, chkcond);
1916 if (lmatch == TS_NO)
1917 return TS_NO;
1918 switch (TS_execute_recurse(curitem + 1, arg, flags, chkcond))
1919 {
1920 case TS_NO:
1921 return TS_NO;
1922 case TS_YES:
 /* lmatch is YES or MAYBE here, and that is the overall answer */
1923 return lmatch;
1924 case TS_MAYBE:
1925 return TS_MAYBE;
1926 }
1927 break;
1928
1929 case OP_OR:
 /* A YES on the left decides the result; the right side is skipped */
1930 lmatch = TS_execute_recurse(curitem + curitem->qoperator.left, arg,
1931 flags, chkcond);
1932 if (lmatch == TS_YES)
1933 return TS_YES;
1934 switch (TS_execute_recurse(curitem + 1, arg, flags, chkcond))
1935 {
1936 case TS_NO:
 /* lmatch is NO or MAYBE here, and that is the overall answer */
1937 return lmatch;
1938 case TS_YES:
1939 return TS_YES;
1940 case TS_MAYBE:
1941 return TS_MAYBE;
1942 }
1943 break;
1944
1945 case OP_PHRASE:
1946
1947 /*
1948 * If we get a MAYBE result, and the caller doesn't want that,
1949 * convert it to NO. It would be more consistent, perhaps, to
1950 * return the result of TS_phrase_execute() verbatim and then
1951 * convert MAYBE results at the top of the recursion. But
1952 * converting at the topmost phrase operator gives results that
1953 * are bug-compatible with the old implementation, so do it like
1954 * this for now.
1955 */
 /* NULL as last argument: no position data is requested back */
1956 switch (TS_phrase_execute(curitem, arg, flags, chkcond, NULL))
1957 {
1958 case TS_NO:
1959 return TS_NO;
1960 case TS_YES:
1961 return TS_YES;
1962 case TS_MAYBE:
1963 return (flags & TS_EXEC_PHRASE_NO_POS) ? TS_MAYBE : TS_NO;
1964 }
1965 break;
1966
1967 default:
1968 elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
1969 }
1970
1971 /* not reachable, but keep compiler quiet */
1972 return TS_NO;
1973}
1974
1975/*
1976 * Evaluate tsquery and report locations of matching terms.
1977 *
1978 * This is like TS_execute except that it returns match locations not just
1979 * success/failure status. The callback function is required to provide
1980 * position data (we report failure if it doesn't).
1981 *
1982 * On successful match, the result is a List of ExecPhraseData structs, one
1983 * for each AND'ed term or phrase operator in the query. Each struct includes
1984 * a sorted array of lexeme positions matching that term. (Recall that for
1985 * phrase operators, the match includes width+1 lexemes, and the recorded
1986 * position is that of the rightmost lexeme.)
1987 *
1988 * OR subexpressions are handled by union'ing their match locations into a
1989 * single List element, which is valid since any of those locations contains
1990 * a match. However, when some of the OR'ed terms are phrase operators, we
1991 * report the maximum width of any of the OR'ed terms, making such cases
1992 * slightly imprecise in the conservative direction. (For example, if the
1993 * tsquery is "(A <-> B) | C", an occurrence of C in the data would be
1994 * reported as though it includes the lexeme to the left of C.)
1995 *
1996 * Locations of NOT subexpressions are not reported. (Obviously, there can
1997 * be no successful NOT matches at top level, or the match would have failed.
1998 * So this amounts to ignoring NOTs underneath ORs.)
1999 *
2000 * The result is NIL if no match, or if position data was not returned.
2001 *
2002 * Arguments are the same as for TS_execute, although flags is currently
2003 * vestigial since none of the defined bits are sensible here.
2004 */
2005List *
2006TS_execute_locations(QueryItem *curitem, void *arg,
2007 uint32 flags,
2009{
2010 List *result;
2011
2012 /* No flags supported, as yet */
2013 Assert(flags == TS_EXEC_EMPTY);
 /*
  * "result" needs no initializer: TS_execute_locations_recurse() stores NIL
  * through its last argument before doing anything else.  The list is only
  * handed back when the recursion reports a match.
  */
2014 if (TS_execute_locations_recurse(curitem, arg, chkcond, &result))
2015 return result;
2016 return NIL;
2017}
2018
2019/*
2020 * TS_execute_locations recursion for operators above any phrase operator.
2021 * OP_PHRASE subexpressions can be passed off to TS_phrase_execute.
2022 */
2023static bool
2026 List **locations)
2027{
2028 bool lmatch,
2029 rmatch;
2031 *rlocations;
2033
2034 /* since this function recurses, it could be driven to stack overflow */
2036
2037 /* ... and let's check for query cancel while we're at it */
2039
2040 /* Default locations result is empty */
2041 *locations = NIL;
2042
2043 if (curitem->type == QI_VAL)
2044 {
 /*
  * NOTE(review): the lines that set up "data" and that run when the
  * callback says TS_YES are missing from this listing.
  */
2046 if (chkcond(arg, (QueryOperand *) curitem, data) == TS_YES)
2047 {
2049 return true;
2050 }
 /* Callback did not answer TS_YES: release "data" and report failure */
2051 pfree(data);
2052 return false;
2053 }
2054
2055 switch (curitem->qoperator.oper)
2056 {
2057 case OP_NOT:
 /* Succeeds exactly when the operand fails; *locations stays NIL */
2058 if (!TS_execute_locations_recurse(curitem + 1, arg, chkcond,
2059 &llocations))
2060 return true; /* we don't pass back any locations */
2061 return false;
2062
2063 case OP_AND:
 /* Both operands must match; a failure on the left skips the right */
2064 if (!TS_execute_locations_recurse(curitem + curitem->qoperator.left,
2065 arg, chkcond,
2066 &llocations))
2067 return false;
2068 if (!TS_execute_locations_recurse(curitem + 1,
2069 arg, chkcond,
2070 &rlocations))
2071 return false;
2073 return true;
2074
2075 case OP_OR:
 /*
  * NOTE(review): the heads of the two calls below are missing from this
  * listing; presumably they assign lmatch and rmatch -- confirm.
  */
2077 arg, chkcond,
2078 &llocations);
2080 arg, chkcond,
2081 &rlocations);
2082 if (lmatch || rmatch)
2083 {
2084 /*
2085 * We generate an AND'able location struct from each
2086 * combination of sub-matches, following the disjunctive law
2087 * (A & B) | (C & D) = (A | C) & (A | D) & (B | C) & (B | D).
2088 *
2089 * However, if either input didn't produce locations (i.e., it
2090 * failed or was a NOT), we must just return the other list.
2091 */
2092 if (llocations == NIL)
2094 else if (rlocations == NIL)
2096 else
2097 {
2098 ListCell *ll;
2099
 /* Visit every (left element, right element) pair */
2100 foreach(ll, llocations)
2101 {
2103 ListCell *lr;
2104
2105 foreach(lr, rlocations)
2106 {
2108
2112 0, 0,
2113 ldata->npos + rdata->npos);
2114 /* Report the larger width, as explained above. */
2115 data->width = Max(ldata->width, rdata->width);
2117 }
2118 }
2119 }
2120
2121 return true;
2122 }
2123 return false;
2124
2125 case OP_PHRASE:
2126 /* We can hand this off to TS_phrase_execute */
2129 data) == TS_YES)
2130 {
 /*
  * NOTE(review): the statement guarded by this test is missing from the
  * listing.  In the upstream file "return true" belongs to the enclosing
  * TS_YES block, i.e. a negated match still counts as a match -- confirm.
  */
2131 if (!data->negate)
2133 return true;
2134 }
2135 pfree(data);
2136 return false;
2137
2138 default:
2139 elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
2140 }
2141
2142 /* not reachable, but keep compiler quiet */
2143 return false;
2144}
2145
2146/*
2147 * Detect whether a tsquery boolean expression requires any positive matches
2148 * to values shown in the tsquery.
2149 *
2150 * This is needed to know whether a GIN index search requires full index scan.
2151 * For example, 'x & !y' requires a match of x, so it's sufficient to scan
2152 * entries for x; but 'x | !y' could match rows containing neither x nor y.
2153 */
2154bool
2156{
2157 /* since this function recurses, it could be driven to stack overflow */
2159
 /* A plain operand always demands a match */
2160 if (curitem->type == QI_VAL)
2161 return true;
2162
2163 switch (curitem->qoperator.oper)
2164 {
2165 case OP_NOT:
2166
2167 /*
2168 * Assume there are no required matches underneath a NOT. For
2169 * some cases with nested NOTs, we could prove there's a required
2170 * match, but it seems unlikely to be worth the trouble.
2171 */
2172 return false;
2173
2174 case OP_PHRASE:
2175
2176 /*
2177 * Treat OP_PHRASE as OP_AND here
2178 */
2179 case OP_AND:
2180 /* If either side requires a match, we're good */
 /* (the right side is only examined when the left side does not) */
2181 if (tsquery_requires_match(curitem + curitem->qoperator.left))
2182 return true;
2183 else
2184 return tsquery_requires_match(curitem + 1);
2185
2186 case OP_OR:
2187 /* Both sides must require a match */
 /* (the right side is only examined when the left side does) */
2188 if (tsquery_requires_match(curitem + curitem->qoperator.left))
2189 return tsquery_requires_match(curitem + 1);
2190 else
2191 return false;
2192
2193 default:
2194 elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
2195 }
2196
2197 /* not reachable, but keep compiler quiet */
2198 return false;
2199}
2200
2201/*
2202 * boolean operations
2203 */
/*
 * Forwards its two arguments in swapped order (argument 1 first, then
 * argument 0) to another call.
 *
 * NOTE(review): the function name and the head of the forwarding call are on
 * lines missing from this listing; presumably this is the "query @@ vector"
 * form delegating to the "vector @@ query" implementation below -- confirm.
 */
2204Datum
2206{
2208 PG_GETARG_DATUM(1),
2209 PG_GETARG_DATUM(0)));
2210}
2211
/*
 * Match the tsquery in argument 1 against "val" and return the boolean
 * result of TS_execute().
 *
 * NOTE(review): the function name, the declaration of "val" and the last
 * arguments of the TS_execute() call are on lines missing from this listing;
 * "val" is presumably the tsvector taken from argument 0 -- confirm.
 */
2212Datum
2214{
2216 TSQuery query = PG_GETARG_TSQUERY(1);
2217 CHKVAL chkval;
2218 bool result;
2219
2220 /* empty query matches nothing */
2221 if (!query->size)
2222 {
2223 PG_FREE_IF_COPY(val, 0);
2224 PG_FREE_IF_COPY(query, 1);
2225 PG_RETURN_BOOL(false);
2226 }
2227
 /*
  * Fill the state handed to TS_execute(): start and end of val's entry
  * array, val's string area, and the query's operand area.
  */
2228 chkval.arrb = ARRPTR(val);
2229 chkval.arre = chkval.arrb + val->size;
2230 chkval.values = STRPTR(val);
2231 chkval.operand = GETOPERAND(query);
2232 result = TS_execute(GETQUERY(query),
2233 &chkval,
2236
2237 PG_FREE_IF_COPY(val, 0);
2238 PG_FREE_IF_COPY(query, 1);
2239 PG_RETURN_BOOL(result);
2240}
2241
/*
 * Derive a tsvector from argument 0 and a tsquery from argument 1, run the
 * match on them, and return the boolean result.
 *
 * NOTE(review): the function name and the heads of the three calls are on
 * lines missing from this listing, so which conversion functions are used
 * cannot be confirmed here.
 */
2242Datum
2244{
2245 TSVector vector;
2246 TSQuery query;
2247 bool res;
2248
2250 PG_GETARG_DATUM(0)));
2252 PG_GETARG_DATUM(1)));
2253
2255 TSVectorGetDatum(vector),
2256 TSQueryGetDatum(query)));
2257
 /* Both temporaries are released unconditionally */
2258 pfree(vector);
2259 pfree(query);
2260
2261 PG_RETURN_BOOL(res);
2262}
2263
/*
 * Derive a tsvector from argument 0, match it against the tsquery passed as
 * argument 1, and return the boolean result.
 *
 * NOTE(review): the function name and the heads of the two calls are on
 * lines missing from this listing.
 */
2264Datum
2266{
2267 TSVector vector;
2268 TSQuery query = PG_GETARG_TSQUERY(1);
2269 bool res;
2270
2272 PG_GETARG_DATUM(0)));
2273
2275 TSVectorGetDatum(vector),
2276 TSQueryGetDatum(query)));
2277
 /* vector is always freed; query goes through PG_FREE_IF_COPY */
2278 pfree(vector);
2279 PG_FREE_IF_COPY(query, 1);
2280
2281 PG_RETURN_BOOL(res);
2282}
2283
2284/*
2285 * ts_stat statistic function support
2286 */
2287
2288
2289/*
2290 * Returns the number of positions in value 'wptr' within tsvector 'txt',
2291 * that have a weight equal to one of the weights in 'weight' bitmask.
2292 */
2293static int
2295{
2296 int len = POSDATALEN(txt, wptr);
2297 int num = 0;
2299
 /*
  * Walk all "len" positions; a position counts when the bit numbered by its
  * weight (1 << WEP_GETWEIGHT) is set in the "weight" mask.
  * NOTE(review): the declaration of "ptr" is on a line missing from this
  * listing.
  */
2300 while (len--)
2301 {
2302 if (weight & (1 << WEP_GETWEIGHT(*ptr)))
2303 num++;
2304 ptr++;
2305 }
2306 return num;
2307}
2308
/*
 * Compare the lexeme stored in StatEntry "a" (lexeme, lenlexeme) with the
 * lexeme of WordEntry "e", whose bytes start at STRPTR(t) + e->pos and are
 * e->len long.  The result is that of tsCompareString().
 * NOTE(review): the trailing "false" presumably turns off prefix matching --
 * confirm against tsCompareString().
 */
2309#define compareStatWord(a,e,t) \
2310 tsCompareString((a)->lexeme, (a)->lenlexeme, \
2311 STRPTR(t) + (e)->pos, (e)->len, \
2312 false)
2313
/*
 * Account for entry number "off" of tsvector "txt" in the binary tree rooted
 * at stat->root: an existing node for that lexeme is bumped, otherwise a new
 * node is linked in.  stat->maxdepth is raised when the search went deeper
 * than before.
 *
 * NOTE(review): the signature line and the statement allocating a new node
 * are missing from this listing.
 */
2314static void
2316{
2317 WordEntry *we = ARRPTR(txt) + off;
2318 StatEntry *node = stat->root,
2319 *pnode = NULL;
2320 int n,
2321 res = 0;
2322 uint32 depth = 1;
2323
 /*
  * n = number of occurrences to add.  With no weight filter: every position
  * of the entry, or 1 when it has no position data.  With a filter: only
  * the positions whose weight is selected (0 when there are no positions).
  */
2324 if (stat->weight == 0)
2325 n = (we->haspos) ? POSDATALEN(txt, we) : 1;
2326 else
2327 n = (we->haspos) ? check_weight(txt, we, stat->weight) : 0;
2328
2329 if (n == 0)
2330 return; /* nothing to insert */
2331
 /*
  * Search the tree.  pnode trails one step behind as the parent and res
  * keeps the last comparison result, so a new node can be attached on the
  * correct side afterwards.
  */
2332 while (node)
2333 {
2334 res = compareStatWord(node, we, txt);
2335
2336 if (res == 0)
2337 {
2338 break;
2339 }
2340 else
2341 {
2342 pnode = node;
2343 node = (res < 0) ? node->left : node->right;
2344 }
2345 depth++;
2346 }
2347
2348 if (depth > stat->maxdepth)
2349 stat->maxdepth = depth;
2350
2351 if (node == NULL)
2352 {
 /* Lexeme not present yet: fill in a fresh node and link it in */
2354 node->left = node->right = NULL;
2355 node->ndoc = 1;
2356 node->nentry = n;
2357 node->lenlexeme = we->len;
2358 memcpy(node->lexeme, STRPTR(txt) + we->pos, node->lenlexeme);
2359
2360 if (pnode == NULL)
2361 {
2362 stat->root = node;
2363 }
2364 else
2365 {
2366 if (res < 0)
2367 pnode->left = node;
2368 else
2369 pnode->right = node;
2370 }
2371 }
2372 else
2373 {
 /* Lexeme already known: one more document, n more occurrences */
2374 node->ndoc++;
2375 node->nentry += n;
2376 }
2377}
2378
/*
 * Insert the entries found at the midpoints of the lower and upper halves of
 * the range low..high, then continue with the halves themselves.  Positions
 * are expressed in a range shifted by "offset"; an entry is inserted only
 * when pos - offset is a valid index into txt (0 <= index < txt->size).
 *
 * NOTE(review): the first signature line and the statement guarded by
 * "low != middle" near the end are missing from this listing; the latter is
 * presumably the recursive call for the lower half -- confirm.
 */
2379static void
2381 uint32 low, uint32 high, uint32 offset)
2382{
2383 uint32 pos;
2384 uint32 middle = (low + high) >> 1;
2385
 /* midpoint of the lower half */
2386 pos = (low + middle) >> 1;
2387 if (low != middle && pos >= offset && pos - offset < txt->size)
2388 insertStatEntry(persistentContext, stat, txt, pos - offset);
 /* midpoint of the upper half */
2389 pos = (high + middle + 1) >> 1;
2390 if (middle + 1 != high && pos >= offset && pos - offset < txt->size)
2391 insertStatEntry(persistentContext, stat, txt, pos - offset);
2392
2393 if (low != middle)
2395 if (high != middle + 1)
2396 chooseNextStatEntry(persistentContext, stat, txt, middle + 1, high, offset);
2397}
2398
2399/*
2400 * This is written like a custom aggregate function, because the
2401 * original plan was to do just that. Unfortunately, an aggregate function
2402 * can't return a set, so that plan was abandoned. If that limitation is
2403 * lifted in the future, ts_stat could be a real aggregate function so that
2404 * you could use it like this:
2405 *
2406 * SELECT ts_stat(vector_column) FROM vector_table;
2407 *
2408 * where vector_column is a tsvector-type column in vector_table.
2409 */
2410
/*
 * Add the entries of one tsvector to the statistics state and return the
 * state; a NULL "stat" means the state has to be set up first.
 *
 * NOTE(review): the signature line, the declaration of "txt", the statement
 * creating the state and one statement before the final return are missing
 * from this listing.
 */
2411static TSVectorStat *
2413{
2415 uint32 i,
2416 nbit = 0,
2417 offset;
2418
2419 if (stat == NULL)
2420 { /* first call: no state yet, so set it up */
2422 stat->maxdepth = 1;
2423 }
2424
2425 /* simple check of correctness */
2426 if (txt == NULL || txt->size == 0)
2427 {
 /* txt is freed only when it is not the very pointer carried by "data" */
2428 if (txt && txt != (TSVector) DatumGetPointer(data))
2429 pfree(txt);
2430 return stat;
2431 }
2432
 /* Count the bits needed to represent size - 1 ... */
2433 i = txt->size - 1;
2434 for (; i > 0; i >>= 1)
2435 nbit++;
2436
 /*
  * ... which makes nbit the smallest power of two that is >= size; offset
  * is half of the slack between that power of two and the real size.
  */
2437 nbit = 1 << nbit;
2438 offset = (nbit - txt->size) / 2;
2439
 /* Start with the entry at the middle of the padded range */
2440 insertStatEntry(persistentContext, stat, txt, (nbit >> 1) - offset);
2442
2443 return stat;
2444}
2445
/*
 * Prepare the function-call context for returning the collected statistics:
 * remember "stat" in user_fctx, build the traversal stack positioned at the
 * leftmost tree node, and set up the result tuple descriptor.
 *
 * NOTE(review): the signature lines are missing from this listing.
 */
2446static void
2449{
2450 TupleDesc tupdesc;
2451 MemoryContext oldcontext;
2452 StatEntry *node;
2453
2454 funcctx->user_fctx = stat;
2455
 /* What follows is allocated in the multi-call memory context */
2456 oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
2457
 /* One zeroed slot per tree level, plus one spare */
2458 stat->stack = palloc0_array(StatEntry *, stat->maxdepth + 1);
2459 stat->stackpos = 0;
2460
2461 node = stat->root;
2462 /* find leftmost value */
2463 if (node == NULL)
2464 stat->stack[stat->stackpos] = NULL;
2465 else
2466 for (;;)
2467 {
 /* Record every node on the way down the left links */
2468 stat->stack[stat->stackpos] = node;
2469 if (node->left)
2470 {
2471 stat->stackpos++;
2472 node = node->left;
2473 }
2474 else
2475 break;
2476 }
2477 Assert(stat->stackpos <= stat->maxdepth);
2478
2479 if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
2480 elog(ERROR, "return type must be a row type");
2481 funcctx->tuple_desc = tupdesc;
2482 funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
2483
2484 MemoryContextSwitchTo(oldcontext);
2485}
2486
/*
 * Advance the stack-based tree walk and return the next node to report, or
 * NULL when nothing is left.  A node whose ndoc is zero is treated as already
 * reported.
 *
 * NOTE(review): the signature line is missing from this listing; the
 * recursive call below shows a single argument, "stat".
 */
2487static StatEntry *
2489{
2490 StatEntry *node = stat->stack[stat->stackpos];
2491
2492 if (node == NULL)
2493 return NULL;
2494
2495 if (node->ndoc != 0)
2496 {
2497 /* return the entry itself: we were already at its left sublink */
2498 return node;
2499 }
2500 else if (node->right && node->right != stat->stack[stat->stackpos + 1])
2501 {
2502 /* go on right sublink */
2503 stat->stackpos++;
2504 node = node->right;
2505
2506 /* find leftmost value */
2507 for (;;)
2508 {
2509 stat->stack[stat->stackpos] = node;
2510 if (node->left)
2511 {
2512 stat->stackpos++;
2513 node = node->left;
2514 }
2515 else
2516 break;
2517 }
2518 Assert(stat->stackpos <= stat->maxdepth);
2519 }
2520 else
2521 {
2522 /* we have already returned the left subtree, the node itself and the right subtree */
2523 if (stat->stackpos == 0)
2524 return NULL;
2525
 /* step back to the parent level and continue from there */
2526 stat->stackpos--;
2527 return walkStatEntryTree(stat);
2528 }
2529
2530 return node;
2531}
2532
/*
 * Produce the next result row: fetch the next tree node with
 * walkStatEntryTree() and turn it into a tuple of three text-form values
 * (lexeme, ndoc, nentry).  Returns (Datum) 0 when the walk yields no node.
 *
 * NOTE(review): the signature line is missing from this listing.
 */
2533static Datum
2535{
2536 TSVectorStat *st;
2537 StatEntry *entry;
2538
2539 st = (TSVectorStat *) funcctx->user_fctx;
2540
2541 entry = walkStatEntryTree(st);
2542
2543 if (entry != NULL)
2544 {
2545 Datum result;
2546 char *values[3];
2547 char ndoc[16];
2548 char nentry[16];
2549 HeapTuple tuple;
2550
 /* The stored lexeme is copied by length and NUL-terminated here */
2551 values[0] = palloc(entry->lenlexeme + 1);
2552 memcpy(values[0], entry->lexeme, entry->lenlexeme);
2553 (values[0])[entry->lenlexeme] = '\0';
2554 sprintf(ndoc, "%d", entry->ndoc);
2555 values[1] = ndoc;
2556 sprintf(nentry, "%d", entry->nentry);
2557 values[2] = nentry;
2558
2559 tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
2560 result = HeapTupleGetDatum(tuple);
2561
2562 pfree(values[0]);
2563
2564 /* mark entry as already visited */
 /* (walkStatEntryTree() skips nodes whose ndoc is zero) */
2565 entry->ndoc = 0;
2566
2567 return result;
2568 }
2569
2570 return (Datum) 0;
2571}
2572
/*
 * Run the SQL text in "txt" through an SPI cursor, check that it returns a
 * single tsvector-compatible column, and build the statistics state from
 * the rows.  "ws", when given, is a string of weight letters selecting which
 * weights are counted.
 *
 * NOTE(review): the signature line, the declarations of "stat" and "plan",
 * the statement creating "stat", and the per-row processing statements are
 * missing from this listing.
 */
2573static TSVectorStat *
2575{
2576 char *query = text_to_cstring(txt);
2578 bool isnull;
2579 Portal portal;
2581
2582 if ((plan = SPI_prepare(query, 0, NULL)) == NULL)
2583 /* internal error */
2584 elog(ERROR, "SPI_prepare(\"%s\") failed", query);
2585
2586 if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL)
2587 /* internal error */
2588 elog(ERROR, "SPI_cursor_open(\"%s\") failed", query);
2589
 /* Fetch the first batch (the count argument is 100) */
2590 SPI_cursor_fetch(portal, true, 100);
2591
2592 if (SPI_tuptable == NULL ||
2593 SPI_tuptable->tupdesc->natts != 1 ||
2595 TSVECTOROID))
2596 ereport(ERROR,
2598 errmsg("ts_stat query must return one tsvector column")));
2599
2601 stat->maxdepth = 1;
2602
2603 if (ws)
2604 {
2605 char *buf;
2606
 /*
  * Scan ws character by character; only single-byte characters are
  * looked at.  A/a sets bit 3, B/b bit 2, C/c bit 1 and D/d bit 0 of
  * stat->weight; anything else leaves the mask unchanged.
  */
2607 buf = VARDATA_ANY(ws);
2608 while (buf - VARDATA_ANY(ws) < VARSIZE_ANY_EXHDR(ws))
2609 {
2610 if (pg_mblen(buf) == 1)
2611 {
2612 switch (*buf)
2613 {
2614 case 'A':
2615 case 'a':
2616 stat->weight |= 1 << 3;
2617 break;
2618 case 'B':
2619 case 'b':
2620 stat->weight |= 1 << 2;
2621 break;
2622 case 'C':
2623 case 'c':
2624 stat->weight |= 1 << 1;
2625 break;
2626 case 'D':
2627 case 'd':
2628 stat->weight |= 1;
2629 break;
2630 default:
2631 stat->weight |= 0;
2632 }
2633 }
2634 buf += pg_mblen(buf);
2635 }
2636 }
2637
 /* Process batch after batch until a fetch returns no rows */
2638 while (SPI_processed > 0)
2639 {
2640 uint64 i;
2641
2642 for (i = 0; i < SPI_processed; i++)
2643 {
2645
2646 if (!isnull)
2648 }
2649
2651 SPI_cursor_fetch(portal, true, 100);
2652 }
2653
2655 SPI_cursor_close(portal);
2657 pfree(query);
2658
2659 return stat;
2660}
2661
/*
 * Set-returning entry point without a weight argument: on the first call the
 * statistics are collected via ts_stat_sql() (weights passed as NULL) inside
 * an SPI_connect()/SPI_finish() pair; every call then returns the next row
 * from ts_process_call() while it yields a non-zero Datum.
 *
 * NOTE(review): the function name, several declarations and the statements
 * at the numbering gaps are missing from this listing.
 */
2662Datum
2664{
2666 Datum result;
2667
2668 if (SRF_IS_FIRSTCALL())
2669 {
2672
2674 SPI_connect();
2675 stat = ts_stat_sql(funcctx->multi_call_memory_ctx, txt, NULL);
2676 PG_FREE_IF_COPY(txt, 0);
2678 SPI_finish();
2679 }
2680
2682 if ((result = ts_process_call(funcctx)) != (Datum) 0)
2683 SRF_RETURN_NEXT(funcctx, result);
2685}
2686
/*
 * Set-returning entry point with a weight argument: same flow as the
 * one-argument variant, except that "ws" is passed to ts_stat_sql() and
 * released afterwards as argument 1.
 *
 * NOTE(review): the function name, several declarations and the statements
 * at the numbering gaps are missing from this listing.
 */
2687Datum
2689{
2691 Datum result;
2692
2693 if (SRF_IS_FIRSTCALL())
2694 {
2698
2700 SPI_connect();
2701 stat = ts_stat_sql(funcctx->multi_call_memory_ctx, txt, ws);
2702 PG_FREE_IF_COPY(txt, 0);
2703 PG_FREE_IF_COPY(ws, 1);
2705 SPI_finish();
2706 }
2707
2709 if ((result = ts_process_call(funcctx)) != (Datum) 0)
2710 SRF_RETURN_NEXT(funcctx, result);
2712}
2713
2714
2715/*
2716 * Triggers for automatic update of a tsvector column from text column(s)
2717 *
2718 * Trigger arguments are either
2719 * name of tsvector col, name of tsconfig to use, name(s) of text col(s)
2720 * name of tsvector col, name of regconfig col, name(s) of text col(s)
2721 * ie, tsconfig can either be specified by name, or indirectly as the
2722 * contents of a regconfig field in the row. If the name is used, it must
2723 * be explicitly schema-qualified.
2724 */
/*
 * Trigger entry point for the variant where the second trigger argument is
 * the text search configuration's name (config_column = false).
 * NOTE(review): the function name line is missing from this listing.
 */
2725Datum
2727{
2728 return tsvector_update_trigger(fcinfo, false);
2729}
2730
/*
 * Trigger entry point for the variant where the second trigger argument
 * names a regconfig column of the row (config_column = true).
 * NOTE(review): the function name line is missing from this listing.
 */
2731Datum
2733{
2734 return tsvector_update_trigger(fcinfo, true);
2735}
2736
/*
 * Common implementation of the tsvector update triggers.
 *
 * config_column selects how trigger argument 1 is interpreted: true means it
 * names a regconfig column of the row, false means it is a schema-qualified
 * configuration name.  Argument 0 names the tsvector column to fill, and
 * arguments 2 and up name the columns whose text is parsed.
 *
 * Returns the row to store, as a pointer Datum.
 *
 * NOTE(review): the signature line, some declarations, most column
 * validity tests and a few statements are missing from this listing (see
 * the gaps in the embedded numbering).
 */
2737static Datum
2739{
2740 TriggerData *trigdata;
2742 Relation rel;
2745 i;
2746 ParsedText prs;
2747 Datum datum;
2748 bool isnull;
2749 text *txt;
2750 Oid cfgId;
2751 bool update_needed;
2752
2753 /* Check call context */
2754 if (!CALLED_AS_TRIGGER(fcinfo)) /* internal error */
2755 elog(ERROR, "tsvector_update_trigger: not fired by trigger manager");
2756
2757 trigdata = (TriggerData *) fcinfo->context;
2758 if (!TRIGGER_FIRED_FOR_ROW(trigdata->tg_event))
2759 elog(ERROR, "tsvector_update_trigger: must be fired for row");
2760 if (!TRIGGER_FIRED_BEFORE(trigdata->tg_event))
2761 elog(ERROR, "tsvector_update_trigger: must be fired BEFORE event");
2762
 /* INSERT always recomputes; for UPDATE the decision is made further down */
2763 if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event))
2764 {
2765 rettuple = trigdata->tg_trigtuple;
2766 update_needed = true;
2767 }
2768 else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event))
2769 {
2770 rettuple = trigdata->tg_newtuple;
2771 update_needed = false; /* computed below */
2772 }
2773 else
2774 elog(ERROR, "tsvector_update_trigger: must be fired for INSERT or UPDATE");
2775
2776 trigger = trigdata->tg_trigger;
2777 rel = trigdata->tg_relation;
2778
 /* NOTE(review): the message text below ends with an unmatched ')' */
2779 if (trigger->tgnargs < 3)
2780 elog(ERROR, "tsvector_update_trigger: arguments must be tsvector_field, ts_config, text_field1, ...)");
2781
2782 /* Find the target tsvector column */
2783 tsvector_attr_num = SPI_fnumber(rel->rd_att, trigger->tgargs[0]);
2785 ereport(ERROR,
2787 errmsg("tsvector column \"%s\" does not exist",
2788 trigger->tgargs[0])));
2789 /* This will effectively reject system columns, so no separate test: */
2791 TSVECTOROID))
2792 ereport(ERROR,
2794 errmsg("column \"%s\" is not of tsvector type",
2795 trigger->tgargs[0])));
2796
2797 /* Find the configuration to use */
2798 if (config_column)
2799 {
2800 int config_attr_num;
2801
 /* Configuration comes from a column of the row being stored */
2802 config_attr_num = SPI_fnumber(rel->rd_att, trigger->tgargs[1]);
2804 ereport(ERROR,
2806 errmsg("configuration column \"%s\" does not exist",
2807 trigger->tgargs[1])));
2809 REGCONFIGOID))
2810 ereport(ERROR,
2812 errmsg("column \"%s\" is not of regconfig type",
2813 trigger->tgargs[1])));
2814
2815 datum = SPI_getbinval(rettuple, rel->rd_att, config_attr_num, &isnull);
2816 if (isnull)
2817 ereport(ERROR,
2819 errmsg("configuration column \"%s\" must not be null",
2820 trigger->tgargs[1])));
2821 cfgId = DatumGetObjectId(datum);
2822 }
2823 else
2824 {
2825 List *names;
2826
 /* Configuration is given by name in the trigger arguments */
2827 names = stringToQualifiedNameList(trigger->tgargs[1], NULL);
2828 /* require a schema so that results are not search path dependent */
2829 if (list_length(names) < 2)
2830 ereport(ERROR,
2832 errmsg("text search configuration name \"%s\" must be schema-qualified",
2833 trigger->tgargs[1])));
2834 cfgId = get_ts_config_oid(names, false);
2835 }
2836
2837 /* initialize parse state */
2838 prs.lenwords = 32;
2839 prs.curwords = 0;
2840 prs.pos = 0;
2842
2843 /* find all words in indexable column(s) */
2844 for (i = 2; i < trigger->tgnargs; i++)
2845 {
2846 int numattr;
2847
2848 numattr = SPI_fnumber(rel->rd_att, trigger->tgargs[i]);
2850 ereport(ERROR,
2852 errmsg("column \"%s\" does not exist",
2853 trigger->tgargs[i])));
2855 ereport(ERROR,
2857 errmsg("column \"%s\" is not of a character type",
2858 trigger->tgargs[i])));
2859
 /*
  * NOTE(review): the condition guarding this assignment is missing from
  * the listing; presumably it tests whether the column was changed by
  * the UPDATE -- confirm.
  */
2861 update_needed = true;
2862
 /* NULL columns contribute nothing */
2863 datum = SPI_getbinval(rettuple, rel->rd_att, numattr, &isnull);
2864 if (isnull)
2865 continue;
2866
2867 txt = DatumGetTextPP(datum);
2868
2870
 /* free txt only when DatumGetTextPP() handed back a different pointer */
2871 if (txt != (text *) DatumGetPointer(datum))
2872 pfree(txt);
2873 }
2874
2875 if (update_needed)
2876 {
2877 /* make tsvector value */
2878 datum = TSVectorGetDatum(make_tsvector(&prs));
2879 isnull = false;
2880
2881 /* and insert it into tuple */
2884 &datum, &isnull);
2885
 /* the freshly built tsvector is released right after that call */
2886 pfree(DatumGetPointer(datum));
2887 }
2888
2889 return PointerGetDatum(rettuple);
2890}
#define GETQUERY(x)
Definition _int.h:157
#define PG_GETARG_ARRAYTYPE_P(n)
Definition array.h:263
ArrayType * construct_array_builtin(Datum *elems, int nelems, Oid elmtype)
void deconstruct_array_builtin(const ArrayType *array, Oid elmtype, Datum **elemsp, bool **nullsp, int *nelemsp)
int16 AttrNumber
Definition attnum.h:21
bool bms_is_member(int x, const Bitmapset *a)
Definition bitmapset.c:510
static Datum values[MAXATTR]
Definition bootstrap.c:155
int numattr
Definition bootstrap.c:61
#define Min(x, y)
Definition c.h:997
#define Max(x, y)
Definition c.h:991
#define VARHDRSZ
Definition c.h:711
#define Assert(condition)
Definition c.h:873
int8_t int8
Definition c.h:540
#define SHORTALIGN(LEN)
Definition c.h:822
int32_t int32
Definition c.h:542
uint64_t uint64
Definition c.h:547
uint16_t uint16
Definition c.h:545
uint32_t uint32
Definition c.h:546
#define ARRPTR(x)
Definition cube.c:28
struct cursor * cur
Definition ecpg.c:29
int errcode(int sqlerrcode)
Definition elog.c:863
int errmsg(const char *fmt,...)
Definition elog.c:1080
#define ERROR
Definition elog.h:39
#define elog(elevel,...)
Definition elog.h:226
#define ereport(elevel,...)
Definition elog.h:150
HeapTuple BuildTupleFromCStrings(AttInMetadata *attinmeta, char **values)
AttInMetadata * TupleDescGetAttInMetadata(TupleDesc tupdesc)
#define repalloc_array(pointer, type, count)
Definition fe_memutils.h:78
#define palloc_array(type, count)
Definition fe_memutils.h:76
#define palloc0_array(type, count)
Definition fe_memutils.h:77
#define palloc0_object(type)
Definition fe_memutils.h:75
#define PG_FREE_IF_COPY(ptr, n)
Definition fmgr.h:260
#define PG_GETARG_TEXT_PP(n)
Definition fmgr.h:310
#define DirectFunctionCall2(func, arg1, arg2)
Definition fmgr.h:686
#define DatumGetTextPP(X)
Definition fmgr.h:293
#define DirectFunctionCall1(func, arg1)
Definition fmgr.h:684
#define PG_GETARG_DATUM(n)
Definition fmgr.h:268
#define PG_RETURN_DATUM(x)
Definition fmgr.h:354
#define PG_RETURN_POINTER(x)
Definition fmgr.h:363
#define PG_FUNCTION_ARGS
Definition fmgr.h:193
#define PG_RETURN_BOOL(x)
Definition fmgr.h:360
TypeFuncClass get_call_result_type(FunctionCallInfo fcinfo, Oid *resultTypeId, TupleDesc *resultTupleDesc)
Definition funcapi.c:276
#define SRF_IS_FIRSTCALL()
Definition funcapi.h:304
#define SRF_PERCALL_SETUP()
Definition funcapi.h:308
@ TYPEFUNC_COMPOSITE
Definition funcapi.h:149
#define SRF_RETURN_NEXT(_funcctx, _result)
Definition funcapi.h:310
#define SRF_FIRSTCALL_INIT()
Definition funcapi.h:306
static Datum HeapTupleGetDatum(const HeapTupleData *tuple)
Definition funcapi.h:230
#define SRF_RETURN_DONE(_funcctx)
Definition funcapi.h:328
Datum difference(PG_FUNCTION_ARGS)
HeapTuple heap_modify_tuple_by_cols(HeapTuple tuple, TupleDesc tupleDesc, int nCols, const int *replCols, const Datum *replValues, const bool *replIsnull)
Definition heaptuple.c:1278
HeapTuple heap_form_tuple(TupleDesc tupleDescriptor, const Datum *values, const bool *isnull)
Definition heaptuple.c:1117
#define CALCDATASIZE(x, lenstr)
Definition hstore.h:72
#define STRPTR(x)
Definition hstore.h:76
#define nitems(x)
Definition indent.h:31
long val
Definition informix.c:689
static int pg_cmp_s32(int32 a, int32 b)
Definition int.h:713
int j
Definition isn.c:78
int i
Definition isn.c:77
List * lappend(List *list, void *datum)
Definition list.c:339
List * list_concat(List *list1, const List *list2)
Definition list.c:561
#define GETOPERAND(x)
Definition ltree.h:167
int pg_mblen(const char *mbstr)
Definition mbutils.c:1026
void * MemoryContextAlloc(MemoryContext context, Size size)
Definition mcxt.c:1232
void * MemoryContextAllocZero(MemoryContext context, Size size)
Definition mcxt.c:1266
void pfree(void *pointer)
Definition mcxt.c:1616
void * palloc0(Size size)
Definition mcxt.c:1417
void * palloc(Size size)
Definition mcxt.c:1387
#define CHECK_FOR_INTERRUPTS()
Definition miscadmin.h:123
Oid get_ts_config_oid(List *names, bool missing_ok)
Definition namespace.c:3222
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition palloc.h:124
bool IsBinaryCoercible(Oid srctype, Oid targettype)
void * arg
const void * data
#define lfirst(lc)
Definition pg_list.h:172
static int list_length(const List *l)
Definition pg_list.h:152
#define NIL
Definition pg_list.h:68
#define list_make1(x1)
Definition pg_list.h:212
#define plan(x)
Definition pg_regress.c:161
static char buf[DEFAULT_XLOG_SEG_SIZE]
#define sprintf
Definition port.h:262
#define qsort(a, b, c, d)
Definition port.h:495
static bool DatumGetBool(Datum X)
Definition postgres.h:100
static Datum PointerGetDatum(const void *X)
Definition postgres.h:352
static Oid DatumGetObjectId(Datum X)
Definition postgres.h:252
static Datum Int16GetDatum(int16 X)
Definition postgres.h:182
uint64_t Datum
Definition postgres.h:70
static Pointer DatumGetPointer(Datum X)
Definition postgres.h:342
static char DatumGetChar(Datum X)
Definition postgres.h:122
unsigned int Oid
static size_t qunique(void *array, size_t elements, size_t width, int(*compare)(const void *, const void *))
Definition qunique.h:21
static int cmp(const chr *x, const chr *y, size_t len)
List * stringToQualifiedNameList(const char *string, Node *escontext)
Definition regproc.c:1922
int SPI_fnumber(TupleDesc tupdesc, const char *fname)
Definition spi.c:1175
uint64 SPI_processed
Definition spi.c:44
Oid SPI_gettypeid(TupleDesc tupdesc, int fnumber)
Definition spi.c:1308
int SPI_freeplan(SPIPlanPtr plan)
Definition spi.c:1025
SPITupleTable * SPI_tuptable
Definition spi.c:45
Portal SPI_cursor_open(const char *name, SPIPlanPtr plan, const Datum *Values, const char *Nulls, bool read_only)
Definition spi.c:1445
int SPI_connect(void)
Definition spi.c:94
void SPI_cursor_fetch(Portal portal, bool forward, long count)
Definition spi.c:1806
int SPI_finish(void)
Definition spi.c:182
void SPI_freetuptable(SPITupleTable *tuptable)
Definition spi.c:1386
SPIPlanPtr SPI_prepare(const char *src, int nargs, Oid *argtypes)
Definition spi.c:860
void SPI_cursor_close(Portal portal)
Definition spi.c:1862
Datum SPI_getbinval(HeapTuple tuple, TupleDesc tupdesc, int fnumber, bool *isnull)
Definition spi.c:1252
#define SPI_ERROR_NOATTRIBUTE
Definition spi.h:76
void check_stack_depth(void)
Definition stack_depth.c:95
Definition pg_list.h:54
int32 pos
Definition ts_utils.h:107
int32 lenwords
Definition ts_utils.h:105
int32 curwords
Definition ts_utils.h:106
ParsedWord * words
Definition ts_utils.h:104
int16 distance
Definition ts_type.h:196
uint32 left
Definition ts_type.h:197
TupleDesc rd_att
Definition rel.h:112
TupleDesc tupdesc
Definition spi.h:25
HeapTuple * vals
Definition spi.h:26
uint32 nentry
Definition tsvector_op.c:49
struct StatEntry * left
Definition tsvector_op.c:50
char lexeme[FLEXIBLE_ARRAY_MEMBER]
Definition tsvector_op.c:53
uint32 lenlexeme
Definition tsvector_op.c:52
uint32 ndoc
Definition tsvector_op.c:47
struct StatEntry * right
Definition tsvector_op.c:51
int32 size
Definition ts_type.h:221
int32 size
Definition ts_type.h:93
Relation tg_relation
Definition trigger.h:35
const Bitmapset * tg_updatedcols
Definition trigger.h:43
TriggerEvent tg_event
Definition trigger.h:34
HeapTuple tg_newtuple
Definition trigger.h:37
Trigger * tg_trigger
Definition trigger.h:38
HeapTuple tg_trigtuple
Definition trigger.h:36
WordEntryPos pos[FLEXIBLE_ARRAY_MEMBER]
Definition ts_type.h:68
uint32 pos
Definition ts_type.h:46
uint32 haspos
Definition ts_type.h:44
uint32 len
Definition ts_type.h:45
Definition c.h:706
#define FirstLowInvalidHeapAttributeNumber
Definition sysattr.h:27
Datum to_tsvector(PG_FUNCTION_ARGS)
Definition to_tsany.c:270
TSVector make_tsvector(ParsedText *prs)
Definition to_tsany.c:165
Datum plainto_tsquery(PG_FUNCTION_ARGS)
Definition to_tsany.c:642
#define TRIGGER_FIRED_BEFORE(event)
Definition trigger.h:130
#define CALLED_AS_TRIGGER(fcinfo)
Definition trigger.h:26
#define TRIGGER_FIRED_FOR_ROW(event)
Definition trigger.h:124
#define TRIGGER_FIRED_BY_INSERT(event)
Definition trigger.h:112
#define TRIGGER_FIRED_BY_UPDATE(event)
Definition trigger.h:118
void parsetext(Oid cfgId, ParsedText *prs, char *buf, int buflen)
Definition ts_parse.c:355
#define PG_GETARG_TSVECTOR(n)
Definition ts_type.h:135
#define WEP_GETPOS(x)
Definition ts_type.h:80
#define _POSVECPTR(x, e)
Definition ts_type.h:109
static TSQuery DatumGetTSQuery(Datum X)
Definition ts_type.h:249
static TSVector DatumGetTSVector(Datum X)
Definition ts_type.h:118
#define MAXENTRYPOS
Definition ts_type.h:85
static Datum TSVectorGetDatum(const TSVectorData *X)
Definition ts_type.h:130
#define WEP_SETPOS(x, v)
Definition ts_type.h:83
#define POSDATALEN(x, e)
Definition ts_type.h:110
#define PG_GETARG_TSQUERY(n)
Definition ts_type.h:266
uint16 WordEntryPos
Definition ts_type.h:63
#define MAXNUMPOS
Definition ts_type.h:86
TSVectorData * TSVector
Definition ts_type.h:98
#define PG_GETARG_TSVECTOR_COPY(n)
Definition ts_type.h:136
#define WEP_SETWEIGHT(x, v)
Definition ts_type.h:82
#define QI_VAL
Definition ts_type.h:149
static Datum TSQueryGetDatum(const TSQueryData *X)
Definition ts_type.h:261
#define LIMITPOS(x)
Definition ts_type.h:87
#define OP_AND
Definition ts_type.h:180
#define OP_PHRASE
Definition ts_type.h:182
#define OP_OR
Definition ts_type.h:181
#define POSDATAPTR(x, e)
Definition ts_type.h:111
#define OP_NOT
Definition ts_type.h:179
#define WEP_GETWEIGHT(x)
Definition ts_type.h:79
#define MAXSTRPOS
Definition ts_type.h:50
#define TS_EXEC_PHRASE_NO_POS
Definition ts_utils.h:202
TSTernaryValue
Definition ts_utils.h:133
@ TS_MAYBE
Definition ts_utils.h:136
@ TS_NO
Definition ts_utils.h:134
@ TS_YES
Definition ts_utils.h:135
#define TS_EXEC_EMPTY
Definition ts_utils.h:188
#define TS_EXEC_SKIP_NOT
Definition ts_utils.h:195
TSTernaryValue(* TSExecuteCallback)(void *arg, QueryOperand *val, ExecPhraseData *data)
Definition ts_utils.h:182
int compareWordEntryPos(const void *a, const void *b)
Definition tsvector.c:36
#define TSPO_BOTH
static Datum ts_process_call(FuncCallContext *funcctx)
static TSTernaryValue checkcondition_str(void *checkval, QueryOperand *val, ExecPhraseData *data)
bool TS_execute(QueryItem *curitem, void *arg, uint32 flags, TSExecuteCallback chkcond)
Datum ts_match_vq(PG_FUNCTION_ARGS)
Datum tsvector_update_trigger_byid(PG_FUNCTION_ARGS)
static int32 add_pos(TSVector src, WordEntry *srcptr, TSVector dest, WordEntry *destptr, int32 maxpos)
static TSVectorStat * ts_stat_sql(MemoryContext persistentContext, text *txt, text *ws)
List * TS_execute_locations(QueryItem *curitem, void *arg, uint32 flags, TSExecuteCallback chkcond)
Datum tsvector_delete_arr(PG_FUNCTION_ARGS)
#define TSPO_R_ONLY
Datum array_to_tsvector(PG_FUNCTION_ARGS)
#define STATENTRYHDRSZ
Definition tsvector_op.c:56
Datum tsvector_filter(PG_FUNCTION_ARGS)
static TSTernaryValue TS_phrase_output(ExecPhraseData *data, ExecPhraseData *Ldata, ExecPhraseData *Rdata, int emit, int Loffset, int Roffset, int max_npos)
#define compareEntry(pa, a, pb, b)
static int check_weight(TSVector txt, WordEntry *wptr, int8 weight)
Datum tsvector_to_array(PG_FUNCTION_ARGS)
Datum ts_match_tq(PG_FUNCTION_ARGS)
Datum ts_stat1(PG_FUNCTION_ARGS)
Datum tsvector_delete_str(PG_FUNCTION_ARGS)
#define TSPO_L_ONLY
static Datum tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column)
Datum ts_match_qv(PG_FUNCTION_ARGS)
bool tsquery_requires_match(QueryItem *curitem)
Datum tsvector_concat(PG_FUNCTION_ARGS)
Datum tsvector_update_trigger_bycolumn(PG_FUNCTION_ARGS)
static bool TS_execute_locations_recurse(QueryItem *curitem, void *arg, TSExecuteCallback chkcond, List **locations)
static TSTernaryValue TS_execute_recurse(QueryItem *curitem, void *arg, uint32 flags, TSExecuteCallback chkcond)
static TSVectorStat * ts_accum(MemoryContext persistentContext, TSVectorStat *stat, Datum data)
TSTernaryValue TS_execute_ternary(QueryItem *curitem, void *arg, uint32 flags, TSExecuteCallback chkcond)
static int compare_int(const void *va, const void *vb)
static void ts_setup_firstcall(FunctionCallInfo fcinfo, FuncCallContext *funcctx, TSVectorStat *stat)
static void chooseNextStatEntry(MemoryContext persistentContext, TSVectorStat *stat, TSVector txt, uint32 low, uint32 high, uint32 offset)
Datum ts_match_tt(PG_FUNCTION_ARGS)
static TSTernaryValue TS_phrase_execute(QueryItem *curitem, void *arg, uint32 flags, TSExecuteCallback chkcond, ExecPhraseData *data)
static int tsvector_bsearch(const TSVectorData *tsv, char *lexeme, int lexeme_len)
static int compare_text_lexemes(const void *va, const void *vb)
static TSTernaryValue checkclass_str(CHKVAL *chkval, WordEntry *entry, QueryOperand *val, ExecPhraseData *data)
#define compareStatWord(a, e, t)
Datum tsvector_unnest(PG_FUNCTION_ARGS)
static StatEntry * walkStatEntryTree(TSVectorStat *stat)
Datum ts_stat2(PG_FUNCTION_ARGS)
static void insertStatEntry(MemoryContext persistentContext, TSVectorStat *stat, TSVector txt, uint32 off)
static TSVector tsvector_delete_by_indices(TSVector tsv, int *indices_to_delete, int indices_count)
TupleDesc CreateTemplateTupleDesc(int natts)
Definition tupdesc.c:182
void TupleDescInitEntry(TupleDesc desc, AttrNumber attributeNumber, const char *attributeName, Oid oidtypeid, int32 typmod, int attdim)
Definition tupdesc.c:842
QueryOperator qoperator
Definition ts_type.h:209
QueryItemType type
Definition ts_type.h:208
static Size VARSIZE_ANY_EXHDR(const void *PTR)
Definition varatt.h:472
static Size VARSIZE(const void *PTR)
Definition varatt.h:298
static char * VARDATA(const void *PTR)
Definition varatt.h:305
static char * VARDATA_ANY(const void *PTR)
Definition varatt.h:486
static void SET_VARSIZE(void *PTR, Size len)
Definition varatt.h:432
text * cstring_to_text_with_len(const char *s, int len)
Definition varlena.c:193
char * text_to_cstring(const text *t)
Definition varlena.c:214
#define stat
Definition win32_port.h:74

◆ compareStatWord

#define compareStatWord (   a,
  e,
  t 
)
Value:
tsCompareString((a)->lexeme, (a)->lenlexeme, \
STRPTR(t) + (e)->pos, (e)->len, \
false)
e

Definition at line 2310 of file tsvector_op.c.

◆ STATENTRYHDRSZ

#define STATENTRYHDRSZ   (offsetof(StatEntry, lexeme))

Definition at line 56 of file tsvector_op.c.

◆ TSPO_BOTH

#define TSPO_BOTH   0x04 /* emit positions appearing in both L&R */

Definition at line 1465 of file tsvector_op.c.

◆ TSPO_L_ONLY

#define TSPO_L_ONLY   0x01 /* emit positions appearing only in L */

Definition at line 1463 of file tsvector_op.c.

◆ TSPO_R_ONLY

#define TSPO_R_ONLY   0x02 /* emit positions appearing only in R */

Definition at line 1464 of file tsvector_op.c.

◆ TSVECTORCMPFUNC

#define TSVECTORCMPFUNC (   type,
  action,
  ret 
)
Value:
{ \
int res = silly_cmp_tsvector(a, b); \
PG_RETURN_##ret( res action 0 ); \
} \
/* keep compiler quiet - no extra ; */ \
extern int no_such_variable
static int silly_cmp_tsvector(const TSVectorData *a, const TSVectorData *b)
Definition tsvector_op.c:86
const char * type

Definition at line 145 of file tsvector_op.c.

148{ \
151 int res = silly_cmp_tsvector(a, b); \
154 PG_RETURN_##ret( res action 0 ); \
155} \
156/* keep compiler quiet - no extra ; */ \
157extern int no_such_variable

Typedef Documentation

◆ StatEntry

typedef struct StatEntry StatEntry

Function Documentation

◆ add_pos()

static int32 add_pos ( TSVector  src,
WordEntry srcptr,
TSVector  dest,
WordEntry destptr,
int32  maxpos 
)
static

Definition at line 364 of file tsvector_op.c.

367{
368 uint16 *clen = &_POSVECPTR(dest, destptr)->npos;
369 int i;
371 startlen;
373 *dpos = POSDATAPTR(dest, destptr);
374
375 if (!destptr->haspos)
376 *clen = 0;
377
378 startlen = *clen;
379 for (i = 0;
380 i < slen && *clen < MAXNUMPOS &&
381 (*clen == 0 || WEP_GETPOS(dpos[*clen - 1]) != MAXENTRYPOS - 1);
382 i++)
383 {
386 (*clen)++;
387 }
388
389 if (*clen != startlen)
390 destptr->haspos = 1;
391 return *clen - startlen;
392}

References _POSVECPTR, fb(), i, LIMITPOS, MAXENTRYPOS, MAXNUMPOS, POSDATALEN, POSDATAPTR, WEP_GETPOS, WEP_GETWEIGHT, WEP_SETPOS, and WEP_SETWEIGHT.

Referenced by tsvector_concat().

◆ array_to_tsvector()

Datum array_to_tsvector ( PG_FUNCTION_ARGS  )

Definition at line 747 of file tsvector_op.c.

748{
753 bool *nulls;
754 int nitems,
755 i,
756 tslen,
757 datalen = 0;
758 char *cur;
759
761
762 /*
763 * Reject nulls and zero length strings (maybe we should just ignore them,
764 * instead?)
765 */
766 for (i = 0; i < nitems; i++)
767 {
768 if (nulls[i])
771 errmsg("lexeme array may not contain nulls")));
772
776 errmsg("lexeme array may not contain empty strings")));
777 }
778
779 /* Sort and de-dup, because this is required for a valid tsvector. */
780 if (nitems > 1)
781 {
783 nitems = qunique(dlexemes, nitems, sizeof(Datum),
785 }
786
787 /* Calculate space needed for surviving lexemes. */
788 for (i = 0; i < nitems; i++)
789 datalen += VARSIZE(DatumGetPointer(dlexemes[i])) - VARHDRSZ;
790 tslen = CALCDATASIZE(nitems, datalen);
791
792 /* Allocate and fill tsvector. */
795 tsout->size = nitems;
796
798 cur = STRPTR(tsout);
799 for (i = 0; i < nitems; i++)
800 {
801 char *lex = VARDATA(DatumGetPointer(dlexemes[i]));
803
804 memcpy(cur, lex, lex_len);
805 arrout[i].haspos = 0;
806 arrout[i].len = lex_len;
807 arrout[i].pos = cur - STRPTR(tsout);
808 cur += lex_len;
809 }
810
811 PG_FREE_IF_COPY(v, 0);
813}

References ARRPTR, CALCDATASIZE, compare_text_lexemes(), cur, DatumGetPointer(), deconstruct_array_builtin(), ereport, errcode(), errmsg(), ERROR, fb(), i, nitems, palloc0(), PG_FREE_IF_COPY, PG_GETARG_ARRAYTYPE_P, PG_RETURN_POINTER, qsort, qunique(), SET_VARSIZE(), STRPTR, VARDATA(), VARHDRSZ, and VARSIZE().

◆ check_weight()

static int check_weight ( TSVector  txt,
WordEntry wptr,
int8  weight 
)
static

Definition at line 2295 of file tsvector_op.c.

2296{
2297 int len = POSDATALEN(txt, wptr);
2298 int num = 0;
2300
2301 while (len--)
2302 {
2303 if (weight & (1 << WEP_GETWEIGHT(*ptr)))
2304 num++;
2305 ptr++;
2306 }
2307 return num;
2308}

References fb(), len, POSDATALEN, POSDATAPTR, and WEP_GETWEIGHT.

Referenced by insertStatEntry().

◆ checkclass_str()

static TSTernaryValue checkclass_str ( CHKVAL chkval,
WordEntry entry,
QueryOperand val,
ExecPhraseData data 
)
static

Definition at line 1189 of file tsvector_op.c.

1191{
1192 TSTernaryValue result = TS_NO;
1193
1194 Assert(data == NULL || data->npos == 0);
1195
1196 if (entry->haspos)
1197 {
1199
1200 /*
1201 * We can't use the _POSVECPTR macro here because the pointer to the
1202 * tsvector's lexeme storage is already contained in chkval->values.
1203 */
1205 (chkval->values + SHORTALIGN(entry->pos + entry->len));
1206
1207 if (val->weight && data)
1208 {
1211
1212 /*
1213 * Filter position information by weights
1214 */
1215 dptr = data->pos = palloc_array(WordEntryPos, posvec->npos);
1216 data->allocated = true;
1217
1218 /* Is there a position with a matching weight? */
1219 while (posvec_iter < posvec->pos + posvec->npos)
1220 {
1221 /* If true, append this position to the data->pos */
1222 if (val->weight & (1 << WEP_GETWEIGHT(*posvec_iter)))
1223 {
1225 dptr++;
1226 }
1227
1228 posvec_iter++;
1229 }
1230
1231 data->npos = dptr - data->pos;
1232
1233 if (data->npos > 0)
1234 result = TS_YES;
1235 else
1236 {
1237 pfree(data->pos);
1238 data->pos = NULL;
1239 data->allocated = false;
1240 }
1241 }
1242 else if (val->weight)
1243 {
1245
1246 /* Is there a position with a matching weight? */
1247 while (posvec_iter < posvec->pos + posvec->npos)
1248 {
1249 if (val->weight & (1 << WEP_GETWEIGHT(*posvec_iter)))
1250 {
1251 result = TS_YES;
1252 break; /* no need to go further */
1253 }
1254
1255 posvec_iter++;
1256 }
1257 }
1258 else if (data)
1259 {
1260 data->npos = posvec->npos;
1261 data->pos = posvec->pos;
1262 data->allocated = false;
1263 result = TS_YES;
1264 }
1265 else
1266 {
1267 /* simplest case: no weight check, positions not needed */
1268 result = TS_YES;
1269 }
1270 }
1271 else
1272 {
1273 /*
1274 * Position info is lacking, so if the caller requires it, we can only
1275 * say that maybe there is a match.
1276 *
1277 * Notice, however, that we *don't* check val->weight here.
1278 * Historically, stripped tsvectors are considered to match queries
1279 * whether or not the query has a weight restriction; that's a little
1280 * dubious but we'll preserve the behavior.
1281 */
1282 if (data)
1283 result = TS_MAYBE;
1284 else
1285 result = TS_YES;
1286 }
1287
1288 return result;
1289}

References Assert, data, fb(), WordEntry::haspos, WordEntry::len, palloc_array, pfree(), WordEntry::pos, WordEntryPosVector::pos, SHORTALIGN, TS_MAYBE, TS_NO, TS_YES, val, WEP_GETPOS, and WEP_GETWEIGHT.

Referenced by checkcondition_str().

◆ checkcondition_str()

static TSTernaryValue checkcondition_str ( void checkval,
QueryOperand val,
ExecPhraseData data 
)
static

Definition at line 1295 of file tsvector_op.c.

1296{
1298 WordEntry *StopLow = chkval->arrb;
1299 WordEntry *StopHigh = chkval->arre;
1301 TSTernaryValue res = TS_NO;
1302
1303 /* Loop invariant: StopLow <= val < StopHigh */
1304 while (StopLow < StopHigh)
1305 {
1306 int difference;
1307
1308 StopMiddle = StopLow + (StopHigh - StopLow) / 2;
1309 difference = tsCompareString(chkval->operand + val->distance,
1310 val->length,
1311 chkval->values + StopMiddle->pos,
1312 StopMiddle->len,
1313 false);
1314
1315 if (difference == 0)
1316 {
1317 /* Check weight info & fill 'data' with positions */
1319 break;
1320 }
1321 else if (difference > 0)
1322 StopLow = StopMiddle + 1;
1323 else
1325 }
1326
1327 /*
1328 * If it's a prefix search, we should also consider lexemes that the
1329 * search term is a prefix of (which will necessarily immediately follow
1330 * the place we found in the above loop). But we can skip them if there
1331 * was a definite match on the exact term AND the caller doesn't need
1332 * position info.
1333 */
1334 if (val->prefix && (res != TS_YES || data))
1335 {
1337 int npos = 0,
1338 totalpos = 0;
1339
1340 /* adjust start position for corner case */
1341 if (StopLow >= StopHigh)
1343
1344 /* we don't try to re-use any data from the initial match */
1345 if (data)
1346 {
1347 if (data->allocated)
1348 pfree(data->pos);
1349 data->pos = NULL;
1350 data->allocated = false;
1351 data->npos = 0;
1352 }
1353 res = TS_NO;
1354
1355 while ((res != TS_YES || data) &&
1357 tsCompareString(chkval->operand + val->distance,
1358 val->length,
1359 chkval->values + StopMiddle->pos,
1360 StopMiddle->len,
1361 true) == 0)
1362 {
1364
1366
1367 if (subres != TS_NO)
1368 {
1369 if (data)
1370 {
1371 /*
1372 * We need to join position information
1373 */
1374 if (subres == TS_MAYBE)
1375 {
1376 /*
1377 * No position info for this match, so we must report
1378 * MAYBE overall.
1379 */
1380 res = TS_MAYBE;
1381 /* forget any previous positions */
1382 npos = 0;
1383 /* don't leak storage */
1384 if (allpos)
1385 pfree(allpos);
1386 break;
1387 }
1388
1389 while (npos + data->npos > totalpos)
1390 {
1391 if (totalpos == 0)
1392 {
1393 totalpos = 256;
1395 }
1396 else
1397 {
1398 totalpos *= 2;
1400 }
1401 }
1402
1403 memcpy(allpos + npos, data->pos, sizeof(WordEntryPos) * data->npos);
1404 npos += data->npos;
1405
1406 /* don't leak storage from individual matches */
1407 if (data->allocated)
1408 pfree(data->pos);
1409 data->pos = NULL;
1410 data->allocated = false;
1411 /* it's important to reset data->npos before next loop */
1412 data->npos = 0;
1413 }
1414 else
1415 {
1416 /* Don't need positions, just handle YES/MAYBE */
1417 if (subres == TS_YES || res == TS_NO)
1418 res = subres;
1419 }
1420 }
1421
1422 StopMiddle++;
1423 }
1424
1425 if (data && npos > 0)
1426 {
1427 /* Sort and make unique array of found positions */
1428 data->pos = allpos;
1429 qsort(data->pos, npos, sizeof(WordEntryPos), compareWordEntryPos);
1430 data->npos = qunique(data->pos, npos, sizeof(WordEntryPos),
1432 data->allocated = true;
1433 res = TS_YES;
1434 }
1435 }
1436
1437 return res;
1438}

References checkclass_str(), compareWordEntryPos(), data, difference(), fb(), palloc_array, pfree(), qsort, qunique(), repalloc_array, TS_MAYBE, TS_NO, TS_YES, tsCompareString(), and val.

Referenced by ts_match_vq().

◆ chooseNextStatEntry()

static void chooseNextStatEntry ( MemoryContext  persistentContext,
TSVectorStat stat,
TSVector  txt,
uint32  low,
uint32  high,
uint32  offset 
)
static

Definition at line 2381 of file tsvector_op.c.

2383{
2384 uint32 pos;
2385 uint32 middle = (low + high) >> 1;
2386
2387 pos = (low + middle) >> 1;
2388 if (low != middle && pos >= offset && pos - offset < txt->size)
2389 insertStatEntry(persistentContext, stat, txt, pos - offset);
2390 pos = (high + middle + 1) >> 1;
2391 if (middle + 1 != high && pos >= offset && pos - offset < txt->size)
2392 insertStatEntry(persistentContext, stat, txt, pos - offset);
2393
2394 if (low != middle)
2396 if (high != middle + 1)
2397 chooseNextStatEntry(persistentContext, stat, txt, middle + 1, high, offset);
2398}

References chooseNextStatEntry(), fb(), and insertStatEntry().

Referenced by chooseNextStatEntry(), and ts_accum().

◆ compare_int()

static int compare_int ( const void va,
const void vb 
)
static

Definition at line 433 of file tsvector_op.c.

434{
435 int a = *((const int *) va);
436 int b = *((const int *) vb);
437
438 return pg_cmp_s32(a, b);
439}

References a, b, fb(), and pg_cmp_s32().

Referenced by tsvector_delete_by_indices().

◆ compare_text_lexemes()

static int compare_text_lexemes ( const void va,
const void vb 
)
static

Definition at line 442 of file tsvector_op.c.

443{
444 Datum a = *((const Datum *) va);
445 Datum b = *((const Datum *) vb);
450
451 return tsCompareString(alex, alex_len, blex, blex_len, false);
452}

References a, b, DatumGetPointer(), fb(), tsCompareString(), VARDATA_ANY(), and VARSIZE_ANY_EXHDR().

Referenced by array_to_tsvector().

◆ insertStatEntry()

static void insertStatEntry ( MemoryContext  persistentContext,
TSVectorStat stat,
TSVector  txt,
uint32  off 
)
static

Definition at line 2316 of file tsvector_op.c.

2317{
2318 WordEntry *we = ARRPTR(txt) + off;
2319 StatEntry *node = stat->root,
2320 *pnode = NULL;
2321 int n,
2322 res = 0;
2323 uint32 depth = 1;
2324
2325 if (stat->weight == 0)
2326 n = (we->haspos) ? POSDATALEN(txt, we) : 1;
2327 else
2328 n = (we->haspos) ? check_weight(txt, we, stat->weight) : 0;
2329
2330 if (n == 0)
2331 return; /* nothing to insert */
2332
2333 while (node)
2334 {
2335 res = compareStatWord(node, we, txt);
2336
2337 if (res == 0)
2338 {
2339 break;
2340 }
2341 else
2342 {
2343 pnode = node;
2344 node = (res < 0) ? node->left : node->right;
2345 }
2346 depth++;
2347 }
2348
2349 if (depth > stat->maxdepth)
2350 stat->maxdepth = depth;
2351
2352 if (node == NULL)
2353 {
2355 node->left = node->right = NULL;
2356 node->ndoc = 1;
2357 node->nentry = n;
2358 node->lenlexeme = we->len;
2359 memcpy(node->lexeme, STRPTR(txt) + we->pos, node->lenlexeme);
2360
2361 if (pnode == NULL)
2362 {
2363 stat->root = node;
2364 }
2365 else
2366 {
2367 if (res < 0)
2368 pnode->left = node;
2369 else
2370 pnode->right = node;
2371 }
2372 }
2373 else
2374 {
2375 node->ndoc++;
2376 node->nentry += n;
2377 }
2378}

References ARRPTR, check_weight(), compareStatWord, fb(), StatEntry::left, StatEntry::lenlexeme, StatEntry::lexeme, MemoryContextAlloc(), StatEntry::ndoc, StatEntry::nentry, POSDATALEN, StatEntry::right, STATENTRYHDRSZ, and STRPTR.

Referenced by chooseNextStatEntry(), and ts_accum().

◆ silly_cmp_tsvector()

static int silly_cmp_tsvector ( const TSVectorData a,
const TSVectorData b 
)
static

Definition at line 86 of file tsvector_op.c.

87{
88 if (VARSIZE(a) < VARSIZE(b))
89 return -1;
90 else if (VARSIZE(a) > VARSIZE(b))
91 return 1;
92 else if (a->size < b->size)
93 return -1;
94 else if (a->size > b->size)
95 return 1;
96 else
97 {
98 const WordEntry *aptr = ARRPTR(a);
99 const WordEntry *bptr = ARRPTR(b);
100 int i = 0;
101 int res;
102
103
104 for (i = 0; i < a->size; i++)
105 {
106 if (aptr->haspos != bptr->haspos)
107 {
108 return (aptr->haspos > bptr->haspos) ? -1 : 1;
109 }
110 else if ((res = tsCompareString(STRPTR(a) + aptr->pos, aptr->len, STRPTR(b) + bptr->pos, bptr->len, false)) != 0)
111 {
112 return res;
113 }
114 else if (aptr->haspos)
115 {
118 int j;
119
120 if (POSDATALEN(a, aptr) != POSDATALEN(b, bptr))
121 return (POSDATALEN(a, aptr) > POSDATALEN(b, bptr)) ? -1 : 1;
122
123 for (j = 0; j < POSDATALEN(a, aptr); j++)
124 {
125 if (WEP_GETPOS(*ap) != WEP_GETPOS(*bp))
126 {
127 return (WEP_GETPOS(*ap) > WEP_GETPOS(*bp)) ? -1 : 1;
128 }
129 else if (WEP_GETWEIGHT(*ap) != WEP_GETWEIGHT(*bp))
130 {
131 return (WEP_GETWEIGHT(*ap) > WEP_GETWEIGHT(*bp)) ? -1 : 1;
132 }
133 ap++, bp++;
134 }
135 }
136
137 aptr++;
138 bptr++;
139 }
140 }
141
142 return 0;
143}

References a, ARRPTR, b, fb(), i, j, POSDATALEN, POSDATAPTR, STRPTR, tsCompareString(), VARSIZE(), WEP_GETPOS, and WEP_GETWEIGHT.

◆ ts_accum()

static TSVectorStat * ts_accum ( MemoryContext  persistentContext,
TSVectorStat stat,
Datum  data 
)
static

Definition at line 2413 of file tsvector_op.c.

2414{
2416 uint32 i,
2417 nbit = 0,
2418 offset;
2419
2420 if (stat == NULL)
2421 { /* Init in first */
2423 stat->maxdepth = 1;
2424 }
2425
2426 /* simple check of correctness */
2427 if (txt == NULL || txt->size == 0)
2428 {
2429 if (txt && txt != (TSVector) DatumGetPointer(data))
2430 pfree(txt);
2431 return stat;
2432 }
2433
2434 i = txt->size - 1;
2435 for (; i > 0; i >>= 1)
2436 nbit++;
2437
2438 nbit = 1 << nbit;
2439 offset = (nbit - txt->size) / 2;
2440
2441 insertStatEntry(persistentContext, stat, txt, (nbit >> 1) - offset);
2443
2444 return stat;
2445}

References chooseNextStatEntry(), data, DatumGetPointer(), DatumGetTSVector(), fb(), i, insertStatEntry(), MemoryContextAllocZero(), pfree(), and stat.

Referenced by ts_stat_sql().

◆ TS_execute()

bool TS_execute ( QueryItem curitem,
void arg,
uint32  flags,
TSExecuteCallback  chkcond 
)

Definition at line 1854 of file tsvector_op.c.

1856{
1857 /*
1858 * If we get TS_MAYBE from the recursion, return true. We could only see
1859 * that result if the caller passed TS_EXEC_PHRASE_NO_POS, so there's no
1860 * need to check again.
1861 */
1862 return TS_execute_recurse(curitem, arg, flags, chkcond) != TS_NO;
1863}

References arg, fb(), TS_execute_recurse(), and TS_NO.

Referenced by Cover(), gtsvector_consistent(), hlCover(), and ts_match_vq().

◆ TS_execute_locations()

List * TS_execute_locations ( QueryItem curitem,
void arg,
uint32  flags,
TSExecuteCallback  chkcond 
)

Definition at line 2007 of file tsvector_op.c.

2010{
2011 List *result;
2012
2013 /* No flags supported, as yet */
2014 Assert(flags == TS_EXEC_EMPTY);
2015 if (TS_execute_locations_recurse(curitem, arg, chkcond, &result))
2016 return result;
2017 return NIL;
2018}

References arg, Assert, fb(), NIL, TS_EXEC_EMPTY, and TS_execute_locations_recurse().

Referenced by prsd_headline().

◆ TS_execute_locations_recurse()

static bool TS_execute_locations_recurse ( QueryItem curitem,
void arg,
TSExecuteCallback  chkcond,
List **  locations 
)
static

Definition at line 2025 of file tsvector_op.c.

2028{
2029 bool lmatch,
2030 rmatch;
2032 *rlocations;
2034
2035 /* since this function recurses, it could be driven to stack overflow */
2037
2038 /* ... and let's check for query cancel while we're at it */
2040
2041 /* Default locations result is empty */
2042 *locations = NIL;
2043
2044 if (curitem->type == QI_VAL)
2045 {
2047 if (chkcond(arg, (QueryOperand *) curitem, data) == TS_YES)
2048 {
2050 return true;
2051 }
2052 pfree(data);
2053 return false;
2054 }
2055
2056 switch (curitem->qoperator.oper)
2057 {
2058 case OP_NOT:
2059 if (!TS_execute_locations_recurse(curitem + 1, arg, chkcond,
2060 &llocations))
2061 return true; /* we don't pass back any locations */
2062 return false;
2063
2064 case OP_AND:
2065 if (!TS_execute_locations_recurse(curitem + curitem->qoperator.left,
2066 arg, chkcond,
2067 &llocations))
2068 return false;
2069 if (!TS_execute_locations_recurse(curitem + 1,
2070 arg, chkcond,
2071 &rlocations))
2072 return false;
2074 return true;
2075
2076 case OP_OR:
2078 arg, chkcond,
2079 &llocations);
2081 arg, chkcond,
2082 &rlocations);
2083 if (lmatch || rmatch)
2084 {
2085 /*
2086 * We generate an AND'able location struct from each
2087 * combination of sub-matches, following the disjunctive law
2088 * (A & B) | (C & D) = (A | C) & (A | D) & (B | C) & (B | D).
2089 *
2090 * However, if either input didn't produce locations (i.e., it
2091 * failed or was a NOT), we must just return the other list.
2092 */
2093 if (llocations == NIL)
2095 else if (rlocations == NIL)
2097 else
2098 {
2099 ListCell *ll;
2100
2101 foreach(ll, llocations)
2102 {
2104 ListCell *lr;
2105
2106 foreach(lr, rlocations)
2107 {
2109
2113 0, 0,
2114 ldata->npos + rdata->npos);
2115 /* Report the larger width, as explained above. */
2116 data->width = Max(ldata->width, rdata->width);
2118 }
2119 }
2120 }
2121
2122 return true;
2123 }
2124 return false;
2125
2126 case OP_PHRASE:
2127 /* We can hand this off to TS_phrase_execute */
2130 data) == TS_YES)
2131 {
2132 if (!data->negate)
2134 return true;
2135 }
2136 pfree(data);
2137 return false;
2138
2139 default:
2140 elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
2141 }
2142
2143 /* not reachable, but keep compiler quiet */
2144 return false;
2145}

References arg, CHECK_FOR_INTERRUPTS, check_stack_depth(), data, elog, ERROR, fb(), lappend(), QueryOperator::left, lfirst, list_concat(), list_make1, Max, NIL, OP_AND, OP_NOT, OP_OR, OP_PHRASE, QueryOperator::oper, palloc0_object, pfree(), QI_VAL, QueryItem::qoperator, TS_EXEC_EMPTY, TS_execute_locations_recurse(), TS_phrase_execute(), TS_phrase_output(), TS_YES, TSPO_BOTH, TSPO_L_ONLY, TSPO_R_ONLY, and QueryItem::type.

Referenced by TS_execute_locations(), and TS_execute_locations_recurse().

◆ TS_execute_recurse()

static TSTernaryValue TS_execute_recurse ( QueryItem curitem,
void arg,
uint32  flags,
TSExecuteCallback  chkcond 
)
static

Definition at line 1883 of file tsvector_op.c.

1885{
1887
1888 /* since this function recurses, it could be driven to stack overflow */
1890
1891 /* ... and let's check for query cancel while we're at it */
1893
1894 if (curitem->type == QI_VAL)
1895 return chkcond(arg, (QueryOperand *) curitem,
1896 NULL /* don't need position info */ );
1897
1898 switch (curitem->qoperator.oper)
1899 {
1900 case OP_NOT:
1901 if (flags & TS_EXEC_SKIP_NOT)
1902 return TS_YES;
1903 switch (TS_execute_recurse(curitem + 1, arg, flags, chkcond))
1904 {
1905 case TS_NO:
1906 return TS_YES;
1907 case TS_YES:
1908 return TS_NO;
1909 case TS_MAYBE:
1910 return TS_MAYBE;
1911 }
1912 break;
1913
1914 case OP_AND:
1915 lmatch = TS_execute_recurse(curitem + curitem->qoperator.left, arg,
1916 flags, chkcond);
1917 if (lmatch == TS_NO)
1918 return TS_NO;
1919 switch (TS_execute_recurse(curitem + 1, arg, flags, chkcond))
1920 {
1921 case TS_NO:
1922 return TS_NO;
1923 case TS_YES:
1924 return lmatch;
1925 case TS_MAYBE:
1926 return TS_MAYBE;
1927 }
1928 break;
1929
1930 case OP_OR:
1931 lmatch = TS_execute_recurse(curitem + curitem->qoperator.left, arg,
1932 flags, chkcond);
1933 if (lmatch == TS_YES)
1934 return TS_YES;
1935 switch (TS_execute_recurse(curitem + 1, arg, flags, chkcond))
1936 {
1937 case TS_NO:
1938 return lmatch;
1939 case TS_YES:
1940 return TS_YES;
1941 case TS_MAYBE:
1942 return TS_MAYBE;
1943 }
1944 break;
1945
1946 case OP_PHRASE:
1947
1948 /*
1949 * If we get a MAYBE result, and the caller doesn't want that,
1950 * convert it to NO. It would be more consistent, perhaps, to
1951 * return the result of TS_phrase_execute() verbatim and then
1952 * convert MAYBE results at the top of the recursion. But
1953 * converting at the topmost phrase operator gives results that
1954 * are bug-compatible with the old implementation, so do it like
1955 * this for now.
1956 */
1957 switch (TS_phrase_execute(curitem, arg, flags, chkcond, NULL))
1958 {
1959 case TS_NO:
1960 return TS_NO;
1961 case TS_YES:
1962 return TS_YES;
1963 case TS_MAYBE:
1964 return (flags & TS_EXEC_PHRASE_NO_POS) ? TS_MAYBE : TS_NO;
1965 }
1966 break;
1967
1968 default:
1969 elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
1970 }
1971
1972 /* not reachable, but keep compiler quiet */
1973 return TS_NO;
1974}

References arg, CHECK_FOR_INTERRUPTS, check_stack_depth(), elog, ERROR, fb(), QueryOperator::left, OP_AND, OP_NOT, OP_OR, OP_PHRASE, QueryOperator::oper, QI_VAL, QueryItem::qoperator, TS_EXEC_PHRASE_NO_POS, TS_EXEC_SKIP_NOT, TS_execute_recurse(), TS_MAYBE, TS_NO, TS_phrase_execute(), TS_YES, and QueryItem::type.

Referenced by TS_execute(), TS_execute_recurse(), and TS_execute_ternary().

◆ TS_execute_ternary()

TSTernaryValue TS_execute_ternary ( QueryItem * curitem,
void * arg,
uint32  flags,
TSExecuteCallback  chkcond 
)

Definition at line 1871 of file tsvector_op.c.

1873{
1874 return TS_execute_recurse(curitem, arg, flags, chkcond);
1875}

References arg, fb(), and TS_execute_recurse().

Referenced by gin_tsquery_consistent(), and gin_tsquery_triconsistent().

◆ ts_match_qv()

Datum ts_match_qv ( PG_FUNCTION_ARGS  )

◆ ts_match_tq()

Datum ts_match_tq ( PG_FUNCTION_ARGS  )

◆ ts_match_tt()

◆ ts_match_vq()

Datum ts_match_vq ( PG_FUNCTION_ARGS  )

Definition at line 2214 of file tsvector_op.c.

2215{
2216 TSVector val = PG_GETARG_TSVECTOR(0);
2217 TSQuery query = PG_GETARG_TSQUERY(1);
2218 CHKVAL chkval;
2219 bool result;
2220
2221 /* empty query matches nothing */
2222 if (!query->size)
2223 {
2224 PG_FREE_IF_COPY(val, 0);
2225 PG_FREE_IF_COPY(query, 1);
2226 PG_RETURN_BOOL(false);
2227 }
2228
2229 chkval.arrb = ARRPTR(val);
2230 chkval.arre = chkval.arrb + val->size;
2231 chkval.values = STRPTR(val);
2232 chkval.operand = GETOPERAND(query);
2233 result = TS_execute(GETQUERY(query),
2234 &chkval,
2235 TS_EXEC_EMPTY,
2236 checkcondition_str);
2237
2238 PG_FREE_IF_COPY(val, 0);
2239 PG_FREE_IF_COPY(query, 1);
2240 PG_RETURN_BOOL(result);
2241}

References ARRPTR, checkcondition_str(), fb(), GETOPERAND, GETQUERY, PG_FREE_IF_COPY, PG_GETARG_TSQUERY, PG_GETARG_TSVECTOR, PG_RETURN_BOOL, TSQueryData::size, STRPTR, TS_EXEC_EMPTY, TS_execute(), and val.

Referenced by ts_match_qv(), ts_match_tq(), and ts_match_tt().

◆ TS_phrase_execute()

static TSTernaryValue TS_phrase_execute ( QueryItem * curitem,
void * arg,
uint32  flags,
TSExecuteCallback  chkcond,
ExecPhraseData * data 
)
static

Definition at line 1609 of file tsvector_op.c.

1612{
1613 ExecPhraseData Ldata,
1614 Rdata;
1615 TSTernaryValue lmatch,
1616 rmatch;
1617 int Loffset,
1618 Roffset,
1619 maxwidth;
1620
1621 /* since this function recurses, it could be driven to stack overflow */
1622 check_stack_depth();
1623
1624 /* ... and let's check for query cancel while we're at it */
1625 CHECK_FOR_INTERRUPTS();
1626
1627 if (curitem->type == QI_VAL)
1628 return chkcond(arg, (QueryOperand *) curitem, data);
1629
1630 switch (curitem->qoperator.oper)
1631 {
1632 case OP_NOT:
1633
1634 /*
1635 * We need not touch data->width, since a NOT operation does not
1636 * change the match width.
1637 */
1638 if (flags & TS_EXEC_SKIP_NOT)
1639 {
1640 /* with SKIP_NOT, report NOT as "match everywhere" */
1641 Assert(data->npos == 0 && !data->negate);
1642 data->negate = true;
1643 return TS_YES;
1644 }
1645 switch (TS_phrase_execute(curitem + 1, arg, flags, chkcond, data))
1646 {
1647 case TS_NO:
1648 /* change "match nowhere" to "match everywhere" */
1649 Assert(data->npos == 0 && !data->negate);
1650 data->negate = true;
1651 return TS_YES;
1652 case TS_YES:
1653 if (data->npos > 0)
1654 {
1655 /* we have some positions, invert negate flag */
1656 data->negate = !data->negate;
1657 return TS_YES;
1658 }
1659 else if (data->negate)
1660 {
1661 /* change "match everywhere" to "match nowhere" */
1662 data->negate = false;
1663 return TS_NO;
1664 }
1665 /* Should not get here if result was TS_YES */
1666 Assert(false);
1667 break;
1668 case TS_MAYBE:
1669 /* match positions are, and remain, uncertain */
1670 return TS_MAYBE;
1671 }
1672 break;
1673
1674 case OP_PHRASE:
1675 case OP_AND:
1676 memset(&Ldata, 0, sizeof(Ldata));
1677 memset(&Rdata, 0, sizeof(Rdata));
1678
1679 lmatch = TS_phrase_execute(curitem + curitem->qoperator.left,
1680 arg, flags, chkcond, &Ldata);
1681 if (lmatch == TS_NO)
1682 return TS_NO;
1683
1684 rmatch = TS_phrase_execute(curitem + 1,
1685 arg, flags, chkcond, &Rdata);
1686 if (rmatch == TS_NO)
1687 return TS_NO;
1688
1689 /*
1690 * If either operand has no position information, then we can't
1691 * return reliable position data, only a MAYBE result.
1692 */
1693 if (lmatch == TS_MAYBE || rmatch == TS_MAYBE)
1694 return TS_MAYBE;
1695
1696 if (curitem->qoperator.oper == OP_PHRASE)
1697 {
1698 /*
1699 * Compute Loffset and Roffset suitable for phrase match, and
1700 * compute overall width of whole phrase match.
1701 */
1702 Loffset = curitem->qoperator.distance + Rdata.width;
1703 Roffset = 0;
1704 if (data)
1705 data->width = curitem->qoperator.distance +
1706 Ldata.width + Rdata.width;
1707 }
1708 else
1709 {
1710 /*
1711 * For OP_AND, set output width and alignment like OP_OR (see
1712 * comment below)
1713 */
1714 maxwidth = Max(Ldata.width, Rdata.width);
1715 Loffset = maxwidth - Ldata.width;
1716 Roffset = maxwidth - Rdata.width;
1717 if (data)
1718 data->width = maxwidth;
1719 }
1720
1721 if (Ldata.negate && Rdata.negate)
1722 {
1723 /* !L & !R: treat as !(L | R) */
1724 (void) TS_phrase_output(data, &Ldata, &Rdata,
1725 TSPO_BOTH | TSPO_L_ONLY | TSPO_R_ONLY,
1726 Loffset, Roffset,
1727 Ldata.npos + Rdata.npos);
1728 if (data)
1729 data->negate = true;
1730 return TS_YES;
1731 }
1732 else if (Ldata.negate)
1733 {
1734 /* !L & R */
1735 return TS_phrase_output(data, &Ldata, &Rdata,
1736 TSPO_R_ONLY,
1737 Loffset, Roffset,
1738 Rdata.npos);
1739 }
1740 else if (Rdata.negate)
1741 {
1742 /* L & !R */
1743 return TS_phrase_output(data, &Ldata, &Rdata,
1744 TSPO_L_ONLY,
1745 Loffset, Roffset,
1746 Ldata.npos);
1747 }
1748 else
1749 {
1750 /* straight AND */
1751 return TS_phrase_output(data, &Ldata, &Rdata,
1752 TSPO_BOTH,
1753 Loffset, Roffset,
1754 Min(Ldata.npos, Rdata.npos));
1755 }
1756
1757 case OP_OR:
1758 memset(&Ldata, 0, sizeof(Ldata));
1759 memset(&Rdata, 0, sizeof(Rdata));
1760
1761 lmatch = TS_phrase_execute(curitem + curitem->qoperator.left,
1762 arg, flags, chkcond, &Ldata);
1763 rmatch = TS_phrase_execute(curitem + 1,
1764 arg, flags, chkcond, &Rdata);
1765
1766 if (lmatch == TS_NO && rmatch == TS_NO)
1767 return TS_NO;
1768
1769 /*
1770 * If either operand has no position information, then we can't
1771 * return reliable position data, only a MAYBE result.
1772 */
1773 if (lmatch == TS_MAYBE || rmatch == TS_MAYBE)
1774 return TS_MAYBE;
1775
1776 /*
1777 * Cope with undefined output width from failed submatch. (This
1778 * takes less code than trying to ensure that all failure returns
1779 * set data->width to zero.)
1780 */
1781 if (lmatch == TS_NO)
1782 Ldata.width = 0;
1783 if (rmatch == TS_NO)
1784 Rdata.width = 0;
1785
1786 /*
1787 * For OP_AND and OP_OR, report the width of the wider of the two
1788 * inputs, and align the narrower input's positions to the right
1789 * end of that width. This rule deals at least somewhat
1790 * reasonably with cases like "x <-> (y | z <-> q)".
1791 */
1792 maxwidth = Max(Ldata.width, Rdata.width);
1793 Loffset = maxwidth - Ldata.width;
1794 Roffset = maxwidth - Rdata.width;
1795 data->width = maxwidth;
1796
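1797 if (Ldata.negate && Rdata.negate)
1798 {
1799 /* !L | !R: treat as !(L & R) */
1800 (void) TS_phrase_output(data, &Ldata, &Rdata,
1801 TSPO_BOTH,
1802 Loffset, Roffset,
1803 Min(Ldata.npos, Rdata.npos));
1804 data->negate = true;
1805 return TS_YES;
1806 }
1807 else if (Ldata.negate)
1808 {
1809 /* !L | R: treat as !(L & !R) */
1810 (void) TS_phrase_output(data, &Ldata, &Rdata,
1811 TSPO_L_ONLY,
1812 Loffset, Roffset,
1813 Ldata.npos);
1814 data->negate = true;
1815 return TS_YES;
1816 }
1817 else if (Rdata.negate)
1818 {
1819 /* L | !R: treat as !(!L & R) */
1820 (void) TS_phrase_output(data, &Ldata, &Rdata,
1821 TSPO_R_ONLY,
1822 Loffset, Roffset,
1823 Rdata.npos);
1824 data->negate = true;
1825 return TS_YES;
1826 }
1827 else
1828 {
1829 /* straight OR */
1830 return TS_phrase_output(data, &Ldata, &Rdata,
1831 TSPO_BOTH | TSPO_L_ONLY | TSPO_R_ONLY,
1832 Loffset, Roffset,
1833 Ldata.npos + Rdata.npos);
1834 }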
1835
1836 default:
1837 elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
1838 }
1839
1840 /* not reachable, but keep compiler quiet */
1841 return TS_NO;
1842}

References arg, Assert, CHECK_FOR_INTERRUPTS, check_stack_depth(), data, QueryOperator::distance, elog, ERROR, fb(), QueryOperator::left, Max, Min, OP_AND, OP_NOT, OP_OR, OP_PHRASE, QueryOperator::oper, QI_VAL, QueryItem::qoperator, TS_EXEC_SKIP_NOT, TS_MAYBE, TS_NO, TS_phrase_execute(), TS_phrase_output(), TS_YES, TSPO_BOTH, TSPO_L_ONLY, TSPO_R_ONLY, and QueryItem::type.

Referenced by TS_execute_locations_recurse(), TS_execute_recurse(), and TS_phrase_execute().

◆ TS_phrase_output()

static TSTernaryValue TS_phrase_output ( ExecPhraseData * data,
ExecPhraseData * Ldata,
ExecPhraseData * Rdata,
int  emit,
int  Loffset,
int  Roffset,
int  max_npos 
)
static

Definition at line 1468 of file tsvector_op.c.

1475{
1476 int Lindex,
1477 Rindex;
1478
1479 /* Loop until both inputs are exhausted */
1480 Lindex = Rindex = 0;
1481 while (Lindex < Ldata->npos || Rindex < Rdata->npos)
1482 {
1483 int Lpos,
1484 Rpos;
1485 int output_pos = 0;
1486
1487 /*
1488 * Fetch current values to compare. WEP_GETPOS() is needed because
1489 * ExecPhraseData->data can point to a tsvector's WordEntryPosVector.
1490 */
1491 if (Lindex < Ldata->npos)
1492 Lpos = WEP_GETPOS(Ldata->pos[Lindex]) + Loffset;
1493 else
1494 {
1495 /* L array exhausted, so we're done if R_ONLY isn't set */
1496 if (!(emit & TSPO_R_ONLY))
1497 break;
1498 Lpos = INT_MAX;
1499 }
1500 if (Rindex < Rdata->npos)
1501 Rpos = WEP_GETPOS(Rdata->pos[Rindex]) + Roffset;
1502 else
1503 {
1504 /* R array exhausted, so we're done if L_ONLY isn't set */
1505 if (!(emit & TSPO_L_ONLY))
1506 break;
1507 Rpos = INT_MAX;
1508 }
1509
1510 /* Merge-join the two input lists */
1511 if (Lpos < Rpos)
1512 {
1513 /* Lpos is not matched in Rdata, should we output it? */
1514 if (emit & TSPO_L_ONLY)
1515 output_pos = Lpos;
1516 Lindex++;
1517 }
1518 else if (Lpos == Rpos)
1519 {
1520 /* Lpos and Rpos match ... should we output it? */
1521 if (emit & TSPO_BOTH)
1522 output_pos = Rpos;
1523 Lindex++;
1524 Rindex++;
1525 }
1526 else /* Lpos > Rpos */
1527 {
1528 /* Rpos is not matched in Ldata, should we output it? */
1529 if (emit & TSPO_R_ONLY)
1530 output_pos = Rpos;
1531 Rindex++;
1532 }
1533
1534 if (output_pos > 0)
1535 {
1536 if (data)
1537 {
1538 /* Store position, first allocating output array if needed */
1539 if (data->pos == NULL)
1540 {
1541 data->pos = (WordEntryPos *)
1542 palloc(max_npos * sizeof(WordEntryPos));
1543 data->allocated = true;
1544 }
1545 data->pos[data->npos++] = output_pos;
1546 }
1547 else
1548 {
1549 /*
1550 * Exact positions not needed, so return TS_YES as soon as we
1551 * know there is at least one.
1552 */
1553 return TS_YES;
1554 }
1555 }
1556 }
1557
1558 if (data && data->npos > 0)
1559 {
1560 /* Let's assert we didn't overrun the array */
1561 Assert(data->npos <= max_npos);
1562 return TS_YES;
1563 }
1564 return TS_NO;
1565}

References Assert, data, fb(), palloc(), TS_NO, TS_YES, TSPO_BOTH, TSPO_L_ONLY, TSPO_R_ONLY, and WEP_GETPOS.

Referenced by TS_execute_locations_recurse(), and TS_phrase_execute().

◆ ts_process_call()

static Datum ts_process_call ( FuncCallContext * funcctx)
static

Definition at line 2535 of file tsvector_op.c.

2536{
2537 TSVectorStat *st;
2538 StatEntry *entry;
2539
2540 st = (TSVectorStat *) funcctx->user_fctx;
2541
2542 entry = walkStatEntryTree(st);
2543
2544 if (entry != NULL)
2545 {
2546 Datum result;
2547 char *values[3];
2548 char ndoc[16];
2549 char nentry[16];
2550 HeapTuple tuple;
2551
2552 values[0] = palloc(entry->lenlexeme + 1);
2553 memcpy(values[0], entry->lexeme, entry->lenlexeme);
2554 (values[0])[entry->lenlexeme] = '\0';
2555 sprintf(ndoc, "%d", entry->ndoc);
2556 values[1] = ndoc;
2557 sprintf(nentry, "%d", entry->nentry);
2558 values[2] = nentry;
2559
2560 tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
2561 result = HeapTupleGetDatum(tuple);
2562
2563 pfree(values[0]);
2564
2565 /* mark entry as already visited */
2566 entry->ndoc = 0;
2567
2568 return result;
2569 }
2570
2571 return (Datum) 0;
2572}

References BuildTupleFromCStrings(), fb(), HeapTupleGetDatum(), StatEntry::lenlexeme, StatEntry::lexeme, StatEntry::ndoc, StatEntry::nentry, palloc(), pfree(), sprintf, values, and walkStatEntryTree().

Referenced by ts_stat1(), and ts_stat2().

◆ ts_setup_firstcall()

static void ts_setup_firstcall ( FunctionCallInfo  fcinfo,
FuncCallContext * funcctx,
TSVectorStat * stat 
)
static

Definition at line 2448 of file tsvector_op.c.

2450{
2451 TupleDesc tupdesc;
2452 MemoryContext oldcontext;
2453 StatEntry *node;
2454
2455 funcctx->user_fctx = stat;
2456
2457 oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
2458
2459 stat->stack = palloc0_array(StatEntry *, stat->maxdepth + 1);
2460 stat->stackpos = 0;
2461
2462 node = stat->root;
2463 /* find leftmost value */
2464 if (node == NULL)
2465 stat->stack[stat->stackpos] = NULL;
2466 else
2467 for (;;)
2468 {
2469 stat->stack[stat->stackpos] = node;
2470 if (node->left)
2471 {
2472 stat->stackpos++;
2473 node = node->left;
2474 }
2475 else
2476 break;
2477 }
2478 Assert(stat->stackpos <= stat->maxdepth);
2479
2480 if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
2481 elog(ERROR, "return type must be a row type");
2482 funcctx->tuple_desc = tupdesc;
2483 funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
2484
2485 MemoryContextSwitchTo(oldcontext);
2486}

References Assert, elog, ERROR, fb(), get_call_result_type(), StatEntry::left, MemoryContextSwitchTo(), palloc0_array, stat, TupleDescGetAttInMetadata(), and TYPEFUNC_COMPOSITE.

Referenced by ts_stat1(), and ts_stat2().

◆ ts_stat1()

Datum ts_stat1 ( PG_FUNCTION_ARGS  )

Definition at line 2664 of file tsvector_op.c.

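2665{
2666 FuncCallContext *funcctx;
2667 Datum result;
2668
2669 if (SRF_IS_FIRSTCALL())
2670 {
2671 TSVectorStat *stat;
2672 text *txt = PG_GETARG_TEXT_PP(0);
2673
2674 funcctx = SRF_FIRSTCALL_INIT();
2675 SPI_connect();
2676 stat = ts_stat_sql(funcctx->multi_call_memory_ctx, txt, NULL);
2677 PG_FREE_IF_COPY(txt, 0);
2678 ts_setup_firstcall(fcinfo, funcctx, stat);
2679 SPI_finish();
2680 }
2681
2682 funcctx = SRF_PERCALL_SETUP();
2683 if ((result = ts_process_call(funcctx)) != (Datum) 0)
2684 SRF_RETURN_NEXT(funcctx, result);
2685 SRF_RETURN_DONE(funcctx);
2686}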

References fb(), PG_FREE_IF_COPY, PG_GETARG_TEXT_PP, SPI_connect(), SPI_finish(), SRF_FIRSTCALL_INIT, SRF_IS_FIRSTCALL, SRF_PERCALL_SETUP, SRF_RETURN_DONE, SRF_RETURN_NEXT, stat, ts_process_call(), ts_setup_firstcall(), and ts_stat_sql().

◆ ts_stat2()

Datum ts_stat2 ( PG_FUNCTION_ARGS  )

Definition at line 2689 of file tsvector_op.c.

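2690{
2691 FuncCallContext *funcctx;
2692 Datum result;
2693
2694 if (SRF_IS_FIRSTCALL())
2695 {
2696 TSVectorStat *stat;
2697 text *txt = PG_GETARG_TEXT_PP(0);
2698 text *ws = PG_GETARG_TEXT_PP(1);
2699
2700 funcctx = SRF_FIRSTCALL_INIT();
2701 SPI_connect();
2702 stat = ts_stat_sql(funcctx->multi_call_memory_ctx, txt, ws);
2703 PG_FREE_IF_COPY(txt, 0);
2704 PG_FREE_IF_COPY(ws, 1);
2705 ts_setup_firstcall(fcinfo, funcctx, stat);
2706 SPI_finish();
2707 }
2708
2709 funcctx = SRF_PERCALL_SETUP();
2710 if ((result = ts_process_call(funcctx)) != (Datum) 0)
2711 SRF_RETURN_NEXT(funcctx, result);
2712 SRF_RETURN_DONE(funcctx);
2713}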

References fb(), PG_FREE_IF_COPY, PG_GETARG_TEXT_PP, SPI_connect(), SPI_finish(), SRF_FIRSTCALL_INIT, SRF_IS_FIRSTCALL, SRF_PERCALL_SETUP, SRF_RETURN_DONE, SRF_RETURN_NEXT, stat, ts_process_call(), ts_setup_firstcall(), and ts_stat_sql().

◆ ts_stat_sql()

static TSVectorStat * ts_stat_sql ( MemoryContext  persistentContext,
text * txt,
text * ws 
)
static

Definition at line 2575 of file tsvector_op.c.

2576{
2577 char *query = text_to_cstring(txt);
2579 bool isnull;
2580 Portal portal;
2582
2583 if ((plan = SPI_prepare(query, 0, NULL)) == NULL)
2584 /* internal error */
2585 elog(ERROR, "SPI_prepare(\"%s\") failed", query);
2586
2587 if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL)
2588 /* internal error */
2589 elog(ERROR, "SPI_cursor_open(\"%s\") failed", query);
2590
2591 SPI_cursor_fetch(portal, true, 100);
2592
2593 if (SPI_tuptable == NULL ||
2594 SPI_tuptable->tupdesc->natts != 1 ||
2596 TSVECTOROID))
2597 ereport(ERROR,
2599 errmsg("ts_stat query must return one tsvector column")));
2600
2602 stat->maxdepth = 1;
2603
2604 if (ws)
2605 {
2606 char *buf;
2607
2608 buf = VARDATA_ANY(ws);
2609 while (buf - VARDATA_ANY(ws) < VARSIZE_ANY_EXHDR(ws))
2610 {
2611 if (pg_mblen(buf) == 1)
2612 {
2613 switch (*buf)
2614 {
2615 case 'A':
2616 case 'a':
2617 stat->weight |= 1 << 3;
2618 break;
2619 case 'B':
2620 case 'b':
2621 stat->weight |= 1 << 2;
2622 break;
2623 case 'C':
2624 case 'c':
2625 stat->weight |= 1 << 1;
2626 break;
2627 case 'D':
2628 case 'd':
2629 stat->weight |= 1;
2630 break;
2631 default:
2632 stat->weight |= 0;
2633 }
2634 }
2635 buf += pg_mblen(buf);
2636 }
2637 }
2638
2639 while (SPI_processed > 0)
2640 {
2641 uint64 i;
2642
2643 for (i = 0; i < SPI_processed; i++)
2644 {
2646
2647 if (!isnull)
2649 }
2650
2652 SPI_cursor_fetch(portal, true, 100);
2653 }
2654
2656 SPI_cursor_close(portal);
2658 pfree(query);
2659
2660 return stat;
2661}

References buf, data, elog, ereport, errcode(), errmsg(), ERROR, fb(), i, IsBinaryCoercible(), MemoryContextAllocZero(), TupleDescData::natts, pfree(), pg_mblen(), plan, SPI_cursor_close(), SPI_cursor_fetch(), SPI_cursor_open(), SPI_freeplan(), SPI_freetuptable(), SPI_getbinval(), SPI_gettypeid(), SPI_prepare(), SPI_processed, SPI_tuptable, stat, text_to_cstring(), ts_accum(), SPITupleTable::tupdesc, SPITupleTable::vals, VARDATA_ANY(), and VARSIZE_ANY_EXHDR().

Referenced by ts_stat1(), and ts_stat2().

◆ tsCompareString()

int32 tsCompareString ( char * a,
int  lena,
char * b,
int  lenb,
bool  prefix 
)

Definition at line 1152 of file tsvector_op.c.

1153{
1154 int cmp;
1155
1156 if (lena == 0)
1157 {
1158 if (prefix)
1159 cmp = 0; /* empty string is prefix of anything */
1160 else
1161 cmp = (lenb > 0) ? -1 : 0;
1162 }
1163 else if (lenb == 0)
1164 {
1165 cmp = (lena > 0) ? 1 : 0;
1166 }
1167 else
1168 {
1169 cmp = memcmp(a, b, Min((unsigned int) lena, (unsigned int) lenb));
1170
1171 if (prefix)
1172 {
1173 if (cmp == 0 && lena > lenb)
1174 cmp = 1; /* a is longer, so not a prefix of b */
1175 }
1176 else if (cmp == 0 && lena != lenb)
1177 {
1178 cmp = (lena < lenb) ? -1 : 1;
1179 }
1180 }
1181
1182 return cmp;
1183}

References a, b, cmp(), fb(), and Min.

Referenced by checkcondition_str(), compare_text_lexemes(), compareentry(), compareQueryOperand(), compareWORD(), gin_cmp_prefix(), gin_cmp_tslexeme(), hlfinditem(), QTNodeCompare(), silly_cmp_tsvector(), and tsvector_bsearch().

◆ tsquery_requires_match()

bool tsquery_requires_match ( QueryItem * curitem)

Definition at line 2156 of file tsvector_op.c.

2157{
2158 /* since this function recurses, it could be driven to stack overflow */
2159 check_stack_depth();
2160
2161 if (curitem->type == QI_VAL)
2162 return true;
2163
2164 switch (curitem->qoperator.oper)
2165 {
2166 case OP_NOT:
2167
2168 /*
2169 * Assume there are no required matches underneath a NOT. For
2170 * some cases with nested NOTs, we could prove there's a required
2171 * match, but it seems unlikely to be worth the trouble.
2172 */
2173 return false;
2174
2175 case OP_PHRASE:
2176
2177 /*
2178 * Treat OP_PHRASE as OP_AND here
2179 */
2180 case OP_AND:
2181 /* If either side requires a match, we're good */
2182 if (tsquery_requires_match(curitem + curitem->qoperator.left))
2183 return true;
2184 else
2185 return tsquery_requires_match(curitem + 1);
2186
2187 case OP_OR:
2188 /* Both sides must require a match */
2189 if (tsquery_requires_match(curitem + curitem->qoperator.left))
2190 return tsquery_requires_match(curitem + 1);
2191 else
2192 return false;
2193
2194 default:
2195 elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
2196 }
2197
2198 /* not reachable, but keep compiler quiet */
2199 return false;
2200}

References check_stack_depth(), elog, ERROR, QueryOperator::left, OP_AND, OP_NOT, OP_OR, OP_PHRASE, QueryOperator::oper, QI_VAL, QueryItem::qoperator, tsquery_requires_match(), and QueryItem::type.

Referenced by gin_extract_tsquery(), and tsquery_requires_match().

◆ tsvector_bsearch()

static int tsvector_bsearch ( const TSVectorData * tsv,
char * lexeme,
int  lexeme_len 
)
static

Definition at line 400 of file tsvector_op.c.

401{
402 const WordEntry *arrin = ARRPTR(tsv);
403 int StopLow = 0,
404 StopHigh = tsv->size,
405 StopMiddle,
406 cmp;
407
408 while (StopLow < StopHigh)
409 {
410 StopMiddle = (StopLow + StopHigh) / 2;
411
412 cmp = tsCompareString(lexeme, lexeme_len,
413 STRPTR(tsv) + arrin[StopMiddle].pos,
414 arrin[StopMiddle].len,
415 false);
416
417 if (cmp < 0)
418 StopHigh = StopMiddle;
419 else if (cmp > 0)
420 StopLow = StopMiddle + 1;
421 else /* found it */
422 return StopMiddle;
423 }
424
425 return -1;
426}

References ARRPTR, cmp(), fb(), len, StatEntry::lexeme, STRPTR, and tsCompareString().

Referenced by tsvector_delete_arr(), tsvector_delete_str(), and tsvector_setweight_by_filter().

◆ tsvector_concat()

Datum tsvector_concat ( PG_FUNCTION_ARGS  )

Definition at line 925 of file tsvector_op.c.

926{
929 TSVector out;
930 WordEntry *ptr;
932 *ptr2;
933 WordEntryPos *p;
934 int maxpos = 0,
935 i,
936 j,
937 i1,
938 i2,
939 dataoff,
942 char *data,
943 *data1,
944 *data2;
945
946 /* Get max position in in1; we'll need this to offset in2's positions */
947 ptr = ARRPTR(in1);
948 i = in1->size;
949 while (i--)
950 {
951 if ((j = POSDATALEN(in1, ptr)) != 0)
952 {
953 p = POSDATAPTR(in1, ptr);
954 while (j--)
955 {
956 if (WEP_GETPOS(*p) > maxpos)
957 maxpos = WEP_GETPOS(*p);
958 p++;
959 }
960 }
961 ptr++;
962 }
963
964 ptr1 = ARRPTR(in1);
965 ptr2 = ARRPTR(in2);
966 data1 = STRPTR(in1);
967 data2 = STRPTR(in2);
968 i1 = in1->size;
969 i2 = in2->size;
970
971 /*
972 * Conservative estimate of space needed. We might need all the data in
973 * both inputs, and conceivably add a pad byte before position data for
974 * each item where there was none before.
975 */
976 output_bytes = VARSIZE(in1) + VARSIZE(in2) + i1 + i2;
977
980
981 /*
982 * We must make out->size valid so that STRPTR(out) is sensible. We'll
983 * collapse out any unused space at the end.
984 */
985 out->size = in1->size + in2->size;
986
987 ptr = ARRPTR(out);
988 data = STRPTR(out);
989 dataoff = 0;
990 while (i1 && i2)
991 {
993
994 if (cmp < 0)
995 { /* in1 first */
996 ptr->haspos = ptr1->haspos;
997 ptr->len = ptr1->len;
998 memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
999 ptr->pos = dataoff;
1000 dataoff += ptr1->len;
1001 if (ptr->haspos)
1002 {
1004 memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
1005 dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
1006 }
1007
1008 ptr++;
1009 ptr1++;
1010 i1--;
1011 }
1012 else if (cmp > 0)
1013 { /* in2 first */
1014 ptr->haspos = ptr2->haspos;
1015 ptr->len = ptr2->len;
1016 memcpy(data + dataoff, data2 + ptr2->pos, ptr2->len);
1017 ptr->pos = dataoff;
1018 dataoff += ptr2->len;
1019 if (ptr->haspos)
1020 {
1021 int addlen = add_pos(in2, ptr2, out, ptr, maxpos);
1022
1023 if (addlen == 0)
1024 ptr->haspos = 0;
1025 else
1026 {
1028 dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
1029 }
1030 }
1031
1032 ptr++;
1033 ptr2++;
1034 i2--;
1035 }
1036 else
1037 {
1038 ptr->haspos = ptr1->haspos | ptr2->haspos;
1039 ptr->len = ptr1->len;
1040 memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
1041 ptr->pos = dataoff;
1042 dataoff += ptr1->len;
1043 if (ptr->haspos)
1044 {
1045 if (ptr1->haspos)
1046 {
1048 memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
1049 dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
1050 if (ptr2->haspos)
1051 dataoff += add_pos(in2, ptr2, out, ptr, maxpos) * sizeof(WordEntryPos);
1052 }
1053 else /* must have ptr2->haspos */
1054 {
1055 int addlen = add_pos(in2, ptr2, out, ptr, maxpos);
1056
1057 if (addlen == 0)
1058 ptr->haspos = 0;
1059 else
1060 {
1062 dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
1063 }
1064 }
1065 }
1066
1067 ptr++;
1068 ptr1++;
1069 ptr2++;
1070 i1--;
1071 i2--;
1072 }
1073 }
1074
1075 while (i1)
1076 {
1077 ptr->haspos = ptr1->haspos;
1078 ptr->len = ptr1->len;
1079 memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
1080 ptr->pos = dataoff;
1081 dataoff += ptr1->len;
1082 if (ptr->haspos)
1083 {
1085 memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
1086 dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
1087 }
1088
1089 ptr++;
1090 ptr1++;
1091 i1--;
1092 }
1093
1094 while (i2)
1095 {
1096 ptr->haspos = ptr2->haspos;
1097 ptr->len = ptr2->len;
1098 memcpy(data + dataoff, data2 + ptr2->pos, ptr2->len);
1099 ptr->pos = dataoff;
1100 dataoff += ptr2->len;
1101 if (ptr->haspos)
1102 {
1103 int addlen = add_pos(in2, ptr2, out, ptr, maxpos);
1104
1105 if (addlen == 0)
1106 ptr->haspos = 0;
1107 else
1108 {
1110 dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
1111 }
1112 }
1113
1114 ptr++;
1115 ptr2++;
1116 i2--;
1117 }
1118
1119 /*
1120 * Instead of checking each offset individually, we check for overflow of
1121 * pos fields once at the end.
1122 */
1123 if (dataoff > MAXSTRPOS)
1124 ereport(ERROR,
1126 errmsg("string is too long for tsvector (%d bytes, max %d bytes)", dataoff, MAXSTRPOS)));
1127
1128 /*
1129 * Adjust sizes (asserting that we didn't overrun the original estimates)
1130 * and collapse out any unused array entries.
1131 */
1132 output_size = ptr - ARRPTR(out);
1134 out->size = output_size;
1135 if (data != STRPTR(out))
1136 memmove(STRPTR(out), data, dataoff);
1138 Assert(output_bytes <= VARSIZE(out));
1140
1141 PG_FREE_IF_COPY(in1, 0);
1142 PG_FREE_IF_COPY(in2, 1);
1143 PG_RETURN_POINTER(out);
1144}

References _POSVECPTR, add_pos(), ARRPTR, Assert, CALCDATASIZE, cmp(), compareEntry, data, ereport, errcode(), errmsg(), ERROR, fb(), WordEntry::haspos, i, j, WordEntry::len, MAXSTRPOS, palloc0(), PG_FREE_IF_COPY, PG_GETARG_TSVECTOR, PG_RETURN_POINTER, WordEntry::pos, POSDATALEN, POSDATAPTR, SET_VARSIZE(), SHORTALIGN, TSVectorData::size, STRPTR, VARSIZE(), and WEP_GETPOS.

◆ tsvector_delete_arr()

Datum tsvector_delete_arr ( PG_FUNCTION_ARGS  )

Definition at line 578 of file tsvector_op.c.

579{
581 tsout;
583 int i,
584 nlex,
588 bool *nulls;
589
591
592 /*
593 * In typical use case array of lexemes to delete is relatively small. So
594 * here we optimize things for that scenario: iterate through lexarr
595 * performing binary search of each lexeme from lexarr in tsvector.
596 */
597 skip_indices = palloc0(nlex * sizeof(int));
598 for (i = skip_count = 0; i < nlex; i++)
599 {
600 char *lex;
601 int lex_len,
602 lex_pos;
603
604 /* Ignore null array elements, they surely don't match */
605 if (nulls[i])
606 continue;
607
611
612 if (lex_pos >= 0)
614 }
615
617
621
623}

References DatumGetPointer(), deconstruct_array_builtin(), fb(), i, palloc0(), pfree(), PG_FREE_IF_COPY, PG_GETARG_ARRAYTYPE_P, PG_GETARG_TSVECTOR, PG_RETURN_POINTER, tsvector_bsearch(), tsvector_delete_by_indices(), VARDATA(), VARHDRSZ, and VARSIZE().

◆ tsvector_delete_by_indices()

static TSVector tsvector_delete_by_indices ( TSVector  tsv,
int * indices_to_delete,
int  indices_count 
)
static

Definition at line 464 of file tsvector_op.c.

466{
469 *arrout;
470 char *data = STRPTR(tsv),
471 *dataout;
472 int i, /* index in arrin */
473 j, /* index in arrout */
474 k, /* index in indices_to_delete */
475 curoff; /* index in dataout area */
476
477 /*
478 * Sort the filter array to simplify membership checks below. Also, get
479 * rid of any duplicate entries, so that we can assume that indices_count
480 * is exactly equal to the number of lexemes that will be removed.
481 */
482 if (indices_count > 1)
483 {
487 }
488
489 /*
490 * Here we overestimate tsout size, since we don't know how much space is
491 * used by the deleted lexeme(s). We will set exact size below.
492 */
494
495 /* This count must be correct because STRPTR(tsout) relies on it. */
496 tsout->size = tsv->size - indices_count;
497
498 /*
499 * Copy tsv to tsout, skipping lexemes listed in indices_to_delete.
500 */
503 curoff = 0;
504 for (i = j = k = 0; i < tsv->size; i++)
505 {
506 /*
507 * If current i is present in indices_to_delete, skip this lexeme.
508 * Since indices_to_delete is already sorted, we only need to check
509 * the current (k'th) entry.
510 */
511 if (k < indices_count && i == indices_to_delete[k])
512 {
513 k++;
514 continue;
515 }
516
517 /* Copy lexeme and its positions and weights */
518 memcpy(dataout + curoff, data + arrin[i].pos, arrin[i].len);
519 arrout[j].haspos = arrin[i].haspos;
520 arrout[j].len = arrin[i].len;
521 arrout[j].pos = curoff;
522 curoff += arrin[i].len;
523 if (arrin[i].haspos)
524 {
525 int len = POSDATALEN(tsv, arrin + i) * sizeof(WordEntryPos)
526 + sizeof(uint16);
527
530 STRPTR(tsv) + SHORTALIGN(arrin[i].pos + arrin[i].len),
531 len);
532 curoff += len;
533 }
534
535 j++;
536 }
537
538 /*
539 * k should now be exactly equal to indices_count. If it isn't then the
540 * caller provided us with indices outside of [0, tsv->size) range and
541 * estimation of tsout's size is wrong.
542 */
543 Assert(k == indices_count);
544
546 return tsout;
547}

References ARRPTR, Assert, CALCDATASIZE, compare_int(), data, fb(), i, j, len, palloc0(), POSDATALEN, qsort, qunique(), SET_VARSIZE(), SHORTALIGN, STRPTR, and VARSIZE().

Referenced by tsvector_delete_arr(), and tsvector_delete_str().

◆ tsvector_delete_str()

◆ tsvector_filter()

Datum tsvector_filter ( PG_FUNCTION_ARGS  )

Definition at line 819 of file tsvector_op.c.

820{
822 tsout;
825 *arrout;
826 char *datain = STRPTR(tsin),
827 *dataout;
829 bool *nulls;
830 int nweights;
831 int i,
832 j;
833 int cur_pos = 0;
834 char mask = 0;
835
837
838 for (i = 0; i < nweights; i++)
839 {
840 char char_weight;
841
842 if (nulls[i])
845 errmsg("weight array may not contain nulls")));
846
848 switch (char_weight)
849 {
850 case 'A':
851 case 'a':
852 mask = mask | 8;
853 break;
854 case 'B':
855 case 'b':
856 mask = mask | 4;
857 break;
858 case 'C':
859 case 'c':
860 mask = mask | 2;
861 break;
862 case 'D':
863 case 'd':
864 mask = mask | 1;
865 break;
866 default:
869 errmsg("unrecognized weight: \"%c\"", char_weight)));
870 }
871 }
872
874 tsout->size = tsin->size;
877
878 for (i = j = 0; i < tsin->size; i++)
879 {
881 *posvout;
882 int npos = 0;
883 int k;
884
885 if (!arrin[i].haspos)
886 continue;
887
891
892 for (k = 0; k < posvin->npos; k++)
893 {
894 if (mask & (1 << WEP_GETWEIGHT(posvin->pos[k])))
895 posvout->pos[npos++] = posvin->pos[k];
896 }
897
898 /* if no satisfactory positions found, skip lexeme */
899 if (!npos)
900 continue;
901
902 arrout[j].haspos = true;
903 arrout[j].len = arrin[i].len;
904 arrout[j].pos = cur_pos;
905
907 posvout->npos = npos;
909 cur_pos += POSDATALEN(tsout, arrout + j) * sizeof(WordEntryPos) +
910 sizeof(uint16);
911 j++;
912 }
913
914 tsout->size = j;
915 if (dataout != STRPTR(tsout))
917
919
922}

References _POSVECPTR, ARRPTR, CALCDATASIZE, DatumGetChar(), deconstruct_array_builtin(), ereport, errcode(), errmsg(), ERROR, fb(), i, j, len, palloc0(), PG_FREE_IF_COPY, PG_GETARG_ARRAYTYPE_P, PG_GETARG_TSVECTOR, PG_RETURN_POINTER, POSDATALEN, SET_VARSIZE(), SHORTALIGN, STRPTR, VARSIZE(), and WEP_GETWEIGHT.

◆ tsvector_length()

Datum tsvector_length ( PG_FUNCTION_ARGS  )

Definition at line 201 of file tsvector_op.c.

202{
203 TSVector in = PG_GETARG_TSVECTOR(0);
204 int32 ret = in->size;
205
206 PG_FREE_IF_COPY(in, 0);
207 PG_RETURN_INT32(ret);
208}
#define PG_RETURN_INT32(x)
Definition fmgr.h:355

References PG_FREE_IF_COPY, PG_GETARG_TSVECTOR, PG_RETURN_INT32, and TSVectorData::size.

◆ tsvector_setweight()

Datum tsvector_setweight ( PG_FUNCTION_ARGS  )

Definition at line 211 of file tsvector_op.c.

212{
213 TSVector in = PG_GETARG_TSVECTOR(0);
214 char cw = PG_GETARG_CHAR(1);
215 TSVector out;
216 int i,
217 j;
218 WordEntry *entry;
219 WordEntryPos *p;
220 int w = 0;
221
222 switch (cw)
223 {
224 case 'A':
225 case 'a':
226 w = 3;
227 break;
228 case 'B':
229 case 'b':
230 w = 2;
231 break;
232 case 'C':
233 case 'c':
234 w = 1;
235 break;
236 case 'D':
237 case 'd':
238 w = 0;
239 break;
240 default:
241 /* internal error */
242 elog(ERROR, "unrecognized weight: %d", cw);
243 }
244
245 out = (TSVector) palloc(VARSIZE(in));
246 memcpy(out, in, VARSIZE(in));
247 entry = ARRPTR(out);
248 i = out->size;
249 while (i--)
250 {
251 if ((j = POSDATALEN(out, entry)) != 0)
252 {
253 p = POSDATAPTR(out, entry);
254 while (j--)
255 {
256 WEP_SETWEIGHT(*p, w);
257 p++;
258 }
259 }
260 entry++;
261 }
262
263 PG_FREE_IF_COPY(in, 0);
264 PG_RETURN_POINTER(out);
265}
#define PG_GETARG_CHAR(n)
Definition fmgr.h:273

References ARRPTR, elog, ERROR, fb(), i, j, palloc(), PG_FREE_IF_COPY, PG_GETARG_CHAR, PG_GETARG_TSVECTOR, PG_RETURN_POINTER, POSDATALEN, POSDATAPTR, TSVectorData::size, VARSIZE(), and WEP_SETWEIGHT.

◆ tsvector_setweight_by_filter()

Datum tsvector_setweight_by_filter ( PG_FUNCTION_ARGS  )

Definition at line 273 of file tsvector_op.c.

274{
276 char char_weight = PG_GETARG_CHAR(1);
278
280 int i,
281 j,
282 nlexemes,
283 weight;
284 WordEntry *entry;
286 bool *nulls;
287
288 switch (char_weight)
289 {
290 case 'A':
291 case 'a':
292 weight = 3;
293 break;
294 case 'B':
295 case 'b':
296 weight = 2;
297 break;
298 case 'C':
299 case 'c':
300 weight = 1;
301 break;
302 case 'D':
303 case 'd':
304 weight = 0;
305 break;
306 default:
307 /* internal error */
308 elog(ERROR, "unrecognized weight: %c", char_weight);
309 }
310
313 entry = ARRPTR(tsout);
314
316
317 /*
318 * Assuming that lexemes array is significantly shorter than tsvector we
319 * can iterate through lexemes performing binary search of each lexeme
320 * from lexemes in tsvector.
321 */
322 for (i = 0; i < nlexemes; i++)
323 {
324 char *lex;
325 int lex_len,
326 lex_pos;
327
328 /* Ignore null array elements, they surely don't match */
329 if (nulls[i])
330 continue;
331
335
336 if (lex_pos >= 0 && (j = POSDATALEN(tsout, entry + lex_pos)) != 0)
337 {
338 WordEntryPos *p = POSDATAPTR(tsout, entry + lex_pos);
339
340 while (j--)
341 {
342 WEP_SETWEIGHT(*p, weight);
343 p++;
344 }
345 }
346 }
347
350
352}

References ARRPTR, DatumGetPointer(), deconstruct_array_builtin(), elog, ERROR, fb(), i, j, palloc(), PG_FREE_IF_COPY, PG_GETARG_ARRAYTYPE_P, PG_GETARG_CHAR, PG_GETARG_TSVECTOR, PG_RETURN_POINTER, POSDATALEN, POSDATAPTR, tsvector_bsearch(), VARDATA(), VARHDRSZ, VARSIZE(), and WEP_SETWEIGHT.

◆ tsvector_strip()

Datum tsvector_strip ( PG_FUNCTION_ARGS  )

Definition at line 168 of file tsvector_op.c.

169{
170 TSVector in = PG_GETARG_TSVECTOR(0);
171 TSVector out;
172 int i,
173 len = 0;
174 WordEntry *arrin = ARRPTR(in),
175 *arrout;
176 char *cur;
177
178 for (i = 0; i < in->size; i++)
179 len += arrin[i].len;
180
181 len = CALCDATASIZE(in->size, len);
182 out = (TSVector) palloc0(len);
183 SET_VARSIZE(out, len);
184 out->size = in->size;
185 arrout = ARRPTR(out);
186 cur = STRPTR(out);
187 for (i = 0; i < in->size; i++)
188 {
189 memcpy(cur, STRPTR(in) + arrin[i].pos, arrin[i].len);
190 arrout[i].haspos = 0;
191 arrout[i].len = arrin[i].len;
192 arrout[i].pos = cur - STRPTR(out);
193 cur += arrout[i].len;
194 }
195
196 PG_FREE_IF_COPY(in, 0);
197 PG_RETURN_POINTER(out);
198}

References ARRPTR, CALCDATASIZE, cur, fb(), i, len, palloc0(), PG_FREE_IF_COPY, PG_GETARG_TSVECTOR, PG_RETURN_POINTER, SET_VARSIZE(), TSVectorData::size, and STRPTR.

◆ tsvector_to_array()

Datum tsvector_to_array ( PG_FUNCTION_ARGS  )

Definition at line 720 of file tsvector_op.c.

721{
724 Datum *elements;
725 int i;
726 ArrayType *array;
727
728 elements = palloc(tsin->size * sizeof(Datum));
729
730 for (i = 0; i < tsin->size; i++)
731 {
733 arrin[i].len));
734 }
735
736 array = construct_array_builtin(elements, tsin->size, TEXTOID);
737
738 pfree(elements);
740 PG_RETURN_POINTER(array);
741}

References ARRPTR, construct_array_builtin(), cstring_to_text_with_len(), fb(), i, len, palloc(), pfree(), PG_FREE_IF_COPY, PG_GETARG_TSVECTOR, PG_RETURN_POINTER, PointerGetDatum(), and STRPTR.

◆ tsvector_unnest()

Datum tsvector_unnest ( PG_FUNCTION_ARGS  )

Definition at line 632 of file tsvector_op.c.

633{
636
637 if (SRF_IS_FIRSTCALL())
638 {
639 MemoryContext oldcontext;
640 TupleDesc tupdesc;
641
643 oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
644
645 tupdesc = CreateTemplateTupleDesc(3);
646 TupleDescInitEntry(tupdesc, (AttrNumber) 1, "lexeme",
647 TEXTOID, -1, 0);
648 TupleDescInitEntry(tupdesc, (AttrNumber) 2, "positions",
649 INT2ARRAYOID, -1, 0);
650 TupleDescInitEntry(tupdesc, (AttrNumber) 3, "weights",
651 TEXTARRAYOID, -1, 0);
652 if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
653 elog(ERROR, "return type must be a row type");
654 funcctx->tuple_desc = tupdesc;
655
656 funcctx->user_fctx = PG_GETARG_TSVECTOR_COPY(0);
657
658 MemoryContextSwitchTo(oldcontext);
659 }
660
662 tsin = (TSVector) funcctx->user_fctx;
663
664 if (funcctx->call_cntr < tsin->size)
665 {
667 char *data = STRPTR(tsin);
668 HeapTuple tuple;
669 int j,
670 i = funcctx->call_cntr;
671 bool nulls[] = {false, false, false};
672 Datum values[3];
673
675
676 if (arrin[i].haspos)
677 {
680 Datum *weights;
681 char weight;
682
683 /*
684 * Internally tsvector stores position and weight in the same
685 * uint16 (2 bits for weight, 14 for position). Here we extract
686 * that in two separate arrays.
687 */
688 posv = _POSVECPTR(tsin, arrin + i);
689 positions = palloc(posv->npos * sizeof(Datum));
690 weights = palloc(posv->npos * sizeof(Datum));
691 for (j = 0; j < posv->npos; j++)
692 {
694 weight = 'D' - WEP_GETWEIGHT(posv->pos[j]);
696 1));
697 }
698
701 }
702 else
703 {
704 nulls[1] = nulls[2] = true;
705 }
706
707 tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
709 }
710 else
711 {
713 }
714}

References _POSVECPTR, ARRPTR, construct_array_builtin(), CreateTemplateTupleDesc(), cstring_to_text_with_len(), data, elog, ERROR, fb(), get_call_result_type(), heap_form_tuple(), HeapTupleGetDatum(), i, Int16GetDatum(), j, len, MemoryContextSwitchTo(), palloc(), PG_GETARG_TSVECTOR_COPY, PointerGetDatum(), SRF_FIRSTCALL_INIT, SRF_IS_FIRSTCALL, SRF_PERCALL_SETUP, SRF_RETURN_DONE, SRF_RETURN_NEXT, STRPTR, TupleDescInitEntry(), TYPEFUNC_COMPOSITE, values, WEP_GETPOS, and WEP_GETWEIGHT.

◆ tsvector_update_trigger()

static Datum tsvector_update_trigger ( PG_FUNCTION_ARGS  ,
bool  config_column 
)
static

Definition at line 2739 of file tsvector_op.c.

2740{
2741 TriggerData *trigdata;
2743 Relation rel;
2746 i;
2747 ParsedText prs;
2748 Datum datum;
2749 bool isnull;
2750 text *txt;
2751 Oid cfgId;
2752 bool update_needed;
2753
2754 /* Check call context */
2755 if (!CALLED_AS_TRIGGER(fcinfo)) /* internal error */
2756 elog(ERROR, "tsvector_update_trigger: not fired by trigger manager");
2757
2758 trigdata = (TriggerData *) fcinfo->context;
2759 if (!TRIGGER_FIRED_FOR_ROW(trigdata->tg_event))
2760 elog(ERROR, "tsvector_update_trigger: must be fired for row");
2761 if (!TRIGGER_FIRED_BEFORE(trigdata->tg_event))
2762 elog(ERROR, "tsvector_update_trigger: must be fired BEFORE event");
2763
2764 if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event))
2765 {
2766 rettuple = trigdata->tg_trigtuple;
2767 update_needed = true;
2768 }
2769 else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event))
2770 {
2771 rettuple = trigdata->tg_newtuple;
2772 update_needed = false; /* computed below */
2773 }
2774 else
2775 elog(ERROR, "tsvector_update_trigger: must be fired for INSERT or UPDATE");
2776
2777 trigger = trigdata->tg_trigger;
2778 rel = trigdata->tg_relation;
2779
2780 if (trigger->tgnargs < 3)
2781 elog(ERROR, "tsvector_update_trigger: arguments must be tsvector_field, ts_config, text_field1, ...)");
2782
2783 /* Find the target tsvector column */
2784 tsvector_attr_num = SPI_fnumber(rel->rd_att, trigger->tgargs[0]);
2786 ereport(ERROR,
2788 errmsg("tsvector column \"%s\" does not exist",
2789 trigger->tgargs[0])));
2790 /* This will effectively reject system columns, so no separate test: */
2792 TSVECTOROID))
2793 ereport(ERROR,
2795 errmsg("column \"%s\" is not of tsvector type",
2796 trigger->tgargs[0])));
2797
2798 /* Find the configuration to use */
2799 if (config_column)
2800 {
2801 int config_attr_num;
2802
2803 config_attr_num = SPI_fnumber(rel->rd_att, trigger->tgargs[1]);
2805 ereport(ERROR,
2807 errmsg("configuration column \"%s\" does not exist",
2808 trigger->tgargs[1])));
2810 REGCONFIGOID))
2811 ereport(ERROR,
2813 errmsg("column \"%s\" is not of regconfig type",
2814 trigger->tgargs[1])));
2815
2816 datum = SPI_getbinval(rettuple, rel->rd_att, config_attr_num, &isnull);
2817 if (isnull)
2818 ereport(ERROR,
2820 errmsg("configuration column \"%s\" must not be null",
2821 trigger->tgargs[1])));
2822 cfgId = DatumGetObjectId(datum);
2823 }
2824 else
2825 {
2826 List *names;
2827
2828 names = stringToQualifiedNameList(trigger->tgargs[1], NULL);
2829 /* require a schema so that results are not search path dependent */
2830 if (list_length(names) < 2)
2831 ereport(ERROR,
2833 errmsg("text search configuration name \"%s\" must be schema-qualified",
2834 trigger->tgargs[1])));
2835 cfgId = get_ts_config_oid(names, false);
2836 }
2837
2838 /* initialize parse state */
2839 prs.lenwords = 32;
2840 prs.curwords = 0;
2841 prs.pos = 0;
2843
2844 /* find all words in indexable column(s) */
2845 for (i = 2; i < trigger->tgnargs; i++)
2846 {
2847 int numattr;
2848
2849 numattr = SPI_fnumber(rel->rd_att, trigger->tgargs[i]);
2851 ereport(ERROR,
2853 errmsg("column \"%s\" does not exist",
2854 trigger->tgargs[i])));
2856 ereport(ERROR,
2858 errmsg("column \"%s\" is not of a character type",
2859 trigger->tgargs[i])));
2860
2862 update_needed = true;
2863
2864 datum = SPI_getbinval(rettuple, rel->rd_att, numattr, &isnull);
2865 if (isnull)
2866 continue;
2867
2868 txt = DatumGetTextPP(datum);
2869
2871
2872 if (txt != (text *) DatumGetPointer(datum))
2873 pfree(txt);
2874 }
2875
2876 if (update_needed)
2877 {
2878 /* make tsvector value */
2879 datum = TSVectorGetDatum(make_tsvector(&prs));
2880 isnull = false;
2881
2882 /* and insert it into tuple */
2885 &datum, &isnull);
2886
2887 pfree(DatumGetPointer(datum));
2888 }
2889
2890 return PointerGetDatum(rettuple);
2891}

References bms_is_member(), CALLED_AS_TRIGGER, ParsedText::curwords, DatumGetObjectId(), DatumGetPointer(), DatumGetTextPP, elog, ereport, errcode(), errmsg(), ERROR, fb(), FirstLowInvalidHeapAttributeNumber, get_ts_config_oid(), heap_modify_tuple_by_cols(), i, IsBinaryCoercible(), ParsedText::lenwords, list_length(), make_tsvector(), numattr, palloc_array, parsetext(), pfree(), PointerGetDatum(), ParsedText::pos, RelationData::rd_att, SPI_ERROR_NOATTRIBUTE, SPI_fnumber(), SPI_getbinval(), SPI_gettypeid(), stringToQualifiedNameList(), TriggerData::tg_event, TriggerData::tg_newtuple, TriggerData::tg_relation, TriggerData::tg_trigger, TriggerData::tg_trigtuple, TriggerData::tg_updatedcols, TRIGGER_FIRED_BEFORE, TRIGGER_FIRED_BY_INSERT, TRIGGER_FIRED_BY_UPDATE, TRIGGER_FIRED_FOR_ROW, TSVectorGetDatum(), VARDATA_ANY(), VARSIZE_ANY_EXHDR(), and ParsedText::words.

Referenced by tsvector_update_trigger_bycolumn(), and tsvector_update_trigger_byid().

◆ tsvector_update_trigger_bycolumn()

Datum tsvector_update_trigger_bycolumn ( PG_FUNCTION_ARGS  )

Definition at line 2733 of file tsvector_op.c.

2734{
2735 return tsvector_update_trigger(fcinfo, true);
2736}

References tsvector_update_trigger().

◆ tsvector_update_trigger_byid()

Datum tsvector_update_trigger_byid ( PG_FUNCTION_ARGS  )

Definition at line 2727 of file tsvector_op.c.

2728{
2729 return tsvector_update_trigger(fcinfo, false);
2730}

References tsvector_update_trigger().

◆ TSVECTORCMPFUNC() [1/7]

TSVECTORCMPFUNC ( cmp  ,
+  ,
INT32   
)

◆ TSVECTORCMPFUNC() [2/7]

TSVECTORCMPFUNC ( eq  ,
==  ,
BOOL   
)

◆ TSVECTORCMPFUNC() [3/7]

TSVECTORCMPFUNC ( ge  ,
>=  ,
BOOL   
)

◆ TSVECTORCMPFUNC() [4/7]

TSVECTORCMPFUNC ( gt  ,
>  ,
BOOL   
)

◆ TSVECTORCMPFUNC() [5/7]

TSVECTORCMPFUNC ( le  ,
<=  ,
BOOL   
)

◆ TSVECTORCMPFUNC() [6/7]

TSVECTORCMPFUNC ( lt  ,
<  ,
BOOL   
)

◆ TSVECTORCMPFUNC() [7/7]

TSVECTORCMPFUNC ( ne  ,
!=  ,
BOOL   
)

◆ walkStatEntryTree()

static StatEntry * walkStatEntryTree ( TSVectorStat stat)
static

Definition at line 2489 of file tsvector_op.c.

2490{
2491 StatEntry *node = stat->stack[stat->stackpos];
2492
2493 if (node == NULL)
2494 return NULL;
2495
2496 if (node->ndoc != 0)
2497 {
2498 /* return entry itself: we were already at the left sublink */
2499 return node;
2500 }
2501 else if (node->right && node->right != stat->stack[stat->stackpos + 1])
2502 {
2503 /* go on right sublink */
2504 stat->stackpos++;
2505 node = node->right;
2506
2507 /* find most-left value */
2508 for (;;)
2509 {
2510 stat->stack[stat->stackpos] = node;
2511 if (node->left)
2512 {
2513 stat->stackpos++;
2514 node = node->left;
2515 }
2516 else
2517 break;
2518 }
2519 Assert(stat->stackpos <= stat->maxdepth);
2520 }
2521 else
2522 {
2523 /* we have already returned the whole left subtree, the node itself and the right subtree */
2524 if (stat->stackpos == 0)
2525 return NULL;
2526
2527 stat->stackpos--;
2528 return walkStatEntryTree(stat);
2529 }
2530
2531 return node;
2532}

References Assert, fb(), StatEntry::left, StatEntry::ndoc, StatEntry::right, and walkStatEntryTree().

Referenced by ts_process_call(), and walkStatEntryTree().