PostgreSQL Source Code  git master
ts_utils.h
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * ts_utils.h
4  * helper utilities for tsearch
5  *
6  * Copyright (c) 1998-2024, PostgreSQL Global Development Group
7  *
8  * src/include/tsearch/ts_utils.h
9  *
10  *-------------------------------------------------------------------------
11  */
12 #ifndef _PG_TS_UTILS_H_
13 #define _PG_TS_UTILS_H_
14 
15 #include "nodes/pg_list.h"
16 #include "tsearch/ts_public.h"
17 #include "tsearch/ts_type.h"
18 
19 /*
20  * Common parse definitions for tsvector and tsquery
21  */
22 
23 /* tsvector parser support. */
24 
25 struct TSVectorParseStateData; /* opaque struct in tsvector_parser.c */
26 typedef struct TSVectorParseStateData *TSVectorParseState; /* handle to the opaque parser state */
27 
28 /* flag bits that can be passed to init_tsvector_parser: */
29 #define P_TSV_OPR_IS_DELIM (1 << 0)
30 #define P_TSV_IS_TSQUERY (1 << 1)
31 #define P_TSV_IS_WEB (1 << 2)
32 
33 extern TSVectorParseState init_tsvector_parser(char *input, int flags,
34  Node *escontext);
35 extern void reset_tsvector_parser(TSVectorParseState state, char *input);
36 extern bool gettoken_tsvector(TSVectorParseState state,
37  char **strval, int *lenval,
38  WordEntryPos **pos_ptr, int *poslen,
39  char **endptr);
40 extern void close_tsvector_parser(TSVectorParseState state);
41 
42 /* true if x points at a single-byte character that is one of ! & | ( ) <; phrase operator begins with '<'. Note that x is evaluated more than once. */
43 #define ISOPERATOR(x) \
44  ( pg_mblen(x) == 1 && ( *(x) == '!' || \
45  *(x) == '&' || \
46  *(x) == '|' || \
47  *(x) == '(' || \
48  *(x) == ')' || \
49  *(x) == '<' \
50  ) )
51 
52 /* parse_tsquery */
53 
54 struct TSQueryParserStateData; /* private in backend/utils/adt/tsquery.c */
55 typedef struct TSQueryParserStateData *TSQueryParserState; /* handle to the private parser state */
56 
57 typedef void (*PushFunction) (Datum opaque, TSQueryParserState state,
58  char *token, int tokenlen,
59  int16 tokenweights, /* bitmap as described in
60  * QueryOperand struct */
61  bool prefix);
62 
63 /* flag bits that can be passed to parse_tsquery: */
64 #define P_TSQ_PLAIN (1 << 0)
65 #define P_TSQ_WEB (1 << 1)
66 
67 extern TSQuery parse_tsquery(char *buf,
68  PushFunction pushval, /* callback of type PushFunction */
69  Datum opaque, /* NOTE(review): presumably handed to pushval as its "opaque" argument -- confirm */
70  int flags, /* P_TSQ_* flag bits */
71  Node *escontext);
72 
73 /* Functions for use by PushFunction implementations */
74 extern void pushValue(TSQueryParserState state,
75  char *strval, int lenval, int16 weight, bool prefix);
76 extern void pushStop(TSQueryParserState state);
77 extern void pushOperator(TSQueryParserState state, int8 oper, int16 distance);
78 
79 /*
80  * parse plain text and lexize words
81  */
82 typedef struct
83 {
84  uint16 flags; /* currently, only TSL_PREFIX */
85  uint16 len; /* NOTE(review): presumably the length of word -- confirm */
86  uint16 nvariant;
87  uint16 alen; /* allocated size of the apos array */
88  union
89  {
90  uint16 pos; /* NOTE(review): presumably the single position when the apos array is not used -- confirm */
91 
92  /*
93  * When apos array is used, apos[0] is the number of elements in the
94  * array (excluding apos[0]), and alen is the allocated size of the
95  * array. We do not allow more than MAXNUMPOS array elements.
96  */
97  uint16 *apos;
98  } pos;
99  char *word;
100 } ParsedWord;
101 
102 typedef struct
103 {
104  ParsedWord *words; /* array of ParsedWord entries */
105  int32 lenwords; /* NOTE(review): presumably the allocated length of words -- confirm */
106  int32 curwords; /* NOTE(review): presumably the number of entries of words in use -- confirm */
107  int32 pos;
108 } ParsedText;
109 
110 extern void parsetext(Oid cfgId, ParsedText *prs, char *buf, int32 buflen);
111 
112 /*
113  * headline framework, flow in common to generate:
114  * 1 parse text with hlparsetext
115  * 2 parser-specific function to find part
116  * 3 generateHeadline to generate result text
117  */
118 
119 extern void hlparsetext(Oid cfgId, HeadlineParsedText *prs, TSQuery query,
120  char *buf, int32 buflen);
121 extern text *generateHeadline(HeadlineParsedText *prs); /* step 3 of the flow described above */
122 
123 /*
124  * TSQuery execution support
125  *
126  * TS_execute() executes a tsquery against data that can be represented in
127  * various forms. The TSExecuteCallback callback function is called to check
128  * whether a given primitive tsquery value is matched in the data.
129  */
130 
131 /* TS_execute requires ternary logic to handle NOT with phrase matches */
132 typedef enum
133 {
134  TS_NO, /* definitely no match */
135  TS_YES, /* definitely does match */
136  TS_MAYBE, /* can't verify match for lack of pos data */
137 } TSTernaryValue;
138 
139 /*
140  * struct ExecPhraseData is passed to a TSExecuteCallback function if we need
141  * lexeme position data (because of a phrase-match operator in the tsquery).
142  * The callback should fill in position data when it returns TS_YES (success).
143  * If it cannot return position data, it should leave "data" unchanged and
144  * return TS_MAYBE. The caller of TS_execute() must then arrange for a later
145  * recheck with position data available.
146  *
147  * The reported lexeme positions must be sorted and unique. Callers must only
148  * consult the position bits of the pos array, ie, WEP_GETPOS(data->pos[i]).
149  * This allows the returned "pos" to point directly to the WordEntryPos
150  * portion of a tsvector value. If "allocated" is true then the pos array
151  * is palloc'd workspace and caller may free it when done.
152  *
153  * "negate" means that the pos array contains positions where the query does
154  * not match, rather than positions where it does. "width" is positive when
155  * the match is wider than one lexeme. Neither of these fields normally needs
156  * to be touched by TSExecuteCallback functions; they are used for
157  * phrase-search processing within TS_execute.
158  *
159  * All fields of the ExecPhraseData struct are initially zeroed by caller.
160  */
161 typedef struct ExecPhraseData
162 {
163  int npos; /* number of positions reported */
164  bool allocated; /* pos points to palloc'd data? */
165  bool negate; /* positions are where query is NOT matched */
166  WordEntryPos *pos; /* ordered, non-duplicate lexeme positions */
167  int width; /* width of match in lexemes, less 1 */
168 } ExecPhraseData;
169 
170 /*
171  * Signature for TSQuery lexeme check functions
172  *
173  * arg: opaque value passed through from caller of TS_execute
174  * val: lexeme to test for presence of
175  * data: to be filled with lexeme positions; NULL if position data not needed
176  *
177  * Return TS_YES if lexeme is present in data, TS_MAYBE if it might be
178  * present, TS_NO if it definitely is not present. If data is not NULL,
179  * it must be filled with lexeme positions if available. If position data
180  * is not available, leave *data as zeroes and return TS_MAYBE, never TS_YES.
181  */
182 typedef TSTernaryValue (*TSExecuteCallback) (void *arg, QueryOperand *val,
183  ExecPhraseData *data);
184 
185 /*
186  * Flag bits for TS_execute
187  */
188 #define TS_EXEC_EMPTY (0x00) /* no flag bits set */
189 /*
190  * If TS_EXEC_SKIP_NOT is set, then NOT sub-expressions are automatically
191  * evaluated to be true. This was formerly the default behavior. It's now
192  * deprecated because it tends to give silly answers, but some applications
193  * might still have a use for it.
194  */
195 #define TS_EXEC_SKIP_NOT (0x01)
196 /*
197  * If TS_EXEC_PHRASE_NO_POS is set, allow OP_PHRASE to be executed lossily
198  * in the absence of position information: a true result indicates that the
199  * phrase might be present. Without this flag, OP_PHRASE always returns
200  * false if lexeme position information is not available.
201  */
202 #define TS_EXEC_PHRASE_NO_POS (0x02)
203 
204 extern bool TS_execute(QueryItem *curitem, void *arg, uint32 flags,
205  TSExecuteCallback chkcond); /* boolean result; flags are TS_EXEC_* bits */
206 extern TSTernaryValue TS_execute_ternary(QueryItem *curitem, void *arg,
207  uint32 flags,
208  TSExecuteCallback chkcond); /* ternary result: TS_NO, TS_YES or TS_MAYBE */
209 extern List *TS_execute_locations(QueryItem *curitem, void *arg,
210  uint32 flags,
211  TSExecuteCallback chkcond); /* returns a List; its element type is not visible in this header */
212 extern bool tsquery_requires_match(QueryItem *curitem);
213 
214 /*
215  * to_ts* - text transformation to tsvector, tsquery
216  */
217 extern TSVector make_tsvector(ParsedText *prs); /* builds a TSVector from a ParsedText */
218 extern int32 tsCompareString(char *a, int lena, char *b, int lenb, bool prefix); /* NOTE(review): presumably lena/lenb are the lengths of a/b and prefix selects prefix matching -- confirm in tsvector_op.c */
219 
220 /*
221  * Possible strategy numbers for indexes
222  * TSearchStrategyNumber - (tsvector|text) @@ tsquery
223  * TSearchWithClassStrategyNumber - tsvector @@@ tsquery
224  */
225 #define TSearchStrategyNumber 1 /* (tsvector|text) @@ tsquery */
226 #define TSearchWithClassStrategyNumber 2 /* tsvector @@@ tsquery */
227 
228 /*
229  * TSQuery Utilities
230  */
231 extern QueryItem *clean_NOT(QueryItem *ptr, int32 *len);
232 extern TSQuery cleanup_tsquery_stopwords(TSQuery in, bool noisy);
233 
234 typedef struct QTNode
235 {
236  QueryItem *valnode;
237  uint32 flags; /* QTN_* bits */
238  int32 nchild; /* NOTE(review): presumably the number of entries in child -- confirm */
239  char *word;
240  uint32 sign;
241  struct QTNode **child;
242 } QTNode;
243 
244 /* bits in QTNode.flags */
245 #define QTN_NEEDFREE 0x01
246 #define QTN_NOCHANGE 0x02
247 #define QTN_WORDFREE 0x04
248 
249 typedef uint64 TSQuerySign;
250 
251 #define TSQS_SIGLEN (sizeof(TSQuerySign)*BITS_PER_BYTE) /* size of a TSQuerySign in bits */
252 
253 static inline Datum /* converts a TSQuerySign to a Datum by way of int64 */
254 TSQuerySignGetDatum(TSQuerySign X)
255 {
256  return Int64GetDatum((int64) X);
257 }
258 
259 static inline TSQuerySign /* converts a Datum back to a TSQuerySign by way of int64 */
260 DatumGetTSQuerySign(Datum X)
261 {
262  return (TSQuerySign) DatumGetInt64(X);
263 }
264 
265 #define PG_RETURN_TSQUERYSIGN(X) return TSQuerySignGetDatum(X) /* expands to a return statement */
266 #define PG_GETARG_TSQUERYSIGN(n) DatumGetTSQuerySign(PG_GETARG_DATUM(n)) /* argument n as a TSQuerySign */
267 
268 
269 extern QTNode *QT2QTN(QueryItem *in, char *operand);
270 extern TSQuery QTN2QT(QTNode *in);
271 extern void QTNFree(QTNode *in);
272 extern void QTNSort(QTNode *in);
273 extern void QTNTernary(QTNode *in);
274 extern void QTNBinary(QTNode *in);
275 extern int QTNodeCompare(QTNode *an, QTNode *bn);
276 extern QTNode *QTNCopy(QTNode *in);
277 extern void QTNClearFlags(QTNode *in, uint32 flags);
278 extern bool QTNEq(QTNode *a, QTNode *b);
279 extern TSQuerySign makeTSQuerySign(TSQuery a);
280 extern QTNode *findsubquery(QTNode *root, QTNode *ex, QTNode *subs,
281  bool *isfind);
282 
283 #endif /* _PG_TS_UTILS_H_ */
unsigned short uint16
Definition: c.h:491
unsigned int uint32
Definition: c.h:492
signed char int8
Definition: c.h:480
signed short int16
Definition: c.h:481
signed int int32
Definition: c.h:482
Datum Int64GetDatum(int64 X)
Definition: fmgr.c:1807
#define token
Definition: indent_globs.h:126
FILE * input
long val
Definition: informix.c:689
int b
Definition: isn.c:69
int a
Definition: isn.c:68
Operator oper(ParseState *pstate, List *opname, Oid ltypeId, Oid rtypeId, bool noError, int location)
Definition: parse_oper.c:370
void * arg
const void size_t len
const void * data
static char * buf
Definition: pg_test_fsync.c:72
static int64 DatumGetInt64(Datum X)
Definition: postgres.h:385
uintptr_t Datum
Definition: postgres.h:64
unsigned int Oid
Definition: postgres_ext.h:31
tree ctl root
Definition: radixtree.h:1886
bool allocated
Definition: ts_utils.h:164
WordEntryPos * pos
Definition: ts_utils.h:166
Definition: pg_list.h:54
Definition: nodes.h:129
int32 pos
Definition: ts_utils.h:107
int32 lenwords
Definition: ts_utils.h:105
int32 curwords
Definition: ts_utils.h:106
ParsedWord * words
Definition: ts_utils.h:104
uint16 alen
Definition: ts_utils.h:87
uint16 flags
Definition: ts_utils.h:84
uint16 nvariant
Definition: ts_utils.h:86
uint16 len
Definition: ts_utils.h:85
uint16 pos
Definition: ts_utils.h:90
uint16 * apos
Definition: ts_utils.h:97
char * word
Definition: ts_utils.h:99
int32 nchild
Definition: ts_utils.h:238
QueryItem * valnode
Definition: ts_utils.h:236
uint32 sign
Definition: ts_utils.h:240
uint32 flags
Definition: ts_utils.h:237
char * word
Definition: ts_utils.h:239
struct QTNode ** child
Definition: ts_utils.h:241
ts_parserstate state
Definition: tsquery.c:88
Definition: regguts.h:323
Definition: c.h:666
uint16 WordEntryPos
Definition: ts_type.h:63
void reset_tsvector_parser(TSVectorParseState state, char *input)
void QTNClearFlags(QTNode *in, uint32 flags)
Definition: tsquery_util.c:434
int QTNodeCompare(QTNode *an, QTNode *bn)
Definition: tsquery_util.c:97
struct ExecPhraseData ExecPhraseData
QTNode * QTNCopy(QTNode *in)
Definition: tsquery_util.c:396
void pushValue(TSQueryParserState state, char *strval, int lenval, int16 weight, bool prefix)
Definition: tsquery.c:580
bool TS_execute(QueryItem *curitem, void *arg, uint32 flags, TSExecuteCallback chkcond)
Definition: tsvector_op.c:1854
QTNode * QT2QTN(QueryItem *in, char *operand)
Definition: tsquery_util.c:25
void close_tsvector_parser(TSVectorParseState state)
struct TSVectorParseStateData * TSVectorParseState
Definition: ts_utils.h:26
TSTernaryValue
Definition: ts_utils.h:133
@ TS_MAYBE
Definition: ts_utils.h:136
@ TS_NO
Definition: ts_utils.h:134
@ TS_YES
Definition: ts_utils.h:135
static Datum TSQuerySignGetDatum(TSQuerySign X)
Definition: ts_utils.h:254
TSQuerySign makeTSQuerySign(TSQuery a)
Definition: tsquery_op.c:250
void parsetext(Oid cfgId, ParsedText *prs, char *buf, int32 buflen)
void QTNSort(QTNode *in)
Definition: tsquery_util.c:163
TSQuery parse_tsquery(char *buf, PushFunction pushval, Datum opaque, int flags, Node *escontext)
Definition: tsquery.c:817
void pushOperator(TSQueryParserState state, int8 oper, int16 distance)
Definition: tsquery.c:531
TSQuery cleanup_tsquery_stopwords(TSQuery in, bool noisy)
uint64 TSQuerySign
Definition: ts_utils.h:249
void QTNTernary(QTNode *in)
Definition: tsquery_util.c:201
int32 tsCompareString(char *a, int lena, char *b, int lenb, bool prefix)
Definition: tsvector_op.c:1152
static TSQuerySign DatumGetTSQuerySign(Datum X)
Definition: ts_utils.h:260
QTNode * findsubquery(QTNode *root, QTNode *ex, QTNode *subs, bool *isfind)
void hlparsetext(Oid cfgId, HeadlineParsedText *prs, TSQuery query, char *buf, int32 buflen)
text * generateHeadline(HeadlineParsedText *prs)
Definition: ts_parse.c:607
bool tsquery_requires_match(QueryItem *curitem)
Definition: tsvector_op.c:2156
QueryItem * clean_NOT(QueryItem *ptr, int32 *len)
TSVector make_tsvector(ParsedText *prs)
Definition: to_tsany.c:165
struct QTNode QTNode
bool gettoken_tsvector(TSVectorParseState state, char **strval, int *lenval, WordEntryPos **pos_ptr, int *poslen, char **endptr)
bool QTNEq(QTNode *a, QTNode *b)
Definition: tsquery_util.c:183
void QTNFree(QTNode *in)
Definition: tsquery_util.c:64
TSTernaryValue TS_execute_ternary(QueryItem *curitem, void *arg, uint32 flags, TSExecuteCallback chkcond)
Definition: tsvector_op.c:1871
TSVectorParseState init_tsvector_parser(char *input, int flags, Node *escontext)
struct TSQueryParserStateData * TSQueryParserState
Definition: ts_utils.h:55
void pushStop(TSQueryParserState state)
Definition: tsquery.c:616
TSQuery QTN2QT(QTNode *in)
Definition: tsquery_util.c:363
TSTernaryValue(* TSExecuteCallback)(void *arg, QueryOperand *val, ExecPhraseData *data)
Definition: ts_utils.h:182
void QTNBinary(QTNode *in)
Definition: tsquery_util.c:250
List * TS_execute_locations(QueryItem *curitem, void *arg, uint32 flags, TSExecuteCallback chkcond)
Definition: tsvector_op.c:2007
void(* PushFunction)(Datum opaque, TSQueryParserState state, char *token, int tokenlen, int16 tokenweights, bool prefix)
Definition: ts_utils.h:57