PostgreSQL Source Code git master
ts_utils.h
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * ts_utils.h
4 * helper utilities for tsearch
5 *
6 * Copyright (c) 1998-2025, PostgreSQL Global Development Group
7 *
8 * src/include/tsearch/ts_utils.h
9 *
10 *-------------------------------------------------------------------------
11 */
12#ifndef _PG_TS_UTILS_H_
13#define _PG_TS_UTILS_H_
14
15#include "nodes/pg_list.h"
16#include "tsearch/ts_public.h"
17#include "tsearch/ts_type.h"
18
/*
 * Common parse definitions for tsvector and tsquery
 */

/* tsvector parser support. */
24
/*
 * Handle for the tsvector parser state.  Only the forward declaration is
 * visible here, so includers can pass the handle around but cannot look
 * inside it.
 */
struct TSVectorParseStateData;	/* opaque struct in tsvector_parser.c */
typedef struct TSVectorParseStateData *TSVectorParseState;
27
/*
 * Flag bits that can be passed to init_tsvector_parser.  Each flag occupies
 * its own bit, so they can be OR'ed together in the "flags" argument.
 */
#define P_TSV_OPR_IS_DELIM	(1 << 0)
#define P_TSV_IS_TSQUERY	(1 << 1)
#define P_TSV_IS_WEB		(1 << 2)
32
33extern TSVectorParseState init_tsvector_parser(char *input, int flags,
37 char **strval, int *lenval,
38 WordEntryPos **pos_ptr, int *poslen,
39 char **endptr);
41
/*
 * ISOPERATOR(x): true when pg_mblen() reports a length of 1 for the
 * character at x and that character is one of  ! & | ( ) <
 * (the phrase operator begins with '<').
 *
 * The argument is evaluated several times, so it must be free of side
 * effects.
 */
#define ISOPERATOR(x) \
	( pg_mblen(x) == 1 && ( *(x) == '!' ||	\
							*(x) == '&' ||	\
							*(x) == '|' ||	\
							*(x) == '(' ||	\
							*(x) == ')' ||	\
							*(x) == '<'		\
						  ) )
51
/* parse_tsquery */
53
/*
 * Handle for the tsquery parser state.  The struct body is private to the
 * tsquery implementation; this header only exposes the pointer type.
 */
struct TSQueryParserStateData;	/* private in backend/utils/adt/tsquery.c */
typedef struct TSQueryParserStateData *TSQueryParserState;
56
57typedef void (*PushFunction) (Datum opaque, TSQueryParserState state,
58 char *token, int tokenlen,
59 int16 tokenweights, /* bitmap as described in
60 * QueryOperand struct */
61 bool prefix);
62
/*
 * Flag bits that can be passed to parse_tsquery.  Each flag occupies its
 * own bit of the "flags" argument.
 */
#define P_TSQ_PLAIN		(1 << 0)
#define P_TSQ_WEB		(1 << 1)
66
67extern TSQuery parse_tsquery(char *buf,
68 PushFunction pushval,
69 Datum opaque,
70 int flags,
72
73/* Functions for use by PushFunction implementations */
75 char *strval, int lenval, int16 weight, bool prefix);
77extern void pushOperator(TSQueryParserState state, int8 oper, int16 distance);
78
79/*
80 * parse plain text and lexize words
81 */
82typedef struct
83{
84 uint16 flags; /* currently, only TSL_PREFIX */
88 union
89 {
91
92 /*
93 * When apos array is used, apos[0] is the number of elements in the
94 * array (excluding apos[0]), and alen is the allocated size of the
95 * array. We do not allow more than MAXNUMPOS array elements.
96 */
98 } pos;
99 char *word;
100} ParsedWord;
101
102typedef struct
103{
108} ParsedText;
109
110extern void parsetext(Oid cfgId, ParsedText *prs, char *buf, int32 buflen);
111
112/*
113 * headline framework, flow in common to generate:
114 * 1 parse text with hlparsetext
115 * 2 parser-specific function to find part
116 * 3 generateHeadline to generate result text
117 */
118
119extern void hlparsetext(Oid cfgId, HeadlineParsedText *prs, TSQuery query,
120 char *buf, int32 buflen);
122
/*
 * TSQuery execution support
 *
 * TS_execute() executes a tsquery against data that can be represented in
 * various forms.  The TSExecuteCallback callback function is called to check
 * whether a given primitive tsquery value is matched in the data.
 */
130
/* TS_execute requires ternary logic to handle NOT with phrase matches */
typedef enum
{
	TS_NO,						/* definitely no match */
	TS_YES,						/* definitely does match */
	TS_MAYBE,					/* can't verify match for lack of pos data */
} TSTernaryValue;
138
139/*
140 * struct ExecPhraseData is passed to a TSExecuteCallback function if we need
141 * lexeme position data (because of a phrase-match operator in the tsquery).
142 * The callback should fill in position data when it returns TS_YES (success).
143 * If it cannot return position data, it should leave "data" unchanged and
144 * return TS_MAYBE. The caller of TS_execute() must then arrange for a later
145 * recheck with position data available.
146 *
147 * The reported lexeme positions must be sorted and unique. Callers must only
148 * consult the position bits of the pos array, ie, WEP_GETPOS(data->pos[i]).
149 * This allows the returned "pos" to point directly to the WordEntryPos
150 * portion of a tsvector value. If "allocated" is true then the pos array
151 * is palloc'd workspace and caller may free it when done.
152 *
153 * "negate" means that the pos array contains positions where the query does
154 * not match, rather than positions where it does. "width" is positive when
155 * the match is wider than one lexeme. Neither of these fields normally need
156 * to be touched by TSExecuteCallback functions; they are used for
157 * phrase-search processing within TS_execute.
158 *
159 * All fields of the ExecPhraseData struct are initially zeroed by caller.
160 */
161typedef struct ExecPhraseData
162{
163 int npos; /* number of positions reported */
164 bool allocated; /* pos points to palloc'd data? */
165 bool negate; /* positions are where query is NOT matched */
166 WordEntryPos *pos; /* ordered, non-duplicate lexeme positions */
167 int width; /* width of match in lexemes, less 1 */
169
170/*
171 * Signature for TSQuery lexeme check functions
172 *
173 * arg: opaque value passed through from caller of TS_execute
174 * val: lexeme to test for presence of
175 * data: to be filled with lexeme positions; NULL if position data not needed
176 *
177 * Return TS_YES if lexeme is present in data, TS_MAYBE if it might be
178 * present, TS_NO if it definitely is not present. If data is not NULL,
179 * it must be filled with lexeme positions if available. If position data
180 * is not available, leave *data as zeroes and return TS_MAYBE, never TS_YES.
181 */
184
/*
 * Flag bits for TS_execute
 */
#define TS_EXEC_EMPTY			(0x00)
/*
 * If TS_EXEC_SKIP_NOT is set, then NOT sub-expressions are automatically
 * evaluated to be true.  This was formerly the default behavior.  It's now
 * deprecated because it tends to give silly answers, but some applications
 * might still have a use for it.
 */
#define TS_EXEC_SKIP_NOT		(0x01)
/*
 * If TS_EXEC_PHRASE_NO_POS is set, allow OP_PHRASE to be executed lossily
 * in the absence of position information: a true result indicates that the
 * phrase might be present.  Without this flag, OP_PHRASE always returns
 * false if lexeme position information is not available.
 */
#define TS_EXEC_PHRASE_NO_POS	(0x02)
203
204extern bool TS_execute(QueryItem *curitem, void *arg, uint32 flags,
205 TSExecuteCallback chkcond);
206extern TSTernaryValue TS_execute_ternary(QueryItem *curitem, void *arg,
207 uint32 flags,
208 TSExecuteCallback chkcond);
209extern List *TS_execute_locations(QueryItem *curitem, void *arg,
210 uint32 flags,
211 TSExecuteCallback chkcond);
212extern bool tsquery_requires_match(QueryItem *curitem);
213
214/*
215 * to_ts* - text transformation to tsvector, tsquery
216 */
218extern int32 tsCompareString(char *a, int lena, char *b, int lenb, bool prefix);
219
/*
 * Possible strategy numbers for indexes
 *	  TSearchStrategyNumber  - (tsvector|text) @@ tsquery
 *	  TSearchWithClassStrategyNumber  - tsvector @@@ tsquery
 */
#define TSearchStrategyNumber			1
#define TSearchWithClassStrategyNumber	2
227
228/*
229 * TSQuery Utilities
230 */
232extern TSQuery cleanup_tsquery_stopwords(TSQuery in, bool noisy);
233
234typedef struct QTNode
235{
239 char *word;
241 struct QTNode **child;
243
/* bits in QTNode.flags; each flag is a distinct bit so they can be OR'ed */
#define QTN_NEEDFREE	0x01
#define QTN_NOCHANGE	0x02
#define QTN_WORDFREE	0x04
248
250
251#define TSQS_SIGLEN (sizeof(TSQuerySign)*BITS_PER_BYTE)
252
253static inline Datum
255{
256 return Int64GetDatum((int64) X);
257}
258
259static inline TSQuerySign
261{
262 return (TSQuerySign) DatumGetInt64(X);
263}
264
/*
 * fmgr-style convenience macros for TSQuerySign values:
 * PG_RETURN_TSQUERYSIGN returns X converted by TSQuerySignGetDatum();
 * PG_GETARG_TSQUERYSIGN converts argument n with DatumGetTSQuerySign().
 */
#define PG_RETURN_TSQUERYSIGN(X)	return TSQuerySignGetDatum(X)
#define PG_GETARG_TSQUERYSIGN(n)	DatumGetTSQuerySign(PG_GETARG_DATUM(n))
267
268
269extern QTNode *QT2QTN(QueryItem *in, char *operand);
270extern TSQuery QTN2QT(QTNode *in);
271extern void QTNFree(QTNode *in);
272extern void QTNSort(QTNode *in);
273extern void QTNTernary(QTNode *in);
274extern void QTNBinary(QTNode *in);
275extern int QTNodeCompare(QTNode *an, QTNode *bn);
276extern QTNode *QTNCopy(QTNode *in);
277extern void QTNClearFlags(QTNode *in, uint32 flags);
278extern bool QTNEq(QTNode *a, QTNode *b);
280extern QTNode *findsubquery(QTNode *root, QTNode *ex, QTNode *subs,
281 bool *isfind);
282
283#endif /* _PG_TS_UTILS_H_ */
int64_t int64
Definition: c.h:499
int16_t int16
Definition: c.h:497
int8_t int8
Definition: c.h:496
int32_t int32
Definition: c.h:498
uint64_t uint64
Definition: c.h:503
uint16_t uint16
Definition: c.h:501
uint32_t uint32
Definition: c.h:502
Datum Int64GetDatum(int64 X)
Definition: fmgr.c:1807
#define token
Definition: indent_globs.h:126
FILE * input
long val
Definition: informix.c:689
int b
Definition: isn.c:71
int a
Definition: isn.c:70
Operator oper(ParseState *pstate, List *opname, Oid ltypeId, Oid rtypeId, bool noError, int location)
Definition: parse_oper.c:370
void * arg
const void size_t len
const void * data
static char * buf
Definition: pg_test_fsync.c:72
static int64 DatumGetInt64(Datum X)
Definition: postgres.h:390
uintptr_t Datum
Definition: postgres.h:69
unsigned int Oid
Definition: postgres_ext.h:30
tree ctl root
Definition: radixtree.h:1857
bool allocated
Definition: ts_utils.h:164
WordEntryPos * pos
Definition: ts_utils.h:166
Definition: pg_list.h:54
Definition: nodes.h:135
int32 pos
Definition: ts_utils.h:107
int32 lenwords
Definition: ts_utils.h:105
int32 curwords
Definition: ts_utils.h:106
ParsedWord * words
Definition: ts_utils.h:104
uint16 alen
Definition: ts_utils.h:87
uint16 flags
Definition: ts_utils.h:84
uint16 nvariant
Definition: ts_utils.h:86
uint16 len
Definition: ts_utils.h:85
uint16 pos
Definition: ts_utils.h:90
uint16 * apos
Definition: ts_utils.h:97
char * word
Definition: ts_utils.h:99
int32 nchild
Definition: ts_utils.h:238
QueryItem * valnode
Definition: ts_utils.h:236
uint32 sign
Definition: ts_utils.h:240
uint32 flags
Definition: ts_utils.h:237
char * word
Definition: ts_utils.h:239
struct QTNode ** child
Definition: ts_utils.h:241
ts_parserstate state
Definition: tsquery.c:88
Definition: regguts.h:323
Definition: c.h:658
uint16 WordEntryPos
Definition: ts_type.h:63
void reset_tsvector_parser(TSVectorParseState state, char *input)
void QTNClearFlags(QTNode *in, uint32 flags)
Definition: tsquery_util.c:434
int QTNodeCompare(QTNode *an, QTNode *bn)
Definition: tsquery_util.c:97
struct ExecPhraseData ExecPhraseData
text * generateHeadline(HeadlineParsedText *prs)
Definition: ts_parse.c:607
void pushValue(TSQueryParserState state, char *strval, int lenval, int16 weight, bool prefix)
Definition: tsquery.c:580
bool TS_execute(QueryItem *curitem, void *arg, uint32 flags, TSExecuteCallback chkcond)
Definition: tsvector_op.c:1854
void close_tsvector_parser(TSVectorParseState state)
struct TSVectorParseStateData * TSVectorParseState
Definition: ts_utils.h:26
QueryItem * clean_NOT(QueryItem *ptr, int32 *len)
QTNode * QTNCopy(QTNode *in)
Definition: tsquery_util.c:396
List * TS_execute_locations(QueryItem *curitem, void *arg, uint32 flags, TSExecuteCallback chkcond)
Definition: tsvector_op.c:2007
QTNode * findsubquery(QTNode *root, QTNode *ex, QTNode *subs, bool *isfind)
TSTernaryValue
Definition: ts_utils.h:133
@ TS_MAYBE
Definition: ts_utils.h:136
@ TS_NO
Definition: ts_utils.h:134
@ TS_YES
Definition: ts_utils.h:135
static Datum TSQuerySignGetDatum(TSQuerySign X)
Definition: ts_utils.h:254
TSQuerySign makeTSQuerySign(TSQuery a)
Definition: tsquery_op.c:250
void parsetext(Oid cfgId, ParsedText *prs, char *buf, int32 buflen)
void QTNSort(QTNode *in)
Definition: tsquery_util.c:163
TSQuery parse_tsquery(char *buf, PushFunction pushval, Datum opaque, int flags, Node *escontext)
Definition: tsquery.c:817
void pushOperator(TSQueryParserState state, int8 oper, int16 distance)
Definition: tsquery.c:531
TSQuery cleanup_tsquery_stopwords(TSQuery in, bool noisy)
uint64 TSQuerySign
Definition: ts_utils.h:249
void QTNTernary(QTNode *in)
Definition: tsquery_util.c:201
int32 tsCompareString(char *a, int lena, char *b, int lenb, bool prefix)
Definition: tsvector_op.c:1152
static TSQuerySign DatumGetTSQuerySign(Datum X)
Definition: ts_utils.h:260
void hlparsetext(Oid cfgId, HeadlineParsedText *prs, TSQuery query, char *buf, int32 buflen)
bool tsquery_requires_match(QueryItem *curitem)
Definition: tsvector_op.c:2156
TSVector make_tsvector(ParsedText *prs)
Definition: to_tsany.c:165
struct QTNode QTNode
bool gettoken_tsvector(TSVectorParseState state, char **strval, int *lenval, WordEntryPos **pos_ptr, int *poslen, char **endptr)
bool QTNEq(QTNode *a, QTNode *b)
Definition: tsquery_util.c:183
void QTNFree(QTNode *in)
Definition: tsquery_util.c:64
TSTernaryValue TS_execute_ternary(QueryItem *curitem, void *arg, uint32 flags, TSExecuteCallback chkcond)
Definition: tsvector_op.c:1871
TSVectorParseState init_tsvector_parser(char *input, int flags, Node *escontext)
struct TSQueryParserStateData * TSQueryParserState
Definition: ts_utils.h:55
QTNode * QT2QTN(QueryItem *in, char *operand)
Definition: tsquery_util.c:25
void pushStop(TSQueryParserState state)
Definition: tsquery.c:616
TSQuery QTN2QT(QTNode *in)
Definition: tsquery_util.c:363
TSTernaryValue(* TSExecuteCallback)(void *arg, QueryOperand *val, ExecPhraseData *data)
Definition: ts_utils.h:182
void QTNBinary(QTNode *in)
Definition: tsquery_util.c:250
void(* PushFunction)(Datum opaque, TSQueryParserState state, char *token, int tokenlen, int16 tokenweights, bool prefix)
Definition: ts_utils.h:57