PostgreSQL Source Code git master
Loading...
Searching...
No Matches
tsvector_parser.c File Reference
#include "postgres.h"
#include "tsearch/ts_locale.h"
#include "tsearch/ts_utils.h"
Include dependency graph for tsvector_parser.c:

Go to the source code of this file.

Data Structures

struct  TSVectorParseStateData
 

Macros

#define RESIZEPRSBUF
 
#define RETURN_TOKEN
 
#define WAITWORD   1
 
#define WAITENDWORD   2
 
#define WAITNEXTCHAR   3
 
#define WAITENDCMPLX   4
 
#define WAITPOSINFO   5
 
#define INPOSINFO   6
 
#define WAITPOSDELIM   7
 
#define WAITCHARCMPLX   8
 
#define PRSSYNTAXERROR   return prssyntaxerror(state)
 

Functions

TSVectorParseState init_tsvector_parser (char *input, int flags, Node *escontext)
 
void reset_tsvector_parser (TSVectorParseState state, char *input)
 
void close_tsvector_parser (TSVectorParseState state)
 
static bool prssyntaxerror (TSVectorParseState state)
 
bool gettoken_tsvector (TSVectorParseState state, char **strval, int *lenval, WordEntryPos **pos_ptr, int *poslen, char **endptr)
 

Macro Definition Documentation

◆ INPOSINFO

#define INPOSINFO   6

Definition at line 135 of file tsvector_parser.c.

◆ PRSSYNTAXERROR

#define PRSSYNTAXERROR   return prssyntaxerror(state)

Definition at line 139 of file tsvector_parser.c.

◆ RESIZEPRSBUF

#define RESIZEPRSBUF
Value:
do { \
int clen = curpos - state->word; \
if ( clen + state->eml >= state->len ) \
{ \
state->len *= 2; \
state->word = (char *) repalloc(state->word, state->len); \
curpos = state->word + clen; \
} \
} while (0)
void * repalloc(void *pointer, Size size)
Definition mcxt.c:1632
static int fb(int x)

Definition at line 97 of file tsvector_parser.c.

98 do { \
99 int clen = curpos - state->word; \
100 if ( clen + state->eml >= state->len ) \
101 { \
102 state->len *= 2; \
103 state->word = (char *) repalloc(state->word, state->len); \
104 curpos = state->word + clen; \
105 } \
106} while (0)

◆ RETURN_TOKEN

#define RETURN_TOKEN
Value:
do { \
if (pos_ptr != NULL) \
{ \
*pos_ptr = pos; \
*poslen = npos; \
} \
else if (pos != NULL) \
pfree(pos); \
if (strval != NULL) \
*strval = state->word; \
if (lenval != NULL) \
*lenval = curpos - state->word; \
if (endptr != NULL) \
*endptr = state->prsbuf; \
return true; \
} while(0)

Definition at line 109 of file tsvector_parser.c.

110 do { \
111 if (pos_ptr != NULL) \
112 { \
113 *pos_ptr = pos; \
114 *poslen = npos; \
115 } \
116 else if (pos != NULL) \
117 pfree(pos); \
118 \
119 if (strval != NULL) \
120 *strval = state->word; \
121 if (lenval != NULL) \
122 *lenval = curpos - state->word; \
123 if (endptr != NULL) \
124 *endptr = state->prsbuf; \
125 return true; \
126} while(0)

◆ WAITCHARCMPLX

#define WAITCHARCMPLX   8

Definition at line 137 of file tsvector_parser.c.

◆ WAITENDCMPLX

#define WAITENDCMPLX   4

Definition at line 133 of file tsvector_parser.c.

◆ WAITENDWORD

#define WAITENDWORD   2

Definition at line 131 of file tsvector_parser.c.

◆ WAITNEXTCHAR

#define WAITNEXTCHAR   3

Definition at line 132 of file tsvector_parser.c.

◆ WAITPOSDELIM

#define WAITPOSDELIM   7

Definition at line 136 of file tsvector_parser.c.

◆ WAITPOSINFO

#define WAITPOSINFO   5

Definition at line 134 of file tsvector_parser.c.

◆ WAITWORD

#define WAITWORD   1

Definition at line 130 of file tsvector_parser.c.

Function Documentation

◆ close_tsvector_parser()

void close_tsvector_parser ( TSVectorParseState  state)

Definition at line 90 of file tsvector_parser.c.

91{
92 pfree(state->word);
93 pfree(state);
94}
void pfree(void *pointer)
Definition mcxt.c:1616

References pfree().

Referenced by parse_tsquery(), and tsvectorin().

◆ gettoken_tsvector()

bool gettoken_tsvector ( TSVectorParseState  state,
char **  strval,
int *  lenval,
WordEntryPos **  pos_ptr,
int *  poslen,
char **  endptr 
)

Definition at line 176 of file tsvector_parser.c.

180{
181 int oldstate = 0;
182 char *curpos = state->word;
183 int statecode = WAITWORD;
184
185 /*
186 * pos is for collecting the comma delimited list of positions followed by
187 * the actual token.
188 */
189 WordEntryPos *pos = NULL;
190 int npos = 0; /* elements of pos used */
191 int posalen = 0; /* allocated size of pos */
192
193 while (1)
194 {
195 if (statecode == WAITWORD)
196 {
197 if (*(state->prsbuf) == '\0')
198 return false;
199 else if (!state->is_web && t_iseq(state->prsbuf, '\''))
201 else if (!state->is_web && t_iseq(state->prsbuf, '\\'))
202 {
205 }
206 else if ((state->oprisdelim && ISOPERATOR(state->prsbuf)) ||
207 (state->is_web && t_iseq(state->prsbuf, '"')))
209 else if (!isspace((unsigned char) *state->prsbuf))
210 {
211 curpos += ts_copychar_cstr(curpos, state->prsbuf);
213 }
214 }
215 else if (statecode == WAITNEXTCHAR)
216 {
217 if (*(state->prsbuf) == '\0')
218 ereturn(state->escontext, false,
220 errmsg("there is no escaped character: \"%s\"",
221 state->bufstart)));
222 else
223 {
225 curpos += ts_copychar_cstr(curpos, state->prsbuf);
226 Assert(oldstate != 0);
228 }
229 }
230 else if (statecode == WAITENDWORD)
231 {
232 if (!state->is_web && t_iseq(state->prsbuf, '\\'))
233 {
236 }
237 else if (isspace((unsigned char) *state->prsbuf) || *(state->prsbuf) == '\0' ||
238 (state->oprisdelim && ISOPERATOR(state->prsbuf)) ||
239 (state->is_web && t_iseq(state->prsbuf, '"')))
240 {
242 if (curpos == state->word)
244 *(curpos) = '\0';
246 }
247 else if (t_iseq(state->prsbuf, ':'))
248 {
249 if (curpos == state->word)
251 *(curpos) = '\0';
252 if (state->oprisdelim)
254 else
256 }
257 else
258 {
260 curpos += ts_copychar_cstr(curpos, state->prsbuf);
261 }
262 }
263 else if (statecode == WAITENDCMPLX)
264 {
265 if (!state->is_web && t_iseq(state->prsbuf, '\''))
266 {
268 }
269 else if (!state->is_web && t_iseq(state->prsbuf, '\\'))
270 {
273 }
274 else if (*(state->prsbuf) == '\0')
276 else
277 {
279 curpos += ts_copychar_cstr(curpos, state->prsbuf);
280 }
281 }
282 else if (statecode == WAITCHARCMPLX)
283 {
284 if (!state->is_web && t_iseq(state->prsbuf, '\''))
285 {
287 curpos += ts_copychar_cstr(curpos, state->prsbuf);
289 }
290 else
291 {
293 *(curpos) = '\0';
294 if (curpos == state->word)
296 if (state->oprisdelim)
297 {
298 /* state->prsbuf+=pg_mblen_cstr(state->prsbuf); */
300 }
301 else
303 continue; /* recheck current character */
304 }
305 }
306 else if (statecode == WAITPOSINFO)
307 {
308 if (t_iseq(state->prsbuf, ':'))
310 else
312 }
313 else if (statecode == INPOSINFO)
314 {
315 if (isdigit((unsigned char) *state->prsbuf))
316 {
317 if (posalen == 0)
318 {
319 posalen = 4;
321 npos = 0;
322 }
323 else if (npos + 1 >= posalen)
324 {
325 posalen *= 2;
327 }
328 npos++;
329 WEP_SETPOS(pos[npos - 1], LIMITPOS(atoi(state->prsbuf)));
330 /* we cannot get here in tsquery, so no need for 2 errmsgs */
331 if (WEP_GETPOS(pos[npos - 1]) == 0)
332 ereturn(state->escontext, false,
334 errmsg("wrong position info in tsvector: \"%s\"",
335 state->bufstart)));
336 WEP_SETWEIGHT(pos[npos - 1], 0);
338 }
339 else
341 }
342 else if (statecode == WAITPOSDELIM)
343 {
344 if (t_iseq(state->prsbuf, ','))
346 else if (t_iseq(state->prsbuf, 'a') || t_iseq(state->prsbuf, 'A') || t_iseq(state->prsbuf, '*'))
347 {
348 if (WEP_GETWEIGHT(pos[npos - 1]))
350 WEP_SETWEIGHT(pos[npos - 1], 3);
351 }
352 else if (t_iseq(state->prsbuf, 'b') || t_iseq(state->prsbuf, 'B'))
353 {
354 if (WEP_GETWEIGHT(pos[npos - 1]))
356 WEP_SETWEIGHT(pos[npos - 1], 2);
357 }
358 else if (t_iseq(state->prsbuf, 'c') || t_iseq(state->prsbuf, 'C'))
359 {
360 if (WEP_GETWEIGHT(pos[npos - 1]))
362 WEP_SETWEIGHT(pos[npos - 1], 1);
363 }
364 else if (t_iseq(state->prsbuf, 'd') || t_iseq(state->prsbuf, 'D'))
365 {
366 if (WEP_GETWEIGHT(pos[npos - 1]))
368 WEP_SETWEIGHT(pos[npos - 1], 0);
369 }
370 else if (isspace((unsigned char) *state->prsbuf) ||
371 *(state->prsbuf) == '\0')
373 else if (!isdigit((unsigned char) *state->prsbuf))
375 }
376 else /* internal error */
377 elog(ERROR, "unrecognized state in gettoken_tsvector: %d",
378 statecode);
379
380 /* get next char */
381 state->prsbuf += pg_mblen_cstr(state->prsbuf);
382 }
383}
#define Assert(condition)
Definition c.h:885
int errcode(int sqlerrcode)
Definition elog.c:874
int errmsg(const char *fmt,...)
Definition elog.c:1093
#define ereturn(context, dummy_value,...)
Definition elog.h:278
#define ERROR
Definition elog.h:39
#define elog(elevel,...)
Definition elog.h:226
#define repalloc_array(pointer, type, count)
Definition fe_memutils.h:78
#define palloc_array(type, count)
Definition fe_memutils.h:76
#define ISOPERATOR(x)
Definition ltree.h:167
int pg_mblen_cstr(const char *mbstr)
Definition mbutils.c:1045
static int ts_copychar_cstr(void *dest, const void *src)
Definition ts_locale.h:50
#define t_iseq(x, c)
Definition ts_locale.h:38
#define WEP_GETPOS(x)
Definition ts_type.h:80
#define WEP_SETPOS(x, v)
Definition ts_type.h:83
uint16 WordEntryPos
Definition ts_type.h:63
#define WEP_SETWEIGHT(x, v)
Definition ts_type.h:82
#define LIMITPOS(x)
Definition ts_type.h:87
#define WEP_GETWEIGHT(x)
Definition ts_type.h:79
#define WAITNEXTCHAR
#define PRSSYNTAXERROR
#define WAITENDCMPLX
#define WAITENDWORD
#define WAITCHARCMPLX
#define WAITPOSINFO
#define WAITWORD
#define INPOSINFO
#define RESIZEPRSBUF
#define RETURN_TOKEN
#define WAITPOSDELIM

References Assert, elog, ereturn, errcode(), errmsg(), ERROR, fb(), INPOSINFO, ISOPERATOR, LIMITPOS, palloc_array, pg_mblen_cstr(), PRSSYNTAXERROR, repalloc_array, RESIZEPRSBUF, RETURN_TOKEN, t_iseq, ts_copychar_cstr(), WAITCHARCMPLX, WAITENDCMPLX, WAITENDWORD, WAITNEXTCHAR, WAITPOSDELIM, WAITPOSINFO, WAITWORD, WEP_GETPOS, WEP_GETWEIGHT, WEP_SETPOS, and WEP_SETWEIGHT.

Referenced by gettoken_query_standard(), gettoken_query_websearch(), and tsvectorin().

◆ init_tsvector_parser()

TSVectorParseState init_tsvector_parser ( char *  input,
int  flags,
Node *  escontext 
)

Definition at line 57 of file tsvector_parser.c.

58{
60
62 state->prsbuf = input;
63 state->bufstart = input;
64 state->len = 32;
65 state->word = (char *) palloc(state->len);
67 state->oprisdelim = (flags & P_TSV_OPR_IS_DELIM) != 0;
68 state->is_tsquery = (flags & P_TSV_IS_TSQUERY) != 0;
69 state->is_web = (flags & P_TSV_IS_WEB) != 0;
70 state->escontext = escontext;
71
72 return state;
73}
#define palloc_object(type)
Definition fe_memutils.h:74
FILE * input
int pg_database_encoding_max_length(void)
Definition mbutils.c:1674
void * palloc(Size size)
Definition mcxt.c:1387
#define P_TSV_IS_TSQUERY
Definition ts_utils.h:30
#define P_TSV_IS_WEB
Definition ts_utils.h:31
#define P_TSV_OPR_IS_DELIM
Definition ts_utils.h:29

References input, P_TSV_IS_TSQUERY, P_TSV_IS_WEB, P_TSV_OPR_IS_DELIM, palloc(), palloc_object, and pg_database_encoding_max_length().

Referenced by parse_tsquery(), and tsvectorin().

◆ prssyntaxerror()

static bool prssyntaxerror ( TSVectorParseState  state)
static

Definition at line 142 of file tsvector_parser.c.

143{
144 errsave(state->escontext,
146 state->is_tsquery ?
147 errmsg("syntax error in tsquery: \"%s\"", state->bufstart) :
148 errmsg("syntax error in tsvector: \"%s\"", state->bufstart)));
149 /* In soft error situation, return false as convenience for caller */
150 return false;
151}
#define errsave(context,...)
Definition elog.h:262

References errcode(), errmsg(), errsave, and fb().

◆ reset_tsvector_parser()

void reset_tsvector_parser ( TSVectorParseState  state,
char *  input 
)

Definition at line 81 of file tsvector_parser.c.

82{
83 state->prsbuf = input;
84}

References input.

Referenced by gettoken_query_standard(), and gettoken_query_websearch().