PostgreSQL Source Code git master
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
tsvector_parser.c File Reference
#include "postgres.h"
#include "tsearch/ts_locale.h"
#include "tsearch/ts_utils.h"
Include dependency graph for tsvector_parser.c:

Go to the source code of this file.

Data Structures

struct  TSVectorParseStateData
 

Macros

#define RESIZEPRSBUF
 
#define RETURN_TOKEN
 
#define WAITWORD   1
 
#define WAITENDWORD   2
 
#define WAITNEXTCHAR   3
 
#define WAITENDCMPLX   4
 
#define WAITPOSINFO   5
 
#define INPOSINFO   6
 
#define WAITPOSDELIM   7
 
#define WAITCHARCMPLX   8
 
#define PRSSYNTAXERROR   return prssyntaxerror(state)
 

Functions

TSVectorParseState init_tsvector_parser (char *input, int flags, Node *escontext)
 
void reset_tsvector_parser (TSVectorParseState state, char *input)
 
void close_tsvector_parser (TSVectorParseState state)
 
static bool prssyntaxerror (TSVectorParseState state)
 
bool gettoken_tsvector (TSVectorParseState state, char **strval, int *lenval, WordEntryPos **pos_ptr, int *poslen, char **endptr)
 

Macro Definition Documentation

◆ INPOSINFO

#define INPOSINFO   6

Definition at line 135 of file tsvector_parser.c.

◆ PRSSYNTAXERROR

#define PRSSYNTAXERROR   return prssyntaxerror(state)

Definition at line 139 of file tsvector_parser.c.

◆ RESIZEPRSBUF

#define RESIZEPRSBUF
Value:
do { \
int clen = curpos - state->word; \
if ( clen + state->eml >= state->len ) \
{ \
state->len *= 2; \
state->word = (char *) repalloc(state->word, state->len); \
curpos = state->word + clen; \
} \
} while (0)
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:2172
Definition: regguts.h:323

Definition at line 97 of file tsvector_parser.c.

◆ RETURN_TOKEN

#define RETURN_TOKEN
Value:
do { \
if (pos_ptr != NULL) \
{ \
*pos_ptr = pos; \
*poslen = npos; \
} \
else if (pos != NULL) \
pfree(pos); \
if (strval != NULL) \
*strval = state->word; \
if (lenval != NULL) \
*lenval = curpos - state->word; \
if (endptr != NULL) \
*endptr = state->prsbuf; \
return true; \
} while(0)
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:81

Definition at line 109 of file tsvector_parser.c.

◆ WAITCHARCMPLX

#define WAITCHARCMPLX   8

Definition at line 137 of file tsvector_parser.c.

◆ WAITENDCMPLX

#define WAITENDCMPLX   4

Definition at line 133 of file tsvector_parser.c.

◆ WAITENDWORD

#define WAITENDWORD   2

Definition at line 131 of file tsvector_parser.c.

◆ WAITNEXTCHAR

#define WAITNEXTCHAR   3

Definition at line 132 of file tsvector_parser.c.

◆ WAITPOSDELIM

#define WAITPOSDELIM   7

Definition at line 136 of file tsvector_parser.c.

◆ WAITPOSINFO

#define WAITPOSINFO   5

Definition at line 134 of file tsvector_parser.c.

◆ WAITWORD

#define WAITWORD   1

Definition at line 130 of file tsvector_parser.c.

Function Documentation

◆ close_tsvector_parser()

void close_tsvector_parser ( TSVectorParseState  state)

Definition at line 90 of file tsvector_parser.c.

91{
92 pfree(state->word);
93 pfree(state);
94}
void pfree(void *pointer)
Definition: mcxt.c:2152

References pfree().

Referenced by parse_tsquery(), and tsvectorin().

◆ gettoken_tsvector()

bool gettoken_tsvector ( TSVectorParseState  state,
char **  strval,
int *  lenval,
WordEntryPos **  pos_ptr,
int *  poslen,
char **  endptr 
)

Definition at line 176 of file tsvector_parser.c.

180{
181 int oldstate = 0;
182 char *curpos = state->word;
183 int statecode = WAITWORD;
184
185 /*
186 * pos is for collecting the comma delimited list of positions followed by
187 * the actual token.
188 */
189 WordEntryPos *pos = NULL;
190 int npos = 0; /* elements of pos used */
191 int posalen = 0; /* allocated size of pos */
192
193 while (1)
194 {
195 if (statecode == WAITWORD)
196 {
197 if (*(state->prsbuf) == '\0')
198 return false;
199 else if (!state->is_web && t_iseq(state->prsbuf, '\''))
200 statecode = WAITENDCMPLX;
201 else if (!state->is_web && t_iseq(state->prsbuf, '\\'))
202 {
203 statecode = WAITNEXTCHAR;
204 oldstate = WAITENDWORD;
205 }
206 else if ((state->oprisdelim && ISOPERATOR(state->prsbuf)) ||
207 (state->is_web && t_iseq(state->prsbuf, '"')))
208 PRSSYNTAXERROR;
209 else if (!isspace((unsigned char) *state->prsbuf))
210 {
211 COPYCHAR(curpos, state->prsbuf);
212 curpos += pg_mblen(state->prsbuf);
213 statecode = WAITENDWORD;
214 }
215 }
216 else if (statecode == WAITNEXTCHAR)
217 {
218 if (*(state->prsbuf) == '\0')
219 ereturn(state->escontext, false,
220 (errcode(ERRCODE_SYNTAX_ERROR),
221 errmsg("there is no escaped character: \"%s\"",
222 state->bufstart)));
223 else
224 {
225 RESIZEPRSBUF;
226 COPYCHAR(curpos, state->prsbuf);
227 curpos += pg_mblen(state->prsbuf);
228 Assert(oldstate != 0);
229 statecode = oldstate;
230 }
231 }
232 else if (statecode == WAITENDWORD)
233 {
234 if (!state->is_web && t_iseq(state->prsbuf, '\\'))
235 {
236 statecode = WAITNEXTCHAR;
237 oldstate = WAITENDWORD;
238 }
239 else if (isspace((unsigned char) *state->prsbuf) || *(state->prsbuf) == '\0' ||
240 (state->oprisdelim && ISOPERATOR(state->prsbuf)) ||
241 (state->is_web && t_iseq(state->prsbuf, '"')))
242 {
243 RESIZEPRSBUF;
244 if (curpos == state->word)
245 PRSSYNTAXERROR;
246 *(curpos) = '\0';
247 RETURN_TOKEN;
248 }
249 else if (t_iseq(state->prsbuf, ':'))
250 {
251 if (curpos == state->word)
252 PRSSYNTAXERROR;
253 *(curpos) = '\0';
254 if (state->oprisdelim)
255 RETURN_TOKEN;
256 else
257 statecode = INPOSINFO;
258 }
259 else
260 {
261 RESIZEPRSBUF;
262 COPYCHAR(curpos, state->prsbuf);
263 curpos += pg_mblen(state->prsbuf);
264 }
265 }
266 else if (statecode == WAITENDCMPLX)
267 {
268 if (!state->is_web && t_iseq(state->prsbuf, '\''))
269 {
270 statecode = WAITCHARCMPLX;
271 }
272 else if (!state->is_web && t_iseq(state->prsbuf, '\\'))
273 {
274 statecode = WAITNEXTCHAR;
275 oldstate = WAITENDCMPLX;
276 }
277 else if (*(state->prsbuf) == '\0')
278 PRSSYNTAXERROR;
279 else
280 {
281 RESIZEPRSBUF;
282 COPYCHAR(curpos, state->prsbuf);
283 curpos += pg_mblen(state->prsbuf);
284 }
285 }
286 else if (statecode == WAITCHARCMPLX)
287 {
288 if (!state->is_web && t_iseq(state->prsbuf, '\''))
289 {
290 RESIZEPRSBUF;
291 COPYCHAR(curpos, state->prsbuf);
292 curpos += pg_mblen(state->prsbuf);
293 statecode = WAITENDCMPLX;
294 }
295 else
296 {
297 RESIZEPRSBUF;
298 *(curpos) = '\0';
299 if (curpos == state->word)
300 PRSSYNTAXERROR;
301 if (state->oprisdelim)
302 {
303 /* state->prsbuf+=pg_mblen(state->prsbuf); */
304 RETURN_TOKEN;
305 }
306 else
307 statecode = WAITPOSINFO;
308 continue; /* recheck current character */
309 }
310 }
311 else if (statecode == WAITPOSINFO)
312 {
313 if (t_iseq(state->prsbuf, ':'))
314 statecode = INPOSINFO;
315 else
316 RETURN_TOKEN;
317 }
318 else if (statecode == INPOSINFO)
319 {
320 if (isdigit((unsigned char) *state->prsbuf))
321 {
322 if (posalen == 0)
323 {
324 posalen = 4;
325 pos = (WordEntryPos *) palloc(sizeof(WordEntryPos) * posalen);
326 npos = 0;
327 }
328 else if (npos + 1 >= posalen)
329 {
330 posalen *= 2;
331 pos = (WordEntryPos *) repalloc(pos, sizeof(WordEntryPos) * posalen);
332 }
333 npos++;
334 WEP_SETPOS(pos[npos - 1], LIMITPOS(atoi(state->prsbuf)));
335 /* we cannot get here in tsquery, so no need for 2 errmsgs */
336 if (WEP_GETPOS(pos[npos - 1]) == 0)
337 ereturn(state->escontext, false,
338 (errcode(ERRCODE_SYNTAX_ERROR),
339 errmsg("wrong position info in tsvector: \"%s\"",
340 state->bufstart)));
341 WEP_SETWEIGHT(pos[npos - 1], 0);
342 statecode = WAITPOSDELIM;
343 }
344 else
345 PRSSYNTAXERROR;
346 }
347 else if (statecode == WAITPOSDELIM)
348 {
349 if (t_iseq(state->prsbuf, ','))
350 statecode = INPOSINFO;
351 else if (t_iseq(state->prsbuf, 'a') || t_iseq(state->prsbuf, 'A') || t_iseq(state->prsbuf, '*'))
352 {
353 if (WEP_GETWEIGHT(pos[npos - 1]))
354 PRSSYNTAXERROR;
355 WEP_SETWEIGHT(pos[npos - 1], 3);
356 }
357 else if (t_iseq(state->prsbuf, 'b') || t_iseq(state->prsbuf, 'B'))
358 {
359 if (WEP_GETWEIGHT(pos[npos - 1]))
360 PRSSYNTAXERROR;
361 WEP_SETWEIGHT(pos[npos - 1], 2);
362 }
363 else if (t_iseq(state->prsbuf, 'c') || t_iseq(state->prsbuf, 'C'))
364 {
365 if (WEP_GETWEIGHT(pos[npos - 1]))
366 PRSSYNTAXERROR;
367 WEP_SETWEIGHT(pos[npos - 1], 1);
368 }
369 else if (t_iseq(state->prsbuf, 'd') || t_iseq(state->prsbuf, 'D'))
370 {
371 if (WEP_GETWEIGHT(pos[npos - 1]))
372 PRSSYNTAXERROR;
373 WEP_SETWEIGHT(pos[npos - 1], 0);
374 }
375 else if (isspace((unsigned char) *state->prsbuf) ||
376 *(state->prsbuf) == '\0')
377 RETURN_TOKEN;
378 else if (!isdigit((unsigned char) *state->prsbuf))
379 PRSSYNTAXERROR;
380 }
381 else /* internal error */
382 elog(ERROR, "unrecognized state in gettoken_tsvector: %d",
383 statecode);
384
385 /* get next char */
386 state->prsbuf += pg_mblen(state->prsbuf);
387 }
388}
int errcode(int sqlerrcode)
Definition: elog.c:854
int errmsg(const char *fmt,...)
Definition: elog.c:1071
#define ereturn(context, dummy_value,...)
Definition: elog.h:277
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
Assert(PointerIsAligned(start, uint64))
#define ISOPERATOR(x)
Definition: ltree.h:167
int pg_mblen(const char *mbstr)
Definition: mbutils.c:1023
void * palloc(Size size)
Definition: mcxt.c:1945
#define t_iseq(x, c)
Definition: ts_locale.h:38
#define COPYCHAR(d, s)
Definition: ts_locale.h:40
#define WEP_GETPOS(x)
Definition: ts_type.h:80
#define WEP_SETPOS(x, v)
Definition: ts_type.h:83
uint16 WordEntryPos
Definition: ts_type.h:63
#define WEP_SETWEIGHT(x, v)
Definition: ts_type.h:82
#define LIMITPOS(x)
Definition: ts_type.h:87
#define WEP_GETWEIGHT(x)
Definition: ts_type.h:79
#define WAITNEXTCHAR
#define PRSSYNTAXERROR
#define WAITENDCMPLX
#define WAITENDWORD
#define WAITCHARCMPLX
#define WAITPOSINFO
#define WAITWORD
#define INPOSINFO
#define RESIZEPRSBUF
#define RETURN_TOKEN
#define WAITPOSDELIM

References Assert(), COPYCHAR, elog, ereturn, errcode(), errmsg(), ERROR, INPOSINFO, ISOPERATOR, LIMITPOS, palloc(), pg_mblen(), PRSSYNTAXERROR, repalloc(), RESIZEPRSBUF, RETURN_TOKEN, t_iseq, WAITCHARCMPLX, WAITENDCMPLX, WAITENDWORD, WAITNEXTCHAR, WAITPOSDELIM, WAITPOSINFO, WAITWORD, WEP_GETPOS, WEP_GETWEIGHT, WEP_SETPOS, and WEP_SETWEIGHT.

Referenced by gettoken_query_standard(), gettoken_query_websearch(), and tsvectorin().

◆ init_tsvector_parser()

TSVectorParseState init_tsvector_parser ( char *  input,
int  flags,
Node *  escontext 
)

Definition at line 57 of file tsvector_parser.c.

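58{
59 TSVectorParseState state;
60
61 state = (TSVectorParseState) palloc(sizeof(struct TSVectorParseStateData));
62 state->prsbuf = input;
63 state->bufstart = input;
64 state->len = 32;
65 state->word = (char *) palloc(state->len);
66 state->eml = pg_database_encoding_max_length();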
67 state->oprisdelim = (flags & P_TSV_OPR_IS_DELIM) != 0;
68 state->is_tsquery = (flags & P_TSV_IS_TSQUERY) != 0;
69 state->is_web = (flags & P_TSV_IS_WEB) != 0;
70 state->escontext = escontext;
71
72 return state;
73}
FILE * input
int pg_database_encoding_max_length(void)
Definition: mbutils.c:1546
#define P_TSV_IS_TSQUERY
Definition: ts_utils.h:30
struct TSVectorParseStateData * TSVectorParseState
Definition: ts_utils.h:26
#define P_TSV_IS_WEB
Definition: ts_utils.h:31
#define P_TSV_OPR_IS_DELIM
Definition: ts_utils.h:29

References input, P_TSV_IS_TSQUERY, P_TSV_IS_WEB, P_TSV_OPR_IS_DELIM, palloc(), and pg_database_encoding_max_length().

Referenced by parse_tsquery(), and tsvectorin().

◆ prssyntaxerror()

static bool prssyntaxerror ( TSVectorParseState  state)
static

Definition at line 142 of file tsvector_parser.c.

143{
144 errsave(state->escontext,
145 (errcode(ERRCODE_SYNTAX_ERROR),
146 state->is_tsquery ?
147 errmsg("syntax error in tsquery: \"%s\"", state->bufstart) :
148 errmsg("syntax error in tsvector: \"%s\"", state->bufstart)));
149 /* In soft error situation, return false as convenience for caller */
150 return false;
151}
#define errsave(context,...)
Definition: elog.h:261

References errcode(), errmsg(), and errsave.

◆ reset_tsvector_parser()

void reset_tsvector_parser ( TSVectorParseState  state,
char *  input 
)

Definition at line 81 of file tsvector_parser.c.

82{
83 state->prsbuf = input;
84}

References input.

Referenced by gettoken_query_standard(), and gettoken_query_websearch().