PostgreSQL Source Code  git master
tsvector_parser.c File Reference
#include "postgres.h"
#include "tsearch/ts_locale.h"
#include "tsearch/ts_utils.h"
Include dependency graph for tsvector_parser.c:

Go to the source code of this file.

Data Structures

struct  TSVectorParseStateData
 

Macros

#define RESIZEPRSBUF
 
#define RETURN_TOKEN
 
#define WAITWORD   1
 
#define WAITENDWORD   2
 
#define WAITNEXTCHAR   3
 
#define WAITENDCMPLX   4
 
#define WAITPOSINFO   5
 
#define INPOSINFO   6
 
#define WAITPOSDELIM   7
 
#define WAITCHARCMPLX   8
 
#define PRSSYNTAXERROR   return prssyntaxerror(state)
 

Functions

TSVectorParseState init_tsvector_parser (char *input, int flags, Node *escontext)
 
void reset_tsvector_parser (TSVectorParseState state, char *input)
 
void close_tsvector_parser (TSVectorParseState state)
 
static bool prssyntaxerror (TSVectorParseState state)
 
bool gettoken_tsvector (TSVectorParseState state, char **strval, int *lenval, WordEntryPos **pos_ptr, int *poslen, char **endptr)
 

Macro Definition Documentation

◆ INPOSINFO

#define INPOSINFO   6

Definition at line 135 of file tsvector_parser.c.

◆ PRSSYNTAXERROR

#define PRSSYNTAXERROR   return prssyntaxerror(state)

Definition at line 139 of file tsvector_parser.c.

◆ RESIZEPRSBUF

#define RESIZEPRSBUF
Value:
do { /* enlarge state->word when it is nearly full; relies on the caller's local curpos */ \
int clen = curpos - state->word; /* bytes already written into the buffer */ \
if ( clen + state->eml >= state->len ) /* at most state->eml bytes are left (eml is presumably the max bytes per character -- confirm) */ \
{ \
state->len *= 2; \
state->word = (char *) repalloc(state->word, state->len); \
curpos = state->word + clen; /* the buffer may have moved; keep the same offset */ \
} \
} while (0)
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1540
Definition: regguts.h:323

Definition at line 97 of file tsvector_parser.c.

◆ RETURN_TOKEN

#define RETURN_TOKEN
Value:
do { /* publish the finished token through the caller's out-parameters, then return true */ \
if (pos_ptr != NULL) \
{ \
*pos_ptr = pos; /* pass the collected position array to the caller */ \
*poslen = npos; /* poslen is not NULL-checked on its own: it must accompany pos_ptr */ \
} \
else if (pos != NULL) \
pfree(pos); /* positions were collected but not requested: discard them */ \
if (strval != NULL) \
*strval = state->word; /* points at the parser's own buffer, not a copy */ \
if (lenval != NULL) \
*lenval = curpos - state->word; /* token length in bytes */ \
if (endptr != NULL) \
*endptr = state->prsbuf; /* current scan position in the input */ \
return true; /* returns from the function that expands this macro */ \
} while(0)
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:77

Definition at line 109 of file tsvector_parser.c.

◆ WAITCHARCMPLX

#define WAITCHARCMPLX   8

Definition at line 137 of file tsvector_parser.c.

◆ WAITENDCMPLX

#define WAITENDCMPLX   4

Definition at line 133 of file tsvector_parser.c.

◆ WAITENDWORD

#define WAITENDWORD   2

Definition at line 131 of file tsvector_parser.c.

◆ WAITNEXTCHAR

#define WAITNEXTCHAR   3

Definition at line 132 of file tsvector_parser.c.

◆ WAITPOSDELIM

#define WAITPOSDELIM   7

Definition at line 136 of file tsvector_parser.c.

◆ WAITPOSINFO

#define WAITPOSINFO   5

Definition at line 134 of file tsvector_parser.c.

◆ WAITWORD

#define WAITWORD   1

Definition at line 130 of file tsvector_parser.c.

Function Documentation

◆ close_tsvector_parser()

void close_tsvector_parser ( TSVectorParseState  state)

Definition at line 90 of file tsvector_parser.c.

91 {
     /*
      * Release the parser: free the token buffer first, then the state
      * struct that holds the pointer to it. state must not be used afterwards.
      */
92  pfree(state->word);
93  pfree(state);
94 }
void pfree(void *pointer)
Definition: mcxt.c:1520

References pfree().

Referenced by parse_tsquery(), and tsvectorin().

◆ gettoken_tsvector()

bool gettoken_tsvector ( TSVectorParseState  state,
char **  strval,
int *  lenval,
WordEntryPos **  pos_ptr,
int *  poslen,
char **  endptr 
)

Definition at line 176 of file tsvector_parser.c.

180 {
     /*
      * Extract the next token from state->prsbuf, walking the input one
      * multibyte character at a time (pg_mblen) through a small state machine.
      *
      * Returns true once a token has been assembled in state->word;
      * RETURN_TOKEN then fills whichever of strval, lenval, pos_ptr/poslen
      * and endptr the caller passed as non-NULL.  poslen is written whenever
      * pos_ptr is non-NULL, so the two must be supplied together.
      *
      * Returns false when the input ends before a token starts, and when a
      * syntax error is reported through PRSSYNTAXERROR or ereturn() and
      * captured in state->escontext (with no soft-error context the error is
      * presumably thrown instead -- confirm against elog.h).
      */
181  int oldstate = 0;
182  char *curpos = state->word;
183  int statecode = WAITWORD;
184 
185  /*
186  * pos is for collecting the comma delimited list of positions followed by
187  * the actual token.
188  */
189  WordEntryPos *pos = NULL;
190  int npos = 0; /* elements of pos used */
191  int posalen = 0; /* allocated size of pos */
192 
193  while (1)
194  {
     /* WAITWORD: skip whitespace until the first character of a token */
195  if (statecode == WAITWORD)
196  {
197  if (*(state->prsbuf) == '\0')
198  return false;
199  else if (!state->is_web && t_iseq(state->prsbuf, '\''))
200  statecode = WAITENDCMPLX;
201  else if (!state->is_web && t_iseq(state->prsbuf, '\\'))
202  {
203  statecode = WAITNEXTCHAR;
204  oldstate = WAITENDWORD;
205  }
206  else if ((state->oprisdelim && ISOPERATOR(state->prsbuf)) ||
207  (state->is_web && t_iseq(state->prsbuf, '"')))
208  PRSSYNTAXERROR;
209  else if (!t_isspace(state->prsbuf))
210  {
211  COPYCHAR(curpos, state->prsbuf);
212  curpos += pg_mblen(state->prsbuf);
213  statecode = WAITENDWORD;
214  }
215  }
     /* WAITNEXTCHAR: copy the character after a backslash verbatim, then resume oldstate */
216  else if (statecode == WAITNEXTCHAR)
217  {
218  if (*(state->prsbuf) == '\0')
219  ereturn(state->escontext, false,
220  (errcode(ERRCODE_SYNTAX_ERROR),
221  errmsg("there is no escaped character: \"%s\"",
222  state->bufstart)));
223  else
224  {
225  RESIZEPRSBUF;
226  COPYCHAR(curpos, state->prsbuf);
227  curpos += pg_mblen(state->prsbuf);
228  Assert(oldstate != 0);
229  statecode = oldstate;
230  }
231  }
     /* WAITENDWORD: inside an unquoted word */
232  else if (statecode == WAITENDWORD)
233  {
234  if (!state->is_web && t_iseq(state->prsbuf, '\\'))
235  {
236  statecode = WAITNEXTCHAR;
237  oldstate = WAITENDWORD;
238  }
239  else if (t_isspace(state->prsbuf) || *(state->prsbuf) == '\0' ||
240  (state->oprisdelim && ISOPERATOR(state->prsbuf)) ||
241  (state->is_web && t_iseq(state->prsbuf, '"')))
242  {
243  RESIZEPRSBUF;
     /* an empty word is a syntax error */
244  if (curpos == state->word)
245  PRSSYNTAXERROR;
246  *(curpos) = '\0';
247  RETURN_TOKEN;
248  }
249  else if (t_iseq(state->prsbuf, ':'))
250  {
251  if (curpos == state->word)
252  PRSSYNTAXERROR;
253  *(curpos) = '\0';
254  if (state->oprisdelim)
255  RETURN_TOKEN;
256  else
257  statecode = INPOSINFO;
258  }
259  else
260  {
261  RESIZEPRSBUF;
262  COPYCHAR(curpos, state->prsbuf);
263  curpos += pg_mblen(state->prsbuf);
264  }
265  }
     /* WAITENDCMPLX: inside a single-quoted word */
266  else if (statecode == WAITENDCMPLX)
267  {
268  if (!state->is_web && t_iseq(state->prsbuf, '\''))
269  {
270  statecode = WAITCHARCMPLX;
271  }
272  else if (!state->is_web && t_iseq(state->prsbuf, '\\'))
273  {
274  statecode = WAITNEXTCHAR;
275  oldstate = WAITENDCMPLX;
276  }
     /* input ended before the closing quote */
277  else if (*(state->prsbuf) == '\0')
278  PRSSYNTAXERROR;
279  else
280  {
281  RESIZEPRSBUF;
282  COPYCHAR(curpos, state->prsbuf);
283  curpos += pg_mblen(state->prsbuf);
284  }
285  }
     /*
      * WAITCHARCMPLX: a quote was just seen inside a quoted word.  A second
      * quote is a literal quote character; anything else ends the word.
      */
286  else if (statecode == WAITCHARCMPLX)
287  {
288  if (!state->is_web && t_iseq(state->prsbuf, '\''))
289  {
290  RESIZEPRSBUF;
291  COPYCHAR(curpos, state->prsbuf);
292  curpos += pg_mblen(state->prsbuf);
293  statecode = WAITENDCMPLX;
294  }
295  else
296  {
297  RESIZEPRSBUF;
298  *(curpos) = '\0';
299  if (curpos == state->word)
300  PRSSYNTAXERROR;
301  if (state->oprisdelim)
302  {
303  /* state->prsbuf+=pg_mblen(state->prsbuf); */
304  RETURN_TOKEN;
305  }
306  else
307  statecode = WAITPOSINFO;
308  continue; /* recheck current character */
309  }
310  }
     /* WAITPOSINFO: after a quoted word, ':' introduces a position list */
311  else if (statecode == WAITPOSINFO)
312  {
313  if (t_iseq(state->prsbuf, ':'))
314  statecode = INPOSINFO;
315  else
316  RETURN_TOKEN;
317  }
     /* INPOSINFO: a position number must start here */
318  else if (statecode == INPOSINFO)
319  {
320  if (t_isdigit(state->prsbuf))
321  {
322  if (posalen == 0)
323  {
324  posalen = 4;
325  pos = (WordEntryPos *) palloc(sizeof(WordEntryPos) * posalen);
326  npos = 0;
327  }
328  else if (npos + 1 >= posalen)
329  {
330  posalen *= 2;
331  pos = (WordEntryPos *) repalloc(pos, sizeof(WordEntryPos) * posalen);
332  }
333  npos++;
     /* atoi() reads the whole number from its first digit */
334  WEP_SETPOS(pos[npos - 1], LIMITPOS(atoi(state->prsbuf)));
335  /* we cannot get here in tsquery, so no need for 2 errmsgs */
336  if (WEP_GETPOS(pos[npos - 1]) == 0)
337  ereturn(state->escontext, false,
338  (errcode(ERRCODE_SYNTAX_ERROR),
339  errmsg("wrong position info in tsvector: \"%s\"",
340  state->bufstart)));
341  WEP_SETWEIGHT(pos[npos - 1], 0);
342  statecode = WAITPOSDELIM;
343  }
344  else
345  PRSSYNTAXERROR;
346  }
     /*
      * WAITPOSDELIM: after the first digit of a position.  Remaining digits
      * of the number already parsed above match no branch and are simply
      * skipped; ',' starts another position; a weight letter sets the weight
      * (A or '*' = 3, B = 2, C = 1, D = 0) and is rejected if a non-zero
      * weight is already set on this position.
      */
347  else if (statecode == WAITPOSDELIM)
348  {
349  if (t_iseq(state->prsbuf, ','))
350  statecode = INPOSINFO;
351  else if (t_iseq(state->prsbuf, 'a') || t_iseq(state->prsbuf, 'A') || t_iseq(state->prsbuf, '*'))
352  {
353  if (WEP_GETWEIGHT(pos[npos - 1]))
354  PRSSYNTAXERROR;
355  WEP_SETWEIGHT(pos[npos - 1], 3);
356  }
357  else if (t_iseq(state->prsbuf, 'b') || t_iseq(state->prsbuf, 'B'))
358  {
359  if (WEP_GETWEIGHT(pos[npos - 1]))
360  PRSSYNTAXERROR;
361  WEP_SETWEIGHT(pos[npos - 1], 2);
362  }
363  else if (t_iseq(state->prsbuf, 'c') || t_iseq(state->prsbuf, 'C'))
364  {
365  if (WEP_GETWEIGHT(pos[npos - 1]))
366  PRSSYNTAXERROR;
367  WEP_SETWEIGHT(pos[npos - 1], 1);
368  }
369  else if (t_iseq(state->prsbuf, 'd') || t_iseq(state->prsbuf, 'D'))
370  {
371  if (WEP_GETWEIGHT(pos[npos - 1]))
372  PRSSYNTAXERROR;
373  WEP_SETWEIGHT(pos[npos - 1], 0);
374  }
375  else if (t_isspace(state->prsbuf) ||
376  *(state->prsbuf) == '\0')
377  RETURN_TOKEN;
378  else if (!t_isdigit(state->prsbuf))
379  PRSSYNTAXERROR;
380  }
381  else /* internal error */
382  elog(ERROR, "unrecognized state in gettoken_tsvector: %d",
383  statecode);
384 
385  /* get next char */
386  state->prsbuf += pg_mblen(state->prsbuf);
387  }
388 }
#define Assert(condition)
Definition: c.h:858
int errcode(int sqlerrcode)
Definition: elog.c:859
int errmsg(const char *fmt,...)
Definition: elog.c:1072
#define ereturn(context, dummy_value,...)
Definition: elog.h:276
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:224
#define ISOPERATOR(x)
Definition: ltree.h:167
int pg_mblen(const char *mbstr)
Definition: mbutils.c:1023
void * palloc(Size size)
Definition: mcxt.c:1316
int t_isspace(const char *ptr)
Definition: ts_locale.c:50
int t_isdigit(const char *ptr)
Definition: ts_locale.c:35
#define t_iseq(x, c)
Definition: ts_locale.h:38
#define COPYCHAR(d, s)
Definition: ts_locale.h:40
#define WEP_GETPOS(x)
Definition: ts_type.h:80
#define WEP_SETPOS(x, v)
Definition: ts_type.h:83
uint16 WordEntryPos
Definition: ts_type.h:63
#define WEP_SETWEIGHT(x, v)
Definition: ts_type.h:82
#define LIMITPOS(x)
Definition: ts_type.h:87
#define WEP_GETWEIGHT(x)
Definition: ts_type.h:79
#define WAITNEXTCHAR
#define PRSSYNTAXERROR
#define WAITENDCMPLX
#define WAITENDWORD
#define WAITCHARCMPLX
#define WAITPOSINFO
#define WAITWORD
#define INPOSINFO
#define RESIZEPRSBUF
#define RETURN_TOKEN
#define WAITPOSDELIM

References Assert, COPYCHAR, elog, ereturn, errcode(), errmsg(), ERROR, INPOSINFO, ISOPERATOR, LIMITPOS, palloc(), pg_mblen(), PRSSYNTAXERROR, repalloc(), RESIZEPRSBUF, RETURN_TOKEN, t_isdigit(), t_iseq, t_isspace(), WAITCHARCMPLX, WAITENDCMPLX, WAITENDWORD, WAITNEXTCHAR, WAITPOSDELIM, WAITPOSINFO, WAITWORD, WEP_GETPOS, WEP_GETWEIGHT, WEP_SETPOS, and WEP_SETWEIGHT.

Referenced by gettoken_query_standard(), gettoken_query_websearch(), and tsvectorin().

◆ init_tsvector_parser()

TSVectorParseState init_tsvector_parser ( char *  input,
int  flags,
Node *  escontext 
)

Definition at line 57 of file tsvector_parser.c.

58 {
     /*
      * Allocate and initialise a parser state for the string at input.
      *
      * prsbuf is the moving scan pointer and bufstart keeps the start of the
      * input for error messages.  The token buffer starts at 32 bytes and is
      * grown by RESIZEPRSBUF.  flags is decoded into the oprisdelim,
      * is_tsquery and is_web booleans, and escontext is stored for later
      * error reporting.  The result is released with close_tsvector_parser().
      *
      * NOTE(review): lines 59, 61 and 66 were absent from this listing and
      * are reconstructed from the identifiers this page references --
      * confirm against the upstream file.
      */
59  TSVectorParseState state;
60 
61  state = (TSVectorParseState) palloc(sizeof(struct TSVectorParseStateData));
62  state->prsbuf = input;
63  state->bufstart = input;
64  state->len = 32;
65  state->word = (char *) palloc(state->len);
66  state->eml = pg_database_encoding_max_length();
67  state->oprisdelim = (flags & P_TSV_OPR_IS_DELIM) != 0;
68  state->is_tsquery = (flags & P_TSV_IS_TSQUERY) != 0;
69  state->is_web = (flags & P_TSV_IS_WEB) != 0;
70  state->escontext = escontext;
71 
72  return state;
73 }
FILE * input
int pg_database_encoding_max_length(void)
Definition: mbutils.c:1546
#define P_TSV_IS_TSQUERY
Definition: ts_utils.h:30
struct TSVectorParseStateData * TSVectorParseState
Definition: ts_utils.h:26
#define P_TSV_IS_WEB
Definition: ts_utils.h:31
#define P_TSV_OPR_IS_DELIM
Definition: ts_utils.h:29

References input, P_TSV_IS_TSQUERY, P_TSV_IS_WEB, P_TSV_OPR_IS_DELIM, palloc(), and pg_database_encoding_max_length().

Referenced by parse_tsquery(), and tsvectorin().

◆ prssyntaxerror()

static bool prssyntaxerror ( TSVectorParseState  state)
static

Definition at line 142 of file tsvector_parser.c.

143 {
     /*
      * Report a generic syntax error through errsave() using
      * state->escontext.  The message names tsquery or tsvector according to
      * state->is_tsquery and quotes the whole input (state->bufstart).
      * Whether errsave() returns at all presumably depends on escontext --
      * confirm against elog.h.
      */
144  errsave(state->escontext,
145  (errcode(ERRCODE_SYNTAX_ERROR),
146  state->is_tsquery ?
147  errmsg("syntax error in tsquery: \"%s\"", state->bufstart) :
148  errmsg("syntax error in tsvector: \"%s\"", state->bufstart)));
149  /* In soft error situation, return false as convenience for caller */
150  return false;
151 }
#define errsave(context,...)
Definition: elog.h:260

References errcode(), errmsg(), and errsave.

◆ reset_tsvector_parser()

void reset_tsvector_parser ( TSVectorParseState  state,
char *  input 
)

Definition at line 81 of file tsvector_parser.c.

82 {
     /*
      * Re-point the scan position at input.  Only prsbuf changes: bufstart,
      * the token buffer and the flags are left as they were.
      */
83  state->prsbuf = input;
84 }

References input.

Referenced by gettoken_query_standard(), and gettoken_query_websearch().