PostgreSQL Source Code  git master
tsvector_parser.c File Reference
#include "postgres.h"
#include "tsearch/ts_locale.h"
#include "tsearch/ts_utils.h"
Include dependency graph for tsvector_parser.c:

Go to the source code of this file.

Data Structures

struct  TSVectorParseStateData
 

Macros

#define RESIZEPRSBUF
 
#define RETURN_TOKEN
 
#define WAITWORD   1
 
#define WAITENDWORD   2
 
#define WAITNEXTCHAR   3
 
#define WAITENDCMPLX   4
 
#define WAITPOSINFO   5
 
#define INPOSINFO   6
 
#define WAITPOSDELIM   7
 
#define WAITCHARCMPLX   8
 
#define PRSSYNTAXERROR   prssyntaxerror(state)
 

Functions

TSVectorParseState init_tsvector_parser (char *input, int flags)
 
void reset_tsvector_parser (TSVectorParseState state, char *input)
 
void close_tsvector_parser (TSVectorParseState state)
 
static void prssyntaxerror (TSVectorParseState state)
 
bool gettoken_tsvector (TSVectorParseState state, char **strval, int *lenval, WordEntryPos **pos_ptr, int *poslen, char **endptr)
 

Macro Definition Documentation

◆ INPOSINFO

#define INPOSINFO   6

Definition at line 121 of file tsvector_parser.c.

Referenced by gettoken_tsvector().

◆ PRSSYNTAXERROR

#define PRSSYNTAXERROR   prssyntaxerror(state)

Definition at line 125 of file tsvector_parser.c.

Referenced by gettoken_tsvector().

◆ RESIZEPRSBUF

#define RESIZEPRSBUF
Value:
do { \
int clen = curpos - state->word; \
if ( clen + state->eml >= state->len ) \
{ \
state->len *= 2; \
state->word = (char *) repalloc(state->word, state->len); \
curpos = state->word + clen; \
} \
} while (0)
Definition: regguts.h:298
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1044

Definition at line 83 of file tsvector_parser.c.

Referenced by gettoken_tsvector().

◆ RETURN_TOKEN

#define RETURN_TOKEN
Value:
do { \
if (pos_ptr != NULL) \
{ \
*pos_ptr = pos; \
*poslen = npos; \
} \
else if (pos != NULL) \
pfree(pos); \
\
if (strval != NULL) \
*strval = state->word; \
if (lenval != NULL) \
*lenval = curpos - state->word; \
if (endptr != NULL) \
*endptr = state->prsbuf; \
return true; \
} while(0)
Definition: regguts.h:298

Definition at line 95 of file tsvector_parser.c.

Referenced by gettoken_tsvector().

◆ WAITCHARCMPLX

#define WAITCHARCMPLX   8

Definition at line 123 of file tsvector_parser.c.

Referenced by gettoken_tsvector().

◆ WAITENDCMPLX

#define WAITENDCMPLX   4

Definition at line 119 of file tsvector_parser.c.

Referenced by gettoken_tsvector().

◆ WAITENDWORD

#define WAITENDWORD   2

Definition at line 117 of file tsvector_parser.c.

Referenced by gettoken_tsvector().

◆ WAITNEXTCHAR

#define WAITNEXTCHAR   3

Definition at line 118 of file tsvector_parser.c.

Referenced by gettoken_tsvector().

◆ WAITPOSDELIM

#define WAITPOSDELIM   7

Definition at line 122 of file tsvector_parser.c.

Referenced by gettoken_tsvector().

◆ WAITPOSINFO

#define WAITPOSINFO   5

Definition at line 120 of file tsvector_parser.c.

Referenced by gettoken_tsvector().

◆ WAITWORD

#define WAITWORD   1

Definition at line 116 of file tsvector_parser.c.

Referenced by gettoken_tsvector().

Function Documentation

◆ close_tsvector_parser()

void close_tsvector_parser ( TSVectorParseState  state)

Definition at line 76 of file tsvector_parser.c.

References pfree(), and TSVectorParseStateData::word.

Referenced by parse_tsquery(), and tsvectorin().

77 {
78  pfree(state->word);
79  pfree(state);
80 }
void pfree(void *pointer)
Definition: mcxt.c:1031

◆ gettoken_tsvector()

bool gettoken_tsvector ( TSVectorParseState  state,
char **  strval,
int *  lenval,
WordEntryPos **  pos_ptr,
int *  poslen,
char **  endptr 
)

Definition at line 155 of file tsvector_parser.c.

References Assert, TSVectorParseStateData::bufstart, COPYCHAR, elog, ereport, errcode(), errmsg(), ERROR, INPOSINFO, TSVectorParseStateData::is_web, ISOPERATOR, LIMITPOS, TSVectorParseStateData::oprisdelim, palloc(), pg_mblen(), TSVectorParseStateData::prsbuf, PRSSYNTAXERROR, repalloc(), RESIZEPRSBUF, RETURN_TOKEN, t_isdigit(), t_iseq, t_isspace(), WAITCHARCMPLX, WAITENDCMPLX, WAITENDWORD, WAITNEXTCHAR, WAITPOSDELIM, WAITPOSINFO, WAITWORD, WEP_GETPOS, WEP_GETWEIGHT, WEP_SETPOS, WEP_SETWEIGHT, and TSVectorParseStateData::word.

Referenced by gettoken_query_standard(), gettoken_query_websearch(), and tsvectorin().

159 {
160  int oldstate = 0;
161  char *curpos = state->word;
162  int statecode = WAITWORD;
163 
164  /*
165  * pos is for collecting the comma delimited list of positions followed by
166  * the actual token.
167  */
168  WordEntryPos *pos = NULL;
169  int npos = 0; /* elements of pos used */
170  int posalen = 0; /* allocated size of pos */
171 
172  while (1)
173  {
174  if (statecode == WAITWORD)
175  {
176  if (*(state->prsbuf) == '\0')
177  return false;
178  else if (!state->is_web && t_iseq(state->prsbuf, '\''))
179  statecode = WAITENDCMPLX;
180  else if (!state->is_web && t_iseq(state->prsbuf, '\\'))
181  {
182  statecode = WAITNEXTCHAR;
183  oldstate = WAITENDWORD;
184  }
185  else if ((state->oprisdelim && ISOPERATOR(state->prsbuf)) ||
186  (state->is_web && t_iseq(state->prsbuf, '"')))
187  PRSSYNTAXERROR;
188  else if (!t_isspace(state->prsbuf))
189  {
190  COPYCHAR(curpos, state->prsbuf);
191  curpos += pg_mblen(state->prsbuf);
192  statecode = WAITENDWORD;
193  }
194  }
195  else if (statecode == WAITNEXTCHAR)
196  {
197  if (*(state->prsbuf) == '\0')
198  ereport(ERROR,
199  (errcode(ERRCODE_SYNTAX_ERROR),
200  errmsg("there is no escaped character: \"%s\"",
201  state->bufstart)));
202  else
203  {
204  RESIZEPRSBUF;
205  COPYCHAR(curpos, state->prsbuf);
206  curpos += pg_mblen(state->prsbuf);
207  Assert(oldstate != 0);
208  statecode = oldstate;
209  }
210  }
211  else if (statecode == WAITENDWORD)
212  {
213  if (!state->is_web && t_iseq(state->prsbuf, '\\'))
214  {
215  statecode = WAITNEXTCHAR;
216  oldstate = WAITENDWORD;
217  }
218  else if (t_isspace(state->prsbuf) || *(state->prsbuf) == '\0' ||
219  (state->oprisdelim && ISOPERATOR(state->prsbuf)) ||
220  (state->is_web && t_iseq(state->prsbuf, '"')))
221  {
222  RESIZEPRSBUF;
223  if (curpos == state->word)
224  PRSSYNTAXERROR;
225  *(curpos) = '\0';
226  RETURN_TOKEN;
227  }
228  else if (t_iseq(state->prsbuf, ':'))
229  {
230  if (curpos == state->word)
231  PRSSYNTAXERROR;
232  *(curpos) = '\0';
233  if (state->oprisdelim)
234  RETURN_TOKEN;
235  else
236  statecode = INPOSINFO;
237  }
238  else
239  {
240  RESIZEPRSBUF;
241  COPYCHAR(curpos, state->prsbuf);
242  curpos += pg_mblen(state->prsbuf);
243  }
244  }
245  else if (statecode == WAITENDCMPLX)
246  {
247  if (!state->is_web && t_iseq(state->prsbuf, '\''))
248  {
249  statecode = WAITCHARCMPLX;
250  }
251  else if (!state->is_web && t_iseq(state->prsbuf, '\\'))
252  {
253  statecode = WAITNEXTCHAR;
254  oldstate = WAITENDCMPLX;
255  }
256  else if (*(state->prsbuf) == '\0')
257  PRSSYNTAXERROR;
258  else
259  {
260  RESIZEPRSBUF;
261  COPYCHAR(curpos, state->prsbuf);
262  curpos += pg_mblen(state->prsbuf);
263  }
264  }
265  else if (statecode == WAITCHARCMPLX)
266  {
267  if (!state->is_web && t_iseq(state->prsbuf, '\''))
268  {
269  RESIZEPRSBUF;
270  COPYCHAR(curpos, state->prsbuf);
271  curpos += pg_mblen(state->prsbuf);
272  statecode = WAITENDCMPLX;
273  }
274  else
275  {
276  RESIZEPRSBUF;
277  *(curpos) = '\0';
278  if (curpos == state->word)
279  PRSSYNTAXERROR;
280  if (state->oprisdelim)
281  {
282  /* state->prsbuf+=pg_mblen(state->prsbuf); */
283  RETURN_TOKEN;
284  }
285  else
286  statecode = WAITPOSINFO;
287  continue; /* recheck current character */
288  }
289  }
290  else if (statecode == WAITPOSINFO)
291  {
292  if (t_iseq(state->prsbuf, ':'))
293  statecode = INPOSINFO;
294  else
295  RETURN_TOKEN;
296  }
297  else if (statecode == INPOSINFO)
298  {
299  if (t_isdigit(state->prsbuf))
300  {
301  if (posalen == 0)
302  {
303  posalen = 4;
304  pos = (WordEntryPos *) palloc(sizeof(WordEntryPos) * posalen);
305  npos = 0;
306  }
307  else if (npos + 1 >= posalen)
308  {
309  posalen *= 2;
310  pos = (WordEntryPos *) repalloc(pos, sizeof(WordEntryPos) * posalen);
311  }
312  npos++;
313  WEP_SETPOS(pos[npos - 1], LIMITPOS(atoi(state->prsbuf)));
314  /* we cannot get here in tsquery, so no need for 2 errmsgs */
315  if (WEP_GETPOS(pos[npos - 1]) == 0)
316  ereport(ERROR,
317  (errcode(ERRCODE_SYNTAX_ERROR),
318  errmsg("wrong position info in tsvector: \"%s\"",
319  state->bufstart)));
320  WEP_SETWEIGHT(pos[npos - 1], 0);
321  statecode = WAITPOSDELIM;
322  }
323  else
324  PRSSYNTAXERROR;
325  }
326  else if (statecode == WAITPOSDELIM)
327  {
328  if (t_iseq(state->prsbuf, ','))
329  statecode = INPOSINFO;
330  else if (t_iseq(state->prsbuf, 'a') || t_iseq(state->prsbuf, 'A') || t_iseq(state->prsbuf, '*'))
331  {
332  if (WEP_GETWEIGHT(pos[npos - 1]))
333  PRSSYNTAXERROR;
334  WEP_SETWEIGHT(pos[npos - 1], 3);
335  }
336  else if (t_iseq(state->prsbuf, 'b') || t_iseq(state->prsbuf, 'B'))
337  {
338  if (WEP_GETWEIGHT(pos[npos - 1]))
339  PRSSYNTAXERROR;
340  WEP_SETWEIGHT(pos[npos - 1], 2);
341  }
342  else if (t_iseq(state->prsbuf, 'c') || t_iseq(state->prsbuf, 'C'))
343  {
344  if (WEP_GETWEIGHT(pos[npos - 1]))
345  PRSSYNTAXERROR;
346  WEP_SETWEIGHT(pos[npos - 1], 1);
347  }
348  else if (t_iseq(state->prsbuf, 'd') || t_iseq(state->prsbuf, 'D'))
349  {
350  if (WEP_GETWEIGHT(pos[npos - 1]))
351  PRSSYNTAXERROR;
352  WEP_SETWEIGHT(pos[npos - 1], 0);
353  }
354  else if (t_isspace(state->prsbuf) ||
355  *(state->prsbuf) == '\0')
356  RETURN_TOKEN;
357  else if (!t_isdigit(state->prsbuf))
358  PRSSYNTAXERROR;
359  }
360  else /* internal error */
361  elog(ERROR, "unrecognized state in gettoken_tsvector: %d",
362  statecode);
363 
364  /* get next char */
365  state->prsbuf += pg_mblen(state->prsbuf);
366  }
367 }
#define COPYCHAR(d, s)
Definition: ts_locale.h:47
uint16 WordEntryPos
Definition: ts_type.h:63
#define WAITPOSDELIM
int errcode(int sqlerrcode)
Definition: elog.c:575
#define WEP_SETPOS(x, v)
Definition: ts_type.h:83
#define ISOPERATOR(x)
Definition: ltree.h:120
int t_isdigit(const char *ptr)
Definition: ts_locale.c:25
#define ERROR
Definition: elog.h:43
#define WAITCHARCMPLX
#define WEP_GETPOS(x)
Definition: ts_type.h:80
#define PRSSYNTAXERROR
int t_isspace(const char *ptr)
Definition: ts_locale.c:41
#define WEP_SETWEIGHT(x, v)
Definition: ts_type.h:82
#define t_iseq(x, c)
Definition: ts_locale.h:45
#define RETURN_TOKEN
#define ereport(elevel, rest)
Definition: elog.h:122
#define Assert(condition)
Definition: c.h:699
#define INPOSINFO
int pg_mblen(const char *mbstr)
Definition: mbutils.c:760
#define WAITNEXTCHAR
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1044
#define WAITPOSINFO
#define RESIZEPRSBUF
#define WAITENDWORD
void * palloc(Size size)
Definition: mcxt.c:924
int errmsg(const char *fmt,...)
Definition: elog.c:797
#define LIMITPOS(x)
Definition: ts_type.h:87
#define elog
Definition: elog.h:219
#define WAITWORD
#define WEP_GETWEIGHT(x)
Definition: ts_type.h:79
#define WAITENDCMPLX

◆ init_tsvector_parser()

TSVectorParseState init_tsvector_parser ( char *  input,
int  flags 
)

Definition at line 46 of file tsvector_parser.c.

References TSVectorParseStateData::bufstart, TSVectorParseStateData::eml, TSVectorParseStateData::is_tsquery, TSVectorParseStateData::is_web, TSVectorParseStateData::len, TSVectorParseStateData::oprisdelim, P_TSV_IS_TSQUERY, P_TSV_IS_WEB, P_TSV_OPR_IS_DELIM, palloc(), pg_database_encoding_max_length(), TSVectorParseStateData::prsbuf, and TSVectorParseStateData::word.

Referenced by parse_tsquery(), and tsvectorin().

47 {
48  TSVectorParseState state;
49 
50  state = (TSVectorParseState) palloc(sizeof(struct TSVectorParseStateData));
51  state->prsbuf = input;
52  state->bufstart = input;
53  state->len = 32;
54  state->word = (char *) palloc(state->len);
55  state->eml = pg_database_encoding_max_length();
56  state->oprisdelim = (flags & P_TSV_OPR_IS_DELIM) != 0;
57  state->is_tsquery = (flags & P_TSV_IS_TSQUERY) != 0;
58  state->is_web = (flags & P_TSV_IS_WEB) != 0;
59 
60  return state;
61 }
#define P_TSV_IS_TSQUERY
Definition: ts_utils.h:29
#define P_TSV_IS_WEB
Definition: ts_utils.h:30
struct TSVectorParseStateData * TSVectorParseState
Definition: ts_utils.h:26
int pg_database_encoding_max_length(void)
Definition: wchar.c:1833
ts_parserstate state
Definition: tsquery.c:81
#define P_TSV_OPR_IS_DELIM
Definition: ts_utils.h:28
void * palloc(Size size)
Definition: mcxt.c:924

◆ prssyntaxerror()

static void prssyntaxerror ( TSVectorParseState  state)
static

Definition at line 128 of file tsvector_parser.c.

References TSVectorParseStateData::bufstart, ereport, errcode(), errmsg(), ERROR, and TSVectorParseStateData::is_tsquery.

129 {
130  ereport(ERROR,
131  (errcode(ERRCODE_SYNTAX_ERROR),
132  state->is_tsquery ?
133  errmsg("syntax error in tsquery: \"%s\"", state->bufstart) :
134  errmsg("syntax error in tsvector: \"%s\"", state->bufstart)));
135 }
int errcode(int sqlerrcode)
Definition: elog.c:575
#define ERROR
Definition: elog.h:43
#define ereport(elevel, rest)
Definition: elog.h:122
int errmsg(const char *fmt,...)
Definition: elog.c:797

◆ reset_tsvector_parser()

void reset_tsvector_parser ( TSVectorParseState  state,
char *  input 
)

Definition at line 67 of file tsvector_parser.c.

References TSVectorParseStateData::prsbuf.

Referenced by gettoken_query_standard(), and gettoken_query_websearch().

68 {
69  state->prsbuf = input;
70 }