PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
tsvector_parser.c File Reference
#include "postgres.h"
#include "tsearch/ts_locale.h"
#include "tsearch/ts_utils.h"
Include dependency graph for tsvector_parser.c:

Go to the source code of this file.

Data Structures

struct  TSVectorParseStateData
 

Macros

#define RESIZEPRSBUF
 
#define ISOPERATOR(x)
 
#define RETURN_TOKEN
 
#define WAITWORD   1
 
#define WAITENDWORD   2
 
#define WAITNEXTCHAR   3
 
#define WAITENDCMPLX   4
 
#define WAITPOSINFO   5
 
#define INPOSINFO   6
 
#define WAITPOSDELIM   7
 
#define WAITCHARCMPLX   8
 
#define PRSSYNTAXERROR   prssyntaxerror(state)
 

Functions

TSVectorParseState init_tsvector_parser (char *input, bool oprisdelim, bool is_tsquery)
 
void reset_tsvector_parser (TSVectorParseState state, char *input)
 
void close_tsvector_parser (TSVectorParseState state)
 
static void prssyntaxerror (TSVectorParseState state)
 
bool gettoken_tsvector (TSVectorParseState state, char **strval, int *lenval, WordEntryPos **pos_ptr, int *poslen, char **endptr)
 

Macro Definition Documentation

#define INPOSINFO   6

Definition at line 129 of file tsvector_parser.c.

Referenced by gettoken_tsvector().

#define ISOPERATOR (   x)
Value:
( pg_mblen(x) == 1 && ( *(x) == '!' || \
*(x) == '&' || \
*(x) == '|' || \
*(x) == '(' || \
*(x) == ')' || \
*(x) == '<' \
) )
int pg_mblen(const char *mbstr)
Definition: mbutils.c:771

Definition at line 93 of file tsvector_parser.c.

Referenced by gettoken_tsvector().

#define PRSSYNTAXERROR   prssyntaxerror(state)

Definition at line 133 of file tsvector_parser.c.

Referenced by gettoken_tsvector().

#define RESIZEPRSBUF
Value:
do { \
int clen = curpos - state->word; \
if ( clen + state->eml >= state->len ) \
{ \
state->len *= 2; \
state->word = (char *) repalloc(state->word, state->len); \
curpos = state->word + clen; \
} \
} while (0)
Definition: regguts.h:298
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1021

Definition at line 81 of file tsvector_parser.c.

Referenced by gettoken_tsvector().

#define RETURN_TOKEN
Value:
do { \
if (pos_ptr != NULL) \
{ \
*pos_ptr = pos; \
*poslen = npos; \
} \
else if (pos != NULL) \
pfree(pos); \
\
if (strval != NULL) \
*strval = state->word; \
if (lenval != NULL) \
*lenval = curpos - state->word; \
if (endptr != NULL) \
*endptr = state->prsbuf; \
return true; \
} while(0)
void pfree(void *pointer)
Definition: mcxt.c:992
#define NULL
Definition: c.h:226
Definition: regguts.h:298

Definition at line 103 of file tsvector_parser.c.

Referenced by gettoken_tsvector().

#define WAITCHARCMPLX   8

Definition at line 131 of file tsvector_parser.c.

Referenced by gettoken_tsvector().

#define WAITENDCMPLX   4

Definition at line 127 of file tsvector_parser.c.

Referenced by gettoken_tsvector().

#define WAITENDWORD   2

Definition at line 125 of file tsvector_parser.c.

Referenced by gettoken_tsvector().

#define WAITNEXTCHAR   3

Definition at line 126 of file tsvector_parser.c.

Referenced by gettoken_tsvector().

#define WAITPOSDELIM   7

Definition at line 130 of file tsvector_parser.c.

Referenced by gettoken_tsvector().

#define WAITPOSINFO   5

Definition at line 128 of file tsvector_parser.c.

Referenced by gettoken_tsvector().

#define WAITWORD   1

Definition at line 124 of file tsvector_parser.c.

Referenced by gettoken_tsvector().

Function Documentation

void close_tsvector_parser ( TSVectorParseState  state)

Definition at line 74 of file tsvector_parser.c.

References pfree(), and TSVectorParseStateData::word.

Referenced by parse_tsquery(), and tsvectorin().

75 {
76  pfree(state->word);
77  pfree(state);
78 }
void pfree(void *pointer)
Definition: mcxt.c:992
bool gettoken_tsvector ( TSVectorParseState  state,
char **  strval,
int *  lenval,
WordEntryPos **  pos_ptr,
int *  poslen,
char **  endptr 
)

Definition at line 163 of file tsvector_parser.c.

References Assert, TSVectorParseStateData::bufstart, COPYCHAR, elog, ereport, errcode(), errmsg(), ERROR, INPOSINFO, ISOPERATOR, LIMITPOS, NULL, TSVectorParseStateData::oprisdelim, palloc(), pg_mblen(), TSVectorParseStateData::prsbuf, PRSSYNTAXERROR, repalloc(), RESIZEPRSBUF, RETURN_TOKEN, t_isdigit, t_iseq, t_isspace, WAITCHARCMPLX, WAITENDCMPLX, WAITENDWORD, WAITNEXTCHAR, WAITPOSDELIM, WAITPOSINFO, WAITWORD, WEP_GETPOS, WEP_GETWEIGHT, WEP_SETPOS, WEP_SETWEIGHT, and TSVectorParseStateData::word.

Referenced by gettoken_query(), and tsvectorin().

167 {
168  int oldstate = 0;
169  char *curpos = state->word;
170  int statecode = WAITWORD;
171 
172  /*
173  * pos is for collecting the comma delimited list of positions followed by
174  * the actual token.
175  */
176  WordEntryPos *pos = NULL;
177  int npos = 0; /* elements of pos used */
178  int posalen = 0; /* allocated size of pos */
179 
180  while (1)
181  {
182  if (statecode == WAITWORD)
183  {
184  if (*(state->prsbuf) == '\0')
185  return false;
186  else if (t_iseq(state->prsbuf, '\''))
187  statecode = WAITENDCMPLX;
188  else if (t_iseq(state->prsbuf, '\\'))
189  {
190  statecode = WAITNEXTCHAR;
191  oldstate = WAITENDWORD;
192  }
193  else if (state->oprisdelim && ISOPERATOR(state->prsbuf))
194  PRSSYNTAXERROR;
195  else if (!t_isspace(state->prsbuf))
196  {
197  COPYCHAR(curpos, state->prsbuf);
198  curpos += pg_mblen(state->prsbuf);
199  statecode = WAITENDWORD;
200  }
201  }
202  else if (statecode == WAITNEXTCHAR)
203  {
204  if (*(state->prsbuf) == '\0')
205  ereport(ERROR,
206  (errcode(ERRCODE_SYNTAX_ERROR),
207  errmsg("there is no escaped character: \"%s\"",
208  state->bufstart)));
209  else
210  {
211  RESIZEPRSBUF;
212  COPYCHAR(curpos, state->prsbuf);
213  curpos += pg_mblen(state->prsbuf);
214  Assert(oldstate != 0);
215  statecode = oldstate;
216  }
217  }
218  else if (statecode == WAITENDWORD)
219  {
220  if (t_iseq(state->prsbuf, '\\'))
221  {
222  statecode = WAITNEXTCHAR;
223  oldstate = WAITENDWORD;
224  }
225  else if (t_isspace(state->prsbuf) || *(state->prsbuf) == '\0' ||
226  (state->oprisdelim && ISOPERATOR(state->prsbuf)))
227  {
228  RESIZEPRSBUF;
229  if (curpos == state->word)
230  PRSSYNTAXERROR;
231  *(curpos) = '\0';
232  RETURN_TOKEN;
233  }
234  else if (t_iseq(state->prsbuf, ':'))
235  {
236  if (curpos == state->word)
237  PRSSYNTAXERROR;
238  *(curpos) = '\0';
239  if (state->oprisdelim)
240  RETURN_TOKEN;
241  else
242  statecode = INPOSINFO;
243  }
244  else
245  {
246  RESIZEPRSBUF;
247  COPYCHAR(curpos, state->prsbuf);
248  curpos += pg_mblen(state->prsbuf);
249  }
250  }
251  else if (statecode == WAITENDCMPLX)
252  {
253  if (t_iseq(state->prsbuf, '\''))
254  {
255  statecode = WAITCHARCMPLX;
256  }
257  else if (t_iseq(state->prsbuf, '\\'))
258  {
259  statecode = WAITNEXTCHAR;
260  oldstate = WAITENDCMPLX;
261  }
262  else if (*(state->prsbuf) == '\0')
263  PRSSYNTAXERROR;
264  else
265  {
266  RESIZEPRSBUF;
267  COPYCHAR(curpos, state->prsbuf);
268  curpos += pg_mblen(state->prsbuf);
269  }
270  }
271  else if (statecode == WAITCHARCMPLX)
272  {
273  if (t_iseq(state->prsbuf, '\''))
274  {
275  RESIZEPRSBUF;
276  COPYCHAR(curpos, state->prsbuf);
277  curpos += pg_mblen(state->prsbuf);
278  statecode = WAITENDCMPLX;
279  }
280  else
281  {
282  RESIZEPRSBUF;
283  *(curpos) = '\0';
284  if (curpos == state->word)
285  PRSSYNTAXERROR;
286  if (state->oprisdelim)
287  {
288  /* state->prsbuf+=pg_mblen(state->prsbuf); */
289  RETURN_TOKEN;
290  }
291  else
292  statecode = WAITPOSINFO;
293  continue; /* recheck current character */
294  }
295  }
296  else if (statecode == WAITPOSINFO)
297  {
298  if (t_iseq(state->prsbuf, ':'))
299  statecode = INPOSINFO;
300  else
301  RETURN_TOKEN;
302  }
303  else if (statecode == INPOSINFO)
304  {
305  if (t_isdigit(state->prsbuf))
306  {
307  if (posalen == 0)
308  {
309  posalen = 4;
310  pos = (WordEntryPos *) palloc(sizeof(WordEntryPos) * posalen);
311  npos = 0;
312  }
313  else if (npos + 1 >= posalen)
314  {
315  posalen *= 2;
316  pos = (WordEntryPos *) repalloc(pos, sizeof(WordEntryPos) * posalen);
317  }
318  npos++;
319  WEP_SETPOS(pos[npos - 1], LIMITPOS(atoi(state->prsbuf)));
320  /* we cannot get here in tsquery, so no need for 2 errmsgs */
321  if (WEP_GETPOS(pos[npos - 1]) == 0)
322  ereport(ERROR,
323  (errcode(ERRCODE_SYNTAX_ERROR),
324  errmsg("wrong position info in tsvector: \"%s\"",
325  state->bufstart)));
326  WEP_SETWEIGHT(pos[npos - 1], 0);
327  statecode = WAITPOSDELIM;
328  }
329  else
330  PRSSYNTAXERROR;
331  }
332  else if (statecode == WAITPOSDELIM)
333  {
334  if (t_iseq(state->prsbuf, ','))
335  statecode = INPOSINFO;
336  else if (t_iseq(state->prsbuf, 'a') || t_iseq(state->prsbuf, 'A') || t_iseq(state->prsbuf, '*'))
337  {
338  if (WEP_GETWEIGHT(pos[npos - 1]))
339  PRSSYNTAXERROR;
340  WEP_SETWEIGHT(pos[npos - 1], 3);
341  }
342  else if (t_iseq(state->prsbuf, 'b') || t_iseq(state->prsbuf, 'B'))
343  {
344  if (WEP_GETWEIGHT(pos[npos - 1]))
345  PRSSYNTAXERROR;
346  WEP_SETWEIGHT(pos[npos - 1], 2);
347  }
348  else if (t_iseq(state->prsbuf, 'c') || t_iseq(state->prsbuf, 'C'))
349  {
350  if (WEP_GETWEIGHT(pos[npos - 1]))
351  PRSSYNTAXERROR;
352  WEP_SETWEIGHT(pos[npos - 1], 1);
353  }
354  else if (t_iseq(state->prsbuf, 'd') || t_iseq(state->prsbuf, 'D'))
355  {
356  if (WEP_GETWEIGHT(pos[npos - 1]))
357  PRSSYNTAXERROR;
358  WEP_SETWEIGHT(pos[npos - 1], 0);
359  }
360  else if (t_isspace(state->prsbuf) ||
361  *(state->prsbuf) == '\0')
362  RETURN_TOKEN;
363  else if (!t_isdigit(state->prsbuf))
364  PRSSYNTAXERROR;
365  }
366  else /* internal error */
367  elog(ERROR, "unrecognized state in gettoken_tsvector: %d",
368  statecode);
369 
370  /* get next char */
371  state->prsbuf += pg_mblen(state->prsbuf);
372  }
373 }
#define COPYCHAR(d, s)
Definition: ts_locale.h:63
uint16 WordEntryPos
Definition: ts_type.h:63
#define t_isspace(x)
Definition: ts_locale.h:58
#define WAITPOSDELIM
int errcode(int sqlerrcode)
Definition: elog.c:575
#define t_isdigit(x)
Definition: ts_locale.h:57
#define WEP_SETPOS(x, v)
Definition: ts_type.h:83
#define ERROR
Definition: elog.h:43
#define WAITCHARCMPLX
#define WEP_GETPOS(x)
Definition: ts_type.h:80
#define ISOPERATOR(x)
#define PRSSYNTAXERROR
#define WEP_SETWEIGHT(x, v)
Definition: ts_type.h:82
#define t_iseq(x, c)
Definition: ts_locale.h:61
#define RETURN_TOKEN
#define ereport(elevel, rest)
Definition: elog.h:122
#define NULL
Definition: c.h:226
#define Assert(condition)
Definition: c.h:670
#define INPOSINFO
int pg_mblen(const char *mbstr)
Definition: mbutils.c:771
#define WAITNEXTCHAR
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1021
#define WAITPOSINFO
#define RESIZEPRSBUF
#define WAITENDWORD
void * palloc(Size size)
Definition: mcxt.c:891
int errmsg(const char *fmt,...)
Definition: elog.c:797
#define LIMITPOS(x)
Definition: ts_type.h:87
#define elog
Definition: elog.h:219
#define WAITWORD
#define WEP_GETWEIGHT(x)
Definition: ts_type.h:79
#define WAITENDCMPLX
TSVectorParseState init_tsvector_parser ( char *  input,
bool  oprisdelim,
bool  is_tsquery 
)

Definition at line 45 of file tsvector_parser.c.

References TSVectorParseStateData::bufstart, TSVectorParseStateData::eml, TSVectorParseStateData::is_tsquery, TSVectorParseStateData::len, TSVectorParseStateData::oprisdelim, palloc(), pg_database_encoding_max_length(), TSVectorParseStateData::prsbuf, and TSVectorParseStateData::word.

Referenced by parse_tsquery(), and tsvectorin().

46 {
47  TSVectorParseState state;
48 
49  state = (TSVectorParseState) palloc(sizeof(struct TSVectorParseStateData));
50  state->prsbuf = input;
51  state->bufstart = input;
52  state->len = 32;
53  state->word = (char *) palloc(state->len);
54  state->eml = pg_database_encoding_max_length();
55  state->oprisdelim = oprisdelim;
56  state->is_tsquery = is_tsquery;
57 
58  return state;
59 }
struct TSVectorParseStateData * TSVectorParseState
Definition: ts_utils.h:26
int pg_database_encoding_max_length(void)
Definition: wchar.c:1833
void * palloc(Size size)
Definition: mcxt.c:891
static void prssyntaxerror ( TSVectorParseState  state)
static

Definition at line 136 of file tsvector_parser.c.

References TSVectorParseStateData::bufstart, ereport, errcode(), errmsg(), ERROR, and TSVectorParseStateData::is_tsquery.

137 {
138  ereport(ERROR,
139  (errcode(ERRCODE_SYNTAX_ERROR),
140  state->is_tsquery ?
141  errmsg("syntax error in tsquery: \"%s\"", state->bufstart) :
142  errmsg("syntax error in tsvector: \"%s\"", state->bufstart)));
143 }
int errcode(int sqlerrcode)
Definition: elog.c:575
#define ERROR
Definition: elog.h:43
#define ereport(elevel, rest)
Definition: elog.h:122
int errmsg(const char *fmt,...)
Definition: elog.c:797
void reset_tsvector_parser ( TSVectorParseState  state,
char *  input 
)

Definition at line 65 of file tsvector_parser.c.

References TSVectorParseStateData::prsbuf.

Referenced by gettoken_query().

66 {
67  state->prsbuf = input;
68 }