PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
ltxtquery_io.c
Go to the documentation of this file.
1 /*
2  * txtquery io
3  * Teodor Sigaev <teodor@stack.net>
4  * contrib/ltree/ltxtquery_io.c
5  */
6 #include "postgres.h"
7 
8 #include <ctype.h>
9 
10 #include "crc32.h"
11 #include "ltree.h"
12 #include "miscadmin.h"

/* fmgr version-1 declarations for the SQL-callable functions in this file */
PG_FUNCTION_INFO_V1(ltxtq_in);
PG_FUNCTION_INFO_V1(ltxtq_out);

/*
 * Parser states used by gettoken_query().
 */
#define WAITOPERAND		1		/* expecting '!', '(' or the start of an operand */
#define INOPERAND		2		/* inside an operand or its trailing modifiers */
#define WAITOPERATOR	3		/* expecting '&', '|', ')' or end of input */
22 
23 /*
24  * node of query tree, also used
25  * for storing polish notation in parser
26  */
27 typedef struct NODE
28 {
29  int32 type;
30  int32 val;
34  struct NODE *next;
35 } NODE;
36 
37 typedef struct
38 {
39  char *buf;
42  /* reverse polish notation in list (for temporary usage) */
44  /* number in str */
46 
47  /* user-friendly operand */
50  char *op;
51  char *curop;
52 } QPRS_STATE;
53 
54 /*
55  * get token from query string
56  */
57 static int32
58 gettoken_query(QPRS_STATE *state, int32 *val, int32 *lenval, char **strval, uint16 *flag)
59 {
60  int charlen;
61 
62  for (;;)
63  {
64  charlen = pg_mblen(state->buf);
65 
66  switch (state->state)
67  {
68  case WAITOPERAND:
69  if (charlen == 1 && t_iseq(state->buf, '!'))
70  {
71  (state->buf)++;
72  *val = (int32) '!';
73  return OPR;
74  }
75  else if (charlen == 1 && t_iseq(state->buf, '('))
76  {
77  state->count++;
78  (state->buf)++;
79  return OPEN;
80  }
81  else if (ISALNUM(state->buf))
82  {
83  state->state = INOPERAND;
84  *strval = state->buf;
85  *lenval = charlen;
86  *flag = 0;
87  }
88  else if (!t_isspace(state->buf))
89  ereport(ERROR,
90  (errcode(ERRCODE_SYNTAX_ERROR),
91  errmsg("operand syntax error")));
92  break;
93  case INOPERAND:
94  if (ISALNUM(state->buf))
95  {
96  if (*flag)
97  ereport(ERROR,
98  (errcode(ERRCODE_SYNTAX_ERROR),
99  errmsg("modifiers syntax error")));
100  *lenval += charlen;
101  }
102  else if (charlen == 1 && t_iseq(state->buf, '%'))
103  *flag |= LVAR_SUBLEXEME;
104  else if (charlen == 1 && t_iseq(state->buf, '@'))
105  *flag |= LVAR_INCASE;
106  else if (charlen == 1 && t_iseq(state->buf, '*'))
107  *flag |= LVAR_ANYEND;
108  else
109  {
110  state->state = WAITOPERATOR;
111  return VAL;
112  }
113  break;
114  case WAITOPERATOR:
115  if (charlen == 1 && (t_iseq(state->buf, '&') || t_iseq(state->buf, '|')))
116  {
117  state->state = WAITOPERAND;
118  *val = (int32) *(state->buf);
119  (state->buf)++;
120  return OPR;
121  }
122  else if (charlen == 1 && t_iseq(state->buf, ')'))
123  {
124  (state->buf)++;
125  state->count--;
126  return (state->count < 0) ? ERR : CLOSE;
127  }
128  else if (*(state->buf) == '\0')
129  return (state->count) ? ERR : END;
130  else if (charlen == 1 && !t_iseq(state->buf, ' '))
131  return ERR;
132  break;
133  default:
134  return ERR;
135  break;
136  }
137 
138  state->buf += charlen;
139  }
140 }
141 
142 /*
143  * push new one in polish notation reverse view
144  */
145 static void
147 {
148  NODE *tmp = (NODE *) palloc(sizeof(NODE));
149 
150  tmp->type = type;
151  tmp->val = val;
152  tmp->flag = flag;
153  if (distance > 0xffff)
154  ereport(ERROR,
155  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
156  errmsg("value is too big")));
157  if (lenval > 0xff)
158  ereport(ERROR,
159  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
160  errmsg("operand is too long")));
161  tmp->distance = distance;
162  tmp->length = lenval;
163  tmp->next = state->str;
164  state->str = tmp;
165  state->num++;
166 }
167 
168 /*
169  * This function is used for query_txt parsing
170  */
171 static void
172 pushval_asis(QPRS_STATE *state, int type, char *strval, int lenval, uint16 flag)
173 {
174  if (lenval > 0xffff)
175  ereport(ERROR,
176  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
177  errmsg("word is too long")));
178 
179  pushquery(state, type, ltree_crc32_sz(strval, lenval),
180  state->curop - state->op, lenval, flag);
181 
182  while (state->curop - state->op + lenval + 1 >= state->lenop)
183  {
184  int32 tmp = state->curop - state->op;
185 
186  state->lenop *= 2;
187  state->op = (char *) repalloc((void *) state->op, state->lenop);
188  state->curop = state->op + tmp;
189  }
190  memcpy((void *) state->curop, (void *) strval, lenval);
191  state->curop += lenval;
192  *(state->curop) = '\0';
193  state->curop++;
194  state->sumlen += lenval + 1;
195  return;
196 }
197 
198 #define STACKDEPTH 32
199 /*
200  * make polish notation of query
201  */
202 static int32
204 {
205  int32 val = 0,
206  type;
207  int32 lenval = 0;
208  char *strval = NULL;
209  int32 stack[STACKDEPTH];
210  int32 lenstack = 0;
211  uint16 flag = 0;
212 
213  /* since this function recurses, it could be driven to stack overflow */
215 
216  while ((type = gettoken_query(state, &val, &lenval, &strval, &flag)) != END)
217  {
218  switch (type)
219  {
220  case VAL:
221  pushval_asis(state, VAL, strval, lenval, flag);
222  while (lenstack && (stack[lenstack - 1] == (int32) '&' ||
223  stack[lenstack - 1] == (int32) '!'))
224  {
225  lenstack--;
226  pushquery(state, OPR, stack[lenstack], 0, 0, 0);
227  }
228  break;
229  case OPR:
230  if (lenstack && val == (int32) '|')
231  pushquery(state, OPR, val, 0, 0, 0);
232  else
233  {
234  if (lenstack == STACKDEPTH)
235  /* internal error */
236  elog(ERROR, "stack too short");
237  stack[lenstack] = val;
238  lenstack++;
239  }
240  break;
241  case OPEN:
242  if (makepol(state) == ERR)
243  return ERR;
244  while (lenstack && (stack[lenstack - 1] == (int32) '&' ||
245  stack[lenstack - 1] == (int32) '!'))
246  {
247  lenstack--;
248  pushquery(state, OPR, stack[lenstack], 0, 0, 0);
249  }
250  break;
251  case CLOSE:
252  while (lenstack)
253  {
254  lenstack--;
255  pushquery(state, OPR, stack[lenstack], 0, 0, 0);
256  };
257  return END;
258  break;
259  case ERR:
260  default:
261  ereport(ERROR,
262  (errcode(ERRCODE_SYNTAX_ERROR),
263  errmsg("syntax error")));
264 
265  return ERR;
266 
267  }
268  }
269  while (lenstack)
270  {
271  lenstack--;
272  pushquery(state, OPR, stack[lenstack], 0, 0, 0);
273  };
274  return END;
275 }
276 
277 static void
278 findoprnd(ITEM *ptr, int32 *pos)
279 {
280  /* since this function recurses, it could be driven to stack overflow. */
282 
283  if (ptr[*pos].type == VAL || ptr[*pos].type == VALTRUE)
284  {
285  ptr[*pos].left = 0;
286  (*pos)++;
287  }
288  else if (ptr[*pos].val == (int32) '!')
289  {
290  ptr[*pos].left = 1;
291  (*pos)++;
292  findoprnd(ptr, pos);
293  }
294  else
295  {
296  ITEM *curitem = &ptr[*pos];
297  int32 tmp = *pos;
298 
299  (*pos)++;
300  findoprnd(ptr, pos);
301  curitem->left = *pos - tmp;
302  findoprnd(ptr, pos);
303  }
304 }
305 
306 
307 /*
308  * input
309  */
310 static ltxtquery *
311 queryin(char *buf)
312 {
314  int32 i;
315  ltxtquery *query;
316  int32 commonlen;
317  ITEM *ptr;
318  NODE *tmp;
319  int32 pos = 0;
320 
321 #ifdef BS_DEBUG
322  char pbuf[16384],
323  *cur;
324 #endif
325 
326  /* init state */
327  state.buf = buf;
328  state.state = WAITOPERAND;
329  state.count = 0;
330  state.num = 0;
331  state.str = NULL;
332 
333  /* init list of operand */
334  state.sumlen = 0;
335  state.lenop = 64;
336  state.curop = state.op = (char *) palloc(state.lenop);
337  *(state.curop) = '\0';
338 
339  /* parse query & make polish notation (postfix, but in reverse order) */
340  makepol(&state);
341  if (!state.num)
342  ereport(ERROR,
343  (errcode(ERRCODE_SYNTAX_ERROR),
344  errmsg("syntax error"),
345  errdetail("Empty query.")));
346 
347  if (LTXTQUERY_TOO_BIG(state.num, state.sumlen))
348  ereport(ERROR,
349  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
350  errmsg("ltxtquery is too large")));
351  commonlen = COMPUTESIZE(state.num, state.sumlen);
352 
353  query = (ltxtquery *) palloc0(commonlen);
354  SET_VARSIZE(query, commonlen);
355  query->size = state.num;
356  ptr = GETQUERY(query);
357 
358  /* set item in polish notation */
359  for (i = 0; i < state.num; i++)
360  {
361  ptr[i].type = state.str->type;
362  ptr[i].val = state.str->val;
363  ptr[i].distance = state.str->distance;
364  ptr[i].length = state.str->length;
365  ptr[i].flag = state.str->flag;
366  tmp = state.str->next;
367  pfree(state.str);
368  state.str = tmp;
369  }
370 
371  /* set user friendly-operand view */
372  memcpy((void *) GETOPERAND(query), (void *) state.op, state.sumlen);
373  pfree(state.op);
374 
375  /* set left operand's position for every operator */
376  pos = 0;
377  findoprnd(ptr, &pos);
378 
379  return query;
380 }
381 
382 /*
383  * in without morphology
384  */
385 Datum
387 {
389 }
390 
391 /*
392  * out function
393  */
394 typedef struct
395 {
396  ITEM *curpol;
397  char *buf;
398  char *cur;
399  char *op;
400  int32 buflen;
401 } INFIX;
402 
/*
 * Ensure the output buffer of *inf has room for addsize more bytes plus a
 * terminator, doubling buflen until it does.  cur keeps its offset from buf
 * when repalloc moves the buffer.
 *
 * The body is wrapped in do { } while (0) so that "RESIZEBUF(x, n);" is a
 * single statement and stays correct inside an unbraced if/else.
 */
#define RESIZEBUF(inf,addsize) \
do { \
	while( ( (inf)->cur - (inf)->buf ) + (addsize) + 1 >= (inf)->buflen ) \
	{ \
		int32 len = (inf)->cur - (inf)->buf; \
		(inf)->buflen *= 2; \
		(inf)->buf = (char*) repalloc( (void*)(inf)->buf, (inf)->buflen ); \
		(inf)->cur = (inf)->buf + len; \
	} \
} while (0)
411 
412 /*
413  * recursive walk on tree and print it in
414  * infix (human-readable) view
415  */
416 static void
417 infix(INFIX *in, bool first)
418 {
419  /* since this function recurses, it could be driven to stack overflow. */
421 
422  if (in->curpol->type == VAL)
423  {
424  char *op = in->op + in->curpol->distance;
425 
426  RESIZEBUF(in, in->curpol->length * 2 + 5);
427  while (*op)
428  {
429  *(in->cur) = *op;
430  op++;
431  in->cur++;
432  }
433  if (in->curpol->flag & LVAR_SUBLEXEME)
434  {
435  *(in->cur) = '%';
436  in->cur++;
437  }
438  if (in->curpol->flag & LVAR_INCASE)
439  {
440  *(in->cur) = '@';
441  in->cur++;
442  }
443  if (in->curpol->flag & LVAR_ANYEND)
444  {
445  *(in->cur) = '*';
446  in->cur++;
447  }
448  *(in->cur) = '\0';
449  in->curpol++;
450  }
451  else if (in->curpol->val == (int32) '!')
452  {
453  bool isopr = false;
454 
455  RESIZEBUF(in, 1);
456  *(in->cur) = '!';
457  in->cur++;
458  *(in->cur) = '\0';
459  in->curpol++;
460  if (in->curpol->type == OPR)
461  {
462  isopr = true;
463  RESIZEBUF(in, 2);
464  sprintf(in->cur, "( ");
465  in->cur = strchr(in->cur, '\0');
466  }
467  infix(in, isopr);
468  if (isopr)
469  {
470  RESIZEBUF(in, 2);
471  sprintf(in->cur, " )");
472  in->cur = strchr(in->cur, '\0');
473  }
474  }
475  else
476  {
477  int32 op = in->curpol->val;
478  INFIX nrm;
479 
480  in->curpol++;
481  if (op == (int32) '|' && !first)
482  {
483  RESIZEBUF(in, 2);
484  sprintf(in->cur, "( ");
485  in->cur = strchr(in->cur, '\0');
486  }
487 
488  nrm.curpol = in->curpol;
489  nrm.op = in->op;
490  nrm.buflen = 16;
491  nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
492 
493  /* get right operand */
494  infix(&nrm, false);
495 
496  /* get & print left operand */
497  in->curpol = nrm.curpol;
498  infix(in, false);
499 
500  /* print operator & right operand */
501  RESIZEBUF(in, 3 + (nrm.cur - nrm.buf));
502  sprintf(in->cur, " %c %s", op, nrm.buf);
503  in->cur = strchr(in->cur, '\0');
504  pfree(nrm.buf);
505 
506  if (op == (int32) '|' && !first)
507  {
508  RESIZEBUF(in, 2);
509  sprintf(in->cur, " )");
510  in->cur = strchr(in->cur, '\0');
511  }
512  }
513 }
514 
515 Datum
517 {
518  ltxtquery *query = PG_GETARG_LTXTQUERY(0);
519  INFIX nrm;
520 
521  if (query->size == 0)
522  ereport(ERROR,
523  (errcode(ERRCODE_SYNTAX_ERROR),
524  errmsg("syntax error"),
525  errdetail("Empty query.")));
526 
527  nrm.curpol = GETQUERY(query);
528  nrm.buflen = 32;
529  nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
530  *(nrm.cur) = '\0';
531  nrm.op = GETOPERAND(query);
532  infix(&nrm, true);
533 
534  PG_FREE_IF_COPY(query, 0);
535  PG_RETURN_POINTER(nrm.buf);
536 }
#define t_isspace(x)
Definition: ts_locale.h:58
signed short int16
Definition: c.h:255
Definition: _int.h:125
#define PG_RETURN_POINTER(x)
Definition: fmgr.h:313
#define LVAR_INCASE
Definition: ltree.h:44
#define WAITOPERAND
Definition: ltxtquery_io.c:19
static int32 makepol(QPRS_STATE *state)
Definition: ltxtquery_io.c:203
Definition: _int_bool.c:27
#define LTXTQUERY_TOO_BIG(size, lenofoperand)
Definition: ltree.h:115
#define ERR
Definition: _int.h:146
int32 size
Definition: ltree.h:109
static void pushval_asis(QPRS_STATE *state, int type, char *strval, int lenval, uint16 flag)
Definition: ltxtquery_io.c:172
#define ISALNUM(x)
Definition: ltree.h:83
ITEM * curpol
Definition: _int_bool.c:551
#define PG_GETARG_LTXTQUERY(x)
Definition: ltree.h:172
#define LVAR_ANYEND
Definition: ltree.h:43
int32 val
Definition: _int_bool.c:30
int32 count
Definition: ltxtquery_io.c:41
char * op
Definition: ltxtquery_io.c:50
uint8 length
Definition: ltree.h:98
char * curop
Definition: ltxtquery_io.c:51
char * op
Definition: ltxtquery_io.c:399
PG_FUNCTION_INFO_V1(ltxtq_in)
struct cursor * cur
Definition: ecpg.c:28
char * buf
Definition: ltxtquery_io.c:39
int errcode(int sqlerrcode)
Definition: elog.c:575
static int32 gettoken_query(QPRS_STATE *state, int32 *val, int32 *lenval, char **strval, uint16 *flag)
Definition: ltxtquery_io.c:58
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:233
uint8 flag
Definition: ltree.h:96
struct NODE NODE
unsigned int ltree_crc32_sz(char *buf, int size)
Definition: crc32.c:23
#define GETQUERY(x)
Definition: _int.h:142
int32 sumlen
Definition: ltxtquery_io.c:49
struct NODE * next
Definition: _int_bool.c:31
NODE * str
Definition: ltxtquery_io.c:43
signed int int32
Definition: c.h:256
#define GETOPERAND(x)
Definition: ltree.h:118
int16 type
Definition: _int.h:127
int32 state
Definition: ltxtquery_io.c:40
unsigned short uint16
Definition: c.h:267
void pfree(void *pointer)
Definition: mcxt.c:950
#define END
Definition: _int.h:145
#define CLOSE
Definition: _int.h:150
#define ERROR
Definition: elog.h:43
#define VALTRUE
Definition: ltree.h:128
uint16 flag
Definition: ltxtquery_io.c:33
static char * buf
Definition: pg_test_fsync.c:65
#define WAITOPERATOR
Definition: ltxtquery_io.c:21
void check_stack_depth(void)
Definition: postgres.c:3098
#define t_iseq(x, c)
Definition: ts_locale.h:61
char * flag(int b)
Definition: test-ctype.c:33
int errdetail(const char *fmt,...)
Definition: elog.c:873
int32 val
Definition: _int.h:129
static ltxtquery * queryin(char *buf)
Definition: ltxtquery_io.c:311
#define ereport(elevel, rest)
Definition: elog.h:122
#define OPEN
Definition: _int.h:149
static void pushquery(QPRS_STATE *state, int32 type, int32 val, int32 distance, int32 lenval, uint16 flag)
Definition: ltxtquery_io.c:146
void * palloc0(Size size)
Definition: mcxt.c:878
Datum ltxtq_in(PG_FUNCTION_ARGS)
Definition: ltxtquery_io.c:386
uintptr_t Datum
Definition: postgres.h:372
#define COMPUTESIZE(size)
Definition: _int.h:140
int16 left
Definition: _int.h:128
#define VAL
Definition: _int.h:147
int32 lenop
Definition: ltxtquery_io.c:48
int16 length
Definition: ltxtquery_io.c:32
#define NULL
Definition: c.h:229
#define OPR
Definition: _int.h:148
Definition: regguts.h:298
#define LVAR_SUBLEXEME
Definition: ltree.h:45
static void infix(INFIX *in, bool first)
Definition: ltxtquery_io.c:417
#define PG_FREE_IF_COPY(ptr, n)
Definition: fmgr.h:217
int pg_mblen(const char *mbstr)
Definition: mbutils.c:771
#define INOPERAND
Definition: ltxtquery_io.c:20
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:963
#define STACKDEPTH
Definition: ltxtquery_io.c:198
int16 distance
Definition: ltxtquery_io.c:31
char * cur
Definition: _int_bool.c:553
static void findoprnd(ITEM *ptr, int32 *pos)
Definition: ltxtquery_io.c:278
void * palloc(Size size)
Definition: mcxt.c:849
int32 buflen
Definition: _int_bool.c:554
int errmsg(const char *fmt,...)
Definition: elog.c:797
int i
#define RESIZEBUF(inf, addsize)
Definition: ltxtquery_io.c:403
#define PG_FUNCTION_ARGS
Definition: fmgr.h:150
#define SET_VARSIZE(PTR, len)
Definition: postgres.h:328
Datum ltxtq_out(PG_FUNCTION_ARGS)
Definition: ltxtquery_io.c:516
#define elog
Definition: elog.h:219
long val
Definition: informix.c:689
uint16 distance
Definition: ltree.h:99
int32 type
Definition: _int_bool.c:29
char * buf
Definition: _int_bool.c:552