PostgreSQL Source Code  git master
ltxtquery_io.c
Go to the documentation of this file.
1 /*
2  * txtquery io
3  * Teodor Sigaev <teodor@stack.net>
4  * contrib/ltree/ltxtquery_io.c
5  */
6 #include "postgres.h"
7 
8 #include <ctype.h>
9 
10 #include "crc32.h"
11 #include "libpq/pqformat.h"
12 #include "ltree.h"
13 #include "miscadmin.h"
14 
15 
/*
 * States of the query tokenizer (see gettoken_query):
 *	WAITOPERAND  - expecting an operand, '!' or '('
 *	INOPERAND    - currently scanning the characters of an operand
 *	WAITOPERATOR - expecting '&', '|', ')' or end of input
 */
#define WAITOPERAND		1
#define INOPERAND		2
#define WAITOPERATOR	3
20 
21 /*
22  * node of query tree, also used
23  * for storing polish notation in parser
24  */
25 typedef struct NODE
26 {
27  int32 type;
28  int32 val;
32  struct NODE *next;
33 } NODE;
34 
35 typedef struct
36 {
37  char *buf;
40  /* reverse polish notation in list (for temporary usage) */
42  /* number in str */
44 
45  /* user-friendly operand */
48  char *op;
49  char *curop;
50 } QPRS_STATE;
51 
52 /*
53  * get token from query string
54  */
55 static int32
56 gettoken_query(QPRS_STATE *state, int32 *val, int32 *lenval, char **strval, uint16 *flag)
57 {
58  int charlen;
59 
60  for (;;)
61  {
62  charlen = pg_mblen(state->buf);
63 
64  switch (state->state)
65  {
66  case WAITOPERAND:
67  if (t_iseq(state->buf, '!'))
68  {
69  (state->buf)++;
70  *val = (int32) '!';
71  return OPR;
72  }
73  else if (t_iseq(state->buf, '('))
74  {
75  state->count++;
76  (state->buf)++;
77  return OPEN;
78  }
79  else if (ISALNUM(state->buf))
80  {
81  state->state = INOPERAND;
82  *strval = state->buf;
83  *lenval = charlen;
84  *flag = 0;
85  }
86  else if (!t_isspace(state->buf))
87  ereport(ERROR,
88  (errcode(ERRCODE_SYNTAX_ERROR),
89  errmsg("operand syntax error")));
90  break;
91  case INOPERAND:
92  if (ISALNUM(state->buf))
93  {
94  if (*flag)
95  ereport(ERROR,
96  (errcode(ERRCODE_SYNTAX_ERROR),
97  errmsg("modifiers syntax error")));
98  *lenval += charlen;
99  }
100  else if (t_iseq(state->buf, '%'))
101  *flag |= LVAR_SUBLEXEME;
102  else if (t_iseq(state->buf, '@'))
103  *flag |= LVAR_INCASE;
104  else if (t_iseq(state->buf, '*'))
105  *flag |= LVAR_ANYEND;
106  else
107  {
108  state->state = WAITOPERATOR;
109  return VAL;
110  }
111  break;
112  case WAITOPERATOR:
113  if (t_iseq(state->buf, '&') || t_iseq(state->buf, '|'))
114  {
115  state->state = WAITOPERAND;
116  *val = (int32) *(state->buf);
117  (state->buf)++;
118  return OPR;
119  }
120  else if (t_iseq(state->buf, ')'))
121  {
122  (state->buf)++;
123  state->count--;
124  return (state->count < 0) ? ERR : CLOSE;
125  }
126  else if (*(state->buf) == '\0')
127  return (state->count) ? ERR : END;
128  else if (!t_iseq(state->buf, ' '))
129  return ERR;
130  break;
131  default:
132  return ERR;
133  break;
134  }
135 
136  state->buf += charlen;
137  }
138 }
139 
140 /*
141  * push new one in polish notation reverse view
142  */
143 static void
145 {
146  NODE *tmp = (NODE *) palloc(sizeof(NODE));
147 
148  tmp->type = type;
149  tmp->val = val;
150  tmp->flag = flag;
151  if (distance > 0xffff)
152  ereport(ERROR,
153  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
154  errmsg("value is too big")));
155  if (lenval > 0xff)
156  ereport(ERROR,
157  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
158  errmsg("operand is too long")));
159  tmp->distance = distance;
160  tmp->length = lenval;
161  tmp->next = state->str;
162  state->str = tmp;
163  state->num++;
164 }
165 
166 /*
167  * This function is used for query text parsing
168  */
169 static void
170 pushval_asis(QPRS_STATE *state, int type, char *strval, int lenval, uint16 flag)
171 {
172  if (lenval > 0xffff)
173  ereport(ERROR,
174  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
175  errmsg("word is too long")));
176 
177  pushquery(state, type, ltree_crc32_sz(strval, lenval),
178  state->curop - state->op, lenval, flag);
179 
180  while (state->curop - state->op + lenval + 1 >= state->lenop)
181  {
182  int32 tmp = state->curop - state->op;
183 
184  state->lenop *= 2;
185  state->op = (char *) repalloc((void *) state->op, state->lenop);
186  state->curop = state->op + tmp;
187  }
188  memcpy((void *) state->curop, (void *) strval, lenval);
189  state->curop += lenval;
190  *(state->curop) = '\0';
191  state->curop++;
192  state->sumlen += lenval + 1;
193 }
194 
195 #define STACKDEPTH 32
196 /*
197  * make polish notation of query
198  */
199 static int32
201 {
202  int32 val = 0,
203  type;
204  int32 lenval = 0;
205  char *strval = NULL;
206  int32 stack[STACKDEPTH];
207  int32 lenstack = 0;
208  uint16 flag = 0;
209 
210  /* since this function recurses, it could be driven to stack overflow */
212 
213  while ((type = gettoken_query(state, &val, &lenval, &strval, &flag)) != END)
214  {
215  switch (type)
216  {
217  case VAL:
218  pushval_asis(state, VAL, strval, lenval, flag);
219  while (lenstack && (stack[lenstack - 1] == (int32) '&' ||
220  stack[lenstack - 1] == (int32) '!'))
221  {
222  lenstack--;
223  pushquery(state, OPR, stack[lenstack], 0, 0, 0);
224  }
225  break;
226  case OPR:
227  if (lenstack && val == (int32) '|')
228  pushquery(state, OPR, val, 0, 0, 0);
229  else
230  {
231  if (lenstack == STACKDEPTH)
232  /* internal error */
233  elog(ERROR, "stack too short");
234  stack[lenstack] = val;
235  lenstack++;
236  }
237  break;
238  case OPEN:
239  if (makepol(state) == ERR)
240  return ERR;
241  while (lenstack && (stack[lenstack - 1] == (int32) '&' ||
242  stack[lenstack - 1] == (int32) '!'))
243  {
244  lenstack--;
245  pushquery(state, OPR, stack[lenstack], 0, 0, 0);
246  }
247  break;
248  case CLOSE:
249  while (lenstack)
250  {
251  lenstack--;
252  pushquery(state, OPR, stack[lenstack], 0, 0, 0);
253  };
254  return END;
255  break;
256  case ERR:
257  default:
258  ereport(ERROR,
259  (errcode(ERRCODE_SYNTAX_ERROR),
260  errmsg("syntax error")));
261 
262  return ERR;
263  }
264  }
265  while (lenstack)
266  {
267  lenstack--;
268  pushquery(state, OPR, stack[lenstack], 0, 0, 0);
269  };
270  return END;
271 }
272 
273 static void
274 findoprnd(ITEM *ptr, int32 *pos)
275 {
276  /* since this function recurses, it could be driven to stack overflow. */
278 
279  if (ptr[*pos].type == VAL || ptr[*pos].type == VALTRUE)
280  {
281  ptr[*pos].left = 0;
282  (*pos)++;
283  }
284  else if (ptr[*pos].val == (int32) '!')
285  {
286  ptr[*pos].left = 1;
287  (*pos)++;
288  findoprnd(ptr, pos);
289  }
290  else
291  {
292  ITEM *curitem = &ptr[*pos];
293  int32 tmp = *pos;
294 
295  (*pos)++;
296  findoprnd(ptr, pos);
297  curitem->left = *pos - tmp;
298  findoprnd(ptr, pos);
299  }
300 }
301 
302 
303 /*
304  * input
305  */
306 static ltxtquery *
307 queryin(char *buf)
308 {
310  int32 i;
311  ltxtquery *query;
312  int32 commonlen;
313  ITEM *ptr;
314  NODE *tmp;
315  int32 pos = 0;
316 
317 #ifdef BS_DEBUG
318  char pbuf[16384],
319  *cur;
320 #endif
321 
322  /* init state */
323  state.buf = buf;
324  state.state = WAITOPERAND;
325  state.count = 0;
326  state.num = 0;
327  state.str = NULL;
328 
329  /* init list of operand */
330  state.sumlen = 0;
331  state.lenop = 64;
332  state.curop = state.op = (char *) palloc(state.lenop);
333  *(state.curop) = '\0';
334 
335  /* parse query & make polish notation (postfix, but in reverse order) */
336  makepol(&state);
337  if (!state.num)
338  ereport(ERROR,
339  (errcode(ERRCODE_SYNTAX_ERROR),
340  errmsg("syntax error"),
341  errdetail("Empty query.")));
342 
343  if (LTXTQUERY_TOO_BIG(state.num, state.sumlen))
344  ereport(ERROR,
345  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
346  errmsg("ltxtquery is too large")));
347  commonlen = COMPUTESIZE(state.num, state.sumlen);
348 
349  query = (ltxtquery *) palloc0(commonlen);
350  SET_VARSIZE(query, commonlen);
351  query->size = state.num;
352  ptr = GETQUERY(query);
353 
354  /* set item in polish notation */
355  for (i = 0; i < state.num; i++)
356  {
357  ptr[i].type = state.str->type;
358  ptr[i].val = state.str->val;
359  ptr[i].distance = state.str->distance;
360  ptr[i].length = state.str->length;
361  ptr[i].flag = state.str->flag;
362  tmp = state.str->next;
363  pfree(state.str);
364  state.str = tmp;
365  }
366 
367  /* set user-friendly operand view */
368  memcpy((void *) GETOPERAND(query), (void *) state.op, state.sumlen);
369  pfree(state.op);
370 
371  /* set left operand's position for every operator */
372  pos = 0;
373  findoprnd(ptr, &pos);
374 
375  return query;
376 }
377 
378 /*
379  * in without morphology
380  */
382 Datum
384 {
386 }
387 
388 /*
389  * ltxtquery type recv function
390  *
391  * The type is sent as text in binary mode, so this is almost the same
392  * as the input function, but it's prefixed with a version number so we
393  * can change the binary format sent in future if necessary. For now,
394  * only version 1 is supported.
395  */
397 Datum
399 {
401  int version = pq_getmsgint(buf, 1);
402  char *str;
403  int nbytes;
404  ltxtquery *res;
405 
406  if (version != 1)
407  elog(ERROR, "unsupported ltxtquery version number %d", version);
408 
409  str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
410  res = queryin(str);
411  pfree(str);
412 
414 }
415 
416 /*
417  * out function
418  */
419 typedef struct
420 {
421  ITEM *curpol;
422  char *buf;
423  char *cur;
424  char *op;
425  int32 buflen;
426 } INFIX;
427 
/*
 * Ensure that the output buffer of "inf" has room for "addsize" more bytes
 * plus a terminating NUL, doubling the allocation as often as needed and
 * keeping "cur" at the same offset.
 *
 * Wrapped in do { } while (0) so that "RESIZEBUF(x, n);" is a single
 * statement and stays correct inside an unbraced if/else.  Both arguments
 * are evaluated more than once, so they must be free of side effects.
 */
#define RESIZEBUF(inf,addsize) \
do { \
	while (((inf)->cur - (inf)->buf) + (addsize) + 1 >= (inf)->buflen) \
	{ \
		int32		len = (inf)->cur - (inf)->buf; \
		(inf)->buflen *= 2; \
		(inf)->buf = (char *) repalloc((void *) (inf)->buf, (inf)->buflen); \
		(inf)->cur = (inf)->buf + len; \
	} \
} while (0)
436 
437 /*
438  * recursive walk on tree and print it in
439  * infix (human-readable) view
440  */
441 static void
442 infix(INFIX *in, bool first)
443 {
444  /* since this function recurses, it could be driven to stack overflow. */
446 
447  if (in->curpol->type == VAL)
448  {
449  char *op = in->op + in->curpol->distance;
450 
451  RESIZEBUF(in, in->curpol->length * 2 + 5);
452  while (*op)
453  {
454  *(in->cur) = *op;
455  op++;
456  in->cur++;
457  }
458  if (in->curpol->flag & LVAR_SUBLEXEME)
459  {
460  *(in->cur) = '%';
461  in->cur++;
462  }
463  if (in->curpol->flag & LVAR_INCASE)
464  {
465  *(in->cur) = '@';
466  in->cur++;
467  }
468  if (in->curpol->flag & LVAR_ANYEND)
469  {
470  *(in->cur) = '*';
471  in->cur++;
472  }
473  *(in->cur) = '\0';
474  in->curpol++;
475  }
476  else if (in->curpol->val == (int32) '!')
477  {
478  bool isopr = false;
479 
480  RESIZEBUF(in, 1);
481  *(in->cur) = '!';
482  in->cur++;
483  *(in->cur) = '\0';
484  in->curpol++;
485  if (in->curpol->type == OPR)
486  {
487  isopr = true;
488  RESIZEBUF(in, 2);
489  sprintf(in->cur, "( ");
490  in->cur = strchr(in->cur, '\0');
491  }
492  infix(in, isopr);
493  if (isopr)
494  {
495  RESIZEBUF(in, 2);
496  sprintf(in->cur, " )");
497  in->cur = strchr(in->cur, '\0');
498  }
499  }
500  else
501  {
502  int32 op = in->curpol->val;
503  INFIX nrm;
504 
505  in->curpol++;
506  if (op == (int32) '|' && !first)
507  {
508  RESIZEBUF(in, 2);
509  sprintf(in->cur, "( ");
510  in->cur = strchr(in->cur, '\0');
511  }
512 
513  nrm.curpol = in->curpol;
514  nrm.op = in->op;
515  nrm.buflen = 16;
516  nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
517 
518  /* get right operand */
519  infix(&nrm, false);
520 
521  /* get & print left operand */
522  in->curpol = nrm.curpol;
523  infix(in, false);
524 
525  /* print operator & right operand */
526  RESIZEBUF(in, 3 + (nrm.cur - nrm.buf));
527  sprintf(in->cur, " %c %s", op, nrm.buf);
528  in->cur = strchr(in->cur, '\0');
529  pfree(nrm.buf);
530 
531  if (op == (int32) '|' && !first)
532  {
533  RESIZEBUF(in, 2);
534  sprintf(in->cur, " )");
535  in->cur = strchr(in->cur, '\0');
536  }
537  }
538 }
539 
541 Datum
543 {
544  ltxtquery *query = PG_GETARG_LTXTQUERY_P(0);
545  INFIX nrm;
546 
547  if (query->size == 0)
548  ereport(ERROR,
549  (errcode(ERRCODE_SYNTAX_ERROR),
550  errmsg("syntax error"),
551  errdetail("Empty query.")));
552 
553  nrm.curpol = GETQUERY(query);
554  nrm.buflen = 32;
555  nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
556  *(nrm.cur) = '\0';
557  nrm.op = GETOPERAND(query);
558  infix(&nrm, true);
559 
560  PG_RETURN_POINTER(nrm.buf);
561 }
562 
563 /*
564  * ltxtquery type send function
565  *
566  * The type is sent as text in binary mode, so this is almost the same
567  * as the output function, but it's prefixed with a version number so we
568  * can change the binary format sent in future if necessary. For now,
569  * only version 1 is supported.
570  */
572 Datum
574 {
575  ltxtquery *query = PG_GETARG_LTXTQUERY_P(0);
577  int version = 1;
578  INFIX nrm;
579 
580  if (query->size == 0)
581  ereport(ERROR,
582  (errcode(ERRCODE_SYNTAX_ERROR),
583  errmsg("syntax error"),
584  errdetail("Empty query.")));
585 
586  nrm.curpol = GETQUERY(query);
587  nrm.buflen = 32;
588  nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
589  *(nrm.cur) = '\0';
590  nrm.op = GETOPERAND(query);
591  infix(&nrm, true);
592 
594  pq_sendint8(&buf, version);
595  pq_sendtext(&buf, nrm.buf, strlen(nrm.buf));
596  pfree(nrm.buf);
597 
599 }
#define CLOSE
Definition: _int.h:165
#define OPEN
Definition: _int.h:164
#define END
Definition: _int.h:160
#define COMPUTESIZE(size)
Definition: _int.h:155
#define OPR
Definition: _int.h:163
#define VAL
Definition: _int.h:162
#define ERR
Definition: _int.h:161
#define GETQUERY(x)
Definition: _int.h:157
unsigned short uint16
Definition: c.h:441
signed short int16
Definition: c.h:429
signed int int32
Definition: c.h:430
unsigned int ltree_crc32_sz(const char *buf, int size)
Definition: crc32.c:24
struct cursor * cur
Definition: ecpg.c:28
int errdetail(const char *fmt,...)
Definition: elog.c:1039
int errcode(int sqlerrcode)
Definition: elog.c:695
int errmsg(const char *fmt,...)
Definition: elog.c:906
#define ERROR
Definition: elog.h:35
#define ereport(elevel,...)
Definition: elog.h:145
#define PG_RETURN_BYTEA_P(x)
Definition: fmgr.h:371
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:276
#define PG_RETURN_POINTER(x)
Definition: fmgr.h:361
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
long val
Definition: informix.c:664
int i
Definition: isn.c:73
#define VALTRUE
Definition: ltree.h:174
#define PG_GETARG_LTXTQUERY_P(n)
Definition: ltree.h:227
#define ISALNUM(x)
Definition: ltree.h:129
#define LVAR_INCASE
Definition: ltree.h:75
#define LVAR_ANYEND
Definition: ltree.h:74
#define LTXTQUERY_TOO_BIG(size, lenofoperand)
Definition: ltree.h:161
#define GETOPERAND(x)
Definition: ltree.h:164
#define LVAR_SUBLEXEME
Definition: ltree.h:76
static int32 gettoken_query(QPRS_STATE *state, int32 *val, int32 *lenval, char **strval, uint16 *flag)
Definition: ltxtquery_io.c:56
Datum ltxtq_recv(PG_FUNCTION_ARGS)
Definition: ltxtquery_io.c:398
Datum ltxtq_send(PG_FUNCTION_ARGS)
Definition: ltxtquery_io.c:573
static int32 makepol(QPRS_STATE *state)
Definition: ltxtquery_io.c:200
static void pushval_asis(QPRS_STATE *state, int type, char *strval, int lenval, uint16 flag)
Definition: ltxtquery_io.c:170
#define RESIZEBUF(inf, addsize)
Definition: ltxtquery_io.c:428
static void findoprnd(ITEM *ptr, int32 *pos)
Definition: ltxtquery_io.c:274
#define WAITOPERAND
Definition: ltxtquery_io.c:17
Datum ltxtq_out(PG_FUNCTION_ARGS)
Definition: ltxtquery_io.c:542
static ltxtquery * queryin(char *buf)
Definition: ltxtquery_io.c:307
static void infix(INFIX *in, bool first)
Definition: ltxtquery_io.c:442
#define STACKDEPTH
Definition: ltxtquery_io.c:195
PG_FUNCTION_INFO_V1(ltxtq_in)
#define WAITOPERATOR
Definition: ltxtquery_io.c:19
static void pushquery(QPRS_STATE *state, int32 type, int32 val, int32 distance, int32 lenval, uint16 flag)
Definition: ltxtquery_io.c:144
#define INOPERAND
Definition: ltxtquery_io.c:18
Datum ltxtq_in(PG_FUNCTION_ARGS)
Definition: ltxtquery_io.c:383
struct NODE NODE
int pg_mblen(const char *mbstr)
Definition: mbutils.c:966
void pfree(void *pointer)
Definition: mcxt.c:1306
void * palloc0(Size size)
Definition: mcxt.c:1230
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1321
void * palloc(Size size)
Definition: mcxt.c:1199
static char * buf
Definition: pg_test_fsync.c:67
#define sprintf
Definition: port.h:240
void check_stack_depth(void)
Definition: postgres.c:3440
uintptr_t Datum
Definition: postgres.h:412
#define SET_VARSIZE(PTR, len)
Definition: postgres.h:343
unsigned int pq_getmsgint(StringInfo msg, int b)
Definition: pqformat.c:417
void pq_sendtext(StringInfo buf, const char *str, int slen)
Definition: pqformat.c:174
char * pq_getmsgtext(StringInfo msg, int rawbytes, int *nbytes)
Definition: pqformat.c:548
void pq_begintypsend(StringInfo buf)
Definition: pqformat.c:328
bytea * pq_endtypsend(StringInfo buf)
Definition: pqformat.c:348
static void pq_sendint8(StringInfo buf, uint8 i)
Definition: pqformat.h:129
StringInfoData * StringInfo
Definition: stringinfo.h:44
char * buf
Definition: _int_bool.c:550
char * cur
Definition: _int_bool.c:551
int32 buflen
Definition: _int_bool.c:552
ITEM * curpol
Definition: _int_bool.c:549
char * op
Definition: ltxtquery_io.c:424
Definition: _int.h:141
uint16 distance
Definition: ltree.h:145
int16 left
Definition: _int.h:143
uint8 flag
Definition: ltree.h:142
int32 val
Definition: _int.h:144
int16 type
Definition: _int.h:142
uint8 length
Definition: ltree.h:144
Definition: _int_bool.c:27
struct NODE * next
Definition: _int_bool.c:30
int32 val
Definition: _int_bool.c:29
int16 distance
Definition: ltxtquery_io.c:29
uint16 flag
Definition: ltxtquery_io.c:31
int16 length
Definition: ltxtquery_io.c:30
int32 type
Definition: _int_bool.c:28
char * curop
Definition: ltxtquery_io.c:49
char * buf
Definition: ltxtquery_io.c:37
NODE * str
Definition: ltxtquery_io.c:41
int32 lenop
Definition: ltxtquery_io.c:46
char * op
Definition: ltxtquery_io.c:48
int32 count
Definition: ltxtquery_io.c:39
int32 sumlen
Definition: ltxtquery_io.c:47
int32 state
Definition: ltxtquery_io.c:38
int32 size
Definition: ltree.h:155
Definition: regguts.h:318
char flag
Definition: regguts.h:321
struct state * next
Definition: regguts.h:327
char * flag(int b)
Definition: test-ctype.c:33
int t_isspace(const char *ptr)
Definition: ts_locale.c:53
#define t_iseq(x, c)
Definition: ts_locale.h:38