PostgreSQL Source Code  git master
ltxtquery_io.c
Go to the documentation of this file.
1 /*
2  * txtquery io
3  * Teodor Sigaev <teodor@stack.net>
4  * contrib/ltree/ltxtquery_io.c
5  */
6 #include "postgres.h"
7 
8 #include <ctype.h>
9 
10 #include "crc32.h"
11 #include "libpq/pqformat.h"
12 #include "ltree.h"
13 #include "miscadmin.h"
14 #include "nodes/miscnodes.h"
15 #include "varatt.h"
16 
17 
18 /* parser's states: values stored in QPRS_STATE.state and switched on by gettoken_query() */
19 #define WAITOPERAND 1 /* expecting '!', '(' or the first character of an operand */
20 #define INOPERAND 2 /* inside an operand or its trailing modifier characters */
21 #define WAITOPERATOR 3 /* expecting '&', '|', ')' or end of input */
22 
23 /*
24  * node of query tree, also used
25  * for storing polish notation in parser
26  */
27 typedef struct NODE
28 {
29  int32 type;
30  int32 val;
34  struct NODE *next;
35 } NODE;
36 
37 typedef struct
38 {
39  char *buf;
42  struct Node *escontext;
43  /* reverse polish notation in list (for temporary usage) */
45  /* number in str */
47 
48  /* user-friendly operand */
51  char *op;
52  char *curop;
53 } QPRS_STATE;
54 
55 /*
56  * get token from query string
57  *
58  * caller needs to check if a soft-error was set if the result is ERR.
59  */
60 static int32
61 gettoken_query(QPRS_STATE *state, int32 *val, int32 *lenval, char **strval, uint16 *flag)
62 {
63  int charlen;
64 
65  for (;;)
66  {
67  charlen = pg_mblen(state->buf);
68 
69  switch (state->state)
70  {
71  case WAITOPERAND:
72  if (t_iseq(state->buf, '!'))
73  {
74  (state->buf)++;
75  *val = (int32) '!';
76  return OPR;
77  }
78  else if (t_iseq(state->buf, '('))
79  {
80  state->count++;
81  (state->buf)++;
82  return OPEN;
83  }
84  else if (ISLABEL(state->buf))
85  {
86  state->state = INOPERAND;
87  *strval = state->buf;
88  *lenval = charlen;
89  *flag = 0;
90  }
91  else if (!t_isspace(state->buf))
92  ereturn(state->escontext, ERR,
93  (errcode(ERRCODE_SYNTAX_ERROR),
94  errmsg("operand syntax error")));
95  break;
96  case INOPERAND:
97  if (ISLABEL(state->buf))
98  {
99  if (*flag)
100  ereturn(state->escontext, ERR,
101  (errcode(ERRCODE_SYNTAX_ERROR),
102  errmsg("modifiers syntax error")));
103  *lenval += charlen;
104  }
105  else if (t_iseq(state->buf, '%'))
106  *flag |= LVAR_SUBLEXEME;
107  else if (t_iseq(state->buf, '@'))
108  *flag |= LVAR_INCASE;
109  else if (t_iseq(state->buf, '*'))
110  *flag |= LVAR_ANYEND;
111  else
112  {
113  state->state = WAITOPERATOR;
114  return VAL;
115  }
116  break;
117  case WAITOPERATOR:
118  if (t_iseq(state->buf, '&') || t_iseq(state->buf, '|'))
119  {
120  state->state = WAITOPERAND;
121  *val = (int32) *(state->buf);
122  (state->buf)++;
123  return OPR;
124  }
125  else if (t_iseq(state->buf, ')'))
126  {
127  (state->buf)++;
128  state->count--;
129  return (state->count < 0) ? ERR : CLOSE;
130  }
131  else if (*(state->buf) == '\0')
132  {
133  return (state->count) ? ERR : END;
134  }
135  else if (!t_iseq(state->buf, ' '))
136  {
137  return ERR;
138  }
139  break;
140  default:
141  return ERR;
142  break;
143  }
144 
145  state->buf += charlen;
146  }
147 
148  /* should not get here */
149 }
150 
151 /*
152  * push new one in polish notation reverse view
153  */
154 static bool
156 {
157  NODE *tmp = (NODE *) palloc(sizeof(NODE));
158 
159  tmp->type = type;
160  tmp->val = val;
161  tmp->flag = flag;
162  if (distance > 0xffff)
163  ereturn(state->escontext, false,
164  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
165  errmsg("value is too big")));
166  if (lenval > 0xff)
167  ereturn(state->escontext, false,
168  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
169  errmsg("operand is too long")));
170  tmp->distance = distance;
171  tmp->length = lenval;
172  tmp->next = state->str;
173  state->str = tmp;
174  state->num++;
175  return true;
176 }
177 
178 /*
179  * This function is used for query text parsing
180  */
181 static bool
182 pushval_asis(QPRS_STATE *state, int type, char *strval, int lenval, uint16 flag)
183 {
184  if (lenval > 0xffff)
185  ereturn(state->escontext, false,
186  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
187  errmsg("word is too long")));
188 
189  if (!pushquery(state, type, ltree_crc32_sz(strval, lenval),
190  state->curop - state->op, lenval, flag))
191  return false;
192 
193  while (state->curop - state->op + lenval + 1 >= state->lenop)
194  {
195  int32 tmp = state->curop - state->op;
196 
197  state->lenop *= 2;
198  state->op = (char *) repalloc(state->op, state->lenop);
199  state->curop = state->op + tmp;
200  }
201  memcpy(state->curop, strval, lenval);
202  state->curop += lenval;
203  *(state->curop) = '\0';
204  state->curop++;
205  state->sumlen += lenval + 1;
206  return true;
207 }
208 
209 #define STACKDEPTH 32
210 /*
211  * make polish notation of query
212  */
213 static int32
215 {
216  int32 val = 0,
217  type;
218  int32 lenval = 0;
219  char *strval = NULL;
220  int32 stack[STACKDEPTH];
221  int32 lenstack = 0;
222  uint16 flag = 0;
223 
224  /* since this function recurses, it could be driven to stack overflow */
226 
227  while ((type = gettoken_query(state, &val, &lenval, &strval, &flag)) != END)
228  {
229  switch (type)
230  {
231  case VAL:
232  if (!pushval_asis(state, VAL, strval, lenval, flag))
233  return ERR;
234  while (lenstack && (stack[lenstack - 1] == (int32) '&' ||
235  stack[lenstack - 1] == (int32) '!'))
236  {
237  lenstack--;
238  if (!pushquery(state, OPR, stack[lenstack], 0, 0, 0))
239  return ERR;
240  }
241  break;
242  case OPR:
243  if (lenstack && val == (int32) '|')
244  {
245  if (!pushquery(state, OPR, val, 0, 0, 0))
246  return ERR;
247  }
248  else
249  {
250  if (lenstack == STACKDEPTH)
251  /* internal error */
252  elog(ERROR, "stack too short");
253  stack[lenstack] = val;
254  lenstack++;
255  }
256  break;
257  case OPEN:
258  if (makepol(state) == ERR)
259  return ERR;
260  while (lenstack && (stack[lenstack - 1] == (int32) '&' ||
261  stack[lenstack - 1] == (int32) '!'))
262  {
263  lenstack--;
264  if (!pushquery(state, OPR, stack[lenstack], 0, 0, 0))
265  return ERR;
266  }
267  break;
268  case CLOSE:
269  while (lenstack)
270  {
271  lenstack--;
272  if (!pushquery(state, OPR, stack[lenstack], 0, 0, 0))
273  return ERR;
274  };
275  return END;
276  break;
277  case ERR:
278  if (SOFT_ERROR_OCCURRED(state->escontext))
279  return ERR;
280  /* fall through */
281  default:
282  ereturn(state->escontext, ERR,
283  (errcode(ERRCODE_SYNTAX_ERROR),
284  errmsg("syntax error")));
285 
286  }
287  }
288  while (lenstack)
289  {
290  lenstack--;
291  if (!pushquery(state, OPR, stack[lenstack], 0, 0, 0))
292  return ERR;
293  };
294  return END;
295 }
296 
297 static void
298 findoprnd(ITEM *ptr, int32 *pos)
299 {
300  /* since this function recurses, it could be driven to stack overflow. */
302 
303  if (ptr[*pos].type == VAL || ptr[*pos].type == VALTRUE)
304  {
305  ptr[*pos].left = 0;
306  (*pos)++;
307  }
308  else if (ptr[*pos].val == (int32) '!')
309  {
310  ptr[*pos].left = 1;
311  (*pos)++;
312  findoprnd(ptr, pos);
313  }
314  else
315  {
316  ITEM *curitem = &ptr[*pos];
317  int32 tmp = *pos;
318 
319  (*pos)++;
320  findoprnd(ptr, pos);
321  curitem->left = *pos - tmp;
322  findoprnd(ptr, pos);
323  }
324 }
325 
326 
327 /*
328  * input
329  */
330 static ltxtquery *
331 queryin(char *buf, struct Node *escontext)
332 {
334  int32 i;
335  ltxtquery *query;
336  int32 commonlen;
337  ITEM *ptr;
338  NODE *tmp;
339  int32 pos = 0;
340 
341 #ifdef BS_DEBUG
342  char pbuf[16384],
343  *cur;
344 #endif
345 
346  /* init state */
347  state.buf = buf;
348  state.state = WAITOPERAND;
349  state.count = 0;
350  state.num = 0;
351  state.str = NULL;
352  state.escontext = escontext;
353 
354  /* init list of operand */
355  state.sumlen = 0;
356  state.lenop = 64;
357  state.curop = state.op = (char *) palloc(state.lenop);
358  *(state.curop) = '\0';
359 
360  /* parse query & make polish notation (postfix, but in reverse order) */
361  if (makepol(&state) == ERR)
362  return NULL;
363  if (!state.num)
364  ereturn(escontext, NULL,
365  (errcode(ERRCODE_SYNTAX_ERROR),
366  errmsg("syntax error"),
367  errdetail("Empty query.")));
368 
369  if (LTXTQUERY_TOO_BIG(state.num, state.sumlen))
370  ereturn(escontext, NULL,
371  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
372  errmsg("ltxtquery is too large")));
373  commonlen = COMPUTESIZE(state.num, state.sumlen);
374 
375  query = (ltxtquery *) palloc0(commonlen);
376  SET_VARSIZE(query, commonlen);
377  query->size = state.num;
378  ptr = GETQUERY(query);
379 
380  /* set item in polish notation */
381  for (i = 0; i < state.num; i++)
382  {
383  ptr[i].type = state.str->type;
384  ptr[i].val = state.str->val;
385  ptr[i].distance = state.str->distance;
386  ptr[i].length = state.str->length;
387  ptr[i].flag = state.str->flag;
388  tmp = state.str->next;
389  pfree(state.str);
390  state.str = tmp;
391  }
392 
393  /* set user-friendly operand view */
394  memcpy(GETOPERAND(query), state.op, state.sumlen);
395  pfree(state.op);
396 
397  /* set left operand's position for every operator */
398  pos = 0;
399  findoprnd(ptr, &pos);
400 
401  return query;
402 }
403 
404 /*
405  * in without morphology
406  */
408 Datum
410 {
411  ltxtquery *res;
412 
413  if ((res = queryin((char *) PG_GETARG_POINTER(0), fcinfo->context)) == NULL)
414  PG_RETURN_NULL();
416 }
417 
418 /*
419  * ltxtquery type recv function
420  *
421  * The type is sent as text in binary mode, so this is almost the same
422  * as the input function, but it's prefixed with a version number so we
423  * can change the binary format sent in future if necessary. For now,
424  * only version 1 is supported.
425  */
427 Datum
429 {
431  int version = pq_getmsgint(buf, 1);
432  char *str;
433  int nbytes;
434  ltxtquery *res;
435 
436  if (version != 1)
437  elog(ERROR, "unsupported ltxtquery version number %d", version);
438 
439  str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
440  res = queryin(str, NULL);
441  pfree(str);
442 
444 }
445 
446 /*
447  * out function
448  */
449 typedef struct
450 {
451  ITEM *curpol;
452  char *buf;
453  char *cur;
454  char *op;
455  int32 buflen;
456 } INFIX;
457 
/*
 * Make sure the output buffer of INFIX *inf has room for addsize more bytes
 * plus a terminating NUL, doubling it with repalloc() until it does; the
 * write position inf->cur is preserved across the reallocation.
 *
 * Wrapped in do { } while (0) so that "RESIZEBUF(...);" is a single
 * statement and stays correct inside an unbraced if/else.  Both arguments
 * are evaluated more than once, so they must be free of side effects.
 */
#define RESIZEBUF(inf, addsize) \
do { \
	while (((inf)->cur - (inf)->buf) + (addsize) + 1 >= (inf)->buflen) \
	{ \
		int32		resizebuf_used = (inf)->cur - (inf)->buf; \
		(inf)->buflen *= 2; \
		(inf)->buf = (char *) repalloc((void *) (inf)->buf, (inf)->buflen); \
		(inf)->cur = (inf)->buf + resizebuf_used; \
	} \
} while (0)
466 
467 /*
468  * recursive walk on tree and print it in
469  * infix (human-readable) view
470  */
471 static void
472 infix(INFIX *in, bool first)
473 {
474  /* since this function recurses, it could be driven to stack overflow. */
476 
477  if (in->curpol->type == VAL)
478  {
479  char *op = in->op + in->curpol->distance;
480 
481  RESIZEBUF(in, in->curpol->length * 2 + 5);
482  while (*op)
483  {
484  *(in->cur) = *op;
485  op++;
486  in->cur++;
487  }
488  if (in->curpol->flag & LVAR_SUBLEXEME)
489  {
490  *(in->cur) = '%';
491  in->cur++;
492  }
493  if (in->curpol->flag & LVAR_INCASE)
494  {
495  *(in->cur) = '@';
496  in->cur++;
497  }
498  if (in->curpol->flag & LVAR_ANYEND)
499  {
500  *(in->cur) = '*';
501  in->cur++;
502  }
503  *(in->cur) = '\0';
504  in->curpol++;
505  }
506  else if (in->curpol->val == (int32) '!')
507  {
508  bool isopr = false;
509 
510  RESIZEBUF(in, 1);
511  *(in->cur) = '!';
512  in->cur++;
513  *(in->cur) = '\0';
514  in->curpol++;
515  if (in->curpol->type == OPR)
516  {
517  isopr = true;
518  RESIZEBUF(in, 2);
519  sprintf(in->cur, "( ");
520  in->cur = strchr(in->cur, '\0');
521  }
522  infix(in, isopr);
523  if (isopr)
524  {
525  RESIZEBUF(in, 2);
526  sprintf(in->cur, " )");
527  in->cur = strchr(in->cur, '\0');
528  }
529  }
530  else
531  {
532  int32 op = in->curpol->val;
533  INFIX nrm;
534 
535  in->curpol++;
536  if (op == (int32) '|' && !first)
537  {
538  RESIZEBUF(in, 2);
539  sprintf(in->cur, "( ");
540  in->cur = strchr(in->cur, '\0');
541  }
542 
543  nrm.curpol = in->curpol;
544  nrm.op = in->op;
545  nrm.buflen = 16;
546  nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
547 
548  /* get right operand */
549  infix(&nrm, false);
550 
551  /* get & print left operand */
552  in->curpol = nrm.curpol;
553  infix(in, false);
554 
555  /* print operator & right operand */
556  RESIZEBUF(in, 3 + (nrm.cur - nrm.buf));
557  sprintf(in->cur, " %c %s", op, nrm.buf);
558  in->cur = strchr(in->cur, '\0');
559  pfree(nrm.buf);
560 
561  if (op == (int32) '|' && !first)
562  {
563  RESIZEBUF(in, 2);
564  sprintf(in->cur, " )");
565  in->cur = strchr(in->cur, '\0');
566  }
567  }
568 }
569 
571 Datum
573 {
574  ltxtquery *query = PG_GETARG_LTXTQUERY_P(0);
575  INFIX nrm;
576 
577  if (query->size == 0)
578  ereport(ERROR,
579  (errcode(ERRCODE_SYNTAX_ERROR),
580  errmsg("syntax error"),
581  errdetail("Empty query.")));
582 
583  nrm.curpol = GETQUERY(query);
584  nrm.buflen = 32;
585  nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
586  *(nrm.cur) = '\0';
587  nrm.op = GETOPERAND(query);
588  infix(&nrm, true);
589 
590  PG_RETURN_POINTER(nrm.buf);
591 }
592 
593 /*
594  * ltxtquery type send function
595  *
596  * The type is sent as text in binary mode, so this is almost the same
597  * as the output function, but it's prefixed with a version number so we
598  * can change the binary format sent in future if necessary. For now,
599  * only version 1 is supported.
600  */
602 Datum
604 {
605  ltxtquery *query = PG_GETARG_LTXTQUERY_P(0);
607  int version = 1;
608  INFIX nrm;
609 
610  if (query->size == 0)
611  ereport(ERROR,
612  (errcode(ERRCODE_SYNTAX_ERROR),
613  errmsg("syntax error"),
614  errdetail("Empty query.")));
615 
616  nrm.curpol = GETQUERY(query);
617  nrm.buflen = 32;
618  nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
619  *(nrm.cur) = '\0';
620  nrm.op = GETOPERAND(query);
621  infix(&nrm, true);
622 
624  pq_sendint8(&buf, version);
625  pq_sendtext(&buf, nrm.buf, strlen(nrm.buf));
626  pfree(nrm.buf);
627 
629 }
#define CLOSE
Definition: _int.h:165
#define OPEN
Definition: _int.h:164
#define END
Definition: _int.h:160
#define COMPUTESIZE(size)
Definition: _int.h:155
#define OPR
Definition: _int.h:163
#define VAL
Definition: _int.h:162
#define ERR
Definition: _int.h:161
#define GETQUERY(x)
Definition: _int.h:157
unsigned short uint16
Definition: c.h:508
signed short int16
Definition: c.h:496
signed int int32
Definition: c.h:497
unsigned int ltree_crc32_sz(const char *buf, int size)
Definition: crc32.c:24
struct cursor * cur
Definition: ecpg.c:28
int errdetail(const char *fmt,...)
Definition: elog.c:1203
int errcode(int sqlerrcode)
Definition: elog.c:853
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define ereturn(context, dummy_value,...)
Definition: elog.h:277
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
#define ereport(elevel,...)
Definition: elog.h:149
#define PG_RETURN_BYTEA_P(x)
Definition: fmgr.h:371
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:276
#define PG_RETURN_NULL()
Definition: fmgr.h:345
#define PG_RETURN_POINTER(x)
Definition: fmgr.h:361
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
const char * str
long val
Definition: informix.c:689
int i
Definition: isn.c:73
#define VALTRUE
Definition: ltree.h:175
#define PG_GETARG_LTXTQUERY_P(n)
Definition: ltree.h:228
#define LVAR_INCASE
Definition: ltree.h:75
#define LVAR_ANYEND
Definition: ltree.h:74
#define ISLABEL(x)
Definition: ltree.h:130
#define LTXTQUERY_TOO_BIG(size, lenofoperand)
Definition: ltree.h:162
#define GETOPERAND(x)
Definition: ltree.h:165
#define LVAR_SUBLEXEME
Definition: ltree.h:76
static int32 gettoken_query(QPRS_STATE *state, int32 *val, int32 *lenval, char **strval, uint16 *flag)
Definition: ltxtquery_io.c:61
Datum ltxtq_recv(PG_FUNCTION_ARGS)
Definition: ltxtquery_io.c:428
Datum ltxtq_send(PG_FUNCTION_ARGS)
Definition: ltxtquery_io.c:603
static bool pushquery(QPRS_STATE *state, int32 type, int32 val, int32 distance, int32 lenval, uint16 flag)
Definition: ltxtquery_io.c:155
static ltxtquery * queryin(char *buf, struct Node *escontext)
Definition: ltxtquery_io.c:331
static int32 makepol(QPRS_STATE *state)
Definition: ltxtquery_io.c:214
static bool pushval_asis(QPRS_STATE *state, int type, char *strval, int lenval, uint16 flag)
Definition: ltxtquery_io.c:182
#define RESIZEBUF(inf, addsize)
Definition: ltxtquery_io.c:458
static void findoprnd(ITEM *ptr, int32 *pos)
Definition: ltxtquery_io.c:298
#define WAITOPERAND
Definition: ltxtquery_io.c:19
Datum ltxtq_out(PG_FUNCTION_ARGS)
Definition: ltxtquery_io.c:572
static void infix(INFIX *in, bool first)
Definition: ltxtquery_io.c:472
#define STACKDEPTH
Definition: ltxtquery_io.c:209
PG_FUNCTION_INFO_V1(ltxtq_in)
#define WAITOPERATOR
Definition: ltxtquery_io.c:21
#define INOPERAND
Definition: ltxtquery_io.c:20
Datum ltxtq_in(PG_FUNCTION_ARGS)
Definition: ltxtquery_io.c:409
struct NODE NODE
int pg_mblen(const char *mbstr)
Definition: mbutils.c:1023
void pfree(void *pointer)
Definition: mcxt.c:1521
void * palloc0(Size size)
Definition: mcxt.c:1347
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1541
void * palloc(Size size)
Definition: mcxt.c:1317
#define SOFT_ERROR_OCCURRED(escontext)
Definition: miscnodes.h:52
static char * buf
Definition: pg_test_fsync.c:73
#define sprintf
Definition: port.h:240
void check_stack_depth(void)
Definition: postgres.c:3564
uintptr_t Datum
Definition: postgres.h:64
unsigned int pq_getmsgint(StringInfo msg, int b)
Definition: pqformat.c:415
void pq_sendtext(StringInfo buf, const char *str, int slen)
Definition: pqformat.c:172
char * pq_getmsgtext(StringInfo msg, int rawbytes, int *nbytes)
Definition: pqformat.c:546
void pq_begintypsend(StringInfo buf)
Definition: pqformat.c:326
bytea * pq_endtypsend(StringInfo buf)
Definition: pqformat.c:346
static void pq_sendint8(StringInfo buf, uint8 i)
Definition: pqformat.h:128
StringInfoData * StringInfo
Definition: stringinfo.h:54
char * buf
Definition: _int_bool.c:553
char * cur
Definition: _int_bool.c:554
int32 buflen
Definition: _int_bool.c:555
ITEM * curpol
Definition: _int_bool.c:552
char * op
Definition: ltxtquery_io.c:454
Definition: _int.h:141
uint16 distance
Definition: ltree.h:146
int16 left
Definition: _int.h:143
uint8 flag
Definition: ltree.h:143
int32 val
Definition: _int.h:144
int16 type
Definition: _int.h:142
uint8 length
Definition: ltree.h:145
Definition: _int_bool.c:27
struct NODE * next
Definition: _int_bool.c:30
int32 val
Definition: _int_bool.c:29
int16 distance
Definition: ltxtquery_io.c:31
uint16 flag
Definition: ltxtquery_io.c:33
int16 length
Definition: ltxtquery_io.c:32
int32 type
Definition: _int_bool.c:28
Definition: nodes.h:129
char * curop
Definition: ltxtquery_io.c:52
char * buf
Definition: ltxtquery_io.c:39
NODE * str
Definition: ltxtquery_io.c:44
int32 lenop
Definition: ltxtquery_io.c:49
char * op
Definition: ltxtquery_io.c:51
struct Node * escontext
Definition: ltxtquery_io.c:42
int32 count
Definition: ltxtquery_io.c:41
int32 sumlen
Definition: ltxtquery_io.c:50
int32 state
Definition: ltxtquery_io.c:40
int32 size
Definition: ltree.h:156
Definition: regguts.h:323
char flag
Definition: regguts.h:326
struct state * next
Definition: regguts.h:332
char * flag(int b)
Definition: test-ctype.c:33
int t_isspace(const char *ptr)
Definition: ts_locale.c:50
#define t_iseq(x, c)
Definition: ts_locale.h:38
#define SET_VARSIZE(PTR, len)
Definition: varatt.h:305
const char * type