PostgreSQL Source Code  git master
ltxtquery_io.c
Go to the documentation of this file.
1 /*
2  * txtquery io
3  * Teodor Sigaev <teodor@stack.net>
4  * contrib/ltree/ltxtquery_io.c
5  */
6 #include "postgres.h"
7 
8 #include <ctype.h>
9 
10 #include "crc32.h"
11 #include "libpq/pqformat.h"
12 #include "ltree.h"
13 #include "miscadmin.h"
14 
15 
16 /* parser's states: what gettoken_query() expects to read next */
17 #define WAITOPERAND 1	/* a '!', a '(' or the first character of an operand */
18 #define INOPERAND 2	/* more operand characters or trailing modifier flags */
19 #define WAITOPERATOR 3	/* a '&' or '|', a ')' or the end of the string */
20 
21 /*
22  * node of query tree, also used
23  * for storing polish notation in parser
 *
 * pushquery() allocates one NODE per token and links it at the head of
 * the list kept in the parser state; queryin() later copies the nodes into
 * the ITEM array of the result and frees them.
 *
 * NOTE(review): original lines 29-31 are absent from this listing.
 * pushquery() also assigns tmp->distance, tmp->length and tmp->flag, so
 * those members are presumably declared on the omitted lines - confirm
 * against the real file.
24  */
25 typedef struct NODE
26 {
    /* token class handed to pushquery(); the visible callers pass VAL or OPR */
27  int32 type;
    /* OPR: the operator character; VAL: ltree_crc32_sz() of the operand text */
28  int32 val;
    /* next node in the list (the node that was pushed just before this one) */
32  struct NODE *next;
33 } NODE;
34 
/*
 * Working state of the query parser, shared by gettoken_query(), makepol(),
 * pushquery(), pushval_asis() and queryin().
 *
 * NOTE(review): several member declarations are absent from this listing
 * (original lines 38, 39, 41, 43, 46 and 47).  The code in this file also
 * uses state->state, state->count, state->str, state->num, state->lenop and
 * state->sumlen, so those are presumably the omitted members - confirm
 * against the real file.
 */
35 typedef struct
36 {
    /* current read position in the query string; gettoken_query() advances it */
37  char *buf;
40  /* reverse polish notation in list (for temporary usage) */
42  /* number in str */
44 
45  /* user-friendly operand */
    /* start of the buffer that collects the NUL-terminated operand strings */
48  char *op;
    /* next free byte inside op; pushval_asis() appends here */
49  char *curop;
50 } QPRS_STATE;
51 
52 /*
53  * get token from query string
54  */
55 static int32
56 gettoken_query(QPRS_STATE *state, int32 *val, int32 *lenval, char **strval, uint16 *flag)
57 {
58  int charlen;
59 
60  for (;;)
61  {
62  charlen = pg_mblen(state->buf);
63 
64  switch (state->state)
65  {
66  case WAITOPERAND:
67  if (charlen == 1 && t_iseq(state->buf, '!'))
68  {
69  (state->buf)++;
70  *val = (int32) '!';
71  return OPR;
72  }
73  else if (charlen == 1 && t_iseq(state->buf, '('))
74  {
75  state->count++;
76  (state->buf)++;
77  return OPEN;
78  }
79  else if (ISALNUM(state->buf))
80  {
81  state->state = INOPERAND;
82  *strval = state->buf;
83  *lenval = charlen;
84  *flag = 0;
85  }
86  else if (!t_isspace(state->buf))
87  ereport(ERROR,
88  (errcode(ERRCODE_SYNTAX_ERROR),
89  errmsg("operand syntax error")));
90  break;
91  case INOPERAND:
92  if (ISALNUM(state->buf))
93  {
94  if (*flag)
95  ereport(ERROR,
96  (errcode(ERRCODE_SYNTAX_ERROR),
97  errmsg("modifiers syntax error")));
98  *lenval += charlen;
99  }
100  else if (charlen == 1 && t_iseq(state->buf, '%'))
101  *flag |= LVAR_SUBLEXEME;
102  else if (charlen == 1 && t_iseq(state->buf, '@'))
103  *flag |= LVAR_INCASE;
104  else if (charlen == 1 && t_iseq(state->buf, '*'))
105  *flag |= LVAR_ANYEND;
106  else
107  {
108  state->state = WAITOPERATOR;
109  return VAL;
110  }
111  break;
112  case WAITOPERATOR:
113  if (charlen == 1 && (t_iseq(state->buf, '&') || t_iseq(state->buf, '|')))
114  {
115  state->state = WAITOPERAND;
116  *val = (int32) *(state->buf);
117  (state->buf)++;
118  return OPR;
119  }
120  else if (charlen == 1 && t_iseq(state->buf, ')'))
121  {
122  (state->buf)++;
123  state->count--;
124  return (state->count < 0) ? ERR : CLOSE;
125  }
126  else if (*(state->buf) == '\0')
127  return (state->count) ? ERR : END;
128  else if (charlen == 1 && !t_iseq(state->buf, ' '))
129  return ERR;
130  break;
131  default:
132  return ERR;
133  break;
134  }
135 
136  state->buf += charlen;
137  }
138 }
139 
140 /*
141  * push new one in polish notation reverse view
 *
 * Allocates a NODE, fills it from the arguments and links it at the head
 * of state->str, then bumps state->num.  Raises an error if "distance"
 * exceeds 0xffff or "lenval" exceeds 0xff.
 *
 * NOTE(review): the signature line (original line 144) is absent from this
 * listing.  The cross-reference index at the end of the listing gives it as
 * pushquery(QPRS_STATE *state, int32 type, int32 val, int32 distance,
 * int32 lenval, uint16 flag) - confirm against the real file.
142  */
143 static void
145 {
146  NODE *tmp = (NODE *) palloc(sizeof(NODE));
147 
148  tmp->type = type;
149  tmp->val = val;
150  tmp->flag = flag;
    /* range checks happen before the values are stored in the node */
151  if (distance > 0xffff)
152  ereport(ERROR,
153  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
154  errmsg("value is too big")));
155  if (lenval > 0xff)
156  ereport(ERROR,
157  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
158  errmsg("operand is too long")));
159  tmp->distance = distance;
160  tmp->length = lenval;
    /* link at the head, so the list ends up in reverse order of the pushes */
161  tmp->next = state->str;
162  state->str = tmp;
163  state->num++;
164 }
165 
166 /*
167  * This function is used for query text parsing
168  */
169 static void
170 pushval_asis(QPRS_STATE *state, int type, char *strval, int lenval, uint16 flag)
171 {
172  if (lenval > 0xffff)
173  ereport(ERROR,
174  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
175  errmsg("word is too long")));
176 
177  pushquery(state, type, ltree_crc32_sz(strval, lenval),
178  state->curop - state->op, lenval, flag);
179 
180  while (state->curop - state->op + lenval + 1 >= state->lenop)
181  {
182  int32 tmp = state->curop - state->op;
183 
184  state->lenop *= 2;
185  state->op = (char *) repalloc((void *) state->op, state->lenop);
186  state->curop = state->op + tmp;
187  }
188  memcpy((void *) state->curop, (void *) strval, lenval);
189  state->curop += lenval;
190  *(state->curop) = '\0';
191  state->curop++;
192  state->sumlen += lenval + 1;
193 }
194 
/* capacity of the per-call operator stack used by makepol() */
195 #define STACKDEPTH 32
196 /*
197  * make polish notation of query
 *
 * Reads tokens with gettoken_query() and emits nodes through
 * pushval_asis()/pushquery().  Operators are held on a local stack of at
 * most STACKDEPTH entries until their operands have been emitted.  Each
 * '(' is handled by a recursive call that runs until the matching ')'.
 * Returns END on success; a syntax error is raised through ereport().
 *
 * NOTE(review): the signature line (original line 200) and original line
 * 211 are absent from this listing.  The cross-reference index at the end
 * of the listing gives the signature as makepol(QPRS_STATE *state); line
 * 211 presumably holds the stack-depth check the comment below refers to -
 * confirm against the real file.
198  */
199 static int32
201 {
202  int32 val = 0,
203  type;
204  int32 lenval = 0;
205  char *strval = NULL;
206  int32 stack[STACKDEPTH];
207  int32 lenstack = 0;
208  uint16 flag = 0;
209 
210  /* since this function recurses, it could be driven to stack overflow */
212 
213  while ((type = gettoken_query(state, &val, &lenval, &strval, &flag)) != END)
214  {
215  switch (type)
216  {
217  case VAL:
218  pushval_asis(state, VAL, strval, lenval, flag);
    /* the operand is out; emit any '&' and '!' waiting on top of the stack */
219  while (lenstack && (stack[lenstack - 1] == (int32) '&' ||
220  stack[lenstack - 1] == (int32) '!'))
221  {
222  lenstack--;
223  pushquery(state, OPR, stack[lenstack], 0, 0, 0);
224  }
225  break;
226  case OPR:
    /* a '|' arriving while the stack is non-empty is emitted at once */
227  if (lenstack && val == (int32) '|')
228  pushquery(state, OPR, val, 0, 0, 0);
229  else
230  {
231  if (lenstack == STACKDEPTH)
232  /* internal error */
233  elog(ERROR, "stack too short");
234  stack[lenstack] = val;
235  lenstack++;
236  }
237  break;
238  case OPEN:
    /* parse the parenthesized sub-expression, then treat it like an operand */
239  if (makepol(state) == ERR)
240  return ERR;
241  while (lenstack && (stack[lenstack - 1] == (int32) '&' ||
242  stack[lenstack - 1] == (int32) '!'))
243  {
244  lenstack--;
245  pushquery(state, OPR, stack[lenstack], 0, 0, 0);
246  }
247  break;
248  case CLOSE:
    /* end of this nesting level: flush every operator still stacked */
249  while (lenstack)
250  {
251  lenstack--;
252  pushquery(state, OPR, stack[lenstack], 0, 0, 0);
253  };
254  return END;
255  break;
256  case ERR:
257  default:
258  ereport(ERROR,
259  (errcode(ERRCODE_SYNTAX_ERROR),
260  errmsg("syntax error")));
261 
262  return ERR;
263 
264  }
265  }
    /* end of input: flush every operator still stacked */
266  while (lenstack)
267  {
268  lenstack--;
269  pushquery(state, OPR, stack[lenstack], 0, 0, 0);
270  };
271  return END;
272 }
273 
/*
 * findoprnd: fill in the "left" member of every item of the sub-expression
 * that starts at ptr[*pos], advancing *pos past that sub-expression.
 *
 * An operand gets left = 0, a '!' operator gets left = 1, and any other
 * operator gets the distance from itself to the item at which its second
 * sub-expression starts.
 *
 * NOTE(review): original line 278 is absent from this listing; it
 * presumably holds the stack-depth check the comment below refers to -
 * confirm against the real file.
 */
274 static void
275 findoprnd(ITEM *ptr, int32 *pos)
276 {
277  /* since this function recurses, it could be driven to stack overflow. */
279 
280  if (ptr[*pos].type == VAL || ptr[*pos].type == VALTRUE)
281  {
    /* operand: nothing below it */
282  ptr[*pos].left = 0;
283  (*pos)++;
284  }
285  else if (ptr[*pos].val == (int32) '!')
286  {
    /* unary operator: its only argument is the very next item */
287  ptr[*pos].left = 1;
288  (*pos)++;
289  findoprnd(ptr, pos);
290  }
291  else
292  {
    /* binary operator: walk the first argument, note where the second begins */
293  ITEM *curitem = &ptr[*pos];
294  int32 tmp = *pos;
295 
296  (*pos)++;
297  findoprnd(ptr, pos);
298  curitem->left = *pos - tmp;
299  findoprnd(ptr, pos);
300  }
301 }
302 
303 
304 /*
305  * input
 *
 * queryin: parse the NUL-terminated query text in "buf" and build the
 * ltxtquery value for it.
 *
 * Steps: initialise the parser state, run makepol() to collect the nodes
 * and the operand strings, size and allocate the result, copy the nodes
 * into its ITEM array (freeing them as it goes), copy the operand strings
 * after them, and finally let findoprnd() set each item's "left" member.
 *
 * Raises an error for an empty query and for a query that
 * LTXTQUERY_TOO_BIG() rejects.  Returns the palloc0'd result.
 *
 * NOTE(review): original line 310 is absent from this listing; it
 * presumably declares the local "state" used below - confirm against the
 * real file.
306  */
307 static ltxtquery *
308 queryin(char *buf)
309 {
311  int32 i;
312  ltxtquery *query;
313  int32 commonlen;
314  ITEM *ptr;
315  NODE *tmp;
316  int32 pos = 0;
317 
318 #ifdef BS_DEBUG
319  char pbuf[16384],
320  *cur;
321 #endif
322 
323  /* init state */
324  state.buf = buf;
325  state.state = WAITOPERAND;
326  state.count = 0;
327  state.num = 0;
328  state.str = NULL;
329 
330  /* init list of operand */
331  state.sumlen = 0;
332  state.lenop = 64;
333  state.curop = state.op = (char *) palloc(state.lenop);
334  *(state.curop) = '\0';
335 
336  /* parse query & make polish notation (postfix, but in reverse order) */
337  makepol(&state);
338  if (!state.num)
339  ereport(ERROR,
340  (errcode(ERRCODE_SYNTAX_ERROR),
341  errmsg("syntax error"),
342  errdetail("Empty query.")));
343 
344  if (LTXTQUERY_TOO_BIG(state.num, state.sumlen))
345  ereport(ERROR,
346  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
347  errmsg("ltxtquery is too large")));
348  commonlen = COMPUTESIZE(state.num, state.sumlen);
349 
350  query = (ltxtquery *) palloc0(commonlen);
351  SET_VARSIZE(query, commonlen);
352  query->size = state.num;
353  ptr = GETQUERY(query);
354 
355  /* set item in polish notation */
    /* the list is walked from its head, i.e. from the node pushed last */
356  for (i = 0; i < state.num; i++)
357  {
358  ptr[i].type = state.str->type;
359  ptr[i].val = state.str->val;
360  ptr[i].distance = state.str->distance;
361  ptr[i].length = state.str->length;
362  ptr[i].flag = state.str->flag;
363  tmp = state.str->next;
364  pfree(state.str);
365  state.str = tmp;
366  }
367 
368  /* set user-friendly operand view */
369  memcpy((void *) GETOPERAND(query), (void *) state.op, state.sumlen);
370  pfree(state.op);
371 
372  /* set left operand's position for every operator */
373  pos = 0;
374  findoprnd(ptr, &pos);
375 
376  return query;
377 }
378 
379 /*
380  * in without morphology
 *
 * NOTE(review): only the return type and the braces of this definition are
 * present in this listing; original lines 382, 384 and 386 are absent.  The
 * cross-reference index at the end of the listing places
 * "Datum ltxtq_in(PG_FUNCTION_ARGS)" at line 384.  The body is not visible
 * here, so nothing is stated about it - consult the real file.
381  */
383 Datum
385 {
387 }
388 
389 /*
390  * ltxtquery type recv function
391  *
392  * The type is sent as text in binary mode, so this is almost the same
393  * as the input function, but it's prefixed with a version number so we
394  * can change the binary format sent in future if necessary. For now,
395  * only version 1 is supported.
 *
 * Reads a one-byte version, then the rest of the message as text, and
 * hands that text to queryin().  Any version other than 1 raises an error.
 *
 * NOTE(review): original lines 397, 399 and 401 are absent from this
 * listing.  The cross-reference index at the end of the listing places
 * "Datum ltxtq_recv(PG_FUNCTION_ARGS)" at line 399; line 401 presumably
 * declares the message buffer "buf" used below - confirm against the real
 * file.
396  */
398 Datum
400 {
402  int version = pq_getmsgint(buf, 1);
403  char *str;
404  int nbytes;
405  ltxtquery *res;
406 
407  if (version != 1)
408  elog(ERROR, "unsupported ltxtquery version number %d", version);
409 
    /* everything after the version byte is the query text */
410  str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
411  res = queryin(str);
412  pfree(str);
413 
414  PG_RETURN_POINTER(res);
415 }
416 
417 /*
418  * out function
 *
 * INFIX is the working state of infix(): the position in the item array
 * being rendered plus a growable, NUL-terminated output buffer.
419  */
420 typedef struct
421 {
    /* item currently being rendered; infix() advances it as items are consumed */
422  ITEM *curpol;
    /* start of the output buffer; may move when RESIZEBUF() enlarges it */
423  char *buf;
    /* next write position inside buf */
424  char *cur;
    /* operand strings of the query; an item's text is at op + distance */
425  char *op;
    /* allocated size of buf in bytes */
426  int32 buflen;
427 } INFIX;
428 
/*
 * RESIZEBUF: make sure the output buffer of "inf" can take "addsize" more
 * bytes plus a terminating NUL.
 *
 * The allocation is doubled until the request fits; inf->cur is re-derived
 * after every repalloc() because the buffer may have moved.  "inf" must
 * point to a structure with buf, cur and buflen members, and both arguments
 * are evaluated more than once, so they must be free of side effects.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as a
 * single statement, e.g. as the unbraced body of an if/else.
 */
#define RESIZEBUF(inf, addsize) \
	do { \
		while (((inf)->cur - (inf)->buf) + (addsize) + 1 >= (inf)->buflen) \
		{ \
			int32		resizebuf_len = (inf)->cur - (inf)->buf; \
			(inf)->buflen *= 2; \
			(inf)->buf = (char *) repalloc((void *) (inf)->buf, (inf)->buflen); \
			(inf)->cur = (inf)->buf + resizebuf_len; \
		} \
	} while (0)
437 
438 /*
439  * recursive walk on tree and print it in
440  * infix (human-readable) view
 *
 * Renders the sub-expression starting at in->curpol into in's buffer and
 * leaves in->curpol just past it.  The buffer is kept NUL-terminated.
 * "first" is true for the outermost call; a '|' sub-expression is wrapped
 * in "( ... )" unless it is the outermost one.
 *
 * NOTE(review): original line 446 is absent from this listing; it
 * presumably holds the stack-depth check the comment below refers to -
 * confirm against the real file.
441  */
442 static void
443 infix(INFIX *in, bool first)
444 {
445  /* since this function recurses, it could be driven to stack overflow. */
447 
448  if (in->curpol->type == VAL)
449  {
    /* operand: its NUL-terminated text lives in the operand area */
450  char *op = in->op + in->curpol->distance;
451 
452  RESIZEBUF(in, in->curpol->length * 2 + 5);
453  while (*op)
454  {
455  *(in->cur) = *op;
456  op++;
457  in->cur++;
458  }
    /* re-append the modifier characters recorded in the item's flags */
459  if (in->curpol->flag & LVAR_SUBLEXEME)
460  {
461  *(in->cur) = '%';
462  in->cur++;
463  }
464  if (in->curpol->flag & LVAR_INCASE)
465  {
466  *(in->cur) = '@';
467  in->cur++;
468  }
469  if (in->curpol->flag & LVAR_ANYEND)
470  {
471  *(in->cur) = '*';
472  in->cur++;
473  }
474  *(in->cur) = '\0';
475  in->curpol++;
476  }
477  else if (in->curpol->val == (int32) '!')
478  {
    /* unary NOT: parenthesize its argument only if that is itself an operator */
479  bool isopr = false;
480 
481  RESIZEBUF(in, 1);
482  *(in->cur) = '!';
483  in->cur++;
484  *(in->cur) = '\0';
485  in->curpol++;
486  if (in->curpol->type == OPR)
487  {
488  isopr = true;
489  RESIZEBUF(in, 2);
490  sprintf(in->cur, "( ");
491  in->cur = strchr(in->cur, '\0');
492  }
493  infix(in, isopr);
494  if (isopr)
495  {
496  RESIZEBUF(in, 2);
497  sprintf(in->cur, " )");
498  in->cur = strchr(in->cur, '\0');
499  }
500  }
501  else
502  {
    /* binary operator: the right operand is stored first, then the left one */
503  int32 op = in->curpol->val;
504  INFIX nrm;
505 
506  in->curpol++;
507  if (op == (int32) '|' && !first)
508  {
509  RESIZEBUF(in, 2);
510  sprintf(in->cur, "( ");
511  in->cur = strchr(in->cur, '\0');
512  }
513 
    /* render the right operand into a scratch buffer so it can be printed last */
514  nrm.curpol = in->curpol;
515  nrm.op = in->op;
516  nrm.buflen = 16;
517  nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
518 
519  /* get right operand */
520  infix(&nrm, false);
521 
522  /* get & print left operand */
    /* continue from where the scratch walk stopped */
523  in->curpol = nrm.curpol;
524  infix(in, false);
525 
526  /* print operator & right operand */
    /* 3 = space, operator character, space */
527  RESIZEBUF(in, 3 + (nrm.cur - nrm.buf));
528  sprintf(in->cur, " %c %s", op, nrm.buf);
529  in->cur = strchr(in->cur, '\0');
530  pfree(nrm.buf);
531 
532  if (op == (int32) '|' && !first)
533  {
534  RESIZEBUF(in, 2);
535  sprintf(in->cur, " )");
536  in->cur = strchr(in->cur, '\0');
537  }
538  }
539 }
540 
/*
 * Output function: renders the ltxtquery argument as text with infix() and
 * returns the palloc'd, NUL-terminated string.  A query with size 0 raises
 * an error.
 *
 * NOTE(review): original lines 541 and 543 are absent from this listing.
 * The cross-reference index at the end of the listing places
 * "Datum ltxtq_out(PG_FUNCTION_ARGS)" at line 543 - confirm against the
 * real file.
 */
542 Datum
544 {
545  ltxtquery *query = PG_GETARG_LTXTQUERY_P(0);
546  INFIX nrm;
547 
548  if (query->size == 0)
549  ereport(ERROR,
550  (errcode(ERRCODE_SYNTAX_ERROR),
551  errmsg("syntax error"),
552  errdetail("Empty query.")));
553 
    /* start with a small, empty buffer; infix() enlarges it as needed */
554  nrm.curpol = GETQUERY(query);
555  nrm.buflen = 32;
556  nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
557  *(nrm.cur) = '\0';
558  nrm.op = GETOPERAND(query);
559  infix(&nrm, true);
560 
561  PG_RETURN_POINTER(nrm.buf);
562 }
563 
564 /*
565  * ltxtquery type send function
566  *
567  * The type is sent as text in binary mode, so this is almost the same
568  * as the output function, but it's prefixed with a version number so we
569  * can change the binary format sent in future if necessary. For now,
570  * only version 1 is supported.
 *
 * Renders the query with infix(), then writes a one-byte version followed
 * by the rendered text into the send buffer.  A query with size 0 raises
 * an error.
 *
 * NOTE(review): original lines 572, 574, 577 and 599 are absent from this
 * listing.  The cross-reference index at the end of the listing places
 * "Datum ltxtq_send(PG_FUNCTION_ARGS)" at line 574; line 577 presumably
 * declares the send buffer "buf" and line 599 presumably returns the
 * finished message - confirm against the real file.
571  */
573 Datum
575 {
576  ltxtquery *query = PG_GETARG_LTXTQUERY_P(0);
578  int version = 1;
579  INFIX nrm;
580 
581  if (query->size == 0)
582  ereport(ERROR,
583  (errcode(ERRCODE_SYNTAX_ERROR),
584  errmsg("syntax error"),
585  errdetail("Empty query.")));
586 
    /* start with a small, empty buffer; infix() enlarges it as needed */
587  nrm.curpol = GETQUERY(query);
588  nrm.buflen = 32;
589  nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
590  *(nrm.cur) = '\0';
591  nrm.op = GETOPERAND(query);
592  infix(&nrm, true);
593 
    /* message layout: version byte, then the text form of the query */
594  pq_begintypsend(&buf);
595  pq_sendint8(&buf, version);
596  pq_sendtext(&buf, nrm.buf, strlen(nrm.buf));
597  pfree(nrm.buf);
598 
600 }
signed short int16
Definition: c.h:361
Definition: _int.h:140
#define PG_RETURN_POINTER(x)
Definition: fmgr.h:360
#define LVAR_INCASE
Definition: ltree.h:62
#define WAITOPERAND
Definition: ltxtquery_io.c:17
static int32 makepol(QPRS_STATE *state)
Definition: ltxtquery_io.c:200
Datum ltxtq_send(PG_FUNCTION_ARGS)
Definition: ltxtquery_io.c:574
Definition: _int_bool.c:26
#define LTXTQUERY_TOO_BIG(size, lenofoperand)
Definition: ltree.h:148
#define ERR
Definition: _int.h:161
unsigned int ltree_crc32_sz(const char *buf, int size)
Definition: crc32.c:23
int32 size
Definition: ltree.h:142
static void pushval_asis(QPRS_STATE *state, int type, char *strval, int lenval, uint16 flag)
Definition: ltxtquery_io.c:170
#define ISALNUM(x)
Definition: ltree.h:116
ITEM * curpol
Definition: _int_bool.c:550
#define LVAR_ANYEND
Definition: ltree.h:61
int32 val
Definition: _int_bool.c:29
int32 count
Definition: ltxtquery_io.c:39
char * op
Definition: ltxtquery_io.c:48
void pq_begintypsend(StringInfo buf)
Definition: pqformat.c:328
uint8 length
Definition: ltree.h:131
char * curop
Definition: ltxtquery_io.c:49
StringInfoData * StringInfo
Definition: stringinfo.h:44
char * op
Definition: ltxtquery_io.c:425
PG_FUNCTION_INFO_V1(ltxtq_in)
struct cursor * cur
Definition: ecpg.c:28
char * buf
Definition: ltxtquery_io.c:37
int errcode(int sqlerrcode)
Definition: elog.c:610
void pq_sendtext(StringInfo buf, const char *str, int slen)
Definition: pqformat.c:174
static int32 gettoken_query(QPRS_STATE *state, int32 *val, int32 *lenval, char **strval, uint16 *flag)
Definition: ltxtquery_io.c:56
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:276
#define PG_RETURN_BYTEA_P(x)
Definition: fmgr.h:369
uint8 flag
Definition: ltree.h:129
bytea * pq_endtypsend(StringInfo buf)
Definition: pqformat.c:348
struct NODE NODE
#define GETQUERY(x)
Definition: _int.h:157
int32 sumlen
Definition: ltxtquery_io.c:47
struct NODE * next
Definition: _int_bool.c:30
NODE * str
Definition: ltxtquery_io.c:41
signed int int32
Definition: c.h:362
#define GETOPERAND(x)
Definition: ltree.h:151
int16 type
Definition: _int.h:142
int32 state
Definition: ltxtquery_io.c:38
#define sprintf
Definition: port.h:195
unsigned short uint16
Definition: c.h:373
void pfree(void *pointer)
Definition: mcxt.c:1057
#define END
Definition: _int.h:160
#define CLOSE
Definition: _int.h:165
#define ERROR
Definition: elog.h:43
#define VALTRUE
Definition: ltree.h:161
uint16 flag
Definition: ltxtquery_io.c:31
int t_isspace(const char *ptr)
Definition: ts_locale.c:53
static char * buf
Definition: pg_test_fsync.c:68
#define WAITOPERATOR
Definition: ltxtquery_io.c:19
void check_stack_depth(void)
Definition: postgres.c:3312
#define t_iseq(x, c)
Definition: ts_locale.h:46
int errdetail(const char *fmt,...)
Definition: elog.c:957
Datum ltxtq_recv(PG_FUNCTION_ARGS)
Definition: ltxtquery_io.c:399
int32 val
Definition: _int.h:144
static ltxtquery * queryin(char *buf)
Definition: ltxtquery_io.c:308
#define OPEN
Definition: _int.h:164
char * pq_getmsgtext(StringInfo msg, int rawbytes, int *nbytes)
Definition: pqformat.c:548
static void pushquery(QPRS_STATE *state, int32 type, int32 val, int32 distance, int32 lenval, uint16 flag)
Definition: ltxtquery_io.c:144
void * palloc0(Size size)
Definition: mcxt.c:981
Datum ltxtq_in(PG_FUNCTION_ARGS)
Definition: ltxtquery_io.c:384
uintptr_t Datum
Definition: postgres.h:367
#define COMPUTESIZE(size)
Definition: _int.h:155
int16 left
Definition: _int.h:143
#define ereport(elevel,...)
Definition: elog.h:144
#define VAL
Definition: _int.h:162
int32 lenop
Definition: ltxtquery_io.c:46
int16 length
Definition: ltxtquery_io.c:30
#define OPR
Definition: _int.h:163
Definition: regguts.h:298
#define LVAR_SUBLEXEME
Definition: ltree.h:63
static void infix(INFIX *in, bool first)
Definition: ltxtquery_io.c:443
int pg_mblen(const char *mbstr)
Definition: mbutils.c:907
#define INOPERAND
Definition: ltxtquery_io.c:18
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1070
#define PG_GETARG_LTXTQUERY_P(n)
Definition: ltree.h:214
#define STACKDEPTH
Definition: ltxtquery_io.c:195
int16 distance
Definition: ltxtquery_io.c:29
char * cur
Definition: _int_bool.c:552
static void findoprnd(ITEM *ptr, int32 *pos)
Definition: ltxtquery_io.c:275
void * palloc(Size size)
Definition: mcxt.c:950
int32 buflen
Definition: _int_bool.c:553
int errmsg(const char *fmt,...)
Definition: elog.c:824
#define elog(elevel,...)
Definition: elog.h:214
int i
#define RESIZEBUF(inf, addsize)
Definition: ltxtquery_io.c:429
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
unsigned int pq_getmsgint(StringInfo msg, int b)
Definition: pqformat.c:417
#define SET_VARSIZE(PTR, len)
Definition: postgres.h:329
Datum ltxtq_out(PG_FUNCTION_ARGS)
Definition: ltxtquery_io.c:543
static void pq_sendint8(StringInfo buf, uint8 i)
Definition: pqformat.h:129
uint16 distance
Definition: ltree.h:132
int32 type
Definition: _int_bool.c:28
char * buf
Definition: _int_bool.c:551