PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
tsquery.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * tsquery.c
4  * I/O functions for tsquery
5  *
6  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
7  *
8  *
9  * IDENTIFICATION
10  * src/backend/utils/adt/tsquery.c
11  *
12  *-------------------------------------------------------------------------
13  */
14 
15 #include "postgres.h"
16 
17 #include "libpq/pqformat.h"
18 #include "miscadmin.h"
19 #include "tsearch/ts_type.h"
20 #include "tsearch/ts_locale.h"
21 #include "tsearch/ts_utils.h"
22 #include "utils/builtins.h"
23 #include "utils/memutils.h"
24 #include "utils/pg_crc.h"
25 
26 /* FTS operator priorities, see ts_type.h */
28 {
29  4, /* OP_NOT */
30  2, /* OP_AND */
31  1, /* OP_OR */
32  3 /* OP_PHRASE */
33 };
34 
36 {
37  /* State for gettoken_query */
38  char *buffer; /* entire string we are scanning */
39  char *buf; /* current scan point */
40  int state;
41  int count; /* nesting count, incremented by (,
42  * decremented by ) */
43 
44  /* polish (prefix) notation in list, filled in by push* functions */
46 
47  /*
48  * Strings from operands are collected in op. curop is a pointer to the
49  * end of used space of op.
50  */
51  char *op;
52  char *curop;
53  int lenop; /* allocated size of op */
54  int sumlen; /* used size of op */
55 
56  /* state for value's parser */
58 };
59 
60 /* parser's states */
61 #define WAITOPERAND 1
62 #define WAITOPERATOR 2
63 #define WAITFIRSTOPERAND 3
64 #define WAITSINGLEOPERAND 4
65 
66 /*
67  * subroutine to parse the modifiers (weight and prefix flag currently)
68  * part, like ':AB*' of a query.
69  */
70 static char *
71 get_modifiers(char *buf, int16 *weight, bool *prefix)
72 {
73  *weight = 0;
74  *prefix = false;
75 
76  if (!t_iseq(buf, ':'))
77  return buf;
78 
79  buf++;
80  while (*buf && pg_mblen(buf) == 1)
81  {
82  switch (*buf)
83  {
84  case 'a':
85  case 'A':
86  *weight |= 1 << 3;
87  break;
88  case 'b':
89  case 'B':
90  *weight |= 1 << 2;
91  break;
92  case 'c':
93  case 'C':
94  *weight |= 1 << 1;
95  break;
96  case 'd':
97  case 'D':
98  *weight |= 1;
99  break;
100  case '*':
101  *prefix = true;
102  break;
103  default:
104  return buf;
105  }
106  buf++;
107  }
108 
109  return buf;
110 }
111 
112 /*
113  * Parse phrase operator. The operator
114  * may take the following forms:
115  *
116  * a <N> b (distance is exactly N lexemes)
117  * a <-> b (default distance = 1)
118  *
119  * The buffer should begin with '<' char
120  */
121 static char *
122 parse_phrase_operator(char *buf, int16 *distance)
123 {
124  enum
125  {
126  PHRASE_OPEN = 0,
127  PHRASE_DIST,
128  PHRASE_CLOSE,
129  PHRASE_ERR,
130  PHRASE_FINISH
131  } state = PHRASE_OPEN;
132  char *ptr = buf;
133  char *endptr;
134  long l = 1; /* default distance */
135 
136  while (*ptr)
137  {
138  switch (state)
139  {
140  case PHRASE_OPEN:
141  Assert(t_iseq(ptr, '<'));
142  state = PHRASE_DIST;
143  ptr++;
144  break;
145 
146  case PHRASE_DIST:
147  if (t_iseq(ptr, '-'))
148  {
149  state = PHRASE_CLOSE;
150  ptr++;
151  break;
152  }
153  if (!t_isdigit(ptr))
154  {
155  state = PHRASE_ERR;
156  break;
157  }
158 
159  errno = 0;
160  l = strtol(ptr, &endptr, 10);
161  if (ptr == endptr)
162  state = PHRASE_ERR;
163  else if (errno == ERANGE || l < 0 || l > MAXENTRYPOS)
164  ereport(ERROR,
165  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
166  errmsg("distance in phrase operator should not be greater than %d",
167  MAXENTRYPOS)));
168  else
169  {
170  state = PHRASE_CLOSE;
171  ptr = endptr;
172  }
173  break;
174 
175  case PHRASE_CLOSE:
176  if (t_iseq(ptr, '>'))
177  {
178  state = PHRASE_FINISH;
179  ptr++;
180  }
181  else
182  state = PHRASE_ERR;
183  break;
184 
185  case PHRASE_FINISH:
186  *distance = (int16) l;
187  return ptr;
188 
189  case PHRASE_ERR:
190  default:
191  goto err;
192  }
193  }
194 
195 err:
196  *distance = -1;
197  return buf;
198 }
199 
200 /*
201  * token types for parsing
202  */
203 typedef enum
204 {
205  PT_END = 0,
206  PT_ERR = 1,
207  PT_VAL = 2,
208  PT_OPR = 3,
209  PT_OPEN = 4,
211 } ts_tokentype;
212 
213 /*
214  * get token from query string
215  *
216  * *operator is filled in with OP_* when return values is PT_OPR,
217  * but *weight could contain a distance value in case of phrase operator.
218  * *strval, *lenval and *weight are filled in when return value is PT_VAL
219  *
220  */
221 static ts_tokentype
223  int8 *operator,
224  int *lenval, char **strval, int16 *weight, bool *prefix)
225 {
226  *weight = 0;
227  *prefix = false;
228 
229  while (1)
230  {
231  switch (state->state)
232  {
233  case WAITFIRSTOPERAND:
234  case WAITOPERAND:
235  if (t_iseq(state->buf, '!'))
236  {
237  (state->buf)++; /* can safely ++, t_iseq guarantee that
238  * pg_mblen()==1 */
239  *operator = OP_NOT;
240  state->state = WAITOPERAND;
241  return PT_OPR;
242  }
243  else if (t_iseq(state->buf, '('))
244  {
245  state->count++;
246  (state->buf)++;
247  state->state = WAITOPERAND;
248  return PT_OPEN;
249  }
250  else if (t_iseq(state->buf, ':'))
251  {
252  ereport(ERROR,
253  (errcode(ERRCODE_SYNTAX_ERROR),
254  errmsg("syntax error in tsquery: \"%s\"",
255  state->buffer)));
256  }
257  else if (!t_isspace(state->buf))
258  {
259  /*
260  * We rely on the tsvector parser to parse the value for
261  * us
262  */
263  reset_tsvector_parser(state->valstate, state->buf);
264  if (gettoken_tsvector(state->valstate, strval, lenval, NULL, NULL, &state->buf))
265  {
266  state->buf = get_modifiers(state->buf, weight, prefix);
267  state->state = WAITOPERATOR;
268  return PT_VAL;
269  }
270  else if (state->state == WAITFIRSTOPERAND)
271  return PT_END;
272  else
273  ereport(ERROR,
274  (errcode(ERRCODE_SYNTAX_ERROR),
275  errmsg("no operand in tsquery: \"%s\"",
276  state->buffer)));
277  }
278  break;
279  case WAITOPERATOR:
280  if (t_iseq(state->buf, '&'))
281  {
282  state->state = WAITOPERAND;
283  *operator = OP_AND;
284  (state->buf)++;
285  return PT_OPR;
286  }
287  else if (t_iseq(state->buf, '|'))
288  {
289  state->state = WAITOPERAND;
290  *operator = OP_OR;
291  (state->buf)++;
292  return PT_OPR;
293  }
294  else if (t_iseq(state->buf, '<'))
295  {
296  state->state = WAITOPERAND;
297  *operator = OP_PHRASE;
298  /* weight var is used as storage for distance */
299  state->buf = parse_phrase_operator(state->buf, weight);
300  if (*weight < 0)
301  return PT_ERR;
302  return PT_OPR;
303  }
304  else if (t_iseq(state->buf, ')'))
305  {
306  (state->buf)++;
307  state->count--;
308  return (state->count < 0) ? PT_ERR : PT_CLOSE;
309  }
310  else if (*(state->buf) == '\0')
311  return (state->count) ? PT_ERR : PT_END;
312  else if (!t_isspace(state->buf))
313  return PT_ERR;
314  break;
315  case WAITSINGLEOPERAND:
316  if (*(state->buf) == '\0')
317  return PT_END;
318  *strval = state->buf;
319  *lenval = strlen(state->buf);
320  state->buf += strlen(state->buf);
321  state->count++;
322  return PT_VAL;
323  default:
324  return PT_ERR;
325  break;
326  }
327  state->buf += pg_mblen(state->buf);
328  }
329 }
330 
331 /*
332  * Push an operator to state->polstr
333  */
334 void
336 {
337  QueryOperator *tmp;
338 
339  Assert(oper == OP_NOT || oper == OP_AND || oper == OP_OR || oper == OP_PHRASE);
340 
341  tmp = (QueryOperator *) palloc0(sizeof(QueryOperator));
342  tmp->type = QI_OPR;
343  tmp->oper = oper;
344  tmp->distance = (oper == OP_PHRASE) ? distance : 0;
345  /* left is filled in later with findoprnd */
346 
347  state->polstr = lcons(tmp, state->polstr);
348 }
349 
350 static void
351 pushValue_internal(TSQueryParserState state, pg_crc32 valcrc, int distance, int lenval, int weight, bool prefix)
352 {
353  QueryOperand *tmp;
354 
355  if (distance >= MAXSTRPOS)
356  ereport(ERROR,
357  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
358  errmsg("value is too big in tsquery: \"%s\"",
359  state->buffer)));
360  if (lenval >= MAXSTRLEN)
361  ereport(ERROR,
362  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
363  errmsg("operand is too long in tsquery: \"%s\"",
364  state->buffer)));
365 
366  tmp = (QueryOperand *) palloc0(sizeof(QueryOperand));
367  tmp->type = QI_VAL;
368  tmp->weight = weight;
369  tmp->prefix = prefix;
370  tmp->valcrc = (int32) valcrc;
371  tmp->length = lenval;
372  tmp->distance = distance;
373 
374  state->polstr = lcons(tmp, state->polstr);
375 }
376 
377 /*
378  * Push an operand to state->polstr.
379  *
380  * strval must point to a string equal to state->curop. lenval is the length
381  * of the string.
382  */
383 void
384 pushValue(TSQueryParserState state, char *strval, int lenval, int16 weight, bool prefix)
385 {
386  pg_crc32 valcrc;
387 
388  if (lenval >= MAXSTRLEN)
389  ereport(ERROR,
390  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
391  errmsg("word is too long in tsquery: \"%s\"",
392  state->buffer)));
393 
394  INIT_LEGACY_CRC32(valcrc);
395  COMP_LEGACY_CRC32(valcrc, strval, lenval);
396  FIN_LEGACY_CRC32(valcrc);
397  pushValue_internal(state, valcrc, state->curop - state->op, lenval, weight, prefix);
398 
399  /* append the value string to state.op, enlarging buffer if needed first */
400  while (state->curop - state->op + lenval + 1 >= state->lenop)
401  {
402  int used = state->curop - state->op;
403 
404  state->lenop *= 2;
405  state->op = (char *) repalloc((void *) state->op, state->lenop);
406  state->curop = state->op + used;
407  }
408  memcpy((void *) state->curop, (void *) strval, lenval);
409  state->curop += lenval;
410  *(state->curop) = '\0';
411  state->curop++;
412  state->sumlen += lenval + 1 /* \0 */ ;
413 }
414 
415 
416 /*
417  * Push a stopword placeholder to state->polstr
418  */
419 void
421 {
422  QueryOperand *tmp;
423 
424  tmp = (QueryOperand *) palloc0(sizeof(QueryOperand));
425  tmp->type = QI_VALSTOP;
426 
427  state->polstr = lcons(tmp, state->polstr);
428 }
429 
430 
431 #define STACKDEPTH 32
432 
433 typedef struct OperatorElement
434 {
438 
439 static void
440 pushOpStack(OperatorElement *stack, int *lenstack, int8 op, int16 distance)
441 {
442  if (*lenstack == STACKDEPTH) /* internal error */
443  elog(ERROR, "tsquery stack too small");
444 
445  stack[*lenstack].op = op;
446  stack[*lenstack].distance = distance;
447 
448  (*lenstack)++;
449 }
450 
451 static void
453  OperatorElement *stack, int *lenstack, int8 op)
454 {
455  int opPriority = OP_PRIORITY(op);
456 
457  while (*lenstack)
458  {
459  /* NOT is right associative unlike to others */
460  if ((op != OP_NOT && opPriority > OP_PRIORITY(stack[*lenstack - 1].op)) ||
461  (op == OP_NOT && opPriority >= OP_PRIORITY(stack[*lenstack - 1].op)))
462  break;
463 
464  (*lenstack)--;
465  pushOperator(state, stack[*lenstack].op,
466  stack[*lenstack].distance);
467  }
468 }
469 
470 /*
471  * Make polish (prefix) notation of query.
472  *
473  * See parse_tsquery for explanation of pushval.
474  */
475 static void
477  PushFunction pushval,
478  Datum opaque)
479 {
480  int8 operator = 0;
481  ts_tokentype type;
482  int lenval = 0;
483  char *strval = NULL;
484  OperatorElement opstack[STACKDEPTH];
485  int lenstack = 0;
486  int16 weight = 0;
487  bool prefix;
488 
489  /* since this function recurses, it could be driven to stack overflow */
491 
492  while ((type = gettoken_query(state, &operator, &lenval, &strval, &weight, &prefix)) != PT_END)
493  {
494  switch (type)
495  {
496  case PT_VAL:
497  pushval(opaque, state, strval, lenval, weight, prefix);
498  break;
499  case PT_OPR:
500  cleanOpStack(state, opstack, &lenstack, operator);
501  pushOpStack(opstack, &lenstack, operator, weight);
502  break;
503  case PT_OPEN:
504  makepol(state, pushval, opaque);
505  break;
506  case PT_CLOSE:
507  cleanOpStack(state, opstack, &lenstack, OP_OR /* lowest */ );
508  return;
509  case PT_ERR:
510  default:
511  ereport(ERROR,
512  (errcode(ERRCODE_SYNTAX_ERROR),
513  errmsg("syntax error in tsquery: \"%s\"",
514  state->buffer)));
515  }
516  }
517 
518  cleanOpStack(state, opstack, &lenstack, OP_OR /* lowest */ );
519 }
520 
521 static void
522 findoprnd_recurse(QueryItem *ptr, uint32 *pos, int nnodes, bool *needcleanup)
523 {
524  /* since this function recurses, it could be driven to stack overflow. */
526 
527  if (*pos >= nnodes)
528  elog(ERROR, "malformed tsquery: operand not found");
529 
530  if (ptr[*pos].type == QI_VAL)
531  {
532  (*pos)++;
533  }
534  else if (ptr[*pos].type == QI_VALSTOP)
535  {
536  *needcleanup = true; /* we'll have to remove stop words */
537  (*pos)++;
538  }
539  else
540  {
541  Assert(ptr[*pos].type == QI_OPR);
542 
543  if (ptr[*pos].qoperator.oper == OP_NOT)
544  {
545  ptr[*pos].qoperator.left = 1; /* fixed offset */
546  (*pos)++;
547 
548  /* process the only argument */
549  findoprnd_recurse(ptr, pos, nnodes, needcleanup);
550  }
551  else
552  {
553  QueryOperator *curitem = &ptr[*pos].qoperator;
554  int tmp = *pos; /* save current position */
555 
556  Assert(curitem->oper == OP_AND ||
557  curitem->oper == OP_OR ||
558  curitem->oper == OP_PHRASE);
559 
560  (*pos)++;
561 
562  /* process RIGHT argument */
563  findoprnd_recurse(ptr, pos, nnodes, needcleanup);
564 
565  curitem->left = *pos - tmp; /* set LEFT arg's offset */
566 
567  /* process LEFT argument */
568  findoprnd_recurse(ptr, pos, nnodes, needcleanup);
569  }
570  }
571 }
572 
573 
574 /*
575  * Fill in the left-fields previously left unfilled.
576  * The input QueryItems must be in polish (prefix) notation.
577  * Also, set *needcleanup to true if there are any QI_VALSTOP nodes.
578  */
579 static void
580 findoprnd(QueryItem *ptr, int size, bool *needcleanup)
581 {
582  uint32 pos;
583 
584  *needcleanup = false;
585  pos = 0;
586  findoprnd_recurse(ptr, &pos, size, needcleanup);
587 
588  if (pos != size)
589  elog(ERROR, "malformed tsquery: extra nodes");
590 }
591 
592 
593 /*
594  * Each value (operand) in the query is passed to pushval. pushval can
595  * transform the simple value to an arbitrarily complex expression using
596  * pushValue and pushOperator. It must push a single value with pushValue,
597  * a complete expression with all operands, or a stopword placeholder
598  * with pushStop, otherwise the prefix notation representation will be broken,
599  * having an operator with no operand.
600  *
601  * opaque is passed on to pushval as is, pushval can use it to store its
602  * private state.
603  */
604 TSQuery
606  PushFunction pushval,
607  Datum opaque,
608  bool isplain)
609 {
610  struct TSQueryParserStateData state;
611  int i;
612  TSQuery query;
613  int commonlen;
614  QueryItem *ptr;
615  ListCell *cell;
616  bool needcleanup;
617 
618  /* init state */
619  state.buffer = buf;
620  state.buf = buf;
621  state.state = (isplain) ? WAITSINGLEOPERAND : WAITFIRSTOPERAND;
622  state.count = 0;
623  state.polstr = NIL;
624 
625  /* init value parser's state */
626  state.valstate = init_tsvector_parser(state.buffer, true, true);
627 
628  /* init list of operand */
629  state.sumlen = 0;
630  state.lenop = 64;
631  state.curop = state.op = (char *) palloc(state.lenop);
632  *(state.curop) = '\0';
633 
634  /* parse query & make polish notation (postfix, but in reverse order) */
635  makepol(&state, pushval, opaque);
636 
638 
639  if (list_length(state.polstr) == 0)
640  {
641  ereport(NOTICE,
642  (errmsg("text-search query doesn't contain lexemes: \"%s\"",
643  state.buffer)));
644  query = (TSQuery) palloc(HDRSIZETQ);
645  SET_VARSIZE(query, HDRSIZETQ);
646  query->size = 0;
647  return query;
648  }
649 
650  if (TSQUERY_TOO_BIG(list_length(state.polstr), state.sumlen))
651  ereport(ERROR,
652  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
653  errmsg("tsquery is too large")));
654  commonlen = COMPUTESIZE(list_length(state.polstr), state.sumlen);
655 
656  /* Pack the QueryItems in the final TSQuery struct to return to caller */
657  query = (TSQuery) palloc0(commonlen);
658  SET_VARSIZE(query, commonlen);
659  query->size = list_length(state.polstr);
660  ptr = GETQUERY(query);
661 
662  /* Copy QueryItems to TSQuery */
663  i = 0;
664  foreach(cell, state.polstr)
665  {
666  QueryItem *item = (QueryItem *) lfirst(cell);
667 
668  switch (item->type)
669  {
670  case QI_VAL:
671  memcpy(&ptr[i], item, sizeof(QueryOperand));
672  break;
673  case QI_VALSTOP:
674  ptr[i].type = QI_VALSTOP;
675  break;
676  case QI_OPR:
677  memcpy(&ptr[i], item, sizeof(QueryOperator));
678  break;
679  default:
680  elog(ERROR, "unrecognized QueryItem type: %d", item->type);
681  }
682  i++;
683  }
684 
685  /* Copy all the operand strings to TSQuery */
686  memcpy((void *) GETOPERAND(query), (void *) state.op, state.sumlen);
687  pfree(state.op);
688 
689  /*
690  * Set left operand pointers for every operator. While we're at it,
691  * detect whether there are any QI_VALSTOP nodes.
692  */
693  findoprnd(ptr, query->size, &needcleanup);
694 
695  /*
696  * If there are QI_VALSTOP nodes, delete them and simplify the tree.
697  */
698  if (needcleanup)
699  query = cleanup_tsquery_stopwords(query);
700 
701  return query;
702 }
703 
704 static void
705 pushval_asis(Datum opaque, TSQueryParserState state, char *strval, int lenval,
706  int16 weight, bool prefix)
707 {
708  pushValue(state, strval, lenval, weight, prefix);
709 }
710 
711 /*
712  * in without morphology
713  */
714 Datum
716 {
717  char *in = PG_GETARG_CSTRING(0);
718 
720 }
721 
722 /*
723  * out function
724  */
725 typedef struct
726 {
728  char *buf;
729  char *cur;
730  char *op;
731  int buflen;
732 } INFIX;
733 
734 /* Makes sure inf->buf is large enough for adding 'addsize' bytes */
735 #define RESIZEBUF(inf, addsize) \
736 while( ( (inf)->cur - (inf)->buf ) + (addsize) + 1 >= (inf)->buflen ) \
737 { \
738  int len = (inf)->cur - (inf)->buf; \
739  (inf)->buflen *= 2; \
740  (inf)->buf = (char*) repalloc( (void*)(inf)->buf, (inf)->buflen ); \
741  (inf)->cur = (inf)->buf + len; \
742 }
743 
744 /*
745  * recursively traverse the tree and
746  * print it in infix (human-readable) form
747  */
748 static void
749 infix(INFIX *in, int parentPriority, bool rightPhraseOp)
750 {
751  /* since this function recurses, it could be driven to stack overflow. */
753 
754  if (in->curpol->type == QI_VAL)
755  {
756  QueryOperand *curpol = &in->curpol->qoperand;
757  char *op = in->op + curpol->distance;
758  int clen;
759 
760  RESIZEBUF(in, curpol->length * (pg_database_encoding_max_length() + 1) + 2 + 6);
761  *(in->cur) = '\'';
762  in->cur++;
763  while (*op)
764  {
765  if (t_iseq(op, '\''))
766  {
767  *(in->cur) = '\'';
768  in->cur++;
769  }
770  else if (t_iseq(op, '\\'))
771  {
772  *(in->cur) = '\\';
773  in->cur++;
774  }
775  COPYCHAR(in->cur, op);
776 
777  clen = pg_mblen(op);
778  op += clen;
779  in->cur += clen;
780  }
781  *(in->cur) = '\'';
782  in->cur++;
783  if (curpol->weight || curpol->prefix)
784  {
785  *(in->cur) = ':';
786  in->cur++;
787  if (curpol->prefix)
788  {
789  *(in->cur) = '*';
790  in->cur++;
791  }
792  if (curpol->weight & (1 << 3))
793  {
794  *(in->cur) = 'A';
795  in->cur++;
796  }
797  if (curpol->weight & (1 << 2))
798  {
799  *(in->cur) = 'B';
800  in->cur++;
801  }
802  if (curpol->weight & (1 << 1))
803  {
804  *(in->cur) = 'C';
805  in->cur++;
806  }
807  if (curpol->weight & 1)
808  {
809  *(in->cur) = 'D';
810  in->cur++;
811  }
812  }
813  *(in->cur) = '\0';
814  in->curpol++;
815  }
816  else if (in->curpol->qoperator.oper == OP_NOT)
817  {
818  int priority = QO_PRIORITY(in->curpol);
819 
820  if (priority < parentPriority)
821  {
822  RESIZEBUF(in, 2);
823  sprintf(in->cur, "( ");
824  in->cur = strchr(in->cur, '\0');
825  }
826  RESIZEBUF(in, 1);
827  *(in->cur) = '!';
828  in->cur++;
829  *(in->cur) = '\0';
830  in->curpol++;
831 
832  infix(in, priority, false);
833  if (priority < parentPriority)
834  {
835  RESIZEBUF(in, 2);
836  sprintf(in->cur, " )");
837  in->cur = strchr(in->cur, '\0');
838  }
839  }
840  else
841  {
842  int8 op = in->curpol->qoperator.oper;
843  int priority = QO_PRIORITY(in->curpol);
844  int16 distance = in->curpol->qoperator.distance;
845  INFIX nrm;
846  bool needParenthesis = false;
847 
848  in->curpol++;
849  if (priority < parentPriority ||
850  /* phrase operator depends on order */
851  (op == OP_PHRASE && rightPhraseOp))
852  {
853  needParenthesis = true;
854  RESIZEBUF(in, 2);
855  sprintf(in->cur, "( ");
856  in->cur = strchr(in->cur, '\0');
857  }
858 
859  nrm.curpol = in->curpol;
860  nrm.op = in->op;
861  nrm.buflen = 16;
862  nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
863 
864  /* get right operand */
865  infix(&nrm, priority, (op == OP_PHRASE));
866 
867  /* get & print left operand */
868  in->curpol = nrm.curpol;
869  infix(in, priority, false);
870 
871  /* print operator & right operand */
872  RESIZEBUF(in, 3 + (2 + 10 /* distance */ ) + (nrm.cur - nrm.buf));
873  switch (op)
874  {
875  case OP_OR:
876  sprintf(in->cur, " | %s", nrm.buf);
877  break;
878  case OP_AND:
879  sprintf(in->cur, " & %s", nrm.buf);
880  break;
881  case OP_PHRASE:
882  if (distance != 1)
883  sprintf(in->cur, " <%d> %s", distance, nrm.buf);
884  else
885  sprintf(in->cur, " <-> %s", nrm.buf);
886  break;
887  default:
888  /* OP_NOT is handled in above if-branch */
889  elog(ERROR, "unrecognized operator type: %d", op);
890  }
891  in->cur = strchr(in->cur, '\0');
892  pfree(nrm.buf);
893 
894  if (needParenthesis)
895  {
896  RESIZEBUF(in, 2);
897  sprintf(in->cur, " )");
898  in->cur = strchr(in->cur, '\0');
899  }
900  }
901 }
902 
903 Datum
905 {
906  TSQuery query = PG_GETARG_TSQUERY(0);
907  INFIX nrm;
908 
909  if (query->size == 0)
910  {
911  char *b = palloc(1);
912 
913  *b = '\0';
915  }
916  nrm.curpol = GETQUERY(query);
917  nrm.buflen = 32;
918  nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
919  *(nrm.cur) = '\0';
920  nrm.op = GETOPERAND(query);
921  infix(&nrm, -1 /* lowest priority */ , false);
922 
923  PG_FREE_IF_COPY(query, 0);
924  PG_RETURN_CSTRING(nrm.buf);
925 }
926 
927 /*
928  * Binary Input / Output functions. The binary format is as follows:
929  *
930  * uint32 number of operators/operands in the query
931  *
932  * Followed by the operators and operands, in prefix notation. For each
933  * operand:
934  *
935  * uint8 type, QI_VAL
936  * uint8 weight
937  * operand text in client encoding, null-terminated
938  * uint8 prefix
939  *
940  * For each operator:
941  * uint8 type, QI_OPR
942  * uint8 operator, one of OP_AND, OP_PHRASE OP_OR, OP_NOT.
943  * uint16 distance (only for OP_PHRASE)
944  */
945 Datum
947 {
948  TSQuery query = PG_GETARG_TSQUERY(0);
950  int i;
951  QueryItem *item = GETQUERY(query);
952 
953  pq_begintypsend(&buf);
954 
955  pq_sendint(&buf, query->size, sizeof(uint32));
956  for (i = 0; i < query->size; i++)
957  {
958  pq_sendint(&buf, item->type, sizeof(item->type));
959 
960  switch (item->type)
961  {
962  case QI_VAL:
963  pq_sendint(&buf, item->qoperand.weight, sizeof(uint8));
964  pq_sendint(&buf, item->qoperand.prefix, sizeof(uint8));
965  pq_sendstring(&buf, GETOPERAND(query) + item->qoperand.distance);
966  break;
967  case QI_OPR:
968  pq_sendint(&buf, item->qoperator.oper, sizeof(item->qoperator.oper));
969  if (item->qoperator.oper == OP_PHRASE)
970  pq_sendint(&buf, item->qoperator.distance,
971  sizeof(item->qoperator.distance));
972  break;
973  default:
974  elog(ERROR, "unrecognized tsquery node type: %d", item->type);
975  }
976  item++;
977  }
978 
979  PG_FREE_IF_COPY(query, 0);
980 
982 }
983 
984 Datum
986 {
988  TSQuery query;
989  int i,
990  len;
991  QueryItem *item;
992  int datalen;
993  char *ptr;
994  uint32 size;
995  const char **operands;
996  bool needcleanup;
997 
998  size = pq_getmsgint(buf, sizeof(uint32));
999  if (size > (MaxAllocSize / sizeof(QueryItem)))
1000  elog(ERROR, "invalid size of tsquery");
1001 
1002  /* Allocate space to temporarily hold operand strings */
1003  operands = palloc(size * sizeof(char *));
1004 
1005  /* Allocate space for all the QueryItems. */
1006  len = HDRSIZETQ + sizeof(QueryItem) * size;
1007  query = (TSQuery) palloc0(len);
1008  query->size = size;
1009  item = GETQUERY(query);
1010 
1011  datalen = 0;
1012  for (i = 0; i < size; i++)
1013  {
1014  item->type = (int8) pq_getmsgint(buf, sizeof(int8));
1015 
1016  if (item->type == QI_VAL)
1017  {
1018  size_t val_len; /* length after recoding to server
1019  * encoding */
1020  uint8 weight;
1021  uint8 prefix;
1022  const char *val;
1023  pg_crc32 valcrc;
1024 
1025  weight = (uint8) pq_getmsgint(buf, sizeof(uint8));
1026  prefix = (uint8) pq_getmsgint(buf, sizeof(uint8));
1027  val = pq_getmsgstring(buf);
1028  val_len = strlen(val);
1029 
1030  /* Sanity checks */
1031 
1032  if (weight > 0xF)
1033  elog(ERROR, "invalid tsquery: invalid weight bitmap");
1034 
1035  if (val_len > MAXSTRLEN)
1036  elog(ERROR, "invalid tsquery: operand too long");
1037 
1038  if (datalen > MAXSTRPOS)
1039  elog(ERROR, "invalid tsquery: total operand length exceeded");
1040 
1041  /* Looks valid. */
1042 
1043  INIT_LEGACY_CRC32(valcrc);
1044  COMP_LEGACY_CRC32(valcrc, val, val_len);
1045  FIN_LEGACY_CRC32(valcrc);
1046 
1047  item->qoperand.weight = weight;
1048  item->qoperand.prefix = (prefix) ? true : false;
1049  item->qoperand.valcrc = (int32) valcrc;
1050  item->qoperand.length = val_len;
1051  item->qoperand.distance = datalen;
1052 
1053  /*
1054  * Operand strings are copied to the final struct after this loop;
1055  * here we just collect them to an array
1056  */
1057  operands[i] = val;
1058 
1059  datalen += val_len + 1; /* + 1 for the '\0' terminator */
1060  }
1061  else if (item->type == QI_OPR)
1062  {
1063  int8 oper;
1064 
1065  oper = (int8) pq_getmsgint(buf, sizeof(int8));
1066  if (oper != OP_NOT && oper != OP_OR && oper != OP_AND && oper != OP_PHRASE)
1067  elog(ERROR, "invalid tsquery: unrecognized operator type %d",
1068  (int) oper);
1069  if (i == size - 1)
1070  elog(ERROR, "invalid pointer to right operand");
1071 
1072  item->qoperator.oper = oper;
1073  if (oper == OP_PHRASE)
1074  item->qoperator.distance = (int16) pq_getmsgint(buf, sizeof(int16));
1075  }
1076  else
1077  elog(ERROR, "unrecognized tsquery node type: %d", item->type);
1078 
1079  item++;
1080  }
1081 
1082  /* Enlarge buffer to make room for the operand values. */
1083  query = (TSQuery) repalloc(query, len + datalen);
1084  item = GETQUERY(query);
1085  ptr = GETOPERAND(query);
1086 
1087  /*
1088  * Fill in the left-pointers. Checks that the tree is well-formed as a
1089  * side-effect.
1090  */
1091  findoprnd(item, size, &needcleanup);
1092 
1093  /* Can't have found any QI_VALSTOP nodes */
1094  Assert(!needcleanup);
1095 
1096  /* Copy operands to output struct */
1097  for (i = 0; i < size; i++)
1098  {
1099  if (item->type == QI_VAL)
1100  {
1101  memcpy(ptr, operands[i], item->qoperand.length + 1);
1102  ptr += item->qoperand.length + 1;
1103  }
1104  item++;
1105  }
1106 
1107  pfree(operands);
1108 
1109  Assert(ptr - GETOPERAND(query) == datalen);
1110 
1111  SET_VARSIZE(query, len + datalen);
1112 
1113  PG_RETURN_TSQUERY(query);
1114 }
1115 
1116 /*
1117  * debug function, used only for view query
1118  * which will be executed in non-leaf pages in index
1119  */
1120 Datum
1122 {
1123  TSQuery query = PG_GETARG_TSQUERY(0);
1124  INFIX nrm;
1125  text *res;
1126  QueryItem *q;
1127  int len;
1128 
1129  if (query->size == 0)
1130  {
1131  res = (text *) palloc(VARHDRSZ);
1132  SET_VARSIZE(res, VARHDRSZ);
1133  PG_RETURN_POINTER(res);
1134  }
1135 
1136  q = clean_NOT(GETQUERY(query), &len);
1137 
1138  if (!q)
1139  {
1140  res = cstring_to_text("T");
1141  }
1142  else
1143  {
1144  nrm.curpol = q;
1145  nrm.buflen = 32;
1146  nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
1147  *(nrm.cur) = '\0';
1148  nrm.op = GETOPERAND(query);
1149  infix(&nrm, -1, false);
1150  res = cstring_to_text_with_len(nrm.buf, nrm.cur - nrm.buf);
1151  pfree(q);
1152  }
1153 
1154  PG_FREE_IF_COPY(query, 0);
1155 
1156  PG_RETURN_TEXT_P(res);
1157 }
#define QI_VALSTOP
Definition: ts_type.h:136
#define COPYCHAR(d, s)
Definition: ts_locale.h:63
#define t_isspace(x)
Definition: ts_locale.h:58
signed short int16
Definition: c.h:255
#define PG_RETURN_POINTER(x)
Definition: fmgr.h:321
QueryOperator qoperator
Definition: ts_type.h:196
#define NIL
Definition: pg_list.h:69
#define INIT_LEGACY_CRC32(crc)
Definition: pg_crc.h:79
#define TSQUERY_TOO_BIG(size, lenofoperand)
Definition: ts_type.h:220
#define WAITFIRSTOPERAND
Definition: tsquery.c:63
#define QO_PRIORITY(x)
Definition: ts_type.h:177
ITEM * curpol
Definition: _int_bool.c:551
Datum tsquerytree(PG_FUNCTION_ARGS)
Definition: tsquery.c:1121
static void makepol(TSQueryParserState state, PushFunction pushval, Datum opaque)
Definition: tsquery.c:476
const char * pq_getmsgstring(StringInfo msg)
Definition: pqformat.c:621
#define PointerGetDatum(X)
Definition: postgres.h:562
bool gettoken_tsvector(TSVectorParseState state, char **strval, int *lenval, WordEntryPos **pos_ptr, int *poslen, char **endptr)
const int tsearch_op_priority[OP_COUNT]
Definition: tsquery.c:27
void pq_begintypsend(StringInfo buf)
Definition: pqformat.c:359
#define VARHDRSZ
Definition: c.h:445
#define PG_GETARG_TSQUERY(n)
Definition: ts_type.h:238
#define WAITOPERAND
Definition: tsquery.c:61
static void pushOpStack(OperatorElement *stack, int *lenstack, int8 op, int16 distance)
Definition: tsquery.c:440
StringInfoData * StringInfo
Definition: stringinfo.h:43
void close_tsvector_parser(TSVectorParseState state)
#define WAITSINGLEOPERAND
Definition: tsquery.c:64
char * op
Definition: ltxtquery_io.c:399
#define OP_PRIORITY(x)
Definition: ts_type.h:175
unsigned char uint8
Definition: c.h:266
TSVectorParseState valstate
Definition: tsquery.c:57
TSQueryData * TSQuery
Definition: ts_type.h:212
static ts_tokentype gettoken_query(TSQueryParserState state, int8 *operator, int *lenval, char **strval, int16 *weight, bool *prefix)
Definition: tsquery.c:222
struct cursor * cur
Definition: ecpg.c:28
int errcode(int sqlerrcode)
Definition: elog.c:575
#define QI_VAL
Definition: ts_type.h:134
static void pushValue_internal(TSQueryParserState state, pg_crc32 valcrc, int distance, int lenval, int weight, bool prefix)
Definition: tsquery.c:351
#define PG_GETARG_POINTER(n)
Definition: fmgr.h:241
TSQuery cleanup_tsquery_stopwords(TSQuery in)
void pq_sendstring(StringInfo buf, const char *str)
Definition: pqformat.c:186
#define t_isdigit(x)
Definition: ts_locale.h:57
#define PG_RETURN_BYTEA_P(x)
Definition: fmgr.h:329
uint32 distance
Definition: ts_type.h:158
int16 distance
Definition: ts_type.h:183
#define MAXSTRPOS
Definition: ts_type.h:50
#define OP_OR
Definition: ts_type.h:168
struct OperatorElement OperatorElement
bytea * pq_endtypsend(StringInfo buf)
Definition: pqformat.c:379
#define GETQUERY(x)
Definition: _int.h:142
#define FIN_LEGACY_CRC32(crc)
Definition: pg_crc.h:80
#define RESIZEBUF(inf, addsize)
Definition: tsquery.c:735
signed int int32
Definition: c.h:256
#define GETOPERAND(x)
Definition: ltree.h:118
int16 type
Definition: _int.h:127
#define PG_RETURN_TSQUERY(x)
Definition: ts_type.h:240
Datum tsquerysend(PG_FUNCTION_ARGS)
Definition: tsquery.c:946
#define OP_AND
Definition: ts_type.h:167
void pfree(void *pointer)
Definition: mcxt.c:950
#define ERROR
Definition: elog.h:43
int16 distance
Definition: tsquery.c:436
static void findoprnd_recurse(QueryItem *ptr, uint32 *pos, int nnodes, bool *needcleanup)
Definition: tsquery.c:522
static void infix(INFIX *in, int parentPriority, bool rightPhraseOp)
Definition: tsquery.c:749
static void pushval_asis(Datum opaque, TSQueryParserState state, char *strval, int lenval, int16 weight, bool prefix)
Definition: tsquery.c:705
uint8 weight
Definition: ts_type.h:146
static char * parse_phrase_operator(char *buf, int16 *distance)
Definition: tsquery.c:122
void pushStop(TSQueryParserState state)
Definition: tsquery.c:420
TSQuery parse_tsquery(char *buf, PushFunction pushval, Datum opaque, bool isplain)
Definition: tsquery.c:605
static char * buf
Definition: pg_test_fsync.c:66
text * cstring_to_text_with_len(const char *s, int len)
Definition: varlena.c:161
QueryItem * clean_NOT(QueryItem *ptr, int *len)
void check_stack_depth(void)
Definition: postgres.c:3117
int pg_database_encoding_max_length(void)
Definition: wchar.c:1833
#define t_iseq(x, c)
Definition: ts_locale.h:61
TSVectorParseState init_tsvector_parser(char *input, bool oprisdelim, bool is_tsquery)
int32 valcrc
Definition: ts_type.h:151
QueryItem * curpol
Definition: tsquery.c:727
void pushValue(TSQueryParserState state, char *strval, int lenval, int16 weight, bool prefix)
Definition: tsquery.c:384
unsigned int uint32
Definition: c.h:268
#define ereport(elevel, rest)
Definition: elog.h:122
#define STACKDEPTH
Definition: tsquery.c:431
int buflen
Definition: tsquery.c:731
#define MaxAllocSize
Definition: memutils.h:40
static char * get_modifiers(char *buf, int16 *weight, bool *prefix)
Definition: tsquery.c:71
signed char int8
Definition: c.h:254
#define OP_COUNT
Definition: ts_type.h:170
#define QI_OPR
Definition: ts_type.h:135
QueryItemType type
Definition: ts_type.h:195
void * palloc0(Size size)
Definition: mcxt.c:878
uintptr_t Datum
Definition: postgres.h:372
#define COMPUTESIZE(size)
Definition: _int.h:140
void reset_tsvector_parser(TSVectorParseState state, char *input)
void(* PushFunction)(Datum opaque, TSQueryParserState state, char *token, int tokenlen, int16 tokenweights, bool prefix)
Definition: ts_utils.h:43
void pushOperator(TSQueryParserState state, int8 oper, int16 distance)
Definition: tsquery.c:335
#define NOTICE
Definition: elog.h:37
#define PG_RETURN_TEXT_P(x)
Definition: fmgr.h:330
List * lcons(void *datum, List *list)
Definition: list.c:259
text * cstring_to_text(const char *s)
Definition: varlena.c:149
#define NULL
Definition: c.h:229
QueryItemType type
Definition: ts_type.h:181
#define Assert(condition)
Definition: c.h:675
#define lfirst(lc)
Definition: pg_list.h:106
Definition: regguts.h:298
#define OP_PHRASE
Definition: ts_type.h:169
#define PG_RETURN_CSTRING(x)
Definition: fmgr.h:322
Datum tsqueryrecv(PG_FUNCTION_ARGS)
Definition: tsquery.c:985
static int list_length(const List *l)
Definition: pg_list.h:89
#define PG_FREE_IF_COPY(ptr, n)
Definition: fmgr.h:225
int pg_mblen(const char *mbstr)
Definition: mbutils.c:771
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:963
uint32 length
Definition: ts_type.h:158
Datum tsqueryout(PG_FUNCTION_ARGS)
Definition: tsquery.c:904
char * cur
Definition: _int_bool.c:553
uint32 left
Definition: ts_type.h:184
void * palloc(Size size)
Definition: mcxt.c:849
int32 buflen
Definition: _int_bool.c:554
int errmsg(const char *fmt,...)
Definition: elog.c:797
void pq_sendint(StringInfo buf, int i, int b)
Definition: pqformat.c:236
int32 size
Definition: ts_type.h:208
#define COMP_LEGACY_CRC32(crc, data, len)
Definition: pg_crc.h:81
int i
QueryItemType type
Definition: ts_type.h:145
#define PG_GETARG_CSTRING(n)
Definition: fmgr.h:242
Definition: c.h:439
#define PG_FUNCTION_ARGS
Definition: fmgr.h:158
bool prefix
Definition: ts_type.h:150
unsigned int pq_getmsgint(StringInfo msg, int b)
Definition: pqformat.c:448
#define WAITOPERATOR
Definition: tsquery.c:62
#define SET_VARSIZE(PTR, len)
Definition: postgres.h:328
uint32 pg_crc32
Definition: pg_crc.h:37
#define elog
Definition: elog.h:219
QueryOperand qoperand
Definition: ts_type.h:197
Operator oper(ParseState *pstate, List *opname, Oid ltypeId, Oid rtypeId, bool noError, int location)
Definition: parse_oper.c:377
#define MAXENTRYPOS
Definition: ts_type.h:85
Definition: pg_list.h:45
long val
Definition: informix.c:689
#define HDRSIZETQ
Definition: ts_type.h:214
uint16 distance
Definition: ltree.h:99
ts_tokentype
Definition: tsquery.c:203
#define MAXSTRLEN
Definition: ts_type.h:49
static void cleanOpStack(TSQueryParserState state, OperatorElement *stack, int *lenstack, int8 op)
Definition: tsquery.c:452
char * buf
Definition: _int_bool.c:552
Datum tsqueryin(PG_FUNCTION_ARGS)
Definition: tsquery.c:715
static void findoprnd(QueryItem *ptr, int size, bool *needcleanup)
Definition: tsquery.c:580
#define OP_NOT
Definition: ts_type.h:166