PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
tsquery_util.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * tsquery_util.c
4  * Utilities for tsquery datatype
5  *
6  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
7  *
8  *
9  * IDENTIFICATION
10  * src/backend/utils/adt/tsquery_util.c
11  *
12  *-------------------------------------------------------------------------
13  */
14 
15 #include "postgres.h"
16 
17 #include "tsearch/ts_utils.h"
18 #include "miscadmin.h"
19 
20 /*
21  * Build QTNode tree for a tsquery given in QueryItem array format.
22  */
23 QTNode *
24 QT2QTN(QueryItem *in, char *operand)
25 {
26  QTNode *node = (QTNode *) palloc0(sizeof(QTNode));
27 
28  /* since this function recurses, it could be driven to stack overflow. */
30 
31  node->valnode = in;
32 
33  if (in->type == QI_OPR)
34  {
35  node->child = (QTNode **) palloc0(sizeof(QTNode *) * 2);
36  node->child[0] = QT2QTN(in + 1, operand);
37  node->sign = node->child[0]->sign;
38  if (in->qoperator.oper == OP_NOT)
39  node->nchild = 1;
40  else
41  {
42  node->nchild = 2;
43  node->child[1] = QT2QTN(in + in->qoperator.left, operand);
44  node->sign |= node->child[1]->sign;
45  }
46  }
47  else if (operand)
48  {
49  node->word = operand + in->qoperand.distance;
50  node->sign = ((uint32) 1) << (((unsigned int) in->qoperand.valcrc) % 32);
51  }
52 
53  return node;
54 }
55 
56 /*
57  * Free a QTNode tree.
58  *
59  * Referenced "word" and "valnode" items are freed if marked as transient
60  * by flags.
61  */
62 void
64 {
65  if (!in)
66  return;
67 
68  /* since this function recurses, it could be driven to stack overflow. */
70 
71  if (in->valnode->type == QI_VAL && in->word && (in->flags & QTN_WORDFREE) != 0)
72  pfree(in->word);
73 
74  if (in->valnode->type == QI_OPR)
75  {
76  int i;
77 
78  for (i = 0; i < in->nchild; i++)
79  QTNFree(in->child[i]);
80  }
81  if (in->child)
82  pfree(in->child);
83 
84  if (in->flags & QTN_NEEDFREE)
85  pfree(in->valnode);
86 
87  pfree(in);
88 }
89 
90 /*
91  * Sort comparator for QTNodes.
92  *
93  * The sort order is somewhat arbitrary.
94  */
95 int
97 {
98  /* since this function recurses, it could be driven to stack overflow. */
100 
101  if (an->valnode->type != bn->valnode->type)
102  return (an->valnode->type > bn->valnode->type) ? -1 : 1;
103 
104  if (an->valnode->type == QI_OPR)
105  {
106  QueryOperator *ao = &an->valnode->qoperator;
107  QueryOperator *bo = &bn->valnode->qoperator;
108 
109  if (ao->oper != bo->oper)
110  return (ao->oper > bo->oper) ? -1 : 1;
111 
112  if (an->nchild != bn->nchild)
113  return (an->nchild > bn->nchild) ? -1 : 1;
114 
115  {
116  int i,
117  res;
118 
119  for (i = 0; i < an->nchild; i++)
120  if ((res = QTNodeCompare(an->child[i], bn->child[i])) != 0)
121  return res;
122  }
123 
124  if (ao->oper == OP_PHRASE && ao->distance != bo->distance)
125  return (ao->distance > bo->distance) ? -1 : 1;
126 
127  return 0;
128  }
129  else if (an->valnode->type == QI_VAL)
130  {
131  QueryOperand *ao = &an->valnode->qoperand;
132  QueryOperand *bo = &bn->valnode->qoperand;
133 
134  if (ao->valcrc != bo->valcrc)
135  {
136  return (ao->valcrc > bo->valcrc) ? -1 : 1;
137  }
138 
139  return tsCompareString(an->word, ao->length, bn->word, bo->length, false);
140  }
141  else
142  {
143  elog(ERROR, "unrecognized QueryItem type: %d", an->valnode->type);
144  return 0; /* keep compiler quiet */
145  }
146 }
147 
148 /*
149  * qsort comparator for QTNode pointers.
150  */
151 static int
152 cmpQTN(const void *a, const void *b)
153 {
154  return QTNodeCompare(*(QTNode *const *) a, *(QTNode *const *) b);
155 }
156 
157 /*
158  * Canonicalize a QTNode tree by sorting the children of AND/OR nodes
159  * into an arbitrary but well-defined order.
160  */
161 void
163 {
164  int i;
165 
166  /* since this function recurses, it could be driven to stack overflow. */
168 
169  if (in->valnode->type != QI_OPR)
170  return;
171 
172  for (i = 0; i < in->nchild; i++)
173  QTNSort(in->child[i]);
174  if (in->nchild > 1 && in->valnode->qoperator.oper != OP_PHRASE)
175  qsort((void *) in->child, in->nchild, sizeof(QTNode *), cmpQTN);
176 }
177 
178 /*
179  * Are two QTNode trees equal according to QTNodeCompare?
180  */
181 bool
183 {
184  uint32 sign = a->sign & b->sign;
185 
186  if (!(sign == a->sign && sign == b->sign))
187  return false;
188 
189  return (QTNodeCompare(a, b) == 0) ? true : false;
190 }
191 
192 /*
193  * Remove unnecessary intermediate nodes. For example:
194  *
195  * OR OR
196  * a OR -> a b c
197  * b c
198  */
199 void
201 {
202  int i;
203 
204  /* since this function recurses, it could be driven to stack overflow. */
206 
207  if (in->valnode->type != QI_OPR)
208  return;
209 
210  for (i = 0; i < in->nchild; i++)
211  QTNTernary(in->child[i]);
212 
213  /* Only AND and OR are associative, so don't flatten other node types */
214  if (in->valnode->qoperator.oper != OP_AND &&
215  in->valnode->qoperator.oper != OP_OR)
216  return;
217 
218  for (i = 0; i < in->nchild; i++)
219  {
220  QTNode *cc = in->child[i];
221 
222  if (cc->valnode->type == QI_OPR &&
224  {
225  int oldnchild = in->nchild;
226 
227  in->nchild += cc->nchild - 1;
228  in->child = (QTNode **) repalloc(in->child, in->nchild * sizeof(QTNode *));
229 
230  if (i + 1 != oldnchild)
231  memmove(in->child + i + cc->nchild, in->child + i + 1,
232  (oldnchild - i - 1) * sizeof(QTNode *));
233 
234  memcpy(in->child + i, cc->child, cc->nchild * sizeof(QTNode *));
235  i += cc->nchild - 1;
236 
237  if (cc->flags & QTN_NEEDFREE)
238  pfree(cc->valnode);
239  pfree(cc);
240  }
241  }
242 }
243 
244 /*
245  * Convert a tree to binary tree by inserting intermediate nodes.
246  * (Opposite of QTNTernary)
247  */
248 void
250 {
251  int i;
252 
253  /* since this function recurses, it could be driven to stack overflow. */
255 
256  if (in->valnode->type != QI_OPR)
257  return;
258 
259  for (i = 0; i < in->nchild; i++)
260  QTNBinary(in->child[i]);
261 
262  while (in->nchild > 2)
263  {
264  QTNode *nn = (QTNode *) palloc0(sizeof(QTNode));
265 
266  nn->valnode = (QueryItem *) palloc0(sizeof(QueryItem));
267  nn->child = (QTNode **) palloc0(sizeof(QTNode *) * 2);
268 
269  nn->nchild = 2;
270  nn->flags = QTN_NEEDFREE;
271 
272  nn->child[0] = in->child[0];
273  nn->child[1] = in->child[1];
274  nn->sign = nn->child[0]->sign | nn->child[1]->sign;
275 
276  nn->valnode->type = in->valnode->type;
278 
279  in->child[0] = nn;
280  in->child[1] = in->child[in->nchild - 1];
281  in->nchild--;
282  }
283 }
284 
285 /*
286  * Count the total length of operand strings in tree (including '\0'-
287  * terminators) and the total number of nodes.
288  * Caller must initialize *sumlen and *nnode to zeroes.
289  */
290 static void
291 cntsize(QTNode *in, int *sumlen, int *nnode)
292 {
293  /* since this function recurses, it could be driven to stack overflow. */
295 
296  *nnode += 1;
297  if (in->valnode->type == QI_OPR)
298  {
299  int i;
300 
301  for (i = 0; i < in->nchild; i++)
302  cntsize(in->child[i], sumlen, nnode);
303  }
304  else
305  {
306  *sumlen += in->valnode->qoperand.length + 1;
307  }
308 }
309 
310 typedef struct
311 {
313  char *operand;
314  char *curoperand;
315 } QTN2QTState;
316 
317 /*
318  * Recursively convert a QTNode tree into flat tsquery format.
319  * Caller must have allocated arrays of the correct size.
320  */
321 static void
323 {
324  /* since this function recurses, it could be driven to stack overflow. */
326 
327  if (in->valnode->type == QI_VAL)
328  {
329  memcpy(state->curitem, in->valnode, sizeof(QueryOperand));
330 
331  memcpy(state->curoperand, in->word, in->valnode->qoperand.length);
332  state->curitem->qoperand.distance = state->curoperand - state->operand;
333  state->curoperand[in->valnode->qoperand.length] = '\0';
334  state->curoperand += in->valnode->qoperand.length + 1;
335  state->curitem++;
336  }
337  else
338  {
339  QueryItem *curitem = state->curitem;
340 
341  Assert(in->valnode->type == QI_OPR);
342 
343  memcpy(state->curitem, in->valnode, sizeof(QueryOperator));
344 
345  Assert(in->nchild <= 2);
346  state->curitem++;
347 
348  fillQT(state, in->child[0]);
349 
350  if (in->nchild == 2)
351  {
352  curitem->qoperator.left = state->curitem - curitem;
353  fillQT(state, in->child[1]);
354  }
355  }
356 }
357 
358 /*
359  * Build flat tsquery from a QTNode tree.
360  */
361 TSQuery
363 {
364  TSQuery out;
365  int len;
366  int sumlen = 0,
367  nnode = 0;
369 
370  cntsize(in, &sumlen, &nnode);
371 
372  if (TSQUERY_TOO_BIG(nnode, sumlen))
373  ereport(ERROR,
374  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
375  errmsg("tsquery is too large")));
376  len = COMPUTESIZE(nnode, sumlen);
377 
378  out = (TSQuery) palloc0(len);
379  SET_VARSIZE(out, len);
380  out->size = nnode;
381 
382  state.curitem = GETQUERY(out);
383  state.operand = state.curoperand = GETOPERAND(out);
384 
385  fillQT(&state, in);
386  return out;
387 }
388 
389 /*
390  * Copy a QTNode tree.
391  *
392  * Modifiable copies of the words and valnodes are made, too.
393  */
394 QTNode *
396 {
397  QTNode *out;
398 
399  /* since this function recurses, it could be driven to stack overflow. */
401 
402  out = (QTNode *) palloc(sizeof(QTNode));
403 
404  *out = *in;
405  out->valnode = (QueryItem *) palloc(sizeof(QueryItem));
406  *(out->valnode) = *(in->valnode);
407  out->flags |= QTN_NEEDFREE;
408 
409  if (in->valnode->type == QI_VAL)
410  {
411  out->word = palloc(in->valnode->qoperand.length + 1);
412  memcpy(out->word, in->word, in->valnode->qoperand.length);
413  out->word[in->valnode->qoperand.length] = '\0';
414  out->flags |= QTN_WORDFREE;
415  }
416  else
417  {
418  int i;
419 
420  out->child = (QTNode **) palloc(sizeof(QTNode *) * in->nchild);
421 
422  for (i = 0; i < in->nchild; i++)
423  out->child[i] = QTNCopy(in->child[i]);
424  }
425 
426  return out;
427 }
428 
429 /*
430  * Clear the specified flag bit(s) in all nodes of a QTNode tree.
431  */
432 void
434 {
435  /* since this function recurses, it could be driven to stack overflow. */
437 
438  in->flags &= ~flags;
439 
440  if (in->valnode->type != QI_VAL)
441  {
442  int i;
443 
444  for (i = 0; i < in->nchild; i++)
445  QTNClearFlags(in->child[i], flags);
446  }
447 }
void QTNTernary(QTNode *in)
Definition: tsquery_util.c:200
QueryOperator qoperator
Definition: ts_type.h:196
#define TSQUERY_TOO_BIG(size, lenofoperand)
Definition: ts_type.h:220
char * word
Definition: ts_utils.h:206
void QTNBinary(QTNode *in)
Definition: tsquery_util.c:249
void QTNFree(QTNode *in)
Definition: tsquery_util.c:63
uint32 sign
Definition: ts_utils.h:207
QTNode * QT2QTN(QueryItem *in, char *operand)
Definition: tsquery_util.c:24
QTNode * QTNCopy(QTNode *in)
Definition: tsquery_util.c:395
TSQueryData * TSQuery
Definition: ts_type.h:212
int errcode(int sqlerrcode)
Definition: elog.c:575
#define QI_VAL
Definition: ts_type.h:134
uint32 distance
Definition: ts_type.h:158
int16 distance
Definition: ts_type.h:183
#define OP_OR
Definition: ts_type.h:168
static void fillQT(QTN2QTState *state, QTNode *in)
Definition: tsquery_util.c:322
#define GETQUERY(x)
Definition: _int.h:142
QueryItem * curitem
Definition: tsquery_util.c:312
#define GETOPERAND(x)
Definition: ltree.h:118
#define OP_AND
Definition: ts_type.h:167
void pfree(void *pointer)
Definition: mcxt.c:992
#define ERROR
Definition: elog.h:43
char sign
Definition: informix.c:693
#define memmove(d, s, c)
Definition: c.h:1057
void check_stack_depth(void)
Definition: postgres.c:3096
int32 valcrc
Definition: ts_type.h:151
TSQuery QTN2QT(QTNode *in)
Definition: tsquery_util.c:362
unsigned int uint32
Definition: c.h:265
char * curoperand
Definition: tsquery_util.c:314
#define ereport(elevel, rest)
Definition: elog.h:122
void QTNSort(QTNode *in)
Definition: tsquery_util.c:162
void QTNClearFlags(QTNode *in, uint32 flags)
Definition: tsquery_util.c:433
#define QI_OPR
Definition: ts_type.h:135
QueryItemType type
Definition: ts_type.h:195
void * palloc0(Size size)
Definition: mcxt.c:920
#define COMPUTESIZE(size)
Definition: _int.h:140
#define Assert(condition)
Definition: c.h:670
Definition: regguts.h:298
#define OP_PHRASE
Definition: ts_type.h:169
bool QTNEq(QTNode *a, QTNode *b)
Definition: tsquery_util.c:182
#define QTN_WORDFREE
Definition: ts_utils.h:214
int32 tsCompareString(char *a, int lena, char *b, int lenb, bool prefix)
Definition: tsvector_op.c:1160
struct QTNode ** child
Definition: ts_utils.h:208
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1021
#define QTN_NEEDFREE
Definition: ts_utils.h:212
uint32 length
Definition: ts_type.h:158
char * operand
Definition: tsquery_util.c:313
uint32 left
Definition: ts_type.h:184
QueryItem * valnode
Definition: ts_utils.h:203
int32 nchild
Definition: ts_utils.h:205
void * palloc(Size size)
Definition: mcxt.c:891
int errmsg(const char *fmt,...)
Definition: elog.c:797
int32 size
Definition: ts_type.h:208
int i
int QTNodeCompare(QTNode *an, QTNode *bn)
Definition: tsquery_util.c:96
#define SET_VARSIZE(PTR, len)
Definition: postgres.h:330
#define elog
Definition: elog.h:219
QueryOperand qoperand
Definition: ts_type.h:197
#define qsort(a, b, c, d)
Definition: port.h:440
static void cntsize(QTNode *in, int *sumlen, int *nnode)
Definition: tsquery_util.c:291
static int cmpQTN(const void *a, const void *b)
Definition: tsquery_util.c:152
uint32 flags
Definition: ts_utils.h:204
#define OP_NOT
Definition: ts_type.h:166