PostgreSQL Source Code  git master
tsquery_util.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * tsquery_util.c
4  * Utilities for tsquery datatype
5  *
6  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
7  *
8  *
9  * IDENTIFICATION
10  * src/backend/utils/adt/tsquery_util.c
11  *
12  *-------------------------------------------------------------------------
13  */
14 
15 #include "postgres.h"
16 
17 #include "miscadmin.h"
18 #include "tsearch/ts_utils.h"
19 #include "varatt.h"
20 
21 /*
22  * Build QTNode tree for a tsquery given in QueryItem array format.
23  */
24 QTNode *
25 QT2QTN(QueryItem *in, char *operand)
26 {
27  QTNode *node = (QTNode *) palloc0(sizeof(QTNode));
28 
29  /* since this function recurses, it could be driven to stack overflow. */
31 
32  node->valnode = in;
33 
34  if (in->type == QI_OPR)
35  {
36  node->child = (QTNode **) palloc0(sizeof(QTNode *) * 2);
37  node->child[0] = QT2QTN(in + 1, operand);
38  node->sign = node->child[0]->sign;
39  if (in->qoperator.oper == OP_NOT)
40  node->nchild = 1;
41  else
42  {
43  node->nchild = 2;
44  node->child[1] = QT2QTN(in + in->qoperator.left, operand);
45  node->sign |= node->child[1]->sign;
46  }
47  }
48  else if (operand)
49  {
50  node->word = operand + in->qoperand.distance;
51  node->sign = ((uint32) 1) << (((unsigned int) in->qoperand.valcrc) % 32);
52  }
53 
54  return node;
55 }
56 
57 /*
58  * Free a QTNode tree.
59  *
60  * Referenced "word" and "valnode" items are freed if marked as transient
61  * by flags.
62  */
63 void
65 {
66  if (!in)
67  return;
68 
69  /* since this function recurses, it could be driven to stack overflow. */
71 
72  if (in->valnode->type == QI_VAL && in->word && (in->flags & QTN_WORDFREE) != 0)
73  pfree(in->word);
74 
75  if (in->valnode->type == QI_OPR)
76  {
77  int i;
78 
79  for (i = 0; i < in->nchild; i++)
80  QTNFree(in->child[i]);
81  }
82  if (in->child)
83  pfree(in->child);
84 
85  if (in->flags & QTN_NEEDFREE)
86  pfree(in->valnode);
87 
88  pfree(in);
89 }
90 
91 /*
92  * Sort comparator for QTNodes.
93  *
94  * The sort order is somewhat arbitrary.
95  */
96 int
98 {
99  /* since this function recurses, it could be driven to stack overflow. */
101 
102  if (an->valnode->type != bn->valnode->type)
103  return (an->valnode->type > bn->valnode->type) ? -1 : 1;
104 
105  if (an->valnode->type == QI_OPR)
106  {
107  QueryOperator *ao = &an->valnode->qoperator;
108  QueryOperator *bo = &bn->valnode->qoperator;
109 
110  if (ao->oper != bo->oper)
111  return (ao->oper > bo->oper) ? -1 : 1;
112 
113  if (an->nchild != bn->nchild)
114  return (an->nchild > bn->nchild) ? -1 : 1;
115 
116  {
117  int i,
118  res;
119 
120  for (i = 0; i < an->nchild; i++)
121  if ((res = QTNodeCompare(an->child[i], bn->child[i])) != 0)
122  return res;
123  }
124 
125  if (ao->oper == OP_PHRASE && ao->distance != bo->distance)
126  return (ao->distance > bo->distance) ? -1 : 1;
127 
128  return 0;
129  }
130  else if (an->valnode->type == QI_VAL)
131  {
132  QueryOperand *ao = &an->valnode->qoperand;
133  QueryOperand *bo = &bn->valnode->qoperand;
134 
135  if (ao->valcrc != bo->valcrc)
136  {
137  return (ao->valcrc > bo->valcrc) ? -1 : 1;
138  }
139 
140  return tsCompareString(an->word, ao->length, bn->word, bo->length, false);
141  }
142  else
143  {
144  elog(ERROR, "unrecognized QueryItem type: %d", an->valnode->type);
145  return 0; /* keep compiler quiet */
146  }
147 }
148 
149 /*
150  * qsort comparator for QTNode pointers.
151  */
152 static int
153 cmpQTN(const void *a, const void *b)
154 {
155  return QTNodeCompare(*(QTNode *const *) a, *(QTNode *const *) b);
156 }
157 
158 /*
159  * Canonicalize a QTNode tree by sorting the children of AND/OR nodes
160  * into an arbitrary but well-defined order.
161  */
162 void
164 {
165  int i;
166 
167  /* since this function recurses, it could be driven to stack overflow. */
169 
170  if (in->valnode->type != QI_OPR)
171  return;
172 
173  for (i = 0; i < in->nchild; i++)
174  QTNSort(in->child[i]);
175  if (in->nchild > 1 && in->valnode->qoperator.oper != OP_PHRASE)
176  qsort(in->child, in->nchild, sizeof(QTNode *), cmpQTN);
177 }
178 
179 /*
180  * Are two QTNode trees equal according to QTNodeCompare?
181  */
182 bool
184 {
185  uint32 sign = a->sign & b->sign;
186 
187  if (!(sign == a->sign && sign == b->sign))
188  return false;
189 
190  return (QTNodeCompare(a, b) == 0);
191 }
192 
193 /*
194  * Remove unnecessary intermediate nodes. For example:
195  *
196  * OR OR
197  * a OR -> a b c
198  * b c
199  */
200 void
202 {
203  int i;
204 
205  /* since this function recurses, it could be driven to stack overflow. */
207 
208  if (in->valnode->type != QI_OPR)
209  return;
210 
211  for (i = 0; i < in->nchild; i++)
212  QTNTernary(in->child[i]);
213 
214  /* Only AND and OR are associative, so don't flatten other node types */
215  if (in->valnode->qoperator.oper != OP_AND &&
216  in->valnode->qoperator.oper != OP_OR)
217  return;
218 
219  for (i = 0; i < in->nchild; i++)
220  {
221  QTNode *cc = in->child[i];
222 
223  if (cc->valnode->type == QI_OPR &&
225  {
226  int oldnchild = in->nchild;
227 
228  in->nchild += cc->nchild - 1;
229  in->child = (QTNode **) repalloc(in->child, in->nchild * sizeof(QTNode *));
230 
231  if (i + 1 != oldnchild)
232  memmove(in->child + i + cc->nchild, in->child + i + 1,
233  (oldnchild - i - 1) * sizeof(QTNode *));
234 
235  memcpy(in->child + i, cc->child, cc->nchild * sizeof(QTNode *));
236  i += cc->nchild - 1;
237 
238  if (cc->flags & QTN_NEEDFREE)
239  pfree(cc->valnode);
240  pfree(cc);
241  }
242  }
243 }
244 
245 /*
246  * Convert a tree to binary tree by inserting intermediate nodes.
247  * (Opposite of QTNTernary)
248  */
249 void
251 {
252  int i;
253 
254  /* since this function recurses, it could be driven to stack overflow. */
256 
257  if (in->valnode->type != QI_OPR)
258  return;
259 
260  for (i = 0; i < in->nchild; i++)
261  QTNBinary(in->child[i]);
262 
263  while (in->nchild > 2)
264  {
265  QTNode *nn = (QTNode *) palloc0(sizeof(QTNode));
266 
267  nn->valnode = (QueryItem *) palloc0(sizeof(QueryItem));
268  nn->child = (QTNode **) palloc0(sizeof(QTNode *) * 2);
269 
270  nn->nchild = 2;
271  nn->flags = QTN_NEEDFREE;
272 
273  nn->child[0] = in->child[0];
274  nn->child[1] = in->child[1];
275  nn->sign = nn->child[0]->sign | nn->child[1]->sign;
276 
277  nn->valnode->type = in->valnode->type;
279 
280  in->child[0] = nn;
281  in->child[1] = in->child[in->nchild - 1];
282  in->nchild--;
283  }
284 }
285 
286 /*
287  * Count the total length of operand strings in tree (including '\0'-
288  * terminators) and the total number of nodes.
289  * Caller must initialize *sumlen and *nnode to zeroes.
290  */
291 static void
292 cntsize(QTNode *in, int *sumlen, int *nnode)
293 {
294  /* since this function recurses, it could be driven to stack overflow. */
296 
297  *nnode += 1;
298  if (in->valnode->type == QI_OPR)
299  {
300  int i;
301 
302  for (i = 0; i < in->nchild; i++)
303  cntsize(in->child[i], sumlen, nnode);
304  }
305  else
306  {
307  *sumlen += in->valnode->qoperand.length + 1;
308  }
309 }
310 
311 typedef struct
312 {
314  char *operand;
315  char *curoperand;
316 } QTN2QTState;
317 
318 /*
319  * Recursively convert a QTNode tree into flat tsquery format.
320  * Caller must have allocated arrays of the correct size.
321  */
322 static void
324 {
325  /* since this function recurses, it could be driven to stack overflow. */
327 
328  if (in->valnode->type == QI_VAL)
329  {
330  memcpy(state->curitem, in->valnode, sizeof(QueryOperand));
331 
332  memcpy(state->curoperand, in->word, in->valnode->qoperand.length);
333  state->curitem->qoperand.distance = state->curoperand - state->operand;
334  state->curoperand[in->valnode->qoperand.length] = '\0';
335  state->curoperand += in->valnode->qoperand.length + 1;
336  state->curitem++;
337  }
338  else
339  {
340  QueryItem *curitem = state->curitem;
341 
342  Assert(in->valnode->type == QI_OPR);
343 
344  memcpy(state->curitem, in->valnode, sizeof(QueryOperator));
345 
346  Assert(in->nchild <= 2);
347  state->curitem++;
348 
349  fillQT(state, in->child[0]);
350 
351  if (in->nchild == 2)
352  {
353  curitem->qoperator.left = state->curitem - curitem;
354  fillQT(state, in->child[1]);
355  }
356  }
357 }
358 
359 /*
360  * Build flat tsquery from a QTNode tree.
361  */
362 TSQuery
364 {
365  TSQuery out;
366  int len;
367  int sumlen = 0,
368  nnode = 0;
370 
371  cntsize(in, &sumlen, &nnode);
372 
373  if (TSQUERY_TOO_BIG(nnode, sumlen))
374  ereport(ERROR,
375  (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
376  errmsg("tsquery is too large")));
377  len = COMPUTESIZE(nnode, sumlen);
378 
379  out = (TSQuery) palloc0(len);
380  SET_VARSIZE(out, len);
381  out->size = nnode;
382 
383  state.curitem = GETQUERY(out);
384  state.operand = state.curoperand = GETOPERAND(out);
385 
386  fillQT(&state, in);
387  return out;
388 }
389 
390 /*
391  * Copy a QTNode tree.
392  *
393  * Modifiable copies of the words and valnodes are made, too.
394  */
395 QTNode *
397 {
398  QTNode *out;
399 
400  /* since this function recurses, it could be driven to stack overflow. */
402 
403  out = (QTNode *) palloc(sizeof(QTNode));
404 
405  *out = *in;
406  out->valnode = (QueryItem *) palloc(sizeof(QueryItem));
407  *(out->valnode) = *(in->valnode);
408  out->flags |= QTN_NEEDFREE;
409 
410  if (in->valnode->type == QI_VAL)
411  {
412  out->word = palloc(in->valnode->qoperand.length + 1);
413  memcpy(out->word, in->word, in->valnode->qoperand.length);
414  out->word[in->valnode->qoperand.length] = '\0';
415  out->flags |= QTN_WORDFREE;
416  }
417  else
418  {
419  int i;
420 
421  out->child = (QTNode **) palloc(sizeof(QTNode *) * in->nchild);
422 
423  for (i = 0; i < in->nchild; i++)
424  out->child[i] = QTNCopy(in->child[i]);
425  }
426 
427  return out;
428 }
429 
430 /*
431  * Clear the specified flag bit(s) in all nodes of a QTNode tree.
432  */
433 void
435 {
436  /* since this function recurses, it could be driven to stack overflow. */
438 
439  in->flags &= ~flags;
440 
441  if (in->valnode->type != QI_VAL)
442  {
443  int i;
444 
445  for (i = 0; i < in->nchild; i++)
446  QTNClearFlags(in->child[i], flags);
447  }
448 }
#define COMPUTESIZE(size)
Definition: _int.h:155
#define GETQUERY(x)
Definition: _int.h:157
unsigned int uint32
Definition: c.h:506
#define Assert(condition)
Definition: c.h:858
int errcode(int sqlerrcode)
Definition: elog.c:853
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
#define ereport(elevel,...)
Definition: elog.h:149
char sign
Definition: informix.c:693
int b
Definition: isn.c:70
int a
Definition: isn.c:69
int i
Definition: isn.c:73
#define GETOPERAND(x)
Definition: ltree.h:165
void pfree(void *pointer)
Definition: mcxt.c:1521
void * palloc0(Size size)
Definition: mcxt.c:1347
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1541
void * palloc(Size size)
Definition: mcxt.c:1317
const void size_t len
#define qsort(a, b, c, d)
Definition: port.h:447
void check_stack_depth(void)
Definition: postgres.c:3540
char * curoperand
Definition: tsquery_util.c:315
char * operand
Definition: tsquery_util.c:314
QueryItem * curitem
Definition: tsquery_util.c:313
int32 nchild
Definition: ts_utils.h:238
QueryItem * valnode
Definition: ts_utils.h:236
uint32 sign
Definition: ts_utils.h:240
uint32 flags
Definition: ts_utils.h:237
char * word
Definition: ts_utils.h:239
struct QTNode ** child
Definition: ts_utils.h:241
uint32 distance
Definition: ts_type.h:172
uint32 length
Definition: ts_type.h:171
int32 valcrc
Definition: ts_type.h:164
int16 distance
Definition: ts_type.h:196
uint32 left
Definition: ts_type.h:197
int32 size
Definition: ts_type.h:221
Definition: regguts.h:323
TSQueryData * TSQuery
Definition: ts_type.h:225
#define QI_OPR
Definition: ts_type.h:150
#define TSQUERY_TOO_BIG(size, lenofoperand)
Definition: ts_type.h:233
#define QI_VAL
Definition: ts_type.h:149
#define OP_AND
Definition: ts_type.h:180
#define OP_PHRASE
Definition: ts_type.h:182
#define OP_OR
Definition: ts_type.h:181
#define OP_NOT
Definition: ts_type.h:179
#define QTN_NEEDFREE
Definition: ts_utils.h:245
#define QTN_WORDFREE
Definition: ts_utils.h:247
void QTNClearFlags(QTNode *in, uint32 flags)
Definition: tsquery_util.c:434
int QTNodeCompare(QTNode *an, QTNode *bn)
Definition: tsquery_util.c:97
QTNode * QTNCopy(QTNode *in)
Definition: tsquery_util.c:396
QTNode * QT2QTN(QueryItem *in, char *operand)
Definition: tsquery_util.c:25
static void cntsize(QTNode *in, int *sumlen, int *nnode)
Definition: tsquery_util.c:292
void QTNSort(QTNode *in)
Definition: tsquery_util.c:163
static int cmpQTN(const void *a, const void *b)
Definition: tsquery_util.c:153
void QTNTernary(QTNode *in)
Definition: tsquery_util.c:201
bool QTNEq(QTNode *a, QTNode *b)
Definition: tsquery_util.c:183
void QTNFree(QTNode *in)
Definition: tsquery_util.c:64
static void fillQT(QTN2QTState *state, QTNode *in)
Definition: tsquery_util.c:323
TSQuery QTN2QT(QTNode *in)
Definition: tsquery_util.c:363
void QTNBinary(QTNode *in)
Definition: tsquery_util.c:250
int32 tsCompareString(char *a, int lena, char *b, int lenb, bool prefix)
Definition: tsvector_op.c:1152
QueryOperator qoperator
Definition: ts_type.h:209
QueryOperand qoperand
Definition: ts_type.h:210
QueryItemType type
Definition: ts_type.h:208
#define SET_VARSIZE(PTR, len)
Definition: varatt.h:305