PostgreSQL Source Code  git master
tsquery_rewrite.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * tsquery_rewrite.c
4  * Utilities for reconstructing tsquery
5  *
6  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
7  *
8  *
9  * IDENTIFICATION
10  * src/backend/utils/adt/tsquery_rewrite.c
11  *
12  *-------------------------------------------------------------------------
13  */
14 
15 #include "postgres.h"
16 
17 #include "catalog/pg_type.h"
18 #include "executor/spi.h"
19 #include "miscadmin.h"
20 #include "tsearch/ts_utils.h"
21 #include "utils/builtins.h"
22 
23 
24 /*
25  * If "node" is equal to "ex", return a copy of "subs" instead.
26  * If "ex" matches a subset of node's children, return a modified version
27  * of "node" in which those children are replaced with a copy of "subs".
28  * Otherwise return "node" unmodified.
29  *
30  * The QTN_NOCHANGE bit is set in successfully modified nodes, so that
31  * we won't uselessly recurse into them.
32  * Also, set *isfind true if we make a replacement.
33  */
34 static QTNode *
35 findeq(QTNode *node, QTNode *ex, QTNode *subs, bool *isfind)
36 {
37  /* Can't match unless signature matches and node type matches. */
38  if ((node->sign & ex->sign) != ex->sign ||
39  node->valnode->type != ex->valnode->type)
40  return node;
41 
42  /* Ignore nodes marked NOCHANGE, too. */
43  if (node->flags & QTN_NOCHANGE)
44  return node;
45 
46  if (node->valnode->type == QI_OPR)
47  {
48  /* Must be same operator. */
49  if (node->valnode->qoperator.oper != ex->valnode->qoperator.oper)
50  return node;
51 
52  if (node->nchild == ex->nchild)
53  {
54  /*
55  * Simple case: when same number of children, match if equal.
56  * (This is reliable when the children were sorted earlier.)
57  */
58  if (QTNEq(node, ex))
59  {
60  /* Match; delete node and return a copy of subs instead. */
61  QTNFree(node);
62  if (subs)
63  {
64  node = QTNCopy(subs);
65  node->flags |= QTN_NOCHANGE;
66  }
67  else
68  node = NULL;
69  *isfind = true;
70  }
71  }
72  else if (node->nchild > ex->nchild && ex->nchild > 0)
73  {
74  /*
75  * AND and OR are commutative/associative, so we should check if a
76  * subset of the children match. For example, if node is A|B|C,
77  * and ex is B|C, we have a match after we notionally convert node
78  * to A|(B|C). This does not work for NOT or PHRASE nodes, but we
79  * can't get here for those node types because they have a fixed
80  * number of children.
81  *
82  * Because we expect that the children are sorted, it suffices to
83  * make one pass through the two lists to find the matches.
84  */
85  bool *matched;
86  int nmatched;
87  int i,
88  j;
89 
90  /* Assert that the subset rule is OK */
91  Assert(node->valnode->qoperator.oper == OP_AND ||
92  node->valnode->qoperator.oper == OP_OR);
93 
94  /* matched[] will record which children of node matched */
95  matched = (bool *) palloc0(node->nchild * sizeof(bool));
96  nmatched = 0;
97  i = j = 0;
98  while (i < node->nchild && j < ex->nchild)
99  {
100  int cmp = QTNodeCompare(node->child[i], ex->child[j]);
101 
102  if (cmp == 0)
103  {
104  /* match! */
105  matched[i] = true;
106  nmatched++;
107  i++, j++;
108  }
109  else if (cmp < 0)
110  {
111  /* node->child[i] has no match, ignore it */
112  i++;
113  }
114  else
115  {
116  /* ex->child[j] has no match; we can give up immediately */
117  break;
118  }
119  }
120 
121  if (nmatched == ex->nchild)
122  {
123  /* collapse out the matched children of node */
124  j = 0;
125  for (i = 0; i < node->nchild; i++)
126  {
127  if (matched[i])
128  QTNFree(node->child[i]);
129  else
130  node->child[j++] = node->child[i];
131  }
132 
133  /* and instead insert a copy of subs */
134  if (subs)
135  {
136  subs = QTNCopy(subs);
137  subs->flags |= QTN_NOCHANGE;
138  node->child[j++] = subs;
139  }
140 
141  node->nchild = j;
142 
143  /*
144  * At this point we might have a node with zero or one child,
145  * which should be simplified. But we leave it to our caller
146  * (dofindsubquery) to take care of that.
147  */
148 
149  /*
150  * Re-sort the node to put new child in the right place. This
151  * is a bit bogus, because it won't matter for findsubquery's
152  * remaining processing, and it's insufficient to prepare the
153  * tree for another search (we would need to re-flatten as
154  * well, and we don't want to do that because we'd lose the
155  * QTN_NOCHANGE marking on the new child). But it's needed to
156  * keep the results the same as the regression tests expect.
157  */
158  QTNSort(node);
159 
160  *isfind = true;
161  }
162 
163  pfree(matched);
164  }
165  }
166  else
167  {
168  Assert(node->valnode->type == QI_VAL);
169 
170  if (node->valnode->qoperand.valcrc != ex->valnode->qoperand.valcrc)
171  return node;
172  else if (QTNEq(node, ex))
173  {
174  QTNFree(node);
175  if (subs)
176  {
177  node = QTNCopy(subs);
178  node->flags |= QTN_NOCHANGE;
179  }
180  else
181  {
182  node = NULL;
183  }
184  *isfind = true;
185  }
186  }
187 
188  return node;
189 }
190 
191 /*
192  * Recursive guts of findsubquery(): attempt to replace "ex" with "subs"
193  * at the root node, and if we failed to do so, recursively match against
194  * child nodes.
195  *
196  * Delete any void subtrees resulting from the replacement.
197  * In the following example '5' is replaced by empty operand:
198  *
199  * AND -> 6
200  * / \
201  * 5 OR
202  * / \
203  * 6 5
204  */
205 static QTNode *
206 dofindsubquery(QTNode *root, QTNode *ex, QTNode *subs, bool *isfind)
207 {
208  /* since this function recurses, it could be driven to stack overflow. */
210 
211  /* also, since it's a bit expensive, let's check for query cancel. */
213 
214  /* match at the node itself */
215  root = findeq(root, ex, subs, isfind);
216 
217  /* unless we matched here, consider matches at child nodes */
218  if (root && (root->flags & QTN_NOCHANGE) == 0 &&
219  root->valnode->type == QI_OPR)
220  {
221  int i,
222  j = 0;
223 
224  /*
225  * Any subtrees that are replaced by NULL must be dropped from the
226  * tree.
227  */
228  for (i = 0; i < root->nchild; i++)
229  {
230  root->child[j] = dofindsubquery(root->child[i], ex, subs, isfind);
231  if (root->child[j])
232  j++;
233  }
234 
235  root->nchild = j;
236 
237  /*
238  * If we have just zero or one remaining child node, simplify out this
239  * operator node.
240  */
241  if (root->nchild == 0)
242  {
243  QTNFree(root);
244  root = NULL;
245  }
246  else if (root->nchild == 1 && root->valnode->qoperator.oper != OP_NOT)
247  {
248  QTNode *nroot = root->child[0];
249 
250  pfree(root);
251  root = nroot;
252  }
253  }
254 
255  return root;
256 }
257 
258 /*
259  * Substitute "subs" for "ex" throughout the QTNode tree at root.
260  *
261  * If isfind isn't NULL, set *isfind to show whether we made any substitution.
262  *
263  * Both "root" and "ex" must have been through QTNTernary and QTNSort
264  * to ensure reliable matching.
265  */
266 QTNode *
267 findsubquery(QTNode *root, QTNode *ex, QTNode *subs, bool *isfind)
268 {
269  bool DidFind = false;
270 
271  root = dofindsubquery(root, ex, subs, &DidFind);
272 
273  if (isfind)
274  *isfind = DidFind;
275 
276  return root;
277 }
278 
279 Datum
281 {
282  TSQuery query = PG_GETARG_TSQUERY_COPY(0);
283  text *in = PG_GETARG_TEXT_PP(1);
284  TSQuery rewritten = query;
285  MemoryContext outercontext = CurrentMemoryContext;
286  MemoryContext oldcontext;
287  QTNode *tree;
288  char *buf;
290  Portal portal;
291  bool isnull;
292 
293  if (query->size == 0)
294  {
295  PG_FREE_IF_COPY(in, 1);
296  PG_RETURN_POINTER(rewritten);
297  }
298 
299  tree = QT2QTN(GETQUERY(query), GETOPERAND(query));
300  QTNTernary(tree);
301  QTNSort(tree);
302 
303  buf = text_to_cstring(in);
304 
305  SPI_connect();
306 
307  if ((plan = SPI_prepare(buf, 0, NULL)) == NULL)
308  elog(ERROR, "SPI_prepare(\"%s\") failed", buf);
309 
310  if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL)
311  elog(ERROR, "SPI_cursor_open(\"%s\") failed", buf);
312 
313  SPI_cursor_fetch(portal, true, 100);
314 
315  if (SPI_tuptable == NULL ||
316  SPI_tuptable->tupdesc->natts != 2 ||
317  SPI_gettypeid(SPI_tuptable->tupdesc, 1) != TSQUERYOID ||
318  SPI_gettypeid(SPI_tuptable->tupdesc, 2) != TSQUERYOID)
319  ereport(ERROR,
320  (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
321  errmsg("ts_rewrite query must return two tsquery columns")));
322 
323  while (SPI_processed > 0 && tree)
324  {
325  uint64 i;
326 
327  for (i = 0; i < SPI_processed && tree; i++)
328  {
329  Datum qdata = SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull);
330  Datum sdata;
331 
332  if (isnull)
333  continue;
334 
335  sdata = SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 2, &isnull);
336 
337  if (!isnull)
338  {
339  TSQuery qtex = DatumGetTSQuery(qdata);
340  TSQuery qtsubs = DatumGetTSQuery(sdata);
341  QTNode *qex,
342  *qsubs = NULL;
343 
344  if (qtex->size == 0)
345  {
346  if (qtex != (TSQuery) DatumGetPointer(qdata))
347  pfree(qtex);
348  if (qtsubs != (TSQuery) DatumGetPointer(sdata))
349  pfree(qtsubs);
350  continue;
351  }
352 
353  qex = QT2QTN(GETQUERY(qtex), GETOPERAND(qtex));
354 
355  QTNTernary(qex);
356  QTNSort(qex);
357 
358  if (qtsubs->size)
359  qsubs = QT2QTN(GETQUERY(qtsubs), GETOPERAND(qtsubs));
360 
361  oldcontext = MemoryContextSwitchTo(outercontext);
362  tree = findsubquery(tree, qex, qsubs, NULL);
363  MemoryContextSwitchTo(oldcontext);
364 
365  QTNFree(qex);
366  if (qtex != (TSQuery) DatumGetPointer(qdata))
367  pfree(qtex);
368  QTNFree(qsubs);
369  if (qtsubs != (TSQuery) DatumGetPointer(sdata))
370  pfree(qtsubs);
371 
372  if (tree)
373  {
374  /* ready the tree for another pass */
376  QTNTernary(tree);
377  QTNSort(tree);
378  }
379  }
380  }
381 
383  SPI_cursor_fetch(portal, true, 100);
384  }
385 
387  SPI_cursor_close(portal);
389  SPI_finish();
390 
391  if (tree)
392  {
393  QTNBinary(tree);
394  rewritten = QTN2QT(tree);
395  QTNFree(tree);
396  PG_FREE_IF_COPY(query, 0);
397  }
398  else
399  {
400  SET_VARSIZE(rewritten, HDRSIZETQ);
401  rewritten->size = 0;
402  }
403 
404  pfree(buf);
405  PG_FREE_IF_COPY(in, 1);
406  PG_RETURN_POINTER(rewritten);
407 }
408 
409 Datum
411 {
412  TSQuery query = PG_GETARG_TSQUERY_COPY(0);
413  TSQuery ex = PG_GETARG_TSQUERY(1);
414  TSQuery subst = PG_GETARG_TSQUERY(2);
415  TSQuery rewritten = query;
416  QTNode *tree,
417  *qex,
418  *subs = NULL;
419 
420  if (query->size == 0 || ex->size == 0)
421  {
422  PG_FREE_IF_COPY(ex, 1);
423  PG_FREE_IF_COPY(subst, 2);
424  PG_RETURN_POINTER(rewritten);
425  }
426 
427  tree = QT2QTN(GETQUERY(query), GETOPERAND(query));
428  QTNTernary(tree);
429  QTNSort(tree);
430 
431  qex = QT2QTN(GETQUERY(ex), GETOPERAND(ex));
432  QTNTernary(qex);
433  QTNSort(qex);
434 
435  if (subst->size)
436  subs = QT2QTN(GETQUERY(subst), GETOPERAND(subst));
437 
438  tree = findsubquery(tree, qex, subs, NULL);
439 
440  QTNFree(qex);
441  QTNFree(subs);
442 
443  if (!tree)
444  {
445  SET_VARSIZE(rewritten, HDRSIZETQ);
446  rewritten->size = 0;
447  PG_FREE_IF_COPY(ex, 1);
448  PG_FREE_IF_COPY(subst, 2);
449  PG_RETURN_POINTER(rewritten);
450  }
451  else
452  {
453  QTNBinary(tree);
454  rewritten = QTN2QT(tree);
455  QTNFree(tree);
456  }
457 
458  PG_FREE_IF_COPY(query, 0);
459  PG_FREE_IF_COPY(ex, 1);
460  PG_FREE_IF_COPY(subst, 2);
461  PG_RETURN_POINTER(rewritten);
462 }
#define GETQUERY(x)
Definition: _int.h:157
#define Assert(condition)
Definition: c.h:861
int errcode(int sqlerrcode)
Definition: elog.c:853
int errmsg(const char *fmt,...)
Definition: elog.c:1070
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
#define ereport(elevel,...)
Definition: elog.h:149
#define PG_FREE_IF_COPY(ptr, n)
Definition: fmgr.h:260
#define PG_GETARG_TEXT_PP(n)
Definition: fmgr.h:309
#define PG_RETURN_POINTER(x)
Definition: fmgr.h:361
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
int j
Definition: isn.c:74
int i
Definition: isn.c:73
#define GETOPERAND(x)
Definition: ltree.h:165
void pfree(void *pointer)
Definition: mcxt.c:1521
void * palloc0(Size size)
Definition: mcxt.c:1347
MemoryContext CurrentMemoryContext
Definition: mcxt.c:143
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:122
#define plan(x)
Definition: pg_regress.c:162
static char * buf
Definition: pg_test_fsync.c:73
void check_stack_depth(void)
Definition: postgres.c:3564
uintptr_t Datum
Definition: postgres.h:64
static Pointer DatumGetPointer(Datum X)
Definition: postgres.h:312
tree
Definition: radixtree.h:1834
MemoryContextSwitchTo(old_ctx)
tree ctl root
Definition: radixtree.h:1886
static int cmp(const chr *x, const chr *y, size_t len)
Definition: regc_locale.c:743
uint64 SPI_processed
Definition: spi.c:44
Oid SPI_gettypeid(TupleDesc tupdesc, int fnumber)
Definition: spi.c:1305
int SPI_freeplan(SPIPlanPtr plan)
Definition: spi.c:1022
SPITupleTable * SPI_tuptable
Definition: spi.c:45
int SPI_connect(void)
Definition: spi.c:94
void SPI_cursor_fetch(Portal portal, bool forward, long count)
Definition: spi.c:1803
int SPI_finish(void)
Definition: spi.c:182
void SPI_freetuptable(SPITupleTable *tuptable)
Definition: spi.c:1383
Portal SPI_cursor_open(const char *name, SPIPlanPtr plan, Datum *Values, const char *Nulls, bool read_only)
Definition: spi.c:1442
SPIPlanPtr SPI_prepare(const char *src, int nargs, Oid *argtypes)
Definition: spi.c:857
void SPI_cursor_close(Portal portal)
Definition: spi.c:1859
Datum SPI_getbinval(HeapTuple tuple, TupleDesc tupdesc, int fnumber, bool *isnull)
Definition: spi.c:1249
int32 nchild
Definition: ts_utils.h:238
QueryItem * valnode
Definition: ts_utils.h:236
uint32 sign
Definition: ts_utils.h:240
uint32 flags
Definition: ts_utils.h:237
struct QTNode ** child
Definition: ts_utils.h:241
int32 valcrc
Definition: ts_type.h:164
TupleDesc tupdesc
Definition: spi.h:25
HeapTuple * vals
Definition: spi.h:26
int32 size
Definition: ts_type.h:221
Definition: c.h:690
static TSQuery DatumGetTSQuery(Datum X)
Definition: ts_type.h:249
#define PG_GETARG_TSQUERY(n)
Definition: ts_type.h:266
#define HDRSIZETQ
Definition: ts_type.h:227
#define QI_OPR
Definition: ts_type.h:150
#define PG_GETARG_TSQUERY_COPY(n)
Definition: ts_type.h:267
#define QI_VAL
Definition: ts_type.h:149
#define OP_AND
Definition: ts_type.h:180
#define OP_OR
Definition: ts_type.h:181
#define OP_NOT
Definition: ts_type.h:179
#define QTN_NOCHANGE
Definition: ts_utils.h:246
Datum tsquery_rewrite_query(PG_FUNCTION_ARGS)
static QTNode * dofindsubquery(QTNode *root, QTNode *ex, QTNode *subs, bool *isfind)
static QTNode * findeq(QTNode *node, QTNode *ex, QTNode *subs, bool *isfind)
QTNode * findsubquery(QTNode *root, QTNode *ex, QTNode *subs, bool *isfind)
Datum tsquery_rewrite(PG_FUNCTION_ARGS)
void QTNClearFlags(QTNode *in, uint32 flags)
Definition: tsquery_util.c:434
int QTNodeCompare(QTNode *an, QTNode *bn)
Definition: tsquery_util.c:97
QTNode * QTNCopy(QTNode *in)
Definition: tsquery_util.c:396
QTNode * QT2QTN(QueryItem *in, char *operand)
Definition: tsquery_util.c:25
void QTNSort(QTNode *in)
Definition: tsquery_util.c:163
void QTNTernary(QTNode *in)
Definition: tsquery_util.c:201
bool QTNEq(QTNode *a, QTNode *b)
Definition: tsquery_util.c:183
void QTNFree(QTNode *in)
Definition: tsquery_util.c:64
TSQuery QTN2QT(QTNode *in)
Definition: tsquery_util.c:363
void QTNBinary(QTNode *in)
Definition: tsquery_util.c:250
QueryOperator qoperator
Definition: ts_type.h:209
QueryOperand qoperand
Definition: ts_type.h:210
QueryItemType type
Definition: ts_type.h:208
#define SET_VARSIZE(PTR, len)
Definition: varatt.h:305
char * text_to_cstring(const text *t)
Definition: varlena.c:217