PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
prepqual.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * prepqual.c
4  * Routines for preprocessing qualification expressions
5  *
6  *
7  * While the parser will produce flattened (N-argument) AND/OR trees from
8  * simple sequences of AND'ed or OR'ed clauses, there might be an AND clause
9  * directly underneath another AND, or OR underneath OR, if the input was
10  * oddly parenthesized. Also, rule expansion and subquery flattening could
11  * produce such parsetrees. The planner wants to flatten all such cases
12  * to ensure consistent optimization behavior.
13  *
14  * Formerly, this module was responsible for doing the initial flattening,
15  * but now we leave it to eval_const_expressions to do that since it has to
16  * make a complete pass over the expression tree anyway. Instead, we just
17  * have to ensure that our manipulations preserve AND/OR flatness.
18  * pull_ands() and pull_ors() are used to maintain flatness of the AND/OR
19  * tree after local transformations that might introduce nested AND/ORs.
20  *
21  *
22  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
23  * Portions Copyright (c) 1994, Regents of the University of California
24  *
25  *
26  * IDENTIFICATION
27  * src/backend/optimizer/prep/prepqual.c
28  *
29  *-------------------------------------------------------------------------
30  */
31 
32 #include "postgres.h"
33 
34 #include "nodes/makefuncs.h"
35 #include "optimizer/clauses.h"
36 #include "optimizer/prep.h"
37 #include "utils/lsyscache.h"
38 
39 
40 static List *pull_ands(List *andlist);
41 static List *pull_ors(List *orlist);
42 static Expr *find_duplicate_ors(Expr *qual);
43 static Expr *process_duplicate_ors(List *orlist);
44 
45 
46 /*
47  * negate_clause
48  * Negate a Boolean expression.
49  *
50  * Input is a clause to be negated (e.g., the argument of a NOT clause).
51  * Returns a new clause equivalent to the negation of the given clause.
52  *
53  * Although this can be invoked on its own, it's mainly intended as a helper
54  * for eval_const_expressions(), and that context drives several design
55  * decisions. In particular, if the input is already AND/OR flat, we must
56  * preserve that property. We also don't bother to recurse in situations
57  * where we can assume that lower-level executions of eval_const_expressions
58  * would already have simplified sub-clauses of the input.
59  *
60  * The difference between this and a simple make_notclause() is that this
61  * tries to get rid of the NOT node by logical simplification. It's clearly
62  * always a win if the NOT node can be eliminated altogether. However, our
63  * use of DeMorgan's laws could result in having more NOT nodes rather than
64  * fewer. We do that unconditionally anyway, because in WHERE clauses it's
65  * important to expose as much top-level AND/OR structure as possible.
66  * Also, eliminating an intermediate NOT may allow us to flatten two levels
67  * of AND or OR together that we couldn't have otherwise. Finally, one of
68  * the motivations for doing this is to ensure that logically equivalent
69  * expressions will be seen as physically equal(), so we should always apply
70  * the same transformations.
71  */
72 Node *
74 {
75  if (node == NULL) /* should not happen */
76  elog(ERROR, "can't negate an empty subexpression");
77  switch (nodeTag(node))
78  {
79  case T_Const:
80  {
81  Const *c = (Const *) node;
82 
83  /* NOT NULL is still NULL */
84  if (c->constisnull)
85  return makeBoolConst(false, true);
86  /* otherwise pretty easy */
87  return makeBoolConst(!DatumGetBool(c->constvalue), false);
88  }
89  break;
90  case T_OpExpr:
91  {
92  /*
93  * Negate operator if possible: (NOT (< A B)) => (>= A B)
94  */
95  OpExpr *opexpr = (OpExpr *) node;
96  Oid negator = get_negator(opexpr->opno);
97 
98  if (negator)
99  {
100  OpExpr *newopexpr = makeNode(OpExpr);
101 
102  newopexpr->opno = negator;
103  newopexpr->opfuncid = InvalidOid;
104  newopexpr->opresulttype = opexpr->opresulttype;
105  newopexpr->opretset = opexpr->opretset;
106  newopexpr->opcollid = opexpr->opcollid;
107  newopexpr->inputcollid = opexpr->inputcollid;
108  newopexpr->args = opexpr->args;
109  newopexpr->location = opexpr->location;
110  return (Node *) newopexpr;
111  }
112  }
113  break;
114  case T_ScalarArrayOpExpr:
115  {
116  /*
117  * Negate a ScalarArrayOpExpr if its operator has a negator;
118  * for example x = ANY (list) becomes x <> ALL (list)
119  */
120  ScalarArrayOpExpr *saopexpr = (ScalarArrayOpExpr *) node;
121  Oid negator = get_negator(saopexpr->opno);
122 
123  if (negator)
124  {
126 
127  newopexpr->opno = negator;
128  newopexpr->opfuncid = InvalidOid;
129  newopexpr->useOr = !saopexpr->useOr;
130  newopexpr->inputcollid = saopexpr->inputcollid;
131  newopexpr->args = saopexpr->args;
132  newopexpr->location = saopexpr->location;
133  return (Node *) newopexpr;
134  }
135  }
136  break;
137  case T_BoolExpr:
138  {
139  BoolExpr *expr = (BoolExpr *) node;
140 
141  switch (expr->boolop)
142  {
143  /*--------------------
144  * Apply DeMorgan's Laws:
145  * (NOT (AND A B)) => (OR (NOT A) (NOT B))
146  * (NOT (OR A B)) => (AND (NOT A) (NOT B))
147  * i.e., swap AND for OR and negate each subclause.
148  *
149  * If the input is already AND/OR flat and has no NOT
150  * directly above AND or OR, this transformation preserves
151  * those properties. For example, if no direct child of
152  * the given AND clause is an AND or a NOT-above-OR, then
153  * the recursive calls of negate_clause() can't return any
154  * OR clauses. So we needn't call pull_ors() before
155  * building a new OR clause. Similarly for the OR case.
156  *--------------------
157  */
158  case AND_EXPR:
159  {
160  List *nargs = NIL;
161  ListCell *lc;
162 
163  foreach(lc, expr->args)
164  {
165  nargs = lappend(nargs,
166  negate_clause(lfirst(lc)));
167  }
168  return (Node *) make_orclause(nargs);
169  }
170  break;
171  case OR_EXPR:
172  {
173  List *nargs = NIL;
174  ListCell *lc;
175 
176  foreach(lc, expr->args)
177  {
178  nargs = lappend(nargs,
179  negate_clause(lfirst(lc)));
180  }
181  return (Node *) make_andclause(nargs);
182  }
183  break;
184  case NOT_EXPR:
185 
186  /*
187  * NOT underneath NOT: they cancel. We assume the
188  * input is already simplified, so no need to recurse.
189  */
190  return (Node *) linitial(expr->args);
191  default:
192  elog(ERROR, "unrecognized boolop: %d",
193  (int) expr->boolop);
194  break;
195  }
196  }
197  break;
198  case T_NullTest:
199  {
200  NullTest *expr = (NullTest *) node;
201 
202  /*
203  * In the rowtype case, the two flavors of NullTest are *not*
204  * logical inverses, so we can't simplify. But it does work
205  * for scalar datatypes.
206  */
207  if (!expr->argisrow)
208  {
209  NullTest *newexpr = makeNode(NullTest);
210 
211  newexpr->arg = expr->arg;
212  newexpr->nulltesttype = (expr->nulltesttype == IS_NULL ?
213  IS_NOT_NULL : IS_NULL);
214  newexpr->argisrow = expr->argisrow;
215  newexpr->location = expr->location;
216  return (Node *) newexpr;
217  }
218  }
219  break;
220  case T_BooleanTest:
221  {
222  BooleanTest *expr = (BooleanTest *) node;
223  BooleanTest *newexpr = makeNode(BooleanTest);
224 
225  newexpr->arg = expr->arg;
226  switch (expr->booltesttype)
227  {
228  case IS_TRUE:
229  newexpr->booltesttype = IS_NOT_TRUE;
230  break;
231  case IS_NOT_TRUE:
232  newexpr->booltesttype = IS_TRUE;
233  break;
234  case IS_FALSE:
235  newexpr->booltesttype = IS_NOT_FALSE;
236  break;
237  case IS_NOT_FALSE:
238  newexpr->booltesttype = IS_FALSE;
239  break;
240  case IS_UNKNOWN:
241  newexpr->booltesttype = IS_NOT_UNKNOWN;
242  break;
243  case IS_NOT_UNKNOWN:
244  newexpr->booltesttype = IS_UNKNOWN;
245  break;
246  default:
247  elog(ERROR, "unrecognized booltesttype: %d",
248  (int) expr->booltesttype);
249  break;
250  }
251  newexpr->location = expr->location;
252  return (Node *) newexpr;
253  }
254  break;
255  default:
256  /* else fall through */
257  break;
258  }
259 
260  /*
261  * Otherwise we don't know how to simplify this, so just tack on an
262  * explicit NOT node.
263  */
264  return (Node *) make_notclause((Expr *) node);
265 }
266 
267 
268 /*
269  * canonicalize_qual
270  * Convert a qualification expression to the most useful form.
271  *
272  * The name of this routine is a holdover from a time when it would try to
273  * force the expression into canonical AND-of-ORs or OR-of-ANDs form.
274  * Eventually, we recognized that that had more theoretical purity than
275  * actual usefulness, and so now the transformation doesn't involve any
276  * notion of reaching a canonical form.
277  *
278  * NOTE: we assume the input has already been through eval_const_expressions
279  * and therefore possesses AND/OR flatness. Formerly this function included
280  * its own flattening logic, but that requires a useless extra pass over the
281  * tree.
282  *
283  * Returns the modified qualification.
284  */
285 Expr *
287 {
288  Expr *newqual;
289 
290  /* Quick exit for empty qual */
291  if (qual == NULL)
292  return NULL;
293 
294  /*
295  * Pull up redundant subclauses in OR-of-AND trees. We do this only
296  * within the top-level AND/OR structure; there's no point in looking
297  * deeper. Also remove any NULL constants in the top-level structure.
298  */
299  newqual = find_duplicate_ors(qual);
300 
301  return newqual;
302 }
303 
304 
305 /*
306  * pull_ands
307  * Recursively flatten nested AND clauses into a single and-clause list.
308  *
309  * Input is the arglist of an AND clause.
310  * Returns the rebuilt arglist (note original list structure is not touched).
311  */
312 static List *
313 pull_ands(List *andlist)
314 {
315  List *out_list = NIL;
316  ListCell *arg;
317 
318  foreach(arg, andlist)
319  {
320  Node *subexpr = (Node *) lfirst(arg);
321 
322  /*
323  * Note: we can destructively concat the subexpression's arglist
324  * because we know the recursive invocation of pull_ands will have
325  * built a new arglist not shared with any other expr. Otherwise we'd
326  * need a list_copy here.
327  */
328  if (and_clause(subexpr))
329  out_list = list_concat(out_list,
330  pull_ands(((BoolExpr *) subexpr)->args));
331  else
332  out_list = lappend(out_list, subexpr);
333  }
334  return out_list;
335 }
336 
337 /*
338  * pull_ors
339  * Recursively flatten nested OR clauses into a single or-clause list.
340  *
341  * Input is the arglist of an OR clause.
342  * Returns the rebuilt arglist (note original list structure is not touched).
343  */
344 static List *
345 pull_ors(List *orlist)
346 {
347  List *out_list = NIL;
348  ListCell *arg;
349 
350  foreach(arg, orlist)
351  {
352  Node *subexpr = (Node *) lfirst(arg);
353 
354  /*
355  * Note: we can destructively concat the subexpression's arglist
356  * because we know the recursive invocation of pull_ors will have
357  * built a new arglist not shared with any other expr. Otherwise we'd
358  * need a list_copy here.
359  */
360  if (or_clause(subexpr))
361  out_list = list_concat(out_list,
362  pull_ors(((BoolExpr *) subexpr)->args));
363  else
364  out_list = lappend(out_list, subexpr);
365  }
366  return out_list;
367 }
368 
369 
370 /*--------------------
371  * The following code attempts to apply the inverse OR distributive law:
372  * ((A AND B) OR (A AND C)) => (A AND (B OR C))
373  * That is, locate OR clauses in which every subclause contains an
374  * identical term, and pull out the duplicated terms.
375  *
376  * This may seem like a fairly useless activity, but it turns out to be
377  * applicable to many machine-generated queries, and there are also queries
378  * in some of the TPC benchmarks that need it. This was in fact almost the
379  * sole useful side-effect of the old prepqual code that tried to force
380  * the query into canonical AND-of-ORs form: the canonical equivalent of
381  * ((A AND B) OR (A AND C))
382  * is
383  * ((A OR A) AND (A OR C) AND (B OR A) AND (B OR C))
384  * which the code was able to simplify to
385  * (A AND (A OR C) AND (B OR A) AND (B OR C))
386  * thus successfully extracting the common condition A --- but at the cost
387  * of cluttering the qual with many redundant clauses.
388  *--------------------
389  */
390 
391 /*
392  * find_duplicate_ors
393  * Given a qualification tree with the NOTs pushed down, search for
394  * OR clauses to which the inverse OR distributive law might apply.
395  * Only the top-level AND/OR structure is searched.
396  *
397  * While at it, we remove any NULL constants within the top-level AND/OR
398  * structure, eg "x OR NULL::boolean" is reduced to "x". In general that
399  * would change the result, so eval_const_expressions can't do it; but at
400  * top level of WHERE, we don't need to distinguish between FALSE and NULL
401  * results, so it's valid to treat NULL::boolean the same as FALSE and then
402  * simplify AND/OR accordingly.
403  *
404  * Returns the modified qualification. AND/OR flatness is preserved.
405  */
406 static Expr *
408 {
409  if (or_clause((Node *) qual))
410  {
411  List *orlist = NIL;
412  ListCell *temp;
413 
414  /* Recurse */
415  foreach(temp, ((BoolExpr *) qual)->args)
416  {
417  Expr *arg = (Expr *) lfirst(temp);
418 
419  arg = find_duplicate_ors(arg);
420 
421  /* Get rid of any constant inputs */
422  if (arg && IsA(arg, Const))
423  {
424  Const *carg = (Const *) arg;
425 
426  /* Drop constant FALSE or NULL */
427  if (carg->constisnull || !DatumGetBool(carg->constvalue))
428  continue;
429  /* constant TRUE, so OR reduces to TRUE */
430  return arg;
431  }
432 
433  orlist = lappend(orlist, arg);
434  }
435 
436  /* Flatten any ORs pulled up to just below here */
437  orlist = pull_ors(orlist);
438 
439  /* Now we can look for duplicate ORs */
440  return process_duplicate_ors(orlist);
441  }
442  else if (and_clause((Node *) qual))
443  {
444  List *andlist = NIL;
445  ListCell *temp;
446 
447  /* Recurse */
448  foreach(temp, ((BoolExpr *) qual)->args)
449  {
450  Expr *arg = (Expr *) lfirst(temp);
451 
452  arg = find_duplicate_ors(arg);
453 
454  /* Get rid of any constant inputs */
455  if (arg && IsA(arg, Const))
456  {
457  Const *carg = (Const *) arg;
458 
459  /* Drop constant TRUE */
460  if (!carg->constisnull && DatumGetBool(carg->constvalue))
461  continue;
462  /* constant FALSE or NULL, so AND reduces to FALSE */
463  return (Expr *) makeBoolConst(false, false);
464  }
465 
466  andlist = lappend(andlist, arg);
467  }
468 
469  /* Flatten any ANDs introduced just below here */
470  andlist = pull_ands(andlist);
471 
472  /* AND of no inputs reduces to TRUE */
473  if (andlist == NIL)
474  return (Expr *) makeBoolConst(true, false);
475 
476  /* Single-expression AND just reduces to that expression */
477  if (list_length(andlist) == 1)
478  return (Expr *) linitial(andlist);
479 
480  /* Else we still need an AND node */
481  return make_andclause(andlist);
482  }
483  else
484  return qual;
485 }
486 
487 /*
488  * process_duplicate_ors
489  * Given a list of exprs which are ORed together, try to apply
490  * the inverse OR distributive law.
491  *
492  * Returns the resulting expression (could be an AND clause, an OR
493  * clause, or maybe even a single subexpression).
494  */
495 static Expr *
497 {
498  List *reference = NIL;
499  int num_subclauses = 0;
500  List *winners;
501  List *neworlist;
502  ListCell *temp;
503 
504  /* OR of no inputs reduces to FALSE */
505  if (orlist == NIL)
506  return (Expr *) makeBoolConst(false, false);
507 
508  /* Single-expression OR just reduces to that expression */
509  if (list_length(orlist) == 1)
510  return (Expr *) linitial(orlist);
511 
512  /*
513  * Choose the shortest AND clause as the reference list --- obviously, any
514  * subclause not in this clause isn't in all the clauses. If we find a
515  * clause that's not an AND, we can treat it as a one-element AND clause,
516  * which necessarily wins as shortest.
517  */
518  foreach(temp, orlist)
519  {
520  Expr *clause = (Expr *) lfirst(temp);
521 
522  if (and_clause((Node *) clause))
523  {
524  List *subclauses = ((BoolExpr *) clause)->args;
525  int nclauses = list_length(subclauses);
526 
527  if (reference == NIL || nclauses < num_subclauses)
528  {
529  reference = subclauses;
530  num_subclauses = nclauses;
531  }
532  }
533  else
534  {
535  reference = list_make1(clause);
536  break;
537  }
538  }
539 
540  /*
541  * Just in case, eliminate any duplicates in the reference list.
542  */
543  reference = list_union(NIL, reference);
544 
545  /*
546  * Check each element of the reference list to see if it's in all the OR
547  * clauses. Build a new list of winning clauses.
548  */
549  winners = NIL;
550  foreach(temp, reference)
551  {
552  Expr *refclause = (Expr *) lfirst(temp);
553  bool win = true;
554  ListCell *temp2;
555 
556  foreach(temp2, orlist)
557  {
558  Expr *clause = (Expr *) lfirst(temp2);
559 
560  if (and_clause((Node *) clause))
561  {
562  if (!list_member(((BoolExpr *) clause)->args, refclause))
563  {
564  win = false;
565  break;
566  }
567  }
568  else
569  {
570  if (!equal(refclause, clause))
571  {
572  win = false;
573  break;
574  }
575  }
576  }
577 
578  if (win)
579  winners = lappend(winners, refclause);
580  }
581 
582  /*
583  * If no winners, we can't transform the OR
584  */
585  if (winners == NIL)
586  return make_orclause(orlist);
587 
588  /*
589  * Generate new OR list consisting of the remaining sub-clauses.
590  *
591  * If any clause degenerates to empty, then we have a situation like (A
592  * AND B) OR (A), which can be reduced to just A --- that is, the
593  * additional conditions in other arms of the OR are irrelevant.
594  *
595  * Note that because we use list_difference, any multiple occurrences of a
596  * winning clause in an AND sub-clause will be removed automatically.
597  */
598  neworlist = NIL;
599  foreach(temp, orlist)
600  {
601  Expr *clause = (Expr *) lfirst(temp);
602 
603  if (and_clause((Node *) clause))
604  {
605  List *subclauses = ((BoolExpr *) clause)->args;
606 
607  subclauses = list_difference(subclauses, winners);
608  if (subclauses != NIL)
609  {
610  if (list_length(subclauses) == 1)
611  neworlist = lappend(neworlist, linitial(subclauses));
612  else
613  neworlist = lappend(neworlist, make_andclause(subclauses));
614  }
615  else
616  {
617  neworlist = NIL; /* degenerate case, see above */
618  break;
619  }
620  }
621  else
622  {
623  if (!list_member(winners, clause))
624  neworlist = lappend(neworlist, clause);
625  else
626  {
627  neworlist = NIL; /* degenerate case, see above */
628  break;
629  }
630  }
631  }
632 
633  /*
634  * Append reduced OR to the winners list, if it's not degenerate, handling
635  * the special case of one element correctly (can that really happen?).
636  * Also be careful to maintain AND/OR flatness in case we pulled up a
637  * sub-sub-OR-clause.
638  */
639  if (neworlist != NIL)
640  {
641  if (list_length(neworlist) == 1)
642  winners = lappend(winners, linitial(neworlist));
643  else
644  winners = lappend(winners, make_orclause(pull_ors(neworlist)));
645  }
646 
647  /*
648  * And return the constructed AND clause, again being wary of a single
649  * element and AND/OR flatness.
650  */
651  if (list_length(winners) == 1)
652  return (Expr *) linitial(winners);
653  else
654  return make_andclause(pull_ands(winners));
655 }
Datum constvalue
Definition: primnodes.h:196
#define NIL
Definition: pg_list.h:69
static List * pull_ands(List *andlist)
Definition: prepqual.c:313
#define IsA(nodeptr, _type_)
Definition: nodes.h:569
Node * negate_clause(Node *node)
Definition: prepqual.c:73
bool equal(const void *a, const void *b)
Definition: equalfuncs.c:2923
Definition: nodes.h:518
List * list_concat(List *list1, List *list2)
Definition: list.c:321
List * list_union(const List *list1, const List *list2)
Definition: list.c:697
unsigned int Oid
Definition: postgres_ext.h:31
#define list_make1(x1)
Definition: pg_list.h:133
Oid opresulttype
Definition: primnodes.h:497
#define linitial(l)
Definition: pg_list.h:110
#define ERROR
Definition: elog.h:43
bool list_member(const List *list, const void *datum)
Definition: list.c:444
BoolExprType boolop
Definition: primnodes.h:561
Node * makeBoolConst(bool value, bool isnull)
Definition: makefuncs.c:354
Expr * arg
Definition: primnodes.h:1178
bool and_clause(Node *clause)
Definition: clauses.c:313
char * c
int location
Definition: primnodes.h:502
Expr * arg
Definition: primnodes.h:1201
#define DatumGetBool(X)
Definition: postgres.h:399
Oid opcollid
Definition: primnodes.h:499
Definition: nodes.h:145
List * lappend(List *list, void *datum)
Definition: list.c:128
BoolTestType booltesttype
Definition: primnodes.h:1202
Expr * make_notclause(Expr *notclause)
Definition: clauses.c:248
Expr * canonicalize_qual(Expr *qual)
Definition: prepqual.c:286
Oid opfuncid
Definition: primnodes.h:496
bool or_clause(Node *clause)
Definition: clauses.c:279
NullTestType nulltesttype
Definition: primnodes.h:1179
#define InvalidOid
Definition: postgres_ext.h:36
#define makeNode(_type_)
Definition: nodes.h:566
#define NULL
Definition: c.h:229
#define lfirst(lc)
Definition: pg_list.h:106
int location
Definition: primnodes.h:1181
static int list_length(const List *l)
Definition: pg_list.h:89
static List * pull_ors(List *orlist)
Definition: prepqual.c:345
Oid inputcollid
Definition: primnodes.h:500
List * list_difference(const List *list1, const List *list2)
Definition: list.c:858
List * args
Definition: primnodes.h:562
static Expr * process_duplicate_ors(List *orlist)
Definition: prepqual.c:496
#define nodeTag(nodeptr)
Definition: nodes.h:523
void * arg
bool argisrow
Definition: primnodes.h:1180
Expr * make_andclause(List *andclauses)
Definition: clauses.c:326
Oid opno
Definition: primnodes.h:495
#define elog
Definition: elog.h:219
Oid get_negator(Oid opno)
Definition: lsyscache.c:1305
List * args
Definition: primnodes.h:501
static Expr * find_duplicate_ors(Expr *qual)
Definition: prepqual.c:407
Definition: pg_list.h:45
Expr * make_orclause(List *orclauses)
Definition: clauses.c:292
bool constisnull
Definition: primnodes.h:197
bool opretset
Definition: primnodes.h:498