PostgreSQL Source Code  git master
orclauses.c
Go to the documentation of this file.
/*-------------------------------------------------------------------------
 *
 * orclauses.c
 *	  Routines to extract restriction OR clauses from join OR clauses
 *
 * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/optimizer/util/orclauses.c
 *
 *-------------------------------------------------------------------------
 */
15 
16 #include "postgres.h"
17 
18 #include "nodes/makefuncs.h"
19 #include "nodes/nodeFuncs.h"
20 #include "optimizer/clauses.h"
21 #include "optimizer/cost.h"
22 #include "optimizer/optimizer.h"
23 #include "optimizer/orclauses.h"
24 #include "optimizer/restrictinfo.h"
25 
26 
28 static Expr *extract_or_clause(RestrictInfo *or_rinfo, RelOptInfo *rel);
29 static void consider_new_or_clause(PlannerInfo *root, RelOptInfo *rel,
30  Expr *orclause, RestrictInfo *join_or_rinfo);
31 
32 
33 /*
34  * extract_restriction_or_clauses
35  * Examine join OR-of-AND clauses to see if any useful restriction OR
36  * clauses can be extracted. If so, add them to the query.
37  *
38  * Although a join clause must reference multiple relations overall,
39  * an OR of ANDs clause might contain sub-clauses that reference just one
40  * relation and can be used to build a restriction clause for that rel.
41  * For example consider
42  * WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45));
43  * We can transform this into
44  * WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45))
45  * AND (a.x = 42 OR a.x = 44)
46  * AND (b.y = 43 OR b.z = 45);
47  * which allows the latter clauses to be applied during the scans of a and b,
48  * perhaps as index qualifications, and in any case reducing the number of
49  * rows arriving at the join. In essence this is a partial transformation to
50  * CNF (AND of ORs format). It is not complete, however, because we do not
51  * unravel the original OR --- doing so would usually bloat the qualification
52  * expression to little gain.
53  *
54  * The added quals are partially redundant with the original OR, and therefore
55  * would cause the size of the joinrel to be underestimated when it is finally
56  * formed. (This would be true of a full transformation to CNF as well; the
57  * fault is not really in the transformation, but in clauselist_selectivity's
58  * inability to recognize redundant conditions.) We can compensate for this
59  * redundancy by changing the cached selectivity of the original OR clause,
60  * canceling out the (valid) reduction in the estimated sizes of the base
61  * relations so that the estimated joinrel size remains the same. This is
62  * a MAJOR HACK: it depends on the fact that clause selectivities are cached
63  * and on the fact that the same RestrictInfo node will appear in every
64  * joininfo list that might be used when the joinrel is formed.
65  * And it doesn't work in cases where the size estimation is nonlinear
66  * (i.e., outer and IN joins). But it beats not doing anything.
67  *
68  * We examine each base relation to see if join clauses associated with it
69  * contain extractable restriction conditions. If so, add those conditions
70  * to the rel's baserestrictinfo and update the cached selectivities of the
71  * join clauses. Note that the same join clause will be examined afresh
72  * from the point of view of each baserel that participates in it, so its
73  * cached selectivity may get updated multiple times.
74  */
75 void
77 {
78  Index rti;
79 
80  /* Examine each baserel for potential join OR clauses */
81  for (rti = 1; rti < root->simple_rel_array_size; rti++)
82  {
83  RelOptInfo *rel = root->simple_rel_array[rti];
84  ListCell *lc;
85 
86  /* there may be empty slots corresponding to non-baserel RTEs */
87  if (rel == NULL)
88  continue;
89 
90  Assert(rel->relid == rti); /* sanity check on array */
91 
92  /* ignore RTEs that are "other rels" */
93  if (rel->reloptkind != RELOPT_BASEREL)
94  continue;
95 
96  /*
97  * Find potentially interesting OR joinclauses. We can use any
98  * joinclause that is considered safe to move to this rel by the
99  * parameterized-path machinery, even though what we are going to do
100  * with it is not exactly a parameterized path.
101  *
102  * However, it seems best to ignore clauses that have been marked
103  * redundant (by setting norm_selec > 1). That likely can't happen
104  * for OR clauses, but let's be safe.
105  */
106  foreach(lc, rel->joininfo)
107  {
108  RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
109 
110  if (restriction_is_or_clause(rinfo) &&
111  join_clause_is_movable_to(rinfo, rel) &&
112  rinfo->norm_selec <= 1)
113  {
114  /* Try to extract a qual for this rel only */
115  Expr *orclause = extract_or_clause(rinfo, rel);
116 
117  /*
118  * If successful, decide whether we want to use the clause,
119  * and insert it into the rel's restrictinfo list if so.
120  */
121  if (orclause)
122  consider_new_or_clause(root, rel, orclause, rinfo);
123  }
124  }
125  }
126 }
127 
128 /*
129  * Is the given primitive (non-OR) RestrictInfo safe to move to the rel?
130  */
131 static bool
133 {
134  /*
135  * We want clauses that mention the rel, and only the rel. So in
136  * particular pseudoconstant clauses can be rejected quickly. Then check
137  * the clause's Var membership.
138  */
139  if (rinfo->pseudoconstant)
140  return false;
141  if (!bms_equal(rinfo->clause_relids, rel->relids))
142  return false;
143 
144  /* We don't want extra evaluations of any volatile functions */
145  if (contain_volatile_functions((Node *) rinfo->clause))
146  return false;
147 
148  return true;
149 }
150 
151 /*
152  * Try to extract a restriction clause mentioning only "rel" from the given
153  * join OR-clause.
154  *
155  * We must be able to extract at least one qual for this rel from each of
156  * the arms of the OR, else we can't use it.
157  *
158  * Returns an OR clause (not a RestrictInfo!) pertaining to rel, or NULL
159  * if no OR clause could be extracted.
160  */
161 static Expr *
163 {
164  List *clauselist = NIL;
165  ListCell *lc;
166 
167  /*
168  * Scan each arm of the input OR clause. Notice we descend into
169  * or_rinfo->orclause, which has RestrictInfo nodes embedded below the
170  * toplevel OR/AND structure. This is useful because we can use the info
171  * in those nodes to make is_safe_restriction_clause_for()'s checks
172  * cheaper. We'll strip those nodes from the returned tree, though,
173  * meaning that fresh ones will be built if the clause is accepted as a
174  * restriction clause. This might seem wasteful --- couldn't we re-use
175  * the existing RestrictInfos? But that'd require assuming that
176  * selectivity and other cached data is computed exactly the same way for
177  * a restriction clause as for a join clause, which seems undesirable.
178  */
179  Assert(is_orclause(or_rinfo->orclause));
180  foreach(lc, ((BoolExpr *) or_rinfo->orclause)->args)
181  {
182  Node *orarg = (Node *) lfirst(lc);
183  List *subclauses = NIL;
184  Node *subclause;
185 
186  /* OR arguments should be ANDs or sub-RestrictInfos */
187  if (is_andclause(orarg))
188  {
189  List *andargs = ((BoolExpr *) orarg)->args;
190  ListCell *lc2;
191 
192  foreach(lc2, andargs)
193  {
194  RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc2);
195 
196  if (restriction_is_or_clause(rinfo))
197  {
198  /*
199  * Recurse to deal with nested OR. Note we *must* recurse
200  * here, this isn't just overly-tense optimization: we
201  * have to descend far enough to find and strip all
202  * RestrictInfos in the expression.
203  */
204  Expr *suborclause;
205 
206  suborclause = extract_or_clause(rinfo, rel);
207  if (suborclause)
208  subclauses = lappend(subclauses, suborclause);
209  }
210  else if (is_safe_restriction_clause_for(rinfo, rel))
211  subclauses = lappend(subclauses, rinfo->clause);
212  }
213  }
214  else
215  {
216  RestrictInfo *rinfo = castNode(RestrictInfo, orarg);
217 
219  if (is_safe_restriction_clause_for(rinfo, rel))
220  subclauses = lappend(subclauses, rinfo->clause);
221  }
222 
223  /*
224  * If nothing could be extracted from this arm, we can't do anything
225  * with this OR clause.
226  */
227  if (subclauses == NIL)
228  return NULL;
229 
230  /*
231  * OK, add subclause(s) to the result OR. If we found more than one,
232  * we need an AND node. But if we found only one, and it is itself an
233  * OR node, add its subclauses to the result instead; this is needed
234  * to preserve AND/OR flatness (ie, no OR directly underneath OR).
235  */
236  subclause = (Node *) make_ands_explicit(subclauses);
237  if (is_orclause(subclause))
238  clauselist = list_concat(clauselist,
239  ((BoolExpr *) subclause)->args);
240  else
241  clauselist = lappend(clauselist, subclause);
242  }
243 
244  /*
245  * If we got a restriction clause from every arm, wrap them up in an OR
246  * node. (In theory the OR node might be unnecessary, if there was only
247  * one arm --- but then the input OR node was also redundant.)
248  */
249  if (clauselist != NIL)
250  return make_orclause(clauselist);
251  return NULL;
252 }
253 
254 /*
255  * Consider whether a successfully-extracted restriction OR clause is
256  * actually worth using. If so, add it to the planner's data structures,
257  * and adjust the original join clause (join_or_rinfo) to compensate.
258  */
259 static void
261  Expr *orclause, RestrictInfo *join_or_rinfo)
262 {
263  RestrictInfo *or_rinfo;
264  Selectivity or_selec,
265  orig_selec;
266 
267  /*
268  * Build a RestrictInfo from the new OR clause. We can assume it's valid
269  * as a base restriction clause.
270  */
271  or_rinfo = make_restrictinfo(root,
272  orclause,
273  true,
274  false,
275  false,
276  join_or_rinfo->security_level,
277  NULL,
278  NULL,
279  NULL);
280 
281  /*
282  * Estimate its selectivity. (We could have done this earlier, but doing
283  * it on the RestrictInfo representation allows the result to get cached,
284  * saving work later.)
285  */
286  or_selec = clause_selectivity(root, (Node *) or_rinfo,
287  0, JOIN_INNER, NULL);
288 
289  /*
290  * The clause is only worth adding to the query if it rejects a useful
291  * fraction of the base relation's rows; otherwise, it's just going to
292  * cause duplicate computation (since we will still have to check the
293  * original OR clause when the join is formed). Somewhat arbitrarily, we
294  * set the selectivity threshold at 0.9.
295  */
296  if (or_selec > 0.9)
297  return; /* forget it */
298 
299  /*
300  * OK, add it to the rel's restriction-clause list.
301  */
302  rel->baserestrictinfo = lappend(rel->baserestrictinfo, or_rinfo);
304  or_rinfo->security_level);
305 
306  /*
307  * Adjust the original join OR clause's cached selectivity to compensate
308  * for the selectivity of the added (but redundant) lower-level qual. This
309  * should result in the join rel getting approximately the same rows
310  * estimate as it would have gotten without all these shenanigans.
311  *
312  * XXX major hack alert: this depends on the assumption that the
313  * selectivity will stay cached.
314  *
315  * XXX another major hack: we adjust only norm_selec, the cached
316  * selectivity for JOIN_INNER semantics, even though the join clause
317  * might've been an outer-join clause. This is partly because we can't
318  * easily identify the relevant SpecialJoinInfo here, and partly because
319  * the linearity assumption we're making would fail anyway. (If it is an
320  * outer-join clause, "rel" must be on the nullable side, else we'd not
321  * have gotten here. So the computation of the join size is going to be
322  * quite nonlinear with respect to the size of "rel", so it's not clear
323  * how we ought to adjust outer_selec even if we could compute its
324  * original value correctly.)
325  */
326  if (or_selec > 0)
327  {
328  SpecialJoinInfo sjinfo;
329 
330  /*
331  * Make up a SpecialJoinInfo for JOIN_INNER semantics. (Compare
332  * approx_tuple_count() in costsize.c.)
333  */
334  sjinfo.type = T_SpecialJoinInfo;
335  sjinfo.min_lefthand = bms_difference(join_or_rinfo->clause_relids,
336  rel->relids);
337  sjinfo.min_righthand = rel->relids;
338  sjinfo.syn_lefthand = sjinfo.min_lefthand;
339  sjinfo.syn_righthand = sjinfo.min_righthand;
340  sjinfo.jointype = JOIN_INNER;
341  /* we don't bother trying to make the remaining fields valid */
342  sjinfo.lhs_strict = false;
343  sjinfo.delay_upper_joins = false;
344  sjinfo.semi_can_btree = false;
345  sjinfo.semi_can_hash = false;
346  sjinfo.semi_operators = NIL;
347  sjinfo.semi_rhs_exprs = NIL;
348 
349  /* Compute inner-join size */
350  orig_selec = clause_selectivity(root, (Node *) join_or_rinfo,
351  0, JOIN_INNER, &sjinfo);
352 
353  /* And hack cached selectivity so join size remains the same */
354  join_or_rinfo->norm_selec = orig_selec / or_selec;
355  /* ensure result stays in sane range, in particular not "redundant" */
356  if (join_or_rinfo->norm_selec > 1)
357  join_or_rinfo->norm_selec = 1;
358  /* as explained above, we don't touch outer_selec */
359  }
360 }
#define NIL
Definition: pg_list.h:65
Index security_level
Definition: pathnodes.h:2071
RelOptKind reloptkind
Definition: pathnodes.h:678
Relids min_righthand
Definition: pathnodes.h:2253
static bool is_orclause(const void *clause)
Definition: nodeFuncs.h:106
#define castNode(_type_, nodeptr)
Definition: nodes.h:605
Expr * orclause
Definition: pathnodes.h:2090
List * baserestrictinfo
Definition: pathnodes.h:745
void extract_restriction_or_clauses(PlannerInfo *root)
Definition: orclauses.c:76
Relids clause_relids
Definition: pathnodes.h:2074
#define Min(x, y)
Definition: c.h:986
bool pseudoconstant
Definition: pathnodes.h:2064
static bool is_andclause(const void *clause)
Definition: nodeFuncs.h:97
Bitmapset * bms_difference(const Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:291
Definition: nodes.h:536
static Expr * extract_or_clause(RestrictInfo *or_rinfo, RelOptInfo *rel)
Definition: orclauses.c:162
List * list_concat(List *list1, const List *list2)
Definition: list.c:530
Index baserestrict_min_security
Definition: pathnodes.h:747
double Selectivity
Definition: nodes.h:669
bool contain_volatile_functions(Node *clause)
Definition: clauses.c:452
Expr * make_orclause(List *orclauses)
Definition: makefuncs.c:652
bool restriction_is_or_clause(RestrictInfo *restrictinfo)
Definition: restrictinfo.c:382
Relids syn_lefthand
Definition: pathnodes.h:2254
Selectivity norm_selec
Definition: pathnodes.h:2097
struct RelOptInfo ** simple_rel_array
Definition: pathnodes.h:186
Relids syn_righthand
Definition: pathnodes.h:2255
List * semi_rhs_exprs
Definition: pathnodes.h:2263
static bool is_safe_restriction_clause_for(RestrictInfo *rinfo, RelOptInfo *rel)
Definition: orclauses.c:132
#define lfirst_node(type, lc)
Definition: pg_list.h:172
Selectivity clause_selectivity(PlannerInfo *root, Node *clause, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
Definition: clausesel.c:690
List * joininfo
Definition: pathnodes.h:749
Relids relids
Definition: pathnodes.h:681
int simple_rel_array_size
Definition: pathnodes.h:187
RestrictInfo * make_restrictinfo(PlannerInfo *root, Expr *clause, bool is_pushed_down, bool outerjoin_delayed, bool pseudoconstant, Index security_level, Relids required_relids, Relids outer_relids, Relids nullable_relids)
Definition: restrictinfo.c:61
Index relid
Definition: pathnodes.h:709
List * lappend(List *list, void *datum)
Definition: list.c:336
Expr * clause
Definition: pathnodes.h:2056
bool delay_upper_joins
Definition: pathnodes.h:2258
Expr * make_ands_explicit(List *andclauses)
Definition: makefuncs.c:708
unsigned int Index
Definition: c.h:549
#define Assert(condition)
Definition: c.h:804
#define lfirst(lc)
Definition: pg_list.h:169
JoinType jointype
Definition: pathnodes.h:2256
bool join_clause_is_movable_to(RestrictInfo *rinfo, RelOptInfo *baserel)
Definition: restrictinfo.c:525
List * semi_operators
Definition: pathnodes.h:2262
static void consider_new_or_clause(PlannerInfo *root, RelOptInfo *rel, Expr *orclause, RestrictInfo *join_or_rinfo)
Definition: orclauses.c:260
Definition: pg_list.h:50
Relids min_lefthand
Definition: pathnodes.h:2252
bool bms_equal(const Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:94