PostgreSQL Source Code git master
orclauses.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * orclauses.c
4 * Routines to extract restriction OR clauses from join OR clauses
5 *
6 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
8 *
9 *
10 * IDENTIFICATION
11 * src/backend/optimizer/util/orclauses.c
12 *
13 *-------------------------------------------------------------------------
14 */
15
16#include "postgres.h"
17
18#include "nodes/makefuncs.h"
19#include "nodes/nodeFuncs.h"
20#include "optimizer/optimizer.h"
21#include "optimizer/orclauses.h"
22#include "optimizer/paths.h"
24
25
27static Expr *extract_or_clause(RestrictInfo *or_rinfo, RelOptInfo *rel);
29 Expr *orclause, RestrictInfo *join_or_rinfo);
30
31
32/*
33 * extract_restriction_or_clauses
34 * Examine join OR-of-AND clauses to see if any useful restriction OR
35 * clauses can be extracted. If so, add them to the query.
36 *
37 * Although a join clause must reference multiple relations overall,
38 * an OR of ANDs clause might contain sub-clauses that reference just one
39 * relation and can be used to build a restriction clause for that rel.
40 * For example consider
41 * WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45));
42 * We can transform this into
43 * WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45))
44 * AND (a.x = 42 OR a.x = 44)
45 * AND (b.y = 43 OR b.z = 45);
46 * which allows the latter clauses to be applied during the scans of a and b,
47 * perhaps as index qualifications, and in any case reducing the number of
48 * rows arriving at the join. In essence this is a partial transformation to
49 * CNF (AND of ORs format). It is not complete, however, because we do not
50 * unravel the original OR --- doing so would usually bloat the qualification
51 * expression to little gain.
52 *
53 * The added quals are partially redundant with the original OR, and therefore
54 * would cause the size of the joinrel to be underestimated when it is finally
55 * formed. (This would be true of a full transformation to CNF as well; the
56 * fault is not really in the transformation, but in clauselist_selectivity's
57 * inability to recognize redundant conditions.) We can compensate for this
58 * redundancy by changing the cached selectivity of the original OR clause,
59 * canceling out the (valid) reduction in the estimated sizes of the base
60 * relations so that the estimated joinrel size remains the same. This is
61 * a MAJOR HACK: it depends on the fact that clause selectivities are cached
62 * and on the fact that the same RestrictInfo node will appear in every
63 * joininfo list that might be used when the joinrel is formed.
64 * And it doesn't work in cases where the size estimation is nonlinear
65 * (i.e., outer and IN joins). But it beats not doing anything.
66 *
67 * We examine each base relation to see if join clauses associated with it
68 * contain extractable restriction conditions. If so, add those conditions
69 * to the rel's baserestrictinfo and update the cached selectivities of the
70 * join clauses. Note that the same join clause will be examined afresh
71 * from the point of view of each baserel that participates in it, so its
72 * cached selectivity may get updated multiple times.
73 */
74void
76{
77 Index rti;
78
79 /* Examine each baserel for potential join OR clauses */
80 for (rti = 1; rti < root->simple_rel_array_size; rti++)
81 {
82 RelOptInfo *rel = root->simple_rel_array[rti];
83 ListCell *lc;
84
85 /* there may be empty slots corresponding to non-baserel RTEs */
86 if (rel == NULL)
87 continue;
88
89 Assert(rel->relid == rti); /* sanity check on array */
90
91 /* ignore RTEs that are "other rels" */
92 if (rel->reloptkind != RELOPT_BASEREL)
93 continue;
94
95 /*
96 * Find potentially interesting OR joinclauses. We can use any
97 * joinclause that is considered safe to move to this rel by the
98 * parameterized-path machinery, even though what we are going to do
99 * with it is not exactly a parameterized path.
100 */
101 foreach(lc, rel->joininfo)
102 {
103 RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
104
105 if (restriction_is_or_clause(rinfo) &&
106 join_clause_is_movable_to(rinfo, rel))
107 {
108 /* Try to extract a qual for this rel only */
109 Expr *orclause = extract_or_clause(rinfo, rel);
110
111 /*
112 * If successful, decide whether we want to use the clause,
113 * and insert it into the rel's restrictinfo list if so.
114 */
115 if (orclause)
116 consider_new_or_clause(root, rel, orclause, rinfo);
117 }
118 }
119 }
120}
121
122/*
123 * Is the given primitive (non-OR) RestrictInfo safe to move to the rel?
124 */
125static bool
127{
128 /*
129 * We want clauses that mention the rel, and only the rel. So in
130 * particular pseudoconstant clauses can be rejected quickly. Then check
131 * the clause's Var membership.
132 */
133 if (rinfo->pseudoconstant)
134 return false;
135 if (!bms_equal(rinfo->clause_relids, rel->relids))
136 return false;
137
138 /* We don't want extra evaluations of any volatile functions */
139 if (contain_volatile_functions((Node *) rinfo->clause))
140 return false;
141
142 return true;
143}
144
145/*
146 * Try to extract a restriction clause mentioning only "rel" from the given
147 * join OR-clause.
148 *
149 * We must be able to extract at least one qual for this rel from each of
150 * the arms of the OR, else we can't use it.
151 *
152 * Returns an OR clause (not a RestrictInfo!) pertaining to rel, or NULL
153 * if no OR clause could be extracted.
154 */
155static Expr *
157{
158 List *clauselist = NIL;
159 ListCell *lc;
160
161 /*
162 * Scan each arm of the input OR clause. Notice we descend into
163 * or_rinfo->orclause, which has RestrictInfo nodes embedded below the
164 * toplevel OR/AND structure. This is useful because we can use the info
165 * in those nodes to make is_safe_restriction_clause_for()'s checks
166 * cheaper. We'll strip those nodes from the returned tree, though,
167 * meaning that fresh ones will be built if the clause is accepted as a
168 * restriction clause. This might seem wasteful --- couldn't we re-use
169 * the existing RestrictInfos? But that'd require assuming that
170 * selectivity and other cached data is computed exactly the same way for
171 * a restriction clause as for a join clause, which seems undesirable.
172 */
173 Assert(is_orclause(or_rinfo->orclause));
174 foreach(lc, ((BoolExpr *) or_rinfo->orclause)->args)
175 {
176 Node *orarg = (Node *) lfirst(lc);
177 List *subclauses = NIL;
178 Node *subclause;
179
180 /* OR arguments should be ANDs or sub-RestrictInfos */
181 if (is_andclause(orarg))
182 {
183 List *andargs = ((BoolExpr *) orarg)->args;
184 ListCell *lc2;
185
186 foreach(lc2, andargs)
187 {
189
190 if (restriction_is_or_clause(rinfo))
191 {
192 /*
193 * Recurse to deal with nested OR. Note we *must* recurse
194 * here, this isn't just overly-tense optimization: we
195 * have to descend far enough to find and strip all
196 * RestrictInfos in the expression.
197 */
198 Expr *suborclause;
199
200 suborclause = extract_or_clause(rinfo, rel);
201 if (suborclause)
202 subclauses = lappend(subclauses, suborclause);
203 }
204 else if (is_safe_restriction_clause_for(rinfo, rel))
205 subclauses = lappend(subclauses, rinfo->clause);
206 }
207 }
208 else
209 {
210 RestrictInfo *rinfo = castNode(RestrictInfo, orarg);
211
213 if (is_safe_restriction_clause_for(rinfo, rel))
214 subclauses = lappend(subclauses, rinfo->clause);
215 }
216
217 /*
218 * If nothing could be extracted from this arm, we can't do anything
219 * with this OR clause.
220 */
221 if (subclauses == NIL)
222 return NULL;
223
224 /*
225 * OK, add subclause(s) to the result OR. If we found more than one,
226 * we need an AND node. But if we found only one, and it is itself an
227 * OR node, add its subclauses to the result instead; this is needed
228 * to preserve AND/OR flatness (ie, no OR directly underneath OR).
229 */
230 subclause = (Node *) make_ands_explicit(subclauses);
231 if (is_orclause(subclause))
232 clauselist = list_concat(clauselist,
233 ((BoolExpr *) subclause)->args);
234 else
235 clauselist = lappend(clauselist, subclause);
236 }
237
238 /*
239 * If we got a restriction clause from every arm, wrap them up in an OR
240 * node. (In theory the OR node might be unnecessary, if there was only
241 * one arm --- but then the input OR node was also redundant.)
242 */
243 if (clauselist != NIL)
244 return make_orclause(clauselist);
245 return NULL;
246}
247
248/*
249 * Consider whether a successfully-extracted restriction OR clause is
250 * actually worth using. If so, add it to the planner's data structures,
251 * and adjust the original join clause (join_or_rinfo) to compensate.
252 */
253static void
255 Expr *orclause, RestrictInfo *join_or_rinfo)
256{
257 RestrictInfo *or_rinfo;
258 Selectivity or_selec,
259 orig_selec;
260
261 /*
262 * Build a RestrictInfo from the new OR clause. We can assume it's valid
263 * as a base restriction clause.
264 */
265 or_rinfo = make_restrictinfo(root,
266 orclause,
267 true,
268 false,
269 false,
270 false,
271 join_or_rinfo->security_level,
272 NULL,
273 NULL,
274 NULL);
275
276 /*
277 * Estimate its selectivity. (We could have done this earlier, but doing
278 * it on the RestrictInfo representation allows the result to get cached,
279 * saving work later.)
280 */
281 or_selec = clause_selectivity(root, (Node *) or_rinfo,
282 0, JOIN_INNER, NULL);
283
284 /*
285 * The clause is only worth adding to the query if it rejects a useful
286 * fraction of the base relation's rows; otherwise, it's just going to
287 * cause duplicate computation (since we will still have to check the
288 * original OR clause when the join is formed). Somewhat arbitrarily, we
289 * set the selectivity threshold at 0.9.
290 */
291 if (or_selec > 0.9)
292 return; /* forget it */
293
294 /*
295 * OK, add it to the rel's restriction-clause list.
296 */
297 rel->baserestrictinfo = lappend(rel->baserestrictinfo, or_rinfo);
299 or_rinfo->security_level);
300
301 /*
302 * Adjust the original join OR clause's cached selectivity to compensate
303 * for the selectivity of the added (but redundant) lower-level qual. This
304 * should result in the join rel getting approximately the same rows
305 * estimate as it would have gotten without all these shenanigans.
306 *
307 * XXX major hack alert: this depends on the assumption that the
308 * selectivity will stay cached.
309 *
310 * XXX another major hack: we adjust only norm_selec, the cached
311 * selectivity for JOIN_INNER semantics, even though the join clause
312 * might've been an outer-join clause. This is partly because we can't
313 * easily identify the relevant SpecialJoinInfo here, and partly because
314 * the linearity assumption we're making would fail anyway. (If it is an
315 * outer-join clause, "rel" must be on the nullable side, else we'd not
316 * have gotten here. So the computation of the join size is going to be
317 * quite nonlinear with respect to the size of "rel", so it's not clear
318 * how we ought to adjust outer_selec even if we could compute its
319 * original value correctly.)
320 */
321 if (or_selec > 0)
322 {
323 SpecialJoinInfo sjinfo;
324
325 /*
326 * Make up a SpecialJoinInfo for JOIN_INNER semantics. (Compare
327 * approx_tuple_count() in costsize.c.)
328 */
329 init_dummy_sjinfo(&sjinfo,
330 bms_difference(join_or_rinfo->clause_relids,
331 rel->relids),
332 rel->relids);
333
334 /* Compute inner-join size */
335 orig_selec = clause_selectivity(root, (Node *) join_or_rinfo,
336 0, JOIN_INNER, &sjinfo);
337
338 /* And hack cached selectivity so join size remains the same */
339 join_or_rinfo->norm_selec = orig_selec / or_selec;
340 /* ensure result stays in sane range */
341 if (join_or_rinfo->norm_selec > 1)
342 join_or_rinfo->norm_selec = 1;
343 /* as explained above, we don't touch outer_selec */
344 }
345}
Bitmapset * bms_difference(const Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:346
bool bms_equal(const Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:142
#define Min(x, y)
Definition: c.h:975
unsigned int Index
Definition: c.h:585
bool contain_volatile_functions(Node *clause)
Definition: clauses.c:537
Selectivity clause_selectivity(PlannerInfo *root, Node *clause, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
Definition: clausesel.c:667
Assert(PointerIsAligned(start, uint64))
void init_dummy_sjinfo(SpecialJoinInfo *sjinfo, Relids left_relids, Relids right_relids)
Definition: joinrels.c:670
List * lappend(List *list, void *datum)
Definition: list.c:339
List * list_concat(List *list1, const List *list2)
Definition: list.c:561
Expr * make_orclause(List *orclauses)
Definition: makefuncs.c:743
Expr * make_ands_explicit(List *andclauses)
Definition: makefuncs.c:799
static bool is_andclause(const void *clause)
Definition: nodeFuncs.h:107
static bool is_orclause(const void *clause)
Definition: nodeFuncs.h:116
double Selectivity
Definition: nodes.h:252
#define castNode(_type_, nodeptr)
Definition: nodes.h:178
@ JOIN_INNER
Definition: nodes.h:295
void extract_restriction_or_clauses(PlannerInfo *root)
Definition: orclauses.c:75
static void consider_new_or_clause(PlannerInfo *root, RelOptInfo *rel, Expr *orclause, RestrictInfo *join_or_rinfo)
Definition: orclauses.c:254
static Expr * extract_or_clause(RestrictInfo *or_rinfo, RelOptInfo *rel)
Definition: orclauses.c:156
static bool is_safe_restriction_clause_for(RestrictInfo *rinfo, RelOptInfo *rel)
Definition: orclauses.c:126
@ RELOPT_BASEREL
Definition: pathnodes.h:851
#define lfirst(lc)
Definition: pg_list.h:172
#define lfirst_node(type, lc)
Definition: pg_list.h:176
#define NIL
Definition: pg_list.h:68
tree ctl root
Definition: radixtree.h:1857
bool restriction_is_or_clause(RestrictInfo *restrictinfo)
Definition: restrictinfo.c:407
bool join_clause_is_movable_to(RestrictInfo *rinfo, RelOptInfo *baserel)
Definition: restrictinfo.c:575
RestrictInfo * make_restrictinfo(PlannerInfo *root, Expr *clause, bool is_pushed_down, bool has_clone, bool is_clone, bool pseudoconstant, Index security_level, Relids required_relids, Relids incompatible_relids, Relids outer_relids)
Definition: restrictinfo.c:52
Definition: pg_list.h:54
Definition: nodes.h:131
List * baserestrictinfo
Definition: pathnodes.h:1009
List * joininfo
Definition: pathnodes.h:1015
Relids relids
Definition: pathnodes.h:895
Index relid
Definition: pathnodes.h:942
RelOptKind reloptkind
Definition: pathnodes.h:889
Index baserestrict_min_security
Definition: pathnodes.h:1013
Index security_level
Definition: pathnodes.h:2621
Expr * clause
Definition: pathnodes.h:2599