PostgreSQL Source Code git master
Loading...
Searching...
No Matches
joinpath.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * joinpath.c
4 * Routines to find all possible paths for processing a set of joins
5 *
6 * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
8 *
9 *
10 * IDENTIFICATION
11 * src/backend/optimizer/path/joinpath.c
12 *
13 *-------------------------------------------------------------------------
14 */
15#include "postgres.h"
16
17#include "executor/executor.h"
18#include "foreign/fdwapi.h"
19#include "nodes/nodeFuncs.h"
20#include "optimizer/cost.h"
21#include "optimizer/optimizer.h"
22#include "optimizer/pathnode.h"
23#include "optimizer/paths.h"
25#include "optimizer/planmain.h"
27#include "utils/lsyscache.h"
28#include "utils/typcache.h"
29
30/* Hooks for plugins to get control in add_paths_to_joinrel() */
33
34/*
35 * Paths parameterized by a parent rel can be considered to be parameterized
36 * by any of its children, when we are performing partitionwise joins. These
37 * macros simplify checking for such cases. Beware multiple eval of args.
38 */
39#define PATH_PARAM_BY_PARENT(path, rel) \
40 ((path)->param_info && bms_overlap(PATH_REQ_OUTER(path), \
41 (rel)->top_parent_relids))
42#define PATH_PARAM_BY_REL_SELF(path, rel) \
43 ((path)->param_info && bms_overlap(PATH_REQ_OUTER(path), (rel)->relids))
44
45#define PATH_PARAM_BY_REL(path, rel) \
46 (PATH_PARAM_BY_REL_SELF(path, rel) || PATH_PARAM_BY_PARENT(path, rel))
47
49 RelOptInfo *joinrel,
52 List *pathkeys,
53 List *mergeclauses,
54 List *outersortkeys,
55 List *innersortkeys,
56 JoinType jointype,
57 JoinPathExtraData *extra);
58static void sort_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
59 RelOptInfo *outerrel, RelOptInfo *innerrel,
60 JoinType jointype, JoinPathExtraData *extra);
61static void match_unsorted_outer(PlannerInfo *root, RelOptInfo *joinrel,
62 RelOptInfo *outerrel, RelOptInfo *innerrel,
63 JoinType jointype, JoinPathExtraData *extra);
65 RelOptInfo *joinrel,
66 RelOptInfo *outerrel,
67 RelOptInfo *innerrel,
68 JoinType jointype,
69 JoinPathExtraData *extra);
71 RelOptInfo *joinrel,
72 RelOptInfo *outerrel,
73 RelOptInfo *innerrel,
74 JoinType jointype,
75 JoinPathExtraData *extra,
77static void hash_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
78 RelOptInfo *outerrel, RelOptInfo *innerrel,
79 JoinType jointype, JoinPathExtraData *extra);
81 RelOptInfo *joinrel,
82 RelOptInfo *outerrel,
83 RelOptInfo *innerrel,
84 List *restrictlist,
85 JoinType jointype,
86 bool *mergejoin_allowed);
88 RelOptInfo *joinrel,
89 RelOptInfo *innerrel,
91 JoinType jointype,
92 JoinPathExtraData *extra,
93 bool useallclauses,
96 bool is_partial);
97
98
99/*
100 * add_paths_to_joinrel
101 * Given a join relation and two component rels from which it can be made,
102 * consider all possible paths that use the two component rels as outer
103 * and inner rel respectively. Add these paths to the join rel's pathlist
104 * if they survive comparison with other paths (and remove any existing
105 * paths that are dominated by these paths).
106 *
107 * Modifies the pathlist field of the joinrel node to contain the best
108 * paths found so far.
109 *
110 * jointype is not necessarily the same as sjinfo->jointype; it might be
111 * "flipped around" if we are considering joining the rels in the opposite
112 * direction from what's indicated in sjinfo.
113 *
114 * Also, this routine accepts the special JoinTypes JOIN_UNIQUE_OUTER and
115 * JOIN_UNIQUE_INNER to indicate that the outer or inner relation has been
116 * unique-ified and a regular inner join should then be applied. These values
117 * are not allowed to propagate outside this routine, however. Path cost
118 * estimation code, as well as match_unsorted_outer, may need to recognize that
119 * it's dealing with such a case --- the combination of nominal jointype INNER
120 * with sjinfo->jointype == JOIN_SEMI indicates that.
121 */
122void
124 RelOptInfo *joinrel,
125 RelOptInfo *outerrel,
126 RelOptInfo *innerrel,
127 JoinType jointype,
128 SpecialJoinInfo *sjinfo,
129 List *restrictlist)
130{
131 JoinType save_jointype = jointype;
132 JoinPathExtraData extra;
133 bool mergejoin_allowed = true;
134 ListCell *lc;
136
137 /*
138 * PlannerInfo doesn't contain the SpecialJoinInfos created for joins
139 * between child relations, even if there is a SpecialJoinInfo node for
140 * the join between the topmost parents. So, while calculating Relids set
141 * representing the restriction, consider relids of topmost parent of
142 * partitions.
143 */
144 if (joinrel->reloptkind == RELOPT_OTHER_JOINREL)
145 joinrelids = joinrel->top_parent_relids;
146 else
147 joinrelids = joinrel->relids;
148
149 extra.restrictlist = restrictlist;
150 extra.mergeclause_list = NIL;
151 extra.sjinfo = sjinfo;
152 extra.param_source_rels = NULL;
153 extra.pgs_mask = joinrel->pgs_mask;
154
155 /*
156 * Give extensions a chance to take control. In particular, an extension
157 * might want to modify extra.pgs_mask. It's possible to override pgs_mask
158 * on a query-wide basis using join_search_hook, or for a particular
159 * relation using joinrel_setup_hook, but extensions that want to provide
160 * different advice for the same joinrel based on the choice of innerrel
161 * and outerrel will need to use this hook.
162 *
163 * A very simple way for an extension to use this hook is to set
164 * extra.pgs_mask &= ~PGS_JOIN_ANY, if it simply doesn't want any of the
165 * paths generated by this call to add_paths_to_joinrel() to be selected.
166 * An extension could use this technique to constrain the join order,
167 * since it could thereby arrange to reject all paths from join orders
168 * that it does not like. An extension can also selectively clear bits
169 * from extra.pgs_mask to rule out specific techniques for specific joins,
170 * or could even set additional bits to re-allow methods disabled at some
171 * higher level.
172 *
173 * NB: Below this point, this function should be careful to reference
174 * extra.pgs_mask rather than rel->pgs_mask to avoid disregarding any
175 * changes made by the hook we're about to call.
176 */
178 join_path_setup_hook(root, joinrel, outerrel, innerrel,
179 jointype, &extra);
180
181 /*
182 * See if the inner relation is provably unique for this outer rel.
183 *
184 * We have some special cases: for JOIN_SEMI, it doesn't matter since the
185 * executor can make the equivalent optimization anyway. It also doesn't
186 * help enable use of Memoize, since a semijoin with a provably unique
187 * inner side should have been reduced to an inner join in that case.
188 * Therefore, we need not expend planner cycles on proofs. (For
189 * JOIN_ANTI, although it doesn't help the executor for the same reason,
190 * it can benefit Memoize paths.) For JOIN_UNIQUE_INNER, we must be
191 * considering a semijoin whose inner side is not provably unique (else
192 * reduce_unique_semijoins would've simplified it), so there's no point in
193 * calling innerrel_is_unique. However, if the LHS covers all of the
194 * semijoin's min_lefthand, then it's appropriate to set inner_unique
195 * because the unique relation produced by create_unique_paths will be
196 * unique relative to the LHS. (If we have an LHS that's only part of the
197 * min_lefthand, that is *not* true.) For JOIN_UNIQUE_OUTER, pass
198 * JOIN_INNER to avoid letting that value escape this module.
199 */
200 switch (jointype)
201 {
202 case JOIN_SEMI:
203 extra.inner_unique = false; /* well, unproven */
204 break;
207 outerrel->relids);
208 break;
211 joinrel->relids,
212 outerrel->relids,
213 innerrel,
215 restrictlist,
216 false);
217 break;
218 default:
220 joinrel->relids,
221 outerrel->relids,
222 innerrel,
223 jointype,
224 restrictlist,
225 false);
226 break;
227 }
228
229 /*
230 * If the outer or inner relation has been unique-ified, handle as a plain
231 * inner join.
232 */
233 if (jointype == JOIN_UNIQUE_OUTER || jointype == JOIN_UNIQUE_INNER)
234 jointype = JOIN_INNER;
235
236 /*
237 * Find potential mergejoin clauses. We can skip this if we are not
238 * interested in doing a mergejoin. However, mergejoin may be our only
239 * way of implementing a full outer join, so in that case we don't care
240 * whether mergejoins are disabled.
241 */
242 if ((extra.pgs_mask & PGS_MERGEJOIN_ANY) != 0 || jointype == JOIN_FULL)
244 joinrel,
245 outerrel,
246 innerrel,
247 restrictlist,
248 jointype,
250
251 /*
252 * If it's SEMI, ANTI, or inner_unique join, compute correction factors
253 * for cost estimation. These will be the same for all paths.
254 */
255 if (jointype == JOIN_SEMI || jointype == JOIN_ANTI || extra.inner_unique)
256 compute_semi_anti_join_factors(root, joinrel, outerrel, innerrel,
257 jointype, sjinfo, restrictlist,
258 &extra.semifactors);
259
260 /*
261 * Decide whether it's sensible to generate parameterized paths for this
262 * joinrel, and if so, which relations such paths should require. There
263 * is usually no need to create a parameterized result path unless there
264 * is a join order restriction that prevents joining one of our input rels
265 * directly to the parameter source rel instead of joining to the other
266 * input rel. (But see allow_star_schema_join().) This restriction
267 * reduces the number of parameterized paths we have to deal with at
268 * higher join levels, without compromising the quality of the resulting
269 * plan. We express the restriction as a Relids set that must overlap the
270 * parameterization of any proposed join path. Note: param_source_rels
271 * should contain only baserels, not OJ relids, so starting from
272 * all_baserels not all_query_rels is correct.
273 */
274 foreach(lc, root->join_info_list)
275 {
277
278 /*
279 * SJ is relevant to this join if we have some part of its RHS
280 * (possibly not all of it), and haven't yet joined to its LHS. (This
281 * test is pretty simplistic, but should be sufficient considering the
282 * join has already been proven legal.) If the SJ is relevant, it
283 * presents constraints for joining to anything not in its RHS.
284 */
285 if (bms_overlap(joinrelids, sjinfo2->min_righthand) &&
286 !bms_overlap(joinrelids, sjinfo2->min_lefthand))
288 bms_difference(root->all_baserels,
289 sjinfo2->min_righthand));
290
291 /* full joins constrain both sides symmetrically */
292 if (sjinfo2->jointype == JOIN_FULL &&
293 bms_overlap(joinrelids, sjinfo2->min_lefthand) &&
294 !bms_overlap(joinrelids, sjinfo2->min_righthand))
296 bms_difference(root->all_baserels,
297 sjinfo2->min_lefthand));
298 }
299
300 /*
301 * However, when a LATERAL subquery is involved, there will simply not be
302 * any paths for the joinrel that aren't parameterized by whatever the
303 * subquery is parameterized by, unless its parameterization is resolved
304 * within the joinrel. So we might as well allow additional dependencies
305 * on whatever residual lateral dependencies the joinrel will have.
306 */
308 joinrel->lateral_relids);
309
310 /*
311 * 1. Consider mergejoin paths where both relations must be explicitly
312 * sorted. Skip this if we can't mergejoin.
313 */
315 sort_inner_and_outer(root, joinrel, outerrel, innerrel,
316 jointype, &extra);
317
318 /*
319 * 2. Consider paths where the outer relation need not be explicitly
320 * sorted. This includes both nestloops and mergejoins where the outer
321 * path is already ordered. Again, skip this if we can't mergejoin.
322 * (That's okay because we know that nestloop can't handle
323 * right/right-anti/right-semi/full joins at all, so it wouldn't work in
324 * the prohibited cases either.)
325 */
327 match_unsorted_outer(root, joinrel, outerrel, innerrel,
328 jointype, &extra);
329
330#ifdef NOT_USED
331
332 /*
333 * 3. Consider paths where the inner relation need not be explicitly
334 * sorted. This includes mergejoins only (nestloops were already built in
335 * match_unsorted_outer).
336 *
337 * Diked out as redundant 2/13/2000 -- tgl. There isn't any really
338 * significant difference between the inner and outer side of a mergejoin,
339 * so match_unsorted_inner creates no paths that aren't equivalent to
340 * those made by match_unsorted_outer when add_paths_to_joinrel() is
341 * invoked with the two rels given in the other order.
342 */
344 match_unsorted_inner(root, joinrel, outerrel, innerrel,
345 jointype, &extra);
346#endif
347
348 /*
349 * 4. Consider paths where both outer and inner relations must be hashed
350 * before being joined. As above, when it's a full join, we must try this
351 * even when the path type is disabled, because it may be our only option.
352 */
353 if ((extra.pgs_mask & PGS_HASHJOIN) != 0 || jointype == JOIN_FULL)
354 hash_inner_and_outer(root, joinrel, outerrel, innerrel,
355 jointype, &extra);
356
357 /*
358 * 5. If inner and outer relations are foreign tables (or joins) belonging
359 * to the same server and assigned to the same user to check access
360 * permissions as, give the FDW a chance to push down joins.
361 */
362 if ((extra.pgs_mask & PGS_FOREIGNJOIN) != 0 && joinrel->fdwroutine &&
363 joinrel->fdwroutine->GetForeignJoinPaths)
364 joinrel->fdwroutine->GetForeignJoinPaths(root, joinrel,
365 outerrel, innerrel,
366 save_jointype, &extra);
367
368 /*
369 * 6. Finally, give extensions a chance to manipulate the path list. They
370 * could add new paths (such as CustomPaths) by calling add_path(), or
371 * add_partial_path() if parallel aware.
372 *
373 * In theory, extensions could also use this hook to delete or modify
374 * paths added by the core code, but in practice this is difficult to make
375 * work, since it's too late to get back any paths that have already been
376 * discarded by add_path() or add_partial_path(). If you're trying to
377 * suppress paths, consider using join_path_setup_hook instead.
378 */
380 set_join_pathlist_hook(root, joinrel, outerrel, innerrel,
381 save_jointype, &extra);
382}
383
384/*
385 * We override the param_source_rels heuristic to accept nestloop paths in
386 * which the outer rel satisfies some but not all of the inner path's
387 * parameterization. This is necessary to get good plans for star-schema
388 * scenarios, in which a parameterized path for a large table may require
389 * parameters from multiple small tables that will not get joined directly to
390 * each other. We can handle that by stacking nestloops that have the small
391 * tables on the outside; but this breaks the rule the param_source_rels
392 * heuristic is based on, namely that parameters should not be passed down
393 * across joins unless there's a join-order-constraint-based reason to do so.
394 * So we ignore the param_source_rels restriction when this case applies.
395 *
396 * allow_star_schema_join() returns true if the param_source_rels restriction
397 * should be overridden, ie, it's okay to perform this join.
398 */
399static inline bool
401 Relids outerrelids,
403{
404 /*
405 * It's a star-schema case if the outer rel provides some but not all of
406 * the inner rel's parameterization.
407 */
408 return (bms_overlap(inner_paramrels, outerrelids) &&
410}
411
412/*
413 * If the parameterization is only partly satisfied by the outer rel,
414 * the unsatisfied part can't include any outer-join relids that could
415 * null rels of the satisfied part. That would imply that we're trying
416 * to use a clause involving a Var with nonempty varnullingrels at
417 * a join level where that value isn't yet computable.
418 *
419 * In practice, this test never finds a problem because earlier join order
420 * restrictions prevent us from attempting a join that would cause a problem.
421 * (That's unsurprising, because the code worked before we ever added
422 * outer-join relids to expression relids.) It still seems worth checking
423 * as a backstop, but we only do so in assert-enabled builds.
424 */
425#ifdef USE_ASSERT_CHECKING
426static inline bool
428 Relids outerrelids,
430{
431 bool result = false;
434
435 if (bms_overlap(unsatisfied, root->outer_join_rels))
436 {
437 ListCell *lc;
438
439 foreach(lc, root->join_info_list)
440 {
442
443 if (!bms_is_member(sjinfo->ojrelid, unsatisfied))
444 continue; /* not relevant */
445 if (bms_overlap(satisfied, sjinfo->min_righthand) ||
446 (sjinfo->jointype == JOIN_FULL &&
448 {
449 result = true; /* doesn't work */
450 break;
451 }
452 }
453 }
454
455 /* Waste no memory when we reject a path here */
458
459 return result;
460}
461#endif /* USE_ASSERT_CHECKING */
462
463/*
464 * paraminfo_get_equal_hashops
465 * Determine if the clauses in param_info and innerrel's lateral vars
466 * can be hashed.
467 * Returns true if hashing is possible, otherwise false.
468 *
469 * Additionally, on success we collect the outer expressions and the
470 * appropriate equality operators for each hashable parameter to innerrel.
471 * These are returned in parallel lists in *param_exprs and *operators.
472 * We also set *binary_mode to indicate whether strict binary matching is
473 * required.
474 */
475static bool
477 RelOptInfo *outerrel, RelOptInfo *innerrel,
478 List *ph_lateral_vars, List **param_exprs,
479 List **operators, bool *binary_mode)
480
481{
482 List *lateral_vars;
483 ListCell *lc;
484
485 *param_exprs = NIL;
486 *operators = NIL;
487 *binary_mode = false;
488
489 /* Add join clauses from param_info to the hash key */
490 if (param_info != NULL)
491 {
492 List *clauses = param_info->ppi_clauses;
493
494 foreach(lc, clauses)
495 {
496 RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
497 OpExpr *opexpr;
498 Node *expr;
500
501 opexpr = (OpExpr *) rinfo->clause;
502
503 /*
504 * Bail if the rinfo is not compatible. We need a join OpExpr
505 * with 2 args.
506 */
507 if (!IsA(opexpr, OpExpr) || list_length(opexpr->args) != 2 ||
508 !clause_sides_match_join(rinfo, outerrel->relids,
509 innerrel->relids))
510 {
511 list_free(*operators);
512 list_free(*param_exprs);
513 return false;
514 }
515
516 if (rinfo->outer_is_left)
517 {
518 expr = (Node *) linitial(opexpr->args);
519 hasheqoperator = rinfo->left_hasheqoperator;
520 }
521 else
522 {
523 expr = (Node *) lsecond(opexpr->args);
524 hasheqoperator = rinfo->right_hasheqoperator;
525 }
526
527 /* can't do memoize if we can't hash the outer type */
529 {
530 list_free(*operators);
531 list_free(*param_exprs);
532 return false;
533 }
534
535 /*
536 * 'expr' may already exist as a parameter from a previous item in
537 * ppi_clauses. No need to include it again, however we'd better
538 * ensure we do switch into binary mode if required. See below.
539 */
540 if (!list_member(*param_exprs, expr))
541 {
542 *operators = lappend_oid(*operators, hasheqoperator);
543 *param_exprs = lappend(*param_exprs, expr);
544 }
545
546 /*
547 * When the join operator is not hashable then it's possible that
548 * the operator will be able to distinguish something that the
549 * hash equality operator could not. For example with floating
550 * point types -0.0 and +0.0 are classed as equal by the hash
551 * function and equality function, but some other operator may be
552 * able to tell those values apart. This means that we must put
553 * memoize into binary comparison mode so that it does bit-by-bit
554 * comparisons rather than a "logical" comparison as it would
555 * using the hash equality operator.
556 */
557 if (!OidIsValid(rinfo->hashjoinoperator))
558 *binary_mode = true;
559 }
560 }
561
562 /* Now add any lateral vars to the cache key too */
563 lateral_vars = list_concat(ph_lateral_vars, innerrel->lateral_vars);
564 foreach(lc, lateral_vars)
565 {
566 Node *expr = (Node *) lfirst(lc);
567 TypeCacheEntry *typentry;
568
569 /* Reject if there are any volatile functions in lateral vars */
571 {
572 list_free(*operators);
573 list_free(*param_exprs);
574 return false;
575 }
576
577 typentry = lookup_type_cache(exprType(expr),
579
580 /* can't use memoize without a valid hash proc and equals operator */
581 if (!OidIsValid(typentry->hash_proc) || !OidIsValid(typentry->eq_opr))
582 {
583 list_free(*operators);
584 list_free(*param_exprs);
585 return false;
586 }
587
588 /*
589 * 'expr' may already exist as a parameter from the ppi_clauses. No
590 * need to include it again, however we'd better ensure we do switch
591 * into binary mode.
592 */
593 if (!list_member(*param_exprs, expr))
594 {
595 *operators = lappend_oid(*operators, typentry->eq_opr);
596 *param_exprs = lappend(*param_exprs, expr);
597 }
598
599 /*
600 * We must go into binary mode as we don't have too much of an idea of
601 * how these lateral Vars are being used. See comment above when we
602 * set *binary_mode for the non-lateral Var case. This could be
603 * relaxed a bit if we had the RestrictInfos and knew the operators
604 * being used, however for cases like Vars that are arguments to
605 * functions we must operate in binary mode as we don't have
606 * visibility into what the function is doing with the Vars.
607 */
608 *binary_mode = true;
609 }
610
611 /* We're okay to use memoize */
612 return true;
613}
614
615/*
616 * extract_lateral_vars_from_PHVs
617 * Extract lateral references within PlaceHolderVars that are due to be
618 * evaluated at 'innerrelids'.
619 */
620static List *
622{
624 ListCell *lc;
625
626 /* Nothing would be found if the query contains no LATERAL RTEs */
627 if (!root->hasLateralRTEs)
628 return NIL;
629
630 /*
631 * No need to consider PHVs that are due to be evaluated at joinrels,
632 * since we do not add Memoize nodes on top of joinrel paths.
633 */
635 return NIL;
636
637 foreach(lc, root->placeholder_list)
638 {
640 List *vars;
641 ListCell *cell;
642
643 /* PHV is uninteresting if no lateral refs */
644 if (phinfo->ph_lateral == NULL)
645 continue;
646
647 /* PHV is uninteresting if not due to be evaluated at innerrelids */
648 if (!bms_equal(phinfo->ph_eval_at, innerrelids))
649 continue;
650
651 /*
652 * If the PHV does not reference any rels in innerrelids, use its
653 * contained expression as a cache key rather than extracting the
654 * Vars/PHVs from it and using those. This can be beneficial in cases
655 * where the expression results in fewer distinct values to cache
656 * tuples for.
657 */
658 if (!bms_overlap(pull_varnos(root, (Node *) phinfo->ph_var->phexpr),
660 {
661 ph_lateral_vars = lappend(ph_lateral_vars, phinfo->ph_var->phexpr);
662 continue;
663 }
664
665 /* Fetch Vars and PHVs of lateral references within PlaceHolderVars */
666 vars = pull_vars_of_level((Node *) phinfo->ph_var->phexpr, 0);
667 foreach(cell, vars)
668 {
669 Node *node = (Node *) lfirst(cell);
670
671 if (IsA(node, Var))
672 {
673 Var *var = (Var *) node;
674
675 Assert(var->varlevelsup == 0);
676
677 if (bms_is_member(var->varno, phinfo->ph_lateral))
679 }
680 else if (IsA(node, PlaceHolderVar))
681 {
683
684 Assert(phv->phlevelsup == 0);
685
689 }
690 else
691 Assert(false);
692 }
693
695 }
696
697 return ph_lateral_vars;
698}
699
700/*
701 * get_memoize_path
702 * If possible, make and return a Memoize path atop of 'inner_path'.
703 * Otherwise return NULL.
704 *
705 * Note that currently we do not add Memoize nodes on top of join relation
706 * paths. This is because the ParamPathInfos for join relation paths do not
707 * maintain ppi_clauses, as the set of relevant clauses varies depending on how
708 * the join is formed. In addition, joinrels do not maintain lateral_vars. So
709 * we do not have a way to extract cache keys from joinrels.
710 */
711static Path *
713 RelOptInfo *outerrel, Path *inner_path,
714 Path *outer_path, JoinType jointype,
715 JoinPathExtraData *extra)
716{
717 List *param_exprs;
718 List *hash_operators;
719 ListCell *lc;
720 bool binary_mode;
722
723 /* Obviously not if it's disabled */
724 if ((extra->pgs_mask & PGS_NESTLOOP_MEMOIZE) == 0)
725 return NULL;
726
727 /*
728 * We can safely not bother with all this unless we expect to perform more
729 * than one inner scan. The first scan is always going to be a cache
730 * miss. This would likely fail later anyway based on costs, so this is
731 * really just to save some wasted effort.
732 *
733 * However, if the "plain nested loop" strategy is disabled, then it is no
734 * longer certain that any path we'd construct here would lose on cost.
735 * So, in that case, continue and let cost comparison sort things out.
736 */
737 if (outer_path->parent->rows < 2 &&
738 (extra->pgs_mask & PGS_NESTLOOP_PLAIN) != 0)
739 return NULL;
740
741 /*
742 * Extract lateral Vars/PHVs within PlaceHolderVars that are due to be
743 * evaluated at innerrel. These lateral Vars/PHVs could be used as
744 * memoize cache keys.
745 */
747
748 /*
749 * We can only have a memoize node when there's some kind of cache key,
750 * either parameterized path clauses or lateral Vars. No cache key sounds
751 * more like something a Materialize node might be more useful for.
752 */
753 if ((inner_path->param_info == NULL ||
754 inner_path->param_info->ppi_clauses == NIL) &&
755 innerrel->lateral_vars == NIL &&
757 return NULL;
758
759 /*
760 * Currently we don't do this for SEMI and ANTI joins, because nested loop
761 * SEMI/ANTI joins don't scan the inner node to completion, which means
762 * memoize cannot mark the cache entry as complete. Nor can we mark the
763 * cache entry as complete after fetching the first inner tuple, because
764 * if that tuple and the current outer tuple don't satisfy the join
765 * clauses, a second inner tuple that satisfies the parameters would find
766 * the cache entry already marked as complete. The only exception is when
767 * the inner relation is provably unique, as in that case, there won't be
768 * a second matching tuple and we can safely mark the cache entry as
769 * complete after fetching the first inner tuple. Note that in such
770 * cases, the SEMI join should have been reduced to an inner join by
771 * reduce_unique_semijoins.
772 */
773 if ((jointype == JOIN_SEMI || jointype == JOIN_ANTI) &&
774 !extra->inner_unique)
775 return NULL;
776
777 /*
778 * Memoize normally marks cache entries as complete when it runs out of
779 * tuples to read from its subplan. However, with unique joins, Nested
780 * Loop will skip to the next outer tuple after finding the first matching
781 * inner tuple. This means that we may not read the inner side of the
782 * join to completion which leaves no opportunity to mark the cache entry
783 * as complete. To work around that, when the join is unique we
784 * automatically mark cache entries as complete after fetching the first
785 * tuple. This works when the entire join condition is parameterized.
786 * Otherwise, when the parameterization is only a subset of the join
787 * condition, we can't be sure which part of it causes the join to be
788 * unique. This means there are no guarantees that only 1 tuple will be
789 * read. We cannot mark the cache entry as complete after reading the
790 * first tuple without that guarantee. This means the scope of Memoize
791 * node's usefulness is limited to only outer rows that have no join
792 * partner as this is the only case where Nested Loop would exhaust the
793 * inner scan of a unique join. Since the scope is limited to that, we
794 * just don't bother making a memoize path in this case.
795 *
796 * Lateral vars needn't be considered here as they're not considered when
797 * determining if the join is unique.
798 */
799 if (extra->inner_unique)
800 {
801 Bitmapset *ppi_serials;
802
803 if (inner_path->param_info == NULL)
804 return NULL;
805
806 ppi_serials = inner_path->param_info->ppi_serials;
807
809 {
810 if (!bms_is_member(rinfo->rinfo_serial, ppi_serials))
811 return NULL;
812 }
813 }
814
815 /*
816 * We can't use a memoize node if there are volatile functions in the
817 * inner rel's target list or restrict list. A cache hit could reduce the
818 * number of calls to these functions.
819 */
820 if (contain_volatile_functions((Node *) innerrel->reltarget))
821 return NULL;
822
823 foreach(lc, innerrel->baserestrictinfo)
824 {
825 RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
826
827 if (contain_volatile_functions((Node *) rinfo))
828 return NULL;
829 }
830
831 /*
832 * Also check the parameterized path restrictinfos for volatile functions.
833 * Indexed functions must be immutable so shouldn't have any volatile
834 * functions, however, with a lateral join the inner scan may not be an
835 * index scan.
836 */
837 if (inner_path->param_info != NULL)
838 {
839 foreach(lc, inner_path->param_info->ppi_clauses)
840 {
841 RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
842
843 if (contain_volatile_functions((Node *) rinfo))
844 return NULL;
845 }
846 }
847
848 /* Check if we have hash ops for each parameter to the path */
850 inner_path->param_info,
851 outerrel->top_parent ?
852 outerrel->top_parent : outerrel,
853 innerrel,
855 &param_exprs,
856 &hash_operators,
857 &binary_mode))
858 {
859 return (Path *) create_memoize_path(root,
860 innerrel,
862 param_exprs,
863 hash_operators,
864 extra->inner_unique,
865 binary_mode,
866 outer_path->rows);
867 }
868
869 return NULL;
870}
871
872/*
873 * try_nestloop_path
874 * Consider a nestloop join path; if it appears useful, push it into
875 * the joinrel's pathlist via add_path().
876 */
877static void
879 RelOptInfo *joinrel,
882 List *pathkeys,
883 JoinType jointype,
885 JoinPathExtraData *extra)
886{
888 JoinCostWorkspace workspace;
889 RelOptInfo *innerrel = inner_path->parent;
890 RelOptInfo *outerrel = outer_path->parent;
892 Relids outerrelids;
895
896 /*
897 * If we are forming an outer join at this join, it's nonsensical to use
898 * an input path that uses the outer join as part of its parameterization.
899 * (This can happen despite our join order restrictions, since those apply
900 * to what is in an input relation not what its parameters are.)
901 */
902 if (extra->sjinfo->ojrelid != 0 &&
905 return;
906
907 /*
908 * Any parameterization of the input paths refers to topmost parents of
909 * the relevant relations, because reparameterize_path_by_child() hasn't
910 * been called yet. So we must consider topmost parents of the relations
911 * being joined, too, while determining parameterization of the result and
912 * checking for disallowed parameterization cases.
913 */
914 if (innerrel->top_parent_relids)
915 innerrelids = innerrel->top_parent_relids;
916 else
917 innerrelids = innerrel->relids;
918
919 if (outerrel->top_parent_relids)
920 outerrelids = outerrel->top_parent_relids;
921 else
922 outerrelids = outerrel->relids;
923
924 /*
925 * Check to see if proposed path is still parameterized, and reject if the
926 * parameterization wouldn't be sensible --- unless allow_star_schema_join
927 * says to allow it anyway.
928 */
931 if (required_outer &&
934 {
935 /* Waste no memory when we reject a path here */
937 return;
938 }
939
940 /* If we got past that, we shouldn't have any unsafe outer-join refs */
942
943 /*
944 * If the inner path is parameterized, it is parameterized by the topmost
945 * parent of the outer rel, not the outer rel itself. We will need to
946 * translate the parameterization, if this path is chosen, during
947 * create_plan(). Here we just check whether we will be able to perform
948 * the translation, and if not avoid creating a nestloop path.
949 */
952 {
954 return;
955 }
956
957 /*
958 * Do a precheck to quickly eliminate obviously-inferior paths. We
959 * calculate a cheap lower bound on the path's cost and then use
960 * add_path_precheck() to see if the path is clearly going to be dominated
961 * by some existing path for the joinrel. If not, do the full pushup with
962 * creating a fully valid path structure and submitting it to add_path().
963 * The latter two steps are expensive enough to make this two-phase
964 * methodology worthwhile.
965 */
966 initial_cost_nestloop(root, &workspace, jointype,
968 outer_path, inner_path, extra);
969
970 if (add_path_precheck(joinrel, workspace.disabled_nodes,
971 workspace.startup_cost, workspace.total_cost,
972 pathkeys, required_outer))
973 {
974 add_path(joinrel, (Path *)
976 joinrel,
977 jointype,
978 &workspace,
979 extra,
982 extra->restrictlist,
983 pathkeys,
985 }
986 else
987 {
988 /* Waste no memory when we reject a path here */
990 }
991}
992
993/*
994 * try_partial_nestloop_path
995 * Consider a partial nestloop join path; if it appears useful, push it into
996 * the joinrel's partial_pathlist via add_partial_path().
997 */
998static void
1000 RelOptInfo *joinrel,
1003 List *pathkeys,
1004 JoinType jointype,
1006 JoinPathExtraData *extra)
1007{
1008 JoinCostWorkspace workspace;
1009
1010 /*
1011 * If the inner path is parameterized, the parameterization must be fully
1012 * satisfied by the proposed outer path. Parameterized partial paths are
1013 * not supported. The caller should already have verified that no lateral
1014 * rels are required here.
1015 */
1018 if (inner_path->param_info != NULL)
1019 {
1020 Relids inner_paramrels = inner_path->param_info->ppi_req_outer;
1021 RelOptInfo *outerrel = outer_path->parent;
1022 Relids outerrelids;
1023
1024 /*
1025 * The inner and outer paths are parameterized, if at all, by the top
1026 * level parents, not the child relations, so we must use those relids
1027 * for our parameterization tests.
1028 */
1029 if (outerrel->top_parent_relids)
1030 outerrelids = outerrel->top_parent_relids;
1031 else
1032 outerrelids = outerrel->relids;
1033
1034 if (!bms_is_subset(inner_paramrels, outerrelids))
1035 return;
1036 }
1037
1038 /*
1039 * If the inner path is parameterized, it is parameterized by the topmost
1040 * parent of the outer rel, not the outer rel itself. We will need to
1041 * translate the parameterization, if this path is chosen, during
1042 * create_plan(). Here we just check whether we will be able to perform
1043 * the translation, and if not avoid creating a nestloop path.
1044 */
1047 return;
1048
1049 /*
1050 * Before creating a path, get a quick lower bound on what it is likely to
1051 * cost. Bail out right away if it looks terrible.
1052 */
1053 initial_cost_nestloop(root, &workspace, jointype, nestloop_subtype,
1054 outer_path, inner_path, extra);
1055 if (!add_partial_path_precheck(joinrel, workspace.disabled_nodes,
1056 workspace.startup_cost,
1057 workspace.total_cost, pathkeys))
1058 return;
1059
1060 /* Might be good enough to be worth trying, so let's try it. */
1061 add_partial_path(joinrel, (Path *)
1063 joinrel,
1064 jointype,
1065 &workspace,
1066 extra,
1067 outer_path,
1068 inner_path,
1069 extra->restrictlist,
1070 pathkeys,
1071 NULL));
1072}
1073
1074/*
1075 * try_mergejoin_path
1076 * Consider a merge join path; if it appears useful, push it into
1077 * the joinrel's pathlist via add_path().
1078 */
1079static void
1081 RelOptInfo *joinrel,
1084 List *pathkeys,
1085 List *mergeclauses,
1086 List *outersortkeys,
1087 List *innersortkeys,
1088 JoinType jointype,
1089 JoinPathExtraData *extra,
1090 bool is_partial)
1091{
1093 int outer_presorted_keys = 0;
1094 JoinCostWorkspace workspace;
1095
1096 if (is_partial)
1097 {
1099 joinrel,
1100 outer_path,
1101 inner_path,
1102 pathkeys,
1103 mergeclauses,
1104 outersortkeys,
1105 innersortkeys,
1106 jointype,
1107 extra);
1108 return;
1109 }
1110
1111 /*
1112 * If we are forming an outer join at this join, it's nonsensical to use
1113 * an input path that uses the outer join as part of its parameterization.
1114 * (This can happen despite our join order restrictions, since those apply
1115 * to what is in an input relation not what its parameters are.)
1116 */
1117 if (extra->sjinfo->ojrelid != 0 &&
1120 return;
1121
1122 /*
1123 * Check to see if proposed path is still parameterized, and reject if the
1124 * parameterization wouldn't be sensible.
1125 */
1127 inner_path);
1128 if (required_outer &&
1130 {
1131 /* Waste no memory when we reject a path here */
1133 return;
1134 }
1135
1136 /*
1137 * If the given paths are already well enough ordered, we can skip doing
1138 * an explicit sort.
1139 *
1140 * We need to determine the number of presorted keys of the outer path to
1141 * decide whether explicit incremental sort can be applied when
1142 * outersortkeys is not NIL. We do not need to do the same for the inner
1143 * path though, as incremental sort currently does not support
1144 * mark/restore.
1145 */
1146 if (outersortkeys &&
1147 pathkeys_count_contained_in(outersortkeys, outer_path->pathkeys,
1148 &outer_presorted_keys))
1149 outersortkeys = NIL;
1150 if (innersortkeys &&
1151 pathkeys_contained_in(innersortkeys, inner_path->pathkeys))
1152 innersortkeys = NIL;
1153
1154 /*
1155 * See comments in try_nestloop_path().
1156 */
1157 initial_cost_mergejoin(root, &workspace, jointype, mergeclauses,
1159 outersortkeys, innersortkeys,
1160 outer_presorted_keys,
1161 extra);
1162
1163 if (add_path_precheck(joinrel, workspace.disabled_nodes,
1164 workspace.startup_cost, workspace.total_cost,
1165 pathkeys, required_outer))
1166 {
1167 add_path(joinrel, (Path *)
1169 joinrel,
1170 jointype,
1171 &workspace,
1172 extra,
1173 outer_path,
1174 inner_path,
1175 extra->restrictlist,
1176 pathkeys,
1178 mergeclauses,
1179 outersortkeys,
1180 innersortkeys,
1181 outer_presorted_keys));
1182 }
1183 else
1184 {
1185 /* Waste no memory when we reject a path here */
1187 }
1188}
1189
1190/*
1191 * try_partial_mergejoin_path
1192 * Consider a partial merge join path; if it appears useful, push it into
1193 * the joinrel's pathlist via add_partial_path().
1194 */
1195static void
1197 RelOptInfo *joinrel,
1200 List *pathkeys,
1201 List *mergeclauses,
1202 List *outersortkeys,
1203 List *innersortkeys,
1204 JoinType jointype,
1205 JoinPathExtraData *extra)
1206{
1207 int outer_presorted_keys = 0;
1208 JoinCostWorkspace workspace;
1209
1210 /*
1211 * See comments in try_partial_hashjoin_path().
1212 */
1216 return;
1217
1218 /*
1219 * If the given paths are already well enough ordered, we can skip doing
1220 * an explicit sort.
1221 *
1222 * We need to determine the number of presorted keys of the outer path to
1223 * decide whether explicit incremental sort can be applied when
1224 * outersortkeys is not NIL. We do not need to do the same for the inner
1225 * path though, as incremental sort currently does not support
1226 * mark/restore.
1227 */
1228 if (outersortkeys &&
1229 pathkeys_count_contained_in(outersortkeys, outer_path->pathkeys,
1230 &outer_presorted_keys))
1231 outersortkeys = NIL;
1232 if (innersortkeys &&
1233 pathkeys_contained_in(innersortkeys, inner_path->pathkeys))
1234 innersortkeys = NIL;
1235
1236 /*
1237 * See comments in try_partial_nestloop_path().
1238 */
1239 initial_cost_mergejoin(root, &workspace, jointype, mergeclauses,
1241 outersortkeys, innersortkeys,
1242 outer_presorted_keys,
1243 extra);
1244
1245 if (!add_partial_path_precheck(joinrel, workspace.disabled_nodes,
1246 workspace.startup_cost,
1247 workspace.total_cost, pathkeys))
1248 return;
1249
1250 /* Might be good enough to be worth trying, so let's try it. */
1251 add_partial_path(joinrel, (Path *)
1253 joinrel,
1254 jointype,
1255 &workspace,
1256 extra,
1257 outer_path,
1258 inner_path,
1259 extra->restrictlist,
1260 pathkeys,
1261 NULL,
1262 mergeclauses,
1263 outersortkeys,
1264 innersortkeys,
1265 outer_presorted_keys));
1266}
1267
1268/*
1269 * try_hashjoin_path
1270 * Consider a hash join path; if it appears useful, push it into
1271 * the joinrel's pathlist via add_path().
1272 */
1273static void
1275 RelOptInfo *joinrel,
1278 List *hashclauses,
1279 JoinType jointype,
1280 JoinPathExtraData *extra)
1281{
1283 JoinCostWorkspace workspace;
1284
1285 /*
1286 * If we are forming an outer join at this join, it's nonsensical to use
1287 * an input path that uses the outer join as part of its parameterization.
1288 * (This can happen despite our join order restrictions, since those apply
1289 * to what is in an input relation not what its parameters are.)
1290 */
1291 if (extra->sjinfo->ojrelid != 0 &&
1294 return;
1295
1296 /*
1297 * Check to see if proposed path is still parameterized, and reject if the
1298 * parameterization wouldn't be sensible.
1299 */
1301 inner_path);
1302 if (required_outer &&
1304 {
1305 /* Waste no memory when we reject a path here */
1307 return;
1308 }
1309
1310 /*
1311 * See comments in try_nestloop_path(). Also note that hashjoin paths
1312 * never have any output pathkeys, per comments in create_hashjoin_path.
1313 */
1314 initial_cost_hashjoin(root, &workspace, jointype, hashclauses,
1315 outer_path, inner_path, extra, false);
1316
1317 if (add_path_precheck(joinrel, workspace.disabled_nodes,
1318 workspace.startup_cost, workspace.total_cost,
1320 {
1321 add_path(joinrel, (Path *)
1323 joinrel,
1324 jointype,
1325 &workspace,
1326 extra,
1327 outer_path,
1328 inner_path,
1329 false, /* parallel_hash */
1330 extra->restrictlist,
1332 hashclauses));
1333 }
1334 else
1335 {
1336 /* Waste no memory when we reject a path here */
1338 }
1339}
1340
1341/*
1342 * try_partial_hashjoin_path
1343 * Consider a partial hashjoin join path; if it appears useful, push it into
1344 * the joinrel's partial_pathlist via add_partial_path().
1345 * The outer side is partial. If parallel_hash is true, then the inner path
1346 * must be partial and will be run in parallel to create one or more shared
1347 * hash tables; otherwise the inner path must be complete and a copy of it
1348 * is run in every process to create separate identical private hash tables.
1349 */
1350static void
1352 RelOptInfo *joinrel,
1355 List *hashclauses,
1356 JoinType jointype,
1357 JoinPathExtraData *extra,
1358 bool parallel_hash)
1359{
1360 JoinCostWorkspace workspace;
1361
1362 /*
1363 * If the inner path is parameterized, we can't use a partial hashjoin.
1364 * Parameterized partial paths are not supported. The caller should
1365 * already have verified that no lateral rels are required here.
1366 */
1370 return;
1371
1372 /*
1373 * Before creating a path, get a quick lower bound on what it is likely to
1374 * cost. Bail out right away if it looks terrible.
1375 */
1376 initial_cost_hashjoin(root, &workspace, jointype, hashclauses,
1378 if (!add_partial_path_precheck(joinrel, workspace.disabled_nodes,
1379 workspace.startup_cost,
1380 workspace.total_cost, NIL))
1381 return;
1382
1383 /* Might be good enough to be worth trying, so let's try it. */
1384 add_partial_path(joinrel, (Path *)
1386 joinrel,
1387 jointype,
1388 &workspace,
1389 extra,
1390 outer_path,
1391 inner_path,
1393 extra->restrictlist,
1394 NULL,
1395 hashclauses));
1396}
1397
1398/*
1399 * sort_inner_and_outer
1400 * Create mergejoin join paths by explicitly sorting both the outer and
1401 * inner join relations on each available merge ordering.
1402 *
1403 * 'joinrel' is the join relation
1404 * 'outerrel' is the outer join relation
1405 * 'innerrel' is the inner join relation
1406 * 'jointype' is the type of join to do
1407 * 'extra' contains additional input values
1408 */
1409static void
1411 RelOptInfo *joinrel,
1412 RelOptInfo *outerrel,
1413 RelOptInfo *innerrel,
1414 JoinType jointype,
1415 JoinPathExtraData *extra)
1416{
1422 ListCell *l;
1423
1424 /* Nothing to do if there are no available mergejoin clauses */
1425 if (extra->mergeclause_list == NIL)
1426 return;
1427
1428 /*
1429 * We only consider the cheapest-total-cost input paths, since we are
1430 * assuming here that a sort is required. We will consider
1431 * cheapest-startup-cost input paths later, and only if they don't need a
1432 * sort.
1433 *
1434 * This function intentionally does not consider parameterized input
1435 * paths, except when the cheapest-total is parameterized. If we did so,
1436 * we'd have a combinatorial explosion of mergejoin paths of dubious
1437 * value. This interacts with decisions elsewhere that also discriminate
1438 * against mergejoins with parameterized inputs; see comments in
1439 * src/backend/optimizer/README.
1440 */
1441 outer_path = outerrel->cheapest_total_path;
1442 inner_path = innerrel->cheapest_total_path;
1443
1444 /*
1445 * If either cheapest-total path is parameterized by the other rel, we
1446 * can't use a mergejoin. (There's no use looking for alternative input
1447 * paths, since these should already be the least-parameterized available
1448 * paths.)
1449 */
1450 if (PATH_PARAM_BY_REL(outer_path, innerrel) ||
1451 PATH_PARAM_BY_REL(inner_path, outerrel))
1452 return;
1453
1454 /*
1455 * If the joinrel is parallel-safe, we may be able to consider a partial
1456 * merge join. However, we can't handle JOIN_FULL, JOIN_RIGHT and
1457 * JOIN_RIGHT_ANTI, because they can produce false null extended rows.
1458 * Also, the resulting path must not be parameterized.
1459 */
1460 if (joinrel->consider_parallel &&
1461 jointype != JOIN_FULL &&
1462 jointype != JOIN_RIGHT &&
1463 jointype != JOIN_RIGHT_ANTI &&
1464 outerrel->partial_pathlist != NIL &&
1465 bms_is_empty(joinrel->lateral_relids))
1466 {
1468
1469 if (inner_path->parallel_safe)
1471 else
1474 }
1475
1476 /*
1477 * Each possible ordering of the available mergejoin clauses will generate
1478 * a differently-sorted result path at essentially the same cost. We have
1479 * no basis for choosing one over another at this level of joining, but
1480 * some sort orders may be more useful than others for higher-level
1481 * mergejoins, so it's worth considering multiple orderings.
1482 *
1483 * Actually, it's not quite true that every mergeclause ordering will
1484 * generate a different path order, because some of the clauses may be
1485 * partially redundant (refer to the same EquivalenceClasses). Therefore,
1486 * what we do is convert the mergeclause list to a list of canonical
1487 * pathkeys, and then consider different orderings of the pathkeys.
1488 *
1489 * Generating a path for *every* permutation of the pathkeys doesn't seem
1490 * like a winning strategy; the cost in planning time is too high. For
1491 * now, we generate one path for each pathkey, listing that pathkey first
1492 * and the rest in random order. This should allow at least a one-clause
1493 * mergejoin without re-sorting against any other possible mergejoin
1494 * partner path. But if we've not guessed the right ordering of secondary
1495 * keys, we may end up evaluating clauses as qpquals when they could have
1496 * been done as mergeclauses. (In practice, it's rare that there's more
1497 * than two or three mergeclauses, so expending a huge amount of thought
1498 * on that is probably not worth it.)
1499 *
1500 * The pathkey order returned by select_outer_pathkeys_for_merge() has
1501 * some heuristics behind it (see that function), so be sure to try it
1502 * exactly as-is as well as making variants.
1503 */
1505 extra->mergeclause_list,
1506 joinrel);
1507
1508 foreach(l, all_pathkeys)
1509 {
1512 List *outerkeys;
1513 List *innerkeys;
1515
1516 /* Make a pathkey list with this guy first */
1517 if (l != list_head(all_pathkeys))
1521 else
1522 outerkeys = all_pathkeys; /* no work at first one... */
1523
1524 /* Sort the mergeclauses into the corresponding ordering */
1527 outerkeys,
1528 extra->mergeclause_list);
1529
1530 /* Should have used them all... */
1532
1533 /* Build sort pathkeys for the inner side */
1536 outerkeys);
1537
1538 /* Build pathkeys representing output sort order */
1539 merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
1540 outerkeys);
1541
1542 /*
1543 * And now we can make the path.
1544 *
1545 * Note: it's possible that the cheapest paths will already be sorted
1546 * properly. try_mergejoin_path will detect that case and suppress an
1547 * explicit sort step, so we needn't do so here.
1548 */
1550 joinrel,
1551 outer_path,
1552 inner_path,
1555 outerkeys,
1556 innerkeys,
1557 jointype,
1558 extra,
1559 false);
1560
1561 /*
1562 * If we have partial outer and parallel safe inner path then try
1563 * partial mergejoin path.
1564 */
1567 joinrel,
1572 outerkeys,
1573 innerkeys,
1574 jointype,
1575 extra);
1576 }
1577}
1578
1579/*
1580 * generate_mergejoin_paths
1581 * Creates possible mergejoin paths for input outerpath.
1582 *
1583 * We generate mergejoins if mergejoin clauses are available. We have
1584 * two ways to generate the inner path for a mergejoin: sort the cheapest
1585 * inner path, or use an inner path that is already suitably ordered for the
1586 * merge. If we have several mergeclauses, it could be that there is no inner
1587 * path (or only a very expensive one) for the full list of mergeclauses, but
1588 * better paths exist if we truncate the mergeclause list (thereby discarding
1589 * some sort key requirements). So, we consider truncations of the
1590 * mergeclause list as well as the full list. (Ideally we'd consider all
1591 * subsets of the mergeclause list, but that seems way too expensive.)
1592 */
1593static void
1595 RelOptInfo *joinrel,
1596 RelOptInfo *innerrel,
1597 Path *outerpath,
1598 JoinType jointype,
1599 JoinPathExtraData *extra,
1600 bool useallclauses,
1603 bool is_partial)
1604{
1605 List *mergeclauses;
1606 List *innersortkeys;
1610 int num_sortkeys;
1611 int sortkeycnt;
1612
1613 /* Look for useful mergeclauses (if any) */
1614 mergeclauses =
1616 outerpath->pathkeys,
1617 extra->mergeclause_list);
1618
1619 /*
1620 * Done with this outer path if no chance for a mergejoin.
1621 *
1622 * Special corner case: for "x FULL JOIN y ON true", there will be no join
1623 * clauses at all. Ordinarily we'd generate a clauseless nestloop path,
1624 * but since mergejoin is our only join type that supports FULL JOIN
1625 * without any join clauses, it's necessary to generate a clauseless
1626 * mergejoin path instead.
1627 */
1628 if (mergeclauses == NIL)
1629 {
1630 if (jointype == JOIN_FULL)
1631 /* okay to try for mergejoin */ ;
1632 else
1633 return;
1634 }
1635 if (useallclauses &&
1636 list_length(mergeclauses) != list_length(extra->mergeclause_list))
1637 return;
1638
1639 /* Compute the required ordering of the inner path */
1640 innersortkeys = make_inner_pathkeys_for_merge(root,
1641 mergeclauses,
1642 outerpath->pathkeys);
1643
1644 /*
1645 * Generate a mergejoin on the basis of sorting the cheapest inner. Since
1646 * a sort will be needed, only cheapest total cost matters. (But
1647 * try_mergejoin_path will do the right thing if inner_cheapest_total is
1648 * already correctly sorted.)
1649 */
1651 joinrel,
1652 outerpath,
1655 mergeclauses,
1656 NIL,
1657 innersortkeys,
1658 jointype,
1659 extra,
1660 is_partial);
1661
1662 /*
1663 * Look for presorted inner paths that satisfy the innersortkey list ---
1664 * or any truncation thereof, if we are allowed to build a mergejoin using
1665 * a subset of the merge clauses. Here, we consider both cheap startup
1666 * cost and cheap total cost.
1667 *
1668 * Currently we do not consider parameterized inner paths here. This
1669 * interacts with decisions elsewhere that also discriminate against
1670 * mergejoins with parameterized inputs; see comments in
1671 * src/backend/optimizer/README.
1672 *
1673 * As we shorten the sortkey list, we should consider only paths that are
1674 * strictly cheaper than (in particular, not the same as) any path found
1675 * in an earlier iteration. Otherwise we'd be intentionally using fewer
1676 * merge keys than a given path allows (treating the rest as plain
1677 * joinquals), which is unlikely to be a good idea. Also, eliminating
1678 * paths here on the basis of compare_path_costs is a lot cheaper than
1679 * building the mergejoin path only to throw it away.
1680 *
1681 * If inner_cheapest_total is well enough sorted to have not required a
1682 * sort in the path made above, we shouldn't make a duplicate path with
1683 * it, either. We handle that case with the same logic that handles the
1684 * previous consideration, by initializing the variables that track
1685 * cheapest-so-far properly. Note that we do NOT reject
1686 * inner_cheapest_total if we find it matches some shorter set of
1687 * pathkeys. That case corresponds to using fewer mergekeys to avoid
1688 * sorting inner_cheapest_total, whereas we did sort it above, so the
1689 * plans being considered are different.
1690 */
1691 if (pathkeys_contained_in(innersortkeys,
1692 inner_cheapest_total->pathkeys))
1693 {
1694 /* inner_cheapest_total didn't require a sort */
1697 }
1698 else
1699 {
1700 /* it did require a sort, at least for the full set of keys */
1703 }
1704 num_sortkeys = list_length(innersortkeys);
1705 if (num_sortkeys > 1 && !useallclauses)
1706 trialsortkeys = list_copy(innersortkeys); /* need modifiable copy */
1707 else
1708 trialsortkeys = innersortkeys; /* won't really truncate */
1709
1711 {
1712 Path *innerpath;
1713 List *newclauses = NIL;
1714
1715 /*
1716 * Look for an inner path ordered well enough for the first
1717 * 'sortkeycnt' innersortkeys. NB: trialsortkeys list is modified
1718 * destructively, which is why we made a copy...
1719 */
1723 NULL,
1724 TOTAL_COST,
1725 is_partial);
1726 if (innerpath != NULL &&
1729 TOTAL_COST) < 0))
1730 {
1731 /* Found a cheap (or even-cheaper) sorted path */
1732 /* Select the right mergeclauses, if we didn't already */
1734 {
1735 newclauses =
1737 mergeclauses,
1739 Assert(newclauses != NIL);
1740 }
1741 else
1742 newclauses = mergeclauses;
1744 joinrel,
1745 outerpath,
1746 innerpath,
1748 newclauses,
1749 NIL,
1750 NIL,
1751 jointype,
1752 extra,
1753 is_partial);
1755 }
1756 /* Same on the basis of cheapest startup cost ... */
1759 NULL,
1761 is_partial);
1762 if (innerpath != NULL &&
1765 STARTUP_COST) < 0))
1766 {
1767 /* Found a cheap (or even-cheaper) sorted path */
1769 {
1770 /*
1771 * Avoid rebuilding clause list if we already made one; saves
1772 * memory in big join trees...
1773 */
1774 if (newclauses == NIL)
1775 {
1777 {
1778 newclauses =
1780 mergeclauses,
1782 Assert(newclauses != NIL);
1783 }
1784 else
1785 newclauses = mergeclauses;
1786 }
1788 joinrel,
1789 outerpath,
1790 innerpath,
1792 newclauses,
1793 NIL,
1794 NIL,
1795 jointype,
1796 extra,
1797 is_partial);
1798 }
1800 }
1801
1802 /*
1803 * Don't consider truncated sortkeys if we need all clauses.
1804 */
1805 if (useallclauses)
1806 break;
1807 }
1808}
1809
1810/*
1811 * match_unsorted_outer
1812 * Creates possible join paths for processing a single join relation
1813 * 'joinrel' by employing either iterative substitution or
1814 * mergejoining on each of its possible outer paths (considering
1815 * only outer paths that are already ordered well enough for merging).
1816 *
1817 * We always generate a nestloop path for each available outer path.
1818 * In fact we may generate as many as five: one on the cheapest-total-cost
1819 * inner path, one on the same with materialization, one on the
1820 * cheapest-startup-cost inner path (if different), one on the
1821 * cheapest-total inner-indexscan path (if any), and one on the
1822 * cheapest-startup inner-indexscan path (if different).
1823 *
1824 * We also consider mergejoins if mergejoin clauses are available. See
1825 * detailed comments in generate_mergejoin_paths.
1826 *
1827 * 'joinrel' is the join relation
1828 * 'outerrel' is the outer join relation
1829 * 'innerrel' is the inner join relation
1830 * 'jointype' is the type of join to do
1831 * 'extra' contains additional input values
1832 */
1833static void
1835 RelOptInfo *joinrel,
1836 RelOptInfo *outerrel,
1837 RelOptInfo *innerrel,
1838 JoinType jointype,
1839 JoinPathExtraData *extra)
1840{
1841 bool nestjoinOK;
1842 bool useallclauses;
1844 Path *matpath = NULL;
1845 ListCell *lc1;
1846
1847 /*
1848 * For now we do not support RIGHT_SEMI join in mergejoin or nestloop
1849 * join.
1850 */
1851 if (jointype == JOIN_RIGHT_SEMI)
1852 return;
1853
1854 /*
1855 * Nestloop only supports inner, left, semi, and anti joins. Also, if we
1856 * are doing a right, right-anti or full mergejoin, we must use *all* the
1857 * mergeclauses as join clauses, else we will not have a valid plan.
1858 * (Although these two flags are currently inverses, keep them separate
1859 * for clarity and possible future changes.)
1860 */
1861 switch (jointype)
1862 {
1863 case JOIN_INNER:
1864 case JOIN_LEFT:
1865 case JOIN_SEMI:
1866 case JOIN_ANTI:
1867 nestjoinOK = true;
1868 useallclauses = false;
1869 break;
1870 case JOIN_RIGHT:
1871 case JOIN_RIGHT_ANTI:
1872 case JOIN_FULL:
1873 nestjoinOK = false;
1874 useallclauses = true;
1875 break;
1876 default:
1877 elog(ERROR, "unrecognized join type: %d",
1878 (int) jointype);
1879 nestjoinOK = false; /* keep compiler quiet */
1880 useallclauses = false;
1881 break;
1882 }
1883
1884 /*
1885 * If inner_cheapest_total is parameterized by the outer rel, ignore it;
1886 * we will consider it below as a member of cheapest_parameterized_paths,
1887 * but the other possibilities considered in this routine aren't usable.
1888 *
1889 * Furthermore, if the inner side is a unique-ified relation, we cannot
1890 * generate any valid paths here, because the inner rel's dependency on
1891 * the outer rel makes unique-ification meaningless.
1892 */
1894 {
1896
1897 if (RELATION_WAS_MADE_UNIQUE(innerrel, extra->sjinfo, jointype))
1898 return;
1899 }
1900
1901 if (nestjoinOK)
1902 {
1903 /*
1904 * Consider materializing the cheapest inner path, unless that is
1905 * disabled or the path in question materializes its output anyway.
1906 *
1907 * At present, we only consider materialization for non-partial outer
1908 * paths, so it's correct to test PGS_CONSIDER_NONPARTIAL here. If we
1909 * ever want to consider materialization for partial paths, we'll need
1910 * to create matpath whenever PGS_NESTLOOP_MATERIALIZE is set, use it
1911 * for partial paths either way, and use it for non-partial paths only
1912 * when PGS_CONSIDER_NONPARTIAL is also set.
1913 */
1914 if ((extra->pgs_mask &
1919 matpath = (Path *)
1921 }
1922
1923 foreach(lc1, outerrel->pathlist)
1924 {
1925 Path *outerpath = (Path *) lfirst(lc1);
1927
1928 /*
1929 * We cannot use an outer path that is parameterized by the inner rel.
1930 */
1931 if (PATH_PARAM_BY_REL(outerpath, innerrel))
1932 continue;
1933
1934 /*
1935 * The result will have this sort order (even if it is implemented as
1936 * a nestloop, and even if some of the mergeclauses are implemented by
1937 * qpquals rather than as true mergeclauses):
1938 */
1939 merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
1940 outerpath->pathkeys);
1941
1942 if (nestjoinOK)
1943 {
1944 /*
1945 * Consider nestloop joins using this outer path and various
1946 * available paths for the inner relation. We consider the
1947 * cheapest-total paths for each available parameterization of the
1948 * inner relation, including the unparameterized case.
1949 */
1950 ListCell *lc2;
1951
1952 foreach(lc2, innerrel->cheapest_parameterized_paths)
1953 {
1954 Path *innerpath = (Path *) lfirst(lc2);
1955 Path *mpath;
1956
1958 joinrel,
1959 outerpath,
1960 innerpath,
1962 jointype,
1964 extra);
1965
1966 /*
1967 * Try generating a memoize path and see if that makes the
1968 * nested loop any cheaper.
1969 */
1970 mpath = get_memoize_path(root, innerrel, outerrel,
1971 innerpath, outerpath, jointype,
1972 extra);
1973 if (mpath != NULL)
1975 joinrel,
1976 outerpath,
1977 mpath,
1979 jointype,
1981 extra);
1982 }
1983
1984 /* Also consider materialized form of the cheapest inner path */
1985 if (matpath != NULL)
1987 joinrel,
1988 outerpath,
1989 matpath,
1991 jointype,
1993 extra);
1994 }
1995
1996 /* Can't do anything else if inner rel is parameterized by outer */
1998 continue;
1999
2000 /* Generate merge join paths */
2001 generate_mergejoin_paths(root, joinrel, innerrel, outerpath,
2002 jointype, extra, useallclauses,
2004 false);
2005 }
2006
2007 /*
2008 * Consider partial nestloop and mergejoin plan if outerrel has any
2009 * partial path and the joinrel is parallel-safe. However, we can't
2010 * handle joins needing lateral rels, since partial paths must not be
2011 * parameterized. Similarly, we can't handle JOIN_FULL, JOIN_RIGHT and
2012 * JOIN_RIGHT_ANTI, because they can produce false null extended rows.
2013 */
2014 if (joinrel->consider_parallel &&
2015 jointype != JOIN_FULL &&
2016 jointype != JOIN_RIGHT &&
2017 jointype != JOIN_RIGHT_ANTI &&
2018 outerrel->partial_pathlist != NIL &&
2019 bms_is_empty(joinrel->lateral_relids))
2020 {
2021 if (nestjoinOK)
2022 consider_parallel_nestloop(root, joinrel, outerrel, innerrel,
2023 jointype, extra);
2024
2025 /*
2026 * If inner_cheapest_total is NULL or non parallel-safe then find the
2027 * cheapest total parallel safe path.
2028 */
2029 if (inner_cheapest_total == NULL ||
2030 !inner_cheapest_total->parallel_safe)
2031 {
2034 }
2035
2037 consider_parallel_mergejoin(root, joinrel, outerrel, innerrel,
2038 jointype, extra,
2040 }
2041}
2042
2043/*
2044 * consider_parallel_mergejoin
2045 * Try to build partial paths for a joinrel by joining a partial path
2046 * for the outer relation to a complete path for the inner relation.
2047 *
2048 * 'joinrel' is the join relation
2049 * 'outerrel' is the outer join relation
2050 * 'innerrel' is the inner join relation
2051 * 'jointype' is the type of join to do
2052 * 'extra' contains additional input values
2053 * 'inner_cheapest_total' cheapest total path for innerrel
2054 */
2055static void
2057 RelOptInfo *joinrel,
2058 RelOptInfo *outerrel,
2059 RelOptInfo *innerrel,
2060 JoinType jointype,
2061 JoinPathExtraData *extra,
2063{
2064 ListCell *lc1;
2065
2066 /* generate merge join path for each partial outer path */
2067 foreach(lc1, outerrel->partial_pathlist)
2068 {
2069 Path *outerpath = (Path *) lfirst(lc1);
2071
2072 /*
2073 * Figure out what useful ordering any paths we create will have.
2074 */
2075 merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
2076 outerpath->pathkeys);
2077
2078 generate_mergejoin_paths(root, joinrel, innerrel, outerpath, jointype,
2079 extra, false, inner_cheapest_total,
2080 merge_pathkeys, true);
2081 }
2082}
2083
2084/*
2085 * consider_parallel_nestloop
2086 * Try to build partial paths for a joinrel by joining a partial path for the
2087 * outer relation to a complete path for the inner relation.
2088 *
2089 * 'joinrel' is the join relation
2090 * 'outerrel' is the outer join relation
2091 * 'innerrel' is the inner join relation
2092 * 'jointype' is the type of join to do
2093 * 'extra' contains additional input values
2094 */
2095static void
2097 RelOptInfo *joinrel,
2098 RelOptInfo *outerrel,
2099 RelOptInfo *innerrel,
2100 JoinType jointype,
2101 JoinPathExtraData *extra)
2102{
2104 Path *matpath = NULL;
2105 ListCell *lc1;
2106
2107 /*
2108 * Consider materializing the cheapest inner path, unless: 1)
2109 * materialization is disabled here, 2) the cheapest inner path is not
2110 * parallel-safe, 3) the cheapest inner path is parameterized by the outer
2111 * rel, or 4) the cheapest inner path materializes its output anyway.
2112 */
2113 if ((extra->pgs_mask & PGS_NESTLOOP_MATERIALIZE) != 0 &&
2114 inner_cheapest_total->parallel_safe &&
2117 {
2118 matpath = (Path *)
2120 Assert(matpath->parallel_safe);
2121 }
2122
2123 foreach(lc1, outerrel->partial_pathlist)
2124 {
2125 Path *outerpath = (Path *) lfirst(lc1);
2126 List *pathkeys;
2127 ListCell *lc2;
2128
2129 /* Figure out what useful ordering any paths we create will have. */
2130 pathkeys = build_join_pathkeys(root, joinrel, jointype,
2131 outerpath->pathkeys);
2132
2133 /*
2134 * Try the cheapest parameterized paths; only those which will produce
2135 * an unparameterized path when joined to this outerrel will survive
2136 * try_partial_nestloop_path. The cheapest unparameterized path is
2137 * also in this list.
2138 */
2139 foreach(lc2, innerrel->cheapest_parameterized_paths)
2140 {
2141 Path *innerpath = (Path *) lfirst(lc2);
2142 Path *mpath;
2143
2144 /* Can't join to an inner path that is not parallel-safe */
2145 if (!innerpath->parallel_safe)
2146 continue;
2147
2149 pathkeys, jointype,
2150 PGS_NESTLOOP_PLAIN, extra);
2151
2152 /*
2153 * Try generating a memoize path and see if that makes the nested
2154 * loop any cheaper.
2155 */
2156 mpath = get_memoize_path(root, innerrel, outerrel,
2157 innerpath, outerpath, jointype,
2158 extra);
2159 if (mpath != NULL)
2161 pathkeys, jointype,
2162 PGS_NESTLOOP_MEMOIZE, extra);
2163 }
2164
2165 /* Also consider materialized form of the cheapest inner path */
2166 if (matpath != NULL)
2168 pathkeys, jointype,
2170 }
2171}
2172
2173/*
2174 * hash_inner_and_outer
2175 * Create hashjoin join paths by explicitly hashing both the outer and
2176 * inner keys of each available hash clause.
2177 *
2178 * 'joinrel' is the join relation
2179 * 'outerrel' is the outer join relation
2180 * 'innerrel' is the inner join relation
2181 * 'jointype' is the type of join to do
2182 * 'extra' contains additional input values
2183 */
2184static void
2186 RelOptInfo *joinrel,
2187 RelOptInfo *outerrel,
2188 RelOptInfo *innerrel,
2189 JoinType jointype,
2190 JoinPathExtraData *extra)
2191{
2192 bool isouterjoin = IS_OUTER_JOIN(jointype);
2193 List *hashclauses;
2194 ListCell *l;
2195
2196 /*
2197 * We need to build only one hashclauses list for any given pair of outer
2198 * and inner relations; all of the hashable clauses will be used as keys.
2199 *
2200 * Scan the join's restrictinfo list to find hashjoinable clauses that are
2201 * usable with this pair of sub-relations.
2202 */
2203 hashclauses = NIL;
2204 foreach(l, extra->restrictlist)
2205 {
2207
2208 /*
2209 * If processing an outer join, only use its own join clauses for
2210 * hashing. For inner joins we need not be so picky.
2211 */
2213 continue;
2214
2215 if (!restrictinfo->can_join ||
2216 restrictinfo->hashjoinoperator == InvalidOid)
2217 continue; /* not hashjoinable */
2218
2219 /*
2220 * Check if clause has the form "outer op inner" or "inner op outer".
2221 */
2223 innerrel->relids))
2224 continue; /* no good for these input relations */
2225
2226 /*
2227 * If clause has the form "inner op outer", check if its operator has
2228 * valid commutator. This is necessary because hashclauses in this
2229 * form will get commuted in createplan.c to put the outer var on the
2230 * left (see get_switched_clauses). This probably shouldn't ever
2231 * fail, since hashable operators ought to have commutators, but be
2232 * paranoid.
2233 *
2234 * The clause being hashjoinable indicates that it's an OpExpr.
2235 */
2236 if (!restrictinfo->outer_is_left &&
2238 continue;
2239
2240 hashclauses = lappend(hashclauses, restrictinfo);
2241 }
2242
2243 /* If we found any usable hashclauses, make paths */
2244 if (hashclauses)
2245 {
2246 /*
2247 * We consider both the cheapest-total-cost and cheapest-startup-cost
2248 * outer paths. There's no need to consider any but the
2249 * cheapest-total-cost inner path, however.
2250 */
2254 ListCell *lc1;
2255 ListCell *lc2;
2256
2257 /*
2258 * If either cheapest-total path is parameterized by the other rel, we
2259 * can't use a hashjoin. (There's no use looking for alternative
2260 * input paths, since these should already be the least-parameterized
2261 * available paths.)
2262 */
2263 if (PATH_PARAM_BY_REL(cheapest_total_outer, innerrel) ||
2265 return;
2266
2267 /*
2268 * Consider the cheapest startup outer together with the cheapest
2269 * total inner, and then consider pairings of cheapest-total paths
2270 * including parameterized ones. There is no use in generating
2271 * parameterized paths on the basis of possibly cheap startup cost, so
2272 * this is sufficient.
2273 */
2276 joinrel,
2279 hashclauses,
2280 jointype,
2281 extra);
2282
2283 foreach(lc1, outerrel->cheapest_parameterized_paths)
2284 {
2285 Path *outerpath = (Path *) lfirst(lc1);
2286
2287 /*
2288 * We cannot use an outer path that is parameterized by the inner
2289 * rel.
2290 */
2291 if (PATH_PARAM_BY_REL(outerpath, innerrel))
2292 continue;
2293
2294 foreach(lc2, innerrel->cheapest_parameterized_paths)
2295 {
2296 Path *innerpath = (Path *) lfirst(lc2);
2297
2298 /*
2299 * We cannot use an inner path that is parameterized by the
2300 * outer rel, either.
2301 */
2302 if (PATH_PARAM_BY_REL(innerpath, outerrel))
2303 continue;
2304
2307 continue; /* already tried it */
2308
2310 joinrel,
2311 outerpath,
2312 innerpath,
2313 hashclauses,
2314 jointype,
2315 extra);
2316 }
2317 }
2318
2319 /*
2320 * If the joinrel is parallel-safe, we may be able to consider a
2321 * partial hash join.
2322 *
2323 * However, we can't handle JOIN_RIGHT_SEMI, because the hash table is
2324 * either a shared hash table or a private hash table per backend. In
2325 * the shared case, there is no concurrency protection for the match
2326 * flags, so multiple workers could inspect and set the flags
2327 * concurrently, potentially producing incorrect results. In the
2328 * private case, each worker has its own copy of the hash table, so no
2329 * single process has all the match flags.
2330 *
2331 * Also, the resulting path must not be parameterized.
2332 */
2333 if (joinrel->consider_parallel &&
2334 jointype != JOIN_RIGHT_SEMI &&
2335 outerrel->partial_pathlist != NIL &&
2336 bms_is_empty(joinrel->lateral_relids))
2337 {
2341
2343 (Path *) linitial(outerrel->partial_pathlist);
2344
2345 /*
2346 * Can we use a partial inner plan too, so that we can build a
2347 * shared hash table in parallel?
2348 */
2349 if (innerrel->partial_pathlist != NIL &&
2351 {
2353 (Path *) linitial(innerrel->partial_pathlist);
2357 hashclauses, jointype, extra,
2358 true /* parallel_hash */ );
2359 }
2360
2361 /*
2362 * Normally, given that the joinrel is parallel-safe, the cheapest
2363 * total inner path will also be parallel-safe, but if not, we'll
2364 * have to search for the cheapest safe, unparameterized inner
2365 * path. If full, right, or right-anti join, we can't use
2366 * parallelism (building the hash table in each backend) because
2367 * no one process has all the match bits.
2368 */
2369 if (jointype == JOIN_FULL ||
2370 jointype == JOIN_RIGHT ||
2371 jointype == JOIN_RIGHT_ANTI)
2373 else if (cheapest_total_inner->parallel_safe)
2375 else
2378
2383 hashclauses, jointype, extra,
2384 false /* parallel_hash */ );
2385 }
2386 }
2387}
2388
2389/*
2390 * select_mergejoin_clauses
2391 * Select mergejoin clauses that are usable for a particular join.
2392 * Returns a list of RestrictInfo nodes for those clauses.
2393 *
2394 * *mergejoin_allowed is normally set to true, but it is set to false if
2395 * this is a right-semi join, or this is a right/right-anti/full join and
2396 * there are nonmergejoinable join clauses. The executor's mergejoin
2397 * machinery cannot handle such cases, so we have to avoid generating a
2398 * mergejoin plan. (Note that this flag does NOT consider whether there are
2399 * actually any mergejoinable clauses. This is correct because in some
2400 * cases we need to build a clauseless mergejoin. Simply returning NIL is
2401 * therefore not enough to distinguish safe from unsafe cases.)
2402 *
2403 * We also mark each selected RestrictInfo to show which side is currently
2404 * being considered as outer. These are transient markings that are only
2405 * good for the duration of the current add_paths_to_joinrel() call!
2406 *
2407 * We examine each restrictinfo clause known for the join to see
2408 * if it is mergejoinable and involves vars from the two sub-relations
2409 * currently of interest.
2410 */
2411static List *
2413 RelOptInfo *joinrel,
2414 RelOptInfo *outerrel,
2415 RelOptInfo *innerrel,
2416 List *restrictlist,
2417 JoinType jointype,
2418 bool *mergejoin_allowed)
2419{
2420 List *result_list = NIL;
2421 bool isouterjoin = IS_OUTER_JOIN(jointype);
2422 bool have_nonmergeable_joinclause = false;
2423 ListCell *l;
2424
2425 /*
2426 * For now we do not support RIGHT_SEMI join in mergejoin: the benefit of
2427 * swapping inputs tends to be small here.
2428 */
2429 if (jointype == JOIN_RIGHT_SEMI)
2430 {
2431 *mergejoin_allowed = false;
2432 return NIL;
2433 }
2434
2435 foreach(l, restrictlist)
2436 {
2438
2439 /*
2440 * If processing an outer join, only use its own join clauses in the
2441 * merge. For inner joins we can use pushed-down clauses too. (Note:
2442 * we don't set have_nonmergeable_joinclause here because pushed-down
2443 * clauses will become otherquals not joinquals.)
2444 */
2446 continue;
2447
2448 /* Check that clause is a mergeable operator clause */
2449 if (!restrictinfo->can_join ||
2450 restrictinfo->mergeopfamilies == NIL)
2451 {
2452 /*
2453 * The executor can handle extra joinquals that are constants, but
2454 * not anything else, when doing right/right-anti/full merge join.
2455 * (The reason to support constants is so we can do FULL JOIN ON
2456 * FALSE.)
2457 */
2458 if (!restrictinfo->clause || !IsA(restrictinfo->clause, Const))
2460 continue; /* not mergejoinable */
2461 }
2462
2463 /*
2464 * Check if clause has the form "outer op inner" or "inner op outer".
2465 */
2467 innerrel->relids))
2468 {
2470 continue; /* no good for these input relations */
2471 }
2472
2473 /*
2474 * If clause has the form "inner op outer", check if its operator has
2475 * valid commutator. This is necessary because mergejoin clauses in
2476 * this form will get commuted in createplan.c to put the outer var on
2477 * the left (see get_switched_clauses). This probably shouldn't ever
2478 * fail, since mergejoinable operators ought to have commutators, but
2479 * be paranoid.
2480 *
2481 * The clause being mergejoinable indicates that it's an OpExpr.
2482 */
2483 if (!restrictinfo->outer_is_left &&
2485 {
2487 continue;
2488 }
2489
2490 /*
2491 * Insist that each side have a non-redundant eclass. This
2492 * restriction is needed because various bits of the planner expect
2493 * that each clause in a merge be associable with some pathkey in a
2494 * canonical pathkey list, but redundant eclasses can't appear in
2495 * canonical sort orderings. (XXX it might be worth relaxing this,
2496 * but not enough time to address it for 8.3.)
2497 */
2499
2500 if (EC_MUST_BE_REDUNDANT(restrictinfo->left_ec) ||
2502 {
2504 continue; /* can't handle redundant eclasses */
2505 }
2506
2508 }
2509
2510 /*
2511 * Report whether mergejoin is allowed (see comment at top of function).
2512 */
2513 switch (jointype)
2514 {
2515 case JOIN_RIGHT:
2516 case JOIN_RIGHT_ANTI:
2517 case JOIN_FULL:
2519 break;
2520 default:
2521 *mergejoin_allowed = true;
2522 break;
2523 }
2524
2525 return result_list;
2526}
bool innerrel_is_unique(PlannerInfo *root, Relids joinrelids, Relids outerrelids, RelOptInfo *innerrel, JoinType jointype, List *restrictlist, bool force_cache)
Bitmapset * bms_difference(const Bitmapset *a, const Bitmapset *b)
Definition bitmapset.c:346
Bitmapset * bms_intersect(const Bitmapset *a, const Bitmapset *b)
Definition bitmapset.c:292
bool bms_equal(const Bitmapset *a, const Bitmapset *b)
Definition bitmapset.c:142
bool bms_is_subset(const Bitmapset *a, const Bitmapset *b)
Definition bitmapset.c:412
void bms_free(Bitmapset *a)
Definition bitmapset.c:239
bool bms_is_member(int x, const Bitmapset *a)
Definition bitmapset.c:510
Bitmapset * bms_add_members(Bitmapset *a, const Bitmapset *b)
Definition bitmapset.c:901
BMS_Membership bms_membership(const Bitmapset *a)
Definition bitmapset.c:765
bool bms_overlap(const Bitmapset *a, const Bitmapset *b)
Definition bitmapset.c:575
Bitmapset * bms_join(Bitmapset *a, Bitmapset *b)
Definition bitmapset.c:1214
bool bms_nonempty_difference(const Bitmapset *a, const Bitmapset *b)
Definition bitmapset.c:634
#define bms_is_empty(a)
Definition bitmapset.h:118
@ BMS_MULTIPLE
Definition bitmapset.h:73
#define Assert(condition)
Definition c.h:943
uint64_t uint64
Definition c.h:625
#define OidIsValid(objectId)
Definition c.h:858
uint32 result
bool contain_volatile_functions(Node *clause)
Definition clauses.c:549
void compute_semi_anti_join_factors(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, JoinType jointype, SpecialJoinInfo *sjinfo, List *restrictlist, SemiAntiJoinFactors *semifactors)
Definition costsize.c:5258
void initial_cost_nestloop(PlannerInfo *root, JoinCostWorkspace *workspace, JoinType jointype, uint64 enable_mask, Path *outer_path, Path *inner_path, JoinPathExtraData *extra)
Definition costsize.c:3373
void initial_cost_hashjoin(PlannerInfo *root, JoinCostWorkspace *workspace, JoinType jointype, List *hashclauses, Path *outer_path, Path *inner_path, JoinPathExtraData *extra, bool parallel_hash)
Definition costsize.c:4297
void initial_cost_mergejoin(PlannerInfo *root, JoinCostWorkspace *workspace, JoinType jointype, List *mergeclauses, Path *outer_path, Path *inner_path, List *outersortkeys, List *innersortkeys, int outer_presorted_keys, JoinPathExtraData *extra)
Definition costsize.c:3658
bool enable_parallel_hash
Definition costsize.c:163
#define ERROR
Definition elog.h:40
#define elog(elevel,...)
Definition elog.h:228
bool ExecMaterializesOutput(NodeTag plantype)
Definition execAmi.c:636
static void try_hashjoin_path(PlannerInfo *root, RelOptInfo *joinrel, Path *outer_path, Path *inner_path, List *hashclauses, JoinType jointype, JoinPathExtraData *extra)
Definition joinpath.c:1274
static List * select_mergejoin_clauses(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, List *restrictlist, JoinType jointype, bool *mergejoin_allowed)
Definition joinpath.c:2412
static void sort_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, JoinType jointype, JoinPathExtraData *extra)
Definition joinpath.c:1410
static List * extract_lateral_vars_from_PHVs(PlannerInfo *root, Relids innerrelids)
Definition joinpath.c:621
void add_paths_to_joinrel(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, JoinType jointype, SpecialJoinInfo *sjinfo, List *restrictlist)
Definition joinpath.c:123
static bool paraminfo_get_equal_hashops(PlannerInfo *root, ParamPathInfo *param_info, RelOptInfo *outerrel, RelOptInfo *innerrel, List *ph_lateral_vars, List **param_exprs, List **operators, bool *binary_mode)
Definition joinpath.c:476
static Path * get_memoize_path(PlannerInfo *root, RelOptInfo *innerrel, RelOptInfo *outerrel, Path *inner_path, Path *outer_path, JoinType jointype, JoinPathExtraData *extra)
Definition joinpath.c:712
static void consider_parallel_nestloop(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, JoinType jointype, JoinPathExtraData *extra)
Definition joinpath.c:2096
join_path_setup_hook_type join_path_setup_hook
Definition joinpath.c:32
#define PATH_PARAM_BY_PARENT(path, rel)
Definition joinpath.c:39
set_join_pathlist_hook_type set_join_pathlist_hook
Definition joinpath.c:31
static void try_nestloop_path(PlannerInfo *root, RelOptInfo *joinrel, Path *outer_path, Path *inner_path, List *pathkeys, JoinType jointype, uint64 nestloop_subtype, JoinPathExtraData *extra)
Definition joinpath.c:878
static void try_mergejoin_path(PlannerInfo *root, RelOptInfo *joinrel, Path *outer_path, Path *inner_path, List *pathkeys, List *mergeclauses, List *outersortkeys, List *innersortkeys, JoinType jointype, JoinPathExtraData *extra, bool is_partial)
Definition joinpath.c:1080
static void consider_parallel_mergejoin(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, JoinType jointype, JoinPathExtraData *extra, Path *inner_cheapest_total)
Definition joinpath.c:2056
static void generate_mergejoin_paths(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *innerrel, Path *outerpath, JoinType jointype, JoinPathExtraData *extra, bool useallclauses, Path *inner_cheapest_total, List *merge_pathkeys, bool is_partial)
Definition joinpath.c:1594
static void try_partial_nestloop_path(PlannerInfo *root, RelOptInfo *joinrel, Path *outer_path, Path *inner_path, List *pathkeys, JoinType jointype, uint64 nestloop_subtype, JoinPathExtraData *extra)
Definition joinpath.c:999
static void hash_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, JoinType jointype, JoinPathExtraData *extra)
Definition joinpath.c:2185
static void try_partial_mergejoin_path(PlannerInfo *root, RelOptInfo *joinrel, Path *outer_path, Path *inner_path, List *pathkeys, List *mergeclauses, List *outersortkeys, List *innersortkeys, JoinType jointype, JoinPathExtraData *extra)
Definition joinpath.c:1196
#define PATH_PARAM_BY_REL(path, rel)
Definition joinpath.c:45
static bool allow_star_schema_join(PlannerInfo *root, Relids outerrelids, Relids inner_paramrels)
Definition joinpath.c:400
static void try_partial_hashjoin_path(PlannerInfo *root, RelOptInfo *joinrel, Path *outer_path, Path *inner_path, List *hashclauses, JoinType jointype, JoinPathExtraData *extra, bool parallel_hash)
Definition joinpath.c:1351
static void match_unsorted_outer(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, JoinType jointype, JoinPathExtraData *extra)
Definition joinpath.c:1834
List * lappend(List *list, void *datum)
Definition list.c:339
List * list_delete_nth_cell(List *list, int n)
Definition list.c:767
List * list_concat(List *list1, const List *list2)
Definition list.c:561
List * list_copy(const List *oldlist)
Definition list.c:1573
List * lappend_oid(List *list, Oid datum)
Definition list.c:375
List * lcons(void *datum, List *list)
Definition list.c:495
void list_free(List *list)
Definition list.c:1546
List * list_truncate(List *list, int new_size)
Definition list.c:631
bool list_member(const List *list, const void *datum)
Definition list.c:661
Oid get_commutator(Oid opno)
Definition lsyscache.c:1729
Oid exprType(const Node *expr)
Definition nodeFuncs.c:42
#define IsA(nodeptr, _type_)
Definition nodes.h:164
#define IS_OUTER_JOIN(jointype)
Definition nodes.h:348
#define castNode(_type_, nodeptr)
Definition nodes.h:182
JoinType
Definition nodes.h:298
@ JOIN_SEMI
Definition nodes.h:317
@ JOIN_FULL
Definition nodes.h:305
@ JOIN_INNER
Definition nodes.h:303
@ JOIN_RIGHT
Definition nodes.h:306
@ JOIN_RIGHT_SEMI
Definition nodes.h:319
@ JOIN_LEFT
Definition nodes.h:304
@ JOIN_UNIQUE_OUTER
Definition nodes.h:326
@ JOIN_RIGHT_ANTI
Definition nodes.h:320
@ JOIN_UNIQUE_INNER
Definition nodes.h:327
@ JOIN_ANTI
Definition nodes.h:318
Path * get_cheapest_path_for_pathkeys(List *paths, List *pathkeys, Relids required_outer, CostSelector cost_criterion, bool require_parallel_safe)
Definition pathkeys.c:620
bool pathkeys_count_contained_in(List *keys1, List *keys2, int *n_common)
Definition pathkeys.c:558
List * make_inner_pathkeys_for_merge(PlannerInfo *root, List *mergeclauses, List *outer_pathkeys)
Definition pathkeys.c:1858
List * find_mergeclauses_for_outer_pathkeys(PlannerInfo *root, List *pathkeys, List *restrictinfos)
Definition pathkeys.c:1544
void update_mergeclause_eclasses(PlannerInfo *root, RestrictInfo *restrictinfo)
Definition pathkeys.c:1510
List * trim_mergeclauses_for_inner_pathkeys(PlannerInfo *root, List *mergeclauses, List *pathkeys)
Definition pathkeys.c:1961
List * select_outer_pathkeys_for_merge(PlannerInfo *root, List *mergeclauses, RelOptInfo *joinrel)
Definition pathkeys.c:1659
bool pathkeys_contained_in(List *keys1, List *keys2)
Definition pathkeys.c:343
List * build_join_pathkeys(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, List *outer_pathkeys)
Definition pathkeys.c:1295
Path * get_cheapest_parallel_safe_total_inner(List *paths)
Definition pathkeys.c:699
Relids calc_non_nestloop_required_outer(Path *outer_path, Path *inner_path)
Definition pathnode.c:2304
bool path_is_reparameterizable_by_child(Path *path, RelOptInfo *child_rel)
Definition pathnode.c:4383
MemoizePath * create_memoize_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, List *param_exprs, List *hash_operators, bool singlerow, bool binary_mode, Cardinality est_calls)
Definition pathnode.c:1746
MaterialPath * create_material_path(RelOptInfo *rel, Path *subpath, bool enabled)
Definition pathnode.c:1712
bool add_partial_path_precheck(RelOptInfo *parent_rel, int disabled_nodes, Cost startup_cost, Cost total_cost, List *pathkeys)
Definition pathnode.c:912
Relids calc_nestloop_required_outer(Relids outerrelids, Relids outer_paramrels, Relids innerrelids, Relids inner_paramrels)
Definition pathnode.c:2277
HashPath * create_hashjoin_path(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, JoinCostWorkspace *workspace, JoinPathExtraData *extra, Path *outer_path, Path *inner_path, bool parallel_hash, List *restrict_clauses, Relids required_outer, List *hashclauses)
Definition pathnode.c:2521
void add_partial_path(RelOptInfo *parent_rel, Path *new_path)
Definition pathnode.c:793
void add_path(RelOptInfo *parent_rel, Path *new_path)
Definition pathnode.c:459
int compare_path_costs(Path *path1, Path *path2, CostSelector criterion)
Definition pathnode.c:68
bool add_path_precheck(RelOptInfo *parent_rel, int disabled_nodes, Cost startup_cost, Cost total_cost, List *pathkeys, Relids required_outer)
Definition pathnode.c:686
MergePath * create_mergejoin_path(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, JoinCostWorkspace *workspace, JoinPathExtraData *extra, Path *outer_path, Path *inner_path, List *restrict_clauses, List *pathkeys, Relids required_outer, List *mergeclauses, List *outersortkeys, List *innersortkeys, int outer_presorted_keys)
Definition pathnode.c:2453
NestPath * create_nestloop_path(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, JoinCostWorkspace *workspace, JoinPathExtraData *extra, Path *outer_path, Path *inner_path, List *restrict_clauses, List *pathkeys, Relids required_outer)
Definition pathnode.c:2356
#define PGS_NESTLOOP_MEMOIZE
Definition pathnodes.h:76
#define EC_MUST_BE_REDUNDANT(eclass)
Definition pathnodes.h:1684
#define PGS_FOREIGNJOIN
Definition pathnodes.h:71
#define RINFO_IS_PUSHED_DOWN(rinfo, joinrelids)
Definition pathnodes.h:3058
#define PGS_NESTLOOP_MATERIALIZE
Definition pathnodes.h:75
#define PGS_HASHJOIN
Definition pathnodes.h:77
@ TOTAL_COST
Definition pathnodes.h:111
@ STARTUP_COST
Definition pathnodes.h:111
#define PGS_CONSIDER_NONPARTIAL
Definition pathnodes.h:84
#define PATH_REQ_OUTER(path)
Definition pathnodes.h:2015
#define PGS_MERGEJOIN_ANY
Definition pathnodes.h:92
#define RELATION_WAS_MADE_UNIQUE(rel, sjinfo, nominal_jointype)
Definition pathnodes.h:1250
@ RELOPT_OTHER_JOINREL
Definition pathnodes.h:980
#define PGS_NESTLOOP_PLAIN
Definition pathnodes.h:74
void(* join_path_setup_hook_type)(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, JoinType jointype, JoinPathExtraData *extra)
Definition paths.h:32
void(* set_join_pathlist_hook_type)(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, JoinType jointype, JoinPathExtraData *extra)
Definition paths.h:46
#define lfirst(lc)
Definition pg_list.h:172
static int list_length(const List *l)
Definition pg_list.h:152
#define NIL
Definition pg_list.h:68
#define foreach_current_index(var_or_cell)
Definition pg_list.h:435
#define linitial(l)
Definition pg_list.h:178
#define lsecond(l)
Definition pg_list.h:183
#define foreach_node(type, var, lst)
Definition pg_list.h:528
static ListCell * list_head(const List *l)
Definition pg_list.h:128
PlaceHolderInfo * find_placeholder_info(PlannerInfo *root, PlaceHolderVar *phv)
Definition placeholder.c:85
#define InvalidOid
unsigned int Oid
static int fb(int x)
tree ctl root
Definition radixtree.h:1857
static bool clause_sides_match_join(RestrictInfo *rinfo, Relids outerrelids, Relids innerrelids)
List * mergeclause_list
Definition pathnodes.h:3607
Relids param_source_rels
Definition pathnodes.h:3611
SemiAntiJoinFactors semifactors
Definition pathnodes.h:3610
SpecialJoinInfo * sjinfo
Definition pathnodes.h:3609
Definition pg_list.h:54
Definition nodes.h:135
List * args
Definition primnodes.h:869
List * baserestrictinfo
Definition pathnodes.h:1142
Relids relids
Definition pathnodes.h:1021
struct PathTarget * reltarget
Definition pathnodes.h:1045
List * lateral_vars
Definition pathnodes.h:1087
uint64 pgs_mask
Definition pathnodes.h:1039
bool consider_parallel
Definition pathnodes.h:1037
Relids top_parent_relids
Definition pathnodes.h:1174
Relids lateral_relids
Definition pathnodes.h:1064
List * cheapest_parameterized_paths
Definition pathnodes.h:1055
List * pathlist
Definition pathnodes.h:1050
RelOptKind reloptkind
Definition pathnodes.h:1015
struct Path * cheapest_startup_path
Definition pathnodes.h:1053
struct Path * cheapest_total_path
Definition pathnodes.h:1054
List * partial_pathlist
Definition pathnodes.h:1052
Expr * clause
Definition pathnodes.h:2901
Relids min_righthand
Definition pathnodes.h:3227
JoinType jointype
Definition pathnodes.h:3230
Relids min_lefthand
Definition pathnodes.h:3226
int varno
Definition primnodes.h:270
Index varlevelsup
Definition primnodes.h:295
TypeCacheEntry * lookup_type_cache(Oid type_id, int flags)
Definition typcache.c:389
#define TYPECACHE_EQ_OPR
Definition typcache.h:138
#define TYPECACHE_HASH_PROC
Definition typcache.h:142
Relids pull_varnos(PlannerInfo *root, Node *node)
Definition var.c:114
List * pull_vars_of_level(Node *node, int levelsup)
Definition var.c:339