PostgreSQL Source Code  git master
joinpath.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * joinpath.c
4  * Routines to find all possible paths for processing a set of joins
5  *
6  * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/optimizer/path/joinpath.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16 
17 #include <math.h>
18 
19 #include "executor/executor.h"
20 #include "foreign/fdwapi.h"
21 #include "nodes/nodeFuncs.h"
22 #include "optimizer/cost.h"
23 #include "optimizer/optimizer.h"
24 #include "optimizer/pathnode.h"
25 #include "optimizer/paths.h"
26 #include "optimizer/planmain.h"
27 #include "utils/typcache.h"
28 
29 /* Hook for plugins to get control in add_paths_to_joinrel() */
31 
32 /*
33  * Paths parameterized by the parent can be considered to be parameterized by
34  * any of its child.
35  */
36 #define PATH_PARAM_BY_PARENT(path, rel) \
37  ((path)->param_info && bms_overlap(PATH_REQ_OUTER(path), \
38  (rel)->top_parent_relids))
39 #define PATH_PARAM_BY_REL_SELF(path, rel) \
40  ((path)->param_info && bms_overlap(PATH_REQ_OUTER(path), (rel)->relids))
41 
42 #define PATH_PARAM_BY_REL(path, rel) \
43  (PATH_PARAM_BY_REL_SELF(path, rel) || PATH_PARAM_BY_PARENT(path, rel))
44 
45 static void try_partial_mergejoin_path(PlannerInfo *root,
46  RelOptInfo *joinrel,
47  Path *outer_path,
48  Path *inner_path,
49  List *pathkeys,
50  List *mergeclauses,
51  List *outersortkeys,
52  List *innersortkeys,
53  JoinType jointype,
54  JoinPathExtraData *extra);
55 static void sort_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
56  RelOptInfo *outerrel, RelOptInfo *innerrel,
57  JoinType jointype, JoinPathExtraData *extra);
58 static inline bool clause_sides_match_join(RestrictInfo *rinfo,
59  RelOptInfo *outerrel,
60  RelOptInfo *innerrel);
61 static void match_unsorted_outer(PlannerInfo *root, RelOptInfo *joinrel,
62  RelOptInfo *outerrel, RelOptInfo *innerrel,
63  JoinType jointype, JoinPathExtraData *extra);
64 static void consider_parallel_nestloop(PlannerInfo *root,
65  RelOptInfo *joinrel,
66  RelOptInfo *outerrel,
67  RelOptInfo *innerrel,
68  JoinType jointype,
69  JoinPathExtraData *extra);
71  RelOptInfo *joinrel,
72  RelOptInfo *outerrel,
73  RelOptInfo *innerrel,
74  JoinType jointype,
75  JoinPathExtraData *extra,
76  Path *inner_cheapest_total);
77 static void hash_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
78  RelOptInfo *outerrel, RelOptInfo *innerrel,
79  JoinType jointype, JoinPathExtraData *extra);
81  RelOptInfo *joinrel,
82  RelOptInfo *outerrel,
83  RelOptInfo *innerrel,
84  List *restrictlist,
85  JoinType jointype,
86  bool *mergejoin_allowed);
87 static void generate_mergejoin_paths(PlannerInfo *root,
88  RelOptInfo *joinrel,
89  RelOptInfo *innerrel,
90  Path *outerpath,
91  JoinType jointype,
92  JoinPathExtraData *extra,
93  bool useallclauses,
94  Path *inner_cheapest_total,
95  List *merge_pathkeys,
96  bool is_partial);
97 
98 
99 /*
100  * add_paths_to_joinrel
101  * Given a join relation and two component rels from which it can be made,
102  * consider all possible paths that use the two component rels as outer
103  * and inner rel respectively. Add these paths to the join rel's pathlist
104  * if they survive comparison with other paths (and remove any existing
105  * paths that are dominated by these paths).
106  *
107  * Modifies the pathlist field of the joinrel node to contain the best
108  * paths found so far.
109  *
110  * jointype is not necessarily the same as sjinfo->jointype; it might be
111  * "flipped around" if we are considering joining the rels in the opposite
112  * direction from what's indicated in sjinfo.
113  *
114  * Also, this routine and others in this module accept the special JoinTypes
115  * JOIN_UNIQUE_OUTER and JOIN_UNIQUE_INNER to indicate that we should
116  * unique-ify the outer or inner relation and then apply a regular inner
117  * join. These values are not allowed to propagate outside this module,
118  * however. Path cost estimation code may need to recognize that it's
119  * dealing with such a case --- the combination of nominal jointype INNER
120  * with sjinfo->jointype == JOIN_SEMI indicates that.
121  */
122 void
124  RelOptInfo *joinrel,
125  RelOptInfo *outerrel,
126  RelOptInfo *innerrel,
127  JoinType jointype,
128  SpecialJoinInfo *sjinfo,
129  List *restrictlist)
130 {
131  JoinPathExtraData extra;
132  bool mergejoin_allowed = true;
133  ListCell *lc;
134  Relids joinrelids;
135 
136  /*
137  * PlannerInfo doesn't contain the SpecialJoinInfos created for joins
138  * between child relations, even if there is a SpecialJoinInfo node for
139  * the join between the topmost parents. So, while calculating Relids set
140  * representing the restriction, consider relids of topmost parent of
141  * partitions.
142  */
143  if (joinrel->reloptkind == RELOPT_OTHER_JOINREL)
144  joinrelids = joinrel->top_parent_relids;
145  else
146  joinrelids = joinrel->relids;
147 
148  extra.restrictlist = restrictlist;
149  extra.mergeclause_list = NIL;
150  extra.sjinfo = sjinfo;
151  extra.param_source_rels = NULL;
152 
153  /*
154  * See if the inner relation is provably unique for this outer rel.
155  *
156  * We have some special cases: for JOIN_SEMI and JOIN_ANTI, it doesn't
157  * matter since the executor can make the equivalent optimization anyway;
158  * we need not expend planner cycles on proofs. For JOIN_UNIQUE_INNER, we
159  * must be considering a semijoin whose inner side is not provably unique
160  * (else reduce_unique_semijoins would've simplified it), so there's no
161  * point in calling innerrel_is_unique. However, if the LHS covers all of
162  * the semijoin's min_lefthand, then it's appropriate to set inner_unique
163  * because the path produced by create_unique_path will be unique relative
164  * to the LHS. (If we have an LHS that's only part of the min_lefthand,
165  * that is *not* true.) For JOIN_UNIQUE_OUTER, pass JOIN_INNER to avoid
166  * letting that value escape this module.
167  */
168  switch (jointype)
169  {
170  case JOIN_SEMI:
171  case JOIN_ANTI:
172 
173  /*
174  * XXX it may be worth proving this to allow a Memoize to be
175  * considered for Nested Loop Semi/Anti Joins.
176  */
177  extra.inner_unique = false; /* well, unproven */
178  break;
179  case JOIN_UNIQUE_INNER:
180  extra.inner_unique = bms_is_subset(sjinfo->min_lefthand,
181  outerrel->relids);
182  break;
183  case JOIN_UNIQUE_OUTER:
184  extra.inner_unique = innerrel_is_unique(root,
185  joinrel->relids,
186  outerrel->relids,
187  innerrel,
188  JOIN_INNER,
189  restrictlist,
190  false);
191  break;
192  default:
193  extra.inner_unique = innerrel_is_unique(root,
194  joinrel->relids,
195  outerrel->relids,
196  innerrel,
197  jointype,
198  restrictlist,
199  false);
200  break;
201  }
202 
203  /*
204  * Find potential mergejoin clauses. We can skip this if we are not
205  * interested in doing a mergejoin. However, mergejoin may be our only
206  * way of implementing a full outer join, so override enable_mergejoin if
207  * it's a full join.
208  */
209  if (enable_mergejoin || jointype == JOIN_FULL)
211  joinrel,
212  outerrel,
213  innerrel,
214  restrictlist,
215  jointype,
216  &mergejoin_allowed);
217 
218  /*
219  * If it's SEMI, ANTI, or inner_unique join, compute correction factors
220  * for cost estimation. These will be the same for all paths.
221  */
222  if (jointype == JOIN_SEMI || jointype == JOIN_ANTI || extra.inner_unique)
223  compute_semi_anti_join_factors(root, joinrel, outerrel, innerrel,
224  jointype, sjinfo, restrictlist,
225  &extra.semifactors);
226 
227  /*
228  * Decide whether it's sensible to generate parameterized paths for this
229  * joinrel, and if so, which relations such paths should require. There
230  * is usually no need to create a parameterized result path unless there
231  * is a join order restriction that prevents joining one of our input rels
232  * directly to the parameter source rel instead of joining to the other
233  * input rel. (But see allow_star_schema_join().) This restriction
234  * reduces the number of parameterized paths we have to deal with at
235  * higher join levels, without compromising the quality of the resulting
236  * plan. We express the restriction as a Relids set that must overlap the
237  * parameterization of any proposed join path.
238  */
239  foreach(lc, root->join_info_list)
240  {
241  SpecialJoinInfo *sjinfo2 = (SpecialJoinInfo *) lfirst(lc);
242 
243  /*
244  * SJ is relevant to this join if we have some part of its RHS
245  * (possibly not all of it), and haven't yet joined to its LHS. (This
246  * test is pretty simplistic, but should be sufficient considering the
247  * join has already been proven legal.) If the SJ is relevant, it
248  * presents constraints for joining to anything not in its RHS.
249  */
250  if (bms_overlap(joinrelids, sjinfo2->min_righthand) &&
251  !bms_overlap(joinrelids, sjinfo2->min_lefthand))
254  sjinfo2->min_righthand));
255 
256  /* full joins constrain both sides symmetrically */
257  if (sjinfo2->jointype == JOIN_FULL &&
258  bms_overlap(joinrelids, sjinfo2->min_lefthand) &&
259  !bms_overlap(joinrelids, sjinfo2->min_righthand))
262  sjinfo2->min_lefthand));
263  }
264 
265  /*
266  * However, when a LATERAL subquery is involved, there will simply not be
267  * any paths for the joinrel that aren't parameterized by whatever the
268  * subquery is parameterized by, unless its parameterization is resolved
269  * within the joinrel. So we might as well allow additional dependencies
270  * on whatever residual lateral dependencies the joinrel will have.
271  */
273  joinrel->lateral_relids);
274 
275  /*
276  * 1. Consider mergejoin paths where both relations must be explicitly
277  * sorted. Skip this if we can't mergejoin.
278  */
279  if (mergejoin_allowed)
280  sort_inner_and_outer(root, joinrel, outerrel, innerrel,
281  jointype, &extra);
282 
283  /*
284  * 2. Consider paths where the outer relation need not be explicitly
285  * sorted. This includes both nestloops and mergejoins where the outer
286  * path is already ordered. Again, skip this if we can't mergejoin.
287  * (That's okay because we know that nestloop can't handle right/full
288  * joins at all, so it wouldn't work in the prohibited cases either.)
289  */
290  if (mergejoin_allowed)
291  match_unsorted_outer(root, joinrel, outerrel, innerrel,
292  jointype, &extra);
293 
294 #ifdef NOT_USED
295 
296  /*
297  * 3. Consider paths where the inner relation need not be explicitly
298  * sorted. This includes mergejoins only (nestloops were already built in
299  * match_unsorted_outer).
300  *
301  * Diked out as redundant 2/13/2000 -- tgl. There isn't any really
302  * significant difference between the inner and outer side of a mergejoin,
303  * so match_unsorted_inner creates no paths that aren't equivalent to
304  * those made by match_unsorted_outer when add_paths_to_joinrel() is
305  * invoked with the two rels given in the other order.
306  */
307  if (mergejoin_allowed)
308  match_unsorted_inner(root, joinrel, outerrel, innerrel,
309  jointype, &extra);
310 #endif
311 
312  /*
313  * 4. Consider paths where both outer and inner relations must be hashed
314  * before being joined. As above, disregard enable_hashjoin for full
315  * joins, because there may be no other alternative.
316  */
317  if (enable_hashjoin || jointype == JOIN_FULL)
318  hash_inner_and_outer(root, joinrel, outerrel, innerrel,
319  jointype, &extra);
320 
321  /*
322  * 5. If inner and outer relations are foreign tables (or joins) belonging
323  * to the same server and assigned to the same user to check access
324  * permissions as, give the FDW a chance to push down joins.
325  */
326  if (joinrel->fdwroutine &&
328  joinrel->fdwroutine->GetForeignJoinPaths(root, joinrel,
329  outerrel, innerrel,
330  jointype, &extra);
331 
332  /*
333  * 6. Finally, give extensions a chance to manipulate the path list.
334  */
336  set_join_pathlist_hook(root, joinrel, outerrel, innerrel,
337  jointype, &extra);
338 }
339 
340 /*
341  * We override the param_source_rels heuristic to accept nestloop paths in
342  * which the outer rel satisfies some but not all of the inner path's
343  * parameterization. This is necessary to get good plans for star-schema
344  * scenarios, in which a parameterized path for a large table may require
345  * parameters from multiple small tables that will not get joined directly to
346  * each other. We can handle that by stacking nestloops that have the small
347  * tables on the outside; but this breaks the rule the param_source_rels
348  * heuristic is based on, namely that parameters should not be passed down
349  * across joins unless there's a join-order-constraint-based reason to do so.
350  * So we ignore the param_source_rels restriction when this case applies.
351  *
352  * allow_star_schema_join() returns true if the param_source_rels restriction
353  * should be overridden, ie, it's okay to perform this join.
354  */
355 static inline bool
357  Relids outerrelids,
358  Relids inner_paramrels)
359 {
360  /*
361  * It's a star-schema case if the outer rel provides some but not all of
362  * the inner rel's parameterization.
363  */
364  return (bms_overlap(inner_paramrels, outerrelids) &&
365  bms_nonempty_difference(inner_paramrels, outerrelids));
366 }
367 
368 /*
369  * paraminfo_get_equal_hashops
370  * Determine if param_info and innerrel's lateral_vars can be hashed.
371  * Returns true the hashing is possible, otherwise return false.
372  *
373  * Additionally we also collect the outer exprs and the hash operators for
374  * each parameter to innerrel. These set in 'param_exprs' and 'operators'
375  * when we return true.
376  */
377 static bool
379  RelOptInfo *outerrel, RelOptInfo *innerrel,
380  List **param_exprs, List **operators)
381 
382 {
383  ListCell *lc;
384 
385  *param_exprs = NIL;
386  *operators = NIL;
387 
388  if (param_info != NULL)
389  {
390  List *clauses = param_info->ppi_clauses;
391 
392  foreach(lc, clauses)
393  {
394  RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
395  OpExpr *opexpr;
396  Node *expr;
397 
398  /* can't use a memoize node without a valid hash equals operator */
399  if (!OidIsValid(rinfo->hasheqoperator) ||
400  !clause_sides_match_join(rinfo, outerrel, innerrel))
401  {
402  list_free(*operators);
403  list_free(*param_exprs);
404  return false;
405  }
406 
407  /*
408  * We already checked that this is an OpExpr with 2 args when
409  * setting hasheqoperator.
410  */
411  opexpr = (OpExpr *) rinfo->clause;
412  if (rinfo->outer_is_left)
413  expr = (Node *) linitial(opexpr->args);
414  else
415  expr = (Node *) lsecond(opexpr->args);
416 
417  *operators = lappend_oid(*operators, rinfo->hasheqoperator);
418  *param_exprs = lappend(*param_exprs, expr);
419  }
420  }
421 
422  /* Now add any lateral vars to the cache key too */
423  foreach(lc, innerrel->lateral_vars)
424  {
425  Node *expr = (Node *) lfirst(lc);
426  TypeCacheEntry *typentry;
427 
428  /* Reject if there are any volatile functions */
429  if (contain_volatile_functions(expr))
430  {
431  list_free(*operators);
432  list_free(*param_exprs);
433  return false;
434  }
435 
436  typentry = lookup_type_cache(exprType(expr),
438 
439  /* can't use a memoize node without a valid hash equals operator */
440  if (!OidIsValid(typentry->hash_proc) || !OidIsValid(typentry->eq_opr))
441  {
442  list_free(*operators);
443  list_free(*param_exprs);
444  return false;
445  }
446 
447  *operators = lappend_oid(*operators, typentry->eq_opr);
448  *param_exprs = lappend(*param_exprs, expr);
449  }
450 
451  /* We're okay to use memoize */
452  return true;
453 }
454 
455 /*
456  * get_memoize_path
457  * If possible, make and return a Memoize path atop of 'inner_path'.
458  * Otherwise return NULL.
459  */
460 static Path *
462  RelOptInfo *outerrel, Path *inner_path,
463  Path *outer_path, JoinType jointype,
464  JoinPathExtraData *extra)
465 {
466  List *param_exprs;
467  List *hash_operators;
468  ListCell *lc;
469 
470  /* Obviously not if it's disabled */
471  if (!enable_memoize)
472  return NULL;
473 
474  /*
475  * We can safely not bother with all this unless we expect to perform more
476  * than one inner scan. The first scan is always going to be a cache
477  * miss. This would likely fail later anyway based on costs, so this is
478  * really just to save some wasted effort.
479  */
480  if (outer_path->parent->rows < 2)
481  return NULL;
482 
483  /*
484  * We can only have a memoize node when there's some kind of cache key,
485  * either parameterized path clauses or lateral Vars. No cache key sounds
486  * more like something a Materialize node might be more useful for.
487  */
488  if ((inner_path->param_info == NULL ||
489  inner_path->param_info->ppi_clauses == NIL) &&
490  innerrel->lateral_vars == NIL)
491  return NULL;
492 
493  /*
494  * Currently we don't do this for SEMI and ANTI joins unless they're
495  * marked as inner_unique. This is because nested loop SEMI/ANTI joins
496  * don't scan the inner node to completion, which will mean memoize cannot
497  * mark the cache entry as complete.
498  *
499  * XXX Currently we don't attempt to mark SEMI/ANTI joins as inner_unique
500  * = true. Should we? See add_paths_to_joinrel()
501  */
502  if (!extra->inner_unique && (jointype == JOIN_SEMI ||
503  jointype == JOIN_ANTI))
504  return NULL;
505 
506  /*
507  * Memoize normally marks cache entries as complete when it runs out of
508  * tuples to read from its subplan. However, with unique joins, Nested
509  * Loop will skip to the next outer tuple after finding the first matching
510  * inner tuple. This means that we may not read the inner side of the
511  * join to completion which leaves no opportunity to mark the cache entry
512  * as complete. To work around that, when the join is unique we
513  * automatically mark cache entries as complete after fetching the first
514  * tuple. This works when the entire join condition is parameterized.
515  * Otherwise, when the parameterization is only a subset of the join
516  * condition, we can't be sure which part of it causes the join to be
517  * unique. This means there are no guarantees that only 1 tuple will be
518  * read. We cannot mark the cache entry as complete after reading the
519  * first tuple without that guarantee. This means the scope of Memoize
520  * node's usefulness is limited to only outer rows that have no join
521  * partner as this is the only case where Nested Loop would exhaust the
522  * inner scan of a unique join. Since the scope is limited to that, we
523  * just don't bother making a memoize path in this case.
524  *
525  * Lateral vars needn't be considered here as they're not considered when
526  * determining if the join is unique.
527  *
528  * XXX this could be enabled if the remaining join quals were made part of
529  * the inner scan's filter instead of the join filter. Maybe it's worth
530  * considering doing that?
531  */
532  if (extra->inner_unique &&
533  (inner_path->param_info == NULL ||
534  list_length(inner_path->param_info->ppi_clauses) <
535  list_length(extra->restrictlist)))
536  return NULL;
537 
538  /*
539  * We can't use a memoize node if there are volatile functions in the
540  * inner rel's target list or restrict list. A cache hit could reduce the
541  * number of calls to these functions.
542  */
543  if (contain_volatile_functions((Node *) innerrel->reltarget))
544  return NULL;
545 
546  foreach(lc, innerrel->baserestrictinfo)
547  {
548  RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
549 
550  if (contain_volatile_functions((Node *) rinfo))
551  return NULL;
552  }
553 
554  /* Check if we have hash ops for each parameter to the path */
556  inner_path->param_info,
557  outerrel,
558  innerrel,
559  &param_exprs,
560  &hash_operators))
561  {
562  return (Path *) create_memoize_path(root,
563  innerrel,
564  inner_path,
565  param_exprs,
566  hash_operators,
567  extra->inner_unique,
568  outer_path->parent->rows);
569  }
570 
571  return NULL;
572 }
573 
574 /*
575  * try_nestloop_path
576  * Consider a nestloop join path; if it appears useful, push it into
577  * the joinrel's pathlist via add_path().
578  */
579 static void
581  RelOptInfo *joinrel,
582  Path *outer_path,
583  Path *inner_path,
584  List *pathkeys,
585  JoinType jointype,
586  JoinPathExtraData *extra)
587 {
588  Relids required_outer;
589  JoinCostWorkspace workspace;
590  RelOptInfo *innerrel = inner_path->parent;
591  RelOptInfo *outerrel = outer_path->parent;
592  Relids innerrelids;
593  Relids outerrelids;
594  Relids inner_paramrels = PATH_REQ_OUTER(inner_path);
595  Relids outer_paramrels = PATH_REQ_OUTER(outer_path);
596 
597  /*
598  * Paths are parameterized by top-level parents, so run parameterization
599  * tests on the parent relids.
600  */
601  if (innerrel->top_parent_relids)
602  innerrelids = innerrel->top_parent_relids;
603  else
604  innerrelids = innerrel->relids;
605 
606  if (outerrel->top_parent_relids)
607  outerrelids = outerrel->top_parent_relids;
608  else
609  outerrelids = outerrel->relids;
610 
611  /*
612  * Check to see if proposed path is still parameterized, and reject if the
613  * parameterization wouldn't be sensible --- unless allow_star_schema_join
614  * says to allow it anyway. Also, we must reject if have_dangerous_phv
615  * doesn't like the look of it, which could only happen if the nestloop is
616  * still parameterized.
617  */
618  required_outer = calc_nestloop_required_outer(outerrelids, outer_paramrels,
619  innerrelids, inner_paramrels);
620  if (required_outer &&
621  ((!bms_overlap(required_outer, extra->param_source_rels) &&
622  !allow_star_schema_join(root, outerrelids, inner_paramrels)) ||
623  have_dangerous_phv(root, outerrelids, inner_paramrels)))
624  {
625  /* Waste no memory when we reject a path here */
626  bms_free(required_outer);
627  return;
628  }
629 
630  /*
631  * Do a precheck to quickly eliminate obviously-inferior paths. We
632  * calculate a cheap lower bound on the path's cost and then use
633  * add_path_precheck() to see if the path is clearly going to be dominated
634  * by some existing path for the joinrel. If not, do the full pushup with
635  * creating a fully valid path structure and submitting it to add_path().
636  * The latter two steps are expensive enough to make this two-phase
637  * methodology worthwhile.
638  */
639  initial_cost_nestloop(root, &workspace, jointype,
640  outer_path, inner_path, extra);
641 
642  if (add_path_precheck(joinrel,
643  workspace.startup_cost, workspace.total_cost,
644  pathkeys, required_outer))
645  {
646  /*
647  * If the inner path is parameterized, it is parameterized by the
648  * topmost parent of the outer rel, not the outer rel itself. Fix
649  * that.
650  */
651  if (PATH_PARAM_BY_PARENT(inner_path, outer_path->parent))
652  {
653  inner_path = reparameterize_path_by_child(root, inner_path,
654  outer_path->parent);
655 
656  /*
657  * If we could not translate the path, we can't create nest loop
658  * path.
659  */
660  if (!inner_path)
661  {
662  bms_free(required_outer);
663  return;
664  }
665  }
666 
667  add_path(joinrel, (Path *)
669  joinrel,
670  jointype,
671  &workspace,
672  extra,
673  outer_path,
674  inner_path,
675  extra->restrictlist,
676  pathkeys,
677  required_outer));
678  }
679  else
680  {
681  /* Waste no memory when we reject a path here */
682  bms_free(required_outer);
683  }
684 }
685 
686 /*
687  * try_partial_nestloop_path
688  * Consider a partial nestloop join path; if it appears useful, push it into
689  * the joinrel's partial_pathlist via add_partial_path().
690  */
691 static void
693  RelOptInfo *joinrel,
694  Path *outer_path,
695  Path *inner_path,
696  List *pathkeys,
697  JoinType jointype,
698  JoinPathExtraData *extra)
699 {
700  JoinCostWorkspace workspace;
701 
702  /*
703  * If the inner path is parameterized, the parameterization must be fully
704  * satisfied by the proposed outer path. Parameterized partial paths are
705  * not supported. The caller should already have verified that no lateral
706  * rels are required here.
707  */
709  if (inner_path->param_info != NULL)
710  {
711  Relids inner_paramrels = inner_path->param_info->ppi_req_outer;
712  RelOptInfo *outerrel = outer_path->parent;
713  Relids outerrelids;
714 
715  /*
716  * The inner and outer paths are parameterized, if at all, by the top
717  * level parents, not the child relations, so we must use those relids
718  * for our parameterization tests.
719  */
720  if (outerrel->top_parent_relids)
721  outerrelids = outerrel->top_parent_relids;
722  else
723  outerrelids = outerrel->relids;
724 
725  if (!bms_is_subset(inner_paramrels, outerrelids))
726  return;
727  }
728 
729  /*
730  * Before creating a path, get a quick lower bound on what it is likely to
731  * cost. Bail out right away if it looks terrible.
732  */
733  initial_cost_nestloop(root, &workspace, jointype,
734  outer_path, inner_path, extra);
735  if (!add_partial_path_precheck(joinrel, workspace.total_cost, pathkeys))
736  return;
737 
738  /*
739  * If the inner path is parameterized, it is parameterized by the topmost
740  * parent of the outer rel, not the outer rel itself. Fix that.
741  */
742  if (PATH_PARAM_BY_PARENT(inner_path, outer_path->parent))
743  {
744  inner_path = reparameterize_path_by_child(root, inner_path,
745  outer_path->parent);
746 
747  /*
748  * If we could not translate the path, we can't create nest loop path.
749  */
750  if (!inner_path)
751  return;
752  }
753 
754  /* Might be good enough to be worth trying, so let's try it. */
755  add_partial_path(joinrel, (Path *)
757  joinrel,
758  jointype,
759  &workspace,
760  extra,
761  outer_path,
762  inner_path,
763  extra->restrictlist,
764  pathkeys,
765  NULL));
766 }
767 
768 /*
769  * try_mergejoin_path
770  * Consider a merge join path; if it appears useful, push it into
771  * the joinrel's pathlist via add_path().
772  */
773 static void
775  RelOptInfo *joinrel,
776  Path *outer_path,
777  Path *inner_path,
778  List *pathkeys,
779  List *mergeclauses,
780  List *outersortkeys,
781  List *innersortkeys,
782  JoinType jointype,
783  JoinPathExtraData *extra,
784  bool is_partial)
785 {
786  Relids required_outer;
787  JoinCostWorkspace workspace;
788 
789  if (is_partial)
790  {
792  joinrel,
793  outer_path,
794  inner_path,
795  pathkeys,
796  mergeclauses,
797  outersortkeys,
798  innersortkeys,
799  jointype,
800  extra);
801  return;
802  }
803 
804  /*
805  * Check to see if proposed path is still parameterized, and reject if the
806  * parameterization wouldn't be sensible.
807  */
808  required_outer = calc_non_nestloop_required_outer(outer_path,
809  inner_path);
810  if (required_outer &&
811  !bms_overlap(required_outer, extra->param_source_rels))
812  {
813  /* Waste no memory when we reject a path here */
814  bms_free(required_outer);
815  return;
816  }
817 
818  /*
819  * If the given paths are already well enough ordered, we can skip doing
820  * an explicit sort.
821  */
822  if (outersortkeys &&
823  pathkeys_contained_in(outersortkeys, outer_path->pathkeys))
824  outersortkeys = NIL;
825  if (innersortkeys &&
826  pathkeys_contained_in(innersortkeys, inner_path->pathkeys))
827  innersortkeys = NIL;
828 
829  /*
830  * See comments in try_nestloop_path().
831  */
832  initial_cost_mergejoin(root, &workspace, jointype, mergeclauses,
833  outer_path, inner_path,
834  outersortkeys, innersortkeys,
835  extra);
836 
837  if (add_path_precheck(joinrel,
838  workspace.startup_cost, workspace.total_cost,
839  pathkeys, required_outer))
840  {
841  add_path(joinrel, (Path *)
843  joinrel,
844  jointype,
845  &workspace,
846  extra,
847  outer_path,
848  inner_path,
849  extra->restrictlist,
850  pathkeys,
851  required_outer,
852  mergeclauses,
853  outersortkeys,
854  innersortkeys));
855  }
856  else
857  {
858  /* Waste no memory when we reject a path here */
859  bms_free(required_outer);
860  }
861 }
862 
863 /*
864  * try_partial_mergejoin_path
865  * Consider a partial merge join path; if it appears useful, push it into
866  * the joinrel's pathlist via add_partial_path().
867  */
868 static void
870  RelOptInfo *joinrel,
871  Path *outer_path,
872  Path *inner_path,
873  List *pathkeys,
874  List *mergeclauses,
875  List *outersortkeys,
876  List *innersortkeys,
877  JoinType jointype,
878  JoinPathExtraData *extra)
879 {
880  JoinCostWorkspace workspace;
881 
882  /*
883  * See comments in try_partial_hashjoin_path().
884  */
886  if (inner_path->param_info != NULL)
887  {
888  Relids inner_paramrels = inner_path->param_info->ppi_req_outer;
889 
890  if (!bms_is_empty(inner_paramrels))
891  return;
892  }
893 
894  /*
895  * If the given paths are already well enough ordered, we can skip doing
896  * an explicit sort.
897  */
898  if (outersortkeys &&
899  pathkeys_contained_in(outersortkeys, outer_path->pathkeys))
900  outersortkeys = NIL;
901  if (innersortkeys &&
902  pathkeys_contained_in(innersortkeys, inner_path->pathkeys))
903  innersortkeys = NIL;
904 
905  /*
906  * See comments in try_partial_nestloop_path().
907  */
908  initial_cost_mergejoin(root, &workspace, jointype, mergeclauses,
909  outer_path, inner_path,
910  outersortkeys, innersortkeys,
911  extra);
912 
913  if (!add_partial_path_precheck(joinrel, workspace.total_cost, pathkeys))
914  return;
915 
916  /* Might be good enough to be worth trying, so let's try it. */
917  add_partial_path(joinrel, (Path *)
919  joinrel,
920  jointype,
921  &workspace,
922  extra,
923  outer_path,
924  inner_path,
925  extra->restrictlist,
926  pathkeys,
927  NULL,
928  mergeclauses,
929  outersortkeys,
930  innersortkeys));
931 }
932 
933 /*
934  * try_hashjoin_path
935  * Consider a hash join path; if it appears useful, push it into
936  * the joinrel's pathlist via add_path().
937  */
938 static void
940  RelOptInfo *joinrel,
941  Path *outer_path,
942  Path *inner_path,
943  List *hashclauses,
944  JoinType jointype,
945  JoinPathExtraData *extra)
946 {
947  Relids required_outer;
948  JoinCostWorkspace workspace;
949 
950  /*
951  * Check to see if proposed path is still parameterized, and reject if the
952  * parameterization wouldn't be sensible.
953  */
954  required_outer = calc_non_nestloop_required_outer(outer_path,
955  inner_path);
956  if (required_outer &&
957  !bms_overlap(required_outer, extra->param_source_rels))
958  {
959  /* Waste no memory when we reject a path here */
960  bms_free(required_outer);
961  return;
962  }
963 
964  /*
965  * See comments in try_nestloop_path(). Also note that hashjoin paths
966  * never have any output pathkeys, per comments in create_hashjoin_path.
967  */
968  initial_cost_hashjoin(root, &workspace, jointype, hashclauses,
969  outer_path, inner_path, extra, false);
970 
971  if (add_path_precheck(joinrel,
972  workspace.startup_cost, workspace.total_cost,
973  NIL, required_outer))
974  {
975  add_path(joinrel, (Path *)
977  joinrel,
978  jointype,
979  &workspace,
980  extra,
981  outer_path,
982  inner_path,
983  false, /* parallel_hash */
984  extra->restrictlist,
985  required_outer,
986  hashclauses));
987  }
988  else
989  {
990  /* Waste no memory when we reject a path here */
991  bms_free(required_outer);
992  }
993 }
994 
995 /*
996  * try_partial_hashjoin_path
997  * Consider a partial hashjoin join path; if it appears useful, push it into
998  * the joinrel's partial_pathlist via add_partial_path().
999  * The outer side is partial. If parallel_hash is true, then the inner path
1000  * must be partial and will be run in parallel to create one or more shared
1001  * hash tables; otherwise the inner path must be complete and a copy of it
1002  * is run in every process to create separate identical private hash tables.
1003  */
1004 static void
1006  RelOptInfo *joinrel,
1007  Path *outer_path,
1008  Path *inner_path,
1009  List *hashclauses,
1010  JoinType jointype,
1011  JoinPathExtraData *extra,
1012  bool parallel_hash)
1013 {
1014  JoinCostWorkspace workspace;
1015 
1016  /*
1017  * If the inner path is parameterized, the parameterization must be fully
1018  * satisfied by the proposed outer path. Parameterized partial paths are
1019  * not supported. The caller should already have verified that no lateral
1020  * rels are required here.
1021  */
1022  Assert(bms_is_empty(joinrel->lateral_relids));
1023  if (inner_path->param_info != NULL)
1024  {
1025  Relids inner_paramrels = inner_path->param_info->ppi_req_outer;
1026 
1027  if (!bms_is_empty(inner_paramrels))
1028  return;
1029  }
1030 
1031  /*
1032  * Before creating a path, get a quick lower bound on what it is likely to
1033  * cost. Bail out right away if it looks terrible.
1034  */
1035  initial_cost_hashjoin(root, &workspace, jointype, hashclauses,
1036  outer_path, inner_path, extra, parallel_hash);
1037  if (!add_partial_path_precheck(joinrel, workspace.total_cost, NIL))
1038  return;
1039 
1040  /* Might be good enough to be worth trying, so let's try it. */
1041  add_partial_path(joinrel, (Path *)
1042  create_hashjoin_path(root,
1043  joinrel,
1044  jointype,
1045  &workspace,
1046  extra,
1047  outer_path,
1048  inner_path,
1049  parallel_hash,
1050  extra->restrictlist,
1051  NULL,
1052  hashclauses));
1053 }
1054 
1055 /*
1056  * clause_sides_match_join
1057  * Determine whether a join clause is of the right form to use in this join.
1058  *
1059  * We already know that the clause is a binary opclause referencing only the
1060  * rels in the current join. The point here is to check whether it has the
1061  * form "outerrel_expr op innerrel_expr" or "innerrel_expr op outerrel_expr",
1062  * rather than mixing outer and inner vars on either side. If it matches,
1063  * we set the transient flag outer_is_left to identify which side is which.
1064  */
1065 static inline bool
1067  RelOptInfo *innerrel)
1068 {
1069  if (bms_is_subset(rinfo->left_relids, outerrel->relids) &&
1070  bms_is_subset(rinfo->right_relids, innerrel->relids))
1071  {
1072  /* lefthand side is outer */
1073  rinfo->outer_is_left = true;
1074  return true;
1075  }
1076  else if (bms_is_subset(rinfo->left_relids, innerrel->relids) &&
1077  bms_is_subset(rinfo->right_relids, outerrel->relids))
1078  {
1079  /* righthand side is outer */
1080  rinfo->outer_is_left = false;
1081  return true;
1082  }
1083  return false; /* no good for these input relations */
1084 }
1085 
1086 /*
1087  * sort_inner_and_outer
1088  * Create mergejoin join paths by explicitly sorting both the outer and
1089  * inner join relations on each available merge ordering.
1090  *
1091  * 'joinrel' is the join relation
1092  * 'outerrel' is the outer join relation
1093  * 'innerrel' is the inner join relation
1094  * 'jointype' is the type of join to do
1095  * 'extra' contains additional input values
1096  */
1097 static void
1099  RelOptInfo *joinrel,
1100  RelOptInfo *outerrel,
1101  RelOptInfo *innerrel,
1102  JoinType jointype,
1103  JoinPathExtraData *extra)
1104 {
1105  JoinType save_jointype = jointype;
1106  Path *outer_path;
1107  Path *inner_path;
1108  Path *cheapest_partial_outer = NULL;
1109  Path *cheapest_safe_inner = NULL;
1110  List *all_pathkeys;
1111  ListCell *l;
1112 
1113  /*
1114  * We only consider the cheapest-total-cost input paths, since we are
1115  * assuming here that a sort is required. We will consider
1116  * cheapest-startup-cost input paths later, and only if they don't need a
1117  * sort.
1118  *
1119  * This function intentionally does not consider parameterized input
1120  * paths, except when the cheapest-total is parameterized. If we did so,
1121  * we'd have a combinatorial explosion of mergejoin paths of dubious
1122  * value. This interacts with decisions elsewhere that also discriminate
1123  * against mergejoins with parameterized inputs; see comments in
1124  * src/backend/optimizer/README.
1125  */
1126  outer_path = outerrel->cheapest_total_path;
1127  inner_path = innerrel->cheapest_total_path;
1128 
1129  /*
1130  * If either cheapest-total path is parameterized by the other rel, we
1131  * can't use a mergejoin. (There's no use looking for alternative input
1132  * paths, since these should already be the least-parameterized available
1133  * paths.)
1134  */
1135  if (PATH_PARAM_BY_REL(outer_path, innerrel) ||
1136  PATH_PARAM_BY_REL(inner_path, outerrel))
1137  return;
1138 
1139  /*
1140  * If unique-ification is requested, do it and then handle as a plain
1141  * inner join.
1142  */
1143  if (jointype == JOIN_UNIQUE_OUTER)
1144  {
1145  outer_path = (Path *) create_unique_path(root, outerrel,
1146  outer_path, extra->sjinfo);
1147  Assert(outer_path);
1148  jointype = JOIN_INNER;
1149  }
1150  else if (jointype == JOIN_UNIQUE_INNER)
1151  {
1152  inner_path = (Path *) create_unique_path(root, innerrel,
1153  inner_path, extra->sjinfo);
1154  Assert(inner_path);
1155  jointype = JOIN_INNER;
1156  }
1157 
1158  /*
1159  * If the joinrel is parallel-safe, we may be able to consider a partial
1160  * merge join. However, we can't handle JOIN_UNIQUE_OUTER, because the
1161  * outer path will be partial, and therefore we won't be able to properly
1162  * guarantee uniqueness. Similarly, we can't handle JOIN_FULL and
1163  * JOIN_RIGHT, because they can produce false null extended rows. Also,
1164  * the resulting path must not be parameterized.
1165  */
1166  if (joinrel->consider_parallel &&
1167  save_jointype != JOIN_UNIQUE_OUTER &&
1168  save_jointype != JOIN_FULL &&
1169  save_jointype != JOIN_RIGHT &&
1170  outerrel->partial_pathlist != NIL &&
1171  bms_is_empty(joinrel->lateral_relids))
1172  {
1173  cheapest_partial_outer = (Path *) linitial(outerrel->partial_pathlist);
1174 
1175  if (inner_path->parallel_safe)
1176  cheapest_safe_inner = inner_path;
1177  else if (save_jointype != JOIN_UNIQUE_INNER)
1178  cheapest_safe_inner =
1180  }
1181 
1182  /*
1183  * Each possible ordering of the available mergejoin clauses will generate
1184  * a differently-sorted result path at essentially the same cost. We have
1185  * no basis for choosing one over another at this level of joining, but
1186  * some sort orders may be more useful than others for higher-level
1187  * mergejoins, so it's worth considering multiple orderings.
1188  *
1189  * Actually, it's not quite true that every mergeclause ordering will
1190  * generate a different path order, because some of the clauses may be
1191  * partially redundant (refer to the same EquivalenceClasses). Therefore,
1192  * what we do is convert the mergeclause list to a list of canonical
1193  * pathkeys, and then consider different orderings of the pathkeys.
1194  *
1195  * Generating a path for *every* permutation of the pathkeys doesn't seem
1196  * like a winning strategy; the cost in planning time is too high. For
1197  * now, we generate one path for each pathkey, listing that pathkey first
1198  * and the rest in random order. This should allow at least a one-clause
1199  * mergejoin without re-sorting against any other possible mergejoin
1200  * partner path. But if we've not guessed the right ordering of secondary
1201  * keys, we may end up evaluating clauses as qpquals when they could have
1202  * been done as mergeclauses. (In practice, it's rare that there's more
1203  * than two or three mergeclauses, so expending a huge amount of thought
1204  * on that is probably not worth it.)
1205  *
1206  * The pathkey order returned by select_outer_pathkeys_for_merge() has
1207  * some heuristics behind it (see that function), so be sure to try it
1208  * exactly as-is as well as making variants.
1209  */
1210  all_pathkeys = select_outer_pathkeys_for_merge(root,
1211  extra->mergeclause_list,
1212  joinrel);
1213 
1214  foreach(l, all_pathkeys)
1215  {
1216  List *front_pathkey = (List *) lfirst(l);
1217  List *cur_mergeclauses;
1218  List *outerkeys;
1219  List *innerkeys;
1220  List *merge_pathkeys;
1221 
1222  /* Make a pathkey list with this guy first */
1223  if (l != list_head(all_pathkeys))
1224  outerkeys = lcons(front_pathkey,
1225  list_delete_nth_cell(list_copy(all_pathkeys),
1226  foreach_current_index(l)));
1227  else
1228  outerkeys = all_pathkeys; /* no work at first one... */
1229 
1230  /* Sort the mergeclauses into the corresponding ordering */
1231  cur_mergeclauses =
1233  outerkeys,
1234  extra->mergeclause_list);
1235 
1236  /* Should have used them all... */
1237  Assert(list_length(cur_mergeclauses) == list_length(extra->mergeclause_list));
1238 
1239  /* Build sort pathkeys for the inner side */
1240  innerkeys = make_inner_pathkeys_for_merge(root,
1241  cur_mergeclauses,
1242  outerkeys);
1243 
1244  /* Build pathkeys representing output sort order */
1245  merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
1246  outerkeys);
1247 
1248  /*
1249  * And now we can make the path.
1250  *
1251  * Note: it's possible that the cheapest paths will already be sorted
1252  * properly. try_mergejoin_path will detect that case and suppress an
1253  * explicit sort step, so we needn't do so here.
1254  */
1255  try_mergejoin_path(root,
1256  joinrel,
1257  outer_path,
1258  inner_path,
1259  merge_pathkeys,
1260  cur_mergeclauses,
1261  outerkeys,
1262  innerkeys,
1263  jointype,
1264  extra,
1265  false);
1266 
1267  /*
1268  * If we have partial outer and parallel safe inner path then try
1269  * partial mergejoin path.
1270  */
1271  if (cheapest_partial_outer && cheapest_safe_inner)
1273  joinrel,
1274  cheapest_partial_outer,
1275  cheapest_safe_inner,
1276  merge_pathkeys,
1277  cur_mergeclauses,
1278  outerkeys,
1279  innerkeys,
1280  jointype,
1281  extra);
1282  }
1283 }
1284 
1285 /*
1286  * generate_mergejoin_paths
1287  * Creates possible mergejoin paths for input outerpath.
1288  *
1289  * We generate mergejoins if mergejoin clauses are available. We have
1290  * two ways to generate the inner path for a mergejoin: sort the cheapest
1291  * inner path, or use an inner path that is already suitably ordered for the
1292  * merge. If we have several mergeclauses, it could be that there is no inner
1293  * path (or only a very expensive one) for the full list of mergeclauses, but
1294  * better paths exist if we truncate the mergeclause list (thereby discarding
1295  * some sort key requirements). So, we consider truncations of the
1296  * mergeclause list as well as the full list. (Ideally we'd consider all
1297  * subsets of the mergeclause list, but that seems way too expensive.)
1298  */
1299 static void
1301  RelOptInfo *joinrel,
1302  RelOptInfo *innerrel,
1303  Path *outerpath,
1304  JoinType jointype,
1305  JoinPathExtraData *extra,
1306  bool useallclauses,
1307  Path *inner_cheapest_total,
1308  List *merge_pathkeys,
1309  bool is_partial)
1310 {
1311  List *mergeclauses;
1312  List *innersortkeys;
1313  List *trialsortkeys;
1314  Path *cheapest_startup_inner;
1315  Path *cheapest_total_inner;
1316  JoinType save_jointype = jointype;
1317  int num_sortkeys;
1318  int sortkeycnt;
1319 
1320  if (jointype == JOIN_UNIQUE_OUTER || jointype == JOIN_UNIQUE_INNER)
1321  jointype = JOIN_INNER;
1322 
1323  /* Look for useful mergeclauses (if any) */
1324  mergeclauses =
1326  outerpath->pathkeys,
1327  extra->mergeclause_list);
1328 
1329  /*
1330  * Done with this outer path if no chance for a mergejoin.
1331  *
1332  * Special corner case: for "x FULL JOIN y ON true", there will be no join
1333  * clauses at all. Ordinarily we'd generate a clauseless nestloop path,
1334  * but since mergejoin is our only join type that supports FULL JOIN
1335  * without any join clauses, it's necessary to generate a clauseless
1336  * mergejoin path instead.
1337  */
1338  if (mergeclauses == NIL)
1339  {
1340  if (jointype == JOIN_FULL)
1341  /* okay to try for mergejoin */ ;
1342  else
1343  return;
1344  }
1345  if (useallclauses &&
1346  list_length(mergeclauses) != list_length(extra->mergeclause_list))
1347  return;
1348 
1349  /* Compute the required ordering of the inner path */
1350  innersortkeys = make_inner_pathkeys_for_merge(root,
1351  mergeclauses,
1352  outerpath->pathkeys);
1353 
1354  /*
1355  * Generate a mergejoin on the basis of sorting the cheapest inner. Since
1356  * a sort will be needed, only cheapest total cost matters. (But
1357  * try_mergejoin_path will do the right thing if inner_cheapest_total is
1358  * already correctly sorted.)
1359  */
1360  try_mergejoin_path(root,
1361  joinrel,
1362  outerpath,
1363  inner_cheapest_total,
1364  merge_pathkeys,
1365  mergeclauses,
1366  NIL,
1367  innersortkeys,
1368  jointype,
1369  extra,
1370  is_partial);
1371 
1372  /* Can't do anything else if inner path needs to be unique'd */
1373  if (save_jointype == JOIN_UNIQUE_INNER)
1374  return;
1375 
1376  /*
1377  * Look for presorted inner paths that satisfy the innersortkey list ---
1378  * or any truncation thereof, if we are allowed to build a mergejoin using
1379  * a subset of the merge clauses. Here, we consider both cheap startup
1380  * cost and cheap total cost.
1381  *
1382  * Currently we do not consider parameterized inner paths here. This
1383  * interacts with decisions elsewhere that also discriminate against
1384  * mergejoins with parameterized inputs; see comments in
1385  * src/backend/optimizer/README.
1386  *
1387  * As we shorten the sortkey list, we should consider only paths that are
1388  * strictly cheaper than (in particular, not the same as) any path found
1389  * in an earlier iteration. Otherwise we'd be intentionally using fewer
1390  * merge keys than a given path allows (treating the rest as plain
1391  * joinquals), which is unlikely to be a good idea. Also, eliminating
1392  * paths here on the basis of compare_path_costs is a lot cheaper than
1393  * building the mergejoin path only to throw it away.
1394  *
1395  * If inner_cheapest_total is well enough sorted to have not required a
1396  * sort in the path made above, we shouldn't make a duplicate path with
1397  * it, either. We handle that case with the same logic that handles the
1398  * previous consideration, by initializing the variables that track
1399  * cheapest-so-far properly. Note that we do NOT reject
1400  * inner_cheapest_total if we find it matches some shorter set of
1401  * pathkeys. That case corresponds to using fewer mergekeys to avoid
1402  * sorting inner_cheapest_total, whereas we did sort it above, so the
1403  * plans being considered are different.
1404  */
1405  if (pathkeys_contained_in(innersortkeys,
1406  inner_cheapest_total->pathkeys))
1407  {
1408  /* inner_cheapest_total didn't require a sort */
1409  cheapest_startup_inner = inner_cheapest_total;
1410  cheapest_total_inner = inner_cheapest_total;
1411  }
1412  else
1413  {
1414  /* it did require a sort, at least for the full set of keys */
1415  cheapest_startup_inner = NULL;
1416  cheapest_total_inner = NULL;
1417  }
1418  num_sortkeys = list_length(innersortkeys);
1419  if (num_sortkeys > 1 && !useallclauses)
1420  trialsortkeys = list_copy(innersortkeys); /* need modifiable copy */
1421  else
1422  trialsortkeys = innersortkeys; /* won't really truncate */
1423 
1424  for (sortkeycnt = num_sortkeys; sortkeycnt > 0; sortkeycnt--)
1425  {
1426  Path *innerpath;
1427  List *newclauses = NIL;
1428 
1429  /*
1430  * Look for an inner path ordered well enough for the first
1431  * 'sortkeycnt' innersortkeys. NB: trialsortkeys list is modified
1432  * destructively, which is why we made a copy...
1433  */
1434  trialsortkeys = list_truncate(trialsortkeys, sortkeycnt);
1435  innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
1436  trialsortkeys,
1437  NULL,
1438  TOTAL_COST,
1439  is_partial);
1440  if (innerpath != NULL &&
1441  (cheapest_total_inner == NULL ||
1442  compare_path_costs(innerpath, cheapest_total_inner,
1443  TOTAL_COST) < 0))
1444  {
1445  /* Found a cheap (or even-cheaper) sorted path */
1446  /* Select the right mergeclauses, if we didn't already */
1447  if (sortkeycnt < num_sortkeys)
1448  {
1449  newclauses =
1451  mergeclauses,
1452  trialsortkeys);
1453  Assert(newclauses != NIL);
1454  }
1455  else
1456  newclauses = mergeclauses;
1457  try_mergejoin_path(root,
1458  joinrel,
1459  outerpath,
1460  innerpath,
1461  merge_pathkeys,
1462  newclauses,
1463  NIL,
1464  NIL,
1465  jointype,
1466  extra,
1467  is_partial);
1468  cheapest_total_inner = innerpath;
1469  }
1470  /* Same on the basis of cheapest startup cost ... */
1471  innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
1472  trialsortkeys,
1473  NULL,
1474  STARTUP_COST,
1475  is_partial);
1476  if (innerpath != NULL &&
1477  (cheapest_startup_inner == NULL ||
1478  compare_path_costs(innerpath, cheapest_startup_inner,
1479  STARTUP_COST) < 0))
1480  {
1481  /* Found a cheap (or even-cheaper) sorted path */
1482  if (innerpath != cheapest_total_inner)
1483  {
1484  /*
1485  * Avoid rebuilding clause list if we already made one; saves
1486  * memory in big join trees...
1487  */
1488  if (newclauses == NIL)
1489  {
1490  if (sortkeycnt < num_sortkeys)
1491  {
1492  newclauses =
1494  mergeclauses,
1495  trialsortkeys);
1496  Assert(newclauses != NIL);
1497  }
1498  else
1499  newclauses = mergeclauses;
1500  }
1501  try_mergejoin_path(root,
1502  joinrel,
1503  outerpath,
1504  innerpath,
1505  merge_pathkeys,
1506  newclauses,
1507  NIL,
1508  NIL,
1509  jointype,
1510  extra,
1511  is_partial);
1512  }
1513  cheapest_startup_inner = innerpath;
1514  }
1515 
1516  /*
1517  * Don't consider truncated sortkeys if we need all clauses.
1518  */
1519  if (useallclauses)
1520  break;
1521  }
1522 }
1523 
1524 /*
1525  * match_unsorted_outer
1526  * Creates possible join paths for processing a single join relation
1527  * 'joinrel' by employing either iterative substitution or
1528  * mergejoining on each of its possible outer paths (considering
1529  * only outer paths that are already ordered well enough for merging).
1530  *
1531  * We always generate a nestloop path for each available outer path.
1532  * In fact we may generate as many as five: one on the cheapest-total-cost
1533  * inner path, one on the same with materialization, one on the
1534  * cheapest-startup-cost inner path (if different), one on the
1535  * cheapest-total inner-indexscan path (if any), and one on the
1536  * cheapest-startup inner-indexscan path (if different).
1537  *
1538  * We also consider mergejoins if mergejoin clauses are available. See
1539  * detailed comments in generate_mergejoin_paths.
1540  *
1541  * 'joinrel' is the join relation
1542  * 'outerrel' is the outer join relation
1543  * 'innerrel' is the inner join relation
1544  * 'jointype' is the type of join to do
1545  * 'extra' contains additional input values
1546  */
1547 static void
1549  RelOptInfo *joinrel,
1550  RelOptInfo *outerrel,
1551  RelOptInfo *innerrel,
1552  JoinType jointype,
1553  JoinPathExtraData *extra)
1554 {
1555  JoinType save_jointype = jointype;
1556  bool nestjoinOK;
1557  bool useallclauses;
1558  Path *inner_cheapest_total = innerrel->cheapest_total_path;
1559  Path *matpath = NULL;
1560  ListCell *lc1;
1561 
1562  /*
1563  * Nestloop only supports inner, left, semi, and anti joins. Also, if we
1564  * are doing a right or full mergejoin, we must use *all* the mergeclauses
1565  * as join clauses, else we will not have a valid plan. (Although these
1566  * two flags are currently inverses, keep them separate for clarity and
1567  * possible future changes.)
1568  */
1569  switch (jointype)
1570  {
1571  case JOIN_INNER:
1572  case JOIN_LEFT:
1573  case JOIN_SEMI:
1574  case JOIN_ANTI:
1575  nestjoinOK = true;
1576  useallclauses = false;
1577  break;
1578  case JOIN_RIGHT:
1579  case JOIN_FULL:
1580  nestjoinOK = false;
1581  useallclauses = true;
1582  break;
1583  case JOIN_UNIQUE_OUTER:
1584  case JOIN_UNIQUE_INNER:
1585  jointype = JOIN_INNER;
1586  nestjoinOK = true;
1587  useallclauses = false;
1588  break;
1589  default:
1590  elog(ERROR, "unrecognized join type: %d",
1591  (int) jointype);
1592  nestjoinOK = false; /* keep compiler quiet */
1593  useallclauses = false;
1594  break;
1595  }
1596 
1597  /*
1598  * If inner_cheapest_total is parameterized by the outer rel, ignore it;
1599  * we will consider it below as a member of cheapest_parameterized_paths,
1600  * but the other possibilities considered in this routine aren't usable.
1601  */
1602  if (PATH_PARAM_BY_REL(inner_cheapest_total, outerrel))
1603  inner_cheapest_total = NULL;
1604 
1605  /*
1606  * If we need to unique-ify the inner path, we will consider only the
1607  * cheapest-total inner.
1608  */
1609  if (save_jointype == JOIN_UNIQUE_INNER)
1610  {
1611  /* No way to do this with an inner path parameterized by outer rel */
1612  if (inner_cheapest_total == NULL)
1613  return;
1614  inner_cheapest_total = (Path *)
1615  create_unique_path(root, innerrel, inner_cheapest_total, extra->sjinfo);
1616  Assert(inner_cheapest_total);
1617  }
1618  else if (nestjoinOK)
1619  {
1620  /*
1621  * Consider materializing the cheapest inner path, unless
1622  * enable_material is off or the path in question materializes its
1623  * output anyway.
1624  */
1625  if (enable_material && inner_cheapest_total != NULL &&
1626  !ExecMaterializesOutput(inner_cheapest_total->pathtype))
1627  matpath = (Path *)
1628  create_material_path(innerrel, inner_cheapest_total);
1629  }
1630 
1631  foreach(lc1, outerrel->pathlist)
1632  {
1633  Path *outerpath = (Path *) lfirst(lc1);
1634  List *merge_pathkeys;
1635 
1636  /*
1637  * We cannot use an outer path that is parameterized by the inner rel.
1638  */
1639  if (PATH_PARAM_BY_REL(outerpath, innerrel))
1640  continue;
1641 
1642  /*
1643  * If we need to unique-ify the outer path, it's pointless to consider
1644  * any but the cheapest outer. (XXX we don't consider parameterized
1645  * outers, nor inners, for unique-ified cases. Should we?)
1646  */
1647  if (save_jointype == JOIN_UNIQUE_OUTER)
1648  {
1649  if (outerpath != outerrel->cheapest_total_path)
1650  continue;
1651  outerpath = (Path *) create_unique_path(root, outerrel,
1652  outerpath, extra->sjinfo);
1653  Assert(outerpath);
1654  }
1655 
1656  /*
1657  * The result will have this sort order (even if it is implemented as
1658  * a nestloop, and even if some of the mergeclauses are implemented by
1659  * qpquals rather than as true mergeclauses):
1660  */
1661  merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
1662  outerpath->pathkeys);
1663 
1664  if (save_jointype == JOIN_UNIQUE_INNER)
1665  {
1666  /*
1667  * Consider nestloop join, but only with the unique-ified cheapest
1668  * inner path
1669  */
1670  try_nestloop_path(root,
1671  joinrel,
1672  outerpath,
1673  inner_cheapest_total,
1674  merge_pathkeys,
1675  jointype,
1676  extra);
1677  }
1678  else if (nestjoinOK)
1679  {
1680  /*
1681  * Consider nestloop joins using this outer path and various
1682  * available paths for the inner relation. We consider the
1683  * cheapest-total paths for each available parameterization of the
1684  * inner relation, including the unparameterized case.
1685  */
1686  ListCell *lc2;
1687 
1688  foreach(lc2, innerrel->cheapest_parameterized_paths)
1689  {
1690  Path *innerpath = (Path *) lfirst(lc2);
1691  Path *mpath;
1692 
1693  try_nestloop_path(root,
1694  joinrel,
1695  outerpath,
1696  innerpath,
1697  merge_pathkeys,
1698  jointype,
1699  extra);
1700 
1701  /*
1702  * Try generating a memoize path and see if that makes the
1703  * nested loop any cheaper.
1704  */
1705  mpath = get_memoize_path(root, innerrel, outerrel,
1706  innerpath, outerpath, jointype,
1707  extra);
1708  if (mpath != NULL)
1709  try_nestloop_path(root,
1710  joinrel,
1711  outerpath,
1712  mpath,
1713  merge_pathkeys,
1714  jointype,
1715  extra);
1716  }
1717 
1718  /* Also consider materialized form of the cheapest inner path */
1719  if (matpath != NULL)
1720  try_nestloop_path(root,
1721  joinrel,
1722  outerpath,
1723  matpath,
1724  merge_pathkeys,
1725  jointype,
1726  extra);
1727  }
1728 
1729  /* Can't do anything else if outer path needs to be unique'd */
1730  if (save_jointype == JOIN_UNIQUE_OUTER)
1731  continue;
1732 
1733  /* Can't do anything else if inner rel is parameterized by outer */
1734  if (inner_cheapest_total == NULL)
1735  continue;
1736 
1737  /* Generate merge join paths */
1738  generate_mergejoin_paths(root, joinrel, innerrel, outerpath,
1739  save_jointype, extra, useallclauses,
1740  inner_cheapest_total, merge_pathkeys,
1741  false);
1742  }
1743 
1744  /*
1745  * Consider partial nestloop and mergejoin plan if outerrel has any
1746  * partial path and the joinrel is parallel-safe. However, we can't
1747  * handle JOIN_UNIQUE_OUTER, because the outer path will be partial, and
1748  * therefore we won't be able to properly guarantee uniqueness. Nor can
1749  * we handle joins needing lateral rels, since partial paths must not be
1750  * parameterized. Similarly, we can't handle JOIN_FULL and JOIN_RIGHT,
1751  * because they can produce false null extended rows.
1752  */
1753  if (joinrel->consider_parallel &&
1754  save_jointype != JOIN_UNIQUE_OUTER &&
1755  save_jointype != JOIN_FULL &&
1756  save_jointype != JOIN_RIGHT &&
1757  outerrel->partial_pathlist != NIL &&
1758  bms_is_empty(joinrel->lateral_relids))
1759  {
1760  if (nestjoinOK)
1761  consider_parallel_nestloop(root, joinrel, outerrel, innerrel,
1762  save_jointype, extra);
1763 
1764  /*
1765  * If inner_cheapest_total is NULL or non parallel-safe then find the
1766  * cheapest total parallel safe path. If doing JOIN_UNIQUE_INNER, we
1767  * can't use any alternative inner path.
1768  */
1769  if (inner_cheapest_total == NULL ||
1770  !inner_cheapest_total->parallel_safe)
1771  {
1772  if (save_jointype == JOIN_UNIQUE_INNER)
1773  return;
1774 
1775  inner_cheapest_total = get_cheapest_parallel_safe_total_inner(innerrel->pathlist);
1776  }
1777 
1778  if (inner_cheapest_total)
1779  consider_parallel_mergejoin(root, joinrel, outerrel, innerrel,
1780  save_jointype, extra,
1781  inner_cheapest_total);
1782  }
1783 }
1784 
1785 /*
1786  * consider_parallel_mergejoin
1787  * Try to build partial paths for a joinrel by joining a partial path
1788  * for the outer relation to a complete path for the inner relation.
1789  *
1790  * 'joinrel' is the join relation
1791  * 'outerrel' is the outer join relation
1792  * 'innerrel' is the inner join relation
1793  * 'jointype' is the type of join to do
1794  * 'extra' contains additional input values
1795  * 'inner_cheapest_total' cheapest total path for innerrel
1796  */
1797 static void
1799  RelOptInfo *joinrel,
1800  RelOptInfo *outerrel,
1801  RelOptInfo *innerrel,
1802  JoinType jointype,
1803  JoinPathExtraData *extra,
1804  Path *inner_cheapest_total)
1805 {
1806  ListCell *lc1;
1807 
1808  /* generate merge join path for each partial outer path */
1809  foreach(lc1, outerrel->partial_pathlist)
1810  {
1811  Path *outerpath = (Path *) lfirst(lc1);
1812  List *merge_pathkeys;
1813 
1814  /*
1815  * Figure out what useful ordering any paths we create will have.
1816  */
1817  merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
1818  outerpath->pathkeys);
1819 
1820  generate_mergejoin_paths(root, joinrel, innerrel, outerpath, jointype,
1821  extra, false, inner_cheapest_total,
1822  merge_pathkeys, true);
1823  }
1824 }
1825 
1826 /*
1827  * consider_parallel_nestloop
1828  * Try to build partial paths for a joinrel by joining a partial path for the
1829  * outer relation to a complete path for the inner relation.
1830  *
1831  * 'joinrel' is the join relation
1832  * 'outerrel' is the outer join relation
1833  * 'innerrel' is the inner join relation
1834  * 'jointype' is the type of join to do
1835  * 'extra' contains additional input values
1836  */
1837 static void
1839  RelOptInfo *joinrel,
1840  RelOptInfo *outerrel,
1841  RelOptInfo *innerrel,
1842  JoinType jointype,
1843  JoinPathExtraData *extra)
1844 {
1845  JoinType save_jointype = jointype;
1846  ListCell *lc1;
1847 
1848  if (jointype == JOIN_UNIQUE_INNER)
1849  jointype = JOIN_INNER;
1850 
1851  foreach(lc1, outerrel->partial_pathlist)
1852  {
1853  Path *outerpath = (Path *) lfirst(lc1);
1854  List *pathkeys;
1855  ListCell *lc2;
1856 
1857  /* Figure out what useful ordering any paths we create will have. */
1858  pathkeys = build_join_pathkeys(root, joinrel, jointype,
1859  outerpath->pathkeys);
1860 
1861  /*
1862  * Try the cheapest parameterized paths; only those which will produce
1863  * an unparameterized path when joined to this outerrel will survive
1864  * try_partial_nestloop_path. The cheapest unparameterized path is
1865  * also in this list.
1866  */
1867  foreach(lc2, innerrel->cheapest_parameterized_paths)
1868  {
1869  Path *innerpath = (Path *) lfirst(lc2);
1870  Path *mpath;
1871 
1872  /* Can't join to an inner path that is not parallel-safe */
1873  if (!innerpath->parallel_safe)
1874  continue;
1875 
1876  /*
1877  * If we're doing JOIN_UNIQUE_INNER, we can only use the inner's
1878  * cheapest_total_path, and we have to unique-ify it. (We might
1879  * be able to relax this to allow other safe, unparameterized
1880  * inner paths, but right now create_unique_path is not on board
1881  * with that.)
1882  */
1883  if (save_jointype == JOIN_UNIQUE_INNER)
1884  {
1885  if (innerpath != innerrel->cheapest_total_path)
1886  continue;
1887  innerpath = (Path *) create_unique_path(root, innerrel,
1888  innerpath,
1889  extra->sjinfo);
1890  Assert(innerpath);
1891  }
1892 
1893  try_partial_nestloop_path(root, joinrel, outerpath, innerpath,
1894  pathkeys, jointype, extra);
1895 
1896  /*
1897  * Try generating a memoize path and see if that makes the nested
1898  * loop any cheaper.
1899  */
1900  mpath = get_memoize_path(root, innerrel, outerrel,
1901  innerpath, outerpath, jointype,
1902  extra);
1903  if (mpath != NULL)
1904  try_partial_nestloop_path(root, joinrel, outerpath, mpath,
1905  pathkeys, jointype, extra);
1906  }
1907  }
1908 }
1909 
1910 /*
1911  * hash_inner_and_outer
1912  * Create hashjoin join paths by explicitly hashing both the outer and
1913  * inner keys of each available hash clause.
1914  *
1915  * 'joinrel' is the join relation
1916  * 'outerrel' is the outer join relation
1917  * 'innerrel' is the inner join relation
1918  * 'jointype' is the type of join to do
1919  * 'extra' contains additional input values
1920  */
1921 static void
1923  RelOptInfo *joinrel,
1924  RelOptInfo *outerrel,
1925  RelOptInfo *innerrel,
1926  JoinType jointype,
1927  JoinPathExtraData *extra)
1928 {
1929  JoinType save_jointype = jointype;
1930  bool isouterjoin = IS_OUTER_JOIN(jointype);
1931  List *hashclauses;
1932  ListCell *l;
1933 
1934  /*
1935  * We need to build only one hashclauses list for any given pair of outer
1936  * and inner relations; all of the hashable clauses will be used as keys.
1937  *
1938  * Scan the join's restrictinfo list to find hashjoinable clauses that are
1939  * usable with this pair of sub-relations.
1940  */
1941  hashclauses = NIL;
1942  foreach(l, extra->restrictlist)
1943  {
1944  RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
1945 
1946  /*
1947  * If processing an outer join, only use its own join clauses for
1948  * hashing. For inner joins we need not be so picky.
1949  */
1950  if (isouterjoin && RINFO_IS_PUSHED_DOWN(restrictinfo, joinrel->relids))
1951  continue;
1952 
1953  if (!restrictinfo->can_join ||
1954  restrictinfo->hashjoinoperator == InvalidOid)
1955  continue; /* not hashjoinable */
1956 
1957  /*
1958  * Check if clause has the form "outer op inner" or "inner op outer".
1959  */
1960  if (!clause_sides_match_join(restrictinfo, outerrel, innerrel))
1961  continue; /* no good for these input relations */
1962 
1963  hashclauses = lappend(hashclauses, restrictinfo);
1964  }
1965 
1966  /* If we found any usable hashclauses, make paths */
1967  if (hashclauses)
1968  {
1969  /*
1970  * We consider both the cheapest-total-cost and cheapest-startup-cost
1971  * outer paths. There's no need to consider any but the
1972  * cheapest-total-cost inner path, however.
1973  */
1974  Path *cheapest_startup_outer = outerrel->cheapest_startup_path;
1975  Path *cheapest_total_outer = outerrel->cheapest_total_path;
1976  Path *cheapest_total_inner = innerrel->cheapest_total_path;
1977 
1978  /*
1979  * If either cheapest-total path is parameterized by the other rel, we
1980  * can't use a hashjoin. (There's no use looking for alternative
1981  * input paths, since these should already be the least-parameterized
1982  * available paths.)
1983  */
1984  if (PATH_PARAM_BY_REL(cheapest_total_outer, innerrel) ||
1985  PATH_PARAM_BY_REL(cheapest_total_inner, outerrel))
1986  return;
1987 
1988  /* Unique-ify if need be; we ignore parameterized possibilities */
1989  if (jointype == JOIN_UNIQUE_OUTER)
1990  {
1991  cheapest_total_outer = (Path *)
1992  create_unique_path(root, outerrel,
1993  cheapest_total_outer, extra->sjinfo);
1994  Assert(cheapest_total_outer);
1995  jointype = JOIN_INNER;
1996  try_hashjoin_path(root,
1997  joinrel,
1998  cheapest_total_outer,
1999  cheapest_total_inner,
2000  hashclauses,
2001  jointype,
2002  extra);
2003  /* no possibility of cheap startup here */
2004  }
2005  else if (jointype == JOIN_UNIQUE_INNER)
2006  {
2007  cheapest_total_inner = (Path *)
2008  create_unique_path(root, innerrel,
2009  cheapest_total_inner, extra->sjinfo);
2010  Assert(cheapest_total_inner);
2011  jointype = JOIN_INNER;
2012  try_hashjoin_path(root,
2013  joinrel,
2014  cheapest_total_outer,
2015  cheapest_total_inner,
2016  hashclauses,
2017  jointype,
2018  extra);
2019  if (cheapest_startup_outer != NULL &&
2020  cheapest_startup_outer != cheapest_total_outer)
2021  try_hashjoin_path(root,
2022  joinrel,
2023  cheapest_startup_outer,
2024  cheapest_total_inner,
2025  hashclauses,
2026  jointype,
2027  extra);
2028  }
2029  else
2030  {
2031  /*
2032  * For other jointypes, we consider the cheapest startup outer
2033  * together with the cheapest total inner, and then consider
2034  * pairings of cheapest-total paths including parameterized ones.
2035  * There is no use in generating parameterized paths on the basis
2036  * of possibly cheap startup cost, so this is sufficient.
2037  */
2038  ListCell *lc1;
2039  ListCell *lc2;
2040 
2041  if (cheapest_startup_outer != NULL)
2042  try_hashjoin_path(root,
2043  joinrel,
2044  cheapest_startup_outer,
2045  cheapest_total_inner,
2046  hashclauses,
2047  jointype,
2048  extra);
2049 
2050  foreach(lc1, outerrel->cheapest_parameterized_paths)
2051  {
2052  Path *outerpath = (Path *) lfirst(lc1);
2053 
2054  /*
2055  * We cannot use an outer path that is parameterized by the
2056  * inner rel.
2057  */
2058  if (PATH_PARAM_BY_REL(outerpath, innerrel))
2059  continue;
2060 
2061  foreach(lc2, innerrel->cheapest_parameterized_paths)
2062  {
2063  Path *innerpath = (Path *) lfirst(lc2);
2064 
2065  /*
2066  * We cannot use an inner path that is parameterized by
2067  * the outer rel, either.
2068  */
2069  if (PATH_PARAM_BY_REL(innerpath, outerrel))
2070  continue;
2071 
2072  if (outerpath == cheapest_startup_outer &&
2073  innerpath == cheapest_total_inner)
2074  continue; /* already tried it */
2075 
2076  try_hashjoin_path(root,
2077  joinrel,
2078  outerpath,
2079  innerpath,
2080  hashclauses,
2081  jointype,
2082  extra);
2083  }
2084  }
2085  }
2086 
2087  /*
2088  * If the joinrel is parallel-safe, we may be able to consider a
2089  * partial hash join. However, we can't handle JOIN_UNIQUE_OUTER,
2090  * because the outer path will be partial, and therefore we won't be
2091  * able to properly guarantee uniqueness. Similarly, we can't handle
2092  * JOIN_FULL and JOIN_RIGHT, because they can produce false null
2093  * extended rows. Also, the resulting path must not be parameterized.
2094  * We would be able to support JOIN_FULL and JOIN_RIGHT for Parallel
2095  * Hash, since in that case we're back to a single hash table with a
2096  * single set of match bits for each batch, but that will require
2097  * figuring out a deadlock-free way to wait for the probe to finish.
2098  */
2099  if (joinrel->consider_parallel &&
2100  save_jointype != JOIN_UNIQUE_OUTER &&
2101  save_jointype != JOIN_FULL &&
2102  save_jointype != JOIN_RIGHT &&
2103  outerrel->partial_pathlist != NIL &&
2104  bms_is_empty(joinrel->lateral_relids))
2105  {
2106  Path *cheapest_partial_outer;
2107  Path *cheapest_partial_inner = NULL;
2108  Path *cheapest_safe_inner = NULL;
2109 
2110  cheapest_partial_outer =
2111  (Path *) linitial(outerrel->partial_pathlist);
2112 
2113  /*
2114  * Can we use a partial inner plan too, so that we can build a
2115  * shared hash table in parallel? We can't handle
2116  * JOIN_UNIQUE_INNER because we can't guarantee uniqueness.
2117  */
2118  if (innerrel->partial_pathlist != NIL &&
2119  save_jointype != JOIN_UNIQUE_INNER &&
2121  {
2122  cheapest_partial_inner =
2123  (Path *) linitial(innerrel->partial_pathlist);
2124  try_partial_hashjoin_path(root, joinrel,
2125  cheapest_partial_outer,
2126  cheapest_partial_inner,
2127  hashclauses, jointype, extra,
2128  true /* parallel_hash */ );
2129  }
2130 
2131  /*
2132  * Normally, given that the joinrel is parallel-safe, the cheapest
2133  * total inner path will also be parallel-safe, but if not, we'll
2134  * have to search for the cheapest safe, unparameterized inner
2135  * path. If doing JOIN_UNIQUE_INNER, we can't use any alternative
2136  * inner path.
2137  */
2138  if (cheapest_total_inner->parallel_safe)
2139  cheapest_safe_inner = cheapest_total_inner;
2140  else if (save_jointype != JOIN_UNIQUE_INNER)
2141  cheapest_safe_inner =
2143 
2144  if (cheapest_safe_inner != NULL)
2145  try_partial_hashjoin_path(root, joinrel,
2146  cheapest_partial_outer,
2147  cheapest_safe_inner,
2148  hashclauses, jointype, extra,
2149  false /* parallel_hash */ );
2150  }
2151  }
2152 }
2153 
2154 /*
2155  * select_mergejoin_clauses
2156  * Select mergejoin clauses that are usable for a particular join.
2157  * Returns a list of RestrictInfo nodes for those clauses.
2158  *
2159  * *mergejoin_allowed is normally set to true, but it is set to false if
2160  * this is a right/full join and there are nonmergejoinable join clauses.
2161  * The executor's mergejoin machinery cannot handle such cases, so we have
2162  * to avoid generating a mergejoin plan. (Note that this flag does NOT
2163  * consider whether there are actually any mergejoinable clauses. This is
2164  * correct because in some cases we need to build a clauseless mergejoin.
2165  * Simply returning NIL is therefore not enough to distinguish safe from
2166  * unsafe cases.)
2167  *
2168  * We also mark each selected RestrictInfo to show which side is currently
2169  * being considered as outer. These are transient markings that are only
2170  * good for the duration of the current add_paths_to_joinrel() call!
2171  *
2172  * We examine each restrictinfo clause known for the join to see
2173  * if it is mergejoinable and involves vars from the two sub-relations
2174  * currently of interest.
2175  */
2176 static List *
2178  RelOptInfo *joinrel,
2179  RelOptInfo *outerrel,
2180  RelOptInfo *innerrel,
2181  List *restrictlist,
2182  JoinType jointype,
2183  bool *mergejoin_allowed)
2184 {
2185  List *result_list = NIL;
2186  bool isouterjoin = IS_OUTER_JOIN(jointype);
2187  bool have_nonmergeable_joinclause = false;
2188  ListCell *l;
2189 
2190  foreach(l, restrictlist)
2191  {
2192  RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
2193 
2194  /*
2195  * If processing an outer join, only use its own join clauses in the
2196  * merge. For inner joins we can use pushed-down clauses too. (Note:
2197  * we don't set have_nonmergeable_joinclause here because pushed-down
2198  * clauses will become otherquals not joinquals.)
2199  */
2200  if (isouterjoin && RINFO_IS_PUSHED_DOWN(restrictinfo, joinrel->relids))
2201  continue;
2202 
2203  /* Check that clause is a mergeable operator clause */
2204  if (!restrictinfo->can_join ||
2205  restrictinfo->mergeopfamilies == NIL)
2206  {
2207  /*
2208  * The executor can handle extra joinquals that are constants, but
2209  * not anything else, when doing right/full merge join. (The
2210  * reason to support constants is so we can do FULL JOIN ON
2211  * FALSE.)
2212  */
2213  if (!restrictinfo->clause || !IsA(restrictinfo->clause, Const))
2214  have_nonmergeable_joinclause = true;
2215  continue; /* not mergejoinable */
2216  }
2217 
2218  /*
2219  * Check if clause has the form "outer op inner" or "inner op outer".
2220  */
2221  if (!clause_sides_match_join(restrictinfo, outerrel, innerrel))
2222  {
2223  have_nonmergeable_joinclause = true;
2224  continue; /* no good for these input relations */
2225  }
2226 
2227  /*
2228  * Insist that each side have a non-redundant eclass. This
2229  * restriction is needed because various bits of the planner expect
2230  * that each clause in a merge be associable with some pathkey in a
2231  * canonical pathkey list, but redundant eclasses can't appear in
2232  * canonical sort orderings. (XXX it might be worth relaxing this,
2233  * but not enough time to address it for 8.3.)
2234  *
2235  * Note: it would be bad if this condition failed for an otherwise
2236  * mergejoinable FULL JOIN clause, since that would result in
2237  * undesirable planner failure. I believe that is not possible
2238  * however; a variable involved in a full join could only appear in
2239  * below_outer_join eclasses, which aren't considered redundant.
2240  *
2241  * This case *can* happen for left/right join clauses: the outer-side
2242  * variable could be equated to a constant. Because we will propagate
2243  * that constant across the join clause, the loss of ability to do a
2244  * mergejoin is not really all that big a deal, and so it's not clear
2245  * that improving this is important.
2246  */
2247  update_mergeclause_eclasses(root, restrictinfo);
2248 
2249  if (EC_MUST_BE_REDUNDANT(restrictinfo->left_ec) ||
2250  EC_MUST_BE_REDUNDANT(restrictinfo->right_ec))
2251  {
2252  have_nonmergeable_joinclause = true;
2253  continue; /* can't handle redundant eclasses */
2254  }
2255 
2256  result_list = lappend(result_list, restrictinfo);
2257  }
2258 
2259  /*
2260  * Report whether mergejoin is allowed (see comment at top of function).
2261  */
2262  switch (jointype)
2263  {
2264  case JOIN_RIGHT:
2265  case JOIN_FULL:
2266  *mergejoin_allowed = !have_nonmergeable_joinclause;
2267  break;
2268  default:
2269  *mergejoin_allowed = true;
2270  break;
2271  }
2272 
2273  return result_list;
2274 }
Path * get_cheapest_path_for_pathkeys(List *paths, List *pathkeys, Relids required_outer, CostSelector cost_criterion, bool require_parallel_safe)
Definition: pathkeys.c:404
static bool paraminfo_get_equal_hashops(PlannerInfo *root, ParamPathInfo *param_info, RelOptInfo *outerrel, RelOptInfo *innerrel, List **param_exprs, List **operators)
Definition: joinpath.c:378
static void try_mergejoin_path(PlannerInfo *root, RelOptInfo *joinrel, Path *outer_path, Path *inner_path, List *pathkeys, List *mergeclauses, List *outersortkeys, List *innersortkeys, JoinType jointype, JoinPathExtraData *extra, bool is_partial)
Definition: joinpath.c:774
#define NIL
Definition: pg_list.h:65
#define IsA(nodeptr, _type_)
Definition: nodes.h:587
static void try_hashjoin_path(PlannerInfo *root, RelOptInfo *joinrel, Path *outer_path, Path *inner_path, List *hashclauses, JoinType jointype, JoinPathExtraData *extra)
Definition: joinpath.c:939
bool enable_memoize
Definition: costsize.c:143
void add_path(RelOptInfo *parent_rel, Path *new_path)
Definition: pathnode.c:422
SemiAntiJoinFactors semifactors
Definition: pathnodes.h:2533
RelOptKind reloptkind
Definition: pathnodes.h:678
List * join_info_list
Definition: pathnodes.h:266
Relids min_righthand
Definition: pathnodes.h:2253
static void try_nestloop_path(PlannerInfo *root, RelOptInfo *joinrel, Path *outer_path, Path *inner_path, List *pathkeys, JoinType jointype, JoinPathExtraData *extra)
Definition: joinpath.c:580
static void try_partial_hashjoin_path(PlannerInfo *root, RelOptInfo *joinrel, Path *outer_path, Path *inner_path, List *hashclauses, JoinType jointype, JoinPathExtraData *extra, bool parallel_hash)
Definition: joinpath.c:1005
bool add_partial_path_precheck(RelOptInfo *parent_rel, Cost total_cost, List *pathkeys)
Definition: pathnode.c:867
bool add_path_precheck(RelOptInfo *parent_rel, Cost startup_cost, Cost total_cost, List *pathkeys, Relids required_outer)
Definition: pathnode.c:644
struct Path * cheapest_startup_path
Definition: pathnodes.h:698
List * baserestrictinfo
Definition: pathnodes.h:745
MaterialPath * create_material_path(RelOptInfo *rel, Path *subpath)
Definition: pathnode.c:1552
Bitmapset * bms_difference(const Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:291
List * list_truncate(List *list, int new_size)
Definition: list.c:600
#define IS_OUTER_JOIN(jointype)
Definition: nodes.h:753
ParamPathInfo * param_info
Definition: pathnodes.h:1185
bool enable_parallel_hash
Definition: costsize.c:150
Relids calc_non_nestloop_required_outer(Path *outer_path, Path *inner_path)
Definition: pathnode.c:2373
List * list_copy(const List *oldlist)
Definition: list.c:1418
Definition: nodes.h:536
List * make_inner_pathkeys_for_merge(PlannerInfo *root, List *mergeclauses, List *outer_pathkeys)
Definition: pathkeys.c:1547
#define TYPECACHE_EQ_OPR
Definition: typcache.h:136
List * partial_pathlist
Definition: pathnodes.h:697
HashPath * create_hashjoin_path(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, JoinCostWorkspace *workspace, JoinPathExtraData *extra, Path *outer_path, Path *inner_path, bool parallel_hash, List *restrict_clauses, Relids required_outer, List *hashclauses)
Definition: pathnode.c:2560
Relids left_relids
Definition: pathnodes.h:2086
void initial_cost_hashjoin(PlannerInfo *root, JoinCostWorkspace *workspace, JoinType jointype, List *hashclauses, Path *outer_path, Path *inner_path, JoinPathExtraData *extra, bool parallel_hash)
Definition: costsize.c:3741
bool innerrel_is_unique(PlannerInfo *root, Relids joinrelids, Relids outerrelids, RelOptInfo *innerrel, JoinType jointype, List *restrictlist, bool force_cache)
Definition: analyzejoins.c:964
EquivalenceClass * right_ec
Definition: pathnodes.h:2108
List * cheapest_parameterized_paths
Definition: pathnodes.h:701
UniquePath * create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, SpecialJoinInfo *sjinfo)
Definition: pathnode.c:1639
bool contain_volatile_functions(Node *clause)
Definition: clauses.c:452
MergePath * create_mergejoin_path(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, JoinCostWorkspace *workspace, JoinPathExtraData *extra, Path *outer_path, Path *inner_path, List *restrict_clauses, List *pathkeys, Relids required_outer, List *mergeclauses, List *outersortkeys, List *innersortkeys)
Definition: pathnode.c:2494
List * lappend_oid(List *list, Oid datum)
Definition: list.c:372
#define OidIsValid(objectId)
Definition: c.h:710
void initial_cost_mergejoin(PlannerInfo *root, JoinCostWorkspace *workspace, JoinType jointype, List *mergeclauses, Path *outer_path, Path *inner_path, List *outersortkeys, List *innersortkeys, JoinPathExtraData *extra)
Definition: costsize.c:3183
List * build_join_pathkeys(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, List *outer_pathkeys)
Definition: pathkeys.c:1082
List * mergeopfamilies
Definition: pathnodes.h:2104
#define lsecond(l)
Definition: pg_list.h:179
List * find_mergeclauses_for_outer_pathkeys(PlannerInfo *root, List *pathkeys, List *restrictinfos)
Definition: pathkeys.c:1262
#define PATH_PARAM_BY_PARENT(path, rel)
Definition: joinpath.c:36
static void hash_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, JoinType jointype, JoinPathExtraData *extra)
Definition: joinpath.c:1922
JoinType
Definition: nodes.h:704
NodeTag pathtype
Definition: pathnodes.h:1180
Relids lateral_relids
Definition: pathnodes.h:706
SpecialJoinInfo * sjinfo
Definition: pathnodes.h:2532
static void sort_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, JoinType jointype, JoinPathExtraData *extra)
Definition: joinpath.c:1098
#define linitial(l)
Definition: pg_list.h:174
Relids all_baserels
Definition: pathnodes.h:210
#define ERROR
Definition: elog.h:46
GetForeignJoinPaths_function GetForeignJoinPaths
Definition: fdwapi.h:223
Relids param_source_rels
Definition: pathnodes.h:2534
Bitmapset * bms_join(Bitmapset *a, Bitmapset *b)
Definition: bitmapset.c:949
RelOptInfo * parent
Definition: pathnodes.h:1182
Path * reparameterize_path_by_child(PlannerInfo *root, Path *path, RelOptInfo *child_rel)
Definition: pathnode.c:3972
bool bms_is_subset(const Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:315
int compare_path_costs(Path *path1, Path *path2, CostSelector criterion)
Definition: pathnode.c:71
NestPath * create_nestloop_path(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, JoinCostWorkspace *workspace, JoinPathExtraData *extra, Path *outer_path, Path *inner_path, List *restrict_clauses, List *pathkeys, Relids required_outer)
Definition: pathnode.c:2406
struct Path * cheapest_total_path
Definition: pathnodes.h:699
#define PATH_REQ_OUTER(path)
Definition: pathnodes.h:1201
MemoizePath * create_memoize_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, List *param_exprs, List *hash_operators, bool singlerow, double calls)
Definition: pathnode.c:1584
#define EC_MUST_BE_REDUNDANT(eclass)
Definition: pathnodes.h:1008
static void try_partial_nestloop_path(PlannerInfo *root, RelOptInfo *joinrel, Path *outer_path, Path *inner_path, List *pathkeys, JoinType jointype, JoinPathExtraData *extra)
Definition: joinpath.c:692
bool outer_is_left
Definition: pathnodes.h:2114
struct FdwRoutine * fdwroutine
Definition: pathnodes.h:736
static void generate_mergejoin_paths(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *innerrel, Path *outerpath, JoinType jointype, JoinPathExtraData *extra, bool useallclauses, Path *inner_cheapest_total, List *merge_pathkeys, bool is_partial)
Definition: joinpath.c:1300
static ListCell * list_head(const List *l)
Definition: pg_list.h:125
List * mergeclause_list
Definition: pathnodes.h:2530
Relids relids
Definition: pathnodes.h:681
Path * get_cheapest_parallel_safe_total_inner(List *paths)
Definition: pathkeys.c:482
Oid hasheqoperator
Definition: pathnodes.h:2126
List * lappend(List *list, void *datum)
Definition: list.c:336
static void consider_parallel_nestloop(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, JoinType jointype, JoinPathExtraData *extra)
Definition: joinpath.c:1838
Expr * clause
Definition: pathnodes.h:2056
bool bms_is_empty(const Bitmapset *a)
Definition: bitmapset.c:701
static List * select_mergejoin_clauses(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, List *restrictlist, JoinType jointype, bool *mergejoin_allowed)
Definition: joinpath.c:2177
bool pathkeys_contained_in(List *keys1, List *keys2)
Definition: pathkeys.c:324
static void consider_parallel_mergejoin(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, JoinType jointype, JoinPathExtraData *extra, Path *inner_cheapest_total)
Definition: joinpath.c:1798
List * trim_mergeclauses_for_inner_pathkeys(PlannerInfo *root, List *mergeclauses, List *pathkeys)
Definition: pathkeys.c:1650
set_join_pathlist_hook_type set_join_pathlist_hook
Definition: joinpath.c:30
#define RINFO_IS_PUSHED_DOWN(rinfo, joinrelids)
Definition: pathnodes.h:2139
TypeCacheEntry * lookup_type_cache(Oid type_id, int flags)
Definition: typcache.c:339
#define InvalidOid
Definition: postgres_ext.h:36
List * select_outer_pathkeys_for_merge(PlannerInfo *root, List *mergeclauses, RelOptInfo *joinrel)
Definition: pathkeys.c:1375
#define PATH_PARAM_BY_REL(path, rel)
Definition: joinpath.c:42
List * lcons(void *datum, List *list)
Definition: list.c:468
List * pathkeys
Definition: pathnodes.h:1196
void bms_free(Bitmapset *a)
Definition: bitmapset.c:208
bool enable_mergejoin
Definition: costsize.c:144
Relids right_relids
Definition: pathnodes.h:2087
#define Assert(condition)
Definition: c.h:804
#define lfirst(lc)
Definition: pg_list.h:169
bool parallel_safe
Definition: pathnodes.h:1188
List * lateral_vars
Definition: pathnodes.h:716
static bool clause_sides_match_join(RestrictInfo *rinfo, RelOptInfo *outerrel, RelOptInfo *innerrel)
Definition: joinpath.c:1066
Cardinality rows
Definition: pathnodes.h:684
JoinType jointype
Definition: pathnodes.h:2256
List * ppi_clauses
Definition: pathnodes.h:1143
Oid hashjoinoperator
Definition: pathnodes.h:2117
Oid exprType(const Node *expr)
Definition: nodeFuncs.c:41
static int list_length(const List *l)
Definition: pg_list.h:149
Relids calc_nestloop_required_outer(Relids outerrelids, Relids outer_paramrels, Relids innerrelids, Relids inner_paramrels)
Definition: pathnode.c:2340
bool consider_parallel
Definition: pathnodes.h:689
void compute_semi_anti_join_factors(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, JoinType jointype, SpecialJoinInfo *sjinfo, List *restrictlist, SemiAntiJoinFactors *semifactors)
Definition: costsize.c:4665
void(* set_join_pathlist_hook_type)(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, JoinType jointype, JoinPathExtraData *extra)
Definition: paths.h:36
bool enable_hashjoin
Definition: costsize.c:145
bool bms_overlap(const Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:494
EquivalenceClass * left_ec
Definition: pathnodes.h:2107
void add_partial_path(RelOptInfo *parent_rel, Path *new_path)
Definition: pathnode.c:749
void list_free(List *list)
Definition: list.c:1391
#define elog(elevel,...)
Definition: elog.h:232
static Path * get_memoize_path(PlannerInfo *root, RelOptInfo *innerrel, RelOptInfo *outerrel, Path *inner_path, Path *outer_path, JoinType jointype, JoinPathExtraData *extra)
Definition: joinpath.c:461
static void try_partial_mergejoin_path(PlannerInfo *root, RelOptInfo *joinrel, Path *outer_path, Path *inner_path, List *pathkeys, List *mergeclauses, List *outersortkeys, List *innersortkeys, JoinType jointype, JoinPathExtraData *extra)
Definition: joinpath.c:869
void add_paths_to_joinrel(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, JoinType jointype, SpecialJoinInfo *sjinfo, List *restrictlist)
Definition: joinpath.c:123
bool ExecMaterializesOutput(NodeTag plantype)
Definition: execAmi.c:637
void initial_cost_nestloop(PlannerInfo *root, JoinCostWorkspace *workspace, JoinType jointype, Path *outer_path, Path *inner_path, JoinPathExtraData *extra)
Definition: costsize.c:2902
List * pathlist
Definition: pathnodes.h:695
Relids ppi_req_outer
Definition: pathnodes.h:1141
List * args
Definition: primnodes.h:548
List * list_delete_nth_cell(List *list, int n)
Definition: list.c:711
static bool allow_star_schema_join(PlannerInfo *root, Relids outerrelids, Relids inner_paramrels)
Definition: joinpath.c:356
Definition: pg_list.h:50
struct PathTarget * reltarget
Definition: pathnodes.h:692
Relids min_lefthand
Definition: pathnodes.h:2252
void update_mergeclause_eclasses(PlannerInfo *root, RestrictInfo *restrictinfo)
Definition: pathkeys.c:1228
#define TYPECACHE_HASH_PROC
Definition: typcache.h:140
Bitmapset * bms_add_members(Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:793
#define foreach_current_index(cell)
Definition: pg_list.h:382
bool have_dangerous_phv(PlannerInfo *root, Relids outer_relids, Relids inner_params)
Definition: joinrels.c:1184
Relids top_parent_relids
Definition: pathnodes.h:756
bool bms_nonempty_difference(const Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:545
bool enable_material
Definition: costsize.c:142
static void match_unsorted_outer(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, JoinType jointype, JoinPathExtraData *extra)
Definition: joinpath.c:1548