PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
joinpath.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * joinpath.c
4  * Routines to find all possible paths for processing a set of joins
5  *
6  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/optimizer/path/joinpath.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16 
17 #include <math.h>
18 
19 #include "executor/executor.h"
20 #include "foreign/fdwapi.h"
21 #include "optimizer/cost.h"
22 #include "optimizer/pathnode.h"
23 #include "optimizer/paths.h"
24 
25 /* Hook for plugins to get control in add_paths_to_joinrel() */
27 
28 #define PATH_PARAM_BY_REL(path, rel) \
29  ((path)->param_info && bms_overlap(PATH_REQ_OUTER(path), (rel)->relids))
30 
31 static void sort_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
32  RelOptInfo *outerrel, RelOptInfo *innerrel,
33  JoinType jointype, JoinPathExtraData *extra);
34 static void match_unsorted_outer(PlannerInfo *root, RelOptInfo *joinrel,
35  RelOptInfo *outerrel, RelOptInfo *innerrel,
36  JoinType jointype, JoinPathExtraData *extra);
37 static void consider_parallel_nestloop(PlannerInfo *root,
38  RelOptInfo *joinrel,
39  RelOptInfo *outerrel,
40  RelOptInfo *innerrel,
41  JoinType jointype,
42  JoinPathExtraData *extra);
43 static void hash_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
44  RelOptInfo *outerrel, RelOptInfo *innerrel,
45  JoinType jointype, JoinPathExtraData *extra);
47  RelOptInfo *joinrel,
48  RelOptInfo *outerrel,
49  RelOptInfo *innerrel,
50  List *restrictlist,
51  JoinType jointype,
52  bool *mergejoin_allowed);
53 static void generate_mergejoin_paths(PlannerInfo *root,
54  RelOptInfo *joinrel,
55  RelOptInfo *innerrel,
56  Path *outerpath,
57  JoinType jointype,
58  JoinPathExtraData *extra,
59  bool useallclauses,
60  Path *inner_cheapest_total,
61  List *merge_pathkeys);
62 
63 
64 /*
65  * add_paths_to_joinrel
66  * Given a join relation and two component rels from which it can be made,
67  * consider all possible paths that use the two component rels as outer
68  * and inner rel respectively. Add these paths to the join rel's pathlist
69  * if they survive comparison with other paths (and remove any existing
70  * paths that are dominated by these paths).
71  *
72  * Modifies the pathlist field of the joinrel node to contain the best
73  * paths found so far.
74  *
75  * jointype is not necessarily the same as sjinfo->jointype; it might be
76  * "flipped around" if we are considering joining the rels in the opposite
77  * direction from what's indicated in sjinfo.
78  *
79  * Also, this routine and others in this module accept the special JoinTypes
80  * JOIN_UNIQUE_OUTER and JOIN_UNIQUE_INNER to indicate that we should
81  * unique-ify the outer or inner relation and then apply a regular inner
82  * join. These values are not allowed to propagate outside this module,
83  * however. Path cost estimation code may need to recognize that it's
84  * dealing with such a case --- the combination of nominal jointype INNER
85  * with sjinfo->jointype == JOIN_SEMI indicates that.
86  */
87 void
89  RelOptInfo *joinrel,
90  RelOptInfo *outerrel,
91  RelOptInfo *innerrel,
92  JoinType jointype,
93  SpecialJoinInfo *sjinfo,
94  List *restrictlist)
95 {
96  JoinPathExtraData extra;
97  bool mergejoin_allowed = true;
98  ListCell *lc;
99 
100  extra.restrictlist = restrictlist;
101  extra.mergeclause_list = NIL;
102  extra.sjinfo = sjinfo;
103  extra.param_source_rels = NULL;
104 
105  /*
106  * Find potential mergejoin clauses. We can skip this if we are not
107  * interested in doing a mergejoin. However, mergejoin may be our only
108  * way of implementing a full outer join, so override enable_mergejoin if
109  * it's a full join.
110  */
111  if (enable_mergejoin || jointype == JOIN_FULL)
113  joinrel,
114  outerrel,
115  innerrel,
116  restrictlist,
117  jointype,
118  &mergejoin_allowed);
119 
120  /*
121  * If it's SEMI or ANTI join, compute correction factors for cost
122  * estimation. These will be the same for all paths.
123  */
124  if (jointype == JOIN_SEMI || jointype == JOIN_ANTI)
125  compute_semi_anti_join_factors(root, outerrel, innerrel,
126  jointype, sjinfo, restrictlist,
127  &extra.semifactors);
128 
129  /*
130  * Decide whether it's sensible to generate parameterized paths for this
131  * joinrel, and if so, which relations such paths should require. There
132  * is usually no need to create a parameterized result path unless there
133  * is a join order restriction that prevents joining one of our input rels
134  * directly to the parameter source rel instead of joining to the other
135  * input rel. (But see allow_star_schema_join().) This restriction
136  * reduces the number of parameterized paths we have to deal with at
137  * higher join levels, without compromising the quality of the resulting
138  * plan. We express the restriction as a Relids set that must overlap the
139  * parameterization of any proposed join path.
140  */
141  foreach(lc, root->join_info_list)
142  {
143  SpecialJoinInfo *sjinfo2 = (SpecialJoinInfo *) lfirst(lc);
144 
145  /*
146  * SJ is relevant to this join if we have some part of its RHS
147  * (possibly not all of it), and haven't yet joined to its LHS. (This
148  * test is pretty simplistic, but should be sufficient considering the
149  * join has already been proven legal.) If the SJ is relevant, it
150  * presents constraints for joining to anything not in its RHS.
151  */
152  if (bms_overlap(joinrel->relids, sjinfo2->min_righthand) &&
153  !bms_overlap(joinrel->relids, sjinfo2->min_lefthand))
156  sjinfo2->min_righthand));
157 
158  /* full joins constrain both sides symmetrically */
159  if (sjinfo2->jointype == JOIN_FULL &&
160  bms_overlap(joinrel->relids, sjinfo2->min_lefthand) &&
161  !bms_overlap(joinrel->relids, sjinfo2->min_righthand))
164  sjinfo2->min_lefthand));
165  }
166 
167  /*
168  * However, when a LATERAL subquery is involved, there will simply not be
169  * any paths for the joinrel that aren't parameterized by whatever the
170  * subquery is parameterized by, unless its parameterization is resolved
171  * within the joinrel. So we might as well allow additional dependencies
172  * on whatever residual lateral dependencies the joinrel will have.
173  */
175  joinrel->lateral_relids);
176 
177  /*
178  * 1. Consider mergejoin paths where both relations must be explicitly
179  * sorted. Skip this if we can't mergejoin.
180  */
181  if (mergejoin_allowed)
182  sort_inner_and_outer(root, joinrel, outerrel, innerrel,
183  jointype, &extra);
184 
185  /*
186  * 2. Consider paths where the outer relation need not be explicitly
187  * sorted. This includes both nestloops and mergejoins where the outer
188  * path is already ordered. Again, skip this if we can't mergejoin.
189  * (That's okay because we know that nestloop can't handle right/full
190  * joins at all, so it wouldn't work in the prohibited cases either.)
191  */
192  if (mergejoin_allowed)
193  match_unsorted_outer(root, joinrel, outerrel, innerrel,
194  jointype, &extra);
195 
196 #ifdef NOT_USED
197 
198  /*
199  * 3. Consider paths where the inner relation need not be explicitly
200  * sorted. This includes mergejoins only (nestloops were already built in
201  * match_unsorted_outer).
202  *
203  * Diked out as redundant 2/13/2000 -- tgl. There isn't any really
204  * significant difference between the inner and outer side of a mergejoin,
205  * so match_unsorted_inner creates no paths that aren't equivalent to
206  * those made by match_unsorted_outer when add_paths_to_joinrel() is
207  * invoked with the two rels given in the other order.
208  */
209  if (mergejoin_allowed)
210  match_unsorted_inner(root, joinrel, outerrel, innerrel,
211  jointype, &extra);
212 #endif
213 
214  /*
215  * 4. Consider paths where both outer and inner relations must be hashed
216  * before being joined. As above, disregard enable_hashjoin for full
217  * joins, because there may be no other alternative.
218  */
219  if (enable_hashjoin || jointype == JOIN_FULL)
220  hash_inner_and_outer(root, joinrel, outerrel, innerrel,
221  jointype, &extra);
222 
223  /*
224  * 5. If inner and outer relations are foreign tables (or joins) belonging
225  * to the same server and assigned to the same user to check access
226  * permissions as, give the FDW a chance to push down joins.
227  */
228  if (joinrel->fdwroutine &&
230  joinrel->fdwroutine->GetForeignJoinPaths(root, joinrel,
231  outerrel, innerrel,
232  jointype, &extra);
233 
234  /*
235  * 6. Finally, give extensions a chance to manipulate the path list.
236  */
238  set_join_pathlist_hook(root, joinrel, outerrel, innerrel,
239  jointype, &extra);
240 }
241 
242 /*
243  * We override the param_source_rels heuristic to accept nestloop paths in
244  * which the outer rel satisfies some but not all of the inner path's
245  * parameterization. This is necessary to get good plans for star-schema
246  * scenarios, in which a parameterized path for a large table may require
247  * parameters from multiple small tables that will not get joined directly to
248  * each other. We can handle that by stacking nestloops that have the small
249  * tables on the outside; but this breaks the rule the param_source_rels
250  * heuristic is based on, namely that parameters should not be passed down
251  * across joins unless there's a join-order-constraint-based reason to do so.
252  * So we ignore the param_source_rels restriction when this case applies.
253  *
254  * allow_star_schema_join() returns TRUE if the param_source_rels restriction
255  * should be overridden, ie, it's okay to perform this join.
256  */
257 static inline bool
259  Path *outer_path,
260  Path *inner_path)
261 {
262  Relids innerparams = PATH_REQ_OUTER(inner_path);
263  Relids outerrelids = outer_path->parent->relids;
264 
265  /*
266  * It's a star-schema case if the outer rel provides some but not all of
267  * the inner rel's parameterization.
268  */
269  return (bms_overlap(innerparams, outerrelids) &&
270  bms_nonempty_difference(innerparams, outerrelids));
271 }
272 
273 /*
274  * try_nestloop_path
275  * Consider a nestloop join path; if it appears useful, push it into
276  * the joinrel's pathlist via add_path().
277  */
278 static void
280  RelOptInfo *joinrel,
281  Path *outer_path,
282  Path *inner_path,
283  List *pathkeys,
284  JoinType jointype,
285  JoinPathExtraData *extra)
286 {
287  Relids required_outer;
288  JoinCostWorkspace workspace;
289 
290  /*
291  * Check to see if proposed path is still parameterized, and reject if the
292  * parameterization wouldn't be sensible --- unless allow_star_schema_join
293  * says to allow it anyway. Also, we must reject if have_dangerous_phv
294  * doesn't like the look of it, which could only happen if the nestloop is
295  * still parameterized.
296  */
297  required_outer = calc_nestloop_required_outer(outer_path,
298  inner_path);
299  if (required_outer &&
300  ((!bms_overlap(required_outer, extra->param_source_rels) &&
301  !allow_star_schema_join(root, outer_path, inner_path)) ||
302  have_dangerous_phv(root,
303  outer_path->parent->relids,
304  PATH_REQ_OUTER(inner_path))))
305  {
306  /* Waste no memory when we reject a path here */
307  bms_free(required_outer);
308  return;
309  }
310 
311  /*
312  * Do a precheck to quickly eliminate obviously-inferior paths. We
313  * calculate a cheap lower bound on the path's cost and then use
314  * add_path_precheck() to see if the path is clearly going to be dominated
315  * by some existing path for the joinrel. If not, do the full pushup with
316  * creating a fully valid path structure and submitting it to add_path().
317  * The latter two steps are expensive enough to make this two-phase
318  * methodology worthwhile.
319  */
320  initial_cost_nestloop(root, &workspace, jointype,
321  outer_path, inner_path,
322  extra->sjinfo, &extra->semifactors);
323 
324  if (add_path_precheck(joinrel,
325  workspace.startup_cost, workspace.total_cost,
326  pathkeys, required_outer))
327  {
328  add_path(joinrel, (Path *)
330  joinrel,
331  jointype,
332  &workspace,
333  extra->sjinfo,
334  &extra->semifactors,
335  outer_path,
336  inner_path,
337  extra->restrictlist,
338  pathkeys,
339  required_outer));
340  }
341  else
342  {
343  /* Waste no memory when we reject a path here */
344  bms_free(required_outer);
345  }
346 }
347 
348 /*
349  * try_partial_nestloop_path
350  * Consider a partial nestloop join path; if it appears useful, push it into
351  * the joinrel's partial_pathlist via add_partial_path().
352  */
353 static void
355  RelOptInfo *joinrel,
356  Path *outer_path,
357  Path *inner_path,
358  List *pathkeys,
359  JoinType jointype,
360  JoinPathExtraData *extra)
361 {
362  JoinCostWorkspace workspace;
363 
364  /*
365  * If the inner path is parameterized, the parameterization must be fully
366  * satisfied by the proposed outer path. Parameterized partial paths are
367  * not supported. The caller should already have verified that no
368  * extra_lateral_rels are required here.
369  */
371  if (inner_path->param_info != NULL)
372  {
373  Relids inner_paramrels = inner_path->param_info->ppi_req_outer;
374 
375  if (!bms_is_subset(inner_paramrels, outer_path->parent->relids))
376  return;
377  }
378 
379  /*
380  * Before creating a path, get a quick lower bound on what it is likely to
381  * cost. Bail out right away if it looks terrible.
382  */
383  initial_cost_nestloop(root, &workspace, jointype,
384  outer_path, inner_path,
385  extra->sjinfo, &extra->semifactors);
386  if (!add_partial_path_precheck(joinrel, workspace.total_cost, pathkeys))
387  return;
388 
389  /* Might be good enough to be worth trying, so let's try it. */
390  add_partial_path(joinrel, (Path *)
392  joinrel,
393  jointype,
394  &workspace,
395  extra->sjinfo,
396  &extra->semifactors,
397  outer_path,
398  inner_path,
399  extra->restrictlist,
400  pathkeys,
401  NULL));
402 }
403 
404 /*
405  * try_mergejoin_path
406  * Consider a merge join path; if it appears useful, push it into
407  * the joinrel's pathlist via add_path().
408  */
409 static void
411  RelOptInfo *joinrel,
412  Path *outer_path,
413  Path *inner_path,
414  List *pathkeys,
415  List *mergeclauses,
416  List *outersortkeys,
417  List *innersortkeys,
418  JoinType jointype,
419  JoinPathExtraData *extra)
420 {
421  Relids required_outer;
422  JoinCostWorkspace workspace;
423 
424  /*
425  * Check to see if proposed path is still parameterized, and reject if the
426  * parameterization wouldn't be sensible.
427  */
428  required_outer = calc_non_nestloop_required_outer(outer_path,
429  inner_path);
430  if (required_outer &&
431  !bms_overlap(required_outer, extra->param_source_rels))
432  {
433  /* Waste no memory when we reject a path here */
434  bms_free(required_outer);
435  return;
436  }
437 
438  /*
439  * If the given paths are already well enough ordered, we can skip doing
440  * an explicit sort.
441  */
442  if (outersortkeys &&
443  pathkeys_contained_in(outersortkeys, outer_path->pathkeys))
444  outersortkeys = NIL;
445  if (innersortkeys &&
446  pathkeys_contained_in(innersortkeys, inner_path->pathkeys))
447  innersortkeys = NIL;
448 
449  /*
450  * See comments in try_nestloop_path().
451  */
452  initial_cost_mergejoin(root, &workspace, jointype, mergeclauses,
453  outer_path, inner_path,
454  outersortkeys, innersortkeys,
455  extra->sjinfo);
456 
457  if (add_path_precheck(joinrel,
458  workspace.startup_cost, workspace.total_cost,
459  pathkeys, required_outer))
460  {
461  add_path(joinrel, (Path *)
463  joinrel,
464  jointype,
465  &workspace,
466  extra->sjinfo,
467  outer_path,
468  inner_path,
469  extra->restrictlist,
470  pathkeys,
471  required_outer,
472  mergeclauses,
473  outersortkeys,
474  innersortkeys));
475  }
476  else
477  {
478  /* Waste no memory when we reject a path here */
479  bms_free(required_outer);
480  }
481 }
482 
483 /*
484  * try_hashjoin_path
485  * Consider a hash join path; if it appears useful, push it into
486  * the joinrel's pathlist via add_path().
487  */
488 static void
490  RelOptInfo *joinrel,
491  Path *outer_path,
492  Path *inner_path,
493  List *hashclauses,
494  JoinType jointype,
495  JoinPathExtraData *extra)
496 {
497  Relids required_outer;
498  JoinCostWorkspace workspace;
499 
500  /*
501  * Check to see if proposed path is still parameterized, and reject if the
502  * parameterization wouldn't be sensible.
503  */
504  required_outer = calc_non_nestloop_required_outer(outer_path,
505  inner_path);
506  if (required_outer &&
507  !bms_overlap(required_outer, extra->param_source_rels))
508  {
509  /* Waste no memory when we reject a path here */
510  bms_free(required_outer);
511  return;
512  }
513 
514  /*
515  * See comments in try_nestloop_path(). Also note that hashjoin paths
516  * never have any output pathkeys, per comments in create_hashjoin_path.
517  */
518  initial_cost_hashjoin(root, &workspace, jointype, hashclauses,
519  outer_path, inner_path,
520  extra->sjinfo, &extra->semifactors);
521 
522  if (add_path_precheck(joinrel,
523  workspace.startup_cost, workspace.total_cost,
524  NIL, required_outer))
525  {
526  add_path(joinrel, (Path *)
528  joinrel,
529  jointype,
530  &workspace,
531  extra->sjinfo,
532  &extra->semifactors,
533  outer_path,
534  inner_path,
535  extra->restrictlist,
536  required_outer,
537  hashclauses));
538  }
539  else
540  {
541  /* Waste no memory when we reject a path here */
542  bms_free(required_outer);
543  }
544 }
545 
546 /*
547  * try_partial_hashjoin_path
548  * Consider a partial hashjoin join path; if it appears useful, push it into
549  * the joinrel's partial_pathlist via add_partial_path().
550  */
551 static void
553  RelOptInfo *joinrel,
554  Path *outer_path,
555  Path *inner_path,
556  List *hashclauses,
557  JoinType jointype,
558  JoinPathExtraData *extra)
559 {
560  JoinCostWorkspace workspace;
561 
562  /*
563  * If the inner path is parameterized, the parameterization must be fully
564  * satisfied by the proposed outer path. Parameterized partial paths are
565  * not supported. The caller should already have verified that no
566  * extra_lateral_rels are required here.
567  */
569  if (inner_path->param_info != NULL)
570  {
571  Relids inner_paramrels = inner_path->param_info->ppi_req_outer;
572 
573  if (!bms_is_empty(inner_paramrels))
574  return;
575  }
576 
577  /*
578  * Before creating a path, get a quick lower bound on what it is likely to
579  * cost. Bail out right away if it looks terrible.
580  */
581  initial_cost_hashjoin(root, &workspace, jointype, hashclauses,
582  outer_path, inner_path,
583  extra->sjinfo, &extra->semifactors);
584  if (!add_partial_path_precheck(joinrel, workspace.total_cost, NIL))
585  return;
586 
587  /* Might be good enough to be worth trying, so let's try it. */
588  add_partial_path(joinrel, (Path *)
590  joinrel,
591  jointype,
592  &workspace,
593  extra->sjinfo,
594  &extra->semifactors,
595  outer_path,
596  inner_path,
597  extra->restrictlist,
598  NULL,
599  hashclauses));
600 }
601 
602 /*
603  * clause_sides_match_join
604  * Determine whether a join clause is of the right form to use in this join.
605  *
606  * We already know that the clause is a binary opclause referencing only the
607  * rels in the current join. The point here is to check whether it has the
608  * form "outerrel_expr op innerrel_expr" or "innerrel_expr op outerrel_expr",
609  * rather than mixing outer and inner vars on either side. If it matches,
610  * we set the transient flag outer_is_left to identify which side is which.
611  */
612 static inline bool
614  RelOptInfo *innerrel)
615 {
616  if (bms_is_subset(rinfo->left_relids, outerrel->relids) &&
617  bms_is_subset(rinfo->right_relids, innerrel->relids))
618  {
619  /* lefthand side is outer */
620  rinfo->outer_is_left = true;
621  return true;
622  }
623  else if (bms_is_subset(rinfo->left_relids, innerrel->relids) &&
624  bms_is_subset(rinfo->right_relids, outerrel->relids))
625  {
626  /* righthand side is outer */
627  rinfo->outer_is_left = false;
628  return true;
629  }
630  return false; /* no good for these input relations */
631 }
632 
633 /*
634  * sort_inner_and_outer
635  * Create mergejoin join paths by explicitly sorting both the outer and
636  * inner join relations on each available merge ordering.
637  *
638  * 'joinrel' is the join relation
639  * 'outerrel' is the outer join relation
640  * 'innerrel' is the inner join relation
641  * 'jointype' is the type of join to do
642  * 'extra' contains additional input values
643  */
644 static void
646  RelOptInfo *joinrel,
647  RelOptInfo *outerrel,
648  RelOptInfo *innerrel,
649  JoinType jointype,
650  JoinPathExtraData *extra)
651 {
652  Path *outer_path;
653  Path *inner_path;
654  List *all_pathkeys;
655  ListCell *l;
656 
657  /*
658  * We only consider the cheapest-total-cost input paths, since we are
659  * assuming here that a sort is required. We will consider
660  * cheapest-startup-cost input paths later, and only if they don't need a
661  * sort.
662  *
663  * This function intentionally does not consider parameterized input
664  * paths, except when the cheapest-total is parameterized. If we did so,
665  * we'd have a combinatorial explosion of mergejoin paths of dubious
666  * value. This interacts with decisions elsewhere that also discriminate
667  * against mergejoins with parameterized inputs; see comments in
668  * src/backend/optimizer/README.
669  */
670  outer_path = outerrel->cheapest_total_path;
671  inner_path = innerrel->cheapest_total_path;
672 
673  /*
674  * If either cheapest-total path is parameterized by the other rel, we
675  * can't use a mergejoin. (There's no use looking for alternative input
676  * paths, since these should already be the least-parameterized available
677  * paths.)
678  */
679  if (PATH_PARAM_BY_REL(outer_path, innerrel) ||
680  PATH_PARAM_BY_REL(inner_path, outerrel))
681  return;
682 
683  /*
684  * If unique-ification is requested, do it and then handle as a plain
685  * inner join.
686  */
687  if (jointype == JOIN_UNIQUE_OUTER)
688  {
689  outer_path = (Path *) create_unique_path(root, outerrel,
690  outer_path, extra->sjinfo);
691  Assert(outer_path);
692  jointype = JOIN_INNER;
693  }
694  else if (jointype == JOIN_UNIQUE_INNER)
695  {
696  inner_path = (Path *) create_unique_path(root, innerrel,
697  inner_path, extra->sjinfo);
698  Assert(inner_path);
699  jointype = JOIN_INNER;
700  }
701 
702  /*
703  * Each possible ordering of the available mergejoin clauses will generate
704  * a differently-sorted result path at essentially the same cost. We have
705  * no basis for choosing one over another at this level of joining, but
706  * some sort orders may be more useful than others for higher-level
707  * mergejoins, so it's worth considering multiple orderings.
708  *
709  * Actually, it's not quite true that every mergeclause ordering will
710  * generate a different path order, because some of the clauses may be
711  * partially redundant (refer to the same EquivalenceClasses). Therefore,
712  * what we do is convert the mergeclause list to a list of canonical
713  * pathkeys, and then consider different orderings of the pathkeys.
714  *
715  * Generating a path for *every* permutation of the pathkeys doesn't seem
716  * like a winning strategy; the cost in planning time is too high. For
717  * now, we generate one path for each pathkey, listing that pathkey first
718  * and the rest in random order. This should allow at least a one-clause
719  * mergejoin without re-sorting against any other possible mergejoin
720  * partner path. But if we've not guessed the right ordering of secondary
721  * keys, we may end up evaluating clauses as qpquals when they could have
722  * been done as mergeclauses. (In practice, it's rare that there's more
723  * than two or three mergeclauses, so expending a huge amount of thought
724  * on that is probably not worth it.)
725  *
726  * The pathkey order returned by select_outer_pathkeys_for_merge() has
727  * some heuristics behind it (see that function), so be sure to try it
728  * exactly as-is as well as making variants.
729  */
730  all_pathkeys = select_outer_pathkeys_for_merge(root,
731  extra->mergeclause_list,
732  joinrel);
733 
734  foreach(l, all_pathkeys)
735  {
736  List *front_pathkey = (List *) lfirst(l);
737  List *cur_mergeclauses;
738  List *outerkeys;
739  List *innerkeys;
740  List *merge_pathkeys;
741 
742  /* Make a pathkey list with this guy first */
743  if (l != list_head(all_pathkeys))
744  outerkeys = lcons(front_pathkey,
745  list_delete_ptr(list_copy(all_pathkeys),
746  front_pathkey));
747  else
748  outerkeys = all_pathkeys; /* no work at first one... */
749 
750  /* Sort the mergeclauses into the corresponding ordering */
751  cur_mergeclauses = find_mergeclauses_for_pathkeys(root,
752  outerkeys,
753  true,
754  extra->mergeclause_list);
755 
756  /* Should have used them all... */
757  Assert(list_length(cur_mergeclauses) == list_length(extra->mergeclause_list));
758 
759  /* Build sort pathkeys for the inner side */
760  innerkeys = make_inner_pathkeys_for_merge(root,
761  cur_mergeclauses,
762  outerkeys);
763 
764  /* Build pathkeys representing output sort order */
765  merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
766  outerkeys);
767 
768  /*
769  * And now we can make the path.
770  *
771  * Note: it's possible that the cheapest paths will already be sorted
772  * properly. try_mergejoin_path will detect that case and suppress an
773  * explicit sort step, so we needn't do so here.
774  */
775  try_mergejoin_path(root,
776  joinrel,
777  outer_path,
778  inner_path,
779  merge_pathkeys,
780  cur_mergeclauses,
781  outerkeys,
782  innerkeys,
783  jointype,
784  extra);
785  }
786 }
787 
788 /*
789  * generate_mergejoin_paths
790  * Creates possible mergejoin paths for input outerpath.
791  *
792  * We generate mergejoins if mergejoin clauses are available. We have
793  * two ways to generate the inner path for a mergejoin: sort the cheapest
794  * inner path, or use an inner path that is already suitably ordered for the
795  * merge. If we have several mergeclauses, it could be that there is no inner
796  * path (or only a very expensive one) for the full list of mergeclauses, but
797  * better paths exist if we truncate the mergeclause list (thereby discarding
798  * some sort key requirements). So, we consider truncations of the
799  * mergeclause list as well as the full list. (Ideally we'd consider all
800  * subsets of the mergeclause list, but that seems way too expensive.)
801  */
802 static void
804  RelOptInfo *joinrel,
805  RelOptInfo *innerrel,
806  Path *outerpath,
807  JoinType jointype,
808  JoinPathExtraData *extra,
809  bool useallclauses,
810  Path *inner_cheapest_total,
811  List *merge_pathkeys)
812 {
813  List *mergeclauses;
814  List *innersortkeys;
815  List *trialsortkeys;
816  Path *cheapest_startup_inner;
817  Path *cheapest_total_inner;
818  JoinType save_jointype = jointype;
819  int num_sortkeys;
820  int sortkeycnt;
821 
822  if (jointype == JOIN_UNIQUE_OUTER || jointype == JOIN_UNIQUE_INNER)
823  jointype = JOIN_INNER;
824 
825  /* Look for useful mergeclauses (if any) */
826  mergeclauses = find_mergeclauses_for_pathkeys(root,
827  outerpath->pathkeys,
828  true,
829  extra->mergeclause_list);
830 
831  /*
832  * Done with this outer path if no chance for a mergejoin.
833  *
834  * Special corner case: for "x FULL JOIN y ON true", there will be no join
835  * clauses at all. Ordinarily we'd generate a clauseless nestloop path,
836  * but since mergejoin is our only join type that supports FULL JOIN
837  * without any join clauses, it's necessary to generate a clauseless
838  * mergejoin path instead.
839  */
840  if (mergeclauses == NIL)
841  {
842  if (jointype == JOIN_FULL)
843  /* okay to try for mergejoin */ ;
844  else
845  return;
846  }
847  if (useallclauses &&
848  list_length(mergeclauses) != list_length(extra->mergeclause_list))
849  return;
850 
851  /* Compute the required ordering of the inner path */
852  innersortkeys = make_inner_pathkeys_for_merge(root,
853  mergeclauses,
854  outerpath->pathkeys);
855 
856  /*
857  * Generate a mergejoin on the basis of sorting the cheapest inner. Since
858  * a sort will be needed, only cheapest total cost matters. (But
859  * try_mergejoin_path will do the right thing if inner_cheapest_total is
860  * already correctly sorted.)
861  */
862  try_mergejoin_path(root,
863  joinrel,
864  outerpath,
865  inner_cheapest_total,
866  merge_pathkeys,
867  mergeclauses,
868  NIL,
869  innersortkeys,
870  jointype,
871  extra);
872 
873  /* Can't do anything else if inner path needs to be unique'd */
874  if (save_jointype == JOIN_UNIQUE_INNER)
875  return;
876 
877  /*
878  * Look for presorted inner paths that satisfy the innersortkey list ---
879  * or any truncation thereof, if we are allowed to build a mergejoin using
880  * a subset of the merge clauses. Here, we consider both cheap startup
881  * cost and cheap total cost.
882  *
883  * Currently we do not consider parameterized inner paths here. This
884  * interacts with decisions elsewhere that also discriminate against
885  * mergejoins with parameterized inputs; see comments in
886  * src/backend/optimizer/README.
887  *
888  * As we shorten the sortkey list, we should consider only paths that are
889  * strictly cheaper than (in particular, not the same as) any path found
890  * in an earlier iteration. Otherwise we'd be intentionally using fewer
891  * merge keys than a given path allows (treating the rest as plain
892  * joinquals), which is unlikely to be a good idea. Also, eliminating
893  * paths here on the basis of compare_path_costs is a lot cheaper than
894  * building the mergejoin path only to throw it away.
895  *
896  * If inner_cheapest_total is well enough sorted to have not required a
897  * sort in the path made above, we shouldn't make a duplicate path with
898  * it, either. We handle that case with the same logic that handles the
899  * previous consideration, by initializing the variables that track
900  * cheapest-so-far properly. Note that we do NOT reject
901  * inner_cheapest_total if we find it matches some shorter set of
902  * pathkeys. That case corresponds to using fewer mergekeys to avoid
903  * sorting inner_cheapest_total, whereas we did sort it above, so the
904  * plans being considered are different.
905  */
906  if (pathkeys_contained_in(innersortkeys,
907  inner_cheapest_total->pathkeys))
908  {
909  /* inner_cheapest_total didn't require a sort */
910  cheapest_startup_inner = inner_cheapest_total;
911  cheapest_total_inner = inner_cheapest_total;
912  }
913  else
914  {
915  /* it did require a sort, at least for the full set of keys */
916  cheapest_startup_inner = NULL;
917  cheapest_total_inner = NULL;
918  }
919  num_sortkeys = list_length(innersortkeys);
920  if (num_sortkeys > 1 && !useallclauses)
921  trialsortkeys = list_copy(innersortkeys); /* need modifiable copy */
922  else
923  trialsortkeys = innersortkeys; /* won't really truncate */
924 
925  for (sortkeycnt = num_sortkeys; sortkeycnt > 0; sortkeycnt--)
926  {
927  Path *innerpath;
928  List *newclauses = NIL;
929 
930  /*
931  * Look for an inner path ordered well enough for the first
932  * 'sortkeycnt' innersortkeys. NB: trialsortkeys list is modified
933  * destructively, which is why we made a copy...
934  */
935  trialsortkeys = list_truncate(trialsortkeys, sortkeycnt);
936  innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
937  trialsortkeys,
938  NULL,
939  TOTAL_COST);
940  if (innerpath != NULL &&
941  (cheapest_total_inner == NULL ||
942  compare_path_costs(innerpath, cheapest_total_inner,
943  TOTAL_COST) < 0))
944  {
945  /* Found a cheap (or even-cheaper) sorted path */
946  /* Select the right mergeclauses, if we didn't already */
947  if (sortkeycnt < num_sortkeys)
948  {
949  newclauses =
951  trialsortkeys,
952  false,
953  mergeclauses);
954  Assert(newclauses != NIL);
955  }
956  else
957  newclauses = mergeclauses;
958  try_mergejoin_path(root,
959  joinrel,
960  outerpath,
961  innerpath,
962  merge_pathkeys,
963  newclauses,
964  NIL,
965  NIL,
966  jointype,
967  extra);
968  cheapest_total_inner = innerpath;
969  }
970  /* Same on the basis of cheapest startup cost ... */
971  innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
972  trialsortkeys,
973  NULL,
974  STARTUP_COST);
975  if (innerpath != NULL &&
976  (cheapest_startup_inner == NULL ||
977  compare_path_costs(innerpath, cheapest_startup_inner,
978  STARTUP_COST) < 0))
979  {
980  /* Found a cheap (or even-cheaper) sorted path */
981  if (innerpath != cheapest_total_inner)
982  {
983  /*
984  * Avoid rebuilding clause list if we already made one; saves
985  * memory in big join trees...
986  */
987  if (newclauses == NIL)
988  {
989  if (sortkeycnt < num_sortkeys)
990  {
991  newclauses =
993  trialsortkeys,
994  false,
995  mergeclauses);
996  Assert(newclauses != NIL);
997  }
998  else
999  newclauses = mergeclauses;
1000  }
1001  try_mergejoin_path(root,
1002  joinrel,
1003  outerpath,
1004  innerpath,
1005  merge_pathkeys,
1006  newclauses,
1007  NIL,
1008  NIL,
1009  jointype,
1010  extra);
1011  }
1012  cheapest_startup_inner = innerpath;
1013  }
1014 
1015  /*
1016  * Don't consider truncated sortkeys if we need all clauses.
1017  */
1018  if (useallclauses)
1019  break;
1020  }
1021 }
1022 
1023 /*
1024  * match_unsorted_outer
1025  * Creates possible join paths for processing a single join relation
1026  * 'joinrel' by employing either iterative substitution or
1027  * mergejoining on each of its possible outer paths (considering
1028  * only outer paths that are already ordered well enough for merging).
1029  *
1030  * We always generate a nestloop path for each available outer path.
1031  * In fact we may generate as many as five: one on the cheapest-total-cost
1032  * inner path, one on the same with materialization, one on the
1033  * cheapest-startup-cost inner path (if different), one on the
1034  * cheapest-total inner-indexscan path (if any), and one on the
1035  * cheapest-startup inner-indexscan path (if different).
1036  *
1037  * We also consider mergejoins if mergejoin clauses are available. See
1038  * detailed comments in generate_mergejoin_paths.
1039  *
1040  * 'joinrel' is the join relation
1041  * 'outerrel' is the outer join relation
1042  * 'innerrel' is the inner join relation
1043  * 'jointype' is the type of join to do
1044  * 'extra' contains additional input values
1045  */
1046 static void
1048  RelOptInfo *joinrel,
1049  RelOptInfo *outerrel,
1050  RelOptInfo *innerrel,
1051  JoinType jointype,
1052  JoinPathExtraData *extra)
1053 {
1054  JoinType save_jointype = jointype;
1055  bool nestjoinOK;
1056  bool useallclauses;
1057  Path *inner_cheapest_total = innerrel->cheapest_total_path;
1058  Path *matpath = NULL;
1059  ListCell *lc1;
1060 
1061  /*
1062  * Nestloop only supports inner, left, semi, and anti joins. Also, if we
1063  * are doing a right or full mergejoin, we must use *all* the mergeclauses
1064  * as join clauses, else we will not have a valid plan. (Although these
1065  * two flags are currently inverses, keep them separate for clarity and
1066  * possible future changes.)
1067  */
1068  switch (jointype)
1069  {
1070  case JOIN_INNER:
1071  case JOIN_LEFT:
1072  case JOIN_SEMI:
1073  case JOIN_ANTI:
1074  nestjoinOK = true;
1075  useallclauses = false;
1076  break;
1077  case JOIN_RIGHT:
1078  case JOIN_FULL:
1079  nestjoinOK = false;
1080  useallclauses = true;
1081  break;
1082  case JOIN_UNIQUE_OUTER:
1083  case JOIN_UNIQUE_INNER:
1084  jointype = JOIN_INNER;
1085  nestjoinOK = true;
1086  useallclauses = false;
1087  break;
1088  default:
1089  elog(ERROR, "unrecognized join type: %d",
1090  (int) jointype);
1091  nestjoinOK = false; /* keep compiler quiet */
1092  useallclauses = false;
1093  break;
1094  }
1095 
1096  /*
1097  * If inner_cheapest_total is parameterized by the outer rel, ignore it;
1098  * we will consider it below as a member of cheapest_parameterized_paths,
1099  * but the other possibilities considered in this routine aren't usable.
1100  */
1101  if (PATH_PARAM_BY_REL(inner_cheapest_total, outerrel))
1102  inner_cheapest_total = NULL;
1103 
1104  /*
1105  * If we need to unique-ify the inner path, we will consider only the
1106  * cheapest-total inner.
1107  */
1108  if (save_jointype == JOIN_UNIQUE_INNER)
1109  {
1110  /* No way to do this with an inner path parameterized by outer rel */
1111  if (inner_cheapest_total == NULL)
1112  return;
1113  inner_cheapest_total = (Path *)
1114  create_unique_path(root, innerrel, inner_cheapest_total, extra->sjinfo);
1115  Assert(inner_cheapest_total);
1116  }
1117  else if (nestjoinOK)
1118  {
1119  /*
1120  * Consider materializing the cheapest inner path, unless
1121  * enable_material is off or the path in question materializes its
1122  * output anyway.
1123  */
1124  if (enable_material && inner_cheapest_total != NULL &&
1125  !ExecMaterializesOutput(inner_cheapest_total->pathtype))
1126  matpath = (Path *)
1127  create_material_path(innerrel, inner_cheapest_total);
1128  }
1129 
1130  foreach(lc1, outerrel->pathlist)
1131  {
1132  Path *outerpath = (Path *) lfirst(lc1);
1133  List *merge_pathkeys;
1134 
1135  /*
1136  * We cannot use an outer path that is parameterized by the inner rel.
1137  */
1138  if (PATH_PARAM_BY_REL(outerpath, innerrel))
1139  continue;
1140 
1141  /*
1142  * If we need to unique-ify the outer path, it's pointless to consider
1143  * any but the cheapest outer. (XXX we don't consider parameterized
1144  * outers, nor inners, for unique-ified cases. Should we?)
1145  */
1146  if (save_jointype == JOIN_UNIQUE_OUTER)
1147  {
1148  if (outerpath != outerrel->cheapest_total_path)
1149  continue;
1150  outerpath = (Path *) create_unique_path(root, outerrel,
1151  outerpath, extra->sjinfo);
1152  Assert(outerpath);
1153  }
1154 
1155  /*
1156  * The result will have this sort order (even if it is implemented as
1157  * a nestloop, and even if some of the mergeclauses are implemented by
1158  * qpquals rather than as true mergeclauses):
1159  */
1160  merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
1161  outerpath->pathkeys);
1162 
1163  if (save_jointype == JOIN_UNIQUE_INNER)
1164  {
1165  /*
1166  * Consider nestloop join, but only with the unique-ified cheapest
1167  * inner path
1168  */
1169  try_nestloop_path(root,
1170  joinrel,
1171  outerpath,
1172  inner_cheapest_total,
1173  merge_pathkeys,
1174  jointype,
1175  extra);
1176  }
1177  else if (nestjoinOK)
1178  {
1179  /*
1180  * Consider nestloop joins using this outer path and various
1181  * available paths for the inner relation. We consider the
1182  * cheapest-total paths for each available parameterization of the
1183  * inner relation, including the unparameterized case.
1184  */
1185  ListCell *lc2;
1186 
1187  foreach(lc2, innerrel->cheapest_parameterized_paths)
1188  {
1189  Path *innerpath = (Path *) lfirst(lc2);
1190 
1191  try_nestloop_path(root,
1192  joinrel,
1193  outerpath,
1194  innerpath,
1195  merge_pathkeys,
1196  jointype,
1197  extra);
1198  }
1199 
1200  /* Also consider materialized form of the cheapest inner path */
1201  if (matpath != NULL)
1202  try_nestloop_path(root,
1203  joinrel,
1204  outerpath,
1205  matpath,
1206  merge_pathkeys,
1207  jointype,
1208  extra);
1209  }
1210 
1211  /* Can't do anything else if outer path needs to be unique'd */
1212  if (save_jointype == JOIN_UNIQUE_OUTER)
1213  continue;
1214 
1215  /* Can't do anything else if inner rel is parameterized by outer */
1216  if (inner_cheapest_total == NULL)
1217  continue;
1218 
1219  /* Generate merge join paths */
1220  generate_mergejoin_paths(root, joinrel, innerrel, outerpath,
1221  save_jointype, extra, useallclauses,
1222  inner_cheapest_total, merge_pathkeys);
1223  }
1224 
1225  /*
1226  * If the joinrel is parallel-safe and the join type supports nested
1227  * loops, we may be able to consider a partial nestloop plan. However, we
1228  * can't handle JOIN_UNIQUE_OUTER, because the outer path will be partial,
1229  * and therefore we won't be able to properly guarantee uniqueness. Nor
1230  * can we handle extra_lateral_rels, since partial paths must not be
1231  * parameterized.
1232  */
1233  if (joinrel->consider_parallel && nestjoinOK &&
1234  save_jointype != JOIN_UNIQUE_OUTER &&
1235  bms_is_empty(joinrel->lateral_relids))
1236  consider_parallel_nestloop(root, joinrel, outerrel, innerrel,
1237  save_jointype, extra);
1238 }
1239 
1240 /*
1241  * consider_parallel_nestloop
1242  * Try to build partial paths for a joinrel by joining a partial path for the
1243  * outer relation to a complete path for the inner relation.
1244  *
1245  * 'joinrel' is the join relation
1246  * 'outerrel' is the outer join relation
1247  * 'innerrel' is the inner join relation
1248  * 'jointype' is the type of join to do
1249  * 'extra' contains additional input values
1250  */
1251 static void
1253  RelOptInfo *joinrel,
1254  RelOptInfo *outerrel,
1255  RelOptInfo *innerrel,
1256  JoinType jointype,
1257  JoinPathExtraData *extra)
1258 {
1259  JoinType save_jointype = jointype;
1260  ListCell *lc1;
1261 
1262  if (jointype == JOIN_UNIQUE_INNER)
1263  jointype = JOIN_INNER;
1264 
1265  foreach(lc1, outerrel->partial_pathlist)
1266  {
1267  Path *outerpath = (Path *) lfirst(lc1);
1268  List *pathkeys;
1269  ListCell *lc2;
1270 
1271  /* Figure out what useful ordering any paths we create will have. */
1272  pathkeys = build_join_pathkeys(root, joinrel, jointype,
1273  outerpath->pathkeys);
1274 
1275  /*
1276  * Try the cheapest parameterized paths; only those which will produce
1277  * an unparameterized path when joined to this outerrel will survive
1278  * try_partial_nestloop_path. The cheapest unparameterized path is
1279  * also in this list.
1280  */
1281  foreach(lc2, innerrel->cheapest_parameterized_paths)
1282  {
1283  Path *innerpath = (Path *) lfirst(lc2);
1284 
1285  /* Can't join to an inner path that is not parallel-safe */
1286  if (!innerpath->parallel_safe)
1287  continue;
1288 
1289  /*
1290  * If we're doing JOIN_UNIQUE_INNER, we can only use the inner's
1291  * cheapest_total_path, and we have to unique-ify it. (We might
1292  * be able to relax this to allow other safe, unparameterized
1293  * inner paths, but right now create_unique_path is not on board
1294  * with that.)
1295  */
1296  if (save_jointype == JOIN_UNIQUE_INNER)
1297  {
1298  if (innerpath != innerrel->cheapest_total_path)
1299  continue;
1300  innerpath = (Path *) create_unique_path(root, innerrel,
1301  innerpath,
1302  extra->sjinfo);
1303  Assert(innerpath);
1304  }
1305 
1306  try_partial_nestloop_path(root, joinrel, outerpath, innerpath,
1307  pathkeys, jointype, extra);
1308  }
1309  }
1310 }
1311 
1312 /*
1313  * hash_inner_and_outer
1314  * Create hashjoin join paths by explicitly hashing both the outer and
1315  * inner keys of each available hash clause.
1316  *
1317  * 'joinrel' is the join relation
1318  * 'outerrel' is the outer join relation
1319  * 'innerrel' is the inner join relation
1320  * 'jointype' is the type of join to do
1321  * 'extra' contains additional input values
1322  */
1323 static void
1325  RelOptInfo *joinrel,
1326  RelOptInfo *outerrel,
1327  RelOptInfo *innerrel,
1328  JoinType jointype,
1329  JoinPathExtraData *extra)
1330 {
1331  JoinType save_jointype = jointype;
1332  bool isouterjoin = IS_OUTER_JOIN(jointype);
1333  List *hashclauses;
1334  ListCell *l;
1335 
1336  /*
1337  * We need to build only one hashclauses list for any given pair of outer
1338  * and inner relations; all of the hashable clauses will be used as keys.
1339  *
1340  * Scan the join's restrictinfo list to find hashjoinable clauses that are
1341  * usable with this pair of sub-relations.
1342  */
1343  hashclauses = NIL;
1344  foreach(l, extra->restrictlist)
1345  {
1346  RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
1347 
1348  /*
1349  * If processing an outer join, only use its own join clauses for
1350  * hashing. For inner joins we need not be so picky.
1351  */
1352  if (isouterjoin && restrictinfo->is_pushed_down)
1353  continue;
1354 
1355  if (!restrictinfo->can_join ||
1356  restrictinfo->hashjoinoperator == InvalidOid)
1357  continue; /* not hashjoinable */
1358 
1359  /*
1360  * Check if clause has the form "outer op inner" or "inner op outer".
1361  */
1362  if (!clause_sides_match_join(restrictinfo, outerrel, innerrel))
1363  continue; /* no good for these input relations */
1364 
1365  hashclauses = lappend(hashclauses, restrictinfo);
1366  }
1367 
1368  /* If we found any usable hashclauses, make paths */
1369  if (hashclauses)
1370  {
1371  /*
1372  * We consider both the cheapest-total-cost and cheapest-startup-cost
1373  * outer paths. There's no need to consider any but the
1374  * cheapest-total-cost inner path, however.
1375  */
1376  Path *cheapest_startup_outer = outerrel->cheapest_startup_path;
1377  Path *cheapest_total_outer = outerrel->cheapest_total_path;
1378  Path *cheapest_total_inner = innerrel->cheapest_total_path;
1379 
1380  /*
1381  * If either cheapest-total path is parameterized by the other rel, we
1382  * can't use a hashjoin. (There's no use looking for alternative
1383  * input paths, since these should already be the least-parameterized
1384  * available paths.)
1385  */
1386  if (PATH_PARAM_BY_REL(cheapest_total_outer, innerrel) ||
1387  PATH_PARAM_BY_REL(cheapest_total_inner, outerrel))
1388  return;
1389 
1390  /* Unique-ify if need be; we ignore parameterized possibilities */
1391  if (jointype == JOIN_UNIQUE_OUTER)
1392  {
1393  cheapest_total_outer = (Path *)
1394  create_unique_path(root, outerrel,
1395  cheapest_total_outer, extra->sjinfo);
1396  Assert(cheapest_total_outer);
1397  jointype = JOIN_INNER;
1398  try_hashjoin_path(root,
1399  joinrel,
1400  cheapest_total_outer,
1401  cheapest_total_inner,
1402  hashclauses,
1403  jointype,
1404  extra);
1405  /* no possibility of cheap startup here */
1406  }
1407  else if (jointype == JOIN_UNIQUE_INNER)
1408  {
1409  cheapest_total_inner = (Path *)
1410  create_unique_path(root, innerrel,
1411  cheapest_total_inner, extra->sjinfo);
1412  Assert(cheapest_total_inner);
1413  jointype = JOIN_INNER;
1414  try_hashjoin_path(root,
1415  joinrel,
1416  cheapest_total_outer,
1417  cheapest_total_inner,
1418  hashclauses,
1419  jointype,
1420  extra);
1421  if (cheapest_startup_outer != NULL &&
1422  cheapest_startup_outer != cheapest_total_outer)
1423  try_hashjoin_path(root,
1424  joinrel,
1425  cheapest_startup_outer,
1426  cheapest_total_inner,
1427  hashclauses,
1428  jointype,
1429  extra);
1430  }
1431  else
1432  {
1433  /*
1434  * For other jointypes, we consider the cheapest startup outer
1435  * together with the cheapest total inner, and then consider
1436  * pairings of cheapest-total paths including parameterized ones.
1437  * There is no use in generating parameterized paths on the basis
1438  * of possibly cheap startup cost, so this is sufficient.
1439  */
1440  ListCell *lc1;
1441  ListCell *lc2;
1442 
1443  if (cheapest_startup_outer != NULL)
1444  try_hashjoin_path(root,
1445  joinrel,
1446  cheapest_startup_outer,
1447  cheapest_total_inner,
1448  hashclauses,
1449  jointype,
1450  extra);
1451 
1452  foreach(lc1, outerrel->cheapest_parameterized_paths)
1453  {
1454  Path *outerpath = (Path *) lfirst(lc1);
1455 
1456  /*
1457  * We cannot use an outer path that is parameterized by the
1458  * inner rel.
1459  */
1460  if (PATH_PARAM_BY_REL(outerpath, innerrel))
1461  continue;
1462 
1463  foreach(lc2, innerrel->cheapest_parameterized_paths)
1464  {
1465  Path *innerpath = (Path *) lfirst(lc2);
1466 
1467  /*
1468  * We cannot use an inner path that is parameterized by
1469  * the outer rel, either.
1470  */
1471  if (PATH_PARAM_BY_REL(innerpath, outerrel))
1472  continue;
1473 
1474  if (outerpath == cheapest_startup_outer &&
1475  innerpath == cheapest_total_inner)
1476  continue; /* already tried it */
1477 
1478  try_hashjoin_path(root,
1479  joinrel,
1480  outerpath,
1481  innerpath,
1482  hashclauses,
1483  jointype,
1484  extra);
1485  }
1486  }
1487  }
1488 
1489  /*
1490  * If the joinrel is parallel-safe, we may be able to consider a
1491  * partial hash join. However, we can't handle JOIN_UNIQUE_OUTER,
1492  * because the outer path will be partial, and therefore we won't be
1493  * able to properly guarantee uniqueness. Similarly, we can't handle
1494  * JOIN_FULL and JOIN_RIGHT, because they can produce false null
1495  * extended rows. Also, the resulting path must not be parameterized.
1496  */
1497  if (joinrel->consider_parallel &&
1498  save_jointype != JOIN_UNIQUE_OUTER &&
1499  save_jointype != JOIN_FULL &&
1500  save_jointype != JOIN_RIGHT &&
1501  outerrel->partial_pathlist != NIL &&
1502  bms_is_empty(joinrel->lateral_relids))
1503  {
1504  Path *cheapest_partial_outer;
1505  Path *cheapest_safe_inner = NULL;
1506 
1507  cheapest_partial_outer =
1508  (Path *) linitial(outerrel->partial_pathlist);
1509 
1510  /*
1511  * Normally, given that the joinrel is parallel-safe, the cheapest
1512  * total inner path will also be parallel-safe, but if not, we'll
1513  * have to search cheapest_parameterized_paths for the cheapest
1514  * safe, unparameterized inner path. If doing JOIN_UNIQUE_INNER,
1515  * we can't use any alternative inner path.
1516  */
1517  if (cheapest_total_inner->parallel_safe)
1518  cheapest_safe_inner = cheapest_total_inner;
1519  else if (save_jointype != JOIN_UNIQUE_INNER)
1520  {
1521  ListCell *lc;
1522 
1523  foreach(lc, innerrel->cheapest_parameterized_paths)
1524  {
1525  Path *innerpath = (Path *) lfirst(lc);
1526 
1527  if (innerpath->parallel_safe &&
1528  bms_is_empty(PATH_REQ_OUTER(innerpath)))
1529  {
1530  cheapest_safe_inner = innerpath;
1531  break;
1532  }
1533  }
1534  }
1535 
1536  if (cheapest_safe_inner != NULL)
1537  try_partial_hashjoin_path(root, joinrel,
1538  cheapest_partial_outer,
1539  cheapest_safe_inner,
1540  hashclauses, jointype, extra);
1541  }
1542  }
1543 }
1544 
1545 /*
1546  * select_mergejoin_clauses
1547  * Select mergejoin clauses that are usable for a particular join.
1548  * Returns a list of RestrictInfo nodes for those clauses.
1549  *
1550  * *mergejoin_allowed is normally set to TRUE, but it is set to FALSE if
1551  * this is a right/full join and there are nonmergejoinable join clauses.
1552  * The executor's mergejoin machinery cannot handle such cases, so we have
1553  * to avoid generating a mergejoin plan. (Note that this flag does NOT
1554  * consider whether there are actually any mergejoinable clauses. This is
1555  * correct because in some cases we need to build a clauseless mergejoin.
1556  * Simply returning NIL is therefore not enough to distinguish safe from
1557  * unsafe cases.)
1558  *
1559  * We also mark each selected RestrictInfo to show which side is currently
1560  * being considered as outer. These are transient markings that are only
1561  * good for the duration of the current add_paths_to_joinrel() call!
1562  *
1563  * We examine each restrictinfo clause known for the join to see
1564  * if it is mergejoinable and involves vars from the two sub-relations
1565  * currently of interest.
1566  */
1567 static List *
1569  RelOptInfo *joinrel,
1570  RelOptInfo *outerrel,
1571  RelOptInfo *innerrel,
1572  List *restrictlist,
1573  JoinType jointype,
1574  bool *mergejoin_allowed)
1575 {
1576  List *result_list = NIL;
1577  bool isouterjoin = IS_OUTER_JOIN(jointype);
1578  bool have_nonmergeable_joinclause = false;
1579  ListCell *l;
1580 
1581  foreach(l, restrictlist)
1582  {
1583  RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
1584 
1585  /*
1586  * If processing an outer join, only use its own join clauses in the
1587  * merge. For inner joins we can use pushed-down clauses too. (Note:
1588  * we don't set have_nonmergeable_joinclause here because pushed-down
1589  * clauses will become otherquals not joinquals.)
1590  */
1591  if (isouterjoin && restrictinfo->is_pushed_down)
1592  continue;
1593 
1594  /* Check that clause is a mergeable operator clause */
1595  if (!restrictinfo->can_join ||
1596  restrictinfo->mergeopfamilies == NIL)
1597  {
1598  /*
1599  * The executor can handle extra joinquals that are constants, but
1600  * not anything else, when doing right/full merge join. (The
1601  * reason to support constants is so we can do FULL JOIN ON
1602  * FALSE.)
1603  */
1604  if (!restrictinfo->clause || !IsA(restrictinfo->clause, Const))
1605  have_nonmergeable_joinclause = true;
1606  continue; /* not mergejoinable */
1607  }
1608 
1609  /*
1610  * Check if clause has the form "outer op inner" or "inner op outer".
1611  */
1612  if (!clause_sides_match_join(restrictinfo, outerrel, innerrel))
1613  {
1614  have_nonmergeable_joinclause = true;
1615  continue; /* no good for these input relations */
1616  }
1617 
1618  /*
1619  * Insist that each side have a non-redundant eclass. This
1620  * restriction is needed because various bits of the planner expect
1621  * that each clause in a merge be associable with some pathkey in a
1622  * canonical pathkey list, but redundant eclasses can't appear in
1623  * canonical sort orderings. (XXX it might be worth relaxing this,
1624  * but not enough time to address it for 8.3.)
1625  *
1626  * Note: it would be bad if this condition failed for an otherwise
1627  * mergejoinable FULL JOIN clause, since that would result in
1628  * undesirable planner failure. I believe that is not possible
1629  * however; a variable involved in a full join could only appear in
1630  * below_outer_join eclasses, which aren't considered redundant.
1631  *
1632  * This case *can* happen for left/right join clauses: the outer-side
1633  * variable could be equated to a constant. Because we will propagate
1634  * that constant across the join clause, the loss of ability to do a
1635  * mergejoin is not really all that big a deal, and so it's not clear
1636  * that improving this is important.
1637  */
1638  update_mergeclause_eclasses(root, restrictinfo);
1639 
1640  if (EC_MUST_BE_REDUNDANT(restrictinfo->left_ec) ||
1641  EC_MUST_BE_REDUNDANT(restrictinfo->right_ec))
1642  {
1643  have_nonmergeable_joinclause = true;
1644  continue; /* can't handle redundant eclasses */
1645  }
1646 
1647  result_list = lappend(result_list, restrictinfo);
1648  }
1649 
1650  /*
1651  * Report whether mergejoin is allowed (see comment at top of function).
1652  */
1653  switch (jointype)
1654  {
1655  case JOIN_RIGHT:
1656  case JOIN_FULL:
1657  *mergejoin_allowed = !have_nonmergeable_joinclause;
1658  break;
1659  default:
1660  *mergejoin_allowed = true;
1661  break;
1662  }
1663 
1664  return result_list;
1665 }
#define NIL
Definition: pg_list.h:69
void initial_cost_nestloop(PlannerInfo *root, JoinCostWorkspace *workspace, JoinType jointype, Path *outer_path, Path *inner_path, SpecialJoinInfo *sjinfo, SemiAntiJoinFactors *semifactors)
Definition: costsize.c:1901
#define IsA(nodeptr, _type_)
Definition: nodes.h:559
static void try_hashjoin_path(PlannerInfo *root, RelOptInfo *joinrel, Path *outer_path, Path *inner_path, List *hashclauses, JoinType jointype, JoinPathExtraData *extra)
Definition: joinpath.c:489
void add_path(RelOptInfo *parent_rel, Path *new_path)
Definition: pathnode.c:412
SemiAntiJoinFactors semifactors
Definition: relation.h:2052
List * join_info_list
Definition: relation.h:247
Relids min_righthand
Definition: relation.h:1808
static void try_nestloop_path(PlannerInfo *root, RelOptInfo *joinrel, Path *outer_path, Path *inner_path, List *pathkeys, JoinType jointype, JoinPathExtraData *extra)
Definition: joinpath.c:279
bool add_partial_path_precheck(RelOptInfo *parent_rel, Cost total_cost, List *pathkeys)
Definition: pathnode.c:876
bool add_path_precheck(RelOptInfo *parent_rel, Cost startup_cost, Cost total_cost, List *pathkeys, Relids required_outer)
Definition: pathnode.c:647
struct Path * cheapest_startup_path
Definition: relation.h:507
MaterialPath * create_material_path(RelOptInfo *rel, Path *subpath)
Definition: pathnode.c:1385
Bitmapset * bms_difference(const Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:283
static void try_partial_hashjoin_path(PlannerInfo *root, RelOptInfo *joinrel, Path *outer_path, Path *inner_path, List *hashclauses, JoinType jointype, JoinPathExtraData *extra)
Definition: joinpath.c:552
List * list_truncate(List *list, int new_size)
Definition: list.c:350
#define IS_OUTER_JOIN(jointype)
Definition: nodes.h:714
ParamPathInfo * param_info
Definition: relation.h:897
Relids calc_non_nestloop_required_outer(Path *outer_path, Path *inner_path)
Definition: pathnode.c:1912
List * list_copy(const List *oldlist)
Definition: list.c:1160
List * make_inner_pathkeys_for_merge(PlannerInfo *root, List *mergeclauses, List *outer_pathkeys)
Definition: pathkeys.c:1265
List * partial_pathlist
Definition: relation.h:506
Relids left_relids
Definition: relation.h:1664
EquivalenceClass * right_ec
Definition: relation.h:1686
List * cheapest_parameterized_paths
Definition: relation.h:510
UniquePath * create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, SpecialJoinInfo *sjinfo)
Definition: pathnode.c:1424
List * list_delete_ptr(List *list, void *datum)
Definition: list.c:590
List * build_join_pathkeys(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, List *outer_pathkeys)
Definition: pathkeys.c:795
List * mergeopfamilies
Definition: relation.h:1682
static void hash_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, JoinType jointype, JoinPathExtraData *extra)
Definition: joinpath.c:1324
JoinType
Definition: nodes.h:665
NestPath * create_nestloop_path(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, JoinCostWorkspace *workspace, SpecialJoinInfo *sjinfo, SemiAntiJoinFactors *semifactors, Path *outer_path, Path *inner_path, List *restrict_clauses, List *pathkeys, Relids required_outer)
Definition: pathnode.c:1946
NodeTag pathtype
Definition: relation.h:892
static void generate_mergejoin_paths(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *innerrel, Path *outerpath, JoinType jointype, JoinPathExtraData *extra, bool useallclauses, Path *inner_cheapest_total, List *merge_pathkeys)
Definition: joinpath.c:803
MergePath * create_mergejoin_path(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, JoinCostWorkspace *workspace, SpecialJoinInfo *sjinfo, Path *outer_path, Path *inner_path, List *restrict_clauses, List *pathkeys, Relids required_outer, List *mergeclauses, List *outersortkeys, List *innersortkeys)
Definition: pathnode.c:2034
void(* set_join_pathlist_hook_type)(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, JoinType jointype, JoinPathExtraData *extra)
Definition: paths.h:36
Relids lateral_relids
Definition: relation.h:515
HashPath * create_hashjoin_path(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, JoinCostWorkspace *workspace, SpecialJoinInfo *sjinfo, SemiAntiJoinFactors *semifactors, Path *outer_path, Path *inner_path, List *restrict_clauses, Relids required_outer, List *hashclauses)
Definition: pathnode.c:2098
SpecialJoinInfo * sjinfo
Definition: relation.h:2051
static void sort_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, JoinType jointype, JoinPathExtraData *extra)
Definition: joinpath.c:645
#define linitial(l)
Definition: pg_list.h:110
Relids all_baserels
Definition: relation.h:193
#define ERROR
Definition: elog.h:43
GetForeignJoinPaths_function GetForeignJoinPaths
Definition: fdwapi.h:188
Relids param_source_rels
Definition: relation.h:2053
Bitmapset * bms_join(Bitmapset *a, Bitmapset *b)
Definition: bitmapset.c:808
bool can_join
Definition: relation.h:1643
RelOptInfo * parent
Definition: relation.h:894
bool bms_is_subset(const Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:307
int compare_path_costs(Path *path1, Path *path2, CostSelector criterion)
Definition: pathnode.c:61
struct Path * cheapest_total_path
Definition: relation.h:508
static void try_partial_nestloop_path(PlannerInfo *root, RelOptInfo *joinrel, Path *outer_path, Path *inner_path, List *pathkeys, JoinType jointype, JoinPathExtraData *extra)
Definition: joinpath.c:354
bool outer_is_left
Definition: relation.h:1692
struct FdwRoutine * fdwroutine
Definition: relation.h:540
static void try_mergejoin_path(PlannerInfo *root, RelOptInfo *joinrel, Path *outer_path, Path *inner_path, List *pathkeys, List *mergeclauses, List *outersortkeys, List *innersortkeys, JoinType jointype, JoinPathExtraData *extra)
Definition: joinpath.c:410
static ListCell * list_head(const List *l)
Definition: pg_list.h:77
List * mergeclause_list
Definition: relation.h:2050
Relids relids
Definition: relation.h:490
void initial_cost_hashjoin(PlannerInfo *root, JoinCostWorkspace *workspace, JoinType jointype, List *hashclauses, Path *outer_path, Path *inner_path, SpecialJoinInfo *sjinfo, SemiAntiJoinFactors *semifactors)
Definition: costsize.c:2686
#define EC_MUST_BE_REDUNDANT(eclass)
Definition: relation.h:733
List * lappend(List *list, void *datum)
Definition: list.c:128
static void consider_parallel_nestloop(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, JoinType jointype, JoinPathExtraData *extra)
Definition: joinpath.c:1252
Expr * clause
Definition: relation.h:1637
bool bms_is_empty(const Bitmapset *a)
Definition: bitmapset.c:633
static List * select_mergejoin_clauses(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, List *restrictlist, JoinType jointype, bool *mergejoin_allowed)
Definition: joinpath.c:1568
bool pathkeys_contained_in(List *keys1, List *keys2)
Definition: pathkeys.c:317
void initial_cost_mergejoin(PlannerInfo *root, JoinCostWorkspace *workspace, JoinType jointype, List *mergeclauses, Path *outer_path, Path *inner_path, List *outersortkeys, List *innersortkeys, SpecialJoinInfo *sjinfo)
Definition: costsize.c:2167
List * restrictlist
Definition: relation.h:2049
set_join_pathlist_hook_type set_join_pathlist_hook
Definition: joinpath.c:26
#define InvalidOid
Definition: postgres_ext.h:36
List * select_outer_pathkeys_for_merge(PlannerInfo *root, List *mergeclauses, RelOptInfo *joinrel)
Definition: pathkeys.c:1093
bool is_pushed_down
Definition: relation.h:1639
#define PATH_PARAM_BY_REL(path, rel)
Definition: joinpath.c:28
List * lcons(void *datum, List *list)
Definition: list.c:259
List * pathkeys
Definition: relation.h:909
void bms_free(Bitmapset *a)
Definition: bitmapset.c:200
bool enable_mergejoin
Definition: costsize.c:127
Relids right_relids
Definition: relation.h:1665
#define NULL
Definition: c.h:226
#define Assert(condition)
Definition: c.h:671
#define lfirst(lc)
Definition: pg_list.h:106
void compute_semi_anti_join_factors(PlannerInfo *root, RelOptInfo *outerrel, RelOptInfo *innerrel, JoinType jointype, SpecialJoinInfo *sjinfo, List *restrictlist, SemiAntiJoinFactors *semifactors)
Definition: costsize.c:3506
bool parallel_safe
Definition: relation.h:900
static bool clause_sides_match_join(RestrictInfo *rinfo, RelOptInfo *outerrel, RelOptInfo *innerrel)
Definition: joinpath.c:613
#define PATH_REQ_OUTER(path)
Definition: relation.h:914
JoinType jointype
Definition: relation.h:1811
Oid hashjoinoperator
Definition: relation.h:1695
static int list_length(const List *l)
Definition: pg_list.h:89
bool consider_parallel
Definition: relation.h:498
bool enable_hashjoin
Definition: costsize.c:128
List * find_mergeclauses_for_pathkeys(PlannerInfo *root, List *pathkeys, bool outer_keys, List *restrictinfos)
Definition: pathkeys.c:976
bool bms_overlap(const Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:442
EquivalenceClass * left_ec
Definition: relation.h:1685
void add_partial_path(RelOptInfo *parent_rel, Path *new_path)
Definition: pathnode.c:752
void add_paths_to_joinrel(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, JoinType jointype, SpecialJoinInfo *sjinfo, List *restrictlist)
Definition: joinpath.c:88
bool ExecMaterializesOutput(NodeTag plantype)
Definition: execAmi.c:561
List * pathlist
Definition: relation.h:504
Relids ppi_req_outer
Definition: relation.h:853
#define elog
Definition: elog.h:219
static bool allow_star_schema_join(PlannerInfo *root, Path *outer_path, Path *inner_path)
Definition: joinpath.c:258
Path * get_cheapest_path_for_pathkeys(List *paths, List *pathkeys, Relids required_outer, CostSelector cost_criterion)
Definition: pathkeys.c:342
Definition: pg_list.h:45
Relids min_lefthand
Definition: relation.h:1807
void update_mergeclause_eclasses(PlannerInfo *root, RestrictInfo *restrictinfo)
Definition: pathkeys.c:941
Definition: relation.h:888
Relids calc_nestloop_required_outer(Path *outer_path, Path *inner_path)
Definition: pathnode.c:1880
Bitmapset * bms_add_members(Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:725
bool have_dangerous_phv(PlannerInfo *root, Relids outer_relids, Relids inner_params)
Definition: joinrels.c:1132
bool bms_nonempty_difference(const Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:464
bool enable_material
Definition: costsize.c:126
static void match_unsorted_outer(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, JoinType jointype, JoinPathExtraData *extra)
Definition: joinpath.c:1047