PostgreSQL Source Code  git master
joinpath.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * joinpath.c
4  * Routines to find all possible paths for processing a set of joins
5  *
6  * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  * src/backend/optimizer/path/joinpath.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16 
17 #include <math.h>
18 
19 #include "executor/executor.h"
20 #include "foreign/fdwapi.h"
21 #include "nodes/nodeFuncs.h"
22 #include "optimizer/cost.h"
23 #include "optimizer/optimizer.h"
24 #include "optimizer/pathnode.h"
25 #include "optimizer/paths.h"
26 #include "optimizer/planmain.h"
27 #include "utils/typcache.h"
28 
29 /* Hook for plugins to get control in add_paths_to_joinrel() */
31 
32 /*
33  * Paths parameterized by the parent can be considered to be parameterized by
34  * any of its child.
35  */
36 #define PATH_PARAM_BY_PARENT(path, rel) \
37  ((path)->param_info && bms_overlap(PATH_REQ_OUTER(path), \
38  (rel)->top_parent_relids))
39 #define PATH_PARAM_BY_REL_SELF(path, rel) \
40  ((path)->param_info && bms_overlap(PATH_REQ_OUTER(path), (rel)->relids))
41 
42 #define PATH_PARAM_BY_REL(path, rel) \
43  (PATH_PARAM_BY_REL_SELF(path, rel) || PATH_PARAM_BY_PARENT(path, rel))
44 
45 static void try_partial_mergejoin_path(PlannerInfo *root,
46  RelOptInfo *joinrel,
47  Path *outer_path,
48  Path *inner_path,
49  List *pathkeys,
50  List *mergeclauses,
51  List *outersortkeys,
52  List *innersortkeys,
53  JoinType jointype,
54  JoinPathExtraData *extra);
55 static void sort_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
56  RelOptInfo *outerrel, RelOptInfo *innerrel,
57  JoinType jointype, JoinPathExtraData *extra);
58 static inline bool clause_sides_match_join(RestrictInfo *rinfo,
59  RelOptInfo *outerrel,
60  RelOptInfo *innerrel);
61 static void match_unsorted_outer(PlannerInfo *root, RelOptInfo *joinrel,
62  RelOptInfo *outerrel, RelOptInfo *innerrel,
63  JoinType jointype, JoinPathExtraData *extra);
64 static void consider_parallel_nestloop(PlannerInfo *root,
65  RelOptInfo *joinrel,
66  RelOptInfo *outerrel,
67  RelOptInfo *innerrel,
68  JoinType jointype,
69  JoinPathExtraData *extra);
71  RelOptInfo *joinrel,
72  RelOptInfo *outerrel,
73  RelOptInfo *innerrel,
74  JoinType jointype,
75  JoinPathExtraData *extra,
76  Path *inner_cheapest_total);
77 static void hash_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
78  RelOptInfo *outerrel, RelOptInfo *innerrel,
79  JoinType jointype, JoinPathExtraData *extra);
81  RelOptInfo *joinrel,
82  RelOptInfo *outerrel,
83  RelOptInfo *innerrel,
84  List *restrictlist,
85  JoinType jointype,
86  bool *mergejoin_allowed);
87 static void generate_mergejoin_paths(PlannerInfo *root,
88  RelOptInfo *joinrel,
89  RelOptInfo *innerrel,
90  Path *outerpath,
91  JoinType jointype,
92  JoinPathExtraData *extra,
93  bool useallclauses,
94  Path *inner_cheapest_total,
95  List *merge_pathkeys,
96  bool is_partial);
97 
98 
99 /*
100  * add_paths_to_joinrel
101  * Given a join relation and two component rels from which it can be made,
102  * consider all possible paths that use the two component rels as outer
103  * and inner rel respectively. Add these paths to the join rel's pathlist
104  * if they survive comparison with other paths (and remove any existing
105  * paths that are dominated by these paths).
106  *
107  * Modifies the pathlist field of the joinrel node to contain the best
108  * paths found so far.
109  *
110  * jointype is not necessarily the same as sjinfo->jointype; it might be
111  * "flipped around" if we are considering joining the rels in the opposite
112  * direction from what's indicated in sjinfo.
113  *
114  * Also, this routine and others in this module accept the special JoinTypes
115  * JOIN_UNIQUE_OUTER and JOIN_UNIQUE_INNER to indicate that we should
116  * unique-ify the outer or inner relation and then apply a regular inner
117  * join. These values are not allowed to propagate outside this module,
118  * however. Path cost estimation code may need to recognize that it's
119  * dealing with such a case --- the combination of nominal jointype INNER
120  * with sjinfo->jointype == JOIN_SEMI indicates that.
121  */
122 void
124  RelOptInfo *joinrel,
125  RelOptInfo *outerrel,
126  RelOptInfo *innerrel,
127  JoinType jointype,
128  SpecialJoinInfo *sjinfo,
129  List *restrictlist)
130 {
131  JoinPathExtraData extra;
132  bool mergejoin_allowed = true;
133  ListCell *lc;
134  Relids joinrelids;
135 
136  /*
137  * PlannerInfo doesn't contain the SpecialJoinInfos created for joins
138  * between child relations, even if there is a SpecialJoinInfo node for
139  * the join between the topmost parents. So, while calculating Relids set
140  * representing the restriction, consider relids of topmost parent of
141  * partitions.
142  */
143  if (joinrel->reloptkind == RELOPT_OTHER_JOINREL)
144  joinrelids = joinrel->top_parent_relids;
145  else
146  joinrelids = joinrel->relids;
147 
148  extra.restrictlist = restrictlist;
149  extra.mergeclause_list = NIL;
150  extra.sjinfo = sjinfo;
151  extra.param_source_rels = NULL;
152 
153  /*
154  * See if the inner relation is provably unique for this outer rel.
155  *
156  * We have some special cases: for JOIN_SEMI and JOIN_ANTI, it doesn't
157  * matter since the executor can make the equivalent optimization anyway;
158  * we need not expend planner cycles on proofs. For JOIN_UNIQUE_INNER, we
159  * must be considering a semijoin whose inner side is not provably unique
160  * (else reduce_unique_semijoins would've simplified it), so there's no
161  * point in calling innerrel_is_unique. However, if the LHS covers all of
162  * the semijoin's min_lefthand, then it's appropriate to set inner_unique
163  * because the path produced by create_unique_path will be unique relative
164  * to the LHS. (If we have an LHS that's only part of the min_lefthand,
165  * that is *not* true.) For JOIN_UNIQUE_OUTER, pass JOIN_INNER to avoid
166  * letting that value escape this module.
167  */
168  switch (jointype)
169  {
170  case JOIN_SEMI:
171  case JOIN_ANTI:
172 
173  /*
174  * XXX it may be worth proving this to allow a Memoize to be
175  * considered for Nested Loop Semi/Anti Joins.
176  */
177  extra.inner_unique = false; /* well, unproven */
178  break;
179  case JOIN_UNIQUE_INNER:
180  extra.inner_unique = bms_is_subset(sjinfo->min_lefthand,
181  outerrel->relids);
182  break;
183  case JOIN_UNIQUE_OUTER:
184  extra.inner_unique = innerrel_is_unique(root,
185  joinrel->relids,
186  outerrel->relids,
187  innerrel,
188  JOIN_INNER,
189  restrictlist,
190  false);
191  break;
192  default:
193  extra.inner_unique = innerrel_is_unique(root,
194  joinrel->relids,
195  outerrel->relids,
196  innerrel,
197  jointype,
198  restrictlist,
199  false);
200  break;
201  }
202 
203  /*
204  * Find potential mergejoin clauses. We can skip this if we are not
205  * interested in doing a mergejoin. However, mergejoin may be our only
206  * way of implementing a full outer join, so override enable_mergejoin if
207  * it's a full join.
208  */
209  if (enable_mergejoin || jointype == JOIN_FULL)
211  joinrel,
212  outerrel,
213  innerrel,
214  restrictlist,
215  jointype,
216  &mergejoin_allowed);
217 
218  /*
219  * If it's SEMI, ANTI, or inner_unique join, compute correction factors
220  * for cost estimation. These will be the same for all paths.
221  */
222  if (jointype == JOIN_SEMI || jointype == JOIN_ANTI || extra.inner_unique)
223  compute_semi_anti_join_factors(root, joinrel, outerrel, innerrel,
224  jointype, sjinfo, restrictlist,
225  &extra.semifactors);
226 
227  /*
228  * Decide whether it's sensible to generate parameterized paths for this
229  * joinrel, and if so, which relations such paths should require. There
230  * is usually no need to create a parameterized result path unless there
231  * is a join order restriction that prevents joining one of our input rels
232  * directly to the parameter source rel instead of joining to the other
233  * input rel. (But see allow_star_schema_join().) This restriction
234  * reduces the number of parameterized paths we have to deal with at
235  * higher join levels, without compromising the quality of the resulting
236  * plan. We express the restriction as a Relids set that must overlap the
237  * parameterization of any proposed join path.
238  */
239  foreach(lc, root->join_info_list)
240  {
241  SpecialJoinInfo *sjinfo2 = (SpecialJoinInfo *) lfirst(lc);
242 
243  /*
244  * SJ is relevant to this join if we have some part of its RHS
245  * (possibly not all of it), and haven't yet joined to its LHS. (This
246  * test is pretty simplistic, but should be sufficient considering the
247  * join has already been proven legal.) If the SJ is relevant, it
248  * presents constraints for joining to anything not in its RHS.
249  */
250  if (bms_overlap(joinrelids, sjinfo2->min_righthand) &&
251  !bms_overlap(joinrelids, sjinfo2->min_lefthand))
254  sjinfo2->min_righthand));
255 
256  /* full joins constrain both sides symmetrically */
257  if (sjinfo2->jointype == JOIN_FULL &&
258  bms_overlap(joinrelids, sjinfo2->min_lefthand) &&
259  !bms_overlap(joinrelids, sjinfo2->min_righthand))
262  sjinfo2->min_lefthand));
263  }
264 
265  /*
266  * However, when a LATERAL subquery is involved, there will simply not be
267  * any paths for the joinrel that aren't parameterized by whatever the
268  * subquery is parameterized by, unless its parameterization is resolved
269  * within the joinrel. So we might as well allow additional dependencies
270  * on whatever residual lateral dependencies the joinrel will have.
271  */
273  joinrel->lateral_relids);
274 
275  /*
276  * 1. Consider mergejoin paths where both relations must be explicitly
277  * sorted. Skip this if we can't mergejoin.
278  */
279  if (mergejoin_allowed)
280  sort_inner_and_outer(root, joinrel, outerrel, innerrel,
281  jointype, &extra);
282 
283  /*
284  * 2. Consider paths where the outer relation need not be explicitly
285  * sorted. This includes both nestloops and mergejoins where the outer
286  * path is already ordered. Again, skip this if we can't mergejoin.
287  * (That's okay because we know that nestloop can't handle right/full
288  * joins at all, so it wouldn't work in the prohibited cases either.)
289  */
290  if (mergejoin_allowed)
291  match_unsorted_outer(root, joinrel, outerrel, innerrel,
292  jointype, &extra);
293 
294 #ifdef NOT_USED
295 
296  /*
297  * 3. Consider paths where the inner relation need not be explicitly
298  * sorted. This includes mergejoins only (nestloops were already built in
299  * match_unsorted_outer).
300  *
301  * Diked out as redundant 2/13/2000 -- tgl. There isn't any really
302  * significant difference between the inner and outer side of a mergejoin,
303  * so match_unsorted_inner creates no paths that aren't equivalent to
304  * those made by match_unsorted_outer when add_paths_to_joinrel() is
305  * invoked with the two rels given in the other order.
306  */
307  if (mergejoin_allowed)
308  match_unsorted_inner(root, joinrel, outerrel, innerrel,
309  jointype, &extra);
310 #endif
311 
312  /*
313  * 4. Consider paths where both outer and inner relations must be hashed
314  * before being joined. As above, disregard enable_hashjoin for full
315  * joins, because there may be no other alternative.
316  */
317  if (enable_hashjoin || jointype == JOIN_FULL)
318  hash_inner_and_outer(root, joinrel, outerrel, innerrel,
319  jointype, &extra);
320 
321  /*
322  * 5. If inner and outer relations are foreign tables (or joins) belonging
323  * to the same server and assigned to the same user to check access
324  * permissions as, give the FDW a chance to push down joins.
325  */
326  if (joinrel->fdwroutine &&
328  joinrel->fdwroutine->GetForeignJoinPaths(root, joinrel,
329  outerrel, innerrel,
330  jointype, &extra);
331 
332  /*
333  * 6. Finally, give extensions a chance to manipulate the path list.
334  */
336  set_join_pathlist_hook(root, joinrel, outerrel, innerrel,
337  jointype, &extra);
338 }
339 
340 /*
341  * We override the param_source_rels heuristic to accept nestloop paths in
342  * which the outer rel satisfies some but not all of the inner path's
343  * parameterization. This is necessary to get good plans for star-schema
344  * scenarios, in which a parameterized path for a large table may require
345  * parameters from multiple small tables that will not get joined directly to
346  * each other. We can handle that by stacking nestloops that have the small
347  * tables on the outside; but this breaks the rule the param_source_rels
348  * heuristic is based on, namely that parameters should not be passed down
349  * across joins unless there's a join-order-constraint-based reason to do so.
350  * So we ignore the param_source_rels restriction when this case applies.
351  *
352  * allow_star_schema_join() returns true if the param_source_rels restriction
353  * should be overridden, ie, it's okay to perform this join.
354  */
355 static inline bool
357  Relids outerrelids,
358  Relids inner_paramrels)
359 {
360  /*
361  * It's a star-schema case if the outer rel provides some but not all of
362  * the inner rel's parameterization.
363  */
364  return (bms_overlap(inner_paramrels, outerrelids) &&
365  bms_nonempty_difference(inner_paramrels, outerrelids));
366 }
367 
368 /*
369  * paraminfo_get_equal_hashops
370  * Determine if param_info and innerrel's lateral_vars can be hashed.
371  * Returns true the hashing is possible, otherwise return false.
372  *
373  * Additionally we also collect the outer exprs and the hash operators for
374  * each parameter to innerrel. These set in 'param_exprs', 'operators' and
375  * 'binary_mode' when we return true.
376  */
377 static bool
379  RelOptInfo *outerrel, RelOptInfo *innerrel,
380  List **param_exprs, List **operators,
381  bool *binary_mode)
382 
383 {
384  ListCell *lc;
385 
386  *param_exprs = NIL;
387  *operators = NIL;
388  *binary_mode = false;
389 
390  if (param_info != NULL)
391  {
392  List *clauses = param_info->ppi_clauses;
393 
394  foreach(lc, clauses)
395  {
396  RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
397  OpExpr *opexpr;
398  Node *expr;
399  Oid hasheqoperator;
400 
401  opexpr = (OpExpr *) rinfo->clause;
402 
403  /*
404  * Bail if the rinfo is not compatible. We need a join OpExpr
405  * with 2 args.
406  */
407  if (!IsA(opexpr, OpExpr) || list_length(opexpr->args) != 2 ||
408  !clause_sides_match_join(rinfo, outerrel, innerrel))
409  {
410  list_free(*operators);
411  list_free(*param_exprs);
412  return false;
413  }
414 
415  if (rinfo->outer_is_left)
416  {
417  expr = (Node *) linitial(opexpr->args);
418  hasheqoperator = rinfo->left_hasheqoperator;
419  }
420  else
421  {
422  expr = (Node *) lsecond(opexpr->args);
423  hasheqoperator = rinfo->right_hasheqoperator;
424  }
425 
426  /* can't do memoize if we can't hash the outer type */
427  if (!OidIsValid(hasheqoperator))
428  {
429  list_free(*operators);
430  list_free(*param_exprs);
431  return false;
432  }
433 
434  *operators = lappend_oid(*operators, hasheqoperator);
435  *param_exprs = lappend(*param_exprs, expr);
436 
437  /*
438  * When the join operator is not hashable then it's possible that
439  * the operator will be able to distinguish something that the
440  * hash equality operator could not. For example with floating
441  * point types -0.0 and +0.0 are classed as equal by the hash
442  * function and equality function, but some other operator may be
443  * able to tell those values apart. This means that we must put
444  * memoize into binary comparison mode so that it does bit-by-bit
445  * comparisons rather than a "logical" comparison as it would
446  * using the hash equality operator.
447  */
448  if (!OidIsValid(rinfo->hashjoinoperator))
449  *binary_mode = true;
450  }
451  }
452 
453  /* Now add any lateral vars to the cache key too */
454  foreach(lc, innerrel->lateral_vars)
455  {
456  Node *expr = (Node *) lfirst(lc);
457  TypeCacheEntry *typentry;
458 
459  /* Reject if there are any volatile functions */
460  if (contain_volatile_functions(expr))
461  {
462  list_free(*operators);
463  list_free(*param_exprs);
464  return false;
465  }
466 
467  typentry = lookup_type_cache(exprType(expr),
469 
470  /* can't use a memoize node without a valid hash equals operator */
471  if (!OidIsValid(typentry->hash_proc) || !OidIsValid(typentry->eq_opr))
472  {
473  list_free(*operators);
474  list_free(*param_exprs);
475  return false;
476  }
477 
478  *operators = lappend_oid(*operators, typentry->eq_opr);
479  *param_exprs = lappend(*param_exprs, expr);
480 
481  /*
482  * We must go into binary mode as we don't have too much of an idea of
483  * how these lateral Vars are being used. See comment above when we
484  * set *binary_mode for the non-lateral Var case. This could be
485  * relaxed a bit if we had the RestrictInfos and knew the operators
486  * being used, however for cases like Vars that are arguments to
487  * functions we must operate in binary mode as we don't have
488  * visibility into what the function is doing with the Vars.
489  */
490  *binary_mode = true;
491  }
492 
493  /* We're okay to use memoize */
494  return true;
495 }
496 
497 /*
498  * get_memoize_path
499  * If possible, make and return a Memoize path atop of 'inner_path'.
500  * Otherwise return NULL.
501  */
502 static Path *
504  RelOptInfo *outerrel, Path *inner_path,
505  Path *outer_path, JoinType jointype,
506  JoinPathExtraData *extra)
507 {
508  List *param_exprs;
509  List *hash_operators;
510  ListCell *lc;
511  bool binary_mode;
512 
513  /* Obviously not if it's disabled */
514  if (!enable_memoize)
515  return NULL;
516 
517  /*
518  * We can safely not bother with all this unless we expect to perform more
519  * than one inner scan. The first scan is always going to be a cache
520  * miss. This would likely fail later anyway based on costs, so this is
521  * really just to save some wasted effort.
522  */
523  if (outer_path->parent->rows < 2)
524  return NULL;
525 
526  /*
527  * We can only have a memoize node when there's some kind of cache key,
528  * either parameterized path clauses or lateral Vars. No cache key sounds
529  * more like something a Materialize node might be more useful for.
530  */
531  if ((inner_path->param_info == NULL ||
532  inner_path->param_info->ppi_clauses == NIL) &&
533  innerrel->lateral_vars == NIL)
534  return NULL;
535 
536  /*
537  * Currently we don't do this for SEMI and ANTI joins unless they're
538  * marked as inner_unique. This is because nested loop SEMI/ANTI joins
539  * don't scan the inner node to completion, which will mean memoize cannot
540  * mark the cache entry as complete.
541  *
542  * XXX Currently we don't attempt to mark SEMI/ANTI joins as inner_unique
543  * = true. Should we? See add_paths_to_joinrel()
544  */
545  if (!extra->inner_unique && (jointype == JOIN_SEMI ||
546  jointype == JOIN_ANTI))
547  return NULL;
548 
549  /*
550  * Memoize normally marks cache entries as complete when it runs out of
551  * tuples to read from its subplan. However, with unique joins, Nested
552  * Loop will skip to the next outer tuple after finding the first matching
553  * inner tuple. This means that we may not read the inner side of the
554  * join to completion which leaves no opportunity to mark the cache entry
555  * as complete. To work around that, when the join is unique we
556  * automatically mark cache entries as complete after fetching the first
557  * tuple. This works when the entire join condition is parameterized.
558  * Otherwise, when the parameterization is only a subset of the join
559  * condition, we can't be sure which part of it causes the join to be
560  * unique. This means there are no guarantees that only 1 tuple will be
561  * read. We cannot mark the cache entry as complete after reading the
562  * first tuple without that guarantee. This means the scope of Memoize
563  * node's usefulness is limited to only outer rows that have no join
564  * partner as this is the only case where Nested Loop would exhaust the
565  * inner scan of a unique join. Since the scope is limited to that, we
566  * just don't bother making a memoize path in this case.
567  *
568  * Lateral vars needn't be considered here as they're not considered when
569  * determining if the join is unique.
570  *
571  * XXX this could be enabled if the remaining join quals were made part of
572  * the inner scan's filter instead of the join filter. Maybe it's worth
573  * considering doing that?
574  */
575  if (extra->inner_unique &&
576  (inner_path->param_info == NULL ||
577  list_length(inner_path->param_info->ppi_clauses) <
578  list_length(extra->restrictlist)))
579  return NULL;
580 
581  /*
582  * We can't use a memoize node if there are volatile functions in the
583  * inner rel's target list or restrict list. A cache hit could reduce the
584  * number of calls to these functions.
585  */
586  if (contain_volatile_functions((Node *) innerrel->reltarget))
587  return NULL;
588 
589  foreach(lc, innerrel->baserestrictinfo)
590  {
591  RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
592 
593  if (contain_volatile_functions((Node *) rinfo))
594  return NULL;
595  }
596 
597  /* Check if we have hash ops for each parameter to the path */
599  inner_path->param_info,
600  outerrel,
601  innerrel,
602  &param_exprs,
603  &hash_operators,
604  &binary_mode))
605  {
606  return (Path *) create_memoize_path(root,
607  innerrel,
608  inner_path,
609  param_exprs,
610  hash_operators,
611  extra->inner_unique,
612  binary_mode,
613  outer_path->parent->rows);
614  }
615 
616  return NULL;
617 }
618 
619 /*
620  * try_nestloop_path
621  * Consider a nestloop join path; if it appears useful, push it into
622  * the joinrel's pathlist via add_path().
623  */
624 static void
626  RelOptInfo *joinrel,
627  Path *outer_path,
628  Path *inner_path,
629  List *pathkeys,
630  JoinType jointype,
631  JoinPathExtraData *extra)
632 {
633  Relids required_outer;
634  JoinCostWorkspace workspace;
635  RelOptInfo *innerrel = inner_path->parent;
636  RelOptInfo *outerrel = outer_path->parent;
637  Relids innerrelids;
638  Relids outerrelids;
639  Relids inner_paramrels = PATH_REQ_OUTER(inner_path);
640  Relids outer_paramrels = PATH_REQ_OUTER(outer_path);
641 
642  /*
643  * Paths are parameterized by top-level parents, so run parameterization
644  * tests on the parent relids.
645  */
646  if (innerrel->top_parent_relids)
647  innerrelids = innerrel->top_parent_relids;
648  else
649  innerrelids = innerrel->relids;
650 
651  if (outerrel->top_parent_relids)
652  outerrelids = outerrel->top_parent_relids;
653  else
654  outerrelids = outerrel->relids;
655 
656  /*
657  * Check to see if proposed path is still parameterized, and reject if the
658  * parameterization wouldn't be sensible --- unless allow_star_schema_join
659  * says to allow it anyway. Also, we must reject if have_dangerous_phv
660  * doesn't like the look of it, which could only happen if the nestloop is
661  * still parameterized.
662  */
663  required_outer = calc_nestloop_required_outer(outerrelids, outer_paramrels,
664  innerrelids, inner_paramrels);
665  if (required_outer &&
666  ((!bms_overlap(required_outer, extra->param_source_rels) &&
667  !allow_star_schema_join(root, outerrelids, inner_paramrels)) ||
668  have_dangerous_phv(root, outerrelids, inner_paramrels)))
669  {
670  /* Waste no memory when we reject a path here */
671  bms_free(required_outer);
672  return;
673  }
674 
675  /*
676  * Do a precheck to quickly eliminate obviously-inferior paths. We
677  * calculate a cheap lower bound on the path's cost and then use
678  * add_path_precheck() to see if the path is clearly going to be dominated
679  * by some existing path for the joinrel. If not, do the full pushup with
680  * creating a fully valid path structure and submitting it to add_path().
681  * The latter two steps are expensive enough to make this two-phase
682  * methodology worthwhile.
683  */
684  initial_cost_nestloop(root, &workspace, jointype,
685  outer_path, inner_path, extra);
686 
687  if (add_path_precheck(joinrel,
688  workspace.startup_cost, workspace.total_cost,
689  pathkeys, required_outer))
690  {
691  /*
692  * If the inner path is parameterized, it is parameterized by the
693  * topmost parent of the outer rel, not the outer rel itself. Fix
694  * that.
695  */
696  if (PATH_PARAM_BY_PARENT(inner_path, outer_path->parent))
697  {
698  inner_path = reparameterize_path_by_child(root, inner_path,
699  outer_path->parent);
700 
701  /*
702  * If we could not translate the path, we can't create nest loop
703  * path.
704  */
705  if (!inner_path)
706  {
707  bms_free(required_outer);
708  return;
709  }
710  }
711 
712  add_path(joinrel, (Path *)
714  joinrel,
715  jointype,
716  &workspace,
717  extra,
718  outer_path,
719  inner_path,
720  extra->restrictlist,
721  pathkeys,
722  required_outer));
723  }
724  else
725  {
726  /* Waste no memory when we reject a path here */
727  bms_free(required_outer);
728  }
729 }
730 
731 /*
732  * try_partial_nestloop_path
733  * Consider a partial nestloop join path; if it appears useful, push it into
734  * the joinrel's partial_pathlist via add_partial_path().
735  */
736 static void
738  RelOptInfo *joinrel,
739  Path *outer_path,
740  Path *inner_path,
741  List *pathkeys,
742  JoinType jointype,
743  JoinPathExtraData *extra)
744 {
745  JoinCostWorkspace workspace;
746 
747  /*
748  * If the inner path is parameterized, the parameterization must be fully
749  * satisfied by the proposed outer path. Parameterized partial paths are
750  * not supported. The caller should already have verified that no lateral
751  * rels are required here.
752  */
754  if (inner_path->param_info != NULL)
755  {
756  Relids inner_paramrels = inner_path->param_info->ppi_req_outer;
757  RelOptInfo *outerrel = outer_path->parent;
758  Relids outerrelids;
759 
760  /*
761  * The inner and outer paths are parameterized, if at all, by the top
762  * level parents, not the child relations, so we must use those relids
763  * for our parameterization tests.
764  */
765  if (outerrel->top_parent_relids)
766  outerrelids = outerrel->top_parent_relids;
767  else
768  outerrelids = outerrel->relids;
769 
770  if (!bms_is_subset(inner_paramrels, outerrelids))
771  return;
772  }
773 
774  /*
775  * Before creating a path, get a quick lower bound on what it is likely to
776  * cost. Bail out right away if it looks terrible.
777  */
778  initial_cost_nestloop(root, &workspace, jointype,
779  outer_path, inner_path, extra);
780  if (!add_partial_path_precheck(joinrel, workspace.total_cost, pathkeys))
781  return;
782 
783  /*
784  * If the inner path is parameterized, it is parameterized by the topmost
785  * parent of the outer rel, not the outer rel itself. Fix that.
786  */
787  if (PATH_PARAM_BY_PARENT(inner_path, outer_path->parent))
788  {
789  inner_path = reparameterize_path_by_child(root, inner_path,
790  outer_path->parent);
791 
792  /*
793  * If we could not translate the path, we can't create nest loop path.
794  */
795  if (!inner_path)
796  return;
797  }
798 
799  /* Might be good enough to be worth trying, so let's try it. */
800  add_partial_path(joinrel, (Path *)
802  joinrel,
803  jointype,
804  &workspace,
805  extra,
806  outer_path,
807  inner_path,
808  extra->restrictlist,
809  pathkeys,
810  NULL));
811 }
812 
813 /*
814  * try_mergejoin_path
815  * Consider a merge join path; if it appears useful, push it into
816  * the joinrel's pathlist via add_path().
817  */
818 static void
820  RelOptInfo *joinrel,
821  Path *outer_path,
822  Path *inner_path,
823  List *pathkeys,
824  List *mergeclauses,
825  List *outersortkeys,
826  List *innersortkeys,
827  JoinType jointype,
828  JoinPathExtraData *extra,
829  bool is_partial)
830 {
831  Relids required_outer;
832  JoinCostWorkspace workspace;
833 
834  if (is_partial)
835  {
837  joinrel,
838  outer_path,
839  inner_path,
840  pathkeys,
841  mergeclauses,
842  outersortkeys,
843  innersortkeys,
844  jointype,
845  extra);
846  return;
847  }
848 
849  /*
850  * Check to see if proposed path is still parameterized, and reject if the
851  * parameterization wouldn't be sensible.
852  */
853  required_outer = calc_non_nestloop_required_outer(outer_path,
854  inner_path);
855  if (required_outer &&
856  !bms_overlap(required_outer, extra->param_source_rels))
857  {
858  /* Waste no memory when we reject a path here */
859  bms_free(required_outer);
860  return;
861  }
862 
863  /*
864  * If the given paths are already well enough ordered, we can skip doing
865  * an explicit sort.
866  */
867  if (outersortkeys &&
868  pathkeys_contained_in(outersortkeys, outer_path->pathkeys))
869  outersortkeys = NIL;
870  if (innersortkeys &&
871  pathkeys_contained_in(innersortkeys, inner_path->pathkeys))
872  innersortkeys = NIL;
873 
874  /*
875  * See comments in try_nestloop_path().
876  */
877  initial_cost_mergejoin(root, &workspace, jointype, mergeclauses,
878  outer_path, inner_path,
879  outersortkeys, innersortkeys,
880  extra);
881 
882  if (add_path_precheck(joinrel,
883  workspace.startup_cost, workspace.total_cost,
884  pathkeys, required_outer))
885  {
886  add_path(joinrel, (Path *)
888  joinrel,
889  jointype,
890  &workspace,
891  extra,
892  outer_path,
893  inner_path,
894  extra->restrictlist,
895  pathkeys,
896  required_outer,
897  mergeclauses,
898  outersortkeys,
899  innersortkeys));
900  }
901  else
902  {
903  /* Waste no memory when we reject a path here */
904  bms_free(required_outer);
905  }
906 }
907 
908 /*
909  * try_partial_mergejoin_path
910  * Consider a partial merge join path; if it appears useful, push it into
911  * the joinrel's pathlist via add_partial_path().
912  */
913 static void
915  RelOptInfo *joinrel,
916  Path *outer_path,
917  Path *inner_path,
918  List *pathkeys,
919  List *mergeclauses,
920  List *outersortkeys,
921  List *innersortkeys,
922  JoinType jointype,
923  JoinPathExtraData *extra)
924 {
925  JoinCostWorkspace workspace;
926 
927  /*
928  * See comments in try_partial_hashjoin_path().
929  */
931  if (inner_path->param_info != NULL)
932  {
933  Relids inner_paramrels = inner_path->param_info->ppi_req_outer;
934 
935  if (!bms_is_empty(inner_paramrels))
936  return;
937  }
938 
939  /*
940  * If the given paths are already well enough ordered, we can skip doing
941  * an explicit sort.
942  */
943  if (outersortkeys &&
944  pathkeys_contained_in(outersortkeys, outer_path->pathkeys))
945  outersortkeys = NIL;
946  if (innersortkeys &&
947  pathkeys_contained_in(innersortkeys, inner_path->pathkeys))
948  innersortkeys = NIL;
949 
950  /*
951  * See comments in try_partial_nestloop_path().
952  */
953  initial_cost_mergejoin(root, &workspace, jointype, mergeclauses,
954  outer_path, inner_path,
955  outersortkeys, innersortkeys,
956  extra);
957 
958  if (!add_partial_path_precheck(joinrel, workspace.total_cost, pathkeys))
959  return;
960 
961  /* Might be good enough to be worth trying, so let's try it. */
962  add_partial_path(joinrel, (Path *)
964  joinrel,
965  jointype,
966  &workspace,
967  extra,
968  outer_path,
969  inner_path,
970  extra->restrictlist,
971  pathkeys,
972  NULL,
973  mergeclauses,
974  outersortkeys,
975  innersortkeys));
976 }
977 
978 /*
979  * try_hashjoin_path
980  * Consider a hash join path; if it appears useful, push it into
981  * the joinrel's pathlist via add_path().
982  */
983 static void
985  RelOptInfo *joinrel,
986  Path *outer_path,
987  Path *inner_path,
988  List *hashclauses,
989  JoinType jointype,
990  JoinPathExtraData *extra)
991 {
992  Relids required_outer;
993  JoinCostWorkspace workspace;
994 
995  /*
996  * Check to see if proposed path is still parameterized, and reject if the
997  * parameterization wouldn't be sensible.
998  */
999  required_outer = calc_non_nestloop_required_outer(outer_path,
1000  inner_path);
1001  if (required_outer &&
1002  !bms_overlap(required_outer, extra->param_source_rels))
1003  {
1004  /* Waste no memory when we reject a path here */
1005  bms_free(required_outer);
1006  return;
1007  }
1008 
1009  /*
1010  * See comments in try_nestloop_path(). Also note that hashjoin paths
1011  * never have any output pathkeys, per comments in create_hashjoin_path.
1012  */
1013  initial_cost_hashjoin(root, &workspace, jointype, hashclauses,
1014  outer_path, inner_path, extra, false);
1015 
1016  if (add_path_precheck(joinrel,
1017  workspace.startup_cost, workspace.total_cost,
1018  NIL, required_outer))
1019  {
1020  add_path(joinrel, (Path *)
1021  create_hashjoin_path(root,
1022  joinrel,
1023  jointype,
1024  &workspace,
1025  extra,
1026  outer_path,
1027  inner_path,
1028  false, /* parallel_hash */
1029  extra->restrictlist,
1030  required_outer,
1031  hashclauses));
1032  }
1033  else
1034  {
1035  /* Waste no memory when we reject a path here */
1036  bms_free(required_outer);
1037  }
1038 }
1039 
1040 /*
1041  * try_partial_hashjoin_path
1042  * Consider a partial hashjoin join path; if it appears useful, push it into
1043  * the joinrel's partial_pathlist via add_partial_path().
1044  * The outer side is partial. If parallel_hash is true, then the inner path
1045  * must be partial and will be run in parallel to create one or more shared
1046  * hash tables; otherwise the inner path must be complete and a copy of it
1047  * is run in every process to create separate identical private hash tables.
1048  */
1049 static void
1051  RelOptInfo *joinrel,
1052  Path *outer_path,
1053  Path *inner_path,
1054  List *hashclauses,
1055  JoinType jointype,
1056  JoinPathExtraData *extra,
1057  bool parallel_hash)
1058 {
1059  JoinCostWorkspace workspace;
1060 
1061  /*
1062  * If the inner path is parameterized, the parameterization must be fully
1063  * satisfied by the proposed outer path. Parameterized partial paths are
1064  * not supported. The caller should already have verified that no lateral
1065  * rels are required here.
1066  */
1067  Assert(bms_is_empty(joinrel->lateral_relids));
1068  if (inner_path->param_info != NULL)
1069  {
1070  Relids inner_paramrels = inner_path->param_info->ppi_req_outer;
1071 
1072  if (!bms_is_empty(inner_paramrels))
1073  return;
1074  }
1075 
1076  /*
1077  * Before creating a path, get a quick lower bound on what it is likely to
1078  * cost. Bail out right away if it looks terrible.
1079  */
1080  initial_cost_hashjoin(root, &workspace, jointype, hashclauses,
1081  outer_path, inner_path, extra, parallel_hash);
1082  if (!add_partial_path_precheck(joinrel, workspace.total_cost, NIL))
1083  return;
1084 
1085  /* Might be good enough to be worth trying, so let's try it. */
1086  add_partial_path(joinrel, (Path *)
1087  create_hashjoin_path(root,
1088  joinrel,
1089  jointype,
1090  &workspace,
1091  extra,
1092  outer_path,
1093  inner_path,
1094  parallel_hash,
1095  extra->restrictlist,
1096  NULL,
1097  hashclauses));
1098 }
1099 
1100 /*
1101  * clause_sides_match_join
1102  * Determine whether a join clause is of the right form to use in this join.
1103  *
1104  * We already know that the clause is a binary opclause referencing only the
1105  * rels in the current join. The point here is to check whether it has the
1106  * form "outerrel_expr op innerrel_expr" or "innerrel_expr op outerrel_expr",
1107  * rather than mixing outer and inner vars on either side. If it matches,
1108  * we set the transient flag outer_is_left to identify which side is which.
1109  */
1110 static inline bool
1112  RelOptInfo *innerrel)
1113 {
1114  if (bms_is_subset(rinfo->left_relids, outerrel->relids) &&
1115  bms_is_subset(rinfo->right_relids, innerrel->relids))
1116  {
1117  /* lefthand side is outer */
1118  rinfo->outer_is_left = true;
1119  return true;
1120  }
1121  else if (bms_is_subset(rinfo->left_relids, innerrel->relids) &&
1122  bms_is_subset(rinfo->right_relids, outerrel->relids))
1123  {
1124  /* righthand side is outer */
1125  rinfo->outer_is_left = false;
1126  return true;
1127  }
1128  return false; /* no good for these input relations */
1129 }
1130 
1131 /*
1132  * sort_inner_and_outer
1133  * Create mergejoin join paths by explicitly sorting both the outer and
1134  * inner join relations on each available merge ordering.
1135  *
1136  * 'joinrel' is the join relation
1137  * 'outerrel' is the outer join relation
1138  * 'innerrel' is the inner join relation
1139  * 'jointype' is the type of join to do
1140  * 'extra' contains additional input values
1141  */
1142 static void
1144  RelOptInfo *joinrel,
1145  RelOptInfo *outerrel,
1146  RelOptInfo *innerrel,
1147  JoinType jointype,
1148  JoinPathExtraData *extra)
1149 {
1150  JoinType save_jointype = jointype;
1151  Path *outer_path;
1152  Path *inner_path;
1153  Path *cheapest_partial_outer = NULL;
1154  Path *cheapest_safe_inner = NULL;
1155  List *all_pathkeys;
1156  ListCell *l;
1157 
1158  /*
1159  * We only consider the cheapest-total-cost input paths, since we are
1160  * assuming here that a sort is required. We will consider
1161  * cheapest-startup-cost input paths later, and only if they don't need a
1162  * sort.
1163  *
1164  * This function intentionally does not consider parameterized input
1165  * paths, except when the cheapest-total is parameterized. If we did so,
1166  * we'd have a combinatorial explosion of mergejoin paths of dubious
1167  * value. This interacts with decisions elsewhere that also discriminate
1168  * against mergejoins with parameterized inputs; see comments in
1169  * src/backend/optimizer/README.
1170  */
1171  outer_path = outerrel->cheapest_total_path;
1172  inner_path = innerrel->cheapest_total_path;
1173 
1174  /*
1175  * If either cheapest-total path is parameterized by the other rel, we
1176  * can't use a mergejoin. (There's no use looking for alternative input
1177  * paths, since these should already be the least-parameterized available
1178  * paths.)
1179  */
1180  if (PATH_PARAM_BY_REL(outer_path, innerrel) ||
1181  PATH_PARAM_BY_REL(inner_path, outerrel))
1182  return;
1183 
1184  /*
1185  * If unique-ification is requested, do it and then handle as a plain
1186  * inner join.
1187  */
1188  if (jointype == JOIN_UNIQUE_OUTER)
1189  {
1190  outer_path = (Path *) create_unique_path(root, outerrel,
1191  outer_path, extra->sjinfo);
1192  Assert(outer_path);
1193  jointype = JOIN_INNER;
1194  }
1195  else if (jointype == JOIN_UNIQUE_INNER)
1196  {
1197  inner_path = (Path *) create_unique_path(root, innerrel,
1198  inner_path, extra->sjinfo);
1199  Assert(inner_path);
1200  jointype = JOIN_INNER;
1201  }
1202 
1203  /*
1204  * If the joinrel is parallel-safe, we may be able to consider a partial
1205  * merge join. However, we can't handle JOIN_UNIQUE_OUTER, because the
1206  * outer path will be partial, and therefore we won't be able to properly
1207  * guarantee uniqueness. Similarly, we can't handle JOIN_FULL and
1208  * JOIN_RIGHT, because they can produce false null extended rows. Also,
1209  * the resulting path must not be parameterized.
1210  */
1211  if (joinrel->consider_parallel &&
1212  save_jointype != JOIN_UNIQUE_OUTER &&
1213  save_jointype != JOIN_FULL &&
1214  save_jointype != JOIN_RIGHT &&
1215  outerrel->partial_pathlist != NIL &&
1216  bms_is_empty(joinrel->lateral_relids))
1217  {
1218  cheapest_partial_outer = (Path *) linitial(outerrel->partial_pathlist);
1219 
1220  if (inner_path->parallel_safe)
1221  cheapest_safe_inner = inner_path;
1222  else if (save_jointype != JOIN_UNIQUE_INNER)
1223  cheapest_safe_inner =
1225  }
1226 
1227  /*
1228  * Each possible ordering of the available mergejoin clauses will generate
1229  * a differently-sorted result path at essentially the same cost. We have
1230  * no basis for choosing one over another at this level of joining, but
1231  * some sort orders may be more useful than others for higher-level
1232  * mergejoins, so it's worth considering multiple orderings.
1233  *
1234  * Actually, it's not quite true that every mergeclause ordering will
1235  * generate a different path order, because some of the clauses may be
1236  * partially redundant (refer to the same EquivalenceClasses). Therefore,
1237  * what we do is convert the mergeclause list to a list of canonical
1238  * pathkeys, and then consider different orderings of the pathkeys.
1239  *
1240  * Generating a path for *every* permutation of the pathkeys doesn't seem
1241  * like a winning strategy; the cost in planning time is too high. For
1242  * now, we generate one path for each pathkey, listing that pathkey first
1243  * and the rest in random order. This should allow at least a one-clause
1244  * mergejoin without re-sorting against any other possible mergejoin
1245  * partner path. But if we've not guessed the right ordering of secondary
1246  * keys, we may end up evaluating clauses as qpquals when they could have
1247  * been done as mergeclauses. (In practice, it's rare that there's more
1248  * than two or three mergeclauses, so expending a huge amount of thought
1249  * on that is probably not worth it.)
1250  *
1251  * The pathkey order returned by select_outer_pathkeys_for_merge() has
1252  * some heuristics behind it (see that function), so be sure to try it
1253  * exactly as-is as well as making variants.
1254  */
1255  all_pathkeys = select_outer_pathkeys_for_merge(root,
1256  extra->mergeclause_list,
1257  joinrel);
1258 
1259  foreach(l, all_pathkeys)
1260  {
1261  List *front_pathkey = (List *) lfirst(l);
1262  List *cur_mergeclauses;
1263  List *outerkeys;
1264  List *innerkeys;
1265  List *merge_pathkeys;
1266 
1267  /* Make a pathkey list with this guy first */
1268  if (l != list_head(all_pathkeys))
1269  outerkeys = lcons(front_pathkey,
1270  list_delete_nth_cell(list_copy(all_pathkeys),
1271  foreach_current_index(l)));
1272  else
1273  outerkeys = all_pathkeys; /* no work at first one... */
1274 
1275  /* Sort the mergeclauses into the corresponding ordering */
1276  cur_mergeclauses =
1278  outerkeys,
1279  extra->mergeclause_list);
1280 
1281  /* Should have used them all... */
1282  Assert(list_length(cur_mergeclauses) == list_length(extra->mergeclause_list));
1283 
1284  /* Build sort pathkeys for the inner side */
1285  innerkeys = make_inner_pathkeys_for_merge(root,
1286  cur_mergeclauses,
1287  outerkeys);
1288 
1289  /* Build pathkeys representing output sort order */
1290  merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
1291  outerkeys);
1292 
1293  /*
1294  * And now we can make the path.
1295  *
1296  * Note: it's possible that the cheapest paths will already be sorted
1297  * properly. try_mergejoin_path will detect that case and suppress an
1298  * explicit sort step, so we needn't do so here.
1299  */
1300  try_mergejoin_path(root,
1301  joinrel,
1302  outer_path,
1303  inner_path,
1304  merge_pathkeys,
1305  cur_mergeclauses,
1306  outerkeys,
1307  innerkeys,
1308  jointype,
1309  extra,
1310  false);
1311 
1312  /*
1313  * If we have partial outer and parallel safe inner path then try
1314  * partial mergejoin path.
1315  */
1316  if (cheapest_partial_outer && cheapest_safe_inner)
1318  joinrel,
1319  cheapest_partial_outer,
1320  cheapest_safe_inner,
1321  merge_pathkeys,
1322  cur_mergeclauses,
1323  outerkeys,
1324  innerkeys,
1325  jointype,
1326  extra);
1327  }
1328 }
1329 
1330 /*
1331  * generate_mergejoin_paths
1332  * Creates possible mergejoin paths for input outerpath.
1333  *
1334  * We generate mergejoins if mergejoin clauses are available. We have
1335  * two ways to generate the inner path for a mergejoin: sort the cheapest
1336  * inner path, or use an inner path that is already suitably ordered for the
1337  * merge. If we have several mergeclauses, it could be that there is no inner
1338  * path (or only a very expensive one) for the full list of mergeclauses, but
1339  * better paths exist if we truncate the mergeclause list (thereby discarding
1340  * some sort key requirements). So, we consider truncations of the
1341  * mergeclause list as well as the full list. (Ideally we'd consider all
1342  * subsets of the mergeclause list, but that seems way too expensive.)
1343  */
1344 static void
1346  RelOptInfo *joinrel,
1347  RelOptInfo *innerrel,
1348  Path *outerpath,
1349  JoinType jointype,
1350  JoinPathExtraData *extra,
1351  bool useallclauses,
1352  Path *inner_cheapest_total,
1353  List *merge_pathkeys,
1354  bool is_partial)
1355 {
1356  List *mergeclauses;
1357  List *innersortkeys;
1358  List *trialsortkeys;
1359  Path *cheapest_startup_inner;
1360  Path *cheapest_total_inner;
1361  JoinType save_jointype = jointype;
1362  int num_sortkeys;
1363  int sortkeycnt;
1364 
1365  if (jointype == JOIN_UNIQUE_OUTER || jointype == JOIN_UNIQUE_INNER)
1366  jointype = JOIN_INNER;
1367 
1368  /* Look for useful mergeclauses (if any) */
1369  mergeclauses =
1371  outerpath->pathkeys,
1372  extra->mergeclause_list);
1373 
1374  /*
1375  * Done with this outer path if no chance for a mergejoin.
1376  *
1377  * Special corner case: for "x FULL JOIN y ON true", there will be no join
1378  * clauses at all. Ordinarily we'd generate a clauseless nestloop path,
1379  * but since mergejoin is our only join type that supports FULL JOIN
1380  * without any join clauses, it's necessary to generate a clauseless
1381  * mergejoin path instead.
1382  */
1383  if (mergeclauses == NIL)
1384  {
1385  if (jointype == JOIN_FULL)
1386  /* okay to try for mergejoin */ ;
1387  else
1388  return;
1389  }
1390  if (useallclauses &&
1391  list_length(mergeclauses) != list_length(extra->mergeclause_list))
1392  return;
1393 
1394  /* Compute the required ordering of the inner path */
1395  innersortkeys = make_inner_pathkeys_for_merge(root,
1396  mergeclauses,
1397  outerpath->pathkeys);
1398 
1399  /*
1400  * Generate a mergejoin on the basis of sorting the cheapest inner. Since
1401  * a sort will be needed, only cheapest total cost matters. (But
1402  * try_mergejoin_path will do the right thing if inner_cheapest_total is
1403  * already correctly sorted.)
1404  */
1405  try_mergejoin_path(root,
1406  joinrel,
1407  outerpath,
1408  inner_cheapest_total,
1409  merge_pathkeys,
1410  mergeclauses,
1411  NIL,
1412  innersortkeys,
1413  jointype,
1414  extra,
1415  is_partial);
1416 
1417  /* Can't do anything else if inner path needs to be unique'd */
1418  if (save_jointype == JOIN_UNIQUE_INNER)
1419  return;
1420 
1421  /*
1422  * Look for presorted inner paths that satisfy the innersortkey list ---
1423  * or any truncation thereof, if we are allowed to build a mergejoin using
1424  * a subset of the merge clauses. Here, we consider both cheap startup
1425  * cost and cheap total cost.
1426  *
1427  * Currently we do not consider parameterized inner paths here. This
1428  * interacts with decisions elsewhere that also discriminate against
1429  * mergejoins with parameterized inputs; see comments in
1430  * src/backend/optimizer/README.
1431  *
1432  * As we shorten the sortkey list, we should consider only paths that are
1433  * strictly cheaper than (in particular, not the same as) any path found
1434  * in an earlier iteration. Otherwise we'd be intentionally using fewer
1435  * merge keys than a given path allows (treating the rest as plain
1436  * joinquals), which is unlikely to be a good idea. Also, eliminating
1437  * paths here on the basis of compare_path_costs is a lot cheaper than
1438  * building the mergejoin path only to throw it away.
1439  *
1440  * If inner_cheapest_total is well enough sorted to have not required a
1441  * sort in the path made above, we shouldn't make a duplicate path with
1442  * it, either. We handle that case with the same logic that handles the
1443  * previous consideration, by initializing the variables that track
1444  * cheapest-so-far properly. Note that we do NOT reject
1445  * inner_cheapest_total if we find it matches some shorter set of
1446  * pathkeys. That case corresponds to using fewer mergekeys to avoid
1447  * sorting inner_cheapest_total, whereas we did sort it above, so the
1448  * plans being considered are different.
1449  */
1450  if (pathkeys_contained_in(innersortkeys,
1451  inner_cheapest_total->pathkeys))
1452  {
1453  /* inner_cheapest_total didn't require a sort */
1454  cheapest_startup_inner = inner_cheapest_total;
1455  cheapest_total_inner = inner_cheapest_total;
1456  }
1457  else
1458  {
1459  /* it did require a sort, at least for the full set of keys */
1460  cheapest_startup_inner = NULL;
1461  cheapest_total_inner = NULL;
1462  }
1463  num_sortkeys = list_length(innersortkeys);
1464  if (num_sortkeys > 1 && !useallclauses)
1465  trialsortkeys = list_copy(innersortkeys); /* need modifiable copy */
1466  else
1467  trialsortkeys = innersortkeys; /* won't really truncate */
1468 
1469  for (sortkeycnt = num_sortkeys; sortkeycnt > 0; sortkeycnt--)
1470  {
1471  Path *innerpath;
1472  List *newclauses = NIL;
1473 
1474  /*
1475  * Look for an inner path ordered well enough for the first
1476  * 'sortkeycnt' innersortkeys. NB: trialsortkeys list is modified
1477  * destructively, which is why we made a copy...
1478  */
1479  trialsortkeys = list_truncate(trialsortkeys, sortkeycnt);
1480  innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
1481  trialsortkeys,
1482  NULL,
1483  TOTAL_COST,
1484  is_partial);
1485  if (innerpath != NULL &&
1486  (cheapest_total_inner == NULL ||
1487  compare_path_costs(innerpath, cheapest_total_inner,
1488  TOTAL_COST) < 0))
1489  {
1490  /* Found a cheap (or even-cheaper) sorted path */
1491  /* Select the right mergeclauses, if we didn't already */
1492  if (sortkeycnt < num_sortkeys)
1493  {
1494  newclauses =
1496  mergeclauses,
1497  trialsortkeys);
1498  Assert(newclauses != NIL);
1499  }
1500  else
1501  newclauses = mergeclauses;
1502  try_mergejoin_path(root,
1503  joinrel,
1504  outerpath,
1505  innerpath,
1506  merge_pathkeys,
1507  newclauses,
1508  NIL,
1509  NIL,
1510  jointype,
1511  extra,
1512  is_partial);
1513  cheapest_total_inner = innerpath;
1514  }
1515  /* Same on the basis of cheapest startup cost ... */
1516  innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
1517  trialsortkeys,
1518  NULL,
1519  STARTUP_COST,
1520  is_partial);
1521  if (innerpath != NULL &&
1522  (cheapest_startup_inner == NULL ||
1523  compare_path_costs(innerpath, cheapest_startup_inner,
1524  STARTUP_COST) < 0))
1525  {
1526  /* Found a cheap (or even-cheaper) sorted path */
1527  if (innerpath != cheapest_total_inner)
1528  {
1529  /*
1530  * Avoid rebuilding clause list if we already made one; saves
1531  * memory in big join trees...
1532  */
1533  if (newclauses == NIL)
1534  {
1535  if (sortkeycnt < num_sortkeys)
1536  {
1537  newclauses =
1539  mergeclauses,
1540  trialsortkeys);
1541  Assert(newclauses != NIL);
1542  }
1543  else
1544  newclauses = mergeclauses;
1545  }
1546  try_mergejoin_path(root,
1547  joinrel,
1548  outerpath,
1549  innerpath,
1550  merge_pathkeys,
1551  newclauses,
1552  NIL,
1553  NIL,
1554  jointype,
1555  extra,
1556  is_partial);
1557  }
1558  cheapest_startup_inner = innerpath;
1559  }
1560 
1561  /*
1562  * Don't consider truncated sortkeys if we need all clauses.
1563  */
1564  if (useallclauses)
1565  break;
1566  }
1567 }
1568 
1569 /*
1570  * match_unsorted_outer
1571  * Creates possible join paths for processing a single join relation
1572  * 'joinrel' by employing either iterative substitution or
1573  * mergejoining on each of its possible outer paths (considering
1574  * only outer paths that are already ordered well enough for merging).
1575  *
1576  * We always generate a nestloop path for each available outer path.
1577  * In fact we may generate as many as five: one on the cheapest-total-cost
1578  * inner path, one on the same with materialization, one on the
1579  * cheapest-startup-cost inner path (if different), one on the
1580  * cheapest-total inner-indexscan path (if any), and one on the
1581  * cheapest-startup inner-indexscan path (if different).
1582  *
1583  * We also consider mergejoins if mergejoin clauses are available. See
1584  * detailed comments in generate_mergejoin_paths.
1585  *
1586  * 'joinrel' is the join relation
1587  * 'outerrel' is the outer join relation
1588  * 'innerrel' is the inner join relation
1589  * 'jointype' is the type of join to do
1590  * 'extra' contains additional input values
1591  */
1592 static void
1594  RelOptInfo *joinrel,
1595  RelOptInfo *outerrel,
1596  RelOptInfo *innerrel,
1597  JoinType jointype,
1598  JoinPathExtraData *extra)
1599 {
1600  JoinType save_jointype = jointype;
1601  bool nestjoinOK;
1602  bool useallclauses;
1603  Path *inner_cheapest_total = innerrel->cheapest_total_path;
1604  Path *matpath = NULL;
1605  ListCell *lc1;
1606 
1607  /*
1608  * Nestloop only supports inner, left, semi, and anti joins. Also, if we
1609  * are doing a right or full mergejoin, we must use *all* the mergeclauses
1610  * as join clauses, else we will not have a valid plan. (Although these
1611  * two flags are currently inverses, keep them separate for clarity and
1612  * possible future changes.)
1613  */
1614  switch (jointype)
1615  {
1616  case JOIN_INNER:
1617  case JOIN_LEFT:
1618  case JOIN_SEMI:
1619  case JOIN_ANTI:
1620  nestjoinOK = true;
1621  useallclauses = false;
1622  break;
1623  case JOIN_RIGHT:
1624  case JOIN_FULL:
1625  nestjoinOK = false;
1626  useallclauses = true;
1627  break;
1628  case JOIN_UNIQUE_OUTER:
1629  case JOIN_UNIQUE_INNER:
1630  jointype = JOIN_INNER;
1631  nestjoinOK = true;
1632  useallclauses = false;
1633  break;
1634  default:
1635  elog(ERROR, "unrecognized join type: %d",
1636  (int) jointype);
1637  nestjoinOK = false; /* keep compiler quiet */
1638  useallclauses = false;
1639  break;
1640  }
1641 
1642  /*
1643  * If inner_cheapest_total is parameterized by the outer rel, ignore it;
1644  * we will consider it below as a member of cheapest_parameterized_paths,
1645  * but the other possibilities considered in this routine aren't usable.
1646  */
1647  if (PATH_PARAM_BY_REL(inner_cheapest_total, outerrel))
1648  inner_cheapest_total = NULL;
1649 
1650  /*
1651  * If we need to unique-ify the inner path, we will consider only the
1652  * cheapest-total inner.
1653  */
1654  if (save_jointype == JOIN_UNIQUE_INNER)
1655  {
1656  /* No way to do this with an inner path parameterized by outer rel */
1657  if (inner_cheapest_total == NULL)
1658  return;
1659  inner_cheapest_total = (Path *)
1660  create_unique_path(root, innerrel, inner_cheapest_total, extra->sjinfo);
1661  Assert(inner_cheapest_total);
1662  }
1663  else if (nestjoinOK)
1664  {
1665  /*
1666  * Consider materializing the cheapest inner path, unless
1667  * enable_material is off or the path in question materializes its
1668  * output anyway.
1669  */
1670  if (enable_material && inner_cheapest_total != NULL &&
1671  !ExecMaterializesOutput(inner_cheapest_total->pathtype))
1672  matpath = (Path *)
1673  create_material_path(innerrel, inner_cheapest_total);
1674  }
1675 
1676  foreach(lc1, outerrel->pathlist)
1677  {
1678  Path *outerpath = (Path *) lfirst(lc1);
1679  List *merge_pathkeys;
1680 
1681  /*
1682  * We cannot use an outer path that is parameterized by the inner rel.
1683  */
1684  if (PATH_PARAM_BY_REL(outerpath, innerrel))
1685  continue;
1686 
1687  /*
1688  * If we need to unique-ify the outer path, it's pointless to consider
1689  * any but the cheapest outer. (XXX we don't consider parameterized
1690  * outers, nor inners, for unique-ified cases. Should we?)
1691  */
1692  if (save_jointype == JOIN_UNIQUE_OUTER)
1693  {
1694  if (outerpath != outerrel->cheapest_total_path)
1695  continue;
1696  outerpath = (Path *) create_unique_path(root, outerrel,
1697  outerpath, extra->sjinfo);
1698  Assert(outerpath);
1699  }
1700 
1701  /*
1702  * The result will have this sort order (even if it is implemented as
1703  * a nestloop, and even if some of the mergeclauses are implemented by
1704  * qpquals rather than as true mergeclauses):
1705  */
1706  merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
1707  outerpath->pathkeys);
1708 
1709  if (save_jointype == JOIN_UNIQUE_INNER)
1710  {
1711  /*
1712  * Consider nestloop join, but only with the unique-ified cheapest
1713  * inner path
1714  */
1715  try_nestloop_path(root,
1716  joinrel,
1717  outerpath,
1718  inner_cheapest_total,
1719  merge_pathkeys,
1720  jointype,
1721  extra);
1722  }
1723  else if (nestjoinOK)
1724  {
1725  /*
1726  * Consider nestloop joins using this outer path and various
1727  * available paths for the inner relation. We consider the
1728  * cheapest-total paths for each available parameterization of the
1729  * inner relation, including the unparameterized case.
1730  */
1731  ListCell *lc2;
1732 
1733  foreach(lc2, innerrel->cheapest_parameterized_paths)
1734  {
1735  Path *innerpath = (Path *) lfirst(lc2);
1736  Path *mpath;
1737 
1738  try_nestloop_path(root,
1739  joinrel,
1740  outerpath,
1741  innerpath,
1742  merge_pathkeys,
1743  jointype,
1744  extra);
1745 
1746  /*
1747  * Try generating a memoize path and see if that makes the
1748  * nested loop any cheaper.
1749  */
1750  mpath = get_memoize_path(root, innerrel, outerrel,
1751  innerpath, outerpath, jointype,
1752  extra);
1753  if (mpath != NULL)
1754  try_nestloop_path(root,
1755  joinrel,
1756  outerpath,
1757  mpath,
1758  merge_pathkeys,
1759  jointype,
1760  extra);
1761  }
1762 
1763  /* Also consider materialized form of the cheapest inner path */
1764  if (matpath != NULL)
1765  try_nestloop_path(root,
1766  joinrel,
1767  outerpath,
1768  matpath,
1769  merge_pathkeys,
1770  jointype,
1771  extra);
1772  }
1773 
1774  /* Can't do anything else if outer path needs to be unique'd */
1775  if (save_jointype == JOIN_UNIQUE_OUTER)
1776  continue;
1777 
1778  /* Can't do anything else if inner rel is parameterized by outer */
1779  if (inner_cheapest_total == NULL)
1780  continue;
1781 
1782  /* Generate merge join paths */
1783  generate_mergejoin_paths(root, joinrel, innerrel, outerpath,
1784  save_jointype, extra, useallclauses,
1785  inner_cheapest_total, merge_pathkeys,
1786  false);
1787  }
1788 
1789  /*
1790  * Consider partial nestloop and mergejoin plan if outerrel has any
1791  * partial path and the joinrel is parallel-safe. However, we can't
1792  * handle JOIN_UNIQUE_OUTER, because the outer path will be partial, and
1793  * therefore we won't be able to properly guarantee uniqueness. Nor can
1794  * we handle joins needing lateral rels, since partial paths must not be
1795  * parameterized. Similarly, we can't handle JOIN_FULL and JOIN_RIGHT,
1796  * because they can produce false null extended rows.
1797  */
1798  if (joinrel->consider_parallel &&
1799  save_jointype != JOIN_UNIQUE_OUTER &&
1800  save_jointype != JOIN_FULL &&
1801  save_jointype != JOIN_RIGHT &&
1802  outerrel->partial_pathlist != NIL &&
1803  bms_is_empty(joinrel->lateral_relids))
1804  {
1805  if (nestjoinOK)
1806  consider_parallel_nestloop(root, joinrel, outerrel, innerrel,
1807  save_jointype, extra);
1808 
1809  /*
1810  * If inner_cheapest_total is NULL or non parallel-safe then find the
1811  * cheapest total parallel safe path. If doing JOIN_UNIQUE_INNER, we
1812  * can't use any alternative inner path.
1813  */
1814  if (inner_cheapest_total == NULL ||
1815  !inner_cheapest_total->parallel_safe)
1816  {
1817  if (save_jointype == JOIN_UNIQUE_INNER)
1818  return;
1819 
1820  inner_cheapest_total = get_cheapest_parallel_safe_total_inner(innerrel->pathlist);
1821  }
1822 
1823  if (inner_cheapest_total)
1824  consider_parallel_mergejoin(root, joinrel, outerrel, innerrel,
1825  save_jointype, extra,
1826  inner_cheapest_total);
1827  }
1828 }
1829 
1830 /*
1831  * consider_parallel_mergejoin
1832  * Try to build partial paths for a joinrel by joining a partial path
1833  * for the outer relation to a complete path for the inner relation.
1834  *
1835  * 'joinrel' is the join relation
1836  * 'outerrel' is the outer join relation
1837  * 'innerrel' is the inner join relation
1838  * 'jointype' is the type of join to do
1839  * 'extra' contains additional input values
1840  * 'inner_cheapest_total' cheapest total path for innerrel
1841  */
1842 static void
1844  RelOptInfo *joinrel,
1845  RelOptInfo *outerrel,
1846  RelOptInfo *innerrel,
1847  JoinType jointype,
1848  JoinPathExtraData *extra,
1849  Path *inner_cheapest_total)
1850 {
1851  ListCell *lc1;
1852 
1853  /* generate merge join path for each partial outer path */
1854  foreach(lc1, outerrel->partial_pathlist)
1855  {
1856  Path *outerpath = (Path *) lfirst(lc1);
1857  List *merge_pathkeys;
1858 
1859  /*
1860  * Figure out what useful ordering any paths we create will have.
1861  */
1862  merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
1863  outerpath->pathkeys);
1864 
1865  generate_mergejoin_paths(root, joinrel, innerrel, outerpath, jointype,
1866  extra, false, inner_cheapest_total,
1867  merge_pathkeys, true);
1868  }
1869 }
1870 
1871 /*
1872  * consider_parallel_nestloop
1873  * Try to build partial paths for a joinrel by joining a partial path for the
1874  * outer relation to a complete path for the inner relation.
1875  *
1876  * 'joinrel' is the join relation
1877  * 'outerrel' is the outer join relation
1878  * 'innerrel' is the inner join relation
1879  * 'jointype' is the type of join to do
1880  * 'extra' contains additional input values
1881  */
1882 static void
1884  RelOptInfo *joinrel,
1885  RelOptInfo *outerrel,
1886  RelOptInfo *innerrel,
1887  JoinType jointype,
1888  JoinPathExtraData *extra)
1889 {
1890  JoinType save_jointype = jointype;
1891  ListCell *lc1;
1892 
1893  if (jointype == JOIN_UNIQUE_INNER)
1894  jointype = JOIN_INNER;
1895 
1896  foreach(lc1, outerrel->partial_pathlist)
1897  {
1898  Path *outerpath = (Path *) lfirst(lc1);
1899  List *pathkeys;
1900  ListCell *lc2;
1901 
1902  /* Figure out what useful ordering any paths we create will have. */
1903  pathkeys = build_join_pathkeys(root, joinrel, jointype,
1904  outerpath->pathkeys);
1905 
1906  /*
1907  * Try the cheapest parameterized paths; only those which will produce
1908  * an unparameterized path when joined to this outerrel will survive
1909  * try_partial_nestloop_path. The cheapest unparameterized path is
1910  * also in this list.
1911  */
1912  foreach(lc2, innerrel->cheapest_parameterized_paths)
1913  {
1914  Path *innerpath = (Path *) lfirst(lc2);
1915  Path *mpath;
1916 
1917  /* Can't join to an inner path that is not parallel-safe */
1918  if (!innerpath->parallel_safe)
1919  continue;
1920 
1921  /*
1922  * If we're doing JOIN_UNIQUE_INNER, we can only use the inner's
1923  * cheapest_total_path, and we have to unique-ify it. (We might
1924  * be able to relax this to allow other safe, unparameterized
1925  * inner paths, but right now create_unique_path is not on board
1926  * with that.)
1927  */
1928  if (save_jointype == JOIN_UNIQUE_INNER)
1929  {
1930  if (innerpath != innerrel->cheapest_total_path)
1931  continue;
1932  innerpath = (Path *) create_unique_path(root, innerrel,
1933  innerpath,
1934  extra->sjinfo);
1935  Assert(innerpath);
1936  }
1937 
1938  try_partial_nestloop_path(root, joinrel, outerpath, innerpath,
1939  pathkeys, jointype, extra);
1940 
1941  /*
1942  * Try generating a memoize path and see if that makes the nested
1943  * loop any cheaper.
1944  */
1945  mpath = get_memoize_path(root, innerrel, outerrel,
1946  innerpath, outerpath, jointype,
1947  extra);
1948  if (mpath != NULL)
1949  try_partial_nestloop_path(root, joinrel, outerpath, mpath,
1950  pathkeys, jointype, extra);
1951  }
1952  }
1953 }
1954 
1955 /*
1956  * hash_inner_and_outer
1957  * Create hashjoin join paths by explicitly hashing both the outer and
1958  * inner keys of each available hash clause.
1959  *
1960  * 'joinrel' is the join relation
1961  * 'outerrel' is the outer join relation
1962  * 'innerrel' is the inner join relation
1963  * 'jointype' is the type of join to do
1964  * 'extra' contains additional input values
1965  */
1966 static void
1968  RelOptInfo *joinrel,
1969  RelOptInfo *outerrel,
1970  RelOptInfo *innerrel,
1971  JoinType jointype,
1972  JoinPathExtraData *extra)
1973 {
1974  JoinType save_jointype = jointype;
1975  bool isouterjoin = IS_OUTER_JOIN(jointype);
1976  List *hashclauses;
1977  ListCell *l;
1978 
1979  /*
1980  * We need to build only one hashclauses list for any given pair of outer
1981  * and inner relations; all of the hashable clauses will be used as keys.
1982  *
1983  * Scan the join's restrictinfo list to find hashjoinable clauses that are
1984  * usable with this pair of sub-relations.
1985  */
1986  hashclauses = NIL;
1987  foreach(l, extra->restrictlist)
1988  {
1989  RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
1990 
1991  /*
1992  * If processing an outer join, only use its own join clauses for
1993  * hashing. For inner joins we need not be so picky.
1994  */
1995  if (isouterjoin && RINFO_IS_PUSHED_DOWN(restrictinfo, joinrel->relids))
1996  continue;
1997 
1998  if (!restrictinfo->can_join ||
1999  restrictinfo->hashjoinoperator == InvalidOid)
2000  continue; /* not hashjoinable */
2001 
2002  /*
2003  * Check if clause has the form "outer op inner" or "inner op outer".
2004  */
2005  if (!clause_sides_match_join(restrictinfo, outerrel, innerrel))
2006  continue; /* no good for these input relations */
2007 
2008  hashclauses = lappend(hashclauses, restrictinfo);
2009  }
2010 
2011  /* If we found any usable hashclauses, make paths */
2012  if (hashclauses)
2013  {
2014  /*
2015  * We consider both the cheapest-total-cost and cheapest-startup-cost
2016  * outer paths. There's no need to consider any but the
2017  * cheapest-total-cost inner path, however.
2018  */
2019  Path *cheapest_startup_outer = outerrel->cheapest_startup_path;
2020  Path *cheapest_total_outer = outerrel->cheapest_total_path;
2021  Path *cheapest_total_inner = innerrel->cheapest_total_path;
2022 
2023  /*
2024  * If either cheapest-total path is parameterized by the other rel, we
2025  * can't use a hashjoin. (There's no use looking for alternative
2026  * input paths, since these should already be the least-parameterized
2027  * available paths.)
2028  */
2029  if (PATH_PARAM_BY_REL(cheapest_total_outer, innerrel) ||
2030  PATH_PARAM_BY_REL(cheapest_total_inner, outerrel))
2031  return;
2032 
2033  /* Unique-ify if need be; we ignore parameterized possibilities */
2034  if (jointype == JOIN_UNIQUE_OUTER)
2035  {
2036  cheapest_total_outer = (Path *)
2037  create_unique_path(root, outerrel,
2038  cheapest_total_outer, extra->sjinfo);
2039  Assert(cheapest_total_outer);
2040  jointype = JOIN_INNER;
2041  try_hashjoin_path(root,
2042  joinrel,
2043  cheapest_total_outer,
2044  cheapest_total_inner,
2045  hashclauses,
2046  jointype,
2047  extra);
2048  /* no possibility of cheap startup here */
2049  }
2050  else if (jointype == JOIN_UNIQUE_INNER)
2051  {
2052  cheapest_total_inner = (Path *)
2053  create_unique_path(root, innerrel,
2054  cheapest_total_inner, extra->sjinfo);
2055  Assert(cheapest_total_inner);
2056  jointype = JOIN_INNER;
2057  try_hashjoin_path(root,
2058  joinrel,
2059  cheapest_total_outer,
2060  cheapest_total_inner,
2061  hashclauses,
2062  jointype,
2063  extra);
2064  if (cheapest_startup_outer != NULL &&
2065  cheapest_startup_outer != cheapest_total_outer)
2066  try_hashjoin_path(root,
2067  joinrel,
2068  cheapest_startup_outer,
2069  cheapest_total_inner,
2070  hashclauses,
2071  jointype,
2072  extra);
2073  }
2074  else
2075  {
2076  /*
2077  * For other jointypes, we consider the cheapest startup outer
2078  * together with the cheapest total inner, and then consider
2079  * pairings of cheapest-total paths including parameterized ones.
2080  * There is no use in generating parameterized paths on the basis
2081  * of possibly cheap startup cost, so this is sufficient.
2082  */
2083  ListCell *lc1;
2084  ListCell *lc2;
2085 
2086  if (cheapest_startup_outer != NULL)
2087  try_hashjoin_path(root,
2088  joinrel,
2089  cheapest_startup_outer,
2090  cheapest_total_inner,
2091  hashclauses,
2092  jointype,
2093  extra);
2094 
2095  foreach(lc1, outerrel->cheapest_parameterized_paths)
2096  {
2097  Path *outerpath = (Path *) lfirst(lc1);
2098 
2099  /*
2100  * We cannot use an outer path that is parameterized by the
2101  * inner rel.
2102  */
2103  if (PATH_PARAM_BY_REL(outerpath, innerrel))
2104  continue;
2105 
2106  foreach(lc2, innerrel->cheapest_parameterized_paths)
2107  {
2108  Path *innerpath = (Path *) lfirst(lc2);
2109 
2110  /*
2111  * We cannot use an inner path that is parameterized by
2112  * the outer rel, either.
2113  */
2114  if (PATH_PARAM_BY_REL(innerpath, outerrel))
2115  continue;
2116 
2117  if (outerpath == cheapest_startup_outer &&
2118  innerpath == cheapest_total_inner)
2119  continue; /* already tried it */
2120 
2121  try_hashjoin_path(root,
2122  joinrel,
2123  outerpath,
2124  innerpath,
2125  hashclauses,
2126  jointype,
2127  extra);
2128  }
2129  }
2130  }
2131 
2132  /*
2133  * If the joinrel is parallel-safe, we may be able to consider a
2134  * partial hash join. However, we can't handle JOIN_UNIQUE_OUTER,
2135  * because the outer path will be partial, and therefore we won't be
2136  * able to properly guarantee uniqueness. Similarly, we can't handle
2137  * JOIN_FULL and JOIN_RIGHT, because they can produce false null
2138  * extended rows. Also, the resulting path must not be parameterized.
2139  * We would be able to support JOIN_FULL and JOIN_RIGHT for Parallel
2140  * Hash, since in that case we're back to a single hash table with a
2141  * single set of match bits for each batch, but that will require
2142  * figuring out a deadlock-free way to wait for the probe to finish.
2143  */
2144  if (joinrel->consider_parallel &&
2145  save_jointype != JOIN_UNIQUE_OUTER &&
2146  save_jointype != JOIN_FULL &&
2147  save_jointype != JOIN_RIGHT &&
2148  outerrel->partial_pathlist != NIL &&
2149  bms_is_empty(joinrel->lateral_relids))
2150  {
2151  Path *cheapest_partial_outer;
2152  Path *cheapest_partial_inner = NULL;
2153  Path *cheapest_safe_inner = NULL;
2154 
2155  cheapest_partial_outer =
2156  (Path *) linitial(outerrel->partial_pathlist);
2157 
2158  /*
2159  * Can we use a partial inner plan too, so that we can build a
2160  * shared hash table in parallel? We can't handle
2161  * JOIN_UNIQUE_INNER because we can't guarantee uniqueness.
2162  */
2163  if (innerrel->partial_pathlist != NIL &&
2164  save_jointype != JOIN_UNIQUE_INNER &&
2166  {
2167  cheapest_partial_inner =
2168  (Path *) linitial(innerrel->partial_pathlist);
2169  try_partial_hashjoin_path(root, joinrel,
2170  cheapest_partial_outer,
2171  cheapest_partial_inner,
2172  hashclauses, jointype, extra,
2173  true /* parallel_hash */ );
2174  }
2175 
2176  /*
2177  * Normally, given that the joinrel is parallel-safe, the cheapest
2178  * total inner path will also be parallel-safe, but if not, we'll
2179  * have to search for the cheapest safe, unparameterized inner
2180  * path. If doing JOIN_UNIQUE_INNER, we can't use any alternative
2181  * inner path.
2182  */
2183  if (cheapest_total_inner->parallel_safe)
2184  cheapest_safe_inner = cheapest_total_inner;
2185  else if (save_jointype != JOIN_UNIQUE_INNER)
2186  cheapest_safe_inner =
2188 
2189  if (cheapest_safe_inner != NULL)
2190  try_partial_hashjoin_path(root, joinrel,
2191  cheapest_partial_outer,
2192  cheapest_safe_inner,
2193  hashclauses, jointype, extra,
2194  false /* parallel_hash */ );
2195  }
2196  }
2197 }
2198 
2199 /*
2200  * select_mergejoin_clauses
2201  * Select mergejoin clauses that are usable for a particular join.
2202  * Returns a list of RestrictInfo nodes for those clauses.
2203  *
2204  * *mergejoin_allowed is normally set to true, but it is set to false if
2205  * this is a right/full join and there are nonmergejoinable join clauses.
2206  * The executor's mergejoin machinery cannot handle such cases, so we have
2207  * to avoid generating a mergejoin plan. (Note that this flag does NOT
2208  * consider whether there are actually any mergejoinable clauses. This is
2209  * correct because in some cases we need to build a clauseless mergejoin.
2210  * Simply returning NIL is therefore not enough to distinguish safe from
2211  * unsafe cases.)
2212  *
2213  * We also mark each selected RestrictInfo to show which side is currently
2214  * being considered as outer. These are transient markings that are only
2215  * good for the duration of the current add_paths_to_joinrel() call!
2216  *
2217  * We examine each restrictinfo clause known for the join to see
2218  * if it is mergejoinable and involves vars from the two sub-relations
2219  * currently of interest.
2220  */
2221 static List *
2223  RelOptInfo *joinrel,
2224  RelOptInfo *outerrel,
2225  RelOptInfo *innerrel,
2226  List *restrictlist,
2227  JoinType jointype,
2228  bool *mergejoin_allowed)
2229 {
2230  List *result_list = NIL;
2231  bool isouterjoin = IS_OUTER_JOIN(jointype);
2232  bool have_nonmergeable_joinclause = false;
2233  ListCell *l;
2234 
2235  foreach(l, restrictlist)
2236  {
2237  RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
2238 
2239  /*
2240  * If processing an outer join, only use its own join clauses in the
2241  * merge. For inner joins we can use pushed-down clauses too. (Note:
2242  * we don't set have_nonmergeable_joinclause here because pushed-down
2243  * clauses will become otherquals not joinquals.)
2244  */
2245  if (isouterjoin && RINFO_IS_PUSHED_DOWN(restrictinfo, joinrel->relids))
2246  continue;
2247 
2248  /* Check that clause is a mergeable operator clause */
2249  if (!restrictinfo->can_join ||
2250  restrictinfo->mergeopfamilies == NIL)
2251  {
2252  /*
2253  * The executor can handle extra joinquals that are constants, but
2254  * not anything else, when doing right/full merge join. (The
2255  * reason to support constants is so we can do FULL JOIN ON
2256  * FALSE.)
2257  */
2258  if (!restrictinfo->clause || !IsA(restrictinfo->clause, Const))
2259  have_nonmergeable_joinclause = true;
2260  continue; /* not mergejoinable */
2261  }
2262 
2263  /*
2264  * Check if clause has the form "outer op inner" or "inner op outer".
2265  */
2266  if (!clause_sides_match_join(restrictinfo, outerrel, innerrel))
2267  {
2268  have_nonmergeable_joinclause = true;
2269  continue; /* no good for these input relations */
2270  }
2271 
2272  /*
2273  * Insist that each side have a non-redundant eclass. This
2274  * restriction is needed because various bits of the planner expect
2275  * that each clause in a merge be associable with some pathkey in a
2276  * canonical pathkey list, but redundant eclasses can't appear in
2277  * canonical sort orderings. (XXX it might be worth relaxing this,
2278  * but not enough time to address it for 8.3.)
2279  *
2280  * Note: it would be bad if this condition failed for an otherwise
2281  * mergejoinable FULL JOIN clause, since that would result in
2282  * undesirable planner failure. I believe that is not possible
2283  * however; a variable involved in a full join could only appear in
2284  * below_outer_join eclasses, which aren't considered redundant.
2285  *
2286  * This case *can* happen for left/right join clauses: the outer-side
2287  * variable could be equated to a constant. Because we will propagate
2288  * that constant across the join clause, the loss of ability to do a
2289  * mergejoin is not really all that big a deal, and so it's not clear
2290  * that improving this is important.
2291  */
2292  update_mergeclause_eclasses(root, restrictinfo);
2293 
2294  if (EC_MUST_BE_REDUNDANT(restrictinfo->left_ec) ||
2295  EC_MUST_BE_REDUNDANT(restrictinfo->right_ec))
2296  {
2297  have_nonmergeable_joinclause = true;
2298  continue; /* can't handle redundant eclasses */
2299  }
2300 
2301  result_list = lappend(result_list, restrictinfo);
2302  }
2303 
2304  /*
2305  * Report whether mergejoin is allowed (see comment at top of function).
2306  */
2307  switch (jointype)
2308  {
2309  case JOIN_RIGHT:
2310  case JOIN_FULL:
2311  *mergejoin_allowed = !have_nonmergeable_joinclause;
2312  break;
2313  default:
2314  *mergejoin_allowed = true;
2315  break;
2316  }
2317 
2318  return result_list;
2319 }
bool innerrel_is_unique(PlannerInfo *root, Relids joinrelids, Relids outerrelids, RelOptInfo *innerrel, JoinType jointype, List *restrictlist, bool force_cache)
Definition: analyzejoins.c:964
Bitmapset * bms_join(Bitmapset *a, Bitmapset *b)
Definition: bitmapset.c:951
bool bms_is_subset(const Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:315
void bms_free(Bitmapset *a)
Definition: bitmapset.c:208
Bitmapset * bms_difference(const Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:291
Bitmapset * bms_add_members(Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:795
bool bms_is_empty(const Bitmapset *a)
Definition: bitmapset.c:703
bool bms_overlap(const Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:494
bool bms_nonempty_difference(const Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:547
#define OidIsValid(objectId)
Definition: c.h:710
bool contain_volatile_functions(Node *clause)
Definition: clauses.c:453
void initial_cost_mergejoin(PlannerInfo *root, JoinCostWorkspace *workspace, JoinType jointype, List *mergeclauses, Path *outer_path, Path *inner_path, List *outersortkeys, List *innersortkeys, JoinPathExtraData *extra)
Definition: costsize.c:3183
bool enable_memoize
Definition: costsize.c:143
void compute_semi_anti_join_factors(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, JoinType jointype, SpecialJoinInfo *sjinfo, List *restrictlist, SemiAntiJoinFactors *semifactors)
Definition: costsize.c:4665
bool enable_material
Definition: costsize.c:142
void initial_cost_hashjoin(PlannerInfo *root, JoinCostWorkspace *workspace, JoinType jointype, List *hashclauses, Path *outer_path, Path *inner_path, JoinPathExtraData *extra, bool parallel_hash)
Definition: costsize.c:3741
bool enable_hashjoin
Definition: costsize.c:145
bool enable_mergejoin
Definition: costsize.c:144
void initial_cost_nestloop(PlannerInfo *root, JoinCostWorkspace *workspace, JoinType jointype, Path *outer_path, Path *inner_path, JoinPathExtraData *extra)
Definition: costsize.c:2902
bool enable_parallel_hash
Definition: costsize.c:150
#define ERROR
Definition: elog.h:33
#define elog(elevel,...)
Definition: elog.h:218
bool ExecMaterializesOutput(NodeTag plantype)
Definition: execAmi.c:637
static void try_hashjoin_path(PlannerInfo *root, RelOptInfo *joinrel, Path *outer_path, Path *inner_path, List *hashclauses, JoinType jointype, JoinPathExtraData *extra)
Definition: joinpath.c:984
static Path * get_memoize_path(PlannerInfo *root, RelOptInfo *innerrel, RelOptInfo *outerrel, Path *inner_path, Path *outer_path, JoinType jointype, JoinPathExtraData *extra)
Definition: joinpath.c:503
static bool clause_sides_match_join(RestrictInfo *rinfo, RelOptInfo *outerrel, RelOptInfo *innerrel)
Definition: joinpath.c:1111
static List * select_mergejoin_clauses(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, List *restrictlist, JoinType jointype, bool *mergejoin_allowed)
Definition: joinpath.c:2222
static void sort_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, JoinType jointype, JoinPathExtraData *extra)
Definition: joinpath.c:1143
void add_paths_to_joinrel(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, JoinType jointype, SpecialJoinInfo *sjinfo, List *restrictlist)
Definition: joinpath.c:123
static bool paraminfo_get_equal_hashops(PlannerInfo *root, ParamPathInfo *param_info, RelOptInfo *outerrel, RelOptInfo *innerrel, List **param_exprs, List **operators, bool *binary_mode)
Definition: joinpath.c:378
static void try_nestloop_path(PlannerInfo *root, RelOptInfo *joinrel, Path *outer_path, Path *inner_path, List *pathkeys, JoinType jointype, JoinPathExtraData *extra)
Definition: joinpath.c:625
static void consider_parallel_nestloop(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, JoinType jointype, JoinPathExtraData *extra)
Definition: joinpath.c:1883
#define PATH_PARAM_BY_PARENT(path, rel)
Definition: joinpath.c:36
set_join_pathlist_hook_type set_join_pathlist_hook
Definition: joinpath.c:30
static void try_mergejoin_path(PlannerInfo *root, RelOptInfo *joinrel, Path *outer_path, Path *inner_path, List *pathkeys, List *mergeclauses, List *outersortkeys, List *innersortkeys, JoinType jointype, JoinPathExtraData *extra, bool is_partial)
Definition: joinpath.c:819
static void consider_parallel_mergejoin(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, JoinType jointype, JoinPathExtraData *extra, Path *inner_cheapest_total)
Definition: joinpath.c:1843
static void generate_mergejoin_paths(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *innerrel, Path *outerpath, JoinType jointype, JoinPathExtraData *extra, bool useallclauses, Path *inner_cheapest_total, List *merge_pathkeys, bool is_partial)
Definition: joinpath.c:1345
static void hash_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, JoinType jointype, JoinPathExtraData *extra)
Definition: joinpath.c:1967
static void try_partial_mergejoin_path(PlannerInfo *root, RelOptInfo *joinrel, Path *outer_path, Path *inner_path, List *pathkeys, List *mergeclauses, List *outersortkeys, List *innersortkeys, JoinType jointype, JoinPathExtraData *extra)
Definition: joinpath.c:914
#define PATH_PARAM_BY_REL(path, rel)
Definition: joinpath.c:42
static bool allow_star_schema_join(PlannerInfo *root, Relids outerrelids, Relids inner_paramrels)
Definition: joinpath.c:356
static void try_partial_hashjoin_path(PlannerInfo *root, RelOptInfo *joinrel, Path *outer_path, Path *inner_path, List *hashclauses, JoinType jointype, JoinPathExtraData *extra, bool parallel_hash)
Definition: joinpath.c:1050
static void match_unsorted_outer(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, JoinType jointype, JoinPathExtraData *extra)
Definition: joinpath.c:1593
static void try_partial_nestloop_path(PlannerInfo *root, RelOptInfo *joinrel, Path *outer_path, Path *inner_path, List *pathkeys, JoinType jointype, JoinPathExtraData *extra)
Definition: joinpath.c:737
bool have_dangerous_phv(PlannerInfo *root, Relids outer_relids, Relids inner_params)
Definition: joinrels.c:1184
Assert(fmt[strlen(fmt) - 1] !='\n')
List * list_truncate(List *list, int new_size)
Definition: list.c:610
List * lappend(List *list, void *datum)
Definition: list.c:336
List * lappend_oid(List *list, Oid datum)
Definition: list.c:372
List * list_copy(const List *oldlist)
Definition: list.c:1532
List * list_delete_nth_cell(List *list, int n)
Definition: list.c:726
void list_free(List *list)
Definition: list.c:1505
List * lcons(void *datum, List *list)
Definition: list.c:474
Oid exprType(const Node *expr)
Definition: nodeFuncs.c:41
#define IsA(nodeptr, _type_)
Definition: nodes.h:589
#define IS_OUTER_JOIN(jointype)
Definition: nodes.h:755
JoinType
Definition: nodes.h:707
@ JOIN_SEMI
Definition: nodes.h:726
@ JOIN_FULL
Definition: nodes.h:714
@ JOIN_INNER
Definition: nodes.h:712
@ JOIN_RIGHT
Definition: nodes.h:715
@ JOIN_LEFT
Definition: nodes.h:713
@ JOIN_UNIQUE_OUTER
Definition: nodes.h:733
@ JOIN_UNIQUE_INNER
Definition: nodes.h:734
@ JOIN_ANTI
Definition: nodes.h:727
List * build_join_pathkeys(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, List *outer_pathkeys)
Definition: pathkeys.c:1082
List * make_inner_pathkeys_for_merge(PlannerInfo *root, List *mergeclauses, List *outer_pathkeys)
Definition: pathkeys.c:1547
Path * get_cheapest_path_for_pathkeys(List *paths, List *pathkeys, Relids required_outer, CostSelector cost_criterion, bool require_parallel_safe)
Definition: pathkeys.c:404
List * find_mergeclauses_for_outer_pathkeys(PlannerInfo *root, List *pathkeys, List *restrictinfos)
Definition: pathkeys.c:1262
List * trim_mergeclauses_for_inner_pathkeys(PlannerInfo *root, List *mergeclauses, List *pathkeys)
Definition: pathkeys.c:1650
List * select_outer_pathkeys_for_merge(PlannerInfo *root, List *mergeclauses, RelOptInfo *joinrel)
Definition: pathkeys.c:1375
void update_mergeclause_eclasses(PlannerInfo *root, RestrictInfo *restrictinfo)
Definition: pathkeys.c:1228
Path * get_cheapest_parallel_safe_total_inner(List *paths)
Definition: pathkeys.c:482
bool pathkeys_contained_in(List *keys1, List *keys2)
Definition: pathkeys.c:324
NestPath * create_nestloop_path(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, JoinCostWorkspace *workspace, JoinPathExtraData *extra, Path *outer_path, Path *inner_path, List *restrict_clauses, List *pathkeys, Relids required_outer)
Definition: pathnode.c:2407
Relids calc_non_nestloop_required_outer(Path *outer_path, Path *inner_path)
Definition: pathnode.c:2374
UniquePath * create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, SpecialJoinInfo *sjinfo)
Definition: pathnode.c:1640
Relids calc_nestloop_required_outer(Relids outerrelids, Relids outer_paramrels, Relids innerrelids, Relids inner_paramrels)
Definition: pathnode.c:2341
Path * reparameterize_path_by_child(PlannerInfo *root, Path *path, RelOptInfo *child_rel)
Definition: pathnode.c:3974
MergePath * create_mergejoin_path(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, JoinCostWorkspace *workspace, JoinPathExtraData *extra, Path *outer_path, Path *inner_path, List *restrict_clauses, List *pathkeys, Relids required_outer, List *mergeclauses, List *outersortkeys, List *innersortkeys)
Definition: pathnode.c:2495
MemoizePath * create_memoize_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, List *param_exprs, List *hash_operators, bool singlerow, bool binary_mode, double calls)
Definition: pathnode.c:1584
void add_partial_path(RelOptInfo *parent_rel, Path *new_path)
Definition: pathnode.c:749
MaterialPath * create_material_path(RelOptInfo *rel, Path *subpath)
Definition: pathnode.c:1552
HashPath * create_hashjoin_path(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, JoinCostWorkspace *workspace, JoinPathExtraData *extra, Path *outer_path, Path *inner_path, bool parallel_hash, List *restrict_clauses, Relids required_outer, List *hashclauses)
Definition: pathnode.c:2561
bool add_partial_path_precheck(RelOptInfo *parent_rel, Cost total_cost, List *pathkeys)
Definition: pathnode.c:867
void add_path(RelOptInfo *parent_rel, Path *new_path)
Definition: pathnode.c:422
int compare_path_costs(Path *path1, Path *path2, CostSelector criterion)
Definition: pathnode.c:71
bool add_path_precheck(RelOptInfo *parent_rel, Cost startup_cost, Cost total_cost, List *pathkeys, Relids required_outer)
Definition: pathnode.c:644
#define EC_MUST_BE_REDUNDANT(eclass)
Definition: pathnodes.h:1008
#define RINFO_IS_PUSHED_DOWN(rinfo, joinrelids)
Definition: pathnodes.h:2142
@ TOTAL_COST
Definition: pathnodes.h:36
@ STARTUP_COST
Definition: pathnodes.h:36
#define PATH_REQ_OUTER(path)
Definition: pathnodes.h:1201
@ RELOPT_OTHER_JOINREL
Definition: pathnodes.h:644
void(* set_join_pathlist_hook_type)(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, JoinType jointype, JoinPathExtraData *extra)
Definition: paths.h:36
#define lfirst(lc)
Definition: pg_list.h:169
static int list_length(const List *l)
Definition: pg_list.h:149
#define NIL
Definition: pg_list.h:65
static ListCell * list_head(const List *l)
Definition: pg_list.h:125
#define linitial(l)
Definition: pg_list.h:174
#define lsecond(l)
Definition: pg_list.h:179
#define foreach_current_index(cell)
Definition: pg_list.h:382
#define InvalidOid
Definition: postgres_ext.h:36
unsigned int Oid
Definition: postgres_ext.h:31
GetForeignJoinPaths_function GetForeignJoinPaths
Definition: fdwapi.h:223
List * mergeclause_list
Definition: pathnodes.h:2533
Relids param_source_rels
Definition: pathnodes.h:2537
SemiAntiJoinFactors semifactors
Definition: pathnodes.h:2536
SpecialJoinInfo * sjinfo
Definition: pathnodes.h:2535
Definition: pg_list.h:51
Definition: nodes.h:539
List * args
Definition: primnodes.h:548
List * ppi_clauses
Definition: pathnodes.h:1143
Relids ppi_req_outer
Definition: pathnodes.h:1141
List * pathkeys
Definition: pathnodes.h:1196
NodeTag pathtype
Definition: pathnodes.h:1180
RelOptInfo * parent
Definition: pathnodes.h:1182
ParamPathInfo * param_info
Definition: pathnodes.h:1185
bool parallel_safe
Definition: pathnodes.h:1188
List * join_info_list
Definition: pathnodes.h:266
Relids all_baserels
Definition: pathnodes.h:210
List * baserestrictinfo
Definition: pathnodes.h:745
Relids relids
Definition: pathnodes.h:681
struct PathTarget * reltarget
Definition: pathnodes.h:692
struct FdwRoutine * fdwroutine
Definition: pathnodes.h:736
List * lateral_vars
Definition: pathnodes.h:716
bool consider_parallel
Definition: pathnodes.h:689
Relids top_parent_relids
Definition: pathnodes.h:756
Relids lateral_relids
Definition: pathnodes.h:706
List * cheapest_parameterized_paths
Definition: pathnodes.h:701
List * pathlist
Definition: pathnodes.h:695
RelOptKind reloptkind
Definition: pathnodes.h:678
struct Path * cheapest_startup_path
Definition: pathnodes.h:698
struct Path * cheapest_total_path
Definition: pathnodes.h:699
List * partial_pathlist
Definition: pathnodes.h:697
Cardinality rows
Definition: pathnodes.h:684
EquivalenceClass * left_ec
Definition: pathnodes.h:2109
bool outer_is_left
Definition: pathnodes.h:2116
Oid right_hasheqoperator
Definition: pathnodes.h:2129
Relids right_relids
Definition: pathnodes.h:2089
Oid left_hasheqoperator
Definition: pathnodes.h:2128
Relids left_relids
Definition: pathnodes.h:2088
Expr * clause
Definition: pathnodes.h:2058
EquivalenceClass * right_ec
Definition: pathnodes.h:2110
Oid hashjoinoperator
Definition: pathnodes.h:2119
List * mergeopfamilies
Definition: pathnodes.h:2106
Relids min_righthand
Definition: pathnodes.h:2256
JoinType jointype
Definition: pathnodes.h:2259
Relids min_lefthand
Definition: pathnodes.h:2255
TypeCacheEntry * lookup_type_cache(Oid type_id, int flags)
Definition: typcache.c:339
#define TYPECACHE_EQ_OPR
Definition: typcache.h:136
#define TYPECACHE_HASH_PROC
Definition: typcache.h:140