PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
analyzejoins.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * analyzejoins.c
4  * Routines for simplifying joins after initial query analysis
5  *
6  * While we do a great deal of join simplification in prep/prepjointree.c,
7  * certain optimizations cannot be performed at that stage for lack of
8  * detailed information about the query. The routines here are invoked
9  * after initsplan.c has done its work, and can do additional join removal
10  * and simplification steps based on the information extracted. The penalty
11  * is that we have to work harder to clean up after ourselves when we modify
12  * the query, since the derived data structures have to be updated too.
13  *
14  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
15  * Portions Copyright (c) 1994, Regents of the University of California
16  *
17  *
18  * IDENTIFICATION
19  * src/backend/optimizer/plan/analyzejoins.c
20  *
21  *-------------------------------------------------------------------------
22  */
23 #include "postgres.h"
24 
25 #include "nodes/nodeFuncs.h"
26 #include "optimizer/clauses.h"
27 #include "optimizer/joininfo.h"
28 #include "optimizer/pathnode.h"
29 #include "optimizer/paths.h"
30 #include "optimizer/planmain.h"
31 #include "optimizer/tlist.h"
32 #include "optimizer/var.h"
33 #include "utils/lsyscache.h"
34 
35 /* local functions */
36 static bool join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo);
37 static void remove_rel_from_query(PlannerInfo *root, int relid,
38  Relids joinrelids);
39 static List *remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved);
40 static bool rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel);
41 static bool rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel,
42  List *clause_list);
43 static Oid distinct_col_search(int colno, List *colnos, List *opids);
44 static bool is_innerrel_unique_for(PlannerInfo *root,
45  Relids outerrelids,
46  RelOptInfo *innerrel,
47  JoinType jointype,
48  List *restrictlist);
49 
50 
51 /*
52  * remove_useless_joins
53  * Check for relations that don't actually need to be joined at all,
54  * and remove them from the query.
55  *
56  * We are passed the current joinlist and return the updated list. Other
57  * data structures that have to be updated are accessible via "root".
58  */
59 List *
61 {
62  ListCell *lc;
63 
64  /*
65  * We are only interested in relations that are left-joined to, so we can
66  * scan the join_info_list to find them easily.
67  */
68 restart:
69  foreach(lc, root->join_info_list)
70  {
71  SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
72  int innerrelid;
73  int nremoved;
74 
75  /* Skip if not removable */
76  if (!join_is_removable(root, sjinfo))
77  continue;
78 
79  /*
80  * Currently, join_is_removable can only succeed when the sjinfo's
81  * righthand is a single baserel. Remove that rel from the query and
82  * joinlist.
83  */
84  innerrelid = bms_singleton_member(sjinfo->min_righthand);
85 
86  remove_rel_from_query(root, innerrelid,
87  bms_union(sjinfo->min_lefthand,
88  sjinfo->min_righthand));
89 
90  /* We verify that exactly one reference gets removed from joinlist */
91  nremoved = 0;
92  joinlist = remove_rel_from_joinlist(joinlist, innerrelid, &nremoved);
93  if (nremoved != 1)
94  elog(ERROR, "failed to find relation %d in joinlist", innerrelid);
95 
96  /*
97  * We can delete this SpecialJoinInfo from the list too, since it's no
98  * longer of interest.
99  */
100  root->join_info_list = list_delete_ptr(root->join_info_list, sjinfo);
101 
102  /*
103  * Restart the scan. This is necessary to ensure we find all
104  * removable joins independently of ordering of the join_info_list
105  * (note that removal of attr_needed bits may make a join appear
106  * removable that did not before). Also, since we just deleted the
107  * current list cell, we'd have to have some kluge to continue the
108  * list scan anyway.
109  */
110  goto restart;
111  }
112 
113  return joinlist;
114 }
115 
116 /*
117  * clause_sides_match_join
118  * Determine whether a join clause is of the right form to use in this join.
119  *
120  * We already know that the clause is a binary opclause referencing only the
121  * rels in the current join. The point here is to check whether it has the
122  * form "outerrel_expr op innerrel_expr" or "innerrel_expr op outerrel_expr",
123  * rather than mixing outer and inner vars on either side. If it matches,
124  * we set the transient flag outer_is_left to identify which side is which.
125  */
126 static inline bool
128  Relids innerrelids)
129 {
130  if (bms_is_subset(rinfo->left_relids, outerrelids) &&
131  bms_is_subset(rinfo->right_relids, innerrelids))
132  {
133  /* lefthand side is outer */
134  rinfo->outer_is_left = true;
135  return true;
136  }
137  else if (bms_is_subset(rinfo->left_relids, innerrelids) &&
138  bms_is_subset(rinfo->right_relids, outerrelids))
139  {
140  /* righthand side is outer */
141  rinfo->outer_is_left = false;
142  return true;
143  }
144  return false; /* no good for these input relations */
145 }
146 
147 /*
148  * join_is_removable
149  * Check whether we need not perform this special join at all, because
150  * it will just duplicate its left input.
151  *
152  * This is true for a left join for which the join condition cannot match
153  * more than one inner-side row. (There are other possibly interesting
154  * cases, but we don't have the infrastructure to prove them.) We also
155  * have to check that the inner side doesn't generate any variables needed
156  * above the join.
157  */
158 static bool
160 {
161  int innerrelid;
162  RelOptInfo *innerrel;
163  Relids joinrelids;
164  List *clause_list = NIL;
165  ListCell *l;
166  int attroff;
167 
168  /*
169  * Must be a non-delaying left join to a single baserel, else we aren't
170  * going to be able to do anything with it.
171  */
172  if (sjinfo->jointype != JOIN_LEFT ||
173  sjinfo->delay_upper_joins)
174  return false;
175 
176  if (!bms_get_singleton_member(sjinfo->min_righthand, &innerrelid))
177  return false;
178 
179  innerrel = find_base_rel(root, innerrelid);
180 
181  /*
182  * Before we go to the effort of checking whether any innerrel variables
183  * are needed above the join, make a quick check to eliminate cases in
184  * which we will surely be unable to prove uniqueness of the innerrel.
185  */
186  if (!rel_supports_distinctness(root, innerrel))
187  return false;
188 
189  /* Compute the relid set for the join we are considering */
190  joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
191 
192  /*
193  * We can't remove the join if any inner-rel attributes are used above the
194  * join.
195  *
196  * Note that this test only detects use of inner-rel attributes in higher
197  * join conditions and the target list. There might be such attributes in
198  * pushed-down conditions at this join, too. We check that case below.
199  *
200  * As a micro-optimization, it seems better to start with max_attr and
201  * count down rather than starting with min_attr and counting up, on the
202  * theory that the system attributes are somewhat less likely to be wanted
203  * and should be tested last.
204  */
205  for (attroff = innerrel->max_attr - innerrel->min_attr;
206  attroff >= 0;
207  attroff--)
208  {
209  if (!bms_is_subset(innerrel->attr_needed[attroff], joinrelids))
210  return false;
211  }
212 
213  /*
214  * Similarly check that the inner rel isn't needed by any PlaceHolderVars
215  * that will be used above the join. We only need to fail if such a PHV
216  * actually references some inner-rel attributes; but the correct check
217  * for that is relatively expensive, so we first check against ph_eval_at,
218  * which must mention the inner rel if the PHV uses any inner-rel attrs as
219  * non-lateral references. Note that if the PHV's syntactic scope is just
220  * the inner rel, we can't drop the rel even if the PHV is variable-free.
221  */
222  foreach(l, root->placeholder_list)
223  {
224  PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l);
225 
226  if (bms_overlap(phinfo->ph_lateral, innerrel->relids))
227  return false; /* it references innerrel laterally */
228  if (bms_is_subset(phinfo->ph_needed, joinrelids))
229  continue; /* PHV is not used above the join */
230  if (!bms_overlap(phinfo->ph_eval_at, innerrel->relids))
231  continue; /* it definitely doesn't reference innerrel */
232  if (bms_is_subset(phinfo->ph_eval_at, innerrel->relids))
233  return false; /* there isn't any other place to eval PHV */
234  if (bms_overlap(pull_varnos((Node *) phinfo->ph_var->phexpr),
235  innerrel->relids))
236  return false; /* it does reference innerrel */
237  }
238 
239  /*
240  * Search for mergejoinable clauses that constrain the inner rel against
241  * either the outer rel or a pseudoconstant. If an operator is
242  * mergejoinable then it behaves like equality for some btree opclass, so
243  * it's what we want. The mergejoinability test also eliminates clauses
244  * containing volatile functions, which we couldn't depend on.
245  */
246  foreach(l, innerrel->joininfo)
247  {
248  RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
249 
250  /*
251  * If it's not a join clause for this outer join, we can't use it.
252  * Note that if the clause is pushed-down, then it is logically from
253  * above the outer join, even if it references no other rels (it might
254  * be from WHERE, for example).
255  */
256  if (restrictinfo->is_pushed_down ||
257  !bms_equal(restrictinfo->required_relids, joinrelids))
258  {
259  /*
260  * If such a clause actually references the inner rel then join
261  * removal has to be disallowed. We have to check this despite
262  * the previous attr_needed checks because of the possibility of
263  * pushed-down clauses referencing the rel.
264  */
265  if (bms_is_member(innerrelid, restrictinfo->clause_relids))
266  return false;
267  continue; /* else, ignore; not useful here */
268  }
269 
270  /* Ignore if it's not a mergejoinable clause */
271  if (!restrictinfo->can_join ||
272  restrictinfo->mergeopfamilies == NIL)
273  continue; /* not mergejoinable */
274 
275  /*
276  * Check if clause has the form "outer op inner" or "inner op outer",
277  * and if so mark which side is inner.
278  */
279  if (!clause_sides_match_join(restrictinfo, sjinfo->min_lefthand,
280  innerrel->relids))
281  continue; /* no good for these input relations */
282 
283  /* OK, add to list */
284  clause_list = lappend(clause_list, restrictinfo);
285  }
286 
287  /*
288  * Now that we have the relevant equality join clauses, try to prove the
289  * innerrel distinct.
290  */
291  if (rel_is_distinct_for(root, innerrel, clause_list))
292  return true;
293 
294  /*
295  * Some day it would be nice to check for other methods of establishing
296  * distinctness.
297  */
298  return false;
299 }
300 
301 
302 /*
303  * Remove the target relid from the planner's data structures, having
304  * determined that there is no need to include it in the query.
305  *
306  * We are not terribly thorough here. We must make sure that the rel is
307  * no longer treated as a baserel, and that attributes of other baserels
308  * are no longer marked as being needed at joins involving this rel.
309  * Also, join quals involving the rel have to be removed from the joininfo
310  * lists, but only if they belong to the outer join identified by joinrelids.
311  */
312 static void
313 remove_rel_from_query(PlannerInfo *root, int relid, Relids joinrelids)
314 {
315  RelOptInfo *rel = find_base_rel(root, relid);
316  List *joininfos;
317  Index rti;
318  ListCell *l;
319  ListCell *nextl;
320 
321  /*
322  * Mark the rel as "dead" to show it is no longer part of the join tree.
323  * (Removing it from the baserel array altogether seems too risky.)
324  */
325  rel->reloptkind = RELOPT_DEADREL;
326 
327  /*
328  * Remove references to the rel from other baserels' attr_needed arrays.
329  */
330  for (rti = 1; rti < root->simple_rel_array_size; rti++)
331  {
332  RelOptInfo *otherrel = root->simple_rel_array[rti];
333  int attroff;
334 
335  /* there may be empty slots corresponding to non-baserel RTEs */
336  if (otherrel == NULL)
337  continue;
338 
339  Assert(otherrel->relid == rti); /* sanity check on array */
340 
341  /* no point in processing target rel itself */
342  if (otherrel == rel)
343  continue;
344 
345  for (attroff = otherrel->max_attr - otherrel->min_attr;
346  attroff >= 0;
347  attroff--)
348  {
349  otherrel->attr_needed[attroff] =
350  bms_del_member(otherrel->attr_needed[attroff], relid);
351  }
352  }
353 
354  /*
355  * Likewise remove references from SpecialJoinInfo data structures.
356  *
357  * This is relevant in case the outer join we're deleting is nested inside
358  * other outer joins: the upper joins' relid sets have to be adjusted. The
359  * RHS of the target outer join will be made empty here, but that's OK
360  * since caller will delete that SpecialJoinInfo entirely.
361  */
362  foreach(l, root->join_info_list)
363  {
364  SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(l);
365 
366  sjinfo->min_lefthand = bms_del_member(sjinfo->min_lefthand, relid);
367  sjinfo->min_righthand = bms_del_member(sjinfo->min_righthand, relid);
368  sjinfo->syn_lefthand = bms_del_member(sjinfo->syn_lefthand, relid);
369  sjinfo->syn_righthand = bms_del_member(sjinfo->syn_righthand, relid);
370  }
371 
372  /*
373  * Likewise remove references from PlaceHolderVar data structures,
374  * removing any no-longer-needed placeholders entirely.
375  *
376  * Removal is a bit tricker than it might seem: we can remove PHVs that
377  * are used at the target rel and/or in the join qual, but not those that
378  * are used at join partner rels or above the join. It's not that easy to
379  * distinguish PHVs used at partner rels from those used in the join qual,
380  * since they will both have ph_needed sets that are subsets of
381  * joinrelids. However, a PHV used at a partner rel could not have the
382  * target rel in ph_eval_at, so we check that while deciding whether to
383  * remove or just update the PHV. There is no corresponding test in
384  * join_is_removable because it doesn't need to distinguish those cases.
385  */
386  for (l = list_head(root->placeholder_list); l != NULL; l = nextl)
387  {
388  PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l);
389 
390  nextl = lnext(l);
391  Assert(!bms_is_member(relid, phinfo->ph_lateral));
392  if (bms_is_subset(phinfo->ph_needed, joinrelids) &&
393  bms_is_member(relid, phinfo->ph_eval_at))
395  phinfo);
396  else
397  {
398  phinfo->ph_eval_at = bms_del_member(phinfo->ph_eval_at, relid);
399  Assert(!bms_is_empty(phinfo->ph_eval_at));
400  phinfo->ph_needed = bms_del_member(phinfo->ph_needed, relid);
401  }
402  }
403 
404  /*
405  * Remove any joinquals referencing the rel from the joininfo lists.
406  *
407  * In some cases, a joinqual has to be put back after deleting its
408  * reference to the target rel. This can occur for pseudoconstant and
409  * outerjoin-delayed quals, which can get marked as requiring the rel in
410  * order to force them to be evaluated at or above the join. We can't
411  * just discard them, though. Only quals that logically belonged to the
412  * outer join being discarded should be removed from the query.
413  *
414  * We must make a copy of the rel's old joininfo list before starting the
415  * loop, because otherwise remove_join_clause_from_rels would destroy the
416  * list while we're scanning it.
417  */
418  joininfos = list_copy(rel->joininfo);
419  foreach(l, joininfos)
420  {
421  RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
422 
423  remove_join_clause_from_rels(root, rinfo, rinfo->required_relids);
424 
425  if (rinfo->is_pushed_down ||
426  !bms_equal(rinfo->required_relids, joinrelids))
427  {
428  /* Recheck that qual doesn't actually reference the target rel */
429  Assert(!bms_is_member(relid, rinfo->clause_relids));
430 
431  /*
432  * The required_relids probably aren't shared with anything else,
433  * but let's copy them just to be sure.
434  */
435  rinfo->required_relids = bms_copy(rinfo->required_relids);
437  relid);
438  distribute_restrictinfo_to_rels(root, rinfo);
439  }
440  }
441 
442  /*
443  * There may be references to the rel in root->fkey_list, but if so,
444  * match_foreign_keys_to_quals() will get rid of them.
445  */
446 }
447 
448 /*
449  * Remove any occurrences of the target relid from a joinlist structure.
450  *
451  * It's easiest to build a whole new list structure, so we handle it that
452  * way. Efficiency is not a big deal here.
453  *
454  * *nremoved is incremented by the number of occurrences removed (there
455  * should be exactly one, but the caller checks that).
456  */
457 static List *
458 remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved)
459 {
460  List *result = NIL;
461  ListCell *jl;
462 
463  foreach(jl, joinlist)
464  {
465  Node *jlnode = (Node *) lfirst(jl);
466 
467  if (IsA(jlnode, RangeTblRef))
468  {
469  int varno = ((RangeTblRef *) jlnode)->rtindex;
470 
471  if (varno == relid)
472  (*nremoved)++;
473  else
474  result = lappend(result, jlnode);
475  }
476  else if (IsA(jlnode, List))
477  {
478  /* Recurse to handle subproblem */
479  List *sublist;
480 
481  sublist = remove_rel_from_joinlist((List *) jlnode,
482  relid, nremoved);
483  /* Avoid including empty sub-lists in the result */
484  if (sublist)
485  result = lappend(result, sublist);
486  }
487  else
488  {
489  elog(ERROR, "unrecognized joinlist node type: %d",
490  (int) nodeTag(jlnode));
491  }
492  }
493 
494  return result;
495 }
496 
497 
498 /*
499  * reduce_unique_semijoins
500  * Check for semijoins that can be simplified to plain inner joins
501  * because the inner relation is provably unique for the join clauses.
502  *
503  * Ideally this would happen during reduce_outer_joins, but we don't have
504  * enough information at that point.
505  *
506  * To perform the strength reduction when applicable, we need only delete
507  * the semijoin's SpecialJoinInfo from root->join_info_list. (We don't
508  * bother fixing the join type attributed to it in the query jointree,
509  * since that won't be consulted again.)
510  */
511 void
513 {
514  ListCell *lc;
515  ListCell *next;
516 
517  /*
518  * Scan the join_info_list to find semijoins. We can't use foreach
519  * because we may delete the current cell.
520  */
521  for (lc = list_head(root->join_info_list); lc != NULL; lc = next)
522  {
523  SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
524  int innerrelid;
525  RelOptInfo *innerrel;
526  Relids joinrelids;
527  List *restrictlist;
528 
529  next = lnext(lc);
530 
531  /*
532  * Must be a non-delaying semijoin to a single baserel, else we aren't
533  * going to be able to do anything with it. (It's probably not
534  * possible for delay_upper_joins to be set on a semijoin, but we
535  * might as well check.)
536  */
537  if (sjinfo->jointype != JOIN_SEMI ||
538  sjinfo->delay_upper_joins)
539  continue;
540 
541  if (!bms_get_singleton_member(sjinfo->min_righthand, &innerrelid))
542  continue;
543 
544  innerrel = find_base_rel(root, innerrelid);
545 
546  /*
547  * Before we trouble to run generate_join_implied_equalities, make a
548  * quick check to eliminate cases in which we will surely be unable to
549  * prove uniqueness of the innerrel.
550  */
551  if (!rel_supports_distinctness(root, innerrel))
552  continue;
553 
554  /* Compute the relid set for the join we are considering */
555  joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
556 
557  /*
558  * Since we're only considering a single-rel RHS, any join clauses it
559  * has must be clauses linking it to the semijoin's min_lefthand. We
560  * can also consider EC-derived join clauses.
561  */
562  restrictlist =
564  joinrelids,
565  sjinfo->min_lefthand,
566  innerrel),
567  innerrel->joininfo);
568 
569  /* Test whether the innerrel is unique for those clauses. */
570  if (!innerrel_is_unique(root, sjinfo->min_lefthand, innerrel,
571  JOIN_SEMI, restrictlist, true))
572  continue;
573 
574  /* OK, remove the SpecialJoinInfo from the list. */
575  root->join_info_list = list_delete_ptr(root->join_info_list, sjinfo);
576  }
577 }
578 
579 
580 /*
581  * rel_supports_distinctness
582  * Could the relation possibly be proven distinct on some set of columns?
583  *
584  * This is effectively a pre-checking function for rel_is_distinct_for().
585  * It must return TRUE if rel_is_distinct_for() could possibly return TRUE
586  * with this rel, but it should not expend a lot of cycles. The idea is
587  * that callers can avoid doing possibly-expensive processing to compute
588  * rel_is_distinct_for()'s argument lists if the call could not possibly
589  * succeed.
590  */
591 static bool
593 {
594  /* We only know about baserels ... */
595  if (rel->reloptkind != RELOPT_BASEREL)
596  return false;
597  if (rel->rtekind == RTE_RELATION)
598  {
599  /*
600  * For a plain relation, we only know how to prove uniqueness by
601  * reference to unique indexes. Make sure there's at least one
602  * suitable unique index. It must be immediately enforced, and if
603  * it's a partial index, it must match the query. (Keep these
604  * conditions in sync with relation_has_unique_index_for!)
605  */
606  ListCell *lc;
607 
608  foreach(lc, rel->indexlist)
609  {
610  IndexOptInfo *ind = (IndexOptInfo *) lfirst(lc);
611 
612  if (ind->unique && ind->immediate &&
613  (ind->indpred == NIL || ind->predOK))
614  return true;
615  }
616  }
617  else if (rel->rtekind == RTE_SUBQUERY)
618  {
619  Query *subquery = root->simple_rte_array[rel->relid]->subquery;
620 
621  /* Check if the subquery has any qualities that support distinctness */
622  if (query_supports_distinctness(subquery))
623  return true;
624  }
625  /* We have no proof rules for any other rtekinds. */
626  return false;
627 }
628 
629 /*
630  * rel_is_distinct_for
631  * Does the relation return only distinct rows according to clause_list?
632  *
633  * clause_list is a list of join restriction clauses involving this rel and
634  * some other one. Return true if no two rows emitted by this rel could
635  * possibly join to the same row of the other rel.
636  *
637  * The caller must have already determined that each condition is a
638  * mergejoinable equality with an expression in this relation on one side, and
639  * an expression not involving this relation on the other. The transient
640  * outer_is_left flag is used to identify which side references this relation:
641  * left side if outer_is_left is false, right side if it is true.
642  *
643  * Note that the passed-in clause_list may be destructively modified! This
644  * is OK for current uses, because the clause_list is built by the caller for
645  * the sole purpose of passing to this function.
646  */
647 static bool
648 rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list)
649 {
650  /*
651  * We could skip a couple of tests here if we assume all callers checked
652  * rel_supports_distinctness first, but it doesn't seem worth taking any
653  * risk for.
654  */
655  if (rel->reloptkind != RELOPT_BASEREL)
656  return false;
657  if (rel->rtekind == RTE_RELATION)
658  {
659  /*
660  * Examine the indexes to see if we have a matching unique index.
661  * relation_has_unique_index_for automatically adds any usable
662  * restriction clauses for the rel, so we needn't do that here.
663  */
664  if (relation_has_unique_index_for(root, rel, clause_list, NIL, NIL))
665  return true;
666  }
667  else if (rel->rtekind == RTE_SUBQUERY)
668  {
669  Index relid = rel->relid;
670  Query *subquery = root->simple_rte_array[relid]->subquery;
671  List *colnos = NIL;
672  List *opids = NIL;
673  ListCell *l;
674 
675  /*
676  * Build the argument lists for query_is_distinct_for: a list of
677  * output column numbers that the query needs to be distinct over, and
678  * a list of equality operators that the output columns need to be
679  * distinct according to.
680  *
681  * (XXX we are not considering restriction clauses attached to the
682  * subquery; is that worth doing?)
683  */
684  foreach(l, clause_list)
685  {
687  Oid op;
688  Var *var;
689 
690  /*
691  * Get the equality operator we need uniqueness according to.
692  * (This might be a cross-type operator and thus not exactly the
693  * same operator the subquery would consider; that's all right
694  * since query_is_distinct_for can resolve such cases.) The
695  * caller's mergejoinability test should have selected only
696  * OpExprs.
697  */
698  op = castNode(OpExpr, rinfo->clause)->opno;
699 
700  /* caller identified the inner side for us */
701  if (rinfo->outer_is_left)
702  var = (Var *) get_rightop(rinfo->clause);
703  else
704  var = (Var *) get_leftop(rinfo->clause);
705 
706  /*
707  * We may ignore any RelabelType node above the operand. (There
708  * won't be more than one, since eval_const_expressions() has been
709  * applied already.)
710  */
711  if (var && IsA(var, RelabelType))
712  var = (Var *) ((RelabelType *) var)->arg;
713 
714  /*
715  * If inner side isn't a Var referencing a subquery output column,
716  * this clause doesn't help us.
717  */
718  if (!var || !IsA(var, Var) ||
719  var->varno != relid || var->varlevelsup != 0)
720  continue;
721 
722  colnos = lappend_int(colnos, var->varattno);
723  opids = lappend_oid(opids, op);
724  }
725 
726  if (query_is_distinct_for(subquery, colnos, opids))
727  return true;
728  }
729  return false;
730 }
731 
732 
733 /*
734  * query_supports_distinctness - could the query possibly be proven distinct
735  * on some set of output columns?
736  *
737  * This is effectively a pre-checking function for query_is_distinct_for().
738  * It must return TRUE if query_is_distinct_for() could possibly return TRUE
739  * with this query, but it should not expend a lot of cycles. The idea is
740  * that callers can avoid doing possibly-expensive processing to compute
741  * query_is_distinct_for()'s argument lists if the call could not possibly
742  * succeed.
743  */
744 bool
746 {
747  /* we don't cope with SRFs, see comment below */
748  if (query->hasTargetSRFs)
749  return false;
750 
751  /* check for features we can prove distinctness with */
752  if (query->distinctClause != NIL ||
753  query->groupClause != NIL ||
754  query->groupingSets != NIL ||
755  query->hasAggs ||
756  query->havingQual ||
757  query->setOperations)
758  return true;
759 
760  return false;
761 }
762 
763 /*
764  * query_is_distinct_for - does query never return duplicates of the
765  * specified columns?
766  *
767  * query is a not-yet-planned subquery (in current usage, it's always from
768  * a subquery RTE, which the planner avoids scribbling on).
769  *
770  * colnos is an integer list of output column numbers (resno's). We are
771  * interested in whether rows consisting of just these columns are certain
772  * to be distinct. "Distinctness" is defined according to whether the
773  * corresponding upper-level equality operators listed in opids would think
774  * the values are distinct. (Note: the opids entries could be cross-type
775  * operators, and thus not exactly the equality operators that the subquery
776  * would use itself. We use equality_ops_are_compatible() to check
777  * compatibility. That looks at btree or hash opfamily membership, and so
778  * should give trustworthy answers for all operators that we might need
779  * to deal with here.)
780  */
781 bool
782 query_is_distinct_for(Query *query, List *colnos, List *opids)
783 {
784  ListCell *l;
785  Oid opid;
786 
787  Assert(list_length(colnos) == list_length(opids));
788 
789  /*
790  * A set-returning function in the query's targetlist can result in
791  * returning duplicate rows, if the SRF is evaluated after the
792  * de-duplication step; so we play it safe and say "no" if there are any
793  * SRFs. (We could be certain that it's okay if SRFs appear only in the
794  * specified columns, since those must be evaluated before de-duplication;
795  * but it doesn't presently seem worth the complication to check that.)
796  */
797  if (query->hasTargetSRFs)
798  return false;
799 
800  /*
801  * DISTINCT (including DISTINCT ON) guarantees uniqueness if all the
802  * columns in the DISTINCT clause appear in colnos and operator semantics
803  * match.
804  */
805  if (query->distinctClause)
806  {
807  foreach(l, query->distinctClause)
808  {
809  SortGroupClause *sgc = (SortGroupClause *) lfirst(l);
811  query->targetList);
812 
813  opid = distinct_col_search(tle->resno, colnos, opids);
814  if (!OidIsValid(opid) ||
815  !equality_ops_are_compatible(opid, sgc->eqop))
816  break; /* exit early if no match */
817  }
818  if (l == NULL) /* had matches for all? */
819  return true;
820  }
821 
822  /*
823  * Similarly, GROUP BY without GROUPING SETS guarantees uniqueness if all
824  * the grouped columns appear in colnos and operator semantics match.
825  */
826  if (query->groupClause && !query->groupingSets)
827  {
828  foreach(l, query->groupClause)
829  {
830  SortGroupClause *sgc = (SortGroupClause *) lfirst(l);
832  query->targetList);
833 
834  opid = distinct_col_search(tle->resno, colnos, opids);
835  if (!OidIsValid(opid) ||
836  !equality_ops_are_compatible(opid, sgc->eqop))
837  break; /* exit early if no match */
838  }
839  if (l == NULL) /* had matches for all? */
840  return true;
841  }
842  else if (query->groupingSets)
843  {
844  /*
845  * If we have grouping sets with expressions, we probably don't have
846  * uniqueness and analysis would be hard. Punt.
847  */
848  if (query->groupClause)
849  return false;
850 
851  /*
852  * If we have no groupClause (therefore no grouping expressions), we
853  * might have one or many empty grouping sets. If there's just one,
854  * then we're returning only one row and are certainly unique. But
855  * otherwise, we know we're certainly not unique.
856  */
857  if (list_length(query->groupingSets) == 1 &&
858  ((GroupingSet *) linitial(query->groupingSets))->kind == GROUPING_SET_EMPTY)
859  return true;
860  else
861  return false;
862  }
863  else
864  {
865  /*
866  * If we have no GROUP BY, but do have aggregates or HAVING, then the
867  * result is at most one row so it's surely unique, for any operators.
868  */
869  if (query->hasAggs || query->havingQual)
870  return true;
871  }
872 
873  /*
874  * UNION, INTERSECT, EXCEPT guarantee uniqueness of the whole output row,
875  * except with ALL.
876  */
877  if (query->setOperations)
878  {
880 
881  Assert(topop->op != SETOP_NONE);
882 
883  if (!topop->all)
884  {
885  ListCell *lg;
886 
887  /* We're good if all the nonjunk output columns are in colnos */
888  lg = list_head(topop->groupClauses);
889  foreach(l, query->targetList)
890  {
891  TargetEntry *tle = (TargetEntry *) lfirst(l);
892  SortGroupClause *sgc;
893 
894  if (tle->resjunk)
895  continue; /* ignore resjunk columns */
896 
897  /* non-resjunk columns should have grouping clauses */
898  Assert(lg != NULL);
899  sgc = (SortGroupClause *) lfirst(lg);
900  lg = lnext(lg);
901 
902  opid = distinct_col_search(tle->resno, colnos, opids);
903  if (!OidIsValid(opid) ||
904  !equality_ops_are_compatible(opid, sgc->eqop))
905  break; /* exit early if no match */
906  }
907  if (l == NULL) /* had matches for all? */
908  return true;
909  }
910  }
911 
912  /*
913  * XXX Are there any other cases in which we can easily see the result
914  * must be distinct?
915  *
916  * If you do add more smarts to this function, be sure to update
917  * query_supports_distinctness() to match.
918  */
919 
920  return false;
921 }
922 
923 /*
924  * distinct_col_search - subroutine for query_is_distinct_for
925  *
926  * If colno is in colnos, return the corresponding element of opids,
927  * else return InvalidOid. (Ordinarily colnos would not contain duplicates,
928  * but if it does, we arbitrarily select the first match.)
929  */
930 static Oid
931 distinct_col_search(int colno, List *colnos, List *opids)
932 {
933  ListCell *lc1,
934  *lc2;
935 
936  forboth(lc1, colnos, lc2, opids)
937  {
938  if (colno == lfirst_int(lc1))
939  return lfirst_oid(lc2);
940  }
941  return InvalidOid;
942 }
943 
944 
945 /*
946  * innerrel_is_unique
947  * Check if the innerrel provably contains at most one tuple matching any
948  * tuple from the outerrel, based on join clauses in the 'restrictlist'.
949  *
950  * We need an actual RelOptInfo for the innerrel, but it's sufficient to
951  * identify the outerrel by its Relids. This asymmetry supports use of this
952  * function before joinrels have been built.
953  *
954  * The proof must be made based only on clauses that will be "joinquals"
955  * rather than "otherquals" at execution. For an inner join there's no
956  * difference; but if the join is outer, we must ignore pushed-down quals,
957  * as those will become "otherquals". Note that this means the answer might
958  * vary depending on whether IS_OUTER_JOIN(jointype); since we cache the
959  * answer without regard to that, callers must take care not to call this
960  * with jointypes that would be classified differently by IS_OUTER_JOIN().
961  *
962  * The actual proof is undertaken by is_innerrel_unique_for(); this function
963  * is a frontend that is mainly concerned with caching the answers.
964  * In particular, the force_cache argument allows overriding the internal
965  * heuristic about whether to cache negative answers; it should be "true"
966  * if making an inquiry that is not part of the normal bottom-up join search
967  * sequence.
968  */
969 bool
971  Relids outerrelids,
972  RelOptInfo *innerrel,
973  JoinType jointype,
974  List *restrictlist,
975  bool force_cache)
976 {
977  MemoryContext old_context;
978  ListCell *lc;
979 
980  /* Certainly can't prove uniqueness when there are no joinclauses */
981  if (restrictlist == NIL)
982  return false;
983 
984  /*
985  * Make a quick check to eliminate cases in which we will surely be unable
986  * to prove uniqueness of the innerrel.
987  */
988  if (!rel_supports_distinctness(root, innerrel))
989  return false;
990 
991  /*
992  * Query the cache to see if we've managed to prove that innerrel is
993  * unique for any subset of this outerrel. We don't need an exact match,
994  * as extra outerrels can't make the innerrel any less unique (or more
995  * formally, the restrictlist for a join to a superset outerrel must be a
996  * superset of the conditions we successfully used before).
997  */
998  foreach(lc, innerrel->unique_for_rels)
999  {
1000  Relids unique_for_rels = (Relids) lfirst(lc);
1001 
1002  if (bms_is_subset(unique_for_rels, outerrelids))
1003  return true; /* Success! */
1004  }
1005 
1006  /*
1007  * Conversely, we may have already determined that this outerrel, or some
1008  * superset thereof, cannot prove this innerrel to be unique.
1009  */
1010  foreach(lc, innerrel->non_unique_for_rels)
1011  {
1012  Relids unique_for_rels = (Relids) lfirst(lc);
1013 
1014  if (bms_is_subset(outerrelids, unique_for_rels))
1015  return false;
1016  }
1017 
1018  /* No cached information, so try to make the proof. */
1019  if (is_innerrel_unique_for(root, outerrelids, innerrel,
1020  jointype, restrictlist))
1021  {
1022  /*
1023  * Cache the positive result for future probes, being sure to keep it
1024  * in the planner_cxt even if we are working in GEQO.
1025  *
1026  * Note: one might consider trying to isolate the minimal subset of
1027  * the outerrels that proved the innerrel unique. But it's not worth
1028  * the trouble, because the planner builds up joinrels incrementally
1029  * and so we'll see the minimally sufficient outerrels before any
1030  * supersets of them anyway.
1031  */
1032  old_context = MemoryContextSwitchTo(root->planner_cxt);
1033  innerrel->unique_for_rels = lappend(innerrel->unique_for_rels,
1034  bms_copy(outerrelids));
1035  MemoryContextSwitchTo(old_context);
1036 
1037  return true; /* Success! */
1038  }
1039  else
1040  {
1041  /*
1042  * None of the join conditions for outerrel proved innerrel unique, so
1043  * we can safely reject this outerrel or any subset of it in future
1044  * checks.
1045  *
1046  * However, in normal planning mode, caching this knowledge is totally
1047  * pointless; it won't be queried again, because we build up joinrels
1048  * from smaller to larger. It is useful in GEQO mode, where the
1049  * knowledge can be carried across successive planning attempts; and
1050  * it's likely to be useful when using join-search plugins, too. Hence
1051  * cache when join_search_private is non-NULL. (Yeah, that's a hack,
1052  * but it seems reasonable.)
1053  *
1054  * Also, allow callers to override that heuristic and force caching;
1055  * that's useful for reduce_unique_semijoins, which calls here before
1056  * the normal join search starts.
1057  */
1058  if (force_cache || root->join_search_private)
1059  {
1060  old_context = MemoryContextSwitchTo(root->planner_cxt);
1061  innerrel->non_unique_for_rels =
1062  lappend(innerrel->non_unique_for_rels,
1063  bms_copy(outerrelids));
1064  MemoryContextSwitchTo(old_context);
1065  }
1066 
1067  return false;
1068  }
1069 }
1070 
1071 /*
1072  * is_innerrel_unique_for
1073  * Check if the innerrel provably contains at most one tuple matching any
1074  * tuple from the outerrel, based on join clauses in the 'restrictlist'.
1075  */
1076 static bool
1078  Relids outerrelids,
1079  RelOptInfo *innerrel,
1080  JoinType jointype,
1081  List *restrictlist)
1082 {
1083  List *clause_list = NIL;
1084  ListCell *lc;
1085 
1086  /*
1087  * Search for mergejoinable clauses that constrain the inner rel against
1088  * the outer rel. If an operator is mergejoinable then it behaves like
1089  * equality for some btree opclass, so it's what we want. The
1090  * mergejoinability test also eliminates clauses containing volatile
1091  * functions, which we couldn't depend on.
1092  */
1093  foreach(lc, restrictlist)
1094  {
1095  RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(lc);
1096 
1097  /*
1098  * As noted above, if it's a pushed-down clause and we're at an outer
1099  * join, we can't use it.
1100  */
1101  if (restrictinfo->is_pushed_down && IS_OUTER_JOIN(jointype))
1102  continue;
1103 
1104  /* Ignore if it's not a mergejoinable clause */
1105  if (!restrictinfo->can_join ||
1106  restrictinfo->mergeopfamilies == NIL)
1107  continue; /* not mergejoinable */
1108 
1109  /*
1110  * Check if clause has the form "outer op inner" or "inner op outer",
1111  * and if so mark which side is inner.
1112  */
1113  if (!clause_sides_match_join(restrictinfo, outerrelids,
1114  innerrel->relids))
1115  continue; /* no good for these input relations */
1116 
1117  /* OK, add to list */
1118  clause_list = lappend(clause_list, restrictinfo);
1119  }
1120 
1121  /* Let rel_is_distinct_for() do the hard work */
1122  return rel_is_distinct_for(root, innerrel, clause_list);
1123 }
#define NIL
Definition: pg_list.h:69
Relids ph_needed
Definition: relation.h:2156
List * unique_for_rels
Definition: relation.h:640
#define IsA(nodeptr, _type_)
Definition: nodes.h:561
bool query_is_distinct_for(Query *query, List *colnos, List *opids)
Definition: analyzejoins.c:782
void remove_join_clause_from_rels(PlannerInfo *root, RestrictInfo *restrictinfo, Relids join_relids)
Definition: joininfo.c:122
Index varlevelsup
Definition: primnodes.h:173
TargetEntry * get_sortgroupclause_tle(SortGroupClause *sgClause, List *targetList)
Definition: tlist.c:370
bool predOK
Definition: relation.h:752
#define forboth(cell1, list1, cell2, list2)
Definition: pg_list.h:180
void reduce_unique_semijoins(PlannerInfo *root)
Definition: analyzejoins.c:512
Bitmapset * bms_copy(const Bitmapset *a)
Definition: bitmapset.c:111
Relids ph_eval_at
Definition: relation.h:2154
PlaceHolderVar * ph_var
Definition: relation.h:2153
RelOptKind reloptkind
Definition: relation.h:582
static int32 next
Definition: blutils.c:210
Relids * attr_needed
Definition: relation.h:618
List * join_info_list
Definition: relation.h:250
Relids required_relids
Definition: relation.h:1853
Relids min_righthand
Definition: relation.h:2008
void * join_search_private
Definition: relation.h:319
#define castNode(_type_, nodeptr)
Definition: nodes.h:579
static Oid distinct_col_search(int colno, List *colnos, List *opids)
Definition: analyzejoins.c:931
bool hasAggs
Definition: parsenodes.h:123
Relids clause_relids
Definition: relation.h:1850
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
#define IS_OUTER_JOIN(jointype)
Definition: nodes.h:723
List * groupingSets
Definition: parsenodes.h:148
List * list_copy(const List *oldlist)
Definition: list.c:1160
Definition: nodes.h:510
Relids left_relids
Definition: relation.h:1862
AttrNumber varattno
Definition: primnodes.h:168
bool bms_get_singleton_member(const Bitmapset *a, int *member)
Definition: bitmapset.c:569
List * list_concat(List *list1, List *list2)
Definition: list.c:321
static void remove_rel_from_query(PlannerInfo *root, int relid, Relids joinrelids)
Definition: analyzejoins.c:313
List * list_delete_ptr(List *list, void *datum)
Definition: list.c:590
bool immediate
Definition: relation.h:754
unsigned int Oid
Definition: postgres_ext.h:31
Definition: primnodes.h:163
List * lappend_oid(List *list, Oid datum)
Definition: list.c:164
#define OidIsValid(objectId)
Definition: c.h:532
List * mergeopfamilies
Definition: relation.h:1880
Relids syn_lefthand
Definition: relation.h:2009
JoinType
Definition: nodes.h:674
List * targetList
Definition: parsenodes.h:138
struct RelOptInfo ** simple_rel_array
Definition: relation.h:179
bool unique
Definition: relation.h:753
Relids syn_righthand
Definition: relation.h:2010
void distribute_restrictinfo_to_rels(PlannerInfo *root, RestrictInfo *restrictinfo)
Definition: initsplan.c:2227
bool resjunk
Definition: primnodes.h:1375
#define linitial(l)
Definition: pg_list.h:111
List * distinctClause
Definition: parsenodes.h:154
#define ERROR
Definition: elog.h:43
Expr * phexpr
Definition: relation.h:1940
#define lfirst_int(lc)
Definition: pg_list.h:107
List * generate_join_implied_equalities(PlannerInfo *root, Relids join_relids, Relids outer_relids, RelOptInfo *inner_rel)
Definition: equivclass.c:1071
Node * get_leftop(const Expr *clause)
Definition: clauses.c:199
static bool rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list)
Definition: analyzejoins.c:648
bool can_join
Definition: relation.h:1841
bool bms_is_subset(const Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:308
#define lfirst_node(type, lc)
Definition: pg_list.h:109
List * joininfo
Definition: relation.h:649
bool outer_is_left
Definition: relation.h:1890
AttrNumber resno
Definition: primnodes.h:1369
static bool join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
Definition: analyzejoins.c:159
static ListCell * list_head(const List *l)
Definition: pg_list.h:77
static bool clause_sides_match_join(RestrictInfo *rinfo, Relids outerrelids, Relids innerrelids)
Definition: analyzejoins.c:127
Relids relids
Definition: relation.h:585
int simple_rel_array_size
Definition: relation.h:180
#define lnext(lc)
Definition: pg_list.h:105
List * non_unique_for_rels
Definition: relation.h:642
Relids pull_varnos(Node *node)
Definition: var.c:95
List * lappend_int(List *list, int datum)
Definition: list.c:146
Index relid
Definition: relation.h:613
Bitmapset * Relids
Definition: relation.h:28
bool innerrel_is_unique(PlannerInfo *root, Relids outerrelids, RelOptInfo *innerrel, JoinType jointype, List *restrictlist, bool force_cache)
Definition: analyzejoins.c:970
List * lappend(List *list, void *datum)
Definition: list.c:128
RangeTblEntry ** simple_rte_array
Definition: relation.h:188
Expr * clause
Definition: relation.h:1835
bool bms_is_empty(const Bitmapset *a)
Definition: bitmapset.c:663
Index varno
Definition: primnodes.h:166
bool delay_upper_joins
Definition: relation.h:2013
int bms_singleton_member(const Bitmapset *a)
Definition: bitmapset.c:526
Relids ph_lateral
Definition: relation.h:2155
unsigned int Index
Definition: c.h:359
RTEKind rtekind
Definition: relation.h:615
List * indexlist
Definition: relation.h:622
#define InvalidOid
Definition: postgres_ext.h:36
bool is_pushed_down
Definition: relation.h:1837
bool hasTargetSRFs
Definition: parsenodes.h:125
Relids right_relids
Definition: relation.h:1863
#define Assert(condition)
Definition: c.h:681
#define lfirst(lc)
Definition: pg_list.h:106
bool equality_ops_are_compatible(Oid opno1, Oid opno2)
Definition: lsyscache.c:695
List * remove_useless_joins(PlannerInfo *root, List *joinlist)
Definition: analyzejoins.c:60
JoinType jointype
Definition: relation.h:2011
Bitmapset * bms_union(const Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:218
static int list_length(const List *l)
Definition: pg_list.h:89
SetOperation op
Definition: parsenodes.h:1574
bool query_supports_distinctness(Query *query)
Definition: analyzejoins.c:745
#define nodeTag(nodeptr)
Definition: nodes.h:515
Node * get_rightop(const Expr *clause)
Definition: clauses.c:216
bool bms_overlap(const Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:443
Node * setOperations
Definition: parsenodes.h:163
Query * subquery
Definition: parsenodes.h:968
List * groupClause
Definition: parsenodes.h:146
AttrNumber max_attr
Definition: relation.h:617
List * placeholder_list
Definition: relation.h:258
static List * remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved)
Definition: analyzejoins.c:458
static bool is_innerrel_unique_for(PlannerInfo *root, Relids outerrelids, RelOptInfo *innerrel, JoinType jointype, List *restrictlist)
MemoryContext planner_cxt
Definition: relation.h:290
#define elog
Definition: elog.h:219
bool relation_has_unique_index_for(PlannerInfo *root, RelOptInfo *rel, List *restrictlist, List *exprlist, List *oprlist)
Definition: indxpath.c:2960
RelOptInfo * find_base_rel(PlannerInfo *root, int relid)
Definition: relnode.c:277
static bool rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel)
Definition: analyzejoins.c:592
Node * havingQual
Definition: parsenodes.h:150
List * indpred
Definition: relation.h:742
Bitmapset * bms_del_member(Bitmapset *a, int x)
Definition: bitmapset.c:735
Definition: pg_list.h:45
bool bms_is_member(int x, const Bitmapset *a)
Definition: bitmapset.c:420
Relids min_lefthand
Definition: relation.h:2007
#define lfirst_oid(lc)
Definition: pg_list.h:108
bool bms_equal(const Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:131
AttrNumber min_attr
Definition: relation.h:616