PostgreSQL Source Code  git master
analyzejoins.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * analyzejoins.c
4  * Routines for simplifying joins after initial query analysis
5  *
6  * While we do a great deal of join simplification in prep/prepjointree.c,
7  * certain optimizations cannot be performed at that stage for lack of
8  * detailed information about the query. The routines here are invoked
9  * after initsplan.c has done its work, and can do additional join removal
10  * and simplification steps based on the information extracted. The penalty
11  * is that we have to work harder to clean up after ourselves when we modify
12  * the query, since the derived data structures have to be updated too.
13  *
14  * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
15  * Portions Copyright (c) 1994, Regents of the University of California
16  *
17  *
18  * IDENTIFICATION
19  * src/backend/optimizer/plan/analyzejoins.c
20  *
21  *-------------------------------------------------------------------------
22  */
23 #include "postgres.h"
24 
25 #include "nodes/nodeFuncs.h"
26 #include "optimizer/clauses.h"
27 #include "optimizer/joininfo.h"
28 #include "optimizer/pathnode.h"
29 #include "optimizer/paths.h"
30 #include "optimizer/planmain.h"
31 #include "optimizer/tlist.h"
32 #include "optimizer/var.h"
33 #include "utils/lsyscache.h"
34 
35 /* local functions */
36 static bool join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo);
37 static void remove_rel_from_query(PlannerInfo *root, int relid,
38  Relids joinrelids);
39 static List *remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved);
40 static bool rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel);
41 static bool rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel,
42  List *clause_list);
43 static Oid distinct_col_search(int colno, List *colnos, List *opids);
44 static bool is_innerrel_unique_for(PlannerInfo *root,
45  Relids joinrelids,
46  Relids outerrelids,
47  RelOptInfo *innerrel,
48  JoinType jointype,
49  List *restrictlist);
50 
51 
52 /*
53  * remove_useless_joins
54  * Check for relations that don't actually need to be joined at all,
55  * and remove them from the query.
56  *
57  * We are passed the current joinlist and return the updated list. Other
58  * data structures that have to be updated are accessible via "root".
59  */
60 List *
62 {
63  ListCell *lc;
64 
65  /*
66  * We are only interested in relations that are left-joined to, so we can
67  * scan the join_info_list to find them easily.
68  */
69 restart:
70  foreach(lc, root->join_info_list)
71  {
72  SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
73  int innerrelid;
74  int nremoved;
75 
76  /* Skip if not removable */
77  if (!join_is_removable(root, sjinfo))
78  continue;
79 
80  /*
81  * Currently, join_is_removable can only succeed when the sjinfo's
82  * righthand is a single baserel. Remove that rel from the query and
83  * joinlist.
84  */
85  innerrelid = bms_singleton_member(sjinfo->min_righthand);
86 
87  remove_rel_from_query(root, innerrelid,
88  bms_union(sjinfo->min_lefthand,
89  sjinfo->min_righthand));
90 
91  /* We verify that exactly one reference gets removed from joinlist */
92  nremoved = 0;
93  joinlist = remove_rel_from_joinlist(joinlist, innerrelid, &nremoved);
94  if (nremoved != 1)
95  elog(ERROR, "failed to find relation %d in joinlist", innerrelid);
96 
97  /*
98  * We can delete this SpecialJoinInfo from the list too, since it's no
99  * longer of interest.
100  */
101  root->join_info_list = list_delete_ptr(root->join_info_list, sjinfo);
102 
103  /*
104  * Restart the scan. This is necessary to ensure we find all
105  * removable joins independently of ordering of the join_info_list
106  * (note that removal of attr_needed bits may make a join appear
107  * removable that did not before). Also, since we just deleted the
108  * current list cell, we'd have to have some kluge to continue the
109  * list scan anyway.
110  */
111  goto restart;
112  }
113 
114  return joinlist;
115 }
116 
117 /*
118  * clause_sides_match_join
119  * Determine whether a join clause is of the right form to use in this join.
120  *
121  * We already know that the clause is a binary opclause referencing only the
122  * rels in the current join. The point here is to check whether it has the
123  * form "outerrel_expr op innerrel_expr" or "innerrel_expr op outerrel_expr",
124  * rather than mixing outer and inner vars on either side. If it matches,
125  * we set the transient flag outer_is_left to identify which side is which.
126  */
127 static inline bool
129  Relids innerrelids)
130 {
131  if (bms_is_subset(rinfo->left_relids, outerrelids) &&
132  bms_is_subset(rinfo->right_relids, innerrelids))
133  {
134  /* lefthand side is outer */
135  rinfo->outer_is_left = true;
136  return true;
137  }
138  else if (bms_is_subset(rinfo->left_relids, innerrelids) &&
139  bms_is_subset(rinfo->right_relids, outerrelids))
140  {
141  /* righthand side is outer */
142  rinfo->outer_is_left = false;
143  return true;
144  }
145  return false; /* no good for these input relations */
146 }
147 
148 /*
149  * join_is_removable
150  * Check whether we need not perform this special join at all, because
151  * it will just duplicate its left input.
152  *
153  * This is true for a left join for which the join condition cannot match
154  * more than one inner-side row. (There are other possibly interesting
155  * cases, but we don't have the infrastructure to prove them.) We also
156  * have to check that the inner side doesn't generate any variables needed
157  * above the join.
158  */
159 static bool
161 {
162  int innerrelid;
163  RelOptInfo *innerrel;
164  Relids joinrelids;
165  List *clause_list = NIL;
166  ListCell *l;
167  int attroff;
168 
169  /*
170  * Must be a non-delaying left join to a single baserel, else we aren't
171  * going to be able to do anything with it.
172  */
173  if (sjinfo->jointype != JOIN_LEFT ||
174  sjinfo->delay_upper_joins)
175  return false;
176 
177  if (!bms_get_singleton_member(sjinfo->min_righthand, &innerrelid))
178  return false;
179 
180  innerrel = find_base_rel(root, innerrelid);
181 
182  /*
183  * Before we go to the effort of checking whether any innerrel variables
184  * are needed above the join, make a quick check to eliminate cases in
185  * which we will surely be unable to prove uniqueness of the innerrel.
186  */
187  if (!rel_supports_distinctness(root, innerrel))
188  return false;
189 
190  /* Compute the relid set for the join we are considering */
191  joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
192 
193  /*
194  * We can't remove the join if any inner-rel attributes are used above the
195  * join.
196  *
197  * Note that this test only detects use of inner-rel attributes in higher
198  * join conditions and the target list. There might be such attributes in
199  * pushed-down conditions at this join, too. We check that case below.
200  *
201  * As a micro-optimization, it seems better to start with max_attr and
202  * count down rather than starting with min_attr and counting up, on the
203  * theory that the system attributes are somewhat less likely to be wanted
204  * and should be tested last.
205  */
206  for (attroff = innerrel->max_attr - innerrel->min_attr;
207  attroff >= 0;
208  attroff--)
209  {
210  if (!bms_is_subset(innerrel->attr_needed[attroff], joinrelids))
211  return false;
212  }
213 
214  /*
215  * Similarly check that the inner rel isn't needed by any PlaceHolderVars
216  * that will be used above the join. We only need to fail if such a PHV
217  * actually references some inner-rel attributes; but the correct check
218  * for that is relatively expensive, so we first check against ph_eval_at,
219  * which must mention the inner rel if the PHV uses any inner-rel attrs as
220  * non-lateral references. Note that if the PHV's syntactic scope is just
221  * the inner rel, we can't drop the rel even if the PHV is variable-free.
222  */
223  foreach(l, root->placeholder_list)
224  {
225  PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l);
226 
227  if (bms_overlap(phinfo->ph_lateral, innerrel->relids))
228  return false; /* it references innerrel laterally */
229  if (bms_is_subset(phinfo->ph_needed, joinrelids))
230  continue; /* PHV is not used above the join */
231  if (!bms_overlap(phinfo->ph_eval_at, innerrel->relids))
232  continue; /* it definitely doesn't reference innerrel */
233  if (bms_is_subset(phinfo->ph_eval_at, innerrel->relids))
234  return false; /* there isn't any other place to eval PHV */
235  if (bms_overlap(pull_varnos((Node *) phinfo->ph_var->phexpr),
236  innerrel->relids))
237  return false; /* it does reference innerrel */
238  }
239 
240  /*
241  * Search for mergejoinable clauses that constrain the inner rel against
242  * either the outer rel or a pseudoconstant. If an operator is
243  * mergejoinable then it behaves like equality for some btree opclass, so
244  * it's what we want. The mergejoinability test also eliminates clauses
245  * containing volatile functions, which we couldn't depend on.
246  */
247  foreach(l, innerrel->joininfo)
248  {
249  RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
250 
251  /*
252  * If it's not a join clause for this outer join, we can't use it.
253  * Note that if the clause is pushed-down, then it is logically from
254  * above the outer join, even if it references no other rels (it might
255  * be from WHERE, for example).
256  */
257  if (RINFO_IS_PUSHED_DOWN(restrictinfo, joinrelids))
258  {
259  /*
260  * If such a clause actually references the inner rel then join
261  * removal has to be disallowed. We have to check this despite
262  * the previous attr_needed checks because of the possibility of
263  * pushed-down clauses referencing the rel.
264  */
265  if (bms_is_member(innerrelid, restrictinfo->clause_relids))
266  return false;
267  continue; /* else, ignore; not useful here */
268  }
269 
270  /* Ignore if it's not a mergejoinable clause */
271  if (!restrictinfo->can_join ||
272  restrictinfo->mergeopfamilies == NIL)
273  continue; /* not mergejoinable */
274 
275  /*
276  * Check if clause has the form "outer op inner" or "inner op outer",
277  * and if so mark which side is inner.
278  */
279  if (!clause_sides_match_join(restrictinfo, sjinfo->min_lefthand,
280  innerrel->relids))
281  continue; /* no good for these input relations */
282 
283  /* OK, add to list */
284  clause_list = lappend(clause_list, restrictinfo);
285  }
286 
287  /*
288  * Now that we have the relevant equality join clauses, try to prove the
289  * innerrel distinct.
290  */
291  if (rel_is_distinct_for(root, innerrel, clause_list))
292  return true;
293 
294  /*
295  * Some day it would be nice to check for other methods of establishing
296  * distinctness.
297  */
298  return false;
299 }
300 
301 
302 /*
303  * Remove the target relid from the planner's data structures, having
304  * determined that there is no need to include it in the query.
305  *
306  * We are not terribly thorough here. We must make sure that the rel is
307  * no longer treated as a baserel, and that attributes of other baserels
308  * are no longer marked as being needed at joins involving this rel.
309  * Also, join quals involving the rel have to be removed from the joininfo
310  * lists, but only if they belong to the outer join identified by joinrelids.
311  */
312 static void
313 remove_rel_from_query(PlannerInfo *root, int relid, Relids joinrelids)
314 {
315  RelOptInfo *rel = find_base_rel(root, relid);
316  List *joininfos;
317  Index rti;
318  ListCell *l;
319  ListCell *nextl;
320 
321  /*
322  * Mark the rel as "dead" to show it is no longer part of the join tree.
323  * (Removing it from the baserel array altogether seems too risky.)
324  */
325  rel->reloptkind = RELOPT_DEADREL;
326 
327  /*
328  * Remove references to the rel from other baserels' attr_needed arrays.
329  */
330  for (rti = 1; rti < root->simple_rel_array_size; rti++)
331  {
332  RelOptInfo *otherrel = root->simple_rel_array[rti];
333  int attroff;
334 
335  /* there may be empty slots corresponding to non-baserel RTEs */
336  if (otherrel == NULL)
337  continue;
338 
339  Assert(otherrel->relid == rti); /* sanity check on array */
340 
341  /* no point in processing target rel itself */
342  if (otherrel == rel)
343  continue;
344 
345  for (attroff = otherrel->max_attr - otherrel->min_attr;
346  attroff >= 0;
347  attroff--)
348  {
349  otherrel->attr_needed[attroff] =
350  bms_del_member(otherrel->attr_needed[attroff], relid);
351  }
352  }
353 
354  /*
355  * Likewise remove references from SpecialJoinInfo data structures.
356  *
357  * This is relevant in case the outer join we're deleting is nested inside
358  * other outer joins: the upper joins' relid sets have to be adjusted. The
359  * RHS of the target outer join will be made empty here, but that's OK
360  * since caller will delete that SpecialJoinInfo entirely.
361  */
362  foreach(l, root->join_info_list)
363  {
364  SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(l);
365 
366  sjinfo->min_lefthand = bms_del_member(sjinfo->min_lefthand, relid);
367  sjinfo->min_righthand = bms_del_member(sjinfo->min_righthand, relid);
368  sjinfo->syn_lefthand = bms_del_member(sjinfo->syn_lefthand, relid);
369  sjinfo->syn_righthand = bms_del_member(sjinfo->syn_righthand, relid);
370  }
371 
372  /*
373  * Likewise remove references from PlaceHolderVar data structures,
374  * removing any no-longer-needed placeholders entirely.
375  *
376  * Removal is a bit tricker than it might seem: we can remove PHVs that
377  * are used at the target rel and/or in the join qual, but not those that
378  * are used at join partner rels or above the join. It's not that easy to
379  * distinguish PHVs used at partner rels from those used in the join qual,
380  * since they will both have ph_needed sets that are subsets of
381  * joinrelids. However, a PHV used at a partner rel could not have the
382  * target rel in ph_eval_at, so we check that while deciding whether to
383  * remove or just update the PHV. There is no corresponding test in
384  * join_is_removable because it doesn't need to distinguish those cases.
385  */
386  for (l = list_head(root->placeholder_list); l != NULL; l = nextl)
387  {
388  PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l);
389 
390  nextl = lnext(l);
391  Assert(!bms_is_member(relid, phinfo->ph_lateral));
392  if (bms_is_subset(phinfo->ph_needed, joinrelids) &&
393  bms_is_member(relid, phinfo->ph_eval_at))
395  phinfo);
396  else
397  {
398  phinfo->ph_eval_at = bms_del_member(phinfo->ph_eval_at, relid);
399  Assert(!bms_is_empty(phinfo->ph_eval_at));
400  phinfo->ph_needed = bms_del_member(phinfo->ph_needed, relid);
401  }
402  }
403 
404  /*
405  * Remove any joinquals referencing the rel from the joininfo lists.
406  *
407  * In some cases, a joinqual has to be put back after deleting its
408  * reference to the target rel. This can occur for pseudoconstant and
409  * outerjoin-delayed quals, which can get marked as requiring the rel in
410  * order to force them to be evaluated at or above the join. We can't
411  * just discard them, though. Only quals that logically belonged to the
412  * outer join being discarded should be removed from the query.
413  *
414  * We must make a copy of the rel's old joininfo list before starting the
415  * loop, because otherwise remove_join_clause_from_rels would destroy the
416  * list while we're scanning it.
417  */
418  joininfos = list_copy(rel->joininfo);
419  foreach(l, joininfos)
420  {
421  RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
422 
423  remove_join_clause_from_rels(root, rinfo, rinfo->required_relids);
424 
425  if (RINFO_IS_PUSHED_DOWN(rinfo, joinrelids))
426  {
427  /* Recheck that qual doesn't actually reference the target rel */
428  Assert(!bms_is_member(relid, rinfo->clause_relids));
429 
430  /*
431  * The required_relids probably aren't shared with anything else,
432  * but let's copy them just to be sure.
433  */
434  rinfo->required_relids = bms_copy(rinfo->required_relids);
436  relid);
437  distribute_restrictinfo_to_rels(root, rinfo);
438  }
439  }
440 
441  /*
442  * There may be references to the rel in root->fkey_list, but if so,
443  * match_foreign_keys_to_quals() will get rid of them.
444  */
445 }
446 
447 /*
448  * Remove any occurrences of the target relid from a joinlist structure.
449  *
450  * It's easiest to build a whole new list structure, so we handle it that
451  * way. Efficiency is not a big deal here.
452  *
453  * *nremoved is incremented by the number of occurrences removed (there
454  * should be exactly one, but the caller checks that).
455  */
456 static List *
457 remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved)
458 {
459  List *result = NIL;
460  ListCell *jl;
461 
462  foreach(jl, joinlist)
463  {
464  Node *jlnode = (Node *) lfirst(jl);
465 
466  if (IsA(jlnode, RangeTblRef))
467  {
468  int varno = ((RangeTblRef *) jlnode)->rtindex;
469 
470  if (varno == relid)
471  (*nremoved)++;
472  else
473  result = lappend(result, jlnode);
474  }
475  else if (IsA(jlnode, List))
476  {
477  /* Recurse to handle subproblem */
478  List *sublist;
479 
480  sublist = remove_rel_from_joinlist((List *) jlnode,
481  relid, nremoved);
482  /* Avoid including empty sub-lists in the result */
483  if (sublist)
484  result = lappend(result, sublist);
485  }
486  else
487  {
488  elog(ERROR, "unrecognized joinlist node type: %d",
489  (int) nodeTag(jlnode));
490  }
491  }
492 
493  return result;
494 }
495 
496 
497 /*
498  * reduce_unique_semijoins
499  * Check for semijoins that can be simplified to plain inner joins
500  * because the inner relation is provably unique for the join clauses.
501  *
502  * Ideally this would happen during reduce_outer_joins, but we don't have
503  * enough information at that point.
504  *
505  * To perform the strength reduction when applicable, we need only delete
506  * the semijoin's SpecialJoinInfo from root->join_info_list. (We don't
507  * bother fixing the join type attributed to it in the query jointree,
508  * since that won't be consulted again.)
509  */
510 void
512 {
513  ListCell *lc;
514  ListCell *next;
515 
516  /*
517  * Scan the join_info_list to find semijoins. We can't use foreach
518  * because we may delete the current cell.
519  */
520  for (lc = list_head(root->join_info_list); lc != NULL; lc = next)
521  {
522  SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
523  int innerrelid;
524  RelOptInfo *innerrel;
525  Relids joinrelids;
526  List *restrictlist;
527 
528  next = lnext(lc);
529 
530  /*
531  * Must be a non-delaying semijoin to a single baserel, else we aren't
532  * going to be able to do anything with it. (It's probably not
533  * possible for delay_upper_joins to be set on a semijoin, but we
534  * might as well check.)
535  */
536  if (sjinfo->jointype != JOIN_SEMI ||
537  sjinfo->delay_upper_joins)
538  continue;
539 
540  if (!bms_get_singleton_member(sjinfo->min_righthand, &innerrelid))
541  continue;
542 
543  innerrel = find_base_rel(root, innerrelid);
544 
545  /*
546  * Before we trouble to run generate_join_implied_equalities, make a
547  * quick check to eliminate cases in which we will surely be unable to
548  * prove uniqueness of the innerrel.
549  */
550  if (!rel_supports_distinctness(root, innerrel))
551  continue;
552 
553  /* Compute the relid set for the join we are considering */
554  joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
555 
556  /*
557  * Since we're only considering a single-rel RHS, any join clauses it
558  * has must be clauses linking it to the semijoin's min_lefthand. We
559  * can also consider EC-derived join clauses.
560  */
561  restrictlist =
563  joinrelids,
564  sjinfo->min_lefthand,
565  innerrel),
566  innerrel->joininfo);
567 
568  /* Test whether the innerrel is unique for those clauses. */
569  if (!innerrel_is_unique(root,
570  joinrelids, sjinfo->min_lefthand, innerrel,
571  JOIN_SEMI, restrictlist, true))
572  continue;
573 
574  /* OK, remove the SpecialJoinInfo from the list. */
575  root->join_info_list = list_delete_ptr(root->join_info_list, sjinfo);
576  }
577 }
578 
579 
580 /*
581  * rel_supports_distinctness
582  * Could the relation possibly be proven distinct on some set of columns?
583  *
584  * This is effectively a pre-checking function for rel_is_distinct_for().
585  * It must return true if rel_is_distinct_for() could possibly return true
586  * with this rel, but it should not expend a lot of cycles. The idea is
587  * that callers can avoid doing possibly-expensive processing to compute
588  * rel_is_distinct_for()'s argument lists if the call could not possibly
589  * succeed.
590  */
591 static bool
593 {
594  /* We only know about baserels ... */
595  if (rel->reloptkind != RELOPT_BASEREL)
596  return false;
597  if (rel->rtekind == RTE_RELATION)
598  {
599  /*
600  * For a plain relation, we only know how to prove uniqueness by
601  * reference to unique indexes. Make sure there's at least one
602  * suitable unique index. It must be immediately enforced, and if
603  * it's a partial index, it must match the query. (Keep these
604  * conditions in sync with relation_has_unique_index_for!)
605  */
606  ListCell *lc;
607 
608  foreach(lc, rel->indexlist)
609  {
610  IndexOptInfo *ind = (IndexOptInfo *) lfirst(lc);
611 
612  if (ind->unique && ind->immediate &&
613  (ind->indpred == NIL || ind->predOK))
614  return true;
615  }
616  }
617  else if (rel->rtekind == RTE_SUBQUERY)
618  {
619  Query *subquery = root->simple_rte_array[rel->relid]->subquery;
620 
621  /* Check if the subquery has any qualities that support distinctness */
622  if (query_supports_distinctness(subquery))
623  return true;
624  }
625  /* We have no proof rules for any other rtekinds. */
626  return false;
627 }
628 
629 /*
630  * rel_is_distinct_for
631  * Does the relation return only distinct rows according to clause_list?
632  *
633  * clause_list is a list of join restriction clauses involving this rel and
634  * some other one. Return true if no two rows emitted by this rel could
635  * possibly join to the same row of the other rel.
636  *
637  * The caller must have already determined that each condition is a
638  * mergejoinable equality with an expression in this relation on one side, and
639  * an expression not involving this relation on the other. The transient
640  * outer_is_left flag is used to identify which side references this relation:
641  * left side if outer_is_left is false, right side if it is true.
642  *
643  * Note that the passed-in clause_list may be destructively modified! This
644  * is OK for current uses, because the clause_list is built by the caller for
645  * the sole purpose of passing to this function.
646  */
647 static bool
648 rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list)
649 {
650  /*
651  * We could skip a couple of tests here if we assume all callers checked
652  * rel_supports_distinctness first, but it doesn't seem worth taking any
653  * risk for.
654  */
655  if (rel->reloptkind != RELOPT_BASEREL)
656  return false;
657  if (rel->rtekind == RTE_RELATION)
658  {
659  /*
660  * Examine the indexes to see if we have a matching unique index.
661  * relation_has_unique_index_for automatically adds any usable
662  * restriction clauses for the rel, so we needn't do that here.
663  */
664  if (relation_has_unique_index_for(root, rel, clause_list, NIL, NIL))
665  return true;
666  }
667  else if (rel->rtekind == RTE_SUBQUERY)
668  {
669  Index relid = rel->relid;
670  Query *subquery = root->simple_rte_array[relid]->subquery;
671  List *colnos = NIL;
672  List *opids = NIL;
673  ListCell *l;
674 
675  /*
676  * Build the argument lists for query_is_distinct_for: a list of
677  * output column numbers that the query needs to be distinct over, and
678  * a list of equality operators that the output columns need to be
679  * distinct according to.
680  *
681  * (XXX we are not considering restriction clauses attached to the
682  * subquery; is that worth doing?)
683  */
684  foreach(l, clause_list)
685  {
687  Oid op;
688  Var *var;
689 
690  /*
691  * Get the equality operator we need uniqueness according to.
692  * (This might be a cross-type operator and thus not exactly the
693  * same operator the subquery would consider; that's all right
694  * since query_is_distinct_for can resolve such cases.) The
695  * caller's mergejoinability test should have selected only
696  * OpExprs.
697  */
698  op = castNode(OpExpr, rinfo->clause)->opno;
699 
700  /* caller identified the inner side for us */
701  if (rinfo->outer_is_left)
702  var = (Var *) get_rightop(rinfo->clause);
703  else
704  var = (Var *) get_leftop(rinfo->clause);
705 
706  /*
707  * We may ignore any RelabelType node above the operand. (There
708  * won't be more than one, since eval_const_expressions() has been
709  * applied already.)
710  */
711  if (var && IsA(var, RelabelType))
712  var = (Var *) ((RelabelType *) var)->arg;
713 
714  /*
715  * If inner side isn't a Var referencing a subquery output column,
716  * this clause doesn't help us.
717  */
718  if (!var || !IsA(var, Var) ||
719  var->varno != relid || var->varlevelsup != 0)
720  continue;
721 
722  colnos = lappend_int(colnos, var->varattno);
723  opids = lappend_oid(opids, op);
724  }
725 
726  if (query_is_distinct_for(subquery, colnos, opids))
727  return true;
728  }
729  return false;
730 }
731 
732 
733 /*
734  * query_supports_distinctness - could the query possibly be proven distinct
735  * on some set of output columns?
736  *
737  * This is effectively a pre-checking function for query_is_distinct_for().
738  * It must return true if query_is_distinct_for() could possibly return true
739  * with this query, but it should not expend a lot of cycles. The idea is
740  * that callers can avoid doing possibly-expensive processing to compute
741  * query_is_distinct_for()'s argument lists if the call could not possibly
742  * succeed.
743  */
744 bool
746 {
747  /* SRFs break distinctness except with DISTINCT, see below */
748  if (query->hasTargetSRFs && query->distinctClause == NIL)
749  return false;
750 
751  /* check for features we can prove distinctness with */
752  if (query->distinctClause != NIL ||
753  query->groupClause != NIL ||
754  query->groupingSets != NIL ||
755  query->hasAggs ||
756  query->havingQual ||
757  query->setOperations)
758  return true;
759 
760  return false;
761 }
762 
763 /*
764  * query_is_distinct_for - does query never return duplicates of the
765  * specified columns?
766  *
767  * query is a not-yet-planned subquery (in current usage, it's always from
768  * a subquery RTE, which the planner avoids scribbling on).
769  *
770  * colnos is an integer list of output column numbers (resno's). We are
771  * interested in whether rows consisting of just these columns are certain
772  * to be distinct. "Distinctness" is defined according to whether the
773  * corresponding upper-level equality operators listed in opids would think
774  * the values are distinct. (Note: the opids entries could be cross-type
775  * operators, and thus not exactly the equality operators that the subquery
776  * would use itself. We use equality_ops_are_compatible() to check
777  * compatibility. That looks at btree or hash opfamily membership, and so
778  * should give trustworthy answers for all operators that we might need
779  * to deal with here.)
780  */
781 bool
782 query_is_distinct_for(Query *query, List *colnos, List *opids)
783 {
784  ListCell *l;
785  Oid opid;
786 
787  Assert(list_length(colnos) == list_length(opids));
788 
789  /*
790  * DISTINCT (including DISTINCT ON) guarantees uniqueness if all the
791  * columns in the DISTINCT clause appear in colnos and operator semantics
792  * match. This is true even if there are SRFs in the DISTINCT columns or
793  * elsewhere in the tlist.
794  */
795  if (query->distinctClause)
796  {
797  foreach(l, query->distinctClause)
798  {
799  SortGroupClause *sgc = (SortGroupClause *) lfirst(l);
801  query->targetList);
802 
803  opid = distinct_col_search(tle->resno, colnos, opids);
804  if (!OidIsValid(opid) ||
805  !equality_ops_are_compatible(opid, sgc->eqop))
806  break; /* exit early if no match */
807  }
808  if (l == NULL) /* had matches for all? */
809  return true;
810  }
811 
812  /*
813  * Otherwise, a set-returning function in the query's targetlist can
814  * result in returning duplicate rows, despite any grouping that might
815  * occur before tlist evaluation. (If all tlist SRFs are within GROUP BY
816  * columns, it would be safe because they'd be expanded before grouping.
817  * But it doesn't currently seem worth the effort to check for that.)
818  */
819  if (query->hasTargetSRFs)
820  return false;
821 
822  /*
823  * Similarly, GROUP BY without GROUPING SETS guarantees uniqueness if all
824  * the grouped columns appear in colnos and operator semantics match.
825  */
826  if (query->groupClause && !query->groupingSets)
827  {
828  foreach(l, query->groupClause)
829  {
830  SortGroupClause *sgc = (SortGroupClause *) lfirst(l);
832  query->targetList);
833 
834  opid = distinct_col_search(tle->resno, colnos, opids);
835  if (!OidIsValid(opid) ||
836  !equality_ops_are_compatible(opid, sgc->eqop))
837  break; /* exit early if no match */
838  }
839  if (l == NULL) /* had matches for all? */
840  return true;
841  }
842  else if (query->groupingSets)
843  {
844  /*
845  * If we have grouping sets with expressions, we probably don't have
846  * uniqueness and analysis would be hard. Punt.
847  */
848  if (query->groupClause)
849  return false;
850 
851  /*
852  * If we have no groupClause (therefore no grouping expressions), we
853  * might have one or many empty grouping sets. If there's just one,
854  * then we're returning only one row and are certainly unique. But
855  * otherwise, we know we're certainly not unique.
856  */
857  if (list_length(query->groupingSets) == 1 &&
858  ((GroupingSet *) linitial(query->groupingSets))->kind == GROUPING_SET_EMPTY)
859  return true;
860  else
861  return false;
862  }
863  else
864  {
865  /*
866  * If we have no GROUP BY, but do have aggregates or HAVING, then the
867  * result is at most one row so it's surely unique, for any operators.
868  */
869  if (query->hasAggs || query->havingQual)
870  return true;
871  }
872 
873  /*
874  * UNION, INTERSECT, EXCEPT guarantee uniqueness of the whole output row,
875  * except with ALL.
876  */
877  if (query->setOperations)
878  {
880 
881  Assert(topop->op != SETOP_NONE);
882 
883  if (!topop->all)
884  {
885  ListCell *lg;
886 
887  /* We're good if all the nonjunk output columns are in colnos */
888  lg = list_head(topop->groupClauses);
889  foreach(l, query->targetList)
890  {
891  TargetEntry *tle = (TargetEntry *) lfirst(l);
892  SortGroupClause *sgc;
893 
894  if (tle->resjunk)
895  continue; /* ignore resjunk columns */
896 
897  /* non-resjunk columns should have grouping clauses */
898  Assert(lg != NULL);
899  sgc = (SortGroupClause *) lfirst(lg);
900  lg = lnext(lg);
901 
902  opid = distinct_col_search(tle->resno, colnos, opids);
903  if (!OidIsValid(opid) ||
904  !equality_ops_are_compatible(opid, sgc->eqop))
905  break; /* exit early if no match */
906  }
907  if (l == NULL) /* had matches for all? */
908  return true;
909  }
910  }
911 
912  /*
913  * XXX Are there any other cases in which we can easily see the result
914  * must be distinct?
915  *
916  * If you do add more smarts to this function, be sure to update
917  * query_supports_distinctness() to match.
918  */
919 
920  return false;
921 }
922 
923 /*
924  * distinct_col_search - subroutine for query_is_distinct_for
925  *
926  * If colno is in colnos, return the corresponding element of opids,
927  * else return InvalidOid. (Ordinarily colnos would not contain duplicates,
928  * but if it does, we arbitrarily select the first match.)
929  */
930 static Oid
931 distinct_col_search(int colno, List *colnos, List *opids)
932 {
933  ListCell *lc1,
934  *lc2;
935 
936  forboth(lc1, colnos, lc2, opids)
937  {
938  if (colno == lfirst_int(lc1))
939  return lfirst_oid(lc2);
940  }
941  return InvalidOid;
942 }
943 
944 
945 /*
946  * innerrel_is_unique
947  * Check if the innerrel provably contains at most one tuple matching any
948  * tuple from the outerrel, based on join clauses in the 'restrictlist'.
949  *
950  * We need an actual RelOptInfo for the innerrel, but it's sufficient to
951  * identify the outerrel by its Relids. This asymmetry supports use of this
952  * function before joinrels have been built. (The caller is expected to
953  * also supply the joinrelids, just to save recalculating that.)
954  *
955  * The proof must be made based only on clauses that will be "joinquals"
956  * rather than "otherquals" at execution. For an inner join there's no
957  * difference; but if the join is outer, we must ignore pushed-down quals,
958  * as those will become "otherquals". Note that this means the answer might
959  * vary depending on whether IS_OUTER_JOIN(jointype); since we cache the
960  * answer without regard to that, callers must take care not to call this
961  * with jointypes that would be classified differently by IS_OUTER_JOIN().
962  *
963  * The actual proof is undertaken by is_innerrel_unique_for(); this function
964  * is a frontend that is mainly concerned with caching the answers.
965  * In particular, the force_cache argument allows overriding the internal
966  * heuristic about whether to cache negative answers; it should be "true"
967  * if making an inquiry that is not part of the normal bottom-up join search
968  * sequence.
969  */
970 bool
972  Relids joinrelids,
973  Relids outerrelids,
974  RelOptInfo *innerrel,
975  JoinType jointype,
976  List *restrictlist,
977  bool force_cache)
978 {
979  MemoryContext old_context;
980  ListCell *lc;
981 
982  /* Certainly can't prove uniqueness when there are no joinclauses */
983  if (restrictlist == NIL)
984  return false;
985 
986  /*
987  * Make a quick check to eliminate cases in which we will surely be unable
988  * to prove uniqueness of the innerrel.
989  */
990  if (!rel_supports_distinctness(root, innerrel))
991  return false;
992 
993  /*
994  * Query the cache to see if we've managed to prove that innerrel is
995  * unique for any subset of this outerrel. We don't need an exact match,
996  * as extra outerrels can't make the innerrel any less unique (or more
997  * formally, the restrictlist for a join to a superset outerrel must be a
998  * superset of the conditions we successfully used before).
999  */
1000  foreach(lc, innerrel->unique_for_rels)
1001  {
1002  Relids unique_for_rels = (Relids) lfirst(lc);
1003 
1004  if (bms_is_subset(unique_for_rels, outerrelids))
1005  return true; /* Success! */
1006  }
1007 
1008  /*
1009  * Conversely, we may have already determined that this outerrel, or some
1010  * superset thereof, cannot prove this innerrel to be unique.
1011  */
1012  foreach(lc, innerrel->non_unique_for_rels)
1013  {
1014  Relids unique_for_rels = (Relids) lfirst(lc);
1015 
1016  if (bms_is_subset(outerrelids, unique_for_rels))
1017  return false;
1018  }
1019 
1020  /* No cached information, so try to make the proof. */
1021  if (is_innerrel_unique_for(root, joinrelids, outerrelids, innerrel,
1022  jointype, restrictlist))
1023  {
1024  /*
1025  * Cache the positive result for future probes, being sure to keep it
1026  * in the planner_cxt even if we are working in GEQO.
1027  *
1028  * Note: one might consider trying to isolate the minimal subset of
1029  * the outerrels that proved the innerrel unique. But it's not worth
1030  * the trouble, because the planner builds up joinrels incrementally
1031  * and so we'll see the minimally sufficient outerrels before any
1032  * supersets of them anyway.
1033  */
1034  old_context = MemoryContextSwitchTo(root->planner_cxt);
1035  innerrel->unique_for_rels = lappend(innerrel->unique_for_rels,
1036  bms_copy(outerrelids));
1037  MemoryContextSwitchTo(old_context);
1038 
1039  return true; /* Success! */
1040  }
1041  else
1042  {
1043  /*
1044  * None of the join conditions for outerrel proved innerrel unique, so
1045  * we can safely reject this outerrel or any subset of it in future
1046  * checks.
1047  *
1048  * However, in normal planning mode, caching this knowledge is totally
1049  * pointless; it won't be queried again, because we build up joinrels
1050  * from smaller to larger. It is useful in GEQO mode, where the
1051  * knowledge can be carried across successive planning attempts; and
1052  * it's likely to be useful when using join-search plugins, too. Hence
1053  * cache when join_search_private is non-NULL. (Yeah, that's a hack,
1054  * but it seems reasonable.)
1055  *
1056  * Also, allow callers to override that heuristic and force caching;
1057  * that's useful for reduce_unique_semijoins, which calls here before
1058  * the normal join search starts.
1059  */
1060  if (force_cache || root->join_search_private)
1061  {
1062  old_context = MemoryContextSwitchTo(root->planner_cxt);
1063  innerrel->non_unique_for_rels =
1064  lappend(innerrel->non_unique_for_rels,
1065  bms_copy(outerrelids));
1066  MemoryContextSwitchTo(old_context);
1067  }
1068 
1069  return false;
1070  }
1071 }
1072 
1073 /*
1074  * is_innerrel_unique_for
1075  * Check if the innerrel provably contains at most one tuple matching any
1076  * tuple from the outerrel, based on join clauses in the 'restrictlist'.
1077  */
1078 static bool
1080  Relids joinrelids,
1081  Relids outerrelids,
1082  RelOptInfo *innerrel,
1083  JoinType jointype,
1084  List *restrictlist)
1085 {
1086  List *clause_list = NIL;
1087  ListCell *lc;
1088 
1089  /*
1090  * Search for mergejoinable clauses that constrain the inner rel against
1091  * the outer rel. If an operator is mergejoinable then it behaves like
1092  * equality for some btree opclass, so it's what we want. The
1093  * mergejoinability test also eliminates clauses containing volatile
1094  * functions, which we couldn't depend on.
1095  */
1096  foreach(lc, restrictlist)
1097  {
1098  RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(lc);
1099 
1100  /*
1101  * As noted above, if it's a pushed-down clause and we're at an outer
1102  * join, we can't use it.
1103  */
1104  if (IS_OUTER_JOIN(jointype) &&
1105  RINFO_IS_PUSHED_DOWN(restrictinfo, joinrelids))
1106  continue;
1107 
1108  /* Ignore if it's not a mergejoinable clause */
1109  if (!restrictinfo->can_join ||
1110  restrictinfo->mergeopfamilies == NIL)
1111  continue; /* not mergejoinable */
1112 
1113  /*
1114  * Check if clause has the form "outer op inner" or "inner op outer",
1115  * and if so mark which side is inner.
1116  */
1117  if (!clause_sides_match_join(restrictinfo, outerrelids,
1118  innerrel->relids))
1119  continue; /* no good for these input relations */
1120 
1121  /* OK, add to list */
1122  clause_list = lappend(clause_list, restrictinfo);
1123  }
1124 
1125  /* Let rel_is_distinct_for() do the hard work */
1126  return rel_is_distinct_for(root, innerrel, clause_list);
1127 }
static bool is_innerrel_unique_for(PlannerInfo *root, Relids joinrelids, Relids outerrelids, RelOptInfo *innerrel, JoinType jointype, List *restrictlist)
#define NIL
Definition: pg_list.h:69
Relids ph_needed
Definition: relation.h:2196
List * unique_for_rels
Definition: relation.h:667
#define IsA(nodeptr, _type_)
Definition: nodes.h:568
bool query_is_distinct_for(Query *query, List *colnos, List *opids)
Definition: analyzejoins.c:782
void remove_join_clause_from_rels(PlannerInfo *root, RestrictInfo *restrictinfo, Relids join_relids)
Definition: joininfo.c:122
Index varlevelsup
Definition: primnodes.h:174
TargetEntry * get_sortgroupclause_tle(SortGroupClause *sgClause, List *targetList)
Definition: tlist.c:370
bool predOK
Definition: relation.h:788
#define forboth(cell1, list1, cell2, list2)
Definition: pg_list.h:180
void reduce_unique_semijoins(PlannerInfo *root)
Definition: analyzejoins.c:511
Bitmapset * bms_copy(const Bitmapset *a)
Definition: bitmapset.c:133
Relids ph_eval_at
Definition: relation.h:2194
PlaceHolderVar * ph_var
Definition: relation.h:2193
RelOptKind reloptkind
Definition: relation.h:609
static int32 next
Definition: blutils.c:211
Relids * attr_needed
Definition: relation.h:645
List * join_info_list
Definition: relation.h:264
Relids required_relids
Definition: relation.h:1898
Relids min_righthand
Definition: relation.h:2067
void * join_search_private
Definition: relation.h:332
#define castNode(_type_, nodeptr)
Definition: nodes.h:586
static Oid distinct_col_search(int colno, List *colnos, List *opids)
Definition: analyzejoins.c:931
bool hasAggs
Definition: parsenodes.h:125
Relids clause_relids
Definition: relation.h:1895
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
#define IS_OUTER_JOIN(jointype)
Definition: nodes.h:730
List * groupingSets
Definition: parsenodes.h:150
List * list_copy(const List *oldlist)
Definition: list.c:1160
Definition: nodes.h:517
Relids left_relids
Definition: relation.h:1907
AttrNumber varattno
Definition: primnodes.h:169
bool bms_get_singleton_member(const Bitmapset *a, int *member)
Definition: bitmapset.c:635
List * list_concat(List *list1, List *list2)
Definition: list.c:321
bool innerrel_is_unique(PlannerInfo *root, Relids joinrelids, Relids outerrelids, RelOptInfo *innerrel, JoinType jointype, List *restrictlist, bool force_cache)
Definition: analyzejoins.c:971
static void remove_rel_from_query(PlannerInfo *root, int relid, Relids joinrelids)
Definition: analyzejoins.c:313
List * list_delete_ptr(List *list, void *datum)
Definition: list.c:590
bool immediate
Definition: relation.h:790
unsigned int Oid
Definition: postgres_ext.h:31
Definition: primnodes.h:164
List * lappend_oid(List *list, Oid datum)
Definition: list.c:164
#define OidIsValid(objectId)
Definition: c.h:605
List * mergeopfamilies
Definition: relation.h:1925
Relids syn_lefthand
Definition: relation.h:2068
JoinType
Definition: nodes.h:681
List * targetList
Definition: parsenodes.h:140
struct RelOptInfo ** simple_rel_array
Definition: relation.h:193
bool unique
Definition: relation.h:789
Relids syn_righthand
Definition: relation.h:2069
void distribute_restrictinfo_to_rels(PlannerInfo *root, RestrictInfo *restrictinfo)
Definition: initsplan.c:2232
#define RINFO_IS_PUSHED_DOWN(rinfo, joinrelids)
Definition: relation.h:1957
bool resjunk
Definition: primnodes.h:1383
#define linitial(l)
Definition: pg_list.h:111
List * distinctClause
Definition: parsenodes.h:156
#define ERROR
Definition: elog.h:43
Expr * phexpr
Definition: relation.h:1999
#define lfirst_int(lc)
Definition: pg_list.h:107
List * generate_join_implied_equalities(PlannerInfo *root, Relids join_relids, Relids outer_relids, RelOptInfo *inner_rel)
Definition: equivclass.c:1072
Node * get_leftop(const Expr *clause)
Definition: clauses.c:200
static bool rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list)
Definition: analyzejoins.c:648
bool can_join
Definition: relation.h:1886
bool bms_is_subset(const Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:374
#define lfirst_node(type, lc)
Definition: pg_list.h:109
List * joininfo
Definition: relation.h:676
bool outer_is_left
Definition: relation.h:1935
AttrNumber resno
Definition: primnodes.h:1377
static bool join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
Definition: analyzejoins.c:160
static ListCell * list_head(const List *l)
Definition: pg_list.h:77
static bool clause_sides_match_join(RestrictInfo *rinfo, Relids outerrelids, Relids innerrelids)
Definition: analyzejoins.c:128
Relids relids
Definition: relation.h:612
int simple_rel_array_size
Definition: relation.h:194
#define lnext(lc)
Definition: pg_list.h:105
List * non_unique_for_rels
Definition: relation.h:669
Relids pull_varnos(Node *node)
Definition: var.c:95
List * lappend_int(List *list, int datum)
Definition: list.c:146
Index relid
Definition: relation.h:640
Bitmapset * Relids
Definition: relation.h:29
List * lappend(List *list, void *datum)
Definition: list.c:128
RangeTblEntry ** simple_rte_array
Definition: relation.h:202
Expr * clause
Definition: relation.h:1880
bool bms_is_empty(const Bitmapset *a)
Definition: bitmapset.c:729
Index varno
Definition: primnodes.h:167
bool delay_upper_joins
Definition: relation.h:2072
int bms_singleton_member(const Bitmapset *a)
Definition: bitmapset.c:592
Relids ph_lateral
Definition: relation.h:2195
unsigned int Index
Definition: c.h:442
RTEKind rtekind
Definition: relation.h:642
List * indexlist
Definition: relation.h:649
#define InvalidOid
Definition: postgres_ext.h:36
bool hasTargetSRFs
Definition: parsenodes.h:127
Relids right_relids
Definition: relation.h:1908
#define Assert(condition)
Definition: c.h:699
#define lfirst(lc)
Definition: pg_list.h:106
bool equality_ops_are_compatible(Oid opno1, Oid opno2)
Definition: lsyscache.c:695
List * remove_useless_joins(PlannerInfo *root, List *joinlist)
Definition: analyzejoins.c:61
JoinType jointype
Definition: relation.h:2070
Bitmapset * bms_union(const Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:284
static int list_length(const List *l)
Definition: pg_list.h:89
SetOperation op
Definition: parsenodes.h:1599
bool query_supports_distinctness(Query *query)
Definition: analyzejoins.c:745
#define nodeTag(nodeptr)
Definition: nodes.h:522
Node * get_rightop(const Expr *clause)
Definition: clauses.c:217
bool bms_overlap(const Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:509
Node * setOperations
Definition: parsenodes.h:165
Query * subquery
Definition: parsenodes.h:985
List * groupClause
Definition: parsenodes.h:148
AttrNumber max_attr
Definition: relation.h:644
List * placeholder_list
Definition: relation.h:270
static List * remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved)
Definition: analyzejoins.c:457
MemoryContext planner_cxt
Definition: relation.h:302
#define elog
Definition: elog.h:219
bool relation_has_unique_index_for(PlannerInfo *root, RelOptInfo *rel, List *restrictlist, List *exprlist, List *oprlist)
Definition: indxpath.c:2986
RelOptInfo * find_base_rel(PlannerInfo *root, int relid)
Definition: relnode.c:279
static bool rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel)
Definition: analyzejoins.c:592
Node * havingQual
Definition: parsenodes.h:152
List * indpred
Definition: relation.h:778
Bitmapset * bms_del_member(Bitmapset *a, int x)
Definition: bitmapset.c:801
Definition: pg_list.h:45
bool bms_is_member(int x, const Bitmapset *a)
Definition: bitmapset.c:486
Relids min_lefthand
Definition: relation.h:2066
#define lfirst_oid(lc)
Definition: pg_list.h:108
AttrNumber min_attr
Definition: relation.h:643