PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
analyzejoins.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * analyzejoins.c
4  * Routines for simplifying joins after initial query analysis
5  *
6  * While we do a great deal of join simplification in prep/prepjointree.c,
7  * certain optimizations cannot be performed at that stage for lack of
8  * detailed information about the query. The routines here are invoked
9  * after initsplan.c has done its work, and can do additional join removal
10  * and simplification steps based on the information extracted. The penalty
11  * is that we have to work harder to clean up after ourselves when we modify
12  * the query, since the derived data structures have to be updated too.
13  *
14  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
15  * Portions Copyright (c) 1994, Regents of the University of California
16  *
17  *
18  * IDENTIFICATION
19  * src/backend/optimizer/plan/analyzejoins.c
20  *
21  *-------------------------------------------------------------------------
22  */
23 #include "postgres.h"
24 
25 #include "nodes/nodeFuncs.h"
26 #include "optimizer/clauses.h"
27 #include "optimizer/joininfo.h"
28 #include "optimizer/pathnode.h"
29 #include "optimizer/paths.h"
30 #include "optimizer/planmain.h"
31 #include "optimizer/tlist.h"
32 #include "optimizer/var.h"
33 #include "utils/lsyscache.h"
34 
35 /* local functions */
36 static bool join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo);
37 static void remove_rel_from_query(PlannerInfo *root, int relid,
38  Relids joinrelids);
39 static List *remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved);
40 static bool rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel);
41 static bool rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel,
42  List *clause_list);
43 static Oid distinct_col_search(int colno, List *colnos, List *opids);
44 static bool is_innerrel_unique_for(PlannerInfo *root,
45  Relids outerrelids,
46  RelOptInfo *innerrel,
47  JoinType jointype,
48  List *restrictlist);
49 
50 
51 /*
52  * remove_useless_joins
53  * Check for relations that don't actually need to be joined at all,
54  * and remove them from the query.
55  *
56  * We are passed the current joinlist and return the updated list. Other
57  * data structures that have to be updated are accessible via "root".
58  */
59 List *
61 {
62  ListCell *lc;
63 
64  /*
65  * We are only interested in relations that are left-joined to, so we can
66  * scan the join_info_list to find them easily.
67  */
68 restart:
69  foreach(lc, root->join_info_list)
70  {
71  SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
72  int innerrelid;
73  int nremoved;
74 
75  /* Skip if not removable */
76  if (!join_is_removable(root, sjinfo))
77  continue;
78 
79  /*
80  * Currently, join_is_removable can only succeed when the sjinfo's
81  * righthand is a single baserel. Remove that rel from the query and
82  * joinlist.
83  */
84  innerrelid = bms_singleton_member(sjinfo->min_righthand);
85 
86  remove_rel_from_query(root, innerrelid,
87  bms_union(sjinfo->min_lefthand,
88  sjinfo->min_righthand));
89 
90  /* We verify that exactly one reference gets removed from joinlist */
91  nremoved = 0;
92  joinlist = remove_rel_from_joinlist(joinlist, innerrelid, &nremoved);
93  if (nremoved != 1)
94  elog(ERROR, "failed to find relation %d in joinlist", innerrelid);
95 
96  /*
97  * We can delete this SpecialJoinInfo from the list too, since it's no
98  * longer of interest.
99  */
100  root->join_info_list = list_delete_ptr(root->join_info_list, sjinfo);
101 
102  /*
103  * Restart the scan. This is necessary to ensure we find all
104  * removable joins independently of ordering of the join_info_list
105  * (note that removal of attr_needed bits may make a join appear
106  * removable that did not before). Also, since we just deleted the
107  * current list cell, we'd have to have some kluge to continue the
108  * list scan anyway.
109  */
110  goto restart;
111  }
112 
113  return joinlist;
114 }
115 
116 /*
117  * clause_sides_match_join
118  * Determine whether a join clause is of the right form to use in this join.
119  *
120  * We already know that the clause is a binary opclause referencing only the
121  * rels in the current join. The point here is to check whether it has the
122  * form "outerrel_expr op innerrel_expr" or "innerrel_expr op outerrel_expr",
123  * rather than mixing outer and inner vars on either side. If it matches,
124  * we set the transient flag outer_is_left to identify which side is which.
125  */
126 static inline bool
128  Relids innerrelids)
129 {
130  if (bms_is_subset(rinfo->left_relids, outerrelids) &&
131  bms_is_subset(rinfo->right_relids, innerrelids))
132  {
133  /* lefthand side is outer */
134  rinfo->outer_is_left = true;
135  return true;
136  }
137  else if (bms_is_subset(rinfo->left_relids, innerrelids) &&
138  bms_is_subset(rinfo->right_relids, outerrelids))
139  {
140  /* righthand side is outer */
141  rinfo->outer_is_left = false;
142  return true;
143  }
144  return false; /* no good for these input relations */
145 }
146 
147 /*
148  * join_is_removable
149  * Check whether we need not perform this special join at all, because
150  * it will just duplicate its left input.
151  *
152  * This is true for a left join for which the join condition cannot match
153  * more than one inner-side row. (There are other possibly interesting
154  * cases, but we don't have the infrastructure to prove them.) We also
155  * have to check that the inner side doesn't generate any variables needed
156  * above the join.
157  */
158 static bool
160 {
161  int innerrelid;
162  RelOptInfo *innerrel;
163  Relids joinrelids;
164  List *clause_list = NIL;
165  ListCell *l;
166  int attroff;
167 
168  /*
169  * Must be a non-delaying left join to a single baserel, else we aren't
170  * going to be able to do anything with it.
171  */
172  if (sjinfo->jointype != JOIN_LEFT ||
173  sjinfo->delay_upper_joins)
174  return false;
175 
176  if (!bms_get_singleton_member(sjinfo->min_righthand, &innerrelid))
177  return false;
178 
179  innerrel = find_base_rel(root, innerrelid);
180 
181  /*
182  * Before we go to the effort of checking whether any innerrel variables
183  * are needed above the join, make a quick check to eliminate cases in
184  * which we will surely be unable to prove uniqueness of the innerrel.
185  */
186  if (!rel_supports_distinctness(root, innerrel))
187  return false;
188 
189  /* Compute the relid set for the join we are considering */
190  joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
191 
192  /*
193  * We can't remove the join if any inner-rel attributes are used above the
194  * join.
195  *
196  * Note that this test only detects use of inner-rel attributes in higher
197  * join conditions and the target list. There might be such attributes in
198  * pushed-down conditions at this join, too. We check that case below.
199  *
200  * As a micro-optimization, it seems better to start with max_attr and
201  * count down rather than starting with min_attr and counting up, on the
202  * theory that the system attributes are somewhat less likely to be wanted
203  * and should be tested last.
204  */
205  for (attroff = innerrel->max_attr - innerrel->min_attr;
206  attroff >= 0;
207  attroff--)
208  {
209  if (!bms_is_subset(innerrel->attr_needed[attroff], joinrelids))
210  return false;
211  }
212 
213  /*
214  * Similarly check that the inner rel isn't needed by any PlaceHolderVars
215  * that will be used above the join. We only need to fail if such a PHV
216  * actually references some inner-rel attributes; but the correct check
217  * for that is relatively expensive, so we first check against ph_eval_at,
218  * which must mention the inner rel if the PHV uses any inner-rel attrs as
219  * non-lateral references. Note that if the PHV's syntactic scope is just
220  * the inner rel, we can't drop the rel even if the PHV is variable-free.
221  */
222  foreach(l, root->placeholder_list)
223  {
224  PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l);
225 
226  if (bms_overlap(phinfo->ph_lateral, innerrel->relids))
227  return false; /* it references innerrel laterally */
228  if (bms_is_subset(phinfo->ph_needed, joinrelids))
229  continue; /* PHV is not used above the join */
230  if (!bms_overlap(phinfo->ph_eval_at, innerrel->relids))
231  continue; /* it definitely doesn't reference innerrel */
232  if (bms_is_subset(phinfo->ph_eval_at, innerrel->relids))
233  return false; /* there isn't any other place to eval PHV */
234  if (bms_overlap(pull_varnos((Node *) phinfo->ph_var->phexpr),
235  innerrel->relids))
236  return false; /* it does reference innerrel */
237  }
238 
239  /*
240  * Search for mergejoinable clauses that constrain the inner rel against
241  * either the outer rel or a pseudoconstant. If an operator is
242  * mergejoinable then it behaves like equality for some btree opclass, so
243  * it's what we want. The mergejoinability test also eliminates clauses
244  * containing volatile functions, which we couldn't depend on.
245  */
246  foreach(l, innerrel->joininfo)
247  {
248  RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
249 
250  /*
251  * If it's not a join clause for this outer join, we can't use it.
252  * Note that if the clause is pushed-down, then it is logically from
253  * above the outer join, even if it references no other rels (it might
254  * be from WHERE, for example).
255  */
256  if (restrictinfo->is_pushed_down ||
257  !bms_equal(restrictinfo->required_relids, joinrelids))
258  {
259  /*
260  * If such a clause actually references the inner rel then join
261  * removal has to be disallowed. We have to check this despite
262  * the previous attr_needed checks because of the possibility of
263  * pushed-down clauses referencing the rel.
264  */
265  if (bms_is_member(innerrelid, restrictinfo->clause_relids))
266  return false;
267  continue; /* else, ignore; not useful here */
268  }
269 
270  /* Ignore if it's not a mergejoinable clause */
271  if (!restrictinfo->can_join ||
272  restrictinfo->mergeopfamilies == NIL)
273  continue; /* not mergejoinable */
274 
275  /*
276  * Check if clause has the form "outer op inner" or "inner op outer",
277  * and if so mark which side is inner.
278  */
279  if (!clause_sides_match_join(restrictinfo, sjinfo->min_lefthand,
280  innerrel->relids))
281  continue; /* no good for these input relations */
282 
283  /* OK, add to list */
284  clause_list = lappend(clause_list, restrictinfo);
285  }
286 
287  /*
288  * Now that we have the relevant equality join clauses, try to prove the
289  * innerrel distinct.
290  */
291  if (rel_is_distinct_for(root, innerrel, clause_list))
292  return true;
293 
294  /*
295  * Some day it would be nice to check for other methods of establishing
296  * distinctness.
297  */
298  return false;
299 }
300 
301 
302 /*
303  * Remove the target relid from the planner's data structures, having
304  * determined that there is no need to include it in the query.
305  *
306  * We are not terribly thorough here. We must make sure that the rel is
307  * no longer treated as a baserel, and that attributes of other baserels
308  * are no longer marked as being needed at joins involving this rel.
309  * Also, join quals involving the rel have to be removed from the joininfo
310  * lists, but only if they belong to the outer join identified by joinrelids.
311  */
312 static void
313 remove_rel_from_query(PlannerInfo *root, int relid, Relids joinrelids)
314 {
315  RelOptInfo *rel = find_base_rel(root, relid);
316  List *joininfos;
317  Index rti;
318  ListCell *l;
319  ListCell *nextl;
320 
321  /*
322  * Mark the rel as "dead" to show it is no longer part of the join tree.
323  * (Removing it from the baserel array altogether seems too risky.)
324  */
325  rel->reloptkind = RELOPT_DEADREL;
326 
327  /*
328  * Remove references to the rel from other baserels' attr_needed arrays.
329  */
330  for (rti = 1; rti < root->simple_rel_array_size; rti++)
331  {
332  RelOptInfo *otherrel = root->simple_rel_array[rti];
333  int attroff;
334 
335  /* there may be empty slots corresponding to non-baserel RTEs */
336  if (otherrel == NULL)
337  continue;
338 
339  Assert(otherrel->relid == rti); /* sanity check on array */
340 
341  /* no point in processing target rel itself */
342  if (otherrel == rel)
343  continue;
344 
345  for (attroff = otherrel->max_attr - otherrel->min_attr;
346  attroff >= 0;
347  attroff--)
348  {
349  otherrel->attr_needed[attroff] =
350  bms_del_member(otherrel->attr_needed[attroff], relid);
351  }
352  }
353 
354  /*
355  * Likewise remove references from SpecialJoinInfo data structures.
356  *
357  * This is relevant in case the outer join we're deleting is nested inside
358  * other outer joins: the upper joins' relid sets have to be adjusted. The
359  * RHS of the target outer join will be made empty here, but that's OK
360  * since caller will delete that SpecialJoinInfo entirely.
361  */
362  foreach(l, root->join_info_list)
363  {
364  SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(l);
365 
366  sjinfo->min_lefthand = bms_del_member(sjinfo->min_lefthand, relid);
367  sjinfo->min_righthand = bms_del_member(sjinfo->min_righthand, relid);
368  sjinfo->syn_lefthand = bms_del_member(sjinfo->syn_lefthand, relid);
369  sjinfo->syn_righthand = bms_del_member(sjinfo->syn_righthand, relid);
370  }
371 
372  /*
373  * Likewise remove references from PlaceHolderVar data structures,
374  * removing any no-longer-needed placeholders entirely.
375  *
376  * Removal is a bit tricker than it might seem: we can remove PHVs that
377  * are used at the target rel and/or in the join qual, but not those that
378  * are used at join partner rels or above the join. It's not that easy to
379  * distinguish PHVs used at partner rels from those used in the join qual,
380  * since they will both have ph_needed sets that are subsets of
381  * joinrelids. However, a PHV used at a partner rel could not have the
382  * target rel in ph_eval_at, so we check that while deciding whether to
383  * remove or just update the PHV. There is no corresponding test in
384  * join_is_removable because it doesn't need to distinguish those cases.
385  */
386  for (l = list_head(root->placeholder_list); l != NULL; l = nextl)
387  {
388  PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l);
389 
390  nextl = lnext(l);
391  Assert(!bms_is_member(relid, phinfo->ph_lateral));
392  if (bms_is_subset(phinfo->ph_needed, joinrelids) &&
393  bms_is_member(relid, phinfo->ph_eval_at))
395  phinfo);
396  else
397  {
398  phinfo->ph_eval_at = bms_del_member(phinfo->ph_eval_at, relid);
399  Assert(!bms_is_empty(phinfo->ph_eval_at));
400  phinfo->ph_needed = bms_del_member(phinfo->ph_needed, relid);
401  }
402  }
403 
404  /*
405  * Remove any joinquals referencing the rel from the joininfo lists.
406  *
407  * In some cases, a joinqual has to be put back after deleting its
408  * reference to the target rel. This can occur for pseudoconstant and
409  * outerjoin-delayed quals, which can get marked as requiring the rel in
410  * order to force them to be evaluated at or above the join. We can't
411  * just discard them, though. Only quals that logically belonged to the
412  * outer join being discarded should be removed from the query.
413  *
414  * We must make a copy of the rel's old joininfo list before starting the
415  * loop, because otherwise remove_join_clause_from_rels would destroy the
416  * list while we're scanning it.
417  */
418  joininfos = list_copy(rel->joininfo);
419  foreach(l, joininfos)
420  {
421  RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
422 
423  remove_join_clause_from_rels(root, rinfo, rinfo->required_relids);
424 
425  if (rinfo->is_pushed_down ||
426  !bms_equal(rinfo->required_relids, joinrelids))
427  {
428  /* Recheck that qual doesn't actually reference the target rel */
429  Assert(!bms_is_member(relid, rinfo->clause_relids));
430 
431  /*
432  * The required_relids probably aren't shared with anything else,
433  * but let's copy them just to be sure.
434  */
435  rinfo->required_relids = bms_copy(rinfo->required_relids);
437  relid);
438  distribute_restrictinfo_to_rels(root, rinfo);
439  }
440  }
441 
442  /*
443  * There may be references to the rel in root->fkey_list, but if so,
444  * match_foreign_keys_to_quals() will get rid of them.
445  */
446 }
447 
448 /*
449  * Remove any occurrences of the target relid from a joinlist structure.
450  *
451  * It's easiest to build a whole new list structure, so we handle it that
452  * way. Efficiency is not a big deal here.
453  *
454  * *nremoved is incremented by the number of occurrences removed (there
455  * should be exactly one, but the caller checks that).
456  */
457 static List *
458 remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved)
459 {
460  List *result = NIL;
461  ListCell *jl;
462 
463  foreach(jl, joinlist)
464  {
465  Node *jlnode = (Node *) lfirst(jl);
466 
467  if (IsA(jlnode, RangeTblRef))
468  {
469  int varno = ((RangeTblRef *) jlnode)->rtindex;
470 
471  if (varno == relid)
472  (*nremoved)++;
473  else
474  result = lappend(result, jlnode);
475  }
476  else if (IsA(jlnode, List))
477  {
478  /* Recurse to handle subproblem */
479  List *sublist;
480 
481  sublist = remove_rel_from_joinlist((List *) jlnode,
482  relid, nremoved);
483  /* Avoid including empty sub-lists in the result */
484  if (sublist)
485  result = lappend(result, sublist);
486  }
487  else
488  {
489  elog(ERROR, "unrecognized joinlist node type: %d",
490  (int) nodeTag(jlnode));
491  }
492  }
493 
494  return result;
495 }
496 
497 
498 /*
499  * reduce_unique_semijoins
500  * Check for semijoins that can be simplified to plain inner joins
501  * because the inner relation is provably unique for the join clauses.
502  *
503  * Ideally this would happen during reduce_outer_joins, but we don't have
504  * enough information at that point.
505  *
506  * To perform the strength reduction when applicable, we need only delete
507  * the semijoin's SpecialJoinInfo from root->join_info_list. (We don't
508  * bother fixing the join type attributed to it in the query jointree,
509  * since that won't be consulted again.)
510  */
511 void
513 {
514  ListCell *lc;
515  ListCell *next;
516 
517  /*
518  * Scan the join_info_list to find semijoins. We can't use foreach
519  * because we may delete the current cell.
520  */
521  for (lc = list_head(root->join_info_list); lc != NULL; lc = next)
522  {
523  SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
524  int innerrelid;
525  RelOptInfo *innerrel;
526  Relids joinrelids;
527  List *restrictlist;
528 
529  next = lnext(lc);
530 
531  /*
532  * Must be a non-delaying semijoin to a single baserel, else we aren't
533  * going to be able to do anything with it. (It's probably not
534  * possible for delay_upper_joins to be set on a semijoin, but we
535  * might as well check.)
536  */
537  if (sjinfo->jointype != JOIN_SEMI ||
538  sjinfo->delay_upper_joins)
539  continue;
540 
541  if (!bms_get_singleton_member(sjinfo->min_righthand, &innerrelid))
542  continue;
543 
544  innerrel = find_base_rel(root, innerrelid);
545 
546  /*
547  * Before we trouble to run generate_join_implied_equalities, make a
548  * quick check to eliminate cases in which we will surely be unable to
549  * prove uniqueness of the innerrel.
550  */
551  if (!rel_supports_distinctness(root, innerrel))
552  continue;
553 
554  /* Compute the relid set for the join we are considering */
555  joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
556 
557  /*
558  * Since we're only considering a single-rel RHS, any join clauses it
559  * has must be clauses linking it to the semijoin's min_lefthand. We
560  * can also consider EC-derived join clauses.
561  */
562  restrictlist =
564  joinrelids,
565  sjinfo->min_lefthand,
566  innerrel),
567  innerrel->joininfo);
568 
569  /* Test whether the innerrel is unique for those clauses. */
570  if (!innerrel_is_unique(root, sjinfo->min_lefthand, innerrel,
571  JOIN_SEMI, restrictlist, true))
572  continue;
573 
574  /* OK, remove the SpecialJoinInfo from the list. */
575  root->join_info_list = list_delete_ptr(root->join_info_list, sjinfo);
576  }
577 }
578 
579 
580 /*
581  * rel_supports_distinctness
582  * Could the relation possibly be proven distinct on some set of columns?
583  *
584  * This is effectively a pre-checking function for rel_is_distinct_for().
585  * It must return TRUE if rel_is_distinct_for() could possibly return TRUE
586  * with this rel, but it should not expend a lot of cycles. The idea is
587  * that callers can avoid doing possibly-expensive processing to compute
588  * rel_is_distinct_for()'s argument lists if the call could not possibly
589  * succeed.
590  */
591 static bool
593 {
594  /* We only know about baserels ... */
595  if (rel->reloptkind != RELOPT_BASEREL)
596  return false;
597  if (rel->rtekind == RTE_RELATION)
598  {
599  /*
600  * For a plain relation, we only know how to prove uniqueness by
601  * reference to unique indexes. Make sure there's at least one
602  * suitable unique index. It must be immediately enforced, and if
603  * it's a partial index, it must match the query. (Keep these
604  * conditions in sync with relation_has_unique_index_for!)
605  */
606  ListCell *lc;
607 
608  foreach(lc, rel->indexlist)
609  {
610  IndexOptInfo *ind = (IndexOptInfo *) lfirst(lc);
611 
612  if (ind->unique && ind->immediate &&
613  (ind->indpred == NIL || ind->predOK))
614  return true;
615  }
616  }
617  else if (rel->rtekind == RTE_SUBQUERY)
618  {
619  Query *subquery = root->simple_rte_array[rel->relid]->subquery;
620 
621  /* Check if the subquery has any qualities that support distinctness */
622  if (query_supports_distinctness(subquery))
623  return true;
624  }
625  /* We have no proof rules for any other rtekinds. */
626  return false;
627 }
628 
629 /*
630  * rel_is_distinct_for
631  * Does the relation return only distinct rows according to clause_list?
632  *
633  * clause_list is a list of join restriction clauses involving this rel and
634  * some other one. Return true if no two rows emitted by this rel could
635  * possibly join to the same row of the other rel.
636  *
637  * The caller must have already determined that each condition is a
638  * mergejoinable equality with an expression in this relation on one side, and
639  * an expression not involving this relation on the other. The transient
640  * outer_is_left flag is used to identify which side references this relation:
641  * left side if outer_is_left is false, right side if it is true.
642  *
643  * Note that the passed-in clause_list may be destructively modified! This
644  * is OK for current uses, because the clause_list is built by the caller for
645  * the sole purpose of passing to this function.
646  */
647 static bool
648 rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list)
649 {
650  /*
651  * We could skip a couple of tests here if we assume all callers checked
652  * rel_supports_distinctness first, but it doesn't seem worth taking any
653  * risk for.
654  */
655  if (rel->reloptkind != RELOPT_BASEREL)
656  return false;
657  if (rel->rtekind == RTE_RELATION)
658  {
659  /*
660  * Examine the indexes to see if we have a matching unique index.
661  * relation_has_unique_index_for automatically adds any usable
662  * restriction clauses for the rel, so we needn't do that here.
663  */
664  if (relation_has_unique_index_for(root, rel, clause_list, NIL, NIL))
665  return true;
666  }
667  else if (rel->rtekind == RTE_SUBQUERY)
668  {
669  Index relid = rel->relid;
670  Query *subquery = root->simple_rte_array[relid]->subquery;
671  List *colnos = NIL;
672  List *opids = NIL;
673  ListCell *l;
674 
675  /*
676  * Build the argument lists for query_is_distinct_for: a list of
677  * output column numbers that the query needs to be distinct over, and
678  * a list of equality operators that the output columns need to be
679  * distinct according to.
680  *
681  * (XXX we are not considering restriction clauses attached to the
682  * subquery; is that worth doing?)
683  */
684  foreach(l, clause_list)
685  {
687  Oid op;
688  Var *var;
689 
690  /*
691  * Get the equality operator we need uniqueness according to.
692  * (This might be a cross-type operator and thus not exactly the
693  * same operator the subquery would consider; that's all right
694  * since query_is_distinct_for can resolve such cases.) The
695  * caller's mergejoinability test should have selected only
696  * OpExprs.
697  */
698  op = castNode(OpExpr, rinfo->clause)->opno;
699 
700  /* caller identified the inner side for us */
701  if (rinfo->outer_is_left)
702  var = (Var *) get_rightop(rinfo->clause);
703  else
704  var = (Var *) get_leftop(rinfo->clause);
705 
706  /*
707  * If inner side isn't a Var referencing a subquery output column,
708  * this clause doesn't help us.
709  */
710  if (!var || !IsA(var, Var) ||
711  var->varno != relid || var->varlevelsup != 0)
712  continue;
713 
714  colnos = lappend_int(colnos, var->varattno);
715  opids = lappend_oid(opids, op);
716  }
717 
718  if (query_is_distinct_for(subquery, colnos, opids))
719  return true;
720  }
721  return false;
722 }
723 
724 
725 /*
726  * query_supports_distinctness - could the query possibly be proven distinct
727  * on some set of output columns?
728  *
729  * This is effectively a pre-checking function for query_is_distinct_for().
730  * It must return TRUE if query_is_distinct_for() could possibly return TRUE
731  * with this query, but it should not expend a lot of cycles. The idea is
732  * that callers can avoid doing possibly-expensive processing to compute
733  * query_is_distinct_for()'s argument lists if the call could not possibly
734  * succeed.
735  */
736 bool
738 {
739  /* we don't cope with SRFs, see comment below */
740  if (query->hasTargetSRFs)
741  return false;
742 
743  /* check for features we can prove distinctness with */
744  if (query->distinctClause != NIL ||
745  query->groupClause != NIL ||
746  query->groupingSets != NIL ||
747  query->hasAggs ||
748  query->havingQual ||
749  query->setOperations)
750  return true;
751 
752  return false;
753 }
754 
755 /*
756  * query_is_distinct_for - does query never return duplicates of the
757  * specified columns?
758  *
759  * query is a not-yet-planned subquery (in current usage, it's always from
760  * a subquery RTE, which the planner avoids scribbling on).
761  *
762  * colnos is an integer list of output column numbers (resno's). We are
763  * interested in whether rows consisting of just these columns are certain
764  * to be distinct. "Distinctness" is defined according to whether the
765  * corresponding upper-level equality operators listed in opids would think
766  * the values are distinct. (Note: the opids entries could be cross-type
767  * operators, and thus not exactly the equality operators that the subquery
768  * would use itself. We use equality_ops_are_compatible() to check
769  * compatibility. That looks at btree or hash opfamily membership, and so
770  * should give trustworthy answers for all operators that we might need
771  * to deal with here.)
772  */
773 bool
774 query_is_distinct_for(Query *query, List *colnos, List *opids)
775 {
776  ListCell *l;
777  Oid opid;
778 
779  Assert(list_length(colnos) == list_length(opids));
780 
781  /*
782  * A set-returning function in the query's targetlist can result in
783  * returning duplicate rows, if the SRF is evaluated after the
784  * de-duplication step; so we play it safe and say "no" if there are any
785  * SRFs. (We could be certain that it's okay if SRFs appear only in the
786  * specified columns, since those must be evaluated before de-duplication;
787  * but it doesn't presently seem worth the complication to check that.)
788  */
789  if (query->hasTargetSRFs)
790  return false;
791 
792  /*
793  * DISTINCT (including DISTINCT ON) guarantees uniqueness if all the
794  * columns in the DISTINCT clause appear in colnos and operator semantics
795  * match.
796  */
797  if (query->distinctClause)
798  {
799  foreach(l, query->distinctClause)
800  {
801  SortGroupClause *sgc = (SortGroupClause *) lfirst(l);
803  query->targetList);
804 
805  opid = distinct_col_search(tle->resno, colnos, opids);
806  if (!OidIsValid(opid) ||
807  !equality_ops_are_compatible(opid, sgc->eqop))
808  break; /* exit early if no match */
809  }
810  if (l == NULL) /* had matches for all? */
811  return true;
812  }
813 
814  /*
815  * Similarly, GROUP BY without GROUPING SETS guarantees uniqueness if all
816  * the grouped columns appear in colnos and operator semantics match.
817  */
818  if (query->groupClause && !query->groupingSets)
819  {
820  foreach(l, query->groupClause)
821  {
822  SortGroupClause *sgc = (SortGroupClause *) lfirst(l);
824  query->targetList);
825 
826  opid = distinct_col_search(tle->resno, colnos, opids);
827  if (!OidIsValid(opid) ||
828  !equality_ops_are_compatible(opid, sgc->eqop))
829  break; /* exit early if no match */
830  }
831  if (l == NULL) /* had matches for all? */
832  return true;
833  }
834  else if (query->groupingSets)
835  {
836  /*
837  * If we have grouping sets with expressions, we probably don't have
838  * uniqueness and analysis would be hard. Punt.
839  */
840  if (query->groupClause)
841  return false;
842 
843  /*
844  * If we have no groupClause (therefore no grouping expressions), we
845  * might have one or many empty grouping sets. If there's just one,
846  * then we're returning only one row and are certainly unique. But
847  * otherwise, we know we're certainly not unique.
848  */
849  if (list_length(query->groupingSets) == 1 &&
850  ((GroupingSet *) linitial(query->groupingSets))->kind == GROUPING_SET_EMPTY)
851  return true;
852  else
853  return false;
854  }
855  else
856  {
857  /*
858  * If we have no GROUP BY, but do have aggregates or HAVING, then the
859  * result is at most one row so it's surely unique, for any operators.
860  */
861  if (query->hasAggs || query->havingQual)
862  return true;
863  }
864 
865  /*
866  * UNION, INTERSECT, EXCEPT guarantee uniqueness of the whole output row,
867  * except with ALL.
868  */
869  if (query->setOperations)
870  {
872 
873  Assert(topop->op != SETOP_NONE);
874 
875  if (!topop->all)
876  {
877  ListCell *lg;
878 
879  /* We're good if all the nonjunk output columns are in colnos */
880  lg = list_head(topop->groupClauses);
881  foreach(l, query->targetList)
882  {
883  TargetEntry *tle = (TargetEntry *) lfirst(l);
884  SortGroupClause *sgc;
885 
886  if (tle->resjunk)
887  continue; /* ignore resjunk columns */
888 
889  /* non-resjunk columns should have grouping clauses */
890  Assert(lg != NULL);
891  sgc = (SortGroupClause *) lfirst(lg);
892  lg = lnext(lg);
893 
894  opid = distinct_col_search(tle->resno, colnos, opids);
895  if (!OidIsValid(opid) ||
896  !equality_ops_are_compatible(opid, sgc->eqop))
897  break; /* exit early if no match */
898  }
899  if (l == NULL) /* had matches for all? */
900  return true;
901  }
902  }
903 
904  /*
905  * XXX Are there any other cases in which we can easily see the result
906  * must be distinct?
907  *
908  * If you do add more smarts to this function, be sure to update
909  * query_supports_distinctness() to match.
910  */
911 
912  return false;
913 }
914 
915 /*
916  * distinct_col_search - subroutine for query_is_distinct_for
917  *
918  * If colno is in colnos, return the corresponding element of opids,
919  * else return InvalidOid. (Ordinarily colnos would not contain duplicates,
920  * but if it does, we arbitrarily select the first match.)
921  */
922 static Oid
923 distinct_col_search(int colno, List *colnos, List *opids)
924 {
925  ListCell *lc1,
926  *lc2;
927 
928  forboth(lc1, colnos, lc2, opids)
929  {
930  if (colno == lfirst_int(lc1))
931  return lfirst_oid(lc2);
932  }
933  return InvalidOid;
934 }
935 
936 
937 /*
938  * innerrel_is_unique
939  * Check if the innerrel provably contains at most one tuple matching any
940  * tuple from the outerrel, based on join clauses in the 'restrictlist'.
941  *
942  * We need an actual RelOptInfo for the innerrel, but it's sufficient to
943  * identify the outerrel by its Relids. This asymmetry supports use of this
944  * function before joinrels have been built.
945  *
946  * The proof must be made based only on clauses that will be "joinquals"
947  * rather than "otherquals" at execution. For an inner join there's no
948  * difference; but if the join is outer, we must ignore pushed-down quals,
949  * as those will become "otherquals". Note that this means the answer might
950  * vary depending on whether IS_OUTER_JOIN(jointype); since we cache the
951  * answer without regard to that, callers must take care not to call this
952  * with jointypes that would be classified differently by IS_OUTER_JOIN().
953  *
954  * The actual proof is undertaken by is_innerrel_unique_for(); this function
955  * is a frontend that is mainly concerned with caching the answers.
956  * In particular, the force_cache argument allows overriding the internal
957  * heuristic about whether to cache negative answers; it should be "true"
958  * if making an inquiry that is not part of the normal bottom-up join search
959  * sequence.
960  */
961 bool
963  Relids outerrelids,
964  RelOptInfo *innerrel,
965  JoinType jointype,
966  List *restrictlist,
967  bool force_cache)
968 {
969  MemoryContext old_context;
970  ListCell *lc;
971 
972  /* Certainly can't prove uniqueness when there are no joinclauses */
973  if (restrictlist == NIL)
974  return false;
975 
976  /*
977  * Make a quick check to eliminate cases in which we will surely be unable
978  * to prove uniqueness of the innerrel.
979  */
980  if (!rel_supports_distinctness(root, innerrel))
981  return false;
982 
983  /*
984  * Query the cache to see if we've managed to prove that innerrel is
985  * unique for any subset of this outerrel. We don't need an exact match,
986  * as extra outerrels can't make the innerrel any less unique (or more
987  * formally, the restrictlist for a join to a superset outerrel must be a
988  * superset of the conditions we successfully used before).
989  */
990  foreach(lc, innerrel->unique_for_rels)
991  {
992  Relids unique_for_rels = (Relids) lfirst(lc);
993 
994  if (bms_is_subset(unique_for_rels, outerrelids))
995  return true; /* Success! */
996  }
997 
998  /*
999  * Conversely, we may have already determined that this outerrel, or some
1000  * superset thereof, cannot prove this innerrel to be unique.
1001  */
1002  foreach(lc, innerrel->non_unique_for_rels)
1003  {
1004  Relids unique_for_rels = (Relids) lfirst(lc);
1005 
1006  if (bms_is_subset(outerrelids, unique_for_rels))
1007  return false;
1008  }
1009 
1010  /* No cached information, so try to make the proof. */
1011  if (is_innerrel_unique_for(root, outerrelids, innerrel,
1012  jointype, restrictlist))
1013  {
1014  /*
1015  * Cache the positive result for future probes, being sure to keep it
1016  * in the planner_cxt even if we are working in GEQO.
1017  *
1018  * Note: one might consider trying to isolate the minimal subset of
1019  * the outerrels that proved the innerrel unique. But it's not worth
1020  * the trouble, because the planner builds up joinrels incrementally
1021  * and so we'll see the minimally sufficient outerrels before any
1022  * supersets of them anyway.
1023  */
1024  old_context = MemoryContextSwitchTo(root->planner_cxt);
1025  innerrel->unique_for_rels = lappend(innerrel->unique_for_rels,
1026  bms_copy(outerrelids));
1027  MemoryContextSwitchTo(old_context);
1028 
1029  return true; /* Success! */
1030  }
1031  else
1032  {
1033  /*
1034  * None of the join conditions for outerrel proved innerrel unique, so
1035  * we can safely reject this outerrel or any subset of it in future
1036  * checks.
1037  *
1038  * However, in normal planning mode, caching this knowledge is totally
1039  * pointless; it won't be queried again, because we build up joinrels
1040  * from smaller to larger. It is useful in GEQO mode, where the
1041  * knowledge can be carried across successive planning attempts; and
1042  * it's likely to be useful when using join-search plugins, too. Hence
1043  * cache when join_search_private is non-NULL. (Yeah, that's a hack,
1044  * but it seems reasonable.)
1045  *
1046  * Also, allow callers to override that heuristic and force caching;
1047  * that's useful for reduce_unique_semijoins, which calls here before
1048  * the normal join search starts.
1049  */
1050  if (force_cache || root->join_search_private)
1051  {
1052  old_context = MemoryContextSwitchTo(root->planner_cxt);
1053  innerrel->non_unique_for_rels =
1054  lappend(innerrel->non_unique_for_rels,
1055  bms_copy(outerrelids));
1056  MemoryContextSwitchTo(old_context);
1057  }
1058 
1059  return false;
1060  }
1061 }
1062 
1063 /*
1064  * is_innerrel_unique_for
1065  * Check if the innerrel provably contains at most one tuple matching any
1066  * tuple from the outerrel, based on join clauses in the 'restrictlist'.
1067  */
1068 static bool
1070  Relids outerrelids,
1071  RelOptInfo *innerrel,
1072  JoinType jointype,
1073  List *restrictlist)
1074 {
1075  List *clause_list = NIL;
1076  ListCell *lc;
1077 
1078  /*
1079  * Search for mergejoinable clauses that constrain the inner rel against
1080  * the outer rel. If an operator is mergejoinable then it behaves like
1081  * equality for some btree opclass, so it's what we want. The
1082  * mergejoinability test also eliminates clauses containing volatile
1083  * functions, which we couldn't depend on.
1084  */
1085  foreach(lc, restrictlist)
1086  {
1087  RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(lc);
1088 
1089  /*
1090  * As noted above, if it's a pushed-down clause and we're at an outer
1091  * join, we can't use it.
1092  */
1093  if (restrictinfo->is_pushed_down && IS_OUTER_JOIN(jointype))
1094  continue;
1095 
1096  /* Ignore if it's not a mergejoinable clause */
1097  if (!restrictinfo->can_join ||
1098  restrictinfo->mergeopfamilies == NIL)
1099  continue; /* not mergejoinable */
1100 
1101  /*
1102  * Check if clause has the form "outer op inner" or "inner op outer",
1103  * and if so mark which side is inner.
1104  */
1105  if (!clause_sides_match_join(restrictinfo, outerrelids,
1106  innerrel->relids))
1107  continue; /* no good for these input relations */
1108 
1109  /* OK, add to list */
1110  clause_list = lappend(clause_list, restrictinfo);
1111  }
1112 
1113  /* Let rel_is_distinct_for() do the hard work */
1114  return rel_is_distinct_for(root, innerrel, clause_list);
1115 }
#define NIL
Definition: pg_list.h:69
Relids ph_needed
Definition: relation.h:2066
List * unique_for_rels
Definition: relation.h:580
#define IsA(nodeptr, _type_)
Definition: nodes.h:560
bool query_is_distinct_for(Query *query, List *colnos, List *opids)
Definition: analyzejoins.c:774
void remove_join_clause_from_rels(PlannerInfo *root, RestrictInfo *restrictinfo, Relids join_relids)
Definition: joininfo.c:122
Index varlevelsup
Definition: primnodes.h:173
TargetEntry * get_sortgroupclause_tle(SortGroupClause *sgClause, List *targetList)
Definition: tlist.c:370
bool predOK
Definition: relation.h:664
#define forboth(cell1, list1, cell2, list2)
Definition: pg_list.h:180
void reduce_unique_semijoins(PlannerInfo *root)
Definition: analyzejoins.c:512
Bitmapset * bms_copy(const Bitmapset *a)
Definition: bitmapset.c:111
Relids ph_eval_at
Definition: relation.h:2064
PlaceHolderVar * ph_var
Definition: relation.h:2063
RelOptKind reloptkind
Definition: relation.h:522
static int32 next
Definition: blutils.c:210
Relids * attr_needed
Definition: relation.h:558
List * join_info_list
Definition: relation.h:250
Relids required_relids
Definition: relation.h:1765
Relids min_righthand
Definition: relation.h:1918
void * join_search_private
Definition: relation.h:316
#define castNode(_type_, nodeptr)
Definition: nodes.h:578
static Oid distinct_col_search(int colno, List *colnos, List *opids)
Definition: analyzejoins.c:923
bool hasAggs
Definition: parsenodes.h:123
Relids clause_relids
Definition: relation.h:1762
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
#define IS_OUTER_JOIN(jointype)
Definition: nodes.h:722
List * groupingSets
Definition: parsenodes.h:148
List * list_copy(const List *oldlist)
Definition: list.c:1160
Definition: nodes.h:509
Relids left_relids
Definition: relation.h:1774
AttrNumber varattno
Definition: primnodes.h:168
bool bms_get_singleton_member(const Bitmapset *a, int *member)
Definition: bitmapset.c:569
List * list_concat(List *list1, List *list2)
Definition: list.c:321
return result
Definition: formatting.c:1633
static void remove_rel_from_query(PlannerInfo *root, int relid, Relids joinrelids)
Definition: analyzejoins.c:313
List * list_delete_ptr(List *list, void *datum)
Definition: list.c:590
bool immediate
Definition: relation.h:666
unsigned int Oid
Definition: postgres_ext.h:31
Definition: primnodes.h:163
List * lappend_oid(List *list, Oid datum)
Definition: list.c:164
#define OidIsValid(objectId)
Definition: c.h:538
List * mergeopfamilies
Definition: relation.h:1792
Relids syn_lefthand
Definition: relation.h:1919
JoinType
Definition: nodes.h:673
List * targetList
Definition: parsenodes.h:138
struct RelOptInfo ** simple_rel_array
Definition: relation.h:179
bool unique
Definition: relation.h:665
Relids syn_righthand
Definition: relation.h:1920
void distribute_restrictinfo_to_rels(PlannerInfo *root, RestrictInfo *restrictinfo)
Definition: initsplan.c:2205
bool resjunk
Definition: primnodes.h:1375
#define linitial(l)
Definition: pg_list.h:111
List * distinctClause
Definition: parsenodes.h:154
#define ERROR
Definition: elog.h:43
Expr * phexpr
Definition: relation.h:1850
#define lfirst_int(lc)
Definition: pg_list.h:107
List * generate_join_implied_equalities(PlannerInfo *root, Relids join_relids, Relids outer_relids, RelOptInfo *inner_rel)
Definition: equivclass.c:1033
Node * get_leftop(const Expr *clause)
Definition: clauses.c:199
static bool rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list)
Definition: analyzejoins.c:648
bool can_join
Definition: relation.h:1753
bool bms_is_subset(const Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:308
#define lfirst_node(type, lc)
Definition: pg_list.h:109
List * joininfo
Definition: relation.h:589
bool outer_is_left
Definition: relation.h:1802
AttrNumber resno
Definition: primnodes.h:1369
static bool join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
Definition: analyzejoins.c:159
static ListCell * list_head(const List *l)
Definition: pg_list.h:77
static bool clause_sides_match_join(RestrictInfo *rinfo, Relids outerrelids, Relids innerrelids)
Definition: analyzejoins.c:127
Relids relids
Definition: relation.h:525
int simple_rel_array_size
Definition: relation.h:180
#define lnext(lc)
Definition: pg_list.h:105
List * non_unique_for_rels
Definition: relation.h:582
Relids pull_varnos(Node *node)
Definition: var.c:95
List * lappend_int(List *list, int datum)
Definition: list.c:146
Index relid
Definition: relation.h:553
Bitmapset * Relids
Definition: relation.h:28
bool innerrel_is_unique(PlannerInfo *root, Relids outerrelids, RelOptInfo *innerrel, JoinType jointype, List *restrictlist, bool force_cache)
Definition: analyzejoins.c:962
List * lappend(List *list, void *datum)
Definition: list.c:128
RangeTblEntry ** simple_rte_array
Definition: relation.h:188
Expr * clause
Definition: relation.h:1747
bool bms_is_empty(const Bitmapset *a)
Definition: bitmapset.c:663
Index varno
Definition: primnodes.h:166
bool delay_upper_joins
Definition: relation.h:1923
int bms_singleton_member(const Bitmapset *a)
Definition: bitmapset.c:526
Relids ph_lateral
Definition: relation.h:2065
unsigned int Index
Definition: c.h:365
RTEKind rtekind
Definition: relation.h:555
List * indexlist
Definition: relation.h:562
#define InvalidOid
Definition: postgres_ext.h:36
bool is_pushed_down
Definition: relation.h:1749
bool hasTargetSRFs
Definition: parsenodes.h:125
Relids right_relids
Definition: relation.h:1775
#define NULL
Definition: c.h:229
#define Assert(condition)
Definition: c.h:675
#define lfirst(lc)
Definition: pg_list.h:106
bool equality_ops_are_compatible(Oid opno1, Oid opno2)
Definition: lsyscache.c:695
List * remove_useless_joins(PlannerInfo *root, List *joinlist)
Definition: analyzejoins.c:60
JoinType jointype
Definition: relation.h:1921
Bitmapset * bms_union(const Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:218
static int list_length(const List *l)
Definition: pg_list.h:89
SetOperation op
Definition: parsenodes.h:1560
bool query_supports_distinctness(Query *query)
Definition: analyzejoins.c:737
#define nodeTag(nodeptr)
Definition: nodes.h:514
Node * get_rightop(const Expr *clause)
Definition: clauses.c:216
bool bms_overlap(const Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:443
Node * setOperations
Definition: parsenodes.h:163
Query * subquery
Definition: parsenodes.h:959
List * groupClause
Definition: parsenodes.h:146
AttrNumber max_attr
Definition: relation.h:557
List * placeholder_list
Definition: relation.h:258
static List * remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved)
Definition: analyzejoins.c:458
static bool is_innerrel_unique_for(PlannerInfo *root, Relids outerrelids, RelOptInfo *innerrel, JoinType jointype, List *restrictlist)
MemoryContext planner_cxt
Definition: relation.h:287
#define elog
Definition: elog.h:219
bool relation_has_unique_index_for(PlannerInfo *root, RelOptInfo *rel, List *restrictlist, List *exprlist, List *oprlist)
Definition: indxpath.c:2960
RelOptInfo * find_base_rel(PlannerInfo *root, int relid)
Definition: relnode.c:243
static bool rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel)
Definition: analyzejoins.c:592
Node * havingQual
Definition: parsenodes.h:150
List * indpred
Definition: relation.h:654
Bitmapset * bms_del_member(Bitmapset *a, int x)
Definition: bitmapset.c:735
Definition: pg_list.h:45
bool bms_is_member(int x, const Bitmapset *a)
Definition: bitmapset.c:420
Relids min_lefthand
Definition: relation.h:1917
#define lfirst_oid(lc)
Definition: pg_list.h:108
bool bms_equal(const Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:131
AttrNumber min_attr
Definition: relation.h:556