PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
analyzejoins.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * analyzejoins.c
4  * Routines for simplifying joins after initial query analysis
5  *
6  * While we do a great deal of join simplification in prep/prepjointree.c,
7  * certain optimizations cannot be performed at that stage for lack of
8  * detailed information about the query. The routines here are invoked
9  * after initsplan.c has done its work, and can do additional join removal
10  * and simplification steps based on the information extracted. The penalty
11  * is that we have to work harder to clean up after ourselves when we modify
12  * the query, since the derived data structures have to be updated too.
13  *
14  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
15  * Portions Copyright (c) 1994, Regents of the University of California
16  *
17  *
18  * IDENTIFICATION
19  * src/backend/optimizer/plan/analyzejoins.c
20  *
21  *-------------------------------------------------------------------------
22  */
23 #include "postgres.h"
24 
25 #include "nodes/nodeFuncs.h"
26 #include "optimizer/clauses.h"
27 #include "optimizer/joininfo.h"
28 #include "optimizer/pathnode.h"
29 #include "optimizer/paths.h"
30 #include "optimizer/planmain.h"
31 #include "optimizer/tlist.h"
32 #include "optimizer/var.h"
33 #include "utils/lsyscache.h"
34 
35 /* local functions */
36 static bool join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo);
37 static void remove_rel_from_query(PlannerInfo *root, int relid,
38  Relids joinrelids);
39 static List *remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved);
40 static bool rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel);
41 static bool rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel,
42  List *clause_list);
43 static Oid distinct_col_search(int colno, List *colnos, List *opids);
44 
45 
46 /*
47  * remove_useless_joins
48  * Check for relations that don't actually need to be joined at all,
49  * and remove them from the query.
50  *
51  * We are passed the current joinlist and return the updated list. Other
52  * data structures that have to be updated are accessible via "root".
53  */
54 List *
56 {
57  ListCell *lc;
58 
59  /*
60  * We are only interested in relations that are left-joined to, so we can
61  * scan the join_info_list to find them easily.
62  */
63 restart:
64  foreach(lc, root->join_info_list)
65  {
66  SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
67  int innerrelid;
68  int nremoved;
69 
70  /* Skip if not removable */
71  if (!join_is_removable(root, sjinfo))
72  continue;
73 
74  /*
75  * Currently, join_is_removable can only succeed when the sjinfo's
76  * righthand is a single baserel. Remove that rel from the query and
77  * joinlist.
78  */
79  innerrelid = bms_singleton_member(sjinfo->min_righthand);
80 
81  remove_rel_from_query(root, innerrelid,
82  bms_union(sjinfo->min_lefthand,
83  sjinfo->min_righthand));
84 
85  /* We verify that exactly one reference gets removed from joinlist */
86  nremoved = 0;
87  joinlist = remove_rel_from_joinlist(joinlist, innerrelid, &nremoved);
88  if (nremoved != 1)
89  elog(ERROR, "failed to find relation %d in joinlist", innerrelid);
90 
91  /*
92  * We can delete this SpecialJoinInfo from the list too, since it's no
93  * longer of interest.
94  */
95  root->join_info_list = list_delete_ptr(root->join_info_list, sjinfo);
96 
97  /*
98  * Restart the scan. This is necessary to ensure we find all
99  * removable joins independently of ordering of the join_info_list
100  * (note that removal of attr_needed bits may make a join appear
101  * removable that did not before). Also, since we just deleted the
102  * current list cell, we'd have to have some kluge to continue the
103  * list scan anyway.
104  */
105  goto restart;
106  }
107 
108  return joinlist;
109 }
110 
111 /*
112  * clause_sides_match_join
113  * Determine whether a join clause is of the right form to use in this join.
114  *
115  * We already know that the clause is a binary opclause referencing only the
116  * rels in the current join. The point here is to check whether it has the
117  * form "outerrel_expr op innerrel_expr" or "innerrel_expr op outerrel_expr",
118  * rather than mixing outer and inner vars on either side. If it matches,
119  * we set the transient flag outer_is_left to identify which side is which.
120  */
121 static inline bool
123  Relids innerrelids)
124 {
125  if (bms_is_subset(rinfo->left_relids, outerrelids) &&
126  bms_is_subset(rinfo->right_relids, innerrelids))
127  {
128  /* lefthand side is outer */
129  rinfo->outer_is_left = true;
130  return true;
131  }
132  else if (bms_is_subset(rinfo->left_relids, innerrelids) &&
133  bms_is_subset(rinfo->right_relids, outerrelids))
134  {
135  /* righthand side is outer */
136  rinfo->outer_is_left = false;
137  return true;
138  }
139  return false; /* no good for these input relations */
140 }
141 
142 /*
143  * join_is_removable
144  * Check whether we need not perform this special join at all, because
145  * it will just duplicate its left input.
146  *
147  * This is true for a left join for which the join condition cannot match
148  * more than one inner-side row. (There are other possibly interesting
149  * cases, but we don't have the infrastructure to prove them.) We also
150  * have to check that the inner side doesn't generate any variables needed
151  * above the join.
152  */
153 static bool
155 {
156  int innerrelid;
157  RelOptInfo *innerrel;
158  Relids joinrelids;
159  List *clause_list = NIL;
160  ListCell *l;
161  int attroff;
162 
163  /*
164  * Must be a non-delaying left join to a single baserel, else we aren't
165  * going to be able to do anything with it.
166  */
167  if (sjinfo->jointype != JOIN_LEFT ||
168  sjinfo->delay_upper_joins)
169  return false;
170 
171  if (!bms_get_singleton_member(sjinfo->min_righthand, &innerrelid))
172  return false;
173 
174  innerrel = find_base_rel(root, innerrelid);
175 
176  /*
177  * Before we go to the effort of checking whether any innerrel variables
178  * are needed above the join, make a quick check to eliminate cases in
179  * which we will surely be unable to prove uniqueness of the innerrel.
180  */
181  if (!rel_supports_distinctness(root, innerrel))
182  return false;
183 
184  /* Compute the relid set for the join we are considering */
185  joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
186 
187  /*
188  * We can't remove the join if any inner-rel attributes are used above the
189  * join.
190  *
191  * Note that this test only detects use of inner-rel attributes in higher
192  * join conditions and the target list. There might be such attributes in
193  * pushed-down conditions at this join, too. We check that case below.
194  *
195  * As a micro-optimization, it seems better to start with max_attr and
196  * count down rather than starting with min_attr and counting up, on the
197  * theory that the system attributes are somewhat less likely to be wanted
198  * and should be tested last.
199  */
200  for (attroff = innerrel->max_attr - innerrel->min_attr;
201  attroff >= 0;
202  attroff--)
203  {
204  if (!bms_is_subset(innerrel->attr_needed[attroff], joinrelids))
205  return false;
206  }
207 
208  /*
209  * Similarly check that the inner rel isn't needed by any PlaceHolderVars
210  * that will be used above the join. We only need to fail if such a PHV
211  * actually references some inner-rel attributes; but the correct check
212  * for that is relatively expensive, so we first check against ph_eval_at,
213  * which must mention the inner rel if the PHV uses any inner-rel attrs as
214  * non-lateral references. Note that if the PHV's syntactic scope is just
215  * the inner rel, we can't drop the rel even if the PHV is variable-free.
216  */
217  foreach(l, root->placeholder_list)
218  {
219  PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l);
220 
221  if (bms_overlap(phinfo->ph_lateral, innerrel->relids))
222  return false; /* it references innerrel laterally */
223  if (bms_is_subset(phinfo->ph_needed, joinrelids))
224  continue; /* PHV is not used above the join */
225  if (!bms_overlap(phinfo->ph_eval_at, innerrel->relids))
226  continue; /* it definitely doesn't reference innerrel */
227  if (bms_is_subset(phinfo->ph_eval_at, innerrel->relids))
228  return false; /* there isn't any other place to eval PHV */
229  if (bms_overlap(pull_varnos((Node *) phinfo->ph_var->phexpr),
230  innerrel->relids))
231  return false; /* it does reference innerrel */
232  }
233 
234  /*
235  * Search for mergejoinable clauses that constrain the inner rel against
236  * either the outer rel or a pseudoconstant. If an operator is
237  * mergejoinable then it behaves like equality for some btree opclass, so
238  * it's what we want. The mergejoinability test also eliminates clauses
239  * containing volatile functions, which we couldn't depend on.
240  */
241  foreach(l, innerrel->joininfo)
242  {
243  RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
244 
245  /*
246  * If it's not a join clause for this outer join, we can't use it.
247  * Note that if the clause is pushed-down, then it is logically from
248  * above the outer join, even if it references no other rels (it might
249  * be from WHERE, for example).
250  */
251  if (restrictinfo->is_pushed_down ||
252  !bms_equal(restrictinfo->required_relids, joinrelids))
253  {
254  /*
255  * If such a clause actually references the inner rel then join
256  * removal has to be disallowed. We have to check this despite
257  * the previous attr_needed checks because of the possibility of
258  * pushed-down clauses referencing the rel.
259  */
260  if (bms_is_member(innerrelid, restrictinfo->clause_relids))
261  return false;
262  continue; /* else, ignore; not useful here */
263  }
264 
265  /* Ignore if it's not a mergejoinable clause */
266  if (!restrictinfo->can_join ||
267  restrictinfo->mergeopfamilies == NIL)
268  continue; /* not mergejoinable */
269 
270  /*
271  * Check if clause has the form "outer op inner" or "inner op outer",
272  * and if so mark which side is inner.
273  */
274  if (!clause_sides_match_join(restrictinfo, sjinfo->min_lefthand,
275  innerrel->relids))
276  continue; /* no good for these input relations */
277 
278  /* OK, add to list */
279  clause_list = lappend(clause_list, restrictinfo);
280  }
281 
282  /*
283  * Now that we have the relevant equality join clauses, try to prove the
284  * innerrel distinct.
285  */
286  if (rel_is_distinct_for(root, innerrel, clause_list))
287  return true;
288 
289  /*
290  * Some day it would be nice to check for other methods of establishing
291  * distinctness.
292  */
293  return false;
294 }
295 
296 
297 /*
298  * Remove the target relid from the planner's data structures, having
299  * determined that there is no need to include it in the query.
300  *
301  * We are not terribly thorough here. We must make sure that the rel is
302  * no longer treated as a baserel, and that attributes of other baserels
303  * are no longer marked as being needed at joins involving this rel.
304  * Also, join quals involving the rel have to be removed from the joininfo
305  * lists, but only if they belong to the outer join identified by joinrelids.
306  */
307 static void
308 remove_rel_from_query(PlannerInfo *root, int relid, Relids joinrelids)
309 {
310  RelOptInfo *rel = find_base_rel(root, relid);
311  List *joininfos;
312  Index rti;
313  ListCell *l;
314  ListCell *nextl;
315 
316  /*
317  * Mark the rel as "dead" to show it is no longer part of the join tree.
318  * (Removing it from the baserel array altogether seems too risky.)
319  */
320  rel->reloptkind = RELOPT_DEADREL;
321 
322  /*
323  * Remove references to the rel from other baserels' attr_needed arrays.
324  */
325  for (rti = 1; rti < root->simple_rel_array_size; rti++)
326  {
327  RelOptInfo *otherrel = root->simple_rel_array[rti];
328  int attroff;
329 
330  /* there may be empty slots corresponding to non-baserel RTEs */
331  if (otherrel == NULL)
332  continue;
333 
334  Assert(otherrel->relid == rti); /* sanity check on array */
335 
336  /* no point in processing target rel itself */
337  if (otherrel == rel)
338  continue;
339 
340  for (attroff = otherrel->max_attr - otherrel->min_attr;
341  attroff >= 0;
342  attroff--)
343  {
344  otherrel->attr_needed[attroff] =
345  bms_del_member(otherrel->attr_needed[attroff], relid);
346  }
347  }
348 
349  /*
350  * Likewise remove references from SpecialJoinInfo data structures.
351  *
352  * This is relevant in case the outer join we're deleting is nested inside
353  * other outer joins: the upper joins' relid sets have to be adjusted. The
354  * RHS of the target outer join will be made empty here, but that's OK
355  * since caller will delete that SpecialJoinInfo entirely.
356  */
357  foreach(l, root->join_info_list)
358  {
359  SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(l);
360 
361  sjinfo->min_lefthand = bms_del_member(sjinfo->min_lefthand, relid);
362  sjinfo->min_righthand = bms_del_member(sjinfo->min_righthand, relid);
363  sjinfo->syn_lefthand = bms_del_member(sjinfo->syn_lefthand, relid);
364  sjinfo->syn_righthand = bms_del_member(sjinfo->syn_righthand, relid);
365  }
366 
367  /*
368  * Likewise remove references from PlaceHolderVar data structures,
369  * removing any no-longer-needed placeholders entirely.
370  *
371  * Removal is a bit tricker than it might seem: we can remove PHVs that
372  * are used at the target rel and/or in the join qual, but not those that
373  * are used at join partner rels or above the join. It's not that easy to
374  * distinguish PHVs used at partner rels from those used in the join qual,
375  * since they will both have ph_needed sets that are subsets of
376  * joinrelids. However, a PHV used at a partner rel could not have the
377  * target rel in ph_eval_at, so we check that while deciding whether to
378  * remove or just update the PHV. There is no corresponding test in
379  * join_is_removable because it doesn't need to distinguish those cases.
380  */
381  for (l = list_head(root->placeholder_list); l != NULL; l = nextl)
382  {
383  PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l);
384 
385  nextl = lnext(l);
386  Assert(!bms_is_member(relid, phinfo->ph_lateral));
387  if (bms_is_subset(phinfo->ph_needed, joinrelids) &&
388  bms_is_member(relid, phinfo->ph_eval_at))
390  phinfo);
391  else
392  {
393  phinfo->ph_eval_at = bms_del_member(phinfo->ph_eval_at, relid);
394  Assert(!bms_is_empty(phinfo->ph_eval_at));
395  phinfo->ph_needed = bms_del_member(phinfo->ph_needed, relid);
396  }
397  }
398 
399  /*
400  * Remove any joinquals referencing the rel from the joininfo lists.
401  *
402  * In some cases, a joinqual has to be put back after deleting its
403  * reference to the target rel. This can occur for pseudoconstant and
404  * outerjoin-delayed quals, which can get marked as requiring the rel in
405  * order to force them to be evaluated at or above the join. We can't
406  * just discard them, though. Only quals that logically belonged to the
407  * outer join being discarded should be removed from the query.
408  *
409  * We must make a copy of the rel's old joininfo list before starting the
410  * loop, because otherwise remove_join_clause_from_rels would destroy the
411  * list while we're scanning it.
412  */
413  joininfos = list_copy(rel->joininfo);
414  foreach(l, joininfos)
415  {
416  RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
417 
418  remove_join_clause_from_rels(root, rinfo, rinfo->required_relids);
419 
420  if (rinfo->is_pushed_down ||
421  !bms_equal(rinfo->required_relids, joinrelids))
422  {
423  /* Recheck that qual doesn't actually reference the target rel */
424  Assert(!bms_is_member(relid, rinfo->clause_relids));
425 
426  /*
427  * The required_relids probably aren't shared with anything else,
428  * but let's copy them just to be sure.
429  */
430  rinfo->required_relids = bms_copy(rinfo->required_relids);
432  relid);
433  distribute_restrictinfo_to_rels(root, rinfo);
434  }
435  }
436 
437  /*
438  * There may be references to the rel in root->fkey_list, but if so,
439  * match_foreign_keys_to_quals() will get rid of them.
440  */
441 }
442 
443 /*
444  * Remove any occurrences of the target relid from a joinlist structure.
445  *
446  * It's easiest to build a whole new list structure, so we handle it that
447  * way. Efficiency is not a big deal here.
448  *
449  * *nremoved is incremented by the number of occurrences removed (there
450  * should be exactly one, but the caller checks that).
451  */
452 static List *
453 remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved)
454 {
455  List *result = NIL;
456  ListCell *jl;
457 
458  foreach(jl, joinlist)
459  {
460  Node *jlnode = (Node *) lfirst(jl);
461 
462  if (IsA(jlnode, RangeTblRef))
463  {
464  int varno = ((RangeTblRef *) jlnode)->rtindex;
465 
466  if (varno == relid)
467  (*nremoved)++;
468  else
469  result = lappend(result, jlnode);
470  }
471  else if (IsA(jlnode, List))
472  {
473  /* Recurse to handle subproblem */
474  List *sublist;
475 
476  sublist = remove_rel_from_joinlist((List *) jlnode,
477  relid, nremoved);
478  /* Avoid including empty sub-lists in the result */
479  if (sublist)
480  result = lappend(result, sublist);
481  }
482  else
483  {
484  elog(ERROR, "unrecognized joinlist node type: %d",
485  (int) nodeTag(jlnode));
486  }
487  }
488 
489  return result;
490 }
491 
492 
493 /*
494  * rel_supports_distinctness
495  * Could the relation possibly be proven distinct on some set of columns?
496  *
497  * This is effectively a pre-checking function for rel_is_distinct_for().
498  * It must return TRUE if rel_is_distinct_for() could possibly return TRUE
499  * with this rel, but it should not expend a lot of cycles. The idea is
500  * that callers can avoid doing possibly-expensive processing to compute
501  * rel_is_distinct_for()'s argument lists if the call could not possibly
502  * succeed.
503  */
504 static bool
506 {
507  /* We only know about baserels ... */
508  if (rel->reloptkind != RELOPT_BASEREL)
509  return false;
510  if (rel->rtekind == RTE_RELATION)
511  {
512  /*
513  * For a plain relation, we only know how to prove uniqueness by
514  * reference to unique indexes. Make sure there's at least one
515  * suitable unique index. It must be immediately enforced, and if
516  * it's a partial index, it must match the query. (Keep these
517  * conditions in sync with relation_has_unique_index_for!)
518  */
519  ListCell *lc;
520 
521  foreach(lc, rel->indexlist)
522  {
523  IndexOptInfo *ind = (IndexOptInfo *) lfirst(lc);
524 
525  if (ind->unique && ind->immediate &&
526  (ind->indpred == NIL || ind->predOK))
527  return true;
528  }
529  }
530  else if (rel->rtekind == RTE_SUBQUERY)
531  {
532  Query *subquery = root->simple_rte_array[rel->relid]->subquery;
533 
534  /* Check if the subquery has any qualities that support distinctness */
535  if (query_supports_distinctness(subquery))
536  return true;
537  }
538  /* We have no proof rules for any other rtekinds. */
539  return false;
540 }
541 
542 /*
543  * rel_is_distinct_for
544  * Does the relation return only distinct rows according to clause_list?
545  *
546  * clause_list is a list of join restriction clauses involving this rel and
547  * some other one. Return true if no two rows emitted by this rel could
548  * possibly join to the same row of the other rel.
549  *
550  * The caller must have already determined that each condition is a
551  * mergejoinable equality with an expression in this relation on one side, and
552  * an expression not involving this relation on the other. The transient
553  * outer_is_left flag is used to identify which side references this relation:
554  * left side if outer_is_left is false, right side if it is true.
555  *
556  * Note that the passed-in clause_list may be destructively modified! This
557  * is OK for current uses, because the clause_list is built by the caller for
558  * the sole purpose of passing to this function.
559  */
560 static bool
561 rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list)
562 {
563  /*
564  * We could skip a couple of tests here if we assume all callers checked
565  * rel_supports_distinctness first, but it doesn't seem worth taking any
566  * risk for.
567  */
568  if (rel->reloptkind != RELOPT_BASEREL)
569  return false;
570  if (rel->rtekind == RTE_RELATION)
571  {
572  /*
573  * Examine the indexes to see if we have a matching unique index.
574  * relation_has_unique_index_for automatically adds any usable
575  * restriction clauses for the rel, so we needn't do that here.
576  */
577  if (relation_has_unique_index_for(root, rel, clause_list, NIL, NIL))
578  return true;
579  }
580  else if (rel->rtekind == RTE_SUBQUERY)
581  {
582  Index relid = rel->relid;
583  Query *subquery = root->simple_rte_array[relid]->subquery;
584  List *colnos = NIL;
585  List *opids = NIL;
586  ListCell *l;
587 
588  /*
589  * Build the argument lists for query_is_distinct_for: a list of
590  * output column numbers that the query needs to be distinct over, and
591  * a list of equality operators that the output columns need to be
592  * distinct according to.
593  *
594  * (XXX we are not considering restriction clauses attached to the
595  * subquery; is that worth doing?)
596  */
597  foreach(l, clause_list)
598  {
600  Oid op;
601  Var *var;
602 
603  /*
604  * Get the equality operator we need uniqueness according to.
605  * (This might be a cross-type operator and thus not exactly the
606  * same operator the subquery would consider; that's all right
607  * since query_is_distinct_for can resolve such cases.) The
608  * caller's mergejoinability test should have selected only
609  * OpExprs.
610  */
611  op = castNode(OpExpr, rinfo->clause)->opno;
612 
613  /* caller identified the inner side for us */
614  if (rinfo->outer_is_left)
615  var = (Var *) get_rightop(rinfo->clause);
616  else
617  var = (Var *) get_leftop(rinfo->clause);
618 
619  /*
620  * If inner side isn't a Var referencing a subquery output column,
621  * this clause doesn't help us.
622  */
623  if (!var || !IsA(var, Var) ||
624  var->varno != relid || var->varlevelsup != 0)
625  continue;
626 
627  colnos = lappend_int(colnos, var->varattno);
628  opids = lappend_oid(opids, op);
629  }
630 
631  if (query_is_distinct_for(subquery, colnos, opids))
632  return true;
633  }
634  return false;
635 }
636 
637 
638 /*
639  * query_supports_distinctness - could the query possibly be proven distinct
640  * on some set of output columns?
641  *
642  * This is effectively a pre-checking function for query_is_distinct_for().
643  * It must return TRUE if query_is_distinct_for() could possibly return TRUE
644  * with this query, but it should not expend a lot of cycles. The idea is
645  * that callers can avoid doing possibly-expensive processing to compute
646  * query_is_distinct_for()'s argument lists if the call could not possibly
647  * succeed.
648  */
649 bool
651 {
652  /* we don't cope with SRFs, see comment below */
653  if (query->hasTargetSRFs)
654  return false;
655 
656  /* check for features we can prove distinctness with */
657  if (query->distinctClause != NIL ||
658  query->groupClause != NIL ||
659  query->groupingSets != NIL ||
660  query->hasAggs ||
661  query->havingQual ||
662  query->setOperations)
663  return true;
664 
665  return false;
666 }
667 
668 /*
669  * query_is_distinct_for - does query never return duplicates of the
670  * specified columns?
671  *
672  * query is a not-yet-planned subquery (in current usage, it's always from
673  * a subquery RTE, which the planner avoids scribbling on).
674  *
675  * colnos is an integer list of output column numbers (resno's). We are
676  * interested in whether rows consisting of just these columns are certain
677  * to be distinct. "Distinctness" is defined according to whether the
678  * corresponding upper-level equality operators listed in opids would think
679  * the values are distinct. (Note: the opids entries could be cross-type
680  * operators, and thus not exactly the equality operators that the subquery
681  * would use itself. We use equality_ops_are_compatible() to check
682  * compatibility. That looks at btree or hash opfamily membership, and so
683  * should give trustworthy answers for all operators that we might need
684  * to deal with here.)
685  */
686 bool
687 query_is_distinct_for(Query *query, List *colnos, List *opids)
688 {
689  ListCell *l;
690  Oid opid;
691 
692  Assert(list_length(colnos) == list_length(opids));
693 
694  /*
695  * A set-returning function in the query's targetlist can result in
696  * returning duplicate rows, if the SRF is evaluated after the
697  * de-duplication step; so we play it safe and say "no" if there are any
698  * SRFs. (We could be certain that it's okay if SRFs appear only in the
699  * specified columns, since those must be evaluated before de-duplication;
700  * but it doesn't presently seem worth the complication to check that.)
701  */
702  if (query->hasTargetSRFs)
703  return false;
704 
705  /*
706  * DISTINCT (including DISTINCT ON) guarantees uniqueness if all the
707  * columns in the DISTINCT clause appear in colnos and operator semantics
708  * match.
709  */
710  if (query->distinctClause)
711  {
712  foreach(l, query->distinctClause)
713  {
714  SortGroupClause *sgc = (SortGroupClause *) lfirst(l);
716  query->targetList);
717 
718  opid = distinct_col_search(tle->resno, colnos, opids);
719  if (!OidIsValid(opid) ||
720  !equality_ops_are_compatible(opid, sgc->eqop))
721  break; /* exit early if no match */
722  }
723  if (l == NULL) /* had matches for all? */
724  return true;
725  }
726 
727  /*
728  * Similarly, GROUP BY without GROUPING SETS guarantees uniqueness if all
729  * the grouped columns appear in colnos and operator semantics match.
730  */
731  if (query->groupClause && !query->groupingSets)
732  {
733  foreach(l, query->groupClause)
734  {
735  SortGroupClause *sgc = (SortGroupClause *) lfirst(l);
737  query->targetList);
738 
739  opid = distinct_col_search(tle->resno, colnos, opids);
740  if (!OidIsValid(opid) ||
741  !equality_ops_are_compatible(opid, sgc->eqop))
742  break; /* exit early if no match */
743  }
744  if (l == NULL) /* had matches for all? */
745  return true;
746  }
747  else if (query->groupingSets)
748  {
749  /*
750  * If we have grouping sets with expressions, we probably don't have
751  * uniqueness and analysis would be hard. Punt.
752  */
753  if (query->groupClause)
754  return false;
755 
756  /*
757  * If we have no groupClause (therefore no grouping expressions), we
758  * might have one or many empty grouping sets. If there's just one,
759  * then we're returning only one row and are certainly unique. But
760  * otherwise, we know we're certainly not unique.
761  */
762  if (list_length(query->groupingSets) == 1 &&
763  ((GroupingSet *) linitial(query->groupingSets))->kind == GROUPING_SET_EMPTY)
764  return true;
765  else
766  return false;
767  }
768  else
769  {
770  /*
771  * If we have no GROUP BY, but do have aggregates or HAVING, then the
772  * result is at most one row so it's surely unique, for any operators.
773  */
774  if (query->hasAggs || query->havingQual)
775  return true;
776  }
777 
778  /*
779  * UNION, INTERSECT, EXCEPT guarantee uniqueness of the whole output row,
780  * except with ALL.
781  */
782  if (query->setOperations)
783  {
785 
786  Assert(topop->op != SETOP_NONE);
787 
788  if (!topop->all)
789  {
790  ListCell *lg;
791 
792  /* We're good if all the nonjunk output columns are in colnos */
793  lg = list_head(topop->groupClauses);
794  foreach(l, query->targetList)
795  {
796  TargetEntry *tle = (TargetEntry *) lfirst(l);
797  SortGroupClause *sgc;
798 
799  if (tle->resjunk)
800  continue; /* ignore resjunk columns */
801 
802  /* non-resjunk columns should have grouping clauses */
803  Assert(lg != NULL);
804  sgc = (SortGroupClause *) lfirst(lg);
805  lg = lnext(lg);
806 
807  opid = distinct_col_search(tle->resno, colnos, opids);
808  if (!OidIsValid(opid) ||
809  !equality_ops_are_compatible(opid, sgc->eqop))
810  break; /* exit early if no match */
811  }
812  if (l == NULL) /* had matches for all? */
813  return true;
814  }
815  }
816 
817  /*
818  * XXX Are there any other cases in which we can easily see the result
819  * must be distinct?
820  *
821  * If you do add more smarts to this function, be sure to update
822  * query_supports_distinctness() to match.
823  */
824 
825  return false;
826 }
827 
828 /*
829  * distinct_col_search - subroutine for query_is_distinct_for
830  *
831  * If colno is in colnos, return the corresponding element of opids,
832  * else return InvalidOid. (Ordinarily colnos would not contain duplicates,
833  * but if it does, we arbitrarily select the first match.)
834  */
835 static Oid
836 distinct_col_search(int colno, List *colnos, List *opids)
837 {
838  ListCell *lc1,
839  *lc2;
840 
841  forboth(lc1, colnos, lc2, opids)
842  {
843  if (colno == lfirst_int(lc1))
844  return lfirst_oid(lc2);
845  }
846  return InvalidOid;
847 }
#define NIL
Definition: pg_list.h:69
Relids ph_needed
Definition: relation.h:1979
#define IsA(nodeptr, _type_)
Definition: nodes.h:571
bool query_is_distinct_for(Query *query, List *colnos, List *opids)
Definition: analyzejoins.c:687
void remove_join_clause_from_rels(PlannerInfo *root, RestrictInfo *restrictinfo, Relids join_relids)
Definition: joininfo.c:122
Index varlevelsup
Definition: primnodes.h:173
TargetEntry * get_sortgroupclause_tle(SortGroupClause *sgClause, List *targetList)
Definition: tlist.c:370
bool predOK
Definition: relation.h:624
#define forboth(cell1, list1, cell2, list2)
Definition: pg_list.h:174
Bitmapset * bms_copy(const Bitmapset *a)
Definition: bitmapset.c:110
Relids ph_eval_at
Definition: relation.h:1977
PlaceHolderVar * ph_var
Definition: relation.h:1976
RelOptKind reloptkind
Definition: relation.h:491
Relids * attr_needed
Definition: relation.h:527
List * join_info_list
Definition: relation.h:249
Relids required_relids
Definition: relation.h:1678
Relids min_righthand
Definition: relation.h:1831
#define castNode(_type_, nodeptr)
Definition: nodes.h:589
static Oid distinct_col_search(int colno, List *colnos, List *opids)
Definition: analyzejoins.c:836
bool hasAggs
Definition: parsenodes.h:116
Relids clause_relids
Definition: relation.h:1675
List * groupingSets
Definition: parsenodes.h:139
List * list_copy(const List *oldlist)
Definition: list.c:1160
Definition: nodes.h:520
Relids left_relids
Definition: relation.h:1687
AttrNumber varattno
Definition: primnodes.h:168
bool bms_get_singleton_member(const Bitmapset *a, int *member)
Definition: bitmapset.c:539
return result
Definition: formatting.c:1618
static void remove_rel_from_query(PlannerInfo *root, int relid, Relids joinrelids)
Definition: analyzejoins.c:308
List * list_delete_ptr(List *list, void *datum)
Definition: list.c:590
bool immediate
Definition: relation.h:626
unsigned int Oid
Definition: postgres_ext.h:31
Definition: primnodes.h:163
List * lappend_oid(List *list, Oid datum)
Definition: list.c:164
#define OidIsValid(objectId)
Definition: c.h:538
List * mergeopfamilies
Definition: relation.h:1705
Relids syn_lefthand
Definition: relation.h:1832
List * targetList
Definition: parsenodes.h:131
struct RelOptInfo ** simple_rel_array
Definition: relation.h:178
bool unique
Definition: relation.h:625
Relids syn_righthand
Definition: relation.h:1833
void distribute_restrictinfo_to_rels(PlannerInfo *root, RestrictInfo *restrictinfo)
Definition: initsplan.c:2205
bool resjunk
Definition: primnodes.h:1359
#define linitial(l)
Definition: pg_list.h:110
List * distinctClause
Definition: parsenodes.h:145
#define ERROR
Definition: elog.h:43
Expr * phexpr
Definition: relation.h:1763
#define lfirst_int(lc)
Definition: pg_list.h:107
Node * get_leftop(const Expr *clause)
Definition: clauses.c:198
static bool rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list)
Definition: analyzejoins.c:561
bool can_join
Definition: relation.h:1666
bool bms_is_subset(const Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:307
List * joininfo
Definition: relation.h:553
bool outer_is_left
Definition: relation.h:1715
AttrNumber resno
Definition: primnodes.h:1353
static bool join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
Definition: analyzejoins.c:154
static ListCell * list_head(const List *l)
Definition: pg_list.h:77
static bool clause_sides_match_join(RestrictInfo *rinfo, Relids outerrelids, Relids innerrelids)
Definition: analyzejoins.c:122
Relids relids
Definition: relation.h:494
int simple_rel_array_size
Definition: relation.h:179
#define lnext(lc)
Definition: pg_list.h:105
Relids pull_varnos(Node *node)
Definition: var.c:95
List * lappend_int(List *list, int datum)
Definition: list.c:146
Index relid
Definition: relation.h:522
List * lappend(List *list, void *datum)
Definition: list.c:128
RangeTblEntry ** simple_rte_array
Definition: relation.h:187
Expr * clause
Definition: relation.h:1660
bool bms_is_empty(const Bitmapset *a)
Definition: bitmapset.c:633
Index varno
Definition: primnodes.h:166
bool delay_upper_joins
Definition: relation.h:1836
int bms_singleton_member(const Bitmapset *a)
Definition: bitmapset.c:496
Relids ph_lateral
Definition: relation.h:1978
unsigned int Index
Definition: c.h:365
RTEKind rtekind
Definition: relation.h:524
List * indexlist
Definition: relation.h:531
#define InvalidOid
Definition: postgres_ext.h:36
bool is_pushed_down
Definition: relation.h:1662
bool hasTargetSRFs
Definition: parsenodes.h:118
Relids right_relids
Definition: relation.h:1688
#define NULL
Definition: c.h:229
#define Assert(condition)
Definition: c.h:675
#define lfirst(lc)
Definition: pg_list.h:106
bool equality_ops_are_compatible(Oid opno1, Oid opno2)
Definition: lsyscache.c:695
List * remove_useless_joins(PlannerInfo *root, List *joinlist)
Definition: analyzejoins.c:55
JoinType jointype
Definition: relation.h:1834
Bitmapset * bms_union(const Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:217
static int list_length(const List *l)
Definition: pg_list.h:89
SetOperation op
Definition: parsenodes.h:1524
bool query_supports_distinctness(Query *query)
Definition: analyzejoins.c:650
#define nodeTag(nodeptr)
Definition: nodes.h:525
Node * get_rightop(const Expr *clause)
Definition: clauses.c:215
bool bms_overlap(const Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:442
Node * setOperations
Definition: parsenodes.h:154
Query * subquery
Definition: parsenodes.h:934
List * groupClause
Definition: parsenodes.h:137
AttrNumber max_attr
Definition: relation.h:526
List * placeholder_list
Definition: relation.h:257
static List * remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved)
Definition: analyzejoins.c:453
#define elog
Definition: elog.h:219
bool relation_has_unique_index_for(PlannerInfo *root, RelOptInfo *rel, List *restrictlist, List *exprlist, List *oprlist)
Definition: indxpath.c:2956
RelOptInfo * find_base_rel(PlannerInfo *root, int relid)
Definition: relnode.c:223
static bool rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel)
Definition: analyzejoins.c:505
Node * havingQual
Definition: parsenodes.h:141
List * indpred
Definition: relation.h:615
Bitmapset * bms_del_member(Bitmapset *a, int x)
Definition: bitmapset.c:705
Definition: pg_list.h:45
bool bms_is_member(int x, const Bitmapset *a)
Definition: bitmapset.c:419
Relids min_lefthand
Definition: relation.h:1830
#define lfirst_oid(lc)
Definition: pg_list.h:108
bool bms_equal(const Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:130
AttrNumber min_attr
Definition: relation.h:525