PostgreSQL Source Code  git master
pathkeys.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * pathkeys.c
4  * Utilities for matching and building path keys
5  *
6  * See src/backend/optimizer/README for a great deal of information about
7  * the nature and use of path keys.
8  *
9  *
10  * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
11  * Portions Copyright (c) 1994, Regents of the University of California
12  *
13  * IDENTIFICATION
14  * src/backend/optimizer/path/pathkeys.c
15  *
16  *-------------------------------------------------------------------------
17  */
18 #include "postgres.h"
19 
20 #include "access/stratnum.h"
21 #include "nodes/makefuncs.h"
22 #include "nodes/nodeFuncs.h"
23 #include "nodes/plannodes.h"
24 #include "optimizer/clauses.h"
25 #include "optimizer/pathnode.h"
26 #include "optimizer/paths.h"
27 #include "optimizer/tlist.h"
28 #include "utils/lsyscache.h"
29 
30 
31 static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys);
32 static bool right_merge_direction(PlannerInfo *root, PathKey *pathkey);
33 
34 
35 /****************************************************************************
36  * PATHKEY CONSTRUCTION AND REDUNDANCY TESTING
37  ****************************************************************************/
38 
39 /*
40  * make_canonical_pathkey
41  * Given the parameters for a PathKey, find any pre-existing matching
42  * pathkey in the query's list of "canonical" pathkeys. Make a new
43  * entry if there's not one already.
44  *
45  * Note that this function must not be used until after we have completed
46  * merging EquivalenceClasses. (We don't try to enforce that here; instead,
47  * equivclass.c will complain if a merge occurs after root->canon_pathkeys
48  * has become nonempty.)
49  */
50 PathKey *
52  EquivalenceClass *eclass, Oid opfamily,
53  int strategy, bool nulls_first)
54 {
55  PathKey *pk;
56  ListCell *lc;
57  MemoryContext oldcontext;
58 
59  /* The passed eclass might be non-canonical, so chase up to the top */
60  while (eclass->ec_merged)
61  eclass = eclass->ec_merged;
62 
63  foreach(lc, root->canon_pathkeys)
64  {
65  pk = (PathKey *) lfirst(lc);
66  if (eclass == pk->pk_eclass &&
67  opfamily == pk->pk_opfamily &&
68  strategy == pk->pk_strategy &&
69  nulls_first == pk->pk_nulls_first)
70  return pk;
71  }
72 
73  /*
74  * Be sure canonical pathkeys are allocated in the main planning context.
75  * Not an issue in normal planning, but it is for GEQO.
76  */
77  oldcontext = MemoryContextSwitchTo(root->planner_cxt);
78 
79  pk = makeNode(PathKey);
80  pk->pk_eclass = eclass;
81  pk->pk_opfamily = opfamily;
82  pk->pk_strategy = strategy;
83  pk->pk_nulls_first = nulls_first;
84 
85  root->canon_pathkeys = lappend(root->canon_pathkeys, pk);
86 
87  MemoryContextSwitchTo(oldcontext);
88 
89  return pk;
90 }
91 
92 /*
93  * pathkey_is_redundant
94  * Is a pathkey redundant with one already in the given list?
95  *
96  * We detect two cases:
97  *
98  * 1. If the new pathkey's equivalence class contains a constant, and isn't
99  * below an outer join, then we can disregard it as a sort key. An example:
100  * SELECT ... WHERE x = 42 ORDER BY x, y;
101  * We may as well just sort by y. Note that because of opfamily matching,
102  * this is semantically correct: we know that the equality constraint is one
103  * that actually binds the variable to a single value in the terms of any
104  * ordering operator that might go with the eclass. This rule not only lets
105  * us simplify (or even skip) explicit sorts, but also allows matching index
106  * sort orders to a query when there are don't-care index columns.
107  *
108  * 2. If the new pathkey's equivalence class is the same as that of any
109  * existing member of the pathkey list, then it is redundant. Some examples:
110  * SELECT ... ORDER BY x, x;
111  * SELECT ... ORDER BY x, x DESC;
112  * SELECT ... WHERE x = y ORDER BY x, y;
113  * In all these cases the second sort key cannot distinguish values that are
114  * considered equal by the first, and so there's no point in using it.
115  * Note in particular that we need not compare opfamily (all the opfamilies
116  * of the EC have the same notion of equality) nor sort direction.
117  *
118  * Both the given pathkey and the list members must be canonical for this
119  * to work properly, but that's okay since we no longer ever construct any
120  * non-canonical pathkeys. (Note: the notion of a pathkey *list* being
121  * canonical includes the additional requirement of no redundant entries,
122  * which is exactly what we are checking for here.)
123  *
124  * Because the equivclass.c machinery forms only one copy of any EC per query,
125  * pointer comparison is enough to decide whether canonical ECs are the same.
126  */
127 static bool
128 pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys)
129 {
130  EquivalenceClass *new_ec = new_pathkey->pk_eclass;
131  ListCell *lc;
132 
133  /* Check for EC containing a constant --- unconditionally redundant */
134  if (EC_MUST_BE_REDUNDANT(new_ec))
135  return true;
136 
137  /* If same EC already used in list, then redundant */
138  foreach(lc, pathkeys)
139  {
140  PathKey *old_pathkey = (PathKey *) lfirst(lc);
141 
142  if (new_ec == old_pathkey->pk_eclass)
143  return true;
144  }
145 
146  return false;
147 }
148 
149 /*
150  * make_pathkey_from_sortinfo
151  * Given an expression and sort-order information, create a PathKey.
152  * The result is always a "canonical" PathKey, but it might be redundant.
153  *
154  * expr is the expression, and nullable_relids is the set of base relids
155  * that are potentially nullable below it.
156  *
157  * If the PathKey is being generated from a SortGroupClause, sortref should be
158  * the SortGroupClause's SortGroupRef; otherwise zero.
159  *
160  * If rel is not NULL, it identifies a specific relation we're considering
161  * a path for, and indicates that child EC members for that relation can be
162  * considered. Otherwise child members are ignored. (See the comments for
163  * get_eclass_for_sort_expr.)
164  *
165  * create_it is true if we should create any missing EquivalenceClass
166  * needed to represent the sort key. If it's false, we return NULL if the
167  * sort key isn't already present in any EquivalenceClass.
168  */
169 static PathKey *
171  Expr *expr,
172  Relids nullable_relids,
173  Oid opfamily,
174  Oid opcintype,
175  Oid collation,
176  bool reverse_sort,
177  bool nulls_first,
178  Index sortref,
179  Relids rel,
180  bool create_it)
181 {
182  int16 strategy;
183  Oid equality_op;
184  List *opfamilies;
186 
187  strategy = reverse_sort ? BTGreaterStrategyNumber : BTLessStrategyNumber;
188 
189  /*
190  * EquivalenceClasses need to contain opfamily lists based on the family
191  * membership of mergejoinable equality operators, which could belong to
192  * more than one opfamily. So we have to look up the opfamily's equality
193  * operator and get its membership.
194  */
195  equality_op = get_opfamily_member(opfamily,
196  opcintype,
197  opcintype,
199  if (!OidIsValid(equality_op)) /* shouldn't happen */
200  elog(ERROR, "missing operator %d(%u,%u) in opfamily %u",
201  BTEqualStrategyNumber, opcintype, opcintype, opfamily);
202  opfamilies = get_mergejoin_opfamilies(equality_op);
203  if (!opfamilies) /* certainly should find some */
204  elog(ERROR, "could not find opfamilies for equality operator %u",
205  equality_op);
206 
207  /* Now find or (optionally) create a matching EquivalenceClass */
208  eclass = get_eclass_for_sort_expr(root, expr, nullable_relids,
209  opfamilies, opcintype, collation,
210  sortref, rel, create_it);
211 
212  /* Fail if no EC and !create_it */
213  if (!eclass)
214  return NULL;
215 
216  /* And finally we can find or create a PathKey node */
217  return make_canonical_pathkey(root, eclass, opfamily,
218  strategy, nulls_first);
219 }
220 
221 /*
222  * make_pathkey_from_sortop
223  * Like make_pathkey_from_sortinfo, but work from a sort operator.
224  *
225  * This should eventually go away, but we need to restructure SortGroupClause
226  * first.
227  */
228 static PathKey *
230  Expr *expr,
231  Relids nullable_relids,
232  Oid ordering_op,
233  bool nulls_first,
234  Index sortref,
235  bool create_it)
236 {
237  Oid opfamily,
238  opcintype,
239  collation;
240  int16 strategy;
241 
242  /* Find the operator in pg_amop --- failure shouldn't happen */
243  if (!get_ordering_op_properties(ordering_op,
244  &opfamily, &opcintype, &strategy))
245  elog(ERROR, "operator %u is not a valid ordering operator",
246  ordering_op);
247 
248  /* Because SortGroupClause doesn't carry collation, consult the expr */
249  collation = exprCollation((Node *) expr);
250 
251  return make_pathkey_from_sortinfo(root,
252  expr,
253  nullable_relids,
254  opfamily,
255  opcintype,
256  collation,
257  (strategy == BTGreaterStrategyNumber),
258  nulls_first,
259  sortref,
260  NULL,
261  create_it);
262 }
263 
264 
265 /****************************************************************************
266  * PATHKEY COMPARISONS
267  ****************************************************************************/
268 
269 /*
270  * compare_pathkeys
271  * Compare two pathkeys to see if they are equivalent, and if not whether
272  * one is "better" than the other.
273  *
274  * We assume the pathkeys are canonical, and so they can be checked for
275  * equality by simple pointer comparison.
276  */
278 compare_pathkeys(List *keys1, List *keys2)
279 {
280  ListCell *key1,
281  *key2;
282 
283  /*
284  * Fall out quickly if we are passed two identical lists. This mostly
285  * catches the case where both are NIL, but that's common enough to
286  * warrant the test.
287  */
288  if (keys1 == keys2)
289  return PATHKEYS_EQUAL;
290 
291  forboth(key1, keys1, key2, keys2)
292  {
293  PathKey *pathkey1 = (PathKey *) lfirst(key1);
294  PathKey *pathkey2 = (PathKey *) lfirst(key2);
295 
296  if (pathkey1 != pathkey2)
297  return PATHKEYS_DIFFERENT; /* no need to keep looking */
298  }
299 
300  /*
301  * If we reached the end of only one list, the other is longer and
302  * therefore not a subset.
303  */
304  if (key1 != NULL)
305  return PATHKEYS_BETTER1; /* key1 is longer */
306  if (key2 != NULL)
307  return PATHKEYS_BETTER2; /* key2 is longer */
308  return PATHKEYS_EQUAL;
309 }
310 
311 /*
312  * pathkeys_contained_in
313  * Common special case of compare_pathkeys: we just want to know
314  * if keys2 are at least as well sorted as keys1.
315  */
316 bool
318 {
319  switch (compare_pathkeys(keys1, keys2))
320  {
321  case PATHKEYS_EQUAL:
322  case PATHKEYS_BETTER2:
323  return true;
324  default:
325  break;
326  }
327  return false;
328 }
329 
330 /*
331  * get_cheapest_path_for_pathkeys
332  * Find the cheapest path (according to the specified criterion) that
333  * satisfies the given pathkeys and parameterization.
334  * Return NULL if no such path.
335  *
336  * 'paths' is a list of possible paths that all generate the same relation
337  * 'pathkeys' represents a required ordering (in canonical form!)
338  * 'required_outer' denotes allowable outer relations for parameterized paths
339  * 'cost_criterion' is STARTUP_COST or TOTAL_COST
340  * 'require_parallel_safe' causes us to consider only parallel-safe paths
341  */
342 Path *
344  Relids required_outer,
345  CostSelector cost_criterion,
346  bool require_parallel_safe)
347 {
348  Path *matched_path = NULL;
349  ListCell *l;
350 
351  foreach(l, paths)
352  {
353  Path *path = (Path *) lfirst(l);
354 
355  /*
356  * Since cost comparison is a lot cheaper than pathkey comparison, do
357  * that first. (XXX is that still true?)
358  */
359  if (matched_path != NULL &&
360  compare_path_costs(matched_path, path, cost_criterion) <= 0)
361  continue;
362 
363  if (require_parallel_safe && !path->parallel_safe)
364  continue;
365 
366  if (pathkeys_contained_in(pathkeys, path->pathkeys) &&
367  bms_is_subset(PATH_REQ_OUTER(path), required_outer))
368  matched_path = path;
369  }
370  return matched_path;
371 }
372 
373 /*
374  * get_cheapest_fractional_path_for_pathkeys
375  * Find the cheapest path (for retrieving a specified fraction of all
376  * the tuples) that satisfies the given pathkeys and parameterization.
377  * Return NULL if no such path.
378  *
379  * See compare_fractional_path_costs() for the interpretation of the fraction
380  * parameter.
381  *
382  * 'paths' is a list of possible paths that all generate the same relation
383  * 'pathkeys' represents a required ordering (in canonical form!)
384  * 'required_outer' denotes allowable outer relations for parameterized paths
385  * 'fraction' is the fraction of the total tuples expected to be retrieved
386  */
387 Path *
389  List *pathkeys,
390  Relids required_outer,
391  double fraction)
392 {
393  Path *matched_path = NULL;
394  ListCell *l;
395 
396  foreach(l, paths)
397  {
398  Path *path = (Path *) lfirst(l);
399 
400  /*
401  * Since cost comparison is a lot cheaper than pathkey comparison, do
402  * that first. (XXX is that still true?)
403  */
404  if (matched_path != NULL &&
405  compare_fractional_path_costs(matched_path, path, fraction) <= 0)
406  continue;
407 
408  if (pathkeys_contained_in(pathkeys, path->pathkeys) &&
409  bms_is_subset(PATH_REQ_OUTER(path), required_outer))
410  matched_path = path;
411  }
412  return matched_path;
413 }
414 
415 
416 /*
417  * get_cheapest_parallel_safe_total_inner
418  * Find the unparameterized parallel-safe path with the least total cost.
419  */
420 Path *
422 {
423  ListCell *l;
424 
425  foreach(l, paths)
426  {
427  Path *innerpath = (Path *) lfirst(l);
428 
429  if (innerpath->parallel_safe &&
430  bms_is_empty(PATH_REQ_OUTER(innerpath)))
431  return innerpath;
432  }
433 
434  return NULL;
435 }
436 
437 /****************************************************************************
438  * NEW PATHKEY FORMATION
439  ****************************************************************************/
440 
441 /*
442  * build_index_pathkeys
443  * Build a pathkeys list that describes the ordering induced by an index
444  * scan using the given index. (Note that an unordered index doesn't
445  * induce any ordering, so we return NIL.)
446  *
447  * If 'scandir' is BackwardScanDirection, build pathkeys representing a
448  * backwards scan of the index.
449  *
450  * We iterate only key columns of covering indexes, since non-key columns
451  * don't influence index ordering. The result is canonical, meaning that
452  * redundant pathkeys are removed; it may therefore have fewer entries than
453  * there are key columns in the index.
454  *
455  * Another reason for stopping early is that we may be able to tell that
456  * an index column's sort order is uninteresting for this query. However,
457  * that test is just based on the existence of an EquivalenceClass and not
458  * on position in pathkey lists, so it's not complete. Caller should call
459  * truncate_useless_pathkeys() to possibly remove more pathkeys.
460  */
461 List *
464  ScanDirection scandir)
465 {
466  List *retval = NIL;
467  ListCell *lc;
468  int i;
469 
470  if (index->sortopfamily == NULL)
471  return NIL; /* non-orderable index */
472 
473  i = 0;
474  foreach(lc, index->indextlist)
475  {
476  TargetEntry *indextle = (TargetEntry *) lfirst(lc);
477  Expr *indexkey;
478  bool reverse_sort;
479  bool nulls_first;
480  PathKey *cpathkey;
481 
482  /*
483  * INCLUDE columns are stored in index unordered, so they don't
484  * support ordered index scan.
485  */
486  if (i >= index->nkeycolumns)
487  break;
488 
489  /* We assume we don't need to make a copy of the tlist item */
490  indexkey = indextle->expr;
491 
492  if (ScanDirectionIsBackward(scandir))
493  {
494  reverse_sort = !index->reverse_sort[i];
495  nulls_first = !index->nulls_first[i];
496  }
497  else
498  {
499  reverse_sort = index->reverse_sort[i];
500  nulls_first = index->nulls_first[i];
501  }
502 
503  /*
504  * OK, try to make a canonical pathkey for this sort key. Note we're
505  * underneath any outer joins, so nullable_relids should be NULL.
506  */
507  cpathkey = make_pathkey_from_sortinfo(root,
508  indexkey,
509  NULL,
510  index->sortopfamily[i],
511  index->opcintype[i],
512  index->indexcollations[i],
513  reverse_sort,
514  nulls_first,
515  0,
516  index->rel->relids,
517  false);
518 
519  if (cpathkey)
520  {
521  /*
522  * We found the sort key in an EquivalenceClass, so it's relevant
523  * for this query. Add it to list, unless it's redundant.
524  */
525  if (!pathkey_is_redundant(cpathkey, retval))
526  retval = lappend(retval, cpathkey);
527  }
528  else
529  {
530  /*
531  * Boolean index keys might be redundant even if they do not
532  * appear in an EquivalenceClass, because of our special treatment
533  * of boolean equality conditions --- see the comment for
534  * indexcol_is_bool_constant_for_query(). If that applies, we can
535  * continue to examine lower-order index columns. Otherwise, the
536  * sort key is not an interesting sort order for this query, so we
537  * should stop considering index columns; any lower-order sort
538  * keys won't be useful either.
539  */
541  break;
542  }
543 
544  i++;
545  }
546 
547  return retval;
548 }
549 
550 /*
551  * build_expression_pathkey
552  * Build a pathkeys list that describes an ordering by a single expression
553  * using the given sort operator.
554  *
555  * expr, nullable_relids, and rel are as for make_pathkey_from_sortinfo.
556  * We induce the other arguments assuming default sort order for the operator.
557  *
558  * Similarly to make_pathkey_from_sortinfo, the result is NIL if create_it
559  * is false and the expression isn't already in some EquivalenceClass.
560  */
561 List *
563  Expr *expr,
564  Relids nullable_relids,
565  Oid opno,
566  Relids rel,
567  bool create_it)
568 {
569  List *pathkeys;
570  Oid opfamily,
571  opcintype;
572  int16 strategy;
573  PathKey *cpathkey;
574 
575  /* Find the operator in pg_amop --- failure shouldn't happen */
576  if (!get_ordering_op_properties(opno,
577  &opfamily, &opcintype, &strategy))
578  elog(ERROR, "operator %u is not a valid ordering operator",
579  opno);
580 
581  cpathkey = make_pathkey_from_sortinfo(root,
582  expr,
583  nullable_relids,
584  opfamily,
585  opcintype,
586  exprCollation((Node *) expr),
587  (strategy == BTGreaterStrategyNumber),
588  (strategy == BTGreaterStrategyNumber),
589  0,
590  rel,
591  create_it);
592 
593  if (cpathkey)
594  pathkeys = list_make1(cpathkey);
595  else
596  pathkeys = NIL;
597 
598  return pathkeys;
599 }
600 
601 /*
602  * convert_subquery_pathkeys
603  * Build a pathkeys list that describes the ordering of a subquery's
604  * result, in the terms of the outer query. This is essentially a
605  * task of conversion.
606  *
607  * 'rel': outer query's RelOptInfo for the subquery relation.
608  * 'subquery_pathkeys': the subquery's output pathkeys, in its terms.
609  * 'subquery_tlist': the subquery's output targetlist, in its terms.
610  *
611  * It is not necessary for caller to do truncate_useless_pathkeys(),
612  * because we select keys in a way that takes usefulness of the keys into
613  * account.
614  */
615 List *
617  List *subquery_pathkeys,
618  List *subquery_tlist)
619 {
620  List *retval = NIL;
621  int retvallen = 0;
622  int outer_query_keys = list_length(root->query_pathkeys);
623  ListCell *i;
624 
625  foreach(i, subquery_pathkeys)
626  {
627  PathKey *sub_pathkey = (PathKey *) lfirst(i);
628  EquivalenceClass *sub_eclass = sub_pathkey->pk_eclass;
629  PathKey *best_pathkey = NULL;
630 
631  if (sub_eclass->ec_has_volatile)
632  {
633  /*
634  * If the sub_pathkey's EquivalenceClass is volatile, then it must
635  * have come from an ORDER BY clause, and we have to match it to
636  * that same targetlist entry.
637  */
638  TargetEntry *tle;
639 
640  if (sub_eclass->ec_sortref == 0) /* can't happen */
641  elog(ERROR, "volatile EquivalenceClass has no sortref");
642  tle = get_sortgroupref_tle(sub_eclass->ec_sortref, subquery_tlist);
643  Assert(tle);
644  /* resjunk items aren't visible to outer query */
645  if (!tle->resjunk)
646  {
647  /* We can represent this sub_pathkey */
648  EquivalenceMember *sub_member;
649  Expr *outer_expr;
650  EquivalenceClass *outer_ec;
651 
652  Assert(list_length(sub_eclass->ec_members) == 1);
653  sub_member = (EquivalenceMember *) linitial(sub_eclass->ec_members);
654  outer_expr = (Expr *) makeVarFromTargetEntry(rel->relid, tle);
655 
656  /*
657  * Note: it might look funny to be setting sortref = 0 for a
658  * reference to a volatile sub_eclass. However, the
659  * expression is *not* volatile in the outer query: it's just
660  * a Var referencing whatever the subquery emitted. (IOW, the
661  * outer query isn't going to re-execute the volatile
662  * expression itself.) So this is okay. Likewise, it's
663  * correct to pass nullable_relids = NULL, because we're
664  * underneath any outer joins appearing in the outer query.
665  */
666  outer_ec =
668  outer_expr,
669  NULL,
670  sub_eclass->ec_opfamilies,
671  sub_member->em_datatype,
672  sub_eclass->ec_collation,
673  0,
674  rel->relids,
675  false);
676 
677  /*
678  * If we don't find a matching EC, sub-pathkey isn't
679  * interesting to the outer query
680  */
681  if (outer_ec)
682  best_pathkey =
684  outer_ec,
685  sub_pathkey->pk_opfamily,
686  sub_pathkey->pk_strategy,
687  sub_pathkey->pk_nulls_first);
688  }
689  }
690  else
691  {
692  /*
693  * Otherwise, the sub_pathkey's EquivalenceClass could contain
694  * multiple elements (representing knowledge that multiple items
695  * are effectively equal). Each element might match none, one, or
696  * more of the output columns that are visible to the outer query.
697  * This means we may have multiple possible representations of the
698  * sub_pathkey in the context of the outer query. Ideally we
699  * would generate them all and put them all into an EC of the
700  * outer query, thereby propagating equality knowledge up to the
701  * outer query. Right now we cannot do so, because the outer
702  * query's EquivalenceClasses are already frozen when this is
703  * called. Instead we prefer the one that has the highest "score"
704  * (number of EC peers, plus one if it matches the outer
705  * query_pathkeys). This is the most likely to be useful in the
706  * outer query.
707  */
708  int best_score = -1;
709  ListCell *j;
710 
711  foreach(j, sub_eclass->ec_members)
712  {
713  EquivalenceMember *sub_member = (EquivalenceMember *) lfirst(j);
714  Expr *sub_expr = sub_member->em_expr;
715  Oid sub_expr_type = sub_member->em_datatype;
716  Oid sub_expr_coll = sub_eclass->ec_collation;
717  ListCell *k;
718 
719  if (sub_member->em_is_child)
720  continue; /* ignore children here */
721 
722  foreach(k, subquery_tlist)
723  {
724  TargetEntry *tle = (TargetEntry *) lfirst(k);
725  Expr *tle_expr;
726  Expr *outer_expr;
727  EquivalenceClass *outer_ec;
728  PathKey *outer_pk;
729  int score;
730 
731  /* resjunk items aren't visible to outer query */
732  if (tle->resjunk)
733  continue;
734 
735  /*
736  * The targetlist entry is considered to match if it
737  * matches after sort-key canonicalization. That is
738  * needed since the sub_expr has been through the same
739  * process.
740  */
741  tle_expr = canonicalize_ec_expression(tle->expr,
742  sub_expr_type,
743  sub_expr_coll);
744  if (!equal(tle_expr, sub_expr))
745  continue;
746 
747  /*
748  * Build a representation of this targetlist entry as an
749  * outer Var.
750  */
751  outer_expr = (Expr *) makeVarFromTargetEntry(rel->relid,
752  tle);
753 
754  /* See if we have a matching EC for that */
755  outer_ec = get_eclass_for_sort_expr(root,
756  outer_expr,
757  NULL,
758  sub_eclass->ec_opfamilies,
759  sub_expr_type,
760  sub_expr_coll,
761  0,
762  rel->relids,
763  false);
764 
765  /*
766  * If we don't find a matching EC, this sub-pathkey isn't
767  * interesting to the outer query
768  */
769  if (!outer_ec)
770  continue;
771 
772  outer_pk = make_canonical_pathkey(root,
773  outer_ec,
774  sub_pathkey->pk_opfamily,
775  sub_pathkey->pk_strategy,
776  sub_pathkey->pk_nulls_first);
777  /* score = # of equivalence peers */
778  score = list_length(outer_ec->ec_members) - 1;
779  /* +1 if it matches the proper query_pathkeys item */
780  if (retvallen < outer_query_keys &&
781  list_nth(root->query_pathkeys, retvallen) == outer_pk)
782  score++;
783  if (score > best_score)
784  {
785  best_pathkey = outer_pk;
786  best_score = score;
787  }
788  }
789  }
790  }
791 
792  /*
793  * If we couldn't find a representation of this sub_pathkey, we're
794  * done (we can't use the ones to its right, either).
795  */
796  if (!best_pathkey)
797  break;
798 
799  /*
800  * Eliminate redundant ordering info; could happen if outer query
801  * equivalences subquery keys...
802  */
803  if (!pathkey_is_redundant(best_pathkey, retval))
804  {
805  retval = lappend(retval, best_pathkey);
806  retvallen++;
807  }
808  }
809 
810  return retval;
811 }
812 
813 /*
814  * build_join_pathkeys
815  * Build the path keys for a join relation constructed by mergejoin or
816  * nestloop join. This is normally the same as the outer path's keys.
817  *
818  * EXCEPTION: in a FULL or RIGHT join, we cannot treat the result as
819  * having the outer path's path keys, because null lefthand rows may be
820  * inserted at random points. It must be treated as unsorted.
821  *
822  * We truncate away any pathkeys that are uninteresting for higher joins.
823  *
824  * 'joinrel' is the join relation that paths are being formed for
825  * 'jointype' is the join type (inner, left, full, etc)
826  * 'outer_pathkeys' is the list of the current outer path's path keys
827  *
828  * Returns the list of new path keys.
829  */
830 List *
832  RelOptInfo *joinrel,
833  JoinType jointype,
834  List *outer_pathkeys)
835 {
836  if (jointype == JOIN_FULL || jointype == JOIN_RIGHT)
837  return NIL;
838 
839  /*
840  * This used to be quite a complex bit of code, but now that all pathkey
841  * sublists start out life canonicalized, we don't have to do a darn thing
842  * here!
843  *
844  * We do, however, need to truncate the pathkeys list, since it may
845  * contain pathkeys that were useful for forming this joinrel but are
846  * uninteresting to higher levels.
847  */
848  return truncate_useless_pathkeys(root, joinrel, outer_pathkeys);
849 }
850 
851 /****************************************************************************
852  * PATHKEYS AND SORT CLAUSES
853  ****************************************************************************/
854 
855 /*
856  * make_pathkeys_for_sortclauses
857  * Generate a pathkeys list that represents the sort order specified
858  * by a list of SortGroupClauses
859  *
860  * The resulting PathKeys are always in canonical form. (Actually, there
861  * is no longer any code anywhere that creates non-canonical PathKeys.)
862  *
863  * We assume that root->nullable_baserels is the set of base relids that could
864  * have gone to NULL below the SortGroupClause expressions. This is okay if
865  * the expressions came from the query's top level (ORDER BY, DISTINCT, etc)
866  * and if this function is only invoked after deconstruct_jointree. In the
867  * future we might have to make callers pass in the appropriate
868  * nullable-relids set, but for now it seems unnecessary.
869  *
870  * 'sortclauses' is a list of SortGroupClause nodes
871  * 'tlist' is the targetlist to find the referenced tlist entries in
872  */
873 List *
875  List *sortclauses,
876  List *tlist)
877 {
878  List *pathkeys = NIL;
879  ListCell *l;
880 
881  foreach(l, sortclauses)
882  {
883  SortGroupClause *sortcl = (SortGroupClause *) lfirst(l);
884  Expr *sortkey;
885  PathKey *pathkey;
886 
887  sortkey = (Expr *) get_sortgroupclause_expr(sortcl, tlist);
888  Assert(OidIsValid(sortcl->sortop));
889  pathkey = make_pathkey_from_sortop(root,
890  sortkey,
891  root->nullable_baserels,
892  sortcl->sortop,
893  sortcl->nulls_first,
894  sortcl->tleSortGroupRef,
895  true);
896 
897  /* Canonical form eliminates redundant ordering keys */
898  if (!pathkey_is_redundant(pathkey, pathkeys))
899  pathkeys = lappend(pathkeys, pathkey);
900  }
901  return pathkeys;
902 }
903 
904 /****************************************************************************
905  * PATHKEYS AND MERGECLAUSES
906  ****************************************************************************/
907 
908 /*
909  * initialize_mergeclause_eclasses
910  * Set the EquivalenceClass links in a mergeclause restrictinfo.
911  *
912  * RestrictInfo contains fields in which we may cache pointers to
913  * EquivalenceClasses for the left and right inputs of the mergeclause.
914  * (If the mergeclause is a true equivalence clause these will be the
915  * same EquivalenceClass, otherwise not.) If the mergeclause is either
916  * used to generate an EquivalenceClass, or derived from an EquivalenceClass,
917  * then it's easy to set up the left_ec and right_ec members --- otherwise,
918  * this function should be called to set them up. We will generate new
919  * EquivalenceClauses if necessary to represent the mergeclause's left and
920  * right sides.
921  *
922  * Note this is called before EC merging is complete, so the links won't
923  * necessarily point to canonical ECs. Before they are actually used for
924  * anything, update_mergeclause_eclasses must be called to ensure that
925  * they've been updated to point to canonical ECs.
926  */
927 void
929 {
930  Expr *clause = restrictinfo->clause;
931  Oid lefttype,
932  righttype;
933 
934  /* Should be a mergeclause ... */
935  Assert(restrictinfo->mergeopfamilies != NIL);
936  /* ... with links not yet set */
937  Assert(restrictinfo->left_ec == NULL);
938  Assert(restrictinfo->right_ec == NULL);
939 
940  /* Need the declared input types of the operator */
941  op_input_types(((OpExpr *) clause)->opno, &lefttype, &righttype);
942 
943  /* Find or create a matching EquivalenceClass for each side */
944  restrictinfo->left_ec =
946  (Expr *) get_leftop(clause),
947  restrictinfo->nullable_relids,
948  restrictinfo->mergeopfamilies,
949  lefttype,
950  ((OpExpr *) clause)->inputcollid,
951  0,
952  NULL,
953  true);
954  restrictinfo->right_ec =
956  (Expr *) get_rightop(clause),
957  restrictinfo->nullable_relids,
958  restrictinfo->mergeopfamilies,
959  righttype,
960  ((OpExpr *) clause)->inputcollid,
961  0,
962  NULL,
963  true);
964 }
965 
966 /*
967  * update_mergeclause_eclasses
968  * Make the cached EquivalenceClass links valid in a mergeclause
969  * restrictinfo.
970  *
971  * These pointers should have been set by process_equivalence or
972  * initialize_mergeclause_eclasses, but they might have been set to
973  * non-canonical ECs that got merged later. Chase up to the canonical
974  * merged parent if so.
975  */
976 void
978 {
979  /* Should be a merge clause ... */
980  Assert(restrictinfo->mergeopfamilies != NIL);
981  /* ... with pointers already set */
982  Assert(restrictinfo->left_ec != NULL);
983  Assert(restrictinfo->right_ec != NULL);
984 
985  /* Chase up to the top as needed */
986  while (restrictinfo->left_ec->ec_merged)
987  restrictinfo->left_ec = restrictinfo->left_ec->ec_merged;
988  while (restrictinfo->right_ec->ec_merged)
989  restrictinfo->right_ec = restrictinfo->right_ec->ec_merged;
990 }
991 
992 /*
993  * find_mergeclauses_for_outer_pathkeys
994  * This routine attempts to find a list of mergeclauses that can be
995  * used with a specified ordering for the join's outer relation.
996  * If successful, it returns a list of mergeclauses.
997  *
998  * 'pathkeys' is a pathkeys list showing the ordering of an outer-rel path.
999  * 'restrictinfos' is a list of mergejoinable restriction clauses for the
1000  * join relation being formed, in no particular order.
1001  *
1002  * The restrictinfos must be marked (via outer_is_left) to show which side
1003  * of each clause is associated with the current outer path. (See
1004  * select_mergejoin_clauses())
1005  *
1006  * The result is NIL if no merge can be done, else a maximal list of
1007  * usable mergeclauses (represented as a list of their restrictinfo nodes).
1008  * The list is ordered to match the pathkeys, as required for execution.
1009  */
1010 List *
1012  List *pathkeys,
1013  List *restrictinfos)
1014 {
1015  List *mergeclauses = NIL;
1016  ListCell *i;
1017 
1018  /* make sure we have eclasses cached in the clauses */
1019  foreach(i, restrictinfos)
1020  {
1021  RestrictInfo *rinfo = (RestrictInfo *) lfirst(i);
1022 
1023  update_mergeclause_eclasses(root, rinfo);
1024  }
1025 
1026  foreach(i, pathkeys)
1027  {
1028  PathKey *pathkey = (PathKey *) lfirst(i);
1029  EquivalenceClass *pathkey_ec = pathkey->pk_eclass;
1030  List *matched_restrictinfos = NIL;
1031  ListCell *j;
1032 
1033  /*----------
1034  * A mergejoin clause matches a pathkey if it has the same EC.
1035  * If there are multiple matching clauses, take them all. In plain
1036  * inner-join scenarios we expect only one match, because
1037  * equivalence-class processing will have removed any redundant
1038  * mergeclauses. However, in outer-join scenarios there might be
1039  * multiple matches. An example is
1040  *
1041  * select * from a full join b
1042  * on a.v1 = b.v1 and a.v2 = b.v2 and a.v1 = b.v2;
1043  *
1044  * Given the pathkeys ({a.v1}, {a.v2}) it is okay to return all three
1045  * clauses (in the order a.v1=b.v1, a.v1=b.v2, a.v2=b.v2) and indeed
1046  * we *must* do so or we will be unable to form a valid plan.
1047  *
1048  * We expect that the given pathkeys list is canonical, which means
1049  * no two members have the same EC, so it's not possible for this
1050  * code to enter the same mergeclause into the result list twice.
1051  *
1052  * It's possible that multiple matching clauses might have different
1053  * ECs on the other side, in which case the order we put them into our
1054  * result makes a difference in the pathkeys required for the inner
1055  * input rel. However this routine hasn't got any info about which
1056  * order would be best, so we don't worry about that.
1057  *
1058  * It's also possible that the selected mergejoin clauses produce
1059  * a noncanonical ordering of pathkeys for the inner side, ie, we
1060  * might select clauses that reference b.v1, b.v2, b.v1 in that
1061  * order. This is not harmful in itself, though it suggests that
1062  * the clauses are partially redundant. Since the alternative is
1063  * to omit mergejoin clauses and thereby possibly fail to generate a
1064  * plan altogether, we live with it. make_inner_pathkeys_for_merge()
1065  * has to delete duplicates when it constructs the inner pathkeys
1066  * list, and we also have to deal with such cases specially in
1067  * create_mergejoin_plan().
1068  *----------
1069  */
1070  foreach(j, restrictinfos)
1071  {
1072  RestrictInfo *rinfo = (RestrictInfo *) lfirst(j);
1073  EquivalenceClass *clause_ec;
1074 
1075  clause_ec = rinfo->outer_is_left ?
1076  rinfo->left_ec : rinfo->right_ec;
1077  if (clause_ec == pathkey_ec)
1078  matched_restrictinfos = lappend(matched_restrictinfos, rinfo);
1079  }
1080 
1081  /*
1082  * If we didn't find a mergeclause, we're done --- any additional
1083  * sort-key positions in the pathkeys are useless. (But we can still
1084  * mergejoin if we found at least one mergeclause.)
1085  */
1086  if (matched_restrictinfos == NIL)
1087  break;
1088 
1089  /*
1090  * If we did find usable mergeclause(s) for this sort-key position,
1091  * add them to result list.
1092  */
1093  mergeclauses = list_concat(mergeclauses, matched_restrictinfos);
1094  }
1095 
1096  return mergeclauses;
1097 }
1098 
1099 /*
1100  * select_outer_pathkeys_for_merge
1101  * Builds a pathkey list representing a possible sort ordering
1102  * that can be used with the given mergeclauses.
1103  *
1104  * 'mergeclauses' is a list of RestrictInfos for mergejoin clauses
1105  * that will be used in a merge join.
1106  * 'joinrel' is the join relation we are trying to construct.
1107  *
1108  * The restrictinfos must be marked (via outer_is_left) to show which side
1109  * of each clause is associated with the current outer path. (See
1110  * select_mergejoin_clauses())
1111  *
1112  * Returns a pathkeys list that can be applied to the outer relation.
1113  *
1114  * Since we assume here that a sort is required, there is no particular use
1115  * in matching any available ordering of the outerrel. (joinpath.c has an
1116  * entirely separate code path for considering sort-free mergejoins.) Rather,
1117  * it's interesting to try to match the requested query_pathkeys so that a
1118  * second output sort may be avoided; and failing that, we try to list "more
1119  * popular" keys (those with the most unmatched EquivalenceClass peers)
1120  * earlier, in hopes of making the resulting ordering useful for as many
1121  * higher-level mergejoins as possible.
1122  */
1123 List *
1125  List *mergeclauses,
1126  RelOptInfo *joinrel)
1127 {
1128  List *pathkeys = NIL;
1129  int nClauses = list_length(mergeclauses);
1130  EquivalenceClass **ecs;
1131  int *scores;
1132  int necs;
1133  ListCell *lc;
1134  int j;
1135 
1136  /* Might have no mergeclauses */
1137  if (nClauses == 0)
1138  return NIL;
1139 
1140  /*
1141  * Make arrays of the ECs used by the mergeclauses (dropping any
1142  * duplicates) and their "popularity" scores.
1143  */
1144  ecs = (EquivalenceClass **) palloc(nClauses * sizeof(EquivalenceClass *));
1145  scores = (int *) palloc(nClauses * sizeof(int));
1146  necs = 0;
1147 
1148  foreach(lc, mergeclauses)
1149  {
1150  RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
1151  EquivalenceClass *oeclass;
1152  int score;
1153  ListCell *lc2;
1154 
1155  /* get the outer eclass */
1156  update_mergeclause_eclasses(root, rinfo);
1157 
1158  if (rinfo->outer_is_left)
1159  oeclass = rinfo->left_ec;
1160  else
1161  oeclass = rinfo->right_ec;
1162 
1163  /* reject duplicates */
1164  for (j = 0; j < necs; j++)
1165  {
1166  if (ecs[j] == oeclass)
1167  break;
1168  }
1169  if (j < necs)
1170  continue;
1171 
1172  /* compute score */
1173  score = 0;
1174  foreach(lc2, oeclass->ec_members)
1175  {
1177 
1178  /* Potential future join partner? */
1179  if (!em->em_is_const && !em->em_is_child &&
1180  !bms_overlap(em->em_relids, joinrel->relids))
1181  score++;
1182  }
1183 
1184  ecs[necs] = oeclass;
1185  scores[necs] = score;
1186  necs++;
1187  }
1188 
1189  /*
1190  * Find out if we have all the ECs mentioned in query_pathkeys; if so we
1191  * can generate a sort order that's also useful for final output. There is
1192  * no percentage in a partial match, though, so we have to have 'em all.
1193  */
1194  if (root->query_pathkeys)
1195  {
1196  foreach(lc, root->query_pathkeys)
1197  {
1198  PathKey *query_pathkey = (PathKey *) lfirst(lc);
1199  EquivalenceClass *query_ec = query_pathkey->pk_eclass;
1200 
1201  for (j = 0; j < necs; j++)
1202  {
1203  if (ecs[j] == query_ec)
1204  break; /* found match */
1205  }
1206  if (j >= necs)
1207  break; /* didn't find match */
1208  }
1209  /* if we got to the end of the list, we have them all */
1210  if (lc == NULL)
1211  {
1212  /* copy query_pathkeys as starting point for our output */
1213  pathkeys = list_copy(root->query_pathkeys);
1214  /* mark their ECs as already-emitted */
1215  foreach(lc, root->query_pathkeys)
1216  {
1217  PathKey *query_pathkey = (PathKey *) lfirst(lc);
1218  EquivalenceClass *query_ec = query_pathkey->pk_eclass;
1219 
1220  for (j = 0; j < necs; j++)
1221  {
1222  if (ecs[j] == query_ec)
1223  {
1224  scores[j] = -1;
1225  break;
1226  }
1227  }
1228  }
1229  }
1230  }
1231 
1232  /*
1233  * Add remaining ECs to the list in popularity order, using a default sort
1234  * ordering. (We could use qsort() here, but the list length is usually
1235  * so small it's not worth it.)
1236  */
1237  for (;;)
1238  {
1239  int best_j;
1240  int best_score;
1241  EquivalenceClass *ec;
1242  PathKey *pathkey;
1243 
1244  best_j = 0;
1245  best_score = scores[0];
1246  for (j = 1; j < necs; j++)
1247  {
1248  if (scores[j] > best_score)
1249  {
1250  best_j = j;
1251  best_score = scores[j];
1252  }
1253  }
1254  if (best_score < 0)
1255  break; /* all done */
1256  ec = ecs[best_j];
1257  scores[best_j] = -1;
1258  pathkey = make_canonical_pathkey(root,
1259  ec,
1262  false);
1263  /* can't be redundant because no duplicate ECs */
1264  Assert(!pathkey_is_redundant(pathkey, pathkeys));
1265  pathkeys = lappend(pathkeys, pathkey);
1266  }
1267 
1268  pfree(ecs);
1269  pfree(scores);
1270 
1271  return pathkeys;
1272 }
1273 
1274 /*
1275  * make_inner_pathkeys_for_merge
1276  * Builds a pathkey list representing the explicit sort order that
1277  * must be applied to an inner path to make it usable with the
1278  * given mergeclauses.
1279  *
1280  * 'mergeclauses' is a list of RestrictInfos for the mergejoin clauses
1281  * that will be used in a merge join, in order.
1282  * 'outer_pathkeys' are the already-known canonical pathkeys for the outer
1283  * side of the join.
1284  *
1285  * The restrictinfos must be marked (via outer_is_left) to show which side
1286  * of each clause is associated with the current outer path. (See
1287  * select_mergejoin_clauses())
1288  *
1289  * Returns a pathkeys list that can be applied to the inner relation.
1290  *
1291  * Note that it is not this routine's job to decide whether sorting is
1292  * actually needed for a particular input path. Assume a sort is necessary;
1293  * just make the keys, eh?
1294  */
1295 List *
1297  List *mergeclauses,
1298  List *outer_pathkeys)
1299 {
1300  List *pathkeys = NIL;
1301  EquivalenceClass *lastoeclass;
1302  PathKey *opathkey;
1303  ListCell *lc;
1304  ListCell *lop;
1305 
1306  lastoeclass = NULL;
1307  opathkey = NULL;
1308  lop = list_head(outer_pathkeys);
1309 
1310  foreach(lc, mergeclauses)
1311  {
1312  RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
1313  EquivalenceClass *oeclass;
1314  EquivalenceClass *ieclass;
1315  PathKey *pathkey;
1316 
1317  update_mergeclause_eclasses(root, rinfo);
1318 
1319  if (rinfo->outer_is_left)
1320  {
1321  oeclass = rinfo->left_ec;
1322  ieclass = rinfo->right_ec;
1323  }
1324  else
1325  {
1326  oeclass = rinfo->right_ec;
1327  ieclass = rinfo->left_ec;
1328  }
1329 
1330  /* outer eclass should match current or next pathkeys */
1331  /* we check this carefully for debugging reasons */
1332  if (oeclass != lastoeclass)
1333  {
1334  if (!lop)
1335  elog(ERROR, "too few pathkeys for mergeclauses");
1336  opathkey = (PathKey *) lfirst(lop);
1337  lop = lnext(lop);
1338  lastoeclass = opathkey->pk_eclass;
1339  if (oeclass != lastoeclass)
1340  elog(ERROR, "outer pathkeys do not match mergeclause");
1341  }
1342 
1343  /*
1344  * Often, we'll have same EC on both sides, in which case the outer
1345  * pathkey is also canonical for the inner side, and we can skip a
1346  * useless search.
1347  */
1348  if (ieclass == oeclass)
1349  pathkey = opathkey;
1350  else
1351  pathkey = make_canonical_pathkey(root,
1352  ieclass,
1353  opathkey->pk_opfamily,
1354  opathkey->pk_strategy,
1355  opathkey->pk_nulls_first);
1356 
1357  /*
1358  * Don't generate redundant pathkeys (which can happen if multiple
1359  * mergeclauses refer to the same EC). Because we do this, the output
1360  * pathkey list isn't necessarily ordered like the mergeclauses, which
1361  * complicates life for create_mergejoin_plan(). But if we didn't,
1362  * we'd have a noncanonical sort key list, which would be bad; for one
1363  * reason, it certainly wouldn't match any available sort order for
1364  * the input relation.
1365  */
1366  if (!pathkey_is_redundant(pathkey, pathkeys))
1367  pathkeys = lappend(pathkeys, pathkey);
1368  }
1369 
1370  return pathkeys;
1371 }
1372 
1373 /*
1374  * trim_mergeclauses_for_inner_pathkeys
1375  * This routine trims a list of mergeclauses to include just those that
1376  * work with a specified ordering for the join's inner relation.
1377  *
1378  * 'mergeclauses' is a list of RestrictInfos for mergejoin clauses for the
1379  * join relation being formed, in an order known to work for the
1380  * currently-considered sort ordering of the join's outer rel.
1381  * 'pathkeys' is a pathkeys list showing the ordering of an inner-rel path;
1382  * it should be equal to, or a truncation of, the result of
1383  * make_inner_pathkeys_for_merge for these mergeclauses.
1384  *
1385  * What we return will be a prefix of the given mergeclauses list.
1386  *
1387  * We need this logic because make_inner_pathkeys_for_merge's result isn't
1388  * necessarily in the same order as the mergeclauses. That means that if we
1389  * consider an inner-rel pathkey list that is a truncation of that result,
1390  * we might need to drop mergeclauses even though they match a surviving inner
1391  * pathkey. This happens when they are to the right of a mergeclause that
1392  * matches a removed inner pathkey.
1393  *
1394  * The mergeclauses must be marked (via outer_is_left) to show which side
1395  * of each clause is associated with the current outer path. (See
1396  * select_mergejoin_clauses())
1397  */
1398 List *
1400  List *mergeclauses,
1401  List *pathkeys)
1402 {
1403  List *new_mergeclauses = NIL;
1404  PathKey *pathkey;
1405  EquivalenceClass *pathkey_ec;
1406  bool matched_pathkey;
1407  ListCell *lip;
1408  ListCell *i;
1409 
1410  /* No pathkeys => no mergeclauses (though we don't expect this case) */
1411  if (pathkeys == NIL)
1412  return NIL;
1413  /* Initialize to consider first pathkey */
1414  lip = list_head(pathkeys);
1415  pathkey = (PathKey *) lfirst(lip);
1416  pathkey_ec = pathkey->pk_eclass;
1417  lip = lnext(lip);
1418  matched_pathkey = false;
1419 
1420  /* Scan mergeclauses to see how many we can use */
1421  foreach(i, mergeclauses)
1422  {
1423  RestrictInfo *rinfo = (RestrictInfo *) lfirst(i);
1424  EquivalenceClass *clause_ec;
1425 
1426  /* Assume we needn't do update_mergeclause_eclasses again here */
1427 
1428  /* Check clause's inner-rel EC against current pathkey */
1429  clause_ec = rinfo->outer_is_left ?
1430  rinfo->right_ec : rinfo->left_ec;
1431 
1432  /* If we don't have a match, attempt to advance to next pathkey */
1433  if (clause_ec != pathkey_ec)
1434  {
1435  /* If we had no clauses matching this inner pathkey, must stop */
1436  if (!matched_pathkey)
1437  break;
1438 
1439  /* Advance to next inner pathkey, if any */
1440  if (lip == NULL)
1441  break;
1442  pathkey = (PathKey *) lfirst(lip);
1443  pathkey_ec = pathkey->pk_eclass;
1444  lip = lnext(lip);
1445  matched_pathkey = false;
1446  }
1447 
1448  /* If mergeclause matches current inner pathkey, we can use it */
1449  if (clause_ec == pathkey_ec)
1450  {
1451  new_mergeclauses = lappend(new_mergeclauses, rinfo);
1452  matched_pathkey = true;
1453  }
1454  else
1455  {
1456  /* Else, no hope of adding any more mergeclauses */
1457  break;
1458  }
1459  }
1460 
1461  return new_mergeclauses;
1462 }
1463 
1464 
/****************************************************************************
 *		PATHKEY USEFULNESS CHECKS
 *
 * We only want to remember as many of the pathkeys of a path as have some
 * potential use, either for subsequent mergejoins or for meeting the query's
 * requested output ordering.  This ensures that add_path() won't consider
 * a path to have a usefully different ordering unless it really is useful.
 * These routines check for usefulness of given pathkeys.
 ****************************************************************************/
1474 
1475 /*
1476  * pathkeys_useful_for_merging
1477  * Count the number of pathkeys that may be useful for mergejoins
1478  * above the given relation.
1479  *
1480  * We consider a pathkey potentially useful if it corresponds to the merge
1481  * ordering of either side of any joinclause for the rel. This might be
1482  * overoptimistic, since joinclauses that require different other relations
1483  * might never be usable at the same time, but trying to be exact is likely
1484  * to be more trouble than it's worth.
1485  *
1486  * To avoid doubling the number of mergejoin paths considered, we would like
1487  * to consider only one of the two scan directions (ASC or DESC) as useful
1488  * for merging for any given target column. The choice is arbitrary unless
1489  * one of the directions happens to match an ORDER BY key, in which case
1490  * that direction should be preferred, in hopes of avoiding a final sort step.
1491  * right_merge_direction() implements this heuristic.
1492  */
1493 static int
1495 {
1496  int useful = 0;
1497  ListCell *i;
1498 
1499  foreach(i, pathkeys)
1500  {
1501  PathKey *pathkey = (PathKey *) lfirst(i);
1502  bool matched = false;
1503  ListCell *j;
1504 
1505  /* If "wrong" direction, not useful for merging */
1506  if (!right_merge_direction(root, pathkey))
1507  break;
1508 
1509  /*
1510  * First look into the EquivalenceClass of the pathkey, to see if
1511  * there are any members not yet joined to the rel. If so, it's
1512  * surely possible to generate a mergejoin clause using them.
1513  */
1514  if (rel->has_eclass_joins &&
1515  eclass_useful_for_merging(root, pathkey->pk_eclass, rel))
1516  matched = true;
1517  else
1518  {
1519  /*
1520  * Otherwise search the rel's joininfo list, which contains
1521  * non-EquivalenceClass-derivable join clauses that might
1522  * nonetheless be mergejoinable.
1523  */
1524  foreach(j, rel->joininfo)
1525  {
1526  RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(j);
1527 
1528  if (restrictinfo->mergeopfamilies == NIL)
1529  continue;
1530  update_mergeclause_eclasses(root, restrictinfo);
1531 
1532  if (pathkey->pk_eclass == restrictinfo->left_ec ||
1533  pathkey->pk_eclass == restrictinfo->right_ec)
1534  {
1535  matched = true;
1536  break;
1537  }
1538  }
1539  }
1540 
1541  /*
1542  * If we didn't find a mergeclause, we're done --- any additional
1543  * sort-key positions in the pathkeys are useless. (But we can still
1544  * mergejoin if we found at least one mergeclause.)
1545  */
1546  if (matched)
1547  useful++;
1548  else
1549  break;
1550  }
1551 
1552  return useful;
1553 }
1554 
1555 /*
1556  * right_merge_direction
1557  * Check whether the pathkey embodies the preferred sort direction
1558  * for merging its target column.
1559  */
1560 static bool
1562 {
1563  ListCell *l;
1564 
1565  foreach(l, root->query_pathkeys)
1566  {
1567  PathKey *query_pathkey = (PathKey *) lfirst(l);
1568 
1569  if (pathkey->pk_eclass == query_pathkey->pk_eclass &&
1570  pathkey->pk_opfamily == query_pathkey->pk_opfamily)
1571  {
1572  /*
1573  * Found a matching query sort column. Prefer this pathkey's
1574  * direction iff it matches. Note that we ignore pk_nulls_first,
1575  * which means that a sort might be needed anyway ... but we still
1576  * want to prefer only one of the two possible directions, and we
1577  * might as well use this one.
1578  */
1579  return (pathkey->pk_strategy == query_pathkey->pk_strategy);
1580  }
1581  }
1582 
1583  /* If no matching ORDER BY request, prefer the ASC direction */
1584  return (pathkey->pk_strategy == BTLessStrategyNumber);
1585 }
1586 
1587 /*
1588  * pathkeys_useful_for_ordering
1589  * Count the number of pathkeys that are useful for meeting the
1590  * query's requested output ordering.
1591  *
1592  * Unlike merge pathkeys, this is an all-or-nothing affair: it does us
1593  * no good to order by just the first key(s) of the requested ordering.
1594  * So the result is always either 0 or list_length(root->query_pathkeys).
1595  */
1596 static int
1598 {
1599  if (root->query_pathkeys == NIL)
1600  return 0; /* no special ordering requested */
1601 
1602  if (pathkeys == NIL)
1603  return 0; /* unordered path */
1604 
1605  if (pathkeys_contained_in(root->query_pathkeys, pathkeys))
1606  {
1607  /* It's useful ... or at least the first N keys are */
1608  return list_length(root->query_pathkeys);
1609  }
1610 
1611  return 0; /* path ordering not useful */
1612 }
1613 
1614 /*
1615  * truncate_useless_pathkeys
1616  * Shorten the given pathkey list to just the useful pathkeys.
1617  */
1618 List *
1620  RelOptInfo *rel,
1621  List *pathkeys)
1622 {
1623  int nuseful;
1624  int nuseful2;
1625 
1626  nuseful = pathkeys_useful_for_merging(root, rel, pathkeys);
1627  nuseful2 = pathkeys_useful_for_ordering(root, pathkeys);
1628  if (nuseful2 > nuseful)
1629  nuseful = nuseful2;
1630 
1631  /*
1632  * Note: not safe to modify input list destructively, but we can avoid
1633  * copying the list if we're not actually going to change it
1634  */
1635  if (nuseful == 0)
1636  return NIL;
1637  else if (nuseful == list_length(pathkeys))
1638  return pathkeys;
1639  else
1640  return list_truncate(list_copy(pathkeys), nuseful);
1641 }
1642 
1643 /*
1644  * has_useful_pathkeys
1645  * Detect whether the specified rel could have any pathkeys that are
1646  * useful according to truncate_useless_pathkeys().
1647  *
1648  * This is a cheap test that lets us skip building pathkeys at all in very
1649  * simple queries. It's OK to err in the direction of returning "true" when
1650  * there really aren't any usable pathkeys, but erring in the other direction
1651  * is bad --- so keep this in sync with the routines above!
1652  *
1653  * We could make the test more complex, for example checking to see if any of
1654  * the joinclauses are really mergejoinable, but that likely wouldn't win
1655  * often enough to repay the extra cycles. Queries with neither a join nor
1656  * a sort are reasonably common, though, so this much work seems worthwhile.
1657  */
1658 bool
1660 {
1661  if (rel->joininfo != NIL || rel->has_eclass_joins)
1662  return true; /* might be able to use pathkeys for merging */
1663  if (root->query_pathkeys != NIL)
1664  return true; /* might be able to use them for ordering */
1665  return false; /* definitely useless */
1666 }
bool has_eclass_joins
Definition: relation.h:678
Path * get_cheapest_path_for_pathkeys(List *paths, List *pathkeys, Relids required_outer, CostSelector cost_criterion, bool require_parallel_safe)
Definition: pathkeys.c:343
signed short int16
Definition: c.h:312
#define NIL
Definition: pg_list.h:69
List * build_expression_pathkey(PlannerInfo *root, Expr *expr, Relids nullable_relids, Oid opno, Relids rel, bool create_it)
Definition: pathkeys.c:562
static PathKey * make_pathkey_from_sortop(PlannerInfo *root, Expr *expr, Relids nullable_relids, Oid ordering_op, bool nulls_first, Index sortref, bool create_it)
Definition: pathkeys.c:229
#define BTGreaterStrategyNumber
Definition: stratnum.h:33
#define forboth(cell1, list1, cell2, list2)
Definition: pg_list.h:180
Oid * indexcollations
Definition: relation.h:767
void initialize_mergeclause_eclasses(PlannerInfo *root, RestrictInfo *restrictinfo)
Definition: pathkeys.c:928
List * query_pathkeys
Definition: relation.h:274
bool equal(const void *a, const void *b)
Definition: equalfuncs.c:2986
List * make_pathkeys_for_sortclauses(PlannerInfo *root, List *sortclauses, List *tlist)
Definition: pathkeys.c:874
List * indextlist
Definition: relation.h:780
List * build_index_pathkeys(PlannerInfo *root, IndexOptInfo *index, ScanDirection scandir)
Definition: pathkeys.c:462
bool eclass_useful_for_merging(PlannerInfo *root, EquivalenceClass *eclass, RelOptInfo *rel)
Definition: equivclass.c:2437
List * get_mergejoin_opfamilies(Oid opno)
Definition: lsyscache.c:363
EquivalenceClass * get_eclass_for_sort_expr(PlannerInfo *root, Expr *expr, Relids nullable_relids, List *opfamilies, Oid opcintype, Oid collation, Index sortref, Relids rel, bool create_it)
Definition: equivclass.c:620
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
Var * makeVarFromTargetEntry(Index varno, TargetEntry *tle)
Definition: makefuncs.c:104
List * list_truncate(List *list, int new_size)
Definition: list.c:350
Index tleSortGroupRef
Definition: parsenodes.h:1207
Node * get_sortgroupclause_expr(SortGroupClause *sgClause, List *targetList)
Definition: tlist.c:382
List * list_copy(const List *oldlist)
Definition: list.c:1160
Index ec_sortref
Definition: relation.h:907
Definition: nodes.h:516
List * make_inner_pathkeys_for_merge(PlannerInfo *root, List *mergeclauses, List *outer_pathkeys)
Definition: pathkeys.c:1296
Oid * sortopfamily
Definition: relation.h:770
List * list_concat(List *list1, List *list2)
Definition: list.c:321
PathKey * make_canonical_pathkey(PlannerInfo *root, EquivalenceClass *eclass, Oid opfamily, int strategy, bool nulls_first)
Definition: pathkeys.c:51
static int pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys)
Definition: pathkeys.c:1597
EquivalenceClass * right_ec
Definition: relation.h:1929
PathKeysComparison compare_pathkeys(List *keys1, List *keys2)
Definition: pathkeys.c:278
unsigned int Oid
Definition: postgres_ext.h:31
List * truncate_useless_pathkeys(PlannerInfo *root, RelOptInfo *rel, List *pathkeys)
Definition: pathkeys.c:1619
#define OidIsValid(objectId)
Definition: c.h:605
List * build_join_pathkeys(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, List *outer_pathkeys)
Definition: pathkeys.c:831
List * mergeopfamilies
Definition: relation.h:1925
int pk_strategy
Definition: relation.h:977
List * find_mergeclauses_for_outer_pathkeys(PlannerInfo *root, List *pathkeys, List *restrictinfos)
Definition: pathkeys.c:1011
static struct cvec * eclass(struct vars *v, chr c, int cases)
Definition: regc_locale.c:508
JoinType
Definition: nodes.h:680
Definition: type.h:89
#define list_make1(x1)
Definition: pg_list.h:139
#define ScanDirectionIsBackward(direction)
Definition: sdir.h:41
void pfree(void *pointer)
Definition: mcxt.c:1031
RelOptInfo * rel
Definition: relation.h:755
bool resjunk
Definition: primnodes.h:1382
#define linitial(l)
Definition: pg_list.h:111
bool pk_nulls_first
Definition: relation.h:978
#define ERROR
Definition: elog.h:43
Node * get_leftop(const Expr *clause)
Definition: clauses.c:200
Expr * canonicalize_ec_expression(Expr *expr, Oid req_type, Oid req_collation)
Definition: equivclass.c:494
bool bms_is_subset(const Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:374
int compare_path_costs(Path *path1, Path *path2, CostSelector criterion)
Definition: pathnode.c:71
void * list_nth(const List *list, int n)
Definition: list.c:410
List * joininfo
Definition: relation.h:676
List * convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel, List *subquery_pathkeys, List *subquery_tlist)
Definition: pathkeys.c:616
bool outer_is_left
Definition: relation.h:1935
ScanDirection
Definition: sdir.h:22
Oid get_opfamily_member(Oid opfamily, Oid lefttype, Oid righttype, int16 strategy)
Definition: lsyscache.c:163
static ListCell * list_head(const List *l)
Definition: pg_list.h:77
Relids relids
Definition: relation.h:612
void op_input_types(Oid opno, Oid *lefttype, Oid *righttype)
Definition: lsyscache.c:1152
Path * get_cheapest_fractional_path_for_pathkeys(List *paths, List *pathkeys, Relids required_outer, double fraction)
Definition: pathkeys.c:388
static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys)
Definition: pathkeys.c:128
#define lnext(lc)
Definition: pg_list.h:105
List * canon_pathkeys
Definition: relation.h:251
TargetEntry * get_sortgroupref_tle(Index sortref, List *targetList)
Definition: tlist.c:348
#define EC_MUST_BE_REDUNDANT(eclass)
Definition: relation.h:917
Path * get_cheapest_parallel_safe_total_inner(List *paths)
Definition: pathkeys.c:421
Index relid
Definition: relation.h:640
List * lappend(List *list, void *datum)
Definition: list.c:128
Expr * clause
Definition: relation.h:1880
bool bms_is_empty(const Bitmapset *a)
Definition: bitmapset.c:729
List * ec_opfamilies
Definition: relation.h:896
bool pathkeys_contained_in(List *keys1, List *keys2)
Definition: pathkeys.c:317
Relids nullable_relids
Definition: relation.h:1904
Relids em_relids
Definition: relation.h:947
List * trim_mergeclauses_for_inner_pathkeys(PlannerInfo *root, List *mergeclauses, List *pathkeys)
Definition: pathkeys.c:1399
unsigned int Index
Definition: c.h:442
List * select_outer_pathkeys_for_merge(PlannerInfo *root, List *mergeclauses, RelOptInfo *joinrel)
Definition: pathkeys.c:1124
List * pathkeys
Definition: relation.h:1092
#define makeNode(_type_)
Definition: nodes.h:564
#define Assert(condition)
Definition: c.h:699
#define lfirst(lc)
Definition: pg_list.h:106
bool parallel_safe
Definition: relation.h:1084
Expr * expr
Definition: primnodes.h:1375
int compare_fractional_path_costs(Path *path1, Path *path2, double fraction)
Definition: pathnode.c:117
#define PATH_REQ_OUTER(path)
Definition: relation.h:1097
EquivalenceClass * pk_eclass
Definition: relation.h:975
bool get_ordering_op_properties(Oid opno, Oid *opfamily, Oid *opcintype, int16 *strategy)
Definition: lsyscache.c:204
#define linitial_oid(l)
Definition: pg_list.h:113
static int list_length(const List *l)
Definition: pg_list.h:89
bool ec_has_volatile
Definition: relation.h:904
Oid exprCollation(const Node *expr)
Definition: nodeFuncs.c:720
CostSelector
Definition: relation.h:35
int nkeycolumns
Definition: relation.h:764
Oid * opcintype
Definition: relation.h:769
Oid pk_opfamily
Definition: relation.h:976
Node * get_rightop(const Expr *clause)
Definition: clauses.c:217
bool indexcol_is_bool_constant_for_query(IndexOptInfo *index, int indexcol)
Definition: indxpath.c:3157
bool bms_overlap(const Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:509
PathKeysComparison
Definition: paths.h:183
void * palloc(Size size)
Definition: mcxt.c:924
EquivalenceClass * left_ec
Definition: relation.h:1928
Relids nullable_baserels
Definition: relation.h:218
int i
bool has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel)
Definition: pathkeys.c:1659
MemoryContext planner_cxt
Definition: relation.h:302
#define elog
Definition: elog.h:219
static PathKey * make_pathkey_from_sortinfo(PlannerInfo *root, Expr *expr, Relids nullable_relids, Oid opfamily, Oid opcintype, Oid collation, bool reverse_sort, bool nulls_first, Index sortref, Relids rel, bool create_it)
Definition: pathkeys.c:170
bool * nulls_first
Definition: relation.h:772
bool * reverse_sort
Definition: relation.h:771
#define BTLessStrategyNumber
Definition: stratnum.h:29
Definition: pg_list.h:45
static int pathkeys_useful_for_merging(PlannerInfo *root, RelOptInfo *rel, List *pathkeys)
Definition: pathkeys.c:1494
void update_mergeclause_eclasses(PlannerInfo *root, RestrictInfo *restrictinfo)
Definition: pathkeys.c:977
struct EquivalenceClass * ec_merged
Definition: relation.h:910
#define BTEqualStrategyNumber
Definition: stratnum.h:31
List * ec_members
Definition: relation.h:898
static bool right_merge_direction(PlannerInfo *root, PathKey *pathkey)
Definition: pathkeys.c:1561