PostgreSQL Source Code  git master
pathkeys.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * pathkeys.c
4  * Utilities for matching and building path keys
5  *
6  * See src/backend/optimizer/README for a great deal of information about
7  * the nature and use of path keys.
8  *
9  *
10  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
11  * Portions Copyright (c) 1994, Regents of the University of California
12  *
13  * IDENTIFICATION
14  * src/backend/optimizer/path/pathkeys.c
15  *
16  *-------------------------------------------------------------------------
17  */
18 #include "postgres.h"
19 
20 #include "access/stratnum.h"
21 #include "catalog/pg_opfamily.h"
22 #include "nodes/makefuncs.h"
23 #include "nodes/nodeFuncs.h"
24 #include "nodes/plannodes.h"
25 #include "optimizer/cost.h"
26 #include "optimizer/optimizer.h"
27 #include "optimizer/pathnode.h"
28 #include "optimizer/paths.h"
30 #include "utils/lsyscache.h"
31 
32 /* Consider reordering of GROUP BY keys? */
34 
35 static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys);
37  RelOptInfo *partrel,
38  int partkeycol);
40 static bool right_merge_direction(PlannerInfo *root, PathKey *pathkey);
41 
42 
43 /****************************************************************************
44  * PATHKEY CONSTRUCTION AND REDUNDANCY TESTING
45  ****************************************************************************/
46 
47 /*
48  * make_canonical_pathkey
49  * Given the parameters for a PathKey, find any pre-existing matching
50  * pathkey in the query's list of "canonical" pathkeys. Make a new
51  * entry if there's not one already.
52  *
53  * Note that this function must not be used until after we have completed
54  * merging EquivalenceClasses.
55  */
56 PathKey *
58  EquivalenceClass *eclass, Oid opfamily,
59  int strategy, bool nulls_first)
60 {
61  PathKey *pk;
62  ListCell *lc;
63  MemoryContext oldcontext;
64 
65  /* Can't make canonical pathkeys if the set of ECs might still change */
66  if (!root->ec_merging_done)
67  elog(ERROR, "too soon to build canonical pathkeys");
68 
69  /* The passed eclass might be non-canonical, so chase up to the top */
70  while (eclass->ec_merged)
71  eclass = eclass->ec_merged;
72 
73  foreach(lc, root->canon_pathkeys)
74  {
75  pk = (PathKey *) lfirst(lc);
76  if (eclass == pk->pk_eclass &&
77  opfamily == pk->pk_opfamily &&
78  strategy == pk->pk_strategy &&
79  nulls_first == pk->pk_nulls_first)
80  return pk;
81  }
82 
83  /*
84  * Be sure canonical pathkeys are allocated in the main planning context.
85  * Not an issue in normal planning, but it is for GEQO.
86  */
87  oldcontext = MemoryContextSwitchTo(root->planner_cxt);
88 
89  pk = makeNode(PathKey);
90  pk->pk_eclass = eclass;
91  pk->pk_opfamily = opfamily;
92  pk->pk_strategy = strategy;
93  pk->pk_nulls_first = nulls_first;
94 
95  root->canon_pathkeys = lappend(root->canon_pathkeys, pk);
96 
97  MemoryContextSwitchTo(oldcontext);
98 
99  return pk;
100 }
101 
102 /*
103  * append_pathkeys
104  * Append all non-redundant PathKeys in 'source' onto 'target' and
105  * returns the updated 'target' list.
106  */
107 List *
109 {
110  ListCell *lc;
111 
112  Assert(target != NIL);
113 
114  foreach(lc, source)
115  {
116  PathKey *pk = lfirst_node(PathKey, lc);
117 
118  if (!pathkey_is_redundant(pk, target))
119  target = lappend(target, pk);
120  }
121  return target;
122 }
123 
124 /*
125  * pathkey_is_redundant
126  * Is a pathkey redundant with one already in the given list?
127  *
128  * We detect two cases:
129  *
130  * 1. If the new pathkey's equivalence class contains a constant, and isn't
131  * below an outer join, then we can disregard it as a sort key. An example:
132  * SELECT ... WHERE x = 42 ORDER BY x, y;
133  * We may as well just sort by y. Note that because of opfamily matching,
134  * this is semantically correct: we know that the equality constraint is one
135  * that actually binds the variable to a single value in the terms of any
136  * ordering operator that might go with the eclass. This rule not only lets
137  * us simplify (or even skip) explicit sorts, but also allows matching index
138  * sort orders to a query when there are don't-care index columns.
139  *
140  * 2. If the new pathkey's equivalence class is the same as that of any
141  * existing member of the pathkey list, then it is redundant. Some examples:
142  * SELECT ... ORDER BY x, x;
143  * SELECT ... ORDER BY x, x DESC;
144  * SELECT ... WHERE x = y ORDER BY x, y;
145  * In all these cases the second sort key cannot distinguish values that are
146  * considered equal by the first, and so there's no point in using it.
147  * Note in particular that we need not compare opfamily (all the opfamilies
148  * of the EC have the same notion of equality) nor sort direction.
149  *
150  * Both the given pathkey and the list members must be canonical for this
151  * to work properly, but that's okay since we no longer ever construct any
152  * non-canonical pathkeys. (Note: the notion of a pathkey *list* being
153  * canonical includes the additional requirement of no redundant entries,
154  * which is exactly what we are checking for here.)
155  *
156  * Because the equivclass.c machinery forms only one copy of any EC per query,
157  * pointer comparison is enough to decide whether canonical ECs are the same.
158  */
159 static bool
160 pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys)
161 {
162  EquivalenceClass *new_ec = new_pathkey->pk_eclass;
163  ListCell *lc;
164 
165  /* Check for EC containing a constant --- unconditionally redundant */
166  if (EC_MUST_BE_REDUNDANT(new_ec))
167  return true;
168 
169  /* If same EC already used in list, then redundant */
170  foreach(lc, pathkeys)
171  {
172  PathKey *old_pathkey = (PathKey *) lfirst(lc);
173 
174  if (new_ec == old_pathkey->pk_eclass)
175  return true;
176  }
177 
178  return false;
179 }
180 
181 /*
182  * make_pathkey_from_sortinfo
183  * Given an expression and sort-order information, create a PathKey.
184  * The result is always a "canonical" PathKey, but it might be redundant.
185  *
186  * If the PathKey is being generated from a SortGroupClause, sortref should be
187  * the SortGroupClause's SortGroupRef; otherwise zero.
188  *
189  * If rel is not NULL, it identifies a specific relation we're considering
190  * a path for, and indicates that child EC members for that relation can be
191  * considered. Otherwise child members are ignored. (See the comments for
192  * get_eclass_for_sort_expr.)
193  *
194  * create_it is true if we should create any missing EquivalenceClass
195  * needed to represent the sort key. If it's false, we return NULL if the
196  * sort key isn't already present in any EquivalenceClass.
197  */
198 static PathKey *
200  Expr *expr,
201  Oid opfamily,
202  Oid opcintype,
203  Oid collation,
204  bool reverse_sort,
205  bool nulls_first,
206  Index sortref,
207  Relids rel,
208  bool create_it)
209 {
210  int16 strategy;
211  Oid equality_op;
212  List *opfamilies;
214 
215  strategy = reverse_sort ? BTGreaterStrategyNumber : BTLessStrategyNumber;
216 
217  /*
218  * EquivalenceClasses need to contain opfamily lists based on the family
219  * membership of mergejoinable equality operators, which could belong to
220  * more than one opfamily. So we have to look up the opfamily's equality
221  * operator and get its membership.
222  */
223  equality_op = get_opfamily_member(opfamily,
224  opcintype,
225  opcintype,
227  if (!OidIsValid(equality_op)) /* shouldn't happen */
228  elog(ERROR, "missing operator %d(%u,%u) in opfamily %u",
229  BTEqualStrategyNumber, opcintype, opcintype, opfamily);
230  opfamilies = get_mergejoin_opfamilies(equality_op);
231  if (!opfamilies) /* certainly should find some */
232  elog(ERROR, "could not find opfamilies for equality operator %u",
233  equality_op);
234 
235  /* Now find or (optionally) create a matching EquivalenceClass */
236  eclass = get_eclass_for_sort_expr(root, expr,
237  opfamilies, opcintype, collation,
238  sortref, rel, create_it);
239 
240  /* Fail if no EC and !create_it */
241  if (!eclass)
242  return NULL;
243 
244  /* And finally we can find or create a PathKey node */
245  return make_canonical_pathkey(root, eclass, opfamily,
246  strategy, nulls_first);
247 }
248 
249 /*
250  * make_pathkey_from_sortop
251  * Like make_pathkey_from_sortinfo, but work from a sort operator.
252  *
253  * This should eventually go away, but we need to restructure SortGroupClause
254  * first.
255  */
256 static PathKey *
258  Expr *expr,
259  Oid ordering_op,
260  bool nulls_first,
261  Index sortref,
262  bool create_it)
263 {
264  Oid opfamily,
265  opcintype,
266  collation;
267  int16 strategy;
268 
269  /* Find the operator in pg_amop --- failure shouldn't happen */
270  if (!get_ordering_op_properties(ordering_op,
271  &opfamily, &opcintype, &strategy))
272  elog(ERROR, "operator %u is not a valid ordering operator",
273  ordering_op);
274 
275  /* Because SortGroupClause doesn't carry collation, consult the expr */
276  collation = exprCollation((Node *) expr);
277 
278  return make_pathkey_from_sortinfo(root,
279  expr,
280  opfamily,
281  opcintype,
282  collation,
283  (strategy == BTGreaterStrategyNumber),
284  nulls_first,
285  sortref,
286  NULL,
287  create_it);
288 }
289 
290 
291 /****************************************************************************
292  * PATHKEY COMPARISONS
293  ****************************************************************************/
294 
295 /*
296  * compare_pathkeys
297  * Compare two pathkeys to see if they are equivalent, and if not whether
298  * one is "better" than the other.
299  *
300  * We assume the pathkeys are canonical, and so they can be checked for
301  * equality by simple pointer comparison.
302  */
304 compare_pathkeys(List *keys1, List *keys2)
305 {
306  ListCell *key1,
307  *key2;
308 
309  /*
310  * Fall out quickly if we are passed two identical lists. This mostly
311  * catches the case where both are NIL, but that's common enough to
312  * warrant the test.
313  */
314  if (keys1 == keys2)
315  return PATHKEYS_EQUAL;
316 
317  forboth(key1, keys1, key2, keys2)
318  {
319  PathKey *pathkey1 = (PathKey *) lfirst(key1);
320  PathKey *pathkey2 = (PathKey *) lfirst(key2);
321 
322  if (pathkey1 != pathkey2)
323  return PATHKEYS_DIFFERENT; /* no need to keep looking */
324  }
325 
326  /*
327  * If we reached the end of only one list, the other is longer and
328  * therefore not a subset.
329  */
330  if (key1 != NULL)
331  return PATHKEYS_BETTER1; /* key1 is longer */
332  if (key2 != NULL)
333  return PATHKEYS_BETTER2; /* key2 is longer */
334  return PATHKEYS_EQUAL;
335 }
336 
337 /*
338  * pathkeys_contained_in
339  * Common special case of compare_pathkeys: we just want to know
340  * if keys2 are at least as well sorted as keys1.
341  */
342 bool
344 {
345  switch (compare_pathkeys(keys1, keys2))
346  {
347  case PATHKEYS_EQUAL:
348  case PATHKEYS_BETTER2:
349  return true;
350  default:
351  break;
352  }
353  return false;
354 }
355 
356 /*
357  * group_keys_reorder_by_pathkeys
358  * Reorder GROUP BY pathkeys and clauses to match the input pathkeys.
359  *
360  * 'pathkeys' is an input list of pathkeys
361  * '*group_pathkeys' and '*group_clauses' are pathkeys and clauses lists to
362  * reorder. The pointers are redirected to new lists, original lists
363  * stay untouched.
364  * 'num_groupby_pathkeys' is the number of first '*group_pathkeys' items to
365  * search matching pathkeys.
366  *
367  * Returns the number of GROUP BY keys with a matching pathkey.
368  */
369 static int
370 group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
371  List **group_clauses,
372  int num_groupby_pathkeys)
373 {
374  List *new_group_pathkeys = NIL,
375  *new_group_clauses = NIL;
376  List *grouping_pathkeys;
377  ListCell *lc;
378  int n;
379 
380  if (pathkeys == NIL || *group_pathkeys == NIL)
381  return 0;
382 
383  /*
384  * We're going to search within just the first num_groupby_pathkeys of
385  * *group_pathkeys. The thing is that root->group_pathkeys is passed as
386  * *group_pathkeys containing grouping pathkeys altogether with aggregate
387  * pathkeys. If we process aggregate pathkeys we could get an invalid
388  * result of get_sortgroupref_clause_noerr(), because their
389  * pathkey->pk_eclass->ec_sortref doesn't referece query targetlist. So,
390  * we allocate a separate list of pathkeys for lookups.
391  */
392  grouping_pathkeys = list_copy_head(*group_pathkeys, num_groupby_pathkeys);
393 
394  /*
395  * Walk the pathkeys (determining ordering of the input path) and see if
396  * there's a matching GROUP BY key. If we find one, we append it to the
397  * list, and do the same for the clauses.
398  *
399  * Once we find the first pathkey without a matching GROUP BY key, the
400  * rest of the pathkeys are useless and can't be used to evaluate the
401  * grouping, so we abort the loop and ignore the remaining pathkeys.
402  */
403  foreach(lc, pathkeys)
404  {
405  PathKey *pathkey = (PathKey *) lfirst(lc);
406  SortGroupClause *sgc;
407 
408  /*
409  * Pathkeys are built in a way that allows simply comparing pointers.
410  * Give up if we can't find the matching pointer. Also give up if
411  * there is no sortclause reference for some reason.
412  */
413  if (foreach_current_index(lc) >= num_groupby_pathkeys ||
414  !list_member_ptr(grouping_pathkeys, pathkey) ||
415  pathkey->pk_eclass->ec_sortref == 0)
416  break;
417 
418  /*
419  * Since 1349d27 pathkey coming from underlying node can be in the
420  * root->group_pathkeys but not in the processed_groupClause. So, we
421  * should be careful here.
422  */
423  sgc = get_sortgroupref_clause_noerr(pathkey->pk_eclass->ec_sortref,
424  *group_clauses);
425  if (!sgc)
426  /* The grouping clause does not cover this pathkey */
427  break;
428 
429  /*
430  * Sort group clause should have an ordering operator as long as there
431  * is an associated pathkey.
432  */
433  Assert(OidIsValid(sgc->sortop));
434 
435  new_group_pathkeys = lappend(new_group_pathkeys, pathkey);
436  new_group_clauses = lappend(new_group_clauses, sgc);
437  }
438 
439  /* remember the number of pathkeys with a matching GROUP BY key */
440  n = list_length(new_group_pathkeys);
441 
442  /* append the remaining group pathkeys (will be treated as not sorted) */
443  *group_pathkeys = list_concat_unique_ptr(new_group_pathkeys,
444  *group_pathkeys);
445  *group_clauses = list_concat_unique_ptr(new_group_clauses,
446  *group_clauses);
447 
448  list_free(grouping_pathkeys);
449  return n;
450 }
451 
452 /*
453  * pathkeys_are_duplicate
454  * Check if give pathkeys are already contained the list of
455  * PathKeyInfo's.
456  */
457 static bool
458 pathkeys_are_duplicate(List *infos, List *pathkeys)
459 {
460  ListCell *lc;
461 
462  foreach(lc, infos)
463  {
464  PathKeyInfo *info = lfirst_node(PathKeyInfo, lc);
465 
466  if (compare_pathkeys(pathkeys, info->pathkeys) == PATHKEYS_EQUAL)
467  return true;
468  }
469  return false;
470 }
471 
472 /*
473  * get_useful_group_keys_orderings
474  * Determine which orderings of GROUP BY keys are potentially interesting.
475  *
476  * Returns a list of PathKeyInfo items, each representing an interesting
477  * ordering of GROUP BY keys. Each item stores pathkeys and clauses in the
478  * matching order.
479  *
480  * The function considers (and keeps) multiple GROUP BY orderings:
481  *
482  * - the original ordering, as specified by the GROUP BY clause,
483  * - GROUP BY keys reordered to match 'path' ordering (as much as possible),
484  * - GROUP BY keys to match target ORDER BY clause (as much as possible).
485  */
486 List *
488 {
489  Query *parse = root->parse;
490  List *infos = NIL;
491  PathKeyInfo *info;
492 
493  List *pathkeys = root->group_pathkeys;
494  List *clauses = root->processed_groupClause;
495 
496  /* always return at least the original pathkeys/clauses */
497  info = makeNode(PathKeyInfo);
498  info->pathkeys = pathkeys;
499  info->clauses = clauses;
500  infos = lappend(infos, info);
501 
502  /*
503  * Should we try generating alternative orderings of the group keys? If
504  * not, we produce only the order specified in the query, i.e. the
505  * optimization is effectively disabled.
506  */
508  return infos;
509 
510  /*
511  * Grouping sets have own and more complex logic to decide the ordering.
512  */
513  if (parse->groupingSets)
514  return infos;
515 
516  /*
517  * If the path is sorted in some way, try reordering the group keys to
518  * match the path as much of the ordering as possible. Then thanks to
519  * incremental sort we would get this sort as cheap as possible.
520  */
521  if (path->pathkeys &&
523  {
524  int n;
525 
526  n = group_keys_reorder_by_pathkeys(path->pathkeys, &pathkeys, &clauses,
527  root->num_groupby_pathkeys);
528 
529  if (n > 0 &&
531  !pathkeys_are_duplicate(infos, pathkeys))
532  {
533  info = makeNode(PathKeyInfo);
534  info->pathkeys = pathkeys;
535  info->clauses = clauses;
536 
537  infos = lappend(infos, info);
538  }
539  }
540 
541  /*
542  * Try reordering pathkeys to minimize the sort cost (this time consider
543  * the ORDER BY clause).
544  */
545  if (root->sort_pathkeys &&
547  {
548  int n;
549 
550  n = group_keys_reorder_by_pathkeys(root->sort_pathkeys, &pathkeys,
551  &clauses,
552  root->num_groupby_pathkeys);
553 
554  if (n > 0 &&
556  !pathkeys_are_duplicate(infos, pathkeys))
557  {
558  info = makeNode(PathKeyInfo);
559  info->pathkeys = pathkeys;
560  info->clauses = clauses;
561 
562  infos = lappend(infos, info);
563  }
564  }
565 
566  return infos;
567 }
568 
569 /*
570  * pathkeys_count_contained_in
571  * Same as pathkeys_contained_in, but also sets length of longest
572  * common prefix of keys1 and keys2.
573  */
574 bool
575 pathkeys_count_contained_in(List *keys1, List *keys2, int *n_common)
576 {
577  int n = 0;
578  ListCell *key1,
579  *key2;
580 
581  /*
582  * See if we can avoiding looping through both lists. This optimization
583  * gains us several percent in planning time in a worst-case test.
584  */
585  if (keys1 == keys2)
586  {
587  *n_common = list_length(keys1);
588  return true;
589  }
590  else if (keys1 == NIL)
591  {
592  *n_common = 0;
593  return true;
594  }
595  else if (keys2 == NIL)
596  {
597  *n_common = 0;
598  return false;
599  }
600 
601  /*
602  * If both lists are non-empty, iterate through both to find out how many
603  * items are shared.
604  */
605  forboth(key1, keys1, key2, keys2)
606  {
607  PathKey *pathkey1 = (PathKey *) lfirst(key1);
608  PathKey *pathkey2 = (PathKey *) lfirst(key2);
609 
610  if (pathkey1 != pathkey2)
611  {
612  *n_common = n;
613  return false;
614  }
615  n++;
616  }
617 
618  /* If we ended with a null value, then we've processed the whole list. */
619  *n_common = n;
620  return (key1 == NULL);
621 }
622 
623 /*
624  * get_cheapest_path_for_pathkeys
625  * Find the cheapest path (according to the specified criterion) that
626  * satisfies the given pathkeys and parameterization, and is parallel-safe
627  * if required.
628  * Return NULL if no such path.
629  *
630  * 'paths' is a list of possible paths that all generate the same relation
631  * 'pathkeys' represents a required ordering (in canonical form!)
632  * 'required_outer' denotes allowable outer relations for parameterized paths
633  * 'cost_criterion' is STARTUP_COST or TOTAL_COST
634  * 'require_parallel_safe' causes us to consider only parallel-safe paths
635  */
636 Path *
638  Relids required_outer,
639  CostSelector cost_criterion,
640  bool require_parallel_safe)
641 {
642  Path *matched_path = NULL;
643  ListCell *l;
644 
645  foreach(l, paths)
646  {
647  Path *path = (Path *) lfirst(l);
648 
649  /* If required, reject paths that are not parallel-safe */
650  if (require_parallel_safe && !path->parallel_safe)
651  continue;
652 
653  /*
654  * Since cost comparison is a lot cheaper than pathkey comparison, do
655  * that first. (XXX is that still true?)
656  */
657  if (matched_path != NULL &&
658  compare_path_costs(matched_path, path, cost_criterion) <= 0)
659  continue;
660 
661  if (pathkeys_contained_in(pathkeys, path->pathkeys) &&
662  bms_is_subset(PATH_REQ_OUTER(path), required_outer))
663  matched_path = path;
664  }
665  return matched_path;
666 }
667 
668 /*
669  * get_cheapest_fractional_path_for_pathkeys
670  * Find the cheapest path (for retrieving a specified fraction of all
671  * the tuples) that satisfies the given pathkeys and parameterization.
672  * Return NULL if no such path.
673  *
674  * See compare_fractional_path_costs() for the interpretation of the fraction
675  * parameter.
676  *
677  * 'paths' is a list of possible paths that all generate the same relation
678  * 'pathkeys' represents a required ordering (in canonical form!)
679  * 'required_outer' denotes allowable outer relations for parameterized paths
680  * 'fraction' is the fraction of the total tuples expected to be retrieved
681  */
682 Path *
684  List *pathkeys,
685  Relids required_outer,
686  double fraction)
687 {
688  Path *matched_path = NULL;
689  ListCell *l;
690 
691  foreach(l, paths)
692  {
693  Path *path = (Path *) lfirst(l);
694 
695  /*
696  * Since cost comparison is a lot cheaper than pathkey comparison, do
697  * that first. (XXX is that still true?)
698  */
699  if (matched_path != NULL &&
700  compare_fractional_path_costs(matched_path, path, fraction) <= 0)
701  continue;
702 
703  if (pathkeys_contained_in(pathkeys, path->pathkeys) &&
704  bms_is_subset(PATH_REQ_OUTER(path), required_outer))
705  matched_path = path;
706  }
707  return matched_path;
708 }
709 
710 
711 /*
712  * get_cheapest_parallel_safe_total_inner
713  * Find the unparameterized parallel-safe path with the least total cost.
714  */
715 Path *
717 {
718  ListCell *l;
719 
720  foreach(l, paths)
721  {
722  Path *innerpath = (Path *) lfirst(l);
723 
724  if (innerpath->parallel_safe &&
725  bms_is_empty(PATH_REQ_OUTER(innerpath)))
726  return innerpath;
727  }
728 
729  return NULL;
730 }
731 
732 /****************************************************************************
733  * NEW PATHKEY FORMATION
734  ****************************************************************************/
735 
736 /*
737  * build_index_pathkeys
738  * Build a pathkeys list that describes the ordering induced by an index
739  * scan using the given index. (Note that an unordered index doesn't
740  * induce any ordering, so we return NIL.)
741  *
742  * If 'scandir' is BackwardScanDirection, build pathkeys representing a
743  * backwards scan of the index.
744  *
745  * We iterate only key columns of covering indexes, since non-key columns
746  * don't influence index ordering. The result is canonical, meaning that
747  * redundant pathkeys are removed; it may therefore have fewer entries than
748  * there are key columns in the index.
749  *
750  * Another reason for stopping early is that we may be able to tell that
751  * an index column's sort order is uninteresting for this query. However,
752  * that test is just based on the existence of an EquivalenceClass and not
753  * on position in pathkey lists, so it's not complete. Caller should call
754  * truncate_useless_pathkeys() to possibly remove more pathkeys.
755  */
756 List *
759  ScanDirection scandir)
760 {
761  List *retval = NIL;
762  ListCell *lc;
763  int i;
764 
765  if (index->sortopfamily == NULL)
766  return NIL; /* non-orderable index */
767 
768  i = 0;
769  foreach(lc, index->indextlist)
770  {
771  TargetEntry *indextle = (TargetEntry *) lfirst(lc);
772  Expr *indexkey;
773  bool reverse_sort;
774  bool nulls_first;
775  PathKey *cpathkey;
776 
777  /*
778  * INCLUDE columns are stored in index unordered, so they don't
779  * support ordered index scan.
780  */
781  if (i >= index->nkeycolumns)
782  break;
783 
784  /* We assume we don't need to make a copy of the tlist item */
785  indexkey = indextle->expr;
786 
787  if (ScanDirectionIsBackward(scandir))
788  {
789  reverse_sort = !index->reverse_sort[i];
790  nulls_first = !index->nulls_first[i];
791  }
792  else
793  {
794  reverse_sort = index->reverse_sort[i];
795  nulls_first = index->nulls_first[i];
796  }
797 
798  /*
799  * OK, try to make a canonical pathkey for this sort key.
800  */
801  cpathkey = make_pathkey_from_sortinfo(root,
802  indexkey,
803  index->sortopfamily[i],
804  index->opcintype[i],
805  index->indexcollations[i],
806  reverse_sort,
807  nulls_first,
808  0,
809  index->rel->relids,
810  false);
811 
812  if (cpathkey)
813  {
814  /*
815  * We found the sort key in an EquivalenceClass, so it's relevant
816  * for this query. Add it to list, unless it's redundant.
817  */
818  if (!pathkey_is_redundant(cpathkey, retval))
819  retval = lappend(retval, cpathkey);
820  }
821  else
822  {
823  /*
824  * Boolean index keys might be redundant even if they do not
825  * appear in an EquivalenceClass, because of our special treatment
826  * of boolean equality conditions --- see the comment for
827  * indexcol_is_bool_constant_for_query(). If that applies, we can
828  * continue to examine lower-order index columns. Otherwise, the
829  * sort key is not an interesting sort order for this query, so we
830  * should stop considering index columns; any lower-order sort
831  * keys won't be useful either.
832  */
834  break;
835  }
836 
837  i++;
838  }
839 
840  return retval;
841 }
842 
843 /*
844  * partkey_is_bool_constant_for_query
845  *
846  * If a partition key column is constrained to have a constant value by the
847  * query's WHERE conditions, then it's irrelevant for sort-order
848  * considerations. Usually that means we have a restriction clause
849  * WHERE partkeycol = constant, which gets turned into an EquivalenceClass
850  * containing a constant, which is recognized as redundant by
851  * build_partition_pathkeys(). But if the partition key column is a
852  * boolean variable (or expression), then we are not going to see such a
853  * WHERE clause, because expression preprocessing will have simplified it
854  * to "WHERE partkeycol" or "WHERE NOT partkeycol". So we are not going
855  * to have a matching EquivalenceClass (unless the query also contains
856  * "ORDER BY partkeycol"). To allow such cases to work the same as they would
857  * for non-boolean values, this function is provided to detect whether the
858  * specified partition key column matches a boolean restriction clause.
859  */
860 static bool
862 {
863  PartitionScheme partscheme = partrel->part_scheme;
864  ListCell *lc;
865 
866  /*
867  * If the partkey isn't boolean, we can't possibly get a match.
868  *
869  * Partitioning currently can only use built-in AMs, so checking for
870  * built-in boolean opfamilies is good enough.
871  */
872  if (!IsBuiltinBooleanOpfamily(partscheme->partopfamily[partkeycol]))
873  return false;
874 
875  /* Check each restriction clause for the partitioned rel */
876  foreach(lc, partrel->baserestrictinfo)
877  {
878  RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
879 
880  /* Ignore pseudoconstant quals, they won't match */
881  if (rinfo->pseudoconstant)
882  continue;
883 
884  /* See if we can match the clause's expression to the partkey column */
885  if (matches_boolean_partition_clause(rinfo, partrel, partkeycol))
886  return true;
887  }
888 
889  return false;
890 }
891 
892 /*
893  * matches_boolean_partition_clause
894  * Determine if the boolean clause described by rinfo matches
895  * partrel's partkeycol-th partition key column.
896  *
897  * "Matches" can be either an exact match (equivalent to partkey = true),
898  * or a NOT above an exact match (equivalent to partkey = false).
899  */
900 static bool
902  RelOptInfo *partrel, int partkeycol)
903 {
904  Node *clause = (Node *) rinfo->clause;
905  Node *partexpr = (Node *) linitial(partrel->partexprs[partkeycol]);
906 
907  /* Direct match? */
908  if (equal(partexpr, clause))
909  return true;
910  /* NOT clause? */
911  else if (is_notclause(clause))
912  {
913  Node *arg = (Node *) get_notclausearg((Expr *) clause);
914 
915  if (equal(partexpr, arg))
916  return true;
917  }
918 
919  return false;
920 }
921 
922 /*
923  * build_partition_pathkeys
924  * Build a pathkeys list that describes the ordering induced by the
925  * partitions of partrel, under either forward or backward scan
926  * as per scandir.
927  *
928  * Caller must have checked that the partitions are properly ordered,
929  * as detected by partitions_are_ordered().
930  *
931  * Sets *partialkeys to true if pathkeys were only built for a prefix of the
932  * partition key, or false if the pathkeys include all columns of the
933  * partition key.
934  */
935 List *
937  ScanDirection scandir, bool *partialkeys)
938 {
939  List *retval = NIL;
940  PartitionScheme partscheme = partrel->part_scheme;
941  int i;
942 
943  Assert(partscheme != NULL);
944  Assert(partitions_are_ordered(partrel->boundinfo, partrel->live_parts));
945  /* For now, we can only cope with baserels */
946  Assert(IS_SIMPLE_REL(partrel));
947 
948  for (i = 0; i < partscheme->partnatts; i++)
949  {
950  PathKey *cpathkey;
951  Expr *keyCol = (Expr *) linitial(partrel->partexprs[i]);
952 
953  /*
954  * Try to make a canonical pathkey for this partkey.
955  *
956  * We assume the PartitionDesc lists any NULL partition last, so we
957  * treat the scan like a NULLS LAST index: we have nulls_first for
958  * backwards scan only.
959  */
960  cpathkey = make_pathkey_from_sortinfo(root,
961  keyCol,
962  partscheme->partopfamily[i],
963  partscheme->partopcintype[i],
964  partscheme->partcollation[i],
965  ScanDirectionIsBackward(scandir),
966  ScanDirectionIsBackward(scandir),
967  0,
968  partrel->relids,
969  false);
970 
971 
972  if (cpathkey)
973  {
974  /*
975  * We found the sort key in an EquivalenceClass, so it's relevant
976  * for this query. Add it to list, unless it's redundant.
977  */
978  if (!pathkey_is_redundant(cpathkey, retval))
979  retval = lappend(retval, cpathkey);
980  }
981  else
982  {
983  /*
984  * Boolean partition keys might be redundant even if they do not
985  * appear in an EquivalenceClass, because of our special treatment
986  * of boolean equality conditions --- see the comment for
987  * partkey_is_bool_constant_for_query(). If that applies, we can
988  * continue to examine lower-order partition keys. Otherwise, the
989  * sort key is not an interesting sort order for this query, so we
990  * should stop considering partition columns; any lower-order sort
991  * keys won't be useful either.
992  */
993  if (!partkey_is_bool_constant_for_query(partrel, i))
994  {
995  *partialkeys = true;
996  return retval;
997  }
998  }
999  }
1000 
1001  *partialkeys = false;
1002  return retval;
1003 }
1004 
1005 /*
1006  * build_expression_pathkey
1007  * Build a pathkeys list that describes an ordering by a single expression
1008  * using the given sort operator.
1009  *
1010  * expr and rel are as for make_pathkey_from_sortinfo.
1011  * We induce the other arguments assuming default sort order for the operator.
1012  *
1013  * Similarly to make_pathkey_from_sortinfo, the result is NIL if create_it
1014  * is false and the expression isn't already in some EquivalenceClass.
1015  */
1016 List *
1018  Expr *expr,
1019  Oid opno,
1020  Relids rel,
1021  bool create_it)
1022 {
1023  List *pathkeys;
1024  Oid opfamily,
1025  opcintype;
1026  int16 strategy;
1027  PathKey *cpathkey;
1028 
1029  /* Find the operator in pg_amop --- failure shouldn't happen */
1030  if (!get_ordering_op_properties(opno,
1031  &opfamily, &opcintype, &strategy))
1032  elog(ERROR, "operator %u is not a valid ordering operator",
1033  opno);
1034 
1035  cpathkey = make_pathkey_from_sortinfo(root,
1036  expr,
1037  opfamily,
1038  opcintype,
1039  exprCollation((Node *) expr),
1040  (strategy == BTGreaterStrategyNumber),
1041  (strategy == BTGreaterStrategyNumber),
1042  0,
1043  rel,
1044  create_it);
1045 
1046  if (cpathkey)
1047  pathkeys = list_make1(cpathkey);
1048  else
1049  pathkeys = NIL;
1050 
1051  return pathkeys;
1052 }
1053 
1054 /*
1055  * convert_subquery_pathkeys
1056  * Build a pathkeys list that describes the ordering of a subquery's
1057  * result, in the terms of the outer query. This is essentially a
1058  * task of conversion.
1059  *
1060  * 'rel': outer query's RelOptInfo for the subquery relation.
1061  * 'subquery_pathkeys': the subquery's output pathkeys, in its terms.
1062  * 'subquery_tlist': the subquery's output targetlist, in its terms.
1063  *
1064  * We intentionally don't do truncate_useless_pathkeys() here, because there
1065  * are situations where seeing the raw ordering of the subquery is helpful.
1066  * For example, if it returns ORDER BY x DESC, that may prompt us to
1067  * construct a mergejoin using DESC order rather than ASC order; but the
1068  * right_merge_direction heuristic would have us throw the knowledge away.
1069  */
1070 List *
1072  List *subquery_pathkeys,
1073  List *subquery_tlist)
1074 {
1075  List *retval = NIL;
1076  int retvallen = 0;
1077  int outer_query_keys = list_length(root->query_pathkeys);
1078  ListCell *i;
1079 
1080  foreach(i, subquery_pathkeys)
1081  {
1082  PathKey *sub_pathkey = (PathKey *) lfirst(i);
1083  EquivalenceClass *sub_eclass = sub_pathkey->pk_eclass;
1084  PathKey *best_pathkey = NULL;
1085 
1086  if (sub_eclass->ec_has_volatile)
1087  {
1088  /*
1089  * If the sub_pathkey's EquivalenceClass is volatile, then it must
1090  * have come from an ORDER BY clause, and we have to match it to
1091  * that same targetlist entry.
1092  */
1093  TargetEntry *tle;
1094  Var *outer_var;
1095 
1096  if (sub_eclass->ec_sortref == 0) /* can't happen */
1097  elog(ERROR, "volatile EquivalenceClass has no sortref");
1098  tle = get_sortgroupref_tle(sub_eclass->ec_sortref, subquery_tlist);
1099  Assert(tle);
1100  /* Is TLE actually available to the outer query? */
1101  outer_var = find_var_for_subquery_tle(rel, tle);
1102  if (outer_var)
1103  {
1104  /* We can represent this sub_pathkey */
1105  EquivalenceMember *sub_member;
1106  EquivalenceClass *outer_ec;
1107 
1108  Assert(list_length(sub_eclass->ec_members) == 1);
1109  sub_member = (EquivalenceMember *) linitial(sub_eclass->ec_members);
1110 
1111  /*
1112  * Note: it might look funny to be setting sortref = 0 for a
1113  * reference to a volatile sub_eclass. However, the
1114  * expression is *not* volatile in the outer query: it's just
1115  * a Var referencing whatever the subquery emitted. (IOW, the
1116  * outer query isn't going to re-execute the volatile
1117  * expression itself.) So this is okay.
1118  */
1119  outer_ec =
1121  (Expr *) outer_var,
1122  sub_eclass->ec_opfamilies,
1123  sub_member->em_datatype,
1124  sub_eclass->ec_collation,
1125  0,
1126  rel->relids,
1127  false);
1128 
1129  /*
1130  * If we don't find a matching EC, sub-pathkey isn't
1131  * interesting to the outer query
1132  */
1133  if (outer_ec)
1134  best_pathkey =
1136  outer_ec,
1137  sub_pathkey->pk_opfamily,
1138  sub_pathkey->pk_strategy,
1139  sub_pathkey->pk_nulls_first);
1140  }
1141  }
1142  else
1143  {
1144  /*
1145  * Otherwise, the sub_pathkey's EquivalenceClass could contain
1146  * multiple elements (representing knowledge that multiple items
1147  * are effectively equal). Each element might match none, one, or
1148  * more of the output columns that are visible to the outer query.
1149  * This means we may have multiple possible representations of the
1150  * sub_pathkey in the context of the outer query. Ideally we
1151  * would generate them all and put them all into an EC of the
1152  * outer query, thereby propagating equality knowledge up to the
1153  * outer query. Right now we cannot do so, because the outer
1154  * query's EquivalenceClasses are already frozen when this is
1155  * called. Instead we prefer the one that has the highest "score"
1156  * (number of EC peers, plus one if it matches the outer
1157  * query_pathkeys). This is the most likely to be useful in the
1158  * outer query.
1159  */
1160  int best_score = -1;
1161  ListCell *j;
1162 
1163  foreach(j, sub_eclass->ec_members)
1164  {
1165  EquivalenceMember *sub_member = (EquivalenceMember *) lfirst(j);
1166  Expr *sub_expr = sub_member->em_expr;
1167  Oid sub_expr_type = sub_member->em_datatype;
1168  Oid sub_expr_coll = sub_eclass->ec_collation;
1169  ListCell *k;
1170 
1171  if (sub_member->em_is_child)
1172  continue; /* ignore children here */
1173 
1174  foreach(k, subquery_tlist)
1175  {
1176  TargetEntry *tle = (TargetEntry *) lfirst(k);
1177  Var *outer_var;
1178  Expr *tle_expr;
1179  EquivalenceClass *outer_ec;
1180  PathKey *outer_pk;
1181  int score;
1182 
1183  /* Is TLE actually available to the outer query? */
1184  outer_var = find_var_for_subquery_tle(rel, tle);
1185  if (!outer_var)
1186  continue;
1187 
1188  /*
1189  * The targetlist entry is considered to match if it
1190  * matches after sort-key canonicalization. That is
1191  * needed since the sub_expr has been through the same
1192  * process.
1193  */
1194  tle_expr = canonicalize_ec_expression(tle->expr,
1195  sub_expr_type,
1196  sub_expr_coll);
1197  if (!equal(tle_expr, sub_expr))
1198  continue;
1199 
1200  /* See if we have a matching EC for the TLE */
1201  outer_ec = get_eclass_for_sort_expr(root,
1202  (Expr *) outer_var,
1203  sub_eclass->ec_opfamilies,
1204  sub_expr_type,
1205  sub_expr_coll,
1206  0,
1207  rel->relids,
1208  false);
1209 
1210  /*
1211  * If we don't find a matching EC, this sub-pathkey isn't
1212  * interesting to the outer query
1213  */
1214  if (!outer_ec)
1215  continue;
1216 
1217  outer_pk = make_canonical_pathkey(root,
1218  outer_ec,
1219  sub_pathkey->pk_opfamily,
1220  sub_pathkey->pk_strategy,
1221  sub_pathkey->pk_nulls_first);
1222  /* score = # of equivalence peers */
1223  score = list_length(outer_ec->ec_members) - 1;
1224  /* +1 if it matches the proper query_pathkeys item */
1225  if (retvallen < outer_query_keys &&
1226  list_nth(root->query_pathkeys, retvallen) == outer_pk)
1227  score++;
1228  if (score > best_score)
1229  {
1230  best_pathkey = outer_pk;
1231  best_score = score;
1232  }
1233  }
1234  }
1235  }
1236 
1237  /*
1238  * If we couldn't find a representation of this sub_pathkey, we're
1239  * done (we can't use the ones to its right, either).
1240  */
1241  if (!best_pathkey)
1242  break;
1243 
1244  /*
1245  * Eliminate redundant ordering info; could happen if outer query
1246  * equivalences subquery keys...
1247  */
1248  if (!pathkey_is_redundant(best_pathkey, retval))
1249  {
1250  retval = lappend(retval, best_pathkey);
1251  retvallen++;
1252  }
1253  }
1254 
1255  return retval;
1256 }
1257 
1258 /*
1259  * find_var_for_subquery_tle
1260  *
1261  * If the given subquery tlist entry is due to be emitted by the subquery's
1262  * scan node, return a Var for it, else return NULL.
1263  *
1264  * We need this to ensure that we don't return pathkeys describing values
1265  * that are unavailable above the level of the subquery scan.
1266  */
1267 static Var *
1269 {
1270  ListCell *lc;
1271 
1272  /* If the TLE is resjunk, it's certainly not visible to the outer query */
1273  if (tle->resjunk)
1274  return NULL;
1275 
1276  /* Search the rel's targetlist to see what it will return */
1277  foreach(lc, rel->reltarget->exprs)
1278  {
1279  Var *var = (Var *) lfirst(lc);
1280 
1281  /* Ignore placeholders */
1282  if (!IsA(var, Var))
1283  continue;
1284  Assert(var->varno == rel->relid);
1285 
1286  /* If we find a Var referencing this TLE, we're good */
1287  if (var->varattno == tle->resno)
1288  return copyObject(var); /* Make a copy for safety */
1289  }
1290  return NULL;
1291 }
1292 
1293 /*
1294  * build_join_pathkeys
1295  * Build the path keys for a join relation constructed by mergejoin or
1296  * nestloop join. This is normally the same as the outer path's keys.
1297  *
1298  * EXCEPTION: in a FULL, RIGHT or RIGHT_ANTI join, we cannot treat the
1299  * result as having the outer path's path keys, because null lefthand rows
1300  * may be inserted at random points. It must be treated as unsorted.
1301  *
1302  * We truncate away any pathkeys that are uninteresting for higher joins.
1303  *
1304  * 'joinrel' is the join relation that paths are being formed for
1305  * 'jointype' is the join type (inner, left, full, etc)
1306  * 'outer_pathkeys' is the list of the current outer path's path keys
1307  *
1308  * Returns the list of new path keys.
1309  */
1310 List *
1312  RelOptInfo *joinrel,
1313  JoinType jointype,
1314  List *outer_pathkeys)
1315 {
1316  if (jointype == JOIN_FULL ||
1317  jointype == JOIN_RIGHT ||
1318  jointype == JOIN_RIGHT_ANTI)
1319  return NIL;
1320 
1321  /*
1322  * This used to be quite a complex bit of code, but now that all pathkey
1323  * sublists start out life canonicalized, we don't have to do a darn thing
1324  * here!
1325  *
1326  * We do, however, need to truncate the pathkeys list, since it may
1327  * contain pathkeys that were useful for forming this joinrel but are
1328  * uninteresting to higher levels.
1329  */
1330  return truncate_useless_pathkeys(root, joinrel, outer_pathkeys);
1331 }
1332 
1333 /****************************************************************************
1334  * PATHKEYS AND SORT CLAUSES
1335  ****************************************************************************/
1336 
1337 /*
1338  * make_pathkeys_for_sortclauses
1339  * Generate a pathkeys list that represents the sort order specified
1340  * by a list of SortGroupClauses
1341  *
1342  * The resulting PathKeys are always in canonical form. (Actually, there
1343  * is no longer any code anywhere that creates non-canonical PathKeys.)
1344  *
1345  * 'sortclauses' is a list of SortGroupClause nodes
1346  * 'tlist' is the targetlist to find the referenced tlist entries in
1347  */
1348 List *
1350  List *sortclauses,
1351  List *tlist)
1352 {
1353  List *result;
1354  bool sortable;
1355 
1357  &sortclauses,
1358  tlist,
1359  false,
1360  &sortable);
1361  /* It's caller error if not all clauses were sortable */
1362  Assert(sortable);
1363  return result;
1364 }
1365 
1366 /*
1367  * make_pathkeys_for_sortclauses_extended
1368  * Generate a pathkeys list that represents the sort order specified
1369  * by a list of SortGroupClauses
1370  *
1371  * The comments for make_pathkeys_for_sortclauses apply here too. In addition:
1372  *
1373  * If remove_redundant is true, then any sort clauses that are found to
1374  * give rise to redundant pathkeys are removed from the sortclauses list
1375  * (which therefore must be pass-by-reference in this version).
1376  *
1377  * *sortable is set to true if all the sort clauses are in fact sortable.
1378  * If any are not, they are ignored except for setting *sortable false.
1379  * (In that case, the output pathkey list isn't really useful. However,
1380  * we process the whole sortclauses list anyway, because it's still valid
1381  * to remove any clauses that can be proven redundant via the eclass logic.
1382  * Even though we'll have to hash in that case, we might as well not hash
1383  * redundant columns.)
1384  */
1385 List *
1387  List **sortclauses,
1388  List *tlist,
1389  bool remove_redundant,
1390  bool *sortable)
1391 {
1392  List *pathkeys = NIL;
1393  ListCell *l;
1394 
1395  *sortable = true;
1396  foreach(l, *sortclauses)
1397  {
1398  SortGroupClause *sortcl = (SortGroupClause *) lfirst(l);
1399  Expr *sortkey;
1400  PathKey *pathkey;
1401 
1402  sortkey = (Expr *) get_sortgroupclause_expr(sortcl, tlist);
1403  if (!OidIsValid(sortcl->sortop))
1404  {
1405  *sortable = false;
1406  continue;
1407  }
1408  pathkey = make_pathkey_from_sortop(root,
1409  sortkey,
1410  sortcl->sortop,
1411  sortcl->nulls_first,
1412  sortcl->tleSortGroupRef,
1413  true);
1414 
1415  /* Canonical form eliminates redundant ordering keys */
1416  if (!pathkey_is_redundant(pathkey, pathkeys))
1417  pathkeys = lappend(pathkeys, pathkey);
1418  else if (remove_redundant)
1419  *sortclauses = foreach_delete_current(*sortclauses, l);
1420  }
1421  return pathkeys;
1422 }
1423 
1424 /****************************************************************************
1425  * PATHKEYS AND MERGECLAUSES
1426  ****************************************************************************/
1427 
1428 /*
1429  * initialize_mergeclause_eclasses
1430  * Set the EquivalenceClass links in a mergeclause restrictinfo.
1431  *
1432  * RestrictInfo contains fields in which we may cache pointers to
1433  * EquivalenceClasses for the left and right inputs of the mergeclause.
1434  * (If the mergeclause is a true equivalence clause these will be the
1435  * same EquivalenceClass, otherwise not.) If the mergeclause is either
1436  * used to generate an EquivalenceClass, or derived from an EquivalenceClass,
1437  * then it's easy to set up the left_ec and right_ec members --- otherwise,
1438  * this function should be called to set them up. We will generate new
1439  * EquivalenceClauses if necessary to represent the mergeclause's left and
1440  * right sides.
1441  *
1442  * Note this is called before EC merging is complete, so the links won't
1443  * necessarily point to canonical ECs. Before they are actually used for
1444  * anything, update_mergeclause_eclasses must be called to ensure that
1445  * they've been updated to point to canonical ECs.
1446  */
1447 void
1449 {
1450  Expr *clause = restrictinfo->clause;
1451  Oid lefttype,
1452  righttype;
1453 
1454  /* Should be a mergeclause ... */
1455  Assert(restrictinfo->mergeopfamilies != NIL);
1456  /* ... with links not yet set */
1457  Assert(restrictinfo->left_ec == NULL);
1458  Assert(restrictinfo->right_ec == NULL);
1459 
1460  /* Need the declared input types of the operator */
1461  op_input_types(((OpExpr *) clause)->opno, &lefttype, &righttype);
1462 
1463  /* Find or create a matching EquivalenceClass for each side */
1464  restrictinfo->left_ec =
1466  (Expr *) get_leftop(clause),
1467  restrictinfo->mergeopfamilies,
1468  lefttype,
1469  ((OpExpr *) clause)->inputcollid,
1470  0,
1471  NULL,
1472  true);
1473  restrictinfo->right_ec =
1475  (Expr *) get_rightop(clause),
1476  restrictinfo->mergeopfamilies,
1477  righttype,
1478  ((OpExpr *) clause)->inputcollid,
1479  0,
1480  NULL,
1481  true);
1482 }
1483 
1484 /*
1485  * update_mergeclause_eclasses
1486  * Make the cached EquivalenceClass links valid in a mergeclause
1487  * restrictinfo.
1488  *
1489  * These pointers should have been set by process_equivalence or
1490  * initialize_mergeclause_eclasses, but they might have been set to
1491  * non-canonical ECs that got merged later. Chase up to the canonical
1492  * merged parent if so.
1493  */
1494 void
1496 {
1497  /* Should be a merge clause ... */
1498  Assert(restrictinfo->mergeopfamilies != NIL);
1499  /* ... with pointers already set */
1500  Assert(restrictinfo->left_ec != NULL);
1501  Assert(restrictinfo->right_ec != NULL);
1502 
1503  /* Chase up to the top as needed */
1504  while (restrictinfo->left_ec->ec_merged)
1505  restrictinfo->left_ec = restrictinfo->left_ec->ec_merged;
1506  while (restrictinfo->right_ec->ec_merged)
1507  restrictinfo->right_ec = restrictinfo->right_ec->ec_merged;
1508 }
1509 
1510 /*
1511  * find_mergeclauses_for_outer_pathkeys
1512  * This routine attempts to find a list of mergeclauses that can be
1513  * used with a specified ordering for the join's outer relation.
1514  * If successful, it returns a list of mergeclauses.
1515  *
1516  * 'pathkeys' is a pathkeys list showing the ordering of an outer-rel path.
1517  * 'restrictinfos' is a list of mergejoinable restriction clauses for the
1518  * join relation being formed, in no particular order.
1519  *
1520  * The restrictinfos must be marked (via outer_is_left) to show which side
1521  * of each clause is associated with the current outer path. (See
1522  * select_mergejoin_clauses())
1523  *
1524  * The result is NIL if no merge can be done, else a maximal list of
1525  * usable mergeclauses (represented as a list of their restrictinfo nodes).
1526  * The list is ordered to match the pathkeys, as required for execution.
1527  */
1528 List *
1530  List *pathkeys,
1531  List *restrictinfos)
1532 {
1533  List *mergeclauses = NIL;
1534  ListCell *i;
1535 
1536  /* make sure we have eclasses cached in the clauses */
1537  foreach(i, restrictinfos)
1538  {
1539  RestrictInfo *rinfo = (RestrictInfo *) lfirst(i);
1540 
1541  update_mergeclause_eclasses(root, rinfo);
1542  }
1543 
1544  foreach(i, pathkeys)
1545  {
1546  PathKey *pathkey = (PathKey *) lfirst(i);
1547  EquivalenceClass *pathkey_ec = pathkey->pk_eclass;
1548  List *matched_restrictinfos = NIL;
1549  ListCell *j;
1550 
1551  /*----------
1552  * A mergejoin clause matches a pathkey if it has the same EC.
1553  * If there are multiple matching clauses, take them all. In plain
1554  * inner-join scenarios we expect only one match, because
1555  * equivalence-class processing will have removed any redundant
1556  * mergeclauses. However, in outer-join scenarios there might be
1557  * multiple matches. An example is
1558  *
1559  * select * from a full join b
1560  * on a.v1 = b.v1 and a.v2 = b.v2 and a.v1 = b.v2;
1561  *
1562  * Given the pathkeys ({a.v1}, {a.v2}) it is okay to return all three
1563  * clauses (in the order a.v1=b.v1, a.v1=b.v2, a.v2=b.v2) and indeed
1564  * we *must* do so or we will be unable to form a valid plan.
1565  *
1566  * We expect that the given pathkeys list is canonical, which means
1567  * no two members have the same EC, so it's not possible for this
1568  * code to enter the same mergeclause into the result list twice.
1569  *
1570  * It's possible that multiple matching clauses might have different
1571  * ECs on the other side, in which case the order we put them into our
1572  * result makes a difference in the pathkeys required for the inner
1573  * input rel. However this routine hasn't got any info about which
1574  * order would be best, so we don't worry about that.
1575  *
1576  * It's also possible that the selected mergejoin clauses produce
1577  * a noncanonical ordering of pathkeys for the inner side, ie, we
1578  * might select clauses that reference b.v1, b.v2, b.v1 in that
1579  * order. This is not harmful in itself, though it suggests that
1580  * the clauses are partially redundant. Since the alternative is
1581  * to omit mergejoin clauses and thereby possibly fail to generate a
1582  * plan altogether, we live with it. make_inner_pathkeys_for_merge()
1583  * has to delete duplicates when it constructs the inner pathkeys
1584  * list, and we also have to deal with such cases specially in
1585  * create_mergejoin_plan().
1586  *----------
1587  */
1588  foreach(j, restrictinfos)
1589  {
1590  RestrictInfo *rinfo = (RestrictInfo *) lfirst(j);
1591  EquivalenceClass *clause_ec;
1592 
1593  clause_ec = rinfo->outer_is_left ?
1594  rinfo->left_ec : rinfo->right_ec;
1595  if (clause_ec == pathkey_ec)
1596  matched_restrictinfos = lappend(matched_restrictinfos, rinfo);
1597  }
1598 
1599  /*
1600  * If we didn't find a mergeclause, we're done --- any additional
1601  * sort-key positions in the pathkeys are useless. (But we can still
1602  * mergejoin if we found at least one mergeclause.)
1603  */
1604  if (matched_restrictinfos == NIL)
1605  break;
1606 
1607  /*
1608  * If we did find usable mergeclause(s) for this sort-key position,
1609  * add them to result list.
1610  */
1611  mergeclauses = list_concat(mergeclauses, matched_restrictinfos);
1612  }
1613 
1614  return mergeclauses;
1615 }
1616 
1617 /*
1618  * select_outer_pathkeys_for_merge
1619  * Builds a pathkey list representing a possible sort ordering
1620  * that can be used with the given mergeclauses.
1621  *
1622  * 'mergeclauses' is a list of RestrictInfos for mergejoin clauses
1623  * that will be used in a merge join.
1624  * 'joinrel' is the join relation we are trying to construct.
1625  *
1626  * The restrictinfos must be marked (via outer_is_left) to show which side
1627  * of each clause is associated with the current outer path. (See
1628  * select_mergejoin_clauses())
1629  *
1630  * Returns a pathkeys list that can be applied to the outer relation.
1631  *
1632  * Since we assume here that a sort is required, there is no particular use
1633  * in matching any available ordering of the outerrel. (joinpath.c has an
1634  * entirely separate code path for considering sort-free mergejoins.) Rather,
1635  * it's interesting to try to match, or match a prefix of the requested
1636  * query_pathkeys so that a second output sort may be avoided or an
1637  * incremental sort may be done instead. We can get away with just a prefix
1638  * of the query_pathkeys when that prefix covers the entire join condition.
1639  * Failing that, we try to list "more popular" keys (those with the most
1640  * unmatched EquivalenceClass peers) earlier, in hopes of making the resulting
1641  * ordering useful for as many higher-level mergejoins as possible.
1642  */
1643 List *
1645  List *mergeclauses,
1646  RelOptInfo *joinrel)
1647 {
1648  List *pathkeys = NIL;
1649  int nClauses = list_length(mergeclauses);
1650  EquivalenceClass **ecs;
1651  int *scores;
1652  int necs;
1653  ListCell *lc;
1654  int j;
1655 
1656  /* Might have no mergeclauses */
1657  if (nClauses == 0)
1658  return NIL;
1659 
1660  /*
1661  * Make arrays of the ECs used by the mergeclauses (dropping any
1662  * duplicates) and their "popularity" scores.
1663  */
1664  ecs = (EquivalenceClass **) palloc(nClauses * sizeof(EquivalenceClass *));
1665  scores = (int *) palloc(nClauses * sizeof(int));
1666  necs = 0;
1667 
1668  foreach(lc, mergeclauses)
1669  {
1670  RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
1671  EquivalenceClass *oeclass;
1672  int score;
1673  ListCell *lc2;
1674 
1675  /* get the outer eclass */
1676  update_mergeclause_eclasses(root, rinfo);
1677 
1678  if (rinfo->outer_is_left)
1679  oeclass = rinfo->left_ec;
1680  else
1681  oeclass = rinfo->right_ec;
1682 
1683  /* reject duplicates */
1684  for (j = 0; j < necs; j++)
1685  {
1686  if (ecs[j] == oeclass)
1687  break;
1688  }
1689  if (j < necs)
1690  continue;
1691 
1692  /* compute score */
1693  score = 0;
1694  foreach(lc2, oeclass->ec_members)
1695  {
1697 
1698  /* Potential future join partner? */
1699  if (!em->em_is_const && !em->em_is_child &&
1700  !bms_overlap(em->em_relids, joinrel->relids))
1701  score++;
1702  }
1703 
1704  ecs[necs] = oeclass;
1705  scores[necs] = score;
1706  necs++;
1707  }
1708 
1709  /*
1710  * Find out if we have all the ECs mentioned in query_pathkeys; if so we
1711  * can generate a sort order that's also useful for final output. If we
1712  * only have a prefix of the query_pathkeys, and that prefix is the entire
1713  * join condition, then it's useful to use the prefix as the pathkeys as
1714  * this increases the chances that an incremental sort will be able to be
1715  * used by the upper planner.
1716  */
1717  if (root->query_pathkeys)
1718  {
1719  int matches = 0;
1720 
1721  foreach(lc, root->query_pathkeys)
1722  {
1723  PathKey *query_pathkey = (PathKey *) lfirst(lc);
1724  EquivalenceClass *query_ec = query_pathkey->pk_eclass;
1725 
1726  for (j = 0; j < necs; j++)
1727  {
1728  if (ecs[j] == query_ec)
1729  break; /* found match */
1730  }
1731  if (j >= necs)
1732  break; /* didn't find match */
1733 
1734  matches++;
1735  }
1736  /* if we got to the end of the list, we have them all */
1737  if (lc == NULL)
1738  {
1739  /* copy query_pathkeys as starting point for our output */
1740  pathkeys = list_copy(root->query_pathkeys);
1741  /* mark their ECs as already-emitted */
1742  foreach(lc, root->query_pathkeys)
1743  {
1744  PathKey *query_pathkey = (PathKey *) lfirst(lc);
1745  EquivalenceClass *query_ec = query_pathkey->pk_eclass;
1746 
1747  for (j = 0; j < necs; j++)
1748  {
1749  if (ecs[j] == query_ec)
1750  {
1751  scores[j] = -1;
1752  break;
1753  }
1754  }
1755  }
1756  }
1757 
1758  /*
1759  * If we didn't match to all of the query_pathkeys, but did match to
1760  * all of the join clauses then we'll make use of these as partially
1761  * sorted input is better than nothing for the upper planner as it may
1762  * lead to incremental sorts instead of full sorts.
1763  */
1764  else if (matches == nClauses)
1765  {
1766  pathkeys = list_copy_head(root->query_pathkeys, matches);
1767 
1768  /* we have all of the join pathkeys, so nothing more to do */
1769  pfree(ecs);
1770  pfree(scores);
1771 
1772  return pathkeys;
1773  }
1774  }
1775 
1776  /*
1777  * Add remaining ECs to the list in popularity order, using a default sort
1778  * ordering. (We could use qsort() here, but the list length is usually
1779  * so small it's not worth it.)
1780  */
1781  for (;;)
1782  {
1783  int best_j;
1784  int best_score;
1785  EquivalenceClass *ec;
1786  PathKey *pathkey;
1787 
1788  best_j = 0;
1789  best_score = scores[0];
1790  for (j = 1; j < necs; j++)
1791  {
1792  if (scores[j] > best_score)
1793  {
1794  best_j = j;
1795  best_score = scores[j];
1796  }
1797  }
1798  if (best_score < 0)
1799  break; /* all done */
1800  ec = ecs[best_j];
1801  scores[best_j] = -1;
1802  pathkey = make_canonical_pathkey(root,
1803  ec,
1806  false);
1807  /* can't be redundant because no duplicate ECs */
1808  Assert(!pathkey_is_redundant(pathkey, pathkeys));
1809  pathkeys = lappend(pathkeys, pathkey);
1810  }
1811 
1812  pfree(ecs);
1813  pfree(scores);
1814 
1815  return pathkeys;
1816 }
1817 
1818 /*
1819  * make_inner_pathkeys_for_merge
1820  * Builds a pathkey list representing the explicit sort order that
1821  * must be applied to an inner path to make it usable with the
1822  * given mergeclauses.
1823  *
1824  * 'mergeclauses' is a list of RestrictInfos for the mergejoin clauses
1825  * that will be used in a merge join, in order.
1826  * 'outer_pathkeys' are the already-known canonical pathkeys for the outer
1827  * side of the join.
1828  *
1829  * The restrictinfos must be marked (via outer_is_left) to show which side
1830  * of each clause is associated with the current outer path. (See
1831  * select_mergejoin_clauses())
1832  *
1833  * Returns a pathkeys list that can be applied to the inner relation.
1834  *
1835  * Note that it is not this routine's job to decide whether sorting is
1836  * actually needed for a particular input path. Assume a sort is necessary;
1837  * just make the keys, eh?
1838  */
1839 List *
1841  List *mergeclauses,
1842  List *outer_pathkeys)
1843 {
1844  List *pathkeys = NIL;
1845  EquivalenceClass *lastoeclass;
1846  PathKey *opathkey;
1847  ListCell *lc;
1848  ListCell *lop;
1849 
1850  lastoeclass = NULL;
1851  opathkey = NULL;
1852  lop = list_head(outer_pathkeys);
1853 
1854  foreach(lc, mergeclauses)
1855  {
1856  RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
1857  EquivalenceClass *oeclass;
1858  EquivalenceClass *ieclass;
1859  PathKey *pathkey;
1860 
1861  update_mergeclause_eclasses(root, rinfo);
1862 
1863  if (rinfo->outer_is_left)
1864  {
1865  oeclass = rinfo->left_ec;
1866  ieclass = rinfo->right_ec;
1867  }
1868  else
1869  {
1870  oeclass = rinfo->right_ec;
1871  ieclass = rinfo->left_ec;
1872  }
1873 
1874  /* outer eclass should match current or next pathkeys */
1875  /* we check this carefully for debugging reasons */
1876  if (oeclass != lastoeclass)
1877  {
1878  if (!lop)
1879  elog(ERROR, "too few pathkeys for mergeclauses");
1880  opathkey = (PathKey *) lfirst(lop);
1881  lop = lnext(outer_pathkeys, lop);
1882  lastoeclass = opathkey->pk_eclass;
1883  if (oeclass != lastoeclass)
1884  elog(ERROR, "outer pathkeys do not match mergeclause");
1885  }
1886 
1887  /*
1888  * Often, we'll have same EC on both sides, in which case the outer
1889  * pathkey is also canonical for the inner side, and we can skip a
1890  * useless search.
1891  */
1892  if (ieclass == oeclass)
1893  pathkey = opathkey;
1894  else
1895  pathkey = make_canonical_pathkey(root,
1896  ieclass,
1897  opathkey->pk_opfamily,
1898  opathkey->pk_strategy,
1899  opathkey->pk_nulls_first);
1900 
1901  /*
1902  * Don't generate redundant pathkeys (which can happen if multiple
1903  * mergeclauses refer to the same EC). Because we do this, the output
1904  * pathkey list isn't necessarily ordered like the mergeclauses, which
1905  * complicates life for create_mergejoin_plan(). But if we didn't,
1906  * we'd have a noncanonical sort key list, which would be bad; for one
1907  * reason, it certainly wouldn't match any available sort order for
1908  * the input relation.
1909  */
1910  if (!pathkey_is_redundant(pathkey, pathkeys))
1911  pathkeys = lappend(pathkeys, pathkey);
1912  }
1913 
1914  return pathkeys;
1915 }
1916 
1917 /*
1918  * trim_mergeclauses_for_inner_pathkeys
1919  * This routine trims a list of mergeclauses to include just those that
1920  * work with a specified ordering for the join's inner relation.
1921  *
1922  * 'mergeclauses' is a list of RestrictInfos for mergejoin clauses for the
1923  * join relation being formed, in an order known to work for the
1924  * currently-considered sort ordering of the join's outer rel.
1925  * 'pathkeys' is a pathkeys list showing the ordering of an inner-rel path;
1926  * it should be equal to, or a truncation of, the result of
1927  * make_inner_pathkeys_for_merge for these mergeclauses.
1928  *
1929  * What we return will be a prefix of the given mergeclauses list.
1930  *
1931  * We need this logic because make_inner_pathkeys_for_merge's result isn't
1932  * necessarily in the same order as the mergeclauses. That means that if we
1933  * consider an inner-rel pathkey list that is a truncation of that result,
1934  * we might need to drop mergeclauses even though they match a surviving inner
1935  * pathkey. This happens when they are to the right of a mergeclause that
1936  * matches a removed inner pathkey.
1937  *
1938  * The mergeclauses must be marked (via outer_is_left) to show which side
1939  * of each clause is associated with the current outer path. (See
1940  * select_mergejoin_clauses())
1941  */
1942 List *
1944  List *mergeclauses,
1945  List *pathkeys)
1946 {
1947  List *new_mergeclauses = NIL;
1948  PathKey *pathkey;
1949  EquivalenceClass *pathkey_ec;
1950  bool matched_pathkey;
1951  ListCell *lip;
1952  ListCell *i;
1953 
1954  /* No pathkeys => no mergeclauses (though we don't expect this case) */
1955  if (pathkeys == NIL)
1956  return NIL;
1957  /* Initialize to consider first pathkey */
1958  lip = list_head(pathkeys);
1959  pathkey = (PathKey *) lfirst(lip);
1960  pathkey_ec = pathkey->pk_eclass;
1961  lip = lnext(pathkeys, lip);
1962  matched_pathkey = false;
1963 
1964  /* Scan mergeclauses to see how many we can use */
1965  foreach(i, mergeclauses)
1966  {
1967  RestrictInfo *rinfo = (RestrictInfo *) lfirst(i);
1968  EquivalenceClass *clause_ec;
1969 
1970  /* Assume we needn't do update_mergeclause_eclasses again here */
1971 
1972  /* Check clause's inner-rel EC against current pathkey */
1973  clause_ec = rinfo->outer_is_left ?
1974  rinfo->right_ec : rinfo->left_ec;
1975 
1976  /* If we don't have a match, attempt to advance to next pathkey */
1977  if (clause_ec != pathkey_ec)
1978  {
1979  /* If we had no clauses matching this inner pathkey, must stop */
1980  if (!matched_pathkey)
1981  break;
1982 
1983  /* Advance to next inner pathkey, if any */
1984  if (lip == NULL)
1985  break;
1986  pathkey = (PathKey *) lfirst(lip);
1987  pathkey_ec = pathkey->pk_eclass;
1988  lip = lnext(pathkeys, lip);
1989  matched_pathkey = false;
1990  }
1991 
1992  /* If mergeclause matches current inner pathkey, we can use it */
1993  if (clause_ec == pathkey_ec)
1994  {
1995  new_mergeclauses = lappend(new_mergeclauses, rinfo);
1996  matched_pathkey = true;
1997  }
1998  else
1999  {
2000  /* Else, no hope of adding any more mergeclauses */
2001  break;
2002  }
2003  }
2004 
2005  return new_mergeclauses;
2006 }
2007 
2008 
2009 /****************************************************************************
2010  * PATHKEY USEFULNESS CHECKS
2011  *
2012  * We only want to remember as many of the pathkeys of a path as have some
2013  * potential use, either for subsequent mergejoins or for meeting the query's
2014  * requested output ordering. This ensures that add_path() won't consider
2015  * a path to have a usefully different ordering unless it really is useful.
2016  * These routines check for usefulness of given pathkeys.
2017  ****************************************************************************/
2018 
2019 /*
2020  * pathkeys_useful_for_merging
2021  * Count the number of pathkeys that may be useful for mergejoins
2022  * above the given relation.
2023  *
2024  * We consider a pathkey potentially useful if it corresponds to the merge
2025  * ordering of either side of any joinclause for the rel. This might be
2026  * overoptimistic, since joinclauses that require different other relations
2027  * might never be usable at the same time, but trying to be exact is likely
2028  * to be more trouble than it's worth.
2029  *
2030  * To avoid doubling the number of mergejoin paths considered, we would like
2031  * to consider only one of the two scan directions (ASC or DESC) as useful
2032  * for merging for any given target column. The choice is arbitrary unless
2033  * one of the directions happens to match an ORDER BY key, in which case
2034  * that direction should be preferred, in hopes of avoiding a final sort step.
2035  * right_merge_direction() implements this heuristic.
2036  */
2037 static int
2039 {
2040  int useful = 0;
2041  ListCell *i;
2042 
2043  foreach(i, pathkeys)
2044  {
2045  PathKey *pathkey = (PathKey *) lfirst(i);
2046  bool matched = false;
2047  ListCell *j;
2048 
2049  /* If "wrong" direction, not useful for merging */
2050  if (!right_merge_direction(root, pathkey))
2051  break;
2052 
2053  /*
2054  * First look into the EquivalenceClass of the pathkey, to see if
2055  * there are any members not yet joined to the rel. If so, it's
2056  * surely possible to generate a mergejoin clause using them.
2057  */
2058  if (rel->has_eclass_joins &&
2059  eclass_useful_for_merging(root, pathkey->pk_eclass, rel))
2060  matched = true;
2061  else
2062  {
2063  /*
2064  * Otherwise search the rel's joininfo list, which contains
2065  * non-EquivalenceClass-derivable join clauses that might
2066  * nonetheless be mergejoinable.
2067  */
2068  foreach(j, rel->joininfo)
2069  {
2070  RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(j);
2071 
2072  if (restrictinfo->mergeopfamilies == NIL)
2073  continue;
2074  update_mergeclause_eclasses(root, restrictinfo);
2075 
2076  if (pathkey->pk_eclass == restrictinfo->left_ec ||
2077  pathkey->pk_eclass == restrictinfo->right_ec)
2078  {
2079  matched = true;
2080  break;
2081  }
2082  }
2083  }
2084 
2085  /*
2086  * If we didn't find a mergeclause, we're done --- any additional
2087  * sort-key positions in the pathkeys are useless. (But we can still
2088  * mergejoin if we found at least one mergeclause.)
2089  */
2090  if (matched)
2091  useful++;
2092  else
2093  break;
2094  }
2095 
2096  return useful;
2097 }
2098 
2099 /*
2100  * right_merge_direction
2101  * Check whether the pathkey embodies the preferred sort direction
2102  * for merging its target column.
2103  */
2104 static bool
2106 {
2107  ListCell *l;
2108 
2109  foreach(l, root->query_pathkeys)
2110  {
2111  PathKey *query_pathkey = (PathKey *) lfirst(l);
2112 
2113  if (pathkey->pk_eclass == query_pathkey->pk_eclass &&
2114  pathkey->pk_opfamily == query_pathkey->pk_opfamily)
2115  {
2116  /*
2117  * Found a matching query sort column. Prefer this pathkey's
2118  * direction iff it matches. Note that we ignore pk_nulls_first,
2119  * which means that a sort might be needed anyway ... but we still
2120  * want to prefer only one of the two possible directions, and we
2121  * might as well use this one.
2122  */
2123  return (pathkey->pk_strategy == query_pathkey->pk_strategy);
2124  }
2125  }
2126 
2127  /* If no matching ORDER BY request, prefer the ASC direction */
2128  return (pathkey->pk_strategy == BTLessStrategyNumber);
2129 }
2130 
2131 /*
2132  * pathkeys_useful_for_ordering
2133  * Count the number of pathkeys that are useful for meeting the
2134  * query's requested output ordering.
2135  *
2136  * Because we the have the possibility of incremental sort, a prefix list of
2137  * keys is potentially useful for improving the performance of the requested
2138  * ordering. Thus we return 0, if no valuable keys are found, or the number
2139  * of leading keys shared by the list and the requested ordering..
2140  */
2141 static int
2143 {
2144  int n_common_pathkeys;
2145 
2146  (void) pathkeys_count_contained_in(root->query_pathkeys, pathkeys,
2147  &n_common_pathkeys);
2148 
2149  return n_common_pathkeys;
2150 }
2151 
2152 /*
2153  * pathkeys_useful_for_grouping
2154  * Count the number of pathkeys that are useful for grouping (instead of
2155  * explicit sort)
2156  *
2157  * Group pathkeys could be reordered to benefit from the ordering. The
2158  * ordering may not be "complete" and may require incremental sort, but that's
2159  * fine. So we simply count prefix pathkeys with a matching group key, and
2160  * stop once we find the first pathkey without a match.
2161  *
2162  * So e.g. with pathkeys (a,b,c) and group keys (a,b,e) this determines (a,b)
2163  * pathkeys are useful for grouping, and we might do incremental sort to get
2164  * path ordered by (a,b,e).
2165  *
2166  * This logic is necessary to retain paths with ordering not matching grouping
2167  * keys directly, without the reordering.
2168  *
2169  * Returns the length of pathkey prefix with matching group keys.
2170  */
2171 static int
2173 {
2174  ListCell *key;
2175  int n = 0;
2176 
2177  /* no special ordering requested for grouping */
2178  if (root->group_pathkeys == NIL)
2179  return 0;
2180 
2181  /* walk the pathkeys and search for matching group key */
2182  foreach(key, pathkeys)
2183  {
2184  PathKey *pathkey = (PathKey *) lfirst(key);
2185 
2186  /* no matching group key, we're done */
2187  if (!list_member_ptr(root->group_pathkeys, pathkey))
2188  break;
2189 
2190  n++;
2191  }
2192 
2193  return n;
2194 }
2195 
2196 /*
2197  * truncate_useless_pathkeys
2198  * Shorten the given pathkey list to just the useful pathkeys.
2199  */
2200 List *
2202  RelOptInfo *rel,
2203  List *pathkeys)
2204 {
2205  int nuseful;
2206  int nuseful2;
2207 
2208  nuseful = pathkeys_useful_for_merging(root, rel, pathkeys);
2209  nuseful2 = pathkeys_useful_for_ordering(root, pathkeys);
2210  if (nuseful2 > nuseful)
2211  nuseful = nuseful2;
2212  nuseful2 = pathkeys_useful_for_grouping(root, pathkeys);
2213  if (nuseful2 > nuseful)
2214  nuseful = nuseful2;
2215 
2216  /*
2217  * Note: not safe to modify input list destructively, but we can avoid
2218  * copying the list if we're not actually going to change it
2219  */
2220  if (nuseful == 0)
2221  return NIL;
2222  else if (nuseful == list_length(pathkeys))
2223  return pathkeys;
2224  else
2225  return list_copy_head(pathkeys, nuseful);
2226 }
2227 
2228 /*
2229  * has_useful_pathkeys
2230  * Detect whether the specified rel could have any pathkeys that are
2231  * useful according to truncate_useless_pathkeys().
2232  *
2233  * This is a cheap test that lets us skip building pathkeys at all in very
2234  * simple queries. It's OK to err in the direction of returning "true" when
2235  * there really aren't any usable pathkeys, but erring in the other direction
2236  * is bad --- so keep this in sync with the routines above!
2237  *
2238  * We could make the test more complex, for example checking to see if any of
2239  * the joinclauses are really mergejoinable, but that likely wouldn't win
2240  * often enough to repay the extra cycles. Queries with neither a join nor
2241  * a sort are reasonably common, though, so this much work seems worthwhile.
2242  */
2243 bool
2245 {
2246  if (rel->joininfo != NIL || rel->has_eclass_joins)
2247  return true; /* might be able to use pathkeys for merging */
2248  if (root->group_pathkeys != NIL)
2249  return true; /* might be able to use pathkeys for grouping */
2250  if (root->query_pathkeys != NIL)
2251  return true; /* might be able to use them for ordering */
2252  return false; /* definitely useless */
2253 }
bool bms_is_subset(const Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:425
bool bms_overlap(const Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:595
#define bms_is_empty(a)
Definition: bitmapset.h:105
signed short int16
Definition: c.h:482
unsigned int Index
Definition: c.h:603
#define OidIsValid(objectId)
Definition: c.h:764
bool enable_incremental_sort
Definition: costsize.c:141
#define ERROR
Definition: elog.h:39
bool equal(const void *a, const void *b)
Definition: equalfuncs.c:223
Expr * canonicalize_ec_expression(Expr *expr, Oid req_type, Oid req_collation)
Definition: equivclass.c:472
EquivalenceClass * get_eclass_for_sort_expr(PlannerInfo *root, Expr *expr, List *opfamilies, Oid opcintype, Oid collation, Index sortref, Relids rel, bool create_it)
Definition: equivclass.c:587
bool eclass_useful_for_merging(PlannerInfo *root, EquivalenceClass *eclass, RelOptInfo *rel)
Definition: equivclass.c:3147
bool indexcol_is_bool_constant_for_query(PlannerInfo *root, IndexOptInfo *index, int indexcol)
Definition: indxpath.c:3707
int j
Definition: isn.c:74
int i
Definition: isn.c:73
Assert(fmt[strlen(fmt) - 1] !='\n')
List * lappend(List *list, void *datum)
Definition: list.c:339
List * list_copy_head(const List *oldlist, int len)
Definition: list.c:1593
List * list_copy(const List *oldlist)
Definition: list.c:1573
bool list_member_ptr(const List *list, const void *datum)
Definition: list.c:682
List * list_concat_unique_ptr(List *list1, const List *list2)
Definition: list.c:1427
void list_free(List *list)
Definition: list.c:1546
List * list_concat(List *list1, const List *list2)
Definition: list.c:561
List * get_mergejoin_opfamilies(Oid opno)
Definition: lsyscache.c:365
Oid get_opfamily_member(Oid opfamily, Oid lefttype, Oid righttype, int16 strategy)
Definition: lsyscache.c:165
bool get_ordering_op_properties(Oid opno, Oid *opfamily, Oid *opcintype, int16 *strategy)
Definition: lsyscache.c:206
void op_input_types(Oid opno, Oid *lefttype, Oid *righttype)
Definition: lsyscache.c:1335
void pfree(void *pointer)
Definition: mcxt.c:1431
void * palloc(Size size)
Definition: mcxt.c:1201
Oid exprCollation(const Node *expr)
Definition: nodeFuncs.c:786
static Expr * get_notclausearg(const void *notclause)
Definition: nodeFuncs.h:132
static Node * get_rightop(const void *clause)
Definition: nodeFuncs.h:93
static bool is_notclause(const void *clause)
Definition: nodeFuncs.h:123
static Node * get_leftop(const void *clause)
Definition: nodeFuncs.h:81
#define IsA(nodeptr, _type_)
Definition: nodes.h:158
#define copyObject(obj)
Definition: nodes.h:223
#define makeNode(_type_)
Definition: nodes.h:155
JoinType
Definition: nodes.h:278
@ JOIN_FULL
Definition: nodes.h:285
@ JOIN_RIGHT
Definition: nodes.h:286
@ JOIN_RIGHT_ANTI
Definition: nodes.h:299
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:124
bool partitions_are_ordered(PartitionBoundInfo boundinfo, Bitmapset *live_parts)
Definition: partbounds.c:2853
static bool matches_boolean_partition_clause(RestrictInfo *rinfo, RelOptInfo *partrel, int partkeycol)
Definition: pathkeys.c:901
List * build_join_pathkeys(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, List *outer_pathkeys)
Definition: pathkeys.c:1311
List * get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
Definition: pathkeys.c:487
List * build_expression_pathkey(PlannerInfo *root, Expr *expr, Oid opno, Relids rel, bool create_it)
Definition: pathkeys.c:1017
static bool right_merge_direction(PlannerInfo *root, PathKey *pathkey)
Definition: pathkeys.c:2105
List * make_inner_pathkeys_for_merge(PlannerInfo *root, List *mergeclauses, List *outer_pathkeys)
Definition: pathkeys.c:1840
Path * get_cheapest_path_for_pathkeys(List *paths, List *pathkeys, Relids required_outer, CostSelector cost_criterion, bool require_parallel_safe)
Definition: pathkeys.c:637
bool pathkeys_count_contained_in(List *keys1, List *keys2, int *n_common)
Definition: pathkeys.c:575
List * find_mergeclauses_for_outer_pathkeys(PlannerInfo *root, List *pathkeys, List *restrictinfos)
Definition: pathkeys.c:1529
static int group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys, List **group_clauses, int num_groupby_pathkeys)
Definition: pathkeys.c:370
static PathKey * make_pathkey_from_sortop(PlannerInfo *root, Expr *expr, Oid ordering_op, bool nulls_first, Index sortref, bool create_it)
Definition: pathkeys.c:257
bool has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel)
Definition: pathkeys.c:2244
List * append_pathkeys(List *target, List *source)
Definition: pathkeys.c:108
List * truncate_useless_pathkeys(PlannerInfo *root, RelOptInfo *rel, List *pathkeys)
Definition: pathkeys.c:2201
static int pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys)
Definition: pathkeys.c:2142
List * trim_mergeclauses_for_inner_pathkeys(PlannerInfo *root, List *mergeclauses, List *pathkeys)
Definition: pathkeys.c:1943
List * select_outer_pathkeys_for_merge(PlannerInfo *root, List *mergeclauses, RelOptInfo *joinrel)
Definition: pathkeys.c:1644
void update_mergeclause_eclasses(PlannerInfo *root, RestrictInfo *restrictinfo)
Definition: pathkeys.c:1495
static Var * find_var_for_subquery_tle(RelOptInfo *rel, TargetEntry *tle)
Definition: pathkeys.c:1268
List * build_index_pathkeys(PlannerInfo *root, IndexOptInfo *index, ScanDirection scandir)
Definition: pathkeys.c:757
static int pathkeys_useful_for_grouping(PlannerInfo *root, List *pathkeys)
Definition: pathkeys.c:2172
static bool partkey_is_bool_constant_for_query(RelOptInfo *partrel, int partkeycol)
Definition: pathkeys.c:861
bool enable_group_by_reordering
Definition: pathkeys.c:33
static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys)
Definition: pathkeys.c:160
List * make_pathkeys_for_sortclauses_extended(PlannerInfo *root, List **sortclauses, List *tlist, bool remove_redundant, bool *sortable)
Definition: pathkeys.c:1386
PathKey * make_canonical_pathkey(PlannerInfo *root, EquivalenceClass *eclass, Oid opfamily, int strategy, bool nulls_first)
Definition: pathkeys.c:57
static bool pathkeys_are_duplicate(List *infos, List *pathkeys)
Definition: pathkeys.c:458
Path * get_cheapest_parallel_safe_total_inner(List *paths)
Definition: pathkeys.c:716
List * make_pathkeys_for_sortclauses(PlannerInfo *root, List *sortclauses, List *tlist)
Definition: pathkeys.c:1349
static int pathkeys_useful_for_merging(PlannerInfo *root, RelOptInfo *rel, List *pathkeys)
Definition: pathkeys.c:2038
List * convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel, List *subquery_pathkeys, List *subquery_tlist)
Definition: pathkeys.c:1071
static PathKey * make_pathkey_from_sortinfo(PlannerInfo *root, Expr *expr, Oid opfamily, Oid opcintype, Oid collation, bool reverse_sort, bool nulls_first, Index sortref, Relids rel, bool create_it)
Definition: pathkeys.c:199
void initialize_mergeclause_eclasses(PlannerInfo *root, RestrictInfo *restrictinfo)
Definition: pathkeys.c:1448
Path * get_cheapest_fractional_path_for_pathkeys(List *paths, List *pathkeys, Relids required_outer, double fraction)
Definition: pathkeys.c:683
bool pathkeys_contained_in(List *keys1, List *keys2)
Definition: pathkeys.c:343
PathKeysComparison compare_pathkeys(List *keys1, List *keys2)
Definition: pathkeys.c:304
List * build_partition_pathkeys(PlannerInfo *root, RelOptInfo *partrel, ScanDirection scandir, bool *partialkeys)
Definition: pathkeys.c:936
int compare_fractional_path_costs(Path *path1, Path *path2, double fraction)
Definition: pathnode.c:117
int compare_path_costs(Path *path1, Path *path2, CostSelector criterion)
Definition: pathnode.c:71
#define EC_MUST_BE_REDUNDANT(eclass)
Definition: pathnodes.h:1390
#define IS_SIMPLE_REL(rel)
Definition: pathnodes.h:824
CostSelector
Definition: pathnodes.h:37
#define PATH_REQ_OUTER(path)
Definition: pathnodes.h:1649
PathKeysComparison
Definition: paths.h:198
@ PATHKEYS_BETTER2
Definition: paths.h:201
@ PATHKEYS_BETTER1
Definition: paths.h:200
@ PATHKEYS_DIFFERENT
Definition: paths.h:202
@ PATHKEYS_EQUAL
Definition: paths.h:199
void * arg
#define lfirst(lc)
Definition: pg_list.h:172
#define lfirst_node(type, lc)
Definition: pg_list.h:176
static int list_length(const List *l)
Definition: pg_list.h:152
#define NIL
Definition: pg_list.h:68
#define forboth(cell1, list1, cell2, list2)
Definition: pg_list.h:518
#define foreach_current_index(var_or_cell)
Definition: pg_list.h:403
#define foreach_delete_current(lst, var_or_cell)
Definition: pg_list.h:391
#define list_make1(x1)
Definition: pg_list.h:212
static ListCell * list_head(const List *l)
Definition: pg_list.h:128
#define linitial(l)
Definition: pg_list.h:178
static void * list_nth(const List *list, int n)
Definition: pg_list.h:299
static ListCell * lnext(const List *l, const ListCell *c)
Definition: pg_list.h:343
#define linitial_oid(l)
Definition: pg_list.h:180
static rewind_source * source
Definition: pg_rewind.c:89
unsigned int Oid
Definition: postgres_ext.h:31
static struct cvec * eclass(struct vars *v, chr c, int cases)
Definition: regc_locale.c:500
static struct subre * parse(struct vars *v, int stopper, int type, struct state *init, struct state *final)
Definition: regcomp.c:715
#define ScanDirectionIsBackward(direction)
Definition: sdir.h:50
ScanDirection
Definition: sdir.h:25
#define BTGreaterStrategyNumber
Definition: stratnum.h:33
#define BTLessStrategyNumber
Definition: stratnum.h:29
#define BTEqualStrategyNumber
Definition: stratnum.h:31
List * ec_opfamilies
Definition: pathnodes.h:1370
Definition: pg_list.h:54
Definition: nodes.h:129
List * pathkeys
Definition: pathnodes.h:1467
List * clauses
Definition: pathnodes.h:1468
bool pk_nulls_first
Definition: pathnodes.h:1458
int pk_strategy
Definition: pathnodes.h:1457
Oid pk_opfamily
Definition: pathnodes.h:1456
List * exprs
Definition: pathnodes.h:1513
List * pathkeys
Definition: pathnodes.h:1645
bool parallel_safe
Definition: pathnodes.h:1635
int num_groupby_pathkeys
Definition: pathnodes.h:392
List * canon_pathkeys
Definition: pathnodes.h:317
bool ec_merging_done
Definition: pathnodes.h:314
List * sort_pathkeys
Definition: pathnodes.h:399
List * group_pathkeys
Definition: pathnodes.h:385
List * processed_groupClause
Definition: pathnodes.h:430
Query * parse
Definition: pathnodes.h:199
List * query_pathkeys
Definition: pathnodes.h:382
List * baserestrictinfo
Definition: pathnodes.h:966
List * joininfo
Definition: pathnodes.h:972
Relids relids
Definition: pathnodes.h:856
struct PathTarget * reltarget
Definition: pathnodes.h:878
Index relid
Definition: pathnodes.h:903
bool has_eclass_joins
Definition: pathnodes.h:974
Bitmapset * live_parts
Definition: pathnodes.h:1020
Expr * clause
Definition: pathnodes.h:2541
Index tleSortGroupRef
Definition: parsenodes.h:1385
Expr * expr
Definition: primnodes.h:1922
AttrNumber resno
Definition: primnodes.h:1924
Definition: primnodes.h:234
AttrNumber varattno
Definition: primnodes.h:246
int varno
Definition: primnodes.h:241
Definition: type.h:95
TargetEntry * get_sortgroupref_tle(Index sortref, List *targetList)
Definition: tlist.c:345
SortGroupClause * get_sortgroupref_clause_noerr(Index sortref, List *clauses)
Definition: tlist.c:443
Node * get_sortgroupclause_expr(SortGroupClause *sgClause, List *targetList)
Definition: tlist.c:379