PostgreSQL Source Code  git master
pathkeys.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * pathkeys.c
4  * Utilities for matching and building path keys
5  *
6  * See src/backend/optimizer/README for a great deal of information about
7  * the nature and use of path keys.
8  *
9  *
10  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
11  * Portions Copyright (c) 1994, Regents of the University of California
12  *
13  * IDENTIFICATION
14  * src/backend/optimizer/path/pathkeys.c
15  *
16  *-------------------------------------------------------------------------
17  */
18 #include "postgres.h"
19 
20 #include "access/stratnum.h"
21 #include "catalog/pg_opfamily.h"
22 #include "nodes/nodeFuncs.h"
23 #include "optimizer/cost.h"
24 #include "optimizer/optimizer.h"
25 #include "optimizer/pathnode.h"
26 #include "optimizer/paths.h"
28 #include "utils/lsyscache.h"
29 
30 /* Consider reordering of GROUP BY keys? */
32 
33 static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys);
35  RelOptInfo *partrel,
36  int partkeycol);
38 static bool right_merge_direction(PlannerInfo *root, PathKey *pathkey);
39 
40 
41 /****************************************************************************
42  * PATHKEY CONSTRUCTION AND REDUNDANCY TESTING
43  ****************************************************************************/
44 
45 /*
46  * make_canonical_pathkey
47  * Given the parameters for a PathKey, find any pre-existing matching
48  * pathkey in the query's list of "canonical" pathkeys. Make a new
49  * entry if there's not one already.
50  *
51  * Note that this function must not be used until after we have completed
52  * merging EquivalenceClasses.
53  */
54 PathKey *
56  EquivalenceClass *eclass, Oid opfamily,
57  int strategy, bool nulls_first)
58 {
59  PathKey *pk;
60  ListCell *lc;
61  MemoryContext oldcontext;
62 
63  /* Can't make canonical pathkeys if the set of ECs might still change */
64  if (!root->ec_merging_done)
65  elog(ERROR, "too soon to build canonical pathkeys");
66 
67  /* The passed eclass might be non-canonical, so chase up to the top */
68  while (eclass->ec_merged)
69  eclass = eclass->ec_merged;
70 
71  foreach(lc, root->canon_pathkeys)
72  {
73  pk = (PathKey *) lfirst(lc);
74  if (eclass == pk->pk_eclass &&
75  opfamily == pk->pk_opfamily &&
76  strategy == pk->pk_strategy &&
77  nulls_first == pk->pk_nulls_first)
78  return pk;
79  }
80 
81  /*
82  * Be sure canonical pathkeys are allocated in the main planning context.
83  * Not an issue in normal planning, but it is for GEQO.
84  */
85  oldcontext = MemoryContextSwitchTo(root->planner_cxt);
86 
87  pk = makeNode(PathKey);
88  pk->pk_eclass = eclass;
89  pk->pk_opfamily = opfamily;
90  pk->pk_strategy = strategy;
91  pk->pk_nulls_first = nulls_first;
92 
93  root->canon_pathkeys = lappend(root->canon_pathkeys, pk);
94 
95  MemoryContextSwitchTo(oldcontext);
96 
97  return pk;
98 }
99 
100 /*
101  * append_pathkeys
102  * Append all non-redundant PathKeys in 'source' onto 'target' and
103  * return the updated 'target' list.
104  */
105 List *
107 {
108  ListCell *lc;
109 
110  Assert(target != NIL);
111 
112  foreach(lc, source)
113  {
114  PathKey *pk = lfirst_node(PathKey, lc);
115 
116  if (!pathkey_is_redundant(pk, target))
117  target = lappend(target, pk);
118  }
119  return target;
120 }
121 
122 /*
123  * pathkey_is_redundant
124  * Is a pathkey redundant with one already in the given list?
125  *
126  * We detect two cases:
127  *
128  * 1. If the new pathkey's equivalence class contains a constant, and isn't
129  * below an outer join, then we can disregard it as a sort key. An example:
130  * SELECT ... WHERE x = 42 ORDER BY x, y;
131  * We may as well just sort by y. Note that because of opfamily matching,
132  * this is semantically correct: we know that the equality constraint is one
133  * that actually binds the variable to a single value in the terms of any
134  * ordering operator that might go with the eclass. This rule not only lets
135  * us simplify (or even skip) explicit sorts, but also allows matching index
136  * sort orders to a query when there are don't-care index columns.
137  *
138  * 2. If the new pathkey's equivalence class is the same as that of any
139  * existing member of the pathkey list, then it is redundant. Some examples:
140  * SELECT ... ORDER BY x, x;
141  * SELECT ... ORDER BY x, x DESC;
142  * SELECT ... WHERE x = y ORDER BY x, y;
143  * In all these cases the second sort key cannot distinguish values that are
144  * considered equal by the first, and so there's no point in using it.
145  * Note in particular that we need not compare opfamily (all the opfamilies
146  * of the EC have the same notion of equality) nor sort direction.
147  *
148  * Both the given pathkey and the list members must be canonical for this
149  * to work properly, but that's okay since we no longer ever construct any
150  * non-canonical pathkeys. (Note: the notion of a pathkey *list* being
151  * canonical includes the additional requirement of no redundant entries,
152  * which is exactly what we are checking for here.)
153  *
154  * Because the equivclass.c machinery forms only one copy of any EC per query,
155  * pointer comparison is enough to decide whether canonical ECs are the same.
156  */
157 static bool
158 pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys)
159 {
160  EquivalenceClass *new_ec = new_pathkey->pk_eclass;
161  ListCell *lc;
162 
163  /* Check for EC containing a constant --- unconditionally redundant */
164  if (EC_MUST_BE_REDUNDANT(new_ec))
165  return true;
166 
167  /* If same EC already used in list, then redundant */
168  foreach(lc, pathkeys)
169  {
170  PathKey *old_pathkey = (PathKey *) lfirst(lc);
171 
172  if (new_ec == old_pathkey->pk_eclass)
173  return true;
174  }
175 
176  return false;
177 }
178 
179 /*
180  * make_pathkey_from_sortinfo
181  * Given an expression and sort-order information, create a PathKey.
182  * The result is always a "canonical" PathKey, but it might be redundant.
183  *
184  * If the PathKey is being generated from a SortGroupClause, sortref should be
185  * the SortGroupClause's SortGroupRef; otherwise zero.
186  *
187  * If rel is not NULL, it identifies a specific relation we're considering
188  * a path for, and indicates that child EC members for that relation can be
189  * considered. Otherwise child members are ignored. (See the comments for
190  * get_eclass_for_sort_expr.)
191  *
192  * create_it is true if we should create any missing EquivalenceClass
193  * needed to represent the sort key. If it's false, we return NULL if the
194  * sort key isn't already present in any EquivalenceClass.
195  */
196 static PathKey *
198  Expr *expr,
199  Oid opfamily,
200  Oid opcintype,
201  Oid collation,
202  bool reverse_sort,
203  bool nulls_first,
204  Index sortref,
205  Relids rel,
206  bool create_it)
207 {
208  int16 strategy;
209  Oid equality_op;
210  List *opfamilies;
212 
213  strategy = reverse_sort ? BTGreaterStrategyNumber : BTLessStrategyNumber;
214 
215  /*
216  * EquivalenceClasses need to contain opfamily lists based on the family
217  * membership of mergejoinable equality operators, which could belong to
218  * more than one opfamily. So we have to look up the opfamily's equality
219  * operator and get its membership.
220  */
221  equality_op = get_opfamily_member(opfamily,
222  opcintype,
223  opcintype,
225  if (!OidIsValid(equality_op)) /* shouldn't happen */
226  elog(ERROR, "missing operator %d(%u,%u) in opfamily %u",
227  BTEqualStrategyNumber, opcintype, opcintype, opfamily);
228  opfamilies = get_mergejoin_opfamilies(equality_op);
229  if (!opfamilies) /* certainly should find some */
230  elog(ERROR, "could not find opfamilies for equality operator %u",
231  equality_op);
232 
233  /* Now find or (optionally) create a matching EquivalenceClass */
234  eclass = get_eclass_for_sort_expr(root, expr,
235  opfamilies, opcintype, collation,
236  sortref, rel, create_it);
237 
238  /* Fail if no EC and !create_it */
239  if (!eclass)
240  return NULL;
241 
242  /* And finally we can find or create a PathKey node */
243  return make_canonical_pathkey(root, eclass, opfamily,
244  strategy, nulls_first);
245 }
246 
247 /*
248  * make_pathkey_from_sortop
249  * Like make_pathkey_from_sortinfo, but work from a sort operator.
250  *
251  * This should eventually go away, but we need to restructure SortGroupClause
252  * first.
253  */
254 static PathKey *
256  Expr *expr,
257  Oid ordering_op,
258  bool nulls_first,
259  Index sortref,
260  bool create_it)
261 {
262  Oid opfamily,
263  opcintype,
264  collation;
265  int16 strategy;
266 
267  /* Find the operator in pg_amop --- failure shouldn't happen */
268  if (!get_ordering_op_properties(ordering_op,
269  &opfamily, &opcintype, &strategy))
270  elog(ERROR, "operator %u is not a valid ordering operator",
271  ordering_op);
272 
273  /* Because SortGroupClause doesn't carry collation, consult the expr */
274  collation = exprCollation((Node *) expr);
275 
276  return make_pathkey_from_sortinfo(root,
277  expr,
278  opfamily,
279  opcintype,
280  collation,
281  (strategy == BTGreaterStrategyNumber),
282  nulls_first,
283  sortref,
284  NULL,
285  create_it);
286 }
287 
288 
289 /****************************************************************************
290  * PATHKEY COMPARISONS
291  ****************************************************************************/
292 
293 /*
294  * compare_pathkeys
295  * Compare two pathkeys to see if they are equivalent, and if not whether
296  * one is "better" than the other.
297  *
298  * We assume the pathkeys are canonical, and so they can be checked for
299  * equality by simple pointer comparison.
300  */
302 compare_pathkeys(List *keys1, List *keys2)
303 {
304  ListCell *key1,
305  *key2;
306 
307  /*
308  * Fall out quickly if we are passed two identical lists. This mostly
309  * catches the case where both are NIL, but that's common enough to
310  * warrant the test.
311  */
312  if (keys1 == keys2)
313  return PATHKEYS_EQUAL;
314 
315  forboth(key1, keys1, key2, keys2)
316  {
317  PathKey *pathkey1 = (PathKey *) lfirst(key1);
318  PathKey *pathkey2 = (PathKey *) lfirst(key2);
319 
320  if (pathkey1 != pathkey2)
321  return PATHKEYS_DIFFERENT; /* no need to keep looking */
322  }
323 
324  /*
325  * If we reached the end of only one list, the other is longer and
326  * therefore not a subset.
327  */
328  if (key1 != NULL)
329  return PATHKEYS_BETTER1; /* key1 is longer */
330  if (key2 != NULL)
331  return PATHKEYS_BETTER2; /* key2 is longer */
332  return PATHKEYS_EQUAL;
333 }
334 
335 /*
336  * pathkeys_contained_in
337  * Common special case of compare_pathkeys: we just want to know
338  * if keys2 are at least as well sorted as keys1.
339  */
340 bool
342 {
343  switch (compare_pathkeys(keys1, keys2))
344  {
345  case PATHKEYS_EQUAL:
346  case PATHKEYS_BETTER2:
347  return true;
348  default:
349  break;
350  }
351  return false;
352 }
353 
354 /*
355  * group_keys_reorder_by_pathkeys
356  * Reorder GROUP BY pathkeys and clauses to match the input pathkeys.
357  *
358  * 'pathkeys' is an input list of pathkeys
359  * '*group_pathkeys' and '*group_clauses' are pathkeys and clauses lists to
360  * reorder. The pointers are redirected to new lists, original lists
361  * stay untouched.
362  * 'num_groupby_pathkeys' is the number of first '*group_pathkeys' items to
363  * search matching pathkeys.
364  *
365  * Returns the number of GROUP BY keys with a matching pathkey.
366  */
367 static int
368 group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
369  List **group_clauses,
370  int num_groupby_pathkeys)
371 {
372  List *new_group_pathkeys = NIL,
373  *new_group_clauses = NIL;
374  List *grouping_pathkeys;
375  ListCell *lc;
376  int n;
377 
378  if (pathkeys == NIL || *group_pathkeys == NIL)
379  return 0;
380 
381  /*
382  * We're going to search within just the first num_groupby_pathkeys of
383  * *group_pathkeys. The thing is that root->group_pathkeys is passed as
384  * *group_pathkeys containing grouping pathkeys altogether with aggregate
385  * pathkeys. If we process aggregate pathkeys we could get an invalid
386  * result of get_sortgroupref_clause_noerr(), because their
387  * pathkey->pk_eclass->ec_sortref doesn't referece query targetlist. So,
388  * we allocate a separate list of pathkeys for lookups.
389  */
390  grouping_pathkeys = list_copy_head(*group_pathkeys, num_groupby_pathkeys);
391 
392  /*
393  * Walk the pathkeys (determining ordering of the input path) and see if
394  * there's a matching GROUP BY key. If we find one, we append it to the
395  * list, and do the same for the clauses.
396  *
397  * Once we find the first pathkey without a matching GROUP BY key, the
398  * rest of the pathkeys are useless and can't be used to evaluate the
399  * grouping, so we abort the loop and ignore the remaining pathkeys.
400  */
401  foreach(lc, pathkeys)
402  {
403  PathKey *pathkey = (PathKey *) lfirst(lc);
404  SortGroupClause *sgc;
405 
406  /*
407  * Pathkeys are built in a way that allows simply comparing pointers.
408  * Give up if we can't find the matching pointer. Also give up if
409  * there is no sortclause reference for some reason.
410  */
411  if (foreach_current_index(lc) >= num_groupby_pathkeys ||
412  !list_member_ptr(grouping_pathkeys, pathkey) ||
413  pathkey->pk_eclass->ec_sortref == 0)
414  break;
415 
416  /*
417  * Since 1349d27 pathkey coming from underlying node can be in the
418  * root->group_pathkeys but not in the processed_groupClause. So, we
419  * should be careful here.
420  */
421  sgc = get_sortgroupref_clause_noerr(pathkey->pk_eclass->ec_sortref,
422  *group_clauses);
423  if (!sgc)
424  /* The grouping clause does not cover this pathkey */
425  break;
426 
427  /*
428  * Sort group clause should have an ordering operator as long as there
429  * is an associated pathkey.
430  */
431  Assert(OidIsValid(sgc->sortop));
432 
433  new_group_pathkeys = lappend(new_group_pathkeys, pathkey);
434  new_group_clauses = lappend(new_group_clauses, sgc);
435  }
436 
437  /* remember the number of pathkeys with a matching GROUP BY key */
438  n = list_length(new_group_pathkeys);
439 
440  /* append the remaining group pathkeys (will be treated as not sorted) */
441  *group_pathkeys = list_concat_unique_ptr(new_group_pathkeys,
442  *group_pathkeys);
443  *group_clauses = list_concat_unique_ptr(new_group_clauses,
444  *group_clauses);
445 
446  list_free(grouping_pathkeys);
447  return n;
448 }
449 
450 /*
451  * pathkeys_are_duplicate
452  * Check if give pathkeys are already contained the list of
453  * PathKeyInfo's.
454  */
455 static bool
456 pathkeys_are_duplicate(List *infos, List *pathkeys)
457 {
458  ListCell *lc;
459 
460  foreach(lc, infos)
461  {
462  PathKeyInfo *info = lfirst_node(PathKeyInfo, lc);
463 
464  if (compare_pathkeys(pathkeys, info->pathkeys) == PATHKEYS_EQUAL)
465  return true;
466  }
467  return false;
468 }
469 
470 /*
471  * get_useful_group_keys_orderings
472  * Determine which orderings of GROUP BY keys are potentially interesting.
473  *
474  * Returns a list of PathKeyInfo items, each representing an interesting
475  * ordering of GROUP BY keys. Each item stores pathkeys and clauses in the
476  * matching order.
477  *
478  * The function considers (and keeps) multiple GROUP BY orderings:
479  *
480  * - the original ordering, as specified by the GROUP BY clause,
481  * - GROUP BY keys reordered to match 'path' ordering (as much as possible),
482  * - GROUP BY keys to match target ORDER BY clause (as much as possible).
483  */
484 List *
486 {
487  Query *parse = root->parse;
488  List *infos = NIL;
489  PathKeyInfo *info;
490 
491  List *pathkeys = root->group_pathkeys;
492  List *clauses = root->processed_groupClause;
493 
494  /* always return at least the original pathkeys/clauses */
495  info = makeNode(PathKeyInfo);
496  info->pathkeys = pathkeys;
497  info->clauses = clauses;
498  infos = lappend(infos, info);
499 
500  /*
501  * Should we try generating alternative orderings of the group keys? If
502  * not, we produce only the order specified in the query, i.e. the
503  * optimization is effectively disabled.
504  */
506  return infos;
507 
508  /*
509  * Grouping sets have own and more complex logic to decide the ordering.
510  */
511  if (parse->groupingSets)
512  return infos;
513 
514  /*
515  * If the path is sorted in some way, try reordering the group keys to
516  * match the path as much of the ordering as possible. Then thanks to
517  * incremental sort we would get this sort as cheap as possible.
518  */
519  if (path->pathkeys &&
521  {
522  int n;
523 
524  n = group_keys_reorder_by_pathkeys(path->pathkeys, &pathkeys, &clauses,
525  root->num_groupby_pathkeys);
526 
527  if (n > 0 &&
529  !pathkeys_are_duplicate(infos, pathkeys))
530  {
531  info = makeNode(PathKeyInfo);
532  info->pathkeys = pathkeys;
533  info->clauses = clauses;
534 
535  infos = lappend(infos, info);
536  }
537  }
538 
539  /*
540  * Try reordering pathkeys to minimize the sort cost (this time consider
541  * the ORDER BY clause).
542  */
543  if (root->sort_pathkeys &&
545  {
546  int n;
547 
548  n = group_keys_reorder_by_pathkeys(root->sort_pathkeys, &pathkeys,
549  &clauses,
550  root->num_groupby_pathkeys);
551 
552  if (n > 0 &&
554  !pathkeys_are_duplicate(infos, pathkeys))
555  {
556  info = makeNode(PathKeyInfo);
557  info->pathkeys = pathkeys;
558  info->clauses = clauses;
559 
560  infos = lappend(infos, info);
561  }
562  }
563 
564  return infos;
565 }
566 
567 /*
568  * pathkeys_count_contained_in
569  * Same as pathkeys_contained_in, but also sets length of longest
570  * common prefix of keys1 and keys2.
571  */
572 bool
573 pathkeys_count_contained_in(List *keys1, List *keys2, int *n_common)
574 {
575  int n = 0;
576  ListCell *key1,
577  *key2;
578 
579  /*
580  * See if we can avoiding looping through both lists. This optimization
581  * gains us several percent in planning time in a worst-case test.
582  */
583  if (keys1 == keys2)
584  {
585  *n_common = list_length(keys1);
586  return true;
587  }
588  else if (keys1 == NIL)
589  {
590  *n_common = 0;
591  return true;
592  }
593  else if (keys2 == NIL)
594  {
595  *n_common = 0;
596  return false;
597  }
598 
599  /*
600  * If both lists are non-empty, iterate through both to find out how many
601  * items are shared.
602  */
603  forboth(key1, keys1, key2, keys2)
604  {
605  PathKey *pathkey1 = (PathKey *) lfirst(key1);
606  PathKey *pathkey2 = (PathKey *) lfirst(key2);
607 
608  if (pathkey1 != pathkey2)
609  {
610  *n_common = n;
611  return false;
612  }
613  n++;
614  }
615 
616  /* If we ended with a null value, then we've processed the whole list. */
617  *n_common = n;
618  return (key1 == NULL);
619 }
620 
621 /*
622  * get_cheapest_path_for_pathkeys
623  * Find the cheapest path (according to the specified criterion) that
624  * satisfies the given pathkeys and parameterization, and is parallel-safe
625  * if required.
626  * Return NULL if no such path.
627  *
628  * 'paths' is a list of possible paths that all generate the same relation
629  * 'pathkeys' represents a required ordering (in canonical form!)
630  * 'required_outer' denotes allowable outer relations for parameterized paths
631  * 'cost_criterion' is STARTUP_COST or TOTAL_COST
632  * 'require_parallel_safe' causes us to consider only parallel-safe paths
633  */
634 Path *
636  Relids required_outer,
637  CostSelector cost_criterion,
638  bool require_parallel_safe)
639 {
640  Path *matched_path = NULL;
641  ListCell *l;
642 
643  foreach(l, paths)
644  {
645  Path *path = (Path *) lfirst(l);
646 
647  /* If required, reject paths that are not parallel-safe */
648  if (require_parallel_safe && !path->parallel_safe)
649  continue;
650 
651  /*
652  * Since cost comparison is a lot cheaper than pathkey comparison, do
653  * that first. (XXX is that still true?)
654  */
655  if (matched_path != NULL &&
656  compare_path_costs(matched_path, path, cost_criterion) <= 0)
657  continue;
658 
659  if (pathkeys_contained_in(pathkeys, path->pathkeys) &&
660  bms_is_subset(PATH_REQ_OUTER(path), required_outer))
661  matched_path = path;
662  }
663  return matched_path;
664 }
665 
666 /*
667  * get_cheapest_fractional_path_for_pathkeys
668  * Find the cheapest path (for retrieving a specified fraction of all
669  * the tuples) that satisfies the given pathkeys and parameterization.
670  * Return NULL if no such path.
671  *
672  * See compare_fractional_path_costs() for the interpretation of the fraction
673  * parameter.
674  *
675  * 'paths' is a list of possible paths that all generate the same relation
676  * 'pathkeys' represents a required ordering (in canonical form!)
677  * 'required_outer' denotes allowable outer relations for parameterized paths
678  * 'fraction' is the fraction of the total tuples expected to be retrieved
679  */
680 Path *
682  List *pathkeys,
683  Relids required_outer,
684  double fraction)
685 {
686  Path *matched_path = NULL;
687  ListCell *l;
688 
689  foreach(l, paths)
690  {
691  Path *path = (Path *) lfirst(l);
692 
693  /*
694  * Since cost comparison is a lot cheaper than pathkey comparison, do
695  * that first. (XXX is that still true?)
696  */
697  if (matched_path != NULL &&
698  compare_fractional_path_costs(matched_path, path, fraction) <= 0)
699  continue;
700 
701  if (pathkeys_contained_in(pathkeys, path->pathkeys) &&
702  bms_is_subset(PATH_REQ_OUTER(path), required_outer))
703  matched_path = path;
704  }
705  return matched_path;
706 }
707 
708 
709 /*
710  * get_cheapest_parallel_safe_total_inner
711  * Find the unparameterized parallel-safe path with the least total cost.
712  */
713 Path *
715 {
716  ListCell *l;
717 
718  foreach(l, paths)
719  {
720  Path *innerpath = (Path *) lfirst(l);
721 
722  if (innerpath->parallel_safe &&
723  bms_is_empty(PATH_REQ_OUTER(innerpath)))
724  return innerpath;
725  }
726 
727  return NULL;
728 }
729 
730 /****************************************************************************
731  * NEW PATHKEY FORMATION
732  ****************************************************************************/
733 
734 /*
735  * build_index_pathkeys
736  * Build a pathkeys list that describes the ordering induced by an index
737  * scan using the given index. (Note that an unordered index doesn't
738  * induce any ordering, so we return NIL.)
739  *
740  * If 'scandir' is BackwardScanDirection, build pathkeys representing a
741  * backwards scan of the index.
742  *
743  * We iterate only key columns of covering indexes, since non-key columns
744  * don't influence index ordering. The result is canonical, meaning that
745  * redundant pathkeys are removed; it may therefore have fewer entries than
746  * there are key columns in the index.
747  *
748  * Another reason for stopping early is that we may be able to tell that
749  * an index column's sort order is uninteresting for this query. However,
750  * that test is just based on the existence of an EquivalenceClass and not
751  * on position in pathkey lists, so it's not complete. Caller should call
752  * truncate_useless_pathkeys() to possibly remove more pathkeys.
753  */
754 List *
757  ScanDirection scandir)
758 {
759  List *retval = NIL;
760  ListCell *lc;
761  int i;
762 
763  if (index->sortopfamily == NULL)
764  return NIL; /* non-orderable index */
765 
766  i = 0;
767  foreach(lc, index->indextlist)
768  {
769  TargetEntry *indextle = (TargetEntry *) lfirst(lc);
770  Expr *indexkey;
771  bool reverse_sort;
772  bool nulls_first;
773  PathKey *cpathkey;
774 
775  /*
776  * INCLUDE columns are stored in index unordered, so they don't
777  * support ordered index scan.
778  */
779  if (i >= index->nkeycolumns)
780  break;
781 
782  /* We assume we don't need to make a copy of the tlist item */
783  indexkey = indextle->expr;
784 
785  if (ScanDirectionIsBackward(scandir))
786  {
787  reverse_sort = !index->reverse_sort[i];
788  nulls_first = !index->nulls_first[i];
789  }
790  else
791  {
792  reverse_sort = index->reverse_sort[i];
793  nulls_first = index->nulls_first[i];
794  }
795 
796  /*
797  * OK, try to make a canonical pathkey for this sort key.
798  */
799  cpathkey = make_pathkey_from_sortinfo(root,
800  indexkey,
801  index->sortopfamily[i],
802  index->opcintype[i],
803  index->indexcollations[i],
804  reverse_sort,
805  nulls_first,
806  0,
807  index->rel->relids,
808  false);
809 
810  if (cpathkey)
811  {
812  /*
813  * We found the sort key in an EquivalenceClass, so it's relevant
814  * for this query. Add it to list, unless it's redundant.
815  */
816  if (!pathkey_is_redundant(cpathkey, retval))
817  retval = lappend(retval, cpathkey);
818  }
819  else
820  {
821  /*
822  * Boolean index keys might be redundant even if they do not
823  * appear in an EquivalenceClass, because of our special treatment
824  * of boolean equality conditions --- see the comment for
825  * indexcol_is_bool_constant_for_query(). If that applies, we can
826  * continue to examine lower-order index columns. Otherwise, the
827  * sort key is not an interesting sort order for this query, so we
828  * should stop considering index columns; any lower-order sort
829  * keys won't be useful either.
830  */
832  break;
833  }
834 
835  i++;
836  }
837 
838  return retval;
839 }
840 
841 /*
842  * partkey_is_bool_constant_for_query
843  *
844  * If a partition key column is constrained to have a constant value by the
845  * query's WHERE conditions, then it's irrelevant for sort-order
846  * considerations. Usually that means we have a restriction clause
847  * WHERE partkeycol = constant, which gets turned into an EquivalenceClass
848  * containing a constant, which is recognized as redundant by
849  * build_partition_pathkeys(). But if the partition key column is a
850  * boolean variable (or expression), then we are not going to see such a
851  * WHERE clause, because expression preprocessing will have simplified it
852  * to "WHERE partkeycol" or "WHERE NOT partkeycol". So we are not going
853  * to have a matching EquivalenceClass (unless the query also contains
854  * "ORDER BY partkeycol"). To allow such cases to work the same as they would
855  * for non-boolean values, this function is provided to detect whether the
856  * specified partition key column matches a boolean restriction clause.
857  */
858 static bool
860 {
861  PartitionScheme partscheme = partrel->part_scheme;
862  ListCell *lc;
863 
864  /*
865  * If the partkey isn't boolean, we can't possibly get a match.
866  *
867  * Partitioning currently can only use built-in AMs, so checking for
868  * built-in boolean opfamilies is good enough.
869  */
870  if (!IsBuiltinBooleanOpfamily(partscheme->partopfamily[partkeycol]))
871  return false;
872 
873  /* Check each restriction clause for the partitioned rel */
874  foreach(lc, partrel->baserestrictinfo)
875  {
876  RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
877 
878  /* Ignore pseudoconstant quals, they won't match */
879  if (rinfo->pseudoconstant)
880  continue;
881 
882  /* See if we can match the clause's expression to the partkey column */
883  if (matches_boolean_partition_clause(rinfo, partrel, partkeycol))
884  return true;
885  }
886 
887  return false;
888 }
889 
890 /*
891  * matches_boolean_partition_clause
892  * Determine if the boolean clause described by rinfo matches
893  * partrel's partkeycol-th partition key column.
894  *
895  * "Matches" can be either an exact match (equivalent to partkey = true),
896  * or a NOT above an exact match (equivalent to partkey = false).
897  */
898 static bool
900  RelOptInfo *partrel, int partkeycol)
901 {
902  Node *clause = (Node *) rinfo->clause;
903  Node *partexpr = (Node *) linitial(partrel->partexprs[partkeycol]);
904 
905  /* Direct match? */
906  if (equal(partexpr, clause))
907  return true;
908  /* NOT clause? */
909  else if (is_notclause(clause))
910  {
911  Node *arg = (Node *) get_notclausearg((Expr *) clause);
912 
913  if (equal(partexpr, arg))
914  return true;
915  }
916 
917  return false;
918 }
919 
920 /*
921  * build_partition_pathkeys
922  * Build a pathkeys list that describes the ordering induced by the
923  * partitions of partrel, under either forward or backward scan
924  * as per scandir.
925  *
926  * Caller must have checked that the partitions are properly ordered,
927  * as detected by partitions_are_ordered().
928  *
929  * Sets *partialkeys to true if pathkeys were only built for a prefix of the
930  * partition key, or false if the pathkeys include all columns of the
931  * partition key.
932  */
933 List *
935  ScanDirection scandir, bool *partialkeys)
936 {
937  List *retval = NIL;
938  PartitionScheme partscheme = partrel->part_scheme;
939  int i;
940 
941  Assert(partscheme != NULL);
942  Assert(partitions_are_ordered(partrel->boundinfo, partrel->live_parts));
943  /* For now, we can only cope with baserels */
944  Assert(IS_SIMPLE_REL(partrel));
945 
946  for (i = 0; i < partscheme->partnatts; i++)
947  {
948  PathKey *cpathkey;
949  Expr *keyCol = (Expr *) linitial(partrel->partexprs[i]);
950 
951  /*
952  * Try to make a canonical pathkey for this partkey.
953  *
954  * We assume the PartitionDesc lists any NULL partition last, so we
955  * treat the scan like a NULLS LAST index: we have nulls_first for
956  * backwards scan only.
957  */
958  cpathkey = make_pathkey_from_sortinfo(root,
959  keyCol,
960  partscheme->partopfamily[i],
961  partscheme->partopcintype[i],
962  partscheme->partcollation[i],
963  ScanDirectionIsBackward(scandir),
964  ScanDirectionIsBackward(scandir),
965  0,
966  partrel->relids,
967  false);
968 
969 
970  if (cpathkey)
971  {
972  /*
973  * We found the sort key in an EquivalenceClass, so it's relevant
974  * for this query. Add it to list, unless it's redundant.
975  */
976  if (!pathkey_is_redundant(cpathkey, retval))
977  retval = lappend(retval, cpathkey);
978  }
979  else
980  {
981  /*
982  * Boolean partition keys might be redundant even if they do not
983  * appear in an EquivalenceClass, because of our special treatment
984  * of boolean equality conditions --- see the comment for
985  * partkey_is_bool_constant_for_query(). If that applies, we can
986  * continue to examine lower-order partition keys. Otherwise, the
987  * sort key is not an interesting sort order for this query, so we
988  * should stop considering partition columns; any lower-order sort
989  * keys won't be useful either.
990  */
991  if (!partkey_is_bool_constant_for_query(partrel, i))
992  {
993  *partialkeys = true;
994  return retval;
995  }
996  }
997  }
998 
999  *partialkeys = false;
1000  return retval;
1001 }
1002 
1003 /*
1004  * build_expression_pathkey
1005  * Build a pathkeys list that describes an ordering by a single expression
1006  * using the given sort operator.
1007  *
1008  * expr and rel are as for make_pathkey_from_sortinfo.
1009  * We induce the other arguments assuming default sort order for the operator.
1010  *
1011  * Similarly to make_pathkey_from_sortinfo, the result is NIL if create_it
1012  * is false and the expression isn't already in some EquivalenceClass.
1013  */
1014 List *
1016  Expr *expr,
1017  Oid opno,
1018  Relids rel,
1019  bool create_it)
1020 {
1021  List *pathkeys;
1022  Oid opfamily,
1023  opcintype;
1024  int16 strategy;
1025  PathKey *cpathkey;
1026 
1027  /* Find the operator in pg_amop --- failure shouldn't happen */
1028  if (!get_ordering_op_properties(opno,
1029  &opfamily, &opcintype, &strategy))
1030  elog(ERROR, "operator %u is not a valid ordering operator",
1031  opno);
1032 
1033  cpathkey = make_pathkey_from_sortinfo(root,
1034  expr,
1035  opfamily,
1036  opcintype,
1037  exprCollation((Node *) expr),
1038  (strategy == BTGreaterStrategyNumber),
1039  (strategy == BTGreaterStrategyNumber),
1040  0,
1041  rel,
1042  create_it);
1043 
1044  if (cpathkey)
1045  pathkeys = list_make1(cpathkey);
1046  else
1047  pathkeys = NIL;
1048 
1049  return pathkeys;
1050 }
1051 
1052 /*
1053  * convert_subquery_pathkeys
1054  * Build a pathkeys list that describes the ordering of a subquery's
1055  * result, in the terms of the outer query. This is essentially a
1056  * task of conversion.
1057  *
1058  * 'rel': outer query's RelOptInfo for the subquery relation.
1059  * 'subquery_pathkeys': the subquery's output pathkeys, in its terms.
1060  * 'subquery_tlist': the subquery's output targetlist, in its terms.
1061  *
1062  * We intentionally don't do truncate_useless_pathkeys() here, because there
1063  * are situations where seeing the raw ordering of the subquery is helpful.
1064  * For example, if it returns ORDER BY x DESC, that may prompt us to
1065  * construct a mergejoin using DESC order rather than ASC order; but the
1066  * right_merge_direction heuristic would have us throw the knowledge away.
1067  */
1068 List *
1070  List *subquery_pathkeys,
1071  List *subquery_tlist)
1072 {
1073  List *retval = NIL;
1074  int retvallen = 0;
1075  int outer_query_keys = list_length(root->query_pathkeys);
1076  ListCell *i;
1077 
1078  foreach(i, subquery_pathkeys)
1079  {
1080  PathKey *sub_pathkey = (PathKey *) lfirst(i);
1081  EquivalenceClass *sub_eclass = sub_pathkey->pk_eclass;
1082  PathKey *best_pathkey = NULL;
1083 
1084  if (sub_eclass->ec_has_volatile)
1085  {
1086  /*
1087  * If the sub_pathkey's EquivalenceClass is volatile, then it must
1088  * have come from an ORDER BY clause, and we have to match it to
1089  * that same targetlist entry.
1090  */
1091  TargetEntry *tle;
1092  Var *outer_var;
1093 
1094  if (sub_eclass->ec_sortref == 0) /* can't happen */
1095  elog(ERROR, "volatile EquivalenceClass has no sortref");
1096  tle = get_sortgroupref_tle(sub_eclass->ec_sortref, subquery_tlist);
1097  Assert(tle);
1098  /* Is TLE actually available to the outer query? */
1099  outer_var = find_var_for_subquery_tle(rel, tle);
1100  if (outer_var)
1101  {
1102  /* We can represent this sub_pathkey */
1103  EquivalenceMember *sub_member;
1104  EquivalenceClass *outer_ec;
1105 
1106  Assert(list_length(sub_eclass->ec_members) == 1);
1107  sub_member = (EquivalenceMember *) linitial(sub_eclass->ec_members);
1108 
1109  /*
1110  * Note: it might look funny to be setting sortref = 0 for a
1111  * reference to a volatile sub_eclass. However, the
1112  * expression is *not* volatile in the outer query: it's just
1113  * a Var referencing whatever the subquery emitted. (IOW, the
1114  * outer query isn't going to re-execute the volatile
1115  * expression itself.) So this is okay.
1116  */
1117  outer_ec =
1119  (Expr *) outer_var,
1120  sub_eclass->ec_opfamilies,
1121  sub_member->em_datatype,
1122  sub_eclass->ec_collation,
1123  0,
1124  rel->relids,
1125  false);
1126 
1127  /*
1128  * If we don't find a matching EC, sub-pathkey isn't
1129  * interesting to the outer query
1130  */
1131  if (outer_ec)
1132  best_pathkey =
1134  outer_ec,
1135  sub_pathkey->pk_opfamily,
1136  sub_pathkey->pk_strategy,
1137  sub_pathkey->pk_nulls_first);
1138  }
1139  }
1140  else
1141  {
1142  /*
1143  * Otherwise, the sub_pathkey's EquivalenceClass could contain
1144  * multiple elements (representing knowledge that multiple items
1145  * are effectively equal). Each element might match none, one, or
1146  * more of the output columns that are visible to the outer query.
1147  * This means we may have multiple possible representations of the
1148  * sub_pathkey in the context of the outer query. Ideally we
1149  * would generate them all and put them all into an EC of the
1150  * outer query, thereby propagating equality knowledge up to the
1151  * outer query. Right now we cannot do so, because the outer
1152  * query's EquivalenceClasses are already frozen when this is
1153  * called. Instead we prefer the one that has the highest "score"
1154  * (number of EC peers, plus one if it matches the outer
1155  * query_pathkeys). This is the most likely to be useful in the
1156  * outer query.
1157  */
1158  int best_score = -1;
1159  ListCell *j;
1160 
1161  foreach(j, sub_eclass->ec_members)
1162  {
1163  EquivalenceMember *sub_member = (EquivalenceMember *) lfirst(j);
1164  Expr *sub_expr = sub_member->em_expr;
1165  Oid sub_expr_type = sub_member->em_datatype;
1166  Oid sub_expr_coll = sub_eclass->ec_collation;
1167  ListCell *k;
1168 
1169  if (sub_member->em_is_child)
1170  continue; /* ignore children here */
1171 
1172  foreach(k, subquery_tlist)
1173  {
1174  TargetEntry *tle = (TargetEntry *) lfirst(k);
1175  Var *outer_var;
1176  Expr *tle_expr;
1177  EquivalenceClass *outer_ec;
1178  PathKey *outer_pk;
1179  int score;
1180 
1181  /* Is TLE actually available to the outer query? */
1182  outer_var = find_var_for_subquery_tle(rel, tle);
1183  if (!outer_var)
1184  continue;
1185 
1186  /*
1187  * The targetlist entry is considered to match if it
1188  * matches after sort-key canonicalization. That is
1189  * needed since the sub_expr has been through the same
1190  * process.
1191  */
1192  tle_expr = canonicalize_ec_expression(tle->expr,
1193  sub_expr_type,
1194  sub_expr_coll);
1195  if (!equal(tle_expr, sub_expr))
1196  continue;
1197 
1198  /* See if we have a matching EC for the TLE */
1199  outer_ec = get_eclass_for_sort_expr(root,
1200  (Expr *) outer_var,
1201  sub_eclass->ec_opfamilies,
1202  sub_expr_type,
1203  sub_expr_coll,
1204  0,
1205  rel->relids,
1206  false);
1207 
1208  /*
1209  * If we don't find a matching EC, this sub-pathkey isn't
1210  * interesting to the outer query
1211  */
1212  if (!outer_ec)
1213  continue;
1214 
1215  outer_pk = make_canonical_pathkey(root,
1216  outer_ec,
1217  sub_pathkey->pk_opfamily,
1218  sub_pathkey->pk_strategy,
1219  sub_pathkey->pk_nulls_first);
1220  /* score = # of equivalence peers */
1221  score = list_length(outer_ec->ec_members) - 1;
1222  /* +1 if it matches the proper query_pathkeys item */
1223  if (retvallen < outer_query_keys &&
1224  list_nth(root->query_pathkeys, retvallen) == outer_pk)
1225  score++;
1226  if (score > best_score)
1227  {
1228  best_pathkey = outer_pk;
1229  best_score = score;
1230  }
1231  }
1232  }
1233  }
1234 
1235  /*
1236  * If we couldn't find a representation of this sub_pathkey, we're
1237  * done (we can't use the ones to its right, either).
1238  */
1239  if (!best_pathkey)
1240  break;
1241 
1242  /*
1243  * Eliminate redundant ordering info; could happen if outer query
1244  * equivalences subquery keys...
1245  */
1246  if (!pathkey_is_redundant(best_pathkey, retval))
1247  {
1248  retval = lappend(retval, best_pathkey);
1249  retvallen++;
1250  }
1251  }
1252 
1253  return retval;
1254 }
1255 
1256 /*
1257  * find_var_for_subquery_tle
1258  *
1259  * If the given subquery tlist entry is due to be emitted by the subquery's
1260  * scan node, return a Var for it, else return NULL.
1261  *
1262  * We need this to ensure that we don't return pathkeys describing values
1263  * that are unavailable above the level of the subquery scan.
1264  */
1265 static Var *
1267 {
1268  ListCell *lc;
1269 
1270  /* If the TLE is resjunk, it's certainly not visible to the outer query */
1271  if (tle->resjunk)
1272  return NULL;
1273 
1274  /* Search the rel's targetlist to see what it will return */
1275  foreach(lc, rel->reltarget->exprs)
1276  {
1277  Var *var = (Var *) lfirst(lc);
1278 
1279  /* Ignore placeholders */
1280  if (!IsA(var, Var))
1281  continue;
1282  Assert(var->varno == rel->relid);
1283 
1284  /* If we find a Var referencing this TLE, we're good */
1285  if (var->varattno == tle->resno)
1286  return copyObject(var); /* Make a copy for safety */
1287  }
1288  return NULL;
1289 }
1290 
1291 /*
1292  * build_join_pathkeys
1293  * Build the path keys for a join relation constructed by mergejoin or
1294  * nestloop join. This is normally the same as the outer path's keys.
1295  *
1296  * EXCEPTION: in a FULL, RIGHT or RIGHT_ANTI join, we cannot treat the
1297  * result as having the outer path's path keys, because null lefthand rows
1298  * may be inserted at random points. It must be treated as unsorted.
1299  *
1300  * We truncate away any pathkeys that are uninteresting for higher joins.
1301  *
1302  * 'joinrel' is the join relation that paths are being formed for
1303  * 'jointype' is the join type (inner, left, full, etc)
1304  * 'outer_pathkeys' is the list of the current outer path's path keys
1305  *
1306  * Returns the list of new path keys.
1307  */
1308 List *
1310  RelOptInfo *joinrel,
1311  JoinType jointype,
1312  List *outer_pathkeys)
1313 {
1314  if (jointype == JOIN_FULL ||
1315  jointype == JOIN_RIGHT ||
1316  jointype == JOIN_RIGHT_ANTI)
1317  return NIL;
1318 
1319  /*
1320  * This used to be quite a complex bit of code, but now that all pathkey
1321  * sublists start out life canonicalized, we don't have to do a darn thing
1322  * here!
1323  *
1324  * We do, however, need to truncate the pathkeys list, since it may
1325  * contain pathkeys that were useful for forming this joinrel but are
1326  * uninteresting to higher levels.
1327  */
1328  return truncate_useless_pathkeys(root, joinrel, outer_pathkeys);
1329 }
1330 
1331 /****************************************************************************
1332  * PATHKEYS AND SORT CLAUSES
1333  ****************************************************************************/
1334 
1335 /*
1336  * make_pathkeys_for_sortclauses
1337  * Generate a pathkeys list that represents the sort order specified
1338  * by a list of SortGroupClauses
1339  *
1340  * The resulting PathKeys are always in canonical form. (Actually, there
1341  * is no longer any code anywhere that creates non-canonical PathKeys.)
1342  *
1343  * 'sortclauses' is a list of SortGroupClause nodes
1344  * 'tlist' is the targetlist to find the referenced tlist entries in
1345  */
1346 List *
1348  List *sortclauses,
1349  List *tlist)
1350 {
1351  List *result;
1352  bool sortable;
1353 
1355  &sortclauses,
1356  tlist,
1357  false,
1358  &sortable);
1359  /* It's caller error if not all clauses were sortable */
1360  Assert(sortable);
1361  return result;
1362 }
1363 
1364 /*
1365  * make_pathkeys_for_sortclauses_extended
1366  * Generate a pathkeys list that represents the sort order specified
1367  * by a list of SortGroupClauses
1368  *
1369  * The comments for make_pathkeys_for_sortclauses apply here too. In addition:
1370  *
1371  * If remove_redundant is true, then any sort clauses that are found to
1372  * give rise to redundant pathkeys are removed from the sortclauses list
1373  * (which therefore must be pass-by-reference in this version).
1374  *
1375  * *sortable is set to true if all the sort clauses are in fact sortable.
1376  * If any are not, they are ignored except for setting *sortable false.
1377  * (In that case, the output pathkey list isn't really useful. However,
1378  * we process the whole sortclauses list anyway, because it's still valid
1379  * to remove any clauses that can be proven redundant via the eclass logic.
1380  * Even though we'll have to hash in that case, we might as well not hash
1381  * redundant columns.)
1382  */
1383 List *
1385  List **sortclauses,
1386  List *tlist,
1387  bool remove_redundant,
1388  bool *sortable)
1389 {
1390  List *pathkeys = NIL;
1391  ListCell *l;
1392 
1393  *sortable = true;
1394  foreach(l, *sortclauses)
1395  {
1396  SortGroupClause *sortcl = (SortGroupClause *) lfirst(l);
1397  Expr *sortkey;
1398  PathKey *pathkey;
1399 
1400  sortkey = (Expr *) get_sortgroupclause_expr(sortcl, tlist);
1401  if (!OidIsValid(sortcl->sortop))
1402  {
1403  *sortable = false;
1404  continue;
1405  }
1406  pathkey = make_pathkey_from_sortop(root,
1407  sortkey,
1408  sortcl->sortop,
1409  sortcl->nulls_first,
1410  sortcl->tleSortGroupRef,
1411  true);
1412 
1413  /* Canonical form eliminates redundant ordering keys */
1414  if (!pathkey_is_redundant(pathkey, pathkeys))
1415  pathkeys = lappend(pathkeys, pathkey);
1416  else if (remove_redundant)
1417  *sortclauses = foreach_delete_current(*sortclauses, l);
1418  }
1419  return pathkeys;
1420 }
1421 
1422 /****************************************************************************
1423  * PATHKEYS AND MERGECLAUSES
1424  ****************************************************************************/
1425 
1426 /*
1427  * initialize_mergeclause_eclasses
1428  * Set the EquivalenceClass links in a mergeclause restrictinfo.
1429  *
1430  * RestrictInfo contains fields in which we may cache pointers to
1431  * EquivalenceClasses for the left and right inputs of the mergeclause.
1432  * (If the mergeclause is a true equivalence clause these will be the
1433  * same EquivalenceClass, otherwise not.) If the mergeclause is either
1434  * used to generate an EquivalenceClass, or derived from an EquivalenceClass,
1435  * then it's easy to set up the left_ec and right_ec members --- otherwise,
1436  * this function should be called to set them up. We will generate new
1437  * EquivalenceClauses if necessary to represent the mergeclause's left and
1438  * right sides.
1439  *
1440  * Note this is called before EC merging is complete, so the links won't
1441  * necessarily point to canonical ECs. Before they are actually used for
1442  * anything, update_mergeclause_eclasses must be called to ensure that
1443  * they've been updated to point to canonical ECs.
1444  */
1445 void
1447 {
1448  Expr *clause = restrictinfo->clause;
1449  Oid lefttype,
1450  righttype;
1451 
1452  /* Should be a mergeclause ... */
1453  Assert(restrictinfo->mergeopfamilies != NIL);
1454  /* ... with links not yet set */
1455  Assert(restrictinfo->left_ec == NULL);
1456  Assert(restrictinfo->right_ec == NULL);
1457 
1458  /* Need the declared input types of the operator */
1459  op_input_types(((OpExpr *) clause)->opno, &lefttype, &righttype);
1460 
1461  /* Find or create a matching EquivalenceClass for each side */
1462  restrictinfo->left_ec =
1464  (Expr *) get_leftop(clause),
1465  restrictinfo->mergeopfamilies,
1466  lefttype,
1467  ((OpExpr *) clause)->inputcollid,
1468  0,
1469  NULL,
1470  true);
1471  restrictinfo->right_ec =
1473  (Expr *) get_rightop(clause),
1474  restrictinfo->mergeopfamilies,
1475  righttype,
1476  ((OpExpr *) clause)->inputcollid,
1477  0,
1478  NULL,
1479  true);
1480 }
1481 
1482 /*
1483  * update_mergeclause_eclasses
1484  * Make the cached EquivalenceClass links valid in a mergeclause
1485  * restrictinfo.
1486  *
1487  * These pointers should have been set by process_equivalence or
1488  * initialize_mergeclause_eclasses, but they might have been set to
1489  * non-canonical ECs that got merged later. Chase up to the canonical
1490  * merged parent if so.
1491  */
1492 void
1494 {
1495  /* Should be a merge clause ... */
1496  Assert(restrictinfo->mergeopfamilies != NIL);
1497  /* ... with pointers already set */
1498  Assert(restrictinfo->left_ec != NULL);
1499  Assert(restrictinfo->right_ec != NULL);
1500 
1501  /* Chase up to the top as needed */
1502  while (restrictinfo->left_ec->ec_merged)
1503  restrictinfo->left_ec = restrictinfo->left_ec->ec_merged;
1504  while (restrictinfo->right_ec->ec_merged)
1505  restrictinfo->right_ec = restrictinfo->right_ec->ec_merged;
1506 }
1507 
1508 /*
1509  * find_mergeclauses_for_outer_pathkeys
1510  * This routine attempts to find a list of mergeclauses that can be
1511  * used with a specified ordering for the join's outer relation.
1512  * If successful, it returns a list of mergeclauses.
1513  *
1514  * 'pathkeys' is a pathkeys list showing the ordering of an outer-rel path.
1515  * 'restrictinfos' is a list of mergejoinable restriction clauses for the
1516  * join relation being formed, in no particular order.
1517  *
1518  * The restrictinfos must be marked (via outer_is_left) to show which side
1519  * of each clause is associated with the current outer path. (See
1520  * select_mergejoin_clauses())
1521  *
1522  * The result is NIL if no merge can be done, else a maximal list of
1523  * usable mergeclauses (represented as a list of their restrictinfo nodes).
1524  * The list is ordered to match the pathkeys, as required for execution.
1525  */
1526 List *
1528  List *pathkeys,
1529  List *restrictinfos)
1530 {
1531  List *mergeclauses = NIL;
1532  ListCell *i;
1533 
1534  /* make sure we have eclasses cached in the clauses */
1535  foreach(i, restrictinfos)
1536  {
1537  RestrictInfo *rinfo = (RestrictInfo *) lfirst(i);
1538 
1539  update_mergeclause_eclasses(root, rinfo);
1540  }
1541 
1542  foreach(i, pathkeys)
1543  {
1544  PathKey *pathkey = (PathKey *) lfirst(i);
1545  EquivalenceClass *pathkey_ec = pathkey->pk_eclass;
1546  List *matched_restrictinfos = NIL;
1547  ListCell *j;
1548 
1549  /*----------
1550  * A mergejoin clause matches a pathkey if it has the same EC.
1551  * If there are multiple matching clauses, take them all. In plain
1552  * inner-join scenarios we expect only one match, because
1553  * equivalence-class processing will have removed any redundant
1554  * mergeclauses. However, in outer-join scenarios there might be
1555  * multiple matches. An example is
1556  *
1557  * select * from a full join b
1558  * on a.v1 = b.v1 and a.v2 = b.v2 and a.v1 = b.v2;
1559  *
1560  * Given the pathkeys ({a.v1}, {a.v2}) it is okay to return all three
1561  * clauses (in the order a.v1=b.v1, a.v1=b.v2, a.v2=b.v2) and indeed
1562  * we *must* do so or we will be unable to form a valid plan.
1563  *
1564  * We expect that the given pathkeys list is canonical, which means
1565  * no two members have the same EC, so it's not possible for this
1566  * code to enter the same mergeclause into the result list twice.
1567  *
1568  * It's possible that multiple matching clauses might have different
1569  * ECs on the other side, in which case the order we put them into our
1570  * result makes a difference in the pathkeys required for the inner
1571  * input rel. However this routine hasn't got any info about which
1572  * order would be best, so we don't worry about that.
1573  *
1574  * It's also possible that the selected mergejoin clauses produce
1575  * a noncanonical ordering of pathkeys for the inner side, ie, we
1576  * might select clauses that reference b.v1, b.v2, b.v1 in that
1577  * order. This is not harmful in itself, though it suggests that
1578  * the clauses are partially redundant. Since the alternative is
1579  * to omit mergejoin clauses and thereby possibly fail to generate a
1580  * plan altogether, we live with it. make_inner_pathkeys_for_merge()
1581  * has to delete duplicates when it constructs the inner pathkeys
1582  * list, and we also have to deal with such cases specially in
1583  * create_mergejoin_plan().
1584  *----------
1585  */
1586  foreach(j, restrictinfos)
1587  {
1588  RestrictInfo *rinfo = (RestrictInfo *) lfirst(j);
1589  EquivalenceClass *clause_ec;
1590 
1591  clause_ec = rinfo->outer_is_left ?
1592  rinfo->left_ec : rinfo->right_ec;
1593  if (clause_ec == pathkey_ec)
1594  matched_restrictinfos = lappend(matched_restrictinfos, rinfo);
1595  }
1596 
1597  /*
1598  * If we didn't find a mergeclause, we're done --- any additional
1599  * sort-key positions in the pathkeys are useless. (But we can still
1600  * mergejoin if we found at least one mergeclause.)
1601  */
1602  if (matched_restrictinfos == NIL)
1603  break;
1604 
1605  /*
1606  * If we did find usable mergeclause(s) for this sort-key position,
1607  * add them to result list.
1608  */
1609  mergeclauses = list_concat(mergeclauses, matched_restrictinfos);
1610  }
1611 
1612  return mergeclauses;
1613 }
1614 
1615 /*
1616  * select_outer_pathkeys_for_merge
1617  * Builds a pathkey list representing a possible sort ordering
1618  * that can be used with the given mergeclauses.
1619  *
1620  * 'mergeclauses' is a list of RestrictInfos for mergejoin clauses
1621  * that will be used in a merge join.
1622  * 'joinrel' is the join relation we are trying to construct.
1623  *
1624  * The restrictinfos must be marked (via outer_is_left) to show which side
1625  * of each clause is associated with the current outer path. (See
1626  * select_mergejoin_clauses())
1627  *
1628  * Returns a pathkeys list that can be applied to the outer relation.
1629  *
1630  * Since we assume here that a sort is required, there is no particular use
1631  * in matching any available ordering of the outerrel. (joinpath.c has an
1632  * entirely separate code path for considering sort-free mergejoins.) Rather,
1633  * it's interesting to try to match, or match a prefix of the requested
1634  * query_pathkeys so that a second output sort may be avoided or an
1635  * incremental sort may be done instead. We can get away with just a prefix
1636  * of the query_pathkeys when that prefix covers the entire join condition.
1637  * Failing that, we try to list "more popular" keys (those with the most
1638  * unmatched EquivalenceClass peers) earlier, in hopes of making the resulting
1639  * ordering useful for as many higher-level mergejoins as possible.
1640  */
1641 List *
1643  List *mergeclauses,
1644  RelOptInfo *joinrel)
1645 {
1646  List *pathkeys = NIL;
1647  int nClauses = list_length(mergeclauses);
1648  EquivalenceClass **ecs;
1649  int *scores;
1650  int necs;
1651  ListCell *lc;
1652  int j;
1653 
1654  /* Might have no mergeclauses */
1655  if (nClauses == 0)
1656  return NIL;
1657 
1658  /*
1659  * Make arrays of the ECs used by the mergeclauses (dropping any
1660  * duplicates) and their "popularity" scores.
1661  */
1662  ecs = (EquivalenceClass **) palloc(nClauses * sizeof(EquivalenceClass *));
1663  scores = (int *) palloc(nClauses * sizeof(int));
1664  necs = 0;
1665 
1666  foreach(lc, mergeclauses)
1667  {
1668  RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
1669  EquivalenceClass *oeclass;
1670  int score;
1671  ListCell *lc2;
1672 
1673  /* get the outer eclass */
1674  update_mergeclause_eclasses(root, rinfo);
1675 
1676  if (rinfo->outer_is_left)
1677  oeclass = rinfo->left_ec;
1678  else
1679  oeclass = rinfo->right_ec;
1680 
1681  /* reject duplicates */
1682  for (j = 0; j < necs; j++)
1683  {
1684  if (ecs[j] == oeclass)
1685  break;
1686  }
1687  if (j < necs)
1688  continue;
1689 
1690  /* compute score */
1691  score = 0;
1692  foreach(lc2, oeclass->ec_members)
1693  {
1695 
1696  /* Potential future join partner? */
1697  if (!em->em_is_const && !em->em_is_child &&
1698  !bms_overlap(em->em_relids, joinrel->relids))
1699  score++;
1700  }
1701 
1702  ecs[necs] = oeclass;
1703  scores[necs] = score;
1704  necs++;
1705  }
1706 
1707  /*
1708  * Find out if we have all the ECs mentioned in query_pathkeys; if so we
1709  * can generate a sort order that's also useful for final output. If we
1710  * only have a prefix of the query_pathkeys, and that prefix is the entire
1711  * join condition, then it's useful to use the prefix as the pathkeys as
1712  * this increases the chances that an incremental sort will be able to be
1713  * used by the upper planner.
1714  */
1715  if (root->query_pathkeys)
1716  {
1717  int matches = 0;
1718 
1719  foreach(lc, root->query_pathkeys)
1720  {
1721  PathKey *query_pathkey = (PathKey *) lfirst(lc);
1722  EquivalenceClass *query_ec = query_pathkey->pk_eclass;
1723 
1724  for (j = 0; j < necs; j++)
1725  {
1726  if (ecs[j] == query_ec)
1727  break; /* found match */
1728  }
1729  if (j >= necs)
1730  break; /* didn't find match */
1731 
1732  matches++;
1733  }
1734  /* if we got to the end of the list, we have them all */
1735  if (lc == NULL)
1736  {
1737  /* copy query_pathkeys as starting point for our output */
1738  pathkeys = list_copy(root->query_pathkeys);
1739  /* mark their ECs as already-emitted */
1740  foreach(lc, root->query_pathkeys)
1741  {
1742  PathKey *query_pathkey = (PathKey *) lfirst(lc);
1743  EquivalenceClass *query_ec = query_pathkey->pk_eclass;
1744 
1745  for (j = 0; j < necs; j++)
1746  {
1747  if (ecs[j] == query_ec)
1748  {
1749  scores[j] = -1;
1750  break;
1751  }
1752  }
1753  }
1754  }
1755 
1756  /*
1757  * If we didn't match to all of the query_pathkeys, but did match to
1758  * all of the join clauses then we'll make use of these as partially
1759  * sorted input is better than nothing for the upper planner as it may
1760  * lead to incremental sorts instead of full sorts.
1761  */
1762  else if (matches == nClauses)
1763  {
1764  pathkeys = list_copy_head(root->query_pathkeys, matches);
1765 
1766  /* we have all of the join pathkeys, so nothing more to do */
1767  pfree(ecs);
1768  pfree(scores);
1769 
1770  return pathkeys;
1771  }
1772  }
1773 
1774  /*
1775  * Add remaining ECs to the list in popularity order, using a default sort
1776  * ordering. (We could use qsort() here, but the list length is usually
1777  * so small it's not worth it.)
1778  */
1779  for (;;)
1780  {
1781  int best_j;
1782  int best_score;
1783  EquivalenceClass *ec;
1784  PathKey *pathkey;
1785 
1786  best_j = 0;
1787  best_score = scores[0];
1788  for (j = 1; j < necs; j++)
1789  {
1790  if (scores[j] > best_score)
1791  {
1792  best_j = j;
1793  best_score = scores[j];
1794  }
1795  }
1796  if (best_score < 0)
1797  break; /* all done */
1798  ec = ecs[best_j];
1799  scores[best_j] = -1;
1800  pathkey = make_canonical_pathkey(root,
1801  ec,
1804  false);
1805  /* can't be redundant because no duplicate ECs */
1806  Assert(!pathkey_is_redundant(pathkey, pathkeys));
1807  pathkeys = lappend(pathkeys, pathkey);
1808  }
1809 
1810  pfree(ecs);
1811  pfree(scores);
1812 
1813  return pathkeys;
1814 }
1815 
1816 /*
1817  * make_inner_pathkeys_for_merge
1818  * Builds a pathkey list representing the explicit sort order that
1819  * must be applied to an inner path to make it usable with the
1820  * given mergeclauses.
1821  *
1822  * 'mergeclauses' is a list of RestrictInfos for the mergejoin clauses
1823  * that will be used in a merge join, in order.
1824  * 'outer_pathkeys' are the already-known canonical pathkeys for the outer
1825  * side of the join.
1826  *
1827  * The restrictinfos must be marked (via outer_is_left) to show which side
1828  * of each clause is associated with the current outer path. (See
1829  * select_mergejoin_clauses())
1830  *
1831  * Returns a pathkeys list that can be applied to the inner relation.
1832  *
1833  * Note that it is not this routine's job to decide whether sorting is
1834  * actually needed for a particular input path. Assume a sort is necessary;
1835  * just make the keys, eh?
1836  */
1837 List *
1839  List *mergeclauses,
1840  List *outer_pathkeys)
1841 {
1842  List *pathkeys = NIL;
1843  EquivalenceClass *lastoeclass;
1844  PathKey *opathkey;
1845  ListCell *lc;
1846  ListCell *lop;
1847 
1848  lastoeclass = NULL;
1849  opathkey = NULL;
1850  lop = list_head(outer_pathkeys);
1851 
1852  foreach(lc, mergeclauses)
1853  {
1854  RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
1855  EquivalenceClass *oeclass;
1856  EquivalenceClass *ieclass;
1857  PathKey *pathkey;
1858 
1859  update_mergeclause_eclasses(root, rinfo);
1860 
1861  if (rinfo->outer_is_left)
1862  {
1863  oeclass = rinfo->left_ec;
1864  ieclass = rinfo->right_ec;
1865  }
1866  else
1867  {
1868  oeclass = rinfo->right_ec;
1869  ieclass = rinfo->left_ec;
1870  }
1871 
1872  /* outer eclass should match current or next pathkeys */
1873  /* we check this carefully for debugging reasons */
1874  if (oeclass != lastoeclass)
1875  {
1876  if (!lop)
1877  elog(ERROR, "too few pathkeys for mergeclauses");
1878  opathkey = (PathKey *) lfirst(lop);
1879  lop = lnext(outer_pathkeys, lop);
1880  lastoeclass = opathkey->pk_eclass;
1881  if (oeclass != lastoeclass)
1882  elog(ERROR, "outer pathkeys do not match mergeclause");
1883  }
1884 
1885  /*
1886  * Often, we'll have same EC on both sides, in which case the outer
1887  * pathkey is also canonical for the inner side, and we can skip a
1888  * useless search.
1889  */
1890  if (ieclass == oeclass)
1891  pathkey = opathkey;
1892  else
1893  pathkey = make_canonical_pathkey(root,
1894  ieclass,
1895  opathkey->pk_opfamily,
1896  opathkey->pk_strategy,
1897  opathkey->pk_nulls_first);
1898 
1899  /*
1900  * Don't generate redundant pathkeys (which can happen if multiple
1901  * mergeclauses refer to the same EC). Because we do this, the output
1902  * pathkey list isn't necessarily ordered like the mergeclauses, which
1903  * complicates life for create_mergejoin_plan(). But if we didn't,
1904  * we'd have a noncanonical sort key list, which would be bad; for one
1905  * reason, it certainly wouldn't match any available sort order for
1906  * the input relation.
1907  */
1908  if (!pathkey_is_redundant(pathkey, pathkeys))
1909  pathkeys = lappend(pathkeys, pathkey);
1910  }
1911 
1912  return pathkeys;
1913 }
1914 
1915 /*
1916  * trim_mergeclauses_for_inner_pathkeys
1917  * This routine trims a list of mergeclauses to include just those that
1918  * work with a specified ordering for the join's inner relation.
1919  *
1920  * 'mergeclauses' is a list of RestrictInfos for mergejoin clauses for the
1921  * join relation being formed, in an order known to work for the
1922  * currently-considered sort ordering of the join's outer rel.
1923  * 'pathkeys' is a pathkeys list showing the ordering of an inner-rel path;
1924  * it should be equal to, or a truncation of, the result of
1925  * make_inner_pathkeys_for_merge for these mergeclauses.
1926  *
1927  * What we return will be a prefix of the given mergeclauses list.
1928  *
1929  * We need this logic because make_inner_pathkeys_for_merge's result isn't
1930  * necessarily in the same order as the mergeclauses. That means that if we
1931  * consider an inner-rel pathkey list that is a truncation of that result,
1932  * we might need to drop mergeclauses even though they match a surviving inner
1933  * pathkey. This happens when they are to the right of a mergeclause that
1934  * matches a removed inner pathkey.
1935  *
1936  * The mergeclauses must be marked (via outer_is_left) to show which side
1937  * of each clause is associated with the current outer path. (See
1938  * select_mergejoin_clauses())
1939  */
1940 List *
1942  List *mergeclauses,
1943  List *pathkeys)
1944 {
1945  List *new_mergeclauses = NIL;
1946  PathKey *pathkey;
1947  EquivalenceClass *pathkey_ec;
1948  bool matched_pathkey;
1949  ListCell *lip;
1950  ListCell *i;
1951 
1952  /* No pathkeys => no mergeclauses (though we don't expect this case) */
1953  if (pathkeys == NIL)
1954  return NIL;
1955  /* Initialize to consider first pathkey */
1956  lip = list_head(pathkeys);
1957  pathkey = (PathKey *) lfirst(lip);
1958  pathkey_ec = pathkey->pk_eclass;
1959  lip = lnext(pathkeys, lip);
1960  matched_pathkey = false;
1961 
1962  /* Scan mergeclauses to see how many we can use */
1963  foreach(i, mergeclauses)
1964  {
1965  RestrictInfo *rinfo = (RestrictInfo *) lfirst(i);
1966  EquivalenceClass *clause_ec;
1967 
1968  /* Assume we needn't do update_mergeclause_eclasses again here */
1969 
1970  /* Check clause's inner-rel EC against current pathkey */
1971  clause_ec = rinfo->outer_is_left ?
1972  rinfo->right_ec : rinfo->left_ec;
1973 
1974  /* If we don't have a match, attempt to advance to next pathkey */
1975  if (clause_ec != pathkey_ec)
1976  {
1977  /* If we had no clauses matching this inner pathkey, must stop */
1978  if (!matched_pathkey)
1979  break;
1980 
1981  /* Advance to next inner pathkey, if any */
1982  if (lip == NULL)
1983  break;
1984  pathkey = (PathKey *) lfirst(lip);
1985  pathkey_ec = pathkey->pk_eclass;
1986  lip = lnext(pathkeys, lip);
1987  matched_pathkey = false;
1988  }
1989 
1990  /* If mergeclause matches current inner pathkey, we can use it */
1991  if (clause_ec == pathkey_ec)
1992  {
1993  new_mergeclauses = lappend(new_mergeclauses, rinfo);
1994  matched_pathkey = true;
1995  }
1996  else
1997  {
1998  /* Else, no hope of adding any more mergeclauses */
1999  break;
2000  }
2001  }
2002 
2003  return new_mergeclauses;
2004 }
2005 
2006 
2007 /****************************************************************************
2008  * PATHKEY USEFULNESS CHECKS
2009  *
2010  * We only want to remember as many of the pathkeys of a path as have some
2011  * potential use, either for subsequent mergejoins or for meeting the query's
2012  * requested output ordering. This ensures that add_path() won't consider
2013  * a path to have a usefully different ordering unless it really is useful.
2014  * These routines check for usefulness of given pathkeys.
2015  ****************************************************************************/
2016 
2017 /*
2018  * pathkeys_useful_for_merging
2019  * Count the number of pathkeys that may be useful for mergejoins
2020  * above the given relation.
2021  *
2022  * We consider a pathkey potentially useful if it corresponds to the merge
2023  * ordering of either side of any joinclause for the rel. This might be
2024  * overoptimistic, since joinclauses that require different other relations
2025  * might never be usable at the same time, but trying to be exact is likely
2026  * to be more trouble than it's worth.
2027  *
2028  * To avoid doubling the number of mergejoin paths considered, we would like
2029  * to consider only one of the two scan directions (ASC or DESC) as useful
2030  * for merging for any given target column. The choice is arbitrary unless
2031  * one of the directions happens to match an ORDER BY key, in which case
2032  * that direction should be preferred, in hopes of avoiding a final sort step.
2033  * right_merge_direction() implements this heuristic.
2034  */
2035 static int
2037 {
2038  int useful = 0;
2039  ListCell *i;
2040 
2041  foreach(i, pathkeys)
2042  {
2043  PathKey *pathkey = (PathKey *) lfirst(i);
2044  bool matched = false;
2045  ListCell *j;
2046 
2047  /* If "wrong" direction, not useful for merging */
2048  if (!right_merge_direction(root, pathkey))
2049  break;
2050 
2051  /*
2052  * First look into the EquivalenceClass of the pathkey, to see if
2053  * there are any members not yet joined to the rel. If so, it's
2054  * surely possible to generate a mergejoin clause using them.
2055  */
2056  if (rel->has_eclass_joins &&
2057  eclass_useful_for_merging(root, pathkey->pk_eclass, rel))
2058  matched = true;
2059  else
2060  {
2061  /*
2062  * Otherwise search the rel's joininfo list, which contains
2063  * non-EquivalenceClass-derivable join clauses that might
2064  * nonetheless be mergejoinable.
2065  */
2066  foreach(j, rel->joininfo)
2067  {
2068  RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(j);
2069 
2070  if (restrictinfo->mergeopfamilies == NIL)
2071  continue;
2072  update_mergeclause_eclasses(root, restrictinfo);
2073 
2074  if (pathkey->pk_eclass == restrictinfo->left_ec ||
2075  pathkey->pk_eclass == restrictinfo->right_ec)
2076  {
2077  matched = true;
2078  break;
2079  }
2080  }
2081  }
2082 
2083  /*
2084  * If we didn't find a mergeclause, we're done --- any additional
2085  * sort-key positions in the pathkeys are useless. (But we can still
2086  * mergejoin if we found at least one mergeclause.)
2087  */
2088  if (matched)
2089  useful++;
2090  else
2091  break;
2092  }
2093 
2094  return useful;
2095 }
2096 
2097 /*
2098  * right_merge_direction
2099  * Check whether the pathkey embodies the preferred sort direction
2100  * for merging its target column.
2101  */
2102 static bool
2104 {
2105  ListCell *l;
2106 
2107  foreach(l, root->query_pathkeys)
2108  {
2109  PathKey *query_pathkey = (PathKey *) lfirst(l);
2110 
2111  if (pathkey->pk_eclass == query_pathkey->pk_eclass &&
2112  pathkey->pk_opfamily == query_pathkey->pk_opfamily)
2113  {
2114  /*
2115  * Found a matching query sort column. Prefer this pathkey's
2116  * direction iff it matches. Note that we ignore pk_nulls_first,
2117  * which means that a sort might be needed anyway ... but we still
2118  * want to prefer only one of the two possible directions, and we
2119  * might as well use this one.
2120  */
2121  return (pathkey->pk_strategy == query_pathkey->pk_strategy);
2122  }
2123  }
2124 
2125  /* If no matching ORDER BY request, prefer the ASC direction */
2126  return (pathkey->pk_strategy == BTLessStrategyNumber);
2127 }
2128 
2129 /*
2130  * pathkeys_useful_for_ordering
2131  * Count the number of pathkeys that are useful for meeting the
2132  * query's requested output ordering.
2133  *
2134  * Because we the have the possibility of incremental sort, a prefix list of
2135  * keys is potentially useful for improving the performance of the requested
2136  * ordering. Thus we return 0, if no valuable keys are found, or the number
2137  * of leading keys shared by the list and the requested ordering..
2138  */
2139 static int
2141 {
2142  int n_common_pathkeys;
2143 
2144  (void) pathkeys_count_contained_in(root->query_pathkeys, pathkeys,
2145  &n_common_pathkeys);
2146 
2147  return n_common_pathkeys;
2148 }
2149 
2150 /*
2151  * pathkeys_useful_for_grouping
2152  * Count the number of pathkeys that are useful for grouping (instead of
2153  * explicit sort)
2154  *
2155  * Group pathkeys could be reordered to benefit from the ordering. The
2156  * ordering may not be "complete" and may require incremental sort, but that's
2157  * fine. So we simply count prefix pathkeys with a matching group key, and
2158  * stop once we find the first pathkey without a match.
2159  *
2160  * So e.g. with pathkeys (a,b,c) and group keys (a,b,e) this determines (a,b)
2161  * pathkeys are useful for grouping, and we might do incremental sort to get
2162  * path ordered by (a,b,e).
2163  *
2164  * This logic is necessary to retain paths with ordering not matching grouping
2165  * keys directly, without the reordering.
2166  *
2167  * Returns the length of pathkey prefix with matching group keys.
2168  */
2169 static int
2171 {
2172  ListCell *key;
2173  int n = 0;
2174 
2175  /* no special ordering requested for grouping */
2176  if (root->group_pathkeys == NIL)
2177  return 0;
2178 
2179  /* walk the pathkeys and search for matching group key */
2180  foreach(key, pathkeys)
2181  {
2182  PathKey *pathkey = (PathKey *) lfirst(key);
2183 
2184  /* no matching group key, we're done */
2185  if (!list_member_ptr(root->group_pathkeys, pathkey))
2186  break;
2187 
2188  n++;
2189  }
2190 
2191  return n;
2192 }
2193 
2194 /*
2195  * truncate_useless_pathkeys
2196  * Shorten the given pathkey list to just the useful pathkeys.
2197  */
2198 List *
2200  RelOptInfo *rel,
2201  List *pathkeys)
2202 {
2203  int nuseful;
2204  int nuseful2;
2205 
2206  nuseful = pathkeys_useful_for_merging(root, rel, pathkeys);
2207  nuseful2 = pathkeys_useful_for_ordering(root, pathkeys);
2208  if (nuseful2 > nuseful)
2209  nuseful = nuseful2;
2210  nuseful2 = pathkeys_useful_for_grouping(root, pathkeys);
2211  if (nuseful2 > nuseful)
2212  nuseful = nuseful2;
2213 
2214  /*
2215  * Note: not safe to modify input list destructively, but we can avoid
2216  * copying the list if we're not actually going to change it
2217  */
2218  if (nuseful == 0)
2219  return NIL;
2220  else if (nuseful == list_length(pathkeys))
2221  return pathkeys;
2222  else
2223  return list_copy_head(pathkeys, nuseful);
2224 }
2225 
2226 /*
2227  * has_useful_pathkeys
2228  * Detect whether the specified rel could have any pathkeys that are
2229  * useful according to truncate_useless_pathkeys().
2230  *
2231  * This is a cheap test that lets us skip building pathkeys at all in very
2232  * simple queries. It's OK to err in the direction of returning "true" when
2233  * there really aren't any usable pathkeys, but erring in the other direction
2234  * is bad --- so keep this in sync with the routines above!
2235  *
2236  * We could make the test more complex, for example checking to see if any of
2237  * the joinclauses are really mergejoinable, but that likely wouldn't win
2238  * often enough to repay the extra cycles. Queries with neither a join nor
2239  * a sort are reasonably common, though, so this much work seems worthwhile.
2240  */
2241 bool
2243 {
2244  if (rel->joininfo != NIL || rel->has_eclass_joins)
2245  return true; /* might be able to use pathkeys for merging */
2246  if (root->group_pathkeys != NIL)
2247  return true; /* might be able to use pathkeys for grouping */
2248  if (root->query_pathkeys != NIL)
2249  return true; /* might be able to use them for ordering */
2250  return false; /* definitely useless */
2251 }
bool bms_is_subset(const Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:412
bool bms_overlap(const Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:582
#define bms_is_empty(a)
Definition: bitmapset.h:118
signed short int16
Definition: c.h:480
unsigned int Index
Definition: c.h:601
#define OidIsValid(objectId)
Definition: c.h:762
bool enable_incremental_sort
Definition: costsize.c:140
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:224
bool equal(const void *a, const void *b)
Definition: equalfuncs.c:223
Expr * canonicalize_ec_expression(Expr *expr, Oid req_type, Oid req_collation)
Definition: equivclass.c:472
EquivalenceClass * get_eclass_for_sort_expr(PlannerInfo *root, Expr *expr, List *opfamilies, Oid opcintype, Oid collation, Index sortref, Relids rel, bool create_it)
Definition: equivclass.c:587
bool eclass_useful_for_merging(PlannerInfo *root, EquivalenceClass *eclass, RelOptInfo *rel)
Definition: equivclass.c:3147
bool indexcol_is_bool_constant_for_query(PlannerInfo *root, IndexOptInfo *index, int indexcol)
Definition: indxpath.c:3707
int j
Definition: isn.c:74
int i
Definition: isn.c:73
Assert(fmt[strlen(fmt) - 1] !='\n')
List * lappend(List *list, void *datum)
Definition: list.c:339
List * list_copy_head(const List *oldlist, int len)
Definition: list.c:1593
List * list_copy(const List *oldlist)
Definition: list.c:1573
bool list_member_ptr(const List *list, const void *datum)
Definition: list.c:682
List * list_concat_unique_ptr(List *list1, const List *list2)
Definition: list.c:1427
void list_free(List *list)
Definition: list.c:1546
List * list_concat(List *list1, const List *list2)
Definition: list.c:561
List * get_mergejoin_opfamilies(Oid opno)
Definition: lsyscache.c:366
Oid get_opfamily_member(Oid opfamily, Oid lefttype, Oid righttype, int16 strategy)
Definition: lsyscache.c:166
bool get_ordering_op_properties(Oid opno, Oid *opfamily, Oid *opcintype, int16 *strategy)
Definition: lsyscache.c:207
void op_input_types(Oid opno, Oid *lefttype, Oid *righttype)
Definition: lsyscache.c:1336
void pfree(void *pointer)
Definition: mcxt.c:1508
void * palloc(Size size)
Definition: mcxt.c:1304
Oid exprCollation(const Node *expr)
Definition: nodeFuncs.c:788
static Expr * get_notclausearg(const void *notclause)
Definition: nodeFuncs.h:132
static Node * get_rightop(const void *clause)
Definition: nodeFuncs.h:93
static bool is_notclause(const void *clause)
Definition: nodeFuncs.h:123
static Node * get_leftop(const void *clause)
Definition: nodeFuncs.h:81
#define IsA(nodeptr, _type_)
Definition: nodes.h:158
#define copyObject(obj)
Definition: nodes.h:223
#define makeNode(_type_)
Definition: nodes.h:155
JoinType
Definition: nodes.h:278
@ JOIN_FULL
Definition: nodes.h:285
@ JOIN_RIGHT
Definition: nodes.h:286
@ JOIN_RIGHT_ANTI
Definition: nodes.h:299
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:124
bool partitions_are_ordered(PartitionBoundInfo boundinfo, Bitmapset *live_parts)
Definition: partbounds.c:2852
static bool matches_boolean_partition_clause(RestrictInfo *rinfo, RelOptInfo *partrel, int partkeycol)
Definition: pathkeys.c:899
List * build_join_pathkeys(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, List *outer_pathkeys)
Definition: pathkeys.c:1309
List * get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
Definition: pathkeys.c:485
List * build_expression_pathkey(PlannerInfo *root, Expr *expr, Oid opno, Relids rel, bool create_it)
Definition: pathkeys.c:1015
static bool right_merge_direction(PlannerInfo *root, PathKey *pathkey)
Definition: pathkeys.c:2103
List * make_inner_pathkeys_for_merge(PlannerInfo *root, List *mergeclauses, List *outer_pathkeys)
Definition: pathkeys.c:1838
Path * get_cheapest_path_for_pathkeys(List *paths, List *pathkeys, Relids required_outer, CostSelector cost_criterion, bool require_parallel_safe)
Definition: pathkeys.c:635
bool pathkeys_count_contained_in(List *keys1, List *keys2, int *n_common)
Definition: pathkeys.c:573
List * find_mergeclauses_for_outer_pathkeys(PlannerInfo *root, List *pathkeys, List *restrictinfos)
Definition: pathkeys.c:1527
static int group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys, List **group_clauses, int num_groupby_pathkeys)
Definition: pathkeys.c:368
static PathKey * make_pathkey_from_sortop(PlannerInfo *root, Expr *expr, Oid ordering_op, bool nulls_first, Index sortref, bool create_it)
Definition: pathkeys.c:255
bool has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel)
Definition: pathkeys.c:2242
List * append_pathkeys(List *target, List *source)
Definition: pathkeys.c:106
List * truncate_useless_pathkeys(PlannerInfo *root, RelOptInfo *rel, List *pathkeys)
Definition: pathkeys.c:2199
static int pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys)
Definition: pathkeys.c:2140
List * trim_mergeclauses_for_inner_pathkeys(PlannerInfo *root, List *mergeclauses, List *pathkeys)
Definition: pathkeys.c:1941
List * select_outer_pathkeys_for_merge(PlannerInfo *root, List *mergeclauses, RelOptInfo *joinrel)
Definition: pathkeys.c:1642
void update_mergeclause_eclasses(PlannerInfo *root, RestrictInfo *restrictinfo)
Definition: pathkeys.c:1493
static Var * find_var_for_subquery_tle(RelOptInfo *rel, TargetEntry *tle)
Definition: pathkeys.c:1266
List * build_index_pathkeys(PlannerInfo *root, IndexOptInfo *index, ScanDirection scandir)
Definition: pathkeys.c:755
static int pathkeys_useful_for_grouping(PlannerInfo *root, List *pathkeys)
Definition: pathkeys.c:2170
static bool partkey_is_bool_constant_for_query(RelOptInfo *partrel, int partkeycol)
Definition: pathkeys.c:859
bool enable_group_by_reordering
Definition: pathkeys.c:31
static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys)
Definition: pathkeys.c:158
List * make_pathkeys_for_sortclauses_extended(PlannerInfo *root, List **sortclauses, List *tlist, bool remove_redundant, bool *sortable)
Definition: pathkeys.c:1384
PathKey * make_canonical_pathkey(PlannerInfo *root, EquivalenceClass *eclass, Oid opfamily, int strategy, bool nulls_first)
Definition: pathkeys.c:55
static bool pathkeys_are_duplicate(List *infos, List *pathkeys)
Definition: pathkeys.c:456
Path * get_cheapest_parallel_safe_total_inner(List *paths)
Definition: pathkeys.c:714
List * make_pathkeys_for_sortclauses(PlannerInfo *root, List *sortclauses, List *tlist)
Definition: pathkeys.c:1347
static int pathkeys_useful_for_merging(PlannerInfo *root, RelOptInfo *rel, List *pathkeys)
Definition: pathkeys.c:2036
List * convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel, List *subquery_pathkeys, List *subquery_tlist)
Definition: pathkeys.c:1069
static PathKey * make_pathkey_from_sortinfo(PlannerInfo *root, Expr *expr, Oid opfamily, Oid opcintype, Oid collation, bool reverse_sort, bool nulls_first, Index sortref, Relids rel, bool create_it)
Definition: pathkeys.c:197
void initialize_mergeclause_eclasses(PlannerInfo *root, RestrictInfo *restrictinfo)
Definition: pathkeys.c:1446
Path * get_cheapest_fractional_path_for_pathkeys(List *paths, List *pathkeys, Relids required_outer, double fraction)
Definition: pathkeys.c:681
bool pathkeys_contained_in(List *keys1, List *keys2)
Definition: pathkeys.c:341
PathKeysComparison compare_pathkeys(List *keys1, List *keys2)
Definition: pathkeys.c:302
List * build_partition_pathkeys(PlannerInfo *root, RelOptInfo *partrel, ScanDirection scandir, bool *partialkeys)
Definition: pathkeys.c:934
int compare_fractional_path_costs(Path *path1, Path *path2, double fraction)
Definition: pathnode.c:113
int compare_path_costs(Path *path1, Path *path2, CostSelector criterion)
Definition: pathnode.c:67
#define EC_MUST_BE_REDUNDANT(eclass)
Definition: pathnodes.h:1390
#define IS_SIMPLE_REL(rel)
Definition: pathnodes.h:824
CostSelector
Definition: pathnodes.h:37
#define PATH_REQ_OUTER(path)
Definition: pathnodes.h:1649
PathKeysComparison
Definition: paths.h:198
@ PATHKEYS_BETTER2
Definition: paths.h:201
@ PATHKEYS_BETTER1
Definition: paths.h:200
@ PATHKEYS_DIFFERENT
Definition: paths.h:202
@ PATHKEYS_EQUAL
Definition: paths.h:199
void * arg
#define lfirst(lc)
Definition: pg_list.h:172
#define lfirst_node(type, lc)
Definition: pg_list.h:176
static int list_length(const List *l)
Definition: pg_list.h:152
#define NIL
Definition: pg_list.h:68
#define forboth(cell1, list1, cell2, list2)
Definition: pg_list.h:518
#define foreach_current_index(var_or_cell)
Definition: pg_list.h:403
#define foreach_delete_current(lst, var_or_cell)
Definition: pg_list.h:391
#define list_make1(x1)
Definition: pg_list.h:212
static ListCell * list_head(const List *l)
Definition: pg_list.h:128
#define linitial(l)
Definition: pg_list.h:178
static void * list_nth(const List *list, int n)
Definition: pg_list.h:299
static ListCell * lnext(const List *l, const ListCell *c)
Definition: pg_list.h:343
#define linitial_oid(l)
Definition: pg_list.h:180
static rewind_source * source
Definition: pg_rewind.c:89
unsigned int Oid
Definition: postgres_ext.h:31
static struct cvec * eclass(struct vars *v, chr c, int cases)
Definition: regc_locale.c:500
static struct subre * parse(struct vars *v, int stopper, int type, struct state *init, struct state *final)
Definition: regcomp.c:715
#define ScanDirectionIsBackward(direction)
Definition: sdir.h:50
ScanDirection
Definition: sdir.h:25
#define BTGreaterStrategyNumber
Definition: stratnum.h:33
#define BTLessStrategyNumber
Definition: stratnum.h:29
#define BTEqualStrategyNumber
Definition: stratnum.h:31
List * ec_opfamilies
Definition: pathnodes.h:1370
Definition: pg_list.h:54
Definition: nodes.h:129
List * pathkeys
Definition: pathnodes.h:1467
List * clauses
Definition: pathnodes.h:1468
bool pk_nulls_first
Definition: pathnodes.h:1458
int pk_strategy
Definition: pathnodes.h:1457
Oid pk_opfamily
Definition: pathnodes.h:1456
List * exprs
Definition: pathnodes.h:1513
List * pathkeys
Definition: pathnodes.h:1645
bool parallel_safe
Definition: pathnodes.h:1635
int num_groupby_pathkeys
Definition: pathnodes.h:392
List * canon_pathkeys
Definition: pathnodes.h:317
bool ec_merging_done
Definition: pathnodes.h:314
List * sort_pathkeys
Definition: pathnodes.h:399
List * group_pathkeys
Definition: pathnodes.h:385
List * processed_groupClause
Definition: pathnodes.h:430
Query * parse
Definition: pathnodes.h:199
List * query_pathkeys
Definition: pathnodes.h:382
List * baserestrictinfo
Definition: pathnodes.h:966
List * joininfo
Definition: pathnodes.h:972
Relids relids
Definition: pathnodes.h:856
struct PathTarget * reltarget
Definition: pathnodes.h:878
Index relid
Definition: pathnodes.h:903
bool has_eclass_joins
Definition: pathnodes.h:974
Bitmapset * live_parts
Definition: pathnodes.h:1020
Expr * clause
Definition: pathnodes.h:2541
Index tleSortGroupRef
Definition: parsenodes.h:1397
Expr * expr
Definition: primnodes.h:1943
AttrNumber resno
Definition: primnodes.h:1945
Definition: primnodes.h:234
AttrNumber varattno
Definition: primnodes.h:246
int varno
Definition: primnodes.h:241
Definition: type.h:95
TargetEntry * get_sortgroupref_tle(Index sortref, List *targetList)
Definition: tlist.c:345
SortGroupClause * get_sortgroupref_clause_noerr(Index sortref, List *clauses)
Definition: tlist.c:443
Node * get_sortgroupclause_expr(SortGroupClause *sgClause, List *targetList)
Definition: tlist.c:379