PostgreSQL Source Code  git master
pathkeys.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * pathkeys.c
4  * Utilities for matching and building path keys
5  *
6  * See src/backend/optimizer/README for a great deal of information about
7  * the nature and use of path keys.
8  *
9  *
10  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
11  * Portions Copyright (c) 1994, Regents of the University of California
12  *
13  * IDENTIFICATION
14  * src/backend/optimizer/path/pathkeys.c
15  *
16  *-------------------------------------------------------------------------
17  */
#include "postgres.h"

#include "access/stratnum.h"
#include "catalog/pg_opfamily.h"
#include "nodes/nodeFuncs.h"
#include "optimizer/cost.h"
#include "optimizer/optimizer.h"
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
#include "partitioning/partbounds.h"
#include "utils/lsyscache.h"
29 
30 /* Consider reordering of GROUP BY keys? */
32 
33 static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys);
35  RelOptInfo *partrel,
36  int partkeycol);
38 static bool right_merge_direction(PlannerInfo *root, PathKey *pathkey);
39 
40 
41 /****************************************************************************
42  * PATHKEY CONSTRUCTION AND REDUNDANCY TESTING
43  ****************************************************************************/
44 
45 /*
46  * make_canonical_pathkey
47  * Given the parameters for a PathKey, find any pre-existing matching
48  * pathkey in the query's list of "canonical" pathkeys. Make a new
49  * entry if there's not one already.
50  *
51  * Note that this function must not be used until after we have completed
52  * merging EquivalenceClasses.
53  */
54 PathKey *
56  EquivalenceClass *eclass, Oid opfamily,
57  int strategy, bool nulls_first)
58 {
59  PathKey *pk;
60  ListCell *lc;
61  MemoryContext oldcontext;
62 
63  /* Can't make canonical pathkeys if the set of ECs might still change */
64  if (!root->ec_merging_done)
65  elog(ERROR, "too soon to build canonical pathkeys");
66 
67  /* The passed eclass might be non-canonical, so chase up to the top */
68  while (eclass->ec_merged)
69  eclass = eclass->ec_merged;
70 
71  foreach(lc, root->canon_pathkeys)
72  {
73  pk = (PathKey *) lfirst(lc);
74  if (eclass == pk->pk_eclass &&
75  opfamily == pk->pk_opfamily &&
76  strategy == pk->pk_strategy &&
77  nulls_first == pk->pk_nulls_first)
78  return pk;
79  }
80 
81  /*
82  * Be sure canonical pathkeys are allocated in the main planning context.
83  * Not an issue in normal planning, but it is for GEQO.
84  */
85  oldcontext = MemoryContextSwitchTo(root->planner_cxt);
86 
87  pk = makeNode(PathKey);
88  pk->pk_eclass = eclass;
89  pk->pk_opfamily = opfamily;
90  pk->pk_strategy = strategy;
91  pk->pk_nulls_first = nulls_first;
92 
93  root->canon_pathkeys = lappend(root->canon_pathkeys, pk);
94 
95  MemoryContextSwitchTo(oldcontext);
96 
97  return pk;
98 }
99 
100 /*
101  * append_pathkeys
102  * Append all non-redundant PathKeys in 'source' onto 'target' and
103  * returns the updated 'target' list.
104  */
105 List *
107 {
108  ListCell *lc;
109 
110  Assert(target != NIL);
111 
112  foreach(lc, source)
113  {
114  PathKey *pk = lfirst_node(PathKey, lc);
115 
116  if (!pathkey_is_redundant(pk, target))
117  target = lappend(target, pk);
118  }
119  return target;
120 }
121 
122 /*
123  * pathkey_is_redundant
124  * Is a pathkey redundant with one already in the given list?
125  *
126  * We detect two cases:
127  *
128  * 1. If the new pathkey's equivalence class contains a constant, and isn't
129  * below an outer join, then we can disregard it as a sort key. An example:
130  * SELECT ... WHERE x = 42 ORDER BY x, y;
131  * We may as well just sort by y. Note that because of opfamily matching,
132  * this is semantically correct: we know that the equality constraint is one
133  * that actually binds the variable to a single value in the terms of any
134  * ordering operator that might go with the eclass. This rule not only lets
135  * us simplify (or even skip) explicit sorts, but also allows matching index
136  * sort orders to a query when there are don't-care index columns.
137  *
138  * 2. If the new pathkey's equivalence class is the same as that of any
139  * existing member of the pathkey list, then it is redundant. Some examples:
140  * SELECT ... ORDER BY x, x;
141  * SELECT ... ORDER BY x, x DESC;
142  * SELECT ... WHERE x = y ORDER BY x, y;
143  * In all these cases the second sort key cannot distinguish values that are
144  * considered equal by the first, and so there's no point in using it.
145  * Note in particular that we need not compare opfamily (all the opfamilies
146  * of the EC have the same notion of equality) nor sort direction.
147  *
148  * Both the given pathkey and the list members must be canonical for this
149  * to work properly, but that's okay since we no longer ever construct any
150  * non-canonical pathkeys. (Note: the notion of a pathkey *list* being
151  * canonical includes the additional requirement of no redundant entries,
152  * which is exactly what we are checking for here.)
153  *
154  * Because the equivclass.c machinery forms only one copy of any EC per query,
155  * pointer comparison is enough to decide whether canonical ECs are the same.
156  */
157 static bool
158 pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys)
159 {
160  EquivalenceClass *new_ec = new_pathkey->pk_eclass;
161  ListCell *lc;
162 
163  /* Check for EC containing a constant --- unconditionally redundant */
164  if (EC_MUST_BE_REDUNDANT(new_ec))
165  return true;
166 
167  /* If same EC already used in list, then redundant */
168  foreach(lc, pathkeys)
169  {
170  PathKey *old_pathkey = (PathKey *) lfirst(lc);
171 
172  if (new_ec == old_pathkey->pk_eclass)
173  return true;
174  }
175 
176  return false;
177 }
178 
179 /*
180  * make_pathkey_from_sortinfo
181  * Given an expression and sort-order information, create a PathKey.
182  * The result is always a "canonical" PathKey, but it might be redundant.
183  *
184  * If the PathKey is being generated from a SortGroupClause, sortref should be
185  * the SortGroupClause's SortGroupRef; otherwise zero.
186  *
187  * If rel is not NULL, it identifies a specific relation we're considering
188  * a path for, and indicates that child EC members for that relation can be
189  * considered. Otherwise child members are ignored. (See the comments for
190  * get_eclass_for_sort_expr.)
191  *
192  * create_it is true if we should create any missing EquivalenceClass
193  * needed to represent the sort key. If it's false, we return NULL if the
194  * sort key isn't already present in any EquivalenceClass.
195  */
196 static PathKey *
198  Expr *expr,
199  Oid opfamily,
200  Oid opcintype,
201  Oid collation,
202  bool reverse_sort,
203  bool nulls_first,
204  Index sortref,
205  Relids rel,
206  bool create_it)
207 {
208  int16 strategy;
209  Oid equality_op;
210  List *opfamilies;
212 
213  strategy = reverse_sort ? BTGreaterStrategyNumber : BTLessStrategyNumber;
214 
215  /*
216  * EquivalenceClasses need to contain opfamily lists based on the family
217  * membership of mergejoinable equality operators, which could belong to
218  * more than one opfamily. So we have to look up the opfamily's equality
219  * operator and get its membership.
220  */
221  equality_op = get_opfamily_member(opfamily,
222  opcintype,
223  opcintype,
225  if (!OidIsValid(equality_op)) /* shouldn't happen */
226  elog(ERROR, "missing operator %d(%u,%u) in opfamily %u",
227  BTEqualStrategyNumber, opcintype, opcintype, opfamily);
228  opfamilies = get_mergejoin_opfamilies(equality_op);
229  if (!opfamilies) /* certainly should find some */
230  elog(ERROR, "could not find opfamilies for equality operator %u",
231  equality_op);
232 
233  /* Now find or (optionally) create a matching EquivalenceClass */
235  opfamilies, opcintype, collation,
236  sortref, rel, create_it);
237 
238  /* Fail if no EC and !create_it */
239  if (!eclass)
240  return NULL;
241 
242  /* And finally we can find or create a PathKey node */
243  return make_canonical_pathkey(root, eclass, opfamily,
244  strategy, nulls_first);
245 }
246 
247 /*
248  * make_pathkey_from_sortop
249  * Like make_pathkey_from_sortinfo, but work from a sort operator.
250  *
251  * This should eventually go away, but we need to restructure SortGroupClause
252  * first.
253  */
254 static PathKey *
256  Expr *expr,
257  Oid ordering_op,
258  bool nulls_first,
259  Index sortref,
260  bool create_it)
261 {
262  Oid opfamily,
263  opcintype,
264  collation;
265  int16 strategy;
266 
267  /* Find the operator in pg_amop --- failure shouldn't happen */
268  if (!get_ordering_op_properties(ordering_op,
269  &opfamily, &opcintype, &strategy))
270  elog(ERROR, "operator %u is not a valid ordering operator",
271  ordering_op);
272 
273  /* Because SortGroupClause doesn't carry collation, consult the expr */
274  collation = exprCollation((Node *) expr);
275 
277  expr,
278  opfamily,
279  opcintype,
280  collation,
281  (strategy == BTGreaterStrategyNumber),
282  nulls_first,
283  sortref,
284  NULL,
285  create_it);
286 }
287 
288 
289 /****************************************************************************
290  * PATHKEY COMPARISONS
291  ****************************************************************************/
292 
293 /*
294  * compare_pathkeys
295  * Compare two pathkeys to see if they are equivalent, and if not whether
296  * one is "better" than the other.
297  *
298  * We assume the pathkeys are canonical, and so they can be checked for
299  * equality by simple pointer comparison.
300  */
302 compare_pathkeys(List *keys1, List *keys2)
303 {
304  ListCell *key1,
305  *key2;
306 
307  /*
308  * Fall out quickly if we are passed two identical lists. This mostly
309  * catches the case where both are NIL, but that's common enough to
310  * warrant the test.
311  */
312  if (keys1 == keys2)
313  return PATHKEYS_EQUAL;
314 
315  forboth(key1, keys1, key2, keys2)
316  {
317  PathKey *pathkey1 = (PathKey *) lfirst(key1);
318  PathKey *pathkey2 = (PathKey *) lfirst(key2);
319 
320  if (pathkey1 != pathkey2)
321  return PATHKEYS_DIFFERENT; /* no need to keep looking */
322  }
323 
324  /*
325  * If we reached the end of only one list, the other is longer and
326  * therefore not a subset.
327  */
328  if (key1 != NULL)
329  return PATHKEYS_BETTER1; /* key1 is longer */
330  if (key2 != NULL)
331  return PATHKEYS_BETTER2; /* key2 is longer */
332  return PATHKEYS_EQUAL;
333 }
334 
335 /*
336  * pathkeys_contained_in
337  * Common special case of compare_pathkeys: we just want to know
338  * if keys2 are at least as well sorted as keys1.
339  */
340 bool
342 {
343  switch (compare_pathkeys(keys1, keys2))
344  {
345  case PATHKEYS_EQUAL:
346  case PATHKEYS_BETTER2:
347  return true;
348  default:
349  break;
350  }
351  return false;
352 }
353 
354 /*
355  * group_keys_reorder_by_pathkeys
356  * Reorder GROUP BY pathkeys and clauses to match the input pathkeys.
357  *
358  * 'pathkeys' is an input list of pathkeys
359  * '*group_pathkeys' and '*group_clauses' are pathkeys and clauses lists to
360  * reorder. The pointers are redirected to new lists, original lists
361  * stay untouched.
362  * 'num_groupby_pathkeys' is the number of first '*group_pathkeys' items to
363  * search matching pathkeys.
364  *
365  * Returns the number of GROUP BY keys with a matching pathkey.
366  */
367 static int
368 group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
369  List **group_clauses,
370  int num_groupby_pathkeys)
371 {
372  List *new_group_pathkeys = NIL,
373  *new_group_clauses = NIL;
374  List *grouping_pathkeys;
375  ListCell *lc;
376  int n;
377 
378  if (pathkeys == NIL || *group_pathkeys == NIL)
379  return 0;
380 
381  /*
382  * We're going to search within just the first num_groupby_pathkeys of
383  * *group_pathkeys. The thing is that root->group_pathkeys is passed as
384  * *group_pathkeys containing grouping pathkeys altogether with aggregate
385  * pathkeys. If we process aggregate pathkeys we could get an invalid
386  * result of get_sortgroupref_clause_noerr(), because their
387  * pathkey->pk_eclass->ec_sortref doesn't reference query targetlist. So,
388  * we allocate a separate list of pathkeys for lookups.
389  */
390  grouping_pathkeys = list_copy_head(*group_pathkeys, num_groupby_pathkeys);
391 
392  /*
393  * Walk the pathkeys (determining ordering of the input path) and see if
394  * there's a matching GROUP BY key. If we find one, we append it to the
395  * list, and do the same for the clauses.
396  *
397  * Once we find the first pathkey without a matching GROUP BY key, the
398  * rest of the pathkeys are useless and can't be used to evaluate the
399  * grouping, so we abort the loop and ignore the remaining pathkeys.
400  */
401  foreach(lc, pathkeys)
402  {
403  PathKey *pathkey = (PathKey *) lfirst(lc);
404  SortGroupClause *sgc;
405 
406  /*
407  * Pathkeys are built in a way that allows simply comparing pointers.
408  * Give up if we can't find the matching pointer. Also give up if
409  * there is no sortclause reference for some reason.
410  */
411  if (foreach_current_index(lc) >= num_groupby_pathkeys ||
412  !list_member_ptr(grouping_pathkeys, pathkey) ||
413  pathkey->pk_eclass->ec_sortref == 0)
414  break;
415 
416  /*
417  * Since 1349d27 pathkey coming from underlying node can be in the
418  * root->group_pathkeys but not in the processed_groupClause. So, we
419  * should be careful here.
420  */
421  sgc = get_sortgroupref_clause_noerr(pathkey->pk_eclass->ec_sortref,
422  *group_clauses);
423  if (!sgc)
424  /* The grouping clause does not cover this pathkey */
425  break;
426 
427  /*
428  * Sort group clause should have an ordering operator as long as there
429  * is an associated pathkey.
430  */
431  Assert(OidIsValid(sgc->sortop));
432 
433  new_group_pathkeys = lappend(new_group_pathkeys, pathkey);
434  new_group_clauses = lappend(new_group_clauses, sgc);
435  }
436 
437  /* remember the number of pathkeys with a matching GROUP BY key */
438  n = list_length(new_group_pathkeys);
439 
440  /* append the remaining group pathkeys (will be treated as not sorted) */
441  *group_pathkeys = list_concat_unique_ptr(new_group_pathkeys,
442  *group_pathkeys);
443  *group_clauses = list_concat_unique_ptr(new_group_clauses,
444  *group_clauses);
445 
446  list_free(grouping_pathkeys);
447  return n;
448 }
449 
450 /*
451  * get_useful_group_keys_orderings
452  * Determine which orderings of GROUP BY keys are potentially interesting.
453  *
454  * Returns a list of GroupByOrdering items, each representing an interesting
455  * ordering of GROUP BY keys. Each item stores pathkeys and clauses in the
456  * matching order.
457  *
458  * The function considers (and keeps) following GROUP BY orderings:
459  *
460  * - GROUP BY keys as ordered by preprocess_groupclause() to match target
461  * ORDER BY clause (as much as possible),
462  * - GROUP BY keys reordered to match 'path' ordering (as much as possible).
463  */
464 List *
466 {
467  Query *parse = root->parse;
468  List *infos = NIL;
469  GroupByOrdering *info;
470 
471  List *pathkeys = root->group_pathkeys;
472  List *clauses = root->processed_groupClause;
473 
474  /* always return at least the original pathkeys/clauses */
475  info = makeNode(GroupByOrdering);
476  info->pathkeys = pathkeys;
477  info->clauses = clauses;
478  infos = lappend(infos, info);
479 
480  /*
481  * Should we try generating alternative orderings of the group keys? If
482  * not, we produce only the order specified in the query, i.e. the
483  * optimization is effectively disabled.
484  */
486  return infos;
487 
488  /*
489  * Grouping sets have own and more complex logic to decide the ordering.
490  */
491  if (parse->groupingSets)
492  return infos;
493 
494  /*
495  * If the path is sorted in some way, try reordering the group keys to
496  * match the path as much of the ordering as possible. Then thanks to
497  * incremental sort we would get this sort as cheap as possible.
498  */
499  if (path->pathkeys &&
500  !pathkeys_contained_in(path->pathkeys, root->group_pathkeys))
501  {
502  int n;
503 
504  n = group_keys_reorder_by_pathkeys(path->pathkeys, &pathkeys, &clauses,
505  root->num_groupby_pathkeys);
506 
507  if (n > 0 &&
508  (enable_incremental_sort || n == root->num_groupby_pathkeys) &&
509  compare_pathkeys(pathkeys, root->group_pathkeys) != PATHKEYS_EQUAL)
510  {
511  info = makeNode(GroupByOrdering);
512  info->pathkeys = pathkeys;
513  info->clauses = clauses;
514 
515  infos = lappend(infos, info);
516  }
517  }
518 
519 #ifdef USE_ASSERT_CHECKING
520  {
522  ListCell *lc;
523 
524  /* Test consistency of info structures */
525  for_each_from(lc, infos, 1)
526  {
527  ListCell *lc1,
528  *lc2;
529 
530  info = lfirst_node(GroupByOrdering, lc);
531 
532  Assert(list_length(info->clauses) == list_length(pinfo->clauses));
533  Assert(list_length(info->pathkeys) == list_length(pinfo->pathkeys));
534  Assert(list_difference(info->clauses, pinfo->clauses) == NIL);
535  Assert(list_difference_ptr(info->pathkeys, pinfo->pathkeys) == NIL);
536 
537  forboth(lc1, info->clauses, lc2, info->pathkeys)
538  {
540  PathKey *pk = lfirst_node(PathKey, lc2);
541 
542  Assert(pk->pk_eclass->ec_sortref == sgc->tleSortGroupRef);
543  }
544  }
545  }
546 #endif
547  return infos;
548 }
549 
550 /*
551  * pathkeys_count_contained_in
552  * Same as pathkeys_contained_in, but also sets length of longest
553  * common prefix of keys1 and keys2.
554  */
555 bool
556 pathkeys_count_contained_in(List *keys1, List *keys2, int *n_common)
557 {
558  int n = 0;
559  ListCell *key1,
560  *key2;
561 
562  /*
563  * See if we can avoiding looping through both lists. This optimization
564  * gains us several percent in planning time in a worst-case test.
565  */
566  if (keys1 == keys2)
567  {
568  *n_common = list_length(keys1);
569  return true;
570  }
571  else if (keys1 == NIL)
572  {
573  *n_common = 0;
574  return true;
575  }
576  else if (keys2 == NIL)
577  {
578  *n_common = 0;
579  return false;
580  }
581 
582  /*
583  * If both lists are non-empty, iterate through both to find out how many
584  * items are shared.
585  */
586  forboth(key1, keys1, key2, keys2)
587  {
588  PathKey *pathkey1 = (PathKey *) lfirst(key1);
589  PathKey *pathkey2 = (PathKey *) lfirst(key2);
590 
591  if (pathkey1 != pathkey2)
592  {
593  *n_common = n;
594  return false;
595  }
596  n++;
597  }
598 
599  /* If we ended with a null value, then we've processed the whole list. */
600  *n_common = n;
601  return (key1 == NULL);
602 }
603 
604 /*
605  * get_cheapest_path_for_pathkeys
606  * Find the cheapest path (according to the specified criterion) that
607  * satisfies the given pathkeys and parameterization, and is parallel-safe
608  * if required.
609  * Return NULL if no such path.
610  *
611  * 'paths' is a list of possible paths that all generate the same relation
612  * 'pathkeys' represents a required ordering (in canonical form!)
613  * 'required_outer' denotes allowable outer relations for parameterized paths
614  * 'cost_criterion' is STARTUP_COST or TOTAL_COST
615  * 'require_parallel_safe' causes us to consider only parallel-safe paths
616  */
617 Path *
619  Relids required_outer,
620  CostSelector cost_criterion,
621  bool require_parallel_safe)
622 {
623  Path *matched_path = NULL;
624  ListCell *l;
625 
626  foreach(l, paths)
627  {
628  Path *path = (Path *) lfirst(l);
629 
630  /* If required, reject paths that are not parallel-safe */
631  if (require_parallel_safe && !path->parallel_safe)
632  continue;
633 
634  /*
635  * Since cost comparison is a lot cheaper than pathkey comparison, do
636  * that first. (XXX is that still true?)
637  */
638  if (matched_path != NULL &&
639  compare_path_costs(matched_path, path, cost_criterion) <= 0)
640  continue;
641 
642  if (pathkeys_contained_in(pathkeys, path->pathkeys) &&
643  bms_is_subset(PATH_REQ_OUTER(path), required_outer))
644  matched_path = path;
645  }
646  return matched_path;
647 }
648 
649 /*
650  * get_cheapest_fractional_path_for_pathkeys
651  * Find the cheapest path (for retrieving a specified fraction of all
652  * the tuples) that satisfies the given pathkeys and parameterization.
653  * Return NULL if no such path.
654  *
655  * See compare_fractional_path_costs() for the interpretation of the fraction
656  * parameter.
657  *
658  * 'paths' is a list of possible paths that all generate the same relation
659  * 'pathkeys' represents a required ordering (in canonical form!)
660  * 'required_outer' denotes allowable outer relations for parameterized paths
661  * 'fraction' is the fraction of the total tuples expected to be retrieved
662  */
663 Path *
665  List *pathkeys,
666  Relids required_outer,
667  double fraction)
668 {
669  Path *matched_path = NULL;
670  ListCell *l;
671 
672  foreach(l, paths)
673  {
674  Path *path = (Path *) lfirst(l);
675 
676  /*
677  * Since cost comparison is a lot cheaper than pathkey comparison, do
678  * that first. (XXX is that still true?)
679  */
680  if (matched_path != NULL &&
681  compare_fractional_path_costs(matched_path, path, fraction) <= 0)
682  continue;
683 
684  if (pathkeys_contained_in(pathkeys, path->pathkeys) &&
685  bms_is_subset(PATH_REQ_OUTER(path), required_outer))
686  matched_path = path;
687  }
688  return matched_path;
689 }
690 
691 
692 /*
693  * get_cheapest_parallel_safe_total_inner
694  * Find the unparameterized parallel-safe path with the least total cost.
695  */
696 Path *
698 {
699  ListCell *l;
700 
701  foreach(l, paths)
702  {
703  Path *innerpath = (Path *) lfirst(l);
704 
705  if (innerpath->parallel_safe &&
706  bms_is_empty(PATH_REQ_OUTER(innerpath)))
707  return innerpath;
708  }
709 
710  return NULL;
711 }
712 
713 /****************************************************************************
714  * NEW PATHKEY FORMATION
715  ****************************************************************************/
716 
717 /*
718  * build_index_pathkeys
719  * Build a pathkeys list that describes the ordering induced by an index
720  * scan using the given index. (Note that an unordered index doesn't
721  * induce any ordering, so we return NIL.)
722  *
723  * If 'scandir' is BackwardScanDirection, build pathkeys representing a
724  * backwards scan of the index.
725  *
726  * We iterate only key columns of covering indexes, since non-key columns
727  * don't influence index ordering. The result is canonical, meaning that
728  * redundant pathkeys are removed; it may therefore have fewer entries than
729  * there are key columns in the index.
730  *
731  * Another reason for stopping early is that we may be able to tell that
732  * an index column's sort order is uninteresting for this query. However,
733  * that test is just based on the existence of an EquivalenceClass and not
734  * on position in pathkey lists, so it's not complete. Caller should call
735  * truncate_useless_pathkeys() to possibly remove more pathkeys.
736  */
737 List *
740  ScanDirection scandir)
741 {
742  List *retval = NIL;
743  ListCell *lc;
744  int i;
745 
746  if (index->sortopfamily == NULL)
747  return NIL; /* non-orderable index */
748 
749  i = 0;
750  foreach(lc, index->indextlist)
751  {
752  TargetEntry *indextle = (TargetEntry *) lfirst(lc);
753  Expr *indexkey;
754  bool reverse_sort;
755  bool nulls_first;
756  PathKey *cpathkey;
757 
758  /*
759  * INCLUDE columns are stored in index unordered, so they don't
760  * support ordered index scan.
761  */
762  if (i >= index->nkeycolumns)
763  break;
764 
765  /* We assume we don't need to make a copy of the tlist item */
766  indexkey = indextle->expr;
767 
768  if (ScanDirectionIsBackward(scandir))
769  {
770  reverse_sort = !index->reverse_sort[i];
771  nulls_first = !index->nulls_first[i];
772  }
773  else
774  {
775  reverse_sort = index->reverse_sort[i];
776  nulls_first = index->nulls_first[i];
777  }
778 
779  /*
780  * OK, try to make a canonical pathkey for this sort key.
781  */
782  cpathkey = make_pathkey_from_sortinfo(root,
783  indexkey,
784  index->sortopfamily[i],
785  index->opcintype[i],
786  index->indexcollations[i],
787  reverse_sort,
788  nulls_first,
789  0,
790  index->rel->relids,
791  false);
792 
793  if (cpathkey)
794  {
795  /*
796  * We found the sort key in an EquivalenceClass, so it's relevant
797  * for this query. Add it to list, unless it's redundant.
798  */
799  if (!pathkey_is_redundant(cpathkey, retval))
800  retval = lappend(retval, cpathkey);
801  }
802  else
803  {
804  /*
805  * Boolean index keys might be redundant even if they do not
806  * appear in an EquivalenceClass, because of our special treatment
807  * of boolean equality conditions --- see the comment for
808  * indexcol_is_bool_constant_for_query(). If that applies, we can
809  * continue to examine lower-order index columns. Otherwise, the
810  * sort key is not an interesting sort order for this query, so we
811  * should stop considering index columns; any lower-order sort
812  * keys won't be useful either.
813  */
815  break;
816  }
817 
818  i++;
819  }
820 
821  return retval;
822 }
823 
824 /*
825  * partkey_is_bool_constant_for_query
826  *
827  * If a partition key column is constrained to have a constant value by the
828  * query's WHERE conditions, then it's irrelevant for sort-order
829  * considerations. Usually that means we have a restriction clause
830  * WHERE partkeycol = constant, which gets turned into an EquivalenceClass
831  * containing a constant, which is recognized as redundant by
832  * build_partition_pathkeys(). But if the partition key column is a
833  * boolean variable (or expression), then we are not going to see such a
834  * WHERE clause, because expression preprocessing will have simplified it
835  * to "WHERE partkeycol" or "WHERE NOT partkeycol". So we are not going
836  * to have a matching EquivalenceClass (unless the query also contains
837  * "ORDER BY partkeycol"). To allow such cases to work the same as they would
838  * for non-boolean values, this function is provided to detect whether the
839  * specified partition key column matches a boolean restriction clause.
840  */
841 static bool
843 {
844  PartitionScheme partscheme = partrel->part_scheme;
845  ListCell *lc;
846 
847  /*
848  * If the partkey isn't boolean, we can't possibly get a match.
849  *
850  * Partitioning currently can only use built-in AMs, so checking for
851  * built-in boolean opfamilies is good enough.
852  */
853  if (!IsBuiltinBooleanOpfamily(partscheme->partopfamily[partkeycol]))
854  return false;
855 
856  /* Check each restriction clause for the partitioned rel */
857  foreach(lc, partrel->baserestrictinfo)
858  {
859  RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
860 
861  /* Ignore pseudoconstant quals, they won't match */
862  if (rinfo->pseudoconstant)
863  continue;
864 
865  /* See if we can match the clause's expression to the partkey column */
866  if (matches_boolean_partition_clause(rinfo, partrel, partkeycol))
867  return true;
868  }
869 
870  return false;
871 }
872 
873 /*
874  * matches_boolean_partition_clause
875  * Determine if the boolean clause described by rinfo matches
876  * partrel's partkeycol-th partition key column.
877  *
878  * "Matches" can be either an exact match (equivalent to partkey = true),
879  * or a NOT above an exact match (equivalent to partkey = false).
880  */
881 static bool
883  RelOptInfo *partrel, int partkeycol)
884 {
885  Node *clause = (Node *) rinfo->clause;
886  Node *partexpr = (Node *) linitial(partrel->partexprs[partkeycol]);
887 
888  /* Direct match? */
889  if (equal(partexpr, clause))
890  return true;
891  /* NOT clause? */
892  else if (is_notclause(clause))
893  {
894  Node *arg = (Node *) get_notclausearg((Expr *) clause);
895 
896  if (equal(partexpr, arg))
897  return true;
898  }
899 
900  return false;
901 }
902 
903 /*
904  * build_partition_pathkeys
905  * Build a pathkeys list that describes the ordering induced by the
906  * partitions of partrel, under either forward or backward scan
907  * as per scandir.
908  *
909  * Caller must have checked that the partitions are properly ordered,
910  * as detected by partitions_are_ordered().
911  *
912  * Sets *partialkeys to true if pathkeys were only built for a prefix of the
913  * partition key, or false if the pathkeys include all columns of the
914  * partition key.
915  */
916 List *
918  ScanDirection scandir, bool *partialkeys)
919 {
920  List *retval = NIL;
921  PartitionScheme partscheme = partrel->part_scheme;
922  int i;
923 
924  Assert(partscheme != NULL);
925  Assert(partitions_are_ordered(partrel->boundinfo, partrel->live_parts));
926  /* For now, we can only cope with baserels */
927  Assert(IS_SIMPLE_REL(partrel));
928 
929  for (i = 0; i < partscheme->partnatts; i++)
930  {
931  PathKey *cpathkey;
932  Expr *keyCol = (Expr *) linitial(partrel->partexprs[i]);
933 
934  /*
935  * Try to make a canonical pathkey for this partkey.
936  *
937  * We assume the PartitionDesc lists any NULL partition last, so we
938  * treat the scan like a NULLS LAST index: we have nulls_first for
939  * backwards scan only.
940  */
941  cpathkey = make_pathkey_from_sortinfo(root,
942  keyCol,
943  partscheme->partopfamily[i],
944  partscheme->partopcintype[i],
945  partscheme->partcollation[i],
946  ScanDirectionIsBackward(scandir),
947  ScanDirectionIsBackward(scandir),
948  0,
949  partrel->relids,
950  false);
951 
952 
953  if (cpathkey)
954  {
955  /*
956  * We found the sort key in an EquivalenceClass, so it's relevant
957  * for this query. Add it to list, unless it's redundant.
958  */
959  if (!pathkey_is_redundant(cpathkey, retval))
960  retval = lappend(retval, cpathkey);
961  }
962  else
963  {
964  /*
965  * Boolean partition keys might be redundant even if they do not
966  * appear in an EquivalenceClass, because of our special treatment
967  * of boolean equality conditions --- see the comment for
968  * partkey_is_bool_constant_for_query(). If that applies, we can
969  * continue to examine lower-order partition keys. Otherwise, the
970  * sort key is not an interesting sort order for this query, so we
971  * should stop considering partition columns; any lower-order sort
972  * keys won't be useful either.
973  */
974  if (!partkey_is_bool_constant_for_query(partrel, i))
975  {
976  *partialkeys = true;
977  return retval;
978  }
979  }
980  }
981 
982  *partialkeys = false;
983  return retval;
984 }
985 
986 /*
987  * build_expression_pathkey
988  * Build a pathkeys list that describes an ordering by a single expression
989  * using the given sort operator.
990  *
991  * expr and rel are as for make_pathkey_from_sortinfo.
992  * We induce the other arguments assuming default sort order for the operator.
993  *
994  * Similarly to make_pathkey_from_sortinfo, the result is NIL if create_it
995  * is false and the expression isn't already in some EquivalenceClass.
996  */
997 List *
999  Expr *expr,
1000  Oid opno,
1001  Relids rel,
1002  bool create_it)
1003 {
1004  List *pathkeys;
1005  Oid opfamily,
1006  opcintype;
1007  int16 strategy;
1008  PathKey *cpathkey;
1009 
1010  /* Find the operator in pg_amop --- failure shouldn't happen */
1011  if (!get_ordering_op_properties(opno,
1012  &opfamily, &opcintype, &strategy))
1013  elog(ERROR, "operator %u is not a valid ordering operator",
1014  opno);
1015 
1016  cpathkey = make_pathkey_from_sortinfo(root,
1017  expr,
1018  opfamily,
1019  opcintype,
1020  exprCollation((Node *) expr),
1021  (strategy == BTGreaterStrategyNumber),
1022  (strategy == BTGreaterStrategyNumber),
1023  0,
1024  rel,
1025  create_it);
1026 
1027  if (cpathkey)
1028  pathkeys = list_make1(cpathkey);
1029  else
1030  pathkeys = NIL;
1031 
1032  return pathkeys;
1033 }
1034 
1035 /*
1036  * convert_subquery_pathkeys
1037  * Build a pathkeys list that describes the ordering of a subquery's
1038  * result, in the terms of the outer query. This is essentially a
1039  * task of conversion.
1040  *
1041  * 'rel': outer query's RelOptInfo for the subquery relation.
1042  * 'subquery_pathkeys': the subquery's output pathkeys, in its terms.
1043  * 'subquery_tlist': the subquery's output targetlist, in its terms.
1044  *
1045  * We intentionally don't do truncate_useless_pathkeys() here, because there
1046  * are situations where seeing the raw ordering of the subquery is helpful.
1047  * For example, if it returns ORDER BY x DESC, that may prompt us to
1048  * construct a mergejoin using DESC order rather than ASC order; but the
1049  * right_merge_direction heuristic would have us throw the knowledge away.
1050  */
1051 List *
1053  List *subquery_pathkeys,
1054  List *subquery_tlist)
1055 {
1056  List *retval = NIL;
1057  int retvallen = 0;
1058  int outer_query_keys = list_length(root->query_pathkeys);
1059  ListCell *i;
1060 
1061  foreach(i, subquery_pathkeys)
1062  {
1063  PathKey *sub_pathkey = (PathKey *) lfirst(i);
1064  EquivalenceClass *sub_eclass = sub_pathkey->pk_eclass;
1065  PathKey *best_pathkey = NULL;
1066 
1067  if (sub_eclass->ec_has_volatile)
1068  {
1069  /*
1070  * If the sub_pathkey's EquivalenceClass is volatile, then it must
1071  * have come from an ORDER BY clause, and we have to match it to
1072  * that same targetlist entry.
1073  */
1074  TargetEntry *tle;
1075  Var *outer_var;
1076 
1077  if (sub_eclass->ec_sortref == 0) /* can't happen */
1078  elog(ERROR, "volatile EquivalenceClass has no sortref");
1079  tle = get_sortgroupref_tle(sub_eclass->ec_sortref, subquery_tlist);
1080  Assert(tle);
1081  /* Is TLE actually available to the outer query? */
1082  outer_var = find_var_for_subquery_tle(rel, tle);
1083  if (outer_var)
1084  {
1085  /* We can represent this sub_pathkey */
1086  EquivalenceMember *sub_member;
1087  EquivalenceClass *outer_ec;
1088 
1089  Assert(list_length(sub_eclass->ec_members) == 1);
1090  sub_member = (EquivalenceMember *) linitial(sub_eclass->ec_members);
1091 
1092  /*
1093  * Note: it might look funny to be setting sortref = 0 for a
1094  * reference to a volatile sub_eclass. However, the
1095  * expression is *not* volatile in the outer query: it's just
1096  * a Var referencing whatever the subquery emitted. (IOW, the
1097  * outer query isn't going to re-execute the volatile
1098  * expression itself.) So this is okay.
1099  */
1100  outer_ec =
1102  (Expr *) outer_var,
1103  sub_eclass->ec_opfamilies,
1104  sub_member->em_datatype,
1105  sub_eclass->ec_collation,
1106  0,
1107  rel->relids,
1108  false);
1109 
1110  /*
1111  * If we don't find a matching EC, sub-pathkey isn't
1112  * interesting to the outer query
1113  */
1114  if (outer_ec)
1115  best_pathkey =
1117  outer_ec,
1118  sub_pathkey->pk_opfamily,
1119  sub_pathkey->pk_strategy,
1120  sub_pathkey->pk_nulls_first);
1121  }
1122  }
1123  else
1124  {
1125  /*
1126  * Otherwise, the sub_pathkey's EquivalenceClass could contain
1127  * multiple elements (representing knowledge that multiple items
1128  * are effectively equal). Each element might match none, one, or
1129  * more of the output columns that are visible to the outer query.
1130  * This means we may have multiple possible representations of the
1131  * sub_pathkey in the context of the outer query. Ideally we
1132  * would generate them all and put them all into an EC of the
1133  * outer query, thereby propagating equality knowledge up to the
1134  * outer query. Right now we cannot do so, because the outer
1135  * query's EquivalenceClasses are already frozen when this is
1136  * called. Instead we prefer the one that has the highest "score"
1137  * (number of EC peers, plus one if it matches the outer
1138  * query_pathkeys). This is the most likely to be useful in the
1139  * outer query.
1140  */
1141  int best_score = -1;
1142  ListCell *j;
1143 
1144  foreach(j, sub_eclass->ec_members)
1145  {
1146  EquivalenceMember *sub_member = (EquivalenceMember *) lfirst(j);
1147  Expr *sub_expr = sub_member->em_expr;
1148  Oid sub_expr_type = sub_member->em_datatype;
1149  Oid sub_expr_coll = sub_eclass->ec_collation;
1150  ListCell *k;
1151 
1152  if (sub_member->em_is_child)
1153  continue; /* ignore children here */
1154 
1155  foreach(k, subquery_tlist)
1156  {
1157  TargetEntry *tle = (TargetEntry *) lfirst(k);
1158  Var *outer_var;
1159  Expr *tle_expr;
1160  EquivalenceClass *outer_ec;
1161  PathKey *outer_pk;
1162  int score;
1163 
1164  /* Is TLE actually available to the outer query? */
1165  outer_var = find_var_for_subquery_tle(rel, tle);
1166  if (!outer_var)
1167  continue;
1168 
1169  /*
1170  * The targetlist entry is considered to match if it
1171  * matches after sort-key canonicalization. That is
1172  * needed since the sub_expr has been through the same
1173  * process.
1174  */
1175  tle_expr = canonicalize_ec_expression(tle->expr,
1176  sub_expr_type,
1177  sub_expr_coll);
1178  if (!equal(tle_expr, sub_expr))
1179  continue;
1180 
1181  /* See if we have a matching EC for the TLE */
1182  outer_ec = get_eclass_for_sort_expr(root,
1183  (Expr *) outer_var,
1184  sub_eclass->ec_opfamilies,
1185  sub_expr_type,
1186  sub_expr_coll,
1187  0,
1188  rel->relids,
1189  false);
1190 
1191  /*
1192  * If we don't find a matching EC, this sub-pathkey isn't
1193  * interesting to the outer query
1194  */
1195  if (!outer_ec)
1196  continue;
1197 
1198  outer_pk = make_canonical_pathkey(root,
1199  outer_ec,
1200  sub_pathkey->pk_opfamily,
1201  sub_pathkey->pk_strategy,
1202  sub_pathkey->pk_nulls_first);
1203  /* score = # of equivalence peers */
1204  score = list_length(outer_ec->ec_members) - 1;
1205  /* +1 if it matches the proper query_pathkeys item */
1206  if (retvallen < outer_query_keys &&
1207  list_nth(root->query_pathkeys, retvallen) == outer_pk)
1208  score++;
1209  if (score > best_score)
1210  {
1211  best_pathkey = outer_pk;
1212  best_score = score;
1213  }
1214  }
1215  }
1216  }
1217 
1218  /*
1219  * If we couldn't find a representation of this sub_pathkey, we're
1220  * done (we can't use the ones to its right, either).
1221  */
1222  if (!best_pathkey)
1223  break;
1224 
1225  /*
1226  * Eliminate redundant ordering info; could happen if outer query
1227  * equivalences subquery keys...
1228  */
1229  if (!pathkey_is_redundant(best_pathkey, retval))
1230  {
1231  retval = lappend(retval, best_pathkey);
1232  retvallen++;
1233  }
1234  }
1235 
1236  return retval;
1237 }
1238 
1239 /*
1240  * find_var_for_subquery_tle
1241  *
1242  * If the given subquery tlist entry is due to be emitted by the subquery's
1243  * scan node, return a Var for it, else return NULL.
1244  *
1245  * We need this to ensure that we don't return pathkeys describing values
1246  * that are unavailable above the level of the subquery scan.
1247  */
1248 static Var *
1250 {
1251  ListCell *lc;
1252 
1253  /* If the TLE is resjunk, it's certainly not visible to the outer query */
1254  if (tle->resjunk)
1255  return NULL;
1256 
1257  /* Search the rel's targetlist to see what it will return */
1258  foreach(lc, rel->reltarget->exprs)
1259  {
1260  Var *var = (Var *) lfirst(lc);
1261 
1262  /* Ignore placeholders */
1263  if (!IsA(var, Var))
1264  continue;
1265  Assert(var->varno == rel->relid);
1266 
1267  /* If we find a Var referencing this TLE, we're good */
1268  if (var->varattno == tle->resno)
1269  return copyObject(var); /* Make a copy for safety */
1270  }
1271  return NULL;
1272 }
1273 
1274 /*
1275  * build_join_pathkeys
1276  * Build the path keys for a join relation constructed by mergejoin or
1277  * nestloop join. This is normally the same as the outer path's keys.
1278  *
1279  * EXCEPTION: in a FULL, RIGHT or RIGHT_ANTI join, we cannot treat the
1280  * result as having the outer path's path keys, because null lefthand rows
1281  * may be inserted at random points. It must be treated as unsorted.
1282  *
1283  * We truncate away any pathkeys that are uninteresting for higher joins.
1284  *
1285  * 'joinrel' is the join relation that paths are being formed for
1286  * 'jointype' is the join type (inner, left, full, etc)
1287  * 'outer_pathkeys' is the list of the current outer path's path keys
1288  *
1289  * Returns the list of new path keys.
1290  */
1291 List *
1293  RelOptInfo *joinrel,
1294  JoinType jointype,
1295  List *outer_pathkeys)
1296 {
1297  if (jointype == JOIN_FULL ||
1298  jointype == JOIN_RIGHT ||
1299  jointype == JOIN_RIGHT_ANTI)
1300  return NIL;
1301 
1302  /*
1303  * This used to be quite a complex bit of code, but now that all pathkey
1304  * sublists start out life canonicalized, we don't have to do a darn thing
1305  * here!
1306  *
1307  * We do, however, need to truncate the pathkeys list, since it may
1308  * contain pathkeys that were useful for forming this joinrel but are
1309  * uninteresting to higher levels.
1310  */
1311  return truncate_useless_pathkeys(root, joinrel, outer_pathkeys);
1312 }
1313 
1314 /****************************************************************************
1315  * PATHKEYS AND SORT CLAUSES
1316  ****************************************************************************/
1317 
1318 /*
1319  * make_pathkeys_for_sortclauses
1320  * Generate a pathkeys list that represents the sort order specified
1321  * by a list of SortGroupClauses
1322  *
1323  * The resulting PathKeys are always in canonical form. (Actually, there
1324  * is no longer any code anywhere that creates non-canonical PathKeys.)
1325  *
1326  * 'sortclauses' is a list of SortGroupClause nodes
1327  * 'tlist' is the targetlist to find the referenced tlist entries in
1328  */
1329 List *
1331  List *sortclauses,
1332  List *tlist)
1333 {
1334  List *result;
1335  bool sortable;
1336 
1338  &sortclauses,
1339  tlist,
1340  false,
1341  &sortable,
1342  false);
1343  /* It's caller error if not all clauses were sortable */
1344  Assert(sortable);
1345  return result;
1346 }
1347 
1348 /*
1349  * make_pathkeys_for_sortclauses_extended
1350  * Generate a pathkeys list that represents the sort order specified
1351  * by a list of SortGroupClauses
1352  *
1353  * The comments for make_pathkeys_for_sortclauses apply here too. In addition:
1354  *
1355  * If remove_redundant is true, then any sort clauses that are found to
1356  * give rise to redundant pathkeys are removed from the sortclauses list
1357  * (which therefore must be pass-by-reference in this version).
1358  *
1359  * *sortable is set to true if all the sort clauses are in fact sortable.
1360  * If any are not, they are ignored except for setting *sortable false.
1361  * (In that case, the output pathkey list isn't really useful. However,
1362  * we process the whole sortclauses list anyway, because it's still valid
1363  * to remove any clauses that can be proven redundant via the eclass logic.
1364  * Even though we'll have to hash in that case, we might as well not hash
1365  * redundant columns.)
1366  *
1367  * If set_ec_sortref is true then sets the value of the pathkey's
1368  * EquivalenceClass unless it's already initialized.
1369  */
1370 List *
1372  List **sortclauses,
1373  List *tlist,
1374  bool remove_redundant,
1375  bool *sortable,
1376  bool set_ec_sortref)
1377 {
1378  List *pathkeys = NIL;
1379  ListCell *l;
1380 
1381  *sortable = true;
1382  foreach(l, *sortclauses)
1383  {
1384  SortGroupClause *sortcl = (SortGroupClause *) lfirst(l);
1385  Expr *sortkey;
1386  PathKey *pathkey;
1387 
1388  sortkey = (Expr *) get_sortgroupclause_expr(sortcl, tlist);
1389  if (!OidIsValid(sortcl->sortop))
1390  {
1391  *sortable = false;
1392  continue;
1393  }
1394  pathkey = make_pathkey_from_sortop(root,
1395  sortkey,
1396  sortcl->sortop,
1397  sortcl->nulls_first,
1398  sortcl->tleSortGroupRef,
1399  true);
1400  if (pathkey->pk_eclass->ec_sortref == 0 && set_ec_sortref)
1401  {
1402  /*
1403  * Copy the sortref if it hasn't been set yet. That may happen if
1404  * the EquivalenceClass was constructed from a WHERE clause, i.e.
1405  * it doesn't have a target reference at all.
1406  */
1407  pathkey->pk_eclass->ec_sortref = sortcl->tleSortGroupRef;
1408  }
1409 
1410  /* Canonical form eliminates redundant ordering keys */
1411  if (!pathkey_is_redundant(pathkey, pathkeys))
1412  pathkeys = lappend(pathkeys, pathkey);
1413  else if (remove_redundant)
1414  *sortclauses = foreach_delete_current(*sortclauses, l);
1415  }
1416  return pathkeys;
1417 }
1418 
1419 /****************************************************************************
1420  * PATHKEYS AND MERGECLAUSES
1421  ****************************************************************************/
1422 
1423 /*
1424  * initialize_mergeclause_eclasses
1425  * Set the EquivalenceClass links in a mergeclause restrictinfo.
1426  *
1427  * RestrictInfo contains fields in which we may cache pointers to
1428  * EquivalenceClasses for the left and right inputs of the mergeclause.
1429  * (If the mergeclause is a true equivalence clause these will be the
1430  * same EquivalenceClass, otherwise not.) If the mergeclause is either
1431  * used to generate an EquivalenceClass, or derived from an EquivalenceClass,
1432  * then it's easy to set up the left_ec and right_ec members --- otherwise,
1433  * this function should be called to set them up. We will generate new
1434  * EquivalenceClauses if necessary to represent the mergeclause's left and
1435  * right sides.
1436  *
1437  * Note this is called before EC merging is complete, so the links won't
1438  * necessarily point to canonical ECs. Before they are actually used for
1439  * anything, update_mergeclause_eclasses must be called to ensure that
1440  * they've been updated to point to canonical ECs.
1441  */
1442 void
1444 {
1445  Expr *clause = restrictinfo->clause;
1446  Oid lefttype,
1447  righttype;
1448 
1449  /* Should be a mergeclause ... */
1450  Assert(restrictinfo->mergeopfamilies != NIL);
1451  /* ... with links not yet set */
1452  Assert(restrictinfo->left_ec == NULL);
1453  Assert(restrictinfo->right_ec == NULL);
1454 
1455  /* Need the declared input types of the operator */
1456  op_input_types(((OpExpr *) clause)->opno, &lefttype, &righttype);
1457 
1458  /* Find or create a matching EquivalenceClass for each side */
1459  restrictinfo->left_ec =
1461  (Expr *) get_leftop(clause),
1462  restrictinfo->mergeopfamilies,
1463  lefttype,
1464  ((OpExpr *) clause)->inputcollid,
1465  0,
1466  NULL,
1467  true);
1468  restrictinfo->right_ec =
1470  (Expr *) get_rightop(clause),
1471  restrictinfo->mergeopfamilies,
1472  righttype,
1473  ((OpExpr *) clause)->inputcollid,
1474  0,
1475  NULL,
1476  true);
1477 }
1478 
1479 /*
1480  * update_mergeclause_eclasses
1481  * Make the cached EquivalenceClass links valid in a mergeclause
1482  * restrictinfo.
1483  *
1484  * These pointers should have been set by process_equivalence or
1485  * initialize_mergeclause_eclasses, but they might have been set to
1486  * non-canonical ECs that got merged later. Chase up to the canonical
1487  * merged parent if so.
1488  */
1489 void
1491 {
1492  /* Should be a merge clause ... */
1493  Assert(restrictinfo->mergeopfamilies != NIL);
1494  /* ... with pointers already set */
1495  Assert(restrictinfo->left_ec != NULL);
1496  Assert(restrictinfo->right_ec != NULL);
1497 
1498  /* Chase up to the top as needed */
1499  while (restrictinfo->left_ec->ec_merged)
1500  restrictinfo->left_ec = restrictinfo->left_ec->ec_merged;
1501  while (restrictinfo->right_ec->ec_merged)
1502  restrictinfo->right_ec = restrictinfo->right_ec->ec_merged;
1503 }
1504 
1505 /*
1506  * find_mergeclauses_for_outer_pathkeys
1507  * This routine attempts to find a list of mergeclauses that can be
1508  * used with a specified ordering for the join's outer relation.
1509  * If successful, it returns a list of mergeclauses.
1510  *
1511  * 'pathkeys' is a pathkeys list showing the ordering of an outer-rel path.
1512  * 'restrictinfos' is a list of mergejoinable restriction clauses for the
1513  * join relation being formed, in no particular order.
1514  *
1515  * The restrictinfos must be marked (via outer_is_left) to show which side
1516  * of each clause is associated with the current outer path. (See
1517  * select_mergejoin_clauses())
1518  *
1519  * The result is NIL if no merge can be done, else a maximal list of
1520  * usable mergeclauses (represented as a list of their restrictinfo nodes).
1521  * The list is ordered to match the pathkeys, as required for execution.
1522  */
1523 List *
1525  List *pathkeys,
1526  List *restrictinfos)
1527 {
1528  List *mergeclauses = NIL;
1529  ListCell *i;
1530 
1531  /* make sure we have eclasses cached in the clauses */
1532  foreach(i, restrictinfos)
1533  {
1534  RestrictInfo *rinfo = (RestrictInfo *) lfirst(i);
1535 
1537  }
1538 
1539  foreach(i, pathkeys)
1540  {
1541  PathKey *pathkey = (PathKey *) lfirst(i);
1542  EquivalenceClass *pathkey_ec = pathkey->pk_eclass;
1543  List *matched_restrictinfos = NIL;
1544  ListCell *j;
1545 
1546  /*----------
1547  * A mergejoin clause matches a pathkey if it has the same EC.
1548  * If there are multiple matching clauses, take them all. In plain
1549  * inner-join scenarios we expect only one match, because
1550  * equivalence-class processing will have removed any redundant
1551  * mergeclauses. However, in outer-join scenarios there might be
1552  * multiple matches. An example is
1553  *
1554  * select * from a full join b
1555  * on a.v1 = b.v1 and a.v2 = b.v2 and a.v1 = b.v2;
1556  *
1557  * Given the pathkeys ({a.v1}, {a.v2}) it is okay to return all three
1558  * clauses (in the order a.v1=b.v1, a.v1=b.v2, a.v2=b.v2) and indeed
1559  * we *must* do so or we will be unable to form a valid plan.
1560  *
1561  * We expect that the given pathkeys list is canonical, which means
1562  * no two members have the same EC, so it's not possible for this
1563  * code to enter the same mergeclause into the result list twice.
1564  *
1565  * It's possible that multiple matching clauses might have different
1566  * ECs on the other side, in which case the order we put them into our
1567  * result makes a difference in the pathkeys required for the inner
1568  * input rel. However this routine hasn't got any info about which
1569  * order would be best, so we don't worry about that.
1570  *
1571  * It's also possible that the selected mergejoin clauses produce
1572  * a noncanonical ordering of pathkeys for the inner side, ie, we
1573  * might select clauses that reference b.v1, b.v2, b.v1 in that
1574  * order. This is not harmful in itself, though it suggests that
1575  * the clauses are partially redundant. Since the alternative is
1576  * to omit mergejoin clauses and thereby possibly fail to generate a
1577  * plan altogether, we live with it. make_inner_pathkeys_for_merge()
1578  * has to delete duplicates when it constructs the inner pathkeys
1579  * list, and we also have to deal with such cases specially in
1580  * create_mergejoin_plan().
1581  *----------
1582  */
1583  foreach(j, restrictinfos)
1584  {
1585  RestrictInfo *rinfo = (RestrictInfo *) lfirst(j);
1586  EquivalenceClass *clause_ec;
1587 
1588  clause_ec = rinfo->outer_is_left ?
1589  rinfo->left_ec : rinfo->right_ec;
1590  if (clause_ec == pathkey_ec)
1591  matched_restrictinfos = lappend(matched_restrictinfos, rinfo);
1592  }
1593 
1594  /*
1595  * If we didn't find a mergeclause, we're done --- any additional
1596  * sort-key positions in the pathkeys are useless. (But we can still
1597  * mergejoin if we found at least one mergeclause.)
1598  */
1599  if (matched_restrictinfos == NIL)
1600  break;
1601 
1602  /*
1603  * If we did find usable mergeclause(s) for this sort-key position,
1604  * add them to result list.
1605  */
1606  mergeclauses = list_concat(mergeclauses, matched_restrictinfos);
1607  }
1608 
1609  return mergeclauses;
1610 }
1611 
1612 /*
1613  * select_outer_pathkeys_for_merge
1614  * Builds a pathkey list representing a possible sort ordering
1615  * that can be used with the given mergeclauses.
1616  *
1617  * 'mergeclauses' is a list of RestrictInfos for mergejoin clauses
1618  * that will be used in a merge join.
1619  * 'joinrel' is the join relation we are trying to construct.
1620  *
1621  * The restrictinfos must be marked (via outer_is_left) to show which side
1622  * of each clause is associated with the current outer path. (See
1623  * select_mergejoin_clauses())
1624  *
1625  * Returns a pathkeys list that can be applied to the outer relation.
1626  *
1627  * Since we assume here that a sort is required, there is no particular use
1628  * in matching any available ordering of the outerrel. (joinpath.c has an
1629  * entirely separate code path for considering sort-free mergejoins.) Rather,
1630  * it's interesting to try to match, or match a prefix of the requested
1631  * query_pathkeys so that a second output sort may be avoided or an
1632  * incremental sort may be done instead. We can get away with just a prefix
1633  * of the query_pathkeys when that prefix covers the entire join condition.
1634  * Failing that, we try to list "more popular" keys (those with the most
1635  * unmatched EquivalenceClass peers) earlier, in hopes of making the resulting
1636  * ordering useful for as many higher-level mergejoins as possible.
1637  */
1638 List *
1640  List *mergeclauses,
1641  RelOptInfo *joinrel)
1642 {
1643  List *pathkeys = NIL;
1644  int nClauses = list_length(mergeclauses);
1645  EquivalenceClass **ecs;
1646  int *scores;
1647  int necs;
1648  ListCell *lc;
1649  int j;
1650 
1651  /* Might have no mergeclauses */
1652  if (nClauses == 0)
1653  return NIL;
1654 
1655  /*
1656  * Make arrays of the ECs used by the mergeclauses (dropping any
1657  * duplicates) and their "popularity" scores.
1658  */
1659  ecs = (EquivalenceClass **) palloc(nClauses * sizeof(EquivalenceClass *));
1660  scores = (int *) palloc(nClauses * sizeof(int));
1661  necs = 0;
1662 
1663  foreach(lc, mergeclauses)
1664  {
1665  RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
1666  EquivalenceClass *oeclass;
1667  int score;
1668  ListCell *lc2;
1669 
1670  /* get the outer eclass */
1672 
1673  if (rinfo->outer_is_left)
1674  oeclass = rinfo->left_ec;
1675  else
1676  oeclass = rinfo->right_ec;
1677 
1678  /* reject duplicates */
1679  for (j = 0; j < necs; j++)
1680  {
1681  if (ecs[j] == oeclass)
1682  break;
1683  }
1684  if (j < necs)
1685  continue;
1686 
1687  /* compute score */
1688  score = 0;
1689  foreach(lc2, oeclass->ec_members)
1690  {
1692 
1693  /* Potential future join partner? */
1694  if (!em->em_is_const && !em->em_is_child &&
1695  !bms_overlap(em->em_relids, joinrel->relids))
1696  score++;
1697  }
1698 
1699  ecs[necs] = oeclass;
1700  scores[necs] = score;
1701  necs++;
1702  }
1703 
1704  /*
1705  * Find out if we have all the ECs mentioned in query_pathkeys; if so we
1706  * can generate a sort order that's also useful for final output. If we
1707  * only have a prefix of the query_pathkeys, and that prefix is the entire
1708  * join condition, then it's useful to use the prefix as the pathkeys as
1709  * this increases the chances that an incremental sort will be able to be
1710  * used by the upper planner.
1711  */
1712  if (root->query_pathkeys)
1713  {
1714  int matches = 0;
1715 
1716  foreach(lc, root->query_pathkeys)
1717  {
1718  PathKey *query_pathkey = (PathKey *) lfirst(lc);
1719  EquivalenceClass *query_ec = query_pathkey->pk_eclass;
1720 
1721  for (j = 0; j < necs; j++)
1722  {
1723  if (ecs[j] == query_ec)
1724  break; /* found match */
1725  }
1726  if (j >= necs)
1727  break; /* didn't find match */
1728 
1729  matches++;
1730  }
1731  /* if we got to the end of the list, we have them all */
1732  if (lc == NULL)
1733  {
1734  /* copy query_pathkeys as starting point for our output */
1735  pathkeys = list_copy(root->query_pathkeys);
1736  /* mark their ECs as already-emitted */
1737  foreach(lc, root->query_pathkeys)
1738  {
1739  PathKey *query_pathkey = (PathKey *) lfirst(lc);
1740  EquivalenceClass *query_ec = query_pathkey->pk_eclass;
1741 
1742  for (j = 0; j < necs; j++)
1743  {
1744  if (ecs[j] == query_ec)
1745  {
1746  scores[j] = -1;
1747  break;
1748  }
1749  }
1750  }
1751  }
1752 
1753  /*
1754  * If we didn't match to all of the query_pathkeys, but did match to
1755  * all of the join clauses then we'll make use of these as partially
1756  * sorted input is better than nothing for the upper planner as it may
1757  * lead to incremental sorts instead of full sorts.
1758  */
1759  else if (matches == nClauses)
1760  {
1761  pathkeys = list_copy_head(root->query_pathkeys, matches);
1762 
1763  /* we have all of the join pathkeys, so nothing more to do */
1764  pfree(ecs);
1765  pfree(scores);
1766 
1767  return pathkeys;
1768  }
1769  }
1770 
1771  /*
1772  * Add remaining ECs to the list in popularity order, using a default sort
1773  * ordering. (We could use qsort() here, but the list length is usually
1774  * so small it's not worth it.)
1775  */
1776  for (;;)
1777  {
1778  int best_j;
1779  int best_score;
1780  EquivalenceClass *ec;
1781  PathKey *pathkey;
1782 
1783  best_j = 0;
1784  best_score = scores[0];
1785  for (j = 1; j < necs; j++)
1786  {
1787  if (scores[j] > best_score)
1788  {
1789  best_j = j;
1790  best_score = scores[j];
1791  }
1792  }
1793  if (best_score < 0)
1794  break; /* all done */
1795  ec = ecs[best_j];
1796  scores[best_j] = -1;
1797  pathkey = make_canonical_pathkey(root,
1798  ec,
1801  false);
1802  /* can't be redundant because no duplicate ECs */
1803  Assert(!pathkey_is_redundant(pathkey, pathkeys));
1804  pathkeys = lappend(pathkeys, pathkey);
1805  }
1806 
1807  pfree(ecs);
1808  pfree(scores);
1809 
1810  return pathkeys;
1811 }
1812 
1813 /*
1814  * make_inner_pathkeys_for_merge
1815  * Builds a pathkey list representing the explicit sort order that
1816  * must be applied to an inner path to make it usable with the
1817  * given mergeclauses.
1818  *
1819  * 'mergeclauses' is a list of RestrictInfos for the mergejoin clauses
1820  * that will be used in a merge join, in order.
1821  * 'outer_pathkeys' are the already-known canonical pathkeys for the outer
1822  * side of the join.
1823  *
1824  * The restrictinfos must be marked (via outer_is_left) to show which side
1825  * of each clause is associated with the current outer path. (See
1826  * select_mergejoin_clauses())
1827  *
1828  * Returns a pathkeys list that can be applied to the inner relation.
1829  *
1830  * Note that it is not this routine's job to decide whether sorting is
1831  * actually needed for a particular input path. Assume a sort is necessary;
1832  * just make the keys, eh?
1833  */
1834 List *
1836  List *mergeclauses,
1837  List *outer_pathkeys)
1838 {
1839  List *pathkeys = NIL;
1840  EquivalenceClass *lastoeclass;
1841  PathKey *opathkey;
1842  ListCell *lc;
1843  ListCell *lop;
1844 
1845  lastoeclass = NULL;
1846  opathkey = NULL;
1847  lop = list_head(outer_pathkeys);
1848 
1849  foreach(lc, mergeclauses)
1850  {
1851  RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
1852  EquivalenceClass *oeclass;
1853  EquivalenceClass *ieclass;
1854  PathKey *pathkey;
1855 
1857 
1858  if (rinfo->outer_is_left)
1859  {
1860  oeclass = rinfo->left_ec;
1861  ieclass = rinfo->right_ec;
1862  }
1863  else
1864  {
1865  oeclass = rinfo->right_ec;
1866  ieclass = rinfo->left_ec;
1867  }
1868 
1869  /* outer eclass should match current or next pathkeys */
1870  /* we check this carefully for debugging reasons */
1871  if (oeclass != lastoeclass)
1872  {
1873  if (!lop)
1874  elog(ERROR, "too few pathkeys for mergeclauses");
1875  opathkey = (PathKey *) lfirst(lop);
1876  lop = lnext(outer_pathkeys, lop);
1877  lastoeclass = opathkey->pk_eclass;
1878  if (oeclass != lastoeclass)
1879  elog(ERROR, "outer pathkeys do not match mergeclause");
1880  }
1881 
1882  /*
1883  * Often, we'll have same EC on both sides, in which case the outer
1884  * pathkey is also canonical for the inner side, and we can skip a
1885  * useless search.
1886  */
1887  if (ieclass == oeclass)
1888  pathkey = opathkey;
1889  else
1890  pathkey = make_canonical_pathkey(root,
1891  ieclass,
1892  opathkey->pk_opfamily,
1893  opathkey->pk_strategy,
1894  opathkey->pk_nulls_first);
1895 
1896  /*
1897  * Don't generate redundant pathkeys (which can happen if multiple
1898  * mergeclauses refer to the same EC). Because we do this, the output
1899  * pathkey list isn't necessarily ordered like the mergeclauses, which
1900  * complicates life for create_mergejoin_plan(). But if we didn't,
1901  * we'd have a noncanonical sort key list, which would be bad; for one
1902  * reason, it certainly wouldn't match any available sort order for
1903  * the input relation.
1904  */
1905  if (!pathkey_is_redundant(pathkey, pathkeys))
1906  pathkeys = lappend(pathkeys, pathkey);
1907  }
1908 
1909  return pathkeys;
1910 }
1911 
1912 /*
1913  * trim_mergeclauses_for_inner_pathkeys
1914  * This routine trims a list of mergeclauses to include just those that
1915  * work with a specified ordering for the join's inner relation.
1916  *
1917  * 'mergeclauses' is a list of RestrictInfos for mergejoin clauses for the
1918  * join relation being formed, in an order known to work for the
1919  * currently-considered sort ordering of the join's outer rel.
1920  * 'pathkeys' is a pathkeys list showing the ordering of an inner-rel path;
1921  * it should be equal to, or a truncation of, the result of
1922  * make_inner_pathkeys_for_merge for these mergeclauses.
1923  *
1924  * What we return will be a prefix of the given mergeclauses list.
1925  *
1926  * We need this logic because make_inner_pathkeys_for_merge's result isn't
1927  * necessarily in the same order as the mergeclauses. That means that if we
1928  * consider an inner-rel pathkey list that is a truncation of that result,
1929  * we might need to drop mergeclauses even though they match a surviving inner
1930  * pathkey. This happens when they are to the right of a mergeclause that
1931  * matches a removed inner pathkey.
1932  *
1933  * The mergeclauses must be marked (via outer_is_left) to show which side
1934  * of each clause is associated with the current outer path. (See
1935  * select_mergejoin_clauses())
1936  */
1937 List *
1939  List *mergeclauses,
1940  List *pathkeys)
1941 {
1942  List *new_mergeclauses = NIL;
1943  PathKey *pathkey;
1944  EquivalenceClass *pathkey_ec;
1945  bool matched_pathkey;
1946  ListCell *lip;
1947  ListCell *i;
1948 
1949  /* No pathkeys => no mergeclauses (though we don't expect this case) */
1950  if (pathkeys == NIL)
1951  return NIL;
1952  /* Initialize to consider first pathkey */
1953  lip = list_head(pathkeys);
1954  pathkey = (PathKey *) lfirst(lip);
1955  pathkey_ec = pathkey->pk_eclass;
1956  lip = lnext(pathkeys, lip);
1957  matched_pathkey = false;
1958 
1959  /* Scan mergeclauses to see how many we can use */
1960  foreach(i, mergeclauses)
1961  {
1962  RestrictInfo *rinfo = (RestrictInfo *) lfirst(i);
1963  EquivalenceClass *clause_ec;
1964 
1965  /* Assume we needn't do update_mergeclause_eclasses again here */
1966 
1967  /* Check clause's inner-rel EC against current pathkey */
1968  clause_ec = rinfo->outer_is_left ?
1969  rinfo->right_ec : rinfo->left_ec;
1970 
1971  /* If we don't have a match, attempt to advance to next pathkey */
1972  if (clause_ec != pathkey_ec)
1973  {
1974  /* If we had no clauses matching this inner pathkey, must stop */
1975  if (!matched_pathkey)
1976  break;
1977 
1978  /* Advance to next inner pathkey, if any */
1979  if (lip == NULL)
1980  break;
1981  pathkey = (PathKey *) lfirst(lip);
1982  pathkey_ec = pathkey->pk_eclass;
1983  lip = lnext(pathkeys, lip);
1984  matched_pathkey = false;
1985  }
1986 
1987  /* If mergeclause matches current inner pathkey, we can use it */
1988  if (clause_ec == pathkey_ec)
1989  {
1990  new_mergeclauses = lappend(new_mergeclauses, rinfo);
1991  matched_pathkey = true;
1992  }
1993  else
1994  {
1995  /* Else, no hope of adding any more mergeclauses */
1996  break;
1997  }
1998  }
1999 
2000  return new_mergeclauses;
2001 }
2002 
2003 
2004 /****************************************************************************
2005  * PATHKEY USEFULNESS CHECKS
2006  *
2007  * We only want to remember as many of the pathkeys of a path as have some
2008  * potential use, either for subsequent mergejoins or for meeting the query's
2009  * requested output ordering. This ensures that add_path() won't consider
2010  * a path to have a usefully different ordering unless it really is useful.
2011  * These routines check for usefulness of given pathkeys.
2012  ****************************************************************************/
2013 
2014 /*
2015  * pathkeys_useful_for_merging
2016  * Count the number of pathkeys that may be useful for mergejoins
2017  * above the given relation.
2018  *
2019  * We consider a pathkey potentially useful if it corresponds to the merge
2020  * ordering of either side of any joinclause for the rel. This might be
2021  * overoptimistic, since joinclauses that require different other relations
2022  * might never be usable at the same time, but trying to be exact is likely
2023  * to be more trouble than it's worth.
2024  *
2025  * To avoid doubling the number of mergejoin paths considered, we would like
2026  * to consider only one of the two scan directions (ASC or DESC) as useful
2027  * for merging for any given target column. The choice is arbitrary unless
2028  * one of the directions happens to match an ORDER BY key, in which case
2029  * that direction should be preferred, in hopes of avoiding a final sort step.
2030  * right_merge_direction() implements this heuristic.
2031  */
2032 static int
2034 {
2035  int useful = 0;
2036  ListCell *i;
2037 
2038  foreach(i, pathkeys)
2039  {
2040  PathKey *pathkey = (PathKey *) lfirst(i);
2041  bool matched = false;
2042  ListCell *j;
2043 
2044  /* If "wrong" direction, not useful for merging */
2045  if (!right_merge_direction(root, pathkey))
2046  break;
2047 
2048  /*
2049  * First look into the EquivalenceClass of the pathkey, to see if
2050  * there are any members not yet joined to the rel. If so, it's
2051  * surely possible to generate a mergejoin clause using them.
2052  */
2053  if (rel->has_eclass_joins &&
2054  eclass_useful_for_merging(root, pathkey->pk_eclass, rel))
2055  matched = true;
2056  else
2057  {
2058  /*
2059  * Otherwise search the rel's joininfo list, which contains
2060  * non-EquivalenceClass-derivable join clauses that might
2061  * nonetheless be mergejoinable.
2062  */
2063  foreach(j, rel->joininfo)
2064  {
2065  RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(j);
2066 
2067  if (restrictinfo->mergeopfamilies == NIL)
2068  continue;
2069  update_mergeclause_eclasses(root, restrictinfo);
2070 
2071  if (pathkey->pk_eclass == restrictinfo->left_ec ||
2072  pathkey->pk_eclass == restrictinfo->right_ec)
2073  {
2074  matched = true;
2075  break;
2076  }
2077  }
2078  }
2079 
2080  /*
2081  * If we didn't find a mergeclause, we're done --- any additional
2082  * sort-key positions in the pathkeys are useless. (But we can still
2083  * mergejoin if we found at least one mergeclause.)
2084  */
2085  if (matched)
2086  useful++;
2087  else
2088  break;
2089  }
2090 
2091  return useful;
2092 }
2093 
2094 /*
2095  * right_merge_direction
2096  * Check whether the pathkey embodies the preferred sort direction
2097  * for merging its target column.
2098  */
2099 static bool
2101 {
2102  ListCell *l;
2103 
2104  foreach(l, root->query_pathkeys)
2105  {
2106  PathKey *query_pathkey = (PathKey *) lfirst(l);
2107 
2108  if (pathkey->pk_eclass == query_pathkey->pk_eclass &&
2109  pathkey->pk_opfamily == query_pathkey->pk_opfamily)
2110  {
2111  /*
2112  * Found a matching query sort column. Prefer this pathkey's
2113  * direction iff it matches. Note that we ignore pk_nulls_first,
2114  * which means that a sort might be needed anyway ... but we still
2115  * want to prefer only one of the two possible directions, and we
2116  * might as well use this one.
2117  */
2118  return (pathkey->pk_strategy == query_pathkey->pk_strategy);
2119  }
2120  }
2121 
2122  /* If no matching ORDER BY request, prefer the ASC direction */
2123  return (pathkey->pk_strategy == BTLessStrategyNumber);
2124 }
2125 
2126 /*
2127  * pathkeys_useful_for_ordering
2128  * Count the number of pathkeys that are useful for meeting the
2129  * query's requested output ordering.
2130  *
2131  * Because we the have the possibility of incremental sort, a prefix list of
2132  * keys is potentially useful for improving the performance of the requested
2133  * ordering. Thus we return 0, if no valuable keys are found, or the number
2134  * of leading keys shared by the list and the requested ordering..
2135  */
2136 static int
2138 {
2139  int n_common_pathkeys;
2140 
2141  (void) pathkeys_count_contained_in(root->query_pathkeys, pathkeys,
2142  &n_common_pathkeys);
2143 
2144  return n_common_pathkeys;
2145 }
2146 
2147 /*
2148  * pathkeys_useful_for_grouping
2149  * Count the number of pathkeys that are useful for grouping (instead of
2150  * explicit sort)
2151  *
2152  * Group pathkeys could be reordered to benefit from the ordering. The
2153  * ordering may not be "complete" and may require incremental sort, but that's
2154  * fine. So we simply count prefix pathkeys with a matching group key, and
2155  * stop once we find the first pathkey without a match.
2156  *
2157  * So e.g. with pathkeys (a,b,c) and group keys (a,b,e) this determines (a,b)
2158  * pathkeys are useful for grouping, and we might do incremental sort to get
2159  * path ordered by (a,b,e).
2160  *
2161  * This logic is necessary to retain paths with ordering not matching grouping
2162  * keys directly, without the reordering.
2163  *
2164  * Returns the length of pathkey prefix with matching group keys.
2165  */
2166 static int
2168 {
2169  ListCell *key;
2170  int n = 0;
2171 
2172  /* no special ordering requested for grouping */
2173  if (root->group_pathkeys == NIL)
2174  return 0;
2175 
2176  /* walk the pathkeys and search for matching group key */
2177  foreach(key, pathkeys)
2178  {
2179  PathKey *pathkey = (PathKey *) lfirst(key);
2180 
2181  /* no matching group key, we're done */
2182  if (!list_member_ptr(root->group_pathkeys, pathkey))
2183  break;
2184 
2185  n++;
2186  }
2187 
2188  return n;
2189 }
2190 
2191 /*
2192  * pathkeys_useful_for_setop
2193  * Count the number of leading common pathkeys root's 'setop_pathkeys' in
2194  * 'pathkeys'.
2195  */
2196 static int
2198 {
2199  int n_common_pathkeys;
2200 
2201  (void) pathkeys_count_contained_in(root->setop_pathkeys, pathkeys,
2202  &n_common_pathkeys);
2203 
2204  return n_common_pathkeys;
2205 }
2206 
2207 /*
2208  * truncate_useless_pathkeys
2209  * Shorten the given pathkey list to just the useful pathkeys.
2210  */
2211 List *
2213  RelOptInfo *rel,
2214  List *pathkeys)
2215 {
2216  int nuseful;
2217  int nuseful2;
2218 
2219  nuseful = pathkeys_useful_for_merging(root, rel, pathkeys);
2220  nuseful2 = pathkeys_useful_for_ordering(root, pathkeys);
2221  if (nuseful2 > nuseful)
2222  nuseful = nuseful2;
2223  nuseful2 = pathkeys_useful_for_grouping(root, pathkeys);
2224  if (nuseful2 > nuseful)
2225  nuseful = nuseful2;
2226  nuseful2 = pathkeys_useful_for_setop(root, pathkeys);
2227  if (nuseful2 > nuseful)
2228  nuseful = nuseful2;
2229 
2230  /*
2231  * Note: not safe to modify input list destructively, but we can avoid
2232  * copying the list if we're not actually going to change it
2233  */
2234  if (nuseful == 0)
2235  return NIL;
2236  else if (nuseful == list_length(pathkeys))
2237  return pathkeys;
2238  else
2239  return list_copy_head(pathkeys, nuseful);
2240 }
2241 
2242 /*
2243  * has_useful_pathkeys
2244  * Detect whether the specified rel could have any pathkeys that are
2245  * useful according to truncate_useless_pathkeys().
2246  *
2247  * This is a cheap test that lets us skip building pathkeys at all in very
2248  * simple queries. It's OK to err in the direction of returning "true" when
2249  * there really aren't any usable pathkeys, but erring in the other direction
2250  * is bad --- so keep this in sync with the routines above!
2251  *
2252  * We could make the test more complex, for example checking to see if any of
2253  * the joinclauses are really mergejoinable, but that likely wouldn't win
2254  * often enough to repay the extra cycles. Queries with neither a join nor
2255  * a sort are reasonably common, though, so this much work seems worthwhile.
2256  */
2257 bool
2259 {
2260  if (rel->joininfo != NIL || rel->has_eclass_joins)
2261  return true; /* might be able to use pathkeys for merging */
2262  if (root->group_pathkeys != NIL)
2263  return true; /* might be able to use pathkeys for grouping */
2264  if (root->query_pathkeys != NIL)
2265  return true; /* might be able to use them for ordering */
2266  return false; /* definitely useless */
2267 }
bool bms_is_subset(const Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:412
bool bms_overlap(const Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:582
#define bms_is_empty(a)
Definition: bitmapset.h:118
signed short int16
Definition: c.h:493
#define Assert(condition)
Definition: c.h:858
unsigned int Index
Definition: c.h:614
#define OidIsValid(objectId)
Definition: c.h:775
bool enable_incremental_sort
Definition: costsize.c:140
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:224
bool equal(const void *a, const void *b)
Definition: equalfuncs.c:223
Expr * canonicalize_ec_expression(Expr *expr, Oid req_type, Oid req_collation)
Definition: equivclass.c:472
EquivalenceClass * get_eclass_for_sort_expr(PlannerInfo *root, Expr *expr, List *opfamilies, Oid opcintype, Oid collation, Index sortref, Relids rel, bool create_it)
Definition: equivclass.c:587
bool eclass_useful_for_merging(PlannerInfo *root, EquivalenceClass *eclass, RelOptInfo *rel)
Definition: equivclass.c:3212
bool indexcol_is_bool_constant_for_query(PlannerInfo *root, IndexOptInfo *index, int indexcol)
Definition: indxpath.c:3614
int j
Definition: isn.c:74
int i
Definition: isn.c:73
List * list_difference_ptr(const List *list1, const List *list2)
Definition: list.c:1263
List * lappend(List *list, void *datum)
Definition: list.c:339
List * list_copy_head(const List *oldlist, int len)
Definition: list.c:1593
List * list_copy(const List *oldlist)
Definition: list.c:1573
bool list_member_ptr(const List *list, const void *datum)
Definition: list.c:682
List * list_concat_unique_ptr(List *list1, const List *list2)
Definition: list.c:1427
void list_free(List *list)
Definition: list.c:1546
List * list_concat(List *list1, const List *list2)
Definition: list.c:561
List * list_difference(const List *list1, const List *list2)
Definition: list.c:1237
List * get_mergejoin_opfamilies(Oid opno)
Definition: lsyscache.c:366
Oid get_opfamily_member(Oid opfamily, Oid lefttype, Oid righttype, int16 strategy)
Definition: lsyscache.c:166
bool get_ordering_op_properties(Oid opno, Oid *opfamily, Oid *opcintype, int16 *strategy)
Definition: lsyscache.c:207
void op_input_types(Oid opno, Oid *lefttype, Oid *righttype)
Definition: lsyscache.c:1358
void pfree(void *pointer)
Definition: mcxt.c:1520
void * palloc(Size size)
Definition: mcxt.c:1316
Oid exprCollation(const Node *expr)
Definition: nodeFuncs.c:816
static Expr * get_notclausearg(const void *notclause)
Definition: nodeFuncs.h:132
static Node * get_rightop(const void *clause)
Definition: nodeFuncs.h:93
static bool is_notclause(const void *clause)
Definition: nodeFuncs.h:123
static Node * get_leftop(const void *clause)
Definition: nodeFuncs.h:81
#define IsA(nodeptr, _type_)
Definition: nodes.h:158
#define copyObject(obj)
Definition: nodes.h:224
#define makeNode(_type_)
Definition: nodes.h:155
JoinType
Definition: nodes.h:288
@ JOIN_FULL
Definition: nodes.h:295
@ JOIN_RIGHT
Definition: nodes.h:296
@ JOIN_RIGHT_ANTI
Definition: nodes.h:309
bool partitions_are_ordered(PartitionBoundInfo boundinfo, Bitmapset *live_parts)
Definition: partbounds.c:2852
static bool matches_boolean_partition_clause(RestrictInfo *rinfo, RelOptInfo *partrel, int partkeycol)
Definition: pathkeys.c:882
List * build_join_pathkeys(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, List *outer_pathkeys)
Definition: pathkeys.c:1292
static int pathkeys_useful_for_setop(PlannerInfo *root, List *pathkeys)
Definition: pathkeys.c:2197
List * get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
Definition: pathkeys.c:465
List * build_expression_pathkey(PlannerInfo *root, Expr *expr, Oid opno, Relids rel, bool create_it)
Definition: pathkeys.c:998
static bool right_merge_direction(PlannerInfo *root, PathKey *pathkey)
Definition: pathkeys.c:2100
List * make_inner_pathkeys_for_merge(PlannerInfo *root, List *mergeclauses, List *outer_pathkeys)
Definition: pathkeys.c:1835
Path * get_cheapest_path_for_pathkeys(List *paths, List *pathkeys, Relids required_outer, CostSelector cost_criterion, bool require_parallel_safe)
Definition: pathkeys.c:618
bool pathkeys_count_contained_in(List *keys1, List *keys2, int *n_common)
Definition: pathkeys.c:556
List * find_mergeclauses_for_outer_pathkeys(PlannerInfo *root, List *pathkeys, List *restrictinfos)
Definition: pathkeys.c:1524
static int group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys, List **group_clauses, int num_groupby_pathkeys)
Definition: pathkeys.c:368
static PathKey * make_pathkey_from_sortop(PlannerInfo *root, Expr *expr, Oid ordering_op, bool nulls_first, Index sortref, bool create_it)
Definition: pathkeys.c:255
bool has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel)
Definition: pathkeys.c:2258
List * append_pathkeys(List *target, List *source)
Definition: pathkeys.c:106
List * truncate_useless_pathkeys(PlannerInfo *root, RelOptInfo *rel, List *pathkeys)
Definition: pathkeys.c:2212
static int pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys)
Definition: pathkeys.c:2137
List * trim_mergeclauses_for_inner_pathkeys(PlannerInfo *root, List *mergeclauses, List *pathkeys)
Definition: pathkeys.c:1938
List * select_outer_pathkeys_for_merge(PlannerInfo *root, List *mergeclauses, RelOptInfo *joinrel)
Definition: pathkeys.c:1639
void update_mergeclause_eclasses(PlannerInfo *root, RestrictInfo *restrictinfo)
Definition: pathkeys.c:1490
static Var * find_var_for_subquery_tle(RelOptInfo *rel, TargetEntry *tle)
Definition: pathkeys.c:1249
List * build_index_pathkeys(PlannerInfo *root, IndexOptInfo *index, ScanDirection scandir)
Definition: pathkeys.c:738
static int pathkeys_useful_for_grouping(PlannerInfo *root, List *pathkeys)
Definition: pathkeys.c:2167
static bool partkey_is_bool_constant_for_query(RelOptInfo *partrel, int partkeycol)
Definition: pathkeys.c:842
bool enable_group_by_reordering
Definition: pathkeys.c:31
static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys)
Definition: pathkeys.c:158
PathKey * make_canonical_pathkey(PlannerInfo *root, EquivalenceClass *eclass, Oid opfamily, int strategy, bool nulls_first)
Definition: pathkeys.c:55
Path * get_cheapest_parallel_safe_total_inner(List *paths)
Definition: pathkeys.c:697
List * make_pathkeys_for_sortclauses(PlannerInfo *root, List *sortclauses, List *tlist)
Definition: pathkeys.c:1330
static int pathkeys_useful_for_merging(PlannerInfo *root, RelOptInfo *rel, List *pathkeys)
Definition: pathkeys.c:2033
List * convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel, List *subquery_pathkeys, List *subquery_tlist)
Definition: pathkeys.c:1052
List * make_pathkeys_for_sortclauses_extended(PlannerInfo *root, List **sortclauses, List *tlist, bool remove_redundant, bool *sortable, bool set_ec_sortref)
Definition: pathkeys.c:1371
static PathKey * make_pathkey_from_sortinfo(PlannerInfo *root, Expr *expr, Oid opfamily, Oid opcintype, Oid collation, bool reverse_sort, bool nulls_first, Index sortref, Relids rel, bool create_it)
Definition: pathkeys.c:197
void initialize_mergeclause_eclasses(PlannerInfo *root, RestrictInfo *restrictinfo)
Definition: pathkeys.c:1443
Path * get_cheapest_fractional_path_for_pathkeys(List *paths, List *pathkeys, Relids required_outer, double fraction)
Definition: pathkeys.c:664
bool pathkeys_contained_in(List *keys1, List *keys2)
Definition: pathkeys.c:341
PathKeysComparison compare_pathkeys(List *keys1, List *keys2)
Definition: pathkeys.c:302
List * build_partition_pathkeys(PlannerInfo *root, RelOptInfo *partrel, ScanDirection scandir, bool *partialkeys)
Definition: pathkeys.c:917
int compare_fractional_path_costs(Path *path1, Path *path2, double fraction)
Definition: pathnode.c:115
int compare_path_costs(Path *path1, Path *path2, CostSelector criterion)
Definition: pathnode.c:69
#define EC_MUST_BE_REDUNDANT(eclass)
Definition: pathnodes.h:1403
#define IS_SIMPLE_REL(rel)
Definition: pathnodes.h:833
CostSelector
Definition: pathnodes.h:37
#define PATH_REQ_OUTER(path)
Definition: pathnodes.h:1669
PathKeysComparison
Definition: paths.h:201
@ PATHKEYS_BETTER2
Definition: paths.h:204
@ PATHKEYS_BETTER1
Definition: paths.h:203
@ PATHKEYS_DIFFERENT
Definition: paths.h:205
@ PATHKEYS_EQUAL
Definition: paths.h:202
void * arg
#define lfirst(lc)
Definition: pg_list.h:172
#define lfirst_node(type, lc)
Definition: pg_list.h:176
static int list_length(const List *l)
Definition: pg_list.h:152
#define linitial_node(type, l)
Definition: pg_list.h:181
#define NIL
Definition: pg_list.h:68
#define forboth(cell1, list1, cell2, list2)
Definition: pg_list.h:518
#define foreach_current_index(var_or_cell)
Definition: pg_list.h:403
#define foreach_delete_current(lst, var_or_cell)
Definition: pg_list.h:391
#define list_make1(x1)
Definition: pg_list.h:212
static ListCell * list_head(const List *l)
Definition: pg_list.h:128
#define for_each_from(cell, lst, N)
Definition: pg_list.h:414
#define linitial(l)
Definition: pg_list.h:178
static void * list_nth(const List *list, int n)
Definition: pg_list.h:299
static ListCell * lnext(const List *l, const ListCell *c)
Definition: pg_list.h:343
#define linitial_oid(l)
Definition: pg_list.h:180
static rewind_source * source
Definition: pg_rewind.c:89
unsigned int Oid
Definition: postgres_ext.h:31
MemoryContextSwitchTo(old_ctx)
tree ctl root
Definition: radixtree.h:1884
static struct cvec * eclass(struct vars *v, chr c, int cases)
Definition: regc_locale.c:500
static struct subre * parse(struct vars *v, int stopper, int type, struct state *init, struct state *final)
Definition: regcomp.c:715
#define ScanDirectionIsBackward(direction)
Definition: sdir.h:50
ScanDirection
Definition: sdir.h:25
#define BTGreaterStrategyNumber
Definition: stratnum.h:33
#define BTLessStrategyNumber
Definition: stratnum.h:29
#define BTEqualStrategyNumber
Definition: stratnum.h:31
List * ec_opfamilies
Definition: pathnodes.h:1383
Definition: pg_list.h:54
Definition: nodes.h:129
bool pk_nulls_first
Definition: pathnodes.h:1471
int pk_strategy
Definition: pathnodes.h:1470
Oid pk_opfamily
Definition: pathnodes.h:1469
List * exprs
Definition: pathnodes.h:1533
List * pathkeys
Definition: pathnodes.h:1665
bool parallel_safe
Definition: pathnodes.h:1655
List * baserestrictinfo
Definition: pathnodes.h:979
List * joininfo
Definition: pathnodes.h:985
Relids relids
Definition: pathnodes.h:865
struct PathTarget * reltarget
Definition: pathnodes.h:887
Index relid
Definition: pathnodes.h:912
bool has_eclass_joins
Definition: pathnodes.h:987
Bitmapset * live_parts
Definition: pathnodes.h:1033
Expr * clause
Definition: pathnodes.h:2564
Index tleSortGroupRef
Definition: parsenodes.h:1442
Expr * expr
Definition: primnodes.h:2192
AttrNumber resno
Definition: primnodes.h:2194
Definition: primnodes.h:248
AttrNumber varattno
Definition: primnodes.h:260
int varno
Definition: primnodes.h:255
Definition: type.h:95
TargetEntry * get_sortgroupref_tle(Index sortref, List *targetList)
Definition: tlist.c:345
SortGroupClause * get_sortgroupref_clause_noerr(Index sortref, List *clauses)
Definition: tlist.c:443
Node * get_sortgroupclause_expr(SortGroupClause *sgClause, List *targetList)
Definition: tlist.c:379