PostgreSQL Source Code  git master
pathkeys.c
Go to the documentation of this file.
1 /*-------------------------------------------------------------------------
2  *
3  * pathkeys.c
4  * Utilities for matching and building path keys
5  *
6  * See src/backend/optimizer/README for a great deal of information about
7  * the nature and use of path keys.
8  *
9  *
10  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
11  * Portions Copyright (c) 1994, Regents of the University of California
12  *
13  * IDENTIFICATION
14  * src/backend/optimizer/path/pathkeys.c
15  *
16  *-------------------------------------------------------------------------
17  */
18 #include "postgres.h"
19 
20 #include "access/stratnum.h"
21 #include "catalog/pg_opfamily.h"
22 #include "nodes/nodeFuncs.h"
23 #include "optimizer/cost.h"
24 #include "optimizer/optimizer.h"
25 #include "optimizer/pathnode.h"
26 #include "optimizer/paths.h"
28 #include "rewrite/rewriteManip.h"
29 #include "utils/lsyscache.h"
30 
31 /* Consider reordering of GROUP BY keys? */
33 
34 static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys);
36  RelOptInfo *partrel,
37  int partkeycol);
39 static bool right_merge_direction(PlannerInfo *root, PathKey *pathkey);
40 
41 
42 /****************************************************************************
43  * PATHKEY CONSTRUCTION AND REDUNDANCY TESTING
44  ****************************************************************************/
45 
46 /*
47  * make_canonical_pathkey
48  * Given the parameters for a PathKey, find any pre-existing matching
49  * pathkey in the query's list of "canonical" pathkeys. Make a new
50  * entry if there's not one already.
51  *
52  * Note that this function must not be used until after we have completed
53  * merging EquivalenceClasses.
54  */
55 PathKey *
57  EquivalenceClass *eclass, Oid opfamily,
58  int strategy, bool nulls_first)
59 {
60  PathKey *pk;
61  ListCell *lc;
62  MemoryContext oldcontext;
63 
64  /* Can't make canonical pathkeys if the set of ECs might still change */
65  if (!root->ec_merging_done)
66  elog(ERROR, "too soon to build canonical pathkeys");
67 
68  /* The passed eclass might be non-canonical, so chase up to the top */
69  while (eclass->ec_merged)
70  eclass = eclass->ec_merged;
71 
72  foreach(lc, root->canon_pathkeys)
73  {
74  pk = (PathKey *) lfirst(lc);
75  if (eclass == pk->pk_eclass &&
76  opfamily == pk->pk_opfamily &&
77  strategy == pk->pk_strategy &&
78  nulls_first == pk->pk_nulls_first)
79  return pk;
80  }
81 
82  /*
83  * Be sure canonical pathkeys are allocated in the main planning context.
84  * Not an issue in normal planning, but it is for GEQO.
85  */
86  oldcontext = MemoryContextSwitchTo(root->planner_cxt);
87 
88  pk = makeNode(PathKey);
89  pk->pk_eclass = eclass;
90  pk->pk_opfamily = opfamily;
91  pk->pk_strategy = strategy;
92  pk->pk_nulls_first = nulls_first;
93 
94  root->canon_pathkeys = lappend(root->canon_pathkeys, pk);
95 
96  MemoryContextSwitchTo(oldcontext);
97 
98  return pk;
99 }
100 
101 /*
102  * append_pathkeys
103  * Append all non-redundant PathKeys in 'source' onto 'target' and
104  * return the updated 'target' list.
105  */
106 List *
108 {
109  ListCell *lc;
110 
111  Assert(target != NIL);
112 
113  foreach(lc, source)
114  {
115  PathKey *pk = lfirst_node(PathKey, lc);
116 
117  if (!pathkey_is_redundant(pk, target))
118  target = lappend(target, pk);
119  }
120  return target;
121 }
122 
123 /*
124  * pathkey_is_redundant
125  * Is a pathkey redundant with one already in the given list?
126  *
127  * We detect two cases:
128  *
129  * 1. If the new pathkey's equivalence class contains a constant, and isn't
130  * below an outer join, then we can disregard it as a sort key. An example:
131  * SELECT ... WHERE x = 42 ORDER BY x, y;
132  * We may as well just sort by y. Note that because of opfamily matching,
133  * this is semantically correct: we know that the equality constraint is one
134  * that actually binds the variable to a single value in the terms of any
135  * ordering operator that might go with the eclass. This rule not only lets
136  * us simplify (or even skip) explicit sorts, but also allows matching index
137  * sort orders to a query when there are don't-care index columns.
138  *
139  * 2. If the new pathkey's equivalence class is the same as that of any
140  * existing member of the pathkey list, then it is redundant. Some examples:
141  * SELECT ... ORDER BY x, x;
142  * SELECT ... ORDER BY x, x DESC;
143  * SELECT ... WHERE x = y ORDER BY x, y;
144  * In all these cases the second sort key cannot distinguish values that are
145  * considered equal by the first, and so there's no point in using it.
146  * Note in particular that we need not compare opfamily (all the opfamilies
147  * of the EC have the same notion of equality) nor sort direction.
148  *
149  * Both the given pathkey and the list members must be canonical for this
150  * to work properly, but that's okay since we no longer ever construct any
151  * non-canonical pathkeys. (Note: the notion of a pathkey *list* being
152  * canonical includes the additional requirement of no redundant entries,
153  * which is exactly what we are checking for here.)
154  *
155  * Because the equivclass.c machinery forms only one copy of any EC per query,
156  * pointer comparison is enough to decide whether canonical ECs are the same.
157  */
158 static bool
159 pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys)
160 {
161  EquivalenceClass *new_ec = new_pathkey->pk_eclass;
162  ListCell *lc;
163 
164  /* Check for EC containing a constant --- unconditionally redundant */
165  if (EC_MUST_BE_REDUNDANT(new_ec))
166  return true;
167 
168  /* If same EC already used in list, then redundant */
169  foreach(lc, pathkeys)
170  {
171  PathKey *old_pathkey = (PathKey *) lfirst(lc);
172 
173  if (new_ec == old_pathkey->pk_eclass)
174  return true;
175  }
176 
177  return false;
178 }
179 
180 /*
181  * make_pathkey_from_sortinfo
182  * Given an expression and sort-order information, create a PathKey.
183  * The result is always a "canonical" PathKey, but it might be redundant.
184  *
185  * If the PathKey is being generated from a SortGroupClause, sortref should be
186  * the SortGroupClause's SortGroupRef; otherwise zero.
187  *
188  * If rel is not NULL, it identifies a specific relation we're considering
189  * a path for, and indicates that child EC members for that relation can be
190  * considered. Otherwise child members are ignored. (See the comments for
191  * get_eclass_for_sort_expr.)
192  *
193  * create_it is true if we should create any missing EquivalenceClass
194  * needed to represent the sort key. If it's false, we return NULL if the
195  * sort key isn't already present in any EquivalenceClass.
196  */
197 static PathKey *
199  Expr *expr,
200  Oid opfamily,
201  Oid opcintype,
202  Oid collation,
203  bool reverse_sort,
204  bool nulls_first,
205  Index sortref,
206  Relids rel,
207  bool create_it)
208 {
209  int16 strategy;
210  Oid equality_op;
211  List *opfamilies;
213 
214  strategy = reverse_sort ? BTGreaterStrategyNumber : BTLessStrategyNumber;
215 
216  /*
217  * EquivalenceClasses need to contain opfamily lists based on the family
218  * membership of mergejoinable equality operators, which could belong to
219  * more than one opfamily. So we have to look up the opfamily's equality
220  * operator and get its membership.
221  */
222  equality_op = get_opfamily_member(opfamily,
223  opcintype,
224  opcintype,
226  if (!OidIsValid(equality_op)) /* shouldn't happen */
227  elog(ERROR, "missing operator %d(%u,%u) in opfamily %u",
228  BTEqualStrategyNumber, opcintype, opcintype, opfamily);
229  opfamilies = get_mergejoin_opfamilies(equality_op);
230  if (!opfamilies) /* certainly should find some */
231  elog(ERROR, "could not find opfamilies for equality operator %u",
232  equality_op);
233 
234  /* Now find or (optionally) create a matching EquivalenceClass */
236  opfamilies, opcintype, collation,
237  sortref, rel, create_it);
238 
239  /* Fail if no EC and !create_it */
240  if (!eclass)
241  return NULL;
242 
243  /* And finally we can find or create a PathKey node */
244  return make_canonical_pathkey(root, eclass, opfamily,
245  strategy, nulls_first);
246 }
247 
248 /*
249  * make_pathkey_from_sortop
250  * Like make_pathkey_from_sortinfo, but work from a sort operator.
251  *
252  * This should eventually go away, but we need to restructure SortGroupClause
253  * first.
254  */
255 static PathKey *
257  Expr *expr,
258  Oid ordering_op,
259  bool nulls_first,
260  Index sortref,
261  bool create_it)
262 {
263  Oid opfamily,
264  opcintype,
265  collation;
266  int16 strategy;
267 
268  /* Find the operator in pg_amop --- failure shouldn't happen */
269  if (!get_ordering_op_properties(ordering_op,
270  &opfamily, &opcintype, &strategy))
271  elog(ERROR, "operator %u is not a valid ordering operator",
272  ordering_op);
273 
274  /* Because SortGroupClause doesn't carry collation, consult the expr */
275  collation = exprCollation((Node *) expr);
276 
278  expr,
279  opfamily,
280  opcintype,
281  collation,
282  (strategy == BTGreaterStrategyNumber),
283  nulls_first,
284  sortref,
285  NULL,
286  create_it);
287 }
288 
289 
290 /****************************************************************************
291  * PATHKEY COMPARISONS
292  ****************************************************************************/
293 
294 /*
295  * compare_pathkeys
296  * Compare two pathkeys to see if they are equivalent, and if not whether
297  * one is "better" than the other.
298  *
299  * We assume the pathkeys are canonical, and so they can be checked for
300  * equality by simple pointer comparison.
301  */
303 compare_pathkeys(List *keys1, List *keys2)
304 {
305  ListCell *key1,
306  *key2;
307 
308  /*
309  * Fall out quickly if we are passed two identical lists. This mostly
310  * catches the case where both are NIL, but that's common enough to
311  * warrant the test.
312  */
313  if (keys1 == keys2)
314  return PATHKEYS_EQUAL;
315 
316  forboth(key1, keys1, key2, keys2)
317  {
318  PathKey *pathkey1 = (PathKey *) lfirst(key1);
319  PathKey *pathkey2 = (PathKey *) lfirst(key2);
320 
321  if (pathkey1 != pathkey2)
322  return PATHKEYS_DIFFERENT; /* no need to keep looking */
323  }
324 
325  /*
326  * If we reached the end of only one list, the other is longer and
327  * therefore not a subset.
328  */
329  if (key1 != NULL)
330  return PATHKEYS_BETTER1; /* key1 is longer */
331  if (key2 != NULL)
332  return PATHKEYS_BETTER2; /* key2 is longer */
333  return PATHKEYS_EQUAL;
334 }
335 
336 /*
337  * pathkeys_contained_in
338  * Common special case of compare_pathkeys: we just want to know
339  * if keys2 are at least as well sorted as keys1.
340  */
341 bool
343 {
344  switch (compare_pathkeys(keys1, keys2))
345  {
346  case PATHKEYS_EQUAL:
347  case PATHKEYS_BETTER2:
348  return true;
349  default:
350  break;
351  }
352  return false;
353 }
354 
355 /*
356  * group_keys_reorder_by_pathkeys
357  * Reorder GROUP BY pathkeys and clauses to match the input pathkeys.
358  *
359  * 'pathkeys' is an input list of pathkeys
360  * '*group_pathkeys' and '*group_clauses' are pathkeys and clauses lists to
361  * reorder. The pointers are redirected to new lists, original lists
362  * stay untouched.
363  * 'num_groupby_pathkeys' is the number of first '*group_pathkeys' items to
364  * search matching pathkeys.
365  *
366  * Returns the number of GROUP BY keys with a matching pathkey.
367  */
368 static int
369 group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
370  List **group_clauses,
371  int num_groupby_pathkeys)
372 {
373  List *new_group_pathkeys = NIL,
374  *new_group_clauses = NIL;
375  List *grouping_pathkeys;
376  ListCell *lc;
377  int n;
378 
379  if (pathkeys == NIL || *group_pathkeys == NIL)
380  return 0;
381 
382  /*
383  * We're going to search within just the first num_groupby_pathkeys of
384  * *group_pathkeys. The thing is that root->group_pathkeys is passed as
385  * *group_pathkeys containing grouping pathkeys altogether with aggregate
386  * pathkeys. If we process aggregate pathkeys we could get an invalid
387  * result of get_sortgroupref_clause_noerr(), because their
388  * pathkey->pk_eclass->ec_sortref doesn't reference query targetlist. So,
389  * we allocate a separate list of pathkeys for lookups.
390  */
391  grouping_pathkeys = list_copy_head(*group_pathkeys, num_groupby_pathkeys);
392 
393  /*
394  * Walk the pathkeys (determining ordering of the input path) and see if
395  * there's a matching GROUP BY key. If we find one, we append it to the
396  * list, and do the same for the clauses.
397  *
398  * Once we find the first pathkey without a matching GROUP BY key, the
399  * rest of the pathkeys are useless and can't be used to evaluate the
400  * grouping, so we abort the loop and ignore the remaining pathkeys.
401  */
402  foreach(lc, pathkeys)
403  {
404  PathKey *pathkey = (PathKey *) lfirst(lc);
405  SortGroupClause *sgc;
406 
407  /*
408  * Pathkeys are built in a way that allows simply comparing pointers.
409  * Give up if we can't find the matching pointer. Also give up if
410  * there is no sortclause reference for some reason.
411  */
412  if (foreach_current_index(lc) >= num_groupby_pathkeys ||
413  !list_member_ptr(grouping_pathkeys, pathkey) ||
414  pathkey->pk_eclass->ec_sortref == 0)
415  break;
416 
417  /*
418  * Since 1349d27 pathkey coming from underlying node can be in the
419  * root->group_pathkeys but not in the processed_groupClause. So, we
420  * should be careful here.
421  */
422  sgc = get_sortgroupref_clause_noerr(pathkey->pk_eclass->ec_sortref,
423  *group_clauses);
424  if (!sgc)
425  /* The grouping clause does not cover this pathkey */
426  break;
427 
428  /*
429  * Sort group clause should have an ordering operator as long as there
430  * is an associated pathkey.
431  */
432  Assert(OidIsValid(sgc->sortop));
433 
434  new_group_pathkeys = lappend(new_group_pathkeys, pathkey);
435  new_group_clauses = lappend(new_group_clauses, sgc);
436  }
437 
438  /* remember the number of pathkeys with a matching GROUP BY key */
439  n = list_length(new_group_pathkeys);
440 
441  /* append the remaining group pathkeys (will be treated as not sorted) */
442  *group_pathkeys = list_concat_unique_ptr(new_group_pathkeys,
443  *group_pathkeys);
444  *group_clauses = list_concat_unique_ptr(new_group_clauses,
445  *group_clauses);
446 
447  list_free(grouping_pathkeys);
448  return n;
449 }
450 
451 /*
452  * get_useful_group_keys_orderings
453  * Determine which orderings of GROUP BY keys are potentially interesting.
454  *
455  * Returns a list of GroupByOrdering items, each representing an interesting
456  * ordering of GROUP BY keys. Each item stores pathkeys and clauses in the
457  * matching order.
458  *
459  * The function considers (and keeps) following GROUP BY orderings:
460  *
461  * - GROUP BY keys as ordered by preprocess_groupclause() to match target
462  * ORDER BY clause (as much as possible),
463  * - GROUP BY keys reordered to match 'path' ordering (as much as possible).
464  */
465 List *
467 {
468  Query *parse = root->parse;
469  List *infos = NIL;
470  GroupByOrdering *info;
471 
472  List *pathkeys = root->group_pathkeys;
473  List *clauses = root->processed_groupClause;
474 
475  /* always return at least the original pathkeys/clauses */
476  info = makeNode(GroupByOrdering);
477  info->pathkeys = pathkeys;
478  info->clauses = clauses;
479  infos = lappend(infos, info);
480 
481  /*
482  * Should we try generating alternative orderings of the group keys? If
483  * not, we produce only the order specified in the query, i.e. the
484  * optimization is effectively disabled.
485  */
487  return infos;
488 
489  /*
490  * Grouping sets have own and more complex logic to decide the ordering.
491  */
492  if (parse->groupingSets)
493  return infos;
494 
495  /*
496  * If the path is sorted in some way, try reordering the group keys to
497  * match the path as much of the ordering as possible. Then thanks to
498  * incremental sort we would get this sort as cheap as possible.
499  */
500  if (path->pathkeys &&
501  !pathkeys_contained_in(path->pathkeys, root->group_pathkeys))
502  {
503  int n;
504 
505  n = group_keys_reorder_by_pathkeys(path->pathkeys, &pathkeys, &clauses,
506  root->num_groupby_pathkeys);
507 
508  if (n > 0 &&
509  (enable_incremental_sort || n == root->num_groupby_pathkeys) &&
510  compare_pathkeys(pathkeys, root->group_pathkeys) != PATHKEYS_EQUAL)
511  {
512  info = makeNode(GroupByOrdering);
513  info->pathkeys = pathkeys;
514  info->clauses = clauses;
515 
516  infos = lappend(infos, info);
517  }
518  }
519 
520 #ifdef USE_ASSERT_CHECKING
521  {
523  ListCell *lc;
524 
525  /* Test consistency of info structures */
526  for_each_from(lc, infos, 1)
527  {
528  ListCell *lc1,
529  *lc2;
530 
531  info = lfirst_node(GroupByOrdering, lc);
532 
533  Assert(list_length(info->clauses) == list_length(pinfo->clauses));
534  Assert(list_length(info->pathkeys) == list_length(pinfo->pathkeys));
535  Assert(list_difference(info->clauses, pinfo->clauses) == NIL);
536  Assert(list_difference_ptr(info->pathkeys, pinfo->pathkeys) == NIL);
537 
538  forboth(lc1, info->clauses, lc2, info->pathkeys)
539  {
541  PathKey *pk = lfirst_node(PathKey, lc2);
542 
543  Assert(pk->pk_eclass->ec_sortref == sgc->tleSortGroupRef);
544  }
545  }
546  }
547 #endif
548  return infos;
549 }
550 
551 /*
552  * pathkeys_count_contained_in
553  * Same as pathkeys_contained_in, but also sets length of longest
554  * common prefix of keys1 and keys2.
555  */
556 bool
557 pathkeys_count_contained_in(List *keys1, List *keys2, int *n_common)
558 {
559  int n = 0;
560  ListCell *key1,
561  *key2;
562 
563  /*
564  * See if we can avoiding looping through both lists. This optimization
565  * gains us several percent in planning time in a worst-case test.
566  */
567  if (keys1 == keys2)
568  {
569  *n_common = list_length(keys1);
570  return true;
571  }
572  else if (keys1 == NIL)
573  {
574  *n_common = 0;
575  return true;
576  }
577  else if (keys2 == NIL)
578  {
579  *n_common = 0;
580  return false;
581  }
582 
583  /*
584  * If both lists are non-empty, iterate through both to find out how many
585  * items are shared.
586  */
587  forboth(key1, keys1, key2, keys2)
588  {
589  PathKey *pathkey1 = (PathKey *) lfirst(key1);
590  PathKey *pathkey2 = (PathKey *) lfirst(key2);
591 
592  if (pathkey1 != pathkey2)
593  {
594  *n_common = n;
595  return false;
596  }
597  n++;
598  }
599 
600  /* If we ended with a null value, then we've processed the whole list. */
601  *n_common = n;
602  return (key1 == NULL);
603 }
604 
605 /*
606  * get_cheapest_path_for_pathkeys
607  * Find the cheapest path (according to the specified criterion) that
608  * satisfies the given pathkeys and parameterization, and is parallel-safe
609  * if required.
610  * Return NULL if no such path.
611  *
612  * 'paths' is a list of possible paths that all generate the same relation
613  * 'pathkeys' represents a required ordering (in canonical form!)
614  * 'required_outer' denotes allowable outer relations for parameterized paths
615  * 'cost_criterion' is STARTUP_COST or TOTAL_COST
616  * 'require_parallel_safe' causes us to consider only parallel-safe paths
617  */
618 Path *
620  Relids required_outer,
621  CostSelector cost_criterion,
622  bool require_parallel_safe)
623 {
624  Path *matched_path = NULL;
625  ListCell *l;
626 
627  foreach(l, paths)
628  {
629  Path *path = (Path *) lfirst(l);
630 
631  /* If required, reject paths that are not parallel-safe */
632  if (require_parallel_safe && !path->parallel_safe)
633  continue;
634 
635  /*
636  * Since cost comparison is a lot cheaper than pathkey comparison, do
637  * that first. (XXX is that still true?)
638  */
639  if (matched_path != NULL &&
640  compare_path_costs(matched_path, path, cost_criterion) <= 0)
641  continue;
642 
643  if (pathkeys_contained_in(pathkeys, path->pathkeys) &&
644  bms_is_subset(PATH_REQ_OUTER(path), required_outer))
645  matched_path = path;
646  }
647  return matched_path;
648 }
649 
650 /*
651  * get_cheapest_fractional_path_for_pathkeys
652  * Find the cheapest path (for retrieving a specified fraction of all
653  * the tuples) that satisfies the given pathkeys and parameterization.
654  * Return NULL if no such path.
655  *
656  * See compare_fractional_path_costs() for the interpretation of the fraction
657  * parameter.
658  *
659  * 'paths' is a list of possible paths that all generate the same relation
660  * 'pathkeys' represents a required ordering (in canonical form!)
661  * 'required_outer' denotes allowable outer relations for parameterized paths
662  * 'fraction' is the fraction of the total tuples expected to be retrieved
663  */
664 Path *
666  List *pathkeys,
667  Relids required_outer,
668  double fraction)
669 {
670  Path *matched_path = NULL;
671  ListCell *l;
672 
673  foreach(l, paths)
674  {
675  Path *path = (Path *) lfirst(l);
676 
677  /*
678  * Since cost comparison is a lot cheaper than pathkey comparison, do
679  * that first. (XXX is that still true?)
680  */
681  if (matched_path != NULL &&
682  compare_fractional_path_costs(matched_path, path, fraction) <= 0)
683  continue;
684 
685  if (pathkeys_contained_in(pathkeys, path->pathkeys) &&
686  bms_is_subset(PATH_REQ_OUTER(path), required_outer))
687  matched_path = path;
688  }
689  return matched_path;
690 }
691 
692 
693 /*
694  * get_cheapest_parallel_safe_total_inner
695  * Find the unparameterized parallel-safe path with the least total cost.
696  */
697 Path *
699 {
700  ListCell *l;
701 
702  foreach(l, paths)
703  {
704  Path *innerpath = (Path *) lfirst(l);
705 
706  if (innerpath->parallel_safe &&
707  bms_is_empty(PATH_REQ_OUTER(innerpath)))
708  return innerpath;
709  }
710 
711  return NULL;
712 }
713 
714 /****************************************************************************
715  * NEW PATHKEY FORMATION
716  ****************************************************************************/
717 
718 /*
719  * build_index_pathkeys
720  * Build a pathkeys list that describes the ordering induced by an index
721  * scan using the given index. (Note that an unordered index doesn't
722  * induce any ordering, so we return NIL.)
723  *
724  * If 'scandir' is BackwardScanDirection, build pathkeys representing a
725  * backwards scan of the index.
726  *
727  * We iterate only key columns of covering indexes, since non-key columns
728  * don't influence index ordering. The result is canonical, meaning that
729  * redundant pathkeys are removed; it may therefore have fewer entries than
730  * there are key columns in the index.
731  *
732  * Another reason for stopping early is that we may be able to tell that
733  * an index column's sort order is uninteresting for this query. However,
734  * that test is just based on the existence of an EquivalenceClass and not
735  * on position in pathkey lists, so it's not complete. Caller should call
736  * truncate_useless_pathkeys() to possibly remove more pathkeys.
737  */
738 List *
741  ScanDirection scandir)
742 {
743  List *retval = NIL;
744  ListCell *lc;
745  int i;
746 
747  if (index->sortopfamily == NULL)
748  return NIL; /* non-orderable index */
749 
750  i = 0;
751  foreach(lc, index->indextlist)
752  {
753  TargetEntry *indextle = (TargetEntry *) lfirst(lc);
754  Expr *indexkey;
755  bool reverse_sort;
756  bool nulls_first;
757  PathKey *cpathkey;
758 
759  /*
760  * INCLUDE columns are stored in index unordered, so they don't
761  * support ordered index scan.
762  */
763  if (i >= index->nkeycolumns)
764  break;
765 
766  /* We assume we don't need to make a copy of the tlist item */
767  indexkey = indextle->expr;
768 
769  if (ScanDirectionIsBackward(scandir))
770  {
771  reverse_sort = !index->reverse_sort[i];
772  nulls_first = !index->nulls_first[i];
773  }
774  else
775  {
776  reverse_sort = index->reverse_sort[i];
777  nulls_first = index->nulls_first[i];
778  }
779 
780  /*
781  * OK, try to make a canonical pathkey for this sort key.
782  */
783  cpathkey = make_pathkey_from_sortinfo(root,
784  indexkey,
785  index->sortopfamily[i],
786  index->opcintype[i],
787  index->indexcollations[i],
788  reverse_sort,
789  nulls_first,
790  0,
791  index->rel->relids,
792  false);
793 
794  if (cpathkey)
795  {
796  /*
797  * We found the sort key in an EquivalenceClass, so it's relevant
798  * for this query. Add it to list, unless it's redundant.
799  */
800  if (!pathkey_is_redundant(cpathkey, retval))
801  retval = lappend(retval, cpathkey);
802  }
803  else
804  {
805  /*
806  * Boolean index keys might be redundant even if they do not
807  * appear in an EquivalenceClass, because of our special treatment
808  * of boolean equality conditions --- see the comment for
809  * indexcol_is_bool_constant_for_query(). If that applies, we can
810  * continue to examine lower-order index columns. Otherwise, the
811  * sort key is not an interesting sort order for this query, so we
812  * should stop considering index columns; any lower-order sort
813  * keys won't be useful either.
814  */
816  break;
817  }
818 
819  i++;
820  }
821 
822  return retval;
823 }
824 
825 /*
826  * partkey_is_bool_constant_for_query
827  *
828  * If a partition key column is constrained to have a constant value by the
829  * query's WHERE conditions, then it's irrelevant for sort-order
830  * considerations. Usually that means we have a restriction clause
831  * WHERE partkeycol = constant, which gets turned into an EquivalenceClass
832  * containing a constant, which is recognized as redundant by
833  * build_partition_pathkeys(). But if the partition key column is a
834  * boolean variable (or expression), then we are not going to see such a
835  * WHERE clause, because expression preprocessing will have simplified it
836  * to "WHERE partkeycol" or "WHERE NOT partkeycol". So we are not going
837  * to have a matching EquivalenceClass (unless the query also contains
838  * "ORDER BY partkeycol"). To allow such cases to work the same as they would
839  * for non-boolean values, this function is provided to detect whether the
840  * specified partition key column matches a boolean restriction clause.
841  */
842 static bool
844 {
845  PartitionScheme partscheme = partrel->part_scheme;
846  ListCell *lc;
847 
848  /*
849  * If the partkey isn't boolean, we can't possibly get a match.
850  *
851  * Partitioning currently can only use built-in AMs, so checking for
852  * built-in boolean opfamilies is good enough.
853  */
854  if (!IsBuiltinBooleanOpfamily(partscheme->partopfamily[partkeycol]))
855  return false;
856 
857  /* Check each restriction clause for the partitioned rel */
858  foreach(lc, partrel->baserestrictinfo)
859  {
860  RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
861 
862  /* Ignore pseudoconstant quals, they won't match */
863  if (rinfo->pseudoconstant)
864  continue;
865 
866  /* See if we can match the clause's expression to the partkey column */
867  if (matches_boolean_partition_clause(rinfo, partrel, partkeycol))
868  return true;
869  }
870 
871  return false;
872 }
873 
874 /*
875  * matches_boolean_partition_clause
876  * Determine if the boolean clause described by rinfo matches
877  * partrel's partkeycol-th partition key column.
878  *
879  * "Matches" can be either an exact match (equivalent to partkey = true),
880  * or a NOT above an exact match (equivalent to partkey = false).
881  */
882 static bool
884  RelOptInfo *partrel, int partkeycol)
885 {
886  Node *clause = (Node *) rinfo->clause;
887  Node *partexpr = (Node *) linitial(partrel->partexprs[partkeycol]);
888 
889  /* Direct match? */
890  if (equal(partexpr, clause))
891  return true;
892  /* NOT clause? */
893  else if (is_notclause(clause))
894  {
895  Node *arg = (Node *) get_notclausearg((Expr *) clause);
896 
897  if (equal(partexpr, arg))
898  return true;
899  }
900 
901  return false;
902 }
903 
904 /*
905  * build_partition_pathkeys
906  * Build a pathkeys list that describes the ordering induced by the
907  * partitions of partrel, under either forward or backward scan
908  * as per scandir.
909  *
910  * Caller must have checked that the partitions are properly ordered,
911  * as detected by partitions_are_ordered().
912  *
913  * Sets *partialkeys to true if pathkeys were only built for a prefix of the
914  * partition key, or false if the pathkeys include all columns of the
915  * partition key.
916  */
917 List *
919  ScanDirection scandir, bool *partialkeys)
920 {
921  List *retval = NIL;
922  PartitionScheme partscheme = partrel->part_scheme;
923  int i;
924 
925  Assert(partscheme != NULL);
926  Assert(partitions_are_ordered(partrel->boundinfo, partrel->live_parts));
927  /* For now, we can only cope with baserels */
928  Assert(IS_SIMPLE_REL(partrel));
929 
930  for (i = 0; i < partscheme->partnatts; i++)
931  {
932  PathKey *cpathkey;
933  Expr *keyCol = (Expr *) linitial(partrel->partexprs[i]);
934 
935  /*
936  * Try to make a canonical pathkey for this partkey.
937  *
938  * We assume the PartitionDesc lists any NULL partition last, so we
939  * treat the scan like a NULLS LAST index: we have nulls_first for
940  * backwards scan only.
941  */
942  cpathkey = make_pathkey_from_sortinfo(root,
943  keyCol,
944  partscheme->partopfamily[i],
945  partscheme->partopcintype[i],
946  partscheme->partcollation[i],
947  ScanDirectionIsBackward(scandir),
948  ScanDirectionIsBackward(scandir),
949  0,
950  partrel->relids,
951  false);
952 
953 
954  if (cpathkey)
955  {
956  /*
957  * We found the sort key in an EquivalenceClass, so it's relevant
958  * for this query. Add it to list, unless it's redundant.
959  */
960  if (!pathkey_is_redundant(cpathkey, retval))
961  retval = lappend(retval, cpathkey);
962  }
963  else
964  {
965  /*
966  * Boolean partition keys might be redundant even if they do not
967  * appear in an EquivalenceClass, because of our special treatment
968  * of boolean equality conditions --- see the comment for
969  * partkey_is_bool_constant_for_query(). If that applies, we can
970  * continue to examine lower-order partition keys. Otherwise, the
971  * sort key is not an interesting sort order for this query, so we
972  * should stop considering partition columns; any lower-order sort
973  * keys won't be useful either.
974  */
975  if (!partkey_is_bool_constant_for_query(partrel, i))
976  {
977  *partialkeys = true;
978  return retval;
979  }
980  }
981  }
982 
983  *partialkeys = false;
984  return retval;
985 }
986 
987 /*
988  * build_expression_pathkey
989  * Build a pathkeys list that describes an ordering by a single expression
990  * using the given sort operator.
991  *
992  * expr and rel are as for make_pathkey_from_sortinfo.
993  * We induce the other arguments assuming default sort order for the operator.
994  *
995  * Similarly to make_pathkey_from_sortinfo, the result is NIL if create_it
996  * is false and the expression isn't already in some EquivalenceClass.
997  */
998 List *
1000  Expr *expr,
1001  Oid opno,
1002  Relids rel,
1003  bool create_it)
1004 {
1005  List *pathkeys;
1006  Oid opfamily,
1007  opcintype;
1008  int16 strategy;
1009  PathKey *cpathkey;
1010 
1011  /* Find the operator in pg_amop --- failure shouldn't happen */
1012  if (!get_ordering_op_properties(opno,
1013  &opfamily, &opcintype, &strategy))
1014  elog(ERROR, "operator %u is not a valid ordering operator",
1015  opno);
1016 
1017  cpathkey = make_pathkey_from_sortinfo(root,
1018  expr,
1019  opfamily,
1020  opcintype,
1021  exprCollation((Node *) expr),
1022  (strategy == BTGreaterStrategyNumber),
1023  (strategy == BTGreaterStrategyNumber),
1024  0,
1025  rel,
1026  create_it);
1027 
1028  if (cpathkey)
1029  pathkeys = list_make1(cpathkey);
1030  else
1031  pathkeys = NIL;
1032 
1033  return pathkeys;
1034 }
1035 
1036 /*
1037  * convert_subquery_pathkeys
1038  * Build a pathkeys list that describes the ordering of a subquery's
1039  * result, in the terms of the outer query. This is essentially a
1040  * task of conversion.
1041  *
1042  * 'rel': outer query's RelOptInfo for the subquery relation.
1043  * 'subquery_pathkeys': the subquery's output pathkeys, in its terms.
1044  * 'subquery_tlist': the subquery's output targetlist, in its terms.
1045  *
1046  * We intentionally don't do truncate_useless_pathkeys() here, because there
1047  * are situations where seeing the raw ordering of the subquery is helpful.
1048  * For example, if it returns ORDER BY x DESC, that may prompt us to
1049  * construct a mergejoin using DESC order rather than ASC order; but the
1050  * right_merge_direction heuristic would have us throw the knowledge away.
1051  */
1052 List *
1054  List *subquery_pathkeys,
1055  List *subquery_tlist)
1056 {
1057  List *retval = NIL;
1058  int retvallen = 0;
1059  int outer_query_keys = list_length(root->query_pathkeys);
1060  ListCell *i;
1061 
1062  foreach(i, subquery_pathkeys)
1063  {
1064  PathKey *sub_pathkey = (PathKey *) lfirst(i);
1065  EquivalenceClass *sub_eclass = sub_pathkey->pk_eclass;
1066  PathKey *best_pathkey = NULL;
1067 
1068  if (sub_eclass->ec_has_volatile)
1069  {
1070  /*
1071  * If the sub_pathkey's EquivalenceClass is volatile, then it must
1072  * have come from an ORDER BY clause, and we have to match it to
1073  * that same targetlist entry.
1074  */
1075  TargetEntry *tle;
1076  Var *outer_var;
1077 
1078  if (sub_eclass->ec_sortref == 0) /* can't happen */
1079  elog(ERROR, "volatile EquivalenceClass has no sortref");
1080  tle = get_sortgroupref_tle(sub_eclass->ec_sortref, subquery_tlist);
1081  Assert(tle);
1082  /* Is TLE actually available to the outer query? */
1083  outer_var = find_var_for_subquery_tle(rel, tle);
1084  if (outer_var)
1085  {
1086  /* We can represent this sub_pathkey */
1087  EquivalenceMember *sub_member;
1088  EquivalenceClass *outer_ec;
1089 
1090  Assert(list_length(sub_eclass->ec_members) == 1);
1091  sub_member = (EquivalenceMember *) linitial(sub_eclass->ec_members);
1092 
1093  /*
1094  * Note: it might look funny to be setting sortref = 0 for a
1095  * reference to a volatile sub_eclass. However, the
1096  * expression is *not* volatile in the outer query: it's just
1097  * a Var referencing whatever the subquery emitted. (IOW, the
1098  * outer query isn't going to re-execute the volatile
1099  * expression itself.) So this is okay.
1100  */
1101  outer_ec =
1103  (Expr *) outer_var,
1104  sub_eclass->ec_opfamilies,
1105  sub_member->em_datatype,
1106  sub_eclass->ec_collation,
1107  0,
1108  rel->relids,
1109  false);
1110 
1111  /*
1112  * If we don't find a matching EC, sub-pathkey isn't
1113  * interesting to the outer query
1114  */
1115  if (outer_ec)
1116  best_pathkey =
1118  outer_ec,
1119  sub_pathkey->pk_opfamily,
1120  sub_pathkey->pk_strategy,
1121  sub_pathkey->pk_nulls_first);
1122  }
1123  }
1124  else
1125  {
1126  /*
1127  * Otherwise, the sub_pathkey's EquivalenceClass could contain
1128  * multiple elements (representing knowledge that multiple items
1129  * are effectively equal). Each element might match none, one, or
1130  * more of the output columns that are visible to the outer query.
1131  * This means we may have multiple possible representations of the
1132  * sub_pathkey in the context of the outer query. Ideally we
1133  * would generate them all and put them all into an EC of the
1134  * outer query, thereby propagating equality knowledge up to the
1135  * outer query. Right now we cannot do so, because the outer
1136  * query's EquivalenceClasses are already frozen when this is
1137  * called. Instead we prefer the one that has the highest "score"
1138  * (number of EC peers, plus one if it matches the outer
1139  * query_pathkeys). This is the most likely to be useful in the
1140  * outer query.
1141  */
1142  int best_score = -1;
1143  ListCell *j;
1144 
1145  foreach(j, sub_eclass->ec_members)
1146  {
1147  EquivalenceMember *sub_member = (EquivalenceMember *) lfirst(j);
1148  Expr *sub_expr = sub_member->em_expr;
1149  Oid sub_expr_type = sub_member->em_datatype;
1150  Oid sub_expr_coll = sub_eclass->ec_collation;
1151  ListCell *k;
1152 
1153  if (sub_member->em_is_child)
1154  continue; /* ignore children here */
1155 
1156  foreach(k, subquery_tlist)
1157  {
1158  TargetEntry *tle = (TargetEntry *) lfirst(k);
1159  Var *outer_var;
1160  Expr *tle_expr;
1161  EquivalenceClass *outer_ec;
1162  PathKey *outer_pk;
1163  int score;
1164 
1165  /* Is TLE actually available to the outer query? */
1166  outer_var = find_var_for_subquery_tle(rel, tle);
1167  if (!outer_var)
1168  continue;
1169 
1170  /*
1171  * The targetlist entry is considered to match if it
1172  * matches after sort-key canonicalization. That is
1173  * needed since the sub_expr has been through the same
1174  * process.
1175  */
1176  tle_expr = canonicalize_ec_expression(tle->expr,
1177  sub_expr_type,
1178  sub_expr_coll);
1179  if (!equal(tle_expr, sub_expr))
1180  continue;
1181 
1182  /* See if we have a matching EC for the TLE */
1183  outer_ec = get_eclass_for_sort_expr(root,
1184  (Expr *) outer_var,
1185  sub_eclass->ec_opfamilies,
1186  sub_expr_type,
1187  sub_expr_coll,
1188  0,
1189  rel->relids,
1190  false);
1191 
1192  /*
1193  * If we don't find a matching EC, this sub-pathkey isn't
1194  * interesting to the outer query
1195  */
1196  if (!outer_ec)
1197  continue;
1198 
1199  outer_pk = make_canonical_pathkey(root,
1200  outer_ec,
1201  sub_pathkey->pk_opfamily,
1202  sub_pathkey->pk_strategy,
1203  sub_pathkey->pk_nulls_first);
1204  /* score = # of equivalence peers */
1205  score = list_length(outer_ec->ec_members) - 1;
1206  /* +1 if it matches the proper query_pathkeys item */
1207  if (retvallen < outer_query_keys &&
1208  list_nth(root->query_pathkeys, retvallen) == outer_pk)
1209  score++;
1210  if (score > best_score)
1211  {
1212  best_pathkey = outer_pk;
1213  best_score = score;
1214  }
1215  }
1216  }
1217  }
1218 
1219  /*
1220  * If we couldn't find a representation of this sub_pathkey, we're
1221  * done (we can't use the ones to its right, either).
1222  */
1223  if (!best_pathkey)
1224  break;
1225 
1226  /*
1227  * Eliminate redundant ordering info; could happen if outer query
1228  * equivalences subquery keys...
1229  */
1230  if (!pathkey_is_redundant(best_pathkey, retval))
1231  {
1232  retval = lappend(retval, best_pathkey);
1233  retvallen++;
1234  }
1235  }
1236 
1237  return retval;
1238 }
1239 
1240 /*
1241  * find_var_for_subquery_tle
1242  *
1243  * If the given subquery tlist entry is due to be emitted by the subquery's
1244  * scan node, return a Var for it, else return NULL.
1245  *
1246  * We need this to ensure that we don't return pathkeys describing values
1247  * that are unavailable above the level of the subquery scan.
1248  */
1249 static Var *
1251 {
1252  ListCell *lc;
1253 
1254  /* If the TLE is resjunk, it's certainly not visible to the outer query */
1255  if (tle->resjunk)
1256  return NULL;
1257 
1258  /* Search the rel's targetlist to see what it will return */
1259  foreach(lc, rel->reltarget->exprs)
1260  {
1261  Var *var = (Var *) lfirst(lc);
1262 
1263  /* Ignore placeholders */
1264  if (!IsA(var, Var))
1265  continue;
1266  Assert(var->varno == rel->relid);
1267 
1268  /* If we find a Var referencing this TLE, we're good */
1269  if (var->varattno == tle->resno)
1270  return copyObject(var); /* Make a copy for safety */
1271  }
1272  return NULL;
1273 }
1274 
1275 /*
1276  * build_join_pathkeys
1277  * Build the path keys for a join relation constructed by mergejoin or
1278  * nestloop join. This is normally the same as the outer path's keys.
1279  *
1280  * EXCEPTION: in a FULL, RIGHT or RIGHT_ANTI join, we cannot treat the
1281  * result as having the outer path's path keys, because null lefthand rows
1282  * may be inserted at random points. It must be treated as unsorted.
1283  *
1284  * We truncate away any pathkeys that are uninteresting for higher joins.
1285  *
1286  * 'joinrel' is the join relation that paths are being formed for
1287  * 'jointype' is the join type (inner, left, full, etc)
1288  * 'outer_pathkeys' is the list of the current outer path's path keys
1289  *
1290  * Returns the list of new path keys.
1291  */
1292 List *
1294  RelOptInfo *joinrel,
1295  JoinType jointype,
1296  List *outer_pathkeys)
1297 {
1298  /* RIGHT_SEMI should not come here */
1299  Assert(jointype != JOIN_RIGHT_SEMI);
1300 
1301  if (jointype == JOIN_FULL ||
1302  jointype == JOIN_RIGHT ||
1303  jointype == JOIN_RIGHT_ANTI)
1304  return NIL;
1305 
1306  /*
1307  * This used to be quite a complex bit of code, but now that all pathkey
1308  * sublists start out life canonicalized, we don't have to do a darn thing
1309  * here!
1310  *
1311  * We do, however, need to truncate the pathkeys list, since it may
1312  * contain pathkeys that were useful for forming this joinrel but are
1313  * uninteresting to higher levels.
1314  */
1315  return truncate_useless_pathkeys(root, joinrel, outer_pathkeys);
1316 }
1317 
1318 /****************************************************************************
1319  * PATHKEYS AND SORT CLAUSES
1320  ****************************************************************************/
1321 
1322 /*
1323  * make_pathkeys_for_sortclauses
1324  * Generate a pathkeys list that represents the sort order specified
1325  * by a list of SortGroupClauses
1326  *
1327  * The resulting PathKeys are always in canonical form. (Actually, there
1328  * is no longer any code anywhere that creates non-canonical PathKeys.)
1329  *
1330  * 'sortclauses' is a list of SortGroupClause nodes
1331  * 'tlist' is the targetlist to find the referenced tlist entries in
1332  */
1333 List *
1335  List *sortclauses,
1336  List *tlist)
1337 {
1338  List *result;
1339  bool sortable;
1340 
1342  &sortclauses,
1343  tlist,
1344  false,
1345  false,
1346  &sortable,
1347  false);
1348  /* It's caller error if not all clauses were sortable */
1349  Assert(sortable);
1350  return result;
1351 }
1352 
1353 /*
1354  * make_pathkeys_for_sortclauses_extended
1355  * Generate a pathkeys list that represents the sort order specified
1356  * by a list of SortGroupClauses
1357  *
1358  * The comments for make_pathkeys_for_sortclauses apply here too. In addition:
1359  *
1360  * If remove_redundant is true, then any sort clauses that are found to
1361  * give rise to redundant pathkeys are removed from the sortclauses list
1362  * (which therefore must be pass-by-reference in this version).
1363  *
1364  * If remove_group_rtindex is true, then we need to remove the RT index of the
1365  * grouping step from the sort expressions before we make PathKeys for them.
1366  *
1367  * *sortable is set to true if all the sort clauses are in fact sortable.
1368  * If any are not, they are ignored except for setting *sortable false.
1369  * (In that case, the output pathkey list isn't really useful. However,
1370  * we process the whole sortclauses list anyway, because it's still valid
1371  * to remove any clauses that can be proven redundant via the eclass logic.
1372  * Even though we'll have to hash in that case, we might as well not hash
1373  * redundant columns.)
1374  *
1375  * If set_ec_sortref is true then sets the value of the pathkey's
1376  * EquivalenceClass unless it's already initialized.
1377  */
1378 List *
1380  List **sortclauses,
1381  List *tlist,
1382  bool remove_redundant,
1383  bool remove_group_rtindex,
1384  bool *sortable,
1385  bool set_ec_sortref)
1386 {
1387  List *pathkeys = NIL;
1388  ListCell *l;
1389 
1390  *sortable = true;
1391  foreach(l, *sortclauses)
1392  {
1393  SortGroupClause *sortcl = (SortGroupClause *) lfirst(l);
1394  Expr *sortkey;
1395  PathKey *pathkey;
1396 
1397  sortkey = (Expr *) get_sortgroupclause_expr(sortcl, tlist);
1398  if (!OidIsValid(sortcl->sortop))
1399  {
1400  *sortable = false;
1401  continue;
1402  }
1403  if (remove_group_rtindex)
1404  {
1405  Assert(root->group_rtindex > 0);
1406  sortkey = (Expr *)
1407  remove_nulling_relids((Node *) sortkey,
1408  bms_make_singleton(root->group_rtindex),
1409  NULL);
1410  }
1411  pathkey = make_pathkey_from_sortop(root,
1412  sortkey,
1413  sortcl->sortop,
1414  sortcl->nulls_first,
1415  sortcl->tleSortGroupRef,
1416  true);
1417  if (pathkey->pk_eclass->ec_sortref == 0 && set_ec_sortref)
1418  {
1419  /*
1420  * Copy the sortref if it hasn't been set yet. That may happen if
1421  * the EquivalenceClass was constructed from a WHERE clause, i.e.
1422  * it doesn't have a target reference at all.
1423  */
1424  pathkey->pk_eclass->ec_sortref = sortcl->tleSortGroupRef;
1425  }
1426 
1427  /* Canonical form eliminates redundant ordering keys */
1428  if (!pathkey_is_redundant(pathkey, pathkeys))
1429  pathkeys = lappend(pathkeys, pathkey);
1430  else if (remove_redundant)
1431  *sortclauses = foreach_delete_current(*sortclauses, l);
1432  }
1433  return pathkeys;
1434 }
1435 
1436 /****************************************************************************
1437  * PATHKEYS AND MERGECLAUSES
1438  ****************************************************************************/
1439 
1440 /*
1441  * initialize_mergeclause_eclasses
1442  * Set the EquivalenceClass links in a mergeclause restrictinfo.
1443  *
1444  * RestrictInfo contains fields in which we may cache pointers to
1445  * EquivalenceClasses for the left and right inputs of the mergeclause.
1446  * (If the mergeclause is a true equivalence clause these will be the
1447  * same EquivalenceClass, otherwise not.) If the mergeclause is either
1448  * used to generate an EquivalenceClass, or derived from an EquivalenceClass,
1449  * then it's easy to set up the left_ec and right_ec members --- otherwise,
1450  * this function should be called to set them up. We will generate new
1451  * EquivalenceClauses if necessary to represent the mergeclause's left and
1452  * right sides.
1453  *
1454  * Note this is called before EC merging is complete, so the links won't
1455  * necessarily point to canonical ECs. Before they are actually used for
1456  * anything, update_mergeclause_eclasses must be called to ensure that
1457  * they've been updated to point to canonical ECs.
1458  */
1459 void
1461 {
1462  Expr *clause = restrictinfo->clause;
1463  Oid lefttype,
1464  righttype;
1465 
1466  /* Should be a mergeclause ... */
1467  Assert(restrictinfo->mergeopfamilies != NIL);
1468  /* ... with links not yet set */
1469  Assert(restrictinfo->left_ec == NULL);
1470  Assert(restrictinfo->right_ec == NULL);
1471 
1472  /* Need the declared input types of the operator */
1473  op_input_types(((OpExpr *) clause)->opno, &lefttype, &righttype);
1474 
1475  /* Find or create a matching EquivalenceClass for each side */
1476  restrictinfo->left_ec =
1478  (Expr *) get_leftop(clause),
1479  restrictinfo->mergeopfamilies,
1480  lefttype,
1481  ((OpExpr *) clause)->inputcollid,
1482  0,
1483  NULL,
1484  true);
1485  restrictinfo->right_ec =
1487  (Expr *) get_rightop(clause),
1488  restrictinfo->mergeopfamilies,
1489  righttype,
1490  ((OpExpr *) clause)->inputcollid,
1491  0,
1492  NULL,
1493  true);
1494 }
1495 
1496 /*
1497  * update_mergeclause_eclasses
1498  * Make the cached EquivalenceClass links valid in a mergeclause
1499  * restrictinfo.
1500  *
1501  * These pointers should have been set by process_equivalence or
1502  * initialize_mergeclause_eclasses, but they might have been set to
1503  * non-canonical ECs that got merged later. Chase up to the canonical
1504  * merged parent if so.
1505  */
1506 void
1508 {
1509  /* Should be a merge clause ... */
1510  Assert(restrictinfo->mergeopfamilies != NIL);
1511  /* ... with pointers already set */
1512  Assert(restrictinfo->left_ec != NULL);
1513  Assert(restrictinfo->right_ec != NULL);
1514 
1515  /* Chase up to the top as needed */
1516  while (restrictinfo->left_ec->ec_merged)
1517  restrictinfo->left_ec = restrictinfo->left_ec->ec_merged;
1518  while (restrictinfo->right_ec->ec_merged)
1519  restrictinfo->right_ec = restrictinfo->right_ec->ec_merged;
1520 }
1521 
1522 /*
1523  * find_mergeclauses_for_outer_pathkeys
1524  * This routine attempts to find a list of mergeclauses that can be
1525  * used with a specified ordering for the join's outer relation.
1526  * If successful, it returns a list of mergeclauses.
1527  *
1528  * 'pathkeys' is a pathkeys list showing the ordering of an outer-rel path.
1529  * 'restrictinfos' is a list of mergejoinable restriction clauses for the
1530  * join relation being formed, in no particular order.
1531  *
1532  * The restrictinfos must be marked (via outer_is_left) to show which side
1533  * of each clause is associated with the current outer path. (See
1534  * select_mergejoin_clauses())
1535  *
1536  * The result is NIL if no merge can be done, else a maximal list of
1537  * usable mergeclauses (represented as a list of their restrictinfo nodes).
1538  * The list is ordered to match the pathkeys, as required for execution.
1539  */
1540 List *
1542  List *pathkeys,
1543  List *restrictinfos)
1544 {
1545  List *mergeclauses = NIL;
1546  ListCell *i;
1547 
1548  /* make sure we have eclasses cached in the clauses */
1549  foreach(i, restrictinfos)
1550  {
1551  RestrictInfo *rinfo = (RestrictInfo *) lfirst(i);
1552 
1554  }
1555 
1556  foreach(i, pathkeys)
1557  {
1558  PathKey *pathkey = (PathKey *) lfirst(i);
1559  EquivalenceClass *pathkey_ec = pathkey->pk_eclass;
1560  List *matched_restrictinfos = NIL;
1561  ListCell *j;
1562 
1563  /*----------
1564  * A mergejoin clause matches a pathkey if it has the same EC.
1565  * If there are multiple matching clauses, take them all. In plain
1566  * inner-join scenarios we expect only one match, because
1567  * equivalence-class processing will have removed any redundant
1568  * mergeclauses. However, in outer-join scenarios there might be
1569  * multiple matches. An example is
1570  *
1571  * select * from a full join b
1572  * on a.v1 = b.v1 and a.v2 = b.v2 and a.v1 = b.v2;
1573  *
1574  * Given the pathkeys ({a.v1}, {a.v2}) it is okay to return all three
1575  * clauses (in the order a.v1=b.v1, a.v1=b.v2, a.v2=b.v2) and indeed
1576  * we *must* do so or we will be unable to form a valid plan.
1577  *
1578  * We expect that the given pathkeys list is canonical, which means
1579  * no two members have the same EC, so it's not possible for this
1580  * code to enter the same mergeclause into the result list twice.
1581  *
1582  * It's possible that multiple matching clauses might have different
1583  * ECs on the other side, in which case the order we put them into our
1584  * result makes a difference in the pathkeys required for the inner
1585  * input rel. However this routine hasn't got any info about which
1586  * order would be best, so we don't worry about that.
1587  *
1588  * It's also possible that the selected mergejoin clauses produce
1589  * a noncanonical ordering of pathkeys for the inner side, ie, we
1590  * might select clauses that reference b.v1, b.v2, b.v1 in that
1591  * order. This is not harmful in itself, though it suggests that
1592  * the clauses are partially redundant. Since the alternative is
1593  * to omit mergejoin clauses and thereby possibly fail to generate a
1594  * plan altogether, we live with it. make_inner_pathkeys_for_merge()
1595  * has to delete duplicates when it constructs the inner pathkeys
1596  * list, and we also have to deal with such cases specially in
1597  * create_mergejoin_plan().
1598  *----------
1599  */
1600  foreach(j, restrictinfos)
1601  {
1602  RestrictInfo *rinfo = (RestrictInfo *) lfirst(j);
1603  EquivalenceClass *clause_ec;
1604 
1605  clause_ec = rinfo->outer_is_left ?
1606  rinfo->left_ec : rinfo->right_ec;
1607  if (clause_ec == pathkey_ec)
1608  matched_restrictinfos = lappend(matched_restrictinfos, rinfo);
1609  }
1610 
1611  /*
1612  * If we didn't find a mergeclause, we're done --- any additional
1613  * sort-key positions in the pathkeys are useless. (But we can still
1614  * mergejoin if we found at least one mergeclause.)
1615  */
1616  if (matched_restrictinfos == NIL)
1617  break;
1618 
1619  /*
1620  * If we did find usable mergeclause(s) for this sort-key position,
1621  * add them to result list.
1622  */
1623  mergeclauses = list_concat(mergeclauses, matched_restrictinfos);
1624  }
1625 
1626  return mergeclauses;
1627 }
1628 
1629 /*
1630  * select_outer_pathkeys_for_merge
1631  * Builds a pathkey list representing a possible sort ordering
1632  * that can be used with the given mergeclauses.
1633  *
1634  * 'mergeclauses' is a list of RestrictInfos for mergejoin clauses
1635  * that will be used in a merge join.
1636  * 'joinrel' is the join relation we are trying to construct.
1637  *
1638  * The restrictinfos must be marked (via outer_is_left) to show which side
1639  * of each clause is associated with the current outer path. (See
1640  * select_mergejoin_clauses())
1641  *
1642  * Returns a pathkeys list that can be applied to the outer relation.
1643  *
1644  * Since we assume here that a sort is required, there is no particular use
1645  * in matching any available ordering of the outerrel. (joinpath.c has an
1646  * entirely separate code path for considering sort-free mergejoins.) Rather,
1647  * it's interesting to try to match, or match a prefix of the requested
1648  * query_pathkeys so that a second output sort may be avoided or an
1649  * incremental sort may be done instead. We can get away with just a prefix
1650  * of the query_pathkeys when that prefix covers the entire join condition.
1651  * Failing that, we try to list "more popular" keys (those with the most
1652  * unmatched EquivalenceClass peers) earlier, in hopes of making the resulting
1653  * ordering useful for as many higher-level mergejoins as possible.
1654  */
1655 List *
1657  List *mergeclauses,
1658  RelOptInfo *joinrel)
1659 {
1660  List *pathkeys = NIL;
1661  int nClauses = list_length(mergeclauses);
1662  EquivalenceClass **ecs;
1663  int *scores;
1664  int necs;
1665  ListCell *lc;
1666  int j;
1667 
1668  /* Might have no mergeclauses */
1669  if (nClauses == 0)
1670  return NIL;
1671 
1672  /*
1673  * Make arrays of the ECs used by the mergeclauses (dropping any
1674  * duplicates) and their "popularity" scores.
1675  */
1676  ecs = (EquivalenceClass **) palloc(nClauses * sizeof(EquivalenceClass *));
1677  scores = (int *) palloc(nClauses * sizeof(int));
1678  necs = 0;
1679 
1680  foreach(lc, mergeclauses)
1681  {
1682  RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
1683  EquivalenceClass *oeclass;
1684  int score;
1685  ListCell *lc2;
1686 
1687  /* get the outer eclass */
1689 
1690  if (rinfo->outer_is_left)
1691  oeclass = rinfo->left_ec;
1692  else
1693  oeclass = rinfo->right_ec;
1694 
1695  /* reject duplicates */
1696  for (j = 0; j < necs; j++)
1697  {
1698  if (ecs[j] == oeclass)
1699  break;
1700  }
1701  if (j < necs)
1702  continue;
1703 
1704  /* compute score */
1705  score = 0;
1706  foreach(lc2, oeclass->ec_members)
1707  {
1709 
1710  /* Potential future join partner? */
1711  if (!em->em_is_const && !em->em_is_child &&
1712  !bms_overlap(em->em_relids, joinrel->relids))
1713  score++;
1714  }
1715 
1716  ecs[necs] = oeclass;
1717  scores[necs] = score;
1718  necs++;
1719  }
1720 
1721  /*
1722  * Find out if we have all the ECs mentioned in query_pathkeys; if so we
1723  * can generate a sort order that's also useful for final output. If we
1724  * only have a prefix of the query_pathkeys, and that prefix is the entire
1725  * join condition, then it's useful to use the prefix as the pathkeys as
1726  * this increases the chances that an incremental sort will be able to be
1727  * used by the upper planner.
1728  */
1729  if (root->query_pathkeys)
1730  {
1731  int matches = 0;
1732 
1733  foreach(lc, root->query_pathkeys)
1734  {
1735  PathKey *query_pathkey = (PathKey *) lfirst(lc);
1736  EquivalenceClass *query_ec = query_pathkey->pk_eclass;
1737 
1738  for (j = 0; j < necs; j++)
1739  {
1740  if (ecs[j] == query_ec)
1741  break; /* found match */
1742  }
1743  if (j >= necs)
1744  break; /* didn't find match */
1745 
1746  matches++;
1747  }
1748  /* if we got to the end of the list, we have them all */
1749  if (lc == NULL)
1750  {
1751  /* copy query_pathkeys as starting point for our output */
1752  pathkeys = list_copy(root->query_pathkeys);
1753  /* mark their ECs as already-emitted */
1754  foreach(lc, root->query_pathkeys)
1755  {
1756  PathKey *query_pathkey = (PathKey *) lfirst(lc);
1757  EquivalenceClass *query_ec = query_pathkey->pk_eclass;
1758 
1759  for (j = 0; j < necs; j++)
1760  {
1761  if (ecs[j] == query_ec)
1762  {
1763  scores[j] = -1;
1764  break;
1765  }
1766  }
1767  }
1768  }
1769 
1770  /*
1771  * If we didn't match to all of the query_pathkeys, but did match to
1772  * all of the join clauses then we'll make use of these as partially
1773  * sorted input is better than nothing for the upper planner as it may
1774  * lead to incremental sorts instead of full sorts.
1775  */
1776  else if (matches == nClauses)
1777  {
1778  pathkeys = list_copy_head(root->query_pathkeys, matches);
1779 
1780  /* we have all of the join pathkeys, so nothing more to do */
1781  pfree(ecs);
1782  pfree(scores);
1783 
1784  return pathkeys;
1785  }
1786  }
1787 
1788  /*
1789  * Add remaining ECs to the list in popularity order, using a default sort
1790  * ordering. (We could use qsort() here, but the list length is usually
1791  * so small it's not worth it.)
1792  */
1793  for (;;)
1794  {
1795  int best_j;
1796  int best_score;
1797  EquivalenceClass *ec;
1798  PathKey *pathkey;
1799 
1800  best_j = 0;
1801  best_score = scores[0];
1802  for (j = 1; j < necs; j++)
1803  {
1804  if (scores[j] > best_score)
1805  {
1806  best_j = j;
1807  best_score = scores[j];
1808  }
1809  }
1810  if (best_score < 0)
1811  break; /* all done */
1812  ec = ecs[best_j];
1813  scores[best_j] = -1;
1814  pathkey = make_canonical_pathkey(root,
1815  ec,
1818  false);
1819  /* can't be redundant because no duplicate ECs */
1820  Assert(!pathkey_is_redundant(pathkey, pathkeys));
1821  pathkeys = lappend(pathkeys, pathkey);
1822  }
1823 
1824  pfree(ecs);
1825  pfree(scores);
1826 
1827  return pathkeys;
1828 }
1829 
1830 /*
1831  * make_inner_pathkeys_for_merge
1832  * Builds a pathkey list representing the explicit sort order that
1833  * must be applied to an inner path to make it usable with the
1834  * given mergeclauses.
1835  *
1836  * 'mergeclauses' is a list of RestrictInfos for the mergejoin clauses
1837  * that will be used in a merge join, in order.
1838  * 'outer_pathkeys' are the already-known canonical pathkeys for the outer
1839  * side of the join.
1840  *
1841  * The restrictinfos must be marked (via outer_is_left) to show which side
1842  * of each clause is associated with the current outer path. (See
1843  * select_mergejoin_clauses())
1844  *
1845  * Returns a pathkeys list that can be applied to the inner relation.
1846  *
1847  * Note that it is not this routine's job to decide whether sorting is
1848  * actually needed for a particular input path. Assume a sort is necessary;
1849  * just make the keys, eh?
1850  */
1851 List *
1853  List *mergeclauses,
1854  List *outer_pathkeys)
1855 {
1856  List *pathkeys = NIL;
1857  EquivalenceClass *lastoeclass;
1858  PathKey *opathkey;
1859  ListCell *lc;
1860  ListCell *lop;
1861 
1862  lastoeclass = NULL;
1863  opathkey = NULL;
1864  lop = list_head(outer_pathkeys);
1865 
1866  foreach(lc, mergeclauses)
1867  {
1868  RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
1869  EquivalenceClass *oeclass;
1870  EquivalenceClass *ieclass;
1871  PathKey *pathkey;
1872 
1874 
1875  if (rinfo->outer_is_left)
1876  {
1877  oeclass = rinfo->left_ec;
1878  ieclass = rinfo->right_ec;
1879  }
1880  else
1881  {
1882  oeclass = rinfo->right_ec;
1883  ieclass = rinfo->left_ec;
1884  }
1885 
1886  /* outer eclass should match current or next pathkeys */
1887  /* we check this carefully for debugging reasons */
1888  if (oeclass != lastoeclass)
1889  {
1890  if (!lop)
1891  elog(ERROR, "too few pathkeys for mergeclauses");
1892  opathkey = (PathKey *) lfirst(lop);
1893  lop = lnext(outer_pathkeys, lop);
1894  lastoeclass = opathkey->pk_eclass;
1895  if (oeclass != lastoeclass)
1896  elog(ERROR, "outer pathkeys do not match mergeclause");
1897  }
1898 
1899  /*
1900  * Often, we'll have same EC on both sides, in which case the outer
1901  * pathkey is also canonical for the inner side, and we can skip a
1902  * useless search.
1903  */
1904  if (ieclass == oeclass)
1905  pathkey = opathkey;
1906  else
1907  pathkey = make_canonical_pathkey(root,
1908  ieclass,
1909  opathkey->pk_opfamily,
1910  opathkey->pk_strategy,
1911  opathkey->pk_nulls_first);
1912 
1913  /*
1914  * Don't generate redundant pathkeys (which can happen if multiple
1915  * mergeclauses refer to the same EC). Because we do this, the output
1916  * pathkey list isn't necessarily ordered like the mergeclauses, which
1917  * complicates life for create_mergejoin_plan(). But if we didn't,
1918  * we'd have a noncanonical sort key list, which would be bad; for one
1919  * reason, it certainly wouldn't match any available sort order for
1920  * the input relation.
1921  */
1922  if (!pathkey_is_redundant(pathkey, pathkeys))
1923  pathkeys = lappend(pathkeys, pathkey);
1924  }
1925 
1926  return pathkeys;
1927 }
1928 
1929 /*
1930  * trim_mergeclauses_for_inner_pathkeys
1931  * This routine trims a list of mergeclauses to include just those that
1932  * work with a specified ordering for the join's inner relation.
1933  *
1934  * 'mergeclauses' is a list of RestrictInfos for mergejoin clauses for the
1935  * join relation being formed, in an order known to work for the
1936  * currently-considered sort ordering of the join's outer rel.
1937  * 'pathkeys' is a pathkeys list showing the ordering of an inner-rel path;
1938  * it should be equal to, or a truncation of, the result of
1939  * make_inner_pathkeys_for_merge for these mergeclauses.
1940  *
1941  * What we return will be a prefix of the given mergeclauses list.
1942  *
1943  * We need this logic because make_inner_pathkeys_for_merge's result isn't
1944  * necessarily in the same order as the mergeclauses. That means that if we
1945  * consider an inner-rel pathkey list that is a truncation of that result,
1946  * we might need to drop mergeclauses even though they match a surviving inner
1947  * pathkey. This happens when they are to the right of a mergeclause that
1948  * matches a removed inner pathkey.
1949  *
1950  * The mergeclauses must be marked (via outer_is_left) to show which side
1951  * of each clause is associated with the current outer path. (See
1952  * select_mergejoin_clauses())
1953  */
1954 List *
1956  List *mergeclauses,
1957  List *pathkeys)
1958 {
1959  List *new_mergeclauses = NIL;
1960  PathKey *pathkey;
1961  EquivalenceClass *pathkey_ec;
1962  bool matched_pathkey;
1963  ListCell *lip;
1964  ListCell *i;
1965 
1966  /* No pathkeys => no mergeclauses (though we don't expect this case) */
1967  if (pathkeys == NIL)
1968  return NIL;
1969  /* Initialize to consider first pathkey */
1970  lip = list_head(pathkeys);
1971  pathkey = (PathKey *) lfirst(lip);
1972  pathkey_ec = pathkey->pk_eclass;
1973  lip = lnext(pathkeys, lip);
1974  matched_pathkey = false;
1975 
1976  /* Scan mergeclauses to see how many we can use */
1977  foreach(i, mergeclauses)
1978  {
1979  RestrictInfo *rinfo = (RestrictInfo *) lfirst(i);
1980  EquivalenceClass *clause_ec;
1981 
1982  /* Assume we needn't do update_mergeclause_eclasses again here */
1983 
1984  /* Check clause's inner-rel EC against current pathkey */
1985  clause_ec = rinfo->outer_is_left ?
1986  rinfo->right_ec : rinfo->left_ec;
1987 
1988  /* If we don't have a match, attempt to advance to next pathkey */
1989  if (clause_ec != pathkey_ec)
1990  {
1991  /* If we had no clauses matching this inner pathkey, must stop */
1992  if (!matched_pathkey)
1993  break;
1994 
1995  /* Advance to next inner pathkey, if any */
1996  if (lip == NULL)
1997  break;
1998  pathkey = (PathKey *) lfirst(lip);
1999  pathkey_ec = pathkey->pk_eclass;
2000  lip = lnext(pathkeys, lip);
2001  matched_pathkey = false;
2002  }
2003 
2004  /* If mergeclause matches current inner pathkey, we can use it */
2005  if (clause_ec == pathkey_ec)
2006  {
2007  new_mergeclauses = lappend(new_mergeclauses, rinfo);
2008  matched_pathkey = true;
2009  }
2010  else
2011  {
2012  /* Else, no hope of adding any more mergeclauses */
2013  break;
2014  }
2015  }
2016 
2017  return new_mergeclauses;
2018 }
2019 
2020 
2021 /****************************************************************************
2022  * PATHKEY USEFULNESS CHECKS
2023  *
2024  * We only want to remember as many of the pathkeys of a path as have some
2025  * potential use, either for subsequent mergejoins or for meeting the query's
2026  * requested output ordering. This ensures that add_path() won't consider
2027  * a path to have a usefully different ordering unless it really is useful.
2028  * These routines check for usefulness of given pathkeys.
2029  ****************************************************************************/
2030 
2031 /*
2032  * pathkeys_useful_for_merging
2033  * Count the number of pathkeys that may be useful for mergejoins
2034  * above the given relation.
2035  *
2036  * We consider a pathkey potentially useful if it corresponds to the merge
2037  * ordering of either side of any joinclause for the rel. This might be
2038  * overoptimistic, since joinclauses that require different other relations
2039  * might never be usable at the same time, but trying to be exact is likely
2040  * to be more trouble than it's worth.
2041  *
2042  * To avoid doubling the number of mergejoin paths considered, we would like
2043  * to consider only one of the two scan directions (ASC or DESC) as useful
2044  * for merging for any given target column. The choice is arbitrary unless
2045  * one of the directions happens to match an ORDER BY key, in which case
2046  * that direction should be preferred, in hopes of avoiding a final sort step.
2047  * right_merge_direction() implements this heuristic.
2048  */
2049 static int
2051 {
2052  int useful = 0;
2053  ListCell *i;
2054 
2055  foreach(i, pathkeys)
2056  {
2057  PathKey *pathkey = (PathKey *) lfirst(i);
2058  bool matched = false;
2059  ListCell *j;
2060 
2061  /* If "wrong" direction, not useful for merging */
2062  if (!right_merge_direction(root, pathkey))
2063  break;
2064 
2065  /*
2066  * First look into the EquivalenceClass of the pathkey, to see if
2067  * there are any members not yet joined to the rel. If so, it's
2068  * surely possible to generate a mergejoin clause using them.
2069  */
2070  if (rel->has_eclass_joins &&
2071  eclass_useful_for_merging(root, pathkey->pk_eclass, rel))
2072  matched = true;
2073  else
2074  {
2075  /*
2076  * Otherwise search the rel's joininfo list, which contains
2077  * non-EquivalenceClass-derivable join clauses that might
2078  * nonetheless be mergejoinable.
2079  */
2080  foreach(j, rel->joininfo)
2081  {
2082  RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(j);
2083 
2084  if (restrictinfo->mergeopfamilies == NIL)
2085  continue;
2086  update_mergeclause_eclasses(root, restrictinfo);
2087 
2088  if (pathkey->pk_eclass == restrictinfo->left_ec ||
2089  pathkey->pk_eclass == restrictinfo->right_ec)
2090  {
2091  matched = true;
2092  break;
2093  }
2094  }
2095  }
2096 
2097  /*
2098  * If we didn't find a mergeclause, we're done --- any additional
2099  * sort-key positions in the pathkeys are useless. (But we can still
2100  * mergejoin if we found at least one mergeclause.)
2101  */
2102  if (matched)
2103  useful++;
2104  else
2105  break;
2106  }
2107 
2108  return useful;
2109 }
2110 
2111 /*
2112  * right_merge_direction
2113  * Check whether the pathkey embodies the preferred sort direction
2114  * for merging its target column.
2115  */
2116 static bool
2118 {
2119  ListCell *l;
2120 
2121  foreach(l, root->query_pathkeys)
2122  {
2123  PathKey *query_pathkey = (PathKey *) lfirst(l);
2124 
2125  if (pathkey->pk_eclass == query_pathkey->pk_eclass &&
2126  pathkey->pk_opfamily == query_pathkey->pk_opfamily)
2127  {
2128  /*
2129  * Found a matching query sort column. Prefer this pathkey's
2130  * direction iff it matches. Note that we ignore pk_nulls_first,
2131  * which means that a sort might be needed anyway ... but we still
2132  * want to prefer only one of the two possible directions, and we
2133  * might as well use this one.
2134  */
2135  return (pathkey->pk_strategy == query_pathkey->pk_strategy);
2136  }
2137  }
2138 
2139  /* If no matching ORDER BY request, prefer the ASC direction */
2140  return (pathkey->pk_strategy == BTLessStrategyNumber);
2141 }
2142 
2143 /*
2144  * pathkeys_useful_for_ordering
2145  * Count the number of pathkeys that are useful for meeting the
2146  * query's requested output ordering.
2147  *
2148  * Because we the have the possibility of incremental sort, a prefix list of
2149  * keys is potentially useful for improving the performance of the requested
2150  * ordering. Thus we return 0, if no valuable keys are found, or the number
2151  * of leading keys shared by the list and the requested ordering..
2152  */
2153 static int
2155 {
2156  int n_common_pathkeys;
2157 
2158  (void) pathkeys_count_contained_in(root->query_pathkeys, pathkeys,
2159  &n_common_pathkeys);
2160 
2161  return n_common_pathkeys;
2162 }
2163 
2164 /*
2165  * pathkeys_useful_for_grouping
2166  * Count the number of pathkeys that are useful for grouping (instead of
2167  * explicit sort)
2168  *
2169  * Group pathkeys could be reordered to benefit from the ordering. The
2170  * ordering may not be "complete" and may require incremental sort, but that's
2171  * fine. So we simply count prefix pathkeys with a matching group key, and
2172  * stop once we find the first pathkey without a match.
2173  *
2174  * So e.g. with pathkeys (a,b,c) and group keys (a,b,e) this determines (a,b)
2175  * pathkeys are useful for grouping, and we might do incremental sort to get
2176  * path ordered by (a,b,e).
2177  *
2178  * This logic is necessary to retain paths with ordering not matching grouping
2179  * keys directly, without the reordering.
2180  *
2181  * Returns the length of pathkey prefix with matching group keys.
2182  */
2183 static int
2185 {
2186  ListCell *key;
2187  int n = 0;
2188 
2189  /* no special ordering requested for grouping */
2190  if (root->group_pathkeys == NIL)
2191  return 0;
2192 
2193  /* walk the pathkeys and search for matching group key */
2194  foreach(key, pathkeys)
2195  {
2196  PathKey *pathkey = (PathKey *) lfirst(key);
2197 
2198  /* no matching group key, we're done */
2199  if (!list_member_ptr(root->group_pathkeys, pathkey))
2200  break;
2201 
2202  n++;
2203  }
2204 
2205  return n;
2206 }
2207 
2208 /*
2209  * pathkeys_useful_for_setop
2210  * Count the number of leading common pathkeys root's 'setop_pathkeys' in
2211  * 'pathkeys'.
2212  */
2213 static int
2215 {
2216  int n_common_pathkeys;
2217 
2218  (void) pathkeys_count_contained_in(root->setop_pathkeys, pathkeys,
2219  &n_common_pathkeys);
2220 
2221  return n_common_pathkeys;
2222 }
2223 
2224 /*
2225  * truncate_useless_pathkeys
2226  * Shorten the given pathkey list to just the useful pathkeys.
2227  */
2228 List *
2230  RelOptInfo *rel,
2231  List *pathkeys)
2232 {
2233  int nuseful;
2234  int nuseful2;
2235 
2236  nuseful = pathkeys_useful_for_merging(root, rel, pathkeys);
2237  nuseful2 = pathkeys_useful_for_ordering(root, pathkeys);
2238  if (nuseful2 > nuseful)
2239  nuseful = nuseful2;
2240  nuseful2 = pathkeys_useful_for_grouping(root, pathkeys);
2241  if (nuseful2 > nuseful)
2242  nuseful = nuseful2;
2243  nuseful2 = pathkeys_useful_for_setop(root, pathkeys);
2244  if (nuseful2 > nuseful)
2245  nuseful = nuseful2;
2246 
2247  /*
2248  * Note: not safe to modify input list destructively, but we can avoid
2249  * copying the list if we're not actually going to change it
2250  */
2251  if (nuseful == 0)
2252  return NIL;
2253  else if (nuseful == list_length(pathkeys))
2254  return pathkeys;
2255  else
2256  return list_copy_head(pathkeys, nuseful);
2257 }
2258 
2259 /*
2260  * has_useful_pathkeys
2261  * Detect whether the specified rel could have any pathkeys that are
2262  * useful according to truncate_useless_pathkeys().
2263  *
2264  * This is a cheap test that lets us skip building pathkeys at all in very
2265  * simple queries. It's OK to err in the direction of returning "true" when
2266  * there really aren't any usable pathkeys, but erring in the other direction
2267  * is bad --- so keep this in sync with the routines above!
2268  *
2269  * We could make the test more complex, for example checking to see if any of
2270  * the joinclauses are really mergejoinable, but that likely wouldn't win
2271  * often enough to repay the extra cycles. Queries with neither a join nor
2272  * a sort are reasonably common, though, so this much work seems worthwhile.
2273  */
2274 bool
2276 {
2277  if (rel->joininfo != NIL || rel->has_eclass_joins)
2278  return true; /* might be able to use pathkeys for merging */
2279  if (root->group_pathkeys != NIL)
2280  return true; /* might be able to use pathkeys for grouping */
2281  if (root->query_pathkeys != NIL)
2282  return true; /* might be able to use them for ordering */
2283  return false; /* definitely useless */
2284 }
bool bms_is_subset(const Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:412
Bitmapset * bms_make_singleton(int x)
Definition: bitmapset.c:216
bool bms_overlap(const Bitmapset *a, const Bitmapset *b)
Definition: bitmapset.c:582
#define bms_is_empty(a)
Definition: bitmapset.h:118
signed short int16
Definition: c.h:493
#define Assert(condition)
Definition: c.h:858
unsigned int Index
Definition: c.h:614
#define OidIsValid(objectId)
Definition: c.h:775
bool enable_incremental_sort
Definition: costsize.c:151
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:225
bool equal(const void *a, const void *b)
Definition: equalfuncs.c:223
Expr * canonicalize_ec_expression(Expr *expr, Oid req_type, Oid req_collation)
Definition: equivclass.c:472
EquivalenceClass * get_eclass_for_sort_expr(PlannerInfo *root, Expr *expr, List *opfamilies, Oid opcintype, Oid collation, Index sortref, Relids rel, bool create_it)
Definition: equivclass.c:587
bool eclass_useful_for_merging(PlannerInfo *root, EquivalenceClass *eclass, RelOptInfo *rel)
Definition: equivclass.c:3232
bool indexcol_is_bool_constant_for_query(PlannerInfo *root, IndexOptInfo *index, int indexcol)
Definition: indxpath.c:3614
int j
Definition: isn.c:74
int i
Definition: isn.c:73
List * list_difference_ptr(const List *list1, const List *list2)
Definition: list.c:1263
List * lappend(List *list, void *datum)
Definition: list.c:339
List * list_copy_head(const List *oldlist, int len)
Definition: list.c:1593
List * list_copy(const List *oldlist)
Definition: list.c:1573
bool list_member_ptr(const List *list, const void *datum)
Definition: list.c:682
List * list_concat_unique_ptr(List *list1, const List *list2)
Definition: list.c:1427
void list_free(List *list)
Definition: list.c:1546
List * list_concat(List *list1, const List *list2)
Definition: list.c:561
List * list_difference(const List *list1, const List *list2)
Definition: list.c:1237
List * get_mergejoin_opfamilies(Oid opno)
Definition: lsyscache.c:366
Oid get_opfamily_member(Oid opfamily, Oid lefttype, Oid righttype, int16 strategy)
Definition: lsyscache.c:166
bool get_ordering_op_properties(Oid opno, Oid *opfamily, Oid *opcintype, int16 *strategy)
Definition: lsyscache.c:207
void op_input_types(Oid opno, Oid *lefttype, Oid *righttype)
Definition: lsyscache.c:1358
void pfree(void *pointer)
Definition: mcxt.c:1521
void * palloc(Size size)
Definition: mcxt.c:1317
Oid exprCollation(const Node *expr)
Definition: nodeFuncs.c:816
static Expr * get_notclausearg(const void *notclause)
Definition: nodeFuncs.h:134
static Node * get_rightop(const void *clause)
Definition: nodeFuncs.h:95
static bool is_notclause(const void *clause)
Definition: nodeFuncs.h:125
static Node * get_leftop(const void *clause)
Definition: nodeFuncs.h:83
#define IsA(nodeptr, _type_)
Definition: nodes.h:158
#define copyObject(obj)
Definition: nodes.h:224
#define makeNode(_type_)
Definition: nodes.h:155
JoinType
Definition: nodes.h:288
@ JOIN_FULL
Definition: nodes.h:295
@ JOIN_RIGHT
Definition: nodes.h:296
@ JOIN_RIGHT_SEMI
Definition: nodes.h:309
@ JOIN_RIGHT_ANTI
Definition: nodes.h:310
bool partitions_are_ordered(PartitionBoundInfo boundinfo, Bitmapset *live_parts)
Definition: partbounds.c:2852
static bool matches_boolean_partition_clause(RestrictInfo *rinfo, RelOptInfo *partrel, int partkeycol)
Definition: pathkeys.c:883
List * build_join_pathkeys(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, List *outer_pathkeys)
Definition: pathkeys.c:1293
static int pathkeys_useful_for_setop(PlannerInfo *root, List *pathkeys)
Definition: pathkeys.c:2214
List * get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
Definition: pathkeys.c:466
List * build_expression_pathkey(PlannerInfo *root, Expr *expr, Oid opno, Relids rel, bool create_it)
Definition: pathkeys.c:999
static bool right_merge_direction(PlannerInfo *root, PathKey *pathkey)
Definition: pathkeys.c:2117
List * make_inner_pathkeys_for_merge(PlannerInfo *root, List *mergeclauses, List *outer_pathkeys)
Definition: pathkeys.c:1852
Path * get_cheapest_path_for_pathkeys(List *paths, List *pathkeys, Relids required_outer, CostSelector cost_criterion, bool require_parallel_safe)
Definition: pathkeys.c:619
bool pathkeys_count_contained_in(List *keys1, List *keys2, int *n_common)
Definition: pathkeys.c:557
List * make_pathkeys_for_sortclauses_extended(PlannerInfo *root, List **sortclauses, List *tlist, bool remove_redundant, bool remove_group_rtindex, bool *sortable, bool set_ec_sortref)
Definition: pathkeys.c:1379
List * find_mergeclauses_for_outer_pathkeys(PlannerInfo *root, List *pathkeys, List *restrictinfos)
Definition: pathkeys.c:1541
static int group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys, List **group_clauses, int num_groupby_pathkeys)
Definition: pathkeys.c:369
static PathKey * make_pathkey_from_sortop(PlannerInfo *root, Expr *expr, Oid ordering_op, bool nulls_first, Index sortref, bool create_it)
Definition: pathkeys.c:256
bool has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel)
Definition: pathkeys.c:2275
List * append_pathkeys(List *target, List *source)
Definition: pathkeys.c:107
List * truncate_useless_pathkeys(PlannerInfo *root, RelOptInfo *rel, List *pathkeys)
Definition: pathkeys.c:2229
static int pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys)
Definition: pathkeys.c:2154
List * trim_mergeclauses_for_inner_pathkeys(PlannerInfo *root, List *mergeclauses, List *pathkeys)
Definition: pathkeys.c:1955
List * select_outer_pathkeys_for_merge(PlannerInfo *root, List *mergeclauses, RelOptInfo *joinrel)
Definition: pathkeys.c:1656
void update_mergeclause_eclasses(PlannerInfo *root, RestrictInfo *restrictinfo)
Definition: pathkeys.c:1507
static Var * find_var_for_subquery_tle(RelOptInfo *rel, TargetEntry *tle)
Definition: pathkeys.c:1250
List * build_index_pathkeys(PlannerInfo *root, IndexOptInfo *index, ScanDirection scandir)
Definition: pathkeys.c:739
static int pathkeys_useful_for_grouping(PlannerInfo *root, List *pathkeys)
Definition: pathkeys.c:2184
static bool partkey_is_bool_constant_for_query(RelOptInfo *partrel, int partkeycol)
Definition: pathkeys.c:843
bool enable_group_by_reordering
Definition: pathkeys.c:32
static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys)
Definition: pathkeys.c:159
PathKey * make_canonical_pathkey(PlannerInfo *root, EquivalenceClass *eclass, Oid opfamily, int strategy, bool nulls_first)
Definition: pathkeys.c:56
Path * get_cheapest_parallel_safe_total_inner(List *paths)
Definition: pathkeys.c:698
List * make_pathkeys_for_sortclauses(PlannerInfo *root, List *sortclauses, List *tlist)
Definition: pathkeys.c:1334
static int pathkeys_useful_for_merging(PlannerInfo *root, RelOptInfo *rel, List *pathkeys)
Definition: pathkeys.c:2050
List * convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel, List *subquery_pathkeys, List *subquery_tlist)
Definition: pathkeys.c:1053
static PathKey * make_pathkey_from_sortinfo(PlannerInfo *root, Expr *expr, Oid opfamily, Oid opcintype, Oid collation, bool reverse_sort, bool nulls_first, Index sortref, Relids rel, bool create_it)
Definition: pathkeys.c:198
void initialize_mergeclause_eclasses(PlannerInfo *root, RestrictInfo *restrictinfo)
Definition: pathkeys.c:1460
Path * get_cheapest_fractional_path_for_pathkeys(List *paths, List *pathkeys, Relids required_outer, double fraction)
Definition: pathkeys.c:665
bool pathkeys_contained_in(List *keys1, List *keys2)
Definition: pathkeys.c:342
PathKeysComparison compare_pathkeys(List *keys1, List *keys2)
Definition: pathkeys.c:303
List * build_partition_pathkeys(PlannerInfo *root, RelOptInfo *partrel, ScanDirection scandir, bool *partialkeys)
Definition: pathkeys.c:918
int compare_fractional_path_costs(Path *path1, Path *path2, double fraction)
Definition: pathnode.c:124
int compare_path_costs(Path *path1, Path *path2, CostSelector criterion)
Definition: pathnode.c:69
#define EC_MUST_BE_REDUNDANT(eclass)
Definition: pathnodes.h:1409
#define IS_SIMPLE_REL(rel)
Definition: pathnodes.h:839
CostSelector
Definition: pathnodes.h:37
#define PATH_REQ_OUTER(path)
Definition: pathnodes.h:1676
PathKeysComparison
Definition: paths.h:202
@ PATHKEYS_BETTER2
Definition: paths.h:205
@ PATHKEYS_BETTER1
Definition: paths.h:204
@ PATHKEYS_DIFFERENT
Definition: paths.h:206
@ PATHKEYS_EQUAL
Definition: paths.h:203
void * arg
#define lfirst(lc)
Definition: pg_list.h:172
#define lfirst_node(type, lc)
Definition: pg_list.h:176
static int list_length(const List *l)
Definition: pg_list.h:152
#define linitial_node(type, l)
Definition: pg_list.h:181
#define NIL
Definition: pg_list.h:68
#define forboth(cell1, list1, cell2, list2)
Definition: pg_list.h:518
#define foreach_current_index(var_or_cell)
Definition: pg_list.h:403
#define foreach_delete_current(lst, var_or_cell)
Definition: pg_list.h:391
#define list_make1(x1)
Definition: pg_list.h:212
static ListCell * list_head(const List *l)
Definition: pg_list.h:128
#define for_each_from(cell, lst, N)
Definition: pg_list.h:414
#define linitial(l)
Definition: pg_list.h:178
static void * list_nth(const List *list, int n)
Definition: pg_list.h:299
static ListCell * lnext(const List *l, const ListCell *c)
Definition: pg_list.h:343
#define linitial_oid(l)
Definition: pg_list.h:180
static rewind_source * source
Definition: pg_rewind.c:89
unsigned int Oid
Definition: postgres_ext.h:31
MemoryContextSwitchTo(old_ctx)
tree ctl root
Definition: radixtree.h:1886
static struct cvec * eclass(struct vars *v, chr c, int cases)
Definition: regc_locale.c:500
static struct subre * parse(struct vars *v, int stopper, int type, struct state *init, struct state *final)
Definition: regcomp.c:715
Node * remove_nulling_relids(Node *node, const Bitmapset *removable_relids, const Bitmapset *except_relids)
#define ScanDirectionIsBackward(direction)
Definition: sdir.h:50
ScanDirection
Definition: sdir.h:25
#define BTGreaterStrategyNumber
Definition: stratnum.h:33
#define BTLessStrategyNumber
Definition: stratnum.h:29
#define BTEqualStrategyNumber
Definition: stratnum.h:31
List * ec_opfamilies
Definition: pathnodes.h:1389
Definition: pg_list.h:54
Definition: nodes.h:129
bool pk_nulls_first
Definition: pathnodes.h:1477
int pk_strategy
Definition: pathnodes.h:1476
Oid pk_opfamily
Definition: pathnodes.h:1475
List * exprs
Definition: pathnodes.h:1539
List * pathkeys
Definition: pathnodes.h:1672
bool parallel_safe
Definition: pathnodes.h:1661
List * baserestrictinfo
Definition: pathnodes.h:985
List * joininfo
Definition: pathnodes.h:991
Relids relids
Definition: pathnodes.h:871
struct PathTarget * reltarget
Definition: pathnodes.h:893
Index relid
Definition: pathnodes.h:918
bool has_eclass_joins
Definition: pathnodes.h:993
Bitmapset * live_parts
Definition: pathnodes.h:1039
Expr * clause
Definition: pathnodes.h:2571
Index tleSortGroupRef
Definition: parsenodes.h:1438
Expr * expr
Definition: primnodes.h:2186
AttrNumber resno
Definition: primnodes.h:2188
Definition: primnodes.h:248
AttrNumber varattno
Definition: primnodes.h:260
int varno
Definition: primnodes.h:255
Definition: type.h:95
TargetEntry * get_sortgroupref_tle(Index sortref, List *targetList)
Definition: tlist.c:345
SortGroupClause * get_sortgroupref_clause_noerr(Index sortref, List *clauses)
Definition: tlist.c:443
Node * get_sortgroupclause_expr(SortGroupClause *sgClause, List *targetList)
Definition: tlist.c:379