PostgreSQL Source Code  git master
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros
geqo_eval.c
Go to the documentation of this file.
1 /*------------------------------------------------------------------------
2  *
3  * geqo_eval.c
4  * Routines to evaluate query trees
5  *
6  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  * src/backend/optimizer/geqo/geqo_eval.c
10  *
11  *-------------------------------------------------------------------------
12  */
13 
14 /* contributed by:
15  =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=
16  * Martin Utesch * Institute of Automatic Control *
17  = = University of Mining and Technology =
18  * utesch@aut.tu-freiberg.de * Freiberg, Germany *
19  =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=
20  */
21 
22 #include "postgres.h"
23 
24 #include <float.h>
25 #include <limits.h>
26 #include <math.h>
27 
28 #include "optimizer/geqo.h"
29 #include "optimizer/joininfo.h"
30 #include "optimizer/pathnode.h"
31 #include "optimizer/paths.h"
32 #include "utils/memutils.h"
33 
34 
35 /* A "clump" of already-joined relations within gimme_tree */
36 typedef struct
37 {
38  RelOptInfo *joinrel; /* joinrel for the set of relations */
39  int size; /* number of input relations in clump */
40 } Clump;
41 
42 static List *merge_clump(PlannerInfo *root, List *clumps, Clump *new_clump,
43  bool force);
44 static bool desirable_join(PlannerInfo *root,
45  RelOptInfo *outer_rel, RelOptInfo *inner_rel);
46 
47 
48 /*
49  * geqo_eval
50  *
51  * Returns cost of a query tree as an individual of the population.
52  *
53  * If no legal join order can be extracted from the proposed tour,
54  * returns DBL_MAX.
55  */
56 Cost
57 geqo_eval(PlannerInfo *root, Gene *tour, int num_gene)
58 {
59  MemoryContext mycontext;
60  MemoryContext oldcxt;
61  RelOptInfo *joinrel;
62  Cost fitness;
63  int savelength;
64  struct HTAB *savehash;
65 
66  /*
67  * Create a private memory context that will hold all temp storage
68  * allocated inside gimme_tree().
69  *
70  * Since geqo_eval() will be called many times, we can't afford to let all
71  * that memory go unreclaimed until end of statement. Note we make the
72  * temp context a child of the planner's normal context, so that it will
73  * be freed even if we abort via ereport(ERROR).
74  */
76  "GEQO",
78  oldcxt = MemoryContextSwitchTo(mycontext);
79 
80  /*
81  * gimme_tree will add entries to root->join_rel_list, which may or may
82  * not already contain some entries. The newly added entries will be
83  * recycled by the MemoryContextDelete below, so we must ensure that the
84  * list is restored to its former state before exiting. We can do this by
85  * truncating the list to its original length. NOTE this assumes that any
86  * added entries are appended at the end!
87  *
88  * We also must take care not to mess up the outer join_rel_hash, if there
89  * is one. We can do this by just temporarily setting the link to NULL.
90  * (If we are dealing with enough join rels, which we very likely are, a
91  * new hash table will get built and used locally.)
92  *
93  * join_rel_level[] shouldn't be in use, so just Assert it isn't.
94  */
95  savelength = list_length(root->join_rel_list);
96  savehash = root->join_rel_hash;
97  Assert(root->join_rel_level == NULL);
98 
99  root->join_rel_hash = NULL;
100 
101  /* construct the best path for the given combination of relations */
102  joinrel = gimme_tree(root, tour, num_gene);
103 
104  /*
105  * compute fitness, if we found a valid join
106  *
107  * XXX geqo does not currently support optimization for partial result
108  * retrieval, nor do we take any cognizance of possible use of
109  * parameterized paths --- how to fix?
110  */
111  if (joinrel)
112  {
113  Path *best_path = joinrel->cheapest_total_path;
114 
115  fitness = best_path->total_cost;
116  }
117  else
118  fitness = DBL_MAX;
119 
120  /*
121  * Restore join_rel_list to its former state, and put back original
122  * hashtable if any.
123  */
125  savelength);
126  root->join_rel_hash = savehash;
127 
128  /* release all the memory acquired within gimme_tree */
129  MemoryContextSwitchTo(oldcxt);
130  MemoryContextDelete(mycontext);
131 
132  return fitness;
133 }
134 
135 /*
136  * gimme_tree
137  * Form planner estimates for a join tree constructed in the specified
138  * order.
139  *
140  * 'tour' is the proposed join order, of length 'num_gene'
141  *
142  * Returns a new join relation whose cheapest path is the best plan for
143  * this join order. NB: will return NULL if join order is invalid and
144  * we can't modify it into a valid order.
145  *
146  * The original implementation of this routine always joined in the specified
147  * order, and so could only build left-sided plans (and right-sided and
148  * mixtures, as a byproduct of the fact that make_join_rel() is symmetric).
149  * It could never produce a "bushy" plan. This had a couple of big problems,
150  * of which the worst was that there are situations involving join order
151  * restrictions where the only valid plans are bushy.
152  *
153  * The present implementation takes the given tour as a guideline, but
154  * postpones joins that are illegal or seem unsuitable according to some
155  * heuristic rules. This allows correct bushy plans to be generated at need,
156  * and as a nice side-effect it seems to materially improve the quality of the
157  * generated plans. Note however that since it's just a heuristic, it can
158  * still fail in some cases. (In particular, we might clump together
159  * relations that actually mustn't be joined yet due to LATERAL restrictions;
160  * since there's no provision for un-clumping, this must lead to failure.)
161  */
162 RelOptInfo *
163 gimme_tree(PlannerInfo *root, Gene *tour, int num_gene)
164 {
166  List *clumps;
167  int rel_count;
168 
169  /*
170  * Sometimes, a relation can't yet be joined to others due to heuristics
171  * or actual semantic restrictions. We maintain a list of "clumps" of
172  * successfully joined relations, with larger clumps at the front. Each
173  * new relation from the tour is added to the first clump it can be joined
174  * to; if there is none then it becomes a new clump of its own. When we
175  * enlarge an existing clump we check to see if it can now be merged with
176  * any other clumps. After the tour is all scanned, we forget about the
177  * heuristics and try to forcibly join any remaining clumps. If we are
178  * unable to merge all the clumps into one, fail.
179  */
180  clumps = NIL;
181 
182  for (rel_count = 0; rel_count < num_gene; rel_count++)
183  {
184  int cur_rel_index;
185  RelOptInfo *cur_rel;
186  Clump *cur_clump;
187 
188  /* Get the next input relation */
189  cur_rel_index = (int) tour[rel_count];
190  cur_rel = (RelOptInfo *) list_nth(private->initial_rels,
191  cur_rel_index - 1);
192 
193  /* Make it into a single-rel clump */
194  cur_clump = (Clump *) palloc(sizeof(Clump));
195  cur_clump->joinrel = cur_rel;
196  cur_clump->size = 1;
197 
198  /* Merge it into the clumps list, using only desirable joins */
199  clumps = merge_clump(root, clumps, cur_clump, false);
200  }
201 
202  if (list_length(clumps) > 1)
203  {
204  /* Force-join the remaining clumps in some legal order */
205  List *fclumps;
206  ListCell *lc;
207 
208  fclumps = NIL;
209  foreach(lc, clumps)
210  {
211  Clump *clump = (Clump *) lfirst(lc);
212 
213  fclumps = merge_clump(root, fclumps, clump, true);
214  }
215  clumps = fclumps;
216  }
217 
218  /* Did we succeed in forming a single join relation? */
219  if (list_length(clumps) != 1)
220  return NULL;
221 
222  return ((Clump *) linitial(clumps))->joinrel;
223 }
224 
225 /*
226  * Merge a "clump" into the list of existing clumps for gimme_tree.
227  *
228  * We try to merge the clump into some existing clump, and repeat if
229  * successful. When no more merging is possible, insert the clump
230  * into the list, preserving the list ordering rule (namely, that
231  * clumps of larger size appear earlier).
232  *
233  * If force is true, merge anywhere a join is legal, even if it causes
234  * a cartesian join to be performed. When force is false, do only
235  * "desirable" joins.
236  */
237 static List *
238 merge_clump(PlannerInfo *root, List *clumps, Clump *new_clump, bool force)
239 {
240  ListCell *prev;
241  ListCell *lc;
242 
243  /* Look for a clump that new_clump can join to */
244  prev = NULL;
245  foreach(lc, clumps)
246  {
247  Clump *old_clump = (Clump *) lfirst(lc);
248 
249  if (force ||
250  desirable_join(root, old_clump->joinrel, new_clump->joinrel))
251  {
252  RelOptInfo *joinrel;
253 
254  /*
255  * Construct a RelOptInfo representing the join of these two input
256  * relations. Note that we expect the joinrel not to exist in
257  * root->join_rel_list yet, and so the paths constructed for it
258  * will only include the ones we want.
259  */
260  joinrel = make_join_rel(root,
261  old_clump->joinrel,
262  new_clump->joinrel);
263 
264  /* Keep searching if join order is not valid */
265  if (joinrel)
266  {
267  /* Create GatherPaths for any useful partial paths for rel */
268  generate_gather_paths(root, joinrel);
269 
270  /* Find and save the cheapest paths for this joinrel */
271  set_cheapest(joinrel);
272 
273  /* Absorb new clump into old */
274  old_clump->joinrel = joinrel;
275  old_clump->size += new_clump->size;
276  pfree(new_clump);
277 
278  /* Remove old_clump from list */
279  clumps = list_delete_cell(clumps, lc, prev);
280 
281  /*
282  * Recursively try to merge the enlarged old_clump with
283  * others. When no further merge is possible, we'll reinsert
284  * it into the list.
285  */
286  return merge_clump(root, clumps, old_clump, force);
287  }
288  }
289  prev = lc;
290  }
291 
292  /*
293  * No merging is possible, so add new_clump as an independent clump, in
294  * proper order according to size. We can be fast for the common case
295  * where it has size 1 --- it should always go at the end.
296  */
297  if (clumps == NIL || new_clump->size == 1)
298  return lappend(clumps, new_clump);
299 
300  /* Check if it belongs at the front */
301  lc = list_head(clumps);
302  if (new_clump->size > ((Clump *) lfirst(lc))->size)
303  return lcons(new_clump, clumps);
304 
305  /* Else search for the place to insert it */
306  for (;;)
307  {
308  ListCell *nxt = lnext(lc);
309 
310  if (nxt == NULL || new_clump->size > ((Clump *) lfirst(nxt))->size)
311  break; /* it belongs after 'lc', before 'nxt' */
312  lc = nxt;
313  }
314  lappend_cell(clumps, lc, new_clump);
315 
316  return clumps;
317 }
318 
319 /*
320  * Heuristics for gimme_tree: do we want to join these two relations?
321  */
322 static bool
324  RelOptInfo *outer_rel, RelOptInfo *inner_rel)
325 {
326  /*
327  * Join if there is an applicable join clause, or if there is a join order
328  * restriction forcing these rels to be joined.
329  */
330  if (have_relevant_joinclause(root, outer_rel, inner_rel) ||
331  have_join_order_restriction(root, outer_rel, inner_rel))
332  return true;
333 
334  /* Otherwise postpone the join till later. */
335  return false;
336 }
#define NIL
Definition: pg_list.h:69
int size
Definition: geqo_eval.c:39
void MemoryContextDelete(MemoryContext context)
Definition: mcxt.c:200
Cost geqo_eval(PlannerInfo *root, Gene *tour, int num_gene)
Definition: geqo_eval.c:57
static List * merge_clump(PlannerInfo *root, List *clumps, Clump *new_clump, bool force)
Definition: geqo_eval.c:238
void * join_search_private
Definition: relation.h:316
RelOptInfo * gimme_tree(PlannerInfo *root, Gene *tour, int num_gene)
Definition: geqo_eval.c:163
int Gene
Definition: geqo_gene.h:30
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:109
List * list_truncate(List *list, int new_size)
Definition: list.c:350
bool have_join_order_restriction(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
Definition: joinrels.c:919
RelOptInfo * joinrel
Definition: geqo_eval.c:38
List * join_rel_list
Definition: relation.h:215
ListCell * lappend_cell(List *list, ListCell *prev, void *datum)
Definition: list.c:209
Definition: dynahash.c:208
void pfree(void *pointer)
Definition: mcxt.c:950
#define linitial(l)
Definition: pg_list.h:111
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:165
struct Path * cheapest_total_path
Definition: relation.h:543
void * list_nth(const List *list, int n)
Definition: list.c:410
static ListCell * list_head(const List *l)
Definition: pg_list.h:77
MemoryContext CurrentMemoryContext
Definition: mcxt.c:37
void generate_gather_paths(PlannerInfo *root, RelOptInfo *rel)
Definition: allpaths.c:2199
#define lnext(lc)
Definition: pg_list.h:105
List * lappend(List *list, void *datum)
Definition: list.c:128
void set_cheapest(RelOptInfo *parent_rel)
Definition: pathnode.c:234
RelOptInfo * make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
Definition: joinrels.c:657
List * list_delete_cell(List *list, ListCell *cell, ListCell *prev)
Definition: list.c:528
MemoryContext AllocSetContextCreate(MemoryContext parent, const char *name, Size minContextSize, Size initBlockSize, Size maxBlockSize)
Definition: aset.c:322
Cost total_cost
Definition: relation.h:966
List * lcons(void *datum, List *list)
Definition: list.c:259
#define NULL
Definition: c.h:229
#define Assert(condition)
Definition: c.h:676
#define lfirst(lc)
Definition: pg_list.h:106
List ** join_rel_level
Definition: relation.h:225
static int list_length(const List *l)
Definition: pg_list.h:89
struct HTAB * join_rel_hash
Definition: relation.h:216
void * palloc(Size size)
Definition: mcxt.c:849
static bool desirable_join(PlannerInfo *root, RelOptInfo *outer_rel, RelOptInfo *inner_rel)
Definition: geqo_eval.c:323
bool have_relevant_joinclause(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
Definition: joininfo.c:36
Definition: pg_list.h:45
Definition: relation.h:948
double Cost
Definition: nodes.h:640