PostgreSQL Source Code  git master
geqo_main.c
Go to the documentation of this file.
1 /*------------------------------------------------------------------------
2  *
3  * geqo_main.c
4  * solution to the query optimization problem
5  * by means of a Genetic Algorithm (GA)
6  *
7  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
8  * Portions Copyright (c) 1994, Regents of the University of California
9  *
10  * src/backend/optimizer/geqo/geqo_main.c
11  *
12  *-------------------------------------------------------------------------
13  */
14 
15 /* contributed by:
16  =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=
17  * Martin Utesch * Institute of Automatic Control *
18  = = University of Mining and Technology =
19  * utesch@aut.tu-freiberg.de * Freiberg, Germany *
20  =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=
21  */
22 
23 /* -- parts of this are adapted from D. Whitley's Genitor algorithm -- */
24 
25 #include "postgres.h"
26 
27 #include <math.h>
28 
29 #include "optimizer/geqo.h"
30 
31 #include "optimizer/geqo_misc.h"
32 #if defined(CX)
34 #endif
35 #include "optimizer/geqo_pool.h"
36 #include "optimizer/geqo_random.h"
39 
40 
41 /*
42  * Configuration options
43  */
48 double Geqo_seed;
49 
50 
51 static int gimme_pool_size(int nr_rel);
52 static int gimme_number_generations(int pool_size);
53 
54 /* complain if no recombination mechanism is #define'd */
55 #if !defined(ERX) && \
56  !defined(PMX) && \
57  !defined(CX) && \
58  !defined(PX) && \
59  !defined(OX1) && \
60  !defined(OX2)
61 #error "must choose one GEQO recombination mechanism in geqo.h"
62 #endif
63 
64 
65 /*
66  * geqo
67  * solution of the query optimization problem
68  * similar to a constrained Traveling Salesman Problem (TSP)
69  */
70 
71 RelOptInfo *
72 geqo(PlannerInfo *root, int number_of_rels, List *initial_rels)
73 {
74  GeqoPrivateData private;
75  int generation;
76  Chromosome *momma;
77  Chromosome *daddy;
78  Chromosome *kid;
79  Pool *pool;
80  int pool_size,
81  number_generations;
82 
83 #ifdef GEQO_DEBUG
84  int status_interval;
85 #endif
86  Gene *best_tour;
87  RelOptInfo *best_rel;
88 
89 #if defined(ERX)
90  Edge *edge_table; /* list of edges */
91  int edge_failures = 0;
92 #endif
93 #if defined(CX) || defined(PX) || defined(OX1) || defined(OX2)
94  City *city_table; /* list of cities */
95 #endif
96 #if defined(CX)
97  int cycle_diffs = 0;
98  int mutations = 0;
99 #endif
100 
101 /* set up private information */
102  root->join_search_private = (void *) &private;
103  private.initial_rels = initial_rels;
104 
105 /* initialize private number generator */
106  geqo_set_seed(root, Geqo_seed);
107 
108 /* set GA parameters */
109  pool_size = gimme_pool_size(number_of_rels);
110  number_generations = gimme_number_generations(pool_size);
111 #ifdef GEQO_DEBUG
112  status_interval = 10;
113 #endif
114 
115 /* allocate genetic pool memory */
116  pool = alloc_pool(root, pool_size, number_of_rels);
117 
118 /* random initialization of the pool */
119  random_init_pool(root, pool);
120 
121 /* sort the pool according to cheapest path as fitness */
122  sort_pool(root, pool); /* we have to do it only one time, since all
123  * kids replace the worst individuals in
124  * future (-> geqo_pool.c:spread_chromo ) */
125 
126 #ifdef GEQO_DEBUG
127  elog(DEBUG1, "GEQO selected %d pool entries, best %.2f, worst %.2f",
128  pool_size,
129  pool->data[0].worth,
130  pool->data[pool_size - 1].worth);
131 #endif
132 
133 /* allocate chromosome momma and daddy memory */
134  momma = alloc_chromo(root, pool->string_length);
135  daddy = alloc_chromo(root, pool->string_length);
136 
137 #if defined (ERX)
138 #ifdef GEQO_DEBUG
139  elog(DEBUG2, "using edge recombination crossover [ERX]");
140 #endif
141 /* allocate edge table memory */
142  edge_table = alloc_edge_table(root, pool->string_length);
143 #elif defined(PMX)
144 #ifdef GEQO_DEBUG
145  elog(DEBUG2, "using partially matched crossover [PMX]");
146 #endif
147 /* allocate chromosome kid memory */
148  kid = alloc_chromo(root, pool->string_length);
149 #elif defined(CX)
150 #ifdef GEQO_DEBUG
151  elog(DEBUG2, "using cycle crossover [CX]");
152 #endif
153 /* allocate city table memory */
154  kid = alloc_chromo(root, pool->string_length);
155  city_table = alloc_city_table(root, pool->string_length);
156 #elif defined(PX)
157 #ifdef GEQO_DEBUG
158  elog(DEBUG2, "using position crossover [PX]");
159 #endif
160 /* allocate city table memory */
161  kid = alloc_chromo(root, pool->string_length);
162  city_table = alloc_city_table(root, pool->string_length);
163 #elif defined(OX1)
164 #ifdef GEQO_DEBUG
165  elog(DEBUG2, "using order crossover [OX1]");
166 #endif
167 /* allocate city table memory */
168  kid = alloc_chromo(root, pool->string_length);
169  city_table = alloc_city_table(root, pool->string_length);
170 #elif defined(OX2)
171 #ifdef GEQO_DEBUG
172  elog(DEBUG2, "using order crossover [OX2]");
173 #endif
174 /* allocate city table memory */
175  kid = alloc_chromo(root, pool->string_length);
176  city_table = alloc_city_table(root, pool->string_length);
177 #endif
178 
179 
180 /* my pain main part: */
181 /* iterative optimization */
182 
183  for (generation = 0; generation < number_generations; generation++)
184  {
185  /* SELECTION: using linear bias function */
186  geqo_selection(root, momma, daddy, pool, Geqo_selection_bias);
187 
188 #if defined (ERX)
189  /* EDGE RECOMBINATION CROSSOVER */
190  gimme_edge_table(root, momma->string, daddy->string, pool->string_length, edge_table);
191 
192  kid = momma;
193 
194  /* are there any edge failures ? */
195  edge_failures += gimme_tour(root, edge_table, kid->string, pool->string_length);
196 #elif defined(PMX)
197  /* PARTIALLY MATCHED CROSSOVER */
198  pmx(root, momma->string, daddy->string, kid->string, pool->string_length);
199 #elif defined(CX)
200  /* CYCLE CROSSOVER */
201  cycle_diffs = cx(root, momma->string, daddy->string, kid->string, pool->string_length, city_table);
202  /* mutate the child */
203  if (cycle_diffs == 0)
204  {
205  mutations++;
206  geqo_mutation(root, kid->string, pool->string_length);
207  }
208 #elif defined(PX)
209  /* POSITION CROSSOVER */
210  px(root, momma->string, daddy->string, kid->string, pool->string_length, city_table);
211 #elif defined(OX1)
212  /* ORDER CROSSOVER */
213  ox1(root, momma->string, daddy->string, kid->string, pool->string_length, city_table);
214 #elif defined(OX2)
215  /* ORDER CROSSOVER */
216  ox2(root, momma->string, daddy->string, kid->string, pool->string_length, city_table);
217 #endif
218 
219 
220  /* EVALUATE FITNESS */
221  kid->worth = geqo_eval(root, kid->string, pool->string_length);
222 
223  /* push the kid into the wilderness of life according to its worth */
224  spread_chromo(root, kid, pool);
225 
226 
227 #ifdef GEQO_DEBUG
228  if (status_interval && !(generation % status_interval))
229  print_gen(stdout, pool, generation);
230 #endif
231 
232  }
233 
234 
235 #if defined(ERX)
236 #if defined(GEQO_DEBUG)
237  if (edge_failures != 0)
238  elog(LOG, "[GEQO] failures: %d, average: %d",
239  edge_failures, (int) number_generations / edge_failures);
240  else
241  elog(LOG, "[GEQO] no edge failures detected");
242 #else
243  /* suppress variable-set-but-not-used warnings from some compilers */
244  (void) edge_failures;
245 #endif
246 #endif
247 
248 #if defined(CX) && defined(GEQO_DEBUG)
249  if (mutations != 0)
250  elog(LOG, "[GEQO] mutations: %d, generations: %d",
251  mutations, number_generations);
252  else
253  elog(LOG, "[GEQO] no mutations processed");
254 #endif
255 
256 #ifdef GEQO_DEBUG
257  print_pool(stdout, pool, 0, pool_size - 1);
258 #endif
259 
260 #ifdef GEQO_DEBUG
261  elog(DEBUG1, "GEQO best is %.2f after %d generations",
262  pool->data[0].worth, number_generations);
263 #endif
264 
265 
266  /*
267  * got the cheapest query tree processed by geqo; first element of the
268  * population indicates the best query tree
269  */
270  best_tour = (Gene *) pool->data[0].string;
271 
272  best_rel = gimme_tree(root, best_tour, pool->string_length);
273 
274  if (best_rel == NULL)
275  elog(ERROR, "geqo failed to make a valid plan");
276 
277  /* DBG: show the query plan */
278 #ifdef NOT_USED
279  print_plan(best_plan, root);
280 #endif
281 
282  /* ... free memory stuff */
283  free_chromo(root, momma);
284  free_chromo(root, daddy);
285 
286 #if defined (ERX)
287  free_edge_table(root, edge_table);
288 #elif defined(PMX)
289  free_chromo(root, kid);
290 #elif defined(CX)
291  free_chromo(root, kid);
292  free_city_table(root, city_table);
293 #elif defined(PX)
294  free_chromo(root, kid);
295  free_city_table(root, city_table);
296 #elif defined(OX1)
297  free_chromo(root, kid);
298  free_city_table(root, city_table);
299 #elif defined(OX2)
300  free_chromo(root, kid);
301  free_city_table(root, city_table);
302 #endif
303 
304  free_pool(root, pool);
305 
306  /* ... clear root pointer to our private storage */
307  root->join_search_private = NULL;
308 
309  return best_rel;
310 }
311 
312 
313 /*
314  * Return either configured pool size or a good default
315  *
316  * The default is based on query size (no. of relations) = 2^(QS+1),
317  * but constrained to a range based on the effort value.
318  */
319 static int
320 gimme_pool_size(int nr_rel)
321 {
322  double size;
323  int minsize;
324  int maxsize;
325 
326  /* Legal pool size *must* be at least 2, so ignore attempt to select 1 */
327  if (Geqo_pool_size >= 2)
328  return Geqo_pool_size;
329 
330  size = pow(2.0, nr_rel + 1.0);
331 
332  maxsize = 50 * Geqo_effort; /* 50 to 500 individuals */
333  if (size > maxsize)
334  return maxsize;
335 
336  minsize = 10 * Geqo_effort; /* 10 to 100 individuals */
337  if (size < minsize)
338  return minsize;
339 
340  return (int) ceil(size);
341 }
342 
343 
344 /*
345  * Return either configured number of generations or a good default
346  *
347  * The default is the same as the pool size, which allows us to be
348  * sure that less-fit individuals get pushed out of the breeding
349  * population before the run finishes.
350  */
351 static int
353 {
354  if (Geqo_generations > 0)
355  return Geqo_generations;
356 
357  return pool_size;
358 }
#define LOG
Definition: elog.h:31
#define DEBUG2
Definition: elog.h:29
#define DEBUG1
Definition: elog.h:30
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:224
float gimme_edge_table(PlannerInfo *root, Gene *tour1, Gene *tour2, int num_gene, Edge *edge_table)
Definition: geqo_erx.c:95
void free_edge_table(PlannerInfo *root, Edge *edge_table)
Definition: geqo_erx.c:76
int gimme_tour(PlannerInfo *root, Edge *edge_table, Gene *new_gene, int num_gene)
Definition: geqo_erx.c:196
Edge * alloc_edge_table(PlannerInfo *root, int num_gene)
Definition: geqo_erx.c:56
Cost geqo_eval(PlannerInfo *root, Gene *tour, int num_gene)
Definition: geqo_eval.c:57
RelOptInfo * gimme_tree(PlannerInfo *root, Gene *tour, int num_gene)
Definition: geqo_eval.c:163
int Gene
Definition: geqo_gene.h:30
int Geqo_pool_size
Definition: geqo_main.c:45
double Geqo_seed
Definition: geqo_main.c:48
int Geqo_generations
Definition: geqo_main.c:46
RelOptInfo * geqo(PlannerInfo *root, int number_of_rels, List *initial_rels)
Definition: geqo_main.c:72
static int gimme_number_generations(int pool_size)
Definition: geqo_main.c:352
static int gimme_pool_size(int nr_rel)
Definition: geqo_main.c:320
int Geqo_effort
Definition: geqo_main.c:44
double Geqo_selection_bias
Definition: geqo_main.c:47
void geqo_mutation(PlannerInfo *root, Gene *tour, int num_gene)
void sort_pool(PlannerInfo *root, Pool *pool)
Definition: geqo_pool.c:135
void free_chromo(PlannerInfo *root, Chromosome *chromo)
Definition: geqo_pool.c:176
void free_pool(PlannerInfo *root, Pool *pool)
Definition: geqo_pool.c:69
void random_init_pool(PlannerInfo *root, Pool *pool)
Definition: geqo_pool.c:91
Chromosome * alloc_chromo(PlannerInfo *root, int string_length)
Definition: geqo_pool.c:162
void spread_chromo(PlannerInfo *root, Chromosome *chromo, Pool *pool)
Definition: geqo_pool.c:187
Pool * alloc_pool(PlannerInfo *root, int pool_size, int string_length)
Definition: geqo_pool.c:42
void geqo_set_seed(PlannerInfo *root, double seed)
Definition: geqo_random.c:20
void free_city_table(PlannerInfo *root, City *city_table)
void pmx(PlannerInfo *root, Gene *tour1, Gene *tour2, Gene *offspring, int num_gene)
void ox1(PlannerInfo *root, Gene *mom, Gene *dad, Gene *offspring, int num_gene, City *city_table)
int cx(PlannerInfo *root, Gene *tour1, Gene *tour2, Gene *offspring, int num_gene, City *city_table)
void ox2(PlannerInfo *root, Gene *mom, Gene *dad, Gene *offspring, int num_gene, City *city_table)
void px(PlannerInfo *root, Gene *tour1, Gene *tour2, Gene *offspring, int num_gene, City *city_table)
City * alloc_city_table(PlannerInfo *root, int num_gene)
void geqo_selection(PlannerInfo *root, Chromosome *momma, Chromosome *daddy, Pool *pool, double bias)
static pg_noinline void Size size
Definition: slab.c:607
Cost worth
Definition: geqo_gene.h:35
Gene * string
Definition: geqo_gene.h:34
Definition: pg_list.h:54
Definition: geqo_gene.h:39
int string_length
Definition: geqo_gene.h:42
Chromosome * data
Definition: geqo_gene.h:40