66 * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
77 * Portions Copyright (c) 1994, Regents of the University of California
88 *
9- * $PostgreSQL: pgsql/src/backend/optimizer/geqo/geqo_eval.c,v 1.66 2003/11/29 19:51:50 pgsql Exp $
9+ * $PostgreSQL: pgsql/src/backend/optimizer/geqo/geqo_eval.c,v 1.67 2004/01/23 23:54:21 tgl Exp $
1010 *
1111 *-------------------------------------------------------------------------
1212 */
3131#include "utils/memutils.h"
3232
3333
34+ static bool desirable_join (Query * root ,
35+ RelOptInfo * outer_rel ,RelOptInfo * inner_rel );
36+
37+
3438/*
3539 * geqo_eval
3640 *
3741 * Returns cost of a query tree as an individual of the population.
3842 */
3943Cost
40- geqo_eval (Query * root , List * initial_rels , Gene * tour ,int num_gene )
44+ geqo_eval (Gene * tour ,int num_gene , GeqoEvalData * evaldata )
4145{
4246MemoryContext mycontext ;
4347MemoryContext oldcxt ;
@@ -52,9 +56,9 @@ geqo_eval(Query *root, List *initial_rels, Gene *tour, int num_gene)
5256 * redundant cost calculations, we simply reject tours where tour[0] >
5357 * tour[1], assigning them an artificially bad fitness.
5458 *
55- *(It would be better to tweak the GEQO logic to not generate such tours
56- *in thefirst place, but I'm not sure of all the implications in the
57- *mutation logic.)
59+ *init_tour() is aware of this rule and so we should never reject a
60+ *tour during theinitial filling of the pool. It seems difficult to
61+ *persuade the recombination logic never to break the rule, however.
5862 */
5963if (num_gene >=2 && tour [0 ]> tour [1 ])
6064return DBL_MAX ;
@@ -80,10 +84,10 @@ geqo_eval(Query *root, List *initial_rels, Gene *tour, int num_gene)
8084 * this, it'll be pointing at recycled storage after the
8185 * MemoryContextDelete below.
8286 */
83- savelist = root -> join_rel_list ;
87+ savelist = evaldata -> root -> join_rel_list ;
8488
8589/* construct the best path for the given combination of relations */
86- joinrel = gimme_tree (root , initial_rels , tour ,num_gene );
90+ joinrel = gimme_tree (tour ,num_gene , evaldata );
8791
8892/*
8993 * compute fitness
@@ -97,7 +101,7 @@ geqo_eval(Query *root, List *initial_rels, Gene *tour, int num_gene)
97101fitness = DBL_MAX ;
98102
99103/* restore join_rel_list */
100- root -> join_rel_list = savelist ;
104+ evaldata -> root -> join_rel_list = savelist ;
101105
102106/* release all the memory acquired within gimme_tree */
103107MemoryContextSwitchTo (oldcxt );
@@ -111,63 +115,156 @@ geqo_eval(Query *root, List *initial_rels, Gene *tour, int num_gene)
111115 * Form planner estimates for a join tree constructed in the specified
112116 * order.
113117 *
114- * 'root' is the Query
115- * 'initial_rels' is the list of initial relations (FROM-list items)
116118 * 'tour' is the proposed join order, of length 'num_gene'
119+ * 'evaldata' contains the context we need
117120 *
118121 * Returns a new join relation whose cheapest path is the best plan for
119122 * this join order. NB: will return NULL if join order is invalid.
120123 *
121- * Note that at each step we consider using the next rel as both left and
122- * right side of a join. However, we cannot build general ("bushy") plan
123- * trees this way, only left-sided and right-sided trees.
124+ * The original implementation of this routine always joined in the specified
125+ * order, and so could only build left-sided plans (and right-sided and
126+ * mixtures, as a byproduct of the fact that make_join_rel() is symmetric).
127+ * It could never produce a "bushy" plan. This had a couple of big problems,
128+ * of which the worst was that as of 7.4, there are situations involving IN
129+ * subqueries where the only valid plans are bushy.
130+ *
131+ * The present implementation takes the given tour as a guideline, but
132+ * postpones joins that seem unsuitable according to some heuristic rules.
133+ * This allows correct bushy plans to be generated at need, and as a nice
134+ * side-effect it seems to materially improve the quality of the generated
135+ * plans.
124136 */
125137RelOptInfo *
126- gimme_tree (Query * root ,List * initial_rels ,
127- Gene * tour ,int num_gene )
138+ gimme_tree (Gene * tour ,int num_gene ,GeqoEvalData * evaldata )
128139{
140+ RelOptInfo * * stack ;
141+ int stack_depth ;
129142RelOptInfo * joinrel ;
130- int cur_rel_index ;
131143int rel_count ;
132144
133145/*
134- *Start with the first relation .. .
146+ *Create a stack to hold not-yet-joined relations .
135147 */
136- cur_rel_index = (int )tour [0 ];
137-
138- joinrel = (RelOptInfo * )nth (cur_rel_index - 1 ,initial_rels );
148+ stack = (RelOptInfo * * )palloc (num_gene * sizeof (RelOptInfo * ));
149+ stack_depth = 0 ;
139150
140151/*
141- * And add on each relation in the specified order ...
152+ * Push each relation onto the stack in the specified order. After
153+ * pushing each relation, see whether the top two stack entries are
154+ * joinable according to the desirable_join() heuristics. If so,
155+ * join them into one stack entry, and try again to combine with the
156+ * next stack entry down (if any). When the stack top is no longer
157+ * joinable, continue to the next input relation. After we have pushed
158+ * the last input relation, the heuristics are disabled and we force
159+ * joining all the remaining stack entries.
160+ *
161+ * If desirable_join() always returns true, this produces a straight
162+ * left-to-right join just like the old code. Otherwise we may produce
163+ * a bushy plan or a left/right-sided plan that really corresponds to
164+ * some tour other than the one given. To the extent that the heuristics
165+ * are helpful, however, this will be a better plan than the raw tour.
166+ *
167+ * Also, when a join attempt fails (because of IN-clause constraints),
168+ * we may be able to recover and produce a workable plan, where the old
169+ * code just had to give up. This case acts the same as a false result
170+ * from desirable_join().
142171 */
143- for (rel_count = 1 ;rel_count < num_gene ;rel_count ++ )
172+ for (rel_count = 0 ;rel_count < num_gene ;rel_count ++ )
144173{
145- RelOptInfo * inner_rel ;
146- RelOptInfo * new_rel ;
174+ int cur_rel_index ;
147175
176+ /* Get the next input relation and push it */
148177cur_rel_index = (int )tour [rel_count ];
149-
150- inner_rel = (RelOptInfo * )nth (cur_rel_index - 1 ,initial_rels );
178+ stack [stack_depth ]= (RelOptInfo * )nth (cur_rel_index - 1 ,
179+ evaldata -> initial_rels );
180+ stack_depth ++ ;
151181
152182/*
153- * Construct a RelOptInfo representing the previous joinrel joined
154- * to inner_rel. These are always inner joins. Note that we
155- * expect the joinrel not to exist in root->join_rel_list yet, and
156- * so the paths constructed for it will only include the ones we
157- * want.
183+ * While it's feasible, pop the top two stack entries and replace
184+ * with their join.
158185 */
159- new_rel = make_join_rel (root ,joinrel ,inner_rel ,JOIN_INNER );
186+ while (stack_depth >=2 )
187+ {
188+ RelOptInfo * outer_rel = stack [stack_depth - 2 ];
189+ RelOptInfo * inner_rel = stack [stack_depth - 1 ];
190+
191+ /*
192+ * Don't pop if heuristics say not to join now. However,
193+ * once we have exhausted the input, the heuristics can't
194+ * prevent popping.
195+ */
196+ if (rel_count < num_gene - 1 &&
197+ !desirable_join (evaldata -> root ,outer_rel ,inner_rel ))
198+ break ;
160199
161- /* Fail if join order is not valid */
162- if (new_rel == NULL )
163- return NULL ;
200+ /*
201+ * Construct a RelOptInfo representing the join of these
202+ * two input relations. These are always inner joins.
203+ * Note that we expect the joinrel not to exist in
204+ * root->join_rel_list yet, and so the paths constructed for it
205+ * will only include the ones we want.
206+ */
207+ joinrel = make_join_rel (evaldata -> root ,outer_rel ,inner_rel ,
208+ JOIN_INNER );
164209
165- /* Find and save the cheapest paths for this rel */
166- set_cheapest (new_rel );
210+ /* Can't pop stack here if join order is not valid */
211+ if (!joinrel )
212+ break ;
167213
168- /* and repeat... */
169- joinrel = new_rel ;
214+ /* Find and save the cheapest paths for this rel */
215+ set_cheapest (joinrel );
216+
217+ /* Pop the stack and replace the inputs with their join */
218+ stack_depth -- ;
219+ stack [stack_depth - 1 ]= joinrel ;
220+ }
170221}
171222
223+ /* Did we succeed in forming a single join relation? */
224+ if (stack_depth == 1 )
225+ joinrel = stack [0 ];
226+ else
227+ joinrel = NULL ;
228+
229+ pfree (stack );
230+
172231return joinrel ;
173232}
233+
234+ /*
235+ * Heuristics for gimme_tree: do we want to join these two relations?
236+ */
237+ static bool
238+ desirable_join (Query * root ,
239+ RelOptInfo * outer_rel ,RelOptInfo * inner_rel )
240+ {
241+ List * i ;
242+
243+ /*
244+ * Join if there is an applicable join clause.
245+ */
246+ foreach (i ,outer_rel -> joininfo )
247+ {
248+ JoinInfo * joininfo = (JoinInfo * )lfirst (i );
249+
250+ if (bms_is_subset (joininfo -> unjoined_relids ,inner_rel -> relids ))
251+ return true;
252+ }
253+
254+ /*
255+ * Join if the rels are members of the same IN sub-select. This is
256+ * needed to improve the odds that we will find a valid solution in
257+ * a case where an IN sub-select has a clauseless join.
258+ */
259+ foreach (i ,root -> in_info_list )
260+ {
261+ InClauseInfo * ininfo = (InClauseInfo * )lfirst (i );
262+
263+ if (bms_is_subset (outer_rel -> relids ,ininfo -> righthand )&&
264+ bms_is_subset (inner_rel -> relids ,ininfo -> righthand ))
265+ return true;
266+ }
267+
268+ /* Otherwise postpone the join till later. */
269+ return false;
270+ }