1414 *
1515 *
1616 * IDENTIFICATION
17- * $PostgreSQL: pgsql/src/backend/optimizer/plan/planmain.c,v 1.86 2005/07/02 23:00:41 tgl Exp $
17+ * $PostgreSQL: pgsql/src/backend/optimizer/plan/planmain.c,v 1.87 2005/08/27 22:13:43 tgl Exp $
1818 *
1919 *-------------------------------------------------------------------------
2020 */
2525#include "optimizer/pathnode.h"
2626#include "optimizer/paths.h"
2727#include "optimizer/planmain.h"
28+ #include "optimizer/tlist.h"
29+ #include "utils/selfuncs.h"
2830
2931
30- /*--------------------
32+ /*
3133 * query_planner
3234 * Generate a path (that is, a simplified plan) for a basic query,
3335 * which may involve joins but not any fancier features.
5153 * *cheapest_path receives the overall-cheapest path for the query
5254 * *sorted_path receives the cheapest presorted path for the query,
5355 *if any (NULL if there is no useful presorted path)
56+ * *num_groups receives the estimated number of groups, or 1 if query
57+ *does not use grouping
5458 *
5559 * Note: the PlannerInfo node also includes a query_pathkeys field, which is
5660 * both an input and an output of query_planner(). The input value signals
6165 * PlannerInfo field and not a passed parameter is that the low-level routines
6266 * in indxpath.c need to see it.)
6367 *
68+ * Note: the PlannerInfo node also includes group_pathkeys and sort_pathkeys,
69+ * which like query_pathkeys need to be canonicalized once the info is
70+ * available.
71+ *
6472 * tuple_fraction is interpreted as follows:
6573 * 0: expect all tuples to be retrieved (normal case)
6674 * 0 < tuple_fraction < 1: expect the given fraction of tuples available
6775 *from the plan to be retrieved
6876 * tuple_fraction >= 1: tuple_fraction is the absolute number of tuples
6977 *expected to be retrieved (ie, a LIMIT specification)
70- *--------------------
7178 */
7279void
7380query_planner (PlannerInfo * root ,List * tlist ,double tuple_fraction ,
74- Path * * cheapest_path ,Path * * sorted_path )
81+ Path * * cheapest_path ,Path * * sorted_path ,
82+ double * num_groups )
7583{
7684Query * parse = root -> parse ;
7785List * constant_quals ;
@@ -82,6 +90,8 @@ query_planner(PlannerInfo *root, List *tlist, double tuple_fraction,
8290/* Make tuple_fraction accessible to lower-level routines */
8391root -> tuple_fraction = tuple_fraction ;
8492
93+ * num_groups = 1 ;/* default result */
94+
8595/*
8696 * If the query has an empty join tree, then it's something easy like
8797 * "SELECT 2+2;" or "INSERT ... VALUES()".Fall through quickly.
@@ -156,9 +166,12 @@ query_planner(PlannerInfo *root, List *tlist, double tuple_fraction,
156166/*
157167 * We should now have all the pathkey equivalence sets built, so it's
158168 * now possible to convert the requested query_pathkeys to canonical
159- * form.
169+ * form. Also canonicalize the groupClause and sortClause pathkeys
170+ * for use later.
160171 */
161172root -> query_pathkeys = canonicalize_pathkeys (root ,root -> query_pathkeys );
173+ root -> group_pathkeys = canonicalize_pathkeys (root ,root -> group_pathkeys );
174+ root -> sort_pathkeys = canonicalize_pathkeys (root ,root -> sort_pathkeys );
162175
163176/*
164177 * Ready to do the primary planning.
@@ -169,12 +182,87 @@ query_planner(PlannerInfo *root, List *tlist, double tuple_fraction,
169182elog (ERROR ,"failed to construct the join relation" );
170183
171184/*
172- * Now that we have an estimate of the final rel's size, we can
173- * convert a tuple_fraction specified as an absolute count (ie, a
174- * LIMIT option) into a fraction of the total tuples.
185+ * If there's grouping going on, estimate the number of result groups.
186+ * We couldn't do this any earlier because it depends on relation size
187+ * estimates that were set up above.
188+ *
189+ * Then convert tuple_fraction to fractional form if it is absolute,
190+ * and adjust it based on the knowledge that grouping_planner will be
191+ * doing grouping or aggregation work with our result.
192+ *
193+ * This introduces some undesirable coupling between this code and
194+ * grouping_planner, but the alternatives seem even uglier; we couldn't
195+ * pass back completed paths without making these decisions here.
175196 */
176- if (tuple_fraction >=1.0 )
177- tuple_fraction /=final_rel -> rows ;
197+ if (parse -> groupClause )
198+ {
199+ List * groupExprs ;
200+
201+ groupExprs = get_sortgrouplist_exprs (parse -> groupClause ,
202+ parse -> targetList );
203+ * num_groups = estimate_num_groups (root ,
204+ groupExprs ,
205+ final_rel -> rows );
206+
207+ /*
208+ * In GROUP BY mode, an absolute LIMIT is relative to the number
209+ * of groups not the number of tuples. If the caller gave us
210+ * a fraction, keep it as-is. (In both cases, we are effectively
211+ * assuming that all the groups are about the same size.)
212+ */
213+ if (tuple_fraction >=1.0 )
214+ tuple_fraction /=* num_groups ;
215+
216+ /*
217+ * If both GROUP BY and ORDER BY are specified, we will need two
218+ * levels of sort --- and, therefore, certainly need to read all
219+ * the tuples --- unless ORDER BY is a subset of GROUP BY.
220+ */
221+ if (parse -> groupClause && parse -> sortClause &&
222+ !pathkeys_contained_in (root -> sort_pathkeys ,root -> group_pathkeys ))
223+ tuple_fraction = 0.0 ;
224+ }
225+ else if (parse -> hasAggs || root -> hasHavingQual )
226+ {
227+ /*
228+ * Ungrouped aggregate will certainly want to read all the tuples,
229+ * and it will deliver a single result row (so leave *num_groups 1).
230+ */
231+ tuple_fraction = 0.0 ;
232+ }
233+ else if (parse -> distinctClause )
234+ {
235+ /*
236+ * Since there was no grouping or aggregation, it's reasonable to
237+ * assume the UNIQUE filter has effects comparable to GROUP BY.
238+ * Return the estimated number of output rows for use by caller.
239+ * (If DISTINCT is used with grouping, we ignore its effects for
240+ * rowcount estimation purposes; this amounts to assuming the grouped
241+ * rows are distinct already.)
242+ */
243+ List * distinctExprs ;
244+
245+ distinctExprs = get_sortgrouplist_exprs (parse -> distinctClause ,
246+ parse -> targetList );
247+ * num_groups = estimate_num_groups (root ,
248+ distinctExprs ,
249+ final_rel -> rows );
250+
251+ /*
252+ * Adjust tuple_fraction the same way as for GROUP BY, too.
253+ */
254+ if (tuple_fraction >=1.0 )
255+ tuple_fraction /=* num_groups ;
256+ }
257+ else
258+ {
259+ /*
260+ * Plain non-grouped, non-aggregated query: an absolute tuple
261+ * fraction can be divided by the number of tuples.
262+ */
263+ if (tuple_fraction >=1.0 )
264+ tuple_fraction /=final_rel -> rows ;
265+ }
178266
179267/*
180268 * Pick out the cheapest-total path and the cheapest presorted path