1515 *
1616 *
1717 * IDENTIFICATION
18- * $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.249 2008/05/12 00:00:51 alvherre Exp $
18+ * $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.250 2008/07/07 20:24:55 tgl Exp $
1919 *
2020 *-------------------------------------------------------------------------
2121 */
@@ -2645,7 +2645,11 @@ add_unique_group_var(PlannerInfo *root, List *varinfos,
26452645 * case (all possible cross-product terms actually appear as groups) since
26462646 * very often the grouped-by Vars are highly correlated. Our current approach
26472647 * is as follows:
2648- *1.Reduce the given expressions to a list of unique Vars used. For
2648+ *1.Expressions yielding boolean are assumed to contribute two groups,
2649+ *independently of their content, and are ignored in the subsequent
2650+ *steps. This is mainly because tests like "col IS NULL" break the
2651+ *heuristic used in step 2 especially badly.
2652+ *2.Reduce the given expressions to a list of unique Vars used. For
26492653 *example, GROUP BY a, a + b is treated the same as GROUP BY a, b.
26502654 *It is clearly correct not to count the same Var more than once.
26512655 *It is also reasonable to treat f(x) the same as x: f() cannot
@@ -2655,14 +2659,14 @@ add_unique_group_var(PlannerInfo *root, List *varinfos,
26552659 *As a special case, if a GROUP BY expression can be matched to an
26562660 *expressional index for which we have statistics, then we treat the
26572661 *whole expression as though it were just a Var.
2658- *2 .If the list contains Vars of different relations that are known equal
2662+ *3 .If the list contains Vars of different relations that are known equal
26592663 *due to equivalence classes, then drop all but one of the Vars from each
26602664 *known-equal set, keeping the one with smallest estimated # of values
26612665 *(since the extra values of the others can't appear in joined rows).
26622666 *Note the reason we only consider Vars of different relations is that
26632667 *if we considered ones of the same rel, we'd be double-counting the
26642668 *restriction selectivity of the equality in the next step.
2665- *3 .For Vars within a single source rel, we multiply together the numbers
2669+ *4 .For Vars within a single source rel, we multiply together the numbers
26662670 *of values, clamp to the number of rows in the rel (divided by 10 if
26672671 *more than one Var), and then multiply by the selectivity of the
26682672 *restriction clauses for that rel. When there's more than one Var,
@@ -2673,7 +2677,7 @@ add_unique_group_var(PlannerInfo *root, List *varinfos,
26732677 *by the restriction selectivity is effectively assuming that the
26742678 *restriction clauses are independent of the grouping, which is a crummy
26752679 *assumption, but it's hard to do better.
2676- *4 .If there are Vars from multiple rels, we repeat step3 for each such
2680+ *5 .If there are Vars from multiple rels, we repeat step4 for each such
26772681 *rel, and multiply the results together.
26782682 * Note that rels not containing grouped Vars are ignored completely, as are
26792683 * join clauses. Such rels cannot increase the number of groups, and we
@@ -2691,18 +2695,28 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows)
26912695Assert (groupExprs != NIL );
26922696
26932697/*
2694- * Steps 1/2: find the unique Vars used, treating an expression as a Var
2698+ * Count groups derived from boolean grouping expressions. For other
2699+ * expressions, find the unique Vars used, treating an expression as a Var
26952700 * if we can find stats for it. For each one, record the statistical
26962701 * estimate of number of distinct values (total in its table, without
26972702 * regard for filtering).
26982703 */
2704+ numdistinct = 1.0 ;
2705+
26992706foreach (l ,groupExprs )
27002707{
27012708Node * groupexpr = (Node * )lfirst (l );
27022709VariableStatData vardata ;
27032710List * varshere ;
27042711ListCell * l2 ;
27052712
2713+ /* Short-circuit for expressions returning boolean */
2714+ if (exprType (groupexpr )== BOOLOID )
2715+ {
2716+ numdistinct *=2.0 ;
2717+ continue ;
2718+ }
2719+
27062720/*
27072721 * If examine_variable is able to deduce anything about the GROUP BY
27082722 * expression, treat it as a single variable even if it's really more
@@ -2749,20 +2763,26 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows)
27492763}
27502764}
27512765
2752- /* If now no Vars, we must have an all-constant GROUP BY list. */
2766+ /*
2767+ * If now no Vars, we must have an all-constant or all-boolean GROUP BY
2768+ * list.
2769+ */
27532770if (varinfos == NIL )
2754- return 1.0 ;
2771+ {
2772+ /* Guard against out-of-range answers */
2773+ if (numdistinct > input_rows )
2774+ numdistinct = input_rows ;
2775+ return numdistinct ;
2776+ }
27552777
27562778/*
2757- *Steps 3/4: group Vars by relation and estimate total numdistinct.
2779+ *Group Vars by relation and estimate total numdistinct.
27582780 *
27592781 * For each iteration of the outer loop, we process the frontmost Var in
27602782 * varinfos, plus all other Vars in the same relation.We remove these
27612783 * Vars from the newvarinfos list for the next iteration. This is the
27622784 * easiest way to group Vars of same rel together.
27632785 */
2764- numdistinct = 1.0 ;
2765-
27662786do
27672787{
27682788GroupVarInfo * varinfo1 = (GroupVarInfo * )linitial (varinfos );