15
15
*
16
16
*
17
17
* IDENTIFICATION
18
- * $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.249 2008/05/12 00:00:51 alvherre Exp $
18
+ * $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.250 2008/07/07 20:24:55 tgl Exp $
19
19
*
20
20
*-------------------------------------------------------------------------
21
21
*/
@@ -2645,7 +2645,11 @@ add_unique_group_var(PlannerInfo *root, List *varinfos,
2645
2645
* case (all possible cross-product terms actually appear as groups) since
2646
2646
* very often the grouped-by Vars are highly correlated. Our current approach
2647
2647
* is as follows:
2648
- *1.Reduce the given expressions to a list of unique Vars used. For
2648
+ *1.Expressions yielding boolean are assumed to contribute two groups,
2649
+ *independently of their content, and are ignored in the subsequent
2650
+ *steps. This is mainly because tests like "col IS NULL" break the
2651
+ *heuristic used in step 2 especially badly.
2652
+ *2.Reduce the given expressions to a list of unique Vars used. For
2649
2653
*example, GROUP BY a, a + b is treated the same as GROUP BY a, b.
2650
2654
*It is clearly correct not to count the same Var more than once.
2651
2655
*It is also reasonable to treat f(x) the same as x: f() cannot
@@ -2655,14 +2659,14 @@ add_unique_group_var(PlannerInfo *root, List *varinfos,
2655
2659
*As a special case, if a GROUP BY expression can be matched to an
2656
2660
*expressional index for which we have statistics, then we treat the
2657
2661
*whole expression as though it were just a Var.
2658
- *2 .If the list contains Vars of different relations that are known equal
2662
+ *3 .If the list contains Vars of different relations that are known equal
2659
2663
*due to equivalence classes, then drop all but one of the Vars from each
2660
2664
*known-equal set, keeping the one with smallest estimated # of values
2661
2665
*(since the extra values of the others can't appear in joined rows).
2662
2666
*Note the reason we only consider Vars of different relations is that
2663
2667
*if we considered ones of the same rel, we'd be double-counting the
2664
2668
*restriction selectivity of the equality in the next step.
2665
- *3 .For Vars within a single source rel, we multiply together the numbers
2669
+ *4 .For Vars within a single source rel, we multiply together the numbers
2666
2670
*of values, clamp to the number of rows in the rel (divided by 10 if
2667
2671
*more than one Var), and then multiply by the selectivity of the
2668
2672
*restriction clauses for that rel. When there's more than one Var,
@@ -2673,7 +2677,7 @@ add_unique_group_var(PlannerInfo *root, List *varinfos,
2673
2677
*by the restriction selectivity is effectively assuming that the
2674
2678
*restriction clauses are independent of the grouping, which is a crummy
2675
2679
*assumption, but it's hard to do better.
2676
- *4 .If there are Vars from multiple rels, we repeat step3 for each such
2680
+ *5 .If there are Vars from multiple rels, we repeat step4 for each such
2677
2681
*rel, and multiply the results together.
2678
2682
* Note that rels not containing grouped Vars are ignored completely, as are
2679
2683
* join clauses. Such rels cannot increase the number of groups, and we
@@ -2691,18 +2695,28 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows)
2691
2695
Assert (groupExprs != NIL );
2692
2696
2693
2697
/*
2694
- * Steps 1/2: find the unique Vars used, treating an expression as a Var
2698
+ * Count groups derived from boolean grouping expressions. For other
2699
+ * expressions, find the unique Vars used, treating an expression as a Var
2695
2700
* if we can find stats for it. For each one, record the statistical
2696
2701
* estimate of number of distinct values (total in its table, without
2697
2702
* regard for filtering).
2698
2703
*/
2704
+ numdistinct = 1.0 ;
2705
+
2699
2706
foreach (l ,groupExprs )
2700
2707
{
2701
2708
Node * groupexpr = (Node * )lfirst (l );
2702
2709
VariableStatData vardata ;
2703
2710
List * varshere ;
2704
2711
ListCell * l2 ;
2705
2712
2713
+ /* Short-circuit for expressions returning boolean */
2714
+ if (exprType (groupexpr )== BOOLOID )
2715
+ {
2716
+ numdistinct *=2.0 ;
2717
+ continue ;
2718
+ }
2719
+
2706
2720
/*
2707
2721
* If examine_variable is able to deduce anything about the GROUP BY
2708
2722
* expression, treat it as a single variable even if it's really more
@@ -2749,20 +2763,26 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows)
2749
2763
}
2750
2764
}
2751
2765
2752
- /* If now no Vars, we must have an all-constant GROUP BY list. */
2766
+ /*
2767
+ * If now no Vars, we must have an all-constant or all-boolean GROUP BY
2768
+ * list.
2769
+ */
2753
2770
if (varinfos == NIL )
2754
- return 1.0 ;
2771
+ {
2772
+ /* Guard against out-of-range answers */
2773
+ if (numdistinct > input_rows )
2774
+ numdistinct = input_rows ;
2775
+ return numdistinct ;
2776
+ }
2755
2777
2756
2778
/*
2757
- *Steps 3/4: group Vars by relation and estimate total numdistinct.
2779
+ *Group Vars by relation and estimate total numdistinct.
2758
2780
*
2759
2781
* For each iteration of the outer loop, we process the frontmost Var in
2760
2782
* varinfos, plus all other Vars in the same relation.We remove these
2761
2783
* Vars from the newvarinfos list for the next iteration. This is the
2762
2784
* easiest way to group Vars of same rel together.
2763
2785
*/
2764
- numdistinct = 1.0 ;
2765
-
2766
2786
do
2767
2787
{
2768
2788
GroupVarInfo * varinfo1 = (GroupVarInfo * )linitial (varinfos );