88 *
99 *
1010 * IDENTIFICATION
11- * $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.236 2008/08/02 21:32:00 tgl Exp $
11+ * $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.237 2008/08/03 19:10:52 tgl Exp $
1212 *
1313 *-------------------------------------------------------------------------
1414 */
@@ -69,11 +69,12 @@ static double preprocess_limit(PlannerInfo *root,
6969int64 * offset_est ,int64 * count_est );
7070static void preprocess_groupclause (PlannerInfo * root );
7171static Oid * extract_grouping_ops (List * groupClause );
72+ static bool grouping_is_sortable (List * groupClause );
73+ static bool grouping_is_hashable (List * groupClause );
7274static bool choose_hashed_grouping (PlannerInfo * root ,
7375double tuple_fraction ,double limit_tuples ,
7476Path * cheapest_path ,Path * sorted_path ,
75- Oid * groupOperators ,double dNumGroups ,
76- AggClauseCounts * agg_counts );
77+ double dNumGroups ,AggClauseCounts * agg_counts );
7778static List * make_subplanTargetList (PlannerInfo * root ,List * tlist ,
7879AttrNumber * * groupColIdx ,bool * need_tlist_eval );
7980static void locate_grouping_columns (PlannerInfo * root ,
@@ -839,7 +840,6 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
839840List * sub_tlist ;
840841List * group_pathkeys ;
841842AttrNumber * groupColIdx = NULL ;
842- Oid * groupOperators = NULL ;
843843bool need_tlist_eval = true;
844844QualCost tlist_cost ;
845845Path * cheapest_path ;
@@ -877,11 +877,15 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
877877 * DISTINCT and ORDER BY requirements. This should be changed
878878 * someday, but DISTINCT ON is a bit of a problem ...
879879 */
880- root -> group_pathkeys =
881- make_pathkeys_for_sortclauses (root ,
882- parse -> groupClause ,
883- tlist ,
884- false);
880+ if (parse -> groupClause && grouping_is_sortable (parse -> groupClause ))
881+ root -> group_pathkeys =
882+ make_pathkeys_for_sortclauses (root ,
883+ parse -> groupClause ,
884+ tlist ,
885+ false);
886+ else
887+ root -> group_pathkeys = NIL ;
888+
885889if (list_length (parse -> distinctClause )> list_length (parse -> sortClause ))
886890root -> sort_pathkeys =
887891make_pathkeys_for_sortclauses (root ,
@@ -915,12 +919,12 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
915919/*
916920 * Figure out whether we need a sorted result from query_planner.
917921 *
918- * If we have a GROUP BY clause, then we want a result sorted properly
919- * for grouping. Otherwise, if there is an ORDER BY clause, we want
920- * to sort by the ORDER BY clause. (Note: if we have both, and ORDER
921- * BY is a superset of GROUP BY, it would be tempting to request sort
922- * by ORDER BY --- but that might just leave us failing to exploit an
923- * available sort order at all. Needs more thought...)
922+ * If we have asortable GROUP BY clause, then we want a result sorted
923+ *properly for grouping. Otherwise, if there is an ORDER BY clause,
924+ *we want to sort by the ORDER BY clause. (Note: if we have both, and
925+ *ORDER BY is a superset of GROUP BY, it would be tempting to request
926+ *sort by ORDER BY --- but that might just leave us failing to
927+ *exploit an available sort order at all. Needs more thought...)
924928 */
925929if (root -> group_pathkeys )
926930root -> query_pathkeys = root -> group_pathkeys ;
@@ -942,17 +946,39 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
942946sort_pathkeys = root -> sort_pathkeys ;
943947
944948/*
945- * If grouping, extract the grouping operators and decide whether we
946- * want to use hashed grouping.
949+ * If grouping, decide whether to use sorted or hashed grouping.
947950 */
948951if (parse -> groupClause )
949952{
950- groupOperators = extract_grouping_ops (parse -> groupClause );
951- use_hashed_grouping =
952- choose_hashed_grouping (root ,tuple_fraction ,limit_tuples ,
953- cheapest_path ,sorted_path ,
954- groupOperators ,dNumGroups ,
955- & agg_counts );
953+ bool can_hash ;
954+ bool can_sort ;
955+
956+ /*
957+ * Executor doesn't support hashed aggregation with DISTINCT
958+ * aggregates. (Doing so would imply storing *all* the input
959+ * values in the hash table, which seems like a certain loser.)
960+ */
961+ can_hash = (agg_counts .numDistinctAggs == 0 &&
962+ grouping_is_hashable (parse -> groupClause ));
963+ can_sort = grouping_is_sortable (parse -> groupClause );
964+ if (can_hash && can_sort )
965+ {
966+ /* we have a meaningful choice to make ... */
967+ use_hashed_grouping =
968+ choose_hashed_grouping (root ,
969+ tuple_fraction ,limit_tuples ,
970+ cheapest_path ,sorted_path ,
971+ dNumGroups ,& agg_counts );
972+ }
973+ else if (can_hash )
974+ use_hashed_grouping = true;
975+ else if (can_sort )
976+ use_hashed_grouping = false;
977+ else
978+ ereport (ERROR ,
979+ (errcode (ERRCODE_FEATURE_NOT_SUPPORTED ),
980+ errmsg ("could not implement GROUP BY" ),
981+ errdetail ("Some of the datatypes only support hashing, while others only support sorting." )));
956982
957983/* Also convert # groups to long int --- but 'ware overflow! */
958984numGroups = (long )Min (dNumGroups , (double )LONG_MAX );
@@ -1088,7 +1114,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
10881114AGG_HASHED ,
10891115numGroupCols ,
10901116groupColIdx ,
1091- groupOperators ,
1117+ extract_grouping_ops ( parse -> groupClause ) ,
10921118numGroups ,
10931119agg_counts .numAggs ,
10941120result_plan );
@@ -1131,7 +1157,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
11311157aggstrategy ,
11321158numGroupCols ,
11331159groupColIdx ,
1134- groupOperators ,
1160+ extract_grouping_ops ( parse -> groupClause ) ,
11351161numGroups ,
11361162agg_counts .numAggs ,
11371163result_plan );
@@ -1160,7 +1186,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
11601186 (List * )parse -> havingQual ,
11611187numGroupCols ,
11621188groupColIdx ,
1163- groupOperators ,
1189+ extract_grouping_ops ( parse -> groupClause ) ,
11641190dNumGroups ,
11651191result_plan );
11661192/* The Group node won't change sort ordering */
@@ -1495,6 +1521,9 @@ preprocess_limit(PlannerInfo *root, double tuple_fraction,
14951521 * GROUP BY elements, which could match the sort ordering of other
14961522 * possible plans (eg an indexscan) and thereby reduce cost. We don't
14971523 * bother with that, though. Hashed grouping will frequently win anyway.
1524+ *
1525+ * Note: we need no comparable processing of the distinctClause because
1526+ * the parser already enforced that that matches ORDER BY.
14981527 */
14991528static void
15001529preprocess_groupclause (PlannerInfo * root )
@@ -1505,7 +1534,7 @@ preprocess_groupclause(PlannerInfo *root)
15051534ListCell * sl ;
15061535ListCell * gl ;
15071536
1508- /* If no ORDER BY, nothing useful to do hereanyway */
1537+ /* If no ORDER BY, nothing useful to do here */
15091538if (parse -> sortClause == NIL )
15101539return ;
15111540
@@ -1546,7 +1575,8 @@ preprocess_groupclause(PlannerInfo *root)
15461575 * were able to make a complete match. In other words, we only
15471576 * rearrange the GROUP BY list if the result is that one list is a
15481577 * prefix of the other --- otherwise there's no possibility of a
1549- * common sort.
1578+ * common sort. Also, give up if there are any non-sortable GROUP BY
1579+ * items, since then there's no hope anyway.
15501580 */
15511581foreach (gl ,parse -> groupClause )
15521582{
@@ -1556,6 +1586,8 @@ preprocess_groupclause(PlannerInfo *root)
15561586continue ;/* it matched an ORDER BY item */
15571587if (partial_match )
15581588return ;/* give up, no common sort possible */
1589+ if (!OidIsValid (gc -> sortop ))
1590+ return ;/* give up, GROUP BY can't be sorted */
15591591new_groupclause = lappend (new_groupclause ,gc );
15601592}
15611593
@@ -1566,7 +1598,7 @@ preprocess_groupclause(PlannerInfo *root)
15661598
15671599/*
15681600 * extract_grouping_ops - make an array of the equality operator OIDs
1569- *forthe GROUP BY clause
1601+ *fora SortGroupClause list
15701602 */
15711603static Oid *
15721604extract_grouping_ops (List * groupClause )
@@ -1590,15 +1622,59 @@ extract_grouping_ops(List *groupClause)
15901622return groupOperators ;
15911623}
15921624
1625+ /*
1626+ * grouping_is_sortable - is it possible to implement grouping list by sorting?
1627+ *
1628+ * This is easy since the parser will have included a sortop if one exists.
1629+ */
1630+ static bool
1631+ grouping_is_sortable (List * groupClause )
1632+ {
1633+ ListCell * glitem ;
1634+
1635+ foreach (glitem ,groupClause )
1636+ {
1637+ SortGroupClause * groupcl = (SortGroupClause * )lfirst (glitem );
1638+
1639+ if (!OidIsValid (groupcl -> sortop ))
1640+ return false;
1641+ }
1642+ return true;
1643+ }
1644+
1645+ /*
1646+ * grouping_is_hashable - is it possible to implement grouping list by hashing?
1647+ *
1648+ * We assume hashing is OK if the equality operators are marked oprcanhash.
1649+ * (If there isn't actually a supporting hash function, the executor will
1650+ * complain at runtime; but this is a misdeclaration of the operator, not
1651+ * a system bug.)
1652+ */
1653+ static bool
1654+ grouping_is_hashable (List * groupClause )
1655+ {
1656+ ListCell * glitem ;
1657+
1658+ foreach (glitem ,groupClause )
1659+ {
1660+ SortGroupClause * groupcl = (SortGroupClause * )lfirst (glitem );
1661+
1662+ if (!op_hashjoinable (groupcl -> eqop ))
1663+ return false;
1664+ }
1665+ return true;
1666+ }
1667+
15931668/*
15941669 * choose_hashed_grouping - should we use hashed grouping?
1670+ *
1671+ * Note: this is only applied when both alternatives are actually feasible.
15951672 */
15961673static bool
15971674choose_hashed_grouping (PlannerInfo * root ,
15981675double tuple_fraction ,double limit_tuples ,
15991676Path * cheapest_path ,Path * sorted_path ,
1600- Oid * groupOperators ,double dNumGroups ,
1601- AggClauseCounts * agg_counts )
1677+ double dNumGroups ,AggClauseCounts * agg_counts )
16021678{
16031679int numGroupCols = list_length (root -> parse -> groupClause );
16041680double cheapest_path_rows ;
@@ -1607,27 +1683,10 @@ choose_hashed_grouping(PlannerInfo *root,
16071683List * current_pathkeys ;
16081684Path hashed_p ;
16091685Path sorted_p ;
1610- int i ;
16111686
1612- /*
1613- * Check can't-do-it conditions, including whether the grouping operators
1614- * are hashjoinable. (We assume hashing is OK if they are marked
1615- * oprcanhash.If there isn't actually a supporting hash function, the
1616- * executor will complain at runtime.)
1617- *
1618- * Executor doesn't support hashed aggregation with DISTINCT aggregates.
1619- * (Doing so would imply storing *all* the input values in the hash table,
1620- * which seems like a certain loser.)
1621- */
1687+ /* Prefer sorting when enable_hashagg is off */
16221688if (!enable_hashagg )
16231689return false;
1624- if (agg_counts -> numDistinctAggs != 0 )
1625- return false;
1626- for (i = 0 ;i < numGroupCols ;i ++ )
1627- {
1628- if (!op_hashjoinable (groupOperators [i ]))
1629- return false;
1630- }
16311690
16321691/*
16331692 * Don't do it if it doesn't look like the hashtable will fit into