Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit6985592

Browse files
committed
Split out into a separate function the code in grouping_planner() that
decides whether to use hashed grouping instead of sort-plus-uniqgrouping. The function needs an annoyingly large number of parameters,but this still seems like a win for legibility, since it removes overa hundred lines from grouping_planner (which is still too big :-().
1 parent313de22 commit6985592

File tree

1 file changed

+156
-140
lines changed

1 file changed

+156
-140
lines changed

‎src/backend/optimizer/plan/planner.c

Lines changed: 156 additions & 140 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.182 2005/04/06 16:34:05 tgl Exp $
11+
* $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.183 2005/04/10 19:50:08 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -58,6 +58,10 @@ static Node *preprocess_expression(Query *parse, Node *expr, int kind);
5858
staticvoidpreprocess_qual_conditions(Query*parse,Node*jtnode);
5959
staticPlan*inheritance_planner(Query*parse,List*inheritlist);
6060
staticPlan*grouping_planner(Query*parse,doubletuple_fraction);
61+
staticboolchoose_hashed_grouping(Query*parse,doubletuple_fraction,
62+
Path*cheapest_path,Path*sorted_path,
63+
List*sort_pathkeys,List*group_pathkeys,
64+
doubledNumGroups,AggClauseCounts*agg_counts);
6165
staticboolhash_safe_grouping(Query*parse);
6266
staticList*make_subplanTargetList(Query*parse,List*tlist,
6367
AttrNumber**groupColIdx,bool*need_tlist_eval);
@@ -920,34 +924,25 @@ grouping_planner(Query *parse, double tuple_fraction)
920924
sort_pathkeys=canonicalize_pathkeys(parse,sort_pathkeys);
921925

922926
/*
923-
* Consider whether we might want to use hashed grouping.
927+
* If grouping, estimate the number of groups. (We can't do this
928+
* until after running query_planner(), either.) Then decide
929+
* whether we want to use hashed grouping.
924930
*/
925931
if (parse->groupClause)
926932
{
927933
List*groupExprs;
928934
doublecheapest_path_rows;
929-
intcheapest_path_width;
930935

931936
/*
932-
* Bewarein this sectionof the possibility that
933-
*cheapest_path->parent is NULL.This could happen if user
934-
* does something silly likeSELECT 'foo' GROUP BY 1;
937+
* Beware of the possibility that cheapest_path->parent is NULL.
938+
* This could happen if user does something silly like
939+
*SELECT 'foo' GROUP BY 1;
935940
*/
936941
if (cheapest_path->parent)
937-
{
938942
cheapest_path_rows=cheapest_path->parent->rows;
939-
cheapest_path_width=cheapest_path->parent->width;
940-
}
941943
else
942-
{
943944
cheapest_path_rows=1;/* assume non-set result */
944-
cheapest_path_width=100;/* arbitrary */
945-
}
946945

947-
/*
948-
* Always estimate the number of groups. We can't do this
949-
* until after running query_planner(), either.
950-
*/
951946
groupExprs=get_sortgrouplist_exprs(parse->groupClause,
952947
parse->targetList);
953948
dNumGroups=estimate_num_groups(parse,
@@ -956,130 +951,11 @@ grouping_planner(Query *parse, double tuple_fraction)
956951
/* Also want it as a long int --- but 'ware overflow! */
957952
numGroups= (long)Min(dNumGroups, (double)LONG_MAX);
958953

959-
/*
960-
* Check can't-do-it conditions, including whether the
961-
* grouping operators are hashjoinable.
962-
*
963-
* Executor doesn't support hashed aggregation with DISTINCT
964-
* aggregates.(Doing so would imply storing *all* the input
965-
* values in the hash table, which seems like a certain
966-
* loser.)
967-
*/
968-
if (!enable_hashagg|| !hash_safe_grouping(parse))
969-
use_hashed_grouping= false;
970-
elseif (agg_counts.numDistinctAggs!=0)
971-
use_hashed_grouping= false;
972-
else
973-
{
974-
/*
975-
* Use hashed grouping if (a) we think we can fit the
976-
* hashtable into work_mem, *and* (b) the estimated cost
977-
* is no more than doing it the other way.While avoiding
978-
* the need for sorted input is usually a win, the fact
979-
* that the output won't be sorted may be a loss; so we
980-
* need to do an actual cost comparison.
981-
*/
982-
Sizehashentrysize;
983-
984-
/* Estimate per-hash-entry space at tuple width... */
985-
hashentrysize=cheapest_path_width;
986-
/* plus space for pass-by-ref transition values... */
987-
hashentrysize+=agg_counts.transitionSpace;
988-
/* plus the per-hash-entry overhead */
989-
hashentrysize+=hash_agg_entry_size(agg_counts.numAggs);
990-
991-
if (hashentrysize*dNumGroups <=work_mem*1024L)
992-
{
993-
/*
994-
* Okay, do the cost comparison. We need to consider
995-
* cheapest_path + hashagg [+ final sort] versus
996-
* either cheapest_path [+ sort] + group or agg [+
997-
* final sort] or presorted_path + group or agg [+
998-
* final sort] where brackets indicate a step that may
999-
* not be needed. We assume query_planner() will have
1000-
* returned a presorted path only if it's a winner
1001-
* compared to cheapest_path for this purpose.
1002-
*
1003-
* These path variables are dummies that just hold cost
1004-
* fields; we don't make actual Paths for these steps.
1005-
*/
1006-
Pathhashed_p;
1007-
Pathsorted_p;
1008-
1009-
cost_agg(&hashed_p,parse,
1010-
AGG_HASHED,agg_counts.numAggs,
1011-
numGroupCols,dNumGroups,
1012-
cheapest_path->startup_cost,
1013-
cheapest_path->total_cost,
1014-
cheapest_path_rows);
1015-
/* Result of hashed agg is always unsorted */
1016-
if (sort_pathkeys)
1017-
cost_sort(&hashed_p,parse,sort_pathkeys,
1018-
hashed_p.total_cost,
1019-
dNumGroups,
1020-
cheapest_path_width);
1021-
1022-
if (sorted_path)
1023-
{
1024-
sorted_p.startup_cost=sorted_path->startup_cost;
1025-
sorted_p.total_cost=sorted_path->total_cost;
1026-
current_pathkeys=sorted_path->pathkeys;
1027-
}
1028-
else
1029-
{
1030-
sorted_p.startup_cost=cheapest_path->startup_cost;
1031-
sorted_p.total_cost=cheapest_path->total_cost;
1032-
current_pathkeys=cheapest_path->pathkeys;
1033-
}
1034-
if (!pathkeys_contained_in(group_pathkeys,
1035-
current_pathkeys))
1036-
{
1037-
cost_sort(&sorted_p,parse,group_pathkeys,
1038-
sorted_p.total_cost,
1039-
cheapest_path_rows,
1040-
cheapest_path_width);
1041-
current_pathkeys=group_pathkeys;
1042-
}
1043-
if (parse->hasAggs)
1044-
cost_agg(&sorted_p,parse,
1045-
AGG_SORTED,agg_counts.numAggs,
1046-
numGroupCols,dNumGroups,
1047-
sorted_p.startup_cost,
1048-
sorted_p.total_cost,
1049-
cheapest_path_rows);
1050-
else
1051-
cost_group(&sorted_p,parse,
1052-
numGroupCols,dNumGroups,
1053-
sorted_p.startup_cost,
1054-
sorted_p.total_cost,
1055-
cheapest_path_rows);
1056-
/* The Agg or Group node will preserve ordering */
1057-
if (sort_pathkeys&&
1058-
!pathkeys_contained_in(sort_pathkeys,
1059-
current_pathkeys))
1060-
{
1061-
cost_sort(&sorted_p,parse,sort_pathkeys,
1062-
sorted_p.total_cost,
1063-
dNumGroups,
1064-
cheapest_path_width);
1065-
}
1066-
1067-
/*
1068-
* Now make the decision using the top-level tuple
1069-
* fraction. First we have to convert an absolute
1070-
* count (LIMIT) into fractional form.
1071-
*/
1072-
if (tuple_fraction >=1.0)
1073-
tuple_fraction /=dNumGroups;
1074-
1075-
if (compare_fractional_path_costs(&hashed_p,&sorted_p,
1076-
tuple_fraction)<0)
1077-
{
1078-
/* Hashed is cheaper, so use it */
1079-
use_hashed_grouping= true;
1080-
}
1081-
}
1082-
}
954+
use_hashed_grouping=
955+
choose_hashed_grouping(parse,tuple_fraction,
956+
cheapest_path,sorted_path,
957+
sort_pathkeys,group_pathkeys,
958+
dNumGroups,&agg_counts);
1083959
}
1084960

1085961
/*
@@ -1331,6 +1207,146 @@ grouping_planner(Query *parse, double tuple_fraction)
13311207
returnresult_plan;
13321208
}
13331209

1210+
/*
1211+
* choose_hashed_grouping - should we use hashed grouping?
1212+
*/
1213+
staticbool
1214+
choose_hashed_grouping(Query*parse,doubletuple_fraction,
1215+
Path*cheapest_path,Path*sorted_path,
1216+
List*sort_pathkeys,List*group_pathkeys,
1217+
doubledNumGroups,AggClauseCounts*agg_counts)
1218+
{
1219+
intnumGroupCols=list_length(parse->groupClause);
1220+
doublecheapest_path_rows;
1221+
intcheapest_path_width;
1222+
Sizehashentrysize;
1223+
List*current_pathkeys;
1224+
Pathhashed_p;
1225+
Pathsorted_p;
1226+
1227+
/*
1228+
* Check can't-do-it conditions, including whether the grouping operators
1229+
* are hashjoinable.
1230+
*
1231+
* Executor doesn't support hashed aggregation with DISTINCT aggregates.
1232+
* (Doing so would imply storing *all* the input values in the hash table,
1233+
* which seems like a certain loser.)
1234+
*/
1235+
if (!enable_hashagg)
1236+
return false;
1237+
if (agg_counts->numDistinctAggs!=0)
1238+
return false;
1239+
if (!hash_safe_grouping(parse))
1240+
return false;
1241+
1242+
/*
1243+
* Don't do it if it doesn't look like the hashtable will fit into
1244+
* work_mem.
1245+
*
1246+
* Beware here of the possibility that cheapest_path->parent is NULL.
1247+
* This could happen if user does something silly like
1248+
*SELECT 'foo' GROUP BY 1;
1249+
*/
1250+
if (cheapest_path->parent)
1251+
{
1252+
cheapest_path_rows=cheapest_path->parent->rows;
1253+
cheapest_path_width=cheapest_path->parent->width;
1254+
}
1255+
else
1256+
{
1257+
cheapest_path_rows=1;/* assume non-set result */
1258+
cheapest_path_width=100;/* arbitrary */
1259+
}
1260+
1261+
/* Estimate per-hash-entry space at tuple width... */
1262+
hashentrysize=cheapest_path_width;
1263+
/* plus space for pass-by-ref transition values... */
1264+
hashentrysize+=agg_counts->transitionSpace;
1265+
/* plus the per-hash-entry overhead */
1266+
hashentrysize+=hash_agg_entry_size(agg_counts->numAggs);
1267+
1268+
if (hashentrysize*dNumGroups>work_mem*1024L)
1269+
return false;
1270+
1271+
/*
1272+
* See if the estimated cost is no more than doing it the other way.
1273+
* While avoiding the need for sorted input is usually a win, the fact
1274+
* that the output won't be sorted may be a loss; so we need to do an
1275+
* actual cost comparison.
1276+
*
1277+
* We need to consider
1278+
*cheapest_path + hashagg [+ final sort]
1279+
* versus either
1280+
*cheapest_path [+ sort] + group or agg [+ final sort]
1281+
* or
1282+
*presorted_path + group or agg [+ final sort]
1283+
* where brackets indicate a step that may not be needed. We assume
1284+
* query_planner() will have returned a presorted path only if it's a
1285+
* winner compared to cheapest_path for this purpose.
1286+
*
1287+
* These path variables are dummies that just hold cost fields; we don't
1288+
* make actual Paths for these steps.
1289+
*/
1290+
cost_agg(&hashed_p,parse,AGG_HASHED,agg_counts->numAggs,
1291+
numGroupCols,dNumGroups,
1292+
cheapest_path->startup_cost,cheapest_path->total_cost,
1293+
cheapest_path_rows);
1294+
/* Result of hashed agg is always unsorted */
1295+
if (sort_pathkeys)
1296+
cost_sort(&hashed_p,parse,sort_pathkeys,hashed_p.total_cost,
1297+
dNumGroups,cheapest_path_width);
1298+
1299+
if (sorted_path)
1300+
{
1301+
sorted_p.startup_cost=sorted_path->startup_cost;
1302+
sorted_p.total_cost=sorted_path->total_cost;
1303+
current_pathkeys=sorted_path->pathkeys;
1304+
}
1305+
else
1306+
{
1307+
sorted_p.startup_cost=cheapest_path->startup_cost;
1308+
sorted_p.total_cost=cheapest_path->total_cost;
1309+
current_pathkeys=cheapest_path->pathkeys;
1310+
}
1311+
if (!pathkeys_contained_in(group_pathkeys,
1312+
current_pathkeys))
1313+
{
1314+
cost_sort(&sorted_p,parse,group_pathkeys,sorted_p.total_cost,
1315+
cheapest_path_rows,cheapest_path_width);
1316+
current_pathkeys=group_pathkeys;
1317+
}
1318+
1319+
if (parse->hasAggs)
1320+
cost_agg(&sorted_p,parse,AGG_SORTED,agg_counts->numAggs,
1321+
numGroupCols,dNumGroups,
1322+
sorted_p.startup_cost,sorted_p.total_cost,
1323+
cheapest_path_rows);
1324+
else
1325+
cost_group(&sorted_p,parse,numGroupCols,dNumGroups,
1326+
sorted_p.startup_cost,sorted_p.total_cost,
1327+
cheapest_path_rows);
1328+
/* The Agg or Group node will preserve ordering */
1329+
if (sort_pathkeys&&
1330+
!pathkeys_contained_in(sort_pathkeys,current_pathkeys))
1331+
cost_sort(&sorted_p,parse,sort_pathkeys,sorted_p.total_cost,
1332+
dNumGroups,cheapest_path_width);
1333+
1334+
/*
1335+
* Now make the decision using the top-level tuple fraction. First we
1336+
* have to convert an absolute count (LIMIT) into fractional form.
1337+
*/
1338+
if (tuple_fraction >=1.0)
1339+
tuple_fraction /=dNumGroups;
1340+
1341+
if (compare_fractional_path_costs(&hashed_p,&sorted_p,
1342+
tuple_fraction)<0)
1343+
{
1344+
/* Hashed is cheaper, so use it */
1345+
return true;
1346+
}
1347+
return false;
1348+
}
1349+
13341350
/*
13351351
* hash_safe_grouping - are grouping operators hashable?
13361352
*

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp