NotificationsYou must be signed in to change notification settings
Fork6
Star31

Commit6985592

committed

Split out into a separate function the code in grouping_planner() that

decides whether to use hashed grouping instead of sort-plus-uniqgrouping. The function needs an annoyingly large number of parameters,but this still seems like a win for legibility, since it removes overa hundred lines from grouping_planner (which is still too big :-().

1 parent313de22 commit6985592Copy full SHA for 6985592

File tree

1 file changed

+156

-140

lines changed

src/backend/optimizer/plan
- planner.c

1 file changed

+156

-140

lines changed

`‎src/backend/optimizer/plan/planner.c`

Lines changed: 156 additions & 140 deletions

Original file line number	Diff line number	Diff line change
`@@ -8,7 +8,7 @@`
`8`	`8`	`*`
`9`	`9`	`*`
`10`	`10`	`* IDENTIFICATION`
`11`		`- * $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.182 2005/04/06 16:34:05 tgl Exp $`
	`11`	`+ * $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.183 2005/04/10 19:50:08 tgl Exp $`
`12`	`12`	`*`
`13`	`13`	`*-------------------------------------------------------------------------`
`14`	`14`	`*/`
`@@ -58,6 +58,10 @@ static Node preprocess_expression(Query parse, Node *expr, int kind);`
`58`	`58`	`staticvoidpreprocess_qual_conditions(Queryparse,Nodejtnode);`
`59`	`59`	`staticPlaninheritance_planner(Queryparse,List*inheritlist);`
`60`	`60`	`staticPlangrouping_planner(Queryparse,doubletuple_fraction);`
	`61`	`+staticboolchoose_hashed_grouping(Query*parse,doubletuple_fraction,`
	`62`	`+Pathcheapest_path,Pathsorted_path,`
	`63`	`+Listsort_pathkeys,Listgroup_pathkeys,`
	`64`	`+doubledNumGroups,AggClauseCounts*agg_counts);`
`61`	`65`	`staticboolhash_safe_grouping(Query*parse);`
`62`	`66`	`staticListmake_subplanTargetList(Queryparse,List*tlist,`
`63`	`67`	`AttrNumber*groupColIdx,boolneed_tlist_eval);`
`@@ -920,34 +924,25 @@ grouping_planner(Query *parse, double tuple_fraction)`
`920`	`924`	`sort_pathkeys=canonicalize_pathkeys(parse,sort_pathkeys);`
`921`	`925`
`922`	`926`	`/*`
`923`		`- * Consider whether we might want to use hashed grouping.`
	`927`	`+ * If grouping, estimate the number of groups. (We can't do this`
	`928`	`+ * until after running query_planner(), either.) Then decide`
	`929`	`+ * whether we want to use hashed grouping.`
`924`	`930`	`*/`
`925`	`931`	`if (parse->groupClause)`
`926`	`932`	`{`
`927`	`933`	`List*groupExprs;`
`928`	`934`	`doublecheapest_path_rows;`
`929`		`-intcheapest_path_width;`
`930`	`935`
`931`	`936`	`/*`
`932`		`- * Bewarein this sectionof the possibility that`
`933`		`- *cheapest_path->parent is NULL.This could happen if user`
`934`		`- * does something silly likeSELECT 'foo' GROUP BY 1;`
	`937`	`+ * Beware of the possibility that cheapest_path->parent is NULL.`
	`938`	`+ * This could happen if user does something silly like`
	`939`	`+ *SELECT 'foo' GROUP BY 1;`
`935`	`940`	`*/`
`936`	`941`	`if (cheapest_path->parent)`
`937`		`-{`
`938`	`942`	`cheapest_path_rows=cheapest_path->parent->rows;`
`939`		`-cheapest_path_width=cheapest_path->parent->width;`
`940`		`-}`
`941`	`943`	`else`
`942`		`-{`
`943`	`944`	`cheapest_path_rows=1;/* assume non-set result */`
`944`		`-cheapest_path_width=100;/* arbitrary */`
`945`		`-}`
`946`	`945`
`947`		`-/*`
`948`		`- * Always estimate the number of groups. We can't do this`
`949`		`- * until after running query_planner(), either.`
`950`		`- */`
`951`	`946`	`groupExprs=get_sortgrouplist_exprs(parse->groupClause,`
`952`	`947`	`parse->targetList);`
`953`	`948`	`dNumGroups=estimate_num_groups(parse,`
`@@ -956,130 +951,11 @@ grouping_planner(Query *parse, double tuple_fraction)`
`956`	`951`	`/* Also want it as a long int --- but 'ware overflow! */`
`957`	`952`	`numGroups= (long)Min(dNumGroups, (double)LONG_MAX);`
`958`	`953`
`959`		`-/*`
`960`		`- * Check can't-do-it conditions, including whether the`
`961`		`- * grouping operators are hashjoinable.`
`962`		`- *`
`963`		`- * Executor doesn't support hashed aggregation with DISTINCT`
`964`		`- * aggregates.(Doing so would imply storing all the input`
`965`		`- * values in the hash table, which seems like a certain`
`966`		`- * loser.)`
`967`		`- */`
`968`		`-if (!enable_hashagg\|\| !hash_safe_grouping(parse))`
`969`		`-use_hashed_grouping= false;`
`970`		`-elseif (agg_counts.numDistinctAggs!=0)`
`971`		`-use_hashed_grouping= false;`
`972`		`-else`
`973`		`-{`
`974`		`-/*`
`975`		`- * Use hashed grouping if (a) we think we can fit the`
`976`		`- * hashtable into work_mem, and (b) the estimated cost`
`977`		`- * is no more than doing it the other way.While avoiding`
`978`		`- * the need for sorted input is usually a win, the fact`
`979`		`- * that the output won't be sorted may be a loss; so we`
`980`		`- * need to do an actual cost comparison.`
`981`		`- */`
`982`		`-Sizehashentrysize;`
`983`		`-`
`984`		`-/* Estimate per-hash-entry space at tuple width... */`
`985`		`-hashentrysize=cheapest_path_width;`
`986`		`-/* plus space for pass-by-ref transition values... */`
`987`		`-hashentrysize+=agg_counts.transitionSpace;`
`988`		`-/* plus the per-hash-entry overhead */`
`989`		`-hashentrysize+=hash_agg_entry_size(agg_counts.numAggs);`
`990`		`-`
`991`		`-if (hashentrysizedNumGroups <=work_mem1024L)`
`992`		`-{`
`993`		`-/*`
`994`		`- * Okay, do the cost comparison. We need to consider`
`995`		`- * cheapest_path + hashagg [+ final sort] versus`
`996`		`- * either cheapest_path [+ sort] + group or agg [+`
`997`		`- * final sort] or presorted_path + group or agg [+`
`998`		`- * final sort] where brackets indicate a step that may`
`999`		`- * not be needed. We assume query_planner() will have`
`1000`		`- * returned a presorted path only if it's a winner`
`1001`		`- * compared to cheapest_path for this purpose.`
`1002`		`- *`
`1003`		`- * These path variables are dummies that just hold cost`
`1004`		`- * fields; we don't make actual Paths for these steps.`
`1005`		`- */`
`1006`		`-Pathhashed_p;`
`1007`		`-Pathsorted_p;`
`1008`		`-`
`1009`		`-cost_agg(&hashed_p,parse,`
`1010`		`-AGG_HASHED,agg_counts.numAggs,`
`1011`		`-numGroupCols,dNumGroups,`
`1012`		`-cheapest_path->startup_cost,`
`1013`		`-cheapest_path->total_cost,`
`1014`		`-cheapest_path_rows);`
`1015`		`-/* Result of hashed agg is always unsorted */`
`1016`		`-if (sort_pathkeys)`
`1017`		`-cost_sort(&hashed_p,parse,sort_pathkeys,`
`1018`		`-hashed_p.total_cost,`
`1019`		`-dNumGroups,`
`1020`		`-cheapest_path_width);`
`1021`		`-`
`1022`		`-if (sorted_path)`
`1023`		`-{`
`1024`		`-sorted_p.startup_cost=sorted_path->startup_cost;`
`1025`		`-sorted_p.total_cost=sorted_path->total_cost;`
`1026`		`-current_pathkeys=sorted_path->pathkeys;`
`1027`		`-}`
`1028`		`-else`
`1029`		`-{`
`1030`		`-sorted_p.startup_cost=cheapest_path->startup_cost;`
`1031`		`-sorted_p.total_cost=cheapest_path->total_cost;`
`1032`		`-current_pathkeys=cheapest_path->pathkeys;`
`1033`		`-}`
`1034`		`-if (!pathkeys_contained_in(group_pathkeys,`
`1035`		`-current_pathkeys))`
`1036`		`-{`
`1037`		`-cost_sort(&sorted_p,parse,group_pathkeys,`
`1038`		`-sorted_p.total_cost,`
`1039`		`-cheapest_path_rows,`
`1040`		`-cheapest_path_width);`
`1041`		`-current_pathkeys=group_pathkeys;`
`1042`		`-}`
`1043`		`-if (parse->hasAggs)`
`1044`		`-cost_agg(&sorted_p,parse,`
`1045`		`-AGG_SORTED,agg_counts.numAggs,`
`1046`		`-numGroupCols,dNumGroups,`
`1047`		`-sorted_p.startup_cost,`
`1048`		`-sorted_p.total_cost,`
`1049`		`-cheapest_path_rows);`
`1050`		`-else`
`1051`		`-cost_group(&sorted_p,parse,`
`1052`		`-numGroupCols,dNumGroups,`
`1053`		`-sorted_p.startup_cost,`
`1054`		`-sorted_p.total_cost,`
`1055`		`-cheapest_path_rows);`
`1056`		`-/* The Agg or Group node will preserve ordering */`
`1057`		`-if (sort_pathkeys&&`
`1058`		`-!pathkeys_contained_in(sort_pathkeys,`
`1059`		`-current_pathkeys))`
`1060`		`-{`
`1061`		`-cost_sort(&sorted_p,parse,sort_pathkeys,`
`1062`		`-sorted_p.total_cost,`
`1063`		`-dNumGroups,`
`1064`		`-cheapest_path_width);`
`1065`		`-}`
`1066`		`-`
`1067`		`-/*`
`1068`		`- * Now make the decision using the top-level tuple`
`1069`		`- * fraction. First we have to convert an absolute`
`1070`		`- * count (LIMIT) into fractional form.`
`1071`		`- */`
`1072`		`-if (tuple_fraction >=1.0)`
`1073`		`-tuple_fraction /=dNumGroups;`
`1074`		`-`
`1075`		`-if (compare_fractional_path_costs(&hashed_p,&sorted_p,`
`1076`		`-tuple_fraction)<0)`
`1077`		`-{`
`1078`		`-/* Hashed is cheaper, so use it */`
`1079`		`-use_hashed_grouping= true;`
`1080`		`-}`
`1081`		`-}`
`1082`		`-}`
	`954`	`+use_hashed_grouping=`
	`955`	`+choose_hashed_grouping(parse,tuple_fraction,`
	`956`	`+cheapest_path,sorted_path,`
	`957`	`+sort_pathkeys,group_pathkeys,`
	`958`	`+dNumGroups,&agg_counts);`
`1083`	`959`	`}`
`1084`	`960`
`1085`	`961`	`/*`
`@@ -1331,6 +1207,146 @@ grouping_planner(Query *parse, double tuple_fraction)`
`1331`	`1207`	`returnresult_plan;`
`1332`	`1208`	`}`
`1333`	`1209`
	`1210`	`+/*`
	`1211`	`+ * choose_hashed_grouping - should we use hashed grouping?`
	`1212`	`+ */`
	`1213`	`+staticbool`
	`1214`	`+choose_hashed_grouping(Query*parse,doubletuple_fraction,`
	`1215`	`+Pathcheapest_path,Pathsorted_path,`
	`1216`	`+Listsort_pathkeys,Listgroup_pathkeys,`
	`1217`	`+doubledNumGroups,AggClauseCounts*agg_counts)`
	`1218`	`+{`
	`1219`	`+intnumGroupCols=list_length(parse->groupClause);`
	`1220`	`+doublecheapest_path_rows;`
	`1221`	`+intcheapest_path_width;`
	`1222`	`+Sizehashentrysize;`
	`1223`	`+List*current_pathkeys;`
	`1224`	`+Pathhashed_p;`
	`1225`	`+Pathsorted_p;`
	`1226`	`+`
	`1227`	`+/*`
	`1228`	`+ * Check can't-do-it conditions, including whether the grouping operators`
	`1229`	`+ * are hashjoinable.`
	`1230`	`+ *`
	`1231`	`+ * Executor doesn't support hashed aggregation with DISTINCT aggregates.`
	`1232`	`+ * (Doing so would imply storing all the input values in the hash table,`
	`1233`	`+ * which seems like a certain loser.)`
	`1234`	`+ */`
	`1235`	`+if (!enable_hashagg)`
	`1236`	`+return false;`
	`1237`	`+if (agg_counts->numDistinctAggs!=0)`
	`1238`	`+return false;`
	`1239`	`+if (!hash_safe_grouping(parse))`
	`1240`	`+return false;`
	`1241`	`+`
	`1242`	`+/*`
	`1243`	`+ * Don't do it if it doesn't look like the hashtable will fit into`
	`1244`	`+ * work_mem.`
	`1245`	`+ *`
	`1246`	`+ * Beware here of the possibility that cheapest_path->parent is NULL.`
	`1247`	`+ * This could happen if user does something silly like`
	`1248`	`+ *SELECT 'foo' GROUP BY 1;`
	`1249`	`+ */`
	`1250`	`+if (cheapest_path->parent)`
	`1251`	`+{`
	`1252`	`+cheapest_path_rows=cheapest_path->parent->rows;`
	`1253`	`+cheapest_path_width=cheapest_path->parent->width;`
	`1254`	`+}`
	`1255`	`+else`
	`1256`	`+{`
	`1257`	`+cheapest_path_rows=1;/* assume non-set result */`
	`1258`	`+cheapest_path_width=100;/* arbitrary */`
	`1259`	`+}`
	`1260`	`+`
	`1261`	`+/* Estimate per-hash-entry space at tuple width... */`
	`1262`	`+hashentrysize=cheapest_path_width;`
	`1263`	`+/* plus space for pass-by-ref transition values... */`
	`1264`	`+hashentrysize+=agg_counts->transitionSpace;`
	`1265`	`+/* plus the per-hash-entry overhead */`
	`1266`	`+hashentrysize+=hash_agg_entry_size(agg_counts->numAggs);`
	`1267`	`+`
	`1268`	`+if (hashentrysizedNumGroups>work_mem1024L)`
	`1269`	`+return false;`
	`1270`	`+`
	`1271`	`+/*`
	`1272`	`+ * See if the estimated cost is no more than doing it the other way.`
	`1273`	`+ * While avoiding the need for sorted input is usually a win, the fact`
	`1274`	`+ * that the output won't be sorted may be a loss; so we need to do an`
	`1275`	`+ * actual cost comparison.`
	`1276`	`+ *`
	`1277`	`+ * We need to consider`
	`1278`	`+ *cheapest_path + hashagg [+ final sort]`
	`1279`	`+ * versus either`
	`1280`	`+ *cheapest_path [+ sort] + group or agg [+ final sort]`
	`1281`	`+ * or`
	`1282`	`+ *presorted_path + group or agg [+ final sort]`
	`1283`	`+ * where brackets indicate a step that may not be needed. We assume`
	`1284`	`+ * query_planner() will have returned a presorted path only if it's a`
	`1285`	`+ * winner compared to cheapest_path for this purpose.`
	`1286`	`+ *`
	`1287`	`+ * These path variables are dummies that just hold cost fields; we don't`
	`1288`	`+ * make actual Paths for these steps.`
	`1289`	`+ */`
	`1290`	`+cost_agg(&hashed_p,parse,AGG_HASHED,agg_counts->numAggs,`
	`1291`	`+numGroupCols,dNumGroups,`
	`1292`	`+cheapest_path->startup_cost,cheapest_path->total_cost,`
	`1293`	`+cheapest_path_rows);`
	`1294`	`+/* Result of hashed agg is always unsorted */`
	`1295`	`+if (sort_pathkeys)`
	`1296`	`+cost_sort(&hashed_p,parse,sort_pathkeys,hashed_p.total_cost,`
	`1297`	`+dNumGroups,cheapest_path_width);`
	`1298`	`+`
	`1299`	`+if (sorted_path)`
	`1300`	`+{`
	`1301`	`+sorted_p.startup_cost=sorted_path->startup_cost;`
	`1302`	`+sorted_p.total_cost=sorted_path->total_cost;`
	`1303`	`+current_pathkeys=sorted_path->pathkeys;`
	`1304`	`+}`
	`1305`	`+else`
	`1306`	`+{`
	`1307`	`+sorted_p.startup_cost=cheapest_path->startup_cost;`
	`1308`	`+sorted_p.total_cost=cheapest_path->total_cost;`
	`1309`	`+current_pathkeys=cheapest_path->pathkeys;`
	`1310`	`+}`
	`1311`	`+if (!pathkeys_contained_in(group_pathkeys,`
	`1312`	`+current_pathkeys))`
	`1313`	`+{`
	`1314`	`+cost_sort(&sorted_p,parse,group_pathkeys,sorted_p.total_cost,`
	`1315`	`+cheapest_path_rows,cheapest_path_width);`
	`1316`	`+current_pathkeys=group_pathkeys;`
	`1317`	`+}`
	`1318`	`+`
	`1319`	`+if (parse->hasAggs)`
	`1320`	`+cost_agg(&sorted_p,parse,AGG_SORTED,agg_counts->numAggs,`
	`1321`	`+numGroupCols,dNumGroups,`
	`1322`	`+sorted_p.startup_cost,sorted_p.total_cost,`
	`1323`	`+cheapest_path_rows);`
	`1324`	`+else`
	`1325`	`+cost_group(&sorted_p,parse,numGroupCols,dNumGroups,`
	`1326`	`+sorted_p.startup_cost,sorted_p.total_cost,`
	`1327`	`+cheapest_path_rows);`
	`1328`	`+/* The Agg or Group node will preserve ordering */`
	`1329`	`+if (sort_pathkeys&&`
	`1330`	`+!pathkeys_contained_in(sort_pathkeys,current_pathkeys))`
	`1331`	`+cost_sort(&sorted_p,parse,sort_pathkeys,sorted_p.total_cost,`
	`1332`	`+dNumGroups,cheapest_path_width);`
	`1333`	`+`
	`1334`	`+/*`
	`1335`	`+ * Now make the decision using the top-level tuple fraction. First we`
	`1336`	`+ * have to convert an absolute count (LIMIT) into fractional form.`
	`1337`	`+ */`
	`1338`	`+if (tuple_fraction >=1.0)`
	`1339`	`+tuple_fraction /=dNumGroups;`
	`1340`	`+`
	`1341`	`+if (compare_fractional_path_costs(&hashed_p,&sorted_p,`
	`1342`	`+tuple_fraction)<0)`
	`1343`	`+{`
	`1344`	`+/* Hashed is cheaper, so use it */`
	`1345`	`+return true;`
	`1346`	`+}`
	`1347`	`+return false;`
	`1348`	`+}`
	`1349`	`+`
`1334`	`1350`	`/*`
`1335`	`1351`	`* hash_safe_grouping - are grouping operators hashable?`
`1336`	`1352`	`*`

0 commit comments

Comments

(0)

Movatterモバイル変換

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Commit6985592

File tree

1 file changed

1 file changed

`‎src/backend/optimizer/plan/planner.c`

0 commit comments