NotificationsYou must be signed in to change notification settings
Fork5
Star26

Commit4e5fbb3

committed

Change the division of labor between grouping_planner and query_planner

so that the latter estimates the number of groups that grouping will produce. This is needed because it is primarily query_planner that makes the decision between fast-start and fast-finish plans, and in the original coding it was unable to make more than a crude rule-of-thumb choice when the query involved grouping. This revision helps us make saner choices for queries like SELECT ... GROUP BY ... LIMIT, as in a recent example from Mark Kirkwood. Also move the responsibility for canonicalizing sort_pathkeys and group_pathkeys into query_planner; this information has to be available anyway to support the first change, and doing it this way lets us get rid of compare_noncanonical_pathkeys entirely.

1 parent9e56c5a commit4e5fbb3Copy full SHA for 4e5fbb3

File tree

7 files changed

+143

-205

lines changed

src
- backend
  - nodes
    - outfuncs.c
  - optimizer
    - path
      - pathkeys.c
    - plan
      - planmain.c
      - planner.c
- include
  - nodes
    - relation.h
  - optimizer
    - paths.h
    - planmain.h

7 files changed

+143

-205

lines changed

`‎src/backend/nodes/outfuncs.c`

Lines changed: 4 additions & 1 deletion

Original file line number	Diff line number	Diff line change
`@@ -8,7 +8,7 @@`
`8`	`8`	`*`
`9`	`9`	`*`
`10`	`10`	`* IDENTIFICATION`
`11`		`- * $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.259 2005/08/01 20:31:08 tgl Exp $`
	`11`	`+ * $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.260 2005/08/27 22:13:43 tgl Exp $`
`12`	`12`	`*`
`13`	`13`	`* NOTES`
`14`	`14`	`* Every node type that can appear in stored rules' parsetrees must`
`@@ -1169,6 +1169,9 @@ _outPlannerInfo(StringInfo str, PlannerInfo *node)`
`1169`	`1169`	`WRITE_NODE_FIELD(full_join_clauses);`
`1170`	`1170`	`WRITE_NODE_FIELD(in_info_list);`
`1171`	`1171`	`WRITE_NODE_FIELD(query_pathkeys);`
	`1172`	`+WRITE_NODE_FIELD(group_pathkeys);`
	`1173`	`+WRITE_NODE_FIELD(sort_pathkeys);`
	`1174`	`+WRITE_FLOAT_FIELD(tuple_fraction,"%.4f");`
`1172`	`1175`	`WRITE_BOOL_FIELD(hasJoinRTEs);`
`1173`	`1176`	`WRITE_BOOL_FIELD(hasOuterJoins);`
`1174`	`1177`	`WRITE_BOOL_FIELD(hasHavingQual);`

`‎src/backend/optimizer/path/pathkeys.c`

Lines changed: 1 addition & 67 deletions

Original file line number	Diff line number	Diff line change
`@@ -11,7 +11,7 @@`
`11`	`11`	`* Portions Copyright (c) 1994, Regents of the University of California`
`12`	`12`	`*`
`13`	`13`	`* IDENTIFICATION`
`14`		`- * $PostgreSQL: pgsql/src/backend/optimizer/path/pathkeys.c,v 1.71 2005/07/28 22:27:00 tgl Exp $`
	`14`	`+ * $PostgreSQL: pgsql/src/backend/optimizer/path/pathkeys.c,v 1.72 2005/08/27 22:13:43 tgl Exp $`
`15`	`15`	`*`
`16`	`16`	`*-------------------------------------------------------------------------`
`17`	`17`	`*/`
`@@ -800,54 +800,6 @@ compare_pathkeys(List *keys1, List *keys2)`
`800`	`800`	`return PATHKEYS_BETTER2; /* key2 is longer */`
`801`	`801`	`}`
`802`	`802`
`803`		`-/*`
`804`		`- * compare_noncanonical_pathkeys`
`805`		`- * Compare two pathkeys to see if they are equivalent, and if not whether`
`806`		`- * one is "better" than the other. This is used when we must compare`
`807`		`- * non-canonicalized pathkeys.`
`808`		`- *`
`809`		`- * A pathkey can be considered better than another if it is a superset:`
`810`		`- * it contains all the keys of the other plus more.For example, either`
`811`		`- * ((A) (B)) or ((A B)) is better than ((A)).`
`812`		`- *`
`813`		`- * Currently, the only user of this routine is grouping_planner(),`
`814`		`- * and it will only pass single-element sublists (from`
`815`		`- * make_pathkeys_for_sortclauses). Therefore we don't have to do the`
`816`		`- * full two-way-subset-inclusion test on each pair of sublists that is`
`817`		`- * implied by the above statement. Instead we just verify they are`
`818`		`- * singleton lists and then do an equal(). This could be improved if`
`819`		`- * necessary.`
`820`		`- */`
`821`		`-PathKeysComparison`
`822`		`-compare_noncanonical_pathkeys(List *keys1, List *keys2)`
`823`		`-{`
`824`		`-ListCell*key1,`
`825`		`-*key2;`
`826`		`-`
`827`		`-forboth(key1,keys1,key2,keys2)`
`828`		`-{`
`829`		`-List *subkey1 = (List *) lfirst(key1);`
`830`		`-List *subkey2 = (List *) lfirst(key2);`
`831`		`-`
`832`		`-Assert(list_length(subkey1) == 1);`
`833`		`-Assert(list_length(subkey2) == 1);`
`834`		`-if (!equal(subkey1, subkey2))`
`835`		`-return PATHKEYS_DIFFERENT; /* no need to keep looking */`
`836`		`-}`
`837`		`-`
`838`		`-/*`
`839`		`- * If we reached the end of only one list, the other is longer and`
`840`		`- * therefore not a subset.(We assume the additional sublist(s) of`
`841`		`- * the other list are not NIL --- no pathkey list should ever have a`
`842`		`- * NIL sublist.)`
`843`		`- */`
`844`		`-if (key1 == NULL && key2 == NULL)`
`845`		`-return PATHKEYS_EQUAL;`
`846`		`-if (key1 != NULL)`
`847`		`-return PATHKEYS_BETTER1; /* key1 is longer */`
`848`		`-return PATHKEYS_BETTER2; /* key2 is longer */`
`849`		`-}`
`850`		`-`
`851`	`803`	`/*`
`852`	`804`	`* pathkeys_contained_in`
`853`	`805`	`* Common special case of compare_pathkeys: we just want to know`
`@@ -867,24 +819,6 @@ pathkeys_contained_in(List *keys1, List *keys2)`
`867`	`819`	`return false;`
`868`	`820`	`}`
`869`	`821`
`870`		`-/*`
`871`		`- * noncanonical_pathkeys_contained_in`
`872`		`- * The same, when we don't have canonical pathkeys.`
`873`		`- */`
`874`		`-bool`
`875`		`-noncanonical_pathkeys_contained_in(List *keys1, List *keys2)`
`876`		`-{`
`877`		`-switch (compare_noncanonical_pathkeys(keys1, keys2))`
`878`		`-{`
`879`		`-case PATHKEYS_EQUAL:`
`880`		`-case PATHKEYS_BETTER2:`
`881`		`-return true;`
`882`		`-default:`
`883`		`-break;`
`884`		`-}`
`885`		`-return false;`
`886`		`-}`
`887`		`-`
`888`	`822`	`/*`
`889`	`823`	`* get_cheapest_path_for_pathkeys`
`890`	`824`	`* Find the cheapest path (according to the specified criterion) that`

`‎src/backend/optimizer/plan/planmain.c`

Lines changed: 98 additions & 10 deletions

Original file line number	Diff line number	Diff line change
`@@ -14,7 +14,7 @@`
`14`	`14`	`*`
`15`	`15`	`*`
`16`	`16`	`* IDENTIFICATION`
`17`		`- * $PostgreSQL: pgsql/src/backend/optimizer/plan/planmain.c,v 1.86 2005/07/02 23:00:41 tgl Exp $`
	`17`	`+ * $PostgreSQL: pgsql/src/backend/optimizer/plan/planmain.c,v 1.87 2005/08/27 22:13:43 tgl Exp $`
`18`	`18`	`*`
`19`	`19`	`*-------------------------------------------------------------------------`
`20`	`20`	`*/`
`@@ -25,9 +25,11 @@`
`25`	`25`	`#include "optimizer/pathnode.h"`
`26`	`26`	`#include "optimizer/paths.h"`
`27`	`27`	`#include "optimizer/planmain.h"`
	`28`	`+#include "optimizer/tlist.h"`
	`29`	`+#include "utils/selfuncs.h"`
`28`	`30`
`29`	`31`
`30`		`-/*--------------------`
	`32`	`+/*`
`31`	`33`	`* query_planner`
`32`	`34`	`* Generate a path (that is, a simplified plan) for a basic query,`
`33`	`35`	`* which may involve joins but not any fancier features.`
`@@ -51,6 +53,8 @@`
`51`	`53`	`* *cheapest_path receives the overall-cheapest path for the query`
`52`	`54`	`* *sorted_path receives the cheapest presorted path for the query,`
`53`	`55`	`*if any (NULL if there is no useful presorted path)`
	`56`	`+ * *num_groups receives the estimated number of groups, or 1 if query`
	`57`	`+ *does not use grouping`
`54`	`58`	`*`
`55`	`59`	`* Note: the PlannerInfo node also includes a query_pathkeys field, which is`
`56`	`60`	`* both an input and an output of query_planner(). The input value signals`
`@@ -61,17 +65,21 @@`
`61`	`65`	`* PlannerInfo field and not a passed parameter is that the low-level routines`
`62`	`66`	`* in indxpath.c need to see it.)`
`63`	`67`	`*`
	`68`	`+ * Note: the PlannerInfo node also includes group_pathkeys and sort_pathkeys,`
	`69`	`+ * which like query_pathkeys need to be canonicalized once the info is`
	`70`	`+ * available.`
	`71`	`+ *`
`64`	`72`	`* tuple_fraction is interpreted as follows:`
`65`	`73`	`* 0: expect all tuples to be retrieved (normal case)`
`66`	`74`	`* 0 < tuple_fraction < 1: expect the given fraction of tuples available`
`67`	`75`	`*from the plan to be retrieved`
`68`	`76`	`* tuple_fraction >= 1: tuple_fraction is the absolute number of tuples`
`69`	`77`	`*expected to be retrieved (ie, a LIMIT specification)`
`70`		`- *--------------------`
`71`	`78`	`*/`
`72`	`79`	`void`
`73`	`80`	`query_planner(PlannerInfo *root, List *tlist, double tuple_fraction,`
`74`		`-Path **cheapest_path, Path **sorted_path)`
	`81`	`+Path **cheapest_path, Path **sorted_path,`
	`82`	`+double *num_groups)`
`75`	`83`	`{`
`76`	`84`	`Query*parse=root->parse;`
`77`	`85`	`List*constant_quals;`
`@@ -82,6 +90,8 @@ query_planner(PlannerInfo *root, List *tlist, double tuple_fraction,`
`82`	`90`	`/* Make tuple_fraction accessible to lower-level routines */`
`83`	`91`	`root->tuple_fraction=tuple_fraction;`
`84`	`92`
	`93`	`+*num_groups = 1; /* default result */`
	`94`	`+`
`85`	`95`	`/*`
`86`	`96`	`* If the query has an empty join tree, then it's something easy like`
`87`	`97`	`* "SELECT 2+2;" or "INSERT ... VALUES()".Fall through quickly.`
`@@ -156,9 +166,12 @@ query_planner(PlannerInfo *root, List *tlist, double tuple_fraction,`
`156`	`166`	`/*`
`157`	`167`	`* We should now have all the pathkey equivalence sets built, so it's`
`158`	`168`	`* now possible to convert the requested query_pathkeys to canonical`
`159`		`- * form.`
	`169`	`+ * form. Also canonicalize the groupClause and sortClause pathkeys`
	`170`	`+ * for use later.`
`160`	`171`	`*/`
`161`	`172`	`root->query_pathkeys=canonicalize_pathkeys(root,root->query_pathkeys);`
	`173`	`+root->group_pathkeys=canonicalize_pathkeys(root,root->group_pathkeys);`
	`174`	`+root->sort_pathkeys=canonicalize_pathkeys(root,root->sort_pathkeys);`
`162`	`175`
`163`	`176`	`/*`
`164`	`177`	`* Ready to do the primary planning.`
`@@ -169,12 +182,87 @@ query_planner(PlannerInfo *root, List *tlist, double tuple_fraction,`
`169`	`182`	`elog(ERROR,"failed to construct the join relation");`
`170`	`183`
`171`	`184`	`/*`
`172`		`- * Now that we have an estimate of the final rel's size, we can`
`173`		`- * convert a tuple_fraction specified as an absolute count (ie, a`
`174`		`- * LIMIT option) into a fraction of the total tuples.`
	`185`	`+ * If there's grouping going on, estimate the number of result groups.`
	`186`	`+ * We couldn't do this any earlier because it depends on relation size`
	`187`	`+ * estimates that were set up above.`
	`188`	`+ *`
	`189`	`+ * Then convert tuple_fraction to fractional form if it is absolute,`
	`190`	`+ * and adjust it based on the knowledge that grouping_planner will be`
	`191`	`+ * doing grouping or aggregation work with our result.`
	`192`	`+ *`
	`193`	`+ * This introduces some undesirable coupling between this code and`
	`194`	`+ * grouping_planner, but the alternatives seem even uglier; we couldn't`
	`195`	`+ * pass back completed paths without making these decisions here.`
`175`	`196`	`*/`
`176`		`-if (tuple_fraction >=1.0)`
`177`		`-tuple_fraction /=final_rel->rows;`
	`197`	`+if (parse->groupClause)`
	`198`	`+{`
	`199`	`+List*groupExprs;`
	`200`	`+`
	`201`	`+groupExprs=get_sortgrouplist_exprs(parse->groupClause,`
	`202`	`+parse->targetList);`
	`203`	`+*num_groups=estimate_num_groups(root,`
	`204`	`+groupExprs,`
	`205`	`+final_rel->rows);`
	`206`	`+`
	`207`	`+/*`
	`208`	`+ * In GROUP BY mode, an absolute LIMIT is relative to the number`
	`209`	`+ * of groups not the number of tuples. If the caller gave us`
	`210`	`+ * a fraction, keep it as-is. (In both cases, we are effectively`
	`211`	`+ * assuming that all the groups are about the same size.)`
	`212`	`+ */`
	`213`	`+if (tuple_fraction >=1.0)`
	`214`	`+tuple_fraction /=*num_groups;`
	`215`	`+`
	`216`	`+/*`
	`217`	`+ * If both GROUP BY and ORDER BY are specified, we will need two`
	`218`	`+ * levels of sort --- and, therefore, certainly need to read all`
	`219`	`+ * the tuples --- unless ORDER BY is a subset of GROUP BY.`
	`220`	`+ */`
	`221`	`+if (parse->groupClause&&parse->sortClause&&`
	`222`	`+!pathkeys_contained_in(root->sort_pathkeys,root->group_pathkeys))`
	`223`	`+tuple_fraction=0.0;`
	`224`	`+}`
	`225`	`+else if (parse->hasAggs || root->hasHavingQual)`
	`226`	`+{`
	`227`	`+/*`
	`228`	`+ * Ungrouped aggregate will certainly want to read all the tuples,`
	`229`	`+ * and it will deliver a single result row (so leave *num_groups 1).`
	`230`	`+ */`
	`231`	`+tuple_fraction=0.0;`
	`232`	`+}`
	`233`	`+else if (parse->distinctClause)`
	`234`	`+{`
	`235`	`+/*`
	`236`	`+ * Since there was no grouping or aggregation, it's reasonable to`
	`237`	`+ * assume the UNIQUE filter has effects comparable to GROUP BY.`
	`238`	`+ * Return the estimated number of output rows for use by caller.`
	`239`	`+ * (If DISTINCT is used with grouping, we ignore its effects for`
	`240`	`+ * rowcount estimation purposes; this amounts to assuming the grouped`
	`241`	`+ * rows are distinct already.)`
	`242`	`+ */`
	`243`	`+List*distinctExprs;`
	`244`	`+`
	`245`	`+distinctExprs=get_sortgrouplist_exprs(parse->distinctClause,`
	`246`	`+parse->targetList);`
	`247`	`+*num_groups=estimate_num_groups(root,`
	`248`	`+distinctExprs,`
	`249`	`+final_rel->rows);`
	`250`	`+`
	`251`	`+/*`
	`252`	`+ * Adjust tuple_fraction the same way as for GROUP BY, too.`
	`253`	`+ */`
	`254`	`+if (tuple_fraction >=1.0)`
	`255`	`+tuple_fraction /=*num_groups;`
	`256`	`+}`
	`257`	`+else`
	`258`	`+{`
	`259`	`+/*`
	`260`	`+ * Plain non-grouped, non-aggregated query: an absolute tuple`
	`261`	`+ * fraction can be divided by the number of tuples.`
	`262`	`+ */`
	`263`	`+if (tuple_fraction >=1.0)`
	`264`	`+tuple_fraction /=final_rel->rows;`
	`265`	`+}`
`178`	`266`
`179`	`267`	`/*`
`180`	`268`	`* Pick out the cheapest-total path and the cheapest presorted path`

0 commit comments

Comments

(0)

Movatterモバイル変換

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Commit4e5fbb3

File tree

7 files changed

7 files changed

`‎src/backend/nodes/outfuncs.c`

`‎src/backend/optimizer/path/pathkeys.c`

`‎src/backend/optimizer/plan/planmain.c`

0 commit comments