NotificationsYou must be signed in to change notification settings
Fork6
Star31

Commit fa52221

committed

Add GROUP-BY strategy: put the most distinct column at the head.

Let's allow GROUP-BY to utilize the cost_sort feature, which can differentiate orders of pathkeys lists according to the ndistinct of the first column. Task: 9578. Tags: optimized_group_by.

1 parent e5f5238, commit fa52221. Copy full SHA for fa52221

File tree

3 files changed

+96

-30

lines changed

src
- backend/optimizer/path
  - pathkeys.c
- test/regress
  - expected
    - aggregates.out
  - sql
    - aggregates.sql

3 files changed

+96

-30

lines changed

`‎src/backend/optimizer/path/pathkeys.c`

Lines changed: 70 additions & 3 deletions

Original file line number	Diff line number	Diff line change
`@@ -26,6 +26,7 @@`
`26`	`26`	`#include"optimizer/paths.h"`
`27`	`27`	`#include"partitioning/partbounds.h"`
`28`	`28`	`#include"utils/lsyscache.h"`
	`29`	`+#include"utils/selfuncs.h"`
`29`	`30`
`30`	`31`	`/* Consider reordering of GROUP BY keys? */`
`31`	`32`	`boolenable_group_by_reordering= true;`
`@@ -471,6 +472,10 @@ get_useful_group_keys_orderings(PlannerInfo root, Path path)`
`471`	`472`	`List*pathkeys=root->group_pathkeys;`
`472`	`473`	`List*clauses=root->processed_groupClause;`
`473`	`474`
	`475`	`+doublend_max=0;`
	`476`	`+PathKey*pk_opt=NULL;`
	`477`	`+ListCelllc1,lc2;`
	`478`	`+`
`474`	`479`	`/* always return at least the original pathkeys/clauses */`
`475`	`480`	`info=makeNode(GroupByOrdering);`
`476`	`481`	`info->pathkeys=pathkeys;`
`@@ -524,9 +529,6 @@ get_useful_group_keys_orderings(PlannerInfo root, Path path)`
`524`	`529`	`/* Test consistency of info structures */`
`525`	`530`	`for_each_from(lc,infos,1)`
`526`	`531`	`{`
`527`		`-ListCell*lc1,`
`528`		`-*lc2;`
`529`		`-`
`530`	`532`	`info=lfirst_node(GroupByOrdering,lc);`
`531`	`533`
`532`	`534`	`Assert(list_length(info->clauses)==list_length(pinfo->clauses));`
`@@ -544,6 +546,71 @@ get_useful_group_keys_orderings(PlannerInfo root, Path path)`
`544`	`546`	`}`
`545`	`547`	`}`
`546`	`548`	`#endif`
	`549`	`+`
	`550`	`+/*`
	`551`	`+ * Let's try the order with the column having max ndistinct value`
	`552`	`+ */`
	`553`	`+`
	`554`	`+forboth(lc1,root->group_pathkeys,lc2,root->processed_groupClause)`
	`555`	`+{`
	`556`	`+PathKey*pkey=lfirst_node(PathKey,lc1);`
	`557`	`+SortGroupClausegc= (SortGroupClause)lfirst(lc2);`
	`558`	`+Node*node;`
	`559`	`+Bitmapset*relids;`
	`560`	`+VariableStatDatavardata;`
	`561`	`+doublend=-1;`
	`562`	`+boolisdefault;`
	`563`	`+`
	`564`	`+if (foreach_current_index(lc1) >=root->num_groupby_pathkeys)`
	`565`	`+break;`
	`566`	`+`
	`567`	`+node=get_sortgroupclause_expr(gc,root->parse->targetList);`
	`568`	`+relids=pull_varnos(root,node);`
	`569`	`+`
	`570`	`+if (bms_num_members(relids)!=1&&bms_is_member(0,relids))`
	`571`	`+/*`
	`572`	`+ *Although functional index can estimate distincts here, the chance`
	`573`	`+ * is too low.`
	`574`	`+ */`
	`575`	`+continue;`
	`576`	`+`
	`577`	`+examine_variable(root,node,0,&vardata);`
	`578`	`+if (!HeapTupleIsValid(vardata.statsTuple))`
	`579`	`+continue;`
	`580`	`+nd=get_variable_numdistinct(&vardata,&isdefault);`
	`581`	`+ReleaseVariableStats(vardata);`
	`582`	`+if (isdefault)`
	`583`	`+continue;`
	`584`	`+`
	`585`	`+Assert(nd >=0);`
	`586`	`+if (nd_max==0\|\|nd>nd_max)`
	`587`	`+{`
	`588`	`+nd_max=nd;`
	`589`	`+pk_opt=pkey;`
	`590`	`+}`
	`591`	`+}`
	`592`	`+`
	`593`	`+if (pk_opt!=NULL)`
	`594`	`+{`
	`595`	`+List*new_pathkeys=list_make1(pk_opt);`
	`596`	`+intn;`
	`597`	`+`
	`598`	`+new_pathkeys=list_concat_unique_ptr(new_pathkeys,root->group_pathkeys);`
	`599`	`+n=group_keys_reorder_by_pathkeys(new_pathkeys,&pathkeys,&clauses,`
	`600`	`+root->num_groupby_pathkeys);`
	`601`	`+`
	`602`	`+if (n>0&&`
	`603`	`+(enable_incremental_sort\|\|n==root->num_groupby_pathkeys)&&`
	`604`	`+compare_pathkeys(pathkeys,root->group_pathkeys)!=PATHKEYS_EQUAL)`
	`605`	`+{`
	`606`	`+info=makeNode(GroupByOrdering);`
	`607`	`+info->pathkeys=pathkeys;`
	`608`	`+info->clauses=clauses;`
	`609`	`+`
	`610`	`+infos=lappend(infos,info);`
	`611`	`+}`
	`612`	`+}`
	`613`	`+`
`547`	`614`	`returninfos;`
`548`	`615`	`}`
`549`	`616`

`‎src/test/regress/expected/aggregates.out`

Lines changed: 21 additions & 22 deletions

Original file line number	Diff line number	Diff line change
`@@ -2786,13 +2786,13 @@ SELECT balk(hundred) FROM tenk1;`
`2786`	`2786`	`ROLLBACK;`
`2787`	`2787`	`-- GROUP BY optimization by reordering GROUP BY clauses`
`2788`	`2788`	`CREATE TABLE btg AS SELECT`
`2789`		`- i %10 AS x,`
`2790`		`- i %10 AS y,`
`2791`		`- 'abc' \|\| i %10 AS z,`
	`2789`	`+ i %231 AS x,`
	`2790`	`+ i %49 AS y,`
	`2791`	`+ 'abc' \|\| i %2 AS z,`
`2792`	`2792`	`i AS w`
`2793`		`-FROM generate_series(1,100) AS i;`
	`2793`	`+FROM generate_series(1,1000) AS i;`
`2794`	`2794`	`CREATE INDEX btg_x_y_idx ON btg(x, y);`
`2795`		`-ANALYZE btg;`
	`2795`	`+VACUUMANALYZE btg;`
`2796`	`2796`	`SET enable_hashagg = off;`
`2797`	`2797`	`SET enable_seqscan = off;`
`2798`	`2798`	`-- Utilize the ordering of index scan to avoid a Sort operation`
`@@ -2839,21 +2839,19 @@ EXPLAIN (COSTS OFF)`
`2839`	`2839`	`SELECT count(*)`
`2840`	`2840`	`FROM btg t1 JOIN btg t2 ON t1.z = t2.z AND t1.w = t2.w AND t1.x = t2.x`
`2841`	`2841`	`GROUP BY t1.x, t1.y, t1.z, t1.w;`
`2842`		`-QUERY PLAN`
`2843`		`--------------------------------------------------------------------------------`
	`2842`	`+ QUERY PLAN`
	`2843`	`+----------------------------------------------------------------`
`2844`	`2844`	`GroupAggregate`
`2845`		`- Group Key: t1.x, t1.y, t1.z, t1.w`
	`2845`	`+ Group Key: t1.w, t1.x, t1.y, t1.z`
`2846`	`2846`	`-> Sort`
`2847`		`- Sort Key: t1.x, t1.y, t1.z, t1.w`
	`2847`	`+ Sort Key: t1.w, t1.x, t1.y, t1.z`
`2848`	`2848`	`-> Merge Join`
`2849`		`- Merge Cond: ((t1.w = t2.w) AND (t1.z = t2.z) AND (t1.x = t2.x))`
`2850`		`- -> Sort`
`2851`		`- Sort Key: t1.w, t1.z, t1.x`
`2852`		`- -> Index Scan using btg_x_y_idx on btg t1`
`2853`		`- -> Sort`
`2854`		`- Sort Key: t2.w, t2.z, t2.x`
	`2849`	`+ Merge Cond: (t1.x = t2.x)`
	`2850`	`+ Join Filter: ((t2.z = t1.z) AND (t2.w = t1.w))`
	`2851`	`+ -> Index Scan using btg_x_y_idx on btg t1`
	`2852`	`+ -> Materialize`
`2855`	`2853`	`-> Index Scan using btg_x_y_idx on btg t2`
`2856`		`-(12 rows)`
	`2854`	`+(10 rows)`
`2857`	`2855`
`2858`	`2856`	`RESET enable_nestloop;`
`2859`	`2857`	`RESET enable_hashjoin;`
`@@ -2877,11 +2875,12 @@ SELECT count() FROM btg GROUP BY w, x, y, z ORDER BY xx, z;`
`2877`	`2875`	`Sort`
`2878`	`2876`	`Sort Key: ((x * x)), z`
`2879`	`2877`	`-> GroupAggregate`
`2880`		`- Group Key: w, x, y, z`
`2881`		`- -> Sort`
`2882`		`- Sort Key: w, x, y, z`
	`2878`	`+ Group Key: x, y, w, z`
	`2879`	`+ -> Incremental Sort`
	`2880`	`+ Sort Key: x, y, w, z`
	`2881`	`+ Presorted Key: x, y`
`2883`	`2882`	`-> Index Scan using btg_x_y_idx on btg`
`2884`		`-(7 rows)`
	`2883`	`+(8 rows)`
`2885`	`2884`
`2886`	`2885`	`-- Test the case where the number of incoming subtree path keys is more than`
`2887`	`2886`	`-- the number of grouping keys.`
`@@ -2918,9 +2917,9 @@ GROUP BY c1.w, c1.z;`
`2918`	`2917`	`QUERY PLAN`
`2919`	`2918`	`-----------------------------------------------------`
`2920`	`2919`	`GroupAggregate`
`2921`		`- Group Key: c1.w, c1.z`
	`2920`	`+ Group Key: c1.z, c1.w`
`2922`	`2921`	`-> Sort`
`2923`		`- Sort Key: c1.w, c1.z, c1.x, c1.y`
	`2922`	`+ Sort Key: c1.z, c1.w, c1.x, c1.y`
`2924`	`2923`	`-> Merge Join`
`2925`	`2924`	`Merge Cond: (c1.x = c2.x)`
`2926`	`2925`	`-> Sort`

`‎src/test/regress/sql/aggregates.sql`

Lines changed: 5 additions & 5 deletions

Original file line number	Diff line number	Diff line change
`@@ -1199,13 +1199,13 @@ ROLLBACK;`
`1199`	`1199`
`1200`	`1200`	`-- GROUP BY optimization by reordering GROUP BY clauses`
`1201`	`1201`	`CREATETABLEbtgASSELECT`
`1202`		`- i %10AS x,`
`1203`		`- i %10AS y,`
`1204`		`-'abc'\|\| i %10AS z,`
	`1202`	`+ i %231AS x,`
	`1203`	`+ i %49AS y,`
	`1204`	`+'abc'\|\| i %2AS z,`
`1205`	`1205`	`iAS w`
`1206`		`-FROM generate_series(1,100)AS i;`
	`1206`	`+FROM generate_series(1,1000)AS i;`
`1207`	`1207`	`CREATEINDEXbtg_x_y_idxON btg(x, y);`
`1208`		`-ANALYZE btg;`
	`1208`	`+VACUUMANALYZE btg;`
`1209`	`1209`
`1210`	`1210`	`SET enable_hashagg= off;`
`1211`	`1211`	`SET enable_seqscan= off;`

0 commit comments

Comments

(0)

Movatterモバイル変換

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Commitfa52221

File tree

3 files changed

3 files changed

`‎src/backend/optimizer/path/pathkeys.c`

`‎src/test/regress/expected/aggregates.out`

`‎src/test/regress/sql/aggregates.sql`

0 commit comments