Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commitfa52221

Browse files
committed
Add GROUP-BY strategy: put the most distinct column at the head.
Let's allow GROUP-BY to utilize cost_sort feature which can differentiateorders of pathkeys lists according to the ndistinct of the first column.Task: 9578.Tags: optimized_group_by.
1 parente5f5238 commitfa52221

File tree

3 files changed

+96
-30
lines changed

3 files changed

+96
-30
lines changed

‎src/backend/optimizer/path/pathkeys.c

Lines changed: 70 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
#include"optimizer/paths.h"
2727
#include"partitioning/partbounds.h"
2828
#include"utils/lsyscache.h"
29+
#include"utils/selfuncs.h"
2930

3031
/* Consider reordering of GROUP BY keys? */
3132
boolenable_group_by_reordering= true;
@@ -471,6 +472,10 @@ get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
471472
List*pathkeys=root->group_pathkeys;
472473
List*clauses=root->processed_groupClause;
473474

475+
doublend_max=0;
476+
PathKey*pk_opt=NULL;
477+
ListCell*lc1,*lc2;
478+
474479
/* always return at least the original pathkeys/clauses */
475480
info=makeNode(GroupByOrdering);
476481
info->pathkeys=pathkeys;
@@ -524,9 +529,6 @@ get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
524529
/* Test consistency of info structures */
525530
for_each_from(lc,infos,1)
526531
{
527-
ListCell*lc1,
528-
*lc2;
529-
530532
info=lfirst_node(GroupByOrdering,lc);
531533

532534
Assert(list_length(info->clauses)==list_length(pinfo->clauses));
@@ -544,6 +546,71 @@ get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
544546
}
545547
}
546548
#endif
549+
550+
/*
551+
* Let's try the order with the column having max ndistinct value
552+
*/
553+
554+
forboth(lc1,root->group_pathkeys,lc2,root->processed_groupClause)
555+
{
556+
PathKey*pkey=lfirst_node(PathKey,lc1);
557+
SortGroupClause*gc= (SortGroupClause*)lfirst(lc2);
558+
Node*node;
559+
Bitmapset*relids;
560+
VariableStatDatavardata;
561+
doublend=-1;
562+
boolisdefault;
563+
564+
if (foreach_current_index(lc1) >=root->num_groupby_pathkeys)
565+
break;
566+
567+
node=get_sortgroupclause_expr(gc,root->parse->targetList);
568+
relids=pull_varnos(root,node);
569+
570+
if (bms_num_members(relids)!=1&&bms_is_member(0,relids))
571+
/*
572+
*Although functional index can estimate distincts here, the chance
573+
* is too low.
574+
*/
575+
continue;
576+
577+
examine_variable(root,node,0,&vardata);
578+
if (!HeapTupleIsValid(vardata.statsTuple))
579+
continue;
580+
nd=get_variable_numdistinct(&vardata,&isdefault);
581+
ReleaseVariableStats(vardata);
582+
if (isdefault)
583+
continue;
584+
585+
Assert(nd >=0);
586+
if (nd_max==0||nd>nd_max)
587+
{
588+
nd_max=nd;
589+
pk_opt=pkey;
590+
}
591+
}
592+
593+
if (pk_opt!=NULL)
594+
{
595+
List*new_pathkeys=list_make1(pk_opt);
596+
intn;
597+
598+
new_pathkeys=list_concat_unique_ptr(new_pathkeys,root->group_pathkeys);
599+
n=group_keys_reorder_by_pathkeys(new_pathkeys,&pathkeys,&clauses,
600+
root->num_groupby_pathkeys);
601+
602+
if (n>0&&
603+
(enable_incremental_sort||n==root->num_groupby_pathkeys)&&
604+
compare_pathkeys(pathkeys,root->group_pathkeys)!=PATHKEYS_EQUAL)
605+
{
606+
info=makeNode(GroupByOrdering);
607+
info->pathkeys=pathkeys;
608+
info->clauses=clauses;
609+
610+
infos=lappend(infos,info);
611+
}
612+
}
613+
547614
returninfos;
548615
}
549616

‎src/test/regress/expected/aggregates.out

Lines changed: 21 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -2786,13 +2786,13 @@ SELECT balk(hundred) FROM tenk1;
27862786
ROLLBACK;
27872787
-- GROUP BY optimization by reordering GROUP BY clauses
27882788
CREATE TABLE btg AS SELECT
2789-
i %10 AS x,
2790-
i %10 AS y,
2791-
'abc' || i %10 AS z,
2789+
i %231 AS x,
2790+
i %49 AS y,
2791+
'abc' || i %2 AS z,
27922792
i AS w
2793-
FROM generate_series(1,100) AS i;
2793+
FROM generate_series(1,1000) AS i;
27942794
CREATE INDEX btg_x_y_idx ON btg(x, y);
2795-
ANALYZE btg;
2795+
VACUUMANALYZE btg;
27962796
SET enable_hashagg = off;
27972797
SET enable_seqscan = off;
27982798
-- Utilize the ordering of index scan to avoid a Sort operation
@@ -2839,21 +2839,19 @@ EXPLAIN (COSTS OFF)
28392839
SELECT count(*)
28402840
FROM btg t1 JOIN btg t2 ON t1.z = t2.z AND t1.w = t2.w AND t1.x = t2.x
28412841
GROUP BY t1.x, t1.y, t1.z, t1.w;
2842-
QUERY PLAN
2843-
-------------------------------------------------------------------------------
2842+
QUERY PLAN
2843+
----------------------------------------------------------------
28442844
GroupAggregate
2845-
Group Key: t1.x, t1.y, t1.z, t1.w
2845+
Group Key: t1.w, t1.x, t1.y, t1.z
28462846
-> Sort
2847-
Sort Key: t1.x, t1.y, t1.z, t1.w
2847+
Sort Key: t1.w, t1.x, t1.y, t1.z
28482848
-> Merge Join
2849-
Merge Cond: ((t1.w = t2.w) AND (t1.z = t2.z) AND (t1.x = t2.x))
2850-
-> Sort
2851-
Sort Key: t1.w, t1.z, t1.x
2852-
-> Index Scan using btg_x_y_idx on btg t1
2853-
-> Sort
2854-
Sort Key: t2.w, t2.z, t2.x
2849+
Merge Cond: (t1.x = t2.x)
2850+
Join Filter: ((t2.z = t1.z) AND (t2.w = t1.w))
2851+
-> Index Scan using btg_x_y_idx on btg t1
2852+
-> Materialize
28552853
-> Index Scan using btg_x_y_idx on btg t2
2856-
(12 rows)
2854+
(10 rows)
28572855

28582856
RESET enable_nestloop;
28592857
RESET enable_hashjoin;
@@ -2877,11 +2875,12 @@ SELECT count(*) FROM btg GROUP BY w, x, y, z ORDER BY x*x, z;
28772875
Sort
28782876
Sort Key: ((x * x)), z
28792877
-> GroupAggregate
2880-
Group Key: w, x, y, z
2881-
-> Sort
2882-
Sort Key: w, x, y, z
2878+
Group Key: x, y, w, z
2879+
-> Incremental Sort
2880+
Sort Key: x, y, w, z
2881+
Presorted Key: x, y
28832882
-> Index Scan using btg_x_y_idx on btg
2884-
(7 rows)
2883+
(8 rows)
28852884

28862885
-- Test the case where the number of incoming subtree path keys is more than
28872886
-- the number of grouping keys.
@@ -2918,9 +2917,9 @@ GROUP BY c1.w, c1.z;
29182917
QUERY PLAN
29192918
-----------------------------------------------------
29202919
GroupAggregate
2921-
Group Key: c1.w, c1.z
2920+
Group Key: c1.z, c1.w
29222921
-> Sort
2923-
Sort Key: c1.w, c1.z, c1.x, c1.y
2922+
Sort Key: c1.z, c1.w, c1.x, c1.y
29242923
-> Merge Join
29252924
Merge Cond: (c1.x = c2.x)
29262925
-> Sort

‎src/test/regress/sql/aggregates.sql

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1199,13 +1199,13 @@ ROLLBACK;
11991199

12001200
-- GROUP BY optimization by reordering GROUP BY clauses
12011201
CREATETABLEbtgASSELECT
1202-
i %10AS x,
1203-
i %10AS y,
1204-
'abc'|| i %10AS z,
1202+
i %231AS x,
1203+
i %49AS y,
1204+
'abc'|| i %2AS z,
12051205
iAS w
1206-
FROM generate_series(1,100)AS i;
1206+
FROM generate_series(1,1000)AS i;
12071207
CREATEINDEXbtg_x_y_idxON btg(x, y);
1208-
ANALYZE btg;
1208+
VACUUMANALYZE btg;
12091209

12101210
SET enable_hashagg= off;
12111211
SET enable_seqscan= off;

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp