NotificationsYou must be signed in to change notification settings
Fork6
Star31

Commit505c008

committed

Restore preprocess_groupclause()

0452b46 made optimizer explore alternative orderings of group-by pathkeys.It eliminated preprocess_groupclause(), which was intended to match itemsbetween GROUP BY and ORDER BY. Instead, get_useful_group_keys_orderings()function generates orderings of GROUP BY elements at the time of groupingpaths generation. The get_useful_group_keys_orderings() function takes intoaccount 3 orderings of GROUP BY pathkeys and clauses: original order as writtenin GROUP BY, matching ORDER BY clauses as much as possible, and matching theinput path as much as possible. Given that even before0452b46,preprocess_groupclause() could change the original order of GROUP BY clauseswe don't need to consider it apart from ordering matching ORDER BY clauses.This commit restores preprocess_groupclause() to provide an ordering ofGROUP BY elements matching ORDER BY before generation of paths. The newversion of preprocess_groupclause() takes into account an incremental sort.The get_useful_group_keys_orderings() function now takes into 2 orderings ofGROUP BY elements: the order generated preprocess_groupclause() and the ordermatching the input path as much as possible.Discussion:https://postgr.es/m/CAPpHfdvyWLMGwvxaf%3D7KAp-z-4mxbSH8ti2f6mNOQv5metZFzg%40mail.gmail.comAuthor: Alexander KorotkovReviewed-by: Andrei Lepikhov, Pavel Borisov

1 parent0c1af2c commit505c008Copy full SHA for 505c008

File tree

4 files changed

+108

-67

lines changed

src
- backend/optimizer
  - path
    - pathkeys.c
  - plan
    - planner.c
- include/nodes
  - pathnodes.h
- test/regress/expected
  - partition_aggregate.out

4 files changed

+108

-67

lines changed

`‎src/backend/optimizer/path/pathkeys.c`

Lines changed: 5 additions & 50 deletions

Original file line number	Diff line number	Diff line change
`@@ -447,26 +447,6 @@ group_keys_reorder_by_pathkeys(List pathkeys, List *group_pathkeys,`
`447`	`447`	`returnn;`
`448`	`448`	`}`
`449`	`449`
`450`		`-/*`
`451`		`- * pathkeys_are_duplicate`
`452`		`- *Check if give pathkeys are already contained the list of`
`453`		`- *GroupByOrdering's.`
`454`		`- */`
`455`		`-staticbool`
`456`		`-pathkeys_are_duplicate(Listinfos,Listpathkeys)`
`457`		`-{`
`458`		`-ListCell*lc;`
`459`		`-`
`460`		`-foreach(lc,infos)`
`461`		`-{`
`462`		`-GroupByOrdering*info=lfirst_node(GroupByOrdering,lc);`
`463`		`-`
`464`		`-if (compare_pathkeys(pathkeys,info->pathkeys)==PATHKEYS_EQUAL)`
`465`		`-return true;`
`466`		`-}`
`467`		`-return false;`
`468`		`-}`
`469`		`-`
`470`	`450`	`/*`
`471`	`451`	`* get_useful_group_keys_orderings`
`472`	`452`	`*Determine which orderings of GROUP BY keys are potentially interesting.`
`@@ -475,11 +455,11 @@ pathkeys_are_duplicate(List infos, List pathkeys)`
`475`	`455`	`* ordering of GROUP BY keys. Each item stores pathkeys and clauses in the`
`476`	`456`	`* matching order.`
`477`	`457`	`*`
`478`		`- * The function considers (and keeps)multiple GROUP BY orderings:`
	`458`	`+ * The function considers (and keeps)following GROUP BY orderings:`
`479`	`459`	`*`
`480`		`- * -the original ordering, asspecified bythe GROUP BY clause,`
`481`		`- *- GROUPBYkeys reordered to match 'path' ordering (as much as possible),`
`482`		`- * - GROUP BY keys to matchtarget ORDER BY clause (as much as possible).`
	`460`	`+ * -GROUP BY keys asordered bypreprocess_groupclause() to match target`
	`461`	`+ * ORDERBYclause (as much as possible),`
	`462`	`+ * - GROUP BY keysreorderedto match'path' ordering (as much as possible).`
`483`	`463`	`*/`
`484`	`464`	`List*`
`485`	`465`	`get_useful_group_keys_orderings(PlannerInforoot,Pathpath)`
`@@ -526,32 +506,7 @@ get_useful_group_keys_orderings(PlannerInfo root, Path path)`
`526`	`506`
`527`	`507`	`if (n>0&&`
`528`	`508`	`(enable_incremental_sort\|\|n==root->num_groupby_pathkeys)&&`
`529`		`-!pathkeys_are_duplicate(infos,pathkeys))`
`530`		`-{`
`531`		`-info=makeNode(GroupByOrdering);`
`532`		`-info->pathkeys=pathkeys;`
`533`		`-info->clauses=clauses;`
`534`		`-`
`535`		`-infos=lappend(infos,info);`
`536`		`-}`
`537`		`-}`
`538`		`-`
`539`		`-/*`
`540`		`- * Try reordering pathkeys to minimize the sort cost (this time consider`
`541`		`- * the ORDER BY clause).`
`542`		`- */`
`543`		`-if (root->sort_pathkeys&&`
`544`		`-!pathkeys_contained_in(root->sort_pathkeys,root->group_pathkeys))`
`545`		`-{`
`546`		`-intn;`
`547`		`-`
`548`		`-n=group_keys_reorder_by_pathkeys(root->sort_pathkeys,&pathkeys,`
`549`		`-&clauses,`
`550`		`-root->num_groupby_pathkeys);`
`551`		`-`
`552`		`-if (n>0&&`
`553`		`-(enable_incremental_sort\|\|n==list_length(root->sort_pathkeys))&&`
`554`		`-!pathkeys_are_duplicate(infos,pathkeys))`
	`509`	`+compare_pathkeys(pathkeys,root->group_pathkeys)!=PATHKEYS_EQUAL)`
`555`	`510`	`{`
`556`	`511`	`info=makeNode(GroupByOrdering);`
`557`	`512`	`info->pathkeys=pathkeys;`

`‎src/backend/optimizer/plan/planner.c`

Lines changed: 95 additions & 13 deletions

Original file line number	Diff line number	Diff line change
`@@ -137,7 +137,7 @@ static double preprocess_limit(PlannerInfo *root,`
`137`	`137`	`doubletuple_fraction,`
`138`	`138`	`int64offset_est,int64count_est);`
`139`	`139`	`staticvoidremove_useless_groupby_columns(PlannerInfo*root);`
`140`		`-staticListgroupclause_apply_groupingset(PlannerInforoot,List*force);`
	`140`	`+staticListpreprocess_groupclause(PlannerInforoot,List*force);`
`141`	`141`	`staticListextract_rollup_sets(ListgroupingSets);`
`142`	`142`	`staticListreorder_grouping_sets(ListgroupingSets,List*sortclause);`
`143`	`143`	`staticvoidstandard_qp_callback(PlannerInforoot,voidextra);`
`@@ -1422,7 +1422,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction,`
`1422`	`1422`	`elseif (parse->groupClause)`
`1423`	`1423`	`{`
`1424`	`1424`	`/* Preprocess regular GROUP BY clause, if any */`
`1425`		`-root->processed_groupClause=list_copy(parse->groupClause);`
	`1425`	`+root->processed_groupClause=preprocess_groupclause(root,NIL);`
`1426`	`1426`	`/* Remove any redundant GROUP BY columns */`
`1427`	`1427`	`remove_useless_groupby_columns(root);`
`1428`	`1428`	`}`
`@@ -2169,7 +2169,7 @@ preprocess_grouping_sets(PlannerInfo *root)`
`2169`	`2169`	`* The groupClauses for hashed grouping sets are built later on.)`
`2170`	`2170`	`*/`
`2171`	`2171`	`if (gs->set)`
`2172`		`-rollup->groupClause=groupclause_apply_groupingset(root,gs->set);`
	`2172`	`+rollup->groupClause=preprocess_groupclause(root,gs->set);`
`2173`	`2173`	`else`
`2174`	`2174`	`rollup->groupClause=NIL;`
`2175`	`2175`
`@@ -2821,24 +2821,106 @@ remove_useless_groupby_columns(PlannerInfo *root)`
`2821`	`2821`	`}`
`2822`	`2822`
`2823`	`2823`	`/*`
`2824`		`- * groupclause_apply_groupingset`
`2825`		`- *Apply the order of GROUP BY clauses defined by grouping sets. Items`
`2826`		`- *not in the grouping set are skipped.`
	`2824`	`+ * preprocess_groupclause - do preparatory work on GROUP BY clause`
	`2825`	`+ *`
	`2826`	`+ * The idea here is to adjust the ordering of the GROUP BY elements`
	`2827`	`+ * (which in itself is semantically insignificant) to match ORDER BY,`
	`2828`	`+ * thereby allowing a single sort operation to both implement the ORDER BY`
	`2829`	`+ * requirement and set up for a Unique step that implements GROUP BY.`
	`2830`	`+ * We also consider partial match between GROUP BY and ORDER BY elements,`
	`2831`	`+ * which could allow to implement ORDER BY using the incremental sort.`
	`2832`	`+ *`
	`2833`	`+ * We also consider other orderings of the GROUP BY elements, which could`
	`2834`	`+ * match the sort ordering of other possible plans (eg an indexscan) and`
	`2835`	`+ * thereby reduce cost. This is implemented during the generation of grouping`
	`2836`	`+ * paths. See get_useful_group_keys_orderings() for details.`
	`2837`	`+ *`
	`2838`	`+ * Note: we need no comparable processing of the distinctClause because`
	`2839`	`+ * the parser already enforced that that matches ORDER BY.`
	`2840`	`+ *`
	`2841`	`+ * Note: we return a fresh List, but its elements are the same`
	`2842`	`+ * SortGroupClauses appearing in parse->groupClause. This is important`
	`2843`	`+ * because later processing may modify the processed_groupClause list.`
	`2844`	`+ *`
	`2845`	`+ * For grouping sets, the order of items is instead forced to agree with that`
	`2846`	`+ * of the grouping set (and items not in the grouping set are skipped). The`
	`2847`	`+ * work of sorting the order of grouping set elements to match the ORDER BY if`
	`2848`	`+ * possible is done elsewhere.`
`2827`	`2849`	`*/`
`2828`	`2850`	`staticList*`
`2829`		`-groupclause_apply_groupingset(PlannerInforoot,Listgset)`
	`2851`	`+preprocess_groupclause(PlannerInforoot,Listforce)`
`2830`	`2852`	`{`
`2831`	`2853`	`Query*parse=root->parse;`
`2832`	`2854`	`List*new_groupclause=NIL;`
`2833`	`2855`	`ListCell*sl;`
	`2856`	`+ListCell*gl;`
`2834`	`2857`
`2835`		`-foreach(sl,gset)`
	`2858`	`+/* For grouping sets, we need to force the ordering */`
	`2859`	`+if (force)`
`2836`	`2860`	`{`
`2837`		`-Indexref=lfirst_int(sl);`
`2838`		`-SortGroupClause*cl=get_sortgroupref_clause(ref,parse->groupClause);`
	`2861`	`+foreach(sl,force)`
	`2862`	`+{`
	`2863`	`+Indexref=lfirst_int(sl);`
	`2864`	`+SortGroupClause*cl=get_sortgroupref_clause(ref,parse->groupClause);`
	`2865`	`+`
	`2866`	`+new_groupclause=lappend(new_groupclause,cl);`
	`2867`	`+}`
`2839`	`2868`
`2840`		`-new_groupclause=lappend(new_groupclause,cl);`
	`2869`	`+returnnew_groupclause;`
`2841`	`2870`	`}`
	`2871`	`+`
	`2872`	`+/* If no ORDER BY, nothing useful to do here */`
	`2873`	`+if (parse->sortClause==NIL)`
	`2874`	`+returnlist_copy(parse->groupClause);`
	`2875`	`+`
	`2876`	`+/*`
	`2877`	`+ * Scan the ORDER BY clause and construct a list of matching GROUP BY`
	`2878`	`+ * items, but only as far as we can make a matching prefix.`
	`2879`	`+ *`
	`2880`	`+ * This code assumes that the sortClause contains no duplicate items.`
	`2881`	`+ */`
	`2882`	`+foreach(sl,parse->sortClause)`
	`2883`	`+{`
	`2884`	`+SortGroupClause*sc=lfirst_node(SortGroupClause,sl);`
	`2885`	`+`
	`2886`	`+foreach(gl,parse->groupClause)`
	`2887`	`+{`
	`2888`	`+SortGroupClause*gc=lfirst_node(SortGroupClause,gl);`
	`2889`	`+`
	`2890`	`+if (equal(gc,sc))`
	`2891`	`+{`
	`2892`	`+new_groupclause=lappend(new_groupclause,gc);`
	`2893`	`+break;`
	`2894`	`+}`
	`2895`	`+}`
	`2896`	`+if (gl==NULL)`
	`2897`	`+break;/* no match, so stop scanning */`
	`2898`	`+}`
	`2899`	`+`
	`2900`	`+`
	`2901`	`+/* If no match at all, no point in reordering GROUP BY */`
	`2902`	`+if (new_groupclause==NIL)`
	`2903`	`+returnlist_copy(parse->groupClause);`
	`2904`	`+`
	`2905`	`+/*`
	`2906`	`+ * Add any remaining GROUP BY items to the new list. We don't require a`
	`2907`	`+ * complete match, because even partial match allows ORDER BY to be`
	`2908`	`+ * implemented using incremental sort. Also, give up if there are any`
	`2909`	`+ * non-sortable GROUP BY items, since then there's no hope anyway.`
	`2910`	`+ */`
	`2911`	`+foreach(gl,parse->groupClause)`
	`2912`	`+{`
	`2913`	`+SortGroupClause*gc=lfirst_node(SortGroupClause,gl);`
	`2914`	`+`
	`2915`	`+if (list_member_ptr(new_groupclause,gc))`
	`2916`	`+continue;/* it matched an ORDER BY item */`
	`2917`	`+if (!OidIsValid(gc->sortop))/* give up, GROUP BY can't be sorted */`
	`2918`	`+returnlist_copy(parse->groupClause);`
	`2919`	`+new_groupclause=lappend(new_groupclause,gc);`
	`2920`	`+}`
	`2921`	`+`
	`2922`	`+/* Success --- install the rearranged GROUP BY list */`
	`2923`	`+Assert(list_length(parse->groupClause)==list_length(new_groupclause));`
`2842`	`2924`	`returnnew_groupclause;`
`2843`	`2925`	`}`
`2844`	`2926`
`@@ -4170,7 +4252,7 @@ consider_groupingsets_paths(PlannerInfo *root,`
`4170`	`4252`	`{`
`4171`	`4253`	`rollup=makeNode(RollupData);`
`4172`	`4254`
`4173`		`-rollup->groupClause=groupclause_apply_groupingset(root,gset);`
	`4255`	`+rollup->groupClause=preprocess_groupclause(root,gset);`
`4174`	`4256`	`rollup->gsets_data=list_make1(gs);`
`4175`	`4257`	`rollup->gsets=remap_to_groupclause_idx(rollup->groupClause,`
`4176`	`4258`	`rollup->gsets_data,`
`@@ -4359,7 +4441,7 @@ consider_groupingsets_paths(PlannerInfo *root,`
`4359`	`4441`
`4360`	`4442`	`Assert(gs->set!=NIL);`
`4361`	`4443`
`4362`		`-rollup->groupClause=groupclause_apply_groupingset(root,gs->set);`
	`4444`	`+rollup->groupClause=preprocess_groupclause(root,gs->set);`
`4363`	`4445`	`rollup->gsets_data=list_make1(gs);`
`4364`	`4446`	`rollup->gsets=remap_to_groupclause_idx(rollup->groupClause,`
`4365`	`4447`	`rollup->gsets_data,`

`‎src/include/nodes/pathnodes.h`

Lines changed: 5 additions & 1 deletion

Original file line number	Diff line number	Diff line change
`@@ -426,7 +426,11 @@ struct PlannerInfo`
`426`	`426`	`* items to be proven redundant, implying that there is only one group`
`427`	`427`	`* containing all the query's rows. Hence, if you want to check whether`
`428`	`428`	`* GROUP BY was specified, test for nonempty parse->groupClause, not for`
`429`		`- * nonempty processed_groupClause.`
	`429`	`+ * nonempty processed_groupClause. Optimizer chooses specific order of`
	`430`	`+ * group-by clauses during the upper paths generation process, attempting`
	`431`	`+ * to use different strategies to minimize number of sorts or engage`
	`432`	`+ * incremental sort. See preprocess_groupclause() and`
	`433`	`+ * get_useful_group_keys_orderings() for details.`
`430`	`434`	`*`
`431`	`435`	`* Currently, when grouping sets are specified we do not attempt to`
`432`	`436`	`* optimize the groupClause, so that processed_groupClause will be`

`‎src/test/regress/expected/partition_aggregate.out`

Lines changed: 3 additions & 3 deletions

Original file line number	Diff line number	Diff line change
`@@ -901,15 +901,15 @@ SELECT a, c, sum(b), avg(c), count(*) FROM pagg_tab_m GROUP BY (a+b)/2, 2, 1 HAV`
`901`	`901`	`Sort Key: pagg_tab_m.a, pagg_tab_m.c, (sum(pagg_tab_m.b))`
`902`	`902`	`-> Append`
`903`	`903`	`-> HashAggregate`
`904`		`- Group Key: ((pagg_tab_m.a + pagg_tab_m.b) / 2), pagg_tab_m.c, pagg_tab_m.a`
	`904`	`+ Group Key:pagg_tab_m.a, pagg_tab_m.c,((pagg_tab_m.a + pagg_tab_m.b) / 2)`
`905`	`905`	`Filter: ((sum(pagg_tab_m.b) = 50) AND (avg(pagg_tab_m.c) > '25'::numeric))`
`906`	`906`	`-> Seq Scan on pagg_tab_m_p1 pagg_tab_m`
`907`	`907`	`-> HashAggregate`
`908`		`- Group Key: ((pagg_tab_m_1.a + pagg_tab_m_1.b) / 2), pagg_tab_m_1.c, pagg_tab_m_1.a`
	`908`	`+ Group Key:pagg_tab_m_1.a, pagg_tab_m_1.c,((pagg_tab_m_1.a + pagg_tab_m_1.b) / 2)`
`909`	`909`	`Filter: ((sum(pagg_tab_m_1.b) = 50) AND (avg(pagg_tab_m_1.c) > '25'::numeric))`
`910`	`910`	`-> Seq Scan on pagg_tab_m_p2 pagg_tab_m_1`
`911`	`911`	`-> HashAggregate`
`912`		`- Group Key: ((pagg_tab_m_2.a + pagg_tab_m_2.b) / 2), pagg_tab_m_2.c, pagg_tab_m_2.a`
	`912`	`+ Group Key:pagg_tab_m_2.a, pagg_tab_m_2.c,((pagg_tab_m_2.a + pagg_tab_m_2.b) / 2)`
`913`	`913`	`Filter: ((sum(pagg_tab_m_2.b) = 50) AND (avg(pagg_tab_m_2.c) > '25'::numeric))`
`914`	`914`	`-> Seq Scan on pagg_tab_m_p3 pagg_tab_m_2`
`915`	`915`	`(15 rows)`

0 commit comments

Comments

(0)

Movatterモバイル変換

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Commit505c008

File tree

4 files changed

4 files changed

`‎src/backend/optimizer/path/pathkeys.c`

`‎src/backend/optimizer/plan/planner.c`

`‎src/include/nodes/pathnodes.h`

`‎src/test/regress/expected/partition_aggregate.out`

0 commit comments