NotificationsYou must be signed in to change notification settings
Fork6
Star31

Commit0452b46

committed

Explore alternative orderings of group-by pathkeys during optimization.

When evaluating a query with a multi-column GROUP BY clause, we can minimizesort operations or avoid them if we synchronize the order of GROUP BY clauseswith the ORDER BY sort clause or sort order, which comes from the underlyingquery tree. Grouping does not imply any ordering, so we can comparethe keys in arbitrary order, and a Hash Agg leverages this. But for Group Agg,we simply compared keys in the order specified in the query. This commitexplores alternative ordering of the keys, trying to find a cheaper one.The ordering of group keys may interact with other parts of the query, some ofwhich may not be known while planning the grouping. For example, there may bean explicit ORDER BY clause or some other ordering-dependent operation higher upin the query, and using the same ordering may allow using either incrementalsort or even eliminating the sort entirely.The patch always keeps the ordering specified in the query, assuming the usermight have additional insights.This introduces a new GUC enable_group_by_reordering so that the optimizationmay be disabled if needed.Discussion:https://postgr.es/m/7c79e6a5-8597-74e8-0671-1c39d124c9d6%40sigaev.ruAuthor: Andrei Lepikhov, Teodor SigaevReviewed-by: Tomas Vondra, Claudio Freire, Gavin Flower, Dmitry DolgovReviewed-by: Robert Haas, Pavel Borisov, David Rowley, Zhihong YuReviewed-by: Tom Lane, Alexander Korotkov, Richard Guo, Alena Rybakina

1 parent7ab80ac commit0452b46Copy full SHA for 0452b46

File tree

11 files changed

+770

-223

lines changed

src
- backend
  - optimizer
    - path
      - equivclass.c
      - pathkeys.c
    - plan
      - planner.c
  - utils/misc
    - guc_tables.c
    - postgresql.conf.sample
- include
  - nodes
    - pathnodes.h
  - optimizer
    - paths.h
- test/regress
  - expected
    - aggregates.out
    - sysviews.out
  - sql
    - aggregates.sql
- tools/pgindent
  - typedefs.list

11 files changed

+770

-223

lines changed

`‎src/backend/optimizer/path/equivclass.c‎`

Lines changed: 12 additions & 1 deletion

Original file line number	Diff line number	Diff line change
`@@ -652,7 +652,18 @@ get_eclass_for_sort_expr(PlannerInfo *root,`
`652`	`652`
`653`	`653`	`if (opcintype==cur_em->em_datatype&&`
`654`	`654`	`equal(expr,cur_em->em_expr))`
`655`		`-returncur_ec;/* Match! */`
	`655`	`+{`
	`656`	`+/*`
	`657`	`+ * Match!`
	`658`	`+ *`
	`659`	`+ * Copy the sortref if it wasn't set yet. That may happen if`
	`660`	`+ * the ec was constructed from a WHERE clause, i.e. it doesn't`
	`661`	`+ * have a target reference at all.`
	`662`	`+ */`
	`663`	`+if (cur_ec->ec_sortref==0&&sortref>0)`
	`664`	`+cur_ec->ec_sortref=sortref;`
	`665`	`+returncur_ec;`
	`666`	`+}`
`656`	`667`	`}`
`657`	`668`	`}`
`658`	`669`

`‎src/backend/optimizer/path/pathkeys.c‎`

Lines changed: 252 additions & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -22,12 +22,15 @@`
`22`	`22`	`#include"nodes/makefuncs.h"`
`23`	`23`	`#include"nodes/nodeFuncs.h"`
`24`	`24`	`#include"nodes/plannodes.h"`
	`25`	`+#include"optimizer/cost.h"`
`25`	`26`	`#include"optimizer/optimizer.h"`
`26`	`27`	`#include"optimizer/pathnode.h"`
`27`	`28`	`#include"optimizer/paths.h"`
`28`	`29`	`#include"partitioning/partbounds.h"`
`29`	`30`	`#include"utils/lsyscache.h"`
`30`	`31`
	`32`	`+/* Consider reordering of GROUP BY keys? */`
	`33`	`+boolenable_group_by_reordering= true;`
`31`	`34`
`32`	`35`	`staticboolpathkey_is_redundant(PathKeynew_pathkey,Listpathkeys);`
`33`	`36`	`staticboolmatches_boolean_partition_clause(RestrictInfo*rinfo,`
`@@ -350,6 +353,202 @@ pathkeys_contained_in(List keys1, List keys2)`
`350`	`353`	`return false;`
`351`	`354`	`}`
`352`	`355`
	`356`	`+/*`
	`357`	`+ * group_keys_reorder_by_pathkeys`
	`358`	`+ *Reorder GROUP BY keys to match the input pathkeys.`
	`359`	`+ *`
	`360`	`+ * Function returns new lists (pathkeys and clauses), original GROUP BY lists`
	`361`	`+ * stay untouched.`
	`362`	`+ *`
	`363`	`+ * Returns the number of GROUP BY keys with a matching pathkey.`
	`364`	`+ */`
	`365`	`+staticint`
	`366`	`+group_keys_reorder_by_pathkeys(Listpathkeys,List*group_pathkeys,`
	`367`	`+List**group_clauses,`
	`368`	`+intnum_groupby_pathkeys)`
	`369`	`+{`
	`370`	`+List*new_group_pathkeys=NIL,`
	`371`	`+*new_group_clauses=NIL;`
	`372`	`+ListCell*lc;`
	`373`	`+intn;`
	`374`	`+`
	`375`	`+if (pathkeys==NIL\|\|*group_pathkeys==NIL)`
	`376`	`+return0;`
	`377`	`+`
	`378`	`+/*`
	`379`	`+ * Walk the pathkeys (determining ordering of the input path) and see if`
	`380`	`+ * there's a matching GROUP BY key. If we find one, we append it to the`
	`381`	`+ * list, and do the same for the clauses.`
	`382`	`+ *`
	`383`	`+ * Once we find the first pathkey without a matching GROUP BY key, the`
	`384`	`+ * rest of the pathkeys are useless and can't be used to evaluate the`
	`385`	`+ * grouping, so we abort the loop and ignore the remaining pathkeys.`
	`386`	`+ */`
	`387`	`+foreach(lc,pathkeys)`
	`388`	`+{`
	`389`	`+PathKeypathkey= (PathKey)lfirst(lc);`
	`390`	`+SortGroupClause*sgc;`
	`391`	`+`
	`392`	`+/*`
	`393`	`+ * Pathkeys are built in a way that allows simply comparing pointers.`
	`394`	`+ * Give up if we can't find the matching pointer. Also give up if`
	`395`	`+ * there is no sortclause reference for some reason.`
	`396`	`+ */`
	`397`	`+if (foreach_current_index(lc) >=num_groupby_pathkeys\|\|`
	`398`	`+!list_member_ptr(*group_pathkeys,pathkey)\|\|`
	`399`	`+pathkey->pk_eclass->ec_sortref==0)`
	`400`	`+break;`
	`401`	`+`
	`402`	`+/*`
	`403`	`+ * Since 1349d27 pathkey coming from underlying node can be in the`
	`404`	`+ * root->group_pathkeys but not in the processed_groupClause. So, we`
	`405`	`+ * should be careful here.`
	`406`	`+ */`
	`407`	`+sgc=get_sortgroupref_clause_noerr(pathkey->pk_eclass->ec_sortref,`
	`408`	`+*group_clauses);`
	`409`	`+if (!sgc)`
	`410`	`+/* The grouping clause does not cover this pathkey */`
	`411`	`+break;`
	`412`	`+`
	`413`	`+/*`
	`414`	`+ * Sort group clause should have an ordering operator as long as there`
	`415`	`+ * is an associated pathkey.`
	`416`	`+ */`
	`417`	`+Assert(OidIsValid(sgc->sortop));`
	`418`	`+`
	`419`	`+new_group_pathkeys=lappend(new_group_pathkeys,pathkey);`
	`420`	`+new_group_clauses=lappend(new_group_clauses,sgc);`
	`421`	`+}`
	`422`	`+`
	`423`	`+/* remember the number of pathkeys with a matching GROUP BY key */`
	`424`	`+n=list_length(new_group_pathkeys);`
	`425`	`+`
	`426`	`+/* append the remaining group pathkeys (will be treated as not sorted) */`
	`427`	`+*group_pathkeys=list_concat_unique_ptr(new_group_pathkeys,`
	`428`	`+*group_pathkeys);`
	`429`	`+*group_clauses=list_concat_unique_ptr(new_group_clauses,`
	`430`	`+*group_clauses);`
	`431`	`+`
	`432`	`+returnn;`
	`433`	`+}`
	`434`	`+`
	`435`	`+/*`
	`436`	`+ * pathkeys_are_duplicate`
	`437`	`+ *Check if give pathkeys are already contained the list of`
	`438`	`+ *PathKeyInfo's.`
	`439`	`+ */`
	`440`	`+staticbool`
	`441`	`+pathkeys_are_duplicate(Listinfos,Listpathkeys)`
	`442`	`+{`
	`443`	`+ListCell*lc;`
	`444`	`+`
	`445`	`+foreach(lc,infos)`
	`446`	`+{`
	`447`	`+PathKeyInfo*info=lfirst_node(PathKeyInfo,lc);`
	`448`	`+`
	`449`	`+if (compare_pathkeys(pathkeys,info->pathkeys)==PATHKEYS_EQUAL)`
	`450`	`+return true;`
	`451`	`+}`
	`452`	`+return false;`
	`453`	`+}`
	`454`	`+`
	`455`	`+/*`
	`456`	`+ * get_useful_group_keys_orderings`
	`457`	`+ *Determine which orderings of GROUP BY keys are potentially interesting.`
	`458`	`+ *`
	`459`	`+ * Returns a list of PathKeyInfo items, each representing an interesting`
	`460`	`+ * ordering of GROUP BY keys. Each item stores pathkeys and clauses in the`
	`461`	`+ * matching order.`
	`462`	`+ *`
	`463`	`+ * The function considers (and keeps) multiple GROUP BY orderings:`
	`464`	`+ *`
	`465`	`+ * - the original ordering, as specified by the GROUP BY clause,`
	`466`	`+ * - GROUP BY keys reordered to match 'path' ordering (as much as possible),`
	`467`	`+ * - GROUP BY keys to match target ORDER BY clause (as much as possible).`
	`468`	`+ */`
	`469`	`+List*`
	`470`	`+get_useful_group_keys_orderings(PlannerInforoot,Pathpath)`
	`471`	`+{`
	`472`	`+Query*parse=root->parse;`
	`473`	`+List*infos=NIL;`
	`474`	`+PathKeyInfo*info;`
	`475`	`+`
	`476`	`+List*pathkeys=root->group_pathkeys;`
	`477`	`+List*clauses=root->processed_groupClause;`
	`478`	`+`
	`479`	`+/* always return at least the original pathkeys/clauses */`
	`480`	`+info=makeNode(PathKeyInfo);`
	`481`	`+info->pathkeys=pathkeys;`
	`482`	`+info->clauses=clauses;`
	`483`	`+infos=lappend(infos,info);`
	`484`	`+`
	`485`	`+/*`
	`486`	`+ * Should we try generating alternative orderings of the group keys? If`
	`487`	`+ * not, we produce only the order specified in the query, i.e. the`
	`488`	`+ * optimization is effectively disabled.`
	`489`	`+ */`
	`490`	`+if (!enable_group_by_reordering)`
	`491`	`+returninfos;`
	`492`	`+`
	`493`	`+/*`
	`494`	`+ * Grouping sets have own and more complex logic to decide the ordering.`
	`495`	`+ */`
	`496`	`+if (parse->groupingSets)`
	`497`	`+returninfos;`
	`498`	`+`
	`499`	`+/*`
	`500`	`+ * If the path is sorted in some way, try reordering the group keys to`
	`501`	`+ * match the path as much of the ordering as possible. Then thanks to`
	`502`	`+ * incremental sort we would get this sort as cheap as possible.`
	`503`	`+ */`
	`504`	`+if (path->pathkeys&&`
	`505`	`+!pathkeys_contained_in(path->pathkeys,root->group_pathkeys))`
	`506`	`+{`
	`507`	`+intn;`
	`508`	`+`
	`509`	`+n=group_keys_reorder_by_pathkeys(path->pathkeys,&pathkeys,&clauses,`
	`510`	`+root->num_groupby_pathkeys);`
	`511`	`+`
	`512`	`+if (n>0&&`
	`513`	`+(enable_incremental_sort\|\|n==root->num_groupby_pathkeys)&&`
	`514`	`+!pathkeys_are_duplicate(infos,pathkeys))`
	`515`	`+{`
	`516`	`+info=makeNode(PathKeyInfo);`
	`517`	`+info->pathkeys=pathkeys;`
	`518`	`+info->clauses=clauses;`
	`519`	`+`
	`520`	`+infos=lappend(infos,info);`
	`521`	`+}`
	`522`	`+}`
	`523`	`+`
	`524`	`+/*`
	`525`	`+ * Try reordering pathkeys to minimize the sort cost (this time consider`
	`526`	`+ * the ORDER BY clause).`
	`527`	`+ */`
	`528`	`+if (root->sort_pathkeys&&`
	`529`	`+!pathkeys_contained_in(root->sort_pathkeys,root->group_pathkeys))`
	`530`	`+{`
	`531`	`+intn;`
	`532`	`+`
	`533`	`+n=group_keys_reorder_by_pathkeys(root->sort_pathkeys,&pathkeys,`
	`534`	`+&clauses,`
	`535`	`+root->num_groupby_pathkeys);`
	`536`	`+`
	`537`	`+if (n>0&&`
	`538`	`+(enable_incremental_sort\|\|n==list_length(root->sort_pathkeys))&&`
	`539`	`+!pathkeys_are_duplicate(infos,pathkeys))`
	`540`	`+{`
	`541`	`+info=makeNode(PathKeyInfo);`
	`542`	`+info->pathkeys=pathkeys;`
	`543`	`+info->clauses=clauses;`
	`544`	`+`
	`545`	`+infos=lappend(infos,info);`
	`546`	`+}`
	`547`	`+}`
	`548`	`+`
	`549`	`+returninfos;`
	`550`	`+}`
	`551`	`+`
`353`	`552`	`/*`
`354`	`553`	`* pathkeys_count_contained_in`
`355`	`554`	`* Same as pathkeys_contained_in, but also sets length of longest`
`@@ -1939,6 +2138,54 @@ pathkeys_useful_for_ordering(PlannerInfo root, List pathkeys)`
`1939`	`2138`	`returnn_common_pathkeys;`
`1940`	`2139`	`}`
`1941`	`2140`
	`2141`	`+/*`
	`2142`	`+ * pathkeys_useful_for_grouping`
	`2143`	`+ *Count the number of pathkeys that are useful for grouping (instead of`
	`2144`	`+ *explicit sort)`
	`2145`	`+ *`
	`2146`	`+ * Group pathkeys could be reordered to benefit from the ordering. The`
	`2147`	`+ * ordering may not be "complete" and may require incremental sort, but that's`
	`2148`	`+ * fine. So we simply count prefix pathkeys with a matching group key, and`
	`2149`	`+ * stop once we find the first pathkey without a match.`
	`2150`	`+ *`
	`2151`	`+ * So e.g. with pathkeys (a,b,c) and group keys (a,b,e) this determines (a,b)`
	`2152`	`+ * pathkeys are useful for grouping, and we might do incremental sort to get`
	`2153`	`+ * path ordered by (a,b,e).`
	`2154`	`+ *`
	`2155`	`+ * This logic is necessary to retain paths with ordering not matching grouping`
	`2156`	`+ * keys directly, without the reordering.`
	`2157`	`+ *`
	`2158`	`+ * Returns the length of pathkey prefix with matching group keys.`
	`2159`	`+ */`
	`2160`	`+staticint`
	`2161`	`+pathkeys_useful_for_grouping(PlannerInforoot,Listpathkeys)`
	`2162`	`+{`
	`2163`	`+ListCell*key;`
	`2164`	`+intn=0;`
	`2165`	`+`
	`2166`	`+/* no special ordering requested for grouping */`
	`2167`	`+if (root->group_pathkeys==NIL)`
	`2168`	`+return0;`
	`2169`	`+`
	`2170`	`+/* unordered path */`
	`2171`	`+if (pathkeys==NIL)`
	`2172`	`+return0;`
	`2173`	`+`
	`2174`	`+/* walk the pathkeys and search for matching group key */`
	`2175`	`+foreach(key,pathkeys)`
	`2176`	`+{`
	`2177`	`+PathKeypathkey= (PathKey)lfirst(key);`
	`2178`	`+`
	`2179`	`+/* no matching group key, we're done */`
	`2180`	`+if (!list_member_ptr(root->group_pathkeys,pathkey))`
	`2181`	`+break;`
	`2182`	`+`
	`2183`	`+n++;`
	`2184`	`+}`
	`2185`	`+`
	`2186`	`+returnn;`
	`2187`	`+}`
	`2188`	`+`
`1942`	`2189`	`/*`
`1943`	`2190`	`* truncate_useless_pathkeys`
`1944`	`2191`	`*Shorten the given pathkey list to just the useful pathkeys.`
`@@ -1953,6 +2200,9 @@ truncate_useless_pathkeys(PlannerInfo *root,`
`1953`	`2200`
`1954`	`2201`	`nuseful=pathkeys_useful_for_merging(root,rel,pathkeys);`
`1955`	`2202`	`nuseful2=pathkeys_useful_for_ordering(root,pathkeys);`
	`2203`	`+if (nuseful2>nuseful)`
	`2204`	`+nuseful=nuseful2;`
	`2205`	`+nuseful2=pathkeys_useful_for_grouping(root,pathkeys);`
`1956`	`2206`	`if (nuseful2>nuseful)`
`1957`	`2207`	`nuseful=nuseful2;`
`1958`	`2208`
`@@ -1988,6 +2238,8 @@ has_useful_pathkeys(PlannerInfo root, RelOptInfo rel)`
`1988`	`2238`	`{`
`1989`	`2239`	`if (rel->joininfo!=NIL\|\|rel->has_eclass_joins)`
`1990`	`2240`	`return true;/* might be able to use pathkeys for merging */`
	`2241`	`+if (root->group_pathkeys!=NIL)`
	`2242`	`+return true;/* might be able to use pathkeys for grouping */`
`1991`	`2243`	`if (root->query_pathkeys!=NIL)`
`1992`	`2244`	`return true;/* might be able to use them for ordering */`
`1993`	`2245`	`return false;/* definitely useless */`

0 commit comments

Comments

(0)

Movatterモバイル変換

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Commit0452b46

File tree

11 files changed

11 files changed

`‎src/backend/optimizer/path/equivclass.c‎`

`‎src/backend/optimizer/path/pathkeys.c‎`

0 commit comments