Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit0452b46

Browse files
committed
Explore alternative orderings of group-by pathkeys during optimization.
When evaluating a query with a multi-column GROUP BY clause, we can minimizesort operations or avoid them if we synchronize the order of GROUP BY clauseswith the ORDER BY sort clause or sort order, which comes from the underlyingquery tree. Grouping does not imply any ordering, so we can comparethe keys in arbitrary order, and a Hash Agg leverages this. But for Group Agg,we simply compared keys in the order specified in the query. This commitexplores alternative ordering of the keys, trying to find a cheaper one.The ordering of group keys may interact with other parts of the query, some ofwhich may not be known while planning the grouping. For example, there may bean explicit ORDER BY clause or some other ordering-dependent operation higher upin the query, and using the same ordering may allow using either incrementalsort or even eliminating the sort entirely.The patch always keeps the ordering specified in the query, assuming the usermight have additional insights.This introduces a new GUC enable_group_by_reordering so that the optimizationmay be disabled if needed.Discussion:https://postgr.es/m/7c79e6a5-8597-74e8-0671-1c39d124c9d6%40sigaev.ruAuthor: Andrei Lepikhov, Teodor SigaevReviewed-by: Tomas Vondra, Claudio Freire, Gavin Flower, Dmitry DolgovReviewed-by: Robert Haas, Pavel Borisov, David Rowley, Zhihong YuReviewed-by: Tom Lane, Alexander Korotkov, Richard Guo, Alena Rybakina
1 parent7ab80ac commit0452b46

File tree

11 files changed

+770
-223
lines changed

11 files changed

+770
-223
lines changed

‎src/backend/optimizer/path/equivclass.c

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -652,7 +652,18 @@ get_eclass_for_sort_expr(PlannerInfo *root,
652652

653653
if (opcintype==cur_em->em_datatype&&
654654
equal(expr,cur_em->em_expr))
655-
returncur_ec;/* Match! */
655+
{
656+
/*
657+
* Match!
658+
*
659+
* Copy the sortref if it wasn't set yet. That may happen if
660+
* the ec was constructed from a WHERE clause, i.e. it doesn't
661+
* have a target reference at all.
662+
*/
663+
if (cur_ec->ec_sortref==0&&sortref>0)
664+
cur_ec->ec_sortref=sortref;
665+
returncur_ec;
666+
}
656667
}
657668
}
658669

‎src/backend/optimizer/path/pathkeys.c

Lines changed: 252 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,12 +22,15 @@
2222
#include"nodes/makefuncs.h"
2323
#include"nodes/nodeFuncs.h"
2424
#include"nodes/plannodes.h"
25+
#include"optimizer/cost.h"
2526
#include"optimizer/optimizer.h"
2627
#include"optimizer/pathnode.h"
2728
#include"optimizer/paths.h"
2829
#include"partitioning/partbounds.h"
2930
#include"utils/lsyscache.h"
3031

32+
/* Consider reordering of GROUP BY keys? */
33+
boolenable_group_by_reordering= true;
3134

3235
staticboolpathkey_is_redundant(PathKey*new_pathkey,List*pathkeys);
3336
staticboolmatches_boolean_partition_clause(RestrictInfo*rinfo,
@@ -350,6 +353,202 @@ pathkeys_contained_in(List *keys1, List *keys2)
350353
return false;
351354
}
352355

356+
/*
357+
* group_keys_reorder_by_pathkeys
358+
*Reorder GROUP BY keys to match the input pathkeys.
359+
*
360+
* Function returns new lists (pathkeys and clauses), original GROUP BY lists
361+
* stay untouched.
362+
*
363+
* Returns the number of GROUP BY keys with a matching pathkey.
364+
*/
365+
staticint
366+
group_keys_reorder_by_pathkeys(List*pathkeys,List**group_pathkeys,
367+
List**group_clauses,
368+
intnum_groupby_pathkeys)
369+
{
370+
List*new_group_pathkeys=NIL,
371+
*new_group_clauses=NIL;
372+
ListCell*lc;
373+
intn;
374+
375+
if (pathkeys==NIL||*group_pathkeys==NIL)
376+
return0;
377+
378+
/*
379+
* Walk the pathkeys (determining ordering of the input path) and see if
380+
* there's a matching GROUP BY key. If we find one, we append it to the
381+
* list, and do the same for the clauses.
382+
*
383+
* Once we find the first pathkey without a matching GROUP BY key, the
384+
* rest of the pathkeys are useless and can't be used to evaluate the
385+
* grouping, so we abort the loop and ignore the remaining pathkeys.
386+
*/
387+
foreach(lc,pathkeys)
388+
{
389+
PathKey*pathkey= (PathKey*)lfirst(lc);
390+
SortGroupClause*sgc;
391+
392+
/*
393+
* Pathkeys are built in a way that allows simply comparing pointers.
394+
* Give up if we can't find the matching pointer. Also give up if
395+
* there is no sortclause reference for some reason.
396+
*/
397+
if (foreach_current_index(lc) >=num_groupby_pathkeys||
398+
!list_member_ptr(*group_pathkeys,pathkey)||
399+
pathkey->pk_eclass->ec_sortref==0)
400+
break;
401+
402+
/*
403+
* Since 1349d27 pathkey coming from underlying node can be in the
404+
* root->group_pathkeys but not in the processed_groupClause. So, we
405+
* should be careful here.
406+
*/
407+
sgc=get_sortgroupref_clause_noerr(pathkey->pk_eclass->ec_sortref,
408+
*group_clauses);
409+
if (!sgc)
410+
/* The grouping clause does not cover this pathkey */
411+
break;
412+
413+
/*
414+
* Sort group clause should have an ordering operator as long as there
415+
* is an associated pathkey.
416+
*/
417+
Assert(OidIsValid(sgc->sortop));
418+
419+
new_group_pathkeys=lappend(new_group_pathkeys,pathkey);
420+
new_group_clauses=lappend(new_group_clauses,sgc);
421+
}
422+
423+
/* remember the number of pathkeys with a matching GROUP BY key */
424+
n=list_length(new_group_pathkeys);
425+
426+
/* append the remaining group pathkeys (will be treated as not sorted) */
427+
*group_pathkeys=list_concat_unique_ptr(new_group_pathkeys,
428+
*group_pathkeys);
429+
*group_clauses=list_concat_unique_ptr(new_group_clauses,
430+
*group_clauses);
431+
432+
returnn;
433+
}
434+
435+
/*
436+
* pathkeys_are_duplicate
437+
*Check if give pathkeys are already contained the list of
438+
*PathKeyInfo's.
439+
*/
440+
staticbool
441+
pathkeys_are_duplicate(List*infos,List*pathkeys)
442+
{
443+
ListCell*lc;
444+
445+
foreach(lc,infos)
446+
{
447+
PathKeyInfo*info=lfirst_node(PathKeyInfo,lc);
448+
449+
if (compare_pathkeys(pathkeys,info->pathkeys)==PATHKEYS_EQUAL)
450+
return true;
451+
}
452+
return false;
453+
}
454+
455+
/*
456+
* get_useful_group_keys_orderings
457+
*Determine which orderings of GROUP BY keys are potentially interesting.
458+
*
459+
* Returns a list of PathKeyInfo items, each representing an interesting
460+
* ordering of GROUP BY keys. Each item stores pathkeys and clauses in the
461+
* matching order.
462+
*
463+
* The function considers (and keeps) multiple GROUP BY orderings:
464+
*
465+
* - the original ordering, as specified by the GROUP BY clause,
466+
* - GROUP BY keys reordered to match 'path' ordering (as much as possible),
467+
* - GROUP BY keys to match target ORDER BY clause (as much as possible).
468+
*/
469+
List*
470+
get_useful_group_keys_orderings(PlannerInfo*root,Path*path)
471+
{
472+
Query*parse=root->parse;
473+
List*infos=NIL;
474+
PathKeyInfo*info;
475+
476+
List*pathkeys=root->group_pathkeys;
477+
List*clauses=root->processed_groupClause;
478+
479+
/* always return at least the original pathkeys/clauses */
480+
info=makeNode(PathKeyInfo);
481+
info->pathkeys=pathkeys;
482+
info->clauses=clauses;
483+
infos=lappend(infos,info);
484+
485+
/*
486+
* Should we try generating alternative orderings of the group keys? If
487+
* not, we produce only the order specified in the query, i.e. the
488+
* optimization is effectively disabled.
489+
*/
490+
if (!enable_group_by_reordering)
491+
returninfos;
492+
493+
/*
494+
* Grouping sets have own and more complex logic to decide the ordering.
495+
*/
496+
if (parse->groupingSets)
497+
returninfos;
498+
499+
/*
500+
* If the path is sorted in some way, try reordering the group keys to
501+
* match the path as much of the ordering as possible. Then thanks to
502+
* incremental sort we would get this sort as cheap as possible.
503+
*/
504+
if (path->pathkeys&&
505+
!pathkeys_contained_in(path->pathkeys,root->group_pathkeys))
506+
{
507+
intn;
508+
509+
n=group_keys_reorder_by_pathkeys(path->pathkeys,&pathkeys,&clauses,
510+
root->num_groupby_pathkeys);
511+
512+
if (n>0&&
513+
(enable_incremental_sort||n==root->num_groupby_pathkeys)&&
514+
!pathkeys_are_duplicate(infos,pathkeys))
515+
{
516+
info=makeNode(PathKeyInfo);
517+
info->pathkeys=pathkeys;
518+
info->clauses=clauses;
519+
520+
infos=lappend(infos,info);
521+
}
522+
}
523+
524+
/*
525+
* Try reordering pathkeys to minimize the sort cost (this time consider
526+
* the ORDER BY clause).
527+
*/
528+
if (root->sort_pathkeys&&
529+
!pathkeys_contained_in(root->sort_pathkeys,root->group_pathkeys))
530+
{
531+
intn;
532+
533+
n=group_keys_reorder_by_pathkeys(root->sort_pathkeys,&pathkeys,
534+
&clauses,
535+
root->num_groupby_pathkeys);
536+
537+
if (n>0&&
538+
(enable_incremental_sort||n==list_length(root->sort_pathkeys))&&
539+
!pathkeys_are_duplicate(infos,pathkeys))
540+
{
541+
info=makeNode(PathKeyInfo);
542+
info->pathkeys=pathkeys;
543+
info->clauses=clauses;
544+
545+
infos=lappend(infos,info);
546+
}
547+
}
548+
549+
returninfos;
550+
}
551+
353552
/*
354553
* pathkeys_count_contained_in
355554
* Same as pathkeys_contained_in, but also sets length of longest
@@ -1939,6 +2138,54 @@ pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys)
19392138
returnn_common_pathkeys;
19402139
}
19412140

2141+
/*
2142+
* pathkeys_useful_for_grouping
2143+
*Count the number of pathkeys that are useful for grouping (instead of
2144+
*explicit sort)
2145+
*
2146+
* Group pathkeys could be reordered to benefit from the ordering. The
2147+
* ordering may not be "complete" and may require incremental sort, but that's
2148+
* fine. So we simply count prefix pathkeys with a matching group key, and
2149+
* stop once we find the first pathkey without a match.
2150+
*
2151+
* So e.g. with pathkeys (a,b,c) and group keys (a,b,e) this determines (a,b)
2152+
* pathkeys are useful for grouping, and we might do incremental sort to get
2153+
* path ordered by (a,b,e).
2154+
*
2155+
* This logic is necessary to retain paths with ordering not matching grouping
2156+
* keys directly, without the reordering.
2157+
*
2158+
* Returns the length of pathkey prefix with matching group keys.
2159+
*/
2160+
staticint
2161+
pathkeys_useful_for_grouping(PlannerInfo*root,List*pathkeys)
2162+
{
2163+
ListCell*key;
2164+
intn=0;
2165+
2166+
/* no special ordering requested for grouping */
2167+
if (root->group_pathkeys==NIL)
2168+
return0;
2169+
2170+
/* unordered path */
2171+
if (pathkeys==NIL)
2172+
return0;
2173+
2174+
/* walk the pathkeys and search for matching group key */
2175+
foreach(key,pathkeys)
2176+
{
2177+
PathKey*pathkey= (PathKey*)lfirst(key);
2178+
2179+
/* no matching group key, we're done */
2180+
if (!list_member_ptr(root->group_pathkeys,pathkey))
2181+
break;
2182+
2183+
n++;
2184+
}
2185+
2186+
returnn;
2187+
}
2188+
19422189
/*
19432190
* truncate_useless_pathkeys
19442191
*Shorten the given pathkey list to just the useful pathkeys.
@@ -1953,6 +2200,9 @@ truncate_useless_pathkeys(PlannerInfo *root,
19532200

19542201
nuseful=pathkeys_useful_for_merging(root,rel,pathkeys);
19552202
nuseful2=pathkeys_useful_for_ordering(root,pathkeys);
2203+
if (nuseful2>nuseful)
2204+
nuseful=nuseful2;
2205+
nuseful2=pathkeys_useful_for_grouping(root,pathkeys);
19562206
if (nuseful2>nuseful)
19572207
nuseful=nuseful2;
19582208

@@ -1988,6 +2238,8 @@ has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel)
19882238
{
19892239
if (rel->joininfo!=NIL||rel->has_eclass_joins)
19902240
return true;/* might be able to use pathkeys for merging */
2241+
if (root->group_pathkeys!=NIL)
2242+
return true;/* might be able to use pathkeys for grouping */
19912243
if (root->query_pathkeys!=NIL)
19922244
return true;/* might be able to use them for ordering */
19932245
return false;/* definitely useless */

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp