Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit0bf2587

Browse files
committed
Improve planner's estimation of the space needed for HashAgg plans:
look at the actual aggregate transition datatypes and the actual overheadneeded by nodeAgg.c, instead of using pessimistic round numbers.Per a discussion with Michael Tiemann.
1 parentc3a4e22 commit0bf2587

File tree

5 files changed

+140
-70
lines changed

5 files changed

+140
-70
lines changed

‎src/backend/executor/nodeAgg.c

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@
4545
* Portions Copyright (c) 1994, Regents of the University of California
4646
*
4747
* IDENTIFICATION
48-
* $PostgreSQL: pgsql/src/backend/executor/nodeAgg.c,v 1.127 2005/01/27 23:42:18 tgl Exp $
48+
* $PostgreSQL: pgsql/src/backend/executor/nodeAgg.c,v 1.128 2005/01/28 19:33:56 tgl Exp $
4949
*
5050
*-------------------------------------------------------------------------
5151
*/
@@ -604,6 +604,26 @@ build_hash_table(AggState *aggstate)
604604
tmpmem);
605605
}
606606

607+
/*
608+
* Estimate per-hash-table-entry overhead for the planner.
609+
*
610+
* Note that the estimate does not include space for pass-by-reference
611+
* transition data values.
612+
*/
613+
Size
614+
hash_agg_entry_size(intnumAggs)
615+
{
616+
Sizeentrysize;
617+
618+
/* This must match build_hash_table */
619+
entrysize=sizeof(AggHashEntryData)+
620+
(numAggs-1)*sizeof(AggStatePerGroupData);
621+
/* Account for hashtable overhead */
622+
entrysize+=2*sizeof(void*);
623+
entrysize=MAXALIGN(entrysize);
624+
returnentrysize;
625+
}
626+
607627
/*
608628
* Find or create a hashtable entry for the tuple group containing the
609629
* given tuple.

‎src/backend/optimizer/plan/planner.c

Lines changed: 22 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.177 2004/12/31 22:00:09 pgsql Exp $
11+
* $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.178 2005/01/28 19:34:05 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -20,6 +20,7 @@
2020
#include"catalog/pg_operator.h"
2121
#include"catalog/pg_type.h"
2222
#include"executor/executor.h"
23+
#include"executor/nodeAgg.h"
2324
#include"miscadmin.h"
2425
#include"nodes/makefuncs.h"
2526
#ifdefOPTIMIZER_DEBUG
@@ -660,10 +661,12 @@ grouping_planner(Query *parse, double tuple_fraction)
660661
Path*sorted_path;
661662
doubledNumGroups=0;
662663
longnumGroups=0;
663-
intnumAggs=0;
664+
AggClauseCountsagg_counts;
664665
intnumGroupCols=list_length(parse->groupClause);
665666
booluse_hashed_grouping= false;
666667

668+
MemSet(&agg_counts,0,sizeof(AggClauseCounts));
669+
667670
/* Preprocess targetlist in case we are inside an INSERT/UPDATE. */
668671
tlist=preprocess_targetlist(tlist,
669672
parse->commandType,
@@ -752,8 +755,10 @@ grouping_planner(Query *parse, double tuple_fraction)
752755
* the aggregate semantics (eg, producing only one output row).
753756
*/
754757
if (parse->hasAggs)
755-
numAggs=count_agg_clause((Node*)tlist)+
756-
count_agg_clause(parse->havingQual);
758+
{
759+
count_agg_clauses((Node*)tlist,&agg_counts);
760+
count_agg_clauses(parse->havingQual,&agg_counts);
761+
}
757762

758763
/*
759764
* Figure out whether we need a sorted result from query_planner.
@@ -990,9 +995,7 @@ grouping_planner(Query *parse, double tuple_fraction)
990995
*/
991996
if (!enable_hashagg|| !hash_safe_grouping(parse))
992997
use_hashed_grouping= false;
993-
elseif (parse->hasAggs&&
994-
(contain_distinct_agg_clause((Node*)tlist)||
995-
contain_distinct_agg_clause(parse->havingQual)))
998+
elseif (agg_counts.numDistinctAggs!=0)
996999
use_hashed_grouping= false;
9971000
else
9981001
{
@@ -1003,13 +1006,15 @@ grouping_planner(Query *parse, double tuple_fraction)
10031006
* the need for sorted input is usually a win, the fact
10041007
* that the output won't be sorted may be a loss; so we
10051008
* need to do an actual cost comparison.
1006-
*
1007-
* In most cases we have no good way to estimate the size of
1008-
* the transition value needed by an aggregate;
1009-
* arbitrarily assume it is 100 bytes.Also set the
1010-
* overhead per hashtable entry at 64 bytes.
10111009
*/
1012-
inthashentrysize=cheapest_path_width+64+numAggs*100;
1010+
Sizehashentrysize;
1011+
1012+
/* Estimate per-hash-entry space at tuple width... */
1013+
hashentrysize=cheapest_path_width;
1014+
/* plus space for pass-by-ref transition values... */
1015+
hashentrysize+=agg_counts.transitionSpace;
1016+
/* plus the per-hash-entry overhead */
1017+
hashentrysize+=hash_agg_entry_size(agg_counts.numAggs);
10131018

10141019
if (hashentrysize*dNumGroups <=work_mem*1024L)
10151020
{
@@ -1030,7 +1035,7 @@ grouping_planner(Query *parse, double tuple_fraction)
10301035
Pathsorted_p;
10311036

10321037
cost_agg(&hashed_p,parse,
1033-
AGG_HASHED,numAggs,
1038+
AGG_HASHED,agg_counts.numAggs,
10341039
numGroupCols,dNumGroups,
10351040
cheapest_path->startup_cost,
10361041
cheapest_path->total_cost,
@@ -1065,7 +1070,7 @@ grouping_planner(Query *parse, double tuple_fraction)
10651070
}
10661071
if (parse->hasAggs)
10671072
cost_agg(&sorted_p,parse,
1068-
AGG_SORTED,numAggs,
1073+
AGG_SORTED,agg_counts.numAggs,
10691074
numGroupCols,dNumGroups,
10701075
sorted_p.startup_cost,
10711076
sorted_p.total_cost,
@@ -1202,7 +1207,7 @@ grouping_planner(Query *parse, double tuple_fraction)
12021207
numGroupCols,
12031208
groupColIdx,
12041209
numGroups,
1205-
numAggs,
1210+
agg_counts.numAggs,
12061211
result_plan);
12071212
/* Hashed aggregation produces randomly-ordered results */
12081213
current_pathkeys=NIL;
@@ -1244,7 +1249,7 @@ grouping_planner(Query *parse, double tuple_fraction)
12441249
numGroupCols,
12451250
groupColIdx,
12461251
numGroups,
1247-
numAggs,
1252+
agg_counts.numAggs,
12481253
result_plan);
12491254
}
12501255
else

‎src/backend/optimizer/util/clauses.c

Lines changed: 85 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/optimizer/util/clauses.c,v 1.186 2004/12/31 22:00:23 pgsql Exp $
11+
* $PostgreSQL: pgsql/src/backend/optimizer/util/clauses.c,v 1.187 2005/01/28 19:34:07 tgl Exp $
1212
*
1313
* HISTORY
1414
* AUTHORDATEMAJOR EVENT
@@ -19,6 +19,7 @@
1919

2020
#include"postgres.h"
2121

22+
#include"catalog/pg_aggregate.h"
2223
#include"catalog/pg_language.h"
2324
#include"catalog/pg_proc.h"
2425
#include"catalog/pg_type.h"
@@ -33,6 +34,7 @@
3334
#include"optimizer/var.h"
3435
#include"parser/analyze.h"
3536
#include"parser/parse_clause.h"
37+
#include"parser/parse_coerce.h"
3638
#include"parser/parse_expr.h"
3739
#include"tcop/tcopprot.h"
3840
#include"utils/acl.h"
@@ -58,8 +60,7 @@ typedef struct
5860
}substitute_actual_parameters_context;
5961

6062
staticboolcontain_agg_clause_walker(Node*node,void*context);
61-
staticboolcontain_distinct_agg_clause_walker(Node*node,void*context);
62-
staticboolcount_agg_clause_walker(Node*node,int*count);
63+
staticboolcount_agg_clauses_walker(Node*node,AggClauseCounts*counts);
6364
staticboolexpression_returns_set_walker(Node*node,void*context);
6465
staticboolcontain_subplans_walker(Node*node,void*context);
6566
staticboolcontain_mutable_functions_walker(Node*node,void*context);
@@ -358,71 +359,108 @@ contain_agg_clause_walker(Node *node, void *context)
358359
}
359360

360361
/*
361-
* contain_distinct_agg_clause
362-
* Recursively search for DISTINCT Aggref nodes within a clause.
362+
* count_agg_clauses
363+
* Recursively count the Aggref nodes in an expression tree.
364+
*
365+
* Note: this also checks for nested aggregates, which are an error.
363366
*
364-
* Returns true if any DISTINCT aggregate found.
367+
* We not only count the nodes, but attempt to estimate the total space
368+
* needed for their transition state values if all are evaluated in parallel
369+
* (as would be done in a HashAgg plan). See AggClauseCounts for the exact
370+
* set of statistics returned.
371+
*
372+
* NOTE that the counts are ADDED to those already in *counts ... so the
373+
* caller is responsible for zeroing the struct initially.
365374
*
366375
* This does not descend into subqueries, and so should be used only after
367376
* reduction of sublinks to subplans, or in contexts where it's known there
368377
* are no subqueries. There mustn't be outer-aggregate references either.
369378
*/
370-
bool
371-
contain_distinct_agg_clause(Node*clause)
379+
void
380+
count_agg_clauses(Node*clause,AggClauseCounts*counts)
372381
{
373-
returncontain_distinct_agg_clause_walker(clause,NULL);
382+
/* no setup needed */
383+
count_agg_clauses_walker(clause,counts);
374384
}
375385

376386
staticbool
377-
contain_distinct_agg_clause_walker(Node*node,void*context)
387+
count_agg_clauses_walker(Node*node,AggClauseCounts*counts)
378388
{
379389
if (node==NULL)
380390
return false;
381391
if (IsA(node,Aggref))
382392
{
383-
Assert(((Aggref*)node)->agglevelsup==0);
384-
if (((Aggref*)node)->aggdistinct)
385-
return true;/* abort the tree traversal and return
386-
* true */
387-
}
388-
Assert(!IsA(node,SubLink));
389-
returnexpression_tree_walker(node,contain_distinct_agg_clause_walker,context);
390-
}
393+
Aggref*aggref= (Aggref*)node;
394+
OidinputType;
395+
HeapTupleaggTuple;
396+
Form_pg_aggregateaggform;
397+
Oidaggtranstype;
398+
399+
Assert(aggref->agglevelsup==0);
400+
counts->numAggs++;
401+
if (aggref->aggdistinct)
402+
counts->numDistinctAggs++;
403+
404+
inputType=exprType((Node*)aggref->target);
405+
406+
/* fetch aggregate transition datatype from pg_aggregate */
407+
aggTuple=SearchSysCache(AGGFNOID,
408+
ObjectIdGetDatum(aggref->aggfnoid),
409+
0,0,0);
410+
if (!HeapTupleIsValid(aggTuple))
411+
elog(ERROR,"cache lookup failed for aggregate %u",
412+
aggref->aggfnoid);
413+
aggform= (Form_pg_aggregate)GETSTRUCT(aggTuple);
414+
aggtranstype=aggform->aggtranstype;
415+
ReleaseSysCache(aggTuple);
416+
417+
/* resolve actual type of transition state, if polymorphic */
418+
if (aggtranstype==ANYARRAYOID||aggtranstype==ANYELEMENTOID)
419+
{
420+
/* have to fetch the agg's declared input type... */
421+
Oidagg_arg_types[FUNC_MAX_ARGS];
422+
intagg_nargs;
423+
424+
(void)get_func_signature(aggref->aggfnoid,
425+
agg_arg_types,&agg_nargs);
426+
Assert(agg_nargs==1);
427+
aggtranstype=resolve_generic_type(aggtranstype,
428+
inputType,
429+
agg_arg_types[0]);
430+
}
391431

392-
/*
393-
* count_agg_clause
394-
* Recursively count the Aggref nodes in an expression tree.
395-
*
396-
* Note: this also checks for nested aggregates, which are an error.
397-
*
398-
* This does not descend into subqueries, and so should be used only after
399-
* reduction of sublinks to subplans, or in contexts where it's known there
400-
* are no subqueries. There mustn't be outer-aggregate references either.
401-
*/
402-
int
403-
count_agg_clause(Node*clause)
404-
{
405-
intresult=0;
432+
/*
433+
* If the transition type is pass-by-value then it doesn't add
434+
* anything to the required size of the hashtable. If it is
435+
* pass-by-reference then we have to add the estimated size of
436+
* the value itself, plus palloc overhead.
437+
*/
438+
if (!get_typbyval(aggtranstype))
439+
{
440+
int32aggtranstypmod;
441+
int32avgwidth;
406442

407-
count_agg_clause_walker(clause,&result);
408-
returnresult;
409-
}
443+
/*
444+
* If transition state is of same type as input, assume it's the
445+
* same typmod (same width) as well. This works for cases like
446+
* MAX/MIN and is probably somewhat reasonable otherwise.
447+
*/
448+
if (aggtranstype==inputType)
449+
aggtranstypmod=exprTypmod((Node*)aggref->target);
450+
else
451+
aggtranstypmod=-1;
410452

411-
staticbool
412-
count_agg_clause_walker(Node*node,int*count)
413-
{
414-
if (node==NULL)
415-
return false;
416-
if (IsA(node,Aggref))
417-
{
418-
Assert(((Aggref*)node)->agglevelsup==0);
419-
(*count)++;
453+
avgwidth=get_typavgwidth(aggtranstype,aggtranstypmod);
454+
avgwidth=MAXALIGN(avgwidth);
455+
456+
counts->transitionSpace+=avgwidth+2*sizeof(void*);
457+
}
420458

421459
/*
422460
* Complain if the aggregate's argument contains any aggregates;
423461
* nested agg functions are semantically nonsensical.
424462
*/
425-
if (contain_agg_clause((Node*)((Aggref*)node)->target))
463+
if (contain_agg_clause((Node*)aggref->target))
426464
ereport(ERROR,
427465
(errcode(ERRCODE_GROUPING_ERROR),
428466
errmsg("aggregate function calls may not be nested")));
@@ -433,8 +471,8 @@ count_agg_clause_walker(Node *node, int *count)
433471
return false;
434472
}
435473
Assert(!IsA(node,SubLink));
436-
returnexpression_tree_walker(node,count_agg_clause_walker,
437-
(void*)count);
474+
returnexpression_tree_walker(node,count_agg_clauses_walker,
475+
(void*)counts);
438476
}
439477

440478

‎src/include/executor/nodeAgg.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
10-
* $PostgreSQL: pgsql/src/include/executor/nodeAgg.h,v 1.23 2004/12/31 22:03:29 pgsql Exp $
10+
* $PostgreSQL: pgsql/src/include/executor/nodeAgg.h,v 1.24 2005/01/28 19:34:18 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -23,6 +23,8 @@ extern TupleTableSlot *ExecAgg(AggState *node);
2323
externvoidExecEndAgg(AggState*node);
2424
externvoidExecReScanAgg(AggState*node,ExprContext*exprCtxt);
2525

26+
externSizehash_agg_entry_size(intnumAggs);
27+
2628
externDatumaggregate_dummy(PG_FUNCTION_ARGS);
2729

2830
#endif/* NODEAGG_H */

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp