Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 84c7cef

Browse files
committed
Fix estimate_num_groups to be able to use expression-index statistics
when there is an expressional index matching a GROUP BY item.
1 parent 089fb6c, commit 84c7cef

File tree

1 file changed

+112
-72
lines changed

1 file changed

+112
-72
lines changed

‎src/backend/utils/adt/selfuncs.c

Lines changed: 112 additions & 72 deletions
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@
1515
*
1616
*
1717
* IDENTIFICATION
18-
* $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.165 2004/08/30 02:54:39 momjian Exp $
18+
* $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.166 2004/09/18 19:39:50 tgl Exp $
1919
*
2020
*-------------------------------------------------------------------------
2121
*/
@@ -1869,6 +1869,71 @@ mergejoinscansel(Query *root, Node *clause,
18691869
ReleaseVariableStats(rightvar);
18701870
}
18711871

1872+
1873+
/*
1874+
* Helper routine for estimate_num_groups: add an item to a list of
1875+
* GroupVarInfos, but only if it's not known equal to any of the existing
1876+
* entries.
1877+
*/
1878+
typedefstruct
1879+
{
1880+
Node*var;/* might be an expression, not just a Var */
1881+
RelOptInfo*rel;/* relation it belongs to */
1882+
doublendistinct;/* # distinct values */
1883+
}GroupVarInfo;
1884+
1885+
staticList*
1886+
add_unique_group_var(Query*root,List*varinfos,
1887+
Node*var,VariableStatData*vardata)
1888+
{
1889+
GroupVarInfo*varinfo;
1890+
doublendistinct;
1891+
ListCell*lc;
1892+
1893+
ndistinct=get_variable_numdistinct(vardata);
1894+
1895+
/* cannot use foreach here because of possible list_delete */
1896+
lc=list_head(varinfos);
1897+
while (lc)
1898+
{
1899+
varinfo= (GroupVarInfo*)lfirst(lc);
1900+
1901+
/* must advance lc before list_delete possibly pfree's it */
1902+
lc=lnext(lc);
1903+
1904+
/* Drop exact duplicates */
1905+
if (equal(var,varinfo->var))
1906+
returnvarinfos;
1907+
1908+
/*
1909+
* Drop known-equal vars, but only if they belong to different
1910+
* relations (see comments for estimate_num_groups)
1911+
*/
1912+
if (vardata->rel!=varinfo->rel&&
1913+
exprs_known_equal(root,var,varinfo->var))
1914+
{
1915+
if (varinfo->ndistinct <=ndistinct)
1916+
{
1917+
/* Keep older item, forget new one */
1918+
returnvarinfos;
1919+
}
1920+
else
1921+
{
1922+
/* Delete the older item */
1923+
varinfos=list_delete_ptr(varinfos,varinfo);
1924+
}
1925+
}
1926+
}
1927+
1928+
varinfo= (GroupVarInfo*)palloc(sizeof(GroupVarInfo));
1929+
1930+
varinfo->var=var;
1931+
varinfo->rel=vardata->rel;
1932+
varinfo->ndistinct=ndistinct;
1933+
varinfos=lappend(varinfos,varinfo);
1934+
returnvarinfos;
1935+
}
1936+
18721937
/*
18731938
* estimate_num_groups - Estimate number of groups in a grouped query
18741939
*
@@ -1900,6 +1965,9 @@ mergejoinscansel(Query *root, Node *clause,
19001965
*increase the number of distinct values (unless it is volatile,
19011966
*which we consider unlikely for grouping), but it probably won't
19021967
*reduce the number of distinct values much either.
1968+
*As a special case, if a GROUP BY expression can be matched to an
1969+
*expressional index for which we have statistics, then we treat the
1970+
*whole expression as though it were just a Var.
19031971
* 2. If the list contains Vars of different relations that are known equal
19041972
*due to equijoin clauses, then drop all but one of the Vars from each
19051973
*known-equal set, keeping the one with smallest estimated # of values
@@ -1926,25 +1994,44 @@ mergejoinscansel(Query *root, Node *clause,
19261994
double
19271995
estimate_num_groups(Query*root,List*groupExprs,doubleinput_rows)
19281996
{
1929-
List*allvars=NIL;
19301997
List*varinfos=NIL;
19311998
doublenumdistinct;
19321999
ListCell*l;
1933-
typedefstruct
1934-
{/* varinfos is a List of these */
1935-
Var*var;
1936-
doublendistinct;
1937-
}MyVarInfo;
19382000

19392001
/* We should not be called unless query has GROUP BY (or DISTINCT) */
19402002
Assert(groupExprs!=NIL);
19412003

1942-
/* Step 1: get the unique Vars used */
2004+
/*
2005+
* Steps 1/2: find the unique Vars used, treating an expression as a Var
2006+
* if we can find stats for it. For each one, record the statistical
2007+
* estimate of number of distinct values (total in its table, without
2008+
* regard for filtering).
2009+
*/
19432010
foreach(l,groupExprs)
19442011
{
19452012
Node*groupexpr= (Node*)lfirst(l);
2013+
VariableStatDatavardata;
19462014
List*varshere;
2015+
ListCell*l2;
2016+
2017+
/*
2018+
* If examine_variable is able to deduce anything about the GROUP BY
2019+
* expression, treat it as a single variable even if it's really more
2020+
* complicated.
2021+
*/
2022+
examine_variable(root,groupexpr,0,&vardata);
2023+
if (vardata.statsTuple!=NULL||vardata.isunique)
2024+
{
2025+
varinfos=add_unique_group_var(root,varinfos,
2026+
groupexpr,&vardata);
2027+
ReleaseVariableStats(vardata);
2028+
continue;
2029+
}
2030+
ReleaseVariableStats(vardata);
19472031

2032+
/*
2033+
* Else pull out the component Vars
2034+
*/
19482035
varshere=pull_var_clause(groupexpr, false);
19492036

19502037
/*
@@ -1959,70 +2046,24 @@ estimate_num_groups(Query *root, List *groupExprs, double input_rows)
19592046
returninput_rows;
19602047
continue;
19612048
}
1962-
allvars=list_concat(allvars,varshere);
1963-
}
1964-
1965-
/* If now no Vars, we must have an all-constant GROUP BY list. */
1966-
if (allvars==NIL)
1967-
return1.0;
1968-
1969-
/* Use list_union() to discard duplicates */
1970-
allvars=list_union(NIL,allvars);
1971-
1972-
/*
1973-
* Step 2: acquire statistical estimate of number of distinct values
1974-
* of each Var (total in its table, without regard for filtering).
1975-
* Also, detect known-equal Vars and discard the ones we don't want.
1976-
*/
1977-
foreach(l,allvars)
1978-
{
1979-
Var*var= (Var*)lfirst(l);
1980-
VariableStatDatavardata;
1981-
doublendistinct;
1982-
boolkeep= true;
1983-
ListCell*l2;
1984-
1985-
examine_variable(root, (Node*)var,0,&vardata);
1986-
ndistinct=get_variable_numdistinct(&vardata);
1987-
ReleaseVariableStats(vardata);
1988-
1989-
/* cannot use foreach here because of possible list_delete */
1990-
l2=list_head(varinfos);
1991-
while (l2)
1992-
{
1993-
MyVarInfo*varinfo= (MyVarInfo*)lfirst(l2);
1994-
1995-
/* must advance l2 before list_delete possibly pfree's it */
1996-
l2=lnext(l2);
1997-
1998-
if (var->varno!=varinfo->var->varno&&
1999-
exprs_known_equal(root, (Node*)var, (Node*)varinfo->var))
2000-
{
2001-
/* Found a match */
2002-
if (varinfo->ndistinct <=ndistinct)
2003-
{
2004-
/* Keep older item, forget new one */
2005-
keep= false;
2006-
break;
2007-
}
2008-
else
2009-
{
2010-
/* Delete the older item */
2011-
varinfos=list_delete_ptr(varinfos,varinfo);
2012-
}
2013-
}
2014-
}
20152049

2016-
if (keep)
2050+
/*
2051+
* Else add variables to varinfos list
2052+
*/
2053+
foreach(l2,varshere)
20172054
{
2018-
MyVarInfo*varinfo= (MyVarInfo*)palloc(sizeof(MyVarInfo));
2055+
Node*var= (Node*)lfirst(l2);
20192056

2020-
varinfo->var=var;
2021-
varinfo->ndistinct=ndistinct;
2022-
varinfos=lcons(varinfo,varinfos);
2057+
examine_variable(root,var,0,&vardata);
2058+
varinfos=add_unique_group_var(root,varinfos,var,&vardata);
2059+
ReleaseVariableStats(vardata);
20232060
}
20242061
}
20252062

2063+
/* If now no Vars, we must have an all-constant GROUP BY list. */
2064+
if (varinfos==NIL)
2065+
return1.0;
2066+
20262067
/*
20272068
* Steps 3/4: group Vars by relation and estimate total numdistinct.
20282069
*
@@ -2031,25 +2072,24 @@ estimate_num_groups(Query *root, List *groupExprs, double input_rows)
20312072
* these Vars from the newvarinfos list for the next iteration. This
20322073
* is the easiest way to group Vars of same rel together.
20332074
*/
2034-
Assert(varinfos!=NIL);
20352075
numdistinct=1.0;
20362076

20372077
do
20382078
{
2039-
MyVarInfo*varinfo1= (MyVarInfo*)linitial(varinfos);
2040-
RelOptInfo*rel=find_base_rel(root,varinfo1->var->varno);
2079+
GroupVarInfo*varinfo1= (GroupVarInfo*)linitial(varinfos);
2080+
RelOptInfo*rel=varinfo1->rel;
20412081
doublereldistinct=varinfo1->ndistinct;
20422082
List*newvarinfos=NIL;
20432083

20442084
/*
2045-
* Get the largest numdistinct estimate of the Vars for this rel.
2085+
* Get the product of numdistinct estimates of the Vars for this rel.
20462086
* Also, construct new varinfos list of remaining Vars.
20472087
*/
20482088
for_each_cell(l,lnext(list_head(varinfos)))
20492089
{
2050-
MyVarInfo*varinfo2= (MyVarInfo*)lfirst(l);
2090+
GroupVarInfo*varinfo2= (GroupVarInfo*)lfirst(l);
20512091

2052-
if (varinfo2->var->varno==varinfo1->var->varno)
2092+
if (varinfo2->rel==varinfo1->rel)
20532093
reldistinct *=varinfo2->ndistinct;
20542094
else
20552095
{

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp