Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit9ea2614

Browse files
committed
Stabilise incremental sort cost calculation.
Carefully identify an expression that can represent the path key on sortestimation. Columns may have different distinct estimations that can triggerwrong cost estimations and choice of sort strategy.Sorting has only pathkeys as input for the cost estimation. This patch, insteadof a blind choice of the first equivalence class member, attempts to find anexpression that chooses the most negligible ndistinct value.TODO: Filtering expressions from outer query subtrees is not a big deal. Butin that case, information about underlying relids is necessary for the costcalculation routine that causes API change. So, just wait for some time forthe community decision.Add into EquivalenceMember the number of distinct values - em_ndistinct.It may be additionally used later in groups number estimations.Task: 9578.Tags: optimized_group_by.
1 parentb8aa44f commit9ea2614

File tree

5 files changed

+152
-5
lines changed

5 files changed

+152
-5
lines changed

‎src/backend/optimizer/path/costsize.c

Lines changed: 67 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,8 @@ static int32 get_expr_width(PlannerInfo *root, const Node *expr);
192192
staticdoublerelation_byte_size(doubletuples,intwidth);
193193
staticdoublepage_size(doubletuples,intwidth);
194194
staticdoubleget_parallel_divisor(Path*path);
195+
staticEquivalenceMember*identify_sort_ecmember(PlannerInfo*root,
196+
PathKey*key);
195197

196198

197199
/*
@@ -2016,22 +2018,21 @@ cost_incremental_sort(Path *path,
20162018
*/
20172019
foreach(l,pathkeys)
20182020
{
2019-
PathKey*key= (PathKey*)lfirst(l);
2020-
EquivalenceMember*member= (EquivalenceMember*)
2021-
linitial(key->pk_eclass->ec_members);
2021+
PathKey*key= (PathKey*)lfirst(l);
2022+
EquivalenceMember*em=identify_sort_ecmember(root,key);
20222023

20232024
/*
20242025
* Check if the expression contains Var with "varno 0" so that we
20252026
* don't call estimate_num_groups in that case.
20262027
*/
2027-
if (bms_is_member(0,pull_varnos(root, (Node*)member->em_expr)))
2028+
if (bms_is_member(0,pull_varnos(root, (Node*)em->em_expr)))
20282029
{
20292030
unknown_varno= true;
20302031
break;
20312032
}
20322033

20332034
/* expression not containing any Vars with "varno 0" */
2034-
presortedExprs=lappend(presortedExprs,member->em_expr);
2035+
presortedExprs=lappend(presortedExprs,em->em_expr);
20352036

20362037
if (foreach_current_index(l)+1 >=presorted_keys)
20372038
break;
@@ -6491,3 +6492,64 @@ compute_gather_rows(Path *path)
64916492

64926493
returnclamp_row_est(path->rows*get_parallel_divisor(path));
64936494
}
6495+
6496+
/*
6497+
* Find suitable expression from the list of equalence members.
6498+
*
6499+
* TODO: consider RelOptInfo's relids - the level of ec_members. Right now we
6500+
* don't have tests enough to prove this code.
6501+
*/
6502+
staticEquivalenceMember*
6503+
identify_sort_ecmember(PlannerInfo*root,PathKey*key)
6504+
{
6505+
EquivalenceMember*candidate=linitial(key->pk_eclass->ec_members);
6506+
6507+
if (root==NULL)
6508+
/* Fast path */
6509+
returncandidate;
6510+
6511+
foreach_node(EquivalenceMember,em,key->pk_eclass->ec_members)
6512+
{
6513+
VariableStatDatavardata;
6514+
6515+
if (em->em_ndistinct==0.)
6516+
/* Nothing helpful */
6517+
continue;
6518+
6519+
if (em->em_is_child||em->em_is_const||bms_is_empty(em->em_relids)||
6520+
bms_is_member(0,em->em_relids))
6521+
{
6522+
em->em_ndistinct=0.;
6523+
continue;
6524+
}
6525+
6526+
if (em->em_ndistinct<0.0)
6527+
{
6528+
boolisdefault= true;
6529+
doublendist;
6530+
6531+
/* Let's check candidate's ndistinct value */
6532+
examine_variable(root, (Node*)em->em_expr,0,&vardata);
6533+
if (HeapTupleIsValid(vardata.statsTuple))
6534+
ndist=get_variable_numdistinct(&vardata,&isdefault);
6535+
ReleaseVariableStats(vardata);
6536+
6537+
if (isdefault)
6538+
{
6539+
em->em_ndistinct=0.;
6540+
continue;
6541+
}
6542+
6543+
em->em_ndistinct=ndist;
6544+
}
6545+
6546+
Assert(em->em_ndistinct>0.);
6547+
6548+
if (candidate->em_ndistinct==0.||
6549+
em->em_ndistinct<candidate->em_ndistinct)
6550+
candidate=em;
6551+
}
6552+
6553+
Assert(candidate!=NULL);
6554+
returncandidate;
6555+
}

‎src/backend/optimizer/path/equivclass.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -526,6 +526,7 @@ add_eq_member(EquivalenceClass *ec, Expr *expr, Relids relids,
526526
em->em_datatype=datatype;
527527
em->em_jdomain=jdomain;
528528
em->em_parent=parent;
529+
em->em_ndistinct=-1.0;
529530

530531
if (bms_is_empty(relids))
531532
{

‎src/include/nodes/pathnodes.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1439,6 +1439,8 @@ typedef struct EquivalenceMember
14391439
JoinDomain*em_jdomain;/* join domain containing the source clause */
14401440
/* if em_is_child is true, this links to corresponding EM for top parent */
14411441
structEquivalenceMember*em_parentpg_node_attr(read_write_ignore);
1442+
1443+
doubleem_ndistinct;/* cached value of ndistinct: 0- default value or 'unknown'; -1 - not defined yet */
14421444
}EquivalenceMember;
14431445

14441446
/*

‎src/test/regress/expected/incremental_sort.out

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1698,3 +1698,54 @@ explain (costs off) select a, b, a <-> point(5, 5) dist from point_table order b
16981698
Order By: (a <-> '(5,5)'::point)
16991699
(6 rows)
17001700

1701+
-- Check:
1702+
-- commuting of sides in an expression doesn't influence cost estimation
1703+
CREATE TABLE sort_ndist_t1 (x numeric, y numeric);
1704+
CREATE TABLE sort_ndist_t2 (x numeric, y numeric);
1705+
INSERT INTO sort_ndist_t1 (x,y)
1706+
SELECT gs%10, gs%10 FROM generate_series(1,1E4) AS gs;
1707+
INSERT INTO sort_ndist_t2 (x,y)
1708+
SELECT gs, gs FROM generate_series(1,1E4) AS gs;
1709+
CREATE INDEX t1_idx ON sort_ndist_t1 (x);
1710+
CREATE INDEX t2_idx ON sort_ndist_t2 (x);
1711+
VACUUM ANALYZE sort_ndist_t1, sort_ndist_t2;
1712+
SET enable_hashjoin = 'off';
1713+
-- Having lots of duplicates after the join it is more effective to use plain
1714+
-- Sort instead of incremental sort: with small number of groups we do the same
1715+
-- stuff like Sort but with extra penalty.
1716+
EXPLAIN (COSTS OFF)
1717+
SELECT t1.x, t1.y FROM sort_ndist_t1 t1, sort_ndist_t2 t2
1718+
WHERE t1.x=t2.x
1719+
ORDER BY t1.x,t1.y;
1720+
QUERY PLAN
1721+
--------------------------------------------------------------------
1722+
Sort
1723+
Sort Key: t1.x, t1.y
1724+
-> Nested Loop
1725+
-> Seq Scan on sort_ndist_t1 t1
1726+
-> Memoize
1727+
Cache Key: t1.x
1728+
Cache Mode: logical
1729+
-> Index Only Scan using t2_idx on sort_ndist_t2 t2
1730+
Index Cond: (x = t1.x)
1731+
(9 rows)
1732+
1733+
EXPLAIN (COSTS OFF) -- the plan must be the same as above
1734+
SELECT t1.x, t1.y FROM sort_ndist_t1 t1, sort_ndist_t2 t2
1735+
WHERE t2.x=t1.x
1736+
ORDER BY t1.x,t1.y;
1737+
QUERY PLAN
1738+
--------------------------------------------------------------------
1739+
Sort
1740+
Sort Key: t1.x, t1.y
1741+
-> Nested Loop
1742+
-> Seq Scan on sort_ndist_t1 t1
1743+
-> Memoize
1744+
Cache Key: t1.x
1745+
Cache Mode: logical
1746+
-> Index Only Scan using t2_idx on sort_ndist_t2 t2
1747+
Index Cond: (x = t1.x)
1748+
(9 rows)
1749+
1750+
RESET enable_hashjoin;
1751+
DROP TABLE sort_ndist_t1, sort_ndist_t2;

‎src/test/regress/sql/incremental_sort.sql

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -292,3 +292,34 @@ create index point_table_a_idx on point_table using gist(a);
292292
-- Ensure we get an incremental sort plan for both of the following queries
293293
explain (costs off)select a, b, a<->point(5,5) distfrom point_tableorder by dist, blimit1;
294294
explain (costs off)select a, b, a<->point(5,5) distfrom point_tableorder by dist, bdesclimit1;
295+
296+
-- Check:
297+
-- commuting of sides in an expression doesn't influence cost estimation
298+
CREATETABLEsort_ndist_t1 (xnumeric, ynumeric);
299+
CREATETABLEsort_ndist_t2 (xnumeric, ynumeric);
300+
301+
INSERT INTO sort_ndist_t1 (x,y)
302+
SELECT gs%10, gs%10FROM generate_series(1,1E4)AS gs;
303+
INSERT INTO sort_ndist_t2 (x,y)
304+
SELECT gs, gsFROM generate_series(1,1E4)AS gs;
305+
CREATEINDEXt1_idxON sort_ndist_t1 (x);
306+
CREATEINDEXt2_idxON sort_ndist_t2 (x);
307+
VACUUM ANALYZE sort_ndist_t1, sort_ndist_t2;
308+
309+
SET enable_hashjoin='off';
310+
311+
-- Having lots of duplicates after the join it is more effective to use plain
312+
-- Sort instead of incremental sort: with small number of groups we do the same
313+
-- stuff like Sort but with extra penalty.
314+
EXPLAIN (COSTS OFF)
315+
SELECTt1.x,t1.yFROM sort_ndist_t1 t1, sort_ndist_t2 t2
316+
WHEREt1.x=t2.x
317+
ORDER BYt1.x,t1.y;
318+
319+
EXPLAIN (COSTS OFF)-- the plan must be the same as above
320+
SELECTt1.x,t1.yFROM sort_ndist_t1 t1, sort_ndist_t2 t2
321+
WHEREt2.x=t1.x
322+
ORDER BYt1.x,t1.y;
323+
324+
RESET enable_hashjoin;
325+
DROPTABLE sort_ndist_t1, sort_ndist_t2;

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp