NotificationsYou must be signed in to change notification settings
Fork6
Star31

Commit9ea2614

committed

Stabilise incremental sort cost calculation.

Carefully identify an expression that can represent the path key on sortestimation. Columns may have different distinct estimations that can triggerwrong cost estimations and choice of sort strategy.Sorting has only pathkeys as input for the cost estimation. This patch, insteadof a blind choice of the first equivalence class member, attempts to find anexpression that chooses the most negligible ndistinct value.TODO: Filtering expressions from outer query subtrees is not a big deal. Butin that case, information about underlying relids is necessary for the costcalculation routine that causes API change. So, just wait for some time forthe community decision.Add into EquivalenceMember the number of distinct values - em_ndistinct.It may be additionally used later in groups number estimations.Task: 9578.Tags: optimized_group_by.

1 parentb8aa44f commit9ea2614Copy full SHA for 9ea2614

File tree

5 files changed

+152

-5

lines changed

src
- backend/optimizer/path
  - costsize.c
  - equivclass.c
- include/nodes
  - pathnodes.h
- test/regress
  - expected
    - incremental_sort.out
  - sql
    - incremental_sort.sql

5 files changed

+152

-5

lines changed

`‎src/backend/optimizer/path/costsize.c`

Lines changed: 67 additions & 5 deletions

Original file line number	Diff line number	Diff line change
`@@ -192,6 +192,8 @@ static int32 get_expr_width(PlannerInfo root, const Node expr);`
`192`	`192`	`staticdoublerelation_byte_size(doubletuples,intwidth);`
`193`	`193`	`staticdoublepage_size(doubletuples,intwidth);`
`194`	`194`	`staticdoubleget_parallel_divisor(Path*path);`
	`195`	`+staticEquivalenceMemberidentify_sort_ecmember(PlannerInforoot,`
	`196`	`+PathKey*key);`
`195`	`197`
`196`	`198`
`197`	`199`	`/*`
`@@ -2016,22 +2018,21 @@ cost_incremental_sort(Path *path,`
`2016`	`2018`	`*/`
`2017`	`2019`	`foreach(l,pathkeys)`
`2018`	`2020`	`{`
`2019`		`-PathKeykey= (PathKey)lfirst(l);`
`2020`		`-EquivalenceMembermember= (EquivalenceMember)`
`2021`		`-linitial(key->pk_eclass->ec_members);`
	`2021`	`+PathKeykey= (PathKey)lfirst(l);`
	`2022`	`+EquivalenceMember*em=identify_sort_ecmember(root,key);`
`2022`	`2023`
`2023`	`2024`	`/*`
`2024`	`2025`	`* Check if the expression contains Var with "varno 0" so that we`
`2025`	`2026`	`* don't call estimate_num_groups in that case.`
`2026`	`2027`	`*/`
`2027`		`-if (bms_is_member(0,pull_varnos(root, (Node*)member->em_expr)))`
	`2028`	`+if (bms_is_member(0,pull_varnos(root, (Node*)em->em_expr)))`
`2028`	`2029`	`{`
`2029`	`2030`	`unknown_varno= true;`
`2030`	`2031`	`break;`
`2031`	`2032`	`}`
`2032`	`2033`
`2033`	`2034`	`/* expression not containing any Vars with "varno 0" */`
`2034`		`-presortedExprs=lappend(presortedExprs,member->em_expr);`
	`2035`	`+presortedExprs=lappend(presortedExprs,em->em_expr);`
`2035`	`2036`
`2036`	`2037`	`if (foreach_current_index(l)+1 >=presorted_keys)`
`2037`	`2038`	`break;`
`@@ -6491,3 +6492,64 @@ compute_gather_rows(Path *path)`
`6491`	`6492`
`6492`	`6493`	`returnclamp_row_est(path->rows*get_parallel_divisor(path));`
`6493`	`6494`	`}`
	`6495`	`+`
	`6496`	`+/*`
	`6497`	`+ * Find suitable expression from the list of equalence members.`
	`6498`	`+ *`
	`6499`	`+ * TODO: consider RelOptInfo's relids - the level of ec_members. Right now we`
	`6500`	`+ * don't have tests enough to prove this code.`
	`6501`	`+ */`
	`6502`	`+staticEquivalenceMember*`
	`6503`	`+identify_sort_ecmember(PlannerInforoot,PathKeykey)`
	`6504`	`+{`
	`6505`	`+EquivalenceMember*candidate=linitial(key->pk_eclass->ec_members);`
	`6506`	`+`
	`6507`	`+if (root==NULL)`
	`6508`	`+/* Fast path */`
	`6509`	`+returncandidate;`
	`6510`	`+`
	`6511`	`+foreach_node(EquivalenceMember,em,key->pk_eclass->ec_members)`
	`6512`	`+{`
	`6513`	`+VariableStatDatavardata;`
	`6514`	`+`
	`6515`	`+if (em->em_ndistinct==0.)`
	`6516`	`+/* Nothing helpful */`
	`6517`	`+continue;`
	`6518`	`+`
	`6519`	`+if (em->em_is_child\|\|em->em_is_const\|\|bms_is_empty(em->em_relids)\|\|`
	`6520`	`+bms_is_member(0,em->em_relids))`
	`6521`	`+{`
	`6522`	`+em->em_ndistinct=0.;`
	`6523`	`+continue;`
	`6524`	`+}`
	`6525`	`+`
	`6526`	`+if (em->em_ndistinct<0.0)`
	`6527`	`+{`
	`6528`	`+boolisdefault= true;`
	`6529`	`+doublendist;`
	`6530`	`+`
	`6531`	`+/* Let's check candidate's ndistinct value */`
	`6532`	`+examine_variable(root, (Node*)em->em_expr,0,&vardata);`
	`6533`	`+if (HeapTupleIsValid(vardata.statsTuple))`
	`6534`	`+ndist=get_variable_numdistinct(&vardata,&isdefault);`
	`6535`	`+ReleaseVariableStats(vardata);`
	`6536`	`+`
	`6537`	`+if (isdefault)`
	`6538`	`+{`
	`6539`	`+em->em_ndistinct=0.;`
	`6540`	`+continue;`
	`6541`	`+}`
	`6542`	`+`
	`6543`	`+em->em_ndistinct=ndist;`
	`6544`	`+}`
	`6545`	`+`
	`6546`	`+Assert(em->em_ndistinct>0.);`
	`6547`	`+`
	`6548`	`+if (candidate->em_ndistinct==0.\|\|`
	`6549`	`+em->em_ndistinct<candidate->em_ndistinct)`
	`6550`	`+candidate=em;`
	`6551`	`+}`
	`6552`	`+`
	`6553`	`+Assert(candidate!=NULL);`
	`6554`	`+returncandidate;`
	`6555`	`+}`

`‎src/backend/optimizer/path/equivclass.c`

Lines changed: 1 addition & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -526,6 +526,7 @@ add_eq_member(EquivalenceClass ec, Expr expr, Relids relids,`
`526`	`526`	`em->em_datatype=datatype;`
`527`	`527`	`em->em_jdomain=jdomain;`
`528`	`528`	`em->em_parent=parent;`
	`529`	`+em->em_ndistinct=-1.0;`
`529`	`530`
`530`	`531`	`if (bms_is_empty(relids))`
`531`	`532`	`{`

`‎src/include/nodes/pathnodes.h`

Lines changed: 2 additions & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -1439,6 +1439,8 @@ typedef struct EquivalenceMember`
`1439`	`1439`	`JoinDomainem_jdomain;/ join domain containing the source clause */`
`1440`	`1440`	`/* if em_is_child is true, this links to corresponding EM for top parent */`
`1441`	`1441`	`structEquivalenceMember*em_parentpg_node_attr(read_write_ignore);`
	`1442`	`+`
	`1443`	`+doubleem_ndistinct;/* cached value of ndistinct: 0- default value or 'unknown'; -1 - not defined yet */`
`1442`	`1444`	`}EquivalenceMember;`
`1443`	`1445`
`1444`	`1446`	`/*`

`‎src/test/regress/expected/incremental_sort.out`

Lines changed: 51 additions & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -1698,3 +1698,54 @@ explain (costs off) select a, b, a <-> point(5, 5) dist from point_table order b`
`1698`	`1698`	`Order By: (a <-> '(5,5)'::point)`
`1699`	`1699`	`(6 rows)`
`1700`	`1700`
	`1701`	`+-- Check:`
	`1702`	`+-- commuting of sides in an expression doesn't influence cost estimation`
	`1703`	`+CREATE TABLE sort_ndist_t1 (x numeric, y numeric);`
	`1704`	`+CREATE TABLE sort_ndist_t2 (x numeric, y numeric);`
	`1705`	`+INSERT INTO sort_ndist_t1 (x,y)`
	`1706`	`+ SELECT gs%10, gs%10 FROM generate_series(1,1E4) AS gs;`
	`1707`	`+INSERT INTO sort_ndist_t2 (x,y)`
	`1708`	`+ SELECT gs, gs FROM generate_series(1,1E4) AS gs;`
	`1709`	`+CREATE INDEX t1_idx ON sort_ndist_t1 (x);`
	`1710`	`+CREATE INDEX t2_idx ON sort_ndist_t2 (x);`
	`1711`	`+VACUUM ANALYZE sort_ndist_t1, sort_ndist_t2;`
	`1712`	`+SET enable_hashjoin = 'off';`
	`1713`	`+-- Having lots of duplicates after the join it is more effective to use plain`
	`1714`	`+-- Sort instead of incremental sort: with small number of groups we do the same`
	`1715`	`+-- stuff like Sort but with extra penalty.`
	`1716`	`+EXPLAIN (COSTS OFF)`
	`1717`	`+SELECT t1.x, t1.y FROM sort_ndist_t1 t1, sort_ndist_t2 t2`
	`1718`	`+WHERE t1.x=t2.x`
	`1719`	`+ORDER BY t1.x,t1.y;`
	`1720`	`+ QUERY PLAN`
	`1721`	`+--------------------------------------------------------------------`
	`1722`	`+ Sort`
	`1723`	`+ Sort Key: t1.x, t1.y`
	`1724`	`+ -> Nested Loop`
	`1725`	`+ -> Seq Scan on sort_ndist_t1 t1`
	`1726`	`+ -> Memoize`
	`1727`	`+ Cache Key: t1.x`
	`1728`	`+ Cache Mode: logical`
	`1729`	`+ -> Index Only Scan using t2_idx on sort_ndist_t2 t2`
	`1730`	`+ Index Cond: (x = t1.x)`
	`1731`	`+(9 rows)`
	`1732`	`+`
	`1733`	`+EXPLAIN (COSTS OFF) -- the plan must be the same as above`
	`1734`	`+SELECT t1.x, t1.y FROM sort_ndist_t1 t1, sort_ndist_t2 t2`
	`1735`	`+WHERE t2.x=t1.x`
	`1736`	`+ORDER BY t1.x,t1.y;`
	`1737`	`+ QUERY PLAN`
	`1738`	`+--------------------------------------------------------------------`
	`1739`	`+ Sort`
	`1740`	`+ Sort Key: t1.x, t1.y`
	`1741`	`+ -> Nested Loop`
	`1742`	`+ -> Seq Scan on sort_ndist_t1 t1`
	`1743`	`+ -> Memoize`
	`1744`	`+ Cache Key: t1.x`
	`1745`	`+ Cache Mode: logical`
	`1746`	`+ -> Index Only Scan using t2_idx on sort_ndist_t2 t2`
	`1747`	`+ Index Cond: (x = t1.x)`
	`1748`	`+(9 rows)`
	`1749`	`+`
	`1750`	`+RESET enable_hashjoin;`
	`1751`	`+DROP TABLE sort_ndist_t1, sort_ndist_t2;`

`‎src/test/regress/sql/incremental_sort.sql`

Lines changed: 31 additions & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -292,3 +292,34 @@ create index point_table_a_idx on point_table using gist(a);`
`292`	`292`	`-- Ensure we get an incremental sort plan for both of the following queries`
`293`	`293`	`explain (costs off)select a, b, a<->point(5,5) distfrom point_tableorder by dist, blimit1;`
`294`	`294`	`explain (costs off)select a, b, a<->point(5,5) distfrom point_tableorder by dist, bdesclimit1;`
	`295`	`+`
	`296`	`+-- Check:`
	`297`	`+-- commuting of sides in an expression doesn't influence cost estimation`
	`298`	`+CREATETABLEsort_ndist_t1 (xnumeric, ynumeric);`
	`299`	`+CREATETABLEsort_ndist_t2 (xnumeric, ynumeric);`
	`300`	`+`
	`301`	`+INSERT INTO sort_ndist_t1 (x,y)`
	`302`	`+SELECT gs%10, gs%10FROM generate_series(1,1E4)AS gs;`
	`303`	`+INSERT INTO sort_ndist_t2 (x,y)`
	`304`	`+SELECT gs, gsFROM generate_series(1,1E4)AS gs;`
	`305`	`+CREATEINDEXt1_idxON sort_ndist_t1 (x);`
	`306`	`+CREATEINDEXt2_idxON sort_ndist_t2 (x);`
	`307`	`+VACUUM ANALYZE sort_ndist_t1, sort_ndist_t2;`
	`308`	`+`
	`309`	`+SET enable_hashjoin='off';`
	`310`	`+`
	`311`	`+-- Having lots of duplicates after the join it is more effective to use plain`
	`312`	`+-- Sort instead of incremental sort: with small number of groups we do the same`
	`313`	`+-- stuff like Sort but with extra penalty.`
	`314`	`+EXPLAIN (COSTS OFF)`
	`315`	`+SELECTt1.x,t1.yFROM sort_ndist_t1 t1, sort_ndist_t2 t2`
	`316`	`+WHEREt1.x=t2.x`
	`317`	`+ORDER BYt1.x,t1.y;`
	`318`	`+`
	`319`	`+EXPLAIN (COSTS OFF)-- the plan must be the same as above`
	`320`	`+SELECTt1.x,t1.yFROM sort_ndist_t1 t1, sort_ndist_t2 t2`
	`321`	`+WHEREt2.x=t1.x`
	`322`	`+ORDER BYt1.x,t1.y;`
	`323`	`+`
	`324`	`+RESET enable_hashjoin;`
	`325`	`+DROPTABLE sort_ndist_t1, sort_ndist_t2;`

0 commit comments

Comments

(0)

Movatterモバイル変換

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Commit9ea2614

File tree

5 files changed

5 files changed

`‎src/backend/optimizer/path/costsize.c`

`‎src/backend/optimizer/path/equivclass.c`

`‎src/include/nodes/pathnodes.h`

`‎src/test/regress/expected/incremental_sort.out`

`‎src/test/regress/sql/incremental_sort.sql`

0 commit comments