NotificationsYou must be signed in to change notification settings
Fork6
Star31

Commit9c3dab9

committed

Introduce the number of columns in the cost-sort model.

The sort node calls the comparison operator for each pair of attributes for each couple of tuples. However, the current cost model uses a '2.0*cpu_operator_cost' multiplier, which performs some sort of averaging. This technique can lead to incorrect estimations when sorting on three, four, or more columns, which is quite common in practice. Moreover, further elaboration of the optimiser imposes stricter requirements on the balance of sort costs, as caused by IncrementalSort, MergeAppend, and MergeJoin. In this patch, the multiplier is a linear function of the number of columns. The constant term 1.0 is needed to smooth the fact that the dependence on the number of columns is weaker than linear. It is an extreme formula. The number of comparisons depends on the distinct values in each column. As a TODO, we can natively elaborate this model in the next step, involving distinct statistics to make estimations more precise. Task: 9578. Tags: optimized_group_by.

1 parent9ea2614 commit9c3dab9Copy full SHA for 9c3dab9

File tree

6 files changed

+78

-66

lines changed

contrib/postgres_fdw/expected
- postgres_fdw.out
src
- backend/optimizer/path
  - costsize.c
- test/regress/expected

6 files changed

+78

-66

lines changed

`‎contrib/postgres_fdw/expected/postgres_fdw.out`

Lines changed: 9 additions & 6 deletions

Original file line number	Diff line number	Diff line change
`@@ -9984,13 +9984,16 @@ SELECT t1.a,t2.b,t3.c FROM fprt1 t1 INNER JOIN fprt2 t2 ON (t1.a = t2.b) INNER J`
`9984`	`9984`	`-- left outer join + nullable clause`
`9985`	`9985`	`EXPLAIN (VERBOSE, COSTS OFF)`
`9986`	`9986`	`SELECT t1.a,t2.b,t2.c FROM fprt1 t1 LEFT JOIN (SELECT * FROM fprt2 WHERE a < 10) t2 ON (t1.a = t2.b and t1.b = t2.a) WHERE t1.a < 10 ORDER BY 1,2,3;`
`9987`		`-QUERY PLAN`
`9988`		`-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------`
`9989`		`-Foreign Scan`
	`9987`	`+ QUERY PLAN`
	`9988`	`+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------`
	`9989`	`+Sort`
`9990`	`9990`	`Output: t1.a, fprt2.b, fprt2.c`
`9991`		`- Relations: (public.ftprt1_p1 t1) LEFT JOIN (public.ftprt2_p1 fprt2)`
`9992`		`- Remote SQL: SELECT r5.a, r6.b, r6.c FROM (public.fprt1_p1 r5 LEFT JOIN public.fprt2_p1 r6 ON (((r5.a = r6.b)) AND ((r5.b = r6.a)) AND ((r6.a < 10)))) WHERE ((r5.a < 10)) ORDER BY r5.a ASC NULLS LAST, r6.b ASC NULLS LAST, r6.c ASC NULLS LAST`
`9993`		`-(4 rows)`
	`9991`	`+ Sort Key: t1.a, fprt2.b, fprt2.c`
	`9992`	`+ -> Foreign Scan`
	`9993`	`+ Output: t1.a, fprt2.b, fprt2.c`
	`9994`	`+ Relations: (public.ftprt1_p1 t1) LEFT JOIN (public.ftprt2_p1 fprt2)`
	`9995`	`+ Remote SQL: SELECT r5.a, r6.b, r6.c FROM (public.fprt1_p1 r5 LEFT JOIN public.fprt2_p1 r6 ON (((r5.a = r6.b)) AND ((r5.b = r6.a)) AND ((r6.a < 10)))) WHERE ((r5.a < 10))`
	`9996`	`+(7 rows)`
`9994`	`9997`
`9995`	`9998`	`SELECT t1.a,t2.b,t2.c FROM fprt1 t1 LEFT JOIN (SELECT * FROM fprt2 WHERE a < 10) t2 ON (t1.a = t2.b and t1.b = t2.a) WHERE t1.a < 10 ORDER BY 1,2,3;`
`9996`	`9999`	`a \| b \| c`

`‎src/backend/optimizer/path/costsize.c`

Lines changed: 15 additions & 7 deletions

Original file line number	Diff line number	Diff line change
`@@ -483,6 +483,8 @@ cost_gather_merge(GatherMergePath path, PlannerInfo root,`
`483`	`483`	`Costcomparison_cost;`
`484`	`484`	`doubleN;`
`485`	`485`	`doublelogN;`
	`486`	`+intnpathkeys=list_length(((Path*)path)->pathkeys);`
	`487`	`+doublecmpMultiplier= (npathkeys==0) ?2.0 :npathkeys+1.0;`
`486`	`488`
`487`	`489`	`/* Mark the path with the correct row estimate */`
`488`	`490`	`if (rows)`
`@@ -505,7 +507,7 @@ cost_gather_merge(GatherMergePath path, PlannerInfo root,`
`505`	`507`	`logN=LOG2(N);`
`506`	`508`
`507`	`509`	`/* Assumed cost per tuple comparison */`
`508`		`-comparison_cost=2.0*cpu_operator_cost;`
	`510`	`+comparison_cost=cmpMultiplier*cpu_operator_cost;`
`509`	`511`
`510`	`512`	`/* Heap creation cost */`
`511`	`513`	`startup_cost+=comparison_costNlogN;`
`@@ -1863,7 +1865,7 @@ cost_recursive_union(Path runion, Path nrterm, Path *rterm)`
`1863`	`1865`	`*/`
`1864`	`1866`	`staticvoid`
`1865`	`1867`	`cost_tuplesort(Coststartup_cost,Costrun_cost,`
`1866`		`-doubletuples,intwidth,`
	`1868`	`+doubletuples,intwidth,doublecmpMultiplier,`
`1867`	`1869`	`Costcomparison_cost,intsort_mem,`
`1868`	`1870`	`doublelimit_tuples)`
`1869`	`1871`	`{`
`@@ -1880,7 +1882,7 @@ cost_tuplesort(Cost startup_cost, Cost run_cost,`
`1880`	`1882`	`tuples=2.0;`
`1881`	`1883`
`1882`	`1884`	`/* Include the default cost-per-comparison */`
`1883`		`-comparison_cost+=2.0*cpu_operator_cost;`
	`1885`	`+comparison_cost+=cmpMultiplier*cpu_operator_cost;`
`1884`	`1886`
`1885`	`1887`	`/* Do we have a useful LIMIT? */`
`1886`	`1888`	`if (limit_tuples>0&&limit_tuples<tuples)`
`@@ -2051,7 +2053,9 @@ cost_incremental_sort(Path *path,`
`2051`	`2053`	`* are equal.`
`2052`	`2054`	`*/`
`2053`	`2055`	`cost_tuplesort(&group_startup_cost,&group_run_cost,`
`2054`		`-group_tuples,width,comparison_cost,sort_mem,`
	`2056`	`+group_tuples,width,`
	`2057`	`+list_length(pathkeys)+1.0,`
	`2058`	`+comparison_cost,sort_mem,`
`2055`	`2059`	`limit_tuples);`
`2056`	`2060`
`2057`	`2061`	`/*`
`@@ -2075,7 +2079,7 @@ cost_incremental_sort(Path *path,`
`2075`	`2079`	`* detect the sort groups. This is roughly equal to one extra copy and`
`2076`	`2080`	`* comparison per tuple.`
`2077`	`2081`	`*/`
`2078`		`-run_cost+= (cpu_tuple_cost+comparison_cost)*input_tuples;`
	`2082`	`+run_cost+= (cpu_tuple_cost+(presorted_keys+1)comparison_cost)input_tuples;`
`2079`	`2083`
`2080`	`2084`	`/*`
`2081`	`2085`	`* Additionally, we charge double cpu_tuple_cost for each input group to`
`@@ -2109,9 +2113,11 @@ cost_sort(Path path, PlannerInfo root,`
`2109`	`2113`	`{`
`2110`	`2114`	`Coststartup_cost;`
`2111`	`2115`	`Costrun_cost;`
	`2116`	`+doublecmpMultiplier=`
	`2117`	`+(pathkeys==NIL) ?2.0 :list_length(pathkeys)+1.0;`
`2112`	`2118`
`2113`	`2119`	`cost_tuplesort(&startup_cost,&run_cost,`
`2114`		`-tuples,width,`
	`2120`	`+tuples,width,cmpMultiplier,`
`2115`	`2121`	`comparison_cost,sort_mem,`
`2116`	`2122`	`limit_tuples);`
`2117`	`2123`
`@@ -2391,6 +2397,8 @@ cost_merge_append(Path path, PlannerInfo root,`
`2391`	`2397`	`Costcomparison_cost;`
`2392`	`2398`	`doubleN;`
`2393`	`2399`	`doublelogN;`
	`2400`	`+doublecmpMultiplier=`
	`2401`	`+(pathkeys==NIL) ?2.0 :list_length(pathkeys)+1.0;`
`2394`	`2402`
`2395`	`2403`	`/*`
`2396`	`2404`	`* Avoid log(0)...`
`@@ -2399,7 +2407,7 @@ cost_merge_append(Path path, PlannerInfo root,`
`2399`	`2407`	`logN=LOG2(N);`
`2400`	`2408`
`2401`	`2409`	`/* Assumed cost per tuple comparison */`
`2402`		`-comparison_cost=2.0*cpu_operator_cost;`
	`2410`	`+comparison_cost=cmpMultiplier*cpu_operator_cost;`
`2403`	`2411`
`2404`	`2412`	`/* Heap creation cost */`
`2405`	`2413`	`startup_cost+=comparison_costNlogN;`

`‎src/test/regress/expected/aggregates.out`

Lines changed: 7 additions & 6 deletions

Original file line number	Diff line number	Diff line change
`@@ -3001,17 +3001,18 @@ ANALYZE agg_sort_order;`
`3001`	`3001`	`EXPLAIN (COSTS OFF)`
`3002`	`3002`	`SELECT array_agg(c1 ORDER BY c2),c2`
`3003`	`3003`	`FROM agg_sort_order WHERE c2 < 100 GROUP BY c1 ORDER BY 2;`
`3004`		`-QUERY PLAN`
`3005`		`-----------------------------------------------------------------------------`
	`3004`	`+ QUERY PLAN`
	`3005`	`+--------------------------------------------------------------------------`
`3006`	`3006`	`Sort`
`3007`	`3007`	`Sort Key: c2`
`3008`	`3008`	`-> GroupAggregate`
`3009`	`3009`	`Group Key: c1`
`3010`		`- -> Sort`
	`3010`	`+ ->IncrementalSort`
`3011`	`3011`	`Sort Key: c1, c2`
`3012`		`- -> Index Scan using agg_sort_order_c2_idx on agg_sort_order`
`3013`		`- Index Cond: (c2 < 100)`
`3014`		`-(8 rows)`
	`3012`	`+ Presorted Key: c1`
	`3013`	`+ -> Index Scan using agg_sort_order_pkey on agg_sort_order`
	`3014`	`+ Filter: (c2 < 100)`
	`3015`	`+(9 rows)`
`3015`	`3016`
`3016`	`3017`	`DROP TABLE agg_sort_order CASCADE;`
`3017`	`3018`	`DROP TABLE btg;`

`‎src/test/regress/expected/join.out`

Lines changed: 11 additions & 9 deletions

Original file line number	Diff line number	Diff line change
`@@ -5726,18 +5726,20 @@ select d.* from d left join (select * from b group by b.id, b.c_id) s`
`5726`	`5726`	`explain (costs off)`
`5727`	`5727`	`select d.* from d left join (select distinct * from b) s`
`5728`	`5728`	`on d.a = s.id;`
`5729`		`- QUERY PLAN`
`5730`		`---------------------------------------`
`5731`		`- Merge Right Join`
`5732`		`- Merge Cond: (b.id = d.a)`
`5733`		`- -> Unique`
`5734`		`- -> Sort`
`5735`		`- Sort Key: b.id, b.c_id`
`5736`		`- -> Seq Scan on b`
	`5729`	`+ QUERY PLAN`
	`5730`	`+---------------------------------------------`
	`5731`	`+ Merge Left Join`
	`5732`	`+ Merge Cond: (d.a = s.id)`
`5737`	`5733`	`-> Sort`
`5738`	`5734`	`Sort Key: d.a`
`5739`	`5735`	`-> Seq Scan on d`
`5740`		`-(9 rows)`
	`5736`	`+ -> Sort`
	`5737`	`+ Sort Key: s.id`
	`5738`	`+ -> Subquery Scan on s`
	`5739`	`+ -> HashAggregate`
	`5740`	`+ Group Key: b.id, b.c_id`
	`5741`	`+ -> Seq Scan on b`
	`5742`	`+(11 rows)`
`5741`	`5743`
`5742`	`5744`	`-- join removal is not possible here`
`5743`	`5745`	`explain (costs off)`

`‎src/test/regress/expected/partition_join.out`

Lines changed: 5 additions & 3 deletions

Original file line number	Diff line number	Diff line change
`@@ -1235,9 +1235,11 @@ EXPLAIN (COSTS OFF)`
`1235`	`1235`	`SELECT t1.a, t1.c, t2.b, t2.c, t3.a + t3.b, t3.c FROM (prt1 t1 LEFT JOIN prt2 t2 ON t1.a = t2.b) RIGHT JOIN prt1_e t3 ON (t1.a = (t3.a + t3.b)/2) WHERE t3.c = 0 ORDER BY t1.a, t2.b, t3.a + t3.b;`
`1236`	`1236`	`QUERY PLAN`
`1237`	`1237`	`----------------------------------------------------------------------------`
`1238`		`- Sort`
	`1238`	`+IncrementalSort`
`1239`	`1239`	`Sort Key: t1.a, t2.b, ((t3.a + t3.b))`
`1240`		`- -> Append`
	`1240`	`+ Presorted Key: t1.a`
	`1241`	`+ -> Merge Append`
	`1242`	`+ Sort Key: t1.a`
`1241`	`1243`	`-> Merge Left Join`
`1242`	`1244`	`Merge Cond: (t1_1.a = t2_1.b)`
`1243`	`1245`	`-> Sort`
`@@ -1286,7 +1288,7 @@ SELECT t1.a, t1.c, t2.b, t2.c, t3.a + t3.b, t3.c FROM (prt1 t1 LEFT JOIN prt2 t2`
`1286`	`1288`	`-> Sort`
`1287`	`1289`	`Sort Key: t2_3.b`
`1288`	`1290`	`-> Seq Scan on prt2_p3 t2_3`
`1289`		`-(51 rows)`
	`1291`	`+(53 rows)`
`1290`	`1292`
`1291`	`1293`	`SELECT t1.a, t1.c, t2.b, t2.c, t3.a + t3.b, t3.c FROM (prt1 t1 LEFT JOIN prt2 t2 ON t1.a = t2.b) RIGHT JOIN prt1_e t3 ON (t1.a = (t3.a + t3.b)/2) WHERE t3.c = 0 ORDER BY t1.a, t2.b, t3.a + t3.b;`
`1292`	`1294`	`a \| c \| b \| c \| ?column? \| c`

`‎src/test/regress/expected/union.out`

Lines changed: 31 additions & 35 deletions

Original file line number	Diff line number	Diff line change
`@@ -1224,18 +1224,17 @@ SELECT * FROM`
`1224`	`1224`	`SELECT 2 AS t, 4 AS x) ss`
`1225`	`1225`	`WHERE x < 4`
`1226`	`1226`	`ORDER BY x;`
`1227`		`-QUERY PLAN`
`1228`		`---------------------------------------------------`
	`1227`	`+ QUERY PLAN`
	`1228`	`+--------------------------------------------`
`1229`	`1229`	`Sort`
`1230`	`1230`	`Sort Key: (2)`
`1231`		`- -> Unique`
`1232`		`- -> Sort`
`1233`		`- Sort Key: (1), (2)`
`1234`		`- -> Append`
`1235`		`- -> Result`
`1236`		`- -> Result`
`1237`		`- One-Time Filter: false`
`1238`		`-(9 rows)`
	`1231`	`+ -> HashAggregate`
	`1232`	`+ Group Key: (1), (2)`
	`1233`	`+ -> Append`
	`1234`	`+ -> Result`
	`1235`	`+ -> Result`
	`1236`	`+ One-Time Filter: false`
	`1237`	`+(8 rows)`
`1239`	`1238`
`1240`	`1239`	`SELECT * FROM`
`1241`	`1240`	`(SELECT 1 AS t, 2 AS x`
`@@ -1289,19 +1288,18 @@ SELECT * FROM`
`1289`	`1288`	`SELECT 2 AS t, 4 AS x) ss`
`1290`	`1289`	`WHERE x > 3`
`1291`	`1290`	`ORDER BY x;`
`1292`		`-QUERY PLAN`
`1293`		`-------------------------------------------------------------------------------------`
	`1291`	`+ QUERY PLAN`
	`1292`	`+-------------------------------------------------------------------------------`
`1294`	`1293`	`Sort`
`1295`	`1294`	`Sort Key: ss.x`
`1296`	`1295`	`-> Subquery Scan on ss`
`1297`	`1296`	`Filter: (ss.x > 3)`
`1298`		`- -> Unique`
`1299`		`- -> Sort`
`1300`		`- Sort Key: (1), (((random() * '3'::double precision))::integer)`
`1301`		`- -> Append`
`1302`		`- -> Result`
`1303`		`- -> Result`
`1304`		`-(10 rows)`
	`1297`	`+ -> HashAggregate`
	`1298`	`+ Group Key: (1), (((random() * '3'::double precision))::integer)`
	`1299`	`+ -> Append`
	`1300`	`+ -> Result`
	`1301`	`+ -> Result`
	`1302`	`+(9 rows)`
`1305`	`1303`
`1306`	`1304`	`SELECT * FROM`
`1307`	`1305`	`(SELECT 1 AS t, (random()*3)::int AS x`
`@@ -1322,24 +1320,22 @@ select distinct q1 from`
`1322`	`1320`	`union all`
`1323`	`1321`	`select distinct * from int8_tbl i82) ss`
`1324`	`1322`	`where q2 = q2;`
`1325`		`-QUERY PLAN`
`1326`		`-----------------------------------------------------------`
`1327`		`-Unique`
`1328`		`--> Merge Append`
`1329`		`- Sort Key: "SELECT 1".q1`
	`1323`	`+ QUERY PLAN`
	`1324`	`+----------------------------------------------------`
	`1325`	`+HashAggregate`
	`1326`	`+Group Key: "SELECT 1".q1`
	`1327`	`+-> Append`
`1330`	`1328`	`-> Subquery Scan on "SELECT 1"`
`1331`		`- -> Unique`
`1332`		`- -> Sort`
`1333`		`- Sort Key: i81.q1, i81.q2`
`1334`		`- -> Seq Scan on int8_tbl i81`
`1335`		`- Filter: (q2 IS NOT NULL)`
	`1329`	`+ -> HashAggregate`
	`1330`	`+ Group Key: i81.q1, i81.q2`
	`1331`	`+ -> Seq Scan on int8_tbl i81`
	`1332`	`+ Filter: (q2 IS NOT NULL)`
`1336`	`1333`	`-> Subquery Scan on "SELECT 2"`
`1337`		`- -> Unique`
`1338`		`- -> Sort`
`1339`		`- Sort Key: i82.q1, i82.q2`
`1340`		`- -> Seq Scan on int8_tbl i82`
`1341`		`- Filter: (q2 IS NOT NULL)`
`1342`		`-(15 rows)`
	`1334`	`+ -> HashAggregate`
	`1335`	`+ Group Key: i82.q1, i82.q2`
	`1336`	`+ -> Seq Scan on int8_tbl i82`
	`1337`	`+ Filter: (q2 IS NOT NULL)`
	`1338`	`+(13 rows)`
`1343`	`1339`
`1344`	`1340`	`select distinct q1 from`
`1345`	`1341`	`(select distinct * from int8_tbl i81`

0 commit comments

Comments

(0)

Movatterモバイル変換

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Commit9c3dab9

File tree

6 files changed

6 files changed

`‎contrib/postgres_fdw/expected/postgres_fdw.out`

`‎src/backend/optimizer/path/costsize.c`

`‎src/test/regress/expected/aggregates.out`

`‎src/test/regress/expected/join.out`

`‎src/test/regress/expected/partition_join.out`

`‎src/test/regress/expected/union.out`

0 commit comments