Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit9c3dab9

Browse files
committed
Introduce the number of columns in the cost-sort model.
The sort node calls the comparison operator for each pair of attributes foreach couple of tuples. However, the current cost model usesa '2.0*cpu_operator_cost' multiplier, which performs some sort of averaging.This technique can lead to incorrect estimations when sorting on three, four,or more columns, quite common in practice.Moreover, further elaboration of the optimiser forms more strict requirementsfor the balance of sortings, as caused by IncrementalSort, MergeAppend, andMergeJoin.In this patch, the multiplier is a linear function of a number of columns.Member 1.0 needs to smooth the fact that dependence on the number of columns isweaker than linear.It is an extreme formula. The number of comparisons depends on the distinctvalues in each column. As a TODO, we can natively elaborate this model by thenext step, involving distinct statistics to make estimations more precise.Task: 9578.Tags: optimized_group_by.
1 parent9ea2614 commit9c3dab9

File tree

6 files changed

+78
-66
lines changed

6 files changed

+78
-66
lines changed

‎contrib/postgres_fdw/expected/postgres_fdw.out

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9984,13 +9984,16 @@ SELECT t1.a,t2.b,t3.c FROM fprt1 t1 INNER JOIN fprt2 t2 ON (t1.a = t2.b) INNER J
99849984
-- left outer join + nullable clause
99859985
EXPLAIN (VERBOSE, COSTS OFF)
99869986
SELECT t1.a,t2.b,t2.c FROM fprt1 t1 LEFT JOIN (SELECT * FROM fprt2 WHERE a < 10) t2 ON (t1.a = t2.b and t1.b = t2.a) WHERE t1.a < 10 ORDER BY 1,2,3;
9987-
QUERY PLAN
9988-
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
9989-
Foreign Scan
9987+
QUERY PLAN
9988+
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
9989+
Sort
99909990
Output: t1.a, fprt2.b, fprt2.c
9991-
Relations: (public.ftprt1_p1 t1) LEFT JOIN (public.ftprt2_p1 fprt2)
9992-
Remote SQL: SELECT r5.a, r6.b, r6.c FROM (public.fprt1_p1 r5 LEFT JOIN public.fprt2_p1 r6 ON (((r5.a = r6.b)) AND ((r5.b = r6.a)) AND ((r6.a < 10)))) WHERE ((r5.a < 10)) ORDER BY r5.a ASC NULLS LAST, r6.b ASC NULLS LAST, r6.c ASC NULLS LAST
9993-
(4 rows)
9991+
Sort Key: t1.a, fprt2.b, fprt2.c
9992+
-> Foreign Scan
9993+
Output: t1.a, fprt2.b, fprt2.c
9994+
Relations: (public.ftprt1_p1 t1) LEFT JOIN (public.ftprt2_p1 fprt2)
9995+
Remote SQL: SELECT r5.a, r6.b, r6.c FROM (public.fprt1_p1 r5 LEFT JOIN public.fprt2_p1 r6 ON (((r5.a = r6.b)) AND ((r5.b = r6.a)) AND ((r6.a < 10)))) WHERE ((r5.a < 10))
9996+
(7 rows)
99949997

99959998
SELECT t1.a,t2.b,t2.c FROM fprt1 t1 LEFT JOIN (SELECT * FROM fprt2 WHERE a < 10) t2 ON (t1.a = t2.b and t1.b = t2.a) WHERE t1.a < 10 ORDER BY 1,2,3;
99969999
a | b | c

‎src/backend/optimizer/path/costsize.c

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -483,6 +483,8 @@ cost_gather_merge(GatherMergePath *path, PlannerInfo *root,
483483
Costcomparison_cost;
484484
doubleN;
485485
doublelogN;
486+
intnpathkeys=list_length(((Path*)path)->pathkeys);
487+
doublecmpMultiplier= (npathkeys==0) ?2.0 :npathkeys+1.0;
486488

487489
/* Mark the path with the correct row estimate */
488490
if (rows)
@@ -505,7 +507,7 @@ cost_gather_merge(GatherMergePath *path, PlannerInfo *root,
505507
logN=LOG2(N);
506508

507509
/* Assumed cost per tuple comparison */
508-
comparison_cost=2.0*cpu_operator_cost;
510+
comparison_cost=cmpMultiplier*cpu_operator_cost;
509511

510512
/* Heap creation cost */
511513
startup_cost+=comparison_cost*N*logN;
@@ -1863,7 +1865,7 @@ cost_recursive_union(Path *runion, Path *nrterm, Path *rterm)
18631865
*/
18641866
staticvoid
18651867
cost_tuplesort(Cost*startup_cost,Cost*run_cost,
1866-
doubletuples,intwidth,
1868+
doubletuples,intwidth,doublecmpMultiplier,
18671869
Costcomparison_cost,intsort_mem,
18681870
doublelimit_tuples)
18691871
{
@@ -1880,7 +1882,7 @@ cost_tuplesort(Cost *startup_cost, Cost *run_cost,
18801882
tuples=2.0;
18811883

18821884
/* Include the default cost-per-comparison */
1883-
comparison_cost+=2.0*cpu_operator_cost;
1885+
comparison_cost+=cmpMultiplier*cpu_operator_cost;
18841886

18851887
/* Do we have a useful LIMIT? */
18861888
if (limit_tuples>0&&limit_tuples<tuples)
@@ -2051,7 +2053,9 @@ cost_incremental_sort(Path *path,
20512053
* are equal.
20522054
*/
20532055
cost_tuplesort(&group_startup_cost,&group_run_cost,
2054-
group_tuples,width,comparison_cost,sort_mem,
2056+
group_tuples,width,
2057+
list_length(pathkeys)+1.0,
2058+
comparison_cost,sort_mem,
20552059
limit_tuples);
20562060

20572061
/*
@@ -2075,7 +2079,7 @@ cost_incremental_sort(Path *path,
20752079
* detect the sort groups. This is roughly equal to one extra copy and
20762080
* comparison per tuple.
20772081
*/
2078-
run_cost+= (cpu_tuple_cost+comparison_cost)*input_tuples;
2082+
run_cost+= (cpu_tuple_cost+(presorted_keys+1)*comparison_cost)*input_tuples;
20792083

20802084
/*
20812085
* Additionally, we charge double cpu_tuple_cost for each input group to
@@ -2109,9 +2113,11 @@ cost_sort(Path *path, PlannerInfo *root,
21092113
{
21102114
Coststartup_cost;
21112115
Costrun_cost;
2116+
doublecmpMultiplier=
2117+
(pathkeys==NIL) ?2.0 :list_length(pathkeys)+1.0;
21122118

21132119
cost_tuplesort(&startup_cost,&run_cost,
2114-
tuples,width,
2120+
tuples,width,cmpMultiplier,
21152121
comparison_cost,sort_mem,
21162122
limit_tuples);
21172123

@@ -2391,6 +2397,8 @@ cost_merge_append(Path *path, PlannerInfo *root,
23912397
Costcomparison_cost;
23922398
doubleN;
23932399
doublelogN;
2400+
doublecmpMultiplier=
2401+
(pathkeys==NIL) ?2.0 :list_length(pathkeys)+1.0;
23942402

23952403
/*
23962404
* Avoid log(0)...
@@ -2399,7 +2407,7 @@ cost_merge_append(Path *path, PlannerInfo *root,
23992407
logN=LOG2(N);
24002408

24012409
/* Assumed cost per tuple comparison */
2402-
comparison_cost=2.0*cpu_operator_cost;
2410+
comparison_cost=cmpMultiplier*cpu_operator_cost;
24032411

24042412
/* Heap creation cost */
24052413
startup_cost+=comparison_cost*N*logN;

‎src/test/regress/expected/aggregates.out

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3001,17 +3001,18 @@ ANALYZE agg_sort_order;
30013001
EXPLAIN (COSTS OFF)
30023002
SELECT array_agg(c1 ORDER BY c2),c2
30033003
FROM agg_sort_order WHERE c2 < 100 GROUP BY c1 ORDER BY 2;
3004-
QUERY PLAN
3005-
----------------------------------------------------------------------------
3004+
QUERY PLAN
3005+
--------------------------------------------------------------------------
30063006
Sort
30073007
Sort Key: c2
30083008
-> GroupAggregate
30093009
Group Key: c1
3010-
-> Sort
3010+
->IncrementalSort
30113011
Sort Key: c1, c2
3012-
-> Index Scan using agg_sort_order_c2_idx on agg_sort_order
3013-
Index Cond: (c2 < 100)
3014-
(8 rows)
3012+
Presorted Key: c1
3013+
-> Index Scan using agg_sort_order_pkey on agg_sort_order
3014+
Filter: (c2 < 100)
3015+
(9 rows)
30153016

30163017
DROP TABLE agg_sort_order CASCADE;
30173018
DROP TABLE btg;

‎src/test/regress/expected/join.out

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5726,18 +5726,20 @@ select d.* from d left join (select * from b group by b.id, b.c_id) s
57265726
explain (costs off)
57275727
select d.* from d left join (select distinct * from b) s
57285728
on d.a = s.id;
5729-
QUERY PLAN
5730-
--------------------------------------
5731-
Merge Right Join
5732-
Merge Cond: (b.id = d.a)
5733-
-> Unique
5734-
-> Sort
5735-
Sort Key: b.id, b.c_id
5736-
-> Seq Scan on b
5729+
QUERY PLAN
5730+
---------------------------------------------
5731+
Merge Left Join
5732+
Merge Cond: (d.a = s.id)
57375733
-> Sort
57385734
Sort Key: d.a
57395735
-> Seq Scan on d
5740-
(9 rows)
5736+
-> Sort
5737+
Sort Key: s.id
5738+
-> Subquery Scan on s
5739+
-> HashAggregate
5740+
Group Key: b.id, b.c_id
5741+
-> Seq Scan on b
5742+
(11 rows)
57415743

57425744
-- join removal is not possible here
57435745
explain (costs off)

‎src/test/regress/expected/partition_join.out

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1235,9 +1235,11 @@ EXPLAIN (COSTS OFF)
12351235
SELECT t1.a, t1.c, t2.b, t2.c, t3.a + t3.b, t3.c FROM (prt1 t1 LEFT JOIN prt2 t2 ON t1.a = t2.b) RIGHT JOIN prt1_e t3 ON (t1.a = (t3.a + t3.b)/2) WHERE t3.c = 0 ORDER BY t1.a, t2.b, t3.a + t3.b;
12361236
QUERY PLAN
12371237
----------------------------------------------------------------------------
1238-
Sort
1238+
IncrementalSort
12391239
Sort Key: t1.a, t2.b, ((t3.a + t3.b))
1240-
-> Append
1240+
Presorted Key: t1.a
1241+
-> Merge Append
1242+
Sort Key: t1.a
12411243
-> Merge Left Join
12421244
Merge Cond: (t1_1.a = t2_1.b)
12431245
-> Sort
@@ -1286,7 +1288,7 @@ SELECT t1.a, t1.c, t2.b, t2.c, t3.a + t3.b, t3.c FROM (prt1 t1 LEFT JOIN prt2 t2
12861288
-> Sort
12871289
Sort Key: t2_3.b
12881290
-> Seq Scan on prt2_p3 t2_3
1289-
(51 rows)
1291+
(53 rows)
12901292

12911293
SELECT t1.a, t1.c, t2.b, t2.c, t3.a + t3.b, t3.c FROM (prt1 t1 LEFT JOIN prt2 t2 ON t1.a = t2.b) RIGHT JOIN prt1_e t3 ON (t1.a = (t3.a + t3.b)/2) WHERE t3.c = 0 ORDER BY t1.a, t2.b, t3.a + t3.b;
12921294
a | c | b | c | ?column? | c

‎src/test/regress/expected/union.out

Lines changed: 31 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1224,18 +1224,17 @@ SELECT * FROM
12241224
SELECT 2 AS t, 4 AS x) ss
12251225
WHERE x < 4
12261226
ORDER BY x;
1227-
QUERY PLAN
1228-
--------------------------------------------------
1227+
QUERY PLAN
1228+
--------------------------------------------
12291229
Sort
12301230
Sort Key: (2)
1231-
-> Unique
1232-
-> Sort
1233-
Sort Key: (1), (2)
1234-
-> Append
1235-
-> Result
1236-
-> Result
1237-
One-Time Filter: false
1238-
(9 rows)
1231+
-> HashAggregate
1232+
Group Key: (1), (2)
1233+
-> Append
1234+
-> Result
1235+
-> Result
1236+
One-Time Filter: false
1237+
(8 rows)
12391238

12401239
SELECT * FROM
12411240
(SELECT 1 AS t, 2 AS x
@@ -1289,19 +1288,18 @@ SELECT * FROM
12891288
SELECT 2 AS t, 4 AS x) ss
12901289
WHERE x > 3
12911290
ORDER BY x;
1292-
QUERY PLAN
1293-
------------------------------------------------------------------------------------
1291+
QUERY PLAN
1292+
-------------------------------------------------------------------------------
12941293
Sort
12951294
Sort Key: ss.x
12961295
-> Subquery Scan on ss
12971296
Filter: (ss.x > 3)
1298-
-> Unique
1299-
-> Sort
1300-
Sort Key: (1), (((random() * '3'::double precision))::integer)
1301-
-> Append
1302-
-> Result
1303-
-> Result
1304-
(10 rows)
1297+
-> HashAggregate
1298+
Group Key: (1), (((random() * '3'::double precision))::integer)
1299+
-> Append
1300+
-> Result
1301+
-> Result
1302+
(9 rows)
13051303

13061304
SELECT * FROM
13071305
(SELECT 1 AS t, (random()*3)::int AS x
@@ -1322,24 +1320,22 @@ select distinct q1 from
13221320
union all
13231321
select distinct * from int8_tbl i82) ss
13241322
where q2 = q2;
1325-
QUERY PLAN
1326-
----------------------------------------------------------
1327-
Unique
1328-
-> Merge Append
1329-
Sort Key: "*SELECT* 1".q1
1323+
QUERY PLAN
1324+
----------------------------------------------------
1325+
HashAggregate
1326+
Group Key: "*SELECT* 1".q1
1327+
-> Append
13301328
-> Subquery Scan on "*SELECT* 1"
1331-
-> Unique
1332-
-> Sort
1333-
Sort Key: i81.q1, i81.q2
1334-
-> Seq Scan on int8_tbl i81
1335-
Filter: (q2 IS NOT NULL)
1329+
-> HashAggregate
1330+
Group Key: i81.q1, i81.q2
1331+
-> Seq Scan on int8_tbl i81
1332+
Filter: (q2 IS NOT NULL)
13361333
-> Subquery Scan on "*SELECT* 2"
1337-
-> Unique
1338-
-> Sort
1339-
Sort Key: i82.q1, i82.q2
1340-
-> Seq Scan on int8_tbl i82
1341-
Filter: (q2 IS NOT NULL)
1342-
(15 rows)
1334+
-> HashAggregate
1335+
Group Key: i82.q1, i82.q2
1336+
-> Seq Scan on int8_tbl i82
1337+
Filter: (q2 IS NOT NULL)
1338+
(13 rows)
13431339

13441340
select distinct q1 from
13451341
(select distinct * from int8_tbl i81

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp