Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit9fd8843

Browse files
committed
Fix mergejoin cost estimation so that we consider the statistical ranges of
the two join variables at both ends: not only trailing rows that need not bescanned because there cannot be a match on the other side, but initial rowsthat will be scanned without possibly having a match. This allows a morerealistic estimate of startup cost to be made, per recent pgsql-performancediscussion. In passing, fix a couple of bugs that had crept intomergejoinscansel: it was not quite up to speed for the task of estimatingdescending-order scans, which is a new requirement in 8.3.
1 parent8821612 commit9fd8843

File tree

4 files changed

+345
-148
lines changed

4 files changed

+345
-148
lines changed

‎src/backend/optimizer/path/costsize.c

Lines changed: 78 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@
5454
* Portions Copyright (c) 1994, Regents of the University of California
5555
*
5656
* IDENTIFICATION
57-
* $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.189 2007/11/15 22:25:15 momjian Exp $
57+
* $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.190 2007/12/08 21:05:11 tgl Exp $
5858
*
5959
*-------------------------------------------------------------------------
6060
*/
@@ -1372,12 +1372,16 @@ cost_mergejoin(MergePath *path, PlannerInfo *root)
13721372
doubleouter_path_rows=PATH_ROWS(outer_path);
13731373
doubleinner_path_rows=PATH_ROWS(inner_path);
13741374
doubleouter_rows,
1375-
inner_rows;
1375+
inner_rows,
1376+
outer_skip_rows,
1377+
inner_skip_rows;
13761378
doublemergejointuples,
13771379
rescannedtuples;
13781380
doublerescanratio;
1379-
Selectivityouterscansel,
1380-
innerscansel;
1381+
Selectivityouterstartsel,
1382+
outerendsel,
1383+
innerstartsel,
1384+
innerendsel;
13811385
Selectivityjoininfactor;
13821386
Pathsort_path;/* dummy for result of cost_sort */
13831387

@@ -1444,10 +1448,12 @@ cost_mergejoin(MergePath *path, PlannerInfo *root)
14441448
* A merge join will stop as soon as it exhausts either input stream
14451449
* (unless it's an outer join, in which case the outer side has to be
14461450
* scanned all the way anyway). Estimate fraction of the left and right
1447-
* inputs that will actually need to be scanned. We use only the first
1448-
* (most significant) merge clause for this purpose. Since
1449-
* mergejoinscansel() is a fairly expensive computation, we cache the
1450-
* results in the merge clause RestrictInfo.
1451+
* inputs that will actually need to be scanned. Likewise, we can
1452+
* estimate the number of rows that will be skipped before the first
1453+
* join pair is found, which should be factored into startup cost.
1454+
* We use only the first (most significant) merge clause for this purpose.
1455+
* Since mergejoinscansel() is a fairly expensive computation, we cache
1456+
* the results in the merge clause RestrictInfo.
14511457
*/
14521458
if (mergeclauses&&path->jpath.jointype!=JOIN_FULL)
14531459
{
@@ -1478,37 +1484,61 @@ cost_mergejoin(MergePath *path, PlannerInfo *root)
14781484
outer_path->parent->relids))
14791485
{
14801486
/* left side of clause is outer */
1481-
outerscansel=cache->leftscansel;
1482-
innerscansel=cache->rightscansel;
1487+
outerstartsel=cache->leftstartsel;
1488+
outerendsel=cache->leftendsel;
1489+
innerstartsel=cache->rightstartsel;
1490+
innerendsel=cache->rightendsel;
14831491
}
14841492
else
14851493
{
14861494
/* left side of clause is inner */
1487-
outerscansel=cache->rightscansel;
1488-
innerscansel=cache->leftscansel;
1495+
outerstartsel=cache->rightstartsel;
1496+
outerendsel=cache->rightendsel;
1497+
innerstartsel=cache->leftstartsel;
1498+
innerendsel=cache->leftendsel;
14891499
}
14901500
if (path->jpath.jointype==JOIN_LEFT)
1491-
outerscansel=1.0;
1501+
{
1502+
outerstartsel=0.0;
1503+
outerendsel=1.0;
1504+
}
14921505
elseif (path->jpath.jointype==JOIN_RIGHT)
1493-
innerscansel=1.0;
1506+
{
1507+
innerstartsel=0.0;
1508+
innerendsel=1.0;
1509+
}
14941510
}
14951511
else
14961512
{
14971513
/* cope with clauseless or full mergejoin */
1498-
outerscansel=innerscansel=1.0;
1514+
outerstartsel=innerstartsel=0.0;
1515+
outerendsel=innerendsel=1.0;
14991516
}
15001517

1501-
/* convert selectivity to row count; must scan at least one row */
1502-
outer_rows=clamp_row_est(outer_path_rows*outerscansel);
1503-
inner_rows=clamp_row_est(inner_path_rows*innerscansel);
1518+
/*
1519+
* Convert selectivities to row counts. We force outer_rows and
1520+
* inner_rows to be at least 1, but the skip_rows estimates can be zero.
1521+
*/
1522+
outer_skip_rows=rint(outer_path_rows*outerstartsel);
1523+
inner_skip_rows=rint(inner_path_rows*innerstartsel);
1524+
outer_rows=clamp_row_est(outer_path_rows*outerendsel);
1525+
inner_rows=clamp_row_est(inner_path_rows*innerendsel);
1526+
1527+
Assert(outer_skip_rows <=outer_rows);
1528+
Assert(inner_skip_rows <=inner_rows);
15041529

15051530
/*
15061531
* Readjust scan selectivities to account for above rounding. This is
15071532
* normally an insignificant effect, but when there are only a few rows in
15081533
* the inputs, failing to do this makes for a large percentage error.
15091534
*/
1510-
outerscansel=outer_rows /outer_path_rows;
1511-
innerscansel=inner_rows /inner_path_rows;
1535+
outerstartsel=outer_skip_rows /outer_path_rows;
1536+
innerstartsel=inner_skip_rows /inner_path_rows;
1537+
outerendsel=outer_rows /outer_path_rows;
1538+
innerendsel=inner_rows /inner_path_rows;
1539+
1540+
Assert(outerstartsel <=outerendsel);
1541+
Assert(innerstartsel <=innerendsel);
15121542

15131543
/* cost of source data */
15141544

@@ -1522,14 +1552,18 @@ cost_mergejoin(MergePath *path, PlannerInfo *root)
15221552
outer_path->parent->width,
15231553
-1.0);
15241554
startup_cost+=sort_path.startup_cost;
1555+
startup_cost+= (sort_path.total_cost-sort_path.startup_cost)
1556+
*outerstartsel;
15251557
run_cost+= (sort_path.total_cost-sort_path.startup_cost)
1526-
*outerscansel;
1558+
*(outerendsel-outerstartsel);
15271559
}
15281560
else
15291561
{
15301562
startup_cost+=outer_path->startup_cost;
1563+
startup_cost+= (outer_path->total_cost-outer_path->startup_cost)
1564+
*outerstartsel;
15311565
run_cost+= (outer_path->total_cost-outer_path->startup_cost)
1532-
*outerscansel;
1566+
*(outerendsel-outerstartsel);
15331567
}
15341568

15351569
if (innersortkeys)/* do we need to sort inner? */
@@ -1542,14 +1576,18 @@ cost_mergejoin(MergePath *path, PlannerInfo *root)
15421576
inner_path->parent->width,
15431577
-1.0);
15441578
startup_cost+=sort_path.startup_cost;
1579+
startup_cost+= (sort_path.total_cost-sort_path.startup_cost)
1580+
*innerstartsel*rescanratio;
15451581
run_cost+= (sort_path.total_cost-sort_path.startup_cost)
1546-
*innerscansel*rescanratio;
1582+
*(innerendsel-innerstartsel)*rescanratio;
15471583
}
15481584
else
15491585
{
15501586
startup_cost+=inner_path->startup_cost;
1587+
startup_cost+= (inner_path->total_cost-inner_path->startup_cost)
1588+
*innerstartsel*rescanratio;
15511589
run_cost+= (inner_path->total_cost-inner_path->startup_cost)
1552-
*innerscansel*rescanratio;
1590+
*(innerendsel-innerstartsel)*rescanratio;
15531591
}
15541592

15551593
/* CPU costs */
@@ -1571,8 +1609,11 @@ cost_mergejoin(MergePath *path, PlannerInfo *root)
15711609
* joininfactor.
15721610
*/
15731611
startup_cost+=merge_qual_cost.startup;
1612+
startup_cost+=merge_qual_cost.per_tuple*
1613+
(outer_skip_rows+inner_skip_rows*rescanratio);
15741614
run_cost+=merge_qual_cost.per_tuple*
1575-
(outer_rows+inner_rows*rescanratio);
1615+
((outer_rows-outer_skip_rows)+
1616+
(inner_rows-inner_skip_rows)*rescanratio);
15761617

15771618
/*
15781619
* For each tuple that gets through the mergejoin proper, we charge
@@ -1597,8 +1638,10 @@ cached_scansel(PlannerInfo *root, RestrictInfo *rinfo, PathKey *pathkey)
15971638
{
15981639
MergeScanSelCache*cache;
15991640
ListCell*lc;
1600-
Selectivityleftscansel,
1601-
rightscansel;
1641+
Selectivityleftstartsel,
1642+
leftendsel,
1643+
rightstartsel,
1644+
rightendsel;
16021645
MemoryContextoldcontext;
16031646

16041647
/* Do we have this result already? */
@@ -1617,8 +1660,10 @@ cached_scansel(PlannerInfo *root, RestrictInfo *rinfo, PathKey *pathkey)
16171660
pathkey->pk_opfamily,
16181661
pathkey->pk_strategy,
16191662
pathkey->pk_nulls_first,
1620-
&leftscansel,
1621-
&rightscansel);
1663+
&leftstartsel,
1664+
&leftendsel,
1665+
&rightstartsel,
1666+
&rightendsel);
16221667

16231668
/* Cache the result in suitably long-lived workspace */
16241669
oldcontext=MemoryContextSwitchTo(root->planner_cxt);
@@ -1627,8 +1672,10 @@ cached_scansel(PlannerInfo *root, RestrictInfo *rinfo, PathKey *pathkey)
16271672
cache->opfamily=pathkey->pk_opfamily;
16281673
cache->strategy=pathkey->pk_strategy;
16291674
cache->nulls_first=pathkey->pk_nulls_first;
1630-
cache->leftscansel=leftscansel;
1631-
cache->rightscansel=rightscansel;
1675+
cache->leftstartsel=leftstartsel;
1676+
cache->leftendsel=leftendsel;
1677+
cache->rightstartsel=rightstartsel;
1678+
cache->rightendsel=rightendsel;
16321679

16331680
rinfo->scansel_cache=lappend(rinfo->scansel_cache,cache);
16341681

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp