5454 * Portions Copyright (c) 1994, Regents of the University of California
5555 *
5656 * IDENTIFICATION
57- * $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.189 2007/11/15 22:25:15 momjian Exp $
57+ * $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.190 2007/12/08 21:05:11 tgl Exp $
5858 *
5959 *-------------------------------------------------------------------------
6060 */
@@ -1372,12 +1372,16 @@ cost_mergejoin(MergePath *path, PlannerInfo *root)
13721372double outer_path_rows = PATH_ROWS (outer_path );
13731373double inner_path_rows = PATH_ROWS (inner_path );
13741374double outer_rows ,
1375- inner_rows ;
1375+ inner_rows ,
1376+ outer_skip_rows ,
1377+ inner_skip_rows ;
13761378double mergejointuples ,
13771379rescannedtuples ;
13781380double rescanratio ;
1379- Selectivity outerscansel ,
1380- innerscansel ;
1381+ Selectivity outerstartsel ,
1382+ outerendsel ,
1383+ innerstartsel ,
1384+ innerendsel ;
13811385Selectivity joininfactor ;
13821386Path sort_path ;/* dummy for result of cost_sort */
13831387
@@ -1444,10 +1448,12 @@ cost_mergejoin(MergePath *path, PlannerInfo *root)
14441448 * A merge join will stop as soon as it exhausts either input stream
14451449 * (unless it's an outer join, in which case the outer side has to be
14461450 * scanned all the way anyway). Estimate fraction of the left and right
1447- * inputs that will actually need to be scanned. We use only the first
1448- * (most significant) merge clause for this purpose. Since
1449- * mergejoinscansel() is a fairly expensive computation, we cache the
1450- * results in the merge clause RestrictInfo.
1451+ * inputs that will actually need to be scanned. Likewise, we can
1452+ * estimate the number of rows that will be skipped before the first
1453+ * join pair is found, which should be factored into startup cost.
1454+ * We use only the first (most significant) merge clause for this purpose.
1455+ * Since mergejoinscansel() is a fairly expensive computation, we cache
1456+ * the results in the merge clause RestrictInfo.
14511457 */
14521458if (mergeclauses && path -> jpath .jointype != JOIN_FULL )
14531459{
@@ -1478,37 +1484,61 @@ cost_mergejoin(MergePath *path, PlannerInfo *root)
14781484outer_path -> parent -> relids ))
14791485{
14801486/* left side of clause is outer */
1481- outerscansel = cache -> leftscansel ;
1482- innerscansel = cache -> rightscansel ;
1487+ outerstartsel = cache -> leftstartsel ;
1488+ outerendsel = cache -> leftendsel ;
1489+ innerstartsel = cache -> rightstartsel ;
1490+ innerendsel = cache -> rightendsel ;
14831491}
14841492else
14851493{
14861494/* left side of clause is inner */
1487- outerscansel = cache -> rightscansel ;
1488- innerscansel = cache -> leftscansel ;
1495+ outerstartsel = cache -> rightstartsel ;
1496+ outerendsel = cache -> rightendsel ;
1497+ innerstartsel = cache -> leftstartsel ;
1498+ innerendsel = cache -> leftendsel ;
14891499}
14901500if (path -> jpath .jointype == JOIN_LEFT )
1491- outerscansel = 1.0 ;
1501+ {
1502+ outerstartsel = 0.0 ;
1503+ outerendsel = 1.0 ;
1504+ }
14921505else if (path -> jpath .jointype == JOIN_RIGHT )
1493- innerscansel = 1.0 ;
1506+ {
1507+ innerstartsel = 0.0 ;
1508+ innerendsel = 1.0 ;
1509+ }
14941510}
14951511else
14961512{
14971513/* cope with clauseless or full mergejoin */
1498- outerscansel = innerscansel = 1.0 ;
1514+ outerstartsel = innerstartsel = 0.0 ;
1515+ outerendsel = innerendsel = 1.0 ;
14991516}
15001517
1501- /* convert selectivity to row count; must scan at least one row */
1502- outer_rows = clamp_row_est (outer_path_rows * outerscansel );
1503- inner_rows = clamp_row_est (inner_path_rows * innerscansel );
1518+ /*
1519+ * Convert selectivities to row counts. We force outer_rows and
1520+ * inner_rows to be at least 1, but the skip_rows estimates can be zero.
1521+ */
1522+ outer_skip_rows = rint (outer_path_rows * outerstartsel );
1523+ inner_skip_rows = rint (inner_path_rows * innerstartsel );
1524+ outer_rows = clamp_row_est (outer_path_rows * outerendsel );
1525+ inner_rows = clamp_row_est (inner_path_rows * innerendsel );
1526+
1527+ Assert (outer_skip_rows <=outer_rows );
1528+ Assert (inner_skip_rows <=inner_rows );
15041529
15051530/*
15061531 * Readjust scan selectivities to account for above rounding. This is
15071532 * normally an insignificant effect, but when there are only a few rows in
15081533 * the inputs, failing to do this makes for a large percentage error.
15091534 */
1510- outerscansel = outer_rows /outer_path_rows ;
1511- innerscansel = inner_rows /inner_path_rows ;
1535+ outerstartsel = outer_skip_rows /outer_path_rows ;
1536+ innerstartsel = inner_skip_rows /inner_path_rows ;
1537+ outerendsel = outer_rows /outer_path_rows ;
1538+ innerendsel = inner_rows /inner_path_rows ;
1539+
1540+ Assert (outerstartsel <=outerendsel );
1541+ Assert (innerstartsel <=innerendsel );
15121542
15131543/* cost of source data */
15141544
@@ -1522,14 +1552,18 @@ cost_mergejoin(MergePath *path, PlannerInfo *root)
15221552outer_path -> parent -> width ,
15231553-1.0 );
15241554startup_cost += sort_path .startup_cost ;
1555+ startup_cost += (sort_path .total_cost - sort_path .startup_cost )
1556+ * outerstartsel ;
15251557run_cost += (sort_path .total_cost - sort_path .startup_cost )
1526- * outerscansel ;
1558+ * ( outerendsel - outerstartsel ) ;
15271559}
15281560else
15291561{
15301562startup_cost += outer_path -> startup_cost ;
1563+ startup_cost += (outer_path -> total_cost - outer_path -> startup_cost )
1564+ * outerstartsel ;
15311565run_cost += (outer_path -> total_cost - outer_path -> startup_cost )
1532- * outerscansel ;
1566+ * ( outerendsel - outerstartsel ) ;
15331567}
15341568
15351569if (innersortkeys )/* do we need to sort inner? */
@@ -1542,14 +1576,18 @@ cost_mergejoin(MergePath *path, PlannerInfo *root)
15421576inner_path -> parent -> width ,
15431577-1.0 );
15441578startup_cost += sort_path .startup_cost ;
1579+ startup_cost += (sort_path .total_cost - sort_path .startup_cost )
1580+ * innerstartsel * rescanratio ;
15451581run_cost += (sort_path .total_cost - sort_path .startup_cost )
1546- * innerscansel * rescanratio ;
1582+ * ( innerendsel - innerstartsel ) * rescanratio ;
15471583}
15481584else
15491585{
15501586startup_cost += inner_path -> startup_cost ;
1587+ startup_cost += (inner_path -> total_cost - inner_path -> startup_cost )
1588+ * innerstartsel * rescanratio ;
15511589run_cost += (inner_path -> total_cost - inner_path -> startup_cost )
1552- * innerscansel * rescanratio ;
1590+ * ( innerendsel - innerstartsel ) * rescanratio ;
15531591}
15541592
15551593/* CPU costs */
@@ -1571,8 +1609,11 @@ cost_mergejoin(MergePath *path, PlannerInfo *root)
15711609 * joininfactor.
15721610 */
15731611startup_cost += merge_qual_cost .startup ;
1612+ startup_cost += merge_qual_cost .per_tuple *
1613+ (outer_skip_rows + inner_skip_rows * rescanratio );
15741614run_cost += merge_qual_cost .per_tuple *
1575- (outer_rows + inner_rows * rescanratio );
1615+ ((outer_rows - outer_skip_rows )+
1616+ (inner_rows - inner_skip_rows )* rescanratio );
15761617
15771618/*
15781619 * For each tuple that gets through the mergejoin proper, we charge
@@ -1597,8 +1638,10 @@ cached_scansel(PlannerInfo *root, RestrictInfo *rinfo, PathKey *pathkey)
15971638{
15981639MergeScanSelCache * cache ;
15991640ListCell * lc ;
1600- Selectivity leftscansel ,
1601- rightscansel ;
1641+ Selectivity leftstartsel ,
1642+ leftendsel ,
1643+ rightstartsel ,
1644+ rightendsel ;
16021645MemoryContext oldcontext ;
16031646
16041647/* Do we have this result already? */
@@ -1617,8 +1660,10 @@ cached_scansel(PlannerInfo *root, RestrictInfo *rinfo, PathKey *pathkey)
16171660pathkey -> pk_opfamily ,
16181661pathkey -> pk_strategy ,
16191662pathkey -> pk_nulls_first ,
1620- & leftscansel ,
1621- & rightscansel );
1663+ & leftstartsel ,
1664+ & leftendsel ,
1665+ & rightstartsel ,
1666+ & rightendsel );
16221667
16231668/* Cache the result in suitably long-lived workspace */
16241669oldcontext = MemoryContextSwitchTo (root -> planner_cxt );
@@ -1627,8 +1672,10 @@ cached_scansel(PlannerInfo *root, RestrictInfo *rinfo, PathKey *pathkey)
16271672cache -> opfamily = pathkey -> pk_opfamily ;
16281673cache -> strategy = pathkey -> pk_strategy ;
16291674cache -> nulls_first = pathkey -> pk_nulls_first ;
1630- cache -> leftscansel = leftscansel ;
1631- cache -> rightscansel = rightscansel ;
1675+ cache -> leftstartsel = leftstartsel ;
1676+ cache -> leftendsel = leftendsel ;
1677+ cache -> rightstartsel = rightstartsel ;
1678+ cache -> rightendsel = rightendsel ;
16321679
16331680rinfo -> scansel_cache = lappend (rinfo -> scansel_cache ,cache );
16341681