@@ -54,7 +54,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.172 2007/01/05 22:19:31 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.173 2007/01/08 16:09:22 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1498,10 +1498,6 @@ cost_hashjoin(HashPath *path, PlannerInfo *root)
 	double		hashjointuples;
 	double		outer_path_rows = PATH_ROWS(outer_path);
 	double		inner_path_rows = PATH_ROWS(inner_path);
-	double		outerbytes = relation_byte_size(outer_path_rows,
-												outer_path->parent->width);
-	double		innerbytes = relation_byte_size(inner_path_rows,
-												inner_path->parent->width);
 	int			num_hashclauses = list_length(hashclauses);
 	int			numbuckets;
 	int			numbatches;
@@ -1538,13 +1534,16 @@ cost_hashjoin(HashPath *path, PlannerInfo *root)
 
 	/*
 	 * Cost of computing hash function: must do it once per input tuple. We
-	 * charge one cpu_operator_cost for each column's hash function.
+	 * charge one cpu_operator_cost for each column's hash function.  Also,
+	 * tack on one cpu_tuple_cost per inner row, to model the costs of
+	 * inserting the row into the hashtable.
 	 *
 	 * XXX when a hashclause is more complex than a single operator, we really
 	 * should charge the extra eval costs of the left or right side, as
 	 * appropriate, here.  This seems more work than it's worth at the moment.
 	 */
-	startup_cost += cpu_operator_cost * num_hashclauses * inner_path_rows;
+	startup_cost += (cpu_operator_cost * num_hashclauses + cpu_tuple_cost)
+		* inner_path_rows;
 	run_cost += cpu_operator_cost * num_hashclauses * outer_path_rows;
 
 	/* Get hash table size that executor would use for inner relation */
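
The build-side charge now covers hashtable insertion as well as hash computation. Below is a standalone sketch (not part of the patch) that plugs the stock cost parameters into the old and new formulas; the row count and clause count are made-up values for illustration:

#include <stdio.h>

int
main(void)
{
	double		cpu_operator_cost = 0.0025;		/* stock default */
	double		cpu_tuple_cost = 0.01;			/* stock default */
	double		inner_path_rows = 100000.0;		/* assumed for the demo */
	int			num_hashclauses = 2;			/* assumed for the demo */

	/* old charge: hash-function evaluation only */
	double		old_cost = cpu_operator_cost * num_hashclauses * inner_path_rows;

	/* new charge: also one cpu_tuple_cost per row for hashtable insertion */
	double		new_cost = (cpu_operator_cost * num_hashclauses + cpu_tuple_cost)
		* inner_path_rows;

	printf("old = %.1f, new = %.1f\n", old_cost, new_cost);	/* 500.0 vs 1500.0 */
	return 0;
}
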
@@ -1624,8 +1623,8 @@ cost_hashjoin(HashPath *path, PlannerInfo *root)
 	/*
 	 * If inner relation is too big then we will need to "batch" the join,
 	 * which implies writing and reading most of the tuples to disk an extra
-	 * time.  Charge one cost unit per page of I/O (correct since it should be
-	 * nice and sequential...).  Writing the inner rel counts as startup cost,
+	 * time.  Charge seq_page_cost per page, since the I/O should be nice and
+	 * sequential.  Writing the inner rel counts as startup cost,
 	 * all the rest as run cost.
 	 */
 	if (numbatches > 1)
@@ -1635,8 +1634,8 @@ cost_hashjoin(HashPath *path, PlannerInfo *root)
 		double		innerpages = page_size(inner_path_rows,
 										   inner_path->parent->width);
 
-		startup_cost += innerpages;
-		run_cost += innerpages + 2 * outerpages;
+		startup_cost += seq_page_cost * innerpages;
+		run_cost += seq_page_cost * (innerpages + 2 * outerpages);
 	}
 
 	/* CPU costs */
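
With more than one batch, the inner relation is written out once before the first probe (hence startup cost) and re-read during the join, while the outer relation is both written and re-read, giving the innerpages + 2 * outerpages run charge. A small sketch of that split under assumed page counts; seq_page_cost = 1.0 is the stock default, so these numbers match the old hardwired behavior unless the GUC is changed:

#include <stdio.h>

int
main(void)
{
	double		seq_page_cost = 1.0;	/* stock default */
	double		innerpages = 1000.0;	/* assumed page counts for the demo */
	double		outerpages = 4000.0;

	/* writing the inner rel happens before the first probe: startup cost */
	double		startup_io = seq_page_cost * innerpages;

	/* re-reading inner batches, plus writing and re-reading outer batches */
	double		run_io = seq_page_cost * (innerpages + 2 * outerpages);

	printf("startup I/O = %.1f, run I/O = %.1f\n", startup_io, run_io);
	return 0;
}
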
@@ -1654,14 +1653,15 @@ cost_hashjoin(HashPath *path, PlannerInfo *root)
 	 * The number of tuple comparisons needed is the number of outer tuples
 	 * times the typical number of tuples in a hash bucket, which is the inner
 	 * relation size times its bucketsize fraction.  At each one, we need to
-	 * evaluate the hashjoin quals.  (Note: charging the full qual eval cost
-	 * at each tuple is pessimistic, since we don't evaluate the quals unless
-	 * the hash values match exactly.)
+	 * evaluate the hashjoin quals.  But actually, charging the full qual eval
+	 * cost at each tuple is pessimistic, since we don't evaluate the quals
+	 * unless the hash values match exactly.  For lack of a better idea, halve
+	 * the cost estimate to allow for that.
 	 */
 	startup_cost += hash_qual_cost.startup;
 	run_cost += hash_qual_cost.per_tuple *
 		outer_path_rows * clamp_row_est(inner_path_rows * innerbucketsize) *
-		joininfactor;
+		joininfactor * 0.5;
 
 	/*
 	 * For each tuple that gets through the hashjoin proper, we charge
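
The new 0.5 factor is a heuristic discount for candidate pairs that fail on the raw hash value and never reach qual evaluation. A sketch of the probe-cost arithmetic with made-up row counts and a simplified stand-in for the planner's clamp_row_est():

#include <math.h>
#include <stdio.h>

/* simplified stand-in for the planner's clamp_row_est() */
static double
clamp_row_est(double nrows)
{
	return (nrows <= 1.0) ? 1.0 : rint(nrows);
}

int
main(void)
{
	double		hash_qual_per_tuple = 0.0025;	/* say, one operator per clause */
	double		outer_path_rows = 100000.0;		/* assumed for the demo */
	double		inner_path_rows = 100000.0;
	double		innerbucketsize = 0.001;		/* assumed bucket fraction */
	double		joininfactor = 1.0;

	/* expected comparisons: outer rows times average bucket population */
	double		comparisons = outer_path_rows *
		clamp_row_est(inner_path_rows * innerbucketsize);

	/* halved, since most candidates fail on the hash value before qual eval */
	double		probe_cost = hash_qual_per_tuple * comparisons *
		joininfactor * 0.5;

	printf("comparisons = %.0f, probe cost = %.1f\n", comparisons, probe_cost);
	return 0;
}
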
@@ -1673,22 +1673,6 @@ cost_hashjoin(HashPath *path, PlannerInfo *root)
 	cpu_per_tuple = cpu_tuple_cost + qp_qual_cost.per_tuple;
 	run_cost += cpu_per_tuple * hashjointuples * joininfactor;
 
-	/*
-	 * Bias against putting larger relation on inside.  We don't want an
-	 * absolute prohibition, though, since larger relation might have better
-	 * bucketsize --- and we can't trust the size estimates unreservedly,
-	 * anyway.  Instead, inflate the run cost by the square root of the size
-	 * ratio.  (Why square root?  No real good reason, but it seems
-	 * reasonable...)
-	 *
-	 * Note: before 7.4 we implemented this by inflating startup cost; but if
-	 * there's a disable_cost component in the input paths' startup cost, that
-	 * unfairly penalizes the hash.  Probably it'd be better to keep track of
-	 * disable penalty separately from cost.
-	 */
-	if (innerbytes > outerbytes && outerbytes > 0)
-		run_cost *= sqrt(innerbytes / outerbytes);
-
 	path->jpath.path.startup_cost = startup_cost;
 	path->jpath.path.total_cost = startup_cost + run_cost;
 }
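
For reference, the deleted bias inflated run cost by the square root of the estimated size ratio whenever the inner relation was the larger one; the first hunk's removal of outerbytes/innerbytes exists only to drop its inputs. A sketch of the old penalty with hypothetical relation sizes:

#include <math.h>
#include <stdio.h>

int
main(void)
{
	double		innerbytes = 4.0e9;		/* hypothetical relation sizes */
	double		outerbytes = 1.0e9;
	double		run_cost = 10000.0;		/* hypothetical pre-bias run cost */

	/* the deleted rule: inflate run cost by sqrt of the size ratio */
	if (innerbytes > outerbytes && outerbytes > 0)
		run_cost *= sqrt(innerbytes / outerbytes);

	printf("penalized run cost = %.1f\n", run_cost);	/* 20000.0 */
	return 0;
}
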