77 *
88 *
99 * IDENTIFICATION
10- * $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.33 1999/02/15 03:22:04 momjian Exp $
10+ * $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.34 1999/04/05 02:07:07 tgl Exp $
1111 *
1212 *-------------------------------------------------------------------------
1313 */
3737extern int NBuffers ; /* defined elsewhere; cost_hashjoin uses it as the page cap for the in-memory hash table and as the threshold above which the join is costed as batched */
3838
3939static int compute_attribute_width (TargetEntry * tlistentry );
40+ static double relation_byte_size (int tuples ,int width ); /* byte-size estimate returned as a double; defined further down in this file */
4041static double base_log (double x ,double b );
4142static int compute_targetlist_width (List * targetlist );
4243
@@ -323,27 +324,35 @@ cost_hashjoin(Cost outercost,
323324Cost temp = 0 ;
324325int outerpages = page_size (outersize ,outerwidth );
325326int innerpages = page_size (innersize ,innerwidth );
326- int nrun = ceil ((double )outerpages / (double )NBuffers );
327327
328- if (outerpages < innerpages )
329- return _disable_cost_ ;
330328if (!_enable_hashjoin_ )
331329temp += _disable_cost_ ;
332330
333- /*
334- * temp += outercost + (nrun + 1) * innercost;
335- *
336- * the innercost shouldn't be used it. Instead the cost of hashing the
337- * innerpath should be used
338- *
339- * ASSUME innercost is 1 for now -- a horrible hack - jolly temp +=
340- * outercost + (nrun + 1);
331+ /* Bias against putting larger relation on inside.
341332 *
342- * But we must add innercost to result.- vadim 04/24/97
333+ * Code used to use "outerpages < innerpages" but that has
334+ * poor resolution when both relations are small.
343335 */
344- temp += outercost + innercost + (nrun + 1 );
336+ if (relation_byte_size (outersize ,outerwidth )<
337+ relation_byte_size (innersize ,innerwidth ))
338+ temp += _disable_cost_ ;
339+
340+ /* cost of source data */
341+ temp += outercost + innercost ;
342+
343+ /* cost of computing hash function: must do it once per tuple */
344+ temp += _cpu_page_wight_ * (outersize + innersize );
345+
346+ /* cost of main-memory hashtable */
347+ temp += (innerpages < NBuffers ) ?innerpages :NBuffers ;
348+
349+ /* if inner relation is too big then we will need to "batch" the join,
350+ * which implies writing and reading most of the tuples to disk an
351+ * extra time.
352+ */
353+ if (innerpages > NBuffers )
354+ temp += 2 * (outerpages + innerpages );
345355
346- temp += _cpu_page_wight_ * (outersize + nrun * innersize );
347356Assert (temp >=0 );
348357
349358return temp ;
@@ -458,6 +467,19 @@ compute_joinrel_size(JoinPath *joinpath)
458467return temp1 ;
459468}
460469
470+ /*
471+ * relation_byte_size
472+ * Estimate the storage space in bytes for a given number of tuples
473+ * of a given width (size in bytes): tuples * (width + sizeof(HeapTupleData)).
474+ * To avoid overflow with big relations, both factors are converted to double before they are multiplied, and the result is a double.
475+ */
476+
477+ static double
478+ relation_byte_size (int tuples ,int width )
479+ {
480+ return ((double )tuples )* ((double ) (width + sizeof (HeapTupleData ))); /* NOTE(review): width + sizeof(...) is summed before its cast to double; sizeof yields an unsigned type, so a negative width would wrap - presumably callers never pass one, confirm */
481+ }
482+
461483/*
462484 * page_size
463485 * Returns an estimate of the number of pages covered by a given
@@ -466,10 +488,9 @@ compute_joinrel_size(JoinPath *joinpath)
466488int
467489page_size (int tuples ,int width ) /* estimated number of BLCKSZ-sized pages covered by `tuples` tuples of `width` bytes each */
468490{
469- int temp = 0 ;
491+ int temp ; /* page count; assigned unconditionally below, so no initializer is needed */
470492
471- temp = ceil ((double ) (tuples * (width + sizeof (HeapTupleData )))
472- /BLCKSZ ); /* removed form: the byte-count product was computed first and only then cast to double */
493+ temp = (int )ceil (relation_byte_size (tuples ,width ) /BLCKSZ ); /* byte estimate divided by BLCKSZ, rounded up to whole pages; NOTE(review): the double-to-int cast is not range-checked here - confirm the estimate always fits in an int */
473494Assert (temp >=0 ); /* sanity check on the computed page count */
474495return temp ;
475496}