4242 * Portions Copyright (c) 1994, Regents of the University of California
4343 *
4444 * IDENTIFICATION
45- * $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.75 2001/06/05 05:26:04 tgl Exp $
45+ * $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.76 2001/06/10 02:59:35 tgl Exp $
4646 *
4747 *-------------------------------------------------------------------------
4848 */
@@ -873,6 +873,9 @@ estimate_hash_bucketsize(Query *root, Var *var)
873873if (ndistinct < 0.0 )
874874ndistinct = - ndistinct * rel -> tuples ;
875875
876+ /* Also compute avg freq of all distinct data values in raw relation */
877+ avgfreq = (1.0 - stats -> stanullfrac ) /ndistinct ;
878+
876879/*
877880 * Adjust ndistinct to account for restriction clauses. Observe we are
878881 * assuming that the data distribution is affected uniformly by the
@@ -883,17 +886,6 @@ estimate_hash_bucketsize(Query *root, Var *var)
883886 */
884887ndistinct *=rel -> rows /rel -> tuples ;
885888
886- /*
887- * Discourage use of hash join if there seem not to be very many distinct
888- * data values. The threshold here is somewhat arbitrary, as is the
889- * fraction used to "discourage" the choice.
890- */
891- if (ndistinct < 50.0 )
892- {
893- ReleaseSysCache (tuple );
894- return 0.5 ;
895- }
896-
897889/*
898890 * Form initial estimate of bucketsize fraction. Here we use rel->rows,
899891 * ie the number of rows after applying restriction clauses, because
@@ -903,8 +895,8 @@ estimate_hash_bucketsize(Query *root, Var *var)
903895estfract = (double )NTUP_PER_BUCKET /rel -> rows ;
904896
905897/*
906- * Adjust estimated bucketsize if too few distinct values to fill
907- * all the buckets.
898+ * Adjust estimated bucketsize if too few distinct values (after
899+ * restriction clauses) to fill all the buckets.
908900 */
909901needdistinct = rel -> rows / (double )NTUP_PER_BUCKET ;
910902if (ndistinct < needdistinct )
@@ -931,8 +923,6 @@ estimate_hash_bucketsize(Query *root, Var *var)
931923/*
932924 * Adjust estimated bucketsize upward to account for skewed distribution.
933925 */
934- avgfreq = (1.0 - stats -> stanullfrac ) /ndistinct ;
935-
936926if (avgfreq > 0.0 && mcvfreq > avgfreq )
937927estfract *=mcvfreq /avgfreq ;
938928