4949 * Portions Copyright (c) 1994, Regents of the University of California
5050 *
5151 * IDENTIFICATION
52- * $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.103 2003/01/27 20:51:50 tgl Exp $
52+ * $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.104 2003/01/28 22:13:33 tgl Exp $
5353 *
5454 *-------------------------------------------------------------------------
5555 */
@@ -104,7 +104,8 @@ boolenable_hashjoin = true;
104104static Selectivity estimate_hash_bucketsize (Query * root ,Var * var ,
105105int nbuckets );
106106static bool cost_qual_eval_walker (Node * node ,QualCost * total );
107- static Selectivity approx_selectivity (Query * root ,List * quals );
107+ static Selectivity approx_selectivity (Query * root ,List * quals ,
108+ JoinType jointype );
108109static void set_rel_width (Query * root ,RelOptInfo * rel );
109110static double relation_byte_size (double tuples ,int width );
110111static double page_size (double tuples ,int width );
@@ -697,7 +698,8 @@ cost_nestloop(NestPath *path, Query *root)
697698 */
698699if (path -> jointype == JOIN_IN )
699700{
700- Selectivity qual_selec = approx_selectivity (root ,restrictlist );
701+ Selectivity qual_selec = approx_selectivity (root ,restrictlist ,
702+ path -> jointype );
701703double qptuples ;
702704
703705qptuples = ceil (qual_selec * outer_path_rows * inner_path_rows );
@@ -816,10 +818,12 @@ cost_mergejoin(MergePath *path, Query *root)
816818 * Note: it's probably bogus to use the normal selectivity calculation
817819 * here when either the outer or inner path is a UniquePath.
818820 */
819- merge_selec = approx_selectivity (root ,mergeclauses );
821+ merge_selec = approx_selectivity (root ,mergeclauses ,
822+ path -> jpath .jointype );
820823cost_qual_eval (& merge_qual_cost ,mergeclauses );
821824qpquals = set_ptrDifference (restrictlist ,mergeclauses );
822- qp_selec = approx_selectivity (root ,qpquals );
825+ qp_selec = approx_selectivity (root ,qpquals ,
826+ path -> jpath .jointype );
823827cost_qual_eval (& qp_qual_cost ,qpquals );
824828freeList (qpquals );
825829
@@ -1044,10 +1048,12 @@ cost_hashjoin(HashPath *path, Query *root)
10441048 * Note: it's probably bogus to use the normal selectivity calculation
10451049 * here when either the outer or inner path is a UniquePath.
10461050 */
1047- hash_selec = approx_selectivity (root ,hashclauses );
1051+ hash_selec = approx_selectivity (root ,hashclauses ,
1052+ path -> jpath .jointype );
10481053cost_qual_eval (& hash_qual_cost ,hashclauses );
10491054qpquals = set_ptrDifference (restrictlist ,hashclauses );
1050- qp_selec = approx_selectivity (root ,qpquals );
1055+ qp_selec = approx_selectivity (root ,qpquals ,
1056+ path -> jpath .jointype );
10511057cost_qual_eval (& qp_qual_cost ,qpquals );
10521058freeList (qpquals );
10531059
@@ -1084,54 +1090,67 @@ cost_hashjoin(HashPath *path, Query *root)
10841090 * Determine bucketsize fraction for inner relation. We use the
10851091 * smallest bucketsize estimated for any individual hashclause;
10861092 * this is undoubtedly conservative.
1093+ *
1094+ * BUT: if inner relation has been unique-ified, we can assume it's
1095+ * good for hashing. This is important both because it's the right
1096+ * answer, and because we avoid contaminating the cache with a value
1097+ * that's wrong for non-unique-ified paths.
10871098 */
1088- innerbucketsize = 1.0 ;
1089- foreach (hcl ,hashclauses )
1099+ if (IsA (inner_path ,UniquePath ))
1100+ innerbucketsize = 1.0 /virtualbuckets ;
1101+ else
10901102{
1091- RestrictInfo * restrictinfo = (RestrictInfo * )lfirst (hcl );
1092- Selectivity thisbucketsize ;
1103+ innerbucketsize = 1.0 ;
1104+ foreach (hcl ,hashclauses )
1105+ {
1106+ RestrictInfo * restrictinfo = (RestrictInfo * )lfirst (hcl );
1107+ Selectivity thisbucketsize ;
10931108
1094- Assert (IsA (restrictinfo ,RestrictInfo ));
1109+ Assert (IsA (restrictinfo ,RestrictInfo ));
10951110
1096- /*
1097- * First we have to figure out which side of the hashjoin clause is the
1098- * inner side.
1099- *
1100- * Since we tend to visit the same clauses over and over when planning
1101- * a large query, we cache the bucketsize estimate in the RestrictInfo
1102- * node to avoid repeated lookups of statistics.
1103- */
1104- if (is_subseti (restrictinfo -> right_relids ,inner_path -> parent -> relids ))
1105- {
1106- /* righthand side is inner */
1107- thisbucketsize = restrictinfo -> right_bucketsize ;
1108- if (thisbucketsize < 0 )
1111+ /*
1112+ * First we have to figure out which side of the hashjoin clause
1113+ * is the inner side.
1114+ *
1115+ * Since we tend to visit the same clauses over and over when
1116+ * planning a large query, we cache the bucketsize estimate in the
1117+ * RestrictInfo node to avoid repeated lookups of statistics.
1118+ */
1119+ if (is_subseti (restrictinfo -> right_relids ,
1120+ inner_path -> parent -> relids ))
11091121{
1110- /* not cached yet */
1111- thisbucketsize = estimate_hash_bucketsize (root ,
1122+ /* righthand side is inner */
1123+ thisbucketsize = restrictinfo -> right_bucketsize ;
1124+ if (thisbucketsize < 0 )
1125+ {
1126+ /* not cached yet */
1127+ thisbucketsize =
1128+ estimate_hash_bucketsize (root ,
11121129(Var * )get_rightop (restrictinfo -> clause ),
1113- virtualbuckets );
1114- restrictinfo -> right_bucketsize = thisbucketsize ;
1130+ virtualbuckets );
1131+ restrictinfo -> right_bucketsize = thisbucketsize ;
1132+ }
11151133}
1116- }
1117- else
1118- {
1119- Assert (is_subseti (restrictinfo -> left_relids ,
1120- inner_path -> parent -> relids ));
1121- /* lefthand side is inner */
1122- thisbucketsize = restrictinfo -> left_bucketsize ;
1123- if (thisbucketsize < 0 )
1134+ else
11241135{
1125- /* not cached yet */
1126- thisbucketsize = estimate_hash_bucketsize (root ,
1136+ Assert (is_subseti (restrictinfo -> left_relids ,
1137+ inner_path -> parent -> relids ));
1138+ /* lefthand side is inner */
1139+ thisbucketsize = restrictinfo -> left_bucketsize ;
1140+ if (thisbucketsize < 0 )
1141+ {
1142+ /* not cached yet */
1143+ thisbucketsize =
1144+ estimate_hash_bucketsize (root ,
11271145(Var * )get_leftop (restrictinfo -> clause ),
1128- virtualbuckets );
1129- restrictinfo -> left_bucketsize = thisbucketsize ;
1146+ virtualbuckets );
1147+ restrictinfo -> left_bucketsize = thisbucketsize ;
1148+ }
11301149}
1131- }
11321150
1133- if (innerbucketsize > thisbucketsize )
1134- innerbucketsize = thisbucketsize ;
1151+ if (innerbucketsize > thisbucketsize )
1152+ innerbucketsize = thisbucketsize ;
1153+ }
11351154}
11361155
11371156/*
@@ -1557,7 +1576,7 @@ cost_qual_eval_walker(Node *node, QualCost *total)
15571576 * seems OK to live with the approximation.
15581577 */
15591578static Selectivity
1560- approx_selectivity (Query * root ,List * quals )
1579+ approx_selectivity (Query * root ,List * quals , JoinType jointype )
15611580{
15621581Selectivity total = 1.0 ;
15631582List * l ;
@@ -1582,13 +1601,14 @@ approx_selectivity(Query *root, List *quals)
15821601restrictinfo -> this_selec =
15831602clause_selectivity (root ,
15841603 (Node * )restrictinfo -> clause ,
1585- 0 );
1604+ 0 ,
1605+ jointype );
15861606selec = restrictinfo -> this_selec ;
15871607}
15881608else
15891609{
15901610/* If it's a bare expression, must always do it the hard way */
1591- selec = clause_selectivity (root ,qual ,0 );
1611+ selec = clause_selectivity (root ,qual ,0 , jointype );
15921612}
15931613total *=selec ;
15941614}
@@ -1620,7 +1640,8 @@ set_baserel_size_estimates(Query *root, RelOptInfo *rel)
16201640temp = rel -> tuples *
16211641restrictlist_selectivity (root ,
16221642rel -> baserestrictinfo ,
1623- lfirsti (rel -> relids ));
1643+ lfirsti (rel -> relids ),
1644+ JOIN_INNER );
16241645
16251646/*
16261647 * Force estimate to be at least one row, to make explain output look
@@ -1682,7 +1703,8 @@ set_joinrel_size_estimates(Query *root, RelOptInfo *rel,
16821703 */
16831704selec = restrictlist_selectivity (root ,
16841705restrictlist ,
1685- 0 );
1706+ 0 ,
1707+ jointype );
16861708
16871709/*
16881710 * Basically, we multiply size of Cartesian product by selectivity.
@@ -1694,8 +1716,6 @@ set_joinrel_size_estimates(Query *root, RelOptInfo *rel,
16941716 * For JOIN_IN and variants, the Cartesian product is figured with
16951717 * respect to a unique-ified input, and then we can clamp to the size
16961718 * of the other input.
1697- * XXX it's not at all clear that the ordinary selectivity calculation
1698- * is appropriate in this case.
16991719 */
17001720switch (jointype )
17011721{
@@ -1798,7 +1818,8 @@ set_function_size_estimates(Query *root, RelOptInfo *rel)
17981818temp = rel -> tuples *
17991819restrictlist_selectivity (root ,
18001820rel -> baserestrictinfo ,
1801- lfirsti (rel -> relids ));
1821+ lfirsti (rel -> relids ),
1822+ JOIN_INNER );
18021823
18031824/*
18041825 * Force estimate to be at least one row, to make explain output look