88 *
99 *
1010 * IDENTIFICATION
11- * $Header: /cvsroot/pgsql/src/backend/optimizer/prep/prepqual.c,v 1.21 2000/01/26 05:56:39 momjian Exp $
11+ * $Header: /cvsroot/pgsql/src/backend/optimizer/prep/prepqual.c,v 1.22 2000/01/28 03:22:36 tgl Exp $
1212 *
1313 *-------------------------------------------------------------------------
1414 */
@@ -32,7 +32,8 @@ static Expr *find_ands(Expr *qual);
3232static Expr * and_normalize (List * andlist );
3333static Expr * qual_cleanup (Expr * qual );
3434static List * remove_duplicates (List * list );
35- static int count_bool_nodes (Expr * qual );
35+ static void count_bool_nodes (Expr * qual ,double * nodes ,
36+ double * cnfnodes ,double * dnfnodes );
3637
3738/*****************************************************************************
3839 *
@@ -84,12 +85,12 @@ static intcount_bool_nodes(Expr *qual);
8485List *
8586canonicalize_qual (Expr * qual ,bool removeAndFlag )
8687{
87- Expr * newqual ,
88- * cnfqual ,
89- * dnfqual ;
90- int qualcnt ,
91- cnfcnt ,
92- dnfcnt ;
88+ Expr * newqual ;
89+ double nodes ,
90+ cnfnodes ,
91+ dnfnodes ;
92+ bool cnfok ,
93+ dnfok ;
9394
9495if (qual == NULL )
9596return NIL ;
@@ -98,57 +99,64 @@ canonicalize_qual(Expr *qual, bool removeAndFlag)
9899 * This improvement is always worthwhile, so do it unconditionally.
99100 */
100101qual = flatten_andors (qual );
102+
101103/* Push down NOTs. We do this only in the top-level boolean
102104 * expression, without examining arguments of operators/functions.
103105 * Even so, it might not be a win if we are unable to find negators
104- * for all the operators involved;so wekeep the flattened-but-not -
105- *NOT-pushed qual as the reference point for comparsions.
106+ * for all the operators involved;perhaps weshould compare before -
107+ *and-after tree sizes?
106108 */
107109newqual = find_nots (qual );
108- /*
109- * Generate both CNF and DNF forms from newqual.
110- */
111- /* Normalize into conjunctive normal form, and clean up the result. */
112- cnfqual = qual_cleanup (find_ors (newqual ));
113- /* Likewise for DNF */
114- dnfqual = qual_cleanup (find_ands (newqual ));
115110
116111/*
117- *Now, choose whether toreturn qual, cnfqual , ordnfqual .
112+ *Choose whether toconvert to CNF, or DNF , orleave well enough alone .
118113 *
119- * First heuristic is to forget about either CNF or DNF if it shows
114+ * We make an approximate estimate of the number of bottom-level nodes
115+ * that will appear in the CNF and DNF forms of the query.
116+ */
117+ count_bool_nodes (newqual ,& nodes ,& cnfnodes ,& dnfnodes );
118+ /*
119+ * First heuristic is to forget about *both* normal forms if there are
120+ * a huge number of terms in the qual clause. This would only happen
121+ * with machine-generated queries, presumably; and most likely such
122+ * a query is already in either CNF or DNF.
123+ */
124+ cnfok = dnfok = true;
125+ if (nodes >=500.0 )
126+ cnfok = dnfok = false;
127+ /*
128+ * Second heuristic is to forget about either CNF or DNF if it shows
120129 * unreasonable growth compared to the original form of the qual,
121130 * where we define "unreasonable" a tad arbitrarily as 4x more
122131 * operators.
123132 */
124- qualcnt = count_bool_nodes (qual );
125- cnfcnt = count_bool_nodes (cnfqual );
126- dnfcnt = count_bool_nodes (dnfqual );
127- if (cnfcnt >=4 * qualcnt )
128- cnfqual = NULL ;/* mark CNF not usable */
129- if (dnfcnt >=4 * qualcnt )
130- dnfqual = NULL ;/* mark DNF not usable */
131-
133+ if (cnfnodes >=4.0 * nodes )
134+ cnfok = false;
135+ if (dnfnodes >=4.0 * nodes )
136+ dnfok = false;
132137/*
133- * Second heuristic is to prefer DNF if only one relation is mentioned
134- * and it is smaller than the CNF representation.
138+ * Third heuristic is to prefer DNF if top level is already an OR,
139+ * and only one relation is mentioned, and DNF is no larger than
140+ * the CNF representation. (Pretty shaky; can we improve on this?)
135141 */
136- if (dnfqual && dnfcnt < cnfcnt && NumRelids ((Node * )dnfqual ) == 1 )
137- cnfqual = NULL ;
138-
142+ if (dnfok && dnfnodes <= cnfnodes && or_clause ((Node * )newqual ) &&
143+ NumRelids (( Node * ) newqual ) == 1 )
144+ cnfok = false;
139145/*
140146 * Otherwise, we prefer CNF.
141147 *
142148 * XXX obviously, these rules could be improved upon.
143149 */
144-
145- /* pick preferred survivor */
146- if (cnfqual )
147- newqual = cnfqual ;
148- else if (dnfqual )
149- newqual = dnfqual ;
150- else
151- newqual = qual ;
150+ if (cnfok )
151+ {
152+ /* Normalize into conjunctive normal form, and clean up the result. */
153+ newqual = qual_cleanup (find_ors (newqual ));
154+ }
155+ else if (dnfok )
156+ {
157+ /* Normalize into disjunctive normal form, and clean up the result. */
158+ newqual = qual_cleanup (find_ands (newqual ));
159+ }
152160
153161/* Convert to implicit-AND list if requested */
154162if (removeAndFlag )
@@ -828,27 +836,72 @@ remove_duplicates(List *list)
828836/*
829837 * count_bool_nodes
830838 *Support for heuristics in canonicalize_qual(): count the
831- *number of nodes in the top level AND/OR/NOT part of a qual tree
839+ *number of nodes that are inputs to the top level AND/OR/NOT
840+ *part of a qual tree, and estimate how many nodes will appear
841+ *in the CNF'ified or DNF'ified equivalent of the expression.
842+ *
843+ * This is just an approximate calculation; it doesn't deal with NOTs
844+ * very well, and of course it cannot detect possible simplifications
845+ * from eliminating duplicate subclauses. The idea is just to cheaply
846+ * determine whether CNF will be markedly worse than DNF or vice versa.
847+ *
848+ * The counts/estimates are represented as doubles to avoid risk of overflow.
832849 */
833- static int
834- count_bool_nodes (Expr * qual )
850+ static void
851+ count_bool_nodes (Expr * qual ,
852+ double * nodes ,
853+ double * cnfnodes ,
854+ double * dnfnodes )
835855{
836- if ( qual == NULL )
837- return 0 ;
856+ List * temp ;
857+ double subnodes , subcnfnodes , subdnfnodes ;
838858
839- if (and_clause ((Node * )qual )||
840- or_clause ((Node * )qual ))
859+ if (and_clause ((Node * )qual ))
841860{
842- int sum = 1 ; /* for the and/or itself */
843- List * temp ;
861+ * nodes = * cnfnodes = 0.0 ;
862+ * dnfnodes = 1.0 ; /* DNF nodes will be product of sub-counts */
844863
845864foreach (temp ,qual -> args )
846- sum += count_bool_nodes (lfirst (temp ));
865+ {
866+ count_bool_nodes (lfirst (temp ),
867+ & subnodes ,& subcnfnodes ,& subdnfnodes );
868+ * nodes += subnodes ;
869+ * cnfnodes += subcnfnodes ;
870+ * dnfnodes *=subdnfnodes ;
871+ }
872+ /* we could get dnfnodes < cnfnodes here, if all the sub-nodes are
873+ * simple ones with count 1. Make sure dnfnodes isn't too small.
874+ */
875+ if (* dnfnodes < * cnfnodes )
876+ * dnfnodes = * cnfnodes ;
877+ }
878+ else if (or_clause ((Node * )qual ))
879+ {
880+ * nodes = * dnfnodes = 0.0 ;
881+ * cnfnodes = 1.0 ;/* CNF nodes will be product of sub-counts */
847882
848- return sum ;
883+ foreach (temp ,qual -> args )
884+ {
885+ count_bool_nodes (lfirst (temp ),
886+ & subnodes ,& subcnfnodes ,& subdnfnodes );
887+ * nodes += subnodes ;
888+ * cnfnodes *=subcnfnodes ;
889+ * dnfnodes += subdnfnodes ;
890+ }
891+ /* we could get cnfnodes < dnfnodes here, if all the sub-nodes are
892+ * simple ones with count 1. Make sure cnfnodes isn't too small.
893+ */
894+ if (* cnfnodes < * dnfnodes )
895+ * cnfnodes = * dnfnodes ;
849896}
850897else if (not_clause ((Node * )qual ))
851- return count_bool_nodes (get_notclausearg (qual ))+ 1 ;
898+ {
899+ count_bool_nodes (get_notclausearg (qual ),
900+ nodes ,cnfnodes ,dnfnodes );
901+ }
852902else
853- return 1 ;/* anything else counts 1 for my purposes */
903+ {
904+ /* anything else counts 1 for my purposes */
905+ * nodes = * cnfnodes = * dnfnodes = 1.0 ;
906+ }
854907}