77 *
88 *
99 * IDENTIFICATION
10- * $Header: /cvsroot/pgsql/src/backend/optimizer/prep/prepqual.c,v 1.19 1999/09/12 18:08:17 tgl Exp $
10+ * $Header: /cvsroot/pgsql/src/backend/optimizer/prep/prepqual.c,v 1.20 1999/09/13 00:17:13 tgl Exp $
1111 *
1212 *-------------------------------------------------------------------------
1313 */
@@ -29,6 +29,9 @@ static Expr *find_ors(Expr *qual);
2929static Expr * or_normalize (List * orlist );
3030static Expr * find_ands (Expr * qual );
3131static Expr * and_normalize (List * andlist );
32+ static Expr * qual_cleanup (Expr * qual );
33+ static List * remove_duplicates (List * list );
34+ static int count_bool_nodes (Expr * qual );
3235
3336/*****************************************************************************
3437 *
@@ -37,20 +40,124 @@ static Expr *and_normalize(List *andlist);
3740 *These routines convert an arbitrary boolean expression into
3841 *conjunctive normal form or disjunctive normal form.
3942 *
40- *The result of these routines differs from a "true" CNF/DNF in that
41- *we do not bother to detect common subexpressions; e.g., ("AND" A A)
42- *does not get simplified to A. Testing for identical subexpressions
43- *is a waste of time if the query is written intelligently, and it
44- *takes an unreasonable amount of time if there are many subexpressions
45- *(since it's roughly O(N^2) in the number of subexpressions).
43+ *Normalization is only carried out in the top AND/OR/NOT portion
44+ *of the given tree; we do not attempt to normalize boolean expressions
45+ *that may appear as arguments of operators or functions in the tree.
4646 *
47- *Because of that restriction, it would be unwise to apply dnfify()
48- *to the result of cnfify() or vice versa. Instead apply both to
49- *the original user-written qual expression.
47+ *Query qualifications (WHERE clauses) are ordinarily transformed into
48+ *CNF, ie, AND-of-ORs form, because then the optimizer can use any one
49+ *of the independent AND clauses as a filtering qualification. However,
50+ *quals that are naturally expressed as OR-of-ANDs can suffer an
51+ *exponential growth in size in this transformation, so we also consider
52+ *converting to DNF (OR-of-ANDs), and we may also leave well enough alone
53+ *if both transforms cause unreasonable growth. The OR-of-ANDs format
54+ *is useful for indexscan implementation, so we prefer that format when
55+ *there is just one relation involved.
5056 *
57+ *canonicalize_qual() does "smart" conversion to either CNF or DNF, per
58+ *the above considerations, while cnfify() and dnfify() simply perform
59+ *the demanded transformation. The latter two may become dead code
60+ *eventually.
5161 *****************************************************************************/
5262
5363
64+ /*
65+ * canonicalize_qual
66+ * Convert a qualification to the most useful normalized form.
67+ *
68+ * Returns the modified qualification.
69+ *
70+ * If 'removeAndFlag' is true then it removes explicit AND at the top level,
71+ * producing a list of implicitly-ANDed conditions. Otherwise, a regular
72+ * boolean expression is returned. Since most callers pass 'true', we
73+ * prefer to declare the result as List *, not Expr *.
74+ *
75+ * XXX This code could be much smarter, at the cost of also being slower,
76+ * if we tried to compute selectivities and/or see whether there are
77+ * actually indexes to support an indexscan implementation of a DNF qual.
78+ * We could even try converting the CNF clauses that mention a single
79+ * relation into a single DNF clause to see if that looks cheaper to
80+ * implement. For now, though, we just try to avoid doing anything
81+ * quite as stupid as unconditionally converting to CNF was...
82+ */
83+ List *
84+ canonicalize_qual (Expr * qual ,bool removeAndFlag )
85+ {
86+ Expr * newqual ,
87+ * cnfqual ,
88+ * dnfqual ;
89+ int qualcnt ,
90+ cnfcnt ,
91+ dnfcnt ;
92+
93+ if (qual == NULL )
94+ return NIL ;
95+
96+ /* Flatten AND and OR groups throughout the tree.
97+ * This improvement is always worthwhile, so do it unconditionally.
98+ */
99+ qual = flatten_andors (qual );
100+ /* Push down NOTs. We do this only in the top-level boolean
101+ * expression, without examining arguments of operators/functions.
102+ * Even so, it might not be a win if we are unable to find negators
103+ * for all the operators involved; so we keep the flattened-but-not-
104+ * NOT-pushed qual as the reference point for comparsions.
105+ */
106+ newqual = find_nots (qual );
107+ /*
108+ * Generate both CNF and DNF forms from newqual.
109+ */
110+ /* Normalize into conjunctive normal form, and clean up the result. */
111+ cnfqual = qual_cleanup (find_ors (newqual ));
112+ /* Likewise for DNF */
113+ dnfqual = qual_cleanup (find_ands (newqual ));
114+
115+ /*
116+ * Now, choose whether to return qual, cnfqual, or dnfqual.
117+ *
118+ * First heuristic is to forget about either CNF or DNF if it shows
119+ * unreasonable growth compared to the original form of the qual,
120+ * where we define "unreasonable" a tad arbitrarily as 4x more
121+ * operators.
122+ */
123+ qualcnt = count_bool_nodes (qual );
124+ cnfcnt = count_bool_nodes (cnfqual );
125+ dnfcnt = count_bool_nodes (dnfqual );
126+ if (cnfcnt >=4 * qualcnt )
127+ cnfqual = NULL ;/* mark CNF not usable */
128+ if (dnfcnt >=4 * qualcnt )
129+ dnfqual = NULL ;/* mark DNF not usable */
130+
131+ /*
132+ * Second heuristic is to prefer DNF if only one relation is mentioned
133+ * and it is smaller than the CNF representation.
134+ */
135+ if (dnfqual && dnfcnt < cnfcnt && NumRelids ((Node * )dnfqual )== 1 )
136+ cnfqual = NULL ;
137+
138+ /*
139+ * Otherwise, we prefer CNF.
140+ *
141+ * XXX obviously, these rules could be improved upon.
142+ */
143+
144+ /* pick preferred survivor */
145+ if (cnfqual )
146+ newqual = cnfqual ;
147+ else if (dnfqual )
148+ newqual = dnfqual ;
149+ else
150+ newqual = qual ;
151+
152+ /* Convert to implicit-AND list if requested */
153+ if (removeAndFlag )
154+ {
155+ newqual = (Expr * )make_ands_implicit (newqual );
156+ }
157+
158+ return (List * )newqual ;
159+ }
160+
54161/*
55162 * cnfify
56163 * Convert a qualification to conjunctive normal form by applying
@@ -81,6 +188,8 @@ cnfify(Expr *qual, bool removeAndFlag)
81188newqual = find_nots (newqual );
82189/* Normalize into conjunctive normal form. */
83190newqual = find_ors (newqual );
191+ /* Clean up the result. */
192+ newqual = qual_cleanup (newqual );
84193
85194if (removeAndFlag )
86195{
@@ -118,6 +227,8 @@ dnfify(Expr *qual)
118227newqual = find_nots (newqual );
119228/* Normalize into disjunctive normal form. */
120229newqual = find_ands (newqual );
230+ /* Clean up the result. */
231+ newqual = qual_cleanup (newqual );
121232
122233return newqual ;
123234}
@@ -641,3 +752,102 @@ and_normalize(List *andlist)
641752/* pull_ors is needed in case any sub-and_normalize succeeded */
642753return make_orclause (pull_ors (orclauses ));
643754}
755+
756+ /*
757+ * qual_cleanup
758+ * Fix up a qualification by removing duplicate entries (which could be
759+ * created during normalization, if identical subexpressions from different
760+ * parts of the tree are brought together). Also, check for AND and OR
761+ * clauses with only one remaining subexpression, and simplify.
762+ *
763+ * Returns the modified qualification.
764+ */
765+ static Expr *
766+ qual_cleanup (Expr * qual )
767+ {
768+ if (qual == NULL )
769+ return NULL ;
770+
771+ if (and_clause ((Node * )qual ))
772+ {
773+ List * andlist = NIL ;
774+ List * temp ;
775+
776+ foreach (temp ,qual -> args )
777+ andlist = lappend (andlist ,qual_cleanup (lfirst (temp )));
778+
779+ andlist = remove_duplicates (pull_ands (andlist ));
780+
781+ if (length (andlist )> 1 )
782+ return make_andclause (andlist );
783+ else
784+ return lfirst (andlist );
785+ }
786+ else if (or_clause ((Node * )qual ))
787+ {
788+ List * orlist = NIL ;
789+ List * temp ;
790+
791+ foreach (temp ,qual -> args )
792+ orlist = lappend (orlist ,qual_cleanup (lfirst (temp )));
793+
794+ orlist = remove_duplicates (pull_ors (orlist ));
795+
796+ if (length (orlist )> 1 )
797+ return make_orclause (orlist );
798+ else
799+ return lfirst (orlist );
800+ }
801+ else if (not_clause ((Node * )qual ))
802+ return make_notclause (qual_cleanup (get_notclausearg (qual )));
803+ else
804+ return qual ;
805+ }
806+
807+ /*
808+ * remove_duplicates
809+ */
810+ static List *
811+ remove_duplicates (List * list )
812+ {
813+ List * result = NIL ;
814+ List * i ;
815+
816+ if (length (list ) <=1 )
817+ return list ;
818+
819+ foreach (i ,list )
820+ {
821+ if (!member (lfirst (i ),result ))
822+ result = lappend (result ,lfirst (i ));
823+ }
824+ return result ;
825+ }
826+
827+ /*
828+ * count_bool_nodes
829+ *Support for heuristics in canonicalize_qual(): count the
830+ *number of nodes in the top level AND/OR/NOT part of a qual tree
831+ */
832+ static int
833+ count_bool_nodes (Expr * qual )
834+ {
835+ if (qual == NULL )
836+ return 0 ;
837+
838+ if (and_clause ((Node * )qual )||
839+ or_clause ((Node * )qual ))
840+ {
841+ int sum = 1 ;/* for the and/or itself */
842+ List * temp ;
843+
844+ foreach (temp ,qual -> args )
845+ sum += count_bool_nodes (lfirst (temp ));
846+
847+ return sum ;
848+ }
849+ else if (not_clause ((Node * )qual ))
850+ return count_bool_nodes (get_notclausearg (qual ))+ 1 ;
851+ else
852+ return 1 ;/* anything else counts 1 for my purposes */
853+ }