88 *
99 *
1010 * IDENTIFICATION
11- * $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.122 2005/06/05 22:32:56 tgl Exp $
11+ * $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.123 2005/07/15 17:09:25 tgl Exp $
1212 *
1313 *-------------------------------------------------------------------------
1414 */
3434#include "utils/syscache.h"
3535
3636
37- static bool is_distinct_query (Query * query );
37+ static List * translate_sub_tlist (List * tlist ,int relid );
38+ static bool query_is_distinct_for (Query * query ,List * colnos );
3839static bool hash_safe_tlist (List * tlist );
3940
4041
@@ -800,14 +801,41 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath)
800801pathnode -> subpath = subpath ;
801802
802803/*
803- * If the input is a subquery whose output must be unique already, we
804- * don't need to do anything.
804+ * Try to identify the targetlist that will actually be unique-ified.
805+ * In current usage, this routine is only used for sub-selects of IN
806+ * clauses, so we should be able to find the tlist in in_info_list.
807+ */
808+ sub_targetlist = NIL ;
809+ foreach (l ,root -> in_info_list )
810+ {
811+ InClauseInfo * ininfo = (InClauseInfo * )lfirst (l );
812+
813+ if (bms_equal (ininfo -> righthand ,rel -> relids ))
814+ {
815+ sub_targetlist = ininfo -> sub_targetlist ;
816+ break ;
817+ }
818+ }
819+
820+ /*
821+ * If the input is a subquery whose output must be unique already,
822+ * then we don't need to do anything. The test for uniqueness has
823+ * to consider exactly which columns we are extracting; for example
824+ * "SELECT DISTINCT x,y" doesn't guarantee that x alone is distinct.
825+ * So we cannot check for this optimization unless we found our own
826+ * targetlist above, and it consists only of simple Vars referencing
827+ * subquery outputs. (Possibly we could do something with expressions
828+ * in the subquery outputs, too, but for now keep it simple.)
805829 */
806- if (rel -> rtekind == RTE_SUBQUERY )
830+ if (sub_targetlist && rel -> rtekind == RTE_SUBQUERY )
807831{
808832RangeTblEntry * rte = rt_fetch (rel -> relid ,root -> parse -> rtable );
833+ List * sub_tlist_colnos ;
834+
835+ sub_tlist_colnos = translate_sub_tlist (sub_targetlist ,rel -> relid );
809836
810- if (is_distinct_query (rte -> subquery ))
837+ if (sub_tlist_colnos &&
838+ query_is_distinct_for (rte -> subquery ,sub_tlist_colnos ))
811839{
812840pathnode -> umethod = UNIQUE_PATH_NOOP ;
813841pathnode -> rows = rel -> rows ;
@@ -821,23 +849,6 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath)
821849}
822850}
823851
824- /*
825- * Try to identify the targetlist that will actually be unique-ified.
826- * In current usage, this routine is only used for sub-selects of IN
827- * clauses, so we should be able to find the tlist in in_info_list.
828- */
829- sub_targetlist = NIL ;
830- foreach (l ,root -> in_info_list )
831- {
832- InClauseInfo * ininfo = (InClauseInfo * )lfirst (l );
833-
834- if (bms_equal (ininfo -> righthand ,rel -> relids ))
835- {
836- sub_targetlist = ininfo -> sub_targetlist ;
837- break ;
838- }
839- }
840-
841852/*
842853 * If we know the targetlist, try to estimate number of result rows;
843854 * otherwise punt.
@@ -913,46 +924,83 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath)
913924}
914925
915926/*
916- * is_distinct_query - does query never return duplicate rows?
927+ * translate_sub_tlist - get subquery column numbers represented by tlist
928+ *
929+ * The given targetlist should contain only Vars referencing the given relid.
930+ * Extract their varattnos (ie, the column numbers of the subquery) and return
931+ * as an integer List.
932+ *
933+ * If any of the tlist items is not a simple Var, we cannot determine whether
934+ * the subquery's uniqueness condition (if any) matches ours, so punt and
935+ * return NIL.
917936 */
918- static bool
919- is_distinct_query ( Query * query )
937+ static List *
938+ translate_sub_tlist ( List * tlist , int relid )
920939{
921- /* DISTINCT (but not DISTINCT ON) guarantees uniqueness */
922- if (has_distinct_clause (query ))
923- return true;
940+ List * result = NIL ;
941+ ListCell * l ;
924942
925- /* UNION, INTERSECT, EXCEPT guarantee uniqueness, except with ALL */
926- if (query -> setOperations )
943+ foreach (l ,tlist )
927944{
928- SetOperationStmt * topop = (SetOperationStmt * )query -> setOperations ;
945+ Var * var = (Var * )lfirst ( l ) ;
929946
930- Assert (IsA (topop ,SetOperationStmt ));
931- Assert (topop -> op != SETOP_NONE );
947+ if (!var || !IsA (var ,Var )||
948+ var -> varno != relid )
949+ return NIL ;/* punt */
932950
933- if (!topop -> all )
951+ result = lappend_int (result ,var -> varattno );
952+ }
953+ return result ;
954+ }
955+
956+ /*
957+ * query_is_distinct_for - does query never return duplicates of the
958+ *specified columns?
959+ *
960+ * colnos is an integer list of output column numbers (resno's). We are
961+ * interested in whether rows consisting of just these columns are certain
962+ * to be distinct.
963+ */
964+ static bool
965+ query_is_distinct_for (Query * query ,List * colnos )
966+ {
967+ ListCell * l ;
968+
969+ /*
970+ * DISTINCT (including DISTINCT ON) guarantees uniqueness if all the
971+ * columns in the DISTINCT clause appear in colnos.
972+ */
973+ if (query -> distinctClause )
974+ {
975+ foreach (l ,query -> distinctClause )
976+ {
977+ SortClause * scl = (SortClause * )lfirst (l );
978+ TargetEntry * tle = get_sortgroupclause_tle (scl ,
979+ query -> targetList );
980+
981+ if (!list_member_int (colnos ,tle -> resno ))
982+ break ;/* exit early if no match */
983+ }
984+ if (l == NULL )/* had matches for all? */
934985return true;
935986}
936987
937988/*
938- * GROUP BY guarantees uniqueness if all the grouped columns appear in
939- * the output.In our implementation this means checking they are non
940- * resjunk columns.
989+ * Similarly, GROUP BY guarantees uniqueness if all the grouped columns
990+ * appear in colnos.
941991 */
942992if (query -> groupClause )
943993{
944- ListCell * gl ;
945-
946- foreach (gl ,query -> groupClause )
994+ foreach (l ,query -> groupClause )
947995{
948- GroupClause * grpcl = (GroupClause * )lfirst (gl );
996+ GroupClause * grpcl = (GroupClause * )lfirst (l );
949997TargetEntry * tle = get_sortgroupclause_tle (grpcl ,
950998query -> targetList );
951999
952- if (tle -> resjunk )
953- break ;
1000+ if (! list_member_int ( colnos , tle -> resno ) )
1001+ break ;/* exit early if no match */
9541002}
955- if (! gl )/*got to the end ? */
1003+ if (l == NULL )/*had matches for all ? */
9561004return true;
9571005}
9581006else
@@ -965,6 +1013,33 @@ is_distinct_query(Query *query)
9651013return true;
9661014}
9671015
1016+ /*
1017+ * UNION, INTERSECT, EXCEPT guarantee uniqueness of the whole output row,
1018+ * except with ALL
1019+ */
1020+ if (query -> setOperations )
1021+ {
1022+ SetOperationStmt * topop = (SetOperationStmt * )query -> setOperations ;
1023+
1024+ Assert (IsA (topop ,SetOperationStmt ));
1025+ Assert (topop -> op != SETOP_NONE );
1026+
1027+ if (!topop -> all )
1028+ {
1029+ /* We're good if all the nonjunk output columns are in colnos */
1030+ foreach (l ,query -> targetList )
1031+ {
1032+ TargetEntry * tle = (TargetEntry * )lfirst (l );
1033+
1034+ if (!tle -> resjunk &&
1035+ !list_member_int (colnos ,tle -> resno ))
1036+ break ;/* exit early if no match */
1037+ }
1038+ if (l == NULL )/* had matches for all? */
1039+ return true;
1040+ }
1041+ }
1042+
9681043/*
9691044 * XXX Are there any other cases in which we can easily see the result
9701045 * must be distinct?