Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit0182951

Browse files
committed
Fix overenthusiastic optimization of 'x IN (SELECT DISTINCT ...)' and related
cases: we can't just consider whether the subquery's output is unique on itsown terms, we have to check whether the set of output columns we are going touse will be unique. Per complaint from Luca Pireddu and test case fromMichael Fuhr.
1 parentaa11106 commit0182951

File tree

3 files changed

+210
-45
lines changed

3 files changed

+210
-45
lines changed

‎src/backend/optimizer/util/pathnode.c

Lines changed: 120 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.122 2005/06/05 22:32:56 tgl Exp $
11+
* $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.123 2005/07/15 17:09:25 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -34,7 +34,8 @@
3434
#include"utils/syscache.h"
3535

3636

37-
staticboolis_distinct_query(Query*query);
37+
staticList*translate_sub_tlist(List*tlist,intrelid);
38+
staticboolquery_is_distinct_for(Query*query,List*colnos);
3839
staticboolhash_safe_tlist(List*tlist);
3940

4041

@@ -800,14 +801,41 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath)
800801
pathnode->subpath=subpath;
801802

802803
/*
803-
* If the input is a subquery whose output must be unique already, we
804-
* don't need to do anything.
804+
* Try to identify the targetlist that will actually be unique-ified.
805+
* In current usage, this routine is only used for sub-selects of IN
806+
* clauses, so we should be able to find the tlist in in_info_list.
807+
*/
808+
sub_targetlist=NIL;
809+
foreach(l,root->in_info_list)
810+
{
811+
InClauseInfo*ininfo= (InClauseInfo*)lfirst(l);
812+
813+
if (bms_equal(ininfo->righthand,rel->relids))
814+
{
815+
sub_targetlist=ininfo->sub_targetlist;
816+
break;
817+
}
818+
}
819+
820+
/*
821+
* If the input is a subquery whose output must be unique already,
822+
* then we don't need to do anything. The test for uniqueness has
823+
* to consider exactly which columns we are extracting; for example
824+
* "SELECT DISTINCT x,y" doesn't guarantee that x alone is distinct.
825+
* So we cannot check for this optimization unless we found our own
826+
* targetlist above, and it consists only of simple Vars referencing
827+
* subquery outputs. (Possibly we could do something with expressions
828+
* in the subquery outputs, too, but for now keep it simple.)
805829
*/
806-
if (rel->rtekind==RTE_SUBQUERY)
830+
if (sub_targetlist&&rel->rtekind==RTE_SUBQUERY)
807831
{
808832
RangeTblEntry*rte=rt_fetch(rel->relid,root->parse->rtable);
833+
List*sub_tlist_colnos;
834+
835+
sub_tlist_colnos=translate_sub_tlist(sub_targetlist,rel->relid);
809836

810-
if (is_distinct_query(rte->subquery))
837+
if (sub_tlist_colnos&&
838+
query_is_distinct_for(rte->subquery,sub_tlist_colnos))
811839
{
812840
pathnode->umethod=UNIQUE_PATH_NOOP;
813841
pathnode->rows=rel->rows;
@@ -821,23 +849,6 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath)
821849
}
822850
}
823851

824-
/*
825-
* Try to identify the targetlist that will actually be unique-ified.
826-
* In current usage, this routine is only used for sub-selects of IN
827-
* clauses, so we should be able to find the tlist in in_info_list.
828-
*/
829-
sub_targetlist=NIL;
830-
foreach(l,root->in_info_list)
831-
{
832-
InClauseInfo*ininfo= (InClauseInfo*)lfirst(l);
833-
834-
if (bms_equal(ininfo->righthand,rel->relids))
835-
{
836-
sub_targetlist=ininfo->sub_targetlist;
837-
break;
838-
}
839-
}
840-
841852
/*
842853
* If we know the targetlist, try to estimate number of result rows;
843854
* otherwise punt.
@@ -913,46 +924,83 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath)
913924
}
914925

915926
/*
916-
* is_distinct_query - does query never return duplicate rows?
927+
* translate_sub_tlist - get subquery column numbers represented by tlist
928+
*
929+
* The given targetlist should contain only Vars referencing the given relid.
930+
* Extract their varattnos (ie, the column numbers of the subquery) and return
931+
* as an integer List.
932+
*
933+
* If any of the tlist items is not a simple Var, we cannot determine whether
934+
* the subquery's uniqueness condition (if any) matches ours, so punt and
935+
* return NIL.
917936
*/
918-
staticbool
919-
is_distinct_query(Query*query)
937+
staticList*
938+
translate_sub_tlist(List*tlist,intrelid)
920939
{
921-
/* DISTINCT (but not DISTINCT ON) guarantees uniqueness */
922-
if (has_distinct_clause(query))
923-
return true;
940+
List*result=NIL;
941+
ListCell*l;
924942

925-
/* UNION, INTERSECT, EXCEPT guarantee uniqueness, except with ALL */
926-
if (query->setOperations)
943+
foreach(l,tlist)
927944
{
928-
SetOperationStmt*topop= (SetOperationStmt*)query->setOperations;
945+
Var*var= (Var*)lfirst(l);
929946

930-
Assert(IsA(topop,SetOperationStmt));
931-
Assert(topop->op!=SETOP_NONE);
947+
if (!var|| !IsA(var,Var)||
948+
var->varno!=relid)
949+
returnNIL;/* punt */
932950

933-
if (!topop->all)
951+
result=lappend_int(result,var->varattno);
952+
}
953+
returnresult;
954+
}
955+
956+
/*
957+
* query_is_distinct_for - does query never return duplicates of the
958+
*specified columns?
959+
*
960+
* colnos is an integer list of output column numbers (resno's). We are
961+
* interested in whether rows consisting of just these columns are certain
962+
* to be distinct.
963+
*/
964+
staticbool
965+
query_is_distinct_for(Query*query,List*colnos)
966+
{
967+
ListCell*l;
968+
969+
/*
970+
* DISTINCT (including DISTINCT ON) guarantees uniqueness if all the
971+
* columns in the DISTINCT clause appear in colnos.
972+
*/
973+
if (query->distinctClause)
974+
{
975+
foreach(l,query->distinctClause)
976+
{
977+
SortClause*scl= (SortClause*)lfirst(l);
978+
TargetEntry*tle=get_sortgroupclause_tle(scl,
979+
query->targetList);
980+
981+
if (!list_member_int(colnos,tle->resno))
982+
break;/* exit early if no match */
983+
}
984+
if (l==NULL)/* had matches for all? */
934985
return true;
935986
}
936987

937988
/*
938-
* GROUP BY guarantees uniqueness if all the grouped columns appear in
939-
* the output.In our implementation this means checking they are non
940-
* resjunk columns.
989+
* Similarly, GROUP BY guarantees uniqueness if all the grouped columns
990+
* appear in colnos.
941991
*/
942992
if (query->groupClause)
943993
{
944-
ListCell*gl;
945-
946-
foreach(gl,query->groupClause)
994+
foreach(l,query->groupClause)
947995
{
948-
GroupClause*grpcl= (GroupClause*)lfirst(gl);
996+
GroupClause*grpcl= (GroupClause*)lfirst(l);
949997
TargetEntry*tle=get_sortgroupclause_tle(grpcl,
950998
query->targetList);
951999

952-
if (tle->resjunk)
953-
break;
1000+
if (!list_member_int(colnos,tle->resno))
1001+
break;/* exit early if no match */
9541002
}
955-
if (!gl)/*got to the end? */
1003+
if (l==NULL)/*had matches for all? */
9561004
return true;
9571005
}
9581006
else
@@ -965,6 +1013,33 @@ is_distinct_query(Query *query)
9651013
return true;
9661014
}
9671015

1016+
/*
1017+
* UNION, INTERSECT, EXCEPT guarantee uniqueness of the whole output row,
1018+
* except with ALL
1019+
*/
1020+
if (query->setOperations)
1021+
{
1022+
SetOperationStmt*topop= (SetOperationStmt*)query->setOperations;
1023+
1024+
Assert(IsA(topop,SetOperationStmt));
1025+
Assert(topop->op!=SETOP_NONE);
1026+
1027+
if (!topop->all)
1028+
{
1029+
/* We're good if all the nonjunk output columns are in colnos */
1030+
foreach(l,query->targetList)
1031+
{
1032+
TargetEntry*tle= (TargetEntry*)lfirst(l);
1033+
1034+
if (!tle->resjunk&&
1035+
!list_member_int(colnos,tle->resno))
1036+
break;/* exit early if no match */
1037+
}
1038+
if (l==NULL)/* had matches for all? */
1039+
return true;
1040+
}
1041+
}
1042+
9681043
/*
9691044
* XXX Are there any other cases in which we can easily see the result
9701045
* must be distinct?

‎src/test/regress/expected/subselect.out

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,63 @@ select count(distinct ss.ten) from
202202
10
203203
(1 row)
204204

205+
--
206+
-- Test cases to check for overenthusiastic optimization of
207+
-- "IN (SELECT DISTINCT ...)" and related cases. Per example from
208+
-- Luca Pireddu and Michael Fuhr.
209+
--
210+
CREATE TEMP TABLE foo (id integer);
211+
CREATE TEMP TABLE bar (id1 integer, id2 integer);
212+
INSERT INTO foo VALUES (1);
213+
INSERT INTO bar VALUES (1, 1);
214+
INSERT INTO bar VALUES (2, 2);
215+
INSERT INTO bar VALUES (3, 1);
216+
-- These cases require an extra level of distinct-ing above subquery s
217+
SELECT * FROM foo WHERE id IN
218+
(SELECT id2 FROM (SELECT DISTINCT id1, id2 FROM bar) AS s);
219+
id
220+
----
221+
1
222+
(1 row)
223+
224+
SELECT * FROM foo WHERE id IN
225+
(SELECT id2 FROM (SELECT id1,id2 FROM bar GROUP BY id1,id2) AS s);
226+
id
227+
----
228+
1
229+
(1 row)
230+
231+
SELECT * FROM foo WHERE id IN
232+
(SELECT id2 FROM (SELECT id1, id2 FROM bar UNION
233+
SELECT id1, id2 FROM bar) AS s);
234+
id
235+
----
236+
1
237+
(1 row)
238+
239+
-- These cases do not
240+
SELECT * FROM foo WHERE id IN
241+
(SELECT id2 FROM (SELECT DISTINCT ON (id2) id1, id2 FROM bar) AS s);
242+
id
243+
----
244+
1
245+
(1 row)
246+
247+
SELECT * FROM foo WHERE id IN
248+
(SELECT id2 FROM (SELECT id2 FROM bar GROUP BY id2) AS s);
249+
id
250+
----
251+
1
252+
(1 row)
253+
254+
SELECT * FROM foo WHERE id IN
255+
(SELECT id2 FROM (SELECT id2 FROM bar UNION
256+
SELECT id2 FROM bar) AS s);
257+
id
258+
----
259+
1
260+
(1 row)
261+
205262
--
206263
-- Test case to catch problems with multiply nested sub-SELECTs not getting
207264
-- recalculated properly. Per bug report from Didier Moens.

‎src/test/regress/sql/subselect.sql

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,39 @@ select count(distinct ss.ten) from
9595
(select tenfrom tenk1 a
9696
where unique1IN (select distinct hundredfrom tenk1 b)) ss;
9797

98+
--
99+
-- Test cases to check for overenthusiastic optimization of
100+
-- "IN (SELECT DISTINCT ...)" and related cases. Per example from
101+
-- Luca Pireddu and Michael Fuhr.
102+
--
103+
104+
CREATE TEMP TABLE foo (idinteger);
105+
CREATE TEMP TABLE bar (id1integer, id2integer);
106+
107+
INSERT INTO fooVALUES (1);
108+
109+
INSERT INTO barVALUES (1,1);
110+
INSERT INTO barVALUES (2,2);
111+
INSERT INTO barVALUES (3,1);
112+
113+
-- These cases require an extra level of distinct-ing above subquery s
114+
SELECT*FROM fooWHERE idIN
115+
(SELECT id2FROM (SELECT DISTINCT id1, id2FROM bar)AS s);
116+
SELECT*FROM fooWHERE idIN
117+
(SELECT id2FROM (SELECT id1,id2FROM barGROUP BY id1,id2)AS s);
118+
SELECT*FROM fooWHERE idIN
119+
(SELECT id2FROM (SELECT id1, id2FROM barUNION
120+
SELECT id1, id2FROM bar)AS s);
121+
122+
-- These cases do not
123+
SELECT*FROM fooWHERE idIN
124+
(SELECT id2FROM (SELECT DISTINCTON (id2) id1, id2FROM bar)AS s);
125+
SELECT*FROM fooWHERE idIN
126+
(SELECT id2FROM (SELECT id2FROM barGROUP BY id2)AS s);
127+
SELECT*FROM fooWHERE idIN
128+
(SELECT id2FROM (SELECT id2FROM barUNION
129+
SELECT id2FROM bar)AS s);
130+
98131
--
99132
-- Test case to catch problems with multiply nested sub-SELECTs not getting
100133
-- recalculated properly. Per bug report from Didier Moens.

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp