NotificationsYou must be signed in to change notification settings
Fork6
Star31

Commit0182951

committed

Fix overenthusiastic optimization of 'x IN (SELECT DISTINCT ...)' and related

cases: we can't just consider whether the subquery's output is unique on itsown terms, we have to check whether the set of output columns we are going touse will be unique. Per complaint from Luca Pireddu and test case fromMichael Fuhr.

1 parentaa11106 commit0182951Copy full SHA for 0182951

File tree

3 files changed

+210

-45

lines changed

src
- backend/optimizer/util
  - pathnode.c
- test/regress
  - expected
    - subselect.out
  - sql
    - subselect.sql

3 files changed

+210

-45

lines changed

`‎src/backend/optimizer/util/pathnode.c‎`

Lines changed: 120 additions & 45 deletions

Original file line number	Diff line number	Diff line change
`@@ -8,7 +8,7 @@`
`8`	`8`	`*`
`9`	`9`	`*`
`10`	`10`	`* IDENTIFICATION`
`11`		`- * $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.122 2005/06/05 22:32:56 tgl Exp $`
	`11`	`+ * $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.123 2005/07/15 17:09:25 tgl Exp $`
`12`	`12`	`*`
`13`	`13`	`*-------------------------------------------------------------------------`
`14`	`14`	`*/`
`@@ -34,7 +34,8 @@`
`34`	`34`	`#include"utils/syscache.h"`
`35`	`35`
`36`	`36`
`37`		`-staticboolis_distinct_query(Query*query);`
	`37`	`+staticListtranslate_sub_tlist(Listtlist,intrelid);`
	`38`	`+staticboolquery_is_distinct_for(Queryquery,Listcolnos);`
`38`	`39`	`staticboolhash_safe_tlist(List*tlist);`
`39`	`40`
`40`	`41`
`@@ -800,14 +801,41 @@ create_unique_path(PlannerInfo root, RelOptInfo rel, Path *subpath)`
`800`	`801`	`pathnode->subpath=subpath;`
`801`	`802`
`802`	`803`	`/*`
`803`		`- * If the input is a subquery whose output must be unique already, we`
`804`		`- * don't need to do anything.`
	`804`	`+ * Try to identify the targetlist that will actually be unique-ified.`
	`805`	`+ * In current usage, this routine is only used for sub-selects of IN`
	`806`	`+ * clauses, so we should be able to find the tlist in in_info_list.`
	`807`	`+ */`
	`808`	`+sub_targetlist=NIL;`
	`809`	`+foreach(l,root->in_info_list)`
	`810`	`+{`
	`811`	`+InClauseInfoininfo= (InClauseInfo)lfirst(l);`
	`812`	`+`
	`813`	`+if (bms_equal(ininfo->righthand,rel->relids))`
	`814`	`+{`
	`815`	`+sub_targetlist=ininfo->sub_targetlist;`
	`816`	`+break;`
	`817`	`+}`
	`818`	`+}`
	`819`	`+`
	`820`	`+/*`
	`821`	`+ * If the input is a subquery whose output must be unique already,`
	`822`	`+ * then we don't need to do anything. The test for uniqueness has`
	`823`	`+ * to consider exactly which columns we are extracting; for example`
	`824`	`+ * "SELECT DISTINCT x,y" doesn't guarantee that x alone is distinct.`
	`825`	`+ * So we cannot check for this optimization unless we found our own`
	`826`	`+ * targetlist above, and it consists only of simple Vars referencing`
	`827`	`+ * subquery outputs. (Possibly we could do something with expressions`
	`828`	`+ * in the subquery outputs, too, but for now keep it simple.)`
`805`	`829`	`*/`
`806`		`-if (rel->rtekind==RTE_SUBQUERY)`
	`830`	`+if (sub_targetlist&&rel->rtekind==RTE_SUBQUERY)`
`807`	`831`	`{`
`808`	`832`	`RangeTblEntry*rte=rt_fetch(rel->relid,root->parse->rtable);`
	`833`	`+List*sub_tlist_colnos;`
	`834`	`+`
	`835`	`+sub_tlist_colnos=translate_sub_tlist(sub_targetlist,rel->relid);`
`809`	`836`
`810`		`-if (is_distinct_query(rte->subquery))`
	`837`	`+if (sub_tlist_colnos&&`
	`838`	`+query_is_distinct_for(rte->subquery,sub_tlist_colnos))`
`811`	`839`	`{`
`812`	`840`	`pathnode->umethod=UNIQUE_PATH_NOOP;`
`813`	`841`	`pathnode->rows=rel->rows;`
`@@ -821,23 +849,6 @@ create_unique_path(PlannerInfo root, RelOptInfo rel, Path *subpath)`
`821`	`849`	`}`
`822`	`850`	`}`
`823`	`851`
`824`		`-/*`
`825`		`- * Try to identify the targetlist that will actually be unique-ified.`
`826`		`- * In current usage, this routine is only used for sub-selects of IN`
`827`		`- * clauses, so we should be able to find the tlist in in_info_list.`
`828`		`- */`
`829`		`-sub_targetlist=NIL;`
`830`		`-foreach(l,root->in_info_list)`
`831`		`-{`
`832`		`-InClauseInfoininfo= (InClauseInfo)lfirst(l);`
`833`		`-`
`834`		`-if (bms_equal(ininfo->righthand,rel->relids))`
`835`		`-{`
`836`		`-sub_targetlist=ininfo->sub_targetlist;`
`837`		`-break;`
`838`		`-}`
`839`		`-}`
`840`		`-`
`841`	`852`	`/*`
`842`	`853`	`* If we know the targetlist, try to estimate number of result rows;`
`843`	`854`	`* otherwise punt.`
`@@ -913,46 +924,83 @@ create_unique_path(PlannerInfo root, RelOptInfo rel, Path *subpath)`
`913`	`924`	`}`
`914`	`925`
`915`	`926`	`/*`
`916`		`- * is_distinct_query - does query never return duplicate rows?`
	`927`	`+ * translate_sub_tlist - get subquery column numbers represented by tlist`
	`928`	`+ *`
	`929`	`+ * The given targetlist should contain only Vars referencing the given relid.`
	`930`	`+ * Extract their varattnos (ie, the column numbers of the subquery) and return`
	`931`	`+ * as an integer List.`
	`932`	`+ *`
	`933`	`+ * If any of the tlist items is not a simple Var, we cannot determine whether`
	`934`	`+ * the subquery's uniqueness condition (if any) matches ours, so punt and`
	`935`	`+ * return NIL.`
`917`	`936`	`*/`
`918`		`-staticbool`
`919`		`-is_distinct_query(Query*query)`
	`937`	`+staticList*`
	`938`	`+translate_sub_tlist(List*tlist,intrelid)`
`920`	`939`	`{`
`921`		`-/* DISTINCT (but not DISTINCT ON) guarantees uniqueness */`
`922`		`-if (has_distinct_clause(query))`
`923`		`-return true;`
	`940`	`+List*result=NIL;`
	`941`	`+ListCell*l;`
`924`	`942`
`925`		`-/* UNION, INTERSECT, EXCEPT guarantee uniqueness, except with ALL */`
`926`		`-if (query->setOperations)`
	`943`	`+foreach(l,tlist)`
`927`	`944`	`{`
`928`		`-SetOperationStmttopop= (SetOperationStmt)query->setOperations;`
	`945`	`+Varvar= (Var)lfirst(l);`
`929`	`946`
`930`		`-Assert(IsA(topop,SetOperationStmt));`
`931`		`-Assert(topop->op!=SETOP_NONE);`
	`947`	`+if (!var\|\| !IsA(var,Var)\|\|`
	`948`	`+var->varno!=relid)`
	`949`	`+returnNIL;/* punt */`
`932`	`950`
`933`		`-if (!topop->all)`
	`951`	`+result=lappend_int(result,var->varattno);`
	`952`	`+}`
	`953`	`+returnresult;`
	`954`	`+}`
	`955`	`+`
	`956`	`+/*`
	`957`	`+ * query_is_distinct_for - does query never return duplicates of the`
	`958`	`+ *specified columns?`
	`959`	`+ *`
	`960`	`+ * colnos is an integer list of output column numbers (resno's). We are`
	`961`	`+ * interested in whether rows consisting of just these columns are certain`
	`962`	`+ * to be distinct.`
	`963`	`+ */`
	`964`	`+staticbool`
	`965`	`+query_is_distinct_for(Queryquery,Listcolnos)`
	`966`	`+{`
	`967`	`+ListCell*l;`
	`968`	`+`
	`969`	`+/*`
	`970`	`+ * DISTINCT (including DISTINCT ON) guarantees uniqueness if all the`
	`971`	`+ * columns in the DISTINCT clause appear in colnos.`
	`972`	`+ */`
	`973`	`+if (query->distinctClause)`
	`974`	`+{`
	`975`	`+foreach(l,query->distinctClause)`
	`976`	`+{`
	`977`	`+SortClausescl= (SortClause)lfirst(l);`
	`978`	`+TargetEntry*tle=get_sortgroupclause_tle(scl,`
	`979`	`+query->targetList);`
	`980`	`+`
	`981`	`+if (!list_member_int(colnos,tle->resno))`
	`982`	`+break;/* exit early if no match */`
	`983`	`+}`
	`984`	`+if (l==NULL)/* had matches for all? */`
`934`	`985`	`return true;`
`935`	`986`	`}`
`936`	`987`
`937`	`988`	`/*`
`938`		`- * GROUP BY guarantees uniqueness if all the grouped columns appear in`
`939`		`- * the output.In our implementation this means checking they are non`
`940`		`- * resjunk columns.`
	`989`	`+ * Similarly, GROUP BY guarantees uniqueness if all the grouped columns`
	`990`	`+ * appear in colnos.`
`941`	`991`	`*/`
`942`	`992`	`if (query->groupClause)`
`943`	`993`	`{`
`944`		`-ListCell*gl;`
`945`		`-`
`946`		`-foreach(gl,query->groupClause)`
	`994`	`+foreach(l,query->groupClause)`
`947`	`995`	`{`
`948`		`-GroupClausegrpcl= (GroupClause)lfirst(gl);`
	`996`	`+GroupClausegrpcl= (GroupClause)lfirst(l);`
`949`	`997`	`TargetEntry*tle=get_sortgroupclause_tle(grpcl,`
`950`	`998`	`query->targetList);`
`951`	`999`
`952`		`-if (tle->resjunk)`
`953`		`-break;`
	`1000`	`+if (!list_member_int(colnos,tle->resno))`
	`1001`	`+break;/* exit early if no match */`
`954`	`1002`	`}`
`955`		`-if (!gl)/got to the end? /`
	`1003`	`+if (l==NULL)/had matches for all? /`
`956`	`1004`	`return true;`
`957`	`1005`	`}`
`958`	`1006`	`else`
`@@ -965,6 +1013,33 @@ is_distinct_query(Query *query)`
`965`	`1013`	`return true;`
`966`	`1014`	`}`
`967`	`1015`
	`1016`	`+/*`
	`1017`	`+ * UNION, INTERSECT, EXCEPT guarantee uniqueness of the whole output row,`
	`1018`	`+ * except with ALL`
	`1019`	`+ */`
	`1020`	`+if (query->setOperations)`
	`1021`	`+{`
	`1022`	`+SetOperationStmttopop= (SetOperationStmt)query->setOperations;`
	`1023`	`+`
	`1024`	`+Assert(IsA(topop,SetOperationStmt));`
	`1025`	`+Assert(topop->op!=SETOP_NONE);`
	`1026`	`+`
	`1027`	`+if (!topop->all)`
	`1028`	`+{`
	`1029`	`+/* We're good if all the nonjunk output columns are in colnos */`
	`1030`	`+foreach(l,query->targetList)`
	`1031`	`+{`
	`1032`	`+TargetEntrytle= (TargetEntry)lfirst(l);`
	`1033`	`+`
	`1034`	`+if (!tle->resjunk&&`
	`1035`	`+!list_member_int(colnos,tle->resno))`
	`1036`	`+break;/* exit early if no match */`
	`1037`	`+}`
	`1038`	`+if (l==NULL)/* had matches for all? */`
	`1039`	`+return true;`
	`1040`	`+}`
	`1041`	`+}`
	`1042`	`+`
`968`	`1043`	`/*`
`969`	`1044`	`* XXX Are there any other cases in which we can easily see the result`
`970`	`1045`	`* must be distinct?`

`‎src/test/regress/expected/subselect.out‎`

Lines changed: 57 additions & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -202,6 +202,63 @@ select count(distinct ss.ten) from`
`202`	`202`	`10`
`203`	`203`	`(1 row)`
`204`	`204`
	`205`	`+--`
	`206`	`+-- Test cases to check for overenthusiastic optimization of`
	`207`	`+-- "IN (SELECT DISTINCT ...)" and related cases. Per example from`
	`208`	`+-- Luca Pireddu and Michael Fuhr.`
	`209`	`+--`
	`210`	`+CREATE TEMP TABLE foo (id integer);`
	`211`	`+CREATE TEMP TABLE bar (id1 integer, id2 integer);`
	`212`	`+INSERT INTO foo VALUES (1);`
	`213`	`+INSERT INTO bar VALUES (1, 1);`
	`214`	`+INSERT INTO bar VALUES (2, 2);`
	`215`	`+INSERT INTO bar VALUES (3, 1);`
	`216`	`+-- These cases require an extra level of distinct-ing above subquery s`
	`217`	`+SELECT * FROM foo WHERE id IN`
	`218`	`+ (SELECT id2 FROM (SELECT DISTINCT id1, id2 FROM bar) AS s);`
	`219`	`+ id`
	`220`	`+----`
	`221`	`+ 1`
	`222`	`+(1 row)`
	`223`	`+`
	`224`	`+SELECT * FROM foo WHERE id IN`
	`225`	`+ (SELECT id2 FROM (SELECT id1,id2 FROM bar GROUP BY id1,id2) AS s);`
	`226`	`+ id`
	`227`	`+----`
	`228`	`+ 1`
	`229`	`+(1 row)`
	`230`	`+`
	`231`	`+SELECT * FROM foo WHERE id IN`
	`232`	`+ (SELECT id2 FROM (SELECT id1, id2 FROM bar UNION`
	`233`	`+ SELECT id1, id2 FROM bar) AS s);`
	`234`	`+ id`
	`235`	`+----`
	`236`	`+ 1`
	`237`	`+(1 row)`
	`238`	`+`
	`239`	`+-- These cases do not`
	`240`	`+SELECT * FROM foo WHERE id IN`
	`241`	`+ (SELECT id2 FROM (SELECT DISTINCT ON (id2) id1, id2 FROM bar) AS s);`
	`242`	`+ id`
	`243`	`+----`
	`244`	`+ 1`
	`245`	`+(1 row)`
	`246`	`+`
	`247`	`+SELECT * FROM foo WHERE id IN`
	`248`	`+ (SELECT id2 FROM (SELECT id2 FROM bar GROUP BY id2) AS s);`
	`249`	`+ id`
	`250`	`+----`
	`251`	`+ 1`
	`252`	`+(1 row)`
	`253`	`+`
	`254`	`+SELECT * FROM foo WHERE id IN`
	`255`	`+ (SELECT id2 FROM (SELECT id2 FROM bar UNION`
	`256`	`+ SELECT id2 FROM bar) AS s);`
	`257`	`+ id`
	`258`	`+----`
	`259`	`+ 1`
	`260`	`+(1 row)`
	`261`	`+`
`205`	`262`	`--`
`206`	`263`	`-- Test case to catch problems with multiply nested sub-SELECTs not getting`
`207`	`264`	`-- recalculated properly. Per bug report from Didier Moens.`

`‎src/test/regress/sql/subselect.sql‎`

Lines changed: 33 additions & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -95,6 +95,39 @@ select count(distinct ss.ten) from`
`95`	`95`	`(select tenfrom tenk1 a`
`96`	`96`	`where unique1IN (select distinct hundredfrom tenk1 b)) ss;`
`97`	`97`
	`98`	`+--`
	`99`	`+-- Test cases to check for overenthusiastic optimization of`
	`100`	`+-- "IN (SELECT DISTINCT ...)" and related cases. Per example from`
	`101`	`+-- Luca Pireddu and Michael Fuhr.`
	`102`	`+--`
	`103`	`+`
	`104`	`+CREATE TEMP TABLE foo (idinteger);`
	`105`	`+CREATE TEMP TABLE bar (id1integer, id2integer);`
	`106`	`+`
	`107`	`+INSERT INTO fooVALUES (1);`
	`108`	`+`
	`109`	`+INSERT INTO barVALUES (1,1);`
	`110`	`+INSERT INTO barVALUES (2,2);`
	`111`	`+INSERT INTO barVALUES (3,1);`
	`112`	`+`
	`113`	`+-- These cases require an extra level of distinct-ing above subquery s`
	`114`	`+SELECT*FROM fooWHERE idIN`
	`115`	`+ (SELECT id2FROM (SELECT DISTINCT id1, id2FROM bar)AS s);`
	`116`	`+SELECT*FROM fooWHERE idIN`
	`117`	`+ (SELECT id2FROM (SELECT id1,id2FROM barGROUP BY id1,id2)AS s);`
	`118`	`+SELECT*FROM fooWHERE idIN`
	`119`	`+ (SELECT id2FROM (SELECT id1, id2FROM barUNION`
	`120`	`+SELECT id1, id2FROM bar)AS s);`
	`121`	`+`
	`122`	`+-- These cases do not`
	`123`	`+SELECT*FROM fooWHERE idIN`
	`124`	`+ (SELECT id2FROM (SELECT DISTINCTON (id2) id1, id2FROM bar)AS s);`
	`125`	`+SELECT*FROM fooWHERE idIN`
	`126`	`+ (SELECT id2FROM (SELECT id2FROM barGROUP BY id2)AS s);`
	`127`	`+SELECT*FROM fooWHERE idIN`
	`128`	`+ (SELECT id2FROM (SELECT id2FROM barUNION`
	`129`	`+SELECT id2FROM bar)AS s);`
	`130`	`+`
`98`	`131`	`--`
`99`	`132`	`-- Test case to catch problems with multiply nested sub-SELECTs not getting`
`100`	`133`	`-- recalculated properly. Per bug report from Didier Moens.`

0 commit comments

Comments

(0)

Movatterモバイル変換

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Commit0182951

File tree

3 files changed

3 files changed

`‎src/backend/optimizer/util/pathnode.c‎`

`‎src/test/regress/expected/subselect.out‎`

`‎src/test/regress/sql/subselect.sql‎`

0 commit comments