Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commitf15821e

Browse files
committed
Allow join removal in some cases involving a left join to a subquery.
We can remove a left join to a relation if the relation's output isprovably distinct for the columns involved in the join clause (consideringonly equijoin clauses) and the relation supplies no variables needed abovethe join. Previously, the join removal logic could only prove distinctnessby reference to unique indexes of a table. This patch extends the logicto consider subquery relations, wherein distinctness might be proven byreference to GROUP BY, DISTINCT, etc.We actually already had some code to check that a subquery's output wasprovably distinct, but it was hidden inside pathnode.c; which was a prettybad place for it really, since that file is mostly boilerplate Pathconstruction and comparison. Move that code to analyzejoins.c, which isarguably a more appropriate location, and is certainly the site of thenew usage for it.David Rowley, reviewed by Simon Riggs
1 parent5571caf commitf15821e

File tree

5 files changed

+417
-186
lines changed

5 files changed

+417
-186
lines changed

‎src/backend/optimizer/plan/analyzejoins.c

Lines changed: 281 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -22,17 +22,21 @@
2222
*/
2323
#include"postgres.h"
2424

25+
#include"nodes/nodeFuncs.h"
26+
#include"optimizer/clauses.h"
2527
#include"optimizer/joininfo.h"
2628
#include"optimizer/pathnode.h"
2729
#include"optimizer/paths.h"
2830
#include"optimizer/planmain.h"
29-
#include"optimizer/var.h"
31+
#include"optimizer/tlist.h"
32+
#include"utils/lsyscache.h"
3033

3134
/* local functions */
3235
staticbooljoin_is_removable(PlannerInfo*root,SpecialJoinInfo*sjinfo);
3336
staticvoidremove_rel_from_query(PlannerInfo*root,intrelid,
3437
Relidsjoinrelids);
3538
staticList*remove_rel_from_joinlist(List*joinlist,intrelid,int*nremoved);
39+
staticOiddistinct_col_search(intcolno,List*colnos,List*opids);
3640

3741

3842
/*
@@ -147,18 +151,15 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
147151
{
148152
intinnerrelid;
149153
RelOptInfo*innerrel;
154+
Query*subquery=NULL;
150155
Relidsjoinrelids;
151156
List*clause_list=NIL;
152157
ListCell*l;
153158
intattroff;
154159

155160
/*
156-
* Currently, we only know how to remove left joins to a baserel with
157-
* unique indexes. We can check most of these criteria pretty trivially
158-
* to avoid doing useless extra work. But checking whether any of the
159-
* indexes are unique would require iterating over the indexlist, so for
160-
* now we just make sure there are indexes of some sort or other. If none
161-
* of them are unique, join removal will still fail, just slightly later.
161+
* Must be a non-delaying left join to a single baserel, else we aren't
162+
* going to be able to do anything with it.
162163
*/
163164
if (sjinfo->jointype!=JOIN_LEFT||
164165
sjinfo->delay_upper_joins||
@@ -168,11 +169,39 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
168169
innerrelid=bms_singleton_member(sjinfo->min_righthand);
169170
innerrel=find_base_rel(root,innerrelid);
170171

171-
if (innerrel->reloptkind!=RELOPT_BASEREL||
172-
innerrel->rtekind!=RTE_RELATION||
173-
innerrel->indexlist==NIL)
172+
if (innerrel->reloptkind!=RELOPT_BASEREL)
174173
return false;
175174

175+
/*
176+
* Before we go to the effort of checking whether any innerrel variables
177+
* are needed above the join, make a quick check to eliminate cases in
178+
* which we will surely be unable to prove uniqueness of the innerrel.
179+
*/
180+
if (innerrel->rtekind==RTE_RELATION)
181+
{
182+
/*
183+
* For a plain-relation innerrel, we only know how to prove uniqueness
184+
* by reference to unique indexes. If there are no indexes then
185+
* there's certainly no unique indexes so there's no point in going
186+
* further.
187+
*/
188+
if (innerrel->indexlist==NIL)
189+
return false;
190+
}
191+
elseif (innerrel->rtekind==RTE_SUBQUERY)
192+
{
193+
subquery=root->simple_rte_array[innerrelid]->subquery;
194+
195+
/*
196+
* If the subquery has no qualities that support distinctness proofs
197+
* then there's no point in going further.
198+
*/
199+
if (!query_supports_distinctness(subquery))
200+
return false;
201+
}
202+
else
203+
return false;/* unsupported rtekind */
204+
176205
/* Compute the relid set for the join we are considering */
177206
joinrelids=bms_union(sjinfo->min_lefthand,sjinfo->min_righthand);
178207

@@ -272,12 +301,64 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
272301

273302
/*
274303
* relation_has_unique_index_for automatically adds any usable restriction
275-
* clauses for the innerrel, so we needn't do that here.
304+
* clauses for the innerrel, so we needn't do that here. (XXX we are not
305+
* considering restriction clauses for subqueries; is that worth doing?)
276306
*/
277307

278-
/* Now examine the indexes to see if we have a matching unique index */
279-
if (relation_has_unique_index_for(root,innerrel,clause_list,NIL,NIL))
280-
return true;
308+
if (innerrel->rtekind==RTE_RELATION)
309+
{
310+
/* Now examine the indexes to see if we have a matching unique index */
311+
if (relation_has_unique_index_for(root,innerrel,clause_list,NIL,NIL))
312+
return true;
313+
}
314+
else/* innerrel->rtekind == RTE_SUBQUERY */
315+
{
316+
List*colnos=NIL;
317+
List*opids=NIL;
318+
319+
/*
320+
* Build the argument lists for query_is_distinct_for: a list of
321+
* output column numbers that the query needs to be distinct over, and
322+
* a list of equality operators that the output columns need to be
323+
* distinct according to.
324+
*/
325+
foreach(l,clause_list)
326+
{
327+
RestrictInfo*rinfo= (RestrictInfo*)lfirst(l);
328+
Oidop;
329+
Var*var;
330+
331+
/*
332+
* Get the equality operator we need uniqueness according to.
333+
* (This might be a cross-type operator and thus not exactly the
334+
* same operator the subquery would consider; that's all right
335+
* since query_is_distinct_for can resolve such cases.) The
336+
* mergejoinability test above should have selected only OpExprs.
337+
*/
338+
Assert(IsA(rinfo->clause,OpExpr));
339+
op= ((OpExpr*)rinfo->clause)->opno;
340+
341+
/* clause_sides_match_join identified the inner side for us */
342+
if (rinfo->outer_is_left)
343+
var= (Var*)get_rightop(rinfo->clause);
344+
else
345+
var= (Var*)get_leftop(rinfo->clause);
346+
347+
/*
348+
* If inner side isn't a Var referencing a subquery output column,
349+
* this clause doesn't help us.
350+
*/
351+
if (!var|| !IsA(var,Var)||
352+
var->varno!=innerrelid||var->varlevelsup!=0)
353+
continue;
354+
355+
colnos=lappend_int(colnos,var->varattno);
356+
opids=lappend_oid(opids,op);
357+
}
358+
359+
if (query_is_distinct_for(subquery,colnos,opids))
360+
return true;
361+
}
281362

282363
/*
283364
* Some day it would be nice to check for other methods of establishing
@@ -481,3 +562,189 @@ remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved)
481562

482563
returnresult;
483564
}
565+
566+
567+
/*
568+
* query_supports_distinctness - could the query possibly be proven distinct
569+
*on some set of output columns?
570+
*
571+
* This is effectively a pre-checking function for query_is_distinct_for().
572+
* It must return TRUE if query_is_distinct_for() could possibly return TRUE
573+
* with this query, but it should not expend a lot of cycles. The idea is
574+
* that callers can avoid doing possibly-expensive processing to compute
575+
* query_is_distinct_for()'s argument lists if the call could not possibly
576+
* succeed.
577+
*/
578+
bool
579+
query_supports_distinctness(Query*query)
580+
{
581+
if (query->distinctClause!=NIL||
582+
query->groupClause!=NIL||
583+
query->hasAggs||
584+
query->havingQual||
585+
query->setOperations)
586+
return true;
587+
588+
return false;
589+
}
590+
591+
/*
592+
* query_is_distinct_for - does query never return duplicates of the
593+
*specified columns?
594+
*
595+
* query is a not-yet-planned subquery (in current usage, it's always from
596+
* a subquery RTE, which the planner avoids scribbling on).
597+
*
598+
* colnos is an integer list of output column numbers (resno's). We are
599+
* interested in whether rows consisting of just these columns are certain
600+
* to be distinct. "Distinctness" is defined according to whether the
601+
* corresponding upper-level equality operators listed in opids would think
602+
* the values are distinct. (Note: the opids entries could be cross-type
603+
* operators, and thus not exactly the equality operators that the subquery
604+
* would use itself. We use equality_ops_are_compatible() to check
605+
* compatibility. That looks at btree or hash opfamily membership, and so
606+
* should give trustworthy answers for all operators that we might need
607+
* to deal with here.)
608+
*/
609+
bool
610+
query_is_distinct_for(Query*query,List*colnos,List*opids)
611+
{
612+
ListCell*l;
613+
Oidopid;
614+
615+
Assert(list_length(colnos)==list_length(opids));
616+
617+
/*
618+
* A set-returning function in the query's targetlist can result in
619+
* returning duplicate rows, if the SRF is evaluated after the
620+
* de-duplication step; so we play it safe and say "no" if there are any
621+
* SRFs. (We could be certain that it's okay if SRFs appear only in the
622+
* specified columns, since those must be evaluated before de-duplication;
623+
* but it doesn't presently seem worth the complication to check that.)
624+
*/
625+
if (expression_returns_set((Node*)query->targetList))
626+
return false;
627+
628+
/*
629+
* DISTINCT (including DISTINCT ON) guarantees uniqueness if all the
630+
* columns in the DISTINCT clause appear in colnos and operator semantics
631+
* match.
632+
*/
633+
if (query->distinctClause)
634+
{
635+
foreach(l,query->distinctClause)
636+
{
637+
SortGroupClause*sgc= (SortGroupClause*)lfirst(l);
638+
TargetEntry*tle=get_sortgroupclause_tle(sgc,
639+
query->targetList);
640+
641+
opid=distinct_col_search(tle->resno,colnos,opids);
642+
if (!OidIsValid(opid)||
643+
!equality_ops_are_compatible(opid,sgc->eqop))
644+
break;/* exit early if no match */
645+
}
646+
if (l==NULL)/* had matches for all? */
647+
return true;
648+
}
649+
650+
/*
651+
* Similarly, GROUP BY guarantees uniqueness if all the grouped columns
652+
* appear in colnos and operator semantics match.
653+
*/
654+
if (query->groupClause)
655+
{
656+
foreach(l,query->groupClause)
657+
{
658+
SortGroupClause*sgc= (SortGroupClause*)lfirst(l);
659+
TargetEntry*tle=get_sortgroupclause_tle(sgc,
660+
query->targetList);
661+
662+
opid=distinct_col_search(tle->resno,colnos,opids);
663+
if (!OidIsValid(opid)||
664+
!equality_ops_are_compatible(opid,sgc->eqop))
665+
break;/* exit early if no match */
666+
}
667+
if (l==NULL)/* had matches for all? */
668+
return true;
669+
}
670+
else
671+
{
672+
/*
673+
* If we have no GROUP BY, but do have aggregates or HAVING, then the
674+
* result is at most one row so it's surely unique, for any operators.
675+
*/
676+
if (query->hasAggs||query->havingQual)
677+
return true;
678+
}
679+
680+
/*
681+
* UNION, INTERSECT, EXCEPT guarantee uniqueness of the whole output row,
682+
* except with ALL.
683+
*/
684+
if (query->setOperations)
685+
{
686+
SetOperationStmt*topop= (SetOperationStmt*)query->setOperations;
687+
688+
Assert(IsA(topop,SetOperationStmt));
689+
Assert(topop->op!=SETOP_NONE);
690+
691+
if (!topop->all)
692+
{
693+
ListCell*lg;
694+
695+
/* We're good if all the nonjunk output columns are in colnos */
696+
lg=list_head(topop->groupClauses);
697+
foreach(l,query->targetList)
698+
{
699+
TargetEntry*tle= (TargetEntry*)lfirst(l);
700+
SortGroupClause*sgc;
701+
702+
if (tle->resjunk)
703+
continue;/* ignore resjunk columns */
704+
705+
/* non-resjunk columns should have grouping clauses */
706+
Assert(lg!=NULL);
707+
sgc= (SortGroupClause*)lfirst(lg);
708+
lg=lnext(lg);
709+
710+
opid=distinct_col_search(tle->resno,colnos,opids);
711+
if (!OidIsValid(opid)||
712+
!equality_ops_are_compatible(opid,sgc->eqop))
713+
break;/* exit early if no match */
714+
}
715+
if (l==NULL)/* had matches for all? */
716+
return true;
717+
}
718+
}
719+
720+
/*
721+
* XXX Are there any other cases in which we can easily see the result
722+
* must be distinct?
723+
*
724+
* If you do add more smarts to this function, be sure to update
725+
* query_supports_distinctness() to match.
726+
*/
727+
728+
return false;
729+
}
730+
731+
/*
732+
* distinct_col_search - subroutine for query_is_distinct_for
733+
*
734+
* If colno is in colnos, return the corresponding element of opids,
735+
* else return InvalidOid. (Ordinarily colnos would not contain duplicates,
736+
* but if it does, we arbitrarily select the first match.)
737+
*/
738+
staticOid
739+
distinct_col_search(intcolno,List*colnos,List*opids)
740+
{
741+
ListCell*lc1,
742+
*lc2;
743+
744+
forboth(lc1,colnos,lc2,opids)
745+
{
746+
if (colno==lfirst_int(lc1))
747+
returnlfirst_oid(lc2);
748+
}
749+
returnInvalidOid;
750+
}

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp