Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commitdea0709

Browse files
committed
Be more careful about the shape of hashable subplan clauses.
nodeSubplan.c expects that the testexpr for a hashable ANY SubPlanhas the form of one or more OpExprs whose LHS is an expression of theouter query's, while the RHS is an expression over Params representingoutput columns of the subquery. However, the planner only went as faras verifying that the clauses were all binary OpExprs. This works99.99% of the time, because the clauses have the right shape whenemitted by the parser --- but it's possible for function inlining tobreak that, as reported by PegoraroF10. To fix, teach the plannerto check that the LHS and RHS contain the right things, or moreaccurately don't contain the wrong things. Given that this has beenbroken for years without anyone noticing, it seems sufficient to justgive up hashing when it happens, rather than go to the trouble ofcommuting the clauses back again (which wouldn't necessarily workanyway).While poking at that, I also noticed that nodeSubplan.c had a baked-inassumption that the number of hash clauses is identical to the numberof subquery output columns. Again, that's fine as far as parser outputgoes, but it's not hard to break it via function inlining. There seemslittle reason for that assumption though --- AFAICS, the only thingit's buying us is not having to store the number of hash clausesexplicitly. Adding code to the planner to reject such cases would takemore code than getting nodeSubplan.c to cope, so I fixed it that way.This has been broken for as long as we've had hashable SubPlans,so back-patch to all supported branches.Discussion:https://postgr.es/m/1549209182255-0.post@n3.nabble.com
1 parent326b5fe commitdea0709

File tree

7 files changed

+223
-29
lines changed

7 files changed

+223
-29
lines changed

‎src/backend/executor/nodeSubplan.c

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -472,7 +472,7 @@ buildSubPlanHash(SubPlanState *node, ExprContext *econtext)
472472
{
473473
SubPlan*subplan=node->subplan;
474474
PlanState*planstate=node->planstate;
475-
intncols=list_length(subplan->paramIds);
475+
intncols=node->numCols;
476476
ExprContext*innerecontext=node->innerecontext;
477477
MemoryContextoldcontext;
478478
longnbuckets;
@@ -787,11 +787,6 @@ ExecInitSubPlan(SubPlan *subplan, PlanState *parent)
787787
ALLOCSET_SMALL_SIZES);
788788
/* and a short-lived exprcontext for function evaluation */
789789
sstate->innerecontext=CreateExprContext(estate);
790-
/* Silly little array of column numbers 1..n */
791-
ncols=list_length(subplan->paramIds);
792-
sstate->keyColIdx= (AttrNumber*)palloc(ncols*sizeof(AttrNumber));
793-
for (i=0;i<ncols;i++)
794-
sstate->keyColIdx[i]=i+1;
795790

796791
/*
797792
* We use ExecProject to evaluate the lefthand and righthand
@@ -823,9 +818,11 @@ ExecInitSubPlan(SubPlan *subplan, PlanState *parent)
823818
(int)nodeTag(subplan->testexpr));
824819
oplist=NIL;/* keep compiler quiet */
825820
}
826-
Assert(list_length(oplist)==ncols);
821+
ncols=list_length(oplist);
827822

828823
lefttlist=righttlist=NIL;
824+
sstate->numCols=ncols;
825+
sstate->keyColIdx= (AttrNumber*)palloc(ncols*sizeof(AttrNumber));
829826
sstate->tab_hash_funcs= (FmgrInfo*)palloc(ncols*sizeof(FmgrInfo));
830827
sstate->tab_eq_funcs= (FmgrInfo*)palloc(ncols*sizeof(FmgrInfo));
831828
sstate->lhs_hash_funcs= (FmgrInfo*)palloc(ncols*sizeof(FmgrInfo));
@@ -877,6 +874,9 @@ ExecInitSubPlan(SubPlan *subplan, PlanState *parent)
877874
fmgr_info(left_hashfn,&sstate->lhs_hash_funcs[i-1]);
878875
fmgr_info(right_hashfn,&sstate->tab_hash_funcs[i-1]);
879876

877+
/* keyColIdx is just column numbers 1..n */
878+
sstate->keyColIdx[i-1]=i;
879+
880880
i++;
881881
}
882882

‎src/backend/optimizer/plan/subselect.c

Lines changed: 55 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ typedef struct finalize_primnode_context
6161
staticNode*build_subplan(PlannerInfo*root,Plan*plan,PlannerInfo*subroot,
6262
List*plan_params,
6363
SubLinkTypesubLinkType,intsubLinkId,
64-
Node*testexpr,booladjust_testexpr,
64+
Node*testexpr,List*testexpr_paramids,
6565
boolunknownEqFalse);
6666
staticList*generate_subquery_params(PlannerInfo*root,List*tlist,
6767
List**paramIds);
@@ -73,7 +73,8 @@ static Node *convert_testexpr(PlannerInfo *root,
7373
staticNode*convert_testexpr_mutator(Node*node,
7474
convert_testexpr_context*context);
7575
staticboolsubplan_is_hashable(Plan*plan);
76-
staticbooltestexpr_is_hashable(Node*testexpr);
76+
staticbooltestexpr_is_hashable(Node*testexpr,List*param_ids);
77+
staticbooltest_opexpr_is_hashable(OpExpr*testexpr,List*param_ids);
7778
staticboolhash_ok_operator(OpExpr*expr);
7879
staticboolsimplify_EXISTS_query(PlannerInfo*root,Query*query);
7980
staticQuery*convert_EXISTS_to_ANY(PlannerInfo*root,Query*subselect,
@@ -240,7 +241,7 @@ make_subplan(PlannerInfo *root, Query *orig_subquery,
240241
/* And convert to SubPlan or InitPlan format. */
241242
result=build_subplan(root,plan,subroot,plan_params,
242243
subLinkType,subLinkId,
243-
testexpr,true,isTopQual);
244+
testexpr,NIL,isTopQual);
244245

245246
/*
246247
* If it's a correlated EXISTS with an unimportant targetlist, we might be
@@ -294,12 +295,11 @@ make_subplan(PlannerInfo *root, Query *orig_subquery,
294295
plan_params,
295296
ANY_SUBLINK,0,
296297
newtestexpr,
297-
false, true));
298+
paramIds,
299+
true));
298300
/* Check we got what we expected */
299301
Assert(hashplan->parParam==NIL);
300302
Assert(hashplan->useHashTable);
301-
/* build_subplan won't have filled in paramIds */
302-
hashplan->paramIds=paramIds;
303303

304304
/* Leave it to the executor to decide which plan to use */
305305
asplan=makeNode(AlternativeSubPlan);
@@ -322,7 +322,7 @@ static Node *
322322
build_subplan(PlannerInfo*root,Plan*plan,PlannerInfo*subroot,
323323
List*plan_params,
324324
SubLinkTypesubLinkType,intsubLinkId,
325-
Node*testexpr,booladjust_testexpr,
325+
Node*testexpr,List*testexpr_paramids,
326326
boolunknownEqFalse)
327327
{
328328
Node*result;
@@ -487,10 +487,10 @@ build_subplan(PlannerInfo *root, Plan *plan, PlannerInfo *subroot,
487487
else
488488
{
489489
/*
490-
* Adjust the Params in the testexpr, unless callersaid it's not
491-
*needed.
490+
* Adjust the Params in the testexpr, unless calleralready took care
491+
*of it (as indicated by passing a list of Param IDs).
492492
*/
493-
if (testexpr&&adjust_testexpr)
493+
if (testexpr&&testexpr_paramids==NIL)
494494
{
495495
List*params;
496496

@@ -502,7 +502,10 @@ build_subplan(PlannerInfo *root, Plan *plan, PlannerInfo *subroot,
502502
params);
503503
}
504504
else
505+
{
505506
splan->testexpr=testexpr;
507+
splan->paramIds=testexpr_paramids;
508+
}
506509

507510
/*
508511
* We can't convert subplans of ALL_SUBLINK or ANY_SUBLINK types to
@@ -514,7 +517,7 @@ build_subplan(PlannerInfo *root, Plan *plan, PlannerInfo *subroot,
514517
if (subLinkType==ANY_SUBLINK&&
515518
splan->parParam==NIL&&
516519
subplan_is_hashable(plan)&&
517-
testexpr_is_hashable(splan->testexpr))
520+
testexpr_is_hashable(splan->testexpr,splan->paramIds))
518521
splan->useHashTable= true;
519522

520523
/*
@@ -736,24 +739,20 @@ subplan_is_hashable(Plan *plan)
736739

737740
/*
738741
* testexpr_is_hashable: is an ANY SubLink's test expression hashable?
742+
*
743+
* To identify LHS vs RHS of the hash expression, we must be given the
744+
* list of output Param IDs of the SubLink's subquery.
739745
*/
740746
staticbool
741-
testexpr_is_hashable(Node*testexpr)
747+
testexpr_is_hashable(Node*testexpr,List*param_ids)
742748
{
743749
/*
744750
* The testexpr must be a single OpExpr, or an AND-clause containing only
745-
* OpExprs.
746-
*
747-
* The combining operators must be hashable and strict. The need for
748-
* hashability is obvious, since we want to use hashing. Without
749-
* strictness, behavior in the presence of nulls is too unpredictable. We
750-
* actually must assume even more than plain strictness: they can't yield
751-
* NULL for non-null inputs, either (see nodeSubplan.c). However, hash
752-
* indexes and hash joins assume that too.
751+
* OpExprs, each of which satisfy test_opexpr_is_hashable().
753752
*/
754753
if (testexpr&&IsA(testexpr,OpExpr))
755754
{
756-
if (hash_ok_operator((OpExpr*)testexpr))
755+
if (test_opexpr_is_hashable((OpExpr*)testexpr,param_ids))
757756
return true;
758757
}
759758
elseif (and_clause(testexpr))
@@ -766,7 +765,7 @@ testexpr_is_hashable(Node *testexpr)
766765

767766
if (!IsA(andarg,OpExpr))
768767
return false;
769-
if (!hash_ok_operator((OpExpr*)andarg))
768+
if (!test_opexpr_is_hashable((OpExpr*)andarg,param_ids))
770769
return false;
771770
}
772771
return true;
@@ -775,6 +774,40 @@ testexpr_is_hashable(Node *testexpr)
775774
return false;
776775
}
777776

777+
staticbool
778+
test_opexpr_is_hashable(OpExpr*testexpr,List*param_ids)
779+
{
780+
/*
781+
* The combining operator must be hashable and strict. The need for
782+
* hashability is obvious, since we want to use hashing. Without
783+
* strictness, behavior in the presence of nulls is too unpredictable. We
784+
* actually must assume even more than plain strictness: it can't yield
785+
* NULL for non-null inputs, either (see nodeSubplan.c). However, hash
786+
* indexes and hash joins assume that too.
787+
*/
788+
if (!hash_ok_operator(testexpr))
789+
return false;
790+
791+
/*
792+
* The left and right inputs must belong to the outer and inner queries
793+
* respectively; hence Params that will be supplied by the subquery must
794+
* not appear in the LHS, and Vars of the outer query must not appear in
795+
* the RHS. (Ordinarily, this must be true because of the way that the
796+
* parser builds an ANY SubLink's testexpr ... but inlining of functions
797+
* could have changed the expression's structure, so we have to check.
798+
* Such cases do not occur often enough to be worth trying to optimize, so
799+
* we don't worry about trying to commute the clause or anything like
800+
* that; we just need to be sure not to build an invalid plan.)
801+
*/
802+
if (list_length(testexpr->args)!=2)
803+
return false;
804+
if (contain_exec_param((Node*)linitial(testexpr->args),param_ids))
805+
return false;
806+
if (contain_var_clause((Node*)lsecond(testexpr->args)))
807+
return false;
808+
return true;
809+
}
810+
778811
/*
779812
* Check expression is hashable + strict
780813
*

‎src/backend/optimizer/util/clauses.c

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@ static bool contain_volatile_functions_not_nextval_walker(Node *node, void *cont
107107
staticboolmax_parallel_hazard_walker(Node*node,
108108
max_parallel_hazard_context*context);
109109
staticboolcontain_nonstrict_functions_walker(Node*node,void*context);
110+
staticboolcontain_exec_param_walker(Node*node,List*param_ids);
110111
staticboolcontain_context_dependent_node(Node*clause);
111112
staticboolcontain_context_dependent_node_walker(Node*node,int*flags);
112113
staticboolcontain_leaked_vars_walker(Node*node,void*context);
@@ -1410,6 +1411,40 @@ contain_nonstrict_functions_walker(Node *node, void *context)
14101411
context);
14111412
}
14121413

1414+
/*****************************************************************************
1415+
*Check clauses for Params
1416+
*****************************************************************************/
1417+
1418+
/*
1419+
* contain_exec_param
1420+
* Recursively search for PARAM_EXEC Params within a clause.
1421+
*
1422+
* Returns true if the clause contains any PARAM_EXEC Param with a paramid
1423+
* appearing in the given list of Param IDs. Does not descend into
1424+
* subqueries!
1425+
*/
1426+
bool
1427+
contain_exec_param(Node*clause,List*param_ids)
1428+
{
1429+
returncontain_exec_param_walker(clause,param_ids);
1430+
}
1431+
1432+
staticbool
1433+
contain_exec_param_walker(Node*node,List*param_ids)
1434+
{
1435+
if (node==NULL)
1436+
return false;
1437+
if (IsA(node,Param))
1438+
{
1439+
Param*p= (Param*)node;
1440+
1441+
if (p->paramkind==PARAM_EXEC&&
1442+
list_member_int(param_ids,p->paramid))
1443+
return true;
1444+
}
1445+
returnexpression_tree_walker(node,contain_exec_param_walker,param_ids);
1446+
}
1447+
14131448
/*****************************************************************************
14141449
*Check clauses for context-dependent nodes
14151450
*****************************************************************************/

‎src/include/nodes/execnodes.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -773,11 +773,13 @@ typedef struct SubPlanState
773773
MemoryContexthashtablecxt;/* memory context containing hash tables */
774774
MemoryContexthashtempcxt;/* temp memory context for hash tables */
775775
ExprContext*innerecontext;/* econtext for computing inner tuples */
776+
/* each of the following fields is an array of length numCols: */
776777
AttrNumber*keyColIdx;/* control data for hash tables */
777778
FmgrInfo*tab_hash_funcs;/* hash functions for table datatype(s) */
778779
FmgrInfo*tab_eq_funcs;/* equality functions for table datatype(s) */
779780
FmgrInfo*lhs_hash_funcs;/* hash functions for lefthand datatype(s) */
780781
FmgrInfo*cur_eq_funcs;/* equality functions for LHS vs. table */
782+
intnumCols;/* number of columns being hashed */
781783
}SubPlanState;
782784

783785
/* ----------------

‎src/include/optimizer/clauses.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ extern bool contain_volatile_functions_not_nextval(Node *clause);
6363
externcharmax_parallel_hazard(Query*parse);
6464
externboolis_parallel_safe(PlannerInfo*root,Node*node);
6565
externboolcontain_nonstrict_functions(Node*clause);
66+
externboolcontain_exec_param(Node*clause,List*param_ids);
6667
externboolcontain_leaked_vars(Node*clause);
6768

6869
externRelidsfind_nonnullable_rels(Node*clause);

‎src/test/regress/expected/subselect.out

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -754,6 +754,85 @@ select '1'::text in (select '1'::name union all select '1'::name);
754754
t
755755
(1 row)
756756

757+
--
758+
-- Test that we don't try to use a hashed subplan if the simplified
759+
-- testexpr isn't of the right shape
760+
--
761+
create temp table inner_text (c1 text, c2 text);
762+
insert into inner_text values ('a', null);
763+
insert into inner_text values ('123', '456');
764+
-- this fails by default, of course
765+
select * from int8_tbl where q1 in (select c1 from inner_text);
766+
ERROR: operator does not exist: bigint = text
767+
LINE 1: select * from int8_tbl where q1 in (select c1 from inner_tex...
768+
^
769+
HINT: No operator matches the given name and argument type(s). You might need to add explicit type casts.
770+
begin;
771+
-- make an operator to allow it to succeed
772+
create function bogus_int8_text_eq(int8, text) returns boolean
773+
language sql as 'select $1::text = $2';
774+
create operator = (procedure=bogus_int8_text_eq, leftarg=int8, rightarg=text);
775+
explain (costs off)
776+
select * from int8_tbl where q1 in (select c1 from inner_text);
777+
QUERY PLAN
778+
--------------------------------
779+
Seq Scan on int8_tbl
780+
Filter: (hashed SubPlan 1)
781+
SubPlan 1
782+
-> Seq Scan on inner_text
783+
(4 rows)
784+
785+
select * from int8_tbl where q1 in (select c1 from inner_text);
786+
q1 | q2
787+
-----+------------------
788+
123 | 456
789+
123 | 4567890123456789
790+
(2 rows)
791+
792+
-- inlining of this function results in unusual number of hash clauses,
793+
-- which we can still cope with
794+
create or replace function bogus_int8_text_eq(int8, text) returns boolean
795+
language sql as 'select $1::text = $2 and $1::text = $2';
796+
explain (costs off)
797+
select * from int8_tbl where q1 in (select c1 from inner_text);
798+
QUERY PLAN
799+
--------------------------------
800+
Seq Scan on int8_tbl
801+
Filter: (hashed SubPlan 1)
802+
SubPlan 1
803+
-> Seq Scan on inner_text
804+
(4 rows)
805+
806+
select * from int8_tbl where q1 in (select c1 from inner_text);
807+
q1 | q2
808+
-----+------------------
809+
123 | 456
810+
123 | 4567890123456789
811+
(2 rows)
812+
813+
-- inlining of this function causes LHS and RHS to be switched,
814+
-- which we can't cope with, so hashing should be abandoned
815+
create or replace function bogus_int8_text_eq(int8, text) returns boolean
816+
language sql as 'select $2 = $1::text';
817+
explain (costs off)
818+
select * from int8_tbl where q1 in (select c1 from inner_text);
819+
QUERY PLAN
820+
--------------------------------------
821+
Seq Scan on int8_tbl
822+
Filter: (SubPlan 1)
823+
SubPlan 1
824+
-> Materialize
825+
-> Seq Scan on inner_text
826+
(5 rows)
827+
828+
select * from int8_tbl where q1 in (select c1 from inner_text);
829+
q1 | q2
830+
-----+------------------
831+
123 | 456
832+
123 | 4567890123456789
833+
(2 rows)
834+
835+
rollback; -- to get rid of the bogus operator
757836
--
758837
-- Test case for planner bug with nested EXISTS handling
759838
--

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp