Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 2e46b76

Browse files
committed
Extend join-selectivity API (oprjoin interface) so that join type is
passed to join selectivity estimators. Make use of this in eqjoinsel to derive non-bogus selectivity for IN clauses. Further tweaking of cost estimation for IN. initdb forced because of pg_proc.h changes.
1 parent 955a1f8 commit 2e46b76

File tree

16 files changed

+221
-136
lines changed

16 files changed

+221
-136
lines changed

‎doc/src/sgml/indexcost.sgml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
<!--
2-
$Header: /cvsroot/pgsql/doc/src/sgml/Attic/indexcost.sgml,v 2.14 2003/01/14 10:19:02 petere Exp $
2+
$Header: /cvsroot/pgsql/doc/src/sgml/Attic/indexcost.sgml,v 2.15 2003/01/28 22:13:24 tgl Exp $
33
-->
44

55
<chapter id="indexcost">
@@ -205,7 +205,8 @@ amcostestimate (Query *root,
205205

206206
<programlisting>
207207
*indexSelectivity = clauselist_selectivity(root, indexQuals,
208-
lfirsti(rel->relids));
208+
lfirsti(rel->relids),
209+
JOIN_INNER);
209210
</programlisting>
210211
</para>
211212
</step>

‎src/backend/catalog/pg_operator.c

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $Header: /cvsroot/pgsql/src/backend/catalog/pg_operator.c,v 1.77 2002/09/04 20:31:14 momjian Exp $
11+
* $Header: /cvsroot/pgsql/src/backend/catalog/pg_operator.c,v 1.78 2003/01/28 22:13:25 tgl Exp $
1212
*
1313
* NOTES
1414
* these routines moved here from commands/define.c and somewhat cleaned up.
@@ -485,10 +485,11 @@ OperatorCreate(const char *operatorName,
485485
typeId[0]=INTERNALOID;/* Query */
486486
typeId[1]=OIDOID;/* operator OID */
487487
typeId[2]=INTERNALOID;/* args list */
488+
typeId[3]=INT2OID;/* jointype */
488489

489-
joinOid=LookupFuncName(joinName,3,typeId);
490+
joinOid=LookupFuncName(joinName,4,typeId);
490491
if (!OidIsValid(joinOid))
491-
func_error("OperatorDef",joinName,3,typeId,NULL);
492+
func_error("OperatorDef",joinName,4,typeId,NULL);
492493
}
493494
else
494495
joinOid=InvalidOid;

‎src/backend/optimizer/path/clausesel.c

Lines changed: 25 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/clausesel.c,v 1.55 2003/01/15 19:35:39 tgl Exp $
11+
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/clausesel.c,v 1.56 2003/01/28 22:13:29 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -65,12 +65,13 @@ static void addRangeClause(RangeQueryClause **rqlist, Node *clause,
6565
Selectivity
6666
restrictlist_selectivity(Query*root,
6767
List*restrictinfo_list,
68-
intvarRelid)
68+
intvarRelid,
69+
JoinTypejointype)
6970
{
7071
List*clauselist=get_actual_clauses(restrictinfo_list);
7172
Selectivityresult;
7273

73-
result=clauselist_selectivity(root,clauselist,varRelid);
74+
result=clauselist_selectivity(root,clauselist,varRelid,jointype);
7475
freeList(clauselist);
7576
returnresult;
7677
}
@@ -81,7 +82,7 @@ restrictlist_selectivity(Query *root,
8182
* expression clauses. The list can be empty, in which case 1.0
8283
* must be returned.
8384
*
84-
* See clause_selectivity() for the meaning of the varRelid parameter.
85+
* See clause_selectivity() for the meaning of the additional parameters.
8586
*
8687
* Our basic approach is to take the product of the selectivities of the
8788
* subclauses. However, that's only right if the subclauses have independent
@@ -113,7 +114,8 @@ restrictlist_selectivity(Query *root,
113114
Selectivity
114115
clauselist_selectivity(Query*root,
115116
List*clauses,
116-
intvarRelid)
117+
intvarRelid,
118+
JoinTypejointype)
117119
{
118120
Selectivitys1=1.0;
119121
RangeQueryClause*rqlist=NULL;
@@ -184,7 +186,7 @@ clauselist_selectivity(Query *root,
184186
}
185187
}
186188
/* Not the right form, so treat it generically. */
187-
s2=clause_selectivity(root,clause,varRelid);
189+
s2=clause_selectivity(root,clause,varRelid,jointype);
188190
s1=s1*s2;
189191
}
190192

@@ -362,11 +364,15 @@ addRangeClause(RangeQueryClause **rqlist, Node *clause,
362364
*
363365
* When varRelid is 0, all variables are treated as variables. This
364366
* is appropriate for ordinary join clauses and restriction clauses.
367+
*
368+
* jointype is the join type, if the clause is a join clause. Pass JOIN_INNER
369+
* if the clause isn't a join clause or the context is uncertain.
365370
*/
366371
Selectivity
367372
clause_selectivity(Query*root,
368373
Node*clause,
369-
intvarRelid)
374+
intvarRelid,
375+
JoinTypejointype)
370376
{
371377
Selectivitys1=1.0;/* default for any unhandled clause type */
372378

@@ -424,14 +430,16 @@ clause_selectivity(Query *root,
424430
/* inverse of the selectivity of the underlying clause */
425431
s1=1.0-clause_selectivity(root,
426432
(Node*)get_notclausearg((Expr*)clause),
427-
varRelid);
433+
varRelid,
434+
jointype);
428435
}
429436
elseif (and_clause(clause))
430437
{
431438
/* share code with clauselist_selectivity() */
432439
s1=clauselist_selectivity(root,
433440
((BoolExpr*)clause)->args,
434-
varRelid);
441+
varRelid,
442+
jointype);
435443
}
436444
elseif (or_clause(clause))
437445
{
@@ -447,7 +455,8 @@ clause_selectivity(Query *root,
447455
{
448456
Selectivitys2=clause_selectivity(root,
449457
(Node*)lfirst(arg),
450-
varRelid);
458+
varRelid,
459+
jointype);
451460

452461
s1=s1+s2-s1*s2;
453462
}
@@ -479,7 +488,8 @@ clause_selectivity(Query *root,
479488
{
480489
/* Estimate selectivity for a join clause. */
481490
s1=join_selectivity(root,opno,
482-
((OpExpr*)clause)->args);
491+
((OpExpr*)clause)->args,
492+
jointype);
483493
}
484494
else
485495
{
@@ -519,14 +529,16 @@ clause_selectivity(Query *root,
519529
s1=booltestsel(root,
520530
((BooleanTest*)clause)->booltesttype,
521531
(Node*) ((BooleanTest*)clause)->arg,
522-
varRelid);
532+
varRelid,
533+
jointype);
523534
}
524535
elseif (IsA(clause,RelabelType))
525536
{
526537
/* Not sure this case is needed, but it can't hurt */
527538
s1=clause_selectivity(root,
528539
(Node*) ((RelabelType*)clause)->arg,
529-
varRelid);
540+
varRelid,
541+
jointype);
530542
}
531543

532544
#ifdefSELECTIVITY_DEBUG

‎src/backend/optimizer/path/costsize.c

Lines changed: 73 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@
4949
* Portions Copyright (c) 1994, Regents of the University of California
5050
*
5151
* IDENTIFICATION
52-
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.103 2003/01/27 20:51:50 tgl Exp $
52+
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.104 2003/01/28 22:13:33 tgl Exp $
5353
*
5454
*-------------------------------------------------------------------------
5555
*/
@@ -104,7 +104,8 @@ boolenable_hashjoin = true;
104104
staticSelectivityestimate_hash_bucketsize(Query*root,Var*var,
105105
intnbuckets);
106106
staticboolcost_qual_eval_walker(Node*node,QualCost*total);
107-
staticSelectivityapprox_selectivity(Query*root,List*quals);
107+
staticSelectivityapprox_selectivity(Query*root,List*quals,
108+
JoinTypejointype);
108109
staticvoidset_rel_width(Query*root,RelOptInfo*rel);
109110
staticdoublerelation_byte_size(doubletuples,intwidth);
110111
staticdoublepage_size(doubletuples,intwidth);
@@ -697,7 +698,8 @@ cost_nestloop(NestPath *path, Query *root)
697698
*/
698699
if (path->jointype==JOIN_IN)
699700
{
700-
Selectivityqual_selec=approx_selectivity(root,restrictlist);
701+
Selectivityqual_selec=approx_selectivity(root,restrictlist,
702+
path->jointype);
701703
doubleqptuples;
702704

703705
qptuples=ceil(qual_selec*outer_path_rows*inner_path_rows);
@@ -816,10 +818,12 @@ cost_mergejoin(MergePath *path, Query *root)
816818
* Note: it's probably bogus to use the normal selectivity calculation
817819
* here when either the outer or inner path is a UniquePath.
818820
*/
819-
merge_selec=approx_selectivity(root,mergeclauses);
821+
merge_selec=approx_selectivity(root,mergeclauses,
822+
path->jpath.jointype);
820823
cost_qual_eval(&merge_qual_cost,mergeclauses);
821824
qpquals=set_ptrDifference(restrictlist,mergeclauses);
822-
qp_selec=approx_selectivity(root,qpquals);
825+
qp_selec=approx_selectivity(root,qpquals,
826+
path->jpath.jointype);
823827
cost_qual_eval(&qp_qual_cost,qpquals);
824828
freeList(qpquals);
825829

@@ -1044,10 +1048,12 @@ cost_hashjoin(HashPath *path, Query *root)
10441048
* Note: it's probably bogus to use the normal selectivity calculation
10451049
* here when either the outer or inner path is a UniquePath.
10461050
*/
1047-
hash_selec=approx_selectivity(root,hashclauses);
1051+
hash_selec=approx_selectivity(root,hashclauses,
1052+
path->jpath.jointype);
10481053
cost_qual_eval(&hash_qual_cost,hashclauses);
10491054
qpquals=set_ptrDifference(restrictlist,hashclauses);
1050-
qp_selec=approx_selectivity(root,qpquals);
1055+
qp_selec=approx_selectivity(root,qpquals,
1056+
path->jpath.jointype);
10511057
cost_qual_eval(&qp_qual_cost,qpquals);
10521058
freeList(qpquals);
10531059

@@ -1084,54 +1090,67 @@ cost_hashjoin(HashPath *path, Query *root)
10841090
* Determine bucketsize fraction for inner relation. We use the
10851091
* smallest bucketsize estimated for any individual hashclause;
10861092
* this is undoubtedly conservative.
1093+
*
1094+
* BUT: if inner relation has been unique-ified, we can assume it's
1095+
* good for hashing. This is important both because it's the right
1096+
* answer, and because we avoid contaminating the cache with a value
1097+
* that's wrong for non-unique-ified paths.
10871098
*/
1088-
innerbucketsize=1.0;
1089-
foreach(hcl,hashclauses)
1099+
if (IsA(inner_path,UniquePath))
1100+
innerbucketsize=1.0 /virtualbuckets;
1101+
else
10901102
{
1091-
RestrictInfo*restrictinfo= (RestrictInfo*)lfirst(hcl);
1092-
Selectivitythisbucketsize;
1103+
innerbucketsize=1.0;
1104+
foreach(hcl,hashclauses)
1105+
{
1106+
RestrictInfo*restrictinfo= (RestrictInfo*)lfirst(hcl);
1107+
Selectivitythisbucketsize;
10931108

1094-
Assert(IsA(restrictinfo,RestrictInfo));
1109+
Assert(IsA(restrictinfo,RestrictInfo));
10951110

1096-
/*
1097-
* First we have to figure out which side of the hashjoin clause is the
1098-
* inner side.
1099-
*
1100-
* Since we tend to visit the same clauses over and over when planning
1101-
* a large query, we cache the bucketsize estimate in the RestrictInfo
1102-
* node to avoid repeated lookups of statistics.
1103-
*/
1104-
if (is_subseti(restrictinfo->right_relids,inner_path->parent->relids))
1105-
{
1106-
/* righthand side is inner */
1107-
thisbucketsize=restrictinfo->right_bucketsize;
1108-
if (thisbucketsize<0)
1111+
/*
1112+
* First we have to figure out which side of the hashjoin clause
1113+
* is the inner side.
1114+
*
1115+
* Since we tend to visit the same clauses over and over when
1116+
* planning a large query, we cache the bucketsize estimate in the
1117+
* RestrictInfo node to avoid repeated lookups of statistics.
1118+
*/
1119+
if (is_subseti(restrictinfo->right_relids,
1120+
inner_path->parent->relids))
11091121
{
1110-
/* not cached yet */
1111-
thisbucketsize=estimate_hash_bucketsize(root,
1122+
/* righthand side is inner */
1123+
thisbucketsize=restrictinfo->right_bucketsize;
1124+
if (thisbucketsize<0)
1125+
{
1126+
/* not cached yet */
1127+
thisbucketsize=
1128+
estimate_hash_bucketsize(root,
11121129
(Var*)get_rightop(restrictinfo->clause),
1113-
virtualbuckets);
1114-
restrictinfo->right_bucketsize=thisbucketsize;
1130+
virtualbuckets);
1131+
restrictinfo->right_bucketsize=thisbucketsize;
1132+
}
11151133
}
1116-
}
1117-
else
1118-
{
1119-
Assert(is_subseti(restrictinfo->left_relids,
1120-
inner_path->parent->relids));
1121-
/* lefthand side is inner */
1122-
thisbucketsize=restrictinfo->left_bucketsize;
1123-
if (thisbucketsize<0)
1134+
else
11241135
{
1125-
/* not cached yet */
1126-
thisbucketsize=estimate_hash_bucketsize(root,
1136+
Assert(is_subseti(restrictinfo->left_relids,
1137+
inner_path->parent->relids));
1138+
/* lefthand side is inner */
1139+
thisbucketsize=restrictinfo->left_bucketsize;
1140+
if (thisbucketsize<0)
1141+
{
1142+
/* not cached yet */
1143+
thisbucketsize=
1144+
estimate_hash_bucketsize(root,
11271145
(Var*)get_leftop(restrictinfo->clause),
1128-
virtualbuckets);
1129-
restrictinfo->left_bucketsize=thisbucketsize;
1146+
virtualbuckets);
1147+
restrictinfo->left_bucketsize=thisbucketsize;
1148+
}
11301149
}
1131-
}
11321150

1133-
if (innerbucketsize>thisbucketsize)
1134-
innerbucketsize=thisbucketsize;
1151+
if (innerbucketsize>thisbucketsize)
1152+
innerbucketsize=thisbucketsize;
1153+
}
11351154
}
11361155

11371156
/*
@@ -1557,7 +1576,7 @@ cost_qual_eval_walker(Node *node, QualCost *total)
15571576
* seems OK to live with the approximation.
15581577
*/
15591578
staticSelectivity
1560-
approx_selectivity(Query*root,List*quals)
1579+
approx_selectivity(Query*root,List*quals,JoinTypejointype)
15611580
{
15621581
Selectivitytotal=1.0;
15631582
List*l;
@@ -1582,13 +1601,14 @@ approx_selectivity(Query *root, List *quals)
15821601
restrictinfo->this_selec=
15831602
clause_selectivity(root,
15841603
(Node*)restrictinfo->clause,
1585-
0);
1604+
0,
1605+
jointype);
15861606
selec=restrictinfo->this_selec;
15871607
}
15881608
else
15891609
{
15901610
/* If it's a bare expression, must always do it the hard way */
1591-
selec=clause_selectivity(root,qual,0);
1611+
selec=clause_selectivity(root,qual,0,jointype);
15921612
}
15931613
total *=selec;
15941614
}
@@ -1620,7 +1640,8 @@ set_baserel_size_estimates(Query *root, RelOptInfo *rel)
16201640
temp=rel->tuples*
16211641
restrictlist_selectivity(root,
16221642
rel->baserestrictinfo,
1623-
lfirsti(rel->relids));
1643+
lfirsti(rel->relids),
1644+
JOIN_INNER);
16241645

16251646
/*
16261647
* Force estimate to be at least one row, to make explain output look
@@ -1682,7 +1703,8 @@ set_joinrel_size_estimates(Query *root, RelOptInfo *rel,
16821703
*/
16831704
selec=restrictlist_selectivity(root,
16841705
restrictlist,
1685-
0);
1706+
0,
1707+
jointype);
16861708

16871709
/*
16881710
* Basically, we multiply size of Cartesian product by selectivity.
@@ -1694,8 +1716,6 @@ set_joinrel_size_estimates(Query *root, RelOptInfo *rel,
16941716
* For JOIN_IN and variants, the Cartesian product is figured with
16951717
* respect to a unique-ified input, and then we can clamp to the size
16961718
* of the other input.
1697-
* XXX it's not at all clear that the ordinary selectivity calculation
1698-
* is appropriate in this case.
16991719
*/
17001720
switch (jointype)
17011721
{
@@ -1798,7 +1818,8 @@ set_function_size_estimates(Query *root, RelOptInfo *rel)
17981818
temp=rel->tuples*
17991819
restrictlist_selectivity(root,
18001820
rel->baserestrictinfo,
1801-
lfirsti(rel->relids));
1821+
lfirsti(rel->relids),
1822+
JOIN_INNER);
18021823

18031824
/*
18041825
* Force estimate to be at least one row, to make explain output look

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp