Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commitb557226

Browse files
committed
Improve planner's cost estimation in the presence of semijoins.
If we have a semijoin, saySELECT * FROM x WHERE x1 IN (SELECT y1 FROM y)and we're estimating the cost of a parameterized indexscan on x, the numberof repetitions of the indexscan should not be taken as the size of y; it'llreally only be the number of distinct values of y1, because the only validplan with y on the outside of a nestloop would require y to be unique-ifiedbefore joining it to x. Most of the time this doesn't make that muchdifference, but sometimes it can lead to drastically underestimating thecost of the indexscan and hence choosing a bad plan, as pointed out byDavid Kubečka.Fixing this is a bit difficult because parameterized indexscans are costedout quite early in the planning process, before we have the informationthat would be needed to call estimate_num_groups() and thereby estimate thenumber of distinct values of the join column(s). However we can move thecode that extracts a semijoin RHS's unique-ification columns, so that it'sdone in initsplan.c rather than on-the-fly in create_unique_path(). Thatshouldn't make any difference speed-wise and it's really a bit cleaner too.The other bit of information we need is the size of the semijoin RHS,which is easy if it's a single relation (we make those estimates beforeconsidering indexscan costs) but problematic if it's a join relation.The solution adopted here is just to use the product of the sizes of thejoin component rels. That will generally be an overestimate, but sinceestimate_num_groups() only uses this input as a clamp, an overestimateshouldn't hurt us too badly. In any case we don't allow this new logicto produce a value larger than we would have chosen before, so that atworst an overestimate leaves us no wiser than we were before.
1 parentff2faee commitb557226

File tree

10 files changed

+385
-224
lines changed

10 files changed

+385
-224
lines changed

‎src/backend/nodes/copyfuncs.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1942,7 +1942,10 @@ _copySpecialJoinInfo(const SpecialJoinInfo *from)
19421942
COPY_SCALAR_FIELD(jointype);
19431943
COPY_SCALAR_FIELD(lhs_strict);
19441944
COPY_SCALAR_FIELD(delay_upper_joins);
1945-
COPY_NODE_FIELD(join_quals);
1945+
COPY_SCALAR_FIELD(semi_can_btree);
1946+
COPY_SCALAR_FIELD(semi_can_hash);
1947+
COPY_NODE_FIELD(semi_operators);
1948+
COPY_NODE_FIELD(semi_rhs_exprs);
19461949

19471950
returnnewnode;
19481951
}

‎src/backend/nodes/equalfuncs.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -798,7 +798,10 @@ _equalSpecialJoinInfo(const SpecialJoinInfo *a, const SpecialJoinInfo *b)
798798
COMPARE_SCALAR_FIELD(jointype);
799799
COMPARE_SCALAR_FIELD(lhs_strict);
800800
COMPARE_SCALAR_FIELD(delay_upper_joins);
801-
COMPARE_NODE_FIELD(join_quals);
801+
COMPARE_SCALAR_FIELD(semi_can_btree);
802+
COMPARE_SCALAR_FIELD(semi_can_hash);
803+
COMPARE_NODE_FIELD(semi_operators);
804+
COMPARE_NODE_FIELD(semi_rhs_exprs);
802805

803806
return true;
804807
}

‎src/backend/nodes/outfuncs.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1945,7 +1945,10 @@ _outSpecialJoinInfo(StringInfo str, const SpecialJoinInfo *node)
19451945
WRITE_ENUM_FIELD(jointype,JoinType);
19461946
WRITE_BOOL_FIELD(lhs_strict);
19471947
WRITE_BOOL_FIELD(delay_upper_joins);
1948-
WRITE_NODE_FIELD(join_quals);
1948+
WRITE_BOOL_FIELD(semi_can_btree);
1949+
WRITE_BOOL_FIELD(semi_can_hash);
1950+
WRITE_NODE_FIELD(semi_operators);
1951+
WRITE_NODE_FIELD(semi_rhs_exprs);
19491952
}
19501953

19511954
staticvoid

‎src/backend/optimizer/path/costsize.c

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3294,7 +3294,10 @@ compute_semi_anti_join_factors(PlannerInfo *root,
32943294
/* we don't bother trying to make the remaining fields valid */
32953295
norm_sjinfo.lhs_strict= false;
32963296
norm_sjinfo.delay_upper_joins= false;
3297-
norm_sjinfo.join_quals=NIL;
3297+
norm_sjinfo.semi_can_btree= false;
3298+
norm_sjinfo.semi_can_hash= false;
3299+
norm_sjinfo.semi_operators=NIL;
3300+
norm_sjinfo.semi_rhs_exprs=NIL;
32983301

32993302
nselec=clauselist_selectivity(root,
33003303
joinquals,
@@ -3456,7 +3459,10 @@ approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals)
34563459
/* we don't bother trying to make the remaining fields valid */
34573460
sjinfo.lhs_strict= false;
34583461
sjinfo.delay_upper_joins= false;
3459-
sjinfo.join_quals=NIL;
3462+
sjinfo.semi_can_btree= false;
3463+
sjinfo.semi_can_hash= false;
3464+
sjinfo.semi_operators=NIL;
3465+
sjinfo.semi_rhs_exprs=NIL;
34603466

34613467
/* Get the approximate selectivity */
34623468
foreach(l,quals)

‎src/backend/optimizer/path/indxpath.c

Lines changed: 128 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,12 @@ static Relids get_bitmap_tree_required_outer(Path *bitmapqual);
130130
staticvoidfind_indexpath_quals(Path*bitmapqual,List**quals,List**preds);
131131
staticintfind_list_position(Node*node,List**nodelist);
132132
staticboolcheck_index_only(RelOptInfo*rel,IndexOptInfo*index);
133-
staticdoubleget_loop_count(PlannerInfo*root,Relidsouter_relids);
133+
staticdoubleget_loop_count(PlannerInfo*root,Indexcur_relid,Relidsouter_relids);
134+
staticdoubleadjust_rowcount_for_semijoins(PlannerInfo*root,
135+
Indexcur_relid,
136+
Indexouter_relid,
137+
doublerowcount);
138+
staticdoubleapproximate_joinrel_size(PlannerInfo*root,Relidsrelids);
134139
staticvoidmatch_restriction_clauses_to_index(RelOptInfo*rel,
135140
IndexOptInfo*index,
136141
IndexClauseSet*clauseset);
@@ -402,7 +407,7 @@ create_index_paths(PlannerInfo *root, RelOptInfo *rel)
402407

403408
/* And push that path into the mix */
404409
required_outer=get_bitmap_tree_required_outer(bitmapqual);
405-
loop_count=get_loop_count(root,required_outer);
410+
loop_count=get_loop_count(root,rel->relid,required_outer);
406411
bpath=create_bitmap_heap_path(root,rel,bitmapqual,
407412
required_outer,loop_count);
408413
add_path(rel, (Path*)bpath);
@@ -969,7 +974,7 @@ build_index_paths(PlannerInfo *root, RelOptInfo *rel,
969974
outer_relids=NULL;
970975

971976
/* Compute loop_count for cost estimation purposes */
972-
loop_count=get_loop_count(root,outer_relids);
977+
loop_count=get_loop_count(root,rel->relid,outer_relids);
973978

974979
/*
975980
* 2. Compute pathkeys describing index's ordering, if any, then see how
@@ -1553,7 +1558,7 @@ bitmap_scan_cost_est(PlannerInfo *root, RelOptInfo *rel, Path *ipath)
15531558
cost_bitmap_heap_scan(&bpath.path,root,rel,
15541559
bpath.path.param_info,
15551560
ipath,
1556-
get_loop_count(root,required_outer));
1561+
get_loop_count(root,rel->relid,required_outer));
15571562

15581563
returnbpath.path.total_cost;
15591564
}
@@ -1594,7 +1599,7 @@ bitmap_and_cost_est(PlannerInfo *root, RelOptInfo *rel, List *paths)
15941599
cost_bitmap_heap_scan(&bpath.path,root,rel,
15951600
bpath.path.param_info,
15961601
(Path*)&apath,
1597-
get_loop_count(root,required_outer));
1602+
get_loop_count(root,rel->relid,required_outer));
15981603

15991604
returnbpath.path.total_cost;
16001605
}
@@ -1861,48 +1866,142 @@ check_index_only(RelOptInfo *rel, IndexOptInfo *index)
18611866
* answer for single-other-relation cases, and it seems like a reasonable
18621867
* zero-order approximation for multiway-join cases.
18631868
*
1869+
* In addition, we check to see if the other side of each join clause is on
1870+
* the inside of some semijoin that the current relation is on the outside of.
1871+
* If so, the only way that a parameterized path could be used is if the
1872+
* semijoin RHS has been unique-ified, so we should use the number of unique
1873+
* RHS rows rather than using the relation's raw rowcount.
1874+
*
18641875
* Note: for this to work, allpaths.c must establish all baserel size
18651876
* estimates before it begins to compute paths, or at least before it
18661877
* calls create_index_paths().
18671878
*/
18681879
staticdouble
1869-
get_loop_count(PlannerInfo*root,Relidsouter_relids)
1880+
get_loop_count(PlannerInfo*root,Indexcur_relid,Relidsouter_relids)
18701881
{
1871-
doubleresult=1.0;
1882+
doubleresult;
1883+
intouter_relid;
18721884

18731885
/* For a non-parameterized path, just return 1.0 quickly */
1874-
if (outer_relids!=NULL)
1886+
if (outer_relids==NULL)
1887+
return1.0;
1888+
1889+
result=1.0;
1890+
outer_relid=-1;
1891+
while ((outer_relid=bms_next_member(outer_relids,outer_relid)) >=0)
18751892
{
1876-
intrelid;
1893+
RelOptInfo*outer_rel;
1894+
doublerowcount;
18771895

1878-
relid=-1;
1879-
while ((relid=bms_next_member(outer_relids,relid)) >=0)
1880-
{
1881-
RelOptInfo*outer_rel;
1896+
/* Paranoia: ignore bogus relid indexes */
1897+
if (outer_relid >=root->simple_rel_array_size)
1898+
continue;
1899+
outer_rel=root->simple_rel_array[outer_relid];
1900+
if (outer_rel==NULL)
1901+
continue;
1902+
Assert(outer_rel->relid==outer_relid);/* sanity check on array */
18821903

1883-
/* Paranoia: ignore bogus relid indexes */
1884-
if (relid >=root->simple_rel_array_size)
1885-
continue;
1886-
outer_rel=root->simple_rel_array[relid];
1887-
if (outer_rel==NULL)
1888-
continue;
1889-
Assert(outer_rel->relid==relid);/* sanity check on array */
1904+
/* Other relation could be proven empty, if so ignore */
1905+
if (IS_DUMMY_REL(outer_rel))
1906+
continue;
18901907

1891-
/* Other relation could be proven empty, if so ignore */
1892-
if (IS_DUMMY_REL(outer_rel))
1893-
continue;
1908+
/* Otherwise, rel's rows estimate should be valid by now */
1909+
Assert(outer_rel->rows>0);
18941910

1895-
/* Otherwise, rel's rows estimate should be valid by now */
1896-
Assert(outer_rel->rows>0);
1911+
/* Check to see if rel is on the inside of any semijoins */
1912+
rowcount=adjust_rowcount_for_semijoins(root,
1913+
cur_relid,
1914+
outer_relid,
1915+
outer_rel->rows);
18971916

1898-
/* Remember smallest row count estimate among the outer rels */
1899-
if (result==1.0||result>outer_rel->rows)
1900-
result=outer_rel->rows;
1901-
}
1917+
/* Remember smallest row count estimate among the outer rels */
1918+
if (result==1.0||result>rowcount)
1919+
result=rowcount;
19021920
}
19031921
returnresult;
19041922
}
19051923

1924+
/*
1925+
* Check to see if outer_relid is on the inside of any semijoin that cur_relid
1926+
* is on the outside of. If so, replace rowcount with the estimated number of
1927+
* unique rows from the semijoin RHS (assuming that's smaller, which it might
1928+
* not be). The estimate is crude but it's the best we can do at this stage
1929+
* of the proceedings.
1930+
*/
1931+
staticdouble
1932+
adjust_rowcount_for_semijoins(PlannerInfo*root,
1933+
Indexcur_relid,
1934+
Indexouter_relid,
1935+
doublerowcount)
1936+
{
1937+
ListCell*lc;
1938+
1939+
foreach(lc,root->join_info_list)
1940+
{
1941+
SpecialJoinInfo*sjinfo= (SpecialJoinInfo*)lfirst(lc);
1942+
1943+
if (sjinfo->jointype==JOIN_SEMI&&
1944+
bms_is_member(cur_relid,sjinfo->syn_lefthand)&&
1945+
bms_is_member(outer_relid,sjinfo->syn_righthand))
1946+
{
1947+
/* Estimate number of unique-ified rows */
1948+
doublenraw;
1949+
doublenunique;
1950+
1951+
nraw=approximate_joinrel_size(root,sjinfo->syn_righthand);
1952+
nunique=estimate_num_groups(root,
1953+
sjinfo->semi_rhs_exprs,
1954+
nraw);
1955+
if (rowcount>nunique)
1956+
rowcount=nunique;
1957+
}
1958+
}
1959+
returnrowcount;
1960+
}
1961+
1962+
/*
1963+
* Make an approximate estimate of the size of a joinrel.
1964+
*
1965+
* We don't have enough info at this point to get a good estimate, so we
1966+
* just multiply the base relation sizes together. Fortunately, this is
1967+
* the right answer anyway for the most common case with a single relation
1968+
* on the RHS of a semijoin. Also, estimate_num_groups() has only a weak
1969+
* dependency on its input_rows argument (it basically uses it as a clamp).
1970+
* So we might be able to get a fairly decent end result even with a severe
1971+
* overestimate of the RHS's raw size.
1972+
*/
1973+
staticdouble
1974+
approximate_joinrel_size(PlannerInfo*root,Relidsrelids)
1975+
{
1976+
doublerowcount=1.0;
1977+
intrelid;
1978+
1979+
relid=-1;
1980+
while ((relid=bms_next_member(relids,relid)) >=0)
1981+
{
1982+
RelOptInfo*rel;
1983+
1984+
/* Paranoia: ignore bogus relid indexes */
1985+
if (relid >=root->simple_rel_array_size)
1986+
continue;
1987+
rel=root->simple_rel_array[relid];
1988+
if (rel==NULL)
1989+
continue;
1990+
Assert(rel->relid==relid);/* sanity check on array */
1991+
1992+
/* Relation could be proven empty, if so ignore */
1993+
if (IS_DUMMY_REL(rel))
1994+
continue;
1995+
1996+
/* Otherwise, rel's rows estimate should be valid by now */
1997+
Assert(rel->rows>0);
1998+
1999+
/* Accumulate product */
2000+
rowcount *=rel->rows;
2001+
}
2002+
returnrowcount;
2003+
}
2004+
19062005

19072006
/****************************************************************************
19082007
*---- ROUTINES TO CHECK QUERY CLAUSES ----

‎src/backend/optimizer/path/joinrels.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -624,7 +624,10 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
624624
/* we don't bother trying to make the remaining fields valid */
625625
sjinfo->lhs_strict= false;
626626
sjinfo->delay_upper_joins= false;
627-
sjinfo->join_quals=NIL;
627+
sjinfo->semi_can_btree= false;
628+
sjinfo->semi_can_hash= false;
629+
sjinfo->semi_operators=NIL;
630+
sjinfo->semi_rhs_exprs=NIL;
628631
}
629632

630633
/*

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp