Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit7e534ad

Browse files
committed
Fix BRIN cost estimation
The original code was overly optimistic about the cost of scanning aBRIN index, leading to BRIN indexes being selected when they'd be aworse choice than some other index. This complete rewrite should bemore accurate.Author: David Rowley, based on an earlier patch by Emre HasegeliReviewed-by: Emre HasegeliDiscussion:https://postgr.es/m/CAKJS1f9n-Wapop5Xz1dtGdpdqmzeGqQK4sV2MK-zZugfC14Xtw@mail.gmail.com
1 parentb2ff37d commit7e534ad

File tree

5 files changed

+248
-26
lines changed

5 files changed

+248
-26
lines changed

‎src/backend/access/brin/brin.c

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1049,6 +1049,27 @@ brin_free_desc(BrinDesc *bdesc)
10491049
MemoryContextDelete(bdesc->bd_context);
10501050
}
10511051

1052+
/*
1053+
* Fetch index's statistical data into *stats
1054+
*/
1055+
void
1056+
brinGetStats(Relationindex,BrinStatsData*stats)
1057+
{
1058+
Buffermetabuffer;
1059+
Pagemetapage;
1060+
BrinMetaPageData*metadata;
1061+
1062+
metabuffer=ReadBuffer(index,BRIN_METAPAGE_BLKNO);
1063+
LockBuffer(metabuffer,BUFFER_LOCK_SHARE);
1064+
metapage=BufferGetPage(metabuffer);
1065+
metadata= (BrinMetaPageData*)PageGetContents(metapage);
1066+
1067+
stats->pagesPerRange=metadata->pagesPerRange;
1068+
stats->revmapNumPages=metadata->lastRevmapPage-1;
1069+
1070+
UnlockReleaseBuffer(metabuffer);
1071+
}
1072+
10521073
/*
10531074
* Initialize a BrinBuildState appropriate to create tuples on the given index.
10541075
*/

‎src/backend/utils/adt/selfuncs.c

Lines changed: 171 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@
101101
#include<float.h>
102102
#include<math.h>
103103

104+
#include"access/brin.h"
104105
#include"access/gin.h"
105106
#include"access/htup_details.h"
106107
#include"access/sysattr.h"
@@ -7721,56 +7722,200 @@ brincostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
77217722
{
77227723
IndexOptInfo*index=path->indexinfo;
77237724
List*indexQuals=path->indexquals;
7724-
List*indexOrderBys=path->indexorderbys;
77257725
doublenumPages=index->pages;
7726-
doublenumTuples=index->tuples;
7726+
RelOptInfo*baserel=index->rel;
7727+
RangeTblEntry*rte=planner_rt_fetch(baserel->relid,root);
77277728
List*qinfos;
77287729
Costspc_seq_page_cost;
77297730
Costspc_random_page_cost;
7730-
doublequal_op_cost;
77317731
doublequal_arg_cost;
7732+
doublequalSelectivity;
7733+
BrinStatsDatastatsData;
7734+
doubleindexRanges;
7735+
doubleminimalRanges;
7736+
doubleestimatedRanges;
7737+
doubleselec;
7738+
RelationindexRel;
7739+
ListCell*l;
7740+
VariableStatDatavardata;
77327741

7733-
/* Do preliminary analysis of indexquals */
7734-
qinfos=deconstruct_indexquals(path);
7742+
Assert(rte->rtekind==RTE_RELATION);
77357743

7736-
/* fetch estimated page cost for tablespace containing index */
7744+
/* fetch estimated page cost forthetablespace containing the index */
77377745
get_tablespace_page_costs(index->reltablespace,
77387746
&spc_random_page_cost,
77397747
&spc_seq_page_cost);
77407748

77417749
/*
7742-
* BRIN indexes are always read in full; use that as startup cost.
7750+
* Obtain some data from the index itself.
7751+
*/
7752+
indexRel=index_open(index->indexoid,AccessShareLock);
7753+
brinGetStats(indexRel,&statsData);
7754+
index_close(indexRel,AccessShareLock);
7755+
7756+
/*
7757+
* Compute index correlation
77437758
*
7744-
* XXX maybe only include revmap pages here?
7759+
* Because we can use all index quals equally when scanning, we can use
7760+
* the largest correlation (in absolute value) among columns used by the
7761+
* query. Start at zero, the worst possible case. If we cannot find
7762+
* any correlation statistics, we will keep it as 0.
7763+
*/
7764+
*indexCorrelation=0;
7765+
7766+
qinfos=deconstruct_indexquals(path);
7767+
foreach(l,qinfos)
7768+
{
7769+
IndexQualInfo*qinfo= (IndexQualInfo*)lfirst(l);
7770+
AttrNumberattnum=index->indexkeys[qinfo->indexcol];
7771+
7772+
/* attempt to lookup stats in relation for this index column */
7773+
if (attnum!=0)
7774+
{
7775+
/* Simple variable -- look to stats for the underlying table */
7776+
if (get_relation_stats_hook&&
7777+
(*get_relation_stats_hook) (root,rte,attnum,&vardata))
7778+
{
7779+
/*
7780+
* The hook took control of acquiring a stats tuple. If it
7781+
* did supply a tuple, it'd better have supplied a freefunc.
7782+
*/
7783+
if (HeapTupleIsValid(vardata.statsTuple)&& !vardata.freefunc)
7784+
elog(ERROR,
7785+
"no function provided to release variable stats with");
7786+
}
7787+
else
7788+
{
7789+
vardata.statsTuple=
7790+
SearchSysCache3(STATRELATTINH,
7791+
ObjectIdGetDatum(rte->relid),
7792+
Int16GetDatum(attnum),
7793+
BoolGetDatum(false));
7794+
vardata.freefunc=ReleaseSysCache;
7795+
}
7796+
}
7797+
else
7798+
{
7799+
/*
7800+
* Looks like we've found an expression column in the index. Let's
7801+
* see if there's any stats for it.
7802+
*/
7803+
7804+
/* get the attnum from the 0-based index. */
7805+
attnum=qinfo->indexcol+1;
7806+
7807+
if (get_index_stats_hook&&
7808+
(*get_index_stats_hook) (root,index->indexoid,attnum,&vardata))
7809+
{
7810+
/*
7811+
* The hook took control of acquiring a stats tuple. If it did
7812+
* supply a tuple, it'd better have supplied a freefunc.
7813+
*/
7814+
if (HeapTupleIsValid(vardata.statsTuple)&&
7815+
!vardata.freefunc)
7816+
elog(ERROR,"no function provided to release variable stats with");
7817+
}
7818+
else
7819+
{
7820+
vardata.statsTuple=SearchSysCache3(STATRELATTINH,
7821+
ObjectIdGetDatum(index->indexoid),
7822+
Int16GetDatum(attnum),
7823+
BoolGetDatum(false));
7824+
vardata.freefunc=ReleaseSysCache;
7825+
}
7826+
}
7827+
7828+
if (HeapTupleIsValid(vardata.statsTuple))
7829+
{
7830+
float4*numbers;
7831+
intnnumbers;
7832+
7833+
if (get_attstatsslot(vardata.statsTuple,InvalidOid,0,
7834+
STATISTIC_KIND_CORRELATION,
7835+
InvalidOid,
7836+
NULL,
7837+
NULL,NULL,
7838+
&numbers,&nnumbers))
7839+
{
7840+
doublevarCorrelation=0.0;
7841+
7842+
if (nnumbers>0)
7843+
varCorrelation=Abs(numbers[0]);
7844+
7845+
if (varCorrelation>*indexCorrelation)
7846+
*indexCorrelation=varCorrelation;
7847+
7848+
free_attstatsslot(InvalidOid,NULL,0,numbers,nnumbers);
7849+
}
7850+
}
7851+
7852+
ReleaseVariableStats(vardata);
7853+
}
7854+
7855+
qualSelectivity=clauselist_selectivity(root,indexQuals,
7856+
baserel->relid,
7857+
JOIN_INNER,NULL,
7858+
baserel);
7859+
7860+
/* work out the actual number of ranges in the index */
7861+
indexRanges=Max(ceil((double)baserel->pages /statsData.pagesPerRange),
7862+
1.0);
7863+
7864+
/*
7865+
* Now calculate the minimum possible ranges we could match with if all of
7866+
* the rows were in the perfect order in the table's heap.
77457867
*/
7746-
*indexStartupCost=spc_seq_page_cost*numPages*loop_count;
7868+
minimalRanges=ceil(indexRanges*qualSelectivity);
77477869

77487870
/*
7749-
*To read a BRIN index there might be a bit of back and forth over
7750-
*regular pages, as revmap might point tothem out of sequential order;
7751-
*calculate this as readingthewhole index in random order.
7871+
*Now estimate the number of ranges that we'll touch by using the
7872+
*indexCorrelation from the stats. Careful not todivide by zero
7873+
*(note we're usingtheabsolute value of the correlation).
77527874
*/
7753-
*indexTotalCost=spc_random_page_cost*numPages*loop_count;
7875+
if (*indexCorrelation<1.0e-10)
7876+
estimatedRanges=indexRanges;
7877+
else
7878+
estimatedRanges=Min(minimalRanges /*indexCorrelation,indexRanges);
77547879

7755-
*indexSelectivity=
7756-
clauselist_selectivity(root,indexQuals,
7757-
path->indexinfo->rel->relid,
7758-
JOIN_INNER,NULL,
7759-
path->indexinfo->rel);
7760-
*indexCorrelation=1;
7880+
/* we expect to visit this portion of the table */
7881+
selec=estimatedRanges /indexRanges;
7882+
7883+
CLAMP_PROBABILITY(selec);
7884+
7885+
*indexSelectivity=selec;
77617886

77627887
/*
7763-
* Add on index qual eval costs, much as in genericcostestimate.
7888+
* Compute the index qual costs, much as in genericcostestimate, to add
7889+
* to the index costs.
77647890
*/
77657891
qual_arg_cost=other_operands_eval_cost(root,qinfos)+
77667892
orderby_operands_eval_cost(root,path);
7767-
qual_op_cost=cpu_operator_cost*
7768-
(list_length(indexQuals)+list_length(indexOrderBys));
77697893

7894+
/*
7895+
* Compute the startup cost as the cost to read the whole revmap
7896+
* sequentially, including the cost to execute the index quals.
7897+
*/
7898+
*indexStartupCost=
7899+
spc_seq_page_cost*statsData.revmapNumPages*loop_count;
77707900
*indexStartupCost+=qual_arg_cost;
7771-
*indexTotalCost+=qual_arg_cost;
7772-
*indexTotalCost+= (numTuples**indexSelectivity)* (cpu_index_tuple_cost+qual_op_cost);
7773-
*indexPages=index->pages;
77747901

7775-
/* XXX what about pages_per_range? */
7902+
/*
7903+
* To read a BRIN index there might be a bit of back and forth over
7904+
* regular pages, as revmap might point to them out of sequential order;
7905+
* calculate the total cost as reading the whole index in random order.
7906+
*/
7907+
*indexTotalCost=*indexStartupCost+
7908+
spc_random_page_cost* (numPages-statsData.revmapNumPages)*loop_count;
7909+
7910+
/*
7911+
* Charge a small amount per range tuple which we expect to match to. This
7912+
* is meant to reflect the costs of manipulating the bitmap. The BRIN scan
7913+
* will set a bit for each page in the range when we find a matching
7914+
* range, so we must multiply the charge by the number of pages in the
7915+
* range.
7916+
*/
7917+
*indexTotalCost+=0.1*cpu_operator_cost*estimatedRanges*
7918+
statsData.pagesPerRange;
7919+
7920+
*indexPages=index->pages;
77767921
}

‎src/include/access/brin.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,17 @@ typedef struct BrinOptions
2525
boolautosummarize;
2626
}BrinOptions;
2727

28+
29+
/*
30+
* BrinStatsData represents stats data for planner use
31+
*/
32+
typedefstructBrinStatsData
33+
{
34+
BlockNumberpagesPerRange;
35+
BlockNumberrevmapNumPages;
36+
}BrinStatsData;
37+
38+
2839
#defineBRIN_DEFAULT_PAGES_PER_RANGE128
2940
#defineBrinGetPagesPerRange(relation) \
3041
((relation)->rd_options ? \
@@ -35,4 +46,7 @@ typedef struct BrinOptions
3546
((BrinOptions *) (relation)->rd_options)->autosummarize : \
3647
false)
3748

49+
50+
externvoidbrinGetStats(Relationindex,BrinStatsData*stats);
51+
3852
#endif/* BRIN_H */

‎src/test/regress/expected/brin.out

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -363,6 +363,8 @@ BEGIN
363363
END LOOP;
364364
END;
365365
$x$;
366+
RESET enable_seqscan;
367+
RESET enable_bitmapscan;
366368
INSERT INTO brintest SELECT
367369
repeat(stringu1, 42)::bytea,
368370
substr(stringu1, 1, 1)::"char",
@@ -481,3 +483,27 @@ SELECT brin_summarize_range('brin_summarize_idx', -1);
481483
ERROR: block number out of range: -1
482484
SELECT brin_summarize_range('brin_summarize_idx', 4294967296);
483485
ERROR: block number out of range: 4294967296
486+
-- test brin cost estimates behave sanely based on correlation of values
487+
CREATE TABLE brin_test (a INT, b INT);
488+
INSERT INTO brin_test SELECT x/100,x%100 FROM generate_series(1,10000) x(x);
489+
CREATE INDEX brin_test_a_idx ON brin_test USING brin (a) WITH (pages_per_range = 2);
490+
CREATE INDEX brin_test_b_idx ON brin_test USING brin (b) WITH (pages_per_range = 2);
491+
VACUUM ANALYZE brin_test;
492+
-- Ensure brin index is used when columns are perfectly correlated
493+
EXPLAIN (COSTS OFF) SELECT * FROM brin_test WHERE a = 1;
494+
QUERY PLAN
495+
--------------------------------------------
496+
Bitmap Heap Scan on brin_test
497+
Recheck Cond: (a = 1)
498+
-> Bitmap Index Scan on brin_test_a_idx
499+
Index Cond: (a = 1)
500+
(4 rows)
501+
502+
-- Ensure brin index is not used when values are not correlated
503+
EXPLAIN (COSTS OFF) SELECT * FROM brin_test WHERE b = 1;
504+
QUERY PLAN
505+
-----------------------
506+
Seq Scan on brin_test
507+
Filter: (b = 1)
508+
(2 rows)
509+

‎src/test/regress/sql/brin.sql

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -370,6 +370,9 @@ BEGIN
370370
END;
371371
$x$;
372372

373+
RESET enable_seqscan;
374+
RESET enable_bitmapscan;
375+
373376
INSERT INTO brintestSELECT
374377
repeat(stringu1,42)::bytea,
375378
substr(stringu1,1,1)::"char",
@@ -444,3 +447,16 @@ SELECT brin_summarize_range('brin_summarize_idx', 4294967295);
444447
-- invalid block number values
445448
SELECT brin_summarize_range('brin_summarize_idx',-1);
446449
SELECT brin_summarize_range('brin_summarize_idx',4294967296);
450+
451+
452+
-- test brin cost estimates behave sanely based on correlation of values
453+
CREATETABLEbrin_test (aINT, bINT);
454+
INSERT INTO brin_testSELECT x/100,x%100FROM generate_series(1,10000) x(x);
455+
CREATEINDEXbrin_test_a_idxON brin_test USING brin (a) WITH (pages_per_range=2);
456+
CREATEINDEXbrin_test_b_idxON brin_test USING brin (b) WITH (pages_per_range=2);
457+
VACUUM ANALYZE brin_test;
458+
459+
-- Ensure brin index is used when columns are perfectly correlated
460+
EXPLAIN (COSTS OFF)SELECT*FROM brin_testWHERE a=1;
461+
-- Ensure brin index is not used when values are not correlated
462+
EXPLAIN (COSTS OFF)SELECT*FROM brin_testWHERE b=1;

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp