Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 4c2777d

Browse files
committed
Change get_variable_numdistinct's API to flag default estimates explicitly.
Formerly, callers tested for DEFAULT_NUM_DISTINCT, which had the problem that a perfectly solid estimate might be mistaken for a content-free default.
1 parent 1cb108e, commit 4c2777d

File tree

2 files changed

+45
-28
lines changed

2 files changed

+45
-28
lines changed

‎src/backend/utils/adt/selfuncs.c

Lines changed: 43 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -244,6 +244,7 @@ var_eq_const(VariableStatData *vardata, Oid operator,
244244
boolvaronleft)
245245
{
246246
doubleselec;
247+
boolisdefault;
247248

248249
/*
249250
* If the constant is NULL, assume operator is strict and return zero, ie,
@@ -344,7 +345,7 @@ var_eq_const(VariableStatData *vardata, Oid operator,
344345
* all the not-common values share this remaining fraction
345346
* equally, so we divide by the number of other distinct values.
346347
*/
347-
otherdistinct=get_variable_numdistinct(vardata)-nnumbers;
348+
otherdistinct=get_variable_numdistinct(vardata,&isdefault)-nnumbers;
348349
if (otherdistinct>1)
349350
selec /=otherdistinct;
350351

@@ -366,7 +367,7 @@ var_eq_const(VariableStatData *vardata, Oid operator,
366367
* of distinct values and assuming they are equally common. (The guess
367368
* is unlikely to be very good, but we do know a few special cases.)
368369
*/
369-
selec=1.0 /get_variable_numdistinct(vardata);
370+
selec=1.0 /get_variable_numdistinct(vardata,&isdefault);
370371
}
371372

372373
/* result should be in range, but make sure... */
@@ -384,6 +385,7 @@ var_eq_non_const(VariableStatData *vardata, Oid operator,
384385
boolvaronleft)
385386
{
386387
doubleselec;
388+
boolisdefault;
387389

388390
/*
389391
* If we matched the var to a unique index, assume there is exactly one
@@ -414,7 +416,7 @@ var_eq_non_const(VariableStatData *vardata, Oid operator,
414416
* idea?)
415417
*/
416418
selec=1.0-stats->stanullfrac;
417-
ndistinct=get_variable_numdistinct(vardata);
419+
ndistinct=get_variable_numdistinct(vardata,&isdefault);
418420
if (ndistinct>1)
419421
selec /=ndistinct;
420422

@@ -441,7 +443,7 @@ var_eq_non_const(VariableStatData *vardata, Oid operator,
441443
* of distinct values and assuming they are equally common. (The guess
442444
* is unlikely to be very good, but we do know a few special cases.)
443445
*/
444-
selec=1.0 /get_variable_numdistinct(vardata);
446+
selec=1.0 /get_variable_numdistinct(vardata,&isdefault);
445447
}
446448

447449
/* result should be in range, but make sure... */
@@ -2071,6 +2073,8 @@ eqjoinsel_inner(Oid operator,
20712073
doubleselec;
20722074
doublend1;
20732075
doublend2;
2076+
boolisdefault1;
2077+
boolisdefault2;
20742078
Form_pg_statisticstats1=NULL;
20752079
Form_pg_statisticstats2=NULL;
20762080
boolhave_mcvs1= false;
@@ -2084,8 +2088,8 @@ eqjoinsel_inner(Oid operator,
20842088
float4*numbers2=NULL;
20852089
intnnumbers2=0;
20862090

2087-
nd1=get_variable_numdistinct(vardata1);
2088-
nd2=get_variable_numdistinct(vardata2);
2091+
nd1=get_variable_numdistinct(vardata1,&isdefault1);
2092+
nd2=get_variable_numdistinct(vardata2,&isdefault2);
20892093

20902094
if (HeapTupleIsValid(vardata1->statsTuple))
20912095
{
@@ -2296,6 +2300,8 @@ eqjoinsel_semi(Oid operator,
22962300
doubleselec;
22972301
doublend1;
22982302
doublend2;
2303+
boolisdefault1;
2304+
boolisdefault2;
22992305
Form_pg_statisticstats1=NULL;
23002306
boolhave_mcvs1= false;
23012307
Datum*values1=NULL;
@@ -2308,8 +2314,8 @@ eqjoinsel_semi(Oid operator,
23082314
float4*numbers2=NULL;
23092315
intnnumbers2=0;
23102316

2311-
nd1=get_variable_numdistinct(vardata1);
2312-
nd2=get_variable_numdistinct(vardata2);
2317+
nd1=get_variable_numdistinct(vardata1,&isdefault1);
2318+
nd2=get_variable_numdistinct(vardata2,&isdefault2);
23132319

23142320
/*
23152321
* We clamp nd2 to be not more than what we estimate the inner relation's
@@ -2441,7 +2447,7 @@ eqjoinsel_semi(Oid operator,
24412447
* nd2 is default, punt and assume half of the uncertain rows have
24422448
* join partners.
24432449
*/
2444-
if (nd1!=DEFAULT_NUM_DISTINCT&&nd2!=DEFAULT_NUM_DISTINCT)
2450+
if (!isdefault1&&!isdefault2)
24452451
{
24462452
nd1-=nmatches;
24472453
nd2-=nmatches;
@@ -2464,7 +2470,7 @@ eqjoinsel_semi(Oid operator,
24642470
*/
24652471
doublenullfrac1=stats1 ?stats1->stanullfrac :0.0;
24662472

2467-
if (nd1!=DEFAULT_NUM_DISTINCT&&nd2!=DEFAULT_NUM_DISTINCT)
2473+
if (!isdefault1&&!isdefault2)
24682474
{
24692475
if (nd1 <=nd2||nd2<0)
24702476
selec=1.0-nullfrac1;
@@ -2955,9 +2961,10 @@ add_unique_group_var(PlannerInfo *root, List *varinfos,
29552961
{
29562962
GroupVarInfo*varinfo;
29572963
doublendistinct;
2964+
boolisdefault;
29582965
ListCell*lc;
29592966

2960-
ndistinct=get_variable_numdistinct(vardata);
2967+
ndistinct=get_variable_numdistinct(vardata,&isdefault);
29612968

29622969
/* cannot use foreach here because of possible list_delete */
29632970
lc=list_head(varinfos);
@@ -3292,14 +3299,23 @@ estimate_hash_bucketsize(PlannerInfo *root, Node *hashkey, double nbuckets)
32923299
stanullfrac,
32933300
mcvfreq,
32943301
avgfreq;
3302+
boolisdefault;
32953303
float4*numbers;
32963304
intnnumbers;
32973305

32983306
examine_variable(root,hashkey,0,&vardata);
32993307

3300-
/* Get number of distinct values and fraction that are null */
3301-
ndistinct=get_variable_numdistinct(&vardata);
3308+
/* Get number of distinct values */
3309+
ndistinct=get_variable_numdistinct(&vardata,&isdefault);
33023310

3311+
/* If ndistinct isn't real, punt and return 0.1, per comments above */
3312+
if (isdefault)
3313+
{
3314+
ReleaseVariableStats(vardata);
3315+
return (Selectivity)0.1;
3316+
}
3317+
3318+
/* Get fraction that are null */
33033319
if (HeapTupleIsValid(vardata.statsTuple))
33043320
{
33053321
Form_pg_statisticstats;
@@ -3308,19 +3324,7 @@ estimate_hash_bucketsize(PlannerInfo *root, Node *hashkey, double nbuckets)
33083324
stanullfrac=stats->stanullfrac;
33093325
}
33103326
else
3311-
{
3312-
/*
3313-
* Believe a default ndistinct only if it came from stats. Otherwise
3314-
* punt and return 0.1, per comments above.
3315-
*/
3316-
if (ndistinct==DEFAULT_NUM_DISTINCT)
3317-
{
3318-
ReleaseVariableStats(vardata);
3319-
return (Selectivity)0.1;
3320-
}
3321-
33223327
stanullfrac=0.0;
3323-
}
33243328

33253329
/* Compute avg freq of all distinct data values in raw relation */
33263330
avgfreq= (1.0-stanullfrac) /ndistinct;
@@ -4414,16 +4418,20 @@ examine_simple_variable(PlannerInfo *root, Var *var,
44144418
* Estimate the number of distinct values of a variable.
44154419
*
44164420
* vardata: results of examine_variable
4421+
* *isdefault: set to TRUE if the result is a default rather than based on
4422+
* anything meaningful.
44174423
*
44184424
* NB: be careful to produce an integral result, since callers may compare
44194425
* the result to exact integer counts.
44204426
*/
44214427
double
4422-
get_variable_numdistinct(VariableStatData*vardata)
4428+
get_variable_numdistinct(VariableStatData*vardata,bool*isdefault)
44234429
{
44244430
doublestadistinct;
44254431
doublentuples;
44264432

4433+
*isdefault= false;
4434+
44274435
/*
44284436
* Determine the stadistinct value to use.  There are cases where we can
44294437
* get an estimate even without a pg_statistic entry, or can get a better
@@ -4496,10 +4504,16 @@ get_variable_numdistinct(VariableStatData *vardata)
44964504
* Otherwise we need to get the relation size; punt if not available.
44974505
*/
44984506
if (vardata->rel==NULL)
4507+
{
4508+
*isdefault= true;
44994509
returnDEFAULT_NUM_DISTINCT;
4510+
}
45004511
ntuples=vardata->rel->tuples;
45014512
if (ntuples <=0.0)
4513+
{
4514+
*isdefault= true;
45024515
returnDEFAULT_NUM_DISTINCT;
4516+
}
45034517

45044518
/*
45054519
* If we had a relative estimate, use that.
@@ -4509,11 +4523,13 @@ get_variable_numdistinct(VariableStatData *vardata)
45094523

45104524
/*
45114525
* With no data, estimate ndistinct = ntuples if the table is small, else
4512-
* use default.
4526+
* use default. We use DEFAULT_NUM_DISTINCT as the cutoff for "small"
4527+
* so that the behavior isn't discontinuous.
45134528
*/
45144529
if (ntuples<DEFAULT_NUM_DISTINCT)
45154530
returnntuples;
45164531

4532+
*isdefault= true;
45174533
returnDEFAULT_NUM_DISTINCT;
45184534
}
45194535

‎src/include/utils/selfuncs.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,8 @@ extern void get_join_variables(PlannerInfo *root, List *args,
121121
VariableStatData*vardata1,
122122
VariableStatData*vardata2,
123123
bool*join_is_reversed);
124-
externdoubleget_variable_numdistinct(VariableStatData*vardata);
124+
externdoubleget_variable_numdistinct(VariableStatData*vardata,
125+
bool*isdefault);
125126
externdoublemcv_selectivity(VariableStatData*vardata,FmgrInfo*opproc,
126127
Datumconstval,boolvaronleft,
127128
double*sumcommonp);

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp