Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit f4230d2

Browse files
committed
Change patternsel() so that instead of switching from a pure
pattern-examination heuristic method to purely histogram-driven selectivity at histogram size 100, we compute both estimates and use a weighted average. The weight put on the heuristic estimate decreases linearly with histogram size, dropping to zero for 100 or more histogram entries. Likewise in ltreeparentsel(). After a patch by Greg Stark, though I reorganized the logic a bit to give the caller of histogram_selectivity() more control.
1 parent 422495d commit f4230d2

File tree

3 files changed

+75
-35
lines changed

3 files changed

+75
-35
lines changed

‎contrib/ltree/ltree_op.c

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
/*
22
* op function for ltree
33
* Teodor Sigaev <teodor@stack.net>
4-
* $PostgreSQL: pgsql/contrib/ltree/ltree_op.c,v 1.16 2007/02/28 22:44:38 tgl Exp $
4+
* $PostgreSQL: pgsql/contrib/ltree/ltree_op.c,v 1.17 2008/03/09 00:32:09 tgl Exp $
55
*/
66

77
#include"ltree.h"
@@ -609,6 +609,7 @@ ltreeparentsel(PG_FUNCTION_ARGS)
609609
doublemcvsum;
610610
doublemcvsel;
611611
doublenullfrac;
612+
inthist_size;
612613

613614
fmgr_info(get_opcode(operator),&contproc);
614615

@@ -626,21 +627,31 @@ ltreeparentsel(PG_FUNCTION_ARGS)
626627
*/
627628
selec=histogram_selectivity(&vardata,&contproc,
628629
constval,varonleft,
629-
100,1);
630+
10,1,&hist_size);
630631
if (selec<0)
631632
{
632633
/* Nope, fall back on default */
633634
selec=DEFAULT_PARENT_SEL;
634635
}
635-
else
636+
elseif (hist_size<100)
636637
{
637-
/* Yes, but don't believe extremely small or large estimates. */
638-
if (selec<0.0001)
639-
selec=0.0001;
640-
elseif (selec>0.9999)
641-
selec=0.9999;
638+
/*
639+
* For histogram sizes from 10 to 100, we combine the
640+
* histogram and default selectivities, putting increasingly
641+
* more trust in the histogram for larger sizes.
642+
*/
643+
doublehist_weight=hist_size /100.0;
644+
645+
selec=selec*hist_weight+
646+
DEFAULT_PARENT_SEL* (1.0-hist_weight);
642647
}
643648

649+
/* In any case, don't believe extremely small or large estimates. */
650+
if (selec<0.0001)
651+
selec=0.0001;
652+
elseif (selec>0.9999)
653+
selec=0.9999;
654+
644655
if (HeapTupleIsValid(vardata.statsTuple))
645656
nullfrac= ((Form_pg_statistic)GETSTRUCT(vardata.statsTuple))->stanullfrac;
646657
else

‎src/backend/utils/adt/selfuncs.c

Lines changed: 53 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
*
1616
*
1717
* IDENTIFICATION
18-
* $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.244 2008/03/08 22:41:38 tgl Exp $
18+
* $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.245 2008/03/09 00:32:09 tgl Exp $
1919
*
2020
*-------------------------------------------------------------------------
2121
*/
@@ -567,17 +567,23 @@ mcv_selectivity(VariableStatData *vardata, FmgrInfo *opproc,
567567
* or not it has anything to do with the histogram sort operator. We are
568568
* essentially using the histogram just as a representative sample. However,
569569
* small histograms are unlikely to be all that representative, so the caller
570-
* should specify a minimum histogram size to use, and fall back on some
571-
* other approach if this routine fails.
570+
* should be prepared to fall back on some other estimation approach when the
571+
* histogram is missing or very small. It may also be prudent to combine this
572+
* approach with another one when the histogram is small.
572573
*
573-
* The caller also specifies n_skip, which causes us to ignore the first and
574-
* last n_skip histogram elements, on the grounds that they are outliers and
575-
* hence not very representative. If in doubt, min_hist_size = 100 and
576-
* n_skip = 1 are reasonable values.
574+
* If the actual histogram size is not at least min_hist_size, we won't bother
575+
* to do the calculation at all. Also, if the n_skip parameter is > 0, we
576+
* ignore the first and last n_skip histogram elements, on the grounds that
577+
* they are outliers and hence not very representative. Typical values for
578+
* these parameters are 10 and 1.
577579
*
578580
* The function result is the selectivity, or -1 if there is no histogram
579581
* or it's smaller than min_hist_size.
580582
*
583+
* The output parameter *hist_size receives the actual histogram size,
584+
* or zero if no histogram. Callers may use this number to decide how
585+
* much faith to put in the function result.
586+
*
581587
* Note that the result disregards both the most-common-values (if any) and
582588
* null entries. The caller is expected to combine this result with
583589
* statistics for those portions of the column population.It may also be
@@ -586,7 +592,8 @@ mcv_selectivity(VariableStatData *vardata, FmgrInfo *opproc,
586592
double
587593
histogram_selectivity(VariableStatData*vardata,FmgrInfo*opproc,
588594
Datumconstval,boolvaronleft,
589-
intmin_hist_size,intn_skip)
595+
intmin_hist_size,intn_skip,
596+
int*hist_size)
590597
{
591598
doubleresult;
592599
Datum*values;
@@ -603,6 +610,7 @@ histogram_selectivity(VariableStatData *vardata, FmgrInfo *opproc,
603610
&values,&nvalues,
604611
NULL,NULL))
605612
{
613+
*hist_size=nvalues;
606614
if (nvalues >=min_hist_size)
607615
{
608616
intnmatch=0;
@@ -626,7 +634,10 @@ histogram_selectivity(VariableStatData *vardata, FmgrInfo *opproc,
626634
free_attstatsslot(vardata->atttype,values,nvalues,NULL,0);
627635
}
628636
else
637+
{
638+
*hist_size=0;
629639
result=-1;
640+
}
630641

631642
returnresult;
632643
}
@@ -1117,13 +1128,16 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
11171128
* selectivity of the fixed prefix and remainder of pattern
11181129
* separately, then combine the two to get an estimate of the
11191130
* selectivity for the part of the column population represented by
1120-
* the histogram. We then add up data for any most-common-values
1121-
* values; these are not in the histogram population, and we can get
1122-
* exact answers for them by applying the pattern operator, so there's
1123-
* no reason to approximate. (If the MCVs cover a significant part of
1124-
* the total population, this gives us a big leg up in accuracy.)
1131+
* the histogram. (For small histograms, we combine these approaches.)
1132+
*
1133+
* We then add up data for any most-common-values values; these are
1134+
* not in the histogram population, and we can get exact answers for
1135+
* them by applying the pattern operator, so there's no reason to
1136+
* approximate. (If the MCVs cover a significant part of the total
1137+
* population, this gives us a big leg up in accuracy.)
11251138
*/
11261139
Selectivityselec;
1140+
inthist_size;
11271141
FmgrInfoopproc;
11281142
doublenullfrac,
11291143
mcv_selec,
@@ -1133,10 +1147,12 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
11331147
fmgr_info(get_opcode(operator),&opproc);
11341148

11351149
selec=histogram_selectivity(&vardata,&opproc,constval, true,
1136-
100,1);
1137-
if (selec<0)
1150+
10,1,&hist_size);
1151+
1152+
/* If not at least 100 entries, use the heuristic method */
1153+
if (hist_size<100)
11381154
{
1139-
/* Nope, so fake it with the heuristic method */
1155+
Selectivityheursel;
11401156
Selectivityprefixsel;
11411157
Selectivityrestsel;
11421158

@@ -1146,17 +1162,29 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
11461162
else
11471163
prefixsel=1.0;
11481164
restsel=pattern_selectivity(rest,ptype);
1149-
selec=prefixsel*restsel;
1150-
}
1151-
else
1152-
{
1153-
/* Yes, but don't believe extremely small or large estimates. */
1154-
if (selec<0.0001)
1155-
selec=0.0001;
1156-
elseif (selec>0.9999)
1157-
selec=0.9999;
1165+
heursel=prefixsel*restsel;
1166+
1167+
if (selec<0)/* fewer than 10 histogram entries? */
1168+
selec=heursel;
1169+
else
1170+
{
1171+
/*
1172+
* For histogram sizes from 10 to 100, we combine the
1173+
* histogram and heuristic selectivities, putting increasingly
1174+
* more trust in the histogram for larger sizes.
1175+
*/
1176+
doublehist_weight=hist_size /100.0;
1177+
1178+
selec=selec*hist_weight+heursel* (1.0-hist_weight);
1179+
}
11581180
}
11591181

1182+
/* In any case, don't believe extremely small or large estimates. */
1183+
if (selec<0.0001)
1184+
selec=0.0001;
1185+
elseif (selec>0.9999)
1186+
selec=0.9999;
1187+
11601188
/*
11611189
* If we have most-common-values info, add up the fractions of the MCV
11621190
* entries that satisfy MCV OP PATTERN. These fractions contribute

‎src/include/utils/selfuncs.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
99
* Portions Copyright (c) 1994, Regents of the University of California
1010
*
11-
* $PostgreSQL: pgsql/src/include/utils/selfuncs.h,v 1.43 2008/01/01 19:45:59 momjian Exp $
11+
* $PostgreSQL: pgsql/src/include/utils/selfuncs.h,v 1.44 2008/03/09 00:32:09 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -112,7 +112,8 @@ extern double mcv_selectivity(VariableStatData *vardata, FmgrInfo *opproc,
112112
double*sumcommonp);
113113
externdoublehistogram_selectivity(VariableStatData*vardata,FmgrInfo*opproc,
114114
Datumconstval,boolvaronleft,
115-
intmin_hist_size,intn_skip);
115+
intmin_hist_size,intn_skip,
116+
int*hist_size);
116117

117118
externPattern_Prefix_Statuspattern_fixed_prefix(Const*patt,
118119
Pattern_Typeptype,

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp