Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit0700965

Browse files
committed
Repair roundoff-error problem for stddev/variance results near zero,
per complaint from Kemin Zhou.Fix lack of precision in numeric stddev/variance.
1 parent63cc56d commit0700965

File tree

2 files changed

+98
-41
lines changed

2 files changed

+98
-41
lines changed

‎src/backend/utils/adt/float.c

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $Header: /cvsroot/pgsql/src/backend/utils/adt/float.c,v 1.77 2001/11/05 17:46:29 momjian Exp $
11+
* $Header: /cvsroot/pgsql/src/backend/utils/adt/float.c,v 1.78 2001/12/11 02:02:12 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -1580,7 +1580,8 @@ float8_variance(PG_FUNCTION_ARGS)
15801580
float8*transvalues;
15811581
float8N,
15821582
sumX,
1583-
sumX2;
1583+
sumX2,
1584+
numerator;
15841585

15851586
transvalues=check_float8_array(transarray,"float8_variance");
15861587
N=transvalues[0];
@@ -1594,7 +1595,13 @@ float8_variance(PG_FUNCTION_ARGS)
15941595
if (N <=1.0)
15951596
PG_RETURN_FLOAT8(0.0);
15961597

1597-
PG_RETURN_FLOAT8((N*sumX2-sumX*sumX) / (N* (N-1.0)));
1598+
numerator=N*sumX2-sumX*sumX;
1599+
1600+
/* Watch out for roundoff error producing a negative numerator */
1601+
if (numerator <=0.0)
1602+
PG_RETURN_FLOAT8(0.0);
1603+
1604+
PG_RETURN_FLOAT8(numerator / (N* (N-1.0)));
15981605
}
15991606

16001607
Datum
@@ -1604,7 +1611,8 @@ float8_stddev(PG_FUNCTION_ARGS)
16041611
float8*transvalues;
16051612
float8N,
16061613
sumX,
1607-
sumX2;
1614+
sumX2,
1615+
numerator;
16081616

16091617
transvalues=check_float8_array(transarray,"float8_stddev");
16101618
N=transvalues[0];
@@ -1618,7 +1626,13 @@ float8_stddev(PG_FUNCTION_ARGS)
16181626
if (N <=1.0)
16191627
PG_RETURN_FLOAT8(0.0);
16201628

1621-
PG_RETURN_FLOAT8(sqrt((N*sumX2-sumX*sumX) / (N* (N-1.0))));
1629+
numerator=N*sumX2-sumX*sumX;
1630+
1631+
/* Watch out for roundoff error producing a negative numerator */
1632+
if (numerator <=0.0)
1633+
PG_RETURN_FLOAT8(0.0);
1634+
1635+
PG_RETURN_FLOAT8(sqrt(numerator / (N* (N-1.0))));
16221636
}
16231637

16241638

‎src/backend/utils/adt/numeric.c

Lines changed: 79 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
*
66
*1998 Jan Wieck
77
*
8-
* $Header: /cvsroot/pgsql/src/backend/utils/adt/numeric.c,v 1.48 2001/11/05 17:46:29 momjian Exp $
8+
* $Header: /cvsroot/pgsql/src/backend/utils/adt/numeric.c,v 1.49 2001/12/11 02:02:12 tgl Exp $
99
*
1010
* ----------
1111
*/
@@ -159,6 +159,7 @@ static void add_var(NumericVar *var1, NumericVar *var2, NumericVar *result);
159159
staticvoidsub_var(NumericVar*var1,NumericVar*var2,NumericVar*result);
160160
staticvoidmul_var(NumericVar*var1,NumericVar*var2,NumericVar*result);
161161
staticvoiddiv_var(NumericVar*var1,NumericVar*var2,NumericVar*result);
162+
staticintselect_div_scale(NumericVar*var1,NumericVar*var2);
162163
staticvoidmod_var(NumericVar*var1,NumericVar*var2,NumericVar*result);
163164
staticvoidceil_var(NumericVar*var,NumericVar*result);
164165
staticvoidfloor_var(NumericVar*var,NumericVar*result);
@@ -999,28 +1000,7 @@ numeric_div(PG_FUNCTION_ARGS)
9991000
set_var_from_num(num1,&arg1);
10001001
set_var_from_num(num2,&arg2);
10011002

1002-
/* ----------
1003-
* The result scale of a division isn't specified in any
1004-
* SQL standard. For Postgres it is the following (where
1005-
* SR, DR are the result- and display-scales of the returned
1006-
* value, S1, D1, S2 and D2 are the scales of the two arguments,
1007-
* The minimum and maximum scales are compile time options from
1008-
* numeric.h):
1009-
*
1010-
*DR = MIN(MAX(D1 + D2, MIN_DISPLAY_SCALE), MAX_DISPLAY_SCALE)
1011-
*SR = MIN(MAX(MAX(S1 + S2, MIN_RESULT_SCALE), DR + 4), MAX_RESULT_SCALE)
1012-
*
1013-
* By default, any result is computed with a minimum of 34 digits
1014-
* after the decimal point or at least with 4 digits more than
1015-
* displayed.
1016-
* ----------
1017-
*/
1018-
res_dscale=MAX(arg1.dscale+arg2.dscale,NUMERIC_MIN_DISPLAY_SCALE);
1019-
res_dscale=MIN(res_dscale,NUMERIC_MAX_DISPLAY_SCALE);
1020-
global_rscale=MAX(arg1.rscale+arg2.rscale,
1021-
NUMERIC_MIN_RESULT_SCALE);
1022-
global_rscale=MAX(global_rscale,res_dscale+4);
1023-
global_rscale=MIN(global_rscale,NUMERIC_MAX_RESULT_SCALE);
1003+
res_dscale=select_div_scale(&arg1,&arg2);
10241004

10251005
/*
10261006
* Do the divide, set the display scale and return the result
@@ -1884,6 +1864,7 @@ numeric_variance(PG_FUNCTION_ARGS)
18841864
vsumX,
18851865
vsumX2,
18861866
vNminus1;
1867+
intdiv_dscale;
18871868

18881869
/* We assume the input is array of numeric */
18891870
deconstruct_array(transarray,
@@ -1924,10 +1905,21 @@ numeric_variance(PG_FUNCTION_ARGS)
19241905
mul_var(&vsumX,&vsumX,&vsumX);/* now vsumX contains sumX * sumX */
19251906
mul_var(&vN,&vsumX2,&vsumX2);/* now vsumX2 contains N * sumX2 */
19261907
sub_var(&vsumX2,&vsumX,&vsumX2);/* N * sumX2 - sumX * sumX */
1927-
mul_var(&vN,&vNminus1,&vNminus1);/* N * (N - 1) */
1928-
div_var(&vsumX2,&vNminus1,&vsumX);/* variance */
19291908

1930-
res=make_result(&vsumX);
1909+
if (cmp_var(&vsumX2,&const_zero) <=0)
1910+
{
1911+
/* Watch out for roundoff error producing a negative numerator */
1912+
res=make_result(&const_zero);
1913+
}
1914+
else
1915+
{
1916+
mul_var(&vN,&vNminus1,&vNminus1);/* N * (N - 1) */
1917+
div_dscale=select_div_scale(&vsumX2,&vNminus1);
1918+
div_var(&vsumX2,&vNminus1,&vsumX);/* variance */
1919+
vsumX.dscale=div_dscale;
1920+
1921+
res=make_result(&vsumX);
1922+
}
19311923

19321924
free_var(&vN);
19331925
free_var(&vNminus1);
@@ -1951,6 +1943,7 @@ numeric_stddev(PG_FUNCTION_ARGS)
19511943
vsumX,
19521944
vsumX2,
19531945
vNminus1;
1946+
intdiv_dscale;
19541947

19551948
/* We assume the input is array of numeric */
19561949
deconstruct_array(transarray,
@@ -1991,11 +1984,22 @@ numeric_stddev(PG_FUNCTION_ARGS)
19911984
mul_var(&vsumX,&vsumX,&vsumX);/* now vsumX contains sumX * sumX */
19921985
mul_var(&vN,&vsumX2,&vsumX2);/* now vsumX2 contains N * sumX2 */
19931986
sub_var(&vsumX2,&vsumX,&vsumX2);/* N * sumX2 - sumX * sumX */
1994-
mul_var(&vN,&vNminus1,&vNminus1);/* N * (N - 1) */
1995-
div_var(&vsumX2,&vNminus1,&vsumX);/* variance */
1996-
sqrt_var(&vsumX,&vsumX);/* stddev */
19971987

1998-
res=make_result(&vsumX);
1988+
if (cmp_var(&vsumX2,&const_zero) <=0)
1989+
{
1990+
/* Watch out for roundoff error producing a negative numerator */
1991+
res=make_result(&const_zero);
1992+
}
1993+
else
1994+
{
1995+
mul_var(&vN,&vNminus1,&vNminus1);/* N * (N - 1) */
1996+
div_dscale=select_div_scale(&vsumX2,&vNminus1);
1997+
div_var(&vsumX2,&vNminus1,&vsumX);/* variance */
1998+
vsumX.dscale=div_dscale;
1999+
sqrt_var(&vsumX,&vsumX);/* stddev */
2000+
2001+
res=make_result(&vsumX);
2002+
}
19992003

20002004
free_var(&vN);
20012005
free_var(&vNminus1);
@@ -3318,6 +3322,50 @@ div_var(NumericVar *var1, NumericVar *var2, NumericVar *result)
33183322
}
33193323

33203324

3325+
/*
3326+
* Default scale selection for division
3327+
*
3328+
* Returns the appropriate display scale for the division result,
3329+
* and sets global_rscale to the result scale to use during div_var.
3330+
*
3331+
* Note that this must be called before div_var.
3332+
*/
3333+
staticint
3334+
select_div_scale(NumericVar*var1,NumericVar*var2)
3335+
{
3336+
intres_dscale;
3337+
intres_rscale;
3338+
3339+
/* ----------
3340+
* The result scale of a division isn't specified in any
3341+
* SQL standard. For Postgres it is the following (where
3342+
* SR, DR are the result- and display-scales of the returned
3343+
* value, S1, D1, S2 and D2 are the scales of the two arguments,
3344+
* The minimum and maximum scales are compile time options from
3345+
* numeric.h):
3346+
*
3347+
*DR = MIN(MAX(D1 + D2, MIN_DISPLAY_SCALE), MAX_DISPLAY_SCALE)
3348+
*SR = MIN(MAX(MAX(S1 + S2, DR + 4), MIN_RESULT_SCALE), MAX_RESULT_SCALE)
3349+
*
3350+
* By default, any result is computed with a minimum of 34 digits
3351+
* after the decimal point or at least with 4 digits more than
3352+
* displayed.
3353+
* ----------
3354+
*/
3355+
res_dscale=var1->dscale+var2->dscale;
3356+
res_dscale=MAX(res_dscale,NUMERIC_MIN_DISPLAY_SCALE);
3357+
res_dscale=MIN(res_dscale,NUMERIC_MAX_DISPLAY_SCALE);
3358+
3359+
res_rscale=var1->rscale+var2->rscale;
3360+
res_rscale=MAX(res_rscale,res_dscale+4);
3361+
res_rscale=MAX(res_rscale,NUMERIC_MIN_RESULT_SCALE);
3362+
res_rscale=MIN(res_rscale,NUMERIC_MAX_RESULT_SCALE);
3363+
global_rscale=res_rscale;
3364+
3365+
returnres_dscale;
3366+
}
3367+
3368+
33213369
/* ----------
33223370
* mod_var() -
33233371
*
@@ -3343,12 +3391,7 @@ mod_var(NumericVar *var1, NumericVar *var2, NumericVar *result)
33433391
*/
33443392
save_global_rscale=global_rscale;
33453393

3346-
div_dscale=MAX(var1->dscale+var2->dscale,NUMERIC_MIN_DISPLAY_SCALE);
3347-
div_dscale=MIN(div_dscale,NUMERIC_MAX_DISPLAY_SCALE);
3348-
global_rscale=MAX(var1->rscale+var2->rscale,
3349-
NUMERIC_MIN_RESULT_SCALE);
3350-
global_rscale=MAX(global_rscale,div_dscale+4);
3351-
global_rscale=MIN(global_rscale,NUMERIC_MAX_RESULT_SCALE);
3394+
div_dscale=select_div_scale(var1,var2);
33523395

33533396
div_var(var1,var2,&tmp);
33543397

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp