Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commitd1b307e

Browse files
committed
Optimise numeric division for one and two base-NBASE digit divisors.
Formerly div_var() had "fast path" short division code that wassignificantly faster when the divisor was just one base-NBASE digit,but otherwise used long division.This commit adds a new function div_var_int() that divides by anarbitrary 32-bit integer, using the fast short division algorithm, andupdates both div_var() and div_var_fast() to use it for one and twodigit divisors. In the case of div_var(), this is slightly faster inthe one-digit case, because it avoids some digit array copying, and ismuch faster in the two-digit case where it replaces long division. Fordiv_var_fast(), it is much faster in both cases because the maindiv_var_fast() algorithm is optimised for larger inputs.Additionally, optimise exp() and ln() by using div_var_int(), allowinga NumericVar to be replaced by an int in a couple of places, mostnotably in the Taylor series code. This produces a significant speedupof exp(), ln() and the numeric_big regression test.Dean Rasheed, reviewed by Tom Lane.Discussion:https://postgr.es/m/CAEZATCVwsBi-ND-t82Cuuh1=8ee6jdOpzsmGN+CUZB6yjLg9jw@mail.gmail.com
1 parentd996d64 commitd1b307e

File tree

1 file changed

+180
-43
lines changed

1 file changed

+180
-43
lines changed

‎src/backend/utils/adt/numeric.c

Lines changed: 180 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -551,6 +551,8 @@ static void div_var(const NumericVar *var1, const NumericVar *var2,
551551
intrscale,boolround);
552552
staticvoiddiv_var_fast(constNumericVar*var1,constNumericVar*var2,
553553
NumericVar*result,intrscale,boolround);
554+
staticvoiddiv_var_int(constNumericVar*var,intival,intival_weight,
555+
NumericVar*result,intrscale,boolround);
554556
staticintselect_div_scale(constNumericVar*var1,constNumericVar*var2);
555557
staticvoidmod_var(constNumericVar*var1,constNumericVar*var2,
556558
NumericVar*result);
@@ -8451,8 +8453,33 @@ div_var(const NumericVar *var1, const NumericVar *var2, NumericVar *result,
84518453
errmsg("division by zero")));
84528454

84538455
/*
8454-
* Now result zero check
8456+
* If the divisor has just one or two digits, delegate to div_var_int(),
8457+
* which uses fast short division.
84558458
*/
8459+
if (var2ndigits <=2)
8460+
{
8461+
intidivisor;
8462+
intidivisor_weight;
8463+
8464+
idivisor=var2->digits[0];
8465+
idivisor_weight=var2->weight;
8466+
if (var2ndigits==2)
8467+
{
8468+
idivisor=idivisor*NBASE+var2->digits[1];
8469+
idivisor_weight--;
8470+
}
8471+
if (var2->sign==NUMERIC_NEG)
8472+
idivisor=-idivisor;
8473+
8474+
div_var_int(var1,idivisor,idivisor_weight,result,rscale,round);
8475+
return;
8476+
}
8477+
8478+
/*
8479+
* Otherwise, perform full long division.
8480+
*/
8481+
8482+
/* Result zero check */
84568483
if (var1ndigits==0)
84578484
{
84588485
zero_var(result);
@@ -8510,23 +8537,6 @@ div_var(const NumericVar *var1, const NumericVar *var2, NumericVar *result,
85108537
alloc_var(result,res_ndigits);
85118538
res_digits=result->digits;
85128539

8513-
if (var2ndigits==1)
8514-
{
8515-
/*
8516-
* If there's only a single divisor digit, we can use a fast path (cf.
8517-
* Knuth section 4.3.1 exercise 16).
8518-
*/
8519-
divisor1=divisor[1];
8520-
carry=0;
8521-
for (i=0;i<res_ndigits;i++)
8522-
{
8523-
carry=carry*NBASE+dividend[i+1];
8524-
res_digits[i]=carry /divisor1;
8525-
carry=carry %divisor1;
8526-
}
8527-
}
8528-
else
8529-
{
85308540
/*
85318541
* The full multiple-place algorithm is taken from Knuth volume 2,
85328542
* Algorithm 4.3.1D.
@@ -8659,7 +8669,6 @@ div_var(const NumericVar *var1, const NumericVar *var2, NumericVar *result,
86598669
/* And we're done with this quotient digit */
86608670
res_digits[j]=qhat;
86618671
}
8662-
}
86638672

86648673
pfree(dividend);
86658674

@@ -8735,8 +8744,33 @@ div_var_fast(const NumericVar *var1, const NumericVar *var2,
87358744
errmsg("division by zero")));
87368745

87378746
/*
8738-
* Now result zero check
8747+
* If the divisor has just one or two digits, delegate to div_var_int(),
8748+
* which uses fast short division.
87398749
*/
8750+
if (var2ndigits <=2)
8751+
{
8752+
intidivisor;
8753+
intidivisor_weight;
8754+
8755+
idivisor=var2->digits[0];
8756+
idivisor_weight=var2->weight;
8757+
if (var2ndigits==2)
8758+
{
8759+
idivisor=idivisor*NBASE+var2->digits[1];
8760+
idivisor_weight--;
8761+
}
8762+
if (var2->sign==NUMERIC_NEG)
8763+
idivisor=-idivisor;
8764+
8765+
div_var_int(var1,idivisor,idivisor_weight,result,rscale,round);
8766+
return;
8767+
}
8768+
8769+
/*
8770+
* Otherwise, perform full long division.
8771+
*/
8772+
8773+
/* Result zero check */
87408774
if (var1ndigits==0)
87418775
{
87428776
zero_var(result);
@@ -9008,6 +9042,118 @@ div_var_fast(const NumericVar *var1, const NumericVar *var2,
90089042
}
90099043

90109044

9045+
/*
9046+
* div_var_int() -
9047+
*
9048+
*Divide a numeric variable by a 32-bit integer with the specified weight.
9049+
*The quotient var / (ival * NBASE^ival_weight) is stored in result.
9050+
*/
9051+
staticvoid
9052+
div_var_int(constNumericVar*var,intival,intival_weight,
9053+
NumericVar*result,intrscale,boolround)
9054+
{
9055+
NumericDigit*var_digits=var->digits;
9056+
intvar_ndigits=var->ndigits;
9057+
intres_sign;
9058+
intres_weight;
9059+
intres_ndigits;
9060+
NumericDigit*res_buf;
9061+
NumericDigit*res_digits;
9062+
uint32divisor;
9063+
inti;
9064+
9065+
/* Guard against division by zero */
9066+
if (ival==0)
9067+
ereport(ERROR,
9068+
errcode(ERRCODE_DIVISION_BY_ZERO),
9069+
errmsg("division by zero"));
9070+
9071+
/* Result zero check */
9072+
if (var_ndigits==0)
9073+
{
9074+
zero_var(result);
9075+
result->dscale=rscale;
9076+
return;
9077+
}
9078+
9079+
/*
9080+
* Determine the result sign, weight and number of digits to calculate.
9081+
* The weight figured here is correct if the emitted quotient has no
9082+
* leading zero digits; otherwise strip_var() will fix things up.
9083+
*/
9084+
if (var->sign==NUMERIC_POS)
9085+
res_sign=ival>0 ?NUMERIC_POS :NUMERIC_NEG;
9086+
else
9087+
res_sign=ival>0 ?NUMERIC_NEG :NUMERIC_POS;
9088+
res_weight=var->weight-ival_weight;
9089+
/* The number of accurate result digits we need to produce: */
9090+
res_ndigits=res_weight+1+ (rscale+DEC_DIGITS-1) /DEC_DIGITS;
9091+
/* ... but always at least 1 */
9092+
res_ndigits=Max(res_ndigits,1);
9093+
/* If rounding needed, figure one more digit to ensure correct result */
9094+
if (round)
9095+
res_ndigits++;
9096+
9097+
res_buf=digitbuf_alloc(res_ndigits+1);
9098+
res_buf[0]=0;/* spare digit for later rounding */
9099+
res_digits=res_buf+1;
9100+
9101+
/*
9102+
* Now compute the quotient digits. This is the short division algorithm
9103+
* described in Knuth volume 2, section 4.3.1 exercise 16, except that we
9104+
* allow the divisor to exceed the internal base.
9105+
*
9106+
* In this algorithm, the carry from one digit to the next is at most
9107+
* divisor - 1. Therefore, while processing the next digit, carry may
9108+
* become as large as divisor * NBASE - 1, and so it requires a 64-bit
9109+
* integer if this exceeds UINT_MAX.
9110+
*/
9111+
divisor=Abs(ival);
9112+
9113+
if (divisor <=UINT_MAX /NBASE)
9114+
{
9115+
/* carry cannot overflow 32 bits */
9116+
uint32carry=0;
9117+
9118+
for (i=0;i<res_ndigits;i++)
9119+
{
9120+
carry=carry*NBASE+ (i<var_ndigits ?var_digits[i] :0);
9121+
res_digits[i]= (NumericDigit) (carry /divisor);
9122+
carry=carry %divisor;
9123+
}
9124+
}
9125+
else
9126+
{
9127+
/* carry may exceed 32 bits */
9128+
uint64carry=0;
9129+
9130+
for (i=0;i<res_ndigits;i++)
9131+
{
9132+
carry=carry*NBASE+ (i<var_ndigits ?var_digits[i] :0);
9133+
res_digits[i]= (NumericDigit) (carry /divisor);
9134+
carry=carry %divisor;
9135+
}
9136+
}
9137+
9138+
/* Store the quotient in result */
9139+
digitbuf_free(result->buf);
9140+
result->ndigits=res_ndigits;
9141+
result->buf=res_buf;
9142+
result->digits=res_digits;
9143+
result->weight=res_weight;
9144+
result->sign=res_sign;
9145+
9146+
/* Round or truncate to target rscale (and set result->dscale) */
9147+
if (round)
9148+
round_var(result,rscale);
9149+
else
9150+
trunc_var(result,rscale);
9151+
9152+
/* Strip leading/trailing zeroes */
9153+
strip_var(result);
9154+
}
9155+
9156+
90119157
/*
90129158
* Default scale selection for division
90139159
*
@@ -9783,7 +9929,7 @@ exp_var(const NumericVar *arg, NumericVar *result, int rscale)
97839929
{
97849930
NumericVarx;
97859931
NumericVarelem;
9786-
NumericVarni;
9932+
intni;
97879933
doubleval;
97889934
intdweight;
97899935
intndiv2;
@@ -9792,7 +9938,6 @@ exp_var(const NumericVar *arg, NumericVar *result, int rscale)
97929938

97939939
init_var(&x);
97949940
init_var(&elem);
9795-
init_var(&ni);
97969941

97979942
set_var_from_var(arg,&x);
97989943

@@ -9820,29 +9965,24 @@ exp_var(const NumericVar *arg, NumericVar *result, int rscale)
98209965

98219966
/*
98229967
* Reduce x to the range -0.01 <= x <= 0.01 (approximately) by dividing by
9823-
* 2^n, to improve the convergence rate of the Taylor series.
9968+
* 2^ndiv2, to improve the convergence rate of the Taylor series.
9969+
*
9970+
* Note that the overflow check above ensures that Abs(x) < 6000, which
9971+
* means that ndiv2 <= 20 here.
98249972
*/
98259973
if (Abs(val)>0.01)
98269974
{
9827-
NumericVartmp;
9828-
9829-
init_var(&tmp);
9830-
set_var_from_var(&const_two,&tmp);
9831-
98329975
ndiv2=1;
98339976
val /=2;
98349977

98359978
while (Abs(val)>0.01)
98369979
{
98379980
ndiv2++;
98389981
val /=2;
9839-
add_var(&tmp,&tmp,&tmp);
98409982
}
98419983

98429984
local_rscale=x.dscale+ndiv2;
9843-
div_var_fast(&x,&tmp,&x,local_rscale, true);
9844-
9845-
free_var(&tmp);
9985+
div_var_int(&x,1 <<ndiv2,0,&x,local_rscale, true);
98469986
}
98479987
else
98489988
ndiv2=0;
@@ -9870,16 +10010,16 @@ exp_var(const NumericVar *arg, NumericVar *result, int rscale)
987010010
add_var(&const_one,&x,result);
987110011

987210012
mul_var(&x,&x,&elem,local_rscale);
9873-
set_var_from_var(&const_two,&ni);
9874-
div_var_fast(&elem,&ni,&elem,local_rscale, true);
10013+
ni=2;
10014+
div_var_int(&elem,ni,0,&elem,local_rscale, true);
987510015

987610016
while (elem.ndigits!=0)
987710017
{
987810018
add_var(result,&elem,result);
987910019

988010020
mul_var(&elem,&x,&elem,local_rscale);
9881-
add_var(&ni,&const_one,&ni);
9882-
div_var_fast(&elem,&ni,&elem,local_rscale, true);
10021+
ni++;
10022+
div_var_int(&elem,ni,0,&elem,local_rscale, true);
988310023
}
988410024

988510025
/*
@@ -9899,7 +10039,6 @@ exp_var(const NumericVar *arg, NumericVar *result, int rscale)
989910039

990010040
free_var(&x);
990110041
free_var(&elem);
9902-
free_var(&ni);
990310042
}
990410043

990510044

@@ -9993,7 +10132,7 @@ ln_var(const NumericVar *arg, NumericVar *result, int rscale)
999310132
{
999410133
NumericVarx;
999510134
NumericVarxx;
9996-
NumericVarni;
10135+
intni;
999710136
NumericVarelem;
999810137
NumericVarfact;
999910138
intnsqrt;
@@ -10012,7 +10151,6 @@ ln_var(const NumericVar *arg, NumericVar *result, int rscale)
1001210151

1001310152
init_var(&x);
1001410153
init_var(&xx);
10015-
init_var(&ni);
1001610154
init_var(&elem);
1001710155
init_var(&fact);
1001810156

@@ -10073,13 +10211,13 @@ ln_var(const NumericVar *arg, NumericVar *result, int rscale)
1007310211
set_var_from_var(result,&xx);
1007410212
mul_var(result,result,&x,local_rscale);
1007510213

10076-
set_var_from_var(&const_one,&ni);
10214+
ni=1;
1007710215

1007810216
for (;;)
1007910217
{
10080-
add_var(&ni,&const_two,&ni);
10218+
ni+=2;
1008110219
mul_var(&xx,&x,&xx,local_rscale);
10082-
div_var_fast(&xx,&ni,&elem,local_rscale, true);
10220+
div_var_int(&xx,ni,0,&elem,local_rscale, true);
1008310221

1008410222
if (elem.ndigits==0)
1008510223
break;
@@ -10095,7 +10233,6 @@ ln_var(const NumericVar *arg, NumericVar *result, int rscale)
1009510233

1009610234
free_var(&x);
1009710235
free_var(&xx);
10098-
free_var(&ni);
1009910236
free_var(&elem);
1010010237
free_var(&fact);
1010110238
}

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp