Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 6b9bba2

Browse files
committed
Marginal performance hacking in erand48.c.
Get rid of the multiplier and addend variables in favor of hard-wired constants. Do the multiply-and-add using uint64 arithmetic, rather than manually combining several narrower multiplications and additions. Make _dorand48 return the full-width new random value, and have its callers use that directly (after suitable masking) rather than reconstructing what they need from the unsigned short[] representation. On my machine, this is good for a nearly factor-of-2 speedup of pg_erand48(), probably mostly from needing just one call of ldexp() rather than three. The wins for the other functions are smaller but measurable. While none of the existing call sites are really performance-critical, a cycle saved is a cycle earned; and besides the machine code is smaller this way (at least on x86_64). Patch by me, but the original idea to optimize this by switching to int64 arithmetic is from Fabien Coelho. Discussion: https://postgr.es/m/1551.1546018192@sss.pgh.pa.us
1 parent e090466 commit 6b9bba2

File tree

1 file changed

+33
-38
lines changed

1 file changed

+33
-38
lines changed

‎src/port/erand48.c

Lines changed: 33 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -37,48 +37,46 @@
3737

3838
#include<math.h>
3939

40+
/* These values are specified by POSIX */
41+
#defineRAND48_MULTUINT64CONST(0x0005deece66d)
42+
#defineRAND48_ADDUINT64CONST(0x000b)
43+
44+
/* POSIX specifies 0x330e's use in srand48, but the other bits are arbitrary */
4045
#defineRAND48_SEED_0(0x330e)
4146
#defineRAND48_SEED_1(0xabcd)
4247
#defineRAND48_SEED_2(0x1234)
43-
#defineRAND48_MULT_0(0xe66d)
44-
#defineRAND48_MULT_1(0xdeec)
45-
#defineRAND48_MULT_2(0x0005)
46-
#defineRAND48_ADD(0x000b)
4748

4849
staticunsigned short_rand48_seed[3]= {
4950
RAND48_SEED_0,
5051
RAND48_SEED_1,
5152
RAND48_SEED_2
5253
};
53-
staticunsigned short_rand48_mult[3]= {
54-
RAND48_MULT_0,
55-
RAND48_MULT_1,
56-
RAND48_MULT_2
57-
};
58-
staticunsigned short_rand48_add=RAND48_ADD;
5954

6055

6156
/*
6257
* Advance the 48-bit value stored in xseed[] to the next "random" number.
58+
*
59+
* Also returns the value of that number --- without masking it to 48 bits.
60+
* If caller uses the result, it must mask off the bits it wants.
6361
*/
64-
staticvoid
62+
staticuint64
6563
_dorand48(unsigned shortxseed[3])
6664
{
67-
unsigned longaccu;
68-
unsigned shorttemp[2];
69-
70-
accu= (unsigned long)_rand48_mult[0]* (unsigned long)xseed[0]+
71-
(unsigned long)_rand48_add;
72-
temp[0]= (unsigned short)accu;/* lower 16 bits */
73-
accu >>=sizeof(unsigned short)*8;
74-
accu+= (unsigned long)_rand48_mult[0]* (unsigned long)xseed[1]+
75-
(unsigned long)_rand48_mult[1]*(unsigned long)xseed[0];
76-
temp[1]= (unsigned short)accu;/* middle 16 bits */
77-
accu >>=sizeof(unsigned short)*8;
78-
accu+=_rand48_mult[0]*xseed[2]+_rand48_mult[1]*xseed[1]+_rand48_mult[2]*xseed[0];
79-
xseed[0]=temp[0];
80-
xseed[1]=temp[1];
81-
xseed[2]= (unsigned short)accu;
65+
/*
66+
* We do the arithmetic in uint64; any type wider than 48 bits would work.
67+
*/
68+
uint64in;
69+
uint64out;
70+
71+
in= (uint64)xseed[2] <<32 | (uint64)xseed[1] <<16 | (uint64)xseed[0];
72+
73+
out=in*RAND48_MULT+RAND48_ADD;
74+
75+
xseed[0]=out&0xFFFF;
76+
xseed[1]= (out >>16)&0xFFFF;
77+
xseed[2]=(out >>32)&0xFFFF;
78+
79+
returnout;
8280
}
8381

8482

@@ -89,10 +87,9 @@ _dorand48(unsigned short xseed[3])
8987
double
9088
pg_erand48(unsigned shortxseed[3])
9189
{
92-
_dorand48(xseed);
93-
returnldexp((double)xseed[0],-48)+
94-
ldexp((double)xseed[1],-32)+
95-
ldexp((double)xseed[2],-16);
90+
uint64x=_dorand48(xseed);
91+
92+
returnldexp((double) (x&UINT64CONST(0xFFFFFFFFFFFF)),-48);
9693
}
9794

9895
/*
@@ -102,8 +99,9 @@ pg_erand48(unsigned short xseed[3])
10299
long
103100
pg_lrand48(void)
104101
{
105-
_dorand48(_rand48_seed);
106-
return ((long)_rand48_seed[2] <<15)+ ((long)_rand48_seed[1] >>1);
102+
uint64x=_dorand48(_rand48_seed);
103+
104+
return (x >>17)&UINT64CONST(0x7FFFFFFF);
107105
}
108106

109107
/*
@@ -113,8 +111,9 @@ pg_lrand48(void)
113111
long
114112
pg_jrand48(unsigned shortxseed[3])
115113
{
116-
_dorand48(xseed);
117-
return (int32) (((uint32)xseed[2] <<16)+ (uint32)xseed[1]);
114+
uint64x=_dorand48(xseed);
115+
116+
return (int32) ((x >>16)&UINT64CONST(0xFFFFFFFF));
118117
}
119118

120119
/*
@@ -134,8 +133,4 @@ pg_srand48(long seed)
134133
_rand48_seed[0]=RAND48_SEED_0;
135134
_rand48_seed[1]= (unsigned short)seed;
136135
_rand48_seed[2]= (unsigned short) (seed >>16);
137-
_rand48_mult[0]=RAND48_MULT_0;
138-
_rand48_mult[1]=RAND48_MULT_1;
139-
_rand48_mult[2]=RAND48_MULT_2;
140-
_rand48_add=RAND48_ADD;
141136
}

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp