Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commited802e7

Browse files
committed
pgbench: Allow \setrandom to generate Gaussian/exponential distributions.
Mitsumasa KONDO and Fabien COELHO, with further wordsmithing by me.
1 parente280c63 commited802e7

File tree

2 files changed

+231
-13
lines changed

2 files changed

+231
-13
lines changed

‎contrib/pgbench/pgbench.c

Lines changed: 173 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,8 @@ static intpthread_join(pthread_t th, void **thread_return);
9898
#defineLOG_STEP_SECONDS5/* seconds between log messages */
9999
#defineDEFAULT_NXACTS10/* default nxacts */
100100

101+
#defineMIN_GAUSSIAN_THRESHOLD2.0/* minimum threshold for gauss */
102+
101103
intnxacts=0;/* number of transactions per client */
102104
intduration=0;/* duration in seconds */
103105

@@ -471,6 +473,76 @@ getrand(TState *thread, int64 min, int64 max)
471473
returnmin+ (int64) ((max-min+1)*pg_erand48(thread->random_state));
472474
}
473475

476+
/*
477+
* random number generator: exponential distribution from min to max inclusive.
478+
* the threshold is so that the density of probability for the last cut-off max
479+
* value is exp(-threshold).
480+
*/
481+
staticint64
482+
getExponentialRand(TState*thread,int64min,int64max,doublethreshold)
483+
{
484+
doublecut,uniform,rand;
485+
Assert(threshold>0.0);
486+
cut=exp(-threshold);
487+
/* erand in [0, 1), uniform in (0, 1] */
488+
uniform=1.0-pg_erand48(thread->random_state);
489+
/*
490+
* inner expression in (cut, 1] (if threshold > 0),
491+
* rand in [0, 1)
492+
*/
493+
Assert((1.0-cut)!=0.0);
494+
rand=-log(cut+ (1.0-cut)*uniform) /threshold;
495+
/* return int64 random number between min and max inclusive */
496+
returnmin+ (int64)((max-min+1)*rand);
497+
}
498+
499+
/* random number generator: gaussian distribution from min to max inclusive */
500+
staticint64
501+
getGaussianRand(TState*thread,int64min,int64max,doublethreshold)
502+
{
503+
doublestdev;
504+
doublerand;
505+
506+
/*
507+
* Get user specified random number from this loop, with
508+
* -threshold <= stdev < threshold
509+
*
510+
* This loop is executed until the number is in the expected range.
511+
*
512+
* As the minimum threshold is 2.0, the probability of looping is low:
513+
* sqrt(-2 ln(r)) <= 2 => r >= e^{-2} ~ 0.135, then when taking the average
514+
* sine multiplier as 2/pi, we have an 8.6% looping probability in the
515+
* worst case. For a 5.0 threshold value, the looping probability
516+
* is about e^{-5} * 2 / pi ~ 0.43%.
517+
*/
518+
do
519+
{
520+
/*
521+
* pg_erand48 generates [0,1), but for the basic version of the
522+
* Box-Muller transform the two uniformly distributed random numbers
523+
* are expected in (0, 1] (see http://en.wikipedia.org/wiki/Box_muller)
524+
*/
525+
doublerand1=1.0-pg_erand48(thread->random_state);
526+
doublerand2=1.0-pg_erand48(thread->random_state);
527+
528+
/* Box-Muller basic form transform */
529+
doublevar_sqrt=sqrt(-2.0*log(rand1));
530+
stdev=var_sqrt*sin(2.0*M_PI*rand2);
531+
532+
/*
533+
* we may try with cos, but there may be a bias induced if the previous
534+
* value fails the test. To be on the safe side, let us try over.
535+
*/
536+
}
537+
while (stdev<-threshold||stdev >=threshold);
538+
539+
/* stdev is in [-threshold, threshold), normalization to [0,1) */
540+
rand= (stdev+threshold) / (threshold*2.0);
541+
542+
/* return int64 random number between min and max inclusive */
543+
returnmin+ (int64)((max-min+1)*rand);
544+
}
545+
474546
/* call PQexec() and exit() on failure */
475547
staticvoid
476548
executeStatement(PGconn*con,constchar*sql)
@@ -1319,6 +1391,7 @@ doCustom(TState *thread, CState *st, instr_time *conn_time, FILE *logfile, AggVa
13191391
char*var;
13201392
int64min,
13211393
max;
1394+
doublethreshold=0;
13221395
charres[64];
13231396

13241397
if (*argv[2]==':')
@@ -1364,11 +1437,11 @@ doCustom(TState *thread, CState *st, instr_time *conn_time, FILE *logfile, AggVa
13641437
}
13651438

13661439
/*
1367-
*getrand() needsto be able to subtract max from min and add one
1368-
*to the result without overflowing. Since we know max > min, we
1369-
* can detect overflow just by checking for a negative result. But
1370-
* we must check both that the subtraction doesn't overflow, and
1371-
* that adding one to the result doesn't overflow either.
1440+
* Random number generation functions need to be able to subtract
1441+
* min from max and add one to the result without overflowing.
1442+
* Since we know max > min, we can detect overflow just by checking
1443+
* for a negative result. But we must check both that the subtraction
1444+
* doesn't overflow, and that adding one to the result doesn't overflow either.
13721445
*/
13731446
if (max-min<0|| (max-min)+1<0)
13741447
{
@@ -1377,10 +1450,64 @@ doCustom(TState *thread, CState *st, instr_time *conn_time, FILE *logfile, AggVa
13771450
return true;
13781451
}
13791452

1453+
if (argc==4||/* uniform without or with "uniform" keyword */
1454+
(argc==5&&pg_strcasecmp(argv[4],"uniform")==0))
1455+
{
1456+
#ifdefDEBUG
1457+
printf("min: "INT64_FORMAT" max: "INT64_FORMAT" random: "INT64_FORMAT"\n",min,max,getrand(thread,min,max));
1458+
#endif
1459+
snprintf(res,sizeof(res),INT64_FORMAT,getrand(thread,min,max));
1460+
}
1461+
elseif (argc==6&&
1462+
((pg_strcasecmp(argv[4],"gaussian")==0)||
1463+
(pg_strcasecmp(argv[4],"exponential")==0)))
1464+
{
1465+
if (*argv[5]==':')
1466+
{
1467+
if ((var=getVariable(st,argv[5]+1))==NULL)
1468+
{
1469+
fprintf(stderr,"%s: invalid threshold number %s\n",argv[0],argv[5]);
1470+
st->ecnt++;
1471+
return true;
1472+
}
1473+
threshold=strtod(var,NULL);
1474+
}
1475+
else
1476+
threshold=strtod(argv[5],NULL);
1477+
1478+
if (pg_strcasecmp(argv[4],"gaussian")==0)
1479+
{
1480+
if (threshold<MIN_GAUSSIAN_THRESHOLD)
1481+
{
1482+
fprintf(stderr,"%s: gaussian threshold must be at least %f\n,",argv[5],MIN_GAUSSIAN_THRESHOLD);
1483+
st->ecnt++;
1484+
return true;
1485+
}
1486+
#ifdefDEBUG
1487+
printf("min: "INT64_FORMAT" max: "INT64_FORMAT" random: "INT64_FORMAT"\n",min,max,getGaussianRand(thread,min,max,threshold));
1488+
#endif
1489+
snprintf(res,sizeof(res),INT64_FORMAT,getGaussianRand(thread,min,max,threshold));
1490+
}
1491+
elseif (pg_strcasecmp(argv[4],"exponential")==0)
1492+
{
1493+
if (threshold <=0.0)
1494+
{
1495+
fprintf(stderr,"%s: exponential threshold must be strictly positive\n,",argv[5]);
1496+
st->ecnt++;
1497+
return true;
1498+
}
13801499
#ifdefDEBUG
1381-
printf("min: "INT64_FORMAT" max: "INT64_FORMAT" random: "INT64_FORMAT"\n",min,max,getrand(thread,min,max));
1500+
printf("min: "INT64_FORMAT" max: "INT64_FORMAT" random: "INT64_FORMAT"\n",min,max,getExponentialRand(thread,min,max,threshold));
13821501
#endif
1383-
snprintf(res,sizeof(res),INT64_FORMAT,getrand(thread,min,max));
1502+
snprintf(res,sizeof(res),INT64_FORMAT,getExponentialRand(thread,min,max,threshold));
1503+
}
1504+
}
1505+
else/* this means an error somewhere in the parsing phase... */
1506+
{
1507+
fprintf(stderr,"%s: unexpected arguments\n",argv[0]);
1508+
st->ecnt++;
1509+
return true;
1510+
}
13841511

13851512
if (!putVariable(st,argv[0],argv[1],res))
13861513
{
@@ -1914,15 +2041,51 @@ process_commands(char *buf)
19142041

19152042
if (pg_strcasecmp(my_commands->argv[0],"setrandom")==0)
19162043
{
2044+
/* parsing:
2045+
* \setrandom variable min max [uniform]
2046+
* \setrandom variable min max (gaussian|exponential) threshold
2047+
*/
2048+
19172049
if (my_commands->argc<4)
19182050
{
19192051
fprintf(stderr,"%s: missing argument\n",my_commands->argv[0]);
19202052
exit(1);
19212053
}
2054+
/* argc >= 4 */
19222055

1923-
for (j=4;j<my_commands->argc;j++)
1924-
fprintf(stderr,"%s: extra argument \"%s\" ignored\n",
1925-
my_commands->argv[0],my_commands->argv[j]);
2056+
if (my_commands->argc==4||/* uniform without/with "uniform" keyword */
2057+
(my_commands->argc==5&&
2058+
pg_strcasecmp(my_commands->argv[4],"uniform")==0))
2059+
{
2060+
/* nothing to do */
2061+
}
2062+
elseif (/* argc >= 5 */
2063+
(pg_strcasecmp(my_commands->argv[4],"gaussian")==0)||
2064+
(pg_strcasecmp(my_commands->argv[4],"exponential")==0))
2065+
{
2066+
if (my_commands->argc<6)
2067+
{
2068+
fprintf(stderr,"%s(%s): missing threshold argument\n",my_commands->argv[0],my_commands->argv[4]);
2069+
exit(1);
2070+
}
2071+
elseif (my_commands->argc>6)
2072+
{
2073+
fprintf(stderr,"%s(%s): too many arguments (extra:",
2074+
my_commands->argv[0],my_commands->argv[4]);
2075+
for (j=6;j<my_commands->argc;j++)
2076+
fprintf(stderr," %s",my_commands->argv[j]);
2077+
fprintf(stderr,")\n");
2078+
exit(1);
2079+
}
2080+
}
2081+
else/* cannot parse, unexpected arguments */
2082+
{
2083+
fprintf(stderr,"%s: unexpected arguments (bad:",my_commands->argv[0]);
2084+
for (j=4;j<my_commands->argc;j++)
2085+
fprintf(stderr," %s",my_commands->argv[j]);
2086+
fprintf(stderr,")\n");
2087+
exit(1);
2088+
}
19262089
}
19272090
elseif (pg_strcasecmp(my_commands->argv[0],"set")==0)
19282091
{

‎doc/src/sgml/pgbench.sgml

Lines changed: 58 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -748,8 +748,8 @@ pgbench <optional> <replaceable>options</> </optional> <replaceable>dbname</>
748748

749749
<varlistentry>
750750
<term>
751-
<literal>\setrandom <replaceable>varname</> <replaceable>min</> <replaceable>max</></literal>
752-
</term>
751+
<literal>\setrandom <replaceable>varname</> <replaceable>min</> <replaceable>max</> [ uniform | { gaussian | exponential } <replaceable>threshold</> ]</literal>
752+
</term>
753753

754754
<listitem>
755755
<para>
@@ -760,10 +760,65 @@ pgbench <optional> <replaceable>options</> </optional> <replaceable>dbname</>
760760
having an integer value.
761761
</para>
762762

763+
<para>
764+
By default, or when <literal>uniform</> is specified, all values in the
765+
range are drawn with equal probability. Specifying <literal>gaussian</>
766+
or <literal>exponential</> options modifies this behavior; each
767+
requires a mandatory threshold which determines the precise shape of the
768+
distribution.
769+
</para>
770+
771+
<para>
772+
For a Gaussian distribution, the interval is mapped onto a standard
773+
normal distribution (the classical bell-shaped Gaussian curve) truncated
774+
at <literal>-threshold</> on the left and <literal>+threshold</>
775+
on the right.
776+
To be precise, if <literal>PHI(x)</> is the cumulative distribution
777+
function of the standard normal distribution, with mean <literal>mu</>
778+
defined as <literal>(max + min) / 2.0</>, then value <replaceable>i</>
779+
between <replaceable>min</> and <replaceable>max</> inclusive is drawn
780+
with probability:
781+
<literal>
782+
(PHI(2.0 * threshold * (i - mu + 0.5) / (max - min + 1)) -
783+
PHI(2.0 * threshold * (i - mu - 0.5) / (max - min + 1))) /
784+
(2.0 * PHI(threshold) - 1.0)
785+
</>
786+
Intuitively, the larger the <replaceable>threshold</>, the more
787+
frequently values close to the middle of the interval are drawn, and the
788+
less frequently values close to the <replaceable>min</> and
789+
<replaceable>max</> bounds.
790+
About 67% of values are drawn from the middle <literal>1.0 / threshold</>
791+
and 95% in the middle <literal>2.0 / threshold</>; for instance, if
792+
<replaceable>threshold</> is 4.0, 67% of values are drawn from the middle
793+
quarter and 95% from the middle half of the interval.
794+
The minimum <replaceable>threshold</> is 2.0 for performance of
795+
the Box-Muller transform.
796+
</para>
797+
798+
<para>
799+
For an exponential distribution, the <replaceable>threshold</>
800+
parameter controls the distribution by truncating a quickly-decreasing
801+
exponential distribution at <replaceable>threshold</>, and then
802+
projecting onto integers between the bounds.
803+
To be precise, value <replaceable>i</> between <replaceable>min</> and
804+
<replaceable>max</> inclusive is drawn with probability:
805+
<literal>(exp(-threshold*(i-min)/(max+1-min)) -
806+
exp(-threshold*(i+1-min)/(max+1-min))) / (1.0 - exp(-threshold))</>.
807+
Intuitively, the larger the <replaceable>threshold</>, the more
808+
frequently values close to <replaceable>min</> are accessed, and the
809+
less frequently values close to <replaceable>max</> are accessed.
810+
The closer to 0 the threshold, the flatter (more uniform) the access
811+
distribution.
812+
A crude approximation of the distribution is that the most frequent 1%
813+
values in the range, close to <replaceable>min</>, are drawn
814+
<replaceable>threshold</>% of the time.
815+
The <replaceable>threshold</> value must be strictly positive.
816+
</para>
817+
763818
<para>
764819
Example:
765820
<programlisting>
766-
\setrandom aid 1 :naccounts
821+
\setrandom aid 1 :naccounts gaussian 5.0
767822
</programlisting></para>
768823
</listitem>
769824
</varlistentry>

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp