Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit1fcd0ad

Browse files
committed
Add approximated Zipfian-distributed random generator to pgbench.
Generator helps to make close to real-world tests.Author: Alik KhilazhevReviewed-By: Fabien COELHODiscussion:https://www.postgresql.org/message-id/flat/BF3B6F54-68C3-417A-BFAB-FB4D66F2B410@postgrespro.ru
1 parent538d114 commit1fcd0ad

File tree

5 files changed

+263
-3
lines changed

5 files changed

+263
-3
lines changed

‎doc/src/sgml/ref/pgbench.sgml

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1092,6 +1092,14 @@ pgbench <optional> <replaceable>options</replaceable> </optional> <replaceable>d
10921092
<entry><literal>random_gaussian(1, 10, 2.5)</literal></entry>
10931093
<entry>an integer between <literal>1</literal> and <literal>10</literal></entry>
10941094
</row>
1095+
<row>
1096+
<entry><literal><function>random_zipfian(<replaceable>lb</replaceable>, <replaceable>ub</replaceable>, <replaceable>parameter</replaceable>)</function></literal></entry>
1097+
<entry>integer</entry>
1098+
<entry>Zipfian-distributed random integer in <literal>[lb, ub]</literal>,
1099+
see below</entry>
1100+
<entry><literal>random_zipfian(1, 10, 1.5)</literal></entry>
1101+
<entry>an integer between <literal>1</literal> and <literal>10</literal></entry>
1102+
</row>
10951103
<row>
10961104
<entry><literal><function>sqrt(<replaceable>x</replaceable>)</function></literal></entry>
10971105
<entry>double</entry>
@@ -1173,6 +1181,27 @@ f(x) = PHI(2.0 * parameter * (x - mu) / (max - min + 1)) /
11731181
of the Box-Muller transform.
11741182
</para>
11751183
</listitem>
1184+
<listitem>
1185+
<para>
1186+
<literal>random_zipfian</literal> generates an approximated bounded zipfian
1187+
distribution. For <replaceable>parameter</replaceable> in (0, 1), an
1188+
approximated algorithm is taken from
1189+
"Quickly Generating Billion-Record Synthetic Databases",
1190+
Jim Gray et al, SIGMOD 1994. For <replaceable>parameter</replaceable>
1191+
in (1, 1000), a rejection method is used, based on
1192+
"Non-Uniform Random Variate Generation", Luc Devroye, p. 550-551,
1193+
Springer 1986. The distribution is not defined when the parameter's
1194+
value is 1.0. The drawing performance is poor for parameter values
1195+
close and above 1.0 and on a small range.
1196+
</para>
1197+
<para>
1198+
<replaceable>parameter</replaceable>
1199+
defines how skewed the distribution is. The larger the <replaceable>parameter</replaceable>, the more
1200+
frequently values to the beginning of the interval are drawn.
1201+
The closer to 0 <replaceable>parameter</replaceable> is,
1202+
the flatter (more uniform) the access distribution.
1203+
</para>
1204+
</listitem>
11761205
</itemizedlist>
11771206

11781207
<para>

‎src/bin/pgbench/exprparse.y

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,9 @@ static const struct
191191
{
192192
"random_exponential",3, PGBENCH_RANDOM_EXPONENTIAL
193193
},
194+
{
195+
"random_zipfian",3, PGBENCH_RANDOM_ZIPFIAN
196+
},
194197
/* keep as last array element*/
195198
{
196199
NULL,0,0

‎src/bin/pgbench/pgbench.c

Lines changed: 192 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,10 @@ static intpthread_join(pthread_t th, void **thread_return);
9595
#defineLOG_STEP_SECONDS5/* seconds between log messages */
9696
#defineDEFAULT_NXACTS10/* default nxacts */
9797

98+
#defineZIPF_CACHE_SIZE15/* cache cells number */
99+
98100
#defineMIN_GAUSSIAN_PARAM2.0/* minimum parameter for gauss */
101+
#defineMAX_ZIPFIAN_PARAM1000/* maximum parameter for zipfian */
99102

100103
intnxacts=0;/* number of transactions per client */
101104
intduration=0;/* duration in seconds */
@@ -330,6 +333,35 @@ typedef struct
330333
intecnt;/* error count */
331334
}CState;
332335

336+
/*
337+
* Cache cell for zipfian_random call
338+
*/
339+
typedefstruct
340+
{
341+
/* cell keys */
342+
doubles;/* s - parameter of zipfan_random function */
343+
int64n;/* number of elements in range (max - min + 1) */
344+
345+
doubleharmonicn;/* generalizedHarmonicNumber(n, s) */
346+
doublealpha;
347+
doublebeta;
348+
doubleeta;
349+
350+
uint64last_used;/* last used logical time */
351+
}ZipfCell;
352+
353+
/*
354+
* Zipf cache for zeta values
355+
*/
356+
typedefstruct
357+
{
358+
uint64current;/* counter for LRU cache replacement algorithm */
359+
360+
intnb_cells;/* number of filled cells */
361+
intoverflowCount;/* number of cache overflows */
362+
ZipfCellcells[ZIPF_CACHE_SIZE];
363+
}ZipfCache;
364+
333365
/*
334366
* Thread state
335367
*/
@@ -342,6 +374,8 @@ typedef struct
342374
unsigned shortrandom_state[3];/* separate randomness for each thread */
343375
int64throttle_trigger;/* previous/next throttling (us) */
344376
FILE*logfile;/* where to log, or NULL */
377+
ZipfCachezipf_cache;/* for thread-safe zipfian random number
378+
* generation */
345379

346380
/* per thread collected stats */
347381
instr_timestart_time;/* thread start time */
@@ -746,6 +780,137 @@ getPoissonRand(TState *thread, int64 center)
746780
return (int64) (-log(uniform)* ((double)center)+0.5);
747781
}
748782

783+
/* helper function for getZipfianRand */
784+
staticdouble
785+
generalizedHarmonicNumber(int64n,doubles)
786+
{
787+
inti;
788+
doubleans=0.0;
789+
790+
for (i=n;i>1;i--)
791+
ans+=pow(i,-s);
792+
returnans+1.0;
793+
}
794+
795+
/* set harmonicn and other parameters to cache cell */
796+
staticvoid
797+
zipfSetCacheCell(ZipfCell*cell,int64n,doubles)
798+
{
799+
doubleharmonic2;
800+
801+
cell->n=n;
802+
cell->s=s;
803+
804+
harmonic2=generalizedHarmonicNumber(2,s);
805+
cell->harmonicn=generalizedHarmonicNumber(n,s);
806+
807+
cell->alpha=1.0 / (1.0-s);
808+
cell->beta=pow(0.5,s);
809+
cell->eta= (1.0-pow(2.0 /n,1.0-s)) / (1.0-harmonic2 /cell->harmonicn);
810+
}
811+
812+
/*
813+
* search for cache cell with keys (n, s)
814+
* and create new cell if it does not exist
815+
*/
816+
staticZipfCell*
817+
zipfFindOrCreateCacheCell(ZipfCache*cache,int64n,doubles)
818+
{
819+
inti,
820+
least_recently_used=0;
821+
ZipfCell*cell;
822+
823+
/* search cached cell for given parameters */
824+
for (i=0;i<cache->nb_cells;i++)
825+
{
826+
cell=&cache->cells[i];
827+
if (cell->n==n&&cell->s==s)
828+
return&cache->cells[i];
829+
830+
if (cell->last_used<cache->cells[least_recently_used].last_used)
831+
least_recently_used=i;
832+
}
833+
834+
/* create new one if it does not exist */
835+
if (cache->nb_cells<ZIPF_CACHE_SIZE)
836+
i=cache->nb_cells++;
837+
else
838+
{
839+
/* replace LRU cell if cache is full */
840+
i=least_recently_used;
841+
cache->overflowCount++;
842+
}
843+
844+
zipfSetCacheCell(&cache->cells[i],n,s);
845+
846+
cache->cells[i].last_used=cache->current++;
847+
return&cache->cells[i];
848+
}
849+
850+
/*
851+
* Computing zipfian using rejection method, based on
852+
* "Non-Uniform Random Variate Generation",
853+
* Luc Devroye, p. 550-551, Springer 1986.
854+
*/
855+
staticint64
856+
computeIterativeZipfian(TState*thread,int64n,doubles)
857+
{
858+
doubleb=pow(2.0,s-1.0);
859+
doublex,
860+
t,
861+
u,
862+
v;
863+
864+
while (true)
865+
{
866+
/* random variates */
867+
u=pg_erand48(thread->random_state);
868+
v=pg_erand48(thread->random_state);
869+
870+
x=floor(pow(u,-1.0 / (s-1.0)));
871+
872+
t=pow(1.0+1.0 /x,s-1.0);
873+
/* reject if too large or out of bound */
874+
if (v*x* (t-1.0) / (b-1.0) <=t /b&&x <=n)
875+
break;
876+
}
877+
return (int64)x;
878+
}
879+
880+
/*
881+
* Computing zipfian using harmonic numbers, based on algorithm described in
882+
* "Quickly Generating Billion-Record Synthetic Databases",
883+
* Jim Gray et al, SIGMOD 1994
884+
*/
885+
staticint64
886+
computeHarmonicZipfian(TState*thread,int64n,doubles)
887+
{
888+
ZipfCell*cell=zipfFindOrCreateCacheCell(&thread->zipf_cache,n,s);
889+
doubleuniform=pg_erand48(thread->random_state);
890+
doubleuz=uniform*cell->harmonicn;
891+
892+
if (uz<1.0)
893+
return1;
894+
if (uz<1.0+cell->beta)
895+
return2;
896+
return1+ (int64) (cell->n*pow(cell->eta*uniform-cell->eta+1.0,cell->alpha));
897+
}
898+
899+
/* random number generator: zipfian distribution from min to max inclusive */
900+
staticint64
901+
getZipfianRand(TState*thread,int64min,int64max,doubles)
902+
{
903+
int64n=max-min+1;
904+
905+
/* abort if parameter is invalid */
906+
Assert(s>0.0&&s!=1.0&&s <=MAX_ZIPFIAN_PARAM);
907+
908+
909+
returnmin-1+ ((s>1)
910+
?computeIterativeZipfian(thread,n,s)
911+
:computeHarmonicZipfian(thread,n,s));
912+
}
913+
749914
/*
750915
* Initialize the given SimpleStats struct to all zeroes
751916
*/
@@ -1303,7 +1468,6 @@ coerceToDouble(PgBenchValue *pval, double *dval)
13031468
return true;
13041469
}
13051470
}
1306-
13071471
/* assign an integer value */
13081472
staticvoid
13091473
setIntValue(PgBenchValue*pv,int64ival)
@@ -1605,6 +1769,7 @@ evalFunc(TState *thread, CState *st,
16051769
casePGBENCH_RANDOM:
16061770
casePGBENCH_RANDOM_EXPONENTIAL:
16071771
casePGBENCH_RANDOM_GAUSSIAN:
1772+
casePGBENCH_RANDOM_ZIPFIAN:
16081773
{
16091774
int64imin,
16101775
imax;
@@ -1655,6 +1820,18 @@ evalFunc(TState *thread, CState *st,
16551820
setIntValue(retval,
16561821
getGaussianRand(thread,imin,imax,param));
16571822
}
1823+
elseif (func==PGBENCH_RANDOM_ZIPFIAN)
1824+
{
1825+
if (param <=0.0||param==1.0||param>MAX_ZIPFIAN_PARAM)
1826+
{
1827+
fprintf(stderr,
1828+
"zipfian parameter must be in range (0, 1) U (1, %d]"
1829+
" (got %f)\n",MAX_ZIPFIAN_PARAM,param);
1830+
return false;
1831+
}
1832+
setIntValue(retval,
1833+
getZipfianRand(thread,imin,imax,param));
1834+
}
16581835
else/* exponential */
16591836
{
16601837
if (param <=0.0)
@@ -3683,6 +3860,8 @@ printResults(TState *threads, StatsData *total, instr_time total_time,
36833860
tps_include,
36843861
tps_exclude;
36853862
int64ntx=total->cnt-total->skipped;
3863+
inti,
3864+
totalCacheOverflows=0;
36863865

36873866
time_include=INSTR_TIME_GET_DOUBLE(total_time);
36883867

@@ -3710,6 +3889,15 @@ printResults(TState *threads, StatsData *total, instr_time total_time,
37103889
printf("number of transactions actually processed: "INT64_FORMAT"\n",
37113890
ntx);
37123891
}
3892+
/* Report zipfian cache overflow */
3893+
for (i=0;i<nthreads;i++)
3894+
{
3895+
totalCacheOverflows+=threads[i].zipf_cache.overflowCount;
3896+
}
3897+
if (totalCacheOverflows>0)
3898+
{
3899+
printf("zipfian cache array overflowed %d time(s)\n",totalCacheOverflows);
3900+
}
37133901

37143902
/* Remaining stats are nonsensical if we failed to execute any xacts */
37153903
if (total->cnt <=0)
@@ -4513,6 +4701,9 @@ main(int argc, char **argv)
45134701
thread->random_state[2]=random();
45144702
thread->logfile=NULL;/* filled in later */
45154703
thread->latency_late=0;
4704+
thread->zipf_cache.nb_cells=0;
4705+
thread->zipf_cache.current=0;
4706+
thread->zipf_cache.overflowCount=0;
45164707
initStats(&thread->stats,0);
45174708

45184709
nclients_dealt+=thread->nstate;

‎src/bin/pgbench/pgbench.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,8 @@ typedef enum PgBenchFunction
7575
PGBENCH_SQRT,
7676
PGBENCH_RANDOM,
7777
PGBENCH_RANDOM_GAUSSIAN,
78-
PGBENCH_RANDOM_EXPONENTIAL
78+
PGBENCH_RANDOM_EXPONENTIAL,
79+
PGBENCH_RANDOM_ZIPFIAN
7980
}PgBenchFunction;
8081

8182
typedefstructPgBenchExprPgBenchExpr;

‎src/bin/pgbench/t/001_pgbench_with_server.pl

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -231,7 +231,8 @@ sub pgbench
231231
qr{command=18.: double 18\b},
232232
qr{command=19.: double 19\b},
233233
qr{command=20.: double 20\b},
234-
qr{command=21.: int 9223372036854775807\b}, ],
234+
qr{command=21.: int 9223372036854775807\b},
235+
qr{command=23.: int [1-9]\b}, ],
235236
'pgbench expressions',
236237
{'001_pgbench_expressions'=>q{-- integer functions
237238
\set i1 debug(random(1, 100))
@@ -261,6 +262,8 @@ sub pgbench
261262
\set maxint debug(:minint - 1)
262263
-- reset a variable
263264
\set i1 0
265+
-- yet another integer function
266+
\set id debug(random_zipfian(1, 9, 1.3))
264267
} });
265268

266269
# backslash commands
@@ -371,6 +374,14 @@ sub pgbench
371374
0,
372375
[qr{exponential parameter must be greater}],
373376
q{\set i random_exponential(0, 10, 0.0)} ],
377+
['set zipfian param to 1',
378+
0,
379+
[qr{zipfian parameter must be in range\(0, 1\) U\(1,\d+\]}],
380+
q{\set i random_zipfian(0, 10, 1)} ],
381+
['set zipfian param too large',
382+
0,
383+
[qr{zipfian parameter must be in range\(0, 1\) U\(1,\d+\]}],
384+
q{\set i random_zipfian(0, 10, 1000000)} ],
374385
['set non numeric value', 0,
375386
[qr{malformed variable "foo" value: "bla"}],q{\set i :foo + 1} ],
376387
['set no expression', 1, [qr{syntax error}],q{\set i} ],
@@ -412,6 +423,31 @@ sub pgbench
412423
{$n=>$script });
413424
}
414425

426+
# zipfian cache array overflow
427+
pgbench(
428+
'-t 1', 0,
429+
[qr{processed: 1/1},qr{zipfian cache array overflowed 1 time\(s\)} ],
430+
[qr{^} ],
431+
'pgbench zipfian array overflow on random_zipfian',
432+
{'001_pgbench_random_zipfian'=>q{
433+
\set i random_zipfian(1, 100, 0.5)
434+
\set i random_zipfian(2, 100, 0.5)
435+
\set i random_zipfian(3, 100, 0.5)
436+
\set i random_zipfian(4, 100, 0.5)
437+
\set i random_zipfian(5, 100, 0.5)
438+
\set i random_zipfian(6, 100, 0.5)
439+
\set i random_zipfian(7, 100, 0.5)
440+
\set i random_zipfian(8, 100, 0.5)
441+
\set i random_zipfian(9, 100, 0.5)
442+
\set i random_zipfian(10, 100, 0.5)
443+
\set i random_zipfian(11, 100, 0.5)
444+
\set i random_zipfian(12, 100, 0.5)
445+
\set i random_zipfian(13, 100, 0.5)
446+
\set i random_zipfian(14, 100, 0.5)
447+
\set i random_zipfian(15, 100, 0.5)
448+
\set i random_zipfian(16, 100, 0.5)
449+
} });
450+
415451
# throttling
416452
pgbench(
417453
'-t 100 -S --rate=100000 --latency-limit=1000000 -c 2 -n -r',

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp