Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commite51a048

Browse files
committed
Add general purpose hashing functions to pgbench.
Hashing functions are useful for simulating real-world workloads in tests, such as web workloads; YCSB benchmarks are one example.
Author: Ildar Musin, with minor edits by me
Reviewed by: Fabien Coelho, me
Discussion: https://www.postgresql.org/message-id/flat/0e8bd39e-dfcd-2879-f88f-272799ad7ef2@postgrespro.ru
1 parent8bb3c7d commite51a048

File tree

5 files changed

+239
-34
lines changed

5 files changed

+239
-34
lines changed

‎doc/src/sgml/ref/pgbench.sgml

Lines changed: 55 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -874,13 +874,18 @@ pgbench <optional> <replaceable>options</replaceable> </optional> <replaceable>d
874874

875875
<tbody>
876876
<row>
877-
<entry> <literal>scale</literal> </entry>
878-
<entry>current scale factor</entry>
877+
<entry> <literal>client_id</literal> </entry>
878+
<entry>unique number identifying the client session (starts from zero)</entry>
879879
</row>
880880

881881
<row>
882-
<entry> <literal>client_id</literal> </entry>
883-
<entry>unique number identifying the client session (starts from zero)</entry>
882+
<entry> <literal>default_seed</literal> </entry>
883+
<entry>seed used in hash functions by default</entry>
884+
</row>
885+
886+
<row>
887+
<entry> <literal>scale</literal> </entry>
888+
<entry>current scale factor</entry>
884889
</row>
885890
</tbody>
886891
</tgroup>
@@ -1245,6 +1250,27 @@ pgbench <optional> <replaceable>options</replaceable> </optional> <replaceable>d
12451250
<entry><literal>greatest(5, 4, 3, 2)</literal></entry>
12461251
<entry><literal>5</literal></entry>
12471252
</row>
1253+
<row>
1254+
<entry><literal><function>hash(<replaceable>a</replaceable> [, <replaceable>seed</replaceable> ] )</function></literal></entry>
1255+
<entry>integer</entry>
1256+
<entry>alias for <literal>hash_murmur2()</literal></entry>
1257+
<entry><literal>hash(10, 5432)</literal></entry>
1258+
<entry><literal>-5817877081768721676</literal></entry>
1259+
</row>
1260+
<row>
1261+
<entry><literal><function>hash_fnv1a(<replaceable>a</replaceable> [, <replaceable>seed</replaceable> ] )</function></literal></entry>
1262+
<entry>integer</entry>
1263+
<entry><ulink url="https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function">FNV-1a hash</ulink></entry>
1264+
<entry><literal>hash_fnv1a(10, 5432)</literal></entry>
1265+
<entry><literal>-7793829335365542153</literal></entry>
1266+
</row>
1267+
<row>
1268+
<entry><literal><function>hash_murmur2(<replaceable>a</replaceable> [, <replaceable>seed</replaceable> ] )</function></literal></entry>
1269+
<entry>integer</entry>
1270+
<entry><ulink url="https://en.wikipedia.org/wiki/MurmurHash">MurmurHash2 hash</ulink></entry>
1271+
<entry><literal>hash_murmur2(10, 5432)</literal></entry>
1272+
<entry><literal>-5817877081768721676</literal></entry>
1273+
</row>
12481274
<row>
12491275
<entry><literal><function>int(<replaceable>x</replaceable>)</function></literal></entry>
12501276
<entry>integer</entry>
@@ -1423,6 +1449,31 @@ f(x) = PHI(2.0 * parameter * (x - mu) / (max - min + 1)) /
14231449
</listitem>
14241450
</itemizedlist>
14251451

1452+
<para>
1453+
Hash functions <literal>hash</literal>, <literal>hash_murmur2</literal> and
1454+
<literal>hash_fnv1a</literal> accept an input value and an optional seed parameter.
1455+
In case the seed isn't provided the value of <literal>:default_seed</literal>
1456+
is used, which is initialized randomly unless set by the command-line
1457+
<literal>-D</literal> option. Hash functions can be used to scatter the
1458+
distribution of random functions such as <literal>random_zipfian</literal> or
1459+
<literal>random_exponential</literal>. For instance, the following pgbench
1460+
script simulates a possible real-world workload typical for social media and
1461+
blogging platforms where a few accounts generate excessive load:
1462+
1463+
<programlisting>
1464+
\set r random_zipfian(0, 100000000, 1.07)
1465+
\set k abs(hash(:r)) % 1000000
1466+
</programlisting>
1467+
1468+
In some cases several distinct distributions are needed which don't correlate
1469+
with each other and this is when the optional seed parameter comes in handy:
1470+
1471+
<programlisting>
1472+
\set k1 abs(hash(:r, :default_seed + 123)) % 1000000
1473+
\set k2 abs(hash(:r, :default_seed + 321)) % 1000000
1474+
</programlisting>
1475+
</para>
1476+
14261477
<para>
14271478
As an example, the full definition of the built-in TPC-B-like
14281479
transaction is:

‎src/bin/pgbench/exprparse.y

Lines changed: 73 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,10 @@
1616

1717
#include"pgbench.h"
1818

19+
#definePGBENCH_NARGS_VARIABLE(-1)
20+
#definePGBENCH_NARGS_CASE(-2)
21+
#definePGBENCH_NARGS_HASH(-3)
22+
1923
PgBenchExpr *expr_parse_result;
2024

2125
static PgBenchExprList *make_elist(PgBenchExpr *exp, PgBenchExprList *list);
@@ -226,9 +230,13 @@ make_uop(yyscan_t yyscanner, const char *operator, PgBenchExpr *expr)
226230
/*
227231
* List of available functions:
228232
* - fname: function name, "!..." for special internal functions
229-
* - nargs: number of arguments
230-
*-1 is a special value for least & greatest meaning #args >= 1
231-
*-2 is for the "CASE WHEN ..." function, which has #args >= 3 and odd
233+
* - nargs: number of arguments. Special cases:
234+
*- PGBENCH_NARGS_VARIABLE is a special value for least & greatest
235+
* meaning #args >= 1;
236+
*- PGBENCH_NARGS_CASE is for the "CASE WHEN ..." function, which
237+
* has #args >= 3 and odd;
238+
* - PGBENCH_NARGS_HASH is for hash functions, which have one required
239+
* and one optional argument;
232240
* - tag: function identifier from PgBenchFunction enum
233241
*/
234242
staticconststruct
@@ -259,10 +267,10 @@ static const struct
259267
"abs",1, PGBENCH_ABS
260268
},
261269
{
262-
"least",-1, PGBENCH_LEAST
270+
"least",PGBENCH_NARGS_VARIABLE, PGBENCH_LEAST
263271
},
264272
{
265-
"greatest",-1, PGBENCH_GREATEST
273+
"greatest",PGBENCH_NARGS_VARIABLE, PGBENCH_GREATEST
266274
},
267275
{
268276
"debug",1, PGBENCH_DEBUG
@@ -347,7 +355,25 @@ static const struct
347355
},
348356
/* "case when ... then ... else ... end" construction*/
349357
{
350-
"!case_end", -2, PGBENCH_CASE
358+
"!case_end", PGBENCH_NARGS_CASE, PGBENCH_CASE
359+
},
360+
{
361+
"hash", PGBENCH_NARGS_HASH, PGBENCH_HASH_MURMUR2
362+
},
363+
{
364+
"hash_murmur2", PGBENCH_NARGS_HASH, PGBENCH_HASH_MURMUR2
365+
},
366+
{
367+
"hash_fnv1a", PGBENCH_NARGS_HASH, PGBENCH_HASH_FNV1A
368+
},
369+
{
370+
"hash", PGBENCH_NARGS_HASH, PGBENCH_HASH_MURMUR2
371+
},
372+
{
373+
"hash_murmur2", PGBENCH_NARGS_HASH, PGBENCH_HASH_MURMUR2
374+
},
375+
{
376+
"hash_fnv1a", PGBENCH_NARGS_HASH, PGBENCH_HASH_FNV1A
351377
},
352378
/* keep as last array element*/
353379
{
@@ -423,29 +449,51 @@ elist_length(PgBenchExprList *list)
423449
static PgBenchExpr *
424450
make_func(yyscan_t yyscanner,int fnumber, PgBenchExprList *args)
425451
{
452+
int len =elist_length(args);
453+
426454
PgBenchExpr *expr =pg_malloc(sizeof(PgBenchExpr));
427455

428456
Assert(fnumber >=0);
429457

430-
if (PGBENCH_FUNCTIONS[fnumber].nargs >=0 &&
431-
PGBENCH_FUNCTIONS[fnumber].nargs !=elist_length(args))
432-
expr_yyerror_more(yyscanner,"unexpected number of arguments",
433-
PGBENCH_FUNCTIONS[fnumber].fname);
434-
435-
/* check at least one arg for least & greatest*/
436-
if (PGBENCH_FUNCTIONS[fnumber].nargs == -1 &&
437-
elist_length(args) ==0)
438-
expr_yyerror_more(yyscanner,"at least one argument expected",
439-
PGBENCH_FUNCTIONS[fnumber].fname);
440-
/* special case: case (when ... then ...)+ (else ...)? end*/
441-
if (PGBENCH_FUNCTIONS[fnumber].nargs == -2)
442-
{
443-
int len =elist_length(args);
444-
445-
/* 'else' branch is always present, but could be a NULL-constant*/
446-
if (len <3 || len %2 !=1)
447-
expr_yyerror_more(yyscanner,"odd and >= 3 number of arguments expected",
448-
"case control structure");
458+
/* validate arguments number including few special cases*/
459+
switch (PGBENCH_FUNCTIONS[fnumber].nargs)
460+
{
461+
/* check at least one arg for least & greatest*/
462+
case PGBENCH_NARGS_VARIABLE:
463+
if (len ==0)
464+
expr_yyerror_more(yyscanner,"at least one argument expected",
465+
PGBENCH_FUNCTIONS[fnumber].fname);
466+
break;
467+
468+
/* case (when ... then ...)+ (else ...)? end*/
469+
case PGBENCH_NARGS_CASE:
470+
/* 'else' branch is always present, but could be a NULL-constant*/
471+
if (len <3 || len %2 !=1)
472+
expr_yyerror_more(yyscanner,
473+
"odd and >= 3 number of arguments expected",
474+
"case control structure");
475+
break;
476+
477+
/* hash functions with optional seed argument*/
478+
case PGBENCH_NARGS_HASH:
479+
if (len >2)
480+
expr_yyerror_more(yyscanner,"unexpected number of arguments",
481+
PGBENCH_FUNCTIONS[fnumber].fname);
482+
483+
if (len ==1)
484+
{
485+
PgBenchExpr *var =make_variable("default_seed");
486+
args =make_elist(var, args);
487+
}
488+
break;
489+
490+
/* common case: positive arguments number*/
491+
default:
492+
Assert(PGBENCH_FUNCTIONS[fnumber].nargs >=0);
493+
494+
if (PGBENCH_FUNCTIONS[fnumber].nargs != len)
495+
expr_yyerror_more(yyscanner,"unexpected number of arguments",
496+
PGBENCH_FUNCTIONS[fnumber].fname);
449497
}
450498

451499
expr->etype = ENODE_FUNCTION;

‎src/bin/pgbench/pgbench.c

Lines changed: 97 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,14 @@
6060

6161
#defineERRCODE_UNDEFINED_TABLE "42P01"
6262

63+
/*
64+
* Hashing constants
65+
*/
66+
#defineFNV_PRIME 0x100000001b3
67+
#defineFNV_OFFSET_BASIS 0xcbf29ce484222325
68+
#defineMM2_MUL 0xc6a4a7935bd1e995
69+
#defineMM2_ROT 47
70+
6371
/*
6472
* Multi-platform pthread implementations
6573
*/
@@ -915,6 +923,54 @@ getZipfianRand(TState *thread, int64 min, int64 max, double s)
915923
:computeHarmonicZipfian(thread,n,s));
916924
}
917925

926+
/*
927+
* FNV-1a hash function
928+
*/
929+
staticint64
930+
getHashFnv1a(int64val,uint64seed)
931+
{
932+
int64result;
933+
inti;
934+
935+
result=FNV_OFFSET_BASIS ^seed;
936+
for (i=0;i<8;++i)
937+
{
938+
int32octet=val&0xff;
939+
940+
val=val >>8;
941+
result=result ^octet;
942+
result=result*FNV_PRIME;
943+
}
944+
945+
returnresult;
946+
}
947+
948+
/*
949+
* Murmur2 hash function
950+
*
951+
* Based on original work of Austin Appleby
952+
* https://github.com/aappleby/smhasher/blob/master/src/MurmurHash2.cpp
953+
*/
954+
staticint64
955+
getHashMurmur2(int64val,uint64seed)
956+
{
957+
uint64result=seed ^ (sizeof(int64)*MM2_MUL);
958+
uint64k= (uint64)val;
959+
960+
k *=MM2_MUL;
961+
k ^=k >>MM2_ROT;
962+
k *=MM2_MUL;
963+
964+
result ^=k;
965+
result *=MM2_MUL;
966+
967+
result ^=result >>MM2_ROT;
968+
result *=MM2_MUL;
969+
result ^=result >>MM2_ROT;
970+
971+
return (int64)result;
972+
}
973+
918974
/*
919975
* Initialize the given SimpleStats struct to all zeroes
920976
*/
@@ -2211,6 +2267,30 @@ evalStandardFunc(TState *thread, CState *st,
22112267
return true;
22122268
}
22132269

2270+
/* hashing */
2271+
casePGBENCH_HASH_FNV1A:
2272+
casePGBENCH_HASH_MURMUR2:
2273+
{
2274+
int64val,
2275+
seed;
2276+
2277+
Assert(nargs==2);
2278+
2279+
if (!coerceToInt(&vargs[0],&val)||
2280+
!coerceToInt(&vargs[1],&seed))
2281+
return false;
2282+
2283+
if (func==PGBENCH_HASH_MURMUR2)
2284+
setIntValue(retval,getHashMurmur2(val,seed));
2285+
elseif (func==PGBENCH_HASH_FNV1A)
2286+
setIntValue(retval,getHashFnv1a(val,seed));
2287+
else
2288+
/* cannot get here */
2289+
Assert(0);
2290+
2291+
return true;
2292+
}
2293+
22142294
default:
22152295
/* cannot get here */
22162296
Assert(0);
@@ -4963,6 +5043,10 @@ main(int argc, char **argv)
49635043
exit(1);
49645044
}
49655045

5046+
/* set random seed */
5047+
INSTR_TIME_SET_CURRENT(start_time);
5048+
srandom((unsignedint)INSTR_TIME_GET_MICROSEC(start_time));
5049+
49665050
if (internal_script_used)
49675051
{
49685052
/*
@@ -5024,6 +5108,19 @@ main(int argc, char **argv)
50245108
}
50255109
}
50265110

5111+
/* set default seed for hash functions */
5112+
if (lookupVariable(&state[0],"default_seed")==NULL)
5113+
{
5114+
uint64seed= ((uint64) (random()&0xFFFF) <<48) |
5115+
((uint64) (random()&0xFFFF) <<32) |
5116+
((uint64) (random()&0xFFFF) <<16) |
5117+
(uint64) (random()&0xFFFF);
5118+
5119+
for (i=0;i<nclients;i++)
5120+
if (!putVariableInt(&state[i],"startup","default_seed", (int64)seed))
5121+
exit(1);
5122+
}
5123+
50275124
if (!is_no_vacuum)
50285125
{
50295126
fprintf(stderr,"starting vacuum...");
@@ -5041,10 +5138,6 @@ main(int argc, char **argv)
50415138
}
50425139
PQfinish(con);
50435140

5044-
/* set random seed */
5045-
INSTR_TIME_SET_CURRENT(start_time);
5046-
srandom((unsignedint)INSTR_TIME_GET_MICROSEC(start_time));
5047-
50485141
/* set up thread data structures */
50495142
threads= (TState*)pg_malloc(sizeof(TState)*nthreads);
50505143
nclients_dealt=0;

‎src/bin/pgbench/pgbench.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,9 @@ typedef enum PgBenchFunction
9797
PGBENCH_LE,
9898
PGBENCH_LT,
9999
PGBENCH_IS,
100-
PGBENCH_CASE
100+
PGBENCH_CASE,
101+
PGBENCH_HASH_FNV1A,
102+
PGBENCH_HASH_MURMUR2
101103
}PgBenchFunction;
102104

103105
typedefstructPgBenchExprPgBenchExpr;

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp