Movatterモバイル変換
[0]ホーム
This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.
Re: [PATCH] Add math benchmark latency test
- From: Siddhesh Poyarekar <siddhesh at gotplt dot org>
- To: Wilco Dijkstra <Wilco dot Dijkstra at arm dot com>, "libc-alpha at sourceware dot org" <libc-alpha at sourceware dot org>
- Cc: nd <nd at arm dot com>
- Date: Wed, 16 Aug 2017 18:37:21 +0530
- Subject: Re: [PATCH] Add math benchmark latency test
- Authentication-results: sourceware.org; auth=none
- References: <DB6PR0801MB20530E94B6016F1E3F7CEDEC83820@DB6PR0801MB2053.eurprd08.prod.outlook.com>
On Wednesday 16 August 2017 04:55 PM, Wilco Dijkstra wrote:> This patch further improves math function benchmarking by adding a latency> test in addition to throughput. This enables more accurate comparisons of the> math functions. The latency test works by creating a dependency on the previous> iteration: func_res = F (func_res * zero + input[i]). The multiply by zero avoids> changing the input.> > The powf test now shows:> > "workload-spec2006.wrf": {> "throughput": 200,> "latency": 100> }> > OK for commit?> ChangeLog:> 2017-08-16 Wilco Dijkstra <wdijkstr@arm.com>> > * benchtests/bench-skeleton.c (main): Add support for> latency benchmarking.> * benchtests/scripts/bench.py: Add support for latency benchmarking.> > --> diff --git a/benchtests/bench-skeleton.c b/benchtests/bench-skeleton.c> index 3c6dad705594ac0a53edcb4e09686252c13127cf..60753ede1aa3cc05cc0e9eccc74dd12a609a1a67 100644> --- a/benchtests/bench-skeleton.c> +++ b/benchtests/bench-skeleton.c> @@ -71,8 +71,10 @@ main (int argc, char **argv)> bool is_bench = strncmp (VARIANT (v), "workload-", 9) == 0;> double d_total_i = 0;> timing_t total = 0, max = 0, min = 0x7fffffffffffffff;> + timing_t throughput = 0, latency = 0;> int64_t c = 0;> uint64_t cur;> + BENCH_VARS;> while (1)> {> if (is_bench)> @@ -86,7 +88,16 @@ main (int argc, char **argv)> BENCH_FUNC (v, i);> TIMING_NOW (end);> TIMING_DIFF (cur, start, end);> - TIMING_ACCUM (total, cur);> + TIMING_ACCUM (throughput, cur);> +> + TIMING_NOW (start);> + for (k = 0; k < iters; k++)> +for (i = 0; i < NUM_SAMPLES (v); i++)> + BENCH_FUNC_LAT (v, i);> + TIMING_NOW (end);> + TIMING_DIFF (cur, start, end);> + TIMING_ACCUM (latency, cur);> +> d_total_i += iters * NUM_SAMPLES (v);> }> else> @@ -131,12 +142,15 @@ main (int argc, char **argv)> /* Begin variant. 
*/> json_attr_object_begin (&json_ctx, VARIANT (v));> > - json_attr_double (&json_ctx, "duration", d_total_s);> - json_attr_double (&json_ctx, "iterations", d_total_i);> if (is_bench)> -json_attr_double (&json_ctx, "throughput", d_total_s / d_total_i);> +{> + json_attr_double (&json_ctx, "throughput", throughput / d_total_i);
I didn't notice this earlier, but shouldn't throughput be iterations/cycle and not the other way around? That is, throughput should be the inverse of latency.
> + json_attr_double (&json_ctx, "latency", latency / d_total_i);> +}> else> {> + json_attr_double (&json_ctx, "duration", d_total_s);> + json_attr_double (&json_ctx, "iterations", d_total_i);> json_attr_double (&json_ctx, "max", max / d_iters);> json_attr_double (&json_ctx, "min", min / d_iters);> json_attr_double (&json_ctx, "mean", d_total_s / d_total_i);> diff --git a/benchtests/scripts/bench.py b/benchtests/scripts/bench.py> index 8c1c9eeb2bc67a16cb8a8e010fd2b8a2ef8ab6df..b7ccb7c8c2bf1822202a2377dfb0675516115cc5 100755> --- a/benchtests/scripts/bench.py> +++ b/benchtests/scripts/bench.py> @@ -45,7 +45,7 @@ DEFINES_TEMPLATE = '''> # variant is represented by the _VARIANT structure.
The ARGS structure> # represents a single set of arguments.> STRUCT_TEMPLATE = '''> -#define CALL_BENCH_FUNC(v, i) %(func)s (%(func_args)s)> +#define CALL_BENCH_FUNC(v, i, x) %(func)s (x %(func_args)s)> > struct args> {> @@ -84,7 +84,9 @@ EPILOGUE = '''> #define RESULT(__v, __i) (variants[(__v)].in[(__i)].timing)> #define RESULT_ACCUM(r, v, i, old, new) \\> ((RESULT ((v), (i))) = (RESULT ((v), (i)) * (old) + (r)) / ((new) + 1))> -#define BENCH_FUNC(i, j) ({%(getret)s CALL_BENCH_FUNC (i, j);})> +#define BENCH_FUNC(i, j) ({%(getret)s CALL_BENCH_FUNC (i, j, );})> +#define BENCH_FUNC_LAT(i, j) ({%(getret)s CALL_BENCH_FUNC (i, j, %(latarg)s);})> +#define BENCH_VARS %(defvar)s> #define FUNCNAME "%(func)s"> #include "bench-skeleton.c"'''> > @@ -122,17 +124,22 @@ def gen_source(func, directives, all_vals):> # If we have a return value from the function, make sure it is> # assigned to prevent the compiler from optimizing out the> # call.> + getret = ''> + latarg = ''> + defvar = ''> +> if directives['ret']:> print('static %s volatile ret;' % directives['ret'])> - getret = 'ret = '> - else:> - getret = ''> + print('static %s zero __attribute__((used)) = 0;' % directives['ret'])> + getret = 'ret = func_res = '> + latarg = 'func_res * zero +'> + defvar = '%s func_res = 0;' % directives['ret']> > # Test initialization.> if directives['init']:> print('#define BENCH_INIT %s' % directives['init'])> > - print(EPILOGUE % {'getret': getret, 'func': func})> + print(EPILOGUE % {'getret': getret, 'func': func, 'latarg': latarg, 'defvar': defvar })> > > def _print_arg_data(func, directives, all_vals):>
[8]ページ先頭