Movatterモバイル変換

This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]

Re: [PATCH] Add math benchmark latency test

From: Siddhesh Poyarekar <siddhesh at gotplt dot org>
To: Wilco Dijkstra <Wilco dot Dijkstra at arm dot com>, "libc-alpha at sourceware dot org" <libc-alpha at sourceware dot org>
Cc: nd <nd at arm dot com>
Date: Wed, 16 Aug 2017 18:37:21 +0530
Subject: Re: [PATCH] Add math benchmark latency test
Authentication-results: sourceware.org; auth=none
References: <DB6PR0801MB20530E94B6016F1E3F7CEDEC83820@DB6PR0801MB2053.eurprd08.prod.outlook.com>

On Wednesday 16 August 2017 04:55 PM, Wilco Dijkstra wrote:> This patch further improves math function benchmarking by adding a latency> test in addition to throughput.  This enables more accurate comparisons of the> math functions. The latency test works by creating a dependency on the previous> iteration: func_res = F (func_res * zero + input[i]). The multiply by zero avoids> changing the input.> > The powf test now shows:> >    "workload-spec2006.wrf": {>     "throughput": 200,>     "latency": 100>    }> > OK for commit?> ChangeLog:> 2017-08-16  Wilco Dijkstra  <wdijkstr@arm.com>>   >         * benchtests/bench-skeleton.c (main): Add support for>         latency benchmarking.>         * benchtests/scripts/bench.py: Add support for latency benchmarking.> > --> diff --git a/benchtests/bench-skeleton.c b/benchtests/bench-skeleton.c> index 3c6dad705594ac0a53edcb4e09686252c13127cf..60753ede1aa3cc05cc0e9eccc74dd12a609a1a67 100644> --- a/benchtests/bench-skeleton.c> +++ b/benchtests/bench-skeleton.c> @@ -71,8 +71,10 @@ main (int argc, char **argv)>        bool is_bench = strncmp (VARIANT (v), "workload-", 9) == 0;>        double d_total_i = 0;>        timing_t total = 0, max = 0, min = 0x7fffffffffffffff;> +      timing_t throughput = 0, latency = 0;>        int64_t c = 0;>        uint64_t cur;> +      BENCH_VARS;>        while (1)>  {>    if (is_bench)> @@ -86,7 +88,16 @@ main (int argc, char **argv)>    BENCH_FUNC (v, i);>        TIMING_NOW (end);>        TIMING_DIFF (cur, start, end);> -      TIMING_ACCUM (total, cur);> +      TIMING_ACCUM (throughput, cur);> +> +      TIMING_NOW (start);> +      for (k = 0; k < iters; k++)> +for (i = 0; i < NUM_SAMPLES (v); i++)> +  BENCH_FUNC_LAT (v, i);> +      TIMING_NOW (end);> +      TIMING_DIFF (cur, start, end);> +      TIMING_ACCUM (latency, cur);> +>        d_total_i += iters * NUM_SAMPLES (v);>      }>    else> @@ -131,12 +142,15 @@ main (int argc, char **argv)>        /* Begin variant.  
*/>        json_attr_object_begin (&json_ctx, VARIANT (v));>  > -      json_attr_double (&json_ctx, "duration", d_total_s);> -      json_attr_double (&json_ctx, "iterations", d_total_i);>        if (is_bench)> -json_attr_double (&json_ctx, "throughput", d_total_s / d_total_i);> +{> +  json_attr_double (&json_ctx, "throughput", throughput / d_total_i);

I didn't notice this earlier, but shouldn't throughput be iterations/cycle and not the other way around?  That is, throughput should be the inverse of latency.

> +  json_attr_double (&json_ctx, "latency", latency / d_total_i);> +}>        else>  {> +  json_attr_double (&json_ctx, "duration", d_total_s);> +  json_attr_double (&json_ctx, "iterations", d_total_i);>    json_attr_double (&json_ctx, "max", max / d_iters);>    json_attr_double (&json_ctx, "min", min / d_iters);>    json_attr_double (&json_ctx, "mean", d_total_s / d_total_i);> diff --git a/benchtests/scripts/bench.py b/benchtests/scripts/bench.py> index 8c1c9eeb2bc67a16cb8a8e010fd2b8a2ef8ab6df..b7ccb7c8c2bf1822202a2377dfb0675516115cc5 100755> --- a/benchtests/scripts/bench.py> +++ b/benchtests/scripts/bench.py> @@ -45,7 +45,7 @@ DEFINES_TEMPLATE = '''>  # variant is represented by the _VARIANT structure.  
The ARGS structure>  # represents a single set of arguments.>  STRUCT_TEMPLATE = '''> -#define CALL_BENCH_FUNC(v, i) %(func)s (%(func_args)s)> +#define CALL_BENCH_FUNC(v, i, x) %(func)s (x %(func_args)s)>  >  struct args>  {> @@ -84,7 +84,9 @@ EPILOGUE = '''>  #define RESULT(__v, __i) (variants[(__v)].in[(__i)].timing)>  #define RESULT_ACCUM(r, v, i, old, new) \\>          ((RESULT ((v), (i))) = (RESULT ((v), (i)) * (old) + (r)) / ((new) + 1))> -#define BENCH_FUNC(i, j) ({%(getret)s CALL_BENCH_FUNC (i, j);})> +#define BENCH_FUNC(i, j) ({%(getret)s CALL_BENCH_FUNC (i, j, );})> +#define BENCH_FUNC_LAT(i, j) ({%(getret)s CALL_BENCH_FUNC (i, j, %(latarg)s);})> +#define BENCH_VARS %(defvar)s>  #define FUNCNAME "%(func)s">  #include "bench-skeleton.c"'''>  > @@ -122,17 +124,22 @@ def gen_source(func, directives, all_vals):>      # If we have a return value from the function, make sure it is>      # assigned to prevent the compiler from optimizing out the>      # call.> +    getret = ''> +    latarg = ''> +    defvar = ''> +>      if directives['ret']:>          print('static %s volatile ret;' % directives['ret'])> -        getret = 'ret = '> -    else:> -        getret = ''> +        print('static %s zero __attribute__((used)) = 0;' % directives['ret'])> +        getret = 'ret = func_res = '> +        latarg = 'func_res * zero +'> +        defvar = '%s func_res = 0;' % directives['ret']>  >      # Test initialization.>      if directives['init']:>          print('#define BENCH_INIT %s' % directives['init'])>  > -    print(EPILOGUE % {'getret': getret, 'func': func})> +    print(EPILOGUE % {'getret': getret, 'func': func, 'latarg': latarg, 'defvar': defvar })>  >  >  def _print_arg_data(func, directives, all_vals):>

Follow-Ups:
- Re: [PATCH] Add math benchmark latency test
  - From: Szabolcs Nagy
- Re: Re: [PATCH] Add math benchmark latency test
  - From: Arjan van de Ven

References:
- [PATCH] Add math benchmark latency test
  - From: Wilco Dijkstra

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]

[8]ページ先頭