Improve math benchmark infrastructure

Improve support for math function benchmarking. This patch adds a feature that allows accurate benchmarking of traces extracted from real workloads. This is done by iterating over all samples rather than repeating each sample many times (which completely ignores branch prediction and cache effects). A trace can be added to existing math function inputs via "## name: workload-<name>", followed by the trace. * benchtests/README: Describe workload feature. * benchtests/bench-skeleton.c (main): Add support for benchmarking traces from workloads.
author: Wilco Dijkstra <wdijkstr@arm.com> 2017-06-20 16:26:26 +0100
committer: Wilco Dijkstra <wdijkstr@arm.com> 2017-06-20 16:26:26 +0100
commit: beb52f502f0477465313675d2a0fbf3962e130b8 (patch)
tree: 6d8953c8fab0f45f01c04ab89171a2211b599686 /benchtests/bench-skeleton.c
parent: c0b23001a89b79f8d0bebe41bfbe64d840b13191 (diff)
1 files changed, 39 insertions, 18 deletions
diff --git a/benchtests/bench-skeleton.c b/benchtests/bench-skeleton.c
index 09eb78df1b..3c6dad7055 100644
--- a/benchtests/bench-skeleton.c
+++ b/benchtests/bench-skeleton.c
@@ -68,34 +68,50 @@ main (int argc, char **argv)
       clock_gettime (CLOCK_MONOTONIC_RAW, &runtime);
       runtime.tv_sec += DURATION;
 
+      bool is_bench = strncmp (VARIANT (v), "workload-", 9) == 0;
       double d_total_i = 0;
       timing_t total = 0, max = 0, min = 0x7fffffffffffffff;
       int64_t c = 0;
+      uint64_t cur;
       while (1)
 	{
-	  for (i = 0; i < NUM_SAMPLES (v); i++)
+	  if (is_bench)
 	    {
-	      uint64_t cur;
+	      /* Benchmark a real trace of calls - all samples are iterated
+		 over once before repeating.  This models actual use more
+		 accurately than repeating the same sample many times.  */
 	      TIMING_NOW (start);
 	      for (k = 0; k < iters; k++)
-		BENCH_FUNC (v, i);
+		for (i = 0; i < NUM_SAMPLES (v); i++)
+		  BENCH_FUNC (v, i);
 	      TIMING_NOW (end);
-
 	      TIMING_DIFF (cur, start, end);
+	      TIMING_ACCUM (total, cur);
+	      d_total_i += iters * NUM_SAMPLES (v);
+	    }
+	  else
+	    for (i = 0; i < NUM_SAMPLES (v); i++)
+	      {
+		TIMING_NOW (start);
+		for (k = 0; k < iters; k++)
+		  BENCH_FUNC (v, i);
+		TIMING_NOW (end);
 
-	      if (cur > max)
-		max = cur;
+		TIMING_DIFF (cur, start, end);
 
-	      if (cur < min)
-		min = cur;
+		if (cur > max)
+		  max = cur;
 
-	      TIMING_ACCUM (total, cur);
-	      /* Accumulate timings for the value.  In the end we will divide
-	         by the total iterations.  */
-	      RESULT_ACCUM (cur, v, i, c * iters, (c + 1) * iters);
+		if (cur < min)
+		  min = cur;
 
-	      d_total_i += iters;
-	    }
+		TIMING_ACCUM (total, cur);
+		/* Accumulate timings for the value.  In the end we will divide
+		   by the total iterations.  */
+		RESULT_ACCUM (cur, v, i, c * iters, (c + 1) * iters);
+
+		d_total_i += iters;
+	      }
 	  c++;
 	  struct timespec curtime;
 
@@ -117,11 +133,16 @@ main (int argc, char **argv)
 
       json_attr_double (&json_ctx, "duration", d_total_s);
       json_attr_double (&json_ctx, "iterations", d_total_i);
-      json_attr_double (&json_ctx, "max", max / d_iters);
-      json_attr_double (&json_ctx, "min", min / d_iters);
-      json_attr_double (&json_ctx, "mean", d_total_s / d_total_i);
+      if (is_bench)
+	json_attr_double (&json_ctx, "throughput", d_total_s / d_total_i);
+      else
+	{
+	  json_attr_double (&json_ctx, "max", max / d_iters);
+	  json_attr_double (&json_ctx, "min", min / d_iters);
+	  json_attr_double (&json_ctx, "mean", d_total_s / d_total_i);
+	}
 
-      if (detailed)
+      if (detailed && !is_bench)
 	{
 	  json_array_begin (&json_ctx, "timings");
author	Wilco Dijkstra <wdijkstr@arm.com>	2017-06-20 16:26:26 +0100
committer	Wilco Dijkstra <wdijkstr@arm.com>	2017-06-20 16:26:26 +0100
commit	beb52f502f0477465313675d2a0fbf3962e130b8 (patch)
tree	6d8953c8fab0f45f01c04ab89171a2211b599686 /benchtests/bench-skeleton.c
parent	c0b23001a89b79f8d0bebe41bfbe64d840b13191 (diff)