Different engines take a different number of cycles to execute MI_NOOP.
As we specify workload durations in microseconds, we need to take the
per-engine calibration values into account so that the workloads behave
as expected.

Signed-off-by: Chris Wilson <[email protected]>
Cc: Tvrtko Ursulin <[email protected]>
---
 benchmarks/gem_wsim.c | 72 +++++++++++++++++++++++++++++++------------
 1 file changed, 52 insertions(+), 20 deletions(-)

diff --git a/benchmarks/gem_wsim.c b/benchmarks/gem_wsim.c
index 9564dcb70..50a062f0e 100644
--- a/benchmarks/gem_wsim.c
+++ b/benchmarks/gem_wsim.c
@@ -238,7 +238,7 @@ struct workload
 };
 
 static const unsigned int nop_calibration_us = 1000;
-static unsigned long nop_calibration;
+static unsigned long nop_calibration[NUM_ENGINES];
 
 static unsigned int context_vcs_rr;
 
@@ -808,9 +808,9 @@ static unsigned int get_duration(struct w_step *w)
                       (dur->max + 1 - dur->min);
 }
 
-static unsigned long get_bb_sz(unsigned int duration)
+static unsigned long get_bb_sz(unsigned int engine, unsigned int duration)
 {
-       return ALIGN(duration * nop_calibration * sizeof(uint32_t) /
+       return ALIGN(duration * nop_calibration[engine] * sizeof(uint32_t) /
                     nop_calibration_us, sizeof(uint32_t));
 }
 
@@ -818,7 +818,7 @@ static void
 init_bb(struct w_step *w, unsigned int flags)
 {
        const unsigned int arb_period =
-                       get_bb_sz(w->preempt_us) / sizeof(uint32_t);
+                       get_bb_sz(w->engine, w->preempt_us) / sizeof(uint32_t);
        const unsigned int mmap_len = ALIGN(w->bb_sz, 4096);
        unsigned int i;
        uint32_t *ptr;
@@ -1043,10 +1043,10 @@ alloc_step_batch(struct workload *wrk, struct w_step *w, unsigned int flags)
 
        if (w->unbound_duration)
                /* nops + MI_ARB_CHK + MI_BATCH_BUFFER_START */
-               w->bb_sz = max(64, get_bb_sz(w->preempt_us)) +
+               w->bb_sz = max(64, get_bb_sz(w->engine, w->preempt_us)) +
                           (1 + 3) * sizeof(uint32_t);
        else
-               w->bb_sz = get_bb_sz(w->duration.max);
+               w->bb_sz = get_bb_sz(w->engine, w->duration.max);
        w->bb_handle = w->obj[j].handle = gem_create(fd, w->bb_sz + 
(w->unbound_duration ? 4096 : 0));
        init_bb(w, flags);
        terminate_bb(w, flags);
@@ -2300,7 +2300,7 @@ do_eb(struct workload *wrk, struct w_step *w, enum intel_engine_id engine,
        w->eb.batch_start_offset =
                w->unbound_duration ?
                0 :
-               ALIGN(w->bb_sz - get_bb_sz(get_duration(w)),
+               ALIGN(w->bb_sz - get_bb_sz(engine, get_duration(w)),
                      2 * sizeof(uint32_t));
 
        for (i = 0; i < w->fence_deps.nr; i++) {
@@ -2580,17 +2580,23 @@ static void fini_workload(struct workload *wrk)
        free(wrk);
 }
 
-static unsigned long calibrate_nop(unsigned int tolerance_pct)
+static unsigned long calibrate_nop(unsigned int engine, double tolerance_pct)
 {
        const uint32_t bbe = 0xa << 23;
        unsigned int loops = 17;
        unsigned int usecs = nop_calibration_us;
        struct drm_i915_gem_exec_object2 obj = {};
-       struct drm_i915_gem_execbuffer2 eb =
-               { .buffer_count = 1, .buffers_ptr = (uintptr_t)&obj};
+       struct drm_i915_gem_execbuffer2 eb = {
+               .buffer_count = 1,
+               .buffers_ptr = (uintptr_t)&obj,
+               .flags = eb_engine_map[engine],
+       };
        long size, last_size;
        struct timespec t_0, t_end;
 
+       if (__gem_execbuf(fd, &eb) != -ENOENT)
+               return 0;
+
        clock_gettime(CLOCK_MONOTONIC, &t_0);
 
        size = 256 * 1024;
@@ -2803,8 +2809,8 @@ int main(int argc, char **argv)
        int master_workload = -1;
        char *append_workload_arg = NULL;
        struct w_arg *w_args = NULL;
-       unsigned int tolerance_pct = 1;
        const struct workload_balancer *balancer = NULL;
+       double tolerance_pct = 1;
        char *endptr = NULL;
        int prio = 0;
        double t;
@@ -2852,10 +2858,28 @@ int main(int argc, char **argv)
                        clients = strtol(optarg, NULL, 0);
                        break;
                case 't':
-                       tolerance_pct = strtol(optarg, NULL, 0);
+                       tolerance_pct = strtod(optarg, NULL);
                        break;
                case 'n':
-                       nop_calibration = strtol(optarg, NULL, 0);
+                       if (strchr(optarg, ',')) {
+                               char *ctx = NULL;
+                               char *str = optarg;
+                               char *token;
+
+                               while ((token = strtok_r(str, ",", &ctx)) != 
NULL) {
+                                       unsigned long nop;
+                                       int engine;
+
+                                       str = NULL;
+                                       if (sscanf(token, "%d:%lu",
+                                                  &engine, &nop) == 2)
+                                               nop_calibration[engine] = nop;
+                               }
+                       } else {
+                               nop_calibration[0] = strtol(optarg, NULL, 0);
+                               for (i = 1; i < NUM_ENGINES; i++)
+                                       nop_calibration[i] = nop_calibration[0];
+                       }
                        break;
                case 'r':
                        repeat = strtol(optarg, NULL, 0);
@@ -2930,14 +2954,22 @@ int main(int argc, char **argv)
                return 1;
        }
 
-       if (!nop_calibration) {
+       if (!nop_calibration[0]) {
+               int engine;
+
                if (verbose > 1)
-                       printf("Calibrating nop delay with %u%% tolerance...\n",
+                       printf("Calibrating nop delay with %.1f%% 
tolerance...\n",
                                tolerance_pct);
-               nop_calibration = calibrate_nop(tolerance_pct);
-               if (verbose)
-                       printf("Nop calibration for %uus delay is %lu.\n",
-                              nop_calibration_us, nop_calibration);
+
+               for (engine = 0; engine < NUM_ENGINES; engine++) {
+                       nop_calibration[engine] = calibrate_nop(engine, 
tolerance_pct);
+                       if (!nop_calibration[engine])
+                               continue;
+
+                       if (verbose)
+                               printf("Nop(engine:%d) calibration for %uus 
delay is %lu.\n",
+                                      engine, nop_calibration_us, 
nop_calibration[engine]);
+               }
 
                return 0;
        }
@@ -2997,7 +3029,7 @@ int main(int argc, char **argv)
 
        if (verbose > 1) {
                printf("Using %lu nop calibration for %uus delay.\n",
-                      nop_calibration, nop_calibration_us);
+                      nop_calibration[0], nop_calibration_us);
                printf("%u client%s.\n", clients, clients > 1 ? "s" : "");
                if (flags & SWAPVCS)
                        printf("Swapping VCS rings between clients.\n");
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to