selftests: Exercise CS TLB invalidation

Mika Kuoppala Thu, 19 Sep 2019 05:58:20 -0700

Chris Wilson <[email protected]> writes:

> Check that we are correctly invalidating the TLB at the start of a
> batch after updating the GTT.
>
> Signed-off-by: Chris Wilson <[email protected]>
> Cc: Mika Kuoppala <[email protected]>
> Cc: Daniele Ceraolo Spurio <[email protected]>
> ---
>  drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 227 ++++++++++++++++++
>  1 file changed, 227 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c 
> b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
> index 598c18d10640..f8709b332bd7 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
> @@ -25,13 +25,16 @@
>  #include <linux/list_sort.h>
>  #include <linux/prime_numbers.h>
>  
> +#include "gem/i915_gem_context.h"
>  #include "gem/selftests/mock_context.h"
> +#include "gt/intel_context.h"
>  
>  #include "i915_random.h"
>  #include "i915_selftest.h"
>  
>  #include "mock_drm.h"
>  #include "mock_gem_device.h"
> +#include "igt_flush_test.h"
>  
>  static void cleanup_freed_objects(struct drm_i915_private *i915)
>  {
> @@ -1705,6 +1708,229 @@ int i915_gem_gtt_mock_selftests(void)
>       return err;
>  }
>  
> +static int context_sync(struct intel_context *ce)
> +{
> +     struct i915_request *rq;
> +     long timeout;
> +
> +     rq = intel_context_create_request(ce);
> +     if (IS_ERR(rq))
> +             return PTR_ERR(rq);
> +
> +     i915_request_get(rq);
> +     i915_request_add(rq);
> +
> +     timeout = i915_request_wait(rq, 0, HZ / 5);
> +     i915_request_put(rq);
> +
> +     return timeout < 0 ? -EIO : 0;
> +}
> +
> +static int submit_batch(struct intel_context *ce, u64 addr)
> +{
> +     struct i915_request *rq;
> +     int err;
> +
> +     rq = intel_context_create_request(ce);
> +     if (IS_ERR(rq))
> +             return PTR_ERR(rq);
> +
> +     err = 0;
> +     if (rq->engine->emit_init_breadcrumb) /* detect a hang */


for seqno write?

> +             err = rq->engine->emit_init_breadcrumb(rq);
> +     if (err == 0)
> +             err = rq->engine->emit_bb_start(rq, addr, 0, 0);
> +

In context_sync part we grabbed a reference. In here we
don't. I don't see how we can get away without it even
if we don't wait in here.

> +     i915_request_add(rq);
> +
> +     return err;
> +}
> +
> +static int igt_cs_tlb(void *arg)
> +{
> +     const unsigned int count = PAGE_SIZE / 64;
> +     const unsigned int chunk_size = count * PAGE_SIZE;
> +     struct drm_i915_private *i915 = arg;
> +     struct drm_i915_gem_object *bbe, *out;
> +     struct i915_gem_engines_iter it;
> +     struct i915_address_space *vm;
> +     struct i915_gem_context *ctx;
> +     struct intel_context *ce;
> +     struct drm_file *file;
> +     struct i915_vma *vma;
> +     unsigned int i;
> +     u32 *result;
> +     u32 *batch;
> +     int err = 0;
> +
> +     file = mock_file(i915);
> +     if (IS_ERR(file))
> +             return PTR_ERR(file);
> +
> +     mutex_lock(&i915->drm.struct_mutex);
> +     ctx = live_context(i915, file);
> +     if (IS_ERR(ctx)) {
> +             err = PTR_ERR(ctx);
> +             goto out_unlock;
> +     }
> +
> +     vm = ctx->vm;
> +     if (!vm)
> +             goto out_unlock;
> +
> +     bbe = i915_gem_object_create_internal(i915, PAGE_SIZE);
> +     if (IS_ERR(bbe)) {
> +             err = PTR_ERR(bbe);
> +             goto out_unlock;
> +     }
> +
> +     batch = i915_gem_object_pin_map(bbe, I915_MAP_WC);
> +     if (IS_ERR(batch)) {
> +             err = PTR_ERR(batch);
> +             goto out_bbe;
> +     }
> +     for (i = 0; i < count; i++) {
> +             u32 *cs = batch + i * 64 / sizeof(*cs);
> +             u64 addr = (vm->total - PAGE_SIZE) + i * sizeof(u32);
> +
> +             GEM_BUG_ON(INTEL_GEN(i915) < 6);
> +             cs[0] = MI_STORE_DWORD_IMM_GEN4;
> +             if (INTEL_GEN(i915) >= 8) {
> +                     cs[1] = lower_32_bits(addr);
> +                     cs[2] = upper_32_bits(addr);
> +                     cs[3] = i;
> +                     cs[4] = MI_NOOP;
> +                     cs[5] = MI_BATCH_BUFFER_START_GEN8;
> +             } else {
> +                     cs[1] = 0;
> +                     cs[2] = lower_32_bits(addr);
> +                     cs[3] = i;
> +                     cs[4] = MI_NOOP;
> +                     cs[5] = MI_BATCH_BUFFER_START;
> +             }
> +     }
> +
> +     out = i915_gem_object_create_internal(i915, PAGE_SIZE);
> +     if (IS_ERR(out)) {
> +             err = PTR_ERR(out);
> +             goto out_batch;
> +     }
> +     i915_gem_object_set_cache_coherency(out, I915_CACHING_CACHED);
> +
> +     vma = i915_vma_instance(out, vm, NULL);
> +     if (IS_ERR(vma)) {
> +             err = PTR_ERR(vma);
> +             goto out_batch;
> +     }
> +
> +     err = i915_vma_pin(vma, 0, 0,
> +                        PIN_USER |
> +                        PIN_OFFSET_FIXED |
> +                        (vm->total - PAGE_SIZE));
> +     if (err)
> +             goto out_out;

out_put?

Oh and we don't have to do anything with the instance on
error paths?

> +     GEM_BUG_ON(vma->node.start != vm->total - PAGE_SIZE);
> +
> +     result = i915_gem_object_pin_map(out, I915_MAP_WB);
> +     if (IS_ERR(result)) {
> +             err = PTR_ERR(result);
> +             goto out_out;
> +     }
> +
> +     for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
> +             IGT_TIMEOUT(end_time);
> +             unsigned long pass = 0;
> +
> +             if (!intel_engine_can_store_dword(ce->engine))
> +                     continue;
> +
> +             while (!__igt_timeout(end_time, NULL)) {
> +                     u64 offset;
> +
> +                     offset = random_offset(0, vm->total - PAGE_SIZE,
> +                                            chunk_size, PAGE_SIZE);
> +
> +                     err = vm->allocate_va_range(vm, offset, chunk_size);
> +                     if (err)
> +                             goto end;
> +
> +                     memset32(result, STACK_MAGIC, PAGE_SIZE / sizeof(u32));
> +
> +                     vma = i915_vma_instance(bbe, vm, NULL);
> +                     if (IS_ERR(vma)) {
> +                             err = PTR_ERR(vma);
> +                             goto end;
> +                     }
> +
> +                     err = vma->ops->set_pages(vma);
> +                     if (err)
> +                             goto end;
> +
> +                     /* Replace the TLB with target batches */
> +                     for (i = 0; i < count; i++) {
> +                             u32 *cs = batch + i * 64 / sizeof(*cs);
> +                             u64 addr;
> +
> +                             vma->node.start = offset + i * PAGE_SIZE;

on previous loop, we have now primed the pte to tlb in here?

> +                             vm->insert_entries(vm, vma, I915_CACHE_NONE, 0);

..now changing in it here...

> +
> +                             addr = vma->node.start + i * 64;
> +                             cs[4] = MI_NOOP;
> +                             cs[6] = lower_32_bits(addr);
> +                             cs[7] = upper_32_bits(addr);
> +                             wmb();
> +
> +                             err = submit_batch(ce, addr);

in hope that with this submission hardware will see the old one and
completely miss the store+bb start?

Perhaps rewiring a more dummy emit only to prove this case
is pushing it.

But I am curious to know if you did try it out by removing
the invalidate on the emits and managed to bring
out the missing writes?

-Mika

> +                             if (err)
> +                                     goto end;
> +                     }
> +
> +                     yield();
> +                     for (i = 0; i < count; i++)
> +                             batch[i * 64 / sizeof(*batch) + 4] =
> +                                     MI_BATCH_BUFFER_END;
> +                     wmb();
> +
> +                     vma->ops->clear_pages(vma);
> +
> +                     err = context_sync(ce);
> +                     if (err) {
> +                             pr_err("%s: writes timed out\n",
> +                                    ce->engine->name);
> +                             goto end;
> +                     }
> +
> +                     for (i = 0; i < count; i++) {
> +                             if (result[i] != i) {
> +                                     pr_err("%s: Write lost on pass %lu, at 
> offset %llx, index %d, found %x, expected %x\n",
> +                                            ce->engine->name, pass,
> +                                            offset, i, result[i], i);
> +                                     err = -EINVAL;
> +                                     goto end;
> +                             }
> +                     }
> +
> +                     vm->clear_range(vm, offset, chunk_size);
> +                     pass++;
> +             }
> +     }
> +end:
> +     if (igt_flush_test(i915, I915_WAIT_LOCKED))
> +             err = -EIO;
> +     i915_gem_context_unlock_engines(ctx);
> +     i915_gem_object_unpin_map(out);
> +out_out:
> +     i915_gem_object_put(out);
> +out_batch:
> +     i915_gem_object_unpin_map(bbe);
> +out_bbe:
> +     i915_gem_object_put(bbe);
> +out_unlock:
> +     mutex_unlock(&i915->drm.struct_mutex);
> +     mock_file_free(i915, file);
> +     return err;
> +}
> +
>  int i915_gem_gtt_live_selftests(struct drm_i915_private *i915)
>  {
>       static const struct i915_subtest tests[] = {
> @@ -1722,6 +1948,7 @@ int i915_gem_gtt_live_selftests(struct drm_i915_private 
> *i915)
>               SUBTEST(igt_ggtt_pot),
>               SUBTEST(igt_ggtt_fill),
>               SUBTEST(igt_ggtt_page),
> +             SUBTEST(igt_cs_tlb),
>       };
>  
>       GEM_BUG_ON(offset_in_page(i915->ggtt.vm.total));
> -- 
> 2.23.0
_______________________________________________
Intel-gfx mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Re: [Intel-gfx] [PATCH] drm/i915/selftests: Exercise CS TLB invalidation

Reply via email to