On 27/06/2018 20:44, Chris Wilson wrote:
Setup a userptr object that only has a read-only mapping back to a file
store (memfd). Then attempt to write into that mapping using the GPU and
assert that those writes do not land (while also writing via a writable
userptr mapping into the same memfd to verify that the GPU is working!)

Signed-off-by: Chris Wilson <[email protected]>
Cc: Tvrtko Ursulin <[email protected]>
Cc: Joonas Lahtinen <[email protected]>
---
  configure.ac              |   1 +
  lib/ioctl_wrappers.c      |   4 +-
  lib/ioctl_wrappers.h      |   4 +-
  lib/meson.build           |   1 +
  meson.build               |   1 +
  tests/Makefile.am         |   4 +-
  tests/gem_userptr_blits.c | 337 +++++++++++++++++++++++++++++++++++++-
  7 files changed, 342 insertions(+), 10 deletions(-)

diff --git a/configure.ac b/configure.ac
index 1ee4e90e9..195963d4f 100644
--- a/configure.ac
+++ b/configure.ac
@@ -125,6 +125,7 @@ PKG_CHECK_MODULES(PCIACCESS, [pciaccess >= 0.10])
  PKG_CHECK_MODULES(KMOD, [libkmod])
  PKG_CHECK_MODULES(PROCPS, [libprocps])
  PKG_CHECK_MODULES(LIBUNWIND, [libunwind])
+PKG_CHECK_MODULES(SSL, [openssl])
  PKG_CHECK_MODULES(VALGRIND, [valgrind], [have_valgrind=yes], 
[have_valgrind=no])
if test x$have_valgrind = xyes; then
diff --git a/lib/ioctl_wrappers.c b/lib/ioctl_wrappers.c
index 79db44a8c..d5d2a4e4c 100644
--- a/lib/ioctl_wrappers.c
+++ b/lib/ioctl_wrappers.c
@@ -869,7 +869,7 @@ int gem_madvise(int fd, uint32_t handle, int state)
        return madv.retained;
  }
-int __gem_userptr(int fd, void *ptr, int size, int read_only, uint32_t flags, uint32_t *handle)
+int __gem_userptr(int fd, void *ptr, uint64_t size, int read_only, uint32_t 
flags, uint32_t *handle)
  {
        struct drm_i915_gem_userptr userptr;
@@ -898,7 +898,7 @@ int __gem_userptr(int fd, void *ptr, int size, int read_only, uint32_t flags, ui
   *
   * Returns userptr handle for the GEM object.
   */
-void gem_userptr(int fd, void *ptr, int size, int read_only, uint32_t flags, 
uint32_t *handle)
+void gem_userptr(int fd, void *ptr, uint64_t size, int read_only, uint32_t 
flags, uint32_t *handle)
  {
        igt_assert_eq(__gem_userptr(fd, ptr, size, read_only, flags, handle), 
0);
  }
diff --git a/lib/ioctl_wrappers.h b/lib/ioctl_wrappers.h
index b966f72c9..8e2cd380b 100644
--- a/lib/ioctl_wrappers.h
+++ b/lib/ioctl_wrappers.h
@@ -133,8 +133,8 @@ struct local_i915_gem_userptr {
  #define LOCAL_I915_USERPTR_UNSYNCHRONIZED (1<<31)
        uint32_t handle;
  };
-void gem_userptr(int fd, void *ptr, int size, int read_only, uint32_t flags, 
uint32_t *handle);
-int __gem_userptr(int fd, void *ptr, int size, int read_only, uint32_t flags, 
uint32_t *handle);
+void gem_userptr(int fd, void *ptr, uint64_t size, int read_only, uint32_t 
flags, uint32_t *handle);
+int __gem_userptr(int fd, void *ptr, uint64_t size, int read_only, uint32_t 
flags, uint32_t *handle);
  void gem_sw_finish(int fd, uint32_t handle);
diff --git a/lib/meson.build b/lib/meson.build
index 1a355414e..939167f91 100644
--- a/lib/meson.build
+++ b/lib/meson.build
@@ -62,6 +62,7 @@ lib_deps = [
        pthreads,
        math,
        realtime,
+       ssl,
  ]
if libdrm_intel.found()
diff --git a/meson.build b/meson.build
index 4d15d6238..638c01066 100644
--- a/meson.build
+++ b/meson.build
@@ -98,6 +98,7 @@ pciaccess = dependency('pciaccess', version : '>=0.10')
  libkmod = dependency('libkmod')
  libprocps = dependency('libprocps', required : true)
  libunwind = dependency('libunwind', required : true)
+ssl = dependency('openssl', required : true)
valgrind = null_dep
  valgrindinfo = 'No'
diff --git a/tests/Makefile.am b/tests/Makefile.am
index f41ad5096..ba307b220 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -126,8 +126,8 @@ gem_tiled_swapping_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS)
  gem_tiled_swapping_LDADD = $(LDADD) -lpthread
  prime_self_import_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS)
  prime_self_import_LDADD = $(LDADD) -lpthread
-gem_userptr_blits_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS)
-gem_userptr_blits_LDADD = $(LDADD) -lpthread
+gem_userptr_blits_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS) $(SSL_CFLAGS)
+gem_userptr_blits_LDADD = $(LDADD) $(SSL_LIBS) -lpthread
  perf_pmu_LDADD = $(LDADD) $(top_builddir)/lib/libigt_perf.la
gem_eio_LDADD = $(LDADD) -lrt
diff --git a/tests/gem_userptr_blits.c b/tests/gem_userptr_blits.c
index 7e3b6ef38..30c6bc48c 100644
--- a/tests/gem_userptr_blits.c
+++ b/tests/gem_userptr_blits.c
@@ -43,13 +43,17 @@
  #include <fcntl.h>
  #include <inttypes.h>
  #include <errno.h>
+#include <setjmp.h>
  #include <sys/stat.h>
  #include <sys/time.h>
  #include <sys/mman.h>
+#include <openssl/sha.h>
  #include <signal.h>
  #include <pthread.h>
  #include <time.h>
+#include <linux/memfd.h>
+
  #include "drm.h"
  #include "i915_drm.h"
@@ -238,6 +242,57 @@ blit(int fd, uint32_t dst, uint32_t src, uint32_t *all_bo, int n_bo)
        return ret;
  }
+static void store_dword(int fd, uint32_t target,
+                       uint32_t offset, uint32_t value)
+{
+       const int gen = intel_gen(intel_get_drm_devid(fd));
+       struct drm_i915_gem_exec_object2 obj[2];
+       struct drm_i915_gem_relocation_entry reloc;
+       struct drm_i915_gem_execbuffer2 execbuf;
+       uint32_t batch[16];
+       int i;
+
+       memset(&execbuf, 0, sizeof(execbuf));
+       execbuf.buffers_ptr = to_user_pointer(obj);
+       execbuf.buffer_count = ARRAY_SIZE(obj);
+       execbuf.flags = 0;
+       if (gen < 6)
+               execbuf.flags |= I915_EXEC_SECURE;
+
+       memset(obj, 0, sizeof(obj));
+       obj[0].handle = target;
+       obj[1].handle = gem_create(fd, 4096);
+
+       memset(&reloc, 0, sizeof(reloc));
+       reloc.target_handle = obj[0].handle;
+       reloc.presumed_offset = 0;
+       reloc.offset = sizeof(uint32_t);
+       reloc.delta = offset;
+       reloc.read_domains = I915_GEM_DOMAIN_RENDER;
+       reloc.write_domain = I915_GEM_DOMAIN_RENDER;
+       obj[1].relocs_ptr = to_user_pointer(&reloc);
+       obj[1].relocation_count = 1;
+
+       i = 0;
+       batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+       if (gen >= 8) {
+               batch[++i] = offset;
+               batch[++i] = 0;
+       } else if (gen >= 4) {
+               batch[++i] = 0;
+               batch[++i] = offset;
+               reloc.offset += sizeof(uint32_t);
+       } else {
+               batch[i]--;
+               batch[++i] = offset;
+       }
+       batch[++i] = value;
+       batch[++i] = MI_BATCH_BUFFER_END;
+       gem_write(fd, obj[1].handle, 0, batch, sizeof(batch));
+       gem_execbuf(fd, &execbuf);
+       gem_close(fd, obj[1].handle);
+}
+
  static uint32_t
  create_userptr(int fd, uint32_t val, uint32_t *ptr)
  {
@@ -941,6 +996,275 @@ static int test_dmabuf(void)
        return 0;
  }
+static void test_readonly(int i915)

Hm, interesting and novel fd naming, I think. I know we have fd and gem_fd; I wonder if we have drm_fd somewhere as well. Just thinking out loud...

+{
+       unsigned char orig[SHA_DIGEST_LENGTH];
+       uint64_t aperture_size;
+       uint32_t whandle, rhandle;
+       size_t sz, total;
+       void *pages, *space;
+       int memfd;
+
+       /*
+        * A small batch of pages; small enough to cheaply check for stray
+        * writes but large enough that we don't create too many VMA pointing
+        * back to this set from the large arena. The limit on total number
+        * of VMA for a process is 65,536 (at least on this kernel).
+        */
+       sz = 16 << 12;

12 for the page size, so 16 pages? How is that related to the VMA limit mentioned in the comment?

+       memfd = memfd_create("pages", 0);
+       igt_require(memfd != -1);

igt_require_fd is available if you care for it...

+       igt_require(ftruncate(memfd, sz) == 0);

...and igt_require_eq, the double-edged sword of the API growing rich, huh? :)

+
+       pages = mmap(NULL, sz, PROT_WRITE, MAP_SHARED, memfd, 0);
+       igt_assert(pages != MAP_FAILED);
+
+       igt_require(__gem_userptr(i915, pages, sz, true, userptr_flags, 
&rhandle) == 0);
+       gem_close(i915, rhandle);
+
+       gem_userptr(i915, pages, sz, false, userptr_flags, &whandle);
+
+       total = 2048ull << 20;

Why 2GiB? Can it be expressed in terms of the VMA limit and sz, or does it just accidentally work out to half of the VMA limit?

+       aperture_size = gem_aperture_size(i915) / 2;
+       if (aperture_size < total)
+               total = aperture_size;
+       total = total / sz * sz;

There is round_down in lib/igt_primes but it would need exporting.

+       igt_info("Using a %'zuB (%'zu pages) arena onto %zu pages\n",
+                total, total >> 12, sz >> 12);
+
+       /* Create an arena all pointing to the same set of pages */
+       space = mmap(NULL, total, PROT_READ, MAP_ANON | MAP_SHARED, -1, 0);

Allocating address space only?

+       igt_require(space != MAP_FAILED);
+       for (size_t offset = 0; offset < total; offset += sz) {
+               igt_assert(mmap(space + offset, sz,
+                               PROT_WRITE, MAP_SHARED | MAP_FIXED,
+                               memfd, 0) != MAP_FAILED);
+               *(uint32_t *)(space + offset) = offset;
+       }
+       igt_assert_eq_u32(*(uint32_t *)pages, (uint32_t)(total - sz));

Checking that "arena" somewhat works, ok..

+       igt_assert(mlock(space, total) == 0);
+       close(memfd);
+
+       /* Check we can create a normal userptr bo wrapping the wrapper */
+       gem_userptr(i915, space, total, false, userptr_flags, &rhandle);
+       gem_set_domain(i915, rhandle, I915_GEM_DOMAIN_CPU, 0);
+       for (size_t offset = 0; offset < total; offset += sz)
+               store_dword(i915, rhandle, offset + 4, offset / sz);
+       gem_sync(i915, rhandle);

Do you need to move it back to CPU domain before the asserts?

+       igt_assert_eq_u32(*(uint32_t *)(pages + 0), (uint32_t)(total - sz));
+       igt_assert_eq_u32(*(uint32_t *)(pages + 4), (uint32_t)(total / sz - 1));

Please add a comment somewhere higher up explaining the layout - I got lost as to what is in the first dword and what is in the second dword of each page, and who writes each.

+       gem_close(i915, rhandle);
+
+       /* Now enforce read-only henceforth */
+       igt_assert(mprotect(space, total, PROT_READ) == 0);

No writes from the CPU, ok, I suppose to guarantee if there is a write where it came from.

Please add a high-level comment describing what the following block will test and how.

+
+       SHA1(pages, sz, orig);
+       igt_fork(child, 1) {
+               const int gen = intel_gen(intel_get_drm_devid(i915));
+               const int nreloc = 1024;

This has a relationship to the size of the batch buffer created lower below?

+               struct drm_i915_gem_relocation_entry *reloc;
+               struct drm_i915_gem_exec_object2 obj[2];
+               struct drm_i915_gem_execbuffer2 exec;
+               unsigned char ref[SHA_DIGEST_LENGTH], result[SHA_DIGEST_LENGTH];
+               uint32_t *batch;
+               int i;
+
+               reloc = calloc(sizeof(*reloc), nreloc);
+               gem_userptr(i915, space, total, true, userptr_flags, &rhandle);
+
+

Extra newline.

+               memset(obj, 0, sizeof(obj));
+               obj[0].flags = LOCAL_EXEC_OBJECT_SUPPORTS_48B;
+               obj[1].handle = gem_create(i915, 4096*16);

Is this the size of a store dword times nreloc? The relationships need to be clearer and expressed in one place.

+               obj[1].relocation_count = nreloc;
+               obj[1].relocs_ptr = to_user_pointer(reloc);
+
+               batch = gem_mmap__wc(i915, obj[1].handle, 0, 4096*16, 
PROT_WRITE);
+
+               memset(&exec, 0, sizeof(exec));
+               exec.buffer_count =2;
+               exec.buffers_ptr = to_user_pointer(obj);
+
+               for_each_engine(i915, exec.flags) {
+                       /* First tweak the backing store through the write */
+                       i = 0;
+                       obj[0].handle = whandle;
+                       for (int n = 0; n < nreloc; n++) {
+                               uint64_t offset;
+
+                               reloc[n].target_handle = obj[0].handle;
+                               reloc[n].delta = 4*(rand() % (sz/4));
+                               reloc[n].offset = (i+1) * sizeof(uint32_t);

You can add spaces around operators to follow our coding style since space is not constrained here.

+                               reloc[n].presumed_offset = obj[0].offset;
+                               reloc[n].read_domains = I915_GEM_DOMAIN_RENDER;
+                               reloc[n].write_domain = I915_GEM_DOMAIN_RENDER;
+
+                               offset = reloc[n].presumed_offset + 
reloc[n].delta;
+
+                               batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 
22 : 0);
+                               if (gen >= 8) {
+                                       batch[++i] = offset;
+                                       batch[++i] = offset >> 32;
+                               } else if (gen >= 4) {
+                                       batch[++i] = 0;
+                                       batch[++i] = offset;
+                                       reloc[n].offset += sizeof(uint32_t);
+                               } else {
+                                       batch[i]--;
+                                       batch[++i] = offset;
+                               }
+                               batch[++i] = rand();
+                               i++;
+                       }
+                       batch[i] = MI_BATCH_BUFFER_END;

Could this somehow be done via the previously added store_dword helper instead of duplicating the code?

+
+                       gem_execbuf(i915, &exec);
+                       gem_sync(i915, obj[0].handle);
+                       SHA1(pages, sz, ref);
+
+                       igt_assert(memcmp(ref, orig, sizeof(ref)));
+                       memcpy(orig, ref, sizeof(orig));
+
+                       /* Now try the same through the read-only handle */
+                       i = 0;
+                       obj[0].handle = rhandle;
+                       for (int n = 0; n < nreloc; n++) {
+                               uint64_t offset;
+
+                               reloc[n].target_handle = obj[0].handle;
+                               reloc[n].delta = 4*(rand() % (total/4));
+                               reloc[n].offset = (i+1) * sizeof(uint32_t);
+                               reloc[n].presumed_offset = obj[0].offset;
+                               reloc[n].read_domains = I915_GEM_DOMAIN_RENDER;
+                               reloc[n].write_domain = I915_GEM_DOMAIN_RENDER;
+
+                               offset = reloc[n].presumed_offset + 
reloc[n].delta;
+
+                               batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 
22 : 0);
+                               if (gen >= 8) {
+                                       batch[++i] = offset;
+                                       batch[++i] = offset >> 32;
+                               } else if (gen >= 4) {
+                                       batch[++i] = 0;
+                                       batch[++i] = offset;
+                                       reloc[n].offset += sizeof(uint32_t);
+                               } else {
+                                       batch[i]--;
+                                       batch[++i] = offset;
+                               }
+                               batch[++i] = rand();
+                               i++;
+                       }
+                       batch[i] = MI_BATCH_BUFFER_END;

Am I seeing a copy-pasted loop? You know what's next! :D

+
+                       gem_execbuf(i915, &exec);
+                       gem_sync(i915, obj[0].handle);
+                       SHA1(pages, sz, result);
+
+                       /*
+                        * As the writes into the read-only GPU bo should fail,
+                        * the SHA1 hash of the backing store should be
+                        * unaffected.
+                        */
+                       igt_assert(memcmp(ref, result, SHA_DIGEST_LENGTH) == 0);
+               }
+
+               munmap(batch, 16*4096);
+               gem_close(i915, obj[1].handle);
+               gem_close(i915, rhandle);
+       }
+       igt_waitchildren();
+
+       munmap(space, total);
+       munmap(pages, sz);
+}

Okay more or less. Just want some tweaks and high level description since I (or anyone in the future) don't need/want to reverse engineer the patterns.

+
+static jmp_buf sigjmp;
+static void sigjmp_handler(int sig)
+{
+       siglongjmp(sigjmp, sig);
+}
+
+static void test_readonly_mmap(int i915)
+{

Please add a high-level test description, since there is some trickery below.

+       unsigned char original[SHA_DIGEST_LENGTH];
+       unsigned char result[SHA_DIGEST_LENGTH];
+       uint32_t handle;
+       uint32_t sz;
+       void *pages;
+       void *ptr;
+       int sig;
+
+       igt_require(igt_setup_clflush());
+
+       sz = 16 << 12;
+       pages = mmap(NULL, sz, PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0);
+       igt_assert(pages != MAP_FAILED);
+
+       igt_require(__gem_userptr(i915, pages, sz, true, userptr_flags, 
&handle) == 0);
+       gem_set_caching(i915, handle, 0);
+
+       memset(pages, 0xa5, sz);
+       igt_clflush_range(pages, sz);

Why are cache flushes needed in this test? Because they cannot be done via domain management?

+       SHA1(pages, sz, original);
+
+       ptr = __gem_mmap__gtt(i915, handle, sz, PROT_WRITE);
+       igt_assert(ptr == NULL);
+
+       ptr = gem_mmap__gtt(i915, handle, sz, PROT_READ);
+       gem_close(i915, handle);
+
+       if (!(sig = sigsetjmp(sigjmp, 1))) {

What does this do? Comment?

+               signal(SIGBUS, sigjmp_handler);
+               signal(SIGSEGV, sigjmp_handler);
+               memset(ptr, 0x5a, sz);
+               igt_assert(0);
+       }
+       igt_assert_eq(sig, SIGSEGV);
+
+       igt_assert(mprotect(ptr, sz, PROT_WRITE));

Why is this needed?

+       munmap(ptr, sz);
+
+       igt_clflush_range(pages, sz);
+       SHA1(pages, sz, result);
+       igt_assert(!memcmp(original, result, sizeof(original)));
+
+       munmap(pages, sz);
+}
+
+static void test_readonly_pwrite(int i915)
+{
+       unsigned char original[SHA_DIGEST_LENGTH];
+       unsigned char result[SHA_DIGEST_LENGTH];
+       uint32_t handle;
+       uint32_t sz;
+       void *pages;
+
+       igt_require(igt_setup_clflush());
+
+       sz = 16 << 12;
+       pages = mmap(NULL, sz, PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0);
+       igt_assert(pages != MAP_FAILED);
+
+       igt_require(__gem_userptr(i915, pages, sz, true, userptr_flags, 
&handle) == 0);
+       memset(pages, 0xa5, sz);
+       SHA1(pages, sz, original);
+
+       for (int page = 0; page < 16; page++) {
+               char data[4096];
+
+               memset(data, page, sizeof(data));
+               igt_assert_eq(__gem_write(i915, handle, page << 12, data, 
sizeof(data)), -EINVAL);
+       }
+
+       gem_close(i915, handle);
+
+       SHA1(pages, sz, result);
+       igt_assert(!memcmp(original, result, sizeof(original)));
+
+       munmap(pages, sz);
+}
+
  static int test_usage_restrictions(int fd)
  {
        void *ptr;
@@ -961,10 +1285,6 @@ static int test_usage_restrictions(int fd)
        ret = __gem_userptr(fd, (char *)ptr + 1, PAGE_SIZE - 1, 0, userptr_flags, 
&handle);
        igt_assert_neq(ret, 0);
- /* Read-only not supported. */
-       ret = __gem_userptr(fd, (char *)ptr, PAGE_SIZE, 1, userptr_flags, 
&handle);
-       igt_assert_neq(ret, 0);
-
        free(ptr);
return 0;
@@ -1502,6 +1822,15 @@ int main(int argc, char **argv)
                igt_subtest("dmabuf-unsync")
                        test_dmabuf();
+ igt_subtest("readonly-unsync")
+                       test_readonly(fd);
+
+               igt_subtest("readonly-mmap-unsync")
+                       test_readonly_mmap(fd);
+
+               igt_subtest("readonly-pwrite-unsync")
+                       test_readonly_pwrite(fd);
+
                for (unsigned flags = 0; flags < ALL_FORKING_EVICTIONS + 1; 
flags++) {
                        igt_subtest_f("forked-unsync%s%s%s-%s",
                                        flags & FORKING_EVICTIONS_SWAPPING ? "-swapping" 
: "",


Looks like thorough coverage, just some final polish needed.

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to