Combining the multiple pipelined register access checks into a few batches, followed by a single serialising read of the results, can reduce detection time from around 100us to 70us on a fast Haswell system.
Signed-off-by: Chris Wilson <[email protected]> Cc: Kenneth Graunke <[email protected]> --- src/mesa/drivers/dri/i965/intel_screen.c | 177 +++++++++++++++++++------------ 1 file changed, 109 insertions(+), 68 deletions(-) diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c index cb49e9a..595d2dc 100644 --- a/src/mesa/drivers/dri/i965/intel_screen.c +++ b/src/mesa/drivers/dri/i965/intel_screen.c @@ -1156,6 +1156,12 @@ intel_detect_timestamp(struct intel_screen *screen) return loop > 0; } +struct detect_pipelined_register { + uint32_t reg; + uint32_t expected_value; + bool *result; +}; + /** * Test if we can use MI_LOAD_REGISTER_MEM from an untrusted batchbuffer. * @@ -1163,107 +1169,143 @@ intel_detect_timestamp(struct intel_screen *screen) * while others don't. Instead of trying to enumerate every case, just * try and write a register and see if works. */ -static bool -intel_detect_pipelined_register(struct intel_screen *screen, - int reg, uint32_t expected_value) +static void +__intel_detect_pipelined_registers(struct intel_screen *screen, + struct detect_pipelined_register *r, + int count) { const int offset = 100; - - drm_intel_bo *bo; - uint32_t buf[100]; - uint32_t *batch = buf; + int i; uint32_t *data; + + if (count == 0) + return; + + if (drm_intel_bo_map(screen->workaround_bo, true)) + return; + /* Set a value in a BO to a known quantity. The workaround BO already * exists and doesn't contain anything important, so we may as well use it. 
*/ - if (drm_intel_bo_map(screen->workaround_bo, true)) - return false; - data = screen->workaround_bo->virtual; - data[offset] = 0xffffffff; + for (i = 0; i < count; i++) + data[offset+i] = 0xffffffff; drm_intel_bo_unmap(screen->workaround_bo); - bo = drm_intel_bo_alloc(screen->bufmgr, "batchbuffer", 4096, 0); - if (bo == NULL) - return false; + /* Emit each access in a separate batch buffer so that if the kernel + * rejects an individual access attempt, we don't incorrectly assume + * all the register accesses are invalid. + */ + for (i = 0; i < count; i++) { + drm_intel_bo *bo; + uint32_t buf[100]; + uint32_t *batch = buf; + + bo = drm_intel_bo_alloc(screen->bufmgr, "batchbuffer", 4096, 0); + if (bo == NULL) + continue; + + /* Write the register. */ + *batch++ = MI_LOAD_REGISTER_IMM | (3 - 2); + *batch++ = r[i].reg; + *batch++ = r[i].expected_value; + + /* Force a command barrier between the write then read */ + *batch++ = _3DSTATE_PIPE_CONTROL | (5 - 2); + *batch++ = PIPE_CONTROL_NO_WRITE | PIPE_CONTROL_CS_STALL; + *batch++ = 0; + *batch++ = 0; + *batch++ = 0; - /* Write the register. */ - *batch++ = MI_LOAD_REGISTER_IMM | (3 - 2); - *batch++ = reg; - *batch++ = expected_value; - - /* Force a command barrier between the write then read */ - *batch++ = _3DSTATE_PIPE_CONTROL | (5 - 2); - *batch++ = PIPE_CONTROL_NO_WRITE | PIPE_CONTROL_CS_STALL; - *batch++ = 0; - *batch++ = 0; - *batch++ = 0; - - /* Save the register's value back to the buffer. 
*/ - *batch++ = MI_STORE_REGISTER_MEM | (3 - 2); - *batch++ = reg; - drm_intel_bo_emit_reloc(bo, (char *)batch -(char *)buf, - screen->workaround_bo, offset*sizeof(uint32_t), - I915_GEM_DOMAIN_INSTRUCTION, - I915_GEM_DOMAIN_INSTRUCTION); - *batch++ = screen->workaround_bo->offset + offset*sizeof(uint32_t); - - /* And afterwards clear the register */ - *batch++ = MI_LOAD_REGISTER_IMM | (3 - 2); - *batch++ = reg; - *batch++ = 0; - - *batch++ = MI_BATCH_BUFFER_END; - if ((batch - buf) & 1) + /* Save the register's value back to the buffer. */ + *batch++ = MI_STORE_REGISTER_MEM | (3 - 2); + *batch++ = r[i].reg; + drm_intel_bo_emit_reloc(bo, (char *)batch -(char *)buf, + screen->workaround_bo, + (offset+i)*sizeof(uint32_t), + I915_GEM_DOMAIN_INSTRUCTION, + I915_GEM_DOMAIN_INSTRUCTION); + *batch++ = screen->workaround_bo->offset + (offset+i)*sizeof(uint32_t); + + /* And afterwards clear the register */ + *batch++ = MI_LOAD_REGISTER_IMM | (3 - 2); + *batch++ = r[i].reg; *batch++ = 0; - if (drm_intel_bo_subdata(bo, 0, (char *)batch - (char *)buf, buf) == 0) - drm_intel_bo_mrb_exec(bo, (char *)batch - (char *)buf, - NULL, 0, 0, - I915_EXEC_RENDER); + *batch++ = MI_BATCH_BUFFER_END; + if ((batch - buf) & 1) + *batch++ = 0; - drm_intel_bo_unreference(bo); + if (drm_intel_bo_subdata(bo, 0, (char *)batch - (char *)buf, buf) == 0) + drm_intel_bo_mrb_exec(bo, (char *)batch - (char *)buf, + NULL, 0, 0, + I915_EXEC_RENDER); - /* Check whether the value got written. */ - bool success = false; + drm_intel_bo_unreference(bo); + } + + /* Check whether the values got written. 
*/ if (drm_intel_bo_map(screen->workaround_bo, false) == 0) { data = screen->workaround_bo->virtual; - success = data[offset] == expected_value; + for (i = 0; i < count; i++) + *r[i].result = data[offset+i] == r[i].expected_value; drm_intel_bo_unmap(screen->workaround_bo); } - - return success; } static bool -intel_detect_pipelined_so(struct intel_screen *screen) +intel_detect_pipelined_so(struct intel_screen *screen, + struct detect_pipelined_register *detect) { - /* Supposedly, Broadwell just works. */ - if (screen->devinfo->gen >= 8) - return true; - + screen->hw_has_pipelined_so = false; if (screen->devinfo->gen <= 6) - return false; + return 0; + + /* Supposedly, Broadwell just works. */ + if (screen->devinfo->gen >= 8) { + screen->hw_has_pipelined_so = true; + return 0; + } /* We use SO_WRITE_OFFSET0 since you're supposed to write it (unlike the * statistics registers), and we already reset it to zero before using it. */ - return intel_detect_pipelined_register(screen, - GEN7_SO_WRITE_OFFSET(0), - 0x1337d0d0); + detect->reg = GEN7_SO_WRITE_OFFSET(0); + detect->expected_value = 0x1337d0d0; + detect->result = &screen->hw_has_pipelined_so; + return 1; } -static bool -intel_detect_pipelined_oacontrol(struct intel_screen *screen) +static int +intel_detect_pipelined_oacontrol(struct intel_screen *screen, + struct detect_pipelined_register *reg) { + screen->hw_has_pipelined_oacontrol = false; if (screen->devinfo->gen < 6 || screen->devinfo->gen >= 8) - return false; + return 0; /* Set "Select Context ID" to a particular address (which is likely not a * context), but leave all counting disabled. This should be harmless. 
*/ - return intel_detect_pipelined_register(screen, OACONTROL, 0x31337000); + reg->reg = OACONTROL; + reg->expected_value = 0x31337000; + reg->result = &screen->hw_has_pipelined_oacontrol; + return 1; +} + +static void +intel_detect_pipelined_register_access(struct intel_screen *screen) +{ + struct detect_pipelined_register regs[2], *r =regs; + + /* Combine the multiple register access validation into a single + * round trip through the kernel + GPU. + */ + r += intel_detect_pipelined_so(screen, r); + r += intel_detect_pipelined_oacontrol(screen, r); + + __intel_detect_pipelined_registers(screen, regs, r-regs); } /** @@ -1525,9 +1567,8 @@ __DRIconfig **intelInitScreen2(__DRIscreen *psp) intelScreen->hw_has_swizzling = intel_detect_swizzling(intelScreen); intelScreen->hw_has_timestamp = intel_detect_timestamp(intelScreen); - intelScreen->hw_has_pipelined_so = intel_detect_pipelined_so(intelScreen); - intelScreen->hw_has_pipelined_oacontrol = - intel_detect_pipelined_oacontrol(intelScreen); + + intel_detect_pipelined_register_access(intelScreen); const char *force_msaa = getenv("INTEL_FORCE_MSAA"); if (force_msaa) { -- 2.1.4 _______________________________________________ mesa-dev mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/mesa-dev
