Wa_22013059131 sets FORCE_1_SUB_MESSAGE_PER_FRAGMENT (bit 15) in LSC_CHICKEN_BIT_0 at engine init, but this is known to cause GPU hangs in certain workloads. Add I915_CONTEXT_PARAM_WA_22013059131 so that userspace which handles the workaround itself (e.g. by limiting SLM size) can set it to 1, telling the kernel that bit 15 does not need to be programmed for that context.
LSC_CHICKEN_BIT_0 is not context-saved by hardware, so the kernel restores the correct value on every context switch via the indirect context batchbuffer to avoid leaking state between contexts. The old unconditional application of Wa_22013059131 in intel_workarounds.c is removed. v8: - Clarify in the uAPI comment that setting this parameter only opts out of LSC_CHICKEN_BIT_0 bit 15 (FORCE_1_SUB_MESSAGE_PER_FRAGMENT); LSC_CHICKEN_BIT_0_UDW MAXREQS_PER_BANK remains unconditionally programmed by the kernel as the other part of Wa_22013059131 v7: - Reject ioctl with -ENODEV on non-DG2-G11 platforms v6: - Remove excessive blank lines v5: - Remove fix and stable v4: - Add a link to the userspace using this API v3: - Kernel-internal context will not change workaround settings Bspec: 54833 Link: https://github.com/intel/compute-runtime/pull/919 Cc: [email protected] Cc: Shuicheng Lin <[email protected]> Cc: Matt Roper <[email protected]> Cc: Joonas Lahtinen <[email protected]> Cc: Rodrigo Vivi <[email protected]> Cc: Maciej Plewka <[email protected]> Cc: Andi Shyti <[email protected]> Signed-off-by: Jia Yao <[email protected]> Reviewed-by: Matt Roper <[email protected]> Reviewed-by: Andi Shyti <[email protected]> --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 14 ++++++ .../gpu/drm/i915/gem/i915_gem_context_types.h | 1 + drivers/gpu/drm/i915/gt/intel_context_types.h | 1 + drivers/gpu/drm/i915/gt/intel_lrc.c | 43 ++++++++++++++++++- drivers/gpu/drm/i915/gt/intel_workarounds.c | 10 ++--- include/uapi/drm/i915_drm.h | 15 +++++++ 6 files changed, 78 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 6ac0f23570f3..048393264ede 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -911,6 +911,17 @@ static int set_proto_ctx_param(struct drm_i915_file_private *fpriv, ret = -EINVAL; break; + case I915_CONTEXT_PARAM_WA_22013059131: + if 
(args->size) + ret = -EINVAL; + else if (!IS_DG2_G11(i915)) + ret = -ENODEV; + else if (args->value) + pc->user_flags |= BIT(UCONTEXT_WA_22013059131); + else + pc->user_flags &= ~BIT(UCONTEXT_WA_22013059131); + break; + case I915_CONTEXT_PARAM_RECOVERABLE: if (args->size) ret = -EINVAL; @@ -1003,6 +1014,9 @@ static int intel_context_set_gem(struct intel_context *ce, if (test_bit(UCONTEXT_LOW_LATENCY, &ctx->user_flags)) __set_bit(CONTEXT_LOW_LATENCY, &ce->flags); + if (test_bit(UCONTEXT_WA_22013059131, &ctx->user_flags)) + __set_bit(CONTEXT_WA_22013059131, &ce->flags); + return ret; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h index 0267c924634b..4efc0e758d3b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h @@ -338,6 +338,7 @@ struct i915_gem_context { #define UCONTEXT_RECOVERABLE 3 #define UCONTEXT_PERSISTENCE 4 #define UCONTEXT_LOW_LATENCY 5 +#define UCONTEXT_WA_22013059131 6 /** * @flags: small set of booleans diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index 10070ee4d74c..84011ce7c84d 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -133,6 +133,7 @@ struct intel_context { #define CONTEXT_EXITING 13 #define CONTEXT_LOW_LATENCY 14 #define CONTEXT_OWN_STATE 15 +#define CONTEXT_WA_22013059131 16 struct { u64 timeout_us; diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 147d22907960..13344ebb847e 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1348,6 +1348,37 @@ gen12_invalidate_state_cache(u32 *cs) return cs; } +static u32 * +dg2_g11_emit_wa_22013059131(const struct intel_context *ce, u32 *cs) +{ + /* + * While re-writing LSC_CHICKEN_BIT_0 for Wa_22013059131, the + * other bits of the register will also get 
overwritten. The + * hardware default for all other bits is 0, but any workarounds + * that adjust the other bits in the lower dword of the register + * also need to be re-applied here. At the moment that's just + * Wa_22014226127, which is always set for DG2-G11 platforms. + */ + u32 val = DISABLE_D8_D16_COASLESCE; + + /* + * Wa_22013059131: only set FORCE_1_SUB_MESSAGE_PER_FRAGMENT for + * userspace contexts that have not opted out. Kernel-internal + * contexts (gem_context == NULL) never run shader workloads that + * require this workaround, so skip them unconditionally. + */ + if (rcu_access_pointer(ce->gem_context) && + !test_bit(CONTEXT_WA_22013059131, &ce->flags)) { + val |= FORCE_1_SUB_MESSAGE_PER_FRAGMENT; + } + + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = i915_mmio_reg_offset(LSC_CHICKEN_BIT_0); + *cs++ = val; + + return cs; +} + static u32 * gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs) { @@ -1371,6 +1402,10 @@ gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs) IS_DG2(ce->engine->i915)) cs = dg2_emit_draw_watermark_setting(cs); + /* Wa_22013059131:dg2 */ + if (IS_DG2_G11(ce->engine->i915)) + cs = dg2_g11_emit_wa_22013059131(ce, cs); + return cs; } @@ -1387,7 +1422,13 @@ gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs) PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE, 0); - return gen12_emit_aux_table_inv(ce->engine, cs); + cs = gen12_emit_aux_table_inv(ce->engine, cs); + + /* Wa_22013059131:dg2 */ + if (IS_DG2_G11(ce->engine->i915)) + cs = dg2_g11_emit_wa_22013059131(ce, cs); + + return cs; } static u32 *xehp_emit_fastcolor_blt_wabb(const struct intel_context *ce, u32 *cs) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 24ea5d8d529c..ef6eea3ab597 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -2840,7 +2840,11 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, 
struct i915_wa_li if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0) || IS_DG2(i915)) { - /* Wa_22014226127 */ + /* + * Wa_22014226127: Note that this workaround also needs to be + * re-applied in intel_lrc.c when LSC_CHICKEN_BIT_0 is + * re-written for Wa_22013059131. + */ wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0, DISABLE_D8_D16_COASLESCE); } @@ -2867,10 +2871,6 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li MAXREQS_PER_BANK, REG_FIELD_PREP(MAXREQS_PER_BANK, 2)); - /* Wa_22013059131:dg2 */ - wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0, - FORCE_1_SUB_MESSAGE_PER_FRAGMENT); - /* * Wa_22012654132 * diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 535cb68fdb5c..dc7e143536ef 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -2172,6 +2172,21 @@ struct drm_i915_gem_context_param { * Note that this is a debug API not available on production kernel builds. */ #define I915_CONTEXT_PARAM_CONTEXT_IMAGE 0xf + +/* + * I915_CONTEXT_PARAM_WA_22013059131: + * + * Default value 0 means the kernel sets LSC_CHICKEN_BIT_0 bit 15 + * (FORCE_1_SUB_MESSAGE_PER_FRAGMENT) for this context as part of + * Wa_22013059131. Set to 1 to inform the kernel that userspace is + * handling the SLM contention workaround itself (e.g. by limiting SLM + * size), so bit 15 programming is not needed for this context. + * + * Note: LSC_CHICKEN_BIT_0_UDW MAXREQS_PER_BANK (bits 39:37) is the + * other part of Wa_22013059131 and remains unconditionally programmed + * by the kernel regardless of this setting. DG2-G11 only. + */ +#define I915_CONTEXT_PARAM_WA_22013059131 0x10 /* Must be kept compact -- no holes and well documented */ /** @value: Context parameter value to be set or queried */ -- 2.43.0
