On Tue, Jun 15, 2021 at 05:42:13PM -0700, Matt Roper wrote:
> Because Render Power Gating restricts us to just a single subslice as a
> valid steering target for reads of multicast registers in a SUBSLICE
> range, the default steering we setup at init may not lead to a suitable
> target for L3BANK multicast register.  In cases where it does not, use
> explicit runtime steering whenever an L3BANK multicast register is read.
> 
> While we're at it, let's simplify the function a little bit and drop its
> support for gen10/CNL since no such platforms ever materialized for real
> use.  Multicast register steering is already an area that causes enough
> confusion; no need to complicate it with what's effectively dead code.
> 
> v2:
>  - Use gt->uncore instead of gt->i915->uncore.  (Tvrtko)
>  - Use {} as table terminator.  (Rodrigo)
> 
> Cc: Tvrtko Ursulin <[email protected]>
> Cc: Rodrigo Vivi <[email protected]>
> Signed-off-by: Matt Roper <[email protected]>

Reviewed-by: Rodrigo Vivi <[email protected]>

> ---
>  drivers/gpu/drm/i915/gt/intel_gt.c          | 18 +++++
>  drivers/gpu/drm/i915/gt/intel_gt_types.h    |  4 +
>  drivers/gpu/drm/i915/gt/intel_workarounds.c | 84 ++++++---------------
>  3 files changed, 46 insertions(+), 60 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c 
> b/drivers/gpu/drm/i915/gt/intel_gt.c
> index 66299105da66..25a3ecf9892a 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt.c
> +++ b/drivers/gpu/drm/i915/gt/intel_gt.c
> @@ -83,6 +83,11 @@ void intel_gt_init_hw_early(struct intel_gt *gt, struct 
> i915_ggtt *ggtt)
>       gt->ggtt = ggtt;
>  }
>  
> +static const struct intel_mmio_range icl_l3bank_steering_table[] = {
> +     { 0x00B100, 0x00B3FF },
> +     {},
> +};
> +
>  int intel_gt_init_mmio(struct intel_gt *gt)
>  {
>       intel_gt_init_clock_frequency(gt);
> @@ -90,6 +95,13 @@ int intel_gt_init_mmio(struct intel_gt *gt)
>       intel_uc_init_mmio(&gt->uc);
>       intel_sseu_info_init(gt);
>  
> +     if (GRAPHICS_VER(gt->i915) >= 11) {
> +             gt->steering_table[L3BANK] = icl_l3bank_steering_table;
> +             gt->info.l3bank_mask =
> +                     intel_uncore_read(gt->uncore, GEN10_MIRROR_FUSE3) &
> +                     GEN10_L3BANK_MASK;
> +     }
> +
>       return intel_engines_init_mmio(gt);
>  }
>  
> @@ -744,6 +756,12 @@ static void intel_gt_get_valid_steering(struct intel_gt 
> *gt,
>                                       u8 *sliceid, u8 *subsliceid)
>  {
>       switch (type) {
> +     case L3BANK:
> +             GEM_DEBUG_WARN_ON(!gt->info.l3bank_mask); /* should be 
> impossible! */
> +
> +             *sliceid = __ffs(gt->info.l3bank_mask);
> +             *subsliceid = 0;        /* unused */
> +             break;
>       default:
>               MISSING_CASE(type);
>               *sliceid = 0;
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h 
> b/drivers/gpu/drm/i915/gt/intel_gt_types.h
> index f2c274eee1e6..80dc131e862f 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
> @@ -48,6 +48,8 @@ struct intel_mmio_range {
>   * need to explicitly re-steer reads of registers of the other type.
>   */
>  enum intel_steering_type {
> +     L3BANK,
> +
>       NUM_STEERING_TYPES
>  };
>  
> @@ -174,6 +176,8 @@ struct intel_gt {
>               /* Media engine access to SFC per instance */
>               u8 vdbox_sfc_access;
>  
> +             u32 l3bank_mask;
> +
>               /* Slice/subslice/EU info */
>               struct sseu_dev_info sseu;
>       } info;
> diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c 
> b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> index 93c74d4cae02..d9a5a445ceec 100644
> --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
> +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> @@ -945,71 +945,37 @@ cfl_gt_workarounds_init(struct drm_i915_private *i915, 
> struct i915_wa_list *wal)
>  }
>  
>  static void
> -wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
> +icl_wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
>  {
>       const struct sseu_dev_info *sseu = &i915->gt.info.sseu;
>       unsigned int slice, subslice;
> -     u32 l3_en, mcr, mcr_mask;
> +     u32 mcr, mcr_mask;
>  
> -     GEM_BUG_ON(GRAPHICS_VER(i915) < 10);
> +     GEM_BUG_ON(GRAPHICS_VER(i915) < 11);
> +     GEM_BUG_ON(hweight8(sseu->slice_mask) > 1);
> +     slice = 0;
>  
>       /*
> -      * WaProgramMgsrForL3BankSpecificMmioReads: cnl,icl
> -      * L3Banks could be fused off in single slice scenario. If that is
> -      * the case, we might need to program MCR select to a valid L3Bank
> -      * by default, to make sure we correctly read certain registers
> -      * later on (in the range 0xB100 - 0xB3FF).
> -      *
> -      * WaProgramMgsrForCorrectSliceSpecificMmioReads:cnl,icl
> -      * Before any MMIO read into slice/subslice specific registers, MCR
> -      * packet control register needs to be programmed to point to any
> -      * enabled s/ss pair. Otherwise, incorrect values will be returned.
> -      * This means each subsequent MMIO read will be forwarded to an
> -      * specific s/ss combination, but this is OK since these registers
> -      * are consistent across s/ss in almost all cases. In the rare
> -      * occasions, such as INSTDONE, where this value is dependent
> -      * on s/ss combo, the read should be done with read_subslice_reg.
> -      *
> -      * Since GEN8_MCR_SELECTOR contains dual-purpose bits which select both
> -      * to which subslice, or to which L3 bank, the respective mmio reads
> -      * will go, we have to find a common index which works for both
> -      * accesses.
> -      *
> -      * Case where we cannot find a common index fortunately should not
> -      * happen in production hardware, so we only emit a warning instead of
> -      * implementing something more complex that requires checking the range
> -      * of every MMIO read.
> +      * Although a platform may have subslices, we need to always steer
> +      * reads to the lowest instance that isn't fused off.  When Render
> +      * Power Gating is enabled, grabbing forcewake will only power up a
> +      * single subslice (the "minconfig") if there isn't a real workload
> +      * that needs to be run; this means that if we steer register reads to
> +      * one of the higher subslices, we run the risk of reading back 0's or
> +      * random garbage.
>        */
> +     subslice = __ffs(intel_sseu_get_subslices(sseu, slice));
>  
> -     if (GRAPHICS_VER(i915) >= 10 && is_power_of_2(sseu->slice_mask)) {
> -             u32 l3_fuse =
> -                     intel_uncore_read(&i915->uncore, GEN10_MIRROR_FUSE3) &
> -                     GEN10_L3BANK_MASK;
> -
> -             drm_dbg(&i915->drm, "L3 fuse = %x\n", l3_fuse);
> -             l3_en = ~(l3_fuse << GEN10_L3BANK_PAIR_COUNT | l3_fuse);
> -     } else {
> -             l3_en = ~0;
> -     }
> +     /*
> +      * If the subslice we picked above also steers us to a valid L3 bank,
> +      * then we can just rely on the default steering and won't need to
> +      * worry about explicitly re-steering L3BANK reads later.
> +      */
> +     if (i915->gt.info.l3bank_mask & BIT(subslice))
> +             i915->gt.steering_table[L3BANK] = NULL;
>  
> -     slice = fls(sseu->slice_mask) - 1;
> -     subslice = fls(l3_en & intel_sseu_get_subslices(sseu, slice));
> -     if (!subslice) {
> -             drm_warn(&i915->drm,
> -                      "No common index found between subslice mask %x and L3 
> bank mask %x!\n",
> -                      intel_sseu_get_subslices(sseu, slice), l3_en);
> -             subslice = fls(l3_en);
> -             drm_WARN_ON(&i915->drm, !subslice);
> -     }
> -     subslice--;
> -
> -     if (GRAPHICS_VER(i915) >= 11) {
> -             mcr = GEN11_MCR_SLICE(slice) | GEN11_MCR_SUBSLICE(subslice);
> -             mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK;
> -     } else {
> -             mcr = GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice);
> -             mcr_mask = GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK;
> -     }
> +     mcr = GEN11_MCR_SLICE(slice) | GEN11_MCR_SUBSLICE(subslice);
> +     mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK;
>  
>       drm_dbg(&i915->drm, "MCR slice/subslice = %x\n", mcr);
>  
> @@ -1019,8 +985,6 @@ wa_init_mcr(struct drm_i915_private *i915, struct 
> i915_wa_list *wal)
>  static void
>  cnl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list 
> *wal)
>  {
> -     wa_init_mcr(i915, wal);
> -
>       /* WaInPlaceDecompressionHang:cnl */
>       wa_write_or(wal,
>                   GEN9_GAMT_ECO_REG_RW_IA,
> @@ -1030,7 +994,7 @@ cnl_gt_workarounds_init(struct drm_i915_private *i915, 
> struct i915_wa_list *wal)
>  static void
>  icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list 
> *wal)
>  {
> -     wa_init_mcr(i915, wal);
> +     icl_wa_init_mcr(i915, wal);
>  
>       /* WaInPlaceDecompressionHang:icl */
>       wa_write_or(wal,
> @@ -1112,7 +1076,7 @@ static void
>  gen12_gt_workarounds_init(struct drm_i915_private *i915,
>                         struct i915_wa_list *wal)
>  {
> -     wa_init_mcr(i915, wal);
> +     icl_wa_init_mcr(i915, wal);
>  
>       /* Wa_14011060649:tgl,rkl,dg1,adls,adl-p */
>       wa_14011060649(i915, wal);
> -- 
> 2.25.4
> 
> _______________________________________________
> Intel-gfx mailing list
> [email protected]
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to