[Public]

AFAIK, that expression is legal (some code analyzer may warn on value of 
4*max_wgp_per_sh); similar kind is used in rotate shift operations.

Thanks,
Lijo

-----Original Message-----
From: Quan, Evan <[email protected]> 
Sent: Tuesday, June 22, 2021 7:56 AM
To: Lazar, Lijo <[email protected]>; [email protected]
Cc: Deucher, Alexander <[email protected]>
Subject: RE: [PATCH V3 1/7] drm/amdgpu: correct tcp harvest setting

[AMD Official Use Only]

Thanks Lijo.
However, I'm not quite sure whether " 0xffffffff << (4 * max_wgp_per_sh);" is a 
valid expression since it kind of triggers some overflow.
Can that work for non-x86 platform or even work reliably for x86 platform?

BR
Evan
> -----Original Message-----
> From: Lazar, Lijo <[email protected]>
> Sent: Monday, June 21, 2021 8:08 PM
> To: Quan, Evan <[email protected]>; [email protected]
> Cc: Deucher, Alexander <[email protected]>
> Subject: Re: [PATCH V3 1/7] drm/amdgpu: correct tcp harvest setting
> 
> One minor comment below, apart from that the series looks good.
> 
> Reviewed-by: Lijo Lazar <[email protected]>
> 
> On 6/21/2021 12:30 PM, Evan Quan wrote:
> > Add missing settings for SQC bits. And correct some confusing logics 
> > around active wgp bitmap calculation.
> >
> > Change-Id: If4992e175fd61d5609b00328cbe21f487517d039
> > Signed-off-by: Evan Quan <[email protected]>
> > ---
> > V1->V2:
> >    - restore correct tcp_harvest setting for NV10 and NV12
> >    - move asic type guard upper layer for better readability
> > ---
> >   drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 103 ++++++++++++++------
> -----
> >   1 file changed, 57 insertions(+), 46 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> > b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> > index 15ae9e33b925..384b95fbad8b 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> > @@ -5090,47 +5090,50 @@ static void gfx_v10_0_tcp_harvest(struct
> amdgpu_device *adev)
> >             4 + /* RMI */
> >             1); /* SQG */
> >
> > -   if (adev->asic_type == CHIP_NAVI10 ||
> > -       adev->asic_type == CHIP_NAVI14 ||
> > -       adev->asic_type == CHIP_NAVI12) {
> > -           mutex_lock(&adev->grbm_idx_mutex);
> > -           for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
> > -                   for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
> > -                           gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff);
> > -                           wgp_active_bitmap =
> gfx_v10_0_get_wgp_active_bitmap_per_sh(adev);
> > -                           /*
> > -                            * Set corresponding TCP bits for the inactive
> WGPs in
> > -                            * GCRD_SA_TARGETS_DISABLE
> > -                            */
> > -                           gcrd_targets_disable_tcp = 0;
> > -                           /* Set TCP & SQC bits in
> UTCL1_UTCL0_INVREQ_DISABLE */
> > -                           utcl_invreq_disable = 0;
> > -
> > -                           for (k = 0; k < max_wgp_per_sh; k++) {
> > -                                   if (!(wgp_active_bitmap & (1 << k))) {
> > -                                           gcrd_targets_disable_tcp |=
> 3 << (2 * k);
> > -                                           utcl_invreq_disable |= (3 <<
> (2 * k)) |
> > -                                                   (3 << (2 *
> (max_wgp_per_sh + k)));
> > -                                   }
> > +   mutex_lock(&adev->grbm_idx_mutex);
> > +   for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
> > +           for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
> > +                   gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff);
> > +                   wgp_active_bitmap =
> gfx_v10_0_get_wgp_active_bitmap_per_sh(adev);
> > +                   /*
> > +                    * Set corresponding TCP bits for the inactive WGPs in
> > +                    * GCRD_SA_TARGETS_DISABLE
> > +                    */
> > +                   gcrd_targets_disable_tcp = 0;
> > +                   /* Set TCP & SQC bits in
> UTCL1_UTCL0_INVREQ_DISABLE */
> > +                   utcl_invreq_disable = 0;
> > +
> > +                   for (k = 0; k < max_wgp_per_sh; k++) {
> > +                           if (!(wgp_active_bitmap & (1 << k))) {
> > +                                   gcrd_targets_disable_tcp |= 3 << (2 *
> k);
> > +                                   gcrd_targets_disable_tcp |= 1 << (k +
> (max_wgp_per_sh * 2));
> > +                                   utcl_invreq_disable |= (3 << (2 * k)) |
> > +                                           (3 << (2 * (max_wgp_per_sh
> + k)));
> >                             }
> > -
> > -                           tmp = RREG32_SOC15(GC, 0,
> mmUTCL1_UTCL0_INVREQ_DISABLE);
> > -                           /* only override TCP & SQC bits */
> > -                           tmp &= 0xffffffff << (4 * max_wgp_per_sh);
> > -                           tmp |= (utcl_invreq_disable &
> utcl_invreq_disable_mask);
> > -                           WREG32_SOC15(GC, 0,
> mmUTCL1_UTCL0_INVREQ_DISABLE, tmp);
> > -
> > -                           tmp = RREG32_SOC15(GC, 0,
> mmGCRD_SA_TARGETS_DISABLE);
> > -                           /* only override TCP bits */
> > -                           tmp &= 0xffffffff << (2 * max_wgp_per_sh);
> > -                           tmp |= (gcrd_targets_disable_tcp &
> gcrd_targets_disable_mask);
> > -                           WREG32_SOC15(GC, 0,
> mmGCRD_SA_TARGETS_DISABLE, tmp);
> >                     }
> > -           }
> >
> > -           gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 
> > 0xffffffff);
> > -           mutex_unlock(&adev->grbm_idx_mutex);
> > +                   tmp = RREG32_SOC15(GC, 0,
> mmUTCL1_UTCL0_INVREQ_DISABLE);
> > +                   /* only override TCP & SQC bits */
> > +                   if (adev->asic_type == CHIP_NAVI14)
> > +                           tmp &= 0xff000000;
> > +                   else
> > +                           tmp &=0xfff00000;
> 
> For the disable field mask calculation (which is the value that is 
> applied finally), there is no ASIC check. The above code may utilize 
> the same method as in the original code without ASIC check.
> 
> tmp &= 0xffffffff << (4 * max_wgp_per_sh);
> 
> Same for below case also - 3*max_wgp_per_sh.
> 
> Thanks,
> Lijo
> 
> > +                   tmp |= (utcl_invreq_disable &
> utcl_invreq_disable_mask);
> > +                   WREG32_SOC15(GC, 0,
> mmUTCL1_UTCL0_INVREQ_DISABLE, tmp);
> > +
> > +                   tmp = RREG32_SOC15(GC, 0,
> mmGCRD_SA_TARGETS_DISABLE);
> > +                   /* only override TCP & SQC bits */
> > +                   if (adev->asic_type == CHIP_NAVI14)
> > +                           tmp &= 0xfffc0000;
> > +                   else
> > +                           tmp &= 0xffff8000;
> > +                   tmp |= (gcrd_targets_disable_tcp &
> gcrd_targets_disable_mask);
> > +                   WREG32_SOC15(GC, 0,
> mmGCRD_SA_TARGETS_DISABLE, tmp);
> > +           }
> >     }
> > +
> > +   gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
> > +   mutex_unlock(&adev->grbm_idx_mutex);
> >   }
> >
> >   static void gfx_v10_0_get_tcc_info(struct amdgpu_device *adev) @@
> > -7408,7 +7411,10 @@ static int gfx_v10_0_hw_init(void *handle)
> >      * init golden registers and rlc resume may override some registers,
> >      * reconfig them here
> >      */
> > -   gfx_v10_0_tcp_harvest(adev);
> > +   if (adev->asic_type == CHIP_NAVI10 ||
> > +       adev->asic_type == CHIP_NAVI14 ||
> > +       adev->asic_type == CHIP_NAVI12)
> > +           gfx_v10_0_tcp_harvest(adev);
> >
> >     r = gfx_v10_0_cp_resume(adev);
> >     if (r)
> > @@ -9328,17 +9334,22 @@ static void
> > gfx_v10_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device
> *
> >
> >   static u32 gfx_v10_0_get_wgp_active_bitmap_per_sh(struct
> amdgpu_device *adev)
> >   {
> > -   u32 data, wgp_bitmask;
> > -   data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
> > -   data |= RREG32_SOC15(GC, 0,
> mmGC_USER_SHADER_ARRAY_CONFIG);
> > +   u32 disabled_mask =
> > +           ~amdgpu_gfx_create_bitmask(adev-
> >gfx.config.max_cu_per_sh >> 1);
> > +   u32 efuse_setting = 0;
> > +   u32 vbios_setting = 0;
> > +
> > +   efuse_setting = RREG32_SOC15(GC, 0,
> mmCC_GC_SHADER_ARRAY_CONFIG);
> > +   efuse_setting &=
> CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
> > +   efuse_setting >>=
> CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
> >
> > -   data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
> > -   data >>=
> CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
> > +   vbios_setting = RREG32_SOC15(GC, 0,
> mmGC_USER_SHADER_ARRAY_CONFIG);
> > +   vbios_setting &=
> GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
> > +   vbios_setting >>=
> GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
> >
> > -   wgp_bitmask =
> > -           amdgpu_gfx_create_bitmask(adev-
> >gfx.config.max_cu_per_sh >> 1);
> > +   disabled_mask |= efuse_setting | vbios_setting;
> >
> > -   return (~data) & wgp_bitmask;
> > +   return (~disabled_mask);
> >   }
> >
> >   static u32 gfx_v10_0_get_cu_active_bitmap_per_sh(struct
> > amdgpu_device *adev)
> >
_______________________________________________
amd-gfx mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Reply via email to