On Tue, Sep 30, 2025 at 11:18:17AM +0530, Akhil P Oommen wrote: > A8x is the next generation of Adreno GPUs, featuring a significant > hardware design change. A major update to the design is the introduction > of Slice architecture. Slices are sort of mini-GPUs within the GPU which > are more independent in processing Graphics and compute workloads. Also, > in addition to the BV and BR pipe we saw in A7x, CP has more concurrency > with additional pipes. > > From a software interface perspective, these changes have a significant > impact on the KMD side. First, the GPU register space has been extensively > reorganized. Second, to avoid a register space explosion caused by the > new slice architecture and additional pipes, many registers are now > virtualized, instead of duplicated as in A7x. KMD must configure an > aperture register with the appropriate slice and pipe ID before accessing > these virtualized registers. > > This patch adds only a skeleton support for the A8x family. An A8x GPU > support will be added in an upcoming patch.
Consider that this text lands in the commit message. What would "an upcoming patch" mean in the Git history? > > Signed-off-by: Akhil P Oommen <[email protected]> > --- > drivers/gpu/drm/msm/Makefile | 1 + > drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 103 +- > drivers/gpu/drm/msm/adreno/a6xx_gpu.h | 21 + > drivers/gpu/drm/msm/adreno/a8xx_gpu.c | 1238 > +++++++++++++++++++++ > drivers/gpu/drm/msm/adreno/adreno_gpu.h | 7 + > drivers/gpu/drm/msm/registers/adreno/a6xx.xml | 1 - > drivers/gpu/drm/msm/registers/adreno/a6xx_gmu.xml | 1 + > 7 files changed, 1344 insertions(+), 28 deletions(-) > > diff --git a/drivers/gpu/drm/msm/Makefile b/drivers/gpu/drm/msm/Makefile > index > 7acf2cc13cd047eb7f5b3f14e1a42a1cc145e087..8aa7d07303fb0cd66869767cb6298b38a621b366 > 100644 > --- a/drivers/gpu/drm/msm/Makefile > +++ b/drivers/gpu/drm/msm/Makefile > @@ -24,6 +24,7 @@ adreno-y := \ > adreno/a6xx_gmu.o \ > adreno/a6xx_hfi.o \ > adreno/a6xx_preempt.o \ > + adreno/a8xx_gpu.o \ > > adreno-$(CONFIG_DEBUG_FS) += adreno/a5xx_debugfs.o \ > > diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c > b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c > index > bd4f98b5457356c5454d0316e59d7e8253401712..4aeeaceb1fb30a9d68ac636c14249e3853ef73ac > 100644 > --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c > +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c > @@ -239,14 +239,21 @@ static void a6xx_set_pagetable(struct a6xx_gpu > *a6xx_gpu, > } > > if (!sysprof) { > - if (!adreno_is_a7xx(adreno_gpu)) { > + if (!(adreno_is_a7xx(adreno_gpu) || > adreno_is_a8xx(adreno_gpu))) { Here and in several other similar places: if (!adreno_is_a7xx(adreno_gpu) && !adreno_is_a8xx(adreno_gpu)) { > /* Turn off protected mode to write to special > registers */ > OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1); > OUT_RING(ring, 0); > } > > - OUT_PKT4(ring, REG_A6XX_RBBM_PERFCTR_SRAM_INIT_CMD, 1); > - OUT_RING(ring, 1); > + if (adreno_is_a8xx(adreno_gpu)) { > + OUT_PKT4(ring, REG_A8XX_RBBM_PERFCTR_SRAM_INIT_CMD, 1); > + OUT_RING(ring, 1); > + OUT_PKT4(ring, > 
REG_A8XX_RBBM_SLICE_PERFCTR_SRAM_INIT_CMD, 1); > + OUT_RING(ring, 1); > + } else { > + OUT_PKT4(ring, REG_A6XX_RBBM_PERFCTR_SRAM_INIT_CMD, 1); > + OUT_RING(ring, 1); > + } > } > > /* Execute the table update */ > @@ -275,7 +282,7 @@ static void a6xx_set_pagetable(struct a6xx_gpu *a6xx_gpu, > * to make sure BV doesn't race ahead while BR is still switching > * pagetables. > */ > - if (adreno_is_a7xx(&a6xx_gpu->base)) { > + if (adreno_is_a7xx(&a6xx_gpu->base) && adreno_is_a8xx(&a6xx_gpu->base)) > { > OUT_PKT7(ring, CP_THREAD_CONTROL, 1); > OUT_RING(ring, CP_THREAD_CONTROL_0_SYNC_THREADS | > CP_SET_THREAD_BR); > } > @@ -289,20 +296,22 @@ static void a6xx_set_pagetable(struct a6xx_gpu > *a6xx_gpu, > OUT_RING(ring, CACHE_INVALIDATE); > > if (!sysprof) { > + u32 reg_status = adreno_is_a8xx(adreno_gpu) ? > + REG_A8XX_RBBM_PERFCTR_SRAM_INIT_STATUS : > + REG_A6XX_RBBM_PERFCTR_SRAM_INIT_STATUS; > /* > * Wait for SRAM clear after the pgtable update, so the > * two can happen in parallel: > */ > OUT_PKT7(ring, CP_WAIT_REG_MEM, 6); > OUT_RING(ring, CP_WAIT_REG_MEM_0_FUNCTION(WRITE_EQ)); > - OUT_RING(ring, CP_WAIT_REG_MEM_POLL_ADDR_LO( > - REG_A6XX_RBBM_PERFCTR_SRAM_INIT_STATUS)); > + OUT_RING(ring, CP_WAIT_REG_MEM_POLL_ADDR_LO(reg_status)); > OUT_RING(ring, CP_WAIT_REG_MEM_POLL_ADDR_HI(0)); > OUT_RING(ring, CP_WAIT_REG_MEM_3_REF(0x1)); > OUT_RING(ring, CP_WAIT_REG_MEM_4_MASK(0x1)); > OUT_RING(ring, CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(0)); > > - if (!adreno_is_a7xx(adreno_gpu)) { > + if (!(adreno_is_a7xx(adreno_gpu) || > adreno_is_a8xx(adreno_gpu))) { > /* Re-enable protected mode: */ > OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1); > OUT_RING(ring, 1); > @@ -441,6 +450,7 @@ static void a7xx_submit(struct msm_gpu *gpu, struct > msm_gem_submit *submit) > struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); > struct msm_ringbuffer *ring = submit->ring; > unsigned int i, ibs = 0; > + u32 rbbm_perfctr_cp0, cp_always_on_counter; > > adreno_check_and_reenable_stall(adreno_gpu); > > @@ 
-460,10 +470,16 @@ static void a7xx_submit(struct msm_gpu *gpu, struct > msm_gem_submit *submit) > if (gpu->nr_rings > 1) > a6xx_emit_set_pseudo_reg(ring, a6xx_gpu, submit->queue); > > - get_stats_counter(ring, REG_A7XX_RBBM_PERFCTR_CP(0), > - rbmemptr_stats(ring, index, cpcycles_start)); > - get_stats_counter(ring, REG_A6XX_CP_ALWAYS_ON_COUNTER, > - rbmemptr_stats(ring, index, alwayson_start)); > + if (adreno_is_a8xx(adreno_gpu)) { > + rbbm_perfctr_cp0 = REG_A8XX_RBBM_PERFCTR_CP(0); > + cp_always_on_counter = REG_A8XX_CP_ALWAYS_ON_COUNTER; > + } else { > + rbbm_perfctr_cp0 = REG_A7XX_RBBM_PERFCTR_CP(0); > + cp_always_on_counter = REG_A6XX_CP_ALWAYS_ON_COUNTER; > + } > + > + get_stats_counter(ring, rbbm_perfctr_cp0, rbmemptr_stats(ring, index, > cpcycles_start)); > + get_stats_counter(ring, cp_always_on_counter, rbmemptr_stats(ring, > index, alwayson_start)); > > OUT_PKT7(ring, CP_THREAD_CONTROL, 1); > OUT_RING(ring, CP_SET_THREAD_BOTH); > @@ -510,10 +526,8 @@ static void a7xx_submit(struct msm_gpu *gpu, struct > msm_gem_submit *submit) > OUT_RING(ring, 0x00e); /* IB1LIST end */ > } > > - get_stats_counter(ring, REG_A7XX_RBBM_PERFCTR_CP(0), > - rbmemptr_stats(ring, index, cpcycles_end)); > - get_stats_counter(ring, REG_A6XX_CP_ALWAYS_ON_COUNTER, > - rbmemptr_stats(ring, index, alwayson_end)); > + get_stats_counter(ring, rbbm_perfctr_cp0, rbmemptr_stats(ring, index, > cpcycles_end)); > + get_stats_counter(ring, cp_always_on_counter, rbmemptr_stats(ring, > index, alwayson_end)); > > /* Write the fence to the scratch register */ > OUT_PKT4(ring, REG_A6XX_CP_SCRATCH(2), 1); > @@ -706,8 +720,11 @@ static int a6xx_calc_ubwc_config(struct adreno_gpu *gpu) > /* Copy the data into the internal struct to drop the const qualifier > (temporarily) */ > *cfg = *common_cfg; > > - cfg->ubwc_swizzle = 0x6; > - cfg->highest_bank_bit = 15; > + /* Use common config as is for A8x */ > + if (!adreno_is_a8xx(gpu)) { > + cfg->ubwc_swizzle = 0x6; > + cfg->highest_bank_bit = 15; > + } > > 
if (adreno_is_a610(gpu)) { > cfg->highest_bank_bit = 13; > @@ -818,7 +835,7 @@ static void a6xx_set_ubwc_config(struct msm_gpu *gpu) > cfg->macrotile_mode); > } > > -static void a7xx_patch_pwrup_reglist(struct msm_gpu *gpu) > +void a7xx_patch_pwrup_reglist(struct msm_gpu *gpu) > { > struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); > struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); > @@ -868,7 +885,7 @@ static void a7xx_patch_pwrup_reglist(struct msm_gpu *gpu) > lock->dynamic_list_len = 0; > } > > -static int a7xx_preempt_start(struct msm_gpu *gpu) > +int a7xx_preempt_start(struct msm_gpu *gpu) > { > struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); > struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); > @@ -925,7 +942,7 @@ static int a6xx_cp_init(struct msm_gpu *gpu) > return a6xx_idle(gpu, ring) ? 0 : -EINVAL; > } > > -static int a7xx_cp_init(struct msm_gpu *gpu) > +int a7xx_cp_init(struct msm_gpu *gpu) > { > struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); > struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); > @@ -993,7 +1010,7 @@ static bool a6xx_ucode_check_version(struct a6xx_gpu > *a6xx_gpu, > return false; > > /* A7xx is safe! */ > - if (adreno_is_a7xx(adreno_gpu) || adreno_is_a702(adreno_gpu)) > + if (adreno_is_a7xx(adreno_gpu) || adreno_is_a702(adreno_gpu) || > adreno_is_a8xx(adreno_gpu)) > return true; > > /* > @@ -2161,7 +2178,7 @@ void a6xx_bus_clear_pending_transactions(struct > adreno_gpu *adreno_gpu, bool gx_ > void a6xx_gpu_sw_reset(struct msm_gpu *gpu, bool assert) > { > /* 11nm chips (e.g. ones with A610) have hw issues with the reset line! > */ > - if (adreno_is_a610(to_adreno_gpu(gpu))) > + if (adreno_is_a610(to_adreno_gpu(gpu)) || > adreno_is_a8xx(to_adreno_gpu(gpu))) > return; > > gpu_write(gpu, REG_A6XX_RBBM_SW_RESET_CMD, assert); > @@ -2192,7 +2209,12 @@ static int a6xx_gmu_pm_resume(struct msm_gpu *gpu) > > msm_devfreq_resume(gpu); > > - adreno_is_a7xx(adreno_gpu) ? 
a7xx_llc_activate(a6xx_gpu) : > a6xx_llc_activate(a6xx_gpu); > + if (adreno_is_a8xx(adreno_gpu)) > + a8xx_llc_activate(a6xx_gpu); > + else if (adreno_is_a7xx(adreno_gpu)) > + a7xx_llc_activate(a6xx_gpu); > + else > + a6xx_llc_activate(a6xx_gpu); > > return ret; > } > @@ -2561,10 +2583,8 @@ static struct msm_gpu *a6xx_gpu_init(struct drm_device > *dev) > adreno_gpu->base.hw_apriv = > !!(config->info->quirks & ADRENO_QUIRK_HAS_HW_APRIV); > > - /* gpu->info only gets assigned in adreno_gpu_init() */ > - is_a7xx = config->info->family == ADRENO_7XX_GEN1 || > - config->info->family == ADRENO_7XX_GEN2 || > - config->info->family == ADRENO_7XX_GEN3; > + /* gpu->info only gets assigned in adreno_gpu_init(). A8x is included > intentionally */ > + is_a7xx = config->info->family >= ADRENO_7XX_GEN1; Is A8xx also a part of is_a7xx? What about the A9XX which will come at some point in the future? > > a6xx_llc_slices_init(pdev, a6xx_gpu, is_a7xx); > > + > +int a8xx_gpu_feature_probe(struct msm_gpu *gpu) > +{ > + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); > + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); > + u32 fuse_val; > + int ret; > + > + /* > + * Assume that if qcom scm isn't available, that whatever > + * replacement allows writing the fuse register ourselves. > + * Users of alternative firmware need to make sure this > + * register is writeable or indicate that it's not somehow. > + * Print a warning because if you mess this up you're about to > + * crash horribly. > + */ > + if (!qcom_scm_is_available()) { How can it not be available here? 
> + dev_warn_once(gpu->dev->dev, > + "SCM is not available, poking fuse register\n"); > + a6xx_llc_write(a6xx_gpu, REG_A7XX_CX_MISC_SW_FUSE_VALUE, > + A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING | > + A7XX_CX_MISC_SW_FUSE_VALUE_FASTBLEND | > + A7XX_CX_MISC_SW_FUSE_VALUE_LPAC); > + adreno_gpu->has_ray_tracing = true; > + return 0; > + } > + > + ret = qcom_scm_gpu_init_regs(QCOM_SCM_GPU_ALWAYS_EN_REQ | > + QCOM_SCM_GPU_TSENSE_EN_REQ); > + if (ret) > + return ret; > + > + /* > + * On a750 raytracing may be disabled by the firmware, find out It's a8xx-related code, why do you have a750 in the comment? > + * whether that's the case. The scm call above sets the fuse > + * register. > + */ > + fuse_val = a6xx_llc_read(a6xx_gpu, > + REG_A7XX_CX_MISC_SW_FUSE_VALUE); > + adreno_gpu->has_ray_tracing = > + !!(fuse_val & A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING); > + > + return 0; > +} > + > + > +#define GBIF_CLIENT_HALT_MASK BIT(0) > +#define GBIF_ARB_HALT_MASK BIT(1) > +#define VBIF_XIN_HALT_CTRL0_MASK GENMASK(3, 0) > +#define VBIF_RESET_ACK_MASK 0xF0 > +#define GPR0_GBIF_HALT_REQUEST 0x1E0 > + > +void a8xx_bus_clear_pending_transactions(struct adreno_gpu *adreno_gpu, bool > gx_off) > +{ > + struct msm_gpu *gpu = &adreno_gpu->base; > + > + if (gx_off) { > + /* Halt the gx side of GBIF */ > + gpu_write(gpu, REG_A8XX_RBBM_GBIF_HALT, 1); > + spin_until(gpu_read(gpu, REG_A8XX_RBBM_GBIF_HALT_ACK) & 1); > + } > + > + /* Halt new client requests on GBIF */ > + gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_CLIENT_HALT_MASK); > + spin_until((gpu_read(gpu, REG_A6XX_GBIF_HALT_ACK) & > + (GBIF_CLIENT_HALT_MASK)) == GBIF_CLIENT_HALT_MASK); > + > + /* Halt all AXI requests on GBIF */ > + gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_ARB_HALT_MASK); > + spin_until((gpu_read(gpu, REG_A6XX_GBIF_HALT_ACK) & > + (GBIF_ARB_HALT_MASK)) == GBIF_ARB_HALT_MASK); > + > + /* The GBIF halt needs to be explicitly cleared */ > + gpu_write(gpu, REG_A6XX_GBIF_HALT, 0x0); > +} > + > +int a8xx_gmu_get_timestamp(struct msm_gpu 
*gpu, uint64_t *value) > +{ > + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); > + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); > + > + mutex_lock(&a6xx_gpu->gmu.lock); > + > + /* Force the GPU power on so we can read this register */ > + a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET); > + > + *value = gpu_read64(gpu, REG_A8XX_CP_ALWAYS_ON_COUNTER); > + > + a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET); > + > + mutex_unlock(&a6xx_gpu->gmu.lock); > + > + return 0; > +} > + > +u64 a8xx_gpu_busy(struct msm_gpu *gpu, unsigned long *out_sample_rate) > +{ > + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); > + struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); > + u64 busy_cycles; > + > + /* 19.2MHz */ > + *out_sample_rate = 19200000; > + > + busy_cycles = gmu_read64(&a6xx_gpu->gmu, > + REG_A8XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_L, > + REG_A8XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_H); > + > + return busy_cycles; > +} > + > +bool a8xx_progress(struct msm_gpu *gpu, struct msm_ringbuffer *ring) > +{ > + return true; > +} > diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h > b/drivers/gpu/drm/msm/adreno/adreno_gpu.h > index > 9831401c3bc865b803c2f9759d5e2ffcd79d19f8..6a2157f31122ba0c2f2a7005c98e3e4f1ada6acc > 100644 > --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h > +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h > @@ -90,6 +90,13 @@ struct adreno_reglist { > u32 value; > }; > > +/* Reglist with pipe information */ > +struct adreno_reglist_pipe { > + u32 offset; > + u32 value; > + u32 pipe; > +}; > + > struct adreno_speedbin { > uint16_t fuse; > uint16_t speedbin; > diff --git a/drivers/gpu/drm/msm/registers/adreno/a6xx.xml > b/drivers/gpu/drm/msm/registers/adreno/a6xx.xml > index > ddde2e03b748f447b5e57571e2b04c68f8f2efc2..c3a202c8dce65d414c89bf76f1cb458b206b4eca > 100644 > --- a/drivers/gpu/drm/msm/registers/adreno/a6xx.xml > +++ b/drivers/gpu/drm/msm/registers/adreno/a6xx.xml > @@ -4876,7 +4876,6 @@ by a particular renderpass/blit. 
> <domain name="A6XX_CX_MISC" width="32" prefix="variant" varset="chip"> > <reg32 offset="0x0001" name="SYSTEM_CACHE_CNTL_0"/> > <reg32 offset="0x0002" name="SYSTEM_CACHE_CNTL_1"/> > - <reg32 offset="0x0087" name="SLICE_ENABLE_FINAL" variants="A8XX-"/> Why? > <reg32 offset="0x0039" name="CX_MISC_TCM_RET_CNTL" variants="A7XX-"/> > <reg32 offset="0x0087" name="CX_MISC_SLICE_ENABLE_FINAL" > variants="A8XX"/> > <reg32 offset="0x0400" name="CX_MISC_SW_FUSE_VALUE" variants="A7XX-"> > diff --git a/drivers/gpu/drm/msm/registers/adreno/a6xx_gmu.xml > b/drivers/gpu/drm/msm/registers/adreno/a6xx_gmu.xml > index > 5dce7934056dd6472c368309b4894f0ed4a4d960..c4e00b1263cda65dce89c2f16860e5bf6f1c6244 > 100644 > --- a/drivers/gpu/drm/msm/registers/adreno/a6xx_gmu.xml > +++ b/drivers/gpu/drm/msm/registers/adreno/a6xx_gmu.xml > @@ -60,6 +60,7 @@ > xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd"> > <reg32 offset="0x1f400" name="GMU_ICACHE_CONFIG"/> > <reg32 offset="0x1f401" name="GMU_DCACHE_CONFIG"/> > <reg32 offset="0x1f40f" name="GMU_SYS_BUS_CONFIG"/> > + <reg32 offset="0x1f50b" name="GMU_MRC_GBIF_QOS_CTRL"/> > <reg32 offset="0x1f800" name="GMU_CM3_SYSRESET"/> > <reg32 offset="0x1f801" name="GMU_CM3_BOOT_CONFIG"/> > <reg32 offset="0x1f81a" name="GMU_CM3_FW_BUSY"/> > > -- > 2.51.0 > -- With best wishes Dmitry
