APU platforms (identified by `adev->gmc.is_app_apu`) do not initialize dedicated
VRAM management structures (`adev->mman.vram_mgr.manager`) because they rely on
system memory instead of discrete VRAM. Accessing this uninitialized structure
via `ttm_resource_manager_usage()` triggers a NULL pointer dereference
(typically in `_raw_spin_lock()` when trying to acquire the manager's lock),
leading to a kernel oops, especially when tools like rocm-smi query VRAM
usage or during power/VM operations.

Fix this by adding explicit APU checks in all code paths that access VRAM
manager structures:

1. **amdgpu_cs.c**: Extend the existing bandwidth control check in
   `amdgpu_cs_get_threshold_for_moves()` to include APU devices. Return 0 for
   migration thresholds immediately, skipping VRAM-specific logic that would
   access uninitialized data.

2. **amdgpu_kms.c**: Modify the `AMDGPU_INFO_VRAM_USAGE` ioctl and memory info
   reporting to return 0 for VRAM usage on APUs. This avoids calling
   `ttm_resource_manager_usage()` with an invalid manager pointer.

3. **amdgpu_virt.c**: Skip VRAM usage calculation for APUs when writing vf2pf
   (virtual function to physical function) data. Use 0 for `fb_usage` since APUs
   have no discrete framebuffer memory to report.

These changes ensure APUs never access uninitialized VRAM manager structures,
resolving the NULL dereference while preserving correct behavior for discrete
GPUs (which retain full VRAM usage tracking).

Signed-off-by: Jesse Zhang <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c   | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c  | 4 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 5f515fdcc775..d80414b32015 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -709,7 +709,7 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
         */
        const s64 us_upper_bound = 200000;
 
-       if (!adev->mm_stats.log2_max_MBps) {
+       if ((!adev->mm_stats.log2_max_MBps) || adev->gmc.is_app_apu) {
                *max_bytes = 0;
                *max_vis_bytes = 0;
                return;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index a9327472c651..e6bf9f6a2713 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -758,7 +758,7 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
                ui64 = atomic64_read(&adev->num_vram_cpu_page_faults);
                return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
        case AMDGPU_INFO_VRAM_USAGE:
-               ui64 = ttm_resource_manager_usage(&adev->mman.vram_mgr.manager);
+               ui64 = adev->gmc.is_app_apu ? 0 : ttm_resource_manager_usage(&adev->mman.vram_mgr.manager);
                return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
        case AMDGPU_INFO_VIS_VRAM_USAGE:
                ui64 = amdgpu_vram_mgr_vis_usage(&adev->mman.vram_mgr);
@@ -805,7 +805,7 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
                        atomic64_read(&adev->vram_pin_size) -
                        AMDGPU_VM_RESERVED_VRAM;
                mem.vram.heap_usage =
-                       ttm_resource_manager_usage(vram_man);
+                       adev->gmc.is_app_apu ? 0 : ttm_resource_manager_usage(vram_man);
                mem.vram.max_allocation = mem.vram.usable_heap_size * 3 / 4;
 
                mem.cpu_accessible_vram.total_heap_size =
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 3328ab63376b..5ff856bef199 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -599,7 +599,7 @@ static int amdgpu_virt_write_vf2pf_data(struct amdgpu_device *adev)
        vf2pf_info->os_info.all = 0;
 
        vf2pf_info->fb_usage =
-               ttm_resource_manager_usage(&adev->mman.vram_mgr.manager) >> 20;
+               adev->gmc.is_app_apu ? 0 : ttm_resource_manager_usage(&adev->mman.vram_mgr.manager) >> 20;
        vf2pf_info->fb_vis_usage =
                amdgpu_vram_mgr_vis_usage(&adev->mman.vram_mgr) >> 20;
        vf2pf_info->fb_size = adev->gmc.real_vram_size >> 20;
-- 
2.49.0

Reply via email to