On 1/12/2025 19:42, Jiang Liu wrote:
Free all allocated resources on error recovery path in function
amdgpu_ras_init().

Signed-off-by: Jiang Liu <[email protected]>

---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 19 ++++++++++++++-----
  1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index c10ea3fd3e16..6b508a9b1abe 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -3864,6 +3864,7 @@ static void amdgpu_ras_init_reserved_vram_size(struct 
amdgpu_device *adev)
  int amdgpu_ras_init(struct amdgpu_device *adev)
  {
        struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+       struct amdgpu_ras_block_list *ras_node, *tmp;
        int r;
if (con)
@@ -3953,20 +3954,20 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
         * to handle fatal error */
        r = amdgpu_nbio_ras_sw_init(adev);
        if (r)
-               return r;
+               goto release_con;
if (adev->nbio.ras &&
            adev->nbio.ras->init_ras_controller_interrupt) {
                r = adev->nbio.ras->init_ras_controller_interrupt(adev);
                if (r)
-                       goto release_con;
+                       goto free_blocks;
        }
if (adev->nbio.ras &&
            adev->nbio.ras->init_ras_err_event_athub_interrupt) {
                r = adev->nbio.ras->init_ras_err_event_athub_interrupt(adev);
                if (r)
-                       goto release_con;
+                       goto free_blocks;
        }
/* Packed socket_id to ras feature mask bits[31:29] */
@@ -3982,7 +3983,7 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
if (amdgpu_ras_fs_init(adev)) {
                r = -EINVAL;
-               goto release_con;
+               goto free_blocks;

Reviewing this shows there is a mistake in amdgpu_ras_fs_init(): if sysfs initialization fails there, the function still returns 0.

Please modify amdgpu_ras_fs_init() to 'return r', and then use that return value at this call site, i.e.:

r = amdgpu_ras_fs_init(adev);
if (r)
        goto free_blocks;

        }
if (amdgpu_ras_aca_is_supported(adev)) {
@@ -3991,7 +3992,7 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
                else
                        r = amdgpu_mca_init(adev);
                if (r)
-                       goto release_con;
+                       goto clear_ras_fs;
        }
dev_info(adev->dev, "RAS INFO: ras initialized successfully, "
@@ -3999,6 +4000,14 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
                 adev->ras_hw_enabled, adev->ras_enabled);
return 0;
+
+clear_ras_fs:
+       amdgpu_ras_fs_fini(adev);
+free_blocks:
+       list_for_each_entry_safe(ras_node, tmp, &adev->ras_list, node) {
+               list_del(&ras_node->node);
+               kfree(ras_node);
+       }
  release_con:
        amdgpu_ras_set_context(adev, NULL);
        kfree(con);

Reply via email to