Add amdgpu MCA debugfs support (mca_debug_mode, mca_ue_dump, mca_ce_dump).

Signed-off-by: Yang Wang <[email protected]>
Reviewed-by: Hawking Zhang <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c | 116 ++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h |   2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c |   2 +
 3 files changed, 120 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
index 9fa88ae81b12..35d592293106 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
@@ -210,3 +210,119 @@ int amdgpu_mca_smu_get_mca_entry(struct amdgpu_device *adev,enum amdgpu_mca_erro
        return -ENOTSUPP;
 }
 
+#if defined(CONFIG_DEBUG_FS)
+static int amdgpu_mca_smu_debug_mode_set(void *data, u64 val)
+{ /* Setter for the debug-mode attribute: switch SMU MCA debug mode on or off. */
+       struct amdgpu_device *adev = (struct amdgpu_device *)data;
+       bool enable = !!val; /* any non-zero value means "on" */
+       int ret = amdgpu_mca_smu_set_debug_mode(adev, enable);
+
+       if (ret)
+               return ret; /* propagate the failure without logging success */
+
+       dev_info(adev->dev, "amdgpu set smu mca debug mode %s success\n",
+                enable ? "on" : "off");
+       return 0;
+}
+
+static void mca_dump_entry(struct seq_file *m, struct mca_bank_entry *entry)
+{ /* Write one MCA bank entry (type, ip, info fields, registers) to @m as text. */
+       const char *type = entry->type == AMDGPU_MCA_ERROR_TYPE_UE ? "UE" : "CE";
+       int i, idx = entry->idx; /* idx tags every line printed for this entry */
+
+       seq_printf(m, "mca entry[%d].type: %s\n", idx, type);
+       seq_printf(m, "mca entry[%d].ip: %d\n", idx, entry->ip);
+       seq_printf(m, "mca entry[%d].info: socketid:%d aid:%d hwid:0x%03x mcatype:0x%04x\n",
+                  idx, entry->info.socket_id, entry->info.aid, entry->info.hwid, entry->info.mcatype);
+       for (i = 0; i < ARRAY_SIZE(entry->regs); i++) /* one output line per register */
+               seq_printf(m, "mca entry[%d].regs[%d]: 0x%016llx\n", idx, i, entry->regs[i]);
+}
+
+static int mca_dump_show(struct seq_file *m, enum amdgpu_mca_error_type type)
+{ /* Print the valid MCA count for @type, then dump every entry; 0 or an error code. */
+       struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
+       struct mca_bank_entry *entry;
+       uint32_t i, count = 0; /* index is unsigned so it compares cleanly with count */
+       int ret;
+
+       ret = amdgpu_mca_smu_get_valid_mca_count(adev, type, &count);
+       if (ret)
+               return ret;
+
+       seq_printf(m, "amdgpu smu %s valid mca count: %u\n", /* %u matches uint32_t */
+                  type == AMDGPU_MCA_ERROR_TYPE_UE ? "UE" : "CE", count);
+
+       if (!count)
+               return 0; /* nothing to dump, so skip the allocation */
+
+       entry = kmalloc(sizeof(*entry), GFP_KERNEL); /* one buffer reused for every index */
+       if (!entry)
+               return -ENOMEM;
+
+       for (i = 0; i < count; i++) {
+               memset(entry, 0, sizeof(*entry)); /* drop the previous entry's data */
+
+               ret = amdgpu_mca_smu_get_mca_entry(adev, type, i, entry);
+               if (ret)
+                       goto out_free_entry; /* stop at the first failed fetch */
+
+               mca_dump_entry(m, entry);
+       }
+
+out_free_entry: /* shared exit: ret is 0 here when the loop ran to completion */
+       kfree(entry);
+
+       return ret;
+}
+
+static int mca_dump_ce_show(struct seq_file *m, void *unused)
+{ /* seq_file show callback for CE: delegates to mca_dump_show(); @unused is ignored */
+       return mca_dump_show(m, AMDGPU_MCA_ERROR_TYPE_CE);
+}
+
+static int mca_dump_ce_open(struct inode *inode, struct file *file)
+{ /* open handler of mca_ce_dump_debug_fops: single_open() with the CE show callback */
+       return single_open(file, mca_dump_ce_show, inode->i_private); /* i_private is forwarded as the data argument */
+}
+
+static const struct file_operations mca_ce_dump_debug_fops = { /* used for the "mca_ce_dump" debugfs file */
+       .owner = THIS_MODULE,
+       .open = mca_dump_ce_open,
+       .read = seq_read,
+       .llseek = seq_lseek,
+       .release = single_release, /* counterpart of single_open() in mca_dump_ce_open() */
+};
+
+static int mca_dump_ue_show(struct seq_file *m, void *unused)
+{ /* seq_file show callback for UE: delegates to mca_dump_show(); @unused is ignored */
+       return mca_dump_show(m, AMDGPU_MCA_ERROR_TYPE_UE);
+}
+
+static int mca_dump_ue_open(struct inode *inode, struct file *file)
+{ /* open handler of mca_ue_dump_debug_fops: single_open() with the UE show callback */
+       return single_open(file, mca_dump_ue_show, inode->i_private); /* i_private is forwarded as the data argument */
+}
+
+static const struct file_operations mca_ue_dump_debug_fops = { /* used for the "mca_ue_dump" debugfs file */
+       .owner = THIS_MODULE,
+       .open = mca_dump_ue_open,
+       .read = seq_read,
+       .llseek = seq_lseek,
+       .release = single_release, /* counterpart of single_open() in mca_dump_ue_open() */
+};
+
+DEFINE_DEBUGFS_ATTRIBUTE(mca_debug_mode_fops, NULL, amdgpu_mca_smu_debug_mode_set, "%llu\n"); /* NULL: no get callback */
+#endif
+
+void amdgpu_mca_smu_debugfs_init(struct amdgpu_device *adev, struct dentry *root)
+{ /* Create the MCA debugfs files under @root; empty body without CONFIG_DEBUG_FS. */
+#if defined(CONFIG_DEBUG_FS)
+       if (!root || adev->ip_versions[MP1_HWIP][0] != IP_VERSION(13, 0, 6))
+               return; /* files are only created when MP1 is IP version 13.0.6 */
+
+       debugfs_create_file("mca_debug_mode", 0200, root, adev, &mca_debug_mode_fops); /* write-only */
+       debugfs_create_file("mca_ue_dump", 0400, root, adev, &mca_ue_dump_debug_fops); /* read-only */
+       debugfs_create_file("mca_ce_dump", 0400, root, adev, &mca_ce_dump_debug_fops); /* read-only */
+#endif
+}
+
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h
index 6915ae0d5b92..0f8cbfb929dd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h
@@ -110,4 +110,6 @@ int amdgpu_mca_smu_get_valid_mca_count(struct amdgpu_device *adev, enum amdgpu_m
 int amdgpu_mca_smu_get_error_count(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type, uint32_t *count);
 int amdgpu_mca_smu_get_mca_entry(struct amdgpu_device *adev,enum amdgpu_mca_error_type type, int idx, struct mca_bank_entry *entry);
 
+void amdgpu_mca_smu_debugfs_init(struct amdgpu_device *adev, struct dentry *root); /* creates the MCA debugfs files under @root */
+
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index eeb695f9ff1c..4e4ba2149595 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -1570,6 +1570,8 @@ void amdgpu_ras_debugfs_create_all(struct amdgpu_device *adev)
                        amdgpu_ras_debugfs_create(adev, &fs_info, dir);
                }
        }
+
+       amdgpu_mca_smu_debugfs_init(adev, dir);
 }
 
 /* debugfs end */
-- 
2.34.1

Reply via email to