From 5af14a0bfe5a4d444108de82a567baa7cd7f4054 Mon Sep 17 00:00:00 2001
From: David Panariti <David.Panariti@amd.com>
Date: Mon, 10 Apr 2017 19:00:13 -0400
Subject: [PATCH] drm/amdgpu: Add kernel parameter to manage memory error
 features.

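Add an ecc_mask module parameter that is ANDed with the per-ASIC ecc_flags
during early init, so the user can choose which memory error
detection/correction features the driver enables (0 disables them all).

Currently Carrizo provides error detection and correction (EDC); when EDC
is masked off, the gfx v8 EDC GPR workarounds are skipped and EDC is not
enabled.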

Change-Id: Id9287b25a6f51a64064500468e525e6be00b3820
Signed-off-by: David Panariti <David.Panariti@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  2 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  2 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c    |  4 ++++
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c      | 18 ++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/vi.c            |  2 ++
 drivers/gpu/drm/amd/include/amd_shared.h   | 14 ++++++++++++++
 6 files changed, 42 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index b7e7156..d5d2eec 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -102,6 +102,7 @@ extern unsigned amdgpu_pcie_gen_cap;
 extern unsigned amdgpu_pcie_lane_cap;
 extern unsigned amdgpu_cg_mask;
 extern unsigned amdgpu_pg_mask;
+extern unsigned amdgpu_ecc_mask;
 extern char *amdgpu_disable_cu;
 extern char *amdgpu_virtual_display;
 extern unsigned amdgpu_pp_feature_mask;
@@ -1564,6 +1565,7 @@ struct amdgpu_device {
 	struct amdgpu_pm		pm;
 	u32				cg_flags;
 	u32				pg_flags;
+	u32				ecc_flags;
 
 	/* amdgpu smumgr */
 	struct amdgpu_smumgr smu;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 8fce309..d1e7950 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1487,6 +1487,7 @@ static int amdgpu_early_init(struct amdgpu_device *adev)
 
 	adev->cg_flags &= amdgpu_cg_mask;
 	adev->pg_flags &= amdgpu_pg_mask;
+	adev->ecc_flags &= amdgpu_ecc_mask;
 
 	return 0;
 }
@@ -3275,6 +3276,7 @@ static ssize_t amdgpu_debugfs_gca_config_read(struct file *f, char __user *buf,
 	config[no_regs++] = adev->gfx.config.mc_arb_ramcfg;
 	config[no_regs++] = adev->gfx.config.gb_addr_config;
 	config[no_regs++] = adev->gfx.config.num_rbs;
+	config[no_regs++] = adev->ecc_flags;
 
 	/* rev==1 */
 	config[no_regs++] = adev->rev_id;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 6238e2e..68bf881 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -101,6 +101,7 @@ unsigned amdgpu_pcie_gen_cap = 0;
 unsigned amdgpu_pcie_lane_cap = 0;
 unsigned amdgpu_cg_mask = 0xffffffff;
 unsigned amdgpu_pg_mask = 0xffffffff;
+unsigned amdgpu_ecc_mask = 0xffffffff;
 char *amdgpu_disable_cu = NULL;
 char *amdgpu_virtual_display = NULL;
 unsigned amdgpu_pp_feature_mask = 0xffffffff;
@@ -212,6 +213,9 @@ module_param_named(cg_mask, amdgpu_cg_mask, uint, 0444);
 MODULE_PARM_DESC(pg_mask, "Powergating flags mask (0 = disable power gating)");
 module_param_named(pg_mask, amdgpu_pg_mask, uint, 0444);
 
+MODULE_PARM_DESC(ecc_mask, "ECC/EDC flags mask (0 = disable ECC/EDC)");
+module_param_named(ecc_mask, amdgpu_ecc_mask, uint, 0444);
+
 MODULE_PARM_DESC(disable_cu, "Disable CUs (se.sh.cu,...)");
 module_param_named(disable_cu, amdgpu_disable_cu, charp, 0444);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index df591fb..9b9adbf 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -1664,6 +1664,24 @@ static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
 	if (adev->asic_type != CHIP_CARRIZO)
 		return 0;
 
+	DRM_INFO("gfx_v8_0_do_edc_gpr_workarounds(): ecc_flags: 0x%08x\n",
+		 adev->ecc_flags);
+
+	/*
+	 * Check if EDC has been requested.  On Carrizo, EDC is also the
+	 * best/safest error-handling mode, so either flag enables it.
+	 */
+	if (!(adev->ecc_flags
+	      & (AMD_ECC_SUPPORT_BEST | AMD_ECC_SUPPORT_EDC))) {
+		DRM_INFO("gfx_v8_0_do_edc_gpr_workarounds(): "
+			 "skipping workarounds and not enabling EDC.\n");
+
+		return 0;
+	}
+
+	DRM_INFO("gfx_v8_0_do_edc_gpr_workarounds(): "
+		 "running workarounds and enabling EDC.\n");
+
 	/* bail if the compute ring is not ready */
 	if (!ring->ready)
 		return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index f1c2bff..3469c71 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -1081,6 +1081,8 @@ static int vi_common_early_init(void *handle)
 				AMD_PG_SUPPORT_UVD |
 				AMD_PG_SUPPORT_VCE;
 		}
+		adev->ecc_flags = AMD_ECC_SUPPORT_EDC |
+			AMD_ECC_SUPPORT_BEST;
 		adev->external_rev_id = adev->rev_id + 0x1;
 		break;
 	case CHIP_STONEY:
diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h
index 2ccf44e..c4fd013 100644
--- a/drivers/gpu/drm/amd/include/amd_shared.h
+++ b/drivers/gpu/drm/amd/include/amd_shared.h
@@ -179,6 +179,20 @@ struct amd_pp_profile {
 #define AMD_PG_SUPPORT_GFX_QUICK_MG		(1 << 11)
 #define AMD_PG_SUPPORT_GFX_PIPELINE		(1 << 12)
 
+/*
+ * ECC flags
+ * Allows the user to choose what kind of error detection/correction is used.
+ * Currently, EDC is supported on Carrizo.
+ *
+ * The AMD_ECC_SUPPORT_BEST bit lets a user ask the driver to select what
+ * it considers the best/safest mode.  This may not be the same as the
+ * default, depending on the GPU and the application.
+ * Using a single bit makes it easy to request the best support without
+ * needing to know all currently supported modes.
+ */
+#define AMD_ECC_SUPPORT_BEST			(1 << 0)
+#define AMD_ECC_SUPPORT_EDC			(1 << 1)
+
 enum amd_pm_state_type {
 	/* not used for dpm */
 	POWER_STATE_TYPE_DEFAULT,
-- 
2.7.4

