From 2d81d6435c73e573779e5162755fbae4ab5ff036 Mon Sep 17 00:00:00 2001 From: dann frazier <da...@debian.org> Date: Thu, 16 Apr 2020 12:44:23 -0600 Subject: [PATCH] Add support for new AMD SMCA bank types. Closes: #956919.
--- debian/changelog | 7 + ...-support-for-new-AMD-SMCA-bank-types.patch | 212 ++++++++++++++++++ .../rasdaemon-rename-CPU_NAPLES-cputype.patch | 71 ++++++ debian/patches/series | 2 + 4 files changed, 292 insertions(+) create mode 100644 debian/patches/rasdaemon-add-support-for-new-AMD-SMCA-bank-types.patch create mode 100644 debian/patches/rasdaemon-rename-CPU_NAPLES-cputype.patch diff --git a/debian/changelog b/debian/changelog index 21fb2b1..2f5c217 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,10 @@ +rasdaemon (0.6.5-2) UNRELEASED; urgency=medium + + * Use CPU feature flag instead of family to detect AMD SMCA support. + * Add support for new AMD SMCA bank types. Closes: #956919 + + -- dann frazier <da...@debian.org> Thu, 16 Apr 2020 12:43:23 -0600 + rasdaemon (0.6.5-1) unstable; urgency=medium * Update to latest upstream diff --git a/debian/patches/rasdaemon-add-support-for-new-AMD-SMCA-bank-types.patch b/debian/patches/rasdaemon-add-support-for-new-AMD-SMCA-bank-types.patch new file mode 100644 index 0000000..3fd2ee5 --- /dev/null +++ b/debian/patches/rasdaemon-add-support-for-new-AMD-SMCA-bank-types.patch @@ -0,0 +1,212 @@ +From 8704a85d8dc3483423ec2934fee8132f85f8fdb6 Mon Sep 17 00:00:00 2001 +From: "Brian WoodsGhannam, Yazen" <brian.woods@amd.comyazen.ghan...@amd.com> +Date: Fri, 1 Nov 2019 15:48:14 +0100 +Subject: [PATCH] rasdaemon: add support for new AMD SMCA bank types + +Going forward, the Scalable Machine Check Architecture (SMCA) has some +updated and additional bank types which show up in Zen2. The differing +bank types include: CS_V2, PSP_V2, SMU_V2, MP5, NBIO, and PCIE. The V2 +bank types replace the original bank types but have unique HWID/MCAtype +IDs from the originals so there's no conflicts between different +versions or other bank types. All of the differing bank types have new +MCE descriptions which have been added as well. 
+ +CC: "mchehab+sams...@kernel.org" <mchehab+sams...@kernel.org>, "Namburu, Chandu-babu" <cha...@amd.com> # Thread-Topic: [PATCH 2/2] rasdaemon: add support for new AMD SMCA bank types +Signed-off-by: Brian Woods <brian.wo...@amd.com> +Signed-off-by: Yazen Ghannam <yazen.ghan...@amd.com> +Cc: Chandu-babu Namburu <cha...@amd.com> +Signed-off-by: Mauro Carvalho Chehab <mchehab+hua...@kernel.org> +--- + mce-amd-smca.c | 112 +++++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 112 insertions(+) + +diff --git a/mce-amd-smca.c b/mce-amd-smca.c +index 6c3e8a5..114e786 100644 +--- a/mce-amd-smca.c ++++ b/mce-amd-smca.c +@@ -49,11 +49,17 @@ enum smca_bank_types { + SMCA_FP, /* Floating Point */ + SMCA_L3_CACHE, /* L3 Cache */ + SMCA_CS, /* Coherent Slave */ ++ SMCA_CS_V2, /* Coherent Slave V2 */ + SMCA_PIE, /* Power, Interrupts, etc. */ + SMCA_UMC, /* Unified Memory Controller */ + SMCA_PB, /* Parameter Block */ + SMCA_PSP, /* Platform Security Processor */ ++ SMCA_PSP_V2, /* Platform Security Processor V2 */ + SMCA_SMU, /* System Management Unit */ ++ SMCA_SMU_V2, /* System Management Unit V2 */ ++ SMCA_MP5, /* Microprocessor 5 Unit */ ++ SMCA_NBIO, /* Northbridge IO Unit */ ++ SMCA_PCIE, /* PCI Express Unit */ + N_SMCA_BANK_TYPES + }; + +@@ -165,6 +171,23 @@ static const char * const smca_cs_mce_desc[] = { + "Atomic request parity", + "ECC error on probe filter access", + }; ++/* Coherent Slave Unit V2 */ ++static const char * const smca_cs2_mce_desc[] = { ++ "Illegal Request", ++ "Address Violation", ++ "Security Violation", ++ "Illegal Response", ++ "Unexpected Response", ++ "Request or Probe Parity Error", ++ "Read Response Parity Error", ++ "Atomic Request Parity Error", ++ "SDP read response had no match in the CS queue", ++ "Probe Filter Protocol Error", ++ "Probe Filter ECC Error", ++ "SDP read response had an unexpected RETRY error", ++ "Counter overflow error", ++ "Counter underflow error", ++}; + /* Power, Interrupt, etc.. 
*/ + static const char * const smca_pie_mce_desc[] = { + "HW assert", +@@ -189,10 +212,75 @@ static const char * const smca_pb_mce_desc[] = { + static const char * const smca_psp_mce_desc[] = { + "PSP RAM ECC or parity error", + }; ++/* Platform Security Processor V2 */ ++static const char * const smca_psp2_mce_desc[] = { ++ "High SRAM ECC or parity error", ++ "Low SRAM ECC or parity error", ++ "Instruction Cache Bank 0 ECC or parity error", ++ "Instruction Cache Bank 1 ECC or parity error", ++ "Instruction Tag Ram 0 parity error", ++ "Instruction Tag Ram 1 parity error", ++ "Data Cache Bank 0 ECC or parity error", ++ "Data Cache Bank 1 ECC or parity error", ++ "Data Cache Bank 2 ECC or parity error", ++ "Data Cache Bank 3 ECC or parity error", ++ "Data Tag Bank 0 parity error", ++ "Data Tag Bank 1 parity error", ++ "Data Tag Bank 2 parity error", ++ "Data Tag Bank 3 parity error", ++ "Dirty Data Ram parity error", ++ "TLB Bank 0 parity error", ++ "TLB Bank 1 parity error", ++ "System Hub Read Buffer ECC or parity error", ++}; + /* System Management Unit */ + static const char * const smca_smu_mce_desc[] = { + "SMU RAM ECC or parity error", + }; ++/* System Management Unit V2 */ ++static const char * const smca_smu2_mce_desc[] = { ++ "High SRAM ECC or parity error", ++ "Low SRAM ECC or parity error", ++ "Data Cache Bank A ECC or parity error", ++ "Data Cache Bank B ECC or parity error", ++ "Data Tag Cache Bank A ECC or parity error", ++ "Data Tag Cache Bank B ECC or parity error", ++ "Instruction Cache Bank A ECC or parity error", ++ "Instruction Cache Bank B ECC or parity error", ++ "Instruction Tag Cache Bank A ECC or parity error", ++ "Instruction Tag Cache Bank B ECC or parity error", ++ "System Hub Read Buffer ECC or parity error", ++}; ++/* Microprocessor 5 Unit */ ++static const char * const smca_mp5_mce_desc[] = { ++ "High SRAM ECC or parity error", ++ "Low SRAM ECC or parity error", ++ "Data Cache Bank A ECC or parity error", ++ "Data Cache Bank B ECC or 
parity error", ++ "Data Tag Cache Bank A ECC or parity error", ++ "Data Tag Cache Bank B ECC or parity error", ++ "Instruction Cache Bank A ECC or parity error", ++ "Instruction Cache Bank B ECC or parity error", ++ "Instruction Tag Cache Bank A ECC or parity error", ++ "Instruction Tag Cache Bank B ECC or parity error", ++}; ++/* Northbridge IO Unit */ ++static const char * const smca_nbio_mce_desc[] = { ++ "ECC or Parity error", ++ "PCIE error", ++ "SDP ErrEvent error", ++ "SDP Egress Poison Error", ++ "IOHC Internal Poison Error", ++}; ++/* PCI Express Unit */ ++static const char * const smca_pcie_mce_desc[] = { ++ "CCIX PER Message logging", ++ "CCIX Read Response with Status: Non-Data Error", ++ "CCIX Write Response with Status: Non-Data Error", ++ "CCIX Read Response with Status: Data Error", ++ "CCIX Non-okay write response with data error", ++}; ++ + + struct smca_mce_desc { + const char * const *descs; +@@ -208,11 +296,17 @@ static struct smca_mce_desc smca_mce_descs[] = { + [SMCA_FP] = { smca_fp_mce_desc, ARRAY_SIZE(smca_fp_mce_desc) }, + [SMCA_L3_CACHE] = { smca_l3_mce_desc, ARRAY_SIZE(smca_l3_mce_desc) }, + [SMCA_CS] = { smca_cs_mce_desc, ARRAY_SIZE(smca_cs_mce_desc) }, ++ [SMCA_CS_V2] = { smca_cs2_mce_desc, ARRAY_SIZE(smca_cs2_mce_desc) }, + [SMCA_PIE] = { smca_pie_mce_desc, ARRAY_SIZE(smca_pie_mce_desc) }, + [SMCA_UMC] = { smca_umc_mce_desc, ARRAY_SIZE(smca_umc_mce_desc) }, + [SMCA_PB] = { smca_pb_mce_desc, ARRAY_SIZE(smca_pb_mce_desc) }, + [SMCA_PSP] = { smca_psp_mce_desc, ARRAY_SIZE(smca_psp_mce_desc) }, ++ [SMCA_PSP_V2] = { smca_psp2_mce_desc, ARRAY_SIZE(smca_psp2_mce_desc)}, + [SMCA_SMU] = { smca_smu_mce_desc, ARRAY_SIZE(smca_smu_mce_desc) }, ++ [SMCA_SMU_V2] = { smca_smu2_mce_desc, ARRAY_SIZE(smca_smu2_mce_desc)}, ++ [SMCA_MP5] = { smca_mp5_mce_desc, ARRAY_SIZE(smca_mp5_mce_desc) }, ++ [SMCA_NBIO] = { smca_nbio_mce_desc, ARRAY_SIZE(smca_nbio_mce_desc)}, ++ [SMCA_PCIE] = { smca_pcie_mce_desc, ARRAY_SIZE(smca_pcie_mce_desc)}, + }; + + struct 
smca_hwid { +@@ -235,6 +329,7 @@ static struct smca_hwid smca_hwid_mcatypes[] = { + + /* Data Fabric MCA types */ + { SMCA_CS, 0x0000002E }, ++ { SMCA_CS_V2, 0x0002002E }, + { SMCA_PIE, 0x0001002E }, + + /* Unified Memory Controller MCA type */ +@@ -245,9 +340,20 @@ static struct smca_hwid smca_hwid_mcatypes[] = { + + /* Platform Security Processor MCA type */ + { SMCA_PSP, 0x000000FF }, ++ { SMCA_PSP_V2, 0x000100FF }, + + /* System Management Unit MCA type */ + { SMCA_SMU, 0x00000001 }, ++ { SMCA_SMU_V2, 0x00010001 }, ++ ++ /* Microprocessor 5 Unit MCA type */ ++ { SMCA_MP5, 0x00020001 }, ++ ++ /* Northbridge IO Unit MCA type */ ++ { SMCA_NBIO, 0x00000018 }, ++ ++ /* PCI Express Unit MCA type */ ++ { SMCA_PCIE, 0x00000046 }, + }; + + struct smca_bank_name { +@@ -264,11 +370,17 @@ static struct smca_bank_name smca_names[] = { + [SMCA_FP] = { "Floating Point Unit" }, + [SMCA_L3_CACHE] = { "L3 Cache" }, + [SMCA_CS] = { "Coherent Slave" }, ++ [SMCA_CS_V2] = { "Coherent Slave" }, + [SMCA_PIE] = { "Power, Interrupts, etc." 
}, + [SMCA_UMC] = { "Unified Memory Controller" }, + [SMCA_PB] = { "Parameter Block" }, + [SMCA_PSP] = { "Platform Security Processor" }, ++ [SMCA_PSP_V2] = { "Platform Security Processor" }, + [SMCA_SMU] = { "System Management Unit" }, ++ [SMCA_SMU_V2] = { "System Management Unit" }, ++ [SMCA_MP5] = { "Microprocessor 5 Unit" }, ++ [SMCA_NBIO] = { "Northbridge IO Unit" }, ++ [SMCA_PCIE] = { "PCI Express Unit" }, + }; + + static void amd_decode_errcode(struct mce_event *e) +-- +2.26.0 + diff --git a/debian/patches/rasdaemon-rename-CPU_NAPLES-cputype.patch b/debian/patches/rasdaemon-rename-CPU_NAPLES-cputype.patch new file mode 100644 index 0000000..b1f620b --- /dev/null +++ b/debian/patches/rasdaemon-rename-CPU_NAPLES-cputype.patch @@ -0,0 +1,71 @@ +From 2a1d217660351c08eb2f8bccebf939abba2f7e69 Mon Sep 17 00:00:00 2001 +From: "Brian WoodsGhannam, Yazen" <brian.woods@amd.comyazen.ghan...@amd.com> +Date: Fri, 1 Nov 2019 15:48:13 +0100 +Subject: [PATCH] rasdaemon: rename CPU_NAPLES cputype + +Change CPU_NAPLES to CPU_AMD_SMCA to reflect that it isn't just NAPLES +that is supported, but AMD's Scalable Machine Check Architecture (SMCA). + + [ Yazen: change family check to feature check, and change CPU name. 
] + +CC: "mchehab+sams...@kernel.org" <mchehab+sams...@kernel.org>, "Namburu, Chandu-babu" <cha...@amd.com> # Thread-Topic: [PATCH 1/2] rasdaemon: rename CPU_NAPLES cputype +Signed-off-by: Brian Woods <brian.wo...@amd.com> +Signed-off-by: Yazen Ghannam <yazen.ghan...@amd.com> +Cc: Chandu-babu Namburu <cha...@amd.com> +Signed-off-by: Mauro Carvalho Chehab <mchehab+hua...@kernel.org> +--- + ras-mce-handler.c | 10 ++++++---- + ras-mce-handler.h | 2 +- + 2 files changed, 7 insertions(+), 5 deletions(-) + +diff --git a/ras-mce-handler.c b/ras-mce-handler.c +index fd3ef3b..016acae 100644 +--- a/ras-mce-handler.c ++++ b/ras-mce-handler.c +@@ -55,7 +55,7 @@ static char *cputype_name[] = { + [CPU_KNIGHTS_LANDING] = "Knights Landing", + [CPU_KNIGHTS_MILL] = "Knights Mill", + [CPU_SKYLAKE_XEON] = "Skylake server", +- [CPU_NAPLES] = "AMD Family 17h Zen1", ++ [CPU_AMD_SMCA] = "AMD Scalable MCA", + [CPU_DHYANA] = "Hygon Family 18h Moksha" + }; + +@@ -192,8 +192,10 @@ static int detect_cpu(struct ras_events *ras) + if (!strcmp(mce->vendor, "AuthenticAMD")) { + if (mce->family == 15) + mce->cputype = CPU_K8; +- if (mce->family == 23) +- mce->cputype = CPU_NAPLES; ++ if (strstr(mce->processor_flags, "smca")) { ++ mce->cputype = CPU_AMD_SMCA; ++ goto ret; ++ } + if (mce->family > 23) { + log(ALL, LOG_INFO, + "Can't parse MCE for this AMD CPU yet %d\n", +@@ -441,7 +443,7 @@ int ras_mce_event_handler(struct trace_seq *s, + case CPU_K8: + rc = parse_amd_k8_event(ras, &e); + break; +- case CPU_NAPLES: ++ case CPU_AMD_SMCA: + case CPU_DHYANA: + rc = parse_amd_smca_event(ras, &e); + break; +diff --git a/ras-mce-handler.h b/ras-mce-handler.h +index 4d615b4..ec9a076 100644 +--- a/ras-mce-handler.h ++++ b/ras-mce-handler.h +@@ -47,7 +47,7 @@ enum cputype { + CPU_KNIGHTS_LANDING, + CPU_KNIGHTS_MILL, + CPU_SKYLAKE_XEON, +- CPU_NAPLES, ++ CPU_AMD_SMCA, + CPU_DHYANA, + }; + +-- +2.26.0 + diff --git a/debian/patches/series b/debian/patches/series index 112b296..6e02bee 100644 --- 
a/debian/patches/series +++ b/debian/patches/series @@ -2,3 +2,5 @@ mv-manpage.patch systemd-targets.patch add-man-options.patch systemd-requires.patch +rasdaemon-rename-CPU_NAPLES-cputype.patch +rasdaemon-add-support-for-new-AMD-SMCA-bank-types.patch -- 2.26.1