https://github.com/rj-jesus updated https://github.com/llvm/llvm-project/pull/146515
>From 7857d5536c4145bc3b2c797222b25f3533db518f Mon Sep 17 00:00:00 2001 From: Ricardo Jesus <r...@nvidia.com> Date: Wed, 2 Apr 2025 10:37:59 -0700 Subject: [PATCH 1/3] [AArch64] Add support for -mcpu=gb10. This patch adds support for -mcpu=gb10 (NVIDIA GB10). This is a big.LITTLE cluster of Cortex-X925 and Cortex-A725 cores. The appropriate MIDR numbers are added to detect them in -mcpu=native. We did not add an -mcpu=cortex-x925.cortex-a725 option because GB10 does include the crypto instructions which we want on by default, and the current convention is to not enable such extensions for Arm Cortex cores in -mcpu where they are optional in the IP. --- .../print-enabled-extensions/aarch64-gb10.c | 69 +++++++++++++++++++ .../Misc/target-invalid-cpu-note/aarch64.c | 1 + llvm/lib/Target/AArch64/AArch64Processors.td | 3 + llvm/lib/TargetParser/Host.cpp | 21 +++++- llvm/unittests/TargetParser/Host.cpp | 8 +++ .../TargetParser/TargetParserTest.cpp | 3 +- 6 files changed, 102 insertions(+), 3 deletions(-) create mode 100644 clang/test/Driver/print-enabled-extensions/aarch64-gb10.c diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-gb10.c b/clang/test/Driver/print-enabled-extensions/aarch64-gb10.c new file mode 100644 index 0000000000000..589f7e3e5ee4e --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-gb10.c @@ -0,0 +1,69 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=gb10 | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AES, FEAT_PMULL Enable AES support +// CHECK-NEXT: FEAT_AMUv1 Enable Armv8.4-A Activity Monitors extension +// CHECK-NEXT: FEAT_AMUv1p1 Enable Armv8.6-A Activity Monitors Virtualization support +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_BF16 Enable BFloat16 Extension +// CHECK-NEXT: FEAT_BTI Enable Branch Target Identification +// CHECK-NEXT: FEAT_CCIDX Enable Armv8.3-A Extend of the CCSIDR number of sets +// CHECK-NEXT: FEAT_CRC32 Enable Armv8.0-A CRC-32 checksum instructions +// CHECK-NEXT: FEAT_CSV2_2 Enable architectural speculation restriction +// CHECK-NEXT: FEAT_DIT Enable Armv8.4-A Data Independent Timing instructions +// CHECK-NEXT: FEAT_DPB Enable Armv8.2-A data Cache Clean to Point of Persistence +// CHECK-NEXT: FEAT_DPB2 Enable Armv8.5-A Cache Clean to Point of Deep Persistence +// CHECK-NEXT: FEAT_DotProd Enable dot product support +// CHECK-NEXT: FEAT_ECV Enable enhanced counter virtualization extension +// CHECK-NEXT: FEAT_ETE Enable Embedded Trace Extension +// CHECK-NEXT: FEAT_FCMA Enable Armv8.3-A Floating-point complex number support +// CHECK-NEXT: FEAT_FGT Enable fine grained virtualization traps extension +// CHECK-NEXT: FEAT_FHM Enable FP16 FML instructions +// CHECK-NEXT: FEAT_FP Enable Armv8.0-A Floating Point Extensions +// CHECK-NEXT: FEAT_FP16 Enable half-precision floating-point data processing +// CHECK-NEXT: FEAT_FPAC Enable Armv8.3-A Pointer Authentication Faulting enhancement +// CHECK-NEXT: FEAT_FRINTTS Enable FRInt[32|64][Z|X] instructions that round a floating-point number to an integer (in FP format) forcing it to fit into a 32- or 64-bit int +// CHECK-NEXT: FEAT_FlagM Enable Armv8.4-A Flag Manipulation instructions +// CHECK-NEXT: FEAT_FlagM2 Enable alternative NZCV format for floating point comparisons +// CHECK-NEXT: FEAT_HCX Enable Armv8.7-A HCRX_EL2 system register +// CHECK-NEXT: FEAT_I8MM Enable Matrix Multiply Int8 Extension +// CHECK-NEXT: FEAT_JSCVT Enable Armv8.3-A JavaScript FP conversion instructions +// CHECK-NEXT: FEAT_LOR Enable Armv8.1-A Limited Ordering Regions extension +// CHECK-NEXT: FEAT_LRCPC Enable support for RCPC extension +// CHECK-NEXT: FEAT_LRCPC2 Enable Armv8.4-A RCPC instructions with Immediate Offsets +// CHECK-NEXT: FEAT_LSE Enable Armv8.1-A Large System Extension (LSE) atomic instructions +// CHECK-NEXT: FEAT_LSE2 Enable Armv8.4-A Large System Extension 2 (LSE2) atomicity rules +// CHECK-NEXT: FEAT_MPAM Enable Armv8.4-A Memory system Partitioning and Monitoring extension +// CHECK-NEXT: FEAT_MTE, FEAT_MTE2 Enable Memory Tagging Extension +// CHECK-NEXT: FEAT_NV, FEAT_NV2 Enable Armv8.4-A Nested Virtualization Enchancement +// CHECK-NEXT: FEAT_PAN Enable Armv8.1-A Privileged Access-Never extension +// CHECK-NEXT: FEAT_PAN2 Enable Armv8.2-A PAN s1e1R and s1e1W Variants +// CHECK-NEXT: FEAT_PAuth Enable Armv8.3-A Pointer Authentication extension +// CHECK-NEXT: FEAT_PMUv3 Enable Armv8.0-A PMUv3 Performance Monitors extension +// CHECK-NEXT: FEAT_RAS, FEAT_RASv1p1 Enable Armv8.0-A Reliability, Availability and Serviceability Extensions +// CHECK-NEXT: FEAT_RDM Enable Armv8.1-A Rounding Double Multiply Add/Subtract instructions +// CHECK-NEXT: FEAT_SB Enable Armv8.5-A Speculation Barrier +// CHECK-NEXT: FEAT_SEL2 Enable Armv8.4-A Secure Exception Level 2 extension +// CHECK-NEXT: FEAT_SHA1, FEAT_SHA256 Enable SHA1 and SHA256 support +// CHECK-NEXT: FEAT_SHA3, FEAT_SHA512 Enable SHA512 and SHA3 support +// CHECK-NEXT: FEAT_SM4, FEAT_SM3 Enable SM3 and SM4 support +// CHECK-NEXT: FEAT_SPE Enable Statistical Profiling extension +// CHECK-NEXT: FEAT_SPECRES Enable Armv8.5-A execution and data prediction invalidation instructions +// CHECK-NEXT: FEAT_SPEv1p2 Enable extra register in the Statistical Profiling Extension +// CHECK-NEXT: FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit +// CHECK-NEXT: FEAT_SVE Enable Scalable Vector Extension (SVE) instructions +// CHECK-NEXT: FEAT_SVE2 Enable Scalable Vector Extension 2 (SVE2) instructions +// CHECK-NEXT: FEAT_SVE_AES, FEAT_SVE_PMULL128 Enable SVE AES and quadword SVE polynomial multiply instructions +// CHECK-NEXT: FEAT_SVE_BitPerm Enable bit permutation SVE2 instructions +// CHECK-NEXT: FEAT_SVE_SHA3 Enable SVE SHA3 instructions +// CHECK-NEXT: FEAT_SVE_SM4 Enable SM4 SVE2 instructions +// CHECK-NEXT: FEAT_TLBIOS, FEAT_TLBIRANGE Enable Armv8.4-A TLB Range and Maintenance instructions +// CHECK-NEXT: FEAT_TRBE Enable Trace Buffer Extension +// CHECK-NEXT: FEAT_TRF Enable Armv8.4-A Trace extension +// CHECK-NEXT: FEAT_UAO Enable Armv8.2-A UAO PState +// CHECK-NEXT: FEAT_VHE Enable Armv8.1-A Virtual Host extension +// CHECK-NEXT: FEAT_WFxT Enable Armv8.7-A WFET and WFIT instruction +// CHECK-NEXT: FEAT_XS Enable Armv8.7-A limited-TLB-maintenance instruction diff --git a/clang/test/Misc/target-invalid-cpu-note/aarch64.c b/clang/test/Misc/target-invalid-cpu-note/aarch64.c index 0a5c485e896be..0346ab2bb6b13 100644 --- a/clang/test/Misc/target-invalid-cpu-note/aarch64.c +++ b/clang/test/Misc/target-invalid-cpu-note/aarch64.c @@ -75,6 +75,7 @@ // CHECK-SAME: {{^}}, exynos-m5 // CHECK-SAME: {{^}}, falkor // CHECK-SAME: {{^}}, fujitsu-monaka +// CHECK-SAME: {{^}}, gb10 // CHECK-SAME: {{^}}, generic // CHECK-SAME: {{^}}, grace // CHECK-SAME: {{^}}, kryo diff --git a/llvm/lib/Target/AArch64/AArch64Processors.td b/llvm/lib/Target/AArch64/AArch64Processors.td index dcccde4a4d666..4484951821337 100644 --- a/llvm/lib/Target/AArch64/AArch64Processors.td +++ b/llvm/lib/Target/AArch64/AArch64Processors.td @@ -1117,6 +1117,7 @@ def ProcessorFeatures { FeatureDotProd, FeatureFPARMv8, FeatureMatMulInt8, FeatureSSBS, FeatureCCIDX, FeatureJS, FeatureLSE, FeatureRAS, FeatureRCPC, FeatureRDM]; + list<SubtargetFeature> GB10 = !listconcat(X925, [FeatureSVEAES, FeatureSVESHA3, FeatureSVE2SM4]); list<SubtargetFeature> Grace = !listconcat(NeoverseV2, [FeatureSVE2SM4, FeatureSVEAES, FeatureSVESHA3]); // ETE and TRBE are future architecture extensions. We temporarily enable them @@ -1203,6 +1204,8 @@ def : ProcessorModel<"cortex-x4", NeoverseV2Model, ProcessorFeatures.X4, [TuneX4]>; def : ProcessorModel<"cortex-x925", NeoverseV2Model, ProcessorFeatures.X925, [TuneX925]>; +def : ProcessorModel<"gb10", NeoverseV2Model, ProcessorFeatures.GB10, + [TuneX925]>; def : ProcessorModel<"grace", NeoverseV2Model, ProcessorFeatures.Grace, [TuneNeoverseV2]>; def : ProcessorModel<"neoverse-e1", CortexA53Model, diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp index 5957e1befe2da..97f6de4be8cb2 100644 --- a/llvm/lib/TargetParser/Host.cpp +++ b/llvm/lib/TargetParser/Host.cpp @@ -179,22 +179,39 @@ StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) { // Look for the CPU implementer line. StringRef Implementer; StringRef Hardware; - StringRef Part; + SmallVector<StringRef, 32> Parts; for (unsigned I = 0, E = Lines.size(); I != E; ++I) { if (Lines[I].starts_with("CPU implementer")) Implementer = Lines[I].substr(15).ltrim("\t :"); if (Lines[I].starts_with("Hardware")) Hardware = Lines[I].substr(8).ltrim("\t :"); if (Lines[I].starts_with("CPU part")) - Part = Lines[I].substr(8).ltrim("\t :"); + Parts.emplace_back(Lines[I].substr(8).ltrim("\t :")); } + // Last `Part' seen, in case we don't analyse all `Parts' parsed. + StringRef Part = Parts.empty() ? StringRef() : Parts.back(); + + // Remove duplicate `Parts'. + llvm::sort(Parts); + Parts.erase(llvm::unique(Parts), Parts.end()); + + auto MatchBL = [](auto const &Parts, StringRef Big, StringRef Little) { + if (Parts.size() == 2) + return (Parts[0] == Big && Parts[1] == Little) || + (Parts[1] == Big && Parts[0] == Little); + return false; + }; + if (Implementer == "0x41") { // ARM Ltd. // MSM8992/8994 may give cpu part for the core that the kernel is running on, // which is undeterministic and wrong. Always return cortex-a53 for these SoC. if (Hardware.ends_with("MSM8994") || Hardware.ends_with("MSM8996")) return "cortex-a53"; + // Detect big.LITTLE systems. + if (MatchBL(Parts, "0xd85", "0xd87")) + return "gb10"; // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The // values correspond to the "Part number" in the CP15/c0 register. The diff --git a/llvm/unittests/TargetParser/Host.cpp b/llvm/unittests/TargetParser/Host.cpp index c13d33e32c6d9..898a716c28d48 100644 --- a/llvm/unittests/TargetParser/Host.cpp +++ b/llvm/unittests/TargetParser/Host.cpp @@ -149,6 +149,14 @@ TEST(getLinuxHostCPUName, AArch64) { EXPECT_EQ(sys::detail::getHostCPUNameForARM("CPU implementer : 0x61\n" "CPU part : 0x039"), "apple-m2"); + EXPECT_EQ(sys::detail::getHostCPUNameForARM("CPU implementer : 0x41\n" + "CPU part : 0xd85\n" + "CPU part : 0xd87"), + "gb10"); + EXPECT_EQ(sys::detail::getHostCPUNameForARM("CPU implementer : 0x41\n" + "CPU part : 0xd87\n" + "CPU part : 0xd85"), + "gb10"); // MSM8992/4 weirdness StringRef MSM8992ProcCpuInfo = R"( diff --git a/llvm/unittests/TargetParser/TargetParserTest.cpp b/llvm/unittests/TargetParser/TargetParserTest.cpp index 97ee8dd1cb67b..1d7ad8ac075f4 100644 --- a/llvm/unittests/TargetParser/TargetParserTest.cpp +++ b/llvm/unittests/TargetParser/TargetParserTest.cpp @@ -1164,6 +1164,7 @@ INSTANTIATE_TEST_SUITE_P( AArch64CPUTestParams("a64fx", "armv8.2-a"), AArch64CPUTestParams("fujitsu-monaka", "armv9.3-a"), AArch64CPUTestParams("carmel", "armv8.2-a"), + AArch64CPUTestParams("gb10", "armv9.2-a"), AArch64CPUTestParams("grace", "armv9-a"), AArch64CPUTestParams("olympus", "armv9.2-a"), AArch64CPUTestParams("saphira", "armv8.4-a"), @@ -1260,7 +1261,7 @@ INSTANTIATE_TEST_SUITE_P( AArch64CPUAliasTestParams::PrintToStringParamName); // Note: number of CPUs includes aliases. -static constexpr unsigned NumAArch64CPUArchs = 90; +static constexpr unsigned NumAArch64CPUArchs = 91; TEST(TargetParserTest, testAArch64CPUArchList) { SmallVector<StringRef, NumAArch64CPUArchs> List; >From b603412e3f8bba3e97d1fb38cc7cc1ccf945fcde Mon Sep 17 00:00:00 2001 From: Ricardo Jesus <r...@nvidia.com> Date: Tue, 1 Jul 2025 08:50:35 -0700 Subject: [PATCH 2/3] Rename MatchBL to MatchBigLittle. --- llvm/lib/TargetParser/Host.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp index 97f6de4be8cb2..032a2ce240403 100644 --- a/llvm/lib/TargetParser/Host.cpp +++ b/llvm/lib/TargetParser/Host.cpp @@ -176,7 +176,8 @@ StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) { SmallVector<StringRef, 32> Lines; ProcCpuinfoContent.split(Lines, '\n'); - // Look for the CPU implementer line. + // Look for the CPU implementer and hardware lines, and store the CPU part + // numbers found. StringRef Implementer; StringRef Hardware; SmallVector<StringRef, 32> Parts; @@ -196,7 +197,7 @@ StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) { llvm::sort(Parts); Parts.erase(llvm::unique(Parts), Parts.end()); - auto MatchBL = [](auto const &Parts, StringRef Big, StringRef Little) { + auto MatchBigLittle = [](auto const &Parts, StringRef Big, StringRef Little) { if (Parts.size() == 2) return (Parts[0] == Big && Parts[1] == Little) || (Parts[1] == Big && Parts[0] == Little); @@ -210,7 +211,7 @@ StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) { return "cortex-a53"; // Detect big.LITTLE systems. - if (MatchBL(Parts, "0xd85", "0xd87")) + if (MatchBigLittle(Parts, "0xd85", "0xd87")) return "gb10"; // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The >From 990a9d1ddc59b449b60379b0e4f83d139aa0ef14 Mon Sep 17 00:00:00 2001 From: Ricardo Jesus <r...@nvidia.com> Date: Thu, 3 Jul 2025 03:53:31 -0700 Subject: [PATCH 3/3] Pick cortex-x925 in native handling. --- llvm/lib/TargetParser/Host.cpp | 2 +- llvm/unittests/TargetParser/Host.cpp | 16 ++++++++-------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp index 032a2ce240403..e7c4e19eeb072 100644 --- a/llvm/lib/TargetParser/Host.cpp +++ b/llvm/lib/TargetParser/Host.cpp @@ -212,7 +212,7 @@ StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) { // Detect big.LITTLE systems. if (MatchBigLittle(Parts, "0xd85", "0xd87")) - return "gb10"; + return "cortex-x925"; // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The // values correspond to the "Part number" in the CP15/c0 register. The diff --git a/llvm/unittests/TargetParser/Host.cpp b/llvm/unittests/TargetParser/Host.cpp index 898a716c28d48..0a9ac9bb0596d 100644 --- a/llvm/unittests/TargetParser/Host.cpp +++ b/llvm/unittests/TargetParser/Host.cpp @@ -122,6 +122,14 @@ TEST(getLinuxHostCPUName, AArch64) { EXPECT_EQ(sys::detail::getHostCPUNameForARM("CPU implementer : 0x41\n" "CPU part : 0xd48"), "cortex-x2"); + EXPECT_EQ(sys::detail::getHostCPUNameForARM("CPU implementer : 0x41\n" + "CPU part : 0xd85\n" + "CPU part : 0xd87"), + "cortex-x925"); + EXPECT_EQ(sys::detail::getHostCPUNameForARM("CPU implementer : 0x41\n" + "CPU part : 0xd87\n" + "CPU part : 0xd85"), + "cortex-x925"); EXPECT_EQ(sys::detail::getHostCPUNameForARM("CPU implementer : 0x51\n" "CPU part : 0xc00"), "falkor"); @@ -149,14 +157,6 @@ TEST(getLinuxHostCPUName, AArch64) { EXPECT_EQ(sys::detail::getHostCPUNameForARM("CPU implementer : 0x61\n" "CPU part : 0x039"), "apple-m2"); - EXPECT_EQ(sys::detail::getHostCPUNameForARM("CPU implementer : 0x41\n" - "CPU part : 0xd85\n" - "CPU part : 0xd87"), - "gb10"); - EXPECT_EQ(sys::detail::getHostCPUNameForARM("CPU implementer : 0x41\n" - "CPU part : 0xd87\n" - "CPU part : 0xd85"), - "gb10"); // MSM8992/4 weirdness StringRef MSM8992ProcCpuInfo = R"( _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits