https://github.com/kinoshita-fj updated https://github.com/llvm/llvm-project/pull/118432
>From bd4c80f2c16e1380be077b586cd13e4bf39f762b Mon Sep 17 00:00:00 2001 From: Kinoshita Kotaro <k.kot...@fujitsu.com> Date: Tue, 3 Dec 2024 05:30:33 +0000 Subject: [PATCH 1/2] [AArch64] Add initial support for FUJITSU-MONAKA This patch adds initial support for FUJITSU-MONAKA CPU (-mcpu=fujitsu-monaka). The scheduling model will be corrected in the future. --- clang/test/Driver/aarch64-fujitsu-monaka.c | 13 +++ .../aarch64-fujitsu-monaka.c | 82 +++++++++++++++++++ .../Misc/target-invalid-cpu-note/aarch64.c | 1 + llvm/lib/Target/AArch64/AArch64Processors.td | 18 ++++ llvm/lib/Target/AArch64/AArch64Subtarget.cpp | 3 + llvm/lib/TargetParser/Host.cpp | 5 +- llvm/test/CodeGen/AArch64/cpus.ll | 1 + llvm/unittests/TargetParser/Host.cpp | 3 + .../TargetParser/TargetParserTest.cpp | 3 +- 9 files changed, 126 insertions(+), 3 deletions(-) create mode 100644 clang/test/Driver/aarch64-fujitsu-monaka.c create mode 100644 clang/test/Driver/print-enabled-extensions/aarch64-fujitsu-monaka.c diff --git a/clang/test/Driver/aarch64-fujitsu-monaka.c b/clang/test/Driver/aarch64-fujitsu-monaka.c new file mode 100644 index 00000000000000..df96b36bace681 --- /dev/null +++ b/clang/test/Driver/aarch64-fujitsu-monaka.c @@ -0,0 +1,13 @@ +// RUN: %clang --target=aarch64 -mcpu=fujitsu-monaka -### -c %s 2>&1 | FileCheck -check-prefix=fujitsu-monaka %s +// RUN: %clang --target=aarch64 -mlittle-endian -mcpu=fujitsu-monaka -### -c %s 2>&1 | FileCheck -check-prefix=fujitsu-monaka %s +// RUN: %clang --target=aarch64 -mtune=fujitsu-monaka -### -c %s 2>&1 | FileCheck -check-prefix=fujitsu-monaka-TUNE %s +// RUN: %clang --target=aarch64 -mlittle-endian -mtune=fujitsu-monaka -### -c %s 2>&1 | FileCheck -check-prefix=fujitsu-monaka-TUNE %s +// fujitsu-monaka: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "fujitsu-monaka" +// fujitsu-monaka-TUNE: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic" + +// RUN: %clang --target=arm64 -mcpu=fujitsu-monaka -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-fujitsu-monaka %s +// RUN: %clang --target=arm64 -mlittle-endian -mcpu=fujitsu-monaka -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-fujitsu-monaka %s +// RUN: %clang --target=arm64 -mtune=fujitsu-monaka -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-fujitsu-monaka-TUNE %s +// RUN: %clang --target=arm64 -mlittle-endian -mtune=fujitsu-monaka -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-fujitsu-monaka-TUNE %s +// ARM64-fujitsu-monaka: "-cc1"{{.*}} "-triple" "arm64{{.*}}" "-target-cpu" "fujitsu-monaka" +// ARM64-fujitsu-monaka-TUNE: "-cc1"{{.*}} "-triple" "arm64{{.*}}" "-target-cpu" "generic" diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-fujitsu-monaka.c b/clang/test/Driver/print-enabled-extensions/aarch64-fujitsu-monaka.c new file mode 100644 index 00000000000000..fce76a49c07d38 --- /dev/null +++ b/clang/test/Driver/print-enabled-extensions/aarch64-fujitsu-monaka.c @@ -0,0 +1,82 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=fujitsu-monaka | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s + +// CHECK: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-NEXT: Architecture Feature(s) Description +// CHECK-NEXT: FEAT_AES, FEAT_PMULL Enable AES support +// CHECK-NEXT: FEAT_AMUv1 Enable Armv8.4-A Activity Monitors extension +// CHECK-NEXT: FEAT_AMUv1p1 Enable Armv8.6-A Activity Monitors Virtualization support +// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-NEXT: FEAT_BF16 Enable BFloat16 Extension +// CHECK-NEXT: FEAT_BTI Enable Branch Target Identification +// CHECK-NEXT: FEAT_CCIDX Enable Armv8.3-A Extend of the CCSIDR number of sets +// CHECK-NEXT: FEAT_CLRBHB Enable Clear BHB instruction +// CHECK-NEXT: FEAT_CRC32 Enable Armv8.0-A CRC-32 checksum instructions +// CHECK-NEXT: FEAT_CSV2_2 Enable architectural speculation restriction +// CHECK-NEXT: FEAT_DIT Enable Armv8.4-A Data Independent Timing instructions +// CHECK-NEXT: FEAT_DPB Enable Armv8.2-A data Cache Clean to Point of Persistence +// CHECK-NEXT: FEAT_DPB2 Enable Armv8.5-A Cache Clean to Point of Deep Persistence +// CHECK-NEXT: FEAT_DotProd Enable dot product support +// CHECK-NEXT: FEAT_ECV Enable enhanced counter virtualization extension +// CHECK-NEXT: FEAT_ETE Enable Embedded Trace Extension +// CHECK-NEXT: FEAT_FAMINMAX Enable FAMIN and FAMAX instructions +// CHECK-NEXT: FEAT_FCMA Enable Armv8.3-A Floating-point complex number support +// CHECK-NEXT: FEAT_FGT Enable fine grained virtualization traps extension +// CHECK-NEXT: FEAT_FHM Enable FP16 FML instructions +// CHECK-NEXT: FEAT_FP Enable Armv8.0-A Floating Point Extensions +// CHECK-NEXT: FEAT_FP16 Enable half-precision floating-point data processing +// CHECK-NEXT: FEAT_FP8 Enable FP8 instructions +// CHECK-NEXT: FEAT_FP8DOT2 Enable FP8 2-way dot instructions +// CHECK-NEXT: FEAT_FP8DOT4 Enable FP8 4-way dot instructions +// CHECK-NEXT: FEAT_FP8FMA Enable Armv9.5-A FP8 multiply-add instructions +// CHECK-NEXT: FEAT_FPAC Enable Armv8.3-A Pointer Authentication Faulting enhancement +// CHECK-NEXT: FEAT_FRINTTS Enable FRInt[32|64][Z|X] instructions that round a floating-point number to an integer (in FP format) forcing it to fit into a 32- or 64-bit int +// CHECK-NEXT: FEAT_FlagM Enable Armv8.4-A Flag Manipulation instructions +// CHECK-NEXT: FEAT_FlagM2 Enable alternative NZCV format for floating point comparisons +// CHECK-NEXT: FEAT_HBC Enable Armv8.8-A Hinted Conditional Branches Extension +// CHECK-NEXT: FEAT_HCX Enable Armv8.7-A HCRX_EL2 system register +// CHECK-NEXT: FEAT_I8MM Enable Matrix Multiply Int8 Extension +// CHECK-NEXT: FEAT_JSCVT Enable Armv8.3-A JavaScript FP conversion instructions +// CHECK-NEXT: FEAT_LOR Enable Armv8.1-A Limited Ordering Regions extension +// CHECK-NEXT: FEAT_LRCPC Enable support for RCPC extension +// CHECK-NEXT: FEAT_LRCPC2 Enable Armv8.4-A RCPC instructions with Immediate Offsets +// CHECK-NEXT: FEAT_LS64, FEAT_LS64_V, FEAT_LS64_ACCDATA Enable Armv8.7-A LD64B/ST64B Accelerator Extension +// CHECK-NEXT: FEAT_LSE Enable Armv8.1-A Large System Extension (LSE) atomic instructions +// CHECK-NEXT: FEAT_LSE2 Enable Armv8.4-A Large System Extension 2 (LSE2) atomicity rules +// CHECK-NEXT: FEAT_LUT Enable Lookup Table instructions +// CHECK-NEXT: FEAT_MEC Enable Memory Encryption Contexts Extension +// CHECK-NEXT: FEAT_MOPS Enable Armv8.8-A memcpy and memset acceleration instructions +// CHECK-NEXT: FEAT_MPAM Enable Armv8.4-A Memory system Partitioning and Monitoring extension +// CHECK-NEXT: FEAT_NMI, FEAT_GICv3_NMI Enable Armv8.8-A Non-maskable Interrupts +// CHECK-NEXT: FEAT_NV, FEAT_NV2 Enable Armv8.4-A Nested Virtualization Enchancement +// CHECK-NEXT: FEAT_PAN Enable Armv8.1-A Privileged Access-Never extension +// CHECK-NEXT: FEAT_PAN2 Enable Armv8.2-A PAN s1e1R and s1e1W Variants +// CHECK-NEXT: FEAT_PAuth Enable Armv8.3-A Pointer Authentication extension +// CHECK-NEXT: FEAT_PMUv3 Enable Armv8.0-A PMUv3 Performance Monitors extension +// CHECK-NEXT: FEAT_RAS, FEAT_RASv1p1 Enable Armv8.0-A Reliability, Availability and Serviceability Extensions +// CHECK-NEXT: FEAT_RDM Enable Armv8.1-A Rounding Double Multiply Add/Subtract instructions +// CHECK-NEXT: FEAT_RME Enable Realm Management Extension +// CHECK-NEXT: FEAT_RNG Enable Random Number generation instructions +// CHECK-NEXT: FEAT_SB Enable Armv8.5-A Speculation Barrier +// CHECK-NEXT: FEAT_SEL2 Enable Armv8.4-A Secure Exception Level 2 extension +// CHECK-NEXT: FEAT_SHA1, FEAT_SHA256 Enable SHA1 and SHA256 support +// CHECK-NEXT: FEAT_SHA3, FEAT_SHA512 Enable SHA512 and SHA3 support +// CHECK-NEXT: FEAT_SM4, FEAT_SM3 Enable SM3 and SM4 support +// CHECK-NEXT: FEAT_SPECRES Enable Armv8.5-A execution and data prediction invalidation instructions +// CHECK-NEXT: FEAT_SPECRES2 Enable Speculation Restriction Instruction +// CHECK-NEXT: FEAT_SPEv1p2 Enable extra register in the Statistical Profiling Extension +// CHECK-NEXT: FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit +// CHECK-NEXT: FEAT_SVE Enable Scalable Vector Extension (SVE) instructions +// CHECK-NEXT: FEAT_SVE2 Enable Scalable Vector Extension 2 (SVE2) instructions +// CHECK-NEXT: FEAT_SVE_AES, FEAT_SVE_PMULL128 Enable AES SVE2 instructions +// CHECK-NEXT: FEAT_SVE_BitPerm Enable bit permutation SVE2 instructions +// CHECK-NEXT: FEAT_SVE_SHA3 Enable SHA3 SVE2 instructions +// CHECK-NEXT: FEAT_SVE_SM4 Enable SM4 SVE2 instructions +// CHECK-NEXT: FEAT_TLBIOS, FEAT_TLBIRANGE Enable Armv8.4-A TLB Range and Maintenance instructions +// CHECK-NEXT: FEAT_TRBE Enable Trace Buffer Extension +// CHECK-NEXT: FEAT_TRF Enable Armv8.4-A Trace extension +// CHECK-NEXT: FEAT_UAO Enable Armv8.2-A UAO PState +// CHECK-NEXT: FEAT_VHE Enable Armv8.1-A Virtual Host extension +// CHECK-NEXT: FEAT_WFxT Enable Armv8.7-A WFET and WFIT instruction +// CHECK-NEXT: FEAT_XS Enable Armv8.7-A limited-TLB-maintenance instruction \ No newline at end of file diff --git a/clang/test/Misc/target-invalid-cpu-note/aarch64.c b/clang/test/Misc/target-invalid-cpu-note/aarch64.c index ab83f299ac5997..e6ff09557fe070 100644 --- a/clang/test/Misc/target-invalid-cpu-note/aarch64.c +++ b/clang/test/Misc/target-invalid-cpu-note/aarch64.c @@ -67,6 +67,7 @@ // CHECK-SAME: {{^}}, exynos-m4 // CHECK-SAME: {{^}}, exynos-m5 // CHECK-SAME: {{^}}, falkor +// CHECK-SAME: {{^}}, fujitsu-monaka // CHECK-SAME: {{^}}, generic // CHECK-SAME: {{^}}, grace // CHECK-SAME: {{^}}, kryo diff --git a/llvm/lib/Target/AArch64/AArch64Processors.td b/llvm/lib/Target/AArch64/AArch64Processors.td index 6886df5392565d..83fdb85c50f6bf 100644 --- a/llvm/lib/Target/AArch64/AArch64Processors.td +++ b/llvm/lib/Target/AArch64/AArch64Processors.td @@ -266,6 +266,14 @@ def TuneA64FX : SubtargetFeature<"a64fx", "ARMProcFamily", "A64FX", FeatureStorePairSuppress, FeaturePredictableSelectIsExpensive]>; +def TuneMONAKA : SubtargetFeature<"fujitsu-monaka", "ARMProcFamily", "MONAKA", + "Fujitsu FUJITSU-MONAKA processors", [ + FeaturePredictableSelectIsExpensive, + FeatureEnableSelectOptimize, + FeaturePostRAScheduler, + FeatureArithmeticBccFusion, + ]>; + def TuneCarmel : SubtargetFeature<"carmel", "ARMProcFamily", "Carmel", "Nvidia Carmel processors">; @@ -843,6 +851,12 @@ def ProcessorFeatures { FeatureSHA2, FeaturePerfMon, FeatureFullFP16, FeatureSVE, FeatureComplxNum, FeatureAES, FeatureCRC, FeatureLSE, FeatureRAS, FeatureRDM]; + list<SubtargetFeature> MONAKA = [HasV9_3aOps, FeaturePerfMon, FeatureCCIDX, + FeatureFPAC, FeatureFP16FML, FeatureRandGen, + FeatureSSBS, FeatureLS64, FeatureCLRBHB, + FeatureSPECRES2, FeatureSVE2AES, FeatureSVE2SM4, + FeatureSVE2SHA3, FeatureSVE2BitPerm, FeatureETE, + FeatureMEC, FeatureFP8DOT2]; list<SubtargetFeature> Carmel = [HasV8_2aOps, FeatureNEON, FeatureSHA2, FeatureAES, FeatureFullFP16, FeatureCRC, FeatureLSE, FeatureRAS, FeatureRDM, FeatureFPARMv8]; @@ -1225,6 +1239,10 @@ def : ProcessorAlias<"apple-latest", "apple-m4">; def : ProcessorModel<"a64fx", A64FXModel, ProcessorFeatures.A64FX, [TuneA64FX]>; +// Fujitsu FUJITSU-MONAKA +def : ProcessorModel<"fujitsu-monaka", A64FXModel, ProcessorFeatures.MONAKA, + [TuneMONAKA]>; + // Nvidia Carmel def : ProcessorModel<"carmel", NoSchedModel, ProcessorFeatures.Carmel, [TuneCarmel]>; diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp index e37e2cacc7852e..3767b34bd5b0c5 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp @@ -195,6 +195,9 @@ void AArch64Subtarget::initializeProperties(bool HasMinSize) { MaxPrefetchIterationsAhead = 4; VScaleForTuning = 4; break; + case MONAKA: + VScaleForTuning = 2; + break; case AppleA7: case AppleA10: case AppleA11: diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp index 4457e481def10a..7c28f12d6b6811 100644 --- a/llvm/lib/TargetParser/Host.cpp +++ b/llvm/lib/TargetParser/Host.cpp @@ -280,8 +280,9 @@ StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) { if (Implementer == "0x46") { // Fujitsu Ltd. return StringSwitch<const char *>(Part) - .Case("0x001", "a64fx") - .Default("generic"); + .Case("0x001", "a64fx") + .Case("0x003", "fujitsu-monaka") + .Default("generic"); } if (Implementer == "0x4e") { // NVIDIA Corporation diff --git a/llvm/test/CodeGen/AArch64/cpus.ll b/llvm/test/CodeGen/AArch64/cpus.ll index 3e593a82fdf288..e9722f348f4113 100644 --- a/llvm/test/CodeGen/AArch64/cpus.ll +++ b/llvm/test/CodeGen/AArch64/cpus.ll @@ -36,6 +36,7 @@ ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=tsv110 2>&1 | FileCheck %s ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=apple-latest 2>&1 | FileCheck %s ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=a64fx 2>&1 | FileCheck %s +; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=fujitsu-monaka 2>&1 | FileCheck %s ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=ampere1 2>&1 | FileCheck %s ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=ampere1a 2>&1 | FileCheck %s ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=ampere1b 2>&1 | FileCheck %s diff --git a/llvm/unittests/TargetParser/Host.cpp b/llvm/unittests/TargetParser/Host.cpp index 5e2edcef09bf8c..67c727a6c1c5df 100644 --- a/llvm/unittests/TargetParser/Host.cpp +++ b/llvm/unittests/TargetParser/Host.cpp @@ -140,6 +140,9 @@ TEST(getLinuxHostCPUName, AArch64) { EXPECT_EQ(sys::detail::getHostCPUNameForARM("CPU implementer : 0x51\n" "CPU part : 0x001"), "oryon-1"); + EXPECT_EQ(sys::detail::getHostCPUNameForARM("CPU implementer : 0x46\n" + "CPU part : 0x003"), + "fujitsu-monaka"); // MSM8992/4 weirdness StringRef MSM8992ProcCpuInfo = R"( diff --git a/llvm/unittests/TargetParser/TargetParserTest.cpp b/llvm/unittests/TargetParser/TargetParserTest.cpp index d281c2cbd55d35..6b07e848def37f 100644 --- a/llvm/unittests/TargetParser/TargetParserTest.cpp +++ b/llvm/unittests/TargetParser/TargetParserTest.cpp @@ -1150,13 +1150,14 @@ INSTANTIATE_TEST_SUITE_P( AArch64CPUTestParams("thunderxt88", "armv8-a"), AArch64CPUTestParams("tsv110", "armv8.2-a"), AArch64CPUTestParams("a64fx", "armv8.2-a"), + AArch64CPUTestParams("fujitsu-monaka", "armv9.3-a"), AArch64CPUTestParams("carmel", "armv8.2-a"), AArch64CPUTestParams("saphira", "armv8.4-a"), AArch64CPUTestParams("oryon-1", "armv8.6-a")), AArch64CPUTestParams::PrintToStringParamName); // Note: number of CPUs includes aliases. -static constexpr unsigned NumAArch64CPUArchs = 81; +static constexpr unsigned NumAArch64CPUArchs = 82; TEST(TargetParserTest, testAArch64CPUArchList) { SmallVector<StringRef, NumAArch64CPUArchs> List; >From 318b63aeb9f18b34e3898e0a631d1040fca5279d Mon Sep 17 00:00:00 2001 From: Kinoshita Kotaro <k.kot...@fujitsu.com> Date: Tue, 3 Dec 2024 21:32:18 +0900 Subject: [PATCH 2/2] fixup! [AArch64] Add initial support for FUJITSU-MONAKA --- .../Driver/print-enabled-extensions/aarch64-fujitsu-monaka.c | 2 +- llvm/lib/Target/AArch64/AArch64Processors.td | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-fujitsu-monaka.c b/clang/test/Driver/print-enabled-extensions/aarch64-fujitsu-monaka.c index fce76a49c07d38..3c74e3620df034 100644 --- a/clang/test/Driver/print-enabled-extensions/aarch64-fujitsu-monaka.c +++ b/clang/test/Driver/print-enabled-extensions/aarch64-fujitsu-monaka.c @@ -69,7 +69,7 @@ // CHECK-NEXT: FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit // CHECK-NEXT: FEAT_SVE Enable Scalable Vector Extension (SVE) instructions // CHECK-NEXT: FEAT_SVE2 Enable Scalable Vector Extension 2 (SVE2) instructions -// CHECK-NEXT: FEAT_SVE_AES, FEAT_SVE_PMULL128 Enable AES SVE2 instructions +// CHECK-NEXT: FEAT_SVE_AES, FEAT_SVE_PMULL128 Enable SVE AES and quadword SVE polynomial multiply instructions // CHECK-NEXT: FEAT_SVE_BitPerm Enable bit permutation SVE2 instructions // CHECK-NEXT: FEAT_SVE_SHA3 Enable SHA3 SVE2 instructions // CHECK-NEXT: FEAT_SVE_SM4 Enable SM4 SVE2 instructions diff --git a/llvm/lib/Target/AArch64/AArch64Processors.td b/llvm/lib/Target/AArch64/AArch64Processors.td index 83fdb85c50f6bf..e902568fd6daa2 100644 --- a/llvm/lib/Target/AArch64/AArch64Processors.td +++ b/llvm/lib/Target/AArch64/AArch64Processors.td @@ -854,7 +854,7 @@ def ProcessorFeatures { list<SubtargetFeature> MONAKA = [HasV9_3aOps, FeaturePerfMon, FeatureCCIDX, FeatureFPAC, FeatureFP16FML, FeatureRandGen, FeatureSSBS, FeatureLS64, FeatureCLRBHB, - FeatureSPECRES2, FeatureSVE2AES, FeatureSVE2SM4, + FeatureSPECRES2, FeatureSVEAES, FeatureSVE2SM4, FeatureSVE2SHA3, FeatureSVE2BitPerm, FeatureETE, FeatureMEC, FeatureFP8DOT2]; list<SubtargetFeature> Carmel = [HasV8_2aOps, FeatureNEON, FeatureSHA2, FeatureAES, _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits