https://github.com/SpencerAbson updated https://github.com/llvm/llvm-project/pull/111677
>From efabe14befb5c35ae35826d79af0b64756cb3e9b Mon Sep 17 00:00:00 2001 From: Marian Lukac <marian.lu...@arm.com> Date: Wed, 9 Oct 2024 10:52:42 +0000 Subject: [PATCH 1/4] [AArch64] Introduce new armv9.6 features --- .../print-supported-extensions-aarch64.c | 11 ++ llvm/lib/Target/AArch64/AArch64Features.td | 37 +++++- llvm/lib/Target/AArch64/AArch64InstrInfo.td | 35 +++++- .../TargetParser/TargetParserTest.cpp | 119 ++++++++++++++++-- 4 files changed, 191 insertions(+), 11 deletions(-) diff --git a/clang/test/Driver/print-supported-extensions-aarch64.c b/clang/test/Driver/print-supported-extensions-aarch64.c index e6247307c7219f..c582154bd0370d 100644 --- a/clang/test/Driver/print-supported-extensions-aarch64.c +++ b/clang/test/Driver/print-supported-extensions-aarch64.c @@ -8,6 +8,7 @@ // CHECK-NEXT: bf16 FEAT_BF16 Enable BFloat16 Extension // CHECK-NEXT: brbe FEAT_BRBE Enable Branch Record Buffer Extension // CHECK-NEXT: bti FEAT_BTI Enable Branch Target Identification +// CHECK-NEXT: cmpbr FEAT_CMPBR Enable A64 base compare and branch instructions // CHECK-NEXT: fcma FEAT_FCMA Enable Armv8.3-A Floating-point complex number support // CHECK-NEXT: cpa FEAT_CPA Enable Armv9.5-A Checked Pointer Arithmetic // CHECK-NEXT: crc FEAT_CRC32 Enable Armv8.0-A CRC-32 checksum instructions @@ -18,6 +19,8 @@ // CHECK-NEXT: dotprod FEAT_DotProd Enable dot product support // CHECK-NEXT: f32mm FEAT_F32MM Enable Matrix Multiply FP32 Extension // CHECK-NEXT: f64mm FEAT_F64MM Enable Matrix Multiply FP64 Extension +// CHECK-NEXT: f8f16mm FEAT_F8F16MM Enable FP8 to Half-Precision Matrix Multiplication +// CHECK-NEXT: f8f32mm FEAT_F8F32MM Enable FP8 to Single-Precision Matrix Multiplication // CHECK-NEXT: faminmax FEAT_FAMINMAX Enable FAMIN and FAMAX instructions // CHECK-NEXT: flagm FEAT_FlagM Enable Armv8.4-A Flag Manipulation instructions // CHECK-NEXT: fp FEAT_FP Enable Armv8.0-A Floating Point Extensions @@ -26,6 +29,7 @@ // CHECK-NEXT: fp8dot2 FEAT_FP8DOT2 Enable FP8 2-way dot instructions // CHECK-NEXT: fp8dot4 FEAT_FP8DOT4 Enable FP8 4-way dot instructions // CHECK-NEXT: fp8fma FEAT_FP8FMA Enable Armv9.5-A FP8 multiply-add instructions +// CHECK-NEXT: fprcvt FEAT_FPRCVT Enable A64 base convert instructions for SIMD&FP scalar register operands of different input and output sizes // CHECK-NEXT: fp16 FEAT_FP16 Enable half-precision floating-point data processing // CHECK-NEXT: gcs FEAT_GCS Enable Armv9.4-A Guarded Call Stack Extension // CHECK-NEXT: hbc FEAT_HBC Enable Armv8.8-A Hinted Conditional Branches Extension @@ -35,6 +39,7 @@ // CHECK-NEXT: ls64 FEAT_LS64, FEAT_LS64_V, FEAT_LS64_ACCDATA Enable Armv8.7-A LD64B/ST64B Accelerator Extension // CHECK-NEXT: lse FEAT_LSE Enable Armv8.1-A Large System Extension (LSE) atomic instructions // CHECK-NEXT: lse128 FEAT_LSE128 Enable Armv9.4-A 128-bit Atomic instructions +// CHECK-NEXT: lsfe FEAT_LSFE Enable A64 base Atomic floating-point in-memory instructions // CHECK-NEXT: lut FEAT_LUT Enable Lookup Table instructions // CHECK-NEXT: mops FEAT_MOPS Enable Armv8.8-A memcpy and memset acceleration instructions // CHECK-NEXT: memtag FEAT_MTE, FEAT_MTE2 Enable Memory Tagging Extension @@ -64,20 +69,26 @@ // CHECK-NEXT: sme-lutv2 FEAT_SME_LUTv2 Enable Scalable Matrix Extension (SME) LUTv2 instructions // CHECK-NEXT: sme2 FEAT_SME2 Enable Scalable Matrix Extension 2 (SME2) instructions // CHECK-NEXT: sme2p1 FEAT_SME2p1 Enable Scalable Matrix Extension 2.1 instructions +// CHECK-NEXT: sme2p2 FEAT_SME2p2 Enable Scalable Matrix Extension 2.2 instructions // CHECK-NEXT: profile FEAT_SPE Enable Statistical Profiling extension // CHECK-NEXT: predres2 FEAT_SPECRES2 Enable Speculation Restriction Instruction // CHECK-NEXT: ssbs FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit +// CHECK-NEXT: ssve-aes FEAT_SSVE_AES Enable SVE2 AES support in streaming SVE mode // CHECK-NEXT: ssve-fp8dot2 FEAT_SSVE_FP8DOT2 Enable SVE2 FP8 2-way dot product instructions // CHECK-NEXT: ssve-fp8dot4 FEAT_SSVE_FP8DOT4 Enable SVE2 FP8 4-way dot product instructions // CHECK-NEXT: ssve-fp8fma FEAT_SSVE_FP8FMA Enable SVE2 FP8 multiply-add instructions // CHECK-NEXT: sve FEAT_SVE Enable Scalable Vector Extension (SVE) instructions +// CHECK-NEXT: sve-aes2 FEAT_SVE_AES2 Enable SVE multi-vector AES and 128-bit PMULL instructions // CHECK-NEXT: sve-b16b16 FEAT_SVE_B16B16 Enable SVE2 non-widening and SME2 Z-targeting non-widening BFloat16 instructions +// CHECK-NEXT: sve-bfscale FEAT_SVE_BFSCALE Enable SVE BFloat16 scaling instructions +// CHECK-NEXT: sve-f16f32mm FEAT_SVE_F16F32MM Enable FP16 to FP32 Matrix Multiply // CHECK-NEXT: sve2 FEAT_SVE2 Enable Scalable Vector Extension 2 (SVE2) instructions // CHECK-NEXT: sve2-aes FEAT_SVE_AES, FEAT_SVE_PMULL128 Enable AES SVE2 instructions // CHECK-NEXT: sve2-bitperm FEAT_SVE_BitPerm Enable bit permutation SVE2 instructions // CHECK-NEXT: sve2-sha3 FEAT_SVE_SHA3 Enable SHA3 SVE2 instructions // CHECK-NEXT: sve2-sm4 FEAT_SVE_SM4 Enable SM4 SVE2 instructions // CHECK-NEXT: sve2p1 FEAT_SVE2p1 Enable Scalable Vector Extension 2.1 instructions +// CHECK-NEXT: sve2p2 FEAT_SVE2p2 Enable Scalable Vector Extension 2.2 instructions // CHECK-NEXT: the FEAT_THE Enable Armv8.9-A Translation Hardening Extension // CHECK-NEXT: tlbiw FEAT_TLBIW Enable Armv9.5-A TLBI VMALL for Dirty State // CHECK-NEXT: tme FEAT_TME Enable Transactional Memory Extension diff --git a/llvm/lib/Target/AArch64/AArch64Features.td b/llvm/lib/Target/AArch64/AArch64Features.td index 97671bc59f6b9e..10dbf900271a32 100644 --- a/llvm/lib/Target/AArch64/AArch64Features.td +++ b/llvm/lib/Target/AArch64/AArch64Features.td @@ -522,6 +522,39 @@ def FeatureTLBIW : ExtensionWithMArch<"tlbiw", "TLBIW", "FEAT_TLBIW", // Armv9.6 Architecture Extensions //===----------------------------------------------------------------------===// +def FeatureCMPBR : ExtensionWithMArch<"cmpbr", "CMPBR", "FEAT_CMPBR", + "Enable A64 base compare and branch instructions">; + +def FeatureF8F32MM: ExtensionWithMArch<"f8f32mm", "F8F32MM", "FEAT_F8F32MM", + "Enable FP8 to Single-Precision Matrix Multiplication", [FeatureFP8DOT4]>; + +def FeatureF8F16MM: ExtensionWithMArch<"f8f16mm", "F8F16MM", "FEAT_F8F16MM", + "Enable FP8 to Half-Precision Matrix Multiplication", [FeatureFP8DOT2, FeatureF8F32MM]>; + +def FeatureFPRCVT: ExtensionWithMArch<"fprcvt", "FPRCVT", "FEAT_FPRCVT", + "Enable A64 base convert instructions for SIMD&FP scalar register operands of" + " different input and output sizes", [FeatureFPARMv8]>; + +def FeatureLSFE : ExtensionWithMArch<"lsfe", "LSFE", "FEAT_LSFE", + "Enable A64 base Atomic floating-point in-memory instructions", [FeatureFPARMv8]>; + +def FeatureSME2p2: ExtensionWithMArch<"sme2p2", "SME2p2", "FEAT_SME2p2", + "Enable Scalable Matrix Extension 2.2 instructions", [FeatureSME2p1]>; + +def FeatureSSVE_AES : ExtensionWithMArch<"ssve-aes", "SSVE_AES", "FEAT_SSVE_AES", + "Enable SVE2 AES support in streaming SVE mode", [FeatureSME2p1]>; + +def FeatureSVE2p2 : ExtensionWithMArch<"sve2p2", "SVE2p2", "FEAT_SVE2p2", + "Enable Scalable Vector Extension 2.2 instructions", [FeatureSVE2p1]>; + +def FeatureSVEAES2: ExtensionWithMArch<"sve-aes2", "SVE_AES2", "FEAT_SVE_AES2", + "Enable SVE multi-vector AES and 128-bit PMULL instructions", [FeatureSVE2AES]>; + +def FeatureSVEBFSCALE: ExtensionWithMArch<"sve-bfscale", "SVE_BFSCALE", "FEAT_SVE_BFSCALE", + "Enable SVE BFloat16 scaling instructions", [FeatureSVEB16B16]>; + +def FeatureSVE_F16F32MM: ExtensionWithMArch<"sve-f16f32mm", "SVE_F16F32MM", "FEAT_SVE_F16F32MM", + "Enable FP16 to FP32 Matrix Multiply instructions", [FeatureSVE2p1]>; //===----------------------------------------------------------------------===// // Other Features @@ -833,8 +866,8 @@ def HasV9_5aOps : Architecture64<9, 5, "a", "v9.5a", [HasV9_4aOps, FeatureCPA], !listconcat(HasV9_4aOps.DefaultExts, [FeatureCPA, FeatureLUT, FeatureFAMINMAX])>; def HasV9_6aOps : Architecture64<9, 6, "a", "v9.6a", - [HasV9_5aOps], - !listconcat(HasV9_5aOps.DefaultExts, [])>; + [HasV9_5aOps, FeatureCMPBR, FeatureFPRCVT, FeatureSVE2p2], + !listconcat(HasV9_5aOps.DefaultExts, [FeatureCMPBR, FeatureFPRCVT, FeatureSVE2p2])>; def HasV8_0rOps : Architecture64<8, 0, "r", "v8r", [ //v8.1 FeatureCRC, FeaturePAN, FeatureLSE, FeatureCONTEXTIDREL2, diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 4374d92a5b7b16..a41d5080c909fc 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -213,12 +213,35 @@ def HasSMEF8F16 : Predicate<"Subtarget->isStreaming() && Subtarget->hasSMEF8 def HasSMEF8F32 : Predicate<"Subtarget->isStreaming() && Subtarget->hasSMEF8F32()">, AssemblerPredicateWithAll<(all_of FeatureSMEF8F32), "sme-f8f32">; +def HasCMPBR : Predicate<"Subtarget->hasCMPBR()">, + AssemblerPredicateWithAll<(all_of FeatureCMPBR), "cmpbr">; +def HasF8F32MM : Predicate<"Subtarget->hasF8F32MM()">, + AssemblerPredicateWithAll<(all_of FeatureF8F32MM), "f8f32mm">; +def HasF8F16MM : Predicate<"Subtarget->hasF8F16MM()">, + AssemblerPredicateWithAll<(all_of FeatureF8F16MM), "f8f16mm">; +def HasFPRCVT : Predicate<"Subtarget->hasFPRCVT()">, + AssemblerPredicateWithAll<(all_of FeatureFPRCVT), "fprcvt">; +def HasLSFE : Predicate<"Subtarget->hasLSFE()">, + AssemblerPredicateWithAll<(all_of FeatureLSFE), "lsfe">; +def HasSME2p2 : Predicate<"Subtarget->isStreaming() && Subtarget->hasSME2p2()">, + AssemblerPredicateWithAll<(all_of FeatureSME2p2), "sme2p2">; +def HasSVEAES2 : Predicate<"Subtarget->hasSVEAES2()">, + AssemblerPredicateWithAll<(all_of FeatureSVEAES2), "sve-aes2">; +def HasSVEBFSCALE : Predicate<"Subtarget->isSVEorStreamingSVEAvailable() && Subtarget->hasSVEBFSCALE()">, + AssemblerPredicateWithAll<(all_of FeatureSVEBFSCALE), "sve-bfscale">; +def HasSVE_F16F32MM : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasSVE_F16F32MM()">, + AssemblerPredicateWithAll<(all_of FeatureSVE_F16F32MM), "sve-f16f32mm">; // A subset of SVE(2) instructions are legal in Streaming SVE execution mode, // they should be enabled if either has been specified. def HasSVEorSME : Predicate<"Subtarget->hasSVE() || (Subtarget->isStreaming() && Subtarget->hasSME())">, AssemblerPredicateWithAll<(any_of FeatureSVE, FeatureSME), "sve or sme">; +def HasSVEorSME2p2 + : Predicate<"(Subtarget->isSVEAvailable() && Subtarget->hasSVE()) ||" + "(Subtarget->isSVEorStreamingSVEAvailable() && Subtarget->hasSME2p2())">, + AssemblerPredicateWithAll<(any_of FeatureSVE, FeatureSME2p2), + "sve or sme2p2">; def HasSVE2orSME : Predicate<"Subtarget->hasSVE2() || (Subtarget->isStreaming() && Subtarget->hasSME())">, AssemblerPredicateWithAll<(any_of FeatureSVE2, FeatureSME), @@ -227,6 +250,10 @@ def HasSVE2orSME2 : Predicate<"Subtarget->hasSVE2() || (Subtarget->isStreaming() && Subtarget->hasSME2())">, AssemblerPredicateWithAll<(any_of FeatureSVE2, FeatureSME2), "sve2 or sme2">; +def HasSVE2orSSVE_AES + : Predicate<"(Subtarget->isSVEAvailable() && Subtarget->hasSVE2()) ||" + "(Subtarget->isSVEorStreamingSVEAvailable() && Subtarget->hasSSVE_AES())">, + AssemblerPredicateWithAll<(any_of FeatureSVE2, FeatureSSVE_AES), "sve2 or ssve-aes">; def HasSVE2p1_or_HasSME : Predicate<"Subtarget->hasSVE2p1() || (Subtarget->isStreaming() && Subtarget->hasSME())">, AssemblerPredicateWithAll<(any_of FeatureSME, FeatureSVE2p1), "sme or sve2p1">; @@ -236,7 +263,13 @@ def HasSVE2p1_or_HasSME2 def HasSVE2p1_or_HasSME2p1 : Predicate<"Subtarget->hasSVE2p1() || (Subtarget->isStreaming() && Subtarget->hasSME2p1())">, AssemblerPredicateWithAll<(any_of FeatureSME2p1, FeatureSVE2p1), "sme2p1 or sve2p1">; - +def HasSVE2p2orSME2p2 + : Predicate<"Subtarget->isSVEorStreamingSVEAvailable() && (Subtarget->hasSVE2p2() || Subtarget->hasSME2p2())">, + AssemblerPredicateWithAll<(any_of FeatureSME2p2, FeatureSVE2p2), "sme2p2 or sve2p2">; +def HasSVE2p1orSSVE_AES + : Predicate<"(Subtarget->isSVEAvailable() && Subtarget->hasSVE2p1()) ||" + "(Subtarget->isSVEorStreamingSVEAvailable() && Subtarget->hasSSVE_AES())">, + AssemblerPredicateWithAll<(any_of FeatureSVE2p1, FeatureSSVE_AES), "sve2p1 or ssve-aes">; def HasSMEF16F16orSMEF8F16 : Predicate<"Subtarget->isStreaming() && (Subtarget->hasSMEF16F16() || Subtarget->hasSMEF8F16())">, AssemblerPredicateWithAll<(any_of FeatureSMEF16F16, FeatureSMEF8F16), diff --git a/llvm/unittests/TargetParser/TargetParserTest.cpp b/llvm/unittests/TargetParser/TargetParserTest.cpp index 5b5d45f6c574bf..0d59b41a2f7510 100644 --- a/llvm/unittests/TargetParser/TargetParserTest.cpp +++ b/llvm/unittests/TargetParser/TargetParserTest.cpp @@ -1323,8 +1323,12 @@ TEST(TargetParserTest, AArch64ExtensionFeatures) { AArch64::AEK_CPA, AArch64::AEK_PAUTHLR, AArch64::AEK_TLBIW, AArch64::AEK_JSCVT, AArch64::AEK_FCMA, AArch64::AEK_FP8, - AArch64::AEK_SVEB16B16, - }; + AArch64::AEK_SVEB16B16, AArch64::AEK_SVE2P2, + AArch64::AEK_SME2P2, AArch64::AEK_SVE_BFSCALE, + AArch64::AEK_SVE_F16F32MM, AArch64::AEK_SVE_AES2, + AArch64::AEK_SSVE_AES, AArch64::AEK_F8F32MM, + AArch64::AEK_F8F16MM, AArch64::AEK_LSFE, + AArch64::AEK_FPRCVT, AArch64::AEK_CMPBR}; std::vector<StringRef> Features; @@ -1356,12 +1360,16 @@ TEST(TargetParserTest, AArch64ExtensionFeatures) { EXPECT_TRUE(llvm::is_contained(Features, "+ras")); EXPECT_TRUE(llvm::is_contained(Features, "+sve")); EXPECT_TRUE(llvm::is_contained(Features, "+sve-b16b16")); + EXPECT_TRUE(llvm::is_contained(Features, "+sve-bfscale")); EXPECT_TRUE(llvm::is_contained(Features, "+sve2")); EXPECT_TRUE(llvm::is_contained(Features, "+sve2-aes")); EXPECT_TRUE(llvm::is_contained(Features, "+sve2-sm4")); EXPECT_TRUE(llvm::is_contained(Features, "+sve2-sha3")); EXPECT_TRUE(llvm::is_contained(Features, "+sve2-bitperm")); + EXPECT_TRUE(llvm::is_contained(Features, "+sve-aes2")); + EXPECT_TRUE(llvm::is_contained(Features, "+ssve-aes")); EXPECT_TRUE(llvm::is_contained(Features, "+sve2p1")); + EXPECT_TRUE(llvm::is_contained(Features, "+sve2p2")); EXPECT_TRUE(llvm::is_contained(Features, "+rcpc")); EXPECT_TRUE(llvm::is_contained(Features, "+rand")); EXPECT_TRUE(llvm::is_contained(Features, "+mte")); @@ -1384,6 +1392,7 @@ TEST(TargetParserTest, AArch64ExtensionFeatures) { EXPECT_TRUE(llvm::is_contained(Features, "+sme2")); EXPECT_TRUE(llvm::is_contained(Features, "+sme-b16b16")); EXPECT_TRUE(llvm::is_contained(Features, "+sme2p1")); + EXPECT_TRUE(llvm::is_contained(Features, "+sme2p2")); EXPECT_TRUE(llvm::is_contained(Features, "+hbc")); EXPECT_TRUE(llvm::is_contained(Features, "+mops")); EXPECT_TRUE(llvm::is_contained(Features, "+perfmon")); @@ -1403,6 +1412,8 @@ TEST(TargetParserTest, AArch64ExtensionFeatures) { EXPECT_TRUE(llvm::is_contained(Features, "+ssve-fp8dot2")); EXPECT_TRUE(llvm::is_contained(Features, "+fp8dot4")); EXPECT_TRUE(llvm::is_contained(Features, "+ssve-fp8dot4")); + EXPECT_TRUE(llvm::is_contained(Features, "+f8f32mm")); + EXPECT_TRUE(llvm::is_contained(Features, "+f8f16mm")); EXPECT_TRUE(llvm::is_contained(Features, "+lut")); EXPECT_TRUE(llvm::is_contained(Features, "+sme-lutv2")); EXPECT_TRUE(llvm::is_contained(Features, "+sme-f8f16")); @@ -1413,6 +1424,9 @@ TEST(TargetParserTest, AArch64ExtensionFeatures) { EXPECT_TRUE(llvm::is_contained(Features, "+tlbiw")); EXPECT_TRUE(llvm::is_contained(Features, "+jsconv")); EXPECT_TRUE(llvm::is_contained(Features, "+complxnum")); + EXPECT_TRUE(llvm::is_contained(Features, "+lsfe")); + EXPECT_TRUE(llvm::is_contained(Features, "+fprcvt")); + EXPECT_TRUE(llvm::is_contained(Features, "+cmpbr")); // Assuming we listed every extension above, this should produce the same // result. @@ -1510,12 +1524,16 @@ TEST(TargetParserTest, AArch64ArchExtFeature) { {"rdm", "nordm", "+rdm", "-rdm"}, {"sve", "nosve", "+sve", "-sve"}, {"sve-b16b16", "nosve-b16b16", "+sve-b16b16", "-sve-b16b16"}, + {"sve-bfscale", "nosve-bfscale", "+sve-bfscale", "-sve-bfscale"}, {"sve2", "nosve2", "+sve2", "-sve2"}, {"sve2-aes", "nosve2-aes", "+sve2-aes", "-sve2-aes"}, {"sve2-sm4", "nosve2-sm4", "+sve2-sm4", "-sve2-sm4"}, {"sve2-sha3", "nosve2-sha3", "+sve2-sha3", "-sve2-sha3"}, {"sve2p1", "nosve2p1", "+sve2p1", "-sve2p1"}, + {"sve2p2", "nosve2p2", "+sve2p2", "-sve2p2"}, {"sve2-bitperm", "nosve2-bitperm", "+sve2-bitperm", "-sve2-bitperm"}, + {"sve-aes2", "nosve-aes2", "+sve-aes2", "-sve-aes2"}, + {"ssve-aes", "nossve-aes", "+ssve-aes", "-ssve-aes"}, {"dotprod", "nodotprod", "+dotprod", "-dotprod"}, {"rcpc", "norcpc", "+rcpc", "-rcpc"}, {"rng", "norng", "+rand", "-rand"}, @@ -1528,6 +1546,8 @@ TEST(TargetParserTest, AArch64ArchExtFeature) { {"i8mm", "noi8mm", "+i8mm", "-i8mm"}, {"f32mm", "nof32mm", "+f32mm", "-f32mm"}, {"f64mm", "nof64mm", "+f64mm", "-f64mm"}, + {"f8f32mm", "nof8f32mm", "+f8f32mm", "-f8f32mm"}, + {"f8f16mm", "nof8f16mm", "+f8f16mm", "-f8f16mm"}, {"sme", "nosme", "+sme", "-sme"}, {"sme-fa64", "nosme-fa64", "+sme-fa64", "-sme-fa64"}, {"sme-f64f64", "nosme-f64f64", "+sme-f64f64", "-sme-f64f64"}, @@ -1536,6 +1556,7 @@ TEST(TargetParserTest, AArch64ArchExtFeature) { {"sme2", "nosme2", "+sme2", "-sme2"}, {"sme-b16b16", "nosme-b16b16", "+sme-b16b16", "-sme-b16b16"}, {"sme2p1", "nosme2p1", "+sme2p1", "-sme2p1"}, + {"sme2p2", "nosme2p2", "+sme2p2", "-sme2p2"}, {"hbc", "nohbc", "+hbc", "-hbc"}, {"mops", "nomops", "+mops", "-mops"}, {"pmuv3", "nopmuv3", "+perfmon", "-perfmon"}, @@ -1554,7 +1575,9 @@ TEST(TargetParserTest, AArch64ArchExtFeature) { {"sme-lutv2", "nosme-lutv2", "+sme-lutv2", "-sme-lutv2"}, {"sme-f8f16", "nosme-f8f16", "+sme-f8f16", "-sme-f8f16"}, {"sme-f8f32", "nosme-f8f32", "+sme-f8f32", "-sme-f8f32"}, - }; + {"lsfe", "nolsfe", "+lsfe", "-lsfe"}, + {"fprcvt", "nofprcvt", "+fprcvt", "-fprcvt"}, + {"cmpbr", "nocmpbr", "+cmpbr", "-cmpbr"}}; for (unsigned i = 0; i < std::size(ArchExt); i++) { EXPECT_EQ(StringRef(ArchExt[i][2]), @@ -1744,6 +1767,14 @@ AArch64ExtensionDependenciesBaseArchTestParams {AArch64::ARMV8A, {"nofp", "jscvt"}, {"fp-armv8", "jsconv"}, {}}, {AArch64::ARMV8A, {"jscvt", "nofp"}, {}, {"fp-armv8", "jsconv"}}, + // fp -> lsfe + {AArch64::ARMV9_6A, {"nofp", "lsfe"}, {"fp-armv8", "lsfe"}, {}}, + {AArch64::ARMV9_6A, {"lsfe", "nofp"}, {}, {"fp-armv8", "lsfe"}}, + + // fp -> fprcvt + {AArch64::ARMV9_6A, {"nofp", "fprcvt"}, {"fp-armv8", "fprcvt"}, {}}, + {AArch64::ARMV9_6A, {"fprcvt", "nofp"}, {}, {"fp-armv8", "fprcvt"}}, + // simd -> {aes, sha2, sha3, sm4} {AArch64::ARMV8A, {"nosimd", "aes"}, {"neon", "aes"}, {}}, {AArch64::ARMV8A, {"aes", "nosimd"}, {}, {"neon", "aes"}}, @@ -1780,7 +1811,7 @@ AArch64ExtensionDependenciesBaseArchTestParams {AArch64::ARMV8A, {"nosve", "f64mm"}, {"sve", "f64mm"}, {}}, {AArch64::ARMV8A, {"f64mm", "nosve"}, {}, {"sve", "f64mm"}}, - // sve2 -> {sve2p1, sve2-bitperm, sve2-aes, sve2-sha3, sve2-sm4} + // sve2 -> {sve2p1, sve2-bitperm, sve2-sha3, sve2-sm4} {AArch64::ARMV8A, {"nosve2", "sve2p1"}, {"sve2", "sve2p1"}, {}}, {AArch64::ARMV8A, {"sve2p1", "nosve2"}, {}, {"sve2", "sve2p1"}}, {AArch64::ARMV8A, @@ -1791,22 +1822,52 @@ AArch64ExtensionDependenciesBaseArchTestParams {"sve2-bitperm", "nosve2"}, {}, {"sve2", "sve2-bitperm"}}, - {AArch64::ARMV8A, {"nosve2", "sve2-aes"}, {"sve2", "sve2-aes"}, {}}, - {AArch64::ARMV8A, {"sve2-aes", "nosve2"}, {}, {"sve2", "sve2-aes"}}, {AArch64::ARMV8A, {"nosve2", "sve2-sha3"}, {"sve2", "sve2-sha3"}, {}}, {AArch64::ARMV8A, {"sve2-sha3", "nosve2"}, {}, {"sve2", "sve2-sha3"}}, {AArch64::ARMV8A, {"nosve2", "sve2-sm4"}, {"sve2", "sve2-sm4"}, {}}, {AArch64::ARMV8A, {"sve2-sm4", "nosve2"}, {}, {"sve2", "sve2-sm4"}}, - // sve-b16b16 -> {sme-b16b16} + // sve-b16b16 -> {sme-b16b16, sve-bfscale} {AArch64::ARMV8A, {"nosve-b16b16", "sme-b16b16"}, {"sve-b16b16", "sme-b16b16"}, {}}, - {AArch64::ARMV8A, + {AArch64::ARMV9_6A, {"sme-b16b16", "nosve-b16b16"}, {}, {"sve-b16b16", "sme-b16b16"}}, + {AArch64::ARMV9_6A, + {"nosve-b16b16", "sve-bfscale"}, + {"sve-b16b16", "sve-bfscale"}, + {}}, + {AArch64::ARMV9_6A, + {"sve-bfscale", "nosve-b16b16"}, + {}, + {"sve-b16b16", "sve-bfscale"}}, + + // sve2p1 -> {sve2p2} + {AArch64::ARMV9_6A, {"nosve2p1", "sve2p2"}, {"sve2p1", "sve2p2"}, {}}, + {AArch64::ARMV9_6A, {"sve2p2", "nosve2p1"}, {}, {"sve2p1", "sve2p2"}}, + + // sve2p1 -> sve-f16f32mm + {AArch64::ARMV9_6A, + {"nosve2p1", "sve-f16f32mm"}, + {"sve2p1", "sve-f16f32mm"}, + {}}, + {AArch64::ARMV9_6A, + {"sve-f16f32mm", "nosve2p1"}, + {}, + {"sve2p1", "sve-f16f32mm"}}, + + // sve2-aes -> {sve-aes2} + {AArch64::ARMV9_6A, + {"nosve2-aes", "sve-aes2"}, + {"sve2-aes", "sve-aes2"}, + {}}, + {AArch64::ARMV9_6A, + {"sve-aes2", "nosve2-aes"}, + {}, + {"sve2-aes", "sve-aes2"}}, // sme -> {sme2, sme-f16f16, sme-f64f64, sme-i16i64, sme-fa64} {AArch64::ARMV8A, {"nosme", "sme2"}, {"sme", "sme2"}, {}}, @@ -1855,6 +1916,18 @@ AArch64ExtensionDependenciesBaseArchTestParams {AArch64::ARMV8A, {"nosme2", "sme-b16b16"}, {"sme2", "sme-b16b16"}, {}}, {AArch64::ARMV8A, {"sme-b16b16", "nosme2"}, {}, {"sme2", "sme-b16b16"}}, + // sme2p1 -> {sme2p2, ssve-aes} + {AArch64::ARMV9_6A, {"nosme2p1", "sme2p2"}, {"sme2p2", "sme2p1"}, {}}, + {AArch64::ARMV9_6A, {"sme2p2", "nosme2p1"}, {}, {"sme2p1", "sme2p2"}}, + {AArch64::ARMV9_6A, + {"nosme2p1", "ssve-aes"}, + {"sme2p1", "ssve-aes"}, + {}}, + {AArch64::ARMV9_6A, + {"ssve-aes", "nosme2p1"}, + {}, + {"ssve-aes", "sme2p1"}}, + // fp8 -> {sme-f8f16, sme-f8f32} {AArch64::ARMV8A, {"nofp8", "sme-f8f16"}, {"fp8", "sme-f8f16"}, {}}, {AArch64::ARMV8A, {"sme-f8f16", "nofp8"}, {}, {"fp8", "sme-f8f16"}}, @@ -1882,6 +1955,36 @@ AArch64ExtensionDependenciesBaseArchTestParams // rcpc -> rcpc3 {AArch64::ARMV8A, {"norcpc", "rcpc3"}, {"rcpc", "rcpc3"}, {}}, {AArch64::ARMV8A, {"rcpc3", "norcpc"}, {}, {"rcpc", "rcpc3"}}, + + // fp8dot4 -> f8f32mm + {AArch64::ARMV9_6A, + {"nofp8dot4", "f8f32mm"}, + {"fp8dot4", "f8f32mm"}, + {}}, + {AArch64::ARMV9_6A, + {"f8f32mm", "nofp8dot4"}, + {}, + {"f8f32mm", "fp8dot4"}}, + + // f8f32mm -> f8f16mm + {AArch64::ARMV9_6A, + {"nof8f32mm", "f8f16mm"}, + {"f8f16mm", "f8f32mm"}, + {}}, + {AArch64::ARMV9_6A, + {"f8f16mm", "nof8f32mm"}, + {}, + {"f8f16mm", "f8f32mm"}}, + + // fp8dot2 -> f8f16mm + {AArch64::ARMV9_6A, + {"nofp8dot2", "f8f16mm"}, + {"f8f16mm", "fp8dot2"}, + {}}, + {AArch64::ARMV9_6A, + {"f8f16mm", "nofp8dot2"}, + {}, + {"f8f16mm", "fp8dot2"}}, }; INSTANTIATE_TEST_SUITE_P( >From 1bd148cc61382543b49ec09ca370e7d17309cd1d Mon Sep 17 00:00:00 2001 From: Marian Lukac <marian.lu...@arm.com> Date: Fri, 11 Oct 2024 12:52:36 +0000 Subject: [PATCH 2/4] Add Armv-9.6A to feature description and add missing tests for sve-f16f32mm --- llvm/lib/Target/AArch64/AArch64Features.td | 22 +++++++++---------- .../TargetParser/TargetParserTest.cpp | 5 +++-- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64Features.td b/llvm/lib/Target/AArch64/AArch64Features.td index 10dbf900271a32..0f45cf481a8ada 100644 --- a/llvm/lib/Target/AArch64/AArch64Features.td +++ b/llvm/lib/Target/AArch64/AArch64Features.td @@ -523,38 +523,38 @@ def FeatureTLBIW : ExtensionWithMArch<"tlbiw", "TLBIW", "FEAT_TLBIW", //===----------------------------------------------------------------------===// def FeatureCMPBR : ExtensionWithMArch<"cmpbr", "CMPBR", "FEAT_CMPBR", - "Enable A64 base compare and branch instructions">; + "Enable Armv9.6-A base compare and branch instructions">; def FeatureF8F32MM: ExtensionWithMArch<"f8f32mm", "F8F32MM", "FEAT_F8F32MM", - "Enable FP8 to Single-Precision Matrix Multiplication", [FeatureFP8DOT4]>; + "Enable Armv9.6-A FP8 to Single-Precision Matrix Multiplication", [FeatureFP8DOT4]>; def FeatureF8F16MM: ExtensionWithMArch<"f8f16mm", "F8F16MM", "FEAT_F8F16MM", - "Enable FP8 to Half-Precision Matrix Multiplication", [FeatureFP8DOT2, FeatureF8F32MM]>; + "Enable Armv9.6-A FP8 to Half-Precision Matrix Multiplication", [FeatureFP8DOT2, FeatureF8F32MM]>; def FeatureFPRCVT: ExtensionWithMArch<"fprcvt", "FPRCVT", "FEAT_FPRCVT", - "Enable A64 base convert instructions for SIMD&FP scalar register operands of" + "Enable Armv9.6-A base convert instructions for SIMD&FP scalar register operands of" " different input and output sizes", [FeatureFPARMv8]>; def FeatureLSFE : ExtensionWithMArch<"lsfe", "LSFE", "FEAT_LSFE", - "Enable A64 base Atomic floating-point in-memory instructions", [FeatureFPARMv8]>; + "Enable Armv9.6-A base Atomic floating-point in-memory instructions", [FeatureFPARMv8]>; def FeatureSME2p2: ExtensionWithMArch<"sme2p2", "SME2p2", "FEAT_SME2p2", - "Enable Scalable Matrix Extension 2.2 instructions", [FeatureSME2p1]>; + "Enable Armv9.6-A Scalable Matrix Extension 2.2 instructions", [FeatureSME2p1]>; def FeatureSSVE_AES : ExtensionWithMArch<"ssve-aes", "SSVE_AES", "FEAT_SSVE_AES", - "Enable SVE2 AES support in streaming SVE mode", [FeatureSME2p1]>; + "Enable Armv9.6-A SVE2 AES support in streaming SVE mode", [FeatureSME2p1]>; def FeatureSVE2p2 : ExtensionWithMArch<"sve2p2", "SVE2p2", "FEAT_SVE2p2", - "Enable Scalable Vector Extension 2.2 instructions", [FeatureSVE2p1]>; + "Enable Armv9.6-A Scalable Vector Extension 2.2 instructions", [FeatureSVE2p1]>; def FeatureSVEAES2: ExtensionWithMArch<"sve-aes2", "SVE_AES2", "FEAT_SVE_AES2", - "Enable SVE multi-vector AES and 128-bit PMULL instructions", [FeatureSVE2AES]>; + "Enable Armv9.6-A SVE multi-vector AES and 128-bit PMULL instructions", [FeatureSVE2AES]>; def FeatureSVEBFSCALE: ExtensionWithMArch<"sve-bfscale", "SVE_BFSCALE", "FEAT_SVE_BFSCALE", - "Enable SVE BFloat16 scaling instructions", [FeatureSVEB16B16]>; + "Enable Armv9.6-A SVE BFloat16 scaling instructions", [FeatureSVEB16B16]>; def FeatureSVE_F16F32MM: ExtensionWithMArch<"sve-f16f32mm", "SVE_F16F32MM", "FEAT_SVE_F16F32MM", - "Enable FP16 to FP32 Matrix Multiply instructions", [FeatureSVE2p1]>; + "Enable Armv9.6-A FP16 to FP32 Matrix Multiply instructions", [FeatureSVE2p1]>; //===----------------------------------------------------------------------===// // Other Features diff --git a/llvm/unittests/TargetParser/TargetParserTest.cpp b/llvm/unittests/TargetParser/TargetParserTest.cpp index 0d59b41a2f7510..31fed84cc0a817 100644 --- a/llvm/unittests/TargetParser/TargetParserTest.cpp +++ b/llvm/unittests/TargetParser/TargetParserTest.cpp @@ -1361,6 +1361,7 @@ TEST(TargetParserTest, AArch64ExtensionFeatures) { EXPECT_TRUE(llvm::is_contained(Features, "+sve")); EXPECT_TRUE(llvm::is_contained(Features, "+sve-b16b16")); EXPECT_TRUE(llvm::is_contained(Features, "+sve-bfscale")); + EXPECT_TRUE(llvm::is_contained(Features, "+sve-f16f32mm")); EXPECT_TRUE(llvm::is_contained(Features, "+sve2")); EXPECT_TRUE(llvm::is_contained(Features, "+sve2-aes")); EXPECT_TRUE(llvm::is_contained(Features, "+sve2-sm4")); @@ -1525,6 +1526,7 @@ TEST(TargetParserTest, AArch64ArchExtFeature) { {"sve", "nosve", "+sve", "-sve"}, {"sve-b16b16", "nosve-b16b16", "+sve-b16b16", "-sve-b16b16"}, {"sve-bfscale", "nosve-bfscale", "+sve-bfscale", "-sve-bfscale"}, + {"sve-f16f32mm", "nosve-f16f32mm", "+sve-f16f32mm", "-sve-f16f32mm"}, {"sve2", "nosve2", "+sve2", "-sve2"}, {"sve2-aes", "nosve2-aes", "+sve2-aes", "-sve2-aes"}, {"sve2-sm4", "nosve2-sm4", "+sve2-sm4", "-sve2-sm4"}, @@ -1845,11 +1847,10 @@ AArch64ExtensionDependenciesBaseArchTestParams {}, {"sve-b16b16", "sve-bfscale"}}, - // sve2p1 -> {sve2p2} + // sve2p1 -> {sve2p2, sve-f16f32mm} {AArch64::ARMV9_6A, {"nosve2p1", "sve2p2"}, {"sve2p1", "sve2p2"}, {}}, {AArch64::ARMV9_6A, {"sve2p2", "nosve2p1"}, {}, {"sve2p1", "sve2p2"}}, - // sve2p1 -> sve-f16f32mm {AArch64::ARMV9_6A, {"nosve2p1", "sve-f16f32mm"}, {"sve2p1", "sve-f16f32mm"}, >From dba9fb7b28773874e9987423b5a0d94d71857f30 Mon Sep 17 00:00:00 2001 From: Marian Lukac <marian.lu...@arm.com> Date: Fri, 11 Oct 2024 15:23:59 +0000 Subject: [PATCH 3/4] Fix test --- .../print-supported-extensions-aarch64.c | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/clang/test/Driver/print-supported-extensions-aarch64.c b/clang/test/Driver/print-supported-extensions-aarch64.c index c582154bd0370d..fbc0d70c4901c9 100644 --- a/clang/test/Driver/print-supported-extensions-aarch64.c +++ b/clang/test/Driver/print-supported-extensions-aarch64.c @@ -8,7 +8,7 @@ // CHECK-NEXT: bf16 FEAT_BF16 Enable BFloat16 Extension // CHECK-NEXT: brbe FEAT_BRBE Enable Branch Record Buffer Extension // CHECK-NEXT: bti FEAT_BTI Enable Branch Target Identification -// CHECK-NEXT: cmpbr FEAT_CMPBR Enable A64 base compare and branch instructions +// CHECK-NEXT: cmpbr FEAT_CMPBR Enable Armv9.6-A base compare and branch instructions // CHECK-NEXT: fcma FEAT_FCMA Enable Armv8.3-A Floating-point complex number support // CHECK-NEXT: cpa FEAT_CPA Enable Armv9.5-A Checked Pointer Arithmetic // CHECK-NEXT: crc FEAT_CRC32 Enable Armv8.0-A CRC-32 checksum instructions @@ -19,8 +19,8 @@ // CHECK-NEXT: dotprod FEAT_DotProd Enable dot product support // CHECK-NEXT: f32mm FEAT_F32MM Enable Matrix Multiply FP32 Extension // CHECK-NEXT: f64mm FEAT_F64MM Enable Matrix Multiply FP64 Extension -// CHECK-NEXT: f8f16mm FEAT_F8F16MM Enable FP8 to Half-Precision Matrix Multiplication -// CHECK-NEXT: f8f32mm FEAT_F8F32MM Enable FP8 to Single-Precision Matrix Multiplication +// CHECK-NEXT: f8f16mm FEAT_F8F16MM Enable Armv9.6-A FP8 to Half-Precision Matrix Multiplication +// CHECK-NEXT: f8f32mm FEAT_F8F32MM Enable Armv9.6-A FP8 to Single-Precision Matrix Multiplication // CHECK-NEXT: faminmax FEAT_FAMINMAX Enable FAMIN and FAMAX instructions // CHECK-NEXT: flagm FEAT_FlagM Enable Armv8.4-A Flag Manipulation instructions // CHECK-NEXT: fp FEAT_FP Enable Armv8.0-A Floating Point Extensions @@ -29,7 +29,7 @@ // CHECK-NEXT: fp8dot2 FEAT_FP8DOT2 Enable FP8 2-way dot instructions // CHECK-NEXT: fp8dot4 FEAT_FP8DOT4 Enable FP8 4-way dot instructions // CHECK-NEXT: fp8fma FEAT_FP8FMA Enable Armv9.5-A FP8 multiply-add instructions -// CHECK-NEXT: fprcvt FEAT_FPRCVT Enable A64 base convert instructions for SIMD&FP scalar register operands of different input and output sizes +// CHECK-NEXT: fprcvt FEAT_FPRCVT Enable Armv9.6-A base convert instructions for SIMD&FP scalar register operands of different input and output sizes // CHECK-NEXT: fp16 FEAT_FP16 Enable half-precision floating-point data processing // CHECK-NEXT: gcs FEAT_GCS Enable Armv9.4-A Guarded Call Stack Extension // CHECK-NEXT: hbc FEAT_HBC Enable Armv8.8-A Hinted Conditional Branches Extension @@ -39,7 +39,7 @@ // CHECK-NEXT: ls64 FEAT_LS64, FEAT_LS64_V, FEAT_LS64_ACCDATA Enable Armv8.7-A LD64B/ST64B Accelerator Extension // CHECK-NEXT: lse FEAT_LSE Enable Armv8.1-A Large System Extension (LSE) atomic instructions // CHECK-NEXT: lse128 FEAT_LSE128 Enable Armv9.4-A 128-bit Atomic instructions -// CHECK-NEXT: lsfe FEAT_LSFE Enable A64 base Atomic floating-point in-memory instructions +// CHECK-NEXT: lsfe FEAT_LSFE Enable Armv9.6-A base Atomic floating-point in-memory instructions // CHECK-NEXT: lut FEAT_LUT Enable Lookup Table instructions // CHECK-NEXT: mops FEAT_MOPS Enable Armv8.8-A memcpy and memset acceleration instructions // CHECK-NEXT: memtag FEAT_MTE, FEAT_MTE2 Enable Memory Tagging Extension @@ -69,26 +69,26 @@ // CHECK-NEXT: sme-lutv2 FEAT_SME_LUTv2 Enable Scalable Matrix Extension (SME) LUTv2 instructions // CHECK-NEXT: sme2 FEAT_SME2 Enable Scalable Matrix Extension 2 (SME2) instructions // CHECK-NEXT: sme2p1 FEAT_SME2p1 Enable Scalable Matrix Extension 2.1 instructions -// CHECK-NEXT: sme2p2 FEAT_SME2p2 Enable Scalable Matrix Extension 2.2 instructions +// CHECK-NEXT: sme2p2 FEAT_SME2p2 Enable Armv9.6-A Scalable Matrix Extension 2.2 instructions // CHECK-NEXT: profile FEAT_SPE Enable Statistical Profiling extension // CHECK-NEXT: predres2 FEAT_SPECRES2 Enable Speculation Restriction Instruction // CHECK-NEXT: ssbs FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit -// CHECK-NEXT: ssve-aes FEAT_SSVE_AES Enable SVE2 AES support in streaming SVE mode +// CHECK-NEXT: ssve-aes FEAT_SSVE_AES Enable Armv9.6-A SVE2 AES support in streaming SVE mode // CHECK-NEXT: ssve-fp8dot2 FEAT_SSVE_FP8DOT2 Enable SVE2 FP8 2-way dot product instructions // CHECK-NEXT: ssve-fp8dot4 FEAT_SSVE_FP8DOT4 Enable SVE2 FP8 4-way dot product instructions // CHECK-NEXT: ssve-fp8fma FEAT_SSVE_FP8FMA Enable SVE2 FP8 multiply-add instructions // CHECK-NEXT: sve FEAT_SVE Enable Scalable Vector Extension (SVE) instructions -// CHECK-NEXT: sve-aes2 FEAT_SVE_AES2 Enable SVE multi-vector AES and 128-bit PMULL instructions +// CHECK-NEXT: sve-aes2 FEAT_SVE_AES2 Enable Armv9.6-A SVE multi-vector AES and 128-bit PMULL instructions // CHECK-NEXT: sve-b16b16 FEAT_SVE_B16B16 Enable SVE2 non-widening and SME2 Z-targeting non-widening BFloat16 instructions -// CHECK-NEXT: sve-bfscale FEAT_SVE_BFSCALE Enable SVE BFloat16 scaling instructions -// CHECK-NEXT: sve-f16f32mm FEAT_SVE_F16F32MM Enable FP16 to FP32 Matrix Multiply +// CHECK-NEXT: sve-bfscale FEAT_SVE_BFSCALE Enable Armv9.6-A SVE BFloat16 scaling instructions +// CHECK-NEXT: sve-f16f32mm FEAT_SVE_F16F32MM Enable Armv9.6-A FP16 to FP32 Matrix Multiply // CHECK-NEXT: sve2 FEAT_SVE2 Enable Scalable Vector Extension 2 (SVE2) instructions // CHECK-NEXT: sve2-aes FEAT_SVE_AES, FEAT_SVE_PMULL128 Enable AES SVE2 instructions // CHECK-NEXT: sve2-bitperm FEAT_SVE_BitPerm Enable bit permutation SVE2 instructions // CHECK-NEXT: sve2-sha3 FEAT_SVE_SHA3 Enable SHA3 SVE2 instructions // CHECK-NEXT: sve2-sm4 FEAT_SVE_SM4 Enable SM4 SVE2 instructions // CHECK-NEXT: sve2p1 FEAT_SVE2p1 Enable Scalable Vector Extension 2.1 instructions -// CHECK-NEXT: sve2p2 FEAT_SVE2p2 Enable Scalable Vector Extension 2.2 instructions +// CHECK-NEXT: sve2p2 FEAT_SVE2p2 Enable Armv9.6-A Scalable Vector Extension 2.2 instructions // CHECK-NEXT: the FEAT_THE Enable Armv8.9-A Translation Hardening Extension // CHECK-NEXT: tlbiw FEAT_TLBIW Enable Armv9.5-A TLBI VMALL for Dirty State // CHECK-NEXT: tme FEAT_TME Enable Transactional Memory Extension >From 9fa8dd3b6d49ac866a1361ea4c68c031619b8c05 Mon Sep 17 00:00:00 2001 From: Spencer Abson <spencer.ab...@arm.com> Date: Tue, 15 Oct 2024 12:36:51 +0000 Subject: [PATCH 4/4] Remove ext dependencies and test armv9.6 driver behavior --- clang/test/Driver/aarch64-v96a.c | 42 +++++++++- llvm/lib/Target/AArch64/AArch64Features.td | 16 ++-- .../TargetParser/TargetParserTest.cpp | 83 ++----------------- 3 files changed, 52 insertions(+), 89 deletions(-) diff --git a/clang/test/Driver/aarch64-v96a.c b/clang/test/Driver/aarch64-v96a.c index 0aaadddb2842f8..80c99be934334e 100644 --- a/clang/test/Driver/aarch64-v96a.c +++ b/clang/test/Driver/aarch64-v96a.c @@ -6,7 +6,7 @@ // RUN: %clang -target aarch64 -mlittle-endian -march=armv9.6-a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV96A %s // RUN: %clang -target aarch64_be -mlittle-endian -march=armv9.6a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV96A %s // RUN: %clang -target aarch64_be -mlittle-endian -march=armv9.6-a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV96A %s -// GENERICV96A: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic" "-target-feature" "+v9.6a" +// GENERICV96A: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic" "-target-feature" "+v9.6a"{{.*}} "-target-feature" "+cmpbr"{{.*}} "-target-feature" "+fprcvt"{{.*}} "-target-feature" "+sve2p2" // RUN: %clang -target aarch64_be -march=armv9.6a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV96A-BE %s // RUN: %clang -target aarch64_be -march=armv9.6-a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV96A-BE %s @@ -14,6 +14,42 @@ // RUN: %clang -target aarch64 -mbig-endian -march=armv9.6-a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV96A-BE %s // RUN: %clang -target aarch64_be -mbig-endian -march=armv9.6a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV96A-BE %s // RUN: %clang -target aarch64_be -mbig-endian -march=armv9.6-a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV96A-BE %s -// GENERICV96A-BE: "-cc1"{{.*}} "-triple" "aarch64_be{{.*}}" "-target-cpu" "generic" "-target-feature" "+v9.6a" -// +// GENERICV96A-BE: "-cc1"{{.*}} "-triple" "aarch64_be{{.*}}" "-target-cpu" "generic" "-target-feature" "+v9.6a"{{.*}} "-target-feature" "+cmpbr"{{.*}} "-target-feature" "+fprcvt"{{.*}} "-target-feature" "+sve2p2" + // ===== Features supported on aarch64 ===== + +// RUN: %clang -target aarch64 -march=armv9.6a+f8f16mm -### -c %s 2>&1 | FileCheck -check-prefix=V96A-F8F16MM %s +// RUN: %clang -target aarch64 -march=armv9.6-a+f8f16mm -### -c %s 2>&1 | FileCheck -check-prefix=V96A-F8F16MM %s +// V96A-F8F16MM: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic" "-target-feature" "+v9.6a"{{.*}} "-target-feature" "+f8f16mm" + +// RUN: %clang -target aarch64 -march=armv9.6a+f8f32mm -### -c %s 2>&1 | FileCheck -check-prefix=V96A-F8F32MM %s +// RUN: %clang -target aarch64 -march=armv9.6-a+f8f32mm -### -c %s 2>&1 | FileCheck -check-prefix=V96A-F8F32MM %s +// V96A-F8F32MM: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic" "-target-feature" "+v9.6a"{{.*}} "-target-feature" "+f8f32mm" + +// RUN: %clang -target aarch64 -march=armv9.6a+lsfe -### -c %s 2>&1 | FileCheck -check-prefix=V96A-LSFE %s +// RUN: %clang -target aarch64 -march=armv9.6-a+lsfe -### -c %s 2>&1 | FileCheck -check-prefix=V96A-LSFE %s +// V96A-LSFE: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic" "-target-feature" "+v9.6a"{{.*}} "-target-feature" "+lsfe" + +// RUN: %clang -target aarch64 -march=armv9.6a+sme2p2 -### -c %s 2>&1 | FileCheck -check-prefix=V96A-SME2p2 %s +// RUN: %clang -target aarch64 -march=armv9.6-a+sme2p2 -### -c %s 2>&1 | FileCheck -check-prefix=V96A-SME2p2 %s +// V96A-SME2p2: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic" "-target-feature" "+v9.6a"{{.*}} "-target-feature" "+sme2p2" + +// RUN: %clang -target aarch64 -march=armv9.6a+ssve-aes -### -c %s 2>&1 | FileCheck -check-prefix=V96A-SSVE-AES %s +// RUN: %clang -target aarch64 -march=armv9.6-a+ssve-aes -### -c %s 2>&1 | FileCheck -check-prefix=V96A-SSVE-AES %s +// V96A-SSVE-AES: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic" "-target-feature" "+v9.6a"{{.*}} "-target-feature" "+ssve-aes" + +// RUN: %clang -target aarch64 -march=armv9.6a+sve2p2 -### -c %s 2>&1 | FileCheck -check-prefix=V96A-SVE2p2 %s +// RUN: %clang -target aarch64 -march=armv9.6-a+sve2p2 -### -c %s 2>&1 | FileCheck -check-prefix=V96A-SVE2p2 %s +// V96A-SVE2p2: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic" "-target-feature" "+v9.6a"{{.*}} "-target-feature" "+sve2p2" + +// RUN: %clang -target aarch64 -march=armv9.6a+sve-aes2 -### -c %s 2>&1 | FileCheck -check-prefix=V96A-SVE-AES2 %s +// RUN: %clang -target aarch64 -march=armv9.6-a+sve-aes2 -### -c %s 2>&1 | FileCheck -check-prefix=V96A-SVE-AES2 %s +// V96A-SVE-AES2: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic" "-target-feature" "+v9.6a"{{.*}} "-target-feature" "+sve-aes2" + +// RUN: %clang -target aarch64 -march=armv9.6a+sve-bfscale -### -c %s 2>&1 | FileCheck -check-prefix=V96A-SVE-BFSCALE %s +// RUN: %clang -target aarch64 -march=armv9.6-a+sve-bfscale -### -c %s 2>&1 | FileCheck -check-prefix=V96A-SVE-BFSCALE %s +// V96A-SVE-BFSCALE: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic" "-target-feature" "+v9.6a"{{.*}} "-target-feature" "+sve-bfscale" + +// RUN: %clang -target aarch64 -march=armv9.6a+sve-f16f32mm -### -c %s 2>&1 | FileCheck -check-prefix=V96A-SVE-F16F32MM %s +// RUN: %clang -target aarch64 -march=armv9.6-a+sve-f16f32mm -### -c %s 2>&1 | FileCheck -check-prefix=V96A-SVE-F16F32MM %s +// V96A-SVE-F16F32MM: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic" "-target-feature" "+v9.6a"{{.*}} "-target-feature" "+sve-f16f32mm" diff --git a/llvm/lib/Target/AArch64/AArch64Features.td b/llvm/lib/Target/AArch64/AArch64Features.td index 0f45cf481a8ada..831f311b236441 100644 --- a/llvm/lib/Target/AArch64/AArch64Features.td +++ b/llvm/lib/Target/AArch64/AArch64Features.td @@ -526,35 +526,35 @@ def FeatureCMPBR : ExtensionWithMArch<"cmpbr", "CMPBR", "FEAT_CMPBR", "Enable Armv9.6-A base compare and branch instructions">; def FeatureF8F32MM: ExtensionWithMArch<"f8f32mm", "F8F32MM", "FEAT_F8F32MM", - "Enable Armv9.6-A FP8 to Single-Precision Matrix Multiplication", [FeatureFP8DOT4]>; + "Enable Armv9.6-A FP8 to Single-Precision Matrix Multiplication">; def FeatureF8F16MM: ExtensionWithMArch<"f8f16mm", "F8F16MM", "FEAT_F8F16MM", - "Enable Armv9.6-A FP8 to Half-Precision Matrix Multiplication", [FeatureFP8DOT2, FeatureF8F32MM]>; + "Enable Armv9.6-A FP8 to Half-Precision Matrix Multiplication">; def FeatureFPRCVT: ExtensionWithMArch<"fprcvt", "FPRCVT", "FEAT_FPRCVT", "Enable Armv9.6-A base convert instructions for SIMD&FP scalar register operands of" - " different input and output sizes", [FeatureFPARMv8]>; + " different input and output sizes">; def FeatureLSFE : ExtensionWithMArch<"lsfe", "LSFE", "FEAT_LSFE", - "Enable Armv9.6-A base Atomic floating-point in-memory instructions", [FeatureFPARMv8]>; + "Enable Armv9.6-A base Atomic floating-point in-memory instructions">; def FeatureSME2p2: ExtensionWithMArch<"sme2p2", "SME2p2", "FEAT_SME2p2", "Enable Armv9.6-A Scalable Matrix Extension 2.2 instructions", [FeatureSME2p1]>; def FeatureSSVE_AES : ExtensionWithMArch<"ssve-aes", "SSVE_AES", "FEAT_SSVE_AES", - "Enable Armv9.6-A SVE2 AES support in streaming SVE mode", [FeatureSME2p1]>; + "Enable Armv9.6-A SVE2 AES support in streaming SVE mode">; def FeatureSVE2p2 : ExtensionWithMArch<"sve2p2", "SVE2p2", "FEAT_SVE2p2", "Enable Armv9.6-A Scalable Vector Extension 2.2 instructions", [FeatureSVE2p1]>; def FeatureSVEAES2: ExtensionWithMArch<"sve-aes2", "SVE_AES2", "FEAT_SVE_AES2", - "Enable Armv9.6-A SVE multi-vector AES and 128-bit PMULL instructions", [FeatureSVE2AES]>; + "Enable Armv9.6-A SVE multi-vector AES and 128-bit PMULL instructions">; def FeatureSVEBFSCALE: ExtensionWithMArch<"sve-bfscale", "SVE_BFSCALE", "FEAT_SVE_BFSCALE", - "Enable Armv9.6-A SVE BFloat16 scaling instructions", [FeatureSVEB16B16]>; + "Enable Armv9.6-A SVE BFloat16 scaling instructions">; def FeatureSVE_F16F32MM: ExtensionWithMArch<"sve-f16f32mm", "SVE_F16F32MM", "FEAT_SVE_F16F32MM", - "Enable Armv9.6-A FP16 to FP32 Matrix Multiply instructions", [FeatureSVE2p1]>; + "Enable Armv9.6-A FP16 to FP32 Matrix Multiply instructions">; //===----------------------------------------------------------------------===// // Other Features diff --git a/llvm/unittests/TargetParser/TargetParserTest.cpp b/llvm/unittests/TargetParser/TargetParserTest.cpp index 31fed84cc0a817..786d01221b799e 100644 --- a/llvm/unittests/TargetParser/TargetParserTest.cpp +++ b/llvm/unittests/TargetParser/TargetParserTest.cpp @@ -1769,14 +1769,6 @@ AArch64ExtensionDependenciesBaseArchTestParams {AArch64::ARMV8A, {"nofp", "jscvt"}, {"fp-armv8", "jsconv"}, {}}, {AArch64::ARMV8A, {"jscvt", "nofp"}, {}, {"fp-armv8", "jsconv"}}, - // fp -> lsfe - {AArch64::ARMV9_6A, {"nofp", "lsfe"}, {"fp-armv8", "lsfe"}, {}}, - {AArch64::ARMV9_6A, {"lsfe", "nofp"}, {}, {"fp-armv8", "lsfe"}}, - - // fp -> fprcvt - {AArch64::ARMV9_6A, {"nofp", "fprcvt"}, {"fp-armv8", "fprcvt"}, {}}, - {AArch64::ARMV9_6A, {"fprcvt", "nofp"}, {}, {"fp-armv8", "fprcvt"}}, - // simd -> {aes, sha2, sha3, sm4} {AArch64::ARMV8A, {"nosimd", "aes"}, {"neon", "aes"}, {}}, {AArch64::ARMV8A, {"aes", "nosimd"}, {}, {"neon", "aes"}}, @@ -1829,47 +1821,20 @@ AArch64ExtensionDependenciesBaseArchTestParams {AArch64::ARMV8A, {"nosve2", "sve2-sm4"}, {"sve2", "sve2-sm4"}, {}}, {AArch64::ARMV8A, {"sve2-sm4", "nosve2"}, {}, {"sve2", "sve2-sm4"}}, - // sve-b16b16 -> {sme-b16b16, sve-bfscale} - {AArch64::ARMV8A, + // sve-b16b16 -> {sme-b16b16} + {AArch64::ARMV9_4A, {"nosve-b16b16", "sme-b16b16"}, {"sve-b16b16", "sme-b16b16"}, {}}, - {AArch64::ARMV9_6A, + {AArch64::ARMV9_4A, {"sme-b16b16", "nosve-b16b16"}, {}, {"sve-b16b16", "sme-b16b16"}}, - {AArch64::ARMV9_6A, - {"nosve-b16b16", "sve-bfscale"}, - {"sve-b16b16", "sve-bfscale"}, - {}}, - {AArch64::ARMV9_6A, - {"sve-bfscale", "nosve-b16b16"}, - {}, - {"sve-b16b16", "sve-bfscale"}}, - // sve2p1 -> {sve2p2, sve-f16f32mm} + // sve2p1 -> {sve2p2} {AArch64::ARMV9_6A, {"nosve2p1", "sve2p2"}, {"sve2p1", "sve2p2"}, {}}, {AArch64::ARMV9_6A, {"sve2p2", "nosve2p1"}, {}, {"sve2p1", "sve2p2"}}, - {AArch64::ARMV9_6A, - {"nosve2p1", "sve-f16f32mm"}, - {"sve2p1", "sve-f16f32mm"}, - {}}, - {AArch64::ARMV9_6A, - {"sve-f16f32mm", "nosve2p1"}, - {}, - {"sve2p1", "sve-f16f32mm"}}, - - // sve2-aes -> {sve-aes2} - {AArch64::ARMV9_6A, - {"nosve2-aes", "sve-aes2"}, - {"sve2-aes", "sve-aes2"}, - {}}, - {AArch64::ARMV9_6A, - {"sve-aes2", "nosve2-aes"}, - {}, - {"sve2-aes", "sve-aes2"}}, - // sme -> {sme2, sme-f16f16, sme-f64f64, sme-i16i64, sme-fa64} {AArch64::ARMV8A, {"nosme", "sme2"}, {"sme", "sme2"}, {}}, {AArch64::ARMV8A, {"sme2", "nosme"}, {}, {"sme", "sme2"}}, @@ -1917,17 +1882,9 @@ AArch64ExtensionDependenciesBaseArchTestParams {AArch64::ARMV8A, {"nosme2", "sme-b16b16"}, {"sme2", "sme-b16b16"}, {}}, {AArch64::ARMV8A, {"sme-b16b16", "nosme2"}, {}, {"sme2", "sme-b16b16"}}, - // sme2p1 -> {sme2p2, ssve-aes} + // sme2p1 -> {sme2p2} {AArch64::ARMV9_6A, {"nosme2p1", "sme2p2"}, {"sme2p2", "sme2p1"}, {}}, {AArch64::ARMV9_6A, {"sme2p2", "nosme2p1"}, {}, {"sme2p1", "sme2p2"}}, - {AArch64::ARMV9_6A, - {"nosme2p1", "ssve-aes"}, - {"sme2p1", "ssve-aes"}, - {}}, - {AArch64::ARMV9_6A, - {"ssve-aes", "nosme2p1"}, - {}, - {"ssve-aes", "sme2p1"}}, // fp8 -> {sme-f8f16, sme-f8f32} {AArch64::ARMV8A, {"nofp8", "sme-f8f16"}, {"fp8", "sme-f8f16"}, {}}, @@ -1956,36 +1913,6 @@ AArch64ExtensionDependenciesBaseArchTestParams // rcpc -> rcpc3 {AArch64::ARMV8A, {"norcpc", "rcpc3"}, {"rcpc", "rcpc3"}, {}}, {AArch64::ARMV8A, {"rcpc3", "norcpc"}, {}, {"rcpc", "rcpc3"}}, - - // fp8dot4 -> f8f32mm - {AArch64::ARMV9_6A, - {"nofp8dot4", "f8f32mm"}, - {"fp8dot4", "f8f32mm"}, - {}}, - {AArch64::ARMV9_6A, - {"f8f32mm", "nofp8dot4"}, - {}, - {"f8f32mm", "fp8dot4"}}, - - // f8f32mm -> f8f16mm - {AArch64::ARMV9_6A, - {"nof8f32mm", "f8f16mm"}, - {"f8f16mm", "f8f32mm"}, - {}}, - {AArch64::ARMV9_6A, - {"f8f16mm", "nof8f32mm"}, - {}, - {"f8f16mm", "f8f32mm"}}, - - // fp8dot2 -> f8f16mm - {AArch64::ARMV9_6A, - {"nofp8dot2", "f8f16mm"}, - {"f8f16mm", "fp8dot2"}, - {}}, - {AArch64::ARMV9_6A, - {"f8f16mm", "nofp8dot2"}, - {}, - {"f8f16mm", "fp8dot2"}}, }; INSTANTIATE_TEST_SUITE_P( _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits