https://github.com/MDevereau updated https://github.com/llvm/llvm-project/pull/72274
>From 86c61659cf99486965dffe201385b28420e93f41 Mon Sep 17 00:00:00 2001 From: Matt Devereau <matthew.dever...@arm.com> Date: Thu, 9 Nov 2023 16:08:57 +0000 Subject: [PATCH 1/3] Add SME2 builtins for zero { zt0 } Patch by: Kerry McLaughlin kerry.mclaugh...@arm.com --- clang/include/clang/Basic/arm_sme.td | 5 +++ .../acle_sme2_zero_zt.c | 32 +++++++++++++++++++ llvm/include/llvm/IR/IntrinsicsAArch64.td | 4 +++ .../Target/AArch64/AArch64ISelLowering.cpp | 26 ++++++++------- llvm/lib/Target/AArch64/AArch64ISelLowering.h | 4 +-- .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 2 +- llvm/lib/Target/AArch64/SMEInstrFormats.td | 11 +++++++ .../AArch64/sme2-intrinsics-zero-zt.ll | 13 ++++++++ 8 files changed, 83 insertions(+), 14 deletions(-) create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_zero_zt.c create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-zero-zt.ll diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index 7aae3c832bb1fe20..48afd6431fc8b692 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -321,4 +321,9 @@ let TargetGuard = "sme2" in { let TargetGuard = "sme2" in { def SVLDR_ZT : Inst<"svldr_zt", "viQ", "", MergeNone, "aarch64_sme_ldr_zt", [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>]>; def SVSTR_ZT : Inst<"svstr_zt", "vi%", "", MergeNone, "aarch64_sme_str_zt", [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>]>; + +// +// Zero ZT0 +// + def SVZERO_ZT : Inst<"svzero_zt", "vi", "", MergeNone, "aarch64_sme_zero_zt", [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>]>; } diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_zero_zt.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_zero_zt.c new file mode 100644 index 0000000000000000..4ea26119301cab23 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_zero_zt.c @@ -0,0 +1,32 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py + +// REQUIRES: aarch64-registered-target + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s + +#include <arm_sme_draft_spec_subject_to_change.h> + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1) A1 +#else +#define SVE_ACLE_FUNC(A1) A1 +#endif + +// CHECK-LABEL: @test_svzero_zt( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.zero.zt(i32 0) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z14test_svzero_ztv( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.zero.zt(i32 0) +// CPP-CHECK-NEXT: ret void +// +void test_svzero_zt(void) __arm_streaming_compatible __arm_shared_za __arm_preserves_za { + svzero_zt(0); +} diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index 60a8d98f3bc0d262..3c0a07be50607bbc 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -3544,6 +3544,10 @@ let TargetPrefix = "aarch64" in { def int_aarch64_sme_ldr_zt : SME_LDR_STR_ZT_Intrinsic; def int_aarch64_sme_str_zt : SME_LDR_STR_ZT_Intrinsic; + // + // Zero ZT0 + // + def int_aarch64_sme_zero_zt : DefaultAttrsIntrinsic<[], [llvm_i32_ty], [ImmArg<ArgIndex<0>>, IntrWriteMem]>; } // SVE2.1 - ZIPQ1, ZIPQ2, UZPQ1, UZPQ2 diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 4379c3fde6f3c5dd..f0f0fe1e807b4be8 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -2753,17 +2753,19 @@ AArch64TargetLowering::EmitFill(MachineInstr &MI, MachineBasicBlock *BB) const { return BB; } -MachineBasicBlock *AArch64TargetLowering::EmitZTSpillFill(MachineInstr &MI, - MachineBasicBlock *BB, - bool IsSpill) const { +MachineBasicBlock *AArch64TargetLowering::EmitZTInstr(MachineInstr &MI, + MachineBasicBlock *BB, + unsigned Opcode, + bool Op0IsDef) const { const TargetInstrInfo *TII = Subtarget->getInstrInfo(); MachineInstrBuilder MIB; - unsigned Opc = IsSpill ? AArch64::STR_TX : AArch64::LDR_TX; - auto Rs = IsSpill ? RegState::Kill : RegState::Define; - MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(Opc)); - MIB.addReg(MI.getOperand(0).getReg(), Rs); - MIB.add(MI.getOperand(1)); // Base - MI.eraseFromParent(); // The pseudo is gone now. + + MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(Opcode)) + .addReg(MI.getOperand(0).getReg(), IsZTDest ? RegState::Define : 0); + for (unsigned I = 1; I < MI.getNumOperands(); ++I) + MIB.add(MI.getOperand(I)); + + MI.eraseFromParent(); // The pseudo is gone now. return BB; } @@ -2884,11 +2886,13 @@ MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter( case AArch64::LDR_ZA_PSEUDO: return EmitFill(MI, BB); case AArch64::LDR_TX_PSEUDO: - return EmitZTSpillFill(MI, BB, /*IsSpill=*/false); + return EmitZTInstr(MI, BB, AArch64::LDR_TX, /*IsZTDest=*/true); case AArch64::STR_TX_PSEUDO: - return EmitZTSpillFill(MI, BB, /*IsSpill=*/true); + return EmitZTInstr(MI, BB, AArch64::STR_TX, /*IsZTDest=*/false); case AArch64::ZERO_M_PSEUDO: return EmitZero(MI, BB); + case AArch64::ZERO_T_PSEUDO: + return EmitZTInstr(MI, BB, AArch64::ZERO_T, /*IsZTDest=*/true); } } diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 009f8744b408a9df..092e19c962a1a61c 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -623,8 +623,8 @@ class AArch64TargetLowering : public TargetLowering { MachineBasicBlock *EmitZAInstr(unsigned Opc, unsigned BaseReg, MachineInstr &MI, MachineBasicBlock *BB, bool HasTile) const; - MachineBasicBlock *EmitZTSpillFill(MachineInstr &MI, MachineBasicBlock *BB, - bool IsSpill) const; + MachineBasicBlock *EmitZTInstr(MachineInstr &MI, MachineBasicBlock *BB, + unsigned Opcode, bool IsZTDest) const; MachineBasicBlock *EmitZero(MachineInstr &MI, MachineBasicBlock *BB) const; MachineBasicBlock * diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td index fcfa5f82a3809c2c..84ec88d4fd49b692 100644 --- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td @@ -539,7 +539,7 @@ defm SMOPS_MPPZZ_HtoS : sme2_int_mopx_tile<"smops", 0b001, int_aarch64_sme_smops defm UMOPA_MPPZZ_HtoS : sme2_int_mopx_tile<"umopa", 0b100, int_aarch64_sme_umopa_za32>; defm UMOPS_MPPZZ_HtoS : sme2_int_mopx_tile<"umops", 0b101, int_aarch64_sme_umops_za32>; -def ZERO_T : sme2_zero_zt<"zero", 0b0001>; +defm ZERO_T : sme2_zero_zt<"zero", 0b0001>; defm LDR_TX : sme2_spill_fill_vector<"ldr", 0b01111100, int_aarch64_sme_ldr_zt>; defm STR_TX : sme2_spill_fill_vector<"str", 0b11111100, int_aarch64_sme_str_zt>; diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td index ef9c323e25bc3585..c13c1b4e81faade8 100644 --- a/llvm/lib/Target/AArch64/SMEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td @@ -3119,6 +3119,17 @@ class sme2_zero_zt<string mnemonic, bits<4> opc> let Inst{3-0} = opc; } +multiclass sme2_zero_zt<string mnemonic, bits<4> opc> { + def NAME : sme2_zero_zt<mnemonic, opc>; + def NAME # _PSEUDO + : Pseudo<(outs), (ins ZTR:$ZT), []>, Sched<[]> { + // Translated to actual instruction in AArch64ISelLowering.cpp + let usesCustomInserter = 1; + } + def : Pat<(int_aarch64_sme_zero_zt (imm_to_zt untyped:$zt)), + (!cast<Instruction>(NAME # _PSEUDO) $zt)>; +} + //===----------------------------------------------------------------------===// // SME2 lookup table load/store class sme2_spill_fill_vector<string mnemonic, bits<8> opc> diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-zero-zt.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-zero-zt.ll new file mode 100644 index 0000000000000000..14a4dba2466bf3e0 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-zero-zt.ll @@ -0,0 +1,13 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 < %s | FileCheck %s + +define void @zero_zt0() { +; CHECK-LABEL: zero_zt0: +; CHECK: // %bb.0: +; CHECK-NEXT: zero { zt0 } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.zero.zt(i32 0) + ret void +} + +declare void @llvm.aarch64.sme.zero.zt(i32) >From a9a5e7c6169efd3794f8c7ab286a6da608f8f829 Mon Sep 17 00:00:00 2001 From: Matt Devereau <matthew.dever...@arm.com> Date: Fri, 1 Dec 2023 13:19:19 +0000 Subject: [PATCH 2/3] Removes IsPreservesZA and refactor EmitZTInstr --- clang/include/clang/Basic/arm_sme.td | 2 +- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index 48afd6431fc8b692..34dbfff6c4c85cf1 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -325,5 +325,5 @@ let TargetGuard = "sme2" in { // // Zero ZT0 // - def SVZERO_ZT : Inst<"svzero_zt", "vi", "", MergeNone, "aarch64_sme_zero_zt", [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>]>; + def SVZERO_ZT : Inst<"svzero_zt", "vi", "", MergeNone, "aarch64_sme_zero_zt", [IsOverloadNone, IsStreamingCompatible, IsSharedZA], [ImmCheck<0, ImmCheck0_0>]>; } diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index f0f0fe1e807b4be8..68fa58dea5beb1f4 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -2761,7 +2761,7 @@ MachineBasicBlock *AArch64TargetLowering::EmitZTInstr(MachineInstr &MI, MachineInstrBuilder MIB; MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(Opcode)) - .addReg(MI.getOperand(0).getReg(), IsZTDest ? RegState::Define : 0); + .addReg(MI.getOperand(0).getReg(), Op0IsDef ? RegState::Define : 0); for (unsigned I = 1; I < MI.getNumOperands(); ++I) MIB.add(MI.getOperand(I)); @@ -2886,13 +2886,13 @@ MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter( case AArch64::LDR_ZA_PSEUDO: return EmitFill(MI, BB); case AArch64::LDR_TX_PSEUDO: - return EmitZTInstr(MI, BB, AArch64::LDR_TX, /*IsZTDest=*/true); + return EmitZTInstr(MI, BB, AArch64::LDR_TX, /*Op0IsDef=*/true); case AArch64::STR_TX_PSEUDO: - return EmitZTInstr(MI, BB, AArch64::STR_TX, /*IsZTDest=*/false); + return EmitZTInstr(MI, BB, AArch64::STR_TX, /*Op0IsDef=*/false); case AArch64::ZERO_M_PSEUDO: return EmitZero(MI, BB); case AArch64::ZERO_T_PSEUDO: - return EmitZTInstr(MI, BB, AArch64::ZERO_T, /*IsZTDest=*/true); + return EmitZTInstr(MI, BB, AArch64::ZERO_T, /*Op0IsDef=*/true); } } >From 73bbd9c6b9548a19e0248ad1747881ae2665a5ac Mon Sep 17 00:00:00 2001 From: Matt Devereau <matthew.dever...@arm.com> Date: Fri, 1 Dec 2023 13:28:45 +0000 Subject: [PATCH 3/3] Remove overloaded tests and rename operand in AArch64ISelLowering.h --- .../CodeGen/aarch64-sme2-intrinsics/acle_sme2_zero_zt.c | 9 --------- llvm/lib/Target/AArch64/AArch64ISelLowering.h | 2 +- 2 files changed, 1 insertion(+), 10 deletions(-) diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_zero_zt.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_zero_zt.c index 4ea26119301cab23..35d35ec6f50b7f72 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_zero_zt.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_zero_zt.c @@ -4,19 +4,10 @@ // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include <arm_sme_draft_spec_subject_to_change.h> -#ifdef SVE_OVERLOADED_FORMS -// A simple used,unused... macro, long enough to represent any SVE builtin. -#define SVE_ACLE_FUNC(A1) A1 -#else -#define SVE_ACLE_FUNC(A1) A1 -#endif - // CHECK-LABEL: @test_svzero_zt( // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.aarch64.sme.zero.zt(i32 0) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 092e19c962a1a61c..2b16d2471770d08a 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -624,7 +624,7 @@ class AArch64TargetLowering : public TargetLowering { MachineInstr &MI, MachineBasicBlock *BB, bool HasTile) const; MachineBasicBlock *EmitZTInstr(MachineInstr &MI, MachineBasicBlock *BB, - unsigned Opcode, bool IsZTDest) const; + unsigned Opcode, bool Op0IsDef) const; MachineBasicBlock *EmitZero(MachineInstr &MI, MachineBasicBlock *BB) const; MachineBasicBlock * _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits