https://github.com/MDevereau updated https://github.com/llvm/llvm-project/pull/72849
>From 3b1d34afb1ae365f48716ae5eb9202a474adf234 Mon Sep 17 00:00:00 2001 From: Matt Devereau <matthew.dever...@arm.com> Date: Mon, 20 Nov 2023 10:49:27 +0000 Subject: [PATCH] [AArch64][SME2] Add ldr_zt, str_zt builtins and intrinsics --- clang/include/clang/Basic/arm_sme.td | 8 +++ .../acle_sme2_ldr_str_zt.c | 51 +++++++++++++++++++ .../Target/AArch64/AArch64ISelDAGToDAG.cpp | 8 ++- .../Target/AArch64/AArch64ISelLowering.cpp | 20 ++++++++ llvm/lib/Target/AArch64/AArch64ISelLowering.h | 2 + .../Target/AArch64/AArch64RegisterInfo.cpp | 6 +++ .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 4 +- llvm/lib/Target/AArch64/SMEInstrFormats.td | 23 +++++++-- .../CodeGen/AArch64/sme2-intrinsics-zt0.ll | 27 ++++++++++ 9 files changed, 140 insertions(+), 9 deletions(-) create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ldr_str_zt.c create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-zt0.ll diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index b5655afdf419ecf2..fb3f54ecff95080d 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -298,3 +298,11 @@ multiclass ZAAddSub<string n_suffix> { defm SVADD : ZAAddSub<"add">; defm SVSUB : ZAAddSub<"sub">; + +// +// Spill and fill of ZT0 +// +let TargetGuard = "sme2" in { + def SVLDR_ZT : Inst<"svldr_zt", "viQ", "", MergeNone, "aarch64_sme_ldr_zt", [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>]>; + def SVSTR_ZT : Inst<"svstr_zt", "vi%", "", MergeNone, "aarch64_sme_str_zt", [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>]>; +} diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ldr_str_zt.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ldr_str_zt.c new file mode 100644 index 0000000000000000..3d70ded6b469ba15 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ldr_str_zt.c @@ -0,0 +1,51 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py + +// REQUIRES: aarch64-registered-target + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s + +#include <arm_sme_draft_spec_subject_to_change.h> + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED) A1 +#else +#define SVE_ACLE_FUNC(A1,A2) A1##A2 +#endif + +// LDR ZT0 + +// CHECK-LABEL: @test_svldr_zt( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ldr.zt(i32 0, ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z13test_svldr_ztPKv( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ldr.zt(i32 0, ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svldr_zt(const void *base) __arm_streaming_compatible __arm_shared_za __arm_preserves_za { + svldr_zt(0, base); +} ; + + +// STR ZT0 + +// CHECK-LABEL: @test_svstr_zt( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.str.zt(i32 0, ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z13test_svstr_ztPv( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.str.zt(i32 0, ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svstr_zt(void *base) __arm_streaming_compatible __arm_shared_za __arm_preserves_za { + svstr_zt(0, base); +} diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 7617dccdeee397f7..abfe14e52509d58a 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -326,15 +326,19 @@ class AArch64DAGToDAGISel : public SelectionDAGISel { return false; } - template <unsigned BaseReg> bool ImmToTile(SDValue N, SDValue &Imm) { + template <unsigned BaseReg, unsigned Max> + bool ImmToTile(SDValue N, SDValue &Imm) { if (auto *CI = dyn_cast<ConstantSDNode>(N)) { uint64_t C = CI->getZExtValue(); + + if (C > Max) + return false; + Imm = CurDAG->getRegister(BaseReg + C, MVT::Other); return true; } return false; } - /// Form sequences of consecutive 64/128-bit registers for use in NEON /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have /// between 1 and 4 elements. If it contains a single element that is returned diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index d42ae4ff93a4442b..c4b40b7c854f6274 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -2748,6 +2748,22 @@ AArch64TargetLowering::EmitFill(MachineInstr &MI, MachineBasicBlock *BB) const { return BB; } +MachineBasicBlock *AArch64TargetLowering::EmitZTSpillFill(MachineInstr &MI, + MachineBasicBlock *BB, + bool IsSpill) const { + const TargetInstrInfo *TII = Subtarget->getInstrInfo(); + MachineInstrBuilder MIB; + if (IsSpill) { + MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::STR_TX)); + MIB.addReg(MI.getOperand(0).getReg()); + } else + MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::LDR_TX), + MI.getOperand(0).getReg()); + MIB.add(MI.getOperand(1)); // Base + MI.eraseFromParent(); // The pseudo is gone now. + return BB; +} + MachineBasicBlock * AArch64TargetLowering::EmitZAInstr(unsigned Opc, unsigned BaseReg, MachineInstr &MI, @@ -2864,6 +2880,10 @@ MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter( return EmitTileLoad(AArch64::LD1_MXIPXX_V_Q, AArch64::ZAQ0, MI, BB); case AArch64::LDR_ZA_PSEUDO: return EmitFill(MI, BB); + case AArch64::LDR_TX_PSEUDO: + return EmitZTSpillFill(MI, BB, /*IsSpill=*/false); + case AArch64::STR_TX_PSEUDO: + return EmitZTSpillFill(MI, BB, /*IsSpill=*/true); case AArch64::ZERO_M_PSEUDO: return EmitZero(MI, BB); } diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 2a039488f2a9ab36..b690b996d8cddff8 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -612,6 +612,8 @@ class AArch64TargetLowering : public TargetLowering { MachineBasicBlock *EmitZAInstr(unsigned Opc, unsigned BaseReg, MachineInstr &MI, MachineBasicBlock *BB, bool HasTile) const; + MachineBasicBlock *EmitZTSpillFill(MachineInstr &MI, MachineBasicBlock *BB, + bool IsSpill) const; MachineBasicBlock *EmitZero(MachineInstr &MI, MachineBasicBlock *BB) const; MachineBasicBlock * diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp index ed64a7b4984c17cd..24ba9dd95004c6f2 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -440,6 +440,12 @@ AArch64RegisterInfo::getStrictlyReservedRegs(const MachineFunction &MF) const { Reserved.set(SubReg); } + if (MF.getSubtarget<AArch64Subtarget>().hasSME2()) { + for (MCSubRegIterator SubReg(AArch64::ZT0, this, /*self=*/true); + SubReg.isValid(); ++SubReg) + Reserved.set(*SubReg); + } + markSuperRegs(Reserved, AArch64::FPCR); if (MF.getFunction().getCallingConv() == CallingConv::GRAAL) { diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td index bb9464a8d2e1cf28..fcfa5f82a3809c2c 100644 --- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td @@ -541,8 +541,8 @@ defm UMOPS_MPPZZ_HtoS : sme2_int_mopx_tile<"umops", 0b101, int_aarch64_sme_umops def ZERO_T : sme2_zero_zt<"zero", 0b0001>; -def LDR_TX : sme2_spill_fill_vector<"ldr", 0b01111100>; -def STR_TX : sme2_spill_fill_vector<"str", 0b11111100>; +defm LDR_TX : sme2_spill_fill_vector<"ldr", 0b01111100, int_aarch64_sme_ldr_zt>; +defm STR_TX : sme2_spill_fill_vector<"str", 0b11111100, int_aarch64_sme_str_zt>; def MOVT_XTI : sme2_movt_zt_to_scalar<"movt", 0b0011111>; def MOVT_TIX : sme2_movt_scalar_to_zt<"movt", 0b0011111>; diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td index 6c9b1f11a4decdee..ef9c323e25bc3585 100644 --- a/llvm/lib/Target/AArch64/SMEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td @@ -10,11 +10,12 @@ // //===----------------------------------------------------------------------===// -def imm_to_tile8 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAB0>", []>; -def imm_to_tile16 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAH0>", []>; -def imm_to_tile32 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAS0>", []>; -def imm_to_tile64 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAD0>", []>; -def imm_to_tile128 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAQ0>", []>; +def imm_to_tile8 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAB0, 0>", []>; +def imm_to_tile16 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAH0, 1>", []>; +def imm_to_tile32 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAS0, 3>", []>; +def imm_to_tile64 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAD0, 7>", []>; +def imm_to_tile128 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAQ0, 15>", []>; +def imm_to_zt : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZT0, 0>", []>; def tileslice8 : ComplexPattern<i32 , 2, "SelectSMETileSlice<15, 1>", []>; def tileslice16 : ComplexPattern<i32 , 2, "SelectSMETileSlice<7, 1>", []>; @@ -3137,6 +3138,18 @@ class sme2_spill_fill_vector<string mnemonic, bits<8> opc> let mayStore = opc{7}; } + +multiclass sme2_spill_fill_vector<string mnemonic, bits<8> opc, SDPatternOperator op> { + def NAME : sme2_spill_fill_vector<mnemonic, opc>; + def NAME # _PSEUDO + : Pseudo<(outs), (ins ZTR:$ZTt, GPR64sp:$base), []>, Sched<[]> { + // Translated to actual instruction in AArch64ISelLowering.cpp + let usesCustomInserter = 1; + } + def : Pat<(op (imm_to_zt untyped:$tile), GPR64sp:$base), + (!cast<Instruction>(NAME # _PSEUDO) $tile, $base)>; +} + //===----------------------------------------------------------------------===/// // SME2 move to/from lookup table class sme2_movt_zt_to_scalar<string mnemonic, bits<7> opc> diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-zt0.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-zt0.ll new file mode 100644 index 0000000000000000..30205d86f2fb2034 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-zt0.ll @@ -0,0 +1,27 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -verify-machineinstrs < %s | FileCheck %s + +; LDR + +define void @ldr_zt0(ptr %ptr) { +; CHECK-LABEL: ldr_zt0: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr zt0, [x0] +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.ldr.zt(i32 0, ptr %ptr) + ret void; +} + +; STR + +define void @str_zt0(ptr %ptr) { +; CHECK-LABEL: str_zt0: +; CHECK: // %bb.0: +; CHECK-NEXT: str zt0, [x0] +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.str.zt(i32 0, ptr %ptr) + ret void; +} + +declare void @llvm.aarch64.sme.ldr.zt(i32, ptr) +declare void @llvm.aarch64.sme.str.zt(i32, ptr) _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits