[clang] [llvm] [AArch64][SME] Remove immediate argument restriction for svldr and svstr (PR #68565)
@@ -4825,6 +4827,72 @@ SDValue AArch64TargetLowering::getPStateSM(SelectionDAG &DAG, SDValue Chain, Mask); } +SDValue LowerSMELdrStr(SDValue N, SelectionDAG &DAG, bool IsLoad) { + // Lower an SME LDR/STR ZA intrinsic to LDR_ZA_PSEUDO or STR_ZA. + // If the vector number is an immediate between 0 and 15 inclusive then we can + // put that directly into the immediate field of the instruction. If it's + // outside of that range then we modify the base and slice by the greatest + // multiple of 15 smaller than that number and put the remainder in the + // instruction field. If it's not an immediate then we modify the base and + // slice registers by that number and put 0 in the instruction. + SDLoc DL(N); + + SDValue TileSlice = N->getOperand(2); + SDValue Base = N->getOperand(3); + SDValue VecNum = N->getOperand(4); + SDValue Remainder = DAG.getTargetConstant(0, DL, MVT::i32); + + // true if the base and slice registers need to me modified + bool NeedsAdd = true; + if (auto ImmNode = dyn_cast(VecNum)) { +int Imm = ImmNode->getSExtValue(); +if (Imm >= 0 && Imm <= 15) { + Remainder = DAG.getTargetConstant(Imm, DL, MVT::i32); + NeedsAdd = false; +} else { + Remainder = DAG.getTargetConstant(Imm % 15, DL, MVT::i32); + NeedsAdd = true; + VecNum = DAG.getConstant(Imm - (Imm % 15), DL, MVT::i32); +} + } else if (VecNum.getOpcode() == ISD::ADD) { +// If the vnum is an add, we can fold that add into the instruction if the +// operand is an immediate in range +if (auto ImmNode = dyn_cast(VecNum.getOperand(1))) { + int Imm = ImmNode->getSExtValue(); + if (Imm >= 0 && Imm <= 15) { +VecNum = VecNum.getOperand(0); +Remainder = DAG.getTargetConstant(Imm, DL, MVT::i32); +NeedsAdd = true; + } +} + } + if (NeedsAdd) { +// Get the vector length that will be multiplied by vnum +auto SVL = DAG.getNode(AArch64ISD::RDSVL, DL, MVT::i64, + DAG.getConstant(1, DL, MVT::i32)); + +// Multiply SVL and vnum then add it to the base +// Just add vnum to the tileslice +SDValue BaseMulOps[] = { +SVL, VecNum.getValueType() == MVT::i32 + ? DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, VecNum) SamTebbs33 wrote: We do indeed, I'll remove the condition. https://github.com/llvm/llvm-project/pull/68565 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64][SME] Remove immediate argument restriction for svldr and svstr (PR #68565)
https://github.com/SamTebbs33 updated https://github.com/llvm/llvm-project/pull/68565 >From de07976922782b9dcf5d13d44551b782dc8b3b94 Mon Sep 17 00:00:00 2001 From: Samuel Tebbs Date: Fri, 6 Oct 2023 17:09:36 +0100 Subject: [PATCH 1/6] [AArch64][SME] Remove immediate argument restriction for svldr and svstr The svldr_vnum_za and svstr_vnum_za builtins/intrinsics currently require that the vnum argument be an immediate, since the instructions take an immediate vector number. However, we emit 0 as the immediate for the instruction no matter what, and instead modify the base register. This patch removes that restriction on the argument, so that the argument can be a non-immediate. If an appropriate immediate was passed to the builtin then CGBuiltin passes that directly to the LLVM intrinsic, otherwise it modifies the base register as is existing behaviour. --- clang/lib/CodeGen/CGBuiltin.cpp | 45 .../aarch64-sme-intrinsics/acle_sme_ldr.c | 71 --- .../aarch64-sme-intrinsics/acle_sme_str.c | 51 - llvm/include/llvm/IR/IntrinsicsAArch64.td | 4 +- llvm/lib/Target/AArch64/SMEInstrFormats.td| 10 +-- .../CostModel/ARM/unaligned_double_load.ll| 59 +++ .../CodeGen/AArch64/sme-intrinsics-loads.ll | 33 +++-- 7 files changed, 166 insertions(+), 107 deletions(-) create mode 100644 llvm/test/Analysis/CostModel/ARM/unaligned_double_load.ll diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index e1211bb8949b665..63508b40096141e 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -9712,6 +9712,11 @@ Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E, return Store; } +Value *CodeGenFunction::EmitTileslice(Value *Offset, Value *Base) { + llvm::Value *CastOffset = Builder.CreateIntCast(Offset, Int64Ty, false); + return Builder.CreateAdd(Base, CastOffset, "tileslice"); +} + Value *CodeGenFunction::EmitSMELd1St1(const SVETypeFlags &TypeFlags, SmallVectorImpl &Ops, unsigned IntID) { @@ -9767,18 +9772,34 @@ Value *CodeGenFunction::EmitSMEZero(const SVETypeFlags &TypeFlags, Value *CodeGenFunction::EmitSMELdrStr(const SVETypeFlags &TypeFlags, SmallVectorImpl &Ops, unsigned IntID) { - if (Ops.size() == 3) { -Function *Cntsb = CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb); -llvm::Value *CntsbCall = Builder.CreateCall(Cntsb, {}, "svlb"); - -llvm::Value *VecNum = Ops[2]; -llvm::Value *MulVL = Builder.CreateMul(CntsbCall, VecNum, "mulvl"); - -Ops[1] = Builder.CreateGEP(Int8Ty, Ops[1], MulVL); -Ops[0] = Builder.CreateAdd( -Ops[0], Builder.CreateIntCast(VecNum, Int32Ty, true), "tileslice"); -Ops.erase(&Ops[2]); - } + if (Ops.size() == 2) { +// Intrinsics without a vecnum also use this function, so just provide 0 +Ops.push_back(Ops[1]); +Ops[1] = Builder.getInt32(0); + } else { +int Imm = -1; +if (ConstantInt* C = dyn_cast(Ops[2])) + if (C->getZExtValue() <= 15) + Imm = C->getZExtValue(); + +if (Imm != -1) { + Ops[2] = Ops[1]; + Ops[1] = Builder.getInt32(Imm); +} else { + Function *Cntsb = CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb); + llvm::Value *CntsbCall = Builder.CreateCall(Cntsb, {}, "svlb"); + + llvm::Value *VecNum = Ops[2]; + llvm::Value *MulVL = Builder.CreateMul( + CntsbCall, + VecNum, + "mulvl"); + + Ops[2] = Builder.CreateGEP(Int8Ty, Ops[1], MulVL); + Ops[1] = Builder.getInt32(0); + Ops[0] = Builder.CreateIntCast(EmitTileslice(Ops[0], VecNum), Int32Ty, false); +} + } Function *F = CGM.getIntrinsic(IntID, {}); return Builder.CreateCall(F, Ops); } diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c index e85c47072f2df80..8e07cf1d11c19b2 100644 --- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c @@ -6,57 +6,46 @@ #include -// CHECK-C-LABEL: define dso_local void @test_svldr_vnum_za( -// CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { -// CHECK-C-NEXT: entry: -// CHECK-C-NEXT:tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE]], ptr [[PTR]]) -// CHECK-C-NEXT:ret void -// -// CHECK-CXX-LABEL: define dso_local void @_Z18test_svldr_vnum_zajPKv( -// CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { -// CHECK-CXX-NEXT: entry: -// CHECK-CXX-NEXT:tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE]], ptr [[PTR]]) -// CHECK-CXX-NEXT:ret void +// CHECK-C-LABEL: @test_svldr_vnum_za( +// CHECK-CXX-LABEL: @_Z18test_svldr_vnum_zajPK
[clang] [llvm] [AArch64][SME] Remove immediate argument restriction for svldr and svstr (PR #68565)
@@ -1741,6 +1742,69 @@ void AArch64DAGToDAGISel::SelectCVTIntrinsic(SDNode *N, unsigned NumVecs, CurDAG->RemoveDeadNode(N); } +void AArch64DAGToDAGISel::SelectSMELdrStrZA(SDNode *N, bool IsLoad) { + // Lower an SME LDR/STR ZA intrinsic to LDR_ZA_PSEUDO or STR_ZA. + // If the vector number is an immediate between 0 and 15 inclusive then we can + // put that directly into the immediate field of the instruction. If it's + // outside of that range then we modify the base and slice by the greatest + // multiple of 15 smaller than that number and put the remainder in the + // instruction field. If it's not an immediate then we modify the base and + // slice registers by that number and put 0 in the instruction. + SDLoc DL(N); + + SDValue TileSlice = N->getOperand(2); + SDValue Base = N->getOperand(3); + SDValue VecNum = N->getOperand(4); + SDValue Remainder = CurDAG->getTargetConstant(0, DL, MVT::i32); + + // true if the base and slice registers need to me modified + bool NeedsAdd = true; + if (auto ImmNode = dyn_cast(VecNum)) { +int Imm = ImmNode->getSExtValue(); +if (Imm >= 0 && Imm <= 15) { + Remainder = CurDAG->getTargetConstant(Imm, DL, MVT::i32); + NeedsAdd = false; +} else { + Remainder = CurDAG->getTargetConstant(Imm % 15, DL, MVT::i32); SamTebbs33 wrote: You are right, thank you. I've updated it to modulo 16 instead and added some tests for higher immediates. https://github.com/llvm/llvm-project/pull/68565 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64][SME] Remove immediate argument restriction for svldr and svstr (PR #68565)
https://github.com/SamTebbs33 updated https://github.com/llvm/llvm-project/pull/68565 >From de07976922782b9dcf5d13d44551b782dc8b3b94 Mon Sep 17 00:00:00 2001 From: Samuel Tebbs Date: Fri, 6 Oct 2023 17:09:36 +0100 Subject: [PATCH 1/7] [AArch64][SME] Remove immediate argument restriction for svldr and svstr The svldr_vnum_za and svstr_vnum_za builtins/intrinsics currently require that the vnum argument be an immediate, since the instructions take an immediate vector number. However, we emit 0 as the immediate for the instruction no matter what, and instead modify the base register. This patch removes that restriction on the argument, so that the argument can be a non-immediate. If an appropriate immediate was passed to the builtin then CGBuiltin passes that directly to the LLVM intrinsic, otherwise it modifies the base register as is existing behaviour. --- clang/lib/CodeGen/CGBuiltin.cpp | 45 .../aarch64-sme-intrinsics/acle_sme_ldr.c | 71 --- .../aarch64-sme-intrinsics/acle_sme_str.c | 51 - llvm/include/llvm/IR/IntrinsicsAArch64.td | 4 +- llvm/lib/Target/AArch64/SMEInstrFormats.td| 10 +-- .../CostModel/ARM/unaligned_double_load.ll| 59 +++ .../CodeGen/AArch64/sme-intrinsics-loads.ll | 33 +++-- 7 files changed, 166 insertions(+), 107 deletions(-) create mode 100644 llvm/test/Analysis/CostModel/ARM/unaligned_double_load.ll diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index e1211bb8949b665..63508b40096141e 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -9712,6 +9712,11 @@ Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E, return Store; } +Value *CodeGenFunction::EmitTileslice(Value *Offset, Value *Base) { + llvm::Value *CastOffset = Builder.CreateIntCast(Offset, Int64Ty, false); + return Builder.CreateAdd(Base, CastOffset, "tileslice"); +} + Value *CodeGenFunction::EmitSMELd1St1(const SVETypeFlags &TypeFlags, SmallVectorImpl &Ops, unsigned IntID) { @@ -9767,18 +9772,34 @@ Value *CodeGenFunction::EmitSMEZero(const SVETypeFlags &TypeFlags, Value *CodeGenFunction::EmitSMELdrStr(const SVETypeFlags &TypeFlags, SmallVectorImpl &Ops, unsigned IntID) { - if (Ops.size() == 3) { -Function *Cntsb = CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb); -llvm::Value *CntsbCall = Builder.CreateCall(Cntsb, {}, "svlb"); - -llvm::Value *VecNum = Ops[2]; -llvm::Value *MulVL = Builder.CreateMul(CntsbCall, VecNum, "mulvl"); - -Ops[1] = Builder.CreateGEP(Int8Ty, Ops[1], MulVL); -Ops[0] = Builder.CreateAdd( -Ops[0], Builder.CreateIntCast(VecNum, Int32Ty, true), "tileslice"); -Ops.erase(&Ops[2]); - } + if (Ops.size() == 2) { +// Intrinsics without a vecnum also use this function, so just provide 0 +Ops.push_back(Ops[1]); +Ops[1] = Builder.getInt32(0); + } else { +int Imm = -1; +if (ConstantInt* C = dyn_cast(Ops[2])) + if (C->getZExtValue() <= 15) + Imm = C->getZExtValue(); + +if (Imm != -1) { + Ops[2] = Ops[1]; + Ops[1] = Builder.getInt32(Imm); +} else { + Function *Cntsb = CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb); + llvm::Value *CntsbCall = Builder.CreateCall(Cntsb, {}, "svlb"); + + llvm::Value *VecNum = Ops[2]; + llvm::Value *MulVL = Builder.CreateMul( + CntsbCall, + VecNum, + "mulvl"); + + Ops[2] = Builder.CreateGEP(Int8Ty, Ops[1], MulVL); + Ops[1] = Builder.getInt32(0); + Ops[0] = Builder.CreateIntCast(EmitTileslice(Ops[0], VecNum), Int32Ty, false); +} + } Function *F = CGM.getIntrinsic(IntID, {}); return Builder.CreateCall(F, Ops); } diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c index e85c47072f2df80..8e07cf1d11c19b2 100644 --- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c @@ -6,57 +6,46 @@ #include -// CHECK-C-LABEL: define dso_local void @test_svldr_vnum_za( -// CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { -// CHECK-C-NEXT: entry: -// CHECK-C-NEXT:tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE]], ptr [[PTR]]) -// CHECK-C-NEXT:ret void -// -// CHECK-CXX-LABEL: define dso_local void @_Z18test_svldr_vnum_zajPKv( -// CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { -// CHECK-CXX-NEXT: entry: -// CHECK-CXX-NEXT:tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE]], ptr [[PTR]]) -// CHECK-CXX-NEXT:ret void +// CHECK-C-LABEL: @test_svldr_vnum_za( +// CHECK-CXX-LABEL: @_Z18test_svldr_vnum_zajPK
[clang] [llvm] [AArch64][SME] Remove immediate argument restriction for svldr and svstr (PR #68565)
https://github.com/SamTebbs33 updated https://github.com/llvm/llvm-project/pull/68565 >From de07976922782b9dcf5d13d44551b782dc8b3b94 Mon Sep 17 00:00:00 2001 From: Samuel Tebbs Date: Fri, 6 Oct 2023 17:09:36 +0100 Subject: [PATCH 1/9] [AArch64][SME] Remove immediate argument restriction for svldr and svstr The svldr_vnum_za and svstr_vnum_za builtins/intrinsics currently require that the vnum argument be an immediate, since the instructions take an immediate vector number. However, we emit 0 as the immediate for the instruction no matter what, and instead modify the base register. This patch removes that restriction on the argument, so that the argument can be a non-immediate. If an appropriate immediate was passed to the builtin then CGBuiltin passes that directly to the LLVM intrinsic, otherwise it modifies the base register as is existing behaviour. --- clang/lib/CodeGen/CGBuiltin.cpp | 45 .../aarch64-sme-intrinsics/acle_sme_ldr.c | 71 --- .../aarch64-sme-intrinsics/acle_sme_str.c | 51 - llvm/include/llvm/IR/IntrinsicsAArch64.td | 4 +- llvm/lib/Target/AArch64/SMEInstrFormats.td| 10 +-- .../CostModel/ARM/unaligned_double_load.ll| 59 +++ .../CodeGen/AArch64/sme-intrinsics-loads.ll | 33 +++-- 7 files changed, 166 insertions(+), 107 deletions(-) create mode 100644 llvm/test/Analysis/CostModel/ARM/unaligned_double_load.ll diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index e1211bb8949b665..63508b40096141e 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -9712,6 +9712,11 @@ Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E, return Store; } +Value *CodeGenFunction::EmitTileslice(Value *Offset, Value *Base) { + llvm::Value *CastOffset = Builder.CreateIntCast(Offset, Int64Ty, false); + return Builder.CreateAdd(Base, CastOffset, "tileslice"); +} + Value *CodeGenFunction::EmitSMELd1St1(const SVETypeFlags &TypeFlags, SmallVectorImpl &Ops, unsigned IntID) { @@ -9767,18 +9772,34 @@ Value *CodeGenFunction::EmitSMEZero(const SVETypeFlags &TypeFlags, Value *CodeGenFunction::EmitSMELdrStr(const SVETypeFlags &TypeFlags, SmallVectorImpl &Ops, unsigned IntID) { - if (Ops.size() == 3) { -Function *Cntsb = CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb); -llvm::Value *CntsbCall = Builder.CreateCall(Cntsb, {}, "svlb"); - -llvm::Value *VecNum = Ops[2]; -llvm::Value *MulVL = Builder.CreateMul(CntsbCall, VecNum, "mulvl"); - -Ops[1] = Builder.CreateGEP(Int8Ty, Ops[1], MulVL); -Ops[0] = Builder.CreateAdd( -Ops[0], Builder.CreateIntCast(VecNum, Int32Ty, true), "tileslice"); -Ops.erase(&Ops[2]); - } + if (Ops.size() == 2) { +// Intrinsics without a vecnum also use this function, so just provide 0 +Ops.push_back(Ops[1]); +Ops[1] = Builder.getInt32(0); + } else { +int Imm = -1; +if (ConstantInt* C = dyn_cast(Ops[2])) + if (C->getZExtValue() <= 15) + Imm = C->getZExtValue(); + +if (Imm != -1) { + Ops[2] = Ops[1]; + Ops[1] = Builder.getInt32(Imm); +} else { + Function *Cntsb = CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb); + llvm::Value *CntsbCall = Builder.CreateCall(Cntsb, {}, "svlb"); + + llvm::Value *VecNum = Ops[2]; + llvm::Value *MulVL = Builder.CreateMul( + CntsbCall, + VecNum, + "mulvl"); + + Ops[2] = Builder.CreateGEP(Int8Ty, Ops[1], MulVL); + Ops[1] = Builder.getInt32(0); + Ops[0] = Builder.CreateIntCast(EmitTileslice(Ops[0], VecNum), Int32Ty, false); +} + } Function *F = CGM.getIntrinsic(IntID, {}); return Builder.CreateCall(F, Ops); } diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c index e85c47072f2df80..8e07cf1d11c19b2 100644 --- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c @@ -6,57 +6,46 @@ #include -// CHECK-C-LABEL: define dso_local void @test_svldr_vnum_za( -// CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { -// CHECK-C-NEXT: entry: -// CHECK-C-NEXT:tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE]], ptr [[PTR]]) -// CHECK-C-NEXT:ret void -// -// CHECK-CXX-LABEL: define dso_local void @_Z18test_svldr_vnum_zajPKv( -// CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { -// CHECK-CXX-NEXT: entry: -// CHECK-CXX-NEXT:tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE]], ptr [[PTR]]) -// CHECK-CXX-NEXT:ret void +// CHECK-C-LABEL: @test_svldr_vnum_za( +// CHECK-CXX-LABEL: @_Z18test_svldr_vnum_zajPK
[clang] [llvm] [AArch64][SME] Remove immediate argument restriction for svldr and svstr (PR #68565)
@@ -4825,6 +4827,72 @@ SDValue AArch64TargetLowering::getPStateSM(SelectionDAG &DAG, SDValue Chain, Mask); } +SDValue LowerSMELdrStr(SDValue N, SelectionDAG &DAG, bool IsLoad) { + // Lower an SME LDR/STR ZA intrinsic to LDR_ZA_PSEUDO or STR_ZA. + // If the vector number is an immediate between 0 and 15 inclusive then we can + // put that directly into the immediate field of the instruction. If it's + // outside of that range then we modify the base and slice by the greatest + // multiple of 15 smaller than that number and put the remainder in the + // instruction field. If it's not an immediate then we modify the base and + // slice registers by that number and put 0 in the instruction. + SDLoc DL(N); + + SDValue TileSlice = N->getOperand(2); + SDValue Base = N->getOperand(3); + SDValue VecNum = N->getOperand(4); + SDValue Remainder = DAG.getTargetConstant(0, DL, MVT::i32); + + // true if the base and slice registers need to me modified + bool NeedsAdd = true; + if (auto ImmNode = dyn_cast(VecNum)) { +int Imm = ImmNode->getSExtValue(); +if (Imm >= 0 && Imm <= 15) { + Remainder = DAG.getTargetConstant(Imm, DL, MVT::i32); + NeedsAdd = false; +} else { + Remainder = DAG.getTargetConstant(Imm % 15, DL, MVT::i32); + NeedsAdd = true; + VecNum = DAG.getConstant(Imm - (Imm % 15), DL, MVT::i32); +} + } else if (VecNum.getOpcode() == ISD::ADD) { +// If the vnum is an add, we can fold that add into the instruction if the +// operand is an immediate in range +if (auto ImmNode = dyn_cast(VecNum.getOperand(1))) { + int Imm = ImmNode->getSExtValue(); + if (Imm >= 0 && Imm <= 15) { SamTebbs33 wrote: My latest two commits do this, which improves the codegen as well since we properly fold successive loads/stores with adds of large immediates. See the test [here](https://media.tenor.com/vYhPH4gA184C/vroom-car.gif). Thanks for the idea! https://github.com/llvm/llvm-project/pull/68565 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [mlir] [llvm] [AArch64][SME] Remove immediate argument restriction for svldr and svstr (PR #68565)
https://github.com/SamTebbs33 updated https://github.com/llvm/llvm-project/pull/68565 >From de07976922782b9dcf5d13d44551b782dc8b3b94 Mon Sep 17 00:00:00 2001 From: Samuel Tebbs Date: Fri, 6 Oct 2023 17:09:36 +0100 Subject: [PATCH 01/10] [AArch64][SME] Remove immediate argument restriction for svldr and svstr The svldr_vnum_za and svstr_vnum_za builtins/intrinsics currently require that the vnum argument be an immediate, since the instructions take an immediate vector number. However, we emit 0 as the immediate for the instruction no matter what, and instead modify the base register. This patch removes that restriction on the argument, so that the argument can be a non-immediate. If an appropriate immediate was passed to the builtin then CGBuiltin passes that directly to the LLVM intrinsic, otherwise it modifies the base register as is existing behaviour. --- clang/lib/CodeGen/CGBuiltin.cpp | 45 .../aarch64-sme-intrinsics/acle_sme_ldr.c | 71 --- .../aarch64-sme-intrinsics/acle_sme_str.c | 51 - llvm/include/llvm/IR/IntrinsicsAArch64.td | 4 +- llvm/lib/Target/AArch64/SMEInstrFormats.td| 10 +-- .../CostModel/ARM/unaligned_double_load.ll| 59 +++ .../CodeGen/AArch64/sme-intrinsics-loads.ll | 33 +++-- 7 files changed, 166 insertions(+), 107 deletions(-) create mode 100644 llvm/test/Analysis/CostModel/ARM/unaligned_double_load.ll diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index e1211bb8949b665..63508b40096141e 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -9712,6 +9712,11 @@ Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E, return Store; } +Value *CodeGenFunction::EmitTileslice(Value *Offset, Value *Base) { + llvm::Value *CastOffset = Builder.CreateIntCast(Offset, Int64Ty, false); + return Builder.CreateAdd(Base, CastOffset, "tileslice"); +} + Value *CodeGenFunction::EmitSMELd1St1(const SVETypeFlags &TypeFlags, SmallVectorImpl &Ops, unsigned IntID) { @@ -9767,18 +9772,34 @@ Value *CodeGenFunction::EmitSMEZero(const SVETypeFlags &TypeFlags, Value *CodeGenFunction::EmitSMELdrStr(const SVETypeFlags &TypeFlags, SmallVectorImpl &Ops, unsigned IntID) { - if (Ops.size() == 3) { -Function *Cntsb = CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb); -llvm::Value *CntsbCall = Builder.CreateCall(Cntsb, {}, "svlb"); - -llvm::Value *VecNum = Ops[2]; -llvm::Value *MulVL = Builder.CreateMul(CntsbCall, VecNum, "mulvl"); - -Ops[1] = Builder.CreateGEP(Int8Ty, Ops[1], MulVL); -Ops[0] = Builder.CreateAdd( -Ops[0], Builder.CreateIntCast(VecNum, Int32Ty, true), "tileslice"); -Ops.erase(&Ops[2]); - } + if (Ops.size() == 2) { +// Intrinsics without a vecnum also use this function, so just provide 0 +Ops.push_back(Ops[1]); +Ops[1] = Builder.getInt32(0); + } else { +int Imm = -1; +if (ConstantInt* C = dyn_cast(Ops[2])) + if (C->getZExtValue() <= 15) + Imm = C->getZExtValue(); + +if (Imm != -1) { + Ops[2] = Ops[1]; + Ops[1] = Builder.getInt32(Imm); +} else { + Function *Cntsb = CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb); + llvm::Value *CntsbCall = Builder.CreateCall(Cntsb, {}, "svlb"); + + llvm::Value *VecNum = Ops[2]; + llvm::Value *MulVL = Builder.CreateMul( + CntsbCall, + VecNum, + "mulvl"); + + Ops[2] = Builder.CreateGEP(Int8Ty, Ops[1], MulVL); + Ops[1] = Builder.getInt32(0); + Ops[0] = Builder.CreateIntCast(EmitTileslice(Ops[0], VecNum), Int32Ty, false); +} + } Function *F = CGM.getIntrinsic(IntID, {}); return Builder.CreateCall(F, Ops); } diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c index e85c47072f2df80..8e07cf1d11c19b2 100644 --- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c @@ -6,57 +6,46 @@ #include -// CHECK-C-LABEL: define dso_local void @test_svldr_vnum_za( -// CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { -// CHECK-C-NEXT: entry: -// CHECK-C-NEXT:tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE]], ptr [[PTR]]) -// CHECK-C-NEXT:ret void -// -// CHECK-CXX-LABEL: define dso_local void @_Z18test_svldr_vnum_zajPKv( -// CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { -// CHECK-CXX-NEXT: entry: -// CHECK-CXX-NEXT:tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE]], ptr [[PTR]]) -// CHECK-CXX-NEXT:ret void +// CHECK-C-LABEL: @test_svldr_vnum_za( +// CHECK-CXX-LABEL: @_Z18test_svldr_vnum_zaj
[clang] [Clang][SME2] Add outer product and accumulate/subtract builtins (PR #71176)
https://github.com/SamTebbs33 approved this pull request. https://github.com/llvm/llvm-project/pull/71176 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SME2] Add single and multi min and max builtins (PR #71688)
@@ -296,5 +296,28 @@ multiclass ZAAddSub { } } + +// SME2 - MIN, MAX + +multiclass MinMaxIntr { + def SVS # NAME : SInst<"sv" # i # "[" # zm # "_{d}_" # mul # "]", t, "csil", MergeNone, "aarch64_sve_s" # i # zm # "_" # mul, [IsStreaming], []>; + def SVU # NAME : SInst<"sv" # i # "[" # zm # "_{d}_" # mul # "]", t, "UcUsUiUl", MergeNone, "aarch64_sve_u" # i # zm # "_" # mul, [IsStreaming], []>; + def SVF # NAME : SInst<"sv" # i # "[" # zm # "_{d}_" # mul # "]", t, "hfd", MergeNone, "aarch64_sve_f" # i # zm # "_" # mul, [IsStreaming], []>; +} + +let TargetGuard = "sme2" in { + // SMAX / UMAX / FMAX + defm MAX_SINGLE_X2 : MinMaxIntr<"max", "_single", "x2", "22d">; + defm MAX_MULTI_X2 : MinMaxIntr<"max", "","x2", "222">; + defm MAX_SINGLE_X4 : MinMaxIntr<"max", "_single", "x4", "44d">; + defm MAX_MULTI_X4 : MinMaxIntr<"max", "","x4", "444">; + + // SMIN / UMIN / FMIN + defm MIN_SINGLE_X2 : MinMaxIntr<"min", "_single", "x2", "22d">; + defm MIN_MULTI_X2 : MinMaxIntr<"min", "","x2", "222">; + defm MIN_SINGLE_X4 : MinMaxIntr<"min", "_single", "x4", "44d">; + defm MIN_MULTI_X4 : MinMaxIntr<"min", "","x4", "444">; SamTebbs33 wrote: Done, thanks Kerry. https://github.com/llvm/llvm-project/pull/71688 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SME2] Add single and multi min/max by vector builtins (PR #71707)
https://github.com/SamTebbs33 created https://github.com/llvm/llvm-project/pull/71707 Adds the following SME2 builtins: svminnm_single_f(16|32|64)_x(2|4) svminnm_f(16|32|64)_x(2|4) svmaxnm_single_f(16|32|64)_x(2|4) svmaxnm_f(16|32|64)_x(2|4) See [ARM-software/acle#217](https://github.com/ARM-software/acle/pull/217) >From 1ca052c831f0ec5a56320b78fb7b9755d9ff93da Mon Sep 17 00:00:00 2001 From: Samuel Tebbs Date: Wed, 8 Nov 2023 17:19:10 + Subject: [PATCH] [Clang][SME2] Add single and multi min/max by vector builtins Adds the following SME2 builtins: svminnm_single_f(16|32|64)_x(2|4) svminnm_f(16|32|64)_x(2|4) svmaxnm_single_f(16|32|64)_x(2|4) svmaxnm_f(16|32|64)_x(2|4) See [ARM-software/acle#217](https://github.com/ARM-software/acle/pull/217) --- clang/include/clang/Basic/arm_sve.td | 12 + .../aarch64-sme2-intrinsics/acle_sme2_maxnm.c | 444 ++ .../aarch64-sme2-intrinsics/acle_sme2_minnm.c | 444 ++ 3 files changed, 900 insertions(+) create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_maxnm.c create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_minnm.c diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 3d4c2129565903d..788025f893efe10 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -1987,8 +1987,20 @@ defm SVREVD : SInstZPZ<"svrevd", "csilUcUsUiUl", "aarch64_sve_revd">; // SME intrinsics which operate only on vectors and do not require ZA should be added here, // as they could possibly become SVE instructions in the future. +multiclass SInstMinMaxByVector { + def NAME # _SINGLE_X2 : SInst<"sv" # name # "nm[_single_{d}_x2]", "22d", "hfd", MergeNone, "aarch64_sve_f" # name # "nm_single_x2", [IsStreaming], []>; + def NAME # _SINGLE_X4 : SInst<"sv" # name # "nm[_single_{d}_x4]", "44d", "hfd", MergeNone, "aarch64_sve_f" # name # "nm_single_x4", [IsStreaming], []>; + + def NAME # _X2 : SInst<"sv" # name # "nm[_{d}_x2]", "222", "hfd", MergeNone, "aarch64_sve_f" # name # "nm_x2", [IsStreaming], []>; + def NAME # _X4 : SInst<"sv" # name # "nm[_{d}_x4]", "444", "hfd", MergeNone, "aarch64_sve_f" # name # "nm_x4", [IsStreaming], []>; +} + let TargetGuard = "sme2" in { // == ADD (vectors) == def SVADD_SINGLE_X2 : SInst<"svadd[_single_{d}_x2]", "22d", "cUcsUsiUilUl", MergeNone, "aarch64_sve_add_single_x2", [IsStreaming], []>; def SVADD_SINGLE_X4 : SInst<"svadd[_single_{d}_x4]", "44d", "cUcsUsiUilUl", MergeNone, "aarch64_sve_add_single_x4", [IsStreaming], []>; + +// == FMINNM / FMAXNM == + defm SVMINNM : SInstMinMaxByVector<"min">; + defm SVMAXNM : SInstMinMaxByVector<"max">; } diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_maxnm.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_maxnm.c new file mode 100644 index 000..bb8299ba3e23496 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_maxnm.c @@ -0,0 +1,444 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: aarch64-registered-target + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED,A5) A1##A3##A5 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4,A5) A1##A2##A3##A4##A5 +#endif + +// Single, x2 + +// CHECK-LABEL: @test_svmaxnm_single_f16_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZDN:%.*]], i64 0) +// CHECK-NEXT:[[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZDN]], i64 8) +// CHECK-NEXT:[[TMP2:%.*]] = tail call { , } @llvm.aarch64.sve.fmaxnm.single.x2.nxv8f16( [[TMP0]], [[TMP1]], [[ZM:%.*]]) +// CHECK-NEXT:[[TMP3:%.*]] = extractvalue { , } [[TMP2]]
[llvm] [clang] [AArch64][SME2] Add ldr_zt, str_zt builtins and intrinsics (PR #71795)
SamTebbs33 wrote: Considering the definitions are commented out, are these changes necessary? https://github.com/llvm/llvm-project/pull/71795 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AArch64][SME2] Enable bfm builtings for sme2 (PR #71927)
https://github.com/SamTebbs33 created https://github.com/llvm/llvm-project/pull/71927 This patch enables the following builtins for SME2 svbfmlslb_f32 svbfmlslb_lane_f32 svbfmlslt_f32 svbfmlslt_lane_f32 Patch by: Kerry McLaughlin >From ff33739706ce758f416153aa3cdb7c632e068da3 Mon Sep 17 00:00:00 2001 From: Samuel Tebbs Date: Thu, 9 Nov 2023 17:39:26 + Subject: [PATCH] [AArch64][SME2] Enable bfm builtings for sme2 This patch enables the following builtins for SME2 svbfmlslb_f32 svbfmlslb_lane_f32 svbfmlslt_f32 svbfmlslt_lane_f32 Patch by: Kerry McLaughlin --- clang/include/clang/Basic/arm_sve.td | 15 +-- .../acle_sve2p1_bfmlsl.c | 5 + 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 3d4c2129565903d..1094c424e68abd5 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -1959,12 +1959,6 @@ def SVDOT_X2_F : SInst<"svdot[_{d}_{2}_{3}]", "ddhh", "f", MergeNone, "aarch64_ def SVDOT_LANE_X2_S : SInst<"svdot_lane[_{d}_{2}_{3}]", "ddhhi", "i", MergeNone, "aarch64_sve_sdot_lane_x2", [], [ImmCheck<3, ImmCheck0_3>]>; def SVDOT_LANE_X2_U : SInst<"svdot_lane[_{d}_{2}_{3}]", "ddhhi", "Ui", MergeNone, "aarch64_sve_udot_lane_x2", [], [ImmCheck<3, ImmCheck0_3>]>; def SVDOT_LANE_X2_F : SInst<"svdot_lane[_{d}_{2}_{3}]", "ddhhi", "f", MergeNone, "aarch64_sve_fdot_lane_x2", [], [ImmCheck<3, ImmCheck0_3>]>; - -def SVBFMLSLB : SInst<"svbfmlslb[_{d}]", "dd$$", "f", MergeNone, "aarch64_sve_bfmlslb", [IsOverloadNone], []>; -def SVBFMLSLT : SInst<"svbfmlslt[_{d}]", "dd$$", "f", MergeNone, "aarch64_sve_bfmlslt", [IsOverloadNone], []>; - -def SVBFMLSLB_LANE : SInst<"svbfmlslb_lane[_{d}]", "dd$$i", "f", MergeNone, "aarch64_sve_bfmlslb_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>; -def SVBFMLSLT_LANE : SInst<"svbfmlslt_lane[_{d}]", "dd$$i", "f", MergeNone, "aarch64_sve_bfmlslt_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>; } let TargetGuard = "sve2p1" in { @@ -1992,3 +1986,12 @@ let TargetGuard = "sme2" in { def SVADD_SINGLE_X2 : SInst<"svadd[_single_{d}_x2]", "22d", "cUcsUsiUilUl", MergeNone, "aarch64_sve_add_single_x2", [IsStreaming], []>; def SVADD_SINGLE_X4 : SInst<"svadd[_single_{d}_x4]", "44d", "cUcsUsiUilUl", MergeNone, "aarch64_sve_add_single_x4", [IsStreaming], []>; } + +let TargetGuard = "sve2p1|sme" in { +// == BFloat16 multiply-subtract == + def SVBFMLSLB : SInst<"svbfmlslb[_{d}]", "dd$$", "f", MergeNone, "aarch64_sve_bfmlslb", [IsOverloadNone], []>; + def SVBFMLSLT : SInst<"svbfmlslt[_{d}]", "dd$$", "f", MergeNone, "aarch64_sve_bfmlslt", [IsOverloadNone], []>; + + def SVBFMLSLB_LANE : SInst<"svbfmlslb_lane[_{d}]", "dd$$i", "f", MergeNone, "aarch64_sve_bfmlslb_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>; + def SVBFMLSLT_LANE : SInst<"svbfmlslt_lane[_{d}]", "dd$$i", "f", MergeNone, "aarch64_sve_bfmlslt_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>; +} diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmlsl.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmlsl.c index 7e1763a63ec4e1d..519bf019ff35ba5 100644 --- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmlsl.c +++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmlsl.c @@ -2,10 +2,15 @@ // REQUIRES: aarch64-registered-target // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s // RUN: %clang_cc1 -fclang-
[clang] [Clang][SME2] Add single and multi min and max builtins (PR #71688)
https://github.com/SamTebbs33 edited https://github.com/llvm/llvm-project/pull/71688 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SME2] Add single and multi min/max by vector builtins (PR #71707)
https://github.com/SamTebbs33 edited https://github.com/llvm/llvm-project/pull/71707 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AArch64][SME2] Enable bfm builtins for sme2 (PR #71927)
https://github.com/SamTebbs33 edited https://github.com/llvm/llvm-project/pull/71927 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [clang] [AArch64][SME2] Add ldr_zt, str_zt builtins and intrinsics (PR #71795)
https://github.com/SamTebbs33 approved this pull request. Nice one, looks good to me. https://github.com/llvm/llvm-project/pull/71795 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SME2] Add single and multi min and max builtins (PR #71688)
@@ -1987,8 +1987,26 @@ defm SVREVD : SInstZPZ<"svrevd", "csilUcUsUiUl", "aarch64_sve_revd">; // SME intrinsics which operate only on vectors and do not require ZA should be added here, // as they could possibly become SVE instructions in the future. +multiclass MinMaxIntr { + def SVS # NAME : SInst<"sv" # i # "[" # zm # "_{d}_" # mul # "]", t, "csil", MergeNone, "aarch64_sve_s" # i # zm # "_" # mul, [IsStreaming], []>; + def SVU # NAME : SInst<"sv" # i # "[" # zm # "_{d}_" # mul # "]", t, "UcUsUiUl", MergeNone, "aarch64_sve_u" # i # zm # "_" # mul, [IsStreaming], []>; + def SVF # NAME : SInst<"sv" # i # "[" # zm # "_{d}_" # mul # "]", t, "hfd", MergeNone, "aarch64_sve_f" # i # zm # "_" # mul, [IsStreaming], []>; +} + let TargetGuard = "sme2" in { // == ADD (vectors) == def SVADD_SINGLE_X2 : SInst<"svadd[_single_{d}_x2]", "22d", "cUcsUsiUilUl", MergeNone, "aarch64_sve_add_single_x2", [IsStreaming], []>; def SVADD_SINGLE_X4 : SInst<"svadd[_single_{d}_x4]", "44d", "cUcsUsiUilUl", MergeNone, "aarch64_sve_add_single_x4", [IsStreaming], []>; + +// == SMAX / UMAX / FMAX == + defm MAX_SINGLE_X2 : MinMaxIntr<"max", "_single", "x2", "22d">; + defm MAX_MULTI_X2 : MinMaxIntr<"max", "","x2", "222">; + defm MAX_SINGLE_X4 : MinMaxIntr<"max", "_single", "x4", "44d">; + defm MAX_MULTI_X4 : MinMaxIntr<"max", "","x4", "444">; + +// == SMIN / UMIN / FMIN == + defm MIN_SINGLE_X2 : MinMaxIntr<"min", "_single", "x2", "22d">; + defm MIN_MULTI_X2 : MinMaxIntr<"min", "","x2", "222">; + defm MIN_SINGLE_X4 : MinMaxIntr<"min", "_single", "x4", "44d">; + defm MIN_MULTI_X4 : MinMaxIntr<"min", "","x4", "444">; SamTebbs33 wrote: Thank you, I agree. I've done that now and will go ahead and merge. https://github.com/llvm/llvm-project/pull/71688 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SME2] Add single and multi min and max builtins (PR #71688)
https://github.com/SamTebbs33 closed https://github.com/llvm/llvm-project/pull/71688 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SME2] Add single and multi min and max builtins (PR #71688)
SamTebbs33 wrote: > FYI I pushed > [3a9cc17](https://github.com/llvm/llvm-project/commit/3a9cc17ca088267348e4b4a6e64a88a38ae9c6e4) > to hopefully unbreak the build. Thank you! https://github.com/llvm/llvm-project/pull/71688 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AArch64][SME2] Enable bfm builtins for sme2 (PR #71927)
https://github.com/SamTebbs33 updated https://github.com/llvm/llvm-project/pull/71927 >From ff33739706ce758f416153aa3cdb7c632e068da3 Mon Sep 17 00:00:00 2001 From: Samuel Tebbs Date: Thu, 9 Nov 2023 17:39:26 + Subject: [PATCH 1/2] [AArch64][SME2] Enable bfm builtings for sme2 This patch enables the following builtins for SME2 svbfmlslb_f32 svbfmlslb_lane_f32 svbfmlslt_f32 svbfmlslt_lane_f32 Patch by: Kerry McLaughlin --- clang/include/clang/Basic/arm_sve.td | 15 +-- .../acle_sve2p1_bfmlsl.c | 5 + 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 3d4c2129565903d..1094c424e68abd5 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -1959,12 +1959,6 @@ def SVDOT_X2_F : SInst<"svdot[_{d}_{2}_{3}]", "ddhh", "f", MergeNone, "aarch64_ def SVDOT_LANE_X2_S : SInst<"svdot_lane[_{d}_{2}_{3}]", "ddhhi", "i", MergeNone, "aarch64_sve_sdot_lane_x2", [], [ImmCheck<3, ImmCheck0_3>]>; def SVDOT_LANE_X2_U : SInst<"svdot_lane[_{d}_{2}_{3}]", "ddhhi", "Ui", MergeNone, "aarch64_sve_udot_lane_x2", [], [ImmCheck<3, ImmCheck0_3>]>; def SVDOT_LANE_X2_F : SInst<"svdot_lane[_{d}_{2}_{3}]", "ddhhi", "f", MergeNone, "aarch64_sve_fdot_lane_x2", [], [ImmCheck<3, ImmCheck0_3>]>; - -def SVBFMLSLB : SInst<"svbfmlslb[_{d}]", "dd$$", "f", MergeNone, "aarch64_sve_bfmlslb", [IsOverloadNone], []>; -def SVBFMLSLT : SInst<"svbfmlslt[_{d}]", "dd$$", "f", MergeNone, "aarch64_sve_bfmlslt", [IsOverloadNone], []>; - -def SVBFMLSLB_LANE : SInst<"svbfmlslb_lane[_{d}]", "dd$$i", "f", MergeNone, "aarch64_sve_bfmlslb_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>; -def SVBFMLSLT_LANE : SInst<"svbfmlslt_lane[_{d}]", "dd$$i", "f", MergeNone, "aarch64_sve_bfmlslt_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>; } let TargetGuard = "sve2p1" in { @@ -1992,3 +1986,12 @@ let TargetGuard = "sme2" in { def SVADD_SINGLE_X2 : SInst<"svadd[_single_{d}_x2]", "22d", "cUcsUsiUilUl", MergeNone, "aarch64_sve_add_single_x2", [IsStreaming], []>; def SVADD_SINGLE_X4 : SInst<"svadd[_single_{d}_x4]", "44d", "cUcsUsiUilUl", MergeNone, "aarch64_sve_add_single_x4", [IsStreaming], []>; } + +let TargetGuard = "sve2p1|sme" in { +// == BFloat16 multiply-subtract == + def SVBFMLSLB : SInst<"svbfmlslb[_{d}]", "dd$$", "f", MergeNone, "aarch64_sve_bfmlslb", [IsOverloadNone], []>; + def SVBFMLSLT : SInst<"svbfmlslt[_{d}]", "dd$$", "f", MergeNone, "aarch64_sve_bfmlslt", [IsOverloadNone], []>; + + def SVBFMLSLB_LANE : SInst<"svbfmlslb_lane[_{d}]", "dd$$i", "f", MergeNone, "aarch64_sve_bfmlslb_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>; + def SVBFMLSLT_LANE : SInst<"svbfmlslt_lane[_{d}]", "dd$$i", "f", MergeNone, "aarch64_sve_bfmlslt_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>; +} diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmlsl.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmlsl.c index 7e1763a63ec4e1d..519bf019ff35ba5 100644 --- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmlsl.c +++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmlsl.c @@ -2,10 +2,15 @@ // REQUIRES: aarch64-registered-target // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %
[clang] [AArch64][SME2] Enable bfm builtins for sme2 (PR #71927)
@@ -1992,3 +1986,12 @@ let TargetGuard = "sme2" in { def SVADD_SINGLE_X2 : SInst<"svadd[_single_{d}_x2]", "22d", "cUcsUsiUilUl", MergeNone, "aarch64_sve_add_single_x2", [IsStreaming], []>; def SVADD_SINGLE_X4 : SInst<"svadd[_single_{d}_x4]", "44d", "cUcsUsiUilUl", MergeNone, "aarch64_sve_add_single_x4", [IsStreaming], []>; } + +let TargetGuard = "sve2p1|sme" in { SamTebbs33 wrote: Well spotted. https://github.com/llvm/llvm-project/pull/71927 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] Add SME2 builtins for pfalse and ptrue (PR #71953)
https://github.com/SamTebbs33 approved this pull request. https://github.com/llvm/llvm-project/pull/71953 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [clang] [AArch64] Stack probing for dynamic allocas in SelectionDAG (PR #66525)
https://github.com/SamTebbs33 edited https://github.com/llvm/llvm-project/pull/66525 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Stack probing for dynamic allocas in SelectionDAG (PR #66525)
https://github.com/SamTebbs33 approved this pull request. Thanks Momchil. This looks good to me with a couple of questions. The tests are comprehensive as well. https://github.com/llvm/llvm-project/pull/66525 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Stack probing for dynamic allocas in SelectionDAG (PR #66525)
@@ -861,6 +861,12 @@ def AArch64stilp : SDNode<"AArch64ISD::STILP", SDT_AArch64stilp, [SDNPHasChain, def AArch64stnp : SDNode<"AArch64ISD::STNP", SDT_AArch64stnp, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; def AArch64tbl : SDNode<"AArch64ISD::TBL", SDT_AArch64TBL>; + +def AArch64probedalloca +: SDNode<"AArch64ISD::PROBED_ALLOCA", + SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>, + [SDNPHasChain]>; SamTebbs33 wrote: Does this need a `SDNPMayStore` attribute? https://github.com/llvm/llvm-project/pull/66525 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Stack probing for dynamic allocas in SelectionDAG (PR #66525)
@@ -9461,6 +9462,94 @@ bool AArch64InstrInfo::isReallyTriviallyReMaterializable( return TargetInstrInfo::isReallyTriviallyReMaterializable(MI); } +MachineBasicBlock::iterator +AArch64InstrInfo::probedStackAlloc(MachineBasicBlock::iterator MBBI, SamTebbs33 wrote: I think it would be useful for this function to have some comments that explain what it emits, so it can be understood isolated from the rest of this patch. https://github.com/llvm/llvm-project/pull/66525 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [mlir] [AArch64][SME] Remove immediate argument restriction for svldr and svstr (PR #68565)
@@ -4825,6 +4827,113 @@ SDValue AArch64TargetLowering::getPStateSM(SelectionDAG &DAG, SDValue Chain, Mask); } +// Lower an SME LDR/STR ZA intrinsic to LDR_ZA_PSEUDO or STR_ZA. +// Case 1: If the vector number (vecnum) is an immediate in range, it gets +// folded into the instruction +//ldr(%tileslice, %ptr, 11) -> ldr [%tileslice, 11], [%ptr, 11] +// Case 2: If the vecnum is not an immediate, then it is used to modify the base +// and tile slice registers +//ldr(%tileslice, %ptr, %vecnum) +//-> +//%svl = rdsvl +//%ptr2 = %ptr + %svl * %vecnum +//%tileslice2 = %tileslice + %vecnum +//ldr [%tileslice2, 0], [%ptr2, 0] +// Case 3: If the vecnum is an immediate out of range, then the same is done as +// case 2, but the base and slice registers are modified by the greatest +// multiple of 15 lower than the vecnum and the remainder is folded into the +// instruction. This means that successive loads and stores that are offset from +// each other can share the same base and slice register updates. +//ldr(%tileslice, %ptr, 22) +//ldr(%tileslice, %ptr, 23) +//-> +//%svl = rdsvl +//%ptr2 = %ptr + %svl * 15 +//%tileslice2 = %tileslice + 15 +//ldr [%tileslice2, 7], [%ptr2, 7] +//ldr [%tileslice2, 8], [%ptr2, 8] +// Case 4: If the vecnum is an add of an immediate, then the non-immediate +// operand and the immediate can be folded into the instruction, like case 2. +//ldr(%tileslice, %ptr, %vecnum + 7) +//ldr(%tileslice, %ptr, %vecnum + 8) +//-> +//%svl = rdsvl +//%ptr2 = %ptr + %svl * %vecnum +//%tileslice2 = %tileslice + %vecnum +//ldr [%tileslice2, 7], [%ptr2, 7] +//ldr [%tileslice2, 8], [%ptr2, 8] +// Case 5: The vecnum being an add of an immediate out of range is also handled, +// in which case the same remainder logic as case 3 is used. +SDValue LowerSMELdrStr(SDValue N, SelectionDAG &DAG, bool IsLoad) { + SDLoc DL(N); + + SDValue TileSlice = N->getOperand(2); + SDValue Base = N->getOperand(3); + SDValue VecNum = N->getOperand(4); + int Addend = 0; + + // If the vnum is an add, we can fold that add into the instruction if the + // operand is an immediate. The range check is performed below. + if (VecNum.getOpcode() == ISD::ADD) { +if (auto ImmNode = dyn_cast(VecNum.getOperand(1))) { + Addend = ImmNode->getSExtValue(); + VecNum = VecNum.getOperand(0); +} + } + + SDValue Remainder = DAG.getTargetConstant(Addend, DL, MVT::i32); + + // true if the base and slice registers need to be modified + bool NeedsAdd = true; + auto ImmNode = dyn_cast(VecNum); + if (ImmNode || Addend != 0) { +int Imm = ImmNode ? ImmNode->getSExtValue() + Addend : Addend; +Remainder = DAG.getTargetConstant(Imm % 16, DL, MVT::i32); +if (Imm >= 0 && Imm <= 15) { + // If vnum is an immediate in range then we don't need to modify the tile + // slice and base register. We could also get here because Addend != 0 but + // vecnum is not an immediate, in which case we still want the base and + // slice register to be modified + NeedsAdd = !ImmNode; SamTebbs33 wrote: I actually didn't realise that `SDValue()` is a falsey value. That certainly does eliminate the need for the `NeedsAdd` boolean. Thank you! https://github.com/llvm/llvm-project/pull/68565 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SVE2] Add builtins for moving multi-vectors to/from ZA (PR #71191)
https://github.com/SamTebbs33 approved this pull request. LGTM https://github.com/llvm/llvm-project/pull/71191 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [clang] [clang][AArch64] Pass down stack clash protection options to LLVM/Backend (PR #68993)
https://github.com/SamTebbs33 approved this pull request. Looks good to me :+1: https://github.com/llvm/llvm-project/pull/68993 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AArch64][SME2] Enable bfm builtins for sme2 (PR #71927)
@@ -1992,3 +1986,12 @@ let TargetGuard = "sme2" in { def SVADD_SINGLE_X2 : SInst<"svadd[_single_{d}_x2]", "22d", "cUcsUsiUilUl", MergeNone, "aarch64_sve_add_single_x2", [IsStreaming], []>; def SVADD_SINGLE_X4 : SInst<"svadd[_single_{d}_x4]", "44d", "cUcsUsiUilUl", MergeNone, "aarch64_sve_add_single_x4", [IsStreaming], []>; } + +let TargetGuard = "sve2p1|sme2" in { +// == BFloat16 multiply-subtract == + def SVBFMLSLB : SInst<"svbfmlslb[_{d}]", "dd$$", "f", MergeNone, "aarch64_sve_bfmlslb", [IsOverloadNone], []>; SamTebbs33 wrote: That makes sense :+1: https://github.com/llvm/llvm-project/pull/71927 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AArch64][SME2] Enable bfm builtins for sme2 (PR #71927)
@@ -2,10 +2,15 @@ // REQUIRES: aarch64-registered-target // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s SamTebbs33 wrote: I'll add the `__arm_streaming_compatible` attribute to the functions, thanks for spotting that. https://github.com/llvm/llvm-project/pull/71927 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [mlir] [llvm] [AArch64][SME] Remove immediate argument restriction for svldr and svstr (PR #68565)
https://github.com/SamTebbs33 updated https://github.com/llvm/llvm-project/pull/68565 >From de07976922782b9dcf5d13d44551b782dc8b3b94 Mon Sep 17 00:00:00 2001 From: Samuel Tebbs Date: Fri, 6 Oct 2023 17:09:36 +0100 Subject: [PATCH 01/11] [AArch64][SME] Remove immediate argument restriction for svldr and svstr The svldr_vnum_za and svstr_vnum_za builtins/intrinsics currently require that the vnum argument be an immediate, since the instructions take an immediate vector number. However, we emit 0 as the immediate for the instruction no matter what, and instead modify the base register. This patch removes that restriction on the argument, so that the argument can be a non-immediate. If an appropriate immediate was passed to the builtin then CGBuiltin passes that directly to the LLVM intrinsic, otherwise it modifies the base register as is existing behaviour. --- clang/lib/CodeGen/CGBuiltin.cpp | 45 .../aarch64-sme-intrinsics/acle_sme_ldr.c | 71 --- .../aarch64-sme-intrinsics/acle_sme_str.c | 51 - llvm/include/llvm/IR/IntrinsicsAArch64.td | 4 +- llvm/lib/Target/AArch64/SMEInstrFormats.td| 10 +-- .../CostModel/ARM/unaligned_double_load.ll| 59 +++ .../CodeGen/AArch64/sme-intrinsics-loads.ll | 33 +++-- 7 files changed, 166 insertions(+), 107 deletions(-) create mode 100644 llvm/test/Analysis/CostModel/ARM/unaligned_double_load.ll diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index e1211bb8949b665..63508b40096141e 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -9712,6 +9712,11 @@ Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E, return Store; } +Value *CodeGenFunction::EmitTileslice(Value *Offset, Value *Base) { + llvm::Value *CastOffset = Builder.CreateIntCast(Offset, Int64Ty, false); + return Builder.CreateAdd(Base, CastOffset, "tileslice"); +} + Value *CodeGenFunction::EmitSMELd1St1(const SVETypeFlags &TypeFlags, SmallVectorImpl &Ops, unsigned IntID) { @@ -9767,18 +9772,34 @@ Value *CodeGenFunction::EmitSMEZero(const SVETypeFlags &TypeFlags, Value *CodeGenFunction::EmitSMELdrStr(const SVETypeFlags &TypeFlags, SmallVectorImpl &Ops, unsigned IntID) { - if (Ops.size() == 3) { -Function *Cntsb = CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb); -llvm::Value *CntsbCall = Builder.CreateCall(Cntsb, {}, "svlb"); - -llvm::Value *VecNum = Ops[2]; -llvm::Value *MulVL = Builder.CreateMul(CntsbCall, VecNum, "mulvl"); - -Ops[1] = Builder.CreateGEP(Int8Ty, Ops[1], MulVL); -Ops[0] = Builder.CreateAdd( -Ops[0], Builder.CreateIntCast(VecNum, Int32Ty, true), "tileslice"); -Ops.erase(&Ops[2]); - } + if (Ops.size() == 2) { +// Intrinsics without a vecnum also use this function, so just provide 0 +Ops.push_back(Ops[1]); +Ops[1] = Builder.getInt32(0); + } else { +int Imm = -1; +if (ConstantInt* C = dyn_cast(Ops[2])) + if (C->getZExtValue() <= 15) + Imm = C->getZExtValue(); + +if (Imm != -1) { + Ops[2] = Ops[1]; + Ops[1] = Builder.getInt32(Imm); +} else { + Function *Cntsb = CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb); + llvm::Value *CntsbCall = Builder.CreateCall(Cntsb, {}, "svlb"); + + llvm::Value *VecNum = Ops[2]; + llvm::Value *MulVL = Builder.CreateMul( + CntsbCall, + VecNum, + "mulvl"); + + Ops[2] = Builder.CreateGEP(Int8Ty, Ops[1], MulVL); + Ops[1] = Builder.getInt32(0); + Ops[0] = Builder.CreateIntCast(EmitTileslice(Ops[0], VecNum), Int32Ty, false); +} + } Function *F = CGM.getIntrinsic(IntID, {}); return Builder.CreateCall(F, Ops); } diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c index e85c47072f2df80..8e07cf1d11c19b2 100644 --- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c @@ -6,57 +6,46 @@ #include -// CHECK-C-LABEL: define dso_local void @test_svldr_vnum_za( -// CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { -// CHECK-C-NEXT: entry: -// CHECK-C-NEXT:tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE]], ptr [[PTR]]) -// CHECK-C-NEXT:ret void -// -// CHECK-CXX-LABEL: define dso_local void @_Z18test_svldr_vnum_zajPKv( -// CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { -// CHECK-CXX-NEXT: entry: -// CHECK-CXX-NEXT:tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE]], ptr [[PTR]]) -// CHECK-CXX-NEXT:ret void +// CHECK-C-LABEL: @test_svldr_vnum_za( +// CHECK-CXX-LABEL: @_Z18test_svldr_vnum_zaj
[clang] [AArch64][SME] Warn when using a streaming builtin from a non-streaming function (PR #74064)
@@ -289,7 +283,7 @@ ARM_STREAMING_ATTR void test_svst1_ver_vnum_za64(uint32_t slice_base, svbool_t p // CHECK-CXX-NEXT:tail call void @llvm.aarch64.sme.st1q.vert( [[TMP0]], ptr [[TMP2]], i32 15, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT:ret void // -ARM_STREAMING_ATTR void test_svst1_ver_vnum_za128(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) { +void test_svst1_ver_vnum_za128(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming { SamTebbs33 wrote: Sure thing. https://github.com/llvm/llvm-project/pull/74064 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AArch64][SME] Warn when using a streaming builtin from a non-streaming function (PR #74064)
@@ -6,20 +6,20 @@ #include __attribute__((target("sme"))) -void test_sme(svbool_t pg, void *ptr) { +void test_sme(svbool_t pg, void *ptr) __arm_streaming { svld1_hor_za8(0, 0, pg, ptr); } __attribute__((target("arch=armv8-a+sme"))) -void test_arch_sme(svbool_t pg, void *ptr) { +void test_arch_sme(svbool_t pg, void *ptr) __arm_streaming { svld1_hor_vnum_za32(0, 0, pg, ptr, 0); } __attribute__((target("+sme"))) -void test_plus_sme(svbool_t pg, void *ptr) { +void test_plus_sme(svbool_t pg, void *ptr) __arm_streaming { svst1_ver_za16(0, 0, pg, ptr); } -void undefined(svbool_t pg, void *ptr) { - svst1_ver_vnum_za64(0, 0, pg, ptr, 0); // expected-error {{'svst1_ver_vnum_za64' needs target feature sme}} +void undefined(svbool_t pg, void *ptr) __arm_streaming { // expected-error {{function executed in streaming-SVE mode requires 'sme'}} SamTebbs33 wrote: With the streaming attribute added, the error is thrown for the function signature line, so it has to be on this line. https://github.com/llvm/llvm-project/pull/74064 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AArch64][SME] Warn when using a streaming builtin from a non-streaming function (PR #74064)
@@ -2995,6 +2995,134 @@ static QualType getNeonEltType(NeonTypeFlags Flags, ASTContext &Context, enum ArmStreamingType { ArmNonStreaming, ArmStreaming, ArmStreamingCompatible }; +bool Sema::ParseSVEImmChecks( +CallExpr *TheCall, SmallVector, 3> &ImmChecks) { + // Perform all the immediate checks for this builtin call. + bool HasError = false; + for (auto &I : ImmChecks) { +int ArgNum, CheckTy, ElementSizeInBits; +std::tie(ArgNum, CheckTy, ElementSizeInBits) = I; + +typedef bool (*OptionSetCheckFnTy)(int64_t Value); + +// Function that checks whether the operand (ArgNum) is an immediate +// that is one of the predefined values. +auto CheckImmediateInSet = [&](OptionSetCheckFnTy CheckImm, + int ErrDiag) -> bool { + // We can't check the value of a dependent argument. + Expr *Arg = TheCall->getArg(ArgNum); + if (Arg->isTypeDependent() || Arg->isValueDependent()) +return false; + + // Check constant-ness first. + llvm::APSInt Imm; + if (SemaBuiltinConstantArg(TheCall, ArgNum, Imm)) +return true; + + if (!CheckImm(Imm.getSExtValue())) +return Diag(TheCall->getBeginLoc(), ErrDiag) << Arg->getSourceRange(); + return false; +}; + +switch ((SVETypeFlags::ImmCheckType)CheckTy) { +case SVETypeFlags::ImmCheck0_31: + if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 31)) +HasError = true; + break; +case SVETypeFlags::ImmCheck0_13: + if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 13)) +HasError = true; + break; +case SVETypeFlags::ImmCheck1_16: + if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 1, 16)) +HasError = true; + break; +case SVETypeFlags::ImmCheck0_7: + if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 7)) +HasError = true; + break; +case SVETypeFlags::ImmCheckExtract: + if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, + (2048 / ElementSizeInBits) - 1)) +HasError = true; + break; +case SVETypeFlags::ImmCheckShiftRight: + if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 1, ElementSizeInBits)) +HasError = true; + break; +case SVETypeFlags::ImmCheckShiftRightNarrow: + if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 1, + ElementSizeInBits / 2)) +HasError = true; + break; +case SVETypeFlags::ImmCheckShiftLeft: + if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, + ElementSizeInBits - 1)) +HasError = true; + break; +case SVETypeFlags::ImmCheckLaneIndex: + if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, + (128 / (1 * ElementSizeInBits)) - 1)) +HasError = true; + break; +case SVETypeFlags::ImmCheckLaneIndexCompRotate: + if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, + (128 / (2 * ElementSizeInBits)) - 1)) +HasError = true; + break; +case SVETypeFlags::ImmCheckLaneIndexDot: + if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, + (128 / (4 * ElementSizeInBits)) - 1)) +HasError = true; + break; +case SVETypeFlags::ImmCheckComplexRot90_270: + if (CheckImmediateInSet([](int64_t V) { return V == 90 || V == 270; }, + diag::err_rotation_argument_to_cadd)) +HasError = true; + break; +case SVETypeFlags::ImmCheckComplexRotAll90: + if (CheckImmediateInSet( + [](int64_t V) { +return V == 0 || V == 90 || V == 180 || V == 270; + }, + diag::err_rotation_argument_to_cmla)) +HasError = true; + break; +case SVETypeFlags::ImmCheck0_1: + if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 1)) +HasError = true; + break; +case SVETypeFlags::ImmCheck0_2: + if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 2)) +HasError = true; + break; +case SVETypeFlags::ImmCheck0_3: + if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 3)) +HasError = true; + break; +case SVETypeFlags::ImmCheck0_0: + if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 0)) +HasError = true; + break; +case SVETypeFlags::ImmCheck0_15: + if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 15)) +HasError = true; + break; +case SVETypeFlags::ImmCheck0_255: + if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 255)) +HasError = true; + break; +case SVETypeFlags::ImmCheck2_4_Mul2: + if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 2, 4) || + SemaBuiltinConstantArgMultiple(TheCall, ArgNum, 2)) +HasError = true; + break; SamTebbs33 wrote: I
[clang] [AArch64][SME] Warn when using a streaming builtin from a non-streaming function (PR #74064)
@@ -289,7 +283,7 @@ ARM_STREAMING_ATTR void test_svst1_ver_vnum_za64(uint32_t slice_base, svbool_t p // CHECK-CXX-NEXT:tail call void @llvm.aarch64.sme.st1q.vert( [[TMP0]], ptr [[TMP2]], i32 15, i32 [[SLICE_BASE]]) // CHECK-CXX-NEXT:ret void // -ARM_STREAMING_ATTR void test_svst1_ver_vnum_za128(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) { +void test_svst1_ver_vnum_za128(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming { SamTebbs33 wrote: Done https://github.com/llvm/llvm-project/pull/74064 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AArch64][SME] Warn when using a streaming builtin from a non-streaming function (PR #74064)
@@ -3023,6 +3151,66 @@ static void checkArmStreamingBuiltin(Sema &S, CallExpr *TheCall, << TheCall->getSourceRange() << "streaming compatible"; return; } + + if (FnType == ArmNonStreaming && BuiltinType == ArmStreaming) { +S.Diag(TheCall->getBeginLoc(), diag::warn_attribute_arm_sm_incompat_builtin) +<< TheCall->getSourceRange() << "non-streaming"; + } +} + +static bool hasSMEZAState(const FunctionDecl *FD) { + if (FD->hasAttr()) +return true; + if (const auto *T = FD->getType()->getAs()) +if (T->getAArch64SMEAttributes() & FunctionType::SME_PStateZASharedMask) + return true; + return false; +} + +static bool hasSMEZAState(unsigned BuiltinID) { + switch (BuiltinID) { + default: +return false; +#define GET_SME_BUILTIN_HAS_ZA_STATE +#include "clang/Basic/arm_sme_builtins_za_state.inc" +#undef GET_SME_BUILTIN_HAS_ZA_STATE + } +} + +bool Sema::CheckSMEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { + if (const FunctionDecl *FD = getCurFunctionDecl()) { +bool debug = FD->getDeclName().getAsString() == "incompat_sve_sm"; +std::optional BuiltinType; + +switch (BuiltinID) { +default: + break; +#define GET_SME_STREAMING_ATTRS +#include "clang/Basic/arm_sme_streaming_attrs.inc" SamTebbs33 wrote: Kept the function separate as per our offline discussion. https://github.com/llvm/llvm-project/pull/74064 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AArch64][SME] Warn when using a streaming builtin from a non-streaming function (PR #74064)
@@ -3183,6 +3140,114 @@ bool Sema::CheckSVEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { return HasError; } +static ArmStreamingType getArmStreamingFnType(const FunctionDecl *FD) { + if (FD->hasAttr()) +return ArmStreaming; + if (const auto *T = FD->getType()->getAs()) { +if (T->getAArch64SMEAttributes() & FunctionType::SME_PStateSMEnabledMask) + return ArmStreaming; +if (T->getAArch64SMEAttributes() & FunctionType::SME_PStateSMCompatibleMask) + return ArmStreamingCompatible; + } + return ArmNonStreaming; +} + +static void checkArmStreamingBuiltin(Sema &S, CallExpr *TheCall, + const FunctionDecl *FD, + ArmStreamingType BuiltinType) { + ArmStreamingType FnType = getArmStreamingFnType(FD); + + if (FnType == ArmStreaming && BuiltinType == ArmNonStreaming) { +S.Diag(TheCall->getBeginLoc(), diag::warn_attribute_arm_sm_incompat_builtin) +<< TheCall->getSourceRange() << "streaming"; + } + + if (FnType == ArmStreamingCompatible && + BuiltinType != ArmStreamingCompatible) { +S.Diag(TheCall->getBeginLoc(), diag::warn_attribute_arm_sm_incompat_builtin) +<< TheCall->getSourceRange() << "streaming compatible"; +return; + } + + if (FnType == ArmNonStreaming && BuiltinType == ArmStreaming) { +S.Diag(TheCall->getBeginLoc(), diag::warn_attribute_arm_sm_incompat_builtin) +<< TheCall->getSourceRange() << "non-streaming"; + } +} + +static bool hasSMEZAState(const FunctionDecl *FD) { + if (FD->hasAttr()) +return true; + if (const auto *T = FD->getType()->getAs()) +if (T->getAArch64SMEAttributes() & FunctionType::SME_PStateZASharedMask) + return true; + return false; +} + +static bool hasSMEZAState(unsigned BuiltinID) { + switch (BuiltinID) { + default: +return false; +#define GET_SME_BUILTIN_HAS_ZA_STATE +#include "clang/Basic/arm_sme_builtins_za_state.inc" +#undef GET_SME_BUILTIN_HAS_ZA_STATE + } +} + +bool Sema::CheckSMEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { + if (const FunctionDecl *FD = getCurFunctionDecl()) { +std::optional BuiltinType; + +switch (BuiltinID) { +default: + break; +#define GET_SME_STREAMING_ATTRS +#include "clang/Basic/arm_sme_streaming_attrs.inc" +#undef GET_SME_STREAMING_ATTRS +} + +if (BuiltinType) + checkArmStreamingBuiltin(*this, TheCall, FD, *BuiltinType); + +if (hasSMEZAState(BuiltinID) && !hasSMEZAState(FD)) + Diag(TheCall->getBeginLoc(), + diag::warn_attribute_arm_za_builtin_no_za_state) + << TheCall->getSourceRange(); + } + + // Range check SME intrinsics that take immediate values. + SmallVector, 3> ImmChecks; + + switch (BuiltinID) { + default: +return false; +#define GET_SME_IMMEDIATE_CHECK +#include "clang/Basic/arm_sme_sema_rangechecks.inc" +#undef GET_SME_IMMEDIATE_CHECK + } + + return ParseSVEImmChecks(TheCall, ImmChecks); +} + +bool Sema::CheckSVEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { SamTebbs33 wrote: Sure thing https://github.com/llvm/llvm-project/pull/74064 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AArch64][SME] Warn when using a streaming builtin from a non-streaming function (PR #74064)
@@ -3172,6 +3117,18 @@ bool Sema::CheckSVEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 255)) HasError = true; break; +case SVETypeFlags::ImmCheck1_1: SamTebbs33 wrote: I'm actually not sure. When building the compiler was telling me that these cases weren't being checked for so I added them. I'm not sure what caused them to disappear in the first place. I'll revert this chunk. https://github.com/llvm/llvm-project/pull/74064 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AArch64][SME] Warn when using a streaming builtin from a non-streaming function (PR #74064)
@@ -3183,6 +3140,114 @@ bool Sema::CheckSVEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { return HasError; } +static ArmStreamingType getArmStreamingFnType(const FunctionDecl *FD) { + if (FD->hasAttr()) +return ArmStreaming; + if (const auto *T = FD->getType()->getAs()) { +if (T->getAArch64SMEAttributes() & FunctionType::SME_PStateSMEnabledMask) + return ArmStreaming; +if (T->getAArch64SMEAttributes() & FunctionType::SME_PStateSMCompatibleMask) + return ArmStreamingCompatible; + } + return ArmNonStreaming; +} + +static void checkArmStreamingBuiltin(Sema &S, CallExpr *TheCall, + const FunctionDecl *FD, + ArmStreamingType BuiltinType) { + ArmStreamingType FnType = getArmStreamingFnType(FD); + + if (FnType == ArmStreaming && BuiltinType == ArmNonStreaming) { +S.Diag(TheCall->getBeginLoc(), diag::warn_attribute_arm_sm_incompat_builtin) +<< TheCall->getSourceRange() << "streaming"; + } + + if (FnType == ArmStreamingCompatible && + BuiltinType != ArmStreamingCompatible) { +S.Diag(TheCall->getBeginLoc(), diag::warn_attribute_arm_sm_incompat_builtin) +<< TheCall->getSourceRange() << "streaming compatible"; +return; + } + + if (FnType == ArmNonStreaming && BuiltinType == ArmStreaming) { +S.Diag(TheCall->getBeginLoc(), diag::warn_attribute_arm_sm_incompat_builtin) +<< TheCall->getSourceRange() << "non-streaming"; + } +} + +static bool hasSMEZAState(const FunctionDecl *FD) { + if (FD->hasAttr()) +return true; + if (const auto *T = FD->getType()->getAs()) +if (T->getAArch64SMEAttributes() & FunctionType::SME_PStateZASharedMask) + return true; + return false; +} + +static bool hasSMEZAState(unsigned BuiltinID) { + switch (BuiltinID) { + default: +return false; +#define GET_SME_BUILTIN_HAS_ZA_STATE +#include "clang/Basic/arm_sme_builtins_za_state.inc" +#undef GET_SME_BUILTIN_HAS_ZA_STATE + } +} + +bool Sema::CheckSMEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { + if (const FunctionDecl *FD = getCurFunctionDecl()) { +std::optional BuiltinType; + +switch (BuiltinID) { +default: + break; +#define GET_SME_STREAMING_ATTRS +#include "clang/Basic/arm_sme_streaming_attrs.inc" +#undef GET_SME_STREAMING_ATTRS +} + +if (BuiltinType) + checkArmStreamingBuiltin(*this, TheCall, FD, *BuiltinType); + +if (hasSMEZAState(BuiltinID) && !hasSMEZAState(FD)) + Diag(TheCall->getBeginLoc(), + diag::warn_attribute_arm_za_builtin_no_za_state) + << TheCall->getSourceRange(); + } + + // Range check SME intrinsics that take immediate values. + SmallVector, 3> ImmChecks; + + switch (BuiltinID) { + default: +return false; +#define GET_SME_IMMEDIATE_CHECK +#include "clang/Basic/arm_sme_sema_rangechecks.inc" +#undef GET_SME_IMMEDIATE_CHECK + } + + return ParseSVEImmChecks(TheCall, ImmChecks); +} + +bool Sema::CheckSVEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { SamTebbs33 wrote: Done! Rebased and should be ready for review again now. https://github.com/llvm/llvm-project/pull/74064 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AArch64][SME2] Add builtins to cast svbool from/to svcount. (PR #74720)
https://github.com/SamTebbs33 approved this pull request. LGTM https://github.com/llvm/llvm-project/pull/74720 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AArch64][SME] Warn when using a streaming builtin from a non-streaming function (PR #74064)
@@ -3058,6 +3058,11 @@ bool Sema::ParseSVEImmChecks( if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 1, 7)) HasError = true; break; +case SVETypeFlags::ImmCheck2_4_Mul2: SamTebbs33 wrote: I think you were looking at an old commit as I fixed that in [48ee745](https://github.com/llvm/llvm-project/pull/74064/commits/48ee745a815f0fda41cb1791f91d73db73e4aeba) https://github.com/llvm/llvm-project/pull/74064 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AArch64][SME] Warn when using a streaming builtin from a non-streaming function (PR #74064)
@@ -20,3 +21,23 @@ int16x8_t incompat_neon_smc(int16x8_t splat) __arm_streaming_compatible { // expected-warning@+1 {{builtin call has undefined behaviour when called from a streaming compatible function}} return (int16x8_t)__builtin_neon_vqaddq_v((int8x16_t)splat, (int8x16_t)splat, 33); } + +void incompat_sme_norm(svbool_t pg, void const *ptr) __arm_shared_za { + // expected-warning@+1 {{builtin call has undefined behaviour when called from a non-streaming function}} + return __builtin_sme_svld1_hor_za128(0, 0, pg, ptr); SamTebbs33 wrote: > nit: returning a `void` value from a `void` function doesn't seem right. Agreed, I'll fix that. > Also, is `incompat_sme_norm` testing anything that `incompat_sme_sm` isn't > testing? > Or should this be a test where we'd call a non-streaming SVE/SME builtin from > a streaming-function? Yeah it doesn't look like it is. That would be better! https://github.com/llvm/llvm-project/pull/74064 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AArch64][SME] Warn when using a streaming builtin from a non-streaming function (PR #74064)
@@ -6,20 +6,21 @@ #include __attribute__((target("sme"))) -void test_sme(svbool_t pg, void *ptr) { +void test_sme(svbool_t pg, void *ptr) __arm_streaming __arm_shared_za { svld1_hor_za8(0, 0, pg, ptr); } __attribute__((target("arch=armv8-a+sme"))) -void test_arch_sme(svbool_t pg, void *ptr) { +void test_arch_sme(svbool_t pg, void *ptr) __arm_streaming __arm_shared_za { svld1_hor_vnum_za32(0, 0, pg, ptr, 0); } __attribute__((target("+sme"))) -void test_plus_sme(svbool_t pg, void *ptr) { +void test_plus_sme(svbool_t pg, void *ptr) __arm_streaming __arm_shared_za { svst1_ver_za16(0, 0, pg, ptr); } -void undefined(svbool_t pg, void *ptr) { - svst1_ver_vnum_za64(0, 0, pg, ptr, 0); // expected-error {{'svst1_ver_vnum_za64' needs target feature sme}} +__attribute__((target("+sme"))) +void undefined(svbool_t pg, void *ptr) __arm_shared_za { + svst1_ver_vnum_za64(0, 0, pg, ptr, 0); // expected-warning {{builtin call has undefined behaviour when called from a non-streaming function}} SamTebbs33 wrote: In a previous review you asked me to do so: > I think instead you want to compile this test with > __attribute__((target("+sme"))) but without the __arm_streaming to ensure you > get a diagnostic on the builtin call that the behaviour is undefined when the > (parent) function is not a streaming function. https://github.com/llvm/llvm-project/pull/74064 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AArch64][SME] Warn when using a streaming builtin from a non-streaming function (PR #74064)
@@ -18,7 +18,7 @@ // CHECK-CXX-NEXT:tail call void @llvm.aarch64.sme.zero(i32 0) // CHECK-CXX-NEXT:ret void // -void test_svzero_mask_za() { +__arm_new_za void test_svzero_mask_za() { SamTebbs33 wrote: I did try `__arm_shared_za` but got > '__arm_shared_za' only applies to non-K&R-style functions and > error: '__arm_shared_za' only applies to function types; type here is 'void > ()' https://github.com/llvm/llvm-project/pull/74064 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AArch64][SME] Warn when using a streaming builtin from a non-streaming function (PR #74064)
@@ -3168,11 +3168,70 @@ static void checkArmStreamingBuiltin(Sema &S, CallExpr *TheCall, << TheCall->getSourceRange() << "streaming compatible"; return; } + + if (FnType == ArmNonStreaming && BuiltinType == ArmStreaming) { +S.Diag(TheCall->getBeginLoc(), diag::warn_attribute_arm_sm_incompat_builtin) +<< TheCall->getSourceRange() << "non-streaming"; + } +} + +static bool hasSMEZAState(const FunctionDecl *FD) { + if (FD->hasAttr()) +return true; + if (const auto *T = FD->getType()->getAs()) +if (T->getAArch64SMEAttributes() & FunctionType::SME_PStateZASharedMask) + return true; + return false; +} + +static bool hasSMEZAState(unsigned BuiltinID) { + switch (BuiltinID) { + default: +return false; +#define GET_SME_BUILTIN_HAS_ZA_STATE +#include "clang/Basic/arm_sme_builtins_za_state.inc" +#undef GET_SME_BUILTIN_HAS_ZA_STATE + } +} + +bool Sema::CheckSMEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { + if (const FunctionDecl *FD = getCurFunctionDecl()) { +std::optional BuiltinType; SamTebbs33 wrote: Sure thing. https://github.com/llvm/llvm-project/pull/74064 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AArch64][SME] Warn when using a streaming builtin from a non-streaming function (PR #74064)
@@ -1375,6 +1381,12 @@ void SVEEmitter::createHeader(raw_ostream &OS) { OS << "#define __aio static __inline__ __attribute__((__always_inline__, " "__nodebug__, __overloadable__))\n\n"; + OS << "#ifdef __ARM_FEATURE_SME\n"; + OS << "#define __asc __attribute__((arm_streaming_compatible))\n"; SamTebbs33 wrote: Omitting it works for me, cheers. https://github.com/llvm/llvm-project/pull/74064 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AArch64][SME] Warn when using a streaming builtin from a non-streaming function (PR #74064)
@@ -500,6 +506,12 @@ bool ClangTableGenMain(raw_ostream &OS, RecordKeeper &Records) { case GenArmSmeRangeChecks: EmitSmeRangeChecks(Records, OS); break; + case GenArmSmeStreamingAttrs: SamTebbs33 wrote: I remember you suggesting that we have the SME and SVE changes in separate PRs but I can bring the SVE changes into this one. https://github.com/llvm/llvm-project/pull/74064 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AArch64][SME] Warn when using a streaming builtin from a non-streaming function (PR #74064)
@@ -3168,11 +3168,70 @@ static void checkArmStreamingBuiltin(Sema &S, CallExpr *TheCall, << TheCall->getSourceRange() << "streaming compatible"; return; } + + if (FnType == ArmNonStreaming && BuiltinType == ArmStreaming) { +S.Diag(TheCall->getBeginLoc(), diag::warn_attribute_arm_sm_incompat_builtin) +<< TheCall->getSourceRange() << "non-streaming"; + } +} SamTebbs33 wrote: I've removed the ZA state changes and added the SVE changes. Let me know what you think. https://github.com/llvm/llvm-project/pull/74064 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AArch64][SME] Warn when using a streaming builtin from a non-streaming function (PR #74064)
@@ -20,3 +21,23 @@ int16x8_t incompat_neon_smc(int16x8_t splat) __arm_streaming_compatible { // expected-warning@+1 {{builtin call has undefined behaviour when called from a streaming compatible function}} return (int16x8_t)__builtin_neon_vqaddq_v((int8x16_t)splat, (int8x16_t)splat, 33); } + +void incompat_sme_norm(svbool_t pg, void const *ptr) __arm_shared_za { + // expected-warning@+1 {{builtin call has undefined behaviour when called from a non-streaming function}} + return __builtin_sme_svld1_hor_za128(0, 0, pg, ptr); SamTebbs33 wrote: I removed this test since, as you said, it wasn't testing anything different and after adding the SVE changes we have tests for SVE builtins anyway. https://github.com/llvm/llvm-project/pull/74064 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AArch64][SME] Warn when using a streaming builtin from a non-streaming function (PR #74064)
@@ -500,6 +506,12 @@ bool ClangTableGenMain(raw_ostream &OS, RecordKeeper &Records) { case GenArmSmeRangeChecks: EmitSmeRangeChecks(Records, OS); break; + case GenArmSmeStreamingAttrs: SamTebbs33 wrote: Done https://github.com/llvm/llvm-project/pull/74064 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[mlir] [clang] [llvm] [AArch64][SME] Remove immediate argument restriction for svldr and svstr (PR #68565)
https://github.com/SamTebbs33 updated https://github.com/llvm/llvm-project/pull/68565 >From 83e20904c206980285c4ee9d0227706803147654 Mon Sep 17 00:00:00 2001 From: Samuel Tebbs Date: Fri, 6 Oct 2023 17:09:36 +0100 Subject: [PATCH 01/12] [AArch64][SME] Remove immediate argument restriction for svldr and svstr The svldr_vnum_za and svstr_vnum_za builtins/intrinsics currently require that the vnum argument be an immediate, since the instructions take an immediate vector number. However, we emit 0 as the immediate for the instruction no matter what, and instead modify the base register. This patch removes that restriction on the argument, so that the argument can be a non-immediate. If an appropriate immediate was passed to the builtin then CGBuiltin passes that directly to the LLVM intrinsic, otherwise it modifies the base register as is existing behaviour. --- clang/lib/CodeGen/CGBuiltin.cpp | 45 .../aarch64-sme-intrinsics/acle_sme_ldr.c | 71 --- .../aarch64-sme-intrinsics/acle_sme_str.c | 51 - llvm/include/llvm/IR/IntrinsicsAArch64.td | 2 +- llvm/lib/Target/AArch64/SMEInstrFormats.td| 10 +-- .../CostModel/ARM/unaligned_double_load.ll| 59 +++ .../CodeGen/AArch64/sme-intrinsics-loads.ll | 33 +++-- 7 files changed, 165 insertions(+), 106 deletions(-) create mode 100644 llvm/test/Analysis/CostModel/ARM/unaligned_double_load.ll diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 09309a3937fb613..8444aea8c8ac4b6 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -9815,6 +9815,11 @@ Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E, return Store; } +Value *CodeGenFunction::EmitTileslice(Value *Offset, Value *Base) { + llvm::Value *CastOffset = Builder.CreateIntCast(Offset, Int64Ty, false); + return Builder.CreateAdd(Base, CastOffset, "tileslice"); +} + Value *CodeGenFunction::EmitSMELd1St1(const SVETypeFlags &TypeFlags, SmallVectorImpl &Ops, unsigned IntID) { @@ -9870,18 +9875,34 @@ Value *CodeGenFunction::EmitSMEZero(const SVETypeFlags &TypeFlags, Value *CodeGenFunction::EmitSMELdrStr(const SVETypeFlags &TypeFlags, SmallVectorImpl &Ops, unsigned IntID) { - if (Ops.size() == 3) { -Function *Cntsb = CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb); -llvm::Value *CntsbCall = Builder.CreateCall(Cntsb, {}, "svlb"); - -llvm::Value *VecNum = Ops[2]; -llvm::Value *MulVL = Builder.CreateMul(CntsbCall, VecNum, "mulvl"); - -Ops[1] = Builder.CreateGEP(Int8Ty, Ops[1], MulVL); -Ops[0] = Builder.CreateAdd( -Ops[0], Builder.CreateIntCast(VecNum, Int32Ty, true), "tileslice"); -Ops.erase(&Ops[2]); - } + if (Ops.size() == 2) { +// Intrinsics without a vecnum also use this function, so just provide 0 +Ops.push_back(Ops[1]); +Ops[1] = Builder.getInt32(0); + } else { +int Imm = -1; +if (ConstantInt* C = dyn_cast(Ops[2])) + if (C->getZExtValue() <= 15) + Imm = C->getZExtValue(); + +if (Imm != -1) { + Ops[2] = Ops[1]; + Ops[1] = Builder.getInt32(Imm); +} else { + Function *Cntsb = CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb); + llvm::Value *CntsbCall = Builder.CreateCall(Cntsb, {}, "svlb"); + + llvm::Value *VecNum = Ops[2]; + llvm::Value *MulVL = Builder.CreateMul( + CntsbCall, + VecNum, + "mulvl"); + + Ops[2] = Builder.CreateGEP(Int8Ty, Ops[1], MulVL); + Ops[1] = Builder.getInt32(0); + Ops[0] = Builder.CreateIntCast(EmitTileslice(Ops[0], VecNum), Int32Ty, false); +} + } Function *F = CGM.getIntrinsic(IntID, {}); return Builder.CreateCall(F, Ops); } diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c index e85c47072f2df80..8e07cf1d11c19b2 100644 --- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c @@ -6,57 +6,46 @@ #include -// CHECK-C-LABEL: define dso_local void @test_svldr_vnum_za( -// CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { -// CHECK-C-NEXT: entry: -// CHECK-C-NEXT:tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE]], ptr [[PTR]]) -// CHECK-C-NEXT:ret void -// -// CHECK-CXX-LABEL: define dso_local void @_Z18test_svldr_vnum_zajPKv( -// CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { -// CHECK-CXX-NEXT: entry: -// CHECK-CXX-NEXT:tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE]], ptr [[PTR]]) -// CHECK-CXX-NEXT:ret void +// CHECK-C-LABEL: @test_svldr_vnum_za( +// CHECK-CXX-LABEL: @_Z18test_svldr_vnum_zaj
[clang] [AArch64][SME2] Enable bfm builtins for sme2 (PR #71927)
https://github.com/SamTebbs33 updated https://github.com/llvm/llvm-project/pull/71927 >From ff33739706ce758f416153aa3cdb7c632e068da3 Mon Sep 17 00:00:00 2001 From: Samuel Tebbs Date: Thu, 9 Nov 2023 17:39:26 + Subject: [PATCH 1/4] [AArch64][SME2] Enable bfm builtings for sme2 This patch enables the following builtins for SME2 svbfmlslb_f32 svbfmlslb_lane_f32 svbfmlslt_f32 svbfmlslt_lane_f32 Patch by: Kerry McLaughlin --- clang/include/clang/Basic/arm_sve.td | 15 +-- .../acle_sve2p1_bfmlsl.c | 5 + 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 3d4c2129565903d..1094c424e68abd5 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -1959,12 +1959,6 @@ def SVDOT_X2_F : SInst<"svdot[_{d}_{2}_{3}]", "ddhh", "f", MergeNone, "aarch64_ def SVDOT_LANE_X2_S : SInst<"svdot_lane[_{d}_{2}_{3}]", "ddhhi", "i", MergeNone, "aarch64_sve_sdot_lane_x2", [], [ImmCheck<3, ImmCheck0_3>]>; def SVDOT_LANE_X2_U : SInst<"svdot_lane[_{d}_{2}_{3}]", "ddhhi", "Ui", MergeNone, "aarch64_sve_udot_lane_x2", [], [ImmCheck<3, ImmCheck0_3>]>; def SVDOT_LANE_X2_F : SInst<"svdot_lane[_{d}_{2}_{3}]", "ddhhi", "f", MergeNone, "aarch64_sve_fdot_lane_x2", [], [ImmCheck<3, ImmCheck0_3>]>; - -def SVBFMLSLB : SInst<"svbfmlslb[_{d}]", "dd$$", "f", MergeNone, "aarch64_sve_bfmlslb", [IsOverloadNone], []>; -def SVBFMLSLT : SInst<"svbfmlslt[_{d}]", "dd$$", "f", MergeNone, "aarch64_sve_bfmlslt", [IsOverloadNone], []>; - -def SVBFMLSLB_LANE : SInst<"svbfmlslb_lane[_{d}]", "dd$$i", "f", MergeNone, "aarch64_sve_bfmlslb_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>; -def SVBFMLSLT_LANE : SInst<"svbfmlslt_lane[_{d}]", "dd$$i", "f", MergeNone, "aarch64_sve_bfmlslt_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>; } let TargetGuard = "sve2p1" in { @@ -1992,3 +1986,12 @@ let TargetGuard = "sme2" in { def SVADD_SINGLE_X2 : SInst<"svadd[_single_{d}_x2]", "22d", "cUcsUsiUilUl", MergeNone, "aarch64_sve_add_single_x2", [IsStreaming], []>; def SVADD_SINGLE_X4 : SInst<"svadd[_single_{d}_x4]", "44d", "cUcsUsiUilUl", MergeNone, "aarch64_sve_add_single_x4", [IsStreaming], []>; } + +let TargetGuard = "sve2p1|sme" in { +// == BFloat16 multiply-subtract == + def SVBFMLSLB : SInst<"svbfmlslb[_{d}]", "dd$$", "f", MergeNone, "aarch64_sve_bfmlslb", [IsOverloadNone], []>; + def SVBFMLSLT : SInst<"svbfmlslt[_{d}]", "dd$$", "f", MergeNone, "aarch64_sve_bfmlslt", [IsOverloadNone], []>; + + def SVBFMLSLB_LANE : SInst<"svbfmlslb_lane[_{d}]", "dd$$i", "f", MergeNone, "aarch64_sve_bfmlslb_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>; + def SVBFMLSLT_LANE : SInst<"svbfmlslt_lane[_{d}]", "dd$$i", "f", MergeNone, "aarch64_sve_bfmlslt_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>; +} diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmlsl.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmlsl.c index 7e1763a63ec4e1d..519bf019ff35ba5 100644 --- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmlsl.c +++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmlsl.c @@ -2,10 +2,15 @@ // REQUIRES: aarch64-registered-target // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %
[llvm] [mlir] [clang] [AArch64][SME] Remove immediate argument restriction for svldr and svstr (PR #68565)
@@ -4850,6 +4852,93 @@ SDValue AArch64TargetLowering::getPStateSM(SelectionDAG &DAG, SDValue Chain, Mask); } +// Lower an SME LDR/STR ZA intrinsic to LDR_ZA_PSEUDO or STR_ZA. SamTebbs33 wrote: And it's no longer precise as we lower it to the SDNode first. https://github.com/llvm/llvm-project/pull/68565 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [mlir] [AArch64][SME] Remove immediate argument restriction for svldr and svstr (PR #68565)
@@ -4850,6 +4852,93 @@ SDValue AArch64TargetLowering::getPStateSM(SelectionDAG &DAG, SDValue Chain, Mask); } +// Lower an SME LDR/STR ZA intrinsic to LDR_ZA_PSEUDO or STR_ZA. +// Case 1: If the vector number (vecnum) is an immediate in range, it gets +// folded into the instruction +//ldr(%tileslice, %ptr, 11) -> ldr [%tileslice, 11], [%ptr, 11] +// Case 2: If the vecnum is not an immediate, then it is used to modify the base +// and tile slice registers +//ldr(%tileslice, %ptr, %vecnum) +//-> +//%svl = rdsvl +//%ptr2 = %ptr + %svl * %vecnum +//%tileslice2 = %tileslice + %vecnum +//ldr [%tileslice2, 0], [%ptr2, 0] +// Case 3: If the vecnum is an immediate out of range, then the same is done as +// case 2, but the base and slice registers are modified by the greatest +// multiple of 15 lower than the vecnum and the remainder is folded into the +// instruction. This means that successive loads and stores that are offset from +// each other can share the same base and slice register updates. +//ldr(%tileslice, %ptr, 22) +//ldr(%tileslice, %ptr, 23) +//-> +//%svl = rdsvl +//%ptr2 = %ptr + %svl * 15 +//%tileslice2 = %tileslice + 15 +//ldr [%tileslice2, 7], [%ptr2, 7] +//ldr [%tileslice2, 8], [%ptr2, 8] +// Case 4: If the vecnum is an add of an immediate, then the non-immediate +// operand and the immediate can be folded into the instruction, like case 2. +//ldr(%tileslice, %ptr, %vecnum + 7) +//ldr(%tileslice, %ptr, %vecnum + 8) +//-> +//%svl = rdsvl +//%ptr2 = %ptr + %svl * %vecnum +//%tileslice2 = %tileslice + %vecnum +//ldr [%tileslice2, 7], [%ptr2, 7] +//ldr [%tileslice2, 8], [%ptr2, 8] +// Case 5: The vecnum being an add of an immediate out of range is also handled, +// in which case the same remainder logic as case 3 is used. +SDValue LowerSMELdrStr(SDValue N, SelectionDAG &DAG, bool IsLoad) { + SDLoc DL(N); + + SDValue TileSlice = N->getOperand(2); + SDValue Base = N->getOperand(3); + SDValue VecNum = N->getOperand(4); + int32_t ConstAddend = 0; + SDValue VarAddend = VecNum; + + // If the vnum is an add of an immediate, we can fold it into the instruction + if (VecNum.getOpcode() == ISD::ADD && + isa(VecNum.getOperand(1))) { +ConstAddend = cast(VecNum.getOperand(1))->getSExtValue(); +VarAddend = VecNum.getOperand(0); + } else if (auto ImmNode = dyn_cast(VecNum)) { +ConstAddend = ImmNode->getSExtValue(); +VarAddend = SDValue(); + } + + int32_t ImmAddend = ConstAddend % 16; + if (int32_t C = (ConstAddend - ImmAddend)) { +SDValue CVal = DAG.getTargetConstant(C, DL, MVT::i32); +VarAddend = VarAddend +? DAG.getNode(ISD::ADD, DL, MVT::i32, {VarAddend, CVal}) +: CVal; + } + + if (VarAddend) { +// Get the vector length that will be multiplied by vnum +auto SVL = DAG.getNode(AArch64ISD::RDSVL, DL, MVT::i64, + DAG.getConstant(1, DL, MVT::i32)); + +// Multiply SVL and vnum then add it to the base +SDValue Mul = DAG.getNode( +ISD::MUL, DL, MVT::i64, +{SVL, DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, VarAddend)}); +Base = DAG.getNode(ISD::ADD, DL, MVT::i64, {Base, Mul}); +// Just add vnum to the tileslice +TileSlice = DAG.getNode(ISD::ADD, DL, MVT::i32, {TileSlice, VarAddend}); + } + + SmallVector Ops = { + /*Chain=*/N.getOperand(0), TileSlice, Base, + DAG.getTargetConstant(ImmAddend, DL, MVT::i32)}; + auto LdrStr = + DAG.getNode(IsLoad ? AArch64ISD::SME_ZA_LDR : AArch64ISD::SME_ZA_STR, DL, + MVT::Other, Ops); + return LdrStr; SamTebbs33 wrote: Thanks, I must have kept this from before the refactor. https://github.com/llvm/llvm-project/pull/68565 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [mlir] [AArch64][SME] Remove immediate argument restriction for svldr and svstr (PR #68565)
SamTebbs33 wrote: > Thanks for all the changes! LGTM with few little nits addressed. Thanks! https://github.com/llvm/llvm-project/pull/68565 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [mlir] [AArch64][SME] Remove immediate argument restriction for svldr and svstr (PR #68565)
https://github.com/SamTebbs33 updated https://github.com/llvm/llvm-project/pull/68565 >From 83e20904c206980285c4ee9d0227706803147654 Mon Sep 17 00:00:00 2001 From: Samuel Tebbs Date: Fri, 6 Oct 2023 17:09:36 +0100 Subject: [PATCH 01/13] [AArch64][SME] Remove immediate argument restriction for svldr and svstr The svldr_vnum_za and svstr_vnum_za builtins/intrinsics currently require that the vnum argument be an immediate, since the instructions take an immediate vector number. However, we emit 0 as the immediate for the instruction no matter what, and instead modify the base register. This patch removes that restriction on the argument, so that the argument can be a non-immediate. If an appropriate immediate was passed to the builtin then CGBuiltin passes that directly to the LLVM intrinsic, otherwise it modifies the base register as is existing behaviour. --- clang/lib/CodeGen/CGBuiltin.cpp | 45 .../aarch64-sme-intrinsics/acle_sme_ldr.c | 71 --- .../aarch64-sme-intrinsics/acle_sme_str.c | 51 - llvm/include/llvm/IR/IntrinsicsAArch64.td | 2 +- llvm/lib/Target/AArch64/SMEInstrFormats.td| 10 +-- .../CostModel/ARM/unaligned_double_load.ll| 59 +++ .../CodeGen/AArch64/sme-intrinsics-loads.ll | 33 +++-- 7 files changed, 165 insertions(+), 106 deletions(-) create mode 100644 llvm/test/Analysis/CostModel/ARM/unaligned_double_load.ll diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 09309a3937fb613..8444aea8c8ac4b6 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -9815,6 +9815,11 @@ Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E, return Store; } +Value *CodeGenFunction::EmitTileslice(Value *Offset, Value *Base) { + llvm::Value *CastOffset = Builder.CreateIntCast(Offset, Int64Ty, false); + return Builder.CreateAdd(Base, CastOffset, "tileslice"); +} + Value *CodeGenFunction::EmitSMELd1St1(const SVETypeFlags &TypeFlags, SmallVectorImpl &Ops, unsigned IntID) { @@ -9870,18 +9875,34 @@ Value *CodeGenFunction::EmitSMEZero(const SVETypeFlags &TypeFlags, Value *CodeGenFunction::EmitSMELdrStr(const SVETypeFlags &TypeFlags, SmallVectorImpl &Ops, unsigned IntID) { - if (Ops.size() == 3) { -Function *Cntsb = CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb); -llvm::Value *CntsbCall = Builder.CreateCall(Cntsb, {}, "svlb"); - -llvm::Value *VecNum = Ops[2]; -llvm::Value *MulVL = Builder.CreateMul(CntsbCall, VecNum, "mulvl"); - -Ops[1] = Builder.CreateGEP(Int8Ty, Ops[1], MulVL); -Ops[0] = Builder.CreateAdd( -Ops[0], Builder.CreateIntCast(VecNum, Int32Ty, true), "tileslice"); -Ops.erase(&Ops[2]); - } + if (Ops.size() == 2) { +// Intrinsics without a vecnum also use this function, so just provide 0 +Ops.push_back(Ops[1]); +Ops[1] = Builder.getInt32(0); + } else { +int Imm = -1; +if (ConstantInt* C = dyn_cast(Ops[2])) + if (C->getZExtValue() <= 15) + Imm = C->getZExtValue(); + +if (Imm != -1) { + Ops[2] = Ops[1]; + Ops[1] = Builder.getInt32(Imm); +} else { + Function *Cntsb = CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb); + llvm::Value *CntsbCall = Builder.CreateCall(Cntsb, {}, "svlb"); + + llvm::Value *VecNum = Ops[2]; + llvm::Value *MulVL = Builder.CreateMul( + CntsbCall, + VecNum, + "mulvl"); + + Ops[2] = Builder.CreateGEP(Int8Ty, Ops[1], MulVL); + Ops[1] = Builder.getInt32(0); + Ops[0] = Builder.CreateIntCast(EmitTileslice(Ops[0], VecNum), Int32Ty, false); +} + } Function *F = CGM.getIntrinsic(IntID, {}); return Builder.CreateCall(F, Ops); } diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c index e85c47072f2df80..8e07cf1d11c19b2 100644 --- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c @@ -6,57 +6,46 @@ #include -// CHECK-C-LABEL: define dso_local void @test_svldr_vnum_za( -// CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { -// CHECK-C-NEXT: entry: -// CHECK-C-NEXT:tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE]], ptr [[PTR]]) -// CHECK-C-NEXT:ret void -// -// CHECK-CXX-LABEL: define dso_local void @_Z18test_svldr_vnum_zajPKv( -// CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { -// CHECK-CXX-NEXT: entry: -// CHECK-CXX-NEXT:tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE]], ptr [[PTR]]) -// CHECK-CXX-NEXT:ret void +// CHECK-C-LABEL: @test_svldr_vnum_za( +// CHECK-CXX-LABEL: @_Z18test_svldr_vnum_zaj
[clang] [Clang][SME2] Add single and multi min/max by vector builtins (PR #71707)
https://github.com/SamTebbs33 closed https://github.com/llvm/llvm-project/pull/71707 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [mlir] [llvm] [AArch64][SME] Remove immediate argument restriction for svldr and svstr (PR #68565)
https://github.com/SamTebbs33 closed https://github.com/llvm/llvm-project/pull/68565 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AArch64][SME] Remove immediate argument restriction for svldr and svstr (PR #68565)
@@ -292,7 +292,42 @@ define void @ldr_with_off_16mulvl(ptr %ptr) { %vscale = call i64 @llvm.vscale.i64() %mulvl = mul i64 %vscale, 256 %base = getelementptr i8, ptr %ptr, i64 %mulvl - call void @llvm.aarch64.sme.ldr(i32 16, ptr %base) + call void @llvm.aarch64.sme.ldr(i32 16, ptr %base, i32 0) + ret void; +} + +define void @ldr_with_off_var(ptr %base, i32 %off) { +; CHECK-LABEL: ldr_with_off_var: +; CHECK: // %bb.0: +; CHECK-NEXT:rdsvl x8, #1 +; CHECK-NEXT:// kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT:mov w12, #16 // =0x10 +; CHECK-NEXT:madd x8, x8, x1, x0 +; CHECK-NEXT:ldr za[w12, 0], [x8] +; CHECK-NEXT:ret + call void @llvm.aarch64.sme.ldr(i32 16, ptr %base, i32 %off) + ret void; +} + +define void @ldr_with_off_15imm(ptr %base) { +; CHECK-LABEL: ldr_with_off_15imm: +; CHECK: // %bb.0: +; CHECK-NEXT:mov w12, #16 // =0x10 +; CHECK-NEXT:ldr za[w12, 15], [x0, #15, mul vl] +; CHECK-NEXT:ret + call void @llvm.aarch64.sme.ldr(i32 16, ptr %base, i32 15) + ret void; +} + +define void @ldr_with_off_16imm(ptr %base) { +; CHECK-LABEL: ldr_with_off_16imm: +; CHECK: // %bb.0: +; CHECK-NEXT:rdsvl x8, #1 +; CHECK-NEXT:mov w12, #16 // =0x10 +; CHECK-NEXT:madd x8, x8, x12, x0 +; CHECK-NEXT:ldr za[w12, 0], [x8] SamTebbs33 wrote: I think you're right. It looks like the same thing seems to be happening with `ldr_with_off_var` as well. https://github.com/llvm/llvm-project/pull/68565 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [clang] [AArch64][SME] Remove immediate argument restriction for svldr and svstr (PR #68565)
@@ -1741,6 +1742,54 @@ void AArch64DAGToDAGISel::SelectCVTIntrinsic(SDNode *N, unsigned NumVecs, CurDAG->RemoveDeadNode(N); } +void AArch64DAGToDAGISel::SelectSMELdrStrZA(SDNode *N, bool IsLoad) { + // Lower an SME LDR/STR ZA intrinsic to LDR_ZA_PSEUDO or STR_ZA. + // If the vector select parameter is an immediate in the range 0-15 then we + // can emit it directly into the instruction as it's a legal operand. + // Otherwise we must emit 0 as the vector select operand and modify the base + // register instead. + SDLoc DL(N); + + SDValue VecNum = N->getOperand(4), Base = N->getOperand(3), + TileSlice = N->getOperand(2); + int Imm = -1; + if (auto ImmNode = dyn_cast(VecNum)) +Imm = ImmNode->getZExtValue(); + + if (Imm >= 0 && Imm <= 15) { SamTebbs33 wrote: > Something still needs to happen with the immediate argument when it doesn't > fit entirely into the instruction's immediate operand. The current result is > not correct. I don't quite understand what that something is if immediate if it doesn't fit in the range. Currently the immediate is multiplied by svl and added to the base register if it doesn't fit. What else should be done if it doesn't fit in the range? > Maybe you can restructure the code in such a way that it first tries to fold > as much of the constant into the instruction's immediate operand, and then > adds the remainder to the tile-slice and to the pointer (multiplied by SVL), > as you do in the code below. Do you mean that if, for example, we have an immediate vecnum of 17, then we put 15 into the offset field of the instruction and add 2 x SVL to the base and slice registers? Naively I don't see that being an improvement as you're doing the multiplication in two places (ldr instruction and an extra madd) rather than one. https://github.com/llvm/llvm-project/pull/68565 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [clang] [compiler-rt] [AArch64][SME] Add support for sme-fa64 (PR #70809)
https://github.com/SamTebbs33 approved this pull request. LGTM https://github.com/llvm/llvm-project/pull/70809 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64][SME] Remove immediate argument restriction for svldr and svstr (PR #68565)
@@ -1741,6 +1742,54 @@ void AArch64DAGToDAGISel::SelectCVTIntrinsic(SDNode *N, unsigned NumVecs, CurDAG->RemoveDeadNode(N); } +void AArch64DAGToDAGISel::SelectSMELdrStrZA(SDNode *N, bool IsLoad) { + // Lower an SME LDR/STR ZA intrinsic to LDR_ZA_PSEUDO or STR_ZA. + // If the vector select parameter is an immediate in the range 0-15 then we + // can emit it directly into the instruction as it's a legal operand. + // Otherwise we must emit 0 as the vector select operand and modify the base + // register instead. + SDLoc DL(N); + + SDValue VecNum = N->getOperand(4), Base = N->getOperand(3), + TileSlice = N->getOperand(2); + int Imm = -1; + if (auto ImmNode = dyn_cast(VecNum)) +Imm = ImmNode->getZExtValue(); + + if (Imm >= 0 && Imm <= 15) { SamTebbs33 wrote: We had an offline discussion and agreed that when the vnum is an immediate but is greater than a multiple of 15, it makes sense to modify the base and slice registers by that multiple and put the remainder in the immediate field of the instruction. That way when we unroll a loop with lots of ldrs offset by one from each other, they can all share the base and slice modifications but have increasing immediates. https://github.com/llvm/llvm-project/pull/68565 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64][SME] Remove immediate argument restriction for svldr and svstr (PR #68565)
https://github.com/SamTebbs33 edited https://github.com/llvm/llvm-project/pull/68565 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64][SME] Remove immediate argument restriction for svldr and svstr (PR #68565)
https://github.com/SamTebbs33 updated https://github.com/llvm/llvm-project/pull/68565 >From de07976922782b9dcf5d13d44551b782dc8b3b94 Mon Sep 17 00:00:00 2001 From: Samuel Tebbs Date: Fri, 6 Oct 2023 17:09:36 +0100 Subject: [PATCH 1/5] [AArch64][SME] Remove immediate argument restriction for svldr and svstr The svldr_vnum_za and svstr_vnum_za builtins/intrinsics currently require that the vnum argument be an immediate, since the instructions take an immediate vector number. However, we emit 0 as the immediate for the instruction no matter what, and instead modify the base register. This patch removes that restriction on the argument, so that the argument can be a non-immediate. If an appropriate immediate was passed to the builtin then CGBuiltin passes that directly to the LLVM intrinsic, otherwise it modifies the base register as is existing behaviour. --- clang/lib/CodeGen/CGBuiltin.cpp | 45 .../aarch64-sme-intrinsics/acle_sme_ldr.c | 71 --- .../aarch64-sme-intrinsics/acle_sme_str.c | 51 - llvm/include/llvm/IR/IntrinsicsAArch64.td | 4 +- llvm/lib/Target/AArch64/SMEInstrFormats.td| 10 +-- .../CostModel/ARM/unaligned_double_load.ll| 59 +++ .../CodeGen/AArch64/sme-intrinsics-loads.ll | 33 +++-- 7 files changed, 166 insertions(+), 107 deletions(-) create mode 100644 llvm/test/Analysis/CostModel/ARM/unaligned_double_load.ll diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index e1211bb8949b665..63508b40096141e 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -9712,6 +9712,11 @@ Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E, return Store; } +Value *CodeGenFunction::EmitTileslice(Value *Offset, Value *Base) { + llvm::Value *CastOffset = Builder.CreateIntCast(Offset, Int64Ty, false); + return Builder.CreateAdd(Base, CastOffset, "tileslice"); +} + Value *CodeGenFunction::EmitSMELd1St1(const SVETypeFlags &TypeFlags, SmallVectorImpl &Ops, unsigned IntID) { @@ -9767,18 +9772,34 @@ Value *CodeGenFunction::EmitSMEZero(const SVETypeFlags &TypeFlags, Value *CodeGenFunction::EmitSMELdrStr(const SVETypeFlags &TypeFlags, SmallVectorImpl &Ops, unsigned IntID) { - if (Ops.size() == 3) { -Function *Cntsb = CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb); -llvm::Value *CntsbCall = Builder.CreateCall(Cntsb, {}, "svlb"); - -llvm::Value *VecNum = Ops[2]; -llvm::Value *MulVL = Builder.CreateMul(CntsbCall, VecNum, "mulvl"); - -Ops[1] = Builder.CreateGEP(Int8Ty, Ops[1], MulVL); -Ops[0] = Builder.CreateAdd( -Ops[0], Builder.CreateIntCast(VecNum, Int32Ty, true), "tileslice"); -Ops.erase(&Ops[2]); - } + if (Ops.size() == 2) { +// Intrinsics without a vecnum also use this function, so just provide 0 +Ops.push_back(Ops[1]); +Ops[1] = Builder.getInt32(0); + } else { +int Imm = -1; +if (ConstantInt* C = dyn_cast(Ops[2])) + if (C->getZExtValue() <= 15) + Imm = C->getZExtValue(); + +if (Imm != -1) { + Ops[2] = Ops[1]; + Ops[1] = Builder.getInt32(Imm); +} else { + Function *Cntsb = CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb); + llvm::Value *CntsbCall = Builder.CreateCall(Cntsb, {}, "svlb"); + + llvm::Value *VecNum = Ops[2]; + llvm::Value *MulVL = Builder.CreateMul( + CntsbCall, + VecNum, + "mulvl"); + + Ops[2] = Builder.CreateGEP(Int8Ty, Ops[1], MulVL); + Ops[1] = Builder.getInt32(0); + Ops[0] = Builder.CreateIntCast(EmitTileslice(Ops[0], VecNum), Int32Ty, false); +} + } Function *F = CGM.getIntrinsic(IntID, {}); return Builder.CreateCall(F, Ops); } diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c index e85c47072f2df80..8e07cf1d11c19b2 100644 --- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c @@ -6,57 +6,46 @@ #include -// CHECK-C-LABEL: define dso_local void @test_svldr_vnum_za( -// CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { -// CHECK-C-NEXT: entry: -// CHECK-C-NEXT:tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE]], ptr [[PTR]]) -// CHECK-C-NEXT:ret void -// -// CHECK-CXX-LABEL: define dso_local void @_Z18test_svldr_vnum_zajPKv( -// CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { -// CHECK-CXX-NEXT: entry: -// CHECK-CXX-NEXT:tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE]], ptr [[PTR]]) -// CHECK-CXX-NEXT:ret void +// CHECK-C-LABEL: @test_svldr_vnum_za( +// CHECK-CXX-LABEL: @_Z18test_svldr_vnum_zajPK
[clang] [llvm] [AArch64][SME] Remove immediate argument restriction for svldr and svstr (PR #68565)
SamTebbs33 wrote: I've changed the approach to consider immediates outside of 0-15 and fixed the issue of the tile slice not being updated. Please let me know what you think. https://github.com/llvm/llvm-project/pull/68565 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [clang] [AArch64][SME] Remove immediate argument restriction for svldr and svstr (PR #68565)
https://github.com/SamTebbs33 updated https://github.com/llvm/llvm-project/pull/68565 >From de07976922782b9dcf5d13d44551b782dc8b3b94 Mon Sep 17 00:00:00 2001 From: Samuel Tebbs Date: Fri, 6 Oct 2023 17:09:36 +0100 Subject: [PATCH 1/4] [AArch64][SME] Remove immediate argument restriction for svldr and svstr The svldr_vnum_za and svstr_vnum_za builtins/intrinsics currently require that the vnum argument be an immediate, since the instructions take an immediate vector number. However, we emit 0 as the immediate for the instruction no matter what, and instead modify the base register. This patch removes that restriction on the argument, so that the argument can be a non-immediate. If an appropriate immediate was passed to the builtin then CGBuiltin passes that directly to the LLVM intrinsic, otherwise it modifies the base register as is existing behaviour. --- clang/lib/CodeGen/CGBuiltin.cpp | 45 .../aarch64-sme-intrinsics/acle_sme_ldr.c | 71 --- .../aarch64-sme-intrinsics/acle_sme_str.c | 51 - llvm/include/llvm/IR/IntrinsicsAArch64.td | 4 +- llvm/lib/Target/AArch64/SMEInstrFormats.td| 10 +-- .../CostModel/ARM/unaligned_double_load.ll| 59 +++ .../CodeGen/AArch64/sme-intrinsics-loads.ll | 33 +++-- 7 files changed, 166 insertions(+), 107 deletions(-) create mode 100644 llvm/test/Analysis/CostModel/ARM/unaligned_double_load.ll diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index e1211bb8949b665..63508b40096141e 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -9712,6 +9712,11 @@ Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E, return Store; } +Value *CodeGenFunction::EmitTileslice(Value *Offset, Value *Base) { + llvm::Value *CastOffset = Builder.CreateIntCast(Offset, Int64Ty, false); + return Builder.CreateAdd(Base, CastOffset, "tileslice"); +} + Value *CodeGenFunction::EmitSMELd1St1(const SVETypeFlags &TypeFlags, SmallVectorImpl &Ops, unsigned IntID) { @@ -9767,18 +9772,34 @@ Value *CodeGenFunction::EmitSMEZero(const SVETypeFlags &TypeFlags, Value *CodeGenFunction::EmitSMELdrStr(const SVETypeFlags &TypeFlags, SmallVectorImpl &Ops, unsigned IntID) { - if (Ops.size() == 3) { -Function *Cntsb = CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb); -llvm::Value *CntsbCall = Builder.CreateCall(Cntsb, {}, "svlb"); - -llvm::Value *VecNum = Ops[2]; -llvm::Value *MulVL = Builder.CreateMul(CntsbCall, VecNum, "mulvl"); - -Ops[1] = Builder.CreateGEP(Int8Ty, Ops[1], MulVL); -Ops[0] = Builder.CreateAdd( -Ops[0], Builder.CreateIntCast(VecNum, Int32Ty, true), "tileslice"); -Ops.erase(&Ops[2]); - } + if (Ops.size() == 2) { +// Intrinsics without a vecnum also use this function, so just provide 0 +Ops.push_back(Ops[1]); +Ops[1] = Builder.getInt32(0); + } else { +int Imm = -1; +if (ConstantInt* C = dyn_cast(Ops[2])) + if (C->getZExtValue() <= 15) + Imm = C->getZExtValue(); + +if (Imm != -1) { + Ops[2] = Ops[1]; + Ops[1] = Builder.getInt32(Imm); +} else { + Function *Cntsb = CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb); + llvm::Value *CntsbCall = Builder.CreateCall(Cntsb, {}, "svlb"); + + llvm::Value *VecNum = Ops[2]; + llvm::Value *MulVL = Builder.CreateMul( + CntsbCall, + VecNum, + "mulvl"); + + Ops[2] = Builder.CreateGEP(Int8Ty, Ops[1], MulVL); + Ops[1] = Builder.getInt32(0); + Ops[0] = Builder.CreateIntCast(EmitTileslice(Ops[0], VecNum), Int32Ty, false); +} + } Function *F = CGM.getIntrinsic(IntID, {}); return Builder.CreateCall(F, Ops); } diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c index e85c47072f2df80..8e07cf1d11c19b2 100644 --- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c @@ -6,57 +6,46 @@ #include -// CHECK-C-LABEL: define dso_local void @test_svldr_vnum_za( -// CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { -// CHECK-C-NEXT: entry: -// CHECK-C-NEXT:tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE]], ptr [[PTR]]) -// CHECK-C-NEXT:ret void -// -// CHECK-CXX-LABEL: define dso_local void @_Z18test_svldr_vnum_zajPKv( -// CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { -// CHECK-CXX-NEXT: entry: -// CHECK-CXX-NEXT:tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE]], ptr [[PTR]]) -// CHECK-CXX-NEXT:ret void +// CHECK-C-LABEL: @test_svldr_vnum_za( +// CHECK-CXX-LABEL: @_Z18test_svldr_vnum_zajPK
[llvm] [clang] [AArch64][SME] Remove immediate argument restriction for svldr and svstr (PR #68565)
@@ -1741,6 +1742,69 @@ void AArch64DAGToDAGISel::SelectCVTIntrinsic(SDNode *N, unsigned NumVecs, CurDAG->RemoveDeadNode(N); } +void AArch64DAGToDAGISel::SelectSMELdrStrZA(SDNode *N, bool IsLoad) { + // Lower an SME LDR/STR ZA intrinsic to LDR_ZA_PSEUDO or STR_ZA. + // If the vector number is an immediate between 0 and 15 inclusive then we can + // put that directly into the immediate field of the instruction. If it's + // outside of that range then we modify the base and slice by the greatest + // multiple of 15 smaller than that number and put the remainder in the + // instruction field. If it's not an immediate then we modify the base and + // slice registers by that number and put 0 in the instruction. + SDLoc DL(N); + + SDValue TileSlice = N->getOperand(2); + SDValue Base = N->getOperand(3); + SDValue VecNum = N->getOperand(4); + SDValue Remainder = CurDAG->getTargetConstant(0, DL, MVT::i32); + + // true if the base and slice registers need to me modified + bool NeedsAdd = true; + if (auto ImmNode = dyn_cast(VecNum)) { +int Imm = ImmNode->getSExtValue(); +if (Imm >= 0 && Imm <= 15) { + Remainder = CurDAG->getTargetConstant(Imm, DL, MVT::i32); + NeedsAdd = false; +} else { + Remainder = CurDAG->getTargetConstant(Imm % 15, DL, MVT::i32); SamTebbs33 wrote: Yep, the range of the immediate is 0-15. https://github.com/llvm/llvm-project/pull/68565 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [clang] [AArch64][SME] Remove immediate argument restriction for svldr and svstr (PR #68565)
@@ -292,23 +292,101 @@ define void @ldr_with_off_16mulvl(ptr %ptr) { %vscale = call i64 @llvm.vscale.i64() %mulvl = mul i64 %vscale, 256 %base = getelementptr i8, ptr %ptr, i64 %mulvl - call void @llvm.aarch64.sme.ldr(i32 16, ptr %base) + call void @llvm.aarch64.sme.ldr(i32 16, ptr %base, i32 0) ret void; } +define void @ldr_with_off_var(ptr %base, i32 %off) { +; CHECK-LABEL: ldr_with_off_var: +; CHECK: // %bb.0: +; CHECK-NEXT:// kill: def $w2 killed $w2 def $x2 +; CHECK-NEXT:sxtw x8, w2 SamTebbs33 wrote: I did forget to stage those changes. Thanks for noticing. https://github.com/llvm/llvm-project/pull/68565 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SME2] Add multi-vector unpack builtins (PR #75075)
https://github.com/SamTebbs33 approved this pull request. Looks good to me. https://github.com/llvm/llvm-project/pull/75075 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AArch64][SME] Warn when using a streaming builtin from a non-streaming function (PR #74064)
@@ -1694,6 +1697,61 @@ void SVEEmitter::createSMERangeChecks(raw_ostream &OS) { OS << "#endif\n\n"; } +void SVEEmitter::createStreamingAttrs(raw_ostream &OS, ACLEKind Kind) { + std::vector RV = Records.getAllDerivedDefinitions("Inst"); + SmallVector, 128> Defs; + for (auto *R : RV) +createIntrinsic(R, Defs); + + // The mappings must be sorted based on BuiltinID. + llvm::sort(Defs, [](const std::unique_ptr &A, + const std::unique_ptr &B) { +return A->getMangledName() < B->getMangledName(); + }); + + switch (Kind) { SamTebbs33 wrote: That looks nice and clean to me. Thanks for the suggestion. https://github.com/llvm/llvm-project/pull/74064 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AArch64][SME] Warn when using a streaming builtin from a non-streaming function (PR #74064)
@@ -3168,9 +3167,60 @@ static void checkArmStreamingBuiltin(Sema &S, CallExpr *TheCall, << TheCall->getSourceRange() << "streaming compatible"; return; } + + if (FnType == ArmNonStreaming && BuiltinType == ArmStreaming) { +S.Diag(TheCall->getBeginLoc(), diag::warn_attribute_arm_sm_incompat_builtin) +<< TheCall->getSourceRange() << "non-streaming"; + } +} + +bool Sema::CheckSMEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { + if (const FunctionDecl *FD = getCurFunctionDecl()) { +ArmStreamingType BuiltinType; + +switch (BuiltinID) { +default: + BuiltinType = ArmNonStreaming; + break; +#define GET_SME_STREAMING_ATTRS +#include "clang/Basic/arm_sme_streaming_attrs.inc" +#undef GET_SME_STREAMING_ATTRS +#define GET_SVE_STREAMING_ATTRS +#include "clang/Basic/arm_sve_streaming_attrs.inc" SamTebbs33 wrote: Good spot, thanks. https://github.com/llvm/llvm-project/pull/74064 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AArch64][SME] Warn when using a streaming builtin from a non-streaming function (PR #74064)
https://github.com/SamTebbs33 closed https://github.com/llvm/llvm-project/pull/74064 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] Revert "[AArch64][SME] Warn when using a streaming builtin from a non-streaming function" (PR #75449)
https://github.com/SamTebbs33 closed https://github.com/llvm/llvm-project/pull/75449 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AArch64][SME] Warn when using a streaming builtin from a non-streaming function (PR #74064)
SamTebbs33 wrote: Thanks for reporting that Nico. I've reverted the patch and will work on improving compile time. I like your idea Sander. https://github.com/llvm/llvm-project/pull/74064 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AArch64][SME] Warn when using a streaming builtin from a non-streaming function (PR #75487)
SamTebbs33 wrote: Commit [6dc8fa2](https://github.com/llvm/llvm-project/pull/75487/commits/6dc8fa2c89159d234d9477358dd1ce1cbf059865) is the only one with new content. [d9a8da1](https://github.com/llvm/llvm-project/pull/75487/commits/d9a8da139e93f0b3259cb22d7f4c55ed46f9f265) is the exact same as what was reverted. https://github.com/llvm/llvm-project/pull/75487 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AArch64][SME] Warn when using a streaming builtin from a non-streaming function (PR #74064)
SamTebbs33 wrote: New PR at https://github.com/llvm/llvm-project/pull/75487 https://github.com/llvm/llvm-project/pull/74064 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AArch64][SME] Warn when using a streaming builtin from a non-streaming function (PR #75487)
SamTebbs33 wrote: > If it's not too much trouble, could you `git cherry-pick > c60663d128f8e0dccd418bdf16ecc403b96aa74a` into this? (Cool if not, ofc.) Sure, I can do that. Would you also mind attempting to reproduce the compile time with the latest commit, just to make sure it fixes the issue on your end? I saw a 36 -> 31 second (compared to 30 on main) decrease which seems OK but isn't on the same scale as the numbers you're seeing. https://github.com/llvm/llvm-project/pull/75487 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AArch64][SME] Warn when using a streaming builtin from a non-streaming function (PR #75487)
@@ -1702,6 +1705,62 @@ void SVEEmitter::createSMERangeChecks(raw_ostream &OS) { OS << "#endif\n\n"; } +void SVEEmitter::createStreamingAttrs(raw_ostream &OS, ACLEKind Kind) { + std::vector RV = Records.getAllDerivedDefinitions("Inst"); + SmallVector, 128> Defs; + for (auto *R : RV) +createIntrinsic(R, Defs); + + // The mappings must be sorted based on BuiltinID. + llvm::sort(Defs, [](const std::unique_ptr &A, + const std::unique_ptr &B) { +return A->getMangledName() < B->getMangledName(); + }); + + StringRef ExtensionKind; + switch (Kind) { + case ACLEKind::SME: +ExtensionKind = "SME"; +break; + case ACLEKind::SVE: +ExtensionKind = "SVE"; +break; + } + + OS << "#ifdef GET_" << ExtensionKind << "_STREAMING_ATTRS\n"; + + // Ensure these are only emitted once. + std::set Emitted; + llvm::StringMap> StreamingMap; SamTebbs33 wrote: Thanks for the ideas. I ended up removing the sorting since grouping the builtins within the map makes the sorting across all builtins redundant anyway. https://github.com/llvm/llvm-project/pull/75487 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AArch64][SME] Warn when using a streaming builtin from a non-streaming function (PR #75487)
SamTebbs33 wrote: @nico I tried to cherry-pick your commit (c60663d128f8e0dccd418bdf16ecc403b96aa74a) into my branch but for some reason it was always empty. It might be best for you to try recommitting it now. https://github.com/llvm/llvm-project/pull/75487 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AArch64][SME] Warn when using a streaming builtin from a non-streaming function (PR #75487)
https://github.com/SamTebbs33 closed https://github.com/llvm/llvm-project/pull/75487 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AArch64][SME2] Enable bfm builtins for sme2 (PR #71927)
https://github.com/SamTebbs33 updated https://github.com/llvm/llvm-project/pull/71927 >From 93a02d9af1d7e4f1e23c252d72b20d292927a79f Mon Sep 17 00:00:00 2001 From: Samuel Tebbs Date: Thu, 9 Nov 2023 17:39:26 + Subject: [PATCH 1/5] [AArch64][SME2] Enable bfm builtings for sme2 This patch enables the following builtins for SME2 svbfmlslb_f32 svbfmlslb_lane_f32 svbfmlslt_f32 svbfmlslt_lane_f32 Patch by: Kerry McLaughlin --- clang/include/clang/Basic/arm_sve.td | 15 +-- .../acle_sve2p1_bfmlsl.c | 5 + 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 98434c5c53e28c..38437e98858b23 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -2057,12 +2057,6 @@ def SVDOT_X2_F : SInst<"svdot[_{d}_{2}_{3}]", "ddhh", "f", MergeNone, "aarch64_ def SVDOT_LANE_X2_S : SInst<"svdot_lane[_{d}_{2}_{3}]", "ddhhi", "i", MergeNone, "aarch64_sve_sdot_lane_x2", [], [ImmCheck<3, ImmCheck0_3>]>; def SVDOT_LANE_X2_U : SInst<"svdot_lane[_{d}_{2}_{3}]", "ddhhi", "Ui", MergeNone, "aarch64_sve_udot_lane_x2", [], [ImmCheck<3, ImmCheck0_3>]>; def SVDOT_LANE_X2_F : SInst<"svdot_lane[_{d}_{2}_{3}]", "ddhhi", "f", MergeNone, "aarch64_sve_fdot_lane_x2", [], [ImmCheck<3, ImmCheck0_3>]>; - -def SVBFMLSLB : SInst<"svbfmlslb[_{d}]", "dd$$", "f", MergeNone, "aarch64_sve_bfmlslb", [IsOverloadNone], []>; -def SVBFMLSLT : SInst<"svbfmlslt[_{d}]", "dd$$", "f", MergeNone, "aarch64_sve_bfmlslt", [IsOverloadNone], []>; - -def SVBFMLSLB_LANE : SInst<"svbfmlslb_lane[_{d}]", "dd$$i", "f", MergeNone, "aarch64_sve_bfmlslb_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>; -def SVBFMLSLT_LANE : SInst<"svbfmlslt_lane[_{d}]", "dd$$i", "f", MergeNone, "aarch64_sve_bfmlslt_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>; } let TargetGuard = "sve2p1" in { @@ -2306,3 +2300,12 @@ let TargetGuard = "sme2" in { def SVSUNPK_X4 : SInst<"svunpk_{d}[_{3}_x4]", "42.h", "sil",MergeNone, "aarch64_sve_sunpk_x4", [IsStreaming], []>; def SVUUNPK_X4 : SInst<"svunpk_{d}[_{3}_x4]", "42.h", "UsUiUl", MergeNone, "aarch64_sve_uunpk_x4", [IsStreaming], []>; } + +let TargetGuard = "sve2p1|sme" in { +// == BFloat16 multiply-subtract == + def SVBFMLSLB : SInst<"svbfmlslb[_{d}]", "dd$$", "f", MergeNone, "aarch64_sve_bfmlslb", [IsOverloadNone], []>; + def SVBFMLSLT : SInst<"svbfmlslt[_{d}]", "dd$$", "f", MergeNone, "aarch64_sve_bfmlslt", [IsOverloadNone], []>; + + def SVBFMLSLB_LANE : SInst<"svbfmlslb_lane[_{d}]", "dd$$i", "f", MergeNone, "aarch64_sve_bfmlslb_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>; + def SVBFMLSLT_LANE : SInst<"svbfmlslt_lane[_{d}]", "dd$$i", "f", MergeNone, "aarch64_sve_bfmlslt_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>; +} diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmlsl.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmlsl.c index 7e1763a63ec4e1..519bf019ff35ba 100644 --- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmlsl.c +++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmlsl.c @@ -2,10 +2,15 @@ // REQUIRES: aarch64-registered-target // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tail
[clang] [AArch64][SME2] Enable bfm builtins for sme2 (PR #71927)
@@ -1992,3 +1986,12 @@ let TargetGuard = "sme2" in { def SVADD_SINGLE_X2 : SInst<"svadd[_single_{d}_x2]", "22d", "cUcsUsiUilUl", MergeNone, "aarch64_sve_add_single_x2", [IsStreaming], []>; def SVADD_SINGLE_X4 : SInst<"svadd[_single_{d}_x4]", "44d", "cUcsUsiUilUl", MergeNone, "aarch64_sve_add_single_x4", [IsStreaming], []>; } + +let TargetGuard = "sve2p1|sme2" in { +// == BFloat16 multiply-subtract == + def SVBFMLSLB : SInst<"svbfmlslb[_{d}]", "dd$$", "f", MergeNone, "aarch64_sve_bfmlslb", [IsOverloadNone, IsStreaming], []>; + def SVBFMLSLT : SInst<"svbfmlslt[_{d}]", "dd$$", "f", MergeNone, "aarch64_sve_bfmlslt", [IsOverloadNone, IsStreaming], []>; + + def SVBFMLSLB_LANE : SInst<"svbfmlslb_lane[_{d}]", "dd$$i", "f", MergeNone, "aarch64_sve_bfmlslb_lane", [IsOverloadNone, IsStreaming], [ImmCheck<3, ImmCheck0_7>]>; SamTebbs33 wrote: Thanks Sander. I've made these builtins use `IsStreamingCompatible`, which will be changed to `IsStreamingOrSVE2p1` once that is added. https://github.com/llvm/llvm-project/pull/71927 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AArch64][SME2] Enable bfm builtins for sme2 (PR #71927)
SamTebbs33 wrote: Rebased to fix a merge conflict and made the builtins temporarily `IsStreamingCompatible`. https://github.com/llvm/llvm-project/pull/71927 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AArch64][SME2] Add SME2 MLA/MLS builtins. (PR #75584)
https://github.com/SamTebbs33 approved this pull request. LGTM, thanks! https://github.com/llvm/llvm-project/pull/75584 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AArch64][SME2] Enable bfm builtins for sme2 (PR #71927)
https://github.com/SamTebbs33 updated https://github.com/llvm/llvm-project/pull/71927 >From 93a02d9af1d7e4f1e23c252d72b20d292927a79f Mon Sep 17 00:00:00 2001 From: Samuel Tebbs Date: Thu, 9 Nov 2023 17:39:26 + Subject: [PATCH 1/6] [AArch64][SME2] Enable bfm builtings for sme2 This patch enables the following builtins for SME2 svbfmlslb_f32 svbfmlslb_lane_f32 svbfmlslt_f32 svbfmlslt_lane_f32 Patch by: Kerry McLaughlin --- clang/include/clang/Basic/arm_sve.td | 15 +-- .../acle_sve2p1_bfmlsl.c | 5 + 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 98434c5c53e28c..38437e98858b23 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -2057,12 +2057,6 @@ def SVDOT_X2_F : SInst<"svdot[_{d}_{2}_{3}]", "ddhh", "f", MergeNone, "aarch64_ def SVDOT_LANE_X2_S : SInst<"svdot_lane[_{d}_{2}_{3}]", "ddhhi", "i", MergeNone, "aarch64_sve_sdot_lane_x2", [], [ImmCheck<3, ImmCheck0_3>]>; def SVDOT_LANE_X2_U : SInst<"svdot_lane[_{d}_{2}_{3}]", "ddhhi", "Ui", MergeNone, "aarch64_sve_udot_lane_x2", [], [ImmCheck<3, ImmCheck0_3>]>; def SVDOT_LANE_X2_F : SInst<"svdot_lane[_{d}_{2}_{3}]", "ddhhi", "f", MergeNone, "aarch64_sve_fdot_lane_x2", [], [ImmCheck<3, ImmCheck0_3>]>; - -def SVBFMLSLB : SInst<"svbfmlslb[_{d}]", "dd$$", "f", MergeNone, "aarch64_sve_bfmlslb", [IsOverloadNone], []>; -def SVBFMLSLT : SInst<"svbfmlslt[_{d}]", "dd$$", "f", MergeNone, "aarch64_sve_bfmlslt", [IsOverloadNone], []>; - -def SVBFMLSLB_LANE : SInst<"svbfmlslb_lane[_{d}]", "dd$$i", "f", MergeNone, "aarch64_sve_bfmlslb_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>; -def SVBFMLSLT_LANE : SInst<"svbfmlslt_lane[_{d}]", "dd$$i", "f", MergeNone, "aarch64_sve_bfmlslt_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>; } let TargetGuard = "sve2p1" in { @@ -2306,3 +2300,12 @@ let TargetGuard = "sme2" in { def SVSUNPK_X4 : SInst<"svunpk_{d}[_{3}_x4]", "42.h", "sil",MergeNone, "aarch64_sve_sunpk_x4", [IsStreaming], []>; def SVUUNPK_X4 : SInst<"svunpk_{d}[_{3}_x4]", "42.h", "UsUiUl", MergeNone, "aarch64_sve_uunpk_x4", [IsStreaming], []>; } + +let TargetGuard = "sve2p1|sme" in { +// == BFloat16 multiply-subtract == + def SVBFMLSLB : SInst<"svbfmlslb[_{d}]", "dd$$", "f", MergeNone, "aarch64_sve_bfmlslb", [IsOverloadNone], []>; + def SVBFMLSLT : SInst<"svbfmlslt[_{d}]", "dd$$", "f", MergeNone, "aarch64_sve_bfmlslt", [IsOverloadNone], []>; + + def SVBFMLSLB_LANE : SInst<"svbfmlslb_lane[_{d}]", "dd$$i", "f", MergeNone, "aarch64_sve_bfmlslb_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>; + def SVBFMLSLT_LANE : SInst<"svbfmlslt_lane[_{d}]", "dd$$i", "f", MergeNone, "aarch64_sve_bfmlslt_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>; +} diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmlsl.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmlsl.c index 7e1763a63ec4e1..519bf019ff35ba 100644 --- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmlsl.c +++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmlsl.c @@ -2,10 +2,15 @@ // REQUIRES: aarch64-registered-target // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tail
[clang] [Clang][SME2] Enable multi-vector loads & stores for SME2 (PR #75821)
SamTebbs33 wrote: Should these builtins be `IsStreamingCompatible` until we add `IsStreamingOrSVE2p1`? https://github.com/llvm/llvm-project/pull/75821 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SME] Warn when a function doesn't have ZA state (PR #75805)
https://github.com/SamTebbs33 closed https://github.com/llvm/llvm-project/pull/75805 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AArch64][SME2] Enable bfm builtins for sme2 (PR #71927)
https://github.com/SamTebbs33 updated https://github.com/llvm/llvm-project/pull/71927 >From 93a02d9af1d7e4f1e23c252d72b20d292927a79f Mon Sep 17 00:00:00 2001 From: Samuel Tebbs Date: Thu, 9 Nov 2023 17:39:26 + Subject: [PATCH 1/7] [AArch64][SME2] Enable bfm builtings for sme2 This patch enables the following builtins for SME2 svbfmlslb_f32 svbfmlslb_lane_f32 svbfmlslt_f32 svbfmlslt_lane_f32 Patch by: Kerry McLaughlin --- clang/include/clang/Basic/arm_sve.td | 15 +-- .../acle_sve2p1_bfmlsl.c | 5 + 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 98434c5c53e28c..38437e98858b23 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -2057,12 +2057,6 @@ def SVDOT_X2_F : SInst<"svdot[_{d}_{2}_{3}]", "ddhh", "f", MergeNone, "aarch64_ def SVDOT_LANE_X2_S : SInst<"svdot_lane[_{d}_{2}_{3}]", "ddhhi", "i", MergeNone, "aarch64_sve_sdot_lane_x2", [], [ImmCheck<3, ImmCheck0_3>]>; def SVDOT_LANE_X2_U : SInst<"svdot_lane[_{d}_{2}_{3}]", "ddhhi", "Ui", MergeNone, "aarch64_sve_udot_lane_x2", [], [ImmCheck<3, ImmCheck0_3>]>; def SVDOT_LANE_X2_F : SInst<"svdot_lane[_{d}_{2}_{3}]", "ddhhi", "f", MergeNone, "aarch64_sve_fdot_lane_x2", [], [ImmCheck<3, ImmCheck0_3>]>; - -def SVBFMLSLB : SInst<"svbfmlslb[_{d}]", "dd$$", "f", MergeNone, "aarch64_sve_bfmlslb", [IsOverloadNone], []>; -def SVBFMLSLT : SInst<"svbfmlslt[_{d}]", "dd$$", "f", MergeNone, "aarch64_sve_bfmlslt", [IsOverloadNone], []>; - -def SVBFMLSLB_LANE : SInst<"svbfmlslb_lane[_{d}]", "dd$$i", "f", MergeNone, "aarch64_sve_bfmlslb_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>; -def SVBFMLSLT_LANE : SInst<"svbfmlslt_lane[_{d}]", "dd$$i", "f", MergeNone, "aarch64_sve_bfmlslt_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>; } let TargetGuard = "sve2p1" in { @@ -2306,3 +2300,12 @@ let TargetGuard = "sme2" in { def SVSUNPK_X4 : SInst<"svunpk_{d}[_{3}_x4]", "42.h", "sil",MergeNone, "aarch64_sve_sunpk_x4", [IsStreaming], []>; def SVUUNPK_X4 : SInst<"svunpk_{d}[_{3}_x4]", "42.h", "UsUiUl", MergeNone, "aarch64_sve_uunpk_x4", [IsStreaming], []>; } + +let TargetGuard = "sve2p1|sme" in { +// == BFloat16 multiply-subtract == + def SVBFMLSLB : SInst<"svbfmlslb[_{d}]", "dd$$", "f", MergeNone, "aarch64_sve_bfmlslb", [IsOverloadNone], []>; + def SVBFMLSLT : SInst<"svbfmlslt[_{d}]", "dd$$", "f", MergeNone, "aarch64_sve_bfmlslt", [IsOverloadNone], []>; + + def SVBFMLSLB_LANE : SInst<"svbfmlslb_lane[_{d}]", "dd$$i", "f", MergeNone, "aarch64_sve_bfmlslb_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>; + def SVBFMLSLT_LANE : SInst<"svbfmlslt_lane[_{d}]", "dd$$i", "f", MergeNone, "aarch64_sve_bfmlslt_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>; +} diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmlsl.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmlsl.c index 7e1763a63ec4e1..519bf019ff35ba 100644 --- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmlsl.c +++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmlsl.c @@ -2,10 +2,15 @@ // REQUIRES: aarch64-registered-target // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tail
[clang] [AArch64][SME2] Enable bfm builtins for sme2 (PR #71927)
https://github.com/SamTebbs33 closed https://github.com/llvm/llvm-project/pull/71927 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SME] Add IsStreamingOrSVE2p1 (PR #75958)
@@ -679,6 +679,7 @@ bool AArch64TargetInfo::hasFeature(StringRef Feature) const { .Case("f32mm", FPU & SveMode && HasMatmulFP32) .Case("f64mm", FPU & SveMode && HasMatmulFP64) .Case("sve2", FPU & SveMode && HasSVE2) + .Case("sve2p1", HasSVE2p1) SamTebbs33 wrote: Yep, you're right. I've removed these changes now. https://github.com/llvm/llvm-project/pull/75958 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SME] Add IsStreamingOrSVE2p1 (PR #75958)
@@ -8,7 +8,7 @@ // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -target-feature -S -DTEST_SME2 -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature -S -DTEST_SME2 -disable-O0-optnone -Werror -Wall -o /dev/null %s SamTebbs33 wrote: Removed, thanks! https://github.com/llvm/llvm-project/pull/75958 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SME] Add IsStreamingOrSVE2p1 (PR #75958)
@@ -11,10 +11,16 @@ // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sve2p1 \ // RUN: -S -disable-O0-optnone -Werror -Wall -o /dev/null %s // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve \ -// RUN: -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: -S -DTEST_SME2 -disable-O0-optnone -Werror -Wall -o /dev/null %s #include +#ifndef TEST_SME2 +#define ATTR +#else +#define ATTR __arm_streaming SamTebbs33 wrote: arm_sve.td has SVFCLAMP as `IsStreamingOrSVE2p1` so I think it has to be `__arm_streaming``, or is the builtin def incorrect? https://github.com/llvm/llvm-project/pull/75958 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SME] Add IsStreamingOrSVE2p1 (PR #75958)
@@ -1,14 +1,20 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: aarch64-registered-target // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -DTEST_SME2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -DTEST_SME2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -DTEST_SME2 -disable-O0-optnone -Werror -Wall -o /dev/null %s #include +#ifndef TEST_SME2 +#define ATTR +#else +#define ATTR __arm_streaming SamTebbs33 wrote: It's actually marked as streaming or sve2p1 in arm_sve.td. Is that incorrect? https://github.com/llvm/llvm-project/pull/75958 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SME] Add IsStreamingOrSVE2p1 (PR #75958)
@@ -10,6 +10,10 @@ // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu \ // RUN: -target-feature +sve2p1 -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu \ +// RUN: -target-feature +sme2 -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s SamTebbs33 wrote: Yep ypu're right, thanks! https://github.com/llvm/llvm-project/pull/75958 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AArch64] Warn when calling a NEON builtin in a streaming function (PR #73672)
https://github.com/SamTebbs33 created https://github.com/llvm/llvm-project/pull/73672 This patch introduces a warning that is emitted when a Neon builtin is called from a streaming function, as that situation is not supported. Uses work by Kerry McLaughlin. >From 96464a9c37a532216e4df6c003aa1a8fcb448637 Mon Sep 17 00:00:00 2001 From: Samuel Tebbs Date: Tue, 28 Nov 2023 16:22:32 + Subject: [PATCH] [AArch64] Warn when calling a NEON builtin in a streaming function This patch introduces a warning that is emitted when a Neon builtin is called from a streaming function, as that situation is not supported. --- .../clang/Basic/DiagnosticSemaKinds.td| 3 + clang/lib/Sema/SemaChecking.cpp | 81 +++ .../Sema/aarch64-incompat-sm-builtin-calls.c | 24 ++ 3 files changed, 108 insertions(+) create mode 100644 clang/test/Sema/aarch64-incompat-sm-builtin-calls.c diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 9a7dafa4a298273..e2b7a695322c14b 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -3148,6 +3148,9 @@ def err_attribute_bad_sve_vector_size : Error< def err_attribute_arm_feature_sve_bits_unsupported : Error< "%0 is only supported when '-msve-vector-bits=' is specified with a " "value of 128, 256, 512, 1024 or 2048.">; +def warn_attribute_arm_sm_incompat_builtin : Warning< + "builtin call has undefined behaviour when called from a %0 function">, + InGroup>; def err_sve_vector_in_non_sve_target : Error< "SVE vector type %0 cannot be used in a target without sve">; def err_attribute_riscv_rvv_bits_unsupported : Error< diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index ae588db02bbe722..07937047a2843b0 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -2993,6 +2993,62 @@ static QualType getNeonEltType(NeonTypeFlags Flags, ASTContext &Context, llvm_unreachable("Invalid NeonTypeFlag!"); } +enum ArmStreamingType { + ArmNonStreaming, + ArmStreaming, + ArmStreamingCompatible, + ArmLocallyStreaming, + ArmStreamingOrSVE2p1 +}; + +static ArmStreamingType getArmStreamingFnType(const FunctionDecl *FD) { + if (FD->hasAttr()) +return ArmLocallyStreaming; + if (const auto *T = FD->getType()->getAs()) { +if (T->getAArch64SMEAttributes() & FunctionType::SME_PStateSMEnabledMask) + return ArmStreaming; +if (T->getAArch64SMEAttributes() & FunctionType::SME_PStateSMCompatibleMask) + return ArmStreamingCompatible; + } + return ArmNonStreaming; +} + +static void checkArmStreamingBuiltin(Sema &S, CallExpr *TheCall, + const FunctionDecl *FD, + ArmStreamingType BuiltinType) { + assert(BuiltinType != ArmLocallyStreaming && + "Unexpected locally_streaming attribute for builtin!"); + + ArmStreamingType FnType = getArmStreamingFnType(FD); + if (BuiltinType == ArmStreamingOrSVE2p1) { +// Check intrinsics that are available in [sve2p1 or sme/sme2]. +llvm::StringMap CallerFeatureMap; +S.Context.getFunctionFeatureMap(CallerFeatureMap, FD); +if (Builtin::evaluateRequiredTargetFeatures("sve2p1", CallerFeatureMap)) + BuiltinType = ArmStreamingCompatible; +else + BuiltinType = ArmStreaming; + } + + if ((FnType == ArmStreaming || FnType == ArmLocallyStreaming) && + BuiltinType == ArmNonStreaming) { +S.Diag(TheCall->getBeginLoc(), diag::warn_attribute_arm_sm_incompat_builtin) +<< TheCall->getSourceRange() << "streaming or locally streaming"; + } + + if ((FnType == ArmStreamingCompatible) && + BuiltinType != ArmStreamingCompatible) { +S.Diag(TheCall->getBeginLoc(), diag::warn_attribute_arm_sm_incompat_builtin) +<< TheCall->getSourceRange() << "streaming compatible"; +return; + } + + if (FnType == ArmNonStreaming && BuiltinType == ArmStreaming) { +S.Diag(TheCall->getBeginLoc(), diag::warn_attribute_arm_sm_incompat_builtin) +<< TheCall->getSourceRange() << "non-streaming"; + } +} + bool Sema::CheckSVEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { // Range check SVE intrinsics that take immediate values. SmallVector, 3> ImmChecks; @@ -3136,6 +3192,31 @@ bool Sema::CheckSVEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { bool Sema::CheckNeonBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID, CallExpr *TheCall) { + if (const FunctionDecl *FD = getCurFunctionDecl()) { +std::optional BuiltinType; + +bool IsNeon = false; +switch (BuiltinID) { +default: + break; +#define GET_NEON_BUILTINS +#define TARGET_BUILTIN(id, x, y, z) \ + case NEON::BI##id: \ +IsNeon = true
[clang] [AArch64] Warn when calling a NEON builtin in a streaming function (PR #73672)
https://github.com/SamTebbs33 updated https://github.com/llvm/llvm-project/pull/73672 >From 96464a9c37a532216e4df6c003aa1a8fcb448637 Mon Sep 17 00:00:00 2001 From: Samuel Tebbs Date: Tue, 28 Nov 2023 16:22:32 + Subject: [PATCH 1/2] [AArch64] Warn when calling a NEON builtin in a streaming function This patch introduces a warning that is emitted when a Neon builtin is called from a streaming function, as that situation is not supported. --- .../clang/Basic/DiagnosticSemaKinds.td| 3 + clang/lib/Sema/SemaChecking.cpp | 81 +++ .../Sema/aarch64-incompat-sm-builtin-calls.c | 24 ++ 3 files changed, 108 insertions(+) create mode 100644 clang/test/Sema/aarch64-incompat-sm-builtin-calls.c diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 9a7dafa4a298273..e2b7a695322c14b 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -3148,6 +3148,9 @@ def err_attribute_bad_sve_vector_size : Error< def err_attribute_arm_feature_sve_bits_unsupported : Error< "%0 is only supported when '-msve-vector-bits=' is specified with a " "value of 128, 256, 512, 1024 or 2048.">; +def warn_attribute_arm_sm_incompat_builtin : Warning< + "builtin call has undefined behaviour when called from a %0 function">, + InGroup>; def err_sve_vector_in_non_sve_target : Error< "SVE vector type %0 cannot be used in a target without sve">; def err_attribute_riscv_rvv_bits_unsupported : Error< diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index ae588db02bbe722..07937047a2843b0 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -2993,6 +2993,62 @@ static QualType getNeonEltType(NeonTypeFlags Flags, ASTContext &Context, llvm_unreachable("Invalid NeonTypeFlag!"); } +enum ArmStreamingType { + ArmNonStreaming, + ArmStreaming, + ArmStreamingCompatible, + ArmLocallyStreaming, + ArmStreamingOrSVE2p1 +}; + +static ArmStreamingType getArmStreamingFnType(const FunctionDecl *FD) { + if (FD->hasAttr()) +return ArmLocallyStreaming; + if (const auto *T = FD->getType()->getAs()) { +if (T->getAArch64SMEAttributes() & FunctionType::SME_PStateSMEnabledMask) + return ArmStreaming; +if (T->getAArch64SMEAttributes() & FunctionType::SME_PStateSMCompatibleMask) + return ArmStreamingCompatible; + } + return ArmNonStreaming; +} + +static void checkArmStreamingBuiltin(Sema &S, CallExpr *TheCall, + const FunctionDecl *FD, + ArmStreamingType BuiltinType) { + assert(BuiltinType != ArmLocallyStreaming && + "Unexpected locally_streaming attribute for builtin!"); + + ArmStreamingType FnType = getArmStreamingFnType(FD); + if (BuiltinType == ArmStreamingOrSVE2p1) { +// Check intrinsics that are available in [sve2p1 or sme/sme2]. +llvm::StringMap CallerFeatureMap; +S.Context.getFunctionFeatureMap(CallerFeatureMap, FD); +if (Builtin::evaluateRequiredTargetFeatures("sve2p1", CallerFeatureMap)) + BuiltinType = ArmStreamingCompatible; +else + BuiltinType = ArmStreaming; + } + + if ((FnType == ArmStreaming || FnType == ArmLocallyStreaming) && + BuiltinType == ArmNonStreaming) { +S.Diag(TheCall->getBeginLoc(), diag::warn_attribute_arm_sm_incompat_builtin) +<< TheCall->getSourceRange() << "streaming or locally streaming"; + } + + if ((FnType == ArmStreamingCompatible) && + BuiltinType != ArmStreamingCompatible) { +S.Diag(TheCall->getBeginLoc(), diag::warn_attribute_arm_sm_incompat_builtin) +<< TheCall->getSourceRange() << "streaming compatible"; +return; + } + + if (FnType == ArmNonStreaming && BuiltinType == ArmStreaming) { +S.Diag(TheCall->getBeginLoc(), diag::warn_attribute_arm_sm_incompat_builtin) +<< TheCall->getSourceRange() << "non-streaming"; + } +} + bool Sema::CheckSVEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { // Range check SVE intrinsics that take immediate values. SmallVector, 3> ImmChecks; @@ -3136,6 +3192,31 @@ bool Sema::CheckSVEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { bool Sema::CheckNeonBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID, CallExpr *TheCall) { + if (const FunctionDecl *FD = getCurFunctionDecl()) { +std::optional BuiltinType; + +bool IsNeon = false; +switch (BuiltinID) { +default: + break; +#define GET_NEON_BUILTINS +#define TARGET_BUILTIN(id, x, y, z) \ + case NEON::BI##id: \ +IsNeon = true; \ +break; +#define BUILTIN(id, x, y) TARGET_BUILTIN(id, x, y, ""); +#include "clang/Basic/arm_neon.inc"
[clang] [AArch64] Warn when calling a NEON builtin in a streaming function (PR #73672)
https://github.com/SamTebbs33 updated https://github.com/llvm/llvm-project/pull/73672 >From ba3d2c36ee3268b24864466d429a30fec92a69e3 Mon Sep 17 00:00:00 2001 From: Samuel Tebbs Date: Tue, 28 Nov 2023 16:22:32 + Subject: [PATCH 1/4] [AArch64] Warn when calling a NEON builtin in a streaming function This patch introduces a warning that is emitted when a Neon builtin is called from a streaming function, as that situation is not supported. --- .../clang/Basic/DiagnosticSemaKinds.td| 3 + clang/lib/Sema/SemaChecking.cpp | 81 +++ .../Sema/aarch64-incompat-sm-builtin-calls.c | 24 ++ 3 files changed, 108 insertions(+) create mode 100644 clang/test/Sema/aarch64-incompat-sm-builtin-calls.c diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index ed9bd929c6c4816..6dfb2d7195203a3 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -3148,6 +3148,9 @@ def err_attribute_bad_sve_vector_size : Error< def err_attribute_arm_feature_sve_bits_unsupported : Error< "%0 is only supported when '-msve-vector-bits=' is specified with a " "value of 128, 256, 512, 1024 or 2048.">; +def warn_attribute_arm_sm_incompat_builtin : Warning< + "builtin call has undefined behaviour when called from a %0 function">, + InGroup>; def err_sve_vector_in_non_sve_target : Error< "SVE vector type %0 cannot be used in a target without sve">; def err_attribute_riscv_rvv_bits_unsupported : Error< diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 9dfff132cd88db3..b0da86a5b227def 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -2993,6 +2993,62 @@ static QualType getNeonEltType(NeonTypeFlags Flags, ASTContext &Context, llvm_unreachable("Invalid NeonTypeFlag!"); } +enum ArmStreamingType { + ArmNonStreaming, + ArmStreaming, + ArmStreamingCompatible, + ArmLocallyStreaming, + ArmStreamingOrSVE2p1 +}; + +static ArmStreamingType getArmStreamingFnType(const FunctionDecl *FD) { + if (FD->hasAttr()) +return ArmLocallyStreaming; + if (const auto *T = FD->getType()->getAs()) { +if (T->getAArch64SMEAttributes() & FunctionType::SME_PStateSMEnabledMask) + return ArmStreaming; +if (T->getAArch64SMEAttributes() & FunctionType::SME_PStateSMCompatibleMask) + return ArmStreamingCompatible; + } + return ArmNonStreaming; +} + +static void checkArmStreamingBuiltin(Sema &S, CallExpr *TheCall, + const FunctionDecl *FD, + ArmStreamingType BuiltinType) { + assert(BuiltinType != ArmLocallyStreaming && + "Unexpected locally_streaming attribute for builtin!"); + + ArmStreamingType FnType = getArmStreamingFnType(FD); + if (BuiltinType == ArmStreamingOrSVE2p1) { +// Check intrinsics that are available in [sve2p1 or sme/sme2]. +llvm::StringMap CallerFeatureMap; +S.Context.getFunctionFeatureMap(CallerFeatureMap, FD); +if (Builtin::evaluateRequiredTargetFeatures("sve2p1", CallerFeatureMap)) + BuiltinType = ArmStreamingCompatible; +else + BuiltinType = ArmStreaming; + } + + if ((FnType == ArmStreaming || FnType == ArmLocallyStreaming) && + BuiltinType == ArmNonStreaming) { +S.Diag(TheCall->getBeginLoc(), diag::warn_attribute_arm_sm_incompat_builtin) +<< TheCall->getSourceRange() << "streaming or locally streaming"; + } + + if ((FnType == ArmStreamingCompatible) && + BuiltinType != ArmStreamingCompatible) { +S.Diag(TheCall->getBeginLoc(), diag::warn_attribute_arm_sm_incompat_builtin) +<< TheCall->getSourceRange() << "streaming compatible"; +return; + } + + if (FnType == ArmNonStreaming && BuiltinType == ArmStreaming) { +S.Diag(TheCall->getBeginLoc(), diag::warn_attribute_arm_sm_incompat_builtin) +<< TheCall->getSourceRange() << "non-streaming"; + } +} + bool Sema::CheckSVEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { // Range check SVE intrinsics that take immediate values. SmallVector, 3> ImmChecks; @@ -3148,6 +3204,31 @@ bool Sema::CheckSVEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { bool Sema::CheckNeonBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID, CallExpr *TheCall) { + if (const FunctionDecl *FD = getCurFunctionDecl()) { +std::optional BuiltinType; + +bool IsNeon = false; +switch (BuiltinID) { +default: + break; +#define GET_NEON_BUILTINS +#define TARGET_BUILTIN(id, x, y, z) \ + case NEON::BI##id: \ +IsNeon = true; \ +break; +#define BUILTIN(id, x, y) TARGET_BUILTIN(id, x, y, ""); +#include "clang/Basic/arm_neon.inc"
[clang] [AArch64] Warn when calling a NEON builtin in a streaming function (PR #73672)
@@ -2993,6 +2993,47 @@ static QualType getNeonEltType(NeonTypeFlags Flags, ASTContext &Context, llvm_unreachable("Invalid NeonTypeFlag!"); } +enum ArmStreamingType { + ArmNonStreaming, + ArmStreaming, + ArmStreamingCompatible, + ArmLocallyStreaming +}; + +static ArmStreamingType getArmStreamingFnType(const FunctionDecl *FD) { + if (FD->hasAttr()) +return ArmLocallyStreaming; SamTebbs33 wrote: Thanks Sander, that makes sense to me. https://github.com/llvm/llvm-project/pull/73672 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AArch64] Warn when calling a NEON builtin in a streaming function (PR #73672)
https://github.com/SamTebbs33 closed https://github.com/llvm/llvm-project/pull/73672 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits