[clang] [llvm] [AArch64][SME] Remove immediate argument restriction for svldr and svstr (PR #68565)

2023-11-06 Thread Sam Tebbs via cfe-commits


@@ -4825,6 +4827,72 @@ SDValue AArch64TargetLowering::getPStateSM(SelectionDAG 
&DAG, SDValue Chain,
  Mask);
 }
 
+SDValue LowerSMELdrStr(SDValue N, SelectionDAG &DAG, bool IsLoad) {
+  // Lower an SME LDR/STR ZA intrinsic to LDR_ZA_PSEUDO or STR_ZA.
+  // If the vector number is an immediate between 0 and 15 inclusive then we 
can
+  // put that directly into the immediate field of the instruction. If it's
+  // outside of that range then we modify the base and slice by the greatest
+  // multiple of 15 smaller than that number and put the remainder in the
+  // instruction field. If it's not an immediate then we modify the base and
+  // slice registers by that number and put 0 in the instruction.
+  SDLoc DL(N);
+
+  SDValue TileSlice = N->getOperand(2);
+  SDValue Base = N->getOperand(3);
+  SDValue VecNum = N->getOperand(4);
+  SDValue Remainder = DAG.getTargetConstant(0, DL, MVT::i32);
+
+  // true if the base and slice registers need to me modified
+  bool NeedsAdd = true;
+  if (auto ImmNode = dyn_cast(VecNum)) {
+int Imm = ImmNode->getSExtValue();
+if (Imm >= 0 && Imm <= 15) {
+  Remainder = DAG.getTargetConstant(Imm, DL, MVT::i32);
+  NeedsAdd = false;
+} else {
+  Remainder = DAG.getTargetConstant(Imm % 15, DL, MVT::i32);
+  NeedsAdd = true;
+  VecNum = DAG.getConstant(Imm - (Imm % 15), DL, MVT::i32);
+}
+  } else if (VecNum.getOpcode() == ISD::ADD) {
+// If the vnum is an add, we can fold that add into the instruction if the
+// operand is an immediate in range
+if (auto ImmNode = dyn_cast(VecNum.getOperand(1))) {
+  int Imm = ImmNode->getSExtValue();
+  if (Imm >= 0 && Imm <= 15) {
+VecNum = VecNum.getOperand(0);
+Remainder = DAG.getTargetConstant(Imm, DL, MVT::i32);
+NeedsAdd = true;
+  }
+}
+  }
+  if (NeedsAdd) {
+// Get the vector length that will be multiplied by vnum
+auto SVL = DAG.getNode(AArch64ISD::RDSVL, DL, MVT::i64,
+   DAG.getConstant(1, DL, MVT::i32));
+
+// Multiply SVL and vnum then add it to the base
+// Just add vnum to the tileslice
+SDValue BaseMulOps[] = {
+SVL, VecNum.getValueType() == MVT::i32
+ ? DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, VecNum)

SamTebbs33 wrote:

We do indeed, I'll remove the condition.

https://github.com/llvm/llvm-project/pull/68565
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AArch64][SME] Remove immediate argument restriction for svldr and svstr (PR #68565)

2023-11-06 Thread Sam Tebbs via cfe-commits

https://github.com/SamTebbs33 updated 
https://github.com/llvm/llvm-project/pull/68565

>From de07976922782b9dcf5d13d44551b782dc8b3b94 Mon Sep 17 00:00:00 2001
From: Samuel Tebbs 
Date: Fri, 6 Oct 2023 17:09:36 +0100
Subject: [PATCH 1/6] [AArch64][SME] Remove immediate argument restriction for
 svldr and svstr

The svldr_vnum_za and svstr_vnum_za builtins/intrinsics currently
require that the vnum argument be an immediate, since the instructions
take an immediate vector number. However, we emit 0 as the immediate
for the instruction no matter what, and instead modify the base register.

This patch removes that restriction on the argument, so that the
argument can be a non-immediate. If an appropriate immediate was
passed to the builtin then CGBuiltin passes that directly to the LLVM
intrinsic, otherwise it modifies the base register as is existing
behaviour.
---
 clang/lib/CodeGen/CGBuiltin.cpp   | 45 
 .../aarch64-sme-intrinsics/acle_sme_ldr.c | 71 ---
 .../aarch64-sme-intrinsics/acle_sme_str.c | 51 -
 llvm/include/llvm/IR/IntrinsicsAArch64.td |  4 +-
 llvm/lib/Target/AArch64/SMEInstrFormats.td| 10 +--
 .../CostModel/ARM/unaligned_double_load.ll| 59 +++
 .../CodeGen/AArch64/sme-intrinsics-loads.ll   | 33 +++--
 7 files changed, 166 insertions(+), 107 deletions(-)
 create mode 100644 llvm/test/Analysis/CostModel/ARM/unaligned_double_load.ll

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index e1211bb8949b665..63508b40096141e 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -9712,6 +9712,11 @@ Value *CodeGenFunction::EmitSVEMaskedStore(const 
CallExpr *E,
   return Store;
 }
 
+Value *CodeGenFunction::EmitTileslice(Value *Offset, Value *Base) {
+  llvm::Value *CastOffset = Builder.CreateIntCast(Offset, Int64Ty, false);
+  return Builder.CreateAdd(Base, CastOffset, "tileslice");
+}
+
 Value *CodeGenFunction::EmitSMELd1St1(const SVETypeFlags &TypeFlags,
   SmallVectorImpl &Ops,
   unsigned IntID) {
@@ -9767,18 +9772,34 @@ Value *CodeGenFunction::EmitSMEZero(const SVETypeFlags 
&TypeFlags,
 Value *CodeGenFunction::EmitSMELdrStr(const SVETypeFlags &TypeFlags,
   SmallVectorImpl &Ops,
   unsigned IntID) {
-  if (Ops.size() == 3) {
-Function *Cntsb = CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb);
-llvm::Value *CntsbCall = Builder.CreateCall(Cntsb, {}, "svlb");
-
-llvm::Value *VecNum = Ops[2];
-llvm::Value *MulVL = Builder.CreateMul(CntsbCall, VecNum, "mulvl");
-
-Ops[1] = Builder.CreateGEP(Int8Ty, Ops[1], MulVL);
-Ops[0] = Builder.CreateAdd(
-Ops[0], Builder.CreateIntCast(VecNum, Int32Ty, true), "tileslice");
-Ops.erase(&Ops[2]);
-  }
+  if (Ops.size() == 2) {
+// Intrinsics without a vecnum also use this function, so just provide 0
+Ops.push_back(Ops[1]);
+Ops[1] = Builder.getInt32(0);
+  } else {
+int Imm = -1;
+if (ConstantInt* C = dyn_cast(Ops[2]))
+  if (C->getZExtValue() <= 15)
+  Imm = C->getZExtValue();
+
+if (Imm != -1) {
+  Ops[2] = Ops[1];
+  Ops[1] = Builder.getInt32(Imm);
+} else {
+  Function *Cntsb = CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb);
+  llvm::Value *CntsbCall = Builder.CreateCall(Cntsb, {}, "svlb");
+
+  llvm::Value *VecNum = Ops[2];
+  llvm::Value *MulVL = Builder.CreateMul(
+  CntsbCall,
+  VecNum,
+  "mulvl");
+
+  Ops[2] = Builder.CreateGEP(Int8Ty, Ops[1], MulVL);
+  Ops[1] = Builder.getInt32(0);
+  Ops[0] = Builder.CreateIntCast(EmitTileslice(Ops[0], VecNum), Int32Ty, 
false);
+}
+   }
   Function *F = CGM.getIntrinsic(IntID, {});
   return Builder.CreateCall(F, Ops);
 }
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c 
b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c
index e85c47072f2df80..8e07cf1d11c19b2 100644
--- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c
@@ -6,57 +6,46 @@
 
 #include 
 
-// CHECK-C-LABEL: define dso_local void @test_svldr_vnum_za(
-// CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], ptr noundef [[PTR:%.*]]) 
local_unnamed_addr #[[ATTR0:[0-9]+]] {
-// CHECK-C-NEXT:  entry:
-// CHECK-C-NEXT:tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE]], 
ptr [[PTR]])
-// CHECK-C-NEXT:ret void
-//
-// CHECK-CXX-LABEL: define dso_local void @_Z18test_svldr_vnum_zajPKv(
-// CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], ptr noundef [[PTR:%.*]]) 
local_unnamed_addr #[[ATTR0:[0-9]+]] {
-// CHECK-CXX-NEXT:  entry:
-// CHECK-CXX-NEXT:tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE]], 
ptr [[PTR]])
-// CHECK-CXX-NEXT:ret void
+// CHECK-C-LABEL: @test_svldr_vnum_za(
+// CHECK-CXX-LABEL: @_Z18test_svldr_vnum_zajPK

[clang] [llvm] [AArch64][SME] Remove immediate argument restriction for svldr and svstr (PR #68565)

2023-11-06 Thread Sam Tebbs via cfe-commits


@@ -1741,6 +1742,69 @@ void AArch64DAGToDAGISel::SelectCVTIntrinsic(SDNode *N, 
unsigned NumVecs,
   CurDAG->RemoveDeadNode(N);
 }
 
+void AArch64DAGToDAGISel::SelectSMELdrStrZA(SDNode *N, bool IsLoad) {
+  // Lower an SME LDR/STR ZA intrinsic to LDR_ZA_PSEUDO or STR_ZA.
+  // If the vector number is an immediate between 0 and 15 inclusive then we 
can
+  // put that directly into the immediate field of the instruction. If it's
+  // outside of that range then we modify the base and slice by the greatest
+  // multiple of 15 smaller than that number and put the remainder in the
+  // instruction field. If it's not an immediate then we modify the base and
+  // slice registers by that number and put 0 in the instruction.
+  SDLoc DL(N);
+
+  SDValue TileSlice = N->getOperand(2);
+  SDValue Base = N->getOperand(3);
+  SDValue VecNum = N->getOperand(4);
+  SDValue Remainder = CurDAG->getTargetConstant(0, DL, MVT::i32);
+
+  // true if the base and slice registers need to me modified
+  bool NeedsAdd = true;
+  if (auto ImmNode = dyn_cast(VecNum)) {
+int Imm = ImmNode->getSExtValue();
+if (Imm >= 0 && Imm <= 15) {
+  Remainder = CurDAG->getTargetConstant(Imm, DL, MVT::i32);
+  NeedsAdd = false;
+} else {
+  Remainder = CurDAG->getTargetConstant(Imm % 15, DL, MVT::i32);

SamTebbs33 wrote:

You are right, thank you. I've updated it to modulo 16 instead and added some 
tests for higher immediates.

https://github.com/llvm/llvm-project/pull/68565
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AArch64][SME] Remove immediate argument restriction for svldr and svstr (PR #68565)

2023-11-07 Thread Sam Tebbs via cfe-commits

https://github.com/SamTebbs33 updated 
https://github.com/llvm/llvm-project/pull/68565

>From de07976922782b9dcf5d13d44551b782dc8b3b94 Mon Sep 17 00:00:00 2001
From: Samuel Tebbs 
Date: Fri, 6 Oct 2023 17:09:36 +0100
Subject: [PATCH 1/7] [AArch64][SME] Remove immediate argument restriction for
 svldr and svstr

The svldr_vnum_za and svstr_vnum_za builtins/intrinsics currently
require that the vnum argument be an immediate, since the instructions
take an immediate vector number. However, we emit 0 as the immediate
for the instruction no matter what, and instead modify the base register.

This patch removes that restriction on the argument, so that the
argument can be a non-immediate. If an appropriate immediate was
passed to the builtin then CGBuiltin passes that directly to the LLVM
intrinsic, otherwise it modifies the base register as is existing
behaviour.
---
 clang/lib/CodeGen/CGBuiltin.cpp   | 45 
 .../aarch64-sme-intrinsics/acle_sme_ldr.c | 71 ---
 .../aarch64-sme-intrinsics/acle_sme_str.c | 51 -
 llvm/include/llvm/IR/IntrinsicsAArch64.td |  4 +-
 llvm/lib/Target/AArch64/SMEInstrFormats.td| 10 +--
 .../CostModel/ARM/unaligned_double_load.ll| 59 +++
 .../CodeGen/AArch64/sme-intrinsics-loads.ll   | 33 +++--
 7 files changed, 166 insertions(+), 107 deletions(-)
 create mode 100644 llvm/test/Analysis/CostModel/ARM/unaligned_double_load.ll

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index e1211bb8949b665..63508b40096141e 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -9712,6 +9712,11 @@ Value *CodeGenFunction::EmitSVEMaskedStore(const 
CallExpr *E,
   return Store;
 }
 
+Value *CodeGenFunction::EmitTileslice(Value *Offset, Value *Base) {
+  llvm::Value *CastOffset = Builder.CreateIntCast(Offset, Int64Ty, false);
+  return Builder.CreateAdd(Base, CastOffset, "tileslice");
+}
+
 Value *CodeGenFunction::EmitSMELd1St1(const SVETypeFlags &TypeFlags,
   SmallVectorImpl &Ops,
   unsigned IntID) {
@@ -9767,18 +9772,34 @@ Value *CodeGenFunction::EmitSMEZero(const SVETypeFlags 
&TypeFlags,
 Value *CodeGenFunction::EmitSMELdrStr(const SVETypeFlags &TypeFlags,
   SmallVectorImpl &Ops,
   unsigned IntID) {
-  if (Ops.size() == 3) {
-Function *Cntsb = CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb);
-llvm::Value *CntsbCall = Builder.CreateCall(Cntsb, {}, "svlb");
-
-llvm::Value *VecNum = Ops[2];
-llvm::Value *MulVL = Builder.CreateMul(CntsbCall, VecNum, "mulvl");
-
-Ops[1] = Builder.CreateGEP(Int8Ty, Ops[1], MulVL);
-Ops[0] = Builder.CreateAdd(
-Ops[0], Builder.CreateIntCast(VecNum, Int32Ty, true), "tileslice");
-Ops.erase(&Ops[2]);
-  }
+  if (Ops.size() == 2) {
+// Intrinsics without a vecnum also use this function, so just provide 0
+Ops.push_back(Ops[1]);
+Ops[1] = Builder.getInt32(0);
+  } else {
+int Imm = -1;
+if (ConstantInt* C = dyn_cast(Ops[2]))
+  if (C->getZExtValue() <= 15)
+  Imm = C->getZExtValue();
+
+if (Imm != -1) {
+  Ops[2] = Ops[1];
+  Ops[1] = Builder.getInt32(Imm);
+} else {
+  Function *Cntsb = CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb);
+  llvm::Value *CntsbCall = Builder.CreateCall(Cntsb, {}, "svlb");
+
+  llvm::Value *VecNum = Ops[2];
+  llvm::Value *MulVL = Builder.CreateMul(
+  CntsbCall,
+  VecNum,
+  "mulvl");
+
+  Ops[2] = Builder.CreateGEP(Int8Ty, Ops[1], MulVL);
+  Ops[1] = Builder.getInt32(0);
+  Ops[0] = Builder.CreateIntCast(EmitTileslice(Ops[0], VecNum), Int32Ty, 
false);
+}
+   }
   Function *F = CGM.getIntrinsic(IntID, {});
   return Builder.CreateCall(F, Ops);
 }
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c 
b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c
index e85c47072f2df80..8e07cf1d11c19b2 100644
--- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c
@@ -6,57 +6,46 @@
 
 #include 
 
-// CHECK-C-LABEL: define dso_local void @test_svldr_vnum_za(
-// CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], ptr noundef [[PTR:%.*]]) 
local_unnamed_addr #[[ATTR0:[0-9]+]] {
-// CHECK-C-NEXT:  entry:
-// CHECK-C-NEXT:tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE]], 
ptr [[PTR]])
-// CHECK-C-NEXT:ret void
-//
-// CHECK-CXX-LABEL: define dso_local void @_Z18test_svldr_vnum_zajPKv(
-// CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], ptr noundef [[PTR:%.*]]) 
local_unnamed_addr #[[ATTR0:[0-9]+]] {
-// CHECK-CXX-NEXT:  entry:
-// CHECK-CXX-NEXT:tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE]], 
ptr [[PTR]])
-// CHECK-CXX-NEXT:ret void
+// CHECK-C-LABEL: @test_svldr_vnum_za(
+// CHECK-CXX-LABEL: @_Z18test_svldr_vnum_zajPK

[clang] [llvm] [AArch64][SME] Remove immediate argument restriction for svldr and svstr (PR #68565)

2023-11-07 Thread Sam Tebbs via cfe-commits

https://github.com/SamTebbs33 updated 
https://github.com/llvm/llvm-project/pull/68565

>From de07976922782b9dcf5d13d44551b782dc8b3b94 Mon Sep 17 00:00:00 2001
From: Samuel Tebbs 
Date: Fri, 6 Oct 2023 17:09:36 +0100
Subject: [PATCH 1/9] [AArch64][SME] Remove immediate argument restriction for
 svldr and svstr

The svldr_vnum_za and svstr_vnum_za builtins/intrinsics currently
require that the vnum argument be an immediate, since the instructions
take an immediate vector number. However, we emit 0 as the immediate
for the instruction no matter what, and instead modify the base register.

This patch removes that restriction on the argument, so that the
argument can be a non-immediate. If an appropriate immediate was
passed to the builtin then CGBuiltin passes that directly to the LLVM
intrinsic, otherwise it modifies the base register as is existing
behaviour.
---
 clang/lib/CodeGen/CGBuiltin.cpp   | 45 
 .../aarch64-sme-intrinsics/acle_sme_ldr.c | 71 ---
 .../aarch64-sme-intrinsics/acle_sme_str.c | 51 -
 llvm/include/llvm/IR/IntrinsicsAArch64.td |  4 +-
 llvm/lib/Target/AArch64/SMEInstrFormats.td| 10 +--
 .../CostModel/ARM/unaligned_double_load.ll| 59 +++
 .../CodeGen/AArch64/sme-intrinsics-loads.ll   | 33 +++--
 7 files changed, 166 insertions(+), 107 deletions(-)
 create mode 100644 llvm/test/Analysis/CostModel/ARM/unaligned_double_load.ll

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index e1211bb8949b665..63508b40096141e 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -9712,6 +9712,11 @@ Value *CodeGenFunction::EmitSVEMaskedStore(const 
CallExpr *E,
   return Store;
 }
 
+Value *CodeGenFunction::EmitTileslice(Value *Offset, Value *Base) {
+  llvm::Value *CastOffset = Builder.CreateIntCast(Offset, Int64Ty, false);
+  return Builder.CreateAdd(Base, CastOffset, "tileslice");
+}
+
 Value *CodeGenFunction::EmitSMELd1St1(const SVETypeFlags &TypeFlags,
   SmallVectorImpl &Ops,
   unsigned IntID) {
@@ -9767,18 +9772,34 @@ Value *CodeGenFunction::EmitSMEZero(const SVETypeFlags 
&TypeFlags,
 Value *CodeGenFunction::EmitSMELdrStr(const SVETypeFlags &TypeFlags,
   SmallVectorImpl &Ops,
   unsigned IntID) {
-  if (Ops.size() == 3) {
-Function *Cntsb = CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb);
-llvm::Value *CntsbCall = Builder.CreateCall(Cntsb, {}, "svlb");
-
-llvm::Value *VecNum = Ops[2];
-llvm::Value *MulVL = Builder.CreateMul(CntsbCall, VecNum, "mulvl");
-
-Ops[1] = Builder.CreateGEP(Int8Ty, Ops[1], MulVL);
-Ops[0] = Builder.CreateAdd(
-Ops[0], Builder.CreateIntCast(VecNum, Int32Ty, true), "tileslice");
-Ops.erase(&Ops[2]);
-  }
+  if (Ops.size() == 2) {
+// Intrinsics without a vecnum also use this function, so just provide 0
+Ops.push_back(Ops[1]);
+Ops[1] = Builder.getInt32(0);
+  } else {
+int Imm = -1;
+if (ConstantInt* C = dyn_cast(Ops[2]))
+  if (C->getZExtValue() <= 15)
+  Imm = C->getZExtValue();
+
+if (Imm != -1) {
+  Ops[2] = Ops[1];
+  Ops[1] = Builder.getInt32(Imm);
+} else {
+  Function *Cntsb = CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb);
+  llvm::Value *CntsbCall = Builder.CreateCall(Cntsb, {}, "svlb");
+
+  llvm::Value *VecNum = Ops[2];
+  llvm::Value *MulVL = Builder.CreateMul(
+  CntsbCall,
+  VecNum,
+  "mulvl");
+
+  Ops[2] = Builder.CreateGEP(Int8Ty, Ops[1], MulVL);
+  Ops[1] = Builder.getInt32(0);
+  Ops[0] = Builder.CreateIntCast(EmitTileslice(Ops[0], VecNum), Int32Ty, 
false);
+}
+   }
   Function *F = CGM.getIntrinsic(IntID, {});
   return Builder.CreateCall(F, Ops);
 }
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c 
b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c
index e85c47072f2df80..8e07cf1d11c19b2 100644
--- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c
@@ -6,57 +6,46 @@
 
 #include 
 
-// CHECK-C-LABEL: define dso_local void @test_svldr_vnum_za(
-// CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], ptr noundef [[PTR:%.*]]) 
local_unnamed_addr #[[ATTR0:[0-9]+]] {
-// CHECK-C-NEXT:  entry:
-// CHECK-C-NEXT:tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE]], 
ptr [[PTR]])
-// CHECK-C-NEXT:ret void
-//
-// CHECK-CXX-LABEL: define dso_local void @_Z18test_svldr_vnum_zajPKv(
-// CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], ptr noundef [[PTR:%.*]]) 
local_unnamed_addr #[[ATTR0:[0-9]+]] {
-// CHECK-CXX-NEXT:  entry:
-// CHECK-CXX-NEXT:tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE]], 
ptr [[PTR]])
-// CHECK-CXX-NEXT:ret void
+// CHECK-C-LABEL: @test_svldr_vnum_za(
+// CHECK-CXX-LABEL: @_Z18test_svldr_vnum_zajPK

[clang] [llvm] [AArch64][SME] Remove immediate argument restriction for svldr and svstr (PR #68565)

2023-11-07 Thread Sam Tebbs via cfe-commits


@@ -4825,6 +4827,72 @@ SDValue AArch64TargetLowering::getPStateSM(SelectionDAG 
&DAG, SDValue Chain,
  Mask);
 }
 
+SDValue LowerSMELdrStr(SDValue N, SelectionDAG &DAG, bool IsLoad) {
+  // Lower an SME LDR/STR ZA intrinsic to LDR_ZA_PSEUDO or STR_ZA.
+  // If the vector number is an immediate between 0 and 15 inclusive then we 
can
+  // put that directly into the immediate field of the instruction. If it's
+  // outside of that range then we modify the base and slice by the greatest
+  // multiple of 15 smaller than that number and put the remainder in the
+  // instruction field. If it's not an immediate then we modify the base and
+  // slice registers by that number and put 0 in the instruction.
+  SDLoc DL(N);
+
+  SDValue TileSlice = N->getOperand(2);
+  SDValue Base = N->getOperand(3);
+  SDValue VecNum = N->getOperand(4);
+  SDValue Remainder = DAG.getTargetConstant(0, DL, MVT::i32);
+
+  // true if the base and slice registers need to me modified
+  bool NeedsAdd = true;
+  if (auto ImmNode = dyn_cast(VecNum)) {
+int Imm = ImmNode->getSExtValue();
+if (Imm >= 0 && Imm <= 15) {
+  Remainder = DAG.getTargetConstant(Imm, DL, MVT::i32);
+  NeedsAdd = false;
+} else {
+  Remainder = DAG.getTargetConstant(Imm % 15, DL, MVT::i32);
+  NeedsAdd = true;
+  VecNum = DAG.getConstant(Imm - (Imm % 15), DL, MVT::i32);
+}
+  } else if (VecNum.getOpcode() == ISD::ADD) {
+// If the vnum is an add, we can fold that add into the instruction if the
+// operand is an immediate in range
+if (auto ImmNode = dyn_cast(VecNum.getOperand(1))) {
+  int Imm = ImmNode->getSExtValue();
+  if (Imm >= 0 && Imm <= 15) {

SamTebbs33 wrote:

My latest two commits do this, which improves the codegen as well since we 
properly fold successive loads/stores with adds of large immediates. See the 
test [here](https://media.tenor.com/vYhPH4gA184C/vroom-car.gif). Thanks for 
the idea!

https://github.com/llvm/llvm-project/pull/68565
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [mlir] [llvm] [AArch64][SME] Remove immediate argument restriction for svldr and svstr (PR #68565)

2023-11-08 Thread Sam Tebbs via cfe-commits

https://github.com/SamTebbs33 updated 
https://github.com/llvm/llvm-project/pull/68565

>From de07976922782b9dcf5d13d44551b782dc8b3b94 Mon Sep 17 00:00:00 2001
From: Samuel Tebbs 
Date: Fri, 6 Oct 2023 17:09:36 +0100
Subject: [PATCH 01/10] [AArch64][SME] Remove immediate argument restriction
 for svldr and svstr

The svldr_vnum_za and svstr_vnum_za builtins/intrinsics currently
require that the vnum argument be an immediate, since the instructions
take an immediate vector number. However, we emit 0 as the immediate
for the instruction no matter what, and instead modify the base register.

This patch removes that restriction on the argument, so that the
argument can be a non-immediate. If an appropriate immediate was
passed to the builtin then CGBuiltin passes that directly to the LLVM
intrinsic, otherwise it modifies the base register as is existing
behaviour.
---
 clang/lib/CodeGen/CGBuiltin.cpp   | 45 
 .../aarch64-sme-intrinsics/acle_sme_ldr.c | 71 ---
 .../aarch64-sme-intrinsics/acle_sme_str.c | 51 -
 llvm/include/llvm/IR/IntrinsicsAArch64.td |  4 +-
 llvm/lib/Target/AArch64/SMEInstrFormats.td| 10 +--
 .../CostModel/ARM/unaligned_double_load.ll| 59 +++
 .../CodeGen/AArch64/sme-intrinsics-loads.ll   | 33 +++--
 7 files changed, 166 insertions(+), 107 deletions(-)
 create mode 100644 llvm/test/Analysis/CostModel/ARM/unaligned_double_load.ll

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index e1211bb8949b665..63508b40096141e 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -9712,6 +9712,11 @@ Value *CodeGenFunction::EmitSVEMaskedStore(const 
CallExpr *E,
   return Store;
 }
 
+Value *CodeGenFunction::EmitTileslice(Value *Offset, Value *Base) {
+  llvm::Value *CastOffset = Builder.CreateIntCast(Offset, Int64Ty, false);
+  return Builder.CreateAdd(Base, CastOffset, "tileslice");
+}
+
 Value *CodeGenFunction::EmitSMELd1St1(const SVETypeFlags &TypeFlags,
   SmallVectorImpl &Ops,
   unsigned IntID) {
@@ -9767,18 +9772,34 @@ Value *CodeGenFunction::EmitSMEZero(const SVETypeFlags 
&TypeFlags,
 Value *CodeGenFunction::EmitSMELdrStr(const SVETypeFlags &TypeFlags,
   SmallVectorImpl &Ops,
   unsigned IntID) {
-  if (Ops.size() == 3) {
-Function *Cntsb = CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb);
-llvm::Value *CntsbCall = Builder.CreateCall(Cntsb, {}, "svlb");
-
-llvm::Value *VecNum = Ops[2];
-llvm::Value *MulVL = Builder.CreateMul(CntsbCall, VecNum, "mulvl");
-
-Ops[1] = Builder.CreateGEP(Int8Ty, Ops[1], MulVL);
-Ops[0] = Builder.CreateAdd(
-Ops[0], Builder.CreateIntCast(VecNum, Int32Ty, true), "tileslice");
-Ops.erase(&Ops[2]);
-  }
+  if (Ops.size() == 2) {
+// Intrinsics without a vecnum also use this function, so just provide 0
+Ops.push_back(Ops[1]);
+Ops[1] = Builder.getInt32(0);
+  } else {
+int Imm = -1;
+if (ConstantInt* C = dyn_cast(Ops[2]))
+  if (C->getZExtValue() <= 15)
+  Imm = C->getZExtValue();
+
+if (Imm != -1) {
+  Ops[2] = Ops[1];
+  Ops[1] = Builder.getInt32(Imm);
+} else {
+  Function *Cntsb = CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb);
+  llvm::Value *CntsbCall = Builder.CreateCall(Cntsb, {}, "svlb");
+
+  llvm::Value *VecNum = Ops[2];
+  llvm::Value *MulVL = Builder.CreateMul(
+  CntsbCall,
+  VecNum,
+  "mulvl");
+
+  Ops[2] = Builder.CreateGEP(Int8Ty, Ops[1], MulVL);
+  Ops[1] = Builder.getInt32(0);
+  Ops[0] = Builder.CreateIntCast(EmitTileslice(Ops[0], VecNum), Int32Ty, 
false);
+}
+   }
   Function *F = CGM.getIntrinsic(IntID, {});
   return Builder.CreateCall(F, Ops);
 }
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c 
b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c
index e85c47072f2df80..8e07cf1d11c19b2 100644
--- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c
@@ -6,57 +6,46 @@
 
 #include 
 
-// CHECK-C-LABEL: define dso_local void @test_svldr_vnum_za(
-// CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], ptr noundef [[PTR:%.*]]) 
local_unnamed_addr #[[ATTR0:[0-9]+]] {
-// CHECK-C-NEXT:  entry:
-// CHECK-C-NEXT:tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE]], 
ptr [[PTR]])
-// CHECK-C-NEXT:ret void
-//
-// CHECK-CXX-LABEL: define dso_local void @_Z18test_svldr_vnum_zajPKv(
-// CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], ptr noundef [[PTR:%.*]]) 
local_unnamed_addr #[[ATTR0:[0-9]+]] {
-// CHECK-CXX-NEXT:  entry:
-// CHECK-CXX-NEXT:tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE]], 
ptr [[PTR]])
-// CHECK-CXX-NEXT:ret void
+// CHECK-C-LABEL: @test_svldr_vnum_za(
+// CHECK-CXX-LABEL: @_Z18test_svldr_vnum_zaj

[clang] [Clang][SME2] Add outer product and accumulate/subtract builtins (PR #71176)

2023-11-08 Thread Sam Tebbs via cfe-commits

https://github.com/SamTebbs33 approved this pull request.


https://github.com/llvm/llvm-project/pull/71176
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][SME2] Add single and multi min and max builtins (PR #71688)

2023-11-08 Thread Sam Tebbs via cfe-commits


@@ -296,5 +296,28 @@ multiclass ZAAddSub {
   }
 }
 
+
+// SME2 - MIN, MAX
+
+multiclass MinMaxIntr {
+  def SVS # NAME : SInst<"sv" # i # "[" # zm # "_{d}_" # mul # "]", t, "csil", 
MergeNone, "aarch64_sve_s" # i # zm # "_" # mul, [IsStreaming], []>;
+  def SVU # NAME : SInst<"sv" # i # "[" # zm # "_{d}_" # mul # "]", t, 
"UcUsUiUl", MergeNone, "aarch64_sve_u" # i # zm # "_" # mul, [IsStreaming], []>;
+  def SVF # NAME : SInst<"sv" # i # "[" # zm # "_{d}_" # mul # "]", t, "hfd",  
MergeNone, "aarch64_sve_f" # i # zm # "_" # mul, [IsStreaming], []>;
+}
+
+let TargetGuard = "sme2" in {
+  // SMAX / UMAX / FMAX
+  defm MAX_SINGLE_X2 : MinMaxIntr<"max", "_single", "x2", "22d">;
+  defm MAX_MULTI_X2  : MinMaxIntr<"max", "","x2", "222">;
+  defm MAX_SINGLE_X4 : MinMaxIntr<"max", "_single", "x4", "44d">;
+  defm MAX_MULTI_X4  : MinMaxIntr<"max", "","x4", "444">;
+
+  // SMIN / UMIN / FMIN
+  defm MIN_SINGLE_X2 : MinMaxIntr<"min", "_single", "x2", "22d">;
+  defm MIN_MULTI_X2  : MinMaxIntr<"min", "","x2", "222">;
+  defm MIN_SINGLE_X4 : MinMaxIntr<"min", "_single", "x4", "44d">;
+  defm MIN_MULTI_X4  : MinMaxIntr<"min", "","x4", "444">;

SamTebbs33 wrote:

Done, thanks Kerry.

https://github.com/llvm/llvm-project/pull/71688
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][SME2] Add single and multi min/max by vector builtins (PR #71707)

2023-11-08 Thread Sam Tebbs via cfe-commits

https://github.com/SamTebbs33 created 
https://github.com/llvm/llvm-project/pull/71707

Adds the following SME2 builtins:

svminnm_single_f(16|32|64)_x(2|4)
svminnm_f(16|32|64)_x(2|4)
svmaxnm_single_f(16|32|64)_x(2|4)
svmaxnm_f(16|32|64)_x(2|4)

See
[ARM-software/acle#217](https://github.com/ARM-software/acle/pull/217)

>From 1ca052c831f0ec5a56320b78fb7b9755d9ff93da Mon Sep 17 00:00:00 2001
From: Samuel Tebbs 
Date: Wed, 8 Nov 2023 17:19:10 +
Subject: [PATCH] [Clang][SME2] Add single and multi min/max by vector builtins

Adds the following SME2 builtins:

svminnm_single_f(16|32|64)_x(2|4)
svminnm_f(16|32|64)_x(2|4)
svmaxnm_single_f(16|32|64)_x(2|4)
svmaxnm_f(16|32|64)_x(2|4)

See
[ARM-software/acle#217](https://github.com/ARM-software/acle/pull/217)
---
 clang/include/clang/Basic/arm_sve.td  |  12 +
 .../aarch64-sme2-intrinsics/acle_sme2_maxnm.c | 444 ++
 .../aarch64-sme2-intrinsics/acle_sme2_minnm.c | 444 ++
 3 files changed, 900 insertions(+)
 create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_maxnm.c
 create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_minnm.c

diff --git a/clang/include/clang/Basic/arm_sve.td 
b/clang/include/clang/Basic/arm_sve.td
index 3d4c2129565903d..788025f893efe10 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -1987,8 +1987,20 @@ defm SVREVD : SInstZPZ<"svrevd", "csilUcUsUiUl", 
"aarch64_sve_revd">;
 // SME intrinsics which operate only on vectors and do not require ZA should 
be added here,
 // as they could possibly become SVE instructions in the future.
 
+multiclass SInstMinMaxByVector {
+  def NAME # _SINGLE_X2 : SInst<"sv" # name # "nm[_single_{d}_x2]", "22d", 
"hfd", MergeNone, "aarch64_sve_f" # name # "nm_single_x2", [IsStreaming], []>;
+  def NAME # _SINGLE_X4 : SInst<"sv" # name # "nm[_single_{d}_x4]", "44d", 
"hfd", MergeNone, "aarch64_sve_f" # name # "nm_single_x4", [IsStreaming], []>;
+
+  def NAME # _X2 : SInst<"sv" # name # "nm[_{d}_x2]", "222", "hfd", MergeNone, 
"aarch64_sve_f" # name # "nm_x2", [IsStreaming], []>;
+  def NAME # _X4 : SInst<"sv" # name # "nm[_{d}_x4]", "444", "hfd", MergeNone, 
"aarch64_sve_f" # name # "nm_x4", [IsStreaming], []>;
+}
+
 let TargetGuard = "sme2" in {
 // == ADD (vectors) ==
   def SVADD_SINGLE_X2 : SInst<"svadd[_single_{d}_x2]", "22d", "cUcsUsiUilUl", 
MergeNone, "aarch64_sve_add_single_x2", [IsStreaming], []>;
   def SVADD_SINGLE_X4 : SInst<"svadd[_single_{d}_x4]", "44d", "cUcsUsiUilUl", 
MergeNone, "aarch64_sve_add_single_x4", [IsStreaming], []>;
+
+// == FMINNM / FMAXNM ==
+  defm SVMINNM : SInstMinMaxByVector<"min">;
+  defm SVMAXNM : SInstMinMaxByVector<"max">;
 }
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_maxnm.c 
b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_maxnm.c
new file mode 100644
index 000..bb8299ba3e23496
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_maxnm.c
@@ -0,0 +1,444 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 
-target-feature +sve2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | 
opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 
-target-feature +sve2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x 
c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s 
-check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu 
-target-feature +sme2 -target-feature +sve2 -S -disable-O0-optnone -Werror 
-Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | 
FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu 
-target-feature +sme2 -target-feature +sve2 -S -disable-O0-optnone -Werror 
-Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | 
FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 
-target-feature +sve2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+#include 
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED,A5) A1##A3##A5
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4,A5) A1##A2##A3##A4##A5
+#endif
+
+// Single, x2
+
+// CHECK-LABEL: @test_svmaxnm_single_f16_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = tail call  
@llvm.vector.extract.nxv8f16.nxv16f16( [[ZDN:%.*]], i64 0)
+// CHECK-NEXT:[[TMP1:%.*]] = tail call  
@llvm.vector.extract.nxv8f16.nxv16f16( [[ZDN]], i64 8)
+// CHECK-NEXT:[[TMP2:%.*]] = tail call { ,  } @llvm.aarch64.sve.fmaxnm.single.x2.nxv8f16( 
[[TMP0]],  [[TMP1]],  [[ZM:%.*]])
+// CHECK-NEXT:[[TMP3:%.*]] = extractvalue { ,  } [[TMP2]]

[llvm] [clang] [AArch64][SME2] Add ldr_zt, str_zt builtins and intrinsics (PR #71795)

2023-11-10 Thread Sam Tebbs via cfe-commits




SamTebbs33 wrote:

Considering the definitions are commented out, are these changes necessary?

https://github.com/llvm/llvm-project/pull/71795
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AArch64][SME2] Enable bfm builtings for sme2 (PR #71927)

2023-11-10 Thread Sam Tebbs via cfe-commits

https://github.com/SamTebbs33 created 
https://github.com/llvm/llvm-project/pull/71927

This patch enables the following builtins for SME2
svbfmlslb_f32
svbfmlslb_lane_f32
svbfmlslt_f32
svbfmlslt_lane_f32

Patch by: Kerry McLaughlin 

>From ff33739706ce758f416153aa3cdb7c632e068da3 Mon Sep 17 00:00:00 2001
From: Samuel Tebbs 
Date: Thu, 9 Nov 2023 17:39:26 +
Subject: [PATCH] [AArch64][SME2] Enable bfm builtings for sme2

This patch enables the following builtins for SME2
svbfmlslb_f32
svbfmlslb_lane_f32
svbfmlslt_f32
svbfmlslt_lane_f32

Patch by: Kerry McLaughlin 
---
 clang/include/clang/Basic/arm_sve.td  | 15 +--
 .../acle_sve2p1_bfmlsl.c  |  5 +
 2 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/clang/include/clang/Basic/arm_sve.td 
b/clang/include/clang/Basic/arm_sve.td
index 3d4c2129565903d..1094c424e68abd5 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -1959,12 +1959,6 @@ def SVDOT_X2_F : SInst<"svdot[_{d}_{2}_{3}]", "ddhh", 
"f",  MergeNone, "aarch64_
 def SVDOT_LANE_X2_S : SInst<"svdot_lane[_{d}_{2}_{3}]", "ddhhi", "i",  
MergeNone, "aarch64_sve_sdot_lane_x2", [], [ImmCheck<3, ImmCheck0_3>]>;
 def SVDOT_LANE_X2_U : SInst<"svdot_lane[_{d}_{2}_{3}]", "ddhhi", "Ui", 
MergeNone, "aarch64_sve_udot_lane_x2", [], [ImmCheck<3, ImmCheck0_3>]>;
 def SVDOT_LANE_X2_F : SInst<"svdot_lane[_{d}_{2}_{3}]", "ddhhi", "f",  
MergeNone, "aarch64_sve_fdot_lane_x2", [], [ImmCheck<3, ImmCheck0_3>]>;
-
-def SVBFMLSLB : SInst<"svbfmlslb[_{d}]", "dd$$", "f", MergeNone, 
"aarch64_sve_bfmlslb", [IsOverloadNone], []>;
-def SVBFMLSLT : SInst<"svbfmlslt[_{d}]", "dd$$", "f", MergeNone, 
"aarch64_sve_bfmlslt", [IsOverloadNone], []>;
-
-def SVBFMLSLB_LANE : SInst<"svbfmlslb_lane[_{d}]", "dd$$i", "f", MergeNone, 
"aarch64_sve_bfmlslb_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>;
-def SVBFMLSLT_LANE : SInst<"svbfmlslt_lane[_{d}]", "dd$$i", "f", MergeNone, 
"aarch64_sve_bfmlslt_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>;
 }
 
 let TargetGuard = "sve2p1" in {
@@ -1992,3 +1986,12 @@ let TargetGuard = "sme2" in {
   def SVADD_SINGLE_X2 : SInst<"svadd[_single_{d}_x2]", "22d", "cUcsUsiUilUl", 
MergeNone, "aarch64_sve_add_single_x2", [IsStreaming], []>;
   def SVADD_SINGLE_X4 : SInst<"svadd[_single_{d}_x4]", "44d", "cUcsUsiUilUl", 
MergeNone, "aarch64_sve_add_single_x4", [IsStreaming], []>;
 }
+
+let TargetGuard = "sve2p1|sme" in {
+// == BFloat16 multiply-subtract ==
+  def SVBFMLSLB : SInst<"svbfmlslb[_{d}]", "dd$$", "f", MergeNone, 
"aarch64_sve_bfmlslb", [IsOverloadNone], []>;
+  def SVBFMLSLT : SInst<"svbfmlslt[_{d}]", "dd$$", "f", MergeNone, 
"aarch64_sve_bfmlslt", [IsOverloadNone], []>;
+
+  def SVBFMLSLB_LANE : SInst<"svbfmlslb_lane[_{d}]", "dd$$i", "f", MergeNone, 
"aarch64_sve_bfmlslb_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>;
+  def SVBFMLSLT_LANE : SInst<"svbfmlslt_lane[_{d}]", "dd$$i", "f", MergeNone, 
"aarch64_sve_bfmlslt_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>;
+}
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmlsl.c 
b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmlsl.c
index 7e1763a63ec4e1d..519bf019ff35ba5 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmlsl.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmlsl.c
@@ -2,10 +2,15 @@
 // REQUIRES: aarch64-registered-target
 
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu 
-target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s 
| opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu 
-target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall 
-emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu 
-target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x 
c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s 
-check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu 
-target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall 
-emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | 
FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple 
aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror 
-Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | 
FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple 
aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S 
-disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p 
mem2reg,instcombine,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-

[clang] [Clang][SME2] Add single and multi min and max builtins (PR #71688)

2023-11-10 Thread Sam Tebbs via cfe-commits

https://github.com/SamTebbs33 edited 
https://github.com/llvm/llvm-project/pull/71688
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][SME2] Add single and multi min/max by vector builtins (PR #71707)

2023-11-10 Thread Sam Tebbs via cfe-commits

https://github.com/SamTebbs33 edited 
https://github.com/llvm/llvm-project/pull/71707
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AArch64][SME2] Enable bfm builtins for sme2 (PR #71927)

2023-11-10 Thread Sam Tebbs via cfe-commits

https://github.com/SamTebbs33 edited 
https://github.com/llvm/llvm-project/pull/71927
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[llvm] [clang] [AArch64][SME2] Add ldr_zt, str_zt builtins and intrinsics (PR #71795)

2023-11-10 Thread Sam Tebbs via cfe-commits

https://github.com/SamTebbs33 approved this pull request.

Nice one, looks good to me.

https://github.com/llvm/llvm-project/pull/71795
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][SME2] Add single and multi min and max builtins (PR #71688)

2023-11-10 Thread Sam Tebbs via cfe-commits


@@ -1987,8 +1987,26 @@ defm SVREVD : SInstZPZ<"svrevd", "csilUcUsUiUl", 
"aarch64_sve_revd">;
 // SME intrinsics which operate only on vectors and do not require ZA should 
be added here,
 // as they could possibly become SVE instructions in the future.
 
+multiclass MinMaxIntr {
+  def SVS # NAME : SInst<"sv" # i # "[" # zm # "_{d}_" # mul # "]", t, "csil", 
MergeNone, "aarch64_sve_s" # i # zm # "_" # mul, [IsStreaming], []>;
+  def SVU # NAME : SInst<"sv" # i # "[" # zm # "_{d}_" # mul # "]", t, 
"UcUsUiUl", MergeNone, "aarch64_sve_u" # i # zm # "_" # mul, [IsStreaming], []>;
+  def SVF # NAME : SInst<"sv" # i # "[" # zm # "_{d}_" # mul # "]", t, "hfd",  
MergeNone, "aarch64_sve_f" # i # zm # "_" # mul, [IsStreaming], []>;
+}
+
 let TargetGuard = "sme2" in {
 // == ADD (vectors) ==
   def SVADD_SINGLE_X2 : SInst<"svadd[_single_{d}_x2]", "22d", "cUcsUsiUilUl", 
MergeNone, "aarch64_sve_add_single_x2", [IsStreaming], []>;
   def SVADD_SINGLE_X4 : SInst<"svadd[_single_{d}_x4]", "44d", "cUcsUsiUilUl", 
MergeNone, "aarch64_sve_add_single_x4", [IsStreaming], []>;
+
+// == SMAX / UMAX / FMAX ==
+  defm MAX_SINGLE_X2 : MinMaxIntr<"max", "_single", "x2", "22d">;
+  defm MAX_MULTI_X2  : MinMaxIntr<"max", "","x2", "222">;
+  defm MAX_SINGLE_X4 : MinMaxIntr<"max", "_single", "x4", "44d">;
+  defm MAX_MULTI_X4  : MinMaxIntr<"max", "","x4", "444">;
+
+// == SMIN / UMIN / FMIN ==
+  defm MIN_SINGLE_X2 : MinMaxIntr<"min", "_single", "x2", "22d">;
+  defm MIN_MULTI_X2  : MinMaxIntr<"min", "","x2", "222">;
+  defm MIN_SINGLE_X4 : MinMaxIntr<"min", "_single", "x4", "44d">;
+  defm MIN_MULTI_X4  : MinMaxIntr<"min", "","x4", "444">;

SamTebbs33 wrote:

Thank you, I agree. I've done that now and will go ahead and merge.

https://github.com/llvm/llvm-project/pull/71688
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][SME2] Add single and multi min and max builtins (PR #71688)

2023-11-10 Thread Sam Tebbs via cfe-commits

https://github.com/SamTebbs33 closed 
https://github.com/llvm/llvm-project/pull/71688
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][SME2] Add single and multi min and max builtins (PR #71688)

2023-11-10 Thread Sam Tebbs via cfe-commits

SamTebbs33 wrote:

> FYI I pushed 
> [3a9cc17](https://github.com/llvm/llvm-project/commit/3a9cc17ca088267348e4b4a6e64a88a38ae9c6e4)
>  to hopefully unbreak the build.

Thank you!

https://github.com/llvm/llvm-project/pull/71688
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AArch64][SME2] Enable bfm builtins for sme2 (PR #71927)

2023-11-10 Thread Sam Tebbs via cfe-commits

https://github.com/SamTebbs33 updated 
https://github.com/llvm/llvm-project/pull/71927

>From ff33739706ce758f416153aa3cdb7c632e068da3 Mon Sep 17 00:00:00 2001
From: Samuel Tebbs 
Date: Thu, 9 Nov 2023 17:39:26 +
Subject: [PATCH 1/2] [AArch64][SME2] Enable bfm builtings for sme2

This patch enables the following builtins for SME2
svbfmlslb_f32
svbfmlslb_lane_f32
svbfmlslt_f32
svbfmlslt_lane_f32

Patch by: Kerry McLaughlin 
---
 clang/include/clang/Basic/arm_sve.td  | 15 +--
 .../acle_sve2p1_bfmlsl.c  |  5 +
 2 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/clang/include/clang/Basic/arm_sve.td 
b/clang/include/clang/Basic/arm_sve.td
index 3d4c2129565903d..1094c424e68abd5 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -1959,12 +1959,6 @@ def SVDOT_X2_F : SInst<"svdot[_{d}_{2}_{3}]", "ddhh", 
"f",  MergeNone, "aarch64_
 def SVDOT_LANE_X2_S : SInst<"svdot_lane[_{d}_{2}_{3}]", "ddhhi", "i",  
MergeNone, "aarch64_sve_sdot_lane_x2", [], [ImmCheck<3, ImmCheck0_3>]>;
 def SVDOT_LANE_X2_U : SInst<"svdot_lane[_{d}_{2}_{3}]", "ddhhi", "Ui", 
MergeNone, "aarch64_sve_udot_lane_x2", [], [ImmCheck<3, ImmCheck0_3>]>;
 def SVDOT_LANE_X2_F : SInst<"svdot_lane[_{d}_{2}_{3}]", "ddhhi", "f",  
MergeNone, "aarch64_sve_fdot_lane_x2", [], [ImmCheck<3, ImmCheck0_3>]>;
-
-def SVBFMLSLB : SInst<"svbfmlslb[_{d}]", "dd$$", "f", MergeNone, 
"aarch64_sve_bfmlslb", [IsOverloadNone], []>;
-def SVBFMLSLT : SInst<"svbfmlslt[_{d}]", "dd$$", "f", MergeNone, 
"aarch64_sve_bfmlslt", [IsOverloadNone], []>;
-
-def SVBFMLSLB_LANE : SInst<"svbfmlslb_lane[_{d}]", "dd$$i", "f", MergeNone, 
"aarch64_sve_bfmlslb_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>;
-def SVBFMLSLT_LANE : SInst<"svbfmlslt_lane[_{d}]", "dd$$i", "f", MergeNone, 
"aarch64_sve_bfmlslt_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>;
 }
 
 let TargetGuard = "sve2p1" in {
@@ -1992,3 +1986,12 @@ let TargetGuard = "sme2" in {
   def SVADD_SINGLE_X2 : SInst<"svadd[_single_{d}_x2]", "22d", "cUcsUsiUilUl", 
MergeNone, "aarch64_sve_add_single_x2", [IsStreaming], []>;
   def SVADD_SINGLE_X4 : SInst<"svadd[_single_{d}_x4]", "44d", "cUcsUsiUilUl", 
MergeNone, "aarch64_sve_add_single_x4", [IsStreaming], []>;
 }
+
+let TargetGuard = "sve2p1|sme" in {
+// == BFloat16 multiply-subtract ==
+  def SVBFMLSLB : SInst<"svbfmlslb[_{d}]", "dd$$", "f", MergeNone, 
"aarch64_sve_bfmlslb", [IsOverloadNone], []>;
+  def SVBFMLSLT : SInst<"svbfmlslt[_{d}]", "dd$$", "f", MergeNone, 
"aarch64_sve_bfmlslt", [IsOverloadNone], []>;
+
+  def SVBFMLSLB_LANE : SInst<"svbfmlslb_lane[_{d}]", "dd$$i", "f", MergeNone, 
"aarch64_sve_bfmlslb_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>;
+  def SVBFMLSLT_LANE : SInst<"svbfmlslt_lane[_{d}]", "dd$$i", "f", MergeNone, 
"aarch64_sve_bfmlslt_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>;
+}
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmlsl.c 
b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmlsl.c
index 7e1763a63ec4e1d..519bf019ff35ba5 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmlsl.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmlsl.c
@@ -2,10 +2,15 @@
 // REQUIRES: aarch64-registered-target
 
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu 
-target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s 
| opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu 
-target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall 
-emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu 
-target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x 
c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s 
-check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu 
-target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall 
-emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | 
FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple 
aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror 
-Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | 
FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple 
aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S 
-disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p 
mem2reg,instcombine,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple 
aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror 
-Wall -emit-llvm -o - -x c++ %

[clang] [AArch64][SME2] Enable bfm builtins for sme2 (PR #71927)

2023-11-10 Thread Sam Tebbs via cfe-commits


@@ -1992,3 +1986,12 @@ let TargetGuard = "sme2" in {
   def SVADD_SINGLE_X2 : SInst<"svadd[_single_{d}_x2]", "22d", "cUcsUsiUilUl", 
MergeNone, "aarch64_sve_add_single_x2", [IsStreaming], []>;
   def SVADD_SINGLE_X4 : SInst<"svadd[_single_{d}_x4]", "44d", "cUcsUsiUilUl", 
MergeNone, "aarch64_sve_add_single_x4", [IsStreaming], []>;
 }
+
+let TargetGuard = "sve2p1|sme" in {

SamTebbs33 wrote:

Well spotted.

https://github.com/llvm/llvm-project/pull/71927
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Add SME2 builtins for pfalse and ptrue (PR #71953)

2023-11-10 Thread Sam Tebbs via cfe-commits

https://github.com/SamTebbs33 approved this pull request.


https://github.com/llvm/llvm-project/pull/71953
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[llvm] [clang] [AArch64] Stack probing for dynamic allocas in SelectionDAG (PR #66525)

2023-11-14 Thread Sam Tebbs via cfe-commits

https://github.com/SamTebbs33 edited 
https://github.com/llvm/llvm-project/pull/66525
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AArch64] Stack probing for dynamic allocas in SelectionDAG (PR #66525)

2023-11-14 Thread Sam Tebbs via cfe-commits

https://github.com/SamTebbs33 approved this pull request.

Thanks Momchil. This looks good to me with a couple of questions. The tests are 
comprehensive as well.

https://github.com/llvm/llvm-project/pull/66525
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AArch64] Stack probing for dynamic allocas in SelectionDAG (PR #66525)

2023-11-14 Thread Sam Tebbs via cfe-commits


@@ -861,6 +861,12 @@ def AArch64stilp : SDNode<"AArch64ISD::STILP", 
SDT_AArch64stilp, [SDNPHasChain,
 def AArch64stnp : SDNode<"AArch64ISD::STNP", SDT_AArch64stnp, [SDNPHasChain, 
SDNPMayStore, SDNPMemOperand]>;
 
 def AArch64tbl : SDNode<"AArch64ISD::TBL", SDT_AArch64TBL>;
+
+def AArch64probedalloca
+: SDNode<"AArch64ISD::PROBED_ALLOCA",
+ SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>,
+ [SDNPHasChain]>;

SamTebbs33 wrote:

Does this need a `SDNPMayStore` attribute?

https://github.com/llvm/llvm-project/pull/66525
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AArch64] Stack probing for dynamic allocas in SelectionDAG (PR #66525)

2023-11-14 Thread Sam Tebbs via cfe-commits


@@ -9461,6 +9462,94 @@ bool AArch64InstrInfo::isReallyTriviallyReMaterializable(
   return TargetInstrInfo::isReallyTriviallyReMaterializable(MI);
 }
 
+MachineBasicBlock::iterator
+AArch64InstrInfo::probedStackAlloc(MachineBasicBlock::iterator MBBI,

SamTebbs33 wrote:

I think it would be useful for this function to have some comments that explain 
what it emits, so it can be understood isolated from the rest of this patch.

https://github.com/llvm/llvm-project/pull/66525
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [mlir] [AArch64][SME] Remove immediate argument restriction for svldr and svstr (PR #68565)

2023-11-14 Thread Sam Tebbs via cfe-commits


@@ -4825,6 +4827,113 @@ SDValue AArch64TargetLowering::getPStateSM(SelectionDAG 
&DAG, SDValue Chain,
  Mask);
 }
 
+// Lower an SME LDR/STR ZA intrinsic to LDR_ZA_PSEUDO or STR_ZA.
+// Case 1: If the vector number (vecnum) is an immediate in range, it gets
+// folded into the instruction
+//ldr(%tileslice, %ptr, 11) -> ldr [%tileslice, 11], [%ptr, 11]
+// Case 2: If the vecnum is not an immediate, then it is used to modify the 
base
+// and tile slice registers
+//ldr(%tileslice, %ptr, %vecnum)
+//->
+//%svl = rdsvl
+//%ptr2 = %ptr + %svl * %vecnum
+//%tileslice2 = %tileslice + %vecnum
+//ldr [%tileslice2, 0], [%ptr2, 0]
+// Case 3: If the vecnum is an immediate out of range, then the same is done as
+// case 2, but the base and slice registers are modified by the greatest
+// multiple of 15 lower than the vecnum and the remainder is folded into the
+// instruction. This means that successive loads and stores that are offset 
from
+// each other can share the same base and slice register updates.
+//ldr(%tileslice, %ptr, 22)
+//ldr(%tileslice, %ptr, 23)
+//->
+//%svl = rdsvl
+//%ptr2 = %ptr + %svl * 15
+//%tileslice2 = %tileslice + 15
+//ldr [%tileslice2, 7], [%ptr2, 7]
+//ldr [%tileslice2, 8], [%ptr2, 8]
+// Case 4: If the vecnum is an add of an immediate, then the non-immediate
+// operand and the immediate can be folded into the instruction, like case 2.
+//ldr(%tileslice, %ptr, %vecnum + 7)
+//ldr(%tileslice, %ptr, %vecnum + 8)
+//->
+//%svl = rdsvl
+//%ptr2 = %ptr + %svl * %vecnum
+//%tileslice2 = %tileslice + %vecnum
+//ldr [%tileslice2, 7], [%ptr2, 7]
+//ldr [%tileslice2, 8], [%ptr2, 8]
+// Case 5: The vecnum being an add of an immediate out of range is also 
handled,
+// in which case the same remainder logic as case 3 is used.
+SDValue LowerSMELdrStr(SDValue N, SelectionDAG &DAG, bool IsLoad) {
+  SDLoc DL(N);
+
+  SDValue TileSlice = N->getOperand(2);
+  SDValue Base = N->getOperand(3);
+  SDValue VecNum = N->getOperand(4);
+  int Addend = 0;
+
+  // If the vnum is an add, we can fold that add into the instruction if the
+  // operand is an immediate. The range check is performed below.
+  if (VecNum.getOpcode() == ISD::ADD) {
+if (auto ImmNode = dyn_cast(VecNum.getOperand(1))) {
+  Addend = ImmNode->getSExtValue();
+  VecNum = VecNum.getOperand(0);
+}
+  }
+
+  SDValue Remainder = DAG.getTargetConstant(Addend, DL, MVT::i32);
+
+  // true if the base and slice registers need to be modified
+  bool NeedsAdd = true;
+  auto ImmNode = dyn_cast(VecNum);
+  if (ImmNode || Addend != 0) {
+int Imm = ImmNode ? ImmNode->getSExtValue() + Addend : Addend;
+Remainder = DAG.getTargetConstant(Imm % 16, DL, MVT::i32);
+if (Imm >= 0 && Imm <= 15) {
+  // If vnum is an immediate in range then we don't need to modify the tile
+  // slice and base register. We could also get here because Addend != 0 
but
+  // vecnum is not an immediate, in which case we still want the base and
+  // slice register to be modified
+  NeedsAdd = !ImmNode;

SamTebbs33 wrote:

I actually didn't realise that `SDValue()` is a falsey value. That certainly 
does eliminate the need for the `NeedsAdd` boolean. Thank you!

https://github.com/llvm/llvm-project/pull/68565
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][SVE2] Add builtins for moving multi-vectors to/from ZA (PR #71191)

2023-11-14 Thread Sam Tebbs via cfe-commits

https://github.com/SamTebbs33 approved this pull request.

LGTM

https://github.com/llvm/llvm-project/pull/71191
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[llvm] [clang] [clang][AArch64] Pass down stack clash protection options to LLVM/Backend (PR #68993)

2023-11-14 Thread Sam Tebbs via cfe-commits

https://github.com/SamTebbs33 approved this pull request.

Looks good to me :+1: 

https://github.com/llvm/llvm-project/pull/68993
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AArch64][SME2] Enable bfm builtins for sme2 (PR #71927)

2023-11-14 Thread Sam Tebbs via cfe-commits


@@ -1992,3 +1986,12 @@ let TargetGuard = "sme2" in {
   def SVADD_SINGLE_X2 : SInst<"svadd[_single_{d}_x2]", "22d", "cUcsUsiUilUl", 
MergeNone, "aarch64_sve_add_single_x2", [IsStreaming], []>;
   def SVADD_SINGLE_X4 : SInst<"svadd[_single_{d}_x4]", "44d", "cUcsUsiUilUl", 
MergeNone, "aarch64_sve_add_single_x4", [IsStreaming], []>;
 }
+
+let TargetGuard = "sve2p1|sme2" in {
+// == BFloat16 multiply-subtract ==
+  def SVBFMLSLB : SInst<"svbfmlslb[_{d}]", "dd$$", "f", MergeNone, 
"aarch64_sve_bfmlslb", [IsOverloadNone], []>;

SamTebbs33 wrote:

That makes sense :+1: 

https://github.com/llvm/llvm-project/pull/71927
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AArch64][SME2] Enable bfm builtins for sme2 (PR #71927)

2023-11-14 Thread Sam Tebbs via cfe-commits


@@ -2,10 +2,15 @@
 // REQUIRES: aarch64-registered-target
 
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu 
-target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s 
| opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu 
-target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall 
-emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s

SamTebbs33 wrote:

I'll add the `__arm_streaming_compatible` attribute to the functions, thanks 
for spotting that.

https://github.com/llvm/llvm-project/pull/71927
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [mlir] [llvm] [AArch64][SME] Remove immediate argument restriction for svldr and svstr (PR #68565)

2023-11-14 Thread Sam Tebbs via cfe-commits

https://github.com/SamTebbs33 updated 
https://github.com/llvm/llvm-project/pull/68565

>From de07976922782b9dcf5d13d44551b782dc8b3b94 Mon Sep 17 00:00:00 2001
From: Samuel Tebbs 
Date: Fri, 6 Oct 2023 17:09:36 +0100
Subject: [PATCH 01/11] [AArch64][SME] Remove immediate argument restriction
 for svldr and svstr

The svldr_vnum_za and svstr_vnum_za builtins/intrinsics currently
require that the vnum argument be an immediate, since the instructions
take an immediate vector number. However, we emit 0 as the immediate
for the instruction no matter what, and instead modify the base register.

This patch removes that restriction on the argument, so that the
argument can be a non-immediate. If an appropriate immediate was
passed to the builtin then CGBuiltin passes that directly to the LLVM
intrinsic, otherwise it modifies the base register as is existing
behaviour.
---
 clang/lib/CodeGen/CGBuiltin.cpp   | 45 
 .../aarch64-sme-intrinsics/acle_sme_ldr.c | 71 ---
 .../aarch64-sme-intrinsics/acle_sme_str.c | 51 -
 llvm/include/llvm/IR/IntrinsicsAArch64.td |  4 +-
 llvm/lib/Target/AArch64/SMEInstrFormats.td| 10 +--
 .../CostModel/ARM/unaligned_double_load.ll| 59 +++
 .../CodeGen/AArch64/sme-intrinsics-loads.ll   | 33 +++--
 7 files changed, 166 insertions(+), 107 deletions(-)
 create mode 100644 llvm/test/Analysis/CostModel/ARM/unaligned_double_load.ll

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index e1211bb8949b665..63508b40096141e 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -9712,6 +9712,11 @@ Value *CodeGenFunction::EmitSVEMaskedStore(const 
CallExpr *E,
   return Store;
 }
 
+Value *CodeGenFunction::EmitTileslice(Value *Offset, Value *Base) {
+  llvm::Value *CastOffset = Builder.CreateIntCast(Offset, Int64Ty, false);
+  return Builder.CreateAdd(Base, CastOffset, "tileslice");
+}
+
 Value *CodeGenFunction::EmitSMELd1St1(const SVETypeFlags &TypeFlags,
   SmallVectorImpl &Ops,
   unsigned IntID) {
@@ -9767,18 +9772,34 @@ Value *CodeGenFunction::EmitSMEZero(const SVETypeFlags 
&TypeFlags,
 Value *CodeGenFunction::EmitSMELdrStr(const SVETypeFlags &TypeFlags,
   SmallVectorImpl &Ops,
   unsigned IntID) {
-  if (Ops.size() == 3) {
-Function *Cntsb = CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb);
-llvm::Value *CntsbCall = Builder.CreateCall(Cntsb, {}, "svlb");
-
-llvm::Value *VecNum = Ops[2];
-llvm::Value *MulVL = Builder.CreateMul(CntsbCall, VecNum, "mulvl");
-
-Ops[1] = Builder.CreateGEP(Int8Ty, Ops[1], MulVL);
-Ops[0] = Builder.CreateAdd(
-Ops[0], Builder.CreateIntCast(VecNum, Int32Ty, true), "tileslice");
-Ops.erase(&Ops[2]);
-  }
+  if (Ops.size() == 2) {
+// Intrinsics without a vecnum also use this function, so just provide 0
+Ops.push_back(Ops[1]);
+Ops[1] = Builder.getInt32(0);
+  } else {
+int Imm = -1;
+if (ConstantInt* C = dyn_cast(Ops[2]))
+  if (C->getZExtValue() <= 15)
+  Imm = C->getZExtValue();
+
+if (Imm != -1) {
+  Ops[2] = Ops[1];
+  Ops[1] = Builder.getInt32(Imm);
+} else {
+  Function *Cntsb = CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb);
+  llvm::Value *CntsbCall = Builder.CreateCall(Cntsb, {}, "svlb");
+
+  llvm::Value *VecNum = Ops[2];
+  llvm::Value *MulVL = Builder.CreateMul(
+  CntsbCall,
+  VecNum,
+  "mulvl");
+
+  Ops[2] = Builder.CreateGEP(Int8Ty, Ops[1], MulVL);
+  Ops[1] = Builder.getInt32(0);
+  Ops[0] = Builder.CreateIntCast(EmitTileslice(Ops[0], VecNum), Int32Ty, 
false);
+}
+   }
   Function *F = CGM.getIntrinsic(IntID, {});
   return Builder.CreateCall(F, Ops);
 }
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c 
b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c
index e85c47072f2df80..8e07cf1d11c19b2 100644
--- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c
@@ -6,57 +6,46 @@
 
 #include 
 
-// CHECK-C-LABEL: define dso_local void @test_svldr_vnum_za(
-// CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], ptr noundef [[PTR:%.*]]) 
local_unnamed_addr #[[ATTR0:[0-9]+]] {
-// CHECK-C-NEXT:  entry:
-// CHECK-C-NEXT:tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE]], 
ptr [[PTR]])
-// CHECK-C-NEXT:ret void
-//
-// CHECK-CXX-LABEL: define dso_local void @_Z18test_svldr_vnum_zajPKv(
-// CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], ptr noundef [[PTR:%.*]]) 
local_unnamed_addr #[[ATTR0:[0-9]+]] {
-// CHECK-CXX-NEXT:  entry:
-// CHECK-CXX-NEXT:tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE]], 
ptr [[PTR]])
-// CHECK-CXX-NEXT:ret void
+// CHECK-C-LABEL: @test_svldr_vnum_za(
+// CHECK-CXX-LABEL: @_Z18test_svldr_vnum_zaj

[clang] [AArch64][SME] Warn when using a streaming builtin from a non-streaming function (PR #74064)

2023-12-05 Thread Sam Tebbs via cfe-commits


@@ -289,7 +283,7 @@ ARM_STREAMING_ATTR void test_svst1_ver_vnum_za64(uint32_t 
slice_base, svbool_t p
 // CHECK-CXX-NEXT:tail call void @llvm.aarch64.sme.st1q.vert( [[TMP0]], ptr [[TMP2]], i32 15, i32 [[SLICE_BASE]])
 // CHECK-CXX-NEXT:ret void
 //
-ARM_STREAMING_ATTR void test_svst1_ver_vnum_za128(uint32_t slice_base, 
svbool_t pg, void *ptr, int64_t vnum) {
+void test_svst1_ver_vnum_za128(uint32_t slice_base, svbool_t pg, void *ptr, 
int64_t vnum) __arm_streaming {

SamTebbs33 wrote:

Sure thing.

https://github.com/llvm/llvm-project/pull/74064
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AArch64][SME] Warn when using a streaming builtin from a non-streaming function (PR #74064)

2023-12-05 Thread Sam Tebbs via cfe-commits


@@ -6,20 +6,20 @@
 #include 
 
 __attribute__((target("sme")))
-void test_sme(svbool_t pg, void *ptr) {
+void test_sme(svbool_t pg, void *ptr) __arm_streaming {
   svld1_hor_za8(0, 0, pg, ptr);
 }
 
 __attribute__((target("arch=armv8-a+sme")))
-void test_arch_sme(svbool_t pg, void *ptr) {
+void test_arch_sme(svbool_t pg, void *ptr) __arm_streaming {
   svld1_hor_vnum_za32(0, 0, pg, ptr, 0);
 }
 
 __attribute__((target("+sme")))
-void test_plus_sme(svbool_t pg, void *ptr) {
+void test_plus_sme(svbool_t pg, void *ptr) __arm_streaming {
   svst1_ver_za16(0, 0, pg, ptr);
 }
 
-void undefined(svbool_t pg, void *ptr) {
-  svst1_ver_vnum_za64(0, 0, pg, ptr, 0); // expected-error 
{{'svst1_ver_vnum_za64' needs target feature sme}}
+void undefined(svbool_t pg, void *ptr) __arm_streaming { // expected-error 
{{function executed in streaming-SVE mode requires 'sme'}}

SamTebbs33 wrote:

With the streaming attribute added, the error is thrown for the function 
signature line, so it has to be on this line.

https://github.com/llvm/llvm-project/pull/74064
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AArch64][SME] Warn when using a streaming builtin from a non-streaming function (PR #74064)

2023-12-05 Thread Sam Tebbs via cfe-commits


@@ -2995,6 +2995,134 @@ static QualType getNeonEltType(NeonTypeFlags Flags, 
ASTContext &Context,
 
 enum ArmStreamingType { ArmNonStreaming, ArmStreaming, ArmStreamingCompatible 
};
 
+bool Sema::ParseSVEImmChecks(
+CallExpr *TheCall, SmallVector, 3> &ImmChecks) {
+  // Perform all the immediate checks for this builtin call.
+  bool HasError = false;
+  for (auto &I : ImmChecks) {
+int ArgNum, CheckTy, ElementSizeInBits;
+std::tie(ArgNum, CheckTy, ElementSizeInBits) = I;
+
+typedef bool (*OptionSetCheckFnTy)(int64_t Value);
+
+// Function that checks whether the operand (ArgNum) is an immediate
+// that is one of the predefined values.
+auto CheckImmediateInSet = [&](OptionSetCheckFnTy CheckImm,
+   int ErrDiag) -> bool {
+  // We can't check the value of a dependent argument.
+  Expr *Arg = TheCall->getArg(ArgNum);
+  if (Arg->isTypeDependent() || Arg->isValueDependent())
+return false;
+
+  // Check constant-ness first.
+  llvm::APSInt Imm;
+  if (SemaBuiltinConstantArg(TheCall, ArgNum, Imm))
+return true;
+
+  if (!CheckImm(Imm.getSExtValue()))
+return Diag(TheCall->getBeginLoc(), ErrDiag) << Arg->getSourceRange();
+  return false;
+};
+
+switch ((SVETypeFlags::ImmCheckType)CheckTy) {
+case SVETypeFlags::ImmCheck0_31:
+  if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 31))
+HasError = true;
+  break;
+case SVETypeFlags::ImmCheck0_13:
+  if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 13))
+HasError = true;
+  break;
+case SVETypeFlags::ImmCheck1_16:
+  if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 1, 16))
+HasError = true;
+  break;
+case SVETypeFlags::ImmCheck0_7:
+  if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 7))
+HasError = true;
+  break;
+case SVETypeFlags::ImmCheckExtract:
+  if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0,
+  (2048 / ElementSizeInBits) - 1))
+HasError = true;
+  break;
+case SVETypeFlags::ImmCheckShiftRight:
+  if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 1, ElementSizeInBits))
+HasError = true;
+  break;
+case SVETypeFlags::ImmCheckShiftRightNarrow:
+  if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 1,
+  ElementSizeInBits / 2))
+HasError = true;
+  break;
+case SVETypeFlags::ImmCheckShiftLeft:
+  if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0,
+  ElementSizeInBits - 1))
+HasError = true;
+  break;
+case SVETypeFlags::ImmCheckLaneIndex:
+  if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0,
+  (128 / (1 * ElementSizeInBits)) - 1))
+HasError = true;
+  break;
+case SVETypeFlags::ImmCheckLaneIndexCompRotate:
+  if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0,
+  (128 / (2 * ElementSizeInBits)) - 1))
+HasError = true;
+  break;
+case SVETypeFlags::ImmCheckLaneIndexDot:
+  if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0,
+  (128 / (4 * ElementSizeInBits)) - 1))
+HasError = true;
+  break;
+case SVETypeFlags::ImmCheckComplexRot90_270:
+  if (CheckImmediateInSet([](int64_t V) { return V == 90 || V == 270; },
+  diag::err_rotation_argument_to_cadd))
+HasError = true;
+  break;
+case SVETypeFlags::ImmCheckComplexRotAll90:
+  if (CheckImmediateInSet(
+  [](int64_t V) {
+return V == 0 || V == 90 || V == 180 || V == 270;
+  },
+  diag::err_rotation_argument_to_cmla))
+HasError = true;
+  break;
+case SVETypeFlags::ImmCheck0_1:
+  if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 1))
+HasError = true;
+  break;
+case SVETypeFlags::ImmCheck0_2:
+  if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 2))
+HasError = true;
+  break;
+case SVETypeFlags::ImmCheck0_3:
+  if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 3))
+HasError = true;
+  break;
+case SVETypeFlags::ImmCheck0_0:
+  if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 0))
+HasError = true;
+  break;
+case SVETypeFlags::ImmCheck0_15:
+  if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 15))
+HasError = true;
+  break;
+case SVETypeFlags::ImmCheck0_255:
+  if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 255))
+HasError = true;
+  break;
+case SVETypeFlags::ImmCheck2_4_Mul2:
+  if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 2, 4) ||
+  SemaBuiltinConstantArgMultiple(TheCall, ArgNum, 2))
+HasError = true;
+  break;

SamTebbs33 wrote:

I 

[clang] [AArch64][SME] Warn when using a streaming builtin from a non-streaming function (PR #74064)

2023-12-05 Thread Sam Tebbs via cfe-commits


@@ -289,7 +283,7 @@ ARM_STREAMING_ATTR void test_svst1_ver_vnum_za64(uint32_t 
slice_base, svbool_t p
 // CHECK-CXX-NEXT:tail call void @llvm.aarch64.sme.st1q.vert( [[TMP0]], ptr [[TMP2]], i32 15, i32 [[SLICE_BASE]])
 // CHECK-CXX-NEXT:ret void
 //
-ARM_STREAMING_ATTR void test_svst1_ver_vnum_za128(uint32_t slice_base, 
svbool_t pg, void *ptr, int64_t vnum) {
+void test_svst1_ver_vnum_za128(uint32_t slice_base, svbool_t pg, void *ptr, 
int64_t vnum) __arm_streaming {

SamTebbs33 wrote:

Done

https://github.com/llvm/llvm-project/pull/74064
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AArch64][SME] Warn when using a streaming builtin from a non-streaming function (PR #74064)

2023-12-05 Thread Sam Tebbs via cfe-commits


@@ -3023,6 +3151,66 @@ static void checkArmStreamingBuiltin(Sema &S, CallExpr 
*TheCall,
 << TheCall->getSourceRange() << "streaming compatible";
 return;
   }
+
+  if (FnType == ArmNonStreaming && BuiltinType == ArmStreaming) {
+S.Diag(TheCall->getBeginLoc(), 
diag::warn_attribute_arm_sm_incompat_builtin)
+<< TheCall->getSourceRange() << "non-streaming";
+  }
+}
+
+static bool hasSMEZAState(const FunctionDecl *FD) {
+  if (FD->hasAttr())
+return true;
+  if (const auto *T = FD->getType()->getAs())
+if (T->getAArch64SMEAttributes() & FunctionType::SME_PStateZASharedMask)
+  return true;
+  return false;
+}
+
+static bool hasSMEZAState(unsigned BuiltinID) {
+  switch (BuiltinID) {
+  default:
+return false;
+#define GET_SME_BUILTIN_HAS_ZA_STATE
+#include "clang/Basic/arm_sme_builtins_za_state.inc"
+#undef GET_SME_BUILTIN_HAS_ZA_STATE
+  }
+}
+
+bool Sema::CheckSMEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
+  if (const FunctionDecl *FD = getCurFunctionDecl()) {
+bool debug = FD->getDeclName().getAsString() == "incompat_sve_sm";
+std::optional BuiltinType;
+
+switch (BuiltinID) {
+default:
+  break;
+#define GET_SME_STREAMING_ATTRS
+#include "clang/Basic/arm_sme_streaming_attrs.inc"

SamTebbs33 wrote:

Kept the function separate as per our offline discussion.

https://github.com/llvm/llvm-project/pull/74064
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AArch64][SME] Warn when using a streaming builtin from a non-streaming function (PR #74064)

2023-12-06 Thread Sam Tebbs via cfe-commits


@@ -3183,6 +3140,114 @@ bool Sema::CheckSVEBuiltinFunctionCall(unsigned 
BuiltinID, CallExpr *TheCall) {
   return HasError;
 }
 
+static ArmStreamingType getArmStreamingFnType(const FunctionDecl *FD) {
+  if (FD->hasAttr())
+return ArmStreaming;
+  if (const auto *T = FD->getType()->getAs()) {
+if (T->getAArch64SMEAttributes() & FunctionType::SME_PStateSMEnabledMask)
+  return ArmStreaming;
+if (T->getAArch64SMEAttributes() & 
FunctionType::SME_PStateSMCompatibleMask)
+  return ArmStreamingCompatible;
+  }
+  return ArmNonStreaming;
+}
+
+static void checkArmStreamingBuiltin(Sema &S, CallExpr *TheCall,
+ const FunctionDecl *FD,
+ ArmStreamingType BuiltinType) {
+  ArmStreamingType FnType = getArmStreamingFnType(FD);
+
+  if (FnType == ArmStreaming && BuiltinType == ArmNonStreaming) {
+S.Diag(TheCall->getBeginLoc(), 
diag::warn_attribute_arm_sm_incompat_builtin)
+<< TheCall->getSourceRange() << "streaming";
+  }
+
+  if (FnType == ArmStreamingCompatible &&
+  BuiltinType != ArmStreamingCompatible) {
+S.Diag(TheCall->getBeginLoc(), 
diag::warn_attribute_arm_sm_incompat_builtin)
+<< TheCall->getSourceRange() << "streaming compatible";
+return;
+  }
+
+  if (FnType == ArmNonStreaming && BuiltinType == ArmStreaming) {
+S.Diag(TheCall->getBeginLoc(), 
diag::warn_attribute_arm_sm_incompat_builtin)
+<< TheCall->getSourceRange() << "non-streaming";
+  }
+}
+
+static bool hasSMEZAState(const FunctionDecl *FD) {
+  if (FD->hasAttr())
+return true;
+  if (const auto *T = FD->getType()->getAs())
+if (T->getAArch64SMEAttributes() & FunctionType::SME_PStateZASharedMask)
+  return true;
+  return false;
+}
+
+static bool hasSMEZAState(unsigned BuiltinID) {
+  switch (BuiltinID) {
+  default:
+return false;
+#define GET_SME_BUILTIN_HAS_ZA_STATE
+#include "clang/Basic/arm_sme_builtins_za_state.inc"
+#undef GET_SME_BUILTIN_HAS_ZA_STATE
+  }
+}
+
+bool Sema::CheckSMEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
+  if (const FunctionDecl *FD = getCurFunctionDecl()) {
+std::optional BuiltinType;
+
+switch (BuiltinID) {
+default:
+  break;
+#define GET_SME_STREAMING_ATTRS
+#include "clang/Basic/arm_sme_streaming_attrs.inc"
+#undef GET_SME_STREAMING_ATTRS
+}
+
+if (BuiltinType)
+  checkArmStreamingBuiltin(*this, TheCall, FD, *BuiltinType);
+
+if (hasSMEZAState(BuiltinID) && !hasSMEZAState(FD))
+  Diag(TheCall->getBeginLoc(),
+   diag::warn_attribute_arm_za_builtin_no_za_state)
+  << TheCall->getSourceRange();
+  }
+
+  // Range check SME intrinsics that take immediate values.
+  SmallVector, 3> ImmChecks;
+
+  switch (BuiltinID) {
+  default:
+return false;
+#define GET_SME_IMMEDIATE_CHECK
+#include "clang/Basic/arm_sme_sema_rangechecks.inc"
+#undef GET_SME_IMMEDIATE_CHECK
+  }
+
+  return ParseSVEImmChecks(TheCall, ImmChecks);
+}
+
+bool Sema::CheckSVEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {

SamTebbs33 wrote:

Sure thing

https://github.com/llvm/llvm-project/pull/74064
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AArch64][SME] Warn when using a streaming builtin from a non-streaming function (PR #74064)

2023-12-06 Thread Sam Tebbs via cfe-commits


@@ -3172,6 +3117,18 @@ bool Sema::CheckSVEBuiltinFunctionCall(unsigned 
BuiltinID, CallExpr *TheCall) {
   if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 255))
 HasError = true;
   break;
+case SVETypeFlags::ImmCheck1_1:

SamTebbs33 wrote:

I'm actually not sure. When building the compiler was telling me that these 
cases weren't being checked for so I added them. I'm not sure what caused them 
to disappear in the first place. I'll revert this chunk.

https://github.com/llvm/llvm-project/pull/74064
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AArch64][SME] Warn when using a streaming builtin from a non-streaming function (PR #74064)

2023-12-07 Thread Sam Tebbs via cfe-commits


@@ -3183,6 +3140,114 @@ bool Sema::CheckSVEBuiltinFunctionCall(unsigned 
BuiltinID, CallExpr *TheCall) {
   return HasError;
 }
 
+static ArmStreamingType getArmStreamingFnType(const FunctionDecl *FD) {
+  if (FD->hasAttr())
+return ArmStreaming;
+  if (const auto *T = FD->getType()->getAs()) {
+if (T->getAArch64SMEAttributes() & FunctionType::SME_PStateSMEnabledMask)
+  return ArmStreaming;
+if (T->getAArch64SMEAttributes() & 
FunctionType::SME_PStateSMCompatibleMask)
+  return ArmStreamingCompatible;
+  }
+  return ArmNonStreaming;
+}
+
+static void checkArmStreamingBuiltin(Sema &S, CallExpr *TheCall,
+ const FunctionDecl *FD,
+ ArmStreamingType BuiltinType) {
+  ArmStreamingType FnType = getArmStreamingFnType(FD);
+
+  if (FnType == ArmStreaming && BuiltinType == ArmNonStreaming) {
+S.Diag(TheCall->getBeginLoc(), 
diag::warn_attribute_arm_sm_incompat_builtin)
+<< TheCall->getSourceRange() << "streaming";
+  }
+
+  if (FnType == ArmStreamingCompatible &&
+  BuiltinType != ArmStreamingCompatible) {
+S.Diag(TheCall->getBeginLoc(), 
diag::warn_attribute_arm_sm_incompat_builtin)
+<< TheCall->getSourceRange() << "streaming compatible";
+return;
+  }
+
+  if (FnType == ArmNonStreaming && BuiltinType == ArmStreaming) {
+S.Diag(TheCall->getBeginLoc(), 
diag::warn_attribute_arm_sm_incompat_builtin)
+<< TheCall->getSourceRange() << "non-streaming";
+  }
+}
+
+static bool hasSMEZAState(const FunctionDecl *FD) {
+  if (FD->hasAttr())
+return true;
+  if (const auto *T = FD->getType()->getAs())
+if (T->getAArch64SMEAttributes() & FunctionType::SME_PStateZASharedMask)
+  return true;
+  return false;
+}
+
+static bool hasSMEZAState(unsigned BuiltinID) {
+  switch (BuiltinID) {
+  default:
+return false;
+#define GET_SME_BUILTIN_HAS_ZA_STATE
+#include "clang/Basic/arm_sme_builtins_za_state.inc"
+#undef GET_SME_BUILTIN_HAS_ZA_STATE
+  }
+}
+
+bool Sema::CheckSMEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
+  if (const FunctionDecl *FD = getCurFunctionDecl()) {
+std::optional BuiltinType;
+
+switch (BuiltinID) {
+default:
+  break;
+#define GET_SME_STREAMING_ATTRS
+#include "clang/Basic/arm_sme_streaming_attrs.inc"
+#undef GET_SME_STREAMING_ATTRS
+}
+
+if (BuiltinType)
+  checkArmStreamingBuiltin(*this, TheCall, FD, *BuiltinType);
+
+if (hasSMEZAState(BuiltinID) && !hasSMEZAState(FD))
+  Diag(TheCall->getBeginLoc(),
+   diag::warn_attribute_arm_za_builtin_no_za_state)
+  << TheCall->getSourceRange();
+  }
+
+  // Range check SME intrinsics that take immediate values.
+  SmallVector, 3> ImmChecks;
+
+  switch (BuiltinID) {
+  default:
+return false;
+#define GET_SME_IMMEDIATE_CHECK
+#include "clang/Basic/arm_sme_sema_rangechecks.inc"
+#undef GET_SME_IMMEDIATE_CHECK
+  }
+
+  return ParseSVEImmChecks(TheCall, ImmChecks);
+}
+
+bool Sema::CheckSVEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {

SamTebbs33 wrote:

Done! Rebased and should be ready for review again now.

https://github.com/llvm/llvm-project/pull/74064
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AArch64][SME2] Add builtins to cast svbool from/to svcount. (PR #74720)

2023-12-07 Thread Sam Tebbs via cfe-commits

https://github.com/SamTebbs33 approved this pull request.

LGTM

https://github.com/llvm/llvm-project/pull/74720
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AArch64][SME] Warn when using a streaming builtin from a non-streaming function (PR #74064)

2023-12-08 Thread Sam Tebbs via cfe-commits


@@ -3058,6 +3058,11 @@ bool Sema::ParseSVEImmChecks(
   if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 1, 7))
 HasError = true;
   break;
+case SVETypeFlags::ImmCheck2_4_Mul2:

SamTebbs33 wrote:

I think you were looking at an old commit as I fixed that in 
[48ee745](https://github.com/llvm/llvm-project/pull/74064/commits/48ee745a815f0fda41cb1791f91d73db73e4aeba)

https://github.com/llvm/llvm-project/pull/74064
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AArch64][SME] Warn when using a streaming builtin from a non-streaming function (PR #74064)

2023-12-08 Thread Sam Tebbs via cfe-commits


@@ -20,3 +21,23 @@ int16x8_t incompat_neon_smc(int16x8_t splat) 
__arm_streaming_compatible {
   // expected-warning@+1 {{builtin call has undefined behaviour when called 
from a streaming compatible function}}
   return (int16x8_t)__builtin_neon_vqaddq_v((int8x16_t)splat, 
(int8x16_t)splat, 33);
 }
+
+void incompat_sme_norm(svbool_t pg, void const *ptr) __arm_shared_za {
+  // expected-warning@+1 {{builtin call has undefined behaviour when called 
from a non-streaming function}}
+  return __builtin_sme_svld1_hor_za128(0, 0, pg, ptr);

SamTebbs33 wrote:

> nit: returning a `void` value from a `void` function doesn't seem right.

Agreed, I'll fix that.
> Also, is `incompat_sme_norm` testing anything that `incompat_sme_sm` isn't 
> testing?
> Or should this be a test where we'd call a non-streaming SVE/SME builtin from 
> a streaming-function?

Yeah it doesn't look like it is. That would be better!





https://github.com/llvm/llvm-project/pull/74064
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AArch64][SME] Warn when using a streaming builtin from a non-streaming function (PR #74064)

2023-12-08 Thread Sam Tebbs via cfe-commits


@@ -6,20 +6,21 @@
 #include 
 
 __attribute__((target("sme")))
-void test_sme(svbool_t pg, void *ptr) {
+void test_sme(svbool_t pg, void *ptr) __arm_streaming __arm_shared_za {
   svld1_hor_za8(0, 0, pg, ptr);
 }
 
 __attribute__((target("arch=armv8-a+sme")))
-void test_arch_sme(svbool_t pg, void *ptr) {
+void test_arch_sme(svbool_t pg, void *ptr) __arm_streaming __arm_shared_za {
   svld1_hor_vnum_za32(0, 0, pg, ptr, 0);
 }
 
 __attribute__((target("+sme")))
-void test_plus_sme(svbool_t pg, void *ptr) {
+void test_plus_sme(svbool_t pg, void *ptr) __arm_streaming __arm_shared_za {
   svst1_ver_za16(0, 0, pg, ptr);
 }
 
-void undefined(svbool_t pg, void *ptr) {
-  svst1_ver_vnum_za64(0, 0, pg, ptr, 0); // expected-error 
{{'svst1_ver_vnum_za64' needs target feature sme}}
+__attribute__((target("+sme")))
+void undefined(svbool_t pg, void *ptr) __arm_shared_za {
+  svst1_ver_vnum_za64(0, 0, pg, ptr, 0); // expected-warning {{builtin call 
has undefined behaviour when called from a non-streaming function}}

SamTebbs33 wrote:

In a previous review you asked me to do so:

> I think instead you want to compile this test with 
> __attribute__((target("+sme"))) but without the __arm_streaming to ensure you 
> get a diagnostic on the builtin call that the behaviour is undefined when the 
> (parent) function is not a streaming function.

https://github.com/llvm/llvm-project/pull/74064
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AArch64][SME] Warn when using a streaming builtin from a non-streaming function (PR #74064)

2023-12-08 Thread Sam Tebbs via cfe-commits


@@ -18,7 +18,7 @@
 // CHECK-CXX-NEXT:tail call void @llvm.aarch64.sme.zero(i32 0)
 // CHECK-CXX-NEXT:ret void
 //
-void test_svzero_mask_za() {
+__arm_new_za void test_svzero_mask_za() {

SamTebbs33 wrote:

I did try `__arm_shared_za` but got 

> '__arm_shared_za' only applies to non-K&R-style functions

and

> error: '__arm_shared_za' only applies to function types; type here is 'void 
> ()'

https://github.com/llvm/llvm-project/pull/74064
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AArch64][SME] Warn when using a streaming builtin from a non-streaming function (PR #74064)

2023-12-08 Thread Sam Tebbs via cfe-commits


@@ -3168,11 +3168,70 @@ static void checkArmStreamingBuiltin(Sema &S, CallExpr 
*TheCall,
 << TheCall->getSourceRange() << "streaming compatible";
 return;
   }
+
+  if (FnType == ArmNonStreaming && BuiltinType == ArmStreaming) {
+S.Diag(TheCall->getBeginLoc(), 
diag::warn_attribute_arm_sm_incompat_builtin)
+<< TheCall->getSourceRange() << "non-streaming";
+  }
+}
+
+static bool hasSMEZAState(const FunctionDecl *FD) {
+  if (FD->hasAttr())
+return true;
+  if (const auto *T = FD->getType()->getAs())
+if (T->getAArch64SMEAttributes() & FunctionType::SME_PStateZASharedMask)
+  return true;
+  return false;
+}
+
+static bool hasSMEZAState(unsigned BuiltinID) {
+  switch (BuiltinID) {
+  default:
+return false;
+#define GET_SME_BUILTIN_HAS_ZA_STATE
+#include "clang/Basic/arm_sme_builtins_za_state.inc"
+#undef GET_SME_BUILTIN_HAS_ZA_STATE
+  }
+}
+
+bool Sema::CheckSMEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
+  if (const FunctionDecl *FD = getCurFunctionDecl()) {
+std::optional BuiltinType;

SamTebbs33 wrote:

Sure thing.

https://github.com/llvm/llvm-project/pull/74064
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AArch64][SME] Warn when using a streaming builtin from a non-streaming function (PR #74064)

2023-12-08 Thread Sam Tebbs via cfe-commits


@@ -1375,6 +1381,12 @@ void SVEEmitter::createHeader(raw_ostream &OS) {
   OS << "#define __aio static __inline__ __attribute__((__always_inline__, "
 "__nodebug__, __overloadable__))\n\n";
 
+  OS << "#ifdef __ARM_FEATURE_SME\n";
+  OS << "#define __asc __attribute__((arm_streaming_compatible))\n";

SamTebbs33 wrote:

Omitting it works for me, cheers.

https://github.com/llvm/llvm-project/pull/74064
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AArch64][SME] Warn when using a streaming builtin from a non-streaming function (PR #74064)

2023-12-08 Thread Sam Tebbs via cfe-commits


@@ -500,6 +506,12 @@ bool ClangTableGenMain(raw_ostream &OS, RecordKeeper 
&Records) {
   case GenArmSmeRangeChecks:
 EmitSmeRangeChecks(Records, OS);
 break;
+  case GenArmSmeStreamingAttrs:

SamTebbs33 wrote:

I remember you suggesting that we have the SME and SVE changes in separate PRs 
but I can bring the SVE changes into this one.

https://github.com/llvm/llvm-project/pull/74064
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AArch64][SME] Warn when using a streaming builtin from a non-streaming function (PR #74064)

2023-12-08 Thread Sam Tebbs via cfe-commits


@@ -3168,11 +3168,70 @@ static void checkArmStreamingBuiltin(Sema &S, CallExpr 
*TheCall,
 << TheCall->getSourceRange() << "streaming compatible";
 return;
   }
+
+  if (FnType == ArmNonStreaming && BuiltinType == ArmStreaming) {
+S.Diag(TheCall->getBeginLoc(), 
diag::warn_attribute_arm_sm_incompat_builtin)
+<< TheCall->getSourceRange() << "non-streaming";
+  }
+}

SamTebbs33 wrote:

I've removed the ZA state changes and added the SVE changes. Let me know what 
you think.

https://github.com/llvm/llvm-project/pull/74064
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AArch64][SME] Warn when using a streaming builtin from a non-streaming function (PR #74064)

2023-12-08 Thread Sam Tebbs via cfe-commits


@@ -20,3 +21,23 @@ int16x8_t incompat_neon_smc(int16x8_t splat) 
__arm_streaming_compatible {
   // expected-warning@+1 {{builtin call has undefined behaviour when called 
from a streaming compatible function}}
   return (int16x8_t)__builtin_neon_vqaddq_v((int8x16_t)splat, 
(int8x16_t)splat, 33);
 }
+
+void incompat_sme_norm(svbool_t pg, void const *ptr) __arm_shared_za {
+  // expected-warning@+1 {{builtin call has undefined behaviour when called 
from a non-streaming function}}
+  return __builtin_sme_svld1_hor_za128(0, 0, pg, ptr);

SamTebbs33 wrote:

I removed this test since, as you said, it wasn't testing anything different 
and after adding the SVE changes we have tests for SVE builtins anyway.

https://github.com/llvm/llvm-project/pull/74064
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AArch64][SME] Warn when using a streaming builtin from a non-streaming function (PR #74064)

2023-12-08 Thread Sam Tebbs via cfe-commits


@@ -500,6 +506,12 @@ bool ClangTableGenMain(raw_ostream &OS, RecordKeeper 
&Records) {
   case GenArmSmeRangeChecks:
 EmitSmeRangeChecks(Records, OS);
 break;
+  case GenArmSmeStreamingAttrs:

SamTebbs33 wrote:

Done

https://github.com/llvm/llvm-project/pull/74064
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[mlir] [clang] [llvm] [AArch64][SME] Remove immediate argument restriction for svldr and svstr (PR #68565)

2023-11-15 Thread Sam Tebbs via cfe-commits

https://github.com/SamTebbs33 updated 
https://github.com/llvm/llvm-project/pull/68565

>From 83e20904c206980285c4ee9d0227706803147654 Mon Sep 17 00:00:00 2001
From: Samuel Tebbs 
Date: Fri, 6 Oct 2023 17:09:36 +0100
Subject: [PATCH 01/12] [AArch64][SME] Remove immediate argument restriction
 for svldr and svstr

The svldr_vnum_za and svstr_vnum_za builtins/intrinsics currently
require that the vnum argument be an immediate, since the instructions
take an immediate vector number. However, we emit 0 as the immediate
for the instruction no matter what, and instead modify the base register.

This patch removes that restriction on the argument, so that the
argument can be a non-immediate. If an appropriate immediate was
passed to the builtin then CGBuiltin passes that directly to the LLVM
intrinsic, otherwise it modifies the base register as is existing
behaviour.
---
 clang/lib/CodeGen/CGBuiltin.cpp   | 45 
 .../aarch64-sme-intrinsics/acle_sme_ldr.c | 71 ---
 .../aarch64-sme-intrinsics/acle_sme_str.c | 51 -
 llvm/include/llvm/IR/IntrinsicsAArch64.td |  2 +-
 llvm/lib/Target/AArch64/SMEInstrFormats.td| 10 +--
 .../CostModel/ARM/unaligned_double_load.ll| 59 +++
 .../CodeGen/AArch64/sme-intrinsics-loads.ll   | 33 +++--
 7 files changed, 165 insertions(+), 106 deletions(-)
 create mode 100644 llvm/test/Analysis/CostModel/ARM/unaligned_double_load.ll

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 09309a3937fb613..8444aea8c8ac4b6 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -9815,6 +9815,11 @@ Value *CodeGenFunction::EmitSVEMaskedStore(const 
CallExpr *E,
   return Store;
 }
 
+Value *CodeGenFunction::EmitTileslice(Value *Offset, Value *Base) {
+  llvm::Value *CastOffset = Builder.CreateIntCast(Offset, Int64Ty, false);
+  return Builder.CreateAdd(Base, CastOffset, "tileslice");
+}
+
 Value *CodeGenFunction::EmitSMELd1St1(const SVETypeFlags &TypeFlags,
   SmallVectorImpl &Ops,
   unsigned IntID) {
@@ -9870,18 +9875,34 @@ Value *CodeGenFunction::EmitSMEZero(const SVETypeFlags 
&TypeFlags,
 Value *CodeGenFunction::EmitSMELdrStr(const SVETypeFlags &TypeFlags,
   SmallVectorImpl &Ops,
   unsigned IntID) {
-  if (Ops.size() == 3) {
-Function *Cntsb = CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb);
-llvm::Value *CntsbCall = Builder.CreateCall(Cntsb, {}, "svlb");
-
-llvm::Value *VecNum = Ops[2];
-llvm::Value *MulVL = Builder.CreateMul(CntsbCall, VecNum, "mulvl");
-
-Ops[1] = Builder.CreateGEP(Int8Ty, Ops[1], MulVL);
-Ops[0] = Builder.CreateAdd(
-Ops[0], Builder.CreateIntCast(VecNum, Int32Ty, true), "tileslice");
-Ops.erase(&Ops[2]);
-  }
+  if (Ops.size() == 2) {
+// Intrinsics without a vecnum also use this function, so just provide 0
+Ops.push_back(Ops[1]);
+Ops[1] = Builder.getInt32(0);
+  } else {
+int Imm = -1;
+if (ConstantInt* C = dyn_cast(Ops[2]))
+  if (C->getZExtValue() <= 15)
+  Imm = C->getZExtValue();
+
+if (Imm != -1) {
+  Ops[2] = Ops[1];
+  Ops[1] = Builder.getInt32(Imm);
+} else {
+  Function *Cntsb = CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb);
+  llvm::Value *CntsbCall = Builder.CreateCall(Cntsb, {}, "svlb");
+
+  llvm::Value *VecNum = Ops[2];
+  llvm::Value *MulVL = Builder.CreateMul(
+  CntsbCall,
+  VecNum,
+  "mulvl");
+
+  Ops[2] = Builder.CreateGEP(Int8Ty, Ops[1], MulVL);
+  Ops[1] = Builder.getInt32(0);
+  Ops[0] = Builder.CreateIntCast(EmitTileslice(Ops[0], VecNum), Int32Ty, 
false);
+}
+   }
   Function *F = CGM.getIntrinsic(IntID, {});
   return Builder.CreateCall(F, Ops);
 }
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c 
b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c
index e85c47072f2df80..8e07cf1d11c19b2 100644
--- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c
@@ -6,57 +6,46 @@
 
 #include 
 
-// CHECK-C-LABEL: define dso_local void @test_svldr_vnum_za(
-// CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], ptr noundef [[PTR:%.*]]) 
local_unnamed_addr #[[ATTR0:[0-9]+]] {
-// CHECK-C-NEXT:  entry:
-// CHECK-C-NEXT:tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE]], 
ptr [[PTR]])
-// CHECK-C-NEXT:ret void
-//
-// CHECK-CXX-LABEL: define dso_local void @_Z18test_svldr_vnum_zajPKv(
-// CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], ptr noundef [[PTR:%.*]]) 
local_unnamed_addr #[[ATTR0:[0-9]+]] {
-// CHECK-CXX-NEXT:  entry:
-// CHECK-CXX-NEXT:tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE]], 
ptr [[PTR]])
-// CHECK-CXX-NEXT:ret void
+// CHECK-C-LABEL: @test_svldr_vnum_za(
+// CHECK-CXX-LABEL: @_Z18test_svldr_vnum_zaj

[clang] [AArch64][SME2] Enable bfm builtins for sme2 (PR #71927)

2023-11-15 Thread Sam Tebbs via cfe-commits

https://github.com/SamTebbs33 updated 
https://github.com/llvm/llvm-project/pull/71927

>From ff33739706ce758f416153aa3cdb7c632e068da3 Mon Sep 17 00:00:00 2001
From: Samuel Tebbs 
Date: Thu, 9 Nov 2023 17:39:26 +
Subject: [PATCH 1/4] [AArch64][SME2] Enable bfm builtings for sme2

This patch enables the following builtins for SME2
svbfmlslb_f32
svbfmlslb_lane_f32
svbfmlslt_f32
svbfmlslt_lane_f32

Patch by: Kerry McLaughlin 
---
 clang/include/clang/Basic/arm_sve.td  | 15 +--
 .../acle_sve2p1_bfmlsl.c  |  5 +
 2 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/clang/include/clang/Basic/arm_sve.td 
b/clang/include/clang/Basic/arm_sve.td
index 3d4c2129565903d..1094c424e68abd5 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -1959,12 +1959,6 @@ def SVDOT_X2_F : SInst<"svdot[_{d}_{2}_{3}]", "ddhh", 
"f",  MergeNone, "aarch64_
 def SVDOT_LANE_X2_S : SInst<"svdot_lane[_{d}_{2}_{3}]", "ddhhi", "i",  
MergeNone, "aarch64_sve_sdot_lane_x2", [], [ImmCheck<3, ImmCheck0_3>]>;
 def SVDOT_LANE_X2_U : SInst<"svdot_lane[_{d}_{2}_{3}]", "ddhhi", "Ui", 
MergeNone, "aarch64_sve_udot_lane_x2", [], [ImmCheck<3, ImmCheck0_3>]>;
 def SVDOT_LANE_X2_F : SInst<"svdot_lane[_{d}_{2}_{3}]", "ddhhi", "f",  
MergeNone, "aarch64_sve_fdot_lane_x2", [], [ImmCheck<3, ImmCheck0_3>]>;
-
-def SVBFMLSLB : SInst<"svbfmlslb[_{d}]", "dd$$", "f", MergeNone, 
"aarch64_sve_bfmlslb", [IsOverloadNone], []>;
-def SVBFMLSLT : SInst<"svbfmlslt[_{d}]", "dd$$", "f", MergeNone, 
"aarch64_sve_bfmlslt", [IsOverloadNone], []>;
-
-def SVBFMLSLB_LANE : SInst<"svbfmlslb_lane[_{d}]", "dd$$i", "f", MergeNone, 
"aarch64_sve_bfmlslb_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>;
-def SVBFMLSLT_LANE : SInst<"svbfmlslt_lane[_{d}]", "dd$$i", "f", MergeNone, 
"aarch64_sve_bfmlslt_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>;
 }
 
 let TargetGuard = "sve2p1" in {
@@ -1992,3 +1986,12 @@ let TargetGuard = "sme2" in {
   def SVADD_SINGLE_X2 : SInst<"svadd[_single_{d}_x2]", "22d", "cUcsUsiUilUl", 
MergeNone, "aarch64_sve_add_single_x2", [IsStreaming], []>;
   def SVADD_SINGLE_X4 : SInst<"svadd[_single_{d}_x4]", "44d", "cUcsUsiUilUl", 
MergeNone, "aarch64_sve_add_single_x4", [IsStreaming], []>;
 }
+
+let TargetGuard = "sve2p1|sme" in {
+// == BFloat16 multiply-subtract ==
+  def SVBFMLSLB : SInst<"svbfmlslb[_{d}]", "dd$$", "f", MergeNone, 
"aarch64_sve_bfmlslb", [IsOverloadNone], []>;
+  def SVBFMLSLT : SInst<"svbfmlslt[_{d}]", "dd$$", "f", MergeNone, 
"aarch64_sve_bfmlslt", [IsOverloadNone], []>;
+
+  def SVBFMLSLB_LANE : SInst<"svbfmlslb_lane[_{d}]", "dd$$i", "f", MergeNone, 
"aarch64_sve_bfmlslb_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>;
+  def SVBFMLSLT_LANE : SInst<"svbfmlslt_lane[_{d}]", "dd$$i", "f", MergeNone, 
"aarch64_sve_bfmlslt_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>;
+}
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmlsl.c 
b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmlsl.c
index 7e1763a63ec4e1d..519bf019ff35ba5 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmlsl.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmlsl.c
@@ -2,10 +2,15 @@
 // REQUIRES: aarch64-registered-target
 
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu 
-target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s 
| opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu 
-target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall 
-emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu 
-target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x 
c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s 
-check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu 
-target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall 
-emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | 
FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple 
aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror 
-Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | 
FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple 
aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S 
-disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p 
mem2reg,instcombine,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple 
aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror 
-Wall -emit-llvm -o - -x c++ %

[llvm] [mlir] [clang] [AArch64][SME] Remove immediate argument restriction for svldr and svstr (PR #68565)

2023-11-15 Thread Sam Tebbs via cfe-commits


@@ -4850,6 +4852,93 @@ SDValue AArch64TargetLowering::getPStateSM(SelectionDAG 
&DAG, SDValue Chain,
  Mask);
 }
 
+// Lower an SME LDR/STR ZA intrinsic to LDR_ZA_PSEUDO or STR_ZA.

SamTebbs33 wrote:

And it's no longer precise as we lower it to the SDNode first.

https://github.com/llvm/llvm-project/pull/68565
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [mlir] [AArch64][SME] Remove immediate argument restriction for svldr and svstr (PR #68565)

2023-11-15 Thread Sam Tebbs via cfe-commits


@@ -4850,6 +4852,93 @@ SDValue AArch64TargetLowering::getPStateSM(SelectionDAG 
&DAG, SDValue Chain,
  Mask);
 }
 
+// Lower an SME LDR/STR ZA intrinsic to LDR_ZA_PSEUDO or STR_ZA.
+// Case 1: If the vector number (vecnum) is an immediate in range, it gets
+// folded into the instruction
+//ldr(%tileslice, %ptr, 11) -> ldr [%tileslice, 11], [%ptr, 11]
+// Case 2: If the vecnum is not an immediate, then it is used to modify the 
base
+// and tile slice registers
+//ldr(%tileslice, %ptr, %vecnum)
+//->
+//%svl = rdsvl
+//%ptr2 = %ptr + %svl * %vecnum
+//%tileslice2 = %tileslice + %vecnum
+//ldr [%tileslice2, 0], [%ptr2, 0]
+// Case 3: If the vecnum is an immediate out of range, then the same is done as
+// case 2, but the base and slice registers are modified by the greatest
+// multiple of 15 lower than the vecnum and the remainder is folded into the
+// instruction. This means that successive loads and stores that are offset 
from
+// each other can share the same base and slice register updates.
+//ldr(%tileslice, %ptr, 22)
+//ldr(%tileslice, %ptr, 23)
+//->
+//%svl = rdsvl
+//%ptr2 = %ptr + %svl * 15
+//%tileslice2 = %tileslice + 15
+//ldr [%tileslice2, 7], [%ptr2, 7]
+//ldr [%tileslice2, 8], [%ptr2, 8]
+// Case 4: If the vecnum is an add of an immediate, then the non-immediate
+// operand and the immediate can be folded into the instruction, like case 2.
+//ldr(%tileslice, %ptr, %vecnum + 7)
+//ldr(%tileslice, %ptr, %vecnum + 8)
+//->
+//%svl = rdsvl
+//%ptr2 = %ptr + %svl * %vecnum
+//%tileslice2 = %tileslice + %vecnum
+//ldr [%tileslice2, 7], [%ptr2, 7]
+//ldr [%tileslice2, 8], [%ptr2, 8]
+// Case 5: The vecnum being an add of an immediate out of range is also 
handled,
+// in which case the same remainder logic as case 3 is used.
+SDValue LowerSMELdrStr(SDValue N, SelectionDAG &DAG, bool IsLoad) {
+  SDLoc DL(N);
+
+  SDValue TileSlice = N->getOperand(2);
+  SDValue Base = N->getOperand(3);
+  SDValue VecNum = N->getOperand(4);
+  int32_t ConstAddend = 0;
+  SDValue VarAddend = VecNum;
+
+  // If the vnum is an add of an immediate, we can fold it into the instruction
+  if (VecNum.getOpcode() == ISD::ADD &&
+  isa(VecNum.getOperand(1))) {
+ConstAddend = cast(VecNum.getOperand(1))->getSExtValue();
+VarAddend = VecNum.getOperand(0);
+  } else if (auto ImmNode = dyn_cast(VecNum)) {
+ConstAddend = ImmNode->getSExtValue();
+VarAddend = SDValue();
+  }
+
+  int32_t ImmAddend = ConstAddend % 16;
+  if (int32_t C = (ConstAddend - ImmAddend)) {
+SDValue CVal = DAG.getTargetConstant(C, DL, MVT::i32);
+VarAddend = VarAddend
+? DAG.getNode(ISD::ADD, DL, MVT::i32, {VarAddend, CVal})
+: CVal;
+  }
+
+  if (VarAddend) {
+// Get the vector length that will be multiplied by vnum
+auto SVL = DAG.getNode(AArch64ISD::RDSVL, DL, MVT::i64,
+   DAG.getConstant(1, DL, MVT::i32));
+
+// Multiply SVL and vnum then add it to the base
+SDValue Mul = DAG.getNode(
+ISD::MUL, DL, MVT::i64,
+{SVL, DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, VarAddend)});
+Base = DAG.getNode(ISD::ADD, DL, MVT::i64, {Base, Mul});
+// Just add vnum to the tileslice
+TileSlice = DAG.getNode(ISD::ADD, DL, MVT::i32, {TileSlice, VarAddend});
+  }
+
+  SmallVector Ops = {
+  /*Chain=*/N.getOperand(0), TileSlice, Base,
+  DAG.getTargetConstant(ImmAddend, DL, MVT::i32)};
+  auto LdrStr =
+  DAG.getNode(IsLoad ? AArch64ISD::SME_ZA_LDR : AArch64ISD::SME_ZA_STR, DL,
+  MVT::Other, Ops);
+  return LdrStr;

SamTebbs33 wrote:

Thanks, I must have kept this from before the refactor.

https://github.com/llvm/llvm-project/pull/68565
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [mlir] [AArch64][SME] Remove immediate argument restriction for svldr and svstr (PR #68565)

2023-11-15 Thread Sam Tebbs via cfe-commits

SamTebbs33 wrote:

> Thanks for all the changes! LGTM with few little nits addressed.

Thanks!

https://github.com/llvm/llvm-project/pull/68565
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [mlir] [AArch64][SME] Remove immediate argument restriction for svldr and svstr (PR #68565)

2023-11-15 Thread Sam Tebbs via cfe-commits

https://github.com/SamTebbs33 updated 
https://github.com/llvm/llvm-project/pull/68565

>From 83e20904c206980285c4ee9d0227706803147654 Mon Sep 17 00:00:00 2001
From: Samuel Tebbs 
Date: Fri, 6 Oct 2023 17:09:36 +0100
Subject: [PATCH 01/13] [AArch64][SME] Remove immediate argument restriction
 for svldr and svstr

The svldr_vnum_za and svstr_vnum_za builtins/intrinsics currently
require that the vnum argument be an immediate, since the instructions
take an immediate vector number. However, we emit 0 as the immediate
for the instruction no matter what, and instead modify the base register.

This patch removes that restriction on the argument, so that the
argument can be a non-immediate. If an appropriate immediate was
passed to the builtin then CGBuiltin passes that directly to the LLVM
intrinsic, otherwise it modifies the base register as is existing
behaviour.
---
 clang/lib/CodeGen/CGBuiltin.cpp   | 45 
 .../aarch64-sme-intrinsics/acle_sme_ldr.c | 71 ---
 .../aarch64-sme-intrinsics/acle_sme_str.c | 51 -
 llvm/include/llvm/IR/IntrinsicsAArch64.td |  2 +-
 llvm/lib/Target/AArch64/SMEInstrFormats.td| 10 +--
 .../CostModel/ARM/unaligned_double_load.ll| 59 +++
 .../CodeGen/AArch64/sme-intrinsics-loads.ll   | 33 +++--
 7 files changed, 165 insertions(+), 106 deletions(-)
 create mode 100644 llvm/test/Analysis/CostModel/ARM/unaligned_double_load.ll

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 09309a3937fb613..8444aea8c8ac4b6 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -9815,6 +9815,11 @@ Value *CodeGenFunction::EmitSVEMaskedStore(const 
CallExpr *E,
   return Store;
 }
 
+Value *CodeGenFunction::EmitTileslice(Value *Offset, Value *Base) {
+  llvm::Value *CastOffset = Builder.CreateIntCast(Offset, Int64Ty, false);
+  return Builder.CreateAdd(Base, CastOffset, "tileslice");
+}
+
 Value *CodeGenFunction::EmitSMELd1St1(const SVETypeFlags &TypeFlags,
   SmallVectorImpl &Ops,
   unsigned IntID) {
@@ -9870,18 +9875,34 @@ Value *CodeGenFunction::EmitSMEZero(const SVETypeFlags 
&TypeFlags,
 Value *CodeGenFunction::EmitSMELdrStr(const SVETypeFlags &TypeFlags,
   SmallVectorImpl &Ops,
   unsigned IntID) {
-  if (Ops.size() == 3) {
-Function *Cntsb = CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb);
-llvm::Value *CntsbCall = Builder.CreateCall(Cntsb, {}, "svlb");
-
-llvm::Value *VecNum = Ops[2];
-llvm::Value *MulVL = Builder.CreateMul(CntsbCall, VecNum, "mulvl");
-
-Ops[1] = Builder.CreateGEP(Int8Ty, Ops[1], MulVL);
-Ops[0] = Builder.CreateAdd(
-Ops[0], Builder.CreateIntCast(VecNum, Int32Ty, true), "tileslice");
-Ops.erase(&Ops[2]);
-  }
+  if (Ops.size() == 2) {
+// Intrinsics without a vecnum also use this function, so just provide 0
+Ops.push_back(Ops[1]);
+Ops[1] = Builder.getInt32(0);
+  } else {
+int Imm = -1;
+if (ConstantInt* C = dyn_cast(Ops[2]))
+  if (C->getZExtValue() <= 15)
+  Imm = C->getZExtValue();
+
+if (Imm != -1) {
+  Ops[2] = Ops[1];
+  Ops[1] = Builder.getInt32(Imm);
+} else {
+  Function *Cntsb = CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb);
+  llvm::Value *CntsbCall = Builder.CreateCall(Cntsb, {}, "svlb");
+
+  llvm::Value *VecNum = Ops[2];
+  llvm::Value *MulVL = Builder.CreateMul(
+  CntsbCall,
+  VecNum,
+  "mulvl");
+
+  Ops[2] = Builder.CreateGEP(Int8Ty, Ops[1], MulVL);
+  Ops[1] = Builder.getInt32(0);
+  Ops[0] = Builder.CreateIntCast(EmitTileslice(Ops[0], VecNum), Int32Ty, 
false);
+}
+   }
   Function *F = CGM.getIntrinsic(IntID, {});
   return Builder.CreateCall(F, Ops);
 }
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c 
b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c
index e85c47072f2df80..8e07cf1d11c19b2 100644
--- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c
@@ -6,57 +6,46 @@
 
 #include 
 
-// CHECK-C-LABEL: define dso_local void @test_svldr_vnum_za(
-// CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], ptr noundef [[PTR:%.*]]) 
local_unnamed_addr #[[ATTR0:[0-9]+]] {
-// CHECK-C-NEXT:  entry:
-// CHECK-C-NEXT:tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE]], 
ptr [[PTR]])
-// CHECK-C-NEXT:ret void
-//
-// CHECK-CXX-LABEL: define dso_local void @_Z18test_svldr_vnum_zajPKv(
-// CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], ptr noundef [[PTR:%.*]]) 
local_unnamed_addr #[[ATTR0:[0-9]+]] {
-// CHECK-CXX-NEXT:  entry:
-// CHECK-CXX-NEXT:tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE]], 
ptr [[PTR]])
-// CHECK-CXX-NEXT:ret void
+// CHECK-C-LABEL: @test_svldr_vnum_za(
+// CHECK-CXX-LABEL: @_Z18test_svldr_vnum_zaj

[clang] [Clang][SME2] Add single and multi min/max by vector builtins (PR #71707)

2023-11-16 Thread Sam Tebbs via cfe-commits

https://github.com/SamTebbs33 closed 
https://github.com/llvm/llvm-project/pull/71707
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [mlir] [llvm] [AArch64][SME] Remove immediate argument restriction for svldr and svstr (PR #68565)

2023-11-20 Thread Sam Tebbs via cfe-commits

https://github.com/SamTebbs33 closed 
https://github.com/llvm/llvm-project/pull/68565
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AArch64][SME] Remove immediate argument restriction for svldr and svstr (PR #68565)

2023-10-30 Thread Sam Tebbs via cfe-commits


@@ -292,7 +292,42 @@ define void @ldr_with_off_16mulvl(ptr %ptr) {
   %vscale = call i64 @llvm.vscale.i64()
   %mulvl = mul i64 %vscale, 256
   %base = getelementptr i8, ptr %ptr, i64 %mulvl
-  call void @llvm.aarch64.sme.ldr(i32 16, ptr %base)
+  call void @llvm.aarch64.sme.ldr(i32 16, ptr %base, i32 0)
+  ret void;
+}
+
+define void @ldr_with_off_var(ptr %base, i32 %off) {
+; CHECK-LABEL: ldr_with_off_var:
+; CHECK:   // %bb.0:
+; CHECK-NEXT:rdsvl x8, #1
+; CHECK-NEXT:// kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:mov w12, #16 // =0x10
+; CHECK-NEXT:madd x8, x8, x1, x0
+; CHECK-NEXT:ldr za[w12, 0], [x8]
+; CHECK-NEXT:ret
+  call void @llvm.aarch64.sme.ldr(i32 16, ptr %base, i32 %off)
+  ret void;
+}
+
+define void @ldr_with_off_15imm(ptr %base) {
+; CHECK-LABEL: ldr_with_off_15imm:
+; CHECK:   // %bb.0:
+; CHECK-NEXT:mov w12, #16 // =0x10
+; CHECK-NEXT:ldr za[w12, 15], [x0, #15, mul vl]
+; CHECK-NEXT:ret
+  call void @llvm.aarch64.sme.ldr(i32 16, ptr %base, i32 15)
+  ret void;
+}
+
+define void @ldr_with_off_16imm(ptr %base) {
+; CHECK-LABEL: ldr_with_off_16imm:
+; CHECK:   // %bb.0:
+; CHECK-NEXT:rdsvl x8, #1
+; CHECK-NEXT:mov w12, #16 // =0x10
+; CHECK-NEXT:madd x8, x8, x12, x0
+; CHECK-NEXT:ldr za[w12, 0], [x8]

SamTebbs33 wrote:

I think you're right. It looks like the same thing seems to be happening with 
`ldr_with_off_var` as well.

https://github.com/llvm/llvm-project/pull/68565
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[llvm] [clang] [AArch64][SME] Remove immediate argument restriction for svldr and svstr (PR #68565)

2023-10-31 Thread Sam Tebbs via cfe-commits


@@ -1741,6 +1742,54 @@ void AArch64DAGToDAGISel::SelectCVTIntrinsic(SDNode *N, 
unsigned NumVecs,
   CurDAG->RemoveDeadNode(N);
 }
 
+void AArch64DAGToDAGISel::SelectSMELdrStrZA(SDNode *N, bool IsLoad) {
+  // Lower an SME LDR/STR ZA intrinsic to LDR_ZA_PSEUDO or STR_ZA.
+  // If the vector select parameter is an immediate in the range 0-15 then we
+  // can emit it directly into the instruction as it's a legal operand.
+  // Otherwise we must emit 0 as the vector select operand and modify the base
+  // register instead.
+  SDLoc DL(N);
+
+  SDValue VecNum = N->getOperand(4), Base = N->getOperand(3),
+  TileSlice = N->getOperand(2);
+  int Imm = -1;
+  if (auto ImmNode = dyn_cast(VecNum))
+Imm = ImmNode->getZExtValue();
+
+  if (Imm >= 0 && Imm <= 15) {

SamTebbs33 wrote:

> Something still needs to happen with the immediate argument when it doesn't 
> fit entirely into the instruction's immediate operand. The current result is 
> not correct.

I don't quite understand what that something is if immediate if it doesn't fit 
in the range. Currently the immediate is multiplied by svl and added to the 
base register if it doesn't fit. What else should be done if it doesn't fit in 
the range?

> Maybe you can restructure the code in such a way that it first tries to fold 
> as much of the constant into the instruction's immediate operand, and then 
> adds the remainder to the tile-slice and to the pointer (multiplied by SVL), 
> as you do in the code below.

Do you mean that if, for example, we have an immediate vecnum of 17, then we 
put 15 into the offset field of the instruction and add 2 x SVL to the base and 
slice registers? Naively I don't see that being an improvement as you're doing 
the multiplication in two places (ldr instruction and an extra madd) rather 
than one.



https://github.com/llvm/llvm-project/pull/68565
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[llvm] [clang] [compiler-rt] [AArch64][SME] Add support for sme-fa64 (PR #70809)

2023-10-31 Thread Sam Tebbs via cfe-commits

https://github.com/SamTebbs33 approved this pull request.

LGTM

https://github.com/llvm/llvm-project/pull/70809
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AArch64][SME] Remove immediate argument restriction for svldr and svstr (PR #68565)

2023-10-31 Thread Sam Tebbs via cfe-commits


@@ -1741,6 +1742,54 @@ void AArch64DAGToDAGISel::SelectCVTIntrinsic(SDNode *N, 
unsigned NumVecs,
   CurDAG->RemoveDeadNode(N);
 }
 
+void AArch64DAGToDAGISel::SelectSMELdrStrZA(SDNode *N, bool IsLoad) {
+  // Lower an SME LDR/STR ZA intrinsic to LDR_ZA_PSEUDO or STR_ZA.
+  // If the vector select parameter is an immediate in the range 0-15 then we
+  // can emit it directly into the instruction as it's a legal operand.
+  // Otherwise we must emit 0 as the vector select operand and modify the base
+  // register instead.
+  SDLoc DL(N);
+
+  SDValue VecNum = N->getOperand(4), Base = N->getOperand(3),
+  TileSlice = N->getOperand(2);
+  int Imm = -1;
+  if (auto ImmNode = dyn_cast(VecNum))
+Imm = ImmNode->getZExtValue();
+
+  if (Imm >= 0 && Imm <= 15) {

SamTebbs33 wrote:

We had an offline discussion and agreed that when the vnum is an immediate but 
is greater than a multiple of 15, it makes sense to modify the base and slice 
registers by that multiple and put the remainder in the immediate field of the 
instruction. That way when we unroll a loop with lots of ldrs offset by one 
from each other, they can all share the base and slice modifications but have 
increasing immediates.

https://github.com/llvm/llvm-project/pull/68565
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AArch64][SME] Remove immediate argument restriction for svldr and svstr (PR #68565)

2023-10-31 Thread Sam Tebbs via cfe-commits

https://github.com/SamTebbs33 edited 
https://github.com/llvm/llvm-project/pull/68565
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AArch64][SME] Remove immediate argument restriction for svldr and svstr (PR #68565)

2023-10-31 Thread Sam Tebbs via cfe-commits

https://github.com/SamTebbs33 updated 
https://github.com/llvm/llvm-project/pull/68565

>From de07976922782b9dcf5d13d44551b782dc8b3b94 Mon Sep 17 00:00:00 2001
From: Samuel Tebbs 
Date: Fri, 6 Oct 2023 17:09:36 +0100
Subject: [PATCH 1/5] [AArch64][SME] Remove immediate argument restriction for
 svldr and svstr

The svldr_vnum_za and svstr_vnum_za builtins/intrinsics currently
require that the vnum argument be an immediate, since the instructions
take an immediate vector number. However, we emit 0 as the immediate
for the instruction no matter what, and instead modify the base register.

This patch removes that restriction on the argument, so that the
argument can be a non-immediate. If an appropriate immediate was
passed to the builtin then CGBuiltin passes that directly to the LLVM
intrinsic, otherwise it modifies the base register as is existing
behaviour.
---
 clang/lib/CodeGen/CGBuiltin.cpp   | 45 
 .../aarch64-sme-intrinsics/acle_sme_ldr.c | 71 ---
 .../aarch64-sme-intrinsics/acle_sme_str.c | 51 -
 llvm/include/llvm/IR/IntrinsicsAArch64.td |  4 +-
 llvm/lib/Target/AArch64/SMEInstrFormats.td| 10 +--
 .../CostModel/ARM/unaligned_double_load.ll| 59 +++
 .../CodeGen/AArch64/sme-intrinsics-loads.ll   | 33 +++--
 7 files changed, 166 insertions(+), 107 deletions(-)
 create mode 100644 llvm/test/Analysis/CostModel/ARM/unaligned_double_load.ll

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index e1211bb8949b665..63508b40096141e 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -9712,6 +9712,11 @@ Value *CodeGenFunction::EmitSVEMaskedStore(const 
CallExpr *E,
   return Store;
 }
 
+Value *CodeGenFunction::EmitTileslice(Value *Offset, Value *Base) {
+  llvm::Value *CastOffset = Builder.CreateIntCast(Offset, Int64Ty, false);
+  return Builder.CreateAdd(Base, CastOffset, "tileslice");
+}
+
 Value *CodeGenFunction::EmitSMELd1St1(const SVETypeFlags &TypeFlags,
   SmallVectorImpl &Ops,
   unsigned IntID) {
@@ -9767,18 +9772,34 @@ Value *CodeGenFunction::EmitSMEZero(const SVETypeFlags 
&TypeFlags,
 Value *CodeGenFunction::EmitSMELdrStr(const SVETypeFlags &TypeFlags,
   SmallVectorImpl &Ops,
   unsigned IntID) {
-  if (Ops.size() == 3) {
-Function *Cntsb = CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb);
-llvm::Value *CntsbCall = Builder.CreateCall(Cntsb, {}, "svlb");
-
-llvm::Value *VecNum = Ops[2];
-llvm::Value *MulVL = Builder.CreateMul(CntsbCall, VecNum, "mulvl");
-
-Ops[1] = Builder.CreateGEP(Int8Ty, Ops[1], MulVL);
-Ops[0] = Builder.CreateAdd(
-Ops[0], Builder.CreateIntCast(VecNum, Int32Ty, true), "tileslice");
-Ops.erase(&Ops[2]);
-  }
+  if (Ops.size() == 2) {
+// Intrinsics without a vecnum also use this function, so just provide 0
+Ops.push_back(Ops[1]);
+Ops[1] = Builder.getInt32(0);
+  } else {
+int Imm = -1;
+if (ConstantInt* C = dyn_cast(Ops[2]))
+  if (C->getZExtValue() <= 15)
+  Imm = C->getZExtValue();
+
+if (Imm != -1) {
+  Ops[2] = Ops[1];
+  Ops[1] = Builder.getInt32(Imm);
+} else {
+  Function *Cntsb = CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb);
+  llvm::Value *CntsbCall = Builder.CreateCall(Cntsb, {}, "svlb");
+
+  llvm::Value *VecNum = Ops[2];
+  llvm::Value *MulVL = Builder.CreateMul(
+  CntsbCall,
+  VecNum,
+  "mulvl");
+
+  Ops[2] = Builder.CreateGEP(Int8Ty, Ops[1], MulVL);
+  Ops[1] = Builder.getInt32(0);
+  Ops[0] = Builder.CreateIntCast(EmitTileslice(Ops[0], VecNum), Int32Ty, 
false);
+}
+   }
   Function *F = CGM.getIntrinsic(IntID, {});
   return Builder.CreateCall(F, Ops);
 }
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c 
b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c
index e85c47072f2df80..8e07cf1d11c19b2 100644
--- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c
@@ -6,57 +6,46 @@
 
 #include 
 
-// CHECK-C-LABEL: define dso_local void @test_svldr_vnum_za(
-// CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], ptr noundef [[PTR:%.*]]) 
local_unnamed_addr #[[ATTR0:[0-9]+]] {
-// CHECK-C-NEXT:  entry:
-// CHECK-C-NEXT:tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE]], 
ptr [[PTR]])
-// CHECK-C-NEXT:ret void
-//
-// CHECK-CXX-LABEL: define dso_local void @_Z18test_svldr_vnum_zajPKv(
-// CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], ptr noundef [[PTR:%.*]]) 
local_unnamed_addr #[[ATTR0:[0-9]+]] {
-// CHECK-CXX-NEXT:  entry:
-// CHECK-CXX-NEXT:tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE]], 
ptr [[PTR]])
-// CHECK-CXX-NEXT:ret void
+// CHECK-C-LABEL: @test_svldr_vnum_za(
+// CHECK-CXX-LABEL: @_Z18test_svldr_vnum_zajPK

[clang] [llvm] [AArch64][SME] Remove immediate argument restriction for svldr and svstr (PR #68565)

2023-10-31 Thread Sam Tebbs via cfe-commits

SamTebbs33 wrote:

I've changed the approach to consider immediates outside of 0-15 and fixed the 
issue of the tile slice not being updated. Please let me know what you think.

https://github.com/llvm/llvm-project/pull/68565
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[llvm] [clang] [AArch64][SME] Remove immediate argument restriction for svldr and svstr (PR #68565)

2023-11-03 Thread Sam Tebbs via cfe-commits

https://github.com/SamTebbs33 updated 
https://github.com/llvm/llvm-project/pull/68565

>From de07976922782b9dcf5d13d44551b782dc8b3b94 Mon Sep 17 00:00:00 2001
From: Samuel Tebbs 
Date: Fri, 6 Oct 2023 17:09:36 +0100
Subject: [PATCH 1/4] [AArch64][SME] Remove immediate argument restriction for
 svldr and svstr

The svldr_vnum_za and svstr_vnum_za builtins/intrinsics currently
require that the vnum argument be an immediate, since the instructions
take an immediate vector number. However, we emit 0 as the immediate
for the instruction no matter what, and instead modify the base register.

This patch removes that restriction on the argument, so that the
argument can be a non-immediate. If an appropriate immediate was
passed to the builtin then CGBuiltin passes that directly to the LLVM
intrinsic, otherwise it modifies the base register as is existing
behaviour.
---
 clang/lib/CodeGen/CGBuiltin.cpp   | 45 
 .../aarch64-sme-intrinsics/acle_sme_ldr.c | 71 ---
 .../aarch64-sme-intrinsics/acle_sme_str.c | 51 -
 llvm/include/llvm/IR/IntrinsicsAArch64.td |  4 +-
 llvm/lib/Target/AArch64/SMEInstrFormats.td| 10 +--
 .../CostModel/ARM/unaligned_double_load.ll| 59 +++
 .../CodeGen/AArch64/sme-intrinsics-loads.ll   | 33 +++--
 7 files changed, 166 insertions(+), 107 deletions(-)
 create mode 100644 llvm/test/Analysis/CostModel/ARM/unaligned_double_load.ll

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index e1211bb8949b665..63508b40096141e 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -9712,6 +9712,11 @@ Value *CodeGenFunction::EmitSVEMaskedStore(const 
CallExpr *E,
   return Store;
 }
 
+Value *CodeGenFunction::EmitTileslice(Value *Offset, Value *Base) {
+  llvm::Value *CastOffset = Builder.CreateIntCast(Offset, Int64Ty, false);
+  return Builder.CreateAdd(Base, CastOffset, "tileslice");
+}
+
 Value *CodeGenFunction::EmitSMELd1St1(const SVETypeFlags &TypeFlags,
   SmallVectorImpl &Ops,
   unsigned IntID) {
@@ -9767,18 +9772,34 @@ Value *CodeGenFunction::EmitSMEZero(const SVETypeFlags 
&TypeFlags,
 Value *CodeGenFunction::EmitSMELdrStr(const SVETypeFlags &TypeFlags,
   SmallVectorImpl &Ops,
   unsigned IntID) {
-  if (Ops.size() == 3) {
-Function *Cntsb = CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb);
-llvm::Value *CntsbCall = Builder.CreateCall(Cntsb, {}, "svlb");
-
-llvm::Value *VecNum = Ops[2];
-llvm::Value *MulVL = Builder.CreateMul(CntsbCall, VecNum, "mulvl");
-
-Ops[1] = Builder.CreateGEP(Int8Ty, Ops[1], MulVL);
-Ops[0] = Builder.CreateAdd(
-Ops[0], Builder.CreateIntCast(VecNum, Int32Ty, true), "tileslice");
-Ops.erase(&Ops[2]);
-  }
+  if (Ops.size() == 2) {
+// Intrinsics without a vecnum also use this function, so just provide 0
+Ops.push_back(Ops[1]);
+Ops[1] = Builder.getInt32(0);
+  } else {
+int Imm = -1;
+if (ConstantInt* C = dyn_cast(Ops[2]))
+  if (C->getZExtValue() <= 15)
+  Imm = C->getZExtValue();
+
+if (Imm != -1) {
+  Ops[2] = Ops[1];
+  Ops[1] = Builder.getInt32(Imm);
+} else {
+  Function *Cntsb = CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb);
+  llvm::Value *CntsbCall = Builder.CreateCall(Cntsb, {}, "svlb");
+
+  llvm::Value *VecNum = Ops[2];
+  llvm::Value *MulVL = Builder.CreateMul(
+  CntsbCall,
+  VecNum,
+  "mulvl");
+
+  Ops[2] = Builder.CreateGEP(Int8Ty, Ops[1], MulVL);
+  Ops[1] = Builder.getInt32(0);
+  Ops[0] = Builder.CreateIntCast(EmitTileslice(Ops[0], VecNum), Int32Ty, 
false);
+}
+   }
   Function *F = CGM.getIntrinsic(IntID, {});
   return Builder.CreateCall(F, Ops);
 }
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c 
b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c
index e85c47072f2df80..8e07cf1d11c19b2 100644
--- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ldr.c
@@ -6,57 +6,46 @@
 
 #include 
 
-// CHECK-C-LABEL: define dso_local void @test_svldr_vnum_za(
-// CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], ptr noundef [[PTR:%.*]]) 
local_unnamed_addr #[[ATTR0:[0-9]+]] {
-// CHECK-C-NEXT:  entry:
-// CHECK-C-NEXT:tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE]], 
ptr [[PTR]])
-// CHECK-C-NEXT:ret void
-//
-// CHECK-CXX-LABEL: define dso_local void @_Z18test_svldr_vnum_zajPKv(
-// CHECK-CXX-SAME: i32 noundef [[SLICE_BASE:%.*]], ptr noundef [[PTR:%.*]]) 
local_unnamed_addr #[[ATTR0:[0-9]+]] {
-// CHECK-CXX-NEXT:  entry:
-// CHECK-CXX-NEXT:tail call void @llvm.aarch64.sme.ldr(i32 [[SLICE_BASE]], 
ptr [[PTR]])
-// CHECK-CXX-NEXT:ret void
+// CHECK-C-LABEL: @test_svldr_vnum_za(
+// CHECK-CXX-LABEL: @_Z18test_svldr_vnum_zajPK

[llvm] [clang] [AArch64][SME] Remove immediate argument restriction for svldr and svstr (PR #68565)

2023-11-06 Thread Sam Tebbs via cfe-commits


@@ -1741,6 +1742,69 @@ void AArch64DAGToDAGISel::SelectCVTIntrinsic(SDNode *N, 
unsigned NumVecs,
   CurDAG->RemoveDeadNode(N);
 }
 
+void AArch64DAGToDAGISel::SelectSMELdrStrZA(SDNode *N, bool IsLoad) {
+  // Lower an SME LDR/STR ZA intrinsic to LDR_ZA_PSEUDO or STR_ZA.
+  // If the vector number is an immediate between 0 and 15 inclusive then we 
can
+  // put that directly into the immediate field of the instruction. If it's
+  // outside of that range then we modify the base and slice by the greatest
+  // multiple of 15 smaller than that number and put the remainder in the
+  // instruction field. If it's not an immediate then we modify the base and
+  // slice registers by that number and put 0 in the instruction.
+  SDLoc DL(N);
+
+  SDValue TileSlice = N->getOperand(2);
+  SDValue Base = N->getOperand(3);
+  SDValue VecNum = N->getOperand(4);
+  SDValue Remainder = CurDAG->getTargetConstant(0, DL, MVT::i32);
+
+  // true if the base and slice registers need to me modified
+  bool NeedsAdd = true;
+  if (auto ImmNode = dyn_cast(VecNum)) {
+int Imm = ImmNode->getSExtValue();
+if (Imm >= 0 && Imm <= 15) {
+  Remainder = CurDAG->getTargetConstant(Imm, DL, MVT::i32);
+  NeedsAdd = false;
+} else {
+  Remainder = CurDAG->getTargetConstant(Imm % 15, DL, MVT::i32);

SamTebbs33 wrote:

Yep, the range of the immediate is 0-15.

https://github.com/llvm/llvm-project/pull/68565
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[llvm] [clang] [AArch64][SME] Remove immediate argument restriction for svldr and svstr (PR #68565)

2023-11-06 Thread Sam Tebbs via cfe-commits


@@ -292,23 +292,101 @@ define void @ldr_with_off_16mulvl(ptr %ptr) {
   %vscale = call i64 @llvm.vscale.i64()
   %mulvl = mul i64 %vscale, 256
   %base = getelementptr i8, ptr %ptr, i64 %mulvl
-  call void @llvm.aarch64.sme.ldr(i32 16, ptr %base)
+  call void @llvm.aarch64.sme.ldr(i32 16, ptr %base, i32 0)
   ret void;
 }
 
+define void @ldr_with_off_var(ptr %base, i32 %off) {
+; CHECK-LABEL: ldr_with_off_var:
+; CHECK:   // %bb.0:
+; CHECK-NEXT:// kill: def $w2 killed $w2 def $x2
+; CHECK-NEXT:sxtw x8, w2

SamTebbs33 wrote:

I did forget to stage those changes. Thanks for noticing.

https://github.com/llvm/llvm-project/pull/68565
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][SME2] Add multi-vector unpack builtins (PR #75075)

2023-12-12 Thread Sam Tebbs via cfe-commits

https://github.com/SamTebbs33 approved this pull request.

Looks good to me.

https://github.com/llvm/llvm-project/pull/75075
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AArch64][SME] Warn when using a streaming builtin from a non-streaming function (PR #74064)

2023-12-13 Thread Sam Tebbs via cfe-commits


@@ -1694,6 +1697,61 @@ void SVEEmitter::createSMERangeChecks(raw_ostream &OS) {
   OS << "#endif\n\n";
 }
 
+void SVEEmitter::createStreamingAttrs(raw_ostream &OS, ACLEKind Kind) {
+  std::vector RV = Records.getAllDerivedDefinitions("Inst");
+  SmallVector, 128> Defs;
+  for (auto *R : RV)
+createIntrinsic(R, Defs);
+
+  // The mappings must be sorted based on BuiltinID.
+  llvm::sort(Defs, [](const std::unique_ptr &A,
+  const std::unique_ptr &B) {
+return A->getMangledName() < B->getMangledName();
+  });
+
+  switch (Kind) {

SamTebbs33 wrote:

That looks nice and clean to me. Thanks for the suggestion.

https://github.com/llvm/llvm-project/pull/74064
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AArch64][SME] Warn when using a streaming builtin from a non-streaming function (PR #74064)

2023-12-13 Thread Sam Tebbs via cfe-commits


@@ -3168,9 +3167,60 @@ static void checkArmStreamingBuiltin(Sema &S, CallExpr 
*TheCall,
 << TheCall->getSourceRange() << "streaming compatible";
 return;
   }
+
+  if (FnType == ArmNonStreaming && BuiltinType == ArmStreaming) {
+S.Diag(TheCall->getBeginLoc(), 
diag::warn_attribute_arm_sm_incompat_builtin)
+<< TheCall->getSourceRange() << "non-streaming";
+  }
+}
+
+bool Sema::CheckSMEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
+  if (const FunctionDecl *FD = getCurFunctionDecl()) {
+ArmStreamingType BuiltinType;
+
+switch (BuiltinID) {
+default:
+  BuiltinType = ArmNonStreaming;
+  break;
+#define GET_SME_STREAMING_ATTRS
+#include "clang/Basic/arm_sme_streaming_attrs.inc"
+#undef GET_SME_STREAMING_ATTRS
+#define GET_SVE_STREAMING_ATTRS
+#include "clang/Basic/arm_sve_streaming_attrs.inc"

SamTebbs33 wrote:

Good spot, thanks.

https://github.com/llvm/llvm-project/pull/74064
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AArch64][SME] Warn when using a streaming builtin from a non-streaming function (PR #74064)

2023-12-13 Thread Sam Tebbs via cfe-commits

https://github.com/SamTebbs33 closed 
https://github.com/llvm/llvm-project/pull/74064
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Revert "[AArch64][SME] Warn when using a streaming builtin from a non-streaming function" (PR #75449)

2023-12-14 Thread Sam Tebbs via cfe-commits

https://github.com/SamTebbs33 closed 
https://github.com/llvm/llvm-project/pull/75449
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AArch64][SME] Warn when using a streaming builtin from a non-streaming function (PR #74064)

2023-12-14 Thread Sam Tebbs via cfe-commits

SamTebbs33 wrote:

Thanks for reporting that Nico. I've reverted the patch and will work on 
improving compile time. I like your idea Sander.

https://github.com/llvm/llvm-project/pull/74064
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AArch64][SME] Warn when using a streaming builtin from a non-streaming function (PR #75487)

2023-12-14 Thread Sam Tebbs via cfe-commits

SamTebbs33 wrote:

Commit 
[6dc8fa2](https://github.com/llvm/llvm-project/pull/75487/commits/6dc8fa2c89159d234d9477358dd1ce1cbf059865)
 is the only one with new content. 
[d9a8da1](https://github.com/llvm/llvm-project/pull/75487/commits/d9a8da139e93f0b3259cb22d7f4c55ed46f9f265)
 is the exact same as what was reverted.

https://github.com/llvm/llvm-project/pull/75487
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AArch64][SME] Warn when using a streaming builtin from a non-streaming function (PR #74064)

2023-12-14 Thread Sam Tebbs via cfe-commits

SamTebbs33 wrote:

New PR at https://github.com/llvm/llvm-project/pull/75487

https://github.com/llvm/llvm-project/pull/74064
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AArch64][SME] Warn when using a streaming builtin from a non-streaming function (PR #75487)

2023-12-15 Thread Sam Tebbs via cfe-commits

SamTebbs33 wrote:

> If it's not too much trouble, could you `git cherry-pick 
> c60663d128f8e0dccd418bdf16ecc403b96aa74a` into this? (Cool if not, ofc.)

Sure, I can do that. Would you also mind attempting to reproduce the compile 
time with the latest commit, just to make sure it fixes the issue on your end? 
I saw a 36 -> 31 second (compared to 30 on main) decrease which seems OK but 
isn't on the same scale as the numbers you're seeing.

https://github.com/llvm/llvm-project/pull/75487
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AArch64][SME] Warn when using a streaming builtin from a non-streaming function (PR #75487)

2023-12-15 Thread Sam Tebbs via cfe-commits


@@ -1702,6 +1705,62 @@ void SVEEmitter::createSMERangeChecks(raw_ostream &OS) {
   OS << "#endif\n\n";
 }
 
+void SVEEmitter::createStreamingAttrs(raw_ostream &OS, ACLEKind Kind) {
+  std::vector RV = Records.getAllDerivedDefinitions("Inst");
+  SmallVector, 128> Defs;
+  for (auto *R : RV)
+createIntrinsic(R, Defs);
+
+  // The mappings must be sorted based on BuiltinID.
+  llvm::sort(Defs, [](const std::unique_ptr &A,
+  const std::unique_ptr &B) {
+return A->getMangledName() < B->getMangledName();
+  });
+
+  StringRef ExtensionKind;
+  switch (Kind) {
+  case ACLEKind::SME:
+ExtensionKind = "SME";
+break;
+  case ACLEKind::SVE:
+ExtensionKind = "SVE";
+break;
+  }
+
+  OS << "#ifdef GET_" << ExtensionKind << "_STREAMING_ATTRS\n";
+
+  // Ensure these are only emitted once.
+  std::set Emitted;
+  llvm::StringMap> StreamingMap;

SamTebbs33 wrote:

Thanks for the ideas. I ended up removing the sorting since grouping the 
builtins within the map makes the sorting across all builtins redundant anyway.

https://github.com/llvm/llvm-project/pull/75487
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AArch64][SME] Warn when using a streaming builtin from a non-streaming function (PR #75487)

2023-12-18 Thread Sam Tebbs via cfe-commits

SamTebbs33 wrote:

@nico I tried to cherry-pick your commit 
(c60663d128f8e0dccd418bdf16ecc403b96aa74a)  into my branch but for some reason 
it was always empty. It might be best for you to try recommitting it now.

https://github.com/llvm/llvm-project/pull/75487
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AArch64][SME] Warn when using a streaming builtin from a non-streaming function (PR #75487)

2023-12-18 Thread Sam Tebbs via cfe-commits

https://github.com/SamTebbs33 closed 
https://github.com/llvm/llvm-project/pull/75487
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AArch64][SME2] Enable bfm builtins for sme2 (PR #71927)

2023-12-18 Thread Sam Tebbs via cfe-commits

https://github.com/SamTebbs33 updated 
https://github.com/llvm/llvm-project/pull/71927

>From 93a02d9af1d7e4f1e23c252d72b20d292927a79f Mon Sep 17 00:00:00 2001
From: Samuel Tebbs 
Date: Thu, 9 Nov 2023 17:39:26 +
Subject: [PATCH 1/5] [AArch64][SME2] Enable bfm builtings for sme2

This patch enables the following builtins for SME2
svbfmlslb_f32
svbfmlslb_lane_f32
svbfmlslt_f32
svbfmlslt_lane_f32

Patch by: Kerry McLaughlin 
---
 clang/include/clang/Basic/arm_sve.td  | 15 +--
 .../acle_sve2p1_bfmlsl.c  |  5 +
 2 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/clang/include/clang/Basic/arm_sve.td 
b/clang/include/clang/Basic/arm_sve.td
index 98434c5c53e28c..38437e98858b23 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -2057,12 +2057,6 @@ def SVDOT_X2_F : SInst<"svdot[_{d}_{2}_{3}]", "ddhh", 
"f",  MergeNone, "aarch64_
 def SVDOT_LANE_X2_S : SInst<"svdot_lane[_{d}_{2}_{3}]", "ddhhi", "i",  
MergeNone, "aarch64_sve_sdot_lane_x2", [], [ImmCheck<3, ImmCheck0_3>]>;
 def SVDOT_LANE_X2_U : SInst<"svdot_lane[_{d}_{2}_{3}]", "ddhhi", "Ui", 
MergeNone, "aarch64_sve_udot_lane_x2", [], [ImmCheck<3, ImmCheck0_3>]>;
 def SVDOT_LANE_X2_F : SInst<"svdot_lane[_{d}_{2}_{3}]", "ddhhi", "f",  
MergeNone, "aarch64_sve_fdot_lane_x2", [], [ImmCheck<3, ImmCheck0_3>]>;
-
-def SVBFMLSLB : SInst<"svbfmlslb[_{d}]", "dd$$", "f", MergeNone, 
"aarch64_sve_bfmlslb", [IsOverloadNone], []>;
-def SVBFMLSLT : SInst<"svbfmlslt[_{d}]", "dd$$", "f", MergeNone, 
"aarch64_sve_bfmlslt", [IsOverloadNone], []>;
-
-def SVBFMLSLB_LANE : SInst<"svbfmlslb_lane[_{d}]", "dd$$i", "f", MergeNone, 
"aarch64_sve_bfmlslb_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>;
-def SVBFMLSLT_LANE : SInst<"svbfmlslt_lane[_{d}]", "dd$$i", "f", MergeNone, 
"aarch64_sve_bfmlslt_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>;
 }
 
 let TargetGuard = "sve2p1" in {
@@ -2306,3 +2300,12 @@ let TargetGuard = "sme2" in {
   def SVSUNPK_X4 : SInst<"svunpk_{d}[_{3}_x4]", "42.h", "sil",MergeNone, 
"aarch64_sve_sunpk_x4", [IsStreaming], []>;
   def SVUUNPK_X4 : SInst<"svunpk_{d}[_{3}_x4]", "42.h", "UsUiUl", MergeNone, 
"aarch64_sve_uunpk_x4", [IsStreaming], []>;
 }
+
+let TargetGuard = "sve2p1|sme" in {
+// == BFloat16 multiply-subtract ==
+  def SVBFMLSLB : SInst<"svbfmlslb[_{d}]", "dd$$", "f", MergeNone, 
"aarch64_sve_bfmlslb", [IsOverloadNone], []>;
+  def SVBFMLSLT : SInst<"svbfmlslt[_{d}]", "dd$$", "f", MergeNone, 
"aarch64_sve_bfmlslt", [IsOverloadNone], []>;
+
+  def SVBFMLSLB_LANE : SInst<"svbfmlslb_lane[_{d}]", "dd$$i", "f", MergeNone, 
"aarch64_sve_bfmlslb_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>;
+  def SVBFMLSLT_LANE : SInst<"svbfmlslt_lane[_{d}]", "dd$$i", "f", MergeNone, 
"aarch64_sve_bfmlslt_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>;
+}
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmlsl.c 
b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmlsl.c
index 7e1763a63ec4e1..519bf019ff35ba 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmlsl.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmlsl.c
@@ -2,10 +2,15 @@
 // REQUIRES: aarch64-registered-target
 
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu 
-target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s 
| opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu 
-target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall 
-emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu 
-target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x 
c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s 
-check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu 
-target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall 
-emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | 
FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple 
aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror 
-Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | 
FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple 
aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S 
-disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p 
mem2reg,instcombine,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple 
aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror 
-Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tail

[clang] [AArch64][SME2] Enable bfm builtins for sme2 (PR #71927)

2023-12-18 Thread Sam Tebbs via cfe-commits


@@ -1992,3 +1986,12 @@ let TargetGuard = "sme2" in {
   def SVADD_SINGLE_X2 : SInst<"svadd[_single_{d}_x2]", "22d", "cUcsUsiUilUl", 
MergeNone, "aarch64_sve_add_single_x2", [IsStreaming], []>;
   def SVADD_SINGLE_X4 : SInst<"svadd[_single_{d}_x4]", "44d", "cUcsUsiUilUl", 
MergeNone, "aarch64_sve_add_single_x4", [IsStreaming], []>;
 }
+
+let TargetGuard = "sve2p1|sme2" in {
+// == BFloat16 multiply-subtract ==
+  def SVBFMLSLB : SInst<"svbfmlslb[_{d}]", "dd$$", "f", MergeNone, 
"aarch64_sve_bfmlslb", [IsOverloadNone, IsStreaming], []>;
+  def SVBFMLSLT : SInst<"svbfmlslt[_{d}]", "dd$$", "f", MergeNone, 
"aarch64_sve_bfmlslt", [IsOverloadNone, IsStreaming], []>;
+
+  def SVBFMLSLB_LANE : SInst<"svbfmlslb_lane[_{d}]", "dd$$i", "f", MergeNone, 
"aarch64_sve_bfmlslb_lane", [IsOverloadNone, IsStreaming], [ImmCheck<3, 
ImmCheck0_7>]>;

SamTebbs33 wrote:

Thanks Sander. I've made these builtins use `IsStreamingCompatible`, which will 
be changed to `IsStreamingOrSVE2p1` once that is added.

https://github.com/llvm/llvm-project/pull/71927
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AArch64][SME2] Enable bfm builtins for sme2 (PR #71927)

2023-12-18 Thread Sam Tebbs via cfe-commits

SamTebbs33 wrote:

Rebased to fix a merge conflict and made the builtins temporarily 
`IsStreamingCompatible`.

https://github.com/llvm/llvm-project/pull/71927
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AArch64][SME2] Add SME2 MLA/MLS builtins. (PR #75584)

2023-12-18 Thread Sam Tebbs via cfe-commits

https://github.com/SamTebbs33 approved this pull request.

LGTM, thanks!

https://github.com/llvm/llvm-project/pull/75584
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AArch64][SME2] Enable bfm builtins for sme2 (PR #71927)

2023-12-18 Thread Sam Tebbs via cfe-commits

https://github.com/SamTebbs33 updated 
https://github.com/llvm/llvm-project/pull/71927

>From 93a02d9af1d7e4f1e23c252d72b20d292927a79f Mon Sep 17 00:00:00 2001
From: Samuel Tebbs 
Date: Thu, 9 Nov 2023 17:39:26 +
Subject: [PATCH 1/6] [AArch64][SME2] Enable bfm builtings for sme2

This patch enables the following builtins for SME2
svbfmlslb_f32
svbfmlslb_lane_f32
svbfmlslt_f32
svbfmlslt_lane_f32

Patch by: Kerry McLaughlin 
---
 clang/include/clang/Basic/arm_sve.td  | 15 +--
 .../acle_sve2p1_bfmlsl.c  |  5 +
 2 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/clang/include/clang/Basic/arm_sve.td 
b/clang/include/clang/Basic/arm_sve.td
index 98434c5c53e28c..38437e98858b23 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -2057,12 +2057,6 @@ def SVDOT_X2_F : SInst<"svdot[_{d}_{2}_{3}]", "ddhh", 
"f",  MergeNone, "aarch64_
 def SVDOT_LANE_X2_S : SInst<"svdot_lane[_{d}_{2}_{3}]", "ddhhi", "i",  
MergeNone, "aarch64_sve_sdot_lane_x2", [], [ImmCheck<3, ImmCheck0_3>]>;
 def SVDOT_LANE_X2_U : SInst<"svdot_lane[_{d}_{2}_{3}]", "ddhhi", "Ui", 
MergeNone, "aarch64_sve_udot_lane_x2", [], [ImmCheck<3, ImmCheck0_3>]>;
 def SVDOT_LANE_X2_F : SInst<"svdot_lane[_{d}_{2}_{3}]", "ddhhi", "f",  
MergeNone, "aarch64_sve_fdot_lane_x2", [], [ImmCheck<3, ImmCheck0_3>]>;
-
-def SVBFMLSLB : SInst<"svbfmlslb[_{d}]", "dd$$", "f", MergeNone, 
"aarch64_sve_bfmlslb", [IsOverloadNone], []>;
-def SVBFMLSLT : SInst<"svbfmlslt[_{d}]", "dd$$", "f", MergeNone, 
"aarch64_sve_bfmlslt", [IsOverloadNone], []>;
-
-def SVBFMLSLB_LANE : SInst<"svbfmlslb_lane[_{d}]", "dd$$i", "f", MergeNone, 
"aarch64_sve_bfmlslb_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>;
-def SVBFMLSLT_LANE : SInst<"svbfmlslt_lane[_{d}]", "dd$$i", "f", MergeNone, 
"aarch64_sve_bfmlslt_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>;
 }
 
 let TargetGuard = "sve2p1" in {
@@ -2306,3 +2300,12 @@ let TargetGuard = "sme2" in {
   def SVSUNPK_X4 : SInst<"svunpk_{d}[_{3}_x4]", "42.h", "sil",MergeNone, 
"aarch64_sve_sunpk_x4", [IsStreaming], []>;
   def SVUUNPK_X4 : SInst<"svunpk_{d}[_{3}_x4]", "42.h", "UsUiUl", MergeNone, 
"aarch64_sve_uunpk_x4", [IsStreaming], []>;
 }
+
+let TargetGuard = "sve2p1|sme" in {
+// == BFloat16 multiply-subtract ==
+  def SVBFMLSLB : SInst<"svbfmlslb[_{d}]", "dd$$", "f", MergeNone, 
"aarch64_sve_bfmlslb", [IsOverloadNone], []>;
+  def SVBFMLSLT : SInst<"svbfmlslt[_{d}]", "dd$$", "f", MergeNone, 
"aarch64_sve_bfmlslt", [IsOverloadNone], []>;
+
+  def SVBFMLSLB_LANE : SInst<"svbfmlslb_lane[_{d}]", "dd$$i", "f", MergeNone, 
"aarch64_sve_bfmlslb_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>;
+  def SVBFMLSLT_LANE : SInst<"svbfmlslt_lane[_{d}]", "dd$$i", "f", MergeNone, 
"aarch64_sve_bfmlslt_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>;
+}
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmlsl.c 
b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmlsl.c
index 7e1763a63ec4e1..519bf019ff35ba 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmlsl.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmlsl.c
@@ -2,10 +2,15 @@
 // REQUIRES: aarch64-registered-target
 
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu 
-target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s 
| opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu 
-target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall 
-emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu 
-target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x 
c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s 
-check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu 
-target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall 
-emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | 
FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple 
aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror 
-Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | 
FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple 
aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S 
-disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p 
mem2reg,instcombine,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple 
aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror 
-Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tail

[clang] [Clang][SME2] Enable multi-vector loads & stores for SME2 (PR #75821)

2023-12-18 Thread Sam Tebbs via cfe-commits

SamTebbs33 wrote:

Should these builtins be `IsStreamingCompatible` until we add 
`IsStreamingOrSVE2p1`?

https://github.com/llvm/llvm-project/pull/75821
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][SME] Warn when a function doesn't have ZA state (PR #75805)

2023-12-18 Thread Sam Tebbs via cfe-commits

https://github.com/SamTebbs33 closed 
https://github.com/llvm/llvm-project/pull/75805
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AArch64][SME2] Enable bfm builtins for sme2 (PR #71927)

2023-12-18 Thread Sam Tebbs via cfe-commits

https://github.com/SamTebbs33 updated 
https://github.com/llvm/llvm-project/pull/71927

>From 93a02d9af1d7e4f1e23c252d72b20d292927a79f Mon Sep 17 00:00:00 2001
From: Samuel Tebbs 
Date: Thu, 9 Nov 2023 17:39:26 +
Subject: [PATCH 1/7] [AArch64][SME2] Enable bfm builtings for sme2

This patch enables the following builtins for SME2
svbfmlslb_f32
svbfmlslb_lane_f32
svbfmlslt_f32
svbfmlslt_lane_f32

Patch by: Kerry McLaughlin 
---
 clang/include/clang/Basic/arm_sve.td  | 15 +--
 .../acle_sve2p1_bfmlsl.c  |  5 +
 2 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/clang/include/clang/Basic/arm_sve.td 
b/clang/include/clang/Basic/arm_sve.td
index 98434c5c53e28c..38437e98858b23 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -2057,12 +2057,6 @@ def SVDOT_X2_F : SInst<"svdot[_{d}_{2}_{3}]", "ddhh", 
"f",  MergeNone, "aarch64_
 def SVDOT_LANE_X2_S : SInst<"svdot_lane[_{d}_{2}_{3}]", "ddhhi", "i",  
MergeNone, "aarch64_sve_sdot_lane_x2", [], [ImmCheck<3, ImmCheck0_3>]>;
 def SVDOT_LANE_X2_U : SInst<"svdot_lane[_{d}_{2}_{3}]", "ddhhi", "Ui", 
MergeNone, "aarch64_sve_udot_lane_x2", [], [ImmCheck<3, ImmCheck0_3>]>;
 def SVDOT_LANE_X2_F : SInst<"svdot_lane[_{d}_{2}_{3}]", "ddhhi", "f",  
MergeNone, "aarch64_sve_fdot_lane_x2", [], [ImmCheck<3, ImmCheck0_3>]>;
-
-def SVBFMLSLB : SInst<"svbfmlslb[_{d}]", "dd$$", "f", MergeNone, 
"aarch64_sve_bfmlslb", [IsOverloadNone], []>;
-def SVBFMLSLT : SInst<"svbfmlslt[_{d}]", "dd$$", "f", MergeNone, 
"aarch64_sve_bfmlslt", [IsOverloadNone], []>;
-
-def SVBFMLSLB_LANE : SInst<"svbfmlslb_lane[_{d}]", "dd$$i", "f", MergeNone, 
"aarch64_sve_bfmlslb_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>;
-def SVBFMLSLT_LANE : SInst<"svbfmlslt_lane[_{d}]", "dd$$i", "f", MergeNone, 
"aarch64_sve_bfmlslt_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>;
 }
 
 let TargetGuard = "sve2p1" in {
@@ -2306,3 +2300,12 @@ let TargetGuard = "sme2" in {
   def SVSUNPK_X4 : SInst<"svunpk_{d}[_{3}_x4]", "42.h", "sil",MergeNone, 
"aarch64_sve_sunpk_x4", [IsStreaming], []>;
   def SVUUNPK_X4 : SInst<"svunpk_{d}[_{3}_x4]", "42.h", "UsUiUl", MergeNone, 
"aarch64_sve_uunpk_x4", [IsStreaming], []>;
 }
+
+let TargetGuard = "sve2p1|sme" in {
+// == BFloat16 multiply-subtract ==
+  def SVBFMLSLB : SInst<"svbfmlslb[_{d}]", "dd$$", "f", MergeNone, 
"aarch64_sve_bfmlslb", [IsOverloadNone], []>;
+  def SVBFMLSLT : SInst<"svbfmlslt[_{d}]", "dd$$", "f", MergeNone, 
"aarch64_sve_bfmlslt", [IsOverloadNone], []>;
+
+  def SVBFMLSLB_LANE : SInst<"svbfmlslb_lane[_{d}]", "dd$$i", "f", MergeNone, 
"aarch64_sve_bfmlslb_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>;
+  def SVBFMLSLT_LANE : SInst<"svbfmlslt_lane[_{d}]", "dd$$i", "f", MergeNone, 
"aarch64_sve_bfmlslt_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>;
+}
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmlsl.c 
b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmlsl.c
index 7e1763a63ec4e1..519bf019ff35ba 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmlsl.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmlsl.c
@@ -2,10 +2,15 @@
 // REQUIRES: aarch64-registered-target
 
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu 
-target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s 
| opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu 
-target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall 
-emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu 
-target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x 
c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s 
-check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu 
-target-feature +sme2 -target-feature +sve -S -disable-O0-optnone -Werror -Wall 
-emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | 
FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple 
aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror 
-Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | 
FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple 
aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S 
-disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p 
mem2reg,instcombine,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple 
aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror 
-Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tail

[clang] [AArch64][SME2] Enable bfm builtins for sme2 (PR #71927)

2023-12-18 Thread Sam Tebbs via cfe-commits

https://github.com/SamTebbs33 closed 
https://github.com/llvm/llvm-project/pull/71927
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][SME] Add IsStreamingOrSVE2p1 (PR #75958)

2023-12-20 Thread Sam Tebbs via cfe-commits


@@ -679,6 +679,7 @@ bool AArch64TargetInfo::hasFeature(StringRef Feature) const 
{
   .Case("f32mm", FPU & SveMode && HasMatmulFP32)
   .Case("f64mm", FPU & SveMode && HasMatmulFP64)
   .Case("sve2", FPU & SveMode && HasSVE2)
+  .Case("sve2p1", HasSVE2p1)

SamTebbs33 wrote:

Yep, you're right. I've removed these changes now.

https://github.com/llvm/llvm-project/pull/75958
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][SME] Add IsStreamingOrSVE2p1 (PR #75958)

2023-12-20 Thread Sam Tebbs via cfe-commits


@@ -8,7 +8,7 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple 
aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror 
-Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | 
FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple 
aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror 
-Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | 
FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu 
-target-feature +sve2p1 -target-feature -S -disable-O0-optnone -Werror -Wall -o 
/dev/null %s
-// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu 
-target-feature +sme2 -target-feature +sve -target-feature -S -DTEST_SME2 
-disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu 
-target-feature +sme2 -target-feature -S -DTEST_SME2 -disable-O0-optnone 
-Werror -Wall -o /dev/null %s

SamTebbs33 wrote:

Removed, thanks!

https://github.com/llvm/llvm-project/pull/75958
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][SME] Add IsStreamingOrSVE2p1 (PR #75958)

2023-12-20 Thread Sam Tebbs via cfe-commits


@@ -11,10 +11,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu 
-target-feature +sve2p1 \
 // RUN:   -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu 
-target-feature +sme2 -target-feature +sve \
-// RUN:   -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN:   -S -DTEST_SME2 -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include 
 
+#ifndef TEST_SME2
+#define ATTR
+#else
+#define ATTR __arm_streaming

SamTebbs33 wrote:

arm_sve.td has SVFCLAMP as `IsStreamingOrSVE2p1` so I think it has to be 
`__arm_streaming``, or is the builtin def incorrect?

https://github.com/llvm/llvm-project/pull/75958
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][SME] Add IsStreamingOrSVE2p1 (PR #75958)

2023-12-20 Thread Sam Tebbs via cfe-commits


@@ -1,14 +1,20 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: aarch64-registered-target
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S 
-disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S 
-passes=mem2reg,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S 
-disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S 
-passes=mem2reg,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S 
-DTEST_SME2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S 
-passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S 
-disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S 
-passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S 
-disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S 
-passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S 
-DTEST_SME2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt 
-S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S 
-disable-O0-optnone -Werror -Wall -o /dev/null %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S 
-disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S 
-DTEST_SME2 -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include 
 
+#ifndef TEST_SME2
+#define ATTR
+#else
+#define ATTR __arm_streaming

SamTebbs33 wrote:

It's actually marked as streaming or sve2p1 in arm_sve.td. Is that incorrect?

https://github.com/llvm/llvm-project/pull/75958
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][SME] Add IsStreamingOrSVE2p1 (PR #75958)

2023-12-20 Thread Sam Tebbs via cfe-commits


@@ -10,6 +10,10 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu \
 // RUN:   -target-feature +sve2p1 -S -O1 -Werror -emit-llvm -o - -x c++ %s | 
FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu 
-target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu \
+// RUN:   -target-feature +sme2 -S -O1 -Werror -emit-llvm -o - -x c++ %s | 
FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S 
-disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S 
-disable-O0-optnone -Werror -Wall -o /dev/null %s

SamTebbs33 wrote:

Yep ypu're right, thanks!

https://github.com/llvm/llvm-project/pull/75958
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AArch64] Warn when calling a NEON builtin in a streaming function (PR #73672)

2023-11-28 Thread Sam Tebbs via cfe-commits

https://github.com/SamTebbs33 created 
https://github.com/llvm/llvm-project/pull/73672

This patch introduces a warning that is emitted when a Neon builtin is called 
from a streaming function, as that situation is not supported.

Uses work by Kerry McLaughlin.

>From 96464a9c37a532216e4df6c003aa1a8fcb448637 Mon Sep 17 00:00:00 2001
From: Samuel Tebbs 
Date: Tue, 28 Nov 2023 16:22:32 +
Subject: [PATCH] [AArch64] Warn when calling a NEON builtin in a streaming
 function

This patch introduces a warning that is emitted when a Neon builtin is called 
from a streaming function, as that situation is not supported.
---
 .../clang/Basic/DiagnosticSemaKinds.td|  3 +
 clang/lib/Sema/SemaChecking.cpp   | 81 +++
 .../Sema/aarch64-incompat-sm-builtin-calls.c  | 24 ++
 3 files changed, 108 insertions(+)
 create mode 100644 clang/test/Sema/aarch64-incompat-sm-builtin-calls.c

diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td 
b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 9a7dafa4a298273..e2b7a695322c14b 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -3148,6 +3148,9 @@ def err_attribute_bad_sve_vector_size : Error<
 def err_attribute_arm_feature_sve_bits_unsupported : Error<
   "%0 is only supported when '-msve-vector-bits=' is specified with a "
   "value of 128, 256, 512, 1024 or 2048.">;
+def warn_attribute_arm_sm_incompat_builtin : Warning<
+  "builtin call has undefined behaviour when called from a %0 function">,
+  InGroup>;
 def err_sve_vector_in_non_sve_target : Error<
   "SVE vector type %0 cannot be used in a target without sve">;
 def err_attribute_riscv_rvv_bits_unsupported : Error<
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index ae588db02bbe722..07937047a2843b0 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -2993,6 +2993,62 @@ static QualType getNeonEltType(NeonTypeFlags Flags, 
ASTContext &Context,
   llvm_unreachable("Invalid NeonTypeFlag!");
 }
 
+enum ArmStreamingType {
+  ArmNonStreaming,
+  ArmStreaming,
+  ArmStreamingCompatible,
+  ArmLocallyStreaming,
+  ArmStreamingOrSVE2p1
+};
+
+static ArmStreamingType getArmStreamingFnType(const FunctionDecl *FD) {
+  if (FD->hasAttr())
+return ArmLocallyStreaming;
+  if (const auto *T = FD->getType()->getAs()) {
+if (T->getAArch64SMEAttributes() & FunctionType::SME_PStateSMEnabledMask)
+  return ArmStreaming;
+if (T->getAArch64SMEAttributes() & 
FunctionType::SME_PStateSMCompatibleMask)
+  return ArmStreamingCompatible;
+  }
+  return ArmNonStreaming;
+}
+
+static void checkArmStreamingBuiltin(Sema &S, CallExpr *TheCall,
+ const FunctionDecl *FD,
+ ArmStreamingType BuiltinType) {
+  assert(BuiltinType != ArmLocallyStreaming &&
+ "Unexpected locally_streaming attribute for builtin!");
+
+  ArmStreamingType FnType = getArmStreamingFnType(FD);
+  if (BuiltinType == ArmStreamingOrSVE2p1) {
+// Check intrinsics that are available in [sve2p1 or sme/sme2].
+llvm::StringMap CallerFeatureMap;
+S.Context.getFunctionFeatureMap(CallerFeatureMap, FD);
+if (Builtin::evaluateRequiredTargetFeatures("sve2p1", CallerFeatureMap))
+  BuiltinType = ArmStreamingCompatible;
+else
+  BuiltinType = ArmStreaming;
+  }
+
+  if ((FnType == ArmStreaming || FnType == ArmLocallyStreaming) &&
+  BuiltinType == ArmNonStreaming) {
+S.Diag(TheCall->getBeginLoc(), 
diag::warn_attribute_arm_sm_incompat_builtin)
+<< TheCall->getSourceRange() << "streaming or locally streaming";
+  }
+
+  if ((FnType == ArmStreamingCompatible) &&
+  BuiltinType != ArmStreamingCompatible) {
+S.Diag(TheCall->getBeginLoc(), 
diag::warn_attribute_arm_sm_incompat_builtin)
+<< TheCall->getSourceRange() << "streaming compatible";
+return;
+  }
+
+  if (FnType == ArmNonStreaming && BuiltinType == ArmStreaming) {
+S.Diag(TheCall->getBeginLoc(), 
diag::warn_attribute_arm_sm_incompat_builtin)
+<< TheCall->getSourceRange() << "non-streaming";
+  }
+}
+
 bool Sema::CheckSVEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
   // Range check SVE intrinsics that take immediate values.
   SmallVector, 3> ImmChecks;
@@ -3136,6 +3192,31 @@ bool Sema::CheckSVEBuiltinFunctionCall(unsigned 
BuiltinID, CallExpr *TheCall) {
 
 bool Sema::CheckNeonBuiltinFunctionCall(const TargetInfo &TI,
 unsigned BuiltinID, CallExpr *TheCall) 
{
+  if (const FunctionDecl *FD = getCurFunctionDecl()) {
+std::optional BuiltinType;
+
+bool IsNeon = false;
+switch (BuiltinID) {
+default:
+  break;
+#define GET_NEON_BUILTINS
+#define TARGET_BUILTIN(id, x, y, z)
\
+  case NEON::BI##id:   
\
+IsNeon = true

[clang] [AArch64] Warn when calling a NEON builtin in a streaming function (PR #73672)

2023-11-30 Thread Sam Tebbs via cfe-commits

https://github.com/SamTebbs33 updated 
https://github.com/llvm/llvm-project/pull/73672

>From 96464a9c37a532216e4df6c003aa1a8fcb448637 Mon Sep 17 00:00:00 2001
From: Samuel Tebbs 
Date: Tue, 28 Nov 2023 16:22:32 +
Subject: [PATCH 1/2] [AArch64] Warn when calling a NEON builtin in a streaming
 function

This patch introduces a warning that is emitted when a Neon builtin is called 
from a streaming function, as that situation is not supported.
---
 .../clang/Basic/DiagnosticSemaKinds.td|  3 +
 clang/lib/Sema/SemaChecking.cpp   | 81 +++
 .../Sema/aarch64-incompat-sm-builtin-calls.c  | 24 ++
 3 files changed, 108 insertions(+)
 create mode 100644 clang/test/Sema/aarch64-incompat-sm-builtin-calls.c

diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td 
b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 9a7dafa4a298273..e2b7a695322c14b 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -3148,6 +3148,9 @@ def err_attribute_bad_sve_vector_size : Error<
 def err_attribute_arm_feature_sve_bits_unsupported : Error<
   "%0 is only supported when '-msve-vector-bits=' is specified with a "
   "value of 128, 256, 512, 1024 or 2048.">;
+def warn_attribute_arm_sm_incompat_builtin : Warning<
+  "builtin call has undefined behaviour when called from a %0 function">,
+  InGroup>;
 def err_sve_vector_in_non_sve_target : Error<
   "SVE vector type %0 cannot be used in a target without sve">;
 def err_attribute_riscv_rvv_bits_unsupported : Error<
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index ae588db02bbe722..07937047a2843b0 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -2993,6 +2993,62 @@ static QualType getNeonEltType(NeonTypeFlags Flags, 
ASTContext &Context,
   llvm_unreachable("Invalid NeonTypeFlag!");
 }
 
+enum ArmStreamingType {
+  ArmNonStreaming,
+  ArmStreaming,
+  ArmStreamingCompatible,
+  ArmLocallyStreaming,
+  ArmStreamingOrSVE2p1
+};
+
+static ArmStreamingType getArmStreamingFnType(const FunctionDecl *FD) {
+  if (FD->hasAttr())
+return ArmLocallyStreaming;
+  if (const auto *T = FD->getType()->getAs()) {
+if (T->getAArch64SMEAttributes() & FunctionType::SME_PStateSMEnabledMask)
+  return ArmStreaming;
+if (T->getAArch64SMEAttributes() & 
FunctionType::SME_PStateSMCompatibleMask)
+  return ArmStreamingCompatible;
+  }
+  return ArmNonStreaming;
+}
+
+static void checkArmStreamingBuiltin(Sema &S, CallExpr *TheCall,
+ const FunctionDecl *FD,
+ ArmStreamingType BuiltinType) {
+  assert(BuiltinType != ArmLocallyStreaming &&
+ "Unexpected locally_streaming attribute for builtin!");
+
+  ArmStreamingType FnType = getArmStreamingFnType(FD);
+  if (BuiltinType == ArmStreamingOrSVE2p1) {
+// Check intrinsics that are available in [sve2p1 or sme/sme2].
+llvm::StringMap CallerFeatureMap;
+S.Context.getFunctionFeatureMap(CallerFeatureMap, FD);
+if (Builtin::evaluateRequiredTargetFeatures("sve2p1", CallerFeatureMap))
+  BuiltinType = ArmStreamingCompatible;
+else
+  BuiltinType = ArmStreaming;
+  }
+
+  if ((FnType == ArmStreaming || FnType == ArmLocallyStreaming) &&
+  BuiltinType == ArmNonStreaming) {
+S.Diag(TheCall->getBeginLoc(), 
diag::warn_attribute_arm_sm_incompat_builtin)
+<< TheCall->getSourceRange() << "streaming or locally streaming";
+  }
+
+  if ((FnType == ArmStreamingCompatible) &&
+  BuiltinType != ArmStreamingCompatible) {
+S.Diag(TheCall->getBeginLoc(), 
diag::warn_attribute_arm_sm_incompat_builtin)
+<< TheCall->getSourceRange() << "streaming compatible";
+return;
+  }
+
+  if (FnType == ArmNonStreaming && BuiltinType == ArmStreaming) {
+S.Diag(TheCall->getBeginLoc(), 
diag::warn_attribute_arm_sm_incompat_builtin)
+<< TheCall->getSourceRange() << "non-streaming";
+  }
+}
+
 bool Sema::CheckSVEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
   // Range check SVE intrinsics that take immediate values.
   SmallVector, 3> ImmChecks;
@@ -3136,6 +3192,31 @@ bool Sema::CheckSVEBuiltinFunctionCall(unsigned 
BuiltinID, CallExpr *TheCall) {
 
 bool Sema::CheckNeonBuiltinFunctionCall(const TargetInfo &TI,
 unsigned BuiltinID, CallExpr *TheCall) 
{
+  if (const FunctionDecl *FD = getCurFunctionDecl()) {
+std::optional BuiltinType;
+
+bool IsNeon = false;
+switch (BuiltinID) {
+default:
+  break;
+#define GET_NEON_BUILTINS
+#define TARGET_BUILTIN(id, x, y, z)
\
+  case NEON::BI##id:   
\
+IsNeon = true; 
\
+break;
+#define BUILTIN(id, x, y) TARGET_BUILTIN(id, x, y, "");
+#include "clang/Basic/arm_neon.inc"

[clang] [AArch64] Warn when calling a NEON builtin in a streaming function (PR #73672)

2023-11-30 Thread Sam Tebbs via cfe-commits

https://github.com/SamTebbs33 updated 
https://github.com/llvm/llvm-project/pull/73672

>From ba3d2c36ee3268b24864466d429a30fec92a69e3 Mon Sep 17 00:00:00 2001
From: Samuel Tebbs 
Date: Tue, 28 Nov 2023 16:22:32 +
Subject: [PATCH 1/4] [AArch64] Warn when calling a NEON builtin in a streaming
 function

This patch introduces a warning that is emitted when a Neon builtin is called 
from a streaming function, as that situation is not supported.
---
 .../clang/Basic/DiagnosticSemaKinds.td|  3 +
 clang/lib/Sema/SemaChecking.cpp   | 81 +++
 .../Sema/aarch64-incompat-sm-builtin-calls.c  | 24 ++
 3 files changed, 108 insertions(+)
 create mode 100644 clang/test/Sema/aarch64-incompat-sm-builtin-calls.c

diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td 
b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index ed9bd929c6c4816..6dfb2d7195203a3 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -3148,6 +3148,9 @@ def err_attribute_bad_sve_vector_size : Error<
 def err_attribute_arm_feature_sve_bits_unsupported : Error<
   "%0 is only supported when '-msve-vector-bits=' is specified with a "
   "value of 128, 256, 512, 1024 or 2048.">;
+def warn_attribute_arm_sm_incompat_builtin : Warning<
+  "builtin call has undefined behaviour when called from a %0 function">,
+  InGroup>;
 def err_sve_vector_in_non_sve_target : Error<
   "SVE vector type %0 cannot be used in a target without sve">;
 def err_attribute_riscv_rvv_bits_unsupported : Error<
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 9dfff132cd88db3..b0da86a5b227def 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -2993,6 +2993,62 @@ static QualType getNeonEltType(NeonTypeFlags Flags, 
ASTContext &Context,
   llvm_unreachable("Invalid NeonTypeFlag!");
 }
 
+enum ArmStreamingType {
+  ArmNonStreaming,
+  ArmStreaming,
+  ArmStreamingCompatible,
+  ArmLocallyStreaming,
+  ArmStreamingOrSVE2p1
+};
+
+static ArmStreamingType getArmStreamingFnType(const FunctionDecl *FD) {
+  if (FD->hasAttr())
+return ArmLocallyStreaming;
+  if (const auto *T = FD->getType()->getAs()) {
+if (T->getAArch64SMEAttributes() & FunctionType::SME_PStateSMEnabledMask)
+  return ArmStreaming;
+if (T->getAArch64SMEAttributes() & 
FunctionType::SME_PStateSMCompatibleMask)
+  return ArmStreamingCompatible;
+  }
+  return ArmNonStreaming;
+}
+
+static void checkArmStreamingBuiltin(Sema &S, CallExpr *TheCall,
+ const FunctionDecl *FD,
+ ArmStreamingType BuiltinType) {
+  assert(BuiltinType != ArmLocallyStreaming &&
+ "Unexpected locally_streaming attribute for builtin!");
+
+  ArmStreamingType FnType = getArmStreamingFnType(FD);
+  if (BuiltinType == ArmStreamingOrSVE2p1) {
+// Check intrinsics that are available in [sve2p1 or sme/sme2].
+llvm::StringMap CallerFeatureMap;
+S.Context.getFunctionFeatureMap(CallerFeatureMap, FD);
+if (Builtin::evaluateRequiredTargetFeatures("sve2p1", CallerFeatureMap))
+  BuiltinType = ArmStreamingCompatible;
+else
+  BuiltinType = ArmStreaming;
+  }
+
+  if ((FnType == ArmStreaming || FnType == ArmLocallyStreaming) &&
+  BuiltinType == ArmNonStreaming) {
+S.Diag(TheCall->getBeginLoc(), 
diag::warn_attribute_arm_sm_incompat_builtin)
+<< TheCall->getSourceRange() << "streaming or locally streaming";
+  }
+
+  if ((FnType == ArmStreamingCompatible) &&
+  BuiltinType != ArmStreamingCompatible) {
+S.Diag(TheCall->getBeginLoc(), 
diag::warn_attribute_arm_sm_incompat_builtin)
+<< TheCall->getSourceRange() << "streaming compatible";
+return;
+  }
+
+  if (FnType == ArmNonStreaming && BuiltinType == ArmStreaming) {
+S.Diag(TheCall->getBeginLoc(), 
diag::warn_attribute_arm_sm_incompat_builtin)
+<< TheCall->getSourceRange() << "non-streaming";
+  }
+}
+
 bool Sema::CheckSVEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
   // Range check SVE intrinsics that take immediate values.
   SmallVector, 3> ImmChecks;
@@ -3148,6 +3204,31 @@ bool Sema::CheckSVEBuiltinFunctionCall(unsigned 
BuiltinID, CallExpr *TheCall) {
 
 bool Sema::CheckNeonBuiltinFunctionCall(const TargetInfo &TI,
 unsigned BuiltinID, CallExpr *TheCall) 
{
+  if (const FunctionDecl *FD = getCurFunctionDecl()) {
+std::optional BuiltinType;
+
+bool IsNeon = false;
+switch (BuiltinID) {
+default:
+  break;
+#define GET_NEON_BUILTINS
+#define TARGET_BUILTIN(id, x, y, z)
\
+  case NEON::BI##id:   
\
+IsNeon = true; 
\
+break;
+#define BUILTIN(id, x, y) TARGET_BUILTIN(id, x, y, "");
+#include "clang/Basic/arm_neon.inc"

[clang] [AArch64] Warn when calling a NEON builtin in a streaming function (PR #73672)

2023-11-30 Thread Sam Tebbs via cfe-commits


@@ -2993,6 +2993,47 @@ static QualType getNeonEltType(NeonTypeFlags Flags, 
ASTContext &Context,
   llvm_unreachable("Invalid NeonTypeFlag!");
 }
 
+enum ArmStreamingType {
+  ArmNonStreaming,
+  ArmStreaming,
+  ArmStreamingCompatible,
+  ArmLocallyStreaming
+};
+
+static ArmStreamingType getArmStreamingFnType(const FunctionDecl *FD) {
+  if (FD->hasAttr())
+return ArmLocallyStreaming;

SamTebbs33 wrote:

Thanks Sander, that makes sense to me.

https://github.com/llvm/llvm-project/pull/73672
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [AArch64] Warn when calling a NEON builtin in a streaming function (PR #73672)

2023-11-30 Thread Sam Tebbs via cfe-commits

https://github.com/SamTebbs33 closed 
https://github.com/llvm/llvm-project/pull/73672
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


  1   2   3   >