llvmbot wrote:
@llvm/pr-subscribers-clang

Author: Matthew Devereau (MDevereau)

Changes

SVE operations such as predicated loads are canonicalized to LLVM masked loads; canonicalizing ptrue(all) to splat(i1 true) in the same way exposes further optimization opportunities to generic LLVM IR passes. A minimal IR sketch of the intended effect follows the diff below.

---

Patch is 214.78 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/135016.diff

11 Files Affected:

- (modified) clang/test/CodeGen/AArch64/sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c (+2-4)
- (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_rdffr.c (+2-4)
- (modified) llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp (+29-12)
- (modified) llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-abs-srshl.ll (+1-2)
- (modified) llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-all-active-lanes-cvt.ll (+35-70)
- (modified) llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-loadstore.ll (+10-12)
- (modified) llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-cmpne.ll (+25-47)
- (modified) llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-strictfp.ll (+6-12)
- (modified) llvm/test/Transforms/InstCombine/AArch64/sve-intrinsics-combine-to-u-forms.ll (+242-363)
- (modified) llvm/test/Transforms/InstCombine/AArch64/sve-intrinsics-ptest.ll (+1-2)
- (modified) llvm/test/Transforms/InstCombine/AArch64/sve-intrinsics-rdffr-predication.ll (+1-2)

``````````diff
diff --git a/clang/test/CodeGen/AArch64/sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c b/clang/test/CodeGen/AArch64/sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c
index bbed683ac1fd7..c3d0541229fac 100644
--- a/clang/test/CodeGen/AArch64/sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c
+++ b/clang/test/CodeGen/AArch64/sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c
@@ -52,9 +52,8 @@ vec2048 x2048 = {0, 1, 2, 3, 3 , 2 , 1, 0, 0, 1, 2, 3, 3 , 2 , 1, 0,
 typedef int8_t vec_int8 __attribute__((vector_size(N / 8)));
 // CHECK128-LABEL: define{{.*}} <16 x i8> @f2(<16 x i8> noundef %x)
 // CHECK128-NEXT: entry:
-// CHECK128-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
 // CHECK128-NEXT: [[CASTSCALABLESVE:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> poison, <16 x i8> [[X:%.*]], i64 0)
-// CHECK128-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.asrd.nxv16i8(<vscale x 16 x i1> [[TMP0]], <vscale x 16 x i8> [[CASTSCALABLESVE]], i32 1)
+// CHECK128-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.asrd.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[CASTSCALABLESVE]], i32 1)
 // CHECK128-NEXT: [[CASTFIXEDSVE:%.*]] = tail call <16 x i8> @llvm.vector.extract.v16i8.nxv16i8(<vscale x 16 x i8> [[TMP1]], i64 0)
 // CHECK128-NEXT: ret <16 x i8> [[CASTFIXEDSVE]]
@@ -62,9 +61,8 @@ typedef int8_t vec_int8 __attribute__((vector_size(N / 8)));
 // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<[[#div(VBITS,8)]] x i8>) align 16 captures(none) initializes((0, [[#div(VBITS,8)]])) %agg.result, ptr noundef readonly captures(none) %0)
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[X:%.*]] = load <[[#div(VBITS,8)]] x i8>, ptr [[TMP0:%.*]], align 16, [[TBAA6:!tbaa !.*]]
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v[[#div(VBITS,8)]]i8(<vscale x 16 x i8> poison, <[[#div(VBITS,8)]] x i8> [[X]], i64 0)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.asrd.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[CASTSCALABLESVE]], i32 1)
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.asrd.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[CASTSCALABLESVE]], i32 1)
 // CHECK-NEXT: [[CASTFIXEDSVE:%.*]] = tail call <[[#div(VBITS,8)]] x i8> @llvm.vector.extract.v[[#div(VBITS,8)]]i8.nxv16i8(<vscale x 16 x i8> [[TMP2]], i64 0)
 // CHECK-NEXT: store <[[#div(VBITS,8)]] x i8> [[CASTFIXEDSVE]], ptr [[AGG_RESULT:%.*]], align 16, [[TBAA6]]
 // CHECK-NEXT: ret void
diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_rdffr.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_rdffr.c
index 6bf56bdea505c..ca3480d62725a 100644
--- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_rdffr.c
+++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_rdffr.c
@@ -7,14 +7,12 @@
 // CHECK-LABEL: @test_svrdffr(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.rdffr.z(<vscale x 16 x i1> [[TMP0]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.rdffr.z(<vscale x 16 x i1> splat (i1 true))
 // CHECK-NEXT: ret <vscale x 16 x i1> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z12test_svrdffrv(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.rdffr.z(<vscale x 16 x i1> [[TMP0]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.rdffr.z(<vscale x 16 x i1> splat (i1 true))
 // CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP1]]
 //
 svbool_t test_svrdffr()
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index b1d8277182add..ae5792cc8c1f1 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1492,9 +1492,17 @@ static bool isAllActivePredicate(Value *Pred) {
   if (cast<ScalableVectorType>(Pred->getType())->getMinNumElements() <=
       cast<ScalableVectorType>(UncastedPred->getType())->getMinNumElements())
     Pred = UncastedPred;
+  if (match(Pred, m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(
+                      m_ConstantInt<AArch64SVEPredPattern::all>())))
+    return true;
+
+  if (Value *Splat = getSplatValue(Pred)) {
+    auto ConstIdx = dyn_cast<ConstantInt>(Splat);
+    if (ConstIdx->getZExtValue() == 1)
+      return true;
+  }
-  return match(Pred, m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(
-                         m_ConstantInt<AArch64SVEPredPattern::all>()));
+  return false;
 }
 
 // Use SVE intrinsic info to eliminate redundant operands and/or canonicalise
@@ -1701,14 +1709,7 @@ static std::optional<Instruction *> instCombineSVECmpNE(InstCombiner &IC,
                                                         IntrinsicInst &II) {
   LLVMContext &Ctx = II.getContext();
 
-  // Check that the predicate is all active
-  auto *Pg = dyn_cast<IntrinsicInst>(II.getArgOperand(0));
-  if (!Pg || Pg->getIntrinsicID() != Intrinsic::aarch64_sve_ptrue)
-    return std::nullopt;
-
-  const auto PTruePattern =
-      cast<ConstantInt>(Pg->getOperand(0))->getZExtValue();
-  if (PTruePattern != AArch64SVEPredPattern::all)
+  if (!isAllActivePredicate(II.getArgOperand(0)))
     return std::nullopt;
 
   // Check that we have a compare of zero..
@@ -2118,8 +2119,7 @@ instCombineSVEVectorBinOp(InstCombiner &IC, IntrinsicInst &II) {
   auto *OpPredicate = II.getOperand(0);
   auto BinOpCode = intrinsicIDToBinOpCode(II.getIntrinsicID());
   if (BinOpCode == Instruction::BinaryOpsEnd ||
-      !match(OpPredicate, m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(
-                              m_ConstantInt<AArch64SVEPredPattern::all>())))
+      !isAllActivePredicate(OpPredicate))
     return std::nullopt;
   auto BinOp = IC.Builder.CreateBinOpFMF(
       BinOpCode, II.getOperand(1), II.getOperand(2), II.getFastMathFlags());
@@ -2641,6 +2641,21 @@ static std::optional<Instruction *> instCombineDMB(InstCombiner &IC,
   return std::nullopt;
 }
 
+static std::optional<Instruction *> instCombinePTrue(InstCombiner &IC,
+                                                     IntrinsicInst &II) {
+  IRBuilder<> Builder(&II);
+  auto Type = cast<VectorType>(II.getType());
+  ConstantInt *Pattern;
+  if (match(II.getOperand(0), m_ConstantInt(Pattern)) &&
+      Pattern->getZExtValue() == AArch64SVEPredPattern::all) {
+    Value *One = ConstantInt::get(Builder.getInt1Ty(), APInt(1, 1));
+    Value *SplatOne =
+        Builder.CreateVectorSplat(Type->getElementCount(), One);
+    return IC.replaceInstUsesWith(II, SplatOne);
+  }
+  return std::nullopt;
+}
+
 std::optional<Instruction *>
 AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
                                      IntrinsicInst &II) const {
@@ -2744,6 +2759,8 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
     return instCombineSVEDupqLane(IC, II);
   case Intrinsic::aarch64_sve_insr:
     return instCombineSVEInsr(IC, II);
+  case Intrinsic::aarch64_sve_ptrue:
+    return instCombinePTrue(IC, II);
   }
 
   return std::nullopt;
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-abs-srshl.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-abs-srshl.ll
index 7fb0fbdda0b5d..f71aaa289b89c 100644
--- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-abs-srshl.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-abs-srshl.ll
@@ -42,8 +42,7 @@ define <vscale x 8 x i16> @srshl_abs_positive_merge(<vscale x 8 x i16> %a, <vsca
 define <vscale x 8 x i16> @srshl_abs_all_active_pred(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i1> %pg2) #0 {
 ; CHECK-LABEL: @srshl_abs_all_active_pred(
-; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-; CHECK-NEXT: [[ABS:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> [[B:%.*]], <vscale x 8 x i1> [[PG]], <vscale x 8 x i16> [[A:%.*]])
+; CHECK-NEXT: [[ABS:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> [[B:%.*]], <vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[A:%.*]])
 ; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> [[PG2:%.*]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> splat (i16 2))
 ; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-all-active-lanes-cvt.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-all-active-lanes-cvt.ll
index b8ea4de3d2382..1c5f7464d858a 100644
--- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-all-active-lanes-cvt.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-all-active-lanes-cvt.ll
@@ -5,8 +5,7 @@ target triple = "aarch64-unknown-linux-gnu"
 define <vscale x 8 x bfloat> @test_fcvt_bf16_f32_poison(<vscale x 8 x bfloat> %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: define <vscale x 8 x bfloat> @test_fcvt_bf16_f32_poison(
 ; CHECK-SAME: <vscale x 8 x bfloat> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
-; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32.v2(<vscale x 8 x bfloat> poison, <vscale x 4 x i1> [[PG]], <vscale x 4 x float> [[B]])
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32.v2(<vscale x 8 x bfloat> poison, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[B]])
 ; CHECK-NEXT: ret <vscale x 8 x bfloat> [[OUT]]
 ;
 %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
@@ -17,8 +16,7 @@ define <vscale x 8 x bfloat> @test_fcvt_bf16_f32(<vscale x 8 x bfloat> %a, <vsca
 define <vscale x 8 x bfloat> @test_fcvt_bf16_f32(<vscale x 8 x bfloat> %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: define <vscale x 8 x bfloat> @test_fcvt_bf16_f32(
 ; CHECK-SAME: <vscale x 8 x bfloat> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
-; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32.v2(<vscale x 8 x bfloat> undef, <vscale x 4 x i1> [[PG]], <vscale x 4 x float> [[B]])
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32.v2(<vscale x 8 x bfloat> undef, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[B]])
 ; CHECK-NEXT: ret <vscale x 8 x bfloat> [[OUT]]
 ;
 %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
@@ -29,8 +27,7 @@ define <vscale x 8 x half> @test_fcvt_f16_f32(<vscale x 8 x half> %a, <vscale x
 define <vscale x 8 x half> @test_fcvt_f16_f32(<vscale x 8 x half> %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: define <vscale x 8 x half> @test_fcvt_f16_f32(
 ; CHECK-SAME: <vscale x 8 x half> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
-; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f32(<vscale x 8 x half> undef, <vscale x 4 x i1> [[PG]], <vscale x 4 x float> [[B]])
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f32(<vscale x 8 x half> undef, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[B]])
 ; CHECK-NEXT: ret <vscale x 8 x half> [[OUT]]
 ;
 %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
@@ -41,8 +38,7 @@ define <vscale x 8 x half> @test_fcvt_f16_f64(<vscale x 8 x half> %a, <vscale x
 define <vscale x 8 x half> @test_fcvt_f16_f64(<vscale x 8 x half> %a, <vscale x 2 x double> %b) {
 ; CHECK-LABEL: define <vscale x 8 x half> @test_fcvt_f16_f64(
 ; CHECK-SAME: <vscale x 8 x half> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) {
-; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f64(<vscale x 8 x half> undef, <vscale x 2 x i1> [[PG]], <vscale x 2 x double> [[B]])
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f64(<vscale x 8 x half> undef, <vscale x 2 x i1> splat (i1 true), <vscale x 2 x double> [[B]])
 ; CHECK-NEXT: ret <vscale x 8 x half> [[OUT]]
 ;
 %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
@@ -53,8 +49,7 @@ define <vscale x 4 x float> @test_fcvt_f32_f16(<vscale x 4 x float> %a, <vscale
 define <vscale x 4 x float> @test_fcvt_f32_f16(<vscale x 4 x float> %a, <vscale x 8 x half> %b) {
 ; CHECK-LABEL: define <vscale x 4 x float>
@test_fcvt_f32_f16(
 ; CHECK-SAME: <vscale x 4 x float> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) {
-; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f16(<vscale x 4 x float> undef, <vscale x 4 x i1> [[PG]], <vscale x 8 x half> [[B]])
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f16(<vscale x 4 x float> undef, <vscale x 4 x i1> splat (i1 true), <vscale x 8 x half> [[B]])
 ; CHECK-NEXT: ret <vscale x 4 x float> [[OUT]]
 ;
 %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
@@ -65,8 +60,7 @@ define <vscale x 4 x float> @test_fcvt_f32_f64(<vscale x 4 x float> %a, <vscale
 define <vscale x 4 x float> @test_fcvt_f32_f64(<vscale x 4 x float> %a, <vscale x 2 x double> %b) {
 ; CHECK-LABEL: define <vscale x 4 x float> @test_fcvt_f32_f64(
 ; CHECK-SAME: <vscale x 4 x float> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) {
-; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f64(<vscale x 4 x float> undef, <vscale x 2 x i1> [[PG]], <vscale x 2 x double> [[B]])
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f64(<vscale x 4 x float> undef, <vscale x 2 x i1> splat (i1 true), <vscale x 2 x double> [[B]])
 ; CHECK-NEXT: ret <vscale x 4 x float> [[OUT]]
 ;
 %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
@@ -77,8 +71,7 @@ define <vscale x 2 x double> @test_fcvt_f64_f16(<vscale x 2 x double> %a, <vscal
 define <vscale x 2 x double> @test_fcvt_f64_f16(<vscale x 2 x double> %a, <vscale x 8 x half> %b) {
 ; CHECK-LABEL: define <vscale x 2 x double> @test_fcvt_f64_f16(
 ; CHECK-SAME: <vscale x 2 x double> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) {
-; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fcvt.f64f16(<vscale x 2 x double> undef, <vscale x 2 x i1> [[PG]], <vscale x 8 x half> [[B]])
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fcvt.f64f16(<vscale x 2 x double> undef, <vscale x 2 x i1> splat (i1 true), <vscale x 8 x half> [[B]])
 ; CHECK-NEXT: ret <vscale x 2 x double> [[OUT]]
 ;
 %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
@@ -89,8 +82,7 @@ define <vscale x 2 x double> @test_fcvt_f64_f32(<vscale x 2 x double> %a, <vscal
 define <vscale x 2 x double> @test_fcvt_f64_f32(<vscale x 2 x double> %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: define <vscale x 2 x double> @test_fcvt_f64_f32(
 ; CHECK-SAME: <vscale x 2 x double> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
-; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fcvt.f64f32(<vscale x 2 x double> undef, <vscale x 2 x i1> [[PG]], <vscale x 4 x float> [[B]])
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fcvt.f64f32(<vscale x 2 x double> undef, <vscale x 2 x i1> splat (i1 true), <vscale x 4 x float> [[B]])
 ; CHECK-NEXT: ret <vscale x 2 x double> [[OUT]]
 ;
 %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
@@ -101,8 +93,7 @@ define <vscale x 4 x float> @test_fcvtlt_f32_f16(<vscale x 4 x float> %a,
 define <vscale x 4 x float> @test_fcvtlt_f32_f16(<vscale x 4 x float> %a, <vscale x 8 x half> %b) {
 ; CHECK-LABEL: define <vscale x 4 x float> @test_fcvtlt_f32_f16(
 ; CHECK-SAME: <vscale x 4 x float> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) {
-; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fcvtlt.f32f16(<vscale x 4 x float> undef, <vscale x 4 x i1> [[PG]], <vscale x 8 x half> [[B]])
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fcvtlt.f32f16(<vscale x 4 x float> undef, <vscale x 4 x i1> splat (i1 true), <vscale x 8 x half> [[B]])
 ; CHECK-NEXT: ret <vscale x 4 x float> [[OUT]]
 ;
 %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
@@ -113,8 +104,7 @@ define <vscale x 2 x double> @test_fcvtlt_f64_f32(<vscale x 2 x double> %a, <vsc
 define <vscale x 2 x double> @test_fcvtlt_f64_f32(<vscale x 2 x double> %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: define <vscale x 2 x double> @test_fcvtlt_f64_f32(
 ; CHECK-SAME: <vscale x 2 x double> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
-; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fcvtlt.f64f32(<vscale x 2 x double> undef, <vscale x 2 x i1> [[PG]], <vscale x 4 x float> [[B]])
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fcvtlt.f64f32(<vscale x 2 x double> undef, <vscale x 2 x i1> splat (i1 true), <vscale x 4 x float> [[B]])
 ; CHECK-NEXT: ret <vscale x 2 x double> [[OUT]]
 ;
 %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
@@ -125,8 +115,7 @@ define <vscale x 8 x bfloat> @test_fcvtnt_bf16_f32(<vscale x 8 x bfloat> %a, <vs
 define <vscale x 8 x bfloat> @test_fcvtnt_bf16_f32(<vscale x 8 x bfloat> %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: define <vscale x 8 x bfloat> @test_fcvtnt_bf16_f32(
 ; CHECK-SAME: <vscale x 8 x bfloat> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
-; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.bf16f32.v2(<vscale x 8 x bfloat> [[A]], <vscale x 4 x i1> [[PG]], <vscale x 4 x float> [[B]])
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.bf16f32.v2(<vscale x 8 x bfloat> [[A]], <vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[B]])
 ; CHECK-NEXT: ret <vscale x 8 x bfloat> [[OUT]]
 ;
 %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
@@ -137,8 +126,7 @@ define <vscale x 8 x half> @test_fcvtnt_f16_f32(<vscale x 8 x half> %a, <vscale
 define <vscale x 8 x half> @test_fcvtnt_f16_f32(<vscale x 8 x half> %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: define <vscale x 8 x half> @test_fcvtnt_f16_f32(
 ; CHECK-SAME: <vscale x 8 x half> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
-; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fcvtnt.f16f32(<vscale x 8 x half> [[A]], <vscale x 4 x i1> [[PG]], <vscale x 4 x float> [[B]])
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fcvtnt.f16f32(<vscale x 8 x half> [[A]], <vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[B]])
 ; CHECK-NEXT: ret <vscale x 8 x half> [[OUT]]
 ;
 %pg = tail call <vs...
[truncated]
``````````

https://github.com/llvm/llvm-project/pull/135016
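As noted in the summary, here is a minimal IR sketch of the intended effect. It is illustrative only and not taken from the patch: the function names and the pointer argument are hypothetical, and the masked load stands in for any predicated SVE operation that has already been canonicalized to a generic masked intrinsic.

```llvm
declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
declare <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr, i32, <vscale x 16 x i1>, <vscale x 16 x i8>)

; Before: the all-active predicate is hidden behind a target intrinsic,
; so generic passes cannot tell that every lane is enabled.
define <vscale x 16 x i8> @before(ptr %p) {
  ; pattern 31 is AArch64SVEPredPattern::all (SV_ALL)
  %pg = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %v = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr %p, i32 1, <vscale x 16 x i1> %pg, <vscale x 16 x i8> zeroinitializer)
  ret <vscale x 16 x i8> %v
}

; After this patch: ptrue(all) becomes a constant all-true splat mask...
define <vscale x 16 x i8> @after(ptr %p) {
  %v = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr %p, i32 1, <vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> zeroinitializer)
  ret <vscale x 16 x i8> %v
}

; ...which target-independent InstCombine can fold to an ordinary load.
define <vscale x 16 x i8> @folded(ptr %p) {
  %v = load <vscale x 16 x i8>, ptr %p, align 1
  ret <vscale x 16 x i8> %v
}
```

The final step illustrates the follow-on opportunities the summary refers to: once the mask is the constant splat (i1 true), the existing generic fold of an all-true masked load into a plain load applies, with no AArch64-specific knowledge required.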