llvmbot wrote:
@llvm/pr-subscribers-clang

Author: Matthew Devereau (MDevereau)

Changes

SVE operations such as predicated loads are canonicalized to LLVM masked loads; canonicalizing ptrue(all) to splat(i1 true) in the same way exposes further optimization opportunities to generic LLVM IR passes. A minimal IR sketch of the intended effect follows the diff below.

---

Patch is 214.78 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/135016.diff

11 Files Affected:

- (modified) clang/test/CodeGen/AArch64/sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c (+2-4)
- (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_rdffr.c (+2-4)
- (modified) llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp (+29-12)
- (modified) llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-abs-srshl.ll (+1-2)
- (modified) llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-all-active-lanes-cvt.ll (+35-70)
- (modified) llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-loadstore.ll (+10-12)
- (modified) llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-cmpne.ll (+25-47)
- (modified) llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-strictfp.ll (+6-12)
- (modified) llvm/test/Transforms/InstCombine/AArch64/sve-intrinsics-combine-to-u-forms.ll (+242-363)
- (modified) llvm/test/Transforms/InstCombine/AArch64/sve-intrinsics-ptest.ll (+1-2)
- (modified) llvm/test/Transforms/InstCombine/AArch64/sve-intrinsics-rdffr-predication.ll (+1-2)

``````````diff
diff --git a/clang/test/CodeGen/AArch64/sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c b/clang/test/CodeGen/AArch64/sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c
index bbed683ac1fd7..c3d0541229fac 100644
--- a/clang/test/CodeGen/AArch64/sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c
+++ b/clang/test/CodeGen/AArch64/sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c
@@ -52,9 +52,8 @@ vec2048 x2048 = {0, 1, 2, 3, 3 , 2 , 1, 0, 0, 1, 2, 3, 3 , 2 , 1, 0,
 typedef int8_t vec_int8 __attribute__((vector_size(N / 8)));
 // CHECK128-LABEL: define{{.*}} <16 x i8> @f2(<16 x i8> noundef %x)
 // CHECK128-NEXT: entry:
-// CHECK128-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
 // CHECK128-NEXT: [[CASTSCALABLESVE:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> poison, <16 x i8> [[X:%.*]], i64 0)
-// CHECK128-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.asrd.nxv16i8(<vscale x 16 x i1> [[TMP0]], <vscale x 16 x i8> [[CASTSCALABLESVE]], i32 1)
+// CHECK128-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.asrd.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[CASTSCALABLESVE]], i32 1)
 // CHECK128-NEXT: [[CASTFIXEDSVE:%.*]] = tail call <16 x i8> @llvm.vector.extract.v16i8.nxv16i8(<vscale x 16 x i8> [[TMP1]], i64 0)
 // CHECK128-NEXT: ret <16 x i8> [[CASTFIXEDSVE]]
@@ -62,9 +61,8 @@ typedef int8_t vec_int8 __attribute__((vector_size(N / 8)));
 // CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(<[[#div(VBITS,8)]] x i8>) align 16 captures(none) initializes((0, [[#div(VBITS,8)]])) %agg.result, ptr noundef readonly captures(none) %0)
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[X:%.*]] = load <[[#div(VBITS,8)]] x i8>, ptr [[TMP0:%.*]], align 16, [[TBAA6:!tbaa !.*]]
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
 // CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v[[#div(VBITS,8)]]i8(<vscale x 16 x i8> poison, <[[#div(VBITS,8)]] x i8> [[X]], i64 0)
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.asrd.nxv16i8(<vscale x 16 x i1> [[TMP1]], <vscale x 16 x i8> [[CASTSCALABLESVE]], i32 1)
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.asrd.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[CASTSCALABLESVE]], i32 1)
 // CHECK-NEXT: [[CASTFIXEDSVE:%.*]] = tail call <[[#div(VBITS,8)]] x i8> @llvm.vector.extract.v[[#div(VBITS,8)]]i8.nxv16i8(<vscale x 16 x i8> [[TMP2]], i64 0)
 // CHECK-NEXT: store <[[#div(VBITS,8)]] x i8> [[CASTFIXEDSVE]], ptr [[AGG_RESULT:%.*]], align 16, [[TBAA6]]
 // CHECK-NEXT: ret void
diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_rdffr.c b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_rdffr.c
index 6bf56bdea505c..ca3480d62725a 100644
--- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_rdffr.c
+++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_rdffr.c
@@ -7,14 +7,12 @@
 // CHECK-LABEL: @test_svrdffr(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.rdffr.z(<vscale x 16 x i1> [[TMP0]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.rdffr.z(<vscale x 16 x i1> splat (i1 true))
 // CHECK-NEXT: ret <vscale x 16 x i1> [[TMP1]]
 //
 // CPP-CHECK-LABEL: @_Z12test_svrdffrv(
 // CPP-CHECK-NEXT: entry:
-// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
-// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.rdffr.z(<vscale x 16 x i1> [[TMP0]])
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.rdffr.z(<vscale x 16 x i1> splat (i1 true))
 // CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP1]]
 //
 svbool_t test_svrdffr()
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index b1d8277182add..ae5792cc8c1f1 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1492,9 +1492,17 @@ static bool isAllActivePredicate(Value *Pred) {
   if (cast<ScalableVectorType>(Pred->getType())->getMinNumElements() <=
       cast<ScalableVectorType>(UncastedPred->getType())->getMinNumElements())
     Pred = UncastedPred;
+  if (match(Pred, m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(
+                      m_ConstantInt<AArch64SVEPredPattern::all>())))
+    return true;
+
+  if (Value *Splat = getSplatValue(Pred)) {
+    auto ConstIdx = dyn_cast<ConstantInt>(Splat);
+    if (ConstIdx->getZExtValue() == 1)
+      return true;
+  }
-  return match(Pred, m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(
-                         m_ConstantInt<AArch64SVEPredPattern::all>()));
+  return false;
 }
 
 // Use SVE intrinsic info to eliminate redundant operands and/or canonicalise
@@ -1701,14 +1709,7 @@ static std::optional<Instruction *> instCombineSVECmpNE(InstCombiner &IC,
                                                         IntrinsicInst &II) {
   LLVMContext &Ctx = II.getContext();
 
-  // Check that the predicate is all active
-  auto *Pg = dyn_cast<IntrinsicInst>(II.getArgOperand(0));
-  if (!Pg || Pg->getIntrinsicID() != Intrinsic::aarch64_sve_ptrue)
-    return std::nullopt;
-
-  const auto PTruePattern =
-      cast<ConstantInt>(Pg->getOperand(0))->getZExtValue();
-  if (PTruePattern != AArch64SVEPredPattern::all)
+  if (!isAllActivePredicate(II.getArgOperand(0)))
     return std::nullopt;
 
   // Check that we have a compare of zero..
@@ -2118,8 +2119,7 @@ instCombineSVEVectorBinOp(InstCombiner &IC, IntrinsicInst &II) {
   auto *OpPredicate = II.getOperand(0);
   auto BinOpCode = intrinsicIDToBinOpCode(II.getIntrinsicID());
   if (BinOpCode == Instruction::BinaryOpsEnd ||
-      !match(OpPredicate, m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(
-                              m_ConstantInt<AArch64SVEPredPattern::all>())))
+      !isAllActivePredicate(OpPredicate))
     return std::nullopt;
   auto BinOp = IC.Builder.CreateBinOpFMF(
       BinOpCode, II.getOperand(1), II.getOperand(2), II.getFastMathFlags());
@@ -2641,6 +2641,21 @@ static std::optional<Instruction *> instCombineDMB(InstCombiner &IC,
   return std::nullopt;
 }
 
+static std::optional<Instruction *> instCombinePTrue(InstCombiner &IC,
+                                                     IntrinsicInst &II) {
+  IRBuilder<> Builder(&II);
+  auto Type = cast<VectorType>(II.getType());
+  ConstantInt *Pattern;
+  if (match(II.getOperand(0), m_ConstantInt(Pattern)) &&
+      Pattern->getZExtValue() == AArch64SVEPredPattern::all) {
+    Value *One = ConstantInt::get(Builder.getInt1Ty(), APInt(1, 1));
+    Value *SplatOne =
+        Builder.CreateVectorSplat(Type->getElementCount(), One);
+    return IC.replaceInstUsesWith(II, SplatOne);
+  }
+  return std::nullopt;
+}
+
 std::optional<Instruction *>
 AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
                                      IntrinsicInst &II) const {
@@ -2744,6 +2759,8 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
     return instCombineSVEDupqLane(IC, II);
   case Intrinsic::aarch64_sve_insr:
     return instCombineSVEInsr(IC, II);
+  case Intrinsic::aarch64_sve_ptrue:
+    return instCombinePTrue(IC, II);
   }
 
   return std::nullopt;
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-abs-srshl.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-abs-srshl.ll
index 7fb0fbdda0b5d..f71aaa289b89c 100644
--- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-abs-srshl.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-abs-srshl.ll
@@ -42,8 +42,7 @@ define <vscale x 8 x i16> @srshl_abs_positive_merge(<vscale x 8 x i16> %a, <vsca
 define <vscale x 8 x i16> @srshl_abs_all_active_pred(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i1> %pg2) #0 {
 ; CHECK-LABEL: @srshl_abs_all_active_pred(
-; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-; CHECK-NEXT: [[ABS:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> [[B:%.*]], <vscale x 8 x i1> [[PG]], <vscale x 8 x i16> [[A:%.*]])
+; CHECK-NEXT: [[ABS:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> [[B:%.*]], <vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[A:%.*]])
 ; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> [[PG2:%.*]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> splat (i16 2))
 ; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-all-active-lanes-cvt.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-all-active-lanes-cvt.ll
index b8ea4de3d2382..1c5f7464d858a 100644
--- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-all-active-lanes-cvt.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-all-active-lanes-cvt.ll
@@ -5,8 +5,7 @@ target triple = "aarch64-unknown-linux-gnu"
 define <vscale x 8 x bfloat> @test_fcvt_bf16_f32_poison(<vscale x 8 x bfloat> %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: define <vscale x 8 x bfloat> @test_fcvt_bf16_f32_poison(
 ; CHECK-SAME: <vscale x 8 x bfloat> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
-; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32.v2(<vscale x 8 x bfloat> poison, <vscale x 4 x i1> [[PG]], <vscale x 4 x float> [[B]])
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32.v2(<vscale x 8 x bfloat> poison, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[B]])
 ; CHECK-NEXT: ret <vscale x 8 x bfloat> [[OUT]]
 ;
 %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
@@ -17,8 +16,7 @@ define <vscale x 8 x bfloat> @test_fcvt_bf16_f32(<vscale x 8 x bfloat> %a, <vsca
 define <vscale x 8 x bfloat> @test_fcvt_bf16_f32(<vscale x 8 x bfloat> %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: define <vscale x 8 x bfloat> @test_fcvt_bf16_f32(
 ; CHECK-SAME: <vscale x 8 x bfloat> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
-; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32.v2(<vscale x 8 x bfloat> undef, <vscale x 4 x i1> [[PG]], <vscale x 4 x float> [[B]])
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32.v2(<vscale x 8 x bfloat> undef, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[B]])
 ; CHECK-NEXT: ret <vscale x 8 x bfloat> [[OUT]]
 ;
 %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
@@ -29,8 +27,7 @@ define <vscale x 8 x half> @test_fcvt_f16_f32(<vscale x 8 x half> %a, <vscale x
 define <vscale x 8 x half> @test_fcvt_f16_f32(<vscale x 8 x half> %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: define <vscale x 8 x half> @test_fcvt_f16_f32(
 ; CHECK-SAME: <vscale x 8 x half> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
-; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f32(<vscale x 8 x half> undef, <vscale x 4 x i1> [[PG]], <vscale x 4 x float> [[B]])
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f32(<vscale x 8 x half> undef, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[B]])
 ; CHECK-NEXT: ret <vscale x 8 x half> [[OUT]]
 ;
 %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
@@ -41,8 +38,7 @@ define <vscale x 8 x half> @test_fcvt_f16_f64(<vscale x 8 x half> %a, <vscale x
 define <vscale x 8 x half> @test_fcvt_f16_f64(<vscale x 8 x half> %a, <vscale x 2 x double> %b) {
 ; CHECK-LABEL: define <vscale x 8 x half> @test_fcvt_f16_f64(
 ; CHECK-SAME: <vscale x 8 x half> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) {
-; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f64(<vscale x 8 x half> undef, <vscale x 2 x i1> [[PG]], <vscale x 2 x double> [[B]])
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f64(<vscale x 8 x half> undef, <vscale x 2 x i1> splat (i1 true), <vscale x 2 x double> [[B]])
 ; CHECK-NEXT: ret <vscale x 8 x half> [[OUT]]
 ;
 %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
@@ -53,8 +49,7 @@ define <vscale x 4 x float> @test_fcvt_f32_f16(<vscale x 4 x float> %a, <vscale
 define <vscale x 4 x float> @test_fcvt_f32_f16(<vscale x 4 x float> %a, <vscale x 8 x half> %b) {
 ; CHECK-LABEL: define <vscale x 4 x float>
@test_fcvt_f32_f16(
 ; CHECK-SAME: <vscale x 4 x float> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) {
-; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f16(<vscale x 4 x float> undef, <vscale x 4 x i1> [[PG]], <vscale x 8 x half> [[B]])
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f16(<vscale x 4 x float> undef, <vscale x 4 x i1> splat (i1 true), <vscale x 8 x half> [[B]])
 ; CHECK-NEXT: ret <vscale x 4 x float> [[OUT]]
 ;
 %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
@@ -65,8 +60,7 @@ define <vscale x 4 x float> @test_fcvt_f32_f64(<vscale x 4 x float> %a, <vscale
 define <vscale x 4 x float> @test_fcvt_f32_f64(<vscale x 4 x float> %a, <vscale x 2 x double> %b) {
 ; CHECK-LABEL: define <vscale x 4 x float> @test_fcvt_f32_f64(
 ; CHECK-SAME: <vscale x 4 x float> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) {
-; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f64(<vscale x 4 x float> undef, <vscale x 2 x i1> [[PG]], <vscale x 2 x double> [[B]])
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f64(<vscale x 4 x float> undef, <vscale x 2 x i1> splat (i1 true), <vscale x 2 x double> [[B]])
 ; CHECK-NEXT: ret <vscale x 4 x float> [[OUT]]
 ;
 %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
@@ -77,8 +71,7 @@ define <vscale x 2 x double> @test_fcvt_f64_f16(<vscale x 2 x double> %a, <vscal
 define <vscale x 2 x double> @test_fcvt_f64_f16(<vscale x 2 x double> %a, <vscale x 8 x half> %b) {
 ; CHECK-LABEL: define <vscale x 2 x double> @test_fcvt_f64_f16(
 ; CHECK-SAME: <vscale x 2 x double> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) {
-; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fcvt.f64f16(<vscale x 2 x double> undef, <vscale x 2 x i1> [[PG]], <vscale x 8 x half> [[B]])
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fcvt.f64f16(<vscale x 2 x double> undef, <vscale x 2 x i1> splat (i1 true), <vscale x 8 x half> [[B]])
 ; CHECK-NEXT: ret <vscale x 2 x double> [[OUT]]
 ;
 %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
@@ -89,8 +82,7 @@ define <vscale x 2 x double> @test_fcvt_f64_f32(<vscale x 2 x double> %a, <vscal
 define <vscale x 2 x double> @test_fcvt_f64_f32(<vscale x 2 x double> %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: define <vscale x 2 x double> @test_fcvt_f64_f32(
 ; CHECK-SAME: <vscale x 2 x double> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
-; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fcvt.f64f32(<vscale x 2 x double> undef, <vscale x 2 x i1> [[PG]], <vscale x 4 x float> [[B]])
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fcvt.f64f32(<vscale x 2 x double> undef, <vscale x 2 x i1> splat (i1 true), <vscale x 4 x float> [[B]])
 ; CHECK-NEXT: ret <vscale x 2 x double> [[OUT]]
 ;
 %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
@@ -101,8 +93,7 @@ define <vscale x 4 x float> @test_fcvtlt_f32_f16(<vscale x 4 x float> %a,
 define <vscale x 4 x float> @test_fcvtlt_f32_f16(<vscale x 4 x float> %a, <vscale x 8 x half> %b) {
 ; CHECK-LABEL: define <vscale x 4 x float> @test_fcvtlt_f32_f16(
 ; CHECK-SAME: <vscale x 4 x float> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) {
-; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fcvtlt.f32f16(<vscale x 4 x float> undef, <vscale x 4 x i1> [[PG]], <vscale x 8 x half> [[B]])
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fcvtlt.f32f16(<vscale x 4 x float> undef, <vscale x 4 x i1> splat (i1 true), <vscale x 8 x half> [[B]])
 ; CHECK-NEXT: ret <vscale x 4 x float> [[OUT]]
 ;
 %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
@@ -113,8 +104,7 @@ define <vscale x 2 x double> @test_fcvtlt_f64_f32(<vscale x 2 x double> %a, <vsc
 define <vscale x 2 x double> @test_fcvtlt_f64_f32(<vscale x 2 x double> %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: define <vscale x 2 x double> @test_fcvtlt_f64_f32(
 ; CHECK-SAME: <vscale x 2 x double> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
-; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fcvtlt.f64f32(<vscale x 2 x double> undef, <vscale x 2 x i1> [[PG]], <vscale x 4 x float> [[B]])
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fcvtlt.f64f32(<vscale x 2 x double> undef, <vscale x 2 x i1> splat (i1 true), <vscale x 4 x float> [[B]])
 ; CHECK-NEXT: ret <vscale x 2 x double> [[OUT]]
 ;
 %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
@@ -125,8 +115,7 @@ define <vscale x 8 x bfloat> @test_fcvtnt_bf16_f32(<vscale x 8 x bfloat> %a, <vs
 define <vscale x 8 x bfloat> @test_fcvtnt_bf16_f32(<vscale x 8 x bfloat> %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: define <vscale x 8 x bfloat> @test_fcvtnt_bf16_f32(
 ; CHECK-SAME: <vscale x 8 x bfloat> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
-; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.bf16f32.v2(<vscale x 8 x bfloat> [[A]], <vscale x 4 x i1> [[PG]], <vscale x 4 x float> [[B]])
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.bf16f32.v2(<vscale x 8 x bfloat> [[A]], <vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[B]])
 ; CHECK-NEXT: ret <vscale x 8 x bfloat> [[OUT]]
 ;
 %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
@@ -137,8 +126,7 @@ define <vscale x 8 x half> @test_fcvtnt_f16_f32(<vscale x 8 x half> %a, <vscale
 define <vscale x 8 x half> @test_fcvtnt_f16_f32(<vscale x 8 x half> %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: define <vscale x 8 x half> @test_fcvtnt_f16_f32(
 ; CHECK-SAME: <vscale x 8 x half> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
-; CHECK-NEXT: [[PG:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fcvtnt.f16f32(<vscale x 8 x half> [[A]], <vscale x 4 x i1> [[PG]], <vscale x 4 x float> [[B]])
+; CHECK-NEXT: [[OUT:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fcvtnt.f16f32(<vscale x 8 x half> [[A]], <vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[B]])
 ; CHECK-NEXT: ret <vscale x 8 x half> [[OUT]]
 ;
 %pg = tail call <vs...
[truncated]
``````````

https://github.com/llvm/llvm-project/pull/135016
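As noted in the summary, here is a minimal IR sketch of the intended effect. It is illustrative only and not taken from the patch: the function names and the pointer argument are hypothetical, and the masked load stands in for any predicated SVE operation that has already been canonicalized to a generic masked intrinsic.

```llvm
declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
declare <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr, i32, <vscale x 16 x i1>, <vscale x 16 x i8>)

; Before: the all-active predicate is hidden behind a target intrinsic,
; so generic passes cannot tell that every lane is enabled.
define <vscale x 16 x i8> @before(ptr %p) {
  ; pattern 31 is AArch64SVEPredPattern::all (SV_ALL)
  %pg = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %v = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr %p, i32 1, <vscale x 16 x i1> %pg, <vscale x 16 x i8> zeroinitializer)
  ret <vscale x 16 x i8> %v
}

; After this patch: ptrue(all) becomes a constant all-true splat mask...
define <vscale x 16 x i8> @after(ptr %p) {
  %v = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr %p, i32 1, <vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> zeroinitializer)
  ret <vscale x 16 x i8> %v
}

; ...which target-independent InstCombine can fold to an ordinary load.
define <vscale x 16 x i8> @folded(ptr %p) {
  %v = load <vscale x 16 x i8>, ptr %p, align 1
  ret <vscale x 16 x i8> %v
}
```

The final step illustrates the follow-on opportunities the summary refers to: once the mask is the constant splat (i1 true), the existing generic fold of an all-true masked load into a plain load applies, with no AArch64-specific knowledge required.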