Author: David Sherwood Date: 2021-11-10T09:42:58Z New Revision: 2a48b6993a973e0ab2331e8c11dbd6e6100e2cfe
URL: https://github.com/llvm/llvm-project/commit/2a48b6993a973e0ab2331e8c11dbd6e6100e2cfe DIFF: https://github.com/llvm/llvm-project/commit/2a48b6993a973e0ab2331e8c11dbd6e6100e2cfe.diff LOG: [IR] In ConstantFoldShuffleVectorInstruction use zeroinitializer for splats of 0 When creating a splat of 0 for scalable vectors we tend to create them using a combination of shufflevector and insertelement, i.e. shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 0, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) However, for the case of a zero splat we can actually just replace the above with zeroinitializer instead. This makes the IR a lot simpler and easier to read. I have changed ConstantFoldShuffleVectorInstruction to use zeroinitializer when creating a splat of integer 0 or FP +0.0 values. Differential Revision: https://reviews.llvm.org/D113394 Added: Modified: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dupq.c llvm/lib/IR/ConstantFold.cpp llvm/test/Bitcode/vscale-round-trip.ll llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll llvm/test/Transforms/LoopVectorize/AArch64/sve-basic-vec.ll llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll llvm/test/Transforms/LoopVectorize/AArch64/sve-select-cmp.ll llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll llvm/test/Transforms/LoopVectorize/scalable-inductions.ll llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll Removed: ################################################################################ diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dupq.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dupq.c index 3c13080e14f70..1cd9ef1f1a277 100644 --- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dupq.c +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dupq.c @@ -568,7 +568,7 @@ svfloat64_t test_svdupq_n_f64(float64_t x0, 
float64_t x1) // CHECK-NEXT: [[TMP16:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) // CHECK-NEXT: [[TMP17:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> undef, <16 x i8> [[TMP15]], i64 0) // CHECK-NEXT: [[TMP18:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.dupq.lane.nxv16i8(<vscale x 16 x i8> [[TMP17]], i64 0) -// CHECK-NEXT: [[TMP19:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpne.wide.nxv16i8(<vscale x 16 x i1> [[TMP16]], <vscale x 16 x i8> [[TMP18]], <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 0, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)) +// CHECK-NEXT: [[TMP19:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpne.wide.nxv16i8(<vscale x 16 x i1> [[TMP16]], <vscale x 16 x i8> [[TMP18]], <vscale x 2 x i64> zeroinitializer) // CHECK-NEXT: ret <vscale x 16 x i1> [[TMP19]] // // CPP-CHECK-LABEL: @_Z16test_svdupq_n_b8bbbbbbbbbbbbbbbb( @@ -608,7 +608,7 @@ svfloat64_t test_svdupq_n_f64(float64_t x0, float64_t x1) // CPP-CHECK-NEXT: [[TMP16:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) // CPP-CHECK-NEXT: [[TMP17:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> undef, <16 x i8> [[TMP15]], i64 0) // CPP-CHECK-NEXT: [[TMP18:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.dupq.lane.nxv16i8(<vscale x 16 x i8> [[TMP17]], i64 0) -// CPP-CHECK-NEXT: [[TMP19:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpne.wide.nxv16i8(<vscale x 16 x i1> [[TMP16]], <vscale x 16 x i8> [[TMP18]], <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 0, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)) +// CPP-CHECK-NEXT: [[TMP19:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpne.wide.nxv16i8(<vscale x 16 x i1> [[TMP16]], <vscale x 16 x i8> [[TMP18]], <vscale x 2 x i64> 
zeroinitializer) // CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP19]] // svbool_t test_svdupq_n_b8(bool x0, bool x1, bool x2, bool x3, @@ -641,7 +641,7 @@ svbool_t test_svdupq_n_b8(bool x0, bool x1, bool x2, bool x3, // CHECK-NEXT: [[TMP16:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) // CHECK-NEXT: [[TMP17:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> undef, <8 x i16> [[TMP15]], i64 0) // CHECK-NEXT: [[TMP18:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.dupq.lane.nxv8i16(<vscale x 8 x i16> [[TMP17]], i64 0) -// CHECK-NEXT: [[TMP19:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.cmpne.wide.nxv8i16(<vscale x 8 x i1> [[TMP16]], <vscale x 8 x i16> [[TMP18]], <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 0, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)) +// CHECK-NEXT: [[TMP19:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.cmpne.wide.nxv8i16(<vscale x 8 x i1> [[TMP16]], <vscale x 8 x i16> [[TMP18]], <vscale x 2 x i64> zeroinitializer) // CHECK-NEXT: [[TMP20:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP19]]) // CHECK-NEXT: ret <vscale x 16 x i1> [[TMP20]] // @@ -666,7 +666,7 @@ svbool_t test_svdupq_n_b8(bool x0, bool x1, bool x2, bool x3, // CPP-CHECK-NEXT: [[TMP16:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) // CPP-CHECK-NEXT: [[TMP17:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> undef, <8 x i16> [[TMP15]], i64 0) // CPP-CHECK-NEXT: [[TMP18:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.dupq.lane.nxv8i16(<vscale x 8 x i16> [[TMP17]], i64 0) -// CPP-CHECK-NEXT: [[TMP19:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.cmpne.wide.nxv8i16(<vscale x 8 x i1> [[TMP16]], <vscale x 8 x i16> [[TMP18]], <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> 
poison, i64 0, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)) +// CPP-CHECK-NEXT: [[TMP19:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.cmpne.wide.nxv8i16(<vscale x 8 x i1> [[TMP16]], <vscale x 8 x i16> [[TMP18]], <vscale x 2 x i64> zeroinitializer) // CPP-CHECK-NEXT: [[TMP20:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP19]]) // CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP20]] // @@ -690,7 +690,7 @@ svbool_t test_svdupq_n_b16(bool x0, bool x1, bool x2, bool x3, // CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) // CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> [[TMP7]], i64 0) // CHECK-NEXT: [[TMP10:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32> [[TMP9]], i64 0) -// CHECK-NEXT: [[TMP11:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.cmpne.wide.nxv4i32(<vscale x 4 x i1> [[TMP8]], <vscale x 4 x i32> [[TMP10]], <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 0, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)) +// CHECK-NEXT: [[TMP11:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.cmpne.wide.nxv4i32(<vscale x 4 x i1> [[TMP8]], <vscale x 4 x i32> [[TMP10]], <vscale x 2 x i64> zeroinitializer) // CHECK-NEXT: [[TMP12:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP11]]) // CHECK-NEXT: ret <vscale x 16 x i1> [[TMP12]] // @@ -707,7 +707,7 @@ svbool_t test_svdupq_n_b16(bool x0, bool x1, bool x2, bool x3, // CPP-CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) // CPP-CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> [[TMP7]], i64 0) // CPP-CHECK-NEXT: [[TMP10:%.*]] = call <vscale x 4 x 
i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32> [[TMP9]], i64 0) -// CPP-CHECK-NEXT: [[TMP11:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.cmpne.wide.nxv4i32(<vscale x 4 x i1> [[TMP8]], <vscale x 4 x i32> [[TMP10]], <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 0, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)) +// CPP-CHECK-NEXT: [[TMP11:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.cmpne.wide.nxv4i32(<vscale x 4 x i1> [[TMP8]], <vscale x 4 x i32> [[TMP10]], <vscale x 2 x i64> zeroinitializer) // CPP-CHECK-NEXT: [[TMP12:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP11]]) // CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP12]] // @@ -726,7 +726,7 @@ svbool_t test_svdupq_n_b32(bool x0, bool x1, bool x2, bool x3) // CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) // CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> undef, <2 x i64> [[TMP3]], i64 0) // CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> [[TMP5]], i64 0) -// CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.cmpne.nxv2i64(<vscale x 2 x i1> [[TMP4]], <vscale x 2 x i64> [[TMP6]], <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 0, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)) +// CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.cmpne.nxv2i64(<vscale x 2 x i1> [[TMP4]], <vscale x 2 x i64> [[TMP6]], <vscale x 2 x i64> zeroinitializer) // CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP7]]) // CHECK-NEXT: ret <vscale x 16 x i1> [[TMP8]] // @@ -739,7 +739,7 @@ svbool_t test_svdupq_n_b32(bool x0, bool x1, bool x2, bool x3) // 
CPP-CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) // CPP-CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> undef, <2 x i64> [[TMP3]], i64 0) // CPP-CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> [[TMP5]], i64 0) -// CPP-CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.cmpne.nxv2i64(<vscale x 2 x i1> [[TMP4]], <vscale x 2 x i64> [[TMP6]], <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 0, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.cmpne.nxv2i64(<vscale x 2 x i1> [[TMP4]], <vscale x 2 x i64> [[TMP6]], <vscale x 2 x i64> zeroinitializer) // CPP-CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP7]]) // CPP-CHECK-NEXT: ret <vscale x 16 x i1> [[TMP8]] // diff --git a/llvm/lib/IR/ConstantFold.cpp b/llvm/lib/IR/ConstantFold.cpp index 7c49adb873dc1..437fd05584473 100644 --- a/llvm/lib/IR/ConstantFold.cpp +++ b/llvm/lib/IR/ConstantFold.cpp @@ -731,12 +731,16 @@ Constant *llvm::ConstantFoldShuffleVectorInstruction(Constant *V1, Constant *V2, // If the mask is all zeros this is a splat, no need to go through all // elements. 
- if (all_of(Mask, [](int Elt) { return Elt == 0; }) && - !MaskEltCount.isScalable()) { + if (all_of(Mask, [](int Elt) { return Elt == 0; })) { Type *Ty = IntegerType::get(V1->getContext(), 32); Constant *Elt = ConstantExpr::getExtractElement(V1, ConstantInt::get(Ty, 0)); - return ConstantVector::getSplat(MaskEltCount, Elt); + + if (Elt->isNullValue()) { + auto *VTy = VectorType::get(EltTy, MaskEltCount); + return ConstantAggregateZero::get(VTy); + } else if (!MaskEltCount.isScalable()) + return ConstantVector::getSplat(MaskEltCount, Elt); } // Do not iterate on scalable vector. The num of elements is unknown at // compile-time. diff --git a/llvm/test/Bitcode/vscale-round-trip.ll b/llvm/test/Bitcode/vscale-round-trip.ll index 22790179fc071..62a3479a37242 100644 --- a/llvm/test/Bitcode/vscale-round-trip.ll +++ b/llvm/test/Bitcode/vscale-round-trip.ll @@ -6,7 +6,7 @@ target triple = "aarch64" @important_val = extern_weak dso_local global i32, align 4 ; CHECK-LABEL: define <vscale x 4 x i32> @const_shufflevector( -; CHECK: <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> +; CHECK: <vscale x 4 x i32> zeroinitializer define <vscale x 4 x i32> @const_shufflevector() { ret <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> zeroinitializer, @@ -15,7 +15,7 @@ define <vscale x 4 x i32> @const_shufflevector() { } ; CHECK-LABEL: define <vscale x 4 x i32> @const_shufflevector_ex() -; CHECK: <vscale x 4 x i32> shufflevector (<vscale x 2 x i32> +; CHECK: <vscale x 4 x i32> zeroinitializer define <vscale x 4 x i32> @const_shufflevector_ex() { ret <vscale x 4 x i32> shufflevector (<vscale x 2 x i32> zeroinitializer, diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll index eb78fe171ba0b..bd5c2929cbe11 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll @@ -121,9 +121,9 @@ define 
void @fadd_strict_interleave(float* noalias nocapture readonly %a, float* ; CHECK-ORDERED: %[[LOAD2:.*]] = load float, float* %[[ARRAYIDX]] ; CHECK-ORDERED: vector.ph ; CHECK-ORDERED: %[[STEPVEC1:.*]] = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64() -; CHECK-ORDERED: %[[STEPVEC_ADD1:.*]] = add <vscale x 4 x i64> %[[STEPVEC1]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 0, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-ORDERED: %[[STEPVEC_ADD1:.*]] = add <vscale x 4 x i64> %[[STEPVEC1]], zeroinitializer ; CHECK-ORDERED: %[[STEPVEC_MUL:.*]] = mul <vscale x 4 x i64> %[[STEPVEC_ADD1]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 2, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) -; CHECK-ORDERED: %[[INDUCTION:.*]] = add <vscale x 4 x i64> shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 0, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer), %[[STEPVEC_MUL]] +; CHECK-ORDERED: %[[INDUCTION:.*]] = add <vscale x 4 x i64> zeroinitializer, %[[STEPVEC_MUL]] ; CHECK-ORDERED: vector.body ; CHECK-ORDERED: %[[VEC_PHI2:.*]] = phi float [ %[[LOAD2]], %vector.ph ], [ %[[RDX2:.*]], %vector.body ] ; CHECK-ORDERED: %[[VEC_PHI1:.*]] = phi float [ %[[LOAD1]], %vector.ph ], [ %[[RDX1:.*]], %vector.body ] @@ -147,9 +147,9 @@ define void @fadd_strict_interleave(float* noalias nocapture readonly %a, float* ; CHECK-UNORDERED: %[[INS_ELT2:.*]] = insertelement <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float -0.000000e+00, i32 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), float %[[LOAD2]], i32 0 ; CHECK-UNORDERED: %[[INS_ELT1:.*]] = insertelement <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float -0.000000e+00, i32 0), <vscale x 4 x float> poison, <vscale x 4 x i32> 
zeroinitializer), float %[[LOAD1]], i32 0 ; CHECK-UNORDERED: %[[STEPVEC1:.*]] = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64() -; CHECK-UNORDERED: %[[STEPVEC_ADD1:.*]] = add <vscale x 4 x i64> %[[STEPVEC1]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 0, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-UNORDERED: %[[STEPVEC_ADD1:.*]] = add <vscale x 4 x i64> %[[STEPVEC1]], zeroinitializer ; CHECK-UNORDERED: %[[STEPVEC_MUL:.*]] = mul <vscale x 4 x i64> %[[STEPVEC_ADD1]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 2, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) -; CHECK-UNORDERED: %[[INDUCTION:.*]] = add <vscale x 4 x i64> shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 0, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer), %[[STEPVEC_MUL]] +; CHECK-UNORDERED: %[[INDUCTION:.*]] = add <vscale x 4 x i64> zeroinitializer, %[[STEPVEC_MUL]] ; CHECK-UNORDERED: vector.body ; CHECK-UNORDERED: %[[VEC_PHI2:.*]] = phi <vscale x 4 x float> [ %[[INS_ELT2]], %vector.ph ], [ %[[VEC_FADD2:.*]], %vector.body ] ; CHECK-UNORDERED: %[[VEC_PHI1:.*]] = phi <vscale x 4 x float> [ %[[INS_ELT1]], %vector.ph ], [ %[[VEC_FADD1:.*]], %vector.body ] @@ -273,7 +273,7 @@ define float @fadd_conditional(float* noalias nocapture readonly %a, float* noal ; CHECK-ORDERED: vector.body ; CHECK-ORDERED: %[[VEC_PHI:.*]] = phi float [ 1.000000e+00, %vector.ph ], [ %[[RDX:.*]], %vector.body ] ; CHECK-ORDERED: %[[LOAD:.*]] = load <vscale x 4 x float>, <vscale x 4 x float>* -; CHECK-ORDERED: %[[FCMP:.*]] = fcmp une <vscale x 4 x float> %[[LOAD]], shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 0.000000e+00, i32 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-ORDERED: %[[FCMP:.*]] = fcmp une <vscale x 4 x float> %[[LOAD]], zeroinitializer ; 
CHECK-ORDERED: %[[MASKED_LOAD:.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0nxv4f32(<vscale x 4 x float>* {{.*}}, i32 4, <vscale x 4 x i1> %[[FCMP]], <vscale x 4 x float> poison) ; CHECK-ORDERED: %[[XOR:.*]] = xor <vscale x 4 x i1> %[[FCMP]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i32 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer) ; CHECK-ORDERED: %[[SELECT:.*]] = select <vscale x 4 x i1> %[[XOR]], <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 3.000000e+00, i32 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x float> %[[MASKED_LOAD]] @@ -295,7 +295,7 @@ define float @fadd_conditional(float* noalias nocapture readonly %a, float* noal ; CHECK-UNORDERED: vector.body ; CHECK-UNORDERED: %[[VEC_PHI:.*]] = phi <vscale x 4 x float> [ insertelement (<vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float -0.000000e+00, i32 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), float 1.000000e+00, i32 0), %vector.ph ], [ %[[VEC_FADD:.*]], %vector.body ] ; CHECK-UNORDERED: %[[LOAD1:.*]] = load <vscale x 4 x float>, <vscale x 4 x float>* -; CHECK-UNORDERED: %[[FCMP:.*]] = fcmp une <vscale x 4 x float> %[[LOAD1]], shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 0.000000e+00, i32 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-UNORDERED: %[[FCMP:.*]] = fcmp une <vscale x 4 x float> %[[LOAD1]], zeroinitializer ; CHECK-UNORDERED: %[[MASKED_LOAD:.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0nxv4f32(<vscale x 4 x float>* {{.*}}, i32 4, <vscale x 4 x i1> %[[FCMP]], <vscale x 4 x float> poison) ; CHECK-UNORDERED: %[[XOR:.*]] = xor <vscale x 4 x i1> %[[FCMP]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i32 0), <vscale x 4 x i1> 
poison, <vscale x 4 x i32> zeroinitializer) ; CHECK-UNORDERED: %[[SELECT:.*]] = select <vscale x 4 x i1> %[[XOR]], <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 3.000000e+00, i32 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x float> %[[MASKED_LOAD]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-basic-vec.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-basic-vec.ll index 79cb34995aca8..dfbbe6610c637 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-basic-vec.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-basic-vec.ll @@ -9,7 +9,7 @@ define void @cmpsel_i32(i32* noalias nocapture %a, i32* noalias nocapture readon ; CHECK-NEXT: entry: ; CHECK: vector.body: ; CHECK: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* {{.*}}, align 4 -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <vscale x 4 x i32> [[WIDE_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 0, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <vscale x 4 x i32> [[WIDE_LOAD]], zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = select <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 2, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 10, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) ; CHECK: store <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32>* {{.*}}, align 4 ; diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll index bc083a2bc870c..1f0453011aba7 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll +++ 
b/llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll @@ -83,7 +83,7 @@ define void @invariant_load_cond(i32* noalias nocapture %a, i32* nocapture reado ; CHECK-NEXT: %[[SPLATINS:.*]] = insertelement <vscale x 4 x i32*> poison, i32* %[[GEP]], i32 0 ; CHECK-NEXT: %[[SPLAT:.*]] = shufflevector <vscale x 4 x i32*> %[[SPLATINS]], <vscale x 4 x i32*> poison, <vscale x 4 x i32> zeroinitializer ; CHECK: %[[LOAD:.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* -; CHECK-NEXT: %[[ICMP:.*]] = icmp ne <vscale x 4 x i32> %[[LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 0, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: %[[ICMP:.*]] = icmp ne <vscale x 4 x i32> %[[LOAD]], zeroinitializer ; CHECK: %[[MASKED_LOAD:.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* %[[BITCAST:.*]], i32 4, <vscale x 4 x i1> %[[ICMP]], <vscale x 4 x i32> poison) ; CHECK-NEXT: %[[MASKED_GATHER:.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> %[[SPLAT]], i32 4, <vscale x 4 x i1> %[[ICMP]], <vscale x 4 x i32> undef) ; CHECK-NEXT: %[[ADD:.*]] = add nsw <vscale x 4 x i32> %[[MASKED_GATHER]], %[[MASKED_LOAD]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll index bb705ea3c2656..0a09e29c67386 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll @@ -57,7 +57,7 @@ define void @cond_inv_store_i32(i32* noalias %dst, i32* noalias readonly %src, i ; CHECK-NEXT: %[[SPLAT_PTRS:.*]] = shufflevector <vscale x 4 x i32*> %[[TMP1]], <vscale x 4 x i32*> poison, <vscale x 4 x i32> zeroinitializer ; CHECK: vector.body: ; CHECK: %[[VECLOAD:.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* %{{.*}}, align 4 -; CHECK-NEXT: %[[MASK:.*]] = icmp sgt <vscale x 4 x i32> %[[VECLOAD]], 
shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 0, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: %[[MASK:.*]] = icmp sgt <vscale x 4 x i32> %[[VECLOAD]], zeroinitializer ; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32> %[[VECLOAD]], <vscale x 4 x i32*> %[[SPLAT_PTRS]], i32 4, <vscale x 4 x i1> %[[MASK]]) entry: br label %for.body @@ -104,7 +104,7 @@ define void @uniform_store_i1(i1* noalias %dst, i64* noalias %start, i64 %N) #0 ; CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64() ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[INDEX]], i32 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = add <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 0, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer), [[TMP5]] +; CHECK-NEXT: [[TMP6:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT]], [[TMP6]] ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i64, i64* [[START]], <vscale x 2 x i64> [[TMP7]] ; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 0 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-select-cmp.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-select-cmp.ll index a8285cfa08768..6878ad68af469 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-select-cmp.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-select-cmp.ll @@ -154,7 +154,7 @@ exit: ; preds = %for.body define i32 @pred_select_const_i32_from_icmp(i32* noalias nocapture readonly %src1, i32* noalias nocapture readonly %src2, i64 %n) #0 { ; CHECK-VF4IC1-LABEL: @pred_select_const_i32_from_icmp ; CHECK-VF4IC1: vector.body: -; CHECK-VF4IC1: 
[[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 0, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] +; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] ; CHECK-VF4IC1: [[VEC_LOAD:%.*]] = load <vscale x 4 x i32> ; CHECK-VF4IC1: [[MASK:%.*]] = icmp sgt <vscale x 4 x i32> [[VEC_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 35, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) ; CHECK-VF4IC1: [[MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* {{%.*}}, i32 4, <vscale x 4 x i1> [[MASK]], <vscale x 4 x i32> poison) @@ -162,7 +162,7 @@ define i32 @pred_select_const_i32_from_icmp(i32* noalias nocapture readonly %src ; CHECK-VF4IC1-NEXT: [[VEC_SEL_TMP:%.*]] = select <vscale x 4 x i1> [[VEC_ICMP]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> [[VEC_PHI]] ; CHECK-VF4IC1: [[VEC_SEL:%.*]] = select <vscale x 4 x i1> [[MASK]], <vscale x 4 x i32> [[VEC_SEL_TMP]], <vscale x 4 x i32> [[VEC_PHI]] ; CHECK-VF4IC1: middle.block: -; CHECK-VF4IC1-NEXT: [[FIN_ICMP:%.*]] = icmp ne <vscale x 4 x i32> [[VEC_SEL]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 0, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-VF4IC1-NEXT: [[FIN_ICMP:%.*]] = icmp ne <vscale x 4 x i32> [[VEC_SEL]], zeroinitializer ; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[FIN_ICMP]]) ; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[OR_RDX]], i32 1, i32 0 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll 
b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll index c2a7f3edea0c9..d335d142617d1 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll @@ -47,7 +47,7 @@ define void @pointer_induction_used_as_vector(i8** noalias %start.1, i8* noalias ; CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64() ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[INDEX]], i32 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = add <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 0, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer), [[TMP6]] +; CHECK-NEXT: [[TMP7:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT]], [[TMP7]] ; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, i8* [[START_2]], <vscale x 2 x i64> [[TMP8]] ; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 0 @@ -126,7 +126,7 @@ define void @pointer_induction(i8* noalias %start, i64 %N) { ; CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64() ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[INDEX1]], i32 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = add <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 0, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer), [[TMP5]] +; CHECK-NEXT: [[TMP6:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT]], [[TMP6]] ; 
CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, i8* [[START]], <vscale x 2 x i64> [[TMP7]] ; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX1]], 0 diff --git a/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll b/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll index c8b23fba825bb..0ded0899078fb 100644 --- a/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll @@ -143,7 +143,7 @@ define void @add_unique_indf32(float* noalias nocapture %a, i64 %n) { ; CHECK: %[[STEPVEC:.*]] = call <vscale x 4 x i32> @llvm.experimental.stepvector.nxv4i32() ; CHECK-NEXT: %[[TMP1:.*]] = uitofp <vscale x 4 x i32> %[[STEPVEC]] to <vscale x 4 x float> ; CHECK-NEXT: %[[TMP2:.*]] = fmul <vscale x 4 x float> %[[TMP1]], shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 2.000000e+00, i32 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer) -; CHECK-NEXT: %[[INDINIT:.*]] = fadd <vscale x 4 x float> %[[TMP2]], shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 0.000000e+00, i32 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-NEXT: %[[INDINIT:.*]] = fadd <vscale x 4 x float> %[[TMP2]], zeroinitializer ; CHECK-NEXT: %[[VSCALE:.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: %[[TMP3:.*]] = shl i32 %8, 2 ; CHECK-NEXT: %[[TMP4:.*]] = uitofp i32 %[[TMP3]] to float diff --git a/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll b/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll index 9fcebe076eb0b..9d0aa5ef082bc 100644 --- a/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll @@ -7,8 +7,8 @@ define i8 @reduction_add_trunc(i8* noalias nocapture %A) { ; CHECK-LABEL: @reduction_add_trunc( ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ] 
-; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 8 x i32> [ insertelement (<vscale x 8 x i32> shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 0, i32 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer), i32 255, i32 0), %vector.ph ], [ [[TMP34:%.*]], %vector.body ] -; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <vscale x 8 x i32> [ shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 0, i32 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer), %vector.ph ], [ [[TMP36:%.*]], %vector.body ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 8 x i32> [ insertelement (<vscale x 8 x i32> zeroinitializer, i32 255, i32 0), %vector.ph ], [ [[TMP34:%.*]], %vector.body ] +; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <vscale x 8 x i32> [ zeroinitializer, %vector.ph ], [ [[TMP36:%.*]], %vector.body ] ; CHECK: [[TMP14:%.*]] = and <vscale x 8 x i32> [[VEC_PHI]], shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 255, i32 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer) ; CHECK-NEXT: [[TMP15:%.*]] = and <vscale x 8 x i32> [[VEC_PHI1]], shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 255, i32 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer) ; CHECK: [[WIDE_LOAD:%.*]] = load <vscale x 8 x i8>, <vscale x 8 x i8>* _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits