[PATCH] D113394: [IR] In ConstantFoldShuffleVectorInstruction use zeroinitializer for splats of 0

David Sherwood via Phabricator via cfe-commits Wed, 10 Nov 2021 01:44:59 -0800

This revision was automatically updated to reflect the committed changes.
david-arm marked an inline comment as done.
Closed by commit rG2a48b6993a97: [IR] In ConstantFoldShuffleVectorInstruction 
use zeroinitializer for splats of 0 (authored by david-arm).
Herald added a project: clang.
Herald added a subscriber: cfe-commits.


Changed prior to commit:
  https://reviews.llvm.org/D113394?vs=385764&id=386094#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D113394/new/

https://reviews.llvm.org/D113394

Files:
  clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dupq.c
  llvm/lib/IR/ConstantFold.cpp
  llvm/test/Bitcode/vscale-round-trip.ll
  llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll
  llvm/test/Transforms/LoopVectorize/AArch64/sve-basic-vec.ll
  llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll
  llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll
  llvm/test/Transforms/LoopVectorize/AArch64/sve-select-cmp.ll
  llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll
  llvm/test/Transforms/LoopVectorize/scalable-inductions.ll
  llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll

Index: llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll
+++ llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll
@@ -7,8 +7,8 @@
 ; CHECK-LABEL: @reduction_add_trunc(
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <vscale x 8 x i32> [ insertelement (<vscale x 8 x i32> shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 0, i32 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer), i32 255, i32 0), %vector.ph ], [ [[TMP34:%.*]], %vector.body ]
-; CHECK-NEXT:    [[VEC_PHI1:%.*]] = phi <vscale x 8 x i32> [ shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 0, i32 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer), %vector.ph ], [ [[TMP36:%.*]], %vector.body ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <vscale x 8 x i32> [ insertelement (<vscale x 8 x i32> zeroinitializer, i32 255, i32 0), %vector.ph ], [ [[TMP34:%.*]], %vector.body ]
+; CHECK-NEXT:    [[VEC_PHI1:%.*]] = phi <vscale x 8 x i32> [ zeroinitializer, %vector.ph ], [ [[TMP36:%.*]], %vector.body ]
 ; CHECK:         [[TMP14:%.*]] = and <vscale x 8 x i32> [[VEC_PHI]], shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 255, i32 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[TMP15:%.*]] = and <vscale x 8 x i32> [[VEC_PHI1]], shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 255, i32 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
 ; CHECK:         [[WIDE_LOAD:%.*]] = load <vscale x 8 x i8>, <vscale x 8 x i8>*
Index: llvm/test/Transforms/LoopVectorize/scalable-inductions.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/scalable-inductions.ll
+++ llvm/test/Transforms/LoopVectorize/scalable-inductions.ll
@@ -143,7 +143,7 @@
 ; CHECK:      %[[STEPVEC:.*]] = call <vscale x 4 x i32> @llvm.experimental.stepvector.nxv4i32()
 ; CHECK-NEXT: %[[TMP1:.*]] = uitofp <vscale x 4 x i32> %[[STEPVEC]] to <vscale x 4 x float>
 ; CHECK-NEXT: %[[TMP2:.*]] = fmul <vscale x 4 x float> %[[TMP1]], shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 2.000000e+00, i32 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-NEXT: %[[INDINIT:.*]] = fadd <vscale x 4 x float> %[[TMP2]], shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 0.000000e+00, i32 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT: %[[INDINIT:.*]] = fadd <vscale x 4 x float> %[[TMP2]], zeroinitializer
 ; CHECK-NEXT: %[[VSCALE:.*]] = call i32 @llvm.vscale.i32()
 ; CHECK-NEXT: %[[TMP3:.*]] = shl i32 %8, 2
 ; CHECK-NEXT: %[[TMP4:.*]] = uitofp i32 %[[TMP3]] to float
Index: llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll
+++ llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll
@@ -47,7 +47,7 @@
 ; CHECK-NEXT:    [[TMP6:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
 ; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[INDEX]], i32 0
 ; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP7:%.*]] = add <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 0, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer), [[TMP6]]
+; CHECK-NEXT:    [[TMP7:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP6]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT]], [[TMP7]]
 ; CHECK-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr i8, i8* [[START_2]], <vscale x 2 x i64> [[TMP8]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = add i64 [[INDEX]], 0
@@ -126,7 +126,7 @@
 ; CHECK-NEXT:    [[TMP5:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
 ; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[INDEX1]], i32 0
 ; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP6:%.*]] = add <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 0, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer), [[TMP5]]
+; CHECK-NEXT:    [[TMP6:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP5]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT]], [[TMP6]]
 ; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, i8* [[START]], <vscale x 2 x i64> [[TMP7]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX1]], 0
Index: llvm/test/Transforms/LoopVectorize/AArch64/sve-select-cmp.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/AArch64/sve-select-cmp.ll
+++ llvm/test/Transforms/LoopVectorize/AArch64/sve-select-cmp.ll
@@ -154,7 +154,7 @@
 define i32 @pred_select_const_i32_from_icmp(i32* noalias nocapture readonly %src1, i32* noalias nocapture readonly %src2, i64 %n) #0 {
 ; CHECK-VF4IC1-LABEL: @pred_select_const_i32_from_icmp
 ; CHECK-VF4IC1:      vector.body:
-; CHECK-VF4IC1:        [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 0, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ]
+; CHECK-VF4IC1:        [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ]
 ; CHECK-VF4IC1:        [[VEC_LOAD:%.*]] = load <vscale x 4 x i32>
 ; CHECK-VF4IC1:        [[MASK:%.*]] = icmp sgt <vscale x 4 x i32> [[VEC_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 35, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
 ; CHECK-VF4IC1:        [[MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* {{%.*}}, i32 4, <vscale x 4 x i1> [[MASK]], <vscale x 4 x i32> poison)
@@ -162,7 +162,7 @@
 ; CHECK-VF4IC1-NEXT:   [[VEC_SEL_TMP:%.*]] = select <vscale x 4 x i1> [[VEC_ICMP]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> [[VEC_PHI]]
 ; CHECK-VF4IC1:        [[VEC_SEL:%.*]] = select <vscale x 4 x i1> [[MASK]], <vscale x 4 x i32> [[VEC_SEL_TMP]], <vscale x 4 x i32> [[VEC_PHI]]
 ; CHECK-VF4IC1:      middle.block:
-; CHECK-VF4IC1-NEXT:   [[FIN_ICMP:%.*]] = icmp ne <vscale x 4 x i32> [[VEC_SEL]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 0, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-VF4IC1-NEXT:   [[FIN_ICMP:%.*]] = icmp ne <vscale x 4 x i32> [[VEC_SEL]], zeroinitializer
 ; CHECK-VF4IC1-NEXT:   [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[FIN_ICMP]])
 ; CHECK-VF4IC1-NEXT:   {{.*}} = select i1 [[OR_RDX]], i32 1, i32 0
 
Index: llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll
+++ llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll
@@ -57,7 +57,7 @@
 ; CHECK-NEXT:    %[[SPLAT_PTRS:.*]] = shufflevector <vscale x 4 x i32*> %[[TMP1]], <vscale x 4 x i32*> poison, <vscale x 4 x i32> zeroinitializer
 ; CHECK:       vector.body:
 ; CHECK:         %[[VECLOAD:.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* %{{.*}}, align 4
-; CHECK-NEXT:    %[[MASK:.*]] = icmp sgt <vscale x 4 x i32> %[[VECLOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 0, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    %[[MASK:.*]] = icmp sgt <vscale x 4 x i32> %[[VECLOAD]], zeroinitializer
 ; CHECK-NEXT:    call void @llvm.masked.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32> %[[VECLOAD]], <vscale x 4 x i32*> %[[SPLAT_PTRS]], i32 4, <vscale x 4 x i1> %[[MASK]])
 entry:
   br label %for.body
@@ -104,7 +104,7 @@
 ; CHECK-NEXT:    [[TMP5:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
 ; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[INDEX]], i32 0
 ; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP6:%.*]] = add <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 0, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer), [[TMP5]]
+; CHECK-NEXT:    [[TMP6:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP5]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT]], [[TMP6]]
 ; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i64, i64* [[START]], <vscale x 2 x i64> [[TMP7]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 0
Index: llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll
+++ llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll
@@ -83,7 +83,7 @@
 ; CHECK-NEXT: %[[SPLATINS:.*]] = insertelement <vscale x 4 x i32*> poison, i32* %[[GEP]], i32 0
 ; CHECK-NEXT: %[[SPLAT:.*]] = shufflevector <vscale x 4 x i32*> %[[SPLATINS]], <vscale x 4 x i32*> poison, <vscale x 4 x i32> zeroinitializer
 ; CHECK: %[[LOAD:.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>*
-; CHECK-NEXT: %[[ICMP:.*]] = icmp ne <vscale x 4 x i32> %[[LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 0, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT: %[[ICMP:.*]] = icmp ne <vscale x 4 x i32> %[[LOAD]], zeroinitializer
 ; CHECK: %[[MASKED_LOAD:.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* %[[BITCAST:.*]], i32 4, <vscale x 4 x i1> %[[ICMP]], <vscale x 4 x i32> poison)
 ; CHECK-NEXT: %[[MASKED_GATHER:.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> %[[SPLAT]], i32 4, <vscale x 4 x i1> %[[ICMP]], <vscale x 4 x i32> undef)
 ; CHECK-NEXT: %[[ADD:.*]] = add nsw <vscale x 4 x i32> %[[MASKED_GATHER]], %[[MASKED_LOAD]]
Index: llvm/test/Transforms/LoopVectorize/AArch64/sve-basic-vec.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/AArch64/sve-basic-vec.ll
+++ llvm/test/Transforms/LoopVectorize/AArch64/sve-basic-vec.ll
@@ -9,7 +9,7 @@
 ; CHECK-NEXT:  entry:
 ; CHECK:       vector.body:
 ; CHECK:         [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* {{.*}}, align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <vscale x 4 x i32> [[WIDE_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 0, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <vscale x 4 x i32> [[WIDE_LOAD]], zeroinitializer
 ; CHECK-NEXT:    [[TMP2:%.*]] = select <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 2, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 10, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
 ; CHECK:         store <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32>* {{.*}}, align 4
 ;
Index: llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll
+++ llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll
@@ -121,9 +121,9 @@
 ; CHECK-ORDERED: %[[LOAD2:.*]] = load float, float* %[[ARRAYIDX]]
 ; CHECK-ORDERED: vector.ph
 ; CHECK-ORDERED: %[[STEPVEC1:.*]] = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
-; CHECK-ORDERED: %[[STEPVEC_ADD1:.*]] = add <vscale x 4 x i64> %[[STEPVEC1]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 0, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-ORDERED: %[[STEPVEC_ADD1:.*]] = add <vscale x 4 x i64> %[[STEPVEC1]], zeroinitializer
 ; CHECK-ORDERED: %[[STEPVEC_MUL:.*]] = mul <vscale x 4 x i64> %[[STEPVEC_ADD1]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 2, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-ORDERED: %[[INDUCTION:.*]] = add <vscale x 4 x i64> shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 0, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer), %[[STEPVEC_MUL]]
+; CHECK-ORDERED: %[[INDUCTION:.*]] = add <vscale x 4 x i64> zeroinitializer, %[[STEPVEC_MUL]]
 ; CHECK-ORDERED: vector.body
 ; CHECK-ORDERED: %[[VEC_PHI2:.*]] = phi float [ %[[LOAD2]], %vector.ph ], [ %[[RDX2:.*]], %vector.body ]
 ; CHECK-ORDERED: %[[VEC_PHI1:.*]] = phi float [ %[[LOAD1]], %vector.ph ], [ %[[RDX1:.*]], %vector.body ]
@@ -147,9 +147,9 @@
 ; CHECK-UNORDERED: %[[INS_ELT2:.*]] = insertelement <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float -0.000000e+00, i32 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), float %[[LOAD2]], i32 0
 ; CHECK-UNORDERED: %[[INS_ELT1:.*]] = insertelement <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float -0.000000e+00, i32 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), float %[[LOAD1]], i32 0
 ; CHECK-UNORDERED: %[[STEPVEC1:.*]] = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
-; CHECK-UNORDERED: %[[STEPVEC_ADD1:.*]] = add <vscale x 4 x i64> %[[STEPVEC1]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 0, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-UNORDERED: %[[STEPVEC_ADD1:.*]] = add <vscale x 4 x i64> %[[STEPVEC1]], zeroinitializer
 ; CHECK-UNORDERED: %[[STEPVEC_MUL:.*]] = mul <vscale x 4 x i64> %[[STEPVEC_ADD1]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 2, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-UNORDERED: %[[INDUCTION:.*]] = add <vscale x 4 x i64> shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 0, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer), %[[STEPVEC_MUL]]
+; CHECK-UNORDERED: %[[INDUCTION:.*]] = add <vscale x 4 x i64> zeroinitializer, %[[STEPVEC_MUL]]
 ; CHECK-UNORDERED: vector.body
 ; CHECK-UNORDERED: %[[VEC_PHI2:.*]] = phi <vscale x 4 x float> [ %[[INS_ELT2]], %vector.ph ], [ %[[VEC_FADD2:.*]], %vector.body ]
 ; CHECK-UNORDERED: %[[VEC_PHI1:.*]] = phi <vscale x 4 x float> [ %[[INS_ELT1]], %vector.ph ], [ %[[VEC_FADD1:.*]], %vector.body ]
@@ -273,7 +273,7 @@
 ; CHECK-ORDERED: vector.body
 ; CHECK-ORDERED: %[[VEC_PHI:.*]] = phi float [ 1.000000e+00, %vector.ph ], [ %[[RDX:.*]], %vector.body ]
 ; CHECK-ORDERED: %[[LOAD:.*]] = load <vscale x 4 x float>, <vscale x 4 x float>*
-; CHECK-ORDERED: %[[FCMP:.*]] = fcmp une <vscale x 4 x float> %[[LOAD]], shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 0.000000e+00, i32 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-ORDERED: %[[FCMP:.*]] = fcmp une <vscale x 4 x float> %[[LOAD]], zeroinitializer
 ; CHECK-ORDERED: %[[MASKED_LOAD:.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0nxv4f32(<vscale x 4 x float>* {{.*}}, i32 4, <vscale x 4 x i1> %[[FCMP]], <vscale x 4 x float> poison)
 ; CHECK-ORDERED: %[[XOR:.*]] = xor <vscale x 4 x i1> %[[FCMP]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i32 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
 ; CHECK-ORDERED: %[[SELECT:.*]] = select <vscale x 4 x i1> %[[XOR]], <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 3.000000e+00, i32 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x float> %[[MASKED_LOAD]]
@@ -295,7 +295,7 @@
 ; CHECK-UNORDERED: vector.body
 ; CHECK-UNORDERED: %[[VEC_PHI:.*]] = phi <vscale x 4 x float> [ insertelement (<vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float -0.000000e+00, i32 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), float 1.000000e+00, i32 0), %vector.ph ], [ %[[VEC_FADD:.*]], %vector.body ]
 ; CHECK-UNORDERED: %[[LOAD1:.*]] = load <vscale x 4 x float>, <vscale x 4 x float>*
-; CHECK-UNORDERED: %[[FCMP:.*]] = fcmp une <vscale x 4 x float> %[[LOAD1]], shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 0.000000e+00, i32 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-UNORDERED: %[[FCMP:.*]] = fcmp une <vscale x 4 x float> %[[LOAD1]], zeroinitializer
 ; CHECK-UNORDERED: %[[MASKED_LOAD:.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0nxv4f32(<vscale x 4 x float>* {{.*}}, i32 4, <vscale x 4 x i1> %[[FCMP]], <vscale x 4 x float> poison)
 ; CHECK-UNORDERED: %[[XOR:.*]] = xor <vscale x 4 x i1> %[[FCMP]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i32 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
 ; CHECK-UNORDERED: %[[SELECT:.*]] = select <vscale x 4 x i1> %[[XOR]], <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 3.000000e+00, i32 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x float> %[[MASKED_LOAD]]
Index: llvm/test/Bitcode/vscale-round-trip.ll
===================================================================
--- llvm/test/Bitcode/vscale-round-trip.ll
+++ llvm/test/Bitcode/vscale-round-trip.ll
@@ -6,7 +6,7 @@
 @important_val = extern_weak dso_local global i32, align 4
 
 ; CHECK-LABEL: define <vscale x 4 x i32> @const_shufflevector(
-; CHECK: <vscale x 4 x i32> shufflevector (<vscale x 4 x i32>
+; CHECK: <vscale x 4 x i32> zeroinitializer
 
 define <vscale x 4 x i32> @const_shufflevector() {
   ret <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> zeroinitializer,
@@ -15,7 +15,7 @@
 }
 
 ; CHECK-LABEL: define <vscale x 4 x i32> @const_shufflevector_ex()
-; CHECK: <vscale x 4 x i32> shufflevector (<vscale x 2 x i32>
+; CHECK: <vscale x 4 x i32> zeroinitializer
 
 define <vscale x 4 x i32> @const_shufflevector_ex() {
   ret <vscale x 4 x i32> shufflevector (<vscale x 2 x i32> zeroinitializer,
Index: llvm/lib/IR/ConstantFold.cpp
===================================================================
--- llvm/lib/IR/ConstantFold.cpp
+++ llvm/lib/IR/ConstantFold.cpp
@@ -731,12 +731,16 @@
 
   // If the mask is all zeros this is a splat, no need to go through all
   // elements.
-  if (all_of(Mask, [](int Elt) { return Elt == 0; }) &&
-      !MaskEltCount.isScalable()) {
+  if (all_of(Mask, [](int Elt) { return Elt == 0; })) {
     Type *Ty = IntegerType::get(V1->getContext(), 32);
     Constant *Elt =
         ConstantExpr::getExtractElement(V1, ConstantInt::get(Ty, 0));
-    return ConstantVector::getSplat(MaskEltCount, Elt);
+
+    if (Elt->isNullValue()) {
+      auto *VTy = VectorType::get(EltTy, MaskEltCount);
+      return ConstantAggregateZero::get(VTy);
+    } else if (!MaskEltCount.isScalable())
+      return ConstantVector::getSplat(MaskEltCount, Elt);
   }
   // Do not iterate on scalable vector. The num of elements is unknown at
   // compile-time.
Index: clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dupq.c
===================================================================
--- clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dupq.c
+++ clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dupq.c
@@ -568,7 +568,7 @@
 // CHECK-NEXT:    [[TMP16:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
 // CHECK-NEXT:    [[TMP17:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> undef, <16 x i8> [[TMP15]], i64 0)
 // CHECK-NEXT:    [[TMP18:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.dupq.lane.nxv16i8(<vscale x 16 x i8> [[TMP17]], i64 0)
-// CHECK-NEXT:    [[TMP19:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpne.wide.nxv16i8(<vscale x 16 x i1> [[TMP16]], <vscale x 16 x i8> [[TMP18]], <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 0, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer))
+// CHECK-NEXT:    [[TMP19:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpne.wide.nxv16i8(<vscale x 16 x i1> [[TMP16]], <vscale x 16 x i8> [[TMP18]], <vscale x 2 x i64> zeroinitializer)
 // CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP19]]
 //
 // CPP-CHECK-LABEL: @_Z16test_svdupq_n_b8bbbbbbbbbbbbbbbb(
@@ -608,7 +608,7 @@
 // CPP-CHECK-NEXT:    [[TMP16:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
 // CPP-CHECK-NEXT:    [[TMP17:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> undef, <16 x i8> [[TMP15]], i64 0)
 // CPP-CHECK-NEXT:    [[TMP18:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.dupq.lane.nxv16i8(<vscale x 16 x i8> [[TMP17]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP19:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpne.wide.nxv16i8(<vscale x 16 x i1> [[TMP16]], <vscale x 16 x i8> [[TMP18]], <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 0, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer))
+// CPP-CHECK-NEXT:    [[TMP19:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpne.wide.nxv16i8(<vscale x 16 x i1> [[TMP16]], <vscale x 16 x i8> [[TMP18]], <vscale x 2 x i64> zeroinitializer)
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP19]]
 //
 svbool_t test_svdupq_n_b8(bool x0, bool x1, bool x2, bool x3,
@@ -641,7 +641,7 @@
 // CHECK-NEXT:    [[TMP16:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
 // CHECK-NEXT:    [[TMP17:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> undef, <8 x i16> [[TMP15]], i64 0)
 // CHECK-NEXT:    [[TMP18:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.dupq.lane.nxv8i16(<vscale x 8 x i16> [[TMP17]], i64 0)
-// CHECK-NEXT:    [[TMP19:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.cmpne.wide.nxv8i16(<vscale x 8 x i1> [[TMP16]], <vscale x 8 x i16> [[TMP18]], <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 0, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer))
+// CHECK-NEXT:    [[TMP19:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.cmpne.wide.nxv8i16(<vscale x 8 x i1> [[TMP16]], <vscale x 8 x i16> [[TMP18]], <vscale x 2 x i64> zeroinitializer)
 // CHECK-NEXT:    [[TMP20:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP19]])
 // CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP20]]
 //
@@ -666,7 +666,7 @@
 // CPP-CHECK-NEXT:    [[TMP16:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
 // CPP-CHECK-NEXT:    [[TMP17:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> undef, <8 x i16> [[TMP15]], i64 0)
 // CPP-CHECK-NEXT:    [[TMP18:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.dupq.lane.nxv8i16(<vscale x 8 x i16> [[TMP17]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP19:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.cmpne.wide.nxv8i16(<vscale x 8 x i1> [[TMP16]], <vscale x 8 x i16> [[TMP18]], <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 0, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer))
+// CPP-CHECK-NEXT:    [[TMP19:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.cmpne.wide.nxv8i16(<vscale x 8 x i1> [[TMP16]], <vscale x 8 x i16> [[TMP18]], <vscale x 2 x i64> zeroinitializer)
 // CPP-CHECK-NEXT:    [[TMP20:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP19]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP20]]
 //
@@ -690,7 +690,7 @@
 // CHECK-NEXT:    [[TMP8:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
 // CHECK-NEXT:    [[TMP9:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> [[TMP7]], i64 0)
 // CHECK-NEXT:    [[TMP10:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32> [[TMP9]], i64 0)
-// CHECK-NEXT:    [[TMP11:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.cmpne.wide.nxv4i32(<vscale x 4 x i1> [[TMP8]], <vscale x 4 x i32> [[TMP10]], <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 0, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer))
+// CHECK-NEXT:    [[TMP11:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.cmpne.wide.nxv4i32(<vscale x 4 x i1> [[TMP8]], <vscale x 4 x i32> [[TMP10]], <vscale x 2 x i64> zeroinitializer)
 // CHECK-NEXT:    [[TMP12:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP11]])
 // CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP12]]
 //
@@ -707,7 +707,7 @@
 // CPP-CHECK-NEXT:    [[TMP8:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
 // CPP-CHECK-NEXT:    [[TMP9:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> [[TMP7]], i64 0)
 // CPP-CHECK-NEXT:    [[TMP10:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32> [[TMP9]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP11:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.cmpne.wide.nxv4i32(<vscale x 4 x i1> [[TMP8]], <vscale x 4 x i32> [[TMP10]], <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 0, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer))
+// CPP-CHECK-NEXT:    [[TMP11:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.cmpne.wide.nxv4i32(<vscale x 4 x i1> [[TMP8]], <vscale x 4 x i32> [[TMP10]], <vscale x 2 x i64> zeroinitializer)
 // CPP-CHECK-NEXT:    [[TMP12:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP11]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP12]]
 //
@@ -726,7 +726,7 @@
 // CHECK-NEXT:    [[TMP4:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
 // CHECK-NEXT:    [[TMP5:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> undef, <2 x i64> [[TMP3]], i64 0)
 // CHECK-NEXT:    [[TMP6:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> [[TMP5]], i64 0)
-// CHECK-NEXT:    [[TMP7:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.cmpne.nxv2i64(<vscale x 2 x i1> [[TMP4]], <vscale x 2 x i64> [[TMP6]], <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 0, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer))
+// CHECK-NEXT:    [[TMP7:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.cmpne.nxv2i64(<vscale x 2 x i1> [[TMP4]], <vscale x 2 x i64> [[TMP6]], <vscale x 2 x i64> zeroinitializer)
 // CHECK-NEXT:    [[TMP8:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP7]])
 // CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP8]]
 //
@@ -739,7 +739,7 @@
 // CPP-CHECK-NEXT:    [[TMP4:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
 // CPP-CHECK-NEXT:    [[TMP5:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> undef, <2 x i64> [[TMP3]], i64 0)
 // CPP-CHECK-NEXT:    [[TMP6:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> [[TMP5]], i64 0)
-// CPP-CHECK-NEXT:    [[TMP7:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.cmpne.nxv2i64(<vscale x 2 x i1> [[TMP4]], <vscale x 2 x i64> [[TMP6]], <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 0, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer))
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.cmpne.nxv2i64(<vscale x 2 x i1> [[TMP4]], <vscale x 2 x i64> [[TMP6]], <vscale x 2 x i64> zeroinitializer)
 // CPP-CHECK-NEXT:    [[TMP8:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP7]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP8]]
 //

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D113394: [IR] In ConstantFoldShuffleVectorInstruction use zeroinitializer for splats of 0

Reply via email to