[llvm-branch-commits] [llvm] 39e1e53 - [SLP] add reduction test with mixed fast-math-flags; NFC

2021-01-23 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2021-01-23T11:17:20-05:00
New Revision: 39e1e53a7c162652c6c138d1bcf50d2766fe9561

URL: 
https://github.com/llvm/llvm-project/commit/39e1e53a7c162652c6c138d1bcf50d2766fe9561
DIFF: 
https://github.com/llvm/llvm-project/commit/39e1e53a7c162652c6c138d1bcf50d2766fe9561.diff

LOG: [SLP] add reduction test with mixed fast-math-flags; NFC

Added: 


Modified: 
llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll

Removed: 




diff --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll
index 8e175f1acda9..38d36c676fa7 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll
@@ -1801,4 +1801,36 @@ define float @fadd_v4f32_fmf(float* %p) {
   ret float %add3
 }
 
+define float @fadd_v4f32_fmf_intersect(float* %p) {
+; CHECK-LABEL: @fadd_v4f32_fmf_intersect(
+; CHECK-NEXT:[[P1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1
+; CHECK-NEXT:[[P2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2
+; CHECK-NEXT:[[P3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3
+; CHECK-NEXT:[[TMP1:%.*]] = bitcast float* [[P]] to <4 x float>*
+; CHECK-NEXT:[[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
+; CHECK-NEXT:[[TMP3:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.00e+00, <4 x float> [[TMP2]])
+; CHECK-NEXT:ret float [[TMP3]]
+;
+; STORE-LABEL: @fadd_v4f32_fmf_intersect(
+; STORE-NEXT:[[P1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1
+; STORE-NEXT:[[P2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2
+; STORE-NEXT:[[P3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3
+; STORE-NEXT:[[TMP1:%.*]] = bitcast float* [[P]] to <4 x float>*
+; STORE-NEXT:[[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
+; STORE-NEXT:[[TMP3:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.00e+00, <4 x float> [[TMP2]])
+; STORE-NEXT:ret float [[TMP3]]
+;
+  %p1 = getelementptr inbounds float, float* %p, i64 1
+  %p2 = getelementptr inbounds float, float* %p, i64 2
+  %p3 = getelementptr inbounds float, float* %p, i64 3
+  %t0 = load float, float* %p, align 4
+  %t1 = load float, float* %p1, align 4
+  %t2 = load float, float* %p2, align 4
+  %t3 = load float, float* %p3, align 4
+  %add1 = fadd ninf reassoc nsz nnan float %t1, %t0
+  %add2 = fadd ninf reassoc nsz nnan arcp float %t2, %add1
+  %add3 = fadd ninf reassoc nsz contract float %t3, %add2
+  ret float %add3
+}
+
 declare i32 @__gxx_personality_v0(...)





[llvm-branch-commits] [llvm] a6f0221 - [SLP] fix fast-math-flag propagation on FP reductions

2021-01-23 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2021-01-23T11:17:20-05:00
New Revision: a6f02212764a76935ec5fb704fe86a1a76f65745

URL: 
https://github.com/llvm/llvm-project/commit/a6f02212764a76935ec5fb704fe86a1a76f65745
DIFF: 
https://github.com/llvm/llvm-project/commit/a6f02212764a76935ec5fb704fe86a1a76f65745.diff

LOG: [SLP] fix fast-math-flag propagation on FP reductions

As shown in the test diffs, we could miscompile by
propagating flags that did not exist in the original
code.

The flags required for fmin/fmax reductions will be
fixed in a follow-up patch.
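
The heart of the change is the intersection loop in the diff below. A minimal
standalone sketch of the same idea (the helper name is invented for
illustration):

  #include "llvm/ADT/ArrayRef.h"
  #include "llvm/IR/Operator.h"
  using namespace llvm;

  // Start with every fast-math flag set and AND in each scalar reduction
  // op's flags, so the vector reduction keeps only flags common to the
  // whole chain.
  FastMathFlags intersectFMF(ArrayRef<Value *> RdxOps) {
    FastMathFlags Common;
    Common.set();                           // begin with all flags enabled
    for (Value *V : RdxOps)
      if (auto *FPMO = dyn_cast<FPMathOperator>(V))
        Common &= FPMO->getFastMathFlags(); // drop flags any op lacks
    return Common;
  }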

Added: 


Modified: 
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll

Removed: 




diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 78ce4870588c..6c2b10e5b9fa 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -6820,12 +6820,18 @@ class HorizontalReduction {
 if (NumReducedVals < 4)
   return false;
 
-// FIXME: Fast-math-flags should be set based on the instructions in the
-//reduction (not all of 'fast' are required).
+// Intersect the fast-math-flags from all reduction operations.
+FastMathFlags RdxFMF;
+RdxFMF.set();
+for (ReductionOpsType &RdxOp : ReductionOps) {
+  for (Value *RdxVal : RdxOp) {
+if (auto *FPMO = dyn_cast<FPMathOperator>(RdxVal))
+  RdxFMF &= FPMO->getFastMathFlags();
+  }
+}
+
 IRBuilder<> Builder(cast<Instruction>(ReductionRoot));
-FastMathFlags Unsafe;
-Unsafe.setFast();
-Builder.setFastMathFlags(Unsafe);
+Builder.setFastMathFlags(RdxFMF);
 
 BoUpSLP::ExtraValueToDebugLocsMap ExternallyUsedValues;
 // The same extra argument may be used several times, so log each attempt
@@ -7071,9 +7077,6 @@ class HorizontalReduction {
 assert(isPowerOf2_32(ReduxWidth) &&
"We only handle power-of-two reductions for now");
 
-// FIXME: The builder should use an FMF guard. It should not be hard-coded
-//to 'fast'.
-assert(Builder.getFastMathFlags().isFast() && "Expected 'fast' FMF");
 return createSimpleTargetReduction(Builder, TTI, VectorizedValue, RdxKind,
ReductionOps.back());
   }

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll
index 38d36c676fa7..03ec04cb8cbe 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll
@@ -1766,7 +1766,6 @@ bb.1:
   ret void
 }
 
-; FIXME: This is a miscompile.
 ; The FMF on the reduction should match the incoming insts.
 
 define float @fadd_v4f32_fmf(float* %p) {
@@ -1776,7 +1775,7 @@ define float @fadd_v4f32_fmf(float* %p) {
 ; CHECK-NEXT:[[P3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3
 ; CHECK-NEXT:[[TMP1:%.*]] = bitcast float* [[P]] to <4 x float>*
 ; CHECK-NEXT:[[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
-; CHECK-NEXT:[[TMP3:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.00e+00, <4 x float> [[TMP2]])
+; CHECK-NEXT:[[TMP3:%.*]] = call reassoc nsz float @llvm.vector.reduce.fadd.v4f32(float -0.00e+00, <4 x float> [[TMP2]])
 ; CHECK-NEXT:ret float [[TMP3]]
 ;
 ; STORE-LABEL: @fadd_v4f32_fmf(
@@ -1785,7 +1784,7 @@ define float @fadd_v4f32_fmf(float* %p) {
 ; STORE-NEXT:[[P3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3
 ; STORE-NEXT:[[TMP1:%.*]] = bitcast float* [[P]] to <4 x float>*
 ; STORE-NEXT:[[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
-; STORE-NEXT:[[TMP3:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.00e+00, <4 x float> [[TMP2]])
+; STORE-NEXT:[[TMP3:%.*]] = call reassoc nsz float @llvm.vector.reduce.fadd.v4f32(float -0.00e+00, <4 x float> [[TMP2]])
 ; STORE-NEXT:ret float [[TMP3]]
 ;
   %p1 = getelementptr inbounds float, float* %p, i64 1
@@ -1801,6 +1800,10 @@ define float @fadd_v4f32_fmf(float* %p) {
   ret float %add3
 }
 
+; The minimal FMF for fadd reduction are "reassoc nsz".
+; Only the common FMF of all operations in the reduction propagate to the result.
+; In this example, "contract nnan arcp" are dropped, but "ninf" transfers with the required flags.
+
 define float @fadd_v4f32_fmf_intersect(float* %p) {
 ; CHECK-LABEL: @fadd_v4f32_fmf_intersect(
 ; CHECK-NEXT:[[P1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1
@@ -1808,7 +1811,7 @@ define float @fadd_v4f32_fmf_intersect(float* %p) {
 ; CHECK-NEXT:[[P3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3
 ; CHECK-NEXT:[[TMP1:%.*]] = bitcast float* [[P]] to <4 x float>*
 ; CHECK-NEXT:[[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 
4
-; CHECK-NEXT:[[TMP3:%.*]] = cal

[llvm-branch-commits] [llvm] 77adbe6 - [SLP] fix fast-math requirements for fmin/fmax reductions

2021-01-24 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2021-01-24T08:55:56-05:00
New Revision: 77adbe6a8c716bead04393560ec5aa88877ac1d2

URL: 
https://github.com/llvm/llvm-project/commit/77adbe6a8c716bead04393560ec5aa88877ac1d2
DIFF: 
https://github.com/llvm/llvm-project/commit/77adbe6a8c716bead04393560ec5aa88877ac1d2.diff

LOG: [SLP] fix fast-math requirements for fmin/fmax reductions

a6f0221276 enabled intersection of FMF on reduction instructions,
so it is safe to ease the check here.

There is still some room to improve here - it looks like we
have nearly duplicate flags propagation logic inside of the
LoopUtils helper, but it is limited to targets that do not form
reduction intrinsics (they use the shuffle expansion instead).
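
A hedged sketch of the eased legality check (illustrative helper name; the
real change is the one-line return in the diff below):

  #include "llvm/IR/Instruction.h"
  using namespace llvm;

  // FP min/max can be reassociated once NaNs are ruled out; -0.0 needs no
  // flag because the minnum/maxnum semantics do not fix its result anyway.
  static bool isReassociableFPMinMax(const Instruction *I) {
    return I->getFastMathFlags().noNaNs(); // 'fast' is no longer required
  }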

Added: 


Modified: 
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
llvm/test/Transforms/SLPVectorizer/X86/fmaxnum.ll
llvm/test/Transforms/SLPVectorizer/X86/fminnum.ll

Removed: 




diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index c5cfc9e77d8a..7114b4d412fd 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -6422,9 +6422,7 @@ class HorizontalReduction {
   // FP min/max are associative except for NaN and -0.0. We do not
   // have to rule out -0.0 here because the intrinsic semantics do not
   // specify a fixed result for it.
-  // TODO: This is artificially restricted to fast because the code that
-  //   creates reductions assumes/produces fast ops.
-  return I->getFastMathFlags().isFast();
+  return I->getFastMathFlags().noNaNs();
 }
 
 return I->isAssociative();

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/fmaxnum.ll b/llvm/test/Transforms/SLPVectorizer/X86/fmaxnum.ll
index fc134aa6deef..8136f2cb2dfe 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/fmaxnum.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/fmaxnum.ll
@@ -361,21 +361,15 @@ define float @reduction_v4f32_fast(float* %p) {
   ret float %m3
 }
 
-; TODO: This should become a reduce intrinsic.
-
 define float @reduction_v4f32_nnan(float* %p) {
 ; CHECK-LABEL: @reduction_v4f32_nnan(
 ; CHECK-NEXT:[[G1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1
 ; CHECK-NEXT:[[G2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2
 ; CHECK-NEXT:[[G3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3
-; CHECK-NEXT:[[T0:%.*]] = load float, float* [[P]], align 4
-; CHECK-NEXT:[[T1:%.*]] = load float, float* [[G1]], align 4
-; CHECK-NEXT:[[T2:%.*]] = load float, float* [[G2]], align 4
-; CHECK-NEXT:[[T3:%.*]] = load float, float* [[G3]], align 4
-; CHECK-NEXT:[[M1:%.*]] = tail call nnan float @llvm.maxnum.f32(float [[T1]], float [[T0]])
-; CHECK-NEXT:[[M2:%.*]] = tail call nnan float @llvm.maxnum.f32(float [[T2]], float [[M1]])
-; CHECK-NEXT:[[M3:%.*]] = tail call nnan float @llvm.maxnum.f32(float [[T3]], float [[M2]])
-; CHECK-NEXT:ret float [[M3]]
-; CHECK-NEXT:ret float [[M3]]
+; CHECK-NEXT:[[TMP1:%.*]] = bitcast float* [[P]] to <4 x float>*
+; CHECK-NEXT:[[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
+; CHECK-NEXT:[[TMP3:%.*]] = call nnan float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[TMP2]])
+; CHECK-NEXT:ret float [[TMP3]]
 ;
   %g1 = getelementptr inbounds float, float* %p, i64 1
   %g2 = getelementptr inbounds float, float* %p, i64 2

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/fminnum.ll b/llvm/test/Transforms/SLPVectorizer/X86/fminnum.ll
index e5a4fc235748..470dc8290eee 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/fminnum.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/fminnum.ll
@@ -361,21 +361,15 @@ define float @reduction_v4f32_fast(float* %p) {
   ret float %m3
 }
 
-; TODO: This should become a reduce intrinsic.
-
 define float @reduction_v4f32_nnan(float* %p) {
 ; CHECK-LABEL: @reduction_v4f32_nnan(
 ; CHECK-NEXT:[[G1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1
 ; CHECK-NEXT:[[G2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2
 ; CHECK-NEXT:[[G3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3
-; CHECK-NEXT:[[T0:%.*]] = load float, float* [[P]], align 4
-; CHECK-NEXT:[[T1:%.*]] = load float, float* [[G1]], align 4
-; CHECK-NEXT:[[T2:%.*]] = load float, float* [[G2]], align 4
-; CHECK-NEXT:[[T3:%.*]] = load float, float* [[G3]], align 4
-; CHECK-NEXT:[[M1:%.*]] = tail call nnan float @llvm.minnum.f32(float [[T1]], float [[T0]])
-; CHECK-NEXT:[[M2:%.*]] = tail call nnan float @llvm.minnum.f32(float [[T2]], float [[M1]])
-; CHECK-NEXT:[[M3:%.*]] = tail call nnan float @llvm.minnum.f32(float [[T3]], float [[M2]])
-; CHECK-NEXT:ret float [[M3]]
+; CHECK-NEXT:[[TMP1:%.*]] = bitcast float* [[P]] to <4 x float>*
+; CHECK-NEXT:[[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align

[llvm-branch-commits] [llvm] 07b60d0 - [InstCombine] add tests for min/max intrinsics with extended values; NFC

2021-01-25 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2021-01-25T07:52:50-05:00
New Revision: 07b60d0060688dea121be36b46de859bafcec29b

URL: 
https://github.com/llvm/llvm-project/commit/07b60d0060688dea121be36b46de859bafcec29b
DIFF: 
https://github.com/llvm/llvm-project/commit/07b60d0060688dea121be36b46de859bafcec29b.diff

LOG: [InstCombine] add tests for min/max intrinsics with extended values; NFC

Added: 


Modified: 
llvm/test/Transforms/InstCombine/minmax-intrinsics.ll

Removed: 




diff --git a/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll b/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll
index 797f85d94447..bccfac81bdce 100644
--- a/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll
@@ -5,6 +5,8 @@ declare i8 @llvm.umin.i8(i8, i8)
 declare i8 @llvm.umax.i8(i8, i8)
 declare i8 @llvm.smin.i8(i8, i8)
 declare i8 @llvm.smax.i8(i8, i8)
+declare <3 x i8> @llvm.umin.v3i8(<3 x i8>, <3 x i8>)
+declare void @use(i8)
 
 define i8 @umin_known_bits(i8 %x, i8 %y) {
 ; CHECK-LABEL: @umin_known_bits(
@@ -45,3 +47,154 @@ define i8 @smax_known_bits(i8 %x, i8 %y) {
   %r = and i8 %m, -128
   ret i8 %r
 }
+
+define i8 @smax_sext(i5 %x, i5 %y) {
+; CHECK-LABEL: @smax_sext(
+; CHECK-NEXT:[[SX:%.*]] = sext i5 [[X:%.*]] to i8
+; CHECK-NEXT:[[SY:%.*]] = sext i5 [[Y:%.*]] to i8
+; CHECK-NEXT:[[M:%.*]] = call i8 @llvm.smax.i8(i8 [[SX]], i8 [[SY]])
+; CHECK-NEXT:ret i8 [[M]]
+;
+  %sx = sext i5 %x to i8
+  %sy = sext i5 %y to i8
+  %m = call i8 @llvm.smax.i8(i8 %sx, i8 %sy)
+  ret i8 %m
+}
+
+define i8 @smin_sext(i5 %x, i5 %y) {
+; CHECK-LABEL: @smin_sext(
+; CHECK-NEXT:[[SX:%.*]] = sext i5 [[X:%.*]] to i8
+; CHECK-NEXT:[[SY:%.*]] = sext i5 [[Y:%.*]] to i8
+; CHECK-NEXT:call void @use(i8 [[SY]])
+; CHECK-NEXT:[[M:%.*]] = call i8 @llvm.smin.i8(i8 [[SX]], i8 [[SY]])
+; CHECK-NEXT:ret i8 [[M]]
+;
+  %sx = sext i5 %x to i8
+  %sy = sext i5 %y to i8
+  call void @use(i8 %sy)
+  %m = call i8 @llvm.smin.i8(i8 %sx, i8 %sy)
+  ret i8 %m
+}
+
+define i8 @umax_sext(i5 %x, i5 %y) {
+; CHECK-LABEL: @umax_sext(
+; CHECK-NEXT:[[SX:%.*]] = sext i5 [[X:%.*]] to i8
+; CHECK-NEXT:call void @use(i8 [[SX]])
+; CHECK-NEXT:[[SY:%.*]] = sext i5 [[Y:%.*]] to i8
+; CHECK-NEXT:[[M:%.*]] = call i8 @llvm.umax.i8(i8 [[SX]], i8 [[SY]])
+; CHECK-NEXT:ret i8 [[M]]
+;
+  %sx = sext i5 %x to i8
+  call void @use(i8 %sx)
+  %sy = sext i5 %y to i8
+  %m = call i8 @llvm.umax.i8(i8 %sx, i8 %sy)
+  ret i8 %m
+}
+
+define <3 x i8> @umin_sext(<3 x i5> %x, <3 x i5> %y) {
+; CHECK-LABEL: @umin_sext(
+; CHECK-NEXT:[[SX:%.*]] = sext <3 x i5> [[X:%.*]] to <3 x i8>
+; CHECK-NEXT:[[SY:%.*]] = sext <3 x i5> [[Y:%.*]] to <3 x i8>
+; CHECK-NEXT:[[M:%.*]] = call <3 x i8> @llvm.umin.v3i8(<3 x i8> [[SX]], <3 x i8> [[SY]])
+; CHECK-NEXT:ret <3 x i8> [[M]]
+;
+  %sx = sext <3 x i5> %x to <3 x i8>
+  %sy = sext <3 x i5> %y to <3 x i8>
+  %m = call <3 x i8> @llvm.umin.v3i8(<3 x i8> %sx, <3 x i8> %sy)
+  ret <3 x i8> %m
+}
+
+define i8 @smax_zext(i5 %x, i5 %y) {
+; CHECK-LABEL: @smax_zext(
+; CHECK-NEXT:[[ZX:%.*]] = zext i5 [[X:%.*]] to i8
+; CHECK-NEXT:[[ZY:%.*]] = zext i5 [[Y:%.*]] to i8
+; CHECK-NEXT:[[M:%.*]] = call i8 @llvm.smax.i8(i8 [[ZX]], i8 [[ZY]])
+; CHECK-NEXT:ret i8 [[M]]
+;
+  %zx = zext i5 %x to i8
+  %zy = zext i5 %y to i8
+  %m = call i8 @llvm.smax.i8(i8 %zx, i8 %zy)
+  ret i8 %m
+}
+
+define i8 @smin_zext(i5 %x, i5 %y) {
+; CHECK-LABEL: @smin_zext(
+; CHECK-NEXT:[[ZX:%.*]] = zext i5 [[X:%.*]] to i8
+; CHECK-NEXT:[[ZY:%.*]] = zext i5 [[Y:%.*]] to i8
+; CHECK-NEXT:[[M:%.*]] = call i8 @llvm.smin.i8(i8 [[ZX]], i8 [[ZY]])
+; CHECK-NEXT:ret i8 [[M]]
+;
+  %zx = zext i5 %x to i8
+  %zy = zext i5 %y to i8
+  %m = call i8 @llvm.smin.i8(i8 %zx, i8 %zy)
+  ret i8 %m
+}
+
+define i8 @umax_zext(i5 %x, i5 %y) {
+; CHECK-LABEL: @umax_zext(
+; CHECK-NEXT:[[ZX:%.*]] = zext i5 [[X:%.*]] to i8
+; CHECK-NEXT:[[ZY:%.*]] = zext i5 [[Y:%.*]] to i8
+; CHECK-NEXT:[[M:%.*]] = call i8 @llvm.umax.i8(i8 [[ZX]], i8 [[ZY]])
+; CHECK-NEXT:ret i8 [[M]]
+;
+  %zx = zext i5 %x to i8
+  %zy = zext i5 %y to i8
+  %m = call i8 @llvm.umax.i8(i8 %zx, i8 %zy)
+  ret i8 %m
+}
+
+define i8 @umin_zext(i5 %x, i5 %y) {
+; CHECK-LABEL: @umin_zext(
+; CHECK-NEXT:[[ZX:%.*]] = zext i5 [[X:%.*]] to i8
+; CHECK-NEXT:[[ZY:%.*]] = zext i5 [[Y:%.*]] to i8
+; CHECK-NEXT:[[M:%.*]] = call i8 @llvm.umin.i8(i8 [[ZX]], i8 [[ZY]])
+; CHECK-NEXT:ret i8 [[M]]
+;
+  %zx = zext i5 %x to i8
+  %zy = zext i5 %y to i8
+  %m = call i8 @llvm.umin.i8(i8 %zx, i8 %zy)
+  ret i8 %m
+}
+
+define i8 @umin_zext_types(i6 %x, i5 %y) {
+; CHECK-LABEL: @umin_zext_types(
+; CHECK-NEXT:[[ZX:%.*]] = zext i6 [[X:%.*]] to i8
+; CHECK-NEXT:[[ZY:%.*]] = zext i5 [[Y:%.*]] to i8
+; CHECK-NEXT:[[M:%.*]] = call i8 @llvm.umin.i8(i8 [[ZX]], i8 [[ZY]])
+; CHECK-NEXT:re

[llvm-branch-commits] [llvm] 09a136b - [InstCombine] narrow min/max intrinsics with extended inputs

2021-01-25 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2021-01-25T07:52:50-05:00
New Revision: 09a136bcc6947128df86492d88f1733bdff745d1

URL: 
https://github.com/llvm/llvm-project/commit/09a136bcc6947128df86492d88f1733bdff745d1
DIFF: 
https://github.com/llvm/llvm-project/commit/09a136bcc6947128df86492d88f1733bdff745d1.diff

LOG: [InstCombine] narrow min/max intrinsics with extended inputs

We can sink extends after min/max if they match and would
not change the sign-interpreted compare. The only combo
that doesn't work is zext+smin/smax because the zexts
could change a negative number into positive:
https://alive2.llvm.org/ce/z/D6sz6J
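
A concrete way to see the zext+smax failure, modeling i5 values in plain
C++ (a sketch; all names are illustrative):

  #include <algorithm>
  #include <cassert>
  #include <cstdint>

  int main() {
    int8_t x = -1, y = 0;            // i5 values 0b11111 and 0b00000
    uint8_t zx = uint8_t(x) & 0x1F;  // zext i5 -1 -> 31 (now positive!)
    uint8_t zy = uint8_t(y) & 0x1F;  // zext i5 0  -> 0
    int wide = std::max<int>(zx, zy);                  // smax after zext: 31
    int narrow = int(unsigned(std::max(x, y)) & 0x1F); // smax(-1,0)=0, zext: 0
    assert(wide != narrow); // 31 vs 0: narrowing here would miscompile
    return 0;
  }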

Sext+umax/umin works:

  define i32 @src(i8 %x, i8 %y) {
  %0:
%sx = sext i8 %x to i32
%sy = sext i8 %y to i32
%m = umax i32 %sx, %sy
ret i32 %m
  }
  =>
  define i32 @tgt(i8 %x, i8 %y) {
  %0:
%m = umax i8 %x, %y
%r = sext i8 %m to i32
ret i32 %r
  }
  Transformation seems to be correct!

Added: 


Modified: 
llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
llvm/test/Transforms/InstCombine/minmax-intrinsics.ll

Removed: 




diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 5ba51d255109..0b4246feecee 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -830,6 +830,30 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
 
 break;
   }
+  case Intrinsic::umax:
+  case Intrinsic::umin: {
+Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
+Value *X, *Y;
+if (match(I0, m_ZExt(m_Value(X))) && match(I1, m_ZExt(m_Value(Y))) &&
+(I0->hasOneUse() || I1->hasOneUse()) && X->getType() == Y->getType()) {
+  Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, Y);
+  return CastInst::Create(Instruction::ZExt, NarrowMaxMin, II->getType());
+}
+// If both operands of unsigned min/max are sign-extended, it is still ok
+// to narrow the operation.
+LLVM_FALLTHROUGH;
+  }
+  case Intrinsic::smax:
+  case Intrinsic::smin: {
+Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
+Value *X, *Y;
+if (match(I0, m_SExt(m_Value(X))) && match(I1, m_SExt(m_Value(Y))) &&
+(I0->hasOneUse() || I1->hasOneUse()) && X->getType() == Y->getType()) {
+  Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, Y);
+  return CastInst::Create(Instruction::SExt, NarrowMaxMin, II->getType());
+}
+break;
+  }
   case Intrinsic::bswap: {
 Value *IIOperand = II->getArgOperand(0);
 Value *X = nullptr;

diff --git a/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll b/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll
index bccfac81bdce..97ed799f32a8 100644
--- a/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll
@@ -50,9 +50,8 @@ define i8 @smax_known_bits(i8 %x, i8 %y) {
 
 define i8 @smax_sext(i5 %x, i5 %y) {
 ; CHECK-LABEL: @smax_sext(
-; CHECK-NEXT:[[SX:%.*]] = sext i5 [[X:%.*]] to i8
-; CHECK-NEXT:[[SY:%.*]] = sext i5 [[Y:%.*]] to i8
-; CHECK-NEXT:[[M:%.*]] = call i8 @llvm.smax.i8(i8 [[SX]], i8 [[SY]])
+; CHECK-NEXT:[[TMP1:%.*]] = call i5 @llvm.smax.i5(i5 [[X:%.*]], i5 [[Y:%.*]])
+; CHECK-NEXT:[[M:%.*]] = sext i5 [[TMP1]] to i8
 ; CHECK-NEXT:ret i8 [[M]]
 ;
   %sx = sext i5 %x to i8
@@ -61,12 +60,14 @@ define i8 @smax_sext(i5 %x, i5 %y) {
   ret i8 %m
 }
 
+; Extra use is ok.
+
 define i8 @smin_sext(i5 %x, i5 %y) {
 ; CHECK-LABEL: @smin_sext(
-; CHECK-NEXT:[[SX:%.*]] = sext i5 [[X:%.*]] to i8
 ; CHECK-NEXT:[[SY:%.*]] = sext i5 [[Y:%.*]] to i8
 ; CHECK-NEXT:call void @use(i8 [[SY]])
-; CHECK-NEXT:[[M:%.*]] = call i8 @llvm.smin.i8(i8 [[SX]], i8 [[SY]])
+; CHECK-NEXT:[[TMP1:%.*]] = call i5 @llvm.smin.i5(i5 [[X:%.*]], i5 [[Y]])
+; CHECK-NEXT:[[M:%.*]] = sext i5 [[TMP1]] to i8
 ; CHECK-NEXT:ret i8 [[M]]
 ;
   %sx = sext i5 %x to i8
@@ -76,12 +77,14 @@ define i8 @smin_sext(i5 %x, i5 %y) {
   ret i8 %m
 }
 
+; Sext doesn't change unsigned min/max comparison of narrow values.
+
 define i8 @umax_sext(i5 %x, i5 %y) {
 ; CHECK-LABEL: @umax_sext(
 ; CHECK-NEXT:[[SX:%.*]] = sext i5 [[X:%.*]] to i8
 ; CHECK-NEXT:call void @use(i8 [[SX]])
-; CHECK-NEXT:[[SY:%.*]] = sext i5 [[Y:%.*]] to i8
-; CHECK-NEXT:[[M:%.*]] = call i8 @llvm.umax.i8(i8 [[SX]], i8 [[SY]])
+; CHECK-NEXT:[[TMP1:%.*]] = call i5 @llvm.umax.i5(i5 [[X]], i5 [[Y:%.*]])
+; CHECK-NEXT:[[M:%.*]] = sext i5 [[TMP1]] to i8
 ; CHECK-NEXT:ret i8 [[M]]
 ;
   %sx = sext i5 %x to i8
@@ -93,9 +96,8 @@ define i8 @umax_sext(i5 %x, i5 %y) {
 
 define <3 x i8> @umin_sext(<3 x i5> %x, <3 x i5> %y) {
 ; CHECK-LABEL: @umin_sext(
-; CHECK-NEXT:[[SX:%.*]] = sext <3 x i5> [[X:%.*]] to <3 x i8>
-; CHECK-NEXT:[[SY:%.*]] = sext <3 x i5> [[Y:%.*]] to <3 x

[llvm-branch-commits] [llvm] 46507a9 - [SLP] reduce code duplication while matching reductions; NFC

2021-01-12 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2021-01-12T16:03:57-05:00
New Revision: 46507a96fc13146f73e5915a008055c5a59191c2

URL: 
https://github.com/llvm/llvm-project/commit/46507a96fc13146f73e5915a008055c5a59191c2
DIFF: 
https://github.com/llvm/llvm-project/commit/46507a96fc13146f73e5915a008055c5a59191c2.diff

LOG: [SLP] reduce code duplication while matching reductions; NFC

Added: 


Modified: 
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 




diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index bd673d112b3a..ff22572782e2 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -6857,49 +6857,48 @@ class HorizontalReduction {
 
   // Visit left or right.
   Value *NextV = TreeN->getOperand(EdgeToVisit);
-  if (NextV != Phi) {
-auto *I = dyn_cast<Instruction>(NextV);
-OpData = getOperationData(I);
-// Continue analysis if the next operand is a reduction operation or
-// (possibly) a reduced value. If the reduced value opcode is not set,
-// the first met operation != reduction operation is considered as the
-// reduced value class.
-const bool IsRdxInst = OpData == RdxTreeInst;
-if (I && (!RdxLeafVal || OpData == RdxLeafVal || IsRdxInst)) {
-  // Only handle trees in the current basic block.
-  if (!RdxTreeInst.hasSameParent(I, B->getParent(), IsRdxInst)) {
-// I is an extra argument for TreeN (its parent operation).
-markExtraArg(Stack.back(), I);
-continue;
-  }
+  auto *I = dyn_cast<Instruction>(NextV);
+  OpData = getOperationData(I);
+  // Continue analysis if the next operand is a reduction operation or
+  // (possibly) a reduced value. If the reduced value opcode is not set,
+  // the first met operation != reduction operation is considered as the
+  // reduced value class.
+  const bool IsRdxInst = OpData == RdxTreeInst;
+  if (I && I != Phi &&
+  (!RdxLeafVal || OpData == RdxLeafVal || IsRdxInst)) {
+// Only handle trees in the current basic block.
+if (!RdxTreeInst.hasSameParent(I, B->getParent(), IsRdxInst)) {
+  // I is an extra argument for TreeN (its parent operation).
+  markExtraArg(Stack.back(), I);
+  continue;
+}
 
-  // Each tree node needs to have minimal number of users except for the
-  // ultimate reduction.
-  if (!RdxTreeInst.hasRequiredNumberOfUses(I, IsRdxInst) && I != B) {
-// I is an extra argument for TreeN (its parent operation).
-markExtraArg(Stack.back(), I);
-continue;
-  }
+// Each tree node needs to have minimal number of users except for the
+// ultimate reduction.
+if (!RdxTreeInst.hasRequiredNumberOfUses(I, IsRdxInst) && I != B) {
+  // I is an extra argument for TreeN (its parent operation).
+  markExtraArg(Stack.back(), I);
+  continue;
+}
 
-  if (IsRdxInst) {
-// We need to be able to reassociate the reduction operations.
-if (!OpData.isAssociative(I)) {
-  // I is an extra argument for TreeN (its parent operation).
-  markExtraArg(Stack.back(), I);
-  continue;
-}
-  } else if (RdxLeafVal && RdxLeafVal != OpData) {
-// Make sure that the opcodes of the operations that we are going to
-// reduce match.
+if (IsRdxInst) {
+  // We need to be able to reassociate the reduction operations.
+  if (!OpData.isAssociative(I)) {
 // I is an extra argument for TreeN (its parent operation).
 markExtraArg(Stack.back(), I);
 continue;
-  } else if (!RdxLeafVal) {
-RdxLeafVal = OpData;
   }
-  Stack.push_back(std::make_pair(I, OpData.getFirstOperandIndex()));
+} else if (RdxLeafVal && RdxLeafVal != OpData) {
+  // Make sure that the opcodes of the operations that we are going to
+  // reduce match.
+  // I is an extra argument for TreeN (its parent operation).
+  markExtraArg(Stack.back(), I);
   continue;
+} else if (!RdxLeafVal) {
+  RdxLeafVal = OpData;
 }
+Stack.push_back(std::make_pair(I, OpData.getFirstOperandIndex()));
+continue;
   }
   // NextV is an extra argument for TreeN (its parent operation).
   markExtraArg(Stack.back(), NextV);





[llvm-branch-commits] [llvm] 554be30 - [SLP] reduce code duplication in processing reductions; NFC

2021-01-12 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2021-01-12T16:03:57-05:00
New Revision: 554be30a42802d66807f93e4671a518c1c04e0f8

URL: 
https://github.com/llvm/llvm-project/commit/554be30a42802d66807f93e4671a518c1c04e0f8
DIFF: 
https://github.com/llvm/llvm-project/commit/554be30a42802d66807f93e4671a518c1c04e0f8.diff

LOG: [SLP] reduce code duplication in processing reductions; NFC

Added: 


Modified: 
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 




diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index ff22572782e2..04bdc74c7879 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -6867,38 +6867,29 @@ class HorizontalReduction {
   if (I && I != Phi &&
   (!RdxLeafVal || OpData == RdxLeafVal || IsRdxInst)) {
 // Only handle trees in the current basic block.
-if (!RdxTreeInst.hasSameParent(I, B->getParent(), IsRdxInst)) {
-  // I is an extra argument for TreeN (its parent operation).
-  markExtraArg(Stack.back(), I);
-  continue;
-}
-
 // Each tree node needs to have minimal number of users except for the
 // ultimate reduction.
-if (!RdxTreeInst.hasRequiredNumberOfUses(I, IsRdxInst) && I != B) {
-  // I is an extra argument for TreeN (its parent operation).
-  markExtraArg(Stack.back(), I);
-  continue;
-}
-
-if (IsRdxInst) {
-  // We need to be able to reassociate the reduction operations.
-  if (!OpData.isAssociative(I)) {
+if (RdxTreeInst.hasSameParent(I, B->getParent(), IsRdxInst) &&
+RdxTreeInst.hasRequiredNumberOfUses(I, IsRdxInst) && I != B) {
+  if (IsRdxInst) {
+// We need to be able to reassociate the reduction operations.
+if (!OpData.isAssociative(I)) {
+  // I is an extra argument for TreeN (its parent operation).
+  markExtraArg(Stack.back(), I);
+  continue;
+}
+  } else if (RdxLeafVal && RdxLeafVal != OpData) {
+// Make sure that the opcodes of the operations that we are going to
+// reduce match.
 // I is an extra argument for TreeN (its parent operation).
 markExtraArg(Stack.back(), I);
 continue;
+  } else if (!RdxLeafVal) {
+RdxLeafVal = OpData;
   }
-} else if (RdxLeafVal && RdxLeafVal != OpData) {
-  // Make sure that the opcodes of the operations that we are going to
-  // reduce match.
-  // I is an extra argument for TreeN (its parent operation).
-  markExtraArg(Stack.back(), I);
+  Stack.push_back(std::make_pair(I, OpData.getFirstOperandIndex()));
   continue;
-} else if (!RdxLeafVal) {
-  RdxLeafVal = OpData;
 }
-Stack.push_back(std::make_pair(I, OpData.getFirstOperandIndex()));
-continue;
   }
   // NextV is an extra argument for TreeN (its parent operation).
   markExtraArg(Stack.back(), NextV);





[llvm-branch-commits] [llvm] 92fb5c4 - [SLP] rename variable to improve readability; NFC

2021-01-12 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2021-01-12T16:03:57-05:00
New Revision: 92fb5c49e8aa53ac97fa2fb1a891a4d7ccfd75c5

URL: 
https://github.com/llvm/llvm-project/commit/92fb5c49e8aa53ac97fa2fb1a891a4d7ccfd75c5
DIFF: 
https://github.com/llvm/llvm-project/commit/92fb5c49e8aa53ac97fa2fb1a891a4d7ccfd75c5.diff

LOG: [SLP] rename variable to improve readability; NFC

The OperationData in the 2nd block (visiting the operands)
is completely independent of the 1st block.

Added: 


Modified: 
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 




diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 04bdc74c7879..1ef762c9dfa7 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -6826,7 +6826,7 @@ class HorizontalReduction {
 while (!Stack.empty()) {
   Instruction *TreeN = Stack.back().first;
   unsigned EdgeToVisit = Stack.back().second++;
-  OperationData OpData = getOperationData(TreeN);
+  const OperationData OpData = getOperationData(TreeN);
   bool IsReducedValue = OpData != RdxTreeInst;
 
   // Postorder vist.
@@ -6858,14 +6858,14 @@ class HorizontalReduction {
   // Visit left or right.
   Value *NextV = TreeN->getOperand(EdgeToVisit);
   auto *I = dyn_cast<Instruction>(NextV);
-  OpData = getOperationData(I);
+  const OperationData EdgeOpData = getOperationData(I);
   // Continue analysis if the next operand is a reduction operation or
   // (possibly) a reduced value. If the reduced value opcode is not set,
   // the first met operation != reduction operation is considered as the
   // reduced value class.
-  const bool IsRdxInst = OpData == RdxTreeInst;
+  const bool IsRdxInst = EdgeOpData == RdxTreeInst;
   if (I && I != Phi &&
-  (!RdxLeafVal || OpData == RdxLeafVal || IsRdxInst)) {
+  (!RdxLeafVal || EdgeOpData == RdxLeafVal || IsRdxInst)) {
 // Only handle trees in the current basic block.
 // Each tree node needs to have minimal number of users except for the
 // ultimate reduction.
@@ -6873,21 +6873,21 @@ class HorizontalReduction {
 RdxTreeInst.hasRequiredNumberOfUses(I, IsRdxInst) && I != B) {
   if (IsRdxInst) {
 // We need to be able to reassociate the reduction operations.
-if (!OpData.isAssociative(I)) {
+if (!EdgeOpData.isAssociative(I)) {
   // I is an extra argument for TreeN (its parent operation).
   markExtraArg(Stack.back(), I);
   continue;
 }
-  } else if (RdxLeafVal && RdxLeafVal != OpData) {
+  } else if (RdxLeafVal && RdxLeafVal != EdgeOpData) {
-// Make sure that the opcodes of the operations that we are going to
 // reduce match.
 // I is an extra argument for TreeN (its parent operation).
 markExtraArg(Stack.back(), I);
 continue;
   } else if (!RdxLeafVal) {
-RdxLeafVal = OpData;
+RdxLeafVal = EdgeOpData;
   }
-  Stack.push_back(std::make_pair(I, OpData.getFirstOperandIndex()));
+  Stack.push_back(std::make_pair(I, EdgeOpData.getFirstOperandIndex()));
   continue;
 }
   }





[llvm-branch-commits] [llvm] 9e7895a - [SLP] reduce code duplication while processing reductions; NFC

2021-01-12 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2021-01-12T16:03:57-05:00
New Revision: 9e7895a8682ce3ad98c006955d573d5f2fded4f6

URL: 
https://github.com/llvm/llvm-project/commit/9e7895a8682ce3ad98c006955d573d5f2fded4f6
DIFF: 
https://github.com/llvm/llvm-project/commit/9e7895a8682ce3ad98c006955d573d5f2fded4f6.diff

LOG: [SLP] reduce code duplication while processing reductions; NFC

Added: 


Modified: 
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 




diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 1ef762c9dfa7..403170447f5a 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -6863,33 +6863,32 @@ class HorizontalReduction {
   // (possibly) a reduced value. If the reduced value opcode is not set,
   // the first met operation != reduction operation is considered as the
   // reduced value class.
+  // Only handle trees in the current basic block.
+  // Each tree node needs to have minimal number of users except for the
+  // ultimate reduction.
   const bool IsRdxInst = EdgeOpData == RdxTreeInst;
-  if (I && I != Phi &&
+  if (I && I != Phi && I != B &&
+  RdxTreeInst.hasSameParent(I, B->getParent(), IsRdxInst) &&
+  RdxTreeInst.hasRequiredNumberOfUses(I, IsRdxInst) &&
   (!RdxLeafVal || EdgeOpData == RdxLeafVal || IsRdxInst)) {
-// Only handle trees in the current basic block.
-// Each tree node needs to have minimal number of users except for the
-// ultimate reduction.
-if (RdxTreeInst.hasSameParent(I, B->getParent(), IsRdxInst) &&
-RdxTreeInst.hasRequiredNumberOfUses(I, IsRdxInst) && I != B) {
-  if (IsRdxInst) {
-// We need to be able to reassociate the reduction operations.
-if (!EdgeOpData.isAssociative(I)) {
-  // I is an extra argument for TreeN (its parent operation).
-  markExtraArg(Stack.back(), I);
-  continue;
-}
-  } else if (RdxLeafVal && RdxLeafVal != EdgeOpData) {
-// Make sure that the opcodes of the operations that we are going to
-// reduce match.
+if (IsRdxInst) {
+  // We need to be able to reassociate the reduction operations.
+  if (!EdgeOpData.isAssociative(I)) {
 // I is an extra argument for TreeN (its parent operation).
 markExtraArg(Stack.back(), I);
 continue;
-  } else if (!RdxLeafVal) {
-RdxLeafVal = EdgeOpData;
   }
-  Stack.push_back(std::make_pair(I, EdgeOpData.getFirstOperandIndex()));
+} else if (RdxLeafVal && RdxLeafVal != EdgeOpData) {
+  // Make sure that the opcodes of the operations that we are going to
+  // reduce match.
+  // I is an extra argument for TreeN (its parent operation).
+  markExtraArg(Stack.back(), I);
   continue;
+} else if (!RdxLeafVal) {
+  RdxLeafVal = EdgeOpData;
 }
+Stack.push_back(std::make_pair(I, EdgeOpData.getFirstOperandIndex()));
+continue;
   }
   // NextV is an extra argument for TreeN (its parent operation).
   markExtraArg(Stack.back(), NextV);





[llvm-branch-commits] [llvm] e433ca2 - [SLP] add reduction test for FMF; NFC

2021-01-13 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2021-01-13T11:43:51-05:00
New Revision: e433ca28ec923929efe4f6babb8d33b4e6673ac1

URL: 
https://github.com/llvm/llvm-project/commit/e433ca28ec923929efe4f6babb8d33b4e6673ac1
DIFF: 
https://github.com/llvm/llvm-project/commit/e433ca28ec923929efe4f6babb8d33b4e6673ac1.diff

LOG: [SLP] add reduction test for FMF; NFC

Added: 


Modified: 
llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll

Removed: 




diff --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll
index faa4a186e6c4..33b4f7f706fe 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll
@@ -1197,6 +1197,58 @@ define float @extra_args_no_replace(float* nocapture readonly %x, i32 %a, i32 %b
   ret float %add4.6
 }
 
+define float @extra_args_no_fast(float* %x, float %a, float %b) {
+; CHECK-LABEL: @extra_args_no_fast(
+; CHECK-NEXT:[[ADDC:%.*]] = fadd fast float [[B:%.*]], 3.00e+00
+; CHECK-NEXT:[[ADD:%.*]] = fadd fast float [[A:%.*]], [[ADDC]]
+; CHECK-NEXT:[[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1
+; CHECK-NEXT:[[ARRAYIDX3_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 2
+; CHECK-NEXT:[[ARRAYIDX3_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 3
+; CHECK-NEXT:[[T0:%.*]] = load float, float* [[X]], align 4
+; CHECK-NEXT:[[T1:%.*]] = load float, float* [[ARRAYIDX3]], align 4
+; CHECK-NEXT:[[T2:%.*]] = load float, float* [[ARRAYIDX3_1]], align 4
+; CHECK-NEXT:[[T3:%.*]] = load float, float* [[ARRAYIDX3_2]], align 4
+; CHECK-NEXT:[[ADD1:%.*]] = fadd fast float [[T0]], [[ADD]]
+; CHECK-NEXT:[[ADD4:%.*]] = fadd fast float [[T1]], [[ADD1]]
+; CHECK-NEXT:[[ADD4_1:%.*]] = fadd float [[T2]], [[ADD4]]
+; CHECK-NEXT:[[ADD4_2:%.*]] = fadd fast float [[T3]], [[ADD4_1]]
+; CHECK-NEXT:[[ADD5:%.*]] = fadd fast float [[ADD4_2]], [[A]]
+; CHECK-NEXT:ret float [[ADD5]]
+;
+; THRESHOLD-LABEL: @extra_args_no_fast(
+; THRESHOLD-NEXT:[[ADDC:%.*]] = fadd fast float [[B:%.*]], 3.00e+00
+; THRESHOLD-NEXT:[[ADD:%.*]] = fadd fast float [[A:%.*]], [[ADDC]]
+; THRESHOLD-NEXT:[[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1
+; THRESHOLD-NEXT:[[ARRAYIDX3_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 2
+; THRESHOLD-NEXT:[[ARRAYIDX3_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 3
+; THRESHOLD-NEXT:[[T0:%.*]] = load float, float* [[X]], align 4
+; THRESHOLD-NEXT:[[T1:%.*]] = load float, float* [[ARRAYIDX3]], align 4
+; THRESHOLD-NEXT:[[T2:%.*]] = load float, float* [[ARRAYIDX3_1]], align 4
+; THRESHOLD-NEXT:[[T3:%.*]] = load float, float* [[ARRAYIDX3_2]], align 4
+; THRESHOLD-NEXT:[[ADD1:%.*]] = fadd fast float [[T0]], [[ADD]]
+; THRESHOLD-NEXT:[[ADD4:%.*]] = fadd fast float [[T1]], [[ADD1]]
+; THRESHOLD-NEXT:[[ADD4_1:%.*]] = fadd float [[T2]], [[ADD4]]
+; THRESHOLD-NEXT:[[ADD4_2:%.*]] = fadd fast float [[T3]], [[ADD4_1]]
+; THRESHOLD-NEXT:[[ADD5:%.*]] = fadd fast float [[ADD4_2]], [[A]]
+; THRESHOLD-NEXT:ret float [[ADD5]]
+;
+  %addc = fadd fast float %b, 3.0
+  %add = fadd fast float %a, %addc
+  %arrayidx3 = getelementptr inbounds float, float* %x, i64 1
+  %arrayidx3.1 = getelementptr inbounds float, float* %x, i64 2
+  %arrayidx3.2 = getelementptr inbounds float, float* %x, i64 3
+  %t0 = load float, float* %x, align 4
+  %t1 = load float, float* %arrayidx3, align 4
+  %t2 = load float, float* %arrayidx3.1, align 4
+  %t3 = load float, float* %arrayidx3.2, align 4
+  %add1 = fadd fast float %t0, %add
+  %add4 = fadd fast float %t1, %add1
+  %add4.1 = fadd float %t2, %add4  ; this is not a reduction candidate
+  %add4.2 = fadd fast float %t3, %add4.1
+  %add5 = fadd fast float %add4.2, %a
+  ret float %add5
+}
+
 define i32 @wobble(i32 %arg, i32 %bar) {
 ; CHECK-LABEL: @wobble(
 ; CHECK-NEXT:  bb:





[llvm-branch-commits] [llvm] 123674a - [SLP] simplify type check for reductions

2021-01-13 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2021-01-13T13:30:46-05:00
New Revision: 123674a816742254debdfcc978026b8107b502d8

URL: 
https://github.com/llvm/llvm-project/commit/123674a816742254debdfcc978026b8107b502d8
DIFF: 
https://github.com/llvm/llvm-project/commit/123674a816742254debdfcc978026b8107b502d8.diff

LOG: [SLP] simplify type check for reductions

This is NFC-intended. The 'valid' call allows int/FP/pointers
for other parts of SLP. The difference here is that we can't
reduce pointers.
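
Roughly, the new gate reads like this sketch (assuming SLP's own
isValidElementType helper, declared here for illustration):

  #include "llvm/IR/Type.h"
  using namespace llvm;

  bool isValidElementType(Type *Ty); // SLP helper; admits int/FP/pointers

  // Reductions need regular int/FP types; pointers pass the generic check
  // elsewhere in SLP but cannot be reduced, so reject them explicitly.
  static bool isReducibleType(Type *Ty) {
    return isValidElementType(Ty) && !Ty->isPointerTy();
  }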

Added: 


Modified: 
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 




diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 403170447f5a..b3a3d65d3340 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -6809,10 +6809,10 @@ class HorizontalReduction {
 if (!RdxTreeInst.isVectorizable(B))
   return false;
 
+// Analyze "regular" integer/FP types for reductions - no target-specific
+// types or pointers.
 Type *Ty = B->getType();
-if (!isValidElementType(Ty))
-  return false;
-if (!Ty->isIntOrIntVectorTy() && !Ty->isFPOrFPVectorTy())
+if (!isValidElementType(Ty) || Ty->isPointerTy())
   return false;
 
 RdxLeafVal.clear();





[llvm-branch-commits] [llvm] b21905d - [SLP] remove unnecessary state in matching reductions

2021-01-14 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2021-01-14T18:32:37-05:00
New Revision: b21905dfe3797289791443661540b72cb43dfdf3

URL: 
https://github.com/llvm/llvm-project/commit/b21905dfe3797289791443661540b72cb43dfdf3
DIFF: 
https://github.com/llvm/llvm-project/commit/b21905dfe3797289791443661540b72cb43dfdf3.diff

LOG: [SLP] remove unnecessary state in matching reductions

This is NFC-intended. I'm still trying to figure out
how the loop where this is used works. It does not
seem like we require this data at all, but it's
hard to confirm given the complicated predicates.

Added: 


Modified: 
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 




diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 0f3f74b63860..3f1279b67519 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -6679,9 +6679,6 @@ class HorizontalReduction {
   /// The operation data of the reduction operation.
   OperationData RdxTreeInst;
 
-  /// The operation data for the leaf values that we perform a reduction on.
-  OperationData RdxLeafVal;
-
   /// Checks if the ParentStackElem.first should be marked as a reduction
   /// operation with an extra argument or as extra argument itself.
   void markExtraArg(std::pair<Instruction *, unsigned> &ParentStackElem,
@@ -6825,9 +6822,11 @@ class HorizontalReduction {
 if (!isValidElementType(Ty) || Ty->isPointerTy())
   return false;
 
-RdxLeafVal.clear();
 ReductionRoot = B;
 
+// The operation data for the leaf values that we perform a reduction on.
+OperationData RdxLeafVal;
+
 // Post order traverse the reduction tree starting at B. We only handle true
 // trees containing only binary operators.
 SmallVector<std::pair<Instruction *, unsigned>, 32> Stack;





[llvm-branch-commits] [llvm] 1f21de5 - [SLP] remove unused reduction functions; NFC

2021-01-15 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2021-01-15T14:59:33-05:00
New Revision: 1f21de535d37997c41b9b1ecb2f7ca0e472e9f77

URL: 
https://github.com/llvm/llvm-project/commit/1f21de535d37997c41b9b1ecb2f7ca0e472e9f77
DIFF: 
https://github.com/llvm/llvm-project/commit/1f21de535d37997c41b9b1ecb2f7ca0e472e9f77.diff

LOG: [SLP] remove unused reduction functions; NFC

These were made obsolete by simplifying the code in recent patches.

Added: 


Modified: 
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 




diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 3f1279b67519..e1befc449492 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -6615,16 +6615,6 @@ class HorizontalReduction {
   return Kind == OD.Kind && Opcode == OD.Opcode;
 }
 bool operator!=(const OperationData &OD) const { return !(*this == OD); }
-void clear() {
-  Opcode = 0;
-  Kind = RecurKind::None;
-}
-
-/// Get the opcode of the reduction operation.
-unsigned getOpcode() const {
-  assert(isVectorizable() && "Expected vectorizable operation.");
-  return Opcode;
-}
 
 /// Get kind of reduction data.
 RecurKind getKind() const { return Kind; }





[llvm-branch-commits] [llvm] ceb3cdc - [SLP] remove dead code in reduction matching; NFC

2021-01-15 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2021-01-15T17:03:26-05:00
New Revision: ceb3cdccd0fb597659147e0f538fdee91414541e

URL: 
https://github.com/llvm/llvm-project/commit/ceb3cdccd0fb597659147e0f538fdee91414541e
DIFF: 
https://github.com/llvm/llvm-project/commit/ceb3cdccd0fb597659147e0f538fdee91414541e.diff

LOG: [SLP] remove dead code in reduction matching; NFC

To get into this block we had: !A || B || C,
and we checked C in the first 'if' clause,
leaving !A || B. But the 2nd 'if' is checking
A && !B, which equals !(!A || B), so it can never be true here.
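
The De Morgan step can be machine-checked with a tiny standalone C++
snippet:

  // (A && !B) is exactly the negation of (!A || B); once the guard admits
  // !A || B and the first 'if' consumed C, the second 'if' can never fire.
  constexpr bool equiv(bool A, bool B) { return (A && !B) == !(!A || B); }
  static_assert(equiv(false, false) && equiv(false, true) &&
                equiv(true, false) && equiv(true, true),
                "A && !B == !(!A || B) for all inputs");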

Added: 


Modified: 
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 




diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index e1befc449492..cf7c05e30d06 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -6877,12 +6877,6 @@ class HorizontalReduction {
 markExtraArg(Stack.back(), I);
 continue;
   }
-} else if (RdxLeafVal && RdxLeafVal != EdgeOpData) {
-  // Make sure that the opcodes of the operations that we are going to
-  // reduce match.
-  // I is an extra argument for TreeN (its parent operation).
-  markExtraArg(Stack.back(), I);
-  continue;
 } else if (!RdxLeafVal) {
   RdxLeafVal = EdgeOpData;
 }





[llvm-branch-commits] [llvm] 48dbac5 - [SLP] remove unnecessary use of 'OperationData'

2021-01-16 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2021-01-16T13:55:52-05:00
New Revision: 48dbac5b6b0bc7a03e9af42cb99176abba8d0467

URL: 
https://github.com/llvm/llvm-project/commit/48dbac5b6b0bc7a03e9af42cb99176abba8d0467
DIFF: 
https://github.com/llvm/llvm-project/commit/48dbac5b6b0bc7a03e9af42cb99176abba8d0467.diff

LOG: [SLP] remove unnecessary use of 'OperationData'

This is another NFC-intended patch to allow matching
intrinsics (example: maxnum) as candidates for reductions.

It's possible that the loop/if logic can be reduced now,
but it's still difficult to understand how this all works.

Added: 


Modified: 
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 




diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index cf7c05e30d06..d5e6dfed8e2c 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -6814,8 +6814,11 @@ class HorizontalReduction {
 
 ReductionRoot = B;
 
-// The operation data for the leaf values that we perform a reduction on.
-OperationData RdxLeafVal;
+// The opcode for leaf values that we perform a reduction on.
+// For example: load(x) + load(y) + load(z) + fptoui(w)
+// The leaf opcode for 'w' does not match, so we don't include it as a
+// potential candidate for the reduction.
+unsigned LeafOpcode = 0;
 
 // Post order traverse the reduction tree starting at B. We only handle true
 // trees containing only binary operators.
@@ -6859,9 +6862,9 @@ class HorizontalReduction {
   auto *I = dyn_cast<Instruction>(NextV);
   const OperationData EdgeOpData = getOperationData(I);
   // Continue analysis if the next operand is a reduction operation or
-  // (possibly) a reduced value. If the reduced value opcode is not set,
+  // (possibly) a leaf value. If the leaf value opcode is not set,
   // the first met operation != reduction operation is considered as the
-  // reduced value class.
+  // leaf opcode.
   // Only handle trees in the current basic block.
   // Each tree node needs to have minimal number of users except for the
   // ultimate reduction.
@@ -6869,7 +6872,7 @@ class HorizontalReduction {
   if (I && I != Phi && I != B &&
   RdxTreeInst.hasSameParent(I, B->getParent(), IsRdxInst) &&
   RdxTreeInst.hasRequiredNumberOfUses(I, IsRdxInst) &&
-  (!RdxLeafVal || EdgeOpData == RdxLeafVal || IsRdxInst)) {
+  (!LeafOpcode || LeafOpcode == I->getOpcode() || IsRdxInst)) {
 if (IsRdxInst) {
   // We need to be able to reassociate the reduction operations.
   if (!EdgeOpData.isAssociative(I)) {
@@ -6877,8 +6880,8 @@ class HorizontalReduction {
 markExtraArg(Stack.back(), I);
 continue;
   }
-} else if (!RdxLeafVal) {
-  RdxLeafVal = EdgeOpData;
+} else if (!LeafOpcode) {
+  LeafOpcode = I->getOpcode();
 }
 Stack.push_back(std::make_pair(I, EdgeOpData.getFirstOperandIndex()));
 continue;





[llvm-branch-commits] [llvm] fcfcc3c - [SLP] fix typos; NFC

2021-01-16 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2021-01-16T13:55:52-05:00
New Revision: fcfcc3cc6b16e4fd7d7d2d07937634cca360b46e

URL: 
https://github.com/llvm/llvm-project/commit/fcfcc3cc6b16e4fd7d7d2d07937634cca360b46e
DIFF: 
https://github.com/llvm/llvm-project/commit/fcfcc3cc6b16e4fd7d7d2d07937634cca360b46e.diff

LOG: [SLP] fix typos; NFC

Added: 


Modified: 
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 




diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index d5e6dfed8e2c..a8d8ef5024d7 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -6784,7 +6784,7 @@ class HorizontalReduction {
   /// Try to find a reduction tree.
   bool matchAssociativeReduction(PHINode *Phi, Instruction *B) {
 assert((!Phi || is_contained(Phi->operands(), B)) &&
-   "Thi phi needs to use the binary operator");
+   "Phi needs to use the binary operator");
 
 RdxTreeInst = getOperationData(B);
 
@@ -6831,7 +6831,7 @@ class HorizontalReduction {
   const OperationData OpData = getOperationData(TreeN);
   bool IsReducedValue = OpData != RdxTreeInst;
 
-  // Postorder vist.
+  // Postorder visit.
   if (IsReducedValue || EdgeToVisit == OpData.getNumberOfOperands()) {
 if (IsReducedValue)
   ReducedVals.push_back(TreeN);





[llvm-branch-commits] [llvm] 49b96cd - [SLP] remove opcode field from reduction data class

2021-01-16 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2021-01-16T13:55:52-05:00
New Revision: 49b96cd9ef2f81d193641796b8a85781292faf7a

URL: 
https://github.com/llvm/llvm-project/commit/49b96cd9ef2f81d193641796b8a85781292faf7a
DIFF: 
https://github.com/llvm/llvm-project/commit/49b96cd9ef2f81d193641796b8a85781292faf7a.diff

LOG: [SLP] remove opcode field from reduction data class

This is NFC-intended and another step towards supporting
intrinsics as reduction candidates.

The remaining bits of the OperationData class do not make
much sense as-is, so I will try to improve that, but I'm
trying to take minimal steps because it's still not clear
how this was intended to work.

Added: 


Modified: 
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 




diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index a8d8ef5024d7..8dd318a880fc 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -6430,40 +6430,15 @@ class HorizontalReduction {
   // Use map vector to make stable output.
   MapVector<Instruction *, Value *> ExtraArgs;
 
-  /// Contains info about operation, like its opcode, left and right operands.
+  /// This wraps functionality around a RecurKind (reduction kind).
+  /// TODO: Remove this class if callers can use the 'Kind' value directly?
   class OperationData {
-/// Opcode of the instruction.
-unsigned Opcode = 0;
-
 /// Kind of the reduction operation.
 RecurKind Kind = RecurKind::None;
+bool IsLeafValue = false;
 
 /// Checks if the reduction operation can be vectorized.
-bool isVectorizable() const {
-  switch (Kind) {
-  case RecurKind::Add:
-return Opcode == Instruction::Add;
-  case RecurKind::Mul:
-return Opcode == Instruction::Mul;
-  case RecurKind::Or:
-return Opcode == Instruction::Or;
-  case RecurKind::And:
-return Opcode == Instruction::And;
-  case RecurKind::Xor:
-return Opcode == Instruction::Xor;
-  case RecurKind::FAdd:
-return Opcode == Instruction::FAdd;
-  case RecurKind::FMul:
-return Opcode == Instruction::FMul;
-  case RecurKind::SMax:
-  case RecurKind::SMin:
-  case RecurKind::UMax:
-  case RecurKind::UMin:
-return Opcode == Instruction::ICmp;
-  default:
-return false;
-  }
-}
+bool isVectorizable() const { return Kind != RecurKind::None; }
 
 /// Creates reduction operation with the current opcode.
 Value *createOp(IRBuilder<> &Builder, Value *LHS, Value *RHS,
@@ -6505,19 +6480,17 @@ class HorizontalReduction {
   public:
 explicit OperationData() = default;
 
-/// Construction for reduced values. They are identified by opcode only and
-/// don't have associated LHS/RHS values.
-explicit OperationData(Instruction &I) {
-  Opcode = I.getOpcode();
-}
+/// Constructor for reduced values. They are identified by the bool only.
+explicit OperationData(Instruction &I) { IsLeafValue = true; }
 
 /// Constructor for reduction operations with opcode and type.
-OperationData(unsigned Opcode, RecurKind RdxKind)
-: Opcode(Opcode), Kind(RdxKind) {
+OperationData(RecurKind RdxKind) : Kind(RdxKind) {
   assert(Kind != RecurKind::None && "Expected reduction operation.");
 }
 
-explicit operator bool() const { return Opcode; }
+explicit operator bool() const {
+  return IsLeafValue || Kind != RecurKind::None;
+}
 
 /// Return true if this operation is any kind of minimum or maximum.
 bool isMinMax() const {
@@ -6580,8 +6553,7 @@ class HorizontalReduction {
 
 /// Add all reduction operations for the reduction instruction \p I.
 void addReductionOps(Instruction *I, ReductionOpsListType &ReductionOps) {
-  assert(Kind != RecurKind::None && !!*this &&
- "Expected reduction operation.");
+  assert(Kind != RecurKind::None && "Expected reduction operation.");
   if (isMinMax()) {
 ReductionOps[0].emplace_back(cast<SelectInst>(I)->getCondition());
 ReductionOps[1].emplace_back(I);
@@ -6592,13 +6564,10 @@ class HorizontalReduction {
 
 /// Checks if instruction is associative and can be vectorized.
 bool isAssociative(Instruction *I) const {
-  assert(Kind != RecurKind::None && *this &&
- "Expected reduction operation.");
-  if (RecurrenceDescriptor::isIntMinMaxRecurrenceKind(Kind)) {
-assert(Opcode == Instruction::ICmp &&
-   "Only integer compare operation is expected.");
+  assert(Kind != RecurKind::None && "Expected reduction operation.");
+  if (RecurrenceDescriptor::isIntMinMaxRecurrenceKind(Kind))
 return true;
-  }
+
   return I->isAssociative();
 }
 
@@ -6610,9 +6579,7 @@ class HorizontalReduction {
 /// Checks if two operation data are both a reduc

[llvm-branch-commits] [llvm] d1c4e85 - [SLP] reduce opcode API dependency in reduction cost calc; NFC

2021-01-18 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2021-01-18T09:32:57-05:00
New Revision: d1c4e859ce42c35c61a0db2f1eb8a4209be4503d

URL: 
https://github.com/llvm/llvm-project/commit/d1c4e859ce42c35c61a0db2f1eb8a4209be4503d
DIFF: 
https://github.com/llvm/llvm-project/commit/d1c4e859ce42c35c61a0db2f1eb8a4209be4503d.diff

LOG: [SLP] reduce opcode API dependency in reduction cost calc; NFC

The icmp opcode is now hard-coded in the cost model call.
This will make it easier to eventually remove all opcode
queries for min/max patterns as we transition to intrinsics.

Added: 


Modified: 
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 




diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp 
b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 8dd318a880fc..bf8ef208ccf9 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -7058,12 +7058,10 @@ class HorizontalReduction {
   int getReductionCost(TargetTransformInfo *TTI, Value *FirstReducedVal,
unsigned ReduxWidth) {
 Type *ScalarTy = FirstReducedVal->getType();
-auto *VecTy = FixedVectorType::get(ScalarTy, ReduxWidth);
+FixedVectorType *VectorTy = FixedVectorType::get(ScalarTy, ReduxWidth);
 
 RecurKind Kind = RdxTreeInst.getKind();
-unsigned RdxOpcode = RecurrenceDescriptor::getOpcode(Kind);
-int SplittingRdxCost;
-int ScalarReduxCost;
+int VectorCost, ScalarCost;
 switch (Kind) {
 case RecurKind::Add:
 case RecurKind::Mul:
@@ -7071,22 +7069,24 @@ class HorizontalReduction {
 case RecurKind::And:
 case RecurKind::Xor:
 case RecurKind::FAdd:
-case RecurKind::FMul:
-  SplittingRdxCost = TTI->getArithmeticReductionCost(
-  RdxOpcode, VecTy, /*IsPairwiseForm=*/false);
-  ScalarReduxCost = TTI->getArithmeticInstrCost(RdxOpcode, ScalarTy);
+case RecurKind::FMul: {
+  unsigned RdxOpcode = RecurrenceDescriptor::getOpcode(Kind);
+  VectorCost = TTI->getArithmeticReductionCost(RdxOpcode, VectorTy,
+  
/*IsPairwiseForm=*/false);
+  ScalarCost = TTI->getArithmeticInstrCost(RdxOpcode, ScalarTy);
   break;
+}
 case RecurKind::SMax:
 case RecurKind::SMin:
 case RecurKind::UMax:
 case RecurKind::UMin: {
-  auto *VecCondTy = cast(CmpInst::makeCmpResultType(VecTy));
+  auto *VecCondTy = cast(CmpInst::makeCmpResultType(VectorTy));
   bool IsUnsigned = Kind == RecurKind::UMax || Kind == RecurKind::UMin;
-  SplittingRdxCost =
-  TTI->getMinMaxReductionCost(VecTy, VecCondTy,
+  VectorCost =
+  TTI->getMinMaxReductionCost(VectorTy, VecCondTy,
   /*IsPairwiseForm=*/false, IsUnsigned);
-  ScalarReduxCost =
-  TTI->getCmpSelInstrCost(RdxOpcode, ScalarTy) +
+  ScalarCost =
+  TTI->getCmpSelInstrCost(Instruction::ICmp, ScalarTy) +
   TTI->getCmpSelInstrCost(Instruction::Select, ScalarTy,
   CmpInst::makeCmpResultType(ScalarTy));
   break;
@@ -7095,12 +7095,12 @@ class HorizontalReduction {
   llvm_unreachable("Expected arithmetic or min/max reduction operation");
 }
 
-ScalarReduxCost *= (ReduxWidth - 1);
-LLVM_DEBUG(dbgs() << "SLP: Adding cost "
-  << SplittingRdxCost - ScalarReduxCost
+// Scalar cost is repeated for N-1 elements.
+ScalarCost *= (ReduxWidth - 1);
+LLVM_DEBUG(dbgs() << "SLP: Adding cost " << VectorCost - ScalarCost
   << " for reduction that starts with " << *FirstReducedVal
   << " (It is a splitting reduction)\n");
-return SplittingRdxCost - ScalarReduxCost;
+return VectorCost - ScalarCost;
   }
 
   /// Emit a horizontal reduction of the vectorized value.



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 3dbbadb - [SLP] rename reduction query for min/max ops; NFC

2021-01-18 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2021-01-18T09:32:57-05:00
New Revision: 3dbbadb8ef53d1e91785c17ccd70848de7e842e9

URL: 
https://github.com/llvm/llvm-project/commit/3dbbadb8ef53d1e91785c17ccd70848de7e842e9
DIFF: 
https://github.com/llvm/llvm-project/commit/3dbbadb8ef53d1e91785c17ccd70848de7e842e9.diff

LOG: [SLP] rename reduction query for min/max ops; NFC

This will avoid confusion once we start matching
min/max intrinsics. All of these hacks to accommodate
cmp+sel idioms should disappear once we canonicalize
to min/max intrinsics.
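
For reference, a minimal sketch of the two forms in question (illustrative
IR, not taken from this patch). The cmp+sel idiom that SLP matches today:

  %c = icmp sgt i32 %a, %b
  %m = select i1 %c, i32 %a, i32 %b

and the intrinsic form that instcombine canonicalizes to:

  %m = call i32 @llvm.smax.i32(i32 %a, i32 %b)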

Added: 


Modified: 
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 




diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp 
b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index bf8ef208ccf9..0323e02d0d2c 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -6492,8 +6492,8 @@ class HorizontalReduction {
   return IsLeafValue || Kind != RecurKind::None;
 }
 
-/// Return true if this operation is any kind of minimum or maximum.
-bool isMinMax() const {
+/// Return true if this operation is a cmp+select idiom.
+bool isCmpSel() const {
   assert(Kind != RecurKind::None && "Expected reduction operation.");
   return RecurrenceDescriptor::isIntMinMaxRecurrenceKind(Kind);
 }
@@ -6504,14 +6504,14 @@ class HorizontalReduction {
   // We allow calling this before 'Kind' is set, so handle that specially.
   if (Kind == RecurKind::None)
 return 0;
-  return isMinMax() ? 1 : 0;
+  return isCmpSel() ? 1 : 0;
 }
 
 /// Total number of operands in the reduction operation.
 unsigned getNumberOfOperands() const {
   assert(Kind != RecurKind::None && !!*this &&
  "Expected reduction operation.");
-  return isMinMax() ? 3 : 2;
+  return isCmpSel() ? 3 : 2;
 }
 
 /// Checks if the instruction is in basic block \p BB.
@@ -6519,7 +6519,7 @@ class HorizontalReduction {
 bool hasSameParent(Instruction *I, BasicBlock *BB, bool IsRedOp) const {
   assert(Kind != RecurKind::None && !!*this &&
  "Expected reduction operation.");
-  if (IsRedOp && isMinMax()) {
+  if (IsRedOp && isCmpSel()) {
 auto *Cmp = cast(cast(I)->getCondition());
 return I->getParent() == BB && Cmp && Cmp->getParent() == BB;
   }
@@ -6532,7 +6532,7 @@ class HorizontalReduction {
  "Expected reduction operation.");
   // SelectInst must be used twice while the condition op must have single
   // use only.
-  if (isMinMax())
+  if (isCmpSel())
 return I->hasNUses(2) &&
(!IsReductionOp ||
 cast(I)->getCondition()->hasOneUse());
@@ -6545,7 +6545,7 @@ class HorizontalReduction {
 void initReductionOps(ReductionOpsListType &ReductionOps) {
   assert(Kind != RecurKind::None && !!*this &&
  "Expected reduction operation.");
-  if (isMinMax())
+  if (isCmpSel())
 ReductionOps.assign(2, ReductionOpsType());
   else
 ReductionOps.assign(1, ReductionOpsType());
@@ -6554,7 +6554,7 @@ class HorizontalReduction {
 /// Add all reduction operations for the reduction instruction \p I.
 void addReductionOps(Instruction *I, ReductionOpsListType &ReductionOps) {
   assert(Kind != RecurKind::None && "Expected reduction operation.");
-  if (isMinMax()) {
+  if (isCmpSel()) {
 ReductionOps[0].emplace_back(cast(I)->getCondition());
 ReductionOps[1].emplace_back(I);
   } else {
@@ -6988,10 +6988,10 @@ class HorizontalReduction {
   DebugLoc Loc = cast(ReducedVals[i])->getDebugLoc();
   Value *VectorizedRoot = V.vectorizeTree(ExternallyUsedValues);
 
-  // Emit a reduction. For min/max, the root is a select, but the insertion
+  // Emit a reduction. If the root is a select (min/max idiom), the insert
   // point is the compare condition of that select.
   Instruction *RdxRootInst = cast(ReductionRoot);
-  if (RdxTreeInst.isMinMax())
+  if (RdxTreeInst.isCmpSel())
 Builder.SetInsertPoint(getCmpForMinMaxReduction(RdxRootInst));
   else
 Builder.SetInsertPoint(RdxRootInst);
@@ -7033,7 +7033,7 @@ class HorizontalReduction {
   // select, we also have to RAUW for the compare instruction feeding the
   // reduction root. That's because the original compare may have extra 
uses
   // besides the final select of the reduction.
-  if (RdxTreeInst.isMinMax()) {
+  if (RdxTreeInst.isCmpSel()) {
 if (auto *VecSelect = dyn_cast(VectorizedTree)) {
   Instruction *ScalarCmp =
   getCmpForMinMaxReduction(cast(ReductionRoot));



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] ca7e270 - [SLP] add more FMF tests for fmax/fmin reductions; NFC

2021-01-18 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2021-01-18T12:25:28-05:00
New Revision: ca7e27054c25c2bc6cf88879d73745699251412c

URL: 
https://github.com/llvm/llvm-project/commit/ca7e27054c25c2bc6cf88879d73745699251412c
DIFF: 
https://github.com/llvm/llvm-project/commit/ca7e27054c25c2bc6cf88879d73745699251412c.diff

LOG: [SLP] add more FMF tests for fmax/fmin reductions; NFC

Added: 


Modified: 
llvm/test/Transforms/SLPVectorizer/X86/fmaxnum.ll
llvm/test/Transforms/SLPVectorizer/X86/fminnum.ll

Removed: 




diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/fmaxnum.ll 
b/llvm/test/Transforms/SLPVectorizer/X86/fmaxnum.ll
index 23f2196b2425..e2754862399e 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/fmaxnum.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/fmaxnum.ll
@@ -392,6 +392,33 @@ define float @reduction_v4f32_nnan(float* %p) {
   ret float %m3
 }
 
+define float @reduction_v4f32_not_fast(float* %p) {
+; CHECK-LABEL: @reduction_v4f32_not_fast(
+; CHECK-NEXT:[[G1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], 
i64 1
+; CHECK-NEXT:[[G2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2
+; CHECK-NEXT:[[G3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3
+; CHECK-NEXT:[[T0:%.*]] = load float, float* [[P]], align 4
+; CHECK-NEXT:[[T1:%.*]] = load float, float* [[G1]], align 4
+; CHECK-NEXT:[[T2:%.*]] = load float, float* [[G2]], align 4
+; CHECK-NEXT:[[T3:%.*]] = load float, float* [[G3]], align 4
+; CHECK-NEXT:[[M1:%.*]] = tail call float @llvm.maxnum.f32(float [[T1]], 
float [[T0]])
+; CHECK-NEXT:[[M2:%.*]] = tail call float @llvm.maxnum.f32(float [[T2]], 
float [[M1]])
+; CHECK-NEXT:[[M3:%.*]] = tail call float @llvm.maxnum.f32(float [[T3]], 
float [[M2]])
+; CHECK-NEXT:ret float [[M3]]
+;
+  %g1 = getelementptr inbounds float, float* %p, i64 1
+  %g2 = getelementptr inbounds float, float* %p, i64 2
+  %g3 = getelementptr inbounds float, float* %p, i64 3
+  %t0 = load float, float* %p, align 4
+  %t1 = load float, float* %g1, align 4
+  %t2 = load float, float* %g2, align 4
+  %t3 = load float, float* %g3, align 4
+  %m1 = tail call float @llvm.maxnum.f32(float %t1, float %t0)
+  %m2 = tail call float @llvm.maxnum.f32(float %t2, float %m1)
+  %m3 = tail call float @llvm.maxnum.f32(float %t3, float %m2)
+  ret float %m3
+}
+
 define float @reduction_v8f32_fast(float* %p) {
 ; CHECK-LABEL: @reduction_v8f32_fast(
 ; CHECK-NEXT:[[G1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], 
i64 1
@@ -485,4 +512,31 @@ define double @reduction_v4f64_fast(double* %p) {
   ret double %m3
 }
 
+define double @reduction_v4f64_wrong_fmf(double* %p) {
+; CHECK-LABEL: @reduction_v4f64_wrong_fmf(
+; CHECK-NEXT:[[G1:%.*]] = getelementptr inbounds double, double* 
[[P:%.*]], i64 1
+; CHECK-NEXT:[[G2:%.*]] = getelementptr inbounds double, double* [[P]], 
i64 2
+; CHECK-NEXT:[[G3:%.*]] = getelementptr inbounds double, double* [[P]], 
i64 3
+; CHECK-NEXT:[[T0:%.*]] = load double, double* [[P]], align 4
+; CHECK-NEXT:[[T1:%.*]] = load double, double* [[G1]], align 4
+; CHECK-NEXT:[[T2:%.*]] = load double, double* [[G2]], align 4
+; CHECK-NEXT:[[T3:%.*]] = load double, double* [[G3]], align 4
+; CHECK-NEXT:[[M1:%.*]] = tail call ninf nsz double 
@llvm.maxnum.f64(double [[T1]], double [[T0]])
+; CHECK-NEXT:[[M2:%.*]] = tail call ninf nsz double 
@llvm.maxnum.f64(double [[T2]], double [[M1]])
+; CHECK-NEXT:[[M3:%.*]] = tail call ninf nsz double 
@llvm.maxnum.f64(double [[T3]], double [[M2]])
+; CHECK-NEXT:ret double [[M3]]
+;
+  %g1 = getelementptr inbounds double, double* %p, i64 1
+  %g2 = getelementptr inbounds double, double* %p, i64 2
+  %g3 = getelementptr inbounds double, double* %p, i64 3
+  %t0 = load double, double* %p, align 4
+  %t1 = load double, double* %g1, align 4
+  %t2 = load double, double* %g2, align 4
+  %t3 = load double, double* %g3, align 4
+  %m1 = tail call ninf nsz double @llvm.maxnum.f64(double %t1, double %t0)
+  %m2 = tail call ninf nsz double @llvm.maxnum.f64(double %t2, double %m1)
+  %m3 = tail call ninf nsz double @llvm.maxnum.f64(double %t3, double %m2)
+  ret double %m3
+}
+
 attributes #0 = { nounwind }

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/fminnum.ll 
b/llvm/test/Transforms/SLPVectorizer/X86/fminnum.ll
index 81bcfb2f1e9b..15a7848f8eca 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/fminnum.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/fminnum.ll
@@ -392,6 +392,33 @@ define float @reduction_v4f32_nnan(float* %p) {
   ret float %m3
 }
 
+define float @reduction_v4f32_wrong_fmf(float* %p) {
+; CHECK-LABEL: @reduction_v4f32_wrong_fmf(
+; CHECK-NEXT:[[G1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], 
i64 1
+; CHECK-NEXT:[[G2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2
+; CHECK-NEXT:[[G3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3

[llvm-branch-commits] [llvm] 5b77ac3 - [SLP] match maxnum/minnum intrinsics as FP reduction ops

2021-01-18 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2021-01-18T17:37:16-05:00
New Revision: 5b77ac32b1150d066b35b45d6d982f4b4a1f62ff

URL: 
https://github.com/llvm/llvm-project/commit/5b77ac32b1150d066b35b45d6d982f4b4a1f62ff
DIFF: 
https://github.com/llvm/llvm-project/commit/5b77ac32b1150d066b35b45d6d982f4b4a1f62ff.diff

LOG: [SLP] match maxnum/minnum intrinsics as FP reduction ops

After much refactoring of the reduction matching code over the
last two weeks, I think this change is finally ready.

We effectively broke fmax/fmin vector reduction optimization
when we started canonicalizing to intrinsics in instcombine,
so this should restore that functionality for SLP.

There are still FMF problems here as noted in the code comments,
but we should be avoiding miscompiles on those for fmax/fmin by
restricting to full 'fast' ops (negative tests are included).

Fixing FMF propagation is a planned follow-up.
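
For reference, a sketch of the scalar chain this allows SLP to match
(illustrative IR along the lines of the fmaxnum.ll tests; note the full
'fast' flags, which the associativity check requires here):

  %m1 = call fast float @llvm.maxnum.f32(float %t1, float %t0)
  %m2 = call fast float @llvm.maxnum.f32(float %t2, float %m1)
  %m3 = call fast float @llvm.maxnum.f32(float %t3, float %m2)

Given vectorizable loads feeding %t0..%t3, this can now be rewritten as a
vector load plus a single fmax reduction.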

Differential Revision: https://reviews.llvm.org/D94913

Added: 


Modified: 
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-expanded.ll
llvm/test/Transforms/SLPVectorizer/X86/fmaxnum.ll
llvm/test/Transforms/SLPVectorizer/X86/fminnum.ll

Removed: 




diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp 
b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 0323e02d0d2c..0fee52dcdd93 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -6455,6 +6455,10 @@ class HorizontalReduction {
   case RecurKind::FMul:
 return Builder.CreateBinOp((Instruction::BinaryOps)RdxOpcode, LHS, RHS,
Name);
+  case RecurKind::FMax:
+return Builder.CreateBinaryIntrinsic(Intrinsic::maxnum, LHS, RHS);
+  case RecurKind::FMin:
+return Builder.CreateBinaryIntrinsic(Intrinsic::minnum, LHS, RHS);
 
   case RecurKind::SMax: {
 Value *Cmp = Builder.CreateICmpSGT(LHS, RHS, Name);
@@ -6568,6 +6572,15 @@ class HorizontalReduction {
   if (RecurrenceDescriptor::isIntMinMaxRecurrenceKind(Kind))
 return true;
 
+  if (Kind == RecurKind::FMax || Kind == RecurKind::FMin) {
+// FP min/max are associative except for NaN and -0.0. We do not
+// have to rule out -0.0 here because the intrinsic semantics do not
+// specify a fixed result for it.
+// TODO: This is artificially restricted to fast because the code that
+//   creates reductions assumes/produces fast ops.
+return I->getFastMathFlags().isFast();
+  }
+
   return I->isAssociative();
 }
 
@@ -6677,6 +6690,11 @@ class HorizontalReduction {
 if (match(I, m_FMul(m_Value(), m_Value(
   return OperationData(RecurKind::FMul);
 
+if (match(I, m_Intrinsic(m_Value(), m_Value(
+  return OperationData(RecurKind::FMax);
+if (match(I, m_Intrinsic(m_Value(), m_Value(
+  return OperationData(RecurKind::FMin);
+
 if (match(I, m_SMax(m_Value(), m_Value(
   return OperationData(RecurKind::SMax);
 if (match(I, m_SMin(m_Value(), m_Value(
@@ -7076,6 +7094,18 @@ class HorizontalReduction {
   ScalarCost = TTI->getArithmeticInstrCost(RdxOpcode, ScalarTy);
   break;
 }
+case RecurKind::FMax:
+case RecurKind::FMin: {
+  auto *VecCondTy = cast(CmpInst::makeCmpResultType(VectorTy));
+  VectorCost =
+  TTI->getMinMaxReductionCost(VectorTy, VecCondTy,
+  /*pairwise=*/false, /*unsigned=*/false);
+  ScalarCost =
+  TTI->getCmpSelInstrCost(Instruction::FCmp, ScalarTy) +
+  TTI->getCmpSelInstrCost(Instruction::Select, ScalarTy,
+  CmpInst::makeCmpResultType(ScalarTy));
+  break;
+}
 case RecurKind::SMax:
 case RecurKind::SMin:
 case RecurKind::UMax:
@@ -7307,6 +7337,16 @@ static Value *getReductionValue(const DominatorTree *DT, 
PHINode *P,
   return nullptr;
 }
 
+static bool matchRdxBop(Instruction *I, Value *&V0, Value *&V1) {
+  if (match(I, m_BinOp(m_Value(V0), m_Value(V1
+return true;
+  if (match(I, m_Intrinsic(m_Value(V0), m_Value(V1
+return true;
+  if (match(I, m_Intrinsic(m_Value(V0), m_Value(V1
+return true;
+  return false;
+}
+
 /// Attempt to reduce a horizontal reduction.
 /// If it is legal to match a horizontal reduction feeding the phi node \a P
 /// with reduction operators \a Root (or one of its operands) in a basic block
@@ -7347,7 +7387,7 @@ static bool tryToVectorizeHorReductionOrInstOperands(
 unsigned Level;
 std::tie(Inst, Level) = Stack.pop_back_val();
 Value *B0, *B1;
-bool IsBinop = match(Inst, m_BinOp(m_Value(B0), m_Value(B1)));
+bool IsBinop = matchRdxBop(Inst, B0, B1);
 bool IsSelect = match(Inst, m_Select(m_Value(), m_Value(), m_Value()));
 if (IsBinop || IsSelect) {
   

[llvm-branch-commits] [llvm] d27bb5c - [x86] add cast to avoid compile-time warning; NFC

2021-01-18 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2021-01-18T17:47:04-05:00
New Revision: d27bb5c375ca8e96e15168587a3bcd91b244fcad

URL: 
https://github.com/llvm/llvm-project/commit/d27bb5c375ca8e96e15168587a3bcd91b244fcad
DIFF: 
https://github.com/llvm/llvm-project/commit/d27bb5c375ca8e96e15168587a3bcd91b244fcad.diff

LOG: [x86] add cast to avoid compile-time warning; NFC

Added: 


Modified: 
llvm/lib/Target/X86/X86ISelLowering.cpp

Removed: 




diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp 
b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 60a2fd233d5c..97fcef0b92fa 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -10964,7 +10964,7 @@ static bool IsElementEquivalent(int MaskSize, SDValue 
Op, SDValue ExpectedOp,
   case X86ISD::VBROADCAST_LOAD:
 // TODO: Handle MaskSize != Op.getValueType().getVectorNumElements()?
 return (Op == ExpectedOp &&
-Op.getValueType().getVectorNumElements() == MaskSize);
+(int)Op.getValueType().getVectorNumElements() == MaskSize);
   case X86ISD::HADD:
   case X86ISD::HSUB:
   case X86ISD::FHADD:



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 8590d24 - [SLP] move reduction createOp functions; NFC

2021-01-20 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2021-01-20T11:14:48-05:00
New Revision: 8590d245434dd4205c89f0a05b4c22feccb7421c

URL: 
https://github.com/llvm/llvm-project/commit/8590d245434dd4205c89f0a05b4c22feccb7421c
DIFF: 
https://github.com/llvm/llvm-project/commit/8590d245434dd4205c89f0a05b4c22feccb7421c.diff

LOG: [SLP] move reduction createOp functions; NFC

We were able to remove almost all of the state from
OperationData, so these don't make sense as members
of that class - just pass the RecurKind in as a param.

Added: 


Modified: 
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 




diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp 
b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 24885e4d8257..3d657b0b898c 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -6397,7 +6397,7 @@ namespace {
 class HorizontalReduction {
   using ReductionOpsType = SmallVector;
   using ReductionOpsListType = SmallVector;
-  ReductionOpsListType  ReductionOps;
+  ReductionOpsListType ReductionOps;
   SmallVector ReducedVals;
   // Use map vector to make stable output.
   MapVector ExtraArgs;
@@ -6412,47 +6412,6 @@ class HorizontalReduction {
 /// Checks if the reduction operation can be vectorized.
 bool isVectorizable() const { return Kind != RecurKind::None; }
 
-/// Creates reduction operation with the current opcode.
-Value *createOp(IRBuilder<> &Builder, Value *LHS, Value *RHS,
-const Twine &Name) const {
-  assert(isVectorizable() && "Unhandled reduction operation.");
-  unsigned RdxOpcode = RecurrenceDescriptor::getOpcode(Kind);
-  switch (Kind) {
-  case RecurKind::Add:
-  case RecurKind::Mul:
-  case RecurKind::Or:
-  case RecurKind::And:
-  case RecurKind::Xor:
-  case RecurKind::FAdd:
-  case RecurKind::FMul:
-return Builder.CreateBinOp((Instruction::BinaryOps)RdxOpcode, LHS, RHS,
-   Name);
-  case RecurKind::FMax:
-return Builder.CreateBinaryIntrinsic(Intrinsic::maxnum, LHS, RHS);
-  case RecurKind::FMin:
-return Builder.CreateBinaryIntrinsic(Intrinsic::minnum, LHS, RHS);
-
-  case RecurKind::SMax: {
-Value *Cmp = Builder.CreateICmpSGT(LHS, RHS, Name);
-return Builder.CreateSelect(Cmp, LHS, RHS, Name);
-  }
-  case RecurKind::SMin: {
-Value *Cmp = Builder.CreateICmpSLT(LHS, RHS, Name);
-return Builder.CreateSelect(Cmp, LHS, RHS, Name);
-  }
-  case RecurKind::UMax: {
-Value *Cmp = Builder.CreateICmpUGT(LHS, RHS, Name);
-return Builder.CreateSelect(Cmp, LHS, RHS, Name);
-  }
-  case RecurKind::UMin: {
-Value *Cmp = Builder.CreateICmpULT(LHS, RHS, Name);
-return Builder.CreateSelect(Cmp, LHS, RHS, Name);
-  }
-  default:
-llvm_unreachable("Unknown reduction operation.");
-  }
-}
-
   public:
 explicit OperationData() = default;
 
@@ -6580,40 +6539,6 @@ class HorizontalReduction {
 return nullptr;
   return I->getOperand(getFirstOperandIndex() + 1);
 }
-
-/// Creates reduction operation with the current opcode with the IR flags
-/// from \p ReductionOps.
-Value *createOp(IRBuilder<> &Builder, Value *LHS, Value *RHS,
-const Twine &Name,
-const ReductionOpsListType &ReductionOps) const {
-  assert(isVectorizable() &&
- "Expected add|fadd or min/max reduction operation.");
-  Value *Op = createOp(Builder, LHS, RHS, Name);
-  if (RecurrenceDescriptor::isIntMinMaxRecurrenceKind(Kind)) {
-if (auto *Sel = dyn_cast(Op))
-  propagateIRFlags(Sel->getCondition(), ReductionOps[0]);
-propagateIRFlags(Op, ReductionOps[1]);
-return Op;
-  }
-  propagateIRFlags(Op, ReductionOps[0]);
-  return Op;
-}
-/// Creates reduction operation with the current opcode with the IR flags
-/// from \p I.
-Value *createOp(IRBuilder<> &Builder, Value *LHS, Value *RHS,
-const Twine &Name, Instruction *I) const {
-  assert(isVectorizable() &&
- "Expected add|fadd or min/max reduction operation.");
-  Value *Op = createOp(Builder, LHS, RHS, Name);
-  if (RecurrenceDescriptor::isIntMinMaxRecurrenceKind(Kind)) {
-if (auto *Sel = dyn_cast(Op)) {
-  propagateIRFlags(Sel->getCondition(),
-   cast(I)->getCondition());
-}
-  }
-  propagateIRFlags(Op, I);
-  return Op;
-}
   };
 
   WeakTrackingVH ReductionRoot;
@@ -6642,6 +6567,76 @@ class HorizontalReduction {
 }
   }
 
+  /// Creates reduction operation with the current opcode.
+  static Value *createOp(IRBuilder<> &Builder, RecurKind Kind, Value *LHS,
+ Value *RHS, const Twine &Name) {
+    unsigned RdxOpcode = RecurrenceDescriptor::getOpcode(Kind);

[llvm-branch-commits] [llvm] 1c54112 - [SLP] refactor more reduction functions; NFC

2021-01-20 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2021-01-20T11:14:48-05:00
New Revision: 1c54112a5762ebab2c14a90c55f27d00bfced7f8

URL: 
https://github.com/llvm/llvm-project/commit/1c54112a5762ebab2c14a90c55f27d00bfced7f8
DIFF: 
https://github.com/llvm/llvm-project/commit/1c54112a5762ebab2c14a90c55f27d00bfced7f8.diff

LOG: [SLP] refactor more reduction functions; NFC

We were able to remove almost all of the state from
OperationData, so these don't make sense as members
of that class - just pass the RecurKind in as a param.

More streamlining is possible, but I'm trying to avoid
logic/typo bugs while fixing this. Eventually, we should
not need the `OperationData` class.

Added: 


Modified: 
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 




diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp 
b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 3d657b0b898c..3192d7959f70 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -6427,76 +6427,6 @@ class HorizontalReduction {
   return IsLeafValue || Kind != RecurKind::None;
 }
 
-/// Return true if this operation is a cmp+select idiom.
-bool isCmpSel() const {
-  assert(Kind != RecurKind::None && "Expected reduction operation.");
-  return RecurrenceDescriptor::isIntMinMaxRecurrenceKind(Kind);
-}
-
-/// Get the index of the first operand.
-unsigned getFirstOperandIndex() const {
-  assert(!!*this && "The opcode is not set.");
-  // We allow calling this before 'Kind' is set, so handle that specially.
-  if (Kind == RecurKind::None)
-return 0;
-  return isCmpSel() ? 1 : 0;
-}
-
-/// Total number of operands in the reduction operation.
-unsigned getNumberOfOperands() const {
-  assert(Kind != RecurKind::None && !!*this &&
- "Expected reduction operation.");
-  return isCmpSel() ? 3 : 2;
-}
-
-/// Checks if the instruction is in basic block \p BB.
-/// For a min/max reduction check that both compare and select are in \p 
BB.
-bool hasSameParent(Instruction *I, BasicBlock *BB, bool IsRedOp) const {
-  assert(Kind != RecurKind::None && !!*this &&
- "Expected reduction operation.");
-  if (IsRedOp && isCmpSel()) {
-auto *Cmp = cast(cast(I)->getCondition());
-return I->getParent() == BB && Cmp && Cmp->getParent() == BB;
-  }
-  return I->getParent() == BB;
-}
-
-/// Expected number of uses for reduction operations/reduced values.
-bool hasRequiredNumberOfUses(Instruction *I, bool IsReductionOp) const {
-  assert(Kind != RecurKind::None && !!*this &&
- "Expected reduction operation.");
-  // SelectInst must be used twice while the condition op must have single
-  // use only.
-  if (isCmpSel())
-return I->hasNUses(2) &&
-   (!IsReductionOp ||
-cast(I)->getCondition()->hasOneUse());
-
-  // Arithmetic reduction operation must be used once only.
-  return I->hasOneUse();
-}
-
-/// Initializes the list of reduction operations.
-void initReductionOps(ReductionOpsListType &ReductionOps) {
-  assert(Kind != RecurKind::None && !!*this &&
- "Expected reduction operation.");
-  if (isCmpSel())
-ReductionOps.assign(2, ReductionOpsType());
-  else
-ReductionOps.assign(1, ReductionOpsType());
-}
-
-/// Add all reduction operations for the reduction instruction \p I.
-void addReductionOps(Instruction *I, ReductionOpsListType &ReductionOps) {
-  assert(Kind != RecurKind::None && "Expected reduction operation.");
-  if (isCmpSel()) {
-ReductionOps[0].emplace_back(cast(I)->getCondition());
-ReductionOps[1].emplace_back(I);
-  } else {
-ReductionOps[0].emplace_back(I);
-  }
-}
-
 /// Checks if instruction is associative and can be vectorized.
 bool isAssociative(Instruction *I) const {
   assert(Kind != RecurKind::None && "Expected reduction operation.");
@@ -6529,16 +6459,6 @@ class HorizontalReduction {
 
 /// Get kind of reduction data.
 RecurKind getKind() const { return Kind; }
-Value *getLHS(Instruction *I) const {
-  if (Kind == RecurKind::None)
-return nullptr;
-  return I->getOperand(getFirstOperandIndex());
-}
-Value *getRHS(Instruction *I) const {
-  if (Kind == RecurKind::None)
-return nullptr;
-  return I->getOperand(getFirstOperandIndex() + 1);
-}
   };
 
   WeakTrackingVH ReductionRoot;
@@ -6559,7 +6479,7 @@ class HorizontalReduction {
   // Do not perform analysis of remaining operands of ParentStackElem.first
   // instruction, this whole instruction is an extra argument.
   OperationData OpData = getOperationData(ParentStackElem.first);
      ParentStackElem.second = OpData.getNumberOfOperands();

[llvm-branch-commits] [llvm] c09be0d - [SLP] reduce reduction code for checking vectorizable ops; NFC

2021-01-20 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2021-01-20T11:14:48-05:00
New Revision: c09be0d2a0f930a128c946329b42eef45d53062a

URL: 
https://github.com/llvm/llvm-project/commit/c09be0d2a0f930a128c946329b42eef45d53062a
DIFF: 
https://github.com/llvm/llvm-project/commit/c09be0d2a0f930a128c946329b42eef45d53062a.diff

LOG: [SLP] reduce reduction code for checking vectorizable ops; NFC

This is another step towards removing `OperationData` and
fixing FMF matching/propagation bugs when forming reductions.

Added: 


Modified: 
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 




diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp 
b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 3192d7959f70..2597f88ab88d 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -6409,9 +6409,6 @@ class HorizontalReduction {
 RecurKind Kind = RecurKind::None;
 bool IsLeafValue = false;
 
-/// Checks if the reduction operation can be vectorized.
-bool isVectorizable() const { return Kind != RecurKind::None; }
-
   public:
 explicit OperationData() = default;
 
@@ -6427,29 +6424,6 @@ class HorizontalReduction {
   return IsLeafValue || Kind != RecurKind::None;
 }
 
-/// Checks if instruction is associative and can be vectorized.
-bool isAssociative(Instruction *I) const {
-  assert(Kind != RecurKind::None && "Expected reduction operation.");
-  if (RecurrenceDescriptor::isIntMinMaxRecurrenceKind(Kind))
-return true;
-
-  if (Kind == RecurKind::FMax || Kind == RecurKind::FMin) {
-// FP min/max are associative except for NaN and -0.0. We do not
-// have to rule out -0.0 here because the intrinsic semantics do not
-// specify a fixed result for it.
-// TODO: This is artificially restricted to fast because the code that
-//   creates reductions assumes/produces fast ops.
-return I->getFastMathFlags().isFast();
-  }
-
-  return I->isAssociative();
-}
-
-/// Checks if the reduction operation can be vectorized.
-bool isVectorizable(Instruction *I) const {
-  return isVectorizable() && isAssociative(I);
-}
-
 /// Checks if two operation data are both a reduction op or both a reduced
 /// value.
 bool operator==(const OperationData &OD) const {
@@ -6466,6 +6440,25 @@ class HorizontalReduction {
   /// The operation data of the reduction operation.
   OperationData RdxTreeInst;
 
+  /// Checks if instruction is associative and can be vectorized.
+  static bool isVectorizable(RecurKind Kind, Instruction *I) {
+if (Kind == RecurKind::None)
+  return false;
+if (RecurrenceDescriptor::isIntMinMaxRecurrenceKind(Kind))
+  return true;
+
+if (Kind == RecurKind::FMax || Kind == RecurKind::FMin) {
+  // FP min/max are associative except for NaN and -0.0. We do not
+  // have to rule out -0.0 here because the intrinsic semantics do not
+  // specify a fixed result for it.
+  // TODO: This is artificially restricted to fast because the code that
+  //   creates reductions assumes/produces fast ops.
+  return I->getFastMathFlags().isFast();
+}
+
+return I->isAssociative();
+  }
+
   /// Checks if the ParentStackElem.first should be marked as a reduction
   /// operation with an extra argument or as extra argument itself.
   void markExtraArg(std::pair &ParentStackElem,
@@ -6694,8 +6687,7 @@ class HorizontalReduction {
   }
 
   /// Initializes the list of reduction operations.
-  static void initReductionOps(RecurKind Kind,
-   ReductionOpsListType &ReductionOps) {
+  void initReductionOps(RecurKind Kind) {
 if (isCmpSel(Kind))
   ReductionOps.assign(2, ReductionOpsType());
 else
@@ -6703,8 +6695,7 @@ class HorizontalReduction {
   }
 
   /// Add all reduction operations for the reduction instruction \p I.
-  static void addReductionOps(RecurKind Kind, Instruction *I,
-  ReductionOpsListType &ReductionOps) {
+  void addReductionOps(RecurKind Kind, Instruction *I) {
 assert(Kind != RecurKind::None && "Expected reduction operation.");
 if (isCmpSel(Kind)) {
   ReductionOps[0].emplace_back(cast(I)->getCondition());
@@ -6750,7 +6741,7 @@ class HorizontalReduction {
   }
 }
 
-if (!RdxTreeInst.isVectorizable(B))
+if (!isVectorizable(RdxTreeInst.getKind(), B))
   return false;
 
 // Analyze "regular" integer/FP types for reductions - no target-specific
@@ -6772,7 +6763,7 @@ class HorizontalReduction {
 SmallVector, 32> Stack;
 Stack.push_back(
 std::make_pair(B, getFirstOperandIndex(RdxTreeInst.getKind(;
-initReductionOps(RdxTreeInst.getKind(), ReductionOps);
+initReductionOps(RdxTreeInst.getKind());
 while (!Stack.empty()) {
   Instruction *TreeN = Stack.back()

[llvm-branch-commits] [llvm] 070af1b - [InstCombine] avoid crashing on attribute propagation

2021-01-21 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2021-01-21T08:13:26-05:00
New Revision: 070af1b7887f80383d8473bb4da565edbde6c6b0

URL: 
https://github.com/llvm/llvm-project/commit/070af1b7887f80383d8473bb4da565edbde6c6b0
DIFF: 
https://github.com/llvm/llvm-project/commit/070af1b7887f80383d8473bb4da565edbde6c6b0.diff

LOG: [InstCombine] avoid crashing on attribute propagation

In https://llvm.org/PR48810, we are crashing while trying to
propagate attributes from mempcpy (which returns void*) to memcpy
(which returns nothing, i.e. void).

We can avoid the crash by removing known incompatible
attributes for the void return type.

I'm not sure if this goes far enough (should we just drop all
attributes since this isn't the same function?). We also need
to audit other transforms in LibCallSimplifier to make sure
there are no other cases that have the same problem.
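
For reference, a sketch of the failure mode (names are illustrative;
compare the PR48810 test below). mempcpy returns i8*, so its call site may
carry return attributes that are invalid on memcpy's void return:

  %r = call dereferenceable(1) i8* @mempcpy(i8* %d, i8* %s, i64 %n)
  ; copying the attributes verbatim would produce the malformed:
  ;   call dereferenceable(1) void @llvm.memcpy.p0i8.p0i8.i64(...)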

Differential Revision: https://reviews.llvm.org/D95088

Added: 


Modified: 
llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
llvm/test/Transforms/InstCombine/mempcpy.ll

Removed: 




diff  --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp 
b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 99b28b0a832c..b68e45363811 100644
--- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -1150,7 +1150,12 @@ Value *LibCallSimplifier::optimizeMemPCpy(CallInst *CI, 
IRBuilderBase &B) {
   // mempcpy(x, y, n) -> llvm.memcpy(align 1 x, align 1 y, n), x + n
   CallInst *NewCI =
   B.CreateMemCpy(Dst, Align(1), CI->getArgOperand(1), Align(1), N);
+  // Propagate attributes, but memcpy has no return value, so make sure that
+  // any return attributes are compliant.
+  // TODO: Attach return value attributes to the 1st operand to preserve them?
   NewCI->setAttributes(CI->getAttributes());
+  NewCI->removeAttributes(AttributeList::ReturnIndex,
+  AttributeFuncs::typeIncompatible(NewCI->getType()));
   return B.CreateInBoundsGEP(B.getInt8Ty(), Dst, N);
 }
 

diff  --git a/llvm/test/Transforms/InstCombine/mempcpy.ll 
b/llvm/test/Transforms/InstCombine/mempcpy.ll
index 79158a3a0a6d..61e7ec4a3339 100644
--- a/llvm/test/Transforms/InstCombine/mempcpy.ll
+++ b/llvm/test/Transforms/InstCombine/mempcpy.ll
@@ -53,4 +53,15 @@ define i8* @memcpy_big_const_n(i8* %d, i8* nocapture 
readonly %s) {
   ret i8* %r
 }
 
+; The original call may have attributes that can not propagate to memcpy.
+
+define i32 @PR48810() {
+; CHECK-LABEL: @PR48810(
+; CHECK-NEXT:call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 undef, i8* 
align 536870912 null, i64 undef, i1 false)
+; CHECK-NEXT:ret i32 undef
+;
+  %r = call dereferenceable(1) i8* @mempcpy(i8* undef, i8* null, i64 undef)
+  ret i32 undef
+}
+
 declare i8* @mempcpy(i8*, i8* nocapture readonly, i64)



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] d777533 - [SLP] simplify reduction matching

2021-01-21 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2021-01-21T14:58:57-05:00
New Revision: d77753381fe024434ae8ffaaacfe4b9ed9d4d760

URL: 
https://github.com/llvm/llvm-project/commit/d77753381fe024434ae8ffaaacfe4b9ed9d4d760
DIFF: 
https://github.com/llvm/llvm-project/commit/d77753381fe024434ae8ffaaacfe4b9ed9d4d760.diff

LOG: [SLP] simplify reduction matching

This is NFC-intended and removes the "OperationData"
class, which had become nothing more than a recurrence
(reduction) type.

I adjusted the matching logic to distinguish
instructions from non-instructions - that's all that
the "IsLeafValue" member was keeping track of.

Added: 


Modified: 
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 




diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp 
b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 2597f88ab88d..73260016f443 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -6401,44 +6401,9 @@ class HorizontalReduction {
   SmallVector ReducedVals;
   // Use map vector to make stable output.
   MapVector ExtraArgs;
-
-  /// This wraps functionality around a RecurKind (reduction kind).
-  /// TODO: Remove this class if callers can use the 'Kind' value directly?
-  class OperationData {
-/// Kind of the reduction operation.
-RecurKind Kind = RecurKind::None;
-bool IsLeafValue = false;
-
-  public:
-explicit OperationData() = default;
-
-/// Constructor for reduced values. They are identified by the bool only.
-explicit OperationData(Instruction &I) { IsLeafValue = true; }
-
-/// Constructor for reduction operations with opcode and type.
-OperationData(RecurKind RdxKind) : Kind(RdxKind) {
-  assert(Kind != RecurKind::None && "Expected reduction operation.");
-}
-
-explicit operator bool() const {
-  return IsLeafValue || Kind != RecurKind::None;
-}
-
-/// Checks if two operation data are both a reduction op or both a reduced
-/// value.
-bool operator==(const OperationData &OD) const {
-  return Kind == OD.Kind && IsLeafValue == OD.IsLeafValue;
-}
-bool operator!=(const OperationData &OD) const { return !(*this == OD); }
-
-/// Get kind of reduction data.
-RecurKind getKind() const { return Kind; }
-  };
-
   WeakTrackingVH ReductionRoot;
-
-  /// The operation data of the reduction operation.
-  OperationData RdxTreeInst;
+  /// The type of reduction operation.
+  RecurKind RdxKind;
 
   /// Checks if instruction is associative and can be vectorized.
   static bool isVectorizable(RecurKind Kind, Instruction *I) {
@@ -6471,8 +6436,8 @@ class HorizontalReduction {
   // in this case.
   // Do not perform analysis of remaining operands of ParentStackElem.first
   // instruction, this whole instruction is an extra argument.
-  OperationData OpData = getOperationData(ParentStackElem.first);
-  ParentStackElem.second = getNumberOfOperands(OpData.getKind());
+  RecurKind RdxKind = getRdxKind(ParentStackElem.first);
+  ParentStackElem.second = getNumberOfOperands(RdxKind);
 } else {
   // We ran into something like:
   // ParentStackElem.first += ... + ExtraArg + ...
@@ -6550,39 +6515,37 @@ class HorizontalReduction {
 return Op;
   }
 
-  static OperationData getOperationData(Instruction *I) {
-if (!I)
-  return OperationData();
-
+  static RecurKind getRdxKind(Instruction *I) {
+assert(I && "Expected instruction for reduction matching");
 TargetTransformInfo::ReductionFlags RdxFlags;
 if (match(I, m_Add(m_Value(), m_Value(
-  return OperationData(RecurKind::Add);
+  return RecurKind::Add;
 if (match(I, m_Mul(m_Value(), m_Value(
-  return OperationData(RecurKind::Mul);
+  return RecurKind::Mul;
 if (match(I, m_And(m_Value(), m_Value(
-  return OperationData(RecurKind::And);
+  return RecurKind::And;
 if (match(I, m_Or(m_Value(), m_Value(
-  return OperationData(RecurKind::Or);
+  return RecurKind::Or;
 if (match(I, m_Xor(m_Value(), m_Value(
-  return OperationData(RecurKind::Xor);
+  return RecurKind::Xor;
 if (match(I, m_FAdd(m_Value(), m_Value(
-  return OperationData(RecurKind::FAdd);
+  return RecurKind::FAdd;
 if (match(I, m_FMul(m_Value(), m_Value(
-  return OperationData(RecurKind::FMul);
+  return RecurKind::FMul;
 
 if (match(I, m_Intrinsic(m_Value(), m_Value(
-  return OperationData(RecurKind::FMax);
+  return RecurKind::FMax;
 if (match(I, m_Intrinsic(m_Value(), m_Value(
-  return OperationData(RecurKind::FMin);
+  return RecurKind::FMin;
 
 if (match(I, m_SMax(m_Value(), m_Value(
-  return OperationData(RecurKind::SMax);
+  return RecurKind::SMax;
 if (match(I, m_SMin(m_Value(), m_Value(
-  return OperationData(RecurKind::SMin);
+  return RecurKind::SMin;

[llvm-branch-commits] [llvm] 2f03528 - [SLP] rename reduction variable to avoid shadowing; NFC

2021-01-21 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2021-01-21T16:02:38-05:00
New Revision: 2f03528f5e7fd9df0a12091392e000c697497262

URL: 
https://github.com/llvm/llvm-project/commit/2f03528f5e7fd9df0a12091392e000c697497262
DIFF: 
https://github.com/llvm/llvm-project/commit/2f03528f5e7fd9df0a12091392e000c697497262.diff

LOG: [SLP] rename reduction variable to avoid shadowing; NFC

The code structure can likely be improved now that
'OperationData' is gone.

Added: 


Modified: 
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 




diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp 
b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 73260016f443..cee388e62bf2 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -6436,8 +6436,8 @@ class HorizontalReduction {
   // in this case.
   // Do not perform analysis of remaining operands of ParentStackElem.first
   // instruction, this whole instruction is an extra argument.
-  RecurKind RdxKind = getRdxKind(ParentStackElem.first);
-  ParentStackElem.second = getNumberOfOperands(RdxKind);
+  RecurKind ParentRdxKind = getRdxKind(ParentStackElem.first);
+  ParentStackElem.second = getNumberOfOperands(ParentRdxKind);
 } else {
   // We ran into something like:
   // ParentStackElem.first += ... + ExtraArg + ...



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] a4914dc - [SLP] do not traverse constant uses

2021-01-22 Thread Sanjay Patel via llvm-branch-commits

Author: Anton Rapetov
Date: 2021-01-22T08:14:09-05:00
New Revision: a4914dc1f2b4a49cf488d3be7a01fe7238c889d8

URL: 
https://github.com/llvm/llvm-project/commit/a4914dc1f2b4a49cf488d3be7a01fe7238c889d8
DIFF: 
https://github.com/llvm/llvm-project/commit/a4914dc1f2b4a49cf488d3be7a01fe7238c889d8.diff

LOG: [SLP] do not traverse constant uses

Walking the use list of a Constant (particularly, ConstantData)
is not scalable, since a given constant may be used by many
instructions in many functions in many modules.
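
For reference (an illustrative example, not from the patch): ConstantData
values are uniqued per LLVMContext, so a single constant such as 'i32 0' is
shared by every user in that context:

  store i32 0, i32* %p    ; in function A
  store i32 0, i32* %q    ; in function B, possibly in another module

Both stores are users of the one uniqued 'i32 0', so enumerating its use
list from a function pass can walk arbitrarily far outside the current
function.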

Differential Revision: https://reviews.llvm.org/D94713

Added: 


Modified: 
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 




diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp 
b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index cee388e62bf2..78ce4870588c 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -987,6 +987,14 @@ class BoUpSLP {
   std::array, 2> Values = {{LHS, RHS}};
   for (int Idx = 0, IdxE = Values.size(); Idx != IdxE; ++Idx) {
 Value *V = Values[Idx].first;
+if (isa(V)) {
+  // Since this is a function pass, it doesn't make semantic sense to
+  // walk the users of a subclass of Constant. The users could be in
+  // another function, or even another module that happens to be in
+  // the same LLVMContext.
+  continue;
+}
+
 // Calculate the absolute lane, using the minimum relative lane of LHS
 // and RHS as base and Idx as the offset.
 int Ln = std::min(LHS.second, RHS.second) + Idx;



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] e679eea - [InstCombine] add tests for abs(sext X); NFC

2021-01-22 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2021-01-22T13:36:04-05:00
New Revision: e679eea6d20d6e6e749525827c95f42bfef16285

URL: 
https://github.com/llvm/llvm-project/commit/e679eea6d20d6e6e749525827c95f42bfef16285
DIFF: 
https://github.com/llvm/llvm-project/commit/e679eea6d20d6e6e749525827c95f42bfef16285.diff

LOG: [InstCombine] add tests for abs(sext X); NFC

https://llvm.org/PR48816

Added: 


Modified: 
llvm/test/Transforms/InstCombine/abs-intrinsic.ll

Removed: 




diff  --git a/llvm/test/Transforms/InstCombine/abs-intrinsic.ll 
b/llvm/test/Transforms/InstCombine/abs-intrinsic.ll
index 30e5a9ddab3c..baeb44d1d8dc 100644
--- a/llvm/test/Transforms/InstCombine/abs-intrinsic.ll
+++ b/llvm/test/Transforms/InstCombine/abs-intrinsic.ll
@@ -292,3 +292,66 @@ define i1 @abs_ne_int_min_nopoison(i8 %x) {
   %cmp = icmp ne i8 %abs, -128
   ret i1 %cmp
 }
+
+define i32 @abs_sext(i8 %x) {
+; CHECK-LABEL: @abs_sext(
+; CHECK-NEXT:[[S:%.*]] = sext i8 [[X:%.*]] to i32
+; CHECK-NEXT:[[A:%.*]] = call i32 @llvm.abs.i32(i32 [[S]], i1 false)
+; CHECK-NEXT:ret i32 [[A]]
+;
+  %s = sext i8 %x to i32
+  %a = call i32 @llvm.abs.i32(i32 %s, i1 0)
+  ret i32 %a
+}
+
+define <3 x i82> @abs_nsw_sext(<3 x i7> %x) {
+; CHECK-LABEL: @abs_nsw_sext(
+; CHECK-NEXT:[[S:%.*]] = sext <3 x i7> [[X:%.*]] to <3 x i82>
+; CHECK-NEXT:[[A:%.*]] = call <3 x i82> @llvm.abs.v3i82(<3 x i82> [[S]], 
i1 true)
+; CHECK-NEXT:ret <3 x i82> [[A]]
+;
+  %s = sext <3 x i7> %x to <3 x i82>
+  %a = call <3 x i82> @llvm.abs.v3i82(<3 x i82> %s, i1 1)
+  ret <3 x i82> %a
+}
+
+define i32 @abs_sext_extra_use(i8 %x, i32* %p) {
+; CHECK-LABEL: @abs_sext_extra_use(
+; CHECK-NEXT:[[S:%.*]] = sext i8 [[X:%.*]] to i32
+; CHECK-NEXT:store i32 [[S]], i32* [[P:%.*]], align 4
+; CHECK-NEXT:[[A:%.*]] = call i32 @llvm.abs.i32(i32 [[S]], i1 false)
+; CHECK-NEXT:ret i32 [[A]]
+;
+  %s = sext i8 %x to i32
+  store i32 %s, i32* %p
+  %a = call i32 @llvm.abs.i32(i32 %s, i1 0)
+  ret i32 %a
+}
+
+; PR48816
+
+define i8 @trunc_abs_sext(i8 %x) {
+; CHECK-LABEL: @trunc_abs_sext(
+; CHECK-NEXT:[[S:%.*]] = sext i8 [[X:%.*]] to i32
+; CHECK-NEXT:[[A:%.*]] = tail call i32 @llvm.abs.i32(i32 [[S]], i1 true)
+; CHECK-NEXT:[[T:%.*]] = trunc i32 [[A]] to i8
+; CHECK-NEXT:ret i8 [[T]]
+;
+  %s = sext i8 %x to i32
+  %a = tail call i32 @llvm.abs.i32(i32 %s, i1 true)
+  %t = trunc i32 %a to i8
+  ret i8 %t
+}
+
+define <4 x i8> @trunc_abs_sext_vec(<4 x i8> %x) {
+; CHECK-LABEL: @trunc_abs_sext_vec(
+; CHECK-NEXT:[[S:%.*]] = sext <4 x i8> [[X:%.*]] to <4 x i32>
+; CHECK-NEXT:[[A:%.*]] = tail call <4 x i32> @llvm.abs.v4i32(<4 x i32> 
[[S]], i1 true)
+; CHECK-NEXT:[[T:%.*]] = trunc <4 x i32> [[A]] to <4 x i8>
+; CHECK-NEXT:ret <4 x i8> [[T]]
+;
+  %s = sext <4 x i8> %x to <4 x i32>
+  %a = tail call <4 x i32> @llvm.abs.v4i32(<4 x i32> %s, i1 true)
+  %t = trunc <4 x i32> %a to <4 x i8>
+  ret <4 x i8> %t
+}



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 411c144 - [InstCombine] narrow abs with sign-extended input

2021-01-22 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2021-01-22T13:36:04-05:00
New Revision: 411c144e4c99f4d4370ed2b9c248dc6bb9a39648

URL: 
https://github.com/llvm/llvm-project/commit/411c144e4c99f4d4370ed2b9c248dc6bb9a39648
DIFF: 
https://github.com/llvm/llvm-project/commit/411c144e4c99f4d4370ed2b9c248dc6bb9a39648.diff

LOG: [InstCombine] narrow abs with sign-extended input

In the motivating cases from https://llvm.org/PR48816,
we have a trailing trunc. But that is not required to
reduce the abs width:
https://alive2.llvm.org/ce/z/ECaz-p
...as long as we clear the int-min-is-poison bit (nsw).

We have some existing tests that are affected, and I'm
not sure what the overall implications are, but in general
we favor narrowing operations over preserving nsw/nuw.

If that causes problems, we could restrict this transform
based on type (shouldChangeType() and/or vector vs. scalar).
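
For reference, a before/after sketch of the basic fold (illustrative IR;
see the alive2 link above for the proof):

  ; before:
  %s = sext i8 %x to i32
  %a = call i32 @llvm.abs.i32(i32 %s, i1 true)

  ; after (the int-min-is-poison bit is cleared on the narrow abs):
  %n = call i8 @llvm.abs.i8(i8 %x, i1 false)
  %a = zext i8 %n to i32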

Differential Revision: https://reviews.llvm.org/D95235

Added: 


Modified: 
llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
llvm/test/Transforms/InstCombine/abs-1.ll
llvm/test/Transforms/InstCombine/abs-intrinsic.ll

Removed: 




diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp 
b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 7d63b30d35f8..5ba51d255109 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -820,6 +820,14 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) 
{
   return BinaryOperator::CreateNeg(IIOperand);
 }
 
+// abs (sext X) --> zext (abs X*)
+// Clear the IsIntMin (nsw) bit on the abs to allow narrowing.
+if (match(IIOperand, m_OneUse(m_SExt(m_Value(X) {
+  Value *NarrowAbs =
+  Builder.CreateBinaryIntrinsic(Intrinsic::abs, X, Builder.getFalse());
+  return CastInst::Create(Instruction::ZExt, NarrowAbs, II->getType());
+}
+
 break;
   }
   case Intrinsic::bswap: {

diff  --git a/llvm/test/Transforms/InstCombine/abs-1.ll 
b/llvm/test/Transforms/InstCombine/abs-1.ll
index 7e5eadf5b25e..7452798ead77 100644
--- a/llvm/test/Transforms/InstCombine/abs-1.ll
+++ b/llvm/test/Transforms/InstCombine/abs-1.ll
@@ -102,9 +102,9 @@ define i8 @abs_canonical_4(i8 %x) {
 
 define i32 @abs_canonical_5(i8 %x) {
 ; CHECK-LABEL: @abs_canonical_5(
-; CHECK-NEXT:[[CONV:%.*]] = sext i8 [[X:%.*]] to i32
-; CHECK-NEXT:[[TMP1:%.*]] = call i32 @llvm.abs.i32(i32 [[CONV]], i1 true)
-; CHECK-NEXT:ret i32 [[TMP1]]
+; CHECK-NEXT:[[TMP1:%.*]] = call i8 @llvm.abs.i8(i8 [[X:%.*]], i1 false)
+; CHECK-NEXT:[[TMP2:%.*]] = zext i8 [[TMP1]] to i32
+; CHECK-NEXT:ret i32 [[TMP2]]
 ;
   %cmp = icmp sgt i8 %x, 0
   %conv = sext i8 %x to i32
@@ -250,9 +250,9 @@ define i8 @nabs_canonical_4(i8 %x) {
 
 define i32 @nabs_canonical_5(i8 %x) {
 ; CHECK-LABEL: @nabs_canonical_5(
-; CHECK-NEXT:[[CONV:%.*]] = sext i8 [[X:%.*]] to i32
-; CHECK-NEXT:[[TMP1:%.*]] = call i32 @llvm.abs.i32(i32 [[CONV]], i1 false)
-; CHECK-NEXT:[[ABS:%.*]] = sub nsw i32 0, [[TMP1]]
+; CHECK-NEXT:[[TMP1:%.*]] = call i8 @llvm.abs.i8(i8 [[X:%.*]], i1 false)
+; CHECK-NEXT:[[TMP2:%.*]] = zext i8 [[TMP1]] to i32
+; CHECK-NEXT:[[ABS:%.*]] = sub nsw i32 0, [[TMP2]]
 ; CHECK-NEXT:ret i32 [[ABS]]
 ;
   %cmp = icmp sgt i8 %x, 0

diff  --git a/llvm/test/Transforms/InstCombine/abs-intrinsic.ll 
b/llvm/test/Transforms/InstCombine/abs-intrinsic.ll
index baeb44d1d8dc..1f5f1c2ba562 100644
--- a/llvm/test/Transforms/InstCombine/abs-intrinsic.ll
+++ b/llvm/test/Transforms/InstCombine/abs-intrinsic.ll
@@ -64,9 +64,9 @@ define <4 x i32> @abs_trailing_zeros_negative_vec(<4 x i32> 
%x) {
 ; sign bits, the abs reduces this to 2 sign bits.
 define i32 @abs_signbits(i30 %x) {
 ; CHECK-LABEL: @abs_signbits(
-; CHECK-NEXT:[[EXT:%.*]] = sext i30 [[X:%.*]] to i32
-; CHECK-NEXT:[[ABS:%.*]] = call i32 @llvm.abs.i32(i32 [[EXT]], i1 false)
-; CHECK-NEXT:[[ADD:%.*]] = add nuw nsw i32 [[ABS]], 1
+; CHECK-NEXT:[[TMP1:%.*]] = call i30 @llvm.abs.i30(i30 [[X:%.*]], i1 false)
+; CHECK-NEXT:[[NARROW:%.*]] = add nuw i30 [[TMP1]], 1
+; CHECK-NEXT:[[ADD:%.*]] = zext i30 [[NARROW]] to i32
 ; CHECK-NEXT:ret i32 [[ADD]]
 ;
   %ext = sext i30 %x to i32
@@ -77,9 +77,9 @@ define i32 @abs_signbits(i30 %x) {
 
 define <4 x i32> @abs_signbits_vec(<4 x i30> %x) {
 ; CHECK-LABEL: @abs_signbits_vec(
-; CHECK-NEXT:[[EXT:%.*]] = sext <4 x i30> [[X:%.*]] to <4 x i32>
-; CHECK-NEXT:[[ABS:%.*]] = call <4 x i32> @llvm.abs.v4i32(<4 x i32> 
[[EXT]], i1 false)
-; CHECK-NEXT:[[ADD:%.*]] = add nuw nsw <4 x i32> [[ABS]], 
+; CHECK-NEXT:[[TMP1:%.*]] = call <4 x i30> @llvm.abs.v4i30(<4 x i30> 
[[X:%.*]], i1 false)
+; CHECK-NEXT:[[NARROW:%.*]] = add nuw <4 x i30> [[TMP1]], 
+; CHECK-NEXT:[[ADD:%.*]] = zext <4 x i30> [[NARROW]] to <4 x i32>
 ; CHECK-NEXT:ret <4 x i32> [[ADD]]
 ;
  %ext = sext <4 x i30> %x to <4 x i32>

[llvm-branch-commits] [llvm] 1351f71 - [InstSimplify] add tests for ctpop; NFC (PR48608)

2020-12-28 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2020-12-28T16:06:19-05:00
New Revision: 1351f719d49642f7f1254d13e90d8d3a2824dcde

URL: 
https://github.com/llvm/llvm-project/commit/1351f719d49642f7f1254d13e90d8d3a2824dcde
DIFF: 
https://github.com/llvm/llvm-project/commit/1351f719d49642f7f1254d13e90d8d3a2824dcde.diff

LOG: [InstSimplify] add tests for ctpop; NFC (PR48608)

Added: 


Modified: 
llvm/test/Transforms/InstSimplify/call.ll

Removed: 




diff  --git a/llvm/test/Transforms/InstSimplify/call.ll 
b/llvm/test/Transforms/InstSimplify/call.ll
index bfbd101b046c..fa73e07b4c45 100644
--- a/llvm/test/Transforms/InstSimplify/call.ll
+++ b/llvm/test/Transforms/InstSimplify/call.ll
@@ -1287,6 +1287,8 @@ define i32 @call_undef_musttail() {
 
 ; This is not the builtin fmax, so we don't know anything about its behavior.
 
+declare float @fmaxf(float, float)
+
 define float @nobuiltin_fmax() {
 ; CHECK-LABEL: @nobuiltin_fmax(
 ; CHECK-NEXT:[[M:%.*]] = call float @fmaxf(float 0.00e+00, float 
1.00e+00) [[ATTR3:#.*]]
@@ -1298,6 +1300,62 @@ define float @nobuiltin_fmax() {
   ret float %r
 }
 
-declare float @fmaxf(float, float)
+
+declare i32 @llvm.ctpop.i32(i32)
+declare <3 x i33> @llvm.ctpop.v3i33(<3 x i33>)
+declare i1 @llvm.ctpop.i1(i1)
+
+define i32 @ctpop_lowbit(i32 %x) {
+; CHECK-LABEL: @ctpop_lowbit(
+; CHECK-NEXT:[[B:%.*]] = and i32 [[X:%.*]], 1
+; CHECK-NEXT:[[R:%.*]] = call i32 @llvm.ctpop.i32(i32 [[B]])
+; CHECK-NEXT:ret i32 [[R]]
+;
+  %b = and i32 %x, 1
+  %r = call i32 @llvm.ctpop.i32(i32 %b)
+  ret i32 %r
+}
+
+define i32 @ctpop_pow2(i32 %x) {
+; CHECK-LABEL: @ctpop_pow2(
+; CHECK-NEXT:[[B:%.*]] = and i32 [[X:%.*]], 4
+; CHECK-NEXT:[[R:%.*]] = call i32 @llvm.ctpop.i32(i32 [[B]])
+; CHECK-NEXT:ret i32 [[R]]
+;
+  %b = and i32 %x, 4
+  %r = call i32 @llvm.ctpop.i32(i32 %b)
+  ret i32 %r
+}
+
+define <3 x i33> @ctpop_signbit(<3 x i33> %x) {
+; CHECK-LABEL: @ctpop_signbit(
+; CHECK-NEXT:[[B:%.*]] = lshr <3 x i33> [[X:%.*]], 
+; CHECK-NEXT:[[R:%.*]] = tail call <3 x i33> @llvm.ctpop.v3i33(<3 x i33> 
[[B]])
+; CHECK-NEXT:ret <3 x i33> [[R]]
+;
+  %b = lshr <3 x i33> %x, 
+  %r = tail call <3 x i33> @llvm.ctpop.v3i33(<3 x i33> %b)
+  ret <3 x i33> %r
+}
+
+define <3 x i33> @ctpop_notsignbit(<3 x i33> %x) {
+; CHECK-LABEL: @ctpop_notsignbit(
+; CHECK-NEXT:[[B:%.*]] = lshr <3 x i33> [[X:%.*]], 
+; CHECK-NEXT:[[R:%.*]] = tail call <3 x i33> @llvm.ctpop.v3i33(<3 x i33> 
[[B]])
+; CHECK-NEXT:ret <3 x i33> [[R]]
+;
+  %b = lshr <3 x i33> %x, 
+  %r = tail call <3 x i33> @llvm.ctpop.v3i33(<3 x i33> %b)
+  ret <3 x i33> %r
+}
+
+define i1 @ctpop_bool(i1 %x) {
+; CHECK-LABEL: @ctpop_bool(
+; CHECK-NEXT:[[R:%.*]] = tail call i1 @llvm.ctpop.i1(i1 [[X:%.*]])
+; CHECK-NEXT:ret i1 [[R]]
+;
+  %r = tail call i1 @llvm.ctpop.i1(i1 %x)
+  ret i1 %r
+}
 
 attributes #0 = { nobuiltin readnone }



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 236c452 - [InstSimplify] remove ctpop of 1 (low) bit

2020-12-28 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2020-12-28T16:06:20-05:00
New Revision: 236c4524a7cd3051a150690b4f4f55f496e7e248

URL: 
https://github.com/llvm/llvm-project/commit/236c4524a7cd3051a150690b4f4f55f496e7e248
DIFF: 
https://github.com/llvm/llvm-project/commit/236c4524a7cd3051a150690b4f4f55f496e7e248.diff

LOG: [InstSimplify] remove ctpop of 1 (low) bit

https://llvm.org/PR48608

As noted in the test comment, we could handle a more general
case in instcombine and remove this, but I don't have evidence
that we need to do that.

https://alive2.llvm.org/ce/z/MRW9gD
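
For reference, the basic fold in IR form (illustrative):

  %b = and i32 %x, 1
  %r = call i32 @llvm.ctpop.i32(i32 %b)  ; simplifies to %b

If every bit except bit 0 is known zero, the population count equals the
value itself, which is what the MaskedValueIsZero check establishes.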

Added: 


Modified: 
llvm/lib/Analysis/InstructionSimplify.cpp
llvm/test/Transforms/InstCombine/ctpop.ll
llvm/test/Transforms/InstSimplify/call.ll

Removed: 




diff  --git a/llvm/lib/Analysis/InstructionSimplify.cpp 
b/llvm/lib/Analysis/InstructionSimplify.cpp
index 27b73a5a8236..30c7ecff7940 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -5246,6 +5246,15 @@ static Value *simplifyUnaryIntrinsic(Function *F, Value 
*Op0,
 // bitreverse(bitreverse(x)) -> x
 if (match(Op0, m_BitReverse(m_Value(X return X;
 break;
+  case Intrinsic::ctpop: {
+// If everything but the lowest bit is zero, that bit is the pop-count. Ex:
+// ctpop(and X, 1) --> and X, 1
+unsigned BitWidth = Op0->getType()->getScalarSizeInBits();
+if (MaskedValueIsZero(Op0, APInt::getHighBitsSet(BitWidth, BitWidth - 1),
+  Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
+  return Op0;
+break;
+  }
   case Intrinsic::exp:
 // exp(log(x)) -> x
 if (Q.CxtI->hasAllowReassoc() &&

diff  --git a/llvm/test/Transforms/InstCombine/ctpop.ll 
b/llvm/test/Transforms/InstCombine/ctpop.ll
index 33b95b02dd2e..237fb0458225 100644
--- a/llvm/test/Transforms/InstCombine/ctpop.ll
+++ b/llvm/test/Transforms/InstCombine/ctpop.ll
@@ -84,11 +84,11 @@ define <2 x i1> @test5vec(<2 x i32> %arg) {
   ret <2 x i1> %res
 }
 
-; Make sure we don't add range metadata to i1 ctpop.
+; No intrinsic or range needed - ctpop of bool bit is the bit itself.
+
 define i1 @test6(i1 %arg) {
 ; CHECK-LABEL: @test6(
-; CHECK-NEXT:[[CNT:%.*]] = call i1 @llvm.ctpop.i1(i1 [[ARG:%.*]])
-; CHECK-NEXT:ret i1 [[CNT]]
+; CHECK-NEXT:ret i1 [[ARG:%.*]]
 ;
   %cnt = call i1 @llvm.ctpop.i1(i1 %arg)
   ret i1 %cnt

diff  --git a/llvm/test/Transforms/InstSimplify/call.ll 
b/llvm/test/Transforms/InstSimplify/call.ll
index fa73e07b4c45..841582ab8974 100644
--- a/llvm/test/Transforms/InstSimplify/call.ll
+++ b/llvm/test/Transforms/InstSimplify/call.ll
@@ -1308,14 +1308,16 @@ declare i1 @llvm.ctpop.i1(i1)
 define i32 @ctpop_lowbit(i32 %x) {
 ; CHECK-LABEL: @ctpop_lowbit(
 ; CHECK-NEXT:[[B:%.*]] = and i32 [[X:%.*]], 1
-; CHECK-NEXT:[[R:%.*]] = call i32 @llvm.ctpop.i32(i32 [[B]])
-; CHECK-NEXT:ret i32 [[R]]
+; CHECK-NEXT:ret i32 [[B]]
 ;
   %b = and i32 %x, 1
   %r = call i32 @llvm.ctpop.i32(i32 %b)
   ret i32 %r
 }
 
+; Negative test - only low bit allowed
+; This could be reduced by instcombine to and+shift.
+
 define i32 @ctpop_pow2(i32 %x) {
 ; CHECK-LABEL: @ctpop_pow2(
 ; CHECK-NEXT:[[B:%.*]] = and i32 [[X:%.*]], 4
@@ -1330,14 +1332,15 @@ define i32 @ctpop_pow2(i32 %x) {
 define <3 x i33> @ctpop_signbit(<3 x i33> %x) {
 ; CHECK-LABEL: @ctpop_signbit(
 ; CHECK-NEXT:[[B:%.*]] = lshr <3 x i33> [[X:%.*]], 
-; CHECK-NEXT:[[R:%.*]] = tail call <3 x i33> @llvm.ctpop.v3i33(<3 x i33> 
[[B]])
-; CHECK-NEXT:ret <3 x i33> [[R]]
+; CHECK-NEXT:ret <3 x i33> [[B]]
 ;
   %b = lshr <3 x i33> %x, 
   %r = tail call <3 x i33> @llvm.ctpop.v3i33(<3 x i33> %b)
   ret <3 x i33> %r
 }
 
+; Negative test - only 1 bit allowed
+
 define <3 x i33> @ctpop_notsignbit(<3 x i33> %x) {
 ; CHECK-LABEL: @ctpop_notsignbit(
 ; CHECK-NEXT:[[B:%.*]] = lshr <3 x i33> [[X:%.*]], 
@@ -1351,8 +1354,7 @@ define <3 x i33> @ctpop_notsignbit(<3 x i33> %x) {
 
 define i1 @ctpop_bool(i1 %x) {
 ; CHECK-LABEL: @ctpop_bool(
-; CHECK-NEXT:[[R:%.*]] = tail call i1 @llvm.ctpop.i1(i1 [[X:%.*]])
-; CHECK-NEXT:ret i1 [[R]]
+; CHECK-NEXT:ret i1 [[X:%.*]]
 ;
   %r = tail call i1 @llvm.ctpop.i1(i1 %x)
   ret i1 %r





[llvm-branch-commits] [llvm] 21a3a02 - [SLP] replace local reduction enum with RecurrenceKind; NFCI

2020-12-29 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2020-12-29T14:52:11-05:00
New Revision: 21a3a0225d84cd35227fc9d4d08234918a54f8d3

URL: 
https://github.com/llvm/llvm-project/commit/21a3a0225d84cd35227fc9d4d08234918a54f8d3
DIFF: 
https://github.com/llvm/llvm-project/commit/21a3a0225d84cd35227fc9d4d08234918a54f8d3.diff

LOG: [SLP] replace local reduction enum with RecurrenceKind; NFCI

I'm not sure whether the SLP enum predates IVDescriptors'
RecurrenceDescriptor / RecurrenceKind, but the SLP code is now
redundant with that class, so keeping both only complicates
things. We eventually call LoopUtils
createSimpleTargetReduction() to create reduction ops, so we
might as well standardize on those enum names.

There's still a question of whether we need to use TTI::ReductionFlags
vs. MinMaxRecurrenceKind, but that can be another clean-up step.

Another option would just be to flatten the enums in RecurrenceDescriptor
into a single enum. There isn't much benefit (smaller switches?) to
having a min/max subset.
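
To make the standardization concrete, here is a hedged sketch of the kind of
single-switch helper the shared enum enables ('identity' is a hypothetical
name; the real RecurrenceDescriptor::getRecurrenceIdentity has a similar
shape):

  using RD = RecurrenceDescriptor;
  static Constant *identity(RD::RecurrenceKind K, Type *Ty) {
    switch (K) {
    case RD::RK_IntegerAdd:
    case RD::RK_IntegerOr:
    case RD::RK_IntegerXor:
      return ConstantInt::get(Ty, 0);
    case RD::RK_IntegerMult:
      return ConstantInt::get(Ty, 1);
    case RD::RK_IntegerAnd:
      return Constant::getAllOnesValue(Ty);
    case RD::RK_FloatAdd:
      return ConstantFP::getNegativeZero(Ty); // neutral element for fadd
    case RD::RK_FloatMult:
      return ConstantFP::get(Ty, 1.0);
    default:
      llvm_unreachable("no identity for this recurrence kind");
    }
  }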

Added: 


Modified: 
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 




diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp 
b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 9f1768907227..eff0690eda82 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -35,6 +35,7 @@
 #include "llvm/Analysis/CodeMetrics.h"
 #include "llvm/Analysis/DemandedBits.h"
 #include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/IVDescriptors.h"
 #include "llvm/Analysis/LoopAccessAnalysis.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/MemoryLocation.h"
@@ -6445,16 +6446,7 @@ class HorizontalReduction {
   SmallVector ReducedVals;
   // Use map vector to make stable output.
   MapVector ExtraArgs;
-
-  /// Kind of the reduction data.
-  enum ReductionKind {
-RK_None,   /// Not a reduction.
-RK_Arithmetic, /// Binary reduction data.
-RK_SMin,   /// Signed minimum reduction data.
-RK_UMin,   /// Unsigned minimum reduction data.
-RK_SMax,   /// Signed maximum reduction data.
-RK_UMax,   /// Unsigned maximum reduction data.
-  };
+  using RD = RecurrenceDescriptor;
 
   /// Contains info about operation, like its opcode, left and right operands.
   class OperationData {
@@ -6462,20 +6454,27 @@ class HorizontalReduction {
 unsigned Opcode = 0;
 
 /// Kind of the reduction operation.
-ReductionKind Kind = RK_None;
+RD::RecurrenceKind Kind = RD::RK_NoRecurrence;
+TargetTransformInfo::ReductionFlags RdxFlags;
 
 /// Checks if the reduction operation can be vectorized.
 bool isVectorizable() const {
   switch (Kind) {
-  case RK_Arithmetic:
-return Opcode == Instruction::Add || Opcode == Instruction::FAdd ||
-   Opcode == Instruction::Mul || Opcode == Instruction::FMul ||
-   Opcode == Instruction::And || Opcode == Instruction::Or ||
-   Opcode == Instruction::Xor;
-  case RK_SMin:
-  case RK_SMax:
-  case RK_UMin:
-  case RK_UMax:
+  case RD::RK_IntegerAdd:
+return Opcode == Instruction::Add;
+  case RD::RK_IntegerMult:
+return Opcode == Instruction::Mul;
+  case RD::RK_IntegerOr:
+return Opcode == Instruction::Or;
+  case RD::RK_IntegerAnd:
+return Opcode == Instruction::And;
+  case RD::RK_IntegerXor:
+return Opcode == Instruction::Xor;
+  case RD::RK_FloatAdd:
+return Opcode == Instruction::FAdd;
+  case RD::RK_FloatMult:
+return Opcode == Instruction::FMul;
+  case RD::RK_IntegerMinMax:
 return Opcode == Instruction::ICmp;
   default:
 return false;
@@ -6485,33 +6484,31 @@ class HorizontalReduction {
 /// Creates reduction operation with the current opcode.
 Value *createOp(IRBuilder<> &Builder, Value *LHS, Value *RHS,
 const Twine &Name) const {
-  assert(isVectorizable() &&
- "Expected add|fadd or min/max reduction operation.");
-  Value *Cmp = nullptr;
+  assert(isVectorizable() && "Unhandled reduction operation.");
   switch (Kind) {
-  case RK_Arithmetic:
+  case RD::RK_IntegerAdd:
+  case RD::RK_IntegerMult:
+  case RD::RK_IntegerOr:
+  case RD::RK_IntegerAnd:
+  case RD::RK_IntegerXor:
+  case RD::RK_FloatAdd:
+  case RD::RK_FloatMult:
 return Builder.CreateBinOp((Instruction::BinaryOps)Opcode, LHS, RHS,
Name);
-  case RK_SMin:
-assert(Opcode == Instruction::ICmp && "Expected integer types.");
-Cmp = Builder.CreateICmpSLT(LHS, RHS);
-return Builder.CreateSelect(Cmp, LHS, RHS, Name);
-  case RK_SMax:
-assert(Opcode == Instruction::ICmp && "Expected integer types.");
-Cmp = Builder.CreateICmp

[llvm-branch-commits] [llvm] 8d18bc8 - [Utils] reduce code in createTargetReduction(); NFC

2020-12-29 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2020-12-29T15:56:19-05:00
New Revision: 8d18bc8e6db717352811a44a81e76a196530f612

URL: 
https://github.com/llvm/llvm-project/commit/8d18bc8e6db717352811a44a81e76a196530f612
DIFF: 
https://github.com/llvm/llvm-project/commit/8d18bc8e6db717352811a44a81e76a196530f612.diff

LOG: [Utils] reduce code in createTargetReduction(); NFC

The switch duplicated the translation in getRecurrenceBinOp().
This code is still weird because it translates to the TTI
ReductionFlags for min/max, but then createSimpleTargetReduction()
converts that back to RecurrenceDescriptor::MinMaxRecurrenceKind.
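
The round trip described above looks roughly like this (a sketch of the two
translation steps; names match this revision):

  using RD = RecurrenceDescriptor;
  // Step 1 (here): MinMaxRecurrenceKind -> TTI::ReductionFlags.
  RD::MinMaxRecurrenceKind MMK = Desc.getMinMaxRecurrenceKind();
  Flags.IsMaxOp = MMK == RD::MRK_SIntMax || MMK == RD::MRK_UIntMax ||
                  MMK == RD::MRK_FloatMax;
  Flags.IsSigned = MMK == RD::MRK_SIntMax || MMK == RD::MRK_SIntMin;
  // Step 2 (inside createSimpleTargetReduction): those flags are translated
  // back into a MinMaxRecurrenceKind, which is the redundancy noted in the log.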

Added: 


Modified: 
llvm/lib/Transforms/Utils/LoopUtils.cpp

Removed: 




diff  --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp 
b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index 1ac270814b00..653680e5dc1e 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -1063,7 +1063,6 @@ Value *llvm::createTargetReduction(IRBuilderBase &B,
bool NoNaN) {
   // TODO: Support in-order reductions based on the recurrence descriptor.
   using RD = RecurrenceDescriptor;
-  RD::RecurrenceKind RecKind = Desc.getRecurrenceKind();
   TargetTransformInfo::ReductionFlags Flags;
   Flags.NoNaN = NoNaN;
 
@@ -1072,34 +1071,12 @@ Value *llvm::createTargetReduction(IRBuilderBase &B,
   IRBuilderBase::FastMathFlagGuard FMFGuard(B);
   B.setFastMathFlags(Desc.getFastMathFlags());
 
-  switch (RecKind) {
-  case RD::RK_FloatAdd:
-return createSimpleTargetReduction(B, TTI, Instruction::FAdd, Src, Flags);
-  case RD::RK_FloatMult:
-return createSimpleTargetReduction(B, TTI, Instruction::FMul, Src, Flags);
-  case RD::RK_IntegerAdd:
-return createSimpleTargetReduction(B, TTI, Instruction::Add, Src, Flags);
-  case RD::RK_IntegerMult:
-return createSimpleTargetReduction(B, TTI, Instruction::Mul, Src, Flags);
-  case RD::RK_IntegerAnd:
-return createSimpleTargetReduction(B, TTI, Instruction::And, Src, Flags);
-  case RD::RK_IntegerOr:
-return createSimpleTargetReduction(B, TTI, Instruction::Or, Src, Flags);
-  case RD::RK_IntegerXor:
-return createSimpleTargetReduction(B, TTI, Instruction::Xor, Src, Flags);
-  case RD::RK_IntegerMinMax: {
-RD::MinMaxRecurrenceKind MMKind = Desc.getMinMaxRecurrenceKind();
-Flags.IsMaxOp = (MMKind == RD::MRK_SIntMax || MMKind == RD::MRK_UIntMax);
-Flags.IsSigned = (MMKind == RD::MRK_SIntMax || MMKind == RD::MRK_SIntMin);
-return createSimpleTargetReduction(B, TTI, Instruction::ICmp, Src, Flags);
-  }
-  case RD::RK_FloatMinMax: {
-Flags.IsMaxOp = Desc.getMinMaxRecurrenceKind() == RD::MRK_FloatMax;
-return createSimpleTargetReduction(B, TTI, Instruction::FCmp, Src, Flags);
-  }
-  default:
-llvm_unreachable("Unhandled RecKind");
-  }
+  RD::MinMaxRecurrenceKind MMKind = Desc.getMinMaxRecurrenceKind();
+  Flags.IsMaxOp = MMKind == RD::MRK_SIntMax || MMKind == RD::MRK_UIntMax ||
+  MMKind == RD::MRK_FloatMax;
+  Flags.IsSigned = MMKind == RD::MRK_SIntMax || MMKind == RD::MRK_SIntMin;
+  return createSimpleTargetReduction(B, TTI, Desc.getRecurrenceBinOp(), Src,
+ Flags);
 }
 
 void llvm::propagateIRFlags(Value *I, ArrayRef VL, Value *OpValue) {





[llvm-branch-commits] [llvm] e90ea76 - [IR] remove 'NoNan' param when creating FP reductions

2020-12-30 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2020-12-30T09:51:23-05:00
New Revision: e90ea76380d411bf81861228f23e4716ef337100

URL: 
https://github.com/llvm/llvm-project/commit/e90ea76380d411bf81861228f23e4716ef337100
DIFF: 
https://github.com/llvm/llvm-project/commit/e90ea76380d411bf81861228f23e4716ef337100.diff

LOG: [IR] remove 'NoNan' param when creating FP reductions

This is no-functional-change-intended (AFAIK, we can't
isolate this difference in a regression test).

That's because the callers should be setting the IRBuilder's
FMF field when creating the reduction and/or setting those
flags after creating. It doesn't make sense to override this
one flag alone.

This is part of a multi-step process to clean up the FMF
setting/propagation. See PR35538 for an example.
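
With the parameter gone, a caller that wants nnan (or any other flags) on the
reduction sets them on the builder first. A minimal sketch, assuming Builder
and Src are in scope:

  // FMF set on the builder is picked up by the FP call it creates.
  IRBuilderBase::FastMathFlagGuard Guard(Builder); // restores FMF at scope exit
  FastMathFlags FMF;
  FMF.setNoNaNs();
  Builder.setFastMathFlags(FMF);
  CallInst *Rdx = Builder.CreateFPMaxReduce(Src);
  // ...or adjust flags after creation: Rdx->setFastMathFlags(FMF);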

Added: 


Modified: 
llvm/include/llvm/IR/IRBuilder.h
llvm/lib/IR/IRBuilder.cpp
llvm/lib/Transforms/Utils/LoopUtils.cpp

Removed: 




diff  --git a/llvm/include/llvm/IR/IRBuilder.h 
b/llvm/include/llvm/IR/IRBuilder.h
index 4b26299d046c..c9074abe88c2 100644
--- a/llvm/include/llvm/IR/IRBuilder.h
+++ b/llvm/include/llvm/IR/IRBuilder.h
@@ -779,11 +779,11 @@ class IRBuilderBase {
 
   /// Create a vector float max reduction intrinsic of the source
   /// vector.
-  CallInst *CreateFPMaxReduce(Value *Src, bool NoNaN = false);
+  CallInst *CreateFPMaxReduce(Value *Src);
 
   /// Create a vector float min reduction intrinsic of the source
   /// vector.
-  CallInst *CreateFPMinReduce(Value *Src, bool NoNaN = false);
+  CallInst *CreateFPMinReduce(Value *Src);
 
   /// Create a lifetime.start intrinsic.
   ///

diff  --git a/llvm/lib/IR/IRBuilder.cpp b/llvm/lib/IR/IRBuilder.cpp
index e8fa35314a94..51e289165590 100644
--- a/llvm/lib/IR/IRBuilder.cpp
+++ b/llvm/lib/IR/IRBuilder.cpp
@@ -380,24 +380,12 @@ CallInst *IRBuilderBase::CreateIntMinReduce(Value *Src, 
bool IsSigned) {
   return getReductionIntrinsic(this, ID, Src);
 }
 
-CallInst *IRBuilderBase::CreateFPMaxReduce(Value *Src, bool NoNaN) {
-  auto Rdx = getReductionIntrinsic(this, Intrinsic::vector_reduce_fmax, Src);
-  if (NoNaN) {
-FastMathFlags FMF;
-FMF.setNoNaNs();
-Rdx->setFastMathFlags(FMF);
-  }
-  return Rdx;
+CallInst *IRBuilderBase::CreateFPMaxReduce(Value *Src) {
+  return getReductionIntrinsic(this, Intrinsic::vector_reduce_fmax, Src);
 }
 
-CallInst *IRBuilderBase::CreateFPMinReduce(Value *Src, bool NoNaN) {
-  auto Rdx = getReductionIntrinsic(this, Intrinsic::vector_reduce_fmin, Src);
-  if (NoNaN) {
-FastMathFlags FMF;
-FMF.setNoNaNs();
-Rdx->setFastMathFlags(FMF);
-  }
-  return Rdx;
+CallInst *IRBuilderBase::CreateFPMinReduce(Value *Src) {
+  return getReductionIntrinsic(this, Intrinsic::vector_reduce_fmin, Src);
 }
 
 CallInst *IRBuilderBase::CreateLifetimeStart(Value *Ptr, ConstantInt *Size) {

diff  --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp 
b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index 80ae6b37e132..a3665a5636e5 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -1039,10 +1039,10 @@ Value *llvm::createSimpleTargetReduction(
   case Instruction::FCmp:
 if (Flags.IsMaxOp) {
   MinMaxKind = RD::MRK_FloatMax;
-  BuildFunc = [&]() { return Builder.CreateFPMaxReduce(Src, Flags.NoNaN); 
};
+  BuildFunc = [&]() { return Builder.CreateFPMaxReduce(Src); };
 } else {
   MinMaxKind = RD::MRK_FloatMin;
-  BuildFunc = [&]() { return Builder.CreateFPMinReduce(Src, Flags.NoNaN); 
};
+  BuildFunc = [&]() { return Builder.CreateFPMinReduce(Src); };
 }
 break;
   default:





[llvm-branch-commits] [llvm] 3567908 - [SLP] add fadd reduction test to show broken FMF propagation; NFC

2020-12-30 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2020-12-30T11:27:50-05:00
New Revision: 3567908d8ceb95afe50961c7a953c202131235c5

URL: 
https://github.com/llvm/llvm-project/commit/3567908d8ceb95afe50961c7a953c202131235c5
DIFF: 
https://github.com/llvm/llvm-project/commit/3567908d8ceb95afe50961c7a953c202131235c5.diff

LOG: [SLP] add fadd reduction test to show broken FMF propagation; NFC
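
The FIXME in the new test below spells out the bug: the scalar fadds carry
only 'reassoc nsz', yet the vectorized reduction is tagged 'fast'. A hedged
sketch of the intended direction, intersecting FMF across the scalar ops
before creating the reduction (variable names are illustrative):

  // Start from all flags and intersect with each scalar reduction op.
  FastMathFlags Common = FastMathFlags::getFast();
  for (Value *V : ReductionOps)
    if (auto *FPOp = dyn_cast<FPMathOperator>(V))
      Common &= FPOp->getFastMathFlags();
  Builder.setFastMathFlags(Common); // reduction inherits only shared flags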

Added: 


Modified: 
llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll

Removed: 




diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll 
b/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll
index 5663c88b6366..8e175f1acda9 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll
@@ -1766,4 +1766,39 @@ bb.1:
   ret void
 }
 
+; FIXME: This is a miscompile.
+; The FMF on the reduction should match the incoming insts.
+
+define float @fadd_v4f32_fmf(float* %p) {
+; CHECK-LABEL: @fadd_v4f32_fmf(
+; CHECK-NEXT:[[P1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], 
i64 1
+; CHECK-NEXT:[[P2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2
+; CHECK-NEXT:[[P3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3
+; CHECK-NEXT:[[TMP1:%.*]] = bitcast float* [[P]] to <4 x float>*
+; CHECK-NEXT:[[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 
4
+; CHECK-NEXT:[[TMP3:%.*]] = call fast float 
@llvm.vector.reduce.fadd.v4f32(float -0.00e+00, <4 x float> [[TMP2]])
+; CHECK-NEXT:ret float [[TMP3]]
+;
+; STORE-LABEL: @fadd_v4f32_fmf(
+; STORE-NEXT:[[P1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], 
i64 1
+; STORE-NEXT:[[P2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2
+; STORE-NEXT:[[P3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3
+; STORE-NEXT:[[TMP1:%.*]] = bitcast float* [[P]] to <4 x float>*
+; STORE-NEXT:[[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 
4
+; STORE-NEXT:[[TMP3:%.*]] = call fast float 
@llvm.vector.reduce.fadd.v4f32(float -0.00e+00, <4 x float> [[TMP2]])
+; STORE-NEXT:ret float [[TMP3]]
+;
+  %p1 = getelementptr inbounds float, float* %p, i64 1
+  %p2 = getelementptr inbounds float, float* %p, i64 2
+  %p3 = getelementptr inbounds float, float* %p, i64 3
+  %t0 = load float, float* %p, align 4
+  %t1 = load float, float* %p1, align 4
+  %t2 = load float, float* %p2, align 4
+  %t3 = load float, float* %p3, align 4
+  %add1 = fadd reassoc nsz float %t1, %t0
+  %add2 = fadd reassoc nsz float %t2, %add1
+  %add3 = fadd reassoc nsz float %t3, %add2
+  ret float %add3
+}
+
 declare i32 @__gxx_personality_v0(...)





[llvm-branch-commits] [llvm] 5ced712 - [LoopVectorizer] add test to show wrong FMF propagation; NFC

2020-12-30 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2020-12-30T15:13:57-05:00
New Revision: 5ced712e9851f00ecd81ba4dc235811bbc9ec5a2

URL: 
https://github.com/llvm/llvm-project/commit/5ced712e9851f00ecd81ba4dc235811bbc9ec5a2
DIFF: 
https://github.com/llvm/llvm-project/commit/5ced712e9851f00ecd81ba4dc235811bbc9ec5a2.diff

LOG: [LoopVectorizer] add test to show wrong FMF propagation; NFC

Added: 


Modified: 
llvm/test/Transforms/LoopVectorize/X86/reduction-fastmath.ll

Removed: 




diff  --git a/llvm/test/Transforms/LoopVectorize/X86/reduction-fastmath.ll 
b/llvm/test/Transforms/LoopVectorize/X86/reduction-fastmath.ll
index fbbbd59f41c5..f35024b4361b 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/reduction-fastmath.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/reduction-fastmath.ll
@@ -261,3 +261,92 @@ loop.exit:
   %sum.lcssa = phi float [ %sum.inc, %loop ], [ 0.00e+00, %entry ]
   ret float %sum.lcssa
 }
+
+; FIXME: Some fcmp are 'nnan ninf', some are 'fast', but the reduction is 
sequential?
+
+define float @PR35538(float* nocapture readonly %a, i32 %N) #0 {
+; CHECK-LABEL: @PR35538(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:[[CMP12:%.*]] = icmp sgt i32 [[N:%.*]], 0
+; CHECK-NEXT:br i1 [[CMP12]], label [[FOR_BODY_LR_PH:%.*]], label 
[[FOR_COND_CLEANUP:%.*]]
+; CHECK:   for.body.lr.ph:
+; CHECK-NEXT:[[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
+; CHECK-NEXT:[[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 8
+; CHECK-NEXT:br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label 
[[VECTOR_PH:%.*]]
+; CHECK:   vector.ph:
+; CHECK-NEXT:[[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 8
+; CHECK-NEXT:[[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
+; CHECK-NEXT:br label [[VECTOR_BODY:%.*]]
+; CHECK:   vector.body:
+; CHECK-NEXT:[[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ 
[[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:[[VEC_PHI:%.*]] = phi <4 x float> [ , [[VECTOR_PH]] 
], [ [[TMP10:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:[[VEC_PHI1:%.*]] = phi <4 x float> [ , [[VECTOR_PH]] 
], [ [[TMP11:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:[[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:[[TMP1:%.*]] = add i64 [[INDEX]], 4
+; CHECK-NEXT:[[TMP2:%.*]] = getelementptr inbounds float, float* 
[[A:%.*]], i64 [[TMP0]]
+; CHECK-NEXT:[[TMP3:%.*]] = getelementptr inbounds float, float* [[A]], 
i64 [[TMP1]]
+; CHECK-NEXT:[[TMP4:%.*]] = getelementptr inbounds float, float* [[TMP2]], 
i32 0
+; CHECK-NEXT:[[TMP5:%.*]] = bitcast float* [[TMP4]] to <4 x float>*
+; CHECK-NEXT:[[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP5]], 
align 4
+; CHECK-NEXT:[[TMP6:%.*]] = getelementptr inbounds float, float* [[TMP2]], 
i32 4
+; CHECK-NEXT:[[TMP7:%.*]] = bitcast float* [[TMP6]] to <4 x float>*
+; CHECK-NEXT:[[WIDE_LOAD2:%.*]] = load <4 x float>, <4 x float>* [[TMP7]], 
align 4
+; CHECK-NEXT:[[TMP8:%.*]] = fcmp nnan ninf oge <4 x float> [[WIDE_LOAD]], 
[[VEC_PHI]]
+; CHECK-NEXT:[[TMP9:%.*]] = fcmp nnan ninf oge <4 x float> [[WIDE_LOAD2]], 
[[VEC_PHI1]]
+; CHECK-NEXT:[[TMP10]] = select <4 x i1> [[TMP8]], <4 x float> 
[[WIDE_LOAD]], <4 x float> [[VEC_PHI]]
+; CHECK-NEXT:[[TMP11]] = select <4 x i1> [[TMP9]], <4 x float> 
[[WIDE_LOAD2]], <4 x float> [[VEC_PHI1]]
+; CHECK-NEXT:[[INDEX_NEXT]] = add i64 [[INDEX]], 8
+; CHECK-NEXT:[[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label 
[[VECTOR_BODY]], [[LOOP8:!llvm.loop !.*]]
+; CHECK:   middle.block:
+; CHECK-NEXT:[[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <4 x float> [[TMP10]], 
[[TMP11]]
+; CHECK-NEXT:[[RDX_MINMAX_SELECT:%.*]] = select fast <4 x i1> 
[[RDX_MINMAX_CMP]], <4 x float> [[TMP10]], <4 x float> [[TMP11]]
+; CHECK-NEXT:[[TMP13:%.*]] = call float @llvm.vector.reduce.fmax.v4f32(<4 
x float> [[RDX_MINMAX_SELECT]])
+; CHECK-NEXT:[[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
+; CHECK-NEXT:br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], 
label [[SCALAR_PH]]
+; CHECK:   scalar.ph:
+; CHECK-NEXT:[[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] 
], [ 0, [[FOR_BODY_LR_PH]] ]
+; CHECK-NEXT:[[BC_MERGE_RDX:%.*]] = phi float [ -1.00e+00, 
[[FOR_BODY_LR_PH]] ], [ [[TMP13]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:br label [[FOR_BODY:%.*]]
+; CHECK:   for.cond.cleanup.loopexit:
+; CHECK-NEXT:[[MAX_0__LCSSA:%.*]] = phi float [ [[MAX_0_:%.*]], 
[[FOR_BODY]] ], [ [[TMP13]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:br label [[FOR_COND_CLEANUP]]
+; CHECK:   for.cond.cleanup:
+; CHECK-NEXT:[[MAX_0_LCSSA:%.*]] = phi float [ -1.00e+00, 
[[ENTRY:%.*]] ], [ [[MAX_0__LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ]
+; CHECK-NEXT:ret float [[MAX_0_LCSSA]]
+; CHECK:   for.body:
+; CHECK-NEXT:[[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_V

[llvm-branch-commits] [llvm] 8ca60db - [LoopUtils] reduce FMF and min/max complexity when forming reductions

2020-12-30 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2020-12-30T15:22:26-05:00
New Revision: 8ca60db40bd944dc5f67e0f200a403b4e03818ea

URL: 
https://github.com/llvm/llvm-project/commit/8ca60db40bd944dc5f67e0f200a403b4e03818ea
DIFF: 
https://github.com/llvm/llvm-project/commit/8ca60db40bd944dc5f67e0f200a403b4e03818ea.diff

LOG: [LoopUtils] reduce FMF and min/max complexity when forming reductions

I don't know if there's some way this changes what the vectorizers
may produce for reductions, but I have added test coverage with
3567908 and 5ced712 to show that both passes already have bugs in
this area. Hopefully this does not make things worse before we can
really fix it.
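
For reference, a hedged call-site sketch against the new
createSimpleTargetReduction signature shown below (variable names are
illustrative):

  // The min/max kind now rides along explicitly instead of being encoded
  // in TTI::ReductionFlags.
  Value *Rdx = createSimpleTargetReduction(
      Builder, TTI, Instruction::FCmp, VecVal,
      RecurrenceDescriptor::MRK_FloatMax); // RedOps defaults to None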

Added: 


Modified: 
llvm/include/llvm/Transforms/Utils/LoopUtils.h
llvm/lib/Transforms/Utils/LoopUtils.cpp
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 




diff  --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h 
b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
index ef348ed56129..ba2bb0a4c6b0 100644
--- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
@@ -365,24 +365,21 @@ Value *getShuffleReduction(IRBuilderBase &Builder, Value 
*Src, unsigned Op,
 
 /// Create a target reduction of the given vector. The reduction operation
 /// is described by the \p Opcode parameter. min/max reductions require
-/// additional information supplied in \p Flags.
+/// additional information supplied in \p MinMaxKind.
 /// The target is queried to determine if intrinsics or shuffle sequences are
 /// required to implement the reduction.
 /// Fast-math-flags are propagated using the IRBuilder's setting.
-Value *createSimpleTargetReduction(IRBuilderBase &B,
-   const TargetTransformInfo *TTI,
-   unsigned Opcode, Value *Src,
-   TargetTransformInfo::ReductionFlags Flags =
-   TargetTransformInfo::ReductionFlags(),
-   ArrayRef RedOps = None);
+Value *createSimpleTargetReduction(
+IRBuilderBase &B, const TargetTransformInfo *TTI, unsigned Opcode,
+Value *Src, RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind,
+ArrayRef RedOps = None);
 
 /// Create a generic target reduction using a recurrence descriptor \p Desc
 /// The target is queried to determine if intrinsics or shuffle sequences are
 /// required to implement the reduction.
 /// Fast-math-flags are propagated using the RecurrenceDescriptor.
 Value *createTargetReduction(IRBuilderBase &B, const TargetTransformInfo *TTI,
- RecurrenceDescriptor &Desc, Value *Src,
- bool NoNaN = false);
+ RecurrenceDescriptor &Desc, Value *Src);
 
 /// Get the intersection (logical and) of all of the potential IR flags
 /// of each scalar operation (VL) that will be converted into a vector (I).

diff  --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp 
b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index a3665a5636e5..8dc7709c6e55 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -985,14 +985,12 @@ llvm::getShuffleReduction(IRBuilderBase &Builder, Value 
*Src, unsigned Op,
 /// flags (if generating min/max reductions).
 Value *llvm::createSimpleTargetReduction(
 IRBuilderBase &Builder, const TargetTransformInfo *TTI, unsigned Opcode,
-Value *Src, TargetTransformInfo::ReductionFlags Flags,
+Value *Src, RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind,
 ArrayRef RedOps) {
   auto *SrcVTy = cast(Src->getType());
 
   std::function BuildFunc;
   using RD = RecurrenceDescriptor;
-  RD::MinMaxRecurrenceKind MinMaxKind = RD::MRK_Invalid;
-
   switch (Opcode) {
   case Instruction::Add:
 BuildFunc = [&]() { return Builder.CreateAddReduce(Src); };
@@ -1024,33 +1022,42 @@ Value *llvm::createSimpleTargetReduction(
 };
 break;
   case Instruction::ICmp:
-if (Flags.IsMaxOp) {
-  MinMaxKind = Flags.IsSigned ? RD::MRK_SIntMax : RD::MRK_UIntMax;
-  BuildFunc = [&]() {
-return Builder.CreateIntMaxReduce(Src, Flags.IsSigned);
-  };
-} else {
-  MinMaxKind = Flags.IsSigned ? RD::MRK_SIntMin : RD::MRK_UIntMin;
-  BuildFunc = [&]() {
-return Builder.CreateIntMinReduce(Src, Flags.IsSigned);
-  };
+switch (MinMaxKind) {
+case RD::MRK_SIntMax:
+  BuildFunc = [&]() { return Builder.CreateIntMaxReduce(Src, true); };
+  break;
+case RD::MRK_SIntMin:
+  BuildFunc = [&]() { return Builder.CreateIntMinReduce(Src, true); };
+  break;
+case RD::MRK_UIntMax:
+  BuildFunc = [&]() { return Builder.CreateIntMaxReduce(Src, false); };
+  break;
+case RD::MRK_UIntMin:
+  BuildFunc = [&]() { return Builder.CreateIntMinReduce(Src, false); 

[llvm-branch-commits] [llvm] eaab711 - [Analysis] reduce code for matching min/max; NFC

2020-12-31 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2020-12-31T17:19:37-05:00
New Revision: eaab71106b81031d272acfc6987e99e8b65cbe6c

URL: 
https://github.com/llvm/llvm-project/commit/eaab71106b81031d272acfc6987e99e8b65cbe6c
DIFF: 
https://github.com/llvm/llvm-project/commit/eaab71106b81031d272acfc6987e99e8b65cbe6c.diff

LOG: [Analysis] reduce code for matching min/max; NFC

This might also make it easier to adapt if we want
to match min/max intrinsics rather than cmp+sel idioms.

The 'const' part is meant to avoid confusion
in calling code. There is some surprising and possibly
wrong behavior related to matching min/max reductions
differently from other reductions.
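
If we do move to matching min/max intrinsics directly, the matcher could look
roughly like this (hypothetical, not part of this commit; uses PatternMatch):

  using namespace llvm::PatternMatch;
  Value *A, *B;
  // Recognize @llvm.smax / @llvm.umin etc. without a cmp+sel idiom.
  if (match(I, m_Intrinsic<Intrinsic::smax>(m_Value(A), m_Value(B))))
    return InstDesc(I, MRK_SIntMax);
  if (match(I, m_Intrinsic<Intrinsic::umin>(m_Value(A), m_Value(B))))
    return InstDesc(I, MRK_UIntMin);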

Added: 


Modified: 
llvm/include/llvm/Analysis/IVDescriptors.h
llvm/lib/Analysis/IVDescriptors.cpp

Removed: 




diff  --git a/llvm/include/llvm/Analysis/IVDescriptors.h 
b/llvm/include/llvm/Analysis/IVDescriptors.h
index e736adf899b8..30216e22fc34 100644
--- a/llvm/include/llvm/Analysis/IVDescriptors.h
+++ b/llvm/include/llvm/Analysis/IVDescriptors.h
@@ -96,15 +96,15 @@ class RecurrenceDescriptor {
 : IsRecurrence(true), PatternLastInst(I), MinMaxKind(K),
   UnsafeAlgebraInst(UAI) {}
 
-bool isRecurrence() { return IsRecurrence; }
+bool isRecurrence() const { return IsRecurrence; }
 
-bool hasUnsafeAlgebra() { return UnsafeAlgebraInst != nullptr; }
+bool hasUnsafeAlgebra() const { return UnsafeAlgebraInst != nullptr; }
 
-Instruction *getUnsafeAlgebraInst() { return UnsafeAlgebraInst; }
+Instruction *getUnsafeAlgebraInst() const { return UnsafeAlgebraInst; }
 
-MinMaxRecurrenceKind getMinMaxKind() { return MinMaxKind; }
+MinMaxRecurrenceKind getMinMaxKind() const { return MinMaxKind; }
 
-Instruction *getPatternInst() { return PatternLastInst; }
+Instruction *getPatternInst() const { return PatternLastInst; }
 
   private:
 // Is this instruction a recurrence candidate.
@@ -134,10 +134,11 @@ class RecurrenceDescriptor {
   /// Returns true if all uses of the instruction I is within the Set.
   static bool areAllUsesIn(Instruction *I, SmallPtrSetImpl 
&Set);
 
-  /// Returns a struct describing if the instruction if the instruction is a
+  /// Returns a struct describing if the instruction is a
   /// Select(ICmp(X, Y), X, Y) instruction pattern corresponding to a min(X, Y)
-  /// or max(X, Y).
-  static InstDesc isMinMaxSelectCmpPattern(Instruction *I, InstDesc &Prev);
+  /// or max(X, Y). \p Prev is specifies the description of an already 
processed
+  /// select instruction, so its corresponding cmp can be matched to it.
+  static InstDesc isMinMaxSelectCmpPattern(Instruction *I, const InstDesc 
&Prev);
 
   /// Returns a struct describing if the instruction is a
   /// Select(FCmp(X, Y), (Z = X op PHINode), PHINode) instruction pattern.

diff  --git a/llvm/lib/Analysis/IVDescriptors.cpp 
b/llvm/lib/Analysis/IVDescriptors.cpp
index d9756512de77..eac6f3cb30f8 100644
--- a/llvm/lib/Analysis/IVDescriptors.cpp
+++ b/llvm/lib/Analysis/IVDescriptors.cpp
@@ -456,53 +456,42 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, 
RecurrenceKind Kind,
   return true;
 }
 
-/// Returns true if the instruction is a Select(ICmp(X, Y), X, Y) instruction
-/// pattern corresponding to a min(X, Y) or max(X, Y).
 RecurrenceDescriptor::InstDesc
-RecurrenceDescriptor::isMinMaxSelectCmpPattern(Instruction *I, InstDesc &Prev) 
{
-
-  assert((isa(I) || isa(I) || isa(I)) &&
- "Expect a select instruction");
-  Instruction *Cmp = nullptr;
-  SelectInst *Select = nullptr;
+RecurrenceDescriptor::isMinMaxSelectCmpPattern(Instruction *I,
+   const InstDesc &Prev) {
+  assert((isa(I) || isa(I)) &&
+ "Expected a cmp or select instruction");
 
   // We must handle the select(cmp()) as a single instruction. Advance to the
   // select.
-  if ((Cmp = dyn_cast(I)) || (Cmp = dyn_cast(I))) {
-if (!Cmp->hasOneUse() || !(Select = 
dyn_cast(*I->user_begin(
-  return InstDesc(false, I);
-return InstDesc(Select, Prev.getMinMaxKind());
+  CmpInst::Predicate Pred;
+  if (match(I, m_OneUse(m_Cmp(Pred, m_Value(), m_Value() {
+if (auto *Select = dyn_cast(*I->user_begin()))
+  return InstDesc(Select, Prev.getMinMaxKind());
   }
 
-  // Only handle single use cases for now.
-  if (!(Select = dyn_cast(I)))
+  // Only match select with single use cmp condition.
+  if (!match(I, m_Select(m_OneUse(m_Cmp(Pred, m_Value(), m_Value())), 
m_Value(),
+ m_Value(
 return InstDesc(false, I);
-  if (!(Cmp = dyn_cast(I->getOperand(0))) &&
-  !(Cmp = dyn_cast(I->getOperand(0
-return InstDesc(false, I);
-  if (!Cmp->hasOneUse())
-return InstDesc(false, I);
-
-  Value *CmpLeft;
-  Value *CmpRight;
 
   // Look for a min/max pattern.
-  if (m_UMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
-return InstDesc(Select, MRK_U

[llvm-branch-commits] [llvm] c182a00 - [Analysis] fix typo in code comment; NFC

2021-01-01 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2021-01-01T12:20:16-05:00
New Revision: c182a000954de667f1e87055bf0329e5e6f52f1f

URL: 
https://github.com/llvm/llvm-project/commit/c182a000954de667f1e87055bf0329e5e6f52f1f
DIFF: 
https://github.com/llvm/llvm-project/commit/c182a000954de667f1e87055bf0329e5e6f52f1f.diff

LOG: [Analysis] fix typo in code comment; NFC

Added: 


Modified: 
llvm/include/llvm/Analysis/IVDescriptors.h

Removed: 




diff  --git a/llvm/include/llvm/Analysis/IVDescriptors.h 
b/llvm/include/llvm/Analysis/IVDescriptors.h
index 30216e22fc34..b9f6b7c2d04e 100644
--- a/llvm/include/llvm/Analysis/IVDescriptors.h
+++ b/llvm/include/llvm/Analysis/IVDescriptors.h
@@ -136,9 +136,10 @@ class RecurrenceDescriptor {
 
   /// Returns a struct describing if the instruction is a
   /// Select(ICmp(X, Y), X, Y) instruction pattern corresponding to a min(X, Y)
-  /// or max(X, Y). \p Prev is specifies the description of an already 
processed
+  /// or max(X, Y). \p Prev specifies the description of an already processed
   /// select instruction, so its corresponding cmp can be matched to it.
-  static InstDesc isMinMaxSelectCmpPattern(Instruction *I, const InstDesc 
&Prev);
+  static InstDesc isMinMaxSelectCmpPattern(Instruction *I,
+   const InstDesc &Prev);
 
   /// Returns a struct describing if the instruction is a
   /// Select(FCmp(X, Y), (Z = X op PHINode), PHINode) instruction pattern.





[llvm-branch-commits] [llvm] c74e853 - [Analysis] flatten enums for recurrence types

2021-01-01 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2021-01-01T12:20:16-05:00
New Revision: c74e8539ff372a89d08e7bfea7323a4dc2979d22

URL: 
https://github.com/llvm/llvm-project/commit/c74e8539ff372a89d08e7bfea7323a4dc2979d22
DIFF: 
https://github.com/llvm/llvm-project/commit/c74e8539ff372a89d08e7bfea7323a4dc2979d22.diff

LOG: [Analysis] flatten enums for recurrence types

This is almost all mechanical search-and-replace and
no-functional-change-intended (NFC). Having a single
enum makes it easier to match/reason about the
reduction cases.

The goal is to remove `Opcode` from reduction matching
code in the vectorizers because that makes it harder to
adapt the code to handle intrinsics.

The code in RecurrenceDescriptor::AddReductionVar() is
the only place that required closer inspection. It uses
a RecurrenceDescriptor and a second InstDesc to sometimes
overwrite part of the struct. It seems like we should be
able to simplify that logic, but it's not clear exactly
which cmp+sel patterns we are trying to handle/avoid.
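
One payoff of the flat enum: kind groupings become simple predicates instead
of cross-enum checks. A hedged sketch with hypothetical helper names:

  // With the old split enums, this required consulting both RecurrenceKind
  // and MinMaxRecurrenceKind.
  static bool isIntMinMaxRecurKind(RecurKind K) {
    return K == RecurKind::SMin || K == RecurKind::SMax ||
           K == RecurKind::UMin || K == RecurKind::UMax;
  }
  static bool isFPMinMaxRecurKind(RecurKind K) {
    return K == RecurKind::FMin || K == RecurKind::FMax;
  }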

Added: 


Modified: 
llvm/include/llvm/Analysis/IVDescriptors.h
llvm/include/llvm/Transforms/Utils/LoopUtils.h
llvm/lib/Analysis/IVDescriptors.cpp
llvm/lib/CodeGen/ExpandReductions.cpp
llvm/lib/Transforms/Utils/LoopUtils.cpp
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 




diff  --git a/llvm/include/llvm/Analysis/IVDescriptors.h 
b/llvm/include/llvm/Analysis/IVDescriptors.h
index b9f6b7c2d04e..798eb430df08 100644
--- a/llvm/include/llvm/Analysis/IVDescriptors.h
+++ b/llvm/include/llvm/Analysis/IVDescriptors.h
@@ -34,6 +34,24 @@ class SCEV;
 class DominatorTree;
 class ICFLoopSafetyInfo;
 
+/// These are the kinds of recurrences that we support.
+enum class RecurKind {
+  None,   ///< Not a recurrence.
+  Add,///< Sum of integers.
+  Mul,///< Product of integers.
+  Or, ///< Bitwise or logical OR of integers.
+  And,///< Bitwise or logical AND of integers.
+  Xor,///< Bitwise or logical XOR of integers.
+  SMin,   ///< Signed integer min implemented in terms of select(cmp()).
+  SMax,   ///< Signed integer max implemented in terms of select(cmp()).
+  UMin,   ///< Unisgned integer min implemented in terms of select(cmp()).
+  UMax,   ///< Unsigned integer max implemented in terms of select(cmp()).
+  FAdd,   ///< Sum of floats.
+  FMul,   ///< Product of floats.
+  FMin,   ///< FP min implemented in terms of select(cmp()).
+  FMax///< FP max implemented in terms of select(cmp()).
+};
+
 /// The RecurrenceDescriptor is used to identify recurrences variables in a
 /// loop. Reduction is a special case of recurrence that has uses of the
 /// recurrence variable outside the loop. The method isReductionPHI identifies
@@ -48,40 +66,13 @@ class ICFLoopSafetyInfo;
 /// This struct holds information about recurrence variables.
 class RecurrenceDescriptor {
 public:
-  /// This enum represents the kinds of recurrences that we support.
-  enum RecurrenceKind {
-RK_NoRecurrence,  ///< Not a recurrence.
-RK_IntegerAdd,///< Sum of integers.
-RK_IntegerMult,   ///< Product of integers.
-RK_IntegerOr, ///< Bitwise or logical OR of numbers.
-RK_IntegerAnd,///< Bitwise or logical AND of numbers.
-RK_IntegerXor,///< Bitwise or logical XOR of numbers.
-RK_IntegerMinMax, ///< Min/max implemented in terms of select(cmp()).
-RK_FloatAdd,  ///< Sum of floats.
-RK_FloatMult, ///< Product of floats.
-RK_FloatMinMax///< Min/max implemented in terms of select(cmp()).
-  };
-
-  // This enum represents the kind of minmax recurrence.
-  enum MinMaxRecurrenceKind {
-MRK_Invalid,
-MRK_UIntMin,
-MRK_UIntMax,
-MRK_SIntMin,
-MRK_SIntMax,
-MRK_FloatMin,
-MRK_FloatMax
-  };
-
   RecurrenceDescriptor() = default;
 
-  RecurrenceDescriptor(Value *Start, Instruction *Exit, RecurrenceKind K,
-   FastMathFlags FMF, MinMaxRecurrenceKind MK,
-   Instruction *UAI, Type *RT, bool Signed,
-   SmallPtrSetImpl &CI)
+  RecurrenceDescriptor(Value *Start, Instruction *Exit, RecurKind K,
+   FastMathFlags FMF, Instruction *UAI, Type *RT,
+   bool Signed, SmallPtrSetImpl &CI)
   : StartValue(Start), LoopExitInstr(Exit), Kind(K), FMF(FMF),
-MinMaxKind(MK), UnsafeAlgebraInst(UAI), RecurrenceType(RT),
-IsSigned(Signed) {
+UnsafeAlgebraInst(UAI), RecurrenceType(RT), IsSigned(Signed) {
 CastInsts.insert(CI.begin(), CI.end());
   }
 
@@ -89,11 +80,11 @@ class RecurrenceDescriptor {
   class InstDesc {
   public:
 InstDesc(bool IsRecur, Instruction *I, Instruction *UAI = nullptr)
-: IsRecurrence(IsRecur), PatternLastInst(I), MinMaxKind(MRK_Invalid),
-  UnsafeAlgebraInst(UAI) {}
+: IsRecurrence(IsRecur), PatternLastInst(I),
+ 

[llvm-branch-commits] [llvm] 6976812 - [InstCombine] add tests for ashr+icmp; NFC

2021-01-04 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2021-01-04T13:35:07-05:00
New Revision: 6976812129bf62975e37f6eabced717dcd090037

URL: 
https://github.com/llvm/llvm-project/commit/6976812129bf62975e37f6eabced717dcd090037
DIFF: 
https://github.com/llvm/llvm-project/commit/6976812129bf62975e37f6eabced717dcd090037.diff

LOG: [InstCombine] add tests for ashr+icmp; NFC

Added: 


Modified: 
llvm/test/Transforms/InstCombine/icmp-shr.ll

Removed: 




diff  --git a/llvm/test/Transforms/InstCombine/icmp-shr.ll 
b/llvm/test/Transforms/InstCombine/icmp-shr.ll
index 214f315f3178..22f61d2d5e6a 100644
--- a/llvm/test/Transforms/InstCombine/icmp-shr.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-shr.ll
@@ -5,7 +5,7 @@ target datalayout = 
"e-p:64:64:64-p1:16:16:16-p2:32:32:32-p3:64:64:64-i1:8:8-i8:
 
 define i1 @lshr_eq_msb_low_last_zero(i8 %a) {
 ; CHECK-LABEL: @lshr_eq_msb_low_last_zero(
-; CHECK-NEXT:[[CMP:%.*]] = icmp ugt i8 %a, 6
+; CHECK-NEXT:[[CMP:%.*]] = icmp ugt i8 [[A:%.*]], 6
 ; CHECK-NEXT:ret i1 [[CMP]]
 ;
   %shr = lshr i8 127, %a
@@ -15,7 +15,7 @@ define i1 @lshr_eq_msb_low_last_zero(i8 %a) {
 
 define <2 x i1> @lshr_eq_msb_low_last_zero_vec(<2 x i8> %a) {
 ; CHECK-LABEL: @lshr_eq_msb_low_last_zero_vec(
-; CHECK-NEXT:[[CMP:%.*]] = icmp ugt <2 x i8> %a, 
+; CHECK-NEXT:[[CMP:%.*]] = icmp ugt <2 x i8> [[A:%.*]], 
 ; CHECK-NEXT:ret <2 x i1> [[CMP]]
 ;
   %shr = lshr <2 x i8> , %a
@@ -25,7 +25,7 @@ define <2 x i1> @lshr_eq_msb_low_last_zero_vec(<2 x i8> %a) {
 
 define i1 @ashr_eq_msb_low_second_zero(i8 %a) {
 ; CHECK-LABEL: @ashr_eq_msb_low_second_zero(
-; CHECK-NEXT:[[CMP:%.*]] = icmp ugt i8 %a, 6
+; CHECK-NEXT:[[CMP:%.*]] = icmp ugt i8 [[A:%.*]], 6
 ; CHECK-NEXT:ret i1 [[CMP]]
 ;
   %shr = ashr i8 127, %a
@@ -35,7 +35,7 @@ define i1 @ashr_eq_msb_low_second_zero(i8 %a) {
 
 define i1 @lshr_ne_msb_low_last_zero(i8 %a) {
 ; CHECK-LABEL: @lshr_ne_msb_low_last_zero(
-; CHECK-NEXT:[[CMP:%.*]] = icmp ult i8 %a, 7
+; CHECK-NEXT:[[CMP:%.*]] = icmp ult i8 [[A:%.*]], 7
 ; CHECK-NEXT:ret i1 [[CMP]]
 ;
   %shr = lshr i8 127, %a
@@ -45,7 +45,7 @@ define i1 @lshr_ne_msb_low_last_zero(i8 %a) {
 
 define i1 @ashr_ne_msb_low_second_zero(i8 %a) {
 ; CHECK-LABEL: @ashr_ne_msb_low_second_zero(
-; CHECK-NEXT:[[CMP:%.*]] = icmp ult i8 %a, 7
+; CHECK-NEXT:[[CMP:%.*]] = icmp ult i8 [[A:%.*]], 7
 ; CHECK-NEXT:ret i1 [[CMP]]
 ;
   %shr = ashr i8 127, %a
@@ -55,7 +55,7 @@ define i1 @ashr_ne_msb_low_second_zero(i8 %a) {
 
 define i1 @ashr_eq_both_equal(i8 %a) {
 ; CHECK-LABEL: @ashr_eq_both_equal(
-; CHECK-NEXT:[[CMP:%.*]] = icmp eq i8 %a, 0
+; CHECK-NEXT:[[CMP:%.*]] = icmp eq i8 [[A:%.*]], 0
 ; CHECK-NEXT:ret i1 [[CMP]]
 ;
   %shr = ashr i8 128, %a
@@ -65,7 +65,7 @@ define i1 @ashr_eq_both_equal(i8 %a) {
 
 define i1 @ashr_ne_both_equal(i8 %a) {
 ; CHECK-LABEL: @ashr_ne_both_equal(
-; CHECK-NEXT:[[CMP:%.*]] = icmp ne i8 %a, 0
+; CHECK-NEXT:[[CMP:%.*]] = icmp ne i8 [[A:%.*]], 0
 ; CHECK-NEXT:ret i1 [[CMP]]
 ;
   %shr = ashr i8 128, %a
@@ -75,7 +75,7 @@ define i1 @ashr_ne_both_equal(i8 %a) {
 
 define i1 @lshr_eq_both_equal(i8 %a) {
 ; CHECK-LABEL: @lshr_eq_both_equal(
-; CHECK-NEXT:[[CMP:%.*]] = icmp eq i8 %a, 0
+; CHECK-NEXT:[[CMP:%.*]] = icmp eq i8 [[A:%.*]], 0
 ; CHECK-NEXT:ret i1 [[CMP]]
 ;
   %shr = lshr i8 127, %a
@@ -85,7 +85,7 @@ define i1 @lshr_eq_both_equal(i8 %a) {
 
 define i1 @lshr_ne_both_equal(i8 %a) {
 ; CHECK-LABEL: @lshr_ne_both_equal(
-; CHECK-NEXT:[[CMP:%.*]] = icmp ne i8 %a, 0
+; CHECK-NEXT:[[CMP:%.*]] = icmp ne i8 [[A:%.*]], 0
 ; CHECK-NEXT:ret i1 [[CMP]]
 ;
   %shr = lshr i8 127, %a
@@ -95,7 +95,7 @@ define i1 @lshr_ne_both_equal(i8 %a) {
 
 define i1 @exact_ashr_eq_both_equal(i8 %a) {
 ; CHECK-LABEL: @exact_ashr_eq_both_equal(
-; CHECK-NEXT:[[CMP:%.*]] = icmp eq i8 %a, 0
+; CHECK-NEXT:[[CMP:%.*]] = icmp eq i8 [[A:%.*]], 0
 ; CHECK-NEXT:ret i1 [[CMP]]
 ;
   %shr = ashr exact i8 128, %a
@@ -105,7 +105,7 @@ define i1 @exact_ashr_eq_both_equal(i8 %a) {
 
 define i1 @exact_ashr_ne_both_equal(i8 %a) {
 ; CHECK-LABEL: @exact_ashr_ne_both_equal(
-; CHECK-NEXT:[[CMP:%.*]] = icmp ne i8 %a, 0
+; CHECK-NEXT:[[CMP:%.*]] = icmp ne i8 [[A:%.*]], 0
 ; CHECK-NEXT:ret i1 [[CMP]]
 ;
   %shr = ashr exact i8 128, %a
@@ -115,7 +115,7 @@ define i1 @exact_ashr_ne_both_equal(i8 %a) {
 
 define i1 @exact_lshr_eq_both_equal(i8 %a) {
 ; CHECK-LABEL: @exact_lshr_eq_both_equal(
-; CHECK-NEXT:[[CMP:%.*]] = icmp eq i8 %a, 0
+; CHECK-NEXT:[[CMP:%.*]] = icmp eq i8 [[A:%.*]], 0
 ; CHECK-NEXT:ret i1 [[CMP]]
 ;
   %shr = lshr exact i8 126, %a
@@ -125,7 +125,7 @@ define i1 @exact_lshr_eq_both_equal(i8 %a) {
 
 define i1 @exact_lshr_ne_both_equal(i8 %a) {
 ; CHECK-LABEL: @exact_lshr_ne_both_equal(
-; CHECK-NEXT:[[CMP:%.*]] = icmp ne i8 %a, 0
+; CHECK-NEXT:[[CMP:%.*]] = icmp ne i8 [[A:%.*]], 0
 ; CHECK-NEXT:ret i1 [[CMP]]
 ;
   

[llvm-branch-commits] [llvm] 9766957 - [LoopUtils] reduce code for creating reduction; NFC

2021-01-04 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2021-01-04T16:05:03-05:00
New Revision: 976695752416f6ff51993ec1f3769e8a62eea2f2

URL: 
https://github.com/llvm/llvm-project/commit/976695752416f6ff51993ec1f3769e8a62eea2f2
DIFF: 
https://github.com/llvm/llvm-project/commit/976695752416f6ff51993ec1f3769e8a62eea2f2.diff

LOG: [LoopUtils] reduce code for creating reduction; NFC

We can return from each case instead of creating a temporary
variable just to have a common return.

Added: 


Modified: 
llvm/lib/Transforms/Utils/LoopUtils.cpp

Removed: 




diff  --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp 
b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index e062eacf82b2..3245f5f21017 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -983,77 +983,53 @@ Value *llvm::createSimpleTargetReduction(IRBuilderBase 
&Builder,
  RecurKind RdxKind,
  ArrayRef RedOps) {
   TargetTransformInfo::ReductionFlags RdxFlags;
-  RdxFlags.IsMaxOp = RdxKind == RecurKind::SMax ||
- RdxKind == RecurKind::UMax ||
+  RdxFlags.IsMaxOp = RdxKind == RecurKind::SMax || RdxKind == RecurKind::UMax 
||
  RdxKind == RecurKind::FMax;
   RdxFlags.IsSigned = RdxKind == RecurKind::SMax || RdxKind == RecurKind::SMin;
   if (!ForceReductionIntrinsic &&
   !TTI->useReductionIntrinsic(Opcode, Src->getType(), RdxFlags))
 return getShuffleReduction(Builder, Src, Opcode, RdxKind, RedOps);
 
-  auto *SrcVTy = cast(Src->getType());
-
-  std::function BuildFunc;
+  auto *SrcVecEltTy = cast(Src->getType())->getElementType();
   switch (Opcode) {
   case Instruction::Add:
-BuildFunc = [&]() { return Builder.CreateAddReduce(Src); };
-break;
+return Builder.CreateAddReduce(Src);
   case Instruction::Mul:
-BuildFunc = [&]() { return Builder.CreateMulReduce(Src); };
-break;
+return Builder.CreateMulReduce(Src);
   case Instruction::And:
-BuildFunc = [&]() { return Builder.CreateAndReduce(Src); };
-break;
+return Builder.CreateAndReduce(Src);
   case Instruction::Or:
-BuildFunc = [&]() { return Builder.CreateOrReduce(Src); };
-break;
+return Builder.CreateOrReduce(Src);
   case Instruction::Xor:
-BuildFunc = [&]() { return Builder.CreateXorReduce(Src); };
-break;
+return Builder.CreateXorReduce(Src);
   case Instruction::FAdd:
-BuildFunc = [&]() {
-  auto Rdx = Builder.CreateFAddReduce(
-  ConstantFP::getNegativeZero(SrcVTy->getElementType()), Src);
-  return Rdx;
-};
-break;
+return Builder.CreateFAddReduce(ConstantFP::getNegativeZero(SrcVecEltTy),
+Src);
   case Instruction::FMul:
-BuildFunc = [&]() {
-  Type *Ty = SrcVTy->getElementType();
-  auto Rdx = Builder.CreateFMulReduce(ConstantFP::get(Ty, 1.0), Src);
-  return Rdx;
-};
-break;
+return Builder.CreateFMulReduce(ConstantFP::get(SrcVecEltTy, 1.0), Src);
   case Instruction::ICmp:
 switch (RdxKind) {
 case RecurKind::SMax:
-  BuildFunc = [&]() { return Builder.CreateIntMaxReduce(Src, true); };
-  break;
+  return Builder.CreateIntMaxReduce(Src, true);
 case RecurKind::SMin:
-  BuildFunc = [&]() { return Builder.CreateIntMinReduce(Src, true); };
-  break;
+  return Builder.CreateIntMinReduce(Src, true);
 case RecurKind::UMax:
-  BuildFunc = [&]() { return Builder.CreateIntMaxReduce(Src, false); };
-  break;
+  return Builder.CreateIntMaxReduce(Src, false);
 case RecurKind::UMin:
-  BuildFunc = [&]() { return Builder.CreateIntMinReduce(Src, false); };
-  break;
+  return Builder.CreateIntMinReduce(Src, false);
 default:
   llvm_unreachable("Unexpected min/max reduction type");
 }
-break;
   case Instruction::FCmp:
 assert((RdxKind == RecurKind::FMax || RdxKind == RecurKind::FMin) &&
"Unexpected min/max reduction type");
 if (RdxKind == RecurKind::FMax)
-  BuildFunc = [&]() { return Builder.CreateFPMaxReduce(Src); };
+  return Builder.CreateFPMaxReduce(Src);
 else
-  BuildFunc = [&]() { return Builder.CreateFPMinReduce(Src); };
-break;
+  return Builder.CreateFPMinReduce(Src);
   default:
 llvm_unreachable("Unhandled opcode");
   }
-  return BuildFunc();
 }
 
 Value *llvm::createTargetReduction(IRBuilderBase &B,





[llvm-branch-commits] [llvm] 58b6c5d - [LoopUtils] reorder logic for creating reduction; NFC

2021-01-04 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2021-01-04T16:05:02-05:00
New Revision: 58b6c5d932a0d435ddfd13f4f5b011207e64297f

URL: 
https://github.com/llvm/llvm-project/commit/58b6c5d932a0d435ddfd13f4f5b011207e64297f
DIFF: 
https://github.com/llvm/llvm-project/commit/58b6c5d932a0d435ddfd13f4f5b011207e64297f.diff

LOG: [LoopUtils] reorder logic for creating reduction; NFC

If we are using a shuffle reduction, we don't need to
go through the switch on opcode, so return early.

Added: 


Modified: 
llvm/lib/Transforms/Utils/LoopUtils.cpp

Removed: 




diff  --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp 
b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index 96f1d4219bac..e062eacf82b2 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -982,6 +982,15 @@ Value *llvm::createSimpleTargetReduction(IRBuilderBase 
&Builder,
  unsigned Opcode, Value *Src,
  RecurKind RdxKind,
  ArrayRef RedOps) {
+  TargetTransformInfo::ReductionFlags RdxFlags;
+  RdxFlags.IsMaxOp = RdxKind == RecurKind::SMax ||
+ RdxKind == RecurKind::UMax ||
+ RdxKind == RecurKind::FMax;
+  RdxFlags.IsSigned = RdxKind == RecurKind::SMax || RdxKind == RecurKind::SMin;
+  if (!ForceReductionIntrinsic &&
+  !TTI->useReductionIntrinsic(Opcode, Src->getType(), RdxFlags))
+return getShuffleReduction(Builder, Src, Opcode, RdxKind, RedOps);
+
   auto *SrcVTy = cast(Src->getType());
 
   std::function BuildFunc;
@@ -1044,15 +1053,7 @@ Value *llvm::createSimpleTargetReduction(IRBuilderBase 
&Builder,
   default:
 llvm_unreachable("Unhandled opcode");
   }
-  TargetTransformInfo::ReductionFlags RdxFlags;
-  RdxFlags.IsMaxOp = RdxKind == RecurKind::SMax ||
- RdxKind == RecurKind::UMax ||
- RdxKind == RecurKind::FMax;
-  RdxFlags.IsSigned = RdxKind == RecurKind::SMax || RdxKind == RecurKind::SMin;
-  if (ForceReductionIntrinsic ||
-  TTI->useReductionIntrinsic(Opcode, Src->getType(), RdxFlags))
-return BuildFunc();
-  return getShuffleReduction(Builder, Src, Opcode, RdxKind, RedOps);
+  return BuildFunc();
 }
 
 Value *llvm::createTargetReduction(IRBuilderBase &B,





[llvm-branch-commits] [llvm] 36263a7 - [LoopUtils] remove redundant opcode parameter; NFC

2021-01-04 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2021-01-04T17:05:28-05:00
New Revision: 36263a70d98afc36dea55e7a004d08455811

URL: 
https://github.com/llvm/llvm-project/commit/36263a70d98afc36dea55e7a004d08455811
DIFF: 
https://github.com/llvm/llvm-project/commit/36263a70d98afc36dea55e7a004d08455811.diff

LOG: [LoopUtils] remove redundant opcode parameter; NFC

While here, rename the inaccurate getRecurrenceBinOp()
because that was also used to get CmpInst opcodes.

The recurrence/reduction kind should always refer to the
expected opcode for a reduction. SLP appears to be the
only direct caller of createSimpleTargetReduction(), and
that calling code ideally should not be carrying around
both an opcode and a reduction kind.

This should allow us to generalize reduction matching to
use intrinsics instead of only binops.
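
After this change a caller passes only the kind, and the opcode is derived
on demand. A minimal usage sketch (variable names are illustrative):

  RecurKind Kind = RecurKind::FAdd;
  unsigned Opc = RecurrenceDescriptor::getOpcode(Kind); // Instruction::FAdd
  Value *Rdx = createSimpleTargetReduction(Builder, TTI, VecVal, Kind);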

Added: 


Modified: 
llvm/include/llvm/Analysis/IVDescriptors.h
llvm/include/llvm/Transforms/Utils/LoopUtils.h
llvm/lib/Analysis/IVDescriptors.cpp
llvm/lib/Transforms/Utils/LoopUtils.cpp
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
llvm/lib/Transforms/Vectorize/VPlan.cpp

Removed: 




diff  --git a/llvm/include/llvm/Analysis/IVDescriptors.h 
b/llvm/include/llvm/Analysis/IVDescriptors.h
index 798eb430df08f..6bb6c4cae0a2c 100644
--- a/llvm/include/llvm/Analysis/IVDescriptors.h
+++ b/llvm/include/llvm/Analysis/IVDescriptors.h
@@ -139,9 +139,8 @@ class RecurrenceDescriptor {
   /// Returns identity corresponding to the RecurrenceKind.
   static Constant *getRecurrenceIdentity(RecurKind K, Type *Tp);
 
-  /// Returns the opcode of binary operation corresponding to the
-  /// RecurrenceKind.
-  static unsigned getRecurrenceBinOp(RecurKind Kind);
+  /// Returns the opcode corresponding to the RecurrenceKind.
+  static unsigned getOpcode(RecurKind Kind);
 
   /// Returns true if Phi is a reduction of type Kind and adds it to the
   /// RecurrenceDescriptor. If either \p DB is non-null or \p AC and \p DT are
@@ -178,9 +177,7 @@ class RecurrenceDescriptor {
 
   RecurKind getRecurrenceKind() const { return Kind; }
 
-  unsigned getRecurrenceBinOp() const {
-return getRecurrenceBinOp(getRecurrenceKind());
-  }
+  unsigned getOpcode() const { return getOpcode(getRecurrenceKind()); }
 
   FastMathFlags getFastMathFlags() const { return FMF; }
 

diff  --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h 
b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
index b29add4cba0e5..d606fa954f952 100644
--- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
@@ -366,8 +366,7 @@ Value *getShuffleReduction(IRBuilderBase &Builder, Value 
*Src, unsigned Op,
 /// required to implement the reduction.
 /// Fast-math-flags are propagated using the IRBuilder's setting.
 Value *createSimpleTargetReduction(IRBuilderBase &B,
-   const TargetTransformInfo *TTI,
-   unsigned Opcode, Value *Src,
+   const TargetTransformInfo *TTI, Value *Src,
RecurKind RdxKind,
ArrayRef RedOps = None);
 

diff  --git a/llvm/lib/Analysis/IVDescriptors.cpp 
b/llvm/lib/Analysis/IVDescriptors.cpp
index 0bd4f98541587..a11faac093db0 100644
--- a/llvm/lib/Analysis/IVDescriptors.cpp
+++ b/llvm/lib/Analysis/IVDescriptors.cpp
@@ -800,8 +800,7 @@ Constant 
*RecurrenceDescriptor::getRecurrenceIdentity(RecurKind K, Type *Tp) {
   }
 }
 
-/// This function translates the recurrence kind to an LLVM binary operator.
-unsigned RecurrenceDescriptor::getRecurrenceBinOp(RecurKind Kind) {
+unsigned RecurrenceDescriptor::getOpcode(RecurKind Kind) {
   switch (Kind) {
   case RecurKind::Add:
 return Instruction::Add;
@@ -833,7 +832,7 @@ unsigned RecurrenceDescriptor::getRecurrenceBinOp(RecurKind 
Kind) {
 SmallVector
 RecurrenceDescriptor::getReductionOpChain(PHINode *Phi, Loop *L) const {
   SmallVector ReductionOperations;
-  unsigned RedOp = getRecurrenceBinOp(Kind);
+  unsigned RedOp = getOpcode(Kind);
 
   // Search down from the Phi to the LoopExitInstr, looking for instructions
   // with a single user of the correct type for the reduction.

diff  --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp 
b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index 3245f5f21017f..f2b94d9e78adc 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -979,9 +979,9 @@ Value *llvm::getShuffleReduction(IRBuilderBase &Builder, 
Value *Src,
 
 Value *llvm::createSimpleTargetReduction(IRBuilderBase &Builder,
  const TargetTransformInfo *TTI,
- unsigned Opcode, Value *Src,
- RecurKind RdxKind,
+ Value *Src, Re

[llvm-branch-commits] [llvm] 3b8b2c7 - [SLP] delete unused pairwise reduction option

2021-01-05 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2021-01-05T13:23:07-05:00
New Revision: 3b8b2c7da2efb88d9f13e911e383af430ab463ef

URL: 
https://github.com/llvm/llvm-project/commit/3b8b2c7da2efb88d9f13e911e383af430ab463ef
DIFF: 
https://github.com/llvm/llvm-project/commit/3b8b2c7da2efb88d9f13e911e383af430ab463ef.diff

LOG: [SLP] delete unused pairwise reduction option

SLP tries to model two forms of vector reductions: pairwise and splitting.
From the cost model code comments, those are defined using an example as:

  /// Pairwise:
  ///  (v0, v1, v2, v3)
  ///  ((v0+v1), (v2+v3), undef, undef)
  /// Split:
  ///  (v0, v1, v2, v3)
  ///  ((v0+v2), (v1+v3), undef, undef)

I don't know the full history of this functionality, but it was partly
added back in D29402. There are apparently no users at this point (no
regression tests change). X86 might have managed to work around the need
for this through cost model and codegen improvements.

Removing this code makes it easier to continue the work that was started
in D87416 / D88193. The alternative -- if there is some target that is
silently using this option -- is to move this logic into LoopUtils. We
have related/duplicate functionality there via llvm::createTargetReduction().

Differential Revision: https://reviews.llvm.org/D93860
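
For context, the splitting form that remains is the usual log2 shuffle
ladder. A hedged sketch of how such a reduction is emitted for a generic
binop (the in-tree version lives in LoopUtils' getShuffleReduction):

  // Halve the live width each step: <8 x T> -> <4 x T> -> ... -> lane 0.
  Value *Red = Src;
  unsigned VF = cast<FixedVectorType>(Src->getType())->getNumElements();
  for (unsigned Width = VF / 2; Width >= 1; Width /= 2) {
    SmallVector<int, 8> Mask(VF, -1); // -1 encodes an undef lane
    for (unsigned I = 0; I != Width; ++I)
      Mask[I] = Width + I; // move the upper half down
    Value *Shuf = Builder.CreateShuffleVector(
        Red, UndefValue::get(Red->getType()), Mask, "rdx.shuf");
    Red = Builder.CreateBinOp((Instruction::BinaryOps)Opcode, Red, Shuf, "rdx");
  }
  Value *Result = Builder.CreateExtractElement(Red, uint64_t(0));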

Added: 


Modified: 
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 




diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp 
b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index a655d3dd91bd..8965a44ffd2b 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -6382,35 +6382,6 @@ bool SLPVectorizerPass::tryToVectorize(Instruction *I, 
BoUpSLP &R) {
   return false;
 }
 
-/// Generate a shuffle mask to be used in a reduction tree.
-///
-/// \param VecLen The length of the vector to be reduced.
-/// \param NumEltsToRdx The number of elements that should be reduced in the
-///vector.
-/// \param IsPairwise Whether the reduction is a pairwise or splitting
-///reduction. A pairwise reduction will generate a mask of
-///<0,2,...> or <1,3,..> while a splitting reduction will generate
-///<2,3, undef,undef> for a vector of 4 and NumElts = 2.
-/// \param IsLeft True will generate a mask of even elements, odd otherwise.
-static SmallVector createRdxShuffleMask(unsigned VecLen,
- unsigned NumEltsToRdx,
- bool IsPairwise, bool IsLeft) 
{
-  assert((IsPairwise || !IsLeft) && "Don't support a <0,1,undef,...> mask");
-
-  SmallVector ShuffleMask(VecLen, -1);
-
-  if (IsPairwise)
-// Build a mask of 0, 2, ... (left) or 1, 3, ... (right).
-for (unsigned i = 0; i != NumEltsToRdx; ++i)
-  ShuffleMask[i] = 2 * i + !IsLeft;
-  else
-// Move the upper half of the vector to the lower half.
-for (unsigned i = 0; i != NumEltsToRdx; ++i)
-  ShuffleMask[i] = NumEltsToRdx + i;
-
-  return ShuffleMask;
-}
-
 namespace {
 
 /// Model horizontal reductions.
@@ -6730,10 +6701,6 @@ class HorizontalReduction {
   /// The operation data for the leaf values that we perform a reduction on.
   OperationData RdxLeafVal;
 
-  /// Should we model this reduction as a pairwise reduction tree or a tree 
that
-  /// splits the vector in halves and adds those halves.
-  bool IsPairwiseReduction = false;
-
   /// Checks if the ParentStackElem.first should be marked as a reduction
   /// operation with an extra argument or as extra argument itself.
   void markExtraArg(std::pair<Instruction *, unsigned> &ParentStackElem,
@@ -7170,7 +7137,6 @@ class HorizontalReduction {
 Type *ScalarTy = FirstReducedVal->getType();
 auto *VecTy = FixedVectorType::get(ScalarTy, ReduxWidth);
 
-int PairwiseRdxCost;
 int SplittingRdxCost;
 switch (RdxTreeInst.getKind()) {
 case RecurKind::Add:
@@ -7180,9 +7146,6 @@ class HorizontalReduction {
 case RecurKind::Xor:
 case RecurKind::FAdd:
 case RecurKind::FMul:
-  PairwiseRdxCost =
-  TTI->getArithmeticReductionCost(RdxTreeInst.getOpcode(), VecTy,
-  /*IsPairwiseForm=*/true);
   SplittingRdxCost =
   TTI->getArithmeticReductionCost(RdxTreeInst.getOpcode(), VecTy,
   /*IsPairwiseForm=*/false);
@@ -7194,9 +7157,6 @@ class HorizontalReduction {
   auto *VecCondTy = cast<VectorType>(CmpInst::makeCmpResultType(VecTy));
   RecurKind Kind = RdxTreeInst.getKind();
   bool IsUnsigned = Kind == RecurKind::UMax || Kind == RecurKind::UMin;
-  PairwiseRdxCost =
-  TTI->getMinMaxReductionCost(VecTy, VecCondTy,
-  /*IsPairwiseForm=*/true, IsUnsigned);
   SplittingRdxCost =
   TTI->getMinMaxReductionCost(VecTy, VecCondTy,
   /*IsPairwiseForm=*/false, IsUnsi

[llvm-branch-commits] [llvm] d4a999b - [SLP] reduce code duplication; NFC

2021-01-05 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2021-01-05T15:12:40-05:00
New Revision: d4a999b453a4d3cfeee02f00f4900327fc7fcede

URL: 
https://github.com/llvm/llvm-project/commit/d4a999b453a4d3cfeee02f00f4900327fc7fcede
DIFF: 
https://github.com/llvm/llvm-project/commit/d4a999b453a4d3cfeee02f00f4900327fc7fcede.diff

LOG: [SLP] reduce code duplication; NFC

Added: 


Modified: 
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 




diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp 
b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 8965a44ffd2b..390b71e7a46b 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -7137,8 +7137,9 @@ class HorizontalReduction {
 Type *ScalarTy = FirstReducedVal->getType();
 auto *VecTy = FixedVectorType::get(ScalarTy, ReduxWidth);
 
+RecurKind Kind = RdxTreeInst.getKind();
 int SplittingRdxCost;
-switch (RdxTreeInst.getKind()) {
+switch (Kind) {
 case RecurKind::Add:
 case RecurKind::Mul:
 case RecurKind::Or:
@@ -7155,7 +7156,6 @@ class HorizontalReduction {
 case RecurKind::UMax:
 case RecurKind::UMin: {
   auto *VecCondTy = cast<VectorType>(CmpInst::makeCmpResultType(VecTy));
-  RecurKind Kind = RdxTreeInst.getKind();
   bool IsUnsigned = Kind == RecurKind::UMax || Kind == RecurKind::UMin;
   SplittingRdxCost =
   TTI->getMinMaxReductionCost(VecTy, VecCondTy,
@@ -7167,7 +7167,7 @@ class HorizontalReduction {
 }
 
 int ScalarReduxCost = 0;
-switch (RdxTreeInst.getKind()) {
+switch (Kind) {
 case RecurKind::Add:
 case RecurKind::Mul:
 case RecurKind::Or:



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 5a1d31a - [SLP] use reduction kind's opcode for cost model queries; NFC

2021-01-05 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2021-01-05T15:12:40-05:00
New Revision: 5a1d31a28490e85de440b55e2e257b61d32e85b9

URL: 
https://github.com/llvm/llvm-project/commit/5a1d31a28490e85de440b55e2e257b61d32e85b9
DIFF: 
https://github.com/llvm/llvm-project/commit/5a1d31a28490e85de440b55e2e257b61d32e85b9.diff

LOG: [SLP] use reduction kind's opcode for cost model queries; NFC

This should be no-functional-change because the reduction kind
opcodes are 1-for-1 mappings to the instructions we are matching
as reductions. But we want to remove the need for the
`OperationData` opcode field because that does not work when
we start matching intrinsics (eg, maxnum) as reduction candidates.
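
The mapping being relied on is RecurrenceDescriptor::getOpcode(); a
spot-check sketch of a few of the 1-for-1 pairs (assumes an LLVM source and
build tree to compile and link against):

  // Sketch only: verify a few kind->opcode pairs used by the diff below.
  #include "llvm/Analysis/IVDescriptors.h"
  #include "llvm/IR/Instruction.h"
  #include <cassert>

  int main() {
    using namespace llvm;
    assert(RecurrenceDescriptor::getOpcode(RecurKind::Add) == Instruction::Add);
    assert(RecurrenceDescriptor::getOpcode(RecurKind::FMul) == Instruction::FMul);
    // All integer min/max kinds report the compare opcode:
    assert(RecurrenceDescriptor::getOpcode(RecurKind::SMax) == Instruction::ICmp);
    return 0;
  }
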

Added: 


Modified: 
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 




diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp 
b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 390b71e7a46b..48f2a2d2886f 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -7138,6 +7138,7 @@ class HorizontalReduction {
 auto *VecTy = FixedVectorType::get(ScalarTy, ReduxWidth);
 
 RecurKind Kind = RdxTreeInst.getKind();
+unsigned RdxOpcode = RecurrenceDescriptor::getOpcode(Kind);
 int SplittingRdxCost;
 switch (Kind) {
 case RecurKind::Add:
@@ -7147,9 +7148,8 @@ class HorizontalReduction {
 case RecurKind::Xor:
 case RecurKind::FAdd:
 case RecurKind::FMul:
-  SplittingRdxCost =
-  TTI->getArithmeticReductionCost(RdxTreeInst.getOpcode(), VecTy,
-  /*IsPairwiseForm=*/false);
+  SplittingRdxCost = TTI->getArithmeticReductionCost(
+  RdxOpcode, VecTy, /*IsPairwiseForm=*/false);
   break;
 case RecurKind::SMax:
 case RecurKind::SMin:
@@ -7175,15 +7175,14 @@ class HorizontalReduction {
 case RecurKind::Xor:
 case RecurKind::FAdd:
 case RecurKind::FMul:
-  ScalarReduxCost =
-  TTI->getArithmeticInstrCost(RdxTreeInst.getOpcode(), ScalarTy);
+  ScalarReduxCost = TTI->getArithmeticInstrCost(RdxOpcode, ScalarTy);
   break;
 case RecurKind::SMax:
 case RecurKind::SMin:
 case RecurKind::UMax:
 case RecurKind::UMin:
   ScalarReduxCost =
-  TTI->getCmpSelInstrCost(RdxTreeInst.getOpcode(), ScalarTy) +
+  TTI->getCmpSelInstrCost(RdxOpcode, ScalarTy) +
   TTI->getCmpSelInstrCost(Instruction::Select, ScalarTy,
   CmpInst::makeCmpResultType(ScalarTy));
   break;



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 6a03f8a - [SLP] reduce code for finding reduction costs; NFC

2021-01-05 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2021-01-05T17:35:54-05:00
New Revision: 6a03f8ab629b34a2425764caaa46dbfcf3d8e1ef

URL: 
https://github.com/llvm/llvm-project/commit/6a03f8ab629b34a2425764caaa46dbfcf3d8e1ef
DIFF: 
https://github.com/llvm/llvm-project/commit/6a03f8ab629b34a2425764caaa46dbfcf3d8e1ef.diff

LOG: [SLP] reduce code for finding reduction costs; NFC

We can get both (vector/scalar) costs in a single switch
instead of sequentially.

Added: 


Modified: 
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 




diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp 
b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 48f2a2d2886f..92e3ae7bea8b 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -7140,6 +7140,7 @@ class HorizontalReduction {
 RecurKind Kind = RdxTreeInst.getKind();
 unsigned RdxOpcode = RecurrenceDescriptor::getOpcode(Kind);
 int SplittingRdxCost;
+int ScalarReduxCost;
 switch (Kind) {
 case RecurKind::Add:
 case RecurKind::Mul:
@@ -7150,6 +7151,7 @@ class HorizontalReduction {
 case RecurKind::FMul:
   SplittingRdxCost = TTI->getArithmeticReductionCost(
   RdxOpcode, VecTy, /*IsPairwiseForm=*/false);
+  ScalarReduxCost = TTI->getArithmeticInstrCost(RdxOpcode, ScalarTy);
   break;
 case RecurKind::SMax:
 case RecurKind::SMin:
@@ -7160,42 +7162,21 @@ class HorizontalReduction {
   SplittingRdxCost =
   TTI->getMinMaxReductionCost(VecTy, VecCondTy,
   /*IsPairwiseForm=*/false, IsUnsigned);
-  break;
-}
-default:
-  llvm_unreachable("Expected arithmetic or min/max reduction operation");
-}
-
-int ScalarReduxCost = 0;
-switch (Kind) {
-case RecurKind::Add:
-case RecurKind::Mul:
-case RecurKind::Or:
-case RecurKind::And:
-case RecurKind::Xor:
-case RecurKind::FAdd:
-case RecurKind::FMul:
-  ScalarReduxCost = TTI->getArithmeticInstrCost(RdxOpcode, ScalarTy);
-  break;
-case RecurKind::SMax:
-case RecurKind::SMin:
-case RecurKind::UMax:
-case RecurKind::UMin:
   ScalarReduxCost =
   TTI->getCmpSelInstrCost(RdxOpcode, ScalarTy) +
   TTI->getCmpSelInstrCost(Instruction::Select, ScalarTy,
   CmpInst::makeCmpResultType(ScalarTy));
   break;
+}
 default:
   llvm_unreachable("Expected arithmetic or min/max reduction operation");
 }
-ScalarReduxCost *= (ReduxWidth - 1);
 
+ScalarReduxCost *= (ReduxWidth - 1);
 LLVM_DEBUG(dbgs() << "SLP: Adding cost "
   << SplittingRdxCost - ScalarReduxCost
   << " for reduction that starts with " << *FirstReducedVal
   << " (It is a splitting reduction)\n");
-
 return SplittingRdxCost - ScalarReduxCost;
   }
 



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 5d24089 - [SLP] reduce code for propagating flags on reductions; NFC

2021-01-06 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2021-01-06T14:37:44-05:00
New Revision: 5d24089a7001e9fb4c0e665e93312916d88aaef9

URL: 
https://github.com/llvm/llvm-project/commit/5d24089a7001e9fb4c0e665e93312916d88aaef9
DIFF: 
https://github.com/llvm/llvm-project/commit/5d24089a7001e9fb4c0e665e93312916d88aaef9.diff

LOG: [SLP] reduce code for propagating flags on reductions; NFC

If we add or change the matching to handle intrinsics, this might get more
wordy, but there's no need to list each kind currently.

Added: 


Modified: 
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 




diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp 
b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 75f881dc7d4b..c4278722418b 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -6637,28 +6637,15 @@ class HorizontalReduction {
 const ReductionOpsListType &ReductionOps) const {
   assert(isVectorizable() &&
  "Expected add|fadd or min/max reduction operation.");
-  auto *Op = createOp(Builder, LHS, RHS, Name);
-  switch (Kind) {
-  case RecurKind::Add:
-  case RecurKind::Mul:
-  case RecurKind::Or:
-  case RecurKind::And:
-  case RecurKind::Xor:
-  case RecurKind::FAdd:
-  case RecurKind::FMul:
-propagateIRFlags(Op, ReductionOps[0]);
-return Op;
-  case RecurKind::SMax:
-  case RecurKind::SMin:
-  case RecurKind::UMax:
-  case RecurKind::UMin:
-if (auto *SI = dyn_cast<SelectInst>(Op))
-  propagateIRFlags(SI->getCondition(), ReductionOps[0]);
+  Value *Op = createOp(Builder, LHS, RHS, Name);
+  if (RecurrenceDescriptor::isIntMinMaxRecurrenceKind(Kind)) {
+if (auto *Sel = dyn_cast<SelectInst>(Op))
+  propagateIRFlags(Sel->getCondition(), ReductionOps[0]);
 propagateIRFlags(Op, ReductionOps[1]);
 return Op;
-  default:
-llvm_unreachable("Unknown reduction operation.");
   }
+  propagateIRFlags(Op, ReductionOps[0]);
+  return Op;
 }
 /// Creates reduction operation with the current opcode with the IR flags
 /// from \p I.
@@ -6666,30 +6653,15 @@ class HorizontalReduction {
 const Twine &Name, Instruction *I) const {
   assert(isVectorizable() &&
  "Expected add|fadd or min/max reduction operation.");
-  auto *Op = createOp(Builder, LHS, RHS, Name);
-  switch (Kind) {
-  case RecurKind::Add:
-  case RecurKind::Mul:
-  case RecurKind::Or:
-  case RecurKind::And:
-  case RecurKind::Xor:
-  case RecurKind::FAdd:
-  case RecurKind::FMul:
-propagateIRFlags(Op, I);
-return Op;
-  case RecurKind::SMax:
-  case RecurKind::SMin:
-  case RecurKind::UMax:
-  case RecurKind::UMin:
-if (auto *SI = dyn_cast<SelectInst>(Op)) {
-  propagateIRFlags(SI->getCondition(),
+  Value *Op = createOp(Builder, LHS, RHS, Name);
+  if (RecurrenceDescriptor::isIntMinMaxRecurrenceKind(Kind)) {
+if (auto *Sel = dyn_cast<SelectInst>(Op)) {
+  propagateIRFlags(Sel->getCondition(),
cast<SelectInst>(I)->getCondition());
 }
-propagateIRFlags(Op, I);
-return Op;
-  default:
-llvm_unreachable("Unknown reduction operation.");
   }
+  propagateIRFlags(Op, I);
+  return Op;
 }
   };
 



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 4c022b5 - [SLP] use reduction kind's opcode to create new instructions; NFC

2021-01-06 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2021-01-06T14:37:44-05:00
New Revision: 4c022b5a41dee998ae50cdad4e8b6548acbeee9f

URL: 
https://github.com/llvm/llvm-project/commit/4c022b5a41dee998ae50cdad4e8b6548acbeee9f
DIFF: 
https://github.com/llvm/llvm-project/commit/4c022b5a41dee998ae50cdad4e8b6548acbeee9f.diff

LOG: [SLP] use reduction kind's opcode to create new instructions; NFC

Similar to 5a1d31a28 -
This should be no-functional-change because the reduction kind
opcodes are 1-for-1 mappings to the instructions we are matching
as reductions. But we want to remove the need for the
`OperationData` opcode field because that does not work when
we start matching intrinsics (eg, maxnum) as reduction candidates.
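
For reference, the cmp+select pairs built by createOp below compute plain
min/max semantics; a tiny standalone check (a sketch, not part of the
patch):

  // Sketch only: scalar semantics of the ICmp+Select pairs created below.
  #include <cassert>

  int main() {
    auto SMax = [](int L, int R) { return L > R ? L : R; };           // SGT + select
    auto UMin = [](unsigned L, unsigned R) { return L < R ? L : R; }; // ULT + select
    assert(SMax(-3, 2) == 2);
    assert(UMin(3u, 0xFFFFFFFFu) == 3u);
    return 0;
  }
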

Added: 


Modified: 
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 




diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp 
b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index c4278722418b..7b77aef2a75c 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -6457,6 +6457,7 @@ class HorizontalReduction {
 Value *createOp(IRBuilder<> &Builder, Value *LHS, Value *RHS,
 const Twine &Name) const {
   assert(isVectorizable() && "Unhandled reduction operation.");
+  unsigned RdxOpcode = RecurrenceDescriptor::getOpcode(Kind);
   switch (Kind) {
   case RecurKind::Add:
   case RecurKind::Mul:
@@ -6465,26 +6466,22 @@ class HorizontalReduction {
   case RecurKind::Xor:
   case RecurKind::FAdd:
   case RecurKind::FMul:
-return Builder.CreateBinOp((Instruction::BinaryOps)Opcode, LHS, RHS,
+return Builder.CreateBinOp((Instruction::BinaryOps)RdxOpcode, LHS, RHS,
Name);
 
   case RecurKind::SMax: {
-assert(Opcode == Instruction::ICmp && "Expected integer types.");
 Value *Cmp = Builder.CreateICmpSGT(LHS, RHS, Name);
 return Builder.CreateSelect(Cmp, LHS, RHS, Name);
   }
   case RecurKind::SMin: {
-assert(Opcode == Instruction::ICmp && "Expected integer types.");
 Value *Cmp = Builder.CreateICmpSLT(LHS, RHS, Name);
 return Builder.CreateSelect(Cmp, LHS, RHS, Name);
   }
   case RecurKind::UMax: {
-assert(Opcode == Instruction::ICmp && "Expected integer types.");
 Value *Cmp = Builder.CreateICmpUGT(LHS, RHS, Name);
 return Builder.CreateSelect(Cmp, LHS, RHS, Name);
   }
   case RecurKind::UMin: {
-assert(Opcode == Instruction::ICmp && "Expected integer types.");
 Value *Cmp = Builder.CreateICmpULT(LHS, RHS, Name);
 return Builder.CreateSelect(Cmp, LHS, RHS, Name);
   }



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 4c7148d - [SLP] remove opcode identifier for reduction; NFC

2021-01-07 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2021-01-07T14:07:27-05:00
New Revision: 4c7148d75cd7e75f169251cdab3e013819344cfd

URL: 
https://github.com/llvm/llvm-project/commit/4c7148d75cd7e75f169251cdab3e013819344cfd
DIFF: 
https://github.com/llvm/llvm-project/commit/4c7148d75cd7e75f169251cdab3e013819344cfd.diff

LOG: [SLP] remove opcode identifier for reduction; NFC

Another step towards allowing intrinsics in reduction matching.

Added: 


Modified: 
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 




diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp 
b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 8d6453f277ea..c8e5fdb458ff 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -772,7 +772,7 @@ class BoUpSLP {
   /// effectively impossible for the backend to undo.
   /// TODO: If load combining is allowed in the IR optimizer, this analysis
   ///   may not be necessary.
-  bool isLoadCombineReductionCandidate(unsigned ReductionOpcode) const;
+  bool isLoadCombineReductionCandidate(RecurKind RdxKind) const;
 
   /// Assume that a vector of stores of bitwise-or/shifted/zexted loaded values
   /// can be load combined in the backend. Load combining may not be allowed in
@@ -3896,8 +3896,8 @@ static bool isLoadCombineCandidateImpl(Value *Root, 
unsigned NumElts,
   return true;
 }
 
-bool BoUpSLP::isLoadCombineReductionCandidate(unsigned RdxOpcode) const {
-  if (RdxOpcode != Instruction::Or)
+bool BoUpSLP::isLoadCombineReductionCandidate(RecurKind RdxKind) const {
+  if (RdxKind != RecurKind::Or)
 return false;
 
   unsigned NumElts = VectorizableTree[0]->Scalars.size();
@@ -6987,7 +6987,7 @@ class HorizontalReduction {
   }
   if (V.isTreeTinyAndNotFullyVectorizable())
 break;
-  if (V.isLoadCombineReductionCandidate(RdxTreeInst.getOpcode()))
+  if (V.isLoadCombineReductionCandidate(RdxTreeInst.getKind()))
 break;
 
   V.computeMinimumValueSizes();



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 267ff79 - [SLP] limit verifyFunction to debug build (PR48689)

2021-01-08 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2021-01-08T08:10:17-05:00
New Revision: 267ff7901c745dc903d55599240464ebc4c0bda3

URL: 
https://github.com/llvm/llvm-project/commit/267ff7901c745dc903d55599240464ebc4c0bda3
DIFF: 
https://github.com/llvm/llvm-project/commit/267ff7901c745dc903d55599240464ebc4c0bda3.diff

LOG: [SLP] limit verifyFunction to debug build (PR48689)

As noted in PR48689, the verifier may have some kind
of exponential behavior that should be addressed
separately. For now, only run it in debug mode to
prevent problems for release+asserts.
That limit is what we had before D80401, and I'm
not sure if there was a reason to change it in that
patch.

Added: 


Modified: 
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 




diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp 
b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 8c06e29341ad..ef0dea0f11d3 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -2499,7 +2499,7 @@ BoUpSLP::~BoUpSLP() {
"trying to erase instruction with users.");
 Pair.getFirst()->eraseFromParent();
   }
-  assert(!verifyFunction(*F, &dbgs()));
+  LLVM_DEBUG(verifyFunction(*F));
 }
 
 void BoUpSLP::eraseInstructions(ArrayRef<Value *> AV) {



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 0aa75fb - [SLP] put verifyFunction call behind EXPENSIVE_CHECKS

2021-01-10 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2021-01-10T12:32:21-05:00
New Revision: 0aa75fb12faa04e07ba1a6e334605357b6a159c9

URL: 
https://github.com/llvm/llvm-project/commit/0aa75fb12faa04e07ba1a6e334605357b6a159c9
DIFF: 
https://github.com/llvm/llvm-project/commit/0aa75fb12faa04e07ba1a6e334605357b6a159c9.diff

LOG: [SLP] put verifyFunction call behind EXPENSIVE_CHECKS

A severe compile-time slowdown from this call is noted in:
https://llvm.org/PR48689
My naive fix was to put it under LLVM_DEBUG (267ff79),
but that's not limiting in the way we want.
This is a quick fix (or we could just remove the call completely
and rely on some later pass to discover potentially wrong IR?).
A bigger/better fix would be to improve/limit verifyFunction()
as noted in:
https://llvm.org/PR47712

Differential Revision: https://reviews.llvm.org/D94328

Added: 


Modified: 
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 




diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp 
b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index f124dd8ef374..d0b6b432e93e 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -2499,7 +2499,11 @@ BoUpSLP::~BoUpSLP() {
"trying to erase instruction with users.");
 Pair.getFirst()->eraseFromParent();
   }
-  LLVM_DEBUG(verifyFunction(*F));
+#ifdef EXPENSIVE_CHECKS
+  // If we could guarantee that this call is not extremely slow, we could
+  // remove the ifdef limitation (see PR47712).
+  assert(!verifyFunction(*F, %dbgs()));
+#endif
 }
 
 void BoUpSLP::eraseInstructions(ArrayRef<Value *> AV) {



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 3f09c77 - [SLP] fix typo in assert

2021-01-10 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2021-01-10T13:15:04-05:00
New Revision: 3f09c77d33dcd74b3cba4558b07f88d87ab2dd9d

URL: 
https://github.com/llvm/llvm-project/commit/3f09c77d33dcd74b3cba4558b07f88d87ab2dd9d
DIFF: 
https://github.com/llvm/llvm-project/commit/3f09c77d33dcd74b3cba4558b07f88d87ab2dd9d.diff

LOG: [SLP] fix typo in assert

This snuck into 0aa75fb12faa, but I didn't catch it locally.

Added: 


Modified: 
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 




diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp 
b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index d0b6b432e93e..5b91495bd844 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -2502,7 +2502,7 @@ BoUpSLP::~BoUpSLP() {
 #ifdef EXPENSIVE_CHECKS
   // If we could guarantee that this call is not extremely slow, we could
   // remove the ifdef limitation (see PR47712).
-  assert(!verifyFunction(*F, %dbgs()));
+  assert(!verifyFunction(*F, &dbgs()));
 #endif
 }
 



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 288f3fc - [InstCombine] reduce icmp(ashr X, C1), C2 to sign-bit test

2021-01-11 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2021-01-11T15:53:39-05:00
New Revision: 288f3fc5dfee0c51fc00fe10a985f93c505073eb

URL: 
https://github.com/llvm/llvm-project/commit/288f3fc5dfee0c51fc00fe10a985f93c505073eb
DIFF: 
https://github.com/llvm/llvm-project/commit/288f3fc5dfee0c51fc00fe10a985f93c505073eb.diff

LOG: [InstCombine] reduce icmp(ashr X, C1), C2 to sign-bit test

This is a more basic pattern that we should handle before trying to solve:
https://llvm.org/PR48640

There might be a better way to think about this because the pre-condition
that I came up with (number of sign bits in the compare constant) misses a
potential transform for each of ugt and ult as commented on in the test file.

Tried to model this in Alive:
https://rise4fun.com/Alive/juX1
...but I couldn't get the ComputeNumSignBits() pre-condition to work as
expected, so I replaced it with leading 0/1 preconditions instead.

  Name: ugt
  Pre: countLeadingZeros(C2) <= C1 && countLeadingOnes(C2) <= C1
  %a = ashr %x, C1
  %r = icmp ugt i8 %a, C2
=>
  %r = icmp slt i8 %x, 0

  Name: ult
  Pre: countLeadingZeros(C2) <= C1 && countLeadingOnes(C2) <= C1
  %a = ashr %x, C1
  %r = icmp ult i4 %a, C2
=>
  %r = icmp sgt i4 %x, -1

Also approximated in Alive2:
https://alive2.llvm.org/ce/z/u5hCcz
https://alive2.llvm.org/ce/z/__szVL
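
As an extra sanity check (not part of the commit), the i8 case can be
verified exhaustively; this sketch brute-forces both the ugt and ult forms
under the leading 0/1 precondition from the proofs above:

  // Sketch only: exhaustive i8 check of the sign-bit-test equivalence.
  #include <cassert>
  #include <cstdint>

  static unsigned clz8(unsigned V) { // leading zeros of an i8 value
    unsigned N = 0;
    for (int B = 7; B >= 0 && !((V >> B) & 1); --B) ++N;
    return N;
  }
  static unsigned clo8(unsigned V) { // leading ones of an i8 value
    unsigned N = 0;
    for (int B = 7; B >= 0 && ((V >> B) & 1); --B) ++N;
    return N;
  }

  int main() {
    for (unsigned C1 = 1; C1 < 8; ++C1)
      for (unsigned C2 = 0; C2 < 256; ++C2) {
        if (clz8(C2) > C1 || clo8(C2) > C1)
          continue; // precondition not met; transform does not apply
        for (unsigned X = 0; X < 256; ++X) {
          uint8_t A = (uint8_t)((int8_t)X >> C1); // ashr X, C1
          assert((A > C2) == ((int8_t)X < 0));    // ugt form: X s< 0
          assert((A < C2) == ((int8_t)X > -1));   // ult form: X s> -1
        }
      }
    return 0;
  }
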

Differential Revision: https://reviews.llvm.org/D94014

Added: 


Modified: 
llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
llvm/test/Transforms/InstCombine/icmp-shr.ll

Removed: 




diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp 
b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 852def699716..9b3cfb3bd754 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -2210,6 +2210,21 @@ Instruction 
*InstCombinerImpl::foldICmpShrConstant(ICmpInst &Cmp,
   (ShiftedC + 1).ashr(ShAmtVal) == (C + 1))
 return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, ShiftedC));
 }
+
+// If the compare constant has significant bits above the lowest sign-bit,
+// then convert an unsigned cmp to a test of the sign-bit:
+// (ashr X, ShiftC) u> C --> X s< 0
+// (ashr X, ShiftC) u< C --> X s> -1
+if (C.getBitWidth() > 2 && C.getNumSignBits() <= ShAmtVal) {
+  if (Pred == CmpInst::ICMP_UGT) {
+return new ICmpInst(CmpInst::ICMP_SLT, X,
+ConstantInt::getNullValue(ShrTy));
+  }
+  if (Pred == CmpInst::ICMP_ULT) {
+return new ICmpInst(CmpInst::ICMP_SGT, X,
+ConstantInt::getAllOnesValue(ShrTy));
+  }
+}
   } else {
 if (Pred == CmpInst::ICMP_ULT || (Pred == CmpInst::ICMP_UGT && IsExact)) {
   // icmp ult (lshr X, ShAmtC), C --> icmp ult X, (C << ShAmtC)

diff  --git a/llvm/test/Transforms/InstCombine/icmp-shr.ll 
b/llvm/test/Transforms/InstCombine/icmp-shr.ll
index 22f61d2d5e6a..ad3eb713aa19 100644
--- a/llvm/test/Transforms/InstCombine/icmp-shr.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-shr.ll
@@ -507,6 +507,10 @@ define <2 x i1> @exact_eq0_multiuse(<2 x i32> %x, <2 x 
i32> %y) {
   ret <2 x i1> %cmp
 }
 
+; Verify conversions of ashr+icmp to a sign-bit test.
+
+; negative test, but different transform possible
+
 define i1 @ashr_ugt_0(i4 %x) {
 ; CHECK-LABEL: @ashr_ugt_0(
 ; CHECK-NEXT:[[R:%.*]] = icmp ugt i4 [[X:%.*]], 1
@@ -517,6 +521,8 @@ define i1 @ashr_ugt_0(i4 %x) {
   ret i1 %r
 }
 
+; negative test
+
 define i1 @ashr_ugt_1(i4 %x) {
 ; CHECK-LABEL: @ashr_ugt_1(
 ; CHECK-NEXT:[[S:%.*]] = ashr i4 [[X:%.*]], 1
@@ -528,6 +534,8 @@ define i1 @ashr_ugt_1(i4 %x) {
   ret i1 %r
 }
 
+; negative test
+
 define i1 @ashr_ugt_2(i4 %x) {
 ; CHECK-LABEL: @ashr_ugt_2(
 ; CHECK-NEXT:[[S:%.*]] = ashr i4 [[X:%.*]], 1
@@ -539,6 +547,9 @@ define i1 @ashr_ugt_2(i4 %x) {
   ret i1 %r
 }
 
+; negative test
+; TODO: This is a sign-bit test, but we don't recognize the pattern.
+
 define i1 @ashr_ugt_3(i4 %x) {
 ; CHECK-LABEL: @ashr_ugt_3(
 ; CHECK-NEXT:[[S:%.*]] = ashr i4 [[X:%.*]], 1
@@ -552,8 +563,7 @@ define i1 @ashr_ugt_3(i4 %x) {
 
 define i1 @ashr_ugt_4(i4 %x) {
 ; CHECK-LABEL: @ashr_ugt_4(
-; CHECK-NEXT:[[S:%.*]] = ashr i4 [[X:%.*]], 1
-; CHECK-NEXT:[[R:%.*]] = icmp ugt i4 [[S]], 4
+; CHECK-NEXT:[[R:%.*]] = icmp slt i4 [[X:%.*]], 0
 ; CHECK-NEXT:ret i1 [[R]]
 ;
   %s = ashr i4 %x, 1
@@ -563,8 +573,7 @@ define i1 @ashr_ugt_4(i4 %x) {
 
 define i1 @ashr_ugt_5(i4 %x) {
 ; CHECK-LABEL: @ashr_ugt_5(
-; CHECK-NEXT:[[S:%.*]] = ashr i4 [[X:%.*]], 1
-; CHECK-NEXT:[[R:%.*]] = icmp ugt i4 [[S]], 5
+; CHECK-NEXT:[[R:%.*]] = icmp slt i4 [[X:%.*]], 0
 ; CHECK-NEXT:ret i1 [[R]]
 ;
   %s = ashr i4 %x, 1
@@ -574,8 +583,7 @@ define i1 @ashr_ugt_5(i4 %x) {
 
 define i1 @ashr_ugt_6(i4 %x) {
 ; CHECK-LABEL: @ashr_ugt_6(
-; CHECK-NEXT:[[S:%.*]] = ashr i4 [[X:%.*]], 1
-; CHECK-NEX

[llvm-branch-commits] [llvm] 9c1765a - [VectorCombine] add test for load with offset; NFC

2020-12-14 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2020-12-14T14:40:06-05:00
New Revision: 9c1765acabf10b7df7cf49456a06bbba2b33b364

URL: 
https://github.com/llvm/llvm-project/commit/9c1765acabf10b7df7cf49456a06bbba2b33b364
DIFF: 
https://github.com/llvm/llvm-project/commit/9c1765acabf10b7df7cf49456a06bbba2b33b364.diff

LOG: [VectorCombine] add test for load with offset; NFC

Added: 


Modified: 
llvm/test/Transforms/VectorCombine/X86/load.ll

Removed: 




diff  --git a/llvm/test/Transforms/VectorCombine/X86/load.ll 
b/llvm/test/Transforms/VectorCombine/X86/load.ll
index 824a507ed103..ba2bf3f37d7b 100644
--- a/llvm/test/Transforms/VectorCombine/X86/load.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/load.ll
@@ -535,3 +535,20 @@ define <8 x i32> 
@load_v1i32_extract_insert_v8i32_extra_use(<1 x i32>* align 16
   %r = insertelement <8 x i32> undef, i32 %s, i32 0
   ret <8 x i32> %r
 }
+
+; TODO: Can't safely load the offset vector, but can load+shuffle if it is 
profitable.
+
+define <8 x i16> @gep1_load_v2i16_extract_insert_v8i16(<2 x i16>* align 16 
dereferenceable(16) %p) {
+; CHECK-LABEL: @gep1_load_v2i16_extract_insert_v8i16(
+; CHECK-NEXT:[[GEP:%.*]] = getelementptr inbounds <2 x i16>, <2 x i16>* 
[[P:%.*]], i64 1
+; CHECK-NEXT:[[L:%.*]] = load <2 x i16>, <2 x i16>* [[GEP]], align 2
+; CHECK-NEXT:[[S:%.*]] = extractelement <2 x i16> [[L]], i32 0
+; CHECK-NEXT:[[R:%.*]] = insertelement <8 x i16> undef, i16 [[S]], i64 0
+; CHECK-NEXT:ret <8 x i16> [[R]]
+;
+  %gep = getelementptr inbounds <2 x i16>, <2 x i16>* %p, i64 1
+  %l = load <2 x i16>, <2 x i16>* %gep, align 2
+  %s = extractelement <2 x i16> %l, i32 0
+  %r = insertelement <8 x i16> undef, i16 %s, i64 0
+  ret <8 x i16> %r
+}



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] d399f87 - [VectorCombine] make load transform poison-safe

2020-12-14 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2020-12-14T17:42:01-05:00
New Revision: d399f870b5a94b9dcc1817ed69fec88c325bb817

URL: 
https://github.com/llvm/llvm-project/commit/d399f870b5a94b9dcc1817ed69fec88c325bb817
DIFF: 
https://github.com/llvm/llvm-project/commit/d399f870b5a94b9dcc1817ed69fec88c325bb817.diff

LOG: [VectorCombine] make load transform poison-safe

As noted in D93229, the transform from scalar load to vector load
potentially leaks poison from the extra vector elements that are
being loaded.

We could use freeze here (and x86 codegen at least appears to be
the same either way), but we already have a shuffle in this logic
to optionally change the vector size, so let's allow that
instruction to serve both purposes.
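
The shape of the replacement mask is simple; a sketch (not from the commit)
of what the new code below builds, with -1 standing in for UndefMaskElem:

  // Sketch only: lane 0 is the only defined lane, so poison in the extra
  // loaded lanes cannot propagate; the same shuffle also resizes the vector.
  #include <cstdio>
  #include <vector>

  int main() {
    unsigned OutputNumElts = 8;               // e.g. widen to <8 x i32>
    std::vector<int> Mask(OutputNumElts, -1); // all lanes undef...
    Mask[0] = 0;                              // ...except the scalar's lane
    for (int M : Mask) printf("%d ", M);      // prints: 0 -1 -1 -1 -1 -1 -1 -1
    printf("\n");
    return 0;
  }
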

Differential Revision: https://reviews.llvm.org/D93238

Added: 


Modified: 
llvm/lib/Transforms/Vectorize/VectorCombine.cpp
llvm/test/Transforms/VectorCombine/X86/load.ll

Removed: 




diff  --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp 
b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 19f5a2b432f7a..89b60045ce910 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -161,15 +161,17 @@ bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
   Value *CastedPtr = Builder.CreateBitCast(SrcPtr, MinVecTy->getPointerTo(AS));
   Value *VecLd = Builder.CreateAlignedLoad(MinVecTy, CastedPtr, Alignment);
 
-  // If the insert type does not match the target's minimum vector type,
-  // use an identity shuffle to shrink/grow the vector.
-  if (Ty != MinVecTy) {
-unsigned OutputNumElts = Ty->getNumElements();
-SmallVector<int> Mask(OutputNumElts, UndefMaskElem);
-for (unsigned i = 0; i < OutputNumElts && i < MinVecNumElts; ++i)
-  Mask[i] = i;
-VecLd = Builder.CreateShuffleVector(VecLd, Mask);
-  }
+  // Set everything but element 0 to undef to prevent poison from propagating
+  // from the extra loaded memory. This will also optionally shrink/grow the
+  // vector from the loaded size to the output size.
+  // We assume this operation has no cost in codegen.
+  // Note that we could use freeze to avoid poison problems, but then we might
+  // still need a shuffle to change the vector size.
+  unsigned OutputNumElts = Ty->getNumElements();
+  SmallVector<int> Mask(OutputNumElts, UndefMaskElem);
+  Mask[0] = 0;
+  VecLd = Builder.CreateShuffleVector(VecLd, Mask);
+
   replaceValue(I, *VecLd);
   ++NumVecLoad;
   return true;

diff  --git a/llvm/test/Transforms/VectorCombine/X86/load.ll 
b/llvm/test/Transforms/VectorCombine/X86/load.ll
index ba2bf3f37d7b6..03902c48157fe 100644
--- a/llvm/test/Transforms/VectorCombine/X86/load.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/load.ll
@@ -175,7 +175,8 @@ define double @larger_fp_scalar_256bit_vec(<8 x float>* 
align 32 dereferenceable
 define <4 x float> @load_f32_insert_v4f32(float* align 16 dereferenceable(16) 
%p) {
 ; CHECK-LABEL: @load_f32_insert_v4f32(
 ; CHECK-NEXT:[[TMP1:%.*]] = bitcast float* [[P:%.*]] to <4 x float>*
-; CHECK-NEXT:[[R:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
+; CHECK-NEXT:[[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 
4
+; CHECK-NEXT:[[R:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> 
undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:ret <4 x float> [[R]]
 ;
   %s = load float, float* %p, align 4
@@ -185,7 +186,8 @@ define <4 x float> @load_f32_insert_v4f32(float* align 16 
dereferenceable(16) %p
 
 define <4 x float> @casted_load_f32_insert_v4f32(<4 x float>* align 4 
dereferenceable(16) %p) {
 ; CHECK-LABEL: @casted_load_f32_insert_v4f32(
-; CHECK-NEXT:[[R:%.*]] = load <4 x float>, <4 x float>* [[P:%.*]], align 4
+; CHECK-NEXT:[[TMP1:%.*]] = load <4 x float>, <4 x float>* [[P:%.*]], 
align 4
+; CHECK-NEXT:[[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> 
undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:ret <4 x float> [[R]]
 ;
   %b = bitcast <4 x float>* %p to float*
@@ -199,7 +201,8 @@ define <4 x float> @casted_load_f32_insert_v4f32(<4 x 
float>* align 4 dereferenc
 define <4 x i32> @load_i32_insert_v4i32(i32* align 16 dereferenceable(16) %p) {
 ; CHECK-LABEL: @load_i32_insert_v4i32(
 ; CHECK-NEXT:[[TMP1:%.*]] = bitcast i32* [[P:%.*]] to <4 x i32>*
-; CHECK-NEXT:[[R:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
+; CHECK-NEXT:[[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
+; CHECK-NEXT:[[R:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> 
undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:ret <4 x i32> [[R]]
 ;
   %s = load i32, i32* %p, align 4
@@ -212,7 +215,8 @@ define <4 x i32> @load_i32_insert_v4i32(i32* align 16 
dereferenceable(16) %p) {
 define <4 x i32> @casted_load_i32_insert_v4i32(<16 x i8>* align 4 
dereferenceable(16) %p) {
 ; CHECK-LABEL: @casted_load_i32_insert_v4i32(
 ; CHECK-NEXT:[[TMP1:%.*]] = bitcast <16 x i8>* [[P:%.*]] to <4 x i32>*
-; CHECK-NEX

[llvm-branch-commits] [llvm] 8593e19 - [VectorCombine] add alignment test for gep load; NFC

2020-12-14 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2020-12-14T18:31:19-05:00
New Revision: 8593e197bc837286abeb4dee50726b2391a77de9

URL: 
https://github.com/llvm/llvm-project/commit/8593e197bc837286abeb4dee50726b2391a77de9
DIFF: 
https://github.com/llvm/llvm-project/commit/8593e197bc837286abeb4dee50726b2391a77de9.diff

LOG: [VectorCombine] add alignment test for gep load; NFC

Added: 


Modified: 
llvm/test/Transforms/VectorCombine/X86/load.ll

Removed: 




diff  --git a/llvm/test/Transforms/VectorCombine/X86/load.ll 
b/llvm/test/Transforms/VectorCombine/X86/load.ll
index 03902c48157f..d28d28761632 100644
--- a/llvm/test/Transforms/VectorCombine/X86/load.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/load.ll
@@ -284,6 +284,21 @@ define <8 x i16> @gep01_load_i16_insert_v8i16_deref(<8 x 
i16>* align 16 derefere
   ret <8 x i16> %r
 }
 
+; TODO: Verify that alignment of the new load is not over-specified.
+
+define <8 x i16> @gep01_load_i16_insert_v8i16_deref_minalign(<8 x i16>* align 
2 dereferenceable(16) %p) {
+; CHECK-LABEL: @gep01_load_i16_insert_v8i16_deref_minalign(
+; CHECK-NEXT:[[GEP:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* 
[[P:%.*]], i64 0, i64 1
+; CHECK-NEXT:[[S:%.*]] = load i16, i16* [[GEP]], align 8
+; CHECK-NEXT:[[R:%.*]] = insertelement <8 x i16> undef, i16 [[S]], i64 0
+; CHECK-NEXT:ret <8 x i16> [[R]]
+;
+  %gep = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i64 0, i64 1
+  %s = load i16, i16* %gep, align 8
+  %r = insertelement <8 x i16> undef, i16 %s, i64 0
+  ret <8 x i16> %r
+}
+
 ; If there are enough dereferenceable bytes, we can offset the vector load.
 
 define <8 x i16> @gep10_load_i16_insert_v8i16(<8 x i16>* align 16 
dereferenceable(32) %p) {



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] aaaf0ec - [VectorCombine] loosen alignment constraint for load transform

2020-12-16 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2020-12-16T12:25:18-05:00
New Revision: aaaf0ec72b062dea09a277e5b9e6bda0a3da55c9

URL: 
https://github.com/llvm/llvm-project/commit/aaaf0ec72b062dea09a277e5b9e6bda0a3da55c9
DIFF: 
https://github.com/llvm/llvm-project/commit/aaaf0ec72b062dea09a277e5b9e6bda0a3da55c9.diff

LOG: [VectorCombine] loosen alignment constraint for load transform

As discussed in D93229, we only need a minimal alignment constraint
when querying whether a hypothetical vector load is safe. We still
pass/use the potentially stronger alignment attribute when checking
costs and creating the new load.

There's already a test that changes with the minimal code change,
so I'm splitting this off as a preliminary commit independent of any
gep/offset enhancements.

Differential Revision: https://reviews.llvm.org/D93397

Added: 


Modified: 
llvm/lib/Transforms/Vectorize/VectorCombine.cpp
llvm/test/Transforms/VectorCombine/X86/load.ll

Removed: 




diff  --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp 
b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 89b60045ce91..086169c55c8d 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -134,13 +134,16 @@ bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
 return false;
 
   // Check safety of replacing the scalar load with a larger vector load.
+  // We use minimal alignment (maximum flexibility) because we only care about
+  // the dereferenceable region. When calculating cost and creating a new op,
+  // we may use a larger value based on alignment attributes.
   unsigned MinVecNumElts = MinVectorSize / ScalarSize;
   auto *MinVecTy = VectorType::get(ScalarTy, MinVecNumElts, false);
-  Align Alignment = Load->getAlign();
-  if (!isSafeToLoadUnconditionally(SrcPtr, MinVecTy, Alignment, DL, Load, &DT))
+  if (!isSafeToLoadUnconditionally(SrcPtr, MinVecTy, Align(1), DL, Load, &DT))
 return false;
 
   // Original pattern: insertelt undef, load [free casts of] PtrOp, 0
+  Align Alignment = Load->getAlign();
   Type *LoadTy = Load->getType();
   int OldCost = TTI.getMemoryOpCost(Instruction::Load, LoadTy, Alignment, AS);
   APInt DemandedElts = APInt::getOneBitSet(MinVecNumElts, 0);

diff  --git a/llvm/test/Transforms/VectorCombine/X86/load.ll 
b/llvm/test/Transforms/VectorCombine/X86/load.ll
index d28d28761632..f5a962dd7cfe 100644
--- a/llvm/test/Transforms/VectorCombine/X86/load.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/load.ll
@@ -403,12 +403,14 @@ define <4 x float> @load_f32_insert_v4f32_volatile(float* 
align 16 dereferenceab
   ret <4 x float> %r
 }
 
-; Negative test? - pointer is not as aligned as load.
+; Pointer is not as aligned as load, but that's ok.
+; The new load uses the larger alignment value.
 
 define <4 x float> @load_f32_insert_v4f32_align(float* align 1 
dereferenceable(16) %p) {
 ; CHECK-LABEL: @load_f32_insert_v4f32_align(
-; CHECK-NEXT:[[S:%.*]] = load float, float* [[P:%.*]], align 4
-; CHECK-NEXT:[[R:%.*]] = insertelement <4 x float> undef, float [[S]], i32 0
+; CHECK-NEXT:[[TMP1:%.*]] = bitcast float* [[P:%.*]] to <4 x float>*
+; CHECK-NEXT:[[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 
4
+; CHECK-NEXT:[[R:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> 
undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:ret <4 x float> [[R]]
 ;
   %s = load float, float* %p, align 4



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 38ebc1a - [VectorCombine] optimize alignment for load transform

2020-12-16 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2020-12-16T15:25:45-05:00
New Revision: 38ebc1a13dc8ce41917d66918b319d793dc2fb02

URL: 
https://github.com/llvm/llvm-project/commit/38ebc1a13dc8ce41917d66918b319d793dc2fb02
DIFF: 
https://github.com/llvm/llvm-project/commit/38ebc1a13dc8ce41917d66918b319d793dc2fb02.diff

LOG: [VectorCombine] optimize alignment for load transform

Here's another minimal step suggested by D93229 / D93397 .
(I'm trying to be extra careful in these changes because
load transforms are easy to get wrong.)

We can optimistically choose the greater alignment of a
load and its pointer operand. As the test diffs show, this
can improve what would have been unaligned vector loads
into aligned loads.

When we enhance with gep offsets, we will need to adjust
the alignment calculation to include that offset.

Differential Revision: https://reviews.llvm.org/D93406

Added: 


Modified: 
llvm/lib/Transforms/Vectorize/VectorCombine.cpp
llvm/test/Transforms/VectorCombine/X86/load.ll

Removed: 




diff  --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp 
b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 086169c55c8d..8e341619dcf4 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -143,7 +143,8 @@ bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
 return false;
 
   // Original pattern: insertelt undef, load [free casts of] PtrOp, 0
-  Align Alignment = Load->getAlign();
+  // Use the greater of the alignment on the load or its source pointer.
+  Align Alignment = std::max(SrcPtr->getPointerAlignment(DL), 
Load->getAlign());
   Type *LoadTy = Load->getType();
   int OldCost = TTI.getMemoryOpCost(Instruction::Load, LoadTy, Alignment, AS);
   APInt DemandedElts = APInt::getOneBitSet(MinVecNumElts, 0);

diff  --git a/llvm/test/Transforms/VectorCombine/X86/load.ll 
b/llvm/test/Transforms/VectorCombine/X86/load.ll
index f5a962dd7cfe..e8ba175b0235 100644
--- a/llvm/test/Transforms/VectorCombine/X86/load.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/load.ll
@@ -175,7 +175,7 @@ define double @larger_fp_scalar_256bit_vec(<8 x float>* 
align 32 dereferenceable
 define <4 x float> @load_f32_insert_v4f32(float* align 16 dereferenceable(16) 
%p) {
 ; CHECK-LABEL: @load_f32_insert_v4f32(
 ; CHECK-NEXT:[[TMP1:%.*]] = bitcast float* [[P:%.*]] to <4 x float>*
-; CHECK-NEXT:[[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 
4
+; CHECK-NEXT:[[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 
16
 ; CHECK-NEXT:[[R:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> 
undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:ret <4 x float> [[R]]
 ;
@@ -201,7 +201,7 @@ define <4 x float> @casted_load_f32_insert_v4f32(<4 x 
float>* align 4 dereferenc
 define <4 x i32> @load_i32_insert_v4i32(i32* align 16 dereferenceable(16) %p) {
 ; CHECK-LABEL: @load_i32_insert_v4i32(
 ; CHECK-NEXT:[[TMP1:%.*]] = bitcast i32* [[P:%.*]] to <4 x i32>*
-; CHECK-NEXT:[[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
+; CHECK-NEXT:[[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 16
 ; CHECK-NEXT:[[R:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> 
undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:ret <4 x i32> [[R]]
 ;
@@ -434,7 +434,7 @@ define <4 x float> @load_f32_insert_v4f32_deref(float* 
align 4 dereferenceable(1
 define <8 x i32> @load_i32_insert_v8i32(i32* align 16 dereferenceable(16) %p) {
 ; CHECK-LABEL: @load_i32_insert_v8i32(
 ; CHECK-NEXT:[[TMP1:%.*]] = bitcast i32* [[P:%.*]] to <4 x i32>*
-; CHECK-NEXT:[[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
+; CHECK-NEXT:[[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 16
 ; CHECK-NEXT:[[R:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> 
undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:ret <8 x i32> [[R]]
 ;
@@ -458,7 +458,7 @@ define <8 x i32> @casted_load_i32_insert_v8i32(<4 x i32>* 
align 4 dereferenceabl
 define <16 x float> @load_f32_insert_v16f32(float* align 16 
dereferenceable(16) %p) {
 ; CHECK-LABEL: @load_f32_insert_v16f32(
 ; CHECK-NEXT:[[TMP1:%.*]] = bitcast float* [[P:%.*]] to <4 x float>*
-; CHECK-NEXT:[[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 
4
+; CHECK-NEXT:[[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 
16
 ; CHECK-NEXT:[[R:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> 
undef, <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:ret <16 x float> [[R]]
 ;
@@ -470,7 +470,7 @@ define <16 x float> @load_f32_insert_v16f32(float* align 16 
dereferenceable(16)
 define <2 x float> @load_f32_insert_v2f32(float* align 16 dereferenceable(16) 
%p) {
 ; CHECK-LABEL: @load_f32_insert_v2f32(
 ; CHECK-NEXT:[[TMP1:%.*]] = bitcast float* [[P:%.*]] to <4 x float>*
-; CHECK-NEXT:[[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 
4
+; CHECK-NEXT:[[TMP2:%.*]] = load <4 x float>, <4 x float>

[llvm-branch-commits] [llvm] 46c331b - [VectorCombine] adjust test alignments for better coverage; NFC

2020-12-16 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2020-12-16T16:30:45-05:00
New Revision: 46c331bf26d169326b52079578178ab91e3546c0

URL: 
https://github.com/llvm/llvm-project/commit/46c331bf26d169326b52079578178ab91e3546c0
DIFF: 
https://github.com/llvm/llvm-project/commit/46c331bf26d169326b52079578178ab91e3546c0.diff

LOG: [VectorCombine] adjust test alignments for better coverage; NFC

Added: 


Modified: 
llvm/test/Transforms/VectorCombine/X86/load.ll

Removed: 




diff  --git a/llvm/test/Transforms/VectorCombine/X86/load.ll 
b/llvm/test/Transforms/VectorCombine/X86/load.ll
index e8ba175b0235..dee6c5eced91 100644
--- a/llvm/test/Transforms/VectorCombine/X86/load.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/load.ll
@@ -565,16 +565,16 @@ define <8 x i32> 
@load_v1i32_extract_insert_v8i32_extra_use(<1 x i32>* align 16
 
 ; TODO: Can't safely load the offset vector, but can load+shuffle if it is 
profitable.
 
-define <8 x i16> @gep1_load_v2i16_extract_insert_v8i16(<2 x i16>* align 16 
dereferenceable(16) %p) {
+define <8 x i16> @gep1_load_v2i16_extract_insert_v8i16(<2 x i16>* align 1 
dereferenceable(16) %p) {
 ; CHECK-LABEL: @gep1_load_v2i16_extract_insert_v8i16(
 ; CHECK-NEXT:[[GEP:%.*]] = getelementptr inbounds <2 x i16>, <2 x i16>* 
[[P:%.*]], i64 1
-; CHECK-NEXT:[[L:%.*]] = load <2 x i16>, <2 x i16>* [[GEP]], align 2
+; CHECK-NEXT:[[L:%.*]] = load <2 x i16>, <2 x i16>* [[GEP]], align 8
 ; CHECK-NEXT:[[S:%.*]] = extractelement <2 x i16> [[L]], i32 0
 ; CHECK-NEXT:[[R:%.*]] = insertelement <8 x i16> undef, i16 [[S]], i64 0
 ; CHECK-NEXT:ret <8 x i16> [[R]]
 ;
   %gep = getelementptr inbounds <2 x i16>, <2 x i16>* %p, i64 1
-  %l = load <2 x i16>, <2 x i16>* %gep, align 2
+  %l = load <2 x i16>, <2 x i16>* %gep, align 8
   %s = extractelement <2 x i16> %l, i32 0
   %r = insertelement <8 x i16> undef, i16 %s, i64 0
   ret <8 x i16> %r



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 71a1b9f - [VectorCombine] add tests for gep load with cast; NFC

2020-12-17 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2020-12-17T16:40:55-05:00
New Revision: 71a1b9fe76acfea8920e143c807c5cb8bf510254

URL: 
https://github.com/llvm/llvm-project/commit/71a1b9fe76acfea8920e143c807c5cb8bf510254
DIFF: 
https://github.com/llvm/llvm-project/commit/71a1b9fe76acfea8920e143c807c5cb8bf510254.diff

LOG: [VectorCombine] add tests for gep load with cast; NFC

Added: 


Modified: 
llvm/test/Transforms/VectorCombine/X86/load.ll

Removed: 




diff  --git a/llvm/test/Transforms/VectorCombine/X86/load.ll 
b/llvm/test/Transforms/VectorCombine/X86/load.ll
index dee6c5eced91..6b4fe43a8a29 100644
--- a/llvm/test/Transforms/VectorCombine/X86/load.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/load.ll
@@ -299,6 +299,51 @@ define <8 x i16> 
@gep01_load_i16_insert_v8i16_deref_minalign(<8 x i16>* align 2
   ret <8 x i16> %r
 }
 
+define <4 x i32> @gep01_bitcast_load_i32_insert_v4i32(<16 x i8>* align 1 
dereferenceable(16) %p) {
+; CHECK-LABEL: @gep01_bitcast_load_i32_insert_v4i32(
+; CHECK-NEXT:[[GEP:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* 
[[P:%.*]], i64 0, i64 1
+; CHECK-NEXT:[[B:%.*]] = bitcast i8* [[GEP]] to i32*
+; CHECK-NEXT:[[S:%.*]] = load i32, i32* [[B]], align 1
+; CHECK-NEXT:[[R:%.*]] = insertelement <4 x i32> undef, i32 [[S]], i64 0
+; CHECK-NEXT:ret <4 x i32> [[R]]
+;
+  %gep = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i64 0, i64 1
+  %b = bitcast i8* %gep to i32*
+  %s = load i32, i32* %b, align 1
+  %r = insertelement <4 x i32> undef, i32 %s, i64 0
+  ret <4 x i32> %r
+}
+
+define <4 x i32> @gep012_bitcast_load_i32_insert_v4i32(<16 x i8>* align 1 
dereferenceable(20) %p) {
+; CHECK-LABEL: @gep012_bitcast_load_i32_insert_v4i32(
+; CHECK-NEXT:[[GEP:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* 
[[P:%.*]], i64 0, i64 12
+; CHECK-NEXT:[[B:%.*]] = bitcast i8* [[GEP]] to i32*
+; CHECK-NEXT:[[S:%.*]] = load i32, i32* [[B]], align 1
+; CHECK-NEXT:[[R:%.*]] = insertelement <4 x i32> undef, i32 [[S]], i64 0
+; CHECK-NEXT:ret <4 x i32> [[R]]
+;
+  %gep = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i64 0, i64 12
+  %b = bitcast i8* %gep to i32*
+  %s = load i32, i32* %b, align 1
+  %r = insertelement <4 x i32> undef, i32 %s, i64 0
+  ret <4 x i32> %r
+}
+
+define <4 x i32> @gep013_bitcast_load_i32_insert_v4i32(<16 x i8>* align 1 
dereferenceable(20) %p) {
+; CHECK-LABEL: @gep013_bitcast_load_i32_insert_v4i32(
+; CHECK-NEXT:[[GEP:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* 
[[P:%.*]], i64 0, i64 13
+; CHECK-NEXT:[[B:%.*]] = bitcast i8* [[GEP]] to i32*
+; CHECK-NEXT:[[S:%.*]] = load i32, i32* [[B]], align 1
+; CHECK-NEXT:[[R:%.*]] = insertelement <4 x i32> undef, i32 [[S]], i64 0
+; CHECK-NEXT:ret <4 x i32> [[R]]
+;
+  %gep = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i64 0, i64 13
+  %b = bitcast i8* %gep to i32*
+  %s = load i32, i32* %b, align 1
+  %r = insertelement <4 x i32> undef, i32 %s, i64 0
+  ret <4 x i32> %r
+}
+
 ; If there are enough dereferenceable bytes, we can offset the vector load.
 
 define <8 x i16> @gep10_load_i16_insert_v8i16(<8 x i16>* align 16 
dereferenceable(32) %p) {



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 47aaa99 - [VectorCombine] allow peeking through GEPs when creating a vector load

2020-12-18 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2020-12-18T09:25:03-05:00
New Revision: 47aaa99c0e1e28573bf24d95c5540005ee734531

URL: 
https://github.com/llvm/llvm-project/commit/47aaa99c0e1e28573bf24d95c5540005ee734531
DIFF: 
https://github.com/llvm/llvm-project/commit/47aaa99c0e1e28573bf24d95c5540005ee734531.diff

LOG: [VectorCombine] allow peeking through GEPs when creating a vector load

This is an enhancement motivated by https://llvm.org/PR16739
(see D92858 for another).

We can look through a GEP to find a base pointer that may be
safe to use for a vector load. If so, then we shuffle (shift)
the necessary vector element over to index 0.
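
Worked numbers may help (a sketch with assumed values, not from the
commit): for a <4 x i32> minimum vector and a constant gep offset of 12
bytes from the base, the target becomes element 12/4 = 3, and the usable
alignment for the base access folds the offset in, as commonAlignment()
does in the code below:

  // Sketch only: the offset arithmetic used when peeking through a gep.
  #include <cassert>
  #include <cstdio>

  int main() {
    unsigned ScalarSizeBytes = 4; // i32
    unsigned MinVecNumElts = 4;   // <4 x i32>
    unsigned OffsetBytes = 12;    // assumed constant gep offset
    assert(OffsetBytes % ScalarSizeBytes == 0); // must shuffle cleanly
    unsigned OffsetEltIndex = OffsetBytes / ScalarSizeBytes;
    assert(OffsetEltIndex < MinVecNumElts);     // target element still loaded
    // commonAlignment keeps the largest power of two dividing both values:
    unsigned BaseAlign = 16;
    unsigned Combined = BaseAlign | OffsetBytes;
    unsigned NewAlign = Combined & ~(Combined - 1);            // lowest set bit
    printf("element %u, align %u\n", OffsetEltIndex, NewAlign); // element 3, align 4
    return 0;
  }
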

Alive2 proof based on one of the regression tests:
https://alive2.llvm.org/ce/z/yPJLkh

The vector translation is independent of endianness (verify by
changing to a leading 'E' in the datalayout string).

Differential Revision: https://reviews.llvm.org/D93229

Added: 


Modified: 
llvm/lib/Transforms/Vectorize/VectorCombine.cpp
llvm/test/Transforms/VectorCombine/X86/load.ll

Removed: 




diff  --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp 
b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 8e341619dcf4..a865f88cba74 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -93,6 +93,7 @@ static void replaceValue(Value &Old, Value &New) {
 
 bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
   // Match insert into fixed vector of scalar value.
+  // TODO: Handle non-zero insert index.
   auto *Ty = dyn_cast<FixedVectorType>(I.getType());
   Value *Scalar;
   if (!Ty || !match(&I, m_InsertElt(m_Undef(), m_Value(Scalar), m_ZeroInt())) 
||
@@ -115,7 +116,6 @@ bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
   mustSuppressSpeculation(*Load))
 return false;
 
-  // TODO: Extend this to match GEP with constant offsets.
   const DataLayout &DL = I.getModule()->getDataLayout();
   Value *SrcPtr = Load->getPointerOperand()->stripPointerCasts();
   assert(isa<PointerType>(SrcPtr->getType()) && "Expected a pointer type");
@@ -127,10 +127,13 @@ bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
   if (AS != SrcPtr->getType()->getPointerAddressSpace())
 SrcPtr = Load->getPointerOperand();
 
+  // We are potentially transforming byte-sized (8-bit) memory accesses, so 
make
+  // sure we have all of our type-based constraints in place for this target.
   Type *ScalarTy = Scalar->getType();
   uint64_t ScalarSize = ScalarTy->getPrimitiveSizeInBits();
   unsigned MinVectorSize = TTI.getMinVectorRegisterBitWidth();
-  if (!ScalarSize || !MinVectorSize || MinVectorSize % ScalarSize != 0)
+  if (!ScalarSize || !MinVectorSize || MinVectorSize % ScalarSize != 0 ||
+  ScalarSize % 8 != 0)
 return false;
 
   // Check safety of replacing the scalar load with a larger vector load.
@@ -139,12 +142,45 @@ bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
   // we may use a larger value based on alignment attributes.
   unsigned MinVecNumElts = MinVectorSize / ScalarSize;
   auto *MinVecTy = VectorType::get(ScalarTy, MinVecNumElts, false);
-  if (!isSafeToLoadUnconditionally(SrcPtr, MinVecTy, Align(1), DL, Load, &DT))
-return false;
+  unsigned OffsetEltIndex = 0;
+  Align Alignment = Load->getAlign();
+  if (!isSafeToLoadUnconditionally(SrcPtr, MinVecTy, Align(1), DL, Load, &DT)) 
{
+// It is not safe to load directly from the pointer, but we can still peek
+// through gep offsets and check if it safe to load from a base address 
with
+// updated alignment. If it is, we can shuffle the element(s) into place
+// after loading.
+unsigned OffsetBitWidth = DL.getIndexTypeSizeInBits(SrcPtr->getType());
+APInt Offset(OffsetBitWidth, 0);
+SrcPtr = SrcPtr->stripAndAccumulateInBoundsConstantOffsets(DL, Offset);
+
+// We want to shuffle the result down from a high element of a vector, so
+// the offset must be positive.
+if (Offset.isNegative())
+  return false;
+
+// The offset must be a multiple of the scalar element to shuffle cleanly
+// in the element's size.
+uint64_t ScalarSizeInBytes = ScalarSize / 8;
+if (Offset.urem(ScalarSizeInBytes) != 0)
+  return false;
+
+// If we load MinVecNumElts, will our target element still be loaded?
+OffsetEltIndex = Offset.udiv(ScalarSizeInBytes).getZExtValue();
+if (OffsetEltIndex >= MinVecNumElts)
+  return false;
+
+if (!isSafeToLoadUnconditionally(SrcPtr, MinVecTy, Align(1), DL, Load, 
&DT))
+  return false;
+
+// Update alignment with offset value. Note that the offset could be 
negated
+// to more accurately represent "(new) SrcPtr - Offset = (old) SrcPtr", but
+// negation does not change the result of the alignment calculation.
+Alignment = commonAlignment(Alignment, Offset.getZExtValue());
+  }
 
   // Original pattern: insertelt undef, load [free casts of] PtrOp, 0
   // Use the grea

[llvm-branch-commits] [llvm] 37d0dda - [SLP] fix typo; NFC

2020-12-18 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2020-12-18T16:55:52-05:00
New Revision: 37d0dda739aa5ebc1ad8cca8c570788b2a3ef5cf

URL: 
https://github.com/llvm/llvm-project/commit/37d0dda739aa5ebc1ad8cca8c570788b2a3ef5cf
DIFF: 
https://github.com/llvm/llvm-project/commit/37d0dda739aa5ebc1ad8cca8c570788b2a3ef5cf.diff

LOG: [SLP] fix typo; NFC

Added: 


Modified: 
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 




diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp 
b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 9ab89e091596..80d510185470 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -6911,12 +6911,12 @@ class HorizontalReduction {
 ReductionData.initReductionOps(ReductionOps);
 while (!Stack.empty()) {
   Instruction *TreeN = Stack.back().first;
-  unsigned EdgeToVist = Stack.back().second++;
+  unsigned EdgeToVisit = Stack.back().second++;
   OperationData OpData = getOperationData(TreeN);
   bool IsReducedValue = OpData != ReductionData;
 
   // Postorder vist.
-  if (IsReducedValue || EdgeToVist == OpData.getNumberOfOperands()) {
+  if (IsReducedValue || EdgeToVisit == OpData.getNumberOfOperands()) {
 if (IsReducedValue)
   ReducedVals.push_back(TreeN);
 else {
@@ -6942,7 +6942,7 @@ class HorizontalReduction {
   }
 
   // Visit left or right.
-  Value *NextV = TreeN->getOperand(EdgeToVist);
+  Value *NextV = TreeN->getOperand(EdgeToVisit);
   if (NextV != Phi) {
 auto *I = dyn_cast<Instruction>(NextV);
 OpData = getOperationData(I);



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] d611875 - [InstSimplify] add tests for inverted logic operands; NFC

2020-12-21 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2020-12-21T08:51:42-05:00
New Revision: d6118759f30e343a05aab053f66e5049ea149175

URL: 
https://github.com/llvm/llvm-project/commit/d6118759f30e343a05aab053f66e5049ea149175
DIFF: 
https://github.com/llvm/llvm-project/commit/d6118759f30e343a05aab053f66e5049ea149175.diff

LOG: [InstSimplify] add tests for inverted logic operands; NFC

Added: 


Modified: 
llvm/test/Transforms/InstSimplify/AndOrXor.ll

Removed: 




diff  --git a/llvm/test/Transforms/InstSimplify/AndOrXor.ll 
b/llvm/test/Transforms/InstSimplify/AndOrXor.ll
index 8952acc2feb6..9e549ebefc6b 100644
--- a/llvm/test/Transforms/InstSimplify/AndOrXor.ll
+++ b/llvm/test/Transforms/InstSimplify/AndOrXor.ll
@@ -885,168 +885,286 @@ define i32 @reversed_not(i32 %a) {
 
 define i64 @shl_or_and1(i32 %a, i1 %b) {
 ; CHECK-LABEL: @shl_or_and1(
-; CHECK-NEXT:[[TMP2:%.*]] = zext i1 [[B:%.*]] to i64
-; CHECK-NEXT:ret i64 [[TMP2]]
+; CHECK-NEXT:[[T2:%.*]] = zext i1 [[B:%.*]] to i64
+; CHECK-NEXT:ret i64 [[T2]]
 ;
-  %tmp1 = zext i32 %a to i64
-  %tmp2 = zext i1 %b to i64
-  %tmp3 = shl nuw i64 %tmp1, 32
-  %tmp4 = or i64 %tmp2, %tmp3
-  %tmp5 = and i64 %tmp4, 1
-  ret i64 %tmp5
+  %t1 = zext i32 %a to i64
+  %t2 = zext i1 %b to i64
+  %t3 = shl nuw i64 %t1, 32
+  %t4 = or i64 %t2, %t3
+  %t5 = and i64 %t4, 1
+  ret i64 %t5
 }
 
 define i64 @shl_or_and2(i32 %a, i1 %b) {
 ; CHECK-LABEL: @shl_or_and2(
-; CHECK-NEXT:[[TMP1:%.*]] = zext i1 [[B:%.*]] to i64
-; CHECK-NEXT:[[TMP3:%.*]] = shl nuw i64 [[TMP1]], 32
-; CHECK-NEXT:ret i64 [[TMP3]]
+; CHECK-NEXT:[[T1:%.*]] = zext i1 [[B:%.*]] to i64
+; CHECK-NEXT:[[T3:%.*]] = shl nuw i64 [[T1]], 32
+; CHECK-NEXT:ret i64 [[T3]]
 ;
-  %tmp1 = zext i1 %b to i64
-  %tmp2 = zext i32 %a to i64
-  %tmp3 = shl nuw i64 %tmp1, 32
-  %tmp4 = or i64 %tmp2, %tmp3
-  %tmp5 = and i64 %tmp4, 4294967296
-  ret i64 %tmp5
+  %t1 = zext i1 %b to i64
+  %t2 = zext i32 %a to i64
+  %t3 = shl nuw i64 %t1, 32
+  %t4 = or i64 %t2, %t3
+  %t5 = and i64 %t4, 4294967296
+  ret i64 %t5
 }
 
 ; concatenate two 32-bit integers and extract lower 32-bit
 define i64 @shl_or_and3(i32 %a, i32 %b) {
 ; CHECK-LABEL: @shl_or_and3(
-; CHECK-NEXT:[[TMP2:%.*]] = zext i32 [[B:%.*]] to i64
-; CHECK-NEXT:ret i64 [[TMP2]]
+; CHECK-NEXT:[[T2:%.*]] = zext i32 [[B:%.*]] to i64
+; CHECK-NEXT:ret i64 [[T2]]
 ;
-  %tmp1 = zext i32 %a to i64
-  %tmp2 = zext i32 %b to i64
-  %tmp3 = shl nuw i64 %tmp1, 32
-  %tmp4 = or i64 %tmp2, %tmp3
-  %tmp5 = and i64 %tmp4, 4294967295
-  ret i64 %tmp5
+  %t1 = zext i32 %a to i64
+  %t2 = zext i32 %b to i64
+  %t3 = shl nuw i64 %t1, 32
+  %t4 = or i64 %t2, %t3
+  %t5 = and i64 %t4, 4294967295
+  ret i64 %t5
 }
 
 ; concatenate two 16-bit integers and extract higher 16-bit
 define i32 @shl_or_and4(i16 %a, i16 %b) {
 ; CHECK-LABEL: @shl_or_and4(
-; CHECK-NEXT:[[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
-; CHECK-NEXT:[[TMP3:%.*]] = shl nuw i32 [[TMP1]], 16
-; CHECK-NEXT:ret i32 [[TMP3]]
+; CHECK-NEXT:[[T1:%.*]] = zext i16 [[A:%.*]] to i32
+; CHECK-NEXT:[[T3:%.*]] = shl nuw i32 [[T1]], 16
+; CHECK-NEXT:ret i32 [[T3]]
 ;
-  %tmp1 = zext i16 %a to i32
-  %tmp2 = zext i16 %b to i32
-  %tmp3 = shl nuw i32 %tmp1, 16
-  %tmp4 = or i32 %tmp2, %tmp3
-  %tmp5 = and i32 %tmp4, 4294901760 ; mask with 0x
-  ret i32 %tmp5
+  %t1 = zext i16 %a to i32
+  %t2 = zext i16 %b to i32
+  %t3 = shl nuw i32 %t1, 16
+  %t4 = or i32 %t2, %t3
+  %t5 = and i32 %t4, 4294901760 ; mask with 0x
+  ret i32 %t5
 }
 
 define i128 @shl_or_and5(i64 %a, i1 %b) {
 ; CHECK-LABEL: @shl_or_and5(
-; CHECK-NEXT:[[TMP2:%.*]] = zext i1 [[B:%.*]] to i128
-; CHECK-NEXT:ret i128 [[TMP2]]
+; CHECK-NEXT:[[T2:%.*]] = zext i1 [[B:%.*]] to i128
+; CHECK-NEXT:ret i128 [[T2]]
 ;
-  %tmp1 = zext i64 %a to i128
-  %tmp2 = zext i1 %b to i128
-  %tmp3 = shl nuw i128 %tmp1, 64
-  %tmp4 = or i128 %tmp2, %tmp3
-  %tmp5 = and i128 %tmp4, 1
-  ret i128 %tmp5
+  %t1 = zext i64 %a to i128
+  %t2 = zext i1 %b to i128
+  %t3 = shl nuw i128 %t1, 64
+  %t4 = or i128 %t2, %t3
+  %t5 = and i128 %t4, 1
+  ret i128 %t5
 }
 
 ; A variation of above test cases; it fails due to the mask value
 define i32 @shl_or_and6(i16 %a, i16 %b) {
 ; CHECK-LABEL: @shl_or_and6(
-; CHECK-NEXT:[[TMP1:%.*]] = zext i16 [[A:%.*]] to i32
-; CHECK-NEXT:[[TMP2:%.*]] = zext i16 [[B:%.*]] to i32
-; CHECK-NEXT:[[TMP3:%.*]] = shl nuw i32 [[TMP1]], 16
-; CHECK-NEXT:[[TMP4:%.*]] = or i32 [[TMP2]], [[TMP3]]
-; CHECK-NEXT:[[TMP5:%.*]] = and i32 [[TMP4]], -65535
-; CHECK-NEXT:ret i32 [[TMP5]]
+; CHECK-NEXT:[[T1:%.*]] = zext i16 [[A:%.*]] to i32
+; CHECK-NEXT:[[T2:%.*]] = zext i16 [[B:%.*]] to i32
+; CHECK-NEXT:[[T3:%.*]] = shl nuw i32 [[T1]], 16
+; CHECK-NEXT:[[T4:%.*]] = or i32 [[T2]], [[T3]]
+; CHECK-NEXT:[[T5:%.*]] = and i32 [[T4]], -65535
+; CHECK-NEXT:ret i32 [[T5]]
 ;
-  %tmp

[llvm-branch-commits] [llvm] 38ca7fa - [InstSimplify] reduce logic with inverted add/sub ops

2020-12-21 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2020-12-21T08:51:43-05:00
New Revision: 38ca7face67e8488d482b66a999d0a685806879f

URL: 
https://github.com/llvm/llvm-project/commit/38ca7face67e8488d482b66a999d0a685806879f
DIFF: 
https://github.com/llvm/llvm-project/commit/38ca7face67e8488d482b66a999d0a685806879f.diff

LOG: [InstSimplify] reduce logic with inverted add/sub ops

https://llvm.org/PR48559
This could be part of a larger ValueTracking API,
but I don't see that currently.

https://rise4fun.com/Alive/gR0

  Name: and
  Pre: C1 == ~C2
  %sub = add i8 %x, C1
  %sub1 = sub i8 C2, %x
  %r = and i8 %sub, %sub1
  =>
  %r = 0

  Name: or
  Pre: C1 == ~C2
  %sub = add i8 %x, C1
  %sub1 = sub i8 C2, %x
  %r = or i8 %sub, %sub1
  =>
  %r = -1

  Name: xor
  Pre: C1 == ~C2
  %sub = add i8 %x, C1
  %sub1 = sub i8 C2, %x
  %r = xor i8 %sub, %sub1
  =>
  %r = -1
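
(Editor's note, a short sanity check of the precondition: in two's
complement, ~a == -a - 1, so

  ~(X + C1) == -(X + C1) - 1
            == -X + (~C1 + 1) - 1    ; using -C1 == ~C1 + 1
            == ~C1 - X
            == C2 - X                ; given C1 == ~C2

i.e. the sub operand is exactly the bitwise-not of the add operand, which is
why 'and' folds to 0 and 'or'/'xor' fold to -1.)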

Added: 


Modified: 
llvm/lib/Analysis/InstructionSimplify.cpp
llvm/test/Transforms/InstSimplify/AndOrXor.ll

Removed: 




diff  --git a/llvm/lib/Analysis/InstructionSimplify.cpp 
b/llvm/lib/Analysis/InstructionSimplify.cpp
index 55f3bc4f2923..27b73a5a8236 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -1999,6 +1999,30 @@ static Value 
*omitCheckForZeroBeforeInvertedMulWithOverflow(Value *Op0,
   return NotOp1;
 }
 
+/// Given a bitwise logic op, check if the operands are add/sub with a common
+/// source value and inverted constant (identity: C - X -> ~(X + ~C)).
+static Value *simplifyLogicOfAddSub(Value *Op0, Value *Op1,
+Instruction::BinaryOps Opcode) {
+  assert(Op0->getType() == Op1->getType() && "Mismatched binop types");
+  assert(BinaryOperator::isBitwiseLogicOp(Opcode) && "Expected logic op");
+  Value *X;
+  Constant *C1, *C2;
+  if ((match(Op0, m_Add(m_Value(X), m_Constant(C1))) &&
+   match(Op1, m_Sub(m_Constant(C2), m_Specific(X ||
+  (match(Op1, m_Add(m_Value(X), m_Constant(C1))) &&
+   match(Op0, m_Sub(m_Constant(C2), m_Specific(X) {
+if (ConstantExpr::getNot(C1) == C2) {
+  // (X + C) & (~C - X) --> (X + C) & ~(X + C) --> 0
+  // (X + C) | (~C - X) --> (X + C) | ~(X + C) --> -1
+  // (X + C) ^ (~C - X) --> (X + C) ^ ~(X + C) --> -1
+  Type *Ty = Op0->getType();
+  return Opcode == Instruction::And ? ConstantInt::getNullValue(Ty)
+: ConstantInt::getAllOnesValue(Ty);
+}
+  }
+  return nullptr;
+}
+
 /// Given operands for an And, see if we can fold the result.
 /// If not, this returns null.
 static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
@@ -2035,6 +2059,9 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, 
const SimplifyQuery &Q,
   if (match(Op1, m_c_Or(m_Specific(Op0), m_Value(
 return Op0;
 
+  if (Value *V = simplifyLogicOfAddSub(Op0, Op1, Instruction::And))
+return V;
+
   // A mask that only clears known zeros of a shifted value is a no-op.
   Value *X;
   const APInt *Mask;
@@ -2194,6 +2221,9 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, 
const SimplifyQuery &Q,
   if (match(Op1, m_Not(m_c_And(m_Specific(Op0), m_Value()
 return Constant::getAllOnesValue(Op0->getType());
 
+  if (Value *V = simplifyLogicOfAddSub(Op0, Op1, Instruction::Or))
+return V;
+
   Value *A, *B;
   // (A & ~B) | (A ^ B) -> (A ^ B)
   // (~B & A) | (A ^ B) -> (A ^ B)
@@ -2323,6 +2353,9 @@ static Value *SimplifyXorInst(Value *Op0, Value *Op1, 
const SimplifyQuery &Q,
   match(Op1, m_Not(m_Specific(Op0
 return Constant::getAllOnesValue(Op0->getType());
 
+  if (Value *V = simplifyLogicOfAddSub(Op0, Op1, Instruction::Xor))
+return V;
+
   // Try some generic simplifications for associative operations.
   if (Value *V = SimplifyAssociativeBinOp(Instruction::Xor, Op0, Op1, Q,
   MaxRecurse))

diff  --git a/llvm/test/Transforms/InstSimplify/AndOrXor.ll 
b/llvm/test/Transforms/InstSimplify/AndOrXor.ll
index 9e549ebefc6b..e23262835c3c 100644
--- a/llvm/test/Transforms/InstSimplify/AndOrXor.ll
+++ b/llvm/test/Transforms/InstSimplify/AndOrXor.ll
@@ -1053,10 +1053,7 @@ define <2 x i32> @shl_or_and3v(<2 x i16> %a, <2 x i16> 
%b) {
 
 define i8 @and_add_sub(i8 %x) {
 ; CHECK-LABEL: @and_add_sub(
-; CHECK-NEXT:[[A:%.*]] = add i8 [[X:%.*]], -1
-; CHECK-NEXT:[[S:%.*]] = sub i8 0, [[X]]
-; CHECK-NEXT:[[R:%.*]] = and i8 [[A]], [[S]]
-; CHECK-NEXT:ret i8 [[R]]
+; CHECK-NEXT:ret i8 0
 ;
   %a = add i8 %x, -1
   %s = sub i8 0, %x
@@ -1066,10 +1063,7 @@ define i8 @and_add_sub(i8 %x) {
 
 define <2 x i8> @and_sub_add(<2 x i8> %x) {
 ; CHECK-LABEL: @and_sub_add(
-; CHECK-NEXT:[[A:%.*]] = add <2 x i8> [[X:%.*]], 
-; CHECK-NEXT:[[S:%.*]] = sub <2 x i8> , [[X]]
-; CHECK-NEXT:[[R:%.*]] = and <2 x i8> [[S]], [[A]]
-; CHECK-NEXT:ret <2 x i8> [[R]]
+; CHECK-NEXT:ret <2 

[llvm-branch-commits] [llvm] 0d15d4b - [SLP] use operand index abstraction for number of operands

2020-12-22 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2020-12-22T16:05:39-05:00
New Revision: 0d15d4b6f43a3355c1d618766c8e550cfe1481d0

URL: 
https://github.com/llvm/llvm-project/commit/0d15d4b6f43a3355c1d618766c8e550cfe1481d0
DIFF: 
https://github.com/llvm/llvm-project/commit/0d15d4b6f43a3355c1d618766c8e550cfe1481d0.diff

LOG: [SLP] use operand index abstraction for number of operands

I think this is NFC currently, but the bug would be exposed
when we allow binary intrinsics (maxnum, etc.) as candidates
for reductions.

The code in matchAssociativeReduction() is using
OperationData::getNumberOfOperands() when comparing whether
the "EdgeToVisit" iterator is in-bounds, so this code must
use the same (potentially offset) operand value to set
the "EdgeToVisit".

Added: 


Modified: 
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 




diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp 
b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index b03fb203c6d7..baa8ce2638a0 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -6772,7 +6772,8 @@ class HorizontalReduction {
   // in this case.
   // Do not perform analysis of remaining operands of ParentStackElem.first
   // instruction, this whole instruction is an extra argument.
-  ParentStackElem.second = ParentStackElem.first->getNumOperands();
+  OperationData OpData = getOperationData(ParentStackElem.first);
+  ParentStackElem.second = OpData.getNumberOfOperands();
 } else {
   // We ran into something like:
   // ParentStackElem.first += ... + ExtraArg + ...
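
(Editor's illustrative aside, hedged and not from the patch: for the
select-based min/max patterns handled today the two operand counts happen
to agree, but for a binary intrinsic candidate they would not.)

  declare float @llvm.maxnum.f32(float, float)

  define float @maxnum_pair(float %x, float %y) {
    ; This call has 3 IR operands (%x, %y, and the callee), while the
    ; reduction abstraction models only the 2 value operands, so the
    ; EdgeToVisit bound must come from the same abstraction.
    %m = call fast float @llvm.maxnum.f32(float %x, float %y)
    ret float %m
  }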





[llvm-branch-commits] [llvm] f6929c0 - [SLP] add reduction tests for maxnum/minnum intrinsics; NFC

2020-12-22 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2020-12-22T16:05:39-05:00
New Revision: f6929c01952b3f144df620544ed937e801b9c945

URL: 
https://github.com/llvm/llvm-project/commit/f6929c01952b3f144df620544ed937e801b9c945
DIFF: 
https://github.com/llvm/llvm-project/commit/f6929c01952b3f144df620544ed937e801b9c945.diff

LOG: [SLP] add reduction tests for maxnum/minnum intrinsics; NFC

Added: 


Modified: 
llvm/test/Transforms/SLPVectorizer/X86/fmaxnum.ll
llvm/test/Transforms/SLPVectorizer/X86/fminnum.ll

Removed: 




diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/fmaxnum.ll 
b/llvm/test/Transforms/SLPVectorizer/X86/fmaxnum.ll
index e03f3f808a4f..23f2196b2425 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/fmaxnum.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/fmaxnum.ll
@@ -338,4 +338,151 @@ define void @fmaxnum_16f32() #0 {
   ret void
 }
 
+define float @reduction_v4f32_fast(float* %p) {
+; CHECK-LABEL: @reduction_v4f32_fast(
+; CHECK-NEXT:[[G1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], 
i64 1
+; CHECK-NEXT:[[G2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2
+; CHECK-NEXT:[[G3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3
+; CHECK-NEXT:[[T0:%.*]] = load float, float* [[P]], align 4
+; CHECK-NEXT:[[T1:%.*]] = load float, float* [[G1]], align 4
+; CHECK-NEXT:[[T2:%.*]] = load float, float* [[G2]], align 4
+; CHECK-NEXT:[[T3:%.*]] = load float, float* [[G3]], align 4
+; CHECK-NEXT:[[M1:%.*]] = tail call fast float @llvm.maxnum.f32(float 
[[T1]], float [[T0]])
+; CHECK-NEXT:[[M2:%.*]] = tail call fast float @llvm.maxnum.f32(float 
[[T2]], float [[M1]])
+; CHECK-NEXT:[[M3:%.*]] = tail call fast float @llvm.maxnum.f32(float 
[[T3]], float [[M2]])
+; CHECK-NEXT:ret float [[M3]]
+;
+  %g1 = getelementptr inbounds float, float* %p, i64 1
+  %g2 = getelementptr inbounds float, float* %p, i64 2
+  %g3 = getelementptr inbounds float, float* %p, i64 3
+  %t0 = load float, float* %p, align 4
+  %t1 = load float, float* %g1, align 4
+  %t2 = load float, float* %g2, align 4
+  %t3 = load float, float* %g3, align 4
+  %m1 = tail call fast float @llvm.maxnum.f32(float %t1, float %t0)
+  %m2 = tail call fast float @llvm.maxnum.f32(float %t2, float %m1)
+  %m3 = tail call fast float @llvm.maxnum.f32(float %t3, float %m2)
+  ret float %m3
+}
+
+define float @reduction_v4f32_nnan(float* %p) {
+; CHECK-LABEL: @reduction_v4f32_nnan(
+; CHECK-NEXT:[[G1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], 
i64 1
+; CHECK-NEXT:[[G2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2
+; CHECK-NEXT:[[G3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3
+; CHECK-NEXT:[[T0:%.*]] = load float, float* [[P]], align 4
+; CHECK-NEXT:[[T1:%.*]] = load float, float* [[G1]], align 4
+; CHECK-NEXT:[[T2:%.*]] = load float, float* [[G2]], align 4
+; CHECK-NEXT:[[T3:%.*]] = load float, float* [[G3]], align 4
+; CHECK-NEXT:[[M1:%.*]] = tail call nnan float @llvm.maxnum.f32(float 
[[T1]], float [[T0]])
+; CHECK-NEXT:[[M2:%.*]] = tail call nnan float @llvm.maxnum.f32(float 
[[T2]], float [[M1]])
+; CHECK-NEXT:[[M3:%.*]] = tail call nnan float @llvm.maxnum.f32(float 
[[T3]], float [[M2]])
+; CHECK-NEXT:ret float [[M3]]
+;
+  %g1 = getelementptr inbounds float, float* %p, i64 1
+  %g2 = getelementptr inbounds float, float* %p, i64 2
+  %g3 = getelementptr inbounds float, float* %p, i64 3
+  %t0 = load float, float* %p, align 4
+  %t1 = load float, float* %g1, align 4
+  %t2 = load float, float* %g2, align 4
+  %t3 = load float, float* %g3, align 4
+  %m1 = tail call nnan float @llvm.maxnum.f32(float %t1, float %t0)
+  %m2 = tail call nnan float @llvm.maxnum.f32(float %t2, float %m1)
+  %m3 = tail call nnan float @llvm.maxnum.f32(float %t3, float %m2)
+  ret float %m3
+}
+
+define float @reduction_v8f32_fast(float* %p) {
+; CHECK-LABEL: @reduction_v8f32_fast(
+; CHECK-NEXT:[[G1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], 
i64 1
+; CHECK-NEXT:[[G2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2
+; CHECK-NEXT:[[G3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3
+; CHECK-NEXT:[[G4:%.*]] = getelementptr inbounds float, float* [[P]], i64 4
+; CHECK-NEXT:[[G5:%.*]] = getelementptr inbounds float, float* [[P]], i64 5
+; CHECK-NEXT:[[G6:%.*]] = getelementptr inbounds float, float* [[P]], i64 6
+; CHECK-NEXT:[[G7:%.*]] = getelementptr inbounds float, float* [[P]], i64 7
+; CHECK-NEXT:[[T0:%.*]] = load float, float* [[P]], align 4
+; CHECK-NEXT:[[T1:%.*]] = load float, float* [[G1]], align 4
+; CHECK-NEXT:[[T2:%.*]] = load float, float* [[G2]], align 4
+; CHECK-NEXT:[[T3:%.*]] = load float, float* [[G3]], align 4
+; CHECK-NEXT:[[T4:%.*]] = load float, float* [[G4]], align 4
+; CHECK-NEXT:[[T5:%.*]] = load float, float* [[G5]], align 4
+; CHECK-NEXT:[[T6:

[llvm-branch-commits] [llvm] badf0f2 - [SLP] rename reduction variables for readability; NFC

2020-12-26 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2020-12-26T11:20:25-05:00
New Revision: badf0f20f3b3e8f8f06d6c632d2c9fc8e509fd25

URL: 
https://github.com/llvm/llvm-project/commit/badf0f20f3b3e8f8f06d6c632d2c9fc8e509fd25
DIFF: 
https://github.com/llvm/llvm-project/commit/badf0f20f3b3e8f8f06d6c632d2c9fc8e509fd25.diff

LOG: [SLP] rename reduction variables for readability; NFC

I am hoping to extend the reduction matching code, and it is
hard to distinguish "ReductionData" from "ReducedValueData".
So extend the tree/root metaphor to include leaves.

Another problem is that the name "OperationData" does not
provide insight into its purpose. I'm not sure if we can alter
that underlying data structure to make the code clearer.

Added: 


Modified: 
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 




diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp 
b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index bba6ddc87afb..8a455f300e39 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -6422,17 +6422,16 @@ namespace {
 
 /// Model horizontal reductions.
 ///
-/// A horizontal reduction is a tree of reduction operations (currently add and
-/// fadd) that has operations that can be put into a vector as its leaf.
-/// For example, this tree:
+/// A horizontal reduction is a tree of reduction instructions that has values
+/// that can be put into a vector as its leaves. For example:
 ///
 /// mul mul mul mul
 ///  \  /\  /
 ///   +   +
 ///\ /
 ///   +
-/// This tree has "mul" as its reduced values and "+" as its reduction
-/// operations. A reduction might be feeding into a store or a binary operation
+/// This tree has "mul" as its leaf values and "+" as its reduction
+/// instructions. A reduction can feed into a store or a binary operation
 /// feeding a phi.
 ///...
 ///\  /
@@ -6756,10 +6755,10 @@ class HorizontalReduction {
   WeakTrackingVH ReductionRoot;
 
   /// The operation data of the reduction operation.
-  OperationData ReductionData;
+  OperationData RdxTreeInst;
 
-  /// The operation data of the values we perform a reduction on.
-  OperationData ReducedValueData;
+  /// The operation data for the leaf values that we perform a reduction on.
+  OperationData RdxLeafVal;
 
   /// Should we model this reduction as a pairwise reduction tree or a tree 
that
   /// splits the vector in halves and adds those halves.
@@ -6875,24 +6874,24 @@ class HorizontalReduction {
 assert((!Phi || is_contained(Phi->operands(), B)) &&
"Thi phi needs to use the binary operator");
 
-ReductionData = getOperationData(B);
+RdxTreeInst = getOperationData(B);
 
 // We could have a initial reductions that is not an add.
 //  r *= v1 + v2 + v3 + v4
 // In such a case start looking for a tree rooted in the first '+'.
 if (Phi) {
-  if (ReductionData.getLHS(B) == Phi) {
+  if (RdxTreeInst.getLHS(B) == Phi) {
 Phi = nullptr;
-B = dyn_cast(ReductionData.getRHS(B));
-ReductionData = getOperationData(B);
-  } else if (ReductionData.getRHS(B) == Phi) {
+B = dyn_cast(RdxTreeInst.getRHS(B));
+RdxTreeInst = getOperationData(B);
+  } else if (RdxTreeInst.getRHS(B) == Phi) {
 Phi = nullptr;
-B = dyn_cast(ReductionData.getLHS(B));
-ReductionData = getOperationData(B);
+B = dyn_cast(RdxTreeInst.getLHS(B));
+RdxTreeInst = getOperationData(B);
   }
 }
 
-if (!ReductionData.isVectorizable(B))
+if (!RdxTreeInst.isVectorizable(B))
   return false;
 
 Type *Ty = B->getType();
@@ -6901,19 +6900,19 @@ class HorizontalReduction {
 if (!Ty->isIntOrIntVectorTy() && !Ty->isFPOrFPVectorTy())
   return false;
 
-ReducedValueData.clear();
+RdxLeafVal.clear();
 ReductionRoot = B;
 
 // Post order traverse the reduction tree starting at B. We only handle 
true
 // trees containing only binary operators.
 SmallVector, 32> Stack;
-Stack.push_back(std::make_pair(B, ReductionData.getFirstOperandIndex()));
-ReductionData.initReductionOps(ReductionOps);
+Stack.push_back(std::make_pair(B, RdxTreeInst.getFirstOperandIndex()));
+RdxTreeInst.initReductionOps(ReductionOps);
 while (!Stack.empty()) {
   Instruction *TreeN = Stack.back().first;
   unsigned EdgeToVisit = Stack.back().second++;
   OperationData OpData = getOperationData(TreeN);
-  bool IsReducedValue = OpData != ReductionData;
+  bool IsReducedValue = OpData != RdxTreeInst;
 
   // Postorder vist.
   if (IsReducedValue || EdgeToVisit == OpData.getNumberOfOperands()) {
@@ -6934,7 +6933,7 @@ class HorizontalReduction {
 markExtraArg(Stack[Stack.size() - 2], TreeN);
 ExtraArgs.erase(TreeN);
   } else
-ReductionData.addReductionOps(TreeN, ReductionOp
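
(Editor's aside: to make the tree/leaf metaphor concrete, a hedged IR
rendering of the mul/+ diagram above, with a hypothetical function name.
The muls are the leaves, now "RdxLeafVal"; the adds are the reduction
instructions, now "RdxTreeInst".)

  define i32 @hadd_of_muls(i32 %a0, i32 %b0, i32 %a1, i32 %b1,
                           i32 %a2, i32 %b2, i32 %a3, i32 %b3) {
    %m0 = mul i32 %a0, %b0
    %m1 = mul i32 %a1, %b1
    %m2 = mul i32 %a2, %b2
    %m3 = mul i32 %a3, %b3
    %s0 = add i32 %m0, %m1
    %s1 = add i32 %m2, %m3
    %s2 = add i32 %s0, %s1
    ret i32 %s2
  }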

[llvm-branch-commits] [llvm] c4ca108 - [SLP] use switch to improve readability; NFC

2020-12-26 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2020-12-26T10:59:45-05:00
New Revision: c4ca108966926871a7e2bf362b1816be88a99162

URL: 
https://github.com/llvm/llvm-project/commit/c4ca108966926871a7e2bf362b1816be88a99162
DIFF: 
https://github.com/llvm/llvm-project/commit/c4ca108966926871a7e2bf362b1816be88a99162.diff

LOG: [SLP] use switch to improve readability; NFC

This will get more complicated when we handle intrinsics like maxnum.

Added: 


Modified: 
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 




diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp 
b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index f3a0baa00267..bba6ddc87afb 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -6475,15 +6475,20 @@ class HorizontalReduction {
 
 /// Checks if the reduction operation can be vectorized.
 bool isVectorizable() const {
-  // We currently only support add/mul/logical && min/max reductions.
-  return ((Kind == RK_Arithmetic &&
-   (Opcode == Instruction::Add || Opcode == Instruction::FAdd ||
-Opcode == Instruction::Mul || Opcode == Instruction::FMul ||
-Opcode == Instruction::And || Opcode == Instruction::Or ||
-Opcode == Instruction::Xor)) ||
-  (Opcode == Instruction::ICmp &&
-   (Kind == RK_SMin || Kind == RK_SMax ||
-Kind == RK_UMin || Kind == RK_UMax)));
+  switch (Kind) {
+  case RK_Arithmetic:
+return Opcode == Instruction::Add || Opcode == Instruction::FAdd ||
+   Opcode == Instruction::Mul || Opcode == Instruction::FMul ||
+   Opcode == Instruction::And || Opcode == Instruction::Or ||
+   Opcode == Instruction::Xor;
+  case RK_SMin:
+  case RK_SMax:
+  case RK_UMin:
+  case RK_UMax:
+return Opcode == Instruction::ICmp;
+  default:
+return false;
+  }
 }
 
 /// Creates reduction operation with the current opcode.





[llvm-branch-commits] [llvm] c5a4d80 - [ValueTracking][MemCpyOpt] avoid crash on inttoptr with vector pointer type (PR48075)

2020-11-22 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2020-11-22T12:54:18-05:00
New Revision: c5a4d80fd47cfdae1995df46d0c407f78d8666e8

URL: 
https://github.com/llvm/llvm-project/commit/c5a4d80fd47cfdae1995df46d0c407f78d8666e8
DIFF: 
https://github.com/llvm/llvm-project/commit/c5a4d80fd47cfdae1995df46d0c407f78d8666e8.diff

LOG: [ValueTracking][MemCpyOpt] avoid crash on inttoptr with vector pointer 
type (PR48075)

Added: 


Modified: 
llvm/lib/Analysis/ValueTracking.cpp
llvm/test/Transforms/MemCpyOpt/crash.ll

Removed: 




diff  --git a/llvm/lib/Analysis/ValueTracking.cpp 
b/llvm/lib/Analysis/ValueTracking.cpp
index bcf35111502e..90f8dff87472 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -3610,12 +3610,13 @@ Value *llvm::isBytewiseValue(Value *V, const DataLayout 
&DL) {
 
   if (auto *CE = dyn_cast(C)) {
 if (CE->getOpcode() == Instruction::IntToPtr) {
-  auto PS = DL.getPointerSizeInBits(
-  cast(CE->getType())->getAddressSpace());
-  return isBytewiseValue(
-  ConstantExpr::getIntegerCast(CE->getOperand(0),
-   Type::getIntNTy(Ctx, PS), false),
-  DL);
+  if (auto *PtrTy = dyn_cast(CE->getType())) {
+unsigned BitWidth = DL.getPointerSizeInBits(PtrTy->getAddressSpace());
+return isBytewiseValue(
+ConstantExpr::getIntegerCast(CE->getOperand(0),
+ Type::getIntNTy(Ctx, BitWidth), 
false),
+DL);
+  }
 }
   }
 

diff  --git a/llvm/test/Transforms/MemCpyOpt/crash.ll 
b/llvm/test/Transforms/MemCpyOpt/crash.ll
index f70f10429f84..73635891c683 100644
--- a/llvm/test/Transforms/MemCpyOpt/crash.ll
+++ b/llvm/test/Transforms/MemCpyOpt/crash.ll
@@ -83,3 +83,16 @@ define void @test2(i32 %cmd) nounwind {
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* null, i8* undef, i64 20, i1 false) 
nounwind
   ret void
 }
+
+; https://llvm.org/PR48075
+
+@g = external global i16, align 1
+
+define void @inttoptr_constexpr_crash(<1 x i16*>* %p) {
+; CHECK-LABEL: @inttoptr_constexpr_crash(
+; CHECK-NEXT:store <1 x i16*> inttoptr (<1 x i16> bitcast (<2 x i8>  to <1 x i16>) to <1 x 
i16*>), <1 x i16*>* [[P:%.*]], align 1
+; CHECK-NEXT:ret void
+;
+  store <1 x i16*> inttoptr (<1 x i16> bitcast (<2 x i8>  to <1 x i16>) to <1 x i16*>), <1 x i16*>* 
%p, align 1
+  ret void
+}





[llvm-branch-commits] [llvm] 3a18f26 - [CostModel] add tests for FP maximum; NFC

2020-11-22 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2020-11-22T13:33:42-05:00
New Revision: 3a18f267236351873a4c7821735c70b0790e4919

URL: 
https://github.com/llvm/llvm-project/commit/3a18f267236351873a4c7821735c70b0790e4919
DIFF: 
https://github.com/llvm/llvm-project/commit/3a18f267236351873a4c7821735c70b0790e4919.diff

LOG: [CostModel] add tests for FP maximum; NFC

These min/max intrinsics are not handled in the basic
implementation and probably not handled in target-specific
overrides either.

Added: 


Modified: 
llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll
llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll

Removed: 




diff  --git a/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll 
b/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll
index e472e0424d8a..805bd810e950 100644
--- a/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll
+++ b/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll
@@ -22,6 +22,9 @@ declare <16 x float> @llvm.log2.v16f32(<16 x float>)
 declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, 
metadata)
 declare <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float>, 
<16 x float>, metadata, metadata)
 
+declare float @llvm.maximum.f32(float, float)
+declare <16 x float> @llvm.maximum.v16f32(<16 x float>, <16 x float>)
+
 declare i32 @llvm.cttz.i32(i32, i1)
 declare <16 x i32> @llvm.cttz.v16i32(<16 x i32>, i1)
 
@@ -141,6 +144,32 @@ define void @constrained_fadd(float %a, <16 x float> %va) {
   ret void
 }
 
+define void @fmaximum(float %a, float %b, <16 x float> %va, <16 x float> %vb) {
+; THRU-LABEL: 'fmaximum'
+; THRU-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s = 
call float @llvm.maximum.f32(float %a, float %b)
+; THRU-NEXT:  Cost Model: Found an estimated cost of 784 for instruction: %v = 
call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb)
+; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret 
void
+;
+; LATE-LABEL: 'fmaximum'
+; LATE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %s = 
call float @llvm.maximum.f32(float %a, float %b)
+; LATE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v = 
call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb)
+; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret 
void
+;
+; SIZE-LABEL: 'fmaximum'
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s = 
call float @llvm.maximum.f32(float %a, float %b)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 784 for instruction: %v = 
call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret 
void
+;
+; SIZE_LATE-LABEL: 'fmaximum'
+; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: 
%s = call float @llvm.maximum.f32(float %a, float %b)
+; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 784 for instruction: 
%v = call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb)
+; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: 
ret void
+;
+  %s = call float @llvm.maximum.f32(float %a, float %b)
+  %v = call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> 
%vb)
+  ret void
+}
+
 define void @cttz(i32 %a, <16 x i32> %va) {
 ; THRU-LABEL: 'cttz'
 ; THRU-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s = 
call i32 @llvm.cttz.i32(i32 %a, i1 false)

diff  --git a/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll 
b/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll
index 2e53c836676f..f7f0a24af363 100644
--- a/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll
+++ b/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll
@@ -25,6 +25,9 @@ declare <16 x float> @llvm.log2.v16f32(<16 x float>)
 declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, 
metadata)
 declare <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float>, 
<16 x float>, metadata, metadata)
 
+declare float @llvm.maximum.f32(float, float)
+declare <16 x float> @llvm.maximum.v16f32(<16 x float>, <16 x float>)
+
 declare i32 @llvm.cttz.i32(i32, i1)
 declare <16 x i32> @llvm.cttz.v16i32(<16 x i32>, i1)
 
@@ -172,6 +175,32 @@ define void @constrained_fadd(float %a, <16 x float> %va) {
   ret void
 }
 
+define void @fmaximum(float %a, float %b, <16 x float> %va, <16 x float> %vb) {
+; THRU-LABEL: 'fmaximum'
+; THRU-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s = 
call float @llvm.maximum.f32(float %a, float %b)
+; THRU-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %v = 
call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb)
+; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret 
void
+

[llvm-branch-commits] [llvm] 2717252 - [CostModel] add basic handling for FP maximum/minimum intrinsics

2020-11-22 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2020-11-22T13:43:53-05:00
New Revision: 2717252c929be7b1f14c36dda9686a4aa8726de3

URL: 
https://github.com/llvm/llvm-project/commit/2717252c929be7b1f14c36dda9686a4aa8726de3
DIFF: 
https://github.com/llvm/llvm-project/commit/2717252c929be7b1f14c36dda9686a4aa8726de3.diff

LOG: [CostModel] add basic handling for FP maximum/minimum intrinsics

This might be a regression for some ARM targets, but that should
be changed in the target-specific overrides.

There is apparently still no default lowering for these nodes,
so I am assuming these intrinsics are not in common use.
X86, PowerPC, and RISC-V, for example, just crash given the most
basic IR.

Added: 


Modified: 
llvm/include/llvm/CodeGen/BasicTTIImpl.h
llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll
llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll

Removed: 




diff  --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h 
b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 91c426fb6730..fce025aa75f8 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -1396,6 +1396,12 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
 case Intrinsic::maxnum:
   ISDs.push_back(ISD::FMAXNUM);
   break;
+case Intrinsic::minimum:
+  ISDs.push_back(ISD::FMINIMUM);
+  break;
+case Intrinsic::maximum:
+  ISDs.push_back(ISD::FMAXIMUM);
+  break;
 case Intrinsic::copysign:
   ISDs.push_back(ISD::FCOPYSIGN);
   break;

diff  --git a/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll 
b/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll
index 805bd810e950..2ed26243733b 100644
--- a/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll
+++ b/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll
@@ -146,8 +146,8 @@ define void @constrained_fadd(float %a, <16 x float> %va) {
 
 define void @fmaximum(float %a, float %b, <16 x float> %va, <16 x float> %vb) {
 ; THRU-LABEL: 'fmaximum'
-; THRU-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s = 
call float @llvm.maximum.f32(float %a, float %b)
-; THRU-NEXT:  Cost Model: Found an estimated cost of 784 for instruction: %v = 
call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb)
+; THRU-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %s = 
call float @llvm.maximum.f32(float %a, float %b)
+; THRU-NEXT:  Cost Model: Found an estimated cost of 928 for instruction: %v = 
call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb)
 ; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret 
void
 ;
 ; LATE-LABEL: 'fmaximum'
@@ -161,8 +161,8 @@ define void @fmaximum(float %a, float %b, <16 x float> %va, 
<16 x float> %vb) {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret 
void
 ;
 ; SIZE_LATE-LABEL: 'fmaximum'
-; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: 
%s = call float @llvm.maximum.f32(float %a, float %b)
-; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 784 for instruction: 
%v = call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb)
+; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: 
%s = call float @llvm.maximum.f32(float %a, float %b)
+; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 928 for instruction: 
%v = call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb)
 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: 
ret void
 ;
   %s = call float @llvm.maximum.f32(float %a, float %b)

diff  --git a/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll 
b/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll
index f7f0a24af363..4d0dbe544fb5 100644
--- a/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll
+++ b/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll
@@ -177,8 +177,8 @@ define void @constrained_fadd(float %a, <16 x float> %va) {
 
 define void @fmaximum(float %a, float %b, <16 x float> %va, <16 x float> %vb) {
 ; THRU-LABEL: 'fmaximum'
-; THRU-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %s = 
call float @llvm.maximum.f32(float %a, float %b)
-; THRU-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %v = 
call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb)
+; THRU-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %s = 
call float @llvm.maximum.f32(float %a, float %b)
+; THRU-NEXT:  Cost Model: Found an estimated cost of 196 for instruction: %v = 
call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb)
 ; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret 
void
 ;
 ; LATE-LABEL: 'fmaximum'
@@ -192,8 +192,8 @@ define void @fmaximum(float %a, float %b, <16 x float> %va, 
<16 x floa
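
(Editor's hedged aside: the "most basic IR" referred to in the log is just a
lone scalar call such as the sketch below; without a default lowering for
ISD::FMAXIMUM/FMINIMUM, code generation on those targets crashed on it.)

  declare float @llvm.maximum.f32(float, float)

  define float @fmaximum_basic(float %a, float %b) {
    %r = call float @llvm.maximum.f32(float %a, float %b)
    ret float %r
  }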

[llvm-branch-commits] [llvm] ab29f09 - [InstCombine] propagate 'nsw' on pointer difference of 'inbounds' geps

2020-11-23 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2020-11-23T16:50:09-05:00
New Revision: ab29f091eb64c8608ba943df604b218bcff41a26

URL: 
https://github.com/llvm/llvm-project/commit/ab29f091eb64c8608ba943df604b218bcff41a26
DIFF: 
https://github.com/llvm/llvm-project/commit/ab29f091eb64c8608ba943df604b218bcff41a26.diff

LOG: [InstCombine] propagate 'nsw' on pointer difference of 'inbounds' geps

This is a retry of 324a53205. I cautiously reverted that at 6aa3fc4
because the rules about gep math were not clear. Since then, we
have added this line to LangRef for gep inbounds:
"The successive addition of offsets (without adding the base address)
does not wrap the pointer index type in a signed sense (nsw)."

See D90708 and post-commit comments on the revert patch for more details.

Added: 


Modified: 
llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
llvm/test/Transforms/InstCombine/sub-gep.ll
llvm/test/Transforms/InstCombine/sub.ll

Removed: 




diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp 
b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index b8431a5a4532..9a6a790aefaf 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1678,11 +1678,12 @@ Value 
*InstCombinerImpl::OptimizePointerDifference(Value *LHS, Value *RHS,
 I->getOpcode() == Instruction::Mul)
   I->setHasNoUnsignedWrap();
 
-  // If we had a constant expression GEP on the other side offsetting the
-  // pointer, subtract it from the offset we have.
+  // If we have a 2nd GEP of the same base pointer, subtract the offsets.
+  // If both GEPs are inbounds, then the subtract does not have signed 
overflow.
   if (GEP2) {
 Value *Offset = EmitGEPOffset(GEP2);
-Result = Builder.CreateSub(Result, Offset, "gepdiff");
+Result = Builder.CreateSub(Result, Offset, "gepdiff", /* NUW */ false,
+   GEP1->isInBounds() && GEP2->isInBounds());
   }
 
   // If we have p - gep(p, ...)  then we have to negate the result.

diff  --git a/llvm/test/Transforms/InstCombine/sub-gep.ll 
b/llvm/test/Transforms/InstCombine/sub-gep.ll
index 9868ed1cdf57..2389b70c3452 100644
--- a/llvm/test/Transforms/InstCombine/sub-gep.ll
+++ b/llvm/test/Transforms/InstCombine/sub-gep.ll
@@ -245,7 +245,7 @@ define i64 @test24b(i8* %P, i64 %A){
 define i64 @test25(i8* %P, i64 %A){
 ; CHECK-LABEL: @test25(
 ; CHECK-NEXT:[[B_IDX:%.*]] = shl nsw i64 [[A:%.*]], 1
-; CHECK-NEXT:[[GEPDIFF:%.*]] = add i64 [[B_IDX]], -84
+; CHECK-NEXT:[[GEPDIFF:%.*]] = add nsw i64 [[B_IDX]], -84
 ; CHECK-NEXT:ret i64 [[GEPDIFF]]
 ;
   %B = getelementptr inbounds [42 x i16], [42 x i16]* @Arr, i64 0, i64 %A
@@ -260,7 +260,7 @@ define i16 @test25_as1(i8 addrspace(1)* %P, i64 %A) {
 ; CHECK-LABEL: @test25_as1(
 ; CHECK-NEXT:[[TMP1:%.*]] = trunc i64 [[A:%.*]] to i16
 ; CHECK-NEXT:[[B_IDX:%.*]] = shl nsw i16 [[TMP1]], 1
-; CHECK-NEXT:[[GEPDIFF:%.*]] = add i16 [[B_IDX]], -84
+; CHECK-NEXT:[[GEPDIFF:%.*]] = add nsw i16 [[B_IDX]], -84
 ; CHECK-NEXT:ret i16 [[GEPDIFF]]
 ;
   %B = getelementptr inbounds [42 x i16], [42 x i16] addrspace(1)* @Arr_as1, 
i64 0, i64 %A
@@ -272,7 +272,7 @@ define i16 @test25_as1(i8 addrspace(1)* %P, i64 %A) {
 define i64 @test30(i8* %foo, i64 %i, i64 %j) {
 ; CHECK-LABEL: @test30(
 ; CHECK-NEXT:[[GEP1_IDX:%.*]] = shl nsw i64 [[I:%.*]], 2
-; CHECK-NEXT:[[GEPDIFF:%.*]] = sub i64 [[GEP1_IDX]], [[J:%.*]]
+; CHECK-NEXT:[[GEPDIFF:%.*]] = sub nsw i64 [[GEP1_IDX]], [[J:%.*]]
 ; CHECK-NEXT:ret i64 [[GEPDIFF]]
 ;
   %bit = bitcast i8* %foo to i32*
@@ -287,7 +287,7 @@ define i64 @test30(i8* %foo, i64 %i, i64 %j) {
 define i16 @test30_as1(i8 addrspace(1)* %foo, i16 %i, i16 %j) {
 ; CHECK-LABEL: @test30_as1(
 ; CHECK-NEXT:[[GEP1_IDX:%.*]] = shl nsw i16 [[I:%.*]], 2
-; CHECK-NEXT:[[GEPDIFF:%.*]] = sub i16 [[GEP1_IDX]], [[J:%.*]]
+; CHECK-NEXT:[[GEPDIFF:%.*]] = sub nsw i16 [[GEP1_IDX]], [[J:%.*]]
 ; CHECK-NEXT:ret i16 [[GEPDIFF]]
 ;
   %bit = bitcast i8 addrspace(1)* %foo to i32 addrspace(1)*
@@ -299,9 +299,11 @@ define i16 @test30_as1(i8 addrspace(1)* %foo, i16 %i, i16 
%j) {
   ret i16 %sub
 }
 
+; Inbounds translates to 'nsw' on sub
+
 define i64 @gep_diff_both_inbounds(i8* %foo, i64 %i, i64 %j) {
 ; CHECK-LABEL: @gep_diff_both_inbounds(
-; CHECK-NEXT:[[GEPDIFF:%.*]] = sub i64 [[I:%.*]], [[J:%.*]]
+; CHECK-NEXT:[[GEPDIFF:%.*]] = sub nsw i64 [[I:%.*]], [[J:%.*]]
 ; CHECK-NEXT:ret i64 [[GEPDIFF]]
 ;
   %gep1 = getelementptr inbounds i8, i8* %foo, i64 %i
@@ -312,6 +314,8 @@ define i64 @gep_diff_both_inbounds(i8* %foo, i64 %i, i64 %j) {
   ret i64 %sub
 }
 
+; Negative test for 'nsw' - both geps must be inbounds
+
define i64 @gep_diff_first_inbounds(i8* %foo, i64 %i, i64 %j) {
 ; CHECK-LABEL: @gep_diff_first_inbounds(
 ; CHECK-NEXT:[[GEPDIFF:%.*]] = sub i64 [[I:%.*]], [[J:%.*]]
@@ 
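
(Editor's hedged sketch mirroring the tests above, with a hypothetical
function name: the fold fires on a sub of two ptrtoint values whose geps
share a base, and the sub may now carry nsw when both geps are inbounds.)

  define i64 @gep_diff_demo(i8* %p, i64 %i, i64 %j) {
    %g1 = getelementptr inbounds i8, i8* %p, i64 %i
    %g2 = getelementptr inbounds i8, i8* %p, i64 %j
    %a = ptrtoint i8* %g1 to i64
    %b = ptrtoint i8* %g2 to i64
    %d = sub i64 %a, %b       ; instcombine can produce: sub nsw i64 %i, %j
    ret i64 %d
  }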

[llvm-branch-commits] [llvm] 2cebad7 - [IR] remove redundant code comments; NFC

2020-11-29 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2020-11-29T09:29:59-05:00
New Revision: 2cebad702cdff8c320c8afa748626e8cc1b3b2f3

URL: 
https://github.com/llvm/llvm-project/commit/2cebad702cdff8c320c8afa748626e8cc1b3b2f3
DIFF: 
https://github.com/llvm/llvm-project/commit/2cebad702cdff8c320c8afa748626e8cc1b3b2f3.diff

LOG: [IR] remove redundant code comments; NFC

As noted in D92247 (and independent of that patch):

http://llvm.org/docs/CodingStandards.html#doxygen-use-in-documentation-comments

"Don’t duplicate the documentation comment in the header file and in the
implementation file. Put the documentation comments for public APIs into
the header file."

Added: 


Modified: 
llvm/lib/IR/BasicBlock.cpp

Removed: 




diff  --git a/llvm/lib/IR/BasicBlock.cpp b/llvm/lib/IR/BasicBlock.cpp
index 23a1184e1246..31666265b504 100644
--- a/llvm/lib/IR/BasicBlock.cpp
+++ b/llvm/lib/IR/BasicBlock.cpp
@@ -130,15 +130,11 @@ iplist::iterator 
BasicBlock::eraseFromParent() {
   return getParent()->getBasicBlockList().erase(getIterator());
 }
 
-/// Unlink this basic block from its current function and
-/// insert it into the function that MovePos lives in, right before MovePos.
 void BasicBlock::moveBefore(BasicBlock *MovePos) {
   MovePos->getParent()->getBasicBlockList().splice(
   MovePos->getIterator(), getParent()->getBasicBlockList(), getIterator());
 }
 
-/// Unlink this basic block from its current function and
-/// insert it into the function that MovePos lives in, right after MovePos.
 void BasicBlock::moveAfter(BasicBlock *MovePos) {
   MovePos->getParent()->getBasicBlockList().splice(
   ++MovePos->getIterator(), getParent()->getBasicBlockList(),
@@ -265,8 +261,6 @@ void BasicBlock::dropAllReferences() {
 I.dropAllReferences();
 }
 
-/// If this basic block has a single predecessor block,
-/// return the block, otherwise return a null pointer.
 const BasicBlock *BasicBlock::getSinglePredecessor() const {
   const_pred_iterator PI = pred_begin(this), E = pred_end(this);
   if (PI == E) return nullptr; // No preds.
@@ -275,11 +269,6 @@ const BasicBlock *BasicBlock::getSinglePredecessor() const 
{
   return (PI == E) ? ThePred : nullptr /*multiple preds*/;
 }
 
-/// If this basic block has a unique predecessor block,
-/// return the block, otherwise return a null pointer.
-/// Note that unique predecessor doesn't mean single edge, there can be
-/// multiple edges from the unique predecessor to this block (for example
-/// a switch statement with multiple cases having the same destination).
 const BasicBlock *BasicBlock::getUniquePredecessor() const {
   const_pred_iterator PI = pred_begin(this), E = pred_end(this);
   if (PI == E) return nullptr; // No preds.
@@ -329,12 +318,6 @@ iterator_range 
BasicBlock::phis() {
   return make_range(P, nullptr);
 }
 
-/// Update PHI nodes in this BasicBlock before removal of predecessor \p Pred.
-/// Note that this function does not actually remove the predecessor.
-///
-/// If \p KeepOneInputPHIs is true then don't remove PHIs that are left with
-/// zero or one incoming values, and don't simplify PHIs with all incoming
-/// values the same.
 void BasicBlock::removePredecessor(BasicBlock *Pred,
bool KeepOneInputPHIs) {
   // Use hasNUsesOrMore to bound the cost of this assertion for complex CFGs.
@@ -389,17 +372,6 @@ bool BasicBlock::isLegalToHoistInto() const {
   return !Term->isExceptionalTerminator();
 }
 
-/// This splits a basic block into two at the specified
-/// instruction.  Note that all instructions BEFORE the specified iterator stay
-/// as part of the original basic block, an unconditional branch is added to
-/// the new BB, and the rest of the instructions in the BB are moved to the new
-/// BB, including the old terminator.  This invalidates the iterator.
-///
-/// Note that this only works on well formed basic blocks (must have a
-/// terminator), and 'I' must not be the end of instruction list (which would
-/// cause a degenerate basic block to be formed, having a terminator inside of
-/// the basic block).
-///
 BasicBlock *BasicBlock::splitBasicBlock(iterator I, const Twine &BBName) {
   assert(getTerminator() && "Can't use splitBasicBlock on degenerate BB!");
   assert(I != InstList.end() &&
@@ -454,13 +426,10 @@ void BasicBlock::replaceSuccessorsPhiUsesWith(BasicBlock 
*New) {
   this->replaceSuccessorsPhiUsesWith(this, New);
 }
 
-/// Return true if this basic block is a landing pad. I.e., it's
-/// the destination of the 'unwind' edge of an invoke instruction.
 bool BasicBlock::isLandingPad() const {
   return isa(getFirstNonPHI());
 }
 
-/// Return the landingpad instruction associated with the landing pad.
 const LandingPadInst *BasicBlock::getLandingPadInst() const {
   return dyn_cast(getFirstNonPHI());
 }




[llvm-branch-commits] [llvm] ce134da - [IR] simplify code in removePredecessor(); NFCI

2020-11-29 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2020-11-29T09:55:04-05:00
New Revision: ce134da4b18c27bbeba4e32f5813b1a3b043066e

URL: 
https://github.com/llvm/llvm-project/commit/ce134da4b18c27bbeba4e32f5813b1a3b043066e
DIFF: 
https://github.com/llvm/llvm-project/commit/ce134da4b18c27bbeba4e32f5813b1a3b043066e.diff

LOG: [IR] simplify code in removePredecessor(); NFCI

As suggested in D92247 (and independent of whatever we decide to do there),
this code is confusing as-is. Hopefully, this is at least mildly better.

We might be able to do better still, but we have a function called
"removePredecessor" with this behavior:
"Note that this function does not actually remove the predecessor." (!)

Added: 


Modified: 
llvm/lib/IR/BasicBlock.cpp

Removed: 




diff  --git a/llvm/lib/IR/BasicBlock.cpp b/llvm/lib/IR/BasicBlock.cpp
index 31666265b504..3268641ddf19 100644
--- a/llvm/lib/IR/BasicBlock.cpp
+++ b/llvm/lib/IR/BasicBlock.cpp
@@ -327,21 +327,19 @@ void BasicBlock::removePredecessor(BasicBlock *Pred,
   // Return early if there are no PHI nodes to update.
   if (!isa(begin()))
 return;
-  unsigned NumPreds = cast(front()).getNumIncomingValues();
 
-  // Update all PHI nodes.
-  for (iterator II = begin(); isa(II);) {
-PHINode *PN = cast(II++);
-PN->removeIncomingValue(Pred, !KeepOneInputPHIs);
-if (!KeepOneInputPHIs) {
-  // If we have a single predecessor, removeIncomingValue erased the PHI
-  // node itself.
-  if (NumPreds > 1) {
-if (Value *PNV = PN->hasConstantValue()) {
-  // Replace the PHI node with its constant value.
-  PN->replaceAllUsesWith(PNV);
-  PN->eraseFromParent();
-}
+  unsigned NumPreds = cast(front()).getNumIncomingValues();
+  for (PHINode &Phi : make_early_inc_range(phis())) {
+Phi.removeIncomingValue(Pred, !KeepOneInputPHIs);
+if (KeepOneInputPHIs)
+  continue;
+// If we have a single predecessor, removeIncomingValue erased the PHI
+// node itself.
+// Try to replace the PHI node with a constant value.
+if (NumPreds > 1) {
+  if (Value *PhiConstant = Phi.hasConstantValue()) {
+Phi.replaceAllUsesWith(PhiConstant);
+Phi.eraseFromParent();
   }
 }
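
(Editor's aside, a hedged rendering of the hasConstantValue() case with a
hypothetical function: if the phi merges the same value along every
remaining edge, it is replaced outright and erased.)

  define i32 @phi_all_same(i1 %c) {
  entry:
    br i1 %c, label %a, label %b
  a:
    br label %join
  b:
    br label %join
  join:
    ; Both incoming values are 7, so hasConstantValue() returns 7; uses
    ; of %p are replaced with 7 and the phi is erased.
    %p = phi i32 [ 7, %a ], [ 7, %b ]
    ret i32 %p
  }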
   }





[llvm-branch-commits] [llvm] bfd2c21 - [IR][LoopRotate] avoid leaving phi with no operands (PR48296)

2020-11-30 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2020-11-30T09:28:45-05:00
New Revision: bfd2c216ea8ef09f8fb1f755ca2b89f86f74acbb

URL: 
https://github.com/llvm/llvm-project/commit/bfd2c216ea8ef09f8fb1f755ca2b89f86f74acbb
DIFF: 
https://github.com/llvm/llvm-project/commit/bfd2c216ea8ef09f8fb1f755ca2b89f86f74acbb.diff

LOG: [IR][LoopRotate] avoid leaving phi with no operands (PR48296)

https://llvm.org/PR48296 shows an example where we delete all of the operands
of a phi without actually deleting the phi, and that is currently considered
invalid IR. The reduced test included here would crash for that reason.

A suggested follow-up is to loosen the assert to allow 0-operand phis
in unreachable blocks.

Differential Revision: https://reviews.llvm.org/D92247

Added: 
llvm/test/Transforms/LoopRotate/phi-empty.ll

Modified: 
llvm/include/llvm/IR/BasicBlock.h
llvm/lib/IR/BasicBlock.cpp

Removed: 




diff  --git a/llvm/include/llvm/IR/BasicBlock.h 
b/llvm/include/llvm/IR/BasicBlock.h
index 26cfdd9e51d6..149b0a26c1f3 100644
--- a/llvm/include/llvm/IR/BasicBlock.h
+++ b/llvm/include/llvm/IR/BasicBlock.h
@@ -387,9 +387,9 @@ class BasicBlock final : public Value, // Basic blocks are 
data objects also
   /// Update PHI nodes in this BasicBlock before removal of predecessor \p 
Pred.
   /// Note that this function does not actually remove the predecessor.
   ///
-  /// If \p KeepOneInputPHIs is true then don't remove PHIs that are left with
-  /// zero or one incoming values, and don't simplify PHIs with all incoming
-  /// values the same.
+  /// If \p KeepOneInputPHIs is true, then don't remove PHIs that are left with
+  /// one incoming value and don't simplify PHIs with all incoming values the
+  /// same.
   void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs = false);
 
   bool canSplitPredecessors() const;

diff  --git a/llvm/lib/IR/BasicBlock.cpp b/llvm/lib/IR/BasicBlock.cpp
index 3268641ddf19..aee769aa0fea 100644
--- a/llvm/lib/IR/BasicBlock.cpp
+++ b/llvm/lib/IR/BasicBlock.cpp
@@ -330,7 +330,7 @@ void BasicBlock::removePredecessor(BasicBlock *Pred,
 
   unsigned NumPreds = cast(front()).getNumIncomingValues();
   for (PHINode &Phi : make_early_inc_range(phis())) {
-Phi.removeIncomingValue(Pred, !KeepOneInputPHIs);
+Phi.removeIncomingValue(Pred);
 if (KeepOneInputPHIs)
   continue;
 // If we have a single predecessor, removeIncomingValue erased the PHI

diff  --git a/llvm/test/Transforms/LoopRotate/phi-empty.ll 
b/llvm/test/Transforms/LoopRotate/phi-empty.ll
new file mode 100644
index ..e246cff91b62
--- /dev/null
+++ b/llvm/test/Transforms/LoopRotate/phi-empty.ll
@@ -0,0 +1,34 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -lcssa -loop-rotate < %s | FileCheck %s
+
+define void @PR48296(i1 %cond) {
+; CHECK-LABEL: @PR48296(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:br label [[LOOP:%.*]]
+; CHECK:   loop:
+; CHECK-NEXT:br i1 [[COND:%.*]], label [[INC:%.*]], label 
[[LOOP_BACKEDGE:%.*]]
+; CHECK:   loop.backedge:
+; CHECK-NEXT:br label [[LOOP]]
+; CHECK:   dead:
+; CHECK-NEXT:unreachable
+; CHECK:   inc:
+; CHECK-NEXT:br label [[LOOP_BACKEDGE]]
+; CHECK:   return:
+; CHECK-NEXT:ret void
+;
+entry:
+  br label %loop
+
+loop:
+  br i1 %cond, label %inc, label %loop
+
+dead:; No predecessors!
+  br i1 %cond, label %inc, label %return
+
+inc:
+  br label %loop
+
+return:
+  %r = phi i32 [ undef, %dead ]
+  ret void
+}





[llvm-branch-commits] [llvm] 355aee3 - Revert "[IR][LoopRotate] avoid leaving phi with no operands (PR48296)"

2020-11-30 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2020-11-30T10:15:42-05:00
New Revision: 355aee3dcd441461a6da6e56c43dc1bd81c79f31

URL: 
https://github.com/llvm/llvm-project/commit/355aee3dcd441461a6da6e56c43dc1bd81c79f31
DIFF: 
https://github.com/llvm/llvm-project/commit/355aee3dcd441461a6da6e56c43dc1bd81c79f31.diff

LOG: Revert "[IR][LoopRotate] avoid leaving phi with no operands (PR48296)"

This reverts commit bfd2c216ea8ef09f8fb1f755ca2b89f86f74acbb.
This appears to be causing stage2 msan failures on buildbots:
  FAIL: LLVM :: Transforms/SimplifyCFG/X86/bug-25299.ll (65872 of 71835)
   TEST 'LLVM :: Transforms/SimplifyCFG/X86/bug-25299.ll' 
FAILED 
  Script:
  --
  : 'RUN: at line 1';   
/b/sanitizer-x86_64-linux-fast/build/llvm_build_msan/bin/opt < 
/b/sanitizer-x86_64-linux-fast/build/llvm-project/llvm/test/Transforms/SimplifyCFG/X86/bug-25299.ll
 -simplifycfg -S | 
/b/sanitizer-x86_64-linux-fast/build/llvm_build_msan/bin/FileCheck 
/b/sanitizer-x86_64-linux-fast/build/llvm-project/llvm/test/Transforms/SimplifyCFG/X86/bug-25299.ll
  --
  Exit Code: 2
  Command Output (stderr):
  --
  ==87374==WARNING: MemorySanitizer: use-of-uninitialized-value
  #0 0x9de47b6 in getBasicBlockIndex 
/b/sanitizer-x86_64-linux-fast/build/llvm-project/llvm/include/llvm/IR/Instructions.h:2749:5
  #1 0x9de47b6 in simplifyCommonResume 
/b/sanitizer-x86_64-linux-fast/build/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp:4112:23
  #2 0x9de47b6 in simplifyResume 
/b/sanitizer-x86_64-linux-fast/build/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp:4039:12
  #3 0x9de47b6 in (anonymous 
namespace)::SimplifyCFGOpt::simplifyOnce(llvm::BasicBlock*) 
/b/sanitizer-x86_64-linux-fast/build/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp:6330:16
  #4 0x9dcca13 in run 
/b/sanitizer-x86_64-linux-fast/build/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp:6358:16
  #5 0x9dcca13 in llvm::simplifyCFG(llvm::BasicBlock*, 
llvm::TargetTransformInfo const&, llvm::SimplifyCFGOptions const&, 
llvm::SmallPtrSetImpl*) 
/b/sanitizer-x86_64-linux-fast/build/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp:6369:8
  #6 0x974643d in iterativelySimplifyCFG(

Added: 


Modified: 
llvm/include/llvm/IR/BasicBlock.h
llvm/lib/IR/BasicBlock.cpp

Removed: 
llvm/test/Transforms/LoopRotate/phi-empty.ll



diff  --git a/llvm/include/llvm/IR/BasicBlock.h 
b/llvm/include/llvm/IR/BasicBlock.h
index 149b0a26c1f3..26cfdd9e51d6 100644
--- a/llvm/include/llvm/IR/BasicBlock.h
+++ b/llvm/include/llvm/IR/BasicBlock.h
@@ -387,9 +387,9 @@ class BasicBlock final : public Value, // Basic blocks are 
data objects also
   /// Update PHI nodes in this BasicBlock before removal of predecessor \p 
Pred.
   /// Note that this function does not actually remove the predecessor.
   ///
-  /// If \p KeepOneInputPHIs is true, then don't remove PHIs that are left with
-  /// one incoming value and don't simplify PHIs with all incoming values the
-  /// same.
+  /// If \p KeepOneInputPHIs is true then don't remove PHIs that are left with
+  /// zero or one incoming values, and don't simplify PHIs with all incoming
+  /// values the same.
   void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs = false);
 
   bool canSplitPredecessors() const;

diff  --git a/llvm/lib/IR/BasicBlock.cpp b/llvm/lib/IR/BasicBlock.cpp
index aee769aa0fea..3268641ddf19 100644
--- a/llvm/lib/IR/BasicBlock.cpp
+++ b/llvm/lib/IR/BasicBlock.cpp
@@ -330,7 +330,7 @@ void BasicBlock::removePredecessor(BasicBlock *Pred,
 
   unsigned NumPreds = cast(front()).getNumIncomingValues();
   for (PHINode &Phi : make_early_inc_range(phis())) {
-Phi.removeIncomingValue(Pred);
+Phi.removeIncomingValue(Pred, !KeepOneInputPHIs);
 if (KeepOneInputPHIs)
   continue;
 // If we have a single predecessor, removeIncomingValue erased the PHI

diff  --git a/llvm/test/Transforms/LoopRotate/phi-empty.ll 
b/llvm/test/Transforms/LoopRotate/phi-empty.ll
deleted file mode 100644
index e246cff91b62..
--- a/llvm/test/Transforms/LoopRotate/phi-empty.ll
+++ /dev/null
@@ -1,34 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -lcssa -loop-rotate < %s | FileCheck %s
-
-define void @PR48296(i1 %cond) {
-; CHECK-LABEL: @PR48296(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:br label [[LOOP:%.*]]
-; CHECK:   loop:
-; CHECK-NEXT:br i1 [[COND:%.*]], label [[INC:%.*]], label 
[[LOOP_BACKEDGE:%.*]]
-; CHECK:   loop.backedge:
-; CHECK-NEXT:br label [[LOOP]]
-; CHECK:   dead:
-; CHECK-NEXT:unreachable
-; CHECK:   inc:
-; CHECK-NEXT:br label [[LOOP_BACKEDGE]]
-; CHECK:   return:
-; CHECK-NEXT:ret void
-;
-entry:
-  br label %loop
-
-loop:
-  br i1 %cond, label %inc, label %loop
-
-dead:; No predecessors!
-  br i1 %cond, label %inc, 

[llvm-branch-commits] [llvm] 1dc38f8 - [IR] improve code comment/logic in removePredecessor(); NFC

2020-11-30 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2020-11-30T10:51:30-05:00
New Revision: 1dc38f8cfbbc4cce12f8416a1e51d38285e6872f

URL: 
https://github.com/llvm/llvm-project/commit/1dc38f8cfbbc4cce12f8416a1e51d38285e6872f
DIFF: 
https://github.com/llvm/llvm-project/commit/1dc38f8cfbbc4cce12f8416a1e51d38285e6872f.diff

LOG: [IR] improve code comment/logic in removePredecessor(); NFC

This was suggested in the post-commit review of ce134da4b1.

Added: 


Modified: 
llvm/lib/IR/BasicBlock.cpp

Removed: 




diff  --git a/llvm/lib/IR/BasicBlock.cpp b/llvm/lib/IR/BasicBlock.cpp
index 3268641ddf19..95b8602b9b6c 100644
--- a/llvm/lib/IR/BasicBlock.cpp
+++ b/llvm/lib/IR/BasicBlock.cpp
@@ -333,14 +333,16 @@ void BasicBlock::removePredecessor(BasicBlock *Pred,
 Phi.removeIncomingValue(Pred, !KeepOneInputPHIs);
 if (KeepOneInputPHIs)
   continue;
-// If we have a single predecessor, removeIncomingValue erased the PHI
-// node itself.
+
+// If we have a single predecessor, removeIncomingValue may have erased the
+// PHI node itself.
+if (NumPreds == 1)
+  continue;
+
 // Try to replace the PHI node with a constant value.
-if (NumPreds > 1) {
-  if (Value *PhiConstant = Phi.hasConstantValue()) {
-Phi.replaceAllUsesWith(PhiConstant);
-Phi.eraseFromParent();
-  }
+if (Value *PhiConstant = Phi.hasConstantValue()) {
+  Phi.replaceAllUsesWith(PhiConstant);
+  Phi.eraseFromParent();
 }
   }
 }



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 9eb2c01 - [IR][LoopRotate] remove assertion that phi must have at least one operand

2020-11-30 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2020-11-30T11:32:42-05:00
New Revision: 9eb2c0113dfe2c1054e524122ca0e17ad552bb01

URL: 
https://github.com/llvm/llvm-project/commit/9eb2c0113dfe2c1054e524122ca0e17ad552bb01
DIFF: 
https://github.com/llvm/llvm-project/commit/9eb2c0113dfe2c1054e524122ca0e17ad552bb01.diff

LOG: [IR][LoopRotate] remove assertion that phi must have at least one operand

This was suggested in D92247 - I initially committed an alternate
fix (bfd2c216ea) to avoid the crash/assert shown in
https://llvm.org/PR48296,
but that was reverted because it caused msan failures on other
tests. We can try to revive that patch using the test included
here, but I do not have an immediate plan to isolate that problem.

Added: 
llvm/test/Transforms/LoopRotate/phi-empty.ll

Modified: 
llvm/lib/IR/Verifier.cpp

Removed: 




diff  --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index eda923da8df8..bc24d488d2f7 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -2565,11 +2565,6 @@ void Verifier::visitBasicBlock(BasicBlock &BB) {
 SmallVector<std::pair<BasicBlock *, Value *>, 8> Values;
 llvm::sort(Preds);
 for (const PHINode &PN : BB.phis()) {
-  // Ensure that PHI nodes have at least one entry!
-  Assert(PN.getNumIncomingValues() != 0,
- "PHI nodes must have at least one entry.  If the block is dead, "
- "the PHI should be removed!",
- &PN);
   Assert(PN.getNumIncomingValues() == Preds.size(),
  "PHINode should have one entry for each predecessor of its "
  "parent basic block!",

diff  --git a/llvm/test/Transforms/LoopRotate/phi-empty.ll 
b/llvm/test/Transforms/LoopRotate/phi-empty.ll
new file mode 100644
index ..9337133f8903
--- /dev/null
+++ b/llvm/test/Transforms/LoopRotate/phi-empty.ll
@@ -0,0 +1,39 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -lcssa -loop-rotate < %s | FileCheck %s
+
+; After rotate, the phi has no operands because it has no predecessors.
+; We might want to delete that instruction instead, but we do not
+; fail/assert by assuming that the phi is invalid IR.
+
+define void @PR48296(i1 %cond) {
+; CHECK-LABEL: @PR48296(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:br label [[LOOP:%.*]]
+; CHECK:   loop:
+; CHECK-NEXT:br i1 [[COND:%.*]], label [[INC:%.*]], label 
[[LOOP_BACKEDGE:%.*]]
+; CHECK:   loop.backedge:
+; CHECK-NEXT:br label [[LOOP]]
+; CHECK:   dead:
+; CHECK-NEXT:unreachable
+; CHECK:   inc:
+; CHECK-NEXT:br label [[LOOP_BACKEDGE]]
+; CHECK:   return:
+; CHECK-NEXT:[[R:%.*]] = phi i32
+; CHECK-NEXT:ret void
+;
+entry:
+  br label %loop
+
+loop:
+  br i1 %cond, label %inc, label %loop
+
+dead:; No predecessors!
+  br i1 %cond, label %inc, label %return
+
+inc:
+  br label %loop
+
+return:
+  %r = phi i32 [ undef, %dead ]
+  ret void
+}



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 40dc535 - [x86] add tests for maxnum/minnum with nnan; NFC

2020-11-30 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2020-11-30T14:30:28-05:00
New Revision: 40dc535b5afffb1d309e44ca636219c1b8a6873b

URL: 
https://github.com/llvm/llvm-project/commit/40dc535b5afffb1d309e44ca636219c1b8a6873b
DIFF: 
https://github.com/llvm/llvm-project/commit/40dc535b5afffb1d309e44ca636219c1b8a6873b.diff

LOG: [x86] add tests for maxnum/minnum with nnan; NFC

Added: 


Modified: 
llvm/test/Analysis/CostModel/X86/fmaxnum.ll
llvm/test/Analysis/CostModel/X86/fminnum.ll

Removed: 




diff  --git a/llvm/test/Analysis/CostModel/X86/fmaxnum.ll 
b/llvm/test/Analysis/CostModel/X86/fmaxnum.ll
index f1d8e3270298..3116e65388e8 100644
--- a/llvm/test/Analysis/CostModel/X86/fmaxnum.ll
+++ b/llvm/test/Analysis/CostModel/X86/fmaxnum.ll
@@ -92,6 +92,88 @@ define i32 @f64(i32 %arg) {
   ret i32 undef
 }
 
+define i32 @f32_nnan(i32 %arg) {
+; SSE-LABEL: 'f32_nnan'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %F32 = 
call nnan float @llvm.maxnum.f32(float undef, float undef)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F32 
= call nnan <2 x float> @llvm.maxnum.v2f32(<2 x float> undef, <2 x float> undef)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F32 
= call nnan <4 x float> @llvm.maxnum.v4f32(<4 x float> undef, <4 x float> undef)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F32 
= call nnan <8 x float> @llvm.maxnum.v8f32(<8 x float> undef, <8 x float> undef)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: 
%V16F32 = call nnan <16 x float> @llvm.maxnum.v16f32(<16 x float> undef, <16 x 
float> undef)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 
undef
+;
+; AVX1-LABEL: 'f32_nnan'
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %F32 = 
call nnan float @llvm.maxnum.f32(float undef, float undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2F32 
= call nnan <2 x float> @llvm.maxnum.v2f32(<2 x float> undef, <2 x float> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4F32 
= call nnan <4 x float> @llvm.maxnum.v4f32(<4 x float> undef, <4 x float> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8F32 
= call nnan <8 x float> @llvm.maxnum.v8f32(<8 x float> undef, <8 x float> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: 
%V16F32 = call nnan <16 x float> @llvm.maxnum.v16f32(<16 x float> undef, <16 x 
float> undef)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret 
i32 undef
+;
+; AVX2-LABEL: 'f32_nnan'
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %F32 = 
call nnan float @llvm.maxnum.f32(float undef, float undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2F32 
= call nnan <2 x float> @llvm.maxnum.v2f32(<2 x float> undef, <2 x float> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4F32 
= call nnan <4 x float> @llvm.maxnum.v4f32(<4 x float> undef, <4 x float> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8F32 
= call nnan <8 x float> @llvm.maxnum.v8f32(<8 x float> undef, <8 x float> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: 
%V16F32 = call nnan <16 x float> @llvm.maxnum.v16f32(<16 x float> undef, <16 x 
float> undef)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret 
i32 undef
+;
+; AVX512-LABEL: 'f32_nnan'
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F32 
= call nnan float @llvm.maxnum.f32(float undef, float undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: 
%V2F32 = call nnan <2 x float> @llvm.maxnum.v2f32(<2 x float> undef, <2 x 
float> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: 
%V4F32 = call nnan <4 x float> @llvm.maxnum.v4f32(<4 x float> undef, <4 x 
float> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: 
%V8F32 = call nnan <8 x float> @llvm.maxnum.v8f32(<8 x float> undef, <8 x 
float> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: 
%V16F32 = call nnan <16 x float> @llvm.maxnum.v16f32(<16 x float> undef, <16 x 
float> undef)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret 
i32 undef
+;
+  %F32 = call nnan float @llvm.maxnum.f32(float undef, float undef)
+  %V2F32 = call nnan <2 x float> @llvm.maxnum.v2f32(<2 x float> undef, <2 x 
float> undef)
+  %V4F32 = call nnan <4 x float> @llvm.maxnum.v4f32(<4 x float> undef, <4 x 
float> undef)
+  %V8F32 = call nnan <8 x float> @llvm.maxnum.v8f32(<8 x float> undef, <8 x 
float> undef)
+  %V16F32 = call nnan <16 x float> @llvm.maxnum.v16f32(<16 x float> undef, 

[llvm-branch-commits] [llvm] b2cdd77 - [InstCombine] add tests for sign-bit-shift-of-sub; NFC

2020-12-01 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2020-12-01T08:01:00-05:00
New Revision: b2cdd776e3e5a709d5904633956d3e9eaad78020

URL: 
https://github.com/llvm/llvm-project/commit/b2cdd776e3e5a709d5904633956d3e9eaad78020
DIFF: 
https://github.com/llvm/llvm-project/commit/b2cdd776e3e5a709d5904633956d3e9eaad78020.diff

LOG: [InstCombine] add tests for sign-bit-shift-of-sub; NFC

Added: 


Modified: 
llvm/test/Transforms/InstCombine/ashr-lshr.ll
llvm/test/Transforms/InstCombine/lshr.ll

Removed: 




diff  --git a/llvm/test/Transforms/InstCombine/ashr-lshr.ll 
b/llvm/test/Transforms/InstCombine/ashr-lshr.ll
index ee90dd5170c3..dc1deb043428 100644
--- a/llvm/test/Transforms/InstCombine/ashr-lshr.ll
+++ b/llvm/test/Transforms/InstCombine/ashr-lshr.ll
@@ -434,3 +434,139 @@ define <2 x i32> @ashr_lshr_inv_vec_wrong_pred(<2 x i32> 
%x, <2 x i32> %y) {
   %ret = select <2 x i1> %cmp, <2 x i32> %r, <2 x i32> %l
   ret <2 x i32> %ret
 }
+
+define i32 @lshr_sub_nsw(i32 %x, i32 %y) {
+; CHECK-LABEL: @lshr_sub_nsw(
+; CHECK-NEXT:[[SUB:%.*]] = sub nsw i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:[[SHR:%.*]] = lshr i32 [[SUB]], 31
+; CHECK-NEXT:ret i32 [[SHR]]
+;
+  %sub = sub nsw i32 %x, %y
+  %shr = lshr i32 %sub, 31
+  ret i32 %shr
+}
+
+define i32 @lshr_sub_wrong_amount(i32 %x, i32 %y) {
+; CHECK-LABEL: @lshr_sub_wrong_amount(
+; CHECK-NEXT:[[SUB:%.*]] = sub nsw i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:[[SHR:%.*]] = lshr i32 [[SUB]], 30
+; CHECK-NEXT:ret i32 [[SHR]]
+;
+  %sub = sub nsw i32 %x, %y
+  %shr = lshr i32 %sub, 30
+  ret i32 %shr
+}
+
+define i32 @lshr_sub(i32 %x, i32 %y) {
+; CHECK-LABEL: @lshr_sub(
+; CHECK-NEXT:[[SUB:%.*]] = sub i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:[[SHR:%.*]] = lshr i32 [[SUB]], 31
+; CHECK-NEXT:ret i32 [[SHR]]
+;
+  %sub = sub i32 %x, %y
+  %shr = lshr i32 %sub, 31
+  ret i32 %shr
+}
+
+define i32 @lshr_sub_nsw_extra_use(i32 %x, i32 %y, i32* %p) {
+; CHECK-LABEL: @lshr_sub_nsw_extra_use(
+; CHECK-NEXT:[[SUB:%.*]] = sub nsw i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:store i32 [[SUB]], i32* [[P:%.*]], align 4
+; CHECK-NEXT:[[SHR:%.*]] = lshr i32 [[SUB]], 31
+; CHECK-NEXT:ret i32 [[SHR]]
+;
+  %sub = sub nsw i32 %x, %y
+  store i32 %sub, i32* %p
+  %shr = lshr i32 %sub, 31
+  ret i32 %shr
+}
+
+define <3 x i42> @lshr_sub_nsw_splat(<3 x i42> %x, <3 x i42> %y) {
+; CHECK-LABEL: @lshr_sub_nsw_splat(
+; CHECK-NEXT:[[SUB:%.*]] = sub nsw <3 x i42> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:[[SHR:%.*]] = lshr <3 x i42> [[SUB]], <i42 41, i42 41, i42 41>
+; CHECK-NEXT:ret <3 x i42> [[SHR]]
+;
+  %sub = sub nsw <3 x i42> %x, %y
+  %shr = lshr <3 x i42> %sub, <i42 41, i42 41, i42 41>
+  ret <3 x i42> %shr
+}
+
+define <3 x i42> @lshr_sub_nsw_splat_undef(<3 x i42> %x, <3 x i42> %y) {
+; CHECK-LABEL: @lshr_sub_nsw_splat_undef(
+; CHECK-NEXT:[[SUB:%.*]] = sub nsw <3 x i42> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:[[SHR:%.*]] = lshr <3 x i42> [[SUB]], <i42 41, i42 undef, i42 41>
+; CHECK-NEXT:ret <3 x i42> [[SHR]]
+;
+  %sub = sub nsw <3 x i42> %x, %y
+  %shr = lshr <3 x i42> %sub, <i42 41, i42 undef, i42 41>
+  ret <3 x i42> %shr
+}
+
+define i17 @ashr_sub_nsw(i17 %x, i17 %y) {
+; CHECK-LABEL: @ashr_sub_nsw(
+; CHECK-NEXT:[[SUB:%.*]] = sub nsw i17 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:[[SHR:%.*]] = ashr i17 [[SUB]], 16
+; CHECK-NEXT:ret i17 [[SHR]]
+;
+  %sub = sub nsw i17 %x, %y
+  %shr = ashr i17 %sub, 16
+  ret i17 %shr
+}
+
+define i17 @ashr_sub_wrong_amount(i17 %x, i17 %y) {
+; CHECK-LABEL: @ashr_sub_wrong_amount(
+; CHECK-NEXT:[[SUB:%.*]] = sub nsw i17 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:[[SHR:%.*]] = ashr i17 [[SUB]], 15
+; CHECK-NEXT:ret i17 [[SHR]]
+;
+  %sub = sub nsw i17 %x, %y
+  %shr = ashr i17 %sub, 15
+  ret i17 %shr
+}
+
+define i32 @ashr_sub(i32 %x, i32 %y) {
+; CHECK-LABEL: @ashr_sub(
+; CHECK-NEXT:[[SUB:%.*]] = sub i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:[[SHR:%.*]] = ashr i32 [[SUB]], 31
+; CHECK-NEXT:ret i32 [[SHR]]
+;
+  %sub = sub i32 %x, %y
+  %shr = ashr i32 %sub, 31
+  ret i32 %shr
+}
+
+define i32 @ashr_sub_nsw_extra_use(i32 %x, i32 %y, i32* %p) {
+; CHECK-LABEL: @ashr_sub_nsw_extra_use(
+; CHECK-NEXT:[[SUB:%.*]] = sub nsw i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:store i32 [[SUB]], i32* [[P:%.*]], align 4
+; CHECK-NEXT:[[SHR:%.*]] = ashr i32 [[SUB]], 31
+; CHECK-NEXT:ret i32 [[SHR]]
+;
+  %sub = sub nsw i32 %x, %y
+  store i32 %sub, i32* %p
+  %shr = ashr i32 %sub, 31
+  ret i32 %shr
+}
+
+define <3 x i43> @ashr_sub_nsw_splat(<3 x i43> %x, <3 x i43> %y) {
+; CHECK-LABEL: @ashr_sub_nsw_splat(
+; CHECK-NEXT:[[SUB:%.*]] = sub nsw <3 x i43> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:[[SHR:%.*]] = ashr <3 x i43> [[SUB]], <i43 42, i43 42, i43 42>
+; CHECK-NEXT:ret <3 x i43> [[SHR]]
+;
+  %sub = sub nsw <3 x i43> %x, %y
+  %shr = ashr <3 x i43> %sub, <i43 42, i43 42, i43 42>
+  ret <3 x i43> %shr
+}
+
+define <3 x i43> @ashr_sub_nsw_splat_undef(<3 x i43> %x, <3 x i43> %y) {
+; CHECK-LABEL: @ashr_sub_nsw_splat_undef(
+; CHECK-NEXT:[[SUB:%.*]] = sub nsw 

[llvm-branch-commits] [llvm] 9f60b8b - [InstCombine] canonicalize sign-bit-shift of difference to ext(icmp)

2020-12-01 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2020-12-01T09:58:11-05:00
New Revision: 9f60b8b3d2e2cd38b9ae45da7e36a77b3c9dd258

URL: 
https://github.com/llvm/llvm-project/commit/9f60b8b3d2e2cd38b9ae45da7e36a77b3c9dd258
DIFF: 
https://github.com/llvm/llvm-project/commit/9f60b8b3d2e2cd38b9ae45da7e36a77b3c9dd258.diff

LOG: [InstCombine] canonicalize sign-bit-shift of difference to ext(icmp)

icmp is the preferred spelling in IR because icmp analysis is
expected to be better than any other analysis. This should
lead to more follow-on folding potential.

It's difficult to say exactly what we should do in codegen to
compensate. For example on AArch64, which of these is preferred:
sub w8, w0, w1
lsr w0, w8, #31

vs:
cmp w0, w1
cset w0, lt

If there are perf regressions, then we should deal with those in
codegen on a case-by-case basis.

A possible motivating example for better optimization is shown in:
https://llvm.org/PR43198 but that will require other transforms
before anything changes there.

Alive proof:
https://rise4fun.com/Alive/o4E

  Name: sign-bit splat
  Pre: C1 == (width(%x) - 1)
  %s = sub nsw %x, %y
  %r = ashr %s, C1
  =>
  %c = icmp slt %x, %y
  %r = sext %c

  Name: sign-bit LSB
  Pre: C1 == (width(%x) - 1)
  %s = sub nsw %x, %y
  %r = lshr %s, C1
  =>
  %c = icmp slt %x, %y
  %r = zext %c
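
As a quick sanity check of both folds outside of Alive, here is a small
standalone C++ sketch (an illustration of mine, not part of the patch) that
exhaustively verifies the i8 versions wherever the nsw precondition holds:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (int x = -128; x <= 127; ++x) {
        for (int y = -128; y <= 127; ++y) {
          int diff = x - y;
          // The fold requires nsw: skip pairs where i8 subtraction overflows.
          if (diff < -128 || diff > 127)
            continue;
          // lshr i8 %sub, 7  -->  zext (icmp slt i8 %x, %y)
          assert(((uint8_t)diff >> 7) == (uint8_t)(x < y));
          // ashr i8 %sub, 7  -->  sext (icmp slt i8 %x, %y)
          // (assumes arithmetic right shift of signed values, as on all
          // mainstream compilers)
          assert((int8_t)((int8_t)diff >> 7) == (int8_t)-(x < y));
        }
      }
      return 0;
    }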

Added: 


Modified: 
llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
llvm/test/Transforms/InstCombine/ashr-lshr.ll
llvm/test/Transforms/InstCombine/sub-ashr-and-to-icmp-select.ll
llvm/test/Transforms/InstCombine/sub-ashr-or-to-icmp-select.ll

Removed: 




diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp 
b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
index 4eaf1bcc22fe..7295369365c4 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -1131,6 +1131,12 @@ Instruction *InstCombinerImpl::visitLShr(BinaryOperator 
&I) {
   }
 }
 
+// lshr i32 (X -nsw Y), 31 --> zext (X < Y)
+Value *Y;
+if (ShAmt == BitWidth - 1 &&
+match(Op0, m_OneUse(m_NSWSub(m_Value(X), m_Value(Y)))))
+  return new ZExtInst(Builder.CreateICmpSLT(X, Y), Ty);
+
 if (match(Op0, m_LShr(m_Value(X), m_APInt(ShOp1)))) {
   unsigned AmtSum = ShAmt + ShOp1->getZExtValue();
   // Oversized shifts are simplified to zero in InstSimplify.
@@ -1293,6 +1299,12 @@ Instruction *InstCombinerImpl::visitAShr(BinaryOperator 
&I) {
   return new SExtInst(NewSh, Ty);
 }
 
+// ashr i32 (X -nsw Y), 31 --> sext (X < Y)
+Value *Y;
+if (ShAmt == BitWidth - 1 &&
+match(Op0, m_OneUse(m_NSWSub(m_Value(X), m_Value(Y)))))
+  return new SExtInst(Builder.CreateICmpSLT(X, Y), Ty);
+
 // If the shifted-out value is known-zero, then this is an exact shift.
 if (!I.isExact() &&
 MaskedValueIsZero(Op0, APInt::getLowBitsSet(BitWidth, ShAmt), 0, &I)) {

diff  --git a/llvm/test/Transforms/InstCombine/ashr-lshr.ll 
b/llvm/test/Transforms/InstCombine/ashr-lshr.ll
index dc1deb043428..72fa0252d839 100644
--- a/llvm/test/Transforms/InstCombine/ashr-lshr.ll
+++ b/llvm/test/Transforms/InstCombine/ashr-lshr.ll
@@ -437,8 +437,8 @@ define <2 x i32> @ashr_lshr_inv_vec_wrong_pred(<2 x i32> 
%x, <2 x i32> %y) {
 
 define i32 @lshr_sub_nsw(i32 %x, i32 %y) {
 ; CHECK-LABEL: @lshr_sub_nsw(
-; CHECK-NEXT:[[SUB:%.*]] = sub nsw i32 [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:[[SHR:%.*]] = lshr i32 [[SUB]], 31
+; CHECK-NEXT:[[TMP1:%.*]] = icmp slt i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:[[SHR:%.*]] = zext i1 [[TMP1]] to i32
 ; CHECK-NEXT:ret i32 [[SHR]]
 ;
   %sub = sub nsw i32 %x, %y
@@ -446,6 +446,8 @@ define i32 @lshr_sub_nsw(i32 %x, i32 %y) {
   ret i32 %shr
 }
 
+; negative test - must shift sign-bit
+
 define i32 @lshr_sub_wrong_amount(i32 %x, i32 %y) {
 ; CHECK-LABEL: @lshr_sub_wrong_amount(
 ; CHECK-NEXT:[[SUB:%.*]] = sub nsw i32 [[X:%.*]], [[Y:%.*]]
@@ -457,6 +459,8 @@ define i32 @lshr_sub_wrong_amount(i32 %x, i32 %y) {
   ret i32 %shr
 }
 
+; negative test - must have nsw
+
 define i32 @lshr_sub(i32 %x, i32 %y) {
 ; CHECK-LABEL: @lshr_sub(
 ; CHECK-NEXT:[[SUB:%.*]] = sub i32 [[X:%.*]], [[Y:%.*]]
@@ -468,6 +472,8 @@ define i32 @lshr_sub(i32 %x, i32 %y) {
   ret i32 %shr
 }
 
+; negative test - one-use
+
 define i32 @lshr_sub_nsw_extra_use(i32 %x, i32 %y, i32* %p) {
 ; CHECK-LABEL: @lshr_sub_nsw_extra_use(
 ; CHECK-NEXT:[[SUB:%.*]] = sub nsw i32 [[X:%.*]], [[Y:%.*]]
@@ -483,8 +489,8 @@ define i32 @lshr_sub_nsw_extra_use(i32 %x, i32 %y, i32* %p) 
{
 
 define <3 x i42> @lshr_sub_nsw_splat(<3 x i42> %x, <3 x i42> %y) {
 ; CHECK-LABEL: @lshr_sub_nsw_splat(
-; CHECK-NEXT:[[SUB:%.*]] = sub nsw <3 x i42> [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:[[SHR:%.*]] = lshr <3 x i42> [[SUB]], <i42 41, i42 41, i42 41>
+; CHECK-NEXT:[[TMP1:%.*]] = icmp slt <3 x i42> [[X:%.*]], [[Y:%.*

[llvm-branch-commits] [llvm] 136f98e - [x86] adjust cost model values for minnum/maxnum with fast-math-flags

2020-12-01 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2020-12-01T10:45:53-05:00
New Revision: 136f98e5236522f55693b8b2d23e87692987f734

URL: 
https://github.com/llvm/llvm-project/commit/136f98e5236522f55693b8b2d23e87692987f734
DIFF: 
https://github.com/llvm/llvm-project/commit/136f98e5236522f55693b8b2d23e87692987f734.diff

LOG: [x86] adjust cost model values for minnum/maxnum with fast-math-flags

Without FMF, we lower these intrinsics into something like this:

vmaxsd  %xmm0, %xmm1, %xmm2
vcmpunordsd %xmm0, %xmm0, %xmm0
vblendvpd   %xmm0, %xmm1, %xmm2, %xmm0

But if we can ignore NaNs, the single min/max instruction is enough
because there is no need to fix up the x86 logic that corresponds to
X > Y ? X : Y.

We probably want to make other adjustments for FP intrinsics with FMF
to account for specialized codegen (for example, FSQRT).

Differential Revision: https://reviews.llvm.org/D92337
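
To see why the NaN fix-up exists at all, here is a tiny standalone
illustration (my own sketch, not from the patch): x86's (v)maxsd returns its
second source operand whenever either input is NaN, while llvm.maxnum must
return the non-NaN operand, so without nnan the backend papers over the
mismatch with the compare/blend sequence shown above.

    #include <cassert>
    #include <cmath>

    // x86-style max: any comparison with NaN is false, so a NaN in either
    // position makes this return b -- the "second source operand" rule.
    static double maxsd(double a, double b) { return a > b ? a : b; }

    // llvm.maxnum semantics: prefer the non-NaN operand.
    static double maxnum(double a, double b) {
      if (std::isnan(a)) return b;
      if (std::isnan(b)) return a;
      return a > b ? a : b;
    }

    int main() {
      double qnan = std::nan("");
      assert(maxnum(1.0, qnan) == 1.0 && maxnum(qnan, 1.0) == 1.0);
      assert(std::isnan(maxsd(1.0, qnan))); // the mismatch the blend fixes
      assert(maxsd(qnan, 1.0) == 1.0);      // agrees here only by operand order
      return 0;
    }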

Added: 


Modified: 
llvm/lib/Target/X86/X86TargetTransformInfo.cpp
llvm/test/Analysis/CostModel/X86/fmaxnum.ll
llvm/test/Analysis/CostModel/X86/fminnum.ll

Removed: 




diff  --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp 
b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 770317a9a8b5..36a04a850110 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -2802,93 +2802,105 @@ int X86TTIImpl::getTypeBasedIntrinsicInstrCost(
   return LT.first * Cost;
 }
 
+auto adjustTableCost = [](const CostTblEntry &Entry, int LegalizationCost,
+  FastMathFlags FMF) {
+  // If there are no NANs to deal with, then these are reduced to a
+  // single MIN** or MAX** instruction instead of the MIN/CMP/SELECT that 
we
+  // assume is used in the non-fast case.
+  if (Entry.ISD == ISD::FMAXNUM || Entry.ISD == ISD::FMINNUM) {
+if (FMF.noNaNs())
+  return LegalizationCost * 1;
+  }
+  return LegalizationCost * (int)Entry.Cost;
+};
+
 if (ST->useGLMDivSqrtCosts())
   if (const auto *Entry = CostTableLookup(GLMCostTbl, ISD, MTy))
-return LT.first * Entry->Cost;
+return adjustTableCost(*Entry, LT.first, ICA.getFlags());
 
 if (ST->isSLM())
   if (const auto *Entry = CostTableLookup(SLMCostTbl, ISD, MTy))
-return LT.first * Entry->Cost;
+return adjustTableCost(*Entry, LT.first, ICA.getFlags());
 
 if (ST->hasCDI())
   if (const auto *Entry = CostTableLookup(AVX512CDCostTbl, ISD, MTy))
-return LT.first * Entry->Cost;
+return adjustTableCost(*Entry, LT.first, ICA.getFlags());
 
 if (ST->hasBWI())
   if (const auto *Entry = CostTableLookup(AVX512BWCostTbl, ISD, MTy))
-return LT.first * Entry->Cost;
+return adjustTableCost(*Entry, LT.first, ICA.getFlags());
 
 if (ST->hasAVX512())
   if (const auto *Entry = CostTableLookup(AVX512CostTbl, ISD, MTy))
-return LT.first * Entry->Cost;
+return adjustTableCost(*Entry, LT.first, ICA.getFlags());
 
 if (ST->hasXOP())
   if (const auto *Entry = CostTableLookup(XOPCostTbl, ISD, MTy))
-return LT.first * Entry->Cost;
+return adjustTableCost(*Entry, LT.first, ICA.getFlags());
 
 if (ST->hasAVX2())
   if (const auto *Entry = CostTableLookup(AVX2CostTbl, ISD, MTy))
-return LT.first * Entry->Cost;
+return adjustTableCost(*Entry, LT.first, ICA.getFlags());
 
 if (ST->hasAVX())
   if (const auto *Entry = CostTableLookup(AVX1CostTbl, ISD, MTy))
-return LT.first * Entry->Cost;
+return adjustTableCost(*Entry, LT.first, ICA.getFlags());
 
 if (ST->hasSSE42())
   if (const auto *Entry = CostTableLookup(SSE42CostTbl, ISD, MTy))
-return LT.first * Entry->Cost;
+return adjustTableCost(*Entry, LT.first, ICA.getFlags());
 
 if (ST->hasSSE41())
   if (const auto *Entry = CostTableLookup(SSE41CostTbl, ISD, MTy))
-return LT.first * Entry->Cost;
+return adjustTableCost(*Entry, LT.first, ICA.getFlags());
 
 if (ST->hasSSSE3())
   if (const auto *Entry = CostTableLookup(SSSE3CostTbl, ISD, MTy))
-return LT.first * Entry->Cost;
+return adjustTableCost(*Entry, LT.first, ICA.getFlags());
 
 if (ST->hasSSE2())
   if (const auto *Entry = CostTableLookup(SSE2CostTbl, ISD, MTy))
-return LT.first * Entry->Cost;
+return adjustTableCost(*Entry, LT.first, ICA.getFlags());
 
 if (ST->hasSSE1())
   if (const auto *Entry = CostTableLookup(SSE1CostTbl, ISD, MTy))
-return LT.first * Entry->Cost;
+return adjustTableCost(*Entry, LT.first, ICA.getFlags());
 
 if (ST->hasBMI()) {
   if (ST->is64Bit())
 if (const auto *Entry = CostTableLookup(BMI64CostTbl, ISD, MTy))
-  return LT.first * Entry->Cost;
+  return adjustTableCost(*Entry, LT.first, ICA.getFlags());
 
   if (const auto 

[llvm-branch-commits] [llvm] 56fd29e - [SLP] use 'match' for binop/select; NFC

2020-12-02 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2020-12-02T09:04:08-05:00
New Revision: 56fd29e93bd133d354e7e639bca1c025162e91ac

URL: 
https://github.com/llvm/llvm-project/commit/56fd29e93bd133d354e7e639bca1c025162e91ac
DIFF: 
https://github.com/llvm/llvm-project/commit/56fd29e93bd133d354e7e639bca1c025162e91ac.diff

LOG: [SLP] use 'match' for binop/select; NFC

This might be a small improvement in readability, but the
real motivation is to make it easier to adapt the code to
deal with intrinsics like 'maxnum' and/or integer min/max.

There is potentially help in doing that with D92086, but
we might also just add specialized wrappers here to deal
with the expected patterns.
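
As a concrete (and hypothetical) sketch of where that could go, the matcher
below shows how an intrinsic such as maxnum might be recognized with the same
PatternMatch idiom this patch switches to; the wrapper name and structure are
mine, not code from the tree:

    #include "llvm/IR/Instruction.h"
    #include "llvm/IR/Intrinsics.h"
    #include "llvm/IR/PatternMatch.h"
    using namespace llvm;
    using namespace llvm::PatternMatch;

    static bool isReductionCandidateSketch(Instruction *Inst) {
      Value *B0, *B1;
      if (match(Inst, m_BinOp(m_Value(B0), m_Value(B1))))
        return true;
      if (match(Inst, m_Select(m_Value(), m_Value(), m_Value())))
        return true;
      // The kind of extension the refactor prepares for:
      return match(Inst, m_Intrinsic<Intrinsic::maxnum>(m_Value(), m_Value()));
    }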

Added: 


Modified: 
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 




diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp 
b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index bfec51f0ada6..66d736974fbc 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -7463,9 +7463,10 @@ static bool tryToVectorizeHorReductionOrInstOperands(
 Instruction *Inst;
 unsigned Level;
 std::tie(Inst, Level) = Stack.pop_back_val();
-auto *BI = dyn_cast<BinaryOperator>(Inst);
-auto *SI = dyn_cast<SelectInst>(Inst);
-if (BI || SI) {
+Value *B0, *B1;
+bool IsBinop = match(Inst, m_BinOp(m_Value(B0), m_Value(B1)));
+bool IsSelect = match(Inst, m_Select(m_Value(), m_Value(), m_Value()));
+if (IsBinop || IsSelect) {
   HorizontalReduction HorRdx;
   if (HorRdx.matchAssociativeReduction(P, Inst)) {
 if (HorRdx.tryToReduce(R, TTI)) {
@@ -7476,10 +7477,10 @@ static bool tryToVectorizeHorReductionOrInstOperands(
   continue;
 }
   }
-  if (P && BI) {
-Inst = dyn_cast<Instruction>(BI->getOperand(0));
+  if (P && IsBinop) {
+Inst = dyn_cast<Instruction>(B0);
 if (Inst == P)
-  Inst = dyn_cast<Instruction>(BI->getOperand(1));
+  Inst = dyn_cast<Instruction>(B1);
 if (!Inst) {
   // Set P to nullptr to avoid re-analysis of phi node in
   // matchAssociativeReduction function unless this is the root node.



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 9d6d24c - [JumpThreading][VectorUtils] avoid infinite loop on unreachable IR

2020-12-02 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2020-12-02T13:39:33-05:00
New Revision: 9d6d24c25056c17db56cf1ef5124f82eb18afc2c

URL: 
https://github.com/llvm/llvm-project/commit/9d6d24c25056c17db56cf1ef5124f82eb18afc2c
DIFF: 
https://github.com/llvm/llvm-project/commit/9d6d24c25056c17db56cf1ef5124f82eb18afc2c.diff

LOG: [JumpThreading][VectorUtils] avoid infinite loop on unreachable IR

https://llvm.org/PR48362

It's possible that we could stub this out sooner somewhere
within JumpThreading, but I'm not sure how to do that, and
then we would still have potential danger in other callers.

I can't find a way to trigger this using 'instsimplify',
however, because that already has a bailout on unreachable
blocks.
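
Reduced to a toy model, the hazard looks like this (a sketch of mine, not
code from the tree): a recursive walk over a self-referential node never
terminates unless the walker notices the node feeding itself, which is
exactly the identity check the patch adds to findScalarElement.

    struct Node { const Node *VecInput = nullptr; };

    // Mirrors the recursion over insertelement chains: follow the vector
    // operand toward the original source.
    static const Node *walkToSource(const Node *N) {
      if (N->VecInput == N) // the added guard: the node is its own input
        return nullptr;
      if (!N->VecInput)
        return N;
      return walkToSource(N->VecInput);
    }

    int main() {
      Node A;
      A.VecInput = &A; // malformed, unreachable-code-style self-reference
      return walkToSource(&A) == nullptr ? 0 : 1;
    }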

Added: 


Modified: 
llvm/lib/Analysis/VectorUtils.cpp
llvm/test/Transforms/JumpThreading/unreachable-loops.ll

Removed: 




diff  --git a/llvm/lib/Analysis/VectorUtils.cpp 
b/llvm/lib/Analysis/VectorUtils.cpp
index bd69055ac246..90726979ca4a 100644
--- a/llvm/lib/Analysis/VectorUtils.cpp
+++ b/llvm/lib/Analysis/VectorUtils.cpp
@@ -290,6 +290,10 @@ Value *llvm::findScalarElement(Value *V, unsigned EltNo) {
 if (EltNo == IIElt)
   return III->getOperand(1);
 
+// Guard against infinite loop on malformed, unreachable IR.
+if (III == III->getOperand(0))
+  return nullptr;
+
 // Otherwise, the insertelement doesn't modify the value, recurse on its
 // vector input.
 return findScalarElement(III->getOperand(0), EltNo);

diff  --git a/llvm/test/Transforms/JumpThreading/unreachable-loops.ll 
b/llvm/test/Transforms/JumpThreading/unreachable-loops.ll
index 3f75aeae906c..a0f1c2127209 100644
--- a/llvm/test/Transforms/JumpThreading/unreachable-loops.ll
+++ b/llvm/test/Transforms/JumpThreading/unreachable-loops.ll
@@ -1,11 +1,12 @@
 ; RUN: opt -jump-threading -S < %s | FileCheck %s
 ; RUN: opt -passes=jump-threading -S < %s | FileCheck %s
+
 ; Check the unreachable loop won't cause infinite loop
 ; in jump-threading when it tries to update the predecessors'
 ; profile metadata from a phi node.
 
 define void @unreachable_single_bb_loop() {
-; CHECK-LABEL: @unreachable_single_bb_loop()
+; CHECK-LABEL: @unreachable_single_bb_loop(
 bb:
   %tmp = call i32 @a()
   %tmp1 = icmp eq i32 %tmp, 1
@@ -15,8 +16,8 @@ bb:
 bb2:  ; preds = %bb2
   %tmp4 = icmp ne i32 %tmp, 1
   switch i1 %tmp4, label %bb2 [
-i1 0, label %bb5
-i1 1, label %bb8
+  i1 0, label %bb5
+  i1 1, label %bb8
   ]
 
 bb5:  ; preds = %bb2, %bb
@@ -31,7 +32,7 @@ bb8:  ; preds = 
%bb8, %bb7, %bb5, %b
 }
 
 define void @unreachable_multi_bbs_loop() {
-; CHECK-LABEL: @unreachable_multi_bbs_loop()
+; CHECK-LABEL: @unreachable_multi_bbs_loop(
 bb:
   %tmp = call i32 @a()
   %tmp1 = icmp eq i32 %tmp, 1
@@ -44,8 +45,8 @@ bb3:  ; preds = 
%bb2
 bb2:  ; preds = %bb3
   %tmp4 = icmp ne i32 %tmp, 1
   switch i1 %tmp4, label %bb3 [
-i1 0, label %bb5
-i1 1, label %bb8
+  i1 0, label %bb5
+  i1 1, label %bb8
   ]
 
 bb5:  ; preds = %bb2, %bb
@@ -60,4 +61,85 @@ bb8:  ; preds = 
%bb8, %bb7, %bb5, %b
 }
 declare i32 @a()
 
+; This gets into a state that could cause instruction simplify
+; to hang - an insertelement instruction has itself as an operand.
+
+define void @PR48362() {
+; CHECK-LABEL: @PR48362(
+cleanup1491:  ; preds = %for.body1140
+  switch i32 0, label %cleanup2343.loopexit4 [
+  i32 0, label %cleanup.cont1500
+  i32 128, label %lbl_555.loopexit
+  ]
+
+cleanup.cont1500: ; preds = %cleanup1491
+  unreachable
+
+lbl_555.loopexit: ; preds = %cleanup1491
+  br label %for.body1509
+
+for.body1509: ; preds = %for.inc2340, 
%lbl_555.loopexit
+  %l_580.sroa.0.0 = phi <4 x i32> [ , %lbl_555.loopexit ], [ %l_580.sroa.0.2, 
%for.inc2340 ]
+  %p_55.addr.10 = phi i16 [ 0, %lbl_555.loopexit ], [ %p_55.addr.11, 
%for.inc2340 ]
+  %i82 = load i32, i32* undef, align 1
+  %tobool1731.not = icmp eq i32 %i82, 0
+  br i1 %tobool1731.not, label %if.end1733, label %if.then1732
+
+if.then1732:  ; preds = %for.body1509
+  br label %cleanup2329
+
+if.end1733:   ; preds = %for.body1509
+  %tobool1735.not = icmp eq i16 %p_55.addr.10, 0
+  br i1 %tobool1735.not, label %if.then1736, label %if.else1904
+
+if.then1736:  ; preds = %if.end1733
+  br label %cleanup2329
+
+if.else1904:  ; preds = %if.end1733
+  br label %for.body1911
+
+for.body1911: ; pre

[llvm-branch-commits] [llvm] 94f6d36 - [InstCombine] avoid crash on phi with unreachable incoming block (PR48369)

2020-12-06 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2020-12-06T09:31:47-05:00
New Revision: 94f6d365e4be0cf05930df0eedd2bfb23f6fce51

URL: 
https://github.com/llvm/llvm-project/commit/94f6d365e4be0cf05930df0eedd2bfb23f6fce51
DIFF: 
https://github.com/llvm/llvm-project/commit/94f6d365e4be0cf05930df0eedd2bfb23f6fce51.diff

LOG: [InstCombine] avoid crash on phi with unreachable incoming block (PR48369)

Added: 


Modified: 
llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
llvm/test/Transforms/InstCombine/phi-select-constant.ll

Removed: 




diff  --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp 
b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 92504da01cbf..cab6f1e5632f 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1083,9 +1083,11 @@ Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction 
&I, PHINode *PN) {
   // operation in that block.  However, if this is a critical edge, we would be
   // inserting the computation on some other paths (e.g. inside a loop).  Only
   // do this if the pred block is unconditionally branching into the phi block.
+  // Also, make sure that the pred block is not dead code.
   if (NonConstBB != nullptr) {
 BranchInst *BI = dyn_cast<BranchInst>(NonConstBB->getTerminator());
-if (!BI || !BI->isUnconditional()) return nullptr;
+if (!BI || !BI->isUnconditional() || !DT.isReachableFromEntry(NonConstBB))
+  return nullptr;
   }
 
   // Okay, we can do the transformation: create the new PHI node.

diff  --git a/llvm/test/Transforms/InstCombine/phi-select-constant.ll 
b/llvm/test/Transforms/InstCombine/phi-select-constant.ll
index 9d1c973925bb..c65be75c0b4a 100644
--- a/llvm/test/Transforms/InstCombine/phi-select-constant.ll
+++ b/llvm/test/Transforms/InstCombine/phi-select-constant.ll
@@ -77,16 +77,16 @@ final:
 define <2 x i8> @vec3(i1 %cond1, i1 %cond2, <2 x i1> %x, <2 x i8> %y, <2 x i8> 
%z) {
 ; CHECK-LABEL: @vec3(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:[[PHITMP1:%.*]] = shufflevector <2 x i8> [[Z:%.*]], <2 x i8> 
[[Y:%.*]], <2 x i32> 
+; CHECK-NEXT:[[PHI_SEL1:%.*]] = shufflevector <2 x i8> [[Z:%.*]], <2 x i8> 
[[Y:%.*]], <2 x i32> 
 ; CHECK-NEXT:br i1 [[COND1:%.*]], label [[IF1:%.*]], label [[ELSE:%.*]]
 ; CHECK:   if1:
-; CHECK-NEXT:[[PHITMP2:%.*]] = shufflevector <2 x i8> [[Y]], <2 x i8> 
[[Z]], <2 x i32> 
+; CHECK-NEXT:[[PHI_SEL2:%.*]] = shufflevector <2 x i8> [[Y]], <2 x i8> 
[[Z]], <2 x i32> 
 ; CHECK-NEXT:br i1 [[COND2:%.*]], label [[IF2:%.*]], label [[ELSE]]
 ; CHECK:   if2:
-; CHECK-NEXT:[[PHITMP:%.*]] = select <2 x i1> [[X:%.*]], <2 x i8> [[Y]], 
<2 x i8> [[Z]]
+; CHECK-NEXT:[[PHI_SEL:%.*]] = select <2 x i1> [[X:%.*]], <2 x i8> [[Y]], 
<2 x i8> [[Z]]
 ; CHECK-NEXT:br label [[ELSE]]
 ; CHECK:   else:
-; CHECK-NEXT:[[PHI:%.*]] = phi <2 x i8> [ [[PHITMP]], [[IF2]] ], [ 
[[PHITMP1]], [[ENTRY:%.*]] ], [ [[PHITMP2]], [[IF1]] ]
+; CHECK-NEXT:[[PHI:%.*]] = phi <2 x i8> [ [[PHI_SEL]], [[IF2]] ], [ 
[[PHI_SEL1]], [[ENTRY:%.*]] ], [ [[PHI_SEL2]], [[IF1]] ]
 ; CHECK-NEXT:ret <2 x i8> [[PHI]]
 ;
 entry:
@@ -103,3 +103,37 @@ else:
   %sel = select <2 x i1> %phi, <2 x i8> %y, <2 x i8> %z
   ret <2 x i8> %sel
 }
+
+; Don't crash on unreachable IR.
+
+define void @PR48369(i32 %a, i32* %p) {
+; CHECK-LABEL: @PR48369(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:[[PHI_CMP:%.*]] = icmp sgt i32 [[A:%.*]], 0
+; CHECK-NEXT:br label [[BB1:%.*]]
+; CHECK:   bb1:
+; CHECK-NEXT:[[CMP:%.*]] = phi i1 [ [[PHI_CMP]], [[DEADBB:%.*]] ], [ true, 
[[ENTRY:%.*]] ]
+; CHECK-NEXT:[[SHL:%.*]] = select i1 [[CMP]], i32 256, i32 0
+; CHECK-NEXT:store i32 [[SHL]], i32* [[P:%.*]], align 4
+; CHECK-NEXT:br label [[END:%.*]]
+; CHECK:   deadbb:
+; CHECK-NEXT:br label [[BB1]]
+; CHECK:   end:
+; CHECK-NEXT:ret void
+;
+entry:
+  %phi.cmp = icmp sgt i32 %a, 0
+  br label %bb1
+
+bb1:
+  %cmp = phi i1 [ %phi.cmp, %deadbb ], [ true, %entry ]
+  %shl = select i1 %cmp, i32 256, i32 0
+  store i32 %shl, i32* %p
+  br label %end
+
+deadbb:
+  br label %bb1
+
+end:
+  ret void
+}



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] ac522f8 - [DAGCombiner] Fold (sext (not i1 x)) -> (add (zext i1 x), -1)

2020-12-06 Thread Sanjay Patel via llvm-branch-commits

Author: Layton Kifer
Date: 2020-12-06T11:52:10-05:00
New Revision: ac522f87002ffc20d377e284080c9fa7f63216fc

URL: 
https://github.com/llvm/llvm-project/commit/ac522f87002ffc20d377e284080c9fa7f63216fc
DIFF: 
https://github.com/llvm/llvm-project/commit/ac522f87002ffc20d377e284080c9fa7f63216fc.diff

LOG: [DAGCombiner] Fold (sext (not i1 x)) -> (add (zext i1 x), -1)

Move fold of (sext (not i1 x)) -> (add (zext i1 x), -1) from X86 to DAGCombiner 
to improve codegen on other targets.

Differential Revision: https://reviews.llvm.org/D91589
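
The fold itself can be confirmed by enumerating the only two values an i1 can
take; a minimal standalone check (my illustration, not part of the patch):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (int x = 0; x <= 1; ++x) {
        int32_t sext_not = x ? 0 : -1;     // sext (not i1 x)
        int32_t zext_add = (int32_t)x - 1; // add (zext i1 x), -1
        assert(sext_not == zext_add);
      }
      return 0;
    }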

Added: 


Modified: 
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/AArch64/select_const.ll
llvm/test/CodeGen/ARM/select_const.ll
llvm/test/CodeGen/PowerPC/select_const.ll
llvm/test/CodeGen/RISCV/sext-zext-trunc.ll
llvm/test/CodeGen/SystemZ/sext-zext.ll
llvm/test/CodeGen/X86/pr44140.ll

Removed: 




diff  --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp 
b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index b1a3d849ed99..c40c2502f536 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -10663,6 +10663,19 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
 return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
   }
 
+  // fold sext (not i1 X) -> add (zext i1 X), -1
+  // TODO: This could be extended to handle bool vectors.
+  if (N0.getValueType() == MVT::i1 && isBitwiseNot(N0) && N0.hasOneUse() &&
+  (!LegalOperations || (TLI.isOperationLegal(ISD::ZERO_EXTEND, VT) &&
+TLI.isOperationLegal(ISD::ADD, VT)))) {
+// If we can eliminate the 'not', the sext form should be better
+if (SDValue NewXor = visitXOR(N0.getNode()))
+  return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NewXor);
+
+SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
+return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
+  }
+
   return SDValue();
 }
 

diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp 
b/llvm/lib/Target/X86/X86ISelLowering.cpp
index bfd80690347d..690eb39fa0d4 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -46882,7 +46882,6 @@ static SDValue combineSext(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
   SDValue N0 = N->getOperand(0);
   EVT VT = N->getValueType(0);
-  EVT InVT = N0.getValueType();
   SDLoc DL(N);
 
   // (i32 (sext (i8 (x86isd::setcc_carry -> (i32 (x86isd::setcc_carry))
@@ -46911,16 +46910,6 @@ static SDValue combineSext(SDNode *N, SelectionDAG 
&DAG,
   if (SDValue V = combineExtSetcc(N, DAG, Subtarget))
 return V;
 
-  if (InVT == MVT::i1 && N0.getOpcode() == ISD::XOR &&
-  isAllOnesConstant(N0.getOperand(1)) && N0.hasOneUse()) {
-// Invert and sign-extend a boolean is the same as zero-extend and subtract
-// 1 because 0 becomes -1 and 1 becomes 0. The subtract is efficiently
-// lowered with an LEA or a DEC. This is the same as: select Bool, 0, -1.
-// sext (xor Bool, -1) --> sub (zext Bool), 1
-SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
-return DAG.getNode(ISD::SUB, DL, VT, Zext, DAG.getConstant(1, DL, VT));
-  }
-
   if (SDValue V = combineToExtendBoolVectorInReg(N, DAG, DCI, Subtarget))
 return V;
 

diff  --git a/llvm/test/CodeGen/AArch64/select_const.ll 
b/llvm/test/CodeGen/AArch64/select_const.ll
index affb8150ff85..945e7cdc35ad 100644
--- a/llvm/test/CodeGen/AArch64/select_const.ll
+++ b/llvm/test/CodeGen/AArch64/select_const.ll
@@ -68,8 +68,8 @@ define i32 @select_1_or_0_signext(i1 signext %cond) {
 define i32 @select_0_or_neg1(i1 %cond) {
 ; CHECK-LABEL: select_0_or_neg1:
 ; CHECK:   // %bb.0:
-; CHECK-NEXT:mvn w8, w0
-; CHECK-NEXT:sbfx w0, w8, #0, #1
+; CHECK-NEXT:and w8, w0, #0x1
+; CHECK-NEXT:sub w0, w8, #1 // =1
 ; CHECK-NEXT:ret
   %sel = select i1 %cond, i32 0, i32 -1
   ret i32 %sel
@@ -78,8 +78,7 @@ define i32 @select_0_or_neg1(i1 %cond) {
 define i32 @select_0_or_neg1_zeroext(i1 zeroext %cond) {
 ; CHECK-LABEL: select_0_or_neg1_zeroext:
 ; CHECK:   // %bb.0:
-; CHECK-NEXT:mvn w8, w0
-; CHECK-NEXT:sbfx w0, w8, #0, #1
+; CHECK-NEXT:sub w0, w0, #1 // =1
 ; CHECK-NEXT:ret
   %sel = select i1 %cond, i32 0, i32 -1
   ret i32 %sel

diff  --git a/llvm/test/CodeGen/ARM/select_const.ll 
b/llvm/test/CodeGen/ARM/select_const.ll
index 500426074736..03f538ea5313 100644
--- a/llvm/test/CodeGen/ARM/select_const.ll
+++ b/llvm/test/CodeGen/ARM/select_const.ll
@@ -137,23 +137,21 @@ define i32 @select_1_or_0_signext(i1 signext %cond) {
 define i32 @select_0_or_neg1(i1 %cond) {
 ; ARM-LABEL: select_0_or_neg1:
 ; ARM:   @ %bb.0:
-; ARM-NEXT:mov r1, #1
-; ARM-NEXT:bic r0, r1, r0
-; ARM-NEXT:rsb r

[llvm-branch-commits] [llvm] 5fe1a49 - [SLP] fix typo in debug string; NFC

2020-12-07 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2020-12-07T15:09:21-05:00
New Revision: 5fe1a49f961d7e6a064addf6373288d5e3697e68

URL: 
https://github.com/llvm/llvm-project/commit/5fe1a49f961d7e6a064addf6373288d5e3697e68
DIFF: 
https://github.com/llvm/llvm-project/commit/5fe1a49f961d7e6a064addf6373288d5e3697e68.diff

LOG: [SLP] fix typo in debug string; NFC

Added: 


Modified: 
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 




diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp 
b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index f78a4d9d9c71..e3f6d8cc05f7 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1727,7 +1727,7 @@ class BoUpSLP {
 dbgs() << "NULL\n";
   dbgs() << "ReuseShuffleIndices: ";
   if (ReuseShuffleIndices.empty())
-dbgs() << "Emtpy";
+dbgs() << "Empty";
   else
 for (unsigned ReuseIdx : ReuseShuffleIndices)
   dbgs() << ReuseIdx << ", ";



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 2a06628 - [VectorCombine] add tests for load of insert/extract; NFC

2020-12-08 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2020-12-08T12:56:54-05:00
New Revision: 2a06628185b4598fa8a6a5b733028b4255818ce9

URL: 
https://github.com/llvm/llvm-project/commit/2a06628185b4598fa8a6a5b733028b4255818ce9
DIFF: 
https://github.com/llvm/llvm-project/commit/2a06628185b4598fa8a6a5b733028b4255818ce9.diff

LOG: [VectorCombine] add tests for load of insert/extract; NFC

Added: 


Modified: 
llvm/test/Transforms/VectorCombine/X86/load.ll

Removed: 




diff  --git a/llvm/test/Transforms/VectorCombine/X86/load.ll 
b/llvm/test/Transforms/VectorCombine/X86/load.ll
index 4b2859238a69..66b9f89dd8dd 100644
--- a/llvm/test/Transforms/VectorCombine/X86/load.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/load.ll
@@ -496,3 +496,44 @@ define void @PR47558_multiple_use_load(<2 x float>* 
nocapture nonnull %resultptr
   store <2 x float> %result1, <2 x float>* %resultptr, align 8
   ret void
 }
+
+define <4 x float> @load_v2f32_extract_insert_v4f32(<2 x float>* align 16 
dereferenceable(16) %p) {
+; CHECK-LABEL: @load_v2f32_extract_insert_v4f32(
+; CHECK-NEXT:[[L:%.*]] = load <2 x float>, <2 x float>* [[P:%.*]], align 4
+; CHECK-NEXT:[[S:%.*]] = extractelement <2 x float> [[L]], i32 0
+; CHECK-NEXT:[[R:%.*]] = insertelement <4 x float> undef, float [[S]], i32 0
+; CHECK-NEXT:ret <4 x float> [[R]]
+;
+  %l = load <2 x float>, <2 x float>* %p, align 4
+  %s = extractelement <2 x float> %l, i32 0
+  %r = insertelement <4 x float> undef, float %s, i32 0
+  ret <4 x float> %r
+}
+
+define <4 x float> @load_v8f32_extract_insert_v4f32(<8 x float>* align 16 
dereferenceable(16) %p) {
+; CHECK-LABEL: @load_v8f32_extract_insert_v4f32(
+; CHECK-NEXT:[[L:%.*]] = load <8 x float>, <8 x float>* [[P:%.*]], align 4
+; CHECK-NEXT:[[S:%.*]] = extractelement <8 x float> [[L]], i32 0
+; CHECK-NEXT:[[R:%.*]] = insertelement <4 x float> undef, float [[S]], i32 0
+; CHECK-NEXT:ret <4 x float> [[R]]
+;
+  %l = load <8 x float>, <8 x float>* %p, align 4
+  %s = extractelement <8 x float> %l, i32 0
+  %r = insertelement <4 x float> undef, float %s, i32 0
+  ret <4 x float> %r
+}
+
+define <8 x i32> @load_v1i32_extract_insert_v8i32_extra_use(<1 x i32>* align 
16 dereferenceable(16) %p, <1 x i32>* %store_ptr) {
+; CHECK-LABEL: @load_v1i32_extract_insert_v8i32_extra_use(
+; CHECK-NEXT:[[L:%.*]] = load <1 x i32>, <1 x i32>* [[P:%.*]], align 4
+; CHECK-NEXT:store <1 x i32> [[L]], <1 x i32>* [[STORE_PTR:%.*]], align 4
+; CHECK-NEXT:[[S:%.*]] = extractelement <1 x i32> [[L]], i32 0
+; CHECK-NEXT:[[R:%.*]] = insertelement <8 x i32> undef, i32 [[S]], i32 0
+; CHECK-NEXT:ret <8 x i32> [[R]]
+;
+  %l = load <1 x i32>, <1 x i32>* %p, align 4
+  store <1 x i32> %l, <1 x i32>* %store_ptr
+  %s = extractelement <1 x i32> %l, i32 0
+  %r = insertelement <8 x i32> undef, i32 %s, i32 0
+  ret <8 x i32> %r
+}



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] b2ef264 - [VectorCombine] allow peeking through an extractelt when creating a vector load

2020-12-09 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2020-12-09T10:36:14-05:00
New Revision: b2ef264096c045cf7147320a8bcdf8ec725ec534

URL: 
https://github.com/llvm/llvm-project/commit/b2ef264096c045cf7147320a8bcdf8ec725ec534
DIFF: 
https://github.com/llvm/llvm-project/commit/b2ef264096c045cf7147320a8bcdf8ec725ec534.diff

LOG: [VectorCombine] allow peeking through an extractelt when creating a vector 
load

This is an enhancement to load vectorization that is motivated by
a pattern in https://llvm.org/PR16739.
Unfortunately, it's still not enough to make a difference there.
We will have to handle multi-use cases in some better way to avoid
creating multiple overlapping loads.

Differential Revision: https://reviews.llvm.org/D92858

Added: 


Modified: 
llvm/lib/Transforms/Vectorize/VectorCombine.cpp
llvm/test/Transforms/VectorCombine/X86/load.ll

Removed: 




diff  --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp 
b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 5f3d5c768a9e..0d0a338afca3 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -92,18 +92,25 @@ static void replaceValue(Value &Old, Value &New) {
 }
 
 bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
-  // Match insert into fixed vector of scalar load.
+  // Match insert into fixed vector of scalar value.
   auto *Ty = dyn_cast<FixedVectorType>(I.getType());
   Value *Scalar;
   if (!Ty || !match(&I, m_InsertElt(m_Undef(), m_Value(Scalar), m_ZeroInt())) 
||
   !Scalar->hasOneUse())
 return false;
 
+  // Optionally match an extract from another vector.
+  Value *X;
+  bool HasExtract = match(Scalar, m_ExtractElt(m_Value(X), m_ZeroInt()));
+  if (!HasExtract)
+X = Scalar;
+
+  // Match source value as load of scalar or vector.
   // Do not vectorize scalar load (widening) if atomic/volatile or under
   // asan/hwasan/memtag/tsan. The widened load may load data from dirty regions
   // or create data races non-existent in the source.
-  auto *Load = dyn_cast<LoadInst>(Scalar);
-  if (!Load || !Load->isSimple() ||
+  auto *Load = dyn_cast<LoadInst>(X);
+  if (!Load || !Load->isSimple() || !Load->hasOneUse() ||
   Load->getFunction()->hasFnAttribute(Attribute::SanitizeMemTag) ||
   mustSuppressSpeculation(*Load))
 return false;
@@ -134,10 +141,12 @@ bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
 return false;
 
 
-  // Original pattern: insertelt undef, load [free casts of] ScalarPtr, 0
-  int OldCost = TTI.getMemoryOpCost(Instruction::Load, ScalarTy, Alignment, 
AS);
+  // Original pattern: insertelt undef, load [free casts of] PtrOp, 0
+  Type *LoadTy = Load->getType();
+  int OldCost = TTI.getMemoryOpCost(Instruction::Load, LoadTy, Alignment, AS);
   APInt DemandedElts = APInt::getOneBitSet(MinVecNumElts, 0);
-  OldCost += TTI.getScalarizationOverhead(MinVecTy, DemandedElts, true, false);
+  OldCost += TTI.getScalarizationOverhead(MinVecTy, DemandedElts,
+  /* Insert */ true, HasExtract);
 
   // New pattern: load VecPtr
   int NewCost = TTI.getMemoryOpCost(Instruction::Load, MinVecTy, Alignment, 
AS);

diff  --git a/llvm/test/Transforms/VectorCombine/X86/load.ll 
b/llvm/test/Transforms/VectorCombine/X86/load.ll
index 66b9f89dd8dd..824a507ed103 100644
--- a/llvm/test/Transforms/VectorCombine/X86/load.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/load.ll
@@ -499,9 +499,8 @@ define void @PR47558_multiple_use_load(<2 x float>* 
nocapture nonnull %resultptr
 
 define <4 x float> @load_v2f32_extract_insert_v4f32(<2 x float>* align 16 
dereferenceable(16) %p) {
 ; CHECK-LABEL: @load_v2f32_extract_insert_v4f32(
-; CHECK-NEXT:[[L:%.*]] = load <2 x float>, <2 x float>* [[P:%.*]], align 4
-; CHECK-NEXT:[[S:%.*]] = extractelement <2 x float> [[L]], i32 0
-; CHECK-NEXT:[[R:%.*]] = insertelement <4 x float> undef, float [[S]], i32 0
+; CHECK-NEXT:[[TMP1:%.*]] = bitcast <2 x float>* [[P:%.*]] to <4 x float>*
+; CHECK-NEXT:[[R:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
 ; CHECK-NEXT:ret <4 x float> [[R]]
 ;
   %l = load <2 x float>, <2 x float>* %p, align 4
@@ -512,9 +511,8 @@ define <4 x float> @load_v2f32_extract_insert_v4f32(<2 x 
float>* align 16 derefe
 
 define <4 x float> @load_v8f32_extract_insert_v4f32(<8 x float>* align 16 
dereferenceable(16) %p) {
 ; CHECK-LABEL: @load_v8f32_extract_insert_v4f32(
-; CHECK-NEXT:[[L:%.*]] = load <8 x float>, <8 x float>* [[P:%.*]], align 4
-; CHECK-NEXT:[[S:%.*]] = extractelement <8 x float> [[L]], i32 0
-; CHECK-NEXT:[[R:%.*]] = insertelement <4 x float> undef, float [[S]], i32 0
+; CHECK-NEXT:[[TMP1:%.*]] = bitcast <8 x float>* [[P:%.*]] to <4 x float>*
+; CHECK-NEXT:[[R:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
 ; CHECK-NEXT:ret <4 x float> [[R]]
 ;
   %l = load <8 x float>, <8 x float>* %p, align 4




[llvm-branch-commits] [llvm] 12b684a - [VectorCombine] improve readability; NFC

2020-12-10 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2020-12-10T13:10:26-05:00
New Revision: 12b684ae02226f7785d3fb412fb155d4e15cc9bd

URL: 
https://github.com/llvm/llvm-project/commit/12b684ae02226f7785d3fb412fb155d4e15cc9bd
DIFF: 
https://github.com/llvm/llvm-project/commit/12b684ae02226f7785d3fb412fb155d4e15cc9bd.diff

LOG: [VectorCombine] improve readability; NFC

If we are going to allow adjusting the pointer for GEPs,
rearranging the code a bit will make it easier to follow.

Added: 


Modified: 
llvm/lib/Transforms/Vectorize/VectorCombine.cpp

Removed: 




diff  --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp 
b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 0d0a338afca3..19f5a2b432f7 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -116,15 +116,16 @@ bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
 return false;
 
   // TODO: Extend this to match GEP with constant offsets.
-  Value *PtrOp = Load->getPointerOperand()->stripPointerCasts();
-  assert(isa<PointerType>(PtrOp->getType()) && "Expected a pointer type");
-  unsigned AS = Load->getPointerAddressSpace();
+  const DataLayout &DL = I.getModule()->getDataLayout();
+  Value *SrcPtr = Load->getPointerOperand()->stripPointerCasts();
+  assert(isa<PointerType>(SrcPtr->getType()) && "Expected a pointer type");
 
   // If original AS != Load's AS, we can't bitcast the original pointer and 
have
   // to use Load's operand instead. Ideally we would want to strip pointer 
casts
   // without changing AS, but there's no API to do that ATM.
-  if (AS != PtrOp->getType()->getPointerAddressSpace())
-PtrOp = Load->getPointerOperand();
+  unsigned AS = Load->getPointerAddressSpace();
+  if (AS != SrcPtr->getType()->getPointerAddressSpace())
+SrcPtr = Load->getPointerOperand();
 
   Type *ScalarTy = Scalar->getType();
   uint64_t ScalarSize = ScalarTy->getPrimitiveSizeInBits();
@@ -136,11 +137,9 @@ bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
   unsigned MinVecNumElts = MinVectorSize / ScalarSize;
   auto *MinVecTy = VectorType::get(ScalarTy, MinVecNumElts, false);
   Align Alignment = Load->getAlign();
-  const DataLayout &DL = I.getModule()->getDataLayout();
-  if (!isSafeToLoadUnconditionally(PtrOp, MinVecTy, Alignment, DL, Load, &DT))
+  if (!isSafeToLoadUnconditionally(SrcPtr, MinVecTy, Alignment, DL, Load, &DT))
 return false;
 
-
   // Original pattern: insertelt undef, load [free casts of] PtrOp, 0
   Type *LoadTy = Load->getType();
   int OldCost = TTI.getMemoryOpCost(Instruction::Load, LoadTy, Alignment, AS);
@@ -159,7 +158,7 @@ bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
   // It is safe and potentially profitable to load a vector directly:
   // inselt undef, load Scalar, 0 --> load VecPtr
   IRBuilder<> Builder(Load);
-  Value *CastedPtr = Builder.CreateBitCast(PtrOp, MinVecTy->getPointerTo(AS));
+  Value *CastedPtr = Builder.CreateBitCast(SrcPtr, MinVecTy->getPointerTo(AS));
   Value *VecLd = Builder.CreateAlignedLoad(MinVecTy, CastedPtr, Alignment);
 
   // If the insert type does not match the target's minimum vector type,



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 4f051fe - [InstCombine] avoid crash sinking to unreachable block

2020-12-10 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2020-12-10T13:10:26-05:00
New Revision: 4f051fe37438632d10480c346520a0de624dbebf

URL: 
https://github.com/llvm/llvm-project/commit/4f051fe37438632d10480c346520a0de624dbebf
DIFF: 
https://github.com/llvm/llvm-project/commit/4f051fe37438632d10480c346520a0de624dbebf.diff

LOG: [InstCombine] avoid crash sinking to unreachable block

The test is reduced from the example in D82005.

Similar to 94f6d365e, the test here would assert in
the DomTree when we tried to convert a select to a
phi with an unreachable block operand.

We may want to add some kind of guard code in DomTree
itself to avoid this sort of problem.

Added: 


Modified: 
llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
llvm/test/Transforms/InstCombine/phi-select-constant.ll

Removed: 




diff  --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp 
b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index cab6f1e5632f..bbc76325a67b 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -3640,7 +3640,9 @@ bool InstCombinerImpl::run() {
 else
   UserParent = UserInst->getParent();
 
-if (UserParent != BB) {
+// Try sinking to another block. If that block is unreachable, then do
+// not bother. SimplifyCFG should handle it.
+if (UserParent != BB && DT.isReachableFromEntry(UserParent)) {
   // See if the user is one of our successors that has only one
   // predecessor, so that we don't have to split the critical edge.
   bool ShouldSink = UserParent->getUniquePredecessor() == BB;

diff  --git a/llvm/test/Transforms/InstCombine/phi-select-constant.ll 
b/llvm/test/Transforms/InstCombine/phi-select-constant.ll
index c65be75c0b4a..e3f35d2e6001 100644
--- a/llvm/test/Transforms/InstCombine/phi-select-constant.ll
+++ b/llvm/test/Transforms/InstCombine/phi-select-constant.ll
@@ -137,3 +137,24 @@ deadbb:
 end:
   ret void
 }
+
+define i16 @sink_to_unreachable_crash(i1 %a)  {
+; CHECK-LABEL: @sink_to_unreachable_crash(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:[[S:%.*]] = select i1 [[A:%.*]], i16 0, i16 5
+; CHECK-NEXT:br label [[INF_LOOP:%.*]]
+; CHECK:   inf_loop:
+; CHECK-NEXT:br label [[INF_LOOP]]
+; CHECK:   unreachable:
+; CHECK-NEXT:ret i16 [[S]]
+;
+entry:
+  %s = select i1 %a, i16 0, i16 5
+  br label %inf_loop
+
+inf_loop:
+  br label %inf_loop
+
+unreachable:   ; No predecessors!
+  ret i16 %s
+}



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 204bdc5 - [InstCombine][x86] fix insertion point bug in vector demanded elts fold (PR48476)

2020-12-11 Thread Sanjay Patel via llvm-branch-commits

Author: Sanjay Patel
Date: 2020-12-11T17:23:35-05:00
New Revision: 204bdc5322cc89603d503fb1f02a0eba19a1b496

URL: 
https://github.com/llvm/llvm-project/commit/204bdc5322cc89603d503fb1f02a0eba19a1b496
DIFF: 
https://github.com/llvm/llvm-project/commit/204bdc5322cc89603d503fb1f02a0eba19a1b496.diff

LOG: [InstCombine][x86] fix insertion point bug in vector demanded elts fold 
(PR48476)

This transform was added at:
c63799fc52ff

From what I see, it's the first demanded elements transform that adds
a new instruction using the IRBuilder. There are similar folds in
the generic demanded bits chunk of instcombine that also use the
InsertPointGuard code pattern.

The tests here would assert/crash because the new instruction was
being added at the start of the demanded elements analysis rather
than at the instruction that is being replaced.
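
For reference, the InsertPointGuard idiom mentioned above looks roughly like
this (a sketch of the pattern only; the wrapper and the Arg0/Arg1 operand
names are hypothetical, not the literal patch):

    #include "llvm/IR/IRBuilder.h"
    using namespace llvm;

    // Build the replacement right before the instruction being replaced,
    // then automatically restore the builder's previous insertion point.
    static Value *buildReplacementSketch(IRBuilderBase &B, Instruction &II,
                                         Value *Arg0, Value *Arg1) {
      IRBuilderBase::InsertPointGuard Guard(B); // saves the insertion point
      B.SetInsertPoint(&II);                    // new code lands at II
      return B.CreateFSub(Arg0, Arg1);          // created at II, not wherever
                                                // B pointed beforehand
    } // Guard's destructor restores the caller's insertion point here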

Added: 


Modified: 
llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
llvm/test/Transforms/InstCombine/X86/x86-addsub.ll

Removed: 




diff  --git a/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp 
b/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
index 3b05dba57a33..ca026baa2c41 100644
--- a/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
@@ -1916,13 +1916,20 @@ Optional<Value *> 
X86TTIImpl::simplifyDemandedVectorEltsIntrinsic(
   case Intrinsic::x86_sse3_addsub_ps:
   case Intrinsic::x86_avx_addsub_pd_256:
   case Intrinsic::x86_avx_addsub_ps_256: {
+// If none of the even or none of the odd lanes are required, turn this
+// into a generic FP math instruction.
 APInt SubMask = APInt::getSplat(VWidth, APInt(2, 0x1));
-if (DemandedElts.isSubsetOf(SubMask))
-  return IC.Builder.CreateFSub(II.getArgOperand(0), II.getArgOperand(1));
-
 APInt AddMask = APInt::getSplat(VWidth, APInt(2, 0x2));
-if (DemandedElts.isSubsetOf(AddMask))
-  return IC.Builder.CreateFAdd(II.getArgOperand(0), II.getArgOperand(1));
+bool IsSubOnly = DemandedElts.isSubsetOf(SubMask);
+bool IsAddOnly = DemandedElts.isSubsetOf(AddMask);
+if (IsSubOnly || IsAddOnly) {
+  assert((IsSubOnly ^ IsAddOnly) && "Can't be both add-only and sub-only");
+  IRBuilderBase::InsertPointGuard Guard(IC.Builder);
+  IC.Builder.SetInsertPoint(&II);
+  Value *Arg0 = II.getArgOperand(0), *Arg1 = II.getArgOperand(1);
+  return IC.Builder.CreateBinOp(
+  IsSubOnly ? Instruction::FSub : Instruction::FAdd, Arg0, Arg1);
+}
 
 simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
 simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);

diff  --git a/llvm/test/Transforms/InstCombine/X86/x86-addsub.ll 
b/llvm/test/Transforms/InstCombine/X86/x86-addsub.ll
index d051732ee819..0b9831be8fcf 100644
--- a/llvm/test/Transforms/InstCombine/X86/x86-addsub.ll
+++ b/llvm/test/Transforms/InstCombine/X86/x86-addsub.ll
@@ -5,6 +5,7 @@ declare <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double>, <2 
x double>)
 declare <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float>, <4 x float>)
 declare <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double>, <4 x double>)
 declare <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float>, <8 x float>)
+declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8 
immarg) #0
 
 ;
 ; Demanded Elts
@@ -164,4 +165,30 @@ define void @PR46277(float %0, float %1, float %2, float 
%3, <4 x float> %4, flo
   ret void
 }
 
+define double @PR48476_fsub(<2 x double> %x) {
+; CHECK-LABEL: @PR48476_fsub(
+; CHECK-NEXT:[[TMP1:%.*]] = fsub <2 x double> , [[X:%.*]]
+; CHECK-NEXT:[[T2:%.*]] = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x 
double> [[TMP1]], <2 x double> [[X]], i8 6)
+; CHECK-NEXT:[[VECEXT:%.*]] = extractelement <2 x double> [[T2]], i32 0
+; CHECK-NEXT:ret double [[VECEXT]]
+;
+  %t1 = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> 
zeroinitializer, <2 x double> %x)
+  %t2 = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %t1, <2 x double> 
%x, i8 6)
+  %vecext = extractelement <2 x double> %t2, i32 0
+  ret double %vecext
+}
 
+define double @PR48476_fadd_fsub(<2 x double> %x) {
+; CHECK-LABEL: @PR48476_fadd_fsub(
+; CHECK-NEXT:[[TMP1:%.*]] = fadd <2 x double> [[X:%.*]], 
+; CHECK-NEXT:[[S:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> 
undef, <2 x i32> 
+; CHECK-NEXT:[[TMP2:%.*]] = fsub <2 x double> [[S]], [[X]]
+; CHECK-NEXT:[[VECEXT:%.*]] = extractelement <2 x double> [[TMP2]], i32 0
+; CHECK-NEXT:ret double [[VECEXT]]
+;
+  %t1 = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> 
zeroinitializer, <2 x double> %x)
+  %s = shufflevector <2 x double> %t1, <2 x double> undef, <2 x i32> 
+  %t2 = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %s, <2 x 
double> %x)
+  %vecext = extractelement <2 x double> %t2, i32 0
+  ret double %vecext
+}



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits