[llvm-branch-commits] [llvm] 39e1e53 - [SLP] add reduction test with mixed fast-math-flags; NFC
Author: Sanjay Patel Date: 2021-01-23T11:17:20-05:00 New Revision: 39e1e53a7c162652c6c138d1bcf50d2766fe9561 URL: https://github.com/llvm/llvm-project/commit/39e1e53a7c162652c6c138d1bcf50d2766fe9561 DIFF: https://github.com/llvm/llvm-project/commit/39e1e53a7c162652c6c138d1bcf50d2766fe9561.diff LOG: [SLP] add reduction test with mixed fast-math-flags; NFC Added: Modified: llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll Removed: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll index 8e175f1acda9..38d36c676fa7 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll @@ -1801,4 +1801,36 @@ define float @fadd_v4f32_fmf(float* %p) { ret float %add3 } +define float @fadd_v4f32_fmf_intersect(float* %p) { +; CHECK-LABEL: @fadd_v4f32_fmf_intersect( +; CHECK-NEXT:[[P1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1 +; CHECK-NEXT:[[P2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2 +; CHECK-NEXT:[[P3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3 +; CHECK-NEXT:[[TMP1:%.*]] = bitcast float* [[P]] to <4 x float>* +; CHECK-NEXT:[[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 +; CHECK-NEXT:[[TMP3:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.00e+00, <4 x float> [[TMP2]]) +; CHECK-NEXT:ret float [[TMP3]] +; +; STORE-LABEL: @fadd_v4f32_fmf_intersect( +; STORE-NEXT:[[P1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1 +; STORE-NEXT:[[P2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2 +; STORE-NEXT:[[P3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3 +; STORE-NEXT:[[TMP1:%.*]] = bitcast float* [[P]] to <4 x float>* +; STORE-NEXT:[[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 +; STORE-NEXT:[[TMP3:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.00e+00, <4 x float> [[TMP2]]) +; STORE-NEXT:ret float [[TMP3]] +; + %p1 = getelementptr inbounds float, float* %p, i64 1 + %p2 = getelementptr inbounds float, float* %p, i64 2 + %p3 = getelementptr inbounds float, float* %p, i64 3 + %t0 = load float, float* %p, align 4 + %t1 = load float, float* %p1, align 4 + %t2 = load float, float* %p2, align 4 + %t3 = load float, float* %p3, align 4 + %add1 = fadd ninf reassoc nsz nnan float %t1, %t0 + %add2 = fadd ninf reassoc nsz nnan arcp float %t2, %add1 + %add3 = fadd ninf reassoc nsz contract float %t3, %add2 + ret float %add3 +} + declare i32 @__gxx_personality_v0(...) ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
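For reference, the three fadds in the new test deliberately carry different flag sets; their common subset, which the follow-up patch propagates instead of 'fast', works out to:

  %add1: ninf reassoc nsz nnan
  %add2: ninf reassoc nsz nnan arcp
  %add3: ninf reassoc nsz contract
  common subset: ninf reassoc nsz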
[llvm-branch-commits] [llvm] a6f0221 - [SLP] fix fast-math-flag propagation on FP reductions
Author: Sanjay Patel Date: 2021-01-23T11:17:20-05:00 New Revision: a6f02212764a76935ec5fb704fe86a1a76f65745 URL: https://github.com/llvm/llvm-project/commit/a6f02212764a76935ec5fb704fe86a1a76f65745 DIFF: https://github.com/llvm/llvm-project/commit/a6f02212764a76935ec5fb704fe86a1a76f65745.diff LOG: [SLP] fix fast-math-flag propagation on FP reductions As shown in the test diffs, we could miscompile by propagating flags that did not exist in the original code. The flags required for fmin/fmax reductions will be fixed in a follow-up patch. Added: Modified: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll Removed: diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 78ce4870588c..6c2b10e5b9fa 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -6820,12 +6820,18 @@ class HorizontalReduction { if (NumReducedVals < 4) return false; -// FIXME: Fast-math-flags should be set based on the instructions in the -//reduction (not all of 'fast' are required). +// Intersect the fast-math-flags from all reduction operations. +FastMathFlags RdxFMF; +RdxFMF.set(); +for (ReductionOpsType &RdxOp : ReductionOps) { + for (Value *RdxVal : RdxOp) { +if (auto *FPMO = dyn_cast(RdxVal)) + RdxFMF &= FPMO->getFastMathFlags(); + } +} + IRBuilder<> Builder(cast(ReductionRoot)); -FastMathFlags Unsafe; -Unsafe.setFast(); -Builder.setFastMathFlags(Unsafe); +Builder.setFastMathFlags(RdxFMF); BoUpSLP::ExtraValueToDebugLocsMap ExternallyUsedValues; // The same extra argument may be used several times, so log each attempt @@ -7071,9 +7077,6 @@ class HorizontalReduction { assert(isPowerOf2_32(ReduxWidth) && "We only handle power-of-two reductions for now"); -// FIXME: The builder should use an FMF guard. It should not be hard-coded -//to 'fast'. -assert(Builder.getFastMathFlags().isFast() && "Expected 'fast' FMF"); return createSimpleTargetReduction(Builder, TTI, VectorizedValue, RdxKind, ReductionOps.back()); } diff --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll index 38d36c676fa7..03ec04cb8cbe 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll @@ -1766,7 +1766,6 @@ bb.1: ret void } -; FIXME: This is a miscompile. ; The FMF on the reduction should match the incoming insts. 
define float @fadd_v4f32_fmf(float* %p) { @@ -1776,7 +1775,7 @@ define float @fadd_v4f32_fmf(float* %p) { ; CHECK-NEXT:[[P3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3 ; CHECK-NEXT:[[TMP1:%.*]] = bitcast float* [[P]] to <4 x float>* ; CHECK-NEXT:[[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 -; CHECK-NEXT:[[TMP3:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.00e+00, <4 x float> [[TMP2]]) +; CHECK-NEXT:[[TMP3:%.*]] = call reassoc nsz float @llvm.vector.reduce.fadd.v4f32(float -0.00e+00, <4 x float> [[TMP2]]) ; CHECK-NEXT:ret float [[TMP3]] ; ; STORE-LABEL: @fadd_v4f32_fmf( @@ -1785,7 +1784,7 @@ define float @fadd_v4f32_fmf(float* %p) { ; STORE-NEXT:[[P3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3 ; STORE-NEXT:[[TMP1:%.*]] = bitcast float* [[P]] to <4 x float>* ; STORE-NEXT:[[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 -; STORE-NEXT:[[TMP3:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.00e+00, <4 x float> [[TMP2]]) +; STORE-NEXT:[[TMP3:%.*]] = call reassoc nsz float @llvm.vector.reduce.fadd.v4f32(float -0.00e+00, <4 x float> [[TMP2]]) ; STORE-NEXT:ret float [[TMP3]] ; %p1 = getelementptr inbounds float, float* %p, i64 1 @@ -1801,6 +1800,10 @@ define float @fadd_v4f32_fmf(float* %p) { ret float %add3 } +; The minimal FMF for fadd reduction are "reassoc nsz". +; Only the common FMF of all operations in the reduction propagate to the result. +; In this example, "contract nnan arcp" are dropped, but "ninf" transfers with the required flags. + define float @fadd_v4f32_fmf_intersect(float* %p) { ; CHECK-LABEL: @fadd_v4f32_fmf_intersect( ; CHECK-NEXT:[[P1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1 @@ -1808,7 +1811,7 @@ define float @fadd_v4f32_fmf_intersect(float* %p) { ; CHECK-NEXT:[[P3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3 ; CHECK-NEXT:[[TMP1:%.*]] = bitcast float* [[P]] to <4 x float>* ; CHECK-NEXT:[[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 -; CHECK-NEXT:[[TMP3:%.*]] = cal
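The core of the fix is the flag intersection visible in the diff above. A condensed, self-contained sketch of that idea (assuming the usual LLVM headers; the helper name is invented for illustration and this is not the verbatim SLP code):

#include "llvm/ADT/ArrayRef.h"
#include "llvm/IR/Operator.h"
using namespace llvm;

// Start from "all flags set" and AND in each reduction op's flags, so only
// the flags present on every operation in the reduction survive.
static FastMathFlags intersectReductionFMF(ArrayRef<Value *> RdxOps) {
  FastMathFlags RdxFMF;
  RdxFMF.set();
  for (Value *RdxVal : RdxOps)
    if (auto *FPMO = dyn_cast<FPMathOperator>(RdxVal))
      RdxFMF &= FPMO->getFastMathFlags();
  return RdxFMF;
}

The builder then takes the intersected flags via setFastMathFlags() in place of the old hard-coded 'fast' setting, so the emitted reduce intrinsic can never carry a flag the scalar code did not have.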
[llvm-branch-commits] [llvm] 77adbe6 - [SLP] fix fast-math requirements for fmin/fmax reductions
Author: Sanjay Patel Date: 2021-01-24T08:55:56-05:00 New Revision: 77adbe6a8c716bead04393560ec5aa88877ac1d2 URL: https://github.com/llvm/llvm-project/commit/77adbe6a8c716bead04393560ec5aa88877ac1d2 DIFF: https://github.com/llvm/llvm-project/commit/77adbe6a8c716bead04393560ec5aa88877ac1d2.diff LOG: [SLP] fix fast-math requirements for fmin/fmax reductions a6f0221276 enabled intersection of FMF on reduction instructions, so it is safe to ease the check here. There is still some room to improve here - it looks like we have nearly duplicate flags propagation logic inside of the LoopUtils helper but it is limited targets that do not form reduction intrinsics (they form the shuffle expansion). Added: Modified: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp llvm/test/Transforms/SLPVectorizer/X86/fmaxnum.ll llvm/test/Transforms/SLPVectorizer/X86/fminnum.ll Removed: diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index c5cfc9e77d8a..7114b4d412fd 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -6422,9 +6422,7 @@ class HorizontalReduction { // FP min/max are associative except for NaN and -0.0. We do not // have to rule out -0.0 here because the intrinsic semantics do not // specify a fixed result for it. - // TODO: This is artificially restricted to fast because the code that - // creates reductions assumes/produces fast ops. - return I->getFastMathFlags().isFast(); + return I->getFastMathFlags().noNaNs(); } return I->isAssociative(); diff --git a/llvm/test/Transforms/SLPVectorizer/X86/fmaxnum.ll b/llvm/test/Transforms/SLPVectorizer/X86/fmaxnum.ll index fc134aa6deef..8136f2cb2dfe 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/fmaxnum.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/fmaxnum.ll @@ -361,21 +361,15 @@ define float @reduction_v4f32_fast(float* %p) { ret float %m3 } -; TODO: This should become a reduce intrinsic. 
- define float @reduction_v4f32_nnan(float* %p) { ; CHECK-LABEL: @reduction_v4f32_nnan( ; CHECK-NEXT:[[G1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1 ; CHECK-NEXT:[[G2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2 ; CHECK-NEXT:[[G3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3 -; CHECK-NEXT:[[T0:%.*]] = load float, float* [[P]], align 4 -; CHECK-NEXT:[[T1:%.*]] = load float, float* [[G1]], align 4 -; CHECK-NEXT:[[T2:%.*]] = load float, float* [[G2]], align 4 -; CHECK-NEXT:[[T3:%.*]] = load float, float* [[G3]], align 4 -; CHECK-NEXT:[[M1:%.*]] = tail call nnan float @llvm.maxnum.f32(float [[T1]], float [[T0]]) -; CHECK-NEXT:[[M2:%.*]] = tail call nnan float @llvm.maxnum.f32(float [[T2]], float [[M1]]) -; CHECK-NEXT:[[M3:%.*]] = tail call nnan float @llvm.maxnum.f32(float [[T3]], float [[M2]]) -; CHECK-NEXT:ret float [[M3]] +; CHECK-NEXT:[[TMP1:%.*]] = bitcast float* [[P]] to <4 x float>* +; CHECK-NEXT:[[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 +; CHECK-NEXT:[[TMP3:%.*]] = call nnan float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[TMP2]]) +; CHECK-NEXT:ret float [[TMP3]] ; %g1 = getelementptr inbounds float, float* %p, i64 1 %g2 = getelementptr inbounds float, float* %p, i64 2 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/fminnum.ll b/llvm/test/Transforms/SLPVectorizer/X86/fminnum.ll index e5a4fc235748..470dc8290eee 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/fminnum.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/fminnum.ll @@ -361,21 +361,15 @@ define float @reduction_v4f32_fast(float* %p) { ret float %m3 } -; TODO: This should become a reduce intrinsic. - define float @reduction_v4f32_nnan(float* %p) { ; CHECK-LABEL: @reduction_v4f32_nnan( ; CHECK-NEXT:[[G1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1 ; CHECK-NEXT:[[G2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2 ; CHECK-NEXT:[[G3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3 -; CHECK-NEXT:[[T0:%.*]] = load float, float* [[P]], align 4 -; CHECK-NEXT:[[T1:%.*]] = load float, float* [[G1]], align 4 -; CHECK-NEXT:[[T2:%.*]] = load float, float* [[G2]], align 4 -; CHECK-NEXT:[[T3:%.*]] = load float, float* [[G3]], align 4 -; CHECK-NEXT:[[M1:%.*]] = tail call nnan float @llvm.minnum.f32(float [[T1]], float [[T0]]) -; CHECK-NEXT:[[M2:%.*]] = tail call nnan float @llvm.minnum.f32(float [[T2]], float [[M1]]) -; CHECK-NEXT:[[M3:%.*]] = tail call nnan float @llvm.minnum.f32(float [[T3]], float [[M2]]) -; CHECK-NEXT:ret float [[M3]] +; CHECK-NEXT:[[TMP1:%.*]] = bitcast float* [[P]] to <4 x float>* +; CHECK-NEXT:[[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align
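The eased legality check amounts to requiring only 'nnan' on the maxnum/minnum calls instead of the full 'fast' set. A rough sketch of that check in isolation (the function name is invented, the RecurKind usage is assumed from the surrounding LLVM code, and this is not the verbatim patch):

#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

static bool isAssociativeForReduction(RecurKind Kind, Instruction *I) {
  if (Kind == RecurKind::FMax || Kind == RecurKind::FMin) {
    // FP min/max are associative except for NaN and -0.0; -0.0 does not
    // matter because the intrinsic semantics leave its result open, so
    // 'nnan' alone is enough -- 'fast' is no longer required.
    return I->getFastMathFlags().noNaNs();
  }
  return I->isAssociative();
}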
[llvm-branch-commits] [llvm] 07b60d0 - [InstCombine] add tests for min/max intrinsics with extended values; NFC
Author: Sanjay Patel Date: 2021-01-25T07:52:50-05:00 New Revision: 07b60d0060688dea121be36b46de859bafcec29b URL: https://github.com/llvm/llvm-project/commit/07b60d0060688dea121be36b46de859bafcec29b DIFF: https://github.com/llvm/llvm-project/commit/07b60d0060688dea121be36b46de859bafcec29b.diff LOG: [InstCombine] add tests for min/max intrinsics with extended values; NFC Added: Modified: llvm/test/Transforms/InstCombine/minmax-intrinsics.ll Removed: diff --git a/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll b/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll index 797f85d94447..bccfac81bdce 100644 --- a/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll +++ b/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll @@ -5,6 +5,8 @@ declare i8 @llvm.umin.i8(i8, i8) declare i8 @llvm.umax.i8(i8, i8) declare i8 @llvm.smin.i8(i8, i8) declare i8 @llvm.smax.i8(i8, i8) +declare <3 x i8> @llvm.umin.v3i8(<3 x i8>, <3 x i8>) +declare void @use(i8) define i8 @umin_known_bits(i8 %x, i8 %y) { ; CHECK-LABEL: @umin_known_bits( @@ -45,3 +47,154 @@ define i8 @smax_known_bits(i8 %x, i8 %y) { %r = and i8 %m, -128 ret i8 %r } + +define i8 @smax_sext(i5 %x, i5 %y) { +; CHECK-LABEL: @smax_sext( +; CHECK-NEXT:[[SX:%.*]] = sext i5 [[X:%.*]] to i8 +; CHECK-NEXT:[[SY:%.*]] = sext i5 [[Y:%.*]] to i8 +; CHECK-NEXT:[[M:%.*]] = call i8 @llvm.smax.i8(i8 [[SX]], i8 [[SY]]) +; CHECK-NEXT:ret i8 [[M]] +; + %sx = sext i5 %x to i8 + %sy = sext i5 %y to i8 + %m = call i8 @llvm.smax.i8(i8 %sx, i8 %sy) + ret i8 %m +} + +define i8 @smin_sext(i5 %x, i5 %y) { +; CHECK-LABEL: @smin_sext( +; CHECK-NEXT:[[SX:%.*]] = sext i5 [[X:%.*]] to i8 +; CHECK-NEXT:[[SY:%.*]] = sext i5 [[Y:%.*]] to i8 +; CHECK-NEXT:call void @use(i8 [[SY]]) +; CHECK-NEXT:[[M:%.*]] = call i8 @llvm.smin.i8(i8 [[SX]], i8 [[SY]]) +; CHECK-NEXT:ret i8 [[M]] +; + %sx = sext i5 %x to i8 + %sy = sext i5 %y to i8 + call void @use(i8 %sy) + %m = call i8 @llvm.smin.i8(i8 %sx, i8 %sy) + ret i8 %m +} + +define i8 @umax_sext(i5 %x, i5 %y) { +; CHECK-LABEL: @umax_sext( +; CHECK-NEXT:[[SX:%.*]] = sext i5 [[X:%.*]] to i8 +; CHECK-NEXT:call void @use(i8 [[SX]]) +; CHECK-NEXT:[[SY:%.*]] = sext i5 [[Y:%.*]] to i8 +; CHECK-NEXT:[[M:%.*]] = call i8 @llvm.umax.i8(i8 [[SX]], i8 [[SY]]) +; CHECK-NEXT:ret i8 [[M]] +; + %sx = sext i5 %x to i8 + call void @use(i8 %sx) + %sy = sext i5 %y to i8 + %m = call i8 @llvm.umax.i8(i8 %sx, i8 %sy) + ret i8 %m +} + +define <3 x i8> @umin_sext(<3 x i5> %x, <3 x i5> %y) { +; CHECK-LABEL: @umin_sext( +; CHECK-NEXT:[[SX:%.*]] = sext <3 x i5> [[X:%.*]] to <3 x i8> +; CHECK-NEXT:[[SY:%.*]] = sext <3 x i5> [[Y:%.*]] to <3 x i8> +; CHECK-NEXT:[[M:%.*]] = call <3 x i8> @llvm.umin.v3i8(<3 x i8> [[SX]], <3 x i8> [[SY]]) +; CHECK-NEXT:ret <3 x i8> [[M]] +; + %sx = sext <3 x i5> %x to <3 x i8> + %sy = sext <3 x i5> %y to <3 x i8> + %m = call <3 x i8> @llvm.umin.v3i8(<3 x i8> %sx, <3 x i8> %sy) + ret <3 x i8> %m +} + +define i8 @smax_zext(i5 %x, i5 %y) { +; CHECK-LABEL: @smax_zext( +; CHECK-NEXT:[[ZX:%.*]] = zext i5 [[X:%.*]] to i8 +; CHECK-NEXT:[[ZY:%.*]] = zext i5 [[Y:%.*]] to i8 +; CHECK-NEXT:[[M:%.*]] = call i8 @llvm.smax.i8(i8 [[ZX]], i8 [[ZY]]) +; CHECK-NEXT:ret i8 [[M]] +; + %zx = zext i5 %x to i8 + %zy = zext i5 %y to i8 + %m = call i8 @llvm.smax.i8(i8 %zx, i8 %zy) + ret i8 %m +} + +define i8 @smin_zext(i5 %x, i5 %y) { +; CHECK-LABEL: @smin_zext( +; CHECK-NEXT:[[ZX:%.*]] = zext i5 [[X:%.*]] to i8 +; CHECK-NEXT:[[ZY:%.*]] = zext i5 [[Y:%.*]] to i8 +; CHECK-NEXT:[[M:%.*]] = call i8 @llvm.smin.i8(i8 [[ZX]], i8 [[ZY]]) +; CHECK-NEXT:ret i8 [[M]] +; + 
%zx = zext i5 %x to i8 + %zy = zext i5 %y to i8 + %m = call i8 @llvm.smin.i8(i8 %zx, i8 %zy) + ret i8 %m +} + +define i8 @umax_zext(i5 %x, i5 %y) { +; CHECK-LABEL: @umax_zext( +; CHECK-NEXT:[[ZX:%.*]] = zext i5 [[X:%.*]] to i8 +; CHECK-NEXT:[[ZY:%.*]] = zext i5 [[Y:%.*]] to i8 +; CHECK-NEXT:[[M:%.*]] = call i8 @llvm.umax.i8(i8 [[ZX]], i8 [[ZY]]) +; CHECK-NEXT:ret i8 [[M]] +; + %zx = zext i5 %x to i8 + %zy = zext i5 %y to i8 + %m = call i8 @llvm.umax.i8(i8 %zx, i8 %zy) + ret i8 %m +} + +define i8 @umin_zext(i5 %x, i5 %y) { +; CHECK-LABEL: @umin_zext( +; CHECK-NEXT:[[ZX:%.*]] = zext i5 [[X:%.*]] to i8 +; CHECK-NEXT:[[ZY:%.*]] = zext i5 [[Y:%.*]] to i8 +; CHECK-NEXT:[[M:%.*]] = call i8 @llvm.umin.i8(i8 [[ZX]], i8 [[ZY]]) +; CHECK-NEXT:ret i8 [[M]] +; + %zx = zext i5 %x to i8 + %zy = zext i5 %y to i8 + %m = call i8 @llvm.umin.i8(i8 %zx, i8 %zy) + ret i8 %m +} + +define i8 @umin_zext_types(i6 %x, i5 %y) { +; CHECK-LABEL: @umin_zext_types( +; CHECK-NEXT:[[ZX:%.*]] = zext i6 [[X:%.*]] to i8 +; CHECK-NEXT:[[ZY:%.*]] = zext i5 [[Y:%.*]] to i8 +; CHECK-NEXT:[[M:%.*]] = call i8 @llvm.umin.i8(i8 [[ZX]], i8 [[ZY]]) +; CHECK-NEXT:re
[llvm-branch-commits] [llvm] 09a136b - [InstCombine] narrow min/max intrinsics with extended inputs
Author: Sanjay Patel Date: 2021-01-25T07:52:50-05:00 New Revision: 09a136bcc6947128df86492d88f1733bdff745d1 URL: https://github.com/llvm/llvm-project/commit/09a136bcc6947128df86492d88f1733bdff745d1 DIFF: https://github.com/llvm/llvm-project/commit/09a136bcc6947128df86492d88f1733bdff745d1.diff LOG: [InstCombine] narrow min/max intrinsics with extended inputs We can sink extends after min/max if they match and would not change the sign-interpreted compare. The only combo that doesn't work is zext+smin/smax because the zexts could change a negative number into positive: https://alive2.llvm.org/ce/z/D6sz6J Sext+umax/umin works: define i32 @src(i8 %x, i8 %y) { %0: %sx = sext i8 %x to i32 %sy = sext i8 %y to i32 %m = umax i32 %sx, %sy ret i32 %m } => define i32 @tgt(i8 %x, i8 %y) { %0: %m = umax i8 %x, %y %r = sext i8 %m to i32 ret i32 %r } Transformation seems to be correct! Added: Modified: llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp llvm/test/Transforms/InstCombine/minmax-intrinsics.ll Removed: diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 5ba51d255109..0b4246feecee 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -830,6 +830,30 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { break; } + case Intrinsic::umax: + case Intrinsic::umin: { +Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1); +Value *X, *Y; +if (match(I0, m_ZExt(m_Value(X))) && match(I1, m_ZExt(m_Value(Y))) && +(I0->hasOneUse() || I1->hasOneUse()) && X->getType() == Y->getType()) { + Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, Y); + return CastInst::Create(Instruction::ZExt, NarrowMaxMin, II->getType()); +} +// If both operands of unsigned min/max are sign-extended, it is still ok +// to narrow the operation. +LLVM_FALLTHROUGH; + } + case Intrinsic::smax: + case Intrinsic::smin: { +Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1); +Value *X, *Y; +if (match(I0, m_SExt(m_Value(X))) && match(I1, m_SExt(m_Value(Y))) && +(I0->hasOneUse() || I1->hasOneUse()) && X->getType() == Y->getType()) { + Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, Y); + return CastInst::Create(Instruction::SExt, NarrowMaxMin, II->getType()); +} +break; + } case Intrinsic::bswap: { Value *IIOperand = II->getArgOperand(0); Value *X = nullptr; diff --git a/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll b/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll index bccfac81bdce..97ed799f32a8 100644 --- a/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll +++ b/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll @@ -50,9 +50,8 @@ define i8 @smax_known_bits(i8 %x, i8 %y) { define i8 @smax_sext(i5 %x, i5 %y) { ; CHECK-LABEL: @smax_sext( -; CHECK-NEXT:[[SX:%.*]] = sext i5 [[X:%.*]] to i8 -; CHECK-NEXT:[[SY:%.*]] = sext i5 [[Y:%.*]] to i8 -; CHECK-NEXT:[[M:%.*]] = call i8 @llvm.smax.i8(i8 [[SX]], i8 [[SY]]) +; CHECK-NEXT:[[TMP1:%.*]] = call i5 @llvm.smax.i5(i5 [[X:%.*]], i5 [[Y:%.*]]) +; CHECK-NEXT:[[M:%.*]] = sext i5 [[TMP1]] to i8 ; CHECK-NEXT:ret i8 [[M]] ; %sx = sext i5 %x to i8 @@ -61,12 +60,14 @@ define i8 @smax_sext(i5 %x, i5 %y) { ret i8 %m } +; Extra use is ok. 
+ define i8 @smin_sext(i5 %x, i5 %y) { ; CHECK-LABEL: @smin_sext( -; CHECK-NEXT:[[SX:%.*]] = sext i5 [[X:%.*]] to i8 ; CHECK-NEXT:[[SY:%.*]] = sext i5 [[Y:%.*]] to i8 ; CHECK-NEXT:call void @use(i8 [[SY]]) -; CHECK-NEXT:[[M:%.*]] = call i8 @llvm.smin.i8(i8 [[SX]], i8 [[SY]]) +; CHECK-NEXT:[[TMP1:%.*]] = call i5 @llvm.smin.i5(i5 [[X:%.*]], i5 [[Y]]) +; CHECK-NEXT:[[M:%.*]] = sext i5 [[TMP1]] to i8 ; CHECK-NEXT:ret i8 [[M]] ; %sx = sext i5 %x to i8 @@ -76,12 +77,14 @@ define i8 @smin_sext(i5 %x, i5 %y) { ret i8 %m } +; Sext doesn't change unsigned min/max comparison of narrow values. + define i8 @umax_sext(i5 %x, i5 %y) { ; CHECK-LABEL: @umax_sext( ; CHECK-NEXT:[[SX:%.*]] = sext i5 [[X:%.*]] to i8 ; CHECK-NEXT:call void @use(i8 [[SX]]) -; CHECK-NEXT:[[SY:%.*]] = sext i5 [[Y:%.*]] to i8 -; CHECK-NEXT:[[M:%.*]] = call i8 @llvm.umax.i8(i8 [[SX]], i8 [[SY]]) +; CHECK-NEXT:[[TMP1:%.*]] = call i5 @llvm.umax.i5(i5 [[X]], i5 [[Y:%.*]]) +; CHECK-NEXT:[[M:%.*]] = sext i5 [[TMP1]] to i8 ; CHECK-NEXT:ret i8 [[M]] ; %sx = sext i5 %x to i8 @@ -93,9 +96,8 @@ define i8 @umax_sext(i5 %x, i5 %y) { define <3 x i8> @umin_sext(<3 x i5> %x, <3 x i5> %y) { ; CHECK-LABEL: @umin_sext( -; CHECK-NEXT:[[SX:%.*]] = sext <3 x i5> [[X:%.*]] to <3 x i8> -; CHECK-NEXT:[[SY:%.*]] = sext <3 x i5> [[Y:%.*]] to <3 x
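The sign-interpretation argument in the log can be checked with ordinary integer code. The following standalone snippet (plain C++, not part of the patch; i8/i32 stand in for the narrow/wide types) exhaustively verifies that umax over sign-extended values can be narrowed, and shows the zext+smax counterexample:

#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  // sext preserves the unsigned order of the narrow values, so
  // umax(sext x, sext y) == sext(umax x, y) for every pair.
  for (int xi = -128; xi <= 127; ++xi) {
    for (int yi = -128; yi <= 127; ++yi) {
      int8_t x = (int8_t)xi, y = (int8_t)yi;
      uint32_t wide = std::max((uint32_t)(int32_t)x, (uint32_t)(int32_t)y);
      int8_t narrow = (uint8_t)x > (uint8_t)y ? x : y;
      assert(wide == (uint32_t)(int32_t)narrow);
    }
  }

  // zext+smax cannot be narrowed: zext turns -1 into +255, so the wide
  // signed max picks a different operand than the narrow one would.
  int8_t a = -1, b = 0;
  int32_t wideSMax = std::max((int32_t)(uint8_t)a, (int32_t)(uint8_t)b); // 255
  int8_t narrowSMax = a > b ? a : b;                                     // 0
  assert(wideSMax != (int32_t)(uint8_t)narrowSMax);
  return 0;
}

This mirrors the structure of the new InstCombine code: the zext path is taken only for umin/umax, while the sext path (reached via the fallthrough) applies to all four intrinsics.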
[llvm-branch-commits] [llvm] 46507a9 - [SLP] reduce code duplication while matching reductions; NFC
Author: Sanjay Patel Date: 2021-01-12T16:03:57-05:00 New Revision: 46507a96fc13146f73e5915a008055c5a59191c2 URL: https://github.com/llvm/llvm-project/commit/46507a96fc13146f73e5915a008055c5a59191c2 DIFF: https://github.com/llvm/llvm-project/commit/46507a96fc13146f73e5915a008055c5a59191c2.diff LOG: [SLP] reduce code duplication while matching reductions; NFC Added: Modified: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp Removed: diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index bd673d112b3a..ff22572782e2 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -6857,49 +6857,48 @@ class HorizontalReduction { // Visit left or right. Value *NextV = TreeN->getOperand(EdgeToVisit); - if (NextV != Phi) { -auto *I = dyn_cast(NextV); -OpData = getOperationData(I); -// Continue analysis if the next operand is a reduction operation or -// (possibly) a reduced value. If the reduced value opcode is not set, -// the first met operation != reduction operation is considered as the -// reduced value class. -const bool IsRdxInst = OpData == RdxTreeInst; -if (I && (!RdxLeafVal || OpData == RdxLeafVal || IsRdxInst)) { - // Only handle trees in the current basic block. - if (!RdxTreeInst.hasSameParent(I, B->getParent(), IsRdxInst)) { -// I is an extra argument for TreeN (its parent operation). -markExtraArg(Stack.back(), I); -continue; - } + auto *I = dyn_cast(NextV); + OpData = getOperationData(I); + // Continue analysis if the next operand is a reduction operation or + // (possibly) a reduced value. If the reduced value opcode is not set, + // the first met operation != reduction operation is considered as the + // reduced value class. + const bool IsRdxInst = OpData == RdxTreeInst; + if (I && I != Phi && + (!RdxLeafVal || OpData == RdxLeafVal || IsRdxInst)) { +// Only handle trees in the current basic block. +if (!RdxTreeInst.hasSameParent(I, B->getParent(), IsRdxInst)) { + // I is an extra argument for TreeN (its parent operation). + markExtraArg(Stack.back(), I); + continue; +} - // Each tree node needs to have minimal number of users except for the - // ultimate reduction. - if (!RdxTreeInst.hasRequiredNumberOfUses(I, IsRdxInst) && I != B) { -// I is an extra argument for TreeN (its parent operation). -markExtraArg(Stack.back(), I); -continue; - } +// Each tree node needs to have minimal number of users except for the +// ultimate reduction. +if (!RdxTreeInst.hasRequiredNumberOfUses(I, IsRdxInst) && I != B) { + // I is an extra argument for TreeN (its parent operation). + markExtraArg(Stack.back(), I); + continue; +} - if (IsRdxInst) { -// We need to be able to reassociate the reduction operations. -if (!OpData.isAssociative(I)) { - // I is an extra argument for TreeN (its parent operation). - markExtraArg(Stack.back(), I); - continue; -} - } else if (RdxLeafVal && RdxLeafVal != OpData) { -// Make sure that the opcodes of the operations that we are going to -// reduce match. +if (IsRdxInst) { + // We need to be able to reassociate the reduction operations. + if (!OpData.isAssociative(I)) { // I is an extra argument for TreeN (its parent operation). markExtraArg(Stack.back(), I); continue; - } else if (!RdxLeafVal) { -RdxLeafVal = OpData; } - Stack.push_back(std::make_pair(I, OpData.getFirstOperandIndex())); +} else if (RdxLeafVal && RdxLeafVal != OpData) { + // Make sure that the opcodes of the operations that we are going to + // reduce match. 
+ // I is an extra argument for TreeN (its parent operation). + markExtraArg(Stack.back(), I); continue; +} else if (!RdxLeafVal) { + RdxLeafVal = OpData; } +Stack.push_back(std::make_pair(I, OpData.getFirstOperandIndex())); +continue; } // NextV is an extra argument for TreeN (its parent operation). markExtraArg(Stack.back(), NextV); ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] 554be30 - [SLP] reduce code duplication in processing reductions; NFC
Author: Sanjay Patel Date: 2021-01-12T16:03:57-05:00 New Revision: 554be30a42802d66807f93e4671a518c1c04e0f8 URL: https://github.com/llvm/llvm-project/commit/554be30a42802d66807f93e4671a518c1c04e0f8 DIFF: https://github.com/llvm/llvm-project/commit/554be30a42802d66807f93e4671a518c1c04e0f8.diff LOG: [SLP] reduce code duplication in processing reductions; NFC Added: Modified: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp Removed: diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index ff22572782e2..04bdc74c7879 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -6867,38 +6867,29 @@ class HorizontalReduction { if (I && I != Phi && (!RdxLeafVal || OpData == RdxLeafVal || IsRdxInst)) { // Only handle trees in the current basic block. -if (!RdxTreeInst.hasSameParent(I, B->getParent(), IsRdxInst)) { - // I is an extra argument for TreeN (its parent operation). - markExtraArg(Stack.back(), I); - continue; -} - // Each tree node needs to have minimal number of users except for the // ultimate reduction. -if (!RdxTreeInst.hasRequiredNumberOfUses(I, IsRdxInst) && I != B) { - // I is an extra argument for TreeN (its parent operation). - markExtraArg(Stack.back(), I); - continue; -} - -if (IsRdxInst) { - // We need to be able to reassociate the reduction operations. - if (!OpData.isAssociative(I)) { +if (RdxTreeInst.hasSameParent(I, B->getParent(), IsRdxInst) && +RdxTreeInst.hasRequiredNumberOfUses(I, IsRdxInst) && I != B) { + if (IsRdxInst) { +// We need to be able to reassociate the reduction operations. +if (!OpData.isAssociative(I)) { + // I is an extra argument for TreeN (its parent operation). + markExtraArg(Stack.back(), I); + continue; +} + } else if (RdxLeafVal && RdxLeafVal != OpData) { +// Make sure that the opcodes of the operations that we are going to +// reduce match. // I is an extra argument for TreeN (its parent operation). markExtraArg(Stack.back(), I); continue; + } else if (!RdxLeafVal) { +RdxLeafVal = OpData; } -} else if (RdxLeafVal && RdxLeafVal != OpData) { - // Make sure that the opcodes of the operations that we are going to - // reduce match. - // I is an extra argument for TreeN (its parent operation). - markExtraArg(Stack.back(), I); + Stack.push_back(std::make_pair(I, OpData.getFirstOperandIndex())); continue; -} else if (!RdxLeafVal) { - RdxLeafVal = OpData; } -Stack.push_back(std::make_pair(I, OpData.getFirstOperandIndex())); -continue; } // NextV is an extra argument for TreeN (its parent operation). markExtraArg(Stack.back(), NextV); ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] 92fb5c4 - [SLP] rename variable to improve readability; NFC
Author: Sanjay Patel Date: 2021-01-12T16:03:57-05:00 New Revision: 92fb5c49e8aa53ac97fa2fb1a891a4d7ccfd75c5 URL: https://github.com/llvm/llvm-project/commit/92fb5c49e8aa53ac97fa2fb1a891a4d7ccfd75c5 DIFF: https://github.com/llvm/llvm-project/commit/92fb5c49e8aa53ac97fa2fb1a891a4d7ccfd75c5.diff LOG: [SLP] rename variable to improve readability; NFC The OperationData in the 2nd block (visiting the operands) is completely independent of the 1st block. Added: Modified: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp Removed: diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 04bdc74c7879..1ef762c9dfa7 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -6826,7 +6826,7 @@ class HorizontalReduction { while (!Stack.empty()) { Instruction *TreeN = Stack.back().first; unsigned EdgeToVisit = Stack.back().second++; - OperationData OpData = getOperationData(TreeN); + const OperationData OpData = getOperationData(TreeN); bool IsReducedValue = OpData != RdxTreeInst; // Postorder vist. @@ -6858,14 +6858,14 @@ class HorizontalReduction { // Visit left or right. Value *NextV = TreeN->getOperand(EdgeToVisit); auto *I = dyn_cast(NextV); - OpData = getOperationData(I); + const OperationData EdgeOpData = getOperationData(I); // Continue analysis if the next operand is a reduction operation or // (possibly) a reduced value. If the reduced value opcode is not set, // the first met operation != reduction operation is considered as the // reduced value class. - const bool IsRdxInst = OpData == RdxTreeInst; + const bool IsRdxInst = EdgeOpData == RdxTreeInst; if (I && I != Phi && - (!RdxLeafVal || OpData == RdxLeafVal || IsRdxInst)) { + (!RdxLeafVal || EdgeOpData == RdxLeafVal || IsRdxInst)) { // Only handle trees in the current basic block. // Each tree node needs to have minimal number of users except for the // ultimate reduction. @@ -6873,21 +6873,21 @@ class HorizontalReduction { RdxTreeInst.hasRequiredNumberOfUses(I, IsRdxInst) && I != B) { if (IsRdxInst) { // We need to be able to reassociate the reduction operations. -if (!OpData.isAssociative(I)) { +if (!EdgeOpData.isAssociative(I)) { // I is an extra argument for TreeN (its parent operation). markExtraArg(Stack.back(), I); continue; } - } else if (RdxLeafVal && RdxLeafVal != OpData) { + } else if (RdxLeafVal && RdxLeafVal != EdgeOpData) { // Make sure that the opcodes of the operations that we are going to // reduce match. // I is an extra argument for TreeN (its parent operation). markExtraArg(Stack.back(), I); continue; } else if (!RdxLeafVal) { -RdxLeafVal = OpData; +RdxLeafVal = EdgeOpData; } - Stack.push_back(std::make_pair(I, OpData.getFirstOperandIndex())); + Stack.push_back(std::make_pair(I, EdgeOpData.getFirstOperandIndex())); continue; } } ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] 9e7895a - [SLP] reduce code duplication while processing reductions; NFC
Author: Sanjay Patel Date: 2021-01-12T16:03:57-05:00 New Revision: 9e7895a8682ce3ad98c006955d573d5f2fded4f6 URL: https://github.com/llvm/llvm-project/commit/9e7895a8682ce3ad98c006955d573d5f2fded4f6 DIFF: https://github.com/llvm/llvm-project/commit/9e7895a8682ce3ad98c006955d573d5f2fded4f6.diff LOG: [SLP] reduce code duplication while processing reductions; NFC Added: Modified: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp Removed: diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 1ef762c9dfa7..403170447f5a 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -6863,33 +6863,32 @@ class HorizontalReduction { // (possibly) a reduced value. If the reduced value opcode is not set, // the first met operation != reduction operation is considered as the // reduced value class. + // Only handle trees in the current basic block. + // Each tree node needs to have minimal number of users except for the + // ultimate reduction. const bool IsRdxInst = EdgeOpData == RdxTreeInst; - if (I && I != Phi && + if (I && I != Phi && I != B && + RdxTreeInst.hasSameParent(I, B->getParent(), IsRdxInst) && + RdxTreeInst.hasRequiredNumberOfUses(I, IsRdxInst) && (!RdxLeafVal || EdgeOpData == RdxLeafVal || IsRdxInst)) { -// Only handle trees in the current basic block. -// Each tree node needs to have minimal number of users except for the -// ultimate reduction. -if (RdxTreeInst.hasSameParent(I, B->getParent(), IsRdxInst) && -RdxTreeInst.hasRequiredNumberOfUses(I, IsRdxInst) && I != B) { - if (IsRdxInst) { -// We need to be able to reassociate the reduction operations. -if (!EdgeOpData.isAssociative(I)) { - // I is an extra argument for TreeN (its parent operation). - markExtraArg(Stack.back(), I); - continue; -} - } else if (RdxLeafVal && RdxLeafVal != EdgeOpData) { -// Make sure that the opcodes of the operations that we are going to -// reduce match. +if (IsRdxInst) { + // We need to be able to reassociate the reduction operations. + if (!EdgeOpData.isAssociative(I)) { // I is an extra argument for TreeN (its parent operation). markExtraArg(Stack.back(), I); continue; - } else if (!RdxLeafVal) { -RdxLeafVal = EdgeOpData; } - Stack.push_back(std::make_pair(I, EdgeOpData.getFirstOperandIndex())); +} else if (RdxLeafVal && RdxLeafVal != EdgeOpData) { + // Make sure that the opcodes of the operations that we are going to + // reduce match. + // I is an extra argument for TreeN (its parent operation). + markExtraArg(Stack.back(), I); continue; +} else if (!RdxLeafVal) { + RdxLeafVal = EdgeOpData; } +Stack.push_back(std::make_pair(I, EdgeOpData.getFirstOperandIndex())); +continue; } // NextV is an extra argument for TreeN (its parent operation). markExtraArg(Stack.back(), NextV); ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] e433ca2 - [SLP] add reduction test for FMF; NFC
Author: Sanjay Patel Date: 2021-01-13T11:43:51-05:00 New Revision: e433ca28ec923929efe4f6babb8d33b4e6673ac1 URL: https://github.com/llvm/llvm-project/commit/e433ca28ec923929efe4f6babb8d33b4e6673ac1 DIFF: https://github.com/llvm/llvm-project/commit/e433ca28ec923929efe4f6babb8d33b4e6673ac1.diff LOG: [SLP] add reduction test for FMF; NFC Added: Modified: llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll Removed: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll index faa4a186e6c4..33b4f7f706fe 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll @@ -1197,6 +1197,58 @@ define float @extra_args_no_replace(float* nocapture readonly %x, i32 %a, i32 %b ret float %add4.6 } +define float @extra_args_no_fast(float* %x, float %a, float %b) { +; CHECK-LABEL: @extra_args_no_fast( +; CHECK-NEXT:[[ADDC:%.*]] = fadd fast float [[B:%.*]], 3.00e+00 +; CHECK-NEXT:[[ADD:%.*]] = fadd fast float [[A:%.*]], [[ADDC]] +; CHECK-NEXT:[[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1 +; CHECK-NEXT:[[ARRAYIDX3_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 2 +; CHECK-NEXT:[[ARRAYIDX3_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 3 +; CHECK-NEXT:[[T0:%.*]] = load float, float* [[X]], align 4 +; CHECK-NEXT:[[T1:%.*]] = load float, float* [[ARRAYIDX3]], align 4 +; CHECK-NEXT:[[T2:%.*]] = load float, float* [[ARRAYIDX3_1]], align 4 +; CHECK-NEXT:[[T3:%.*]] = load float, float* [[ARRAYIDX3_2]], align 4 +; CHECK-NEXT:[[ADD1:%.*]] = fadd fast float [[T0]], [[ADD]] +; CHECK-NEXT:[[ADD4:%.*]] = fadd fast float [[T1]], [[ADD1]] +; CHECK-NEXT:[[ADD4_1:%.*]] = fadd float [[T2]], [[ADD4]] +; CHECK-NEXT:[[ADD4_2:%.*]] = fadd fast float [[T3]], [[ADD4_1]] +; CHECK-NEXT:[[ADD5:%.*]] = fadd fast float [[ADD4_2]], [[A]] +; CHECK-NEXT:ret float [[ADD5]] +; +; THRESHOLD-LABEL: @extra_args_no_fast( +; THRESHOLD-NEXT:[[ADDC:%.*]] = fadd fast float [[B:%.*]], 3.00e+00 +; THRESHOLD-NEXT:[[ADD:%.*]] = fadd fast float [[A:%.*]], [[ADDC]] +; THRESHOLD-NEXT:[[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i64 1 +; THRESHOLD-NEXT:[[ARRAYIDX3_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 2 +; THRESHOLD-NEXT:[[ARRAYIDX3_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 3 +; THRESHOLD-NEXT:[[T0:%.*]] = load float, float* [[X]], align 4 +; THRESHOLD-NEXT:[[T1:%.*]] = load float, float* [[ARRAYIDX3]], align 4 +; THRESHOLD-NEXT:[[T2:%.*]] = load float, float* [[ARRAYIDX3_1]], align 4 +; THRESHOLD-NEXT:[[T3:%.*]] = load float, float* [[ARRAYIDX3_2]], align 4 +; THRESHOLD-NEXT:[[ADD1:%.*]] = fadd fast float [[T0]], [[ADD]] +; THRESHOLD-NEXT:[[ADD4:%.*]] = fadd fast float [[T1]], [[ADD1]] +; THRESHOLD-NEXT:[[ADD4_1:%.*]] = fadd float [[T2]], [[ADD4]] +; THRESHOLD-NEXT:[[ADD4_2:%.*]] = fadd fast float [[T3]], [[ADD4_1]] +; THRESHOLD-NEXT:[[ADD5:%.*]] = fadd fast float [[ADD4_2]], [[A]] +; THRESHOLD-NEXT:ret float [[ADD5]] +; + %addc = fadd fast float %b, 3.0 + %add = fadd fast float %a, %addc + %arrayidx3 = getelementptr inbounds float, float* %x, i64 1 + %arrayidx3.1 = getelementptr inbounds float, float* %x, i64 2 + %arrayidx3.2 = getelementptr inbounds float, float* %x, i64 3 + %t0 = load float, float* %x, align 4 + %t1 = load float, float* %arrayidx3, align 4 + %t2 = load float, float* %arrayidx3.1, align 4 + %t3 = load float, float* %arrayidx3.2, align 4 + %add1 = fadd fast float %t0, %add + %add4 = 
fadd fast float %t1, %add1 + %add4.1 = fadd float %t2, %add4 ; this is not a reduction candidate + %add4.2 = fadd fast float %t3, %add4.1 + %add5 = fadd fast float %add4.2, %a + ret float %add5 +} + define i32 @wobble(i32 %arg, i32 %bar) { ; CHECK-LABEL: @wobble( ; CHECK-NEXT: bb: ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] 123674a - [SLP] simplify type check for reductions
Author: Sanjay Patel Date: 2021-01-13T13:30:46-05:00 New Revision: 123674a816742254debdfcc978026b8107b502d8 URL: https://github.com/llvm/llvm-project/commit/123674a816742254debdfcc978026b8107b502d8 DIFF: https://github.com/llvm/llvm-project/commit/123674a816742254debdfcc978026b8107b502d8.diff LOG: [SLP] simplify type check for reductions This is NFC-intended. The 'valid' call allows int/FP/pointers for other parts of SLP. The difference here is that we can't reduce pointers. Added: Modified: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp Removed: diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 403170447f5a..b3a3d65d3340 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -6809,10 +6809,10 @@ class HorizontalReduction { if (!RdxTreeInst.isVectorizable(B)) return false; +// Analyze "regular" integer/FP types for reductions - no target-specific +// types or pointers. Type *Ty = B->getType(); -if (!isValidElementType(Ty)) - return false; -if (!Ty->isIntOrIntVectorTy() && !Ty->isFPOrFPVectorTy()) +if (!isValidElementType(Ty) || Ty->isPointerTy()) return false; RdxLeafVal.clear(); ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] b21905d - [SLP] remove unnecessary state in matching reductions
Author: Sanjay Patel Date: 2021-01-14T18:32:37-05:00 New Revision: b21905dfe3797289791443661540b72cb43dfdf3 URL: https://github.com/llvm/llvm-project/commit/b21905dfe3797289791443661540b72cb43dfdf3 DIFF: https://github.com/llvm/llvm-project/commit/b21905dfe3797289791443661540b72cb43dfdf3.diff LOG: [SLP] remove unnecessary state in matching reductions This is NFC-intended. I'm still trying to figure out how the loop where this is used works. It does not seem like we require this data at all, but it's hard to confirm given the complicated predicates. Added: Modified: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp Removed: diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 0f3f74b63860..3f1279b67519 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -6679,9 +6679,6 @@ class HorizontalReduction { /// The operation data of the reduction operation. OperationData RdxTreeInst; - /// The operation data for the leaf values that we perform a reduction on. - OperationData RdxLeafVal; - /// Checks if the ParentStackElem.first should be marked as a reduction /// operation with an extra argument or as extra argument itself. void markExtraArg(std::pair &ParentStackElem, @@ -6825,9 +6822,11 @@ class HorizontalReduction { if (!isValidElementType(Ty) || Ty->isPointerTy()) return false; -RdxLeafVal.clear(); ReductionRoot = B; +// The operation data for the leaf values that we perform a reduction on. +OperationData RdxLeafVal; + // Post order traverse the reduction tree starting at B. We only handle true // trees containing only binary operators. SmallVector, 32> Stack; ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] 1f21de5 - [SLP] remove unused reduction functions; NFC
Author: Sanjay Patel Date: 2021-01-15T14:59:33-05:00 New Revision: 1f21de535d37997c41b9b1ecb2f7ca0e472e9f77 URL: https://github.com/llvm/llvm-project/commit/1f21de535d37997c41b9b1ecb2f7ca0e472e9f77 DIFF: https://github.com/llvm/llvm-project/commit/1f21de535d37997c41b9b1ecb2f7ca0e472e9f77.diff LOG: [SLP] remove unused reduction functions; NFC These were made obsolete by simplifying the code in recent patches. Added: Modified: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp Removed: diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 3f1279b67519..e1befc449492 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -6615,16 +6615,6 @@ class HorizontalReduction { return Kind == OD.Kind && Opcode == OD.Opcode; } bool operator!=(const OperationData &OD) const { return !(*this == OD); } -void clear() { - Opcode = 0; - Kind = RecurKind::None; -} - -/// Get the opcode of the reduction operation. -unsigned getOpcode() const { - assert(isVectorizable() && "Expected vectorizable operation."); - return Opcode; -} /// Get kind of reduction data. RecurKind getKind() const { return Kind; } ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] ceb3cdc - [SLP] remove dead code in reduction matching; NFC
Author: Sanjay Patel Date: 2021-01-15T17:03:26-05:00 New Revision: ceb3cdccd0fb597659147e0f538fdee91414541e URL: https://github.com/llvm/llvm-project/commit/ceb3cdccd0fb597659147e0f538fdee91414541e DIFF: https://github.com/llvm/llvm-project/commit/ceb3cdccd0fb597659147e0f538fdee91414541e.diff LOG: [SLP] remove dead code in reduction matching; NFC To get into this block we had: !A || B || C and we checked C in the first 'if' clause leaving !A || B. But the 2nd 'if' is checking: A && !B --> !(!A || B) Added: Modified: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp Removed: diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index e1befc449492..cf7c05e30d06 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -6877,12 +6877,6 @@ class HorizontalReduction { markExtraArg(Stack.back(), I); continue; } -} else if (RdxLeafVal && RdxLeafVal != EdgeOpData) { - // Make sure that the opcodes of the operations that we are going to - // reduce match. - // I is an extra argument for TreeN (its parent operation). - markExtraArg(Stack.back(), I); - continue; } else if (!RdxLeafVal) { RdxLeafVal = EdgeOpData; } ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
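The equivalence quoted in the log is a De Morgan-style identity, which is why the second check can never fire inside the guarded block. A one-file check of the identity:

#include <cassert>

int main() {
  // A && !B  is exactly  !(!A || B), so inside a region already guarded by
  // (!A || B) that condition is always false -- hence the code was dead.
  for (bool A : {false, true})
    for (bool B : {false, true})
      assert((A && !B) == !(!A || B));
  return 0;
}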
[llvm-branch-commits] [llvm] 48dbac5 - [SLP] remove unnecessary use of 'OperationData'
Author: Sanjay Patel Date: 2021-01-16T13:55:52-05:00 New Revision: 48dbac5b6b0bc7a03e9af42cb99176abba8d0467 URL: https://github.com/llvm/llvm-project/commit/48dbac5b6b0bc7a03e9af42cb99176abba8d0467 DIFF: https://github.com/llvm/llvm-project/commit/48dbac5b6b0bc7a03e9af42cb99176abba8d0467.diff LOG: [SLP] remove unnecessary use of 'OperationData' This is another NFC-intended patch to allow matching intrinsics (example: maxnum) as candidates for reductions. It's possible that the loop/if logic can be reduced now, but it's still difficult to understand how this all works. Added: Modified: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp Removed: diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index cf7c05e30d06..d5e6dfed8e2c 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -6814,8 +6814,11 @@ class HorizontalReduction { ReductionRoot = B; -// The operation data for the leaf values that we perform a reduction on. -OperationData RdxLeafVal; +// The opcode for leaf values that we perform a reduction on. +// For example: load(x) + load(y) + load(z) + fptoui(w) +// The leaf opcode for 'w' does not match, so we don't include it as a +// potential candidate for the reduction. +unsigned LeafOpcode = 0; // Post order traverse the reduction tree starting at B. We only handle true // trees containing only binary operators. @@ -6859,9 +6862,9 @@ class HorizontalReduction { auto *I = dyn_cast(NextV); const OperationData EdgeOpData = getOperationData(I); // Continue analysis if the next operand is a reduction operation or - // (possibly) a reduced value. If the reduced value opcode is not set, + // (possibly) a leaf value. If the leaf value opcode is not set, // the first met operation != reduction operation is considered as the - // reduced value class. + // leaf opcode. // Only handle trees in the current basic block. // Each tree node needs to have minimal number of users except for the // ultimate reduction. @@ -6869,7 +6872,7 @@ class HorizontalReduction { if (I && I != Phi && I != B && RdxTreeInst.hasSameParent(I, B->getParent(), IsRdxInst) && RdxTreeInst.hasRequiredNumberOfUses(I, IsRdxInst) && - (!RdxLeafVal || EdgeOpData == RdxLeafVal || IsRdxInst)) { + (!LeafOpcode || LeafOpcode == I->getOpcode() || IsRdxInst)) { if (IsRdxInst) { // We need to be able to reassociate the reduction operations. if (!EdgeOpData.isAssociative(I)) { @@ -6877,8 +6880,8 @@ class HorizontalReduction { markExtraArg(Stack.back(), I); continue; } -} else if (!RdxLeafVal) { - RdxLeafVal = EdgeOpData; +} else if (!LeafOpcode) { + LeafOpcode = I->getOpcode(); } Stack.push_back(std::make_pair(I, EdgeOpData.getFirstOperandIndex())); continue; ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] fcfcc3c - [SLP] fix typos; NFC
Author: Sanjay Patel Date: 2021-01-16T13:55:52-05:00 New Revision: fcfcc3cc6b16e4fd7d7d2d07937634cca360b46e URL: https://github.com/llvm/llvm-project/commit/fcfcc3cc6b16e4fd7d7d2d07937634cca360b46e DIFF: https://github.com/llvm/llvm-project/commit/fcfcc3cc6b16e4fd7d7d2d07937634cca360b46e.diff LOG: [SLP] fix typos; NFC Added: Modified: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp Removed: diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index d5e6dfed8e2c..a8d8ef5024d7 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -6784,7 +6784,7 @@ class HorizontalReduction { /// Try to find a reduction tree. bool matchAssociativeReduction(PHINode *Phi, Instruction *B) { assert((!Phi || is_contained(Phi->operands(), B)) && - "Thi phi needs to use the binary operator"); + "Phi needs to use the binary operator"); RdxTreeInst = getOperationData(B); @@ -6831,7 +6831,7 @@ class HorizontalReduction { const OperationData OpData = getOperationData(TreeN); bool IsReducedValue = OpData != RdxTreeInst; - // Postorder vist. + // Postorder visit. if (IsReducedValue || EdgeToVisit == OpData.getNumberOfOperands()) { if (IsReducedValue) ReducedVals.push_back(TreeN); ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] 49b96cd - [SLP] remove opcode field from reduction data class
Author: Sanjay Patel Date: 2021-01-16T13:55:52-05:00 New Revision: 49b96cd9ef2f81d193641796b8a85781292faf7a URL: https://github.com/llvm/llvm-project/commit/49b96cd9ef2f81d193641796b8a85781292faf7a DIFF: https://github.com/llvm/llvm-project/commit/49b96cd9ef2f81d193641796b8a85781292faf7a.diff LOG: [SLP] remove opcode field from reduction data class This is NFC-intended and another step towards supporting intrinsics as reduction candidates. The remaining bits of the OperationData class do not make much sense as-is, so I will try to improve that, but I'm trying to take minimal steps because it's still not clear how this was intended to work. Added: Modified: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp Removed: diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index a8d8ef5024d7..8dd318a880fc 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -6430,40 +6430,15 @@ class HorizontalReduction { // Use map vector to make stable output. MapVector ExtraArgs; - /// Contains info about operation, like its opcode, left and right operands. + /// This wraps functionality around a RecurKind (reduction kind). + /// TODO: Remove this class if callers can use the 'Kind' value directly? class OperationData { -/// Opcode of the instruction. -unsigned Opcode = 0; - /// Kind of the reduction operation. RecurKind Kind = RecurKind::None; +bool IsLeafValue = false; /// Checks if the reduction operation can be vectorized. -bool isVectorizable() const { - switch (Kind) { - case RecurKind::Add: -return Opcode == Instruction::Add; - case RecurKind::Mul: -return Opcode == Instruction::Mul; - case RecurKind::Or: -return Opcode == Instruction::Or; - case RecurKind::And: -return Opcode == Instruction::And; - case RecurKind::Xor: -return Opcode == Instruction::Xor; - case RecurKind::FAdd: -return Opcode == Instruction::FAdd; - case RecurKind::FMul: -return Opcode == Instruction::FMul; - case RecurKind::SMax: - case RecurKind::SMin: - case RecurKind::UMax: - case RecurKind::UMin: -return Opcode == Instruction::ICmp; - default: -return false; - } -} +bool isVectorizable() const { return Kind != RecurKind::None; } /// Creates reduction operation with the current opcode. Value *createOp(IRBuilder<> &Builder, Value *LHS, Value *RHS, @@ -6505,19 +6480,17 @@ class HorizontalReduction { public: explicit OperationData() = default; -/// Construction for reduced values. They are identified by opcode only and -/// don't have associated LHS/RHS values. -explicit OperationData(Instruction &I) { - Opcode = I.getOpcode(); -} +/// Constructor for reduced values. They are identified by the bool only. +explicit OperationData(Instruction &I) { IsLeafValue = true; } /// Constructor for reduction operations with opcode and type. -OperationData(unsigned Opcode, RecurKind RdxKind) -: Opcode(Opcode), Kind(RdxKind) { +OperationData(RecurKind RdxKind) : Kind(RdxKind) { assert(Kind != RecurKind::None && "Expected reduction operation."); } -explicit operator bool() const { return Opcode; } +explicit operator bool() const { + return IsLeafValue || Kind != RecurKind::None; +} /// Return true if this operation is any kind of minimum or maximum. bool isMinMax() const { @@ -6580,8 +6553,7 @@ class HorizontalReduction { /// Add all reduction operations for the reduction instruction \p I. 
void addReductionOps(Instruction *I, ReductionOpsListType &ReductionOps) { - assert(Kind != RecurKind::None && !!*this && - "Expected reduction operation."); + assert(Kind != RecurKind::None && "Expected reduction operation."); if (isMinMax()) { ReductionOps[0].emplace_back(cast(I)->getCondition()); ReductionOps[1].emplace_back(I); @@ -6592,13 +6564,10 @@ class HorizontalReduction { /// Checks if instruction is associative and can be vectorized. bool isAssociative(Instruction *I) const { - assert(Kind != RecurKind::None && *this && - "Expected reduction operation."); - if (RecurrenceDescriptor::isIntMinMaxRecurrenceKind(Kind)) { -assert(Opcode == Instruction::ICmp && - "Only integer compare operation is expected."); + assert(Kind != RecurKind::None && "Expected reduction operation."); + if (RecurrenceDescriptor::isIntMinMaxRecurrenceKind(Kind)) return true; - } + return I->isAssociative(); } @@ -6610,9 +6579,7 @@ class HorizontalReduction { /// Checks if two operation data are both a reduc
[llvm-branch-commits] [llvm] d1c4e85 - [SLP] reduce opcode API dependency in reduction cost calc; NFC
Author: Sanjay Patel Date: 2021-01-18T09:32:57-05:00 New Revision: d1c4e859ce42c35c61a0db2f1eb8a4209be4503d URL: https://github.com/llvm/llvm-project/commit/d1c4e859ce42c35c61a0db2f1eb8a4209be4503d DIFF: https://github.com/llvm/llvm-project/commit/d1c4e859ce42c35c61a0db2f1eb8a4209be4503d.diff LOG: [SLP] reduce opcode API dependency in reduction cost calc; NFC The icmp opcode is now hard-coded in the cost model call. This will make it easier to eventually remove all opcode queries for min/max patterns as we transition to intrinsics. Added: Modified: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp Removed: diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 8dd318a880fc..bf8ef208ccf9 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -7058,12 +7058,10 @@ class HorizontalReduction { int getReductionCost(TargetTransformInfo *TTI, Value *FirstReducedVal, unsigned ReduxWidth) { Type *ScalarTy = FirstReducedVal->getType(); -auto *VecTy = FixedVectorType::get(ScalarTy, ReduxWidth); +FixedVectorType *VectorTy = FixedVectorType::get(ScalarTy, ReduxWidth); RecurKind Kind = RdxTreeInst.getKind(); -unsigned RdxOpcode = RecurrenceDescriptor::getOpcode(Kind); -int SplittingRdxCost; -int ScalarReduxCost; +int VectorCost, ScalarCost; switch (Kind) { case RecurKind::Add: case RecurKind::Mul: @@ -7071,22 +7069,24 @@ class HorizontalReduction { case RecurKind::And: case RecurKind::Xor: case RecurKind::FAdd: -case RecurKind::FMul: - SplittingRdxCost = TTI->getArithmeticReductionCost( - RdxOpcode, VecTy, /*IsPairwiseForm=*/false); - ScalarReduxCost = TTI->getArithmeticInstrCost(RdxOpcode, ScalarTy); +case RecurKind::FMul: { + unsigned RdxOpcode = RecurrenceDescriptor::getOpcode(Kind); + VectorCost = TTI->getArithmeticReductionCost(RdxOpcode, VectorTy, + /*IsPairwiseForm=*/false); + ScalarCost = TTI->getArithmeticInstrCost(RdxOpcode, ScalarTy); break; +} case RecurKind::SMax: case RecurKind::SMin: case RecurKind::UMax: case RecurKind::UMin: { - auto *VecCondTy = cast(CmpInst::makeCmpResultType(VecTy)); + auto *VecCondTy = cast(CmpInst::makeCmpResultType(VectorTy)); bool IsUnsigned = Kind == RecurKind::UMax || Kind == RecurKind::UMin; - SplittingRdxCost = - TTI->getMinMaxReductionCost(VecTy, VecCondTy, + VectorCost = + TTI->getMinMaxReductionCost(VectorTy, VecCondTy, /*IsPairwiseForm=*/false, IsUnsigned); - ScalarReduxCost = - TTI->getCmpSelInstrCost(RdxOpcode, ScalarTy) + + ScalarCost = + TTI->getCmpSelInstrCost(Instruction::ICmp, ScalarTy) + TTI->getCmpSelInstrCost(Instruction::Select, ScalarTy, CmpInst::makeCmpResultType(ScalarTy)); break; @@ -7095,12 +7095,12 @@ class HorizontalReduction { llvm_unreachable("Expected arithmetic or min/max reduction operation"); } -ScalarReduxCost *= (ReduxWidth - 1); -LLVM_DEBUG(dbgs() << "SLP: Adding cost " - << SplittingRdxCost - ScalarReduxCost +// Scalar cost is repeated for N-1 elements. +ScalarCost *= (ReduxWidth - 1); +LLVM_DEBUG(dbgs() << "SLP: Adding cost " << VectorCost - ScalarCost << " for reduction that starts with " << *FirstReducedVal << " (It is a splitting reduction)\n"); -return SplittingRdxCost - ScalarReduxCost; +return VectorCost - ScalarCost; } /// Emit a horizontal reduction of the vectorized value. ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
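The cost comparison itself is unchanged: the scalar side charges one scalar op per reduced element except the first, and the result is the (possibly negative) delta against the vector reduction cost. A toy rendering with made-up numbers (real values come from TargetTransformInfo; the helper is illustrative only):

#include <cstdio>

// Negative delta means the vector reduction looks cheaper than the scalars.
static int reductionCostDelta(int vectorReductionCost, int scalarOpCost,
                              unsigned reduxWidth) {
  int scalarCost = scalarOpCost * (int)(reduxWidth - 1); // N-1 scalar ops
  return vectorReductionCost - scalarCost;
}

int main() {
  // e.g. a 4-wide fadd reduction: vector cost 2 vs. 3 scalar fadds of cost 1.
  std::printf("delta = %d\n", reductionCostDelta(2, 1, 4)); // prints -1
  return 0;
}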
[llvm-branch-commits] [llvm] 3dbbadb - [SLP] rename reduction query for min/max ops; NFC
Author: Sanjay Patel Date: 2021-01-18T09:32:57-05:00 New Revision: 3dbbadb8ef53d1e91785c17ccd70848de7e842e9 URL: https://github.com/llvm/llvm-project/commit/3dbbadb8ef53d1e91785c17ccd70848de7e842e9 DIFF: https://github.com/llvm/llvm-project/commit/3dbbadb8ef53d1e91785c17ccd70848de7e842e9.diff LOG: [SLP] rename reduction query for min/max ops; NFC This will avoid confusion once we start matching min/max intrinsics. All of these hacks to accomodate cmp+sel idioms should disappear once we canonicalize to min/max intrinsics. Added: Modified: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp Removed: diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index bf8ef208ccf9..0323e02d0d2c 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -6492,8 +6492,8 @@ class HorizontalReduction { return IsLeafValue || Kind != RecurKind::None; } -/// Return true if this operation is any kind of minimum or maximum. -bool isMinMax() const { +/// Return true if this operation is a cmp+select idiom. +bool isCmpSel() const { assert(Kind != RecurKind::None && "Expected reduction operation."); return RecurrenceDescriptor::isIntMinMaxRecurrenceKind(Kind); } @@ -6504,14 +6504,14 @@ class HorizontalReduction { // We allow calling this before 'Kind' is set, so handle that specially. if (Kind == RecurKind::None) return 0; - return isMinMax() ? 1 : 0; + return isCmpSel() ? 1 : 0; } /// Total number of operands in the reduction operation. unsigned getNumberOfOperands() const { assert(Kind != RecurKind::None && !!*this && "Expected reduction operation."); - return isMinMax() ? 3 : 2; + return isCmpSel() ? 3 : 2; } /// Checks if the instruction is in basic block \p BB. @@ -6519,7 +6519,7 @@ class HorizontalReduction { bool hasSameParent(Instruction *I, BasicBlock *BB, bool IsRedOp) const { assert(Kind != RecurKind::None && !!*this && "Expected reduction operation."); - if (IsRedOp && isMinMax()) { + if (IsRedOp && isCmpSel()) { auto *Cmp = cast(cast(I)->getCondition()); return I->getParent() == BB && Cmp && Cmp->getParent() == BB; } @@ -6532,7 +6532,7 @@ class HorizontalReduction { "Expected reduction operation."); // SelectInst must be used twice while the condition op must have single // use only. - if (isMinMax()) + if (isCmpSel()) return I->hasNUses(2) && (!IsReductionOp || cast(I)->getCondition()->hasOneUse()); @@ -6545,7 +6545,7 @@ class HorizontalReduction { void initReductionOps(ReductionOpsListType &ReductionOps) { assert(Kind != RecurKind::None && !!*this && "Expected reduction operation."); - if (isMinMax()) + if (isCmpSel()) ReductionOps.assign(2, ReductionOpsType()); else ReductionOps.assign(1, ReductionOpsType()); @@ -6554,7 +6554,7 @@ class HorizontalReduction { /// Add all reduction operations for the reduction instruction \p I. void addReductionOps(Instruction *I, ReductionOpsListType &ReductionOps) { assert(Kind != RecurKind::None && "Expected reduction operation."); - if (isMinMax()) { + if (isCmpSel()) { ReductionOps[0].emplace_back(cast(I)->getCondition()); ReductionOps[1].emplace_back(I); } else { @@ -6988,10 +6988,10 @@ class HorizontalReduction { DebugLoc Loc = cast(ReducedVals[i])->getDebugLoc(); Value *VectorizedRoot = V.vectorizeTree(ExternallyUsedValues); - // Emit a reduction. For min/max, the root is a select, but the insertion + // Emit a reduction. If the root is a select (min/max idiom), the insert // point is the compare condition of that select. 
Instruction *RdxRootInst = cast(ReductionRoot); - if (RdxTreeInst.isMinMax()) + if (RdxTreeInst.isCmpSel()) Builder.SetInsertPoint(getCmpForMinMaxReduction(RdxRootInst)); else Builder.SetInsertPoint(RdxRootInst); @@ -7033,7 +7033,7 @@ class HorizontalReduction { // select, we also have to RAUW for the compare instruction feeding the // reduction root. That's because the original compare may have extra uses // besides the final select of the reduction. - if (RdxTreeInst.isMinMax()) { + if (RdxTreeInst.isCmpSel()) { if (auto *VecSelect = dyn_cast(VectorizedTree)) { Instruction *ScalarCmp = getCmpForMinMaxReduction(cast(ReductionRoot)); ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-
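The rename is purely cosmetic, but the operand accounting it touches is worth spelling out: until min/max intrinsics are the canonical form, an integer min/max reduction root is the SelectInst of a cmp+select idiom, so the "reduction operation" has three operands, with the compare condition at index 0 and the reduced values at indices 1 and 2. That is all the renamed helpers encode (condensed from the hunks above):

  // %cmp = icmp sgt i32 %a, %b            ; select operand 0
  // %max = select i1 %cmp, i32 %a, i32 %b ; reduced values at operands 1 and 2
  unsigned getFirstOperandIndex() const { return isCmpSel() ? 1 : 0; }
  unsigned getNumberOfOperands() const { return isCmpSel() ? 3 : 2; }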
[llvm-branch-commits] [llvm] ca7e270 - [SLP] add more FMF tests for fmax/fmin reductions; NFC
Author: Sanjay Patel Date: 2021-01-18T12:25:28-05:00 New Revision: ca7e27054c25c2bc6cf88879d73745699251412c URL: https://github.com/llvm/llvm-project/commit/ca7e27054c25c2bc6cf88879d73745699251412c DIFF: https://github.com/llvm/llvm-project/commit/ca7e27054c25c2bc6cf88879d73745699251412c.diff LOG: [SLP] add more FMF tests for fmax/fmin reductions; NFC Added: Modified: llvm/test/Transforms/SLPVectorizer/X86/fmaxnum.ll llvm/test/Transforms/SLPVectorizer/X86/fminnum.ll Removed: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/fmaxnum.ll b/llvm/test/Transforms/SLPVectorizer/X86/fmaxnum.ll index 23f2196b2425..e2754862399e 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/fmaxnum.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/fmaxnum.ll @@ -392,6 +392,33 @@ define float @reduction_v4f32_nnan(float* %p) { ret float %m3 } +define float @reduction_v4f32_not_fast(float* %p) { +; CHECK-LABEL: @reduction_v4f32_not_fast( +; CHECK-NEXT:[[G1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1 +; CHECK-NEXT:[[G2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2 +; CHECK-NEXT:[[G3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3 +; CHECK-NEXT:[[T0:%.*]] = load float, float* [[P]], align 4 +; CHECK-NEXT:[[T1:%.*]] = load float, float* [[G1]], align 4 +; CHECK-NEXT:[[T2:%.*]] = load float, float* [[G2]], align 4 +; CHECK-NEXT:[[T3:%.*]] = load float, float* [[G3]], align 4 +; CHECK-NEXT:[[M1:%.*]] = tail call float @llvm.maxnum.f32(float [[T1]], float [[T0]]) +; CHECK-NEXT:[[M2:%.*]] = tail call float @llvm.maxnum.f32(float [[T2]], float [[M1]]) +; CHECK-NEXT:[[M3:%.*]] = tail call float @llvm.maxnum.f32(float [[T3]], float [[M2]]) +; CHECK-NEXT:ret float [[M3]] +; + %g1 = getelementptr inbounds float, float* %p, i64 1 + %g2 = getelementptr inbounds float, float* %p, i64 2 + %g3 = getelementptr inbounds float, float* %p, i64 3 + %t0 = load float, float* %p, align 4 + %t1 = load float, float* %g1, align 4 + %t2 = load float, float* %g2, align 4 + %t3 = load float, float* %g3, align 4 + %m1 = tail call float @llvm.maxnum.f32(float %t1, float %t0) + %m2 = tail call float @llvm.maxnum.f32(float %t2, float %m1) + %m3 = tail call float @llvm.maxnum.f32(float %t3, float %m2) + ret float %m3 +} + define float @reduction_v8f32_fast(float* %p) { ; CHECK-LABEL: @reduction_v8f32_fast( ; CHECK-NEXT:[[G1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1 @@ -485,4 +512,31 @@ define double @reduction_v4f64_fast(double* %p) { ret double %m3 } +define double @reduction_v4f64_wrong_fmf(double* %p) { +; CHECK-LABEL: @reduction_v4f64_wrong_fmf( +; CHECK-NEXT:[[G1:%.*]] = getelementptr inbounds double, double* [[P:%.*]], i64 1 +; CHECK-NEXT:[[G2:%.*]] = getelementptr inbounds double, double* [[P]], i64 2 +; CHECK-NEXT:[[G3:%.*]] = getelementptr inbounds double, double* [[P]], i64 3 +; CHECK-NEXT:[[T0:%.*]] = load double, double* [[P]], align 4 +; CHECK-NEXT:[[T1:%.*]] = load double, double* [[G1]], align 4 +; CHECK-NEXT:[[T2:%.*]] = load double, double* [[G2]], align 4 +; CHECK-NEXT:[[T3:%.*]] = load double, double* [[G3]], align 4 +; CHECK-NEXT:[[M1:%.*]] = tail call ninf nsz double @llvm.maxnum.f64(double [[T1]], double [[T0]]) +; CHECK-NEXT:[[M2:%.*]] = tail call ninf nsz double @llvm.maxnum.f64(double [[T2]], double [[M1]]) +; CHECK-NEXT:[[M3:%.*]] = tail call ninf nsz double @llvm.maxnum.f64(double [[T3]], double [[M2]]) +; CHECK-NEXT:ret double [[M3]] +; + %g1 = getelementptr inbounds double, double* %p, i64 1 + %g2 = getelementptr inbounds double, double* %p, i64 2 + %g3 = 
getelementptr inbounds double, double* %p, i64 3 + %t0 = load double, double* %p, align 4 + %t1 = load double, double* %g1, align 4 + %t2 = load double, double* %g2, align 4 + %t3 = load double, double* %g3, align 4 + %m1 = tail call ninf nsz double @llvm.maxnum.f64(double %t1, double %t0) + %m2 = tail call ninf nsz double @llvm.maxnum.f64(double %t2, double %m1) + %m3 = tail call ninf nsz double @llvm.maxnum.f64(double %t3, double %m2) + ret double %m3 +} + attributes #0 = { nounwind } diff --git a/llvm/test/Transforms/SLPVectorizer/X86/fminnum.ll b/llvm/test/Transforms/SLPVectorizer/X86/fminnum.ll index 81bcfb2f1e9b..15a7848f8eca 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/fminnum.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/fminnum.ll @@ -392,6 +392,33 @@ define float @reduction_v4f32_nnan(float* %p) { ret float %m3 } +define float @reduction_v4f32_wrong_fmf(float* %p) { +; CHECK-LABEL: @reduction_v4f32_wrong_fmf( +; CHECK-NEXT:[[G1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1 +; CHECK-NEXT:[[G2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2 +; CHECK-NEXT:[[G3:%.*]] = getelementptr inbounds float, float* [[P]], i6
[llvm-branch-commits] [llvm] 5b77ac3 - [SLP] match maxnum/minnum intrinsics as FP reduction ops
Author: Sanjay Patel Date: 2021-01-18T17:37:16-05:00 New Revision: 5b77ac32b1150d066b35b45d6d982f4b4a1f62ff URL: https://github.com/llvm/llvm-project/commit/5b77ac32b1150d066b35b45d6d982f4b4a1f62ff DIFF: https://github.com/llvm/llvm-project/commit/5b77ac32b1150d066b35b45d6d982f4b4a1f62ff.diff LOG: [SLP] match maxnum/minnum intrinsics as FP reduction ops After much refactoring over the last 2 weeks to the reduction matching code, I think this change is finally ready. We effectively broke fmax/fmin vector reduction optimization when we started canonicalizing to intrinsics in instcombine, so this should restore that functionality for SLP. There are still FMF problems here as noted in the code comments, but we should be avoiding miscompiles on those for fmax/fmin by restricting to full 'fast' ops (negative tests are included). Fixing FMF propagation is a planned follow-up. Differential Revision: https://reviews.llvm.org/D94913 Added: Modified: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-expanded.ll llvm/test/Transforms/SLPVectorizer/X86/fmaxnum.ll llvm/test/Transforms/SLPVectorizer/X86/fminnum.ll Removed: diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 0323e02d0d2c..0fee52dcdd93 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -6455,6 +6455,10 @@ class HorizontalReduction { case RecurKind::FMul: return Builder.CreateBinOp((Instruction::BinaryOps)RdxOpcode, LHS, RHS, Name); + case RecurKind::FMax: +return Builder.CreateBinaryIntrinsic(Intrinsic::maxnum, LHS, RHS); + case RecurKind::FMin: +return Builder.CreateBinaryIntrinsic(Intrinsic::minnum, LHS, RHS); case RecurKind::SMax: { Value *Cmp = Builder.CreateICmpSGT(LHS, RHS, Name); @@ -6568,6 +6572,15 @@ class HorizontalReduction { if (RecurrenceDescriptor::isIntMinMaxRecurrenceKind(Kind)) return true; + if (Kind == RecurKind::FMax || Kind == RecurKind::FMin) { +// FP min/max are associative except for NaN and -0.0. We do not +// have to rule out -0.0 here because the intrinsic semantics do not +// specify a fixed result for it. +// TODO: This is artificially restricted to fast because the code that +// creates reductions assumes/produces fast ops. 
+return I->getFastMathFlags().isFast(); + } + return I->isAssociative(); } @@ -6677,6 +6690,11 @@ class HorizontalReduction { if (match(I, m_FMul(m_Value(), m_Value( return OperationData(RecurKind::FMul); +if (match(I, m_Intrinsic(m_Value(), m_Value( + return OperationData(RecurKind::FMax); +if (match(I, m_Intrinsic(m_Value(), m_Value( + return OperationData(RecurKind::FMin); + if (match(I, m_SMax(m_Value(), m_Value( return OperationData(RecurKind::SMax); if (match(I, m_SMin(m_Value(), m_Value( @@ -7076,6 +7094,18 @@ class HorizontalReduction { ScalarCost = TTI->getArithmeticInstrCost(RdxOpcode, ScalarTy); break; } +case RecurKind::FMax: +case RecurKind::FMin: { + auto *VecCondTy = cast(CmpInst::makeCmpResultType(VectorTy)); + VectorCost = + TTI->getMinMaxReductionCost(VectorTy, VecCondTy, + /*pairwise=*/false, /*unsigned=*/false); + ScalarCost = + TTI->getCmpSelInstrCost(Instruction::FCmp, ScalarTy) + + TTI->getCmpSelInstrCost(Instruction::Select, ScalarTy, + CmpInst::makeCmpResultType(ScalarTy)); + break; +} case RecurKind::SMax: case RecurKind::SMin: case RecurKind::UMax: @@ -7307,6 +7337,16 @@ static Value *getReductionValue(const DominatorTree *DT, PHINode *P, return nullptr; } +static bool matchRdxBop(Instruction *I, Value *&V0, Value *&V1) { + if (match(I, m_BinOp(m_Value(V0), m_Value(V1 +return true; + if (match(I, m_Intrinsic(m_Value(V0), m_Value(V1 +return true; + if (match(I, m_Intrinsic(m_Value(V0), m_Value(V1 +return true; + return false; +} + /// Attempt to reduce a horizontal reduction. /// If it is legal to match a horizontal reduction feeding the phi node \a P /// with reduction operators \a Root (or one of its operands) in a basic block @@ -7347,7 +7387,7 @@ static bool tryToVectorizeHorReductionOrInstOperands( unsigned Level; std::tie(Inst, Level) = Stack.pop_back_val(); Value *B0, *B1; -bool IsBinop = match(Inst, m_BinOp(m_Value(B0), m_Value(B1))); +bool IsBinop = matchRdxBop(Inst, B0, B1); bool IsSelect = match(Inst, m_Select(m_Value(), m_Value(), m_Value())); if (IsBinop || IsSelect) {
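The mail archive strips angle brackets, so the m_Intrinsic matchers in the hunk above lost their intrinsic IDs. Reconstructed from the surrounding context, the new helper that lets the reduction scan treat maxnum/minnum calls like binary operators presumably reads as follows (a sketch of intent, not a verbatim excerpt):

  /// Match a reduction root that behaves like a binary operator: either a
  /// true BinaryOperator or one of the newly accepted maxnum/minnum calls.
  static bool matchRdxBop(Instruction *I, Value *&V0, Value *&V1) {
    if (match(I, m_BinOp(m_Value(V0), m_Value(V1))))
      return true;
    if (match(I, m_Intrinsic<Intrinsic::maxnum>(m_Value(V0), m_Value(V1))))
      return true;
    if (match(I, m_Intrinsic<Intrinsic::minnum>(m_Value(V0), m_Value(V1))))
      return true;
    return false;
  }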
[llvm-branch-commits] [llvm] d27bb5c - [x86] add cast to avoid compile-time warning; NFC
Author: Sanjay Patel Date: 2021-01-18T17:47:04-05:00 New Revision: d27bb5c375ca8e96e15168587a3bcd91b244fcad URL: https://github.com/llvm/llvm-project/commit/d27bb5c375ca8e96e15168587a3bcd91b244fcad DIFF: https://github.com/llvm/llvm-project/commit/d27bb5c375ca8e96e15168587a3bcd91b244fcad.diff LOG: [x86] add cast to avoid compile-time warning; NFC Added: Modified: llvm/lib/Target/X86/X86ISelLowering.cpp Removed: diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 60a2fd233d5c..97fcef0b92fa 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -10964,7 +10964,7 @@ static bool IsElementEquivalent(int MaskSize, SDValue Op, SDValue ExpectedOp, case X86ISD::VBROADCAST_LOAD: // TODO: Handle MaskSize != Op.getValueType().getVectorNumElements()? return (Op == ExpectedOp && -Op.getValueType().getVectorNumElements() == MaskSize); +(int)Op.getValueType().getVectorNumElements() == MaskSize); case X86ISD::HADD: case X86ISD::HSUB: case X86ISD::FHADD: ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
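The warning being silenced is the usual signed/unsigned comparison: getVectorNumElements() returns unsigned while MaskSize is an int, and comparing the two promotes the signed operand to unsigned. A minimal standalone illustration of the issue and the fix (not the LLVM code itself):

  // With -Wsign-compare, "NumElts == MaskSize" promotes MaskSize to unsigned,
  // so a negative MaskSize would compare equal to a huge value. Casting the
  // unsigned count to int keeps the comparison in signed arithmetic.
  bool sameSize(int MaskSize, unsigned NumElts) {
    return (int)NumElts == MaskSize;
  }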
[llvm-branch-commits] [llvm] 8590d24 - [SLP] move reduction createOp functions; NFC
Author: Sanjay Patel Date: 2021-01-20T11:14:48-05:00 New Revision: 8590d245434dd4205c89f0a05b4c22feccb7421c URL: https://github.com/llvm/llvm-project/commit/8590d245434dd4205c89f0a05b4c22feccb7421c DIFF: https://github.com/llvm/llvm-project/commit/8590d245434dd4205c89f0a05b4c22feccb7421c.diff LOG: [SLP] move reduction createOp functions; NFC We were able to remove almost all of the state from OperationData, so these don't make sense as members of that class - just pass the RecurKind in as a param. Added: Modified: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp Removed: diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 24885e4d8257..3d657b0b898c 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -6397,7 +6397,7 @@ namespace { class HorizontalReduction { using ReductionOpsType = SmallVector; using ReductionOpsListType = SmallVector; - ReductionOpsListType ReductionOps; + ReductionOpsListType ReductionOps; SmallVector ReducedVals; // Use map vector to make stable output. MapVector ExtraArgs; @@ -6412,47 +6412,6 @@ class HorizontalReduction { /// Checks if the reduction operation can be vectorized. bool isVectorizable() const { return Kind != RecurKind::None; } -/// Creates reduction operation with the current opcode. -Value *createOp(IRBuilder<> &Builder, Value *LHS, Value *RHS, -const Twine &Name) const { - assert(isVectorizable() && "Unhandled reduction operation."); - unsigned RdxOpcode = RecurrenceDescriptor::getOpcode(Kind); - switch (Kind) { - case RecurKind::Add: - case RecurKind::Mul: - case RecurKind::Or: - case RecurKind::And: - case RecurKind::Xor: - case RecurKind::FAdd: - case RecurKind::FMul: -return Builder.CreateBinOp((Instruction::BinaryOps)RdxOpcode, LHS, RHS, - Name); - case RecurKind::FMax: -return Builder.CreateBinaryIntrinsic(Intrinsic::maxnum, LHS, RHS); - case RecurKind::FMin: -return Builder.CreateBinaryIntrinsic(Intrinsic::minnum, LHS, RHS); - - case RecurKind::SMax: { -Value *Cmp = Builder.CreateICmpSGT(LHS, RHS, Name); -return Builder.CreateSelect(Cmp, LHS, RHS, Name); - } - case RecurKind::SMin: { -Value *Cmp = Builder.CreateICmpSLT(LHS, RHS, Name); -return Builder.CreateSelect(Cmp, LHS, RHS, Name); - } - case RecurKind::UMax: { -Value *Cmp = Builder.CreateICmpUGT(LHS, RHS, Name); -return Builder.CreateSelect(Cmp, LHS, RHS, Name); - } - case RecurKind::UMin: { -Value *Cmp = Builder.CreateICmpULT(LHS, RHS, Name); -return Builder.CreateSelect(Cmp, LHS, RHS, Name); - } - default: -llvm_unreachable("Unknown reduction operation."); - } -} - public: explicit OperationData() = default; @@ -6580,40 +6539,6 @@ class HorizontalReduction { return nullptr; return I->getOperand(getFirstOperandIndex() + 1); } - -/// Creates reduction operation with the current opcode with the IR flags -/// from \p ReductionOps. -Value *createOp(IRBuilder<> &Builder, Value *LHS, Value *RHS, -const Twine &Name, -const ReductionOpsListType &ReductionOps) const { - assert(isVectorizable() && - "Expected add|fadd or min/max reduction operation."); - Value *Op = createOp(Builder, LHS, RHS, Name); - if (RecurrenceDescriptor::isIntMinMaxRecurrenceKind(Kind)) { -if (auto *Sel = dyn_cast(Op)) - propagateIRFlags(Sel->getCondition(), ReductionOps[0]); -propagateIRFlags(Op, ReductionOps[1]); -return Op; - } - propagateIRFlags(Op, ReductionOps[0]); - return Op; -} -/// Creates reduction operation with the current opcode with the IR flags -/// from \p I. 
-Value *createOp(IRBuilder<> &Builder, Value *LHS, Value *RHS, -const Twine &Name, Instruction *I) const { - assert(isVectorizable() && - "Expected add|fadd or min/max reduction operation."); - Value *Op = createOp(Builder, LHS, RHS, Name); - if (RecurrenceDescriptor::isIntMinMaxRecurrenceKind(Kind)) { -if (auto *Sel = dyn_cast(Op)) { - propagateIRFlags(Sel->getCondition(), - cast(I)->getCondition()); -} - } - propagateIRFlags(Op, I); - return Op; -} }; WeakTrackingVH ReductionRoot; @@ -6642,6 +6567,76 @@ class HorizontalReduction { } } + /// Creates reduction operation with the current opcode. + static Value *createOp(IRBuilder<> &Builder, RecurKind Kind, Value *LHS, + Value *RHS, const Twine &Name) { +u
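The archive cuts the hunk off just as the new file-scope helper begins. Based on the member function deleted earlier in the same diff, the relocated function presumably has this shape (abridged to a few representative cases; the real switch covers every RecurKind the vectorizer handles):

  /// Create the reduction operation for the given kind (abridged sketch).
  static Value *createOp(IRBuilder<> &Builder, RecurKind Kind, Value *LHS,
                         Value *RHS, const Twine &Name) {
    unsigned RdxOpcode = RecurrenceDescriptor::getOpcode(Kind);
    switch (Kind) {
    case RecurKind::Add:
    case RecurKind::FAdd: // ...and the other plain binary kinds
      return Builder.CreateBinOp((Instruction::BinaryOps)RdxOpcode, LHS, RHS,
                                 Name);
    case RecurKind::FMax:
      return Builder.CreateBinaryIntrinsic(Intrinsic::maxnum, LHS, RHS);
    case RecurKind::SMax: {
      Value *Cmp = Builder.CreateICmpSGT(LHS, RHS, Name);
      return Builder.CreateSelect(Cmp, LHS, RHS, Name);
    }
    default:
      llvm_unreachable("Unknown reduction operation.");
    }
  }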
[llvm-branch-commits] [llvm] 1c54112 - [SLP] refactor more reduction functions; NFC
Author: Sanjay Patel Date: 2021-01-20T11:14:48-05:00 New Revision: 1c54112a5762ebab2c14a90c55f27d00bfced7f8 URL: https://github.com/llvm/llvm-project/commit/1c54112a5762ebab2c14a90c55f27d00bfced7f8 DIFF: https://github.com/llvm/llvm-project/commit/1c54112a5762ebab2c14a90c55f27d00bfced7f8.diff LOG: [SLP] refactor more reduction functions; NFC We were able to remove almost all of the state from OperationData, so these don't make sense as members of that class - just pass the RecurKind in as a param. More streamlining is possible, but I'm trying to avoid logic/typo bugs while fixing this. Eventually, we should not need the `OperationData` class. Added: Modified: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp Removed: diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 3d657b0b898c..3192d7959f70 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -6427,76 +6427,6 @@ class HorizontalReduction { return IsLeafValue || Kind != RecurKind::None; } -/// Return true if this operation is a cmp+select idiom. -bool isCmpSel() const { - assert(Kind != RecurKind::None && "Expected reduction operation."); - return RecurrenceDescriptor::isIntMinMaxRecurrenceKind(Kind); -} - -/// Get the index of the first operand. -unsigned getFirstOperandIndex() const { - assert(!!*this && "The opcode is not set."); - // We allow calling this before 'Kind' is set, so handle that specially. - if (Kind == RecurKind::None) -return 0; - return isCmpSel() ? 1 : 0; -} - -/// Total number of operands in the reduction operation. -unsigned getNumberOfOperands() const { - assert(Kind != RecurKind::None && !!*this && - "Expected reduction operation."); - return isCmpSel() ? 3 : 2; -} - -/// Checks if the instruction is in basic block \p BB. -/// For a min/max reduction check that both compare and select are in \p BB. -bool hasSameParent(Instruction *I, BasicBlock *BB, bool IsRedOp) const { - assert(Kind != RecurKind::None && !!*this && - "Expected reduction operation."); - if (IsRedOp && isCmpSel()) { -auto *Cmp = cast(cast(I)->getCondition()); -return I->getParent() == BB && Cmp && Cmp->getParent() == BB; - } - return I->getParent() == BB; -} - -/// Expected number of uses for reduction operations/reduced values. -bool hasRequiredNumberOfUses(Instruction *I, bool IsReductionOp) const { - assert(Kind != RecurKind::None && !!*this && - "Expected reduction operation."); - // SelectInst must be used twice while the condition op must have single - // use only. - if (isCmpSel()) -return I->hasNUses(2) && - (!IsReductionOp || -cast(I)->getCondition()->hasOneUse()); - - // Arithmetic reduction operation must be used once only. - return I->hasOneUse(); -} - -/// Initializes the list of reduction operations. -void initReductionOps(ReductionOpsListType &ReductionOps) { - assert(Kind != RecurKind::None && !!*this && - "Expected reduction operation."); - if (isCmpSel()) -ReductionOps.assign(2, ReductionOpsType()); - else -ReductionOps.assign(1, ReductionOpsType()); -} - -/// Add all reduction operations for the reduction instruction \p I. -void addReductionOps(Instruction *I, ReductionOpsListType &ReductionOps) { - assert(Kind != RecurKind::None && "Expected reduction operation."); - if (isCmpSel()) { -ReductionOps[0].emplace_back(cast(I)->getCondition()); -ReductionOps[1].emplace_back(I); - } else { -ReductionOps[0].emplace_back(I); - } -} - /// Checks if instruction is associative and can be vectorized. 
bool isAssociative(Instruction *I) const { assert(Kind != RecurKind::None && "Expected reduction operation."); @@ -6529,16 +6459,6 @@ class HorizontalReduction { /// Get kind of reduction data. RecurKind getKind() const { return Kind; } -Value *getLHS(Instruction *I) const { - if (Kind == RecurKind::None) -return nullptr; - return I->getOperand(getFirstOperandIndex()); -} -Value *getRHS(Instruction *I) const { - if (Kind == RecurKind::None) -return nullptr; - return I->getOperand(getFirstOperandIndex() + 1); -} }; WeakTrackingVH ReductionRoot; @@ -6559,7 +6479,7 @@ class HorizontalReduction { // Do not perform analysis of remaining operands of ParentStackElem.first // instruction, this whole instruction is an extra argument. OperationData OpData = getOperationData(ParentStackElem.first); - ParentStackElem.second = OpData.getNumberOfOperand
[llvm-branch-commits] [llvm] c09be0d - [SLP] reduce reduction code for checking vectorizable ops; NFC
Author: Sanjay Patel Date: 2021-01-20T11:14:48-05:00 New Revision: c09be0d2a0f930a128c946329b42eef45d53062a URL: https://github.com/llvm/llvm-project/commit/c09be0d2a0f930a128c946329b42eef45d53062a DIFF: https://github.com/llvm/llvm-project/commit/c09be0d2a0f930a128c946329b42eef45d53062a.diff LOG: [SLP] reduce reduction code for checking vectorizable ops; NFC This is another step towards removing `OperationData` and fixing FMF matching/propagation bugs when forming reductions. Added: Modified: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp Removed: diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 3192d7959f70..2597f88ab88d 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -6409,9 +6409,6 @@ class HorizontalReduction { RecurKind Kind = RecurKind::None; bool IsLeafValue = false; -/// Checks if the reduction operation can be vectorized. -bool isVectorizable() const { return Kind != RecurKind::None; } - public: explicit OperationData() = default; @@ -6427,29 +6424,6 @@ class HorizontalReduction { return IsLeafValue || Kind != RecurKind::None; } -/// Checks if instruction is associative and can be vectorized. -bool isAssociative(Instruction *I) const { - assert(Kind != RecurKind::None && "Expected reduction operation."); - if (RecurrenceDescriptor::isIntMinMaxRecurrenceKind(Kind)) -return true; - - if (Kind == RecurKind::FMax || Kind == RecurKind::FMin) { -// FP min/max are associative except for NaN and -0.0. We do not -// have to rule out -0.0 here because the intrinsic semantics do not -// specify a fixed result for it. -// TODO: This is artificially restricted to fast because the code that -// creates reductions assumes/produces fast ops. -return I->getFastMathFlags().isFast(); - } - - return I->isAssociative(); -} - -/// Checks if the reduction operation can be vectorized. -bool isVectorizable(Instruction *I) const { - return isVectorizable() && isAssociative(I); -} - /// Checks if two operation data are both a reduction op or both a reduced /// value. bool operator==(const OperationData &OD) const { @@ -6466,6 +6440,25 @@ class HorizontalReduction { /// The operation data of the reduction operation. OperationData RdxTreeInst; + /// Checks if instruction is associative and can be vectorized. + static bool isVectorizable(RecurKind Kind, Instruction *I) { +if (Kind == RecurKind::None) + return false; +if (RecurrenceDescriptor::isIntMinMaxRecurrenceKind(Kind)) + return true; + +if (Kind == RecurKind::FMax || Kind == RecurKind::FMin) { + // FP min/max are associative except for NaN and -0.0. We do not + // have to rule out -0.0 here because the intrinsic semantics do not + // specify a fixed result for it. + // TODO: This is artificially restricted to fast because the code that + // creates reductions assumes/produces fast ops. + return I->getFastMathFlags().isFast(); +} + +return I->isAssociative(); + } + /// Checks if the ParentStackElem.first should be marked as a reduction /// operation with an extra argument or as extra argument itself. void markExtraArg(std::pair &ParentStackElem, @@ -6694,8 +6687,7 @@ class HorizontalReduction { } /// Initializes the list of reduction operations. 
- static void initReductionOps(RecurKind Kind, - ReductionOpsListType &ReductionOps) { + void initReductionOps(RecurKind Kind) { if (isCmpSel(Kind)) ReductionOps.assign(2, ReductionOpsType()); else @@ -6703,8 +6695,7 @@ class HorizontalReduction { } /// Add all reduction operations for the reduction instruction \p I. - static void addReductionOps(RecurKind Kind, Instruction *I, - ReductionOpsListType &ReductionOps) { + void addReductionOps(RecurKind Kind, Instruction *I) { assert(Kind != RecurKind::None && "Expected reduction operation."); if (isCmpSel(Kind)) { ReductionOps[0].emplace_back(cast(I)->getCondition()); @@ -6750,7 +6741,7 @@ class HorizontalReduction { } } -if (!RdxTreeInst.isVectorizable(B)) +if (!isVectorizable(RdxTreeInst.getKind(), B)) return false; // Analyze "regular" integer/FP types for reductions - no target-specific @@ -6772,7 +6763,7 @@ class HorizontalReduction { SmallVector, 32> Stack; Stack.push_back( std::make_pair(B, getFirstOperandIndex(RdxTreeInst.getKind(; -initReductionOps(RdxTreeInst.getKind(), ReductionOps); +initReductionOps(RdxTreeInst.getKind()); while (!Stack.empty()) { Instruction *TreeN = Stack.back()
[llvm-branch-commits] [llvm] 070af1b - [InstCombine] avoid crashing on attribute propagation
Author: Sanjay Patel Date: 2021-01-21T08:13:26-05:00 New Revision: 070af1b7887f80383d8473bb4da565edbde6c6b0 URL: https://github.com/llvm/llvm-project/commit/070af1b7887f80383d8473bb4da565edbde6c6b0 DIFF: https://github.com/llvm/llvm-project/commit/070af1b7887f80383d8473bb4da565edbde6c6b0.diff LOG: [InstCombine] avoid crashing on attribute propagation In https://llvm.org/PR48810 , we are crashing while trying to propagate attributes from mempcpy (returns void*) to memcpy (returns nothing - void). We can avoid the crash by removing known incompatible attributes for the void return type. I'm not sure if this goes far enough (should we just drop all attributes since this isn't the same function?). We also need to audit other transforms in LibCallSimplifier to make sure there are no other cases that have the same problem. Differential Revision: https://reviews.llvm.org/D95088 Added: Modified: llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp llvm/test/Transforms/InstCombine/mempcpy.ll Removed: diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp index 99b28b0a832c..b68e45363811 100644 --- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -1150,7 +1150,12 @@ Value *LibCallSimplifier::optimizeMemPCpy(CallInst *CI, IRBuilderBase &B) { // mempcpy(x, y, n) -> llvm.memcpy(align 1 x, align 1 y, n), x + n CallInst *NewCI = B.CreateMemCpy(Dst, Align(1), CI->getArgOperand(1), Align(1), N); + // Propagate attributes, but memcpy has no return value, so make sure that + // any return attributes are compliant. + // TODO: Attach return value attributes to the 1st operand to preserve them? NewCI->setAttributes(CI->getAttributes()); + NewCI->removeAttributes(AttributeList::ReturnIndex, + AttributeFuncs::typeIncompatible(NewCI->getType())); return B.CreateInBoundsGEP(B.getInt8Ty(), Dst, N); } diff --git a/llvm/test/Transforms/InstCombine/mempcpy.ll b/llvm/test/Transforms/InstCombine/mempcpy.ll index 79158a3a0a6d..61e7ec4a3339 100644 --- a/llvm/test/Transforms/InstCombine/mempcpy.ll +++ b/llvm/test/Transforms/InstCombine/mempcpy.ll @@ -53,4 +53,15 @@ define i8* @memcpy_big_const_n(i8* %d, i8* nocapture readonly %s) { ret i8* %r } +; The original call may have attributes that can not propagate to memcpy. + +define i32 @PR48810() { +; CHECK-LABEL: @PR48810( +; CHECK-NEXT:call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 undef, i8* align 536870912 null, i64 undef, i1 false) +; CHECK-NEXT:ret i32 undef +; + %r = call dereferenceable(1) i8* @mempcpy(i8* undef, i8* null, i64 undef) + ret i32 undef +} + declare i8* @mempcpy(i8*, i8* nocapture readonly, i64) ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
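At the source level, the transform and the attribute problem look roughly like this (a hypothetical reduction in the spirit of PR48810, not the exact reproducer; the dereferenceable(1) return attribute that triggers the crash is visible on the call in the IR test above):

  // mempcpy returns dst + n, so instcombine rewrites the call into llvm.memcpy
  // followed by an inbounds GEP. Return-value attributes on the original call,
  // such as dereferenceable(1) on the returned pointer, have no legal home on
  // memcpy, whose return type is void, hence the removeAttributes() call above.
  extern "C" void *mempcpy(void *dst, const void *src, unsigned long n);

  void *copyAndAdvance(void *dst, const void *src, unsigned long n) {
    return mempcpy(dst, src, n); // becomes: memcpy(dst, src, n); then dst + n
  }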
[llvm-branch-commits] [llvm] d777533 - [SLP] simplify reduction matching
Author: Sanjay Patel Date: 2021-01-21T14:58:57-05:00 New Revision: d77753381fe024434ae8ffaaacfe4b9ed9d4d760 URL: https://github.com/llvm/llvm-project/commit/d77753381fe024434ae8ffaaacfe4b9ed9d4d760 DIFF: https://github.com/llvm/llvm-project/commit/d77753381fe024434ae8ffaaacfe4b9ed9d4d760.diff LOG: [SLP] simplify reduction matching This is NFC-intended and removes the "OperationData" class which had become nothing more than a recurrence (reduction) type. I adjusted the matching logic to distinguish instructions from non-instructions - that's all that the "IsLeafValue" member was keeping track of. Added: Modified: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp Removed: diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 2597f88ab88d..73260016f443 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -6401,44 +6401,9 @@ class HorizontalReduction { SmallVector ReducedVals; // Use map vector to make stable output. MapVector ExtraArgs; - - /// This wraps functionality around a RecurKind (reduction kind). - /// TODO: Remove this class if callers can use the 'Kind' value directly? - class OperationData { -/// Kind of the reduction operation. -RecurKind Kind = RecurKind::None; -bool IsLeafValue = false; - - public: -explicit OperationData() = default; - -/// Constructor for reduced values. They are identified by the bool only. -explicit OperationData(Instruction &I) { IsLeafValue = true; } - -/// Constructor for reduction operations with opcode and type. -OperationData(RecurKind RdxKind) : Kind(RdxKind) { - assert(Kind != RecurKind::None && "Expected reduction operation."); -} - -explicit operator bool() const { - return IsLeafValue || Kind != RecurKind::None; -} - -/// Checks if two operation data are both a reduction op or both a reduced -/// value. -bool operator==(const OperationData &OD) const { - return Kind == OD.Kind && IsLeafValue == OD.IsLeafValue; -} -bool operator!=(const OperationData &OD) const { return !(*this == OD); } - -/// Get kind of reduction data. -RecurKind getKind() const { return Kind; } - }; - WeakTrackingVH ReductionRoot; - - /// The operation data of the reduction operation. - OperationData RdxTreeInst; + /// The type of reduction operation. + RecurKind RdxKind; /// Checks if instruction is associative and can be vectorized. static bool isVectorizable(RecurKind Kind, Instruction *I) { @@ -6471,8 +6436,8 @@ class HorizontalReduction { // in this case. // Do not perform analysis of remaining operands of ParentStackElem.first // instruction, this whole instruction is an extra argument. - OperationData OpData = getOperationData(ParentStackElem.first); - ParentStackElem.second = getNumberOfOperands(OpData.getKind()); + RecurKind RdxKind = getRdxKind(ParentStackElem.first); + ParentStackElem.second = getNumberOfOperands(RdxKind); } else { // We ran into something like: // ParentStackElem.first += ... + ExtraArg + ... 
@@ -6550,39 +6515,37 @@ class HorizontalReduction { return Op; } - static OperationData getOperationData(Instruction *I) { -if (!I) - return OperationData(); - + static RecurKind getRdxKind(Instruction *I) { +assert(I && "Expected instruction for reduction matching"); TargetTransformInfo::ReductionFlags RdxFlags; if (match(I, m_Add(m_Value(), m_Value( - return OperationData(RecurKind::Add); + return RecurKind::Add; if (match(I, m_Mul(m_Value(), m_Value( - return OperationData(RecurKind::Mul); + return RecurKind::Mul; if (match(I, m_And(m_Value(), m_Value( - return OperationData(RecurKind::And); + return RecurKind::And; if (match(I, m_Or(m_Value(), m_Value( - return OperationData(RecurKind::Or); + return RecurKind::Or; if (match(I, m_Xor(m_Value(), m_Value( - return OperationData(RecurKind::Xor); + return RecurKind::Xor; if (match(I, m_FAdd(m_Value(), m_Value( - return OperationData(RecurKind::FAdd); + return RecurKind::FAdd; if (match(I, m_FMul(m_Value(), m_Value( - return OperationData(RecurKind::FMul); + return RecurKind::FMul; if (match(I, m_Intrinsic(m_Value(), m_Value( - return OperationData(RecurKind::FMax); + return RecurKind::FMax; if (match(I, m_Intrinsic(m_Value(), m_Value( - return OperationData(RecurKind::FMin); + return RecurKind::FMin; if (match(I, m_SMax(m_Value(), m_Value( - return OperationData(RecurKind::SMax); + return RecurKind::SMax; if (match(I, m_SMin(m_Value(), m_Value( - return OperationData(RecurKind::SMin); +
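Two notes on the hunk above: the archive again dropped the angle brackets from the m_Intrinsic matchers (presumably m_Intrinsic<Intrinsic::maxnum> and m_Intrinsic<Intrinsic::minnum>, as in the earlier matching patch), and the "distinguish instructions from non-instructions" remark in the log boils down to replacing the old IsLeafValue flag with a plain dyn_cast at the point where operands are classified. A condensed sketch of that classification (names come from the patch, but the control flow is simplified):

  // An operand is either another reduction op of the same kind or a leaf.
  auto *EdgeInst = dyn_cast<Instruction>(EdgeVal);
  if (!EdgeInst || getRdxKind(EdgeInst) != RdxKind) {
    ReducedVals.push_back(EdgeVal);        // reduced (leaf) value
  } else {
    addReductionOps(RdxKind, EdgeInst);    // another reduction operation
    Stack.emplace_back(EdgeInst, getFirstOperandIndex(RdxKind));
  }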
[llvm-branch-commits] [llvm] 2f03528 - [SLP] rename reduction variable to avoid shadowing; NFC
Author: Sanjay Patel Date: 2021-01-21T16:02:38-05:00 New Revision: 2f03528f5e7fd9df0a12091392e000c697497262 URL: https://github.com/llvm/llvm-project/commit/2f03528f5e7fd9df0a12091392e000c697497262 DIFF: https://github.com/llvm/llvm-project/commit/2f03528f5e7fd9df0a12091392e000c697497262.diff LOG: [SLP] rename reduction variable to avoid shadowing; NFC The code structure can likely be improved now that 'OperationData' is gone. Added: Modified: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp Removed: diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 73260016f443..cee388e62bf2 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -6436,8 +6436,8 @@ class HorizontalReduction { // in this case. // Do not perform analysis of remaining operands of ParentStackElem.first // instruction, this whole instruction is an extra argument. - RecurKind RdxKind = getRdxKind(ParentStackElem.first); - ParentStackElem.second = getNumberOfOperands(RdxKind); + RecurKind ParentRdxKind = getRdxKind(ParentStackElem.first); + ParentStackElem.second = getNumberOfOperands(ParentRdxKind); } else { // We ran into something like: // ParentStackElem.first += ... + ExtraArg + ... ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] a4914dc - [SLP] do not traverse constant uses
Author: Anton Rapetov Date: 2021-01-22T08:14:09-05:00 New Revision: a4914dc1f2b4a49cf488d3be7a01fe7238c889d8 URL: https://github.com/llvm/llvm-project/commit/a4914dc1f2b4a49cf488d3be7a01fe7238c889d8 DIFF: https://github.com/llvm/llvm-project/commit/a4914dc1f2b4a49cf488d3be7a01fe7238c889d8.diff LOG: [SLP] do not traverse constant uses Walking the use list of a Constant (particularly, ConstantData) is not scalable, since a given constant may be used by many instructions in many functions in many modules. Differential Revision: https://reviews.llvm.org/D94713 Added: Modified: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp Removed: diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index cee388e62bf2..78ce4870588c 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -987,6 +987,14 @@ class BoUpSLP { std::array, 2> Values = {{LHS, RHS}}; for (int Idx = 0, IdxE = Values.size(); Idx != IdxE; ++Idx) { Value *V = Values[Idx].first; +if (isa(V)) { + // Since this is a function pass, it doesn't make semantic sense to + // walk the users of a subclass of Constant. The users could be in + // another function, or even another module that happens to be in + // the same LLVMContext. + continue; +} + // Calculate the absolute lane, using the minimum relative lane of LHS // and RHS as base and Idx as the offset. int Ln = std::min(LHS.second, RHS.second) + Idx; ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
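The scalability point in the log follows from how constants are stored: ConstantInt and friends are uniqued per LLVMContext, so their use lists reach into every function, and potentially every module, sharing that context. A standalone illustration of the unbounded walk the patch avoids (not SLP code; uses only public IR APIs):

  #include "llvm/IR/Constants.h"
  #include "llvm/IR/LLVMContext.h"
  #include "llvm/IR/Type.h"
  using namespace llvm;

  // Counting the users of a uniqued constant such as i32 0 visits uses from
  // every function in the LLVMContext, so there is no useful bound when this
  // is done from inside a function pass.
  static unsigned countUsersOfZero(LLVMContext &Ctx) {
    Constant *Zero = ConstantInt::get(Type::getInt32Ty(Ctx), 0);
    unsigned N = 0;
    for (const User *U : Zero->users()) {
      (void)U;
      ++N;
    }
    return N;
  }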
[llvm-branch-commits] [llvm] e679eea - [InstCombine] add tests for abs(sext X); NFC
Author: Sanjay Patel Date: 2021-01-22T13:36:04-05:00 New Revision: e679eea6d20d6e6e749525827c95f42bfef16285 URL: https://github.com/llvm/llvm-project/commit/e679eea6d20d6e6e749525827c95f42bfef16285 DIFF: https://github.com/llvm/llvm-project/commit/e679eea6d20d6e6e749525827c95f42bfef16285.diff LOG: [InstCombine] add tests for abs(sext X); NFC https://llvm.org/PR48816 Added: Modified: llvm/test/Transforms/InstCombine/abs-intrinsic.ll Removed: diff --git a/llvm/test/Transforms/InstCombine/abs-intrinsic.ll b/llvm/test/Transforms/InstCombine/abs-intrinsic.ll index 30e5a9ddab3c..baeb44d1d8dc 100644 --- a/llvm/test/Transforms/InstCombine/abs-intrinsic.ll +++ b/llvm/test/Transforms/InstCombine/abs-intrinsic.ll @@ -292,3 +292,66 @@ define i1 @abs_ne_int_min_nopoison(i8 %x) { %cmp = icmp ne i8 %abs, -128 ret i1 %cmp } + +define i32 @abs_sext(i8 %x) { +; CHECK-LABEL: @abs_sext( +; CHECK-NEXT:[[S:%.*]] = sext i8 [[X:%.*]] to i32 +; CHECK-NEXT:[[A:%.*]] = call i32 @llvm.abs.i32(i32 [[S]], i1 false) +; CHECK-NEXT:ret i32 [[A]] +; + %s = sext i8 %x to i32 + %a = call i32 @llvm.abs.i32(i32 %s, i1 0) + ret i32 %a +} + +define <3 x i82> @abs_nsw_sext(<3 x i7> %x) { +; CHECK-LABEL: @abs_nsw_sext( +; CHECK-NEXT:[[S:%.*]] = sext <3 x i7> [[X:%.*]] to <3 x i82> +; CHECK-NEXT:[[A:%.*]] = call <3 x i82> @llvm.abs.v3i82(<3 x i82> [[S]], i1 true) +; CHECK-NEXT:ret <3 x i82> [[A]] +; + %s = sext <3 x i7> %x to <3 x i82> + %a = call <3 x i82> @llvm.abs.v3i82(<3 x i82> %s, i1 1) + ret <3 x i82> %a +} + +define i32 @abs_sext_extra_use(i8 %x, i32* %p) { +; CHECK-LABEL: @abs_sext_extra_use( +; CHECK-NEXT:[[S:%.*]] = sext i8 [[X:%.*]] to i32 +; CHECK-NEXT:store i32 [[S]], i32* [[P:%.*]], align 4 +; CHECK-NEXT:[[A:%.*]] = call i32 @llvm.abs.i32(i32 [[S]], i1 false) +; CHECK-NEXT:ret i32 [[A]] +; + %s = sext i8 %x to i32 + store i32 %s, i32* %p + %a = call i32 @llvm.abs.i32(i32 %s, i1 0) + ret i32 %a +} + +; PR48816 + +define i8 @trunc_abs_sext(i8 %x) { +; CHECK-LABEL: @trunc_abs_sext( +; CHECK-NEXT:[[S:%.*]] = sext i8 [[X:%.*]] to i32 +; CHECK-NEXT:[[A:%.*]] = tail call i32 @llvm.abs.i32(i32 [[S]], i1 true) +; CHECK-NEXT:[[T:%.*]] = trunc i32 [[A]] to i8 +; CHECK-NEXT:ret i8 [[T]] +; + %s = sext i8 %x to i32 + %a = tail call i32 @llvm.abs.i32(i32 %s, i1 true) + %t = trunc i32 %a to i8 + ret i8 %t +} + +define <4 x i8> @trunc_abs_sext_vec(<4 x i8> %x) { +; CHECK-LABEL: @trunc_abs_sext_vec( +; CHECK-NEXT:[[S:%.*]] = sext <4 x i8> [[X:%.*]] to <4 x i32> +; CHECK-NEXT:[[A:%.*]] = tail call <4 x i32> @llvm.abs.v4i32(<4 x i32> [[S]], i1 true) +; CHECK-NEXT:[[T:%.*]] = trunc <4 x i32> [[A]] to <4 x i8> +; CHECK-NEXT:ret <4 x i8> [[T]] +; + %s = sext <4 x i8> %x to <4 x i32> + %a = tail call <4 x i32> @llvm.abs.v4i32(<4 x i32> %s, i1 true) + %t = trunc <4 x i32> %a to <4 x i8> + ret <4 x i8> %t +} ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] 411c144 - [InstCombine] narrow abs with sign-extended input
Author: Sanjay Patel Date: 2021-01-22T13:36:04-05:00 New Revision: 411c144e4c99f4d4370ed2b9c248dc6bb9a39648 URL: https://github.com/llvm/llvm-project/commit/411c144e4c99f4d4370ed2b9c248dc6bb9a39648 DIFF: https://github.com/llvm/llvm-project/commit/411c144e4c99f4d4370ed2b9c248dc6bb9a39648.diff LOG: [InstCombine] narrow abs with sign-extended input In the motivating cases from https://llvm.org/PR48816 , we have a trailing trunc. But that is not required to reduce the abs width: https://alive2.llvm.org/ce/z/ECaz-p ...as long as we clear the int-min-is-poison bit (nsw). We have some existing tests that are affected, and I'm not sure what the overall implications are, but in general we favor narrowing operations over preserving nsw/nuw. If that causes problems, we could restrict this transform based on type (shouldChangeType() and/or vector vs. scalar). Differential Revision: https://reviews.llvm.org/D95235 Added: Modified: llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp llvm/test/Transforms/InstCombine/abs-1.ll llvm/test/Transforms/InstCombine/abs-intrinsic.ll Removed: diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 7d63b30d35f8..5ba51d255109 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -820,6 +820,14 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { return BinaryOperator::CreateNeg(IIOperand); } +// abs (sext X) --> zext (abs X*) +// Clear the IsIntMin (nsw) bit on the abs to allow narrowing. +if (match(IIOperand, m_OneUse(m_SExt(m_Value(X) { + Value *NarrowAbs = + Builder.CreateBinaryIntrinsic(Intrinsic::abs, X, Builder.getFalse()); + return CastInst::Create(Instruction::ZExt, NarrowAbs, II->getType()); +} + break; } case Intrinsic::bswap: { diff --git a/llvm/test/Transforms/InstCombine/abs-1.ll b/llvm/test/Transforms/InstCombine/abs-1.ll index 7e5eadf5b25e..7452798ead77 100644 --- a/llvm/test/Transforms/InstCombine/abs-1.ll +++ b/llvm/test/Transforms/InstCombine/abs-1.ll @@ -102,9 +102,9 @@ define i8 @abs_canonical_4(i8 %x) { define i32 @abs_canonical_5(i8 %x) { ; CHECK-LABEL: @abs_canonical_5( -; CHECK-NEXT:[[CONV:%.*]] = sext i8 [[X:%.*]] to i32 -; CHECK-NEXT:[[TMP1:%.*]] = call i32 @llvm.abs.i32(i32 [[CONV]], i1 true) -; CHECK-NEXT:ret i32 [[TMP1]] +; CHECK-NEXT:[[TMP1:%.*]] = call i8 @llvm.abs.i8(i8 [[X:%.*]], i1 false) +; CHECK-NEXT:[[TMP2:%.*]] = zext i8 [[TMP1]] to i32 +; CHECK-NEXT:ret i32 [[TMP2]] ; %cmp = icmp sgt i8 %x, 0 %conv = sext i8 %x to i32 @@ -250,9 +250,9 @@ define i8 @nabs_canonical_4(i8 %x) { define i32 @nabs_canonical_5(i8 %x) { ; CHECK-LABEL: @nabs_canonical_5( -; CHECK-NEXT:[[CONV:%.*]] = sext i8 [[X:%.*]] to i32 -; CHECK-NEXT:[[TMP1:%.*]] = call i32 @llvm.abs.i32(i32 [[CONV]], i1 false) -; CHECK-NEXT:[[ABS:%.*]] = sub nsw i32 0, [[TMP1]] +; CHECK-NEXT:[[TMP1:%.*]] = call i8 @llvm.abs.i8(i8 [[X:%.*]], i1 false) +; CHECK-NEXT:[[TMP2:%.*]] = zext i8 [[TMP1]] to i32 +; CHECK-NEXT:[[ABS:%.*]] = sub nsw i32 0, [[TMP2]] ; CHECK-NEXT:ret i32 [[ABS]] ; %cmp = icmp sgt i8 %x, 0 diff --git a/llvm/test/Transforms/InstCombine/abs-intrinsic.ll b/llvm/test/Transforms/InstCombine/abs-intrinsic.ll index baeb44d1d8dc..1f5f1c2ba562 100644 --- a/llvm/test/Transforms/InstCombine/abs-intrinsic.ll +++ b/llvm/test/Transforms/InstCombine/abs-intrinsic.ll @@ -64,9 +64,9 @@ define <4 x i32> @abs_trailing_zeros_negative_vec(<4 x i32> %x) { ; sign bits, the abs reduces this to 2 sign bits. 
define i32 @abs_signbits(i30 %x) { ; CHECK-LABEL: @abs_signbits( -; CHECK-NEXT:[[EXT:%.*]] = sext i30 [[X:%.*]] to i32 -; CHECK-NEXT:[[ABS:%.*]] = call i32 @llvm.abs.i32(i32 [[EXT]], i1 false) -; CHECK-NEXT:[[ADD:%.*]] = add nuw nsw i32 [[ABS]], 1 +; CHECK-NEXT:[[TMP1:%.*]] = call i30 @llvm.abs.i30(i30 [[X:%.*]], i1 false) +; CHECK-NEXT:[[NARROW:%.*]] = add nuw i30 [[TMP1]], 1 +; CHECK-NEXT:[[ADD:%.*]] = zext i30 [[NARROW]] to i32 ; CHECK-NEXT:ret i32 [[ADD]] ; %ext = sext i30 %x to i32 @@ -77,9 +77,9 @@ define i32 @abs_signbits(i30 %x) { define <4 x i32> @abs_signbits_vec(<4 x i30> %x) { ; CHECK-LABEL: @abs_signbits_vec( -; CHECK-NEXT:[[EXT:%.*]] = sext <4 x i30> [[X:%.*]] to <4 x i32> -; CHECK-NEXT:[[ABS:%.*]] = call <4 x i32> @llvm.abs.v4i32(<4 x i32> [[EXT]], i1 false) -; CHECK-NEXT:[[ADD:%.*]] = add nuw nsw <4 x i32> [[ABS]], +; CHECK-NEXT:[[TMP1:%.*]] = call <4 x i30> @llvm.abs.v4i30(<4 x i30> [[X:%.*]], i1 false) +; CHECK-NEXT:[[NARROW:%.*]] = add nuw <4 x i30> [[TMP1]], +; CHECK-NEXT:[[ADD:%.*]] = zext <4 x i30> [[NARROW]] to <4 x i32> ; CHECK-NEXT:ret <4 x i32> [[ADD]] ; %ext = sext <4 x i30> %x to <4
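The corner case that forces the nsw (int-min-is-poison) bit to be cleared is the narrow type's minimum value. A standalone arithmetic check of that case in ordinary C++ (not LLVM code), mirroring abs(sext i8 X to i32) against zext(abs i8 X) with wrapping allowed:

  #include <cstdint>
  #include <cstdio>
  #include <cstdlib>

  int main() {
    int8_t x = -128;
    int32_t wide = std::abs(static_cast<int32_t>(x));      // abs (sext X) == 128
    uint8_t narrow = static_cast<uint8_t>(x < 0 ? -x : x); // wraps to 0x80
    uint32_t widened = narrow;                              // zext (abs X) == 128
    std::printf("%d %u\n", wide, widened);                  // prints: 128 128
    return 0;
  }

With the int-min-is-poison bit kept, the narrow abs of -128 would be poison, which is why the transform creates the new, narrower abs with that bit cleared (Builder.getFalse()).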
[llvm-branch-commits] [llvm] 1351f71 - [InstSimplify] add tests for ctpop; NFC (PR48608)
Author: Sanjay Patel Date: 2020-12-28T16:06:19-05:00 New Revision: 1351f719d49642f7f1254d13e90d8d3a2824dcde URL: https://github.com/llvm/llvm-project/commit/1351f719d49642f7f1254d13e90d8d3a2824dcde DIFF: https://github.com/llvm/llvm-project/commit/1351f719d49642f7f1254d13e90d8d3a2824dcde.diff LOG: [InstSimplify] add tests for ctpop; NFC (PR48608) Added: Modified: llvm/test/Transforms/InstSimplify/call.ll Removed: diff --git a/llvm/test/Transforms/InstSimplify/call.ll b/llvm/test/Transforms/InstSimplify/call.ll index bfbd101b046c..fa73e07b4c45 100644 --- a/llvm/test/Transforms/InstSimplify/call.ll +++ b/llvm/test/Transforms/InstSimplify/call.ll @@ -1287,6 +1287,8 @@ define i32 @call_undef_musttail() { ; This is not the builtin fmax, so we don't know anything about its behavior. +declare float @fmaxf(float, float) + define float @nobuiltin_fmax() { ; CHECK-LABEL: @nobuiltin_fmax( ; CHECK-NEXT:[[M:%.*]] = call float @fmaxf(float 0.00e+00, float 1.00e+00) [[ATTR3:#.*]] @@ -1298,6 +1300,62 @@ define float @nobuiltin_fmax() { ret float %r } -declare float @fmaxf(float, float) + +declare i32 @llvm.ctpop.i32(i32) +declare <3 x i33> @llvm.ctpop.v3i33(<3 x i33>) +declare i1 @llvm.ctpop.i1(i1) + +define i32 @ctpop_lowbit(i32 %x) { +; CHECK-LABEL: @ctpop_lowbit( +; CHECK-NEXT:[[B:%.*]] = and i32 [[X:%.*]], 1 +; CHECK-NEXT:[[R:%.*]] = call i32 @llvm.ctpop.i32(i32 [[B]]) +; CHECK-NEXT:ret i32 [[R]] +; + %b = and i32 %x, 1 + %r = call i32 @llvm.ctpop.i32(i32 %b) + ret i32 %r +} + +define i32 @ctpop_pow2(i32 %x) { +; CHECK-LABEL: @ctpop_pow2( +; CHECK-NEXT:[[B:%.*]] = and i32 [[X:%.*]], 4 +; CHECK-NEXT:[[R:%.*]] = call i32 @llvm.ctpop.i32(i32 [[B]]) +; CHECK-NEXT:ret i32 [[R]] +; + %b = and i32 %x, 4 + %r = call i32 @llvm.ctpop.i32(i32 %b) + ret i32 %r +} + +define <3 x i33> @ctpop_signbit(<3 x i33> %x) { +; CHECK-LABEL: @ctpop_signbit( +; CHECK-NEXT:[[B:%.*]] = lshr <3 x i33> [[X:%.*]], +; CHECK-NEXT:[[R:%.*]] = tail call <3 x i33> @llvm.ctpop.v3i33(<3 x i33> [[B]]) +; CHECK-NEXT:ret <3 x i33> [[R]] +; + %b = lshr <3 x i33> %x, + %r = tail call <3 x i33> @llvm.ctpop.v3i33(<3 x i33> %b) + ret <3 x i33> %r +} + +define <3 x i33> @ctpop_notsignbit(<3 x i33> %x) { +; CHECK-LABEL: @ctpop_notsignbit( +; CHECK-NEXT:[[B:%.*]] = lshr <3 x i33> [[X:%.*]], +; CHECK-NEXT:[[R:%.*]] = tail call <3 x i33> @llvm.ctpop.v3i33(<3 x i33> [[B]]) +; CHECK-NEXT:ret <3 x i33> [[R]] +; + %b = lshr <3 x i33> %x, + %r = tail call <3 x i33> @llvm.ctpop.v3i33(<3 x i33> %b) + ret <3 x i33> %r +} + +define i1 @ctpop_bool(i1 %x) { +; CHECK-LABEL: @ctpop_bool( +; CHECK-NEXT:[[R:%.*]] = tail call i1 @llvm.ctpop.i1(i1 [[X:%.*]]) +; CHECK-NEXT:ret i1 [[R]] +; + %r = tail call i1 @llvm.ctpop.i1(i1 %x) + ret i1 %r +} attributes #0 = { nobuiltin readnone } ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] 236c452 - [InstSimplify] remove ctpop of 1 (low) bit
Author: Sanjay Patel Date: 2020-12-28T16:06:20-05:00 New Revision: 236c4524a7cd3051a150690b4f4f55f496e7e248 URL: https://github.com/llvm/llvm-project/commit/236c4524a7cd3051a150690b4f4f55f496e7e248 DIFF: https://github.com/llvm/llvm-project/commit/236c4524a7cd3051a150690b4f4f55f496e7e248.diff LOG: [InstSimplify] remove ctpop of 1 (low) bit https://llvm.org/PR48608 As noted in the test comment, we could handle a more general case in instcombine and remove this, but I don't have evidence that we need to do that. https://alive2.llvm.org/ce/z/MRW9gD Added: Modified: llvm/lib/Analysis/InstructionSimplify.cpp llvm/test/Transforms/InstCombine/ctpop.ll llvm/test/Transforms/InstSimplify/call.ll Removed: diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index 27b73a5a8236..30c7ecff7940 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -5246,6 +5246,15 @@ static Value *simplifyUnaryIntrinsic(Function *F, Value *Op0, // bitreverse(bitreverse(x)) -> x if (match(Op0, m_BitReverse(m_Value(X return X; break; + case Intrinsic::ctpop: { +// If everything but the lowest bit is zero, that bit is the pop-count. Ex: +// ctpop(and X, 1) --> and X, 1 +unsigned BitWidth = Op0->getType()->getScalarSizeInBits(); +if (MaskedValueIsZero(Op0, APInt::getHighBitsSet(BitWidth, BitWidth - 1), + Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) + return Op0; +break; + } case Intrinsic::exp: // exp(log(x)) -> x if (Q.CxtI->hasAllowReassoc() && diff --git a/llvm/test/Transforms/InstCombine/ctpop.ll b/llvm/test/Transforms/InstCombine/ctpop.ll index 33b95b02dd2e..237fb0458225 100644 --- a/llvm/test/Transforms/InstCombine/ctpop.ll +++ b/llvm/test/Transforms/InstCombine/ctpop.ll @@ -84,11 +84,11 @@ define <2 x i1> @test5vec(<2 x i32> %arg) { ret <2 x i1> %res } -; Make sure we don't add range metadata to i1 ctpop. +; No intrinsic or range needed - ctpop of bool bit is the bit itself. + define i1 @test6(i1 %arg) { ; CHECK-LABEL: @test6( -; CHECK-NEXT:[[CNT:%.*]] = call i1 @llvm.ctpop.i1(i1 [[ARG:%.*]]) -; CHECK-NEXT:ret i1 [[CNT]] +; CHECK-NEXT:ret i1 [[ARG:%.*]] ; %cnt = call i1 @llvm.ctpop.i1(i1 %arg) ret i1 %cnt diff --git a/llvm/test/Transforms/InstSimplify/call.ll b/llvm/test/Transforms/InstSimplify/call.ll index fa73e07b4c45..841582ab8974 100644 --- a/llvm/test/Transforms/InstSimplify/call.ll +++ b/llvm/test/Transforms/InstSimplify/call.ll @@ -1308,14 +1308,16 @@ declare i1 @llvm.ctpop.i1(i1) define i32 @ctpop_lowbit(i32 %x) { ; CHECK-LABEL: @ctpop_lowbit( ; CHECK-NEXT:[[B:%.*]] = and i32 [[X:%.*]], 1 -; CHECK-NEXT:[[R:%.*]] = call i32 @llvm.ctpop.i32(i32 [[B]]) -; CHECK-NEXT:ret i32 [[R]] +; CHECK-NEXT:ret i32 [[B]] ; %b = and i32 %x, 1 %r = call i32 @llvm.ctpop.i32(i32 %b) ret i32 %r } +; Negative test - only low bit allowed +; This could be reduced by instcombine to and+shift. 
+ define i32 @ctpop_pow2(i32 %x) { ; CHECK-LABEL: @ctpop_pow2( ; CHECK-NEXT:[[B:%.*]] = and i32 [[X:%.*]], 4 @@ -1330,14 +1332,15 @@ define i32 @ctpop_pow2(i32 %x) { define <3 x i33> @ctpop_signbit(<3 x i33> %x) { ; CHECK-LABEL: @ctpop_signbit( ; CHECK-NEXT:[[B:%.*]] = lshr <3 x i33> [[X:%.*]], -; CHECK-NEXT:[[R:%.*]] = tail call <3 x i33> @llvm.ctpop.v3i33(<3 x i33> [[B]]) -; CHECK-NEXT:ret <3 x i33> [[R]] +; CHECK-NEXT:ret <3 x i33> [[B]] ; %b = lshr <3 x i33> %x, %r = tail call <3 x i33> @llvm.ctpop.v3i33(<3 x i33> %b) ret <3 x i33> %r } +; Negative test - only 1 bit allowed + define <3 x i33> @ctpop_notsignbit(<3 x i33> %x) { ; CHECK-LABEL: @ctpop_notsignbit( ; CHECK-NEXT:[[B:%.*]] = lshr <3 x i33> [[X:%.*]], @@ -1351,8 +1354,7 @@ define <3 x i33> @ctpop_notsignbit(<3 x i33> %x) { define i1 @ctpop_bool(i1 %x) { ; CHECK-LABEL: @ctpop_bool( -; CHECK-NEXT:[[R:%.*]] = tail call i1 @llvm.ctpop.i1(i1 [[X:%.*]]) -; CHECK-NEXT:ret i1 [[R]] +; CHECK-NEXT:ret i1 [[X:%.*]] ; %r = tail call i1 @llvm.ctpop.i1(i1 %x) ret i1 %r ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
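The fold is a pure known-bits argument: if every bit above bit 0 is known zero, the population count of the value is the value itself. A quick standalone check in plain C++ (mirroring ctpop(and X, 1) --> and X, 1):

  #include <bitset>
  #include <cassert>
  #include <cstdint>

  int main() {
    for (uint32_t x = 0; x < (1u << 16); ++x) {
      uint32_t b = x & 1u;                     // only bit 0 can be set
      assert(std::bitset<32>(b).count() == b); // popcount(b) == b
    }
    return 0;
  }

The same reasoning covers the ctpop_signbit test (a logical shift right by bitwidth-1 leaves a single possibly-set low bit), while ctpop_pow2 and ctpop_notsignbit remain negative tests because the possibly-set bit is not the lowest one.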
[llvm-branch-commits] [llvm] 21a3a02 - [SLP] replace local reduction enum with RecurrenceKind; NFCI
Author: Sanjay Patel Date: 2020-12-29T14:52:11-05:00 New Revision: 21a3a0225d84cd35227fc9d4d08234918a54f8d3 URL: https://github.com/llvm/llvm-project/commit/21a3a0225d84cd35227fc9d4d08234918a54f8d3 DIFF: https://github.com/llvm/llvm-project/commit/21a3a0225d84cd35227fc9d4d08234918a54f8d3.diff LOG: [SLP] replace local reduction enum with RecurrenceKind; NFCI I'm not sure if the SLP enum was created before the IVDescriptor RecurrenceDescriptor / RecurrenceKind existed, but the code in SLP is now redundant with that class, so it just makes things more complicated to have both. We eventually call LoopUtils createSimpleTargetReduction() to create reduction ops, so we might as well standardize on those enum names. There's still a question of whether we need to use TTI::ReductionFlags vs. MinMaxRecurrenceKind, but that can be another clean-up step. Another option would just be to flatten the enums in RecurrenceDescriptor into a single enum. There isn't much benefit (smaller switches?) to having a min/max subset. Added: Modified: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp Removed: diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 9f1768907227..eff0690eda82 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -35,6 +35,7 @@ #include "llvm/Analysis/CodeMetrics.h" #include "llvm/Analysis/DemandedBits.h" #include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/IVDescriptors.h" #include "llvm/Analysis/LoopAccessAnalysis.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryLocation.h" @@ -6445,16 +6446,7 @@ class HorizontalReduction { SmallVector ReducedVals; // Use map vector to make stable output. MapVector ExtraArgs; - - /// Kind of the reduction data. - enum ReductionKind { -RK_None, /// Not a reduction. -RK_Arithmetic, /// Binary reduction data. -RK_SMin, /// Signed minimum reduction data. -RK_UMin, /// Unsigned minimum reduction data. -RK_SMax, /// Signed maximum reduction data. -RK_UMax, /// Unsigned maximum reduction data. - }; + using RD = RecurrenceDescriptor; /// Contains info about operation, like its opcode, left and right operands. class OperationData { @@ -6462,20 +6454,27 @@ class HorizontalReduction { unsigned Opcode = 0; /// Kind of the reduction operation. -ReductionKind Kind = RK_None; +RD::RecurrenceKind Kind = RD::RK_NoRecurrence; +TargetTransformInfo::ReductionFlags RdxFlags; /// Checks if the reduction operation can be vectorized. bool isVectorizable() const { switch (Kind) { - case RK_Arithmetic: -return Opcode == Instruction::Add || Opcode == Instruction::FAdd || - Opcode == Instruction::Mul || Opcode == Instruction::FMul || - Opcode == Instruction::And || Opcode == Instruction::Or || - Opcode == Instruction::Xor; - case RK_SMin: - case RK_SMax: - case RK_UMin: - case RK_UMax: + case RD::RK_IntegerAdd: +return Opcode == Instruction::Add; + case RD::RK_IntegerMult: +return Opcode == Instruction::Mul; + case RD::RK_IntegerOr: +return Opcode == Instruction::Or; + case RD::RK_IntegerAnd: +return Opcode == Instruction::And; + case RD::RK_IntegerXor: +return Opcode == Instruction::Xor; + case RD::RK_FloatAdd: +return Opcode == Instruction::FAdd; + case RD::RK_FloatMult: +return Opcode == Instruction::FMul; + case RD::RK_IntegerMinMax: return Opcode == Instruction::ICmp; default: return false; @@ -6485,33 +6484,31 @@ class HorizontalReduction { /// Creates reduction operation with the current opcode. 
Value *createOp(IRBuilder<> &Builder, Value *LHS, Value *RHS, const Twine &Name) const { - assert(isVectorizable() && - "Expected add|fadd or min/max reduction operation."); - Value *Cmp = nullptr; + assert(isVectorizable() && "Unhandled reduction operation."); switch (Kind) { - case RK_Arithmetic: + case RD::RK_IntegerAdd: + case RD::RK_IntegerMult: + case RD::RK_IntegerOr: + case RD::RK_IntegerAnd: + case RD::RK_IntegerXor: + case RD::RK_FloatAdd: + case RD::RK_FloatMult: return Builder.CreateBinOp((Instruction::BinaryOps)Opcode, LHS, RHS, Name); - case RK_SMin: -assert(Opcode == Instruction::ICmp && "Expected integer types."); -Cmp = Builder.CreateICmpSLT(LHS, RHS); -return Builder.CreateSelect(Cmp, LHS, RHS, Name); - case RK_SMax: -assert(Opcode == Instruction::ICmp && "Expected integer types."); -Cmp = Builder.CreateICmp
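For readers less familiar with IVDescriptors, the net effect of switching to RD::RecurrenceKind is that the vectorizability check becomes a one-to-one correspondence between the recurrence kind and the instruction opcode. A condensed sketch of that check (not the full committed switch; RD abbreviates RecurrenceDescriptor as in the patch):

  static bool kindMatchesOpcode(RD::RecurrenceKind Kind, unsigned Opcode) {
    switch (Kind) {
    case RD::RK_IntegerAdd:    return Opcode == Instruction::Add;
    case RD::RK_FloatAdd:      return Opcode == Instruction::FAdd;
    case RD::RK_IntegerMinMax: return Opcode == Instruction::ICmp; // select(cmp) idiom
    // Mul/Or/And/Xor/FMul follow the same one-to-one pattern.
    default:                   return false;
    }
  }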
[llvm-branch-commits] [llvm] 8d18bc8 - [Utils] reduce code in createTargetReduction(); NFC
Author: Sanjay Patel Date: 2020-12-29T15:56:19-05:00 New Revision: 8d18bc8e6db717352811a44a81e76a196530f612 URL: https://github.com/llvm/llvm-project/commit/8d18bc8e6db717352811a44a81e76a196530f612 DIFF: https://github.com/llvm/llvm-project/commit/8d18bc8e6db717352811a44a81e76a196530f612.diff LOG: [Utils] reduce code in createTargetReduction(); NFC The switch duplicated the translation in getRecurrenceBinOp(). This code is still weird because it translates to the TTI ReductionFlags for min/max, but then createSimpleTargetReduction() converts that back to RecurrenceDescriptor::MinMaxRecurrenceKind. Added: Modified: llvm/lib/Transforms/Utils/LoopUtils.cpp Removed: diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp index 1ac270814b00..653680e5dc1e 100644 --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -1063,7 +1063,6 @@ Value *llvm::createTargetReduction(IRBuilderBase &B, bool NoNaN) { // TODO: Support in-order reductions based on the recurrence descriptor. using RD = RecurrenceDescriptor; - RD::RecurrenceKind RecKind = Desc.getRecurrenceKind(); TargetTransformInfo::ReductionFlags Flags; Flags.NoNaN = NoNaN; @@ -1072,34 +1071,12 @@ Value *llvm::createTargetReduction(IRBuilderBase &B, IRBuilderBase::FastMathFlagGuard FMFGuard(B); B.setFastMathFlags(Desc.getFastMathFlags()); - switch (RecKind) { - case RD::RK_FloatAdd: -return createSimpleTargetReduction(B, TTI, Instruction::FAdd, Src, Flags); - case RD::RK_FloatMult: -return createSimpleTargetReduction(B, TTI, Instruction::FMul, Src, Flags); - case RD::RK_IntegerAdd: -return createSimpleTargetReduction(B, TTI, Instruction::Add, Src, Flags); - case RD::RK_IntegerMult: -return createSimpleTargetReduction(B, TTI, Instruction::Mul, Src, Flags); - case RD::RK_IntegerAnd: -return createSimpleTargetReduction(B, TTI, Instruction::And, Src, Flags); - case RD::RK_IntegerOr: -return createSimpleTargetReduction(B, TTI, Instruction::Or, Src, Flags); - case RD::RK_IntegerXor: -return createSimpleTargetReduction(B, TTI, Instruction::Xor, Src, Flags); - case RD::RK_IntegerMinMax: { -RD::MinMaxRecurrenceKind MMKind = Desc.getMinMaxRecurrenceKind(); -Flags.IsMaxOp = (MMKind == RD::MRK_SIntMax || MMKind == RD::MRK_UIntMax); -Flags.IsSigned = (MMKind == RD::MRK_SIntMax || MMKind == RD::MRK_SIntMin); -return createSimpleTargetReduction(B, TTI, Instruction::ICmp, Src, Flags); - } - case RD::RK_FloatMinMax: { -Flags.IsMaxOp = Desc.getMinMaxRecurrenceKind() == RD::MRK_FloatMax; -return createSimpleTargetReduction(B, TTI, Instruction::FCmp, Src, Flags); - } - default: -llvm_unreachable("Unhandled RecKind"); - } + RD::MinMaxRecurrenceKind MMKind = Desc.getMinMaxRecurrenceKind(); + Flags.IsMaxOp = MMKind == RD::MRK_SIntMax || MMKind == RD::MRK_UIntMax || + MMKind == RD::MRK_FloatMax; + Flags.IsSigned = MMKind == RD::MRK_SIntMax || MMKind == RD::MRK_SIntMin; + return createSimpleTargetReduction(B, TTI, Desc.getRecurrenceBinOp(), Src, + Flags); } void llvm::propagateIRFlags(Value *I, ArrayRef VL, Value *OpValue) { ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
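The round-trip the log calls out can be made concrete with a simplified sketch (MMKind stands for Desc.getMinMaxRecurrenceKind(); this is not the committed code verbatim):

  // createTargetReduction() now only derives the two min/max booleans:
  TargetTransformInfo::ReductionFlags Flags;
  Flags.IsMaxOp  = MMKind == RD::MRK_SIntMax || MMKind == RD::MRK_UIntMax ||
                   MMKind == RD::MRK_FloatMax;
  Flags.IsSigned = MMKind == RD::MRK_SIntMax || MMKind == RD::MRK_SIntMin;
  // ...while createSimpleTargetReduction() immediately rebuilds the kind, e.g.
  //   MinMaxKind = Flags.IsSigned ? RD::MRK_SIntMax : RD::MRK_UIntMax;  // IsMaxOp case
  // That duplication is what later patches in this series remove.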
[llvm-branch-commits] [llvm] e90ea76 - [IR] remove 'NoNan' param when creating FP reductions
Author: Sanjay Patel Date: 2020-12-30T09:51:23-05:00 New Revision: e90ea76380d411bf81861228f23e4716ef337100 URL: https://github.com/llvm/llvm-project/commit/e90ea76380d411bf81861228f23e4716ef337100 DIFF: https://github.com/llvm/llvm-project/commit/e90ea76380d411bf81861228f23e4716ef337100.diff LOG: [IR] remove 'NoNan' param when creating FP reductions This is no-functional-change-intended (AFAIK, we can't isolate this difference in a regression test). That's because the callers should be setting the IRBuilder's FMF field when creating the reduction and/or setting those flags after creating. It doesn't make sense to override this one flag alone. This is part of a multi-step process to clean up the FMF setting/propagation. See PR35538 for an example. Added: Modified: llvm/include/llvm/IR/IRBuilder.h llvm/lib/IR/IRBuilder.cpp llvm/lib/Transforms/Utils/LoopUtils.cpp Removed: diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h index 4b26299d046c..c9074abe88c2 100644 --- a/llvm/include/llvm/IR/IRBuilder.h +++ b/llvm/include/llvm/IR/IRBuilder.h @@ -779,11 +779,11 @@ class IRBuilderBase { /// Create a vector float max reduction intrinsic of the source /// vector. - CallInst *CreateFPMaxReduce(Value *Src, bool NoNaN = false); + CallInst *CreateFPMaxReduce(Value *Src); /// Create a vector float min reduction intrinsic of the source /// vector. - CallInst *CreateFPMinReduce(Value *Src, bool NoNaN = false); + CallInst *CreateFPMinReduce(Value *Src); /// Create a lifetime.start intrinsic. /// diff --git a/llvm/lib/IR/IRBuilder.cpp b/llvm/lib/IR/IRBuilder.cpp index e8fa35314a94..51e289165590 100644 --- a/llvm/lib/IR/IRBuilder.cpp +++ b/llvm/lib/IR/IRBuilder.cpp @@ -380,24 +380,12 @@ CallInst *IRBuilderBase::CreateIntMinReduce(Value *Src, bool IsSigned) { return getReductionIntrinsic(this, ID, Src); } -CallInst *IRBuilderBase::CreateFPMaxReduce(Value *Src, bool NoNaN) { - auto Rdx = getReductionIntrinsic(this, Intrinsic::vector_reduce_fmax, Src); - if (NoNaN) { -FastMathFlags FMF; -FMF.setNoNaNs(); -Rdx->setFastMathFlags(FMF); - } - return Rdx; +CallInst *IRBuilderBase::CreateFPMaxReduce(Value *Src) { + return getReductionIntrinsic(this, Intrinsic::vector_reduce_fmax, Src); } -CallInst *IRBuilderBase::CreateFPMinReduce(Value *Src, bool NoNaN) { - auto Rdx = getReductionIntrinsic(this, Intrinsic::vector_reduce_fmin, Src); - if (NoNaN) { -FastMathFlags FMF; -FMF.setNoNaNs(); -Rdx->setFastMathFlags(FMF); - } - return Rdx; +CallInst *IRBuilderBase::CreateFPMinReduce(Value *Src) { + return getReductionIntrinsic(this, Intrinsic::vector_reduce_fmin, Src); } CallInst *IRBuilderBase::CreateLifetimeStart(Value *Ptr, ConstantInt *Size) { diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp index 80ae6b37e132..a3665a5636e5 100644 --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -1039,10 +1039,10 @@ Value *llvm::createSimpleTargetReduction( case Instruction::FCmp: if (Flags.IsMaxOp) { MinMaxKind = RD::MRK_FloatMax; - BuildFunc = [&]() { return Builder.CreateFPMaxReduce(Src, Flags.NoNaN); }; + BuildFunc = [&]() { return Builder.CreateFPMaxReduce(Src); }; } else { MinMaxKind = RD::MRK_FloatMin; - BuildFunc = [&]() { return Builder.CreateFPMinReduce(Src, Flags.NoNaN); }; + BuildFunc = [&]() { return Builder.CreateFPMinReduce(Src); }; } break; default: ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
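A sketch of the replacement pattern the log describes: the caller configures the builder's fast-math flags (no-NaNs here is purely an example) before creating the reduction, instead of passing a bool parameter. The APIs below appear in this patch series; the surrounding code is illustrative:

  IRBuilderBase::FastMathFlagGuard Guard(Builder); // restores the old FMF at scope exit
  FastMathFlags FMF;
  FMF.setNoNaNs();
  Builder.setFastMathFlags(FMF);
  CallInst *Rdx = Builder.CreateFPMaxReduce(Src);  // the call picks up FMF from the builder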
[llvm-branch-commits] [llvm] 3567908 - [SLP] add fadd reduction test to show broken FMF propagation; NFC
Author: Sanjay Patel Date: 2020-12-30T11:27:50-05:00 New Revision: 3567908d8ceb95afe50961c7a953c202131235c5 URL: https://github.com/llvm/llvm-project/commit/3567908d8ceb95afe50961c7a953c202131235c5 DIFF: https://github.com/llvm/llvm-project/commit/3567908d8ceb95afe50961c7a953c202131235c5.diff LOG: [SLP] add fadd reduction test to show broken FMF propagation; NFC Added: Modified: llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll Removed: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll index 5663c88b6366..8e175f1acda9 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll @@ -1766,4 +1766,39 @@ bb.1: ret void } +; FIXME: This is a miscompile. +; The FMF on the reduction should match the incoming insts. + +define float @fadd_v4f32_fmf(float* %p) { +; CHECK-LABEL: @fadd_v4f32_fmf( +; CHECK-NEXT:[[P1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1 +; CHECK-NEXT:[[P2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2 +; CHECK-NEXT:[[P3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3 +; CHECK-NEXT:[[TMP1:%.*]] = bitcast float* [[P]] to <4 x float>* +; CHECK-NEXT:[[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 +; CHECK-NEXT:[[TMP3:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.00e+00, <4 x float> [[TMP2]]) +; CHECK-NEXT:ret float [[TMP3]] +; +; STORE-LABEL: @fadd_v4f32_fmf( +; STORE-NEXT:[[P1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1 +; STORE-NEXT:[[P2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2 +; STORE-NEXT:[[P3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3 +; STORE-NEXT:[[TMP1:%.*]] = bitcast float* [[P]] to <4 x float>* +; STORE-NEXT:[[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 +; STORE-NEXT:[[TMP3:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.00e+00, <4 x float> [[TMP2]]) +; STORE-NEXT:ret float [[TMP3]] +; + %p1 = getelementptr inbounds float, float* %p, i64 1 + %p2 = getelementptr inbounds float, float* %p, i64 2 + %p3 = getelementptr inbounds float, float* %p, i64 3 + %t0 = load float, float* %p, align 4 + %t1 = load float, float* %p1, align 4 + %t2 = load float, float* %p2, align 4 + %t3 = load float, float* %p3, align 4 + %add1 = fadd reassoc nsz float %t1, %t0 + %add2 = fadd reassoc nsz float %t2, %add1 + %add3 = fadd reassoc nsz float %t3, %add2 + ret float %add3 +} + declare i32 @__gxx_personality_v0(...) ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
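The scalar chain in this test carries only 'reassoc nsz', so once propagation is fixed the reduction call would be expected to carry exactly those flags rather than 'fast'. Illustrative shape of the corrected output (not actual FileCheck text):

  ; expected after the fix: flags match the incoming instructions
  %r = call reassoc nsz float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> %v)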
[llvm-branch-commits] [llvm] 5ced712 - [LoopVectorizer] add test to show wrong FMF propagation; NFC
Author: Sanjay Patel Date: 2020-12-30T15:13:57-05:00 New Revision: 5ced712e9851f00ecd81ba4dc235811bbc9ec5a2 URL: https://github.com/llvm/llvm-project/commit/5ced712e9851f00ecd81ba4dc235811bbc9ec5a2 DIFF: https://github.com/llvm/llvm-project/commit/5ced712e9851f00ecd81ba4dc235811bbc9ec5a2.diff LOG: [LoopVectorizer] add test to show wrong FMF propagation; NFC Added: Modified: llvm/test/Transforms/LoopVectorize/X86/reduction-fastmath.ll Removed: diff --git a/llvm/test/Transforms/LoopVectorize/X86/reduction-fastmath.ll b/llvm/test/Transforms/LoopVectorize/X86/reduction-fastmath.ll index fbbbd59f41c5..f35024b4361b 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/reduction-fastmath.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/reduction-fastmath.ll @@ -261,3 +261,92 @@ loop.exit: %sum.lcssa = phi float [ %sum.inc, %loop ], [ 0.00e+00, %entry ] ret float %sum.lcssa } + +; FIXME: Some fcmp are 'nnan ninf', some are 'fast', but the reduction is sequential? + +define float @PR35538(float* nocapture readonly %a, i32 %N) #0 { +; CHECK-LABEL: @PR35538( +; CHECK-NEXT: entry: +; CHECK-NEXT:[[CMP12:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT:br i1 [[CMP12]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_COND_CLEANUP:%.*]] +; CHECK: for.body.lr.ph: +; CHECK-NEXT:[[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 +; CHECK-NEXT:[[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 8 +; CHECK-NEXT:br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT:[[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 8 +; CHECK-NEXT:[[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] +; CHECK-NEXT:br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT:[[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT:[[VEC_PHI:%.*]] = phi <4 x float> [ , [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT:[[VEC_PHI1:%.*]] = phi <4 x float> [ , [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT:[[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT:[[TMP1:%.*]] = add i64 [[INDEX]], 4 +; CHECK-NEXT:[[TMP2:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[TMP0]] +; CHECK-NEXT:[[TMP3:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP1]] +; CHECK-NEXT:[[TMP4:%.*]] = getelementptr inbounds float, float* [[TMP2]], i32 0 +; CHECK-NEXT:[[TMP5:%.*]] = bitcast float* [[TMP4]] to <4 x float>* +; CHECK-NEXT:[[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP5]], align 4 +; CHECK-NEXT:[[TMP6:%.*]] = getelementptr inbounds float, float* [[TMP2]], i32 4 +; CHECK-NEXT:[[TMP7:%.*]] = bitcast float* [[TMP6]] to <4 x float>* +; CHECK-NEXT:[[WIDE_LOAD2:%.*]] = load <4 x float>, <4 x float>* [[TMP7]], align 4 +; CHECK-NEXT:[[TMP8:%.*]] = fcmp nnan ninf oge <4 x float> [[WIDE_LOAD]], [[VEC_PHI]] +; CHECK-NEXT:[[TMP9:%.*]] = fcmp nnan ninf oge <4 x float> [[WIDE_LOAD2]], [[VEC_PHI1]] +; CHECK-NEXT:[[TMP10]] = select <4 x i1> [[TMP8]], <4 x float> [[WIDE_LOAD]], <4 x float> [[VEC_PHI]] +; CHECK-NEXT:[[TMP11]] = select <4 x i1> [[TMP9]], <4 x float> [[WIDE_LOAD2]], <4 x float> [[VEC_PHI1]] +; CHECK-NEXT:[[INDEX_NEXT]] = add i64 [[INDEX]], 8 +; CHECK-NEXT:[[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT:br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP8:!llvm.loop !.*]] +; CHECK: middle.block: +; CHECK-NEXT:[[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <4 x float> [[TMP10]], [[TMP11]] +; CHECK-NEXT:[[RDX_MINMAX_SELECT:%.*]] = select fast <4 x i1> 
[[RDX_MINMAX_CMP]], <4 x float> [[TMP10]], <4 x float> [[TMP11]] +; CHECK-NEXT:[[TMP13:%.*]] = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[RDX_MINMAX_SELECT]]) +; CHECK-NEXT:[[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] +; CHECK-NEXT:br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT:[[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH]] ] +; CHECK-NEXT:[[BC_MERGE_RDX:%.*]] = phi float [ -1.00e+00, [[FOR_BODY_LR_PH]] ], [ [[TMP13]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT:br label [[FOR_BODY:%.*]] +; CHECK: for.cond.cleanup.loopexit: +; CHECK-NEXT:[[MAX_0__LCSSA:%.*]] = phi float [ [[MAX_0_:%.*]], [[FOR_BODY]] ], [ [[TMP13]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT:br label [[FOR_COND_CLEANUP]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT:[[MAX_0_LCSSA:%.*]] = phi float [ -1.00e+00, [[ENTRY:%.*]] ], [ [[MAX_0__LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] +; CHECK-NEXT:ret float [[MAX_0_LCSSA]] +; CHECK: for.body: +; CHECK-NEXT:[[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_V
[llvm-branch-commits] [llvm] 8ca60db - [LoopUtils] reduce FMF and min/max complexity when forming reductions
Author: Sanjay Patel Date: 2020-12-30T15:22:26-05:00 New Revision: 8ca60db40bd944dc5f67e0f200a403b4e03818ea URL: https://github.com/llvm/llvm-project/commit/8ca60db40bd944dc5f67e0f200a403b4e03818ea DIFF: https://github.com/llvm/llvm-project/commit/8ca60db40bd944dc5f67e0f200a403b4e03818ea.diff LOG: [LoopUtils] reduce FMF and min/max complexity when forming reductions I don't know if there's some way this changes what the vectorizers may produce for reductions, but I have added test coverage with 3567908 and 5ced712 to show that both passes already have bugs in this area. Hopefully this does not make things worse before we can really fix it. Added: Modified: llvm/include/llvm/Transforms/Utils/LoopUtils.h llvm/lib/Transforms/Utils/LoopUtils.cpp llvm/lib/Transforms/Vectorize/LoopVectorize.cpp llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp Removed: diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h index ef348ed56129..ba2bb0a4c6b0 100644 --- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h +++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h @@ -365,24 +365,21 @@ Value *getShuffleReduction(IRBuilderBase &Builder, Value *Src, unsigned Op, /// Create a target reduction of the given vector. The reduction operation /// is described by the \p Opcode parameter. min/max reductions require -/// additional information supplied in \p Flags. +/// additional information supplied in \p MinMaxKind. /// The target is queried to determine if intrinsics or shuffle sequences are /// required to implement the reduction. /// Fast-math-flags are propagated using the IRBuilder's setting. -Value *createSimpleTargetReduction(IRBuilderBase &B, - const TargetTransformInfo *TTI, - unsigned Opcode, Value *Src, - TargetTransformInfo::ReductionFlags Flags = - TargetTransformInfo::ReductionFlags(), - ArrayRef RedOps = None); +Value *createSimpleTargetReduction( +IRBuilderBase &B, const TargetTransformInfo *TTI, unsigned Opcode, +Value *Src, RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind, +ArrayRef RedOps = None); /// Create a generic target reduction using a recurrence descriptor \p Desc /// The target is queried to determine if intrinsics or shuffle sequences are /// required to implement the reduction. /// Fast-math-flags are propagated using the RecurrenceDescriptor. Value *createTargetReduction(IRBuilderBase &B, const TargetTransformInfo *TTI, - RecurrenceDescriptor &Desc, Value *Src, - bool NoNaN = false); + RecurrenceDescriptor &Desc, Value *Src); /// Get the intersection (logical and) of all of the potential IR flags /// of each scalar operation (VL) that will be converted into a vector (I). diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp index a3665a5636e5..8dc7709c6e55 100644 --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -985,14 +985,12 @@ llvm::getShuffleReduction(IRBuilderBase &Builder, Value *Src, unsigned Op, /// flags (if generating min/max reductions). 
Value *llvm::createSimpleTargetReduction( IRBuilderBase &Builder, const TargetTransformInfo *TTI, unsigned Opcode, -Value *Src, TargetTransformInfo::ReductionFlags Flags, +Value *Src, RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind, ArrayRef RedOps) { auto *SrcVTy = cast(Src->getType()); std::function BuildFunc; using RD = RecurrenceDescriptor; - RD::MinMaxRecurrenceKind MinMaxKind = RD::MRK_Invalid; - switch (Opcode) { case Instruction::Add: BuildFunc = [&]() { return Builder.CreateAddReduce(Src); }; @@ -1024,33 +1022,42 @@ Value *llvm::createSimpleTargetReduction( }; break; case Instruction::ICmp: -if (Flags.IsMaxOp) { - MinMaxKind = Flags.IsSigned ? RD::MRK_SIntMax : RD::MRK_UIntMax; - BuildFunc = [&]() { -return Builder.CreateIntMaxReduce(Src, Flags.IsSigned); - }; -} else { - MinMaxKind = Flags.IsSigned ? RD::MRK_SIntMin : RD::MRK_UIntMin; - BuildFunc = [&]() { -return Builder.CreateIntMinReduce(Src, Flags.IsSigned); - }; +switch (MinMaxKind) { +case RD::MRK_SIntMax: + BuildFunc = [&]() { return Builder.CreateIntMaxReduce(Src, true); }; + break; +case RD::MRK_SIntMin: + BuildFunc = [&]() { return Builder.CreateIntMinReduce(Src, true); }; + break; +case RD::MRK_UIntMax: + BuildFunc = [&]() { return Builder.CreateIntMaxReduce(Src, false); }; + break; +case RD::MRK_UIntMin: + BuildFunc = [&]() { return Builder.CreateIntMinReduce(Src, false);
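Under the new signature, a min/max caller passes the recurrence kind directly instead of packing it into TTI::ReductionFlags. A hypothetical call site (not from the patch):

  Value *Rdx = createSimpleTargetReduction(Builder, TTI, Instruction::FCmp, Src,
                                           RecurrenceDescriptor::MRK_FloatMax);
  // Fast-math flags are no longer a parameter at all; they are taken from the
  // IRBuilder, as set up by createTargetReduction() via its FastMathFlagGuard.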
[llvm-branch-commits] [llvm] eaab711 - [Analysis] reduce code for matching min/max; NFC
Author: Sanjay Patel Date: 2020-12-31T17:19:37-05:00 New Revision: eaab71106b81031d272acfc6987e99e8b65cbe6c URL: https://github.com/llvm/llvm-project/commit/eaab71106b81031d272acfc6987e99e8b65cbe6c DIFF: https://github.com/llvm/llvm-project/commit/eaab71106b81031d272acfc6987e99e8b65cbe6c.diff LOG: [Analysis] reduce code for matching min/max; NFC This might also make it easier to adapt if we want to match min/max intrinsics rather than cmp+sel idioms. The 'const' part is to potentially avoid confusion in calling code. There's some surprising and possibly wrong behavior related to matching min/max reductions differently than other reductions. Added: Modified: llvm/include/llvm/Analysis/IVDescriptors.h llvm/lib/Analysis/IVDescriptors.cpp Removed: diff --git a/llvm/include/llvm/Analysis/IVDescriptors.h b/llvm/include/llvm/Analysis/IVDescriptors.h index e736adf899b8..30216e22fc34 100644 --- a/llvm/include/llvm/Analysis/IVDescriptors.h +++ b/llvm/include/llvm/Analysis/IVDescriptors.h @@ -96,15 +96,15 @@ class RecurrenceDescriptor { : IsRecurrence(true), PatternLastInst(I), MinMaxKind(K), UnsafeAlgebraInst(UAI) {} -bool isRecurrence() { return IsRecurrence; } +bool isRecurrence() const { return IsRecurrence; } -bool hasUnsafeAlgebra() { return UnsafeAlgebraInst != nullptr; } +bool hasUnsafeAlgebra() const { return UnsafeAlgebraInst != nullptr; } -Instruction *getUnsafeAlgebraInst() { return UnsafeAlgebraInst; } +Instruction *getUnsafeAlgebraInst() const { return UnsafeAlgebraInst; } -MinMaxRecurrenceKind getMinMaxKind() { return MinMaxKind; } +MinMaxRecurrenceKind getMinMaxKind() const { return MinMaxKind; } -Instruction *getPatternInst() { return PatternLastInst; } +Instruction *getPatternInst() const { return PatternLastInst; } private: // Is this instruction a recurrence candidate. @@ -134,10 +134,11 @@ class RecurrenceDescriptor { /// Returns true if all uses of the instruction I is within the Set. static bool areAllUsesIn(Instruction *I, SmallPtrSetImpl &Set); - /// Returns a struct describing if the instruction if the instruction is a + /// Returns a struct describing if the instruction is a /// Select(ICmp(X, Y), X, Y) instruction pattern corresponding to a min(X, Y) - /// or max(X, Y). - static InstDesc isMinMaxSelectCmpPattern(Instruction *I, InstDesc &Prev); + /// or max(X, Y). \p Prev is specifies the description of an already processed + /// select instruction, so its corresponding cmp can be matched to it. + static InstDesc isMinMaxSelectCmpPattern(Instruction *I, const InstDesc &Prev); /// Returns a struct describing if the instruction is a /// Select(FCmp(X, Y), (Z = X op PHINode), PHINode) instruction pattern. diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp index d9756512de77..eac6f3cb30f8 100644 --- a/llvm/lib/Analysis/IVDescriptors.cpp +++ b/llvm/lib/Analysis/IVDescriptors.cpp @@ -456,53 +456,42 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind, return true; } -/// Returns true if the instruction is a Select(ICmp(X, Y), X, Y) instruction -/// pattern corresponding to a min(X, Y) or max(X, Y). 
RecurrenceDescriptor::InstDesc -RecurrenceDescriptor::isMinMaxSelectCmpPattern(Instruction *I, InstDesc &Prev) { - - assert((isa(I) || isa(I) || isa(I)) && - "Expect a select instruction"); - Instruction *Cmp = nullptr; - SelectInst *Select = nullptr; +RecurrenceDescriptor::isMinMaxSelectCmpPattern(Instruction *I, + const InstDesc &Prev) { + assert((isa(I) || isa(I)) && + "Expected a cmp or select instruction"); // We must handle the select(cmp()) as a single instruction. Advance to the // select. - if ((Cmp = dyn_cast(I)) || (Cmp = dyn_cast(I))) { -if (!Cmp->hasOneUse() || !(Select = dyn_cast(*I->user_begin( - return InstDesc(false, I); -return InstDesc(Select, Prev.getMinMaxKind()); + CmpInst::Predicate Pred; + if (match(I, m_OneUse(m_Cmp(Pred, m_Value(), m_Value() { +if (auto *Select = dyn_cast(*I->user_begin())) + return InstDesc(Select, Prev.getMinMaxKind()); } - // Only handle single use cases for now. - if (!(Select = dyn_cast(I))) + // Only match select with single use cmp condition. + if (!match(I, m_Select(m_OneUse(m_Cmp(Pred, m_Value(), m_Value())), m_Value(), + m_Value( return InstDesc(false, I); - if (!(Cmp = dyn_cast(I->getOperand(0))) && - !(Cmp = dyn_cast(I->getOperand(0 -return InstDesc(false, I); - if (!Cmp->hasOneUse()) -return InstDesc(false, I); - - Value *CmpLeft; - Value *CmpRight; // Look for a min/max pattern. - if (m_UMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) -return InstDesc(Select, MRK_U
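For reference, the select-of-compare idiom this function recognizes looks like the following in IR; the one-use requirement encoded by m_OneUse(m_Cmp(...)) means the compare may feed nothing but the select:

  ; recognized as a signed-min step, i.e. smin(%x, %y)
  %c = icmp slt i32 %x, %y
  %m = select i1 %c, i32 %x, i32 %y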
[llvm-branch-commits] [llvm] c182a00 - [Analysis] fix typo in code comment; NFC
Author: Sanjay Patel Date: 2021-01-01T12:20:16-05:00 New Revision: c182a000954de667f1e87055bf0329e5e6f52f1f URL: https://github.com/llvm/llvm-project/commit/c182a000954de667f1e87055bf0329e5e6f52f1f DIFF: https://github.com/llvm/llvm-project/commit/c182a000954de667f1e87055bf0329e5e6f52f1f.diff LOG: [Analysis] fix typo in code comment; NFC Added: Modified: llvm/include/llvm/Analysis/IVDescriptors.h Removed: diff --git a/llvm/include/llvm/Analysis/IVDescriptors.h b/llvm/include/llvm/Analysis/IVDescriptors.h index 30216e22fc34..b9f6b7c2d04e 100644 --- a/llvm/include/llvm/Analysis/IVDescriptors.h +++ b/llvm/include/llvm/Analysis/IVDescriptors.h @@ -136,9 +136,10 @@ class RecurrenceDescriptor { /// Returns a struct describing if the instruction is a /// Select(ICmp(X, Y), X, Y) instruction pattern corresponding to a min(X, Y) - /// or max(X, Y). \p Prev is specifies the description of an already processed + /// or max(X, Y). \p Prev specifies the description of an already processed /// select instruction, so its corresponding cmp can be matched to it. - static InstDesc isMinMaxSelectCmpPattern(Instruction *I, const InstDesc &Prev); + static InstDesc isMinMaxSelectCmpPattern(Instruction *I, + const InstDesc &Prev); /// Returns a struct describing if the instruction is a /// Select(FCmp(X, Y), (Z = X op PHINode), PHINode) instruction pattern. ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] c74e853 - [Analysis] flatten enums for recurrence types
Author: Sanjay Patel Date: 2021-01-01T12:20:16-05:00 New Revision: c74e8539ff372a89d08e7bfea7323a4dc2979d22 URL: https://github.com/llvm/llvm-project/commit/c74e8539ff372a89d08e7bfea7323a4dc2979d22 DIFF: https://github.com/llvm/llvm-project/commit/c74e8539ff372a89d08e7bfea7323a4dc2979d22.diff LOG: [Analysis] flatten enums for recurrence types This is almost all mechanical search-and-replace and no-functional-change-intended (NFC). Having a single enum makes it easier to match/reason about the reduction cases. The goal is to remove `Opcode` from reduction matching code in the vectorizers because that makes it harder to adapt the code to handle intrinsics. The code in RecurrenceDescriptor::AddReductionVar() is the only place that required closer inspection. It uses a RecurrenceDescriptor and a second InstDesc to sometimes overwrite part of the struct. It seem like we should be able to simplify that logic, but it's not clear exactly which cmp+sel patterns that we are trying to handle/avoid. Added: Modified: llvm/include/llvm/Analysis/IVDescriptors.h llvm/include/llvm/Transforms/Utils/LoopUtils.h llvm/lib/Analysis/IVDescriptors.cpp llvm/lib/CodeGen/ExpandReductions.cpp llvm/lib/Transforms/Utils/LoopUtils.cpp llvm/lib/Transforms/Vectorize/LoopVectorize.cpp llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp Removed: diff --git a/llvm/include/llvm/Analysis/IVDescriptors.h b/llvm/include/llvm/Analysis/IVDescriptors.h index b9f6b7c2d04e..798eb430df08 100644 --- a/llvm/include/llvm/Analysis/IVDescriptors.h +++ b/llvm/include/llvm/Analysis/IVDescriptors.h @@ -34,6 +34,24 @@ class SCEV; class DominatorTree; class ICFLoopSafetyInfo; +/// These are the kinds of recurrences that we support. +enum class RecurKind { + None, ///< Not a recurrence. + Add,///< Sum of integers. + Mul,///< Product of integers. + Or, ///< Bitwise or logical OR of integers. + And,///< Bitwise or logical AND of integers. + Xor,///< Bitwise or logical XOR of integers. + SMin, ///< Signed integer min implemented in terms of select(cmp()). + SMax, ///< Signed integer max implemented in terms of select(cmp()). + UMin, ///< Unisgned integer min implemented in terms of select(cmp()). + UMax, ///< Unsigned integer max implemented in terms of select(cmp()). + FAdd, ///< Sum of floats. + FMul, ///< Product of floats. + FMin, ///< FP min implemented in terms of select(cmp()). + FMax///< FP max implemented in terms of select(cmp()). +}; + /// The RecurrenceDescriptor is used to identify recurrences variables in a /// loop. Reduction is a special case of recurrence that has uses of the /// recurrence variable outside the loop. The method isReductionPHI identifies @@ -48,40 +66,13 @@ class ICFLoopSafetyInfo; /// This struct holds information about recurrence variables. class RecurrenceDescriptor { public: - /// This enum represents the kinds of recurrences that we support. - enum RecurrenceKind { -RK_NoRecurrence, ///< Not a recurrence. -RK_IntegerAdd,///< Sum of integers. -RK_IntegerMult, ///< Product of integers. -RK_IntegerOr, ///< Bitwise or logical OR of numbers. -RK_IntegerAnd,///< Bitwise or logical AND of numbers. -RK_IntegerXor,///< Bitwise or logical XOR of numbers. -RK_IntegerMinMax, ///< Min/max implemented in terms of select(cmp()). -RK_FloatAdd, ///< Sum of floats. -RK_FloatMult, ///< Product of floats. -RK_FloatMinMax///< Min/max implemented in terms of select(cmp()). - }; - - // This enum represents the kind of minmax recurrence. 
- enum MinMaxRecurrenceKind { -MRK_Invalid, -MRK_UIntMin, -MRK_UIntMax, -MRK_SIntMin, -MRK_SIntMax, -MRK_FloatMin, -MRK_FloatMax - }; - RecurrenceDescriptor() = default; - RecurrenceDescriptor(Value *Start, Instruction *Exit, RecurrenceKind K, - FastMathFlags FMF, MinMaxRecurrenceKind MK, - Instruction *UAI, Type *RT, bool Signed, - SmallPtrSetImpl &CI) + RecurrenceDescriptor(Value *Start, Instruction *Exit, RecurKind K, + FastMathFlags FMF, Instruction *UAI, Type *RT, + bool Signed, SmallPtrSetImpl &CI) : StartValue(Start), LoopExitInstr(Exit), Kind(K), FMF(FMF), -MinMaxKind(MK), UnsafeAlgebraInst(UAI), RecurrenceType(RT), -IsSigned(Signed) { +UnsafeAlgebraInst(UAI), RecurrenceType(RT), IsSigned(Signed) { CastInsts.insert(CI.begin(), CI.end()); } @@ -89,11 +80,11 @@ class RecurrenceDescriptor { class InstDesc { public: InstDesc(bool IsRecur, Instruction *I, Instruction *UAI = nullptr) -: IsRecurrence(IsRecur), PatternLastInst(I), MinMaxKind(MRK_Invalid), - UnsafeAlgebraInst(UAI) {} +: IsRecurrence(IsRecur), PatternLastInst(I), +
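The practical effect on client code is that a single enum value now fully describes a reduction where a pair of values was needed before. An illustrative fragment (not from the patch):

  // Before: RecurrenceKind Kind = RK_IntegerMinMax; MinMaxRecurrenceKind MMK = MRK_SIntMax;
  // After:  one flat value carries the whole description.
  RecurKind Kind = RecurKind::SMax;           // signed integer max, select(cmp) idiom
  bool IsSigned = Kind == RecurKind::SMax || Kind == RecurKind::SMin;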
[llvm-branch-commits] [llvm] 6976812 - [InstCombine] add tests for ashr+icmp; NFC
Author: Sanjay Patel Date: 2021-01-04T13:35:07-05:00 New Revision: 6976812129bf62975e37f6eabced717dcd090037 URL: https://github.com/llvm/llvm-project/commit/6976812129bf62975e37f6eabced717dcd090037 DIFF: https://github.com/llvm/llvm-project/commit/6976812129bf62975e37f6eabced717dcd090037.diff LOG: [InstCombine] add tests for ashr+icmp; NFC Added: Modified: llvm/test/Transforms/InstCombine/icmp-shr.ll Removed: diff --git a/llvm/test/Transforms/InstCombine/icmp-shr.ll b/llvm/test/Transforms/InstCombine/icmp-shr.ll index 214f315f3178..22f61d2d5e6a 100644 --- a/llvm/test/Transforms/InstCombine/icmp-shr.ll +++ b/llvm/test/Transforms/InstCombine/icmp-shr.ll @@ -5,7 +5,7 @@ target datalayout = "e-p:64:64:64-p1:16:16:16-p2:32:32:32-p3:64:64:64-i1:8:8-i8: define i1 @lshr_eq_msb_low_last_zero(i8 %a) { ; CHECK-LABEL: @lshr_eq_msb_low_last_zero( -; CHECK-NEXT:[[CMP:%.*]] = icmp ugt i8 %a, 6 +; CHECK-NEXT:[[CMP:%.*]] = icmp ugt i8 [[A:%.*]], 6 ; CHECK-NEXT:ret i1 [[CMP]] ; %shr = lshr i8 127, %a @@ -15,7 +15,7 @@ define i1 @lshr_eq_msb_low_last_zero(i8 %a) { define <2 x i1> @lshr_eq_msb_low_last_zero_vec(<2 x i8> %a) { ; CHECK-LABEL: @lshr_eq_msb_low_last_zero_vec( -; CHECK-NEXT:[[CMP:%.*]] = icmp ugt <2 x i8> %a, +; CHECK-NEXT:[[CMP:%.*]] = icmp ugt <2 x i8> [[A:%.*]], ; CHECK-NEXT:ret <2 x i1> [[CMP]] ; %shr = lshr <2 x i8> , %a @@ -25,7 +25,7 @@ define <2 x i1> @lshr_eq_msb_low_last_zero_vec(<2 x i8> %a) { define i1 @ashr_eq_msb_low_second_zero(i8 %a) { ; CHECK-LABEL: @ashr_eq_msb_low_second_zero( -; CHECK-NEXT:[[CMP:%.*]] = icmp ugt i8 %a, 6 +; CHECK-NEXT:[[CMP:%.*]] = icmp ugt i8 [[A:%.*]], 6 ; CHECK-NEXT:ret i1 [[CMP]] ; %shr = ashr i8 127, %a @@ -35,7 +35,7 @@ define i1 @ashr_eq_msb_low_second_zero(i8 %a) { define i1 @lshr_ne_msb_low_last_zero(i8 %a) { ; CHECK-LABEL: @lshr_ne_msb_low_last_zero( -; CHECK-NEXT:[[CMP:%.*]] = icmp ult i8 %a, 7 +; CHECK-NEXT:[[CMP:%.*]] = icmp ult i8 [[A:%.*]], 7 ; CHECK-NEXT:ret i1 [[CMP]] ; %shr = lshr i8 127, %a @@ -45,7 +45,7 @@ define i1 @lshr_ne_msb_low_last_zero(i8 %a) { define i1 @ashr_ne_msb_low_second_zero(i8 %a) { ; CHECK-LABEL: @ashr_ne_msb_low_second_zero( -; CHECK-NEXT:[[CMP:%.*]] = icmp ult i8 %a, 7 +; CHECK-NEXT:[[CMP:%.*]] = icmp ult i8 [[A:%.*]], 7 ; CHECK-NEXT:ret i1 [[CMP]] ; %shr = ashr i8 127, %a @@ -55,7 +55,7 @@ define i1 @ashr_ne_msb_low_second_zero(i8 %a) { define i1 @ashr_eq_both_equal(i8 %a) { ; CHECK-LABEL: @ashr_eq_both_equal( -; CHECK-NEXT:[[CMP:%.*]] = icmp eq i8 %a, 0 +; CHECK-NEXT:[[CMP:%.*]] = icmp eq i8 [[A:%.*]], 0 ; CHECK-NEXT:ret i1 [[CMP]] ; %shr = ashr i8 128, %a @@ -65,7 +65,7 @@ define i1 @ashr_eq_both_equal(i8 %a) { define i1 @ashr_ne_both_equal(i8 %a) { ; CHECK-LABEL: @ashr_ne_both_equal( -; CHECK-NEXT:[[CMP:%.*]] = icmp ne i8 %a, 0 +; CHECK-NEXT:[[CMP:%.*]] = icmp ne i8 [[A:%.*]], 0 ; CHECK-NEXT:ret i1 [[CMP]] ; %shr = ashr i8 128, %a @@ -75,7 +75,7 @@ define i1 @ashr_ne_both_equal(i8 %a) { define i1 @lshr_eq_both_equal(i8 %a) { ; CHECK-LABEL: @lshr_eq_both_equal( -; CHECK-NEXT:[[CMP:%.*]] = icmp eq i8 %a, 0 +; CHECK-NEXT:[[CMP:%.*]] = icmp eq i8 [[A:%.*]], 0 ; CHECK-NEXT:ret i1 [[CMP]] ; %shr = lshr i8 127, %a @@ -85,7 +85,7 @@ define i1 @lshr_eq_both_equal(i8 %a) { define i1 @lshr_ne_both_equal(i8 %a) { ; CHECK-LABEL: @lshr_ne_both_equal( -; CHECK-NEXT:[[CMP:%.*]] = icmp ne i8 %a, 0 +; CHECK-NEXT:[[CMP:%.*]] = icmp ne i8 [[A:%.*]], 0 ; CHECK-NEXT:ret i1 [[CMP]] ; %shr = lshr i8 127, %a @@ -95,7 +95,7 @@ define i1 @lshr_ne_both_equal(i8 %a) { define i1 @exact_ashr_eq_both_equal(i8 %a) { ; CHECK-LABEL: 
@exact_ashr_eq_both_equal( -; CHECK-NEXT:[[CMP:%.*]] = icmp eq i8 %a, 0 +; CHECK-NEXT:[[CMP:%.*]] = icmp eq i8 [[A:%.*]], 0 ; CHECK-NEXT:ret i1 [[CMP]] ; %shr = ashr exact i8 128, %a @@ -105,7 +105,7 @@ define i1 @exact_ashr_eq_both_equal(i8 %a) { define i1 @exact_ashr_ne_both_equal(i8 %a) { ; CHECK-LABEL: @exact_ashr_ne_both_equal( -; CHECK-NEXT:[[CMP:%.*]] = icmp ne i8 %a, 0 +; CHECK-NEXT:[[CMP:%.*]] = icmp ne i8 [[A:%.*]], 0 ; CHECK-NEXT:ret i1 [[CMP]] ; %shr = ashr exact i8 128, %a @@ -115,7 +115,7 @@ define i1 @exact_ashr_ne_both_equal(i8 %a) { define i1 @exact_lshr_eq_both_equal(i8 %a) { ; CHECK-LABEL: @exact_lshr_eq_both_equal( -; CHECK-NEXT:[[CMP:%.*]] = icmp eq i8 %a, 0 +; CHECK-NEXT:[[CMP:%.*]] = icmp eq i8 [[A:%.*]], 0 ; CHECK-NEXT:ret i1 [[CMP]] ; %shr = lshr exact i8 126, %a @@ -125,7 +125,7 @@ define i1 @exact_lshr_eq_both_equal(i8 %a) { define i1 @exact_lshr_ne_both_equal(i8 %a) { ; CHECK-LABEL: @exact_lshr_ne_both_equal( -; CHECK-NEXT:[[CMP:%.*]] = icmp ne i8 %a, 0 +; CHECK-NEXT:[[CMP:%.*]] = icmp ne i8 [[A:%.*]], 0 ; CHECK-NEXT:ret i1 [[CMP]] ;
[llvm-branch-commits] [llvm] 9766957 - [LoopUtils] reduce code for creating reduction; NFC
Author: Sanjay Patel Date: 2021-01-04T16:05:03-05:00 New Revision: 976695752416f6ff51993ec1f3769e8a62eea2f2 URL: https://github.com/llvm/llvm-project/commit/976695752416f6ff51993ec1f3769e8a62eea2f2 DIFF: https://github.com/llvm/llvm-project/commit/976695752416f6ff51993ec1f3769e8a62eea2f2.diff LOG: [LoopUtils] reduce code for creatng reduction; NFC We can return from each case instead creating a temporary variable just to have a common return. Added: Modified: llvm/lib/Transforms/Utils/LoopUtils.cpp Removed: diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp index e062eacf82b2..3245f5f21017 100644 --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -983,77 +983,53 @@ Value *llvm::createSimpleTargetReduction(IRBuilderBase &Builder, RecurKind RdxKind, ArrayRef RedOps) { TargetTransformInfo::ReductionFlags RdxFlags; - RdxFlags.IsMaxOp = RdxKind == RecurKind::SMax || - RdxKind == RecurKind::UMax || + RdxFlags.IsMaxOp = RdxKind == RecurKind::SMax || RdxKind == RecurKind::UMax || RdxKind == RecurKind::FMax; RdxFlags.IsSigned = RdxKind == RecurKind::SMax || RdxKind == RecurKind::SMin; if (!ForceReductionIntrinsic && !TTI->useReductionIntrinsic(Opcode, Src->getType(), RdxFlags)) return getShuffleReduction(Builder, Src, Opcode, RdxKind, RedOps); - auto *SrcVTy = cast(Src->getType()); - - std::function BuildFunc; + auto *SrcVecEltTy = cast(Src->getType())->getElementType(); switch (Opcode) { case Instruction::Add: -BuildFunc = [&]() { return Builder.CreateAddReduce(Src); }; -break; +return Builder.CreateAddReduce(Src); case Instruction::Mul: -BuildFunc = [&]() { return Builder.CreateMulReduce(Src); }; -break; +return Builder.CreateMulReduce(Src); case Instruction::And: -BuildFunc = [&]() { return Builder.CreateAndReduce(Src); }; -break; +return Builder.CreateAndReduce(Src); case Instruction::Or: -BuildFunc = [&]() { return Builder.CreateOrReduce(Src); }; -break; +return Builder.CreateOrReduce(Src); case Instruction::Xor: -BuildFunc = [&]() { return Builder.CreateXorReduce(Src); }; -break; +return Builder.CreateXorReduce(Src); case Instruction::FAdd: -BuildFunc = [&]() { - auto Rdx = Builder.CreateFAddReduce( - ConstantFP::getNegativeZero(SrcVTy->getElementType()), Src); - return Rdx; -}; -break; +return Builder.CreateFAddReduce(ConstantFP::getNegativeZero(SrcVecEltTy), +Src); case Instruction::FMul: -BuildFunc = [&]() { - Type *Ty = SrcVTy->getElementType(); - auto Rdx = Builder.CreateFMulReduce(ConstantFP::get(Ty, 1.0), Src); - return Rdx; -}; -break; +return Builder.CreateFMulReduce(ConstantFP::get(SrcVecEltTy, 1.0), Src); case Instruction::ICmp: switch (RdxKind) { case RecurKind::SMax: - BuildFunc = [&]() { return Builder.CreateIntMaxReduce(Src, true); }; - break; + return Builder.CreateIntMaxReduce(Src, true); case RecurKind::SMin: - BuildFunc = [&]() { return Builder.CreateIntMinReduce(Src, true); }; - break; + return Builder.CreateIntMinReduce(Src, true); case RecurKind::UMax: - BuildFunc = [&]() { return Builder.CreateIntMaxReduce(Src, false); }; - break; + return Builder.CreateIntMaxReduce(Src, false); case RecurKind::UMin: - BuildFunc = [&]() { return Builder.CreateIntMinReduce(Src, false); }; - break; + return Builder.CreateIntMinReduce(Src, false); default: llvm_unreachable("Unexpected min/max reduction type"); } -break; case Instruction::FCmp: assert((RdxKind == RecurKind::FMax || RdxKind == RecurKind::FMin) && "Unexpected min/max reduction type"); if (RdxKind == RecurKind::FMax) - BuildFunc = [&]() { 
return Builder.CreateFPMaxReduce(Src); }; + return Builder.CreateFPMaxReduce(Src); else - BuildFunc = [&]() { return Builder.CreateFPMinReduce(Src); }; -break; + return Builder.CreateFPMinReduce(Src); default: llvm_unreachable("Unhandled opcode"); } - return BuildFunc(); } Value *llvm::createTargetReduction(IRBuilderBase &B, ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] 58b6c5d - [LoopUtils] reorder logic for creating reduction; NFC
Author: Sanjay Patel Date: 2021-01-04T16:05:02-05:00 New Revision: 58b6c5d932a0d435ddfd13f4f5b011207e64297f URL: https://github.com/llvm/llvm-project/commit/58b6c5d932a0d435ddfd13f4f5b011207e64297f DIFF: https://github.com/llvm/llvm-project/commit/58b6c5d932a0d435ddfd13f4f5b011207e64297f.diff LOG: [LoopUtils] reorder logic for creating reduction; NFC If we are using a shuffle reduction, we don't need to go through the switch on opcode - return early. Added: Modified: llvm/lib/Transforms/Utils/LoopUtils.cpp Removed: diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp index 96f1d4219bac..e062eacf82b2 100644 --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -982,6 +982,15 @@ Value *llvm::createSimpleTargetReduction(IRBuilderBase &Builder, unsigned Opcode, Value *Src, RecurKind RdxKind, ArrayRef RedOps) { + TargetTransformInfo::ReductionFlags RdxFlags; + RdxFlags.IsMaxOp = RdxKind == RecurKind::SMax || + RdxKind == RecurKind::UMax || + RdxKind == RecurKind::FMax; + RdxFlags.IsSigned = RdxKind == RecurKind::SMax || RdxKind == RecurKind::SMin; + if (!ForceReductionIntrinsic && + !TTI->useReductionIntrinsic(Opcode, Src->getType(), RdxFlags)) +return getShuffleReduction(Builder, Src, Opcode, RdxKind, RedOps); + auto *SrcVTy = cast(Src->getType()); std::function BuildFunc; @@ -1044,15 +1053,7 @@ Value *llvm::createSimpleTargetReduction(IRBuilderBase &Builder, default: llvm_unreachable("Unhandled opcode"); } - TargetTransformInfo::ReductionFlags RdxFlags; - RdxFlags.IsMaxOp = RdxKind == RecurKind::SMax || - RdxKind == RecurKind::UMax || - RdxKind == RecurKind::FMax; - RdxFlags.IsSigned = RdxKind == RecurKind::SMax || RdxKind == RecurKind::SMin; - if (ForceReductionIntrinsic || - TTI->useReductionIntrinsic(Opcode, Src->getType(), RdxFlags)) -return BuildFunc(); - return getShuffleReduction(Builder, Src, Opcode, RdxKind, RedOps); + return BuildFunc(); } Value *llvm::createTargetReduction(IRBuilderBase &B, ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] 36263a7 - [LoopUtils] remove redundant opcode parameter; NFC
Author: Sanjay Patel Date: 2021-01-04T17:05:28-05:00 New Revision: 36263a70d98afc36dea55e7a004d08455811 URL: https://github.com/llvm/llvm-project/commit/36263a70d98afc36dea55e7a004d08455811 DIFF: https://github.com/llvm/llvm-project/commit/36263a70d98afc36dea55e7a004d08455811.diff LOG: [LoopUtils] remove redundant opcode parameter; NFC While here, rename the inaccurate getRecurrenceBinOp() because that was also used to get CmpInst opcodes. The recurrence/reduction kind should always refer to the expected opcode for a reduction. SLP appears to be the only direct caller of createSimpleTargetReduction(), and that calling code ideally should not be carrying around both an opcode and a reduction kind. This should allow us to generalize reduction matching to use intrinsics instead of only binops. Added: Modified: llvm/include/llvm/Analysis/IVDescriptors.h llvm/include/llvm/Transforms/Utils/LoopUtils.h llvm/lib/Analysis/IVDescriptors.cpp llvm/lib/Transforms/Utils/LoopUtils.cpp llvm/lib/Transforms/Vectorize/LoopVectorize.cpp llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp llvm/lib/Transforms/Vectorize/VPlan.cpp Removed: diff --git a/llvm/include/llvm/Analysis/IVDescriptors.h b/llvm/include/llvm/Analysis/IVDescriptors.h index 798eb430df08f..6bb6c4cae0a2c 100644 --- a/llvm/include/llvm/Analysis/IVDescriptors.h +++ b/llvm/include/llvm/Analysis/IVDescriptors.h @@ -139,9 +139,8 @@ class RecurrenceDescriptor { /// Returns identity corresponding to the RecurrenceKind. static Constant *getRecurrenceIdentity(RecurKind K, Type *Tp); - /// Returns the opcode of binary operation corresponding to the - /// RecurrenceKind. - static unsigned getRecurrenceBinOp(RecurKind Kind); + /// Returns the opcode corresponding to the RecurrenceKind. + static unsigned getOpcode(RecurKind Kind); /// Returns true if Phi is a reduction of type Kind and adds it to the /// RecurrenceDescriptor. If either \p DB is non-null or \p AC and \p DT are @@ -178,9 +177,7 @@ class RecurrenceDescriptor { RecurKind getRecurrenceKind() const { return Kind; } - unsigned getRecurrenceBinOp() const { -return getRecurrenceBinOp(getRecurrenceKind()); - } + unsigned getOpcode() const { return getOpcode(getRecurrenceKind()); } FastMathFlags getFastMathFlags() const { return FMF; } diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h index b29add4cba0e5..d606fa954f952 100644 --- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h +++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h @@ -366,8 +366,7 @@ Value *getShuffleReduction(IRBuilderBase &Builder, Value *Src, unsigned Op, /// required to implement the reduction. /// Fast-math-flags are propagated using the IRBuilder's setting. Value *createSimpleTargetReduction(IRBuilderBase &B, - const TargetTransformInfo *TTI, - unsigned Opcode, Value *Src, + const TargetTransformInfo *TTI, Value *Src, RecurKind RdxKind, ArrayRef RedOps = None); diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp index 0bd4f98541587..a11faac093db0 100644 --- a/llvm/lib/Analysis/IVDescriptors.cpp +++ b/llvm/lib/Analysis/IVDescriptors.cpp @@ -800,8 +800,7 @@ Constant *RecurrenceDescriptor::getRecurrenceIdentity(RecurKind K, Type *Tp) { } } -/// This function translates the recurrence kind to an LLVM binary operator. 
-unsigned RecurrenceDescriptor::getRecurrenceBinOp(RecurKind Kind) { +unsigned RecurrenceDescriptor::getOpcode(RecurKind Kind) { switch (Kind) { case RecurKind::Add: return Instruction::Add; @@ -833,7 +832,7 @@ unsigned RecurrenceDescriptor::getRecurrenceBinOp(RecurKind Kind) { SmallVector RecurrenceDescriptor::getReductionOpChain(PHINode *Phi, Loop *L) const { SmallVector ReductionOperations; - unsigned RedOp = getRecurrenceBinOp(Kind); + unsigned RedOp = getOpcode(Kind); // Search down from the Phi to the LoopExitInstr, looking for instructions // with a single user of the correct type for the reduction. diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp index 3245f5f21017f..f2b94d9e78adc 100644 --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -979,9 +979,9 @@ Value *llvm::getShuffleReduction(IRBuilderBase &Builder, Value *Src, Value *llvm::createSimpleTargetReduction(IRBuilderBase &Builder, const TargetTransformInfo *TTI, - unsigned Opcode, Value *Src, - RecurKind RdxKind, + Value *Src, Re
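A hypothetical call site under the new declarations (names are illustrative, not from the patch):

  // The opcode is no longer passed; it is implied by the reduction kind.
  Value *Rdx = createSimpleTargetReduction(Builder, TTI, VecVal, RecurKind::FAdd);
  // Code that still needs the IR opcode (e.g. cost queries) recovers it:
  unsigned RdxOpcode = RecurrenceDescriptor::getOpcode(RecurKind::FAdd); // Instruction::FAdd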
[llvm-branch-commits] [llvm] 3b8b2c7 - [SLP] delete unused pairwise reduction option
Author: Sanjay Patel Date: 2021-01-05T13:23:07-05:00 New Revision: 3b8b2c7da2efb88d9f13e911e383af430ab463ef URL: https://github.com/llvm/llvm-project/commit/3b8b2c7da2efb88d9f13e911e383af430ab463ef DIFF: https://github.com/llvm/llvm-project/commit/3b8b2c7da2efb88d9f13e911e383af430ab463ef.diff LOG: [SLP] delete unused pairwise reduction option SLP tries to model 2 forms of vector reductions: pairwise and splitting. >From the cost model code comments, those are defined using an example as: /// Pairwise: /// (v0, v1, v2, v3) /// ((v0+v1), (v2+v3), undef, undef) /// Split: /// (v0, v1, v2, v3) /// ((v0+v2), (v1+v3), undef, undef) I don't know the full history of this functionality, but it was partly added back in D29402. There are apparently no users at this point (no regression tests change). X86 might have managed to work-around the need for this through cost model and codegen improvements. Removing this code makes it easier to continue the work that was started in D87416 / D88193. The alternative -- if there is some target that is silently using this option -- is to move this logic into LoopUtils. We have related/duplicate functionality there via llvm::createTargetReduction(). Differential Revision: https://reviews.llvm.org/D93860 Added: Modified: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp Removed: diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index a655d3dd91bd..8965a44ffd2b 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -6382,35 +6382,6 @@ bool SLPVectorizerPass::tryToVectorize(Instruction *I, BoUpSLP &R) { return false; } -/// Generate a shuffle mask to be used in a reduction tree. -/// -/// \param VecLen The length of the vector to be reduced. -/// \param NumEltsToRdx The number of elements that should be reduced in the -///vector. -/// \param IsPairwise Whether the reduction is a pairwise or splitting -///reduction. A pairwise reduction will generate a mask of -///<0,2,...> or <1,3,..> while a splitting reduction will generate -///<2,3, undef,undef> for a vector of 4 and NumElts = 2. -/// \param IsLeft True will generate a mask of even elements, odd otherwise. -static SmallVector createRdxShuffleMask(unsigned VecLen, - unsigned NumEltsToRdx, - bool IsPairwise, bool IsLeft) { - assert((IsPairwise || !IsLeft) && "Don't support a <0,1,undef,...> mask"); - - SmallVector ShuffleMask(VecLen, -1); - - if (IsPairwise) -// Build a mask of 0, 2, ... (left) or 1, 3, ... (right). -for (unsigned i = 0; i != NumEltsToRdx; ++i) - ShuffleMask[i] = 2 * i + !IsLeft; - else -// Move the upper half of the vector to the lower half. -for (unsigned i = 0; i != NumEltsToRdx; ++i) - ShuffleMask[i] = NumEltsToRdx + i; - - return ShuffleMask; -} - namespace { /// Model horizontal reductions. @@ -6730,10 +6701,6 @@ class HorizontalReduction { /// The operation data for the leaf values that we perform a reduction on. OperationData RdxLeafVal; - /// Should we model this reduction as a pairwise reduction tree or a tree that - /// splits the vector in halves and adds those halves. - bool IsPairwiseReduction = false; - /// Checks if the ParentStackElem.first should be marked as a reduction /// operation with an extra argument or as extra argument itself. 
void markExtraArg(std::pair &ParentStackElem, @@ -7170,7 +7137,6 @@ class HorizontalReduction { Type *ScalarTy = FirstReducedVal->getType(); auto *VecTy = FixedVectorType::get(ScalarTy, ReduxWidth); -int PairwiseRdxCost; int SplittingRdxCost; switch (RdxTreeInst.getKind()) { case RecurKind::Add: @@ -7180,9 +7146,6 @@ class HorizontalReduction { case RecurKind::Xor: case RecurKind::FAdd: case RecurKind::FMul: - PairwiseRdxCost = - TTI->getArithmeticReductionCost(RdxTreeInst.getOpcode(), VecTy, - /*IsPairwiseForm=*/true); SplittingRdxCost = TTI->getArithmeticReductionCost(RdxTreeInst.getOpcode(), VecTy, /*IsPairwiseForm=*/false); @@ -7194,9 +7157,6 @@ class HorizontalReduction { auto *VecCondTy = cast(CmpInst::makeCmpResultType(VecTy)); RecurKind Kind = RdxTreeInst.getKind(); bool IsUnsigned = Kind == RecurKind::UMax || Kind == RecurKind::UMin; - PairwiseRdxCost = - TTI->getMinMaxReductionCost(VecTy, VecCondTy, - /*IsPairwiseForm=*/true, IsUnsigned); SplittingRdxCost = TTI->getMinMaxReductionCost(VecTy, VecCondTy, /*IsPairwiseForm=*/false, IsUnsi
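For concreteness, the two deleted mask shapes versus the splitting form that SLP keeps, for VecLen = 4 and NumEltsToRdx = 2 (undef lanes written as -1):

  ; pairwise, left  : <0, 2, -1, -1>
  ; pairwise, right : <1, 3, -1, -1>
  ; splitting       : <2, 3, -1, -1>   ; upper half shuffled onto the lower half
  ; so a splitting fadd reduction of <a, b, c, d> evaluates (a + c) + (b + d).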
[llvm-branch-commits] [llvm] d4a999b - [SLP] reduce code duplication; NFC
Author: Sanjay Patel Date: 2021-01-05T15:12:40-05:00 New Revision: d4a999b453a4d3cfeee02f00f4900327fc7fcede URL: https://github.com/llvm/llvm-project/commit/d4a999b453a4d3cfeee02f00f4900327fc7fcede DIFF: https://github.com/llvm/llvm-project/commit/d4a999b453a4d3cfeee02f00f4900327fc7fcede.diff LOG: [SLP] reduce code duplication; NFC Added: Modified: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp Removed: diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 8965a44ffd2b..390b71e7a46b 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -7137,8 +7137,9 @@ class HorizontalReduction { Type *ScalarTy = FirstReducedVal->getType(); auto *VecTy = FixedVectorType::get(ScalarTy, ReduxWidth); +RecurKind Kind = RdxTreeInst.getKind(); int SplittingRdxCost; -switch (RdxTreeInst.getKind()) { +switch (Kind) { case RecurKind::Add: case RecurKind::Mul: case RecurKind::Or: @@ -7155,7 +7156,6 @@ class HorizontalReduction { case RecurKind::UMax: case RecurKind::UMin: { auto *VecCondTy = cast(CmpInst::makeCmpResultType(VecTy)); - RecurKind Kind = RdxTreeInst.getKind(); bool IsUnsigned = Kind == RecurKind::UMax || Kind == RecurKind::UMin; SplittingRdxCost = TTI->getMinMaxReductionCost(VecTy, VecCondTy, @@ -7167,7 +7167,7 @@ class HorizontalReduction { } int ScalarReduxCost = 0; -switch (RdxTreeInst.getKind()) { +switch (Kind) { case RecurKind::Add: case RecurKind::Mul: case RecurKind::Or: ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] 5a1d31a - [SLP] use reduction kind's opcode for cost model queries; NFC
Author: Sanjay Patel Date: 2021-01-05T15:12:40-05:00 New Revision: 5a1d31a28490e85de440b55e2e257b61d32e85b9 URL: https://github.com/llvm/llvm-project/commit/5a1d31a28490e85de440b55e2e257b61d32e85b9 DIFF: https://github.com/llvm/llvm-project/commit/5a1d31a28490e85de440b55e2e257b61d32e85b9.diff LOG: [SLP] use reduction kind's opcode for cost model queries; NFC This should be no-functional-change because the reduction kind opcodes are 1-for-1 mappings to the instructions we are matching as reductions. But we want to remove the need for the `OperationData` opcode field because that does not work when we start matching intrinsics (eg, maxnum) as reduction candidates. Added: Modified: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp Removed: diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 390b71e7a46b..48f2a2d2886f 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -7138,6 +7138,7 @@ class HorizontalReduction { auto *VecTy = FixedVectorType::get(ScalarTy, ReduxWidth); RecurKind Kind = RdxTreeInst.getKind(); +unsigned RdxOpcode = RecurrenceDescriptor::getOpcode(Kind); int SplittingRdxCost; switch (Kind) { case RecurKind::Add: @@ -7147,9 +7148,8 @@ class HorizontalReduction { case RecurKind::Xor: case RecurKind::FAdd: case RecurKind::FMul: - SplittingRdxCost = - TTI->getArithmeticReductionCost(RdxTreeInst.getOpcode(), VecTy, - /*IsPairwiseForm=*/false); + SplittingRdxCost = TTI->getArithmeticReductionCost( + RdxOpcode, VecTy, /*IsPairwiseForm=*/false); break; case RecurKind::SMax: case RecurKind::SMin: @@ -7175,15 +7175,14 @@ class HorizontalReduction { case RecurKind::Xor: case RecurKind::FAdd: case RecurKind::FMul: - ScalarReduxCost = - TTI->getArithmeticInstrCost(RdxTreeInst.getOpcode(), ScalarTy); + ScalarReduxCost = TTI->getArithmeticInstrCost(RdxOpcode, ScalarTy); break; case RecurKind::SMax: case RecurKind::SMin: case RecurKind::UMax: case RecurKind::UMin: ScalarReduxCost = - TTI->getCmpSelInstrCost(RdxTreeInst.getOpcode(), ScalarTy) + + TTI->getCmpSelInstrCost(RdxOpcode, ScalarTy) + TTI->getCmpSelInstrCost(Instruction::Select, ScalarTy, CmpInst::makeCmpResultType(ScalarTy)); break; ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] 6a03f8a - [SLP] reduce code for finding reduction costs; NFC
Author: Sanjay Patel Date: 2021-01-05T17:35:54-05:00 New Revision: 6a03f8ab629b34a2425764caaa46dbfcf3d8e1ef URL: https://github.com/llvm/llvm-project/commit/6a03f8ab629b34a2425764caaa46dbfcf3d8e1ef DIFF: https://github.com/llvm/llvm-project/commit/6a03f8ab629b34a2425764caaa46dbfcf3d8e1ef.diff LOG: [SLP] reduce code for finding reduction costs; NFC We can get both (vector/scalar) costs in a single switch instead of sequentially. Added: Modified: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp Removed: diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 48f2a2d2886f..92e3ae7bea8b 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -7140,6 +7140,7 @@ class HorizontalReduction { RecurKind Kind = RdxTreeInst.getKind(); unsigned RdxOpcode = RecurrenceDescriptor::getOpcode(Kind); int SplittingRdxCost; +int ScalarReduxCost; switch (Kind) { case RecurKind::Add: case RecurKind::Mul: @@ -7150,6 +7151,7 @@ class HorizontalReduction { case RecurKind::FMul: SplittingRdxCost = TTI->getArithmeticReductionCost( RdxOpcode, VecTy, /*IsPairwiseForm=*/false); + ScalarReduxCost = TTI->getArithmeticInstrCost(RdxOpcode, ScalarTy); break; case RecurKind::SMax: case RecurKind::SMin: @@ -7160,42 +7162,21 @@ class HorizontalReduction { SplittingRdxCost = TTI->getMinMaxReductionCost(VecTy, VecCondTy, /*IsPairwiseForm=*/false, IsUnsigned); - break; -} -default: - llvm_unreachable("Expected arithmetic or min/max reduction operation"); -} - -int ScalarReduxCost = 0; -switch (Kind) { -case RecurKind::Add: -case RecurKind::Mul: -case RecurKind::Or: -case RecurKind::And: -case RecurKind::Xor: -case RecurKind::FAdd: -case RecurKind::FMul: - ScalarReduxCost = TTI->getArithmeticInstrCost(RdxOpcode, ScalarTy); - break; -case RecurKind::SMax: -case RecurKind::SMin: -case RecurKind::UMax: -case RecurKind::UMin: ScalarReduxCost = TTI->getCmpSelInstrCost(RdxOpcode, ScalarTy) + TTI->getCmpSelInstrCost(Instruction::Select, ScalarTy, CmpInst::makeCmpResultType(ScalarTy)); break; +} default: llvm_unreachable("Expected arithmetic or min/max reduction operation"); } -ScalarReduxCost *= (ReduxWidth - 1); +ScalarReduxCost *= (ReduxWidth - 1); LLVM_DEBUG(dbgs() << "SLP: Adding cost " << SplittingRdxCost - ScalarReduxCost << " for reduction that starts with " << *FirstReducedVal << " (It is a splitting reduction)\n"); - return SplittingRdxCost - ScalarReduxCost; } ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] 5d24089 - [SLP] reduce code for propagating flags on reductions; NFC
Author: Sanjay Patel Date: 2021-01-06T14:37:44-05:00 New Revision: 5d24089a7001e9fb4c0e665e93312916d88aaef9 URL: https://github.com/llvm/llvm-project/commit/5d24089a7001e9fb4c0e665e93312916d88aaef9 DIFF: https://github.com/llvm/llvm-project/commit/5d24089a7001e9fb4c0e665e93312916d88aaef9.diff LOG: [SLP] reduce code for propagating flags on reductions; NFC If we add/change to match intrinsics, this might get more wordy, but there's no need to list each kind currently. Added: Modified: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp Removed: diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 75f881dc7d4b..c4278722418b 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -6637,28 +6637,15 @@ class HorizontalReduction { const ReductionOpsListType &ReductionOps) const { assert(isVectorizable() && "Expected add|fadd or min/max reduction operation."); - auto *Op = createOp(Builder, LHS, RHS, Name); - switch (Kind) { - case RecurKind::Add: - case RecurKind::Mul: - case RecurKind::Or: - case RecurKind::And: - case RecurKind::Xor: - case RecurKind::FAdd: - case RecurKind::FMul: -propagateIRFlags(Op, ReductionOps[0]); -return Op; - case RecurKind::SMax: - case RecurKind::SMin: - case RecurKind::UMax: - case RecurKind::UMin: -if (auto *SI = dyn_cast(Op)) - propagateIRFlags(SI->getCondition(), ReductionOps[0]); + Value *Op = createOp(Builder, LHS, RHS, Name); + if (RecurrenceDescriptor::isIntMinMaxRecurrenceKind(Kind)) { +if (auto *Sel = dyn_cast(Op)) + propagateIRFlags(Sel->getCondition(), ReductionOps[0]); propagateIRFlags(Op, ReductionOps[1]); return Op; - default: -llvm_unreachable("Unknown reduction operation."); } + propagateIRFlags(Op, ReductionOps[0]); + return Op; } /// Creates reduction operation with the current opcode with the IR flags /// from \p I. @@ -,30 +6653,15 @@ class HorizontalReduction { const Twine &Name, Instruction *I) const { assert(isVectorizable() && "Expected add|fadd or min/max reduction operation."); - auto *Op = createOp(Builder, LHS, RHS, Name); - switch (Kind) { - case RecurKind::Add: - case RecurKind::Mul: - case RecurKind::Or: - case RecurKind::And: - case RecurKind::Xor: - case RecurKind::FAdd: - case RecurKind::FMul: -propagateIRFlags(Op, I); -return Op; - case RecurKind::SMax: - case RecurKind::SMin: - case RecurKind::UMax: - case RecurKind::UMin: -if (auto *SI = dyn_cast(Op)) { - propagateIRFlags(SI->getCondition(), + Value *Op = createOp(Builder, LHS, RHS, Name); + if (RecurrenceDescriptor::isIntMinMaxRecurrenceKind(Kind)) { +if (auto *Sel = dyn_cast(Op)) { + propagateIRFlags(Sel->getCondition(), cast(I)->getCondition()); } -propagateIRFlags(Op, I); -return Op; - default: -llvm_unreachable("Unknown reduction operation."); } + propagateIRFlags(Op, I); + return Op; } }; ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] 4c022b5 - [SLP] use reduction kind's opcode to create new instructions; NFC
Author: Sanjay Patel Date: 2021-01-06T14:37:44-05:00 New Revision: 4c022b5a41dee998ae50cdad4e8b6548acbeee9f URL: https://github.com/llvm/llvm-project/commit/4c022b5a41dee998ae50cdad4e8b6548acbeee9f DIFF: https://github.com/llvm/llvm-project/commit/4c022b5a41dee998ae50cdad4e8b6548acbeee9f.diff LOG: [SLP] use reduction kind's opcode to create new instructions; NFC Similar to 5a1d31a28 - This should be no-functional-change because the reduction kind opcodes are 1-for-1 mappings to the instructions we are matching as reductions. But we want to remove the need for the `OperationData` opcode field because that does not work when we start matching intrinsics (eg, maxnum) as reduction candidates. Added: Modified: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp Removed: diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index c4278722418b..7b77aef2a75c 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -6457,6 +6457,7 @@ class HorizontalReduction { Value *createOp(IRBuilder<> &Builder, Value *LHS, Value *RHS, const Twine &Name) const { assert(isVectorizable() && "Unhandled reduction operation."); + unsigned RdxOpcode = RecurrenceDescriptor::getOpcode(Kind); switch (Kind) { case RecurKind::Add: case RecurKind::Mul: @@ -6465,26 +6466,22 @@ class HorizontalReduction { case RecurKind::Xor: case RecurKind::FAdd: case RecurKind::FMul: -return Builder.CreateBinOp((Instruction::BinaryOps)Opcode, LHS, RHS, +return Builder.CreateBinOp((Instruction::BinaryOps)RdxOpcode, LHS, RHS, Name); case RecurKind::SMax: { -assert(Opcode == Instruction::ICmp && "Expected integer types."); Value *Cmp = Builder.CreateICmpSGT(LHS, RHS, Name); return Builder.CreateSelect(Cmp, LHS, RHS, Name); } case RecurKind::SMin: { -assert(Opcode == Instruction::ICmp && "Expected integer types."); Value *Cmp = Builder.CreateICmpSLT(LHS, RHS, Name); return Builder.CreateSelect(Cmp, LHS, RHS, Name); } case RecurKind::UMax: { -assert(Opcode == Instruction::ICmp && "Expected integer types."); Value *Cmp = Builder.CreateICmpUGT(LHS, RHS, Name); return Builder.CreateSelect(Cmp, LHS, RHS, Name); } case RecurKind::UMin: { -assert(Opcode == Instruction::ICmp && "Expected integer types."); Value *Cmp = Builder.CreateICmpULT(LHS, RHS, Name); return Builder.CreateSelect(Cmp, LHS, RHS, Name); } ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
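The created icmp+select pairs compute ordinary scalar min/max. A self-contained model of what each pair evaluates to (plain C++ stand-ins; the patch itself emits IR through IRBuilder):

```cpp
#include <cassert>
#include <cstdint>

int32_t  smax(int32_t l, int32_t r)   { return l > r ? l : r; } // ICmpSGT + select
int32_t  smin(int32_t l, int32_t r)   { return l < r ? l : r; } // ICmpSLT + select
uint32_t umax(uint32_t l, uint32_t r) { return l > r ? l : r; } // ICmpUGT + select
uint32_t umin(uint32_t l, uint32_t r) { return l < r ? l : r; } // ICmpULT + select

int main() {
  assert(smax(-3, 2) == 2 && smin(-3, 2) == -3);
  assert(umax(3u, 0xFFFFFFFFu) == 0xFFFFFFFFu && umin(3u, 0xFFFFFFFFu) == 3u);
  return 0;
}
```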
[llvm-branch-commits] [llvm] 4c7148d - [SLP] remove opcode identifier for reduction; NFC
Author: Sanjay Patel Date: 2021-01-07T14:07:27-05:00 New Revision: 4c7148d75cd7e75f169251cdab3e013819344cfd URL: https://github.com/llvm/llvm-project/commit/4c7148d75cd7e75f169251cdab3e013819344cfd DIFF: https://github.com/llvm/llvm-project/commit/4c7148d75cd7e75f169251cdab3e013819344cfd.diff LOG: [SLP] remove opcode identifier for reduction; NFC Another step towards allowing intrinsics in reduction matching. Added: Modified: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp Removed: diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 8d6453f277ea..c8e5fdb458ff 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -772,7 +772,7 @@ class BoUpSLP { /// effectively impossible for the backend to undo. /// TODO: If load combining is allowed in the IR optimizer, this analysis /// may not be necessary. - bool isLoadCombineReductionCandidate(unsigned ReductionOpcode) const; + bool isLoadCombineReductionCandidate(RecurKind RdxKind) const; /// Assume that a vector of stores of bitwise-or/shifted/zexted loaded values /// can be load combined in the backend. Load combining may not be allowed in @@ -3896,8 +3896,8 @@ static bool isLoadCombineCandidateImpl(Value *Root, unsigned NumElts, return true; } -bool BoUpSLP::isLoadCombineReductionCandidate(unsigned RdxOpcode) const { - if (RdxOpcode != Instruction::Or) +bool BoUpSLP::isLoadCombineReductionCandidate(RecurKind RdxKind) const { + if (RdxKind != RecurKind::Or) return false; unsigned NumElts = VectorizableTree[0]->Scalars.size(); @@ -6987,7 +6987,7 @@ class HorizontalReduction { } if (V.isTreeTinyAndNotFullyVectorizable()) break; - if (V.isLoadCombineReductionCandidate(RdxTreeInst.getOpcode())) + if (V.isLoadCombineReductionCandidate(RdxTreeInst.getKind())) break; V.computeMinimumValueSizes(); ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] 267ff79 - [SLP] limit verifyFunction to debug build (PR48689)
Author: Sanjay Patel Date: 2021-01-08T08:10:17-05:00 New Revision: 267ff7901c745dc903d55599240464ebc4c0bda3 URL: https://github.com/llvm/llvm-project/commit/267ff7901c745dc903d55599240464ebc4c0bda3 DIFF: https://github.com/llvm/llvm-project/commit/267ff7901c745dc903d55599240464ebc4c0bda3.diff LOG: [SLP] limit verifyFunction to debug build (PR48689) As noted in PR48689, the verifier may have some kind of exponential behavior that should be addressed separately. For now, only run it in debug mode to prevent problems for release+asserts. That limit is what we had before D80401, and I'm not sure if there was a reason to change it in that patch. Added: Modified: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp Removed: diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 8c06e29341ad..ef0dea0f11d3 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -2499,7 +2499,7 @@ BoUpSLP::~BoUpSLP() { "trying to erase instruction with users."); Pair.getFirst()->eraseFromParent(); } - assert(!verifyFunction(*F, &dbgs())); + LLVM_DEBUG(verifyFunction(*F)); } void BoUpSLP::eraseInstructions(ArrayRef AV) { ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] 0aa75fb - [SLP] put verifyFunction call behind EXPENSIVE_CHECKS
Author: Sanjay Patel Date: 2021-01-10T12:32:21-05:00 New Revision: 0aa75fb12faa04e07ba1a6e334605357b6a159c9 URL: https://github.com/llvm/llvm-project/commit/0aa75fb12faa04e07ba1a6e334605357b6a159c9 DIFF: https://github.com/llvm/llvm-project/commit/0aa75fb12faa04e07ba1a6e334605357b6a159c9.diff LOG: [SLP] put verifyFunction call behind EXPENSIVE_CHECKS A severe compile-time slowdown from this call is noted in: https://llvm.org/PR48689 My naive fix was to put it under LLVM_DEBUG ( 267ff79 ), but that's not limiting in the way we want. This is a quick fix (or we could just remove the call completely and rely on some later pass to discover potentially wrong IR?). A bigger/better fix would be to improve/limit verifyFunction() as noted in: https://llvm.org/PR47712 Differential Revision: https://reviews.llvm.org/D94328 Added: Modified: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp Removed: diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index f124dd8ef374..d0b6b432e93e 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -2499,7 +2499,11 @@ BoUpSLP::~BoUpSLP() { "trying to erase instruction with users."); Pair.getFirst()->eraseFromParent(); } - LLVM_DEBUG(verifyFunction(*F)); +#ifdef EXPENSIVE_CHECKS + // If we could guarantee that this call is not extremely slow, we could + // remove the ifdef limitation (see PR47712). + assert(!verifyFunction(*F, %dbgs())); +#endif } void BoUpSLP::eraseInstructions(ArrayRef AV) { ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] 3f09c77 - [SLP] fix typo in assert
Author: Sanjay Patel Date: 2021-01-10T13:15:04-05:00 New Revision: 3f09c77d33dcd74b3cba4558b07f88d87ab2dd9d URL: https://github.com/llvm/llvm-project/commit/3f09c77d33dcd74b3cba4558b07f88d87ab2dd9d DIFF: https://github.com/llvm/llvm-project/commit/3f09c77d33dcd74b3cba4558b07f88d87ab2dd9d.diff LOG: [SLP] fix typo in assert This snuck into 0aa75fb12faa , but I didn't catch it locally. Added: Modified: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp Removed: diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index d0b6b432e93e..5b91495bd844 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -2502,7 +2502,7 @@ BoUpSLP::~BoUpSLP() { #ifdef EXPENSIVE_CHECKS // If we could guarantee that this call is not extremely slow, we could // remove the ifdef limitation (see PR47712). - assert(!verifyFunction(*F, %dbgs())); + assert(!verifyFunction(*F, &dbgs())); #endif } ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] 288f3fc - [InstCombine] reduce icmp(ashr X, C1), C2 to sign-bit test
Author: Sanjay Patel Date: 2021-01-11T15:53:39-05:00 New Revision: 288f3fc5dfee0c51fc00fe10a985f93c505073eb URL: https://github.com/llvm/llvm-project/commit/288f3fc5dfee0c51fc00fe10a985f93c505073eb DIFF: https://github.com/llvm/llvm-project/commit/288f3fc5dfee0c51fc00fe10a985f93c505073eb.diff LOG: [InstCombine] reduce icmp(ashr X, C1), C2 to sign-bit test This is a more basic pattern that we should handle before trying to solve: https://llvm.org/PR48640 There might be a better way to think about this because the pre-condition that I came up with (number of sign bits in the compare constant) misses a potential transform for each of ugt and ult as commented on in the test file. Tried to model this is in Alive: https://rise4fun.com/Alive/juX1 ...but I couldn't get the ComputeNumSignBits() pre-condition to work as expected, so replaced with leading 0/1 preconditions instead. Name: ugt Pre: countLeadingZeros(C2) <= C1 && countLeadingOnes(C2) <= C1 %a = ashr %x, C1 %r = icmp ugt i8 %a, C2 => %r = icmp slt i8 %x, 0 Name: ult Pre: countLeadingZeros(C2) <= C1 && countLeadingOnes(C2) <= C1 %a = ashr %x, C1 %r = icmp ult i4 %a, C2 => %r = icmp sgt i4 %x, -1 Also approximated in Alive2: https://alive2.llvm.org/ce/z/u5hCcz https://alive2.llvm.org/ce/z/__szVL Differential Revision: https://reviews.llvm.org/D94014 Added: Modified: llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp llvm/test/Transforms/InstCombine/icmp-shr.ll Removed: diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index 852def699716..9b3cfb3bd754 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -2210,6 +2210,21 @@ Instruction *InstCombinerImpl::foldICmpShrConstant(ICmpInst &Cmp, (ShiftedC + 1).ashr(ShAmtVal) == (C + 1)) return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, ShiftedC)); } + +// If the compare constant has significant bits above the lowest sign-bit, +// then convert an unsigned cmp to a test of the sign-bit: +// (ashr X, ShiftC) u> C --> X s< 0 +// (ashr X, ShiftC) u< C --> X s> -1 +if (C.getBitWidth() > 2 && C.getNumSignBits() <= ShAmtVal) { + if (Pred == CmpInst::ICMP_UGT) { +return new ICmpInst(CmpInst::ICMP_SLT, X, +ConstantInt::getNullValue(ShrTy)); + } + if (Pred == CmpInst::ICMP_ULT) { +return new ICmpInst(CmpInst::ICMP_SGT, X, +ConstantInt::getAllOnesValue(ShrTy)); + } +} } else { if (Pred == CmpInst::ICMP_ULT || (Pred == CmpInst::ICMP_UGT && IsExact)) { // icmp ult (lshr X, ShAmtC), C --> icmp ult X, (C << ShAmtC) diff --git a/llvm/test/Transforms/InstCombine/icmp-shr.ll b/llvm/test/Transforms/InstCombine/icmp-shr.ll index 22f61d2d5e6a..ad3eb713aa19 100644 --- a/llvm/test/Transforms/InstCombine/icmp-shr.ll +++ b/llvm/test/Transforms/InstCombine/icmp-shr.ll @@ -507,6 +507,10 @@ define <2 x i1> @exact_eq0_multiuse(<2 x i32> %x, <2 x i32> %y) { ret <2 x i1> %cmp } +; Verify conversions of ashr+icmp to a sign-bit test. 
+ +; negative test, but diff erent transform possible + define i1 @ashr_ugt_0(i4 %x) { ; CHECK-LABEL: @ashr_ugt_0( ; CHECK-NEXT:[[R:%.*]] = icmp ugt i4 [[X:%.*]], 1 @@ -517,6 +521,8 @@ define i1 @ashr_ugt_0(i4 %x) { ret i1 %r } +; negative test + define i1 @ashr_ugt_1(i4 %x) { ; CHECK-LABEL: @ashr_ugt_1( ; CHECK-NEXT:[[S:%.*]] = ashr i4 [[X:%.*]], 1 @@ -528,6 +534,8 @@ define i1 @ashr_ugt_1(i4 %x) { ret i1 %r } +; negative test + define i1 @ashr_ugt_2(i4 %x) { ; CHECK-LABEL: @ashr_ugt_2( ; CHECK-NEXT:[[S:%.*]] = ashr i4 [[X:%.*]], 1 @@ -539,6 +547,9 @@ define i1 @ashr_ugt_2(i4 %x) { ret i1 %r } +; negative test +; TODO: This is a sign-bit test, but we don't recognize the pattern. + define i1 @ashr_ugt_3(i4 %x) { ; CHECK-LABEL: @ashr_ugt_3( ; CHECK-NEXT:[[S:%.*]] = ashr i4 [[X:%.*]], 1 @@ -552,8 +563,7 @@ define i1 @ashr_ugt_3(i4 %x) { define i1 @ashr_ugt_4(i4 %x) { ; CHECK-LABEL: @ashr_ugt_4( -; CHECK-NEXT:[[S:%.*]] = ashr i4 [[X:%.*]], 1 -; CHECK-NEXT:[[R:%.*]] = icmp ugt i4 [[S]], 4 +; CHECK-NEXT:[[R:%.*]] = icmp slt i4 [[X:%.*]], 0 ; CHECK-NEXT:ret i1 [[R]] ; %s = ashr i4 %x, 1 @@ -563,8 +573,7 @@ define i1 @ashr_ugt_4(i4 %x) { define i1 @ashr_ugt_5(i4 %x) { ; CHECK-LABEL: @ashr_ugt_5( -; CHECK-NEXT:[[S:%.*]] = ashr i4 [[X:%.*]], 1 -; CHECK-NEXT:[[R:%.*]] = icmp ugt i4 [[S]], 5 +; CHECK-NEXT:[[R:%.*]] = icmp slt i4 [[X:%.*]], 0 ; CHECK-NEXT:ret i1 [[R]] ; %s = ashr i4 %x, 1 @@ -574,8 +583,7 @@ define i1 @ashr_ugt_5(i4 %x) { define i1 @ashr_ugt_6(i4 %x) { ; CHECK-LABEL: @ashr_ugt_6( -; CHECK-NEXT:[[S:%.*]] = ashr i4 [[X:%.*]], 1 -; CHECK-NEX
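The precondition and the fold can be checked exhaustively for i8 with a short standalone program. This is an illustration, not part of the patch; numSignBits mirrors what ComputeNumSignBits returns for a constant, and >> on a negative int is assumed to be an arithmetic shift, as it is on common compilers and in C++20.

```cpp
#include <cassert>
#include <cstdint>

// Number of leading bits of an 8-bit constant that are copies of its sign bit.
static int numSignBits(uint8_t c) {
  int n = 1;
  for (int bit = 6; bit >= 0; --bit) {
    if (((c >> bit) & 1) != ((c >> 7) & 1))
      break;
    ++n;
  }
  return n;
}

int main() {
  for (int shAmt = 1; shAmt <= 7; ++shAmt) {
    for (int c = 0; c < 256; ++c) {
      if (numSignBits((uint8_t)c) > shAmt)
        continue; // precondition of the fold not met
      for (int x = -128; x <= 127; ++x) {
        uint8_t s = (uint8_t)((int8_t)x >> shAmt); // ashr X, shAmt
        bool ugt = s > (uint8_t)c;                 // icmp ugt (ashr X, shAmt), C
        bool ult = s < (uint8_t)c;                 // icmp ult (ashr X, shAmt), C
        assert(ugt == (x < 0));   // --> icmp slt X, 0
        assert(ult == (x > -1));  // --> icmp sgt X, -1
      }
    }
  }
  return 0;
}
```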
[llvm-branch-commits] [llvm] 9c1765a - [VectorCombine] add test for load with offset; NFC
Author: Sanjay Patel Date: 2020-12-14T14:40:06-05:00 New Revision: 9c1765acabf10b7df7cf49456a06bbba2b33b364 URL: https://github.com/llvm/llvm-project/commit/9c1765acabf10b7df7cf49456a06bbba2b33b364 DIFF: https://github.com/llvm/llvm-project/commit/9c1765acabf10b7df7cf49456a06bbba2b33b364.diff LOG: [VectorCombine] add test for load with offset; NFC Added: Modified: llvm/test/Transforms/VectorCombine/X86/load.ll Removed: diff --git a/llvm/test/Transforms/VectorCombine/X86/load.ll b/llvm/test/Transforms/VectorCombine/X86/load.ll index 824a507ed103..ba2bf3f37d7b 100644 --- a/llvm/test/Transforms/VectorCombine/X86/load.ll +++ b/llvm/test/Transforms/VectorCombine/X86/load.ll @@ -535,3 +535,20 @@ define <8 x i32> @load_v1i32_extract_insert_v8i32_extra_use(<1 x i32>* align 16 %r = insertelement <8 x i32> undef, i32 %s, i32 0 ret <8 x i32> %r } + +; TODO: Can't safely load the offset vector, but can load+shuffle if it is profitable. + +define <8 x i16> @gep1_load_v2i16_extract_insert_v8i16(<2 x i16>* align 16 dereferenceable(16) %p) { +; CHECK-LABEL: @gep1_load_v2i16_extract_insert_v8i16( +; CHECK-NEXT:[[GEP:%.*]] = getelementptr inbounds <2 x i16>, <2 x i16>* [[P:%.*]], i64 1 +; CHECK-NEXT:[[L:%.*]] = load <2 x i16>, <2 x i16>* [[GEP]], align 2 +; CHECK-NEXT:[[S:%.*]] = extractelement <2 x i16> [[L]], i32 0 +; CHECK-NEXT:[[R:%.*]] = insertelement <8 x i16> undef, i16 [[S]], i64 0 +; CHECK-NEXT:ret <8 x i16> [[R]] +; + %gep = getelementptr inbounds <2 x i16>, <2 x i16>* %p, i64 1 + %l = load <2 x i16>, <2 x i16>* %gep, align 2 + %s = extractelement <2 x i16> %l, i32 0 + %r = insertelement <8 x i16> undef, i16 %s, i64 0 + ret <8 x i16> %r +} ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] d399f87 - [VectorCombine] make load transform poison-safe
Author: Sanjay Patel Date: 2020-12-14T17:42:01-05:00 New Revision: d399f870b5a94b9dcc1817ed69fec88c325bb817 URL: https://github.com/llvm/llvm-project/commit/d399f870b5a94b9dcc1817ed69fec88c325bb817 DIFF: https://github.com/llvm/llvm-project/commit/d399f870b5a94b9dcc1817ed69fec88c325bb817.diff LOG: [VectorCombine] make load transform poison-safe As noted in D93229, the transform from scalar load to vector load potentially leaks poison from the extra vector elements that are being loaded. We could use freeze here (and x86 codegen at least appears to be the same either way), but we already have a shuffle in this logic to optionally change the vector size, so let's allow that instruction to serve both purposes. Differential Revision: https://reviews.llvm.org/D93238 Added: Modified: llvm/lib/Transforms/Vectorize/VectorCombine.cpp llvm/test/Transforms/VectorCombine/X86/load.ll Removed: diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index 19f5a2b432f7a..89b60045ce910 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -161,15 +161,17 @@ bool VectorCombine::vectorizeLoadInsert(Instruction &I) { Value *CastedPtr = Builder.CreateBitCast(SrcPtr, MinVecTy->getPointerTo(AS)); Value *VecLd = Builder.CreateAlignedLoad(MinVecTy, CastedPtr, Alignment); - // If the insert type does not match the target's minimum vector type, - // use an identity shuffle to shrink/grow the vector. - if (Ty != MinVecTy) { -unsigned OutputNumElts = Ty->getNumElements(); -SmallVector Mask(OutputNumElts, UndefMaskElem); -for (unsigned i = 0; i < OutputNumElts && i < MinVecNumElts; ++i) - Mask[i] = i; -VecLd = Builder.CreateShuffleVector(VecLd, Mask); - } + // Set everything but element 0 to undef to prevent poison from propagating + // from the extra loaded memory. This will also optionally shrink/grow the + // vector from the loaded size to the output size. + // We assume this operation has no cost in codegen. + // Note that we could use freeze to avoid poison problems, but then we might + // still need a shuffle to change the vector size. 
+ unsigned OutputNumElts = Ty->getNumElements(); + SmallVector Mask(OutputNumElts, UndefMaskElem); + Mask[0] = 0; + VecLd = Builder.CreateShuffleVector(VecLd, Mask); + replaceValue(I, *VecLd); ++NumVecLoad; return true; diff --git a/llvm/test/Transforms/VectorCombine/X86/load.ll b/llvm/test/Transforms/VectorCombine/X86/load.ll index ba2bf3f37d7b6..03902c48157fe 100644 --- a/llvm/test/Transforms/VectorCombine/X86/load.ll +++ b/llvm/test/Transforms/VectorCombine/X86/load.ll @@ -175,7 +175,8 @@ define double @larger_fp_scalar_256bit_vec(<8 x float>* align 32 dereferenceable define <4 x float> @load_f32_insert_v4f32(float* align 16 dereferenceable(16) %p) { ; CHECK-LABEL: @load_f32_insert_v4f32( ; CHECK-NEXT:[[TMP1:%.*]] = bitcast float* [[P:%.*]] to <4 x float>* -; CHECK-NEXT:[[R:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 +; CHECK-NEXT:[[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 +; CHECK-NEXT:[[R:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> undef, <4 x i32> ; CHECK-NEXT:ret <4 x float> [[R]] ; %s = load float, float* %p, align 4 @@ -185,7 +186,8 @@ define <4 x float> @load_f32_insert_v4f32(float* align 16 dereferenceable(16) %p define <4 x float> @casted_load_f32_insert_v4f32(<4 x float>* align 4 dereferenceable(16) %p) { ; CHECK-LABEL: @casted_load_f32_insert_v4f32( -; CHECK-NEXT:[[R:%.*]] = load <4 x float>, <4 x float>* [[P:%.*]], align 4 +; CHECK-NEXT:[[TMP1:%.*]] = load <4 x float>, <4 x float>* [[P:%.*]], align 4 +; CHECK-NEXT:[[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> undef, <4 x i32> ; CHECK-NEXT:ret <4 x float> [[R]] ; %b = bitcast <4 x float>* %p to float* @@ -199,7 +201,8 @@ define <4 x float> @casted_load_f32_insert_v4f32(<4 x float>* align 4 dereferenc define <4 x i32> @load_i32_insert_v4i32(i32* align 16 dereferenceable(16) %p) { ; CHECK-LABEL: @load_i32_insert_v4i32( ; CHECK-NEXT:[[TMP1:%.*]] = bitcast i32* [[P:%.*]] to <4 x i32>* -; CHECK-NEXT:[[R:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 +; CHECK-NEXT:[[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 +; CHECK-NEXT:[[R:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <4 x i32> ; CHECK-NEXT:ret <4 x i32> [[R]] ; %s = load i32, i32* %p, align 4 @@ -212,7 +215,8 @@ define <4 x i32> @load_i32_insert_v4i32(i32* align 16 dereferenceable(16) %p) { define <4 x i32> @casted_load_i32_insert_v4i32(<16 x i8>* align 4 dereferenceable(16) %p) { ; CHECK-LABEL: @casted_load_i32_insert_v4i32( ; CHECK-NEXT:[[TMP1:%.*]] = bitcast <16 x i8>* [[P:%.*]] to <4 x i32>* -; CHECK-NEX
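A tiny stand-in for the mask construction described above (std::vector<int> instead of the SmallVector used in the patch): only lane 0 of the loaded vector is taken, every other output lane is undef, so poison in the extra loaded lanes cannot reach a used element, and the same mask grows or shrinks the vector to the requested width.

```cpp
#include <vector>

// -1 plays the role of UndefMaskElem.
std::vector<int> loadShuffleMask(unsigned outputNumElts) {
  std::vector<int> mask(outputNumElts, -1);
  mask[0] = 0; // keep element 0 of the widened load; drop everything else
  return mask;
}
// e.g. a 4-wide load feeding an 8-wide result gets
// <0, undef, undef, undef, undef, undef, undef, undef>, matching the test diffs.
```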
[llvm-branch-commits] [llvm] 8593e19 - [VectorCombine] add alignment test for gep load; NFC
Author: Sanjay Patel Date: 2020-12-14T18:31:19-05:00 New Revision: 8593e197bc837286abeb4dee50726b2391a77de9 URL: https://github.com/llvm/llvm-project/commit/8593e197bc837286abeb4dee50726b2391a77de9 DIFF: https://github.com/llvm/llvm-project/commit/8593e197bc837286abeb4dee50726b2391a77de9.diff LOG: [VectorCombine] add alignment test for gep load; NFC Added: Modified: llvm/test/Transforms/VectorCombine/X86/load.ll Removed: diff --git a/llvm/test/Transforms/VectorCombine/X86/load.ll b/llvm/test/Transforms/VectorCombine/X86/load.ll index 03902c48157f..d28d28761632 100644 --- a/llvm/test/Transforms/VectorCombine/X86/load.ll +++ b/llvm/test/Transforms/VectorCombine/X86/load.ll @@ -284,6 +284,21 @@ define <8 x i16> @gep01_load_i16_insert_v8i16_deref(<8 x i16>* align 16 derefere ret <8 x i16> %r } +; TODO: Verify that alignment of the new load is not over-specified. + +define <8 x i16> @gep01_load_i16_insert_v8i16_deref_minalign(<8 x i16>* align 2 dereferenceable(16) %p) { +; CHECK-LABEL: @gep01_load_i16_insert_v8i16_deref_minalign( +; CHECK-NEXT:[[GEP:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[P:%.*]], i64 0, i64 1 +; CHECK-NEXT:[[S:%.*]] = load i16, i16* [[GEP]], align 8 +; CHECK-NEXT:[[R:%.*]] = insertelement <8 x i16> undef, i16 [[S]], i64 0 +; CHECK-NEXT:ret <8 x i16> [[R]] +; + %gep = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i64 0, i64 1 + %s = load i16, i16* %gep, align 8 + %r = insertelement <8 x i16> undef, i16 %s, i64 0 + ret <8 x i16> %r +} + ; If there are enough dereferenceable bytes, we can offset the vector load. define <8 x i16> @gep10_load_i16_insert_v8i16(<8 x i16>* align 16 dereferenceable(32) %p) { ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] aaaf0ec - [VectorCombine] loosen alignment constraint for load transform
Author: Sanjay Patel Date: 2020-12-16T12:25:18-05:00 New Revision: aaaf0ec72b062dea09a277e5b9e6bda0a3da55c9 URL: https://github.com/llvm/llvm-project/commit/aaaf0ec72b062dea09a277e5b9e6bda0a3da55c9 DIFF: https://github.com/llvm/llvm-project/commit/aaaf0ec72b062dea09a277e5b9e6bda0a3da55c9.diff LOG: [VectorCombine] loosen alignment constraint for load transform As discussed in D93229, we only need a minimal alignment constraint when querying whether a hypothetical vector load is safe. We still pass/use the potentially stronger alignment attribute when checking costs and creating the new load. There's already a test that changes with the minimum code change, so splitting this off as a preliminary commit independent of any gep/offset enhancements. Differential Revision: https://reviews.llvm.org/D93397 Added: Modified: llvm/lib/Transforms/Vectorize/VectorCombine.cpp llvm/test/Transforms/VectorCombine/X86/load.ll Removed: diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index 89b60045ce91..086169c55c8d 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -134,13 +134,16 @@ bool VectorCombine::vectorizeLoadInsert(Instruction &I) { return false; // Check safety of replacing the scalar load with a larger vector load. + // We use minimal alignment (maximum flexibility) because we only care about + // the dereferenceable region. When calculating cost and creating a new op, + // we may use a larger value based on alignment attributes. unsigned MinVecNumElts = MinVectorSize / ScalarSize; auto *MinVecTy = VectorType::get(ScalarTy, MinVecNumElts, false); - Align Alignment = Load->getAlign(); - if (!isSafeToLoadUnconditionally(SrcPtr, MinVecTy, Alignment, DL, Load, &DT)) + if (!isSafeToLoadUnconditionally(SrcPtr, MinVecTy, Align(1), DL, Load, &DT)) return false; // Original pattern: insertelt undef, load [free casts of] PtrOp, 0 + Align Alignment = Load->getAlign(); Type *LoadTy = Load->getType(); int OldCost = TTI.getMemoryOpCost(Instruction::Load, LoadTy, Alignment, AS); APInt DemandedElts = APInt::getOneBitSet(MinVecNumElts, 0); diff --git a/llvm/test/Transforms/VectorCombine/X86/load.ll b/llvm/test/Transforms/VectorCombine/X86/load.ll index d28d28761632..f5a962dd7cfe 100644 --- a/llvm/test/Transforms/VectorCombine/X86/load.ll +++ b/llvm/test/Transforms/VectorCombine/X86/load.ll @@ -403,12 +403,14 @@ define <4 x float> @load_f32_insert_v4f32_volatile(float* align 16 dereferenceab ret <4 x float> %r } -; Negative test? - pointer is not as aligned as load. +; Pointer is not as aligned as load, but that's ok. +; The new load uses the larger alignment value. define <4 x float> @load_f32_insert_v4f32_align(float* align 1 dereferenceable(16) %p) { ; CHECK-LABEL: @load_f32_insert_v4f32_align( -; CHECK-NEXT:[[S:%.*]] = load float, float* [[P:%.*]], align 4 -; CHECK-NEXT:[[R:%.*]] = insertelement <4 x float> undef, float [[S]], i32 0 +; CHECK-NEXT:[[TMP1:%.*]] = bitcast float* [[P:%.*]] to <4 x float>* +; CHECK-NEXT:[[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 +; CHECK-NEXT:[[R:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> undef, <4 x i32> ; CHECK-NEXT:ret <4 x float> [[R]] ; %s = load float, float* %p, align 4 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] 38ebc1a - [VectorCombine] optimize alignment for load transform
Author: Sanjay Patel Date: 2020-12-16T15:25:45-05:00 New Revision: 38ebc1a13dc8ce41917d66918b319d793dc2fb02 URL: https://github.com/llvm/llvm-project/commit/38ebc1a13dc8ce41917d66918b319d793dc2fb02 DIFF: https://github.com/llvm/llvm-project/commit/38ebc1a13dc8ce41917d66918b319d793dc2fb02.diff LOG: [VectorCombine] optimize alignment for load transform Here's another minimal step suggested by D93229 / D93397 . (I'm trying to be extra careful in these changes because load transforms are easy to get wrong.) We can optimistically choose the greater alignment of a load and its pointer operand. As the test diffs show, this can improve what would have been unaligned vector loads into aligned loads. When we enhance with gep offsets, we will need to adjust the alignment calculation to include that offset. Differential Revision: https://reviews.llvm.org/D93406 Added: Modified: llvm/lib/Transforms/Vectorize/VectorCombine.cpp llvm/test/Transforms/VectorCombine/X86/load.ll Removed: diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index 086169c55c8d..8e341619dcf4 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -143,7 +143,8 @@ bool VectorCombine::vectorizeLoadInsert(Instruction &I) { return false; // Original pattern: insertelt undef, load [free casts of] PtrOp, 0 - Align Alignment = Load->getAlign(); + // Use the greater of the alignment on the load or its source pointer. + Align Alignment = std::max(SrcPtr->getPointerAlignment(DL), Load->getAlign()); Type *LoadTy = Load->getType(); int OldCost = TTI.getMemoryOpCost(Instruction::Load, LoadTy, Alignment, AS); APInt DemandedElts = APInt::getOneBitSet(MinVecNumElts, 0); diff --git a/llvm/test/Transforms/VectorCombine/X86/load.ll b/llvm/test/Transforms/VectorCombine/X86/load.ll index f5a962dd7cfe..e8ba175b0235 100644 --- a/llvm/test/Transforms/VectorCombine/X86/load.ll +++ b/llvm/test/Transforms/VectorCombine/X86/load.ll @@ -175,7 +175,7 @@ define double @larger_fp_scalar_256bit_vec(<8 x float>* align 32 dereferenceable define <4 x float> @load_f32_insert_v4f32(float* align 16 dereferenceable(16) %p) { ; CHECK-LABEL: @load_f32_insert_v4f32( ; CHECK-NEXT:[[TMP1:%.*]] = bitcast float* [[P:%.*]] to <4 x float>* -; CHECK-NEXT:[[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 +; CHECK-NEXT:[[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 16 ; CHECK-NEXT:[[R:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> undef, <4 x i32> ; CHECK-NEXT:ret <4 x float> [[R]] ; @@ -201,7 +201,7 @@ define <4 x float> @casted_load_f32_insert_v4f32(<4 x float>* align 4 dereferenc define <4 x i32> @load_i32_insert_v4i32(i32* align 16 dereferenceable(16) %p) { ; CHECK-LABEL: @load_i32_insert_v4i32( ; CHECK-NEXT:[[TMP1:%.*]] = bitcast i32* [[P:%.*]] to <4 x i32>* -; CHECK-NEXT:[[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 +; CHECK-NEXT:[[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 16 ; CHECK-NEXT:[[R:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <4 x i32> ; CHECK-NEXT:ret <4 x i32> [[R]] ; @@ -434,7 +434,7 @@ define <4 x float> @load_f32_insert_v4f32_deref(float* align 4 dereferenceable(1 define <8 x i32> @load_i32_insert_v8i32(i32* align 16 dereferenceable(16) %p) { ; CHECK-LABEL: @load_i32_insert_v8i32( ; CHECK-NEXT:[[TMP1:%.*]] = bitcast i32* [[P:%.*]] to <4 x i32>* -; CHECK-NEXT:[[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 +; CHECK-NEXT:[[TMP2:%.*]] = load <4 x i32>, 
<4 x i32>* [[TMP1]], align 16 ; CHECK-NEXT:[[R:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <8 x i32> ; CHECK-NEXT:ret <8 x i32> [[R]] ; @@ -458,7 +458,7 @@ define <8 x i32> @casted_load_i32_insert_v8i32(<4 x i32>* align 4 dereferenceabl define <16 x float> @load_f32_insert_v16f32(float* align 16 dereferenceable(16) %p) { ; CHECK-LABEL: @load_f32_insert_v16f32( ; CHECK-NEXT:[[TMP1:%.*]] = bitcast float* [[P:%.*]] to <4 x float>* -; CHECK-NEXT:[[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 +; CHECK-NEXT:[[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 16 ; CHECK-NEXT:[[R:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> undef, <16 x i32> ; CHECK-NEXT:ret <16 x float> [[R]] ; @@ -470,7 +470,7 @@ define <16 x float> @load_f32_insert_v16f32(float* align 16 dereferenceable(16) define <2 x float> @load_f32_insert_v2f32(float* align 16 dereferenceable(16) %p) { ; CHECK-LABEL: @load_f32_insert_v2f32( ; CHECK-NEXT:[[TMP1:%.*]] = bitcast float* [[P:%.*]] to <4 x float>* -; CHECK-NEXT:[[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 +; CHECK-NEXT:[[TMP2:%.*]] = load <4 x float>, <4 x float>
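The alignment choice itself is a one-liner; a sketch with plain byte values (power-of-two alignments, hypothetical helper name rather than the Align API):

```cpp
#include <algorithm>
#include <cstdint>

// The new vector load may use the greater of the load's own alignment and the
// alignment known on the source pointer.
uint64_t chooseLoadAlignment(uint64_t ptrAlign, uint64_t loadAlign) {
  return std::max(ptrAlign, loadAlign);
}
// e.g. `load float, float* %p, align 4` where %p carries `align 16` becomes a
// `load <4 x float> ... align 16`, as in the test diffs above.
```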
[llvm-branch-commits] [llvm] 46c331b - [VectorCombine] adjust test alignments for better coverage; NFC
Author: Sanjay Patel Date: 2020-12-16T16:30:45-05:00 New Revision: 46c331bf26d169326b52079578178ab91e3546c0 URL: https://github.com/llvm/llvm-project/commit/46c331bf26d169326b52079578178ab91e3546c0 DIFF: https://github.com/llvm/llvm-project/commit/46c331bf26d169326b52079578178ab91e3546c0.diff LOG: [VectorCombine] adjust test alignments for better coverage; NFC Added: Modified: llvm/test/Transforms/VectorCombine/X86/load.ll Removed: diff --git a/llvm/test/Transforms/VectorCombine/X86/load.ll b/llvm/test/Transforms/VectorCombine/X86/load.ll index e8ba175b0235..dee6c5eced91 100644 --- a/llvm/test/Transforms/VectorCombine/X86/load.ll +++ b/llvm/test/Transforms/VectorCombine/X86/load.ll @@ -565,16 +565,16 @@ define <8 x i32> @load_v1i32_extract_insert_v8i32_extra_use(<1 x i32>* align 16 ; TODO: Can't safely load the offset vector, but can load+shuffle if it is profitable. -define <8 x i16> @gep1_load_v2i16_extract_insert_v8i16(<2 x i16>* align 16 dereferenceable(16) %p) { +define <8 x i16> @gep1_load_v2i16_extract_insert_v8i16(<2 x i16>* align 1 dereferenceable(16) %p) { ; CHECK-LABEL: @gep1_load_v2i16_extract_insert_v8i16( ; CHECK-NEXT:[[GEP:%.*]] = getelementptr inbounds <2 x i16>, <2 x i16>* [[P:%.*]], i64 1 -; CHECK-NEXT:[[L:%.*]] = load <2 x i16>, <2 x i16>* [[GEP]], align 2 +; CHECK-NEXT:[[L:%.*]] = load <2 x i16>, <2 x i16>* [[GEP]], align 8 ; CHECK-NEXT:[[S:%.*]] = extractelement <2 x i16> [[L]], i32 0 ; CHECK-NEXT:[[R:%.*]] = insertelement <8 x i16> undef, i16 [[S]], i64 0 ; CHECK-NEXT:ret <8 x i16> [[R]] ; %gep = getelementptr inbounds <2 x i16>, <2 x i16>* %p, i64 1 - %l = load <2 x i16>, <2 x i16>* %gep, align 2 + %l = load <2 x i16>, <2 x i16>* %gep, align 8 %s = extractelement <2 x i16> %l, i32 0 %r = insertelement <8 x i16> undef, i16 %s, i64 0 ret <8 x i16> %r ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] 71a1b9f - [VectorCombine] add tests for gep load with cast; NFC
Author: Sanjay Patel Date: 2020-12-17T16:40:55-05:00 New Revision: 71a1b9fe76acfea8920e143c807c5cb8bf510254 URL: https://github.com/llvm/llvm-project/commit/71a1b9fe76acfea8920e143c807c5cb8bf510254 DIFF: https://github.com/llvm/llvm-project/commit/71a1b9fe76acfea8920e143c807c5cb8bf510254.diff LOG: [VectorCombine] add tests for gep load with cast; NFC Added: Modified: llvm/test/Transforms/VectorCombine/X86/load.ll Removed: diff --git a/llvm/test/Transforms/VectorCombine/X86/load.ll b/llvm/test/Transforms/VectorCombine/X86/load.ll index dee6c5eced91..6b4fe43a8a29 100644 --- a/llvm/test/Transforms/VectorCombine/X86/load.ll +++ b/llvm/test/Transforms/VectorCombine/X86/load.ll @@ -299,6 +299,51 @@ define <8 x i16> @gep01_load_i16_insert_v8i16_deref_minalign(<8 x i16>* align 2 ret <8 x i16> %r } +define <4 x i32> @gep01_bitcast_load_i32_insert_v4i32(<16 x i8>* align 1 dereferenceable(16) %p) { +; CHECK-LABEL: @gep01_bitcast_load_i32_insert_v4i32( +; CHECK-NEXT:[[GEP:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[P:%.*]], i64 0, i64 1 +; CHECK-NEXT:[[B:%.*]] = bitcast i8* [[GEP]] to i32* +; CHECK-NEXT:[[S:%.*]] = load i32, i32* [[B]], align 1 +; CHECK-NEXT:[[R:%.*]] = insertelement <4 x i32> undef, i32 [[S]], i64 0 +; CHECK-NEXT:ret <4 x i32> [[R]] +; + %gep = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i64 0, i64 1 + %b = bitcast i8* %gep to i32* + %s = load i32, i32* %b, align 1 + %r = insertelement <4 x i32> undef, i32 %s, i64 0 + ret <4 x i32> %r +} + +define <4 x i32> @gep012_bitcast_load_i32_insert_v4i32(<16 x i8>* align 1 dereferenceable(20) %p) { +; CHECK-LABEL: @gep012_bitcast_load_i32_insert_v4i32( +; CHECK-NEXT:[[GEP:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[P:%.*]], i64 0, i64 12 +; CHECK-NEXT:[[B:%.*]] = bitcast i8* [[GEP]] to i32* +; CHECK-NEXT:[[S:%.*]] = load i32, i32* [[B]], align 1 +; CHECK-NEXT:[[R:%.*]] = insertelement <4 x i32> undef, i32 [[S]], i64 0 +; CHECK-NEXT:ret <4 x i32> [[R]] +; + %gep = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i64 0, i64 12 + %b = bitcast i8* %gep to i32* + %s = load i32, i32* %b, align 1 + %r = insertelement <4 x i32> undef, i32 %s, i64 0 + ret <4 x i32> %r +} + +define <4 x i32> @gep013_bitcast_load_i32_insert_v4i32(<16 x i8>* align 1 dereferenceable(20) %p) { +; CHECK-LABEL: @gep013_bitcast_load_i32_insert_v4i32( +; CHECK-NEXT:[[GEP:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[P:%.*]], i64 0, i64 13 +; CHECK-NEXT:[[B:%.*]] = bitcast i8* [[GEP]] to i32* +; CHECK-NEXT:[[S:%.*]] = load i32, i32* [[B]], align 1 +; CHECK-NEXT:[[R:%.*]] = insertelement <4 x i32> undef, i32 [[S]], i64 0 +; CHECK-NEXT:ret <4 x i32> [[R]] +; + %gep = getelementptr inbounds <16 x i8>, <16 x i8>* %p, i64 0, i64 13 + %b = bitcast i8* %gep to i32* + %s = load i32, i32* %b, align 1 + %r = insertelement <4 x i32> undef, i32 %s, i64 0 + ret <4 x i32> %r +} + ; If there are enough dereferenceable bytes, we can offset the vector load. define <8 x i16> @gep10_load_i16_insert_v8i16(<8 x i16>* align 16 dereferenceable(32) %p) { ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] 47aaa99 - [VectorCombine] allow peeking through GEPs when creating a vector load
Author: Sanjay Patel Date: 2020-12-18T09:25:03-05:00 New Revision: 47aaa99c0e1e28573bf24d95c5540005ee734531 URL: https://github.com/llvm/llvm-project/commit/47aaa99c0e1e28573bf24d95c5540005ee734531 DIFF: https://github.com/llvm/llvm-project/commit/47aaa99c0e1e28573bf24d95c5540005ee734531.diff LOG: [VectorCombine] allow peeking through GEPs when creating a vector load This is an enhancement motivated by https://llvm.org/PR16739 (see D92858 for another). We can look through a GEP to find a base pointer that may be safe to use for a vector load. If so, then we shuffle (shift) the necessary vector element over to index 0. Alive2 proof based on 1 of the regression tests: https://alive2.llvm.org/ce/z/yPJLkh The vector translation is independent of endian (verify by changing to leading 'E' in the datalayout string). Differential Revision: https://reviews.llvm.org/D93229 Added: Modified: llvm/lib/Transforms/Vectorize/VectorCombine.cpp llvm/test/Transforms/VectorCombine/X86/load.ll Removed: diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index 8e341619dcf4..a865f88cba74 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -93,6 +93,7 @@ static void replaceValue(Value &Old, Value &New) { bool VectorCombine::vectorizeLoadInsert(Instruction &I) { // Match insert into fixed vector of scalar value. + // TODO: Handle non-zero insert index. auto *Ty = dyn_cast(I.getType()); Value *Scalar; if (!Ty || !match(&I, m_InsertElt(m_Undef(), m_Value(Scalar), m_ZeroInt())) || @@ -115,7 +116,6 @@ bool VectorCombine::vectorizeLoadInsert(Instruction &I) { mustSuppressSpeculation(*Load)) return false; - // TODO: Extend this to match GEP with constant offsets. const DataLayout &DL = I.getModule()->getDataLayout(); Value *SrcPtr = Load->getPointerOperand()->stripPointerCasts(); assert(isa(SrcPtr->getType()) && "Expected a pointer type"); @@ -127,10 +127,13 @@ bool VectorCombine::vectorizeLoadInsert(Instruction &I) { if (AS != SrcPtr->getType()->getPointerAddressSpace()) SrcPtr = Load->getPointerOperand(); + // We are potentially transforming byte-sized (8-bit) memory accesses, so make + // sure we have all of our type-based constraints in place for this target. Type *ScalarTy = Scalar->getType(); uint64_t ScalarSize = ScalarTy->getPrimitiveSizeInBits(); unsigned MinVectorSize = TTI.getMinVectorRegisterBitWidth(); - if (!ScalarSize || !MinVectorSize || MinVectorSize % ScalarSize != 0) + if (!ScalarSize || !MinVectorSize || MinVectorSize % ScalarSize != 0 || + ScalarSize % 8 != 0) return false; // Check safety of replacing the scalar load with a larger vector load. @@ -139,12 +142,45 @@ bool VectorCombine::vectorizeLoadInsert(Instruction &I) { // we may use a larger value based on alignment attributes. unsigned MinVecNumElts = MinVectorSize / ScalarSize; auto *MinVecTy = VectorType::get(ScalarTy, MinVecNumElts, false); - if (!isSafeToLoadUnconditionally(SrcPtr, MinVecTy, Align(1), DL, Load, &DT)) -return false; + unsigned OffsetEltIndex = 0; + Align Alignment = Load->getAlign(); + if (!isSafeToLoadUnconditionally(SrcPtr, MinVecTy, Align(1), DL, Load, &DT)) { +// It is not safe to load directly from the pointer, but we can still peek +// through gep offsets and check if it safe to load from a base address with +// updated alignment. If it is, we can shuffle the element(s) into place +// after loading. 
+unsigned OffsetBitWidth = DL.getIndexTypeSizeInBits(SrcPtr->getType()); +APInt Offset(OffsetBitWidth, 0); +SrcPtr = SrcPtr->stripAndAccumulateInBoundsConstantOffsets(DL, Offset); + +// We want to shuffle the result down from a high element of a vector, so +// the offset must be positive. +if (Offset.isNegative()) + return false; + +// The offset must be a multiple of the scalar element to shuffle cleanly +// in the element's size. +uint64_t ScalarSizeInBytes = ScalarSize / 8; +if (Offset.urem(ScalarSizeInBytes) != 0) + return false; + +// If we load MinVecNumElts, will our target element still be loaded? +OffsetEltIndex = Offset.udiv(ScalarSizeInBytes).getZExtValue(); +if (OffsetEltIndex >= MinVecNumElts) + return false; + +if (!isSafeToLoadUnconditionally(SrcPtr, MinVecTy, Align(1), DL, Load, &DT)) + return false; + +// Update alignment with offset value. Note that the offset could be negated +// to more accurately represent "(new) SrcPtr - Offset = (old) SrcPtr", but +// negation does not change the result of the alignment calculation. +Alignment = commonAlignment(Alignment, Offset.getZExtValue()); + } // Original pattern: insertelt undef, load [free casts of] PtrOp, 0 // Use the grea
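A standalone sketch of the offset checks done while peeking through a constant GEP (byte-based, hypothetical helper; the real code works on APInt offsets accumulated by stripAndAccumulateInBoundsConstantOffsets):

```cpp
#include <cstdint>
#include <optional>

// Returns the lane of the widened load that holds the originally loaded
// scalar, or nullopt if the transform must be rejected.
std::optional<unsigned> offsetEltIndex(int64_t offsetBytes, uint64_t scalarBytes,
                                       unsigned minVecNumElts) {
  if (offsetBytes < 0)
    return std::nullopt; // can only shuffle down from a higher element
  if ((uint64_t)offsetBytes % scalarBytes != 0)
    return std::nullopt; // must land on an element boundary
  uint64_t idx = (uint64_t)offsetBytes / scalarBytes;
  if (idx >= minVecNumElts)
    return std::nullopt; // target element would not be loaded at all
  return (unsigned)idx;
}
// e.g. an i16 load at byte offset 2, widened to an 8 x i16 load, sits in lane 1
// and is shuffled down to lane 0 of the result.
```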
[llvm-branch-commits] [llvm] 37d0dda - [SLP] fix typo; NFC
Author: Sanjay Patel Date: 2020-12-18T16:55:52-05:00 New Revision: 37d0dda739aa5ebc1ad8cca8c570788b2a3ef5cf URL: https://github.com/llvm/llvm-project/commit/37d0dda739aa5ebc1ad8cca8c570788b2a3ef5cf DIFF: https://github.com/llvm/llvm-project/commit/37d0dda739aa5ebc1ad8cca8c570788b2a3ef5cf.diff LOG: [SLP] fix typo; NFC Added: Modified: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp Removed: diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 9ab89e091596..80d510185470 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -6911,12 +6911,12 @@ class HorizontalReduction { ReductionData.initReductionOps(ReductionOps); while (!Stack.empty()) { Instruction *TreeN = Stack.back().first; - unsigned EdgeToVist = Stack.back().second++; + unsigned EdgeToVisit = Stack.back().second++; OperationData OpData = getOperationData(TreeN); bool IsReducedValue = OpData != ReductionData; // Postorder vist. - if (IsReducedValue || EdgeToVist == OpData.getNumberOfOperands()) { + if (IsReducedValue || EdgeToVisit == OpData.getNumberOfOperands()) { if (IsReducedValue) ReducedVals.push_back(TreeN); else { @@ -6942,7 +6942,7 @@ class HorizontalReduction { } // Visit left or right. - Value *NextV = TreeN->getOperand(EdgeToVist); + Value *NextV = TreeN->getOperand(EdgeToVisit); if (NextV != Phi) { auto *I = dyn_cast(NextV); OpData = getOperationData(I); ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] d611875 - [InstSimplify] add tests for inverted logic operands; NFC
Author: Sanjay Patel Date: 2020-12-21T08:51:42-05:00 New Revision: d6118759f30e343a05aab053f66e5049ea149175 URL: https://github.com/llvm/llvm-project/commit/d6118759f30e343a05aab053f66e5049ea149175 DIFF: https://github.com/llvm/llvm-project/commit/d6118759f30e343a05aab053f66e5049ea149175.diff LOG: [InstSimplify] add tests for inverted logic operands; NFC Added: Modified: llvm/test/Transforms/InstSimplify/AndOrXor.ll Removed: diff --git a/llvm/test/Transforms/InstSimplify/AndOrXor.ll b/llvm/test/Transforms/InstSimplify/AndOrXor.ll index 8952acc2feb6..9e549ebefc6b 100644 --- a/llvm/test/Transforms/InstSimplify/AndOrXor.ll +++ b/llvm/test/Transforms/InstSimplify/AndOrXor.ll @@ -885,168 +885,286 @@ define i32 @reversed_not(i32 %a) { define i64 @shl_or_and1(i32 %a, i1 %b) { ; CHECK-LABEL: @shl_or_and1( -; CHECK-NEXT:[[TMP2:%.*]] = zext i1 [[B:%.*]] to i64 -; CHECK-NEXT:ret i64 [[TMP2]] +; CHECK-NEXT:[[T2:%.*]] = zext i1 [[B:%.*]] to i64 +; CHECK-NEXT:ret i64 [[T2]] ; - %tmp1 = zext i32 %a to i64 - %tmp2 = zext i1 %b to i64 - %tmp3 = shl nuw i64 %tmp1, 32 - %tmp4 = or i64 %tmp2, %tmp3 - %tmp5 = and i64 %tmp4, 1 - ret i64 %tmp5 + %t1 = zext i32 %a to i64 + %t2 = zext i1 %b to i64 + %t3 = shl nuw i64 %t1, 32 + %t4 = or i64 %t2, %t3 + %t5 = and i64 %t4, 1 + ret i64 %t5 } define i64 @shl_or_and2(i32 %a, i1 %b) { ; CHECK-LABEL: @shl_or_and2( -; CHECK-NEXT:[[TMP1:%.*]] = zext i1 [[B:%.*]] to i64 -; CHECK-NEXT:[[TMP3:%.*]] = shl nuw i64 [[TMP1]], 32 -; CHECK-NEXT:ret i64 [[TMP3]] +; CHECK-NEXT:[[T1:%.*]] = zext i1 [[B:%.*]] to i64 +; CHECK-NEXT:[[T3:%.*]] = shl nuw i64 [[T1]], 32 +; CHECK-NEXT:ret i64 [[T3]] ; - %tmp1 = zext i1 %b to i64 - %tmp2 = zext i32 %a to i64 - %tmp3 = shl nuw i64 %tmp1, 32 - %tmp4 = or i64 %tmp2, %tmp3 - %tmp5 = and i64 %tmp4, 4294967296 - ret i64 %tmp5 + %t1 = zext i1 %b to i64 + %t2 = zext i32 %a to i64 + %t3 = shl nuw i64 %t1, 32 + %t4 = or i64 %t2, %t3 + %t5 = and i64 %t4, 4294967296 + ret i64 %t5 } ; concatenate two 32-bit integers and extract lower 32-bit define i64 @shl_or_and3(i32 %a, i32 %b) { ; CHECK-LABEL: @shl_or_and3( -; CHECK-NEXT:[[TMP2:%.*]] = zext i32 [[B:%.*]] to i64 -; CHECK-NEXT:ret i64 [[TMP2]] +; CHECK-NEXT:[[T2:%.*]] = zext i32 [[B:%.*]] to i64 +; CHECK-NEXT:ret i64 [[T2]] ; - %tmp1 = zext i32 %a to i64 - %tmp2 = zext i32 %b to i64 - %tmp3 = shl nuw i64 %tmp1, 32 - %tmp4 = or i64 %tmp2, %tmp3 - %tmp5 = and i64 %tmp4, 4294967295 - ret i64 %tmp5 + %t1 = zext i32 %a to i64 + %t2 = zext i32 %b to i64 + %t3 = shl nuw i64 %t1, 32 + %t4 = or i64 %t2, %t3 + %t5 = and i64 %t4, 4294967295 + ret i64 %t5 } ; concatenate two 16-bit integers and extract higher 16-bit define i32 @shl_or_and4(i16 %a, i16 %b) { ; CHECK-LABEL: @shl_or_and4( -; CHECK-NEXT:[[TMP1:%.*]] = zext i16 [[A:%.*]] to i32 -; CHECK-NEXT:[[TMP3:%.*]] = shl nuw i32 [[TMP1]], 16 -; CHECK-NEXT:ret i32 [[TMP3]] +; CHECK-NEXT:[[T1:%.*]] = zext i16 [[A:%.*]] to i32 +; CHECK-NEXT:[[T3:%.*]] = shl nuw i32 [[T1]], 16 +; CHECK-NEXT:ret i32 [[T3]] ; - %tmp1 = zext i16 %a to i32 - %tmp2 = zext i16 %b to i32 - %tmp3 = shl nuw i32 %tmp1, 16 - %tmp4 = or i32 %tmp2, %tmp3 - %tmp5 = and i32 %tmp4, 4294901760 ; mask with 0x - ret i32 %tmp5 + %t1 = zext i16 %a to i32 + %t2 = zext i16 %b to i32 + %t3 = shl nuw i32 %t1, 16 + %t4 = or i32 %t2, %t3 + %t5 = and i32 %t4, 4294901760 ; mask with 0x + ret i32 %t5 } define i128 @shl_or_and5(i64 %a, i1 %b) { ; CHECK-LABEL: @shl_or_and5( -; CHECK-NEXT:[[TMP2:%.*]] = zext i1 [[B:%.*]] to i128 -; CHECK-NEXT:ret i128 [[TMP2]] +; CHECK-NEXT:[[T2:%.*]] = zext i1 [[B:%.*]] to i128 +; 
CHECK-NEXT:ret i128 [[T2]] ; - %tmp1 = zext i64 %a to i128 - %tmp2 = zext i1 %b to i128 - %tmp3 = shl nuw i128 %tmp1, 64 - %tmp4 = or i128 %tmp2, %tmp3 - %tmp5 = and i128 %tmp4, 1 - ret i128 %tmp5 + %t1 = zext i64 %a to i128 + %t2 = zext i1 %b to i128 + %t3 = shl nuw i128 %t1, 64 + %t4 = or i128 %t2, %t3 + %t5 = and i128 %t4, 1 + ret i128 %t5 } ; A variation of above test cases; it fails due to the mask value define i32 @shl_or_and6(i16 %a, i16 %b) { ; CHECK-LABEL: @shl_or_and6( -; CHECK-NEXT:[[TMP1:%.*]] = zext i16 [[A:%.*]] to i32 -; CHECK-NEXT:[[TMP2:%.*]] = zext i16 [[B:%.*]] to i32 -; CHECK-NEXT:[[TMP3:%.*]] = shl nuw i32 [[TMP1]], 16 -; CHECK-NEXT:[[TMP4:%.*]] = or i32 [[TMP2]], [[TMP3]] -; CHECK-NEXT:[[TMP5:%.*]] = and i32 [[TMP4]], -65535 -; CHECK-NEXT:ret i32 [[TMP5]] +; CHECK-NEXT:[[T1:%.*]] = zext i16 [[A:%.*]] to i32 +; CHECK-NEXT:[[T2:%.*]] = zext i16 [[B:%.*]] to i32 +; CHECK-NEXT:[[T3:%.*]] = shl nuw i32 [[T1]], 16 +; CHECK-NEXT:[[T4:%.*]] = or i32 [[T2]], [[T3]] +; CHECK-NEXT:[[T5:%.*]] = and i32 [[T4]], -65535 +; CHECK-NEXT:ret i32 [[T5]] ; - %tmp
[llvm-branch-commits] [llvm] 38ca7fa - [InstSimplify] reduce logic with inverted add/sub ops
Author: Sanjay Patel Date: 2020-12-21T08:51:43-05:00 New Revision: 38ca7face67e8488d482b66a999d0a685806879f URL: https://github.com/llvm/llvm-project/commit/38ca7face67e8488d482b66a999d0a685806879f DIFF: https://github.com/llvm/llvm-project/commit/38ca7face67e8488d482b66a999d0a685806879f.diff LOG: [InstSimplify] reduce logic with inverted add/sub ops https://llvm.org/PR48559 This could be part of a larger ValueTracking API, but I don't see that currently. https://rise4fun.com/Alive/gR0 Name: and Pre: C1 == ~C2 %sub = add i8 %x, C1 %sub1 = sub i8 C2, %x %r = and i8 %sub, %sub1 => %r = 0 Name: or Pre: C1 == ~C2 %sub = add i8 %x, C1 %sub1 = sub i8 C2, %x %r = or i8 %sub, %sub1 => %r = -1 Name: xor Pre: C1 == ~C2 %sub = add i8 %x, C1 %sub1 = sub i8 C2, %x %r = xor i8 %sub, %sub1 => %r = -1 Added: Modified: llvm/lib/Analysis/InstructionSimplify.cpp llvm/test/Transforms/InstSimplify/AndOrXor.ll Removed: diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index 55f3bc4f2923..27b73a5a8236 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -1999,6 +1999,30 @@ static Value *omitCheckForZeroBeforeInvertedMulWithOverflow(Value *Op0, return NotOp1; } +/// Given a bitwise logic op, check if the operands are add/sub with a common +/// source value and inverted constant (identity: C - X -> ~(X + ~C)). +static Value *simplifyLogicOfAddSub(Value *Op0, Value *Op1, +Instruction::BinaryOps Opcode) { + assert(Op0->getType() == Op1->getType() && "Mismatched binop types"); + assert(BinaryOperator::isBitwiseLogicOp(Opcode) && "Expected logic op"); + Value *X; + Constant *C1, *C2; + if ((match(Op0, m_Add(m_Value(X), m_Constant(C1))) && + match(Op1, m_Sub(m_Constant(C2), m_Specific(X || + (match(Op1, m_Add(m_Value(X), m_Constant(C1))) && + match(Op0, m_Sub(m_Constant(C2), m_Specific(X) { +if (ConstantExpr::getNot(C1) == C2) { + // (X + C) & (~C - X) --> (X + C) & ~(X + C) --> 0 + // (X + C) | (~C - X) --> (X + C) | ~(X + C) --> -1 + // (X + C) ^ (~C - X) --> (X + C) ^ ~(X + C) --> -1 + Type *Ty = Op0->getType(); + return Opcode == Instruction::And ? ConstantInt::getNullValue(Ty) +: ConstantInt::getAllOnesValue(Ty); +} + } + return nullptr; +} + /// Given operands for an And, see if we can fold the result. /// If not, this returns null. static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, @@ -2035,6 +2059,9 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, if (match(Op1, m_c_Or(m_Specific(Op0), m_Value( return Op0; + if (Value *V = simplifyLogicOfAddSub(Op0, Op1, Instruction::And)) +return V; + // A mask that only clears known zeros of a shifted value is a no-op. Value *X; const APInt *Mask; @@ -2194,6 +2221,9 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, if (match(Op1, m_Not(m_c_And(m_Specific(Op0), m_Value() return Constant::getAllOnesValue(Op0->getType()); + if (Value *V = simplifyLogicOfAddSub(Op0, Op1, Instruction::Or)) +return V; + Value *A, *B; // (A & ~B) | (A ^ B) -> (A ^ B) // (~B & A) | (A ^ B) -> (A ^ B) @@ -2323,6 +2353,9 @@ static Value *SimplifyXorInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, match(Op1, m_Not(m_Specific(Op0 return Constant::getAllOnesValue(Op0->getType()); + if (Value *V = simplifyLogicOfAddSub(Op0, Op1, Instruction::Xor)) +return V; + // Try some generic simplifications for associative operations. 
if (Value *V = SimplifyAssociativeBinOp(Instruction::Xor, Op0, Op1, Q, MaxRecurse)) diff --git a/llvm/test/Transforms/InstSimplify/AndOrXor.ll b/llvm/test/Transforms/InstSimplify/AndOrXor.ll index 9e549ebefc6b..e23262835c3c 100644 --- a/llvm/test/Transforms/InstSimplify/AndOrXor.ll +++ b/llvm/test/Transforms/InstSimplify/AndOrXor.ll @@ -1053,10 +1053,7 @@ define <2 x i32> @shl_or_and3v(<2 x i16> %a, <2 x i16> %b) { define i8 @and_add_sub(i8 %x) { ; CHECK-LABEL: @and_add_sub( -; CHECK-NEXT:[[A:%.*]] = add i8 [[X:%.*]], -1 -; CHECK-NEXT:[[S:%.*]] = sub i8 0, [[X]] -; CHECK-NEXT:[[R:%.*]] = and i8 [[A]], [[S]] -; CHECK-NEXT:ret i8 [[R]] +; CHECK-NEXT:ret i8 0 ; %a = add i8 %x, -1 %s = sub i8 0, %x @@ -1066,10 +1063,7 @@ define i8 @and_add_sub(i8 %x) { define <2 x i8> @and_sub_add(<2 x i8> %x) { ; CHECK-LABEL: @and_sub_add( -; CHECK-NEXT:[[A:%.*]] = add <2 x i8> [[X:%.*]], -; CHECK-NEXT:[[S:%.*]] = sub <2 x i8> , [[X]] -; CHECK-NEXT:[[R:%.*]] = and <2 x i8> [[S]], [[A]] -; CHECK-NEXT:ret <2 x i8> [[R]] +; CHECK-NEXT:ret <2
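The identity behind the fold (with C2 == ~C1, the value C2 - X is the bitwise complement of X + C1, because ~C1 - X == -(X + C1) - 1 == ~(X + C1)) can be confirmed exhaustively for i8 with a short standalone program; this is an illustration, not part of the patch:

```cpp
#include <cassert>
#include <cstdint>

int main() {
  for (int x = 0; x < 256; ++x) {
    for (int c1 = 0; c1 < 256; ++c1) {
      uint8_t X = (uint8_t)x, C1 = (uint8_t)c1, C2 = (uint8_t)~C1;
      uint8_t add = (uint8_t)(X + C1); // X + C1
      uint8_t sub = (uint8_t)(C2 - X); // ~C1 - X == ~(X + C1)
      assert((uint8_t)(add & sub) == 0x00); // and --> 0
      assert((uint8_t)(add | sub) == 0xFF); // or  --> -1
      assert((uint8_t)(add ^ sub) == 0xFF); // xor --> -1
    }
  }
  return 0;
}
```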
[llvm-branch-commits] [llvm] 0d15d4b - [SLP] use operand index abstraction for number of operands
Author: Sanjay Patel Date: 2020-12-22T16:05:39-05:00 New Revision: 0d15d4b6f43a3355c1d618766c8e550cfe1481d0 URL: https://github.com/llvm/llvm-project/commit/0d15d4b6f43a3355c1d618766c8e550cfe1481d0 DIFF: https://github.com/llvm/llvm-project/commit/0d15d4b6f43a3355c1d618766c8e550cfe1481d0.diff LOG: [SLP] use operand index abstraction for number of operands I think this is NFC currently, but the bug would be exposed when we allow binary intrinsics (maxnum, etc) as candidates for reductions. The code in matchAssociativeReduction() is using OperationData::getNumberOfOperands() when comparing whether the "EdgeToVisit" iterator is in-bounds, so this code must use the same (potentially offset) operand value to set the "EdgeToVisit". Added: Modified: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp Removed: diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index b03fb203c6d7..baa8ce2638a0 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -6772,7 +6772,8 @@ class HorizontalReduction { // in this case. // Do not perform analysis of remaining operands of ParentStackElem.first // instruction, this whole instruction is an extra argument. - ParentStackElem.second = ParentStackElem.first->getNumOperands(); + OperationData OpData = getOperationData(ParentStackElem.first); + ParentStackElem.second = OpData.getNumberOfOperands(); } else { // We ran into something like: // ParentStackElem.first += ... + ExtraArg + ... ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] f6929c0 - [SLP] add reduction tests for maxnum/minnum intrinsics; NFC
Author: Sanjay Patel Date: 2020-12-22T16:05:39-05:00 New Revision: f6929c01952b3f144df620544ed937e801b9c945 URL: https://github.com/llvm/llvm-project/commit/f6929c01952b3f144df620544ed937e801b9c945 DIFF: https://github.com/llvm/llvm-project/commit/f6929c01952b3f144df620544ed937e801b9c945.diff LOG: [SLP] add reduction tests for maxnum/minnum intrinsics; NFC Added: Modified: llvm/test/Transforms/SLPVectorizer/X86/fmaxnum.ll llvm/test/Transforms/SLPVectorizer/X86/fminnum.ll Removed: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/fmaxnum.ll b/llvm/test/Transforms/SLPVectorizer/X86/fmaxnum.ll index e03f3f808a4f..23f2196b2425 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/fmaxnum.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/fmaxnum.ll @@ -338,4 +338,151 @@ define void @fmaxnum_16f32() #0 { ret void } +define float @reduction_v4f32_fast(float* %p) { +; CHECK-LABEL: @reduction_v4f32_fast( +; CHECK-NEXT:[[G1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1 +; CHECK-NEXT:[[G2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2 +; CHECK-NEXT:[[G3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3 +; CHECK-NEXT:[[T0:%.*]] = load float, float* [[P]], align 4 +; CHECK-NEXT:[[T1:%.*]] = load float, float* [[G1]], align 4 +; CHECK-NEXT:[[T2:%.*]] = load float, float* [[G2]], align 4 +; CHECK-NEXT:[[T3:%.*]] = load float, float* [[G3]], align 4 +; CHECK-NEXT:[[M1:%.*]] = tail call fast float @llvm.maxnum.f32(float [[T1]], float [[T0]]) +; CHECK-NEXT:[[M2:%.*]] = tail call fast float @llvm.maxnum.f32(float [[T2]], float [[M1]]) +; CHECK-NEXT:[[M3:%.*]] = tail call fast float @llvm.maxnum.f32(float [[T3]], float [[M2]]) +; CHECK-NEXT:ret float [[M3]] +; + %g1 = getelementptr inbounds float, float* %p, i64 1 + %g2 = getelementptr inbounds float, float* %p, i64 2 + %g3 = getelementptr inbounds float, float* %p, i64 3 + %t0 = load float, float* %p, align 4 + %t1 = load float, float* %g1, align 4 + %t2 = load float, float* %g2, align 4 + %t3 = load float, float* %g3, align 4 + %m1 = tail call fast float @llvm.maxnum.f32(float %t1, float %t0) + %m2 = tail call fast float @llvm.maxnum.f32(float %t2, float %m1) + %m3 = tail call fast float @llvm.maxnum.f32(float %t3, float %m2) + ret float %m3 +} + +define float @reduction_v4f32_nnan(float* %p) { +; CHECK-LABEL: @reduction_v4f32_nnan( +; CHECK-NEXT:[[G1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1 +; CHECK-NEXT:[[G2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2 +; CHECK-NEXT:[[G3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3 +; CHECK-NEXT:[[T0:%.*]] = load float, float* [[P]], align 4 +; CHECK-NEXT:[[T1:%.*]] = load float, float* [[G1]], align 4 +; CHECK-NEXT:[[T2:%.*]] = load float, float* [[G2]], align 4 +; CHECK-NEXT:[[T3:%.*]] = load float, float* [[G3]], align 4 +; CHECK-NEXT:[[M1:%.*]] = tail call nnan float @llvm.maxnum.f32(float [[T1]], float [[T0]]) +; CHECK-NEXT:[[M2:%.*]] = tail call nnan float @llvm.maxnum.f32(float [[T2]], float [[M1]]) +; CHECK-NEXT:[[M3:%.*]] = tail call nnan float @llvm.maxnum.f32(float [[T3]], float [[M2]]) +; CHECK-NEXT:ret float [[M3]] +; + %g1 = getelementptr inbounds float, float* %p, i64 1 + %g2 = getelementptr inbounds float, float* %p, i64 2 + %g3 = getelementptr inbounds float, float* %p, i64 3 + %t0 = load float, float* %p, align 4 + %t1 = load float, float* %g1, align 4 + %t2 = load float, float* %g2, align 4 + %t3 = load float, float* %g3, align 4 + %m1 = tail call nnan float @llvm.maxnum.f32(float %t1, float %t0) + %m2 = tail call nnan float 
@llvm.maxnum.f32(float %t2, float %m1) + %m3 = tail call nnan float @llvm.maxnum.f32(float %t3, float %m2) + ret float %m3 +} + +define float @reduction_v8f32_fast(float* %p) { +; CHECK-LABEL: @reduction_v8f32_fast( +; CHECK-NEXT:[[G1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1 +; CHECK-NEXT:[[G2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2 +; CHECK-NEXT:[[G3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3 +; CHECK-NEXT:[[G4:%.*]] = getelementptr inbounds float, float* [[P]], i64 4 +; CHECK-NEXT:[[G5:%.*]] = getelementptr inbounds float, float* [[P]], i64 5 +; CHECK-NEXT:[[G6:%.*]] = getelementptr inbounds float, float* [[P]], i64 6 +; CHECK-NEXT:[[G7:%.*]] = getelementptr inbounds float, float* [[P]], i64 7 +; CHECK-NEXT:[[T0:%.*]] = load float, float* [[P]], align 4 +; CHECK-NEXT:[[T1:%.*]] = load float, float* [[G1]], align 4 +; CHECK-NEXT:[[T2:%.*]] = load float, float* [[G2]], align 4 +; CHECK-NEXT:[[T3:%.*]] = load float, float* [[G3]], align 4 +; CHECK-NEXT:[[T4:%.*]] = load float, float* [[G4]], align 4 +; CHECK-NEXT:[[T5:%.*]] = load float, float* [[G5]], align 4 +; CHECK-NEXT:[[T6:
[llvm-branch-commits] [llvm] badf0f2 - [SLP] rename reduction variables for readability; NFC
Author: Sanjay Patel Date: 2020-12-26T11:20:25-05:00 New Revision: badf0f20f3b3e8f8f06d6c632d2c9fc8e509fd25 URL: https://github.com/llvm/llvm-project/commit/badf0f20f3b3e8f8f06d6c632d2c9fc8e509fd25 DIFF: https://github.com/llvm/llvm-project/commit/badf0f20f3b3e8f8f06d6c632d2c9fc8e509fd25.diff LOG: [SLP] rename reduction variables for readability; NFC I am hoping to extend the reduction matching code, and it is hard to distinguish "ReductionData" from "ReducedValueData". So extend the tree/root metaphor to include leaves. Another problem is that the name "OperationData" does not provide insight into its purpose. I'm not sure if we can alter that underlying data structure to make the code clearer. Added: Modified: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp Removed: diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index bba6ddc87afb..8a455f300e39 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -6422,17 +6422,16 @@ namespace { /// Model horizontal reductions. /// -/// A horizontal reduction is a tree of reduction operations (currently add and -/// fadd) that has operations that can be put into a vector as its leaf. -/// For example, this tree: +/// A horizontal reduction is a tree of reduction instructions that has values +/// that can be put into a vector as its leaves. For example: /// /// mul mul mul mul /// \ /\ / /// + + ///\ / /// + -/// This tree has "mul" as its reduced values and "+" as its reduction -/// operations. A reduction might be feeding into a store or a binary operation +/// This tree has "mul" as its leaf values and "+" as its reduction +/// instructions. A reduction can feed into a store or a binary operation /// feeding a phi. ///... ///\ / @@ -6756,10 +6755,10 @@ class HorizontalReduction { WeakTrackingVH ReductionRoot; /// The operation data of the reduction operation. - OperationData ReductionData; + OperationData RdxTreeInst; - /// The operation data of the values we perform a reduction on. - OperationData ReducedValueData; + /// The operation data for the leaf values that we perform a reduction on. + OperationData RdxLeafVal; /// Should we model this reduction as a pairwise reduction tree or a tree that /// splits the vector in halves and adds those halves. @@ -6875,24 +6874,24 @@ class HorizontalReduction { assert((!Phi || is_contained(Phi->operands(), B)) && "Thi phi needs to use the binary operator"); -ReductionData = getOperationData(B); +RdxTreeInst = getOperationData(B); // We could have a initial reductions that is not an add. // r *= v1 + v2 + v3 + v4 // In such a case start looking for a tree rooted in the first '+'. 
if (Phi) { - if (ReductionData.getLHS(B) == Phi) { + if (RdxTreeInst.getLHS(B) == Phi) { Phi = nullptr; -B = dyn_cast(ReductionData.getRHS(B)); -ReductionData = getOperationData(B); - } else if (ReductionData.getRHS(B) == Phi) { +B = dyn_cast(RdxTreeInst.getRHS(B)); +RdxTreeInst = getOperationData(B); + } else if (RdxTreeInst.getRHS(B) == Phi) { Phi = nullptr; -B = dyn_cast(ReductionData.getLHS(B)); -ReductionData = getOperationData(B); +B = dyn_cast(RdxTreeInst.getLHS(B)); +RdxTreeInst = getOperationData(B); } } -if (!ReductionData.isVectorizable(B)) +if (!RdxTreeInst.isVectorizable(B)) return false; Type *Ty = B->getType(); @@ -6901,19 +6900,19 @@ class HorizontalReduction { if (!Ty->isIntOrIntVectorTy() && !Ty->isFPOrFPVectorTy()) return false; -ReducedValueData.clear(); +RdxLeafVal.clear(); ReductionRoot = B; // Post order traverse the reduction tree starting at B. We only handle true // trees containing only binary operators. SmallVector, 32> Stack; -Stack.push_back(std::make_pair(B, ReductionData.getFirstOperandIndex())); -ReductionData.initReductionOps(ReductionOps); +Stack.push_back(std::make_pair(B, RdxTreeInst.getFirstOperandIndex())); +RdxTreeInst.initReductionOps(ReductionOps); while (!Stack.empty()) { Instruction *TreeN = Stack.back().first; unsigned EdgeToVisit = Stack.back().second++; OperationData OpData = getOperationData(TreeN); - bool IsReducedValue = OpData != ReductionData; + bool IsReducedValue = OpData != RdxTreeInst; // Postorder vist. if (IsReducedValue || EdgeToVisit == OpData.getNumberOfOperands()) { @@ -6934,7 +6933,7 @@ class HorizontalReduction { markExtraArg(Stack[Stack.size() - 2], TreeN); ExtraArgs.erase(TreeN); } else -ReductionData.addReductionOps(TreeN, ReductionOp
[llvm-branch-commits] [llvm] c4ca108 - [SLP] use switch to improve readability; NFC
Author: Sanjay Patel Date: 2020-12-26T10:59:45-05:00 New Revision: c4ca108966926871a7e2bf362b1816be88a99162 URL: https://github.com/llvm/llvm-project/commit/c4ca108966926871a7e2bf362b1816be88a99162 DIFF: https://github.com/llvm/llvm-project/commit/c4ca108966926871a7e2bf362b1816be88a99162.diff LOG: [SLP] use switch to improve readability; NFC This will get more complicated when we handle intrinsics like maxnum. Added: Modified: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp Removed: diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index f3a0baa00267..bba6ddc87afb 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -6475,15 +6475,20 @@ class HorizontalReduction { /// Checks if the reduction operation can be vectorized. bool isVectorizable() const { - // We currently only support add/mul/logical && min/max reductions. - return ((Kind == RK_Arithmetic && - (Opcode == Instruction::Add || Opcode == Instruction::FAdd || -Opcode == Instruction::Mul || Opcode == Instruction::FMul || -Opcode == Instruction::And || Opcode == Instruction::Or || -Opcode == Instruction::Xor)) || - (Opcode == Instruction::ICmp && - (Kind == RK_SMin || Kind == RK_SMax || -Kind == RK_UMin || Kind == RK_UMax))); + switch (Kind) { + case RK_Arithmetic: +return Opcode == Instruction::Add || Opcode == Instruction::FAdd || + Opcode == Instruction::Mul || Opcode == Instruction::FMul || + Opcode == Instruction::And || Opcode == Instruction::Or || + Opcode == Instruction::Xor; + case RK_SMin: + case RK_SMax: + case RK_UMin: + case RK_UMax: +return Opcode == Instruction::ICmp; + default: +return false; + } } /// Creates reduction operation with the current opcode. ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] c5a4d80 - [ValueTracking][MemCpyOpt] avoid crash on inttoptr with vector pointer type (PR48075)
Author: Sanjay Patel Date: 2020-11-22T12:54:18-05:00 New Revision: c5a4d80fd47cfdae1995df46d0c407f78d8666e8 URL: https://github.com/llvm/llvm-project/commit/c5a4d80fd47cfdae1995df46d0c407f78d8666e8 DIFF: https://github.com/llvm/llvm-project/commit/c5a4d80fd47cfdae1995df46d0c407f78d8666e8.diff LOG: [ValueTracking][MemCpyOpt] avoid crash on inttoptr with vector pointer type (PR48075) Added: Modified: llvm/lib/Analysis/ValueTracking.cpp llvm/test/Transforms/MemCpyOpt/crash.ll Removed: diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index bcf35111502e..90f8dff87472 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -3610,12 +3610,13 @@ Value *llvm::isBytewiseValue(Value *V, const DataLayout &DL) { if (auto *CE = dyn_cast(C)) { if (CE->getOpcode() == Instruction::IntToPtr) { - auto PS = DL.getPointerSizeInBits( - cast(CE->getType())->getAddressSpace()); - return isBytewiseValue( - ConstantExpr::getIntegerCast(CE->getOperand(0), - Type::getIntNTy(Ctx, PS), false), - DL); + if (auto *PtrTy = dyn_cast(CE->getType())) { +unsigned BitWidth = DL.getPointerSizeInBits(PtrTy->getAddressSpace()); +return isBytewiseValue( +ConstantExpr::getIntegerCast(CE->getOperand(0), + Type::getIntNTy(Ctx, BitWidth), false), +DL); + } } } diff --git a/llvm/test/Transforms/MemCpyOpt/crash.ll b/llvm/test/Transforms/MemCpyOpt/crash.ll index f70f10429f84..73635891c683 100644 --- a/llvm/test/Transforms/MemCpyOpt/crash.ll +++ b/llvm/test/Transforms/MemCpyOpt/crash.ll @@ -83,3 +83,16 @@ define void @test2(i32 %cmd) nounwind { call void @llvm.memcpy.p0i8.p0i8.i64(i8* null, i8* undef, i64 20, i1 false) nounwind ret void } + +; https://llvm.org/PR48075 + +@g = external global i16, align 1 + +define void @inttoptr_constexpr_crash(<1 x i16*>* %p) { +; CHECK-LABEL: @inttoptr_constexpr_crash( +; CHECK-NEXT:store <1 x i16*> inttoptr (<1 x i16> bitcast (<2 x i8> to <1 x i16>) to <1 x i16*>), <1 x i16*>* [[P:%.*]], align 1 +; CHECK-NEXT:ret void +; + store <1 x i16*> inttoptr (<1 x i16> bitcast (<2 x i8> to <1 x i16>) to <1 x i16*>), <1 x i16*>* %p, align 1 + ret void +} ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
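A reduced sketch of the kind of input that used to assert: MemCpyOpt calls isBytewiseValue() on stored values while looking for memset opportunities, and an inttoptr constant expression with a vector-of-pointers type reached the unconditional PointerType cast. This is not the committed test; the <i16 42> constant and the function name are placeholders, but opt -memcpyopt should exercise the same path:

define void @store_inttoptr_vec(<1 x i16*>* %p) {
  ; The stored value is a ConstantExpr of vector-of-pointers type.
  store <1 x i16*> inttoptr (<1 x i16> <i16 42> to <1 x i16*>), <1 x i16*>* %p, align 1
  ret void
}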
[llvm-branch-commits] [llvm] 3a18f26 - [CostModel] add tests for FP maximum; NFC
Author: Sanjay Patel Date: 2020-11-22T13:33:42-05:00 New Revision: 3a18f267236351873a4c7821735c70b0790e4919 URL: https://github.com/llvm/llvm-project/commit/3a18f267236351873a4c7821735c70b0790e4919 DIFF: https://github.com/llvm/llvm-project/commit/3a18f267236351873a4c7821735c70b0790e4919.diff LOG: [CostModel] add tests for FP maximum; NFC These min/max intrinsics are not handled in the basic implementation and probably not handled in target-specific overrides either. Added: Modified: llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll Removed: diff --git a/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll b/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll index e472e0424d8a..805bd810e950 100644 --- a/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll +++ b/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll @@ -22,6 +22,9 @@ declare <16 x float> @llvm.log2.v16f32(<16 x float>) declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata) declare <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float>, <16 x float>, metadata, metadata) +declare float @llvm.maximum.f32(float, float) +declare <16 x float> @llvm.maximum.v16f32(<16 x float>, <16 x float>) + declare i32 @llvm.cttz.i32(i32, i1) declare <16 x i32> @llvm.cttz.v16i32(<16 x i32>, i1) @@ -141,6 +144,32 @@ define void @constrained_fadd(float %a, <16 x float> %va) { ret void } +define void @fmaximum(float %a, float %b, <16 x float> %va, <16 x float> %vb) { +; THRU-LABEL: 'fmaximum' +; THRU-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call float @llvm.maximum.f32(float %a, float %b) +; THRU-NEXT: Cost Model: Found an estimated cost of 784 for instruction: %v = call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb) +; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; LATE-LABEL: 'fmaximum' +; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %s = call float @llvm.maximum.f32(float %a, float %b) +; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v = call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb) +; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SIZE-LABEL: 'fmaximum' +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call float @llvm.maximum.f32(float %a, float %b) +; SIZE-NEXT: Cost Model: Found an estimated cost of 784 for instruction: %v = call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SIZE_LATE-LABEL: 'fmaximum' +; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call float @llvm.maximum.f32(float %a, float %b) +; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 784 for instruction: %v = call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb) +; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %s = call float @llvm.maximum.f32(float %a, float %b) + %v = call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb) + ret void +} + define void @cttz(i32 %a, <16 x i32> %va) { ; THRU-LABEL: 'cttz' ; THRU-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false) diff --git a/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll 
b/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll index 2e53c836676f..f7f0a24af363 100644 --- a/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll +++ b/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll @@ -25,6 +25,9 @@ declare <16 x float> @llvm.log2.v16f32(<16 x float>) declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata) declare <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float>, <16 x float>, metadata, metadata) +declare float @llvm.maximum.f32(float, float) +declare <16 x float> @llvm.maximum.v16f32(<16 x float>, <16 x float>) + declare i32 @llvm.cttz.i32(i32, i1) declare <16 x i32> @llvm.cttz.v16i32(<16 x i32>, i1) @@ -172,6 +175,32 @@ define void @constrained_fadd(float %a, <16 x float> %va) { ret void } +define void @fmaximum(float %a, float %b, <16 x float> %va, <16 x float> %vb) { +; THRU-LABEL: 'fmaximum' +; THRU-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call float @llvm.maximum.f32(float %a, float %b) +; THRU-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %v = call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb) +; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +
[llvm-branch-commits] [llvm] 2717252 - [CostModel] add basic handling for FP maximum/minimum intrinsics
Author: Sanjay Patel Date: 2020-11-22T13:43:53-05:00 New Revision: 2717252c929be7b1f14c36dda9686a4aa8726de3 URL: https://github.com/llvm/llvm-project/commit/2717252c929be7b1f14c36dda9686a4aa8726de3 DIFF: https://github.com/llvm/llvm-project/commit/2717252c929be7b1f14c36dda9686a4aa8726de3.diff LOG: [CostModel] add basic handling for FP maximum/minimum intrinsics This might be a regression for some ARM targets, but that should be changed in the target-specific overrides. There is apparently still no default lowering for these nodes, so I am assuming these intrinsics are not in common use. X86, PowerPC, and RISC-V for example, just crash given the most basic IR. Added: Modified: llvm/include/llvm/CodeGen/BasicTTIImpl.h llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll Removed: diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index 91c426fb6730..fce025aa75f8 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -1396,6 +1396,12 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { case Intrinsic::maxnum: ISDs.push_back(ISD::FMAXNUM); break; +case Intrinsic::minimum: + ISDs.push_back(ISD::FMINIMUM); + break; +case Intrinsic::maximum: + ISDs.push_back(ISD::FMAXIMUM); + break; case Intrinsic::copysign: ISDs.push_back(ISD::FCOPYSIGN); break; diff --git a/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll b/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll index 805bd810e950..2ed26243733b 100644 --- a/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll +++ b/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll @@ -146,8 +146,8 @@ define void @constrained_fadd(float %a, <16 x float> %va) { define void @fmaximum(float %a, float %b, <16 x float> %va, <16 x float> %vb) { ; THRU-LABEL: 'fmaximum' -; THRU-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call float @llvm.maximum.f32(float %a, float %b) -; THRU-NEXT: Cost Model: Found an estimated cost of 784 for instruction: %v = call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb) +; THRU-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %s = call float @llvm.maximum.f32(float %a, float %b) +; THRU-NEXT: Cost Model: Found an estimated cost of 928 for instruction: %v = call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb) ; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; LATE-LABEL: 'fmaximum' @@ -161,8 +161,8 @@ define void @fmaximum(float %a, float %b, <16 x float> %va, <16 x float> %vb) { ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE_LATE-LABEL: 'fmaximum' -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call float @llvm.maximum.f32(float %a, float %b) -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 784 for instruction: %v = call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb) +; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %s = call float @llvm.maximum.f32(float %a, float %b) +; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 928 for instruction: %v = call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb) ; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %s = call float @llvm.maximum.f32(float %a, float %b) diff --git 
a/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll b/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll index f7f0a24af363..4d0dbe544fb5 100644 --- a/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll +++ b/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll @@ -177,8 +177,8 @@ define void @constrained_fadd(float %a, <16 x float> %va) { define void @fmaximum(float %a, float %b, <16 x float> %va, <16 x float> %vb) { ; THRU-LABEL: 'fmaximum' -; THRU-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call float @llvm.maximum.f32(float %a, float %b) -; THRU-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %v = call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb) +; THRU-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %s = call float @llvm.maximum.f32(float %a, float %b) +; THRU-NEXT: Cost Model: Found an estimated cost of 196 for instruction: %v = call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb) ; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; LATE-LABEL: 'fmaximum' @@ -192,8 +192,8 @@ define void @fmaximum(float %a, float %b, <16 x float> %va, <16 x floa
[llvm-branch-commits] [llvm] ab29f09 - [InstCombine] propagate 'nsw' on pointer difference of 'inbounds' geps
Author: Sanjay Patel Date: 2020-11-23T16:50:09-05:00 New Revision: ab29f091eb64c8608ba943df604b218bcff41a26 URL: https://github.com/llvm/llvm-project/commit/ab29f091eb64c8608ba943df604b218bcff41a26 DIFF: https://github.com/llvm/llvm-project/commit/ab29f091eb64c8608ba943df604b218bcff41a26.diff LOG: [InstCombine] propagate 'nsw' on pointer difference of 'inbounds' geps This is a retry of 324a53205. I cautiously reverted that at 6aa3fc4 because the rules about gep math were not clear. Since then, we have added this line to LangRef for gep inbounds: "The successive addition of offsets (without adding the base address) does not wrap the pointer index type in a signed sense (nsw)." See D90708 and post-commit comments on the revert patch for more details. Added: Modified: llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp llvm/test/Transforms/InstCombine/sub-gep.ll llvm/test/Transforms/InstCombine/sub.ll Removed: diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp index b8431a5a4532..9a6a790aefaf 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -1678,11 +1678,12 @@ Value *InstCombinerImpl::OptimizePointerDifference(Value *LHS, Value *RHS, I->getOpcode() == Instruction::Mul) I->setHasNoUnsignedWrap(); - // If we had a constant expression GEP on the other side offsetting the - // pointer, subtract it from the offset we have. + // If we have a 2nd GEP of the same base pointer, subtract the offsets. + // If both GEPs are inbounds, then the subtract does not have signed overflow. if (GEP2) { Value *Offset = EmitGEPOffset(GEP2); -Result = Builder.CreateSub(Result, Offset, "gep diff "); +Result = Builder.CreateSub(Result, Offset, "gep diff ", /* NUW */ false, + GEP1->isInBounds() && GEP2->isInBounds()); } // If we have p - gep(p, ...) then we have to negate the result. 
diff --git a/llvm/test/Transforms/InstCombine/sub-gep.ll b/llvm/test/Transforms/InstCombine/sub-gep.ll index 9868ed1cdf57..2389b70c3452 100644 --- a/llvm/test/Transforms/InstCombine/sub-gep.ll +++ b/llvm/test/Transforms/InstCombine/sub-gep.ll @@ -245,7 +245,7 @@ define i64 @test24b(i8* %P, i64 %A){ define i64 @test25(i8* %P, i64 %A){ ; CHECK-LABEL: @test25( ; CHECK-NEXT:[[B_IDX:%.*]] = shl nsw i64 [[A:%.*]], 1 -; CHECK-NEXT:[[GEPDIFF:%.*]] = add i64 [[B_IDX]], -84 +; CHECK-NEXT:[[GEPDIFF:%.*]] = add nsw i64 [[B_IDX]], -84 ; CHECK-NEXT:ret i64 [[GEPDIFF]] ; %B = getelementptr inbounds [42 x i16], [42 x i16]* @Arr, i64 0, i64 %A @@ -260,7 +260,7 @@ define i16 @test25_as1(i8 addrspace(1)* %P, i64 %A) { ; CHECK-LABEL: @test25_as1( ; CHECK-NEXT:[[TMP1:%.*]] = trunc i64 [[A:%.*]] to i16 ; CHECK-NEXT:[[B_IDX:%.*]] = shl nsw i16 [[TMP1]], 1 -; CHECK-NEXT:[[GEPDIFF:%.*]] = add i16 [[B_IDX]], -84 +; CHECK-NEXT:[[GEPDIFF:%.*]] = add nsw i16 [[B_IDX]], -84 ; CHECK-NEXT:ret i16 [[GEPDIFF]] ; %B = getelementptr inbounds [42 x i16], [42 x i16] addrspace(1)* @Arr_as1, i64 0, i64 %A @@ -272,7 +272,7 @@ define i16 @test25_as1(i8 addrspace(1)* %P, i64 %A) { define i64 @test30(i8* %foo, i64 %i, i64 %j) { ; CHECK-LABEL: @test30( ; CHECK-NEXT:[[GEP1_IDX:%.*]] = shl nsw i64 [[I:%.*]], 2 -; CHECK-NEXT:[[GEPDIFF:%.*]] = sub i64 [[GEP1_IDX]], [[J:%.*]] +; CHECK-NEXT:[[GEPDIFF:%.*]] = sub nsw i64 [[GEP1_IDX]], [[J:%.*]] ; CHECK-NEXT:ret i64 [[GEPDIFF]] ; %bit = bitcast i8* %foo to i32* @@ -287,7 +287,7 @@ define i64 @test30(i8* %foo, i64 %i, i64 %j) { define i16 @test30_as1(i8 addrspace(1)* %foo, i16 %i, i16 %j) { ; CHECK-LABEL: @test30_as1( ; CHECK-NEXT:[[GEP1_IDX:%.*]] = shl nsw i16 [[I:%.*]], 2 -; CHECK-NEXT:[[GEPDIFF:%.*]] = sub i16 [[GEP1_IDX]], [[J:%.*]] +; CHECK-NEXT:[[GEPDIFF:%.*]] = sub nsw i16 [[GEP1_IDX]], [[J:%.*]] ; CHECK-NEXT:ret i16 [[GEPDIFF]] ; %bit = bitcast i8 addrspace(1)* %foo to i32 addrspace(1)* @@ -299,9 +299,11 @@ define i16 @test30_as1(i8 addrspace(1)* %foo, i16 %i, i16 %j) { ret i16 %sub } +; Inbounds translates to 'nsw' on sub + define i64 @gep_ diff _both_inbounds(i8* %foo, i64 %i, i64 %j) { ; CHECK-LABEL: @gep_ diff _both_inbounds( -; CHECK-NEXT:[[GEPDIFF:%.*]] = sub i64 [[I:%.*]], [[J:%.*]] +; CHECK-NEXT:[[GEPDIFF:%.*]] = sub nsw i64 [[I:%.*]], [[J:%.*]] ; CHECK-NEXT:ret i64 [[GEPDIFF]] ; %gep1 = getelementptr inbounds i8, i8* %foo, i64 %i @@ -312,6 +314,8 @@ define i64 @gep_ diff _both_inbounds(i8* %foo, i64 %i, i64 %j) { ret i64 %sub } +; Negative test for 'nsw' - both geps must be inbounds + define i64 @gep_ diff _first_inbounds(i8* %foo, i64 %i, i64 %j) { ; CHECK-LABEL: @gep_ diff _first_inbounds( ; CHECK-NEXT:[[GEPDIFF:%.*]] = sub i64 [[I:%.*]], [[J:%.*]] @@
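For reference, this is the usual pointer-subtraction idiom that OptimizePointerDifference() handles; a minimal sketch (names are illustrative) where both geps are inbounds, so the difference of offsets can now carry nsw:

define i64 @ptrdiff_sketch(i8* %base, i64 %i, i64 %j) {
  %p1 = getelementptr inbounds i8, i8* %base, i64 %i
  %p2 = getelementptr inbounds i8, i8* %base, i64 %j
  %a = ptrtoint i8* %p1 to i64
  %b = ptrtoint i8* %p2 to i64
  ; With both geps inbounds, instcombine now folds this to: sub nsw i64 %i, %j
  %d = sub i64 %a, %b
  ret i64 %d
}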
[llvm-branch-commits] [llvm] 2cebad7 - [IR] remove redundant code comments; NFC
Author: Sanjay Patel Date: 2020-11-29T09:29:59-05:00 New Revision: 2cebad702cdff8c320c8afa748626e8cc1b3b2f3 URL: https://github.com/llvm/llvm-project/commit/2cebad702cdff8c320c8afa748626e8cc1b3b2f3 DIFF: https://github.com/llvm/llvm-project/commit/2cebad702cdff8c320c8afa748626e8cc1b3b2f3.diff LOG: [IR] remove redundant code comments; NFC As noted in D92247 (and independent of that patch): http://llvm.org/docs/CodingStandards.html#doxygen-use-in-documentation-comments "Don’t duplicate the documentation comment in the header file and in the implementation file. Put the documentation comments for public APIs into the header file." Added: Modified: llvm/lib/IR/BasicBlock.cpp Removed: diff --git a/llvm/lib/IR/BasicBlock.cpp b/llvm/lib/IR/BasicBlock.cpp index 23a1184e1246..31666265b504 100644 --- a/llvm/lib/IR/BasicBlock.cpp +++ b/llvm/lib/IR/BasicBlock.cpp @@ -130,15 +130,11 @@ iplist::iterator BasicBlock::eraseFromParent() { return getParent()->getBasicBlockList().erase(getIterator()); } -/// Unlink this basic block from its current function and -/// insert it into the function that MovePos lives in, right before MovePos. void BasicBlock::moveBefore(BasicBlock *MovePos) { MovePos->getParent()->getBasicBlockList().splice( MovePos->getIterator(), getParent()->getBasicBlockList(), getIterator()); } -/// Unlink this basic block from its current function and -/// insert it into the function that MovePos lives in, right after MovePos. void BasicBlock::moveAfter(BasicBlock *MovePos) { MovePos->getParent()->getBasicBlockList().splice( ++MovePos->getIterator(), getParent()->getBasicBlockList(), @@ -265,8 +261,6 @@ void BasicBlock::dropAllReferences() { I.dropAllReferences(); } -/// If this basic block has a single predecessor block, -/// return the block, otherwise return a null pointer. const BasicBlock *BasicBlock::getSinglePredecessor() const { const_pred_iterator PI = pred_begin(this), E = pred_end(this); if (PI == E) return nullptr; // No preds. @@ -275,11 +269,6 @@ const BasicBlock *BasicBlock::getSinglePredecessor() const { return (PI == E) ? ThePred : nullptr /*multiple preds*/; } -/// If this basic block has a unique predecessor block, -/// return the block, otherwise return a null pointer. -/// Note that unique predecessor doesn't mean single edge, there can be -/// multiple edges from the unique predecessor to this block (for example -/// a switch statement with multiple cases having the same destination). const BasicBlock *BasicBlock::getUniquePredecessor() const { const_pred_iterator PI = pred_begin(this), E = pred_end(this); if (PI == E) return nullptr; // No preds. @@ -329,12 +318,6 @@ iterator_range BasicBlock::phis() { return make_range(P, nullptr); } -/// Update PHI nodes in this BasicBlock before removal of predecessor \p Pred. -/// Note that this function does not actually remove the predecessor. -/// -/// If \p KeepOneInputPHIs is true then don't remove PHIs that are left with -/// zero or one incoming values, and don't simplify PHIs with all incoming -/// values the same. void BasicBlock::removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs) { // Use hasNUsesOrMore to bound the cost of this assertion for complex CFGs. @@ -389,17 +372,6 @@ bool BasicBlock::isLegalToHoistInto() const { return !Term->isExceptionalTerminator(); } -/// This splits a basic block into two at the specified -/// instruction. 
Note that all instructions BEFORE the specified iterator stay -/// as part of the original basic block, an unconditional branch is added to -/// the new BB, and the rest of the instructions in the BB are moved to the new -/// BB, including the old terminator. This invalidates the iterator. -/// -/// Note that this only works on well formed basic blocks (must have a -/// terminator), and 'I' must not be the end of instruction list (which would -/// cause a degenerate basic block to be formed, having a terminator inside of -/// the basic block). -/// BasicBlock *BasicBlock::splitBasicBlock(iterator I, const Twine &BBName) { assert(getTerminator() && "Can't use splitBasicBlock on degenerate BB!"); assert(I != InstList.end() && @@ -454,13 +426,10 @@ void BasicBlock::replaceSuccessorsPhiUsesWith(BasicBlock *New) { this->replaceSuccessorsPhiUsesWith(this, New); } -/// Return true if this basic block is a landing pad. I.e., it's -/// the destination of the 'unwind' edge of an invoke instruction. bool BasicBlock::isLandingPad() const { return isa(getFirstNonPHI()); } -/// Return the landingpad instruction associated with the landing pad. const LandingPadInst *BasicBlock::getLandingPadInst() const { return dyn_cast(getFirstNonPHI()); } ___ llvm-branch-commits mailing list llvm-branch-commits@lis
[llvm-branch-commits] [llvm] ce134da - [IR] simplify code in removePredecessor(); NFCI
Author: Sanjay Patel Date: 2020-11-29T09:55:04-05:00 New Revision: ce134da4b18c27bbeba4e32f5813b1a3b043066e URL: https://github.com/llvm/llvm-project/commit/ce134da4b18c27bbeba4e32f5813b1a3b043066e DIFF: https://github.com/llvm/llvm-project/commit/ce134da4b18c27bbeba4e32f5813b1a3b043066e.diff LOG: [IR] simplify code in removePredecessor(); NFCI As suggested in D92247 (and independent of whatever we decide to do there), this code is confusing as-is. Hopefully, this is at least mildly better. We might be able to do better still, but we have a function called "removePredecessor" with this behavior: "Note that this function does not actually remove the predecessor." (!) Added: Modified: llvm/lib/IR/BasicBlock.cpp Removed: diff --git a/llvm/lib/IR/BasicBlock.cpp b/llvm/lib/IR/BasicBlock.cpp index 31666265b504..3268641ddf19 100644 --- a/llvm/lib/IR/BasicBlock.cpp +++ b/llvm/lib/IR/BasicBlock.cpp @@ -327,21 +327,19 @@ void BasicBlock::removePredecessor(BasicBlock *Pred, // Return early if there are no PHI nodes to update. if (!isa<PHINode>(begin())) return; - unsigned NumPreds = cast<PHINode>(front()).getNumIncomingValues(); - // Update all PHI nodes. - for (iterator II = begin(); isa<PHINode>(II);) { -PHINode *PN = cast<PHINode>(II++); -PN->removeIncomingValue(Pred, !KeepOneInputPHIs); -if (!KeepOneInputPHIs) { - // If we have a single predecessor, removeIncomingValue erased the PHI - // node itself. - if (NumPreds > 1) { -if (Value *PNV = PN->hasConstantValue()) { - // Replace the PHI node with its constant value. - PN->replaceAllUsesWith(PNV); - PN->eraseFromParent(); -} + unsigned NumPreds = cast<PHINode>(front()).getNumIncomingValues(); + for (PHINode &Phi : make_early_inc_range(phis())) { +Phi.removeIncomingValue(Pred, !KeepOneInputPHIs); +if (KeepOneInputPHIs) + continue; +// If we have a single predecessor, removeIncomingValue erased the PHI +// node itself. +// Try to replace the PHI node with a constant value. +if (NumPreds > 1) { + if (Value *PhiConstant = Phi.hasConstantValue()) { +Phi.replaceAllUsesWith(PhiConstant); +Phi.eraseFromParent(); } } } ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] bfd2c21 - [IR][LoopRotate] avoid leaving phi with no operands (PR48296)
Author: Sanjay Patel Date: 2020-11-30T09:28:45-05:00 New Revision: bfd2c216ea8ef09f8fb1f755ca2b89f86f74acbb URL: https://github.com/llvm/llvm-project/commit/bfd2c216ea8ef09f8fb1f755ca2b89f86f74acbb DIFF: https://github.com/llvm/llvm-project/commit/bfd2c216ea8ef09f8fb1f755ca2b89f86f74acbb.diff LOG: [IR][LoopRotate] avoid leaving phi with no operands (PR48296) https://llvm.org/PR48296 shows an example where we delete all of the operands of a phi without actually deleting the phi, and that is currently considered invalid IR. The reduced test included here would crash for that reason. A suggested follow-up is to loosen the assert to allow 0-operand phis in unreachable blocks. Differential Revision: https://reviews.llvm.org/D92247 Added: llvm/test/Transforms/LoopRotate/phi-empty.ll Modified: llvm/include/llvm/IR/BasicBlock.h llvm/lib/IR/BasicBlock.cpp Removed: diff --git a/llvm/include/llvm/IR/BasicBlock.h b/llvm/include/llvm/IR/BasicBlock.h index 26cfdd9e51d6..149b0a26c1f3 100644 --- a/llvm/include/llvm/IR/BasicBlock.h +++ b/llvm/include/llvm/IR/BasicBlock.h @@ -387,9 +387,9 @@ class BasicBlock final : public Value, // Basic blocks are data objects also /// Update PHI nodes in this BasicBlock before removal of predecessor \p Pred. /// Note that this function does not actually remove the predecessor. /// - /// If \p KeepOneInputPHIs is true then don't remove PHIs that are left with - /// zero or one incoming values, and don't simplify PHIs with all incoming - /// values the same. + /// If \p KeepOneInputPHIs is true, then don't remove PHIs that are left with + /// one incoming value and don't simplify PHIs with all incoming values the + /// same. void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs = false); bool canSplitPredecessors() const; diff --git a/llvm/lib/IR/BasicBlock.cpp b/llvm/lib/IR/BasicBlock.cpp index 3268641ddf19..aee769aa0fea 100644 --- a/llvm/lib/IR/BasicBlock.cpp +++ b/llvm/lib/IR/BasicBlock.cpp @@ -330,7 +330,7 @@ void BasicBlock::removePredecessor(BasicBlock *Pred, unsigned NumPreds = cast(front()).getNumIncomingValues(); for (PHINode &Phi : make_early_inc_range(phis())) { -Phi.removeIncomingValue(Pred, !KeepOneInputPHIs); +Phi.removeIncomingValue(Pred); if (KeepOneInputPHIs) continue; // If we have a single predecessor, removeIncomingValue erased the PHI diff --git a/llvm/test/Transforms/LoopRotate/phi-empty.ll b/llvm/test/Transforms/LoopRotate/phi-empty.ll new file mode 100644 index ..e246cff91b62 --- /dev/null +++ b/llvm/test/Transforms/LoopRotate/phi-empty.ll @@ -0,0 +1,34 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -lcssa -loop-rotate < %s | FileCheck %s + +define void @PR48296(i1 %cond) { +; CHECK-LABEL: @PR48296( +; CHECK-NEXT: entry: +; CHECK-NEXT:br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT:br i1 [[COND:%.*]], label [[INC:%.*]], label [[LOOP_BACKEDGE:%.*]] +; CHECK: loop.backedge: +; CHECK-NEXT:br label [[LOOP]] +; CHECK: dead: +; CHECK-NEXT:unreachable +; CHECK: inc: +; CHECK-NEXT:br label [[LOOP_BACKEDGE]] +; CHECK: return: +; CHECK-NEXT:ret void +; +entry: + br label %loop + +loop: + br i1 %cond, label %inc, label %loop + +dead:; No predecessors! + br i1 %cond, label %inc, label %return + +inc: + br label %loop + +return: + %r = phi i32 [ undef, %dead ] + ret void +} ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] 355aee3 - Revert "[IR][LoopRotate] avoid leaving phi with no operands (PR48296)"
Author: Sanjay Patel Date: 2020-11-30T10:15:42-05:00 New Revision: 355aee3dcd441461a6da6e56c43dc1bd81c79f31 URL: https://github.com/llvm/llvm-project/commit/355aee3dcd441461a6da6e56c43dc1bd81c79f31 DIFF: https://github.com/llvm/llvm-project/commit/355aee3dcd441461a6da6e56c43dc1bd81c79f31.diff LOG: Revert "[IR][LoopRotate] avoid leaving phi with no operands (PR48296)" This reverts commit bfd2c216ea8ef09f8fb1f755ca2b89f86f74acbb. This appears to be causing stage2 msan failures on buildbots: FAIL: LLVM :: Transforms/SimplifyCFG/X86/bug-25299.ll (65872 of 71835) TEST 'LLVM :: Transforms/SimplifyCFG/X86/bug-25299.ll' FAILED Script: -- : 'RUN: at line 1'; /b/sanitizer-x86_64-linux-fast/build/llvm_build_msan/bin/opt < /b/sanitizer-x86_64-linux-fast/build/llvm-project/llvm/test/Transforms/SimplifyCFG/X86/bug-25299.ll -simplifycfg -S | /b/sanitizer-x86_64-linux-fast/build/llvm_build_msan/bin/FileCheck /b/sanitizer-x86_64-linux-fast/build/llvm-project/llvm/test/Transforms/SimplifyCFG/X86/bug-25299.ll -- Exit Code: 2 Command Output (stderr): -- ==87374==WARNING: MemorySanitizer: use-of-uninitialized-value #0 0x9de47b6 in getBasicBlockIndex /b/sanitizer-x86_64-linux-fast/build/llvm-project/llvm/include/llvm/IR/Instructions.h:2749:5 #1 0x9de47b6 in simplifyCommonResume /b/sanitizer-x86_64-linux-fast/build/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp:4112:23 #2 0x9de47b6 in simplifyResume /b/sanitizer-x86_64-linux-fast/build/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp:4039:12 #3 0x9de47b6 in (anonymous namespace)::SimplifyCFGOpt::simplifyOnce(llvm::BasicBlock*) /b/sanitizer-x86_64-linux-fast/build/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp:6330:16 #4 0x9dcca13 in run /b/sanitizer-x86_64-linux-fast/build/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp:6358:16 #5 0x9dcca13 in llvm::simplifyCFG(llvm::BasicBlock*, llvm::TargetTransformInfo const&, llvm::SimplifyCFGOptions const&, llvm::SmallPtrSetImpl*) /b/sanitizer-x86_64-linux-fast/build/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp:6369:8 #6 0x974643d in iterativelySimplifyCFG( Added: Modified: llvm/include/llvm/IR/BasicBlock.h llvm/lib/IR/BasicBlock.cpp Removed: llvm/test/Transforms/LoopRotate/phi-empty.ll diff --git a/llvm/include/llvm/IR/BasicBlock.h b/llvm/include/llvm/IR/BasicBlock.h index 149b0a26c1f3..26cfdd9e51d6 100644 --- a/llvm/include/llvm/IR/BasicBlock.h +++ b/llvm/include/llvm/IR/BasicBlock.h @@ -387,9 +387,9 @@ class BasicBlock final : public Value, // Basic blocks are data objects also /// Update PHI nodes in this BasicBlock before removal of predecessor \p Pred. /// Note that this function does not actually remove the predecessor. /// - /// If \p KeepOneInputPHIs is true, then don't remove PHIs that are left with - /// one incoming value and don't simplify PHIs with all incoming values the - /// same. + /// If \p KeepOneInputPHIs is true then don't remove PHIs that are left with + /// zero or one incoming values, and don't simplify PHIs with all incoming + /// values the same. 
void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs = false); bool canSplitPredecessors() const; diff --git a/llvm/lib/IR/BasicBlock.cpp b/llvm/lib/IR/BasicBlock.cpp index aee769aa0fea..3268641ddf19 100644 --- a/llvm/lib/IR/BasicBlock.cpp +++ b/llvm/lib/IR/BasicBlock.cpp @@ -330,7 +330,7 @@ void BasicBlock::removePredecessor(BasicBlock *Pred, unsigned NumPreds = cast(front()).getNumIncomingValues(); for (PHINode &Phi : make_early_inc_range(phis())) { -Phi.removeIncomingValue(Pred); +Phi.removeIncomingValue(Pred, !KeepOneInputPHIs); if (KeepOneInputPHIs) continue; // If we have a single predecessor, removeIncomingValue erased the PHI diff --git a/llvm/test/Transforms/LoopRotate/phi-empty.ll b/llvm/test/Transforms/LoopRotate/phi-empty.ll deleted file mode 100644 index e246cff91b62.. --- a/llvm/test/Transforms/LoopRotate/phi-empty.ll +++ /dev/null @@ -1,34 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -lcssa -loop-rotate < %s | FileCheck %s - -define void @PR48296(i1 %cond) { -; CHECK-LABEL: @PR48296( -; CHECK-NEXT: entry: -; CHECK-NEXT:br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT:br i1 [[COND:%.*]], label [[INC:%.*]], label [[LOOP_BACKEDGE:%.*]] -; CHECK: loop.backedge: -; CHECK-NEXT:br label [[LOOP]] -; CHECK: dead: -; CHECK-NEXT:unreachable -; CHECK: inc: -; CHECK-NEXT:br label [[LOOP_BACKEDGE]] -; CHECK: return: -; CHECK-NEXT:ret void -; -entry: - br label %loop - -loop: - br i1 %cond, label %inc, label %loop - -dead:; No predecessors! - br i1 %cond, label %inc,
[llvm-branch-commits] [llvm] 1dc38f8 - [IR] improve code comment/logic in removePredecessor(); NFC
Author: Sanjay Patel Date: 2020-11-30T10:51:30-05:00 New Revision: 1dc38f8cfbbc4cce12f8416a1e51d38285e6872f URL: https://github.com/llvm/llvm-project/commit/1dc38f8cfbbc4cce12f8416a1e51d38285e6872f DIFF: https://github.com/llvm/llvm-project/commit/1dc38f8cfbbc4cce12f8416a1e51d38285e6872f.diff LOG: [IR] improve code comment/logic in removePredecessor(); NFC This was suggested in the post-commit review of ce134da4b1. Added: Modified: llvm/lib/IR/BasicBlock.cpp Removed: diff --git a/llvm/lib/IR/BasicBlock.cpp b/llvm/lib/IR/BasicBlock.cpp index 3268641ddf19..95b8602b9b6c 100644 --- a/llvm/lib/IR/BasicBlock.cpp +++ b/llvm/lib/IR/BasicBlock.cpp @@ -333,14 +333,16 @@ void BasicBlock::removePredecessor(BasicBlock *Pred, Phi.removeIncomingValue(Pred, !KeepOneInputPHIs); if (KeepOneInputPHIs) continue; -// If we have a single predecessor, removeIncomingValue erased the PHI -// node itself. + +// If we have a single predecessor, removeIncomingValue may have erased the +// PHI node itself. +if (NumPreds == 1) + continue; + // Try to replace the PHI node with a constant value. -if (NumPreds > 1) { - if (Value *PhiConstant = Phi.hasConstantValue()) { -Phi.replaceAllUsesWith(PhiConstant); -Phi.eraseFromParent(); - } +if (Value *PhiConstant = Phi.hasConstantValue()) { + Phi.replaceAllUsesWith(PhiConstant); + Phi.eraseFromParent(); } } } ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] 9eb2c01 - [IR][LoopRotate] remove assertion that phi must have at least one operand
Author: Sanjay Patel Date: 2020-11-30T11:32:42-05:00 New Revision: 9eb2c0113dfe2c1054e524122ca0e17ad552bb01 URL: https://github.com/llvm/llvm-project/commit/9eb2c0113dfe2c1054e524122ca0e17ad552bb01 DIFF: https://github.com/llvm/llvm-project/commit/9eb2c0113dfe2c1054e524122ca0e17ad552bb01.diff LOG: [IR][LoopRotate] remove assertion that phi must have at least one operand This was suggested in D92247 - I initially committed an alternate fix ( bfd2c216ea ) to avoid the crash/assert shown in https://llvm.org/PR48296 , but that was reverted because it caused msan failures on other tests. We can try to revive that patch using the test included here, but I do not have an immediate plan to isolate that problem. Added: llvm/test/Transforms/LoopRotate/phi-empty.ll Modified: llvm/lib/IR/Verifier.cpp Removed: diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index eda923da8df8..bc24d488d2f7 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -2565,11 +2565,6 @@ void Verifier::visitBasicBlock(BasicBlock &BB) { SmallVector, 8> Values; llvm::sort(Preds); for (const PHINode &PN : BB.phis()) { - // Ensure that PHI nodes have at least one entry! - Assert(PN.getNumIncomingValues() != 0, - "PHI nodes must have at least one entry. If the block is dead, " - "the PHI should be removed!", - &PN); Assert(PN.getNumIncomingValues() == Preds.size(), "PHINode should have one entry for each predecessor of its " "parent basic block!", diff --git a/llvm/test/Transforms/LoopRotate/phi-empty.ll b/llvm/test/Transforms/LoopRotate/phi-empty.ll new file mode 100644 index ..9337133f8903 --- /dev/null +++ b/llvm/test/Transforms/LoopRotate/phi-empty.ll @@ -0,0 +1,39 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -lcssa -loop-rotate < %s | FileCheck %s + +; After rotate, the phi has no operands because it has no predecessors. +; We might want to delete that instruction instead, but we do not +; fail/assert by assuming that the phi is invalid IR. + +define void @PR48296(i1 %cond) { +; CHECK-LABEL: @PR48296( +; CHECK-NEXT: entry: +; CHECK-NEXT:br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT:br i1 [[COND:%.*]], label [[INC:%.*]], label [[LOOP_BACKEDGE:%.*]] +; CHECK: loop.backedge: +; CHECK-NEXT:br label [[LOOP]] +; CHECK: dead: +; CHECK-NEXT:unreachable +; CHECK: inc: +; CHECK-NEXT:br label [[LOOP_BACKEDGE]] +; CHECK: return: +; CHECK-NEXT:[[R:%.*]] = phi i32 +; CHECK-NEXT:ret void +; +entry: + br label %loop + +loop: + br i1 %cond, label %inc, label %loop + +dead:; No predecessors! + br i1 %cond, label %inc, label %return + +inc: + br label %loop + +return: + %r = phi i32 [ undef, %dead ] + ret void +} ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] 40dc535 - [x86] add tests for maxnum/minnum with nnan; NFC
Author: Sanjay Patel Date: 2020-11-30T14:30:28-05:00 New Revision: 40dc535b5afffb1d309e44ca636219c1b8a6873b URL: https://github.com/llvm/llvm-project/commit/40dc535b5afffb1d309e44ca636219c1b8a6873b DIFF: https://github.com/llvm/llvm-project/commit/40dc535b5afffb1d309e44ca636219c1b8a6873b.diff LOG: [x86] add tests for maxnum/minnum with nnan; NFC Added: Modified: llvm/test/Analysis/CostModel/X86/fmaxnum.ll llvm/test/Analysis/CostModel/X86/fminnum.ll Removed: diff --git a/llvm/test/Analysis/CostModel/X86/fmaxnum.ll b/llvm/test/Analysis/CostModel/X86/fmaxnum.ll index f1d8e3270298..3116e65388e8 100644 --- a/llvm/test/Analysis/CostModel/X86/fmaxnum.ll +++ b/llvm/test/Analysis/CostModel/X86/fmaxnum.ll @@ -92,6 +92,88 @@ define i32 @f64(i32 %arg) { ret i32 undef } +define i32 @f32_nnan(i32 %arg) { +; SSE-LABEL: 'f32_nnan' +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F32 = call nnan float @llvm.maxnum.f32(float undef, float undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = call nnan <2 x float> @llvm.maxnum.v2f32(<2 x float> undef, <2 x float> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call nnan <4 x float> @llvm.maxnum.v4f32(<4 x float> undef, <4 x float> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call nnan <8 x float> @llvm.maxnum.v8f32(<8 x float> undef, <8 x float> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call nnan <16 x float> @llvm.maxnum.v16f32(<16 x float> undef, <16 x float> undef) +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; AVX1-LABEL: 'f32_nnan' +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F32 = call nnan float @llvm.maxnum.f32(float undef, float undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call nnan <2 x float> @llvm.maxnum.v2f32(<2 x float> undef, <2 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = call nnan <4 x float> @llvm.maxnum.v4f32(<4 x float> undef, <4 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8F32 = call nnan <8 x float> @llvm.maxnum.v8f32(<8 x float> undef, <8 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16F32 = call nnan <16 x float> @llvm.maxnum.v16f32(<16 x float> undef, <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; AVX2-LABEL: 'f32_nnan' +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F32 = call nnan float @llvm.maxnum.f32(float undef, float undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call nnan <2 x float> @llvm.maxnum.v2f32(<2 x float> undef, <2 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = call nnan <4 x float> @llvm.maxnum.v4f32(<4 x float> undef, <4 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8F32 = call nnan <8 x float> @llvm.maxnum.v8f32(<8 x float> undef, <8 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16F32 = call nnan <16 x float> @llvm.maxnum.v16f32(<16 x float> undef, <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; +; AVX512-LABEL: 'f32_nnan' +; AVX512-NEXT: Cost Model: Found 
an estimated cost of 2 for instruction: %F32 = call nnan float @llvm.maxnum.f32(float undef, float undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = call nnan <2 x float> @llvm.maxnum.v2f32(<2 x float> undef, <2 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call nnan <4 x float> @llvm.maxnum.v4f32(<4 x float> undef, <4 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call nnan <8 x float> @llvm.maxnum.v8f32(<8 x float> undef, <8 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16F32 = call nnan <16 x float> @llvm.maxnum.v16f32(<16 x float> undef, <16 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef +; + %F32 = call nnan float @llvm.maxnum.f32(float undef, float undef) + %V2F32 = call nnan <2 x float> @llvm.maxnum.v2f32(<2 x float> undef, <2 x float> undef) + %V4F32 = call nnan <4 x float> @llvm.maxnum.v4f32(<4 x float> undef, <4 x float> undef) + %V8F32 = call nnan <8 x float> @llvm.maxnum.v8f32(<8 x float> undef, <8 x float> undef) + %V16F32 = call nnan <16 x float> @llvm.maxnum.v16f32(<16 x float> undef,
[llvm-branch-commits] [llvm] b2cdd77 - [InstCombine] add tests for sign-bit-shift-of-sub; NFC
Author: Sanjay Patel Date: 2020-12-01T08:01:00-05:00 New Revision: b2cdd776e3e5a709d5904633956d3e9eaad78020 URL: https://github.com/llvm/llvm-project/commit/b2cdd776e3e5a709d5904633956d3e9eaad78020 DIFF: https://github.com/llvm/llvm-project/commit/b2cdd776e3e5a709d5904633956d3e9eaad78020.diff LOG: [InstCombine] add tests for sign-bit-shift-of-sub; NFC Added: Modified: llvm/test/Transforms/InstCombine/ashr-lshr.ll llvm/test/Transforms/InstCombine/lshr.ll Removed: diff --git a/llvm/test/Transforms/InstCombine/ashr-lshr.ll b/llvm/test/Transforms/InstCombine/ashr-lshr.ll index ee90dd5170c3..dc1deb043428 100644 --- a/llvm/test/Transforms/InstCombine/ashr-lshr.ll +++ b/llvm/test/Transforms/InstCombine/ashr-lshr.ll @@ -434,3 +434,139 @@ define <2 x i32> @ashr_lshr_inv_vec_wrong_pred(<2 x i32> %x, <2 x i32> %y) { %ret = select <2 x i1> %cmp, <2 x i32> %r, <2 x i32> %l ret <2 x i32> %ret } + +define i32 @lshr_sub_nsw(i32 %x, i32 %y) { +; CHECK-LABEL: @lshr_sub_nsw( +; CHECK-NEXT:[[SUB:%.*]] = sub nsw i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT:[[SHR:%.*]] = lshr i32 [[SUB]], 31 +; CHECK-NEXT:ret i32 [[SHR]] +; + %sub = sub nsw i32 %x, %y + %shr = lshr i32 %sub, 31 + ret i32 %shr +} + +define i32 @lshr_sub_wrong_amount(i32 %x, i32 %y) { +; CHECK-LABEL: @lshr_sub_wrong_amount( +; CHECK-NEXT:[[SUB:%.*]] = sub nsw i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT:[[SHR:%.*]] = lshr i32 [[SUB]], 30 +; CHECK-NEXT:ret i32 [[SHR]] +; + %sub = sub nsw i32 %x, %y + %shr = lshr i32 %sub, 30 + ret i32 %shr +} + +define i32 @lshr_sub(i32 %x, i32 %y) { +; CHECK-LABEL: @lshr_sub( +; CHECK-NEXT:[[SUB:%.*]] = sub i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT:[[SHR:%.*]] = lshr i32 [[SUB]], 31 +; CHECK-NEXT:ret i32 [[SHR]] +; + %sub = sub i32 %x, %y + %shr = lshr i32 %sub, 31 + ret i32 %shr +} + +define i32 @lshr_sub_nsw_extra_use(i32 %x, i32 %y, i32* %p) { +; CHECK-LABEL: @lshr_sub_nsw_extra_use( +; CHECK-NEXT:[[SUB:%.*]] = sub nsw i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT:store i32 [[SUB]], i32* [[P:%.*]], align 4 +; CHECK-NEXT:[[SHR:%.*]] = lshr i32 [[SUB]], 31 +; CHECK-NEXT:ret i32 [[SHR]] +; + %sub = sub nsw i32 %x, %y + store i32 %sub, i32* %p + %shr = lshr i32 %sub, 31 + ret i32 %shr +} + +define <3 x i42> @lshr_sub_nsw_splat(<3 x i42> %x, <3 x i42> %y) { +; CHECK-LABEL: @lshr_sub_nsw_splat( +; CHECK-NEXT:[[SUB:%.*]] = sub nsw <3 x i42> [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT:[[SHR:%.*]] = lshr <3 x i42> [[SUB]], +; CHECK-NEXT:ret <3 x i42> [[SHR]] +; + %sub = sub nsw <3 x i42> %x, %y + %shr = lshr <3 x i42> %sub, + ret <3 x i42> %shr +} + +define <3 x i42> @lshr_sub_nsw_splat_undef(<3 x i42> %x, <3 x i42> %y) { +; CHECK-LABEL: @lshr_sub_nsw_splat_undef( +; CHECK-NEXT:[[SUB:%.*]] = sub nsw <3 x i42> [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT:[[SHR:%.*]] = lshr <3 x i42> [[SUB]], +; CHECK-NEXT:ret <3 x i42> [[SHR]] +; + %sub = sub nsw <3 x i42> %x, %y + %shr = lshr <3 x i42> %sub, + ret <3 x i42> %shr +} + +define i17 @ashr_sub_nsw(i17 %x, i17 %y) { +; CHECK-LABEL: @ashr_sub_nsw( +; CHECK-NEXT:[[SUB:%.*]] = sub nsw i17 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT:[[SHR:%.*]] = ashr i17 [[SUB]], 16 +; CHECK-NEXT:ret i17 [[SHR]] +; + %sub = sub nsw i17 %x, %y + %shr = ashr i17 %sub, 16 + ret i17 %shr +} + +define i17 @ashr_sub_wrong_amount(i17 %x, i17 %y) { +; CHECK-LABEL: @ashr_sub_wrong_amount( +; CHECK-NEXT:[[SUB:%.*]] = sub nsw i17 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT:[[SHR:%.*]] = ashr i17 [[SUB]], 15 +; CHECK-NEXT:ret i17 [[SHR]] +; + %sub = sub nsw i17 %x, %y + %shr = ashr i17 %sub, 15 + ret i17 %shr +} + +define i32 @ashr_sub(i32 %x, i32 %y) { +; 
CHECK-LABEL: @ashr_sub( +; CHECK-NEXT:[[SUB:%.*]] = sub i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT:[[SHR:%.*]] = ashr i32 [[SUB]], 31 +; CHECK-NEXT:ret i32 [[SHR]] +; + %sub = sub i32 %x, %y + %shr = ashr i32 %sub, 31 + ret i32 %shr +} + +define i32 @ashr_sub_nsw_extra_use(i32 %x, i32 %y, i32* %p) { +; CHECK-LABEL: @ashr_sub_nsw_extra_use( +; CHECK-NEXT:[[SUB:%.*]] = sub nsw i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT:store i32 [[SUB]], i32* [[P:%.*]], align 4 +; CHECK-NEXT:[[SHR:%.*]] = ashr i32 [[SUB]], 31 +; CHECK-NEXT:ret i32 [[SHR]] +; + %sub = sub nsw i32 %x, %y + store i32 %sub, i32* %p + %shr = ashr i32 %sub, 31 + ret i32 %shr +} + +define <3 x i43> @ashr_sub_nsw_splat(<3 x i43> %x, <3 x i43> %y) { +; CHECK-LABEL: @ashr_sub_nsw_splat( +; CHECK-NEXT:[[SUB:%.*]] = sub nsw <3 x i43> [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT:[[SHR:%.*]] = ashr <3 x i43> [[SUB]], +; CHECK-NEXT:ret <3 x i43> [[SHR]] +; + %sub = sub nsw <3 x i43> %x, %y + %shr = ashr <3 x i43> %sub, + ret <3 x i43> %shr +} + +define <3 x i43> @ashr_sub_nsw_splat_undef(<3 x i43> %x, <3 x i43> %y) { +; CHECK-LABEL: @ashr_sub_nsw_splat_undef( +; CHECK-NEXT:[[SUB:%.*]] = sub nsw
[llvm-branch-commits] [llvm] 9f60b8b - [InstCombine] canonicalize sign-bit-shift of difference to ext(icmp)
Author: Sanjay Patel Date: 2020-12-01T09:58:11-05:00 New Revision: 9f60b8b3d2e2cd38b9ae45da7e36a77b3c9dd258 URL: https://github.com/llvm/llvm-project/commit/9f60b8b3d2e2cd38b9ae45da7e36a77b3c9dd258 DIFF: https://github.com/llvm/llvm-project/commit/9f60b8b3d2e2cd38b9ae45da7e36a77b3c9dd258.diff LOG: [InstCombine] canonicalize sign-bit-shift of difference to ext(icmp) icmp is the preferred spelling in IR because icmp analysis is expected to be better than any other analysis. This should lead to more follow-on folding potential. It's difficult to say exactly what we should do in codegen to compensate. For example on AArch64, which of these is preferred: sub w8, w0, w1 lsr w0, w8, #31 vs: cmp w0, w1 csetw0, lt If there are perf regressions, then we should deal with those in codegen on a case-by-case basis. A possible motivating example for better optimization is shown in: https://llvm.org/PR43198 but that will require other transforms before anything changes there. Alive proof: https://rise4fun.com/Alive/o4E Name: sign-bit splat Pre: C1 == (width(%x) - 1) %s = sub nsw %x, %y %r = ashr %s, C1 => %c = icmp slt %x, %y %r = sext %c Name: sign-bit LSB Pre: C1 == (width(%x) - 1) %s = sub nsw %x, %y %r = lshr %s, C1 => %c = icmp slt %x, %y %r = zext %c Added: Modified: llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp llvm/test/Transforms/InstCombine/ashr-lshr.ll llvm/test/Transforms/InstCombine/sub-ashr-and-to-icmp-select.ll llvm/test/Transforms/InstCombine/sub-ashr-or-to-icmp-select.ll Removed: diff --git a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp index 4eaf1bcc22fe..7295369365c4 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -1131,6 +1131,12 @@ Instruction *InstCombinerImpl::visitLShr(BinaryOperator &I) { } } +// lshr i32 (X -nsw Y), 31 --> zext (X < Y) +Value *Y; +if (ShAmt == BitWidth - 1 && +match(Op0, m_OneUse(m_NSWSub(m_Value(X), m_Value(Y) + return new ZExtInst(Builder.CreateICmpSLT(X, Y), Ty); + if (match(Op0, m_LShr(m_Value(X), m_APInt(ShOp1 { unsigned AmtSum = ShAmt + ShOp1->getZExtValue(); // Oversized shifts are simplified to zero in InstSimplify. @@ -1293,6 +1299,12 @@ Instruction *InstCombinerImpl::visitAShr(BinaryOperator &I) { return new SExtInst(NewSh, Ty); } +// ashr i32 (X -nsw Y), 31 --> sext (X < Y) +Value *Y; +if (ShAmt == BitWidth - 1 && +match(Op0, m_OneUse(m_NSWSub(m_Value(X), m_Value(Y) + return new SExtInst(Builder.CreateICmpSLT(X, Y), Ty); + // If the shifted-out value is known-zero, then this is an exact shift. 
if (!I.isExact() && MaskedValueIsZero(Op0, APInt::getLowBitsSet(BitWidth, ShAmt), 0, &I)) { diff --git a/llvm/test/Transforms/InstCombine/ashr-lshr.ll b/llvm/test/Transforms/InstCombine/ashr-lshr.ll index dc1deb043428..72fa0252d839 100644 --- a/llvm/test/Transforms/InstCombine/ashr-lshr.ll +++ b/llvm/test/Transforms/InstCombine/ashr-lshr.ll @@ -437,8 +437,8 @@ define <2 x i32> @ashr_lshr_inv_vec_wrong_pred(<2 x i32> %x, <2 x i32> %y) { define i32 @lshr_sub_nsw(i32 %x, i32 %y) { ; CHECK-LABEL: @lshr_sub_nsw( -; CHECK-NEXT:[[SUB:%.*]] = sub nsw i32 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT:[[SHR:%.*]] = lshr i32 [[SUB]], 31 +; CHECK-NEXT:[[TMP1:%.*]] = icmp slt i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT:[[SHR:%.*]] = zext i1 [[TMP1]] to i32 ; CHECK-NEXT:ret i32 [[SHR]] ; %sub = sub nsw i32 %x, %y @@ -446,6 +446,8 @@ define i32 @lshr_sub_nsw(i32 %x, i32 %y) { ret i32 %shr } +; negative test - must shift sign-bit + define i32 @lshr_sub_wrong_amount(i32 %x, i32 %y) { ; CHECK-LABEL: @lshr_sub_wrong_amount( ; CHECK-NEXT:[[SUB:%.*]] = sub nsw i32 [[X:%.*]], [[Y:%.*]] @@ -457,6 +459,8 @@ define i32 @lshr_sub_wrong_amount(i32 %x, i32 %y) { ret i32 %shr } +; negative test - must have nsw + define i32 @lshr_sub(i32 %x, i32 %y) { ; CHECK-LABEL: @lshr_sub( ; CHECK-NEXT:[[SUB:%.*]] = sub i32 [[X:%.*]], [[Y:%.*]] @@ -468,6 +472,8 @@ define i32 @lshr_sub(i32 %x, i32 %y) { ret i32 %shr } +; negative test - one-use + define i32 @lshr_sub_nsw_extra_use(i32 %x, i32 %y, i32* %p) { ; CHECK-LABEL: @lshr_sub_nsw_extra_use( ; CHECK-NEXT:[[SUB:%.*]] = sub nsw i32 [[X:%.*]], [[Y:%.*]] @@ -483,8 +489,8 @@ define i32 @lshr_sub_nsw_extra_use(i32 %x, i32 %y, i32* %p) { define <3 x i42> @lshr_sub_nsw_splat(<3 x i42> %x, <3 x i42> %y) { ; CHECK-LABEL: @lshr_sub_nsw_splat( -; CHECK-NEXT:[[SUB:%.*]] = sub nsw <3 x i42> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT:[[SHR:%.*]] = lshr <3 x i42> [[SUB]], +; CHECK-NEXT:[[TMP1:%.*]] = icmp slt <3 x i42> [[X:%.*]], [[Y:%.*
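The two Alive proofs above translate directly to source level. Here is a minimal standalone C++ sketch of the same identities (not part of the patch; the function names are invented, and the nsw requirement shows up as the assumption that the subtraction does not overflow):

  #include <cassert>
  #include <cstdint>

  // ashr (x - y), 31  ==  sext (x < y): all-ones when x < y, otherwise 0.
  // Right-shifting a negative signed value is an arithmetic shift on the
  // targets in question (and guaranteed to be since C++20).
  int32_t signbit_splat(int32_t x, int32_t y) { return (x - y) >> 31; }
  int32_t signbit_splat_icmp(int32_t x, int32_t y) { return x < y ? -1 : 0; }

  // lshr (x - y), 31  ==  zext (x < y): 1 when x < y, otherwise 0.
  uint32_t signbit_lsb(int32_t x, int32_t y) {
    return static_cast<uint32_t>(x - y) >> 31;
  }
  uint32_t signbit_lsb_icmp(int32_t x, int32_t y) { return x < y ? 1u : 0u; }

  int main() {
    assert(signbit_splat(3, 7) == signbit_splat_icmp(3, 7));
    assert(signbit_lsb(7, 3) == signbit_lsb_icmp(7, 3));
    return 0;
  }

The icmp spellings are the ones InstCombine now produces, on the reasoning in the log that comparisons are better understood by later folds than shift-of-subtract patterns.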
[llvm-branch-commits] [llvm] 136f98e - [x86] adjust cost model values for minnum/maxnum with fast-math-flags
Author: Sanjay Patel Date: 2020-12-01T10:45:53-05:00 New Revision: 136f98e5236522f55693b8b2d23e87692987f734 URL: https://github.com/llvm/llvm-project/commit/136f98e5236522f55693b8b2d23e87692987f734 DIFF: https://github.com/llvm/llvm-project/commit/136f98e5236522f55693b8b2d23e87692987f734.diff LOG: [x86] adjust cost model values for minnum/maxnum with fast-math-flags Without FMF, we lower these intrinsics into something like this: vmaxsd %xmm0, %xmm1, %xmm2 vcmpunordsd %xmm0, %xmm0, %xmm0 vblendvpd %xmm0, %xmm1, %xmm2, %xmm0 But if we can ignore NANs, the single min/max instruction is enough because there is no need to fix up the x86 logic that corresponds to X > Y ? X : Y. We probably want to make other adjustments for FP intrinsics with FMF to account for specialized codegen (for example, FSQRT). Differential Revision: https://reviews.llvm.org/D92337 Added: Modified: llvm/lib/Target/X86/X86TargetTransformInfo.cpp llvm/test/Analysis/CostModel/X86/fmaxnum.ll llvm/test/Analysis/CostModel/X86/fminnum.ll Removed: diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 770317a9a8b5..36a04a850110 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -2802,93 +2802,105 @@ int X86TTIImpl::getTypeBasedIntrinsicInstrCost( return LT.first * Cost; } +auto adjustTableCost = [](const CostTblEntry &Entry, int LegalizationCost, + FastMathFlags FMF) { + // If there are no NANs to deal with, then these are reduced to a + // single MIN** or MAX** instruction instead of the MIN/CMP/SELECT that we + // assume is used in the non-fast case. + if (Entry.ISD == ISD::FMAXNUM || Entry.ISD == ISD::FMINNUM) { +if (FMF.noNaNs()) + return LegalizationCost * 1; + } + return LegalizationCost * (int)Entry.Cost; +}; + if (ST->useGLMDivSqrtCosts()) if (const auto *Entry = CostTableLookup(GLMCostTbl, ISD, MTy)) -return LT.first * Entry->Cost; +return adjustTableCost(*Entry, LT.first, ICA.getFlags()); if (ST->isSLM()) if (const auto *Entry = CostTableLookup(SLMCostTbl, ISD, MTy)) -return LT.first * Entry->Cost; +return adjustTableCost(*Entry, LT.first, ICA.getFlags()); if (ST->hasCDI()) if (const auto *Entry = CostTableLookup(AVX512CDCostTbl, ISD, MTy)) -return LT.first * Entry->Cost; +return adjustTableCost(*Entry, LT.first, ICA.getFlags()); if (ST->hasBWI()) if (const auto *Entry = CostTableLookup(AVX512BWCostTbl, ISD, MTy)) -return LT.first * Entry->Cost; +return adjustTableCost(*Entry, LT.first, ICA.getFlags()); if (ST->hasAVX512()) if (const auto *Entry = CostTableLookup(AVX512CostTbl, ISD, MTy)) -return LT.first * Entry->Cost; +return adjustTableCost(*Entry, LT.first, ICA.getFlags()); if (ST->hasXOP()) if (const auto *Entry = CostTableLookup(XOPCostTbl, ISD, MTy)) -return LT.first * Entry->Cost; +return adjustTableCost(*Entry, LT.first, ICA.getFlags()); if (ST->hasAVX2()) if (const auto *Entry = CostTableLookup(AVX2CostTbl, ISD, MTy)) -return LT.first * Entry->Cost; +return adjustTableCost(*Entry, LT.first, ICA.getFlags()); if (ST->hasAVX()) if (const auto *Entry = CostTableLookup(AVX1CostTbl, ISD, MTy)) -return LT.first * Entry->Cost; +return adjustTableCost(*Entry, LT.first, ICA.getFlags()); if (ST->hasSSE42()) if (const auto *Entry = CostTableLookup(SSE42CostTbl, ISD, MTy)) -return LT.first * Entry->Cost; +return adjustTableCost(*Entry, LT.first, ICA.getFlags()); if (ST->hasSSE41()) if (const auto *Entry = CostTableLookup(SSE41CostTbl, ISD, MTy)) -return LT.first * Entry->Cost; +return 
adjustTableCost(*Entry, LT.first, ICA.getFlags()); if (ST->hasSSSE3()) if (const auto *Entry = CostTableLookup(SSSE3CostTbl, ISD, MTy)) -return LT.first * Entry->Cost; +return adjustTableCost(*Entry, LT.first, ICA.getFlags()); if (ST->hasSSE2()) if (const auto *Entry = CostTableLookup(SSE2CostTbl, ISD, MTy)) -return LT.first * Entry->Cost; +return adjustTableCost(*Entry, LT.first, ICA.getFlags()); if (ST->hasSSE1()) if (const auto *Entry = CostTableLookup(SSE1CostTbl, ISD, MTy)) -return LT.first * Entry->Cost; +return adjustTableCost(*Entry, LT.first, ICA.getFlags()); if (ST->hasBMI()) { if (ST->is64Bit()) if (const auto *Entry = CostTableLookup(BMI64CostTbl, ISD, MTy)) - return LT.first * Entry->Cost; + return adjustTableCost(*Entry, LT.first, ICA.getFlags()); if (const auto
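The heart of the change is the adjustTableCost lambda that every table lookup above now goes through. A self-contained sketch of the same decision, with stand-in types instead of LLVM's CostTblEntry and FastMathFlags and with invented cost numbers, would look roughly like this:

  #include <cassert>

  enum class Op { FMaxNum, FMinNum, Other };
  struct TableEntry { Op ISD; int Cost; };

  // When NaNs can be ignored, fmaxnum/fminnum legalize to a single MAXP*/MINP*
  // instruction, so charge 1 per legalized piece instead of the table's
  // min/cmp/select cost.
  int adjustTableCost(const TableEntry &E, int LegalizationCost, bool NoNaNs) {
    if ((E.ISD == Op::FMaxNum || E.ISD == Op::FMinNum) && NoNaNs)
      return LegalizationCost * 1;
    return LegalizationCost * E.Cost;
  }

  int main() {
    TableEntry FMax{Op::FMaxNum, 3};                      // hypothetical entry
    assert(adjustTableCost(FMax, 2, /*NoNaNs=*/true) == 2);
    assert(adjustTableCost(FMax, 2, /*NoNaNs=*/false) == 6);
    return 0;
  }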
[llvm-branch-commits] [llvm] 56fd29e - [SLP] use 'match' for binop/select; NFC
Author: Sanjay Patel Date: 2020-12-02T09:04:08-05:00 New Revision: 56fd29e93bd133d354e7e639bca1c025162e91ac URL: https://github.com/llvm/llvm-project/commit/56fd29e93bd133d354e7e639bca1c025162e91ac DIFF: https://github.com/llvm/llvm-project/commit/56fd29e93bd133d354e7e639bca1c025162e91ac.diff LOG: [SLP] use 'match' for binop/select; NFC This might be a small improvement in readability, but the real motivation is to make it easier to adapt the code to deal with intrinsics like 'maxnum' and/or integer min/max. There is potentially help in doing that with D92086, but we might also just add specialized wrappers here to deal with the expected patterns. Added: Modified: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp Removed: diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index bfec51f0ada6..66d736974fbc 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -7463,9 +7463,10 @@ static bool tryToVectorizeHorReductionOrInstOperands( Instruction *Inst; unsigned Level; std::tie(Inst, Level) = Stack.pop_back_val(); -auto *BI = dyn_cast(Inst); -auto *SI = dyn_cast(Inst); -if (BI || SI) { +Value *B0, *B1; +bool IsBinop = match(Inst, m_BinOp(m_Value(B0), m_Value(B1))); +bool IsSelect = match(Inst, m_Select(m_Value(), m_Value(), m_Value())); +if (IsBinop || IsSelect) { HorizontalReduction HorRdx; if (HorRdx.matchAssociativeReduction(P, Inst)) { if (HorRdx.tryToReduce(R, TTI)) { @@ -7476,10 +7477,10 @@ static bool tryToVectorizeHorReductionOrInstOperands( continue; } } - if (P && BI) { -Inst = dyn_cast(BI->getOperand(0)); + if (P && IsBinop) { +Inst = dyn_cast(B0); if (Inst == P) - Inst = dyn_cast(BI->getOperand(1)); + Inst = dyn_cast(B1); if (!Inst) { // Set P to nullptr to avoid re-analysis of phi node in // matchAssociativeReduction function unless this is the root node. ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
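For readers less familiar with PatternMatch, the idiom the patch switches to can be sketched in isolation roughly as follows (a compile-only sketch against the LLVM headers; isBinOpCandidate and isSelectCandidate are invented names, not functions in the patch):

  #include "llvm/IR/Instruction.h"
  #include "llvm/IR/PatternMatch.h"
  using namespace llvm;
  using namespace llvm::PatternMatch;

  // Matches any binary operator and binds both operands, replacing the old
  // dyn_cast<BinaryOperator> plus getOperand() sequence.
  static bool isBinOpCandidate(Instruction *Inst, Value *&B0, Value *&B1) {
    return match(Inst, m_BinOp(m_Value(B0), m_Value(B1)));
  }

  // Matches a select; the condition and both arms are irrelevant here, so the
  // sub-matchers are left unbound.
  static bool isSelectCandidate(Instruction *Inst) {
    return match(Inst, m_Select(m_Value(), m_Value(), m_Value()));
  }

As the log notes, the benefit is that the same match() calls generalize more naturally to min/max intrinsics than concrete dyn_cast checks do.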
[llvm-branch-commits] [llvm] 9d6d24c - [JumpThreading][VectorUtils] avoid infinite loop on unreachable IR
Author: Sanjay Patel Date: 2020-12-02T13:39:33-05:00 New Revision: 9d6d24c25056c17db56cf1ef5124f82eb18afc2c URL: https://github.com/llvm/llvm-project/commit/9d6d24c25056c17db56cf1ef5124f82eb18afc2c DIFF: https://github.com/llvm/llvm-project/commit/9d6d24c25056c17db56cf1ef5124f82eb18afc2c.diff LOG: [JumpThreading][VectorUtils] avoid infinite loop on unreachable IR https://llvm.org/PR48362 It's possible that we could stub this out sooner somewhere within JumpThreading, but I'm not sure how to do that, and then we would still have potential danger in other callers. I can't find a way to trigger this using 'instsimplify', however, because that already has a bailout on unreachable blocks. Added: Modified: llvm/lib/Analysis/VectorUtils.cpp llvm/test/Transforms/JumpThreading/unreachable-loops.ll Removed: diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp index bd69055ac246..90726979ca4a 100644 --- a/llvm/lib/Analysis/VectorUtils.cpp +++ b/llvm/lib/Analysis/VectorUtils.cpp @@ -290,6 +290,10 @@ Value *llvm::findScalarElement(Value *V, unsigned EltNo) { if (EltNo == IIElt) return III->getOperand(1); +// Guard against infinite loop on malformed, unreachable IR. +if (III == III->getOperand(0)) + return nullptr; + // Otherwise, the insertelement doesn't modify the value, recurse on its // vector input. return findScalarElement(III->getOperand(0), EltNo); diff --git a/llvm/test/Transforms/JumpThreading/unreachable-loops.ll b/llvm/test/Transforms/JumpThreading/unreachable-loops.ll index 3f75aeae906c..a0f1c2127209 100644 --- a/llvm/test/Transforms/JumpThreading/unreachable-loops.ll +++ b/llvm/test/Transforms/JumpThreading/unreachable-loops.ll @@ -1,11 +1,12 @@ ; RUN: opt -jump-threading -S < %s | FileCheck %s ; RUN: opt -passes=jump-threading -S < %s | FileCheck %s + ; Check the unreachable loop won't cause infinite loop ; in jump-threading when it tries to update the predecessors' ; profile metadata from a phi node. define void @unreachable_single_bb_loop() { -; CHECK-LABEL: @unreachable_single_bb_loop() +; CHECK-LABEL: @unreachable_single_bb_loop( bb: %tmp = call i32 @a() %tmp1 = icmp eq i32 %tmp, 1 @@ -15,8 +16,8 @@ bb: bb2: ; preds = %bb2 %tmp4 = icmp ne i32 %tmp, 1 switch i1 %tmp4, label %bb2 [ -i1 0, label %bb5 -i1 1, label %bb8 + i1 0, label %bb5 + i1 1, label %bb8 ] bb5: ; preds = %bb2, %bb @@ -31,7 +32,7 @@ bb8: ; preds = %bb8, %bb7, %bb5, %b } define void @unreachable_multi_bbs_loop() { -; CHECK-LABEL: @unreachable_multi_bbs_loop() +; CHECK-LABEL: @unreachable_multi_bbs_loop( bb: %tmp = call i32 @a() %tmp1 = icmp eq i32 %tmp, 1 @@ -44,8 +45,8 @@ bb3: ; preds = %bb2 bb2: ; preds = %bb3 %tmp4 = icmp ne i32 %tmp, 1 switch i1 %tmp4, label %bb3 [ -i1 0, label %bb5 -i1 1, label %bb8 + i1 0, label %bb5 + i1 1, label %bb8 ] bb5: ; preds = %bb2, %bb @@ -60,4 +61,85 @@ bb8: ; preds = %bb8, %bb7, %bb5, %b } declare i32 @a() +; This gets into a state that could cause instruction simplify +; to hang - an insertelement instruction has itself as an operand. 
+ +define void @PR48362() { +; CHECK-LABEL: @PR48362( +cleanup1491: ; preds = %for.body1140 + switch i32 0, label %cleanup2343.loopexit4 [ + i32 0, label %cleanup.cont1500 + i32 128, label %lbl_555.loopexit + ] + +cleanup.cont1500: ; preds = %cleanup1491 + unreachable + +lbl_555.loopexit: ; preds = %cleanup1491 + br label %for.body1509 + +for.body1509: ; preds = %for.inc2340, %lbl_555.loopexit + %l_580.sroa.0.0 = phi <4 x i32> [ , %lbl_555.loopexit ], [ %l_580.sroa.0.2, %for.inc2340 ] + %p_55.addr.10 = phi i16 [ 0, %lbl_555.loopexit ], [ %p_55.addr.11, %for.inc2340 ] + %i82 = load i32, i32* undef, align 1 + %tobool1731.not = icmp eq i32 %i82, 0 + br i1 %tobool1731.not, label %if.end1733, label %if.then1732 + +if.then1732: ; preds = %for.body1509 + br label %cleanup2329 + +if.end1733: ; preds = %for.body1509 + %tobool1735.not = icmp eq i16 %p_55.addr.10, 0 + br i1 %tobool1735.not, label %if.then1736, label %if.else1904 + +if.then1736: ; preds = %if.end1733 + br label %cleanup2329 + +if.else1904: ; preds = %if.end1733 + br label %for.body1911 + +for.body1911: ; pre
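The underlying hazard is a recursion through insertelement operand 0 that never terminates when an instruction in dead code uses itself. A toy, self-contained version of the same guard (no LLVM types; all names invented) shows the shape of the fix:

  #include <cassert>

  struct Node {
    Node *Src = nullptr;  // plays the role of insertelement operand 0
    int Elt = -1;         // the lane this node defines
  };

  // Walk the chain looking for the node that defines lane EltNo.
  // Without the self-use check, this would spin forever when N->Src == N.
  const Node *findScalarSource(const Node *N, int EltNo) {
    while (N) {
      if (N->Elt == EltNo)
        return N;
      if (N->Src == N)    // malformed, unreachable-code style self-use: bail
        return nullptr;
      N = N->Src;
    }
    return nullptr;
  }

  int main() {
    Node A;
    A.Src = &A;           // self-referential node, only possible in dead code
    A.Elt = 1;
    assert(findScalarSource(&A, 0) == nullptr);  // terminates instead of looping
    return 0;
  }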
[llvm-branch-commits] [llvm] 94f6d36 - [InstCombine] avoid crash on phi with unreachable incoming block (PR48369)
Author: Sanjay Patel Date: 2020-12-06T09:31:47-05:00 New Revision: 94f6d365e4be0cf05930df0eedd2bfb23f6fce51 URL: https://github.com/llvm/llvm-project/commit/94f6d365e4be0cf05930df0eedd2bfb23f6fce51 DIFF: https://github.com/llvm/llvm-project/commit/94f6d365e4be0cf05930df0eedd2bfb23f6fce51.diff LOG: [InstCombine] avoid crash on phi with unreachable incoming block (PR48369) Added: Modified: llvm/lib/Transforms/InstCombine/InstructionCombining.cpp llvm/test/Transforms/InstCombine/phi-select-constant.ll Removed: diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 92504da01cbf..cab6f1e5632f 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1083,9 +1083,11 @@ Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) { // operation in that block. However, if this is a critical edge, we would be // inserting the computation on some other paths (e.g. inside a loop). Only // do this if the pred block is unconditionally branching into the phi block. + // Also, make sure that the pred block is not dead code. if (NonConstBB != nullptr) { BranchInst *BI = dyn_cast(NonConstBB->getTerminator()); -if (!BI || !BI->isUnconditional()) return nullptr; +if (!BI || !BI->isUnconditional() || !DT.isReachableFromEntry(NonConstBB)) + return nullptr; } // Okay, we can do the transformation: create the new PHI node. diff --git a/llvm/test/Transforms/InstCombine/phi-select-constant.ll b/llvm/test/Transforms/InstCombine/phi-select-constant.ll index 9d1c973925bb..c65be75c0b4a 100644 --- a/llvm/test/Transforms/InstCombine/phi-select-constant.ll +++ b/llvm/test/Transforms/InstCombine/phi-select-constant.ll @@ -77,16 +77,16 @@ final: define <2 x i8> @vec3(i1 %cond1, i1 %cond2, <2 x i1> %x, <2 x i8> %y, <2 x i8> %z) { ; CHECK-LABEL: @vec3( ; CHECK-NEXT: entry: -; CHECK-NEXT:[[PHITMP1:%.*]] = shufflevector <2 x i8> [[Z:%.*]], <2 x i8> [[Y:%.*]], <2 x i32> +; CHECK-NEXT:[[PHI_SEL1:%.*]] = shufflevector <2 x i8> [[Z:%.*]], <2 x i8> [[Y:%.*]], <2 x i32> ; CHECK-NEXT:br i1 [[COND1:%.*]], label [[IF1:%.*]], label [[ELSE:%.*]] ; CHECK: if1: -; CHECK-NEXT:[[PHITMP2:%.*]] = shufflevector <2 x i8> [[Y]], <2 x i8> [[Z]], <2 x i32> +; CHECK-NEXT:[[PHI_SEL2:%.*]] = shufflevector <2 x i8> [[Y]], <2 x i8> [[Z]], <2 x i32> ; CHECK-NEXT:br i1 [[COND2:%.*]], label [[IF2:%.*]], label [[ELSE]] ; CHECK: if2: -; CHECK-NEXT:[[PHITMP:%.*]] = select <2 x i1> [[X:%.*]], <2 x i8> [[Y]], <2 x i8> [[Z]] +; CHECK-NEXT:[[PHI_SEL:%.*]] = select <2 x i1> [[X:%.*]], <2 x i8> [[Y]], <2 x i8> [[Z]] ; CHECK-NEXT:br label [[ELSE]] ; CHECK: else: -; CHECK-NEXT:[[PHI:%.*]] = phi <2 x i8> [ [[PHITMP]], [[IF2]] ], [ [[PHITMP1]], [[ENTRY:%.*]] ], [ [[PHITMP2]], [[IF1]] ] +; CHECK-NEXT:[[PHI:%.*]] = phi <2 x i8> [ [[PHI_SEL]], [[IF2]] ], [ [[PHI_SEL1]], [[ENTRY:%.*]] ], [ [[PHI_SEL2]], [[IF1]] ] ; CHECK-NEXT:ret <2 x i8> [[PHI]] ; entry: @@ -103,3 +103,37 @@ else: %sel = select <2 x i1> %phi, <2 x i8> %y, <2 x i8> %z ret <2 x i8> %sel } + +; Don't crash on unreachable IR. 
+ +define void @PR48369(i32 %a, i32* %p) { +; CHECK-LABEL: @PR48369( +; CHECK-NEXT: entry: +; CHECK-NEXT:[[PHI_CMP:%.*]] = icmp sgt i32 [[A:%.*]], 0 +; CHECK-NEXT:br label [[BB1:%.*]] +; CHECK: bb1: +; CHECK-NEXT:[[CMP:%.*]] = phi i1 [ [[PHI_CMP]], [[DEADBB:%.*]] ], [ true, [[ENTRY:%.*]] ] +; CHECK-NEXT:[[SHL:%.*]] = select i1 [[CMP]], i32 256, i32 0 +; CHECK-NEXT:store i32 [[SHL]], i32* [[P:%.*]], align 4 +; CHECK-NEXT:br label [[END:%.*]] +; CHECK: deadbb: +; CHECK-NEXT:br label [[BB1]] +; CHECK: end: +; CHECK-NEXT:ret void +; +entry: + %phi.cmp = icmp sgt i32 %a, 0 + br label %bb1 + +bb1: + %cmp = phi i1 [ %phi.cmp, %deadbb ], [ true, %entry ] + %shl = select i1 %cmp, i32 256, i32 0 + store i32 %shl, i32* %p + br label %end + +deadbb: + br label %bb1 + +end: + ret void +} ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
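The new bailout simply asks the DominatorTree whether the offending predecessor can be reached at all. As a toy illustration of what reachable-from-entry means for the CFG in the PR48369 test (a standalone sketch, not LLVM code), a worklist walk from the entry block never visits the dead block that feeds the phi:

  #include <cassert>
  #include <set>
  #include <vector>

  // Toy CFG: block index -> successor indices.  Block 0 is the entry.
  using CFG = std::vector<std::vector<int>>;

  // Moral equivalent of DominatorTree::isReachableFromEntry for the toy CFG.
  bool reachableFromEntry(const CFG &G, int BB) {
    std::set<int> Seen{0};
    std::vector<int> Worklist{0};
    while (!Worklist.empty()) {
      int Cur = Worklist.back();
      Worklist.pop_back();
      if (Cur == BB)
        return true;
      for (int Succ : G[Cur])
        if (Seen.insert(Succ).second)
          Worklist.push_back(Succ);
    }
    return false;
  }

  int main() {
    // entry(0) -> bb1(1) -> end(2); deadbb(3) -> bb1, but nothing reaches 3,
    // mirroring the test where %deadbb feeds a phi in %bb1.
    CFG G = {{1}, {2}, {}, {1}};
    assert(reachableFromEntry(G, 1));
    assert(!reachableFromEntry(G, 3));  // the phi's incoming block is dead
    return 0;
  }

Blocks like %deadbb are left for SimplifyCFG to delete; the fold simply declines to rewrite across them.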
[llvm-branch-commits] [llvm] ac522f8 - [DAGCombiner] Fold (sext (not i1 x)) -> (add (zext i1 x), -1)
Author: Layton Kifer Date: 2020-12-06T11:52:10-05:00 New Revision: ac522f87002ffc20d377e284080c9fa7f63216fc URL: https://github.com/llvm/llvm-project/commit/ac522f87002ffc20d377e284080c9fa7f63216fc DIFF: https://github.com/llvm/llvm-project/commit/ac522f87002ffc20d377e284080c9fa7f63216fc.diff LOG: [DAGCombiner] Fold (sext (not i1 x)) -> (add (zext i1 x), -1) Move fold of (sext (not i1 x)) -> (add (zext i1 x), -1) from X86 to DAGCombiner to improve codegen on other targets. Differential Revision: https://reviews.llvm.org/D91589 Added: Modified: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp llvm/lib/Target/X86/X86ISelLowering.cpp llvm/test/CodeGen/AArch64/select_const.ll llvm/test/CodeGen/ARM/select_const.ll llvm/test/CodeGen/PowerPC/select_const.ll llvm/test/CodeGen/RISCV/sext-zext-trunc.ll llvm/test/CodeGen/SystemZ/sext-zext.ll llvm/test/CodeGen/X86/pr44140.ll Removed: diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index b1a3d849ed99..c40c2502f536 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -10663,6 +10663,19 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT)); } + // fold sext (not i1 X) -> add (zext i1 X), -1 + // TODO: This could be extended to handle bool vectors. + if (N0.getValueType() == MVT::i1 && isBitwiseNot(N0) && N0.hasOneUse() && + (!LegalOperations || (TLI.isOperationLegal(ISD::ZERO_EXTEND, VT) && +TLI.isOperationLegal(ISD::ADD, VT { +// If we can eliminate the 'not', the sext form should be better +if (SDValue NewXor = visitXOR(N0.getNode())) + return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NewXor); + +SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)); +return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT)); + } + return SDValue(); } diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index bfd80690347d..690eb39fa0d4 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -46882,7 +46882,6 @@ static SDValue combineSext(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); - EVT InVT = N0.getValueType(); SDLoc DL(N); // (i32 (sext (i8 (x86isd::setcc_carry -> (i32 (x86isd::setcc_carry)) @@ -46911,16 +46910,6 @@ static SDValue combineSext(SDNode *N, SelectionDAG &DAG, if (SDValue V = combineExtSetcc(N, DAG, Subtarget)) return V; - if (InVT == MVT::i1 && N0.getOpcode() == ISD::XOR && - isAllOnesConstant(N0.getOperand(1)) && N0.hasOneUse()) { -// Invert and sign-extend a boolean is the same as zero-extend and subtract -// 1 because 0 becomes -1 and 1 becomes 0. The subtract is efficiently -// lowered with an LEA or a DEC. This is the same as: select Bool, 0, -1. 
-// sext (xor Bool, -1) --> sub (zext Bool), 1 -SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)); -return DAG.getNode(ISD::SUB, DL, VT, Zext, DAG.getConstant(1, DL, VT)); - } - if (SDValue V = combineToExtendBoolVectorInReg(N, DAG, DCI, Subtarget)) return V; diff --git a/llvm/test/CodeGen/AArch64/select_const.ll b/llvm/test/CodeGen/AArch64/select_const.ll index affb8150ff85..945e7cdc35ad 100644 --- a/llvm/test/CodeGen/AArch64/select_const.ll +++ b/llvm/test/CodeGen/AArch64/select_const.ll @@ -68,8 +68,8 @@ define i32 @select_1_or_0_signext(i1 signext %cond) { define i32 @select_0_or_neg1(i1 %cond) { ; CHECK-LABEL: select_0_or_neg1: ; CHECK: // %bb.0: -; CHECK-NEXT:mvn w8, w0 -; CHECK-NEXT:sbfx w0, w8, #0, #1 +; CHECK-NEXT:and w8, w0, #0x1 +; CHECK-NEXT:sub w0, w8, #1 // =1 ; CHECK-NEXT:ret %sel = select i1 %cond, i32 0, i32 -1 ret i32 %sel @@ -78,8 +78,7 @@ define i32 @select_0_or_neg1(i1 %cond) { define i32 @select_0_or_neg1_zeroext(i1 zeroext %cond) { ; CHECK-LABEL: select_0_or_neg1_zeroext: ; CHECK: // %bb.0: -; CHECK-NEXT:mvn w8, w0 -; CHECK-NEXT:sbfx w0, w8, #0, #1 +; CHECK-NEXT:sub w0, w0, #1 // =1 ; CHECK-NEXT:ret %sel = select i1 %cond, i32 0, i32 -1 ret i32 %sel diff --git a/llvm/test/CodeGen/ARM/select_const.ll b/llvm/test/CodeGen/ARM/select_const.ll index 500426074736..03f538ea5313 100644 --- a/llvm/test/CodeGen/ARM/select_const.ll +++ b/llvm/test/CodeGen/ARM/select_const.ll @@ -137,23 +137,21 @@ define i32 @select_1_or_0_signext(i1 signext %cond) { define i32 @select_0_or_neg1(i1 %cond) { ; ARM-LABEL: select_0_or_neg1: ; ARM: @ %bb.0: -; ARM-NEXT:mov r1, #1 -; ARM-NEXT:bic r0, r1, r0 -; ARM-NEXT:rsb r
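The identity being moved into the generic combiner is easy to check with plain integers. A standalone sketch (helper names invented) comparing sext(not b) with zext(b) - 1 for a one-bit value:

  #include <cassert>
  #include <cstdint>

  // sext (not i1 b): invert the bit, then sign-extend, giving 0 or -1.
  int32_t sext_of_not(bool b) { return b ? 0 : -1; }

  // The combined form: zero-extend, then add -1.  No 'not' node is needed, and
  // the add of -1 maps onto cheap instructions (LEA/DEC style) on most targets.
  int32_t zext_minus_one(bool b) { return static_cast<int32_t>(b) - 1; }

  int main() {
    for (bool b : {false, true})
      assert(sext_of_not(b) == zext_minus_one(b));
    return 0;
  }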
[llvm-branch-commits] [llvm] 5fe1a49 - [SLP] fix typo in debug string; NFC
Author: Sanjay Patel Date: 2020-12-07T15:09:21-05:00 New Revision: 5fe1a49f961d7e6a064addf6373288d5e3697e68 URL: https://github.com/llvm/llvm-project/commit/5fe1a49f961d7e6a064addf6373288d5e3697e68 DIFF: https://github.com/llvm/llvm-project/commit/5fe1a49f961d7e6a064addf6373288d5e3697e68.diff LOG: [SLP] fix typo in debug string; NFC Added: Modified: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp Removed: diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index f78a4d9d9c71..e3f6d8cc05f7 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -1727,7 +1727,7 @@ class BoUpSLP { dbgs() << "NULL\n"; dbgs() << "ReuseShuffleIndices: "; if (ReuseShuffleIndices.empty()) -dbgs() << "Emtpy"; +dbgs() << "Empty"; else for (unsigned ReuseIdx : ReuseShuffleIndices) dbgs() << ReuseIdx << ", "; ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] 2a06628 - [VectorCombine] add tests for load of insert/extract; NFC
Author: Sanjay Patel Date: 2020-12-08T12:56:54-05:00 New Revision: 2a06628185b4598fa8a6a5b733028b4255818ce9 URL: https://github.com/llvm/llvm-project/commit/2a06628185b4598fa8a6a5b733028b4255818ce9 DIFF: https://github.com/llvm/llvm-project/commit/2a06628185b4598fa8a6a5b733028b4255818ce9.diff LOG: [VectorCombine] add tests for load of insert/extract; NFC Added: Modified: llvm/test/Transforms/VectorCombine/X86/load.ll Removed: diff --git a/llvm/test/Transforms/VectorCombine/X86/load.ll b/llvm/test/Transforms/VectorCombine/X86/load.ll index 4b2859238a69..66b9f89dd8dd 100644 --- a/llvm/test/Transforms/VectorCombine/X86/load.ll +++ b/llvm/test/Transforms/VectorCombine/X86/load.ll @@ -496,3 +496,44 @@ define void @PR47558_multiple_use_load(<2 x float>* nocapture nonnull %resultptr store <2 x float> %result1, <2 x float>* %resultptr, align 8 ret void } + +define <4 x float> @load_v2f32_extract_insert_v4f32(<2 x float>* align 16 dereferenceable(16) %p) { +; CHECK-LABEL: @load_v2f32_extract_insert_v4f32( +; CHECK-NEXT:[[L:%.*]] = load <2 x float>, <2 x float>* [[P:%.*]], align 4 +; CHECK-NEXT:[[S:%.*]] = extractelement <2 x float> [[L]], i32 0 +; CHECK-NEXT:[[R:%.*]] = insertelement <4 x float> undef, float [[S]], i32 0 +; CHECK-NEXT:ret <4 x float> [[R]] +; + %l = load <2 x float>, <2 x float>* %p, align 4 + %s = extractelement <2 x float> %l, i32 0 + %r = insertelement <4 x float> undef, float %s, i32 0 + ret <4 x float> %r +} + +define <4 x float> @load_v8f32_extract_insert_v4f32(<8 x float>* align 16 dereferenceable(16) %p) { +; CHECK-LABEL: @load_v8f32_extract_insert_v4f32( +; CHECK-NEXT:[[L:%.*]] = load <8 x float>, <8 x float>* [[P:%.*]], align 4 +; CHECK-NEXT:[[S:%.*]] = extractelement <8 x float> [[L]], i32 0 +; CHECK-NEXT:[[R:%.*]] = insertelement <4 x float> undef, float [[S]], i32 0 +; CHECK-NEXT:ret <4 x float> [[R]] +; + %l = load <8 x float>, <8 x float>* %p, align 4 + %s = extractelement <8 x float> %l, i32 0 + %r = insertelement <4 x float> undef, float %s, i32 0 + ret <4 x float> %r +} + +define <8 x i32> @load_v1i32_extract_insert_v8i32_extra_use(<1 x i32>* align 16 dereferenceable(16) %p, <1 x i32>* %store_ptr) { +; CHECK-LABEL: @load_v1i32_extract_insert_v8i32_extra_use( +; CHECK-NEXT:[[L:%.*]] = load <1 x i32>, <1 x i32>* [[P:%.*]], align 4 +; CHECK-NEXT:store <1 x i32> [[L]], <1 x i32>* [[STORE_PTR:%.*]], align 4 +; CHECK-NEXT:[[S:%.*]] = extractelement <1 x i32> [[L]], i32 0 +; CHECK-NEXT:[[R:%.*]] = insertelement <8 x i32> undef, i32 [[S]], i32 0 +; CHECK-NEXT:ret <8 x i32> [[R]] +; + %l = load <1 x i32>, <1 x i32>* %p, align 4 + store <1 x i32> %l, <1 x i32>* %store_ptr + %s = extractelement <1 x i32> %l, i32 0 + %r = insertelement <8 x i32> undef, i32 %s, i32 0 + ret <8 x i32> %r +} ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] b2ef264 - [VectorCombine] allow peeking through an extractelt when creating a vector load
Author: Sanjay Patel Date: 2020-12-09T10:36:14-05:00 New Revision: b2ef264096c045cf7147320a8bcdf8ec725ec534 URL: https://github.com/llvm/llvm-project/commit/b2ef264096c045cf7147320a8bcdf8ec725ec534 DIFF: https://github.com/llvm/llvm-project/commit/b2ef264096c045cf7147320a8bcdf8ec725ec534.diff LOG: [VectorCombine] allow peeking through an extractelt when creating a vector load This is an enhancement to load vectorization that is motivated by a pattern in https://llvm.org/PR16739. Unfortunately, it's still not enough to make a difference there. We will have to handle multi-use cases in some better way to avoid creating multiple overlapping loads. Differential Revision: https://reviews.llvm.org/D92858 Added: Modified: llvm/lib/Transforms/Vectorize/VectorCombine.cpp llvm/test/Transforms/VectorCombine/X86/load.ll Removed: diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index 5f3d5c768a9e..0d0a338afca3 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -92,18 +92,25 @@ static void replaceValue(Value &Old, Value &New) { } bool VectorCombine::vectorizeLoadInsert(Instruction &I) { - // Match insert into fixed vector of scalar load. + // Match insert into fixed vector of scalar value. auto *Ty = dyn_cast(I.getType()); Value *Scalar; if (!Ty || !match(&I, m_InsertElt(m_Undef(), m_Value(Scalar), m_ZeroInt())) || !Scalar->hasOneUse()) return false; + // Optionally match an extract from another vector. + Value *X; + bool HasExtract = match(Scalar, m_ExtractElt(m_Value(X), m_ZeroInt())); + if (!HasExtract) +X = Scalar; + + // Match source value as load of scalar or vector. // Do not vectorize scalar load (widening) if atomic/volatile or under // asan/hwasan/memtag/tsan. The widened load may load data from dirty regions // or create data races non-existent in the source. 
- auto *Load = dyn_cast(Scalar); - if (!Load || !Load->isSimple() || + auto *Load = dyn_cast(X); + if (!Load || !Load->isSimple() || !Load->hasOneUse() || Load->getFunction()->hasFnAttribute(Attribute::SanitizeMemTag) || mustSuppressSpeculation(*Load)) return false; @@ -134,10 +141,12 @@ bool VectorCombine::vectorizeLoadInsert(Instruction &I) { return false; - // Original pattern: insertelt undef, load [free casts of] ScalarPtr, 0 - int OldCost = TTI.getMemoryOpCost(Instruction::Load, ScalarTy, Alignment, AS); + // Original pattern: insertelt undef, load [free casts of] PtrOp, 0 + Type *LoadTy = Load->getType(); + int OldCost = TTI.getMemoryOpCost(Instruction::Load, LoadTy, Alignment, AS); APInt DemandedElts = APInt::getOneBitSet(MinVecNumElts, 0); - OldCost += TTI.getScalarizationOverhead(MinVecTy, DemandedElts, true, false); + OldCost += TTI.getScalarizationOverhead(MinVecTy, DemandedElts, + /* Insert */ true, HasExtract); // New pattern: load VecPtr int NewCost = TTI.getMemoryOpCost(Instruction::Load, MinVecTy, Alignment, AS); diff --git a/llvm/test/Transforms/VectorCombine/X86/load.ll b/llvm/test/Transforms/VectorCombine/X86/load.ll index 66b9f89dd8dd..824a507ed103 100644 --- a/llvm/test/Transforms/VectorCombine/X86/load.ll +++ b/llvm/test/Transforms/VectorCombine/X86/load.ll @@ -499,9 +499,8 @@ define void @PR47558_multiple_use_load(<2 x float>* nocapture nonnull %resultptr define <4 x float> @load_v2f32_extract_insert_v4f32(<2 x float>* align 16 dereferenceable(16) %p) { ; CHECK-LABEL: @load_v2f32_extract_insert_v4f32( -; CHECK-NEXT:[[L:%.*]] = load <2 x float>, <2 x float>* [[P:%.*]], align 4 -; CHECK-NEXT:[[S:%.*]] = extractelement <2 x float> [[L]], i32 0 -; CHECK-NEXT:[[R:%.*]] = insertelement <4 x float> undef, float [[S]], i32 0 +; CHECK-NEXT:[[TMP1:%.*]] = bitcast <2 x float>* [[P:%.*]] to <4 x float>* +; CHECK-NEXT:[[R:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 ; CHECK-NEXT:ret <4 x float> [[R]] ; %l = load <2 x float>, <2 x float>* %p, align 4 @@ -512,9 +511,8 @@ define <4 x float> @load_v2f32_extract_insert_v4f32(<2 x float>* align 16 derefe define <4 x float> @load_v8f32_extract_insert_v4f32(<8 x float>* align 16 dereferenceable(16) %p) { ; CHECK-LABEL: @load_v8f32_extract_insert_v4f32( -; CHECK-NEXT:[[L:%.*]] = load <8 x float>, <8 x float>* [[P:%.*]], align 4 -; CHECK-NEXT:[[S:%.*]] = extractelement <8 x float> [[L]], i32 0 -; CHECK-NEXT:[[R:%.*]] = insertelement <4 x float> undef, float [[S]], i32 0 +; CHECK-NEXT:[[TMP1:%.*]] = bitcast <8 x float>* [[P:%.*]] to <4 x float>* +; CHECK-NEXT:[[R:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 ; CHECK-NEXT:ret <4 x float> [[R]] ; %l = load <8 x float>, <8 x float>* %p, align 4
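Cost is the other half of the transform: the narrow load plus the lane insert (and, with this patch, the extract that is peeked through) has to be at least as expensive as one wide load. A toy sketch of that comparison with invented cost numbers (the real values come from TargetTransformInfo):

  #include <cassert>

  struct Costs {
    int NarrowLoad = 1;   // original scalar or narrow-vector load
    int InsertLane = 1;   // building the wide vector from the scalar
    int ExtractLane = 1;  // only paid when an extractelement is peeked through
    int WideLoad = 1;     // the proposed wide vector load
  };

  // Mirrors the shape of the check in vectorizeLoadInsert: widen unless the
  // wide load is strictly more expensive than what it replaces.
  bool shouldWidenLoad(const Costs &C, bool HasExtract) {
    int OldCost = C.NarrowLoad + C.InsertLane + (HasExtract ? C.ExtractLane : 0);
    int NewCost = C.WideLoad;
    return NewCost <= OldCost;
  }

  int main() {
    assert(shouldWidenLoad(Costs{}, /*HasExtract=*/true));
    assert(shouldWidenLoad(Costs{}, /*HasExtract=*/false));
    return 0;
  }

Cost aside, the wider access also has to be provably safe, which is why the tests use pointers annotated align 16 dereferenceable(16).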
[llvm-branch-commits] [llvm] 12b684a - [VectorCombine] improve readability; NFC
Author: Sanjay Patel Date: 2020-12-10T13:10:26-05:00 New Revision: 12b684ae02226f7785d3fb412fb155d4e15cc9bd URL: https://github.com/llvm/llvm-project/commit/12b684ae02226f7785d3fb412fb155d4e15cc9bd DIFF: https://github.com/llvm/llvm-project/commit/12b684ae02226f7785d3fb412fb155d4e15cc9bd.diff LOG: [VectorCombine] improve readability; NFC If we are going to allow adjusting the pointer for GEPs, rearranging the code a bit will make it easier to follow. Added: Modified: llvm/lib/Transforms/Vectorize/VectorCombine.cpp Removed: diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index 0d0a338afca3..19f5a2b432f7 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -116,15 +116,16 @@ bool VectorCombine::vectorizeLoadInsert(Instruction &I) { return false; // TODO: Extend this to match GEP with constant offsets. - Value *PtrOp = Load->getPointerOperand()->stripPointerCasts(); - assert(isa(PtrOp->getType()) && "Expected a pointer type"); - unsigned AS = Load->getPointerAddressSpace(); + const DataLayout &DL = I.getModule()->getDataLayout(); + Value *SrcPtr = Load->getPointerOperand()->stripPointerCasts(); + assert(isa(SrcPtr->getType()) && "Expected a pointer type"); // If original AS != Load's AS, we can't bitcast the original pointer and have // to use Load's operand instead. Ideally we would want to strip pointer casts // without changing AS, but there's no API to do that ATM. - if (AS != PtrOp->getType()->getPointerAddressSpace()) -PtrOp = Load->getPointerOperand(); + unsigned AS = Load->getPointerAddressSpace(); + if (AS != SrcPtr->getType()->getPointerAddressSpace()) +SrcPtr = Load->getPointerOperand(); Type *ScalarTy = Scalar->getType(); uint64_t ScalarSize = ScalarTy->getPrimitiveSizeInBits(); @@ -136,11 +137,9 @@ bool VectorCombine::vectorizeLoadInsert(Instruction &I) { unsigned MinVecNumElts = MinVectorSize / ScalarSize; auto *MinVecTy = VectorType::get(ScalarTy, MinVecNumElts, false); Align Alignment = Load->getAlign(); - const DataLayout &DL = I.getModule()->getDataLayout(); - if (!isSafeToLoadUnconditionally(PtrOp, MinVecTy, Alignment, DL, Load, &DT)) + if (!isSafeToLoadUnconditionally(SrcPtr, MinVecTy, Alignment, DL, Load, &DT)) return false; - // Original pattern: insertelt undef, load [free casts of] PtrOp, 0 Type *LoadTy = Load->getType(); int OldCost = TTI.getMemoryOpCost(Instruction::Load, LoadTy, Alignment, AS); @@ -159,7 +158,7 @@ bool VectorCombine::vectorizeLoadInsert(Instruction &I) { // It is safe and potentially profitable to load a vector directly: // inselt undef, load Scalar, 0 --> load VecPtr IRBuilder<> Builder(Load); - Value *CastedPtr = Builder.CreateBitCast(PtrOp, MinVecTy->getPointerTo(AS)); + Value *CastedPtr = Builder.CreateBitCast(SrcPtr, MinVecTy->getPointerTo(AS)); Value *VecLd = Builder.CreateAlignedLoad(MinVecTy, CastedPtr, Alignment); // If the insert type does not match the target's minimum vector type, ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] 4f051fe - [InstCombine] avoid crash sinking to unreachable block
Author: Sanjay Patel Date: 2020-12-10T13:10:26-05:00 New Revision: 4f051fe37438632d10480c346520a0de624dbebf URL: https://github.com/llvm/llvm-project/commit/4f051fe37438632d10480c346520a0de624dbebf DIFF: https://github.com/llvm/llvm-project/commit/4f051fe37438632d10480c346520a0de624dbebf.diff LOG: [InstCombine] avoid crash sinking to unreachable block The test is reduced from the example in D82005. Similar to 94f6d365e, the test here would assert in the DomTree when we tried to convert a select to a phi with an unreachable block operand. We may want to add some kind of guard code in DomTree itself to avoid this sort of problem. Added: Modified: llvm/lib/Transforms/InstCombine/InstructionCombining.cpp llvm/test/Transforms/InstCombine/phi-select-constant.ll Removed: diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index cab6f1e5632f..bbc76325a67b 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -3640,7 +3640,9 @@ bool InstCombinerImpl::run() { else UserParent = UserInst->getParent(); -if (UserParent != BB) { +// Try sinking to another block. If that block is unreachable, then do +// not bother. SimplifyCFG should handle it. +if (UserParent != BB && DT.isReachableFromEntry(UserParent)) { // See if the user is one of our successors that has only one // predecessor, so that we don't have to split the critical edge. bool ShouldSink = UserParent->getUniquePredecessor() == BB; diff --git a/llvm/test/Transforms/InstCombine/phi-select-constant.ll b/llvm/test/Transforms/InstCombine/phi-select-constant.ll index c65be75c0b4a..e3f35d2e6001 100644 --- a/llvm/test/Transforms/InstCombine/phi-select-constant.ll +++ b/llvm/test/Transforms/InstCombine/phi-select-constant.ll @@ -137,3 +137,24 @@ deadbb: end: ret void } + +define i16 @sink_to_unreachable_crash(i1 %a) { +; CHECK-LABEL: @sink_to_unreachable_crash( +; CHECK-NEXT: entry: +; CHECK-NEXT:[[S:%.*]] = select i1 [[A:%.*]], i16 0, i16 5 +; CHECK-NEXT:br label [[INF_LOOP:%.*]] +; CHECK: inf_loop: +; CHECK-NEXT:br label [[INF_LOOP]] +; CHECK: unreachable: +; CHECK-NEXT:ret i16 [[S]] +; +entry: + %s = select i1 %a, i16 0, i16 5 + br label %inf_loop + +inf_loop: + br label %inf_loop + +unreachable: ; No predecessors! + ret i16 %s +} ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] 204bdc5 - [InstCombine][x86] fix insertion point bug in vector demanded elts fold (PR48476)
Author: Sanjay Patel Date: 2020-12-11T17:23:35-05:00 New Revision: 204bdc5322cc89603d503fb1f02a0eba19a1b496 URL: https://github.com/llvm/llvm-project/commit/204bdc5322cc89603d503fb1f02a0eba19a1b496 DIFF: https://github.com/llvm/llvm-project/commit/204bdc5322cc89603d503fb1f02a0eba19a1b496.diff LOG: [InstCombine][x86] fix insertion point bug in vector demanded elts fold (PR48476) This transform was added at: c63799fc52ff From what I see, it's the first demanded elements transform that adds a new instruction using the IRBuilder. There are similar folds in the generic demanded bits chunk of instcombine that also use the InsertPointGuard code pattern. The tests here would assert/crash because the new instruction was being added at the start of the demanded elements analysis rather than at the instruction that is being replaced. Added: Modified: llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp llvm/test/Transforms/InstCombine/X86/x86-addsub.ll Removed: diff --git a/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp b/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp index 3b05dba57a33..ca026baa2c41 100644 --- a/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp +++ b/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp @@ -1916,13 +1916,20 @@ Optional X86TTIImpl::simplifyDemandedVectorEltsIntrinsic( case Intrinsic::x86_sse3_addsub_pd: case Intrinsic::x86_sse3_addsub_ps: case Intrinsic::x86_avx_addsub_pd_256: case Intrinsic::x86_avx_addsub_ps_256: { +// If none of the even or none of the odd lanes are required, turn this +// into a generic FP math instruction. APInt SubMask = APInt::getSplat(VWidth, APInt(2, 0x1)); -if (DemandedElts.isSubsetOf(SubMask)) - return IC.Builder.CreateFSub(II.getArgOperand(0), II.getArgOperand(1)); - APInt AddMask = APInt::getSplat(VWidth, APInt(2, 0x2)); -if (DemandedElts.isSubsetOf(AddMask)) - return IC.Builder.CreateFAdd(II.getArgOperand(0), II.getArgOperand(1)); +bool IsSubOnly = DemandedElts.isSubsetOf(SubMask); +bool IsAddOnly = DemandedElts.isSubsetOf(AddMask); +if (IsSubOnly || IsAddOnly) { + assert((IsSubOnly ^ IsAddOnly) && "Can't be both add-only and sub-only"); + IRBuilderBase::InsertPointGuard Guard(IC.Builder); + IC.Builder.SetInsertPoint(&II); + Value *Arg0 = II.getArgOperand(0), *Arg1 = II.getArgOperand(1); + return IC.Builder.CreateBinOp( + IsSubOnly ? 
Instruction::FSub : Instruction::FAdd, Arg0, Arg1); +} simplifyAndSetOp(&II, 0, DemandedElts, UndefElts); simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2); diff --git a/llvm/test/Transforms/InstCombine/X86/x86-addsub.ll b/llvm/test/Transforms/InstCombine/X86/x86-addsub.ll index d051732ee819..0b9831be8fcf 100644 --- a/llvm/test/Transforms/InstCombine/X86/x86-addsub.ll +++ b/llvm/test/Transforms/InstCombine/X86/x86-addsub.ll @@ -5,6 +5,7 @@ declare <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double>, <2 x double>) declare <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float>, <4 x float>) declare <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double>, <4 x double>) declare <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float>, <8 x float>) +declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8 immarg) #0 ; ; Demanded Elts @@ -164,4 +165,30 @@ define void @PR46277(float %0, float %1, float %2, float %3, <4 x float> %4, flo ret void } +define double @PR48476_fsub(<2 x double> %x) { +; CHECK-LABEL: @PR48476_fsub( +; CHECK-NEXT:[[TMP1:%.*]] = fsub <2 x double> , [[X:%.*]] +; CHECK-NEXT:[[T2:%.*]] = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> [[TMP1]], <2 x double> [[X]], i8 6) +; CHECK-NEXT:[[VECEXT:%.*]] = extractelement <2 x double> [[T2]], i32 0 +; CHECK-NEXT:ret double [[VECEXT]] +; + %t1 = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> zeroinitializer, <2 x double> %x) + %t2 = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %t1, <2 x double> %x, i8 6) + %vecext = extractelement <2 x double> %t2, i32 0 + ret double %vecext +} +define double @PR48476_fadd_fsub(<2 x double> %x) { +; CHECK-LABEL: @PR48476_fadd_fsub( +; CHECK-NEXT:[[TMP1:%.*]] = fadd <2 x double> [[X:%.*]], +; CHECK-NEXT:[[S:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> undef, <2 x i32> +; CHECK-NEXT:[[TMP2:%.*]] = fsub <2 x double> [[S]], [[X]] +; CHECK-NEXT:[[VECEXT:%.*]] = extractelement <2 x double> [[TMP2]], i32 0 +; CHECK-NEXT:ret double [[VECEXT]] +; + %t1 = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> zeroinitializer, <2 x double> %x) + %s = shufflevector <2 x double> %t1, <2 x double> undef, <2 x i32> + %t2 = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %s, <2 x double> %x) + %vecext = extractelement <2 x double> %t2, i32 0 + ret double %vecext +} ___ llvm-branch-commits mailing li
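The fix leans on IRBuilder's standard RAII guard so the builder only temporarily points at the instruction being replaced. A sketch of that idiom in isolation (Builder, Inst, LHS and RHS are placeholders, not names from the patch):

  // Instructions created inside this scope are inserted immediately before
  // Inst; the builder's previous insertion point and debug location are
  // restored when Guard goes out of scope.
  {
    IRBuilderBase::InsertPointGuard Guard(Builder);
    Builder.SetInsertPoint(Inst);
    Value *Replacement = Builder.CreateFSub(LHS, RHS);
    // ... report Replacement as the simplified value for Inst ...
  }

Without the guard, the replacement instruction was created wherever the builder happened to be pointing, which for this caller was the start of the demanded-elements analysis rather than the addsub call being simplified.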