[llvm-branch-commits] [llvm] d6bb96e - [X86] Add experimental option to separately tune alignment of innermost loops

2021-01-20 Thread Max Kazantsev via llvm-branch-commits

Author: Max Kazantsev
Date: 2021-01-21T11:15:16+07:00
New Revision: d6bb96e677759375b2bea00115918b2cb6552f5b

URL: 
https://github.com/llvm/llvm-project/commit/d6bb96e677759375b2bea00115918b2cb6552f5b
DIFF: 
https://github.com/llvm/llvm-project/commit/d6bb96e677759375b2bea00115918b2cb6552f5b.diff

LOG: [X86] Add experimental option to separately tune alignment of innermost 
loops

We already have an experimental option to tune loop alignment. Its impact
is very wide (and there is a suspicion that it's not always profitable). We want
to have something narrower to play with. This patch adds a similar option that
overrides the preferred alignment for innermost loops. This is for experimental
purposes; the default values do not change the existing behavior.

Differential Revision: https://reviews.llvm.org/D94895
Reviewed By: pengfei

Added: 
llvm/test/CodeGen/X86/innermost-loop-alignment.ll

Modified: 
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/lib/Target/X86/X86ISelLowering.h

Removed: 




diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp 
b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 852078a299b9..7cd17f109935 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -35,6 +35,7 @@
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/TargetLowering.h"
@@ -76,6 +77,14 @@ static cl::opt ExperimentalPrefLoopAlignment(
 " of the loop header PC will be 0)."),
 cl::Hidden);
 
+static cl::opt ExperimentalPrefInnermostLoopAlignment(
+"x86-experimental-pref-innermost-loop-alignment", cl::init(4),
+cl::desc(
+"Sets the preferable loop alignment for experiments (as log2 bytes) "
+"for innermost loops only. If specified, this option overrides "
+"alignment set by x86-experimental-pref-loop-alignment."),
+cl::Hidden);
+
 static cl::opt MulConstantOptimization(
 "mul-constant-optimization", cl::init(true),
 cl::desc("Replace 'mul x, Const' with more effective instructions like "
@@ -51696,3 +51705,10 @@ X86TargetLowering::getStackProbeSize(MachineFunction 
&MF) const {
 .getAsInteger(0, StackProbeSize);
   return StackProbeSize;
 }
+
+Align X86TargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
+  if (ML->isInnermost() &&
+  ExperimentalPrefInnermostLoopAlignment.getNumOccurrences())
+return Align(1ULL << ExperimentalPrefInnermostLoopAlignment);
+  return TargetLowering::getPrefLoopAlignment();
+}

diff  --git a/llvm/lib/Target/X86/X86ISelLowering.h 
b/llvm/lib/Target/X86/X86ISelLowering.h
index 8b71c8394c01..76c83b7df9eb 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1408,6 +1408,8 @@ namespace llvm {
SDValue Addr, SelectionDAG &DAG)
const override;
 
+Align getPrefLoopAlignment(MachineLoop *ML) const override;
+
   protected:
 std::pair
 findRepresentativeClass(const TargetRegisterInfo *TRI,

diff  --git a/llvm/test/CodeGen/X86/innermost-loop-alignment.ll 
b/llvm/test/CodeGen/X86/innermost-loop-alignment.ll
new file mode 100644
index ..fef30fd28716
--- /dev/null
+++ b/llvm/test/CodeGen/X86/innermost-loop-alignment.ll
@@ -0,0 +1,59 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-pc-linux-gnu | FileCheck %s -check-prefix=DEFAULT
+; RUN: llc < %s -mtriple=i686-pc-linux-gnu 
-x86-experimental-pref-innermost-loop-alignment=5 | FileCheck %s 
-check-prefix=ALIGN32
+; RUN: llc < %s -mtriple=i686-pc-linux-gnu 
-x86-experimental-pref-loop-alignment=5 
-x86-experimental-pref-innermost-loop-alignment=6 | FileCheck %s 
-check-prefix=ALIGN64
+
+declare void @foo()
+
+define void @test(i32 %n, i32 %m) {
+; DEFAULT-LABEL: test:
+; DEFAULT: .p2align 4, 0x90
+; DEFAULT-NEXT:  .LBB0_1: # %outer
+; DEFAULT-NEXT:# =>This Loop Header: Depth=1
+; DEFAULT-NEXT:# Child Loop BB0_2 Depth 2
+; DEFAULT: .p2align 4, 0x90
+; DEFAULT-NEXT:  .LBB0_2: # %inner
+; DEFAULT-NEXT:# Parent Loop BB0_1 Depth=1
+
+; ALIGN32-LABEL: test:
+; ALIGN32: .p2align 4, 0x90
+; ALIGN32-NEXT:  .LBB0_1: # %outer
+; ALIGN32-NEXT:# =>This Loop Header: Depth=1
+; ALIGN32-NEXT:# Child Loop BB0_2 Depth 2
+; ALIGN32: .p2align 5, 0x90
+; ALIGN32-NEXT:  .LBB0_2: # %inner
+; ALIGN32-NEXT:# Parent Loop BB0_1 Depth=1
+; ALIGN32-NEXT:# => This Inner Loop Header: Depth=2
+
+; ALIGN64-LABEL: test:
+; ALIGN64: .p2align 5, 0x90
+; ALIGN64-NEXT:  .LBB0_1: # %outer
+; ALIGN64-NEXT:# =>This Loop Header: Depth=1
+; ALIGN64-NEXT:# Child Loop BB0_2 De

[llvm-branch-commits] [llvm] e8287cb - [Test] Add failing test for PR48725

2021-01-12 Thread Max Kazantsev via llvm-branch-commits

Author: Max Kazantsev
Date: 2021-01-12T16:06:34+07:00
New Revision: e8287cb2b2923af9da72fd953e2ec5495c33861a

URL: 
https://github.com/llvm/llvm-project/commit/e8287cb2b2923af9da72fd953e2ec5495c33861a
DIFF: 
https://github.com/llvm/llvm-project/commit/e8287cb2b2923af9da72fd953e2ec5495c33861a.diff

LOG: [Test] Add failing test for PR48725

Added: 
llvm/test/Transforms/LoopStrengthReduce/pr48725.ll

Modified: 


Removed: 




diff  --git a/llvm/test/Transforms/LoopStrengthReduce/pr48725.ll 
b/llvm/test/Transforms/LoopStrengthReduce/pr48725.ll
new file mode 100644
index ..ef25b92ffd1c
--- /dev/null
+++ b/llvm/test/Transforms/LoopStrengthReduce/pr48725.ll
@@ -0,0 +1,102 @@
+; RUN: opt -S -loop-reduce < %s | FileCheck %s
+; XFAIL: *
+
+source_filename = "./simple.ll"
+target datalayout = 
"e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128-ni:1-p2:32:8:8:32-ni:2"
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK-LABEL: test
+define void @test() {
+bb:
+  br label %bb1
+
+bb1:  ; preds = %bb1, %bb
+  %tmp = phi i32 [ undef, %bb ], [ %tmp87, %bb1 ]
+  %tmp2 = phi i32 [ undef, %bb ], [ %tmp86, %bb1 ]
+  %tmp3 = mul i32 %tmp, undef
+  %tmp4 = xor i32 %tmp3, -1
+  %tmp5 = add i32 %tmp, %tmp4
+  %tmp6 = add i32 %tmp2, -1
+  %tmp7 = add i32 %tmp5, %tmp6
+  %tmp8 = mul i32 %tmp7, %tmp3
+  %tmp9 = xor i32 %tmp8, -1
+  %tmp10 = add i32 %tmp7, %tmp9
+  %tmp11 = add i32 %tmp10, undef
+  %tmp12 = mul i32 %tmp11, %tmp8
+  %tmp13 = xor i32 %tmp12, -1
+  %tmp14 = add i32 %tmp11, %tmp13
+  %tmp15 = add i32 %tmp14, undef
+  %tmp16 = mul i32 %tmp15, %tmp12
+  %tmp17 = add i32 %tmp15, undef
+  %tmp18 = add i32 %tmp17, undef
+  %tmp19 = mul i32 %tmp18, %tmp16
+  %tmp20 = xor i32 %tmp19, -1
+  %tmp21 = add i32 %tmp18, %tmp20
+  %tmp22 = add i32 %tmp21, undef
+  %tmp23 = mul i32 %tmp22, %tmp19
+  %tmp24 = xor i32 %tmp23, -1
+  %tmp25 = add i32 %tmp22, %tmp24
+  %tmp26 = add i32 %tmp25, undef
+  %tmp27 = mul i32 %tmp26, %tmp23
+  %tmp28 = xor i32 %tmp27, -1
+  %tmp29 = add i32 %tmp26, %tmp28
+  %tmp30 = add i32 %tmp29, undef
+  %tmp31 = mul i32 %tmp30, %tmp27
+  %tmp32 = xor i32 %tmp31, -1
+  %tmp33 = add i32 %tmp30, %tmp32
+  %tmp34 = add i32 %tmp33, undef
+  %tmp35 = mul i32 %tmp34, %tmp31
+  %tmp36 = xor i32 %tmp35, -1
+  %tmp37 = add i32 %tmp34, %tmp36
+  %tmp38 = add i32 %tmp2, -9
+  %tmp39 = add i32 %tmp37, %tmp38
+  %tmp40 = mul i32 %tmp39, %tmp35
+  %tmp41 = xor i32 %tmp40, -1
+  %tmp42 = add i32 %tmp39, %tmp41
+  %tmp43 = add i32 %tmp42, undef
+  %tmp44 = mul i32 %tmp43, %tmp40
+  %tmp45 = xor i32 %tmp44, -1
+  %tmp46 = add i32 %tmp43, %tmp45
+  %tmp47 = add i32 %tmp46, undef
+  %tmp48 = mul i32 %tmp47, %tmp44
+  %tmp49 = xor i32 %tmp48, -1
+  %tmp50 = add i32 %tmp47, %tmp49
+  %tmp51 = add i32 %tmp50, undef
+  %tmp52 = mul i32 %tmp51, %tmp48
+  %tmp53 = xor i32 %tmp52, -1
+  %tmp54 = add i32 %tmp51, %tmp53
+  %tmp55 = add i32 %tmp54, undef
+  %tmp56 = mul i32 %tmp55, %tmp52
+  %tmp57 = xor i32 %tmp56, -1
+  %tmp58 = add i32 %tmp55, %tmp57
+  %tmp59 = add i32 %tmp2, -14
+  %tmp60 = add i32 %tmp58, %tmp59
+  %tmp61 = mul i32 %tmp60, %tmp56
+  %tmp62 = xor i32 %tmp61, -1
+  %tmp63 = add i32 %tmp60, %tmp62
+  %tmp64 = add i32 %tmp63, undef
+  %tmp65 = mul i32 %tmp64, %tmp61
+  %tmp66 = xor i32 %tmp65, -1
+  %tmp67 = add i32 %tmp64, %tmp66
+  %tmp68 = add i32 %tmp67, undef
+  %tmp69 = mul i32 %tmp68, %tmp65
+  %tmp70 = xor i32 %tmp69, -1
+  %tmp71 = add i32 %tmp68, %tmp70
+  %tmp72 = add i32 %tmp71, undef
+  %tmp73 = mul i32 %tmp72, %tmp69
+  %tmp74 = xor i32 %tmp73, -1
+  %tmp75 = add i32 %tmp72, %tmp74
+  %tmp76 = add i32 %tmp75, undef
+  %tmp77 = mul i32 %tmp76, %tmp73
+  %tmp78 = xor i32 %tmp77, -1
+  %tmp79 = add i32 %tmp76, %tmp78
+  %tmp80 = add i32 %tmp79, undef
+  %tmp81 = mul i32 %tmp80, %tmp77
+  %tmp82 = xor i32 %tmp81, -1
+  %tmp83 = add i32 %tmp80, %tmp82
+  %tmp84 = add i32 %tmp83, undef
+  %tmp85 = add i32 %tmp84, undef
+  %tmp86 = add i32 %tmp2, -21
+  %tmp87 = add i32 %tmp85, %tmp86
+  br label %bb1
+}



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 2fc2e6d - [Test] Test on assertion failure with expensive SCEV range inference

2020-12-14 Thread Max Kazantsev via llvm-branch-commits

Author: Max Kazantsev
Date: 2020-12-15T13:47:19+07:00
New Revision: 2fc2e6de8222ae44fc4fd44fb31238b4a8cc990c

URL: 
https://github.com/llvm/llvm-project/commit/2fc2e6de8222ae44fc4fd44fb31238b4a8cc990c
DIFF: 
https://github.com/llvm/llvm-project/commit/2fc2e6de8222ae44fc4fd44fb31238b4a8cc990c.diff

LOG: [Test] Test on assertion failure with expensive SCEV range inference

Added: 

llvm/test/Transforms/IndVarSimplify/2020-12-15-trunc-bug-expensive-range-inference.ll

Modified: 


Removed: 




diff  --git 
a/llvm/test/Transforms/IndVarSimplify/2020-12-15-trunc-bug-expensive-range-inference.ll
 
b/llvm/test/Transforms/IndVarSimplify/2020-12-15-trunc-bug-expensive-range-inference.ll
new file mode 100644
index ..dd105f40544e
--- /dev/null
+++ 
b/llvm/test/Transforms/IndVarSimplify/2020-12-15-trunc-bug-expensive-range-inference.ll
@@ -0,0 +1,61 @@
+; RUN: opt < %s -indvars -S -scalar-evolution-use-expensive-range-sharpening | 
FileCheck %s
+; RUN: opt < %s -passes=indvars -S 
-scalar-evolution-use-expensive-range-sharpening | FileCheck %s
+; REQUIRES: asserts
+; XFAIL: *
+
+target datalayout = 
"e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128-ni:1-p2:32:8:8:32-ni:2"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @test() {
+; CHECK-LABEL: test
+
+bb:
+  br label %bb1
+
+bb1:  ; preds = %bb10, %bb
+  %tmp = phi i32 [ undef, %bb ], [ %tmp11, %bb10 ]
+  %tmp2 = phi i32 [ 0, %bb ], [ 1, %bb10 ]
+  br i1 false, label %bb3, label %bb4
+
+bb3:  ; preds = %bb1
+  br label %bb8
+
+bb4:  ; preds = %bb1
+  br label %bb16
+
+bb5:  ; preds = %bb16
+  %tmp6 = phi i64 [ %tmp21, %bb16 ]
+  %tmp7 = phi i64 [ undef, %bb16 ]
+  br label %bb8
+
+bb8:  ; preds = %bb5, %bb3
+  %tmp9 = phi i64 [ undef, %bb3 ], [ %tmp6, %bb5 ]
+  br label %bb13
+
+bb10: ; preds = %bb13
+  %tmp11 = phi i32 [ %tmp15, %bb13 ]
+  br i1 undef, label %bb12, label %bb1
+
+bb12: ; preds = %bb10
+  ret void
+
+bb13: ; preds = %bb13, %bb8
+  %tmp14 = phi i32 [ %tmp, %bb8 ], [ %tmp15, %bb13 ]
+  %tmp15 = add i32 %tmp14, undef
+  br i1 undef, label %bb10, label %bb13
+
+bb16: ; preds = %bb16, %bb4
+  %tmp17 = phi i32 [ %tmp27, %bb16 ], [ %tmp2, %bb4 ]
+  %tmp18 = phi i64 [ %tmp21, %bb16 ], [ undef, %bb4 ]
+  %tmp19 = sext i32 %tmp17 to i64
+  %tmp20 = mul i64 undef, %tmp19
+  %tmp21 = add i64 %tmp18, 1
+  %tmp22 = add i32 %tmp17, %tmp
+  %tmp23 = add i32 %tmp22, undef
+  %tmp24 = add i32 %tmp23, undef
+  %tmp25 = and i32 %tmp24, 31
+  %tmp26 = lshr i32 undef, %tmp25
+  %tmp27 = add nsw i32 %tmp17, 1
+  %tmp28 = icmp sgt i32 %tmp17, 111
+  br i1 %tmp28, label %bb5, label %bb16
+}



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 8b330f1 - [SCEV] Add missing type check into getRangeForAffineNoSelfWrappingAR

2020-12-14 Thread Max Kazantsev via llvm-branch-commits

Author: Max Kazantsev
Date: 2020-12-15T14:50:32+07:00
New Revision: 8b330f1f6919a2ac85eeda753ad8d1090468e23f

URL: 
https://github.com/llvm/llvm-project/commit/8b330f1f6919a2ac85eeda753ad8d1090468e23f
DIFF: 
https://github.com/llvm/llvm-project/commit/8b330f1f6919a2ac85eeda753ad8d1090468e23f.diff

LOG: [SCEV] Add missing type check into getRangeForAffineNoSelfWrappingAR

We perform type widening without checking whether it's needed. Bail if the max
iteration count is wider than the AR's type.

Added: 


Modified: 
llvm/lib/Analysis/ScalarEvolution.cpp

llvm/test/Transforms/IndVarSimplify/2020-12-15-trunc-bug-expensive-range-inference.ll

Removed: 




diff  --git a/llvm/lib/Analysis/ScalarEvolution.cpp 
b/llvm/lib/Analysis/ScalarEvolution.cpp
index 3005a44b44ef..071b569d3f17 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -6006,6 +6006,9 @@ ConstantRange 
ScalarEvolution::getRangeForAffineNoSelfWrappingAR(
   // iteration count estimate, and we might infer nw from some exit for which 
we
   // do not know max exit count (or any other side reasoning).
   // TODO: Turn into assert at some point.
+  if (getTypeSizeInBits(MaxBECount->getType()) >
+  getTypeSizeInBits(AddRec->getType()))
+return ConstantRange::getFull(BitWidth);
   MaxBECount = getNoopOrZeroExtend(MaxBECount, AddRec->getType());
   const SCEV *RangeWidth = getMinusOne(AddRec->getType());
   const SCEV *StepAbs = getUMinExpr(Step, getNegativeSCEV(Step));

diff  --git 
a/llvm/test/Transforms/IndVarSimplify/2020-12-15-trunc-bug-expensive-range-inference.ll
 
b/llvm/test/Transforms/IndVarSimplify/2020-12-15-trunc-bug-expensive-range-inference.ll
index dd105f40544e..a1ad4d07fab2 100644
--- 
a/llvm/test/Transforms/IndVarSimplify/2020-12-15-trunc-bug-expensive-range-inference.ll
+++ 
b/llvm/test/Transforms/IndVarSimplify/2020-12-15-trunc-bug-expensive-range-inference.ll
@@ -1,7 +1,5 @@
 ; RUN: opt < %s -indvars -S -scalar-evolution-use-expensive-range-sharpening | 
FileCheck %s
 ; RUN: opt < %s -passes=indvars -S 
-scalar-evolution-use-expensive-range-sharpening | FileCheck %s
-; REQUIRES: asserts
-; XFAIL: *
 
 target datalayout = 
"e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128-ni:1-p2:32:8:8:32-ni:2"
 target triple = "x86_64-unknown-linux-gnu"



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 47e31d1 - [NFC] Reduce code duplication in binop processing in computeExitLimitFromCondCached

2020-11-22 Thread Max Kazantsev via llvm-branch-commits

Author: Max Kazantsev
Date: 2020-11-23T13:18:12+07:00
New Revision: 47e31d1b5eac6a7b69a50404ecdc35daf18c01f9

URL: 
https://github.com/llvm/llvm-project/commit/47e31d1b5eac6a7b69a50404ecdc35daf18c01f9
DIFF: 
https://github.com/llvm/llvm-project/commit/47e31d1b5eac6a7b69a50404ecdc35daf18c01f9.diff

LOG: [NFC] Reduce code duplication in binop processing in 
computeExitLimitFromCondCached

Handling of `and` and `or` relies heavily on copy-paste. Factored out into
a helper function as a preparation step for a further fix (see PR48225).

Differential Revision: https://reviews.llvm.org/D91864
Reviewed By: nikic

Added: 


Modified: 
llvm/include/llvm/Analysis/ScalarEvolution.h
llvm/lib/Analysis/ScalarEvolution.cpp

Removed: 




diff  --git a/llvm/include/llvm/Analysis/ScalarEvolution.h 
b/llvm/include/llvm/Analysis/ScalarEvolution.h
index a45034fb5494..677433c32556 100644
--- a/llvm/include/llvm/Analysis/ScalarEvolution.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -1669,7 +1669,14 @@ class ScalarEvolution {
  Value *ExitCond, bool ExitIfTrue,
  bool ControlsExit,
  bool AllowPredicates);
-
+  Optional
+  computeExitLimitFromCondFromBinOp(ExitLimitCacheTy &Cache, const Loop *L,
+Value *ExitCond, bool ExitIfTrue,
+bool ControlsExit, bool AllowPredicates);
+  ExitLimit computeExitLimitFromCondFromBinOpHelper(
+  ExitLimitCacheTy &Cache, const Loop *L, BinaryOperator *BO,
+  bool EitherMayExit, bool ExitIfTrue, bool ControlsExit,
+  bool AllowPredicates, const Constant *NeutralElement);
   /// Compute the number of times the backedge of the specified loop will
   /// execute if its exit condition were a conditional branch of the ICmpInst
   /// ExitCond and ExitIfTrue. If AllowPredicates is set, this call will try

diff  --git a/llvm/lib/Analysis/ScalarEvolution.cpp 
b/llvm/lib/Analysis/ScalarEvolution.cpp
index 3176f8fc12f5..496b0da8853a 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -7521,114 +7521,10 @@ ScalarEvolution::ExitLimit 
ScalarEvolution::computeExitLimitFromCondCached(
 ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCondImpl(
 ExitLimitCacheTy &Cache, const Loop *L, Value *ExitCond, bool ExitIfTrue,
 bool ControlsExit, bool AllowPredicates) {
-  // Check if the controlling expression for this loop is an And or Or.
-  if (BinaryOperator *BO = dyn_cast(ExitCond)) {
-if (BO->getOpcode() == Instruction::And) {
-  // Recurse on the operands of the and.
-  bool EitherMayExit = !ExitIfTrue;
-  ExitLimit EL0 = computeExitLimitFromCondCached(
-  Cache, L, BO->getOperand(0), ExitIfTrue,
-  ControlsExit && !EitherMayExit, AllowPredicates);
-  ExitLimit EL1 = computeExitLimitFromCondCached(
-  Cache, L, BO->getOperand(1), ExitIfTrue,
-  ControlsExit && !EitherMayExit, AllowPredicates);
-  // Be robust against unsimplified IR for the form "and i1 X, true"
-  if (ConstantInt *CI = dyn_cast(BO->getOperand(1)))
-return CI->isOne() ? EL0 : EL1;
-  if (ConstantInt *CI = dyn_cast(BO->getOperand(0)))
-return CI->isOne() ? EL1 : EL0;
-  const SCEV *BECount = getCouldNotCompute();
-  const SCEV *MaxBECount = getCouldNotCompute();
-  if (EitherMayExit) {
-// Both conditions must be true for the loop to continue executing.
-// Choose the less conservative count.
-if (EL0.ExactNotTaken == getCouldNotCompute() ||
-EL1.ExactNotTaken == getCouldNotCompute())
-  BECount = getCouldNotCompute();
-else
-  BECount =
-  getUMinFromMismatchedTypes(EL0.ExactNotTaken, EL1.ExactNotTaken);
-if (EL0.MaxNotTaken == getCouldNotCompute())
-  MaxBECount = EL1.MaxNotTaken;
-else if (EL1.MaxNotTaken == getCouldNotCompute())
-  MaxBECount = EL0.MaxNotTaken;
-else
-  MaxBECount =
-  getUMinFromMismatchedTypes(EL0.MaxNotTaken, EL1.MaxNotTaken);
-  } else {
-// Both conditions must be true at the same time for the loop to exit.
-// For now, be conservative.
-if (EL0.MaxNotTaken == EL1.MaxNotTaken)
-  MaxBECount = EL0.MaxNotTaken;
-if (EL0.ExactNotTaken == EL1.ExactNotTaken)
-  BECount = EL0.ExactNotTaken;
-  }
-
-  // There are cases (e.g. PR26207) where computeExitLimitFromCond is able
-  // to be more aggressive when computing BECount than when computing
-  // MaxBECount.  In these cases it is possible for EL0.ExactNotTaken and
-  // EL1.ExactNotTaken to match, but for EL0.MaxNotTaken and 
EL1.MaxNotTaken
-  // to not.
-  if (isa(MaxBECount) &&
-  !isa(BECount))
-M

[llvm-branch-commits] [llvm] 48d7cc6 - [SCEV] Fix incorrect treatment of max taken count. PR48225

2020-11-23 Thread Max Kazantsev via llvm-branch-commits

Author: Max Kazantsev
Date: 2020-11-23T16:52:39+07:00
New Revision: 48d7cc6ae23b0e5b1922457462d0f6e4582a1ae7

URL: 
https://github.com/llvm/llvm-project/commit/48d7cc6ae23b0e5b1922457462d0f6e4582a1ae7
DIFF: 
https://github.com/llvm/llvm-project/commit/48d7cc6ae23b0e5b1922457462d0f6e4582a1ae7.diff

LOG: [SCEV] Fix incorrect treatment of max taken count. PR48225

SCEV makes a logical mistake when handling EitherMayExit in the
case when both conditions must be met to exit the loop. The
mistake looks as follows: "if condition `A` fails within at most the `X` first
iterations, and `B` fails within at most the `Y` first iterations, then `A & B`
fails within at most the `min(X, Y)` first iterations". This is wrong, because
both of them must fail at the same time.

A simple example illustrating this is the following: we have an IV with step 1,
condition `A` = "IV is even", condition `B` = "IV is odd". Both `A` and `B`
will fail within the first two iterations. But that doesn't mean that both of them
will fail at the same time within the first two iterations, which would mean
that the IV is neither even nor odd at the same time within the first 2 iterations.

We can only do so for known exact BE counts, but not for max.

Differential Revision: https://reviews.llvm.org/D91942
Reviewed By: nikic

Added: 


Modified: 
llvm/lib/Analysis/ScalarEvolution.cpp
llvm/test/Analysis/ScalarEvolution/pr48225.ll

Removed: 




diff  --git a/llvm/lib/Analysis/ScalarEvolution.cpp 
b/llvm/lib/Analysis/ScalarEvolution.cpp
index 496b0da8853a..a366ad355233 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -7611,8 +7611,6 @@ ScalarEvolution::computeExitLimitFromCondFromBinOpHelper(
   } else {
 // Both conditions must be same at the same time for the loop to exit.
 // For now, be conservative.
-if (EL0.MaxNotTaken == EL1.MaxNotTaken)
-  MaxBECount = EL0.MaxNotTaken;
 if (EL0.ExactNotTaken == EL1.ExactNotTaken)
   BECount = EL0.ExactNotTaken;
   }

diff  --git a/llvm/test/Analysis/ScalarEvolution/pr48225.ll 
b/llvm/test/Analysis/ScalarEvolution/pr48225.ll
index bd7dac26ebd3..eaf9b18d7c5a 100644
--- a/llvm/test/Analysis/ScalarEvolution/pr48225.ll
+++ b/llvm/test/Analysis/ScalarEvolution/pr48225.ll
@@ -4,7 +4,6 @@
 
 ; Tests demonstrate the bug reported as PR48225 by Congzhe Cao.
 
-; FIXME: This test demonstrates a bug in max backedge taken count computation.
 ; When %boolcond = false and %cond = 0:
 ; - %cond.false.on.first.iter is false on 1st iteration;
 ; - %cond.false.on.second.iter is false on 2nd iteration;
@@ -17,16 +16,16 @@ define void @test_and(i1 %boolcond) {
 ; CHECK-NEXT:%conv = zext i1 %boolcond to i32
 ; CHECK-NEXT:--> (zext i1 %boolcond to i32) U: [0,2) S: [0,2)
 ; CHECK-NEXT:%iv = phi i32 [ 0, %entry ], [ %inc, %backedge ]
-; CHECK-NEXT:--> {0,+,1}<%loop> U: [0,2) S: [0,2) Exits: 
<> LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:--> {0,+,1}<%loop> U: [0,3) S: [0,3) Exits: 
<> LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:%or.cond = and i1 %cond.false.on.first.iter, 
%cond.false.on.second.iter
 ; CHECK-NEXT:--> %or.cond U: full-set S: full-set Exits: <> 
LoopDispositions: { %loop: Variant }
 ; CHECK-NEXT:%inc = add nuw nsw i32 %iv, 1
-; CHECK-NEXT:--> {1,+,1}<%loop> U: [1,3) S: [1,3) Exits: 
<> LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:--> {1,+,1}<%loop> U: [1,4) S: [1,4) Exits: 
<> LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:  Determining loop execution counts for: @test_and
 ; CHECK-NEXT:  Loop %loop:  Unpredictable backedge-taken count.
 ; CHECK-NEXT:exit count for loop: 2
 ; CHECK-NEXT:exit count for backedge: ***COULDNOTCOMPUTE***
-; CHECK-NEXT:  Loop %loop: max backedge-taken count is 1
+; CHECK-NEXT:  Loop %loop: max backedge-taken count is 2
 ; CHECK-NEXT:  Loop %loop: Unpredictable predicated backedge-taken count.
 ;
 entry:
@@ -52,7 +51,6 @@ for.end:
   ret void
 }
 
-; FIXME: This test demonstrates a bug in max backedge taken count computation.
 ; When %boolcond = false and %cond = 0:
 ; - %cond.true.on.first.iter is true on 1st iteration;
 ; - %cond.true.on.second.iter is true on 2nd iteration;
@@ -65,16 +63,16 @@ define void @test_or(i1 %boolcond) {
 ; CHECK-NEXT:%conv = zext i1 %boolcond to i32
 ; CHECK-NEXT:--> (zext i1 %boolcond to i32) U: [0,2) S: [0,2)
 ; CHECK-NEXT:%iv = phi i32 [ 0, %entry ], [ %inc, %backedge ]
-; CHECK-NEXT:--> {0,+,1}<%loop> U: [0,2) S: [0,2) Exits: 
<> LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:--> {0,+,1}<%loop> U: [0,3) S: [0,3) Exits: 
<> LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:%or.cond = or i1 %cond.true.on.first.iter, 
%cond.true.on.second.iter
 ; CHECK-NEXT:--> %or.cond U: full-set S: full-set Exits: <> 
LoopDispositions: { %loop: Variant }
 ; CHECK-NEXT:%inc = add nuw nsw i32 %iv, 1
-; CHECK-NEXT:  

[llvm-branch-commits] [llvm] 28d7ba1 - [IndVars] Use more precise context when eliminating narrowing

2020-11-24 Thread Max Kazantsev via llvm-branch-commits

Author: Max Kazantsev
Date: 2020-11-25T11:47:39+07:00
New Revision: 28d7ba15435f055562d18ee7111db4adbaf28fae

URL: 
https://github.com/llvm/llvm-project/commit/28d7ba15435f055562d18ee7111db4adbaf28fae
DIFF: 
https://github.com/llvm/llvm-project/commit/28d7ba15435f055562d18ee7111db4adbaf28fae.diff

LOG: [IndVars] Use more precise context when eliminating narrowing

When deciding to widen a narrow use, we may need to prove some facts
about it. For the proof, a context instruction is used. Currently we take the
instruction being widened as the context.

However, we may be more precise here if we take as the context the point that
dominates all users of the instruction being widened.
Differential Revision: https://reviews.llvm.org/D90456
Reviewed By: skatkov

Added: 


Modified: 
llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll

Removed: 




diff  --git a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp 
b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
index 2693f451ebde..290f3671afca 100644
--- a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -1561,6 +1561,21 @@ bool 
WidenIV::widenWithVariantUse(WidenIV::NarrowIVDefUse DU) {
 return true;
   }
 
+  // We'll prove some facts that should be true in the context of ext users. If
+  // there is no users, we are done now. If there are some, pick their common
+  // dominator as context.
+  Instruction *Context = nullptr;
+  for (auto *Ext : ExtUsers) {
+if (!Context || DT->dominates(Ext, Context))
+  Context = Ext;
+else if (!DT->dominates(Context, Ext))
+  // For users that don't have dominance relation, use common dominator.
+  Context =
+  DT->findNearestCommonDominator(Context->getParent(), 
Ext->getParent())
+  ->getTerminator();
+  }
+  assert(Context && "Context not found?");
+
   if (!CanSignExtend && !CanZeroExtend) {
 // Because InstCombine turns 'sub nuw' to 'add' losing the no-wrap flag, we
 // will most likely not see it. Let's try to prove it.
@@ -1573,7 +1588,7 @@ bool WidenIV::widenWithVariantUse(WidenIV::NarrowIVDefUse 
DU) {
 if (!SE->isKnownNegative(RHS))
   return false;
 bool ProvedSubNUW = SE->isKnownPredicateAt(
-ICmpInst::ICMP_UGE, LHS, SE->getNegativeSCEV(RHS), NarrowUse);
+ICmpInst::ICMP_UGE, LHS, SE->getNegativeSCEV(RHS), Context);
 if (!ProvedSubNUW)
   return false;
 // In fact, our 'add' is 'sub nuw'. We will need to widen the 2nd operand 
as

diff  --git a/llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll 
b/llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll
index cad5d3c66eca..2bb37d23866e 100644
--- a/llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll
+++ b/llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll
@@ -554,16 +554,13 @@ define i32 @test11(i32 %start, i32* %p, i32* %q) {
 ; CHECK-NEXT:br label [[LOOP:%.*]]
 ; CHECK:   loop:
 ; CHECK-NEXT:[[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], 
[[BACKEDGE:%.*]] ], [ [[TMP0]], [[ENTRY:%.*]] ]
-; CHECK-NEXT:[[TMP1:%.*]] = trunc i64 [[INDVARS_IV]] to i32
-; CHECK-NEXT:[[IV_NEXT:%.*]] = add i32 [[TMP1]], -1
+; CHECK-NEXT:[[TMP1:%.*]] = add nsw i64 [[INDVARS_IV]], -1
 ; CHECK-NEXT:[[COND:%.*]] = icmp eq i64 [[INDVARS_IV]], 0
 ; CHECK-NEXT:br i1 [[COND]], label [[EXIT:%.*]], label [[BACKEDGE]]
 ; CHECK:   backedge:
-; CHECK-NEXT:[[INDEX:%.*]] = zext i32 [[IV_NEXT]] to i64
-; CHECK-NEXT:[[STORE_ADDR:%.*]] = getelementptr i32, i32* [[P:%.*]], i64 
[[INDEX]]
+; CHECK-NEXT:[[STORE_ADDR:%.*]] = getelementptr i32, i32* [[P:%.*]], i64 
[[TMP1]]
 ; CHECK-NEXT:store i32 1, i32* [[STORE_ADDR]], align 4
-; CHECK-NEXT:[[LOAD_ADDR:%.*]] = getelementptr i32, i32* [[Q:%.*]], i64 
[[INDEX]]
-; CHECK-NEXT:[[STOP:%.*]] = load i32, i32* [[Q]], align 4
+; CHECK-NEXT:[[STOP:%.*]] = load i32, i32* [[Q:%.*]], align 4
 ; CHECK-NEXT:[[LOOP_COND:%.*]] = icmp eq i32 [[STOP]], 0
 ; CHECK-NEXT:[[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
 ; CHECK-NEXT:br i1 [[LOOP_COND]], label [[LOOP]], label [[FAILURE:%.*]]



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 9130651 - Revert "[SCEV] Generalize no-self-wrap check in isLoopInvariantExitCondDuringFirstIterations"

2020-11-24 Thread Max Kazantsev via llvm-branch-commits

Author: Max Kazantsev
Date: 2020-11-25T13:26:17+07:00
New Revision: 9130651126b745b18138b816487cdeb8a689a27f

URL: 
https://github.com/llvm/llvm-project/commit/9130651126b745b18138b816487cdeb8a689a27f
DIFF: 
https://github.com/llvm/llvm-project/commit/9130651126b745b18138b816487cdeb8a689a27f.diff

LOG: Revert "[SCEV] Generalize no-self-wrap check in 
isLoopInvariantExitCondDuringFirstIterations"

This reverts commit 7dcc8899174f44b7447bc48a9f2ff27f5458f8b7.

This patch introduced a logical error that breaks the whole logic of this analysis.
All checks we are making are supposed to be loop-independent, so that we could
safely remove the range check. The 'nw' fact is loop-dependent, so we can remove
the check based on facts from this very check.

Motivating examples will follow up.

Added: 


Modified: 
llvm/lib/Analysis/ScalarEvolution.cpp

Removed: 




diff  --git a/llvm/lib/Analysis/ScalarEvolution.cpp 
b/llvm/lib/Analysis/ScalarEvolution.cpp
index 08ed363918a9..5f77f4aa05c2 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -9643,19 +9643,17 @@ 
ScalarEvolution::getLoopInvariantExitCondDuringFirstIterations(
   if (!ICmpInst::isRelational(Pred))
 return None;
 
+  // TODO: Support steps other than +/- 1.
   const SCEV *Step = AR->getStepRecurrence(*this);
-  bool IsStepNonPositive = isKnownNonPositive(Step);
-  if (!IsStepNonPositive && !isKnownNonNegative(Step))
+  auto *One = getOne(Step->getType());
+  auto *MinusOne = getNegativeSCEV(One);
+  if (Step != One && Step != MinusOne)
 return None;
-  bool HasNoSelfWrap = AR->hasNoSelfWrap();
-  if (!HasNoSelfWrap)
-// If num iter has same type as the AddRec, and step is +/- 1, even max
-// possible number of iterations is not enough to self-wrap.
-if (MaxIter->getType() == AR->getType())
-  if (Step == getOne(AR->getType()) || Step == getMinusOne(AR->getType()))
-HasNoSelfWrap = true;
-  // Only proceed with non-self-wrapping ARs.
-  if (!HasNoSelfWrap)
+
+  // Type mismatch here means that MaxIter is potentially larger than max
+  // unsigned value in start type, which mean we cannot prove no wrap for the
+  // indvar.
+  if (AR->getType() != MaxIter->getType())
 return None;
 
   // Value of IV on suggested last iteration.
@@ -9663,13 +9661,14 @@ 
ScalarEvolution::getLoopInvariantExitCondDuringFirstIterations(
   // Does it still meet the requirement?
   if (!isKnownPredicateAt(Pred, Last, RHS, Context))
 return None;
-  // We know that the addrec does not have a self-wrap. To prove that there is
-  // no signed/unsigned wrap, we need to check that
-  // Start <= Last for positive step or Start >= Last for negative step. Either
-  // works for zero step.
+  // Because step is +/- 1 and MaxIter has same type as Start (i.e. it does
+  // not exceed max unsigned value of this type), this effectively proves
+  // that there is no wrap during the iteration. To prove that there is no
+  // signed/unsigned wrap, we need to check that
+  // Start <= Last for step = 1 or Start >= Last for step = -1.
   ICmpInst::Predicate NoOverflowPred =
   CmpInst::isSigned(Pred) ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
-  if (IsStepNonPositive)
+  if (Step == MinusOne)
 NoOverflowPred = CmpInst::getSwappedPredicate(NoOverflowPred);
   const SCEV *Start = AR->getStart();
   if (!isKnownPredicateAt(NoOverflowPred, Start, Last, Context))



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 664e1da - [LoopLoadElim] Make sure all loops are in simplify form. PR48150

2020-11-25 Thread Max Kazantsev via llvm-branch-commits

Author: Max Kazantsev
Date: 2020-11-26T10:51:11+07:00
New Revision: 664e1da485d244325947d056a271c3c733ad4c7c

URL: 
https://github.com/llvm/llvm-project/commit/664e1da485d244325947d056a271c3c733ad4c7c
DIFF: 
https://github.com/llvm/llvm-project/commit/664e1da485d244325947d056a271c3c733ad4c7c.diff

LOG: [LoopLoadElim] Make sure all loops are in simplify form. PR48150

LoopLoadElim may end up expanding an AddRec from a loop
which is not the current loop. This loop may not be in simplify
form. We figure it out after the no-return point, so cannot bail
in this case.

AddRec requires simplify form to expand. The only way to ensure
this does not crash is to simplify all loops beforehand.

The issue only exists in new PM. Old PM requests LoopSimplify
required pass and it simplifies all loops before the opt begins.

Differential Revision: https://reviews.llvm.org/D91525
Reviewed By: asbirlea, aeubanks

Added: 


Modified: 
llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
llvm/test/Transforms/LoopLoadElim/pr-48150.ll

Removed: 




diff  --git a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp 
b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
index 74e32fb870b5..475448740ae4 100644
--- a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
@@ -56,6 +56,7 @@
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils.h"
+#include "llvm/Transforms/Utils/LoopSimplify.h"
 #include "llvm/Transforms/Utils/LoopVersioning.h"
 #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
 #include "llvm/Transforms/Utils/SizeOpts.h"
@@ -610,6 +611,7 @@ class LoadEliminationForLoop {
 static bool
 eliminateLoadsAcrossLoops(Function &F, LoopInfo &LI, DominatorTree &DT,
   BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
+  ScalarEvolution *SE, AssumptionCache *AC,
  function_ref<const LoopAccessInfo &(Loop &)> GetLAI) 
{
   // Build up a worklist of inner-loops to transform to avoid iterator
   // invalidation.
@@ -618,14 +620,17 @@ eliminateLoadsAcrossLoops(Function &F, LoopInfo &LI, 
DominatorTree &DT,
   // which merely optimizes the use of loads in a loop.
  SmallVector<Loop *, 8> Worklist;
 
+  bool Changed = false;
+
   for (Loop *TopLevelLoop : LI)
-for (Loop *L : depth_first(TopLevelLoop))
+for (Loop *L : depth_first(TopLevelLoop)) {
+  Changed |= simplifyLoop(L, &DT, &LI, SE, AC, /*MSSAU*/ nullptr, false);
   // We only handle inner-most loops.
   if (L->isInnermost())
 Worklist.push_back(L);
+}
 
   // Now walk the identified inner loops.
-  bool Changed = false;
   for (Loop *L : Worklist) {
 // The actual work is performed by LoadEliminationForLoop.
 LoadEliminationForLoop LEL(L, &LI, GetLAI(*L), &DT, BFI, PSI);
@@ -660,7 +665,7 @@ class LoopLoadElimination : public FunctionPass {
 
 // Process each loop nest in the function.
 return eliminateLoadsAcrossLoops(
-F, LI, DT, BFI, PSI,
+F, LI, DT, BFI, PSI, /*SE*/ nullptr, /*AC*/ nullptr,
 [&LAA](Loop &L) -> const LoopAccessInfo & { return LAA.getInfo(&L); });
   }
 
@@ -717,7 +722,7 @@ PreservedAnalyses LoopLoadEliminationPass::run(Function &F,
 
   auto &LAM = AM.getResult<LoopAnalysisManagerFunctionProxy>(F).getManager();
   bool Changed = eliminateLoadsAcrossLoops(
-  F, LI, DT, BFI, PSI, [&](Loop &L) -> const LoopAccessInfo & {
+  F, LI, DT, BFI, PSI, &SE, &AC, [&](Loop &L) -> const LoopAccessInfo & {
 LoopStandardAnalysisResults AR = {AA,  AC,  DT,  LI,  SE,
   TLI, TTI, nullptr, MSSA};
 return LAM.getResult<LoopAccessAnalysis>(L, AR);

diff  --git a/llvm/test/Transforms/LoopLoadElim/pr-48150.ll 
b/llvm/test/Transforms/LoopLoadElim/pr-48150.ll
index 60cc02c50ad4..a88fd9b9d938 100644
--- a/llvm/test/Transforms/LoopLoadElim/pr-48150.ll
+++ b/llvm/test/Transforms/LoopLoadElim/pr-48150.ll
@@ -1,6 +1,4 @@
 ; RUN: opt -passes=loop-load-elim -S < %s | FileCheck %s
-; REQUIRES: asserts
-; XFAIL: *
 
 target datalayout = 
"e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128-ni:1-p2:32:8:8:32-ni:2"
 target triple = "x86_64-unknown-linux-gnu"



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] f10500e - [IndVars] Use isLoopBackedgeGuardedByCond for last iteration check

2020-11-25 Thread Max Kazantsev via llvm-branch-commits

Author: Max Kazantsev
Date: 2020-11-26T12:37:21+07:00
New Revision: f10500e220e354a80c53490158933ca2ede3be20

URL: 
https://github.com/llvm/llvm-project/commit/f10500e220e354a80c53490158933ca2ede3be20
DIFF: 
https://github.com/llvm/llvm-project/commit/f10500e220e354a80c53490158933ca2ede3be20.diff

LOG: [IndVars] Use isLoopBackedgeGuardedByCond for last iteration check

Use more context to prove contextual facts about the last iteration. It is
only executed when the backedge is taken, so we can use 
`isLoopBackedgeGuardedByCond`
to make this check.

Differential Revision: https://reviews.llvm.org/D91535
Reviewed By: skatkov

Added: 


Modified: 
llvm/lib/Analysis/ScalarEvolution.cpp
llvm/test/Transforms/IndVarSimplify/predicated_ranges.ll

Removed: 




diff  --git a/llvm/lib/Analysis/ScalarEvolution.cpp 
b/llvm/lib/Analysis/ScalarEvolution.cpp
index 5a7f1b94a4e8..53fd668be05c 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -9673,7 +9673,7 @@ 
ScalarEvolution::getLoopInvariantExitCondDuringFirstIterations(
   // Value of IV on suggested last iteration.
   const SCEV *Last = AR->evaluateAtIteration(MaxIter, *this);
   // Does it still meet the requirement?
-  if (!isKnownPredicateAt(Pred, Last, RHS, Context))
+  if (!isLoopBackedgeGuardedByCond(L, Pred, Last, RHS))
 return None;
   // Because step is +/- 1 and MaxIter has same type as Start (i.e. it does
   // not exceed max unsigned value of this type), this effectively proves

diff  --git a/llvm/test/Transforms/IndVarSimplify/predicated_ranges.ll 
b/llvm/test/Transforms/IndVarSimplify/predicated_ranges.ll
index 85946bbe59e6..ffe4db943548 100644
--- a/llvm/test/Transforms/IndVarSimplify/predicated_ranges.ll
+++ b/llvm/test/Transforms/IndVarSimplify/predicated_ranges.ll
@@ -835,15 +835,16 @@ define void @test_can_predicate_simple_unsigned_different_start(i32 %start, i32*
 ; CHECK-LABEL: @test_can_predicate_simple_unsigned_different_start(
 ; CHECK-NEXT:  preheader:
 ; CHECK-NEXT:[[LEN:%.*]] = load i32, i32* [[P:%.*]], align 4
+; CHECK-NEXT:[[TMP0:%.*]] = add i32 [[START:%.*]], -1
 ; CHECK-NEXT:br label [[LOOP:%.*]]
 ; CHECK:   loop:
-; CHECK-NEXT:[[IV:%.*]] = phi i32 [ [[START:%.*]], [[PREHEADER:%.*]] ], [ 
[[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ]
+; CHECK-NEXT:[[IV:%.*]] = phi i32 [ [[START]], [[PREHEADER:%.*]] ], [ 
[[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ]
 ; CHECK-NEXT:[[ZERO_COND:%.*]] = icmp eq i32 [[IV]], 0
 ; CHECK-NEXT:br i1 [[ZERO_COND]], label [[EXIT:%.*]], label 
[[RANGE_CHECK_BLOCK:%.*]]
 ; CHECK:   range_check_block:
 ; CHECK-NEXT:[[IV_NEXT]] = sub i32 [[IV]], 1
-; CHECK-NEXT:[[RANGE_CHECK:%.*]] = icmp ult i32 [[IV_NEXT]], [[LEN]]
-; CHECK-NEXT:br i1 [[RANGE_CHECK]], label [[BACKEDGE]], label [[FAIL:%.*]]
+; CHECK-NEXT:[[RANGE_CHECK1:%.*]] = icmp ult i32 [[TMP0]], [[LEN]]
+; CHECK-NEXT:br i1 [[RANGE_CHECK1]], label [[BACKEDGE]], label [[FAIL:%.*]]
 ; CHECK:   backedge:
 ; CHECK-NEXT:[[EL_PTR:%.*]] = getelementptr i32, i32* [[P]], i32 [[IV]]
 ; CHECK-NEXT:[[EL:%.*]] = load i32, i32* [[EL_PTR]], align 4
@@ -885,15 +886,16 @@ define void @test_can_predicate_simple_unsigned_inverted_different_start(i32 %st
 ; CHECK-LABEL: @test_can_predicate_simple_unsigned_inverted_different_start(
 ; CHECK-NEXT:  preheader:
 ; CHECK-NEXT:[[LEN:%.*]] = load i32, i32* [[P:%.*]], align 4
+; CHECK-NEXT:[[TMP0:%.*]] = add i32 [[START:%.*]], -1
 ; CHECK-NEXT:br label [[LOOP:%.*]]
 ; CHECK:   loop:
-; CHECK-NEXT:[[IV:%.*]] = phi i32 [ [[START:%.*]], [[PREHEADER:%.*]] ], [ 
[[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ]
+; CHECK-NEXT:[[IV:%.*]] = phi i32 [ [[START]], [[PREHEADER:%.*]] ], [ 
[[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ]
 ; CHECK-NEXT:[[ZERO_COND:%.*]] = icmp eq i32 [[IV]], 0
 ; CHECK-NEXT:br i1 [[ZERO_COND]], label [[EXIT:%.*]], label 
[[RANGE_CHECK_BLOCK:%.*]]
 ; CHECK:   range_check_block:
 ; CHECK-NEXT:[[IV_NEXT]] = sub i32 [[IV]], 1
-; CHECK-NEXT:[[RANGE_CHECK:%.*]] = icmp uge i32 [[IV_NEXT]], [[LEN]]
-; CHECK-NEXT:br i1 [[RANGE_CHECK]], label [[FAIL:%.*]], label [[BACKEDGE]]
+; CHECK-NEXT:[[RANGE_CHECK1:%.*]] = icmp uge i32 [[TMP0]], [[LEN]]
+; CHECK-NEXT:br i1 [[RANGE_CHECK1]], label [[FAIL:%.*]], label [[BACKEDGE]]
 ; CHECK:   backedge:
 ; CHECK-NEXT:[[EL_PTR:%.*]] = getelementptr i32, i32* [[P]], i32 [[IV]]
 ; CHECK-NEXT:[[EL:%.*]] = load i32, i32* [[EL_PTR]], align 4



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 14f2ad0 - [SCEV] Use isKnownPredicateAt in isLoopBackedgeGuardedByCond

2020-11-25 Thread Max Kazantsev via llvm-branch-commits

Author: Max Kazantsev
Date: 2020-11-26T13:20:02+07:00
New Revision: 14f2ad0e3cc54d5eb254b545a469e8ffdb62b119

URL: 
https://github.com/llvm/llvm-project/commit/14f2ad0e3cc54d5eb254b545a469e8ffdb62b119
DIFF: 
https://github.com/llvm/llvm-project/commit/14f2ad0e3cc54d5eb254b545a469e8ffdb62b119.diff

LOG: [SCEV] Use isKnownPredicateAt in isLoopBackedgeGuardedByCond

A piece of code in `isLoopBackedgeGuardedByCond` basically duplicates
the dominators traversal from `isBlockEntryGuardedByCond` called from
`isKnownPredicateAt`, but it's less powerful because it does not give context
to `isImpliedCond`. This patch reuses the `isKnownPredicateAt `function there,
reducing the amount of code duplication and making it more powerful.

Differential Revision: https://reviews.llvm.org/D92152
Reviewed By: skatkov

Added: 


Modified: 
llvm/lib/Analysis/ScalarEvolution.cpp
llvm/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll

Removed: 




diff  --git a/llvm/lib/Analysis/ScalarEvolution.cpp 
b/llvm/lib/Analysis/ScalarEvolution.cpp
index 53fd668be05c..31b88c92a889 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -9911,42 +9911,7 @@ ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop 
*L,
   if (isImpliedViaGuard(Latch, Pred, LHS, RHS))
 return true;
 
-  for (DomTreeNode *DTN = DT[Latch], *HeaderDTN = DT[L->getHeader()];
-   DTN != HeaderDTN; DTN = DTN->getIDom()) {
-assert(DTN && "should reach the loop header before reaching the root!");
-
-BasicBlock *BB = DTN->getBlock();
-if (isImpliedViaGuard(BB, Pred, LHS, RHS))
-  return true;
-
-BasicBlock *PBB = BB->getSinglePredecessor();
-if (!PBB)
-  continue;
-
-BranchInst *ContinuePredicate = dyn_cast<BranchInst>(PBB->getTerminator());
-if (!ContinuePredicate || !ContinuePredicate->isConditional())
-  continue;
-
-Value *Condition = ContinuePredicate->getCondition();
-
-// If we have an edge `E` within the loop body that dominates the only
-// latch, the condition guarding `E` also guards the backedge.  This
-// reasoning works only for loops with a single latch.
-
-BasicBlockEdge DominatingEdge(PBB, BB);
-if (DominatingEdge.isSingleEdge()) {
-  // We're constructively (and conservatively) enumerating edges within the
-  // loop body that dominate the latch.  The dominator tree better agree
-  // with us on this:
-  assert(DT.dominates(DominatingEdge, Latch) && "should be!");
-
-  if (isImpliedCond(Pred, LHS, RHS, Condition,
-BB != ContinuePredicate->getSuccessor(0)))
-return true;
-}
-  }
-
-  return false;
+  return isKnownPredicateAt(Pred, LHS, RHS, Latch->getTerminator());
 }
 
 bool ScalarEvolution::isBasicBlockEntryGuardedByCond(const BasicBlock *BB,

diff  --git a/llvm/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll 
b/llvm/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll
index 8a07a49303d2..c39828923d5f 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll
@@ -69,7 +69,7 @@ define void @_Z15IntegerToStringjjR7Vector2(i32 %i, i32 
%radix, %struct.Vector2*
 ; CHECK-NEXT:[[UGLYGEP2:%.*]] = bitcast i8* [[UGLYGEP]] to i16*
 ; CHECK-NEXT:[[TMP29:%.*]] = load i16, i16* [[LSR_IV810]], align 2
 ; CHECK-NEXT:store i16 [[TMP29]], i16* [[UGLYGEP2]], align 2
-; CHECK-NEXT:[[LSR_IV_NEXT]] = add i64 [[LSR_IV]], 2
+; CHECK-NEXT:[[LSR_IV_NEXT]] = add nuw nsw i64 [[LSR_IV]], 2
 ; CHECK-NEXT:[[LSR_IV_NEXT3:%.*]] = inttoptr i64 [[LSR_IV_NEXT]] to i16*
 ; CHECK-NEXT:[[SCEVGEP9:%.*]] = getelementptr [33 x i16], [33 x i16]* 
[[LSR_IV8]], i64 0, i64 1
 ; CHECK-NEXT:[[TMP3]] = bitcast i16* [[SCEVGEP9]] to [33 x i16]*



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 3b6481e - Revert "[SCEV] Use isKnownPredicateAt in isLoopBackedgeGuardedByCond"

2020-11-26 Thread Max Kazantsev via llvm-branch-commits

Author: Max Kazantsev
Date: 2020-11-26T16:42:43+07:00
New Revision: 3b6481eae2597f656b9f5bb6a5eb5438eb8cb294

URL: 
https://github.com/llvm/llvm-project/commit/3b6481eae2597f656b9f5bb6a5eb5438eb8cb294
DIFF: 
https://github.com/llvm/llvm-project/commit/3b6481eae2597f656b9f5bb6a5eb5438eb8cb294.diff

LOG: Revert "[SCEV] Use isKnownPredicateAt in isLoopBackedgeGuardedByCond"

This reverts commit 14f2ad0e3cc54d5eb254b545a469e8ffdb62b119.

Reverting to investigate compile time drop.

Differential Revision: https://reviews.llvm.org/D92152

Added: 


Modified: 
llvm/lib/Analysis/ScalarEvolution.cpp
llvm/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll

Removed: 




diff  --git a/llvm/lib/Analysis/ScalarEvolution.cpp 
b/llvm/lib/Analysis/ScalarEvolution.cpp
index 31b88c92a889..53fd668be05c 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -9911,7 +9911,42 @@ ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop 
*L,
   if (isImpliedViaGuard(Latch, Pred, LHS, RHS))
 return true;
 
-  return isKnownPredicateAt(Pred, LHS, RHS, Latch->getTerminator());
+  for (DomTreeNode *DTN = DT[Latch], *HeaderDTN = DT[L->getHeader()];
+   DTN != HeaderDTN; DTN = DTN->getIDom()) {
+assert(DTN && "should reach the loop header before reaching the root!");
+
+BasicBlock *BB = DTN->getBlock();
+if (isImpliedViaGuard(BB, Pred, LHS, RHS))
+  return true;
+
+BasicBlock *PBB = BB->getSinglePredecessor();
+if (!PBB)
+  continue;
+
+BranchInst *ContinuePredicate = dyn_cast<BranchInst>(PBB->getTerminator());
+if (!ContinuePredicate || !ContinuePredicate->isConditional())
+  continue;
+
+Value *Condition = ContinuePredicate->getCondition();
+
+// If we have an edge `E` within the loop body that dominates the only
+// latch, the condition guarding `E` also guards the backedge.  This
+// reasoning works only for loops with a single latch.
+
+BasicBlockEdge DominatingEdge(PBB, BB);
+if (DominatingEdge.isSingleEdge()) {
+  // We're constructively (and conservatively) enumerating edges within the
+  // loop body that dominate the latch.  The dominator tree better agree
+  // with us on this:
+  assert(DT.dominates(DominatingEdge, Latch) && "should be!");
+
+  if (isImpliedCond(Pred, LHS, RHS, Condition,
+BB != ContinuePredicate->getSuccessor(0)))
+return true;
+}
+  }
+
+  return false;
 }
 
 bool ScalarEvolution::isBasicBlockEntryGuardedByCond(const BasicBlock *BB,

diff  --git a/llvm/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll 
b/llvm/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll
index c39828923d5f..8a07a49303d2 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll
@@ -69,7 +69,7 @@ define void @_Z15IntegerToStringjjR7Vector2(i32 %i, i32 
%radix, %struct.Vector2*
 ; CHECK-NEXT:[[UGLYGEP2:%.*]] = bitcast i8* [[UGLYGEP]] to i16*
 ; CHECK-NEXT:[[TMP29:%.*]] = load i16, i16* [[LSR_IV810]], align 2
 ; CHECK-NEXT:store i16 [[TMP29]], i16* [[UGLYGEP2]], align 2
-; CHECK-NEXT:[[LSR_IV_NEXT]] = add nuw nsw i64 [[LSR_IV]], 2
+; CHECK-NEXT:[[LSR_IV_NEXT]] = add i64 [[LSR_IV]], 2
 ; CHECK-NEXT:[[LSR_IV_NEXT3:%.*]] = inttoptr i64 [[LSR_IV_NEXT]] to i16*
 ; CHECK-NEXT:[[SCEVGEP9:%.*]] = getelementptr [33 x i16], [33 x i16]* 
[[LSR_IV8]], i64 0, i64 1
 ; CHECK-NEXT:[[TMP3]] = bitcast i16* [[SCEVGEP9]] to [33 x i16]*



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 3d4c046 - [SCEV] Use isBasicBlockEntryGuardedByCond in isLoopBackedgeGuardedByCond

2020-11-26 Thread Max Kazantsev via llvm-branch-commits

Author: Max Kazantsev
Date: 2020-11-26T17:08:38+07:00
New Revision: 3d4c0460ec6040fc071e56dc113afd181294591e

URL: 
https://github.com/llvm/llvm-project/commit/3d4c0460ec6040fc071e56dc113afd181294591e
DIFF: 
https://github.com/llvm/llvm-project/commit/3d4c0460ec6040fc071e56dc113afd181294591e.diff

LOG: [SCEV] Use isBasicBlockEntryGuardedByCond in isLoopBackedgeGuardedByCond

Previously we tried to using isKnownPredicateAt, but it makes an
extra query to isKnownPredicate, which has negative impact on compile
time. Let's try to use more lightweight isBasicBlockEntryGuardedByCond.

Differential Revision: https://reviews.llvm.org/D92152

Added: 


Modified: 
llvm/lib/Analysis/ScalarEvolution.cpp
llvm/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll

Removed: 




diff  --git a/llvm/lib/Analysis/ScalarEvolution.cpp 
b/llvm/lib/Analysis/ScalarEvolution.cpp
index 53fd668be05c..b7bd54aafca7 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -9911,42 +9911,7 @@ ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop 
*L,
   if (isImpliedViaGuard(Latch, Pred, LHS, RHS))
 return true;
 
-  for (DomTreeNode *DTN = DT[Latch], *HeaderDTN = DT[L->getHeader()];
-   DTN != HeaderDTN; DTN = DTN->getIDom()) {
-assert(DTN && "should reach the loop header before reaching the root!");
-
-BasicBlock *BB = DTN->getBlock();
-if (isImpliedViaGuard(BB, Pred, LHS, RHS))
-  return true;
-
-BasicBlock *PBB = BB->getSinglePredecessor();
-if (!PBB)
-  continue;
-
-BranchInst *ContinuePredicate = dyn_cast<BranchInst>(PBB->getTerminator());
-if (!ContinuePredicate || !ContinuePredicate->isConditional())
-  continue;
-
-Value *Condition = ContinuePredicate->getCondition();
-
-// If we have an edge `E` within the loop body that dominates the only
-// latch, the condition guarding `E` also guards the backedge.  This
-// reasoning works only for loops with a single latch.
-
-BasicBlockEdge DominatingEdge(PBB, BB);
-if (DominatingEdge.isSingleEdge()) {
-  // We're constructively (and conservatively) enumerating edges within the
-  // loop body that dominate the latch.  The dominator tree better agree
-  // with us on this:
-  assert(DT.dominates(DominatingEdge, Latch) && "should be!");
-
-  if (isImpliedCond(Pred, LHS, RHS, Condition,
-BB != ContinuePredicate->getSuccessor(0)))
-return true;
-}
-  }
-
-  return false;
+  return isBasicBlockEntryGuardedByCond(Latch, Pred, LHS, RHS);
 }
 
 bool ScalarEvolution::isBasicBlockEntryGuardedByCond(const BasicBlock *BB,

diff  --git a/llvm/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll 
b/llvm/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll
index 8a07a49303d2..c39828923d5f 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll
@@ -69,7 +69,7 @@ define void @_Z15IntegerToStringjjR7Vector2(i32 %i, i32 
%radix, %struct.Vector2*
 ; CHECK-NEXT:[[UGLYGEP2:%.*]] = bitcast i8* [[UGLYGEP]] to i16*
 ; CHECK-NEXT:[[TMP29:%.*]] = load i16, i16* [[LSR_IV810]], align 2
 ; CHECK-NEXT:store i16 [[TMP29]], i16* [[UGLYGEP2]], align 2
-; CHECK-NEXT:[[LSR_IV_NEXT]] = add i64 [[LSR_IV]], 2
+; CHECK-NEXT:[[LSR_IV_NEXT]] = add nuw nsw i64 [[LSR_IV]], 2
 ; CHECK-NEXT:[[LSR_IV_NEXT3:%.*]] = inttoptr i64 [[LSR_IV_NEXT]] to i16*
 ; CHECK-NEXT:[[SCEVGEP9:%.*]] = getelementptr [33 x i16], [33 x i16]* 
[[LSR_IV8]], i64 0, i64 1
 ; CHECK-NEXT:[[TMP3]] = bitcast i16* [[SCEVGEP9]] to [33 x i16]*



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 91d6b6b - Revert "[SCEV] Use isBasicBlockEntryGuardedByCond in isLoopBackedgeGuardedByCond"

2020-11-26 Thread Max Kazantsev via llvm-branch-commits

Author: Max Kazantsev
Date: 2020-11-26T17:28:30+07:00
New Revision: 91d6b6b5fb94656dc12e1d760a3213a3cd72c8c5

URL: 
https://github.com/llvm/llvm-project/commit/91d6b6b5fb94656dc12e1d760a3213a3cd72c8c5
DIFF: 
https://github.com/llvm/llvm-project/commit/91d6b6b5fb94656dc12e1d760a3213a3cd72c8c5.diff

LOG: Revert "[SCEV] Use isBasicBlockEntryGuardedByCond in 
isLoopBackedgeGuardedByCond"

This reverts commit 3d4c0460ec6040fc071e56dc113afd181294591e.

Compile time impact is still high. Need to understand why.

Differential Revision: https://reviews.llvm.org/D92153

Added: 


Modified: 
llvm/lib/Analysis/ScalarEvolution.cpp
llvm/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll

Removed: 




diff  --git a/llvm/lib/Analysis/ScalarEvolution.cpp 
b/llvm/lib/Analysis/ScalarEvolution.cpp
index b7bd54aafca70..53fd668be05cd 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -9911,7 +9911,42 @@ ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop 
*L,
   if (isImpliedViaGuard(Latch, Pred, LHS, RHS))
 return true;
 
-  return isBasicBlockEntryGuardedByCond(Latch, Pred, LHS, RHS);
+  for (DomTreeNode *DTN = DT[Latch], *HeaderDTN = DT[L->getHeader()];
+   DTN != HeaderDTN; DTN = DTN->getIDom()) {
+assert(DTN && "should reach the loop header before reaching the root!");
+
+BasicBlock *BB = DTN->getBlock();
+if (isImpliedViaGuard(BB, Pred, LHS, RHS))
+  return true;
+
+BasicBlock *PBB = BB->getSinglePredecessor();
+if (!PBB)
+  continue;
+
+BranchInst *ContinuePredicate = dyn_cast<BranchInst>(PBB->getTerminator());
+if (!ContinuePredicate || !ContinuePredicate->isConditional())
+  continue;
+
+Value *Condition = ContinuePredicate->getCondition();
+
+// If we have an edge `E` within the loop body that dominates the only
+// latch, the condition guarding `E` also guards the backedge.  This
+// reasoning works only for loops with a single latch.
+
+BasicBlockEdge DominatingEdge(PBB, BB);
+if (DominatingEdge.isSingleEdge()) {
+  // We're constructively (and conservatively) enumerating edges within the
+  // loop body that dominate the latch.  The dominator tree better agree
+  // with us on this:
+  assert(DT.dominates(DominatingEdge, Latch) && "should be!");
+
+  if (isImpliedCond(Pred, LHS, RHS, Condition,
+BB != ContinuePredicate->getSuccessor(0)))
+return true;
+}
+  }
+
+  return false;
 }
 
 bool ScalarEvolution::isBasicBlockEntryGuardedByCond(const BasicBlock *BB,

diff  --git a/llvm/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll 
b/llvm/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll
index c39828923d5f9..8a07a49303d20 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll
@@ -69,7 +69,7 @@ define void @_Z15IntegerToStringjjR7Vector2(i32 %i, i32 
%radix, %struct.Vector2*
 ; CHECK-NEXT:[[UGLYGEP2:%.*]] = bitcast i8* [[UGLYGEP]] to i16*
 ; CHECK-NEXT:[[TMP29:%.*]] = load i16, i16* [[LSR_IV810]], align 2
 ; CHECK-NEXT:store i16 [[TMP29]], i16* [[UGLYGEP2]], align 2
-; CHECK-NEXT:[[LSR_IV_NEXT]] = add nuw nsw i64 [[LSR_IV]], 2
+; CHECK-NEXT:[[LSR_IV_NEXT]] = add i64 [[LSR_IV]], 2
 ; CHECK-NEXT:[[LSR_IV_NEXT3:%.*]] = inttoptr i64 [[LSR_IV_NEXT]] to i16*
 ; CHECK-NEXT:[[SCEVGEP9:%.*]] = getelementptr [33 x i16], [33 x i16]* 
[[LSR_IV8]], i64 0, i64 1
 ; CHECK-NEXT:[[TMP3]] = bitcast i16* [[SCEVGEP9]] to [33 x i16]*



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] f690986 - Return "[SCEV] Use isBasicBlockEntryGuardedByCond in isLoopBackedgeGuardedByCond", 2nd try

2020-11-26 Thread Max Kazantsev via llvm-branch-commits

Author: Max Kazantsev
Date: 2020-11-26T17:45:13+07:00
New Revision: f690986f314fbf3f4b3ca9ec0b87b95166008dc1

URL: 
https://github.com/llvm/llvm-project/commit/f690986f314fbf3f4b3ca9ec0b87b95166008dc1
DIFF: 
https://github.com/llvm/llvm-project/commit/f690986f314fbf3f4b3ca9ec0b87b95166008dc1.diff

LOG: Return "[SCEV] Use isBasicBlockEntryGuardedByCond in 
isLoopBackedgeGuardedByCond", 2nd try

Reverted because the compile time impact is still too high.

isKnownViaNonRecursiveReasoning is used twice, we can do it just once.

Differential Revision: https://reviews.llvm.org/D92152

Added: 


Modified: 
llvm/lib/Analysis/ScalarEvolution.cpp
llvm/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll

Removed: 




diff  --git a/llvm/lib/Analysis/ScalarEvolution.cpp 
b/llvm/lib/Analysis/ScalarEvolution.cpp
index 53fd668be05c..f8c9a2372a46 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -9851,10 +9851,6 @@ ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop 
*L,
 assert(!verifyFunction(*L->getHeader()->getParent(), &dbgs()) &&
"This cannot be done on broken IR!");
 
-
-  if (isKnownViaNonRecursiveReasoning(Pred, LHS, RHS))
-return true;
-
   BasicBlock *Latch = L->getLoopLatch();
   if (!Latch)
 return false;
@@ -9870,7 +9866,7 @@ ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop 
*L,
   // We don't want more than one activation of the following loops on the stack
   // -- that can lead to O(n!) time complexity.
   if (WalkingBEDominatingConds)
-return false;
+return isKnownViaNonRecursiveReasoning(Pred, LHS, RHS);
 
   SaveAndRestore<bool> ClearOnExit(WalkingBEDominatingConds, true);
 
@@ -9911,42 +9907,7 @@ ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop 
*L,
   if (isImpliedViaGuard(Latch, Pred, LHS, RHS))
 return true;
 
-  for (DomTreeNode *DTN = DT[Latch], *HeaderDTN = DT[L->getHeader()];
-   DTN != HeaderDTN; DTN = DTN->getIDom()) {
-assert(DTN && "should reach the loop header before reaching the root!");
-
-BasicBlock *BB = DTN->getBlock();
-if (isImpliedViaGuard(BB, Pred, LHS, RHS))
-  return true;
-
-BasicBlock *PBB = BB->getSinglePredecessor();
-if (!PBB)
-  continue;
-
-BranchInst *ContinuePredicate = dyn_cast<BranchInst>(PBB->getTerminator());
-if (!ContinuePredicate || !ContinuePredicate->isConditional())
-  continue;
-
-Value *Condition = ContinuePredicate->getCondition();
-
-// If we have an edge `E` within the loop body that dominates the only
-// latch, the condition guarding `E` also guards the backedge.  This
-// reasoning works only for loops with a single latch.
-
-BasicBlockEdge DominatingEdge(PBB, BB);
-if (DominatingEdge.isSingleEdge()) {
-  // We're constructively (and conservatively) enumerating edges within the
-  // loop body that dominate the latch.  The dominator tree better agree
-  // with us on this:
-  assert(DT.dominates(DominatingEdge, Latch) && "should be!");
-
-  if (isImpliedCond(Pred, LHS, RHS, Condition,
-BB != ContinuePredicate->getSuccessor(0)))
-return true;
-}
-  }
-
-  return false;
+  return isBasicBlockEntryGuardedByCond(Latch, Pred, LHS, RHS);
 }
 
 bool ScalarEvolution::isBasicBlockEntryGuardedByCond(const BasicBlock *BB,

diff  --git a/llvm/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll 
b/llvm/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll
index 8a07a49303d2..c39828923d5f 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll
@@ -69,7 +69,7 @@ define void @_Z15IntegerToStringjjR7Vector2(i32 %i, i32 
%radix, %struct.Vector2*
 ; CHECK-NEXT:[[UGLYGEP2:%.*]] = bitcast i8* [[UGLYGEP]] to i16*
 ; CHECK-NEXT:[[TMP29:%.*]] = load i16, i16* [[LSR_IV810]], align 2
 ; CHECK-NEXT:store i16 [[TMP29]], i16* [[UGLYGEP2]], align 2
-; CHECK-NEXT:[[LSR_IV_NEXT]] = add i64 [[LSR_IV]], 2
+; CHECK-NEXT:[[LSR_IV_NEXT]] = add nuw nsw i64 [[LSR_IV]], 2
 ; CHECK-NEXT:[[LSR_IV_NEXT3:%.*]] = inttoptr i64 [[LSR_IV_NEXT]] to i16*
 ; CHECK-NEXT:[[SCEVGEP9:%.*]] = getelementptr [33 x i16], [33 x i16]* 
[[LSR_IV8]], i64 0, i64 1
 ; CHECK-NEXT:[[TMP3]] = bitcast i16* [[SCEVGEP9]] to [33 x i16]*



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 035955f - Revert "Return "[SCEV] Use isBasicBlockEntryGuardedByCond in isLoopBackedgeGuardedByCond", 2nd try"

2020-11-26 Thread Max Kazantsev via llvm-branch-commits

Author: Max Kazantsev
Date: 2020-11-26T18:12:51+07:00
New Revision: 035955f9254179b2034fac8b16777973453e00d6

URL: 
https://github.com/llvm/llvm-project/commit/035955f9254179b2034fac8b16777973453e00d6
DIFF: 
https://github.com/llvm/llvm-project/commit/035955f9254179b2034fac8b16777973453e00d6.diff

LOG: Revert "Return "[SCEV] Use isBasicBlockEntryGuardedByCond in 
isLoopBackedgeGuardedByCond", 2nd try"

This reverts commit f690986f314fbf3f4b3ca9ec0b87b95166008dc1.

Compile time then and again...

Added: 


Modified: 
llvm/lib/Analysis/ScalarEvolution.cpp
llvm/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll

Removed: 




diff  --git a/llvm/lib/Analysis/ScalarEvolution.cpp 
b/llvm/lib/Analysis/ScalarEvolution.cpp
index f8c9a2372a46..53fd668be05c 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -9851,6 +9851,10 @@ ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop 
*L,
 assert(!verifyFunction(*L->getHeader()->getParent(), &dbgs()) &&
"This cannot be done on broken IR!");
 
+
+  if (isKnownViaNonRecursiveReasoning(Pred, LHS, RHS))
+return true;
+
   BasicBlock *Latch = L->getLoopLatch();
   if (!Latch)
 return false;
@@ -9866,7 +9870,7 @@ ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop 
*L,
   // We don't want more than one activation of the following loops on the stack
   // -- that can lead to O(n!) time complexity.
   if (WalkingBEDominatingConds)
-return isKnownViaNonRecursiveReasoning(Pred, LHS, RHS);
+return false;
 
   SaveAndRestore<bool> ClearOnExit(WalkingBEDominatingConds, true);
 
@@ -9907,7 +9911,42 @@ ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop 
*L,
   if (isImpliedViaGuard(Latch, Pred, LHS, RHS))
 return true;
 
-  return isBasicBlockEntryGuardedByCond(Latch, Pred, LHS, RHS);
+  for (DomTreeNode *DTN = DT[Latch], *HeaderDTN = DT[L->getHeader()];
+   DTN != HeaderDTN; DTN = DTN->getIDom()) {
+assert(DTN && "should reach the loop header before reaching the root!");
+
+BasicBlock *BB = DTN->getBlock();
+if (isImpliedViaGuard(BB, Pred, LHS, RHS))
+  return true;
+
+BasicBlock *PBB = BB->getSinglePredecessor();
+if (!PBB)
+  continue;
+
+BranchInst *ContinuePredicate = dyn_cast(PBB->getTerminator());
+if (!ContinuePredicate || !ContinuePredicate->isConditional())
+  continue;
+
+Value *Condition = ContinuePredicate->getCondition();
+
+// If we have an edge `E` within the loop body that dominates the only
+// latch, the condition guarding `E` also guards the backedge.  This
+// reasoning works only for loops with a single latch.
+
+BasicBlockEdge DominatingEdge(PBB, BB);
+if (DominatingEdge.isSingleEdge()) {
+  // We're constructively (and conservatively) enumerating edges within the
+  // loop body that dominate the latch.  The dominator tree better agree
+  // with us on this:
+  assert(DT.dominates(DominatingEdge, Latch) && "should be!");
+
+  if (isImpliedCond(Pred, LHS, RHS, Condition,
+BB != ContinuePredicate->getSuccessor(0)))
+return true;
+}
+  }
+
+  return false;
 }
 
 bool ScalarEvolution::isBasicBlockEntryGuardedByCond(const BasicBlock *BB,

diff  --git a/llvm/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll 
b/llvm/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll
index c39828923d5f..8a07a49303d2 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll
@@ -69,7 +69,7 @@ define void @_Z15IntegerToStringjjR7Vector2(i32 %i, i32 
%radix, %struct.Vector2*
 ; CHECK-NEXT:[[UGLYGEP2:%.*]] = bitcast i8* [[UGLYGEP]] to i16*
 ; CHECK-NEXT:[[TMP29:%.*]] = load i16, i16* [[LSR_IV810]], align 2
 ; CHECK-NEXT:store i16 [[TMP29]], i16* [[UGLYGEP2]], align 2
-; CHECK-NEXT:[[LSR_IV_NEXT]] = add nuw nsw i64 [[LSR_IV]], 2
+; CHECK-NEXT:[[LSR_IV_NEXT]] = add i64 [[LSR_IV]], 2
 ; CHECK-NEXT:[[LSR_IV_NEXT3:%.*]] = inttoptr i64 [[LSR_IV_NEXT]] to i16*
 ; CHECK-NEXT:[[SCEVGEP9:%.*]] = getelementptr [33 x i16], [33 x i16]* 
[[LSR_IV8]], i64 0, i64 1
 ; CHECK-NEXT:[[TMP3]] = bitcast i16* [[SCEVGEP9]] to [33 x i16]*



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] faf1838 - [IndVars] LCSSA Phi users should not prevent widening

2020-11-26 Thread Max Kazantsev via llvm-branch-commits

Author: Max Kazantsev
Date: 2020-11-27T11:19:54+07:00
New Revision: faf183874cb6f434815e2b1c0b59cd452219f89e

URL: 
https://github.com/llvm/llvm-project/commit/faf183874cb6f434815e2b1c0b59cd452219f89e
DIFF: 
https://github.com/llvm/llvm-project/commit/faf183874cb6f434815e2b1c0b59cd452219f89e.diff

LOG: [IndVars] LCSSA Phi users should not prevent widening

When widening an IndVar that has LCSSA Phi users outside
the loop, we can safely widen it as usual and then truncate
the result outside the loop without hurting the performance.

Differential Revision: https://reviews.llvm.org/D91593
Reviewed By: skatkov

Added: 


Modified: 
llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll

Removed: 




diff  --git a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp 
b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
index 290f3671afca..d37fe74a0039 100644
--- a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -1542,16 +1542,26 @@ bool 
WidenIV::widenWithVariantUse(WidenIV::NarrowIVDefUse DU) {
   auto AnotherOpExtKind = ExtKind;
 
   // Check that all uses are either s/zext, or narrow def (in case of we are
-  // widening the IV increment).
+  // widening the IV increment), or single-input LCSSA Phis.
   SmallVector ExtUsers;
+  SmallVector LCSSAPhiUsers;
   for (Use &U : NarrowUse->uses()) {
-if (U.getUser() == NarrowDef)
+Instruction *User = cast(U.getUser());
+if (User == NarrowDef)
   continue;
-Instruction *User = nullptr;
+if (!L->contains(User)) {
+  auto *LCSSAPhi = cast(User);
+  // Make sure there is only 1 input, so that we don't have to split
+  // critical edges.
+  if (LCSSAPhi->getNumOperands() != 1)
+return false;
+  LCSSAPhiUsers.push_back(LCSSAPhi);
+  continue;
+}
 if (ExtKind == SignExtended)
-  User = dyn_cast(U.getUser());
+  User = dyn_cast(User);
 else
-  User = dyn_cast(U.getUser());
+  User = dyn_cast(User);
 if (!User || User->getType() != WideType)
   return false;
 ExtUsers.push_back(User);
@@ -1630,6 +1640,21 @@ bool 
WidenIV::widenWithVariantUse(WidenIV::NarrowIVDefUse DU) {
 User->replaceAllUsesWith(WideBO);
 DeadInsts.emplace_back(User);
   }
+
+  for (PHINode *User : LCSSAPhiUsers) {
+assert(User->getNumOperands() == 1 && "Checked before!");
+Builder.SetInsertPoint(User);
+auto *WidePN =
+Builder.CreatePHI(WideBO->getType(), 1, User->getName() + ".wide");
+BasicBlock *LoopExitingBlock = User->getParent()->getSinglePredecessor();
+assert(LoopExitingBlock && L->contains(LoopExitingBlock) &&
+   "Not a LCSSA Phi?");
+WidePN->addIncoming(WideBO, LoopExitingBlock);
+Builder.SetInsertPoint(User->getParent()->getFirstNonPHI());
+auto *TruncPN = Builder.CreateTrunc(WidePN, User->getType());
+User->replaceAllUsesWith(TruncPN);
+DeadInsts.emplace_back(User);
+  }
   return true;
 }
 

diff  --git a/llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll 
b/llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll
index 2bb37d23866e..fb9b198fe8af 100644
--- a/llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll
+++ b/llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll
@@ -697,20 +697,18 @@ define i32 @test14(i32 %start, i32* %p, i32* %q) {
 ; CHECK:   loop:
 ; CHECK-NEXT:[[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], 
[[BACKEDGE:%.*]] ], [ [[TMP0]], [[ENTRY:%.*]] ]
 ; CHECK-NEXT:[[COND:%.*]] = icmp eq i64 [[INDVARS_IV]], 0
-; CHECK-NEXT:[[TMP1:%.*]] = trunc i64 [[INDVARS_IV]] to i32
-; CHECK-NEXT:[[FOO:%.*]] = add i32 [[TMP1]], -1
+; CHECK-NEXT:[[TMP1:%.*]] = add nsw i64 [[INDVARS_IV]], -1
 ; CHECK-NEXT:br i1 [[COND]], label [[EXIT:%.*]], label [[BACKEDGE]]
 ; CHECK:   backedge:
-; CHECK-NEXT:[[INDEX:%.*]] = zext i32 [[FOO]] to i64
-; CHECK-NEXT:[[STORE_ADDR:%.*]] = getelementptr i32, i32* [[P:%.*]], i64 
[[INDEX]]
+; CHECK-NEXT:[[STORE_ADDR:%.*]] = getelementptr i32, i32* [[P:%.*]], i64 
[[TMP1]]
 ; CHECK-NEXT:store i32 1, i32* [[STORE_ADDR]], align 4
-; CHECK-NEXT:[[LOAD_ADDR:%.*]] = getelementptr i32, i32* [[Q:%.*]], i64 
[[INDEX]]
-; CHECK-NEXT:[[STOP:%.*]] = load i32, i32* [[Q]], align 4
+; CHECK-NEXT:[[STOP:%.*]] = load i32, i32* [[Q:%.*]], align 4
 ; CHECK-NEXT:[[LOOP_COND:%.*]] = icmp eq i32 [[STOP]], 0
 ; CHECK-NEXT:[[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
 ; CHECK-NEXT:br i1 [[LOOP_COND]], label [[LOOP]], label [[FAILURE:%.*]]
 ; CHECK:   exit:
-; CHECK-NEXT:ret i32 -1
+; CHECK-NEXT:[[TMP2:%.*]] = trunc i64 -1 to i32
+; CHECK-NEXT:ret i32 [[TMP2]]
 ; CHECK:   failure:
 ; CHECK-NEXT:unreachable
 ;
@@ -750,24 +748,23 @@ define i32 @test15(i32 %start, i32* %p, i32* %q) {
 ; CHECK:   loop:
 ; CHECK-NEXT:[[INDVARS_IV:%.*]] =

[llvm-branch-commits] [llvm] 0077e16 - [Test] Add some more tests showing how we fail to widen IV

2020-11-26 Thread Max Kazantsev via llvm-branch-commits

Author: Max Kazantsev
Date: 2020-11-27T14:03:38+07:00
New Revision: 0077e1680f83cf2ae014c7f3c2847fe106e24a5f

URL: 
https://github.com/llvm/llvm-project/commit/0077e1680f83cf2ae014c7f3c2847fe106e24a5f
DIFF: 
https://github.com/llvm/llvm-project/commit/0077e1680f83cf2ae014c7f3c2847fe106e24a5f.diff

LOG: [Test] Add some more tests showing how we fail to widen IV

Added: 


Modified: 
llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll

Removed: 




diff  --git a/llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll 
b/llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll
index fb9b198fe8af..dd095c008772 100644
--- a/llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll
+++ b/llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll
@@ -794,3 +794,413 @@ failure:
   call void @use(i32 %foo)
   unreachable
 }
+
+; TODO: We can widen here despite the icmp user of %foo in guarded block.
+define i32 @test16_unsigned_pos1(i32 %start, i32* %p, i32* %q, i32 %x) {
+; CHECK-LABEL: @test16_unsigned_pos1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:[[TMP0:%.*]] = zext i32 [[START:%.*]] to i64
+; CHECK-NEXT:[[TMP1:%.*]] = add i32 [[START]], -1
+; CHECK-NEXT:br label [[LOOP:%.*]]
+; CHECK:   loop:
+; CHECK-NEXT:[[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], 
[[BACKEDGE:%.*]] ], [ [[TMP0]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:[[COND:%.*]] = icmp eq i64 [[INDVARS_IV]], 0
+; CHECK-NEXT:[[TMP2:%.*]] = trunc i64 [[INDVARS_IV]] to i32
+; CHECK-NEXT:[[FOO:%.*]] = add i32 [[TMP2]], -1
+; CHECK-NEXT:br i1 [[COND]], label [[EXIT:%.*]], label [[GUARDED:%.*]]
+; CHECK:   guarded:
+; CHECK-NEXT:[[ICMP_USER3:%.*]] = icmp ult i32 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT:br i1 [[ICMP_USER3]], label [[BACKEDGE]], label 
[[SIDE_EXIT:%.*]]
+; CHECK:   backedge:
+; CHECK-NEXT:[[INDEX:%.*]] = zext i32 [[FOO]] to i64
+; CHECK-NEXT:[[STORE_ADDR:%.*]] = getelementptr i32, i32* [[P:%.*]], i64 
[[INDEX]]
+; CHECK-NEXT:store i32 1, i32* [[STORE_ADDR]], align 4
+; CHECK-NEXT:[[LOAD_ADDR:%.*]] = getelementptr i32, i32* [[Q:%.*]], i64 
[[INDEX]]
+; CHECK-NEXT:[[STOP:%.*]] = load i32, i32* [[Q]], align 4
+; CHECK-NEXT:[[LOOP_COND:%.*]] = icmp eq i32 [[STOP]], 0
+; CHECK-NEXT:[[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
+; CHECK-NEXT:br i1 [[LOOP_COND]], label [[LOOP]], label [[FAILURE:%.*]]
+; CHECK:   exit:
+; CHECK-NEXT:call void @use(i32 -1)
+; CHECK-NEXT:ret i32 -1
+; CHECK:   failure:
+; CHECK-NEXT:[[FOO_LCSSA2:%.*]] = phi i32 [ [[FOO]], [[BACKEDGE]] ]
+; CHECK-NEXT:call void @use(i32 [[FOO_LCSSA2]])
+; CHECK-NEXT:unreachable
+; CHECK:   side_exit:
+; CHECK-NEXT:ret i32 0
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [%start, %entry], [%iv.next.1, %backedge]
+  %cond = icmp eq i32 %iv, 0
+  %foo = add i32 %iv, -1
+  br i1 %cond, label %exit, label %guarded
+
+guarded:
+  %icmp_user = icmp ult i32 %foo, %x
+  br i1 %icmp_user, label %backedge, label %side_exit
+
+backedge:
+  %index = zext i32 %foo to i64
+  %store.addr = getelementptr i32, i32* %p, i64 %index
+  store i32 1, i32* %store.addr
+  %load.addr = getelementptr i32, i32* %q, i64 %index
+  %stop = load i32, i32* %q
+  %loop.cond = icmp eq i32 %stop, 0
+  %iv.next.1 = add i32 %iv, -1
+  br i1 %loop.cond, label %loop, label %failure
+
+exit:
+  call void @use(i32 %foo)
+  ret i32 %foo
+
+failure:
+  call void @use(i32 %foo)
+  unreachable
+
+side_exit:
+  ret i32 0
+}
+
+; TODO: We can widen here despite the icmp user of %foo in guarded block.
+define i32 @test16_unsigned_pos2(i32 %start, i32* %p, i32* %q, i32 %x) {
+; CHECK-LABEL: @test16_unsigned_pos2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:[[TMP0:%.*]] = zext i32 [[START:%.*]] to i64
+; CHECK-NEXT:br label [[LOOP:%.*]]
+; CHECK:   loop:
+; CHECK-NEXT:[[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], 
[[BACKEDGE:%.*]] ], [ [[TMP0]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:[[COND:%.*]] = icmp eq i64 [[INDVARS_IV]], 0
+; CHECK-NEXT:[[TMP1:%.*]] = trunc i64 [[INDVARS_IV]] to i32
+; CHECK-NEXT:[[FOO:%.*]] = add i32 [[TMP1]], -1
+; CHECK-NEXT:br i1 [[COND]], label [[EXIT:%.*]], label [[GUARDED:%.*]]
+; CHECK:   guarded:
+; CHECK-NEXT:[[ICMP_USER:%.*]] = icmp ne i32 [[FOO]], [[X:%.*]]
+; CHECK-NEXT:br i1 [[ICMP_USER]], label [[BACKEDGE]], label 
[[SIDE_EXIT:%.*]]
+; CHECK:   backedge:
+; CHECK-NEXT:[[INDEX:%.*]] = zext i32 [[FOO]] to i64
+; CHECK-NEXT:[[STORE_ADDR:%.*]] = getelementptr i32, i32* [[P:%.*]], i64 
[[INDEX]]
+; CHECK-NEXT:store i32 1, i32* [[STORE_ADDR]], align 4
+; CHECK-NEXT:[[LOAD_ADDR:%.*]] = getelementptr i32, i32* [[Q:%.*]], i64 
[[INDEX]]
+; CHECK-NEXT:[[STOP:%.*]] = load i32, i32* [[Q]], align 4
+; CHECK-NEXT:[[LOOP_COND:%.*]] = icmp eq i32 [[STOP]], 0
+; CHECK-NEXT:[[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
+; CHECK-NEXT:

[llvm-branch-commits] [llvm] 0c9c6dd - [IndVars] ICmpInst should not prevent IV widening

2020-11-29 Thread Max Kazantsev via llvm-branch-commits

Author: Max Kazantsev
Date: 2020-11-30T10:51:31+07:00
New Revision: 0c9c6ddf17bb01ae350a899b3395bb078aa0c62e

URL: 
https://github.com/llvm/llvm-project/commit/0c9c6ddf17bb01ae350a899b3395bb078aa0c62e
DIFF: 
https://github.com/llvm/llvm-project/commit/0c9c6ddf17bb01ae350a899b3395bb078aa0c62e.diff

LOG: [IndVars] ICmpInst should not prevent IV widening

If we decided to widen IV with zext, then unsigned comparisons
should not prevent widening (same for sext/sign comparisons).
The result of comparison in wider type does not change in this case.

Differential Revision: https://reviews.llvm.org/D92207
Reviewed By: nikic

Added: 


Modified: 
llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll

Removed: 




diff  --git a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp 
b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
index d37fe74a0039..e281c66a4267 100644
--- a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -1541,10 +1541,14 @@ bool 
WidenIV::widenWithVariantUse(WidenIV::NarrowIVDefUse DU) {
   bool CanZeroExtend = ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap();
   auto AnotherOpExtKind = ExtKind;
 
-  // Check that all uses are either s/zext, or narrow def (in case of we are
-  // widening the IV increment), or single-input LCSSA Phis.
+  // Check that all uses are either:
+  // - narrow def (in case of we are widening the IV increment);
+  // - single-input LCSSA Phis;
+  // - comparison of the chosen type;
+  // - extend of the chosen type (raison d'etre).
   SmallVector ExtUsers;
   SmallVector LCSSAPhiUsers;
+  SmallVector ICmpUsers;
   for (Use &U : NarrowUse->uses()) {
 Instruction *User = cast(U.getUser());
 if (User == NarrowDef)
@@ -1558,6 +1562,19 @@ bool 
WidenIV::widenWithVariantUse(WidenIV::NarrowIVDefUse DU) {
   LCSSAPhiUsers.push_back(LCSSAPhi);
   continue;
 }
+if (auto *ICmp = dyn_cast(User)) {
+  auto Pred = ICmp->getPredicate();
+  // We have 3 types of predicates: signed, unsigned and equality
+  // predicates. For equality, it's legal to widen icmp for either sign and
+  // zero extend. For sign extend, we can also do so for signed predicates,
+  // likeweise for zero extend we can widen icmp for unsigned predicates.
+  if (ExtKind == ZeroExtended && ICmpInst::isSigned(Pred))
+return false;
+  if (ExtKind == SignExtended && ICmpInst::isUnsigned(Pred))
+return false;
+  ICmpUsers.push_back(ICmp);
+  continue;
+}
 if (ExtKind == SignExtended)
   User = dyn_cast(User);
 else
@@ -1655,6 +1672,26 @@ bool 
WidenIV::widenWithVariantUse(WidenIV::NarrowIVDefUse DU) {
 User->replaceAllUsesWith(TruncPN);
 DeadInsts.emplace_back(User);
   }
+
+  for (ICmpInst *User : ICmpUsers) {
+Builder.SetInsertPoint(User);
+auto ExtendedOp = [&](Value * V)->Value * {
+  if (V == NarrowUse)
+return WideBO;
+  if (ExtKind == ZeroExtended)
+return Builder.CreateZExt(V, WideBO->getType());
+  else
+return Builder.CreateSExt(V, WideBO->getType());
+};
+auto Pred = User->getPredicate();
+auto *LHS = ExtendedOp(User->getOperand(0));
+auto *RHS = ExtendedOp(User->getOperand(1));
+auto *WideCmp =
+Builder.CreateICmp(Pred, LHS, RHS, User->getName() + ".wide");
+User->replaceAllUsesWith(WideCmp);
+DeadInsts.emplace_back(User);
+  }
+
   return true;
 }
 

diff  --git a/llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll 
b/llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll
index dd095c008772..d3b117e73602 100644
--- a/llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll
+++ b/llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll
@@ -795,37 +795,36 @@ failure:
   unreachable
 }
 
-; TODO: We can widen here despite the icmp user of %foo in guarded block.
 define i32 @test16_unsigned_pos1(i32 %start, i32* %p, i32* %q, i32 %x) {
 ; CHECK-LABEL: @test16_unsigned_pos1(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:[[TMP0:%.*]] = zext i32 [[START:%.*]] to i64
-; CHECK-NEXT:[[TMP1:%.*]] = add i32 [[START]], -1
+; CHECK-NEXT:[[TMP1:%.*]] = add nsw i64 [[TMP0]], -1
+; CHECK-NEXT:[[TMP2:%.*]] = zext i32 [[X:%.*]] to i64
 ; CHECK-NEXT:br label [[LOOP:%.*]]
 ; CHECK:   loop:
 ; CHECK-NEXT:[[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], 
[[BACKEDGE:%.*]] ], [ [[TMP0]], [[ENTRY:%.*]] ]
 ; CHECK-NEXT:[[COND:%.*]] = icmp eq i64 [[INDVARS_IV]], 0
-; CHECK-NEXT:[[TMP2:%.*]] = trunc i64 [[INDVARS_IV]] to i32
-; CHECK-NEXT:[[FOO:%.*]] = add i32 [[TMP2]], -1
+; CHECK-NEXT:[[TMP3:%.*]] = add nsw i64 [[INDVARS_IV]], -1
 ; CHECK-NEXT:br i1 [[COND]], label [[EXIT:%.*]], label [[GUARDED:%.*]]
 ; CHECK:   guarded:
-; CHECK-NEXT:[[ICMP_USER3:%.*]] = icmp ult i32 [[TMP1]], [[X:%.*]]
-; CHECK-NEXT:br i1 [[ICMP_USER

[llvm-branch-commits] [llvm] 391a47e - [Test] One more IndVars test

2020-12-01 Thread Max Kazantsev via llvm-branch-commits

Author: Max Kazantsev
Date: 2020-12-02T13:16:34+07:00
New Revision: 391a47e227b528ff4791cb48fb300bc5009077b2

URL: 
https://github.com/llvm/llvm-project/commit/391a47e227b528ff4791cb48fb300bc5009077b2
DIFF: 
https://github.com/llvm/llvm-project/commit/391a47e227b528ff4791cb48fb300bc5009077b2.diff

LOG: [Test] One more IndVars test

Added: 


Modified: 
llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll

Removed: 




diff  --git a/llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll 
b/llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll
index d3b117e73602..94cd63257cf4 100644
--- a/llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll
+++ b/llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll
@@ -1203,3 +1203,48 @@ failure:
 side_exit:
   ret i32 0
 }
+
+define i32 @test17(i32* %p, i32 %len) {
+; CHECK-LABEL: @test17(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:[[TMP0:%.*]] = zext i32 [[LEN:%.*]] to i64
+; CHECK-NEXT:br label [[LOOP:%.*]]
+; CHECK:   loop:
+; CHECK-NEXT:[[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], 
[[BACKEDGE:%.*]] ], [ [[TMP0]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:[[TMP1:%.*]] = add nsw i64 [[INDVARS_IV]], -1
+; CHECK-NEXT:[[COND_1:%.*]] = icmp eq i64 [[INDVARS_IV]], 0
+; CHECK-NEXT:br i1 [[COND_1]], label [[EXIT:%.*]], label [[BACKEDGE]]
+; CHECK:   backedge:
+; CHECK-NEXT:[[ADDR:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], 
i64 [[TMP1]]
+; CHECK-NEXT:[[LOADED:%.*]] = load atomic i32, i32* [[ADDR]] unordered, 
align 4
+; CHECK-NEXT:[[COND_2:%.*]] = icmp eq i32 [[LOADED]], 0
+; CHECK-NEXT:[[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
+; CHECK-NEXT:br i1 [[COND_2]], label [[FAILURE:%.*]], label [[LOOP]]
+; CHECK:   exit:
+; CHECK-NEXT:[[TMP2:%.*]] = trunc i64 -1 to i32
+; CHECK-NEXT:ret i32 [[TMP2]]
+; CHECK:   failure:
+; CHECK-NEXT:unreachable
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [ %iv.next, %backedge ], [ %len, %entry ]
+  %iv.next = add i32 %iv, -1
+  %cond_1 = icmp eq i32 %iv, 0
+  br i1 %cond_1, label %exit, label %backedge
+
+backedge:
+  %iv.next.wide = zext i32 %iv.next to i64
+  %addr = getelementptr inbounds i32, i32* %p, i64 %iv.next.wide
+  %loaded = load atomic i32, i32* %addr unordered, align 4
+  %cond_2 = icmp eq i32 %loaded, 0
+  br i1 %cond_2, label %failure, label %loop
+
+exit:
+  ret i32 %iv.next
+
+failure:
+  unreachable
+}



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 16bee4d - [Test] One CodeGen test showing missing opportunity on move elimination

2020-12-01 Thread Max Kazantsev via llvm-branch-commits

Author: Max Kazantsev
Date: 2020-12-02T13:16:34+07:00
New Revision: 16bee4d36835982bf3aa58263c6974760a55010c

URL: 
https://github.com/llvm/llvm-project/commit/16bee4d36835982bf3aa58263c6974760a55010c
DIFF: 
https://github.com/llvm/llvm-project/commit/16bee4d36835982bf3aa58263c6974760a55010c.diff

LOG: [Test] One CodeGen test showing missing opportunity on move elimination

Added: 
llvm/test/CodeGen/X86/2020_12_02_decrementing_loop.ll

Modified: 


Removed: 




diff  --git a/llvm/test/CodeGen/X86/2020_12_02_decrementing_loop.ll 
b/llvm/test/CodeGen/X86/2020_12_02_decrementing_loop.ll
new file mode 100644
index ..510301d9b3b3
--- /dev/null
+++ b/llvm/test/CodeGen/X86/2020_12_02_decrementing_loop.ll
@@ -0,0 +1,44 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-apple-macosx | FileCheck %s
+
+; TODO: We can get rid of movq here by using different offset and %rax.
+define i32 @test(i32* %p, i64 %len, i32 %x) {
+; CHECK-LABEL: test:
+; CHECK:   ## %bb.0: ## %entry
+; CHECK-NEXT:movq %rsi, %rax
+; CHECK-NEXT:.p2align 4, 0x90
+; CHECK-NEXT:  LBB0_1: ## %loop
+; CHECK-NEXT:## =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:subq $1, %rax
+; CHECK-NEXT:jb LBB0_4
+; CHECK-NEXT:  ## %bb.2: ## %backedge
+; CHECK-NEXT:## in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT:cmpl %edx, -4(%rdi,%rsi,4)
+; CHECK-NEXT:movq %rax, %rsi
+; CHECK-NEXT:jne LBB0_1
+; CHECK-NEXT:  ## %bb.3: ## %failure
+; CHECK-NEXT:ud2
+; CHECK-NEXT:  LBB0_4: ## %exit
+; CHECK-NEXT:movl $-1, %eax
+; CHECK-NEXT:retq
+entry:
+  br label %loop
+
+loop: ; preds = %backedge, %entry
+  %iv = phi i64 [ %iv.next, %backedge ], [ %len, %entry ]
+  %iv.next = add nsw i64 %iv, -1
+  %cond_1 = icmp eq i64 %iv, 0
+  br i1 %cond_1, label %exit, label %backedge
+
+backedge: ; preds = %loop
+  %addr = getelementptr inbounds i32, i32* %p, i64 %iv.next
+  %loaded = load atomic i32, i32* %addr unordered, align 4
+  %cond_2 = icmp eq i32 %loaded, %x
+  br i1 %cond_2, label %failure, label %loop
+
+exit: ; preds = %loop
+  ret i32 -1
+
+failure:  ; preds = %backedge
+  unreachable
+}



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 4bd35cd - Revert "[IndVars] ICmpInst should not prevent IV widening"

2020-12-03 Thread Max Kazantsev via llvm-branch-commits

Author: Max Kazantsev
Date: 2020-12-03T18:01:41+07:00
New Revision: 4bd35cdc3ae1874c6d070c5d410b3f591de54ee6

URL: 
https://github.com/llvm/llvm-project/commit/4bd35cdc3ae1874c6d070c5d410b3f591de54ee6
DIFF: 
https://github.com/llvm/llvm-project/commit/4bd35cdc3ae1874c6d070c5d410b3f591de54ee6.diff

LOG: Revert "[IndVars] ICmpInst should not prevent IV widening"

This reverts commit 0c9c6ddf17bb01ae350a899b3395bb078aa0c62e.

We are seeing some failures with this patch locally. Not clear
if it's causing them or just triggering a problem in another
place. Reverting while investigating.

Added: 


Modified: 
llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll

Removed: 




diff  --git a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp 
b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
index e281c66a4267..d37fe74a0039 100644
--- a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -1541,14 +1541,10 @@ bool 
WidenIV::widenWithVariantUse(WidenIV::NarrowIVDefUse DU) {
   bool CanZeroExtend = ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap();
   auto AnotherOpExtKind = ExtKind;
 
-  // Check that all uses are either:
-  // - narrow def (in case of we are widening the IV increment);
-  // - single-input LCSSA Phis;
-  // - comparison of the chosen type;
-  // - extend of the chosen type (raison d'etre).
+  // Check that all uses are either s/zext, or narrow def (in case of we are
+  // widening the IV increment), or single-input LCSSA Phis.
   SmallVector ExtUsers;
   SmallVector LCSSAPhiUsers;
-  SmallVector ICmpUsers;
   for (Use &U : NarrowUse->uses()) {
 Instruction *User = cast(U.getUser());
 if (User == NarrowDef)
@@ -1562,19 +1558,6 @@ bool 
WidenIV::widenWithVariantUse(WidenIV::NarrowIVDefUse DU) {
   LCSSAPhiUsers.push_back(LCSSAPhi);
   continue;
 }
-if (auto *ICmp = dyn_cast(User)) {
-  auto Pred = ICmp->getPredicate();
-  // We have 3 types of predicates: signed, unsigned and equality
-  // predicates. For equality, it's legal to widen icmp for either sign and
-  // zero extend. For sign extend, we can also do so for signed predicates,
-  // likeweise for zero extend we can widen icmp for unsigned predicates.
-  if (ExtKind == ZeroExtended && ICmpInst::isSigned(Pred))
-return false;
-  if (ExtKind == SignExtended && ICmpInst::isUnsigned(Pred))
-return false;
-  ICmpUsers.push_back(ICmp);
-  continue;
-}
 if (ExtKind == SignExtended)
   User = dyn_cast(User);
 else
@@ -1672,26 +1655,6 @@ bool 
WidenIV::widenWithVariantUse(WidenIV::NarrowIVDefUse DU) {
 User->replaceAllUsesWith(TruncPN);
 DeadInsts.emplace_back(User);
   }
-
-  for (ICmpInst *User : ICmpUsers) {
-Builder.SetInsertPoint(User);
-auto ExtendedOp = [&](Value * V)->Value * {
-  if (V == NarrowUse)
-return WideBO;
-  if (ExtKind == ZeroExtended)
-return Builder.CreateZExt(V, WideBO->getType());
-  else
-return Builder.CreateSExt(V, WideBO->getType());
-};
-auto Pred = User->getPredicate();
-auto *LHS = ExtendedOp(User->getOperand(0));
-auto *RHS = ExtendedOp(User->getOperand(1));
-auto *WideCmp =
-Builder.CreateICmp(Pred, LHS, RHS, User->getName() + ".wide");
-User->replaceAllUsesWith(WideCmp);
-DeadInsts.emplace_back(User);
-  }
-
   return true;
 }
 

diff  --git a/llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll 
b/llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll
index 94cd63257cf4..72e97682274e 100644
--- a/llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll
+++ b/llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll
@@ -795,36 +795,37 @@ failure:
   unreachable
 }
 
+; TODO: We can widen here despite the icmp user of %foo in guarded block.
 define i32 @test16_unsigned_pos1(i32 %start, i32* %p, i32* %q, i32 %x) {
 ; CHECK-LABEL: @test16_unsigned_pos1(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:[[TMP0:%.*]] = zext i32 [[START:%.*]] to i64
-; CHECK-NEXT:[[TMP1:%.*]] = add nsw i64 [[TMP0]], -1
-; CHECK-NEXT:[[TMP2:%.*]] = zext i32 [[X:%.*]] to i64
+; CHECK-NEXT:[[TMP1:%.*]] = add i32 [[START]], -1
 ; CHECK-NEXT:br label [[LOOP:%.*]]
 ; CHECK:   loop:
 ; CHECK-NEXT:[[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], 
[[BACKEDGE:%.*]] ], [ [[TMP0]], [[ENTRY:%.*]] ]
 ; CHECK-NEXT:[[COND:%.*]] = icmp eq i64 [[INDVARS_IV]], 0
-; CHECK-NEXT:[[TMP3:%.*]] = add nsw i64 [[INDVARS_IV]], -1
+; CHECK-NEXT:[[TMP2:%.*]] = trunc i64 [[INDVARS_IV]] to i32
+; CHECK-NEXT:[[FOO:%.*]] = add i32 [[TMP2]], -1
 ; CHECK-NEXT:br i1 [[COND]], label [[EXIT:%.*]], label [[GUARDED:%.*]]
 ; CHECK:   guarded:
-; CHECK-NEXT:[[ICMP_USER_WIDE4:%.*]] = icmp ult i64 [[TMP1]], [[TMP2]]
-; CHECK-NEXT:br i1 [[ICMP_USER_WIDE4]], label [[BACKEDGE]], l

[llvm-branch-commits] [llvm] 3df0dac - [IndVars] Quick fix LHS/RHS bug

2020-12-03 Thread Max Kazantsev via llvm-branch-commits

Author: Max Kazantsev
Date: 2020-12-04T12:34:42+07:00
New Revision: 3df0daceb2c684c8bd704595e70c08500bb051ed

URL: 
https://github.com/llvm/llvm-project/commit/3df0daceb2c684c8bd704595e70c08500bb051ed
DIFF: 
https://github.com/llvm/llvm-project/commit/3df0daceb2c684c8bd704595e70c08500bb051ed.diff

LOG: [IndVars] Quick fix LHS/RHS bug

The code relies on fact that LHS is the NarrowDef but never
really checks it. Adding the conservative restrictive check,
will follow-up with handling of case where RHS is a NarrowDef.

Added: 


Modified: 
llvm/lib/Transforms/Utils/SimplifyIndVar.cpp

Removed: 




diff  --git a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp 
b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
index d37fe74a0039..8842dfed3672 100644
--- a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -1595,6 +1595,9 @@ bool WidenIV::widenWithVariantUse(WidenIV::NarrowIVDefUse 
DU) {
   return false;
 const SCEV *LHS = SE->getSCEV(OBO->getOperand(0));
 const SCEV *RHS = SE->getSCEV(OBO->getOperand(1));
+// TODO: Support case for NarrowDef = NarrowUse->getOperand(1).
+if (NarrowUse->getOperand(0) != NarrowDef)
+  return false;
 if (!SE->isKnownNegative(RHS))
   return false;
 bool ProvedSubNUW = SE->isKnownPredicateAt(



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 12b6c5e - Return "[IndVars] ICmpInst should not prevent IV widening"

2020-12-03 Thread Max Kazantsev via llvm-branch-commits

Author: Max Kazantsev
Date: 2020-12-04T12:34:43+07:00
New Revision: 12b6c5e68282be7beac597300cf90a9d6ae3e1dd

URL: 
https://github.com/llvm/llvm-project/commit/12b6c5e68282be7beac597300cf90a9d6ae3e1dd
DIFF: 
https://github.com/llvm/llvm-project/commit/12b6c5e68282be7beac597300cf90a9d6ae3e1dd.diff

LOG: Return "[IndVars] ICmpInst should not prevent IV widening"

This reverts commit 4bd35cdc3ae1874c6d070c5d410b3f591de54ee6.

The patch was reverted during the investigation. The investigation
shown that the patch did not cause any trouble, but just exposed
the existing problem that is addressed by the previous patch
"[IndVars] Quick fix LHS/RHS bug". Returning without changes.

Added: 


Modified: 
llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll

Removed: 




diff  --git a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp 
b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
index 8842dfed3672..c02264aec600 100644
--- a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -1541,10 +1541,14 @@ bool 
WidenIV::widenWithVariantUse(WidenIV::NarrowIVDefUse DU) {
   bool CanZeroExtend = ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap();
   auto AnotherOpExtKind = ExtKind;
 
-  // Check that all uses are either s/zext, or narrow def (in case of we are
-  // widening the IV increment), or single-input LCSSA Phis.
+  // Check that all uses are either:
+  // - narrow def (in case of we are widening the IV increment);
+  // - single-input LCSSA Phis;
+  // - comparison of the chosen type;
+  // - extend of the chosen type (raison d'etre).
   SmallVector ExtUsers;
   SmallVector LCSSAPhiUsers;
+  SmallVector ICmpUsers;
   for (Use &U : NarrowUse->uses()) {
 Instruction *User = cast(U.getUser());
 if (User == NarrowDef)
@@ -1558,6 +1562,19 @@ bool 
WidenIV::widenWithVariantUse(WidenIV::NarrowIVDefUse DU) {
   LCSSAPhiUsers.push_back(LCSSAPhi);
   continue;
 }
+if (auto *ICmp = dyn_cast(User)) {
+  auto Pred = ICmp->getPredicate();
+  // We have 3 types of predicates: signed, unsigned and equality
+  // predicates. For equality, it's legal to widen icmp for either sign and
+  // zero extend. For sign extend, we can also do so for signed predicates,
+  // likeweise for zero extend we can widen icmp for unsigned predicates.
+  if (ExtKind == ZeroExtended && ICmpInst::isSigned(Pred))
+return false;
+  if (ExtKind == SignExtended && ICmpInst::isUnsigned(Pred))
+return false;
+  ICmpUsers.push_back(ICmp);
+  continue;
+}
 if (ExtKind == SignExtended)
   User = dyn_cast(User);
 else
@@ -1658,6 +1675,26 @@ bool 
WidenIV::widenWithVariantUse(WidenIV::NarrowIVDefUse DU) {
 User->replaceAllUsesWith(TruncPN);
 DeadInsts.emplace_back(User);
   }
+
+  for (ICmpInst *User : ICmpUsers) {
+Builder.SetInsertPoint(User);
+auto ExtendedOp = [&](Value * V)->Value * {
+  if (V == NarrowUse)
+return WideBO;
+  if (ExtKind == ZeroExtended)
+return Builder.CreateZExt(V, WideBO->getType());
+  else
+return Builder.CreateSExt(V, WideBO->getType());
+};
+auto Pred = User->getPredicate();
+auto *LHS = ExtendedOp(User->getOperand(0));
+auto *RHS = ExtendedOp(User->getOperand(1));
+auto *WideCmp =
+Builder.CreateICmp(Pred, LHS, RHS, User->getName() + ".wide");
+User->replaceAllUsesWith(WideCmp);
+DeadInsts.emplace_back(User);
+  }
+
   return true;
 }
 

diff  --git a/llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll 
b/llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll
index 72e97682274e..94cd63257cf4 100644
--- a/llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll
+++ b/llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll
@@ -795,37 +795,36 @@ failure:
   unreachable
 }
 
-; TODO: We can widen here despite the icmp user of %foo in guarded block.
 define i32 @test16_unsigned_pos1(i32 %start, i32* %p, i32* %q, i32 %x) {
 ; CHECK-LABEL: @test16_unsigned_pos1(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:[[TMP0:%.*]] = zext i32 [[START:%.*]] to i64
-; CHECK-NEXT:[[TMP1:%.*]] = add i32 [[START]], -1
+; CHECK-NEXT:[[TMP1:%.*]] = add nsw i64 [[TMP0]], -1
+; CHECK-NEXT:[[TMP2:%.*]] = zext i32 [[X:%.*]] to i64
 ; CHECK-NEXT:br label [[LOOP:%.*]]
 ; CHECK:   loop:
 ; CHECK-NEXT:[[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], 
[[BACKEDGE:%.*]] ], [ [[TMP0]], [[ENTRY:%.*]] ]
 ; CHECK-NEXT:[[COND:%.*]] = icmp eq i64 [[INDVARS_IV]], 0
-; CHECK-NEXT:[[TMP2:%.*]] = trunc i64 [[INDVARS_IV]] to i32
-; CHECK-NEXT:[[FOO:%.*]] = add i32 [[TMP2]], -1
+; CHECK-NEXT:[[TMP3:%.*]] = add nsw i64 [[INDVARS_IV]], -1
 ; CHECK-NEXT:br i1 [[COND]], label [[EXIT:%.*]], label [[GUARDED:%.*]]
 ; CHECK:   guarded:
-; CHECK-NEXT:[[ICMP_USER3:%.*]] = icmp ult i

[llvm-branch-commits] [llvm] 55009a0 - [Test] Auto-update test checks

2020-12-07 Thread Max Kazantsev via llvm-branch-commits

Author: Max Kazantsev
Date: 2020-12-07T18:33:47+07:00
New Revision: 55009a0ff8bc3ff9ec91075726d44579dedaf8d3

URL: 
https://github.com/llvm/llvm-project/commit/55009a0ff8bc3ff9ec91075726d44579dedaf8d3
DIFF: 
https://github.com/llvm/llvm-project/commit/55009a0ff8bc3ff9ec91075726d44579dedaf8d3.diff

LOG: [Test] Auto-update test checks

Added: 


Modified: 
llvm/test/Transforms/LoopStrengthReduce/X86/sibling-loops.ll

Removed: 




diff  --git a/llvm/test/Transforms/LoopStrengthReduce/X86/sibling-loops.ll 
b/llvm/test/Transforms/LoopStrengthReduce/X86/sibling-loops.ll
index a69d6adc0f03..dd99b4ea1e0f 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/X86/sibling-loops.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/X86/sibling-loops.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -loop-reduce -S < %s | FileCheck %s
 ; We find it is very bad to allow LSR formula containing SCEVAddRecExpr Reg
 ; from siblings of current loop. When one loop is LSR optimized, it can
@@ -9,10 +10,75 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 @cond = common local_unnamed_addr global i64 0, align 8
 
 ; Check there is no extra lsr.iv generated in foo.
+define void @foo(i64 %N) local_unnamed_addr {
 ; CHECK-LABEL: @foo(
-; CHECK-NOT: lsr.iv{{[0-9]+}} =
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:br label [[DO_BODY:%.*]]
+; CHECK:   do.body:
+; CHECK-NEXT:[[I_0:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], 
[[DO_BODY]] ]
+; CHECK-NEXT:tail call void @goo(i64 [[I_0]], i64 [[I_0]])
+; CHECK-NEXT:[[INC]] = add nuw nsw i64 [[I_0]], 1
+; CHECK-NEXT:[[T0:%.*]] = load i64, i64* @cond, align 8
+; CHECK-NEXT:[[TOBOOL:%.*]] = icmp eq i64 [[T0]], 0
+; CHECK-NEXT:br i1 [[TOBOOL]], label [[DO_BODY2_PREHEADER:%.*]], label 
[[DO_BODY]]
+; CHECK:   do.body2.preheader:
+; CHECK-NEXT:br label [[DO_BODY2:%.*]]
+; CHECK:   do.body2:
+; CHECK-NEXT:[[I_1:%.*]] = phi i64 [ [[INC3:%.*]], [[DO_BODY2]] ], [ 0, 
[[DO_BODY2_PREHEADER]] ]
+; CHECK-NEXT:[[TMP0:%.*]] = add i64 [[INC]], [[I_1]]
+; CHECK-NEXT:tail call void @goo(i64 [[I_1]], i64 [[TMP0]])
+; CHECK-NEXT:[[INC3]] = add nuw nsw i64 [[I_1]], 1
+; CHECK-NEXT:[[T1:%.*]] = load i64, i64* @cond, align 8
+; CHECK-NEXT:[[TOBOOL6:%.*]] = icmp eq i64 [[T1]], 0
+; CHECK-NEXT:br i1 [[TOBOOL6]], label [[DO_BODY8_PREHEADER:%.*]], label 
[[DO_BODY2]]
+; CHECK:   do.body8.preheader:
+; CHECK-NEXT:[[TMP1:%.*]] = add i64 [[INC]], [[INC3]]
+; CHECK-NEXT:br label [[DO_BODY8:%.*]]
+; CHECK:   do.body8:
+; CHECK-NEXT:[[I_2:%.*]] = phi i64 [ [[INC9:%.*]], [[DO_BODY8]] ], [ 0, 
[[DO_BODY8_PREHEADER]] ]
+; CHECK-NEXT:[[J_2:%.*]] = phi i64 [ [[INC10:%.*]], [[DO_BODY8]] ], [ 
[[TMP1]], [[DO_BODY8_PREHEADER]] ]
+; CHECK-NEXT:tail call void @goo(i64 [[I_2]], i64 [[J_2]])
+; CHECK-NEXT:[[INC9]] = add nuw nsw i64 [[I_2]], 1
+; CHECK-NEXT:[[INC10]] = add nsw i64 [[J_2]], 1
+; CHECK-NEXT:[[T2:%.*]] = load i64, i64* @cond, align 8
+; CHECK-NEXT:[[TOBOOL12:%.*]] = icmp eq i64 [[T2]], 0
+; CHECK-NEXT:br i1 [[TOBOOL12]], label [[DO_BODY14_PREHEADER:%.*]], label 
[[DO_BODY8]]
+; CHECK:   do.body14.preheader:
+; CHECK-NEXT:br label [[DO_BODY14:%.*]]
+; CHECK:   do.body14:
+; CHECK-NEXT:[[I_3:%.*]] = phi i64 [ [[INC15:%.*]], [[DO_BODY14]] ], [ 0, 
[[DO_BODY14_PREHEADER]] ]
+; CHECK-NEXT:[[J_3:%.*]] = phi i64 [ [[INC16:%.*]], [[DO_BODY14]] ], [ 
[[INC10]], [[DO_BODY14_PREHEADER]] ]
+; CHECK-NEXT:tail call void @goo(i64 [[I_3]], i64 [[J_3]])
+; CHECK-NEXT:[[INC15]] = add nuw nsw i64 [[I_3]], 1
+; CHECK-NEXT:[[INC16]] = add nsw i64 [[J_3]], 1
+; CHECK-NEXT:[[T3:%.*]] = load i64, i64* @cond, align 8
+; CHECK-NEXT:[[TOBOOL18:%.*]] = icmp eq i64 [[T3]], 0
+; CHECK-NEXT:br i1 [[TOBOOL18]], label [[DO_BODY20_PREHEADER:%.*]], label 
[[DO_BODY14]]
+; CHECK:   do.body20.preheader:
+; CHECK-NEXT:br label [[DO_BODY20:%.*]]
+; CHECK:   do.body20:
+; CHECK-NEXT:[[I_4:%.*]] = phi i64 [ [[INC21:%.*]], [[DO_BODY20]] ], [ 0, 
[[DO_BODY20_PREHEADER]] ]
+; CHECK-NEXT:[[J_4:%.*]] = phi i64 [ [[INC22:%.*]], [[DO_BODY20]] ], [ 
[[INC16]], [[DO_BODY20_PREHEADER]] ]
+; CHECK-NEXT:tail call void @goo(i64 [[I_4]], i64 [[J_4]])
+; CHECK-NEXT:[[INC21]] = add nuw nsw i64 [[I_4]], 1
+; CHECK-NEXT:[[INC22]] = add nsw i64 [[J_4]], 1
+; CHECK-NEXT:[[T4:%.*]] = load i64, i64* @cond, align 8
+; CHECK-NEXT:[[TOBOOL24:%.*]] = icmp eq i64 [[T4]], 0
+; CHECK-NEXT:br i1 [[TOBOOL24]], label [[DO_BODY26_PREHEADER:%.*]], label 
[[DO_BODY20]]
+; CHECK:   do.body26.preheader:
+; CHECK-NEXT:br label [[DO_BODY26:%.*]]
+; CHECK:   do.body26:
+; CHECK-NEXT:[[I_5:%.*]] = phi i64 [ [[INC27:%.*]], [[DO_BODY26]] ], [ 0, 
[[DO_BODY26_PREHEADER]] ]
+; CHECK-NEXT:[[J_5:%.*]] = phi i64 [ [[INC28:%.*]], [