[llvm-branch-commits] [llvm] 638a188 - [LV] Generate RT checks up-front and remove them if required.

2020-09-13 Thread Florian Hahn via llvm-branch-commits

Author: Florian Hahn
Date: 2020-09-13T18:47:56+01:00
New Revision: 638a188b6a0262fe26ad62353d71cdd384c40bd9

URL: https://github.com/llvm/llvm-project/commit/638a188b6a0262fe26ad62353d71cdd384c40bd9
DIFF: https://github.com/llvm/llvm-project/commit/638a188b6a0262fe26ad62353d71cdd384c40bd9.diff

LOG: [LV] Generate RT checks up-front and remove them if required.

Differential Revision: https://reviews.llvm.org/D75980

Added: 
llvm/test/Transforms/LoopVectorize/runtime-drop-crash.ll

Modified: 
llvm/include/llvm/Transforms/Utils/LoopUtils.h
llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
llvm/lib/Transforms/Utils/LoopUtils.cpp
llvm/lib/Transforms/Utils/LoopVersioning.cpp
llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/test/Transforms/LoopVectorize/AArch64/pr36032.ll

llvm/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll

Removed: 




diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
index 70c8c84c857b..bb72c19f8532 100644
--- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
@@ -35,6 +35,7 @@ class MemorySSAUpdater;
 class OptimizationRemarkEmitter;
 class PredIteratorCache;
 class ScalarEvolution;
+class ScalarEvolutionExpander;
 class SCEV;
 class SCEVExpander;
 class TargetLibraryInfo;
@@ -446,7 +447,7 @@ Loop *cloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM,
 std::pair
 addRuntimeChecks(Instruction *Loc, Loop *TheLoop,
  const SmallVectorImpl &PointerChecks,
- ScalarEvolution *SE);
+ SCEVExpander &Expander);
 
 } // end namespace llvm
 

diff --git a/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h b/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
index 77360cb2671d..829bdcbb2588 100644
--- a/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
+++ b/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
@@ -16,6 +16,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
 #include "llvm/Analysis/ScalarEvolutionNormalization.h"
@@ -199,6 +200,8 @@ class SCEVExpander : public SCEVVisitor {
 ChainedPhis.clear();
   }
 
+  ScalarEvolution *getSE() { return &SE; }
+
   /// Return a vector containing all instructions inserted during expansion.
   SmallVector getAllInsertedInstructions() const {
 SmallVector Result;
@@ -509,10 +512,12 @@ class SCEVExpanderCleaner {
   SCEVExpanderCleaner(SCEVExpander &Expander, DominatorTree &DT)
   : Expander(Expander), DT(DT), ResultUsed(false) {}
 
-  ~SCEVExpanderCleaner();
+  ~SCEVExpanderCleaner() { cleanup(); }
 
   /// Indicate that the result of the expansion is used.
   void markResultUsed() { ResultUsed = true; }
+
+  void cleanup();
 };
 } // namespace llvm
 

diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index d7cd9b19b8d5..dd808619f67a 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -1574,7 +1574,8 @@ struct PointerBounds {
 /// in \p TheLoop.  \return the values for the bounds.
 static PointerBounds expandBounds(const RuntimeCheckingPtrGroup *CG,
   Loop *TheLoop, Instruction *Loc,
-  SCEVExpander &Exp, ScalarEvolution *SE) {
+  SCEVExpander &Exp) {
+  ScalarEvolution *SE = Exp.getSE();
   // TODO: Add helper to retrieve pointers to CG.
   Value *Ptr = CG->RtCheck.Pointers[CG->Members[0]].PointerValue;
   const SCEV *Sc = SE->getSCEV(Ptr);
@@ -1613,16 +1614,15 @@ static PointerBounds expandBounds(const RuntimeCheckingPtrGroup *CG,
 /// lower bounds for both pointers in the check.
 static SmallVector, 4>
 expandBounds(const SmallVectorImpl &PointerChecks, Loop *L,
- Instruction *Loc, ScalarEvolution *SE, SCEVExpander &Exp) {
+ Instruction *Loc, SCEVExpander &Exp) {
   SmallVector, 4> ChecksWithBounds;
 
   // Here we're relying on the SCEV Expander's cache to only emit code for the
   // same bounds once.
   transform(PointerChecks, std::back_inserter(ChecksWithBounds),
 [&](const RuntimePointerCheck &Check) {
-  PointerBounds First = expandBounds(Check.first, L, Loc, Exp, SE),
-Second =
-expandBounds(Check.second, L, Loc, Exp, SE);
+  PointerBounds First = expandBounds(Check.first, L, Loc, Exp),
+Second = expandBounds(Check.second, L, Loc, Exp);
   return std::make_pair(First, Second);
 });
 
@@ -1632,

[llvm-branch-commits] [llvm] d96c31f - [LV] Allow large RT checks, if they are a fraction of the scalar cost.

2020-09-13 Thread Florian Hahn via llvm-branch-commits

Author: Florian Hahn
Date: 2020-09-13T18:48:41+01:00
New Revision: d96c31f3b74cf84a294132012aa2b9289aa950b4

URL: https://github.com/llvm/llvm-project/commit/d96c31f3b74cf84a294132012aa2b9289aa950b4
DIFF: https://github.com/llvm/llvm-project/commit/d96c31f3b74cf84a294132012aa2b9289aa950b4.diff

LOG: [LV] Allow large RT checks, if they are a fraction of the scalar cost.

Differential Revision: https://reviews.llvm.org/D75981

Added: 

llvm/test/Transforms/LoopVectorize/AArch64/runtime-check-size-based-threshold.ll

Modified: 
llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Removed: 




diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
index 46d107128ce1..e5b519bc0928 100644
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -171,7 +171,8 @@ class LoopVectorizationRequirements {
 
   void addRuntimePointerChecks(unsigned Num) { NumRuntimePointerChecks = Num; }
 
-  bool doesNotMeet(Function *F, Loop *L, const LoopVectorizeHints &Hints);
+  bool doesNotMeet(Function *F, Loop *L, const LoopVectorizeHints &Hints,
+   bool CanIgnoreRTThreshold);
 
 private:
   unsigned NumRuntimePointerChecks = 0;

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 157620c30b98..da88272e9d03 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -242,8 +242,9 @@ void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) {
   }
 }
 
-bool LoopVectorizationRequirements::doesNotMeet(
-Function *F, Loop *L, const LoopVectorizeHints &Hints) {
+bool LoopVectorizationRequirements::doesNotMeet(Function *F, Loop *L,
+const LoopVectorizeHints &Hints,
+bool IgnoreRTThreshold) {
   const char *PassName = Hints.vectorizeAnalysisPassName();
   bool Failed = false;
   if (UnsafeAlgebraInst && !Hints.allowReordering()) {
@@ -262,8 +263,12 @@ bool LoopVectorizationRequirements::doesNotMeet(
   NumRuntimePointerChecks > PragmaVectorizeMemoryCheckThreshold;
   bool ThresholdReached =
   NumRuntimePointerChecks > VectorizerParams::RuntimeMemoryCheckThreshold;
-  if ((ThresholdReached && !Hints.allowReordering()) ||
-  PragmaThresholdReached) {
+  bool DoubleThresholdReached =
+  NumRuntimePointerChecks >
+  2 * VectorizerParams::RuntimeMemoryCheckThreshold;
+  if ((!IgnoreRTThreshold && ((ThresholdReached && !Hints.allowReordering()) ||
+  PragmaThresholdReached)) ||
+  (DoubleThresholdReached && !Hints.allowReordering())) {
 ORE.emit([&]() {
   return OptimizationRemarkAnalysisAliasing(PassName, "CantReorderMemOps",
 L->getStartLoc(),

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index f9a0e6f35f50..b4ba9e5f8684 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -411,7 +411,9 @@ static Optional getSmallBestKnownTC(ScalarEvolution &SE, Loop *L) {
 
   return None;
 }
+
 struct GeneratedRTChecks;
+
 namespace llvm {
 /// InnerLoopVectorizer vectorizes loops which contain only one basic
 /// block to a specified vectorization factor (VF).
@@ -1432,9 +1434,6 @@ class LoopVectorizationCostModel {
 Scalars.clear();
   }
 
-private:
-  unsigned NumPredStores = 0;
-
   /// \return An upper bound for the vectorization factor, a power-of-2 larger
   /// than zero. One is returned if vectorization should best be avoided due
   /// to cost.
@@ -1449,16 +1448,21 @@ class LoopVectorizationCostModel {
   /// actually taken place).
   using VectorizationCostTy = std::pair;
 
+  /// Returns the execution time cost of an instruction for a given vector
+  /// width. Vector width of one means scalar.
+  VectorizationCostTy getInstructionCost(Instruction *I, ElementCount VF);
+
+  float ScalarCost;
+
+private:
+  unsigned NumPredStores = 0;
+
   /// Returns the expected execution cost. The unit of the cost does
   /// not matter because we use the 'cost' units to compare different
   /// vector widths. The cost that is returned is *not* normalized by
   /// the factor width.
   VectorizationCostTy expectedCost(ElementCount VF);
 
-  /// Returns the execution time cost of an instruction for a given vector
-  /// width. Vector width of one means scalar.
-  VectorizationCostTy getInstructionCost(Instructio