[llvm-branch-commits] [llvm] 638a188 - [LV] Generate RT checks up-front and remove them if required.
Author: Florian Hahn Date: 2020-09-13T18:47:56+01:00 New Revision: 638a188b6a0262fe26ad62353d71cdd384c40bd9 URL: https://github.com/llvm/llvm-project/commit/638a188b6a0262fe26ad62353d71cdd384c40bd9 DIFF: https://github.com/llvm/llvm-project/commit/638a188b6a0262fe26ad62353d71cdd384c40bd9.diff LOG: [LV] Generate RT checks up-front and remove them if required. Differential Revision: https://reviews.llvm.org/D75980 Added: llvm/test/Transforms/LoopVectorize/runtime-drop-crash.ll Modified: llvm/include/llvm/Transforms/Utils/LoopUtils.h llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h llvm/lib/Transforms/Utils/LoopUtils.cpp llvm/lib/Transforms/Utils/LoopVersioning.cpp llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp llvm/lib/Transforms/Vectorize/LoopVectorize.cpp llvm/test/Transforms/LoopVectorize/AArch64/pr36032.ll llvm/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll Removed: diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h index 70c8c84c857b..bb72c19f8532 100644 --- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h +++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h @@ -35,6 +35,7 @@ class MemorySSAUpdater; class OptimizationRemarkEmitter; class PredIteratorCache; class ScalarEvolution; +class ScalarEvolutionExpander; class SCEV; class SCEVExpander; class TargetLibraryInfo; @@ -446,7 +447,7 @@ Loop *cloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM, std::pair addRuntimeChecks(Instruction *Loc, Loop *TheLoop, const SmallVectorImpl &PointerChecks, - ScalarEvolution *SE); + SCEVExpander &Expander); } // end namespace llvm diff --git a/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h b/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h index 77360cb2671d..829bdcbb2588 100644 --- a/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h +++ b/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h @@ -16,6 +16,7 @@ #include 
"llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/Optional.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/ScalarEvolutionNormalization.h" @@ -199,6 +200,8 @@ class SCEVExpander : public SCEVVisitor { ChainedPhis.clear(); } + ScalarEvolution *getSE() { return &SE; } + /// Return a vector containing all instructions inserted during expansion. SmallVector getAllInsertedInstructions() const { SmallVector Result; @@ -509,10 +512,12 @@ class SCEVExpanderCleaner { SCEVExpanderCleaner(SCEVExpander &Expander, DominatorTree &DT) : Expander(Expander), DT(DT), ResultUsed(false) {} - ~SCEVExpanderCleaner(); + ~SCEVExpanderCleaner() { cleanup(); } /// Indicate that the result of the expansion is used. void markResultUsed() { ResultUsed = true; } + + void cleanup(); }; } // namespace llvm diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp index d7cd9b19b8d5..dd808619f67a 100644 --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -1574,7 +1574,8 @@ struct PointerBounds { /// in \p TheLoop. \return the values for the bounds. static PointerBounds expandBounds(const RuntimeCheckingPtrGroup *CG, Loop *TheLoop, Instruction *Loc, - SCEVExpander &Exp, ScalarEvolution *SE) { + SCEVExpander &Exp) { + ScalarEvolution *SE = Exp.getSE(); // TODO: Add helper to retrieve pointers to CG. Value *Ptr = CG->RtCheck.Pointers[CG->Members[0]].PointerValue; const SCEV *Sc = SE->getSCEV(Ptr); @@ -1613,16 +1614,15 @@ static PointerBounds expandBounds(const RuntimeCheckingPtrGroup *CG, /// lower bounds for both pointers in the check. 
static SmallVector<std::pair<PointerBounds, PointerBounds>, 4> expandBounds(const SmallVectorImpl<RuntimePointerCheck> &PointerChecks, Loop *L, - Instruction *Loc, ScalarEvolution *SE, SCEVExpander &Exp) { + Instruction *Loc, SCEVExpander &Exp) { SmallVector<std::pair<PointerBounds, PointerBounds>, 4> ChecksWithBounds; // Here we're relying on the SCEV Expander's cache to only emit code for the // same bounds once. transform(PointerChecks, std::back_inserter(ChecksWithBounds), [&](const RuntimePointerCheck &Check) { - PointerBounds First = expandBounds(Check.first, L, Loc, Exp, SE), -Second = -expandBounds(Check.second, L, Loc, Exp, SE); + PointerBounds First = expandBounds(Check.first, L, Loc, Exp), +Second = expandBounds(Check.second, L, Loc, Exp); return std::make_pair(First, Second); }); @@ -1632,
[llvm-branch-commits] [llvm] d96c31f - [LV] Allow large RT checks, if they are a fraction of the scalar cost.
Author: Florian Hahn Date: 2020-09-13T18:48:41+01:00 New Revision: d96c31f3b74cf84a294132012aa2b9289aa950b4 URL: https://github.com/llvm/llvm-project/commit/d96c31f3b74cf84a294132012aa2b9289aa950b4 DIFF: https://github.com/llvm/llvm-project/commit/d96c31f3b74cf84a294132012aa2b9289aa950b4.diff LOG: [LV] Allow large RT checks, if they are a fraction of the scalar cost. Differential Revision: https://reviews.llvm.org/D75981 Added: llvm/test/Transforms/LoopVectorize/AArch64/runtime-check-size-based-threshold.ll Modified: llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp llvm/lib/Transforms/Vectorize/LoopVectorize.cpp Removed: diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h index 46d107128ce1..e5b519bc0928 100644 --- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h +++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h @@ -171,7 +171,8 @@ class LoopVectorizationRequirements { void addRuntimePointerChecks(unsigned Num) { NumRuntimePointerChecks = Num; } - bool doesNotMeet(Function *F, Loop *L, const LoopVectorizeHints &Hints); + bool doesNotMeet(Function *F, Loop *L, const LoopVectorizeHints &Hints, + bool CanIgnoreRTThreshold); private: unsigned NumRuntimePointerChecks = 0; diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp index 157620c30b98..da88272e9d03 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp @@ -242,8 +242,9 @@ void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) { } } -bool LoopVectorizationRequirements::doesNotMeet( -Function *F, Loop *L, const LoopVectorizeHints &Hints) { +bool LoopVectorizationRequirements::doesNotMeet(Function *F, Loop *L, +const 
LoopVectorizeHints &Hints, +bool IgnoreRTThreshold) { const char *PassName = Hints.vectorizeAnalysisPassName(); bool Failed = false; if (UnsafeAlgebraInst && !Hints.allowReordering()) { @@ -262,8 +263,12 @@ bool LoopVectorizationRequirements::doesNotMeet( NumRuntimePointerChecks > PragmaVectorizeMemoryCheckThreshold; bool ThresholdReached = NumRuntimePointerChecks > VectorizerParams::RuntimeMemoryCheckThreshold; - if ((ThresholdReached && !Hints.allowReordering()) || - PragmaThresholdReached) { + bool DoubleThresholdReached = + NumRuntimePointerChecks > + 2 * VectorizerParams::RuntimeMemoryCheckThreshold; + if ((!IgnoreRTThreshold && ((ThresholdReached && !Hints.allowReordering()) || + PragmaThresholdReached)) || + (DoubleThresholdReached && !Hints.allowReordering())) { ORE.emit([&]() { return OptimizationRemarkAnalysisAliasing(PassName, "CantReorderMemOps", L->getStartLoc(), diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index f9a0e6f35f50..b4ba9e5f8684 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -411,7 +411,9 @@ static Optional getSmallBestKnownTC(ScalarEvolution &SE, Loop *L) { return None; } + struct GeneratedRTChecks; + namespace llvm { /// InnerLoopVectorizer vectorizes loops which contain only one basic /// block to a specified vectorization factor (VF). @@ -1432,9 +1434,6 @@ class LoopVectorizationCostModel { Scalars.clear(); } -private: - unsigned NumPredStores = 0; - /// \return An upper bound for the vectorization factor, a power-of-2 larger /// than zero. One is returned if vectorization should best be avoided due /// to cost. @@ -1449,16 +1448,21 @@ class LoopVectorizationCostModel { /// actually taken place). using VectorizationCostTy = std::pair; + /// Returns the execution time cost of an instruction for a given vector + /// width. Vector width of one means scalar. 
+ VectorizationCostTy getInstructionCost(Instruction *I, ElementCount VF); + + float ScalarCost; + +private: + unsigned NumPredStores = 0; + /// Returns the expected execution cost. The unit of the cost does /// not matter because we use the 'cost' units to compare different /// vector widths. The cost that is returned is *not* normalized by /// the factor width. VectorizationCostTy expectedCost(ElementCount VF); - /// Returns the execution time cost of an instruction for a given vector - /// width. Vector width of one means scalar. - VectorizationCostTy getInstructionCost(Instructio