This patchset introduces one regression at utest's compiler_time_stamp. Not sure what's the root cause, will continue to investigate it when I back from the vacation next week.
> -----Original Message----- > From: Gong, Zhigang > Sent: Tuesday, September 30, 2014 12:17 PM > To: [email protected] > Cc: Gong, Zhigang > Subject: [PATCH 2/2] GBE: Add a customized loop unrolling handling > mechanism. > > By default, the unrolling threshold is relatively small. > Thus some relative large loops which access private array will not be > unrolled, > thus those private array can't be scalarized latter. And the private array is > allocated in stack which is extreme slow for Gen backend currently. > > To increase the unrolling threshold for all loops is not a good idea, as most > of > the loops don't need to do unrolling for this purpose and a large unrolling > threshold will cause a big code size and unecessary big register pressure > which > may lead to register spilling. > > So this patch introduce a trade-off pass to identify those loops which still > have > private load/store in the outer most of the loop. Then add a metadata to it to > indicate aggresive unrolling on those loops. Then do another round loop > unrolling. > > This patch with the previous small patch, can bring significant performance > improvement for some cases. I just tested with some opencv test cases, and > observed it can bring 2x to 10x improvement. > > Signed-off-by: Zhigang Gong <[email protected]> > --- > backend/src/CMakeLists.txt | 1 + > backend/src/llvm/llvm_gen_backend.hpp | 4 + > backend/src/llvm/llvm_to_gen.cpp | 12 +- > backend/src/llvm/llvm_unroll.cpp | 229 > ++++++++++++++++++++++++++++++++++ > 4 files changed, 244 insertions(+), 2 deletions(-) create mode 100644 > backend/src/llvm/llvm_unroll.cpp > > diff --git a/backend/src/CMakeLists.txt b/backend/src/CMakeLists.txt index > e57227a..e408365 100644 > --- a/backend/src/CMakeLists.txt > +++ b/backend/src/CMakeLists.txt > @@ -84,6 +84,7 @@ set (GBE_SRC > llvm/llvm_loadstore_optimization.cpp > llvm/llvm_gen_backend.hpp > llvm/llvm_gen_ocl_function.hxx > + llvm/llvm_unroll.cpp > llvm/llvm_to_gen.hpp > backend/gen/gen_mesa_disasm.c > backend/gen_insn_selection.cpp > diff --git a/backend/src/llvm/llvm_gen_backend.hpp > b/backend/src/llvm/llvm_gen_backend.hpp > index f73aafe..76b60df 100644 > --- a/backend/src/llvm/llvm_gen_backend.hpp > +++ b/backend/src/llvm/llvm_gen_backend.hpp > @@ -27,6 +27,7 @@ > #define __GBE_LLVM_GEN_BACKEND_HPP__ > > #include "llvm/Pass.h" > +#include "llvm/Analysis/LoopPass.h" > #include "sys/platform.hpp" > #include "sys/map.hpp" > #include "sys/hash_map.hpp" > @@ -98,6 +99,9 @@ namespace gbe > /*! Passer the printf function call. */ > llvm::FunctionPass* createPrintfParserPass(); > > + /* customized loop unrolling pass. */ llvm::LoopPass > + *createCustomLoopUnrollPass(); > + > /*! Add all the function call of ocl to our bitcode. */ > llvm::Module* runBitCodeLinker(llvm::Module *mod, bool strictMath); > > diff --git a/backend/src/llvm/llvm_to_gen.cpp > b/backend/src/llvm/llvm_to_gen.cpp > index 26d2a49..6fbf41f 100644 > --- a/backend/src/llvm/llvm_to_gen.cpp > +++ b/backend/src/llvm/llvm_to_gen.cpp > @@ -152,9 +152,17 @@ namespace gbe > MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars > MPM.add(createLoopIdiomPass()); // Recognize idioms > like memset. > MPM.add(createLoopDeletionPass()); // Delete dead loops > - MPM.add(createLoopUnrollPass()); // Unroll small loops > - if(optLevel > 0) > + MPM.add(createLoopUnrollPass()); //1024, 32, 1024, 512)); //Unroll > loops > + if(optLevel > 0) { > + MPM.add(createSROAPass(/*RequiresDomTree*/ false)); > + MPM.add(createGVNPass()); // Remove > redundancies > + } > + MPM.add(createCustomLoopUnrollPass()); //1024, 32, 1024, 512)); > //Unroll loops > + MPM.add(createLoopUnrollPass()); //1024, 32, 1024, 512)); //Unroll > loops > + if(optLevel > 0) { > + MPM.add(createSROAPass(/*RequiresDomTree*/ false)); > MPM.add(createGVNPass()); // Remove > redundancies > + } > MPM.add(createMemCpyOptPass()); // Remove memcpy > / form memset > MPM.add(createSCCPPass()); // Constant prop > with SCCP > > diff --git a/backend/src/llvm/llvm_unroll.cpp > b/backend/src/llvm/llvm_unroll.cpp > new file mode 100644 > index 0000000..b71c0ab > --- /dev/null > +++ b/backend/src/llvm/llvm_unroll.cpp > @@ -0,0 +1,229 @@ > +/* > + * Copyright © 2012 Intel Corporation > + * > + * This library is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2 of the License, or (at your option) any later version. > + * > + * This library is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > GNU > + * Lesser General Public License for more details. > + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with this library. If not, see > <http://www.gnu.org/licenses/>. > + */ > + > +#include <set> > +#include "llvm/Config/llvm-config.h" > +#if LLVM_VERSION_MINOR <= 2 > +#include "llvm/Function.h" > +#include "llvm/InstrTypes.h" > +#include "llvm/Instructions.h" > +#include "llvm/IntrinsicInst.h" > +#include "llvm/Module.h" > +#else > +#include "llvm/IR/Function.h" > +#include "llvm/IR/InstrTypes.h" > +#include "llvm/IR/Instructions.h" > +#include "llvm/IR/IntrinsicInst.h" > +#include "llvm/IR/Module.h" > +#endif /* LLVM_VERSION_MINOR <= 2 */ > +#include "llvm/Pass.h" > +#if LLVM_VERSION_MINOR <= 1 > +#include "llvm/Support/IRBuilder.h" > +#elif LLVM_VERSION_MINOR == 2 > +#include "llvm/IRBuilder.h" > +#else > +#include "llvm/IR/IRBuilder.h" > +#endif /* LLVM_VERSION_MINOR <= 1 */ > +#include "llvm/Support/raw_ostream.h" > +#include "llvm/PassManager.h" > +#include "llvm/Transforms/Scalar.h" > +#include "llvm/Analysis/ScalarEvolution.h" > +#include "llvm/Analysis/LoopPass.h" > +#include "llvm/Analysis/TargetTransformInfo.h" > +#include "llvm/IR/Dominators.h" > +#include "llvm/llvm_gen_backend.hpp" > +#include "sys/map.hpp" > + > + > +using namespace llvm; > + > +namespace gbe { > + class CustomLoopUnroll : public LoopPass > + { > + public: > + static char ID; > + CustomLoopUnroll() : > + LoopPass(ID) {} > + > + void getAnalysisUsage(AnalysisUsage &AU) const { > + AU.addRequired<LoopInfo>(); > + AU.addPreserved<LoopInfo>(); > + AU.addRequiredID(LoopSimplifyID); > + AU.addPreservedID(LoopSimplifyID); > + AU.addRequiredID(LCSSAID); > + AU.addPreservedID(LCSSAID); > + AU.addRequired<ScalarEvolution>(); > + AU.addPreserved<ScalarEvolution>(); > + // FIXME: Loop unroll requires LCSSA. And LCSSA requires dom info. > + // If loop unroll does not preserve dom info then LCSSA pass on next > + // loop will receive invalid dom info. > + // For now, recreate dom info, if loop is unrolled. > + AU.addPreserved<DominatorTreeWrapperPass>(); > + > + } > + > + // Returns the value associated with the given metadata node name > (for > + // example, "llvm.loop.unroll.count"). If no such named metadata > node > + // exists, then nullptr is returned. > + static const ConstantInt *GetUnrollMetadataValue(const Loop *L, > + StringRef > Name) { > + MDNode *LoopID = L->getLoopID(); > + if (!LoopID) return nullptr; > + > + // First operand should refer to the loop id itself. > + assert(LoopID->getNumOperands() > 0 && "requires at least one > operand"); > + assert(LoopID->getOperand(0) == LoopID && "invalid loop id"); > + > + for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) { > + const MDNode *MD = > dyn_cast<MDNode>(LoopID->getOperand(i)); > + if (!MD) continue; > + > + const MDString *S = dyn_cast<MDString>(MD->getOperand(0)); > + if (!S) continue; > + > + if (Name.equals(S->getString())) { > + assert(MD->getNumOperands() == 2 && > + "Unroll hint metadata should have two operands."); > + return cast<ConstantInt>(MD->getOperand(1)); > + } > + } > + return nullptr; > + } > + > + void setUnrollID(Loop *L, bool enable) { > + if (!enable && disabledLoops.find(L) != disabledLoops.end()) > + return; > + LLVMContext &Context = L->getHeader()->getContext(); > + SmallVector<Value *, 2> forceUnroll; > + forceUnroll.push_back(MDString::get(Context, > "llvm.loop.unroll.enable")); > + forceUnroll.push_back(ConstantInt::get(Type::getInt1Ty(Context), > enable)); > + MDNode *forceUnrollNode = MDNode::get(Context, forceUnroll); > + SmallVector<Value *, 4> Vals; > + Vals.push_back(NULL); > + Vals.push_back(forceUnrollNode); > + MDNode *NewLoopID = MDNode::get(Context, Vals); > + // Set operand 0 to refer to the loop id itself. > + NewLoopID->replaceOperandWith(0, NewLoopID); > + L->setLoopID(NewLoopID); > + if (!enable) > + disabledLoops.insert(L); > + } > + > + static bool hasPrivateLoadStore(Loop *L) { > + const std::vector<Loop*> subLoops = L->getSubLoops(); > + std::set<BasicBlock*> subBlocks, blocks; > + > + for(auto l : subLoops) > + for(auto bb : l->getBlocks()) > + subBlocks.insert(bb); > + for(auto bb : L->getBlocks()) > + if (subBlocks.find(bb) == subBlocks.end()) > + blocks.insert(bb); > + for(auto bb : blocks) { > + for (BasicBlock::iterator inst = bb->begin(), instE = bb->end(); > inst != instE; ++inst) { > + unsigned addrSpace = 0; > + if (isa<LoadInst>(*inst)) { > + LoadInst *ld = cast<LoadInst>(&*inst); > + addrSpace = ld->getPointerAddressSpace(); > + } > + else if (isa<StoreInst>(*inst)) { > + StoreInst *st = cast<StoreInst>(&*inst); > + addrSpace = st->getPointerAddressSpace(); > + } > + if (addrSpace == 0) > + return true; > + } > + } > + return false; > + } > + // If one loop has very large self trip count > + // we don't want to unroll it. > + // self trip count means trip count divide by the parent's trip count. > for > example > + // for (int i = 0; i < 16; i++) { > + // for (int j = 0; j < 4; j++) { > + // for (int k = 0; k < 2; k++) { > + // ... > + // } > + // ... > + // } > + // The inner loops j and k could be unrolled, but the loop i will not > be > unrolled. > + // The return value true means the L could be unrolled, otherwise, it > could not > + // be unrolled. > + bool handleParentLoops(Loop *L, LPPassManager &LPM) { > + Loop *currL = L; > + ScalarEvolution *SE = &getAnalysis<ScalarEvolution>(); > + BasicBlock *latchBlock = currL->getLoopLatch(); > + unsigned currTripCount = 0; > + bool shouldUnroll = true; > + if (latchBlock) > + currTripCount = SE->getSmallConstantTripCount(L, latchBlock); > + > + while(currL) { > + Loop *parentL = currL->getParentLoop(); > + unsigned parentTripCount = 0; > + if (parentL) { > + BasicBlock *parentLatchBlock = parentL->getLoopLatch(); > + if (parentLatchBlock) > + parentTripCount = SE->getSmallConstantTripCount(parentL, > parentLatchBlock); > + } > + if (parentTripCount == 0 || currTripCount == 0 || currTripCount / > parentTripCount > 16) { > + if (currL == L) > + shouldUnroll = false; > + setUnrollID(currL, false); > + if (currL != L) > + LPM.deleteLoopFromQueue(currL); > + } > + currL = parentL; > + currTripCount = parentTripCount; > + } > + return shouldUnroll; > + } > + > + // Analyze the outermost BBs of this loop, if there are > + // some private load or store, we change it's loop meta data > + // to indicate more aggresive unrolling on it. > + virtual bool runOnLoop(Loop *L, LPPassManager &LPM) { > + const ConstantInt *Enable = GetUnrollMetadataValue(L, > "llvm.loop.unroll.enable"); > + if (Enable) > + return false; > + const ConstantInt *Count = GetUnrollMetadataValue(L, > "llvm.loop.unroll.count"); > + if (Count) > + return false; > + > + if (!handleParentLoops(L, LPM)) > + return false; > + > + if (!hasPrivateLoadStore(L)) > + return false; > + setUnrollID(L, true); > + return true; > + } > + > + virtual const char *getPassName() const { > + return "SPIR backend: custom loop unrolling pass"; > + } > + private: > + std::set<Loop *> disabledLoops; > + > + }; > + > + char CustomLoopUnroll::ID = 0; > + > + LoopPass *createCustomLoopUnrollPass() { > + return new CustomLoopUnroll(); > + } > +} // end namespace > -- > 1.8.3.2 _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
