LGTM Ruiling
> -----Original Message----- > From: Beignet [mailto:[email protected]] On Behalf Of > rander.wang > Sent: Friday, June 16, 2017 9:50 AM > To: [email protected] > Cc: Wang, Rander <[email protected]> > Subject: [Beignet] [PATCH V2] backend: refine load/store merging algorithm > > Now it works for sequence: load(0), load(1), load(2) > but it cant work for load(2), load(0), load(1). because > it compared the last merged load and the new one not all > the loads > > for sequence: load(0), load(1), load(2). the load(0) is the > start, can find that load(1) is successor without space, so > put it to a merge fifo. then the start is moving to the top > of fifo load(1), and compared with load(2). Also load(2) can be merged > > for load(2), load(0), load(1). load(2) cant be merged with > load(0) for a space between them. So skip load(0) and mov to next > load(1).And this load(1) can be merged. But it never go back merge > load(0) > > Now change the algorithm. > (1) find all loads maybe merged arround the start by the distance to > the start. the distance is depended on data type, for 32bit data, > the > distance is 4. Put them in a list > > (2) sort the list by the distance from the start. > > (3) search the continuous sequence including the start to merge > > V2: (1)refine the sort and compare algoritm. First find all the IO > in small offset compared to start. Then call std:sort > (2)check the number of candidate IO to be favorable to performance > for most cases there is no chance to merge IO > > Signed-off-by: rander.wang <[email protected]> > --- > backend/src/llvm/llvm_loadstore_optimization.cpp | 87 > +++++++++++++++++++++--- > 1 file changed, 78 insertions(+), 9 deletions(-) > > diff --git a/backend/src/llvm/llvm_loadstore_optimization.cpp > b/backend/src/llvm/llvm_loadstore_optimization.cpp > index 5aa38be..c91c1a0 100644 > --- a/backend/src/llvm/llvm_loadstore_optimization.cpp > +++ b/backend/src/llvm/llvm_loadstore_optimization.cpp > @@ -67,7 +67,7 @@ namespace gbe { > bool isSimpleLoadStore(Value *I); > bool optimizeLoadStore(BasicBlock &BB); > > - bool isLoadStoreCompatible(Value *A, Value *B); > + bool isLoadStoreCompatible(Value *A, Value *B, int *dist, int* > elementSize, > int maxVecSize); > void mergeLoad(BasicBlock &BB, SmallVector<Instruction*, 16> > &merged); > void mergeStore(BasicBlock &BB, SmallVector<Instruction*, 16> > &merged); > bool findConsecutiveAccess(BasicBlock &BB, > @@ -109,7 +109,7 @@ namespace gbe { > return NULL; > } > > - bool GenLoadStoreOptimization::isLoadStoreCompatible(Value *A, Value *B) { > + bool GenLoadStoreOptimization::isLoadStoreCompatible(Value *A, Value *B, > int *dist, int* elementSize, int maxVecSize) { > Value *ptrA = getPointerOperand(A); > Value *ptrB = getPointerOperand(B); > unsigned ASA = getAddressSpace(A); > @@ -136,7 +136,11 @@ namespace gbe { > // The Instructions are connsecutive if the size of the first load/store > is > // the same as the offset. > int64_t sz = TD->getTypeStoreSize(Ty); > - return ((-offset) == sz); > + *dist = -offset; > + *elementSize = sz; > + > + //a insn with small distance from the search load/store is a candidate > one > + return (abs(-offset) < sz*maxVecSize); > } > > void GenLoadStoreOptimization::mergeLoad(BasicBlock &BB, > SmallVector<Instruction*, 16> &merged) { > @@ -163,6 +167,25 @@ namespace gbe { > values[i]->replaceAllUsesWith(S); > } > } > + > + class mergedInfo{ > + public: > + Instruction* mInsn; > + int mOffset; > + > + void init(Instruction* insn, int offset) > + { > + mInsn = insn; > + mOffset = offset; > + } > + }; > + > + struct offsetSorter { > + bool operator()(mergedInfo* m0, mergedInfo* m1) const { > + return m0->mOffset < m1->mOffset; > + } > + }; > + > // When searching for consecutive memory access, we do it in a small > window, > // if the window is too large, it would take up too much compiling time. > // An Important rule we have followed is don't try to change load/store > order. > @@ -177,7 +200,6 @@ namespace gbe { > > if(!isSimpleLoadStore(&*start)) return false; > > - merged.push_back(&*start); > unsigned targetAddrSpace = getAddressSpace(&*start); > > BasicBlock::iterator E = BB.end(); > @@ -187,11 +209,27 @@ namespace gbe { > unsigned maxLimit = maxVecSize * 8; > bool reordered = false; > > - for(unsigned ss = 0; J != E && ss <= maxLimit; ++ss, ++J) { > - if((isLoad && isa<LoadInst>(*J)) || (!isLoad && isa<StoreInst>(*J))) { > - if(isLoadStoreCompatible(merged[merged.size()-1], &*J)) { > - merged.push_back(&*J); > - } > + bool ready = false; > + int elementSize; > + > + SmallVector<mergedInfo *, 16> searchInsnArray; > + mergedInfo meInfoArray[16]; > + int indx = 0; > + meInfoArray[indx++].init(&*start, 0); > + > + searchInsnArray.push_back(&meInfoArray[0]); > + BasicBlock::iterator I = start; > + ++I; > + > + for(unsigned ss = 0; I!= E && ss <= maxLimit; ++ss, ++I) { > + if((isLoad && isa<LoadInst>(*I)) || (!isLoad && isa<StoreInst>(*J))) { > + int distance; > + if(isLoadStoreCompatible(searchInsnArray[0]->mInsn, &*I, &distance, > &elementSize, maxVecSize)) > + { > + meInfoArray[indx].init(&*I, distance); > + searchInsnArray.push_back(&meInfoArray[indx]); > + indx++; > + } > } else if((isLoad && isa<StoreInst>(*J))) { > // simple stop to keep read/write order > StoreInst *st = cast<StoreInst>(&*J); > @@ -214,6 +252,37 @@ namespace gbe { > if(merged.size() >= maxVecSize) break; > } > > + if(indx > 1) > + { > + //try to sort the load/store by the offset from the start > + //the farthest is at the beginning. this is easy to find the > + //continuous load/store > + std::sort(searchInsnArray.begin(), searchInsnArray.end(), > offsetSorter()); > + > + // try to find continuous loadstore insn in the candidate array > + for(unsigned i = 0; i < searchInsnArray.size(); i++) > + { > + unsigned j; > + for(j = 0; j < maxVecSize-1 && (j+i+1) < searchInsnArray.size(); j++) > + { > + if(searchInsnArray[i+j+1]->mOffset - searchInsnArray[i+j]->mOffset > != > elementSize) > + break; > + > + //this means the search load/store which offset is 0, is in the > sequence > + if(searchInsnArray[i+j]->mOffset == 0 || > searchInsnArray[i+j+1]->mOffset > == 0) > + ready = true; > + } > + > + if(j > 0 && ready) > + { > + for(unsigned k = 0; k < j+1; k++) > + merged.push_back(searchInsnArray[i+k]->mInsn); > + > + break; > + } > + } > + } > + > return reordered; > } > > -- > 2.7.4 > > _______________________________________________ > Beignet mailing list > [email protected] > https://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/beignet
