Ping for review of these 2 patches and the previous 2 patches. Thanks.
On Thu, Aug 28, 2014 at 10:46:03AM +0800, Zhigang Gong wrote: > Previous restrication is that the vector size must be multiple > of DWORD. This restrication prevent the vload2/3 of char or > vload3 of ushort to be optimized. This patch relax this restrication > on the vload path. > > Signed-off-by: Zhigang Gong <[email protected]> > --- > backend/src/backend/gen_context.cpp | 6 ++-- > backend/src/backend/gen_insn_selection.cpp | 39 > +++++++++++------------- > backend/src/llvm/llvm_gen_backend.cpp | 3 +- > backend/src/llvm/llvm_loadstore_optimization.cpp | 3 +- > 4 files changed, 24 insertions(+), 27 deletions(-) > > diff --git a/backend/src/backend/gen_context.cpp > b/backend/src/backend/gen_context.cpp > index ba4a8f8..883fa39 100644 > --- a/backend/src/backend/gen_context.cpp > +++ b/backend/src/backend/gen_context.cpp > @@ -1693,7 +1693,7 @@ namespace gbe > void GenContext::emitUnpackByteInstruction(const SelectionInstruction > &insn) { > const GenRegister src = ra->genReg(insn.src(0)); > for(uint32_t i = 0; i < insn.dstNum; i++) { > - p->MOV(ra->genReg(insn.dst(i)), GenRegister::splitReg(src, > insn.dstNum, i)); > + p->MOV(ra->genReg(insn.dst(i)), GenRegister::splitReg(src, > insn.extra.elem, i)); > } > } > > @@ -1702,12 +1702,12 @@ namespace gbe > p->push(); > if(simdWidth == 8) { > for(uint32_t i = 0; i < insn.srcNum; i++) > - p->MOV(GenRegister::splitReg(dst, insn.srcNum, i), > ra->genReg(insn.src(i))); > + p->MOV(GenRegister::splitReg(dst, insn.extra.elem, i), > ra->genReg(insn.src(i))); > } else { > // when destination expands two registers, the source must span two > registers. 
> p->curr.execWidth = 8; > for(uint32_t i = 0; i < insn.srcNum; i++) { > - GenRegister dsti = GenRegister::splitReg(dst, insn.srcNum, i); > + GenRegister dsti = GenRegister::splitReg(dst, insn.extra.elem, i); > GenRegister src = ra->genReg(insn.src(i)); > > p->curr.quarterControl = 0; > diff --git a/backend/src/backend/gen_insn_selection.cpp > b/backend/src/backend/gen_insn_selection.cpp > index 8478616..1258e54 100644 > --- a/backend/src/backend/gen_insn_selection.cpp > +++ b/backend/src/backend/gen_insn_selection.cpp > @@ -575,10 +575,10 @@ namespace gbe > void BYTE_SCATTER(Reg addr, Reg src, uint32_t elemSize, uint32_t bti); > /*! DWord scatter (for constant cache read) */ > void DWORD_GATHER(Reg dst, Reg addr, uint32_t bti); > - /*! Unpack the uint to char4 */ > - void UNPACK_BYTE(const GenRegister *dst, const GenRegister src, uint32_t > elemNum); > - /*! pack the char4 to uint */ > - void PACK_BYTE(const GenRegister dst, const GenRegister *src, uint32_t > elemNum); > + /*! Unpack the uint to charN */ > + void UNPACK_BYTE(const GenRegister *dst, const GenRegister src, uint32_t > elemSize, uint32_t elemNum); > + /*! pack the charN to uint */ > + void PACK_BYTE(const GenRegister dst, const GenRegister *src, uint32_t > elemSize, uint32_t elemNum); > /*! Extended math function (2 arguments) */ > void MATH(Reg dst, uint32_t function, Reg src0, Reg src1); > /*! 
Extended math function (1 argument) */ > @@ -1255,16 +1255,18 @@ namespace gbe > srcVector->reg = &insn->src(0); > } > > - void Selection::Opaque::UNPACK_BYTE(const GenRegister *dst, const > GenRegister src, uint32_t elemNum) { > + void Selection::Opaque::UNPACK_BYTE(const GenRegister *dst, const > GenRegister src, uint32_t elemSize, uint32_t elemNum) { > SelectionInstruction *insn = this->appendInsn(SEL_OP_UNPACK_BYTE, > elemNum, 1); > insn->src(0) = src; > + insn->extra.elem = 4 / elemSize; > for(uint32_t i = 0; i < elemNum; i++) > insn->dst(i) = dst[i]; > } > - void Selection::Opaque::PACK_BYTE(const GenRegister dst, const GenRegister > *src, uint32_t elemNum) { > + void Selection::Opaque::PACK_BYTE(const GenRegister dst, const GenRegister > *src, uint32_t elemSize, uint32_t elemNum) { > SelectionInstruction *insn = this->appendInsn(SEL_OP_PACK_BYTE, 1, > elemNum); > for(uint32_t i = 0; i < elemNum; i++) > insn->src(i) = src[i]; > + insn->extra.elem = 4 / elemSize; > insn->dst(0) = dst; > } > > @@ -2862,9 +2864,7 @@ namespace gbe > for(uint32_t i = 0; i < valueNum; i++) > dst[i] = sel.selReg(insn.getValue(i), getType(family)); > > - uint32_t tmpRegNum = typeSize*valueNum / 4; > - if (tmpRegNum == 0) > - tmpRegNum = 1; > + uint32_t tmpRegNum = (typeSize*valueNum + 3) / 4; > vector<GenRegister> tmp(tmpRegNum); > vector<GenRegister> tmp2(tmpRegNum); > vector<Register> tmpReg(tmpRegNum); > @@ -2875,15 +2875,10 @@ namespace gbe > > readDWord(sel, tmp, tmp2, address, tmpRegNum, insn.getAddressSpace(), > bti); > > - if (valueNum > 1) { > - for(uint32_t i = 0; i < tmpRegNum; i++) > - sel.UNPACK_BYTE(dst.data() + i * 4/typeSize, tmp[i], 4/typeSize); > - } > - else { > - if (elemSize == GEN_BYTE_SCATTER_WORD) > - sel.MOV(GenRegister::retype(dst[0], GEN_TYPE_UW), > sel.unpacked_uw(tmpReg[0])); > - else if (elemSize == GEN_BYTE_SCATTER_BYTE) > - sel.MOV(GenRegister::retype(dst[0], GEN_TYPE_UB), > sel.unpacked_ub(tmpReg[0])); > + for(uint32_t i = 0; i < tmpRegNum; i++) { > + 
unsigned int elemNum = (valueNum - i * (4 / typeSize)) > 4/typeSize ? > + 4/typeSize : (valueNum - i * (4 / typeSize)); > + sel.UNPACK_BYTE(dst.data() + i * 4/typeSize, tmp[i], typeSize, > elemNum); > } > } > > @@ -2948,7 +2943,7 @@ namespace gbe > for(uint32_t i = 0; i < valueNum; i++) > dst[i] = sel.selReg(insn.getValue(i), getType(family)); > > - uint32_t effectDataNum = typeSize*valueNum / 4; > + uint32_t effectDataNum = (typeSize*valueNum + 3) / 4; > vector<GenRegister> tmp(effectDataNum + 1); > vector<GenRegister> tmp2(effectDataNum + 1); > vector<GenRegister> effectData(effectDataNum); > @@ -2986,7 +2981,9 @@ namespace gbe > getEffectByteData(sel, effectData, tmp, effectDataNum, address, > simdWidth); > > for(uint32_t i = 0; i < effectDataNum; i++) { > - sel.UNPACK_BYTE(dst.data() + i * 4/typeSize, effectData[i], > 4/typeSize); > + unsigned int elemNum = (valueNum - i * (4 / typeSize)) > > 4/typeSize ? > + 4/typeSize : (valueNum - i * (4 / > typeSize)); > + sel.UNPACK_BYTE(dst.data() + i * 4/typeSize, effectData[i], > typeSize, elemNum); > } > } else { > GBE_ASSERT(insn.getValueNum() == 1); > @@ -3148,7 +3145,7 @@ namespace gbe > vector<GenRegister> tmp(tmpRegNum); > for(uint32_t i = 0; i < tmpRegNum; i++) { > tmp[i] = GenRegister::udxgrf(simdWidth, sel.reg(FAMILY_DWORD)); > - sel.PACK_BYTE(tmp[i], value.data() + i * 4/typeSize, 4/typeSize); > + sel.PACK_BYTE(tmp[i], value.data() + i * 4/typeSize, typeSize, > 4/typeSize); > } > > sel.UNTYPED_WRITE(addr, tmp.data(), tmpRegNum, bti); > diff --git a/backend/src/llvm/llvm_gen_backend.cpp > b/backend/src/llvm/llvm_gen_backend.cpp > index b956bc6..8f0d5c2 100644 > --- a/backend/src/llvm/llvm_gen_backend.cpp > +++ b/backend/src/llvm/llvm_gen_backend.cpp > @@ -3515,7 +3515,8 @@ handle_write_image: > emitBatchLoadOrStore(type, elemNum, llvmValues, ptr, addrSpace, > elemType, isLoad, binding, dwAligned); > } > } > - else if((dataFamily==ir::FAMILY_WORD && elemNum%2==0) || (dataFamily > == ir::FAMILY_BYTE && elemNum%4 
== 0)) { > + else if((dataFamily == ir::FAMILY_WORD && (isLoad || elemNum % 2 == > 0)) || > + (dataFamily == ir::FAMILY_BYTE && (isLoad || elemNum % 4 == > 0))) { > emitBatchLoadOrStore(type, elemNum, llvmValues, ptr, addrSpace, > elemType, isLoad, binding, dwAligned); > } else { > for (uint32_t elemID = 0; elemID < elemNum; elemID++) { > diff --git a/backend/src/llvm/llvm_loadstore_optimization.cpp > b/backend/src/llvm/llvm_loadstore_optimization.cpp > index 19726b0..ae91af7 100644 > --- a/backend/src/llvm/llvm_loadstore_optimization.cpp > +++ b/backend/src/llvm/llvm_loadstore_optimization.cpp > @@ -259,8 +259,7 @@ namespace gbe { > while(size > 1) { > unsigned vecSize = (size >= 16) ? 16 : > (size >= 8 ? 8 : > - (size >= 4 ? 4 : > - (size >= 2 ? 2 : size))); > + (size >= 4 ? 4 : size)); > SmallVector<Instruction*, 16> mergedVec(merged.begin() + pos, > merged.begin() + pos + vecSize); > if(isLoad) > mergeLoad(BB, mergedVec); > -- > 1.8.3.2 > > _______________________________________________ > Beignet mailing list > [email protected] > http://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
