LGTM, will push it later. Thanks for the patch.
On Thu, Aug 22, 2013 at 04:52:04PM +0800, Ruiling Song wrote: > When group size not aligned to simdWidth, prediction any8/16h will > calculate pmask also using flag register bits mapped to non-active > lanes. As flag register is not cleared by default, any8/16h used > for jmpi instruction may cause wrong jump, and possibly infinite loop. > > So, we clear Flag register to 0 to make any8/16h prediction work correct. > > Signed-off-by: Ruiling Song <[email protected]> > --- > backend/src/backend/gen_context.cpp | 13 +++++++++++++ > backend/src/backend/gen_context.hpp | 1 + > backend/src/backend/gen_insn_selection.cpp | 3 +++ > 3 files changed, 17 insertions(+) > > diff --git a/backend/src/backend/gen_context.cpp > b/backend/src/backend/gen_context.cpp > index 6eeab51..a029719 100644 > --- a/backend/src/backend/gen_context.cpp > +++ b/backend/src/backend/gen_context.cpp > @@ -88,6 +88,18 @@ namespace gbe > } > } > > + void GenContext::clearFlagRegister(void) { > + // when group size not aligned to simdWidth, flag register need clear to > + // make prediction(any8/16h) work correctly > + p->push(); > + p->curr.predicate = GEN_PREDICATE_NONE; > + p->curr.noMask = 1; > + p->curr.execWidth = 1; > + p->MOV(GenRegister::retype(GenRegister::flag(0,0), GEN_TYPE_UD), > GenRegister::immud(0x0)); > + p->MOV(GenRegister::retype(GenRegister::flag(1,0), GEN_TYPE_UD), > GenRegister::immud(0x0)); > + p->pop(); > + } > + > void GenContext::emitStackPointer(void) { > using namespace ir; > > @@ -1091,6 +1103,7 @@ namespace gbe > schedulePostRegAllocation(*this, *this->sel); > if (OCL_OUTPUT_REG_ALLOC) > ra->outputAllocation(); > + this->clearFlagRegister(); > this->emitStackPointer(); > this->emitInstructionStream(); > this->patchBranches(); > diff --git a/backend/src/backend/gen_context.hpp > b/backend/src/backend/gen_context.hpp > index 8b481d0..f66ec95 100644 > --- a/backend/src/backend/gen_context.hpp > +++ b/backend/src/backend/gen_context.hpp > @@ -61,6 +61,7 @@ namespace gbe > 
INLINE const ir::Function &getFunction(void) const { return fn; } > /*! Simd width chosen for the current function */ > INLINE uint32_t getSimdWidth(void) const { return simdWidth; } > + void clearFlagRegister(void); > /*! Emit the per-lane stack pointer computation */ > void emitStackPointer(void); > /*! Emit the instructions */ > diff --git a/backend/src/backend/gen_insn_selection.cpp > b/backend/src/backend/gen_insn_selection.cpp > index 55db48e..bca08ba 100644 > --- a/backend/src/backend/gen_insn_selection.cpp > +++ b/backend/src/backend/gen_insn_selection.cpp > @@ -2621,6 +2621,9 @@ namespace gbe > sel.CMP(GEN_CONDITIONAL_G, ip, GenRegister::immuw(nextLabel)); > > // Branch to the jump target > + // XXX TODO: For group size not aligned to simdWidth, ALL8/16h may > not > + // work correct, as flag register bits mapped to non-active lanes > tend > + // to be zero. > if (simdWidth == 8) > sel.curr.predicate = GEN_PREDICATE_ALIGN1_ALL8H; > else if (simdWidth == 16) > -- > 1.7.9.5 > > _______________________________________________ > Beignet mailing list > [email protected] > http://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
