From: Junyan He <[email protected]> BDW supports a 32-bit by 32-bit multiply instruction, so we can use it to refine the MUL implementation for the long (64-bit) type.
Signed-off-by: Junyan He <[email protected]> --- backend/src/backend/gen8_context.cpp | 31 +++++++++++++++++++++++++++ backend/src/backend/gen8_context.hpp | 1 + backend/src/backend/gen_context.hpp | 2 +- backend/src/backend/gen_insn_selection.cpp | 34 ++++++++++++++++++++++-------- 4 files changed, 58 insertions(+), 10 deletions(-) diff --git a/backend/src/backend/gen8_context.cpp b/backend/src/backend/gen8_context.cpp index 18a3425..85b72a5 100644 --- a/backend/src/backend/gen8_context.cpp +++ b/backend/src/backend/gen8_context.cpp @@ -102,6 +102,37 @@ namespace gbe } } + void Gen8Context::emitI64MULInstruction(const SelectionInstruction &insn) + { + GenRegister src0 = ra->genReg(insn.src(0)); + GenRegister src1 = ra->genReg(insn.src(1)); + GenRegister dst = ra->genReg(insn.dst(0)); + GenRegister res = ra->genReg(insn.dst(1)); + + src0.type = src1.type = GEN_TYPE_UD; + dst.type = GEN_TYPE_UL; + res.type = GEN_TYPE_UL; + + /* Low 32 bits X low 32 bits. */ + GenRegister s0l = src0.hstride == GEN_HORIZONTAL_STRIDE_0 ? + GenRegister::retype(src0, GEN_TYPE_UD) : GenRegister::unpacked_ud(src0.nr, src0.subnr); + GenRegister s1l = src1.hstride == GEN_HORIZONTAL_STRIDE_0 ? + GenRegister::retype(src1, GEN_TYPE_UD) : GenRegister::unpacked_ud(src1.nr, src1.subnr); + p->MUL(dst, s0l, s1l); + + /* Low 32 bits X high 32 bits. */ + GenRegister s1h = GenRegister::offset(s1l, 0, 4); + p->MUL(res, s0l, s1h); + p->SHL(res, res, GenRegister::immud(32)); + p->ADD(dst, dst, res); + + /* High 32 bits X low 32 bits. 
*/ + GenRegister s0h = GenRegister::offset(s0l, 0, 4); + p->MUL(res, s0h, s1l); + p->SHL(res, res, GenRegister::immud(32)); + p->ADD(dst, dst, res); + } + void Gen8Context::packLongVec(GenRegister unpacked, GenRegister packed, uint32_t simd) { GBE_ASSERT(packed.subnr == 0); diff --git a/backend/src/backend/gen8_context.hpp b/backend/src/backend/gen8_context.hpp index 54cc29d..7e6d3b3 100644 --- a/backend/src/backend/gen8_context.hpp +++ b/backend/src/backend/gen8_context.hpp @@ -54,6 +54,7 @@ namespace gbe virtual void emitBinaryWithTempInstruction(const SelectionInstruction &insn); virtual void emitWrite64Instruction(const SelectionInstruction &insn); virtual void emitRead64Instruction(const SelectionInstruction &insn); + virtual void emitI64MULInstruction(const SelectionInstruction &insn); protected: virtual GenEncoder* generateEncoder(void) { return GBE_NEW(Gen8Encoder, this->simdWidth, 8, deviceID); diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp index 3593d66..2c97092 100644 --- a/backend/src/backend/gen_context.hpp +++ b/backend/src/backend/gen_context.hpp @@ -163,7 +163,7 @@ namespace gbe void emitSpillRegInstruction(const SelectionInstruction &insn); void emitUnSpillRegInstruction(const SelectionInstruction &insn); void emitGetImageInfoInstruction(const SelectionInstruction &insn); - void emitI64MULInstruction(const SelectionInstruction &insn); + virtual void emitI64MULInstruction(const SelectionInstruction &insn); void emitI64DIVREMInstruction(const SelectionInstruction &insn); void scratchWrite(const GenRegister header, uint32_t offset, uint32_t reg_num, uint32_t reg_type, uint32_t channel_mode); void scratchRead(const GenRegister dst, const GenRegister header, uint32_t offset, uint32_t reg_num, uint32_t reg_type, uint32_t channel_mode); diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index 60f45f7..b63252a 100644 --- a/backend/src/backend/gen_insn_selection.cpp 
+++ b/backend/src/backend/gen_insn_selection.cpp @@ -615,7 +615,7 @@ namespace gbe /*! Get image information */ void GET_IMAGE_INFO(uint32_t type, GenRegister *dst, uint32_t dst_num, uint32_t bti); /*! Multiply 64-bit integers */ - void I64MUL(Reg dst, Reg src0, Reg src1, GenRegister tmp[6]); + void I64MUL(Reg dst, Reg src0, Reg src1, GenRegister *tmp, bool native_long); /*! 64-bit integer division */ void I64DIV(Reg dst, Reg src0, Reg src1, GenRegister tmp[13]); /*! 64-bit integer remainder of division */ @@ -1354,13 +1354,23 @@ namespace gbe insn->extra.function = function; } - void Selection::Opaque::I64MUL(Reg dst, Reg src0, Reg src1, GenRegister tmp[6]) { - SelectionInstruction *insn = this->appendInsn(SEL_OP_I64MUL, 7, 2); + void Selection::Opaque::I64MUL(Reg dst, Reg src0, Reg src1, GenRegister *tmp, bool native_long) { + SelectionInstruction *insn = NULL; + if (native_long) + insn = this->appendInsn(SEL_OP_I64MUL, 2, 2); + else + insn = this->appendInsn(SEL_OP_I64MUL, 7, 2); + insn->dst(0) = dst; insn->src(0) = src0; insn->src(1) = src1; - for(int i = 0; i < 6; i++) - insn->dst(i + 1) = tmp[i]; + + if (native_long) { + insn->dst(1) = tmp[0]; + } else { + for (int i = 0; i < 6; i++) + insn->dst(i + 1) = tmp[i]; + } } void Selection::Opaque::I64DIV(Reg dst, Reg src0, Reg src1, GenRegister tmp[13]) { @@ -2387,10 +2397,16 @@ namespace gbe sel.pop(); return false; } else if (type == TYPE_S64 || type == TYPE_U64) { - GenRegister tmp[6]; - for(int i = 0; i < 6; i++) - tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD)); - sel.I64MUL(dst, src0, src1, tmp); + if (sel.hasLongType()) { + GenRegister tmp; + tmp = sel.selReg(sel.reg(FAMILY_QWORD), ir::TYPE_U64); + sel.I64MUL(dst, src0, src1, &tmp, true); + } else { + GenRegister tmp[6]; + for(int i = 0; i < 6; i++) + tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD)); + sel.I64MUL(dst, src0, src1, tmp, false); + } } else sel.MUL(dst, src0, src1); break; -- 1.9.1 _______________________________________________ Beignet mailing list [email 
protected] http://lists.freedesktop.org/mailman/listinfo/beignet
