If the dst bool value is not in the liveIn set, then we don't need to care about those inactive lanes as they don't hold any active data.
Signed-off-by: Zhigang Gong <[email protected]> --- backend/src/backend/gen_insn_selection.cpp | 34 ++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index 6cccc11..74a1e70 100644 --- a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -2519,20 +2519,32 @@ namespace gbe const Opcode opcode = insn.getOpcode(); const Type type = insn.getType(); const Register dst = insn.getDst(0); - const Register tmpDst = sel.reg(FAMILY_BOOL); + Register tmpDst; + + const ir::BasicBlock *insnBlock = insn.getParent(); + const ir::Liveness &liveness = sel.ctx.getLiveness(); + const ir::Liveness::UEVar &livein = liveness.getLiveIn(insnBlock); + if (!livein.contains(dst)) + tmpDst = dst; + else + tmpDst = sel.reg(FAMILY_BOOL); // Limit the compare to the active lanes. Use the same compare as for f0.0 sel.push(); const LabelIndex label = insn.getParent()->getLabelIndex(); const GenRegister blockip = sel.selReg(ocl::blockip, TYPE_U16); const GenRegister labelReg = GenRegister::immuw(label); + sel.curr.predicate = GEN_PREDICATE_NONE; sel.curr.physicalFlag = 0; sel.curr.flagIndex = uint16_t(tmpDst); - sel.CMP(GEN_CONDITIONAL_G, blockip, labelReg); - sel.curr.execWidth = 1; - sel.AND(sel.selReg(dst, TYPE_BOOL), sel.selReg(dst, TYPE_BOOL), sel.selReg(tmpDst, TYPE_BOOL)); - sel.XOR(sel.selReg(tmpDst, TYPE_BOOL), sel.selReg(tmpDst, TYPE_BOOL), GenRegister::immuw(0xFFFF)); + if (tmpDst != dst) { + sel.CMP(GEN_CONDITIONAL_G, blockip, labelReg); + sel.curr.execWidth = 1; + sel.AND(sel.selReg(dst, TYPE_BOOL), sel.selReg(dst, TYPE_BOOL), sel.selReg(tmpDst, TYPE_BOOL)); + sel.XOR(sel.selReg(tmpDst, TYPE_BOOL), sel.selReg(tmpDst, TYPE_BOOL), GenRegister::immuw(0xFFFF)); + } else + sel.CMP(GEN_CONDITIONAL_LE, blockip, labelReg); sel.pop(); // Look for immediate values for the right source @@ -2570,11 +2582,13 @@ namespace gbe } else sel.CMP(getGenCompare(opcode), src0, src1); sel.pop(); - sel.push(); - sel.curr.predicate = GEN_PREDICATE_NONE; - sel.curr.execWidth = 1; - sel.OR(sel.selReg(dst, TYPE_U16), sel.selReg(dst, TYPE_U16), sel.selReg(tmpDst, TYPE_U16)); - sel.pop(); + if (tmpDst != dst) { + sel.push(); + sel.curr.predicate = GEN_PREDICATE_NONE; + sel.curr.execWidth = 1; + sel.OR(sel.selReg(dst, TYPE_U16), sel.selReg(dst, TYPE_U16), sel.selReg(tmpDst, TYPE_U16)); + sel.pop(); + } return true; } }; -- 1.7.9.5 _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
