From: Luo Xionghu <[email protected]> As SLM doesn't support A64 stateless access, atomic_long cannot be fully supported yet; this patch just adds the code for future use.
Signed-off-by: Luo Xionghu <[email protected]> --- backend/src/backend/gen8_encoder.cpp | 14 +++++---- backend/src/backend/gen8_encoder.hpp | 2 +- backend/src/backend/gen_encoder.cpp | 2 +- backend/src/backend/gen_encoder.hpp | 2 +- backend/src/backend/gen_insn_selection.cpp | 47 +++++++++++++++++++++--------- 5 files changed, 45 insertions(+), 22 deletions(-) diff --git a/backend/src/backend/gen8_encoder.cpp b/backend/src/backend/gen8_encoder.cpp index d320290..9af8cee 100644 --- a/backend/src/backend/gen8_encoder.cpp +++ b/backend/src/backend/gen8_encoder.cpp @@ -169,14 +169,17 @@ namespace gbe } } - unsigned Gen8Encoder::setAtomicA64MessageDesc(GenNativeInstruction *insn, unsigned function, unsigned bti, unsigned srcNum) { + unsigned Gen8Encoder::setAtomicA64MessageDesc(GenNativeInstruction *insn, unsigned function, unsigned bti, unsigned srcNum, int type_long) { Gen8NativeInstruction *gen8_insn = &insn->gen8_insn; uint32_t msg_length = 0; uint32_t response_length = 0; + assert(srcNum <= 3); if (this->curr.execWidth == 8) { - msg_length = srcNum + 1; - response_length = 1; + msg_length = srcNum + 1 + type_long; + if(srcNum == 3 && type_long) + msg_length++; + response_length = 1 + type_long; } else if (this->curr.execWidth == 16) { msg_length = 2 * (srcNum + 1); response_length = 2; @@ -189,7 +192,7 @@ namespace gbe gen8_insn->bits3.gen8_atomic_a64.bti = bti; gen8_insn->bits3.gen8_atomic_a64.return_data = 1; gen8_insn->bits3.gen8_atomic_a64.aop_type = function; - gen8_insn->bits3.gen8_atomic_a64.data_size = 0; + gen8_insn->bits3.gen8_atomic_a64.data_size = type_long; return gen8_insn->bits3.ud; } @@ -203,7 +206,8 @@ namespace gbe this->setDst(insn, GenRegister::uw16grf(dst.nr, 0)); this->setSrc0(insn, GenRegister::ud8grf(src.nr, 0)); this->setSrc1(insn, GenRegister::immud(0)); - setAtomicA64MessageDesc(insn, function, bti.value.ud, srcNum); + int type_long = (src.type == GEN_TYPE_UL || src.type == GEN_TYPE_L) ? 
1: 0; + setAtomicA64MessageDesc(insn, function, bti.value.ud, srcNum, type_long); } unsigned Gen8Encoder::setUntypedReadMessageDesc(GenNativeInstruction *insn, unsigned bti, unsigned elemNum) { diff --git a/backend/src/backend/gen8_encoder.hpp b/backend/src/backend/gen8_encoder.hpp index 3e23df6..d83cde5 100644 --- a/backend/src/backend/gen8_encoder.hpp +++ b/backend/src/backend/gen8_encoder.hpp @@ -69,7 +69,7 @@ namespace gbe GenRegister src1 = GenRegister::null()); virtual void handleDouble(GenEncoder *p, uint32_t opcode, GenRegister dst, GenRegister src0, GenRegister src1 = GenRegister::null()); virtual unsigned setAtomicMessageDesc(GenNativeInstruction *insn, unsigned function, unsigned bti, unsigned srcNum); - virtual unsigned setAtomicA64MessageDesc(GenNativeInstruction *insn, unsigned function, unsigned bti, unsigned srcNum); + virtual unsigned setAtomicA64MessageDesc(GenNativeInstruction *insn, unsigned function, unsigned bti, unsigned srcNum, int type_long); virtual unsigned setUntypedReadMessageDesc(GenNativeInstruction *insn, unsigned bti, unsigned elemNum); virtual unsigned setUntypedWriteMessageDesc(GenNativeInstruction *insn, unsigned bti, unsigned elemNum); void setSrc0WithAcc(GenNativeInstruction *insn, GenRegister reg, uint32_t accN); diff --git a/backend/src/backend/gen_encoder.cpp b/backend/src/backend/gen_encoder.cpp index 564f207..9cdb41d 100644 --- a/backend/src/backend/gen_encoder.cpp +++ b/backend/src/backend/gen_encoder.cpp @@ -594,7 +594,7 @@ namespace gbe NOT_SUPPORTED; return insn->bits3.ud; } - unsigned GenEncoder::setAtomicA64MessageDesc(GenNativeInstruction *insn, unsigned function, unsigned bti, unsigned srcNum) { + unsigned GenEncoder::setAtomicA64MessageDesc(GenNativeInstruction *insn, unsigned function, unsigned bti, unsigned srcNum, int type_long) { GBE_ASSERT(0); return 0; } diff --git a/backend/src/backend/gen_encoder.hpp b/backend/src/backend/gen_encoder.hpp index 5b4f4c2..ecb5051 100644 --- 
a/backend/src/backend/gen_encoder.hpp +++ b/backend/src/backend/gen_encoder.hpp @@ -241,7 +241,7 @@ namespace gbe unsigned msg_length, unsigned response_length, bool header_present = false, bool end_of_thread = false); virtual unsigned setAtomicMessageDesc(GenNativeInstruction *insn, unsigned function, unsigned bti, unsigned srcNum); - virtual unsigned setAtomicA64MessageDesc(GenNativeInstruction *insn, unsigned function, unsigned bti, unsigned srcNum); + virtual unsigned setAtomicA64MessageDesc(GenNativeInstruction *insn, unsigned function, unsigned bti, unsigned srcNum, int type_long); virtual unsigned setUntypedReadMessageDesc(GenNativeInstruction *insn, unsigned bti, unsigned elemNum); virtual unsigned setUntypedWriteMessageDesc(GenNativeInstruction *insn, unsigned bti, unsigned elemNum); unsigned setByteGatherMessageDesc(GenNativeInstruction *insn, unsigned bti, unsigned elemSize); diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index 14a1930..f982817 100644 --- a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -5582,28 +5582,46 @@ namespace gbe sel.ATOMICA64(dst, genAtomicOp, msgPayload, msgs, bti, sel.getBTITemps(AM)); } else if (simdWidth == 16) { vector<GenRegister> msgs; + RegisterFamily family = sel.getRegisterFamily(insn.getDst(0)); + Type type = getType(family); for (unsigned k = 0; k < msgPayload; k++) { - msgs.push_back(sel.selReg(sel.reg(ir::FAMILY_DWORD), ir::TYPE_U32)); + msgs.push_back(sel.selReg(sel.reg(family), type)); } sel.push(); /* first quarter */ sel.curr.execWidth = 8; sel.curr.quarterControl = GEN_COMPRESSION_Q1; sel.MOV(GenRegister::retype(msgs[0], GEN_TYPE_UL), GenRegister::Qn(addrQ, 0)); - if(msgPayload > 1) - sel.MOV(GenRegister::Qn(msgs[1], 0), GenRegister::Qn(src1, 0)); - if(msgPayload > 2) - sel.MOV(GenRegister::Qn(msgs[1], 1), GenRegister::Qn(src2, 0)); + if(msgPayload > 1) { + if(family == ir::FAMILY_QWORD) + 
sel.MOV(GenRegister::Qn(msgs[0], 1), GenRegister::Qn(src1, 0)); + else + sel.MOV(GenRegister::Qn(msgs[1], 0), GenRegister::Qn(src1, 0)); + } + if(msgPayload > 2) { + if(family == ir::FAMILY_QWORD) + sel.MOV(GenRegister::Qn(msgs[1], 0), GenRegister::Qn(src2, 0)); + else + sel.MOV(GenRegister::Qn(msgs[1], 1), GenRegister::Qn(src2, 0)); + } sel.ATOMICA64(GenRegister::Qn(dst, 0), genAtomicOp, msgPayload, msgs, bti, sel.getBTITemps(AM)); /* second quarter */ sel.curr.execWidth = 8; sel.curr.quarterControl = GEN_COMPRESSION_Q2; sel.MOV(GenRegister::retype(msgs[0], GEN_TYPE_UL), GenRegister::Qn(addrQ, 1)); - if(msgPayload > 1) - sel.MOV(GenRegister::Qn(msgs[1], 0), GenRegister::Qn(src1, 1)); - if(msgPayload > 2) - sel.MOV(GenRegister::Qn(msgs[1], 1), GenRegister::Qn(src2, 1)); + if(msgPayload > 1) { + if(family == ir::FAMILY_QWORD) + sel.MOV(GenRegister::Qn(msgs[0], 1), GenRegister::Qn(src1, 1)); + else + sel.MOV(GenRegister::Qn(msgs[1], 0), GenRegister::Qn(src1, 1)); + } + if(msgPayload > 2) { + if(family == ir::FAMILY_QWORD) + sel.MOV(GenRegister::Qn(msgs[1], 0), GenRegister::Qn(src2, 1)); + else + sel.MOV(GenRegister::Qn(msgs[1], 1), GenRegister::Qn(src2, 1)); + } sel.ATOMICA64(GenRegister::Qn(dst, 1), genAtomicOp, msgPayload, msgs, bti, sel.getBTITemps(AM)); sel.pop(); } @@ -5633,17 +5651,18 @@ namespace gbe msgPayload = srcNum; } - GenRegister dst = sel.selReg(insn.getDst(0), TYPE_U32); - GenRegister src0 = sel.selReg(insn.getAddressRegister(), TYPE_U32); + Type type = getType(sel.getRegisterFamily(insn.getDst(0))); + GenRegister dst = sel.selReg(insn.getDst(0), type); + GenRegister src0 = sel.selReg(insn.getAddressRegister(), type); GenRegister src1 = src0, src2 = src0; - if(msgPayload > 1) src1 = sel.selReg(insn.getSrc(1), TYPE_U32); - if(msgPayload > 2) src2 = sel.selReg(insn.getSrc(2), TYPE_U32); + if(msgPayload > 1) src1 = sel.selReg(insn.getSrc(1), type); + if(msgPayload > 2) src2 = sel.selReg(insn.getSrc(2), type); GenAtomicOpCode genAtomicOp = 
(GenAtomicOpCode)atomicOp; if (AM == AM_DynamicBti || AM == AM_StaticBti) { if (AM == AM_DynamicBti) { Register btiReg = insn.getBtiReg(); - sel.ATOMIC(dst, genAtomicOp, msgPayload, address, src1, src2, sel.selReg(btiReg, TYPE_U32), sel.getBTITemps(AM)); + sel.ATOMIC(dst, genAtomicOp, msgPayload, address, src1, src2, sel.selReg(btiReg, type), sel.getBTITemps(AM)); } else { unsigned SI = insn.getSurfaceIndex(); sel.ATOMIC(dst, genAtomicOp, msgPayload, address, src1, src2, GenRegister::immud(SI), sel.getBTITemps(AM)); -- 2.1.4 _______________________________________________ Beignet mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/beignet
