From: Luo Xionghu <[email protected]> Handle the byte/word/dword/qword input of the ctlz intrinsic accordingly.
v2: fix build issue. v3: remove duplicate code and unnecessary code. Signed-off-by: Luo Xionghu <[email protected]> --- backend/src/llvm/llvm_gen_backend.cpp | 71 ++++++++++++++++++++++++++++++++++- 1 file changed, 69 insertions(+), 2 deletions(-) diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp index 86030b9..7922ddb 100644 --- a/backend/src/llvm/llvm_gen_backend.cpp +++ b/backend/src/llvm/llvm_gen_backend.cpp @@ -3286,10 +3286,77 @@ error: break; case Intrinsic::ctlz: { - ir::Type srcType = getType(ctx, I.getType()); + Type *llvmDstType = I.getType(); + ir::Type dstType = getType(ctx, llvmDstType); + Type *llvmSrcType = I.getOperand(0)->getType(); + ir::Type srcType = getUnsignedType(ctx, llvmSrcType); + const ir::Register dst = this->getRegister(&I); const ir::Register src = this->getRegister(I.getOperand(0)); - ctx.ALU1(ir::OP_LZD, srcType, dst, src); + int imm_value = 0; + if(srcType == ir::TYPE_U16) { + imm_value = 16; + }else if(srcType == ir::TYPE_U8) { + imm_value = 24; + }else if(srcType == ir::TYPE_U64) { + imm_value = 32; + } + + if(srcType == ir::TYPE_U16 || srcType == ir::TYPE_U8) { + ir::ImmediateIndex imm; + ir::Type tmpType = ir::TYPE_S32; + imm = ctx.newIntegerImmediate(imm_value, tmpType); + const ir::RegisterFamily family = getFamily(tmpType); + const ir::Register immReg = ctx.reg(family); + ctx.LOADI(ir::TYPE_S32, immReg, imm); + + ir::Register tmp0 = ctx.reg(getFamily(tmpType)); + ir::Register tmp1 = ctx.reg(getFamily(tmpType)); + ir::Register tmp2 = ctx.reg(getFamily(tmpType)); + ctx.CVT(tmpType, srcType, tmp0, src); + ctx.ALU1(ir::OP_LZD, tmpType, tmp1, tmp0); + ctx.SUB(tmpType, tmp2, tmp1, immReg); + ctx.CVT(dstType, tmpType, dst, tmp2); + } + else if(srcType == ir::TYPE_U64) { + ir::ImmediateIndex imm; + ir::Type tmpType = ir::TYPE_U32; + imm = ctx.newIntegerImmediate(imm_value, srcType); + const ir::RegisterFamily family = getFamily(srcType); + const ir::Register immReg = ctx.reg(family); + 
ctx.LOADI(ir::TYPE_S64, immReg, imm); + + const ir::RegisterFamily tmpFamily = getFamily(tmpType); + const ir::ImmediateIndex imm32 = ctx.newIntegerImmediate(32, tmpType); + const ir::Register imm32Reg = ctx.reg(tmpFamily); + ctx.LOADI(ir::TYPE_S32, imm32Reg, imm32); + + ir::Register tmp0 = ctx.reg(getFamily(srcType)); + ir::Register tmp1 = ctx.reg(getFamily(tmpType)); + ir::Register tmp2 = ctx.reg(getFamily(tmpType)); + ir::Register tmp3 = ctx.reg(getFamily(tmpType)); + ir::Register tmp4 = ctx.reg(getFamily(tmpType)); + ir::Register tmp5 = ctx.reg(getFamily(tmpType)); + ir::Register tmp6 = ctx.reg(getFamily(tmpType)); + ir::Register cmp = ctx.reg(ir::FAMILY_BOOL); + + ctx.SHR(srcType, tmp0, src, immReg); + ctx.CVT(tmpType, srcType, tmp1, tmp0); + + ctx.ALU1(ir::OP_LZD, tmpType, tmp2, tmp1); + ctx.LT(tmpType, cmp, tmp2, imm32Reg); + + ctx.CVT(tmpType, srcType, tmp3, src); + ctx.ALU1(ir::OP_LZD, tmpType, tmp4, tmp3); + ctx.ADD(tmpType, tmp5, tmp4, imm32Reg); + + ctx.SEL(tmpType, tmp6, cmp, tmp2, tmp5); + ctx.CVT(dstType, tmpType, dst, tmp6); + } + else + { + ctx.ALU1(ir::OP_LZD, dstType, dst, src); + } } break; case Intrinsic::fma: -- 1.9.1 _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
