LGTM, will push later. Thanks.
On Fri, Nov 22, 2013 at 07:51:56PM +0800, Yang Rong wrote: > Use convert instruction in ir, and ALU1 in gen selection. > > Signed-off-by: Yang Rong <[email protected]> > --- > backend/src/backend/gen/gen_mesa_disasm.c | 2 ++ > backend/src/backend/gen_context.cpp | 2 ++ > backend/src/backend/gen_defs.hpp | 2 ++ > backend/src/backend/gen_encoder.cpp | 2 ++ > backend/src/backend/gen_encoder.hpp | 2 ++ > backend/src/backend/gen_insn_selection.cpp | 16 +++++++++++++--- > backend/src/backend/gen_insn_selection.hxx | 2 ++ > backend/src/ir/instruction.cpp | 22 ++++++++++++++++------ > backend/src/ir/instruction.hpp | 4 ++++ > backend/src/ir/instruction.hxx | 2 ++ > backend/src/llvm/llvm_gen_backend.cpp | 8 ++++++++ > backend/src/llvm/llvm_gen_ocl_function.hxx | 3 +++ > 12 files changed, 58 insertions(+), 9 deletions(-) > > diff --git a/backend/src/backend/gen/gen_mesa_disasm.c > b/backend/src/backend/gen/gen_mesa_disasm.c > index f911e7c..1f5adc9 100644 > --- a/backend/src/backend/gen/gen_mesa_disasm.c > +++ b/backend/src/backend/gen/gen_mesa_disasm.c > @@ -65,6 +65,8 @@ static const struct { > [GEN_OPCODE_LZD] = { .name = "lzd", .nsrc = 1, .ndst = 1 }, > [GEN_OPCODE_FBH] = { .name = "fbh", .nsrc = 1, .ndst = 1 }, > [GEN_OPCODE_FBL] = { .name = "fbl", .nsrc = 1, .ndst = 1 }, > + [GEN_OPCODE_F16TO32] = { .name = "f16to32", .nsrc = 1, .ndst = 1 }, > + [GEN_OPCODE_F32TO16] = { .name = "f32to16", .nsrc = 1, .ndst = 1 }, > > [GEN_OPCODE_MUL] = { .name = "mul", .nsrc = 2, .ndst = 1 }, > [GEN_OPCODE_MAC] = { .name = "mac", .nsrc = 2, .ndst = 1 }, > diff --git a/backend/src/backend/gen_context.cpp > b/backend/src/backend/gen_context.cpp > index 6007904..4902085 100644 > --- a/backend/src/backend/gen_context.cpp > +++ b/backend/src/backend/gen_context.cpp > @@ -158,6 +158,8 @@ namespace gbe > case SEL_OP_RNDU: p->RNDU(dst, src); break; > case SEL_OP_RNDE: p->RNDE(dst, src); break; > case SEL_OP_RNDZ: p->RNDZ(dst, src); break; > + case SEL_OP_F16TO32: p->F16TO32(dst, src); break; > 
+ case SEL_OP_F32TO16: p->F32TO16(dst, src); break; > case SEL_OP_LOAD_INT64_IMM: p->LOAD_INT64_IMM(dst, src.value.i64); > break; > case SEL_OP_CONVI64_TO_I: > { > diff --git a/backend/src/backend/gen_defs.hpp > b/backend/src/backend/gen_defs.hpp > index 27ce58c..ffa38c0 100644 > --- a/backend/src/backend/gen_defs.hpp > +++ b/backend/src/backend/gen_defs.hpp > @@ -125,6 +125,8 @@ enum opcode { > GEN_OPCODE_ASR = 12, > GEN_OPCODE_CMP = 16, > GEN_OPCODE_CMPN = 17, > + GEN_OPCODE_F32TO16 = 19, > + GEN_OPCODE_F16TO32 = 20, > GEN_OPCODE_JMPI = 32, > GEN_OPCODE_IF = 34, > GEN_OPCODE_IFF = 35, > diff --git a/backend/src/backend/gen_encoder.cpp > b/backend/src/backend/gen_encoder.cpp > index b0cc931..c372e36 100644 > --- a/backend/src/backend/gen_encoder.cpp > +++ b/backend/src/backend/gen_encoder.cpp > @@ -940,6 +940,8 @@ namespace gbe > ALU1(RNDU) > ALU1(FBH) > ALU1(FBL) > + ALU1(F16TO32) > + ALU1(F32TO16) > ALU2(SEL) > ALU1(NOT) > ALU2(AND) > diff --git a/backend/src/backend/gen_encoder.hpp > b/backend/src/backend/gen_encoder.hpp > index d518c4a..13db6ae 100644 > --- a/backend/src/backend/gen_encoder.hpp > +++ b/backend/src/backend/gen_encoder.hpp > @@ -99,6 +99,8 @@ namespace gbe > ALU1(RNDE) > ALU1(RNDD) > ALU1(RNDU) > + ALU1(F16TO32) > + ALU1(F32TO16) > ALU2(SEL) > ALU1(NOT) > ALU2(AND) > diff --git a/backend/src/backend/gen_insn_selection.cpp > b/backend/src/backend/gen_insn_selection.cpp > index eef7847..3661c2b 100644 > --- a/backend/src/backend/gen_insn_selection.cpp > +++ b/backend/src/backend/gen_insn_selection.cpp > @@ -426,6 +426,8 @@ namespace gbe > ALU1(LOAD_INT64_IMM) > ALU1(RNDZ) > ALU1(RNDE) > + ALU1(F16TO32) > + ALU1(F32TO16) > ALU2(SEL) > ALU2(SEL_INT64) > ALU1(NOT) > @@ -2643,14 +2645,22 @@ namespace gbe > const RegisterFamily srcFamily = getFamily(srcType); > const GenRegister dst = sel.selReg(insn.getDst(0), dstType); > const GenRegister src = sel.selReg(insn.getSrc(0), srcType); > + const Opcode opcode = insn.getOpcode(); > > - if(insn.getOpcode() 
== ir::OP_SAT_CVT) { > + if(opcode == ir::OP_SAT_CVT) { > sel.push(); > sel.curr.saturate = 1; > } > > // We need two instructions to make the conversion > - if (dstFamily != FAMILY_DWORD && dstFamily != FAMILY_QWORD && > (srcFamily == FAMILY_DWORD || srcFamily == FAMILY_QWORD)) { > + if (opcode == OP_F16TO32) { > + sel.F16TO32(dst, src); > + } else if (opcode == OP_F32TO16) { > + GenRegister unpacked; > + unpacked = GenRegister::unpacked_uw(sel.reg(FAMILY_DWORD)); > + sel.F32TO16(unpacked, src); > + sel.MOV(dst, unpacked); > + } else if (dstFamily != FAMILY_DWORD && dstFamily != FAMILY_QWORD && > (srcFamily == FAMILY_DWORD || srcFamily == FAMILY_QWORD)) { > GenRegister unpacked; > if (dstFamily == FAMILY_WORD) { > const uint32_t type = dstType == TYPE_U16 ? GEN_TYPE_UW : > GEN_TYPE_W; > @@ -2695,7 +2705,7 @@ namespace gbe > } else > sel.MOV(dst, src); > > - if(insn.getOpcode() == ir::OP_SAT_CVT) > + if(opcode == ir::OP_SAT_CVT) > sel.pop(); > > return true; > diff --git a/backend/src/backend/gen_insn_selection.hxx > b/backend/src/backend/gen_insn_selection.hxx > index 4499006..9fb5da7 100644 > --- a/backend/src/backend/gen_insn_selection.hxx > +++ b/backend/src/backend/gen_insn_selection.hxx > @@ -10,6 +10,8 @@ DECL_SELECTION_IR(RNDE, UnaryInstruction) > DECL_SELECTION_IR(RNDD, UnaryInstruction) > DECL_SELECTION_IR(RNDU, UnaryInstruction) > DECL_SELECTION_IR(FRC, UnaryInstruction) > +DECL_SELECTION_IR(F16TO32, UnaryInstruction) > +DECL_SELECTION_IR(F32TO16, UnaryInstruction) > DECL_SELECTION_IR(SEL, BinaryInstruction) > DECL_SELECTION_IR(SEL_INT64, BinaryInstruction) > DECL_SELECTION_IR(AND, BinaryInstruction) > diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp > index c624d7a..c0cf88a 100644 > --- a/backend/src/ir/instruction.cpp > +++ b/backend/src/ir/instruction.cpp > @@ -283,13 +283,13 @@ namespace ir { > public NSrcPolicy<ConvertInstruction, 1> > { > public: > - ConvertInstruction(Type dstType, > + ConvertInstruction(Opcode opcode, 
> + Type dstType, > Type srcType, > Register dst, > - Register src, > - bool saturated=false) > + Register src) > { > - this->opcode = saturated ? OP_SAT_CVT : OP_CVT; > + this->opcode = opcode; > this->dst[0] = dst; > this->src[0] = src; > this->dstType = dstType; > @@ -1563,12 +1563,22 @@ DECL_MEM_FN(GetImageInfoInstruction, uint32_t, > getInfoType(void), getInfoType()) > > // CVT > Instruction CVT(Type dstType, Type srcType, Register dst, Register src) { > - return internal::ConvertInstruction(dstType, srcType, dst, > src).convert(); > + return internal::ConvertInstruction(OP_CVT, dstType, srcType, dst, > src).convert(); > } > > // saturated convert > Instruction SAT_CVT(Type dstType, Type srcType, Register dst, Register > src) { > - return internal::ConvertInstruction(dstType, srcType, dst, src, > true).convert(); > + return internal::ConvertInstruction(OP_SAT_CVT, dstType, srcType, dst, > src).convert(); > + } > + > + // CVT > + Instruction F16TO32(Type dstType, Type srcType, Register dst, Register > src) { > + return internal::ConvertInstruction(OP_F16TO32, dstType, srcType, dst, > src).convert(); > + } > + > + // saturated convert > + Instruction F32TO16(Type dstType, Type srcType, Register dst, Register > src) { > + return internal::ConvertInstruction(OP_F32TO16, dstType, srcType, dst, > src).convert(); > } > > // For all unary functions with given opcode > diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp > index b7eebc0..46577c7 100644 > --- a/backend/src/ir/instruction.hpp > +++ b/backend/src/ir/instruction.hpp > @@ -642,6 +642,10 @@ namespace ir { > Instruction CVT(Type dstType, Type srcType, Register dst, Register src); > /*! sat_cvt.{dstType <- srcType} dst src */ > Instruction SAT_CVT(Type dstType, Type srcType, Register dst, Register > src); > + /*! F16TO32.{dstType <- srcType} dst src */ > + Instruction F16TO32(Type dstType, Type srcType, Register dst, Register > src); > + /*! 
F32TO16.{dstType <- srcType} dst src */ > + Instruction F32TO16(Type dstType, Type srcType, Register dst, Register > src); > /*! atomic dst addr.space {src1 {src2}} */ > Instruction ATOMIC(AtomicOps opcode, Register dst, AddressSpace space, > Tuple src); > /*! bra labelIndex */ > diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx > index cf79e09..baaaca2 100644 > --- a/backend/src/ir/instruction.hxx > +++ b/backend/src/ir/instruction.hxx > @@ -64,6 +64,8 @@ DECL_INSN(ORD, CompareInstruction) > DECL_INSN(BITCAST, BitCastInstruction) > DECL_INSN(CVT, ConvertInstruction) > DECL_INSN(SAT_CVT, ConvertInstruction) > +DECL_INSN(F16TO32, ConvertInstruction) > +DECL_INSN(F32TO16, ConvertInstruction) > DECL_INSN(ATOMIC, AtomicInstruction) > DECL_INSN(BRA, BranchInstruction) > DECL_INSN(RET, BranchInstruction) > diff --git a/backend/src/llvm/llvm_gen_backend.cpp > b/backend/src/llvm/llvm_gen_backend.cpp > index 9c85c1a..1c932bd 100644 > --- a/backend/src/llvm/llvm_gen_backend.cpp > +++ b/backend/src/llvm/llvm_gen_backend.cpp > @@ -2128,6 +2128,8 @@ namespace gbe > case GEN_OCL_SAT_CONV_F32_TO_I32: > case GEN_OCL_SAT_CONV_I32_TO_U32: > case GEN_OCL_SAT_CONV_F32_TO_U32: > + case GEN_OCL_CONV_F16_TO_F32: > + case GEN_OCL_CONV_F32_TO_F16: > this->newRegister(&I); > break; > default: > @@ -2689,6 +2691,12 @@ namespace gbe > DEF(ir::TYPE_U32, ir::TYPE_S32); > case GEN_OCL_SAT_CONV_F32_TO_U32: > DEF(ir::TYPE_U32, ir::TYPE_FLOAT); > + case GEN_OCL_CONV_F16_TO_F32: > + ctx.F16TO32(ir::TYPE_FLOAT, ir::TYPE_U16, getRegister(&I), > getRegister(I.getOperand(0))); > + break; > + case GEN_OCL_CONV_F32_TO_F16: > + ctx.F32TO16(ir::TYPE_U16, ir::TYPE_FLOAT, getRegister(&I), > getRegister(I.getOperand(0))); > + break; > #undef DEF > default: break; > } > diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx > b/backend/src/llvm/llvm_gen_ocl_function.hxx > index 71034ab..4b470e0 100644 > --- a/backend/src/llvm/llvm_gen_ocl_function.hxx > +++ 
b/backend/src/llvm/llvm_gen_ocl_function.hxx > @@ -178,3 +178,6 @@ DECL_LLVM_GEN_FUNCTION(SAT_CONV_F32_TO_I32, > _Z15convert_int_satf) > > DECL_LLVM_GEN_FUNCTION(SAT_CONV_I32_TO_U32, _Z16convert_uint_sati) > DECL_LLVM_GEN_FUNCTION(SAT_CONV_F32_TO_U32, _Z16convert_uint_satf) > + > +DECL_LLVM_GEN_FUNCTION(CONV_F16_TO_F32, __gen_ocl_f16to32) > +DECL_LLVM_GEN_FUNCTION(CONV_F32_TO_F16, __gen_ocl_f32to16) > \ No newline at end of file > -- > 1.8.1.2 > > _______________________________________________ > Beignet mailing list > [email protected] > http://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
