Pushed, thanks.
On Wed, Sep 11, 2013 at 07:29:20AM +0000, Song, Ruiling wrote: > LGTM. > > -----Original Message----- > From: [email protected] > [mailto:[email protected]] On > Behalf Of Homer Hsing > Sent: Wednesday, September 11, 2013 11:05 AM > To: [email protected] > Subject: [Beignet] [PATCH v2] add 64-bit version of "hadd" > > v2: > keep top carry bit > > passed piglit test cases: > > piglit/framework/../bin/cl-program-tester > generated_tests/cl/builtin/int/builtin-long-hadd-1.0.generated.cl > piglit/framework/../bin/cl-program-tester > generated_tests/cl/builtin/int/builtin-ulong-hadd-1.0.generated.cl > > Signed-off-by: Homer Hsing <[email protected]> > --- > backend/src/backend/gen_context.cpp | 27 > ++++++++++++++++++++++ > backend/src/backend/gen_context.hpp | 1 + > .../src/backend/gen_insn_gen7_schedule_info.hxx | 1 + > backend/src/backend/gen_insn_selection.cpp | 19 +++++++++++++++ > backend/src/backend/gen_insn_selection.hxx | 1 + > backend/src/ir/instruction.cpp | 1 + > backend/src/ir/instruction.hpp | 2 ++ > backend/src/ir/instruction.hxx | 1 + > backend/src/llvm/llvm_gen_backend.cpp | 11 +++++++++ > backend/src/llvm/llvm_gen_ocl_function.hxx | 3 ++- > backend/src/ocl_stdlib.tmpl.h | 15 ++++++++---- > 11 files changed, 77 insertions(+), 5 deletions(-) > > diff --git a/backend/src/backend/gen_context.cpp > b/backend/src/backend/gen_context.cpp > index b7a7cd6..84cc094 100644 > --- a/backend/src/backend/gen_context.cpp > +++ b/backend/src/backend/gen_context.cpp > @@ -458,6 +458,33 @@ namespace gbe > p->pop(); > } > > + void GenContext::emitI64HADDInstruction(const SelectionInstruction &insn) { > + GenRegister dest = ra->genReg(insn.dst(0)); > + GenRegister x = ra->genReg(insn.src(0)); > + GenRegister y = ra->genReg(insn.src(1)); > + GenRegister a = ra->genReg(insn.dst(1)); > + GenRegister b = ra->genReg(insn.dst(2)); > + GenRegister c = ra->genReg(insn.dst(3)); > + GenRegister d = ra->genReg(insn.dst(4)); > + a.type = b.type = c.type = d.type = GEN_TYPE_UD; > + loadBottomHalf(a, x); > + loadBottomHalf(b, y); > + loadTopHalf(c, x); > + loadTopHalf(d, y); > + addWithCarry(a, a, b); > + addWithCarry(c, c, b); > + addWithCarry(c, c, d); > + p->ADD(b, b, d); > + p->SHR(a, a, GenRegister::immud(1)); > + p->SHL(d, c, GenRegister::immud(31)); > + p->OR(a, a, d); > + p->SHR(c, c, GenRegister::immud(1)); > + p->SHL(d, b, GenRegister::immud(31)); > + p->OR(c, c, d); > + storeBottomHalf(dest, a); > + storeTopHalf(dest, c); > + } > + > void GenContext::emitI64ShiftInstruction(const SelectionInstruction &insn) > { > GenRegister dest = ra->genReg(insn.dst(0)); > GenRegister x = ra->genReg(insn.src(0)); diff --git > a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp > index 14e4550..4601242 100644 > --- a/backend/src/backend/gen_context.hpp > +++ b/backend/src/backend/gen_context.hpp > @@ -96,6 +96,7 @@ namespace gbe > void emitBinaryInstruction(const SelectionInstruction &insn); > void emitBinaryWithTempInstruction(const SelectionInstruction &insn); > void emitTernaryInstruction(const SelectionInstruction &insn); > + void emitI64HADDInstruction(const SelectionInstruction &insn); > void emitI64ShiftInstruction(const SelectionInstruction &insn); > void emitI64CompareInstruction(const SelectionInstruction &insn); > void emitCompareInstruction(const SelectionInstruction &insn); diff > --git a/backend/src/backend/gen_insn_gen7_schedule_info.hxx > b/backend/src/backend/gen_insn_gen7_schedule_info.hxx > index af2b0ee..445b461 100644 > --- a/backend/src/backend/gen_insn_gen7_schedule_info.hxx > +++ b/backend/src/backend/gen_insn_gen7_schedule_info.hxx > @@ -6,6 +6,7 @@ DECL_GEN7_SCHEDULE(Binary, 20, 4, 2) > DECL_GEN7_SCHEDULE(BinaryWithTemp, 20, 4, 2) > DECL_GEN7_SCHEDULE(Ternary, 20, 4, 2) > DECL_GEN7_SCHEDULE(I64Shift, 20, 4, 2) > +DECL_GEN7_SCHEDULE(I64HADD, 20, 4, 2) > DECL_GEN7_SCHEDULE(Compare, 20, 4, 2) > DECL_GEN7_SCHEDULE(I64Compare, 20, 4, 2) > DECL_GEN7_SCHEDULE(Jump, 14, 1, 1) > diff --git a/backend/src/backend/gen_insn_selection.cpp > b/backend/src/backend/gen_insn_selection.cpp > index 0f62da6..49ef601 100644 > --- a/backend/src/backend/gen_insn_selection.cpp > +++ b/backend/src/backend/gen_insn_selection.cpp > @@ -470,6 +470,8 @@ namespace gbe > #undef ALU2WithTemp > #undef ALU3 > #undef I64Shift > + /*! (x+y)>>1 without mod. overflow */ > + void I64HADD(Reg dst, Reg src0, Reg src1, GenRegister tmp[4]); > /*! Shift a 64-bit integer */ > void I64Shift(SelectionOpcode opcode, Reg dst, Reg src0, Reg src1, > GenRegister tmp[6]); > /*! Compare 64-bit integer */ > @@ -1074,6 +1076,15 @@ namespace gbe > insn->extra.function = conditional; > } > > + void Selection::Opaque::I64HADD(Reg dst, Reg src0, Reg src1, GenRegister > tmp[4]) { > + SelectionInstruction *insn = this->appendInsn(SEL_OP_I64HADD, 5, 2); > + insn->dst(0) = dst; > + insn->src(0) = src0; > + insn->src(1) = src1; > + for(int i = 0; i < 4; i ++) > + insn->dst(i + 1) = tmp[i]; > + } > + > void Selection::Opaque::I64Shift(SelectionOpcode opcode, Reg dst, Reg > src0, Reg src1, GenRegister tmp[6]) { > SelectionInstruction *insn = this->appendInsn(opcode, 7, 2); > insn->dst(0) = dst; > @@ -1668,6 +1679,14 @@ namespace gbe > sel.RHADD(dst, src0, src1, temp); > break; > } > + case OP_I64HADD: > + { > + GenRegister tmp[4]; > + for(int i=0; i<4; i++) > + tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD)); > + sel.I64HADD(dst, src0, src1, tmp); > + break; > + } > case OP_UPSAMPLE_SHORT: > sel.UPSAMPLE_SHORT(dst, src0, src1); > break; > diff --git a/backend/src/backend/gen_insn_selection.hxx > b/backend/src/backend/gen_insn_selection.hxx > index ea19fab..0083f7d 100644 > --- a/backend/src/backend/gen_insn_selection.hxx > +++ b/backend/src/backend/gen_insn_selection.hxx > @@ -62,6 +62,7 @@ DECL_SELECTION_IR(FBH, UnaryInstruction) > DECL_SELECTION_IR(FBL, UnaryInstruction) DECL_SELECTION_IR(HADD, > BinaryWithTempInstruction) DECL_SELECTION_IR(RHADD, > BinaryWithTempInstruction) > +DECL_SELECTION_IR(I64HADD, I64HADDInstruction) > DECL_SELECTION_IR(UPSAMPLE_SHORT, BinaryInstruction) > DECL_SELECTION_IR(UPSAMPLE_INT, BinaryInstruction) > DECL_SELECTION_IR(UPSAMPLE_LONG, BinaryInstruction) diff --git > a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp index > 115d70e..6bbe37e 100644 > --- a/backend/src/ir/instruction.cpp > +++ b/backend/src/ir/instruction.cpp > @@ -1348,6 +1348,7 @@ DECL_MEM_FN(GetImageInfoInstruction, uint32_t, > getInfoType(void), getInfoType()) > DECL_EMIT_FUNCTION(AND) > DECL_EMIT_FUNCTION(HADD) > DECL_EMIT_FUNCTION(RHADD) > + DECL_EMIT_FUNCTION(I64HADD) > > #undef DECL_EMIT_FUNCTION > > diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp > index 40a3d40..fab6d73 100644 > --- a/backend/src/ir/instruction.hpp > +++ b/backend/src/ir/instruction.hpp > @@ -533,6 +533,8 @@ namespace ir { > Instruction HADD(Type type, Register dst, Register src0, Register src1); > /*! rhadd.type dst src */ > Instruction RHADD(Type type, Register dst, Register src0, Register src1); > + /*! i64hadd.type dst src */ > + Instruction I64HADD(Type type, Register dst, Register src0, Register > + src1); > /*! tan.type dst src */ > Instruction RCP(Type type, Register dst, Register src); > /*! abs.type dst src */ > diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx > index c15e912..6af0899 100644 > --- a/backend/src/ir/instruction.hxx > +++ b/backend/src/ir/instruction.hxx > @@ -77,6 +77,7 @@ DECL_INSN(FBH, UnaryInstruction) DECL_INSN(FBL, > UnaryInstruction) DECL_INSN(HADD, BinaryInstruction) DECL_INSN(RHADD, > BinaryInstruction) > +DECL_INSN(I64HADD, BinaryInstruction) > DECL_INSN(UPSAMPLE_SHORT, BinaryInstruction) DECL_INSN(UPSAMPLE_INT, > BinaryInstruction) DECL_INSN(UPSAMPLE_LONG, BinaryInstruction) diff --git > a/backend/src/llvm/llvm_gen_backend.cpp > b/backend/src/llvm/llvm_gen_backend.cpp > index e747d00..3c04565 100644 > --- a/backend/src/llvm/llvm_gen_backend.cpp > +++ b/backend/src/llvm/llvm_gen_backend.cpp > @@ -1841,6 +1841,7 @@ namespace gbe > case GEN_OCL_USUB_SAT_LONG: > case GEN_OCL_HADD: > case GEN_OCL_RHADD: > + case GEN_OCL_I64HADD: > this->newRegister(&I); > break; > default: > @@ -2278,6 +2279,16 @@ namespace gbe > ctx.HADD(getUnsignedType(ctx, I.getType()), dst, src0, src1); > break; > } > + case GEN_OCL_I64HADD: > + { > + GBE_ASSERT(AI != AE); > + const ir::Register src0 = this->getRegister(*(AI++)); > + GBE_ASSERT(AI != AE); > + const ir::Register src1 = this->getRegister(*(AI++)); > + const ir::Register dst = this->getRegister(&I); > + ctx.I64HADD(ir::TYPE_U64, dst, src0, src1); > + break; > + } > case GEN_OCL_RHADD: { > GBE_ASSERT(AI != AE); const ir::Register src0 = > this->getRegister(*AI); ++AI; > GBE_ASSERT(AI != AE); const ir::Register src1 = > this->getRegister(*AI); ++AI; diff --git > a/backend/src/llvm/llvm_gen_ocl_function.hxx > b/backend/src/llvm/llvm_gen_ocl_function.hxx > index b712860..13d8f66 100644 > --- a/backend/src/llvm/llvm_gen_ocl_function.hxx > +++ b/backend/src/llvm/llvm_gen_ocl_function.hxx > @@ -131,8 +131,9 @@ DECL_LLVM_GEN_FUNCTION(MUL_HI_UINT, > _Z16__gen_ocl_mul_hijj) DECL_LLVM_GEN_FUNCTION(FBH, __gen_ocl_fbh) > DECL_LLVM_GEN_FUNCTION(FBL, __gen_ocl_fbl) DECL_LLVM_GEN_FUNCTION(ABS, > __gen_ocl_abs) -DECL_LLVM_GEN_FUNCTION(HADD, __gen_ocl_hadd) > +DECL_LLVM_GEN_FUNCTION(HADD, _Z14__gen_ocl_haddjj) > DECL_LLVM_GEN_FUNCTION(RHADD, __gen_ocl_rhadd) > +DECL_LLVM_GEN_FUNCTION(I64HADD, _Z14__gen_ocl_haddmm) > DECL_LLVM_GEN_FUNCTION(UPSAMPLE_SHORT, _Z18__gen_ocl_upsampless) > DECL_LLVM_GEN_FUNCTION(UPSAMPLE_INT, _Z18__gen_ocl_upsampleii) > DECL_LLVM_GEN_FUNCTION(UPSAMPLE_LONG, _Z18__gen_ocl_upsamplell) diff --git > a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h index > a4f61ee..e87fea5 100644 > --- a/backend/src/ocl_stdlib.tmpl.h > +++ b/backend/src/ocl_stdlib.tmpl.h > @@ -462,7 +462,7 @@ INLINE_OVERLOADABLE ulong upsample(uint hi, uint lo) { > return __gen_ocl_upsample((long)hi, (long)lo); } > > -PURE CONST uint __gen_ocl_hadd(uint x, uint y); > +OVERLOADABLE uint __gen_ocl_hadd(uint x, uint y); > PURE CONST uint __gen_ocl_rhadd(uint x, uint y); #define DEC DEF(char); > DEF(uchar); DEF(short); DEF(ushort) #define DEF(type) INLINE_OVERLOADABLE > type hadd(type x, type y) { return (x + y) >> 1; } @@ -472,15 +472,22 @@ DEC > DEC #undef DEF #undef DEC -INLINE_OVERLOADABLE int hadd(int x, int y) { > return (x < 0 && y > 0) || (x > 0 && y < 0) ? ((x + y) >> 1) : > __gen_ocl_hadd(x, y); } > +INLINE_OVERLOADABLE int hadd(int x, int y) { > + return (x < 0 && y > 0) || (x > 0 && y < 0) ? > + ((x + y) >> 1) : > + __gen_ocl_hadd((uint)x, (uint)y); } > INLINE_OVERLOADABLE uint hadd(uint x, uint y) { return __gen_ocl_hadd(x, y); > } INLINE_OVERLOADABLE int rhadd(int x, int y) { return (x < 0 && y > 0) || > (x > 0 && y < 0) ? ((x + y + 1) >> 1) : __gen_ocl_rhadd(x, y); } > INLINE_OVERLOADABLE uint rhadd(uint x, uint y) { return __gen_ocl_rhadd(x, > y); } > +OVERLOADABLE ulong __gen_ocl_hadd(ulong x, ulong y); > INLINE_OVERLOADABLE long hadd(long x, long y) { > - return 0; > + return (x < 0 && y > 0) || (x > 0 && y < 0) ? > + ((x + y) >> 1) : > + __gen_ocl_hadd((ulong)x, (ulong)y); > } > INLINE_OVERLOADABLE ulong hadd(ulong x, ulong y) { > - return 0; > + return __gen_ocl_hadd(x, y); > } > INLINE_OVERLOADABLE long rhadd(long x, long y) { > return 0; > -- > 1.8.1.2 > > _______________________________________________ > Beignet mailing list > [email protected] > http://lists.freedesktop.org/mailman/listinfo/beignet > _______________________________________________ > Beignet mailing list > [email protected] > http://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
