From: Luo <[email protected]> The popcount function returns the number of non-zero bits in its input. Use the GEN instruction cbit (Count Bits Set) to implement it.
Signed-off-by: Luo Xionghu <[email protected]> --- backend/src/backend/gen/gen_mesa_disasm.c | 1 + backend/src/backend/gen_context.cpp | 1 + backend/src/backend/gen_defs.hpp | 1 + backend/src/backend/gen_encoder.cpp | 1 + backend/src/backend/gen_encoder.hpp | 1 + backend/src/backend/gen_insn_selection.cpp | 4 +++- backend/src/backend/gen_insn_selection.hxx | 1 + backend/src/ir/instruction.cpp | 1 + backend/src/ir/instruction.hpp | 2 ++ backend/src/ir/instruction.hxx | 1 + backend/src/libocl/script/ocl_integer.def | 3 +-- backend/src/libocl/tmpl/ocl_integer.tmpl.cl | 30 +++++++++++++++++++++++++++ backend/src/libocl/tmpl/ocl_integer.tmpl.h | 9 ++++++++ backend/src/llvm/llvm_gen_backend.cpp | 2 ++ backend/src/llvm/llvm_gen_ocl_function.hxx | 1 + 15 files changed, 56 insertions(+), 3 deletions(-) diff --git a/backend/src/backend/gen/gen_mesa_disasm.c b/backend/src/backend/gen/gen_mesa_disasm.c index 266b501..330dffb 100644 --- a/backend/src/backend/gen/gen_mesa_disasm.c +++ b/backend/src/backend/gen/gen_mesa_disasm.c @@ -66,6 +66,7 @@ static const struct { [GEN_OPCODE_LZD] = { .name = "lzd", .nsrc = 1, .ndst = 1 }, [GEN_OPCODE_FBH] = { .name = "fbh", .nsrc = 1, .ndst = 1 }, [GEN_OPCODE_FBL] = { .name = "fbl", .nsrc = 1, .ndst = 1 }, + [GEN_OPCODE_CBIT] = { .name = "cbit", .nsrc = 1, .ndst = 1 }, [GEN_OPCODE_F16TO32] = { .name = "f16to32", .nsrc = 1, .ndst = 1 }, [GEN_OPCODE_F32TO16] = { .name = "f32to16", .nsrc = 1, .ndst = 1 }, diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp index 8844233..c37d2ee 100644 --- a/backend/src/backend/gen_context.cpp +++ b/backend/src/backend/gen_context.cpp @@ -205,6 +205,7 @@ namespace gbe case SEL_OP_READ_ARF: p->MOV(dst, src); break; case SEL_OP_FBH: p->FBH(dst, src); break; case SEL_OP_FBL: p->FBL(dst, src); break; + case SEL_OP_CBIT: p->CBIT(dst, src); break; case SEL_OP_NOT: p->NOT(dst, src); break; case SEL_OP_RNDD: p->RNDD(dst, src); break; case SEL_OP_RNDU: p->RNDU(dst, src); break; diff 
--git a/backend/src/backend/gen_defs.hpp b/backend/src/backend/gen_defs.hpp index 19aad95..3faacde 100644 --- a/backend/src/backend/gen_defs.hpp +++ b/backend/src/backend/gen_defs.hpp @@ -159,6 +159,7 @@ enum opcode { GEN_OPCODE_LZD = 74, GEN_OPCODE_FBH = 75, GEN_OPCODE_FBL = 76, + GEN_OPCODE_CBIT = 77, GEN_OPCODE_ADDC = 78, GEN_OPCODE_SUBB = 79, GEN_OPCODE_SAD2 = 80, diff --git a/backend/src/backend/gen_encoder.cpp b/backend/src/backend/gen_encoder.cpp index 295e11d..bd6204a 100644 --- a/backend/src/backend/gen_encoder.cpp +++ b/backend/src/backend/gen_encoder.cpp @@ -901,6 +901,7 @@ namespace gbe ALU1(RNDU) ALU1(FBH) ALU1(FBL) + ALU1(CBIT) ALU1(F16TO32) ALU1(F32TO16) ALU2(SEL) diff --git a/backend/src/backend/gen_encoder.hpp b/backend/src/backend/gen_encoder.hpp index 2c999ce..3f486d7 100644 --- a/backend/src/backend/gen_encoder.hpp +++ b/backend/src/backend/gen_encoder.hpp @@ -101,6 +101,7 @@ namespace gbe ALU1(MOV) ALU1(FBH) ALU1(FBL) + ALU1(CBIT) ALU2(SUBB) ALU2(UPSAMPLE_SHORT) ALU2(UPSAMPLE_INT) diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index e3ee35d..e05a0a6 100644 --- a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -493,6 +493,7 @@ namespace gbe ALU2WithTemp(MUL_HI) ALU1(FBH) ALU1(FBL) + ALU1(CBIT) ALU2WithTemp(HADD) ALU2WithTemp(RHADD) ALU2(UPSAMPLE_SHORT) @@ -1861,7 +1862,7 @@ namespace gbe static ir::Type getType(const ir::Opcode opcode, const ir::Type insnType) { if (insnType == ir::TYPE_S64 || insnType == ir::TYPE_U64 || insnType == ir::TYPE_S8 || insnType == ir::TYPE_U8) return insnType; - if (opcode == ir::OP_FBH || opcode == ir::OP_FBL) + if (opcode == ir::OP_FBH || opcode == ir::OP_FBL || opcode == ir::OP_CBIT) return ir::TYPE_U32; if (insnType == ir::TYPE_S16 || insnType == ir::TYPE_U16) return insnType; @@ -1915,6 +1916,7 @@ namespace gbe case ir::OP_RNDZ: sel.RNDZ(dst, src); break; case ir::OP_FBH: sel.FBH(dst, src); break; case 
ir::OP_FBL: sel.FBL(dst, src); break; + case ir::OP_CBIT: sel.CBIT(dst, src); break; case ir::OP_COS: sel.MATH(dst, GEN_MATH_FUNCTION_COS, src); break; case ir::OP_SIN: sel.MATH(dst, GEN_MATH_FUNCTION_SIN, src); break; case ir::OP_LOG: sel.MATH(dst, GEN_MATH_FUNCTION_LOG, src); break; diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx index 7511b84..d80dc58 100644 --- a/backend/src/backend/gen_insn_selection.hxx +++ b/backend/src/backend/gen_insn_selection.hxx @@ -68,6 +68,7 @@ DECL_SELECTION_IR(MUL_HI, BinaryWithTempInstruction) DECL_SELECTION_IR(I64_MUL_HI, I64MULHIInstruction) DECL_SELECTION_IR(FBH, UnaryInstruction) DECL_SELECTION_IR(FBL, UnaryInstruction) +DECL_SELECTION_IR(CBIT, UnaryInstruction) DECL_SELECTION_IR(HADD, BinaryWithTempInstruction) DECL_SELECTION_IR(RHADD, BinaryWithTempInstruction) DECL_SELECTION_IR(I64HADD, I64HADDInstruction) diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp index e4e30ed..6c37f29 100644 --- a/backend/src/ir/instruction.cpp +++ b/backend/src/ir/instruction.cpp @@ -1594,6 +1594,7 @@ DECL_MEM_FN(GetImageInfoInstruction, uint8_t, getImageIndex(void), getImageIndex DECL_EMIT_FUNCTION(MOV) DECL_EMIT_FUNCTION(FBH) DECL_EMIT_FUNCTION(FBL) + DECL_EMIT_FUNCTION(CBIT) DECL_EMIT_FUNCTION(COS) DECL_EMIT_FUNCTION(SIN) DECL_EMIT_FUNCTION(LOG) diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp index 6807615..1c31171 100644 --- a/backend/src/ir/instruction.hpp +++ b/backend/src/ir/instruction.hpp @@ -585,6 +585,8 @@ namespace ir { Instruction FBH(Type type, Register dst, Register src); /*! fbl.type dst src */ Instruction FBL(Type type, Register dst, Register src); + /*! cbit.type dst src */ + Instruction CBIT(Type type, Register dst, Register src); /*! hadd.type dst src */ Instruction HADD(Type type, Register dst, Register src0, Register src1); /*! 
rhadd.type dst src */ diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx index 5fed286..9a89069 100644 --- a/backend/src/ir/instruction.hxx +++ b/backend/src/ir/instruction.hxx @@ -86,6 +86,7 @@ DECL_INSN(MUL_HI, BinaryInstruction) DECL_INSN(I64_MUL_HI, BinaryInstruction) DECL_INSN(FBH, UnaryInstruction) DECL_INSN(FBL, UnaryInstruction) +DECL_INSN(CBIT, UnaryInstruction) DECL_INSN(HADD, BinaryInstruction) DECL_INSN(RHADD, BinaryInstruction) DECL_INSN(I64HADD, BinaryInstruction) diff --git a/backend/src/libocl/script/ocl_integer.def b/backend/src/libocl/script/ocl_integer.def index ec9177a..c35c242 100644 --- a/backend/src/libocl/script/ocl_integer.def +++ b/backend/src/libocl/script/ocl_integer.def @@ -23,8 +23,7 @@ uintn upsample (ushortn hi, ushortn lo) longn upsample (intn hi, uintn lo) ulongn upsample (uintn hi, uintn lo) -# XXX not implemented -#gentype popcount (gentype x) +gentype popcount (gentype x) ##fast_integer gentype mad24 (gentype x, gentype y, gentype z) diff --git a/backend/src/libocl/tmpl/ocl_integer.tmpl.cl b/backend/src/libocl/tmpl/ocl_integer.tmpl.cl index 9230604..375a40f 100644 --- a/backend/src/libocl/tmpl/ocl_integer.tmpl.cl +++ b/backend/src/libocl/tmpl/ocl_integer.tmpl.cl @@ -19,6 +19,7 @@ PURE CONST uint __gen_ocl_fbh(uint); PURE CONST uint __gen_ocl_fbl(uint); +PURE CONST uint __gen_ocl_cbit(uint); OVERLOADABLE char clz(char x) { if (x < 0) @@ -86,6 +87,35 @@ OVERLOADABLE ulong clz(ulong x) { return v; } +OVERLOADABLE char popcount(char x) { + return x == 0 ? 0 : x < 0?__gen_ocl_cbit(x) - 24 : __gen_ocl_cbit(x); +} +OVERLOADABLE short popcount(short x) { + return x == 0 ? 0 : x < 0?__gen_ocl_cbit(x) - 16 : __gen_ocl_cbit(x); +} +#define SDEF(TYPE) \ +OVERLOADABLE TYPE popcount(TYPE x){ return x == 0? 
0:__gen_ocl_cbit(x);} +SDEF(uchar); +SDEF(ushort); +SDEF(int); +SDEF(uint); +#undef SDEF + +OVERLOADABLE long popcount(long x) { + union { int i[2]; long x; } u; + u.x = x; + uint v = popcount(u.i[1]); + v += popcount(u.i[0]); + return v; +} + +OVERLOADABLE ulong popcount(ulong x) { + union { uint i[2]; ulong x; } u; + u.x = x; + uint v = popcount(u.i[1]); + v += popcount(u.i[0]); + return v; +} // sat #define SDEF(TYPE) \ diff --git a/backend/src/libocl/tmpl/ocl_integer.tmpl.h b/backend/src/libocl/tmpl/ocl_integer.tmpl.h index 2869bb6..0b3dea4 100644 --- a/backend/src/libocl/tmpl/ocl_integer.tmpl.h +++ b/backend/src/libocl/tmpl/ocl_integer.tmpl.h @@ -45,6 +45,15 @@ OVERLOADABLE uint clz(uint x); OVERLOADABLE long clz(long x); OVERLOADABLE ulong clz(ulong x); +OVERLOADABLE char popcount(char x); +OVERLOADABLE uchar popcount(uchar x); +OVERLOADABLE short popcount(short x); +OVERLOADABLE ushort popcount(ushort x); +OVERLOADABLE int popcount(int x); +OVERLOADABLE uint popcount(uint x); +OVERLOADABLE long popcount(long x); +OVERLOADABLE ulong popcount(ulong x); + OVERLOADABLE char mul_hi(char x, char y); OVERLOADABLE uchar mul_hi(uchar x, uchar y); OVERLOADABLE short mul_hi(short x, short y); diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp index 39b441f..39e22d7 100644 --- a/backend/src/llvm/llvm_gen_backend.cpp +++ b/backend/src/llvm/llvm_gen_backend.cpp @@ -2494,6 +2494,7 @@ namespace gbe regTranslator.newScalarProxy(ir::ocl::workdim, dst); break; case GEN_OCL_FBH: case GEN_OCL_FBL: + case GEN_OCL_CBIT: case GEN_OCL_COS: case GEN_OCL_SIN: case GEN_OCL_SQR: @@ -2779,6 +2780,7 @@ namespace gbe } case GEN_OCL_FBH: this->emitUnaryCallInst(I,CS,ir::OP_FBH); break; case GEN_OCL_FBL: this->emitUnaryCallInst(I,CS,ir::OP_FBL); break; + case GEN_OCL_CBIT: this->emitUnaryCallInst(I,CS,ir::OP_CBIT); break; case GEN_OCL_ABS: { const ir::Register src = this->getRegister(*AI); diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx 
b/backend/src/llvm/llvm_gen_ocl_function.hxx index f508bcc..7434c78 100644 --- a/backend/src/llvm/llvm_gen_ocl_function.hxx +++ b/backend/src/llvm/llvm_gen_ocl_function.hxx @@ -151,6 +151,7 @@ DECL_LLVM_GEN_FUNCTION(I64RHADD, _Z15__gen_ocl_rhaddmm) DECL_LLVM_GEN_FUNCTION(UPSAMPLE_SHORT, _Z18__gen_ocl_upsampless) DECL_LLVM_GEN_FUNCTION(UPSAMPLE_INT, _Z18__gen_ocl_upsampleii) DECL_LLVM_GEN_FUNCTION(UPSAMPLE_LONG, _Z18__gen_ocl_upsamplell) +DECL_LLVM_GEN_FUNCTION(CBIT, __gen_ocl_cbit) // saturate convert DECL_LLVM_GEN_FUNCTION(SAT_CONV_U8_TO_I8, _Z16convert_char_sath) -- 1.7.9.5 _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
