From: Luo Xionghu <[email protected]>

The fbh-style clz implementation is inefficient.
v2: use llvm.ctlz to call llvm intrinsic instead of beignet non-standard intrinsic call style; remove the non-standard clz call path. Signed-off-by: Luo Xionghu <[email protected]> --- backend/src/libocl/CMakeLists.txt | 2 +- backend/src/libocl/src/ocl_clz.ll | 44 ++++++++++++++++ backend/src/libocl/tmpl/ocl_integer.tmpl.cl | 78 +++++------------------------ backend/src/libocl/tmpl/ocl_integer.tmpl.h | 9 ++++ 4 files changed, 67 insertions(+), 66 deletions(-) create mode 100644 backend/src/libocl/src/ocl_clz.ll diff --git a/backend/src/libocl/CMakeLists.txt b/backend/src/libocl/CMakeLists.txt index 314d373..16f00ee 100644 --- a/backend/src/libocl/CMakeLists.txt +++ b/backend/src/libocl/CMakeLists.txt @@ -181,7 +181,7 @@ MACRO(ADD_LL_TO_BC_TARGET M) ) ENDMACRO(ADD_LL_TO_BC_TARGET) -SET (OCL_LL_MODULES ocl_barrier ocl_memcpy ocl_memset) +SET (OCL_LL_MODULES ocl_barrier ocl_memcpy ocl_memset ocl_clz) FOREACH(f ${OCL_LL_MODULES}) COPY_THE_LL(${f}) ADD_LL_TO_BC_TARGET(${f}) diff --git a/backend/src/libocl/src/ocl_clz.ll b/backend/src/libocl/src/ocl_clz.ll new file mode 100644 index 0000000..0863b6f --- /dev/null +++ b/backend/src/libocl/src/ocl_clz.ll @@ -0,0 +1,44 @@ +declare i8 @llvm.ctlz.i8(i8, i1) +declare i16 @llvm.ctlz.i16(i16, i1) +declare i32 @llvm.ctlz.i32(i32, i1) +declare i64 @llvm.ctlz.i64(i64, i1) + +define i8 @clz_s8(i8 %x) nounwind readnone alwaysinline { + %call = call i8 @llvm.ctlz.i8(i8 %x, i1 0) + ret i8 %call +} + +define i8 @clz_u8(i8 %x) nounwind readnone alwaysinline { + %call = call i8 @llvm.ctlz.i8(i8 %x, i1 0) + ret i8 %call +} + +define i16 @clz_s16(i16 %x) nounwind readnone alwaysinline { + %call = call i16 @llvm.ctlz.i16(i16 %x, i1 0) + ret i16 %call +} + +define i16 @clz_u16(i16 %x) nounwind readnone alwaysinline { + %call = call i16 @llvm.ctlz.i16(i16 %x, i1 0) + ret i16 %call +} + +define i32 @clz_s32(i32 %x) nounwind readnone alwaysinline { + %call = call i32 @llvm.ctlz.i32(i32 %x, i1 0) + ret i32 %call +} + +define i32 @clz_u32(i32 %x) 
nounwind readnone alwaysinline { + %call = call i32 @llvm.ctlz.i32(i32 %x, i1 0) + ret i32 %call +} + +define i64 @clz_s64(i64 %x) nounwind readnone alwaysinline { + %call = call i64 @llvm.ctlz.i64(i64 %x, i1 0) + ret i64 %call +} + +define i64 @clz_u64(i64 %x) nounwind readnone alwaysinline { + %call = call i64 @llvm.ctlz.i64(i64 %x, i1 0) + ret i64 %call +} diff --git a/backend/src/libocl/tmpl/ocl_integer.tmpl.cl b/backend/src/libocl/tmpl/ocl_integer.tmpl.cl index 6da0bab..a5e1dbc 100644 --- a/backend/src/libocl/tmpl/ocl_integer.tmpl.cl +++ b/backend/src/libocl/tmpl/ocl_integer.tmpl.cl @@ -19,6 +19,8 @@ PURE CONST uint __gen_ocl_fbh(uint); PURE CONST uint __gen_ocl_fbl(uint); + + PURE CONST OVERLOADABLE uint __gen_ocl_cbit(uint); PURE CONST OVERLOADABLE uint __gen_ocl_cbit(int); PURE CONST OVERLOADABLE uint __gen_ocl_cbit(ushort); @@ -26,71 +28,17 @@ PURE CONST OVERLOADABLE uint __gen_ocl_cbit(short); PURE CONST OVERLOADABLE uint __gen_ocl_cbit(uchar); PURE CONST OVERLOADABLE uint __gen_ocl_cbit(char); -OVERLOADABLE char clz(char x) { - if (x < 0) - return 0; - if (x == 0) - return 8; - return __gen_ocl_fbh(x) - 24; -} - -OVERLOADABLE uchar clz(uchar x) { - if (x == 0) - return 8; - return __gen_ocl_fbh(x) - 24; -} - -OVERLOADABLE short clz(short x) { - if (x < 0) - return 0; - if (x == 0) - return 16; - return __gen_ocl_fbh(x) - 16; -} - -OVERLOADABLE ushort clz(ushort x) { - if (x == 0) - return 16; - return __gen_ocl_fbh(x) - 16; -} - -OVERLOADABLE int clz(int x) { - if (x < 0) - return 0; - if (x == 0) - return 32; - return __gen_ocl_fbh(x); -} - -OVERLOADABLE uint clz(uint x) { - if (x == 0) - return 32; - return __gen_ocl_fbh(x); -} - -OVERLOADABLE long clz(long x) { - union { int i[2]; long x; } u; - u.x = x; - if (u.i[1] & 0x80000000u) - return 0; - if (u.i[1] == 0 && u.i[0] == 0) - return 64; - uint v = clz(u.i[1]); - if(v == 32) - v += clz(u.i[0]); - return v; -} - -OVERLOADABLE ulong clz(ulong x) { - if (x == 0) - return 64; - union { uint i[2]; ulong 
x; } u; - u.x = x; - uint v = clz(u.i[1]); - if(v == 32) - v += clz(u.i[0]); - return v; -} +#define SDEF(TYPE, TYPE_NAME, SIZE) \ +OVERLOADABLE TYPE clz(TYPE x){ return clz_##TYPE_NAME##SIZE(x);} +SDEF(char, s, 8); +SDEF(uchar, u, 8); +SDEF(short, s, 16); +SDEF(ushort, u, 16); +SDEF(int, s, 32); +SDEF(uint, u, 32); +SDEF(long, s, 64); +SDEF(ulong, u, 64); +#undef SDEF #define SDEF(TYPE) \ OVERLOADABLE TYPE popcount(TYPE x){ return __gen_ocl_cbit(x);} diff --git a/backend/src/libocl/tmpl/ocl_integer.tmpl.h b/backend/src/libocl/tmpl/ocl_integer.tmpl.h index f067b8d..4b3b5ae 100644 --- a/backend/src/libocl/tmpl/ocl_integer.tmpl.h +++ b/backend/src/libocl/tmpl/ocl_integer.tmpl.h @@ -45,6 +45,15 @@ OVERLOADABLE uint clz(uint x); OVERLOADABLE long clz(long x); OVERLOADABLE ulong clz(ulong x); +char clz_s8(char); +uchar clz_u8(uchar); +short clz_s16(short); +ushort clz_u16(ushort); +int clz_s32(int); +uint clz_u32(uint); +long clz_s64(long); +ulong clz_u64(ulong); + OVERLOADABLE char popcount(char x); OVERLOADABLE uchar popcount(uchar x); OVERLOADABLE short popcount(short x); -- 1.9.1 _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
