From: Luo Xionghu <[email protected]> The kernel should use the OpenCL clz function instead of __builtin_clz. Also add a zero-input check to the test.
Signed-off-by: Luo Xionghu <[email protected]> --- kernels/compiler_clz.cl | 2 +- utests/compiler_clz.cpp | 16 ++++++++-------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/kernels/compiler_clz.cl b/kernels/compiler_clz.cl index 7ab6261..87ecf1c 100644 --- a/kernels/compiler_clz.cl +++ b/kernels/compiler_clz.cl @@ -3,7 +3,7 @@ { \ __global TYPE* A = &src[get_global_id(0)]; \ __global TYPE* B = &dst[get_global_id(0)]; \ - *B = __builtin_clz(*A); \ + *B = clz(*A); \ } COMPILER_CLZ(uint) diff --git a/utests/compiler_clz.cpp b/utests/compiler_clz.cpp index 901e19b..b7516be 100644 --- a/utests/compiler_clz.cpp +++ b/utests/compiler_clz.cpp @@ -27,7 +27,8 @@ void test(const char *kernel_name) U max = get_max<U>(); OCL_MAP_BUFFER(0); - for (uint32_t i = 0; i < n; ++i) { + ((U*)buf_data[0])[0] = 0; + for (uint32_t i = 1; i < n; ++i) { ((U*)buf_data[0])[i] = max >> i; } OCL_UNMAP_BUFFER(0); @@ -36,17 +37,16 @@ void test(const char *kernel_name) locals[0] = 16; OCL_NDRANGE(1); OCL_MAP_BUFFER(1); - for (uint32_t i = 0; i < n; ++i) { + OCL_ASSERT(((U*)buf_data[1])[0] == sizeof(U)*8 ); + for (uint32_t i = 1; i < n; ++i) { if(sizeof(U) == 1 && i < 8 ) - OCL_ASSERT(((U*)buf_data[1])[i] == (i+24) ); + OCL_ASSERT(((U*)buf_data[1])[i] == i ); else if(sizeof(U) == 2 && i < 16 ) - OCL_ASSERT(((U*)buf_data[1])[i] == (i+16) ); + OCL_ASSERT(((U*)buf_data[1])[i] == i ); else if(sizeof(U) == 4 && i < 32 ) OCL_ASSERT(((U*)buf_data[1])[i] == i ); - else if(sizeof(U) == 8 && i < 32 ) - OCL_ASSERT(((U*)buf_data[1])[i] == 0 ); - else if(sizeof(U) == 8 && i > 31) - OCL_ASSERT(((U*)buf_data[1])[i] == (i-32) ); + else if(sizeof(U) == 8 ) + OCL_ASSERT(((U*)buf_data[1])[i] == i ); } OCL_UNMAP_BUFFER(1); -- 1.9.1 _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
