This test is used for debugging the phi-node optimization and for detecting possible optimization bugs.
Signed-off-by: Ruiling Song <[email protected]>
---
 kernels/compiler_phi_opt.cl |  36 ++++++++++++
 utests/CMakeLists.txt       |   1 +
 utests/compiler_phi_opt.cpp | 128 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 165 insertions(+)
 create mode 100644 kernels/compiler_phi_opt.cl
 create mode 100644 utests/compiler_phi_opt.cpp

diff --git a/kernels/compiler_phi_opt.cl b/kernels/compiler_phi_opt.cl
new file mode 100644
index 0000000..3593b4c
--- /dev/null
+++ b/kernels/compiler_phi_opt.cl
@@ -0,0 +1,36 @@
+/*
+ * Each work item starts from src[gid], adds table[0..K-1] in a loop and
+ * stores the sum to dst[gid]. x is the only value carried across iterations.
+ */
+__kernel void
+compiler_phi_opt0(__global int *src, __global int *dst, __global int *table, int K)
+{
+  int gid = get_global_id(0);
+  int x = src[gid];
+
+  for (int i = 0; i < K; i++) {
+    x = x + table[i];
+  }
+  dst[gid] = x;
+}
+
+/*
+ * Two values are carried across iterations and each is computed from the
+ * other: x = y + table[i], then y = x + table[i]. Results go to dst and dst1.
+ */
+__kernel void
+compiler_phi_opt1(__global int *src, __global int *src1,
+                  __global int *dst, __global int *dst1,
+                  __global int *table, int K)
+{
+  int gid = get_global_id(0);
+  int x = src[gid];
+  int y = src1[gid];
+  for (int i = 0; i < K; i++) {
+    x = y + table[i];
+    y = x + table[i];
+  }
+  dst[gid] = x;
+  dst1[gid] = y;
+}
+
diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
index f56c497..6b44910 100644
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -134,6 +134,7 @@ set (utests_sources
   compiler_mad24.cpp
   compiler_mul24.cpp
   compiler_multiple_kernels.cpp
+  compiler_phi_opt.cpp
   compiler_radians.cpp
   compiler_rhadd.cpp
   compiler_rotate.cpp
diff --git a/utests/compiler_phi_opt.cpp b/utests/compiler_phi_opt.cpp
new file mode 100644
index 0000000..822a26e
--- /dev/null
+++ b/utests/compiler_phi_opt.cpp
@@ -0,0 +1,128 @@
+#include "utest_helper.hpp"
+
+/* CPU reference for kernel compiler_phi_opt0: dst[id] = src[id] + t[0] + ... + t[K-1]. */
+static void cpu(int global_id, int *src, int *dst, int *t, int K) {
+  int r = src[global_id];
+  for (int i = 0; i < K;i++) {
+    r += t[i];
+  }
+  dst[global_id] = r;
+}
+
+/* Runs kernel compiler_phi_opt0 on 16 work items and compares every output with cpu(). */
+void compiler_phi_opt0(void)
+{
+  const int32_t n = 16;
+  int cpu_dst[16], cpu_src[16];
+  int table[n];
+  int K = 4;
+  // Setup kernel and buffers
+  OCL_CREATE_KERNEL_FROM_FILE("compiler_phi_opt", "compiler_phi_opt0");
+  OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL);
+  OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL);
+  // The fill loop below writes n table entries, so size the buffer by n, not K.
+  OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(uint32_t), NULL);
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+  OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]);
+  OCL_SET_ARG(3, sizeof(cl_int), &K);
+  globals[0] = 16;
+  locals[0] = 16;
+
+  // Run random tests
+  OCL_MAP_BUFFER(0);
+  OCL_MAP_BUFFER(2);
+  for (int32_t i = 0; i < (int32_t) n; ++i) {
+    cpu_src[i] = ((int32_t*)buf_data[0])[i] = rand() % 16;
+    table[i] = ((int32_t*)buf_data[2])[i] = i;
+  }
+  OCL_UNMAP_BUFFER(0);
+  OCL_UNMAP_BUFFER(2);
+
+  // Run the kernel on GPU
+  OCL_NDRANGE(1);
+
+  // Run on CPU
+  for (int32_t i = 0; i <(int32_t) n; ++i) cpu(i, cpu_src, cpu_dst, table, K);
+
+  // Compare
+  OCL_MAP_BUFFER(1);
+  for (int32_t i = 0; i < n; ++i)
+    OCL_ASSERT(((int32_t*)buf_data[1])[i] == cpu_dst[i]);
+  OCL_UNMAP_BUFFER(1);
+}
+
+/* CPU reference for kernel compiler_phi_opt1: K rounds of x = y + t[i]; y = x + t[i]. */
+static void cpu1(int gid, int *src, int *src1, int *dst, int *dst1, int *t, int K) {
+  int x = src[gid];
+  int y = src1[gid];
+  for (int i = 0; i < K; i++) {
+    x = y + t[i];
+    y = x + t[i];
+  }
+  dst[gid] = x;
+  dst1[gid] = y;
+}
+
+/* Runs kernel compiler_phi_opt1 on 16 work items and compares both outputs with cpu1(). */
+void compiler_phi_opt1(void)
+{
+  const int32_t n = 16;
+  int cpu_dst[16], cpu_src[16];
+  int cpu_dst1[16], cpu_src1[16];
+  int table[n];
+
+  int K = 4;
+  // Setup kernel and buffers
+  OCL_CREATE_KERNEL_FROM_FILE("compiler_phi_opt", "compiler_phi_opt1");
+  OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL);
+  OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL);
+  OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(uint32_t), NULL);
+  OCL_CREATE_BUFFER(buf[3], 0, n * sizeof(uint32_t), NULL);
+  // The fill loop below writes n table entries, so size the buffer by n, not K.
+  OCL_CREATE_BUFFER(buf[4], 0, n * sizeof(uint32_t), NULL);
+
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+  OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]);
+  OCL_SET_ARG(3, sizeof(cl_mem), &buf[3]);
+  OCL_SET_ARG(4, sizeof(cl_mem), &buf[4]);
+  OCL_SET_ARG(5, sizeof(cl_int), &K);
+  globals[0] = 16;
+  locals[0] = 16;
+
+  // Fill the inputs with deterministic values
+  OCL_MAP_BUFFER(0);
+  OCL_MAP_BUFFER(1);
+  OCL_MAP_BUFFER(4);
+  for (int32_t i = 0; i < (int32_t) n; ++i) {
+    cpu_src[i] = ((int32_t*)buf_data[0])[i] = i;
+    cpu_src1[i] = ((int32_t*)buf_data[1])[i] = i;
+    table[i] = ((int32_t*)buf_data[4])[i] = 1;
+  }
+  OCL_UNMAP_BUFFER(0);
+  OCL_UNMAP_BUFFER(1);
+  OCL_UNMAP_BUFFER(4);
+
+  // Run the kernel on GPU
+  OCL_NDRANGE(1);
+
+  // Run on CPU
+  for (int32_t i = 0; i <(int32_t) n; ++i) cpu1(i, cpu_src, cpu_src1, cpu_dst, cpu_dst1, table, K);
+
+  // Compare
+  OCL_MAP_BUFFER(2);
+  OCL_MAP_BUFFER(3);
+  for (int32_t i = 0; i < n; ++i) {
+    OCL_ASSERT(((int32_t*)buf_data[2])[i] == cpu_dst[i]);
+    OCL_ASSERT(((int32_t*)buf_data[3])[i] == cpu_dst1[i]);
+  }
+  OCL_UNMAP_BUFFER(2);
+  OCL_UNMAP_BUFFER(3);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_phi_opt0);
+MAKE_UTEST_FROM_FUNCTION(compiler_phi_opt1);
+
+
+
+--
2.4.1

_______________________________________________
Beignet mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/beignet
