From: Junyan He <[email protected]> Add these three cl files, one for src and dst are not aligned but have same offset to 4. second for src's %4 offset is bigger than the dst's third for src's %4 offset is small than the dst's
Signed-off-by: Junyan He <[email protected]> --- src/CMakeLists.txt | 4 ++- .../cl_internal_copy_buf_unalign_dst_offset.cl | 28 +++++++++++++++++++++ .../cl_internal_copy_buf_unalign_same_offset.cl | 19 ++++++++++++++ .../cl_internal_copy_buf_unalign_src_offset.cl | 29 ++++++++++++++++++++++ 4 files changed, 79 insertions(+), 1 deletion(-) create mode 100644 src/kernels/cl_internal_copy_buf_unalign_dst_offset.cl create mode 100644 src/kernels/cl_internal_copy_buf_unalign_same_offset.cl create mode 100644 src/kernels/cl_internal_copy_buf_unalign_src_offset.cl diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 95ff56f..9db53ad 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -18,7 +18,9 @@ endforeach (KF) endmacro (MakeKernelBinStr) set (KERNEL_STR_FILES) -set (KERNEL_NAMES cl_internal_copy_buf_align1 cl_internal_copy_buf_align4 cl_internal_copy_buf_align16) +set (KERNEL_NAMES cl_internal_copy_buf_align1 cl_internal_copy_buf_align4 +cl_internal_copy_buf_align16 cl_internal_copy_buf_unalign_same_offset +cl_internal_copy_buf_unalign_dst_offset cl_internal_copy_buf_unalign_src_offset) MakeKernelBinStr ("${CMAKE_CURRENT_SOURCE_DIR}/kernels/" "${KERNEL_NAMES}") set(OPENCL_SRC diff --git a/src/kernels/cl_internal_copy_buf_unalign_dst_offset.cl b/src/kernels/cl_internal_copy_buf_unalign_dst_offset.cl new file mode 100644 index 0000000..13f4162 --- /dev/null +++ b/src/kernels/cl_internal_copy_buf_unalign_dst_offset.cl @@ -0,0 +1,28 @@ +kernel void __cl_cpy_region_unalign_dst_offset ( global int* src, unsigned int src_offset, + global int* dst, unsigned int dst_offset, + unsigned int size, + unsigned int first_mask, unsigned int last_mask, + unsigned int shift, unsigned int dw_mask) +{ + int i = get_global_id(0); + unsigned int tmp = 0; + + if (i > size -1) + return; + + /* last dw, need to be careful, not to overflow the source. */ + if ((i == size - 1) && ((last_mask & (~(~dw_mask >> shift))) == 0)) { + tmp = ((src[src_offset + i] & ~dw_mask) >> shift); + } else { + tmp = ((src[src_offset + i] & ~dw_mask) >> shift) + | ((src[src_offset + i + 1] & dw_mask) << (32 - shift)); + } + + if (i == 0) { + dst[dst_offset] = (dst[dst_offset] & first_mask) | (tmp & (~first_mask)); + } else if (i == size - 1) { + dst[i+dst_offset] = (tmp & last_mask) | (dst[i+dst_offset] & (~last_mask)); + } else { + dst[i+dst_offset] = tmp; + } +} diff --git a/src/kernels/cl_internal_copy_buf_unalign_same_offset.cl b/src/kernels/cl_internal_copy_buf_unalign_same_offset.cl new file mode 100644 index 0000000..8510246 --- /dev/null +++ b/src/kernels/cl_internal_copy_buf_unalign_same_offset.cl @@ -0,0 +1,19 @@ +kernel void __cl_cpy_region_unalign_same_offset ( global int* src, unsigned int src_offset, + global int* dst, unsigned int dst_offset, + unsigned int size, + unsigned int first_mask, unsigned int last_mask) +{ + int i = get_global_id(0); + if (i > size -1) + return; + + if (i == 0) { + dst[dst_offset] = (dst[dst_offset] & first_mask) + | (src[src_offset] & (~first_mask)); + } else if (i == size - 1) { + dst[i+dst_offset] = (src[i+src_offset] & last_mask) + | (dst[i+dst_offset] & (~last_mask)); + } else { + dst[i+dst_offset] = src[i+src_offset]; + } +} diff --git a/src/kernels/cl_internal_copy_buf_unalign_src_offset.cl b/src/kernels/cl_internal_copy_buf_unalign_src_offset.cl new file mode 100644 index 0000000..f98368a --- /dev/null +++ b/src/kernels/cl_internal_copy_buf_unalign_src_offset.cl @@ -0,0 +1,29 @@ +kernel void __cl_cpy_region_unalign_src_offset ( global int* src, unsigned int src_offset, + global int* dst, unsigned int dst_offset, + unsigned int size, + unsigned int first_mask, unsigned int last_mask, + unsigned int shift, unsigned int dw_mask, int src_less) +{ + int i = get_global_id(0); + unsigned int tmp = 0; + + if (i > size -1) + return; + + if (i == 0) { + tmp = ((src[src_offset + i] & dw_mask) << shift); + } else if (src_less && i == size - 1) { // not exceed the bound of source + tmp = ((src[src_offset + i - 1] & ~dw_mask) >> (32 - shift)); + } else { + tmp = ((src[src_offset + i - 1] & ~dw_mask) >> (32 - shift)) + | ((src[src_offset + i] & dw_mask) << shift); + } + + if (i == 0) { + dst[dst_offset] = (dst[dst_offset] & first_mask) | (tmp & (~first_mask)); + } else if (i == size - 1) { + dst[i+dst_offset] = (tmp & last_mask) | (dst[i+dst_offset] & (~last_mask)); + } else { + dst[i+dst_offset] = tmp; + } +} -- 1.8.3.2 _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
