OK -----Original Message----- From: Luo, Xionghu Sent: Monday, May 19, 2014 3:35 PM To: Lv, Meng Cc: [email protected] Subject: RE: [Beignet] [PATCH] [PATCH]improve the clEnqueueCopyBufferRect performance in some cases
Could you please move the kernel source code to file instead of staying in host code? You can refer to my pending patch "move enqueue_copy_image kernels outside of runtime code", thanks. Luo Xionghu Best Regards -----Original Message----- From: Beignet [mailto:[email protected]] On Behalf Of Yang, Rong R Sent: Monday, May 19, 2014 3:14 PM To: Lv, Meng; [email protected] Cc: Lv, Meng Subject: Re: [Beignet] [PATCH] [PATCH]improve the clEnqueueCopyBufferRect performance in some cases The one index indicate to one kernel string, because you add a new kernel for CL_ENQUEUE_COPY_BUFFER_RECT, you should also add a new index for it. And the file mode change 100644 => 100755, I think it is not necessary. -----Original Message----- From: Beignet [mailto:[email protected]] On Behalf Of Lv Meng Sent: Monday, May 05, 2014 10:50 AM To: [email protected] Cc: Lv, Meng Subject: [Beignet] [PATCH] [PATCH]improve the clEnqueueCopyBufferRect performance in some cases Signed-off-by: Lv Meng <[email protected]> --- src/cl_mem.c | 80 ++++++++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 59 insertions(+), 21 deletions(-) mode change 100644 => 100755 src/cl_mem.c diff --git a/src/cl_mem.c b/src/cl_mem.c old mode 100644 new mode 100755 index 44482f7..92f51d0 --- a/src/cl_mem.c +++ b/src/cl_mem.c @@ -911,6 +911,17 @@ cl_mem_copy_buffer_rect(cl_command_queue queue, cl_mem src_buf, cl_mem dst_buf, size_t global_off[] = {0,0,0}; size_t global_sz[] = {1,1,1}; size_t local_sz[] = {LOCAL_SZ_0,LOCAL_SZ_1,LOCAL_SZ_1}; + + // the src and dst mem rect is continuous, the copy is degraded to + buf copy if((region[0] == dst_row_pitch) && (region[0] == + src_row_pitch) && (region[1] * src_row_pitch == src_slice_pitch) && (region[1] * dst_row_pitch == dst_slice_pitch)){ + cl_int src_offset = src_origin[2]*src_slice_pitch + src_origin[1]*src_row_pitch + src_origin[0]; + cl_int dst_offset = dst_origin[2]*dst_slice_pitch + dst_origin[1]*dst_row_pitch + dst_origin[0]; + cl_int size = region[0]*region[1]*region[2]; + ret = cl_mem_copy(queue, src_buf, dst_buf,src_offset, dst_offset, size); + return ret; + } + if(region[1] == 1) local_sz[1] = 1; if(region[2] == 1) local_sz[2] = 1; global_sz[0] = ((region[0] + local_sz[0] - 1) / local_sz[0]) * local_sz[0]; @@ -919,30 +930,57 @@ cl_mem_copy_buffer_rect(cl_command_queue queue, cl_mem src_buf, cl_mem dst_buf, cl_int index = CL_ENQUEUE_COPY_BUFFER_RECT; cl_int src_offset = src_origin[2]*src_slice_pitch + src_origin[1]*src_row_pitch + src_origin[0]; cl_int dst_offset = dst_origin[2]*dst_slice_pitch + dst_origin[1]*dst_row_pitch + dst_origin[0]; - - static const char *str_kernel = - "kernel void __cl_cpy_buffer_rect ( \n" - " global char* src, global char* dst, \n" - " unsigned int region0, unsigned int region1, unsigned int region2, \n" - " unsigned int src_offset, unsigned int dst_offset, \n" - " unsigned int src_row_pitch, unsigned int src_slice_pitch, \n" - " unsigned int dst_row_pitch, unsigned int dst_slice_pitch) { \n" - " int i = get_global_id(0); \n" - " int j = get_global_id(1); \n" - " int k = get_global_id(2); \n" - " if((i >= region0) || (j>= region1) || (k>=region2)) \n" - " return; \n" - " src_offset += k * src_slice_pitch + j * src_row_pitch + i; \n" - " dst_offset += k * dst_slice_pitch + j * dst_row_pitch + i; \n" - " dst[dst_offset] = src[src_offset]; \n" - "}"; - - /* We use one kernel to copy the data. The kernel is lazily created. */ assert(src_buf->ctx == dst_buf->ctx); + if( (src_offset % 4== 0) && (dst_offset % 4== 0) && (src_row_pitch % 4== 0) && (dst_row_pitch % 4== 0) + && (src_slice_pitch % 4== 0) && (dst_slice_pitch % 4== 0) && (global_sz[0] % 4 == 0) ){ + global_sz[0] /= 4; + src_offset /= 4; + dst_offset /= 4; + src_row_pitch /= 4; + dst_row_pitch /= 4; + src_slice_pitch /= 4; + dst_slice_pitch /= 4; + static const char *str_intkernel = + "kernel void __cl_cpy_buffer_rect ( \n" + " global int* src, global int* dst, \n" + " unsigned int region0, unsigned int region1, unsigned int region2, \n" + " unsigned int src_offset, unsigned int dst_offset, \n" + " unsigned int src_row_pitch, unsigned int src_slice_pitch, \n" + " unsigned int dst_row_pitch, unsigned int dst_slice_pitch) { \n" + " int i = get_global_id(0); \n" + " int j = get_global_id(1); \n" + " int k = get_global_id(2); \n" + " region0 >>= 2; \n" + " if((i >= region0) || (j>= region1) || (k>=region2)) \n" + " return; \n" + " src_offset += k * src_slice_pitch + j * src_row_pitch + i; \n" + " dst_offset += k * dst_slice_pitch + j * dst_row_pitch + i; \n" + " dst[dst_offset] = src[src_offset]; \n" + "}"; + /* setup the kernel and run. */ + ker = cl_context_get_static_kernel(queue->ctx, index, + str_intkernel, NULL); } else { + static const char *str_kernel = + "kernel void __cl_cpy_buffer_rect ( \n" + " global char* src, global char* dst, \n" + " unsigned int region0, unsigned int region1, unsigned int region2, \n" + " unsigned int src_offset, unsigned int dst_offset, \n" + " unsigned int src_row_pitch, unsigned int src_slice_pitch, \n" + " unsigned int dst_row_pitch, unsigned int dst_slice_pitch) { \n" + " int i = get_global_id(0); \n" + " int j = get_global_id(1); \n" + " int k = get_global_id(2); \n" + " if((i >= region0) || (j>= region1) || (k>=region2)) \n" + " return; \n" + " src_offset += k * src_slice_pitch + j * src_row_pitch + i; \n" + " dst_offset += k * dst_slice_pitch + j * dst_row_pitch + i; \n" + " dst[dst_offset] = src[src_offset]; \n" + "}"; + /* setup the kernel and run. */ + ker = cl_context_get_static_kernel(queue->ctx, index, str_kernel, + NULL); } - /* setup the kernel and run. */ - ker = cl_context_get_static_kernel(queue->ctx, index, str_kernel, NULL); if (!ker) return CL_OUT_OF_RESOURCES; -- 1.8.3.2 _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
