The patch LGTM, will push latter, thanks.
On Fri, Feb 06, 2015 at 11:52:30AM +0800, Chuanbo Weng wrote: > Before this patch, cl_mem_kernel_copy_image do cpu memory copy in order > to copy image array objects. This is very slow for large image size. > This patch implement image array copy in cl way, which dramatically > accelerate image array related clEnqueueCopyImage. > clCopyImage case in OpenCL conformance test will not be blocked anymore. > > Signed-off-by: Chuanbo Weng <[email protected]> > --- > src/CMakeLists.txt | 3 ++ > src/cl_context.h | 6 +++ > src/cl_mem.c | 43 > ++++++++++++++++------ > .../cl_internal_copy_image_1d_array_to_1d_array.cl | 21 +++++++++++ > .../cl_internal_copy_image_2d_array_to_2d.cl | 21 +++++++++++ > .../cl_internal_copy_image_2d_array_to_2d_array.cl | 23 ++++++++++++ > .../cl_internal_copy_image_2d_array_to_3d.cl | 23 ++++++++++++ > .../cl_internal_copy_image_2d_to_2d_array.cl | 21 +++++++++++ > .../cl_internal_copy_image_3d_to_2d_array.cl | 23 ++++++++++++ > 9 files changed, 172 insertions(+), 12 deletions(-) > create mode 100644 src/kernels/cl_internal_copy_image_1d_array_to_1d_array.cl > create mode 100644 src/kernels/cl_internal_copy_image_2d_array_to_2d.cl > create mode 100644 src/kernels/cl_internal_copy_image_2d_array_to_2d_array.cl > create mode 100644 src/kernels/cl_internal_copy_image_2d_array_to_3d.cl > create mode 100644 src/kernels/cl_internal_copy_image_2d_to_2d_array.cl > create mode 100644 src/kernels/cl_internal_copy_image_3d_to_2d_array.cl > > diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt > index a55f84d..939f58d 100644 > --- a/src/CMakeLists.txt > +++ b/src/CMakeLists.txt > @@ -46,6 +46,9 @@ cl_internal_copy_buf_unalign_dst_offset > cl_internal_copy_buf_unalign_src_offset > cl_internal_copy_buf_rect cl_internal_copy_buf_rect_align4 > cl_internal_copy_image_1d_to_1d cl_internal_copy_image_2d_to_2d > cl_internal_copy_image_3d_to_2d cl_internal_copy_image_2d_to_3d > cl_internal_copy_image_3d_to_3d > +cl_internal_copy_image_2d_to_2d_array > cl_internal_copy_image_1d_array_to_1d_array > +cl_internal_copy_image_2d_array_to_2d_array > cl_internal_copy_image_2d_array_to_2d > +cl_internal_copy_image_2d_array_to_3d cl_internal_copy_image_3d_to_2d_array > cl_internal_copy_image_2d_to_buffer cl_internal_copy_image_3d_to_buffer > cl_internal_copy_buffer_to_image_2d cl_internal_copy_buffer_to_image_3d > cl_internal_fill_buf_align8 cl_internal_fill_buf_align4 > diff --git a/src/cl_context.h b/src/cl_context.h > index 38ad2fd..2ea0a73 100644 > --- a/src/cl_context.h > +++ b/src/cl_context.h > @@ -53,6 +53,12 @@ enum _cl_internal_ker_type { > CL_ENQUEUE_COPY_IMAGE_3D_TO_2D, //copy image 3d to image 2d > CL_ENQUEUE_COPY_IMAGE_2D_TO_3D, //copy image 2d to image 3d > CL_ENQUEUE_COPY_IMAGE_3D_TO_3D, //copy image 3d to image 3d > + CL_ENQUEUE_COPY_IMAGE_2D_TO_2D_ARRAY, //copy image 2d to image 2d > array > + CL_ENQUEUE_COPY_IMAGE_1D_ARRAY_TO_1D_ARRAY, //copy image 1d array to image > 1d array > + CL_ENQUEUE_COPY_IMAGE_2D_ARRAY_TO_2D_ARRAY, //copy image 2d array to image > 2d array > + CL_ENQUEUE_COPY_IMAGE_2D_ARRAY_TO_2D, //copy image 2d array to image > 2d > + CL_ENQUEUE_COPY_IMAGE_2D_ARRAY_TO_3D, //copy image 2d array to image > 3d > + CL_ENQUEUE_COPY_IMAGE_3D_TO_2D_ARRAY, //copy image 3d to image 2d > array > CL_ENQUEUE_COPY_IMAGE_2D_TO_BUFFER, //copy image 2d to buffer > CL_ENQUEUE_COPY_IMAGE_3D_TO_BUFFER, //copy image 3d tobuffer > CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_2D, //copy buffer to image 2d > diff --git a/src/cl_mem.c b/src/cl_mem.c > index 2ec89a4..2920bfe 100644 > --- a/src/cl_mem.c > +++ b/src/cl_mem.c > @@ -1610,27 +1610,43 @@ cl_mem_kernel_copy_image(cl_command_queue queue, > struct _cl_mem_image* src_image > ker = cl_context_get_static_kernel_from_bin(queue->ctx, > CL_ENQUEUE_COPY_IMAGE_2D_TO_3D, > cl_internal_copy_image_2d_to_3d_str, > (size_t)cl_internal_copy_image_2d_to_3d_str_size, NULL); > } else if(dst_image->image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY) { > + extern char cl_internal_copy_image_2d_to_2d_array_str[]; > + extern size_t cl_internal_copy_image_2d_to_2d_array_str_size; > > - cl_mem_copy_image_to_image(dst_origin, src_origin, region, dst_image, > src_image); > - return CL_SUCCESS; > + ker = cl_context_get_static_kernel_from_bin(queue->ctx, > CL_ENQUEUE_COPY_IMAGE_2D_TO_2D_ARRAY, > + cl_internal_copy_image_2d_to_2d_array_str, > (size_t)cl_internal_copy_image_2d_to_2d_array_str_size, NULL); > } > } else if(src_image->image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) { > if(dst_image->image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) { > + extern char cl_internal_copy_image_1d_array_to_1d_array_str[]; > + extern size_t cl_internal_copy_image_1d_array_to_1d_array_str_size; > > - cl_mem_copy_image_to_image(dst_origin, src_origin, region, dst_image, > src_image); > - return CL_SUCCESS; > + ker = cl_context_get_static_kernel_from_bin(queue->ctx, > CL_ENQUEUE_COPY_IMAGE_1D_ARRAY_TO_1D_ARRAY, > + cl_internal_copy_image_1d_array_to_1d_array_str, > + (size_t)cl_internal_copy_image_1d_array_to_1d_array_str_size, > NULL); > } > } else if(src_image->image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY) { > if(dst_image->image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY) { > + extern char cl_internal_copy_image_2d_array_to_2d_array_str[]; > + extern size_t cl_internal_copy_image_2d_array_to_2d_array_str_size; > > - cl_mem_copy_image_to_image(dst_origin, src_origin, region, dst_image, > src_image); > - return CL_SUCCESS; > + ker = cl_context_get_static_kernel_from_bin(queue->ctx, > CL_ENQUEUE_COPY_IMAGE_2D_ARRAY_TO_2D_ARRAY, > + cl_internal_copy_image_2d_array_to_2d_array_str, > + (size_t)cl_internal_copy_image_2d_array_to_2d_array_str_size, > NULL); > } else if(dst_image->image_type == CL_MEM_OBJECT_IMAGE2D) { > - cl_mem_copy_image_to_image(dst_origin, src_origin, region, dst_image, > src_image); > - return CL_SUCCESS; > + extern char cl_internal_copy_image_2d_array_to_2d_str[]; > + extern size_t cl_internal_copy_image_2d_array_to_2d_str_size; > + > + ker = cl_context_get_static_kernel_from_bin(queue->ctx, > CL_ENQUEUE_COPY_IMAGE_2D_ARRAY_TO_2D, > + cl_internal_copy_image_2d_array_to_2d_str, > + (size_t)cl_internal_copy_image_2d_array_to_2d_str_size, NULL); > } else if(dst_image->image_type == CL_MEM_OBJECT_IMAGE3D) { > - cl_mem_copy_image_to_image(dst_origin, src_origin, region, dst_image, > src_image); > - return CL_SUCCESS; > + extern char cl_internal_copy_image_2d_array_to_3d_str[]; > + extern size_t cl_internal_copy_image_2d_array_to_3d_str_size; > + > + ker = cl_context_get_static_kernel_from_bin(queue->ctx, > CL_ENQUEUE_COPY_IMAGE_2D_ARRAY_TO_3D, > + cl_internal_copy_image_2d_array_to_3d_str, > + (size_t)cl_internal_copy_image_2d_array_to_3d_str_size, NULL); > } > } else if(src_image->image_type == CL_MEM_OBJECT_IMAGE3D) { > if(dst_image->image_type == CL_MEM_OBJECT_IMAGE2D) { > @@ -1646,8 +1662,11 @@ cl_mem_kernel_copy_image(cl_command_queue queue, > struct _cl_mem_image* src_image > ker = cl_context_get_static_kernel_from_bin(queue->ctx, > CL_ENQUEUE_COPY_IMAGE_3D_TO_3D, > cl_internal_copy_image_3d_to_3d_str, > (size_t)cl_internal_copy_image_3d_to_3d_str_size, NULL); > } else if(dst_image->image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY) { > - cl_mem_copy_image_to_image(dst_origin, src_origin, region, dst_image, > src_image); > - return CL_SUCCESS; > + extern char cl_internal_copy_image_3d_to_2d_array_str[]; > + extern size_t cl_internal_copy_image_3d_to_2d_array_str_size; > + > + ker = cl_context_get_static_kernel_from_bin(queue->ctx, > CL_ENQUEUE_COPY_IMAGE_3D_TO_2D_ARRAY, > + cl_internal_copy_image_3d_to_2d_array_str, > (size_t)cl_internal_copy_image_3d_to_2d_array_str_size, NULL); > } > } > > diff --git a/src/kernels/cl_internal_copy_image_1d_array_to_1d_array.cl > b/src/kernels/cl_internal_copy_image_1d_array_to_1d_array.cl > new file mode 100644 > index 0000000..0c7c6e2 > --- /dev/null > +++ b/src/kernels/cl_internal_copy_image_1d_array_to_1d_array.cl > @@ -0,0 +1,21 @@ > +kernel void __cl_copy_image_1d_array_to_1d_array(__read_only image1d_array_t > src_image, __write_only image1d_array_t dst_image, > + unsigned int region0, unsigned int region1, > unsigned int region2, > + unsigned int src_origin0, unsigned int > src_origin1, unsigned int src_origin2, > + unsigned int dst_origin0, unsigned int > dst_origin1, unsigned int dst_origin2) > +{ > + int i = get_global_id(0); > + int k = get_global_id(2); > + int4 color; > + const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | > CLK_FILTER_NEAREST; > + int2 src_coord; > + int2 dst_coord; > + if((i >= region0) || (k>=region2)) > + return; > + > + src_coord.x = src_origin0 + i; > + src_coord.y = src_origin2 + k; > + dst_coord.x = dst_origin0 + i; > + dst_coord.y = dst_origin2 + k; > + color = read_imagei(src_image, sampler, src_coord); > + write_imagei(dst_image, dst_coord, color); > +} > diff --git a/src/kernels/cl_internal_copy_image_2d_array_to_2d.cl > b/src/kernels/cl_internal_copy_image_2d_array_to_2d.cl > new file mode 100644 > index 0000000..89e36c0 > --- /dev/null > +++ b/src/kernels/cl_internal_copy_image_2d_array_to_2d.cl > @@ -0,0 +1,21 @@ > +kernel void __cl_copy_image_2d_array_to_2d(__read_only image2d_array_t > src_image, __write_only image2d_t dst_image, > + unsigned int region0, unsigned int region1, > unsigned int region2, > + unsigned int src_origin0, unsigned int > src_origin1, unsigned int src_origin2, > + unsigned int dst_origin0, unsigned int > dst_origin1, unsigned int dst_origin2) > +{ > + int i = get_global_id(0); > + int j = get_global_id(1); > + int4 color; > + const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | > CLK_FILTER_NEAREST; > + int4 src_coord; > + int2 dst_coord; > + if((i >= region0) || (j>= region1)) > + return; > + src_coord.x = src_origin0 + i; > + src_coord.y = src_origin1 + j; > + src_coord.z = src_origin2; > + dst_coord.x = dst_origin0 + i; > + dst_coord.y = dst_origin1 + j; > + color = read_imagei(src_image, sampler, src_coord); > + write_imagei(dst_image, dst_coord, color); > +} > diff --git a/src/kernels/cl_internal_copy_image_2d_array_to_2d_array.cl > b/src/kernels/cl_internal_copy_image_2d_array_to_2d_array.cl > new file mode 100644 > index 0000000..3653660 > --- /dev/null > +++ b/src/kernels/cl_internal_copy_image_2d_array_to_2d_array.cl > @@ -0,0 +1,23 @@ > +kernel void __cl_copy_image_2d_array_to_2d_array(__read_only image2d_array_t > src_image, __write_only image2d_array_t dst_image, > + unsigned int region0, unsigned int region1, > unsigned int region2, > + unsigned int src_origin0, unsigned int > src_origin1, unsigned int src_origin2, > + unsigned int dst_origin0, unsigned int > dst_origin1, unsigned int dst_origin2) > +{ > + int i = get_global_id(0); > + int j = get_global_id(1); > + int k = get_global_id(2); > + int4 color; > + const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | > CLK_FILTER_NEAREST; > + int4 src_coord; > + int4 dst_coord; > + if((i >= region0) || (j>= region1) || (k>=region2)) > + return; > + src_coord.x = src_origin0 + i; > + src_coord.y = src_origin1 + j; > + src_coord.z = src_origin2 + k; > + dst_coord.x = dst_origin0 + i; > + dst_coord.y = dst_origin1 + j; > + dst_coord.z = dst_origin2 + k; > + color = read_imagei(src_image, sampler, src_coord); > + write_imagei(dst_image, dst_coord, color); > +} > diff --git a/src/kernels/cl_internal_copy_image_2d_array_to_3d.cl > b/src/kernels/cl_internal_copy_image_2d_array_to_3d.cl > new file mode 100644 > index 0000000..424f6b5 > --- /dev/null > +++ b/src/kernels/cl_internal_copy_image_2d_array_to_3d.cl > @@ -0,0 +1,23 @@ > +kernel void __cl_copy_image_2d_array_to_3d(__read_only image2d_array_t > src_image, __write_only image3d_t dst_image, > + unsigned int region0, unsigned int region1, > unsigned int region2, > + unsigned int src_origin0, unsigned int > src_origin1, unsigned int src_origin2, > + unsigned int dst_origin0, unsigned int > dst_origin1, unsigned int dst_origin2) > +{ > + int i = get_global_id(0); > + int j = get_global_id(1); > + int k = get_global_id(2); > + int4 color; > + const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | > CLK_FILTER_NEAREST; > + int4 src_coord; > + int4 dst_coord; > + if((i >= region0) || (j>= region1) || (k>=region2)) > + return; > + src_coord.x = src_origin0 + i; > + src_coord.y = src_origin1 + j; > + src_coord.z = src_origin2 + k; > + dst_coord.x = dst_origin0 + i; > + dst_coord.y = dst_origin1 + j; > + dst_coord.z = dst_origin2 + k; > + color = read_imagei(src_image, sampler, src_coord); > + write_imagei(dst_image, dst_coord, color); > +} > diff --git a/src/kernels/cl_internal_copy_image_2d_to_2d_array.cl > b/src/kernels/cl_internal_copy_image_2d_to_2d_array.cl > new file mode 100644 > index 0000000..4384f01 > --- /dev/null > +++ b/src/kernels/cl_internal_copy_image_2d_to_2d_array.cl > @@ -0,0 +1,21 @@ > +kernel void __cl_copy_image_2d_to_2d_array(__read_only image2d_t src_image, > __write_only image2d_array_t dst_image, > + unsigned int region0, unsigned int > region1, unsigned int region2, > + unsigned int src_origin0, unsigned > int src_origin1, unsigned int src_origin2, > + unsigned int dst_origin0, unsigned > int dst_origin1, unsigned int dst_origin2) > +{ > + int i = get_global_id(0); > + int j = get_global_id(1); > + int4 color; > + const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | > CLK_FILTER_NEAREST; > + int2 src_coord; > + int4 dst_coord; > + if((i >= region0) || (j>= region1)) > + return; > + src_coord.x = src_origin0 + i; > + src_coord.y = src_origin1 + j; > + dst_coord.x = dst_origin0 + i; > + dst_coord.y = dst_origin1 + j; > + dst_coord.z = dst_origin2; > + color = read_imagei(src_image, sampler, src_coord); > + write_imagei(dst_image, dst_coord, color); > +} > diff --git a/src/kernels/cl_internal_copy_image_3d_to_2d_array.cl > b/src/kernels/cl_internal_copy_image_3d_to_2d_array.cl > new file mode 100644 > index 0000000..8041a32 > --- /dev/null > +++ b/src/kernels/cl_internal_copy_image_3d_to_2d_array.cl > @@ -0,0 +1,23 @@ > +kernel void __cl_copy_image_3d_to_2d_array(__read_only image3d_t src_image, > __write_only image2d_array_t dst_image, > + unsigned int region0, unsigned int region1, > unsigned int region2, > + unsigned int src_origin0, unsigned int > src_origin1, unsigned int src_origin2, > + unsigned int dst_origin0, unsigned int > dst_origin1, unsigned int dst_origin2) > +{ > + int i = get_global_id(0); > + int j = get_global_id(1); > + int k = get_global_id(2); > + int4 color; > + const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | > CLK_FILTER_NEAREST; > + int4 src_coord; > + int4 dst_coord; > + if((i >= region0) || (j>= region1) || (k>=region2)) > + return; > + src_coord.x = src_origin0 + i; > + src_coord.y = src_origin1 + j; > + src_coord.z = src_origin2 + k; > + dst_coord.x = dst_origin0 + i; > + dst_coord.y = dst_origin1 + j; > + dst_coord.z = dst_origin2 + k; > + color = read_imagei(src_image, sampler, src_coord); > + write_imagei(dst_image, dst_coord, color); > +} > -- > 1.9.1 > > _______________________________________________ > Beignet mailing list > [email protected] > http://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
