The patchset LGTM, pushed, thanks. BYW: should also support align2 later.
> -----Original Message----- > From: Beignet [mailto:[email protected]] On Behalf Of > [email protected] > Sent: Thursday, May 25, 2017 15:10 > To: [email protected] > Cc: Yan Wang <[email protected]> > Subject: [Beignet] [PATCH v2 2/2] Fix bug of clEnqueueCopyBufferToImage > and clEnqueueCopyImageToBuffer. > > From: Yan Wang <[email protected]> > > "imagedim_non_pow_2" cases of basic modudle of confrmance shows > regression after use TILE_Y mode for large image by previous patch. > This bug comes from the non-align16 kernel of > clEnqueueCopyBufferToImage and clEnqueueCopyImageToBuffer. > It will force CL_RGBA/CL_UNORM_INT8/8191x8192 image of conformance > test to CL_R/CL_UNSIGNED_INT8/32764x8192 image for copying. > So it makes width as 8191 x 4 = 32764 and its width will exceed the maximum > width (16 x 1024 = 16384) of GEN surface state structure which only has 14 > bits. > So use align4 copy kernel to avoid this bug. > > Signed-off-by: Yan Wang <[email protected]> > --- > src/CMakeLists.txt | 1 + > src/cl_context.h | 2 + > src/cl_mem.c | 78 > ++++++++++++++-------- > .../cl_internal_copy_buffer_to_image_2d_align4.cl | 18 > +++++ .../cl_internal_copy_image_2d_to_buffer_align4.cl | 18 +++++ > 5 files changed, 89 insertions(+), 28 deletions(-) create mode 100644 > src/kernels/cl_internal_copy_buffer_to_image_2d_align4.cl > create mode 100644 > src/kernels/cl_internal_copy_image_2d_to_buffer_align4.cl > > diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 77a1c87..6433566 > 100644 > --- a/src/CMakeLists.txt > +++ b/src/CMakeLists.txt > @@ -53,6 +53,7 @@ cl_internal_copy_image_2d_array_to_2d_array > cl_internal_copy_image_2d_array_to_2 > cl_internal_copy_image_2d_array_to_3d > cl_internal_copy_image_3d_to_2d_array > cl_internal_copy_image_2d_to_buffer > cl_internal_copy_image_2d_to_buffer_align16 > cl_internal_copy_image_3d_to_buffer > cl_internal_copy_buffer_to_image_2d > cl_internal_copy_buffer_to_image_2d_align16 > cl_internal_copy_buffer_to_image_3d > +cl_internal_copy_buffer_to_image_2d_align4 > +cl_internal_copy_image_2d_to_buffer_align4 > cl_internal_fill_buf_align8 cl_internal_fill_buf_align4 > cl_internal_fill_buf_align2 cl_internal_fill_buf_unalign > cl_internal_fill_buf_align128 cl_internal_fill_image_1d diff --git > a/src/cl_context.h b/src/cl_context.h index 8ba499f..75bf895 100644 > --- a/src/cl_context.h > +++ b/src/cl_context.h > @@ -62,9 +62,11 @@ enum _cl_internal_ker_type { > CL_ENQUEUE_COPY_IMAGE_3D_TO_2D_ARRAY, //copy image 3d to > image 2d array > CL_ENQUEUE_COPY_IMAGE_2D_TO_BUFFER, //copy image 2d to buffer > CL_ENQUEUE_COPY_IMAGE_2D_TO_BUFFER_ALIGN16, > + CL_ENQUEUE_COPY_IMAGE_2D_TO_BUFFER_ALIGN4, > CL_ENQUEUE_COPY_IMAGE_3D_TO_BUFFER, //copy image 3d tobuffer > CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_2D, //copy buffer to image 2d > CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_2D_ALIGN16, > + CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_2D_ALIGN4, > CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_3D, //copy buffer to image 3d > CL_ENQUEUE_FILL_BUFFER_UNALIGN, //fill buffer with 1 aligne pattern, > pattern size=1 > CL_ENQUEUE_FILL_BUFFER_ALIGN2, //fill buffer with 2 aligne pattern, > pattern size=2 > diff --git a/src/cl_mem.c b/src/cl_mem.c index 0c49c3d..a8543c9 100644 > --- a/src/cl_mem.c > +++ b/src/cl_mem.c > @@ -2146,6 +2146,36 @@ fail: > return ret; > } > > +#define ALIGN16 16 > +#define ALIGN4 4 > +#define ALIGN1 1 > + > +static size_t > +get_align_size_for_copy_kernel(struct _cl_mem_image* image, const > size_t origin0, const size_t region0, > + const size_t offset, cl_image_format *fmt) > +{ > + size_t align_size = 0; > + > + if((image->image_type == CL_MEM_OBJECT_IMAGE2D) && ((image->w * > image->bpp) % ALIGN16 == 0) && > + ((origin0 * image->bpp) % ALIGN16 == 0) && (region0 % ALIGN16 == 0) > && (offset % ALIGN16 == 0)){ > + fmt->image_channel_order = CL_RGBA; > + fmt->image_channel_data_type = CL_UNSIGNED_INT32; > + align_size = ALIGN16; > + } > + else if((image->image_type == CL_MEM_OBJECT_IMAGE2D) && ((image- > >w * image->bpp) % ALIGN4 == 0) && > + ((origin0 * image->bpp) % ALIGN4 == 0) && (region0 % ALIGN4 == 0) && > (offset % ALIGN4 == 0)){ > + fmt->image_channel_order = CL_R; > + fmt->image_channel_data_type = CL_UNSIGNED_INT32; > + align_size = ALIGN4; > + } > + else{ > + fmt->image_channel_order = CL_R; > + fmt->image_channel_data_type = CL_UNSIGNED_INT8; > + align_size = ALIGN1; > + } > + > + return align_size; > +} > + > LOCAL cl_int > cl_mem_copy_image_to_buffer(cl_command_queue queue, cl_event > event, struct _cl_mem_image* image, cl_mem buffer, > const size_t *src_origin, const size_t dst_offset, > const size_t > *region) { @@ -2158,7 +2188,6 @@ > cl_mem_copy_image_to_buffer(cl_command_queue queue, cl_event > event, struct _cl_m > cl_image_format fmt; > size_t origin0, region0; > size_t kn_dst_offset; > - int align16 = 0; > size_t align_size = 1; > size_t w_saved; > > @@ -2176,18 +2205,7 @@ > cl_mem_copy_image_to_buffer(cl_command_queue queue, cl_event > event, struct _cl_m > w_saved = image->w; > region0 = region[0] * bpp; > kn_dst_offset = dst_offset; > - if((image->image_type == CL_MEM_OBJECT_IMAGE2D) && ((image->w * > image->bpp) % 16 == 0) && > - ((src_origin[0] * bpp) % 16 == 0) && (region0 % 16 == 0) && > (dst_offset % > 16 == 0)){ > - fmt.image_channel_order = CL_RGBA; > - fmt.image_channel_data_type = CL_UNSIGNED_INT32; > - align16 = 1; > - align_size = 16; > - } > - else{ > - fmt.image_channel_order = CL_R; > - fmt.image_channel_data_type = CL_UNSIGNED_INT8; > - align_size = 1; > - } > + align_size = get_align_size_for_copy_kernel(image, src_origin[0], > + region0, dst_offset, &fmt); > image->intel_fmt = cl_image_get_intel_format(&fmt); > image->w = (image->w * image->bpp) / align_size; > image->bpp = align_size; > @@ -2198,7 +2216,7 @@ > cl_mem_copy_image_to_buffer(cl_command_queue queue, cl_event > event, struct _cl_m > > /* setup the kernel and run. */ > if(image->image_type == CL_MEM_OBJECT_IMAGE2D) { > - if(align16){ > + if(align_size == ALIGN16){ > extern char cl_internal_copy_image_2d_to_buffer_align16_str[]; > extern size_t cl_internal_copy_image_2d_to_buffer_align16_str_size; > > @@ -2206,6 +2224,14 @@ > cl_mem_copy_image_to_buffer(cl_command_queue queue, cl_event > event, struct _cl_m > cl_internal_copy_image_2d_to_buffer_align16_str, > (size_t)cl_internal_copy_image_2d_to_buffer_align16_str_size, > NULL); > } > + else if(align_size == ALIGN4){ > + extern char cl_internal_copy_image_2d_to_buffer_align4_str[]; > + extern size_t > + cl_internal_copy_image_2d_to_buffer_align4_str_size; > + > + ker = cl_context_get_static_kernel_from_bin(queue->ctx, > CL_ENQUEUE_COPY_IMAGE_2D_TO_BUFFER_ALIGN4, > + cl_internal_copy_image_2d_to_buffer_align4_str, > + (size_t)cl_internal_copy_image_2d_to_buffer_align4_str_size, > NULL); > + } > else{ > extern char cl_internal_copy_image_2d_to_buffer_str[]; > extern size_t cl_internal_copy_image_2d_to_buffer_str_size; > @@ -2262,7 +2288,6 @@ > cl_mem_copy_buffer_to_image(cl_command_queue queue, cl_event > event, cl_mem buffe > cl_image_format fmt; > size_t origin0, region0; > size_t kn_src_offset; > - int align16 = 0; > size_t align_size = 1; > size_t w_saved = 0; > > @@ -2280,18 +2305,7 @@ > cl_mem_copy_buffer_to_image(cl_command_queue queue, cl_event > event, cl_mem buffe > w_saved = image->w; > region0 = region[0] * bpp; > kn_src_offset = src_offset; > - if((image->image_type == CL_MEM_OBJECT_IMAGE2D) && ((image->w * > image->bpp) % 16 == 0) && > - ((dst_origin[0] * bpp) % 16 == 0) && (region0 % 16 == 0) && > (src_offset % > 16 == 0)){ > - fmt.image_channel_order = CL_RGBA; > - fmt.image_channel_data_type = CL_UNSIGNED_INT32; > - align16 = 1; > - align_size = 16; > - } > - else{ > - fmt.image_channel_order = CL_R; > - fmt.image_channel_data_type = CL_UNSIGNED_INT8; > - align_size = 1; > - } > + align_size = get_align_size_for_copy_kernel(image, dst_origin[0], > + region0, src_offset, &fmt); > image->intel_fmt = cl_image_get_intel_format(&fmt); > image->w = (image->w * image->bpp) / align_size; > image->bpp = align_size; > @@ -2302,7 +2316,7 @@ > cl_mem_copy_buffer_to_image(cl_command_queue queue, cl_event > event, cl_mem buffe > > /* setup the kernel and run. */ > if(image->image_type == CL_MEM_OBJECT_IMAGE2D) { > - if(align16){ > + if(align_size == ALIGN16){ > extern char cl_internal_copy_buffer_to_image_2d_align16_str[]; > extern size_t cl_internal_copy_buffer_to_image_2d_align16_str_size; > > @@ -2310,6 +2324,14 @@ > cl_mem_copy_buffer_to_image(cl_command_queue queue, cl_event > event, cl_mem buffe > cl_internal_copy_buffer_to_image_2d_align16_str, > (size_t)cl_internal_copy_buffer_to_image_2d_align16_str_size, > NULL); > } > + else if(align_size == ALIGN4){ > + extern char cl_internal_copy_buffer_to_image_2d_align4_str[]; > + extern size_t > + cl_internal_copy_buffer_to_image_2d_align4_str_size; > + > + ker = cl_context_get_static_kernel_from_bin(queue->ctx, > CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_2D_ALIGN4, > + cl_internal_copy_buffer_to_image_2d_align4_str, > + (size_t)cl_internal_copy_buffer_to_image_2d_align4_str_size, > NULL); > + } > else{ > extern char cl_internal_copy_buffer_to_image_2d_str[]; > extern size_t cl_internal_copy_buffer_to_image_2d_str_size; > diff --git a/src/kernels/cl_internal_copy_buffer_to_image_2d_align4.cl > b/src/kernels/cl_internal_copy_buffer_to_image_2d_align4.cl > new file mode 100644 > index 0000000..79a3d8c > --- /dev/null > +++ b/src/kernels/cl_internal_copy_buffer_to_image_2d_align4.cl > @@ -0,0 +1,18 @@ > +kernel void __cl_copy_buffer_to_image_2d_align4(__write_only > image2d_t image, global uint* buffer, > + unsigned int region0, unsigned int > region1, unsigned int > region2, > + unsigned int dst_origin0, unsigned > int dst_origin1, > unsigned int dst_origin2, > + unsigned int src_offset) { > + int i = get_global_id(0); > + int j = get_global_id(1); > + uint4 color = (uint4)(0); > + int2 dst_coord; > + if((i >= region0) || (j>= region1)) > + return; > + dst_coord.x = dst_origin0 + i; > + dst_coord.y = dst_origin1 + j; > + src_offset += j * region0 + i; > + color.x = buffer[src_offset]; > + write_imageui(image, dst_coord, color.x); } > + > diff --git a/src/kernels/cl_internal_copy_image_2d_to_buffer_align4.cl > b/src/kernels/cl_internal_copy_image_2d_to_buffer_align4.cl > new file mode 100644 > index 0000000..dc76e02 > --- /dev/null > +++ b/src/kernels/cl_internal_copy_image_2d_to_buffer_align4.cl > @@ -0,0 +1,18 @@ > +kernel void __cl_copy_image_2d_to_buffer_align4( __read_only > image2d_t image, global uint* buffer, > + unsigned int region0, unsigned int > region1, unsigned int > region2, > + unsigned int src_origin0, unsigned > int src_origin1, > unsigned int src_origin2, > + unsigned int dst_offset) { > + > + int i = get_global_id(0); > + int j = get_global_id(1); > + if((i >= region0) || (j>= region1)) > + return; > + uint4 color; > + const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | > +CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; > + int2 src_coord; > + src_coord.x = src_origin0 + i; > + src_coord.y = src_origin1 + j; > + color = read_imageui(image, sampler, src_coord); > + *(buffer + dst_offset + region0*j + i) = color.x; } > -- > 2.7.4 > > _______________________________________________ > Beignet mailing list > [email protected] > https://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/beignet
