Just found one typo, should set color.x to buffer rather than color. I will fix it and no need to send new version.
will push it latter. Thanks. On Mon, Sep 09, 2013 at 04:10:22PM +0800, Yang Rong wrote: > Also fix the function cl_mem_kernel_copy_image 3D image error. > > Signed-off-by: Yang Rong <[email protected]> > --- > src/cl_api.c | 45 +++++++++++++++++++++++-- > src/cl_context.h | 8 +++-- > src/cl_enqueue.c | 1 + > src/cl_mem.c | 100 > ++++++++++++++++++++++++++++++++++++++++++++++++++++--- > src/cl_mem.h | 6 ++++ > 5 files changed, 150 insertions(+), 10 deletions(-) > > diff --git a/src/cl_api.c b/src/cl_api.c > index f014b41..ecc2f43 100644 > --- a/src/cl_api.c > +++ b/src/cl_api.c > @@ -1869,8 +1869,49 @@ clEnqueueCopyImageToBuffer(cl_command_queue > command_queue, > const cl_event * event_wait_list, > cl_event * event) > { > - NOT_IMPLEMENTED; > - return 0; > + cl_int err = CL_SUCCESS; > + enqueue_data *data, no_wait_data = { 0 }; > + > + CHECK_QUEUE(command_queue); > + CHECK_IMAGE(src_mem, src_image); > + CHECK_MEM(dst_buffer); > + if (command_queue->ctx != src_mem->ctx || > + command_queue->ctx != dst_buffer->ctx) { > + err = CL_INVALID_CONTEXT; > + goto error; > + } > + > + if (dst_offset + region[0]*region[1]*region[2]*src_image->bpp > > dst_buffer->size) { > + err = CL_INVALID_VALUE; > + goto error; > + } > + > + if (!src_origin || !region || src_origin[0] + region[0] > src_image->w || > + src_origin[1] + region[1] > src_image->h || src_origin[2] + region[2] > > src_image->depth) { > + err = CL_INVALID_VALUE; > + goto error; > + } > + > + if (src_image->image_type == CL_MEM_OBJECT_IMAGE2D && (src_origin[2] != 0 > || region[2] != 1)) { > + err = CL_INVALID_VALUE; > + goto error; > + } > + > + cl_mem_copy_image_to_buffer(command_queue, src_image, dst_buffer, > src_origin, dst_offset, region); > + > + TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, > event, src_mem->ctx); > + > + data = &no_wait_data; > + data->type = EnqueueCopyImageToBuffer; > + data->queue = command_queue; > + > + if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, > + event, data, CL_COMMAND_COPY_IMAGE_TO_BUFFER) == > CL_ENQUEUE_EXECUTE_IMM) { > + err = cl_command_queue_flush(command_queue); > + } > + > +error: > + return err; > } > > cl_int > diff --git a/src/cl_context.h b/src/cl_context.h > index 0342ef4..7016733 100644 > --- a/src/cl_context.h > +++ b/src/cl_context.h > @@ -46,9 +46,11 @@ enum _cl_internal_ker_type { > CL_ENQUEUE_COPY_IMAGE_1 = 3, //copy image 3d to image 2d > CL_ENQUEUE_COPY_IMAGE_2 = 4, //copy image 2d to image 3d > CL_ENQUEUE_COPY_IMAGE_3 = 5, //copy image 3d to image 3d > - CL_ENQUEUE_COPY_IMAGE_TO_BUFFER = 6, > - CL_ENQUEUE_COPY_BUFFER_TO_IMAGE = 7, > - CL_INTERNAL_KERNEL_MAX = 8 > + CL_ENQUEUE_COPY_IMAGE_TO_BUFFER_0 = 6, //copy image 2d to buffer > + CL_ENQUEUE_COPY_IMAGE_TO_BUFFER_1 = 7, //copy image 3d tobuffer > + CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_0 = 8, //copy buffer to image 2d > + CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_1 = 9, //copy buffer to image 3d > + CL_INTERNAL_KERNEL_MAX = 10 > }; > > struct _cl_context_prop { > diff --git a/src/cl_enqueue.c b/src/cl_enqueue.c > index 3c069fe..ef1a33a 100644 > --- a/src/cl_enqueue.c > +++ b/src/cl_enqueue.c > @@ -400,6 +400,7 @@ cl_int cl_enqueue_handle(enqueue_data* data) > return cl_enqueue_unmap_mem_object(data); > case EnqueueCopyBufferRect: > case EnqueueCopyImage: > + case EnqueueCopyBufferToImage: > case EnqueueNDRangeKernel: > cl_gpgpu_event_resume((cl_gpgpu_event)data->ptr); > return CL_SUCCESS; > diff --git a/src/cl_mem.c b/src/cl_mem.c > index 203f47e..7290370 100644 > --- a/src/cl_mem.c > +++ b/src/cl_mem.c > @@ -629,7 +629,6 @@ cl_mem_copy_buffer_rect(cl_command_queue queue, cl_mem > src_buf, cl_mem dst_buf, > cl_kernel_set_arg(ker, 10, sizeof(cl_int), &dst_slice_pitch); > > ret = cl_command_queue_ND_range(queue, ker, 1, global_off, global_sz, > local_sz); > - cl_command_queue_finish(queue); > > return ret; > } > @@ -663,14 +662,14 @@ cl_mem_kernel_copy_image(cl_command_queue queue, struct > _cl_mem_image* src_image > static const char *str_kernel = > "#ifdef SRC_IMAGE_3D \n" > " #define SRC_IMAGE_TYPE image3d_t \n" > - " #define SRC_COORD_TYPE int3 \n" > + " #define SRC_COORD_TYPE int4 \n" > "#else \n" > " #define SRC_IMAGE_TYPE image2d_t \n" > " #define SRC_COORD_TYPE int2 \n" > "#endif \n" > "#ifdef DST_IMAGE_3D \n" > " #define DST_IMAGE_TYPE image3d_t \n" > - " #define DST_COORD_TYPE int3 \n" > + " #define DST_COORD_TYPE int4 \n" > "#else \n" > " #define DST_IMAGE_TYPE image2d_t \n" > " #define DST_COORD_TYPE int2 \n" > @@ -703,7 +702,6 @@ cl_mem_kernel_copy_image(cl_command_queue queue, struct > _cl_mem_image* src_image > " write_imagei(dst_image, src_coord, color); \n" > "}"; > > - > /* We use one kernel to copy the data. The kernel is lazily created. */ > assert(src_image->base.ctx == dst_image->base.ctx); > > @@ -725,11 +723,103 @@ cl_mem_kernel_copy_image(cl_command_queue queue, > struct _cl_mem_image* src_image > cl_kernel_set_arg(ker, 10, sizeof(cl_int), &dst_origin[2]); > > ret = cl_command_queue_ND_range(queue, ker, 1, global_off, global_sz, > local_sz); > - cl_command_queue_finish(queue); > > return ret; > } > > +LOCAL cl_int > +cl_mem_copy_image_to_buffer(cl_command_queue queue, struct _cl_mem_image* > image, cl_mem buffer, > + const size_t *src_origin, const size_t dst_offset, > const size_t *region) { > + cl_int ret; > + cl_kernel ker; > + size_t global_off[] = {0,0,0}; > + size_t global_sz[] = {1,1,1}; > + size_t local_sz[] = {LOCAL_SZ_0,LOCAL_SZ_1,LOCAL_SZ_2}; > + cl_int index = CL_ENQUEUE_COPY_IMAGE_TO_BUFFER_0; > + char option[40] = ""; > + uint32_t intel_fmt, bpp; > + cl_image_format fmt; > + size_t origin0, region0; > + > + if(region[1] == 1) local_sz[1] = 1; > + if(region[2] == 1) local_sz[2] = 1; > + global_sz[0] = ((region[0] + local_sz[0] - 1) / local_sz[0]) * local_sz[0]; > + global_sz[1] = ((region[1] + local_sz[1] - 1) / local_sz[1]) * local_sz[1]; > + global_sz[2] = ((region[2] + local_sz[2] - 1) / local_sz[2]) * local_sz[2]; > + > + if(image->image_type == CL_MEM_OBJECT_IMAGE3D) { > + strcat(option, "-D IMAGE_3D"); > + index += 1; > + } > + > + static const char *str_kernel = > + "#ifdef IMAGE_3D \n" > + " #define IMAGE_TYPE image3d_t \n" > + " #define COORD_TYPE int4 \n" > + "#else \n" > + " #define IMAGE_TYPE image2d_t \n" > + " #define COORD_TYPE int2 \n" > + "#endif \n" > + "kernel void __cl_copy_image_to_buffer ( \n" > + " __read_only IMAGE_TYPE image, global uchar* buffer, \n" > + " unsigned int region0, unsigned int region1, unsigned int > region2, \n" > + " unsigned int src_origin0, unsigned int src_origin1, unsigned > int src_origin2, \n" > + " unsigned int dst_offset) { \n" > + " int i = get_global_id(0); \n" > + " int j = get_global_id(1); \n" > + " int k = get_global_id(2); \n" > + " uint4 color; \n" > + " const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | > CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; \n" > + " COORD_TYPE src_coord; \n" > + " if((i >= region0) || (j>= region1) || (k>=region2)) \n" > + " return; \n" > + " src_coord.x = src_origin0 + i; \n" > + " src_coord.y = src_origin1 + j; \n" > + "#ifdef IMAGE_3D \n" > + " src_coord.z = src_origin2 + k; \n" > + "#endif \n" > + " color = read_imageui(image, sampler, src_coord); \n" > + " dst_offset += (k * region1 + j) * region0 + i; \n" > + " buffer[dst_offset] = color; \n" > + "}"; > + > + /* We use one kernel to copy the data. The kernel is lazily created. */ > + assert(image->base.ctx == buffer->ctx); > + > + fmt.image_channel_order = CL_R; > + fmt.image_channel_data_type = CL_UNSIGNED_INT8; > + intel_fmt = image->intel_fmt; > + bpp = image->bpp; > + image->intel_fmt = cl_image_get_intel_format(&fmt); > + image->w = image->w * image->bpp; > + image->bpp = 1; > + region0 = region[0] * bpp; > + origin0 = src_origin[0] * bpp; > + global_sz[0] = ((region0 + local_sz[0] - 1) / local_sz[0]) * local_sz[0]; > + > + /* setup the kernel and run. */ > + ker = cl_context_get_static_kernel(queue->ctx, index, str_kernel, option); > + if (!ker) > + return CL_OUT_OF_RESOURCES; > + > + cl_kernel_set_arg(ker, 0, sizeof(cl_mem), &image); > + cl_kernel_set_arg(ker, 1, sizeof(cl_mem), &buffer); > + cl_kernel_set_arg(ker, 2, sizeof(cl_int), ®ion0); > + cl_kernel_set_arg(ker, 3, sizeof(cl_int), ®ion[1]); > + cl_kernel_set_arg(ker, 4, sizeof(cl_int), ®ion[2]); > + cl_kernel_set_arg(ker, 5, sizeof(cl_int), &origin0); > + cl_kernel_set_arg(ker, 6, sizeof(cl_int), &src_origin[1]); > + cl_kernel_set_arg(ker, 7, sizeof(cl_int), &src_origin[2]); > + cl_kernel_set_arg(ker, 8, sizeof(cl_int), &dst_offset); > + > + ret = cl_command_queue_ND_range(queue, ker, 1, global_off, global_sz, > local_sz); > + > + image->intel_fmt = intel_fmt; > + image->bpp = bpp; > + image->w = image->w / bpp; > + > + return ret; > +} > LOCAL void* > cl_mem_map(cl_mem mem) > { > diff --git a/src/cl_mem.h b/src/cl_mem.h > index 530fe79..0a8c723 100644 > --- a/src/cl_mem.h > +++ b/src/cl_mem.h > @@ -194,6 +194,12 @@ extern cl_int cl_mem_copy_buffer_rect(cl_command_queue, > cl_mem, cl_mem, > /* api clEnqueueCopyImage help function */ > extern cl_int cl_mem_kernel_copy_image(cl_command_queue, struct > _cl_mem_image*, struct _cl_mem_image*, > const size_t *, const size_t *, const > size_t *); > + > + > +/* api clEnqueueCopyImage help function */ > +extern cl_int cl_mem_copy_image_to_buffer(cl_command_queue, struct > _cl_mem_image*, cl_mem, > + const size_t *, const size_t, > const size_t *); > + > /* Directly map a memory object */ > extern void *cl_mem_map(cl_mem); > > -- > 1.8.1.2 > > _______________________________________________ > Beignet mailing list > [email protected] > http://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
