Also refine the whole memcpy's condition in function cl_enqueue_read_buffer_rect and cl_enqueue_write_buffer_rect.
V2: Add a mem_list to enqueue_data to fix utest error. Signed-off-by: Yang Rong <[email protected]> --- src/cl_api.c | 65 ++++++++++++++++++++++++++++++++++++++++++++++++------ src/cl_enqueue.c | 39 +++++++++++++++++++++++++++----- src/cl_enqueue.h | 6 +++-- src/cl_gt_device.h | 2 +- 4 files changed, 97 insertions(+), 15 deletions(-) diff --git a/src/cl_api.c b/src/cl_api.c index aeca782..64e11d6 100644 --- a/src/cl_api.c +++ b/src/cl_api.c @@ -1861,7 +1861,7 @@ error: cl_int clEnqueueCopyImageToBuffer(cl_command_queue command_queue, - cl_mem src_image, + cl_mem src_mem, cl_mem dst_buffer, const size_t * src_origin, const size_t * region, @@ -2005,7 +2005,6 @@ clEnqueueMapBuffer(cl_command_queue command_queue, data->mem_obj = buffer; data->offset = offset; data->size = size; - data->map_flags = map_flags; data->ptr = ptr; if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, @@ -2094,7 +2093,6 @@ clEnqueueMapImage(cl_command_queue command_queue, data->region[0] = region[0]; data->region[1] = region[1]; data->region[2] = region[2]; data->row_pitch = *image_row_pitch; data->slice_pitch = *image_slice_pitch; - data->map_flags = map_flags; data->ptr = ptr; data->offset = offset; @@ -2253,8 +2251,11 @@ clEnqueueTask(cl_command_queue command_queue, const cl_event * event_wait_list, cl_event * event) { - NOT_IMPLEMENTED; - return 0; + const size_t global_size[3] = {1, 0, 0}; + const size_t local_size[3] = {1, 0, 0}; + + return clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, global_size, local_size, + num_events_in_wait_list, event_wait_list, event); } cl_int @@ -2269,8 +2270,58 @@ clEnqueueNativeKernel(cl_command_queue command_queue, const cl_event * event_wait_list, cl_event * event) { - NOT_IMPLEMENTED; - return 0; + cl_int err = CL_SUCCESS; + void *new_args = NULL; + enqueue_data *data, no_wait_data = { 0 }; + cl_int i; + + if(user_func == NULL || + (args == NULL && cb_args > 0) || + (args == NULL && num_mem_objects ==0) || + (args != NULL && cb_args == 0) || + (num_mem_objects > 0 && (mem_list == NULL || args_mem_loc == NULL)) || + (num_mem_objects == 0 && (mem_list != NULL || args_mem_loc != NULL))) { + err = CL_INVALID_VALUE; + goto error; + } + + //Per spec, need copy args + if (cb_args) + { + new_args = malloc(cb_args); + if (!new_args) + { + err = CL_OUT_OF_HOST_MEMORY; + goto error; + } + memcpy(new_args, args, cb_args); + + for (i=0; i<num_mem_objects; ++i) + { + CHECK_MEM(mem_list[i]); + args_mem_loc[i] = new_args + (args_mem_loc[i] - args); //change to new args + } + } + + TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, command_queue->ctx); + + data = &no_wait_data; + data->type = EnqueueNativeKernel; + data->mem_list = mem_list; + data->ptr = new_args; + data->size = cb_args; + data->offset = (size_t)num_mem_objects; + data->const_ptr = args_mem_loc; + data->user_func = user_func; + + if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, + event, data, CL_COMMAND_NATIVE_KERNEL) == CL_ENQUEUE_EXECUTE_IMM) { + err = cl_enqueue_handle(data); + if(event) cl_event_set_status(*event, CL_COMPLETE); + } + +error: + return err; } cl_int diff --git a/src/cl_enqueue.c b/src/cl_enqueue.c index 989b044..660e1d8 100644 --- a/src/cl_enqueue.c +++ b/src/cl_enqueue.c @@ -66,8 +66,8 @@ cl_int cl_enqueue_read_buffer_rect(enqueue_data* data) offset = host_origin[0] + data->host_row_pitch*host_origin[1] + data->host_slice_pitch*host_origin[2]; dst_ptr = (char *)data->ptr + offset; - if (!origin[0] && !host_origin[0] && data->row_pitch == data->host_row_pitch && - (region[2] == 1 || (!origin[1] && !host_origin[1] && data->slice_pitch == data->host_slice_pitch))) + if (data->row_pitch == region[0] && data->row_pitch == data->host_row_pitch && + (region[2] == 1 || (data->slice_pitch == region[0]*region[1] && data->slice_pitch == data->host_slice_pitch))) { memcpy(dst_ptr, src_ptr, region[2] == 1 ? data->row_pitch*region[1] : data->slice_pitch*region[2]); } @@ -131,8 +131,8 @@ cl_int cl_enqueue_write_buffer_rect(enqueue_data *data) offset = host_origin[0] + data->host_row_pitch*host_origin[1] + data->host_slice_pitch*host_origin[2]; src_ptr = (char*)data->const_ptr + offset; - if (!origin[0] && !host_origin[0] && data->row_pitch == data->host_row_pitch && - (region[2] == 1 || (!origin[1] && !host_origin[1] && data->slice_pitch == data->host_slice_pitch))) + if (data->row_pitch == region[0] && data->row_pitch == data->host_row_pitch && + (region[2] == 1 || (data->slice_pitch == region[0]*region[1] && data->slice_pitch == data->host_slice_pitch))) { memcpy(dst_ptr, src_ptr, region[2] == 1 ? data->row_pitch*region[1] : data->slice_pitch*region[2]); } @@ -351,6 +351,32 @@ error: return err; } +cl_int cl_enqueue_native_kernel(enqueue_data *data) +{ + cl_int err = CL_SUCCESS; + cl_uint num_mem_objects = (cl_uint)data->offset; + const cl_mem *mem_list = data->mem_list; + const void **args_mem_loc = (const void **)data->const_ptr; + cl_uint i; + + for (i=0; i<num_mem_objects; ++i) + { + const cl_mem buffer = mem_list[i]; + CHECK_MEM(buffer); + + *((void **)args_mem_loc[i]) = cl_mem_map_auto(buffer); + } + data->user_func(data->ptr); + + for (i=0; i<num_mem_objects; ++i) + { + cl_mem_unmap_auto(mem_list[i]); + } + + free(data->ptr); +error: + return err; +} cl_int cl_enqueue_handle(enqueue_data* data) { switch(data->type) { @@ -375,7 +401,10 @@ cl_int cl_enqueue_handle(enqueue_data* data) case EnqueueCopyBufferRect: case EnqueueCopyImage: case EnqueueNDRangeKernel: - cl_gpgpu_event_resume((cl_gpgpu_event)data->ptr); //goto default + cl_gpgpu_event_resume((cl_gpgpu_event)data->ptr); + return CL_SUCCESS; + case EnqueueNativeKernel: + return cl_enqueue_native_kernel(data); default: return CL_SUCCESS; } diff --git a/src/cl_enqueue.h b/src/cl_enqueue.h index 848c7c4..f90f921 100644 --- a/src/cl_enqueue.h +++ b/src/cl_enqueue.h @@ -40,6 +40,7 @@ typedef enum { EnqueueMapImage, EnqueueUnmapMemObject, EnqueueNDRangeKernel, + EnqueueNativeKernel, EnqueueInvalid } enqueue_type; @@ -56,9 +57,10 @@ typedef struct _enqueue_data { size_t slice_pitch; /* Slice pitch */ size_t host_row_pitch; /* Host row pitch, used in read/write buffer rect */ size_t host_slice_pitch; /* Host slice pitch, used in read/write buffer rect */ - cl_map_flags map_flags; /* Map flags */ const void * const_ptr; /* Const ptr for memory read */ - void * ptr; /* ptr for write and return value */ + void * ptr; /* Ptr for write and return value */ + const cl_mem* mem_list; /* mem_list of clEnqueueNativeKernel */ + void (*user_func)(void *); /* pointer to a host-callable user function */ } enqueue_data; /* Do real enqueue commands */ diff --git a/src/cl_gt_device.h b/src/cl_gt_device.h index feb4ab3..1eb790f 100644 --- a/src/cl_gt_device.h +++ b/src/cl_gt_device.h @@ -59,7 +59,7 @@ .endian_little = CL_TRUE, .available = CL_TRUE, .compiler_available = CL_FALSE, /* XXX */ -.execution_capabilities = CL_EXEC_KERNEL, +.execution_capabilities = CL_EXEC_KERNEL | CL_EXEC_NATIVE_KERNEL, .queue_properties = CL_QUEUE_PROFILING_ENABLE, .platform = NULL, /* == intel_platform (set when requested) */ /* IEEE 754, XXX does IVB support CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT? */ -- 1.8.1.2 _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
