libdrm provides an unsynchronized map function, drm_intel_gem_bo_map_unsynchronized, that can be used to perform a non-blocking map. However, this function only maps through the GTT, so force GTT mapping for all clEnqueueMapBuffer and clEnqueueMapImage calls.
Signed-off-by: Yang Rong <[email protected]> --- src/cl_api.c | 81 +++++++++++++++++++++++++++++++++++++++++++++-- src/cl_driver.h | 4 +++ src/cl_driver_defs.c | 1 + src/cl_enqueue.c | 82 +++++++----------------------------------------- src/cl_mem.c | 14 +++++++-- src/cl_mem.h | 5 ++- src/intel/intel_driver.c | 1 + 7 files changed, 113 insertions(+), 75 deletions(-) diff --git a/src/cl_api.c b/src/cl_api.c index 4f048ee..d4fdb7f 100644 --- a/src/cl_api.c +++ b/src/cl_api.c @@ -1576,6 +1576,9 @@ clEnqueueMapBuffer(cl_command_queue command_queue, cl_int * errcode_ret) { cl_int err = CL_SUCCESS; + void *ptr = NULL; + void *mem_ptr = NULL; + cl_int slot = -1; enqueue_data *data, no_wait_data = { 0 }; CHECK_QUEUE(command_queue); @@ -1602,6 +1605,69 @@ clEnqueueMapBuffer(cl_command_queue command_queue, goto error; } + if (!(ptr = cl_mem_map_auto(buffer, CL_FALSE))) { + err = CL_MAP_FAILURE; + goto error; + } + + ptr = (char*)ptr + offset; + + if(buffer->flags & CL_MEM_USE_HOST_PTR) { + assert(buffer->host_ptr); + //memcpy(buffer->host_ptr + offset, ptr, size); + mem_ptr = buffer->host_ptr + offset; + } else { + mem_ptr = ptr; + } + + /* Record the mapped address. 
*/ + if (!buffer->mapped_ptr_sz) { + buffer->mapped_ptr_sz = 16; + buffer->mapped_ptr = (cl_mapped_ptr *)malloc( + sizeof(cl_mapped_ptr) * buffer->mapped_ptr_sz); + if (!buffer->mapped_ptr) { + cl_mem_unmap_auto (buffer); + err = CL_OUT_OF_HOST_MEMORY; + ptr = NULL; + goto error; + } + + memset(buffer->mapped_ptr, 0, buffer->mapped_ptr_sz * sizeof(cl_mapped_ptr)); + slot = 0; + } else { + int i = 0; + for (; i < buffer->mapped_ptr_sz; i++) { + if (buffer->mapped_ptr[i].ptr == NULL) { + slot = i; + break; + } + } + + if (i == buffer->mapped_ptr_sz) { + cl_mapped_ptr *new_ptr = (cl_mapped_ptr *)malloc( + sizeof(cl_mapped_ptr) * buffer->mapped_ptr_sz * 2); + if (!new_ptr) { + cl_mem_unmap_auto (buffer); + err = CL_OUT_OF_HOST_MEMORY; + ptr = NULL; + goto error; + } + memset(new_ptr, 0, 2 * buffer->mapped_ptr_sz * sizeof(cl_mapped_ptr)); + memcpy(new_ptr, buffer->mapped_ptr, + buffer->mapped_ptr_sz * sizeof(cl_mapped_ptr)); + slot = buffer->mapped_ptr_sz; + buffer->mapped_ptr_sz *= 2; + free(buffer->mapped_ptr); + buffer->mapped_ptr = new_ptr; + } + } + + assert(slot != -1); + buffer->mapped_ptr[slot].ptr = mem_ptr; + buffer->mapped_ptr[slot].v_ptr = ptr; + buffer->mapped_ptr[slot].size = size; + buffer->map_ref++; + TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, buffer->ctx); data = &no_wait_data; @@ -1610,6 +1676,7 @@ clEnqueueMapBuffer(cl_command_queue command_queue, data->offset = offset; data->size = size; data->map_flags = map_flags; + data->ptr = ptr; if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, event, data, CL_COMMAND_READ_BUFFER) == CL_ENQUEUE_EXECUTE_IMM) { @@ -1620,7 +1687,7 @@ clEnqueueMapBuffer(cl_command_queue command_queue, error: if (errcode_ret) *errcode_ret = err; - return data->ptr; + return mem_ptr; } void * @@ -1638,6 +1705,7 @@ clEnqueueMapImage(cl_command_queue command_queue, cl_int * errcode_ret) { cl_int err = CL_SUCCESS; + void *ptr = NULL; enqueue_data *data, no_wait_data = { 0 }; 
CHECK_QUEUE(command_queue); @@ -1673,6 +1741,14 @@ clEnqueueMapImage(cl_command_queue command_queue, goto error; } + if (!(ptr = cl_mem_map_auto(image, CL_FALSE))) { + err = CL_MAP_FAILURE; + goto error; + } + + size_t offset = image->bpp*origin[0] + image->row_pitch*origin[1] + image->slice_pitch*origin[2]; + ptr = (char*)ptr + offset; + TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, image->ctx); data = &no_wait_data; @@ -1683,6 +1759,7 @@ clEnqueueMapImage(cl_command_queue command_queue, data->row_pitch = *image_row_pitch; data->slice_pitch = *image_slice_pitch; data->map_flags = map_flags; + data->ptr = ptr; if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, event, data, CL_COMMAND_READ_BUFFER) == CL_ENQUEUE_EXECUTE_IMM) { @@ -1693,7 +1770,7 @@ clEnqueueMapImage(cl_command_queue command_queue, error: if (errcode_ret) *errcode_ret = err; - return data->ptr; //TODO: map and unmap first + return ptr; //TODO: map and unmap first } cl_int diff --git a/src/cl_driver.h b/src/cl_driver.h index 1a0ec38..0ce03fe 100644 --- a/src/cl_driver.h +++ b/src/cl_driver.h @@ -257,6 +257,10 @@ extern cl_buffer_unmap_cb *cl_buffer_unmap; typedef int (cl_buffer_map_gtt_cb)(cl_buffer); extern cl_buffer_map_gtt_cb *cl_buffer_map_gtt; +/* Map a buffer in the GTT domain, non waiting the GPU read or write*/ +typedef int (cl_buffer_map_gtt_unsync_cb)(cl_buffer); +extern cl_buffer_map_gtt_unsync_cb *cl_buffer_map_gtt_unsync; + /* Unmap a buffer in the GTT domain */ typedef int (cl_buffer_unmap_gtt_cb)(cl_buffer); extern cl_buffer_unmap_gtt_cb *cl_buffer_unmap_gtt; diff --git a/src/cl_driver_defs.c b/src/cl_driver_defs.c index e7412de..7c4c866 100644 --- a/src/cl_driver_defs.c +++ b/src/cl_driver_defs.c @@ -36,6 +36,7 @@ LOCAL cl_buffer_unreference_cb *cl_buffer_unreference = NULL; LOCAL cl_buffer_map_cb *cl_buffer_map = NULL; LOCAL cl_buffer_unmap_cb *cl_buffer_unmap = NULL; LOCAL cl_buffer_map_gtt_cb *cl_buffer_map_gtt = NULL; +LOCAL 
cl_buffer_map_gtt_unsync_cb *cl_buffer_map_gtt_unsync = NULL; LOCAL cl_buffer_unmap_gtt_cb *cl_buffer_unmap_gtt = NULL; LOCAL cl_buffer_get_virtual_cb *cl_buffer_get_virtual = NULL; LOCAL cl_buffer_get_size_cb *cl_buffer_get_size = NULL; diff --git a/src/cl_enqueue.c b/src/cl_enqueue.c index a112cc4..a1c2be9 100644 --- a/src/cl_enqueue.c +++ b/src/cl_enqueue.c @@ -32,7 +32,7 @@ cl_int cl_enqueue_read_buffer(enqueue_data* data) cl_int err = CL_SUCCESS; void* src_ptr; - if (!(src_ptr = cl_mem_map_auto(data->mem_obj))) { + if (!(src_ptr = cl_mem_map_auto(data->mem_obj, CL_TRUE))) { err = CL_MAP_FAILURE; goto error; } @@ -50,7 +50,7 @@ cl_int cl_enqueue_write_buffer(enqueue_data *data) cl_int err = CL_SUCCESS; void* dst_ptr; - if (!(dst_ptr = cl_mem_map_auto(data->mem_obj))) { + if (!(dst_ptr = cl_mem_map_auto(data->mem_obj, CL_TRUE))) { err = CL_MAP_FAILURE; goto error; } @@ -72,7 +72,7 @@ cl_int cl_enqueue_read_image(enqueue_data *data) const size_t* origin = data->origin; const size_t* region = data->region; - if (!(src_ptr = cl_mem_map_auto(image))) { + if (!(src_ptr = cl_mem_map_auto(image, CL_TRUE))) { err = CL_MAP_FAILURE; goto error; } @@ -116,7 +116,7 @@ cl_int cl_enqueue_write_image(enqueue_data *data) const size_t *origin = data->origin; const size_t *region = data->region; - if (!(dst_ptr = cl_mem_map_auto(image))) { + if (!(dst_ptr = cl_mem_map_auto(image, CL_TRUE))) { err = CL_MAP_FAILURE; goto error; } @@ -156,93 +156,35 @@ cl_int cl_enqueue_map_buffer(enqueue_data *data) void *ptr = NULL; cl_int err = CL_SUCCESS; - void *mem_ptr = NULL; - cl_int slot = -1; cl_mem buffer = data->mem_obj; - - if (!(ptr = cl_mem_map_auto(buffer))) { + //because using unsync map in clEnqueueMapBuffer, so force use map_gtt here + if (!(ptr = cl_mem_map_gtt(buffer))) { err = CL_MAP_FAILURE; + goto error; } - ptr = (char*)ptr + data->offset; + assert(data->ptr == ptr); if(buffer->flags & CL_MEM_USE_HOST_PTR) { assert(buffer->host_ptr); memcpy(buffer->host_ptr + data->offset, 
ptr, data->size); - mem_ptr = buffer->host_ptr + data->offset; - } else { - mem_ptr = ptr; - } - - /* Record the mapped address. */ - if (!buffer->mapped_ptr_sz) { - buffer->mapped_ptr_sz = 16; - buffer->mapped_ptr = (cl_mapped_ptr *)malloc( - sizeof(cl_mapped_ptr) * buffer->mapped_ptr_sz); - if (!buffer->mapped_ptr) { - cl_mem_unmap_auto (buffer); - err = CL_OUT_OF_HOST_MEMORY; - ptr = NULL; - goto error; - } - - memset(buffer->mapped_ptr, 0, buffer->mapped_ptr_sz * sizeof(cl_mapped_ptr)); - slot = 0; - } else { - int i = 0; - for (; i < buffer->mapped_ptr_sz; i++) { - if (buffer->mapped_ptr[i].ptr == NULL) { - slot = i; - break; - } - } - - if (i == buffer->mapped_ptr_sz) { - cl_mapped_ptr *new_ptr = (cl_mapped_ptr *)malloc( - sizeof(cl_mapped_ptr) * buffer->mapped_ptr_sz * 2); - if (!new_ptr) { - cl_mem_unmap_auto (buffer); - err = CL_OUT_OF_HOST_MEMORY; - ptr = NULL; - goto error; - } - memset(new_ptr, 0, 2 * buffer->mapped_ptr_sz * sizeof(cl_mapped_ptr)); - memcpy(new_ptr, buffer->mapped_ptr, - buffer->mapped_ptr_sz * sizeof(cl_mapped_ptr)); - slot = buffer->mapped_ptr_sz; - buffer->mapped_ptr_sz *= 2; - free(buffer->mapped_ptr); - buffer->mapped_ptr = new_ptr; - } } - assert(slot != -1); - buffer->mapped_ptr[slot].ptr = mem_ptr; - buffer->mapped_ptr[slot].v_ptr = ptr; - buffer->mapped_ptr[slot].size = data->size; - buffer->map_ref++; - - data->ptr = mem_ptr; - error: return err; } cl_int cl_enqueue_map_image(enqueue_data *data) { - void *ptr = NULL; cl_int err = CL_SUCCESS; - cl_mem image = data->mem_obj; - const size_t *origin = data->origin; - - if (!(ptr = cl_mem_map_auto(image))) { + void *ptr = NULL; + //because using unsync map in clEnqueueMapImage, so force use map_gtt here + if (!(ptr = cl_mem_map_gtt(image))) { err = CL_MAP_FAILURE; goto error; } - - size_t offset = image->bpp*origin[0] + image->row_pitch*origin[1] + image->slice_pitch*origin[2]; - data->ptr = (char*)ptr + offset; + assert(data->ptr == (char*)ptr + data->offset); error: return err; 
diff --git a/src/cl_mem.c b/src/cl_mem.c index f794ce7..5ce25e4 100644 --- a/src/cl_mem.c +++ b/src/cl_mem.c @@ -291,7 +291,7 @@ cl_mem_copy_image(cl_mem image, size_t slice_pitch, void* host_ptr) { - char* dst_ptr = cl_mem_map_auto(image); + char* dst_ptr = cl_mem_map_auto(image, CL_TRUE); if (row_pitch == image->row_pitch && (image->depth == 1 || slice_pitch == image->slice_pitch)) @@ -552,6 +552,14 @@ cl_mem_map_gtt(cl_mem mem) return cl_buffer_get_virtual(mem->bo); } +LOCAL void * +cl_mem_map_gtt_unsync(cl_mem mem) +{ + cl_buffer_map_gtt_unsync(mem->bo); + assert(cl_buffer_get_virtual(mem->bo)); + return cl_buffer_get_virtual(mem->bo); +} + LOCAL cl_int cl_mem_unmap_gtt(cl_mem mem) { @@ -560,8 +568,10 @@ cl_mem_unmap_gtt(cl_mem mem) } LOCAL void* -cl_mem_map_auto(cl_mem mem) +cl_mem_map_auto(cl_mem mem, cl_bool sync) { + if(sync == CL_FALSE) + return cl_mem_map_gtt_unsync(mem); //drm only support map gtt unsync map if (mem->is_image && mem->tiling != CL_NO_TILE) return cl_mem_map_gtt(mem); else diff --git a/src/cl_mem.h b/src/cl_mem.h index 1b1709a..1826306 100644 --- a/src/cl_mem.h +++ b/src/cl_mem.h @@ -123,11 +123,14 @@ extern cl_int cl_mem_unmap(cl_mem); /* Directly map a memory object in GTT mode */ extern void *cl_mem_map_gtt(cl_mem); +/* Directly map a memory object in GTT mode, with out waiting gpu idle */ +extern void *cl_mem_map_gtt_unsync(cl_mem); + /* Unmap a memory object in GTT mode */ extern cl_int cl_mem_unmap_gtt(cl_mem); /* Directly map a memory object - tiled images are mapped in GTT mode */ -extern void *cl_mem_map_auto(cl_mem); +extern void *cl_mem_map_auto(cl_mem, cl_bool); /* Unmap a memory object - tiled images are unmapped in GTT mode */ extern cl_int cl_mem_unmap_auto(cl_mem); diff --git a/src/intel/intel_driver.c b/src/intel/intel_driver.c index 6c6b9fb..9959447 100644 --- a/src/intel/intel_driver.c +++ b/src/intel/intel_driver.c @@ -519,6 +519,7 @@ intel_setup_callbacks(void) cl_buffer_unmap = (cl_buffer_unmap_cb *) 
drm_intel_bo_unmap; cl_buffer_map_gtt = (cl_buffer_map_gtt_cb *) drm_intel_gem_bo_map_gtt; cl_buffer_unmap_gtt = (cl_buffer_unmap_gtt_cb *) drm_intel_gem_bo_unmap_gtt; + cl_buffer_map_gtt_unsync = (cl_buffer_map_gtt_unsync_cb *) drm_intel_gem_bo_map_unsynchronized; cl_buffer_get_virtual = (cl_buffer_get_virtual_cb *) drm_intel_bo_get_virtual; cl_buffer_get_size = (cl_buffer_get_size_cb *) drm_intel_bo_get_size; cl_buffer_pin = (cl_buffer_pin_cb *) drm_intel_bo_pin; -- 1.8.1.2 _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
