Pushed with a slight change. Thanks.
On Fri, Nov 07, 2014 at 04:18:54PM +0800, Guo Yejun wrote: > userptr is used to wrap a memory pointer (page aligned) supplied > by user space into a buffer object accessed by GPU, and so no extra > copy is needed. It is supported starting from linux kernel 3.16 > and libdrm 2.4.58. > > This patch is originally finished by Zhenyu Wang <[email protected]>, > I did a little change and some code clean. > > No regression issue found on IVB+Ubuntu14.10 with libdrm upgraded with tests: > beignet/utests, piglit, OpenCV/test&perf, > conformance/basic&mem_host_flags&buffers > > V2: add page align limit for data size, add comments for kernel without > MMU_NOTIFIER > V3: add runtime check with host_unified_memory, return > CL_MEM_OBJECT_ALLOCATION_FAILURE if failed > Signed-off-by: Guo Yejun <[email protected]> > --- > CMakeLists.txt | 11 +++++++++-- > src/CMakeLists.txt | 5 +++++ > src/cl_api.c | 10 +++++++--- > src/cl_driver.h | 3 +++ > src/cl_driver_defs.c | 1 + > src/cl_enqueue.c | 19 ++++++++++++------- > src/cl_mem.c | 37 ++++++++++++++++++++++++++++++++----- > src/cl_mem.h | 2 ++ > src/cl_mem_gl.c | 2 +- > src/intel/intel_driver.c | 15 +++++++++++++++ > 10 files changed, 87 insertions(+), 18 deletions(-) > > diff --git a/CMakeLists.txt b/CMakeLists.txt > index 40cb74c..15386f9 100644 > --- a/CMakeLists.txt > +++ b/CMakeLists.txt > @@ -108,7 +108,7 @@ ENDIF(X11_FOUND) > # DRM > pkg_check_modules(DRM REQUIRED libdrm) > IF(DRM_FOUND) > - MESSAGE(STATUS "Looking for DRM - found at ${DRM_PREFIX}") > + MESSAGE(STATUS "Looking for DRM - found at ${DRM_PREFIX} ${DRM_VERSION}") > INCLUDE_DIRECTORIES(${DRM_INCLUDE_DIRS}) > ELSE(DRM_FOUND) > MESSAGE(STATUS "Looking for DRM - not found") > @@ -118,7 +118,14 @@ ENDIF(DRM_FOUND) > pkg_check_modules(DRM_INTEL libdrm_intel>=2.4.52) > IF(DRM_INTEL_FOUND) > INCLUDE_DIRECTORIES(${DRM_INTEL_INCLUDE_DIRS}) > - MESSAGE(STATUS "Looking for DRM Intel - found at ${DRM_INTEL_PREFIX}") > + MESSAGE(STATUS "Looking for DRM Intel - found at 
${DRM_INTEL_PREFIX} > ${DRM_INTEL_VERSION}") > + #userptr support starts from 2.4.57, but 2.4.58 is the actual stable > release > + IF(DRM_INTEL_VERSION VERSION_GREATER 2.4.57) > + MESSAGE(STATUS "Enable userptr support") > + SET(DRM_INTEL_USERPTR "enable") > + ELSE(DRM_INTEL_VERSION VERSION_GREATER 2.4.57) > + MESSAGE(STATUS "Disable userptr support") > + ENDIF(DRM_INTEL_VERSION VERSION_GREATER 2.4.57) > ELSE(DRM_INTEL_FOUND) > MESSAGE(FATAL_ERROR "Looking for DRM Intel (>= 2.4.52) - not found") > ENDIF(DRM_INTEL_FOUND) > diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt > index fc5de89..7182bad 100644 > --- a/src/CMakeLists.txt > +++ b/src/CMakeLists.txt > @@ -109,6 +109,11 @@ SET(CMAKE_CXX_FLAGS "-DHAS_OCLIcd ${CMAKE_CXX_FLAGS}") > SET(CMAKE_C_FLAGS "-DHAS_OCLIcd ${CMAKE_C_FLAGS}") > endif (OCLIcd_FOUND) > > +if (DRM_INTEL_USERPTR) > +SET(CMAKE_CXX_FLAGS "-DHAS_USERPTR ${CMAKE_CXX_FLAGS}") > +SET(CMAKE_C_FLAGS "-DHAS_USERPTR ${CMAKE_C_FLAGS}") > +endif (DRM_INTEL_USERPTR) > + > set(GIT_SHA1 "git_sha1.h") > add_custom_target(${GIT_SHA1} ALL > COMMAND chmod +x ${CMAKE_CURRENT_SOURCE_DIR}/git_sha1.sh > diff --git a/src/cl_api.c b/src/cl_api.c > index 05d3093..1f24638 100644 > --- a/src/cl_api.c > +++ b/src/cl_api.c > @@ -2665,9 +2665,13 @@ clEnqueueMapBuffer(cl_command_queue command_queue, > ptr = data->ptr; > if(event) cl_event_set_status(*event, CL_COMPLETE); > } else { > - if ((ptr = cl_mem_map_gtt_unsync(buffer)) == NULL) { > - err = CL_MAP_FAILURE; > - goto error; > + if (buffer->is_userptr) > + ptr = buffer->host_ptr; > + else { > + if ((ptr = cl_mem_map_gtt_unsync(buffer)) == NULL) { > + err = CL_MAP_FAILURE; > + goto error; > + } > } > } > err = _cl_map_mem(buffer, ptr, &mem_ptr, offset, size, NULL, NULL); > diff --git a/src/cl_driver.h b/src/cl_driver.h > index 638b791..8697ff2 100644 > --- a/src/cl_driver.h > +++ b/src/cl_driver.h > @@ -285,6 +285,9 @@ extern cl_gpgpu_walker_cb *cl_gpgpu_walker; > typedef cl_buffer (cl_buffer_alloc_cb)(cl_buffer_mgr, 
const char*, size_t, > size_t); > extern cl_buffer_alloc_cb *cl_buffer_alloc; > > +typedef cl_buffer (cl_buffer_alloc_userptr_cb)(cl_buffer_mgr, const char*, > void *, size_t, unsigned long); > +extern cl_buffer_alloc_userptr_cb *cl_buffer_alloc_userptr; > + > /* Set a buffer's tiling mode */ > typedef cl_buffer (cl_buffer_set_tiling_cb)(cl_buffer, int tiling, size_t > stride); > extern cl_buffer_set_tiling_cb *cl_buffer_set_tiling; > diff --git a/src/cl_driver_defs.c b/src/cl_driver_defs.c > index c31b6fc..1335c20 100644 > --- a/src/cl_driver_defs.c > +++ b/src/cl_driver_defs.c > @@ -29,6 +29,7 @@ LOCAL cl_driver_get_device_id_cb *cl_driver_get_device_id = > NULL; > > /* Buffer */ > LOCAL cl_buffer_alloc_cb *cl_buffer_alloc = NULL; > +LOCAL cl_buffer_alloc_userptr_cb *cl_buffer_alloc_userptr = NULL; > LOCAL cl_buffer_set_tiling_cb *cl_buffer_set_tiling = NULL; > LOCAL cl_buffer_alloc_from_texture_cb *cl_buffer_alloc_from_texture = NULL; > LOCAL cl_buffer_release_from_texture_cb *cl_buffer_release_from_texture = > NULL; > diff --git a/src/cl_enqueue.c b/src/cl_enqueue.c > index db0bce7..5bdb7cd 100644 > --- a/src/cl_enqueue.c > +++ b/src/cl_enqueue.c > @@ -234,11 +234,15 @@ cl_int cl_enqueue_map_buffer(enqueue_data *data) > mem->type == CL_MEM_SUBBUFFER_TYPE); > struct _cl_mem_buffer* buffer = (struct _cl_mem_buffer*)mem; > > - if(data->unsync_map == 1) > - //because using unsync map in clEnqueueMapBuffer, so force use map_gtt > here > - ptr = cl_mem_map_gtt(mem); > - else > - ptr = cl_mem_map_auto(mem, data->write_map ? 1 : 0); > + if (mem->is_userptr) > + ptr = mem->host_ptr; > + else { > + if(data->unsync_map == 1) > + //because using unsync map in clEnqueueMapBuffer, so force use map_gtt > here > + ptr = cl_mem_map_gtt(mem); > + else > + ptr = cl_mem_map_auto(mem, data->write_map ? 
1 : 0); > + } > > if (ptr == NULL) { > err = CL_MAP_FAILURE; > @@ -246,7 +250,7 @@ cl_int cl_enqueue_map_buffer(enqueue_data *data) > } > data->ptr = ptr; > > - if(mem->flags & CL_MEM_USE_HOST_PTR) { > + if((mem->flags & CL_MEM_USE_HOST_PTR) && !mem->is_userptr) { > assert(mem->host_ptr); > ptr = (char*)ptr + data->offset + buffer->sub_offset; > memcpy(mem->host_ptr + data->offset + buffer->sub_offset, ptr, > data->size); > @@ -331,7 +335,8 @@ cl_int cl_enqueue_unmap_mem_object(enqueue_data *data) > assert(mapped_ptr >= memobj->host_ptr && > mapped_ptr + mapped_size <= memobj->host_ptr + memobj->size); > /* Sync the data. */ > - memcpy(v_ptr, mapped_ptr, mapped_size); > + if (!memobj->is_userptr) > + memcpy(v_ptr, mapped_ptr, mapped_size); > } else { > CHECK_IMAGE(memobj, image); > > diff --git a/src/cl_mem.c b/src/cl_mem.c > index 16bd613..9e38670 100644 > --- a/src/cl_mem.c > +++ b/src/cl_mem.c > @@ -33,6 +33,7 @@ > #include <assert.h> > #include <stdio.h> > #include <string.h> > +#include <unistd.h> > > #define FIELD_SIZE(CASE,TYPE) \ > case JOIN(CL_,CASE): \ > @@ -223,6 +224,7 @@ cl_mem_allocate(enum cl_mem_type type, > cl_mem_flags flags, > size_t sz, > cl_int is_tiled, > + void *host_ptr, > cl_int *errcode) > { > cl_buffer_mgr bufmgr = NULL; > @@ -251,6 +253,7 @@ cl_mem_allocate(enum cl_mem_type type, > mem->ref_n = 1; > mem->magic = CL_MAGIC_MEM_HEADER; > mem->flags = flags; > + mem->is_userptr = 0; > > if (sz != 0) { > /* Pinning will require stricter alignment rules */ > @@ -260,7 +263,28 @@ cl_mem_allocate(enum cl_mem_type type, > /* Allocate space in memory */ > bufmgr = cl_context_get_bufmgr(ctx); > assert(bufmgr); > + > +#ifdef HAS_USERPTR > + if (ctx->device->host_unified_memory) { > + /* currently only cl buf is supported, will add cl image support later > */ > + if ((flags & CL_MEM_USE_HOST_PTR) && host_ptr != NULL) { > + /* userptr not support tiling */ > + if (!is_tiled) { > + int page_size = getpagesize(); > + if ((((unsigned long)host_ptr | sz) 
& (page_size - 1)) == 0) { > + mem->is_userptr = 1; > + mem->bo = cl_buffer_alloc_userptr(bufmgr, "CL userptr memory > object", host_ptr, sz, 0); > + } > + } > + } > + } > + > + if (!mem->is_userptr) > + mem->bo = cl_buffer_alloc(bufmgr, "CL memory object", sz, alignment); > +#else > mem->bo = cl_buffer_alloc(bufmgr, "CL memory object", sz, alignment); > +#endif > + > if (UNLIKELY(mem->bo == NULL)) { > err = CL_MEM_OBJECT_ALLOCATION_FAILURE; > goto error; > @@ -387,12 +411,15 @@ cl_mem_new_buffer(cl_context ctx, > sz = ALIGN(sz, 4); > > /* Create the buffer in video memory */ > - mem = cl_mem_allocate(CL_MEM_BUFFER_TYPE, ctx, flags, sz, CL_FALSE, &err); > + mem = cl_mem_allocate(CL_MEM_BUFFER_TYPE, ctx, flags, sz, CL_FALSE, data, > &err); > if (mem == NULL || err != CL_SUCCESS) > goto error; > > /* Copy the data if required */ > - if (flags & CL_MEM_COPY_HOST_PTR || flags & CL_MEM_USE_HOST_PTR) > + if (flags & CL_MEM_COPY_HOST_PTR) > + cl_buffer_subdata(mem->bo, 0, sz, data); > + > + if ((flags & CL_MEM_USE_HOST_PTR) && !mem->is_userptr) > cl_buffer_subdata(mem->bo, 0, sz, data); > > if (flags & CL_MEM_USE_HOST_PTR || flags & CL_MEM_COPY_HOST_PTR) > @@ -762,7 +789,7 @@ _cl_mem_new_image(cl_context ctx, > sz = aligned_pitch * aligned_h * depth; > } > > - mem = cl_mem_allocate(CL_MEM_IMAGE_TYPE, ctx, flags, sz, tiling != > CL_NO_TILE, &err); > + mem = cl_mem_allocate(CL_MEM_IMAGE_TYPE, ctx, flags, sz, tiling != > CL_NO_TILE, NULL, &err); > if (mem == NULL || err != CL_SUCCESS) > goto error; > > @@ -1834,7 +1861,7 @@ LOCAL cl_mem cl_mem_new_libva_buffer(cl_context ctx, > cl_int err = CL_SUCCESS; > cl_mem mem = NULL; > > - mem = cl_mem_allocate(CL_MEM_BUFFER_TYPE, ctx, 0, 0, CL_FALSE, &err); > + mem = cl_mem_allocate(CL_MEM_BUFFER_TYPE, ctx, 0, 0, CL_FALSE, NULL, &err); > if (mem == NULL || err != CL_SUCCESS) > goto error; > > @@ -1875,7 +1902,7 @@ LOCAL cl_mem cl_mem_new_libva_image(cl_context ctx, > goto error; > } > > - mem = cl_mem_allocate(CL_MEM_IMAGE_TYPE, ctx, 
0, 0, 0, &err); > + mem = cl_mem_allocate(CL_MEM_IMAGE_TYPE, ctx, 0, 0, 0, NULL, &err); > if (mem == NULL || err != CL_SUCCESS) { > err = CL_OUT_OF_HOST_MEMORY; > goto error; > diff --git a/src/cl_mem.h b/src/cl_mem.h > index 95c5f05..2e9dd5a 100644 > --- a/src/cl_mem.h > +++ b/src/cl_mem.h > @@ -92,6 +92,7 @@ typedef struct _cl_mem { > int map_ref; /* The mapped count. */ > uint8_t mapped_gtt; /* This object has mapped gtt, for unmap. */ > cl_mem_dstr_cb *dstr_cb; /* The destroy callback. */ > + uint8_t is_userptr; /* CL_MEM_USE_HOST_PTR is enabled*/ > } _cl_mem; > > struct _cl_mem_image { > @@ -262,6 +263,7 @@ cl_mem_allocate(enum cl_mem_type type, > cl_mem_flags flags, > size_t sz, > cl_int is_tiled, > + void *host_ptr, > cl_int *errcode); > > void > diff --git a/src/cl_mem_gl.c b/src/cl_mem_gl.c > index 28d2ac6..3640908 100644 > --- a/src/cl_mem_gl.c > +++ b/src/cl_mem_gl.c > @@ -63,7 +63,7 @@ cl_mem_new_gl_texture(cl_context ctx, > goto error; > } > > - mem = cl_mem_allocate(CL_MEM_GL_IMAGE_TYPE, ctx, flags, 0, 0, &err); > + mem = cl_mem_allocate(CL_MEM_GL_IMAGE_TYPE, ctx, flags, 0, 0, NULL, &err); > if (mem == NULL || err != CL_SUCCESS) > goto error; > > diff --git a/src/intel/intel_driver.c b/src/intel/intel_driver.c > index bb97220..fc037cc 100644 > --- a/src/intel/intel_driver.c > +++ b/src/intel/intel_driver.c > @@ -690,6 +690,20 @@ cl_buffer intel_share_image_from_libva(cl_context ctx, > return (cl_buffer)intel_bo; > } > > +static cl_buffer intel_buffer_alloc_userptr(cl_buffer_mgr bufmgr, const > char* name, void *data,size_t size, unsigned long flags) > +{ > +#ifdef HAS_USERPTR > + drm_intel_bo *bo; > + bo = drm_intel_bo_alloc_userptr((drm_intel_bufmgr *)bufmgr, name, data, > I915_TILING_NONE, 0, size, flags); > + /* Fallback to unsynchronized userptr allocation if kernel has no MMU > notifier enabled. 
*/ > + if (bo == NULL) > + bo = drm_intel_bo_alloc_userptr((drm_intel_bufmgr *)bufmgr, name, data, > I915_TILING_NONE, 0, size, flags | I915_USERPTR_UNSYNCHRONIZED); > + return (cl_buffer)bo; > +#else > + return NULL; > +#endif > +} > + > static int32_t get_intel_tiling(cl_int tiling, uint32_t *intel_tiling) > { > switch (tiling) { > @@ -734,6 +748,7 @@ intel_setup_callbacks(void) > cl_driver_get_bufmgr = (cl_driver_get_bufmgr_cb *) intel_driver_get_bufmgr; > cl_driver_get_device_id = (cl_driver_get_device_id_cb *) > intel_get_device_id; > cl_buffer_alloc = (cl_buffer_alloc_cb *) drm_intel_bo_alloc; > + cl_buffer_alloc_userptr = (cl_buffer_alloc_userptr_cb*) > intel_buffer_alloc_userptr; > cl_buffer_set_tiling = (cl_buffer_set_tiling_cb *) intel_buffer_set_tiling; > #if defined(HAS_EGL) > cl_buffer_alloc_from_texture = (cl_buffer_alloc_from_texture_cb *) > intel_alloc_buffer_from_texture; > -- > 2.1.0 > > _______________________________________________ > Beignet mailing list > [email protected] > http://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
