Thanks Zhigang, yes, I reproduced the failure in OpenCV, I'll take a look at it.
-----Original Message----- From: Zhigang Gong [mailto:[email protected]] Sent: Tuesday, December 23, 2014 5:11 PM To: Guo, Yejun Cc: [email protected] Subject: Re: [Beignet] [PATCH V2 1/2] remove the page align limitation for host_ptr of CL_MEM_USE_HOST_PTR This patch need more test case to prove the correctness if we use more than one userptr within the same page. I found at least one of such type of failure in the OpenCV test case as below: ./opencv_test_stitching --gtest_filter=OCL_PlaneWarperTest.Mat On Mon, Dec 22, 2014 at 04:54:59PM +0800, Guo Yejun wrote: > the current limitation is both value and size of host_ptr should be > page aligned, remove the limitation by recording the offset to the > page starting address inside the driver. > > v2: add assert for cl image path since only cl buffer is supported now. > add assert to ensure the offset is zero for the case user ptr not used. > separate the changes for driver and utest. > > Signed-off-by: Guo Yejun <[email protected]> > --- > src/cl_command_queue.c | 8 ++++++-- > src/cl_mem.c | 16 ++++++++++++---- > src/cl_mem.h | 1 + > 3 files changed, 19 insertions(+), 6 deletions(-) > > diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c index > 12530d7..28a4abd 100644 > --- a/src/cl_command_queue.c > +++ b/src/cl_command_queue.c > @@ -137,6 +137,10 @@ cl_command_queue_bind_image(cl_command_queue queue, > cl_kernel k) > int id = k->images[i].arg_idx; > struct _cl_mem_image *image; > assert(interp_kernel_get_arg_type(k->opaque, id) == > GBE_ARG_IMAGE); > + > + //currently, user ptr is not supported for cl image, so offset should be > always zero > + assert(k->args[id].mem->offset == 0); > + > image = cl_mem_image(k->args[id].mem); > set_image_info(k->curbe, &k->images[i], image); > cl_gpgpu_bind_image(gpgpu, k->images[i].idx, image->base.bo, > image->offset, @@ -170,9 +174,9 @@ > cl_command_queue_bind_surface(cl_command_queue queue, cl_kernel k) > offset = interp_kernel_get_curbe_offset(k->opaque, > GBE_CURBE_KERNEL_ARGUMENT, i); > if (k->args[i].mem->type == CL_MEM_SUBBUFFER_TYPE) { > struct _cl_mem_buffer* buffer = (struct _cl_mem_buffer*)k->args[i].mem; > - cl_gpgpu_bind_buf(gpgpu, k->args[i].mem->bo, offset, > buffer->sub_offset, k->args[i].mem->size, > interp_kernel_get_arg_bti(k->opaque, i)); > + cl_gpgpu_bind_buf(gpgpu, k->args[i].mem->bo, offset, > + k->args[i].mem->offset + buffer->sub_offset, k->args[i].mem->size, > + interp_kernel_get_arg_bti(k->opaque, i)); > } else { > - cl_gpgpu_bind_buf(gpgpu, k->args[i].mem->bo, offset, 0, > k->args[i].mem->size, interp_kernel_get_arg_bti(k->opaque, i)); > + cl_gpgpu_bind_buf(gpgpu, k->args[i].mem->bo, offset, > + k->args[i].mem->offset, k->args[i].mem->size, > + interp_kernel_get_arg_bti(k->opaque, i)); > } > } > > diff --git a/src/cl_mem.c b/src/cl_mem.c index 3225fd2..89f230e 100644 > --- a/src/cl_mem.c > +++ b/src/cl_mem.c > @@ -266,6 +266,7 @@ cl_mem_allocate(enum cl_mem_type type, > mem->magic = CL_MAGIC_MEM_HEADER; > mem->flags = flags; > mem->is_userptr = 0; > + mem->offset = 0; > > if (sz != 0) { > /* Pinning will require stricter alignment rules */ @@ -285,10 > +286,11 @@ cl_mem_allocate(enum cl_mem_type type, > assert(host_ptr != NULL); > /* userptr not support tiling */ > if (!is_tiled) { > - if ((((unsigned long)host_ptr | sz) & (page_size - 1)) == 0) { > - mem->is_userptr = 1; > - mem->bo = cl_buffer_alloc_userptr(bufmgr, "CL userptr memory > object", host_ptr, sz, 0); > - } > + void* aligned_host_ptr = (void*)(((unsigned long)host_ptr) & > (~(page_size - 1))); > + mem->offset = host_ptr - aligned_host_ptr; > + mem->is_userptr = 1; > + size_t aligned_sz = ALIGN((mem->offset + sz), page_size); > + mem->bo = cl_buffer_alloc_userptr(bufmgr, "CL userptr > + memory object", aligned_host_ptr, aligned_sz, 0); > } > } > else if (flags & CL_MEM_ALLOC_HOST_PTR) { @@ -514,6 +516,8 @@ > cl_mem_new_sub_buffer(cl_mem buffer, > mem->ref_n = 1; > mem->magic = CL_MAGIC_MEM_HEADER; > mem->flags = flags; > + mem->offset = buffer->offset; > + mem->is_userptr = buffer->is_userptr; > sub_buf->parent = (struct _cl_mem_buffer*)buffer; > > cl_mem_add_ref(buffer); > @@ -1853,6 +1857,10 @@ cl_mem_unmap_gtt(cl_mem mem) LOCAL void* > cl_mem_map_auto(cl_mem mem, int write) { > + //if mem is not created from userptr, the offset should be always zero. > + if (!mem->is_userptr) > + assert(mem->offset == 0); > + > if (IS_IMAGE(mem) && cl_mem_image(mem)->tiling != CL_NO_TILE) > return cl_mem_map_gtt(mem); > else { > diff --git a/src/cl_mem.h b/src/cl_mem.h index fd50220..e027f15 100644 > --- a/src/cl_mem.h > +++ b/src/cl_mem.h > @@ -94,6 +94,7 @@ typedef struct _cl_mem { > uint8_t mapped_gtt; /* This object has mapped gtt, for unmap. */ > cl_mem_dstr_cb *dstr_cb; /* The destroy callback. */ > uint8_t is_userptr; /* CL_MEM_USE_HOST_PTR is enabled*/ > + size_t offset; /* offset of host_ptr to the page beginning, > only for CL_MEM_USE_HOST_PTR*/ > } _cl_mem; > > struct _cl_mem_image { > -- > 1.9.1 > > _______________________________________________ > Beignet mailing list > [email protected] > http://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
