From: Yang Rong <[email protected]> The extra exec info needs to be relocated; otherwise the GPU can't read or write it. It doesn't need to be set in the curbe, so relocate it to unused binding table entries.
Signed-off-by: Yang Rong <[email protected]> --- src/cl_api.c | 31 +++++++++++++++++++++++++++++-- src/cl_command_queue.c | 38 +++++++++++++++++++++++++++++++++----- src/cl_command_queue.h | 7 +++++-- src/cl_command_queue_gen7.c | 7 +++++-- src/cl_kernel.c | 22 ++++++++++++++++++++++ src/cl_kernel.h | 6 +++++- src/cl_khr_icd.c | 3 ++- src/intel/intel_gpgpu.c | 10 ++++++---- 8 files changed, 107 insertions(+), 17 deletions(-) diff --git a/src/cl_api.c b/src/cl_api.c index c5cb67a..bb5ec86 100644 --- a/src/cl_api.c +++ b/src/cl_api.c @@ -589,7 +589,7 @@ error: } void - clSVMFree (cl_context context, void* svm_pointer) +clSVMFree (cl_context context, void* svm_pointer) { cl_int err = CL_SUCCESS; CHECK_CONTEXT (context); @@ -1477,7 +1477,7 @@ error: } cl_int -clSetKernelArgSVMPointer (cl_kernel kernel, +clSetKernelArgSVMPointer(cl_kernel kernel, cl_uint arg_index, const void *arg_value) { @@ -1488,6 +1488,33 @@ clSetKernelArgSVMPointer (cl_kernel kernel, error: return err; } +cl_int +clSetKernelExecInfo(cl_kernel kernel, + cl_kernel_exec_info param_name, + size_t param_value_size, + const void *param_value) +{ + + cl_int err = CL_SUCCESS; + CHECK_KERNEL(kernel); + + if((param_name != CL_KERNEL_EXEC_INFO_SVM_PTRS && + param_name != CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM) || + param_value == NULL || param_value_size == 0) { + err = CL_INVALID_VALUE; + goto error; + } + + if(param_name == CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM && + *(cl_bool *)param_value == CL_TRUE) { + err = CL_INVALID_OPERATION; + goto error; + } + + err = cl_kernel_set_exec_info(kernel, param_value_size, param_value); +error: + return err; +} cl_int clGetKernelArgInfo(cl_kernel kernel, cl_uint arg_index, cl_kernel_arg_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c index d95ad86..525d92b 100644 --- a/src/cl_command_queue.c +++ b/src/cl_command_queue.c @@ -123,7 +123,7 @@ set_image_info(char 
*curbe, } LOCAL cl_int -cl_command_queue_bind_image(cl_command_queue queue, cl_kernel k) +cl_command_queue_bind_image(cl_command_queue queue, cl_kernel k, uint32_t *max_bti) { uint32_t i; GET_QUEUE_THREAD_GPGPU(queue); @@ -135,6 +135,8 @@ cl_command_queue_bind_image(cl_command_queue queue, cl_kernel k) image = cl_mem_image(k->args[id].mem); set_image_info(k->curbe, &k->images[i], image); + if(*max_bti < k->images[i].idx) + *max_bti = k->images[i].idx; cl_gpgpu_bind_image(gpgpu, k->images[i].idx, image->base.bo, image->offset + k->args[id].mem->offset, image->intel_fmt, image->image_type, image->bpp, image->w, image->h, image->depth, @@ -151,12 +153,12 @@ cl_command_queue_bind_image(cl_command_queue queue, cl_kernel k) } LOCAL cl_int -cl_command_queue_bind_surface(cl_command_queue queue, cl_kernel k) +cl_command_queue_bind_surface(cl_command_queue queue, cl_kernel k, uint32_t *max_bti) { GET_QUEUE_THREAD_GPGPU(queue); /* Bind all user buffers (given by clSetKernelArg) */ - uint32_t i; + uint32_t i, bti; enum gbe_arg_type arg_type; /* kind of argument */ for (i = 0; i < k->arg_n; ++i) { int32_t offset; // location of the address in the curbe @@ -166,15 +168,41 @@ cl_command_queue_bind_surface(cl_command_queue queue, cl_kernel k) offset = interp_kernel_get_curbe_offset(k->opaque, GBE_CURBE_KERNEL_ARGUMENT, i); if (offset < 0) continue; + bti = interp_kernel_get_arg_bti(k->opaque, i); + if(*max_bti < bti) + *max_bti = bti; if (k->args[i].mem->type == CL_MEM_SUBBUFFER_TYPE) { struct _cl_mem_buffer* buffer = (struct _cl_mem_buffer*)k->args[i].mem; - cl_gpgpu_bind_buf(gpgpu, k->args[i].mem->bo, offset, k->args[i].mem->offset + buffer->sub_offset, k->args[i].mem->size, interp_kernel_get_arg_bti(k->opaque, i)); + cl_gpgpu_bind_buf(gpgpu, k->args[i].mem->bo, offset, k->args[i].mem->offset + buffer->sub_offset, k->args[i].mem->size, bti); } else { size_t mem_offset = 0; // if(k->args[i].is_svm) { mem_offset = (size_t)k->args[i].ptr - (size_t)k->args[i].mem->host_ptr; } - 
cl_gpgpu_bind_buf(gpgpu, k->args[i].mem->bo, offset, k->args[i].mem->offset + mem_offset, k->args[i].mem->size, interp_kernel_get_arg_bti(k->opaque, i)); + cl_gpgpu_bind_buf(gpgpu, k->args[i].mem->bo, offset, k->args[i].mem->offset + mem_offset, k->args[i].mem->size, bti); + } + } + return CL_SUCCESS; +} + +LOCAL cl_int +cl_command_queue_bind_exec_info(cl_command_queue queue, cl_kernel k, uint32_t max_bti) +{ + uint32_t i; + size_t mem_offset, bti = max_bti; + cl_mem svm_mem; + + GET_QUEUE_THREAD_GPGPU(queue); + + for (i = 0; i < k->exec_info_n; i++) { + void *ptr = k->exec_info[i]; + if((svm_mem = cl_context_get_svm_from_ptr(k->program->ctx, ptr)) != NULL) { + mem_offset = (size_t)ptr - (size_t)svm_mem->host_ptr; + /* only need realloc in surface state, don't need realloc in curbe */ + cl_gpgpu_bind_buf(gpgpu, svm_mem->bo, -1, svm_mem->offset + mem_offset, svm_mem->size, bti++); + if(bti == BTI_WORKAROUND_IMAGE_OFFSET) + bti = max_bti + BTI_WORKAROUND_IMAGE_OFFSET; + assert(bti < BTI_MAX_ID); } } diff --git a/src/cl_command_queue.h b/src/cl_command_queue.h index 2cd6739..bdf1a43 100644 --- a/src/cl_command_queue.h +++ b/src/cl_command_queue.h @@ -84,10 +84,13 @@ extern int cl_command_queue_flush_gpgpu(cl_command_queue, cl_gpgpu); extern cl_int cl_command_queue_finish(cl_command_queue); /* Bind all the surfaces in the GPGPU state */ -extern cl_int cl_command_queue_bind_surface(cl_command_queue, cl_kernel); +extern cl_int cl_command_queue_bind_surface(cl_command_queue, cl_kernel, uint32_t *); /* Bind all the image surfaces in the GPGPU state */ -extern cl_int cl_command_queue_bind_image(cl_command_queue, cl_kernel); +extern cl_int cl_command_queue_bind_image(cl_command_queue, cl_kernel, uint32_t *); + +/* Bind all exec info to bind table */ +extern cl_int cl_command_queue_bind_exec_info(cl_command_queue, cl_kernel, uint32_t); /* Insert a user event to command's wait_events */ extern void cl_command_queue_insert_event(cl_command_queue, cl_event); diff --git 
a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c index 2edc3be..085f512 100644 --- a/src/cl_command_queue_gen7.c +++ b/src/cl_command_queue_gen7.c @@ -318,6 +318,7 @@ cl_command_queue_ND_range_gen7(cl_command_queue queue, cl_int err = CL_SUCCESS; size_t global_size = global_wk_sz[0] * global_wk_sz[1] * global_wk_sz[2]; void* printf_info = NULL; + uint32_t max_bti = 0; /* Setup kernel */ kernel.name = "KERNEL"; @@ -365,9 +366,11 @@ cl_command_queue_ND_range_gen7(cl_command_queue queue, } /* Bind user buffers */ - cl_command_queue_bind_surface(queue, ker); + cl_command_queue_bind_surface(queue, ker, &max_bti); /* Bind user images */ - cl_command_queue_bind_image(queue, ker); + cl_command_queue_bind_image(queue, ker, &max_bti); + /* Bind all exec infos */ + cl_command_queue_bind_exec_info(queue, ker, max_bti); /* Bind all samplers */ cl_gpgpu_bind_sampler(gpgpu, ker->samplers, ker->sampler_sz); diff --git a/src/cl_kernel.c b/src/cl_kernel.c index 723eac3..e67e442 100644 --- a/src/cl_kernel.c +++ b/src/cl_kernel.c @@ -57,6 +57,8 @@ cl_kernel_delete(cl_kernel k) } if (k->image_sz) cl_free(k->images); + if (k->exec_info) + cl_free(k->exec_info); k->magic = CL_MAGIC_DEAD_HEADER; /* For safety */ cl_free(k); } @@ -254,6 +256,21 @@ cl_kernel_set_arg_svm_pointer(cl_kernel k, cl_uint index, const void *value) return 0; } +LOCAL cl_int +cl_kernel_set_exec_info(cl_kernel k, size_t n, const void *value) +{ + cl_int err = CL_SUCCESS; + assert(k != NULL); + + if (n == 0) return err; + TRY_ALLOC(k->exec_info, cl_calloc(n, 1)); + memcpy(k->exec_info, value, n); + k->exec_info_n = n / sizeof(void *); + +error: + return err; +} + LOCAL int cl_get_kernel_arg_info(cl_kernel k, cl_uint arg_index, cl_kernel_arg_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) @@ -410,6 +427,7 @@ cl_kernel_dup(cl_kernel from) to->curbe_sz = from->curbe_sz; to->sampler_sz = from->sampler_sz; to->image_sz = from->image_sz; + to->exec_info_n = 
from->exec_info_n; memcpy(to->compile_wg_sz, from->compile_wg_sz, sizeof(from->compile_wg_sz)); to->stack_size = from->stack_size; if (to->sampler_sz) @@ -419,6 +437,10 @@ cl_kernel_dup(cl_kernel from) memcpy(to->images, from->images, to->image_sz * sizeof(to->images[0])); } else to->images = NULL; + if (to->exec_info_n) { /* Must always 0 here */ + TRY_ALLOC_NO_ERR(to->exec_info, cl_calloc(to->exec_info_n, sizeof(void *))); + memcpy(to->exec_info, from->exec_info, to->exec_info_n * sizeof(void *)); + } TRY_ALLOC_NO_ERR(to->args, cl_calloc(to->arg_n, sizeof(cl_argument))); if (to->curbe_sz) TRY_ALLOC_NO_ERR(to->curbe, cl_calloc(1, to->curbe_sz)); diff --git a/src/cl_kernel.h b/src/cl_kernel.h index 5b3294b..87187bc 100644 --- a/src/cl_kernel.h +++ b/src/cl_kernel.h @@ -67,6 +67,8 @@ struct _cl_kernel { cl_argument *args; /* To track argument setting */ uint32_t arg_n:31; /* Number of arguments */ uint32_t ref_its_program:1; /* True only for the user kernel (created by clCreateKernel) */ + uint32_t exec_info_n; /* The kernel's exec info count */ + void** exec_info; /* The kernel's exec info */ }; /* Allocate an empty kernel */ @@ -103,7 +105,9 @@ extern int cl_kernel_set_arg(cl_kernel, extern int cl_kernel_set_arg_svm_pointer(cl_kernel, uint32_t arg_index, const void *arg_value); - +extern cl_int cl_kernel_set_exec_info(cl_kernel k, + size_t n, + const void *value); /* Get the argument information */ extern int cl_get_kernel_arg_info(cl_kernel k, cl_uint arg_index, diff --git a/src/cl_khr_icd.c b/src/cl_khr_icd.c index 73d1924..a3d1bb0 100644 --- a/src/cl_khr_icd.c +++ b/src/cl_khr_icd.c @@ -183,7 +183,8 @@ struct _cl_icd_dispatch const cl_khr_icd_dispatch = { (void *) clEnqueueSVMMap, (void *) clEnqueueSVMUnmap, (void *) NULL /* clCreateSamplerWithProperties */, - clSetKernelArgSVMPointer, + (void *) clSetKernelArgSVMPointer, + (void *) clSetKernelExecInfo, #endif }; diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c index 48396e0..ffdd122 100644 --- 
a/src/intel/intel_gpgpu.c +++ b/src/intel/intel_gpgpu.c @@ -1439,10 +1439,12 @@ intel_gpgpu_bind_buf(intel_gpgpu_t *gpgpu, drm_intel_bo *buf, uint32_t offset, uint32_t internal_offset, size_t size, uint8_t bti) { assert(gpgpu->binded_n < max_buf_n); - gpgpu->binded_buf[gpgpu->binded_n] = buf; - gpgpu->target_buf_offset[gpgpu->binded_n] = internal_offset; - gpgpu->binded_offset[gpgpu->binded_n] = offset; - gpgpu->binded_n++; + if(offset != -1) { + gpgpu->binded_buf[gpgpu->binded_n] = buf; + gpgpu->target_buf_offset[gpgpu->binded_n] = internal_offset; + gpgpu->binded_offset[gpgpu->binded_n] = offset; + gpgpu->binded_n++; + } intel_gpgpu_setup_bti(gpgpu, buf, internal_offset, size, bti, I965_SURFACEFORMAT_RAW); } -- 1.9.1 _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
