From: Junyan He <[email protected]> Add Gen's kernel related info should be parsed from the program's ELF file and stored in kernel_gen.
Signed-off-by: Junyan He <[email protected]> --- src/cl_kernel.h | 7 + src/gen/cl_gen.h | 89 ++++++++ src/gen/cl_kernel_gen.c | 553 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 649 insertions(+) create mode 100644 src/gen/cl_kernel_gen.c diff --git a/src/cl_kernel.h b/src/cl_kernel.h index 8acd82a..4690c0b 100644 --- a/src/cl_kernel.h +++ b/src/cl_kernel.h @@ -47,6 +47,13 @@ typedef struct cl_argument { uint32_t is_svm:1; /* Indicate this argument is SVMPointer */ } cl_argument; +typedef struct _cl_kernel_for_device { + cl_device_id device; + void *exec_code; /* The binary for exec */ + cl_uint exec_code_sz; /* The binary for exec size */ +} _cl_kernel_for_device; +typedef _cl_kernel_for_device *cl_kernel_for_device; + /* One OCL function */ struct _cl_kernel { _cl_base_object base; diff --git a/src/gen/cl_gen.h b/src/gen/cl_gen.h index 2926bc7..f761652 100644 --- a/src/gen/cl_gen.h +++ b/src/gen/cl_gen.h @@ -35,6 +35,95 @@ #include <gelf.h> #include <string.h> +/*********************************** Kernel *****************************************/ +/* Special virtual registers for OpenCL */ +typedef enum cl_gen_virt_reg { + CL_GEN_VIRT_REG_LOCAL_ID_X = 0, + CL_GEN_VIRT_REG_LOCAL_ID_Y, + CL_GEN_VIRT_REG_LOCAL_ID_Z, + CL_GEN_VIRT_REG_LOCAL_SIZE_X, + CL_GEN_VIRT_REG_LOCAL_SIZE_Y, + CL_GEN_VIRT_REG_LOCAL_SIZE_Z, + CL_GEN_VIRT_REG_ENQUEUED_LOCAL_SIZE_X, + CL_GEN_VIRT_REG_ENQUEUED_LOCAL_SIZE_Y, + CL_GEN_VIRT_REG_ENQUEUED_LOCAL_SIZE_Z, + CL_GEN_VIRT_REG_GLOBAL_SIZE_X, + CL_GEN_VIRT_REG_GLOBAL_SIZE_Y, + CL_GEN_VIRT_REG_GLOBAL_SIZE_Z, + CL_GEN_VIRT_REG_GLOBAL_OFFSET_X, + CL_GEN_VIRT_REG_GLOBAL_OFFSET_Y, + CL_GEN_VIRT_REG_GLOBAL_OFFSET_Z, + CL_GEN_VIRT_REG_GROUP_NUM_X, + CL_GEN_VIRT_REG_GROUP_NUM_Y, + CL_GEN_VIRT_REG_GROUP_NUM_Z, + CL_GEN_VIRT_REG_WORK_DIM, + CL_GEN_VIRT_REG_IMAGE_INFO, + CL_GEN_VIRT_REG_KERNEL_ARGUMENT, + CL_GEN_VIRT_REG_EXTRA_ARGUMENT, + CL_GEN_VIRT_REG_BLOCK_IP, + CL_GEN_VIRT_REG_DW_BLOCK_IP, + CL_GEN_VIRT_REG_THREAD_NUM, + CL_GEN_VIRT_REG_PROFILING_BUF_POINTER, + CL_GEN_VIRT_REG_PROFILING_TIMESTAMP0, + CL_GEN_VIRT_REG_PROFILING_TIMESTAMP1, + CL_GEN_VIRT_REG_PROFILING_TIMESTAMP2, + CL_GEN_VIRT_REG_PROFILING_TIMESTAMP3, + CL_GEN_VIRT_REG_PROFILING_TIMESTAMP4, + CL_GEN_VIRT_REG_THREAD_ID, + CL_GEN_VIRT_REG_CONSTANT_ADDRSPACE, + CL_GEN_VIRT_REG_STACK_SIZE, + CL_GEN_VIRT_REG_LAST, // Invalid +} cl_gen_virt_reg; + +typedef struct _cl_gen_virt_phy_offset { + cl_int virt_reg; + cl_int phy_offset; + cl_uint size; +} _cl_gen_virt_phy_offset; +typedef _cl_gen_virt_phy_offset *cl_gen_virt_phy_offset; + +typedef struct _cl_gen_image_info_offset { + cl_int bti; + cl_int width; + cl_int height; + cl_int depth; + cl_int data_type; + cl_int channel_order; +} _cl_gen_image_info_offset; +typedef _cl_gen_image_info_offset *cl_gen_image_info_offset; + +typedef struct _cl_gen_arg_extra_info { + cl_int arg_offset; + cl_uint arg_align; // address align for ptr + cl_int arg_misc; //bti, image index +} _cl_gen_arg_extra_info; +typedef _cl_gen_arg_extra_info *cl_gen_arg_extra_info; + +typedef struct _cl_kernel_gen { + _cl_kernel_for_device kern_base; + cl_uint local_mem_size; + cl_uint barrier_slm_used; + cl_uint simd_width; + cl_uint scratch_size; + cl_uint stack_size; + cl_uint samper_info_num; + cl_uint *samper_info; + cl_uint arg_extra_info_num; + cl_gen_arg_extra_info arg_extra_info; + cl_uint image_info_num; + cl_gen_image_info_offset image_info; + cl_uint virt_reg_phy_offset_num; // The mapping between virtual reg and phy offset + cl_gen_virt_phy_offset virt_reg_phy_offset; +} _cl_kernel_gen; +typedef _cl_kernel_gen *cl_kernel_gen; + +extern size_t cl_kernel_get_max_workgroup_size_gen(cl_kernel kernel, cl_device_id device); +extern void *cl_kernel_new_gen(cl_device_id device, cl_kernel kernel); +extern void cl_kernel_delete_gen(cl_device_id device, cl_kernel kernel); +extern cl_int cl_kernel_get_info_gen(cl_device_id device, cl_kernel kernel, + cl_uint param_name, void *param_value); +extern cl_int cl_kernel_create_gen(cl_device_id device, cl_kernel kernel); + /*********************************** Program *****************************************/ typedef struct _cl_program_gen { _cl_program_for_device prog_base; diff --git a/src/gen/cl_kernel_gen.c b/src/gen/cl_kernel_gen.c new file mode 100644 index 0000000..f555212 --- /dev/null +++ b/src/gen/cl_kernel_gen.c @@ -0,0 +1,553 @@ +/* + * Copyright © 2012 Intel Corporation + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see <http://www.gnu.org/licenses/>. + * + */ + +#include "cl_gen.h" + +static int +cl_check_builtin_kernel_dimension(cl_kernel kernel, cl_device_id device) +{ + const char *n = kernel->name; + const char *builtin_kernels_2d = "__cl_copy_image_2d_to_2d;__cl_copy_image_2d_to_buffer;" + "__cl_copy_buffer_to_image_2d;__cl_fill_image_2d;" + "__cl_fill_image_2d_array;"; + const char *builtin_kernels_3d = "__cl_copy_image_3d_to_2d;__cl_copy_image_2d_to_3d;" + "__cl_copy_image_3d_to_3d;__cl_copy_image_3d_to_buffer;" + "__cl_copy_buffer_to_image_3d;__cl_fill_image_3d"; + if (n == NULL || !strstr(device->built_in_kernels, n)) { + return 0; + } else if (strstr(builtin_kernels_2d, n)) { + return 2; + } else if (strstr(builtin_kernels_3d, n)) { + return 3; + } else + return 1; +} + +LOCAL size_t +cl_kernel_get_max_workgroup_size_gen(cl_kernel kernel, cl_device_id device) +{ + size_t work_group_size, thread_cnt; + cl_kernel_gen kernel_gen; + DEV_PRIVATE_DATA(kernel, device, kernel_gen); + + cl_uint simd_width = kernel_gen->simd_width; + cl_uint local_mem_size = kernel_gen->local_mem_size; + cl_int device_id = device->device_id; + cl_bool use_local_mem = CL_FALSE; + + if (local_mem_size) + use_local_mem = CL_TRUE; + + if (use_local_mem == CL_FALSE) { + int i = 0; + for (; i < kernel->arg_n; i++) { + if (kernel->args[i].arg_type == ArgTypePointer && + kernel->args[i].arg_addrspace == AddressSpaceLocal) { + use_local_mem = CL_TRUE; + break; + } + } + } + + if (use_local_mem == CL_FALSE) { + if (!IS_BAYTRAIL_T(device_id) || simd_width == 16) + work_group_size = simd_width * 64; + else + work_group_size = device->max_compute_unit * + device->max_thread_per_unit * simd_width; + } else { + thread_cnt = device->max_compute_unit * device->max_thread_per_unit / + device->sub_slice_count; + if (thread_cnt > 64) + thread_cnt = 64; + work_group_size = thread_cnt * simd_width; + } + + if (work_group_size > device->max_work_group_size) + work_group_size = device->max_work_group_size; + + return work_group_size; +} + +LOCAL void * +cl_kernel_new_gen(cl_device_id device, cl_kernel kernel) +{ + cl_kernel_gen gen_kernel = cl_calloc(1, sizeof(_cl_kernel_gen)); + if (gen_kernel == NULL) + return NULL; + + gen_kernel->kern_base.device = device; + return (void *)gen_kernel; +} + +LOCAL void +cl_kernel_delete_gen(cl_device_id device, cl_kernel kernel) +{ + cl_kernel_gen kernel_gen = NULL; + DEV_PRIVATE_DATA(kernel, device, kernel_gen); + + if (kernel_gen->samper_info) { + cl_free(kernel_gen->samper_info); + kernel_gen->samper_info = NULL; + } + if (kernel_gen->arg_extra_info) { + cl_free(kernel_gen->arg_extra_info); + kernel_gen->arg_extra_info = NULL; + } + if (kernel_gen->virt_reg_phy_offset) { + cl_free(kernel_gen->virt_reg_phy_offset); + kernel_gen->virt_reg_phy_offset = NULL; + } + if (kernel_gen->image_info) { + cl_free(kernel_gen->image_info); + kernel_gen->image_info = NULL; + } + + cl_free(kernel_gen); +} + +LOCAL cl_int +cl_kernel_get_info_gen(cl_device_id device, cl_kernel kernel, cl_uint param_name, void *param_value) +{ + cl_kernel_gen kernel_gen; + DEV_PRIVATE_DATA(kernel, device, kernel_gen); + + if (param_name == CL_KERNEL_WORK_GROUP_SIZE) { + *(size_t *)param_value = cl_kernel_get_max_workgroup_size_gen(kernel, device); + return CL_SUCCESS; + } else if (param_name == CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE) { + *(size_t *)param_value = kernel_gen->simd_width; + return CL_SUCCESS; + } else if (param_name == CL_KERNEL_PRIVATE_MEM_SIZE) { + *(size_t *)param_value = kernel_gen->stack_size; + return CL_SUCCESS; + } else if (param_name == CL_KERNEL_LOCAL_MEM_SIZE) { + *(size_t *)param_value = kernel_gen->local_mem_size; + return CL_SUCCESS; + } else if (param_name == CL_KERNEL_GLOBAL_WORK_SIZE) { + int dimension = cl_check_builtin_kernel_dimension(kernel, device); + if (!dimension) + return CL_INVALID_VALUE; + + if (dimension == 1) { + memcpy(param_value, device->max_1d_global_work_sizes, 3 * sizeof(size_t)); + return CL_SUCCESS; + } else if (dimension == 2) { + memcpy(param_value, device->max_2d_global_work_sizes, 3 * sizeof(size_t)); + return CL_SUCCESS; + } else if (dimension == 3) { + memcpy(param_value, device->max_3d_global_work_sizes, 3 * sizeof(size_t)); + return CL_SUCCESS; + } else + return CL_INVALID_VALUE; + } else if (param_name == CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR) { + *(size_t *)param_value = kernel_gen->simd_width; + return CL_SUCCESS; + } + + return CL_INVALID_VALUE; +} + +static cl_int +cl_program_gen_get_kernel_func_cl_info(cl_device_id device, cl_kernel kernel) +{ + cl_program prog = kernel->program; + cl_program_gen prog_gen; + cl_kernel_gen kernel_gen; + cl_int offset; + void *desc; + void *ptr; + cl_char *name; + cl_uint name_size; + cl_uint desc_size; + cl_uint desc_type; + cl_uint wg_sz_size; + cl_uint attr_size; + cl_uint arg_info_size; + int i; + char *arg_type_qual_str; + char *arg_access_qualifier_str; + + DEV_PRIVATE_DATA(prog, device, prog_gen); + DEV_PRIVATE_DATA(kernel, device, kernel_gen); + + assert(kernel->name); + + if (prog_gen->func_cl_info == NULL) + return CL_SUCCESS; + + offset = 0; + desc = NULL; + while (offset < prog_gen->func_cl_info_data->d_size) { + name_size = *(cl_uint *)(prog_gen->func_cl_info_data->d_buf + offset); + desc_size = *(cl_uint *)(prog_gen->func_cl_info_data->d_buf + offset + sizeof(cl_uint)); + desc_type = *(cl_uint *)(prog_gen->func_cl_info_data->d_buf + offset + 2 * sizeof(cl_uint)); + name = prog_gen->func_cl_info_data->d_buf + offset + sizeof(cl_uint) * 3; + + if (desc_type != 0x04) { + offset += 3 * sizeof(cl_uint) + ALIGN(name_size, 4) + ALIGN(desc_size, 4); + continue; + } + + if (strcmp((char *)name, (char *)kernel->name) == 0) { // Find the kernel info slot + desc = prog_gen->func_cl_info_data->d_buf + offset + sizeof(cl_uint) * 3 + ALIGN(name_size, 4); + break; + } + + offset += 3 * sizeof(cl_uint) + ALIGN(name_size, 4) + ALIGN(desc_size, 4); + } + + if (desc == NULL) + return CL_SUCCESS; + + ptr = desc; + attr_size = *(cl_uint *)ptr; + ptr += sizeof(cl_uint); + wg_sz_size = *(cl_uint *)ptr; + ptr += sizeof(cl_uint); + arg_info_size = *(cl_uint *)ptr; + ptr += sizeof(cl_uint); + + if (attr_size) { + if (kernel->kernel_attr && strcmp(kernel->kernel_attr, ptr) != 0) + return CL_INVALID_KERNEL_DEFINITION; + + if (kernel->kernel_attr == NULL) { + kernel->kernel_attr = cl_malloc(strlen(ptr) + 1); + if (kernel->kernel_attr == NULL) + return CL_OUT_OF_HOST_MEMORY; + memcpy(kernel->kernel_attr, ptr, strlen(ptr) + 1); + } + + ptr += ALIGN((strlen(kernel->kernel_attr) + 1), 4); + } + + if (wg_sz_size) { + for (i = 0; i < 3; i++) { + if (kernel->compile_wg_sz[i] > 0) { + if (kernel->compile_wg_sz[i] != *(size_t *)ptr) + return CL_INVALID_KERNEL_DEFINITION; + } else { + kernel->compile_wg_sz[i] = *(size_t *)ptr; + } + ptr += sizeof(size_t); + } + } + + if (arg_info_size) { + cl_uint arg_type_qualifier = 0; + cl_uint arg_access_qualifier = 0; + + for (i = 0; i < kernel->arg_n; i++) { + if (kernel->args[i].arg_type_name) { + if (strcmp(kernel->args[i].arg_type_name, ptr) != 0) + return CL_INVALID_KERNEL_DEFINITION; + } else { + kernel->args[i].arg_type_name = cl_malloc(strlen(ptr) + 1); + if (kernel->args[i].arg_type_name == NULL) + return CL_OUT_OF_HOST_MEMORY; + memcpy(kernel->args[i].arg_type_name, ptr, strlen(ptr) + 1); + } + ptr += strlen(ptr) + 1; + + arg_access_qualifier_str = ptr; + ptr += strlen(ptr) + 1; + + arg_type_qual_str = ptr; + ptr += strlen(ptr) + 1; + + if (kernel->args[i].arg_name) { + if (strcmp(kernel->args[i].arg_name, ptr) != 0) + return CL_INVALID_KERNEL_DEFINITION; + } else { + kernel->args[i].arg_name = cl_malloc(strlen(ptr) + 1); + if (kernel->args[i].arg_name == NULL) + return CL_OUT_OF_HOST_MEMORY; + memcpy(kernel->args[i].arg_name, ptr, strlen(ptr) + 1); + } + ptr += strlen(ptr) + 1; + + ptr = (void *)(ALIGN((long)ptr, 4)); + + if (!strcmp(arg_access_qualifier_str, "write_only")) { + arg_access_qualifier = CL_KERNEL_ARG_ACCESS_WRITE_ONLY; + } else if (!strcmp(arg_access_qualifier_str, "read_only")) { + arg_access_qualifier = CL_KERNEL_ARG_ACCESS_READ_ONLY; + } else if (!strcmp(arg_access_qualifier_str, "read_write")) { + arg_access_qualifier = CL_KERNEL_ARG_ACCESS_READ_WRITE; + } else { + arg_access_qualifier = CL_KERNEL_ARG_ACCESS_NONE; + } + if (kernel->args[i].arg_access_qualifier > 0) { + if (kernel->args[i].arg_access_qualifier != arg_access_qualifier) + return CL_INVALID_KERNEL_DEFINITION; + } else { + kernel->args[i].arg_access_qualifier = arg_access_qualifier; + } + + arg_type_qualifier = 0; + if (strstr(arg_type_qual_str, "const") && (kernel->args[i].arg_type == ArgTypePointer)) + arg_type_qualifier = arg_type_qualifier | CL_KERNEL_ARG_TYPE_CONST; + if (strstr(arg_type_qual_str, "volatile")) + arg_type_qualifier = arg_type_qualifier | CL_KERNEL_ARG_TYPE_VOLATILE; + if (strstr(arg_type_qual_str, "restrict")) + arg_type_qualifier = arg_type_qualifier | CL_KERNEL_ARG_TYPE_RESTRICT; + if (strstr(arg_type_qual_str, "pipe")) + arg_type_qualifier = CL_KERNEL_ARG_TYPE_PIPE; + if (kernel->args[i].arg_type_qualifier > 0) { + if (kernel->args[i].arg_type_qualifier != arg_type_qualifier) + return CL_INVALID_KERNEL_DEFINITION; + } else { + kernel->args[i].arg_type_qualifier = arg_type_qualifier; + } + } + } + + if (ptr - desc != desc_size) + return CL_INVALID_PROGRAM; + + return CL_SUCCESS; +} + +static cl_int +cl_program_gen_get_one_kernel_func(cl_device_id device, cl_kernel kernel, GElf_Sym *p_sym_entry) +{ + cl_program prog = kernel->program; + cl_program_gen prog_gen; + cl_kernel_gen kernel_gen; + cl_int offset; + cl_char *name; + cl_uint name_size; + cl_uint desc_size; + cl_uint desc_type; + cl_uint arg_num; + void *ptr; + int i, j; + int cmp_arg; + DEV_PRIVATE_DATA(prog, device, prog_gen); + DEV_PRIVATE_DATA(kernel, device, kernel_gen); + + assert(kernel->name && kernel->name[0] != 0); // Must have a name for kernel func + assert(p_sym_entry->st_shndx == prog_gen->text_sec_index); + kernel_gen->kern_base.exec_code = prog_gen->text_data->d_buf + p_sym_entry->st_value; + kernel_gen->kern_base.exec_code_sz = p_sym_entry->st_size; + assert(kernel_gen->kern_base.exec_code < prog_gen->text_data->d_buf + prog_gen->text_data->d_size); + + /* Get all GPU related info for the kernel. */ + offset = 0; + while (offset < prog_gen->func_gpu_info_data->d_size) { + name_size = *(cl_uint *)(prog_gen->func_gpu_info_data->d_buf + offset); + desc_size = *(cl_uint *)(prog_gen->func_gpu_info_data->d_buf + offset + sizeof(cl_uint)); + desc_type = *(cl_uint *)(prog_gen->func_gpu_info_data->d_buf + offset + 2 * sizeof(cl_uint)); + name = prog_gen->func_gpu_info_data->d_buf + offset + sizeof(cl_uint) * 3; + if (desc_type != 0x03) { + offset += 3 * sizeof(cl_uint) + ALIGN(name_size, 4) + ALIGN(desc_size, 4); + continue; + } + + if (strcmp((char *)name, (char *)kernel->name) != 0) { + offset += 3 * sizeof(cl_uint) + ALIGN(name_size, 4) + ALIGN(desc_size, 4); + continue; + } + + // Find the kernel info slot + cmp_arg = 0; + ptr = prog_gen->func_gpu_info_data->d_buf + offset + sizeof(cl_uint) * 3 + ALIGN(name_size, 4); + kernel_gen->simd_width = *(cl_uint *)(ptr); + kernel_gen->local_mem_size = *(cl_uint *)(ptr + sizeof(cl_uint)); + kernel_gen->scratch_size = *(cl_uint *)(ptr + 2 * sizeof(cl_uint)); + kernel_gen->stack_size = *(cl_uint *)(ptr + 3 * sizeof(cl_uint)); + kernel_gen->barrier_slm_used = *(cl_uint *)(ptr + 4 * sizeof(cl_uint)); + arg_num = *(cl_uint *)(ptr + 5 * sizeof(cl_uint)); + if (kernel->arg_n > 0 && kernel->arg_n != arg_num) + return CL_INVALID_KERNEL_DEFINITION; + + if (kernel->arg_n > 0) { + cmp_arg = 1; + } else { + kernel->arg_n = arg_num; + kernel->args = cl_calloc(arg_num, sizeof(cl_argument)); + if (kernel->args == NULL) + return CL_OUT_OF_HOST_MEMORY; + } + + kernel_gen->arg_extra_info = cl_calloc(arg_num, sizeof(_cl_gen_arg_extra_info)); + if (kernel_gen->arg_extra_info == NULL) + return CL_OUT_OF_HOST_MEMORY; + + kernel_gen->arg_extra_info_num = arg_num; + + ptr += 6 * sizeof(cl_uint); + + /* Setup all the arguments info or cmp them */ + if (cmp_arg == 0) { + for (i = 0; i < arg_num; i++) { + kernel->args[i].arg_no = *((cl_uint *)ptr); + ptr += sizeof(cl_uint); + kernel->args[i].arg_size = *((cl_uint *)ptr); + ptr += sizeof(cl_uint); + kernel->args[i].arg_type = *((cl_uint *)ptr); + ptr += sizeof(cl_uint); + kernel_gen->arg_extra_info[i].arg_offset = *((cl_int *)ptr); + ptr += sizeof(cl_uint); + kernel->args[i].arg_addrspace = *((cl_uint *)ptr); + ptr += sizeof(cl_uint); + kernel_gen->arg_extra_info[i].arg_align = *((cl_uint *)ptr); + ptr += sizeof(cl_uint); + kernel_gen->arg_extra_info[i].arg_misc = *((cl_int *)ptr); + ptr += sizeof(cl_uint); // The BTI, image index, etc + } + } else { + cl_uint arg_no; + cl_argument *k_arg; + for (i = 0; i < arg_num; i++) { + arg_no = *((cl_uint *)ptr); + ptr += sizeof(cl_uint); + + k_arg = NULL; + for (j = 0; j < arg_num; j++) { // Find the same index arg + if (kernel->args[j].arg_no == arg_no) { + k_arg = &kernel->args[j]; + break; + } + } + assert(k_arg); + + if (k_arg->arg_size != *((cl_uint *)ptr)) + return CL_INVALID_KERNEL_DEFINITION; + ptr += sizeof(cl_uint); + if (k_arg->arg_type != *((cl_uint *)ptr)) + return CL_INVALID_KERNEL_DEFINITION; + ptr += sizeof(cl_uint); + + kernel_gen->arg_extra_info[i].arg_offset = *((cl_int *)ptr); + ptr += sizeof(cl_uint); + + if (k_arg->arg_type != *((cl_uint *)ptr)) + return CL_INVALID_KERNEL_DEFINITION; + ptr += sizeof(cl_uint); + + kernel_gen->arg_extra_info[i].arg_align = *((cl_uint *)ptr); + ptr += sizeof(cl_uint); + kernel_gen->arg_extra_info[i].arg_misc = *((cl_int *)ptr); + ptr += sizeof(cl_uint); // The BTI, image index, etc + } + } + + /* Setup the samplers. */ + kernel_gen->samper_info_num = *((cl_uint *)ptr); + ptr += sizeof(cl_uint); + if (kernel_gen->samper_info_num) { + kernel_gen->samper_info = cl_calloc(kernel_gen->samper_info_num, sizeof(cl_uint)); + if (kernel_gen->samper_info == NULL) + return CL_OUT_OF_HOST_MEMORY; + + for (i = 0; i < kernel_gen->samper_info_num; i++) { + kernel_gen->samper_info[i] = *((cl_uint *)ptr); + ptr += sizeof(cl_uint); + } + } + + /* Setup the image. */ + kernel_gen->image_info_num = *((cl_uint *)ptr); + ptr += sizeof(cl_uint); + if (kernel_gen->image_info_num) { + kernel_gen->image_info = + cl_calloc(kernel_gen->image_info_num, sizeof(_cl_gen_image_info_offset)); + if (kernel_gen->image_info == NULL) + return CL_OUT_OF_HOST_MEMORY; + + for (i = 0; i < kernel_gen->image_info_num; i++) { + kernel_gen->image_info[i].bti = *((cl_int *)ptr); + ptr += sizeof(cl_uint); + kernel_gen->image_info[i].width = *((cl_int *)ptr); + ptr += sizeof(cl_uint); + kernel_gen->image_info[i].height = *((cl_int *)ptr); + ptr += sizeof(cl_uint); + kernel_gen->image_info[i].depth = *((cl_int *)ptr); + ptr += sizeof(cl_uint); + kernel_gen->image_info[i].data_type = *((cl_int *)ptr); + ptr += sizeof(cl_uint); + kernel_gen->image_info[i].channel_order = *((cl_int *)ptr); + ptr += sizeof(cl_uint); + } + } + + /* Setup the special virtual reg/offset mapping. */ + kernel_gen->virt_reg_phy_offset_num = *((cl_uint *)ptr); + ptr += sizeof(cl_uint); + if (kernel_gen->virt_reg_phy_offset_num) { + kernel_gen->virt_reg_phy_offset = + cl_calloc(kernel_gen->virt_reg_phy_offset_num, sizeof(_cl_gen_virt_phy_offset)); + if (kernel_gen->virt_reg_phy_offset == NULL) + return CL_OUT_OF_HOST_MEMORY; + + for (i = 0; i < kernel_gen->virt_reg_phy_offset_num; i++) { + if (*((cl_uint *)ptr) >= CL_GEN_VIRT_REG_LAST || *((cl_uint *)ptr) < 0) + return CL_INVALID_PROGRAM; + + kernel_gen->virt_reg_phy_offset[i].virt_reg = *((cl_uint *)ptr); + ptr += sizeof(cl_uint); + kernel_gen->virt_reg_phy_offset[i].phy_offset = *((cl_uint *)ptr); + ptr += sizeof(cl_uint); + kernel_gen->virt_reg_phy_offset[i].size = *((cl_uint *)ptr); + ptr += sizeof(cl_uint); + } + } + + return cl_program_gen_get_kernel_func_cl_info(device, kernel); + } + + return CL_INVALID_PROGRAM; +} + +LOCAL cl_int +cl_kernel_create_gen(cl_device_id device, cl_kernel kernel) +{ + cl_program prog = kernel->program; + GElf_Sym sym_entry; + GElf_Sym *p_sym_entry = NULL; + cl_program_gen prog_gen; + cl_kernel_gen kernel_gen; + char *name = NULL; + int i; + + DEV_PRIVATE_DATA(prog, device, prog_gen); + DEV_PRIVATE_DATA(kernel, device, kernel_gen); + + assert(kernel->name && kernel->name[0] != 0); // Must have a name for kernel func + + for (i = 0; i < (int)(prog_gen->symtab_entry_num); i++) { + p_sym_entry = gelf_getsym(prog_gen->symtab_data, i, &sym_entry); + assert(p_sym_entry == &sym_entry); + if ((p_sym_entry->st_info & 0x0f) != STT_FUNC) + continue; + if (((p_sym_entry->st_info & 0x0f0) >> 4) != STB_GLOBAL) + continue; + + name = p_sym_entry->st_name + prog_gen->strtab_data->d_buf; + assert(name); + if (strcmp((char *)name, (char *)kernel->name)) + continue; + + // Find the kernel info slot + return cl_program_gen_get_one_kernel_func(device, kernel, p_sym_entry); + } + + return CL_INVALID_KERNEL_NAME; +} -- 2.7.4 _______________________________________________ Beignet mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/beignet
