LGTM, pushed, thanks.
> -----Original Message----- > From: Beignet [mailto:[email protected]] On Behalf Of > Guo, Yejun > Sent: Thursday, December 3, 2015 16:53 > To: [email protected] > Subject: Re: [Beignet] [PATCH V2 1/2] make Beignet as intermedia layer of > CMRT > > Ping for review, thanks. > > -----Original Message----- > From: Guo, Yejun > Sent: Thursday, November 19, 2015 2:03 AM > To: [email protected] > Cc: Guo, Yejun > Subject: [PATCH V2 1/2] make Beignet as intermedia layer of CMRT > > CMRT is C for Media Runtime on Intel GPU, see > https://github.com/01org/cmrt. > There is a request to make Beignet as intermedia layer of CMRT, in other > words, application programer write OpenCL APIs to execute the CM kernel > on GPU, the following shows the key code, and please refer to the next > patch of unit test for detail. > > prog = clCreateProgramWithBinary("cm kernel"); clBuildProgram(prog); > kernel = clCreateKernel(prog, "kernel name"); image = clCreateImage(); > clSetKernelArg(kernel, image); clEnqueueNDRangeKernel(kernel); > > Inside Beignet, once cm kernel is invoked, the following relative APIs will be > directly passed to CMRT library (libcmrt.so) which is loaded via dlopen only > when necessary. Since we use this simple method to keep the code clean, > OpenCL spec is not strictly followed, and cl_event is not supported for this > case. > > v2: add comments about the cm queue in fuction cmrt_enqueue > Signed-off-by: Guo Yejun <[email protected]> > --- > CMakeLists.txt | 6 + > src/CMakeLists.txt | 7 ++ > src/cl_api.c | 39 ++++++- > src/cl_cmrt.cpp | 311 > +++++++++++++++++++++++++++++++++++++++++++++++++ > src/cl_cmrt.h | 45 +++++++ > src/cl_command_queue.c | 7 ++ > src/cl_command_queue.h | 2 + > src/cl_device_id.h | 3 + > src/cl_gt_device.h | 1 + > src/cl_kernel.c | 11 ++ > src/cl_kernel.h | 2 + > src/cl_mem.c | 7 ++ > src/cl_mem.h | 3 + > src/cl_program.c | 57 +++++++-- > src/cl_program.h | 10 +- > 15 files changed, 497 insertions(+), 14 deletions(-) create mode 100644 > src/cl_cmrt.cpp create mode 100644 src/cl_cmrt.h > > diff --git a/CMakeLists.txt b/CMakeLists.txt index 3c6c373..3411b6a 100644 > --- a/CMakeLists.txt > +++ b/CMakeLists.txt > @@ -150,6 +150,12 @@ ELSE(DRM_INTEL_FOUND) > MESSAGE(FATAL_ERROR "Looking for DRM Intel (>= 2.4.52) - not found") > ENDIF(DRM_INTEL_FOUND) > > +# CMRT > +pkg_check_modules(CMRT libcmrt) > +IF(CMRT_FOUND) > +INCLUDE_DIRECTORIES(${CMRT_INCLUDE_DIRS}) > +ENDIF(CMRT_FOUND) > + > # Threads > Find_Package(Threads) > > diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index c917e76..5f1872d > 100644 > --- a/src/CMakeLists.txt > +++ b/src/CMakeLists.txt > @@ -101,6 +101,13 @@ if (X11_FOUND) > x11/va_dri2.c) > endif (X11_FOUND) > > +if (CMRT_FOUND) > + set(CMAKE_CXX_FLAGS "-DHAS_CMRT ${CMAKE_CXX_FLAGS}") > + set(CMAKE_CXX_FLAGS "- > DCMRT_PATH=${CMRT_LIBRARY_DIRS}/libcmrt.so > +${CMAKE_CXX_FLAGS}") > + set(CMAKE_C_FLAGS "-DHAS_CMRT ${CMAKE_C_FLAGS}") > + set(OPENCL_SRC ${OPENCL_SRC} cl_cmrt.cpp) endif (CMRT_FOUND) > + > if (EGL_FOUND AND MESA_SOURCE_FOUND) > set (OPENCL_SRC ${OPENCL_SRC} cl_mem_gl.c cl_gl_api.c > x11/mesa_egl_extension.c x11/mesa_egl_res_share.c > intel/intel_dri_resource_sharing.c) > SET(CMAKE_CXX_FLAGS "-DHAS_EGL ${CMAKE_CXX_FLAGS}") diff --git > a/src/cl_api.c b/src/cl_api.c index ddd39cf..7cedf4b 100644 > --- a/src/cl_api.c > +++ b/src/cl_api.c > @@ -31,6 +31,7 @@ > #include "cl_accelerator_intel.h" > #include "cl_alloc.h" > #include "cl_utils.h" > +#include "cl_cmrt.h" > > #include "CL/cl.h" > #include "CL/cl_ext.h" > @@ -276,6 +277,10 @@ clRetainDevice(cl_device_id device) cl_int > clReleaseDevice(cl_device_id device) { > +#ifdef HAS_CMRT > + cmrt_destroy_device(device); > +#endif > + > // XXX stub for C++ Bindings > return CL_SUCCESS; > } > @@ -941,11 +946,11 @@ clBuildProgram(cl_program program, > INVALID_DEVICE_IF (device_list[0] != program->ctx->device); > } > > - /* TODO support create program from binary */ > assert(program->source_type == FROM_LLVM || > program->source_type == FROM_SOURCE || > program->source_type == FROM_LLVM_SPIR || > - program->source_type == FROM_BINARY); > + program->source_type == FROM_BINARY || > + program->source_type == FROM_CMRT); > if((err = cl_program_build(program, options)) != CL_SUCCESS) { > goto error; > } > @@ -1244,7 +1249,13 @@ clSetKernelArg(cl_kernel kernel, > { > cl_int err = CL_SUCCESS; > CHECK_KERNEL(kernel); > - err = cl_kernel_set_arg(kernel, arg_index, arg_size, arg_value); > + > +#ifdef HAS_CMRT > + if (kernel->cmrt_kernel != NULL) > + err = cmrt_set_kernel_arg(kernel, arg_index, arg_size, arg_value); > + else > +#endif > + err = cl_kernel_set_arg(kernel, arg_index, arg_size, arg_value); > error: > return err; > } > @@ -1532,6 +1543,12 @@ clFinish(cl_command_queue command_queue) > cl_int err = CL_SUCCESS; > > CHECK_QUEUE (command_queue); > + > +#ifdef HAS_CMRT > + if (command_queue->cmrt_event != NULL) > + return cmrt_wait_for_task_finished(command_queue); > +#endif > + > err = cl_command_queue_finish(command_queue); > > error: > @@ -2655,6 +2672,11 @@ clEnqueueMapBuffer(cl_command_queue > command_queue, > goto error; > } > > +#ifdef HAS_CMRT > + if (command_queue->cmrt_event != NULL) > + cmrt_wait_for_task_finished(command_queue); > +#endif > + > TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, > event, buffer->ctx); > > data = &no_wait_data; > @@ -2743,6 +2765,11 @@ clEnqueueMapImage(cl_command_queue > command_queue, > goto error; > } > > +#ifdef HAS_CMRT > + if (command_queue->cmrt_event != NULL) > + cmrt_wait_for_task_finished(command_queue); > +#endif > + > TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, > event, mem->ctx); > > data = &no_wait_data; > @@ -2948,6 +2975,12 @@ clEnqueueNDRangeKernel(cl_command_queue > command_queue, > goto error; > } > > +#ifdef HAS_CMRT > + if (kernel->cmrt_kernel != NULL) { > + err = cmrt_enqueue(command_queue, kernel, global_work_size, > local_work_size); > + goto error; > + } > +#endif > > /* XXX No event right now */ > //FATAL_IF(num_events_in_wait_list > 0, "Events are not supported"); diff > --git a/src/cl_cmrt.cpp b/src/cl_cmrt.cpp new file mode 100644 index > 0000000..25e4d82 > --- /dev/null > +++ b/src/cl_cmrt.cpp > @@ -0,0 +1,311 @@ > +#include "cl_cmrt.h" > +#include "cl_device_id.h" > +#include "intel/intel_defines.h" > +#include "cl_command_queue.h" > + > +#include "cm_rt.h" //header file of libcmrt.so > +typedef INT (*CreateCmDeviceFunc)(CmDevice * &pDevice, UINT & > version, > + CmDriverContext * drivercontext, UINT > DevCreateOption); typedef > +INT (*DestroyCmDeviceFunc)(CmDevice * &pDevice); > + > +#include <dlfcn.h> > + > +static void* dlhCMRT = NULL; > +static CreateCmDeviceFunc pfnCreateCmDevice = NULL; static > +DestroyCmDeviceFunc pfnDestroyCmDevice = NULL; > + > +#define XSTR(x) #x > +#define STR(x) XSTR(x) > + > +class CmrtCleanup > +{ > +public: > + CmrtCleanup(){} > + ~CmrtCleanup() > + { > + if (dlhCMRT != NULL) > + dlclose(dlhCMRT); > + } > +}; > + > +enum CMRT_MEM_TYPE > +{ > + CMRT_BUFFER, > + CMRT_SURFACE2D, > +}; > + > +static CmrtCleanup cmrtCleanup; > + > +static bool LoadCmrtLibrary() > +{ > + if (dlhCMRT == NULL) { > + dlhCMRT = dlopen(STR(CMRT_PATH), RTLD_LAZY | RTLD_LOCAL); > + > + if (dlhCMRT == NULL) > + return false; > + > + pfnCreateCmDevice = (CreateCmDeviceFunc)dlsym(dlhCMRT, > "CreateCmDevice"); > + if (pfnCreateCmDevice == NULL) > + return false; > + > + pfnDestroyCmDevice = (DestroyCmDeviceFunc)dlsym(dlhCMRT, > "DestroyCmDevice"); > + if (pfnDestroyCmDevice == NULL) > + return false; > + } > + return true; > +} > + > +cl_int cmrt_build_program(cl_program p, const char *options) { > + CmDevice*& cmrt_device = (CmDevice*&)(p->ctx->device->cmrt_device); > + int result; > + if (cmrt_device == NULL) > + { > + if (!LoadCmrtLibrary()) > + return CL_DEVICE_NOT_AVAILABLE; //yes, the error is not accurate, > but i do not find a bettere one > + > + CmDriverContext ctx; > + ctx.shared_bufmgr = 1; > + ctx.bufmgr = (drm_intel_bufmgr*)cl_context_get_bufmgr(p->ctx); > + ctx.userptr_enabled = 0; > + ctx.deviceid = p->ctx->device->device_id; > + ctx.device_rev = -1; > + UINT version = 0; > + result = (*pfnCreateCmDevice)(cmrt_device, version, &ctx, > CM_DEVICE_CREATE_OPTION_DEFAULT); > + if (result != CM_SUCCESS) > + return CL_DEVICE_NOT_AVAILABLE; > + } > + > + CmProgram* cmrt_program = NULL; > + result = cmrt_device->LoadProgram(p->binary, p->binary_sz, > + cmrt_program, options); if (result != CM_SUCCESS) > + return CL_COMPILE_PROGRAM_FAILURE; > + > + p->cmrt_program = cmrt_program; > + cmrt_program->GetKernelCount(p->ker_n); > + return CL_SUCCESS; > +} > + > +cl_int cmrt_destroy_program(cl_program p) { > + CmDevice* cmrt_device = (CmDevice*)(p->ctx->device->cmrt_device); > + CmProgram*& cmrt_program = (CmProgram*&)(p->cmrt_program); > + if (cmrt_device->DestroyProgram(cmrt_program) != CM_SUCCESS) > + return CL_INVALID_PROGRAM; > + return CL_SUCCESS; > +} > + > +cl_int cmrt_destroy_device(cl_device_id device) { > + CmDevice*& cmrt_device = (CmDevice*&)(device->cmrt_device); > + if ((*pfnDestroyCmDevice)(cmrt_device) != CM_SUCCESS) > + return CL_INVALID_DEVICE; > + return CL_SUCCESS; > +} > + > +void* cmrt_create_kernel(cl_program p, const char *name) { > + CmDevice* cmrt_device = (CmDevice*)(p->ctx->device->cmrt_device); > + CmKernel* cmrt_kernel = NULL; > + int result = cmrt_device->CreateKernel((CmProgram*)(p->cmrt_program), > +name, cmrt_kernel); > + if (result != CM_SUCCESS) > + return NULL; > + > + return cmrt_kernel; > +} > + > +cl_int cmrt_destroy_kernel(cl_kernel k) { > + CmDevice* cmrt_device = > +(CmDevice*)(k->program->ctx->device->cmrt_device); > + CmKernel*& cmrt_kernel = (CmKernel*&)(k->cmrt_kernel); > + if (cmrt_device->DestroyKernel(cmrt_kernel) != CM_SUCCESS) > + return CL_INVALID_KERNEL; > + return CL_SUCCESS; > +} > + > +cl_int cmrt_enqueue(cl_command_queue cq, cl_kernel k, const size_t* > +global_work_size, const size_t* local_work_size) { > + CmDevice* cmrt_device = > +(CmDevice*)(k->program->ctx->device->cmrt_device); > + CmKernel* cmrt_kernel = (CmKernel*)(k->cmrt_kernel); > + > + int result = 0; > + > + cmrt_kernel->SetThreadCount(global_work_size[0]*global_work_size[1]); > + > + //no need to destory queue explicitly, //and there is only one queue > + instance within each device, //CreateQueue always returns the same > + instance > + CmQueue* pCmQueue = NULL; > + cmrt_device->CreateQueue(pCmQueue); > + > + CmTask *pKernelArray = NULL; > + cmrt_device->CreateTask(pKernelArray); > + > + pKernelArray->AddKernel(cmrt_kernel); > + > + CmEvent* e = NULL; > + > + if (local_work_size == NULL) { > + CmThreadSpace* pTS = NULL; > + cmrt_device->CreateThreadSpace(global_work_size[0], > global_work_size[1], pTS); > + result = pCmQueue->Enqueue(pKernelArray, e, pTS); > + } else { > + CmThreadGroupSpace* pTGS = NULL; > + cmrt_device->CreateThreadGroupSpace(global_work_size[0], > global_work_size[1], local_work_size[0], local_work_size[1], pTGS); > + result = pCmQueue->EnqueueWithGroup(pKernelArray, e, pTGS); > + cmrt_device->DestroyThreadGroupSpace(pTGS); > + } > + > + if (result != CM_SUCCESS) > + return CL_INVALID_OPERATION; > + > + cmrt_device->DestroyTask(pKernelArray); > + > + CmEvent*& olde = (CmEvent*&)cq->cmrt_event; if (olde != NULL) > + pCmQueue->DestroyEvent(e); > + > + cq->cmrt_event = e; > + > + return CL_SUCCESS; > +} > + > +static VA_CM_FORMAT GetCmrtFormat(_cl_mem_image* image) { > + switch (image->intel_fmt) > + { > + case I965_SURFACEFORMAT_B8G8R8A8_UNORM: > + return VA_CM_FMT_A8R8G8B8; > + case I965_SURFACEFORMAT_B8G8R8X8_UNORM: > + return VA_CM_FMT_X8R8G8B8; > + case I965_SURFACEFORMAT_A8_UNORM: > + return VA_CM_FMT_A8; > + case I965_SURFACEFORMAT_R10G10B10A2_UNORM: > + return VA_CM_FMT_A2B10G10R10; > + case I965_SURFACEFORMAT_R16G16B16A16_UNORM: > + return VA_CM_FMT_A16B16G16R16; > + case I965_SURFACEFORMAT_L8_UNORM: > + return VA_CM_FMT_L8; > + case I965_SURFACEFORMAT_R16_UINT: > + return VA_CM_FMT_R16U; > + case I965_SURFACEFORMAT_R8_UNORM: > + return VA_CM_FMT_R8U; > + case I965_SURFACEFORMAT_L16_UNORM: > + return VA_CM_FMT_L16; > + case I965_SURFACEFORMAT_R32_FLOAT: > + return VA_CM_FMT_R32F; > + default: > + return VA_CM_FMT_UNKNOWN; > + } > +} > + > +static bool CreateCmrtMemory(cl_mem mem) { > + if (mem->cmrt_mem != NULL) > + return true; > + > + CmDevice* cmrt_device = (CmDevice*)(mem->ctx->device->cmrt_device); > + int result; > + CmOsResource osResource; > + osResource.bo_size = mem->size; > + osResource.bo_flags = DRM_BO_HANDLE; > + osResource.bo = (drm_intel_bo*)mem->bo; if (IS_IMAGE(mem)) { > + _cl_mem_image* image = cl_mem_image(mem); > + if (CL_MEM_OBJECT_IMAGE2D != image->image_type) > + return CL_INVALID_ARG_VALUE; > + osResource.format = GetCmrtFormat(image); > + if (osResource.format == VA_CM_FMT_UNKNOWN) > + return false; > + osResource.aligned_width = image->row_pitch; > + osResource.aligned_height = mem->size / image->row_pitch; > + osResource.pitch = image->row_pitch; > + osResource.tile_type = image->tiling; > + osResource.orig_width = image->w; > + osResource.orig_height = image->h; > + CmSurface2D*& cmrt_surface2d = (CmSurface2D*&)(mem->cmrt_mem); > + result = cmrt_device->CreateSurface2D(&osResource, cmrt_surface2d); > + mem->cmrt_mem_type = CMRT_SURFACE2D; } else { > + osResource.format = VA_CM_FMT_BUFFER; > + osResource.buf_bytes = mem->size; > + CmBuffer*& cmrt_buffer = (CmBuffer*&)(mem->cmrt_mem); > + result = cmrt_device->CreateBuffer(&osResource, cmrt_buffer); > + mem->cmrt_mem_type = CMRT_BUFFER; > + } > + > + if (result != CM_SUCCESS) > + return false; > + > + return true; > +} > + > +cl_int cmrt_set_kernel_arg(cl_kernel k, cl_uint index, size_t sz, const > +void *value) { > + if(value == NULL) > + return CL_INVALID_ARG_VALUE; > + > + CmKernel* cmrt_kernel = (CmKernel*)(k->cmrt_kernel); > + > + WORD argKind = -1; > + if (cmrt_kernel->GetArgKind(index, argKind) != CM_SUCCESS) > + return CL_INVALID_ARG_INDEX; > + > + int result; > + if (argKind == ARG_KIND_GENERAL) > + result = cmrt_kernel->SetKernelArg(index, sz, value); else { > + cl_mem mem = *(cl_mem*)value; > + if (mem->magic == CL_MAGIC_MEM_HEADER) { > + if (!CreateCmrtMemory(mem)) > + return CL_INVALID_ARG_VALUE; > + > + SurfaceIndex * memIndex = NULL; > + if (mem->cmrt_mem_type == CMRT_BUFFER) { > + CmBuffer* cmrt_buffer = (CmBuffer*)(mem->cmrt_mem); > + cmrt_buffer->GetIndex(memIndex); > + } else { > + CmSurface2D* cmrt_surface2d = (CmSurface2D*)(mem->cmrt_mem); > + cmrt_surface2d->GetIndex(memIndex); > + } > + result = cmrt_kernel->SetKernelArg(index, sizeof(SurfaceIndex), > memIndex); > + } else > + return CL_INVALID_ARG_VALUE; > + } > + > + if (result != CM_SUCCESS) > + return CL_INVALID_KERNEL_ARGS; > + > + return CL_SUCCESS; > +} > + > +cl_int cmrt_destroy_memory(cl_mem mem) > +{ > + CmDevice* cmrt_device = (CmDevice*)(mem->ctx->device->cmrt_device); > + if (mem->cmrt_mem_type == CMRT_BUFFER) { > + CmBuffer*& cmrt_buffer = (CmBuffer*&)(mem->cmrt_mem); > + cmrt_device->DestroySurface(cmrt_buffer); > + } else { > + CmSurface2D*& cmrt_surface2d = (CmSurface2D*&)(mem->cmrt_mem); > + cmrt_device->DestroySurface(cmrt_surface2d); > + } > + return CL_SUCCESS; > +} > + > +cl_int cmrt_destroy_event(cl_command_queue cq) { > + CmEvent*& cmrt_event = (CmEvent*&)(cq->cmrt_event); > + CmDevice* cmrt_device = (CmDevice*)(cq->ctx->device->cmrt_device); > + CmQueue* pCmQueue = NULL; > + cmrt_event->WaitForTaskFinished(); > + cmrt_device->CreateQueue(pCmQueue); > + pCmQueue->DestroyEvent(cmrt_event); > + return CL_SUCCESS; > +} > + > +cl_int cmrt_wait_for_task_finished(cl_command_queue cq) { > + CmEvent* cmrt_event = (CmEvent*)(cq->cmrt_event); > + cmrt_event->WaitForTaskFinished(); > + return CL_SUCCESS; > +} > diff --git a/src/cl_cmrt.h b/src/cl_cmrt.h new file mode 100644 index > 0000000..316095c > --- /dev/null > +++ b/src/cl_cmrt.h > @@ -0,0 +1,45 @@ > +/* > + * Copyright @2015 Intel Corporation > + * > + * This library is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * This library is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > GNU > + * Lesser General Public License for more details. > + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with this library. If not, see > <http://www.gnu.org/licenses/>. > + * > + * Author: Guo Yejun <[email protected]> */ > + > +#ifndef __CL_CMRT_H__ > +#define __CL_CMRT_H__ > + > +#ifdef __cplusplus > +extern "C" { > +#endif > + > +#include "cl_kernel.h" > +#include "cl_program.h" > + > +cl_int cmrt_build_program(cl_program p, const char *options); cl_int > +cmrt_destroy_program(cl_program p); cl_int > +cmrt_destroy_device(cl_device_id device); > +void* cmrt_create_kernel(cl_program p, const char *name); cl_int > +cmrt_destroy_kernel(cl_kernel k); cl_int > cmrt_enqueue(cl_command_queue > +cq, cl_kernel k, const size_t* global_work_size, const size_t* > +local_work_size); cl_int cmrt_set_kernel_arg(cl_kernel k, cl_uint > +index, size_t sz, const void *value); cl_int cmrt_destroy_memory(cl_mem > +mem); cl_int cmrt_destroy_event(cl_command_queue cq); cl_int > +cmrt_wait_for_task_finished(cl_command_queue cq); > + > +#ifdef __cplusplus > +} > +#endif > + > +#endif > diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c index > 033e7df..549f648 100644 > --- a/src/cl_command_queue.c > +++ b/src/cl_command_queue.c > @@ -31,6 +31,7 @@ > #include "cl_khr_icd.h" > #include "cl_event.h" > #include "performance.h" > +#include "cl_cmrt.h" > > #include <assert.h> > #include <stdio.h> > @@ -47,6 +48,7 @@ cl_command_queue_new(cl_context ctx) > queue->magic = CL_MAGIC_QUEUE_HEADER; > queue->ref_n = 1; > queue->ctx = ctx; > + queue->cmrt_event = NULL; > if ((queue->thread_data = cl_thread_data_create()) == NULL) { > goto error; > } > @@ -76,6 +78,11 @@ cl_command_queue_delete(cl_command_queue > queue) > assert(queue); > if (atomic_dec(&queue->ref_n) != 1) return; > > +#ifdef HAS_CMRT > + if (queue->cmrt_event != NULL) > + cmrt_destroy_event(queue); > +#endif > + > // If there is a list of valid events, we need to give them > // a chance to call the call-back function. > cl_event_update_last_events(queue,1); > diff --git a/src/cl_command_queue.h b/src/cl_command_queue.h index > 2cd6739..d1b8c44 100644 > --- a/src/cl_command_queue.h > +++ b/src/cl_command_queue.h > @@ -44,6 +44,8 @@ struct _cl_command_queue { > cl_command_queue prev, next; /* We chain the command queues > together */ > void *thread_data; /* Used to store thread context data > */ > cl_mem perf; /* Where to put the perf counters */ > + > + void* cmrt_event; /* the latest CmEvent* of the command > queue */ > }; > > /* The macro to get the thread specified gpgpu struct. */ diff --git > a/src/cl_device_id.h b/src/cl_device_id.h index e971735..619fa0a 100644 > --- a/src/cl_device_id.h > +++ b/src/cl_device_id.h > @@ -123,6 +123,9 @@ struct _cl_device_id { > uint32_t atomic_test_result; > uint32_t image_pitch_alignment; > uint32_t image_base_address_alignment; > + > + //inited as NULL, created only when cmrt kernel is used > + void* cmrt_device; //realtype: CmDevice* > }; > > /* Get a device from the given platform */ diff --git a/src/cl_gt_device.h > b/src/cl_gt_device.h index d8089c2..b4c610e 100644 > --- a/src/cl_gt_device.h > +++ b/src/cl_gt_device.h > @@ -131,3 +131,4 @@ DECL_INFO_STRING(spir_versions, > "1.2") .device_reference_count = 1, .image_pitch_alignment = > 1, .image_base_address_alignment = 4096, > +.cmrt_device = NULL > diff --git a/src/cl_kernel.c b/src/cl_kernel.c index b2d1955..b380abe 100644 > --- a/src/cl_kernel.c > +++ b/src/cl_kernel.c > @@ -28,6 +28,7 @@ > #include "CL/cl.h" > #include "cl_sampler.h" > #include "cl_accelerator_intel.h" > +#include "cl_cmrt.h" > > #include <stdio.h> > #include <string.h> > @@ -41,6 +42,15 @@ cl_kernel_delete(cl_kernel k) > uint32_t i; > if (k == NULL) return; > > +#ifdef HAS_CMRT > + if (k->cmrt_kernel != NULL) { > + cmrt_destroy_kernel(k); > + k->magic = CL_MAGIC_DEAD_HEADER; /* For safety */ > + cl_free(k); > + return; > + } > +#endif > + > /* We are not done with the kernel */ > if (atomic_dec(&k->ref_n) > 1) return; > /* Release one reference on all bos we own */ @@ -71,6 +81,7 @@ > cl_kernel_new(cl_program p) > k->ref_n = 1; > k->magic = CL_MAGIC_KERNEL_HEADER; > k->program = p; > + k->cmrt_kernel = NULL; > > exit: > return k; > diff --git a/src/cl_kernel.h b/src/cl_kernel.h index 7f59162..05a882e 100644 > --- a/src/cl_kernel.h > +++ b/src/cl_kernel.h > @@ -69,6 +69,8 @@ struct _cl_kernel { > uint32_t arg_n:30; /* Number of arguments */ > uint32_t ref_its_program:1; /* True only for the user kernel (created by > clCreateKernel) */ > uint32_t vme:1; /* True only if it is a built-in kernel for > VME */ > + > + void* cmrt_kernel; /* CmKernel* */ > }; > > /* Allocate an empty kernel */ > diff --git a/src/cl_mem.c b/src/cl_mem.c index 9a6bb83..1f03c69 100644 > --- a/src/cl_mem.c > +++ b/src/cl_mem.c > @@ -27,6 +27,7 @@ > #include "cl_khr_icd.h" > #include "cl_kernel.h" > #include "cl_command_queue.h" > +#include "cl_cmrt.h" > > #include "CL/cl.h" > #include "CL/cl_intel.h" > @@ -268,6 +269,7 @@ cl_mem_allocate(enum cl_mem_type type, > mem->flags = flags; > mem->is_userptr = 0; > mem->offset = 0; > + mem->cmrt_mem = NULL; > if (mem->type == CL_MEM_IMAGE_TYPE) { > cl_mem_image(mem)->is_image_from_buffer = 0; > } > @@ -1166,6 +1168,11 @@ cl_mem_delete(cl_mem mem) > } > #endif > > +#ifdef HAS_CMRT > + if (mem->cmrt_mem != NULL) > + cmrt_destroy_memory(mem); > +#endif > + > /* iff we are a image, delete the 1d buffer if has. */ > if (IS_IMAGE(mem)) { > if (cl_mem_image(mem)->buffer_1d) { diff --git a/src/cl_mem.h > b/src/cl_mem.h index fb24115..c8f256d 100644 > --- a/src/cl_mem.h > +++ b/src/cl_mem.h > @@ -95,6 +95,9 @@ typedef struct _cl_mem { > cl_mem_dstr_cb *dstr_cb; /* The destroy callback. */ > uint8_t is_userptr; /* CL_MEM_USE_HOST_PTR is enabled*/ > size_t offset; /* offset of host_ptr to the page beginning, > only for > CL_MEM_USE_HOST_PTR*/ > + > + uint8_t cmrt_mem_type; /* CmBuffer, CmSurface2D, ... */ > + void* cmrt_mem; > } _cl_mem; > > struct _cl_mem_image { > diff --git a/src/cl_program.c b/src/cl_program.c index 98b6d51..1dca673 > 100644 > --- a/src/cl_program.c > +++ b/src/cl_program.c > @@ -25,6 +25,7 @@ > #include "cl_utils.h" > #include "cl_khr_icd.h" > #include "cl_gbe_loader.h" > +#include "cl_cmrt.h" > #include "CL/cl.h" > #include "CL/cl_intel.h" > > @@ -92,10 +93,17 @@ cl_program_delete(cl_program p) > p->ctx->programs = p->next; > pthread_mutex_unlock(&p->ctx->program_lock); > > - cl_free(p->bin); /* Free the blob */ > - for (i = 0; i < p->ker_n; ++i) /* Free the kernels */ > - cl_kernel_delete(p->ker[i]); > - cl_free(p->ker); > +#ifdef HAS_CMRT > + if (p->cmrt_program != NULL) > + cmrt_destroy_program(p); > + else > +#endif > + { > + cl_free(p->bin); /* Free the blob */ > + for (i = 0; i < p->ker_n; ++i) /* Free the kernels */ > + cl_kernel_delete(p->ker[i]); > + cl_free(p->ker); > + } > > /* Program belongs to their parent context */ > cl_context_delete(p->ctx); > @@ -123,6 +131,7 @@ cl_program_new(cl_context ctx) > p->ref_n = 1; > p->magic = CL_MAGIC_PROGRAM_HEADER; > p->ctx = ctx; > + p->cmrt_program = NULL; > p->build_log = calloc(1000, sizeof(char)); > if (p->build_log) > p->build_log_max_sz = 1000; > @@ -172,12 +181,14 @@ static const unsigned char > binary_type_header[BHI_MAX][BINARY_HEADER_LENGTH]= \ > {{'B','C', 0xC0, 0xDE}, > {1, 'B', 'C', 0xC0, 0xDE}, > {2, 'B', 'C', 0xC0, 0xDE}, > - {0, 'G','E', 'N', 'C'}}; > + {0, 'G','E', 'N', 'C'}, > + {'C','I', 'S', 'A'}, > + }; > > LOCAL cl_bool headerCompare(const unsigned char *BufPtr, > BINARY_HEADER_INDEX index) { > bool matched = true; > - int length = index == BHI_SPIR ? BINARY_HEADER_LENGTH - > 1 :BINARY_HEADER_LENGTH; > + int length = (index == BHI_SPIR || index == BHI_CMRT) ? > + BINARY_HEADER_LENGTH -1 :BINARY_HEADER_LENGTH; > int i = 0; > for (i = 0; i < length; ++i) > { > @@ -190,6 +201,7 @@ LOCAL cl_bool headerCompare(const unsigned char > *BufPtr, BINARY_HEADER_INDEX ind #define isLLVM_C_O(BufPtr) > headerCompare(BufPtr, BHI_COMPIRED_OBJECT) #define > isLLVM_LIB(BufPtr) headerCompare(BufPtr, BHI_LIBRARY) #define > isGenBinary(BufPtr) headerCompare(BufPtr, BHI_GEN_BINARY) > +#define isCMRT(BufPtr) headerCompare(BufPtr, BHI_CMRT) > > LOCAL cl_program > cl_program_create_from_binary(cl_context ctx, > @@ -236,8 +248,9 @@ cl_program_create_from_binary(cl_context ctx, > program->binary_sz = lengths[0]; > program->source_type = FROM_BINARY; > > - if(isSPIR((unsigned char*)program->binary)) { > - > + if (isCMRT((unsigned char*)program->binary)) { > + program->source_type = FROM_CMRT; > + }else if(isSPIR((unsigned char*)program->binary)) { > char* typed_binary; > TRY_ALLOC(typed_binary, cl_calloc(lengths[0]+1, sizeof(char))); > memcpy(typed_binary+1, binaries[0], lengths[0]); @@ -518,6 +531,20 @@ > cl_program_build(cl_program p, const char *options) > goto error; > } > > +#if HAS_CMRT > + if (p->source_type == FROM_CMRT) { > + //only here we begins to invoke cmrt > + //break spec to return other errors such as CL_DEVICE_NOT_FOUND > + err = cmrt_build_program(p, options); > + if (err == CL_SUCCESS) { > + p->build_status = CL_BUILD_SUCCESS; > + p->binary_type = CL_PROGRAM_BINARY_TYPE_EXECUTABLE; > + return CL_SUCCESS; > + } else > + goto error; > + } > +#endif > + > if (!check_cl_version_option(p, options)) { > err = CL_BUILD_PROGRAM_FAILURE; > goto error; > @@ -833,6 +860,20 @@ cl_program_create_kernel(cl_program p, const char > *name, cl_int *errcode_ret) > cl_int err = CL_SUCCESS; > uint32_t i = 0; > > +#ifdef HAS_CMRT > + if (p->cmrt_program != NULL) { > + void* cmrt_kernel = cmrt_create_kernel(p, name); > + if (cmrt_kernel != NULL) { > + to = cl_kernel_new(p); > + to->cmrt_kernel = cmrt_kernel; > + goto exit; > + } else { > + err = CL_INVALID_KERNEL_NAME; > + goto error; > + } > + } > +#endif > + > /* Find the program first */ > for (i = 0; i < p->ker_n; ++i) { > assert(p->ker[i]); > diff --git a/src/cl_program.h b/src/cl_program.h index 63ad16d..899a31a > 100644 > --- a/src/cl_program.h > +++ b/src/cl_program.h > @@ -34,14 +34,16 @@ enum { > FROM_SOURCE = 0, > FROM_LLVM = 1, > FROM_BINARY = 2, > - FROM_LLVM_SPIR = 3 > + FROM_LLVM_SPIR = 3, > + FROM_CMRT = 4, > }; > > typedef enum _BINARY_HEADER_INDEX { > BHI_SPIR = 0, > BHI_COMPIRED_OBJECT = 1, > BHI_LIBRARY = 2, > - BHI_GEN_BINARY = 3, /*remember update BHI_MAX if add option.*/ > + BHI_GEN_BINARY = 3, > + BHI_CMRT = 4, > BHI_MAX, > }BINARY_HEADER_INDEX; > > @@ -61,13 +63,15 @@ struct _cl_program { > size_t binary_sz; /* The binary size. */ > uint32_t binary_type; /* binary type: COMPILED_OBJECT(LLVM IR), > LIBRARY(LLVM IR with option "-create-library"), or EXECUTABLE(GEN binary). > */ > uint32_t ker_n; /* Number of declared kernels */ > - uint32_t source_type:2; /* Built from binary, source or LLVM */ > + uint32_t source_type:3; /* Built from binary, source, CMRT or LLVM*/ > uint32_t is_built:1; /* Did we call clBuildProgram on it? */ > int32_t build_status; /* build status. */ > char *build_opts; /* The build options for this program */ > size_t build_log_max_sz; /*build log maximum size in byte.*/ > char *build_log; /* The build log for this program. */ > size_t build_log_sz; /* The actual build log size.*/ > + > + void* cmrt_program; /* real type: CmProgram* */ > }; > > /* Create a empty program */ > -- > 1.9.1 > > _______________________________________________ > Beignet mailing list > [email protected] > http://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
