From: Pan Xiuli <[email protected]> After the runtime refinement, OCL_OUTPUT_KERNEL_PERF is broken. Fix it so that it can be used for performance tuning again.
Signed-off-by: Pan Xiuli <[email protected]> --- src/cl_api_context.c | 2 ++ src/cl_command_queue_gen7.c | 1 + src/cl_enqueue.c | 3 +++ src/cl_enqueue.h | 1 + src/performance.c | 1 - 5 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/cl_api_context.c b/src/cl_api_context.c index e8184b1..7028f8d 100644 --- a/src/cl_api_context.c +++ b/src/cl_api_context.c @@ -19,6 +19,7 @@ #include "cl_context.h" #include "cl_device_id.h" #include "cl_alloc.h" +#include "performance.h" cl_context clCreateContext(const cl_context_properties *properties, @@ -55,6 +56,7 @@ clCreateContext(const cl_context_properties *properties, context = cl_create_context(properties, num_devices, devices, pfn_notify, user_data, &err); } while (0); + initialize_env_var(); if (errcode_ret) *errcode_ret = err; return context; diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c index dd82a44..6f85148 100644 --- a/src/cl_command_queue_gen7.c +++ b/src/cl_command_queue_gen7.c @@ -492,6 +492,7 @@ cl_command_queue_ND_range_gen7(cl_command_queue queue, event->exec_data.queue = queue; event->exec_data.gpgpu = gpgpu; event->exec_data.type = EnqueueNDRangeKernel; + event->exec_data.name = kernel.name; return CL_SUCCESS; diff --git a/src/cl_enqueue.c b/src/cl_enqueue.c index 8350089..166dc55 100644 --- a/src/cl_enqueue.c +++ b/src/cl_enqueue.c @@ -25,6 +25,7 @@ #include "cl_utils.h" #include "cl_alloc.h" #include "cl_device_enqueue.h" +#include "performance.h" #include <stdio.h> #include <string.h> #include <assert.h> @@ -579,6 +580,8 @@ cl_enqueue_ndrange(enqueue_data *data, cl_int status) void *batch_buf = cl_gpgpu_ref_batch_buf(data->gpgpu); cl_gpgpu_sync(batch_buf); cl_gpgpu_unref_batch_buf(batch_buf); + if(b_output_kernel_perf) + time_end(data->queue->ctx, data->name, "", data->queue); } return err; diff --git a/src/cl_enqueue.h b/src/cl_enqueue.h index 50a54fc..1532a59 100644 --- a/src/cl_enqueue.h +++ b/src/cl_enqueue.h @@ -81,6 +81,7 @@ typedef struct _enqueue_data { 
cl_bool mid_event_of_enq; /* For non-uniform ndrange, one enqueue have a sequence event, the last event need to parse device enqueue information. 0 : last event; 1: non-last event */ + const char* name; /* enqueue name */ } enqueue_data; /* Do real enqueue commands */ diff --git a/src/performance.c b/src/performance.c index 1e676c3..b8cdcc6 100644 --- a/src/performance.c +++ b/src/performance.c @@ -325,7 +325,6 @@ void time_start(cl_context context, const char * kernel_name, cl_command_queue c void time_end(cl_context context, const char * kernel_name, const char * build_opt, cl_command_queue cq) { - clFinish(cq); gettimeofday(&end, NULL); float t = (end.tv_sec - start.tv_sec)*1000 + (end.tv_usec - start.tv_usec)/1000.0f; insert(context, kernel_name, build_opt, t); -- 2.7.4 _______________________________________________ Beignet mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/beignet
