Signed-off-by: rander <[email protected]>
---
benchmark/CMakeLists.txt | 6 +-
benchmark/benchmark_math.cpp | 69 ++++++++++++-----------
kernels/bench_math.cl | 127 ++++++++++++++++++++++---------------------
3 files changed, 106 insertions(+), 96 deletions(-)
diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt
index f9b246b..e92b269 100644
--- a/benchmark/CMakeLists.txt
+++ b/benchmark/CMakeLists.txt
@@ -1,5 +1,7 @@
+# Minimum CMake version required to process this directory.
+cmake_minimum_required(VERSION 3.1)
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}
${CMAKE_CURRENT_SOURCE_DIR}/../utests
+ # Parent directory of benchmark/ is searched for headers as well.
+ ${CMAKE_CURRENT_SOURCE_DIR}/../
${CMAKE_CURRENT_SOURCE_DIR}/../include)
@@ -23,13 +25,13 @@ set (benchmark_sources
benchmark_math.cpp)
-SET(CMAKE_CXX_FLAGS "-DBUILD_BENCHMARK ${CMAKE_CXX_FLAGS}")
+# BUILD_BENCHMARK and -std=c++11 are prepended, so flags already present in
+# CMAKE_CXX_FLAGS come after them on the compiler command line.
+SET(CMAKE_CXX_FLAGS "-DBUILD_BENCHMARK -std=c++11 ${CMAKE_CXX_FLAGS}")
SET(CMAKE_C_FLAGS "-DBUILD_BENCHMARK ${CMAKE_C_FLAGS}")
ADD_LIBRARY(benchmarks SHARED ${ADDMATHFUNC} ${benchmark_sources})
#TARGET_LINK_LIBRARIES(benchmarks cl m ${OPENGL_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT})
-TARGET_LINK_LIBRARIES(benchmarks cl m)
+# Let FindThreads supply the thread library instead of naming pthread directly.
+FIND_PACKAGE(Threads REQUIRED)
+TARGET_LINK_LIBRARIES(benchmarks OpenCL ${CMAKE_THREAD_LIBS_INIT} m)
ADD_EXECUTABLE(benchmark_run benchmark_run.cpp)
TARGET_LINK_LIBRARIES(benchmark_run benchmarks)
diff --git a/benchmark/benchmark_math.cpp b/benchmark/benchmark_math.cpp
index 72bc316..b47814a 100644
--- a/benchmark/benchmark_math.cpp
+++ b/benchmark/benchmark_math.cpp
@@ -8,119 +8,122 @@
#include <sys/time.h>
+/* Time the kernel `str_kernel` from `str_filename` on global_size work items.
+ * `base` and `max` delimit the kernel inputs: the kernel receives `base` and
+ * a step of (max - base) / loop. Returns BANDWIDTH(global_size * loop, t),
+ * where t is the measured time divided by the number of timed launches.
+ */
double benchmark_generic_math(const char* str_filename,
- const char* str_kernel)
+ const char* str_kernel,
+ float base,
+ float max)
{
double elapsed = 0;
struct timeval start,stop;
const size_t global_size = 1024 * 1024;
- const size_t local_size = 64;
+ const size_t local_size = 256;
/* Compute math OP, loop times on global size */
- cl_float base = 1.000002;
cl_float pwr = 1.0102003;
- uint32_t loop = 1000;
+ uint32_t loop = 128;
- /* Input set will be generated */
- float* src = (float*)calloc(sizeof(float), global_size);
- OCL_ASSERT(src != NULL);
- for(uint32_t i = 0; i < global_size; i++)
- src[i] = base + i * (base - 1);
+ /* Launches timed back to back; the elapsed time is divided by this count */
+ const uint32_t repeat = 4;
+ /* Distance between two consecutive kernel inputs */
+ float step = (max - base) / loop;
/* Setup kernel and buffers */
OCL_CALL(cl_kernel_init, str_filename, str_kernel, SOURCE, "");
- OCL_CREATE_BUFFER(buf[0], 0, (global_size) * sizeof(float), NULL);
OCL_CREATE_BUFFER(buf[1], 0, (global_size) * sizeof(float), NULL);
- OCL_MAP_BUFFER(0);
- memcpy(buf_data[0], src, global_size * sizeof(float));
- OCL_UNMAP_BUFFER(0);
-
globals[0] = global_size;
locals[0] = local_size;
- OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
- OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
- OCL_SET_ARG(2, sizeof(cl_float), &pwr);
- OCL_SET_ARG(3, sizeof(cl_uint), &loop);
+ OCL_SET_ARG(0, sizeof(float), &base);
+ OCL_SET_ARG(1, sizeof(float), &step);
+ OCL_SET_ARG(2, sizeof(cl_mem), &buf[1]);
+ OCL_SET_ARG(3, sizeof(cl_float), &pwr);
+ OCL_SET_ARG(4, sizeof(cl_uint), &loop);
+
+ /* One untimed launch, completed before the timer starts */
+ OCL_NDRANGE(1);
+ clFinish(queue);
/* Measure performance */
gettimeofday(&start,0);
+ for(uint32_t r = 0; r < repeat; r++) {
OCL_NDRANGE(1);
+ }
clFinish(queue);
gettimeofday(&stop,0);
elapsed = time_subtract(&stop, &start, 0);
/* Show compute results */
+#if defined(SHOWRESULT) && SHOWRESULT
OCL_MAP_BUFFER(1);
for(uint32_t i = 0; i < global_size; i += 8192)
printf("\t%.3f", ((float*)buf_data[1])[i]);
OCL_UNMAP_BUFFER(1);
-
- return BANDWIDTH(global_size * loop, elapsed);
+#endif
+ /* Terminate the separator so later output starts on its own line */
+ printf("-----------------------\n");
+ return BANDWIDTH(global_size * loop, elapsed / repeat);
}
+/* The wrappers below pass (base, max) for the kernel's input sweep as the
+ * last two arguments of benchmark_generic_math. */
double benchmark_math_pow(void){
- return benchmark_generic_math("bench_math.cl", "bench_math_pow");
+ /* base 1.0, max 128.0 */
+ return benchmark_generic_math("bench_math.cl", "bench_math_pow", 1.0, 128.0);
}
MAKE_BENCHMARK_FROM_FUNCTION(benchmark_math_pow, "Mop/s");
double benchmark_math_exp2(void){
- return benchmark_generic_math("bench_math.cl", "bench_math_exp2");
+ /* base 0.1, max 8.0 */
+ return benchmark_generic_math("bench_math.cl", "bench_math_exp2", 0.1, 8.0);
}
MAKE_BENCHMARK_FROM_FUNCTION(benchmark_math_exp2, "Mop/s");
double benchmark_math_exp(void){
- return benchmark_generic_math("bench_math.cl", "bench_math_exp");
+ /* base 0.1, max 4.0; the kernel offsets each value before calling exp */
+ return benchmark_generic_math("bench_math.cl", "bench_math_exp", 0.1, 4.0);
}
MAKE_BENCHMARK_FROM_FUNCTION(benchmark_math_exp, "Mop/s");
double benchmark_math_exp10(void){
- return benchmark_generic_math("bench_math.cl", "bench_math_exp10");
+ /* base 0.1, max 4.0; the kernel offsets each value before calling exp10 */
+ return benchmark_generic_math("bench_math.cl", "bench_math_exp10", 0.1, 4.0);
}
MAKE_BENCHMARK_FROM_FUNCTION(benchmark_math_exp10, "Mop/s");
+/* The wrappers below pass (base, max) for the kernel's input sweep as the
+ * last two arguments of benchmark_generic_math. */
double benchmark_math_log2(void){
- return benchmark_generic_math("bench_math.cl", "bench_math_log2");
+ return benchmark_generic_math("bench_math.cl", "bench_math_log2", 0.008, 1.0);
}
MAKE_BENCHMARK_FROM_FUNCTION(benchmark_math_log2, "Mop/s");
double benchmark_math_log(void){
- return benchmark_generic_math("bench_math.cl", "bench_math_log");
+ return benchmark_generic_math("bench_math.cl", "bench_math_log", 0.008, 1.0);
}
MAKE_BENCHMARK_FROM_FUNCTION(benchmark_math_log, "Mop/s");
double benchmark_math_log10(void){
- return benchmark_generic_math("bench_math.cl", "bench_math_log10");
+ /* NOTE(review): max is 0.01 here but 1.0 for log and log2 - confirm intended */
+ return benchmark_generic_math("bench_math.cl", "bench_math_log10", 0.008, 0.01);
}
MAKE_BENCHMARK_FROM_FUNCTION(benchmark_math_log10, "Mop/s");
double benchmark_math_sqrt(void){
- return benchmark_generic_math("bench_math.cl", "bench_math_sqrt");
+ return benchmark_generic_math("bench_math.cl", "bench_math_sqrt", 0.1, 65537.0);
}
MAKE_BENCHMARK_FROM_FUNCTION(benchmark_math_sqrt, "Mop/s");
double benchmark_math_sin(void){
- return benchmark_generic_math("bench_math.cl", "bench_math_sin");
+ return benchmark_generic_math("bench_math.cl", "bench_math_sin", 0.001, 5000.0);
}
MAKE_BENCHMARK_FROM_FUNCTION(benchmark_math_sin, "Mop/s");
double benchmark_math_cos(void){
- return benchmark_generic_math("bench_math.cl", "bench_math_cos");
+ return benchmark_generic_math("bench_math.cl", "bench_math_cos", 0.001, 5000.0);
}
MAKE_BENCHMARK_FROM_FUNCTION(benchmark_math_cos, "Mop/s");
double benchmark_math_tan(void){
- return benchmark_generic_math("bench_math.cl", "bench_math_tan");
+ return benchmark_generic_math("bench_math.cl", "bench_math_tan", 0.001, 5000.0);
}
MAKE_BENCHMARK_FROM_FUNCTION(benchmark_math_tan, "Mop/s");
double benchmark_math_asin(void){
- return benchmark_generic_math("bench_math.cl", "bench_math_asin");
+ return benchmark_generic_math("bench_math.cl", "bench_math_asin", 0.001, 1.0);
}
MAKE_BENCHMARK_FROM_FUNCTION(benchmark_math_asin, "Mop/s");
double benchmark_math_acos(void){
- return benchmark_generic_math("bench_math.cl", "bench_math_acos");
+ return benchmark_generic_math("bench_math.cl", "bench_math_acos", 0.001, 1.0);
}
MAKE_BENCHMARK_FROM_FUNCTION(benchmark_math_acos, "Mop/s");
diff --git a/kernels/bench_math.cl b/kernels/bench_math.cl
index 8d85d51..f0c1a81 100644
--- a/kernels/bench_math.cl
+++ b/kernels/bench_math.cl
@@ -3,19 +3,20 @@
/* benchmark pow performance */
+/* Sums pow (native_powr when BENCHMARK_NATIVE is defined) over the inputs
+ * src + step * i for i = loop down to 1 and stores the sum in
+ * dst[get_global_id(0)]. The inputs depend only on the scalar arguments,
+ * not on the work-item id. */
kernel void bench_math_pow(
- global float *src,
+ float src,
+ float step,
global float *dst,
float pwr,
uint loop)
{
- float result = src[get_global_id(0)];
+ float result = 0;
for(; loop > 0; loop--)
{
#if defined(BENCHMARK_NATIVE)
- result = native_powr(result, pwr); /* calls native */
+ result += native_powr((src + step*loop), pwr); /* calls native */
#else
- result = pow(result, pwr); /* calls internal slow */
+ result += pow((src + step*loop), pwr); /* calls internal slow */
#endif
}
dst[get_global_id(0)] = result;
@@ -23,15 +24,16 @@ kernel void bench_math_pow(
/* benchmark exp2 performance, exp2 is native */
+/* Sums exp2(src + step * i) for i = loop down to 1 and stores the sum in
+ * dst[get_global_id(0)]. pwr is not used by this kernel. */
kernel void bench_math_exp2(
- global float *src,
+ float src,
+ float step,
global float *dst,
float pwr,
uint loop)
{
- float result = src[get_global_id(0)];
+ float result = 0;
for(; loop > 0; loop--)
- result = exp2(result) * 0.1f;
+ result += exp2((src + step*loop));
dst[get_global_id(0)] = result;
}
@@ -39,21 +41,22 @@ kernel void bench_math_exp2(
/* benchmark exp performance */
/* calls internal fast (native) if (x > -0x1.6p1 && x < 0x1.6p1) */
+/* Accumulates exp over x = src + step * i for i = loop down to 1. Each build
+ * variant combines x with -0x1.6p1 differently; the trailing comment on each
+ * line names the path that variant is meant to exercise. */
kernel void bench_math_exp(
- global float *src,
+ float src,
+ float step,
global float *dst,
float pwr,
uint loop)
{
- float result = src[get_global_id(0)];
+ float result = 0;
for(; loop > 0; loop--)
{
#if defined(BENCHMARK_NATIVE)
- result = native_exp((float)-0x1.6p1 - result * 0.1f); /* calls native */
+ result += native_exp((float)-0x1.6p1 - (src + step*loop)); /* calls native */
#elif defined(BENCHMARK_INTERNAL_FAST)
- result = exp((float)-0x1.6p1 + result * 0.1f); /* calls internal fast */
+ result += exp((float)-0x1.6p1 + (src + step*loop)); /* calls internal fast */
#else
- result = exp((float)-0x1.6p1 - result * 0.1f); /* calls internal slow */
+ result += exp((float)-0x1.6p1 - (src + step*loop)); /* calls internal slow */
#endif
}
@@ -63,21 +66,22 @@ kernel void bench_math_exp(
/* benchmark exp10 performance */
/* calls internal fast (native) if (x < -0x1.4p+5) || (x > +0x1.4p+5) */
+/* Accumulates exp10 over x = src + step * i for i = loop down to 1. Each
+ * build variant adds its own constant offset to x; the trailing comment on
+ * each line names the path that variant is meant to exercise. */
kernel void bench_math_exp10(
- global float *src,
+ float src,
+ float step,
global float *dst,
float pwr,
uint loop)
{
- float result = src[get_global_id(0)];
+ float result = 0;
for(; loop > 0; loop--)
{
#if defined(BENCHMARK_NATIVE)
- result = native_exp10((float)0x1.4p+5 + result * 0.1f); /* calls native */
+ result += native_exp10((float)0x1.4p+5 + (src + step*loop)); /* calls native */
#elif defined(BENCHMARK_INTERNAL_FAST)
- result = exp10((float)-0x1.4p+5 - result * 0.1f); /* calls internal fast */
+ result += exp10((float)-0x1.4p+5 - (src + step*loop)); /* calls internal fast */
#else
- result = exp10((float)-0x1.2p+5 - result * 0.1f); /* calls internal slow */
+ result += exp10((float)-0x1.2p+5 + (src + step*loop)); /* calls internal slow */
#endif
}
@@ -87,21 +91,22 @@ kernel void bench_math_exp10(
/* benchmark log2 performance */
/* calls internal fast (native) if (x > 0x1.1p0) */
+/* Accumulates log2 over x = src + step * i for i = loop down to 1. Each build
+ * variant adds x to, or subtracts it from, 0x1.1p0; the trailing comment on
+ * each line names the path that variant is meant to exercise. */
kernel void bench_math_log2(
- global float *src,
+ float src,
+ float step,
global float *dst,
float pwr,
uint loop)
{
- float result = src[get_global_id(0)];
+ float result = 0;
for(; loop > 0; loop--)
{
#if defined(BENCHMARK_NATIVE)
- result = native_log2((float)0x1.1p0 + result * 0.0001f); /* calls native */
+ result += native_log2((float)0x1.1p0 + (src + step*loop)); /* calls native */
#elif defined(BENCHMARK_INTERNAL_FAST)
- result = log2((float)0x1.1p0 + result * 0.0001f); /* calls internal fast */
+ result += log2((float)0x1.1p0 + (src + step*loop)); /* calls internal fast */
#else
- result = log2((float)0x1.1p0 - result * 0.0001f); /* calls internal slow */
+ result += log2((float)0x1.1p0 - (src + step*loop)); /* calls internal slow */
#endif
}
@@ -111,21 +116,22 @@ kernel void bench_math_log2(
/* benchmark log performance */
/* calls internal fast (native) if (x > 0x1.1p0) */
+/* Accumulates log over x = src + step * i for i = loop down to 1. Each build
+ * variant adds x to, or subtracts it from, 0x1.1p0; the trailing comment on
+ * each line names the path that variant is meant to exercise. */
kernel void bench_math_log(
- global float *src,
+ float src,
+ float step,
global float *dst,
float pwr,
uint loop)
{
- float result = src[get_global_id(0)];
+ float result = 0;
for(; loop > 0; loop--)
{
#if defined(BENCHMARK_NATIVE)
- result = native_log((float)0x1.1p0 + result * 0.0001f); /* calls native */
+ result += native_log((float)0x1.1p0 + (src + step*loop)); /* calls native */
#elif defined(BENCHMARK_INTERNAL_FAST)
- result = log((float)0x1.1p0 + result * 0.0001f); /* calls internal fast */
+ result += log((float)0x1.1p0 + (src + step*loop)); /* calls internal fast */
#else
- result = log((float)0x1.1p0 - result * 0.0001f); /* calls internal slow */
+ result += log((float)0x1.1p0 - (src + step*loop)); /* calls internal slow */
#endif
}
@@ -135,21 +141,22 @@ kernel void bench_math_log(
/* benchmark log10 performance */
/* calls internal fast (native) if (x > 0x1.1p0) */
+/* Accumulates log10 over x = src + step * i for i = loop down to 1. Each
+ * build variant adds x to, or subtracts it from, 0x1.1p0; the trailing
+ * comment on each line names the path that variant is meant to exercise. */
kernel void bench_math_log10(
- global float *src,
+ float src,
+ float step,
global float *dst,
float pwr,
uint loop)
{
- float result = src[get_global_id(0)];
+ float result = 0;
for(; loop > 0; loop--)
{
#if defined(BENCHMARK_NATIVE)
- result = native_log10((float)0x1.1p0 + result * 0.0001f); /* calls native */
+ result += native_log10((float)0x1.1p0 + (src + step*loop)); /* calls native */
#elif defined(BENCHMARK_INTERNAL_FAST)
- result = log10((float)0x1.1p0 + result * 0.0001f); /* calls internal fast */
+ result += log10((float)0x1.1p0 + (src + step*loop)); /* calls internal fast */
#else
- result = log10((float)0x1.1p0 - result * 0.0001f); /* calls internal slow */
+ result += log10((float)0x1.1p0 - (src + step*loop)); /* calls internal slow */
#endif
}
@@ -158,38 +165,36 @@ kernel void bench_math_log10(
/* benchmark sqrt performance */
+/* Sums sqrt(src + step * i) for i = loop down to 1 and stores the sum in
+ * dst[get_global_id(0)]. pwr is not used by this kernel. */
kernel void bench_math_sqrt(
- global float *src,
+ float src,
+ float step,
global float *dst,
float pwr,
uint loop)
{
- float result = src[get_global_id(0)];
+ float result = 0;
for(; loop > 0; loop--)
- result = sqrt(result) + sqrt(pwr + result);
+ result += sqrt((src + step*loop));
dst[get_global_id(0)] = result;
}
/* benchmark sin performance */
+/* Accumulates sin (native_sin when BENCHMARK_NATIVE is defined) over the
+ * inputs src + step * i for i = loop down to 1. The per-iteration rescale by
+ * 0x1p-16 used before is dropped because results no longer feed back. */
kernel void bench_math_sin(
- global float *src,
+ float src,
+ float step,
global float *dst,
float pwr,
uint loop)
{
- float result = src[get_global_id(0)];
+ float result = 0;
for(; loop > 0; loop--)
{
#if defined(BENCHMARK_NATIVE)
- result = native_sin(result); /* calls native */
+ result += native_sin((src + step*loop)); /* calls native */
#else
- result = sin(result); /* calls internal, random complexity */
- //result = sin(0.1f + result); /* calls internal, (1) no reduction */
- //result = sin(2.f + result); /* calls internal, (2) fast reduction */
- //result = sin(4001 + result); /* calls internal, (3) slow reduction */
- result *= 0x1p-16;
+ result += sin((src + step*loop)); /* calls internal, random complexity */
#endif
}
@@ -198,23 +203,20 @@ kernel void bench_math_sin(
/* benchmark cos performance */
+/* Sums cos (native_cos when BENCHMARK_NATIVE is defined) over the inputs
+ * src + step * i for i = loop down to 1 and stores the sum in
+ * dst[get_global_id(0)]. */
kernel void bench_math_cos(
- global float *src,
+ float src,
+ float step,
global float *dst,
float pwr,
uint loop)
{
- float result = src[get_global_id(0)];
+ float result = 0;
for(; loop > 0; loop--)
{
#if defined(BENCHMARK_NATIVE)
- result = native_cos(result); /* calls native */
+ result += native_cos((src + step*loop)); /* calls native */
#else
- result = cos(result); /* calls internal, random complexity */
- //result = cos(0.1f + result); /* calls internal, (1) no reduction */
- //result = cos(2.f + result); /* calls internal, (2) fast reduction */
- //result = cos(4001.f + result); /* calls internal, (3) slow reduction */
- result *= 0x1p-16;
+ result += cos((src + step*loop)); /* calls internal, random complexity */
#endif
}
dst[get_global_id(0)] = result;
@@ -222,19 +224,20 @@ kernel void bench_math_cos(
/* benchmark native tan performance */
+/* Accumulates tan (native_tan when BENCHMARK_NATIVE is defined) over the
+ * inputs src + step * i for i = loop down to 1. */
kernel void bench_math_tan(
- global float *src,
+ float src,
+ float step,
global float *dst,
float pwr,
uint loop)
{
- float result = src[get_global_id(0)];
+ float result = 0;
for(; loop > 0; loop--)
{
#if defined(BENCHMARK_NATIVE)
- result = native_tan(result); /* calls native */
+ result += native_tan((src + step*loop)); /* calls native */
#else
- result = tan(result); /* calls internal slow */
+ result += tan((src + step*loop)); /* calls internal slow */
#endif
}
@@ -243,30 +246,32 @@ kernel void bench_math_tan(
/* benchmark asin performance */
+/* Sums asin(src + step * i) for i = loop down to 1 and stores the sum in
+ * dst[get_global_id(0)]. The argument now varies per iteration instead of
+ * being the constant pwr - 1; pwr is no longer used by this kernel. */
kernel void bench_math_asin(
- global float *src,
+ float src,
+ float step,
global float *dst,
float pwr,
uint loop)
{
- float result = src[get_global_id(0)];
+ float result = 0;
for(; loop > 0; loop--)
- result = asin(pwr - 1);
+ result += asin((src + step*loop));
dst[get_global_id(0)] = result;
}
/* benchmark acos performance */
+/* Sums acos(src + step * i) for i = loop down to 1 and stores the sum in
+ * dst[get_global_id(0)]. The argument now varies per iteration instead of
+ * being the constant pwr - 1; pwr is no longer used by this kernel. */
kernel void bench_math_acos(
- global float *src,
+ float src,
+ float step,
global float *dst,
float pwr,
uint loop)
{
- float result = src[get_global_id(0)];
+ float result = 0;
for(; loop > 0; loop--)
- result = acos(pwr - 1);
+ result += acos((src + step*loop));
dst[get_global_id(0)] = result;
}
--
2.7.4
_______________________________________________
Beignet mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/beignet