On Tue, Dec 09, 2014 at 02:32:23PM +0800, Zhu Bingbing wrote: > Signed-off-by: Zhu Bingbing <[email protected]> > --- > benchmark/CMakeLists.txt | 3 +- > benchmark/benchmark_copy_image_1d.cpp | 55 > ++++++++++++++++++++++++++++++ > kernels/runtime_benchmark_copy_image_1d.cl | 8 +++++ > 3 files changed, 65 insertions(+), 1 deletion(-) > create mode 100644 benchmark/benchmark_copy_image_1d.cpp > create mode 100644 kernels/runtime_benchmark_copy_image_1d.cl > > diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt > index ac2d8aa..86ac922 100644 > --- a/benchmark/CMakeLists.txt > +++ b/benchmark/CMakeLists.txt > @@ -12,7 +12,8 @@ set (benchmark_sources > ../utests/utest_helper.cpp > ../utests/vload_bench.cpp > enqueue_copy_buf.cpp > - benchmark_use_host_ptr_buffer.cpp) > + benchmark_use_host_ptr_buffer.cpp > + benchmark_copy_image_1d.cpp) > > > SET(CMAKE_CXX_FLAGS "-DBUILD_BENCHMARK ${CMAKE_CXX_FLAGS}") > diff --git a/benchmark/benchmark_copy_image_1d.cpp > b/benchmark/benchmark_copy_image_1d.cpp > new file mode 100644 > index 0000000..07cf1b8 > --- /dev/null > +++ b/benchmark/benchmark_copy_image_1d.cpp > @@ -0,0 +1,55 @@ > +#include <sys/time.h> > +#include <string.h> > +#include "utests/utest_helper.hpp" > + > +static int benchmark_copy_image_1d(void) > +{ > + size_t i = 0; > + const size_t w = 512; > + cl_image_format format; > + cl_image_desc desc; > + cl_sampler sampler; > + struct timeval start; > + struct timeval stop; > + > + memset(&desc, 0x0, sizeof(cl_image_desc)); > + memset(&format, 0x0, sizeof(cl_image_format)); > + > + // Setup kernel and images > + OCL_CREATE_KERNEL("runtime_benchmark_copy_image_1d"); > + buf_data[0] = (uint32_t*) malloc(sizeof(uint32_t) * w); > + for (uint32_t i = 0; i < w; i++) > + ((uint32_t*)buf_data[0])[i] = i; > + > + format.image_channel_order = CL_RGBA; > + format.image_channel_data_type = CL_UNSIGNED_INT8; > + desc.image_type = CL_MEM_OBJECT_IMAGE1D; > + desc.image_width = w; > + desc.image_row_pitch = w * 
sizeof(uint32_t); > + OCL_CREATE_IMAGE(buf[0], CL_MEM_COPY_HOST_PTR, &format, &desc, > buf_data[0]); > + > + desc.image_row_pitch = 0; > + OCL_CREATE_SAMPLER(sampler, CL_ADDRESS_REPEAT, CL_FILTER_NEAREST); > + > + // Run the kernel > + OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); > + OCL_SET_ARG(1, sizeof(sampler), &sampler); > + globals[0] = w; > + locals[0] = 16; > + > + gettimeofday(&start,0); > + for (i = 0; i < 10000; i++) > + { > + OCL_NDRANGE(1); > + OCL_FINISH(); > + } > + gettimeofday(&stop,0); > + > + clReleaseMemObject(buf[0]); > + free(buf_data[0]); > + buf_data[0] = NULL; > + > + return time_subtract(&stop, &start, 0); > +} > + > +MAKE_BENCHMARK_FROM_FUNCTION(benchmark_copy_image_1d); > diff --git a/kernels/runtime_benchmark_copy_image_1d.cl > b/kernels/runtime_benchmark_copy_image_1d.cl > new file mode 100644 > index 0000000..3aa0429 > --- /dev/null > +++ b/kernels/runtime_benchmark_copy_image_1d.cl > @@ -0,0 +1,8 @@ > +__kernel void > +runtime_benchmark_copy_image_1d(__read_only image1d_t src, sampler_t > sampler) > +{ > + int coord; > + int4 color; > + coord = (int)get_global_id(0); > + color = read_imagei(src, sampler, coord);
This is not the right method to measure sampler performance. It has the following three problems: 1. Each work item reads only one pixel, which is very inefficient. 2. The color is never used, so the read_imagei call may be optimized away by the compiler. 3. Using a dynamic sampler is very inefficient on the Gen platform. Please use a static sampler defined in the kernel instead. You can refer to the usage in kernels/test_copy_image1.cl. > +} > -- > 1.9.3 > > _______________________________________________ > Beignet mailing list > [email protected] > http://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
