On Tue, Dec 09, 2014 at 02:32:23PM +0800, Zhu Bingbing wrote:
> Signed-off-by: Zhu Bingbing <[email protected]>
> ---
>  benchmark/CMakeLists.txt                   |  3 +-
>  benchmark/benchmark_copy_image_1d.cpp      | 55 
> ++++++++++++++++++++++++++++++
>  kernels/runtime_benchmark_copy_image_1d.cl |  8 +++++
>  3 files changed, 65 insertions(+), 1 deletion(-)
>  create mode 100644 benchmark/benchmark_copy_image_1d.cpp
>  create mode 100644 kernels/runtime_benchmark_copy_image_1d.cl
> 
> diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt
> index ac2d8aa..86ac922 100644
> --- a/benchmark/CMakeLists.txt
> +++ b/benchmark/CMakeLists.txt
> @@ -12,7 +12,8 @@ set (benchmark_sources
>    ../utests/utest_helper.cpp
>    ../utests/vload_bench.cpp
>    enqueue_copy_buf.cpp
> -  benchmark_use_host_ptr_buffer.cpp)
> +  benchmark_use_host_ptr_buffer.cpp
> +  benchmark_copy_image_1d.cpp)
>  
>  
>  SET(CMAKE_CXX_FLAGS "-DBUILD_BENCHMARK ${CMAKE_CXX_FLAGS}")
> diff --git a/benchmark/benchmark_copy_image_1d.cpp 
> b/benchmark/benchmark_copy_image_1d.cpp
> new file mode 100644
> index 0000000..07cf1b8
> --- /dev/null
> +++ b/benchmark/benchmark_copy_image_1d.cpp
> @@ -0,0 +1,55 @@
> +#include <sys/time.h>
> +#include <string.h>
> +#include "utests/utest_helper.hpp"
> +
> +static int benchmark_copy_image_1d(void)
> +{
> +  size_t i = 0;
> +  const size_t w = 512;
> +  cl_image_format format;
> +  cl_image_desc desc;
> +  cl_sampler sampler;
> +  struct timeval start;
> +  struct timeval stop;
> +
> +  memset(&desc, 0x0, sizeof(cl_image_desc));
> +  memset(&format, 0x0, sizeof(cl_image_format));
> +
> +  // Setup kernel and images
> +  OCL_CREATE_KERNEL("runtime_benchmark_copy_image_1d");
> +  buf_data[0] = (uint32_t*) malloc(sizeof(uint32_t) * w);
> +  for (uint32_t i = 0; i < w; i++)
> +      ((uint32_t*)buf_data[0])[i] = i;
> +
> +  format.image_channel_order = CL_RGBA;
> +  format.image_channel_data_type = CL_UNSIGNED_INT8;
> +  desc.image_type = CL_MEM_OBJECT_IMAGE1D;
> +  desc.image_width = w;
> +  desc.image_row_pitch = w * sizeof(uint32_t);
> +  OCL_CREATE_IMAGE(buf[0], CL_MEM_COPY_HOST_PTR, &format, &desc, 
> buf_data[0]);
> +
> +  desc.image_row_pitch = 0;
> +  OCL_CREATE_SAMPLER(sampler, CL_ADDRESS_REPEAT, CL_FILTER_NEAREST);
> +
> +  // Run the kernel
> +  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
> +  OCL_SET_ARG(1, sizeof(sampler), &sampler);
> +  globals[0] = w;
> +  locals[0] = 16;
> +
> +  gettimeofday(&start,0);
> +  for (i = 0; i < 10000; i++)
> +  {
> +    OCL_NDRANGE(1);
> +    OCL_FINISH();
> +  }
> +  gettimeofday(&stop,0);
> +
> +  clReleaseMemObject(buf[0]);
> +  free(buf_data[0]);
> +  buf_data[0] = NULL;
> +
> +  return time_subtract(&stop, &start, 0);
> +}
> +
> +MAKE_BENCHMARK_FROM_FUNCTION(benchmark_copy_image_1d);
> diff --git a/kernels/runtime_benchmark_copy_image_1d.cl 
> b/kernels/runtime_benchmark_copy_image_1d.cl
> new file mode 100644
> index 0000000..3aa0429
> --- /dev/null
> +++ b/kernels/runtime_benchmark_copy_image_1d.cl
> @@ -0,0 +1,8 @@
> +__kernel void
> +runtime_benchmark_copy_image_1d(__read_only image1d_t src,  sampler_t 
> sampler)
> +{
> +  int coord;
> +  int4 color;
> +  coord = (int)get_global_id(0);
> +  color = read_imagei(src, sampler, coord);

This is not the right method to measure sampler performance. It has the 
following 3 problems:

1. Each work item reads only one pixel, which is very inefficient.
2. The color is never used, so the read_imagei call may be optimized away by 
the compiler.
3. Using a dynamic sampler is very inefficient on the Gen platform. Please use 
a static sampler that is defined in the kernel.
   You can refer to the usage in kernels/test_copy_image1.cl.

> +}
> -- 
> 1.9.3
> 
> _______________________________________________
> Beignet mailing list
> [email protected]
> http://lists.freedesktop.org/mailman/listinfo/beignet
_______________________________________________
Beignet mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/beignet

Reply via email to