Yes, I will send a new version.
> -----Original Message----- > From: Zhigang Gong [mailto:[email protected]] > Sent: Wednesday, January 7, 2015 08:41 > To: Yang, Rong R > Cc: [email protected] > Subject: Re: [Beignet] [Patch V2] Add read buffer/image benchmark. > > Is this the last version, or do you still have a newer version in hand? > I remember you said a few days ago that you had a newer version, but I am not sure. > > On Wed, Dec 24, 2014 at 03:44:33PM +0800, Yang Rong wrote: > > Add these two benchmarks to compare the buffer and image performance > > > > V2: init the coord before reading the image. > > Signed-off-by: Yang Rong <[email protected]> > > --- > > benchmark/CMakeLists.txt | 4 ++- > > benchmark/benchmark_read_buffer.cpp | 49 > +++++++++++++++++++++++++++ > > benchmark/benchmark_read_image.cpp | 67 > +++++++++++++++++++++++++++++++++++++ > > kernels/compiler_read_buffer.cl | 15 +++++++++ > > kernels/compiler_read_image.cl | 25 ++++++++++++++ > > 5 files changed, 159 insertions(+), 1 deletion(-) create mode 100644 > > benchmark/benchmark_read_buffer.cpp > > create mode 100644 benchmark/benchmark_read_image.cpp > > create mode 100644 kernels/compiler_read_buffer.cl create mode > > 100644 kernels/compiler_read_image.cl > > > > diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt index > > ac2d8aa..9a2bd77 100644 > > --- a/benchmark/CMakeLists.txt > > +++ b/benchmark/CMakeLists.txt > > @@ -12,7 +12,9 @@ set (benchmark_sources > > ../utests/utest_helper.cpp > > ../utests/vload_bench.cpp > > enqueue_copy_buf.cpp > > - benchmark_use_host_ptr_buffer.cpp) > > + benchmark_use_host_ptr_buffer.cpp > > + benchmark_read_buffer.cpp > > + benchmark_read_image.cpp) > > > > > > SET(CMAKE_CXX_FLAGS "-DBUILD_BENCHMARK ${CMAKE_CXX_FLAGS}") > diff > > --git a/benchmark/benchmark_read_buffer.cpp > > b/benchmark/benchmark_read_buffer.cpp > > new file mode 100644 > > index 0000000..31a1f59 > > --- /dev/null > > +++ b/benchmark/benchmark_read_buffer.cpp > > @@ -0,0 +1,49 @@ > > +#include "utests/utest_helper.hpp" 
> > +#include <sys/time.h> > > + > > +int benchmark_read_buffer(void) > > +{ > > + struct timeval start,stop; > > + > > + const size_t n = 1024 * 1024; > > + int count = 16; > > + const size_t sz = 4 * n * count; > > + > > + OCL_CREATE_BUFFER(buf[0], 0, sz * sizeof(float), NULL); > > + OCL_CREATE_BUFFER(buf[1], 0, sz * sizeof(float), NULL); > > + OCL_CREATE_BUFFER(buf[2], 0, sz * sizeof(float), NULL); > > + > > + OCL_CREATE_KERNEL("compiler_read_buffer"); > > + > > + OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, > > + sizeof(cl_mem), &buf[1]); OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]); > > + > > + OCL_MAP_BUFFER(0); > > + OCL_MAP_BUFFER(1); > > + for (size_t i = 0; i < sz; i ++) { > > + ((float *)(buf_data[0]))[i] = rand(); > > + ((float *)(buf_data[1]))[i] = rand(); } OCL_UNMAP_BUFFER(0); > > + OCL_UNMAP_BUFFER(1); > > + > > + // Setup kernel and buffers > > + globals[0] = n; > > + locals[0] = 256; > > + > > + gettimeofday(&start,0); > > + for (size_t i=0; i<100; i++) { > > + OCL_NDRANGE(1); > > + } > > + OCL_FINISH(); > > + gettimeofday(&stop,0); > > + > > + clReleaseMemObject(buf[0]); > > + free(buf_data[0]); > > + buf_data[0] = NULL; > > + > > + return time_subtract(&stop, &start, 0); } > > + > > +MAKE_BENCHMARK_FROM_FUNCTION(benchmark_read_buffer); > > diff --git a/benchmark/benchmark_read_image.cpp > > b/benchmark/benchmark_read_image.cpp > > new file mode 100644 > > index 0000000..913b6e6 > > --- /dev/null > > +++ b/benchmark/benchmark_read_image.cpp > > @@ -0,0 +1,67 @@ > > +#include <string.h> > > +#include "utests/utest_helper.hpp" > > +#include <sys/time.h> > > + > > +int benchmark_read_image(void) > > +{ > > + struct timeval start,stop; > > + > > + const size_t x_count = 4; > > + const size_t y_count = 4; > > + const size_t w = 1024; > > + const size_t h = 1024; > > + const size_t sz = 4 * x_count * y_count * w * h; cl_image_format > > + format; cl_image_desc desc; > > + > > + memset(&desc, 0x0, sizeof(cl_image_desc)); memset(&format, 0x0, > > + 
sizeof(cl_image_format)); > > + > > + // Setup kernel and images > > + OCL_CREATE_KERNEL("compiler_read_image"); > > + buf_data[0] = (uint32_t*) malloc(sizeof(float) * sz); buf_data[1] > > + = (uint32_t*) malloc(sizeof(float) * sz); for (uint32_t i = 0; i < > > + sz; ++i) { > > + ((float*)buf_data[0])[i] = rand(); > > + ((float*)buf_data[1])[i] = rand(); } > > + > > + format.image_channel_order = CL_RGBA; > > + format.image_channel_data_type = CL_FLOAT; desc.image_type = > > + CL_MEM_OBJECT_IMAGE2D; desc.image_width = w; desc.image_height > = > > + h; desc.image_row_pitch = w * sizeof(float) * 4; > > + OCL_CREATE_IMAGE(buf[0], CL_MEM_COPY_HOST_PTR, &format, > &desc, > > + buf_data[0]); OCL_CREATE_IMAGE(buf[1], CL_MEM_COPY_HOST_PTR, > > + &format, &desc, buf_data[1]); OCL_CREATE_BUFFER(buf[2], 0, sz * > > + sizeof(float), NULL); > > + > > + free(buf_data[0]); > > + buf_data[0] = NULL; > > + free(buf_data[1]); > > + buf_data[1] = NULL; > > + > > + // Run the kernel > > + OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, > > + sizeof(cl_mem), &buf[1]); OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]); > > + globals[0] = w; globals[1] = h; locals[0] = 16; locals[1] = 16; > > + > > + gettimeofday(&start,0); > > + for (size_t i=0; i<100; i++) { > > + OCL_NDRANGE(2); > > + } > > + OCL_FINISH(); > > + gettimeofday(&stop,0); > > + > > + clReleaseMemObject(buf[0]); > > + free(buf_data[0]); > > + buf_data[0] = NULL; > > + > > + return time_subtract(&stop, &start, 0); } > > + > > +MAKE_BENCHMARK_FROM_FUNCTION(benchmark_read_image); > > diff --git a/kernels/compiler_read_buffer.cl > > b/kernels/compiler_read_buffer.cl new file mode 100644 index > > 0000000..b6c11bd > > --- /dev/null > > +++ b/kernels/compiler_read_buffer.cl > > @@ -0,0 +1,15 @@ > > +#define COUNT 16 > > + > > +__kernel void > > +compiler_read_buffer(__global float4* src0, __global float4* src1, > > +__global float4* dst) { > > + float4 sum = 0; > > + int offset = 0, i = 0; > > + int id = (int)get_global_id(0); > > 
+ int sz = (int)get_global_size(0); > > + for(i=0; i<COUNT; i++) { > > + sum = sum + src0[offset + i] + src1[offset + i]; > > + offset += sz; > > + } > > + dst[id] = sum; > > +} > > diff --git a/kernels/compiler_read_image.cl > > b/kernels/compiler_read_image.cl new file mode 100644 index > > 0000000..f059743 > > --- /dev/null > > +++ b/kernels/compiler_read_image.cl > > @@ -0,0 +1,25 @@ > > +#define X_COUNT 4 > > +#define Y_COUNT 4 > > + > > +__kernel void > > +compiler_read_image(__read_only image2d_t src0, __read_only > image2d_t > > +src1, __global float4* dst) { > > + float4 sum = 0; > > + int2 coord; > > + int x_sz = (int)get_global_size(0); > > + int y_sz = (int)get_global_size(1); > > + const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE| > > +CLK_ADDRESS_CLAMP| CLK_FILTER_NEAREST; > > + int i, j; > > + > > + int x = (int)get_global_id(0); > > + int y = (int)get_global_id(1); > > + > > + for(i=0; i<X_COUNT; i++) { > > + coord.x = x + i * x_sz; > > + for(j=0; j<Y_COUNT; j++) { > > + coord.y = y + j * y_sz; > > + sum = sum + read_imagef(src0, sampler, coord) + read_imagef(src1, > sampler, coord); > > + } > > + } > > + dst[y * x_sz + x] = sum; > > +} > > -- > > 1.8.3.2 > > > > _______________________________________________ > > Beignet mailing list > > [email protected] > > http://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
