Pushed, thanks.
On Wed, Sep 11, 2013 at 06:08:54AM +0000, Yang, Rong R wrote: > LGTM, test ok, thanks. > > -----Original Message----- > From: [email protected] > [mailto:[email protected]] On > Behalf Of Homer Hsing > Sent: Thursday, August 29, 2013 1:41 PM > To: [email protected] > Subject: [Beignet] [PATCH] add built-in function "atan2" > > also improve the accuracy of built-in function "atan" > also add a test case > > Signed-off-by: Homer Hsing <[email protected]> > --- > backend/src/builtin_vector_proto.def | 3 +-- > backend/src/ocl_stdlib.tmpl.h | 29 +++++++++++++++++++++++- > kernels/builtin_atan2.cl | 4 ++++ > utests/CMakeLists.txt | 1 + > utests/builtin_atan2.cpp | 43 > ++++++++++++++++++++++++++++++++++++ > 5 files changed, 77 insertions(+), 3 deletions(-) create mode 100644 > kernels/builtin_atan2.cl create mode 100644 utests/builtin_atan2.cpp > > diff --git a/backend/src/builtin_vector_proto.def > b/backend/src/builtin_vector_proto.def > index 2a3daf2..b24e7ea 100644 > --- a/backend/src/builtin_vector_proto.def > +++ b/backend/src/builtin_vector_proto.def > @@ -6,8 +6,7 @@ gentype asin (gentype) > gentype asinh (gentype) > gentype asinpi (gentype x) > gentype atan (gentype y_over_x) > -# XXX atan2 is a builtin function > -#gentype atan2 (gentype y, gentype x) > +gentype atan2 (gentype y, gentype x) > gentype atanh (gentype) > gentype atanpi (gentype x) > #gentype atan2pi (gentype y, gentype x) diff --git > a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h index > ac1999d..7f1032f 100644 > --- a/backend/src/ocl_stdlib.tmpl.h > +++ b/backend/src/ocl_stdlib.tmpl.h > @@ -1311,7 +1311,13 @@ INLINE_OVERLOADABLE float > __gen_ocl_internal_atan(float x) { > x = 1 / x; > c = -1; > } > - return a + c * (x - __gen_ocl_pow(x, 3) / 3 + __gen_ocl_pow(x, 5) / 5 - > __gen_ocl_pow(x, 7) / 7 + __gen_ocl_pow(x, 9) / 9 - __gen_ocl_pow(x, 11) / > 11); > + a += c*x; > + int i; > + int sign; > + for(i=3, sign=-1; i<63; i+=2, sign=-sign) { > + a += c*sign*__gen_ocl_pow(x,i)/i; > + } > + return a; > } > INLINE_OVERLOADABLE float __gen_ocl_internal_atanpi(float x) { > return __gen_ocl_internal_atan(x) / M_PI_F; @@ -1338,6 +1344,26 @@ > INLINE_OVERLOADABLE float __gen_ocl_internal_erfc(float x) { // XXX > work-around PTX profile #define sqrt native_sqrt INLINE_OVERLOADABLE float > rsqrt(float x) { return native_rsqrt(x); } > +INLINE_OVERLOADABLE float __gen_ocl_internal_atan2(float y, float x) { > + uint hx = *(uint *)(&x), ix = hx & 0x7FFFFFFF; > + uint hy = *(uint *)(&y), iy = hy & 0x7FFFFFFF; > + if (ix > 0x7F800000 || iy > 0x7F800000) > + return nan(0u); > + if (ix == 0) { > + if (y > 0) > + return M_PI_2_F; > + if (y < 0) > + return - M_PI_2_F; > + return nan(0u); > + } else { > + float z = __gen_ocl_internal_atan(y / x); > + if (x > 0) > + return z; > + if (y >= 0) > + return M_PI_F + z; > + return - M_PI_F + z; > + } > +} > INLINE_OVERLOADABLE float __gen_ocl_internal_fabs(float x) { return > __gen_ocl_fabs(x); } INLINE_OVERLOADABLE float > __gen_ocl_internal_trunc(float x) { return __gen_ocl_rndz(x); } > INLINE_OVERLOADABLE float __gen_ocl_internal_round(float x) { return > __gen_ocl_rnde(x); } @@ -1370,6 +1396,7 @@ INLINE_OVERLOADABLE float > __gen_ocl_internal_rint(float x) { #define tanpi __gen_ocl_internal_tanpi > #define tanh __gen_ocl_internal_tanh #define atan __gen_ocl_internal_atan > +#define atan2 __gen_ocl_internal_atan2 > #define atanpi __gen_ocl_internal_atanpi #define atanh > __gen_ocl_internal_atanh #define pow powr diff --git > a/kernels/builtin_atan2.cl b/kernels/builtin_atan2.cl new file mode 100644 > index 0000000..aba73be > --- /dev/null > +++ b/kernels/builtin_atan2.cl > @@ -0,0 +1,4 @@ > +kernel void builtin_atan2(global float *y, global float *x, global > +float *dst) { > + int i = get_global_id(0); > + dst[i] = atan2(y[i], x[i]); > +}; > diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt index > 08b4e32..835524d 100644 > --- a/utests/CMakeLists.txt > +++ b/utests/CMakeLists.txt > @@ -104,6 +104,7 @@ set (utests_sources > compiler_vector_load_store.cpp > compiler_cl_finish.cpp > get_cl_info.cpp > + builtin_atan2.cpp > builtin_bitselect.cpp > builtin_frexp.cpp > builtin_mad_sat.cpp > diff --git a/utests/builtin_atan2.cpp b/utests/builtin_atan2.cpp new file > mode 100644 index 0000000..29dd7b4 > --- /dev/null > +++ b/utests/builtin_atan2.cpp > @@ -0,0 +1,43 @@ > +#include <cmath> > +#include "utest_helper.hpp" > + > +void builtin_atan2(void) { > + const int n = 1024; > + float y[n], x[n]; > + > + // Setup kernel and buffers > + OCL_CREATE_KERNEL("builtin_atan2"); > + OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(float), NULL); > + OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(float), NULL); > + OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(float), NULL); > + OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); > + OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); > + OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]); > + globals[0] = n; > + locals[0] = 16; > + > + OCL_MAP_BUFFER(0); > + OCL_MAP_BUFFER(1); > + for (int i = 0; i < n; ++i) { > + y[i] = ((float*) buf_data[0])[i] = (rand()&255) * 0.01f; > + x[i] = ((float*) buf_data[1])[i] = (rand()&255) * 0.01f; > + } > + OCL_UNMAP_BUFFER(0); > + OCL_UNMAP_BUFFER(1); > + > + OCL_NDRANGE(1); > + > + OCL_MAP_BUFFER(2); > + float *dst = (float*) buf_data[2]; > + for (int i = 0; i < n; ++i) { > + float cpu = atan2f(y[i], x[i]); > + float gpu = dst[i]; > + if (fabsf(cpu - gpu) >= 1e-2) { > + printf("%f %f %f %f\n", y[i], x[i], cpu, gpu); > + OCL_ASSERT(0); > + } > + } > + OCL_UNMAP_BUFFER(2); > +} > + > +MAKE_UTEST_FROM_FUNCTION (builtin_atan2); > -- > 1.8.1.2 > > _______________________________________________ > Beignet mailing list > [email protected] > http://lists.freedesktop.org/mailman/listinfo/beignet > _______________________________________________ > Beignet mailing list > [email protected] > http://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
