LGTM, pushed, thanks.
On Fri, Feb 21, 2014 at 05:51:33AM +0800, Guo Yejun wrote: > > Signed-off-by: Guo Yejun <[email protected]> > --- > backend/src/backend/program.cpp | 27 ++++++- > backend/src/builtin_vector_proto.def | 39 ++++++++- > backend/src/ocl_stdlib.tmpl.h | 147 > +++++++++++++++++++++++++++++++++- > 3 files changed, 207 insertions(+), 6 deletions(-) > > diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp > index 98fcded..c2ac83d 100644 > --- a/backend/src/backend/program.cpp > +++ b/backend/src/backend/program.cpp > @@ -461,8 +461,33 @@ namespace gbe { > > #define REDEF_MATH_FUNC(x) "#ifdef "#x"\n#undef "#x"\n#endif\n#define "#x" > __gen_ocl_internal_fastpath_"#x"\n" > std::string ocl_mathfunc_fastpath_str = > - REDEF_MATH_FUNC(sin) > + REDEF_MATH_FUNC(acosh) > + REDEF_MATH_FUNC(asinh) > + REDEF_MATH_FUNC(atanh) > + REDEF_MATH_FUNC(cbrt) > REDEF_MATH_FUNC(cos) > + REDEF_MATH_FUNC(cosh) > + REDEF_MATH_FUNC(cospi) > + REDEF_MATH_FUNC(exp) > + REDEF_MATH_FUNC(exp10) > + REDEF_MATH_FUNC(expm1) > + REDEF_MATH_FUNC(fmod) > + REDEF_MATH_FUNC(hypot) > + REDEF_MATH_FUNC(ilogb) > + REDEF_MATH_FUNC(ldexp) > + REDEF_MATH_FUNC(log) > + REDEF_MATH_FUNC(log2) > + REDEF_MATH_FUNC(log10) > + REDEF_MATH_FUNC(log1p) > + REDEF_MATH_FUNC(logb) > + REDEF_MATH_FUNC(remainder) > + REDEF_MATH_FUNC(rootn) > + REDEF_MATH_FUNC(sin) > + REDEF_MATH_FUNC(sincos) > + REDEF_MATH_FUNC(sinh) > + REDEF_MATH_FUNC(sinpi) > + REDEF_MATH_FUNC(tan) > + REDEF_MATH_FUNC(tanh) > "\n" > ; > > diff --git a/backend/src/builtin_vector_proto.def > b/backend/src/builtin_vector_proto.def > index 7bc7c48..103e661 100644 > --- a/backend/src/builtin_vector_proto.def > +++ b/backend/src/builtin_vector_proto.def > @@ -130,8 +130,43 @@ gentype tgamma (gentype) > gentype trunc (gentype) > > ##math function fast path > -gentype __gen_ocl_internal_fastpath_sin (gentype) > -gentype __gen_ocl_internal_fastpath_cos (gentype) > +gentype __gen_ocl_internal_fastpath_acosh (gentype x) > +gentype __gen_ocl_internal_fastpath_asinh (gentype x) > +gentype __gen_ocl_internal_fastpath_atanh (gentype x) > +gentype __gen_ocl_internal_fastpath_cbrt (gentype x) > +gentype __gen_ocl_internal_fastpath_cos (gentype x) > +gentype __gen_ocl_internal_fastpath_cosh (gentype x) > +gentype __gen_ocl_internal_fastpath_cospi (gentype x) > +gentype __gen_ocl_internal_fastpath_exp (gentype x) > +gentype __gen_ocl_internal_fastpath_exp10 (gentype x) > +gentype __gen_ocl_internal_fastpath_expm1 (gentype x) > +gentype __gen_ocl_internal_fastpath_fmod (gentype x, gentype y) > +gentype __gen_ocl_internal_fastpath_hypot (gentype x, gentype y) > +intn __gen_ocl_internal_fastpath_ilogb (floatn x) > +int __gen_ocl_internal_fastpath_ilogb (float x) > +intn __gen_ocl_internal_fastpath_ilogb (doublen x) > +int __gen_ocl_internal_fastpath_ilogb (double x) > +floatn __gen_ocl_internal_fastpath_ldexp (floatn x, intn k) > +floatn __gen_ocl_internal_fastpath_ldexp (floatn x, int k) > +float __gen_ocl_internal_fastpath_ldexp (float x, int k) > +doublen __gen_ocl_internal_fastpath_ldexp (doublen x, intn k) > +doublen __gen_ocl_internal_fastpath_ldexp (doublen x, int k) > +double __gen_ocl_internal_fastpath_ldexp (double x, int k) > +gentype __gen_ocl_internal_fastpath_log (gentype x) > +gentype __gen_ocl_internal_fastpath_log2 (gentype x) > +gentype __gen_ocl_internal_fastpath_log10 (gentype x) > +gentype __gen_ocl_internal_fastpath_log1p (gentype x) > +gentype __gen_ocl_internal_fastpath_logb (gentype x) > +gentype __gen_ocl_internal_fastpath_remainder (gentype x, gentype y) > +gentype __gen_ocl_internal_fastpath_rootn (gentype x, int n) > +gentype __gen_ocl_internal_fastpath_sin (gentype x) > +gentype __gen_ocl_internal_fastpath_sincos (gentype x, __global gentype > *cosval) > +gentype __gen_ocl_internal_fastpath_sincos (gentype x, __local gentype > *cosval) > +gentype __gen_ocl_internal_fastpath_sincos (gentype x, __private gentype > *cosval) > +gentype __gen_ocl_internal_fastpath_sinh (gentype x) > +gentype __gen_ocl_internal_fastpath_sinpi (gentype x) > +gentype __gen_ocl_internal_fastpath_tan (gentype x) > +gentype __gen_ocl_internal_fastpath_tanh (gentype x) > > ##half_native_math > #gentype half_cos (gentype x) > diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h > index cea4700..46aab79 100755 > --- a/backend/src/ocl_stdlib.tmpl.h > +++ b/backend/src/ocl_stdlib.tmpl.h > @@ -4715,14 +4715,155 @@ INLINE_OVERLOADABLE size_t > get_image_array_size(image1d_array_t image) > { return __gen_ocl_get_image_array_size(image); } > #endif > > -INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_sin(float x) { > - return native_sin(x); > +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_acosh (float x) > +{ > + return native_log(x + native_sqrt(x + 1) * native_sqrt(x - 1)); > +} > + > +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_asinh (float x) > +{ > + return native_log(x + native_sqrt(x * x + 1)); > +} > + > +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_atanh (float x) > +{ > + return 0.5f * native_sqrt((1 + x) / (1 - x)); > +} > + > +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_cbrt (float x) > +{ > + return __gen_ocl_pow(x, 0.3333333333f); > } > > -INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_cos(float x) { > +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_cos (float x) > +{ > return native_cos(x); > } > > +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_cosh (float x) > +{ > + return (1 + native_exp(-2 * x)) / (2 * native_exp(-x)); > +} > + > +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_cospi (float x) > +{ > + return __gen_ocl_cos(x * M_PI_F); > +} > + > +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_exp (float x) > +{ > + return native_exp(x); > +} > + > +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_exp10 (float x) > +{ > + return native_exp10(x); > +} > + > +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_expm1 (float x) > +{ > + return __gen_ocl_pow(M_E_F, x) - 1; > +} > + > +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_fmod (float x, float y) > +{ > + return x-y*__gen_ocl_rndz(x/y); > +} > + > +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_hypot (float x, float > y) > +{ > + return __gen_ocl_sqrt(x*x + y*y); > +} > + > +INLINE_OVERLOADABLE int __gen_ocl_internal_fastpath_ilogb (float x) > +{ > + return __gen_ocl_rndd(native_log2(x)); > +} > + > +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_ldexp (float x, int n) > +{ > + return __gen_ocl_pow(2, n) * x; > +} > + > +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_log (float x) > +{ > + return native_log(x); > +} > + > +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_log2 (float x) > +{ > + return native_log2(x); > +} > + > +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_log10 (float x) > +{ > + return native_log10(x); > +} > + > +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_log1p (float x) > +{ > + return native_log(x + 1); > +} > + > +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_logb (float x) > +{ > + return __gen_ocl_rndd(native_log2(x)); > +} > + > +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_remainder (float x, > float y) > +{ > + return x-y*__gen_ocl_rnde(x/y); > +} > + > +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_rootn(float x, int n) > +{ > + return __gen_ocl_pow(x, 1.f / n); > +} > + > +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_sin (float x) > +{ > + return native_sin(x); > +} > + > +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_sincos (float x, > __global float *cosval) > +{ > + *cosval = native_cos(x); > + return native_sin(x); > +} > + > +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_sincos (float x, > __local float *cosval) > +{ > + *cosval = native_cos(x); > + return native_sin(x); > +} > + > +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_sincos (float x, > __private float *cosval) > +{ > + *cosval = native_cos(x); > + return native_sin(x); > +} > + > +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_sinh (float x) > +{ > + return (1 - native_exp(-2 * x)) / (2 * native_exp(-x)); > +} > + > +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_sinpi (float x) > +{ > + return __gen_ocl_sin(x * M_PI_F); > +} > + > +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_tan (float x) > +{ > + return native_tan(x); > +} > + > +INLINE_OVERLOADABLE float __gen_ocl_internal_fastpath_tanh (float x) > +{ > + float y = native_exp(-2 * x); > + return (1 - y) / (1 + y); > +} > + > #pragma OPENCL EXTENSION cl_khr_fp64 : disable > > #undef DECL_IMAGE > -- > 1.7.9.5 > > _______________________________________________ > Beignet mailing list > [email protected] > http://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
