Ping for review. Thanks -----Original Message----- From: Song, Ruiling Sent: Friday, November 29, 2013 4:04 PM To: [email protected] Cc: Song, Ruiling Subject: [PATCH] GBE: improve asin/acos precision
Signed-off-by: Ruiling Song <[email protected]> --- backend/src/ocl_stdlib.tmpl.h | 77 +++++++++++++++++++++++++---------------- 1 file changed, 47 insertions(+), 30 deletions(-) diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h index 555c63c..09a6909 100644 --- a/backend/src/ocl_stdlib.tmpl.h +++ b/backend/src/ocl_stdlib.tmpl.h @@ -1362,51 +1362,68 @@ INLINE_OVERLOADABLE float __gen_ocl_internal_tanh(float x) { return (1 - y) / (1 + y); } -typedef union -{ - float value; - int word; -} ieee_float_shape_type; - -#ifndef GET_FLOAT_WORD -#define GET_FLOAT_WORD(i,d) \ -do { \ - ieee_float_shape_type gf_u; \ - gf_u.value = (d); \ - (i) = gf_u.word; \ -} while (0) -#endif +INLINE float __gen_ocl_asin_util(float x) { +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunSoft, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + float + pS0 = 1.66666666666666657415e-01, + pS1 = -3.25565818622400915405e-01, + pS2 = 2.01212532134862925881e-01, + pS3 = -4.00555345006794114027e-02, + pS4 = 7.91534994289814532176e-04, + pS5 = 3.47933107596021167570e-05, + qS1 = -2.40339491173441421878e+00, + qS2 = 2.02094576023350569471e+00, + qS3 = -6.88283971605453293030e-01, + qS4 = 7.70381505559019352791e-02; + + float t = x*x; + float p = t*(pS0+t*(pS1+t*(pS2+t*(pS3+t*(pS4+t*pS5))))); + float q = 1.0+t*(qS1+t*(qS2+t*(qS3+t*qS4))); + float w = p / q; + return x + x*w; +} INLINE_OVERLOADABLE float __gen_ocl_internal_asin(float x) { - int hx, ix; - GET_FLOAT_WORD(hx,x); - ix = hx&0x7fffffff; + uint ix; + union { uint i; float f; } u; + u.f = x; + ix = u.i & 0x7fffffff; if(ix == 0x3f800000) { return x * M_PI_2_F; /* asin(|1|)=+-pi/2 with inexact */ } if(ix > 0x3f800000) { /* |x|>= 1 */ - return (x-x) / (x-x); /* asin(|x|>1) is NaN */ + return NAN; /* asin(|x|>1) is NaN */ } + if(ix < 0x32000000) { /* if |x| < 2**-27 */ if(HUGE_VALF + x > FLT_ONE) return x; /* return x with inexact if x!=0*/ } - /* 1 > |x| >= 2**-27 */ - float sum = x, c = x, m = 1.0; - int n = 1; - do - { - c *= (2 * n - 1) * x * x; - m *= (2 * n); - sum += ( c / m / (2 * n + 1)); - n++; - }while( n < 30); - return sum; + + if(x < -0.5) { + return 2 * __gen_ocl_asin_util(native_sqrt((1+x) / 2)) - M_PI_2_F; + } else if(x > 0.5) { + return M_PI_2_F - 2 * __gen_ocl_asin_util(native_sqrt((1-x) / 2)); + } else { + return __gen_ocl_asin_util(x); + } } INLINE_OVERLOADABLE float __gen_ocl_internal_asinpi(float x) { return __gen_ocl_internal_asin(x) / M_PI_F; } INLINE_OVERLOADABLE float __gen_ocl_internal_acos(float x) { - return M_PI_2_F - __gen_ocl_internal_asin(x); + if(x > 0.5) + return 2 * __gen_ocl_asin_util(native_sqrt((1-x)/2)); + else + return M_PI_2_F - __gen_ocl_internal_asin(x); } INLINE_OVERLOADABLE float __gen_ocl_internal_acospi(float x) { return __gen_ocl_internal_acos(x) / M_PI_F; -- 1.7.9.5 _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
