This patch also needs to be rebased onto origin/master. The patch is too big. -----Original Message----- From: [email protected] [mailto:[email protected]] On Behalf Of [email protected] Sent: Thursday, December 19, 2013 2:17 AM To: [email protected] Cc: Lv, Meng Subject: [Beignet] [PATCH] [PATCH]GBE: improve precision of expm1, acosh, asinh, sinh
From: Lv Meng <[email protected]> Signed-off-by: Lv Meng <[email protected]> --- backend/src/ocl_stdlib.tmpl.h | 198 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 188 insertions(+), 10 deletions(-) diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h index e380b79..5e06919 100755 --- a/backend/src/ocl_stdlib.tmpl.h +++ b/backend/src/ocl_stdlib.tmpl.h @@ -1623,7 +1623,6 @@ INLINE_OVERLOADABLE float __gen_ocl_internal_tanpi(float x) { INLINE_OVERLOADABLE float native_exp(float x) { return __gen_ocl_pow(M_E_F, x); } INLINE_OVERLOADABLE float native_exp2(float x) { return __gen_ocl_pow(2, x); } INLINE_OVERLOADABLE float native_exp10(float x) { return __gen_ocl_pow(10, x); } -INLINE_OVERLOADABLE float __gen_ocl_internal_expm1(float x) { return __gen_ocl_pow(M_E_F, x) - 1; } INLINE_OVERLOADABLE float __gen_ocl_internal_cbrt(float x) { return __gen_ocl_pow(x, 0.3333333333f); } @@ -1635,9 +1634,6 @@ INLINE_OVERLOADABLE float sincos(float x, local float *cosval) { BODY; } INLINE_OVERLOADABLE float sincos(float x, private float *cosval) { BODY; } #undef BODY -INLINE_OVERLOADABLE float __gen_ocl_internal_sinh(float x) { - return (1 - native_exp(-2 * x)) / (2 * native_exp(-x)); -} INLINE_OVERLOADABLE float __gen_ocl_internal_cosh(float x) { return (1 + native_exp(-2 * x)) / (2 * native_exp(-x)); } @@ -1735,12 +1731,6 @@ INLINE_OVERLOADABLE float __gen_ocl_internal_atan(float x) { INLINE_OVERLOADABLE float __gen_ocl_internal_atanpi(float x) { return __gen_ocl_internal_atan(x) / M_PI_F; } -INLINE_OVERLOADABLE float __gen_ocl_internal_asinh(float x) { - return native_log(x + native_sqrt(x * x + 1)); -} -INLINE_OVERLOADABLE float __gen_ocl_internal_acosh(float x) { - return native_log(x + native_sqrt(x + 1) * native_sqrt(x - 1)); -} INLINE_OVERLOADABLE float __gen_ocl_internal_atanh(float x) { return 0.5f * native_sqrt((1 + x) / (1 - x)); } @@ -2025,6 +2015,194 @@ INLINE_OVERLOADABLE float __gen_ocl_internal_fmod (float x, float y) { } return x; 
/* exact output */ } +INLINE_OVERLOADABLE float __gen_ocl_internal_expm1(float x) { + //return __gen_ocl_pow(M_E_F, x) - 1; + float Q1 = -3.3333335072e-02, /* 0xbd088889 */ + ln2_hi = 6.9313812256e-01, /* 0x3f317180 */ + ln2_lo = 9.0580006145e-06, /* 0x3717f7d1 */ + Q2 = 1.5873016091e-03, /* 0x3ad00d01 */ + Q3 = -7.9365076090e-05, /* 0xb8a670cd */ + Q4 = 4.0082177293e-06, /* 0x36867e54 */ + Q5 = -2.0109921195e-07, /* 0xb457edbb */ + huge = 1.0e30, + tiny = 1.0e-30, + ivln2 = 1.4426950216e+00, /* 0x3fb8aa3b =1/ln2 */ + one = 1.0, + o_threshold= 8.8721679688e+01; /* 0x42b17180 */ + float y,hi,lo,c,t,e,hxs,hfx,r1; + int k,xsb; + int hx; + GET_FLOAT_WORD(hx,x); + xsb = hx&0x80000000; + /* sign bit of x */ + //if(xsb==0) + //y=x; + //else + //y= -x; /* y = |x| */ + y = __gen_ocl_internal_fabs(x); + hx &= 0x7fffffff; /* high word of |x| */ + /* filter out huge and non-finite argument */ + if(hx >= 0x4195b844) { /* if |x|>=27*ln2 */ + if(hx >= 0x42b17218) { /* if |x|>=88.721... */ + if(hx>0x7f800000) + return x+x; /* NaN */ + if(hx==0x7f800000) + return (xsb==0)? 
x:-1.0;/* exp(+-inf)={inf,-1} */ + if(x > o_threshold) + return huge*huge; /* overflow */ + } + if(xsb!=0) { /* x < -27*ln2, return -1.0 with inexact */ + if(x+tiny<(float)0.0) /* raise inexact */ + return tiny-one; /* return -1 */ + } + } + /* argument reduction */ + if(hx > 0x3eb17218) {/* if |x| > 0.5 ln2 */ + if(hx < 0x3F851592) {/* and |x| < 1.5 ln2 */ + if(xsb==0){ + hi = x - ln2_hi; lo = ln2_lo; k = 1; + } else { + hi = x + ln2_hi; lo = -ln2_lo; k = -1; + } + } else { + k = ivln2*x+((xsb==0)?(float)0.5:(float)-0.5); + t = k; + hi = x - t*ln2_hi;/* t*ln2_hi is exact here */ + lo = t*ln2_lo; + } + x = hi - lo; + c = (hi-x)-lo; + } else if(hx < 0x33000000) { /* when |x|<2**-25, return x */ + //t = huge+x; /* return x with inexact flags when x!=0 */ + //return x - (t-(huge+x)); + return x; + } else k = 0; + /* x is now in primary range */ + hfx = (float)0.5*x; + hxs = x*hfx; + r1 = one+hxs*(Q1+hxs*(Q2+hxs*(Q3+hxs*(Q4+hxs*Q5)))); + t = (float)3.0-r1*hfx; + e = hxs*((r1-t)/((float)6.0 - x*t)); + if(k==0) + return x - (x*e-hxs); /* c is 0 */ + else{ + e = (x*(e-c)-c); + e -= hxs; + if(k== -1)return (float)0.5*(x-e)-(float)0.5; + if(k==1){ + if(x < (float)-0.25) + return -(float)2.0*(e-(x+(float)0.5)); + else + return (one+(float)2.0*(x-e)); + } + if (k <= -2 || k>56) { /* suffice to return exp(x)-1 */ + int i; + y = one-(e-x); + GET_FLOAT_WORD(i,y); + SET_FLOAT_WORD(y,i+(k<<23)); /* add k to y's exponent */ + return y-one; + } + t = one; + if(k<23) { + int i; + SET_FLOAT_WORD(t,0x3f800000 - (0x1000000>>k)); /* t=1-2^-k */ + y = t-(e-x); + GET_FLOAT_WORD(i,y); + SET_FLOAT_WORD(y,i+(k<<23)); /* add k to y's exponent */ + } else { + int i; + SET_FLOAT_WORD(t,((0x7f-k)<<23)); /* 2^-k */ + y = x-(e+t); + y += one; + GET_FLOAT_WORD(i,y); + SET_FLOAT_WORD(y,i+(k<<23)); /* add k to y's exponent */ + } + } + return y; +} + +INLINE_OVERLOADABLE float __gen_ocl_internal_acosh(float x) { + //return native_log(x + native_sqrt(x + 1) * native_sqrt(x - 1)); + float one = 1.0, + 
ln2 = 6.9314718246e-01;/* 0x3f317218 */ + float t; + int hx; + GET_FLOAT_WORD(hx,x); + if(hx<0x3f800000) { /* x < 1 */ + return (x-x)/(x-x); + } else if(hx >=0x4d800000) { /* x > 2**28 */ + if(hx >=0x7f800000) {/* x is inf of NaN */ + return x+x; + } else + return __gen_ocl_internal_log(x)+ln2;/* acosh(huge)=log(2x) */ + } else if (hx==0x3f800000) { + return 0.0; /* acosh(1) = 0 */ + } else if (hx > 0x40000000) { /* 2**28 > x > 2 */ + t=x*x; + return __gen_ocl_internal_log((float)2.0*x-one/(x+__gen_ocl_sqrt(t-one))); + } else { /* 1<x<2 */ + t = x-one; + return log1p(t+__gen_ocl_sqrt((float)2.0*t+t*t)); + } +} +INLINE_OVERLOADABLE float __gen_ocl_internal_asinh(float x){ + //return native_log(x + native_sqrt(x * x + 1)); + float one = 1.0000000000e+00, /* 0x3F800000 */ + ln2 = 6.9314718246e-01, /* 0x3f317218 */ + huge= 1.0000000000e+30; + float w; + int hx,ix; + GET_FLOAT_WORD(hx,x); + ix = hx&0x7fffffff; + if(ix< 0x38000000) { /* |x|<2**-14 */ + if(huge+x>one) return x; /* return x inexact except 0 */ + } + if(ix>0x47000000) {/* |x| > 2**14 */ + if(ix>=0x7f800000) return x+x;/* x is inf or NaN */ + w = __gen_ocl_internal_log(__gen_ocl_internal_fabs(x))+ln2; + } else { + float xa = __gen_ocl_internal_fabs(x); + if (ix>0x40000000) {/* 2**14 > |x| > 2.0 */ + w = __gen_ocl_internal_log(2.0f*xa+one/(__gen_ocl_sqrt(xa*xa+one)+xa)); + } else { /* 2.0 > |x| > 2**-14 */ + float t = xa*xa; + w =log1p(xa+t/(one+__gen_ocl_sqrt(one+t))); + } + } + return __gen_ocl_internal_copysign(w, x); } INLINE_OVERLOADABLE float +__gen_ocl_internal_sinh(float x){ + //return (1 - native_exp(-2 * x)) / (2 * native_exp(-x)); + float one = 1.0, + shuge = 1.0e37; + float t,w,h; + int ix,jx; + GET_FLOAT_WORD(jx,x); + ix = jx&0x7fffffff; + /* x is INF or NaN */ + if(ix>=0x7f800000) return x+x; + h = 0.5; + if (jx<0) h = -h; + /* |x| in [0,22], return sign(x)*0.5*(E+E/(E+1))) */ + if (ix < 0x41b00000) { /* |x|<22 */ + if (ix<0x31800000) /* |x|<2**-28 */ + if(shuge+x>one) return x;/* sinh(tiny) = 
tiny with inexact */ + t = __gen_ocl_internal_expm1(__gen_ocl_internal_fabs(x)); + if(ix<0x3f800000) return h*((float)2.0*t-t*t/(t+one)); + return h*(t+t/(t+one)); + } + /* |x| in [22, log(maxdouble)] return 0.5*exp(|x|) */ + if (ix < 0x42b17180) return +h*__gen_ocl_internal_exp(__gen_ocl_internal_fabs(x)); + /* |x| in [log(maxdouble), overflowthresold] */ + if (ix<=0x42b2d4fc) { + w = __gen_ocl_internal_exp((float)0.5*__gen_ocl_internal_fabs(x)); + t = h*w; + return t*w; + } + /* |x| > overflowthresold, sinh(x) overflow */ + return x*shuge; +} + // TODO use llvm intrinsics definitions #define cos native_cos #define cospi __gen_ocl_internal_cospi -- 1.7.10.4 _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
