Modified according to ruiling's comment and pushed. Thanks.
On Tue, Dec 31, 2013 at 06:04:48AM +0000, Song, Ruiling wrote: > > One comment. The patch Tested OK. > -----Original Message----- > From: [email protected] > [mailto:[email protected]] On Behalf Of Lv Meng > Sent: Monday, December 23, 2013 8:21 AM > To: [email protected] > Cc: Lv, Meng > Subject: [Beignet] [PATCH] [PATCH]GBE: improve precision of ldexp > > > Signed-off-by: Lv Meng <[email protected]> > --- > backend/src/ocl_stdlib.tmpl.h | 100 > +++++++++++++++++++++-------------------- > 1 file changed, 51 insertions(+), 49 deletions(-) > > diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h > index 2345ecb..6ae7cd8 100755 > --- a/backend/src/ocl_stdlib.tmpl.h > +++ b/backend/src/ocl_stdlib.tmpl.h > @@ -173,6 +173,12 @@ do { \ > } while (0) > #endif > > +int __ocl_finitef (float x){ > It is safe to declare it as "INLINE". > + unsigned ix; > + GEN_OCL_GET_FLOAT_WORD (ix, x); > + return (ix & 0x7fffffff) < 0x7f800000; } > + > #define HUGE_VALF (__ocl_inff()) > #define INFINITY (__ocl_inff()) > #define NAN (__ocl_nanf()) > @@ -1651,6 +1657,14 @@ INLINE_OVERLOADABLE float native_exp10(float x) { > return __gen_ocl_pow(10, x); } INLINE_OVERLOADABLE float > __gen_ocl_internal_cbrt(float x) { > return __gen_ocl_pow(x, 0.3333333333f); } > +INLINE_OVERLOADABLE float __gen_ocl_internal_copysign(float x, float y) > +{ > + union { unsigned u; float f; } ux, uy; > + ux.f = x; > + uy.f = y; > + ux.u = (ux.u & 0x7fffffff) | (uy.u & 0x80000000u); > + return ux.f; > +} > + > #define BODY \ > *cosval = native_cos(x); \ > return native_sin(x); > @@ -1688,6 +1702,37 @@ INLINE float __gen_ocl_asin_util(float x) { > float w = p / q; > return x + x*w; > } > +float __gen_ocl_scalbnf (float x, int n){ > + float two25 = 3.355443200e+07, /* 0x4c000000 */ > + twom25 = 2.9802322388e-08, /* 0x33000000 */ > + huge = 1.0e+30, > + tiny = 1.0e-30; > + int k,ix; > + GEN_OCL_GET_FLOAT_WORD(ix,x); > + k = (ix&0x7f800000)>>23; /* extract exponent */ > + if (k==0) { /* 0 or subnormal x */ > + if ((ix&0x7fffffff)==0) return x; /* +-0 */ > + x *= two25; > + GEN_OCL_GET_FLOAT_WORD(ix,x); > + k = ((ix&0x7f800000)>>23) - 25; > + } > + if (k==0xff) return x+x; /* NaN or Inf */ > + if (n< -50000) > + return tiny*__gen_ocl_internal_copysign(tiny,x); /*underflow*/ > + if (n> 50000 || k+n > 0xfe) > + return huge*__gen_ocl_internal_copysign(huge,x); /* overflow */ > + /* Now k and n are bounded we know that k = k+n does not overflow. */ > + k = k+n; > + if (k > 0) { /* normal result */ > + GEN_OCL_SET_FLOAT_WORD(x,(ix&0x807fffff)|(k<<23)); > + return x; > + } > + if (k <= -25) > + return tiny*__gen_ocl_internal_copysign(tiny,x); /*underflow*/ > + k += 25; /* subnormal result */ > + GEN_OCL_SET_FLOAT_WORD(x,(ix&0x807fffff)|(k<<23)); > + return x*twom25; > +} > > INLINE_OVERLOADABLE float __gen_ocl_internal_asin(float x) { > uint ix; > @@ -1751,13 +1796,6 @@ INLINE_OVERLOADABLE float > __gen_ocl_internal_atanpi(float x) { INLINE_OVERLOADABLE float > __gen_ocl_internal_atanh(float x) { > return 0.5f * native_sqrt((1 + x) / (1 - x)); } -INLINE_OVERLOADABLE > float __gen_ocl_internal_copysign(float x, float y) { > - union { unsigned u; float f; } ux, uy; > - ux.f = x; > - uy.f = y; > - ux.u = (ux.u & 0x7fffffff) | (uy.u & 0x80000000u); > - return ux.f; > -} > INLINE_OVERLOADABLE float __gen_ocl_internal_erf(float x) { > return M_2_SQRTPI_F * (x - __gen_ocl_pow(x, 3) / 3 + __gen_ocl_pow(x, 5) / > 10 - __gen_ocl_pow(x, 7) / 42 + __gen_ocl_pow(x, 9) / 216); } @@ -2308,6 > +2346,11 @@ INLINE_OVERLOADABLE float __gen_ocl_internal_remainder(float x, > float p){ > return x; > } > > +INLINE_OVERLOADABLE float __gen_ocl_internal_ldexp(float x, int n) { > + if(!__ocl_finitef(x)||x==(float)0.0) return x; > + x = __gen_ocl_scalbnf(x,n); > + return x; > +} > > // TODO use llvm intrinsics definitions #define cos native_cos @@ -2338,6 > +2381,7 @@ INLINE_OVERLOADABLE float __gen_ocl_internal_remainder(float x, > float p){ #define erfc __gen_ocl_internal_erfc #define fmod > __gen_ocl_internal_fmod #define remainder __gen_ocl_internal_remainder > +#define ldexp __gen_ocl_internal_ldexp > PURE CONST float __gen_ocl_mad(float a, float b, float c); > INLINE_OVERLOADABLE float mad(float a, float b, float c) { > return __gen_ocl_mad(a, b, c); > @@ -2551,48 +2595,6 @@ INLINE_OVERLOADABLE float remquo(float x, float y, > local int *quo) { BODY; } INLINE_OVERLOADABLE float remquo(float x, float y, > private int *quo) { BODY; } #undef BODY INLINE_OVERLOADABLE float > native_divide(float x, float y) { return x/y; } -INLINE_OVERLOADABLE float > ldexp(float x, int n) { > - union { float f; unsigned u; } u; > - u.f = x; > - unsigned s = u.u & 0x80000000u, v = u.u & 0x7fffffff, d = 0; > - if(v >= 0x7f800000) > - return x; > - if(v == 0) > - return x; > - int e = v >> 23; > - v &= 0x7fffff; > - if(e >= 1) > - v |= 0x800000; > - else { > - v <<= 1; > - while(v < 0x800000) { > - v <<= 1; > - e --; > - } > - } > - e = add_sat(e, n); > - if(e >= 255) { > - u.u = s | 0x7f800000; > - return u.f; > - } > - if(e > 0) { > - u.u = s | (e << 23) | (v & 0x7fffff); > - return u.f; > - } > - if(e <= -23) { > - u.u = s; > - return u.f; > - } > - while(e <= 0) { > - d = (d >> 1) | (v << 31); > - v >>= 1; > - e ++; > - } > - if(d > 0x80000000u) > - v ++; > - u.u = s | v; > - return u.f; > -} > INLINE_OVERLOADABLE float pown(float x, int n) { > if (x == 0 && n == 0) > return 1; > -- > 1.7.10.4 > > _______________________________________________ > Beignet mailing list > [email protected] > http://lists.freedesktop.org/mailman/listinfo/beignet > _______________________________________________ > Beignet mailing list > [email protected] > http://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
