Copied from fdlibm; passes the cft after refinement.

Signed-off-by: rander <[email protected]>
---
 backend/src/libocl/tmpl/ocl_math_common.tmpl.cl | 75 +++++++++++++++++++++++++
 backend/src/libocl/tmpl/ocl_math_common.tmpl.h  |  1 +
 2 files changed, 76 insertions(+)

diff --git a/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl b/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl
index e655955..d6ee279 100644
--- a/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl
+++ b/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl
@@ -236,6 +236,81 @@ OVERLOADABLE double asinh(double x)
        if(hx>0) return w; else return -w;
 }
 
+OVERLOADABLE double atan(double x)    /* arctangent of x (double): table-based argument reduction, then a polynomial in x*x */
+{
+       double atanhi[] = {    /* leading parts of atan(0.5), atan(1.0), atan(1.5), atan(inf); indexed by id */
+         4.63647609000806093515e-01, /* atan(0.5)hi 0x3FDDAC67, 0x0561BB4F */
+         7.85398163397448278999e-01, /* atan(1.0)hi 0x3FE921FB, 0x54442D18 */
+         9.82793723247329054082e-01, /* atan(1.5)hi 0x3FEF730B, 0xD281F69B */
+         1.57079632679489655800e+00, /* atan(inf)hi 0x3FF921FB, 0x54442D18 */
+       };
+
+       double atanlo[] = {    /* trailing (low-order) parts matching atanhi[]; hi+lo together give the table value */
+         2.26987774529616870924e-17, /* atan(0.5)lo 0x3C7A2B7F, 0x222F65E2 */
+         3.06161699786838301793e-17, /* atan(1.0)lo 0x3C81A626, 0x33145C07 */
+         1.39033110312309984516e-17, /* atan(1.5)lo 0x3C700788, 0x7AF0CBBD */
+         6.12323399573676603587e-17, /* atan(inf)lo 0x3C91A626, 0x33145C07 */
+       };
+
+       double aT[] = {        /* 11 polynomial coefficients consumed by s1 (even indices) and s2 (odd indices) below */
+         3.33333333333329318027e-01, /* 0x3FD55555, 0x5555550D */
+        -1.99999999998764832476e-01, /* 0xBFC99999, 0x9998EBC4 */
+         1.42857142725034663711e-01, /* 0x3FC24924, 0x920083FF */
+        -1.11111104054623557880e-01, /* 0xBFBC71C6, 0xFE231671 */
+         9.09088713343650656196e-02, /* 0x3FB745CD, 0xC54C206E */
+        -7.69187620504482999495e-02, /* 0xBFB3B0F2, 0xAF749A6D */
+         6.66107313738753120669e-02, /* 0x3FB10D66, 0xA0D03D51 */
+        -5.83357013379057348645e-02, /* 0xBFADDE2D, 0x52DEFD9A */
+         4.97687799461593236017e-02, /* 0x3FA97B4B, 0x24760DEB */
+        -3.65315727442169155270e-02, /* 0xBFA2B444, 0x2C6A6C2F */
+         1.62858201153657823623e-02, /* 0x3F90AD3A, 0xE322DA11 */
+       };
+
+       double one   = 1.0,
+       huge   = 1.0e300;      /* only used in the tiny-|x| test below */
+       double w,s1,s2,z;
+       int ix,hx,id;          /* hx: signed high word, ix: hx with bit 31 cleared, id: table index or -1 */
+
+       hx = __HI(x);          /* __HI is defined elsewhere; presumably the upper 32 bits of x (sign, exponent, top mantissa) - confirm */
+       ix = hx&0x7fffffff;    /* clear bit 31 so the comparisons below are on the magnitude */
+       if(ix>=0x44100000) {    /* if |x| >= 2^66 */
+               if(ix>0x7ff00000 ||(ix==0x7ff00000 && (__LO(x)!=0)))   /* __LO is defined elsewhere; presumably the lower 32 bits of x */
+               return x+x;             /* NaN */
+               if(hx>0) return  atanhi[3]+atanlo[3];  /* large positive x (incl. +inf): table value for atan(inf) */
+               else     return -atanhi[3]-atanlo[3];  /* large negative x (incl. -inf): negated table value */
+       } if (ix < 0x3fdc0000) {        /* |x| < 0.4375; the branch above always returns, so this plain `if` behaves like an else-if */
+               if (ix < 0x3e200000) {  /* |x| < 2^-29 */
+               if(huge+x>one) return x;        /* tiny |x|: return x itself; the compare is there to raise inexact */
+               }
+               id = -1;               /* no table entry: the polynomial is applied to x directly (x keeps its sign) */
+       } else {
+       x = fabs(x);                   /* reduce on |x|; the sign is reapplied from hx at the end */
+       if (ix < 0x3ff30000) {          /* |x| < 1.1875 */
+               if (ix < 0x3fe60000) {  /* 7/16 <=|x|<11/16 */
+               id = 0; x = (2.0*x-one)/(2.0+x);       /* atan(x) = atan(0.5) + atan((2x-1)/(2+x)) */
+               } else {                        /* 11/16<=|x|< 19/16 */
+               id = 1; x  = (x-one)/(x+one);          /* atan(x) = atan(1.0) + atan((x-1)/(x+1)) */
+               }
+       } else {
+               if (ix < 0x40038000) {  /* |x| < 2.4375 */
+               id = 2; x  = (x-1.5)/(one+1.5*x);      /* atan(x) = atan(1.5) + atan((x-1.5)/(1+1.5x)) */
+               } else {                        /* 2.4375 <= |x| < 2^66 */
+               id = 3; x  = -1.0/x;                   /* atan(x) = atan(inf) + atan(-1/x) for x > 0 */
+               }
+       }}
+       /* end of argument reduction */
+       z = x*x;               /* z = x^2 of the reduced argument */
+       w = z*z;               /* w = x^4 */
+       /* break sum from i=0 to 10 aT[i]z**(i+1) into odd and even poly */
+       s1 = z*(aT[0]+w*(aT[2]+w*(aT[4]+w*(aT[6]+w*(aT[8]+w*aT[10])))));       /* even-index coefficients */
+       s2 = w*(aT[1]+w*(aT[3]+w*(aT[5]+w*(aT[7]+w*aT[9]))));                  /* odd-index coefficients */
+       if (id<0) return x - x*(s1+s2);        /* unreduced small |x|: result already carries the sign of x */
+       else {
+               z = atanhi[id] - ((x*(s1+s2) - atanlo[id]) - x);       /* algebraically atanhi[id]+atanlo[id]+x-x*(s1+s2); NOTE(review): grouping presumably chosen for rounding - keep as is */
+               return (hx<0)? -z:z;   /* z was computed for |x|; negate when the original x was negative */
+       }
+}
+
 OVERLOADABLE double ceil(double x)
 {
     double ret;
diff --git a/backend/src/libocl/tmpl/ocl_math_common.tmpl.h b/backend/src/libocl/tmpl/ocl_math_common.tmpl.h
index 56c9787..46dc788 100644
--- a/backend/src/libocl/tmpl/ocl_math_common.tmpl.h
+++ b/backend/src/libocl/tmpl/ocl_math_common.tmpl.h
@@ -26,6 +26,7 @@ OVERLOADABLE double acosh(double x);
 OVERLOADABLE double asin(double x);
 OVERLOADABLE double asinpi(double x);
 OVERLOADABLE double asinh(double x);
+OVERLOADABLE double atan(double x);
 OVERLOADABLE double ceil(double x);
 OVERLOADABLE double copysign(double x, double y);
 OVERLOADABLE double fabs(double x);
-- 
2.7.4

_______________________________________________
Beignet mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/beignet

Reply via email to