From: Lv Meng <[email protected]>

Signed-off-by: Lv Meng <[email protected]>
---
 backend/src/ocl_stdlib.tmpl.h |  104 ++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 103 insertions(+), 1 deletion(-)
 mode change 100644 => 100755 backend/src/ocl_stdlib.tmpl.h

diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h
old mode 100644
new mode 100755
index e5f356e..850acd3
--- a/backend/src/ocl_stdlib.tmpl.h
+++ b/backend/src/ocl_stdlib.tmpl.h
@@ -1851,13 +1851,115 @@ INLINE_OVERLOADABLE float __gen_ocl_internal_round(float x) {
 }
 INLINE_OVERLOADABLE float __gen_ocl_internal_floor(float x) { return __gen_ocl_rndd(x); }
 INLINE_OVERLOADABLE float __gen_ocl_internal_ceil(float x)  { return __gen_ocl_rndu(x); }
-INLINE_OVERLOADABLE float __gen_ocl_internal_exp(float x)   { return native_exp(x); }
 INLINE_OVERLOADABLE float powr(float x, float y) { return __gen_ocl_pow(x,y); }
 INLINE_OVERLOADABLE float fmod(float x, float y) { return x-y*__gen_ocl_rndz(x/y); }
 INLINE_OVERLOADABLE float remainder(float x, float y) { return x-y*__gen_ocl_rnde(x/y); }
 INLINE_OVERLOADABLE float __gen_ocl_internal_rint(float x) {
   return __gen_ocl_rnde(x);
 }
+
+typedef union{  float value;  int word;} ieee_float_shape_type; /* overlays a float with an int so the float's bit pattern can be read or written */
+
+/* Copy the bit pattern of the float d into the integer lvalue i.  */
+#ifndef GET_FLOAT_WORD
+# define GET_FLOAT_WORD(i,d)        \
+do {                                \
+  ieee_float_shape_type get_pun;    \
+  get_pun.value = (d);              \
+  (i) = get_pun.word;               \
+} while (0)
+#endif
+
+/* Store into the float lvalue d the value whose bit pattern is the integer i.  */
+#ifndef SET_FLOAT_WORD
+# define SET_FLOAT_WORD(d,i)        \
+do {                                \
+  ieee_float_shape_type set_pun;    \
+  set_pun.word = (i);               \
+  (d) = set_pun.value;              \
+} while (0)
+#endif
+
+__constant float  /* lookup tables; __gen_ocl_internal_exp indexes ln2HI, ln2LO and halF by the sign bit of x */
+ln2HI[2] = {  6.9313812256e-01,       /* 0x3f317180 */
+             -6.9313812256e-01 },     /* 0xbf317180 */
+ln2LO[2] = {  9.0580006145e-06,       /* 0x3717f7d1 */
+             -9.0580006145e-06 },     /* 0xb717f7d1 */
+halF[2]  = {  0.5, -0.5 },
+Zero[]   = {  0.0, -0.0 },            /* not referenced by the code added in this hunk */
+bp[]     = {  1.0,  1.5 },            /* not referenced by the code added in this hunk */
+dp_h[]   = {  0.0,  5.84960938e-01 }, /* 0x3f15c000 */
+dp_l[]   = {  0.0,  1.56322085e-06 }; /* 0x35d1cfdc */
+
+/* Software exp(x) for float.  NaN, +-inf, overflow and underflow are answered first; otherwise x is
+ * reduced so that x = k*ln2 + (hi-lo), a polynomial-based approximation is evaluated on the reduced
+ * value, and k is then added to the exponent field of the result (i.e. the result is scaled by 2**k). */
+INLINE_OVERLOADABLE float __gen_ocl_internal_exp(float x)   {
+  const float o_threshold = 8.8721679688e+01,  /* 0x42b17180 */
+  u_threshold = -1.0397208405e+02,  /* 0xc2cff1b5 */
+  twom100 = 7.8886090522e-31,   /* 2**-100=0x0d800000 */
+  ivln2 = 1.4426950216e+00, /* 0x3fb8aa3b =1/ln2 */
+  one = 1.0,
+  huge = 1.0e+30,
+  P1 = 1.6666667163e-01, /* 0x3e2aaaab */
+  P2 = -2.7777778450e-03, /* 0xbb360b61 */
+  P3 = 6.6137559770e-05, /* 0x388ab355 */
+  P4 = -1.6533901999e-06, /* 0xb5ddea0e */
+  P5 = 4.1381369442e-08; /* 0x3331bb4c */
+  float y,hi=0.0,lo=0.0,c,t;
+  int k=0,xsb;                 /* k stays 0 unless argument reduction below sets it */
+  unsigned hx;
+
+  GET_FLOAT_WORD(hx,x);
+  xsb = (hx>>31)&1;            /* sign bit of x */
+  hx &= 0x7fffffff;            /* bit pattern of |x| */
+
+  /* filter out non-finite and out-of-range arguments */
+  if(hx >= 0x42b17218) {                       /* if |x| >= ~88.72 */
+    if(hx>0x7f800000)
+      return x+x;                      /* NaN */
+    if(hx==0x7f800000)
+      return (xsb==0)? x:0.0f; /* exp(+-inf)={inf,0} */
+    if(x > o_threshold) return huge*huge; /* overflow */
+    if(x < u_threshold) return twom100*twom100; /* underflow */
+  }
+
+  /* argument reduction */
+  if(hx > 0x3eb17218) {                /* if  |x| > 0.5 ln2 */
+    if(hx < 0x3F851592) {      /* and |x| < 1.5 ln2 */
+      hi = x-ln2HI[xsb]; lo=ln2LO[xsb]; k = 1-xsb-xsb;  /* k is +1 or -1 */
+    } else {
+      k  = (int)(ivln2*x+halF[xsb]);
+      t  = k;
+      hi = x - t*ln2HI[0];     /* t*ln2HI is exact here */
+      lo = t*ln2LO[0];
+    }
+    x  = hi - lo;
+  }
+  else if(hx < 0x31800000)  { /* when |x|<2**-28 */
+    if(huge+x>one) return one+x;/* trigger inexact */
+  }
+
+  /* x is now in primary range */
+  t  = x*x;
+  c  = x - t*(P1+t*(P2+t*(P3+t*(P4+t*P5))));
+  if(k==0)
+    return one-((x*c)/(c-(float)2.0)-x);
+  else
+    y = one-((lo-(x*c)/((float)2.0-c))-hi);
+  if(k >= -125) {
+    unsigned hy;
+    GET_FLOAT_WORD(hy,y);
+    SET_FLOAT_WORD(y,hy+((unsigned)k<<23));      /* add k to y's exponent; shift as unsigned since k may be negative */
+    return y;
+  } else {
+    unsigned hy;
+    GET_FLOAT_WORD(hy,y);
+    SET_FLOAT_WORD(y,hy+((unsigned)(k+100)<<23)); /* add k+100 to y's exponent */
+    return y*twom100;                  /* multiply by 2**-100 to cancel the extra 100 */
+  }
+}
+
 // TODO use llvm intrinsics definitions
 #define cos native_cos
 #define cospi __gen_ocl_internal_cospi
-- 
1.7.10.4

_______________________________________________
Beignet mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/beignet

Reply via email to