For input values where the native exp instruction provides enough precision, use the native instruction; otherwise, use the software path to emulate the exp function.
Signed-off-by: Guo Yejun <[email protected]> --- backend/src/backend/gen_insn_selection.cpp | 1 + backend/src/ir/instruction.hpp | 2 ++ backend/src/ir/instruction.hxx | 1 + backend/src/llvm/llvm_gen_backend.cpp | 2 ++ backend/src/llvm/llvm_gen_ocl_function.hxx | 1 + backend/src/ocl_stdlib.tmpl.h | 14 ++++++++++---- 6 files changed, 17 insertions(+), 4 deletions(-) mode change 100644 => 100755 backend/src/backend/gen_insn_selection.cpp mode change 100644 => 100755 backend/src/ir/instruction.hpp mode change 100644 => 100755 backend/src/ir/instruction.hxx mode change 100644 => 100755 backend/src/llvm/llvm_gen_backend.cpp mode change 100644 => 100755 backend/src/llvm/llvm_gen_ocl_function.hxx diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp old mode 100644 new mode 100755 index 445fd6d..fc9f305 --- a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -1572,6 +1572,7 @@ namespace gbe case ir::OP_COS: sel.MATH(dst, GEN_MATH_FUNCTION_COS, src); break; case ir::OP_SIN: sel.MATH(dst, GEN_MATH_FUNCTION_SIN, src); break; case ir::OP_LOG: sel.MATH(dst, GEN_MATH_FUNCTION_LOG, src); break; + case ir::OP_EXP: sel.MATH(dst, GEN_MATH_FUNCTION_EXP, src); break; case ir::OP_SQR: sel.MATH(dst, GEN_MATH_FUNCTION_SQRT, src); break; case ir::OP_RSQ: sel.MATH(dst, GEN_MATH_FUNCTION_RSQ, src); break; case ir::OP_RCP: sel.MATH(dst, GEN_MATH_FUNCTION_INV, src); break; diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp old mode 100644 new mode 100755 index 46577c7..e80badb --- a/backend/src/ir/instruction.hpp +++ b/backend/src/ir/instruction.hpp @@ -576,6 +576,8 @@ namespace ir { Instruction ABS(Type type, Register dst, Register src); /*! log.type dst src */ Instruction LOG(Type type, Register dst, Register src); + /*! exp.type dst src */ + Instruction EXP(Type type, Register dst, Register src); /*! 
sqr.type dst src */ Instruction SQR(Type type, Register dst, Register src); /*! rsq.type dst src */ diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx old mode 100644 new mode 100755 index baaaca2..bb5229a --- a/backend/src/ir/instruction.hxx +++ b/backend/src/ir/instruction.hxx @@ -29,6 +29,7 @@ DECL_INSN(MOV, UnaryInstruction) DECL_INSN(COS, UnaryInstruction) DECL_INSN(SIN, UnaryInstruction) DECL_INSN(LOG, UnaryInstruction) +DECL_INSN(EXP, UnaryInstruction) DECL_INSN(SQR, UnaryInstruction) DECL_INSN(RSQ, UnaryInstruction) DECL_INSN(RCP, UnaryInstruction) diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp old mode 100644 new mode 100755 index f178585..5780239 --- a/backend/src/llvm/llvm_gen_backend.cpp +++ b/backend/src/llvm/llvm_gen_backend.cpp @@ -2035,6 +2035,7 @@ namespace gbe case GEN_OCL_SQR: case GEN_OCL_RSQ: case GEN_OCL_LOG: + case GEN_OCL_EXP: case GEN_OCL_POW: case GEN_OCL_RCP: case GEN_OCL_ABS: @@ -2313,6 +2314,7 @@ namespace gbe case GEN_OCL_COS: this->emitUnaryCallInst(I,CS,ir::OP_COS); break; case GEN_OCL_SIN: this->emitUnaryCallInst(I,CS,ir::OP_SIN); break; case GEN_OCL_LOG: this->emitUnaryCallInst(I,CS,ir::OP_LOG); break; + case GEN_OCL_EXP: this->emitUnaryCallInst(I,CS,ir::OP_EXP); break; case GEN_OCL_SQR: this->emitUnaryCallInst(I,CS,ir::OP_SQR); break; case GEN_OCL_RSQ: this->emitUnaryCallInst(I,CS,ir::OP_RSQ); break; case GEN_OCL_RCP: this->emitUnaryCallInst(I,CS,ir::OP_RCP); break; diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx old mode 100644 new mode 100755 index c9e634c..de2890c --- a/backend/src/llvm/llvm_gen_ocl_function.hxx +++ b/backend/src/llvm/llvm_gen_ocl_function.hxx @@ -25,6 +25,7 @@ DECL_LLVM_GEN_FUNCTION(SIN, __gen_ocl_sin) DECL_LLVM_GEN_FUNCTION(SQR, __gen_ocl_sqrt) DECL_LLVM_GEN_FUNCTION(RSQ, __gen_ocl_rsqrt) DECL_LLVM_GEN_FUNCTION(LOG, __gen_ocl_log) +DECL_LLVM_GEN_FUNCTION(EXP, __gen_ocl_exp) 
DECL_LLVM_GEN_FUNCTION(POW, __gen_ocl_pow) DECL_LLVM_GEN_FUNCTION(RCP, __gen_ocl_rcp) DECL_LLVM_GEN_FUNCTION(RNDZ, __gen_ocl_rndz) diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h index bbd056f..5c6e2be 100755 --- a/backend/src/ocl_stdlib.tmpl.h +++ b/backend/src/ocl_stdlib.tmpl.h @@ -688,6 +688,7 @@ PURE CONST float __gen_ocl_cos(float x); PURE CONST float __gen_ocl_sqrt(float x); PURE CONST float __gen_ocl_rsqrt(float x); PURE CONST float __gen_ocl_log(float x); +PURE CONST float __gen_ocl_exp(float x); PURE CONST float __gen_ocl_pow(float x, float y); PURE CONST float __gen_ocl_rcp(float x); PURE CONST float __gen_ocl_rndz(float x); @@ -2247,7 +2248,7 @@ INLINE_OVERLOADABLE float native_tan(float x) { INLINE_OVERLOADABLE float __gen_ocl_internal_tanpi(float x) { return native_tan(x * M_PI_F); } -INLINE_OVERLOADABLE float native_exp(float x) { return __gen_ocl_pow(M_E_F, x); } +INLINE_OVERLOADABLE float native_exp(float x) { return __gen_ocl_exp(x); } INLINE_OVERLOADABLE float native_exp2(float x) { return __gen_ocl_pow(2, x); } INLINE_OVERLOADABLE float native_exp10(float x) { return __gen_ocl_pow(10, x); } INLINE_OVERLOADABLE float __gen_ocl_internal_cbrt(float x) { @@ -2619,7 +2620,12 @@ INLINE_OVERLOADABLE float __gen_ocl_internal_rint(float x) { } INLINE_OVERLOADABLE float __gen_ocl_internal_exp(float x) { - //return native_exp(x); + //use native instruction when it has enough precision + if (x > 128 || x < -128) + { + return native_exp(x); + } + float o_threshold = 8.8721679688e+01, /* 0x42b17180 */ u_threshold = -1.0397208405e+02, /* 0xc2cff1b5 */ twom100 = 7.8886090522e-31, /* 2**-100=0x0d800000 */ @@ -2890,7 +2896,7 @@ INLINE_OVERLOADABLE float __gen_ocl_internal_acosh(float x) { return 0.0; /* acosh(1) = 0 */ } else if (hx > 0x40000000) { /* 2**28 > x > 2 */ t=x*x; - return __gen_ocl_internal_log((float)2.0*x-one/(x+__gen_ocl_sqrt(t-one))); + return __gen_ocl_internal_log((float)2.0*x-one/(x+__gen_ocl_sqrt(t-one))); } else { /* 
1<x<2 */ t = x-one; return log1p(t+__gen_ocl_sqrt((float)2.0*t+t*t)); @@ -3363,7 +3369,7 @@ INLINE_OVERLOADABLE float hypot(float x, float y) { cn = __gen_ocl_sqrt (an * an + bn * bn); return ldexp (cn, e); }else{ - if (isinf (x) || isinf (y)) /* x or y is infinite. Return +Infinity. */ + if (isinf (x) || isinf (y)) /* x or y is infinite. Return +Infinity. */ return INFINITY; else /* x or y is NaN. Return NaN. */ return x + y; -- 1.7.9.5 _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
