Some comments: 1. Don't change the file mode from 644 to 755. 2. Don't touch lines of code that you are not actually modifying. The other parts of the patch look good to me, but you need to rebase it onto the latest master.
Thanks! Ruiling -----Original Message----- From: [email protected] [mailto:[email protected]] On Behalf Of Guo Yejun Sent: Friday, January 17, 2014 7:01 AM To: [email protected]; Guo, Yejun Subject: [Beignet] [PATCH] GBE: use native exp instruction when enough precision for the input data with enough precision, use the native exp instruction, otherwise, use the software path to emulate the exp function. Signed-off-by: Guo Yejun <[email protected]> --- backend/src/backend/gen_insn_selection.cpp | 1 + backend/src/ir/instruction.hpp | 2 ++ backend/src/ir/instruction.hxx | 1 + backend/src/llvm/llvm_gen_backend.cpp | 2 ++ backend/src/llvm/llvm_gen_ocl_function.hxx | 1 + backend/src/ocl_stdlib.tmpl.h | 14 ++++++++++---- 6 files changed, 17 insertions(+), 4 deletions(-) mode change 100644 => 100755 backend/src/backend/gen_insn_selection.cpp mode change 100644 => 100755 backend/src/ir/instruction.hpp mode change 100644 => 100755 backend/src/ir/instruction.hxx mode change 100644 => 100755 backend/src/llvm/llvm_gen_backend.cpp mode change 100644 => 100755 backend/src/llvm/llvm_gen_ocl_function.hxx diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp old mode 100644 new mode 100755 index 445fd6d..fc9f305 --- a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -1572,6 +1572,7 @@ namespace gbe case ir::OP_COS: sel.MATH(dst, GEN_MATH_FUNCTION_COS, src); break; case ir::OP_SIN: sel.MATH(dst, GEN_MATH_FUNCTION_SIN, src); break; case ir::OP_LOG: sel.MATH(dst, GEN_MATH_FUNCTION_LOG, src); break; + case ir::OP_EXP: sel.MATH(dst, GEN_MATH_FUNCTION_EXP, src); + break; case ir::OP_SQR: sel.MATH(dst, GEN_MATH_FUNCTION_SQRT, src); break; case ir::OP_RSQ: sel.MATH(dst, GEN_MATH_FUNCTION_RSQ, src); break; case ir::OP_RCP: sel.MATH(dst, GEN_MATH_FUNCTION_INV, src); break; diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp old mode 100644 new mode 100755 index 
46577c7..e80badb --- a/backend/src/ir/instruction.hpp +++ b/backend/src/ir/instruction.hpp @@ -576,6 +576,8 @@ namespace ir { Instruction ABS(Type type, Register dst, Register src); /*! log.type dst src */ Instruction LOG(Type type, Register dst, Register src); + /*! exp.type dst src */ + Instruction EXP(Type type, Register dst, Register src); /*! sqr.type dst src */ Instruction SQR(Type type, Register dst, Register src); /*! rsq.type dst src */ diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx old mode 100644 new mode 100755 index baaaca2..bb5229a --- a/backend/src/ir/instruction.hxx +++ b/backend/src/ir/instruction.hxx @@ -29,6 +29,7 @@ DECL_INSN(MOV, UnaryInstruction) DECL_INSN(COS, UnaryInstruction) DECL_INSN(SIN, UnaryInstruction) DECL_INSN(LOG, UnaryInstruction) +DECL_INSN(EXP, UnaryInstruction) DECL_INSN(SQR, UnaryInstruction) DECL_INSN(RSQ, UnaryInstruction) DECL_INSN(RCP, UnaryInstruction) diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp old mode 100644 new mode 100755 index f178585..5780239 --- a/backend/src/llvm/llvm_gen_backend.cpp +++ b/backend/src/llvm/llvm_gen_backend.cpp @@ -2035,6 +2035,7 @@ namespace gbe case GEN_OCL_SQR: case GEN_OCL_RSQ: case GEN_OCL_LOG: + case GEN_OCL_EXP: case GEN_OCL_POW: case GEN_OCL_RCP: case GEN_OCL_ABS: @@ -2313,6 +2314,7 @@ namespace gbe case GEN_OCL_COS: this->emitUnaryCallInst(I,CS,ir::OP_COS); break; case GEN_OCL_SIN: this->emitUnaryCallInst(I,CS,ir::OP_SIN); break; case GEN_OCL_LOG: this->emitUnaryCallInst(I,CS,ir::OP_LOG); break; + case GEN_OCL_EXP: this->emitUnaryCallInst(I,CS,ir::OP_EXP); + break; case GEN_OCL_SQR: this->emitUnaryCallInst(I,CS,ir::OP_SQR); break; case GEN_OCL_RSQ: this->emitUnaryCallInst(I,CS,ir::OP_RSQ); break; case GEN_OCL_RCP: this->emitUnaryCallInst(I,CS,ir::OP_RCP); break; diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx old mode 100644 new mode 100755 index 
c9e634c..de2890c --- a/backend/src/llvm/llvm_gen_ocl_function.hxx +++ b/backend/src/llvm/llvm_gen_ocl_function.hxx @@ -25,6 +25,7 @@ DECL_LLVM_GEN_FUNCTION(SIN, __gen_ocl_sin) DECL_LLVM_GEN_FUNCTION(SQR, __gen_ocl_sqrt) DECL_LLVM_GEN_FUNCTION(RSQ, __gen_ocl_rsqrt) DECL_LLVM_GEN_FUNCTION(LOG, __gen_ocl_log) +DECL_LLVM_GEN_FUNCTION(EXP, __gen_ocl_exp) DECL_LLVM_GEN_FUNCTION(POW, __gen_ocl_pow) DECL_LLVM_GEN_FUNCTION(RCP, __gen_ocl_rcp) DECL_LLVM_GEN_FUNCTION(RNDZ, __gen_ocl_rndz) diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h index bbd056f..5c6e2be 100755 --- a/backend/src/ocl_stdlib.tmpl.h +++ b/backend/src/ocl_stdlib.tmpl.h @@ -688,6 +688,7 @@ PURE CONST float __gen_ocl_cos(float x); PURE CONST float __gen_ocl_sqrt(float x); PURE CONST float __gen_ocl_rsqrt(float x); PURE CONST float __gen_ocl_log(float x); +PURE CONST float __gen_ocl_exp(float x); PURE CONST float __gen_ocl_pow(float x, float y); PURE CONST float __gen_ocl_rcp(float x); PURE CONST float __gen_ocl_rndz(float x); @@ -2247,7 +2248,7 @@ INLINE_OVERLOADABLE float native_tan(float x) { INLINE_OVERLOADABLE float __gen_ocl_internal_tanpi(float x) { return native_tan(x * M_PI_F); } -INLINE_OVERLOADABLE float native_exp(float x) { return __gen_ocl_pow(M_E_F, x); } +INLINE_OVERLOADABLE float native_exp(float x) { return +__gen_ocl_exp(x); } INLINE_OVERLOADABLE float native_exp2(float x) { return __gen_ocl_pow(2, x); } INLINE_OVERLOADABLE float native_exp10(float x) { return __gen_ocl_pow(10, x); } INLINE_OVERLOADABLE float __gen_ocl_internal_cbrt(float x) { @@ -2619,7 +2620,12 @@ INLINE_OVERLOADABLE float __gen_ocl_internal_rint(float x) { } INLINE_OVERLOADABLE float __gen_ocl_internal_exp(float x) { - //return native_exp(x); + //use native instruction when it has enough precision if (x > 128 || + x < -128) { + return native_exp(x); + } + float o_threshold = 8.8721679688e+01, /* 0x42b17180 */ u_threshold = -1.0397208405e+02, /* 0xc2cff1b5 */ twom100 = 7.8886090522e-31, /* 
2**-100=0x0d800000 */ @@ -2890,7 +2896,7 @@ INLINE_OVERLOADABLE float __gen_ocl_internal_acosh(float x) { return 0.0; /* acosh(1) = 0 */ } else if (hx > 0x40000000) { /* 2**28 > x > 2 */ t=x*x; - return __gen_ocl_internal_log((float)2.0*x-one/(x+__gen_ocl_sqrt(t-one))); + return + __gen_ocl_internal_log((float)2.0*x-one/(x+__gen_ocl_sqrt(t-one))); } else { /* 1<x<2 */ t = x-one; return log1p(t+__gen_ocl_sqrt((float)2.0*t+t*t)); @@ -3363,7 +3369,7 @@ INLINE_OVERLOADABLE float hypot(float x, float y) { cn = __gen_ocl_sqrt (an * an + bn * bn); return ldexp (cn, e); }else{ - if (isinf (x) || isinf (y)) /* x or y is infinite. Return +Infinity. */ + if (isinf (x) || isinf (y)) /* x or y is infinite. Return + +Infinity. */ return INFINITY; else /* x or y is NaN. Return NaN. */ return x + y; -- 1.7.9.5 _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
