From: Luo Xionghu <[email protected]> Translate native sqrt to llvm.sqrt for the fast path. v2: deleted one more line of RSQ.
Signed-off-by: Luo Xionghu <[email protected]> --- backend/src/libocl/tmpl/ocl_math.tmpl.cl | 2 +- backend/src/llvm/llvm_gen_backend.cpp | 2 -- backend/src/llvm/llvm_gen_ocl_function.hxx | 1 - 3 files changed, 1 insertion(+), 4 deletions(-) diff --git a/backend/src/libocl/tmpl/ocl_math.tmpl.cl b/backend/src/libocl/tmpl/ocl_math.tmpl.cl index 8f726ff..fc0343c 100644 --- a/backend/src/libocl/tmpl/ocl_math.tmpl.cl +++ b/backend/src/libocl/tmpl/ocl_math.tmpl.cl @@ -26,7 +26,7 @@ constant int __ocl_math_fastpath_flag = 1; PURE CONST float __gen_ocl_fabs(float x); CONST float __gen_ocl_sin(float x) __asm("llvm.sin" ".f32"); CONST float __gen_ocl_cos(float x) __asm("llvm.cos" ".f32"); -PURE CONST float __gen_ocl_sqrt(float x); +CONST float __gen_ocl_sqrt(float x) __asm("llvm.sqrt" ".f32"); PURE CONST float __gen_ocl_rsqrt(float x); PURE CONST float __gen_ocl_log(float x); PURE CONST float __gen_ocl_exp(float x); diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp index 34c571e..0f4c813 100644 --- a/backend/src/llvm/llvm_gen_backend.cpp +++ b/backend/src/llvm/llvm_gen_backend.cpp @@ -2854,7 +2854,6 @@ error: case GEN_OCL_FBH: case GEN_OCL_FBL: case GEN_OCL_CBIT: - case GEN_OCL_SQR: case GEN_OCL_RSQ: case GEN_OCL_LOG: case GEN_OCL_EXP: @@ -3344,7 +3343,6 @@ error: } case GEN_OCL_LOG: this->emitUnaryCallInst(I,CS,ir::OP_LOG); break; case GEN_OCL_EXP: this->emitUnaryCallInst(I,CS,ir::OP_EXP); break; - case GEN_OCL_SQR: this->emitUnaryCallInst(I,CS,ir::OP_SQR); break; case GEN_OCL_RSQ: this->emitUnaryCallInst(I,CS,ir::OP_RSQ); break; case GEN_OCL_RCP: this->emitUnaryCallInst(I,CS,ir::OP_RCP); break; case GEN_OCL_FABS: this->emitUnaryCallInst(I,CS,ir::OP_ABS); break; diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx index 0ae7ec2..ae14d9f 100644 --- a/backend/src/llvm/llvm_gen_ocl_function.hxx +++ b/backend/src/llvm/llvm_gen_ocl_function.hxx @@ -20,7 +20,6 @@ 
DECL_LLVM_GEN_FUNCTION(GET_WORK_DIM, __gen_ocl_get_work_dim) // Math function DECL_LLVM_GEN_FUNCTION(FABS, __gen_ocl_fabs) -DECL_LLVM_GEN_FUNCTION(SQR, __gen_ocl_sqrt) DECL_LLVM_GEN_FUNCTION(RSQ, __gen_ocl_rsqrt) DECL_LLVM_GEN_FUNCTION(LOG, __gen_ocl_log) DECL_LLVM_GEN_FUNCTION(EXP, __gen_ocl_exp) -- 1.9.1 _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
