LGTM, pushed, thanks.
On Mon, Mar 24, 2014 at 04:27:31PM +0800, Yang Rong wrote: > Because GEN's select instruction with cmod .l and .ge will handle NaN case, so > use the compare and select instruction in gen ir for fmax and fmin, and will > be > optimized to one sel_cmp, need not check isnan. > > Signed-off-by: Yang Rong <[email protected]> > --- > backend/src/llvm/llvm_gen_backend.cpp | 18 ++++++++++++++++++ > backend/src/llvm/llvm_gen_ocl_function.hxx | 2 ++ > backend/src/ocl_stdlib.tmpl.h | 10 ++++------ > 3 files changed, 24 insertions(+), 6 deletions(-) > > diff --git a/backend/src/llvm/llvm_gen_backend.cpp > b/backend/src/llvm/llvm_gen_backend.cpp > index 49fbc7b..c459f25 100644 > --- a/backend/src/llvm/llvm_gen_backend.cpp > +++ b/backend/src/llvm/llvm_gen_backend.cpp > @@ -2134,6 +2134,8 @@ namespace gbe > case GEN_OCL_UPSAMPLE_INT: > case GEN_OCL_UPSAMPLE_LONG: > case GEN_OCL_MAD: > + case GEN_OCL_FMAX: > + case GEN_OCL_FMIN: > case GEN_OCL_SADD_SAT_CHAR: > case GEN_OCL_SADD_SAT_SHORT: > case GEN_OCL_SADD_SAT_INT: > @@ -2622,6 +2624,22 @@ namespace gbe > ctx.MAD(getType(ctx, I.getType()), dst, src0, src1, src2); > break; > } > + case GEN_OCL_FMAX: > + case GEN_OCL_FMIN:{ > + GBE_ASSERT(AI != AE); const ir::Register src0 = > this->getRegister(*AI); ++AI; > + GBE_ASSERT(AI != AE); const ir::Register src1 = > this->getRegister(*AI); ++AI; > + const ir::Register dst = this->getRegister(&I); > + const ir::Register cmp = ctx.reg(ir::FAMILY_BOOL); > + //Because cmp's sources are same as sel's source, so cmp > instruction and sel > + //instruction will be merged to one sel_cmp instruction in the > gen selection > + //Add two instructions here for simplicity.
> + if(it->second == GEN_OCL_FMAX) > + ctx.GE(getType(ctx, I.getType()), cmp, src0, src1); > + else > + ctx.LT(getType(ctx, I.getType()), cmp, src0, src1); > + ctx.SEL(getType(ctx, I.getType()), dst, cmp, src0, src1); > + break; > + } > case GEN_OCL_HADD: { > GBE_ASSERT(AI != AE); const ir::Register src0 = > this->getRegister(*AI); ++AI; > GBE_ASSERT(AI != AE); const ir::Register src1 = > this->getRegister(*AI); ++AI; > diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx > b/backend/src/llvm/llvm_gen_ocl_function.hxx > index 00d69f0..5bf794a 100644 > --- a/backend/src/llvm/llvm_gen_ocl_function.hxx > +++ b/backend/src/llvm/llvm_gen_ocl_function.hxx > @@ -33,6 +33,8 @@ DECL_LLVM_GEN_FUNCTION(RNDE, __gen_ocl_rnde) > DECL_LLVM_GEN_FUNCTION(RNDU, __gen_ocl_rndu) > DECL_LLVM_GEN_FUNCTION(RNDD, __gen_ocl_rndd) > DECL_LLVM_GEN_FUNCTION(MAD, __gen_ocl_mad) > +DECL_LLVM_GEN_FUNCTION(FMAX, __gen_ocl_fmax) > +DECL_LLVM_GEN_FUNCTION(FMIN, __gen_ocl_fmin) > > // Barrier function > DECL_LLVM_GEN_FUNCTION(LBARRIER, __gen_ocl_barrier_local) > diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h > index e3ac632..e823b5f 100755 > --- a/backend/src/ocl_stdlib.tmpl.h > +++ b/backend/src/ocl_stdlib.tmpl.h > @@ -3169,6 +3169,8 @@ INLINE_OVERLOADABLE float > __gen_ocl_internal_exp10(float x){ > #define remainder __gen_ocl_internal_remainder > #define ldexp __gen_ocl_internal_ldexp > PURE CONST float __gen_ocl_mad(float a, float b, float c); > +PURE CONST float __gen_ocl_fmax(float a, float b); > +PURE CONST float __gen_ocl_fmin(float a, float b); > INLINE_OVERLOADABLE float mad(float a, float b, float c) { > return __gen_ocl_mad(a, b, c); > } > @@ -3224,14 +3226,10 @@ DECL_MIN_MAX_CLAMP(long) > DECL_MIN_MAX_CLAMP(ulong) > #undef DECL_MIN_MAX_CLAMP > INLINE_OVERLOADABLE float max(float a, float b) { > - if(isnan(b)) > - return a; > - return a > b ? 
a : b; > + return __gen_ocl_fmax(a, b); > } > INLINE_OVERLOADABLE float min(float a, float b) { > - if(isnan(b)) > - return a; > - return a < b ? a : b; > + return __gen_ocl_fmin(a, b); > } > INLINE_OVERLOADABLE float clamp(float v, float l, float u) { > return max(min(v, u), l); > -- > 1.8.3.2 > > _______________________________________________ > Beignet mailing list > [email protected] > http://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
