I don't see the need for, or any benefit in, mixing "iround" (i.e., float -> int) with "round" (i.e., float -> float).
If this is a one-off, then you should just call lp_build_intrinsic_unary(builder, "llvm.ppc.altivec.vctsxs", ...). If you really need a generic intrinsic helper for iround, then please add a new lp_build_iround_foo(..., enum lp_build_round_mode mode), where the mode maps as follows: LP_BUILD_ROUND_NEAREST -> iround, LP_BUILD_ROUND_FLOOR -> ifloor, LP_BUILD_ROUND_CEIL -> iceil, LP_BUILD_ROUND_TRUNCATE -> itrunc. Jose ----- Original Message ----- > From: Adhemerval Zanella <[email protected]> > > This adds another rounding mode to the enum, which happens otherwise to > match SSE4.1's rounding modes. This should be safe as long as the > IROUND case never hits the SSE4.1 path. > > Reviewed-by: Adam Jackson <[email protected]> > Signed-off-by: Adhemerval Zanella <[email protected]> > --- > src/gallium/auxiliary/gallivm/lp_bld_arit.c | 29 > +++++++++++++++++++---------- > 1 file changed, 19 insertions(+), 10 deletions(-) > > diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c > b/src/gallium/auxiliary/gallivm/lp_bld_arit.c > index ec05026..021cd6e 100644 > --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c > +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c > @@ -1360,10 +1360,17 @@ lp_build_int_to_float(struct lp_build_context *bld, > static boolean > arch_rounding_available(const struct lp_type type) > { > + /* SSE4 vector rounding. */ > if ((util_cpu_caps.has_sse4_1 && > (type.length == 1 || type.width*type.length == 128)) || > (util_cpu_caps.has_avx && type.width*type.length == 256)) > return TRUE; > + /* SSE2 vector to word. */ > + else if ((util_cpu_caps.has_sse2 && > + ((type.width == 32) && (type.length == 1 || type.length == 4))) > || > + (util_cpu_caps.has_avx && type.width == 32 && type.length == 8)) > + return TRUE; > + /* Altivec rounding and vector to word. 
*/ > else if ((util_cpu_caps.has_altivec && > (type.width == 32 && type.length == 4))) > return TRUE; > @@ -1376,7 +1383,8 @@ enum lp_build_round_mode > LP_BUILD_ROUND_NEAREST = 0, > LP_BUILD_ROUND_FLOOR = 1, > LP_BUILD_ROUND_CEIL = 2, > - LP_BUILD_ROUND_TRUNCATE = 3 > + LP_BUILD_ROUND_TRUNCATE = 3, > + LP_BUILD_IROUND = 4 > }; > > /** > @@ -1400,6 +1408,7 @@ lp_build_round_sse41(struct lp_build_context *bld, > > assert(lp_check_value(type, a)); > assert(util_cpu_caps.has_sse4_1); > + assert(mode != LP_BUILD_IROUND); > > if (type.length == 1) { > LLVMTypeRef vec_type; > @@ -1526,8 +1535,6 @@ lp_build_iround_nearest_sse2(struct lp_build_context > *bld, > } > > > -/* > - */ > static INLINE LLVMValueRef > lp_build_round_altivec(struct lp_build_context *bld, > LLVMValueRef a, > @@ -1536,8 +1543,10 @@ lp_build_round_altivec(struct lp_build_context *bld, > LLVMBuilderRef builder = bld->gallivm->builder; > const struct lp_type type = bld->type; > const char *intrinsic = NULL; > + LLVMTypeRef ret_type = bld->vec_type; > > assert(type.floating); > + assert(type.width == 32); > > assert(lp_check_value(type, a)); > assert(util_cpu_caps.has_altivec); > @@ -1555,9 +1564,12 @@ lp_build_round_altivec(struct lp_build_context *bld, > case LP_BUILD_ROUND_TRUNCATE: > intrinsic = "llvm.ppc.altivec.vrfiz"; > break; > + case LP_BUILD_IROUND: > + ret_type = lp_build_int_vec_type(bld->gallivm, bld->type); > + intrinsic = "llvm.ppc.altivec.vctsxs"; > } > > - return lp_build_intrinsic_unary(builder, intrinsic, bld->vec_type, a); > + return lp_build_intrinsic_unary(builder, intrinsic, ret_type, a); > } > > static INLINE LLVMValueRef > @@ -1565,7 +1577,9 @@ lp_build_round_arch(struct lp_build_context *bld, > LLVMValueRef a, > enum lp_build_round_mode mode) > { > - if (util_cpu_caps.has_sse4_1) > + if (util_cpu_caps.has_sse2 && (mode == LP_BUILD_IROUND)) > + return lp_build_iround_nearest_sse2(bld, a); > + else if (util_cpu_caps.has_sse4_1) > return lp_build_round_sse41(bld, a, mode); > else 
/* (util_cpu_caps.has_altivec) */ > return lp_build_round_altivec(bld, a, mode); > @@ -1893,11 +1907,6 @@ lp_build_iround(struct lp_build_context *bld, > > assert(lp_check_value(type, a)); > > - if ((util_cpu_caps.has_sse2 && > - ((type.width == 32) && (type.length == 1 || type.length == 4))) || > - (util_cpu_caps.has_avx && type.width == 32 && type.length == 8)) { > - return lp_build_iround_nearest_sse2(bld, a); > - } > if (arch_rounding_available(type)) { > res = lp_build_round_arch(bld, a, LP_BUILD_ROUND_NEAREST); > } > -- > 1.7.11.4 > > _______________________________________________ > mesa-dev mailing list > [email protected] > http://lists.freedesktop.org/mailman/listinfo/mesa-dev > _______________________________________________ mesa-dev mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/mesa-dev
