On Fri, 22 Jul 2011, Richard Guenther wrote: > On Fri, 22 Jul 2011, Richard Guenther wrote: > > > On Fri, 22 Jul 2011, Richard Guenther wrote: > > > > > On Thu, 21 Jul 2011, Joseph S. Myers wrote: > > > > > > > On Thu, 21 Jul 2011, Richard Guenther wrote: > > > > > > > > > Patch also handling wider modes and not starting with SImode but > > > > > the mode of int: > > > > > > > > Use of target int for anything not about C ABIs is certainly wrong. > > > > This > > > > might be about what operations the target does efficiently, or what > > > > functions are present in libgcc (both of which would be functions of > > > > machine modes), but it's not about the choice of C int. > > > > > > Ok. Given rth's last suggestion I'm testing the following which > > > checks all integer modes (but never will widen - optabs.c will do > > > that if it turns out to be profitable). > > > > Err, I should refresh the patch before sending it ... here it goes. > > Regresses vectorization on i?86 because that defines floathi expanders > but the vectorizer does not recognize a short -> float conversion > as that requires different sized vectors (the int -> short truncation > is also a complication for it). One of the cases that fail to vectorize > is > > #define N 40 > float image[N][N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); > float out[N]; > > /* Outer-loop vectorization. */ > > __attribute__ ((noinline)) void > foo (){ > int i,j; > > for (i = 0; i < N; i++) { > for (j = 0; j < N; j++) { > image[j][i] = j+i; > } > } > } > > where VRP figures out that j+i can be represented as a short. > > Now, I can dumb down the VRP code to only consider doing the > unsigned -> signed demotion and never truncate (unless maybe > the target does not support floatMODE of the current input mode). > > Other suggestions?
The following implements the above - do not truncate if the target claims to be able to do the unsigned -> float conversion in the original modes, but unconditionally try to change it to signed -> float. Bootstrapped and tested on x86_64-unknown-linux-gnu. Does this look sensible as a start? We can always improve things incrementally when we discover a case that is worthwhile. Thanks, Richard. 2011-07-22 Richard Guenther <rguent...@suse.de> PR tree-optimization/49715 * tree-vrp.c: Include expr.h and optabs.h. (range_fits_type_p): New function. (simplify_float_conversion_using_ranges): Likewise. (simplify_stmt_using_ranges): Call it. * Makefile.in (tree-vrp.o): Add $(EXPR_H) and $(OPTABS_H) dependencies. * optabs.c (can_float_p): Export. * optabs.h (can_float_p): Declare. * gcc.target/i386/pr49715-1.c: New testcase. * gcc.target/i386/pr49715-2.c: Likewise. Index: gcc/tree-vrp.c =================================================================== *** gcc/tree-vrp.c.orig 2011-07-21 17:46:16.000000000 +0200 --- gcc/tree-vrp.c 2011-07-22 14:41:46.000000000 +0200 *************** along with GCC; see the file COPYING3. *** 40,45 **** --- 40,47 ---- #include "tree-ssa-propagate.h" #include "tree-chrec.h" #include "gimple-fold.h" + #include "expr.h" + #include "optabs.h" /* Type of value ranges. See value_range_d for a description of these *************** simplify_conversion_using_ranges (gimple *** 7448,7453 **** --- 7450,7548 ---- return true; } + /* Return whether the value range *VR fits in an integer type specified + by PRECISION and UNSIGNED_P. */ + + static bool + range_fits_type_p (value_range_t *vr, unsigned precision, bool unsigned_p) + { + double_int tem; + + /* We can only handle constant ranges. 
*/ + if (vr->type != VR_RANGE + || TREE_CODE (vr->min) != INTEGER_CST + || TREE_CODE (vr->max) != INTEGER_CST) + return false; + + tem = double_int_ext (tree_to_double_int (vr->min), precision, unsigned_p); + if (!double_int_equal_p (tree_to_double_int (vr->min), tem)) + return false; + + tem = double_int_ext (tree_to_double_int (vr->max), precision, unsigned_p); + if (!double_int_equal_p (tree_to_double_int (vr->max), tem)) + return false; + + return true; + } + + /* Simplify a conversion from integral SSA name to float in STMT. */ + + static bool + simplify_float_conversion_using_ranges (gimple_stmt_iterator *gsi, gimple stmt) + { + tree rhs1 = gimple_assign_rhs1 (stmt); + value_range_t *vr = get_value_range (rhs1); + enum machine_mode fltmode = TYPE_MODE (TREE_TYPE (gimple_assign_lhs (stmt))); + enum machine_mode mode; + tree tem; + gimple conv; + + /* We can only handle constant ranges. */ + if (vr->type != VR_RANGE + || TREE_CODE (vr->min) != INTEGER_CST + || TREE_CODE (vr->max) != INTEGER_CST) + return false; + + /* First check if we can use a signed type in place of an unsigned. */ + if (TYPE_UNSIGNED (TREE_TYPE (rhs1)) + && (can_float_p (fltmode, TYPE_MODE (TREE_TYPE (rhs1)), 0) + != CODE_FOR_nothing) + && range_fits_type_p (vr, GET_MODE_PRECISION + (TYPE_MODE (TREE_TYPE (rhs1))), 0)) + mode = TYPE_MODE (TREE_TYPE (rhs1)); + /* If we can do the conversion in the current input mode do nothing. */ + else if (can_float_p (fltmode, TYPE_MODE (TREE_TYPE (rhs1)), + TYPE_UNSIGNED (TREE_TYPE (rhs1)))) + return false; + /* Otherwise search for a mode we can use, starting from the narrowest + integer mode available. */ + else + { + mode = GET_CLASS_NARROWEST_MODE (MODE_INT); + do + { + /* If we cannot do a signed conversion to float from mode + or if the value-range does not fit in the signed type + try with a wider mode. 
*/ + if (can_float_p (fltmode, mode, 0) != CODE_FOR_nothing + && range_fits_type_p (vr, GET_MODE_PRECISION (mode), 0)) + break; + + mode = GET_MODE_WIDER_MODE (mode); + /* But do not widen the input. Instead leave that to the + optabs expansion code. */ + if (GET_MODE_PRECISION (mode) > TYPE_PRECISION (TREE_TYPE (rhs1))) + return false; + } + while (mode != VOIDmode); + if (mode == VOIDmode) + return false; + } + + /* It works, insert a truncation or sign-change before the + float conversion. */ + tem = create_tmp_var (build_nonstandard_integer_type + (GET_MODE_PRECISION (mode), 0), NULL); + conv = gimple_build_assign_with_ops (NOP_EXPR, tem, rhs1, NULL_TREE); + tem = make_ssa_name (tem, conv); + gimple_assign_set_lhs (conv, tem); + gsi_insert_before (gsi, conv, GSI_SAME_STMT); + gimple_assign_set_rhs1 (stmt, tem); + update_stmt (stmt); + + return true; + } + /* Simplify STMT using ranges if possible. */ static bool *************** simplify_stmt_using_ranges (gimple_stmt_ *** 7507,7512 **** --- 7602,7613 ---- return simplify_conversion_using_ranges (stmt); break; + case FLOAT_EXPR: + if (TREE_CODE (rhs1) == SSA_NAME + && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))) + return simplify_float_conversion_using_ranges (gsi, stmt); + break; + default: break; } Index: gcc/testsuite/gcc.target/i386/pr49715-1.c =================================================================== *** /dev/null 1970-01-01 00:00:00.000000000 +0000 --- gcc/testsuite/gcc.target/i386/pr49715-1.c 2011-07-21 17:46:39.000000000 +0200 *************** *** 0 **** --- 1,9 ---- + /* { dg-do compile } */ + /* { dg-options "-O2 -msse -mfpmath=sse" } */ + + float func(unsigned x) + { + return (x & 0xfffff) * 0.01f; + } + + /* { dg-final { scan-assembler-times "cvtsi2ss" 1 } } */ Index: gcc/testsuite/gcc.target/i386/pr49715-2.c =================================================================== *** /dev/null 1970-01-01 00:00:00.000000000 +0000 --- gcc/testsuite/gcc.target/i386/pr49715-2.c 2011-07-21 17:46:39.000000000 
+0200 *************** *** 0 **** --- 1,12 ---- + /* { dg-do compile } */ + /* { dg-require-effective-target lp64 } */ + /* { dg-options "-O2" } */ + + double func(unsigned long long x) + { + if (x <= 0x7ffffffffffffffeULL) + return (x + 1) * 0.01; + return 0.0; + } + + /* { dg-final { scan-assembler-times "cvtsi2sdq" 1 } } */ Index: gcc/Makefile.in =================================================================== *** gcc/Makefile.in.orig 2011-07-19 12:59:09.000000000 +0200 --- gcc/Makefile.in 2011-07-22 11:09:16.000000000 +0200 *************** tree-vrp.o : tree-vrp.c $(CONFIG_H) $(SY *** 2504,2510 **** $(TREE_FLOW_H) $(TREE_PASS_H) $(TREE_DUMP_H) $(DIAGNOSTIC_H) $(GGC_H) \ $(BASIC_BLOCK_H) tree-ssa-propagate.h $(FLAGS_H) $(TREE_DUMP_H) \ $(CFGLOOP_H) $(SCEV_H) $(TIMEVAR_H) intl.h tree-pretty-print.h \ ! gimple-pretty-print.h gimple-fold.h tree-cfg.o : tree-cfg.c $(TREE_FLOW_H) $(CONFIG_H) $(SYSTEM_H) \ $(TREE_H) $(TM_P_H) $(EXPR_H) $(GGC_H) $(FLAGS_H) output.h \ $(DIAGNOSTIC_H) $(FUNCTION_H) $(TIMEVAR_H) $(TM_H) coretypes.h \ --- 2504,2510 ---- $(TREE_FLOW_H) $(TREE_PASS_H) $(TREE_DUMP_H) $(DIAGNOSTIC_H) $(GGC_H) \ $(BASIC_BLOCK_H) tree-ssa-propagate.h $(FLAGS_H) $(TREE_DUMP_H) \ $(CFGLOOP_H) $(SCEV_H) $(TIMEVAR_H) intl.h tree-pretty-print.h \ ! gimple-pretty-print.h gimple-fold.h $(OPTABS_H) $(EXPR_H) tree-cfg.o : tree-cfg.c $(TREE_FLOW_H) $(CONFIG_H) $(SYSTEM_H) \ $(TREE_H) $(TM_P_H) $(EXPR_H) $(GGC_H) $(FLAGS_H) output.h \ $(DIAGNOSTIC_H) $(FUNCTION_H) $(TIMEVAR_H) $(TM_H) coretypes.h \ Index: gcc/optabs.c =================================================================== *** gcc/optabs.c.orig 2011-07-11 17:02:51.000000000 +0200 --- gcc/optabs.c 2011-07-22 11:04:42.000000000 +0200 *************** can_fix_p (enum machine_mode fixmode, en *** 4626,4632 **** return CODE_FOR_nothing; } ! static enum insn_code can_float_p (enum machine_mode fltmode, enum machine_mode fixmode, int unsignedp) { --- 4626,4632 ---- return CODE_FOR_nothing; } ! 
enum insn_code can_float_p (enum machine_mode fltmode, enum machine_mode fixmode, int unsignedp) { Index: gcc/optabs.h =================================================================== *** gcc/optabs.h.orig 2011-06-22 16:01:28.000000000 +0200 --- gcc/optabs.h 2011-07-22 11:06:01.000000000 +0200 *************** extern void expand_fixed_convert (rtx, r *** 849,854 **** --- 849,857 ---- /* Generate code for a FLOAT_EXPR. */ extern void expand_float (rtx, rtx, int); + /* Return the insn_code for a FLOAT_EXPR. */ + enum insn_code can_float_p (enum machine_mode, enum machine_mode, int); + /* Generate code for a FIX_EXPR. */ extern void expand_fix (rtx, rtx, int);