On Thu, 25 Jun 2015, Richard Biener wrote:

> A first pattern moved from fold-const.c - albeit a very twisted one.
> It triggers in gcc.dg/tree-ssa/pr52631.c.
> 
> I've tried a nearly 1:1 translation (cut&paste and convert the
> code structure to withs/ifs and then replace tree ops accordingly).
> 
> One might question whether the narrowing case should better be split
> out generically, that is, convert X & CST to narrowed X if CST
> masks out the upper bits (independent of the operation generating X).
> 
> I'll leave any such improvements for followups.
> 
> Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.
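To make the quoted description concrete, here is a minimal C sketch of the
two cases the moved fold handles (illustration only, not part of the patch
or of pr52631.c; it assumes a 32-bit unsigned int):

  /* Assumed example: unsigned int is 32 bits wide.  */

  unsigned int
  widen_mask (unsigned int x)
  {
    /* After the shift the low 8 bits are known zero, so the mask can be
       widened: 0xff00 | ((1 << 8) - 1) == 0xffff, an integer mode's mask,
       which later folding can turn into a narrowing/zero-extension.  */
    return (x << 8) & 0xff00;
  }

  unsigned int
  all_masked (unsigned int x)
  {
    /* Every mask bit falls into the 16 low bits cleared by the shift,
       so the whole expression folds to 0 (the "(X, 0)" case below).  */
    return (x << 16) & 0xff00;
  }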
Applied with additional

	* gcc.dg/tree-ssa/pr52631.c: Disable forwprop.

Index: gcc/testsuite/gcc.dg/tree-ssa/pr52631.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/pr52631.c	(revision 224893)
+++ gcc/testsuite/gcc.dg/tree-ssa/pr52631.c	(working copy)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-fre1-details" } */
+/* { dg-options "-O2 -fno-tree-forwprop -fdump-tree-fre1-details" } */
 
 unsigned f(unsigned a)
 {

> Richard.
> 
> 2015-06-25  Richard Biener  <rguent...@suse.de>
> 
> 	* fold-const.c (fold_binary_loc): Move simplification of
> 	(X <<>> C1) & C2 ...
> 	* match.pd: ... here.
> 
> Index: gcc/fold-const.c
> ===================================================================
> --- gcc/fold-const.c	(revision 224893)
> +++ gcc/fold-const.c	(working copy)
> @@ -11516,106 +11516,6 @@ fold_binary_loc (location_t loc,
>            return build_int_cst (type, residue & low);
>          }
>  
> -      /* Fold (X << C1) & C2 into (X << C1) & (C2 | ((1 << C1) - 1))
> -         (X >> C1) & C2 into (X >> C1) & (C2 | ~((type) -1 >> C1))
> -         if the new mask might be further optimized.  */
> -      if ((TREE_CODE (arg0) == LSHIFT_EXPR
> -           || TREE_CODE (arg0) == RSHIFT_EXPR)
> -          && TYPE_PRECISION (TREE_TYPE (arg0)) <= HOST_BITS_PER_WIDE_INT
> -          && TREE_CODE (arg1) == INTEGER_CST
> -          && tree_fits_uhwi_p (TREE_OPERAND (arg0, 1))
> -          && tree_to_uhwi (TREE_OPERAND (arg0, 1)) > 0
> -          && (tree_to_uhwi (TREE_OPERAND (arg0, 1))
> -              < TYPE_PRECISION (TREE_TYPE (arg0))))
> -        {
> -          unsigned int shiftc = tree_to_uhwi (TREE_OPERAND (arg0, 1));
> -          unsigned HOST_WIDE_INT mask = TREE_INT_CST_LOW (arg1);
> -          unsigned HOST_WIDE_INT newmask, zerobits = 0;
> -          tree shift_type = TREE_TYPE (arg0);
> -
> -          if (TREE_CODE (arg0) == LSHIFT_EXPR)
> -            zerobits = ((((unsigned HOST_WIDE_INT) 1) << shiftc) - 1);
> -          else if (TREE_CODE (arg0) == RSHIFT_EXPR
> -                   && TYPE_PRECISION (TREE_TYPE (arg0))
> -                      == GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (arg0))))
> -            {
> -              prec = TYPE_PRECISION (TREE_TYPE (arg0));
> -              tree arg00 = TREE_OPERAND (arg0, 0);
> -              /* See if more bits can be proven as zero because of
> -                 zero extension.  */
> -              if (TREE_CODE (arg00) == NOP_EXPR
> -                  && TYPE_UNSIGNED (TREE_TYPE (TREE_OPERAND (arg00, 0))))
> -                {
> -                  tree inner_type = TREE_TYPE (TREE_OPERAND (arg00, 0));
> -                  if (TYPE_PRECISION (inner_type)
> -                      == GET_MODE_PRECISION (TYPE_MODE (inner_type))
> -                      && TYPE_PRECISION (inner_type) < prec)
> -                    {
> -                      prec = TYPE_PRECISION (inner_type);
> -                      /* See if we can shorten the right shift.  */
> -                      if (shiftc < prec)
> -                        shift_type = inner_type;
> -                      /* Otherwise X >> C1 is all zeros, so we'll optimize
> -                         it into (X, 0) later on by making sure zerobits
> -                         is all ones.  */
> -                    }
> -                }
> -              zerobits = ~(unsigned HOST_WIDE_INT) 0;
> -              if (shiftc < prec)
> -                {
> -                  zerobits >>= HOST_BITS_PER_WIDE_INT - shiftc;
> -                  zerobits <<= prec - shiftc;
> -                }
> -              /* For arithmetic shift if sign bit could be set, zerobits
> -                 can contain actually sign bits, so no transformation is
> -                 possible, unless MASK masks them all away.  In that
> -                 case the shift needs to be converted into logical shift.  */
> -              if (!TYPE_UNSIGNED (TREE_TYPE (arg0))
> -                  && prec == TYPE_PRECISION (TREE_TYPE (arg0)))
> -                {
> -                  if ((mask & zerobits) == 0)
> -                    shift_type = unsigned_type_for (TREE_TYPE (arg0));
> -                  else
> -                    zerobits = 0;
> -                }
> -            }
> -
> -          /* ((X << 16) & 0xff00) is (X, 0).  */
> -          if ((mask & zerobits) == mask)
> -            return omit_one_operand_loc (loc, type,
> -                                         build_int_cst (type, 0), arg0);
> -
> -          newmask = mask | zerobits;
> -          if (newmask != mask && (newmask & (newmask + 1)) == 0)
> -            {
> -              /* Only do the transformation if NEWMASK is some integer
> -                 mode's mask.  */
> -              for (prec = BITS_PER_UNIT;
> -                   prec < HOST_BITS_PER_WIDE_INT; prec <<= 1)
> -                if (newmask == (((unsigned HOST_WIDE_INT) 1) << prec) - 1)
> -                  break;
> -              if (prec < HOST_BITS_PER_WIDE_INT
> -                  || newmask == ~(unsigned HOST_WIDE_INT) 0)
> -                {
> -                  tree newmaskt;
> -
> -                  if (shift_type != TREE_TYPE (arg0))
> -                    {
> -                      tem = fold_build2_loc (loc, TREE_CODE (arg0), shift_type,
> -                                             fold_convert_loc (loc, shift_type,
> -                                                               TREE_OPERAND (arg0, 0)),
> -                                             TREE_OPERAND (arg0, 1));
> -                      tem = fold_convert_loc (loc, type, tem);
> -                    }
> -                  else
> -                    tem = op0;
> -                  newmaskt = build_int_cst_type (TREE_TYPE (op1), newmask);
> -                  if (!tree_int_cst_equal (newmaskt, arg1))
> -                    return fold_build2_loc (loc, BIT_AND_EXPR, type, tem, newmaskt);
> -                }
> -            }
> -        }
> -
>        goto associate;
>  
>      case RDIV_EXPR:
> Index: gcc/match.pd
> ===================================================================
> --- gcc/match.pd	(revision 224893)
> +++ gcc/match.pd	(working copy)
> @@ -738,6 +755,97 @@ (define_operator_list swapped_tcc_compar
>       && wi::eq_p (wi::lshift (@0, cand), @2))
>     (cmp @1 { build_int_cst (TREE_TYPE (@1), cand); })))))
>  
> +/* Fold (X << C1) & C2 into (X << C1) & (C2 | ((1 << C1) - 1))
> +   (X >> C1) & C2 into (X >> C1) & (C2 | ~((type) -1 >> C1))
> +   if the new mask might be further optimized.  */
> +(for shift (lshift rshift)
> + (simplify
> +  (bit_and (convert?@4 (shift@5 (convert1?@3 @0) INTEGER_CST@1)) INTEGER_CST@2)
> +  (if (tree_nop_conversion_p (TREE_TYPE (@4), TREE_TYPE (@5))
> +       && TYPE_PRECISION (type) <= HOST_BITS_PER_WIDE_INT
> +       && tree_fits_uhwi_p (@1)
> +       && tree_to_uhwi (@1) > 0
> +       && tree_to_uhwi (@1) < TYPE_PRECISION (type))
> +   (with
> +    {
> +      unsigned int shiftc = tree_to_uhwi (@1);
> +      unsigned HOST_WIDE_INT mask = TREE_INT_CST_LOW (@2);
> +      unsigned HOST_WIDE_INT newmask, zerobits = 0;
> +      tree shift_type = TREE_TYPE (@3);
> +      unsigned int prec;
> +
> +      if (shift == LSHIFT_EXPR)
> +        zerobits = ((((unsigned HOST_WIDE_INT) 1) << shiftc) - 1);
> +      else if (shift == RSHIFT_EXPR
> +               && (TYPE_PRECISION (shift_type)
> +                   == GET_MODE_PRECISION (TYPE_MODE (shift_type))))
> +        {
> +          prec = TYPE_PRECISION (TREE_TYPE (@3));
> +          tree arg00 = @0;
> +          /* See if more bits can be proven as zero because of
> +             zero extension.  */
> +          if (@3 != @0
> +              && TYPE_UNSIGNED (TREE_TYPE (@0)))
> +            {
> +              tree inner_type = TREE_TYPE (@0);
> +              if ((TYPE_PRECISION (inner_type)
> +                   == GET_MODE_PRECISION (TYPE_MODE (inner_type)))
> +                  && TYPE_PRECISION (inner_type) < prec)
> +                {
> +                  prec = TYPE_PRECISION (inner_type);
> +                  /* See if we can shorten the right shift.  */
> +                  if (shiftc < prec)
> +                    shift_type = inner_type;
> +                  /* Otherwise X >> C1 is all zeros, so we'll optimize
> +                     it into (X, 0) later on by making sure zerobits
> +                     is all ones.  */
> +                }
> +            }
> +          zerobits = ~(unsigned HOST_WIDE_INT) 0;
> +          if (shiftc < prec)
> +            {
> +              zerobits >>= HOST_BITS_PER_WIDE_INT - shiftc;
> +              zerobits <<= prec - shiftc;
> +            }
> +          /* For arithmetic shift if sign bit could be set, zerobits
> +             can contain actually sign bits, so no transformation is
> +             possible, unless MASK masks them all away.  In that
> +             case the shift needs to be converted into logical shift.  */
> +          if (!TYPE_UNSIGNED (TREE_TYPE (@3))
> +              && prec == TYPE_PRECISION (TREE_TYPE (@3)))
> +            {
> +              if ((mask & zerobits) == 0)
> +                shift_type = unsigned_type_for (TREE_TYPE (@3));
> +              else
> +                zerobits = 0;
> +            }
> +        }
> +    }
> +    /* ((X << 16) & 0xff00) is (X, 0).  */
> +    (if ((mask & zerobits) == mask)
> +     { build_int_cst (type, 0); })
> +    (with { newmask = mask | zerobits; }
> +     (if (newmask != mask && (newmask & (newmask + 1)) == 0)
> +      (with
> +       {
> +         /* Only do the transformation if NEWMASK is some integer
> +            mode's mask.  */
> +         for (prec = BITS_PER_UNIT;
> +              prec < HOST_BITS_PER_WIDE_INT; prec <<= 1)
> +           if (newmask == (((unsigned HOST_WIDE_INT) 1) << prec) - 1)
> +             break;
> +       }
> +       (if (prec < HOST_BITS_PER_WIDE_INT
> +            || newmask == ~(unsigned HOST_WIDE_INT) 0)
> +        (with
> +         { tree newmaskt = build_int_cst_type (TREE_TYPE (@2), newmask); }
> +         (if (!tree_int_cst_equal (newmaskt, @2))
> +          (if (shift_type != TREE_TYPE (@3)
> +               && single_use (@4) && single_use (@5))
> +           (bit_and (convert (shift:shift_type (convert @3) @1)) { newmaskt; }))
> +          (if (shift_type == TREE_TYPE (@3))
> +           (bit_and @4 { newmaskt; }))))))))))))
> +
>  /* Simplifications of conversions.  */
>  
>  /* Basic strip-useless-type-conversions / strip_nops.  */
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Dilip Upmanyu,
Graham Norton, HRB 21284 (AG Nuernberg)