Hi! I've committed the following fix for the following testcase. When scalar_op1 is 0xffffffffffffffff8000000000000000 with 64-bit HWI, it matches EXACT_POWER_OF_2_OR_ZERO_P, but we should expand it as negation of the << 63 shift rather than the << 63 shift alone.
The patch also improves multiplication by 0x8000000000000000 in TImode, which can be done as << 63 shift, and multiplication by -(((TImode)1) << N) for N 0 through 62, which can be also expanded as negation of left shift. Furthermore I've noticed several places where we could invoke signed integer overflows inside of the compiler and fixed them. Bootstrapped/regtested on x86_64-linux and i686-linux, acked by Richard on IRC, committed to trunk. 2013-02-21 Jakub Jelinek <ja...@redhat.com> PR middle-end/56420 * expmed.c (EXACT_POWER_OF_2_OR_ZERO_P): Do subtraction in uhwi, to avoid signed wrapping. (expand_mult): Handle properly multiplication by ((dword_type) -1) << (BITS_PER_WORD - 1). Improve multiplication by ((dword_type) 1) << (BITS_PER_WORD - 1). Avoid undefined behavior in the compiler if coeff is HOST_WIDE_INT_MIN. (expand_divmod): Don't make ext_op1 static, change it's type to uhwi. Avoid undefined behavior in -INTVAL (op1). * gcc.dg/torture/pr56420.c: New test. --- gcc/expmed.c.jj 2013-02-13 21:46:52.000000000 +0100 +++ gcc/expmed.c 2013-02-21 19:25:05.692298011 +0100 @@ -64,7 +64,8 @@ static rtx expand_smod_pow2 (enum machin static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT); /* Test whether a value is zero of a power of two. */ -#define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0) +#define EXACT_POWER_OF_2_OR_ZERO_P(x) \ + (((x) & ((x) - (unsigned HOST_WIDE_INT) 1)) == 0) struct init_expmed_rtl { @@ -3079,7 +3080,10 @@ expand_mult (enum machine_mode mode, rtx /* If we are multiplying in DImode, it may still be a win to try to work with shifts and adds. */ if (CONST_DOUBLE_HIGH (scalar_op1) == 0 - && CONST_DOUBLE_LOW (scalar_op1) > 0) + && (CONST_DOUBLE_LOW (scalar_op1) > 0 + || (CONST_DOUBLE_LOW (scalar_op1) < 0 + && EXACT_POWER_OF_2_OR_ZERO_P + (CONST_DOUBLE_LOW (scalar_op1))))) { coeff = CONST_DOUBLE_LOW (scalar_op1); is_neg = false; @@ -3109,7 +3113,8 @@ expand_mult (enum machine_mode mode, rtx use synth_mult. */ /* Special case powers of two. */ - if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)) + if (EXACT_POWER_OF_2_OR_ZERO_P (coeff) + && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)) return expand_shift (LSHIFT_EXPR, mode, op0, floor_log2 (coeff), target, unsignedp); @@ -3124,13 +3129,24 @@ expand_mult (enum machine_mode mode, rtx result is interpreted as an unsigned coefficient. Exclude cost of op0 from max_cost to match the cost calculation of the synth_mult. */ + coeff = -(unsigned HOST_WIDE_INT) coeff; max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed) - neg_cost(speed, mode)); - if (max_cost > 0 - && choose_mult_variant (mode, -coeff, &algorithm, - &variant, max_cost)) + if (max_cost <= 0) + goto skip_synth; + + /* Special case powers of two. */ + if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)) + { + rtx temp = expand_shift (LSHIFT_EXPR, mode, op0, + floor_log2 (coeff), target, unsignedp); + return expand_unop (mode, neg_optab, temp, target, 0); + } + + if (choose_mult_variant (mode, coeff, &algorithm, &variant, + max_cost)) { - rtx temp = expand_mult_const (mode, op0, -coeff, NULL_RTX, + rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX, &algorithm, variant); return expand_unop (mode, neg_optab, temp, target, 0); } @@ -3813,13 +3829,12 @@ expand_divmod (int rem_flag, enum tree_c int op1_is_constant, op1_is_pow2 = 0; int max_cost, extra_cost; static HOST_WIDE_INT last_div_const = 0; - static HOST_WIDE_INT ext_op1; bool speed = optimize_insn_for_speed_p (); op1_is_constant = CONST_INT_P (op1); if (op1_is_constant) { - ext_op1 = INTVAL (op1); + unsigned HOST_WIDE_INT ext_op1 = UINTVAL (op1); if (unsignedp) ext_op1 &= GET_MODE_MASK (mode); op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1) @@ -3967,7 +3982,7 @@ expand_divmod (int rem_flag, enum tree_c op1_is_pow2 = (op1_is_constant && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)) || (! unsignedp - && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ; + && EXACT_POWER_OF_2_OR_ZERO_P (-UINTVAL (op1)))))); } /* If one of the operands is a volatile MEM, copy it into a register. */ --- gcc/testsuite/gcc.dg/torture/pr56420.c.jj 2013-02-21 18:37:54.720565659 +0100 +++ gcc/testsuite/gcc.dg/torture/pr56420.c 2013-02-21 19:08:41.000000000 +0100 @@ -0,0 +1,37 @@ +/* PR middle-end/56420 */ +/* { dg-do run { target int128 } } */ + +extern void abort (void); + +__attribute__((noinline, noclone)) __uint128_t +foo (__uint128_t x) +{ + return x * (((__uint128_t) -1) << 63); +} + +__attribute__((noinline, noclone)) __uint128_t +bar (__uint128_t x) +{ + return x * (((__uint128_t) 1) << 63); +} + +__attribute__((noinline, noclone)) __uint128_t +baz (__uint128_t x) +{ + return x * -(((__uint128_t) 1) << 62); +} + +int +main () +{ + if (foo (1) != (((__uint128_t) -1) << 63) + || foo (8) != (((__uint128_t) -1) << 66)) + abort (); + if (bar (1) != (((__uint128_t) 1) << 63) + || bar (8) != (((__uint128_t) 1) << 66)) + abort (); + if (baz (1) != -(((__uint128_t) 1) << 62) + || baz (8) != ((-(((__uint128_t) 1) << 62)) << 3)) + abort (); + return 0; +} Jakub