Hi! Kai has reported that his type demotion patches lead to a regression, which can also be seen without his patches by doing the type demotion by hand. test1 is optimized using the *jcc_bt<mode>_mask instruction (the combiner detects this), but test2 isn't. In that case the combiner first merges the and with the shift into a *<shift_insn><mode>3_mask insn, and then *jcc_bt<mode>_mask won't match, because we end up with (zero_extend:SI (subreg:QI (and:SI <something> (const_int 63)) 0)) and we don't simplify that.
So, my first approach was to try to simplify that: because nonzero_bits on the subreg operand says that no bits outside of QImode may be non-zero, both the zero_extend and the subreg can be dropped. That is the simplify-rtx.c change. Then I figured out that combine.c doesn't actually attempt to simplify this anyway, so that is the combine.c change. And lastly, an i386 pattern was needed anyway. I've also attempted to simplify (zero_extend:SI (subreg:QI (and:DI <something> (const_int 63)) 0)) into (subreg:SI (and:DI <something> (const_int 63)) 0) (a very small change in simplify-rtx.c: just drop the requirement that the zero_extend mode is as wide as or wider than SUBREG_REG's mode, and when it is <= use gen_lowpart_no_emit instead of just returning the SUBREG_REG), but that unfortunately regressed the test1 case; we'd need some further i386.md tweaks. While in theory this folding looks like a useful simplification, that regression makes me wonder whether other backends rely on such expressions not being simplified. So, as an alternative, I've also implemented an i386.md-only fix. Thus, do we want the first patch, or the first patch plus the further simplify-rtx.c change described above plus some further i386.md tweaks, or just the second patch instead? Both have been bootstrapped/regtested on x86_64-linux and i686-linux. Jakub
2013-07-05 Jakub Jelinek <ja...@redhat.com> PR target/57819 * simplify-rtx.c (simplify_unary_operation_1) <case ZERO_EXTEND>: Simplify (zero_extend:SI (subreg:QI (and:SI (reg:SI) (const_int 63)) 0)). * combine.c (make_extraction): Create ZERO_EXTEND or SIGN_EXTEND using simplify_gen_unary instead of gen_rtx_*_EXTEND. * config/i386/i386.md (*jcc_bt<mode>_1): New define_insn_and_split. * gcc.target/i386/pr57819.c: New test. --- gcc/simplify-rtx.c.jj 2013-06-01 14:47:23.000000000 +0200 +++ gcc/simplify-rtx.c 2013-07-04 16:24:48.654817120 +0200 @@ -1470,6 +1470,29 @@ simplify_unary_operation_1 (enum rtx_cod } } + /* (zero_extend:M (subreg:N <X:O>)) is <X:O> (for M == O) or + (zero_extend:M <X:O>), if X doesn't have any bits outside of N mode + non-zero. E.g. + (zero_extend:SI (subreg:QI (and:SI (reg:SI) (const_int 63)) 0)) is + (and:SI (reg:SI) (const_int 63)). */ + if (GET_CODE (op) == SUBREG + && GET_MODE_PRECISION (GET_MODE (op)) + < GET_MODE_PRECISION (GET_MODE (SUBREG_REG (op))) + && GET_MODE_PRECISION (GET_MODE (SUBREG_REG (op))) + <= HOST_BITS_PER_WIDE_INT + && GET_MODE_PRECISION (mode) + >= GET_MODE_PRECISION (GET_MODE (SUBREG_REG (op))) + && subreg_lowpart_p (op) + && (nonzero_bits (SUBREG_REG (op), GET_MODE (SUBREG_REG (op))) + & ~GET_MODE_MASK (GET_MODE (op))) == 0) + { + if (GET_MODE_PRECISION (mode) + == GET_MODE_PRECISION (GET_MODE (SUBREG_REG (op)))) + return SUBREG_REG (op); + return simplify_gen_unary (ZERO_EXTEND, mode, SUBREG_REG (op), + GET_MODE (SUBREG_REG (op))); + } + #if defined(POINTERS_EXTEND_UNSIGNED) && !defined(HAVE_ptr_extend) /* As we do not know which address space the pointer is referring to, we can do this only if the target does not support different pointer --- gcc/combine.c.jj 2013-05-04 14:40:40.000000000 +0200 +++ gcc/combine.c 2013-07-04 15:44:59.409575170 +0200 @@ -7326,7 +7326,8 @@ make_extraction (enum machine_mode mode, if (pos_rtx != 0 && GET_MODE_SIZE (pos_mode) > GET_MODE_SIZE (GET_MODE (pos_rtx))) { - rtx temp = 
gen_rtx_ZERO_EXTEND (pos_mode, pos_rtx); + rtx temp = simplify_gen_unary (ZERO_EXTEND, pos_mode, pos_rtx, + GET_MODE (pos_rtx)); /* If we know that no extraneous bits are set, and that the high bit is not set, convert extraction to cheaper one - either @@ -7340,7 +7341,8 @@ make_extraction (enum machine_mode mode, >> 1)) == 0))) { - rtx temp1 = gen_rtx_SIGN_EXTEND (pos_mode, pos_rtx); + rtx temp1 = simplify_gen_unary (SIGN_EXTEND, pos_mode, pos_rtx, + GET_MODE (pos_rtx)); /* Prefer ZERO_EXTENSION, since it gives more information to backends. */ --- gcc/config/i386/i386.md.jj 2013-06-27 18:47:32.000000000 +0200 +++ gcc/config/i386/i386.md 2013-07-04 15:58:24.429243358 +0200 @@ -10474,6 +10474,39 @@ (define_insn_and_split "*jcc_bt<mode>" PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0]))); }) +;; Like *jcc_bt<mode>, but expect a SImode operand 2 instead of QImode +;; zero extended to SImode. +(define_insn_and_split "*jcc_bt<mode>_1" + [(set (pc) + (if_then_else (match_operator 0 "bt_comparison_operator" + [(zero_extract:SWI48 + (match_operand:SWI48 1 "register_operand" "r") + (const_int 1) + (match_operand:SI 2 "register_operand" "r")) + (const_int 0)]) + (label_ref (match_operand 3)) + (pc))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_USE_BT || optimize_function_for_size_p (cfun)" + "#" + "&& 1" + [(set (reg:CCC FLAGS_REG) + (compare:CCC + (zero_extract:SWI48 + (match_dup 1) + (const_int 1) + (match_dup 2)) + (const_int 0))) + (set (pc) + (if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)]) + (label_ref (match_dup 3)) + (pc)))] +{ + operands[2] = simplify_gen_subreg (<MODE>mode, operands[2], SImode, 0); + + PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0]))); +}) + ;; Avoid useless masking of bit offset operand. "and" in SImode is correct ;; also for DImode, this is what combine produces. 
(define_insn_and_split "*jcc_bt<mode>_mask" --- gcc/testsuite/gcc.target/i386/pr57819.c.jj 2013-07-04 16:27:46.900877301 +0200 +++ gcc/testsuite/gcc.target/i386/pr57819.c 2013-07-04 16:27:30.000000000 +0200 @@ -0,0 +1,38 @@ +/* PR target/57819 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -mtune=core2" } */ + +void foo (void); + +__extension__ typedef __INTPTR_TYPE__ intptr_t; + +int +test1 (intptr_t x, intptr_t n) +{ + n &= sizeof (intptr_t) * __CHAR_BIT__ - 1; + + if (x & ((intptr_t) 1 << n)) + foo (); + + return 0; +} + +int +test2 (intptr_t x, intptr_t n) +{ + if (x & ((intptr_t) 1 << ((int) n & (sizeof (intptr_t) * __CHAR_BIT__ - 1)))) + foo (); + + return 0; +} + +int +test3 (intptr_t x, intptr_t n) +{ + if (x & ((intptr_t) 1 << ((int) n & ((int) sizeof (intptr_t) * __CHAR_BIT__ - 1)))) + foo (); + + return 0; +} + +/* { dg-final { scan-assembler-not "and\[lq\]\[ \t\]" } } */
2013-07-05 Jakub Jelinek <ja...@redhat.com> PR target/57819 * config/i386/i386.md (*jcc_bt<mode>_mask_1): New define_insn_and_split. * gcc.target/i386/pr57819.c: New test. --- gcc/config/i386/i386.md.jj 2013-06-27 18:47:32.000000000 +0200 +++ gcc/config/i386/i386.md 2013-07-04 16:54:48.789218553 +0200 @@ -10510,6 +10510,45 @@ (define_insn_and_split "*jcc_bt<mode>_ma PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0]))); }) +;; Like *jcc_bt<mode>_mask, but for the case where AND has been previously +;; combined with a shift. +(define_insn_and_split "*jcc_bt<mode>_mask_1" + [(set (pc) + (if_then_else (match_operator 0 "bt_comparison_operator" + [(zero_extract:SWI48 + (match_operand:SWI48 1 "register_operand" "r") + (const_int 1) + (zero_extend:SI + (subreg:QI + (and:SI + (match_operand:SI 2 "register_operand" "r") + (match_operand:SI 3 "const_int_operand" "n")) + 0)))]) + (label_ref (match_operand 4)) + (pc))) + (clobber (reg:CC FLAGS_REG))] + "(TARGET_USE_BT || optimize_function_for_size_p (cfun)) + && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1)) + == GET_MODE_BITSIZE (<MODE>mode)-1" + "#" + "&& 1" + [(set (reg:CCC FLAGS_REG) + (compare:CCC + (zero_extract:SWI48 + (match_dup 1) + (const_int 1) + (match_dup 2)) + (const_int 0))) + (set (pc) + (if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)]) + (label_ref (match_dup 4)) + (pc)))] +{ + operands[2] = simplify_gen_subreg (<MODE>mode, operands[2], SImode, 0); + + PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0]))); +}) + (define_insn_and_split "*jcc_btsi_1" [(set (pc) (if_then_else (match_operator 0 "bt_comparison_operator" --- gcc/testsuite/gcc.target/i386/pr57819.c.jj 2013-07-04 16:27:46.900877301 +0200 +++ gcc/testsuite/gcc.target/i386/pr57819.c 2013-07-04 16:27:30.000000000 +0200 @@ -0,0 +1,38 @@ +/* PR target/57819 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -mtune=core2" } */ + +void foo (void); + +__extension__ typedef __INTPTR_TYPE__ intptr_t; + +int 
+test1 (intptr_t x, intptr_t n) +{ + n &= sizeof (intptr_t) * __CHAR_BIT__ - 1; + + if (x & ((intptr_t) 1 << n)) + foo (); + + return 0; +} + +int +test2 (intptr_t x, intptr_t n) +{ + if (x & ((intptr_t) 1 << ((int) n & (sizeof (intptr_t) * __CHAR_BIT__ - 1)))) + foo (); + + return 0; +} + +int +test3 (intptr_t x, intptr_t n) +{ + if (x & ((intptr_t) 1 << ((int) n & ((int) sizeof (intptr_t) * __CHAR_BIT__ - 1)))) + foo (); + + return 0; +} + +/* { dg-final { scan-assembler-not "and\[lq\]\[ \t\]" } } */