Hi! As discussed in the PR, *bswapdi2_doubleword splitter would generate wrong code if there is overlap in between the destination register and the memory address. Without bswapdi2 pattern for !TARGET_64BIT, we generate sometimes better, usually same quality and sometimes slightly worse code, so the fix is to remove the pattern for 32-bit. Even with movbe it does the right thing. Preapproved in the PR by Uros, bootstrapped/regtested on x86_64-linux and i686-linux, committed to trunk.
2012-11-02 Jakub Jelinek <ja...@redhat.com> PR target/55147 * config/i386/i386.md (bswapdi2): Limit to TARGET_64BIT. (*bswapdi2_doubleword): Removed. * gcc.target/i386/pr55147.c: New test. --- gcc/config/i386/i386.md.jj 2012-11-01 09:33:06.632302393 +0100 +++ gcc/config/i386/i386.md 2012-11-01 10:37:16.947243914 +0100 @@ -12669,55 +12669,10 @@ (define_insn "*popcountsi2_cmp_zext" (define_expand "bswapdi2" [(set (match_operand:DI 0 "register_operand") (bswap:DI (match_operand:DI 1 "nonimmediate_operand")))] - "" + "TARGET_64BIT" { - if (TARGET_64BIT && !TARGET_MOVBE) - operands[1] = force_reg (DImode, operands[1]); -}) - -(define_insn_and_split "*bswapdi2_doubleword" - [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,m") - (bswap:DI - (match_operand:DI 1 "nonimmediate_operand" "0,m,r")))] - "!TARGET_64BIT - && !(MEM_P (operands[0]) && MEM_P (operands[1]))" - "#" - "&& reload_completed" - [(set (match_dup 2) - (bswap:SI (match_dup 1))) - (set (match_dup 0) - (bswap:SI (match_dup 3)))] -{ - split_double_mode (DImode, &operands[0], 2, &operands[0], &operands[2]); - - if (REG_P (operands[0]) && REG_P (operands[1])) - { - emit_insn (gen_swapsi (operands[0], operands[2])); - emit_insn (gen_bswapsi2 (operands[0], operands[0])); - emit_insn (gen_bswapsi2 (operands[2], operands[2])); - DONE; - } - if (!TARGET_MOVBE) - { - if (MEM_P (operands[0])) - { - emit_insn (gen_bswapsi2 (operands[3], operands[3])); - emit_insn (gen_bswapsi2 (operands[1], operands[1])); - - emit_move_insn (operands[0], operands[3]); - emit_move_insn (operands[2], operands[1]); - } - if (MEM_P (operands[1])) - { - emit_move_insn (operands[2], operands[1]); - emit_move_insn (operands[0], operands[3]); - - emit_insn (gen_bswapsi2 (operands[2], operands[2])); - emit_insn (gen_bswapsi2 (operands[0], operands[0])); - } - DONE; - } + operands[1] = force_reg (DImode, operands[1]); }) (define_expand "bswapsi2" --- gcc/testsuite/gcc.target/i386/pr55147.c.jj 2012-11-01 10:36:33.304496388 +0100 +++ gcc/testsuite/gcc.target/i386/pr55147.c 2012-11-01 10:35:52.000000000 +0100 @@ -0,0 +1,25 @@ +/* PR target/55147 */ +/* { dg-do run } */ +/* { dg-options "-O1" } */ +/* { dg-additional-options "-march=i486" { target ia32 } } */ + +extern void abort (void); + +__attribute__((noclone, noinline)) unsigned int +foo (unsigned long long *p, int i) +{ + return __builtin_bswap64 (p[i]); +} + +int +main () +{ + unsigned long long p[64]; + int i; + for (i = 0; i < 64; i++) + p[i] = 0x123456789abcdef0ULL ^ (1ULL << i) ^ (1ULL << (63 - i)); + for (i = 0; i < 64; i++) + if (foo (p, i) != __builtin_bswap32 (p[i] >> 32)) + abort (); + return 0; +} Jakub