Hi all,

In this patch we split X-register UBFX instructions that extract up to the edge of a W-register into a W-register LSR instruction. So for the example in the testcase, instead of:

    ubfx    x0, x0, 24, 8

we'd generate:

    lsr     w0, w0, 24

An LSR is a simpler instruction and there's a higher chance that it can be combined with other instructions.

To do this the patch separates the sign_extract case from the zero_extract case in the *<optab><mode> ANY_EXTRACT pattern and further splits the SImode/DImode patterns out of the resulting zero_extract pattern. The DImode zero_extract pattern then becomes a define_insn_and_split that splits into a zero_extend of an lshiftrt when the bit position and width of the zero_extract add up to 32.

Bootstrapped and tested on aarch64-none-linux-gnu.

Since we're in stage 3 perhaps this is not for GCC 6, but it is fairly low risk. I'm happy for it to wait for the next release if necessary.

Thanks,
Kyrill

2016-12-08  Kyrylo Tkachov  <kyrylo.tkac...@arm.com>

    * config/aarch64/aarch64.md (*<optab><mode>): Split into...
    (*extv<mode>): ...This...
    (*extzvsi): ...This...
    (*extzvdi): ...And this.  Add splitting to lshiftrt when possible.

2016-12-08  Kyrylo Tkachov  <kyrylo.tkac...@arm.com>

    * gcc.target/aarch64/ubfx_lsr_1.c: New test.
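For reference, a small standalone C program like the one below (not part of the patch; the test values and the assert-based check are just an illustration) demonstrates the identity the new split relies on: when the bit position and width of the extraction add up to 32, extracting the field from the 64-bit value is the same as a 32-bit logical shift right of its low word, with the upper bits implicitly zero.

    #include <assert.h>

    int
    main (void)
    {
      unsigned long long vals[] = { 0x0ull, 0x12345678ull,
                                    0xdeadbeefcafef00dull, ~0x0ull };
      for (unsigned int i = 0; i < sizeof (vals) / sizeof (vals[0]); i++)
        {
          unsigned long long x = vals[i];
          /* Extract bits 24..31 of the 64-bit value: ubfx x0, x0, 24, 8.  */
          unsigned long long ubfx = (x >> 24) & 0xff;
          /* 32-bit logical shift right of the low word: lsr w0, w0, 24.
             The result is already zero-extended to 64 bits.  */
          unsigned long long lsr = (unsigned int) x >> 24;
          assert (ubfx == lsr);
        }
      return 0;
    }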
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 6b4d0ba633af2a549ded2f18962d9ed300f56e12..a6f659c26bb5156d652b6c1f09123e682e9ff648 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -4327,16 +4327,51 @@ (define_expand "<optab>"
 )
 
-(define_insn "*<optab><mode>"
+(define_insn "*extv<mode>"
   [(set (match_operand:GPI 0 "register_operand" "=r")
-	(ANY_EXTRACT:GPI (match_operand:GPI 1 "register_operand" "r")
+	(sign_extract:GPI (match_operand:GPI 1 "register_operand" "r")
 			 (match_operand 2
 			   "aarch64_simd_shift_imm_offset_<mode>" "n")
 			 (match_operand 3
 			   "aarch64_simd_shift_imm_<mode>" "n")))]
   "IN_RANGE (INTVAL (operands[2]) + INTVAL (operands[3]),
 	     1, GET_MODE_BITSIZE (<MODE>mode) - 1)"
-  "<su>bfx\\t%<w>0, %<w>1, %3, %2"
+  "sbfx\\t%<w>0, %<w>1, %3, %2"
+  [(set_attr "type" "bfx")]
+)
+
+(define_insn "*extzvsi"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(zero_extract:SI (match_operand:SI 1 "register_operand" "r")
+			 (match_operand 2
+			   "aarch64_simd_shift_imm_offset_si" "n")
+			 (match_operand 3
+			   "aarch64_simd_shift_imm_si" "n")))]
+  "IN_RANGE (INTVAL (operands[2]) + INTVAL (operands[3]),
+	     1, GET_MODE_BITSIZE (SImode) - 1)"
+  "ubfx\\t%w0, %w1, %3, %2"
+  [(set_attr "type" "bfx")]
+)
+
+(define_insn_and_split "*extzvdi"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extract:DI (match_operand:DI 1 "register_operand" "r")
+			 (match_operand 2
+			   "aarch64_simd_shift_imm_offset_di" "n")
+			 (match_operand 3
+			   "aarch64_simd_shift_imm_di" "n")))]
+  "IN_RANGE (INTVAL (operands[2]) + INTVAL (operands[3]),
+	     1, GET_MODE_BITSIZE (DImode) - 1)"
+  "ubfx\\t%x0, %x1, %3, %2"
+  ;; When the bitposition and width add up to 32 we can use a W-reg LSR
+  ;; instruction taking advantage of the implicit zero-extension of the X-reg.
+  "&& (INTVAL (operands[2]) + INTVAL (operands[3]))
+      == GET_MODE_BITSIZE (SImode)"
+  [(set (match_dup 0)
+	(zero_extend:DI (lshiftrt:SI (match_dup 4) (match_dup 3))))]
+  {
+    operands[4] = gen_lowpart (SImode, operands[1]);
+  }
   [(set_attr "type" "bfx")]
 )
diff --git a/gcc/testsuite/gcc.target/aarch64/ubfx_lsr_1.c b/gcc/testsuite/gcc.target/aarch64/ubfx_lsr_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..bc083862976a88190dbef97a247be8a10b277a12
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/ubfx_lsr_1.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+/* Check that an X-reg UBFX can be simplified into a W-reg LSR. */
+
+int
+f (unsigned long long x)
+{
+  x = (x >> 24) & 255;
+  return x + 1;
+}
+
+/* { dg-final { scan-assembler "lsr\tw" } } */
+/* { dg-final { scan-assembler-not "ubfx\tx" } } */