Hi!
The testcase used to be compiled at -O2 by GCC8 and earlier to:
f1:
neg w1, w0, asr 16
and w1, w1, 65535
orr w0, w1, w0, lsl 16
ret
f2:
neg w1, w0
extr w0, w1, w0, 16
ret
but since GCC9 (r9-3594 for f1 and r9-6926 for f2) we compile it into:
f1:
mov w1, w0
sbfx x0, x1, 16, 16
neg w0, w0
bfi w0, w1, 16, 16
ret
f2:
neg w1, w0
sbfx x0, x0, 16, 16
bfi w0, w1, 16, 16
ret
instead, i.e. one insn longer each. With this patch we get:
f1:
mov w1, w0
neg w0, w1, asr 16
bfi w0, w1, 16, 16
ret
f2:
neg w1, w0
extr w0, w1, w0, 16
ret
i.e. f2 is identical to the GCC 8 output, and f1 has the same number of insns as it had with GCC 8.
The combiner unfortunately doesn't try splitters when doing 2 -> 1
combination, so it can't be implemented as combine splitters, but
it could be implemented as define_insn_and_split if desirable.
Bootstrapped/regtested on aarch64-linux, ok for trunk?
2021-04-15 Jakub Jelinek <[email protected]>
PR target/100075
* config/aarch64/aarch64.md (*neg_asr_si2_extr, *extrsi5_insn_di): New
define_insn patterns.
* gcc.target/aarch64/pr100075.c: New test.
--- gcc/config/aarch64/aarch64.md.jj 2021-04-15 10:45:02.798853095 +0200
+++ gcc/config/aarch64/aarch64.md 2021-04-15 13:28:04.734754364 +0200
@@ -3572,6 +3572,18 @@ (define_insn "*neg_<shift>_si2_uxtw"
[(set_attr "autodetect_type" "alu_shift_<shift>_op2")]
)
+(define_insn "*neg_asr_si2_extr"  ;; NEG of an SImode ASR that combine expresses as a DImode sign_extract.
+  [(set (match_operand:SI 0 "register_operand" "=r")  ;; Output operand, hence the "=" modifier.
+        (neg:SI (match_operator:SI 4 "subreg_lowpart_operator"
+                  [(sign_extract:DI
+                     (match_operand:DI 1 "register_operand" "r")
+                     (match_operand 3 "aarch64_simd_shift_imm_offset_si" "n")  ;; Field width.
+                     (match_operand 2 "aarch64_simd_shift_imm_offset_si" "n"))])))]  ;; Start bit == shift amount.
+  "INTVAL (operands[2]) + INTVAL (operands[3]) == 32"  ;; Field must end at bit 31, i.e. be a 32-bit ASR.
+  "neg\\t%w0, %w1, asr %2"
+  [(set_attr "autodetect_type" "alu_shift_asr_op2")]
+)
+
(define_insn "mul<mode>3"
[(set (match_operand:GPI 0 "register_operand" "=r")
(mult:GPI (match_operand:GPI 1 "register_operand" "r")
@@ -5382,6 +5394,22 @@ (define_insn "*extrsi5_insn_uxtw_alt"
"extr\\t%w0, %w1, %w2, %4"
[(set_attr "type" "rotate_imm")]
)
+
+(define_insn "*extrsi5_insn_di"  ;; SImode EXTR whose low part combine expresses as a DImode zero_extract.
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (ior:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
+                           (match_operand 3 "const_int_operand" "n"))  ;; Left shift of the high source.
+                (match_operator:SI 6 "subreg_lowpart_operator"
+                  [(zero_extract:DI
+                     (match_operand:DI 2 "register_operand" "r")
+                     (match_operand 5 "const_int_operand" "n")  ;; Field width.
+                     (match_operand 4 "const_int_operand" "n"))])))]  ;; Start bit == EXTR lsb.
+  "UINTVAL (operands[3]) < 32
+   && UINTVAL (operands[3]) + UINTVAL (operands[4]) == 32
+   && INTVAL (operands[3]) == INTVAL (operands[5])"  ;; Width must be exactly 32 - lsb: EXTR reads only bits [lsb, 31] of %w2.
+  "extr\\t%w0, %w1, %w2, %4"
+  [(set_attr "type" "rotate_imm")]
+)
(define_insn "*ror<mode>3_insn"
[(set (match_operand:GPI 0 "register_operand" "=r")
--- gcc/testsuite/gcc.target/aarch64/pr100075.c.jj 2021-04-15 13:23:31.188852983 +0200
+++ gcc/testsuite/gcc.target/aarch64/pr100075.c 2021-04-15 13:23:10.612086048 +0200
@@ -0,0 +1,20 @@
+/* PR target/100075 */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-not {\tsbfx\tx[0-9]+, x[0-9]+, 16, 16} } } */
+/* { dg-final { scan-assembler {\tneg\tw[0-9]+, w[0-9]+, asr 16} } } */
+/* { dg-final { scan-assembler {\textr\tw[0-9]+, w[0-9]+, w[0-9]+, 16} } } */
+
+struct S { short x, y; };	/* Pair of short fields; passed to and returned from f1/f2 by value.  */
+
+struct S
+f1 (struct S p)
+{
+ return (struct S) { -p.y, p.x };	/* Result: x = -p.y, y = p.x (fields swapped, old y negated).  */
+}
+
+struct S
+f2 (struct S p)
+{
+ return (struct S) { p.y, -p.x };	/* Result: x = p.y, y = -p.x (fields swapped, old x negated).  */
+}
Jakub