Andrew Pinski via Gcc-patches <gcc-patches@gcc.gnu.org> writes: > After r6-2044-g98e30e515f184b, code like "((x & 0xff00ff00U) >> 8)" > would be optimized to (x >> 8) & 0xff00ffU, which is normally better > except on aarch64, where the shift right could be combined with another > operation in some cases. So we need to add a few define_splits > to the aarch64 backend that match "((x >> shift) & CST0) OP Y" > and split it into: > TMP = X & CST1 > (TMP >> shift) OP Y > > Note this also gets us back to matching rev16, so I added a > testcase to make sure we don't lose that matching any more. > Note that when the generic patch to recognize those as bswap ROT 16 goes in, > we might regress again and need to add a few more patterns to > the aarch64 backend, but we will deal with that once that happens. > > OK? Bootstrapped and tested on aarch64 with no regressions. > > gcc/ChangeLog: > > * config/aarch64/aarch64.md: Add a new define_split > to help combine. > > gcc/testsuite/ChangeLog: > > * gcc.target/aarch64/rev16_2.c: New test. > * gcc.target/aarch64/shift_and_operator-1.c: New test. 
> --- > gcc/config/aarch64/aarch64.md | 21 ++++++++++ > gcc/testsuite/gcc.target/aarch64/rev16_2.c | 39 +++++++++++++++++++ > .../gcc.target/aarch64/shift_and_operator-1.c | 22 +++++++++++ > 3 files changed, 82 insertions(+) > create mode 100644 gcc/testsuite/gcc.target/aarch64/rev16_2.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/shift_and_operator-1.c > > diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md > index af9087508ac..41cc563f10c 100644 > --- a/gcc/config/aarch64/aarch64.md > +++ b/gcc/config/aarch64/aarch64.md > @@ -4656,6 +4656,27 @@ (define_insn "*<LOGICAL:optab>_<SHIFT:optab><mode>3" > [(set_attr "type" "logic_shift_imm")] > ) > > +(define_split > + [(set (match_operand:GPI 0 "register_operand") > + (LOGICAL_OR_PLUS:GPI > + (and:GPI > + (lshiftrt:GPI (match_operand:GPI 1 "register_operand") > + (match_operand:QI 2 "aarch64_shift_imm_<mode>")) > + (match_operand:GPI 3 "aarch64_logical_immediate")) > + (match_operand:GPI 4 "register_operand")))] > + "can_create_pseudo_p () > + && aarch64_bitmask_imm (UINTVAL (operands[3]) << UINTVAL (operands[2]), > <MODE>mode)"
Formatting nit: long line > + [(set (match_dup 5) (and:GPI (match_dup 1) (match_dup 6))) > + (set (match_dup 0) (match_dup 7))] > + { > + operands[5] = gen_reg_rtx (<MODE>mode); > + operands[6] = gen_int_mode (UINTVAL (operands[3]) << UINTVAL > (operands[2]), <MODE>mode); Here too. > + rtx shift = gen_rtx_LSHIFTRT (<MODE>mode, operands[5], operands[2]); > + rtx_code new_code = <CODE>; > + operands[7] = gen_rtx_fmt_ee (new_code, <MODE>mode, shift, operands[4]); It should be possible to do the last three statements in the rtl pattern, e.g. as: [(set (match_dup 5) (and:GPI (match_dup 1) (match_dup 6))) (set (match_dup 0) (LOGICAL_OR_PLUS:GPI (lshiftrt:GPI (match_dup 5) (match_dup 2)) (match_dup 4)))] OK with those changes, thanks. Richard > + } > +) > + > (define_split > [(set (match_operand:GPI 0 "register_operand") > (LOGICAL_OR_PLUS:GPI > diff --git a/gcc/testsuite/gcc.target/aarch64/rev16_2.c > b/gcc/testsuite/gcc.target/aarch64/rev16_2.c > new file mode 100644 > index 00000000000..621eb5dfbf0 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/rev16_2.c > @@ -0,0 +1,39 @@ > +/* { dg-options "-O2" } */ > +/* { dg-do compile } */ > + > +extern void abort (void); > + > +typedef unsigned int __u32; > + > +__u32 > +__rev16_32_alt (__u32 x) > +{ > + return (((__u32)(x) & (__u32)0xff00ff00UL) >> 8) > + | (((__u32)(x) & (__u32)0x00ff00ffUL) << 8); > +} > + > +__u32 > +__rev16_32 (__u32 x) > +{ > + return (((__u32)(x) & (__u32)0x00ff00ffUL) << 8) > + | (((__u32)(x) & (__u32)0xff00ff00UL) >> 8); > +} > + > +typedef unsigned long long __u64; > + > +__u64 > +__rev16_64_alt (__u64 x) > +{ > + return (((__u64)(x) & (__u64)0xff00ff00ff00ff00UL) >> 8) > + | (((__u64)(x) & (__u64)0x00ff00ff00ff00ffUL) << 8); > +} > + > +__u64 > +__rev16_64 (__u64 x) > +{ > + return (((__u64)(x) & (__u64)0x00ff00ff00ff00ffUL) << 8) > + | (((__u64)(x) & (__u64)0xff00ff00ff00ff00UL) >> 8); > +} > + > +/* { dg-final { scan-assembler-times "rev16\\tx\[0-9\]+" 2 } } */ > +/* { dg-final { 
scan-assembler-times "rev16\\tw\[0-9\]+" 2 } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/shift_and_operator-1.c > b/gcc/testsuite/gcc.target/aarch64/shift_and_operator-1.c > new file mode 100644 > index 00000000000..49152c5495a > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/shift_and_operator-1.c > @@ -0,0 +1,22 @@ > +/* { dg-options "-O2" } */ > +/* { dg-do compile } */ > + > +unsigned f(unsigned x, unsigned b) > +{ > + return ((x & 0xff00ff00U) >> 8) | b; > +} > + > +unsigned f0(unsigned x, unsigned b) > +{ > + return ((x & 0xff00ff00U) >> 8) ^ b; > +} > +unsigned f1(unsigned x, unsigned b) > +{ > + return ((x & 0xff00ff00U) >> 8) + b; > +} > + > +/* { dg-final { scan-assembler-times "lsr\\tw\[0-9\]+" 0 } } */ > +/* { dg-final { scan-assembler-times "lsr 8" 3 } } */ > +/* { dg-final { scan-assembler-times "eor\\tw\[0-9\]+" 1 } } */ > +/* { dg-final { scan-assembler-times "add\\tw\[0-9\]+" 1 } } */ > +/* { dg-final { scan-assembler-times "orr\\tw\[0-9\]+" 1 } } */