> -----Original Message----- > From: Richard Earnshaw > Sent: 21 March 2014 13:57 > To: Ian Bolton > Cc: gcc-patches@gcc.gnu.org > Subject: Re: [PATCH, ARM] Optimise NotDI AND/OR ZeroExtendSI for ARMv7A > > On 19/03/14 16:53, Ian Bolton wrote: > > This is a follow-on patch to one already committed: > > http://gcc.gnu.org/ml/gcc-patches/2014-02/msg01128.html > > > > It implements patterns to simplify our RTL as follows: > > > > OR (Not:DI (A:DI), ZeroExtend:DI (B:SI)) > > --> the top half can be done with a MVN > > > > AND (Not:DI (A:DI), ZeroExtend:DI (B:SI)) > > --> the top half becomes zero. > > > > I've added test cases for both of these and also the existing > > anddi_notdi patterns. The tests all pass. > > > > Full regression runs passed. > > > > OK for stage 1? > > > > Cheers, > > Ian > > > > > > 2014-03-19 Ian Bolton <ian.bol...@arm.com> > > > > gcc/ > > * config/arm/arm.md (*anddi_notdi_zesidi): New pattern > > * config/arm/thumb2.md (*iordi_notdi_zesidi): New pattern. > > > > testsuite/ > > * gcc.target/arm/anddi_notdi-1.c: New test. > > * gcc.target/arm/iordi_notdi-1.c: New test case. > > > > > > arm-and-ior-notdi-zeroextend-patch-v1.txt > > > > > > diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md > > index 2ddda02..d2d85ee 100644 > > --- a/gcc/config/arm/arm.md > > +++ b/gcc/config/arm/arm.md > > @@ -2962,6 +2962,28 @@ > > (set_attr "type" "multiple")] > > ) > > > > +(define_insn_and_split "*anddi_notdi_zesidi" > > + [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") > > + (and:DI (not:DI (match_operand:DI 2 "s_register_operand" > "0,?r")) > > + (zero_extend:DI > > + (match_operand:SI 1 "s_register_operand" "r,r"))))] > > The early clobber and register tying here is unnecessary. All of the > input operands are consumed in the first instruction, so you can > eliminate the ties and the restriction on the overlap. 
Something like > (untested): > > +(define_insn_and_split "*anddi_notdi_zesidi" > + [(set (match_operand:DI 0 "s_register_operand" "=r") > + (and:DI (not:DI (match_operand:DI 2 "s_register_operand" "r")) > + (zero_extend:DI > + (match_operand:SI 1 "s_register_operand" "r"))))] > > Ok for stage-1 with that change (though I'd recommend another test > run > to validate the above). > > R.
Thanks, Richard. Regression runs came back OK with that change, so I will consider this ready for stage 1. The patch is attached for reference. Cheers, Ian
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md index 2ddda02..4176b7ff 100644 --- a/gcc/config/arm/arm.md +++ b/gcc/config/arm/arm.md @@ -2962,6 +2962,28 @@ (set_attr "type" "multiple")] ) +(define_insn_and_split "*anddi_notdi_zesidi" + [(set (match_operand:DI 0 "s_register_operand" "=r") + (and:DI (not:DI (match_operand:DI 2 "s_register_operand" "r")) + (zero_extend:DI + (match_operand:SI 1 "s_register_operand" "r"))))] + "TARGET_32BIT" + "#" + "TARGET_32BIT && reload_completed" + [(set (match_dup 0) (and:SI (not:SI (match_dup 2)) (match_dup 1))) + (set (match_dup 3) (const_int 0))] + " + { + operands[3] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[2] = gen_lowpart (SImode, operands[2]); + }" + [(set_attr "length" "8") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "multiple")] +) + (define_insn_and_split "*anddi_notsesidi_di" [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") (and:DI (not:DI (sign_extend:DI diff --git a/gcc/config/arm/thumb2.md b/gcc/config/arm/thumb2.md index 467c619..10bc8b1 100644 --- a/gcc/config/arm/thumb2.md +++ b/gcc/config/arm/thumb2.md @@ -1418,6 +1418,30 @@ (set_attr "type" "multiple")] ) +(define_insn_and_split "*iordi_notdi_zesidi" + [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") + (ior:DI (not:DI (match_operand:DI 2 "s_register_operand" "0,?r")) + (zero_extend:DI + (match_operand:SI 1 "s_register_operand" "r,r"))))] + "TARGET_THUMB2" + "#" + "TARGET_THUMB2 && reload_completed" + [(set (match_dup 0) (ior:SI (not:SI (match_dup 2)) (match_dup 1))) + (set (match_dup 3) (not:SI (match_dup 4)))] + " + { + operands[3] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[1] = gen_lowpart (SImode, operands[1]); + operands[4] = gen_highpart (SImode, operands[2]); + operands[2] = gen_lowpart (SImode, operands[2]); + }" + [(set_attr "length" "8") + (set_attr "predicable" 
"yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "multiple")] +) + (define_insn_and_split "*iordi_notsesidi_di" [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") (ior:DI (not:DI (sign_extend:DI diff --git a/gcc/testsuite/gcc.target/arm/anddi_notdi-1.c b/gcc/testsuite/gcc.target/arm/anddi_notdi-1.c new file mode 100644 index 0000000..cfb33fc --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/anddi_notdi-1.c @@ -0,0 +1,65 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -fno-inline --save-temps" } */ + +extern void abort (void); + +typedef long long s64int; +typedef int s32int; +typedef unsigned long long u64int; +typedef unsigned int u32int; + +s64int +anddi_di_notdi (s64int a, s64int b) +{ + return (a & ~b); +} + +s64int +anddi_di_notzesidi (s64int a, u32int b) +{ + return (a & ~(u64int) b); +} + +s64int +anddi_notdi_zesidi (s64int a, u32int b) +{ + return (~a & (u64int) b); +} + +s64int +anddi_di_notsesidi (s64int a, s32int b) +{ + return (a & ~(s64int) b); +} + +int main () +{ + s64int a64 = 0xdeadbeef0000ffffll; + s64int b64 = 0x000000005f470112ll; + s64int c64 = 0xdeadbeef300f0000ll; + + u32int c32 = 0x01124f4f; + s32int d32 = 0xabbaface; + + s64int z = anddi_di_notdi (c64, b64); + if (z != 0xdeadbeef20080000ll) + abort (); + + z = anddi_di_notzesidi (a64, c32); + if (z != 0xdeadbeef0000b0b0ll) + abort (); + + z = anddi_notdi_zesidi (c64, c32); + if (z != 0x0000000001104f4fll) + abort (); + + z = anddi_di_notsesidi (a64, d32); + if (z != 0x0000000000000531ll) + abort (); + + return 0; +} + +/* { dg-final { scan-assembler-times "bic\t" 6 } } */ + +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/arm/iordi_notdi-1.c b/gcc/testsuite/gcc.target/arm/iordi_notdi-1.c index cda9c0e..249f080 100644 --- a/gcc/testsuite/gcc.target/arm/iordi_notdi-1.c +++ b/gcc/testsuite/gcc.target/arm/iordi_notdi-1.c @@ -9,19 +9,25 @@ typedef unsigned long long u64int; typedef unsigned int u32int; s64int -iordi_notdi (s64int a, s64int b) 
+iordi_di_notdi (s64int a, s64int b) { return (a | ~b); } s64int -iordi_notzesidi (s64int a, u32int b) +iordi_di_notzesidi (s64int a, u32int b) { return (a | ~(u64int) b); } s64int -iordi_notsesidi (s64int a, s32int b) +iordi_notdi_zesidi (s64int a, u32int b) +{ + return (~a | (u64int) b); +} + +s64int +iordi_di_notsesidi (s64int a, s32int b) { return (a | ~(s64int) b); } @@ -30,25 +36,30 @@ int main () { s64int a64 = 0xdeadbeef00000000ll; s64int b64 = 0x000000004f4f0112ll; + s64int c64 = 0xdeadbeef000f0000ll; u32int c32 = 0x01124f4f; s32int d32 = 0xabbaface; - s64int z = iordi_notdi (a64, b64); + s64int z = iordi_di_notdi (a64, b64); if (z != 0xffffffffb0b0feedll) abort (); - z = iordi_notzesidi (a64, c32); + z = iordi_di_notzesidi (a64, c32); if (z != 0xfffffffffeedb0b0ll) abort (); - z = iordi_notsesidi (a64, d32); + z = iordi_notdi_zesidi (c64, c32); + if (z != 0x21524110fff2ffffll) + abort (); + + z = iordi_di_notsesidi (a64, d32); if (z != 0xdeadbeef54450531ll) abort (); return 0; } -/* { dg-final { scan-assembler-times "orn\t" 5 { target arm_thumb2 } } } */ +/* { dg-final { scan-assembler-times "orn\t" 6 { target arm_thumb2 } } } */ /* { dg-final { cleanup-saved-temps } } */