On 16/03/14 12:30, Renlin Li wrote:
> Hi all,
> 
> Thank you for your suggestions, Richard. I have updated the patch 
> accordingly.
> 
> This is an optimization patch which will combine  "ubfiz" and "orr" 
> insns with a single "bfi" when certain conditions meet.
> 
> tmp = (x & m) | ( (y & n) << lsb) can be presented using
> 
>      and tmp, x, m
>      bfi tmp, y, #lsb, #width
> 
> if ((n+1) == 2^width) && (m & n << lsb) == 0.
> 
> A small test case is also added to verify it.
> 
> Is this Okay for stage-1?
> 
> Kind regards,
> Renlin Li
> 

This looks to me more like a 3 into two split operation where combine
needs some help to do the split, since the transformation is
non-trivial.  As such, I think you just need a define_split rather than
a define_insn_and_split (there's also no obvious reason why we would
want to defer this split until after register allocation).

Furthermore, you have an early-clobber situation here: it's important
that y and tmp aren't in the same register.  You appear to try to cater
for this by using an operand tie, but that's unnecessary in general (the
AND operation can write any usable register) and won't work in the
specific case where x = y.

R.

> 
> gcc/ChangeLog:
> 
> 2014-03-14  Renlin Li  <renlin...@arm.com>
> 
>      * config/aarch64/aarch64.md (*combine_bfi2<GPI:mode><SHORT:mode>, 
> *combine_bfi3<mode>): New.
> 
> gcc/testsuite:
> 
> 2014-03-14  Renlin Li  <renlin...@arm.com>
> 
>      * gcc.target/aarch64/combine_and_orr_1.c: New.
> 
> 
> patch.diff
> 
> 
> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> index 99a6ac8..6c2798b 100644
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -3115,6 +3115,53 @@
>    [(set_attr "type" "bfm")]
>  )
>  
> +(define_insn_and_split "*combine_bfi2<GPI:mode><SHORT:mode>"
> +  [(set (match_operand:GPI 0 "register_operand" "=r")
> +        (ior:GPI (and:GPI (ashift:GPI (match_operand:GPI 1 
> "register_operand" "r")
> +                                      (match_operand 2 "const_int_operand" 
> "n"))
> +                          (match_operand 3 "const_int_operand" "n"))
> +                 (zero_extend:GPI (match_operand:SHORT 4 "register_operand" 
> "0"))))]
> +  "exact_log2 ((INTVAL (operands[3]) >> INTVAL (operands[2])) + 1) >= 0
> +   && <SHORT:sizen> <= INTVAL (operands[2])"
> +  "#"
> +  "&& reload_completed"
> +  [(set (match_dup 0)
> +        (zero_extend:GPI (match_dup 4)))
> +   (set (zero_extract:GPI (match_dup 0)
> +                       (match_dup 3)
> +                       (match_dup 2))
> +     (match_dup 1))]
> +  {
> +      int tmp = (INTVAL (operands[3]) >> INTVAL (operands[2])) + 1;
> +      operands[3] = GEN_INT (exact_log2 (tmp));
> +  }
> +  [(set_attr "type" "multiple")]
> +)
> +
> +(define_insn_and_split "*combine_bfi3<mode>"
> +  [(set (match_operand:GPI 0 "register_operand" "=r")
> +        (ior:GPI (and:GPI (match_operand:GPI 1 "register_operand" "0")
> +                          (match_operand 2 "aarch64_logical_immediate" "n"))
> +                 (and:GPI (ashift:GPI (match_operand:GPI 3 
> "register_operand" "r")
> +                                      (match_operand 4 "const_int_operand" 
> "n"))
> +                          (match_operand 5 "const_int_operand" "n"))))]
> +  "exact_log2 ((INTVAL (operands[5]) >> INTVAL (operands[4])) + 1) >= 0
> +   && (INTVAL (operands[2]) & INTVAL (operands[5])) == 0"
> +  "#"
> +  "&& reload_completed"
> +  [(set (match_dup 0)
> +        (and:GPI (match_dup 1) (match_dup 2)))
> +   (set (zero_extract:GPI (match_dup 0)
> +                       (match_dup 5)
> +                       (match_dup 4))
> +     (match_dup 3))]
> +  {
> +      int tmp = (INTVAL (operands[5]) >> INTVAL (operands[4])) + 1;
> +      operands[5] = GEN_INT (exact_log2 (tmp));
> +  }
> +  [(set_attr "type" "multiple")]
> +)
> +
>  (define_insn "*extr_insv_lower_reg<mode>"
>    [(set (zero_extract:GPI (match_operand:GPI 0 "register_operand" "+r")
>                         (match_operand 1 "const_int_operand" "n")
> diff --git a/gcc/testsuite/gcc.target/aarch64/combine_and_orr_1.c 
> b/gcc/testsuite/gcc.target/aarch64/combine_and_orr_1.c
> new file mode 100644
> index 0000000..b2c0194
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/combine_and_orr_1.c
> @@ -0,0 +1,51 @@
> +/* { dg-do run } */
> +/* { dg-options "-save-temps -O2" }  */
> +
> +extern void abort (void);
> +
> +unsigned int __attribute__ ((noinline))
> +foo1 (unsigned int major, unsigned int minor)
> +{
> +  unsigned int tmp = (minor & 0xff) | ((major & 0xfff) << 8);
> +  return tmp;
> +}
> +
> +unsigned int __attribute__ ((noinline))
> +foo2 (unsigned int major, unsigned int minor)
> +{
> +  unsigned int tmp = (minor & 0x1f) | ((major & 0xfff) << 8);
> +  return tmp;
> +}
> +
> +int
> +main (void)
> +{
> +  unsigned int major[10] = {1947662, 484254, 193508, 4219233, 2211215,
> +      3998162, 4240676, 1034099, 54412, 3195572};
> +  unsigned int minor[10] = {1027568, 21481, 2746675, 3121857, 2471080,
> +      3158801, 237587, 813307, 4073168, 1503494};
> +
> +  unsigned int result1[10] = {528112, 237289, 255027, 90561, 888744,
> +      119313, 336915, 488443, 298192, 177158};
> +  unsigned int result2[10] = {527888, 237065, 254995, 90369, 888584,
> +      119313, 336915, 488219, 298000, 177158};
> +
> +  unsigned int index = 0;
> +  unsigned result = 0;
> +  for (index; index < 10; ++index)
> +    {
> +
> +      result = foo1 (major[index], minor[index]);
> +      if (result != result1[index])
> +       abort ();
> +
> +      result = foo2 (major[index], minor[index]);
> +      if (result != result2[index])
> +       abort ();
> +    }
> +
> +  return 0;
> +}
> +
> +/* { dg-final { scan-assembler-times "bfi\tw\[0-9\]+, w\[0-9\]+, 8|5, 12" 2 
> } } */
> +/* { dg-final { cleanup-saved-temps } } */
> 


Reply via email to