On 24/07/15 11:55, Kyrill Tkachov wrote:
> 
> commit d562629e36ba013b8f77956a74139330d191bc30
> Author: Kyrylo Tkachov <kyrylo.tkac...@arm.com>
> Date:   Fri Jul 17 16:30:01 2015 +0100
> 
>     [ARM][3/3] Expand mod by power of 2
> 
> diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
> index e1bc727..6ade07c 100644
> --- a/gcc/config/arm/arm.c
> +++ b/gcc/config/arm/arm.c
> @@ -9556,6 +9556,22 @@ arm_new_rtx_costs (rtx x, enum rtx_code code, enum 
> rtx_code outer_code,
>  
>      case MOD:
>      case UMOD:
> +      /* MOD by a power of 2 can be expanded as:
> +      rsbs    r1, r0, #0
> +      and     r0, r0, #(n - 1)
> +      and     r1, r1, #(n - 1)
> +      rsbpl   r0, r1, #0.  */
> +      if (code == MOD
> +       && CONST_INT_P (XEXP (x, 1))
> +       && exact_log2 (INTVAL (XEXP (x, 1))) > 0
> +       && mode == SImode)
> +     {
> +       *cost += COSTS_N_INSNS (3)
> +                + 2 * extra_cost->alu.logical
> +                + extra_cost->alu.arith;
> +       return true;
> +     }
> +
>        *cost = LIBCALL_COST (2);
>        return false;  /* All arguments must be in registers.  */
>  
> diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
> index f341109..8301648 100644
> --- a/gcc/config/arm/arm.md
> +++ b/gcc/config/arm/arm.md
> @@ -1229,7 +1229,7 @@ (define_peephole2
>    ""
>  )
>  
> -(define_insn "*subsi3_compare0"
> +(define_insn "subsi3_compare0"
>    [(set (reg:CC_NOOV CC_REGNUM)
>       (compare:CC_NOOV
>        (minus:SI (match_operand:SI 1 "arm_rhs_operand" "r,r,I")
> @@ -2158,7 +2158,7 @@ (define_expand "andsi3"
>  )
>  
>  ; ??? Check split length for Thumb-2
> -(define_insn_and_split "*arm_andsi3_insn"
> +(define_insn_and_split "arm_andsi3_insn"
>    [(set (match_operand:SI         0 "s_register_operand" "=r,l,r,r,r")
>       (and:SI (match_operand:SI 1 "s_register_operand" "%r,0,r,r,r")
>               (match_operand:SI 2 "reg_or_int_operand" "I,l,K,r,?n")))]
> @@ -11105,6 +11105,78 @@ (define_expand "thumb_legacy_rev"
>    ""
>  )

Exporting arm_andsi3_insn (dropping the '*' from its name) shouldn't be
necessary - you are just adding another interface to produce an AND insn.

>  
> +;; ARM-specific expansion of signed mod by power of 2
> +;; using conditional negate.
> +;; For r0 % n where n is a power of 2 produce:
> +;; rsbs    r1, r0, #0
> +;; and     r0, r0, #(n - 1)
> +;; and     r1, r1, #(n - 1)
> +;; rsbpl   r0, r1, #0
> +
> +(define_expand "modsi3"
> +  [(match_operand:SI 0 "register_operand" "")
> +   (match_operand:SI 1 "register_operand" "")
> +   (match_operand:SI 2 "const_int_operand" "")]
> +  "TARGET_32BIT"
> +  {
> +    HOST_WIDE_INT val = INTVAL (operands[2]);
> +
> +    if (val <= 0
> +       || exact_log2 (INTVAL (operands[2])) <= 0
> +       || !const_ok_for_arm (INTVAL (operands[2]) - 1))
> +      FAIL;
> +
> +    rtx mask = GEN_INT (val - 1);
> +
> +    /* In the special case of x0 % 2 we can do the even shorter:
> +     cmp     r0, #0
> +     and     r0, r0, #1
> +     rsblt   r0, r0, #0.  */
> +
> +    if (val == 2)
> +      {
> +     rtx cc_reg = gen_rtx_REG (CCmode, CC_REGNUM);
> +     rtx cond = gen_rtx_LT (SImode, cc_reg, const0_rtx);
> +
> +     emit_insn (gen_rtx_SET (cc_reg,
> +                     gen_rtx_COMPARE (CCmode, operands[1], const0_rtx)));
> +
> +     rtx masked = gen_reg_rtx (SImode);
> +     emit_insn (gen_arm_andsi3_insn (masked, operands[1], mask));

Use emit_insn (gen_andsi3 (masked, operands[1], mask)) instead, and likewise 
below.


> +     emit_move_insn (operands[0],
> +                     gen_rtx_IF_THEN_ELSE (SImode, cond,
> +                                           gen_rtx_NEG (SImode,
> +                                                        masked),
> +                                           masked));
> +     DONE;
> +      }
> +
> +    rtx neg_op = gen_reg_rtx (SImode);
> +    rtx_insn *insn = emit_insn (gen_subsi3_compare0 (neg_op, const0_rtx,
> +                                                   operands[1]));
> +
> +    /* Extract the condition register and mode.  */
> +    rtx cmp = XVECEXP (PATTERN (insn), 0, 0);
> +    rtx cc_reg = SET_DEST (cmp);
> +    rtx cond = gen_rtx_GE (SImode, cc_reg, const0_rtx);
> +
> +    emit_insn (gen_arm_andsi3_insn (operands[0], operands[1], mask));
> +
> +    rtx masked_neg = gen_reg_rtx (SImode);
> +    emit_insn (gen_arm_andsi3_insn (masked_neg, neg_op, mask));
> +
> +    /* We want a conditional negate here, but emitting COND_EXEC rtxes
> +       during expand does not always work.  Do an IF_THEN_ELSE instead.  */
> +    emit_move_insn (operands[0],
> +                 gen_rtx_IF_THEN_ELSE (SImode, cond,
> +                                       gen_rtx_NEG (SImode, masked_neg),
> +                                       operands[0]));
> +
> +
> +    DONE;
> +  }
> +)
> +
>  (define_expand "bswapsi2"
>    [(set (match_operand:SI 0 "s_register_operand" "=r")
>       (bswap:SI (match_operand:SI 1 "s_register_operand" "r")))]
> diff --git a/gcc/testsuite/gcc.target/aarch64/mod_2.c 
> b/gcc/testsuite/gcc.target/aarch64/mod_2.c
> new file mode 100644
> index 0000000..2645c18
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/mod_2.c
> @@ -0,0 +1,7 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mcpu=cortex-a57 -save-temps" } */
> +
> +#include "mod_2.x"
> +
> +/* { dg-final { scan-assembler "csneg\t\[wx\]\[0-9\]*" } } */
> +/* { dg-final { scan-assembler-times "and\t\[wx\]\[0-9\]*" 1 } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/mod_2.x 
> b/gcc/testsuite/gcc.target/aarch64/mod_2.x
> new file mode 100644
> index 0000000..2b079a4
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/mod_2.x
> @@ -0,0 +1,5 @@
> +int
> +f (int x)
> +{
> +  return x % 2;
> +}
> diff --git a/gcc/testsuite/gcc.target/aarch64/mod_256.c 
> b/gcc/testsuite/gcc.target/aarch64/mod_256.c
> new file mode 100644
> index 0000000..567332c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/mod_256.c
> @@ -0,0 +1,6 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mcpu=cortex-a57 -save-temps" } */
> +
> +#include "mod_256.x"
> +
> +/* { dg-final { scan-assembler "csneg\t\[wx\]\[0-9\]*" } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/mod_256.x 
> b/gcc/testsuite/gcc.target/aarch64/mod_256.x
> new file mode 100644
> index 0000000..c1de42c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/mod_256.x
> @@ -0,0 +1,5 @@
> +int
> +f (int x)
> +{
> +  return x % 256;
> +}
> diff --git a/gcc/testsuite/gcc.target/arm/mod_2.c 
> b/gcc/testsuite/gcc.target/arm/mod_2.c
> new file mode 100644
> index 0000000..93017a1
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/mod_2.c
> @@ -0,0 +1,8 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target arm32 } */
> +/* { dg-options "-O2 -mcpu=cortex-a57 -save-temps" } */
> +
> +#include "../aarch64/mod_2.x"
> +
> +/* { dg-final { scan-assembler "rsblt\tr\[0-9\]*" } } */
> +/* { dg-final { scan-assembler-times "and\tr\[0-9\].*1" 1 } } */
> diff --git a/gcc/testsuite/gcc.target/arm/mod_256.c 
> b/gcc/testsuite/gcc.target/arm/mod_256.c
> new file mode 100644
> index 0000000..92ab05a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/mod_256.c
> @@ -0,0 +1,8 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target arm32 } */
> +/* { dg-options "-O2 -mcpu=cortex-a57 -save-temps" } */
> +
> +#include "../aarch64/mod_256.x"
> +
> +/* { dg-final { scan-assembler "rsbpl\tr\[0-9\]*" } } */
> +/* { dg-final { scan-assembler "and\tr\[0-9\].*255" } } */


OK with those changes if no regressions.

Ramana

Reply via email to