On 24/07/15 11:55, Kyrill Tkachov wrote:
>
> commit d562629e36ba013b8f77956a74139330d191bc30
> Author: Kyrylo Tkachov <[email protected]>
> Date: Fri Jul 17 16:30:01 2015 +0100
>
> [ARM][3/3] Expand mod by power of 2
>
> diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
> index e1bc727..6ade07c 100644
> --- a/gcc/config/arm/arm.c
> +++ b/gcc/config/arm/arm.c
> @@ -9556,6 +9556,22 @@ arm_new_rtx_costs (rtx x, enum rtx_code code, enum
> rtx_code outer_code,
>
> case MOD:
> case UMOD:
> + /* MOD by a power of 2 can be expanded as:
> + rsbs r1, r0, #0
> + and r0, r0, #(n - 1)
> + and r1, r1, #(n - 1)
> + rsbpl r0, r1, #0. */
> + if (code == MOD
> + && CONST_INT_P (XEXP (x, 1))
> + && exact_log2 (INTVAL (XEXP (x, 1))) > 0
> + && mode == SImode)
> + {
> + *cost += COSTS_N_INSNS (3)
> + + 2 * extra_cost->alu.logical
> + + extra_cost->alu.arith;
> + return true;
> + }
> +
> *cost = LIBCALL_COST (2);
> return false; /* All arguments must be in registers. */
>
> diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
> index f341109..8301648 100644
> --- a/gcc/config/arm/arm.md
> +++ b/gcc/config/arm/arm.md
> @@ -1229,7 +1229,7 @@ (define_peephole2
> ""
> )
>
> -(define_insn "*subsi3_compare0"
> +(define_insn "subsi3_compare0"
> [(set (reg:CC_NOOV CC_REGNUM)
> (compare:CC_NOOV
> (minus:SI (match_operand:SI 1 "arm_rhs_operand" "r,r,I")
> @@ -2158,7 +2158,7 @@ (define_expand "andsi3"
> )
>
> ; ??? Check split length for Thumb-2
> -(define_insn_and_split "*arm_andsi3_insn"
> +(define_insn_and_split "arm_andsi3_insn"
> [(set (match_operand:SI 0 "s_register_operand" "=r,l,r,r,r")
> (and:SI (match_operand:SI 1 "s_register_operand" "%r,0,r,r,r")
> (match_operand:SI 2 "reg_or_int_operand" "I,l,K,r,?n")))]
> @@ -11105,6 +11105,78 @@ (define_expand "thumb_legacy_rev"
> ""
> )
This shouldn't be necessary - you are just adding another interface to produce
an AND insn.
>
> +;; ARM-specific expansion of signed mod by power of 2
> +;; using conditional negate.
> +;; For r0 % n where n is a power of 2 produce:
> +;; rsbs r1, r0, #0
> +;; and r0, r0, #(n - 1)
> +;; and r1, r1, #(n - 1)
> +;; rsbpl r0, r1, #0
> +
> +(define_expand "modsi3"
> + [(match_operand:SI 0 "register_operand" "")
> + (match_operand:SI 1 "register_operand" "")
> + (match_operand:SI 2 "const_int_operand" "")]
> + "TARGET_32BIT"
> + {
> + HOST_WIDE_INT val = INTVAL (operands[2]);
> +
> + if (val <= 0
> + || exact_log2 (INTVAL (operands[2])) <= 0
> + || !const_ok_for_arm (INTVAL (operands[2]) - 1))
> + FAIL;
> +
> + rtx mask = GEN_INT (val - 1);
> +
> + /* In the special case of x0 % 2 we can do the even shorter:
> + cmp r0, #0
> + and r0, r0, #1
> + rsblt r0, r0, #0. */
> +
> + if (val == 2)
> + {
> + rtx cc_reg = gen_rtx_REG (CCmode, CC_REGNUM);
> + rtx cond = gen_rtx_LT (SImode, cc_reg, const0_rtx);
> +
> + emit_insn (gen_rtx_SET (cc_reg,
> + gen_rtx_COMPARE (CCmode, operands[1], const0_rtx)));
> +
> + rtx masked = gen_reg_rtx (SImode);
> + emit_insn (gen_arm_andsi3_insn (masked, operands[1], mask));
Use emit_insn (gen_andsi3 (masked, operands[1], mask)) instead, and likewise
below.
> + emit_move_insn (operands[0],
> + gen_rtx_IF_THEN_ELSE (SImode, cond,
> + gen_rtx_NEG (SImode,
> + masked),
> + masked));
> + DONE;
> + }
> +
> + rtx neg_op = gen_reg_rtx (SImode);
> + rtx_insn *insn = emit_insn (gen_subsi3_compare0 (neg_op, const0_rtx,
> + operands[1]));
> +
> + /* Extract the condition register and mode. */
> + rtx cmp = XVECEXP (PATTERN (insn), 0, 0);
> + rtx cc_reg = SET_DEST (cmp);
> + rtx cond = gen_rtx_GE (SImode, cc_reg, const0_rtx);
> +
> + emit_insn (gen_arm_andsi3_insn (operands[0], operands[1], mask));
> +
> + rtx masked_neg = gen_reg_rtx (SImode);
> + emit_insn (gen_arm_andsi3_insn (masked_neg, neg_op, mask));
> +
> + /* We want a conditional negate here, but emitting COND_EXEC rtxes
> + during expand does not always work. Do an IF_THEN_ELSE instead. */
> + emit_move_insn (operands[0],
> + gen_rtx_IF_THEN_ELSE (SImode, cond,
> + gen_rtx_NEG (SImode, masked_neg),
> + operands[0]));
> +
> +
> + DONE;
> + }
> +)
> +
> (define_expand "bswapsi2"
> [(set (match_operand:SI 0 "s_register_operand" "=r")
> (bswap:SI (match_operand:SI 1 "s_register_operand" "r")))]
> diff --git a/gcc/testsuite/gcc.target/aarch64/mod_2.c
> b/gcc/testsuite/gcc.target/aarch64/mod_2.c
> new file mode 100644
> index 0000000..2645c18
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/mod_2.c
> @@ -0,0 +1,7 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mcpu=cortex-a57 -save-temps" } */
> +
> +#include "mod_2.x"
> +
> +/* { dg-final { scan-assembler "csneg\t\[wx\]\[0-9\]*" } } */
> +/* { dg-final { scan-assembler-times "and\t\[wx\]\[0-9\]*" 1 } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/mod_2.x
> b/gcc/testsuite/gcc.target/aarch64/mod_2.x
> new file mode 100644
> index 0000000..2b079a4
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/mod_2.x
> @@ -0,0 +1,5 @@
> +int
> +f (int x)
> +{
> + return x % 2;
> +}
> diff --git a/gcc/testsuite/gcc.target/aarch64/mod_256.c
> b/gcc/testsuite/gcc.target/aarch64/mod_256.c
> new file mode 100644
> index 0000000..567332c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/mod_256.c
> @@ -0,0 +1,6 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mcpu=cortex-a57 -save-temps" } */
> +
> +#include "mod_256.x"
> +
> +/* { dg-final { scan-assembler "csneg\t\[wx\]\[0-9\]*" } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/mod_256.x
> b/gcc/testsuite/gcc.target/aarch64/mod_256.x
> new file mode 100644
> index 0000000..c1de42c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/mod_256.x
> @@ -0,0 +1,5 @@
> +int
> +f (int x)
> +{
> + return x % 256;
> +}
> diff --git a/gcc/testsuite/gcc.target/arm/mod_2.c
> b/gcc/testsuite/gcc.target/arm/mod_2.c
> new file mode 100644
> index 0000000..93017a1
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/mod_2.c
> @@ -0,0 +1,8 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target arm32 } */
> +/* { dg-options "-O2 -mcpu=cortex-a57 -save-temps" } */
> +
> +#include "../aarch64/mod_2.x"
> +
> +/* { dg-final { scan-assembler "rsblt\tr\[0-9\]*" } } */
> +/* { dg-final { scan-assembler-times "and\tr\[0-9\].*1" 1 } } */
> diff --git a/gcc/testsuite/gcc.target/arm/mod_256.c
> b/gcc/testsuite/gcc.target/arm/mod_256.c
> new file mode 100644
> index 0000000..92ab05a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/mod_256.c
> @@ -0,0 +1,8 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target arm32 } */
> +/* { dg-options "-O2 -mcpu=cortex-a57 -save-temps" } */
> +
> +#include "../aarch64/mod_256.x"
> +
> +/* { dg-final { scan-assembler "rsbpl\tr\[0-9\]*" } } */
> +/* { dg-final { scan-assembler "and\tr\[0-9\].*255" } } */
OK with those changes if no regressions.
Ramana