On 10/08/17 14:12, Jackson Woodruff wrote:
> Hi all,
> 
> This patch changes patterns in aarch64-simd.md to replace
> 
>     movi    v0.4s, 0
>     str    q0, [x0, 16]
> 
> With:
> 
>     stp xzr, xzr, [x0, 16]
> 
> When we are storing zeros to vectors like this:
> 
>     void f(uint32x4_t *p) {
>       uint32x4_t x = { 0, 0, 0, 0};
>       p[1] = x;
>     }
> 
> Bootstrapped and regtested on aarch64 with no regressions.
> OK for trunk?
> 
> Jackson
> 
> gcc/
> 
> 2017-08-09  Jackson Woodruff  <jackson.woodr...@arm.com>
> 
>     * aarch64-simd.md (mov<mode>): No longer force zero
>     immediate into register.
>     (*aarch64_simd_mov<mode>): Add new case for stp
>     using zero immediate.
> 
> 
> gcc/testsuite
> 
> 2017-08-09  Jackson Woodruff  <jackson.woodr...@arm.com>
> 
>     * gcc.target/aarch64/simd/neon_str_zero.c: New.
> 
> 
> patchfile
> 
> 
> diff --git a/gcc/config/aarch64/aarch64-simd.md 
> b/gcc/config/aarch64/aarch64-simd.md
> index 
> 74de9b8c89dd5e4e3d87504594c969de0e0128ce..0149a742d34ae4fd5b3fd705b03c845f94aa1d59
>  100644
> --- a/gcc/config/aarch64/aarch64-simd.md
> +++ b/gcc/config/aarch64/aarch64-simd.md
> @@ -23,7 +23,10 @@
>       (match_operand:VALL_F16 1 "general_operand" ""))]
>    "TARGET_SIMD"
>    "
> -    if (GET_CODE (operands[0]) == MEM)
> +    if (GET_CODE (operands[0]) == MEM
> +         && !(aarch64_simd_imm_zero (operands[1], <MODE>mode)
> +              && aarch64_legitimate_address_p (<MODE>mode, operands[0],
> +                                               PARALLEL, 1)))
>        operands[1] = force_reg (<MODE>mode, operands[1]);
>    "
>  )
> @@ -94,63 +97,70 @@
>  
>  (define_insn "*aarch64_simd_mov<mode>"
>    [(set (match_operand:VD 0 "nonimmediate_operand"
> -             "=w, m,  w, ?r, ?w, ?r, w")
> +             "=w, m,  m,  w, ?r, ?w, ?r, w")
>       (match_operand:VD 1 "general_operand"
> -             "m,  w,  w,  w,  r,  r, Dn"))]
> +             "m,  Dz, w,  w,  w,  r,  r, Dn"))]
>    "TARGET_SIMD
> -   && (register_operand (operands[0], <MODE>mode)
> -       || register_operand (operands[1], <MODE>mode))"
> +   && ((register_operand (operands[0], <MODE>mode)
> +       || register_operand (operands[1], <MODE>mode))
> +      || (memory_operand (operands[0], <MODE>mode)
> +       && immediate_operand (operands[1], <MODE>mode)))"

Allowing any immediate here seems too lax - it allows any immediate
value which then could cause reload operations to be inserted (that in
turn might cause register pressure calculations to be incorrect).
Wouldn't it be better to use something like aarch64_simd_reg_or_zero?
Similarly below.

R.

>  {
>     switch (which_alternative)
>       {
>       case 0: return "ldr\\t%d0, %1";
> -     case 1: return "str\\t%d1, %0";
> -     case 2: return "mov\t%0.<Vbtype>, %1.<Vbtype>";
> -     case 3: return "umov\t%0, %1.d[0]";
> -     case 4: return "fmov\t%d0, %1";
> -     case 5: return "mov\t%0, %1";
> -     case 6:
> +     case 1: return "str\\txzr, %0";
> +     case 2: return "str\\t%d1, %0";
> +     case 3: return "mov\t%0.<Vbtype>, %1.<Vbtype>";
> +     case 4: return "umov\t%0, %1.d[0]";
> +     case 5: return "fmov\t%d0, %1";
> +     case 6: return "mov\t%0, %1";
> +     case 7:
>       return aarch64_output_simd_mov_immediate (operands[1],
>                                                 <MODE>mode, 64);
>       default: gcc_unreachable ();
>       }
>  }
> -  [(set_attr "type" "neon_load1_1reg<q>, neon_store1_1reg<q>,\
> +  [(set_attr "type" "neon_load1_1reg<q>, neon_stp, neon_store1_1reg<q>,\
>                    neon_logic<q>, neon_to_gp<q>, f_mcr,\
>                    mov_reg, neon_move<q>")]
>  )
>  
>  (define_insn "*aarch64_simd_mov<mode>"
>    [(set (match_operand:VQ 0 "nonimmediate_operand"
> -             "=w, m,  w, ?r, ?w, ?r, w")
> +             "=w, Ump,  m,  w, ?r, ?w, ?r, w")
>       (match_operand:VQ 1 "general_operand"
> -             "m,  w,  w,  w,  r,  r, Dn"))]
> +             "m,  Dz, w,  w,  w,  r,  r, Dn"))]
>    "TARGET_SIMD
> -   && (register_operand (operands[0], <MODE>mode)
> -       || register_operand (operands[1], <MODE>mode))"
> +   && ((register_operand (operands[0], <MODE>mode)
> +     || register_operand (operands[1], <MODE>mode))
> +       || (memory_operand (operands[0], <MODE>mode)
> +        && immediate_operand (operands[1], <MODE>mode)))"
>  {
>    switch (which_alternative)
>      {
>      case 0:
>       return "ldr\\t%q0, %1";
>      case 1:
> -     return "str\\t%q1, %0";
> +     return "stp\\txzr, xzr, %0";
>      case 2:
> -     return "mov\t%0.<Vbtype>, %1.<Vbtype>";
> +     return "str\\t%q1, %0";
>      case 3:
> +     return "mov\t%0.<Vbtype>, %1.<Vbtype>";
>      case 4:
>      case 5:
> -     return "#";
>      case 6:
> +     return "#";
> +    case 7:
>       return aarch64_output_simd_mov_immediate (operands[1], <MODE>mode, 128);
>      default:
>       gcc_unreachable ();
>      }
>  }
>    [(set_attr "type" "neon_load1_1reg<q>, neon_store1_1reg<q>,\
> -                     neon_logic<q>, multiple, multiple, multiple,\
> -                     neon_move<q>")
> -   (set_attr "length" "4,4,4,8,8,8,4")]
> +                  neon_stp, neon_logic<q>, multiple, multiple,\
> +                  multiple, neon_move<q>")
> +   (set_attr "length" "4,4,4,4,8,8,8,4")]
>  )
>  
>  ;; When storing lane zero we can use the normal STR and its more permissive
> diff --git a/gcc/testsuite/gcc.target/aarch64/simd/neon_str_zero.c 
> b/gcc/testsuite/gcc.target/aarch64/simd/neon_str_zero.c
> new file mode 100644
> index 
> 0000000000000000000000000000000000000000..07198de109432b530745cc540790303ae0245efb
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/simd/neon_str_zero.c
> @@ -0,0 +1,22 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O1" } */
> +
> +#include <arm_neon.h>
> +
> +void
> +f (uint32x4_t *p)
> +{
> +  uint32x4_t x = { 0, 0, 0, 0};
> +  p[1] = x;
> +
> +  /* { dg-final { scan-assembler "stp\txzr, xzr," } } */
> +}
> +
> +void
> +g (float32x2_t *p)
> +{
> +  float32x2_t x = {0.0, 0.0};
> +  p[0] = x;
> +
> +  /* { dg-final { scan-assembler "str\txzr, " } } */
> +}
> 

Reply via email to