Re: [PATCH] aarch64: Use an expander for quad-word vec_pack_trunc pattern

Richard Sandiford via Gcc-patches Wed, 19 May 2021 06:34:39 -0700

Jonathan Wright <jonathan.wri...@arm.com> writes:
> Hi,
>
> The existing vec_pack_trunc RTL pattern emits an opaque two-
> instruction assembly code sequence that prevents proper instruction
> scheduling. This commit changes the pattern to an expander that emits
> individual xtn and xtn2 instructions.
>
> This commit also consolidates the duplicate truncation patterns.
>
> Regression tested and bootstrapped on aarch64-none-linux-gnu - no
> issues.
>
> Ok for master?


OK.  Nice clean-up, thanks.

Richard

> Thanks,
> Jonathan
>
> ---
>
> gcc/ChangeLog:
>
> 2021-05-17  Jonathan Wright  <jonathan.wri...@arm.com>
>
>         * config/aarch64/aarch64-simd.md (aarch64_simd_vec_pack_trunc_<mode>):
>         Remove as duplicate of...
>         (aarch64_xtn<mode>): This.
>         (aarch64_xtn2<mode>_le): Move position in file.
>         (aarch64_xtn2<mode>_be): Move position in file.
>         (aarch64_xtn2<mode>): Move position in file.
>         (vec_pack_trunc_<mode>): Define as an expander.
>
> diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
> index c67fa3fb6f0ca0a181a09a42677526d12e955c06..447b5575f2f5adbad4957e90787a4954af644b20 100644
> --- a/gcc/config/aarch64/aarch64-simd.md
> +++ b/gcc/config/aarch64/aarch64-simd.md
> @@ -1691,14 +1691,51 @@
>  ;; Narrowing operations.
>  
>  ;; For doubles.
> -(define_insn "aarch64_simd_vec_pack_trunc_<mode>"
> - [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
> -       (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
> - "TARGET_SIMD"
> - "xtn\\t%0.<Vntype>, %1.<Vtype>"
> +
> +(define_insn "aarch64_xtn<mode>"
> +  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
> +     (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
> +  "TARGET_SIMD"
> +  "xtn\\t%0.<Vntype>, %1.<Vtype>"
>    [(set_attr "type" "neon_shift_imm_narrow_q")]
>  )
>  
> +(define_insn "aarch64_xtn2<mode>_le"
> +  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
> +     (vec_concat:<VNARROWQ2>
> +       (match_operand:<VNARROWQ> 1 "register_operand" "0")
> +       (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
> +  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
> +  "xtn2\t%0.<V2ntype>, %2.<Vtype>"
> +  [(set_attr "type" "neon_shift_imm_narrow_q")]
> +)
> +
> +(define_insn "aarch64_xtn2<mode>_be"
> +  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
> +     (vec_concat:<VNARROWQ2>
> +       (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))
> +       (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
> +  "TARGET_SIMD && BYTES_BIG_ENDIAN"
> +  "xtn2\t%0.<V2ntype>, %2.<Vtype>"
> +  [(set_attr "type" "neon_shift_imm_narrow_q")]
> +)
> +
> +(define_expand "aarch64_xtn2<mode>"
> +  [(match_operand:<VNARROWQ2> 0 "register_operand")
> +   (match_operand:<VNARROWQ> 1 "register_operand")
> +   (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand"))]
> +  "TARGET_SIMD"
> +  {
> +    if (BYTES_BIG_ENDIAN)
> +      emit_insn (gen_aarch64_xtn2<mode>_be (operands[0], operands[1],
> +                                          operands[2]));
> +    else
> +      emit_insn (gen_aarch64_xtn2<mode>_le (operands[0], operands[1],
> +                                          operands[2]));
> +    DONE;
> +  }
> +)
> +
>  (define_expand "vec_pack_trunc_<mode>"
>   [(match_operand:<VNARROWD> 0 "register_operand")
>    (match_operand:VDN 1 "register_operand")
> @@ -1711,7 +1748,7 @@
>  
>    emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
>    emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
> -  emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg));
> +  emit_insn (gen_aarch64_xtn<Vdbl> (operands[0], tempreg));
>    DONE;
>  })
>  
> @@ -1901,20 +1938,25 @@
>  
>  ;; For quads.
>  
> -(define_insn "vec_pack_trunc_<mode>"
> - [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
> +(define_expand "vec_pack_trunc_<mode>"
> + [(set (match_operand:<VNARROWQ2> 0 "register_operand")
>         (vec_concat:<VNARROWQ2>
> -      (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
> -      (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
> +      (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand"))
> +      (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand"))))]
>   "TARGET_SIMD"
>   {
> +   rtx tmpreg = gen_reg_rtx (<VNARROWQ>mode);
> +   int lo = BYTES_BIG_ENDIAN ? 2 : 1;
> +   int hi = BYTES_BIG_ENDIAN ? 1 : 2;
> +
> +   emit_insn (gen_aarch64_xtn<mode> (tmpreg, operands[lo]));
> +
>     if (BYTES_BIG_ENDIAN)
> -     return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
> +     emit_insn (gen_aarch64_xtn2<mode>_be (operands[0], tmpreg, operands[hi]));
>     else
> -     return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
> +     emit_insn (gen_aarch64_xtn2<mode>_le (operands[0], tmpreg, operands[hi]));
> +   DONE;
>   }
> -  [(set_attr "type" "multiple")
> -   (set_attr "length" "8")]
>  )
>  
>  ;; Widening operations.
> @@ -8570,13 +8612,6 @@
>    ""
>  )
>  
> -(define_expand "aarch64_xtn<mode>"
> -  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
> -     (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
> -  "TARGET_SIMD"
> -  ""
> -)
> -
>  ;; Truncate a 128-bit integer vector to a 64-bit vector.
>  (define_insn "trunc<mode><Vnarrowq>2"
>    [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
> @@ -8586,42 +8621,6 @@
>    [(set_attr "type" "neon_shift_imm_narrow_q")]
>  )
>  
> -(define_insn "aarch64_xtn2<mode>_le"
> -  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
> -     (vec_concat:<VNARROWQ2>
> -       (match_operand:<VNARROWQ> 1 "register_operand" "0")
> -       (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
> -  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
> -  "xtn2\t%0.<V2ntype>, %2.<Vtype>"
> -  [(set_attr "type" "neon_shift_imm_narrow_q")]
> -)
> -
> -(define_insn "aarch64_xtn2<mode>_be"
> -  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
> -     (vec_concat:<VNARROWQ2>
> -       (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))
> -       (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
> -  "TARGET_SIMD && BYTES_BIG_ENDIAN"
> -  "xtn2\t%0.<V2ntype>, %2.<Vtype>"
> -  [(set_attr "type" "neon_shift_imm_narrow_q")]
> -)
> -
> -(define_expand "aarch64_xtn2<mode>"
> -  [(match_operand:<VNARROWQ2> 0 "register_operand")
> -   (match_operand:<VNARROWQ> 1 "register_operand")
> -   (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand"))]
> -  "TARGET_SIMD"
> -  {
> -    if (BYTES_BIG_ENDIAN)
> -      emit_insn (gen_aarch64_xtn2<mode>_be (operands[0], operands[1],
> -                                          operands[2]));
> -    else
> -      emit_insn (gen_aarch64_xtn2<mode>_le (operands[0], operands[1],
> -                                          operands[2]));
> -    DONE;
> -  }
> -)
> -
>  (define_insn "aarch64_bfdot<mode>"
>    [(set (match_operand:VDQSF 0 "register_operand" "=w")
>       (plus:VDQSF

Re: [PATCH] aarch64: Use an expander for quad-word vec_pack_trunc pattern

Reply via email to