Joel Hutton via Gcc-patches <gcc-patches@gcc.gnu.org> writes:
> Hi all,
>
> This patch adds support in the aarch64 backend for the vec_widen_shift 
> vect-pattern and makes a minor mid-end fix to support it.
>
> All 3 patches together bootstrapped and regression tested on aarch64.
>
> Ok for stage 1?
>
> gcc/ChangeLog:
>
> 2020-11-12  Joel Hutton  <joel.hut...@arm.com>
>
>         * config/aarch64/aarch64-simd.md: vec_widen_lshift_hi/lo<mode> 
> patterns
>         * tree-vect-stmts.c 
>         (vectorizable_conversion): Fix for widen_lshift case
>
> gcc/testsuite/ChangeLog:
>
> 2020-11-12  Joel Hutton  <joel.hut...@arm.com>
>
>         * gcc.target/aarch64/vect-widen-lshift.c: New test.
>
> From 97af35b2d2a505dcefd8474cbd4bc3441b83ab02 Mon Sep 17 00:00:00 2001
> From: Joel Hutton <joel.hut...@arm.com>
> Date: Thu, 12 Nov 2020 11:48:25 +0000
> Subject: [PATCH 3/3] [AArch64][vect] vec_widen_lshift pattern
>
> Add aarch64 vec_widen_lshift_lo/hi patterns and fix bug it triggers in
> mid-end.
> ---
>  gcc/config/aarch64/aarch64-simd.md            | 66 +++++++++++++++++++
>  .../gcc.target/aarch64/vect-widen-lshift.c    | 60 +++++++++++++++++
>  gcc/tree-vect-stmts.c                         |  9 ++-
>  3 files changed, 133 insertions(+), 2 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/vect-widen-lshift.c
>
> diff --git a/gcc/config/aarch64/aarch64-simd.md 
> b/gcc/config/aarch64/aarch64-simd.md
> index 
> b4f56a2295926f027bd53e7456eec729af0cd6df..2bb39c530a1a861cb9bd3df0c2943f62bd6153d7
>  100644
> --- a/gcc/config/aarch64/aarch64-simd.md
> +++ b/gcc/config/aarch64/aarch64-simd.md
> @@ -4711,8 +4711,74 @@
>    [(set_attr "type" "neon_sat_shift_reg<q>")]
>  )
>  
> +(define_expand "vec_widen_<sur>shiftl_lo_<mode>"
> +  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
> +     (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
> +                      (match_operand:SI 2
> +                        "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
> +                      VSHLL))]
> +  "TARGET_SIMD"
> +  {
> +    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
> +    emit_insn (gen_aarch64_<sur>shll<mode>_internal (operands[0], 
> operands[1],
> +                                                  p, operands[2]));
> +    DONE;
> +  }
> +)
> +
> +(define_expand "vec_widen_<sur>shiftl_hi_<mode>"
> +   [(set (match_operand:<VWIDE> 0 "register_operand")
> +     (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
> +                      (match_operand:SI 2
> +                        "immediate_operand" "i")]
> +                       VSHLL))]
> +   "TARGET_SIMD"
> +   {
> +    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
> +    emit_insn (gen_aarch64_<sur>shll2<mode>_internal (operands[0], 
> operands[1],
> +                                                   p, operands[2]));
> +    DONE;
> +   }
> +)
> +
>  ;; vshll_n
>  
> +(define_insn "aarch64_<sur>shll<mode>_internal"
> +  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
> +     (unspec:<VWIDE> [(vec_select:<VHALF>
> +                         (match_operand:VQW 1 "register_operand" "w")
> +                         (match_operand:VQW 2 "vect_par_cnst_lo_half" ""))
> +                      (match_operand:SI 3
> +                        "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
> +                      VSHLL))]
> +  "TARGET_SIMD"
> +  {
> +    if (INTVAL (operands[3]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
> +      return "shll\\t%0.<Vwtype>, %1.<Vhalftype>, %3";
> +    else
> +      return "<sur>shll\\t%0.<Vwtype>, %1.<Vhalftype>, %3";
> +  }
> +  [(set_attr "type" "neon_shift_imm_long")]
> +)
> +
> +(define_insn "aarch64_<sur>shll2<mode>_internal"
> +  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
> +     (unspec:<VWIDE> [(vec_select:<VHALF>
> +                         (match_operand:VQW 1 "register_operand" "w")
> +                         (match_operand:VQW 2 "vect_par_cnst_hi_half" ""))
> +                      (match_operand:SI 3
> +                        "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
> +                      VSHLL))]
> +  "TARGET_SIMD"
> +  {
> +    if (INTVAL (operands[3]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
> +      return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %3";
> +    else
> +      return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %3";
> +  }
> +  [(set_attr "type" "neon_shift_imm_long")]
> +)
> +
>  (define_insn "aarch64_<sur>shll_n<mode>"
>    [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
>       (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
> diff --git a/gcc/testsuite/gcc.target/aarch64/vect-widen-lshift.c 
> b/gcc/testsuite/gcc.target/aarch64/vect-widen-lshift.c
> new file mode 100644
> index 
> 0000000000000000000000000000000000000000..23ed93d1dcbc3ca559efa6708b4ed5855fb6a050
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/vect-widen-lshift.c
> @@ -0,0 +1,60 @@
> +/* { dg-do run } */
> +/* { dg-options "-O3 -save-temps" } */
> +#include <stdint.h>
> +#include <string.h>
> +

SVE targets will need a:

#pragma GCC target "+nosve"

here, since we'll generate different code for SVE.

> +#define ARR_SIZE 1024
> +
> +/* Should produce an shll,shll2 pair*/
> +void sshll_opt (int32_t *foo, int16_t *a, int16_t *b)
> +{
> +    for( int i = 0; i < ARR_SIZE - 3;i=i+4)
> +    {
> +        foo[i]   = a[i]   << 16;
> +        foo[i+1] = a[i+1] << 16;
> +        foo[i+2] = a[i+2] << 16;
> +        foo[i+3] = a[i+3] << 16;
> +    }
> +}
> +
> +__attribute__((optimize (0)))
> +void sshll_nonopt (int32_t *foo, int16_t *a, int16_t *b)
> +{
> +    for( int i = 0; i < ARR_SIZE - 3;i=i+4)
> +    {
> +        foo[i]   = a[i]   << 16;
> +        foo[i+1] = a[i+1] << 16;
> +        foo[i+2] = a[i+2] << 16;
> +        foo[i+3] = a[i+3] << 16;
> +    }
> +}
> +
> +
> +void __attribute__((optimize (0)))
> +init(uint16_t *a, uint16_t *b)
> +{
> +    for( int i = 0; i < ARR_SIZE;i++)
> +    {
> +      a[i] = i;
> +      b[i] = 2*i;
> +    }
> +}
> +
> +int __attribute__((optimize (0)))
> +main()
> +{
> +    uint32_t foo_arr[ARR_SIZE];
> +    uint32_t bar_arr[ARR_SIZE];
> +    uint16_t a[ARR_SIZE];
> +    uint16_t b[ARR_SIZE];
> +
> +    init(a, b);
> +    sshll_opt(foo_arr, a, b);
> +    sshll_nonopt(bar_arr, a, b);
> +    if (memcmp(foo_arr, bar_arr, ARR_SIZE) != 0)
> +      return 1;
> +    return 0;
> +}
> +
> +/* { dg-final { scan-assembler-times "shll\t" 1} } */
> +/* { dg-final { scan-assembler-times "shll2\t" 1} } */

Very minor nit, sorry, but I think:

/* { dg-final { scan-assembler-times {\tshll\t} 1 } } */

would be better.  Using "…\t" works, but IIRC it shows up as a tab
character in the testsuite result summary too.

> diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
> index 
> f12fd158b13656ee24022ec7e445c53444be6554..1f40b59c0560eec675af1d9a0e3e818d47589de6
>  100644
> --- a/gcc/tree-vect-stmts.c
> +++ b/gcc/tree-vect-stmts.c
> @@ -4934,8 +4934,13 @@ vectorizable_conversion (vec_info *vinfo,
>                        &vec_oprnds1);
>        if (code == WIDEN_LSHIFT_EXPR)
>       {
> -       vec_oprnds1.create (ncopies * ninputs);
> -       for (i = 0; i < ncopies * ninputs; ++i)
> +       int oprnds_size = ncopies * ninputs;
> +       /* In the case of SLP ncopies = 1, so the size of vec_oprnds1 here
> +        * should be obtained by the the size of vec_oprnds0.  */

This is redundant given Richard's comment, but FWIW, GCC style
is to indent without “*”s, so:

          /* In the case of SLP ncopies = 1, so the size of vec_oprnds1 here
             should be obtained from the size of vec_oprnds0.  */

OK for the aarch64 bits with the testsuite changes above.

Thanks,
Richard

> +       if (slp_node)
> +         oprnds_size = vec_oprnds0.length ();
> +       vec_oprnds1.create (oprnds_size);
> +       for (i = 0; i < oprnds_size; ++i)
>           vec_oprnds1.quick_push (op1);
>       }
>        /* Arguments are ready.  Create the new vector stmts.  */

Reply via email to