Tamar Christina <tamar.christ...@arm.com> writes:
>> > +
>> > +(define_constraint "D3"
>> > +  "@internal
>> > + A constraint that matches vector of immediates that is with 0 to
>> > +(bits(mode)/2)-1."
>> > + (and (match_code "const,const_vector")
>> > +      (match_test "aarch64_const_vec_all_same_in_range_p (op, 0,
>> > +                  (GET_MODE_UNIT_BITSIZE (mode) / 2) - 1)")))
>> 
>> Having this mapping for D2 and D3, with D2 corresponding to prec/2, kind-of
>> makes D3 a false mnemonic.  How about DL instead?  (L for "left-shift long"
>> or "low-part", take your pick)
>> 
>> Looks good otherwise.
>> 
>
> Wasn't sure if this was an ok with changes or not, so here's the final patch 😊

I was hoping to have another look before it went in.  But...

> Bootstrapped and regtested on aarch64-none-linux-gnu with no issues.
>
> Ok for master?

...yeah, LGTM, thanks.

Richard

> Thanks,
> Tamar
>
> gcc/ChangeLog:
>
>       PR target/106346
>       * config/aarch64/aarch64-simd.md (vec_widen_<sur>shiftl_lo_<mode>,
>       vec_widen_<sur>shiftl_hi_<mode>): Remove.
>       (aarch64_<sur>shll<mode>_internal): Renamed to...
>       (aarch64_<su>shll<mode>): .. This.
>       (aarch64_<sur>shll2<mode>_internal): Renamed to...
>       (aarch64_<su>shll2<mode>): .. This.
>       (aarch64_<sur>shll_n<mode>, aarch64_<sur>shll2_n<mode>): Re-use new
>       optabs.
>       * config/aarch64/constraints.md (D2, DL): New.
>       * config/aarch64/predicates.md (aarch64_simd_shll_imm_vec): New.
>
> gcc/testsuite/ChangeLog:
>
>       PR target/106346
>       * gcc.target/aarch64/pr98772.c: Adjust assembly.
>       * gcc.target/aarch64/vect-widen-shift.c: New test.
>
> --- inline copy of patch ---
>
> diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
> index d95394101470446e55f25a2397dd112239b6a54d..f67eb70577d0c2d9911d8c867d38a4d0b390337c 100644
> --- a/gcc/config/aarch64/aarch64-simd.md
> +++ b/gcc/config/aarch64/aarch64-simd.md
> @@ -6387,105 +6387,67 @@ (define_insn "aarch64_<sur>q<r>shl<mode><vczle><vczbe>"
>    [(set_attr "type" "neon_sat_shift_reg<q>")]
>  )
>  
> -(define_expand "vec_widen_<sur>shiftl_lo_<mode>"
> -  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
> -     (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
> -                      (match_operand:SI 2
> -                        "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
> -                      VSHLL))]
> -  "TARGET_SIMD"
> -  {
> -    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
> -    emit_insn (gen_aarch64_<sur>shll<mode>_internal (operands[0], operands[1],
> -                                                  p, operands[2]));
> -    DONE;
> -  }
> -)
> -
> -(define_expand "vec_widen_<sur>shiftl_hi_<mode>"
> -   [(set (match_operand:<VWIDE> 0 "register_operand")
> -     (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
> -                      (match_operand:SI 2
> -                        "immediate_operand" "i")]
> -                       VSHLL))]
> -   "TARGET_SIMD"
> -   {
> -    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
> -    emit_insn (gen_aarch64_<sur>shll2<mode>_internal (operands[0], operands[1],
> -                                                   p, operands[2]));
> -    DONE;
> -   }
> -)
> -
>  ;; vshll_n
>  
> -(define_insn "aarch64_<sur>shll<mode>_internal"
> -  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
> -     (unspec:<VWIDE> [(vec_select:<VHALF>
> -                         (match_operand:VQW 1 "register_operand" "w")
> -                         (match_operand:VQW 2 "vect_par_cnst_lo_half" ""))
> -                      (match_operand:SI 3
> -                        "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
> -                      VSHLL))]
> +(define_insn "aarch64_<su>shll<mode>"
> +  [(set (match_operand:<VWIDE> 0 "register_operand")
> +     (ashift:<VWIDE> (ANY_EXTEND:<VWIDE>
> +                         (match_operand:VD_BHSI 1 "register_operand"))
> +                      (match_operand:<VWIDE> 2
> +                        "aarch64_simd_shll_imm_vec")))]
>    "TARGET_SIMD"
> -  {
> -    if (INTVAL (operands[3]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
> -      return "shll\\t%0.<Vwtype>, %1.<Vhalftype>, %3";
> -    else
> -      return "<sur>shll\\t%0.<Vwtype>, %1.<Vhalftype>, %3";
> +  {@ [cons: =0, 1, 2]
> +     [w, w, D2] shll\t%0.<Vwtype>, %1.<Vtype>, %I2
> +     [w, w, DL] <su>shll\t%0.<Vwtype>, %1.<Vtype>, %I2
>    }
>    [(set_attr "type" "neon_shift_imm_long")]
>  )
>  
> -(define_insn "aarch64_<sur>shll2<mode>_internal"
> -  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
> -     (unspec:<VWIDE> [(vec_select:<VHALF>
> -                         (match_operand:VQW 1 "register_operand" "w")
> -                         (match_operand:VQW 2 "vect_par_cnst_hi_half" ""))
> -                      (match_operand:SI 3
> -                        "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
> +(define_expand "aarch64_<sur>shll_n<mode>"
> +  [(set (match_operand:<VWIDE> 0 "register_operand")
> +     (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand")
> +                      (match_operand:SI 2
> +                        "aarch64_simd_shift_imm_bitsize_<ve_mode>")]
>                        VSHLL))]
>    "TARGET_SIMD"
>    {
> -    if (INTVAL (operands[3]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
> -      return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %3";
> -    else
> -      return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %3";
> +    rtx shft = gen_const_vec_duplicate (<VWIDE>mode, operands[2]);
> +    emit_insn (gen_aarch64_<sur>shll<mode> (operands[0], operands[1], shft));
> +    DONE;
>    }
> -  [(set_attr "type" "neon_shift_imm_long")]
>  )
>  
> -(define_insn "aarch64_<sur>shll_n<mode>"
> -  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
> -     (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
> -                      (match_operand:SI 2
> -                        "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
> -                         VSHLL))]
> +;; vshll_high_n
> +
> +(define_insn "aarch64_<su>shll2<mode>"
> +  [(set (match_operand:<VWIDE> 0 "register_operand")
> +     (ashift:<VWIDE> (ANY_EXTEND:<VWIDE>
> +                       (vec_select:<VHALF>
> +                         (match_operand:VQW 1 "register_operand")
> +                         (match_operand:VQW 2 "vect_par_cnst_hi_half")))
> +                      (match_operand:<VWIDE> 3
> +                        "aarch64_simd_shll_imm_vec")))]
>    "TARGET_SIMD"
> -  {
> -    if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
> -      return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
> -    else
> -      return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
> +  {@ [cons: =0, 1, 2, 3]
> +     [w, w, , D2] shll2\t%0.<Vwtype>, %1.<Vtype>, %I3
> +     [w, w, , DL] <su>shll2\t%0.<Vwtype>, %1.<Vtype>, %I3
>    }
>    [(set_attr "type" "neon_shift_imm_long")]
>  )
>  
> -;; vshll_high_n
> -
> -(define_insn "aarch64_<sur>shll2_n<mode>"
> -  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
> -     (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
> -                      (match_operand:SI 2 "immediate_operand" "i")]
> -                         VSHLL))]
> +(define_expand "aarch64_<sur>shll2_n<mode>"
> +  [(set (match_operand:<VWIDE> 0 "register_operand")
> +     (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand")
> +                      (match_operand:SI 2
> +                        "aarch64_simd_shift_imm_bitsize_<ve_mode>")]
> +                      VSHLL))]
>    "TARGET_SIMD"
>    {
> -    if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
> -      return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
> -    else
> -      return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
> +    rtx shft = gen_const_vec_duplicate (<VWIDE>mode, operands[2]);
> +    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
> +    emit_insn (gen_aarch64_<sur>shll2<mode> (operands[0], operands[1], p, shft));
> +    DONE;
>    }
> -  [(set_attr "type" "neon_shift_imm_long")]
>  )
>  
>  ;; vrshr_n
> diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md
> index 6df1dbec2a8097abe9783ed1670c77a8fad4ca57..371a00827d84d8ea4a06ba2b00a761d3b179ae90 100644
> --- a/gcc/config/aarch64/constraints.md
> +++ b/gcc/config/aarch64/constraints.md
> @@ -468,6 +468,20 @@ (define_constraint "D1"
>                       GET_MODE_UNIT_BITSIZE (mode) - 1,
>                       GET_MODE_UNIT_BITSIZE (mode) - 1)")))
>  
> +(define_constraint "D2"
> +  "@internal
> + A constraint that matches vector of immediates that is bits(mode)/2."
> + (and (match_code "const,const_vector")
> +      (match_test "aarch64_simd_shift_imm_vec_exact_top (op, mode)")))
> +
> +(define_constraint "DL"
> +  "@internal
> + A constraint that matches vector of immediates for left shift long.
> + That is immediates between 0 to (bits(mode)/2)-1."
> + (and (match_code "const,const_vector")
> +      (match_test "aarch64_const_vec_all_same_in_range_p (op, 0,
> +                     (GET_MODE_UNIT_BITSIZE (mode) / 2) - 1)")))
> +
>  (define_constraint "Dr"
>    "@internal
>   A constraint that matches vector of immediates for right shifts."
> diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
> index d5a4a1cd9bf8cde8e779de6e0afa531f04892a7b..2d8d1fe25c1de35cb5a2386058cb2901ee46cd82 100644
> --- a/gcc/config/aarch64/predicates.md
> +++ b/gcc/config/aarch64/predicates.md
> @@ -638,6 +638,11 @@ (define_predicate "aarch64_simd_raddsubhn_imm_vec"
>                               HOST_WIDE_INT_1U
>                               << (GET_MODE_UNIT_BITSIZE  (mode) / 2 - 1))")))
>  
> +(define_predicate "aarch64_simd_shll_imm_vec"
> +  (and (match_code "const_vector")
> +       (match_test "aarch64_const_vec_all_same_in_range_p (op, 0,
> +                             GET_MODE_UNIT_BITSIZE (mode) / 2)")))
> +
>  (define_predicate "aarch64_simd_shift_imm_bitsize_qi"
>    (and (match_code "const_int")
>         (match_test "IN_RANGE (INTVAL (op), 0, 8)")))
> diff --git a/gcc/testsuite/gcc.target/aarch64/pr98772.c b/gcc/testsuite/gcc.target/aarch64/pr98772.c
> index 8259251a7c0b64ae8362ea29ec3cf1d2a9d63547..52ad012dcfe72721b8c987bb826c0ffb8ba3f31e 100644
> --- a/gcc/testsuite/gcc.target/aarch64/pr98772.c
> +++ b/gcc/testsuite/gcc.target/aarch64/pr98772.c
> @@ -155,4 +155,4 @@ int main ()
>  /* { dg-final { scan-assembler-times "uaddl\\tv" 2 } } */
>  /* { dg-final { scan-assembler-times "usubl\\tv" 2 } } */
>  /* { dg-final { scan-assembler-times "umull\\tv" 2 } } */
> -/* { dg-final { scan-assembler-times "shl\\tv" 2 } } */
> +/* { dg-final { scan-assembler-times "shll\\tv" 2 } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/vect-widen-shift.c b/gcc/testsuite/gcc.target/aarch64/vect-widen-shift.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..6ee41f63ef8a145c0eb7f213950e7501e058b2fa
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/vect-widen-shift.c
> @@ -0,0 +1,50 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -save-temps" } */
> +/* { dg-final { check-function-bodies "**" "" "" } } */
> +#include <stdint.h>
> +#include <string.h>
> +
> +#pragma GCC target "+nosve"
> +
> +#define ARR_SIZE 1024
> +
> +/* Should produce an shll,shll2 pair*/
> +/*
> +** sshll_opt1:
> +**   ...
> +**   shll    v[0-9]+.4s, v[0-9]+.4h, 16
> +**   shll2   v[0-9]+.4s, v[0-9]+.8h, 16
> +**   ...
> +*/
> +void sshll_opt1 (int32_t *foo, int16_t *a, int16_t *b)
> +{
> +    for( int i = 0; i < ARR_SIZE - 3;i=i+4)
> +    {
> +        foo[i]   = a[i]   << 16;
> +        foo[i+1] = a[i+1] << 16;
> +        foo[i+2] = a[i+2] << 16;
> +        foo[i+3] = a[i+3] << 16;
> +    }
> +}
> +
> +/*
> +** sshll_opt2:
> +**   ...
> +**   sxtl    v[0-9]+.4s, v[0-9]+.4h
> +**   sxtl2   v[0-9]+.4s, v[0-9]+.8h
> +**   sshl    v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s
> +**   sshl    v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s
> +**   ...
> +*/
> +void sshll_opt2 (int32_t *foo, int16_t *a, int16_t *b)
> +{
> +    for( int i = 0; i < ARR_SIZE - 3;i=i+4)
> +    {
> +        foo[i]   = a[i]   << 16;
> +        foo[i+1] = a[i+1] << 15;
> +        foo[i+2] = a[i+2] << 14;
> +        foo[i+3] = a[i+3] << 17;
> +    }
> +}
> +
> +

Reply via email to