Tamar Christina <tamar.christ...@arm.com> writes: >> > + >> > +(define_constraint "D3" >> > + "@internal >> > + A constraint that matches vector of immediates that is with 0 to >> > +(bits(mode)/2)-1." >> > + (and (match_code "const,const_vector") >> > + (match_test "aarch64_const_vec_all_same_in_range_p (op, 0, >> > + (GET_MODE_UNIT_BITSIZE (mode) / 2) - 1)"))) >> >> Having this mapping for D2 and D3, with D2 corresponding to prec/2, kind-of >> makes D3 a false mnemonic. How about DL instead? (L for "left-shift long" >> or >> "low-part", take your pick) >> >> Looks good otherwise. >> > > Wasn't sure if this was an ok with changes or not, so here's the final patch 😊
I was hoping to have another look before it went in. But... > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues. > > Ok for master? ...yeah, LGTM, thanks. Richard > Thanks, > Tamar > > gcc/ChangeLog: > > PR target/106346 > * config/aarch64/aarch64-simd.md (vec_widen_<sur>shiftl_lo_<mode>, > vec_widen_<sur>shiftl_hi_<mode>): Remove. > (aarch64_<sur>shll<mode>_internal): Renamed to... > (aarch64_<su>shll<mode>): .. This. > (aarch64_<sur>shll2<mode>_internal): Renamed to... > (aarch64_<su>shll2<mode>): .. This. > (aarch64_<sur>shll_n<mode>, aarch64_<sur>shll2_n<mode>): Re-use new > optabs. > * config/aarch64/constraints.md (D2, DL): New. > * config/aarch64/predicates.md (aarch64_simd_shll_imm_vec): New. > > gcc/testsuite/ChangeLog: > > PR target/106346 > * gcc.target/aarch64/pr98772.c: Adjust assembly. > * gcc.target/aarch64/vect-widen-shift.c: New test. > > --- inline copy of patch --- > > diff --git a/gcc/config/aarch64/aarch64-simd.md > b/gcc/config/aarch64/aarch64-simd.md > index > d95394101470446e55f25a2397dd112239b6a54d..f67eb70577d0c2d9911d8c867d38a4d0b390337c > 100644 > --- a/gcc/config/aarch64/aarch64-simd.md > +++ b/gcc/config/aarch64/aarch64-simd.md > @@ -6387,105 +6387,67 @@ (define_insn > "aarch64_<sur>q<r>shl<mode><vczle><vczbe>" > [(set_attr "type" "neon_sat_shift_reg<q>")] > ) > > -(define_expand "vec_widen_<sur>shiftl_lo_<mode>" > - [(set (match_operand:<VWIDE> 0 "register_operand" "=w") > - (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w") > - (match_operand:SI 2 > - "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")] > - VSHLL))] > - "TARGET_SIMD" > - { > - rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false); > - emit_insn (gen_aarch64_<sur>shll<mode>_internal (operands[0], > operands[1], > - p, operands[2])); > - DONE; > - } > -) > - > -(define_expand "vec_widen_<sur>shiftl_hi_<mode>" > - [(set (match_operand:<VWIDE> 0 "register_operand") > - (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w") > - 
(match_operand:SI 2 > - "immediate_operand" "i")] > - VSHLL))] > - "TARGET_SIMD" > - { > - rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true); > - emit_insn (gen_aarch64_<sur>shll2<mode>_internal (operands[0], > operands[1], > - p, operands[2])); > - DONE; > - } > -) > - > ;; vshll_n > > -(define_insn "aarch64_<sur>shll<mode>_internal" > - [(set (match_operand:<VWIDE> 0 "register_operand" "=w") > - (unspec:<VWIDE> [(vec_select:<VHALF> > - (match_operand:VQW 1 "register_operand" "w") > - (match_operand:VQW 2 "vect_par_cnst_lo_half" "")) > - (match_operand:SI 3 > - "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")] > - VSHLL))] > +(define_insn "aarch64_<su>shll<mode>" > + [(set (match_operand:<VWIDE> 0 "register_operand") > + (ashift:<VWIDE> (ANY_EXTEND:<VWIDE> > + (match_operand:VD_BHSI 1 "register_operand")) > + (match_operand:<VWIDE> 2 > + "aarch64_simd_shll_imm_vec")))] > "TARGET_SIMD" > - { > - if (INTVAL (operands[3]) == GET_MODE_UNIT_BITSIZE (<MODE>mode)) > - return "shll\\t%0.<Vwtype>, %1.<Vhalftype>, %3"; > - else > - return "<sur>shll\\t%0.<Vwtype>, %1.<Vhalftype>, %3"; > + {@ [cons: =0, 1, 2] > + [w, w, D2] shll\t%0.<Vwtype>, %1.<Vtype>, %I2 > + [w, w, DL] <su>shll\t%0.<Vwtype>, %1.<Vtype>, %I2 > } > [(set_attr "type" "neon_shift_imm_long")] > ) > > -(define_insn "aarch64_<sur>shll2<mode>_internal" > - [(set (match_operand:<VWIDE> 0 "register_operand" "=w") > - (unspec:<VWIDE> [(vec_select:<VHALF> > - (match_operand:VQW 1 "register_operand" "w") > - (match_operand:VQW 2 "vect_par_cnst_hi_half" "")) > - (match_operand:SI 3 > - "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")] > +(define_expand "aarch64_<sur>shll_n<mode>" > + [(set (match_operand:<VWIDE> 0 "register_operand") > + (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand") > + (match_operand:SI 2 > + "aarch64_simd_shift_imm_bitsize_<ve_mode>")] > VSHLL))] > "TARGET_SIMD" > { > - if (INTVAL (operands[3]) == GET_MODE_UNIT_BITSIZE (<MODE>mode)) > - return "shll2\\t%0.<Vwtype>, 
%1.<Vtype>, %3"; > - else > - return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %3"; > + rtx shft = gen_const_vec_duplicate (<VWIDE>mode, operands[2]); > + emit_insn (gen_aarch64_<sur>shll<mode> (operands[0], operands[1], shft)); > + DONE; > } > - [(set_attr "type" "neon_shift_imm_long")] > ) > > -(define_insn "aarch64_<sur>shll_n<mode>" > - [(set (match_operand:<VWIDE> 0 "register_operand" "=w") > - (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w") > - (match_operand:SI 2 > - "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")] > - VSHLL))] > +;; vshll_high_n > + > +(define_insn "aarch64_<su>shll2<mode>" > + [(set (match_operand:<VWIDE> 0 "register_operand") > + (ashift:<VWIDE> (ANY_EXTEND:<VWIDE> > + (vec_select:<VHALF> > + (match_operand:VQW 1 "register_operand") > + (match_operand:VQW 2 "vect_par_cnst_hi_half"))) > + (match_operand:<VWIDE> 3 > + "aarch64_simd_shll_imm_vec")))] > "TARGET_SIMD" > - { > - if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode)) > - return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2"; > - else > - return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2"; > + {@ [cons: =0, 1, 2, 3] > + [w, w, , D2] shll2\t%0.<Vwtype>, %1.<Vtype>, %I3 > + [w, w, , DL] <su>shll2\t%0.<Vwtype>, %1.<Vtype>, %I3 > } > [(set_attr "type" "neon_shift_imm_long")] > ) > > -;; vshll_high_n > - > -(define_insn "aarch64_<sur>shll2_n<mode>" > - [(set (match_operand:<VWIDE> 0 "register_operand" "=w") > - (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w") > - (match_operand:SI 2 "immediate_operand" "i")] > - VSHLL))] > +(define_expand "aarch64_<sur>shll2_n<mode>" > + [(set (match_operand:<VWIDE> 0 "register_operand") > + (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand") > + (match_operand:SI 2 > + "aarch64_simd_shift_imm_bitsize_<ve_mode>")] > + VSHLL))] > "TARGET_SIMD" > { > - if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode)) > - return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2"; > - else > - return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, 
%2"; > + rtx shft = gen_const_vec_duplicate (<VWIDE>mode, operands[2]); > + rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true); > + emit_insn (gen_aarch64_<sur>shll2<mode> (operands[0], operands[1], p, > shft)); > + DONE; > } > - [(set_attr "type" "neon_shift_imm_long")] > ) > > ;; vrshr_n > diff --git a/gcc/config/aarch64/constraints.md > b/gcc/config/aarch64/constraints.md > index > 6df1dbec2a8097abe9783ed1670c77a8fad4ca57..371a00827d84d8ea4a06ba2b00a761d3b179ae90 > 100644 > --- a/gcc/config/aarch64/constraints.md > +++ b/gcc/config/aarch64/constraints.md > @@ -468,6 +468,20 @@ (define_constraint "D1" > GET_MODE_UNIT_BITSIZE (mode) - 1, > GET_MODE_UNIT_BITSIZE (mode) - 1)"))) > > +(define_constraint "D2" > + "@internal > + A constraint that matches vector of immediates that is bits(mode)/2." > + (and (match_code "const,const_vector") > + (match_test "aarch64_simd_shift_imm_vec_exact_top (op, mode)"))) > + > +(define_constraint "DL" > + "@internal > + A constraint that matches vector of immediates for left shift long. > + That is immediates between 0 to (bits(mode)/2)-1." > + (and (match_code "const,const_vector") > + (match_test "aarch64_const_vec_all_same_in_range_p (op, 0, > + (GET_MODE_UNIT_BITSIZE (mode) / 2) - 1)"))) > + > (define_constraint "Dr" > "@internal > A constraint that matches vector of immediates for right shifts." 
> diff --git a/gcc/config/aarch64/predicates.md > b/gcc/config/aarch64/predicates.md > index > d5a4a1cd9bf8cde8e779de6e0afa531f04892a7b..2d8d1fe25c1de35cb5a2386058cb2901ee46cd82 > 100644 > --- a/gcc/config/aarch64/predicates.md > +++ b/gcc/config/aarch64/predicates.md > @@ -638,6 +638,11 @@ (define_predicate "aarch64_simd_raddsubhn_imm_vec" > HOST_WIDE_INT_1U > << (GET_MODE_UNIT_BITSIZE (mode) / 2 - 1))"))) > > +(define_predicate "aarch64_simd_shll_imm_vec" > + (and (match_code "const_vector") > + (match_test "aarch64_const_vec_all_same_in_range_p (op, 0, > + GET_MODE_UNIT_BITSIZE (mode) / 2)"))) > + > (define_predicate "aarch64_simd_shift_imm_bitsize_qi" > (and (match_code "const_int") > (match_test "IN_RANGE (INTVAL (op), 0, 8)"))) > diff --git a/gcc/testsuite/gcc.target/aarch64/pr98772.c > b/gcc/testsuite/gcc.target/aarch64/pr98772.c > index > 8259251a7c0b64ae8362ea29ec3cf1d2a9d63547..52ad012dcfe72721b8c987bb826c0ffb8ba3f31e > 100644 > --- a/gcc/testsuite/gcc.target/aarch64/pr98772.c > +++ b/gcc/testsuite/gcc.target/aarch64/pr98772.c > @@ -155,4 +155,4 @@ int main () > /* { dg-final { scan-assembler-times "uaddl\\tv" 2 } } */ > /* { dg-final { scan-assembler-times "usubl\\tv" 2 } } */ > /* { dg-final { scan-assembler-times "umull\\tv" 2 } } */ > -/* { dg-final { scan-assembler-times "shl\\tv" 2 } } */ > +/* { dg-final { scan-assembler-times "shll\\tv" 2 } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/vect-widen-shift.c > b/gcc/testsuite/gcc.target/aarch64/vect-widen-shift.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..6ee41f63ef8a145c0eb7f213950e7501e058b2fa > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/vect-widen-shift.c > @@ -0,0 +1,50 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O3 -save-temps" } */ > +/* { dg-final { check-function-bodies "**" "" "" } } */ > +#include <stdint.h> > +#include <string.h> > + > +#pragma GCC target "+nosve" > + > +#define ARR_SIZE 1024 > + > +/* Should produce an shll,shll2 
pair*/ > +/* > +** sshll_opt1: > +** ... > +** shll v[0-9]+.4s, v[0-9]+.4h, 16 > +** shll2 v[0-9]+.4s, v[0-9]+.8h, 16 > +** ... > +*/ > +void sshll_opt1 (int32_t *foo, int16_t *a, int16_t *b) > +{ > + for( int i = 0; i < ARR_SIZE - 3;i=i+4) > + { > + foo[i] = a[i] << 16; > + foo[i+1] = a[i+1] << 16; > + foo[i+2] = a[i+2] << 16; > + foo[i+3] = a[i+3] << 16; > + } > +} > + > +/* > +** sshll_opt2: > +** ... > +** sxtl v[0-9]+.4s, v[0-9]+.4h > +** sxtl2 v[0-9]+.4s, v[0-9]+.8h > +** sshl v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s > +** sshl v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s > +** ... > +*/ > +void sshll_opt2 (int32_t *foo, int16_t *a, int16_t *b) > +{ > + for( int i = 0; i < ARR_SIZE - 3;i=i+4) > + { > + foo[i] = a[i] << 16; > + foo[i+1] = a[i+1] << 15; > + foo[i+2] = a[i+2] << 14; > + foo[i+3] = a[i+3] << 17; > + } > +} > + > +