Jonathan Wright <jonathan.wri...@arm.com> writes:
> Hi,
>
> The existing vec_pack_trunc RTL pattern emits an opaque two-
> instruction assembly code sequence that prevents proper instruction
> scheduling. This commit changes the pattern to an expander that emits
> individual xtn and xtn2 instructions.
>
> This commit also consolidates the duplicate truncation patterns.
>
> Regression tested and bootstrapped on aarch64-none-linux-gnu - no
> issues.
>
> Ok for master?
OK. Nice clean-up, thanks. Richard > Thanks, > Jonathan > > --- > > gcc/ChangeLog: > > 2021-05-17 Jonathan Wright <jonathan.wri...@arm.com> > > * config/aarch64/aarch64-simd.md (aarch64_simd_vec_pack_trunc_<mode>): > Remove as duplicate of... > (aarch64_xtn<mode>): This. > (aarch64_xtn2<mode>_le): Move position in file. > (aarch64_xtn2<mode>_be): Move position in file. > (aarch64_xtn2<mode>): Move position in file. > (vec_pack_trunc_<mode>): Define as an expander. > > diff --git a/gcc/config/aarch64/aarch64-simd.md > b/gcc/config/aarch64/aarch64-simd.md > index > c67fa3fb6f0ca0a181a09a42677526d12e955c06..447b5575f2f5adbad4957e90787a4954af644b20 > 100644 > --- a/gcc/config/aarch64/aarch64-simd.md > +++ b/gcc/config/aarch64/aarch64-simd.md > @@ -1691,14 +1691,51 @@ > ;; Narrowing operations. > > ;; For doubles. > -(define_insn "aarch64_simd_vec_pack_trunc_<mode>" > - [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w") > - (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))] > - "TARGET_SIMD" > - "xtn\\t%0.<Vntype>, %1.<Vtype>" > + > +(define_insn "aarch64_xtn<mode>" > + [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w") > + (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))] > + "TARGET_SIMD" > + "xtn\\t%0.<Vntype>, %1.<Vtype>" > [(set_attr "type" "neon_shift_imm_narrow_q")] > ) > > +(define_insn "aarch64_xtn2<mode>_le" > + [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w") > + (vec_concat:<VNARROWQ2> > + (match_operand:<VNARROWQ> 1 "register_operand" "0") > + (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))] > + "TARGET_SIMD && !BYTES_BIG_ENDIAN" > + "xtn2\t%0.<V2ntype>, %2.<Vtype>" > + [(set_attr "type" "neon_shift_imm_narrow_q")] > +) > + > +(define_insn "aarch64_xtn2<mode>_be" > + [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w") > + (vec_concat:<VNARROWQ2> > + (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w")) > + (match_operand:<VNARROWQ> 1 "register_operand" 
"0")))] > + "TARGET_SIMD && BYTES_BIG_ENDIAN" > + "xtn2\t%0.<V2ntype>, %2.<Vtype>" > + [(set_attr "type" "neon_shift_imm_narrow_q")] > +) > + > +(define_expand "aarch64_xtn2<mode>" > + [(match_operand:<VNARROWQ2> 0 "register_operand") > + (match_operand:<VNARROWQ> 1 "register_operand") > + (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand"))] > + "TARGET_SIMD" > + { > + if (BYTES_BIG_ENDIAN) > + emit_insn (gen_aarch64_xtn2<mode>_be (operands[0], operands[1], > + operands[2])); > + else > + emit_insn (gen_aarch64_xtn2<mode>_le (operands[0], operands[1], > + operands[2])); > + DONE; > + } > +) > + > (define_expand "vec_pack_trunc_<mode>" > [(match_operand:<VNARROWD> 0 "register_operand") > (match_operand:VDN 1 "register_operand") > @@ -1711,7 +1748,7 @@ > > emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo])); > emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi])); > - emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg)); > + emit_insn (gen_aarch64_xtn<Vdbl> (operands[0], tempreg)); > DONE; > }) > > @@ -1901,20 +1938,25 @@ > > ;; For quads. > > -(define_insn "vec_pack_trunc_<mode>" > - [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w") > +(define_expand "vec_pack_trunc_<mode>" > + [(set (match_operand:<VNARROWQ2> 0 "register_operand") > (vec_concat:<VNARROWQ2> > - (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")) > - (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))] > + (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand")) > + (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand"))))] > "TARGET_SIMD" > { > + rtx tmpreg = gen_reg_rtx (<VNARROWQ>mode); > + int lo = BYTES_BIG_ENDIAN ? 2 : 1; > + int hi = BYTES_BIG_ENDIAN ? 
1 : 2; > + > + emit_insn (gen_aarch64_xtn<mode> (tmpreg, operands[lo])); > + > if (BYTES_BIG_ENDIAN) > - return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>"; > + emit_insn (gen_aarch64_xtn2<mode>_be (operands[0], tmpreg, > operands[hi])); > else > - return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>"; > + emit_insn (gen_aarch64_xtn2<mode>_le (operands[0], tmpreg, > operands[hi])); > + DONE; > } > - [(set_attr "type" "multiple") > - (set_attr "length" "8")] > ) > > ;; Widening operations. > @@ -8570,13 +8612,6 @@ > "" > ) > > -(define_expand "aarch64_xtn<mode>" > - [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w") > - (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))] > - "TARGET_SIMD" > - "" > -) > - > ;; Truncate a 128-bit integer vector to a 64-bit vector. > (define_insn "trunc<mode><Vnarrowq>2" > [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w") > @@ -8586,42 +8621,6 @@ > [(set_attr "type" "neon_shift_imm_narrow_q")] > ) > > -(define_insn "aarch64_xtn2<mode>_le" > - [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w") > - (vec_concat:<VNARROWQ2> > - (match_operand:<VNARROWQ> 1 "register_operand" "0") > - (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))] > - "TARGET_SIMD && !BYTES_BIG_ENDIAN" > - "xtn2\t%0.<V2ntype>, %2.<Vtype>" > - [(set_attr "type" "neon_shift_imm_narrow_q")] > -) > - > -(define_insn "aarch64_xtn2<mode>_be" > - [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w") > - (vec_concat:<VNARROWQ2> > - (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w")) > - (match_operand:<VNARROWQ> 1 "register_operand" "0")))] > - "TARGET_SIMD && BYTES_BIG_ENDIAN" > - "xtn2\t%0.<V2ntype>, %2.<Vtype>" > - [(set_attr "type" "neon_shift_imm_narrow_q")] > -) > - > -(define_expand "aarch64_xtn2<mode>" > - [(match_operand:<VNARROWQ2> 0 "register_operand") > - (match_operand:<VNARROWQ> 1 "register_operand") > - (truncate:<VNARROWQ> (match_operand:VQN 2 
"register_operand"))] > - "TARGET_SIMD" > - { > - if (BYTES_BIG_ENDIAN) > - emit_insn (gen_aarch64_xtn2<mode>_be (operands[0], operands[1], > - operands[2])); > - else > - emit_insn (gen_aarch64_xtn2<mode>_le (operands[0], operands[1], > - operands[2])); > - DONE; > - } > -) > - > (define_insn "aarch64_bfdot<mode>" > [(set (match_operand:VDQSF 0 "register_operand" "=w") > (plus:VDQSF