https://gcc.gnu.org/g:89b2c7dc96c4944c306131b665a4738a8a99413e
commit r15-6393-g89b2c7dc96c4944c306131b665a4738a8a99413e Author: Tamar Christina <tamar.christ...@arm.com> Date: Fri Dec 20 14:34:32 2024 +0000 AArch64: Implement vector concat of partial SVE vectors [PR96342] This patch adds support for vector constructor from two partial SVE vectors into a full SVE vector. It also implements support for the standard vec_init optab to do this. gcc/ChangeLog: PR target/96342 * config/aarch64/aarch64-protos.h (aarch64_sve_expand_vector_init_subvector): New. * config/aarch64/aarch64-sve.md (vec_init<mode><Vhalf>): New. (@aarch64_pack_partial<mode>): New. * config/aarch64/aarch64.cc (aarch64_sve_expand_vector_init_subvector): New. * config/aarch64/iterators.md (SVE_NO2E): New. (VHALF, Vhalf): Add SVE partial vectors. gcc/testsuite/ChangeLog: PR target/96342 * gcc.target/aarch64/vect-simd-clone-2.c: New test. Diff: --- gcc/config/aarch64/aarch64-protos.h | 1 + gcc/config/aarch64/aarch64-sve.md | 23 +++++++++++++++++++++ gcc/config/aarch64/aarch64.cc | 24 ++++++++++++++++++++++ gcc/config/aarch64/iterators.md | 20 ++++++++++++++++-- .../gcc.target/aarch64/vect-simd-clone-2.c | 13 ++++++++++++ 5 files changed, 79 insertions(+), 2 deletions(-) diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index 7ab1316cf568..18764e407c13 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -1028,6 +1028,7 @@ rtx aarch64_replace_reg_mode (rtx, machine_mode); void aarch64_split_sve_subreg_move (rtx, rtx, rtx); void aarch64_expand_prologue (void); void aarch64_expand_vector_init (rtx, rtx); +void aarch64_sve_expand_vector_init_subvector (rtx, rtx); void aarch64_sve_expand_vector_init (rtx, rtx); void aarch64_init_cumulative_args (CUMULATIVE_ARGS *, const_tree, rtx, const_tree, unsigned, bool = false); diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index a72ca2a500d3..6659bb4fcab3 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ 
b/gcc/config/aarch64/aarch64-sve.md @@ -2839,6 +2839,16 @@ } ) +(define_expand "vec_init<mode><Vhalf>" + [(match_operand:SVE_NO2E 0 "register_operand") + (match_operand 1 "")] + "TARGET_SVE" + { + aarch64_sve_expand_vector_init_subvector (operands[0], operands[1]); + DONE; + } +) + ;; Shift an SVE vector left and insert a scalar into element 0. (define_insn "vec_shl_insert_<mode>" [(set (match_operand:SVE_FULL 0 "register_operand") @@ -9289,6 +9299,19 @@ "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>" ) +;; Integer partial pack packing two partial SVE types into a single full SVE +;; type of the same element type. Use UZP1 on the wider type, which discards +;; the high part of each wide element. This allows to concat SVE partial types +;; into a wider vector. +(define_insn "@aarch64_pack_partial<mode>" + [(set (match_operand:SVE_NO2E 0 "register_operand" "=w") + (vec_concat:SVE_NO2E + (match_operand:<VHALF> 1 "register_operand" "w") + (match_operand:<VHALF> 2 "register_operand" "w")))] + "TARGET_SVE" + "uzp1\t%0.<Vctype>, %1.<Vctype>, %2.<Vctype>" +) + ;; ------------------------------------------------------------------------- ;; ---- [INT<-INT] Unpacks ;; ------------------------------------------------------------------------- diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index de4c0a078391..41cc2eeec9a4 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -24870,6 +24870,30 @@ aarch64_sve_expand_vector_init (rtx target, rtx vals) aarch64_sve_expand_vector_init_insert_elems (target, v, nelts); } +/* Initialize register TARGET from the two vector subelements in PARALLEL + rtx VALS. */ + +void +aarch64_sve_expand_vector_init_subvector (rtx target, rtx vals) +{ + machine_mode mode = GET_MODE (target); + int nelts = XVECLEN (vals, 0); + + gcc_assert (nelts == 2); + + rtx arg0 = XVECEXP (vals, 0, 0); + rtx arg1 = XVECEXP (vals, 0, 1); + + /* If we have two elements and are concatting vector. 
*/ + machine_mode elem_mode = GET_MODE (arg0); + gcc_assert (VECTOR_MODE_P (elem_mode)); + + arg0 = force_reg (elem_mode, arg0); + arg1 = force_reg (elem_mode, arg1); + emit_insn (gen_aarch64_pack_partial (mode, target, arg0, arg1)); + return; +} + /* Check whether VALUE is a vector constant in which every element is either a power of 2 or a negated power of 2. If so, return a constant vector of log2s, and flip CODE between PLUS and MINUS diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 89c72b24aeb7..34200b05a3ab 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -140,6 +140,10 @@ ;; VQ without 2 element modes. (define_mode_iterator VQ_NO2E [V16QI V8HI V4SI V8HF V4SF V8BF]) +;; SVE modes without 2 element modes. +(define_mode_iterator SVE_NO2E [VNx16QI VNx8QI VNx4QI VNx8HI VNx4HI VNx8HF + VNx4HF VNx8BF VNx4BF VNx4SI VNx4SF]) + ;; 2 element quad vector modes. (define_mode_iterator VQ_2E [V2DI V2DF]) @@ -1737,7 +1741,13 @@ (V2DI "DI") (V2SF "SF") (V4SF "V2SF") (V4HF "V2HF") (V8HF "V4HF") (V2DF "DF") - (V8BF "V4BF")]) + (V8BF "V4BF") + (VNx16QI "VNx8QI") (VNx8QI "VNx4QI") + (VNx4QI "VNx2QI") + (VNx8HI "VNx4HI") (VNx4HI "VNx2HI") + (VNx8HF "VNx4HF") (VNx4HF "VNx2HF") + (VNx8BF "VNx4BF") (VNx4BF "VNx2BF") + (VNx4SI "VNx2SI") (VNx4SF "VNx2SF")]) ;; Half modes of all vector modes, in lower-case. (define_mode_attr Vhalf [(V8QI "v4qi") (V16QI "v8qi") @@ -1745,7 +1755,13 @@ (V8HF "v4hf") (V8BF "v4bf") (V2SI "si") (V4SI "v2si") (V2DI "di") (V2SF "sf") - (V4SF "v2sf") (V2DF "df")]) + (V4SF "v2sf") (V2DF "df") + (VNx16QI "vnx8qi") (VNx8QI "vnx4qi") + (VNx4QI "vnx2qi") + (VNx8HI "vnx4hi") (VNx4HI "vnx2hi") + (VNx8HF "vnx4hf") (VNx4HF "vnx2hf") + (VNx8BF "vnx4bf") (VNx4BF "vnx2bf") + (VNx4SI "vnx2si") (VNx4SF "vnx2sf")]) ;; Single-element half modes of quad vector modes. 
(define_mode_attr V1HALF [(V2DI "V1DI") (V2DF "V1DF")]) diff --git a/gcc/testsuite/gcc.target/aarch64/vect-simd-clone-2.c b/gcc/testsuite/gcc.target/aarch64/vect-simd-clone-2.c new file mode 100644 index 000000000000..a25cae2708dd --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/vect-simd-clone-2.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-options "-std=c99" } */ +/* { dg-additional-options "-O3 -march=armv8-a" } */ + +#pragma GCC target ("+sve") +extern char __attribute__ ((simd, const)) fn3 (int, char); +void test_fn3 (int *a, int *b, char *c, int n) +{ + for (int i = 0; i < n; ++i) + a[i] = (int) (fn3 (b[i], c[i]) + c[i]); +} + +/* { dg-final { scan-assembler {\s+_ZGVsMxvv_fn3\n} } } */