https://gcc.gnu.org/g:89b2c7dc96c4944c306131b665a4738a8a99413e

commit r15-6393-g89b2c7dc96c4944c306131b665a4738a8a99413e
Author: Tamar Christina <tamar.christ...@arm.com>
Date:   Fri Dec 20 14:34:32 2024 +0000

    AArch64: Implement vector concat of partial SVE vectors [PR96342]
    
    This patch adds support for a vector constructor from two partial SVE
    vectors into a full SVE vector.  It also implements support for the
    standard vec_init optab to do this.
    
    gcc/ChangeLog:
    
            PR target/96342
            * config/aarch64/aarch64-protos.h
            (aarch64_sve_expand_vector_init_subvector): New.
            * config/aarch64/aarch64-sve.md (vec_init<mode><Vhalf>): New.
            (@aarch64_pack_partial<mode>): New.
            * config/aarch64/aarch64.cc
            (aarch64_sve_expand_vector_init_subvector): New.
            * config/aarch64/iterators.md (SVE_NO2E): New.
            (VHALF, Vhalf): Add SVE partial vectors.
    
    gcc/testsuite/ChangeLog:
    
            PR target/96342
            * gcc.target/aarch64/vect-simd-clone-2.c: New test.

Diff:
---
 gcc/config/aarch64/aarch64-protos.h                |  1 +
 gcc/config/aarch64/aarch64-sve.md                  | 23 +++++++++++++++++++++
 gcc/config/aarch64/aarch64.cc                      | 24 ++++++++++++++++++++++
 gcc/config/aarch64/iterators.md                    | 20 ++++++++++++++++--
 .../gcc.target/aarch64/vect-simd-clone-2.c         | 13 ++++++++++++
 5 files changed, 79 insertions(+), 2 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 7ab1316cf568..18764e407c13 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -1028,6 +1028,7 @@ rtx aarch64_replace_reg_mode (rtx, machine_mode);
 void aarch64_split_sve_subreg_move (rtx, rtx, rtx);
 void aarch64_expand_prologue (void);
 void aarch64_expand_vector_init (rtx, rtx);
+void aarch64_sve_expand_vector_init_subvector (rtx, rtx);
 void aarch64_sve_expand_vector_init (rtx, rtx);
 void aarch64_init_cumulative_args (CUMULATIVE_ARGS *, const_tree, rtx,
                                   const_tree, unsigned, bool = false);
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index a72ca2a500d3..6659bb4fcab3 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -2839,6 +2839,16 @@
   }
 )
 
+(define_expand "vec_init<mode><Vhalf>"
+  [(match_operand:SVE_NO2E 0 "register_operand")
+   (match_operand 1 "")]
+  "TARGET_SVE"
+  {
+    aarch64_sve_expand_vector_init_subvector (operands[0], operands[1]);
+    DONE;
+  }
+)
+
 ;; Shift an SVE vector left and insert a scalar into element 0.
 (define_insn "vec_shl_insert_<mode>"
   [(set (match_operand:SVE_FULL 0 "register_operand")
@@ -9289,6 +9299,19 @@
   "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
 )
 
+;; Integer partial pack: pack two partial SVE types into a single full SVE
+;; type of the same element type.  Use UZP1 on the wider type, which discards
+;; the high part of each wide element.  This allows concatenating SVE partial
+;; types into a wider vector.
+(define_insn "@aarch64_pack_partial<mode>"
+  [(set (match_operand:SVE_NO2E 0 "register_operand" "=w")
+       (vec_concat:SVE_NO2E
+         (match_operand:<VHALF> 1 "register_operand" "w")
+         (match_operand:<VHALF> 2 "register_operand" "w")))]
+  "TARGET_SVE"
+  "uzp1\t%0.<Vctype>, %1.<Vctype>, %2.<Vctype>"
+)
+
 ;; -------------------------------------------------------------------------
 ;; ---- [INT<-INT] Unpacks
 ;; -------------------------------------------------------------------------
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index de4c0a078391..41cc2eeec9a4 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -24870,6 +24870,30 @@ aarch64_sve_expand_vector_init (rtx target, rtx vals)
     aarch64_sve_expand_vector_init_insert_elems (target, v, nelts);
 }
 
+/* Initialize register TARGET by concatenating the two subvectors held in
+   PARALLEL rtx VALS.  VALS must have exactly two elements (asserted).  */
+
+void
+aarch64_sve_expand_vector_init_subvector (rtx target, rtx vals)
+{
+  machine_mode mode = GET_MODE (target);
+  int nelts = XVECLEN (vals, 0);
+
+  gcc_assert (nelts == 2);
+
+  rtx arg0 = XVECEXP (vals, 0, 0);
+  rtx arg1 = XVECEXP (vals, 0, 1);
+
+  /* With two elements we only handle concatenating vector operands.  */
+  machine_mode elem_mode = GET_MODE (arg0);
+  gcc_assert (VECTOR_MODE_P (elem_mode));
+
+  arg0 = force_reg (elem_mode, arg0);
+  arg1 = force_reg (elem_mode, arg1);
+  emit_insn (gen_aarch64_pack_partial (mode, target, arg0, arg1));
+  return;
+}
+
 /* Check whether VALUE is a vector constant in which every element
    is either a power of 2 or a negated power of 2.  If so, return
    a constant vector of log2s, and flip CODE between PLUS and MINUS
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 89c72b24aeb7..34200b05a3ab 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -140,6 +140,10 @@
 ;; VQ without 2 element modes.
 (define_mode_iterator VQ_NO2E [V16QI V8HI V4SI V8HF V4SF V8BF])
 
+;; SVE modes without 2 element modes.
+(define_mode_iterator SVE_NO2E [VNx16QI VNx8QI VNx4QI VNx8HI VNx4HI VNx8HF
+                               VNx4HF VNx8BF VNx4BF VNx4SI VNx4SF])
+
 ;; 2 element quad vector modes.
 (define_mode_iterator VQ_2E [V2DI V2DF])
 
@@ -1737,7 +1741,13 @@
                         (V2DI "DI")    (V2SF  "SF")
                         (V4SF "V2SF")  (V4HF "V2HF")
                         (V8HF "V4HF")  (V2DF  "DF")
-                        (V8BF "V4BF")])
+                        (V8BF "V4BF")
+                        (VNx16QI "VNx8QI") (VNx8QI "VNx4QI")
+                        (VNx4QI "VNx2QI")
+                        (VNx8HI "VNx4HI")  (VNx4HI "VNx2HI")
+                        (VNx8HF "VNx4HF")  (VNx4HF "VNx2HF")
+                        (VNx8BF "VNx4BF")  (VNx4BF "VNx2BF")
+                        (VNx4SI "VNx2SI")  (VNx4SF "VNx2SF")])
 
 ;; Half modes of all vector modes, in lower-case.
 (define_mode_attr Vhalf [(V8QI "v4qi")  (V16QI "v8qi")
@@ -1745,7 +1755,13 @@
                         (V8HF  "v4hf") (V8BF  "v4bf")
                         (V2SI "si")    (V4SI  "v2si")
                         (V2DI "di")    (V2SF  "sf")
-                        (V4SF "v2sf")  (V2DF  "df")])
+                        (V4SF "v2sf")  (V2DF  "df")
+                        (VNx16QI "vnx8qi") (VNx8QI "vnx4qi")
+                        (VNx4QI "vnx2qi")
+                        (VNx8HI "vnx4hi")  (VNx4HI "vnx2hi")
+                        (VNx8HF "vnx4hf")  (VNx4HF "vnx2hf")
+                        (VNx8BF "vnx4bf")  (VNx4BF "vnx2bf")
+                        (VNx4SI "vnx2si")  (VNx4SF "vnx2sf")])
 
 ;; Single-element half modes of quad vector modes.
 (define_mode_attr V1HALF [(V2DI "V1DI")  (V2DF  "V1DF")])
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-simd-clone-2.c b/gcc/testsuite/gcc.target/aarch64/vect-simd-clone-2.c
new file mode 100644
index 000000000000..a25cae2708dd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vect-simd-clone-2.c
@@ -0,0 +1,13 @@
+/* { dg-do compile }  */
+/* { dg-options "-std=c99" } */
+/* { dg-additional-options "-O3 -march=armv8-a" } */
+
+#pragma GCC target ("+sve")
+extern char __attribute__ ((simd, const)) fn3 (int, char);
+void test_fn3 (int *a, int *b, char *c, int n)
+{
+  for (int i = 0; i < n; ++i)
+    a[i] = (int) (fn3 (b[i], c[i]) + c[i]);
+}
+
+/* { dg-final { scan-assembler {\s+_ZGVsMxvv_fn3\n} } } */

Reply via email to