> -----Original Message-----
> From: Richard Sandiford <[email protected]>
> Sent: Thursday, December 19, 2024 11:03 AM
> To: Tamar Christina <[email protected]>
> Cc: [email protected]; nd <[email protected]>; Richard Earnshaw
> <[email protected]>; [email protected]
> Subject: Re: [PATCH 7/7]AArch64: Implement vector concat of partial SVE
> vectors
>
> Tamar Christina <[email protected]> writes:
> >> > ;; 2 element quad vector modes.
> >> > (define_mode_iterator VQ_2E [V2DI V2DF])
> >> >
> >> > @@ -1678,7 +1686,15 @@ (define_mode_attr VHALF [(V8QI "V4QI")
> (V16QI
> >> "V8QI")
> >> > (V2DI "DI") (V2SF "SF")
> >> > (V4SF "V2SF") (V4HF "V2HF")
> >> > (V8HF "V4HF") (V2DF "DF")
> >> > - (V8BF "V4BF")])
> >> > + (V8BF "V4BF")
> >> > + (VNx16QI "VNx8QI") (VNx8QI "VNx4QI")
> >> > + (VNx4QI "VNx2QI") (VNx2QI "QI")
> >> > + (VNx8HI "VNx4HI") (VNx4HI "VNx2HI") (VNx2HI
> >> > "HI")
> >> > + (VNx8HF "VNx4HF") (VNx4HF "VNx2HF") (VNx2HF
> >> > "HF")
> >> > + (VNx8BF "VNx4BF") (VNx4BF "VNx2BF") (VNx2BF
> >> > "BF")
> >> > + (VNx4SI "VNx2SI") (VNx2SI "SI")
> >> > + (VNx4SF "VNx2SF") (VNx2SF "SF")
> >> > + (VNx2DI "DI") (VNx2DF "DF")])
> >>
> >> Are the x2 entries necessary, given that the new uses are restricted
> >> to NO2E?
> >>
> >
> > No, but I wanted to keep the symmetry with the Adv. SIMD modes. Since the
> > mode attributes don't really control the number of alternatives I thought it
> would
> > be better to have the attributes be "fully" defined rather than only the
> > subset I
> use.
>
> But these are variable-length modes, so DI is only half of VNx2DI for
> the minimum vector length. It's less than half for Neoverse V1 or A64FX.
>
> IMO it'd be better to leave them out for now and defined them when needed,
> at which point the right choice would be more obvious.
>
OK.
gcc/ChangeLog:
PR target/96342
* config/aarch64/aarch64-sve.md (vec_init<mode><Vhalf>): New.
(@aarch64_pack_partial<mode>): New.
* config/aarch64/aarch64.cc (aarch64_sve_expand_vector_init_subvector):
New.
* config/aarch64/iterators.md (SVE_NO2E): New.
(VHALF, Vhalf): Add SVE partial vectors.
gcc/testsuite/ChangeLog:
PR target/96342
* gcc.target/aarch64/vect-simd-clone-2.c: New test.
Bootstrapped and regtested on aarch64-none-linux-gnu with no issues.
Ok for master?
Thanks,
Tamar
-- inline copy of patch --
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index a72ca2a500d394598268c6adfe717eed94a304b3..8ed4221dbe5c49db97b37f186365fa391900eadb 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -2839,6 +2839,16 @@ (define_expand "vec_init<mode><Vel>"
}
)
+(define_expand "vec_init<mode><Vhalf>"
+ [(match_operand:SVE_NO2E 0 "register_operand")
+ (match_operand 1 "")]
+ "TARGET_SVE"
+ {
+ aarch64_sve_expand_vector_init (operands[0], operands[1]);
+ DONE;
+ }
+)
+
;; Shift an SVE vector left and insert a scalar into element 0.
(define_insn "vec_shl_insert_<mode>"
[(set (match_operand:SVE_FULL 0 "register_operand")
@@ -9289,6 +9299,19 @@ (define_insn "vec_pack_trunc_<Vwide>"
"uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)
+;; Partial pack: pack two partial SVE vectors into a single SVE vector with
+;; the same element type. Use UZP1 on the wider type, which discards
+;; the high part of each wide element. This allows concatenating partial SVE
+;; vectors into a wider vector.
+(define_insn "@aarch64_pack_partial<mode>"
+ [(set (match_operand:SVE_NO2E 0 "register_operand" "=w")
+ (vec_concat:SVE_NO2E
+ (match_operand:<VHALF> 1 "register_operand" "w")
+ (match_operand:<VHALF> 2 "register_operand" "w")))]
+ "TARGET_SVE"
+ "uzp1\t%0.<Vctype>, %1.<Vctype>, %2.<Vctype>"
+)
+
;; -------------------------------------------------------------------------
;; ---- [INT<-INT] Unpacks
;; -------------------------------------------------------------------------
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index de4c0a0783912b54ac35d7c818c24574b27a4ca0..40214e318f3c4e30e619d96073b253887c973efc 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -24859,6 +24859,17 @@ aarch64_sve_expand_vector_init (rtx target, rtx vals)
v.quick_push (XVECEXP (vals, 0, i));
v.finalize ();
+ /* If we have two elements and are concatting vector. */
+ machine_mode elem_mode = GET_MODE (v.elt (0));
+ if (nelts == 2 && VECTOR_MODE_P (elem_mode))
+ {
+ /* We've failed expansion using a dup. Try using a cheeky truncate. */
+ rtx arg0 = force_reg (elem_mode, v.elt(0));
+ rtx arg1 = force_reg (elem_mode, v.elt(1));
+ emit_insn (gen_aarch64_pack_partial (mode, target, arg0, arg1));
+ return;
+ }
+
/* If neither sub-vectors of v could be initialized specially,
then use INSR to insert all elements from v into TARGET.
??? This might not be optimal for vectors with large
@@ -24870,6 +24881,30 @@ aarch64_sve_expand_vector_init (rtx target, rtx vals)
aarch64_sve_expand_vector_init_insert_elems (target, v, nelts);
}
+/* Initialize register TARGET from the two vector subelements in PARALLEL
+ rtx VALS. */
+
+void
+aarch64_sve_expand_vector_init_subvector (rtx target, rtx vals)
+{
+ machine_mode mode = GET_MODE (target);
+ int nelts = XVECLEN (vals, 0);
+
+ gcc_assert (nelts == 2);
+
+ rtx arg0 = XVECEXP (vals, 0, 0);
+ rtx arg1 = XVECEXP (vals, 0, 1);
+
+ /* If we have two elements and are concatting vector. */
+ machine_mode elem_mode = GET_MODE (arg0);
+ gcc_assert (VECTOR_MODE_P (elem_mode));
+
+ arg0 = force_reg (elem_mode, arg0);
+ arg1 = force_reg (elem_mode, arg1);
+ emit_insn (gen_aarch64_pack_partial (mode, target, arg0, arg1));
+ return;
+}
+
/* Check whether VALUE is a vector constant in which every element
is either a power of 2 or a negated power of 2. If so, return
a constant vector of log2s, and flip CODE between PLUS and MINUS
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 89c72b24aeb791adbbd3edfdb131478d52b248e6..34200b05a3abf6d51919313de1027aa4988bcb8d 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -140,6 +140,10 @@ (define_mode_iterator VQ_I [V16QI V8HI V4SI V2DI])
;; VQ without 2 element modes.
(define_mode_iterator VQ_NO2E [V16QI V8HI V4SI V8HF V4SF V8BF])
+;; SVE modes without 2 element modes.
+(define_mode_iterator SVE_NO2E [VNx16QI VNx8QI VNx4QI VNx8HI VNx4HI VNx8HF
+ VNx4HF VNx8BF VNx4BF VNx4SI VNx4SF])
+
;; 2 element quad vector modes.
(define_mode_iterator VQ_2E [V2DI V2DF])
@@ -1737,7 +1741,13 @@ (define_mode_attr VHALF [(V8QI "V4QI") (V16QI "V8QI")
(V2DI "DI") (V2SF "SF")
(V4SF "V2SF") (V4HF "V2HF")
(V8HF "V4HF") (V2DF "DF")
- (V8BF "V4BF")])
+ (V8BF "V4BF")
+ (VNx16QI "VNx8QI") (VNx8QI "VNx4QI")
+ (VNx4QI "VNx2QI")
+ (VNx8HI "VNx4HI") (VNx4HI "VNx2HI")
+ (VNx8HF "VNx4HF") (VNx4HF "VNx2HF")
+ (VNx8BF "VNx4BF") (VNx4BF "VNx2BF")
+ (VNx4SI "VNx2SI") (VNx4SF "VNx2SF")])
;; Half modes of all vector modes, in lower-case.
(define_mode_attr Vhalf [(V8QI "v4qi") (V16QI "v8qi")
@@ -1745,7 +1755,13 @@ (define_mode_attr Vhalf [(V8QI "v4qi") (V16QI "v8qi")
(V8HF "v4hf") (V8BF "v4bf")
(V2SI "si") (V4SI "v2si")
(V2DI "di") (V2SF "sf")
- (V4SF "v2sf") (V2DF "df")])
+ (V4SF "v2sf") (V2DF "df")
+ (VNx16QI "vnx8qi") (VNx8QI "vnx4qi")
+ (VNx4QI "vnx2qi")
+ (VNx8HI "vnx4hi") (VNx4HI "vnx2hi")
+ (VNx8HF "vnx4hf") (VNx4HF "vnx2hf")
+ (VNx8BF "vnx4bf") (VNx4BF "vnx2bf")
+ (VNx4SI "vnx2si") (VNx4SF "vnx2sf")])
;; Single-element half modes of quad vector modes.
(define_mode_attr V1HALF [(V2DI "V1DI") (V2DF "V1DF")])
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-simd-clone-2.c b/gcc/testsuite/gcc.target/aarch64/vect-simd-clone-2.c
new file mode 100644
index 0000000000000000000000000000000000000000..a25cae2708dd18cc91a7732f845419bbdb06c5c1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vect-simd-clone-2.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-std=c99" } */
+/* { dg-additional-options "-O3 -march=armv8-a" } */
+
+#pragma GCC target ("+sve")
+extern char __attribute__ ((simd, const)) fn3 (int, char);
+void test_fn3 (int *a, int *b, char *c, int n)
+{
+ for (int i = 0; i < n; ++i)
+ a[i] = (int) (fn3 (b[i], c[i]) + c[i]);
+}
+
+/* { dg-final { scan-assembler {\s+_ZGVsMxvv_fn3\n} } } */
rb19031.patch
Description: rb19031.patch
