store_bit_field_1 has an optimization where if a target is not a memory operand
and the entire value is being set from something larger we can just wrap a
subreg around the source and emit a move.

For vector constructors this is however problematic because the subreg means
that the expansion of the constructor won't happen through vec_init anymore.

Complicated constructors which aren't natively supported by targets then ICE as
they wouldn't have been expanded so recog fails.

This patch blocks the optimization on non-constant vector constructors. Or 
non-uniform
non-constant vectors. I allowed constant vectors because if I read the code 
right
simplify-rtx should be able to perform the simplification of pulling out the 
element
or merging the constant values.  There are several testcases in 
aarch64-sve-pcs.exp
that test this as well. I allowed uniform non-constant vectors because they
would be folded into a vec_select later on.

Note that codegen is quite horrible, for what should only be an lsr.  But I'll
address that separately so that this patch is backportable.

Bootstrapped Regtested on aarch64-none-linux-gnu,
arm-none-linux-gnueabihf, x86_64-pc-linux-gnu
-m32, -m64 and no issues.

Ok for master? and GCC 15, 14, 13?

Thanks,
Tamar


gcc/ChangeLog:

        PR target/120718
        * expmed.cc (store_bit_field_1): Only push subreg over uniform vector
        constructors.
        (foldable_value_with_subreg): New.

gcc/testsuite/ChangeLog:

        PR target/120718
        * gcc.target/aarch64/sve/pr120718.c: New test.

---
diff --git a/gcc/expmed.cc b/gcc/expmed.cc
index 
be427dca5d9afeed2013954472dde3a5430169e0..a468aa5c0c3f20bd62a7afc1d245d64e87be5396
 100644
--- a/gcc/expmed.cc
+++ b/gcc/expmed.cc
@@ -740,6 +740,28 @@ store_bit_field_using_insv (const extraction_insn *insv, 
rtx op0,
   return false;
 }
 
+/* For non-constant vectors wrapping a subreg around the RTX will not make
+   the expression expand properly through vec_init.  For constant vectors
+   we can because simplification can just extract the element out by
+   by merging the values.  This can be done by simplify-rtx and so the
+   subreg will be eliminated.  However poly constants require vec_init as
+   they are a runtime value.  So only allow the subreg for simple integer
+   or floating point constants.  */
+
+static bool
+foldable_value_with_subreg (rtx value)
+{
+  if (GET_CODE (value) != CONST_VECTOR || const_vec_duplicate_p (value))
+    return true;
+
+  for (unsigned i = 0; i < const_vector_encoded_nelts (value); i++)
+    if (!CONST_INT_P (const_vector_elt (value, i))
+       && !CONST_DOUBLE_P (const_vector_elt (value, i)))
+      return false;
+
+  return true;
+}
+
 /* A subroutine of store_bit_field, with the same arguments.  Return true
    if the operation could be implemented.
 
@@ -795,7 +817,8 @@ store_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, 
poly_uint64 bitnum,
   /* If the target is a register, overwriting the entire object, or storing
      a full-word or multi-word field can be done with just a SUBREG.  */
   if (!MEM_P (op0)
-      && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode)))
+      && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode))
+      && foldable_value_with_subreg (value))
     {
       /* Use the subreg machinery either to narrow OP0 to the required
         words or to cope with mode punning between equal-sized modes.
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr120718.c 
b/gcc/testsuite/gcc.target/aarch64/sve/pr120718.c
new file mode 100644
index 
0000000000000000000000000000000000000000..cb21d94792f0679a48cc20c3dcdf78c89c05a5c6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr120718.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3" } */
+
+#include <arm_sve.h>
+typedef int __attribute__((vector_size(8))) v2si;
+typedef struct { int x; int y; } A;
+void bar(A a);
+void foo()
+{
+    A a;
+    *(v2si *)&a = (v2si){0, (int)svcntd_pat(SV_ALL)};
+    bar(a);
+}


-- 
diff --git a/gcc/expmed.cc b/gcc/expmed.cc
index be427dca5d9afeed2013954472dde3a5430169e0..a468aa5c0c3f20bd62a7afc1d245d64e87be5396 100644
--- a/gcc/expmed.cc
+++ b/gcc/expmed.cc
@@ -740,6 +740,28 @@ store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
   return false;
 }
 
+/* For non-constant vectors wrapping a subreg around the RTX will not make
+   the expression expand properly through vec_init.  For constant vectors
+   we can because simplification can just extract the element out by
+   by merging the values.  This can be done by simplify-rtx and so the
+   subreg will be eliminated.  However poly constants require vec_init as
+   they are a runtime value.  So only allow the subreg for simple integer
+   or floating point constants.  */
+
+static bool
+foldable_value_with_subreg (rtx value)
+{
+  if (GET_CODE (value) != CONST_VECTOR || const_vec_duplicate_p (value))
+    return true;
+
+  for (unsigned i = 0; i < const_vector_encoded_nelts (value); i++)
+    if (!CONST_INT_P (const_vector_elt (value, i))
+	&& !CONST_DOUBLE_P (const_vector_elt (value, i)))
+      return false;
+
+  return true;
+}
+
 /* A subroutine of store_bit_field, with the same arguments.  Return true
    if the operation could be implemented.
 
@@ -795,7 +817,8 @@ store_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
   /* If the target is a register, overwriting the entire object, or storing
      a full-word or multi-word field can be done with just a SUBREG.  */
   if (!MEM_P (op0)
-      && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode)))
+      && known_eq (bitsize, GET_MODE_BITSIZE (fieldmode))
+      && foldable_value_with_subreg (value))
     {
       /* Use the subreg machinery either to narrow OP0 to the required
 	 words or to cope with mode punning between equal-sized modes.
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr120718.c b/gcc/testsuite/gcc.target/aarch64/sve/pr120718.c
new file mode 100644
index 0000000000000000000000000000000000000000..cb21d94792f0679a48cc20c3dcdf78c89c05a5c6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr120718.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3" } */
+
+#include <arm_sve.h>
+typedef int __attribute__((vector_size(8))) v2si;
+typedef struct { int x; int y; } A;
+void bar(A a);
+void foo()
+{
+    A a;
+    *(v2si *)&a = (v2si){0, (int)svcntd_pat(SV_ALL)};
+    bar(a);
+}

Reply via email to