Currently vec_init does not support VLA vec_init and we instead fall back to
storing piecewise through memory.

However there's no defined semantics for this.  This patch adds the semantics
that for VLA constructors the vector has to be cleared with zero before
piecewise being constructed from scalar elements.  This means unspecified
elements are initialized to zero.

Without this patch

#include <arm_sve.h>

svint32_t __attribute__ ((noipa))
func_init4 (int32_t a, int32_t b, int32_t c)
{
  svint32_t temp = {a, b, c};
  return temp;
}

compiles to:

func_init4:
        addvl   sp, sp, #-3
        movi    d30, #0
        str     z30, [sp, #2, mul vl]
        addvl   x3, sp, #2
        str     w0, [x3]
        addvl   x0, sp, #1
        add     x0, x0, 4
        ldr     z31, [sp, #2, mul vl]
        str     z31, [sp, #1, mul vl]
        str     w1, [x0]
        ldr     z31, [sp, #1, mul vl]
        str     z31, [sp]
        str     w2, [sp, 8]
        ldr     z0, [sp]
        addvl   sp, sp, #3
        ret

and with the patch

func_init4:
        fmov    s0, w2
        fmov    s0, s0
        insr    z0.s, w1
        insr    z0.s, w0
        ret

note that this is still not optimal as the

        fmov    s0, s0

that's doing the zero-ing of the vector is not actually needed since the
transfer instruction

        fmov    s0, w2

already zeros the destination SVE register.  But this is an AArch64 deficiency
that will be dealt with in the backend.

the optimal codegen here is:

 func_init4:
        orr     x1, x1, x2, lsl 32
        fmov    d0, x1
        insr    z0.s, w0
        ret

Bootstrapped Regtested on aarch64-none-linux-gnu,
arm-none-linux-gnueabihf, x86_64-pc-linux-gnu
-m32, -m64 and no issues.

Ok for master?

Thanks,
Tamar

Co-Authored-By: Chris Bazley <[email protected]>

gcc/ChangeLog:

        * expr.cc (store_constructor): Handle VLA vec_init support and generic
        fall through piecewise copy.
        * doc/md.texi: Document change

gcc/testsuite/ChangeLog:

        * gcc.target/aarch64/sve/copsi.c: New test.

---
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 
1ef748796f5d0de63127b86c9903c9b12420bebf..be40cc695e071babe1928b555a11fd67af0d331b
 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -7552,7 +7552,9 @@ Initialize the vector to given values.  Operand 0 is the 
vector to initialize
 and operand 1 is parallel containing values for individual fields.  The
 @var{n} mode is the mode of the elements, should be either element mode of
 the vector mode @var{m}, or a vector mode with the same element mode and
-smaller number of elements.
+smaller number of elements.  If @var{m} specifies a scalable vector mode,
+then operand 1 only specifies the minimum number of elements implied
+by @var{m} and elements beyond are zero initialized.
 
 @mdindex vec_duplicate@var{m}
 @item @samp{vec_duplicate@var{m}}
diff --git a/gcc/expr.cc b/gcc/expr.cc
index 
de73215ccc6623fa90f4a90212fd8dc7c50991a9..373bec1322e7c554cbb314aed923abd7c3267ad8
 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -7498,11 +7498,14 @@ fields_length (const_tree type)
   return count;
 }
 
-
 /* Store the value of constructor EXP into the rtx TARGET.
    TARGET is either a REG or a MEM; we know it cannot conflict, since
    safe_from_p has been called.
    CLEARED is true if TARGET is known to have been zero'd.
+   If the constructor EXP has a vector type then elements of TARGET for which
+   there is no corresponding element in EXP are zero'd.  For a variable-length
+   vector type, only elements up to the minimum number of subparts of the type
+   are explicitly zero'd; any elements beyond that are implicitly zero.
    SIZE is the number of bytes of TARGET we are allowed to modify: this
    may not be the same as the size of EXP if we are assigning to a field
    which has been packed to exclude padding bits.
@@ -8075,13 +8078,20 @@ store_constructor (tree exp, rtx target, int cleared, 
poly_int64 size,
                   similarly non-const type vectors. */
                icode = convert_optab_handler (vec_init_optab, mode, eltmode);
              }
+           else
+             {
+               /* Handle variable-length vector types.  */
+               icode = convert_optab_handler (vec_init_optab, mode, eltmode);
+               const_n_elts = constant_lower_bound (n_elts);
+               cleared = 0;
+             }
 
-         if (const_n_elts && icode != CODE_FOR_nothing)
-           {
-             vector = rtvec_alloc (const_n_elts);
-             for (unsigned int k = 0; k < const_n_elts; k++)
-               RTVEC_ELT (vector, k) = CONST0_RTX (eltmode);
-           }
+           if (const_n_elts && icode != CODE_FOR_nothing)
+             {
+               vector = rtvec_alloc (const_n_elts);
+               for (unsigned int k = 0; k < const_n_elts; k++)
+                 RTVEC_ELT (vector, k) = CONST0_RTX (eltmode);
+             }
          }
 
        /* Compute the size of the elements in the CTOR.  It differs
@@ -8121,7 +8131,8 @@ store_constructor (tree exp, rtx target, int cleared, 
poly_int64 size,
                             || maybe_gt (4 * zero_count, 3 * count));
          }
 
-       if (need_to_clear && maybe_gt (size, 0) && !vector)
+       if (need_to_clear
+           && (maybe_gt (size, 0) || REG_P (target)))
          {
            if (REG_P (target))
              emit_move_insn (target, CONST0_RTX (mode));
@@ -8138,6 +8149,9 @@ store_constructor (tree exp, rtx target, int cleared, 
poly_int64 size,
            cleared = 1;
          }
 
+       /* Ensure that something has cleared the register.  */
+       gcc_assert ((need_to_clear && cleared) || !need_to_clear);
+
         if (MEM_P (target))
          alias = MEM_ALIAS_SET (target);
        else
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/copsi.c 
b/gcc/testsuite/gcc.target/aarch64/sve/copsi.c
new file mode 100644
index 
0000000000000000000000000000000000000000..d85403640b9ab894b378e741013eb27b76a7e19a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/copsi.c
@@ -0,0 +1,33 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#include <arm_sve.h>
+
+/*
+** func_init4:
+**     mov     z0\.d, x1
+**     insr    z0\.d, x0
+**     ret
+*/
+svint64_t __attribute__ ((noipa))
+func_init4 (int64_t a, int64_t b)
+{
+  svint64_t temp = { a, b };
+  return temp;
+}
+
+/*
+** func_init3:
+**     fmov    s0, w2
+**     fmov    s0, s0
+**     insr    z0\.s, w1
+**     insr    z0\.s, w0
+**     ret
+*/
+svint32_t __attribute__ ((noipa))
+func_init3 (int32_t a, int32_t b, int32_t c)
+{
+  svint32_t temp = { a, b, c };
+  return temp;
+}


-- 
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 1ef748796f5d0de63127b86c9903c9b12420bebf..be40cc695e071babe1928b555a11fd67af0d331b 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -7552,7 +7552,9 @@ Initialize the vector to given values.  Operand 0 is the vector to initialize
 and operand 1 is parallel containing values for individual fields.  The
 @var{n} mode is the mode of the elements, should be either element mode of
 the vector mode @var{m}, or a vector mode with the same element mode and
-smaller number of elements.
+smaller number of elements.  If @var{m} specifies a scalable vector mode,
+then operand 1 only specifies the minimum number of elements implied
+by @var{m} and elements beyond are zero initialized.
 
 @mdindex vec_duplicate@var{m}
 @item @samp{vec_duplicate@var{m}}
diff --git a/gcc/expr.cc b/gcc/expr.cc
index de73215ccc6623fa90f4a90212fd8dc7c50991a9..373bec1322e7c554cbb314aed923abd7c3267ad8 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -7498,11 +7498,14 @@ fields_length (const_tree type)
   return count;
 }
 
-
 /* Store the value of constructor EXP into the rtx TARGET.
    TARGET is either a REG or a MEM; we know it cannot conflict, since
    safe_from_p has been called.
    CLEARED is true if TARGET is known to have been zero'd.
+   If the constructor EXP has a vector type then elements of TARGET for which
+   there is no corresponding element in EXP are zero'd.  For a variable-length
+   vector type, only elements up to the minimum number of subparts of the type
+   are explicitly zero'd; any elements beyond that are implicitly zero.
    SIZE is the number of bytes of TARGET we are allowed to modify: this
    may not be the same as the size of EXP if we are assigning to a field
    which has been packed to exclude padding bits.
@@ -8075,13 +8078,20 @@ store_constructor (tree exp, rtx target, int cleared, poly_int64 size,
 		   similarly non-const type vectors. */
 		icode = convert_optab_handler (vec_init_optab, mode, eltmode);
 	      }
+	    else
+	      {
+		/* Handle variable-length vector types.  */
+		icode = convert_optab_handler (vec_init_optab, mode, eltmode);
+		const_n_elts = constant_lower_bound (n_elts);
+		cleared = 0;
+	      }
 
-	  if (const_n_elts && icode != CODE_FOR_nothing)
-	    {
-	      vector = rtvec_alloc (const_n_elts);
-	      for (unsigned int k = 0; k < const_n_elts; k++)
-		RTVEC_ELT (vector, k) = CONST0_RTX (eltmode);
-	    }
+	    if (const_n_elts && icode != CODE_FOR_nothing)
+	      {
+		vector = rtvec_alloc (const_n_elts);
+		for (unsigned int k = 0; k < const_n_elts; k++)
+		  RTVEC_ELT (vector, k) = CONST0_RTX (eltmode);
+	      }
 	  }
 
 	/* Compute the size of the elements in the CTOR.  It differs
@@ -8121,7 +8131,8 @@ store_constructor (tree exp, rtx target, int cleared, poly_int64 size,
 			     || maybe_gt (4 * zero_count, 3 * count));
 	  }
 
-	if (need_to_clear && maybe_gt (size, 0) && !vector)
+	if (need_to_clear
+	    && (maybe_gt (size, 0) || REG_P (target)))
 	  {
 	    if (REG_P (target))
 	      emit_move_insn (target, CONST0_RTX (mode));
@@ -8138,6 +8149,9 @@ store_constructor (tree exp, rtx target, int cleared, poly_int64 size,
 	    cleared = 1;
 	  }
 
+	/* Ensure that something has cleared the register.  */
+	gcc_assert ((need_to_clear && cleared) || !need_to_clear);
+
         if (MEM_P (target))
 	  alias = MEM_ALIAS_SET (target);
 	else
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/copsi.c b/gcc/testsuite/gcc.target/aarch64/sve/copsi.c
new file mode 100644
index 0000000000000000000000000000000000000000..d85403640b9ab894b378e741013eb27b76a7e19a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/copsi.c
@@ -0,0 +1,33 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#include <arm_sve.h>
+
+/*
+** func_init4:
+**	mov	z0\.d, x1
+**	insr	z0\.d, x0
+**	ret
+*/
+svint64_t __attribute__ ((noipa))
+func_init4 (int64_t a, int64_t b)
+{
+  svint64_t temp = { a, b };
+  return temp;
+}
+
+/*
+** func_init3:
+**	fmov	s0, w2
+**	fmov	s0, s0
+**	insr	z0\.s, w1
+**	insr	z0\.s, w0
+**	ret
+*/
+svint32_t __attribute__ ((noipa))
+func_init3 (int32_t a, int32_t b, int32_t c)
+{
+  svint32_t temp = { a, b, c };
+  return temp;
+}

Reply via email to