Hi All,

When registering masks for a SIMD clone we end up using nmasks instead of
nvectors, where nmasks seems to compute the number of input masks required for
the call given the current simdlen.

This is, however, wrong, as vect_record_loop_mask wants to know how many masks
you want to create from the given vectype, i.e. which level of rgroups to
create.

This ends up mismatching with vect_get_loop_mask, which uses nvectors.  If the
return type is narrower than the input types, the mismatch causes us to try to
read from the given rgroup.  It only happens to work if the function has an
additional argument that is wider, or if all element and return types are the
same size.

This fixes it by using nvectors during registration as well, which has already
taken into account SLP and VF.

Bootstrapped and regtested on aarch64-none-linux-gnu,
arm-none-linux-gnueabihf, and x86_64-pc-linux-gnu
(-m32 and -m64) with no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

        PR middle-end/118273
        * tree-vect-stmts.cc (vectorizable_simd_clone_call): Use nvectors when
        doing mask registrations.

gcc/testsuite/ChangeLog:

        * gcc.target/aarch64/vect-simd-clone-4.c: New test.

---
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-simd-clone-4.c b/gcc/testsuite/gcc.target/aarch64/vect-simd-clone-4.c
new file mode 100644
index 0000000000000000000000000000000000000000..9b52af70393333ffa4af2b49c7cef9ad93ca1525
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vect-simd-clone-4.c
@@ -0,0 +1,15 @@
+/* { dg-do compile }  */
+/* { dg-options "-std=c99" } */
+/* { dg-additional-options "-O3 -march=armv8-a" } */
+
+#pragma GCC target ("+sve")
+
+extern char __attribute__ ((simd, const)) fn3 (short);
+void test_fn3 (float *a, float *b, double *c, int n)
+{
+  for (int i = 0; i < n; ++i)
+    a[i] = fn3 (c[i]);
+}
+
+/* { dg-final { scan-assembler {\s+_ZGVsMxv_fn3\n} } } */
+
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 833029fcb00108abc605042376e9811651d5cd64..21fb5cf5bd47ad9e37762909c6103adbf8752e2a 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -4561,14 +4561,9 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
            case SIMD_CLONE_ARG_TYPE_MASK:
              if (loop_vinfo
                  && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
-               {
-                 unsigned nmasks
-                   = exact_div (ncopies * bestn->simdclone->simdlen,
-                                TYPE_VECTOR_SUBPARTS (vectype)).to_constant ();
-                 vect_record_loop_mask (loop_vinfo,
-                                        &LOOP_VINFO_MASKS (loop_vinfo),
-                                        nmasks, vectype, op);
-               }
+               vect_record_loop_mask (loop_vinfo,
+                                      &LOOP_VINFO_MASKS (loop_vinfo),
+                                      ncopies, vectype, op);
 
              break;
            }




-- 
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-simd-clone-4.c b/gcc/testsuite/gcc.target/aarch64/vect-simd-clone-4.c
new file mode 100644
index 0000000000000000000000000000000000000000..9b52af70393333ffa4af2b49c7cef9ad93ca1525
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vect-simd-clone-4.c
@@ -0,0 +1,15 @@
+/* { dg-do compile }  */
+/* { dg-options "-std=c99" } */
+/* { dg-additional-options "-O3 -march=armv8-a" } */
+
+#pragma GCC target ("+sve")
+
+extern char __attribute__ ((simd, const)) fn3 (short);
+void test_fn3 (float *a, float *b, double *c, int n)
+{
+  for (int i = 0; i < n; ++i)
+    a[i] = fn3 (c[i]);
+}
+
+/* { dg-final { scan-assembler {\s+_ZGVsMxv_fn3\n} } } */
+
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 833029fcb00108abc605042376e9811651d5cd64..21fb5cf5bd47ad9e37762909c6103adbf8752e2a 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -4561,14 +4561,9 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
 	    case SIMD_CLONE_ARG_TYPE_MASK:
 	      if (loop_vinfo
 		  && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
-		{
-		  unsigned nmasks
-		    = exact_div (ncopies * bestn->simdclone->simdlen,
-				 TYPE_VECTOR_SUBPARTS (vectype)).to_constant ();
-		  vect_record_loop_mask (loop_vinfo,
-					 &LOOP_VINFO_MASKS (loop_vinfo),
-					 nmasks, vectype, op);
-		}
+		vect_record_loop_mask (loop_vinfo,
+				       &LOOP_VINFO_MASKS (loop_vinfo),
+				       ncopies, vectype, op);
 
 	      break;
 	    }



Reply via email to