https://gcc.gnu.org/g:51291ad0f1f89a81de917110af96e019dcd5690c

commit r15-4463-g51291ad0f1f89a81de917110af96e019dcd5690c
Author: Tamar Christina <tamar.christ...@arm.com>
Date:   Fri Oct 18 10:37:28 2024 +0100

    middle-end: Fix GSI for gcond root [PR117140]
    
    When finding the gsi to use for code of the root statements we should use 
the
    one of the original statement rather than the gcond which may be inside a
    pattern.
    
    Without this the emitted instructions may be discarded later.
    
    gcc/ChangeLog:
    
            PR tree-optimization/117140
            * tree-vect-slp.cc (vectorize_slp_instance_root_stmt): Use gsi from
            original statement.
    
    gcc/testsuite/ChangeLog:
    
            PR tree-optimization/117140
            * gcc.dg/vect/vect-early-break_129-pr117140.c: New test.

Diff:
---
 .../gcc.dg/vect/vect-early-break_129-pr117140.c    | 94 ++++++++++++++++++++++
 gcc/tree-vect-slp.cc                               |  2 +-
 2 files changed, 95 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_129-pr117140.c 
b/gcc/testsuite/gcc.dg/vect/vect-early-break_129-pr117140.c
new file mode 100644
index 000000000000..eec7f8db40c7
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_129-pr117140.c
@@ -0,0 +1,94 @@
+/* { dg-do compile } */
+/* { dg-add-options vect_early_break } */
+/* { dg-require-effective-target vect_early_break } */
+/* { dg-require-effective-target vect_int } */
+
+typedef signed char int8_t;
+typedef short int int16_t;
+typedef int int32_t;
+typedef long long int int64_t;
+typedef unsigned char uint8_t;
+typedef short unsigned int uint16_t;
+typedef unsigned int uint32_t;
+typedef long long unsigned int uint64_t;
+
+void __attribute__ ((noinline, noclone))
+test_1_TYPE1_uint32_t (uint16_t *__restrict f, uint32_t *__restrict d,
+                       uint16_t x, uint16_t x2, uint32_t y, int n)
+{
+    for (int i = 0; i < n; ++i)
+        {
+            f[i * 2 + 0] = x;
+            f[i * 2 + 1] = x2;
+            d[i] = y;
+        }
+}
+
+void __attribute__ ((noinline, noclone))
+test_1_TYPE1_int64_t (int32_t *__restrict f, int64_t *__restrict d, int32_t x,
+                      int32_t x2, int64_t y, int n)
+{
+    for (int i = 0; i < n; ++i)
+        {
+            f[i * 2 + 0] = x;
+            f[i * 2 + 1] = x2;
+            d[i] = y;
+        }
+}
+
+int
+main (void)
+{
+        // This part is necessary for ice to appear though running it by 
itself does not trigger an ICE
+        int n_3_TYPE1_uint32_t = 32;
+        uint16_t x_3_uint16_t = 233;
+        uint16_t x2_3_uint16_t = 78;
+        uint32_t y_3_uint32_t = 1234;
+        uint16_t f_3_uint16_t[33 * 2 + 1] = { 0} ;
+        uint32_t d_3_uint32_t[33] = { 0} ;
+        test_1_TYPE1_uint32_t (f_3_uint16_t, d_3_uint32_t, x_3_uint16_t, 
x2_3_uint16_t, y_3_uint32_t, n_3_TYPE1_uint32_t);
+        for (int i = 0;
+                        i < n_3_TYPE1_uint32_t;
+                        ++i) {
+                if (f_3_uint16_t[i * 2 + 0] != x_3_uint16_t) __builtin_abort 
();
+                if (f_3_uint16_t[i * 2 + 1] != x2_3_uint16_t) __builtin_abort 
();
+                if (d_3_uint32_t[i] != y_3_uint32_t) __builtin_abort ();
+        }
+        for (int i = n_3_TYPE1_uint32_t;
+                        i < n_3_TYPE1_uint32_t + 1;
+                        ++i) {
+                if (f_3_uint16_t[i * 2 + 0] != 0) __builtin_abort ();
+                if (f_3_uint16_t[i * 2 + 1] != 0) __builtin_abort ();
+                if (d_3_uint32_t[i] != 0) __builtin_abort ();
+        }
+    // If ran without the above section, a different ice appears. see below
+    int n_3_TYPE1_int64_t = 32;
+    int32_t x_3_int32_t = 233;
+    int32_t x2_3_int32_t = 78;
+    int64_t y_3_int64_t = 1234;
+    int32_t f_3_int32_t[33 * 2 + 1] = { 0 };
+    int64_t d_3_int64_t[33] = { 0 };
+    test_1_TYPE1_int64_t (f_3_int32_t, d_3_int64_t, x_3_int32_t, x2_3_int32_t,
+                          y_3_int64_t, n_3_TYPE1_int64_t);
+    for (int i = 0; i < n_3_TYPE1_int64_t; ++i)
+        {
+            if (f_3_int32_t[i * 2 + 0] != x_3_int32_t)
+                __builtin_abort ();
+            if (f_3_int32_t[i * 2 + 1] != x2_3_int32_t)
+                __builtin_abort ();
+            if (d_3_int64_t[i] != y_3_int64_t)
+                __builtin_abort ();
+        }
+
+    for (int i = n_3_TYPE1_int64_t; i < n_3_TYPE1_int64_t + 1; ++i)
+        {
+            if (f_3_int32_t[i * 2 + 0] != 0)
+                __builtin_abort ();
+            if (f_3_int32_t[i * 2 + 1] != 0)
+                __builtin_abort ();
+            if (d_3_int64_t[i] != 0)
+                __builtin_abort ();
+        }
+
+    return 0;
+}
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index d35c2ea02dce..9276662fa0f1 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -11167,7 +11167,7 @@ vectorize_slp_instance_root_stmt (vec_info *vinfo, 
slp_tree node, slp_instance i
         can't support lane > 1 at this time.  */
       gcc_assert (instance->root_stmts.length () == 1);
       auto root_stmt_info = instance->root_stmts[0];
-      auto last_stmt = STMT_VINFO_STMT (root_stmt_info);
+      auto last_stmt = STMT_VINFO_STMT (vect_orig_stmt (root_stmt_info));
       gimple_stmt_iterator rgsi = gsi_for_stmt (last_stmt);
       gimple *vec_stmt = NULL;
       gcc_assert (!SLP_TREE_VEC_DEFS (node).is_empty ());

Reply via email to