This avoids breaking LC SSA when SLP codegen pulls an out-of-loop
def into a loop while merging it with in-loop defs for an external def.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

2020-11-30  Richard Biener  <rguent...@suse.de>

        PR tree-optimization/98064
        * tree-vect-loop.c (vectorizable_live_operation): Avoid
        breaking LC SSA for BB vectorization.

        * g++.dg/vect/pr98064.cc: New testcase.
---
 gcc/testsuite/g++.dg/vect/pr98064.cc | 25 +++++++++++++++++++++++++
 gcc/tree-vect-loop.c                 | 18 ++++++++++++++++++
 2 files changed, 43 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/vect/pr98064.cc

diff --git a/gcc/testsuite/g++.dg/vect/pr98064.cc b/gcc/testsuite/g++.dg/vect/pr98064.cc
new file mode 100644
index 00000000000..74043ce7725
--- /dev/null
+++ b/gcc/testsuite/g++.dg/vect/pr98064.cc
@@ -0,0 +1,25 @@
+// { dg-do compile }
+// { dg-additional-options "-O3" }
+
+const long long &min(const long long &__a, long long &__b) {
+  if (__b < __a)
+    return __b;
+  return __a;
+}
+extern long var_2;
+extern int var_3, var_8;
+extern long long var_5;
+extern unsigned short arr_353[];
+extern short arr_362[];
+extern int arr_518[];
+void test() {
+    for (char d = 0; d < 013; d += 4) {
+        for (char e = 0; e < 11; e++)
+            arr_353[e] = var_2 | min((long long)7, var_5);
+        for (int f = var_5; f; f += 4)
+            for (short g = var_8; g; g++)
+                arr_362[g] = 0;
+    }
+    for (short h = 5; (short)var_2; h += 5)
+        arr_518[h] = 0;
+}
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 48dfb4df00e..c8b4dc3a0c3 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -8743,6 +8743,24 @@ vectorizable_live_operation (vec_info *vinfo,
                                   "def\n");
                continue;
              }
+           /* ???  It can also happen that we end up pulling a def into
+              a loop where replacing out-of-loop uses would require
+              a new LC SSA PHI node.  Retain the original scalar in
+              those cases as well.  PR98064.  */
+           if (TREE_CODE (new_tree) == SSA_NAME
+               && !SSA_NAME_IS_DEFAULT_DEF (new_tree)
+               && (gimple_bb (use_stmt)->loop_father
+                   != gimple_bb (vec_stmt)->loop_father)
+               && !flow_loop_nested_p (gimple_bb (vec_stmt)->loop_father,
+                                       gimple_bb (use_stmt)->loop_father))
+             {
+               if (dump_enabled_p ())
+                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                                  "Using original scalar computation for "
+                                  "live lane because there is an out-of-loop "
+                                  "definition for it\n");
+               continue;
+             }
            FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
              SET_USE (use_p, new_tree);
            update_stmt (use_stmt);
-- 
2.26.2

Reply via email to