https://gcc.gnu.org/g:a4abf0fb902816a2508dda5956f407fc27821d88

commit r15-9249-ga4abf0fb902816a2508dda5956f407fc27821d88
Author: Richard Biener <rguenther@suse.de>
Date:   Mon Apr 7 11:27:19 2025 +0200

    tree-optimization/119640 - ICE with vectorized shift placement
    
    When the whole shift is invariant but the shift amount needs
    to be converted and a vector shift used we can mess up placement
    of vector stmts because we do not make SLP scheduling aware of
    the need to insert code for it.  The following mitigates this
    by more conservative placement of such code in vectorizable_shift.
    
            PR tree-optimization/119640
            * tree-vect-stmts.cc (vectorizable_shift): Always insert code
            for one of our SLP operands before the code for the vector
            shift itself.
    
            * gcc.dg/vect/pr119640.c: New testcase.

Diff:
---
 gcc/testsuite/gcc.dg/vect/pr119640.c | 17 +++++++++++++++++
 gcc/tree-vect-stmts.cc               | 11 +++++++----
 2 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/vect/pr119640.c b/gcc/testsuite/gcc.dg/vect/pr119640.c
new file mode 100644
index 000000000000..8872817ac31c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr119640.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-funswitch-loops" } */
+
+int save, mask_nbits;
+
+void execute(long imm)
+{
+  long shift = 0;
+  int destReg[4];
+  for (unsigned i = 0; i < 4; i++)
+    {
+      if (imm)
+       shift = 1ULL << mask_nbits;
+      destReg[i] = shift;
+      save = destReg[0];
+    }
+}
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 3005ae6eaaea..7f874354e75e 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -6750,13 +6750,16 @@ vectorizable_shift (vec_info *vinfo,
     {
       if (was_scalar_shift_arg)
        {
-         /* If the argument was the same in all lanes create
-            the correctly typed vector shift amount directly.  */
+         /* If the argument was the same in all lanes create the
+            correctly typed vector shift amount directly.  Note
+            we made SLP scheduling think we use the original scalars,
+            so place the compensation code next to the shift which
+            is conservative.  See PR119640 where it otherwise breaks.  */
          op1 = fold_convert (TREE_TYPE (vectype), op1);
          op1 = vect_init_vector (vinfo, stmt_info, op1, TREE_TYPE (vectype),
-                                 !loop_vinfo ? gsi : NULL);
+                                 gsi);
          vec_oprnd1 = vect_init_vector (vinfo, stmt_info, op1, vectype,
-                                        !loop_vinfo ? gsi : NULL);
+                                        gsi);
          vec_oprnds1.create (slp_node->vec_stmts_size);
          for (k = 0; k < slp_node->vec_stmts_size; k++)
            vec_oprnds1.quick_push (vec_oprnd1);

Reply via email to