https://gcc.gnu.org/g:9bd19ff515c95af71b29bc6e232785532afa6823

commit r15-4239-g9bd19ff515c95af71b29bc6e232785532afa6823
Author: Richard Sandiford <richard.sandif...@arm.com>
Date:   Thu Oct 10 15:15:26 2024 +0100

    vect: Avoid divide by zero for permutes of extern VLA vectors
    
    My recent VLA SLP patches caused a regression with cross compilers
    in gcc.dg/torture/neon-sve-bridge.c.  There we have a VEC_PERM_EXPR
    created from two BIT_FIELD_REFs, with the child node being an
    external VLA vector:
    
    note:   node 0x3704a70 (max_nunits=1, refcnt=2) vector(2) long int
    note:   op: VEC_PERM_EXPR
    note:          stmt 0 val1Return_9 = BIT_FIELD_REF <sveReturn_8, 64, 0>;
    note:          stmt 1 val2Return_10 = BIT_FIELD_REF <sveReturn_8, 64, 64>;
    note:          lane permutation { 0[0] 0[1] }
    note:          children 0x3704b08
    note:   node (external) 0x3704b08 (max_nunits=1, refcnt=1) svint64_t
    note:          { }
    
    For this kind of external node, the SLP_TREE_LANES is normally
    the total number of lanes in the vector, but it is zero if the
    vector has variable length:
    
          auto nunits = TYPE_VECTOR_SUBPARTS (SLP_TREE_VECTYPE (vnode));
          unsigned HOST_WIDE_INT const_nunits;
          if (nunits.is_constant (&const_nunits))
            SLP_TREE_LANES (vnode) = const_nunits;
    
    This led to division by zero in:
    
          /* Check whether the output has N times as many lanes per vector.  */
          else if (constant_multiple_p (SLP_TREE_LANES (node) * op_nunits,
                                        SLP_TREE_LANES (child) * nunits,
                                        &this_unpack_factor)
                   && (i == 0 || unpack_factor == this_unpack_factor))
            unpack_factor = this_unpack_factor;
    
    No repetition takes place for this kind of external node, so this
    patch goes with Richard's suggestion to check for external nodes
    that have no scalar statements.
    
    This didn't show up for my native testing since division by zero
    doesn't trap on AArch64.
    
    gcc/
            * tree-vect-slp.cc (vectorizable_slp_permutation_1): Set repeating_p
            to false if we have an external node for a pre-existing vector.

Diff:
---
 gcc/tree-vect-slp.cc | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 9bf6ae4ec8e0..96f1992cfbff 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -10279,10 +10279,19 @@ vectorizable_slp_permutation_1 (vec_info *vinfo, 
gimple_stmt_iterator *gsi,
        }
       auto op_nunits = TYPE_VECTOR_SUBPARTS (op_vectype);
       unsigned int this_unpack_factor;
+      /* Detect permutations of external, pre-existing vectors.  The external
+        node's SLP_TREE_LANES stores the total number of units in the vector,
+        or zero if the vector has variable length.
+
+        We are expected to keep the original VEC_PERM_EXPR for such cases.
+        There is no repetition to model.  */
+      if (SLP_TREE_DEF_TYPE (child) == vect_external_def
+         && SLP_TREE_SCALAR_OPS (child).is_empty ())
+       repeating_p = false;
       /* Check whether the input has twice as many lanes per vector.  */
-      if (children.length () == 1
-         && known_eq (SLP_TREE_LANES (child) * nunits,
-                      SLP_TREE_LANES (node) * op_nunits * 2))
+      else if (children.length () == 1
+              && known_eq (SLP_TREE_LANES (child) * nunits,
+                           SLP_TREE_LANES (node) * op_nunits * 2))
        pack_p = true;
       /* Check whether the output has N times as many lanes per vector.  */
       else if (constant_multiple_p (SLP_TREE_LANES (node) * op_nunits,

Reply via email to