The following fixes an oversight in vect_check_scalar_mask when the
mask is external or constant.  When doing BB vectorization we need to
provide a group_size, which is best done via an overload of
get_mask_type_for_scalar_type accepting the SLP node as argument.
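For context, get_vectype_for_scalar_type already has such an SLP node
overload (its declaration appears in the tree-vectorizer.h hunk below),
and the new mask-type overload is implemented on top of it.  As a rough
sketch of the shape involved (simplified, not the verbatim GCC source;
SLP_TREE_LANES is the lane-count accessor):

  /* Sketch: the SLP node overload derives the group size from the
     number of lanes in NODE and forwards to the group-size overload.
     The new get_mask_type_for_scalar_type overload below builds on
     this and wraps the result with truth_type_for.  */
  tree
  get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
                               slp_tree node)
  {
    return get_vectype_for_scalar_type (vinfo, scalar_type,
                                        SLP_TREE_LANES (node));
  }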
When fixed we then run into the issue that we have not analyzed
alignment of the .MASK_LOADs because they were not identified as loads
by vect_gather_slp_loads.  Fixed by reworking the detection.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

Richard.

	PR tree-optimization/112404
	* tree-vectorizer.h (get_mask_type_for_scalar_type): Declare
	overload with SLP node argument.
	* tree-vect-stmts.cc (get_mask_type_for_scalar_type): Implement
	it.
	(vect_check_scalar_mask): Use it.
	* tree-vect-slp.cc (vect_gather_slp_loads): Properly identify
	loads also for nodes with children, like .MASK_LOAD.
	* tree-vect-loop.cc (vect_analyze_loop_2): Look at the
	representative for load nodes and check whether it is a grouped
	access before looking for load-lanes support.

	* gfortran.dg/pr112404.f90: New testcase.
---
 gcc/testsuite/gfortran.dg/pr112404.f90 | 23 +++++++++++++
 gcc/tree-vect-loop.cc                  | 47 ++++++++++++++------------
 gcc/tree-vect-slp.cc                   | 23 ++++++-------
 gcc/tree-vect-stmts.cc                 | 22 +++++++++++-
 gcc/tree-vectorizer.h                  |  1 +
 5 files changed, 82 insertions(+), 34 deletions(-)
 create mode 100644 gcc/testsuite/gfortran.dg/pr112404.f90

diff --git a/gcc/testsuite/gfortran.dg/pr112404.f90 b/gcc/testsuite/gfortran.dg/pr112404.f90
new file mode 100644
index 00000000000..573fa28164a
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/pr112404.f90
@@ -0,0 +1,23 @@
+! { dg-do compile }
+! { dg-options "-Ofast" }
+! { dg-additional-options "-mavx2" { target avx2 } }
+  SUBROUTINE sfddagd( regime, znt, ite, jte )
+  REAL, DIMENSION( ime, IN) :: regime, znt
+  REAL, DIMENSION( ite, jte) :: wndcor_u
+  LOGICAL wrf_dm_on_monitor
+  IF( int4 == 1 ) THEN
+    DO j=jts,jtf
+      DO i=itsu,itf
+        reg = regime(i-1, j)
+        IF( reg > 10.0 ) THEN
+          znt0 = znt(i-1, j) + znt(i, j)
+          IF( znt0 <= 0.2) THEN
+            wndcor_u(i,j) = 0.2
+          ENDIF
+        ENDIF
+      ENDDO
+    ENDDO
+    IF ( wrf_dm_on_monitor()) THEN
+    ENDIF
+  ENDIF
+  END
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 362856a6507..5213aa0169c 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -2943,17 +2943,19 @@ start_over:
		  != IFN_LAST)
	    {
	      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, load_node)
-		{
-		  stmt_vec_info stmt_vinfo = DR_GROUP_FIRST_ELEMENT
-		      (SLP_TREE_SCALAR_STMTS (load_node)[0]);
-		  /* Use SLP for strided accesses (or if we can't
-		     load-lanes).  */
-		  if (STMT_VINFO_STRIDED_P (stmt_vinfo)
-		      || vect_load_lanes_supported
-			    (STMT_VINFO_VECTYPE (stmt_vinfo),
-			     DR_GROUP_SIZE (stmt_vinfo), false) == IFN_LAST)
-		    break;
-		}
+		if (STMT_VINFO_GROUPED_ACCESS
+		      (SLP_TREE_REPRESENTATIVE (load_node)))
+		  {
+		    stmt_vec_info stmt_vinfo = DR_GROUP_FIRST_ELEMENT
+			(SLP_TREE_REPRESENTATIVE (load_node));
+		    /* Use SLP for strided accesses (or if we can't
+		       load-lanes).  */
+		    if (STMT_VINFO_STRIDED_P (stmt_vinfo)
+			|| vect_load_lanes_supported
+			      (STMT_VINFO_VECTYPE (stmt_vinfo),
+			       DR_GROUP_SIZE (stmt_vinfo), false) == IFN_LAST)
+		      break;
+		  }

	      can_use_lanes
		= can_use_lanes && i == SLP_INSTANCE_LOADS (instance).length ();
@@ -3261,16 +3263,19 @@ again:
					   "unsupported grouped store\n");
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), j, node)
	{
-	  vinfo = SLP_TREE_SCALAR_STMTS (node)[0];
-	  vinfo = DR_GROUP_FIRST_ELEMENT (vinfo);
-	  bool single_element_p = !DR_GROUP_NEXT_ELEMENT (vinfo);
-	  size = DR_GROUP_SIZE (vinfo);
-	  vectype = STMT_VINFO_VECTYPE (vinfo);
-	  if (vect_load_lanes_supported (vectype, size, false) == IFN_LAST
-	      && ! vect_grouped_load_supported (vectype, single_element_p,
-						size))
-	    return opt_result::failure_at (vinfo->stmt,
-					   "unsupported grouped load\n");
+	  vinfo = SLP_TREE_REPRESENTATIVE (node);
+	  if (STMT_VINFO_GROUPED_ACCESS (vinfo))
+	    {
+	      vinfo = DR_GROUP_FIRST_ELEMENT (vinfo);
+	      bool single_element_p = !DR_GROUP_NEXT_ELEMENT (vinfo);
+	      size = DR_GROUP_SIZE (vinfo);
+	      vectype = STMT_VINFO_VECTYPE (vinfo);
+	      if (vect_load_lanes_supported (vectype, size, false) == IFN_LAST
+		  && ! vect_grouped_load_supported (vectype, single_element_p,
+						    size))
+		return opt_result::failure_at (vinfo->stmt,
+					       "unsupported grouped load\n");
+	    }
	}
     }
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 6b8a7b628b6..13137ede8d4 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -2898,22 +2898,21 @@ vect_gather_slp_loads (vec<slp_tree> &loads, slp_tree node,
   if (!node || visited.add (node))
     return;

-  if (SLP_TREE_CHILDREN (node).length () == 0)
+  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
+    return;
+
+  if (SLP_TREE_CODE (node) != VEC_PERM_EXPR)
     {
-      if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
-	return;
-      stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
-      if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
+      stmt_vec_info stmt_info = SLP_TREE_REPRESENTATIVE (node);
+      if (STMT_VINFO_DATA_REF (stmt_info)
	  && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
	loads.safe_push (node);
     }
-  else
-    {
-      unsigned i;
-      slp_tree child;
-      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
-	vect_gather_slp_loads (loads, child, visited);
-    }
+
+  unsigned i;
+  slp_tree child;
+  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
+    vect_gather_slp_loads (loads, child, visited);
 }
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index f895aaf3083..eefb1eec1ef 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -2456,7 +2456,8 @@ vect_check_scalar_mask (vec_info *vinfo, stmt_vec_info stmt_info,

   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
   if (!mask_vectype)
-    mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype));
+    mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype),
+						  mask_node_1);

   if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
     {
@@ -13386,6 +13387,25 @@ get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
   return truth_type_for (vectype);
 }

+/* Function get_mask_type_for_scalar_type.
+
+   Returns the mask type corresponding to a result of comparison
+   of vectors of specified SCALAR_TYPE as supported by target.
+   NODE, if nonnull, is the SLP tree node that will use the returned
+   vector type.  */
+
+tree
+get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
+			       slp_tree node)
+{
+  tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, node);
+
+  if (!vectype)
+    return NULL;
+
+  return truth_type_for (vectype);
+}
+
 /* Function get_same_sized_vectype

    Returns a vector type corresponding to SCALAR_TYPE of size
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 254d172231d..d2ddc2e4ad5 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -2207,6 +2207,7 @@ extern tree get_related_vectype_for_scalar_type (machine_mode, tree,
 extern tree get_vectype_for_scalar_type (vec_info *, tree, unsigned int = 0);
 extern tree get_vectype_for_scalar_type (vec_info *, tree, slp_tree);
 extern tree get_mask_type_for_scalar_type (vec_info *, tree, unsigned int = 0);
+extern tree get_mask_type_for_scalar_type (vec_info *, tree, slp_tree);
 extern tree get_same_sized_vectype (tree, tree);
 extern bool vect_chooses_same_modes_p (vec_info *, machine_mode);
 extern bool vect_get_loop_mask_type (loop_vec_info);
-- 
2.35.3