Hi Richi, on 2020/3/25 下午4:25, Richard Biener wrote: > On Tue, Mar 24, 2020 at 9:30 AM Kewen.Lin <li...@linux.ibm.com> wrote: >> >> Hi, >> >> The new version with refactoring has been attached. >> Bootstrapped/regtested on powerpc64le-linux-gnu (LE) P8 and P9. >> >> Is it ok for trunk? > > Yes. >
Thanks! I'm sorry that I forgot to update nelts with the new element count for the smaller vector on the path that constructs with smaller vectors. The difference against the previous one is: --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -2251,12 +2251,13 @@ vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype) /* First check if vec_init optab supports construction from vector pieces directly. */ scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vtype)); + poly_uint64 inelts = pbsize / GET_MODE_BITSIZE (elmode); machine_mode rmode; - if (related_vector_mode (vmode, elmode, nelts).exists (&rmode) + if (related_vector_mode (vmode, elmode, inelts).exists (&rmode) && (convert_optab_handler (vec_init_optab, vmode, rmode) != CODE_FOR_nothing)) { - *ptype = build_vector_type (TREE_TYPE (vtype), nelts); + *ptype = build_vector_type (TREE_TYPE (vtype), inelts); return vtype; } This new version has been bootstrapped/regtested on powerpc64le-linux-gnu (LE) P8 and x86_64-redhat-linux. May I install this new version instead? BR, Kewen --------- gcc/ChangeLog 2020-MM-DD Kewen Lin <li...@gcc.gnu.org> PR tree-optimization/90332 * tree-vect-stmts.c (vector_vector_composition_type): New function. (get_group_load_store_type): Adjust to call vector_vector_composition_type, extend it to construct with scalar types. (vectorizable_load): Likewise.
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index 2ca8e494680..12beef6978c 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -2220,6 +2220,62 @@ vect_get_store_rhs (stmt_vec_info stmt_info) gcc_unreachable (); } +/* Function VECTOR_VECTOR_COMPOSITION_TYPE + + This function returns a vector type which can be composed with NELTS pieces, + whose type is recorded in PTYPE. VTYPE should be a vector type, and has the + same vector size as the return vector. It first checks whether the target + supports a pieces-size vector mode for the construction; if not, it then + checks a pieces-size scalar mode for the construction. It returns NULL_TREE + if it fails to find an available composition. + + For example, for (vtype=V16QI, nelts=4), we can probably get: + - V16QI with PTYPE V4QI. + - V4SI with PTYPE SI. + - NULL_TREE. */ + +static tree +vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype) +{ + gcc_assert (VECTOR_TYPE_P (vtype)); + gcc_assert (known_gt (nelts, 0U)); + + machine_mode vmode = TYPE_MODE (vtype); + if (!VECTOR_MODE_P (vmode)) + return NULL_TREE; + + poly_uint64 vbsize = GET_MODE_BITSIZE (vmode); + unsigned int pbsize; + if (constant_multiple_p (vbsize, nelts, &pbsize)) + { + /* First check if vec_init optab supports construction from + vector pieces directly. */ + scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vtype)); + poly_uint64 inelts = pbsize / GET_MODE_BITSIZE (elmode); + machine_mode rmode; + if (related_vector_mode (vmode, elmode, inelts).exists (&rmode) + && (convert_optab_handler (vec_init_optab, vmode, rmode) + != CODE_FOR_nothing)) + { + *ptype = build_vector_type (TREE_TYPE (vtype), inelts); + return vtype; + } + + /* Otherwise check if an integer type of the same piece size exists and + if vec_init optab supports construction from it directly. 
*/ + if (int_mode_for_size (pbsize, 0).exists (&elmode) + && related_vector_mode (vmode, elmode, nelts).exists (&rmode) + && (convert_optab_handler (vec_init_optab, rmode, elmode) + != CODE_FOR_nothing)) + { + *ptype = build_nonstandard_integer_type (pbsize, 1); + return build_vector_type (*ptype, nelts); + } + } + + return NULL_TREE; +} + /* A subroutine of get_load_store_type, with a subset of the same arguments. Handle the case where STMT_INFO is part of a grouped load or store. @@ -2300,8 +2356,7 @@ get_group_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp, by simply loading half of the vector only. Usually the construction with an upper zero half will be elided. */ dr_alignment_support alignment_support_scheme; - scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype)); - machine_mode vmode; + tree half_vtype; if (overrun_p && !masked_p && (((alignment_support_scheme @@ -2310,12 +2365,8 @@ get_group_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp, || alignment_support_scheme == dr_unaligned_supported) && known_eq (nunits, (group_size - gap) * 2) && known_eq (nunits, group_size) - && VECTOR_MODE_P (TYPE_MODE (vectype)) - && related_vector_mode (TYPE_MODE (vectype), elmode, - group_size - gap).exists (&vmode) - && (convert_optab_handler (vec_init_optab, - TYPE_MODE (vectype), vmode) - != CODE_FOR_nothing)) + && (vector_vector_composition_type (vectype, 2, &half_vtype) + != NULL_TREE)) overrun_p = false; if (overrun_p && !can_overrun_p) @@ -8915,47 +8966,24 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, { if (group_size < const_nunits) { - /* First check if vec_init optab supports construction from - vector elts directly. 
*/ - scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype)); - machine_mode vmode; - if (VECTOR_MODE_P (TYPE_MODE (vectype)) - && related_vector_mode (TYPE_MODE (vectype), elmode, - group_size).exists (&vmode) - && (convert_optab_handler (vec_init_optab, - TYPE_MODE (vectype), vmode) - != CODE_FOR_nothing)) + /* First check if vec_init optab supports construction from vector + elts directly. Otherwise avoid emitting a constructor of + vector elements by performing the loads using an integer type + of the same size, constructing a vector of those and then + re-interpreting it as the original vector type. This avoids a + huge runtime penalty due to the general inability to perform + store forwarding from smaller stores to a larger load. */ + tree ptype; + tree vtype + = vector_vector_composition_type (vectype, + const_nunits / group_size, + &ptype); + if (vtype != NULL_TREE) { nloads = const_nunits / group_size; lnel = group_size; - ltype = build_vector_type (TREE_TYPE (vectype), group_size); - } - else - { - /* Otherwise avoid emitting a constructor of vector elements - by performing the loads using an integer type of the same - size, constructing a vector of those and then - re-interpreting it as the original vector type. - This avoids a huge runtime penalty due to the general - inability to perform store forwarding from smaller stores - to a larger load. */ - unsigned lsize - = group_size * TYPE_PRECISION (TREE_TYPE (vectype)); - unsigned int lnunits = const_nunits / group_size; - /* If we can't construct such a vector fall back to - element loads of the original vector type. 
*/ - if (int_mode_for_size (lsize, 0).exists (&elmode) - && VECTOR_MODE_P (TYPE_MODE (vectype)) - && related_vector_mode (TYPE_MODE (vectype), elmode, - lnunits).exists (&vmode) - && (convert_optab_handler (vec_init_optab, vmode, elmode) - != CODE_FOR_nothing)) - { - nloads = lnunits; - lnel = group_size; - ltype = build_nonstandard_integer_type (lsize, 1); - lvectype = build_vector_type (ltype, nloads); - } + lvectype = vtype; + ltype = ptype; } } else @@ -9541,6 +9569,7 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, else { tree ltype = vectype; + tree new_vtype = NULL_TREE; /* If there's no peeling for gaps but we have a gap with slp loads then load the lower half of the vector only. See get_group_load_store_type for @@ -9553,10 +9582,14 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, (group_size - DR_GROUP_GAP (first_stmt_info)) * 2) && known_eq (nunits, group_size)) - ltype = build_vector_type (TREE_TYPE (vectype), - (group_size - - DR_GROUP_GAP - (first_stmt_info))); + { + tree half_vtype; + new_vtype + = vector_vector_composition_type (vectype, 2, + &half_vtype); + if (new_vtype != NULL_TREE) + ltype = half_vtype; + } data_ref = fold_build2 (MEM_REF, ltype, dataref_ptr, dataref_offset @@ -9584,10 +9617,21 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem); CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, build_zero_cst (ltype)); - new_stmt - = gimple_build_assign (vec_dest, - build_constructor - (vectype, v)); + gcc_assert (new_vtype != NULL_TREE); + if (new_vtype == vectype) + new_stmt = gimple_build_assign ( + vec_dest, build_constructor (vectype, v)); + else + { + tree new_vname = make_ssa_name (new_vtype); + new_stmt = gimple_build_assign ( + new_vname, build_constructor (new_vtype, v)); + vect_finish_stmt_generation (stmt_info, + new_stmt, gsi); + new_stmt = gimple_build_assign ( + vec_dest, build1 (VIEW_CONVERT_EXPR, vectype, + new_vname)); + } } 
} break;