Hi Richi,
on 2020/3/25 下午4:25, Richard Biener wrote:
> On Tue, Mar 24, 2020 at 9:30 AM Kewen.Lin <[email protected]> wrote:
>>
>> Hi,
>>
>> The new version with refactoring has been attached.
>> Bootstrapped/regtested on powerpc64le-linux-gnu (LE) P8 and P9.
>>
>> Is it ok for trunk?
>
> Yes.
>
Thanks! I'm sorry that I forgot to update nelts with the new element count
of the smaller vector on the path that constructs with smaller vectors.
The difference against the previous one is:
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -2251,12 +2251,13 @@ vector_vector_composition_type (tree vtype, poly_uint64
nelts, tree *ptype)
/* First check if vec_init optab supports construction from
vector pieces directly. */
scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vtype));
+ poly_uint64 inelts = pbsize / GET_MODE_BITSIZE (elmode);
machine_mode rmode;
- if (related_vector_mode (vmode, elmode, nelts).exists (&rmode)
+ if (related_vector_mode (vmode, elmode, inelts).exists (&rmode)
&& (convert_optab_handler (vec_init_optab, vmode, rmode)
!= CODE_FOR_nothing))
{
- *ptype = build_vector_type (TREE_TYPE (vtype), nelts);
+ *ptype = build_vector_type (TREE_TYPE (vtype), inelts);
return vtype;
}
This new version has been bootstrapped/regtested on
powerpc64le-linux-gnu (LE) P8 and x86_64-redhat-linux.
May I install this new version instead?
BR,
Kewen
---------
gcc/ChangeLog
2020-MM-DD Kewen Lin <[email protected]>
PR tree-optimization/90332
* tree-vect-stmts.c (vector_vector_composition_type): New function.
(get_group_load_store_type): Adjust to call
vector_vector_composition_type,
extend it to construct with scalar types.
(vectorizable_load): Likewise.
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 2ca8e494680..12beef6978c 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -2220,6 +2220,62 @@ vect_get_store_rhs (stmt_vec_info stmt_info)
gcc_unreachable ();
}
+/* Function VECTOR_VECTOR_COMPOSITION_TYPE
+
+ This function returns a vector type which can be composed with NELTS pieces,
+ whose type is recorded in PTYPE. VTYPE should be a vector type, and has the
+ same vector size as the return vector. It first checks whether the target
+ supports a pieces-size vector mode for construction; if not, it further
+ checks a pieces-size scalar mode for construction. It returns NULL_TREE if
+ it fails to find an available composition. */
+
+ For example, for (vtype=V16QI, nelts=4), we can probably get:
+ - V16QI with PTYPE V4QI.
+ - V4SI with PTYPE SI.
+ - NULL_TREE. */
+
+static tree
+vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
+{
+ gcc_assert (VECTOR_TYPE_P (vtype));
+ gcc_assert (known_gt (nelts, 0U));
+
+ machine_mode vmode = TYPE_MODE (vtype);
+ if (!VECTOR_MODE_P (vmode))
+ return NULL_TREE;
+
+ poly_uint64 vbsize = GET_MODE_BITSIZE (vmode);
+ unsigned int pbsize;
+ if (constant_multiple_p (vbsize, nelts, &pbsize))
+ {
+ /* First check if vec_init optab supports construction from
+ vector pieces directly. */
+ scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vtype));
+ poly_uint64 inelts = pbsize / GET_MODE_BITSIZE (elmode);
+ machine_mode rmode;
+ if (related_vector_mode (vmode, elmode, inelts).exists (&rmode)
+ && (convert_optab_handler (vec_init_optab, vmode, rmode)
+ != CODE_FOR_nothing))
+ {
+ *ptype = build_vector_type (TREE_TYPE (vtype), inelts);
+ return vtype;
+ }
+
+ /* Otherwise check if there exists an integer type of the same piece size and
+ if vec_init optab supports construction from it directly. */
+ if (int_mode_for_size (pbsize, 0).exists (&elmode)
+ && related_vector_mode (vmode, elmode, nelts).exists (&rmode)
+ && (convert_optab_handler (vec_init_optab, rmode, elmode)
+ != CODE_FOR_nothing))
+ {
+ *ptype = build_nonstandard_integer_type (pbsize, 1);
+ return build_vector_type (*ptype, nelts);
+ }
+ }
+
+ return NULL_TREE;
+}
+
/* A subroutine of get_load_store_type, with a subset of the same
arguments. Handle the case where STMT_INFO is part of a grouped load
or store.
@@ -2300,8 +2356,7 @@ get_group_load_store_type (stmt_vec_info stmt_info, tree
vectype, bool slp,
by simply loading half of the vector only. Usually
the construction with an upper zero half will be elided. */
dr_alignment_support alignment_support_scheme;
- scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
- machine_mode vmode;
+ tree half_vtype;
if (overrun_p
&& !masked_p
&& (((alignment_support_scheme
@@ -2310,12 +2365,8 @@ get_group_load_store_type (stmt_vec_info stmt_info, tree
vectype, bool slp,
|| alignment_support_scheme == dr_unaligned_supported)
&& known_eq (nunits, (group_size - gap) * 2)
&& known_eq (nunits, group_size)
- && VECTOR_MODE_P (TYPE_MODE (vectype))
- && related_vector_mode (TYPE_MODE (vectype), elmode,
- group_size - gap).exists (&vmode)
- && (convert_optab_handler (vec_init_optab,
- TYPE_MODE (vectype), vmode)
- != CODE_FOR_nothing))
+ && (vector_vector_composition_type (vectype, 2, &half_vtype)
+ != NULL_TREE))
overrun_p = false;
if (overrun_p && !can_overrun_p)
@@ -8915,47 +8966,24 @@ vectorizable_load (stmt_vec_info stmt_info,
gimple_stmt_iterator *gsi,
{
if (group_size < const_nunits)
{
- /* First check if vec_init optab supports construction from
- vector elts directly. */
- scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
- machine_mode vmode;
- if (VECTOR_MODE_P (TYPE_MODE (vectype))
- && related_vector_mode (TYPE_MODE (vectype), elmode,
- group_size).exists (&vmode)
- && (convert_optab_handler (vec_init_optab,
- TYPE_MODE (vectype), vmode)
- != CODE_FOR_nothing))
+ /* First check if vec_init optab supports construction from vector
+ elts directly. Otherwise avoid emitting a constructor of
+ vector elements by performing the loads using an integer type
+ of the same size, constructing a vector of those and then
+ re-interpreting it as the original vector type. This avoids a
+ huge runtime penalty due to the general inability to perform
+ store forwarding from smaller stores to a larger load. */
+ tree ptype;
+ tree vtype
+ = vector_vector_composition_type (vectype,
+ const_nunits / group_size,
+ &ptype);
+ if (vtype != NULL_TREE)
{
nloads = const_nunits / group_size;
lnel = group_size;
- ltype = build_vector_type (TREE_TYPE (vectype), group_size);
- }
- else
- {
- /* Otherwise avoid emitting a constructor of vector elements
- by performing the loads using an integer type of the same
- size, constructing a vector of those and then
- re-interpreting it as the original vector type.
- This avoids a huge runtime penalty due to the general
- inability to perform store forwarding from smaller stores
- to a larger load. */
- unsigned lsize
- = group_size * TYPE_PRECISION (TREE_TYPE (vectype));
- unsigned int lnunits = const_nunits / group_size;
- /* If we can't construct such a vector fall back to
- element loads of the original vector type. */
- if (int_mode_for_size (lsize, 0).exists (&elmode)
- && VECTOR_MODE_P (TYPE_MODE (vectype))
- && related_vector_mode (TYPE_MODE (vectype), elmode,
- lnunits).exists (&vmode)
- && (convert_optab_handler (vec_init_optab, vmode, elmode)
- != CODE_FOR_nothing))
- {
- nloads = lnunits;
- lnel = group_size;
- ltype = build_nonstandard_integer_type (lsize, 1);
- lvectype = build_vector_type (ltype, nloads);
- }
+ lvectype = vtype;
+ ltype = ptype;
}
}
else
@@ -9541,6 +9569,7 @@ vectorizable_load (stmt_vec_info stmt_info,
gimple_stmt_iterator *gsi,
else
{
tree ltype = vectype;
+ tree new_vtype = NULL_TREE;
/* If there's no peeling for gaps but we have a gap
with slp loads then load the lower half of the
vector only. See get_group_load_store_type for
@@ -9553,10 +9582,14 @@ vectorizable_load (stmt_vec_info stmt_info,
gimple_stmt_iterator *gsi,
(group_size
- DR_GROUP_GAP (first_stmt_info)) * 2)
&& known_eq (nunits, group_size))
- ltype = build_vector_type (TREE_TYPE (vectype),
- (group_size
- - DR_GROUP_GAP
- (first_stmt_info)));
+ {
+ tree half_vtype;
+ new_vtype
+ = vector_vector_composition_type (vectype, 2,
+ &half_vtype);
+ if (new_vtype != NULL_TREE)
+ ltype = half_vtype;
+ }
data_ref
= fold_build2 (MEM_REF, ltype, dataref_ptr,
dataref_offset
@@ -9584,10 +9617,21 @@ vectorizable_load (stmt_vec_info stmt_info,
gimple_stmt_iterator *gsi,
CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
build_zero_cst (ltype));
- new_stmt
- = gimple_build_assign (vec_dest,
- build_constructor
- (vectype, v));
+ gcc_assert (new_vtype != NULL_TREE);
+ if (new_vtype == vectype)
+ new_stmt = gimple_build_assign (
+ vec_dest, build_constructor (vectype, v));
+ else
+ {
+ tree new_vname = make_ssa_name (new_vtype);
+ new_stmt = gimple_build_assign (
+ new_vname, build_constructor (new_vtype, v));
+ vect_finish_stmt_generation (stmt_info,
+ new_stmt, gsi);
+ new_stmt = gimple_build_assign (
+ vec_dest, build1 (VIEW_CONVERT_EXPR, vectype,
+ new_vname));
+ }
}
}
break;