On Tue, Nov 5, 2019 at 9:45 PM Richard Sandiford <[email protected]> wrote: > > This patch makes can_duplicate_and_interleave_p cope with mixtures of > vector sizes, by using queries based on get_vectype_for_scalar_type > instead of directly querying GET_MODE_SIZE (vinfo->vector_mode). > > int_mode_for_size is now the first check we do for a candidate mode, > so it seemed better to restrict it to MAX_FIXED_MODE_SIZE. This avoids > unnecessary work and avoids trying to create scalar types that the > target might not support. > > This is the final patch in the series. As before, each patch tested individually > on aarch64-linux-gnu and the series as a whole on x86_64-linux-gnu.
OK. Thanks, Richard. > > 2019-11-04 Richard Sandiford <[email protected]> > > gcc/ > * tree-vectorizer.h (can_duplicate_and_interleave_p): Take an > element type rather than an element mode. > * tree-vect-slp.c (can_duplicate_and_interleave_p): Likewise. > Use get_vectype_for_scalar_type to query the natural types > for a given element type rather than basing everything on > GET_MODE_SIZE (vinfo->vector_mode). Limit int_mode_for_size > query to MAX_FIXED_MODE_SIZE. > (duplicate_and_interleave): Update call accordingly. > * tree-vect-loop.c (vectorizable_reduction): Likewise. > > Index: gcc/tree-vectorizer.h > =================================================================== > --- gcc/tree-vectorizer.h 2019-11-05 11:08:12.521631453 +0000 > +++ gcc/tree-vectorizer.h 2019-11-05 11:14:42.786884473 +0000 > @@ -1779,8 +1779,7 @@ extern void vect_get_slp_defs (slp_tree, > extern bool vect_slp_bb (basic_block); > extern stmt_vec_info vect_find_last_scalar_stmt_in_slp (slp_tree); > extern bool is_simple_and_all_uses_invariant (stmt_vec_info, loop_vec_info); > -extern bool can_duplicate_and_interleave_p (vec_info *, unsigned int, > - machine_mode, > +extern bool can_duplicate_and_interleave_p (vec_info *, unsigned int, tree, > unsigned int * = NULL, > tree * = NULL, tree * = NULL); > extern void duplicate_and_interleave (vec_info *, gimple_seq *, tree, > Index: gcc/tree-vect-slp.c > =================================================================== > --- gcc/tree-vect-slp.c 2019-11-05 11:08:12.517631481 +0000 > +++ gcc/tree-vect-slp.c 2019-11-05 11:14:42.786884473 +0000 > @@ -265,7 +265,7 @@ vect_get_place_in_interleaving_chain (st > return -1; > } > > -/* Check whether it is possible to load COUNT elements of type ELT_MODE > +/* Check whether it is possible to load COUNT elements of type ELT_TYPE > using the method implemented by duplicate_and_interleave. 
Return true > if so, returning the number of intermediate vectors in *NVECTORS_OUT > (if nonnull) and the type of each intermediate vector in *VECTOR_TYPE_OUT > @@ -273,26 +273,37 @@ vect_get_place_in_interleaving_chain (st > > bool > can_duplicate_and_interleave_p (vec_info *vinfo, unsigned int count, > - machine_mode elt_mode, > - unsigned int *nvectors_out, > + tree elt_type, unsigned int *nvectors_out, > tree *vector_type_out, > tree *permutes) > { > - poly_int64 elt_bytes = count * GET_MODE_SIZE (elt_mode); > - poly_int64 nelts; > + tree base_vector_type = get_vectype_for_scalar_type (vinfo, elt_type, > count); > + if (!base_vector_type || !VECTOR_MODE_P (TYPE_MODE (base_vector_type))) > + return false; > + > + machine_mode base_vector_mode = TYPE_MODE (base_vector_type); > + poly_int64 elt_bytes = count * GET_MODE_UNIT_SIZE (base_vector_mode); > unsigned int nvectors = 1; > for (;;) > { > scalar_int_mode int_mode; > poly_int64 elt_bits = elt_bytes * BITS_PER_UNIT; > - if (multiple_p (GET_MODE_SIZE (vinfo->vector_mode), elt_bytes, &nelts) > - && int_mode_for_size (elt_bits, 0).exists (&int_mode)) > + if (int_mode_for_size (elt_bits, 1).exists (&int_mode)) > { > + /* Get the natural vector type for this SLP group size. */ > tree int_type = build_nonstandard_integer_type > (GET_MODE_BITSIZE (int_mode), 1); > - tree vector_type = build_vector_type (int_type, nelts); > - if (VECTOR_MODE_P (TYPE_MODE (vector_type))) > - { > + tree vector_type > + = get_vectype_for_scalar_type (vinfo, int_type, count); > + if (vector_type > + && VECTOR_MODE_P (TYPE_MODE (vector_type)) > + && known_eq (GET_MODE_SIZE (TYPE_MODE (vector_type)), > + GET_MODE_SIZE (base_vector_mode))) > + { > + /* Try fusing consecutive sequences of COUNT / NVECTORS elements > + together into elements of type INT_TYPE and using the result > + to build NVECTORS vectors. 
*/ > + poly_uint64 nelts = GET_MODE_NUNITS (TYPE_MODE (vector_type)); > vec_perm_builder sel1 (nelts, 2, 3); > vec_perm_builder sel2 (nelts, 2, 3); > poly_int64 half_nelts = exact_div (nelts, 2); > @@ -492,7 +503,7 @@ vect_get_and_check_slp_defs (vec_info *v > && !GET_MODE_SIZE (vinfo->vector_mode).is_constant () > && (TREE_CODE (type) == BOOLEAN_TYPE > || !can_duplicate_and_interleave_p (vinfo, stmts.length (), > - TYPE_MODE (type)))) > + type))) > { > if (dump_enabled_p ()) > dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, > @@ -3551,7 +3562,7 @@ duplicate_and_interleave (vec_info *vinf > unsigned int nvectors = 1; > tree new_vector_type; > tree permutes[2]; > - if (!can_duplicate_and_interleave_p (vinfo, nelts, TYPE_MODE > (element_type), > + if (!can_duplicate_and_interleave_p (vinfo, nelts, element_type, > &nvectors, &new_vector_type, > permutes)) > gcc_unreachable (); > Index: gcc/tree-vect-loop.c > =================================================================== > --- gcc/tree-vect-loop.c 2019-11-05 10:57:41.658071173 +0000 > +++ gcc/tree-vect-loop.c 2019-11-05 11:14:42.782884501 +0000 > @@ -6288,10 +6288,9 @@ vectorizable_reduction (stmt_vec_info st > that value needs to be repeated for every instance of the > statement within the initial vector. */ > unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance); > - scalar_mode elt_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype_out)); > if (!neutral_op > && !can_duplicate_and_interleave_p (loop_vinfo, group_size, > - elt_mode)) > + TREE_TYPE (vectype_out))) > { > if (dump_enabled_p ()) > dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
