Hi all,
This patch resolves bug:
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=96974
The fix forces a re-calculation of *stmt_vectype_out when an incompatible
combination of TYPE_VECTOR_SUBPARTS is detected, this time capping the
number of units with a newly introduced max_nunits ceiling.
I am not 100% sure if this is the best way to go about fixing this,
because this is my first look at the vectorizer and I lack knowledge of
the wider context, so do let me know if you see a better way to do this!
I have added the previously ICE-ing reproducer as a new test.
The test is compiled with "g++ -Ofast -march=armv8.2-a+sve
-fdisable-tree-fre4" on GCC 11 and with "g++ -Ofast -march=armv8.2-a+sve"
on GCC 10.
(Without -fdisable-tree-fre4 the ICE has gone latent on GCC 11.)
Bootstrapped and reg-tested on aarch64-linux-gnu.
Also reg-tested on aarch64-none-elf.
gcc/ChangeLog:
* tree-vect-stmts.c (get_vectype_for_scalar_type): Add max_nunits
parameter to the core function and add a new overload that keeps the
old signature.
(vect_get_vector_types_for_stmt): Add re-calculation logic.
gcc/testsuite/ChangeLog:
* g++.target/aarch64/sve/pr96974.C: New test.
diff --git a/gcc/testsuite/g++.target/aarch64/sve/pr96974.C b/gcc/testsuite/g++.target/aarch64/sve/pr96974.C
new file mode 100644
index 0000000000000000000000000000000000000000..2f6ebd6ce3dd8626f5e666edba77d2c925739b7d
--- /dev/null
+++ b/gcc/testsuite/g++.target/aarch64/sve/pr96974.C
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast -march=armv8.2-a+sve -fdisable-tree-fre4" } */
+
+float a;
+int
+b ()
+{ return __builtin_lrintf(a); }
+
+struct c {
+ float d;
+ c() {
+ for (int e = 0; e < 9; e++)
+ coeffs[e] = d ? b() : 0;
+ }
+ int coeffs[10];
+} f;
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index c2d1f39fe0f4bbc90ffa079cb6a8fcf87b76b3af..f8d3eac38718e18bf957b85109cccbc03e21c041 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -11342,7 +11342,7 @@ get_related_vectype_for_scalar_type (machine_mode prevailing_mode,
tree
get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
- unsigned int group_size)
+ unsigned int group_size, unsigned int max_nunits)
{
/* For BB vectorization, we should always have a group size once we've
constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
@@ -11375,13 +11375,16 @@ get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
fail (in the latter case because GROUP_SIZE is too small
for the target), but it's possible that a target could have
a hole between supported vector types.
+ There is also the option to artificially pass a max_nunits,
+ which is smaller than GROUP_SIZE, if the use of GROUP_SIZE
+ would result in an incompatible mode for the target.
If GROUP_SIZE is not a power of 2, this has the effect of
trying the largest power of 2 that fits within the group,
even though the group is not a multiple of that vector size.
The BB vectorizer will then try to carve up the group into
smaller pieces. */
- unsigned int nunits = 1 << floor_log2 (group_size);
+ unsigned int nunits = 1 << floor_log2 (max_nunits);
do
{
vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
@@ -11394,6 +11397,14 @@ get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
return vectype;
}
+tree
+get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
+ unsigned int group_size)
+{
+ return get_vectype_for_scalar_type (vinfo, scalar_type,
+ group_size, group_size);
+}
+
/* Return the vector type corresponding to SCALAR_TYPE as supported
by the target. NODE, if nonnull, is the SLP tree node that will
use the returned vector type. */
@@ -12172,6 +12183,8 @@ vect_get_vector_types_for_stmt (stmt_vec_info stmt_info,
tree vectype;
tree scalar_type = NULL_TREE;
+ tree scalar_type_orig = NULL_TREE;
+
if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info))
{
vectype = STMT_VINFO_VECTYPE (stmt_info);
@@ -12210,6 +12223,7 @@ vect_get_vector_types_for_stmt (stmt_vec_info stmt_info,
"get vectype for scalar type: %T\n", scalar_type);
}
vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
+ scalar_type_orig = scalar_type;
if (!vectype)
return opt_result::failure_at (stmt,
"not vectorized:"
@@ -12249,6 +12263,36 @@ vect_get_vector_types_for_stmt (stmt_vec_info stmt_info,
}
}
+ /* In rare cases with different types and sizes we may reach an invalid
+ combination where nunits_vectype has fewer TYPE_VECTOR_SUBPARTS than
+ *stmt_vectype_out. In that case attempt to re-calculate
+ *stmt_vectype_out with an imposed max taken from nunits_vectype. */
+ unsigned int max_nunits;
+ if (known_lt (TYPE_VECTOR_SUBPARTS (nunits_vectype),
+ TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "Incompatable number of vector subparts between %T and %T\n",
+ nunits_vectype, *stmt_vectype_out);
+
+ max_nunits = TYPE_VECTOR_SUBPARTS (nunits_vectype).to_constant ();
+ *stmt_vectype_out = get_vectype_for_scalar_type (vinfo, scalar_type_orig,
+ group_size,
+ max_nunits);
+
+ if (*stmt_vectype_out)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "Re-calculated data-types to : %T, %T\n",
+ nunits_vectype, *stmt_vectype_out);
+ }
+ else
+ return opt_result::failure_at
+ (stmt, "Not vectorized: failed to re-calculate data-types.\n");
+ }
+
gcc_assert (multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)));