https://gcc.gnu.org/bugzilla/show_bug.cgi?id=124271
--- Comment #7 from Hongtao Liu <liuhongt at gcc dot gnu.org> ---
(In reply to Hongtao Liu from comment #6)
> >
> > > + unsigned int inner_prec = outer_prec / 2;
> > > + poly_uint64 outer_nelts = TYPE_VECTOR_SUBPARTS (type);
> > > + tree inner_scalar = build_nonstandard_integer_type (inner_prec, 1);
> >
> > I think 1 here should be `TYPE_UNSIGNED (TREE_TYPE (type))` otherwise you
> > get a type mismatch I think.
/* Multiply the low 32 bits of A by the low 32 bits of B.  Each operand is
   masked with 0xffffffff before the multiplication, so only its low 32
   bits contribute; the product is returned as a (signed) long.  */
static inline long mul32( long a, long b) {
return (a & 0xffffffff) * (b & 0xffffffff);
}
/* For each of the 1024 elements, overwrite a[i] with mul32 (a[i], b[i]).
   Both pointers are __restrict-qualified, i.e. the caller promises that
   the arrays A and B do not alias.  */
void many_mul3(long* __restrict a,
const long* __restrict b)
{
for (int i = 0; i < 1024; i++)
a[i] = mul32(a[i], b[i]);
}
It looks like vec_widen_mult_even_expr allows unsigned inputs and a signed output:
<bb 3> [local count: 1063004408]:
# ivtmp.20_32 = PHI <ivtmp.20_19(5), 0(2)>
vect__4.11_22 = MEM <const vector(4) long int> [(const long int *)b_10(D) +
ivtmp.20_32 * 1];
vect__6.7_26 = MEM <vector(4) long int> [(long int *)a_11(D) + ivtmp.20_32 *
1];
_17 = VIEW_CONVERT_EXPR<vector(8) unsigned int>(vect__4.11_22);
_24 = VIEW_CONVERT_EXPR<vector(8) unsigned int>(vect__6.7_26);
vect__16.13_18 = WIDEN_MULT_EVEN_EXPR <_17, _24>;
MEM <vector(4) long int> [(long int *)a_11(D) + ivtmp.20_32 * 1] =
vect__16.13_18;
ivtmp.20_19 = ivtmp.20_32 + 32;
if (ivtmp.20_19 != 8192)
But using TYPE_UNSIGNED (TREE_TYPE (type)) instead of 1 would select
vec_widen_smult_even_optab, which is incorrect: the masked low halves are zero-extended, so they must be multiplied as unsigned values.
The other changes were made as suggested.
diff --git a/gcc/match.pd b/gcc/match.pd
index 7f16fd4e081..d99d83bab84 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -436,6 +436,32 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
{ zeros; })
{ ones; } { zeros; })))))))))
+#if GIMPLE
+/* (x & lowhalf_mask) * (y & lowhalf_mask)
+ -> VEC_WIDEN_MULT_EVEN (VIEW_CONVERT (x), VIEW_CONVERT (y)). */
+(simplify
+ (mult (bit_and @0 VECTOR_CST@2)
+ (bit_and @1 @2))
+ (if (uniform_vector_p (@2)
+ && TYPE_VECTOR_SUBPARTS (type).is_constant ()
+ && TYPE_VECTOR_SUBPARTS (type).to_constant () > 1)
+ (with
+ {
+ auto elem = wi::to_wide (uniform_vector_p (@2));
+ unsigned int outer_prec = TYPE_PRECISION (TREE_TYPE (type));
+ unsigned int inner_prec = outer_prec / 2;
+ poly_uint64 outer_nelts = TYPE_VECTOR_SUBPARTS (type);
+ tree inner_scalar = build_nonstandard_integer_type (inner_prec, 1);
+ tree inner_type = build_vector_type (inner_scalar, outer_nelts * 2);
+ }
+ (if (GET_MODE_CLASS (TYPE_MODE (inner_type)) == MODE_VECTOR_INT
+ && elem == wi::mask (inner_prec, false, outer_prec)
+ && optab_handler (vec_widen_umult_even_optab,
+ TYPE_MODE (inner_type)) != CODE_FOR_nothing)
+ (vec_widen_mult_even (view_convert:inner_type @0)
+ (view_convert:inner_type @1))))))
+#endif
+