On Fri, May 23, 2025 at 2:31 PM Alexander Monakov <amona...@ispras.ru> wrote: > > In PR 105965 we accepted a request to form FMA instructions when the > source code is using a narrow generic vector that contains just one > element, corresponding to V1SF or V1DF mode, while the backend does not > expand fma patterns for such modes. > > For this to work under -ffp-contract=on, we either need to modify > backends, or emulate such degenerate-vector FMA via scalar FMA in > tree-vect-generic. Do the latter.
Can you instead apply the lowering during gimplification? Having an unsupported internal function in the IL that the user could not have emitted directly is somewhat bad. I thought the vector lowering could be generalized for more single-argument internal functions, but then no such unsupported calls should exist in the first place. Richard. > gcc/c-family/ChangeLog: > > * c-gimplify.cc (fma_supported_p): Allow forming single-element > vector FMA when scalar FMA is available. > (c_gimplify_expr): Allow vector types. > > gcc/ChangeLog: > > * tree-vect-generic.cc (expand_vec1_fma): New helper. Use it... > (expand_vector_operations_1): ... here to handle IFN_FMA. > --- > gcc/c-family/c-gimplify.cc | 10 ++++++-- > gcc/tree-vect-generic.cc | 48 ++++++++++++++++++++++++++++++++++++-- > 2 files changed, 54 insertions(+), 4 deletions(-) > > diff --git a/gcc/c-family/c-gimplify.cc b/gcc/c-family/c-gimplify.cc > index c6fb764656..1942d5019e 100644 > --- a/gcc/c-family/c-gimplify.cc > +++ b/gcc/c-family/c-gimplify.cc > @@ -875,7 +875,13 @@ c_build_bind_expr (location_t loc, tree block, tree body) > static bool > fma_supported_p (enum internal_fn fn, tree type) > { > - return direct_internal_fn_supported_p (fn, type, OPTIMIZE_FOR_BOTH); > + return (direct_internal_fn_supported_p (fn, type, OPTIMIZE_FOR_BOTH) > + /* Accept single-element vector FMA (see PR 105965) when the > + backend handles the scalar but not the vector mode. */ > + || (VECTOR_TYPE_P (type) > + && known_eq (TYPE_VECTOR_SUBPARTS (type), 1U) > + && direct_internal_fn_supported_p (fn, TREE_TYPE (type), > + OPTIMIZE_FOR_BOTH))); > } > > /* Gimplification of expression trees. */ > @@ -939,7 +945,7 @@ c_gimplify_expr (tree *expr_p, gimple_seq *pre_p > ATTRIBUTE_UNUSED, > /* For -ffp-contract=on we need to attempt FMA contraction only > during initial gimplification. Late contraction across statement > boundaries would violate language semantics. 
*/ > - if (SCALAR_FLOAT_TYPE_P (type) > + if ((SCALAR_FLOAT_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)) > && flag_fp_contract_mode == FP_CONTRACT_ON > && cfun && !(cfun->curr_properties & PROP_gimple_any) > && fma_supported_p (IFN_FMA, type)) > diff --git a/gcc/tree-vect-generic.cc b/gcc/tree-vect-generic.cc > index 3c68361870..954b84edce 100644 > --- a/gcc/tree-vect-generic.cc > +++ b/gcc/tree-vect-generic.cc > @@ -1983,6 +1983,36 @@ expand_vector_conversion (gimple_stmt_iterator *gsi) > gsi_replace (gsi, g, false); > } > > +/* Expand IFN_FMA, assuming vector contains just one scalar. > + c_gimplify_expr can introduce it when performing FMA contraction. */ > + > +static void > +expand_vec1_fma (gimple_stmt_iterator *gsi) > +{ > + gcall *call = as_a <gcall *> (gsi_stmt (*gsi)); > + tree type = TREE_TYPE (gimple_call_arg (call, 0)); > + if (!VECTOR_TYPE_P (type)) > + return; > + gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (type), 1U)); > + > + for (int i = 0; i < 3; i++) > + { > + tree arg = gimple_call_arg (call, i); > + arg = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (type), arg); > + gimple_call_set_arg (call, i, arg); > + } > + tree lhs = gimple_call_lhs (call); > + if (lhs) > + { > + tree new_lhs = make_ssa_name (TREE_TYPE (type)); > + gimple_call_set_lhs (call, new_lhs); > + tree ctor = build_constructor_single (type, 0, new_lhs); > + gimple *g = gimple_build_assign (lhs, CONSTRUCTOR, ctor); > + gsi_insert_after (gsi, g, GSI_NEW_STMT); > + } > + update_stmt (call); > +} > + > /* Process one statement. If we identify a vector operation, expand it. 
*/ > > static void > @@ -1998,8 +2028,22 @@ expand_vector_operations_1 (gimple_stmt_iterator *gsi) > gassign *stmt = dyn_cast <gassign *> (gsi_stmt (*gsi)); > if (!stmt) > { > - if (gimple_call_internal_p (gsi_stmt (*gsi), IFN_VEC_CONVERT)) > - expand_vector_conversion (gsi); > + gcall *call = dyn_cast <gcall *> (gsi_stmt (*gsi)); > + if (!call || !gimple_call_internal_p (call)) > + return; > + switch (gimple_call_internal_fn (call)) > + { > + case IFN_VEC_CONVERT: > + return expand_vector_conversion (gsi); > + case IFN_FMA: > + case IFN_FMS: > + case IFN_FNMA: > + case IFN_FNMS: > + if (!direct_internal_fn_supported_p (call, OPTIMIZE_FOR_BOTH)) > + return expand_vec1_fma (gsi); > + default: > + break; > + } > return; > } > > -- > 2.49.0 >