On Fri, May 23, 2025 at 2:31 PM Alexander Monakov <amona...@ispras.ru> wrote:
>
> In PR 105965 we accepted a request to form FMA instructions when the
> source code is using a narrow generic vector that contains just one
> element, corresponding to V1SF or V1DF mode, while the backend does not
> expand fma patterns for such modes.
>
> For this to work under -ffp-contract=on, we either need to modify
> backends, or emulate such degenerate-vector FMA via scalar FMA in
> tree-vect-generic.  Do the latter.

Can you instead apply the lowering during gimplification?  Having an
unsupported internal function in the IL that the user could not have
emitted directly is somewhat bad.  I thought the vector lowering could
be generalized for more single-argument internal functions, but then no
such unsupported calls should exist in the first place.

Richard.

> gcc/c-family/ChangeLog:
>
>         * c-gimplify.cc (fma_supported_p): Allow forming single-element
>         vector FMA when scalar FMA is available.
>         (c_gimplify_expr): Allow vector types.
>
> gcc/ChangeLog:
>
>         * tree-vect-generic.cc (expand_vec1_fma): New helper.  Use it...
>         (expand_vector_operations_1): ... here to handle IFN_FMA.
> ---
>  gcc/c-family/c-gimplify.cc | 10 ++++++--
>  gcc/tree-vect-generic.cc   | 48 ++++++++++++++++++++++++++++++++++++--
>  2 files changed, 54 insertions(+), 4 deletions(-)
>
> diff --git a/gcc/c-family/c-gimplify.cc b/gcc/c-family/c-gimplify.cc
> index c6fb764656..1942d5019e 100644
> --- a/gcc/c-family/c-gimplify.cc
> +++ b/gcc/c-family/c-gimplify.cc
> @@ -875,7 +875,13 @@ c_build_bind_expr (location_t loc, tree block, tree body)
>  static bool
>  fma_supported_p (enum internal_fn fn, tree type)
>  {
> -  return direct_internal_fn_supported_p (fn, type, OPTIMIZE_FOR_BOTH);
> +  return (direct_internal_fn_supported_p (fn, type, OPTIMIZE_FOR_BOTH)
> +         /* Accept single-element vector FMA (see PR 105965) when the
> +            backend handles the scalar but not the vector mode.  */
> +         || (VECTOR_TYPE_P (type)
> +             && known_eq (TYPE_VECTOR_SUBPARTS (type),  1U)
> +             && direct_internal_fn_supported_p (fn, TREE_TYPE (type),
> +                                                OPTIMIZE_FOR_BOTH)));
>  }
>
>  /* Gimplification of expression trees.  */
> @@ -939,7 +945,7 @@ c_gimplify_expr (tree *expr_p, gimple_seq *pre_p ATTRIBUTE_UNUSED,
>         /* For -ffp-contract=on we need to attempt FMA contraction only
>            during initial gimplification.  Late contraction across statement
>            boundaries would violate language semantics.  */
> -       if (SCALAR_FLOAT_TYPE_P (type)
> +       if ((SCALAR_FLOAT_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type))
>             && flag_fp_contract_mode == FP_CONTRACT_ON
>             && cfun && !(cfun->curr_properties & PROP_gimple_any)
>             && fma_supported_p (IFN_FMA, type))
> diff --git a/gcc/tree-vect-generic.cc b/gcc/tree-vect-generic.cc
> index 3c68361870..954b84edce 100644
> --- a/gcc/tree-vect-generic.cc
> +++ b/gcc/tree-vect-generic.cc
> @@ -1983,6 +1983,36 @@ expand_vector_conversion (gimple_stmt_iterator *gsi)
>    gsi_replace (gsi, g, false);
>  }
>
> +/* Expand IFN_FMA, assuming vector contains just one scalar.
> +   c_gimplify_expr can introduce it when performing FMA contraction.  */
> +
> +static void
> +expand_vec1_fma (gimple_stmt_iterator *gsi)
> +{
> +  gcall *call = as_a <gcall *> (gsi_stmt (*gsi));
> +  tree type = TREE_TYPE (gimple_call_arg (call, 0));
> +  if (!VECTOR_TYPE_P (type))
> +    return;
> +  gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (type), 1U));
> +
> +  for (int i = 0; i < 3; i++)
> +    {
> +      tree arg = gimple_call_arg (call, i);
> +      arg = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (type), arg);
> +      gimple_call_set_arg (call, i, arg);
> +    }
> +  tree lhs = gimple_call_lhs (call);
> +  if (lhs)
> +    {
> +      tree new_lhs = make_ssa_name (TREE_TYPE (type));
> +      gimple_call_set_lhs (call, new_lhs);
> +      tree ctor = build_constructor_single (type, 0, new_lhs);
> +      gimple *g = gimple_build_assign (lhs, CONSTRUCTOR, ctor);
> +      gsi_insert_after (gsi, g, GSI_NEW_STMT);
> +    }
> +  update_stmt (call);
> +}
> +
>  /* Process one statement.  If we identify a vector operation, expand it.  */
>
>  static void
> @@ -1998,8 +2028,22 @@ expand_vector_operations_1 (gimple_stmt_iterator *gsi)
>    gassign *stmt = dyn_cast <gassign *> (gsi_stmt (*gsi));
>    if (!stmt)
>      {
> -      if (gimple_call_internal_p (gsi_stmt (*gsi), IFN_VEC_CONVERT))
> -       expand_vector_conversion (gsi);
> +      gcall *call = dyn_cast <gcall *> (gsi_stmt (*gsi));
> +      if (!call || !gimple_call_internal_p (call))
> +       return;
> +      switch (gimple_call_internal_fn (call))
> +       {
> +       case IFN_VEC_CONVERT:
> +         return expand_vector_conversion (gsi);
> +       case IFN_FMA:
> +       case IFN_FMS:
> +       case IFN_FNMA:
> +       case IFN_FNMS:
> +         if (!direct_internal_fn_supported_p (call, OPTIMIZE_FOR_BOTH))
> +           return expand_vec1_fma (gsi);
> +       default:
> +         break;
> +       }
>        return;
>      }
>
> --
> 2.49.0
>

Reply via email to