On Mon, Apr 28, 2025 at 3:30 PM liuhongt <hongtao....@intel.com> wrote:
>
> For floating point, !flag_trapping_math is needed for the pattern, which
> transforms 2 conversions into 1 conversion and may therefore lose a
> potential trap.  There shouldn't be any accuracy issue.
>
> It also handles real_cst if it can be represented in different floating point
> types without loss of precision.
>
> Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
> Ok for trunk?
>
> gcc/ChangeLog:
>
>         PR tree-optimization/103771
>         * match.pd (cond_expr_convert_p): Extend the match to handle
>         scalar floating point type.
>         * tree-vect-patterns.cc
>         (vect_recog_cond_expr_convert_pattern): Handle floating point
>         type.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/i386/pr103771-4.c: New test.
> ---
>  gcc/match.pd                               |  55 ++++++++-
>  gcc/testsuite/gcc.target/i386/pr103771-4.c | 132 +++++++++++++++++++++
>  gcc/tree-vect-patterns.cc                  |  41 +++++--
>  3 files changed, 214 insertions(+), 14 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr103771-4.c
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index ba036e52837..6f49f66aed2 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -11290,15 +11290,18 @@ and,
>
>  (match (cond_expr_convert_p @0 @2 @3 @6)
>   (cond (simple_comparison@6 @0 @1) (convert@4 @2) (convert@5 @3))
> -  (if (INTEGRAL_TYPE_P (type)
> -       && INTEGRAL_TYPE_P (TREE_TYPE (@2))
> -       && INTEGRAL_TYPE_P (TREE_TYPE (@0))
> -       && INTEGRAL_TYPE_P (TREE_TYPE (@3))
> +  (if ((INTEGRAL_TYPE_P (type)
> +       || (!flag_trapping_math && SCALAR_FLOAT_TYPE_P (type)))
> +       && ((INTEGRAL_TYPE_P (TREE_TYPE (@2))
> +           && INTEGRAL_TYPE_P (TREE_TYPE (@3)))
> +           || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (@2))
> +              && TREE_TYPE (@2) == TREE_TYPE (@3)))
>         && TYPE_PRECISION (type) != TYPE_PRECISION (TREE_TYPE (@0))
>         && TYPE_PRECISION (TREE_TYPE (@0))
>           == TYPE_PRECISION (TREE_TYPE (@2))
>         && TYPE_PRECISION (TREE_TYPE (@0))
>           == TYPE_PRECISION (TREE_TYPE (@3))
> +       && tree_nop_conversion_p (TREE_TYPE (@2), TREE_TYPE (@3))
>         /* For vect_recog_cond_expr_convert_pattern, @2 and @3 can differ in
>           signess when convert is truncation, but not ok for extension since
>           it's sign_extend vs zero_extend.  */

Can you, instead of mangling in float support here, use separate (match
patterns like for the cases below?

> @@ -11308,6 +11311,50 @@ and,
>         && single_use (@4)
>         && single_use (@5))))
>
> +(match (cond_expr_convert_p @0 @2 @3 @6)
> + (cond (simple_comparison@6 @0 @1) (float@4 @2) (float@5 @3))
> +  (if (SCALAR_FLOAT_TYPE_P (type) && !flag_trapping_math
> +       && TYPE_PRECISION (type) != TYPE_PRECISION (TREE_TYPE (@0))

So this fails to constrain the comparison type (above we check
INTEGRAL_TYPE_P); if it happens to be a vector type, using TYPE_PRECISION
will ICE.

I think the main intent of the vectorizer pattern is to match up the
_size_ of the vector elements, so maybe re-formulate the constraint with
operand_equal_p (TYPE_SIZE (type), TYPE_SIZE (TREE_TYPE (@0))).

This also matters because the precision of a floating-point type is not
equal to the number of bits in its mode.
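
That is, something like the following untested sketch of the condition
(reusing the capture names from the patch):

  && !operand_equal_p (TYPE_SIZE (type), TYPE_SIZE (TREE_TYPE (@0)), 0)
  && operand_equal_p (TYPE_SIZE (TREE_TYPE (@0)),
                      TYPE_SIZE (TREE_TYPE (@2)), 0)

which keeps the "element sizes match, result size differs" requirement
without looking at TYPE_PRECISION of a float type at all.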

> +       && TYPE_PRECISION (TREE_TYPE (@0))
> +         == TYPE_PRECISION (TREE_TYPE (@2))
> +       && INTEGRAL_TYPE_P (TREE_TYPE (@2))
> +       && TREE_TYPE (@2) == TREE_TYPE (@3)
> +       && single_use (@4)
> +       && single_use (@5))))
> +
> +(match (cond_expr_convert_p @0 @2 @3 @6)
> + (cond (simple_comparison@6 @0 @1) (fix_trunc@4 @2) (fix_trunc@5 @3))
> +  (if (INTEGRAL_TYPE_P (type) && !flag_trapping_math
> +       && TYPE_PRECISION (type) != TYPE_PRECISION (TREE_TYPE (@0))
> +       && TYPE_PRECISION (TREE_TYPE (@0))
> +         == TYPE_PRECISION (TREE_TYPE (@2))
> +       && SCALAR_FLOAT_TYPE_P (TREE_TYPE (@2))
> +       && TREE_TYPE (@2) == TREE_TYPE (@3)

Please use types_match () instead of TREE_TYPE pointer compares.
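E.g. something like

  types_match (TREE_TYPE (@2), TREE_TYPE (@3))

in the (if condition (just a sketch).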

> +       && single_use (@4)
> +       && single_use (@5))))
> +
> +(match (cond_expr_convert_p @0 @2 @3 @6)
> + (cond (simple_comparison@6 @0 @1) (REAL_CST@2) (convert@5 @3))

I think the same issue exists for INTEGER_CSTs.
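For instance (a hypothetical testcase in the style of the ones below, not
part of the patch):

  void
  bar (long long* a, long long* b, long long* c, int* __restrict e, int n)
  {
    for (int i = 0 ; i != n; i++)
      {
        int tmp = (int) c[i];
        if (a[i] < b[i])
          tmp = 0;
        e[i] = tmp;
      }
  }

here one arm of the COND_EXPR is the truncating conversion and the other
is the INTEGER_CST 0, which as far as I can see none of the (match cases
handles.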

> +  (if ((INTEGRAL_TYPE_P (type)
> +       || (!flag_trapping_math && SCALAR_FLOAT_TYPE_P (type)))
> +       && TYPE_PRECISION (type) != TYPE_PRECISION (TREE_TYPE (@0))
> +       && TYPE_PRECISION (TREE_TYPE (@0))
> +         == TYPE_PRECISION (TREE_TYPE (@3))
> +       && SCALAR_FLOAT_TYPE_P (TREE_TYPE (@3))
> +       && single_use (@5)
> +       && const_unop (CONVERT_EXPR, TREE_TYPE (@3), @2))))

I'm not sure this is a good check.  Say, for type == double and
typeof(@3) == float, the REAL_CST can have extra precision that you'd
drop when rewriting this as (double)(cnd ? (float)@2 : @3).  You'd need
to check that the REAL_CST is exactly representable in the type of @3.
The same applies to a possible integer case, and to handling
fix_trunc/float when one arm is a constant.
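
For example (a hypothetical function, not from the patch):

  double
  f (float x, float y, float a)
  {
    return x < y ? 0.1 : (double) a;
  }

rewriting this as (double)(x < y ? 0.1f : a) changes the true arm from
the double constant 0.1 to (double) 0.1f, which is a different value
since the double nearest to 0.1 does not truncate to float exactly.
The rewrite is only safe when the narrowing of the constant is exact
(0.5 would be fine here, 0.1 is not).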

Can you split the patch into two to separate out the handling of constants?

Thanks, and sorry for the delay.
Richard.

> +
> +(match (cond_expr_convert_p @0 @2 @3 @6)
> + (cond (simple_comparison@6 @0 @1) (convert@4 @2) (REAL_CST@3))
> +  (if ((INTEGRAL_TYPE_P (type)
> +       || (!flag_trapping_math && SCALAR_FLOAT_TYPE_P (type)))
> +       && TYPE_PRECISION (type) != TYPE_PRECISION (TREE_TYPE (@0))
> +       && SCALAR_FLOAT_TYPE_P (TREE_TYPE (@2))
> +       && TYPE_PRECISION (TREE_TYPE (@0))
> +         == TYPE_PRECISION (TREE_TYPE (@2))
> +       && single_use (@4)
> +       && const_unop (CONVERT_EXPR, TREE_TYPE (@2), @3))))
> +
>  (for bit_op (bit_and bit_ior bit_xor)
>   (match (bitwise_induction_p @0 @2 @3)
>    (bit_op:c
> diff --git a/gcc/testsuite/gcc.target/i386/pr103771-4.c b/gcc/testsuite/gcc.target/i386/pr103771-4.c
> new file mode 100644
> index 00000000000..89a3c26183a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr103771-4.c
> @@ -0,0 +1,132 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=x86-64-v4 -Ofast -fdump-tree-vect-details" } */
> +/* { dg-final { scan-assembler-not "kshift" { target { ! ia32 } } } } */
> +/* { dg-final { scan-tree-dump-times "loop vectorized using 64 byte vectors" 10 "vect" { target { ! ia32 } } } } */
> +
> +void
> +foo (float* a, float* b, float* c, float* d, double* __restrict e, int n)
> +{
> +  for (int i = 0 ; i != n; i++)
> +    {
> +      float tmp = c[i] + d[i];
> +      if (a[i] < b[i])
> +       tmp = 0.0;
> +      e[i] = tmp;
> +    }
> +}
> +
> +void
> +foo1 (int* a, int* b, float* c, float* d, double* __restrict e, int n)
> +{
> +  for (int i = 0 ; i != n; i++)
> +    {
> +      float tmp = c[i] + d[i];
> +      if (a[i] < b[i])
> +       tmp = 0.0;
> +      e[i] = tmp;
> +    }
> +}
> +
> +
> +void
> +foo2 (double* a, double* b, double* c, double* d, float* __restrict e, int n)
> +{
> +  for (int i = 0 ; i != n; i++)
> +    {
> +      float tmp = c[i] + d[i];
> +      if (a[i] < b[i])
> +       tmp = 0.0;
> +      e[i] = tmp;
> +    }
> +}
> +
> +void
> +foo3 (long long* a, long long* b, double* c, double* d, float* __restrict e, int n)
> +{
> +  for (int i = 0 ; i != n; i++)
> +    {
> +      float tmp = c[i] + d[i];
> +      if (a[i] < b[i])
> +       tmp = 0.0;
> +      e[i] = tmp;
> +    }
> +}
> +
> +
> +void
> +foo4 (float* a, float* b, int* c, int* d, long long* __restrict e, int n)
> +{
> +  for (int i = 0 ; i != n; i++)
> +    {
> +      long long tmp = c[i];
> +      long long tmp2 = d[i];
> +      if (a[i] < b[i])
> +       tmp = tmp2;
> +      e[i] = tmp;
> +    }
> +}
> +
> +void
> +foo5 (double* a, double* b, long long* c, long long* d, int* __restrict e, int n)
> +{
> +  for (int i = 0 ; i != n; i++)
> +    {
> +      int tmp = (int)c[i];
> +      int tmp2 = (int)d[i];
> +      if (a[i] < b[i])
> +       tmp = tmp2;
> +      e[i] = tmp;
> +    }
> +}
> +
> +void
> +foo6 (float* a, float* b, int* c, int* d, double* __restrict e, int n)
> +{
> +  for (int i = 0 ; i != n; i++)
> +    {
> +      double tmp = c[i];
> +      double tmp2 = d[i];
> +      if (a[i] < b[i])
> +       tmp = tmp2;
> +      e[i] = tmp;
> +    }
> +}
> +
> +void
> +foo7 (double* a, double* b, long long* c, long long* d, float* __restrict e, int n)
> +{
> +  for (int i = 0 ; i != n; i++)
> +    {
> +      float tmp = c[i];
> +      float tmp2 = d[i];
> +      if (a[i] < b[i])
> +       tmp = tmp2;
> +      e[i] = tmp;
> +    }
> +}
> +
> +void
> +foo8 (int* a, int* b, int* c, int* d, double* __restrict e, int n)
> +{
> +  for (int i = 0 ; i != n; i++)
> +    {
> +      double tmp = c[i];
> +      double tmp2 = d[i];
> +      if (a[i] < b[i])
> +       tmp = tmp2;
> +      e[i] = tmp;
> +    }
> +}
> +
> +void
> +foo9 (long long* a, long long* b, long long* c, long long* d, float* __restrict e, int n)
> +{
> +  for (int i = 0 ; i != n; i++)
> +    {
> +      float tmp = c[i];
> +      float tmp2 = d[i];
> +      if (a[i] < b[i])
> +       tmp = tmp2;
> +      e[i] = tmp;
> +    }
> +}
> diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
> index 4f0a7ea162b..4da5a91e1a4 100644
> --- a/gcc/tree-vect-patterns.cc
> +++ b/gcc/tree-vect-patterns.cc
> @@ -1095,9 +1095,10 @@ vect_recog_cond_expr_convert_pattern (vec_info *vinfo,
>                                       stmt_vec_info stmt_vinfo, tree *type_out)
>  {
>    gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
> -  tree lhs, match[4], temp, type, new_lhs, op2;
> +  tree lhs, match[4], temp, type, new_lhs, op2, op1;
>    gimple *cond_stmt;
>    gimple *pattern_stmt;
> +  enum tree_code code = NOP_EXPR;
>
>    if (!last_stmt)
>      return NULL;
> @@ -1105,29 +1106,49 @@ vect_recog_cond_expr_convert_pattern (vec_info *vinfo,
>    lhs = gimple_assign_lhs (last_stmt);
>
>    /* Find E = C cmp D ? (TYPE3) A ? (TYPE3) B;
> -     TYPE_PRECISION (A) == TYPE_PRECISION (C).  */
> +/     TYPE_PRECISION (A) == TYPE_PRECISION (C).  */
>    if (!gimple_cond_expr_convert_p (lhs, &match[0], NULL))
>      return NULL;
>
>    vect_pattern_detected ("vect_recog_cond_expr_convert_pattern", last_stmt);
>
> +  if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (lhs)))
> +    code = INTEGRAL_TYPE_P (TREE_TYPE (match[1])) ? FLOAT_EXPR : CONVERT_EXPR;
> +  else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (match[1])))
> +    code = FIX_TRUNC_EXPR;
> +
> +  op1 = match[1];
>    op2 = match[2];
> -  type = TREE_TYPE (match[1]);
> -  if (TYPE_SIGN (type) != TYPE_SIGN (TREE_TYPE (match[2])))
> +  type = TREE_TYPE (op1);
> +  /* When op1/op2 is REAL_CST, the conversion must be CONVERT_EXPR from
> +     SCALAR_FLOAT_TYPE_P which is restricted in gimple_cond_expr_convert_p.
> +     Otherwise, the conversion could be FLOAT_EXPR, FIX_TRUNC_EXPR
> +     or CONVERT_EXPR.  */
> +  if (TREE_CODE (op1) == REAL_CST)
> +    {
> +      op1 = const_unop (CONVERT_EXPR, TREE_TYPE (op2), op1);
> +      type = TREE_TYPE (op2);
> +    }
> +  else if (TREE_CODE (op2) == REAL_CST)
> +    op2 = const_unop (FLOAT_EXPR, TREE_TYPE (op1), op2);
> +  else if (code == NOP_EXPR)
>      {
> -      op2 = vect_recog_temp_ssa_var (type, NULL);
> -      gimple* nop_stmt = gimple_build_assign (op2, NOP_EXPR, match[2]);
> -      append_pattern_def_seq (vinfo, stmt_vinfo, nop_stmt,
> -                             get_vectype_for_scalar_type (vinfo, type));
> +      if (TYPE_SIGN (type) != TYPE_SIGN (TREE_TYPE (match[2])))
> +       {
> +         op2 = vect_recog_temp_ssa_var (type, NULL);
> +         gimple* nop_stmt = gimple_build_assign (op2, NOP_EXPR, match[2]);
> +         append_pattern_def_seq (vinfo, stmt_vinfo, nop_stmt,
> +                                 get_vectype_for_scalar_type (vinfo, type));
> +       }
>      }
>
>    temp = vect_recog_temp_ssa_var (type, NULL);
>    cond_stmt = gimple_build_assign (temp, build3 (COND_EXPR, type, match[3],
> -                                                match[1], op2));
> +                                                op1, op2));
>    append_pattern_def_seq (vinfo, stmt_vinfo, cond_stmt,
>                           get_vectype_for_scalar_type (vinfo, type));
>    new_lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
> -  pattern_stmt = gimple_build_assign (new_lhs, NOP_EXPR, temp);
> +  pattern_stmt = gimple_build_assign (new_lhs, code, temp);
>    *type_out = STMT_VINFO_VECTYPE (stmt_vinfo);
>
>    if (dump_enabled_p ())
> --
> 2.34.1
>
