On Wed, May 14, 2025 at 7:28 AM liuhongt <[email protected]> wrote:
>
> So it won't do the unsafe truncation of double(1.000000000000001) to
> float(1.0), since that would lose precision.
> This is guarded by testcase pr103771-6.c.
>
> Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
> Ok for trunk?
>
> A REAL_CST operand is handled if it can be converted to the other
> floating point type without loss of precision, or unconditionally when
> unsafe math optimizations are enabled (e.g. under fast math).
>
> gcc/ChangeLog:
>
> PR tree-optimization/103771
> * match.pd (cond_expr_convert_p): Extend the match to handle
> REAL_CST.
> * tree-vect-patterns.cc
> (vect_recog_cond_expr_convert_pattern): Handle REAL_CST.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/i386/pr103771-5.c: New test.
> * gcc.target/i386/pr103771-6.c: New test.
> ---
> gcc/match.pd | 33 +++++++++++++
> gcc/testsuite/gcc.target/i386/pr103771-5.c | 54 ++++++++++++++++++++++
> gcc/testsuite/gcc.target/i386/pr103771-6.c | 16 +++++++
> gcc/tree-vect-patterns.cc | 31 +++++++++----
> 4 files changed, 126 insertions(+), 8 deletions(-)
> create mode 100644 gcc/testsuite/gcc.target/i386/pr103771-5.c
> create mode 100644 gcc/testsuite/gcc.target/i386/pr103771-6.c
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 789e3d33326..0c966675a3f 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -11346,6 +11346,39 @@ and,
> && single_use (@4)
> && single_use (@5))))
>
> +/* Floating point or integer comparison and floating point conversion
> + with REAL_CST. */
> +(match (cond_expr_convert_p @0 @2 @3 @6)
> + (cond (simple_comparison@6 @0 @1) (REAL_CST@2) (convert@5 @3))
> + (if (!flag_trapping_math
> + && SCALAR_FLOAT_TYPE_P (type)
> + && SCALAR_FLOAT_TYPE_P (TREE_TYPE (@3))
> + && !operand_equal_p (TYPE_SIZE (type),
> + TYPE_SIZE (TREE_TYPE (@0)))
> + && operand_equal_p (TYPE_SIZE (TREE_TYPE (@0)),
> + TYPE_SIZE (TREE_TYPE (@3)))
> + && single_use (@5)
> + && (flag_unsafe_math_optimizations
> + || exact_real_truncate (TYPE_MODE (TREE_TYPE (@3)),
> + &TREE_REAL_CST (@2)))
So this looks good now.  I don't like the const_unop check in the
match.pd patterns, though.  Can you instead fail the pattern in the
consumer (vect_recog_cond_expr_convert_pattern) when the const_unop
call you repeat there returns NULL?
OK with that change.
Richard.
> + && const_unop (CONVERT_EXPR, TREE_TYPE (@3), @2))))
> +
> +/* Floating point or integer comparison and floating point conversion
> + with REAL_CST. */
> +(match (cond_expr_convert_p @0 @2 @3 @6)
> + (cond (simple_comparison@6 @0 @1) (convert@4 @2) (REAL_CST@3))
> + (if (!flag_trapping_math
> + && SCALAR_FLOAT_TYPE_P (type)
> + && SCALAR_FLOAT_TYPE_P (TREE_TYPE (@2))
> + && !operand_equal_p (TYPE_SIZE (type), TYPE_SIZE (TREE_TYPE (@0)))
> + && operand_equal_p (TYPE_SIZE (TREE_TYPE (@0)),
> + TYPE_SIZE (TREE_TYPE (@2)))
> + && single_use (@4)
> + && (flag_unsafe_math_optimizations
> + || exact_real_truncate (TYPE_MODE (TREE_TYPE (@2)),
> + &TREE_REAL_CST (@3)))
> + && const_unop (CONVERT_EXPR, TREE_TYPE (@2), @3))))
> +
> (for bit_op (bit_and bit_ior bit_xor)
> (match (bitwise_induction_p @0 @2 @3)
> (bit_op:c
> diff --git a/gcc/testsuite/gcc.target/i386/pr103771-5.c
> b/gcc/testsuite/gcc.target/i386/pr103771-5.c
> new file mode 100644
> index 00000000000..bf94f53b88c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr103771-5.c
> @@ -0,0 +1,54 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=x86-64-v4 -O3 -fno-trapping-math
> -fdump-tree-vect-details" } */
> +/* { dg-final { scan-assembler-not "kshift" { target { ! ia32 } } } } */
> +/* { dg-final { scan-tree-dump-times "loop vectorized using 64 byte vectors"
> 4 "vect" { target { ! ia32 } } } } */
> +
> +void
> +foo (float* a, float* b, float* c, float* d, double* __restrict e, int n)
> +{
> + for (int i = 0 ; i != n; i++)
> + {
> + float tmp = c[i] + d[i];
> + if (a[i] < b[i])
> + tmp = 0.0;
> + e[i] = tmp;
> + }
> +}
> +
> +void
> +foo1 (int* a, int* b, float* c, float* d, double* __restrict e, int n)
> +{
> + for (int i = 0 ; i != n; i++)
> + {
> + float tmp = c[i] + d[i];
> + if (a[i] < b[i])
> + tmp = 0.0;
> + e[i] = tmp;
> + }
> +}
> +
> +
> +void
> +foo2 (double* a, double* b, double* c, double* d, float* __restrict e, int n)
> +{
> + for (int i = 0 ; i != n; i++)
> + {
> + float tmp = c[i] + d[i];
> + if (a[i] < b[i])
> + tmp = 0.0;
> + e[i] = tmp;
> + }
> +}
> +
> +void
> +foo3 (long long* a, long long* b, double* c, double* d, float* __restrict e,
> int n)
> +{
> + for (int i = 0 ; i != n; i++)
> + {
> + float tmp = c[i] + d[i];
> + if (a[i] < b[i])
> + tmp = 0.0;
> + e[i] = tmp;
> + }
> +}
> +
> diff --git a/gcc/testsuite/gcc.target/i386/pr103771-6.c
> b/gcc/testsuite/gcc.target/i386/pr103771-6.c
> new file mode 100644
> index 00000000000..92de6f6249d
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr103771-6.c
> @@ -0,0 +1,16 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=x86-64-v4 -O3 -fno-trapping-math
> -fdump-tree-vect-details" } */
> +/* { dg-final { scan-tree-dump-not "vect_recog_cond_expr_convert_pattern"
> "vect" } } */
> +/* { dg-final { scan-tree-dump-times "loop vectorized using 64 byte vectors"
> 1 "vect" { target { ! ia32 } } } } */
> +
> +void
> +foo (float* a, float* b, float* c, float* d, double* __restrict e, int n)
> +{
> + for (int i = 0 ; i != n; i++)
> + {
> + double tmp = c[i] + d[i];
> + if (a[i] < b[i])
> + tmp = 1.000000000000001;
> + e[i] = tmp;
> + }
> +}
> diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
> index d8484766cf7..00b699f8144 100644
> --- a/gcc/tree-vect-patterns.cc
> +++ b/gcc/tree-vect-patterns.cc
> @@ -1095,7 +1095,7 @@ vect_recog_cond_expr_convert_pattern (vec_info *vinfo,
> stmt_vec_info stmt_vinfo, tree
> *type_out)
> {
> gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
> - tree lhs, match[4], temp, type, new_lhs, op2;
> + tree lhs, match[4], temp, type, new_lhs, op2, op1;
> gimple *cond_stmt;
> gimple *pattern_stmt;
> enum tree_code code = NOP_EXPR;
> @@ -1117,19 +1117,34 @@ vect_recog_cond_expr_convert_pattern (vec_info *vinfo,
> else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (match[1])))
> code = FIX_TRUNC_EXPR;
>
> + op1 = match[1];
> op2 = match[2];
> - type = TREE_TYPE (match[1]);
> - if (TYPE_SIGN (type) != TYPE_SIGN (TREE_TYPE (match[2])))
> + type = TREE_TYPE (op1);
> + /* When op1/op2 is REAL_CST, the conversion must be CONVERT_EXPR from
> + SCALAR_FLOAT_TYPE_P which is restricted in gimple_cond_expr_convert_p.
> + Otherwise, the conversion could be FLOAT_EXPR, FIX_TRUNC_EXPR
> + or CONVERT_EXPR. */
> + if (TREE_CODE (op1) == REAL_CST)
> {
> - op2 = vect_recog_temp_ssa_var (type, NULL);
> - gimple* nop_stmt = gimple_build_assign (op2, NOP_EXPR, match[2]);
> - append_pattern_def_seq (vinfo, stmt_vinfo, nop_stmt,
> - get_vectype_for_scalar_type (vinfo, type));
> + op1 = const_unop (CONVERT_EXPR, TREE_TYPE (op2), op1);
> + type = TREE_TYPE (op2);
> + }
> + else if (TREE_CODE (op2) == REAL_CST)
> + op2 = const_unop (FLOAT_EXPR, TREE_TYPE (op1), op2);
> + else if (code == NOP_EXPR)
> + {
> + if (TYPE_SIGN (type) != TYPE_SIGN (TREE_TYPE (match[2])))
> + {
> + op2 = vect_recog_temp_ssa_var (type, NULL);
> + gimple* nop_stmt = gimple_build_assign (op2, NOP_EXPR, match[2]);
> + append_pattern_def_seq (vinfo, stmt_vinfo, nop_stmt,
> + get_vectype_for_scalar_type (vinfo, type));
> + }
> }
>
> temp = vect_recog_temp_ssa_var (type, NULL);
> cond_stmt = gimple_build_assign (temp, build3 (COND_EXPR, type, match[3],
> - match[1], op2));
> + op1, op2));
> append_pattern_def_seq (vinfo, stmt_vinfo, cond_stmt,
> get_vectype_for_scalar_type (vinfo, type));
> new_lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
> --
> 2.34.1
>