On Mon, Apr 28, 2025 at 3:30 PM liuhongt <hongtao....@intel.com> wrote: > > For floating point, !flag_trapping_math is needed for the pattern which > transforms 2 conversions to 1 conversion, and may lose 1 potential trap. > There shouldn't be any accuracy issue. > > It also handles real_cst if it can be represented in different floating point > types without loss of precision. > > Bootstrapp and regtested on x86_64-pc-linux-gnu{-m32,}. > Ok for trunk? > > gcc/ChangeLog: > > PR tree-optimization/103771 > * match.pd (cond_expr_convert_p): Extend the match to handle > scalar floating point type. > * tree-vect-patterns.cc > (vect_recog_cond_expr_convert_pattern): Handle floating point > type. > > gcc/testsuite/ChangeLog: > > * gcc.target/i386/pr103771-4.c: New test. > --- > gcc/match.pd | 55 ++++++++- > gcc/testsuite/gcc.target/i386/pr103771-4.c | 132 +++++++++++++++++++++ > gcc/tree-vect-patterns.cc | 41 +++++-- > 3 files changed, 214 insertions(+), 14 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/i386/pr103771-4.c > > diff --git a/gcc/match.pd b/gcc/match.pd > index ba036e52837..6f49f66aed2 100644 > --- a/gcc/match.pd > +++ b/gcc/match.pd > @@ -11290,15 +11290,18 @@ and, > > (match (cond_expr_convert_p @0 @2 @3 @6) > (cond (simple_comparison@6 @0 @1) (convert@4 @2) (convert@5 @3)) > - (if (INTEGRAL_TYPE_P (type) > - && INTEGRAL_TYPE_P (TREE_TYPE (@2)) > - && INTEGRAL_TYPE_P (TREE_TYPE (@0)) > - && INTEGRAL_TYPE_P (TREE_TYPE (@3)) > + (if ((INTEGRAL_TYPE_P (type) > + || (!flag_trapping_math && SCALAR_FLOAT_TYPE_P (type))) > + && ((INTEGRAL_TYPE_P (TREE_TYPE (@2)) > + && INTEGRAL_TYPE_P (TREE_TYPE (@3))) > + || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (@2)) > + && TREE_TYPE (@2) == TREE_TYPE (@3))) > && TYPE_PRECISION (type) != TYPE_PRECISION (TREE_TYPE (@0)) > && TYPE_PRECISION (TREE_TYPE (@0)) > == TYPE_PRECISION (TREE_TYPE (@2)) > && TYPE_PRECISION (TREE_TYPE (@0)) > == TYPE_PRECISION (TREE_TYPE (@3)) > + && tree_nop_conversion_p (TREE_TYPE (@2), TREE_TYPE 
(@3)) > /* For vect_recog_cond_expr_convert_pattern, @2 and @3 can differ in > signess when convert is truncation, but not ok for extension since > it's sign_extend vs zero_extend. */
Can you, instead of mangling in float support, use separate (match patterns, like for the cases below? > @@ -11308,6 +11311,50 @@ and, > && single_use (@4) > && single_use (@5)))) > > +(match (cond_expr_convert_p @0 @2 @3 @6) > + (cond (simple_comparison@6 @0 @1) (float@4 @2) (float@5 @3)) > + (if (SCALAR_FLOAT_TYPE_P (type) && !flag_trapping_math > + && TYPE_PRECISION (type) != TYPE_PRECISION (TREE_TYPE (@0)) So this fails to constrain the comparison types (above we check INTEGRAL_TYPE_P); if it happens to be a vector type, using TYPE_PRECISION will ICE. I think the main intent of the vectorizer pattern is to match up the _size_ of the vector elements, so maybe re-formulate the constraint this way with operand_equal_p (TYPE_SIZE (type), TYPE_SIZE (TREE_TYPE (@0))). This is also because precision on floats is not equal to the number of bits in the mode. > + && TYPE_PRECISION (TREE_TYPE (@0)) > + == TYPE_PRECISION (TREE_TYPE (@2)) > + && INTEGRAL_TYPE_P (TREE_TYPE (@2)) > + && TREE_TYPE (@2) == TREE_TYPE (@3) > + && single_use (@4) > + && single_use (@5)))) > + > +(match (cond_expr_convert_p @0 @2 @3 @6) > + (cond (simple_comparison@6 @0 @1) (fix_trunc@4 @2) (fix_trunc@5 @3)) > + (if (INTEGRAL_TYPE_P (type) && !flag_trapping_math > + && TYPE_PRECISION (type) != TYPE_PRECISION (TREE_TYPE (@0)) > + && TYPE_PRECISION (TREE_TYPE (@0)) > + == TYPE_PRECISION (TREE_TYPE (@2)) > + && SCALAR_FLOAT_TYPE_P (TREE_TYPE (@2)) > + && TREE_TYPE (@2) == TREE_TYPE (@3) Please use types_match () instead of TREE_TYPE pointer compares. > + && single_use (@4) > + && single_use (@5)))) > + > +(match (cond_expr_convert_p @0 @2 @3 @6) > + (cond (simple_comparison@6 @0 @1) (REAL_CST@2) (convert@5 @3)) I think the same issue exists for INTEGER_CSTs.
> + (if ((INTEGRAL_TYPE_P (type) > + || (!flag_trapping_math && SCALAR_FLOAT_TYPE_P (type))) > + && TYPE_PRECISION (type) != TYPE_PRECISION (TREE_TYPE (@0)) > + && TYPE_PRECISION (TREE_TYPE (@0)) > + == TYPE_PRECISION (TREE_TYPE (@3)) > + && SCALAR_FLOAT_TYPE_P (TREE_TYPE (@3)) > + && single_use (@5) > + && const_unop (CONVERT_EXPR, TREE_TYPE (@3), @2)))) I'm not sure this is a good check? Say, for type == double and typeof(@3) == float the REAL_CST can have extra precision that you'd drop when rewriting this as (double)(cnd ? (float)@2 : @3). You'd need to check the REAL_CST is exactly representable in the type of @3. Same for a possible integer case. Same for handling fix_trunc/float when one case is a constant. Can you split the patch into two to separate out the handling of constants? Thanks, and sorry for the delay. Richard. > + > +(match (cond_expr_convert_p @0 @2 @3 @6) > + (cond (simple_comparison@6 @0 @1) (convert@4 @2) (REAL_CST@3)) > + (if ((INTEGRAL_TYPE_P (type) > + || (!flag_trapping_math && SCALAR_FLOAT_TYPE_P (type))) > + && TYPE_PRECISION (type) != TYPE_PRECISION (TREE_TYPE (@0)) > + && SCALAR_FLOAT_TYPE_P (TREE_TYPE (@2)) > + && TYPE_PRECISION (TREE_TYPE (@0)) > + == TYPE_PRECISION (TREE_TYPE (@2)) > + && single_use (@4) > + && const_unop (CONVERT_EXPR, TREE_TYPE (@2), @3)))) > + > (for bit_op (bit_and bit_ior bit_xor) > (match (bitwise_induction_p @0 @2 @3) > (bit_op:c > diff --git a/gcc/testsuite/gcc.target/i386/pr103771-4.c > b/gcc/testsuite/gcc.target/i386/pr103771-4.c > new file mode 100644 > index 00000000000..89a3c26183a > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr103771-4.c > @@ -0,0 +1,132 @@ > +/* { dg-do compile } */ > +/* { dg-options "-march=x86-64-v4 -Ofast -fdump-tree-vect-details" } */ > +/* { dg-final { scan-assembler-not "kshift" { target { ! ia32 } } } } */ > +/* { dg-final { scan-tree-dump-times "loop vectorized using 64 byte vectors" > 10 "vect" { target { ! 
ia32 } } } } */ > + > +void > +foo (float* a, float* b, float* c, float* d, double* __restrict e, int n) > +{ > + for (int i = 0 ; i != n; i++) > + { > + float tmp = c[i] + d[i]; > + if (a[i] < b[i]) > + tmp = 0.0; > + e[i] = tmp; > + } > +} > + > +void > +foo1 (int* a, int* b, float* c, float* d, double* __restrict e, int n) > +{ > + for (int i = 0 ; i != n; i++) > + { > + float tmp = c[i] + d[i]; > + if (a[i] < b[i]) > + tmp = 0.0; > + e[i] = tmp; > + } > +} > + > + > +void > +foo2 (double* a, double* b, double* c, double* d, float* __restrict e, int n) > +{ > + for (int i = 0 ; i != n; i++) > + { > + float tmp = c[i] + d[i]; > + if (a[i] < b[i]) > + tmp = 0.0; > + e[i] = tmp; > + } > +} > + > +void > +foo3 (long long* a, long long* b, double* c, double* d, float* __restrict e, > int n) > +{ > + for (int i = 0 ; i != n; i++) > + { > + float tmp = c[i] + d[i]; > + if (a[i] < b[i]) > + tmp = 0.0; > + e[i] = tmp; > + } > +} > + > + > +void > +foo4 (float* a, float* b, int* c, int* d, long long* __restrict e, int n) > +{ > + for (int i = 0 ; i != n; i++) > + { > + long long tmp = c[i]; > + long long tmp2 = d[i]; > + if (a[i] < b[i]) > + tmp = tmp2; > + e[i] = tmp; > + } > +} > + > +void > +foo5 (double* a, double* b, long long* c, long long* d, int* __restrict e, > int n) > +{ > + for (int i = 0 ; i != n; i++) > + { > + int tmp = (int)c[i]; > + int tmp2 = (int)d[i]; > + if (a[i] < b[i]) > + tmp = tmp2; > + e[i] = tmp; > + } > +} > + > +void > +foo6 (float* a, float* b, int* c, int* d, double* __restrict e, int n) > +{ > + for (int i = 0 ; i != n; i++) > + { > + double tmp = c[i]; > + double tmp2 = d[i]; > + if (a[i] < b[i]) > + tmp = tmp2; > + e[i] = tmp; > + } > +} > + > +void > +foo7 (double* a, double* b, long long* c, long long* d, float* __restrict e, > int n) > +{ > + for (int i = 0 ; i != n; i++) > + { > + float tmp = c[i]; > + float tmp2 = d[i]; > + if (a[i] < b[i]) > + tmp = tmp2; > + e[i] = tmp; > + } > +} > + > +void > +foo8 (int* a, int* b, int* c, int* 
d, double* __restrict e, int n) > +{ > + for (int i = 0 ; i != n; i++) > + { > + double tmp = c[i]; > + double tmp2 = d[i]; > + if (a[i] < b[i]) > + tmp = tmp2; > + e[i] = tmp; > + } > +} > + > +void > +foo9 (long long* a, long long* b, long long* c, long long* d, float* > __restrict e, int n) > +{ > + for (int i = 0 ; i != n; i++) > + { > + float tmp = c[i]; > + float tmp2 = d[i]; > + if (a[i] < b[i]) > + tmp = tmp2; > + e[i] = tmp; > + } > +} > diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc > index 4f0a7ea162b..4da5a91e1a4 100644 > --- a/gcc/tree-vect-patterns.cc > +++ b/gcc/tree-vect-patterns.cc > @@ -1095,9 +1095,10 @@ vect_recog_cond_expr_convert_pattern (vec_info *vinfo, > stmt_vec_info stmt_vinfo, tree > *type_out) > { > gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt); > - tree lhs, match[4], temp, type, new_lhs, op2; > + tree lhs, match[4], temp, type, new_lhs, op2, op1; > gimple *cond_stmt; > gimple *pattern_stmt; > + enum tree_code code = NOP_EXPR; > > if (!last_stmt) > return NULL; > @@ -1105,29 +1106,49 @@ vect_recog_cond_expr_convert_pattern (vec_info *vinfo, > lhs = gimple_assign_lhs (last_stmt); > > /* Find E = C cmp D ? (TYPE3) A ? (TYPE3) B; > - TYPE_PRECISION (A) == TYPE_PRECISION (C). */ > +/ TYPE_PRECISION (A) == TYPE_PRECISION (C). */ > if (!gimple_cond_expr_convert_p (lhs, &match[0], NULL)) > return NULL; > > vect_pattern_detected ("vect_recog_cond_expr_convert_pattern", last_stmt); > > + if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (lhs))) > + code = INTEGRAL_TYPE_P (TREE_TYPE (match[1])) ? FLOAT_EXPR : > CONVERT_EXPR; > + else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (match[1]))) > + code = FIX_TRUNC_EXPR; > + > + op1 = match[1]; > op2 = match[2]; > - type = TREE_TYPE (match[1]); > - if (TYPE_SIGN (type) != TYPE_SIGN (TREE_TYPE (match[2]))) > + type = TREE_TYPE (op1); > + /* When op1/op2 is REAL_CST, the conversion must be CONVERT_EXPR from > + SCALAR_FLOAT_TYPE_P which is restricted in gimple_cond_expr_convert_p. 
> + Otherwise, the conversion could be FLOAT_EXPR, FIX_TRUNC_EXPR > + or CONVERT_EXPR. */ > + if (TREE_CODE (op1) == REAL_CST) > + { > + op1 = const_unop (CONVERT_EXPR, TREE_TYPE (op2), op1); > + type = TREE_TYPE (op2); > + } > + else if (TREE_CODE (op2) == REAL_CST) > + op2 = const_unop (FLOAT_EXPR, TREE_TYPE (op1), op2); > + else if (code == NOP_EXPR) > { > - op2 = vect_recog_temp_ssa_var (type, NULL); > - gimple* nop_stmt = gimple_build_assign (op2, NOP_EXPR, match[2]); > - append_pattern_def_seq (vinfo, stmt_vinfo, nop_stmt, > - get_vectype_for_scalar_type (vinfo, type)); > + if (TYPE_SIGN (type) != TYPE_SIGN (TREE_TYPE (match[2]))) > + { > + op2 = vect_recog_temp_ssa_var (type, NULL); > + gimple* nop_stmt = gimple_build_assign (op2, NOP_EXPR, match[2]); > + append_pattern_def_seq (vinfo, stmt_vinfo, nop_stmt, > + get_vectype_for_scalar_type (vinfo, type)); > + } > } > > temp = vect_recog_temp_ssa_var (type, NULL); > cond_stmt = gimple_build_assign (temp, build3 (COND_EXPR, type, match[3], > - match[1], op2)); > + op1, op2)); > append_pattern_def_seq (vinfo, stmt_vinfo, cond_stmt, > get_vectype_for_scalar_type (vinfo, type)); > new_lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL); > - pattern_stmt = gimple_build_assign (new_lhs, NOP_EXPR, temp); > + pattern_stmt = gimple_build_assign (new_lhs, code, temp); > *type_out = STMT_VINFO_VECTYPE (stmt_vinfo); > > if (dump_enabled_p ()) > -- > 2.34.1 >