For floating point, !flag_trapping_math is needed for the pattern which transforms 2 conversions to 1 conversion, and may lose 1 potential trap. There shouldn't be any accuracy issue.
It also handles real_cst if it can be represented in different floating point types without loss of precision. Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}. Ok for trunk? gcc/ChangeLog: PR tree-optimization/103771 * match.pd (cond_expr_convert_p): Extend the match to handle scalar floating point type. * tree-vect-patterns.cc (vect_recog_cond_expr_convert_pattern): Handle floating point type. gcc/testsuite/ChangeLog: * gcc.target/i386/pr103771-4.c: New test. --- gcc/match.pd | 55 ++++++++- gcc/testsuite/gcc.target/i386/pr103771-4.c | 132 +++++++++++++++++++++ gcc/tree-vect-patterns.cc | 41 +++++-- 3 files changed, 214 insertions(+), 14 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr103771-4.c diff --git a/gcc/match.pd b/gcc/match.pd index ba036e52837..6f49f66aed2 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -11290,15 +11290,18 @@ and, (match (cond_expr_convert_p @0 @2 @3 @6) (cond (simple_comparison@6 @0 @1) (convert@4 @2) (convert@5 @3)) - (if (INTEGRAL_TYPE_P (type) - && INTEGRAL_TYPE_P (TREE_TYPE (@2)) - && INTEGRAL_TYPE_P (TREE_TYPE (@0)) - && INTEGRAL_TYPE_P (TREE_TYPE (@3)) + (if ((INTEGRAL_TYPE_P (type) + || (!flag_trapping_math && SCALAR_FLOAT_TYPE_P (type))) + && ((INTEGRAL_TYPE_P (TREE_TYPE (@2)) + && INTEGRAL_TYPE_P (TREE_TYPE (@3))) + || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (@2)) + && TREE_TYPE (@2) == TREE_TYPE (@3))) && TYPE_PRECISION (type) != TYPE_PRECISION (TREE_TYPE (@0)) && TYPE_PRECISION (TREE_TYPE (@0)) == TYPE_PRECISION (TREE_TYPE (@2)) && TYPE_PRECISION (TREE_TYPE (@0)) == TYPE_PRECISION (TREE_TYPE (@3)) + && tree_nop_conversion_p (TREE_TYPE (@2), TREE_TYPE (@3)) /* For vect_recog_cond_expr_convert_pattern, @2 and @3 can differ in signess when convert is truncation, but not ok for extension since it's sign_extend vs zero_extend.
*/ @@ -11308,6 +11311,50 @@ and, && single_use (@4) && single_use (@5)))) +(match (cond_expr_convert_p @0 @2 @3 @6) + (cond (simple_comparison@6 @0 @1) (float@4 @2) (float@5 @3)) + (if (SCALAR_FLOAT_TYPE_P (type) && !flag_trapping_math + && TYPE_PRECISION (type) != TYPE_PRECISION (TREE_TYPE (@0)) + && TYPE_PRECISION (TREE_TYPE (@0)) + == TYPE_PRECISION (TREE_TYPE (@2)) + && INTEGRAL_TYPE_P (TREE_TYPE (@2)) + && TREE_TYPE (@2) == TREE_TYPE (@3) + && single_use (@4) + && single_use (@5)))) + +(match (cond_expr_convert_p @0 @2 @3 @6) + (cond (simple_comparison@6 @0 @1) (fix_trunc@4 @2) (fix_trunc@5 @3)) + (if (INTEGRAL_TYPE_P (type) && !flag_trapping_math + && TYPE_PRECISION (type) != TYPE_PRECISION (TREE_TYPE (@0)) + && TYPE_PRECISION (TREE_TYPE (@0)) + == TYPE_PRECISION (TREE_TYPE (@2)) + && SCALAR_FLOAT_TYPE_P (TREE_TYPE (@2)) + && TREE_TYPE (@2) == TREE_TYPE (@3) + && single_use (@4) + && single_use (@5)))) + +(match (cond_expr_convert_p @0 @2 @3 @6) + (cond (simple_comparison@6 @0 @1) (REAL_CST@2) (convert@5 @3)) + (if ((INTEGRAL_TYPE_P (type) + || (!flag_trapping_math && SCALAR_FLOAT_TYPE_P (type))) + && TYPE_PRECISION (type) != TYPE_PRECISION (TREE_TYPE (@0)) + && TYPE_PRECISION (TREE_TYPE (@0)) + == TYPE_PRECISION (TREE_TYPE (@3)) + && SCALAR_FLOAT_TYPE_P (TREE_TYPE (@3)) + && single_use (@5) + && const_unop (CONVERT_EXPR, TREE_TYPE (@3), @2)))) + +(match (cond_expr_convert_p @0 @2 @3 @6) + (cond (simple_comparison@6 @0 @1) (convert@4 @2) (REAL_CST@3)) + (if ((INTEGRAL_TYPE_P (type) + || (!flag_trapping_math && SCALAR_FLOAT_TYPE_P (type))) + && TYPE_PRECISION (type) != TYPE_PRECISION (TREE_TYPE (@0)) + && SCALAR_FLOAT_TYPE_P (TREE_TYPE (@2)) + && TYPE_PRECISION (TREE_TYPE (@0)) + == TYPE_PRECISION (TREE_TYPE (@2)) + && single_use (@4) + && const_unop (CONVERT_EXPR, TREE_TYPE (@2), @3)))) + (for bit_op (bit_and bit_ior bit_xor) (match (bitwise_induction_p @0 @2 @3) (bit_op:c diff --git a/gcc/testsuite/gcc.target/i386/pr103771-4.c 
b/gcc/testsuite/gcc.target/i386/pr103771-4.c new file mode 100644 index 00000000000..89a3c26183a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr103771-4.c @@ -0,0 +1,132 @@ +/* { dg-do compile } */ +/* { dg-options "-march=x86-64-v4 -Ofast -fdump-tree-vect-details" } */ +/* { dg-final { scan-assembler-not "kshift" { target { ! ia32 } } } } */ +/* { dg-final { scan-tree-dump-times "loop vectorized using 64 byte vectors" 10 "vect" { target { ! ia32 } } } } */ + +void +foo (float* a, float* b, float* c, float* d, double* __restrict e, int n) +{ + for (int i = 0 ; i != n; i++) + { + float tmp = c[i] + d[i]; + if (a[i] < b[i]) + tmp = 0.0; + e[i] = tmp; + } +} + +void +foo1 (int* a, int* b, float* c, float* d, double* __restrict e, int n) +{ + for (int i = 0 ; i != n; i++) + { + float tmp = c[i] + d[i]; + if (a[i] < b[i]) + tmp = 0.0; + e[i] = tmp; + } +} + + +void +foo2 (double* a, double* b, double* c, double* d, float* __restrict e, int n) +{ + for (int i = 0 ; i != n; i++) + { + float tmp = c[i] + d[i]; + if (a[i] < b[i]) + tmp = 0.0; + e[i] = tmp; + } +} + +void +foo3 (long long* a, long long* b, double* c, double* d, float* __restrict e, int n) +{ + for (int i = 0 ; i != n; i++) + { + float tmp = c[i] + d[i]; + if (a[i] < b[i]) + tmp = 0.0; + e[i] = tmp; + } +} + + +void +foo4 (float* a, float* b, int* c, int* d, long long* __restrict e, int n) +{ + for (int i = 0 ; i != n; i++) + { + long long tmp = c[i]; + long long tmp2 = d[i]; + if (a[i] < b[i]) + tmp = tmp2; + e[i] = tmp; + } +} + +void +foo5 (double* a, double* b, long long* c, long long* d, int* __restrict e, int n) +{ + for (int i = 0 ; i != n; i++) + { + int tmp = (int)c[i]; + int tmp2 = (int)d[i]; + if (a[i] < b[i]) + tmp = tmp2; + e[i] = tmp; + } +} + +void +foo6 (float* a, float* b, int* c, int* d, double* __restrict e, int n) +{ + for (int i = 0 ; i != n; i++) + { + double tmp = c[i]; + double tmp2 = d[i]; + if (a[i] < b[i]) + tmp = tmp2; + e[i] = tmp; + } +} + +void +foo7 (double* a, double* b, 
long long* c, long long* d, float* __restrict e, int n) +{ + for (int i = 0 ; i != n; i++) + { + float tmp = c[i]; + float tmp2 = d[i]; + if (a[i] < b[i]) + tmp = tmp2; + e[i] = tmp; + } +} + +void +foo8 (int* a, int* b, int* c, int* d, double* __restrict e, int n) +{ + for (int i = 0 ; i != n; i++) + { + double tmp = c[i]; + double tmp2 = d[i]; + if (a[i] < b[i]) + tmp = tmp2; + e[i] = tmp; + } +} + +void +foo9 (long long* a, long long* b, long long* c, long long* d, float* __restrict e, int n) +{ + for (int i = 0 ; i != n; i++) + { + float tmp = c[i]; + float tmp2 = d[i]; + if (a[i] < b[i]) + tmp = tmp2; + e[i] = tmp; + } +} diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc index 4f0a7ea162b..4da5a91e1a4 100644 --- a/gcc/tree-vect-patterns.cc +++ b/gcc/tree-vect-patterns.cc @@ -1095,9 +1095,10 @@ vect_recog_cond_expr_convert_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo, tree *type_out) { gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt); - tree lhs, match[4], temp, type, new_lhs, op2; + tree lhs, match[4], temp, type, new_lhs, op2, op1; gimple *cond_stmt; gimple *pattern_stmt; + enum tree_code code = NOP_EXPR; if (!last_stmt) return NULL; @@ -1105,29 +1106,49 @@ vect_recog_cond_expr_convert_pattern (vec_info *vinfo, lhs = gimple_assign_lhs (last_stmt); /* Find E = C cmp D ? (TYPE3) A ? (TYPE3) B; - TYPE_PRECISION (A) == TYPE_PRECISION (C). */ +/ TYPE_PRECISION (A) == TYPE_PRECISION (C). */ if (!gimple_cond_expr_convert_p (lhs, &match[0], NULL)) return NULL; vect_pattern_detected ("vect_recog_cond_expr_convert_pattern", last_stmt); + if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (lhs))) + code = INTEGRAL_TYPE_P (TREE_TYPE (match[1])) ? 
FLOAT_EXPR : CONVERT_EXPR; + else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (match[1]))) + code = FIX_TRUNC_EXPR; + + op1 = match[1]; op2 = match[2]; - type = TREE_TYPE (match[1]); - if (TYPE_SIGN (type) != TYPE_SIGN (TREE_TYPE (match[2]))) + type = TREE_TYPE (op1); + /* When op1/op2 is REAL_CST, the conversion must be CONVERT_EXPR from + SCALAR_FLOAT_TYPE_P which is restricted in gimple_cond_expr_convert_p. + Otherwise, the conversion could be FLOAT_EXPR, FIX_TRUNC_EXPR + or CONVERT_EXPR. */ + if (TREE_CODE (op1) == REAL_CST) + { + op1 = const_unop (CONVERT_EXPR, TREE_TYPE (op2), op1); + type = TREE_TYPE (op2); + } + else if (TREE_CODE (op2) == REAL_CST) + op2 = const_unop (FLOAT_EXPR, TREE_TYPE (op1), op2); + else if (code == NOP_EXPR) { - op2 = vect_recog_temp_ssa_var (type, NULL); - gimple* nop_stmt = gimple_build_assign (op2, NOP_EXPR, match[2]); - append_pattern_def_seq (vinfo, stmt_vinfo, nop_stmt, - get_vectype_for_scalar_type (vinfo, type)); + if (TYPE_SIGN (type) != TYPE_SIGN (TREE_TYPE (match[2]))) + { + op2 = vect_recog_temp_ssa_var (type, NULL); + gimple* nop_stmt = gimple_build_assign (op2, NOP_EXPR, match[2]); + append_pattern_def_seq (vinfo, stmt_vinfo, nop_stmt, + get_vectype_for_scalar_type (vinfo, type)); + } } temp = vect_recog_temp_ssa_var (type, NULL); cond_stmt = gimple_build_assign (temp, build3 (COND_EXPR, type, match[3], - match[1], op2)); + op1, op2)); append_pattern_def_seq (vinfo, stmt_vinfo, cond_stmt, get_vectype_for_scalar_type (vinfo, type)); new_lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL); - pattern_stmt = gimple_build_assign (new_lhs, NOP_EXPR, temp); + pattern_stmt = gimple_build_assign (new_lhs, code, temp); *type_out = STMT_VINFO_VECTYPE (stmt_vinfo); if (dump_enabled_p ()) -- 2.34.1