https://gcc.gnu.org/g:e3385fed3fc61ac505fb5c1ffa09758a6f2489eb

commit r16-605-ge3385fed3fc61ac505fb5c1ffa09758a6f2489eb
Author: liuhongt <hongtao....@intel.com>
Date:   Mon Apr 7 20:12:00 2025 -0700

    Extend vect_recog_cond_expr_convert_pattern to handle floating point type.
    
    For floating point, !flag_trapping_math is needed for the pattern
    which transforms 2 conversions to 1 conversion, and may lose 1
    potential trap. There shouldn't be any accuracy issue.
    
    gcc/ChangeLog:
    
            PR tree-optimization/103771
            * match.pd (cond_expr_convert_p): Extend the match to handle
            scalar floating point type.
            * tree-vect-patterns.cc
            (vect_recog_cond_expr_convert_pattern): Handle floating point
            type.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/i386/pr103771-4.c: New test.

Diff:
---
 gcc/match.pd                               | 52 +++++++++++++++----
 gcc/testsuite/gcc.target/i386/pr103771-4.c | 82 ++++++++++++++++++++++++++++++
 gcc/tree-vect-patterns.cc                  |  8 ++-
 3 files changed, 131 insertions(+), 11 deletions(-)

diff --git a/gcc/match.pd b/gcc/match.pd
index f4050687647b..96136404f5e4 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -11308,26 +11308,58 @@ and,
 (match (ctz_table_index @1 @2 @3)
   (rshift (mult (bit_and:c (negate @1) @1) INTEGER_CST@2) INTEGER_CST@3))
 
+/* Floatint point/integer comparison and integer->integer
+   or floating point -> float point conversion.  */
 (match (cond_expr_convert_p @0 @2 @3 @6)
  (cond (simple_comparison@6 @0 @1) (convert@4 @2) (convert@5 @3))
-  (if (INTEGRAL_TYPE_P (type)
-       && INTEGRAL_TYPE_P (TREE_TYPE (@2))
-       && INTEGRAL_TYPE_P (TREE_TYPE (@0))
-       && INTEGRAL_TYPE_P (TREE_TYPE (@3))
-       && TYPE_PRECISION (type) != TYPE_PRECISION (TREE_TYPE (@0))
-       && TYPE_PRECISION (TREE_TYPE (@0))
-         == TYPE_PRECISION (TREE_TYPE (@2))
-       && TYPE_PRECISION (TREE_TYPE (@0))
-         == TYPE_PRECISION (TREE_TYPE (@3))
+  (if ((INTEGRAL_TYPE_P (type)
+       || (!flag_trapping_math && SCALAR_FLOAT_TYPE_P (type)))
+       && ((INTEGRAL_TYPE_P (TREE_TYPE (@2))
+           && INTEGRAL_TYPE_P (TREE_TYPE (@3)))
+          || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (@2))
+              && types_match (TREE_TYPE (@2), TREE_TYPE (@3))))
+       && !operand_equal_p (TYPE_SIZE (type), TYPE_SIZE (TREE_TYPE (@0)))
+       && operand_equal_p (TYPE_SIZE (TREE_TYPE (@0)),
+                          TYPE_SIZE (TREE_TYPE (@2)))
+       && operand_equal_p (TYPE_SIZE (TREE_TYPE (@0)),
+                          TYPE_SIZE (TREE_TYPE (@3)))
        /* For vect_recog_cond_expr_convert_pattern, @2 and @3 can differ in
          signess when convert is truncation, but not ok for extension since
          it's sign_extend vs zero_extend.  */
-       && (TYPE_PRECISION (TREE_TYPE (@0)) > TYPE_PRECISION (type)
+       && (known_gt (tree_to_poly_uint64 (TYPE_SIZE (TREE_TYPE (@0))),
+                    tree_to_poly_uint64 (TYPE_SIZE (type)))
           || (TYPE_UNSIGNED (TREE_TYPE (@2))
               == TYPE_UNSIGNED (TREE_TYPE (@3))))
        && single_use (@4)
        && single_use (@5))))
 
+/* Floating point or integer comparison and integer to floating point
+   conversion.  */
+(match (cond_expr_convert_p @0 @2 @3 @6)
+ (cond (simple_comparison@6 @0 @1) (float@4 @2) (float@5 @3))
+  (if (SCALAR_FLOAT_TYPE_P (type) && !flag_trapping_math
+       && INTEGRAL_TYPE_P (TREE_TYPE (@2))
+       && types_match (TREE_TYPE (@2), TREE_TYPE (@3))
+       && !operand_equal_p (TYPE_SIZE (type), TYPE_SIZE (TREE_TYPE (@0)))
+       && operand_equal_p (TYPE_SIZE (TREE_TYPE (@0)),
+                          TYPE_SIZE (TREE_TYPE (@2)))
+       && single_use (@4)
+       && single_use (@5))))
+
+/* Floating point or integer comparison and floating point to integer
+   conversion.  */
+(match (cond_expr_convert_p @0 @2 @3 @6)
+ (cond (simple_comparison@6 @0 @1) (fix_trunc@4 @2) (fix_trunc@5 @3))
+  (if (INTEGRAL_TYPE_P (type) && !flag_trapping_math
+       && SCALAR_FLOAT_TYPE_P (TREE_TYPE (@2))
+       && types_match (TREE_TYPE (@2), TREE_TYPE (@3))
+       && !operand_equal_p (TYPE_SIZE (type),
+                           TYPE_SIZE (TREE_TYPE (@0)))
+       && operand_equal_p (TYPE_SIZE (TREE_TYPE (@0)),
+                          TYPE_SIZE (TREE_TYPE (@2)))
+       && single_use (@4)
+       && single_use (@5))))
+
 (for bit_op (bit_and bit_ior bit_xor)
  (match (bitwise_induction_p @0 @2 @3)
   (bit_op:c
diff --git a/gcc/testsuite/gcc.target/i386/pr103771-4.c 
b/gcc/testsuite/gcc.target/i386/pr103771-4.c
new file mode 100644
index 000000000000..299337dce6db
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr103771-4.c
@@ -0,0 +1,82 @@
+/* { dg-do compile } */
+/* { dg-options "-march=x86-64-v4 -Ofast -fdump-tree-vect-details" } */
+/* { dg-final { scan-assembler-not "kshift" { target { ! ia32 } } } } */
+/* { dg-final { scan-tree-dump-times "loop vectorized using 64 byte vectors" 6 
"vect" { target { ! ia32 } } } } */
+
+void
+foo (float* a, float* b, int* c, int* d, long long* __restrict e, int n)
+{
+  for (int i = 0 ; i != n; i++)
+    {
+      long long tmp = c[i];
+      long long tmp2 = d[i];
+      if (a[i] < b[i])
+       tmp = tmp2;
+      e[i] = tmp;
+    }
+}
+
+void
+foo1 (double* a, double* b, long long* c, long long* d, int* __restrict e, int 
n)
+{
+  for (int i = 0 ; i != n; i++)
+    {
+      int tmp = (int)c[i];
+      int tmp2 = (int)d[i];
+      if (a[i] < b[i])
+       tmp = tmp2;
+      e[i] = tmp;
+    }
+}
+
+void
+foo2 (float* a, float* b, int* c, int* d, double* __restrict e, int n)
+{
+  for (int i = 0 ; i != n; i++)
+    {
+      double tmp = c[i];
+      double tmp2 = d[i];
+      if (a[i] < b[i])
+       tmp = tmp2;
+      e[i] = tmp;
+    }
+}
+
+void
+foo3 (double* a, double* b, long long* c, long long* d, float* __restrict e, 
int n)
+{
+  for (int i = 0 ; i != n; i++)
+    {
+      float tmp = c[i];
+      float tmp2 = d[i];
+      if (a[i] < b[i])
+       tmp = tmp2;
+      e[i] = tmp;
+    }
+}
+
+void
+foo4 (int* a, int* b, int* c, int* d, double* __restrict e, int n)
+{
+  for (int i = 0 ; i != n; i++)
+    {
+      double tmp = c[i];
+      double tmp2 = d[i];
+      if (a[i] < b[i])
+       tmp = tmp2;
+      e[i] = tmp;
+    }
+}
+
+void
+foo5 (long long* a, long long* b, long long* c, long long* d, float* 
__restrict e, int n)
+{
+  for (int i = 0 ; i != n; i++)
+    {
+      float tmp = c[i];
+      float tmp2 = d[i];
+      if (a[i] < b[i])
+       tmp = tmp2;
+      e[i] = tmp;
+    }
+}
diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
index ca19add83c05..d8484766cf76 100644
--- a/gcc/tree-vect-patterns.cc
+++ b/gcc/tree-vect-patterns.cc
@@ -1098,6 +1098,7 @@ vect_recog_cond_expr_convert_pattern (vec_info *vinfo,
   tree lhs, match[4], temp, type, new_lhs, op2;
   gimple *cond_stmt;
   gimple *pattern_stmt;
+  enum tree_code code = NOP_EXPR;
 
   if (!last_stmt)
     return NULL;
@@ -1111,6 +1112,11 @@ vect_recog_cond_expr_convert_pattern (vec_info *vinfo,
 
   vect_pattern_detected ("vect_recog_cond_expr_convert_pattern", last_stmt);
 
+  if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (lhs)))
+    code = INTEGRAL_TYPE_P (TREE_TYPE (match[1])) ? FLOAT_EXPR : CONVERT_EXPR;
+  else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (match[1])))
+    code = FIX_TRUNC_EXPR;
+
   op2 = match[2];
   type = TREE_TYPE (match[1]);
   if (TYPE_SIGN (type) != TYPE_SIGN (TREE_TYPE (match[2])))
@@ -1127,7 +1133,7 @@ vect_recog_cond_expr_convert_pattern (vec_info *vinfo,
   append_pattern_def_seq (vinfo, stmt_vinfo, cond_stmt,
                          get_vectype_for_scalar_type (vinfo, type));
   new_lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
-  pattern_stmt = gimple_build_assign (new_lhs, NOP_EXPR, temp);
+  pattern_stmt = gimple_build_assign (new_lhs, code, temp);
   *type_out = STMT_VINFO_VECTYPE (stmt_vinfo);
 
   if (dump_enabled_p ())

Reply via email to