The following testcase

/* Reproducer: for each i in [0, n), replace c[i] with its single-precision
   square root, but only where d[i] > 1000.  Elements whose d[i] is not
   greater than 1000 are left untouched.  */
void f (float *__restrict c, int *__restrict d, int n)
{
    for (int i = 0; i < n; i++)
    {
      /* The sqrt is only reached when the condition holds; this conditional
         call is the statement the if-conversion change is about.  */
      if (d[i] > 1000)
        c[i] = __builtin_sqrtf (c[i]);
    }
}

compiled with -O3 -march=armv9-a -fno-math-errno -ftrapping-math needs the
sqrt to be predicated on the condition.  It is invalid to execute the sqrt
unconditionally and use a select to extract the result later, unless
-fno-trapping-math is in effect.

This change in if-conversion changes what we used to generate:

  _26 = _4 > 1000;
  _34 = _33 + _2;
  _5 = (float *) _34;
  _6 = .MASK_LOAD (_5, 32B, _26, 0.0);
  _7 = __builtin_sqrtf (_6);
  .MASK_STORE (_5, 32B, _26, _7);

into

  _26 = _4 > 1000;
  _34 = _33 + _2;
  _5 = (float *) _34;
  _6 = .MASK_LOAD (_5, 32B, _26, 0.0);
  _7 = .COND_SQRT (_26, _6, _6);
  .MASK_STORE (_5, 32B, _26, _7);

which correctly results in

.L3:
        ld1w    z0.s, p7/z, [x1, x3, lsl 2]
        cmpgt   p7.s, p7/z, z0.s, z31.s
        ld1w    z30.s, p7/z, [x0, x3, lsl 2]
        fsqrt   z30.s, p7/m, z30.s
        st1w    z30.s, p7, [x0, x3, lsl 2]
        incw    x3
        whilelo p7.s, w3, w2
        b.any   .L3

instead of

.L3:
        ld1w    z0.s, p7/z, [x1, x3, lsl 2]
        cmpgt   p7.s, p7/z, z0.s, z31.s
        ld1w    z30.s, p7/z, [x0, x3, lsl 2]
        fsqrt   z30.s, p6/m, z30.s
        st1w    z30.s, p7, [x0, x3, lsl 2]
        incw    x3
        whilelo p7.s, w3, w2
        b.any   .L3

Bootstrapped and regtested on aarch64-none-linux-gnu,
arm-none-linux-gnueabihf and x86_64-pc-linux-gnu
(-m32 and -m64) with no issues.

Any comments?

Thanks,
Tamar

gcc/ChangeLog:

        PR tree-optimization/122103
        * tree-if-conv.cc (ifcvt_can_predicate): Support gimple_call_builtin_p.
        (if_convertible_stmt_p, predicate_rhs_code,
        predicate_statements): Likewise.

---
diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc
index bb30c4fb35facf3289a7239af0d39d2d1b8e47c6..4666a06b98425191cc99343c0b837f88a5b8fa1a 100644
--- a/gcc/tree-if-conv.cc
+++ b/gcc/tree-if-conv.cc
@@ -1006,6 +1006,19 @@ ifcvt_can_predicate (gimple *stmt)
   if (gimple_assign_single_p (stmt))
     return ifcvt_can_use_mask_load_store (stmt);
 
+  if (gimple_call_builtin_p (stmt))
+    if (tree callee = gimple_call_fndecl (stmt))
+      {
+       auto ifn = associated_internal_fn (callee);
+       auto cond_ifn = get_conditional_internal_fn (ifn);
+       tree type = TREE_TYPE (gimple_call_fntype (stmt));
+       return (cond_ifn != IFN_LAST
+               && vectorized_internal_fn_supported_p (cond_ifn, type));
+      }
+
+  if (!is_gimple_assign (stmt))
+    return false;
+
   tree_code code = gimple_assign_rhs_code (stmt);
   tree lhs_type = TREE_TYPE (gimple_assign_lhs (stmt));
   tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
@@ -1150,6 +1163,23 @@ if_convertible_stmt_p (gimple *stmt, vec<data_reference_p> refs)
                  }
          }
 
+       /* Check if the call can trap and if so require predication.  */
+       if (gimple_could_trap_p (stmt))
+         {
+           if (ifcvt_can_predicate (stmt))
+             {
+               gimple_set_plf (stmt, GF_PLF_2, true);
+               need_to_predicate = true;
+               return true;
+             }
+           else
+             {
+               if (dump_file && (dump_flags & TDF_DETAILS))
+                 fprintf (dump_file, "stmt could trap...\n");
+               return false;
+             }
+         }
+
        /* There are some IFN_s that are used to replace builtins but have the
           same semantics.  Even if MASK_CALL cannot handle them vectorable_call
           will insert the proper selection, so do not block conversion.  */
@@ -2840,20 +2870,38 @@ value_available_p (gimple *stmt, hash_set<tree_ssa_name_hash> *ssa_names,
    SSA names defined earlier in STMT's block.  */
 
 static gimple *
-predicate_rhs_code (gassign *stmt, tree mask, tree cond,
+predicate_rhs_code (gimple *stmt, tree mask, tree cond,
                    hash_set<tree_ssa_name_hash> *ssa_names)
 {
-  tree lhs = gimple_assign_lhs (stmt);
-  tree_code code = gimple_assign_rhs_code (stmt);
-  unsigned int nops = gimple_num_ops (stmt);
-  internal_fn cond_fn = get_conditional_internal_fn (code);
+  internal_fn cond_fn;
+  if (is_gimple_assign (stmt))
+    {
+      tree_code code = gimple_assign_rhs_code (stmt);
+      cond_fn = get_conditional_internal_fn (code);
+    }
+  else if (tree callee = gimple_call_fndecl (stmt))
+    {
+      auto ifn = associated_internal_fn (callee);
+      cond_fn = get_conditional_internal_fn (ifn);
+    }
+  else
+    return NULL;
+
+  if (cond_fn == IFN_LAST)
+    {
+      gcc_assert (!gimple_could_trap_p (stmt));
+      return NULL;
+    }
+
+  tree lhs = gimple_get_lhs (stmt);
+  unsigned int nops = gimple_num_args (stmt) + 1;
 
   /* Construct the arguments to the conditional internal function.   */
   auto_vec<tree, 8> args;
   args.safe_grow (nops + 1, true);
   args[0] = mask;
-  for (unsigned int i = 1; i < nops; ++i)
-    args[i] = gimple_op (stmt, i);
+  for (unsigned int i = 0; i < nops - 1; ++i)
+    args[i+1] = gimple_arg (stmt, i);
   args[nops] = NULL_TREE;
 
   /* Look for uses of the result to see whether they are COND_EXPRs that can
@@ -3030,8 +3078,9 @@ predicate_statements (loop_p loop)
 
       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
        {
-         gassign *stmt = dyn_cast <gassign *> (gsi_stmt (gsi));
-         if (!stmt)
+         gimple *stmt = gsi_stmt (gsi);
+         if (!is_gimple_assign (stmt)
+             && !gimple_call_builtin_p (stmt))
            ;
          else if (is_false_predicate (cond)
                   && gimple_vdef (stmt))
@@ -3042,9 +3091,14 @@ predicate_statements (loop_p loop)
              continue;
            }
          else if (gimple_plf (stmt, GF_PLF_2)
-                  && is_gimple_assign (stmt))
+                  && (is_gimple_assign (stmt)
+                      || gimple_call_builtin_p (stmt)))
            {
-             tree lhs = gimple_assign_lhs (stmt);
+             tree lhs = gimple_get_lhs (stmt);
+             /* ?? Assume that calls without an LHS are not data processing
+                and so no issues with traps.  */
+             if (!lhs)
+               continue;
              tree mask;
              gimple *new_stmt;
              gimple_seq stmts = NULL;
@@ -3080,11 +3134,14 @@ predicate_statements (loop_p loop)
                  vect_masks.safe_push (mask);
                }
              if (gimple_assign_single_p (stmt))
-               new_stmt = predicate_load_or_store (&gsi, stmt, mask);
+               new_stmt = predicate_load_or_store (&gsi,
+                                                   as_a <gassign *> (stmt),
+                                                   mask);
              else
                new_stmt = predicate_rhs_code (stmt, mask, cond, &ssa_names);
 
-             gsi_replace (&gsi, new_stmt, true);
+             if (new_stmt)
+               gsi_replace (&gsi, new_stmt, true);
            }
          else if (gimple_needing_rewrite_undefined (stmt))
            rewrite_to_defined_unconditional (&gsi);


-- 
diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc
index bb30c4fb35facf3289a7239af0d39d2d1b8e47c6..4666a06b98425191cc99343c0b837f88a5b8fa1a 100644
--- a/gcc/tree-if-conv.cc
+++ b/gcc/tree-if-conv.cc
@@ -1006,6 +1006,19 @@ ifcvt_can_predicate (gimple *stmt)
   if (gimple_assign_single_p (stmt))
     return ifcvt_can_use_mask_load_store (stmt);
 
+  if (gimple_call_builtin_p (stmt))
+    if (tree callee = gimple_call_fndecl (stmt))
+      {
+	auto ifn = associated_internal_fn (callee);
+	auto cond_ifn = get_conditional_internal_fn (ifn);
+	tree type = TREE_TYPE (gimple_call_fntype (stmt));
+	return (cond_ifn != IFN_LAST
+		&& vectorized_internal_fn_supported_p (cond_ifn, type));
+      }
+
+  if (!is_gimple_assign (stmt))
+    return false;
+
   tree_code code = gimple_assign_rhs_code (stmt);
   tree lhs_type = TREE_TYPE (gimple_assign_lhs (stmt));
   tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
@@ -1150,6 +1163,23 @@ if_convertible_stmt_p (gimple *stmt, vec<data_reference_p> refs)
 		  }
 	  }
 
+	/* Check if the call can trap and if so require predication.  */
+	if (gimple_could_trap_p (stmt))
+	  {
+	    if (ifcvt_can_predicate (stmt))
+	      {
+		gimple_set_plf (stmt, GF_PLF_2, true);
+		need_to_predicate = true;
+		return true;
+	      }
+	    else
+	      {
+		if (dump_file && (dump_flags & TDF_DETAILS))
+		  fprintf (dump_file, "stmt could trap...\n");
+		return false;
+	      }
+	  }
+
 	/* There are some IFN_s that are used to replace builtins but have the
 	   same semantics.  Even if MASK_CALL cannot handle them vectorable_call
 	   will insert the proper selection, so do not block conversion.  */
@@ -2840,20 +2870,38 @@ value_available_p (gimple *stmt, hash_set<tree_ssa_name_hash> *ssa_names,
    SSA names defined earlier in STMT's block.  */
 
 static gimple *
-predicate_rhs_code (gassign *stmt, tree mask, tree cond,
+predicate_rhs_code (gimple *stmt, tree mask, tree cond,
 		    hash_set<tree_ssa_name_hash> *ssa_names)
 {
-  tree lhs = gimple_assign_lhs (stmt);
-  tree_code code = gimple_assign_rhs_code (stmt);
-  unsigned int nops = gimple_num_ops (stmt);
-  internal_fn cond_fn = get_conditional_internal_fn (code);
+  internal_fn cond_fn;
+  if (is_gimple_assign (stmt))
+    {
+      tree_code code = gimple_assign_rhs_code (stmt);
+      cond_fn = get_conditional_internal_fn (code);
+    }
+  else if (tree callee = gimple_call_fndecl (stmt))
+    {
+      auto ifn = associated_internal_fn (callee);
+      cond_fn = get_conditional_internal_fn (ifn);
+    }
+  else
+    return NULL;
+
+  if (cond_fn == IFN_LAST)
+    {
+      gcc_assert (!gimple_could_trap_p (stmt));
+      return NULL;
+    }
+
+  tree lhs = gimple_get_lhs (stmt);
+  unsigned int nops = gimple_num_args (stmt) + 1;
 
   /* Construct the arguments to the conditional internal function.   */
   auto_vec<tree, 8> args;
   args.safe_grow (nops + 1, true);
   args[0] = mask;
-  for (unsigned int i = 1; i < nops; ++i)
-    args[i] = gimple_op (stmt, i);
+  for (unsigned int i = 0; i < nops - 1; ++i)
+    args[i+1] = gimple_arg (stmt, i);
   args[nops] = NULL_TREE;
 
   /* Look for uses of the result to see whether they are COND_EXPRs that can
@@ -3030,8 +3078,9 @@ predicate_statements (loop_p loop)
 
       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
 	{
-	  gassign *stmt = dyn_cast <gassign *> (gsi_stmt (gsi));
-	  if (!stmt)
+	  gimple *stmt = gsi_stmt (gsi);
+	  if (!is_gimple_assign (stmt)
+	      && !gimple_call_builtin_p (stmt))
 	    ;
 	  else if (is_false_predicate (cond)
 		   && gimple_vdef (stmt))
@@ -3042,9 +3091,14 @@ predicate_statements (loop_p loop)
 	      continue;
 	    }
 	  else if (gimple_plf (stmt, GF_PLF_2)
-		   && is_gimple_assign (stmt))
+		   && (is_gimple_assign (stmt)
+		       || gimple_call_builtin_p (stmt)))
 	    {
-	      tree lhs = gimple_assign_lhs (stmt);
+	      tree lhs = gimple_get_lhs (stmt);
+	      /* ?? Assume that calls without an LHS are not data processing
+		 and so no issues with traps.  */
+	      if (!lhs)
+		continue;
 	      tree mask;
 	      gimple *new_stmt;
 	      gimple_seq stmts = NULL;
@@ -3080,11 +3134,14 @@ predicate_statements (loop_p loop)
 		  vect_masks.safe_push (mask);
 		}
 	      if (gimple_assign_single_p (stmt))
-		new_stmt = predicate_load_or_store (&gsi, stmt, mask);
+		new_stmt = predicate_load_or_store (&gsi,
+						    as_a <gassign *> (stmt),
+						    mask);
 	      else
 		new_stmt = predicate_rhs_code (stmt, mask, cond, &ssa_names);
 
-	      gsi_replace (&gsi, new_stmt, true);
+	      if (new_stmt)
+		gsi_replace (&gsi, new_stmt, true);
 	    }
 	  else if (gimple_needing_rewrite_undefined (stmt))
 	    rewrite_to_defined_unconditional (&gsi);

Reply via email to