https://gcc.gnu.org/g:7fcd3ed36c68d39b1d51137d5bdf0bd91b99be60

commit r16-6510-g7fcd3ed36c68d39b1d51137d5bdf0bd91b99be60
Author: Tamar Christina <[email protected]>
Date:   Mon Jan 5 20:55:34 2026 +0000

    vect: teach if-convert to predicate __builtin calls [PR122103]
    
    The following testcase
    
    void f (float *__restrict c, int *__restrict d, int n)
    {
        for (int i = 0; i < n; i++)
        {
          if (d[i] > 1000)
            c[i] = __builtin_sqrtf (c[i]);
        }
    }
    
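    compiled with -O3 -march=armv9-a -fno-math-errno -ftrapping-math needs the
    sqrt call itself to be predicated on the condition.  It is invalid to execute
    the call unconditionally and use a select to pick the result afterwards
    unless -fno-trapping-math is in effect, because the inactive lanes could then
    raise floating-point exceptions.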
    
    This change in if-conversion changes what we used to generate:
    
      _26 = _4 > 1000;
      _34 = _33 + _2;
      _5 = (float *) _34;
      _6 = .MASK_LOAD (_5, 32B, _26, 0.0);
      _7 = __builtin_sqrtf (_6);
      .MASK_STORE (_5, 32B, _26, _7);
    
    into
    
      _26 = _4 > 1000;
      _34 = _33 + _2;
      _5 = (float *) _34;
      _6 = .MASK_LOAD (_5, 32B, _26, 0.0);
      _7 = .COND_SQRT (_26, _6, _6);
      .MASK_STORE (_5, 32B, _26, _7);
    
    which correctly results in
    
    .L3:
            ld1w    z0.s, p7/z, [x1, x3, lsl 2]
            cmpgt   p7.s, p7/z, z0.s, z31.s
            ld1w    z30.s, p7/z, [x0, x3, lsl 2]
            fsqrt   z30.s, p7/m, z30.s
            st1w    z30.s, p7, [x0, x3, lsl 2]
            incw    x3
            whilelo p7.s, w3, w2
            b.any   .L3
    
    instead of
    
    .L3:
            ld1w    z0.s, p7/z, [x1, x3, lsl 2]
            cmpgt   p7.s, p7/z, z0.s, z31.s
            ld1w    z30.s, p7/z, [x0, x3, lsl 2]
            fsqrt   z30.s, p6/m, z30.s
            st1w    z30.s, p7, [x0, x3, lsl 2]
            incw    x3
            whilelo p7.s, w3, w2
            b.any   .L3
    
    gcc/ChangeLog:
    
            PR tree-optimization/122103
            * tree-if-conv.cc (ifcvt_can_predicate): Support gimple_call_builtin_p.
            (if_convertible_stmt_p, predicate_rhs_code,
            predicate_statements): Likewise.
    
    gcc/testsuite/ChangeLog:
    
            PR tree-optimization/122103
            * gcc.target/aarch64/sve/pr122103_1.c: New test.
            * gcc.target/aarch64/sve/pr122103_2.c: New test.
            * gcc.target/aarch64/sve/pr122103_3.c: New test.

Diff:
---
 gcc/testsuite/gcc.target/aarch64/sve/pr122103_1.c | 93 +++++++++++++++++++++++
 gcc/testsuite/gcc.target/aarch64/sve/pr122103_2.c | 27 +++++++
 gcc/testsuite/gcc.target/aarch64/sve/pr122103_3.c | 27 +++++++
 gcc/tree-if-conv.cc                               | 83 ++++++++++++++++----
 4 files changed, 217 insertions(+), 13 deletions(-)

diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr122103_1.c b/gcc/testsuite/gcc.target/aarch64/sve/pr122103_1.c
new file mode 100644
index 000000000000..f06f8611393d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr122103_1.c
@@ -0,0 +1,93 @@
+/* { dg-do run }  */
+/* { dg-require-effective-target glibc }  */
+/* { dg-options "-O3 -fno-math-errno -ftrapping-math -march=armv8-a+sve" }  */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <math.h>
+#include <fenv.h>
+#include <signal.h>
+
+#pragma STDC FENV_ACCESS ON
+
+__attribute__((noinline))
+void f(float *__restrict c, int *__restrict d, int n)
+{
+    for (int i = 0; i < n; i++) {
+        if (d[i] > 1000)
+            c[i] = __builtin_sqrtf(c[i]);
+    }
+}
+
+static void on_fpe(int sig)
+{
+    (void)sig;
+    puts("SIGFPE: trapped FP exception (likely FE_INVALID from sqrt on a negative/sNaN lane)");
+    fflush(stdout);
+    __builtin_abort ();
+}
+
+int main(void)
+{
+    signal(SIGFPE, on_fpe);
+
+    // Clear flags and enable trap on invalid operations.
+    feclearexcept(FE_ALL_EXCEPT);
+    feenableexcept(FE_INVALID);
+
+    // Choose a length that is NOT a multiple of typical SVE VL (unknown at runtime),
+    // and includes plenty of inactive lanes.
+    const int n = 37;
+
+    float *c = aligned_alloc(64, (size_t)n * sizeof(float));
+    int   *d = aligned_alloc(64, (size_t)n * sizeof(int));
+    if (!c || !d) return 1;
+
+    // Construct data:
+    // - For lanes where d<=1000, put negative values in c (sqrt would be FE_INVALID if executed).
+    // - For lanes where d>1000, put positive values in c (legal sqrt).
+    for (int i = 0; i < n; i++) {
+        if ((i % 3) == 0) {
+            d[i] = 1001;      // active
+            c[i] = 4.0f;      // sqrt OK
+        } else {
+            d[i] = 0;         // inactive
+            c[i] = -1.0f;     // sqrt would be invalid if wrongly executed
+        }
+    }
+
+    // Call f. Correct behavior: no SIGFPE, and only positions with d>1000 are modified.
+    f(c, d, n);
+
+    // If traps are unavailable, at least report raised flags.
+    int raised = fetestexcept(FE_ALL_EXCEPT);
+    if (raised) {
+        printf("FP flags raised: 0x%x\n", raised);
+    } else {
+        puts("No FP flags raised.");
+    }
+
+    // Check results.
+    int ok = 1;
+    for (int i = 0; i < n; i++) {
+        if (d[i] > 1000) {
+            if (!(c[i] == 2.0f)) { // sqrt(4) = 2
+                printf("Mismatch at %d: expected 2.0, got %g\n", i, c[i]);
+                ok = 0;
+            }
+        } else {
+            if (!(c[i] == -1.0f)) { // must remain unchanged
+                printf("Clobber at %d: expected -1.0 unchanged, got %g\n", i, c[i]);
+                ok = 0;
+            }
+        }
+    }
+
+    puts(ok ? "OK" : "FAIL");
+    free(c);
+    free(d);
+    return ok ? 0 : 2;
+}
+
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr122103_2.c b/gcc/testsuite/gcc.target/aarch64/sve/pr122103_2.c
new file mode 100644
index 000000000000..db2c1e70079d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr122103_2.c
@@ -0,0 +1,27 @@
+/* { dg-do compile }  */
+/* { dg-options "-O3 -fno-math-errno -ftrapping-math -march=armv9-a" }  */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+/*
+** f:
+**     ...
+**     whilelo p([0-9]+).s, wzr, w[0-9]+
+**     ...
+**     ld1w    z[0-9]+.s, p\1/z, \[x[0-9]+, x[0-9]+, lsl 2\]
+**     cmpgt   p\1.s, p\1/z, z[0-9]+.s, z[0-9]+.s
+**     ld1w    z[0-9]+.s, p\1/z, \[x[0-9]+, x[0-9]+, lsl 2\]
+**     fsqrt   z[0-9]+.s, p\1/m, z[0-9]+.s
+**     st1w    z[0-9]+.s, p\1, \[x[0-9]+, x[0-9]+, lsl 2\]
+**     incw    x[0-9]+
+**     whilelo p\1.s, w[0-9]+, w[0-9]+
+**     ...
+*/
+void f (float *__restrict c, int *__restrict d, int n)
+{
+    for (int i = 0; i < n; i++)
+    {
+      if (d[i] > 1000)
+        c[i] = __builtin_sqrtf (c[i]);
+    }
+}
+
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr122103_3.c b/gcc/testsuite/gcc.target/aarch64/sve/pr122103_3.c
new file mode 100644
index 000000000000..7232101202b7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr122103_3.c
@@ -0,0 +1,27 @@
+/* { dg-do compile }  */
+/* { dg-options "-O3 -fno-math-errno -fno-trapping-math -march=armv9-a" }  */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+/*
+** f:
+**     ...
+**     whilelo p([0-9]+).s, wzr, w[0-9]+
+**     ...
+**     ld1w    z[0-9]+.s, p\1/z, \[x[0-9]+, x[0-9]+, lsl 2\]
+**     cmpgt   p\1.s, p\1/z, z[0-9]+.s, z[0-9]+.s
+**     ld1w    z[0-9]+.s, p\1/z, \[x[0-9]+, x[0-9]+, lsl 2\]
+**     fsqrt   z[0-9]+.s, p[0-9]+/m, z[0-9]+.s
+**     st1w    z[0-9]+.s, p\1, \[x[0-9]+, x[0-9]+, lsl 2\]
+**     incw    x[0-9]+
+**     whilelo p\1.s, w[0-9]+, w[0-9]+
+**     ...
+*/
+void f (float *__restrict c, int *__restrict d, int n)
+{
+    for (int i = 0; i < n; i++)
+    {
+      if (d[i] > 1000)
+        c[i] = __builtin_sqrtf (c[i]);
+    }
+}
+
diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc
index 21ffea1544cb..53227bd64c16 100644
--- a/gcc/tree-if-conv.cc
+++ b/gcc/tree-if-conv.cc
@@ -1006,6 +1006,19 @@ ifcvt_can_predicate (gimple *stmt)
   if (gimple_assign_single_p (stmt))
     return ifcvt_can_use_mask_load_store (stmt);
 
+  if (gimple_call_builtin_p (stmt))
+    if (tree callee = gimple_call_fndecl (stmt))
+      {
+       auto ifn = associated_internal_fn (callee);
+       auto cond_ifn = get_conditional_internal_fn (ifn);
+       tree type = TREE_TYPE (gimple_call_fntype (stmt));
+       return (cond_ifn != IFN_LAST
+               && vectorized_internal_fn_supported_p (cond_ifn, type));
+      }
+
+  if (!is_gimple_assign (stmt))
+    return false;
+
   tree_code code = gimple_assign_rhs_code (stmt);
   tree lhs_type = TREE_TYPE (gimple_assign_lhs (stmt));
   tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
@@ -1150,6 +1163,23 @@ if_convertible_stmt_p (gimple *stmt, vec<data_reference_p> refs)
                  }
          }
 
+       /* Check if the call can trap and if so require predication.  */
+       if (gimple_could_trap_p (stmt))
+         {
+           if (ifcvt_can_predicate (stmt))
+             {
+               gimple_set_plf (stmt, GF_PLF_2, true);
+               need_to_predicate = true;
+               return true;
+             }
+           else
+             {
+               if (dump_file && (dump_flags & TDF_DETAILS))
+                 fprintf (dump_file, "stmt could trap...\n");
+               return false;
+             }
+         }
+
        /* There are some IFN_s that are used to replace builtins but have the
           same semantics.  Even if MASK_CALL cannot handle them vectorable_call
           will insert the proper selection, so do not block conversion.  */
@@ -2840,20 +2870,38 @@ value_available_p (gimple *stmt, hash_set<tree_ssa_name_hash> *ssa_names,
    SSA names defined earlier in STMT's block.  */
 
 static gimple *
-predicate_rhs_code (gassign *stmt, tree mask, tree cond,
+predicate_rhs_code (gimple *stmt, tree mask, tree cond,
                    hash_set<tree_ssa_name_hash> *ssa_names)
 {
-  tree lhs = gimple_assign_lhs (stmt);
-  tree_code code = gimple_assign_rhs_code (stmt);
-  unsigned int nops = gimple_num_ops (stmt);
-  internal_fn cond_fn = get_conditional_internal_fn (code);
+  internal_fn cond_fn;
+  if (is_gimple_assign (stmt))
+    {
+      tree_code code = gimple_assign_rhs_code (stmt);
+      cond_fn = get_conditional_internal_fn (code);
+    }
+  else if (tree callee = gimple_call_fndecl (stmt))
+    {
+      auto ifn = associated_internal_fn (callee);
+      cond_fn = get_conditional_internal_fn (ifn);
+    }
+  else
+    return NULL;
+
+  if (cond_fn == IFN_LAST)
+    {
+      gcc_assert (!gimple_could_trap_p (stmt));
+      return NULL;
+    }
+
+  tree lhs = gimple_get_lhs (stmt);
+  unsigned int nops = gimple_num_args (stmt) + 1;
 
   /* Construct the arguments to the conditional internal function.   */
   auto_vec<tree, 8> args;
   args.safe_grow (nops + 1, true);
   args[0] = mask;
-  for (unsigned int i = 1; i < nops; ++i)
-    args[i] = gimple_op (stmt, i);
+  for (unsigned int i = 0; i < nops - 1; ++i)
+    args[i+1] = gimple_arg (stmt, i);
   args[nops] = NULL_TREE;
 
   /* Look for uses of the result to see whether they are COND_EXPRs that can
@@ -3030,8 +3078,9 @@ predicate_statements (loop_p loop)
 
       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
        {
-         gassign *stmt = dyn_cast <gassign *> (gsi_stmt (gsi));
-         if (!stmt)
+         gimple *stmt = gsi_stmt (gsi);
+         if (!is_gimple_assign (stmt)
+             && !gimple_call_builtin_p (stmt))
            ;
          else if (is_false_predicate (cond)
                   && gimple_vdef (stmt))
@@ -3042,9 +3091,14 @@ predicate_statements (loop_p loop)
              continue;
            }
          else if (gimple_plf (stmt, GF_PLF_2)
-                  && is_gimple_assign (stmt))
+                  && (is_gimple_assign (stmt)
+                      || gimple_call_builtin_p (stmt)))
            {
-             tree lhs = gimple_assign_lhs (stmt);
+             tree lhs = gimple_get_lhs (stmt);
+             /* ?? Assume that calls without an LHS are not data processing
+                and so no issues with traps.  */
+             if (!lhs)
+               continue;
              tree mask;
              gimple *new_stmt;
              gimple_seq stmts = NULL;
@@ -3080,11 +3134,14 @@ predicate_statements (loop_p loop)
                  vect_masks.safe_push (mask);
                }
              if (gimple_assign_single_p (stmt))
-               new_stmt = predicate_load_or_store (&gsi, stmt, mask);
+               new_stmt = predicate_load_or_store (&gsi,
+                                                   as_a <gassign *> (stmt),
+                                                   mask);
              else
                new_stmt = predicate_rhs_code (stmt, mask, cond, &ssa_names);
 
-             gsi_replace (&gsi, new_stmt, true);
+             if (new_stmt)
+               gsi_replace (&gsi, new_stmt, true);
            }
          else if (gimple_needing_rewrite_undefined (stmt))
            rewrite_to_defined_unconditional (&gsi);

Reply via email to