https://gcc.gnu.org/g:fef86de89576db4aee1ad248a7886545ef6967d7

commit r15-4116-gfef86de89576db4aee1ad248a7886545ef6967d7
Author: Victor Do Nascimento <victor.donascime...@arm.com>
Date:   Wed Aug 7 13:37:47 2024 +0100

    middle-end: reorder masking priority of math functions
    
    Given the categorization of math built-in functions as `ECF_CONST',
    when if-converting their uses, their calls are not masked and are thus
    called with an all-true predicate.
    
    This, however, is not appropriate where built-ins have library
    equivalents, wherein they may exhibit highly architecture-specific
    behaviors. For example, vectorized implementations may delegate the
    computation of values outside a certain acceptable numerical range to
    special (non-vectorized) routines which considerably slow down
    computation.
    
    As numerical simulation programs often do bounds check on input values
    prior to math calls, conditionally assigning default output values for
    out-of-bounds input and skipping the math call altogether, these
    fallback implementations should seldom be called in the execution of
    vectorized code.  If, however, we don't apply any masking to these
    math functions, we end up effectively executing both if and else
    branches for these values, leading to considerable performance
    degradation on scientific workloads.
    
    We therefore invert the order of handling of math function calls in
    `if_convertible_stmt_p' to prioritize the handling of their
    library-provided implementations over the equivalent internal function.
    
    gcc/ChangeLog:
    
            * tree-if-conv.cc (if_convertible_stmt_p): Check for explicit
            function declaration before IFN fallback.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.dg/vect/vect-fncall-mask-math.c: New.

Diff:
---
 gcc/testsuite/gcc.dg/vect/vect-fncall-mask-math.c | 33 +++++++++++++++++++++++
 gcc/tree-if-conv.cc                               | 18 ++++++-------
 2 files changed, 42 insertions(+), 9 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/vect/vect-fncall-mask-math.c 
b/gcc/testsuite/gcc.dg/vect/vect-fncall-mask-math.c
new file mode 100644
index 000000000000..15e22da28079
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-fncall-mask-math.c
@@ -0,0 +1,33 @@
+/* Test the correct application of masking to autovectorized math function 
calls.
+   Test is currently set to xfail pending the release of the relevant lmvec
+   support. */
+/* { dg-do compile { target { aarch64*-*-* } } } */
+/* { dg-additional-options "-march=armv8.2-a+sve -fdump-tree-ifcvt-raw -Ofast" 
{ target { aarch64*-*-* } } } */
+
+#include <math.h>
+
+const int N = 20;
+const float lim = 101.0;
+const float cst =  -1.0;
+float tot =   0.0;
+
+float b[20];
+float a[20] = { [0 ... 9] = 1.7014118e39, /* If branch. */
+               [10 ... 19] = 100.0 };    /* Else branch.  */
+
+int main (void)
+{
+  #pragma omp simd
+  for (int i = 0; i < N; i += 1)
+    {
+      if (a[i] > lim)
+       b[i] = cst;
+      else
+       b[i] = expf (a[i]);
+      tot += b[i];
+    }
+  return (0);
+}
+
+/* { dg-final { scan-tree-dump-not { gimple_call <expf, _2, _1>} ifcvt { xfail 
{ aarch64*-*-* } } } } */
+/* { dg-final { scan-tree-dump { gimple_call <.MASK_CALL, _2, expf, _1, _30>} 
ifcvt { xfail { aarch64*-*-* } } } } */
diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc
index 3b04d1e8d34f..90c754a48147 100644
--- a/gcc/tree-if-conv.cc
+++ b/gcc/tree-if-conv.cc
@@ -1133,15 +1133,6 @@ if_convertible_stmt_p (gimple *stmt, 
vec<data_reference_p> refs)
 
     case GIMPLE_CALL:
       {
-       /* There are some IFN_s that are used to replace builtins but have the
-          same semantics.  Even if MASK_CALL cannot handle them vectorable_call
-          will insert the proper selection, so do not block conversion.  */
-       int flags = gimple_call_flags (stmt);
-       if ((flags & ECF_CONST)
-           && !(flags & ECF_LOOPING_CONST_OR_PURE)
-           && gimple_call_combined_fn (stmt) != CFN_LAST)
-         return true;
-
        tree fndecl = gimple_call_fndecl (stmt);
        if (fndecl)
          {
@@ -1160,6 +1151,15 @@ if_convertible_stmt_p (gimple *stmt, 
vec<data_reference_p> refs)
                  }
          }
 
+       /* There are some IFN_s that are used to replace builtins but have the
+          same semantics.  Even if MASK_CALL cannot handle them vectorable_call
+          will insert the proper selection, so do not block conversion.  */
+       int flags = gimple_call_flags (stmt);
+       if ((flags & ECF_CONST)
+           && !(flags & ECF_LOOPING_CONST_OR_PURE)
+           && gimple_call_combined_fn (stmt) != CFN_LAST)
+         return true;
+
        return false;
       }

Reply via email to