https://gcc.gnu.org/g:abbfe1e2616d8f18ccd7f9ea65d92aecec7258ea

commit r14-10788-gabbfe1e2616d8f18ccd7f9ea65d92aecec7258ea
Author: Victor Do Nascimento <victor.donascime...@arm.com>
Date:   Mon Sep 23 17:10:18 2024 +0100

    middle-end: Fix ifcvt predicate generation for masked function calls
    
    Up until now, due to a latent bug in the code for the ifcvt pass,
    irrespective of the branch taken in a conditional statement, the
    original condition for the if statement was used in masking the
    function call.
    
    Thus, for code such as:
    
      if (a[i] > limit)
        b[i] = fixed_const;
      else
        b[i] = fn (a[i]);
    
    we would generate the following (wrong) if-converted tree code:
    
      _1 = a[i_1];
      _2 = _1 > limit;
      _3 = .MASK_CALL (fn, _1, _2);
      cstore_4 = _2 ? fixed_const : _3;
    
    as opposed to the correct expected sequence:
    
      _1 = a[i_1];
      _2 = _1 > limit;
      _3 = ~_2;
      _4 = .MASK_CALL (fn, _1, _3);
      cstore_5 = _2 ? fixed_const : _4;
    
    This patch ensures that the correct predicate mask generation is
    carried out such that, upon autovectorization, the correct vector
    lanes are selected in the vectorized function call.
    
    gcc/ChangeLog:
    
            * tree-if-conv.cc (predicate_statements): Fix handling of
            predicated function calls.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.dg/vect/vect-fncall-mask.c: New.

Diff:
---
 gcc/testsuite/gcc.dg/vect/vect-fncall-mask.c | 31 ++++++++++++++++++++++++++++
 gcc/tree-if-conv.cc                          | 14 ++++++++++++-
 2 files changed, 44 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/vect/vect-fncall-mask.c 
b/gcc/testsuite/gcc.dg/vect/vect-fncall-mask.c
new file mode 100644
index 000000000000..554488e06308
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-fncall-mask.c
@@ -0,0 +1,31 @@
+/* { dg-do compile { target { aarch64*-*-* } } } */
+/* { dg-additional-options "-march=armv8.2-a+sve -fdump-tree-ifcvt-raw -Ofast" 
{ target { aarch64*-*-* } } } */
+
+extern int __attribute__ ((simd, const)) fn (int);
+
+const int N = 20;
+const float lim = 101.0;
+const float cst =  -1.0;
+float tot =   0.0;
+
+float b[20];
+float a[20] = { [0 ... 9] = 1.7014118e39, /* If branch. */
+               [10 ... 19] = 100.0 };    /* Else branch.  */
+
+int main (void)
+{
+  #pragma omp simd
+  for (int i = 0; i < N; i += 1)
+    {
+      if (a[i] > lim)
+       b[i] = cst;
+      else
+       b[i] = fn (a[i]);
+      tot += b[i];
+    }
+  return (0);
+}
+
+/* { dg-final { scan-tree-dump {gimple_assign <gt_expr, _12, _1, 1.01e\+2, 
NULL>} ifcvt } } */
+/* { dg-final { scan-tree-dump {gimple_assign <bit_not_expr, _34, _12, NULL, 
NULL>} ifcvt } } */
+/* { dg-final { scan-tree-dump {gimple_call <.MASK_CALL, _3, fn, _2, _34>} 
ifcvt } } */
diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc
index c4c3ed41a447..873a5c66629f 100644
--- a/gcc/tree-if-conv.cc
+++ b/gcc/tree-if-conv.cc
@@ -2827,6 +2827,7 @@ predicate_statements (loop_p loop)
                 This will cause the vectorizer to match the "in branch"
                 clone variants, and serves to build the mask vector
                 in a natural way.  */
+             tree mask = cond;
              gcall *call = dyn_cast <gcall *> (gsi_stmt (gsi));
              tree orig_fn = gimple_call_fn (call);
              int orig_nargs = gimple_call_num_args (call);
@@ -2834,7 +2835,18 @@ predicate_statements (loop_p loop)
              args.safe_push (orig_fn);
              for (int i = 0; i < orig_nargs; i++)
                args.safe_push (gimple_call_arg (call, i));
-             args.safe_push (cond);
+             /* If `swap', we invert the mask used for the if branch for use
+                when masking the function call.  */
+             if (swap)
+               {
+                 gimple_seq stmts = NULL;
+                 tree true_val
+                   = constant_boolean_node (true, TREE_TYPE (mask));
+                 mask = gimple_build (&stmts, BIT_XOR_EXPR,
+                                      TREE_TYPE (mask), mask, true_val);
+                 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
+               }
+             args.safe_push (mask);
 
              /* Replace the call with a IFN_MASK_CALL that has the extra
                 condition parameter. */

Reply via email to