https://gcc.gnu.org/g:5c2fdfc24e343ce332fce2b6616386e2da8ba9a6

commit r16-5374-g5c2fdfc24e343ce332fce2b6616386e2da8ba9a6
Author: Richard Biener <[email protected]>
Date:   Tue Nov 18 11:30:26 2025 +0100

    tree-optimization/122736 - OMP SIMD call mask recording
    
    When recording the mask for loop masking of OMP SIMD calls we
    currently fail to provide the correct vector type and number of
    copies in all cases.  The following tries to correct this.
    
            PR tree-optimization/122736
            * tree-vect-stmts.cc (vectorizable_simd_clone_call): Compute
            num_mask_args for all mask modes.  Pass the mask vector
            type to vect_record_loop_mask and adjust ncopies according
            to the number of mask arguments.
    
            * gcc.target/i386/vect-pr122736.c: New testcase.

Diff:
---
 gcc/testsuite/gcc.target/i386/vect-pr122736.c | 22 ++++++++++++++++++++++
 gcc/tree-vect-stmts.cc                        | 27 +++++++++++++++++++--------
 2 files changed, 41 insertions(+), 8 deletions(-)

diff --git a/gcc/testsuite/gcc.target/i386/vect-pr122736.c b/gcc/testsuite/gcc.target/i386/vect-pr122736.c
new file mode 100644
index 000000000000..2719a52d1061
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-pr122736.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fopenmp-simd -mavx512vl" } */
+
+#pragma omp declare simd
+double __attribute__((noinline))
+baz (double x)
+{
+  return x;
+}
+
+#pragma omp declare simd
+double
+foo (double d)
+{
+  return baz (d);
+}
+
+double __attribute__((noipa))
+fn (double x)
+{
+  return foo (x);
+}
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index bc15ac9d0852..0c23a9f23e21 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -4333,10 +4333,11 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
     return false;
 
   unsigned int num_mask_args = 0;
-  if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode))
-    for (i = 0; i < nargs; i++)
-      if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK)
-       num_mask_args++;
+  for (i = 0; i < bestn->simdclone->nargs; i++)
+    if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK)
+      num_mask_args++;
+  if (!SCALAR_INT_MODE_P (bestn->simdclone->mask_mode))
+    gcc_assert (num_mask_args <= 1);
 
   for (i = 0; i < nargs; i++)
     {
@@ -4483,10 +4484,20 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
            case SIMD_CLONE_ARG_TYPE_MASK:
              if (loop_vinfo
                  && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
-               vect_record_loop_mask (loop_vinfo,
-                                      &LOOP_VINFO_MASKS (loop_vinfo),
-                                      ncopies, vectype, op);
-
+               {
+                 tree arg_vectype;
+                 if (SCALAR_INT_MODE_P
+                       (TYPE_MODE (bestn->simdclone->args[i].vector_type)))
+                   arg_vectype = build_truth_vector_type_for_mode
+                       (exact_div (bestn->simdclone->simdlen, num_mask_args),
+                        TYPE_MODE (bestn->simdclone->args[i].vector_type));
+                 else
+                   arg_vectype = bestn->simdclone->args[i].vector_type;
+                 vect_record_loop_mask (loop_vinfo,
+                                        &LOOP_VINFO_MASKS (loop_vinfo),
+                                        ncopies * num_mask_args, arg_vectype,
+                                        op);
+               }
              break;
            }
        }

Reply via email to