Hi,
The attached patch tries to fix PR91272.
Does it look OK?

With the patch applied, I see the following failures for aarch64-sve.exp:
FAIL: gcc.target/aarch64/sve/clastb_1.c -march=armv8.2-a+sve
scan-assembler \\tclastb\\tw[0-9]+, p[0-7], w[0-9]+, z[0-9]+\\.s
FAIL: gcc.target/aarch64/sve/clastb_2.c -march=armv8.2-a+sve
scan-assembler \\tclastb\\tw[0-9]+, p[0-7]+, w[0-9]+, z[0-9]+\\.s
FAIL: gcc.target/aarch64/sve/clastb_3.c -march=armv8.2-a+sve
scan-assembler \\tclastb\\tw[0-9]+, p[0-7]+, w[0-9]+, z[0-9]+\\.b
FAIL: gcc.target/aarch64/sve/clastb_5.c -march=armv8.2-a+sve
scan-assembler \\tclastb\\tx[0-9]+, p[0-7], x[0-9]+, z[0-9]+\\.d

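For instance, for clastb_1.c the compiler now emits:
        clastb  s1, p1, s1, z0.s
(i.e. the s-register form rather than the w-register form that the test's
scan-assembler pattern expects) while using a fully predicated loop.
Should I adjust the tests?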

Thanks,
Prathamesh
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/clastb_1.c b/gcc/testsuite/gcc.target/aarch64/sve/clastb_1.c
index d4f9b0b6a94..6e69b264e9b 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/clastb_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/clastb_1.c
@@ -1,5 +1,5 @@
 /* { dg-do assemble { target aarch64_asm_sve_ok } } */
-/* { dg-options "-O2 -ftree-vectorize --save-temps" } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */
 
 #define N 32
 
@@ -17,4 +17,5 @@ condition_reduction (int *a, int min_v)
   return last;
 }
 
+/* { dg-final { scan-tree-dump "using a fully-masked loop." "vect" } } */
 /* { dg-final { scan-assembler {\tclastb\tw[0-9]+, p[0-7], w[0-9]+, z[0-9]+\.s} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/clastb_2.c b/gcc/testsuite/gcc.target/aarch64/sve/clastb_2.c
index 2c49bd3b0f0..d1a743972a7 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/clastb_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/clastb_2.c
@@ -1,5 +1,5 @@
 /* { dg-do assemble { target aarch64_asm_sve_ok } } */
-/* { dg-options "-O2 -ftree-vectorize --save-temps" } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */
 
 #include <stdint.h>
 
@@ -23,4 +23,5 @@ condition_reduction (TYPE *a, TYPE min_v)
   return last;
 }
 
+/* { dg-final { scan-tree-dump "using a fully-masked loop." "vect" } } */
 /* { dg-final { scan-assembler {\tclastb\tw[0-9]+, p[0-7]+, w[0-9]+, z[0-9]+\.s} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/clastb_3.c b/gcc/testsuite/gcc.target/aarch64/sve/clastb_3.c
index 35344f446c6..71e85c03cc0 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/clastb_3.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/clastb_3.c
@@ -1,8 +1,9 @@
 /* { dg-do assemble { target aarch64_asm_sve_ok } } */
-/* { dg-options "-O2 -ftree-vectorize --save-temps" } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */
 
 #define TYPE uint8_t
 
 #include "clastb_2.c"
 
+/* { dg-final { scan-tree-dump "using a fully-masked loop." "vect" } } */
 /* { dg-final { scan-assembler {\tclastb\tw[0-9]+, p[0-7]+, w[0-9]+, z[0-9]+\.b} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/clastb_4.c b/gcc/testsuite/gcc.target/aarch64/sve/clastb_4.c
index ce58abd6161..b4db170ea06 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/clastb_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/clastb_4.c
@@ -1,8 +1,9 @@
 /* { dg-do assemble { target aarch64_asm_sve_ok } } */
-/* { dg-options "-O2 -ftree-vectorize --save-temps" } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */
 
 #define TYPE int16_t
 
 #include "clastb_2.c"
 
+/* { dg-final { scan-tree-dump "using a fully-masked loop." "vect" } } */
 /* { dg-final { scan-assembler {\tclastb\tw[0-9]+, p[0-7], w[0-9]+, z[0-9]+\.h} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/clastb_5.c b/gcc/testsuite/gcc.target/aarch64/sve/clastb_5.c
index 2b9783d6627..878d9f60913 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/clastb_5.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/clastb_5.c
@@ -1,8 +1,9 @@
 /* { dg-do assemble { target aarch64_asm_sve_ok } } */
-/* { dg-options "-O2 -ftree-vectorize --save-temps" } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */
 
 #define TYPE uint64_t
 
 #include "clastb_2.c"
 
+/* { dg-final { scan-tree-dump "using a fully-masked loop." "vect" } } */
 /* { dg-final { scan-assembler {\tclastb\tx[0-9]+, p[0-7], x[0-9]+, z[0-9]+\.d} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/clastb_6.c b/gcc/testsuite/gcc.target/aarch64/sve/clastb_6.c
index c47d303f730..38632a21be1 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/clastb_6.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/clastb_6.c
@@ -1,5 +1,5 @@
 /* { dg-do assemble { target aarch64_asm_sve_ok } } */
-/* { dg-options "-O2 -ftree-vectorize --save-temps" } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */
 
 #define N 32
 
@@ -21,4 +21,5 @@ condition_reduction (TYPE *a, TYPE min_v)
   return last;
 }
 
+/* { dg-final { scan-tree-dump "using a fully-masked loop." "vect" } } */
 /* { dg-final { scan-assembler {\tclastb\ts[0-9]+, p[0-7], s[0-9]+, z[0-9]+\.s} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/clastb_7.c b/gcc/testsuite/gcc.target/aarch64/sve/clastb_7.c
index 3345f874a39..e5307d2edc8 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/clastb_7.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/clastb_7.c
@@ -1,7 +1,8 @@
 /* { dg-do assemble { target aarch64_asm_sve_ok } } */
-/* { dg-options "-O2 -ftree-vectorize --save-temps" } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */
 
 #define TYPE double
 #include "clastb_6.c"
 
+/* { dg-final { scan-tree-dump "using a fully-masked loop." "vect" } } */
 /* { dg-final { scan-assembler {\tclastb\td[0-9]+, p[0-7], d[0-9]+, z[0-9]+\.d} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/clastb_8.c b/gcc/testsuite/gcc.target/aarch64/sve/clastb_8.c
index d86a428a7fa..583fc8d8d6d 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/clastb_8.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/clastb_8.c
@@ -1,5 +1,5 @@
 /* { dg-do assemble { target aarch64_asm_sve_ok } } */
-/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=256 --save-temps" } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details -msve-vector-bits=256 --save-temps" } */
 
 #include <stdint.h>
 
@@ -19,6 +19,7 @@ TEST_TYPE (uint16_t);
 TEST_TYPE (uint32_t);
 TEST_TYPE (uint64_t);
 
+/* { dg-final { scan-tree-dump-times "using a fully-masked loop." 4 "vect" } } */
 /* { dg-final { scan-assembler {\tclastb\t(b[0-9]+), p[0-7], \1, z[0-9]+\.b\n} } } */
 /* { dg-final { scan-assembler {\tclastb\t(h[0-9]+), p[0-7], \1, z[0-9]+\.h\n} } } */
 /* { dg-final { scan-assembler {\tclastb\t(s[0-9]+), p[0-7], \1, z[0-9]+\.s\n} } } */
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index a70d52eb2ca..82814e2c2af 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -6428,6 +6428,7 @@ vectorizable_reduction (stmt_vec_info stmt_info, slp_tree slp_node,
   if (loop_vinfo && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
     {
       if (reduction_type != FOLD_LEFT_REDUCTION
+	  && reduction_type != EXTRACT_LAST_REDUCTION
 	  && !mask_by_cond_expr
 	  && (cond_fn == IFN_LAST
 	      || !direct_internal_fn_supported_p (cond_fn, vectype_in,
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index acdd90784dc..2cad2cb94c8 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -10016,7 +10016,8 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
       /* See whether another part of the vectorized code applies a loop
 	 mask to the condition, or to its inverse.  */
 
-      if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
+      if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
+	  && reduction_type != EXTRACT_LAST_REDUCTION)
 	{
 	  scalar_cond_masked_key cond (cond_expr, ncopies);
 	  if (loop_vinfo->scalar_cond_masked_set.contains (cond))
@@ -10116,6 +10117,15 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
           vec_then_clause = vec_oprnds2[i];
           vec_else_clause = vec_oprnds3[i];
 
+          if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
+	      && reduction_type == EXTRACT_LAST_REDUCTION)
+	    {
+	      vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
+	      unsigned vec_num = vec_oprnds0.length ();
+	      loop_mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
+					      vectype, vec_num * j + i);
+	    }
+
 	  if (swap_cond_operands)
 	    std::swap (vec_then_clause, vec_else_clause);
 
@@ -10180,7 +10190,7 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
 	     vec != { 0, ... } (masked in the MASK_LOAD,
 	     unmasked in the VEC_COND_EXPR).  */
 
-	  if (loop_mask)
+	  if (loop_mask && reduction_type != EXTRACT_LAST_REDUCTION)
 	    {
 	      if (COMPARISON_CLASS_P (vec_compare))
 		{
@@ -10220,6 +10230,16 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
 		  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
 		  vec_compare = vec_compare_name;
 		}
+
+	      if (loop_mask)
+		{
+		  tree tmp = make_ssa_name (vec_cmp_type);
+		  gassign *g = gimple_build_assign (tmp, BIT_AND_EXPR,
+						    vec_compare, loop_mask);
+		  vect_finish_stmt_generation (stmt_info, g, gsi);
+		  vec_compare = tmp;
+		}
+
 	      gcall *new_stmt = gimple_build_call_internal
 		(IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
 		 vec_then_clause);

Reply via email to