From: Pan Li <[email protected]>
The RISC-V test case vx-6-u8.c has recently started failing the
vaaddu.vx asm check with --param=gpr2vr-cost=2. After some
investigation, it turns out the code fails to vectorize after some
middle-end changes. The depth_limit of the function
vec_slp_has_scalar_use is 2, and the function then returns -1 by design.
As a result the slp_instance ends up with a size of 12 and we may see a
log similar to the one below:
*_2 1 times vec_to_scalar costs 3 in epilogue
*_2 1 times vec_to_scalar costs 3 in epilogue
*_2 1 times vec_to_scalar costs 3 in epilogue
*_2 1 times vec_to_scalar costs 3 in epilogue
Vector cost: 18
Scalar cost: 9
Vectorization is then rejected due to cost considerations.
This patch adjusts the depth_limit to 3, as suggested by Richard.
gcc/ChangeLog:
* tree-vect-slp.cc (vec_slp_has_scalar_use): Adjust the
depth_limit from 2 to 3.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/autovec/sat_add-cost-1.c: New test.
Signed-off-by: Pan Li <[email protected]>
---
.../riscv/rvv/autovec/sat_add-cost-1.c | 59 +++++++++++++++++++
gcc/tree-vect-slp.cc | 2 +-
2 files changed, 60 insertions(+), 1 deletion(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/sat_add-cost-1.c
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/sat_add-cost-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/sat_add-cost-1.c
new file mode 100644
index 00000000000..6f9ef08d3c2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/sat_add-cost-1.c
@@ -0,0 +1,59 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvl128b -mabi=lp64d --param=gpr2vr-cost=2 -fdump-tree-optimized" } */
+
+#include <stdint.h>
+
+#define T uint8_t
+
+T
+test_sat_add (T a, T b)
+{
+ return (a + b) | (-(T)((T)(a + b) < a));
+}
+
+void
+test_sat_add_cost_1 (T * restrict out, T * restrict in,
+ T x, unsigned n)
+{
+ unsigned k = 0;
+ T tmp = x + 3;
+
+ while (k < n)
+ {
+ tmp = tmp ^ 0x82;
+
+ out[k + 0] = test_sat_add (in[k + 0], tmp);
+ out[k + 1] = test_sat_add (in[k + 1], tmp);
+ k += 2;
+
+ out[k + 0] = test_sat_add (in[k + 0], tmp);
+ out[k + 1] = test_sat_add (in[k + 1], tmp);
+ k += 2;
+
+ out[k + 0] = test_sat_add (in[k + 0], tmp);
+ out[k + 1] = test_sat_add (in[k + 1], tmp);
+ k += 2;
+
+ out[k + 0] = test_sat_add (in[k + 0], tmp);
+ out[k + 1] = test_sat_add (in[k + 1], tmp);
+ k += 2;
+
+ out[k + 0] = test_sat_add (in[k + 0], tmp);
+ out[k + 1] = test_sat_add (in[k + 1], tmp);
+ k += 2;
+
+ out[k + 0] = test_sat_add (in[k + 0], tmp);
+ out[k + 1] = test_sat_add (in[k + 1], tmp);
+ k += 2;
+
+ out[k + 0] = test_sat_add (in[k + 0], tmp);
+ out[k + 1] = test_sat_add (in[k + 1], tmp);
+ k += 2;
+
+ out[k + 0] = test_sat_add (in[k + 0], tmp);
+ out[k + 1] = test_sat_add (in[k + 1], tmp);
+ k += 2;
+ }
+}
+
+/* { dg-final { scan-tree-dump ".SAT_ADD " "optimized" } } */
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 70470612411..2989f255523 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -8888,7 +8888,7 @@ vec_slp_has_scalar_use (bb_vec_info bb_vinfo, tree def,
hash_map<tree, int> &scalar_use_map,
int depth = 0)
{
- const int depth_limit = 2;
+ const int depth_limit = 3;
imm_use_iterator use_iter;
gimple *use_stmt;
--
2.43.0