Following Richard's suggestion, we should not model the address cost in the
loop vectorizer for SELECT_VL or decrementing-IV loops, since the other
vectorization styles do not model it either.
This makes the cost model comparison apples to apples.
This patch changes the length-update cost from 2 to 1, which turns out to
give better codegen in various cases for RVV.
OK for trunk?
PR target/111153
gcc/ChangeLog:
* tree-vect-loop.cc (vect_estimate_min_profitable_iters): Remove
address cost for select_vl/decrement IV.
gcc/testsuite/ChangeLog:
* gcc.dg/vect/costmodel/riscv/rvv/pr111153.c: Moved to...
* gcc.dg/vect/costmodel/riscv/rvv/pr11153-2.c: ...here.
* gcc.dg/vect/costmodel/riscv/rvv/pr111153-1.c: New test.
---
.../vect/costmodel/riscv/rvv/pr111153-1.c | 18 ++++++++++++++++++
.../riscv/rvv/{pr111153.c => pr11153-2.c} | 4 ++--
gcc/tree-vect-loop.cc | 10 ++++------
3 files changed, 24 insertions(+), 8 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr111153-1.c
rename gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/{pr111153.c =>
pr11153-2.c} (93%)
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr111153-1.c
b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr111153-1.c
new file mode 100644
index 00000000000..51c91f7410c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr111153-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize
-mtune=generic-ooo -ffast-math" } */
+
+#define DEF_REDUC_PLUS(TYPE)
\
+ TYPE __attribute__ ((noinline, noclone))
\
+ reduc_plus_##TYPE (TYPE *__restrict a, int n)
\
+ {
\
+ TYPE r = 0;
\
+ for (int i = 0; i < n; ++i)
\
+ r += a[i];
\
+ return r;
\
+ }
+
+#define TEST_PLUS(T) T (int) T (float)
+
+TEST_PLUS (DEF_REDUC_PLUS)
+
+/* { dg-final { scan-assembler-not {vsetivli\s+zero,\s*4} } } */
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr111153.c
b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr11153-2.c
similarity index 93%
rename from gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr111153.c
rename to gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr11153-2.c
index 06e08ec5f2e..d361f1fc7fa 100644
--- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr111153.c
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr11153-2.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize
-mtune=generic-ooo" } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -ffast-math"
} */
#define DEF_REDUC_PLUS(TYPE)
\
TYPE __attribute__ ((noinline, noclone))
\
@@ -11,7 +11,7 @@
return r;
\
}
-#define TEST_PLUS(T) T (int)
+#define TEST_PLUS(T) T (int) T (float)
TEST_PLUS (DEF_REDUC_PLUS)
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 19e38b8637b..7a3db5f098b 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -4872,12 +4872,10 @@ vect_estimate_min_profitable_iters (loop_vec_info
loop_vinfo,
unsigned int length_update_cost = 0;
if (LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo))
- /* For decrement IV style, we use a single SELECT_VL since
- beginning to calculate the number of elements need to be
- processed in current iteration, and a SHIFT operation to
- compute the next memory address instead of adding vectorization
- factor. */
- length_update_cost = 2;
+ /* For decrement IV style, each iteration only needs a single
+ SELECT_VL or MIN at the beginning to calculate the number of
+ elements that need to be processed in the current iteration. */
+ length_update_cost = 1;
else
/* For increment IV stype, Each may need two MINs and one MINUS to
update lengths in body for next iteration. */
--
2.36.1