LGTM, thanks for this!!!!
On Sat, Jun 10, 2023 at 8:42 AM <juzhe.zh...@rivai.ai> wrote: > > From: Juzhe-Zhong <juzhe.zh...@rivai.ai> > > Consider the following example: > void vec_add(int32_t *restrict c, int32_t *restrict a, int32_t *restrict b, > int N) { > for (long i = 0; i < N; i++) { > c[i] = a[i] + b[i]; > } > } > > After this patch: > vec_add: > ble a3,zero,.L5 > .L3: > vsetvli a5,a3,e32,m1,ta,ma > vle32.v v2,0(a1) > vle32.v v1,0(a2) > vsetvli a6,zero,e32,m1,ta,ma ===> redundant vsetvl. > slli a4,a5,2 > vadd.vv v1,v1,v2 > sub a3,a3,a5 > vsetvli zero,a5,e32,m1,ta,ma ===> redundant vsetvl. > vse32.v v1,0(a0) > add a1,a1,a4 > add a2,a2,a4 > add a0,a0,a4 > bne a3,zero,.L3 > .L5: > ret > > We can get close-to-optimal codegen but with some redundant vsetvls. > This is not a big issue and will be easily addressed in the RISC-V backend. > > I am going to add a standalone PASS "AVL propagation" (avlprop) to address > this issue. > > gcc/ChangeLog: > > * config/riscv/autovec.md (select_vl<mode>): New pattern. > * config/riscv/riscv-protos.h (expand_select_vl): New function. > * config/riscv/riscv-v.cc (expand_select_vl): Ditto. > > gcc/testsuite/ChangeLog: > > * gcc.target/riscv/rvv/autovec/ternop/ternop-2.c: Adapt test. > * gcc.target/riscv/rvv/autovec/ternop/ternop-5.c: Ditto. > * gcc.target/riscv/rvv/autovec/partial/select_vl-1.c: New test. 
> > --- > gcc/config/riscv/autovec.md | 14 ++++++++++ > gcc/config/riscv/riscv-protos.h | 1 + > gcc/config/riscv/riscv-v.cc | 12 +++++++++ > .../riscv/rvv/autovec/partial/select_vl-1.c | 26 +++++++++++++++++++ > .../riscv/rvv/autovec/ternop/ternop-2.c | 2 +- > .../riscv/rvv/autovec/ternop/ternop-5.c | 2 +- > 6 files changed, 55 insertions(+), 2 deletions(-) > create mode 100644 > gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/select_vl-1.c > > diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md > index 9f4492db23c..b7070099f29 100644 > --- a/gcc/config/riscv/autovec.md > +++ b/gcc/config/riscv/autovec.md > @@ -626,3 +626,17 @@ > } > [(set_attr "type" "vimuladd") > (set_attr "mode" "<MODE>")]) > + > +;; ========================================================================= > +;; == SELECT_VL > +;; ========================================================================= > + > +(define_expand "select_vl<mode>" > + [(match_operand:P 0 "register_operand") > + (match_operand:P 1 "vector_length_operand") > + (match_operand:P 2 "")] > + "TARGET_VECTOR" > +{ > + riscv_vector::expand_select_vl (operands); > + DONE; > +}) > diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h > index 66c1f535d60..6db3a46c682 100644 > --- a/gcc/config/riscv/riscv-protos.h > +++ b/gcc/config/riscv/riscv-protos.h > @@ -246,6 +246,7 @@ void expand_vec_series (rtx, rtx, rtx); > void expand_vec_init (rtx, rtx); > void expand_vcond (rtx *); > void expand_vec_perm (rtx, rtx, rtx, rtx); > +void expand_select_vl (rtx *); > /* Rounding mode bitfield for fixed point VXRM. 
*/ > enum vxrm_field_enum > { > diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc > index 477a22cd2b0..e1b85a5af91 100644 > --- a/gcc/config/riscv/riscv-v.cc > +++ b/gcc/config/riscv/riscv-v.cc > @@ -2447,4 +2447,16 @@ expand_vec_perm_const (machine_mode vmode, > machine_mode op_mode, rtx target, > return ret; > } > > +/* Generate no side effects vsetvl to get the vector length. */ > +void > +expand_select_vl (rtx *ops) > +{ > + poly_int64 nunits = rtx_to_poly_int64 (ops[2]); > + /* We arbitrary picked QImode as inner scalar mode to get vector mode. > + since vsetvl only demand ratio. We let VSETVL PASS to optimize it. */ > + scalar_int_mode mode = QImode; > + machine_mode rvv_mode = get_vector_mode (mode, nunits).require (); > + emit_insn (gen_no_side_effects_vsetvl_rtx (rvv_mode, ops[0], ops[1])); > +} > + > } // namespace riscv_vector > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/select_vl-1.c > b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/select_vl-1.c > new file mode 100644 > index 00000000000..74bbf40ee9f > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/select_vl-1.c > @@ -0,0 +1,26 @@ > +/* { dg-do compile } */ > +/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param > riscv-autovec-preference=scalable -fno-vect-cost-model > -fno-tree-loop-distribute-patterns -fdump-tree-optimized-details" } */ > + > +#include <stdint-gcc.h> > + > +#define TEST_TYPE(TYPE) > \ > + __attribute__ ((noipa)) void select_vl_##TYPE (TYPE *__restrict dst, > \ > + TYPE *__restrict a, int n) > \ > + { > \ > + for (int i = 0; i < n; i++) > \ > + dst[i] = a[i]; > \ > + } > + > +#define TEST_ALL() > \ > + TEST_TYPE (int8_t) > \ > + TEST_TYPE (uint8_t) > \ > + TEST_TYPE (int16_t) > \ > + TEST_TYPE (uint16_t) > \ > + TEST_TYPE (int32_t) > \ > + TEST_TYPE (uint32_t) > \ > + TEST_TYPE (int64_t) > \ > + TEST_TYPE (uint64_t) > \ > + TEST_TYPE (float) > \ > + TEST_TYPE (double) > + > +TEST_ALL () > diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop-2.c > b/gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop-2.c > index 89eeaf6315f..e52e07ddd09 100644 > --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop-2.c > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop-2.c > @@ -1,5 +1,5 @@ > /* { dg-do compile } */ > -/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d > --param=riscv-autovec-preference=scalable" } */ > +/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d > --param=riscv-autovec-preference=scalable -fno-schedule-insns" } */ > > #include <stdint-gcc.h> > > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop-5.c > b/gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop-5.c > index a9a7198feb4..49c85efbf3a 100644 > --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop-5.c > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/ternop/ternop-5.c > @@ -1,5 +1,5 @@ > /* { dg-do compile } */ > -/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d > --param=riscv-autovec-preference=scalable" } */ > +/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d > --param=riscv-autovec-preference=scalable -fno-schedule-insns" } */ > > #include <stdint-gcc.h> > > -- > 2.36.3 >