From: Juzhe-Zhong <juzhe.zh...@rivai.ai> This patch optimizes the RVV VLS auto-vectorization codegen with respect to alignment.
void __attribute__((noinline, noclone)) f3 (int * __restrict dst, int * __restrict op1, int * __restrict op2, int count) { for (int i = 0; i < count; ++i) dst[i] = op1[i] + op2[i]; } Before this patch: f3: ble a3,zero,.L1 srli a5,a1,2 negw a5,a5 andi a4,a5,3 sext.w a3,a3 beq a4,zero,.L3 lw a7,0(a1) lw a6,0(a2) andi a5,a5,2 addw a6,a6,a7 sw a6,0(a0) beq a5,zero,.L3 lw a7,4(a1) lw a5,4(a2) li a6,3 addw a5,a5,a7 sw a5,4(a0) bne a4,a6,.L3 lw a6,8(a2) lw a5,8(a1) addw a5,a5,a6 sw a5,8(a0) .L3: subw a3,a3,a4 slli a6,a4,2 slli a5,a3,32 srli a5,a5,32 add a1,a1,a6 add a2,a2,a6 add a0,a0,a6 li a3,4 .L6: mv a4,a5 bleu a5,a3,.L5 li a4,4 .L5: vsetvli zero,a4,e32,m1,ta,ma vle32.v v1,0(a1) vle32.v v2,0(a2) vsetivli zero,4,e32,m1,ta,ma sub a5,a5,a4 vadd.vv v1,v1,v2 vsetvli zero,a4,e32,m1,ta,ma vse32.v v1,0(a0) addi a1,a1,16 addi a2,a2,16 addi a0,a0,16 bne a5,zero,.L6 .L1: ret After this patch: f3: ble a3,zero,.L1 li a4,4 .L4: mv a5,a3 bleu a3,a4,.L3 li a5,4 .L3: vsetvli zero,a5,e32,m1,ta,ma vle32.v v2,0(a1) vle32.v v1,0(a2) vsetivli zero,4,e32,m1,ta,ma sub a3,a3,a5 vadd.vv v1,v1,v2 vsetvli zero,a5,e32,m1,ta,ma vse32.v v1,0(a0) addi a2,a2,16 addi a0,a0,16 addi a1,a1,16 bne a3,zero,.L4 .L1: ret The TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE implementation comes directly from ARM SVE. The TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST implementation is the same as in the GCN port, which vectorizes all cases by default. We will need to support an accurate vector cost model in the future. gcc/ChangeLog: * config/riscv/riscv.cc (riscv_simd_vector_alignment_reachable): New function. (riscv_vectorization_cost): New function. (TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE): New target hook. (TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST): New target hook. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/align-2.c: New test. 
--- gcc/config/riscv/riscv.cc | 39 +++++++++++++++++++ .../gcc.target/riscv/rvv/autovec/align-2.c | 12 ++++++ 2 files changed, 51 insertions(+) create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/align-2.c diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index a5776a550b2..54306327cb3 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -7517,6 +7517,39 @@ riscv_vectorize_preferred_vector_alignment (const_tree type) return TYPE_ALIGN (type); } +/* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */ + +static bool +riscv_simd_vector_alignment_reachable (const_tree type, bool is_packed) +{ + if (is_packed) + return false; + + /* For fixed-length vectors, check that the vectorizer will aim for + full-vector alignment. This isn't true for generic GCC vectors + that are wider than the ABI maximum of 128 bits. */ + poly_uint64 preferred_alignment + = riscv_vectorize_preferred_vector_alignment (type); + if (TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST + && maybe_ne (wi::to_widest (TYPE_SIZE (type)), preferred_alignment)) + return false; + + /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */ + return true; +} + +/* Implement TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST. */ + +int +riscv_vectorization_cost (enum vect_cost_for_stmt ARG_UNUSED (type_of_cost), + tree ARG_UNUSED (vectype), int ARG_UNUSED (misalign)) +{ + /* TODO: Always vectorize. The vectorization COST model is not accurate, + we will need to support accurate vectorization COST model according + to '-mtune' in the future. */ + return 1; +} + /* Initialize the GCC target structure. 
*/ #undef TARGET_ASM_ALIGNED_HI_OP #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t" @@ -7792,6 +7825,12 @@ riscv_vectorize_preferred_vector_alignment (const_tree type) #undef TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT #define TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT \ riscv_vectorize_preferred_vector_alignment +#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE +#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \ + riscv_simd_vector_alignment_reachable +#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST +#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \ + riscv_vectorization_cost struct gcc_target targetm = TARGET_INITIALIZER; diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/align-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/align-2.c new file mode 100644 index 00000000000..812584e9d25 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/align-2.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3 --param riscv-autovec-preference=fixed-vlmax" } */ + +void __attribute__((noinline, noclone)) +f (int * __restrict dst, int * __restrict op1, int * __restrict op2, int count) +{ + for (int i = 0; i < count; ++i) + dst[i] = op1[i] + op2[i]; +} + +/* { dg-final { scan-assembler-not "lw" } } */ +/* { dg-final { scan-assembler-not "sw" } } */ -- 2.36.1