[gcc r14-9349] RISC-V: Refactor expand_vec_cmp [NFC]
https://gcc.gnu.org/g:1cd8254ebad7b73993d2acee80a7caf37c21878a commit r14-9349-g1cd8254ebad7b73993d2acee80a7caf37c21878a Author: demin.han Date: Mon Feb 26 14:50:15 2024 +0800 RISC-V: Refactor expand_vec_cmp [NFC] There are two expand_vec_cmp functions. They have same structure and similar code. We can use default arguments instead of overloading. Tested on RV32 and RV64. gcc/ChangeLog: * config/riscv/riscv-protos.h (expand_vec_cmp): Change proto * config/riscv/riscv-v.cc (expand_vec_cmp): Use default arguments (expand_vec_cmp_float): Adapt arguments Signed-off-by: demin.han Diff: --- gcc/config/riscv/riscv-protos.h | 2 +- gcc/config/riscv/riscv-v.cc | 44 + 2 files changed, 15 insertions(+), 31 deletions(-) diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h index 80efdf2b7e5..b8735593805 100644 --- a/gcc/config/riscv/riscv-protos.h +++ b/gcc/config/riscv/riscv-protos.h @@ -603,7 +603,7 @@ bool simm5_p (rtx); bool neg_simm5_p (rtx); #ifdef RTX_CODE bool has_vi_variant_p (rtx_code, rtx); -void expand_vec_cmp (rtx, rtx_code, rtx, rtx); +void expand_vec_cmp (rtx, rtx_code, rtx, rtx, rtx = nullptr, rtx = nullptr); bool expand_vec_cmp_float (rtx, rtx_code, rtx, rtx, bool); void expand_cond_len_unop (unsigned, rtx *); void expand_cond_len_binop (unsigned, rtx *); diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index 2d32db06dd1..967f4e38287 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -2775,7 +2775,8 @@ vectorize_related_mode (machine_mode vector_mode, scalar_mode element_mode, /* Expand an RVV comparison. 
*/ void -expand_vec_cmp (rtx target, rtx_code code, rtx op0, rtx op1) +expand_vec_cmp (rtx target, rtx_code code, rtx op0, rtx op1, rtx mask, + rtx maskoff) { machine_mode mask_mode = GET_MODE (target); machine_mode data_mode = GET_MODE (op0); @@ -2785,8 +2786,8 @@ expand_vec_cmp (rtx target, rtx_code code, rtx op0, rtx op1) { rtx lt = gen_reg_rtx (mask_mode); rtx gt = gen_reg_rtx (mask_mode); - expand_vec_cmp (lt, LT, op0, op1); - expand_vec_cmp (gt, GT, op0, op1); + expand_vec_cmp (lt, LT, op0, op1, mask, maskoff); + expand_vec_cmp (gt, GT, op0, op1, mask, maskoff); icode = code_for_pred (IOR, mask_mode); rtx ops[] = {target, lt, gt}; emit_vlmax_insn (icode, BINARY_MASK_OP, ops); @@ -2794,33 +2795,16 @@ expand_vec_cmp (rtx target, rtx_code code, rtx op0, rtx op1) } rtx cmp = gen_rtx_fmt_ee (code, mask_mode, op0, op1); - rtx ops[] = {target, cmp, op0, op1}; - emit_vlmax_insn (icode, COMPARE_OP, ops); -} - -void -expand_vec_cmp (rtx target, rtx_code code, rtx mask, rtx maskoff, rtx op0, - rtx op1) -{ - machine_mode mask_mode = GET_MODE (target); - machine_mode data_mode = GET_MODE (op0); - insn_code icode = get_cmp_insn_code (code, data_mode); - - if (code == LTGT) + if (!mask && !maskoff) { - rtx lt = gen_reg_rtx (mask_mode); - rtx gt = gen_reg_rtx (mask_mode); - expand_vec_cmp (lt, LT, mask, maskoff, op0, op1); - expand_vec_cmp (gt, GT, mask, maskoff, op0, op1); - icode = code_for_pred (IOR, mask_mode); - rtx ops[] = {target, lt, gt}; - emit_vlmax_insn (icode, BINARY_MASK_OP, ops); - return; + rtx ops[] = {target, cmp, op0, op1}; + emit_vlmax_insn (icode, COMPARE_OP, ops); +} + else +{ + rtx ops[] = {target, mask, maskoff, cmp, op0, op1}; + emit_vlmax_insn (icode, COMPARE_OP_MU, ops); } - - rtx cmp = gen_rtx_fmt_ee (code, mask_mode, op0, op1); - rtx ops[] = {target, mask, maskoff, cmp, op0, op1}; - emit_vlmax_insn (icode, COMPARE_OP_MU, ops); } /* Expand an RVV floating-point comparison: @@ -2898,7 +2882,7 @@ expand_vec_cmp_float (rtx target, rtx_code code, rtx 
op0, rtx op1, else { /* vmfeq.vvv0, vb, vb, v0.t */ - expand_vec_cmp (eq0, EQ, eq0, eq0, op1, op1); + expand_vec_cmp (eq0, EQ, op1, op1, eq0, eq0); } break; default: @@ -2916,7 +2900,7 @@ expand_vec_cmp_float (rtx target, rtx_code code, rtx op0, rtx op1, if (code == ORDERED) emit_move_insn (target, eq0); else -expand_vec_cmp (eq0, code, eq0, eq0, op0, op1); +expand_vec_cmp (eq0, code, op0, op1, eq0, eq0); if (can_invert_p) {
[gcc r14-9380] RISC-V: Fix ICE in riscv vector costs
https://gcc.gnu.org/g:42187c6968af9907de1f9b7563d87de739857684 commit r14-9380-g42187c6968af9907de1f9b7563d87de739857684 Author: demin.han Date: Wed Mar 6 17:34:34 2024 +0800 RISC-V: Fix ICE in riscv vector costs The following code can result in ICE: -march=rv64gcv --param riscv-autovec-lmul=dynamic -O3 char *jpeg_difference7_input_buf; void jpeg_difference7(int *diff_buf) { unsigned width; int samp, Rb; while (--width) { Rb = samp = *jpeg_difference7_input_buf; *diff_buf++ = -(int)(samp + (long)Rb >> 1); } } A biggest_mode update was missed in one branch, which triggers the following assertion failure: gcc_assert (biggest_size >= mode_size); Tested on RV64 with no regressions. PR target/114264 gcc/ChangeLog: * config/riscv/riscv-vector-costs.cc: Fix ICE gcc/testsuite/ChangeLog: * gcc.dg/vect/costmodel/riscv/rvv/pr114264.c: New test. Signed-off-by: demin.han Diff: --- gcc/config/riscv/riscv-vector-costs.cc | 2 ++ gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr114264.c | 15 +++ 2 files changed, 17 insertions(+) diff --git a/gcc/config/riscv/riscv-vector-costs.cc b/gcc/config/riscv/riscv-vector-costs.cc index adf9c197df5..5ac8655b4d8 100644 --- a/gcc/config/riscv/riscv-vector-costs.cc +++ b/gcc/config/riscv/riscv-vector-costs.cc @@ -414,6 +414,8 @@ compute_local_live_ranges ( auto *r = get_live_range (live_ranges, arg); gcc_assert (r); (*r).second = MAX (point, (*r).second); + biggest_mode = get_biggest_mode ( + biggest_mode, TYPE_MODE (TREE_TYPE (arg))); } } else diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr114264.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr114264.c new file mode 100644 index 000..7853f292af7 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr114264.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize --param=riscv-autovec-lmul=dynamic" } */ + +char *jpeg_difference7_input_buf; +void +jpeg_difference7 (int *diff_buf) +{ + unsigned width; + int samp, Rb; + while (--width) +{ + 
Rb = samp = *jpeg_difference7_input_buf; + *diff_buf++ = -(int) (samp + (long) Rb >> 1); +} +}
[gcc r14-9826] RISC-V: Minor fix for max_point
https://gcc.gnu.org/g:aa2ab7b79a87c25d113752401a6026c6823dfe57 commit r14-9826-gaa2ab7b79a87c25d113752401a6026c6823dfe57 Author: demin.han Date: Mon Apr 1 16:20:46 2024 +0800 RISC-V: Minor fix for max_point Program points start from 1, so max_point should be equal to length(). Tested on RV64 with no regressions. gcc/ChangeLog: * config/riscv/riscv-vector-costs.cc: Use length() Signed-off-by: demin.han Diff: --- gcc/config/riscv/riscv-vector-costs.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/config/riscv/riscv-vector-costs.cc b/gcc/config/riscv/riscv-vector-costs.cc index f462c272a6e..5ceb313c118 100644 --- a/gcc/config/riscv/riscv-vector-costs.cc +++ b/gcc/config/riscv/riscv-vector-costs.cc @@ -752,7 +752,7 @@ update_local_live_ranges ( We will be likely using one more vector variable. */ unsigned int max_point - = (*program_points_per_bb.get (bb)).length () - 1; + = (*program_points_per_bb.get (bb)).length (); auto *live_ranges = live_ranges_per_bb.get (bb); bool existed_p = false; tree var = type == load_vec_info_type
[gcc r15-47] RISC-V: Refine the condition for add additional vars in RVV cost model
https://gcc.gnu.org/g:ca2f531cc5db4f1020d4329976610356033e0246 commit r15-47-gca2f531cc5db4f1020d4329976610356033e0246 Author: demin.han Date: Tue Mar 26 16:52:12 2024 +0800 RISC-V: Refine the condition for add additional vars in RVV cost model adjacent_dr_p is a sufficient but not necessary condition for contiguous access, so unnecessary live ranges are added, which results in a smaller LMUL. This patch uses MEMORY_ACCESS_TYPE as the condition and constrains segment load/store. Tested on RV64 with no regressions. PR target/114506 gcc/ChangeLog: * config/riscv/riscv-vector-costs.cc (non_contiguous_memory_access_p): Rename (need_additional_vector_vars_p): Rename and refine condition gcc/testsuite/ChangeLog: * gcc.dg/vect/costmodel/riscv/rvv/pr114506.c: New test. Signed-off-by: demin.han Diff: --- gcc/config/riscv/riscv-vector-costs.cc | 23 ++ .../gcc.dg/vect/costmodel/riscv/rvv/pr114506.c | 23 ++ 2 files changed, 38 insertions(+), 8 deletions(-) diff --git a/gcc/config/riscv/riscv-vector-costs.cc b/gcc/config/riscv/riscv-vector-costs.cc index d27bb68a7b9..4582b0db425 100644 --- a/gcc/config/riscv/riscv-vector-costs.cc +++ b/gcc/config/riscv/riscv-vector-costs.cc @@ -563,14 +563,24 @@ get_store_value (gimple *stmt) return gimple_assign_rhs1 (stmt); } -/* Return true if it is non-contiguous load/store. */ +/* Return true if addtional vector vars needed. 
*/ static bool -non_contiguous_memory_access_p (stmt_vec_info stmt_info) +need_additional_vector_vars_p (stmt_vec_info stmt_info) { enum stmt_vec_info_type type = STMT_VINFO_TYPE (vect_stmt_to_vectorize (stmt_info)); - return ((type == load_vec_info_type || type == store_vec_info_type) - && !adjacent_dr_p (STMT_VINFO_DATA_REF (stmt_info))); + if (type == load_vec_info_type || type == store_vec_info_type) +{ + if (STMT_VINFO_GATHER_SCATTER_P (stmt_info) + && STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_GATHER_SCATTER) + return true; + + machine_mode mode = TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info)); + int lmul = riscv_get_v_regno_alignment (mode); + if (DR_GROUP_SIZE (stmt_info) * lmul > RVV_M8) + return true; +} + return false; } /* Return the LMUL of the current analysis. */ @@ -739,10 +749,7 @@ update_local_live_ranges ( stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si)); enum stmt_vec_info_type type = STMT_VINFO_TYPE (vect_stmt_to_vectorize (stmt_info)); - if (non_contiguous_memory_access_p (stmt_info) - /* LOAD_LANES/STORE_LANES doesn't need a perm indice. 
*/ - && STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) - != VMAT_LOAD_STORE_LANES) + if (need_additional_vector_vars_p (stmt_info)) { /* For non-adjacent load/store STMT, we will potentially convert it into: diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr114506.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr114506.c new file mode 100644 index 000..a88d24b2d2d --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr114506.c @@ -0,0 +1,23 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -mrvv-max-lmul=dynamic -fdump-tree-vect-details" } */ + +float a[32000], b[32000], c[32000], d[32000]; +float aa[256][256], bb[256][256], cc[256][256]; + +void +s2275 () +{ + for (int i = 0; i < 256; i++) +{ + for (int j = 0; j < 256; j++) + { + aa[j][i] = aa[j][i] + bb[j][i] * cc[j][i]; + } + a[i] = b[i] + c[i] * d[i]; +} +} + +/* { dg-final { scan-assembler-times {e32,m8} 1 } } */ +/* { dg-final { scan-assembler-not {e32,m4} } } */ +/* { dg-final { scan-assembler-not {csrr} } } */ +/* { dg-final { scan-tree-dump-not "Preferring smaller LMUL loop because it has unexpected spills" "vect" } } */