https://gcc.gnu.org/g:7a46f679b3ad302431a0fdd1dc30cca9712c92ec
commit 7a46f679b3ad302431a0fdd1dc30cca9712c92ec
Author: Robin Dapp <rd...@ventanamicro.com>
Date:   Mon Feb 26 13:09:15 2024 +0100

    RISC-V: Add initial cost handling for segment loads/stores.

    This patch makes segment loads and stores more expensive.  It adds
    segment_permute_2 as well as 3 to 8 cost fields to the common vector
    costs and adds handling to adjust_stmt_cost.

    gcc/ChangeLog:

            * config/riscv/riscv-protos.h (struct common_vector_cost): Add
            segment_permute cost.
            * config/riscv/riscv-vector-costs.cc (costs::adjust_stmt_cost):
            Handle segment loads/stores.
            * config/riscv/riscv.cc: Initialize segment_permute_[2-8] to 1.

    gcc/testsuite/ChangeLog:

            * gcc.dg/vect/costmodel/riscv/rvv/pr113112-4.c: Adjust test.

    (cherry picked from commit e0b9c8ad7098fb08a25a61fe17d4274dd73e5145)

Diff:
---
 gcc/config/riscv/riscv-protos.h                    |   9 ++
 gcc/config/riscv/riscv-vector-costs.cc             | 163 +++++++++++++++------
 gcc/config/riscv/riscv.cc                          |  14 ++
 .../gcc.dg/vect/costmodel/riscv/rvv/pr113112-4.c   |   4 +-
 4 files changed, 146 insertions(+), 44 deletions(-)

diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h index 565ead1382a..004ceb1031b 100644 --- a/gcc/config/riscv/riscv-protos.h +++ b/gcc/config/riscv/riscv-protos.h @@ -222,6 +222,15 @@ struct common_vector_cost const int gather_load_cost; const int scatter_store_cost; + /* Segment load/store permute cost. */ + const int segment_permute_2; + const int segment_permute_3; + const int segment_permute_4; + const int segment_permute_5; + const int segment_permute_6; + const int segment_permute_7; + const int segment_permute_8; + /* Cost of a vector-to-scalar operation. 
*/ const int vec_to_scalar_cost; diff --git a/gcc/config/riscv/riscv-vector-costs.cc b/gcc/config/riscv/riscv-vector-costs.cc index 4582b0db425..0a88e142a93 100644 --- a/gcc/config/riscv/riscv-vector-costs.cc +++ b/gcc/config/riscv/riscv-vector-costs.cc @@ -1052,6 +1052,25 @@ costs::better_main_loop_than_p (const vector_costs *uncast_other) const return vector_costs::better_main_loop_than_p (other); } +/* Returns the group size i.e. the number of vectors to be loaded by a + segmented load/store instruction. Return 0 if it is no segmented + load/store. */ +static int +segment_loadstore_group_size (enum vect_cost_for_stmt kind, + stmt_vec_info stmt_info) +{ + if (stmt_info + && (kind == vector_load || kind == vector_store) + && STMT_VINFO_DATA_REF (stmt_info)) + { + stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info); + if (stmt_info + && STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_LOAD_STORE_LANES) + return DR_GROUP_SIZE (stmt_info); + } + return 0; +} + /* Adjust vectorization cost after calling riscv_builtin_vectorization_cost. For some statement, we would like to further fine-grain tweak the cost on top of riscv_builtin_vectorization_cost handling which doesn't have any @@ -1076,55 +1095,115 @@ costs::adjust_stmt_cost (enum vect_cost_for_stmt kind, loop_vec_info loop, case vector_load: case vector_store: { - /* Unit-stride vector loads and stores do not have offset addressing - as opposed to scalar loads and stores. - If the address depends on a variable we need an additional - add/sub for each load/store in the worst case. */ - if (stmt_info && stmt_info->stmt) + if (stmt_info && stmt_info->stmt && STMT_VINFO_DATA_REF (stmt_info)) { - data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); - class loop *father = stmt_info->stmt->bb->loop_father; - if (!loop && father && !father->inner && father->superloops) + /* Segment loads and stores. When the group size is > 1 + the vectorizer will add a vector load/store statement for + each vector in the group. 
Here we additionally add permute + costs for each. */ + /* TODO: Indexed and ordered/unordered cost. */ + int group_size = segment_loadstore_group_size (kind, stmt_info); + if (group_size > 1) + { + switch (group_size) + { + case 2: + if (riscv_v_ext_vector_mode_p (loop->vector_mode)) + stmt_cost += costs->vla->segment_permute_2; + else + stmt_cost += costs->vls->segment_permute_2; + break; + case 3: + if (riscv_v_ext_vector_mode_p (loop->vector_mode)) + stmt_cost += costs->vla->segment_permute_3; + else + stmt_cost += costs->vls->segment_permute_3; + break; + case 4: + if (riscv_v_ext_vector_mode_p (loop->vector_mode)) + stmt_cost += costs->vla->segment_permute_4; + else + stmt_cost += costs->vls->segment_permute_4; + break; + case 5: + if (riscv_v_ext_vector_mode_p (loop->vector_mode)) + stmt_cost += costs->vla->segment_permute_5; + else + stmt_cost += costs->vls->segment_permute_5; + break; + case 6: + if (riscv_v_ext_vector_mode_p (loop->vector_mode)) + stmt_cost += costs->vla->segment_permute_6; + else + stmt_cost += costs->vls->segment_permute_6; + break; + case 7: + if (riscv_v_ext_vector_mode_p (loop->vector_mode)) + stmt_cost += costs->vla->segment_permute_7; + else + stmt_cost += costs->vls->segment_permute_7; + break; + case 8: + if (riscv_v_ext_vector_mode_p (loop->vector_mode)) + stmt_cost += costs->vla->segment_permute_8; + else + stmt_cost += costs->vls->segment_permute_8; + break; + default: + break; + } + } + else { - tree ref; - if (TREE_CODE (dr->ref) != MEM_REF - || !(ref = TREE_OPERAND (dr->ref, 0)) - || TREE_CODE (ref) != SSA_NAME) - break; + /* Unit-stride vector loads and stores do not have offset + addressing as opposed to scalar loads and stores. + If the address depends on a variable we need an additional + add/sub for each load/store in the worst case. 
*/ + data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); + class loop *father = stmt_info->stmt->bb->loop_father; + if (!loop && father && !father->inner && father->superloops) + { + tree ref; + if (TREE_CODE (dr->ref) != MEM_REF + || !(ref = TREE_OPERAND (dr->ref, 0)) + || TREE_CODE (ref) != SSA_NAME) + break; - if (SSA_NAME_IS_DEFAULT_DEF (ref)) - break; + if (SSA_NAME_IS_DEFAULT_DEF (ref)) + break; - if (memrefs.contains ({ref, cst0})) - break; + if (memrefs.contains ({ref, cst0})) + break; - memrefs.add ({ref, cst0}); + memrefs.add ({ref, cst0}); - /* In case we have not seen REF before and the base address - is a pointer operation try a bit harder. */ - tree base = DR_BASE_ADDRESS (dr); - if (TREE_CODE (base) == POINTER_PLUS_EXPR - || TREE_CODE (base) == POINTER_DIFF_EXPR) - { - /* Deconstruct BASE's first operand. If it is a binary - operation, i.e. a base and an "offset" store this - pair. Only increase the stmt_cost if we haven't seen - it before. */ - tree argp = TREE_OPERAND (base, 1); - typedef std::pair<tree, tree> addr_pair; - addr_pair pair; - if (TREE_CODE_CLASS (TREE_CODE (argp)) == tcc_binary) + /* In case we have not seen REF before and the base + address is a pointer operation try a bit harder. */ + tree base = DR_BASE_ADDRESS (dr); + if (TREE_CODE (base) == POINTER_PLUS_EXPR + || TREE_CODE (base) == POINTER_DIFF_EXPR) { - tree argp0 = tree_strip_nop_conversions - (TREE_OPERAND (argp, 0)); - tree argp1 = TREE_OPERAND (argp, 1); - pair = addr_pair (argp0, argp1); - if (memrefs.contains (pair)) - break; - - memrefs.add (pair); - stmt_cost += builtin_vectorization_cost (scalar_stmt, - NULL_TREE, 0); + /* Deconstruct BASE's first operand. If it is a + binary operation, i.e. a base and an "offset" + store this pair. Only increase the stmt_cost if + we haven't seen it before. 
*/ + tree argp = TREE_OPERAND (base, 1); + typedef std::pair<tree, tree> addr_pair; + addr_pair pair; + if (TREE_CODE_CLASS (TREE_CODE (argp)) == tcc_binary) + { + tree argp0 = tree_strip_nop_conversions + (TREE_OPERAND (argp, 0)); + tree argp1 = TREE_OPERAND (argp, 1); + pair = addr_pair (argp0, argp1); + if (memrefs.contains (pair)) + break; + + memrefs.add (pair); + stmt_cost + += builtin_vectorization_cost (scalar_stmt, + NULL_TREE, 0); + } } } } diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 4067505270e..e058095aeba 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -367,6 +367,13 @@ static const common_vector_cost rvv_vls_vector_cost = { 1, /* fp_stmt_cost */ 1, /* gather_load_cost */ 1, /* scatter_store_cost */ + 1, /* segment_permute (2) */ + 1, /* segment_permute (3) */ + 1, /* segment_permute (4) */ + 1, /* segment_permute (5) */ + 1, /* segment_permute (6) */ + 1, /* segment_permute (7) */ + 1, /* segment_permute (8) */ 1, /* vec_to_scalar_cost */ 1, /* scalar_to_vec_cost */ 1, /* permute_cost */ @@ -383,6 +390,13 @@ static const scalable_vector_cost rvv_vla_vector_cost = { 1, /* fp_stmt_cost */ 1, /* gather_load_cost */ 1, /* scatter_store_cost */ + 1, /* segment_permute (2) */ + 1, /* segment_permute (3) */ + 1, /* segment_permute (4) */ + 1, /* segment_permute (5) */ + 1, /* segment_permute (6) */ + 1, /* segment_permute (7) */ + 1, /* segment_permute (8) */ 1, /* vec_to_scalar_cost */ 1, /* scalar_to_vec_cost */ 1, /* permute_cost */ diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr113112-4.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr113112-4.c index a7ee7b0b613..6fd47f07a99 100644 --- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr113112-4.c +++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr113112-4.c @@ -21,8 +21,8 @@ void move_replacements (rtx *x, rtx *y, int n_replacements) } } -/* { dg-final { scan-assembler {e64,m2} } } */ -/* { dg-final { scan-assembler-not {e64,m4} } 
} */ +/* { dg-final { scan-assembler-not {e64,m2} } } */ +/* { dg-final { scan-assembler {e64,m4} } } */ /* { dg-final { scan-assembler-not {jr} } } */ /* { dg-final { scan-assembler {ret} } } */ /* { dg-final { scan-assembler-not {sp} } } */