Applies after the recent 9 patch series:
"RISC-V: Improve const vector costing and expansion"
https://inbox.sourceware.org/gcc-patches/[email protected]/T/#t
This isn't functional due to RTX hash collisions. It was incredibly useful and
helped me catch a few tricky bugs like:
"RISC-V: Handle 0.0 floating point pattern costing"
Current flow is susceptible to hash collisions.
Ideal flow would be:
Costing: Insert into hashmap<rtx, vec<(const_rtx, enum)>>
Expand: Check for membership in hashmap
-> Not in hashmap: ignore, this wasn't costed
-> In hashmap: Iterate over vec
-> if RTX not in hashmap: Ignore, this wasn't costed (hash collision)
-> if RTX in hashmap: Assert enum is expected
Example of hash collision:
hash -663464470:
(const_vector:RVVM4DI repeat [
(const_int 8589934593 [0x200000001])
])
hash -663464470:
(const_vector:RVVM4SF repeat [
(const_double:SF 1.0e+0 [0x0.8p+1])
(const_double:SF 2.0e+0 [0x0.8p+2])
])
Segher pointed out that collisions are inevitible (~80k 32 bit hashes
have a >50% chance of containing a collision)
If this is worth adding then these are the next questions:
* How heavy-weight is it to store a copy of every costed
const RTX vector (and ideally other costed expressions later).
* Does this belong in release or gated behind rtx checking?
Signed-off-by: Patrick O'Neill <[email protected]>
---
gcc/config/riscv/riscv-v.cc | 29 +++++++++++++++++++++++++++++
gcc/config/riscv/riscv-v.h | 14 ++++++++++++++
gcc/config/riscv/riscv.cc | 27 ++++++++++++++++++++++++---
3 files changed, 67 insertions(+), 3 deletions(-)
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 6ba4b6291bb..db43312f59f 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -1164,6 +1164,10 @@ expand_vector_init_trailing_same_elem (rtx target,
static void
expand_const_vector (rtx target, rtx src)
{
+ riscv_const_expect_p* expected_pattern = NULL;
+ if (EXPECTED_CONST_PATTERN)
+ expected_pattern = EXPECTED_CONST_PATTERN->get (src);
+
machine_mode mode = GET_MODE (target);
rtx result = register_operand (target, mode) ? target : gen_reg_rtx (mode);
rtx elt;
@@ -1171,6 +1175,8 @@ expand_const_vector (rtx target, rtx src)
{
if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
{
+ if (expected_pattern)
+ gcc_assert (*expected_pattern == RVV_DUPLICATE_BOOL);
gcc_assert (rtx_equal_p (elt, const0_rtx)
|| rtx_equal_p (elt, const1_rtx));
rtx ops[] = {result, src};
@@ -1180,11 +1186,16 @@ expand_const_vector (rtx target, rtx src)
we use vmv.v.i instruction. */
else if (satisfies_constraint_vi (src) || satisfies_constraint_Wc0 (src))
{
+ if (expected_pattern)
+ gcc_assert (*expected_pattern == RVV_DUPLICATE_VMV_VI);
rtx ops[] = {result, src};
emit_vlmax_insn (code_for_pred_mov (mode), UNARY_OP, ops);
}
else
{
+ if (expected_pattern)
+ gcc_assert (*expected_pattern == RVV_DUPLICATE_INT_FP);
+
/* Emit vec_duplicate<mode> split pattern before RA so that
we could have a better optimization opportunity in LICM
which will hoist vmv.v.x outside the loop and in fwprop && combine
@@ -1221,6 +1232,8 @@ expand_const_vector (rtx target, rtx src)
rtx base, step;
if (const_vec_series_p (src, &base, &step))
{
+ if (expected_pattern)
+ gcc_assert (*expected_pattern == RVV_SERIES);
expand_vec_series (result, base, step);
if (result != target)
@@ -1314,6 +1327,8 @@ expand_const_vector (rtx target, rtx src)
gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
if (builder.single_step_npatterns_p ())
{
+ if (expected_pattern)
+ gcc_assert (*expected_pattern == RVV_PATTERN_SINGLE_STEP);
/* Describe the case by choosing NPATTERNS = 4 as an example. */
insn_code icode;
@@ -1453,6 +1468,8 @@ expand_const_vector (rtx target, rtx src)
}
else if (builder.interleaved_stepped_npatterns_p ())
{
+ if (expected_pattern)
+ gcc_assert (*expected_pattern == RVV_PATTERN_INTERLEAVED);
rtx base1 = builder.elt (0);
rtx base2 = builder.elt (1);
poly_int64 step1
@@ -1555,6 +1572,11 @@ expand_const_vector (rtx target, rtx src)
if (emit_catch_all_pattern)
{
+ /* Ensure the vector cost emitted by riscv_const_insns expected this
+ pattern to be handled by the catch all pattern. */
+ if (expected_pattern)
+ gcc_assert (*expected_pattern == RVV_CATCH_ALL);
+
int nelts = XVECLEN (src, 0);
/* Optimize trailing same elements sequence:
@@ -1566,6 +1588,13 @@ expand_const_vector (rtx target, rtx src)
to/reading from the stack to initialize vectors. */
expand_vector_init_insert_elems (result, builder, nelts);
}
+ else
+ {
+ /* Ensure the vector cost emitted by riscv_const_insns expected this
+ pattern to be handled by an optimized pattern. */
+ if (expected_pattern)
+ gcc_assert (*expected_pattern != RVV_CATCH_ALL);
+ }
if (result != target)
emit_move_insn (target, result);
diff --git a/gcc/config/riscv/riscv-v.h b/gcc/config/riscv/riscv-v.h
index 4635b5415c7..3e62334b890 100644
--- a/gcc/config/riscv/riscv-v.h
+++ b/gcc/config/riscv/riscv-v.h
@@ -22,8 +22,22 @@
#ifndef GCC_RISCV_V_H
#define GCC_RISCV_V_H
+#include "hash-map.h"
#include "rtx-vector-builder.h"
+enum riscv_const_expect_p
+{
+ RVV_DUPLICATE_BOOL,
+ RVV_DUPLICATE_VMV_VI,
+ RVV_DUPLICATE_INT_FP,
+ RVV_SERIES,
+ RVV_PATTERN_SINGLE_STEP,
+ RVV_PATTERN_INTERLEAVED,
+ RVV_CATCH_ALL,
+};
+
+extern hash_map<const_rtx, enum riscv_const_expect_p> *EXPECTED_CONST_PATTERN;
+
using namespace riscv_vector;
namespace riscv_vector {
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index d1b8c27f4ac..8b752afeae7 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -681,6 +681,10 @@ static const struct riscv_tune_info
riscv_tune_info_table[] = {
function. */
static bool riscv_save_frame_pointer;
+/* Global variable used in riscv-v.cc to ensure accurate costs are emitted
+ for constant vectors. */
+hash_map<const_rtx, enum riscv_const_expect_p> *EXPECTED_CONST_PATTERN = NULL;
+
typedef enum
{
PUSH_IDX = 0,
@@ -2131,6 +2135,11 @@ riscv_const_insns (rtx x, bool allow_new_pseudos)
return x == CONST0_RTX (GET_MODE (x)) ? 1 : 0;
case CONST_VECTOR:
{
+ /* Used to assert we aren't mislabeling optimized/fallthrough
+ patterns and are emitting accurate costs. */
+ if (!EXPECTED_CONST_PATTERN)
+ EXPECTED_CONST_PATTERN = new hash_map<const_rtx,
riscv_const_expect_p>;
+
/* TODO: This is not accurate, we will need to
adapt the COST of CONST_VECTOR in the future
for the following cases:
@@ -2148,8 +2157,11 @@ riscv_const_insns (rtx x, bool allow_new_pseudos)
if (const_vec_duplicate_p (x, &elt))
{
if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
- /* Duplicate values of 0/1 can be emitted using vmv.v.i. */
- return 1;
+ {
+ EXPECTED_CONST_PATTERN->put (x, RVV_DUPLICATE_BOOL);
+ /* Duplicate values of 0/1 can be emitted using vmv.v.i. */
+ return 1;
+ }
/* We don't allow CONST_VECTOR for DI vector on RV32
system since the ELT constant value can not held
@@ -2162,13 +2174,17 @@ riscv_const_insns (rtx x, bool allow_new_pseudos)
/* Constants in range -16 ~ 15 integer or 0.0 floating-point
can be emitted using vmv.v.i. */
if (satisfies_constraint_vi (x) || satisfies_constraint_Wc0 (x))
- return 1;
+ {
+ EXPECTED_CONST_PATTERN->put (x, RVV_DUPLICATE_VMV_VI);
+ return 1;
+ }
/* Any int/FP constants can always be broadcast from a
scalar register. Loading of a floating-point
constant incurs a literal-pool access. Allow this in
order to increase vectorization possibilities. */
int n = riscv_const_insns (elt, allow_new_pseudos);
+ EXPECTED_CONST_PATTERN->put (x, RVV_DUPLICATE_INT_FP);
if (CONST_DOUBLE_P (elt))
return 1 + 4; /* vfmv.v.f + memory access. */
else
@@ -2186,6 +2202,7 @@ riscv_const_insns (rtx x, bool allow_new_pseudos)
rtx base, step;
if (const_vec_series_p (x, &base, &step))
{
+ EXPECTED_CONST_PATTERN->put (x, RVV_SERIES);
/* This cost is not accurate, we will need to adapt the COST
accurately according to BASE && STEP. */
return 1;
@@ -2216,6 +2233,7 @@ riscv_const_insns (rtx x, bool allow_new_pseudos)
if (builder.single_step_npatterns_p ())
{
+ EXPECTED_CONST_PATTERN->put (x, RVV_PATTERN_SINGLE_STEP);
if (builder.npatterns_all_equal_p ())
{
/* TODO: This cost is not accurate. */
@@ -2229,6 +2247,7 @@ riscv_const_insns (rtx x, bool allow_new_pseudos)
}
else if (builder.interleaved_stepped_npatterns_p ())
{
+ EXPECTED_CONST_PATTERN->put (x, RVV_PATTERN_INTERLEAVED);
/* TODO: This cost is not accurate. */
return 1;
}
@@ -2256,6 +2275,8 @@ riscv_const_insns (rtx x, bool allow_new_pseudos)
/* Our vslide1up/down insn def does not handle HF. */
return 0;
+ EXPECTED_CONST_PATTERN->put (x, RVV_CATCH_ALL);
+
/* We already checked for a fully const vector above. Calculate
the number of leading/trailing elements covered by the splat. */
int leading_ndups = 1;
--
2.34.1