From: Ju-Zhe Zhong <[email protected]>
Hi, Richi.
I tried to understand your last email and to refactor the do-while loop using
VECTOR_CST_NELTS.
This patch works fine for LEN_MASK_STORE and compiler can CSE redundant store.
I have appended testcase in this patch to test VN for LEN_MASK_STORE.
I am not sure whether I am on the same page with you.
Feel free to correct me, Thanks.
gcc/ChangeLog:
* tree-ssa-sccvn.cc (vn_reference_lookup_3): Add LEN_MASK_STORE and fix
LEN_STORE
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/autovec/partial/len_maskstore_vn-1.c: New test.
---
.../rvv/autovec/partial/len_maskstore_vn-1.c | 30 +++++++++++++++++++
gcc/tree-ssa-sccvn.cc | 24 +++++++++++----
2 files changed, 49 insertions(+), 5 deletions(-)
create mode 100644
gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/len_maskstore_vn-1.c
diff --git
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/len_maskstore_vn-1.c
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/len_maskstore_vn-1.c
new file mode 100644
index 00000000000..0b2d03693dc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/len_maskstore_vn-1.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv_zvl256b -mabi=ilp32d --param
riscv-autovec-preference=fixed-vlmax -O3 -fdump-tree-fre5" } */
+
+void __attribute__((noinline,noclone))
+foo (int *out, int *res)
+{
+ int mask[] = { 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 };
+ int i;
+ for (i = 0; i < 16; ++i)
+ {
+ if (mask[i])
+ out[i] = i;
+ }
+ int o0 = out[0];
+ int o7 = out[7];
+ int o14 = out[14];
+ int o15 = out[15];
+ res[0] = o0;
+ res[2] = o7;
+ res[4] = o14;
+ res[6] = o15;
+}
+
+/* Vectorization produces .LEN_MASK_STORE, unrolling will unroll the two
+ vector iterations. FRE5 after that should be able to CSE
+ out[7] and out[15], but leave out[0] and out[14] alone. */
+/* { dg-final { scan-tree-dump " = o0_\[0-9\]+;" "fre5" } } */
+/* { dg-final { scan-tree-dump " = 7;" "fre5" } } */
+/* { dg-final { scan-tree-dump " = o14_\[0-9\]+;" "fre5" } } */
+/* { dg-final { scan-tree-dump " = 15;" "fre5" } } */
diff --git a/gcc/tree-ssa-sccvn.cc b/gcc/tree-ssa-sccvn.cc
index 11061a374a2..242d82d6274 100644
--- a/gcc/tree-ssa-sccvn.cc
+++ b/gcc/tree-ssa-sccvn.cc
@@ -3304,6 +3304,16 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void
*data_,
if (!tree_fits_uhwi_p (len) || !tree_fits_shwi_p (bias))
return (void *)-1;
break;
+ case IFN_LEN_MASK_STORE:
+ len = gimple_call_arg (call, 2);
+ bias = gimple_call_arg (call, 5);
+ if (!tree_fits_uhwi_p (len) || !tree_fits_shwi_p (bias))
+ return (void *)-1;
+ mask = gimple_call_arg (call, internal_fn_mask_index (fn));
+ mask = vn_valueize (mask);
+ if (TREE_CODE (mask) != VECTOR_CST)
+ return (void *)-1;
+ break;
default:
return (void *)-1;
}
@@ -3344,11 +3354,17 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void
*data_,
tree vectype = TREE_TYPE (def_rhs);
unsigned HOST_WIDE_INT elsz
= tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype)));
+ /* Set initial len value is the UINT_MAX, so mask_idx < actual_len
+ is always true for MASK_STORE. */
+ unsigned actual_len = UINT_MAX;
+ if (len)
+ actual_len = tree_to_uhwi (len) + tree_to_shwi (bias);
+ unsigned nunits
+ = MIN (actual_len, VECTOR_CST_NELTS (mask).coeffs[0]);
if (mask)
{
HOST_WIDE_INT start = 0, length = 0;
- unsigned mask_idx = 0;
- do
+ for (unsigned mask_idx = 0; mask_idx < nunits; mask_idx++)
{
if (integer_zerop (VECTOR_CST_ELT (mask, mask_idx)))
{
@@ -3371,9 +3387,7 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void
*data_,
}
else
length += elsz;
- mask_idx++;
}
- while (known_lt (mask_idx, TYPE_VECTOR_SUBPARTS (vectype)));
if (length != 0)
{
pd.rhs_off = start;
@@ -3389,7 +3403,7 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void
*data_,
{
pd.offset = offset2i;
pd.size = (tree_to_uhwi (len)
- + -tree_to_shwi (bias)) * BITS_PER_UNIT;
+ + tree_to_shwi (bias)) * BITS_PER_UNIT;
if (BYTES_BIG_ENDIAN)
pd.rhs_off = pd.size - tree_to_uhwi (TYPE_SIZE (vectype));
else
--
2.36.1