Hi,
This patch adds zero-length handling to gimple_fold_partial_store and
gimple_fold_partial_load. If length + bias is zero, we replace a load
with a zero constant and remove a store altogether. For riscv's vector
loads this is OK because zero-length loads are defined as real no-ops
rather than "else-value loading" loads. All other architectures zero
the destination anyway.
At the same time the patch removes the mask_p argument of
gimple_fold_partial_load, gimple_fold_partial_store and
gimple_fold_partial_load_store_mem_ref. We can easily get the mask
and length indices from the IFN.
Bootstrapped and regtested on x86 and power10; aarch64 is still running.
Regtested on riscv64.
Regards
Robin
PR tree-optimization/122635
gcc/ChangeLog:
* gimple-fold.cc (gimple_fold_partial_load_store_mem_ref):
Remove mask_p argument. Replace zero-length load with zero
constant.
(gimple_fold_partial_load): Remove mask_p argument.
(gimple_fold_partial_store): Remove mask_p argument. Remove store
if we optimized to zero constant.
(gimple_fold_call): Adjust calls for removed mask_p argument.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/autovec/pr122635-1.c: New test.
* gcc.target/riscv/rvv/autovec/pr122635-2.c: New test.
* gcc.target/powerpc/p9-vec-length-epil-8.c: Expect two fewer
lxvl.
---
gcc/gimple-fold.cc | 96 ++++++++++---------
.../gcc.target/powerpc/p9-vec-length-epil-8.c | 2 +-
.../gcc.target/riscv/rvv/autovec/pr122635-1.c | 20 ++++
.../gcc.target/riscv/rvv/autovec/pr122635-2.c | 18 ++++
4 files changed, 91 insertions(+), 45 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr122635-1.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr122635-2.c
diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
index 3fc76313622..159e37460a9 100644
--- a/gcc/gimple-fold.cc
+++ b/gcc/gimple-fold.cc
@@ -5759,46 +5759,48 @@ arith_overflowed_p (enum tree_code code, const_tree
type,
/* If IFN_{MASK,LEN,MASK_LEN}_LOAD/STORE call CALL is unconditional,
return a MEM_REF for the memory it references, otherwise return null.
- VECTYPE is the type of the memory vector. MASK_P indicates it's for
- MASK if true, otherwise it's for LEN. */
+ VECTYPE is the type of the memory vector. */
static tree
-gimple_fold_partial_load_store_mem_ref (gcall *call, tree vectype, bool mask_p)
+gimple_fold_partial_load_store_mem_ref (gcall *call, tree vectype)
{
tree ptr = gimple_call_arg (call, 0);
tree alias_align = gimple_call_arg (call, 1);
if (!tree_fits_uhwi_p (alias_align))
return NULL_TREE;
- if (mask_p)
+ internal_fn ifn = gimple_call_internal_fn (call);
+ int mask_index = internal_fn_mask_index (ifn);
+ int len_index = internal_fn_len_index (ifn);
+
+ if (mask_index != -1)
{
- tree mask = gimple_call_arg (call, 2);
+ tree mask = gimple_call_arg (call, mask_index);
if (!integer_all_onesp (mask))
return NULL_TREE;
}
- else
+
+ if (len_index != -1)
{
- internal_fn ifn = gimple_call_internal_fn (call);
- int len_index = internal_fn_len_index (ifn);
- tree basic_len = gimple_call_arg (call, len_index);
- if (!poly_int_tree_p (basic_len))
+ tree len = gimple_call_arg (call, len_index);
+ if (!poly_int_tree_p (len))
return NULL_TREE;
tree bias = gimple_call_arg (call, len_index + 1);
gcc_assert (TREE_CODE (bias) == INTEGER_CST);
- /* For LEN_LOAD/LEN_STORE/MASK_LEN_LOAD/MASK_LEN_STORE,
- we don't fold when (bias + len) != VF. */
- if (maybe_ne (wi::to_poly_widest (basic_len) + wi::to_widest (bias),
- GET_MODE_NUNITS (TYPE_MODE (vectype))))
- return NULL_TREE;
-
- /* For MASK_LEN_{LOAD,STORE}, we should also check whether
- the mask is all ones mask. */
- if (ifn == IFN_MASK_LEN_LOAD || ifn == IFN_MASK_LEN_STORE)
+ poly_widest_int wlen = wi::to_poly_widest (len)
+ + wi::to_widest (bias);
+ if (known_eq (wlen, 0))
{
- tree mask = gimple_call_arg (call, internal_fn_mask_index (ifn));
- if (!integer_all_onesp (mask))
- return NULL_TREE;
+ /* Length is 0, nothing loaded. Replace with zero.
+ ??? Actually we should replace with the else operand.
+ However, even for RVV a length of zero does not modify
+ the destination at all and this should be safe. */
+ return build_zero_cst (vectype);
}
+ /* For LEN_LOAD/LEN_STORE/MASK_LEN_LOAD/MASK_LEN_STORE,
+ we don't fold when len + bias != VF. */
+ else if (maybe_ne (wlen, GET_MODE_NUNITS (TYPE_MODE (vectype))))
+ return NULL_TREE;
}
unsigned HOST_WIDE_INT align = tree_to_uhwi (alias_align);
@@ -5808,18 +5810,17 @@ gimple_fold_partial_load_store_mem_ref (gcall *call,
tree vectype, bool mask_p)
return fold_build2 (MEM_REF, vectype, ptr, offset);
}
-/* Try to fold IFN_{MASK,LEN}_LOAD call CALL. Return true on success.
- MASK_P indicates it's for MASK if true, otherwise it's for LEN. */
+/* Try to fold IFN_{MASK,LEN}_LOAD call CALL. Return true on success. */
static bool
-gimple_fold_partial_load (gimple_stmt_iterator *gsi, gcall *call, bool mask_p)
+gimple_fold_partial_load (gimple_stmt_iterator *gsi, gcall *call)
{
tree lhs = gimple_call_lhs (call);
if (!lhs)
return false;
if (tree rhs
- = gimple_fold_partial_load_store_mem_ref (call, TREE_TYPE (lhs), mask_p))
+ = gimple_fold_partial_load_store_mem_ref (call, TREE_TYPE (lhs)))
{
gassign *new_stmt = gimple_build_assign (lhs, rhs);
gimple_set_location (new_stmt, gimple_location (call));
@@ -5830,23 +5831,34 @@ gimple_fold_partial_load (gimple_stmt_iterator *gsi,
gcall *call, bool mask_p)
return false;
}
-/* Try to fold IFN_{MASK,LEN}_STORE call CALL. Return true on success.
- MASK_P indicates it's for MASK if true, otherwise it's for LEN. */
+/* Try to fold IFN_{MASK,LEN}_STORE call CALL. Return true on success. */
static bool
-gimple_fold_partial_store (gimple_stmt_iterator *gsi, gcall *call,
- bool mask_p)
+gimple_fold_partial_store (gimple_stmt_iterator *gsi, gcall *call)
{
internal_fn ifn = gimple_call_internal_fn (call);
+
tree rhs = gimple_call_arg (call, internal_fn_stored_value_index (ifn));
if (tree lhs
- = gimple_fold_partial_load_store_mem_ref (call, TREE_TYPE (rhs), mask_p))
+ = gimple_fold_partial_load_store_mem_ref (call, TREE_TYPE (rhs)))
{
- gassign *new_stmt = gimple_build_assign (lhs, rhs);
- gimple_set_location (new_stmt, gimple_location (call));
- gimple_move_vops (new_stmt, call);
- gsi_replace (gsi, new_stmt, false);
- return true;
+ /* For zero-length stores, lhs is a zero constant. Just remove
+ the store as it's a no-op. */
+ if (zerop (lhs))
+ {
+ unlink_stmt_vdef (call);
+ release_defs (call);
+ gsi_replace (gsi, gimple_build_nop (), true);
+ return true;
+ }
+ else
+ {
+ gassign *new_stmt = gimple_build_assign (lhs, rhs);
+ gimple_set_location (new_stmt, gimple_location (call));
+ gimple_move_vops (new_stmt, call);
+ gsi_replace (gsi, new_stmt, false);
+ return true;
+ }
}
return false;
}
@@ -6075,19 +6087,15 @@ gimple_fold_call (gimple_stmt_iterator *gsi, bool
inplace)
cplx_result = true;
uaddc_usubc = true;
break;
- case IFN_MASK_LOAD:
- changed |= gimple_fold_partial_load (gsi, stmt, true);
- break;
- case IFN_MASK_STORE:
- changed |= gimple_fold_partial_store (gsi, stmt, true);
- break;
case IFN_LEN_LOAD:
+ case IFN_MASK_LOAD:
case IFN_MASK_LEN_LOAD:
- changed |= gimple_fold_partial_load (gsi, stmt, false);
+ changed |= gimple_fold_partial_load (gsi, stmt);
break;
case IFN_LEN_STORE:
+ case IFN_MASK_STORE:
case IFN_MASK_LEN_STORE:
- changed |= gimple_fold_partial_store (gsi, stmt, false);
+ changed |= gimple_fold_partial_store (gsi, stmt);
break;
default:
break;
diff --git a/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-8.c
b/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-8.c
index 34a2c8eb11b..5dff0d0ceb9 100644
--- a/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-8.c
+++ b/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-8.c
@@ -13,5 +13,5 @@
#include "p9-vec-length-8.h"
-/* { dg-final { scan-assembler-times {\mlxvl\M} 16 } } */
+/* { dg-final { scan-assembler-times {\mlxvl\M} 14 } } */
/* { dg-final { scan-assembler-times {\mstxvl\M} 7 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr122635-1.c
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr122635-1.c
new file mode 100644
index 00000000000..0beb3d70866
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr122635-1.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=rv64gcv_zvl256b -mabi=lp64d -mrvv-vector-bits=zvl
-mno-autovec-segment" } */
+
+typedef struct {
+ int a[6];
+ float b[3];
+} c;
+
+int d(c *e) {
+ int f =0;
+ for (; f < 3; f++) {
+ e->a[2 * f] = e->b[f];
+ e->a[2 * f + 1] = -e->a[2 * f];
+ e->a[2 * f] = f + 3 * e->a[2 * f];
+ e->a[2 * f + 1] = f + 3 * e->a[2 * f + 1];
+ }
+ return 0;
+}
+
+/* { dg-final { scan-assembler-not "vsetivli.*zero,0" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr122635-2.c
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr122635-2.c
new file mode 100644
index 00000000000..0de69b52cb0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr122635-2.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=rv64gcv_zvl256b -mabi=lp64d -mrvv-vector-bits=zvl
-mno-autovec-segment" } */
+
+typedef struct {
+ int A[6];
+ float b[];
+} a;
+
+int b(a *a) {
+ int b = 0;
+ for (; b < 3; b++) {
+ a->A[2 * b] = a->b[b] - b + a->A[2 * b];
+ a->A[2 * b + 1] = b * a->A[2 * b + 1];
+ }
+ return 0;
+}
+
+/* { dg-final { scan-assembler-not "vsetivli.*zero,0" } } */
--
2.51.1