From: "dragan.mladjenovic" <[email protected]>
This workaround adds the -mfuse-vect-init option, which causes the back end
to emit a single vector load for a vector initialization if all of the
initializer elements come from consecutive memory locations and are in the right order.
gcc/
* config/mips/mips.cc (mips_fuse_vect_init_p): New function.
(mips_expand_vector_init): Detect init sequence that can be
fused into a single load.
* config/mips/mips.opt (mfuse-vect-init): New option.
gcc/testsuite/
* gcc.target/mips/msa-fuse-vect-init.c: New file.
Cherry-picked 4f440a87ad32b3549be8a0b89900d656ac70d4f8
and 1eb9d22dc480c962027eed522e0b26d0ebbd3e0b
from https://github.com/MIPS/gcc
Signed-off-by: Dragan Mladjenovic <[email protected]>
Signed-off-by: Faraz Shahbazker <[email protected]>
Signed-off-by: Aleksandar Rakic <[email protected]>
---
gcc/config/mips/mips.cc | 61 +++++++++++++++++++
gcc/config/mips/mips.opt | 3 +
.../gcc.target/mips/msa-fuse-vect-init.c | 18 ++++++
3 files changed, 82 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/mips/msa-fuse-vect-init.c
diff --git a/gcc/config/mips/mips.cc b/gcc/config/mips/mips.cc
index 4d78607eb9a..9be4deb633a 100644
--- a/gcc/config/mips/mips.cc
+++ b/gcc/config/mips/mips.cc
@@ -22633,6 +22633,57 @@ mips_expand_vi_general (machine_mode vmode,
machine_mode imode,
emit_move_insn (target, mem);
}
+/* Return true if the NELT elements of the vector initializer VALS, each
+   of mode IMODE, can be loaded with a single "fused" vector load.  That
+   requires -mfuse-vect-init and every element to be a non-volatile MEM
+   of mode IMODE, aligned to at least the element size, whose address (as
+   decomposed by mips_split_plus) has the same base register and an offset
+   that grows by exactly one element size from each element to the next.
+   NOTE(review): mips.opt declares the flag with Init(-1), which the test
+   below treats as enabled; confirm that is the intended default.  */
+
+static bool
+mips_fuse_vect_init_p (machine_mode imode, unsigned nelt, rtx vals)
+{
+  rtx base = NULL_RTX;
+  HOST_WIDE_INT prev_offset = 0;
+  unsigned min_align = GET_MODE_BITSIZE (imode);
+  unsigned step_size = GET_MODE_SIZE (imode);
+
+  if (!flag_fuse_vect_init)
+    return false;
+
+  for (unsigned i = 0; i < nelt; ++i)
+    {
+      rtx elt = XVECEXP (vals, 0, i);
+      rtx elt_base;
+      HOST_WIDE_INT elt_offset;
+
+      /* Test MEM_P first: the MEM_* accessors are only meaningful on MEMs.  */
+      if (!MEM_P (elt)
+          || MEM_VOLATILE_P (elt)
+          || MEM_ALIGN (elt) < min_align
+          || GET_MODE (elt) != imode)
+        return false;
+
+      mips_split_plus (XEXP (elt, 0), &elt_base, &elt_offset);
+      if (i == 0)
+        {
+          /* The first element fixes the base register of the fused load.  */
+          if (!REG_P (elt_base))
+            return false;
+          base = elt_base;
+        }
+      else if (!rtx_equal_p (base, elt_base)
+               || elt_offset - prev_offset != step_size)
+        return false;
+
+      prev_offset = elt_offset;
+    }
+
+  return true;
+}
+
/* Expand a vector initialization. */
void
@@ -22643,6 +22694,7 @@ mips_expand_vector_init (rtx target, rtx vals)
unsigned i, nelt = GET_MODE_NUNITS (vmode);
unsigned nvar = 0, one_var = -1u;
bool all_same = true;
+ bool all_mem = true;
rtx x;
for (i = 0; i < nelt; ++i)
@@ -22650,6 +22702,8 @@ mips_expand_vector_init (rtx target, rtx vals)
x = XVECEXP (vals, 0, i);
if (!mips_constant_elt_p (x))
nvar++, one_var = i;
+ if (!MEM_P (x))
+ all_mem = false;
if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
all_same = false;
}
@@ -22710,6 +22764,13 @@ mips_expand_vector_init (rtx target, rtx vals)
}
else
{
+ if (all_mem && mips_fuse_vect_init_p (imode, nelt, vals))
+ {
+ rtx mem = widen_memory_access (XVECEXP (vals, 0, 0), vmode, 0);
+ emit_move_insn (target, mem);
+ return;
+ }
+
emit_move_insn (target, CONST0_RTX (vmode));
for (i = 0; i < nelt; ++i)
diff --git a/gcc/config/mips/mips.opt b/gcc/config/mips/mips.opt
index 99fe9301900..f3b2ed473f3 100644
--- a/gcc/config/mips/mips.opt
+++ b/gcc/config/mips/mips.opt
@@ -508,3 +508,6 @@ Use Loongson EXTension (EXT) instructions.
mloongson-ext2
Target Var(TARGET_LOONGSON_EXT2)
Use Loongson EXTension R2 (EXT2) instructions.
+
+mfuse-vect-init
+Target Var(flag_fuse_vect_init) Undocumented Init(-1)
diff --git a/gcc/testsuite/gcc.target/mips/msa-fuse-vect-init.c
b/gcc/testsuite/gcc.target/mips/msa-fuse-vect-init.c
new file mode 100644
index 00000000000..faa1ff4eee6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/mips/msa-fuse-vect-init.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mfp64 -mhard-float -mmsa" } */
+/* { dg-additional-options "-mfuse-vect-init" } */
+/* { dg-skip-if "code quality test" { *-*-* } { "-O0" } { "" } } */
+
+typedef int v4i32 __attribute__ ((vector_size(16)));
+/* Initialize a vector from src[0..3] (consecutive, in order) and store it.  */
+void
+copy (int* src, v4i32* dst)
+{
+ v4i32 chunk = (v4i32){src[0], src[1], src[2], src[3]};
+ dst[0] = chunk;
+}
+
+/* { dg-final { scan-assembler-not "insert" } } */
+/* { dg-final { scan-assembler-times "\tld\\\.w" 1 } } */
+/* { dg-final { scan-assembler-times "\tst\\\.w" 1 } } */
+
--
2.34.1