Hi,
This patch is the first patch in a series of patches fixing Big-Endian
lane numbering. The goal of this series of patches is to make proper
bridge between pure GCC big-endian view on lane numbering and internal
architected view.
Approach taken is to catch lane indexing when internal vector lane
indexes are passed to GCC lane indexing world view.
This will have a short-term impact on big-endian NEON intrinsics and
introduces a number of regressions. But this is the correct thing to do
to ensure that auto-vectorized and GCC vector extension code works
correctly.
This particular patch fixes vld1_<type> and vst1_<type> to generate st1
and ld1 instructions, correcting their BE behaviour.
Regression tested on aarch64-none-elf and aarch64_be-none-elf with
recent vec-perm with no unexpected issues.
Is it okay for trunk?
Regards,
Alex Velenko
gcc/
2014-01-16 Alex Velenko <alex.vele...@arm.com>
* config/aarch64/aarch64-simd.md (aarch64_be_ld1<mode>):
New define_insn.
(aarch64_be_st1<mode>): Likewise.
(aarch_ld1<VALL:mode>): Define_expand modified.
(aarch_st1<VALL:mode>): Likewise.
* config/aarch64/aarch64.md (UNSPEC_LD1): New unspec definition.
(UNSPEC_ST1): Likewise
gcc/testsuite
2014-01-16 Alex Velenko <alex.vele...@arm.com>
* /gcc.target/aarch64/vld1-vst1_1.c: New test_case.
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 158b3dca6da12322de0af80d35f593039d716de6..2f2e74f6bccd54accd265a55cc8dbcfe2db2e76f 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -3544,6 +3544,24 @@
(set (attr "length") (symbol_ref "aarch64_simd_attr_length_move (insn)"))]
)
+(define_insn "aarch64_be_ld1<mode>"
+ [(set (match_operand:VALLDI 0 "register_operand" "=w")
+ (unspec:VALLDI [(match_operand:VALLDI 1 "aarch64_simd_struct_operand" "Utv")]
+ UNSPEC_LD1))]
+ "TARGET_SIMD"
+ "ld1\\t{%0<Vmtype>}, %1"
+ [(set_attr "type" "neon_load1_1reg<q>")]
+)
+
+(define_insn "aarch64_be_st1<mode>"
+ [(set (match_operand:VALLDI 0 "aarch64_simd_struct_operand" "=Utv")
+ (unspec:VALLDI [(match_operand:VALLDI 1 "register_operand" "w")]
+ UNSPEC_ST1))]
+ "TARGET_SIMD"
+ "st1\\t{%1<Vmtype>}, %0"
+ [(set_attr "type" "neon_store1_1reg<q>")]
+)
+
(define_split
[(set (match_operand:OI 0 "register_operand" "")
(match_operand:OI 1 "register_operand" ""))]
@@ -3762,7 +3780,11 @@
{
enum machine_mode mode = <VALL:MODE>mode;
rtx mem = gen_rtx_MEM (mode, operands[1]);
- emit_move_insn (operands[0], mem);
+
+ if (BYTES_BIG_ENDIAN)
+ emit_insn (gen_aarch64_be_ld1<VALL:mode> (operands[0], mem));
+ else
+ emit_move_insn (operands[0], mem);
DONE;
})
@@ -3988,7 +4010,11 @@
{
enum machine_mode mode = <VALL:MODE>mode;
rtx mem = gen_rtx_MEM (mode, operands[0]);
- emit_move_insn (mem, operands[1]);
+
+ if (BYTES_BIG_ENDIAN)
+ emit_insn (gen_aarch64_be_st1<VALL:mode> (mem, operands[1]));
+ else
+ emit_move_insn (mem, operands[1]);
DONE;
})
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index c83622d6cad59883923f6eb0454c735c24a1eb3f..d5186f6211ec795672fc2631d7bbb1247a2d2773 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -81,6 +81,7 @@
UNSPEC_GOTSMALLPIC
UNSPEC_GOTSMALLTLS
UNSPEC_GOTTINYPIC
+ UNSPEC_LD1
UNSPEC_LD2
UNSPEC_LD3
UNSPEC_LD4
@@ -92,6 +93,7 @@
UNSPEC_SISD_SSHL
UNSPEC_SISD_USHL
UNSPEC_SSHL_2S
+ UNSPEC_ST1
UNSPEC_ST2
UNSPEC_ST3
UNSPEC_ST4
diff --git a/gcc/testsuite/gcc.target/aarch64/vld1-vst1_1.c b/gcc/testsuite/gcc.target/aarch64/vld1-vst1_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..d1834a264708fe6ab901ac1a27544ca8ebb815cc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vld1-vst1_1.c
@@ -0,0 +1,52 @@
+/* Test vld1 and vst1 maintain consistent indexing. */
+/* { dg-do run } */
+/* { dg-options "-O3" } */
+#include <arm_neon.h>
+
+extern void abort (void);
+
+int __attribute__ ((noinline))
+test_vld1_vst1 ()
+{
+ int8x8_t a;
+ int8x8_t b;
+ int i = 0;
+ int8_t c[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
+ int8_t d[8];
+ a = vld1_s8 (c);
+ asm volatile ("":::"memory");
+ vst1_s8 (d, a);
+ asm volatile ("":::"memory");
+ for (; i < 8; i++)
+ if (c[i] != d[i])
+ return 1;
+ return 0;
+}
+
+int __attribute__ ((noinline))
+test_vld1q_vst1q ()
+{
+ int16x8_t a;
+ int16x8_t b;
+ int i = 0;
+ int16_t c[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
+ int16_t d[8];
+ a = vld1q_s16 (c);
+ asm volatile ("":::"memory");
+ vst1q_s16 (d, a);
+ asm volatile ("":::"memory");
+ for (; i < 8; i++)
+ if (c[i] != d[i])
+ return 1;
+ return 0;
+}
+
+int
+main ()
+{
+ if (test_vld1_vst1 ())
+ abort ();
+ if (test_vld1q_vst1q ())
+ abort ();
+ return 0;
+}