This patch expands the use of the EOR3 instruction to scalar modes held in FP registers, where we would otherwise emit two vector EORs.
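
For example, with the patch applied the DImode case below is expected to become
a single EOR3 when the values live in FP registers, while the plain integer
version still ends up as two EORs on the general registers (both cases are
covered by the new test; the function names here are just for illustration):

#include <arm_neon.h>
#pragma GCC target ("+sha3")

/* Values live in FP registers: expect
     eor3 v0.16b, v0.16b, v1.16b, v2.16b  */
uint64x1_t
eor3_fp (uint64x1_t a, uint64x1_t b, uint64x1_t c)
{
  return a ^ b ^ c;
}

/* Values live in GP registers: expect
     eor x0, x0, x1
     eor x0, x0, x2  */
uint64_t
eor3_gp (uint64_t a, uint64_t b, uint64_t c)
{
  return a ^ b ^ c;
}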

Bootstrapped and regression tested on aarch64-none-linux-gnu.

OK for trunk?

gcc/ChangeLog:

        * config/aarch64/aarch64-simd.md (eor3q<mode>4): Convert to
        define_insn_and_split and extend to VSDQ_I so that combine can
        use it for scalar modes held in FP registers; split back into
        two EORs after reload when the operands end up in GP registers.

gcc/testsuite/ChangeLog:

        * gcc.target/aarch64/eor3-opt.c: New test.

diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 0d5b02a739fa74724d6dc8b658638d55b8db6890..3bf668e25b58a463f1d35387b1c6af7cc04e3a16 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -9201,15 +9201,34 @@
 
 ;; sha3
 
-(define_insn "eor3q<mode>4"
-  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
-       (xor:VDQ_I
-        (xor:VDQ_I
-         (match_operand:VDQ_I 2 "register_operand" "w")
-         (match_operand:VDQ_I 3 "register_operand" "w"))
-        (match_operand:VDQ_I 1 "register_operand" "w")))]
+(define_insn_and_split "eor3q<mode>4"
+  [(set (match_operand:VSDQ_I 0 "register_operand")
+       (xor:VSDQ_I
+        (xor:VSDQ_I
+         (match_operand:VSDQ_I 2 "register_operand")
+         (match_operand:VSDQ_I 3 "register_operand"))
+        (match_operand:VSDQ_I 1 "register_operand")))]
   "TARGET_SHA3"
-  "eor3\\t%0.16b, %1.16b, %2.16b, %3.16b"
+  {@ [ cons: =0 , %1 , 2 , 3 ]
+     [ w       ,  w , w , w ] eor3\t%0.16b, %1.16b, %2.16b, %3.16b
+     [ r       ,  r , r , r ] #
+  }
+  "&& reload_completed && GP_REGNUM_P (REGNO (operands[0]))"
+  [(const_int 0)]
+  {
+    /* The operands ended up in general registers, so split into two
+       scalar EORs.  The first EOR overwrites operands[0]; if operands[3]
+       was allocated to the same register, swap it into the first EOR so
+       its original value is not lost.  */
+    if (reg_overlap_mentioned_p (operands[0], operands[3]))
+      std::swap (operands[1], operands[3]);
+    machine_mode xor_mode = <MODE>mode == DImode ? DImode : SImode;
+    emit_move_insn (operands[0],
+                   gen_rtx_XOR (xor_mode, operands[1], operands[2]));
+    emit_move_insn (operands[0],
+                   gen_rtx_XOR (xor_mode, operands[0], operands[3]));
+    DONE;
+  }
   [(set_attr "type" "crypto_sha3")]
 )
 
diff --git a/gcc/testsuite/gcc.target/aarch64/eor3-opt.c b/gcc/testsuite/gcc.target/aarch64/eor3-opt.c
new file mode 100644
index 0000000000000000000000000000000000000000..03ec86743f064c748c84e9526b919481fe05dd1d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/eor3-opt.c
@@ -0,0 +1,175 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" {} } } */
+#include <arm_neon.h>
+
+#pragma GCC target ("+sha3")
+
+#define EOR_SCALAR_FN(type)                \
+type eor3_##type (type a, type b, type c) { \
+    return a ^ b ^ c;                      \
+}
+
+
+EOR_SCALAR_FN(uint64x1_t)
+/*
+** eor3_uint64x1_t:
+**     eor3    v0.16b, v0.16b, v1.16b, v2.16b
+**     ret
+*/
+EOR_SCALAR_FN(int64x1_t)
+/*
+** eor3_int64x1_t:
+**     eor3    v0.16b, v0.16b, v1.16b, v2.16b
+**     ret
+*/
+
+EOR_SCALAR_FN(uint64_t)
+/*
+** eor3_uint64_t:
+**     eor     x0, x0, x1
+**     eor     x0, x0, x2
+**     ret
+*/
+EOR_SCALAR_FN(int64_t)
+/*
+** eor3_int64_t:
+**     eor     x0, x0, x1
+**     eor     x0, x0, x2
+**     ret
+*/
+EOR_SCALAR_FN(uint32_t)
+/*
+** eor3_uint32_t:
+**     eor     w0, w0, w1
+**     eor     w0, w0, w2
+**     ret
+*/
+EOR_SCALAR_FN(int32_t)
+/*
+** eor3_int32_t:
+**     eor     w0, w0, w1
+**     eor     w0, w0, w2
+**     ret
+*/
+EOR_SCALAR_FN(uint16_t)
+/*
+** eor3_uint16_t:
+**     eor     w0, w0, w1
+**     eor     w0, w0, w2
+**     ret
+*/
+EOR_SCALAR_FN(int16_t)
+/*
+** eor3_int16_t:
+**     eor     w0, w0, w1
+**     eor     w0, w0, w2
+**     ret
+*/
+EOR_SCALAR_FN(uint8_t)
+/*
+** eor3_uint8_t:
+**     eor     w0, w0, w1
+**     eor     w0, w0, w2
+**     ret
+*/
+EOR_SCALAR_FN(int8_t)
+/*
+** eor3_int8_t:
+**     eor     w0, w0, w1
+**     eor     w0, w0, w2
+**     ret
+*/
+
+#define EOR_VEC_FN(type)                 \
+type eor3_##type (type a, type b, type c) \
+{                                        \
+  type res = a;                                  \
+  res[0] = a[0] ^ b[0] ^ c[0];           \
+  return res;                            \
+}
+
+EOR_VEC_FN(int32x4_t)
+/*
+** eor3_int32x4_t:
+**     eor3    v([0-2]).16b, v[0-2].16b, v[0-2].16b, v[0-2].16b
+**     ins     v0.s\[0\], v\1.s\[0\]
+**     ret
+*/
+EOR_VEC_FN(int32x2_t)
+/*
+** eor3_int32x2_t:
+**     eor3    v([0-2]).16b, v[0-2].16b, v[0-2].16b, v[0-2].16b
+**     ins     v0.s\[0\], v\1.s\[0\]
+**     ret
+*/
+EOR_VEC_FN(uint32x4_t)
+/*
+** eor3_uint32x4_t:
+**     eor3    v([0-2]).16b, v[0-2].16b, v[0-2].16b, v[0-2].16b
+**     ins     v0.s\[0\], v\1.s\[0\]
+**     ret
+*/
+EOR_VEC_FN(uint32x2_t)
+/*
+** eor3_uint32x2_t:
+**     eor3    v([0-2]).16b, v[0-2].16b, v[0-2].16b, v[0-2].16b
+**     ins     v0.s\[0\], v\1.s\[0\]
+**     ret
+*/
+EOR_VEC_FN(int16x8_t)
+/*
+** eor3_int16x8_t:
+**     eor3    v([0-2]).16b, v[0-2].16b, v[0-2].16b, v[0-2].16b
+**     ins     v0.h\[0\], v\1.h\[0\]
+**     ret
+*/
+EOR_VEC_FN(int16x4_t)
+/*
+** eor3_int16x4_t:
+**     eor3    v([0-2]).16b, v[0-2].16b, v[0-2].16b, v[0-2].16b
+**     ins     v0.h\[0\], v\1.h\[0\]
+**     ret
+*/
+EOR_VEC_FN(uint16x8_t)
+/*
+** eor3_uint16x8_t:
+**     eor3    v([0-2]).16b, v[0-2].16b, v[0-2].16b, v[0-2].16b
+**     ins     v0.h\[0\], v\1.h\[0\]
+**     ret
+*/
+EOR_VEC_FN(uint16x4_t)
+/*
+** eor3_uint16x4_t:
+**     eor3    v([0-2]).16b, v[0-2].16b, v[0-2].16b, v[0-2].16b
+**     ins     v0.h\[0\], v\1.h\[0\]
+**     ret
+*/
+EOR_VEC_FN(int8x16_t)
+/*
+** eor3_int8x16_t:
+**     eor3    v([0-2]).16b, v[0-2].16b, v[0-2].16b, v[0-2].16b
+**     ins     v0.b\[0\], v\1.b\[0\]
+**     ret
+*/
+EOR_VEC_FN(int8x8_t)
+/*
+** eor3_int8x8_t:
+**     eor3    v([0-2]).16b, v[0-2].16b, v[0-2].16b, v[0-2].16b
+**     ins     v0.b\[0\], v\1.b\[0\]
+**     ret
+*/
+EOR_VEC_FN(uint8x16_t)
+/*
+** eor3_uint8x16_t:
+**     eor3    v([0-2]).16b, v[0-2].16b, v[0-2].16b, v[0-2].16b
+**     ins     v0.b\[0\], v\1.b\[0\]
+**     ret
+*/
+EOR_VEC_FN(uint8x8_t)
+/*
+** eor3_uint8x8_t:
+**     eor3    v([0-2]).16b, v[0-2].16b, v[0-2].16b, v[0-2].16b
+**     ins     v0.b\[0\], v\1.b\[0\]
+**     ret
+*/
