From 48de8ba0edd6109e7d706b152a6ab34b78409bb2 Mon Sep 17 00:00:00 2001
From: Kyrylo Tkachov <ktkachov@nvidia.com>
Date: Thu, 3 Jul 2025 10:09:47 -0700
Subject: [PATCH 4/7] aarch64: Use EOR3 for DImode values

Similar to BCAX, we can use EOR3 for DImode, but we have to be careful
not to force GP<->SIMD moves unnecessarily, so add a splitter that turns
the operation into two scalar EORs when the destination ends up in a
general-purpose register.

So for input:
uint64_t eor3_d_gp (uint64_t a, uint64_t b, uint64_t c) { return EOR3 (a, b, c); }
uint64x1_t eor3_d (uint64x1_t a, uint64x1_t b, uint64x1_t c) { return EOR3 (a, b, c); }

We generate the desired:
eor3_d_gp:
        eor     x1, x1, x2
        eor     x0, x1, x0
        ret

eor3_d:
        eor3    v0.16b, v0.16b, v1.16b, v2.16b
        ret

Bootstrapped and tested on aarch64-none-linux-gnu.

Signed-off-by: Kyrylo Tkachov <ktkachov@nvidia.com>

gcc/

	* config/aarch64/aarch64-simd.md (*eor3qdi4): New
	define_insn_and_split.

gcc/testsuite/

	* gcc.target/aarch64/simd/eor3_d.c: Add tests for DImode operands.
---
 gcc/config/aarch64/aarch64-simd.md            | 25 +++++++++++++++++++
 .../gcc.target/aarch64/simd/eor3_d.c          |  6 ++++-
 2 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index be6a16b4be8..cded19da0ff 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -9191,6 +9191,31 @@
   [(set_attr "type" "crypto_sha3")]
 )
 
+(define_insn_and_split "*eor3qdi4"	; DImode 3-way XOR: op0 = (op2 ^ op3) ^ op1.
+  [(set (match_operand:DI 0 "register_operand" "=w,&r")	; alt 0: "w" regs, alt 1: "r" regs.
+	(xor:DI
+	 (xor:DI
+	  (match_operand:DI 2 "register_operand" "w,r")
+	  (match_operand:DI 3 "register_operand" "w,r"))
+	 (match_operand:DI 1 "register_operand" "w,r")))]
+  "TARGET_SHA3"
+  "@
+   eor3\t%0.16b, %1.16b, %2.16b, %3.16b
+   #"	; alt 1 has no single-instruction form ("#"); it is split below.
+  "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"	; split only for a GP destination.
+  [(set (match_dup 4) (xor:DI (match_dup 2) (match_dup 3)))	; tmp = op2 ^ op3
+   (set (match_dup 0) (xor:DI (match_dup 4) (match_dup 1)))]	; op0 = tmp ^ op1
+  {
+    if (reload_completed)
+      operands[4] = operands[0];  /* Reuse the destination as the temporary; the '&' on alt 1 keeps it from overlapping op1.  */
+    else if (can_create_pseudo_p ())
+      operands[4] = gen_reg_rtx (DImode);  /* Otherwise use a fresh pseudo for the intermediate XOR.  */
+    else
+      FAIL;  /* No temporary available: leave the insn unsplit.  */
+  }
+  [(set_attr "type" "crypto_sha3,multiple")]
+)
+
 (define_insn "aarch64_rax1qv2di"
   [(set (match_operand:V2DI 0 "register_operand" "=w")
 	(xor:V2DI
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/eor3_d.c b/gcc/testsuite/gcc.target/aarch64/simd/eor3_d.c
index 7f2b2b42268..6c9595b6acf 100644
--- a/gcc/testsuite/gcc.target/aarch64/simd/eor3_d.c
+++ b/gcc/testsuite/gcc.target/aarch64/simd/eor3_d.c
@@ -7,9 +7,13 @@
 
 #define EOR3(x,y,z)  ((x) ^ (y) ^ (z))
 
+/* GP-register operands: should not use EOR3, to avoid GP<->SIMD moves.  */
+uint64_t eor3_d_gp (uint64_t a, uint64_t b, uint64_t c) { return EOR3 (a, b, c); }
+
+uint64x1_t eor3_d (uint64x1_t a, uint64x1_t b, uint64x1_t c) { return EOR3 (a, b, c); }
 uint32x2_t bcax_s (uint32x2_t a, uint32x2_t b, uint32x2_t c) { return EOR3 (a, b, c); }
 uint16x4_t bcax_h (uint16x4_t a, uint16x4_t b, uint16x4_t c) { return EOR3 (a, b, c); }
 uint8x8_t bcax_b (uint8x8_t a, uint8x8_t b, uint8x8_t c) { return EOR3 (a, b, c); }
 
-/* { dg-final { scan-assembler-times {eor3\tv0.16b, v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b} 3 } } */
+/* { dg-final { scan-assembler-times {eor3\tv0.16b, v[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b} 4 } } */
 
-- 
2.44.0

