https://gcc.gnu.org/g:e840037fccceff0ad585946ff4d454f220c51759

commit r16-5212-ge840037fccceff0ad585946ff4d454f220c51759
Author: Andre Vieira <[email protected]>
Date:   Thu Nov 13 10:46:56 2025 +0000

    aarch64: Use eor3 for more double xor cases

    Expands the use of eor3 where we'd otherwise use two vector eor's.

    gcc/ChangeLog:

    	* config/aarch64/aarch64-simd.md (*eor3q<mode>4): New insn to be
    	used by combine after reload to optimize any grouping of eor's
    	that are using FP registers for scalar modes.

    gcc/testsuite/ChangeLog:

    	* gcc.target/aarch64/eor3-opt.c: New test.

Diff:
---
 gcc/config/aarch64/aarch64-simd.md          |  12 ++
 gcc/testsuite/gcc.target/aarch64/eor3-opt.c | 209 ++++++++++++++++++++++++++++
 2 files changed, 221 insertions(+)

diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index e7c459dceb3a..1d2248ab57f5 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -9449,6 +9449,18 @@
   [(set_attr "type" "crypto_sha3")]
 )
 
+(define_insn "*eor3q<mode>4"
+  [(set (match_operand:ALLI 0 "register_operand" "=w")
+	(xor:ALLI
+	 (xor:ALLI
+	  (match_operand:ALLI 2 "register_operand" "w")
+	  (match_operand:ALLI 3 "register_operand" "w"))
+	 (match_operand:ALLI 1 "register_operand" "w")))]
+  "TARGET_SHA3 && reload_completed"
+  "eor3\\t%0.16b, %1.16b, %2.16b, %3.16b"
+  [(set_attr "type" "crypto_sha3")]
+)
+
 (define_insn "aarch64_rax1qv2di"
   [(set (match_operand:V2DI 0 "register_operand" "=w")
 	(xor:V2DI
diff --git a/gcc/testsuite/gcc.target/aarch64/eor3-opt.c b/gcc/testsuite/gcc.target/aarch64/eor3-opt.c
new file mode 100644
index 000000000000..51f36f9e7806
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/eor3-opt.c
@@ -0,0 +1,209 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O2 -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" {} } } */
+#include <arm_neon.h>
+
+#pragma GCC target ("+sha3")
+
+#define EOR_SCALAR_FN(type) \
+type eor3_##type (type a, type b, type c) { \
+  return a ^ b ^ c; \
+}
+
+
+EOR_SCALAR_FN(uint64x1_t)
+/*
+** eor3_uint64x1_t:
+**	eor3	v0.16b, v0.16b, v1.16b, v2.16b
+**	ret
+*/
+EOR_SCALAR_FN(int64x1_t)
+/*
+** eor3_int64x1_t:
+**	eor3	v0.16b, v0.16b, v1.16b, v2.16b
+**	ret
+*/
+
+#define EOR_VEC_FN(type) \
+type eor3_##type (type a, type b, type c) \
+{ \
+  type res = a; \
+  res[0] = a[0] ^ b[0] ^ c[0]; \
+  return res; \
+}
+
+EOR_VEC_FN(int32x4_t)
+/*
+** eor3_int32x4_t:
+**	eor3	v([0-2]).16b, v[0-2].16b, v[0-2].16b, v[0-2].16b
+**	ins	v0.s\[0\], v\1.s\[0\]
+**	ret
+*/
+EOR_VEC_FN(int32x2_t)
+/*
+** eor3_int32x2_t:
+**	eor3	v([0-2]).16b, v[0-2].16b, v[0-2].16b, v[0-2].16b
+**	ins	v0.s\[0\], v\1.s\[0\]
+**	ret
+*/
+EOR_VEC_FN(uint32x4_t)
+/*
+** eor3_uint32x4_t:
+**	eor3	v([0-2]).16b, v[0-2].16b, v[0-2].16b, v[0-2].16b
+**	ins	v0.s\[0\], v\1.s\[0\]
+**	ret
+*/
+EOR_VEC_FN(uint32x2_t)
+/*
+** eor3_uint32x2_t:
+**	eor3	v([0-2]).16b, v[0-2].16b, v[0-2].16b, v[0-2].16b
+**	ins	v0.s\[0\], v\1.s\[0\]
+**	ret
+*/
+EOR_VEC_FN(int16x8_t)
+/*
+** eor3_int16x8_t:
+**	eor3	v([0-2]).16b, v[0-2].16b, v[0-2].16b, v[0-2].16b
+**	ins	v0.h\[0\], v\1.h\[0\]
+**	ret
+*/
+EOR_VEC_FN(int16x4_t)
+/*
+** eor3_int16x4_t:
+**	eor3	v([0-2]).16b, v[0-2].16b, v[0-2].16b, v[0-2].16b
+**	ins	v0.h\[0\], v\1.h\[0\]
+**	ret
+*/
+EOR_VEC_FN(uint16x8_t)
+/*
+** eor3_uint16x8_t:
+**	eor3	v([0-2]).16b, v[0-2].16b, v[0-2].16b, v[0-2].16b
+**	ins	v0.h\[0\], v\1.h\[0\]
+**	ret
+*/
+EOR_VEC_FN(uint16x4_t)
+/*
+** eor3_uint16x4_t:
+**	eor3	v([0-2]).16b, v[0-2].16b, v[0-2].16b, v[0-2].16b
+**	ins	v0.h\[0\], v\1.h\[0\]
+**	ret
+*/
+EOR_VEC_FN(int8x16_t)
+/*
+** eor3_int8x16_t:
+**	eor3	v([0-2]).16b, v[0-2].16b, v[0-2].16b, v[0-2].16b
+**	ins	v0.b\[0\], v\1.b\[0\]
+**	ret
+*/
+EOR_VEC_FN(int8x8_t)
+/*
+** eor3_int8x8_t:
+**	eor3	v([0-2]).16b, v[0-2].16b, v[0-2].16b, v[0-2].16b
+**	ins	v0.b\[0\], v\1.b\[0\]
+**	ret
+*/
+EOR_VEC_FN(uint8x16_t)
+/*
+** eor3_uint8x16_t:
+**	eor3	v([0-2]).16b, v[0-2].16b, v[0-2].16b, v[0-2].16b
+**	ins	v0.b\[0\], v\1.b\[0\]
+**	ret
+*/
+EOR_VEC_FN(uint8x8_t)
+/*
+** eor3_uint8x8_t:
+**	eor3	v([0-2]).16b, v[0-2].16b, v[0-2].16b, v[0-2].16b
+**	ins	v0.b\[0\], v\1.b\[0\]
+**	ret
+*/
+
+/* The following tests should not be optimized to EOR3 as that would involve
+   unnecessary register file moves.  */
+
+EOR_SCALAR_FN(uint64_t)
+/*
+** eor3_uint64_t:
+**	eor	x1, x1, x2
+**	eor	x0, x1, x0
+**	ret
+*/
+EOR_SCALAR_FN(int64_t)
+/*
+** eor3_int64_t:
+**	eor	x1, x1, x2
+**	eor	x0, x1, x0
+**	ret
+*/
+EOR_SCALAR_FN(uint32_t)
+/*
+** eor3_uint32_t:
+**	eor	w1, w1, w2
+**	eor	w0, w1, w0
+**	ret
+*/
+EOR_SCALAR_FN(int32_t)
+/*
+** eor3_int32_t:
+**	eor	w1, w1, w2
+**	eor	w0, w1, w0
+**	ret
+*/
+EOR_SCALAR_FN(uint16_t)
+/*
+** eor3_uint16_t:
+**	eor	w1, w1, w2
+**	eor	w0, w0, w1
+**	ret
+*/
+EOR_SCALAR_FN(int16_t)
+/*
+** eor3_int16_t:
+**	eor	w1, w1, w2
+**	eor	w0, w0, w1
+**	ret
+*/
+EOR_SCALAR_FN(uint8_t)
+/*
+** eor3_uint8_t:
+**	eor	w1, w1, w2
+**	eor	w0, w0, w1
+**	ret
+*/
+EOR_SCALAR_FN(int8_t)
+/*
+** eor3_int8_t:
+**	eor	w1, w1, w2
+**	eor	w0, w0, w1
+**	ret
+*/
+
+void not_eor3_long(long *p)
+{
+  p[6] = p[4] ^ p[0] ^ (p[2] << 2);
+}
+/*
+** not_eor3_long:
+**	ldr	x1, \[x0\]
+**	ldr	x2, \[x0, 32\]
+**	eor	x2, x2, x1
+**	ldr	x1, \[x0, 16\]
+**	eor	x1, x2, x1, lsl 2
+**	str	x1, \[x0, 48\]
+**	ret
+*/
+
+int64x2_t not_eor3_int64_t (int64x2_t a, int64_t b, int64_t c)
+{
+  int64x2_t res;
+  res[0] = a[0] ^ b ^ c;
+  return res;
+}
+/*
+** not_eor3_int64_t:
+**	eor	x0, x0, x1
+**	fmov	d31, x0
+**	eor	v0.8b, v31.8b, v0.8b
+**	fmov	d0, d0
+**	ret
+*/
+
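
For illustration only (not part of the commit): a minimal standalone reproducer distilled from the new test above. The function name triple_xor is made up for this sketch; it assumes an aarch64 GCC built with this patch, compiling at -O2 with the +sha3 extension, where the new *eor3q<mode>4 pattern lets combine fuse the two XORs on FP-register data into a single EOR3.

/* Hypothetical reproducer, distilled from gcc.target/aarch64/eor3-opt.c;
   compile with an aarch64 GCC at -O2.  */
#include <arm_neon.h>

#pragma GCC target ("+sha3")

uint64x1_t
triple_xor (uint64x1_t a, uint64x1_t b, uint64x1_t c)
{
  /* The uint64x1_t operands live in FP/SIMD registers, so after reload the
     two XORs are expected to combine into
       eor3	v0.16b, v0.16b, v1.16b, v2.16b
     rather than two successive eor instructions.  */
  return a ^ b ^ c;
}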
