https://gcc.gnu.org/g:1e5ff11142b2a37e7fd07a85248a0179bbb534be

commit r15-4874-g1e5ff11142b2a37e7fd07a85248a0179bbb534be
Author: Kyrylo Tkachov <ktkac...@nvidia.com>
Date:   Tue Oct 22 03:27:47 2024 -0700

    aarch64: Use canonical RTL representation for SVE2 XAR and extend it to fixed-width modes
    
    The MD pattern for the XAR instruction in SVE2 is currently expressed with
    non-canonical RTL by using a ROTATERT code with a constant rotate amount.
    Fix it by using the left ROTATE code.  This necessitates splitting out the
    expander separately to translate the immediate coming from the intrinsic
    from a right-rotate to a left-rotate immediate.
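
    As an illustration only (not part of the patch), the translation relies on
    the identity ror (x, amt) == rol (x, esize - amt) for 0 < amt < esize.
    A scalar sketch for 32-bit elements, with made-up helper names:

      /* Illustrative helpers only; assume 0 < amt < 32 to avoid
	 undefined shifts.  */
      static inline unsigned int
      ror32 (unsigned int x, unsigned int amt)
      {
	return (x >> amt) | (x << (32 - amt));
      }

      static inline unsigned int
      rol32 (unsigned int x, unsigned int amt)
      {
	return (x << amt) | (x >> (32 - amt));
      }

      /* ror32 (x, 9) == rol32 (x, 32 - 9) for all x.  */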
    
    Additionally, as the SVE2 XAR instruction is unpredicated and can handle all
    element sizes from .b to .d, it is a good fit for implementing the XOR+ROTATE
    operation for Advanced SIMD modes where the TARGET_SHA3 XAR instruction cannot
    be used (it only handles V2DImode operands).  Therefore let's extend the
    accepted modes of the SVE2 pattern to include the Advanced SIMD integer modes.
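
    For example, with SVE2 enabled but SHA3 disabled (as with the
    "+sve2+nosha3" target pragma used in the new xar_neon_modes.c test), a
    fixed-width XOR+rotate like the sketch below can now be implemented with
    the SVE2 XAR instruction:

      typedef unsigned char __attribute__ ((vector_size (16))) v16qi;

      v16qi
      xar_v16qi (v16qi a, v16qi b)
      {
	v16qi c = a ^ b;
	/* Rotate each byte left by 2, i.e. right by 6.  */
	return (c << 2) ^ (c >> 6);
      }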
    
    This causes some tests for the svxar* intrinsics to fail because the operation
    now simplifies to a plain EOR when the rotate amount equals the element width.
    This simplification is desirable (EOR instructions have throughput better than
    or equal to XAR, and they are non-destructive of their inputs) so the tests
    are adjusted.
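
    For instance (an illustrative sketch mirroring the adjusted tests), rotating
    .h elements by 16 is the identity, so the whole operation reduces to a plain
    EOR of the two inputs:

      #pragma GCC target "+sve2"
      #include <arm_sve.h>

      svint16_t
      xar_by_element_width (svint16_t x, svint16_t y)
      {
	/* Rotating by the element width is a no-op, so this is just x ^ y.  */
	return svxar_n_s16 (x, y, 16);
      }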
    
    For V2DImode XAR operations we should prefer the Advanced SIMD version when
    it is available (TARGET_SHA3) because it is non-destructive, so restrict the
    SVE2 pattern accordingly.  Tests are added to confirm this.
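
    As a sketch of the expected behaviour (mirroring the new xar_v2di_nonsve.c
    test), when both +sve2 and +sha3 are enabled a V2DImode XOR+rotate should
    use the three-operand Advanced SIMD form rather than the destructive SVE2
    one:

      #pragma GCC target "+sve2+sha3"

      typedef unsigned long long __attribute__ ((vector_size (16))) v2di;

      v2di
      xar_v2di (v2di a, v2di b)
      {
	v2di c = a ^ b;
	/* Expected to compile to: xar v0.2d, v<m>.2d, v<n>.2d, 42.  */
	return (c << 22) ^ (c >> 42);
      }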
    
    Bootstrapped and tested on aarch64-none-linux-gnu.
    Ok for mainline?
    
    Signed-off-by: Kyrylo Tkachov <ktkac...@nvidia.com>
    
    gcc/
    
            * config/aarch64/iterators.md (SVE_ASIMD_FULL_I): New mode iterator.
            * config/aarch64/aarch64-sve2.md (@aarch64_sve2_xar<mode>):
            Use SVE_ASIMD_FULL_I modes.  Use ROTATE code for the rotate step.
            Adjust output logic.
            * config/aarch64/aarch64-sve-builtins-sve2.cc (svxar_impl): Define.
            (svxar): Use the above.
    
    gcc/testsuite/
    
            * gcc.target/aarch64/xar_neon_modes.c: New test.
            * gcc.target/aarch64/xar_v2di_nonsve.c: Likewise.
            * gcc.target/aarch64/sve2/acle/asm/xar_s16.c: Scan for EOR rather
            than XAR.
            * gcc.target/aarch64/sve2/acle/asm/xar_s32.c: Likewise.
            * gcc.target/aarch64/sve2/acle/asm/xar_s64.c: Likewise.
            * gcc.target/aarch64/sve2/acle/asm/xar_s8.c: Likewise.
            * gcc.target/aarch64/sve2/acle/asm/xar_u16.c: Likewise.
            * gcc.target/aarch64/sve2/acle/asm/xar_u32.c: Likewise.
            * gcc.target/aarch64/sve2/acle/asm/xar_u64.c: Likewise.
            * gcc.target/aarch64/sve2/acle/asm/xar_u8.c: Likewise.

Diff:
---
 gcc/config/aarch64/aarch64-sve-builtins-sve2.cc    | 18 +++++++++-
 gcc/config/aarch64/aarch64-sve2.md                 | 30 +++++++++++------
 gcc/config/aarch64/iterators.md                    |  3 ++
 .../gcc.target/aarch64/sve2/acle/asm/xar_s16.c     | 18 ++++++----
 .../gcc.target/aarch64/sve2/acle/asm/xar_s32.c     | 18 ++++++----
 .../gcc.target/aarch64/sve2/acle/asm/xar_s64.c     | 18 ++++++----
 .../gcc.target/aarch64/sve2/acle/asm/xar_s8.c      | 18 ++++++----
 .../gcc.target/aarch64/sve2/acle/asm/xar_u16.c     | 18 ++++++----
 .../gcc.target/aarch64/sve2/acle/asm/xar_u32.c     | 18 ++++++----
 .../gcc.target/aarch64/sve2/acle/asm/xar_u64.c     | 18 ++++++----
 .../gcc.target/aarch64/sve2/acle/asm/xar_u8.c      | 18 ++++++----
 gcc/testsuite/gcc.target/aarch64/xar_neon_modes.c  | 39 ++++++++++++++++++++++
 gcc/testsuite/gcc.target/aarch64/xar_v2di_nonsve.c | 16 +++++++++
 13 files changed, 191 insertions(+), 59 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
index 64f86035c30e..f0ab7400ef50 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
@@ -108,6 +108,22 @@ public:
   }
 };
 
+class svxar_impl : public function_base
+{
+public:
+  rtx
+  expand (function_expander &e) const override
+  {
+    /* aarch64_sve2_xar represents this operation with a left-rotate RTX.
+       Convert the right-rotate amount from the intrinsic to fit this.  */
+    machine_mode mode = e.vector_mode (0);
+    HOST_WIDE_INT rot = GET_MODE_UNIT_BITSIZE (mode)
+                       - INTVAL (e.args[2]);
+    e.args[2] = aarch64_simd_gen_const_vector_dup (mode, rot);
+    return e.use_exact_insn (code_for_aarch64_sve2_xar (mode));
+  }
+};
+
 class svcdot_impl : public function_base
 {
 public:
@@ -795,6 +811,6 @@ FUNCTION (svwhilege, while_comparison, (UNSPEC_WHILEGE, UNSPEC_WHILEHS))
 FUNCTION (svwhilegt, while_comparison, (UNSPEC_WHILEGT, UNSPEC_WHILEHI))
 FUNCTION (svwhilerw, svwhilerw_svwhilewr_impl, (UNSPEC_WHILERW))
 FUNCTION (svwhilewr, svwhilerw_svwhilewr_impl, (UNSPEC_WHILEWR))
-FUNCTION (svxar, CODE_FOR_MODE0 (aarch64_sve2_xar),)
+FUNCTION (svxar, svxar_impl,)
 
 } /* end namespace aarch64_sve */
diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md
index 5f2697c31797..8047f405a17c 100644
--- a/gcc/config/aarch64/aarch64-sve2.md
+++ b/gcc/config/aarch64/aarch64-sve2.md
@@ -1266,18 +1266,28 @@
 ;; - XAR
 ;; -------------------------------------------------------------------------
 
+;; Also allow the Advanced SIMD modes as the SVE2 XAR instruction
+;; can handle more element sizes than the TARGET_SHA3 one from Advanced SIMD.
+;; Don't allow the V2DImode use here unless !TARGET_SHA3 as the Advanced SIMD
+;; version should be preferred when available as it is non-destructive on its
+;; input.
 (define_insn "@aarch64_sve2_xar<mode>"
-  [(set (match_operand:SVE_FULL_I 0 "register_operand")
-       (rotatert:SVE_FULL_I
-         (xor:SVE_FULL_I
-           (match_operand:SVE_FULL_I 1 "register_operand")
-           (match_operand:SVE_FULL_I 2 "register_operand"))
-         (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm")))]
-  "TARGET_SVE2"
-  {@ [ cons: =0 , 1  , 2 ; attrs: movprfx ]
-     [ w        , %0 , w ; *              ] xar\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #%3
-     [ ?&w      , w  , w ; yes            ] movprfx\t%0, %1\;xar\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #%3
+  [(set (match_operand:SVE_ASIMD_FULL_I 0 "register_operand" "=w,?&w")
+       (rotate:SVE_ASIMD_FULL_I
+         (xor:SVE_ASIMD_FULL_I
+           (match_operand:SVE_ASIMD_FULL_I 1 "register_operand" "%0,w")
+           (match_operand:SVE_ASIMD_FULL_I 2 "register_operand" "w,w"))
+         (match_operand:SVE_ASIMD_FULL_I 3 "aarch64_simd_lshift_imm")))]
+  "TARGET_SVE2 && !(<MODE>mode == V2DImode && TARGET_SHA3)"
+  {
+    operands[3]
+      = GEN_INT (GET_MODE_UNIT_BITSIZE (<MODE>mode)
+                - INTVAL (unwrap_const_vec_duplicate (operands[3])));
+    if (which_alternative == 0)
+      return "xar\t%Z0.<Vetype>, %Z0.<Vetype>, %Z2.<Vetype>, #%3";
+    return "movprfx\t%Z0, %Z1\;xar\t%Z0.<Vetype>, %Z0.<Vetype>, %Z2.<Vetype>, #%3";
   }
+  [(set_attr "movprfx" "*,yes")]
 )
 
 ;; -------------------------------------------------------------------------
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 0bc98315bb68..8269b0cdcd92 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -446,6 +446,9 @@
 ;; All fully-packed SVE integer vector modes.
 (define_mode_iterator SVE_FULL_I [VNx16QI VNx8HI VNx4SI VNx2DI])
 
+;; All fully-packed SVE integer and Advanced SIMD integer modes.
+(define_mode_iterator SVE_ASIMD_FULL_I [SVE_FULL_I VDQ_I])
+
 ;; All fully-packed SVE floating-point vector modes.
 (define_mode_iterator SVE_FULL_F [VNx8HF VNx4SF VNx2DF])
 
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_s16.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_s16.c
index 34351d52718e..f69ba3f7b06b 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_s16.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_s16.c
@@ -70,7 +70,11 @@ TEST_UNIFORM_Z (xar_2_s16_untied, svint16_t,
 
 /*
 ** xar_16_s16_tied1:
-**     xar     z0\.h, z0\.h, z1\.h, #16
+** (
+**     eor     z0\.d, z1\.d, z0\.d
+** |
+**     eor     z0\.d, z0\.d, z1\.d
+** )
 **     ret
 */
 TEST_UNIFORM_Z (xar_16_s16_tied1, svint16_t,
@@ -79,7 +83,11 @@ TEST_UNIFORM_Z (xar_16_s16_tied1, svint16_t,
 
 /*
 ** xar_16_s16_tied2:
-**     xar     z0\.h, z0\.h, z1\.h, #16
+** (
+**     eor     z0\.d, z1\.d, z0\.d
+** |
+**     eor     z0\.d, z0\.d, z1\.d
+** )
 **     ret
 */
 TEST_UNIFORM_Z (xar_16_s16_tied2, svint16_t,
@@ -89,11 +97,9 @@ TEST_UNIFORM_Z (xar_16_s16_tied2, svint16_t,
 /*
 ** xar_16_s16_untied:
 ** (
-**     movprfx z0, z1
-**     xar     z0\.h, z0\.h, z2\.h, #16
+**     eor     z0\.d, z1\.d, z2\.d
 ** |
-**     movprfx z0, z2
-**     xar     z0\.h, z0\.h, z1\.h, #16
+**     eor     z0\.d, z2\.d, z1\.d
 ** )
 **     ret
 */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_s32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_s32.c
index 366a61728076..540f7b875ecc 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_s32.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_s32.c
@@ -70,7 +70,11 @@ TEST_UNIFORM_Z (xar_2_s32_untied, svint32_t,
 
 /*
 ** xar_32_s32_tied1:
-**     xar     z0\.s, z0\.s, z1\.s, #32
+** (
+**     eor     z0\.d, z1\.d, z0\.d
+** |
+**     eor     z0\.d, z0\.d, z1\.d
+** )
 **     ret
 */
 TEST_UNIFORM_Z (xar_32_s32_tied1, svint32_t,
@@ -79,7 +83,11 @@ TEST_UNIFORM_Z (xar_32_s32_tied1, svint32_t,
 
 /*
 ** xar_32_s32_tied2:
-**     xar     z0\.s, z0\.s, z1\.s, #32
+** (
+**     eor     z0\.d, z0\.d, z1\.d
+** |
+**     eor     z0\.d, z1\.d, z0\.d
+** )
 **     ret
 */
 TEST_UNIFORM_Z (xar_32_s32_tied2, svint32_t,
@@ -89,11 +97,9 @@ TEST_UNIFORM_Z (xar_32_s32_tied2, svint32_t,
 /*
 ** xar_32_s32_untied:
 ** (
-**     movprfx z0, z1
-**     xar     z0\.s, z0\.s, z2\.s, #32
+**     eor     z0\.d, z1\.d, z2\.d
 ** |
-**     movprfx z0, z2
-**     xar     z0\.s, z0\.s, z1\.s, #32
+**     eor     z0\.d, z2\.d, z1\.d
 ** )
 **     ret
 */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_s64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_s64.c
index dedda2ed0444..9491dbdb848a 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_s64.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_s64.c
@@ -70,7 +70,11 @@ TEST_UNIFORM_Z (xar_2_s64_untied, svint64_t,
 
 /*
 ** xar_64_s64_tied1:
-**     xar     z0\.d, z0\.d, z1\.d, #64
+** (
+**     eor     z0\.d, z1\.d, z0\.d
+** |
+**     eor     z0\.d, z0\.d, z1\.d
+** )
 **     ret
 */
 TEST_UNIFORM_Z (xar_64_s64_tied1, svint64_t,
@@ -79,7 +83,11 @@ TEST_UNIFORM_Z (xar_64_s64_tied1, svint64_t,
 
 /*
 ** xar_64_s64_tied2:
-**     xar     z0\.d, z0\.d, z1\.d, #64
+** (
+**     eor     z0\.d, z1\.d, z0\.d
+** |
+**     eor     z0\.d, z0\.d, z1\.d
+** )
 **     ret
 */
 TEST_UNIFORM_Z (xar_64_s64_tied2, svint64_t,
@@ -89,11 +97,9 @@ TEST_UNIFORM_Z (xar_64_s64_tied2, svint64_t,
 /*
 ** xar_64_s64_untied:
 ** (
-**     movprfx z0, z1
-**     xar     z0\.d, z0\.d, z2\.d, #64
+**     eor     z0\.d, z1\.d, z2\.d
 ** |
-**     movprfx z0, z2
-**     xar     z0\.d, z0\.d, z1\.d, #64
+**     eor     z0\.d, z2\.d, z1\.d
 ** )
 **     ret
 */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_s8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_s8.c
index 904352b93da8..e62e5bca5ba2 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_s8.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_s8.c
@@ -70,7 +70,11 @@ TEST_UNIFORM_Z (xar_2_s8_untied, svint8_t,
 
 /*
 ** xar_8_s8_tied1:
-**     xar     z0\.b, z0\.b, z1\.b, #8
+** (
+**     eor     z0\.d, z1\.d, z0\.d
+** |
+**     eor     z0\.d, z0\.d, z1\.d
+** )
 **     ret
 */
 TEST_UNIFORM_Z (xar_8_s8_tied1, svint8_t,
@@ -79,7 +83,11 @@ TEST_UNIFORM_Z (xar_8_s8_tied1, svint8_t,
 
 /*
 ** xar_8_s8_tied2:
-**     xar     z0\.b, z0\.b, z1\.b, #8
+** (
+**     eor     z0\.d, z1\.d, z0\.d
+** |
+**     eor     z0\.d, z0\.d, z1\.d
+** )
 **     ret
 */
 TEST_UNIFORM_Z (xar_8_s8_tied2, svint8_t,
@@ -89,11 +97,9 @@ TEST_UNIFORM_Z (xar_8_s8_tied2, svint8_t,
 /*
 ** xar_8_s8_untied:
 ** (
-**     movprfx z0, z1
-**     xar     z0\.b, z0\.b, z2\.b, #8
+**     eor     z0\.d, z1\.d, z2\.d
 ** |
-**     movprfx z0, z2
-**     xar     z0\.b, z0\.b, z1\.b, #8
+**     eor     z0\.d, z2\.d, z1\.d
 ** )
 **     ret
 */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_u16.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_u16.c
index c7b9665aeed4..6269145bc6de 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_u16.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_u16.c
@@ -70,7 +70,11 @@ TEST_UNIFORM_Z (xar_2_u16_untied, svuint16_t,
 
 /*
 ** xar_16_u16_tied1:
-**     xar     z0\.h, z0\.h, z1\.h, #16
+** (
+**     eor     z0\.d, z1\.d, z0\.d
+** |
+**     eor     z0\.d, z0\.d, z1\.d
+** )
 **     ret
 */
 TEST_UNIFORM_Z (xar_16_u16_tied1, svuint16_t,
@@ -79,7 +83,11 @@ TEST_UNIFORM_Z (xar_16_u16_tied1, svuint16_t,
 
 /*
 ** xar_16_u16_tied2:
-**     xar     z0\.h, z0\.h, z1\.h, #16
+** (
+**     eor     z0\.d, z1\.d, z0\.d
+** |
+**     eor     z0\.d, z0\.d, z1\.d
+** )
 **     ret
 */
 TEST_UNIFORM_Z (xar_16_u16_tied2, svuint16_t,
@@ -89,11 +97,9 @@ TEST_UNIFORM_Z (xar_16_u16_tied2, svuint16_t,
 /*
 ** xar_16_u16_untied:
 ** (
-**     movprfx z0, z1
-**     xar     z0\.h, z0\.h, z2\.h, #16
+**     eor     z0\.d, z1\.d, z2\.d
 ** |
-**     movprfx z0, z2
-**     xar     z0\.h, z0\.h, z1\.h, #16
+**     eor     z0\.d, z2\.d, z1\.d
 ** )
 **     ret
 */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_u32.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_u32.c
index 115ead7701c3..99efd14e1ed9 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_u32.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_u32.c
@@ -70,7 +70,11 @@ TEST_UNIFORM_Z (xar_2_u32_untied, svuint32_t,
 
 /*
 ** xar_32_u32_tied1:
-**     xar     z0\.s, z0\.s, z1\.s, #32
+** (
+**     eor     z0\.d, z1\.d, z0\.d
+** |
+**     eor     z0\.d, z0\.d, z1\.d
+** )
 **     ret
 */
 TEST_UNIFORM_Z (xar_32_u32_tied1, svuint32_t,
@@ -79,7 +83,11 @@ TEST_UNIFORM_Z (xar_32_u32_tied1, svuint32_t,
 
 /*
 ** xar_32_u32_tied2:
-**     xar     z0\.s, z0\.s, z1\.s, #32
+** (
+**     eor     z0\.d, z1\.d, z0\.d
+** |
+**     eor     z0\.d, z0\.d, z1\.d
+** )
 **     ret
 */
 TEST_UNIFORM_Z (xar_32_u32_tied2, svuint32_t,
@@ -89,11 +97,9 @@ TEST_UNIFORM_Z (xar_32_u32_tied2, svuint32_t,
 /*
 ** xar_32_u32_untied:
 ** (
-**     movprfx z0, z1
-**     xar     z0\.s, z0\.s, z2\.s, #32
+**     eor     z0\.d, z1\.d, z2\.d
 ** |
-**     movprfx z0, z2
-**     xar     z0\.s, z0\.s, z1\.s, #32
+**     eor     z0\.d, z2\.d, z1\.d
 ** )
 **     ret
 */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_u64.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_u64.c
index 1d0d90e90d60..5c770ffdadbf 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_u64.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_u64.c
@@ -70,7 +70,11 @@ TEST_UNIFORM_Z (xar_2_u64_untied, svuint64_t,
 
 /*
 ** xar_64_u64_tied1:
-**     xar     z0\.d, z0\.d, z1\.d, #64
+** (
+**     eor     z0\.d, z1\.d, z0\.d
+** |
+**     eor     z0\.d, z0\.d, z1\.d
+** )
 **     ret
 */
 TEST_UNIFORM_Z (xar_64_u64_tied1, svuint64_t,
@@ -79,7 +83,11 @@ TEST_UNIFORM_Z (xar_64_u64_tied1, svuint64_t,
 
 /*
 ** xar_64_u64_tied2:
-**     xar     z0\.d, z0\.d, z1\.d, #64
+** (
+**     eor     z0\.d, z1\.d, z0\.d
+** |
+**     eor     z0\.d, z0\.d, z1\.d
+** )
 **     ret
 */
 TEST_UNIFORM_Z (xar_64_u64_tied2, svuint64_t,
@@ -89,11 +97,9 @@ TEST_UNIFORM_Z (xar_64_u64_tied2, svuint64_t,
 /*
 ** xar_64_u64_untied:
 ** (
-**     movprfx z0, z1
-**     xar     z0\.d, z0\.d, z2\.d, #64
+**     eor     z0\.d, z1\.d, z2\.d
 ** |
-**     movprfx z0, z2
-**     xar     z0\.d, z0\.d, z1\.d, #64
+**     eor     z0\.d, z2\.d, z1\.d
 ** )
 **     ret
 */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_u8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_u8.c
index 3b6161729cbc..5ae5323a08a3 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_u8.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/xar_u8.c
@@ -70,7 +70,11 @@ TEST_UNIFORM_Z (xar_2_u8_untied, svuint8_t,
 
 /*
 ** xar_8_u8_tied1:
-**     xar     z0\.b, z0\.b, z1\.b, #8
+** (
+**     eor     z0\.d, z1\.d, z0\.d
+** |
+**     eor     z0\.d, z0\.d, z1\.d
+** )
 **     ret
 */
 TEST_UNIFORM_Z (xar_8_u8_tied1, svuint8_t,
@@ -79,7 +83,11 @@ TEST_UNIFORM_Z (xar_8_u8_tied1, svuint8_t,
 
 /*
 ** xar_8_u8_tied2:
-**     xar     z0\.b, z0\.b, z1\.b, #8
+** (
+**     eor     z0\.d, z1\.d, z0\.d
+** |
+**     eor     z0\.d, z0\.d, z1\.d
+** )
 **     ret
 */
 TEST_UNIFORM_Z (xar_8_u8_tied2, svuint8_t,
@@ -89,11 +97,9 @@ TEST_UNIFORM_Z (xar_8_u8_tied2, svuint8_t,
 /*
 ** xar_8_u8_untied:
 ** (
-**     movprfx z0, z1
-**     xar     z0\.b, z0\.b, z2\.b, #8
+**     eor     z0\.d, z1\.d, z2\.d
 ** |
-**     movprfx z0, z2
-**     xar     z0\.b, z0\.b, z1\.b, #8
+**     eor     z0\.d, z2\.d, z1\.d
 ** )
 **     ret
 */
diff --git a/gcc/testsuite/gcc.target/aarch64/xar_neon_modes.c b/gcc/testsuite/gcc.target/aarch64/xar_neon_modes.c
new file mode 100644
index 000000000000..750fbcfc48a7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/xar_neon_modes.c
@@ -0,0 +1,39 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#pragma GCC target "+sve2+nosha3"
+
+typedef char __attribute__ ((vector_size (16))) v16qi;
+typedef unsigned short __attribute__ ((vector_size (16))) v8hi;
+typedef unsigned int __attribute__ ((vector_size (16))) v4si;
+typedef unsigned long long __attribute__ ((vector_size (16))) v2di;
+
+v16qi
+xar_v16qi (v16qi a, v16qi b) {
+  v16qi c = a ^ b;
+  return (c << 2) ^ (c >> 6);
+}
+/* { dg-final { scan-assembler {\txar\tz0.b, z[0-9]+.b, z[0-9]+.b, #6} } } */
+
+v8hi
+xar_v8hi (v8hi a, v8hi b) {
+  v8hi c = a ^ b;
+  return (c << 13) ^ (c >> 3);
+}
+/* { dg-final { scan-assembler {\txar\tz0.h, z[0-9]+.h, z[0-9]+.h, #3} } } */
+
+v4si
+xar_v4si (v4si a, v4si b) {
+  v4si c = a ^ b;
+  return (c << 9) ^ (c >> 23);
+}
+/* { dg-final { scan-assembler {\txar\tz0.s, z[0-9]+.s, z[0-9]+.s, #23} } } */
+
+/* When +sha3 for Advanced SIMD is not available we should still use the
+   SVE2 form of XAR.  */
+v2di
+xar_v2di (v2di a, v2di b) {
+  v2di c = a ^ b;
+  return (c << 22) ^ (c >> 42);
+}
+/* { dg-final { scan-assembler {\txar\tz0.d, z[0-9]+.d, z[0-9]+.d, #42} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/xar_v2di_nonsve.c b/gcc/testsuite/gcc.target/aarch64/xar_v2di_nonsve.c
new file mode 100644
index 000000000000..b0f1a97222b3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/xar_v2di_nonsve.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#pragma GCC target "+sve2+sha3"
+
+typedef unsigned long long __attribute__ ((vector_size (16))) v2di;
+
+/* Both +sve2 and +sha3 have V2DImode XAR instructions, but we should
+   prefer the Advanced SIMD one when both are available.  */
+v2di
+xar_v2di (v2di a, v2di b) {
+  v2di c = a ^ b;
+  return (c << 22) ^ (c >> 42);
+}
+/* { dg-final { scan-assembler {\txar\tv0.2d, v[0-9]+.2d, v[0-9]+.2d, 42} } } */
+

Reply via email to