https://gcc.gnu.org/g:65b7c8db9c61bcdfd07a3404047dd2d2beac4bbb
commit r15-6098-g65b7c8db9c61bcdfd07a3404047dd2d2beac4bbb Author: Soumya AR <soum...@nvidia.com> Date: Wed Dec 11 09:32:35 2024 +0530 aarch64: Extend SVE2 bit-select instructions for Neon modes. NBSL, BSL1N, and BSL2N are bit-select instructions on SVE2 with certain operands inverted. These can be extended to work with Neon modes. Since these instructions are unpredicated, duplicate patterns were added with the predicate removed to generate these instructions for Neon modes. The patch was bootstrapped and regtested on aarch64-linux-gnu, no regression. Signed-off-by: Soumya AR <soum...@nvidia.com> gcc/ChangeLog: * config/aarch64/aarch64-sve2.md (*aarch64_sve2_nbsl_unpred<mode>): New pattern to match unpredicated form. (*aarch64_sve2_bsl1n_unpred<mode>): Likewise. (*aarch64_sve2_bsl2n_unpred<mode>): Likewise. gcc/testsuite/ChangeLog: * gcc.target/aarch64/sve/bitsel.c: New test. Diff: --- gcc/config/aarch64/aarch64-sve2.md | 66 +++++++++++++++++++++++++++ gcc/testsuite/gcc.target/aarch64/sve/bitsel.c | 35 ++++++++++++++ 2 files changed, 101 insertions(+) diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md index 7e8a505f707c..5f41df7cf6ec 100644 --- a/gcc/config/aarch64/aarch64-sve2.md +++ b/gcc/config/aarch64/aarch64-sve2.md @@ -1732,6 +1732,23 @@ } ) +(define_insn "*aarch64_sve2_nbsl_unpred<mode>" + [(set (match_operand:VDQ_I 0 "register_operand") + (not:VDQ_I + (xor:VDQ_I + (and:VDQ_I + (xor:VDQ_I + (match_operand:VDQ_I 1 "register_operand") + (match_operand:VDQ_I 2 "register_operand")) + (match_operand:VDQ_I 3 "register_operand")) + (match_dup BSL_DUP))))] + "TARGET_SVE2" + {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ] + [ w , <bsl_1st> , <bsl_2nd> , w ; * ] nbsl\t%Z0.d, %Z0.d, %Z<bsl_dup>.d, %Z3.d + [ ?&w , w , w , w ; yes ] movprfx\t%Z0, %Z<bsl_mov>\;nbsl\t%Z0.d, %Z0.d, %Z<bsl_dup>.d, %Z3.d + } +) + ;; Unpredicated bitwise select with inverted first operand. ;; (op3 ? 
~bsl_mov : bsl_dup) == ((~(bsl_mov ^ bsl_dup) & op3) ^ bsl_dup) (define_expand "@aarch64_sve2_bsl1n<mode>" @@ -1777,6 +1794,23 @@ } ) +(define_insn "*aarch64_sve2_bsl1n_unpred<mode>" + [(set (match_operand:VDQ_I 0 "register_operand") + (xor:VDQ_I + (and:VDQ_I + (not:VDQ_I + (xor:VDQ_I + (match_operand:VDQ_I 1 "register_operand") + (match_operand:VDQ_I 2 "register_operand"))) + (match_operand:VDQ_I 3 "register_operand")) + (match_dup BSL_DUP)))] + "TARGET_SVE2" + {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ] + [ w , <bsl_1st> , <bsl_2nd> , w ; * ] bsl1n\t%Z0.d, %Z0.d, %Z<bsl_dup>.d, %Z3.d + [ ?&w , w , w , w ; yes ] movprfx\t%Z0, %Z<bsl_mov>\;bsl1n\t%Z0.d, %Z0.d, %Z<bsl_dup>.d, %Z3.d + } +) + ;; Unpredicated bitwise select with inverted second operand. ;; (bsl_dup ? bsl_mov : ~op3) == ((bsl_dup & bsl_mov) | (~op3 & ~bsl_dup)) (define_expand "@aarch64_sve2_bsl2n<mode>" @@ -1851,6 +1885,38 @@ } ) +(define_insn "*aarch64_sve2_bsl2n_unpred<mode>" + [(set (match_operand:VDQ_I 0 "register_operand") + (ior:VDQ_I + (and:VDQ_I + (match_operand:VDQ_I 1 "register_operand") + (match_operand:VDQ_I 2 "register_operand")) + (and:VDQ_I + (not:VDQ_I (match_operand:VDQ_I 3 "register_operand")) + (not:VDQ_I (match_dup BSL_DUP)))))] + "TARGET_SVE2" + {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ] + [ w , <bsl_1st> , <bsl_2nd> , w ; * ] bsl2n\t%Z0.d, %Z0.d, %Z3.d, %Z<bsl_dup>.d + [ ?&w , w , w , w ; yes ] movprfx\t%Z0, %Z<bsl_mov>\;bsl2n\t%Z0.d, %Z0.d, %Z3.d, %Z<bsl_dup>.d + } +) + +(define_insn "*aarch64_sve2_bsl2n_unpred<mode>" + [(set (match_operand:VDQ_I 0 "register_operand") + (ior:VDQ_I + (and:VDQ_I + (match_operand:VDQ_I 1 "register_operand") + (match_operand:VDQ_I 2 "register_operand")) + (and:VDQ_I + (not:VDQ_I (match_dup BSL_DUP)) + (not:VDQ_I (match_operand:VDQ_I 3 "register_operand")))))] + "TARGET_SVE2" + {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ] + [ w , <bsl_1st> , <bsl_2nd> , w ; * ] bsl2n\t%Z0.d, %Z0.d, %Z3.d, %Z<bsl_dup>.d + [ ?&w , w , w , w ; yes ] movprfx\t%Z0, 
%Z<bsl_mov>\;bsl2n\t%Z0.d, %Z0.d, %Z3.d, %Z<bsl_dup>.d + } +) + ;; ------------------------------------------------------------------------- ;; ---- [INT] Shift-and-accumulate operations ;; ------------------------------------------------------------------------- diff --git a/gcc/testsuite/gcc.target/aarch64/sve/bitsel.c b/gcc/testsuite/gcc.target/aarch64/sve/bitsel.c new file mode 100644 index 000000000000..635bfefc17ce --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/bitsel.c @@ -0,0 +1,35 @@ +/* { dg-options "-O2 -mcpu=neoverse-v2 --param aarch64-autovec-preference=asimd-only" } */ + +#include <stdint.h> + +#define OPNBSL(x,y,z) (~(((x) & (z)) | ((y) & ~(z)))) +#define OPBSL1N(x,y,z) ((~(x) & (z)) | ((y) & ~(z))) +#define OPBSL2N(x,y,z) (((x) & (z)) | (~(y) & ~(z))) + +#define N 1024 + +#define TYPE(N) int##N##_t + +#define TEST(SIZE, OP, SUFFIX) \ +void __attribute__ ((noinline, noclone)) \ +f_##SIZE##_##SUFFIX \ + (TYPE(SIZE) *restrict a, TYPE(SIZE) *restrict b, \ + TYPE(SIZE) *restrict c, TYPE(SIZE) *restrict d) \ +{ \ + for (int i = 0; i < N; i++) \ + a[i] = OP (b[i], c[i], d[i]); \ +} + +#define TEST_ALL(SIZE) \ + TEST(SIZE, OPNBSL, nbsl) \ + TEST(SIZE, OPBSL1N, bsl1n) \ + TEST(SIZE, OPBSL2N, bsl2n) + +TEST_ALL(8); +TEST_ALL(16); +TEST_ALL(32); +TEST_ALL(64); + +/* { dg-final { scan-assembler-times {\tnbsl\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tbsl1n\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tbsl2n\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */ \ No newline at end of file