https://gcc.gnu.org/g:e5569a20cf3791553ac324269001a7c7c0e56242

commit r15-6099-ge5569a20cf3791553ac324269001a7c7c0e56242
Author: Soumya AR <soum...@nvidia.com>
Date:   Wed Dec 11 09:45:09 2024 +0530

    aarch64: Use SVE ASRD instruction with Neon modes.
    
    The ASRD instruction on SVE performs an arithmetic shift right by an
    immediate for divide.
    
    This patch enables the use of ASRD with Neon modes.
    
    For example:
    
    int in[N], out[N];
    
    void
    foo (void)
    {
      for (int i = 0; i < N; i++)
        out[i] = in[i] / 4;
    }
    
    compiles to:
    
            ldr     q31, [x1, x0]
            cmlt    v30.16b, v31.16b, #0
            and     z30.b, z30.b, 3
            add     v30.16b, v30.16b, v31.16b
            sshr    v30.16b, v30.16b, 2
            str     q30, [x0, x2]
            add     x0, x0, 16
            cmp     x0, 1024
    
    but can just be:
    
            ldp     q30, q31, [x0], 32
            asrd    z31.b, p7/m, z31.b, #2
            asrd    z30.b, p7/m, z30.b, #2
            stp     q30, q31, [x1], 32
            cmp     x0, x2
    
    This patch also adds the following overload:
            aarch64_ptrue_reg (machine_mode pred_mode, machine_mode data_mode)
    Depending on the data mode, the function returns a predicate with the
    appropriate bits set.
    
    The patch was bootstrapped and regtested on aarch64-linux-gnu, no
    regression.
    
    gcc/ChangeLog:
    
            * config/aarch64/aarch64.cc (aarch64_ptrue_reg): New overload.
            * config/aarch64/aarch64-protos.h (aarch64_ptrue_reg): Likewise.
            * config/aarch64/aarch64-sve.md: Extended sdiv_pow2<mode>3
            and *sdiv_pow2<mode>3 to support Neon modes.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/aarch64/sve/sve-asrd.c: New test.
    
    Co-authored-by: Richard Sandiford <richard.sandif...@arm.com>
    Signed-off-by: Soumya AR <soum...@nvidia.com>

Diff:
---
 gcc/config/aarch64/aarch64-protos.h             |  1 +
 gcc/config/aarch64/aarch64-sve.md               | 24 +++----
 gcc/config/aarch64/aarch64.cc                   | 16 +++++
 gcc/testsuite/gcc.target/aarch64/sve/sve-asrd.c | 86 +++++++++++++++++++++++++
 4 files changed, 115 insertions(+), 12 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index db2baca58665..bd17486e9128 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -1018,6 +1018,7 @@ void aarch64_expand_mov_immediate (rtx, rtx);
 rtx aarch64_stack_protect_canary_mem (machine_mode, rtx, aarch64_salt_type);
 rtx aarch64_ptrue_reg (machine_mode);
 rtx aarch64_ptrue_reg (machine_mode, unsigned int);
+rtx aarch64_ptrue_reg (machine_mode, machine_mode);
 rtx aarch64_pfalse_reg (machine_mode);
 bool aarch64_sve_same_pred_for_ptest_p (rtx *, rtx *);
 void aarch64_emit_sve_pred_move (rtx, rtx, rtx);
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index 04326bca0e74..a72ca2a500d3 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -5009,34 +5009,34 @@
 
 ;; Unpredicated ASRD.
 (define_expand "sdiv_pow2<mode>3"
-  [(set (match_operand:SVE_I 0 "register_operand")
-       (unspec:SVE_I
+  [(set (match_operand:SVE_VDQ_I 0 "register_operand")
+       (unspec:SVE_VDQ_I
          [(match_dup 3)
-          (unspec:SVE_I
-            [(match_operand:SVE_I 1 "register_operand")
+          (unspec:SVE_VDQ_I
+            [(match_operand:SVE_VDQ_I 1 "register_operand")
              (match_operand 2 "aarch64_simd_rshift_imm")]
             UNSPEC_ASRD)]
         UNSPEC_PRED_X))]
   "TARGET_SVE"
   {
-    operands[3] = aarch64_ptrue_reg (<VPRED>mode);
+    operands[3] = aarch64_ptrue_reg (<VPRED>mode, <MODE>mode);
   }
 )
 
 ;; Predicated ASRD.
 (define_insn "*sdiv_pow2<mode>3"
-  [(set (match_operand:SVE_I 0 "register_operand")
-       (unspec:SVE_I
+  [(set (match_operand:SVE_VDQ_I 0 "register_operand")
+       (unspec:SVE_VDQ_I
          [(match_operand:<VPRED> 1 "register_operand")
-          (unspec:SVE_I
-            [(match_operand:SVE_I 2 "register_operand")
-             (match_operand:SVE_I 3 "aarch64_simd_rshift_imm")]
+          (unspec:SVE_VDQ_I
+            [(match_operand:SVE_VDQ_I 2 "register_operand")
+             (match_operand:SVE_VDQ_I 3 "aarch64_simd_rshift_imm")]
             UNSPEC_ASRD)]
          UNSPEC_PRED_X))]
   "TARGET_SVE"
   {@ [ cons: =0 , 1   , 2 ; attrs: movprfx ]
-     [ w        , Upl , 0 ; *              ] asrd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
-     [ ?&w      , Upl , w ; yes            ] movprfx\t%0, %2\;asrd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
+     [ w        , Upl , 0 ; *              ] asrd\t%Z0.<Vetype>, %1/m, %Z0.<Vetype>, #%3
+     [ ?&w      , Upl , w ; yes            ] movprfx\t%Z0, %Z2\;asrd\t%Z0.<Vetype>, %1/m, %Z0.<Vetype>, #%3
   }
 )
 
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 3606dc174c2f..4d1b3cca0c42 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -3778,6 +3778,22 @@ aarch64_ptrue_reg (machine_mode mode, unsigned int vl)
   return gen_lowpart (mode, reg);
 }
 
+/* Return a register of mode PRED_MODE for controlling data of mode DATA_MODE.
+
+   DATA_MODE can be a scalar, an Advanced SIMD vector, or an SVE vector.
+   If it's an N-byte scalar or an Advanced SIMD vector, the first N bits
+   of the predicate will be active and the rest will be inactive.
+   If DATA_MODE is an SVE mode, every bit of the predicate will be active.  */
+rtx
+aarch64_ptrue_reg (machine_mode pred_mode, machine_mode data_mode)
+{
+  if (aarch64_sve_mode_p (data_mode))
+    return aarch64_ptrue_reg (pred_mode);
+
+  auto size = GET_MODE_SIZE (data_mode).to_constant ();
+  return aarch64_ptrue_reg (pred_mode, size);
+}
+
 /* Return an all-false predicate register of mode MODE.  */
 
 rtx
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/sve-asrd.c b/gcc/testsuite/gcc.target/aarch64/sve/sve-asrd.c
new file mode 100644
index 000000000000..341baae505c0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/sve-asrd.c
@@ -0,0 +1,86 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast --param aarch64-autovec-preference=asimd-only" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#include <stdint.h>
+
+#define FUNC(TYPE, I)                                                         \
+  TYPE M_##TYPE##_##I[I];                                                     \
+  void asrd_##TYPE##_##I ()                                                   \
+  {                                                                           \
+    for (int i = 0; i < I; i++)                                               \
+      {                                                                       \
+       M_##TYPE##_##I[i] /= 4;                                                \
+      }                                                                       \
+  }
+
+/*
+** asrd_int8_t_8:
+**     ...
+**     ptrue   (p[0-7]).b, vl8
+**     ...
+**     asrd    z[0-9]+\.b, \1/m, z[0-9]+\.b, #2
+**     ...
+*/
+FUNC(int8_t, 8);
+
+/*
+** asrd_int8_t_16:
+**     ...
+**     ptrue   (p[0-7]).b, vl16
+**     ...
+**     asrd    z[0-9]+\.b, \1/m, z[0-9]+\.b, #2
+**     ...
+*/
+FUNC(int8_t, 16);
+
+/*
+** asrd_int16_t_4:
+**     ...
+**     ptrue   (p[0-7]).b, vl8
+**     ...
+**     asrd    z[0-9]+\.h, \1/m, z[0-9]+\.h, #2
+**     ...
+*/
+FUNC(int16_t, 4);
+
+/*
+** asrd_int16_t_8:
+**     ...
+**     ptrue   (p[0-7]).b, vl16
+**     ...
+**     asrd    z[0-9]+\.h, \1/m, z[0-9]+\.h, #2
+**     ...
+*/
+FUNC(int16_t, 8);
+
+/*
+** asrd_int32_t_2:
+**     ...
+**     ptrue   (p[0-7]).b, vl8
+**     ...
+**     asrd    z[0-9]+\.s, \1/m, z[0-9]+\.s, #2
+**     ...
+*/
+FUNC(int32_t, 2);
+
+/*
+** asrd_int32_t_4:
+**     ...
+**     ptrue   (p[0-7]).b, vl16
+**     ...
+**     asrd    z[0-9]+\.s, \1/m, z[0-9]+\.s, #2
+**     ...
+*/
+FUNC(int32_t, 4);
+
+/*
+** asrd_int64_t_2:
+**     ...
+**     ptrue   (p[0-7]).b, vl16
+**     ...
+**     asrd    z[0-9]+\.d, \1/m, z[0-9]+\.d, #2
+**     ...
+*/
+FUNC(int64_t, 2);
+

Reply via email to