This implements the new vector optabs vec_<su>addh_narrow_hi_<mode>,
vec_<su>addh_narrow_lo_<mode>, vec_<su>addh_narrow<mode> adding support for
in-vectorizer recognition of addhn.

The existing codegen tests will now recognize the instructions through the
optabs rather than through combine.

Bootstrapped and regtested on aarch64-none-linux-gnu, arm-none-linux-gnueabihf
and x86_64-pc-linux-gnu (-m32 and -m64) with no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

        * config/aarch64/aarch64-simd.md (vec_<su>addh_narrow_hi_<mode>,
        vec_<su>addh_narrow_lo_<mode>, vec_<su>addh_narrow<mode>): New.
        * config/aarch64/iterators.md (UNSPEC_SADDHN, UNSPEC_UADDHN): New.
        (su, ADDHN): Use them.

gcc/testsuite/ChangeLog:

        * gcc.dg/vect/vect-addhn_1.c: New test.

---
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 8b75c3d7f6d5ddc5c44f841da961423caaebe8b8..905f7cfc23d6245f545094f12fc220e49dbf333e 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -949,6 +949,61 @@ (define_expand "vec_widen_<su>abd_lo_<mode>"
   }
 )
 
+(define_expand "vec_<su>addh_narrow_hi_<mode>"
+  [(set (match_operand:<VNARROWQ2> 0 "register_operand")
+       (unspec:VQN [(plus:VQN (match_operand:VQN 1 "register_operand")
+                              (match_operand:VQN 2 "register_operand"))
+                    (match_operand:<VNARROWQ> 3 "register_operand")]
+                   ADDHN))]
+  "TARGET_SIMD"
+  {
+    rtx shft
+      = aarch64_simd_gen_const_vector_dup (<MODE>mode,
+                               GET_MODE_UNIT_BITSIZE (<MODE>mode) / 2);
+    if (BYTES_BIG_ENDIAN)
+      emit_insn (gen_aarch64_addhn2<mode>_insn_be (operands[0],
+                               operands[3], operands[1], operands[2], shft));
+    else
+      emit_insn (gen_aarch64_addhn2<mode>_insn_le (operands[0],
+                               operands[3], operands[1], operands[2], shft));
+    DONE;
+  }
+)
+
+(define_expand "vec_<su>addh_narrow_lo_<mode>"
+  [(set (match_operand:<VNARROWQ> 0 "register_operand")
+       (unspec:VQN [(plus:VQN (match_operand:VQN 1 "register_operand")
+                              (match_operand:VQN 2 "register_operand"))]
+                   ADDHN))]
+  "TARGET_SIMD"
+  {
+    rtx shft
+      = aarch64_simd_gen_const_vector_dup (<MODE>mode,
+                               GET_MODE_UNIT_BITSIZE (<MODE>mode) / 2);
+    emit_insn (gen_aarch64_addhn<mode>_insn (operands[0], operands[1],
+                                            operands[2], shft));
+    DONE;
+  }
+)
+
+(define_expand "vec_<su>addh_narrow<mode>"
+  [(set (match_operand:<VNARROWQ2> 0 "register_operand")
+       (unspec:VQN [(match_operand:VQN 1 "register_operand")
+                    (match_operand:VQN 2 "register_operand")
+                    (match_operand:VQN 3 "register_operand")
+                    (match_operand:VQN 4 "register_operand")]
+                   ADDHN))]
+  "TARGET_SIMD"
+  {
+    rtx low = gen_reg_rtx (<VNARROWQ>mode);
+    emit_insn (gen_vec_<su>addh_narrow_lo_<mode> (low, operands[1],
+                                                 operands[2]));
+    emit_insn (gen_vec_<su>addh_narrow_hi_<mode> (operands[0], operands[3],
+                                                 operands[4], low));
+    DONE;
+  }
+)
+
 (define_insn "aarch64_<su>abal<mode>"
   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (plus:<VWIDE>
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index c3771d9402baf1a09ad51e6149e65dcadf0adc20..f559c1508749766652e523640a6d7df9a5162dee 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -806,6 +806,8 @@ (define_c_enum "unspec"
     UNSPEC_UHADD       ; Used in aarch64-simd.md.
     UNSPEC_SRHADD      ; Used in aarch64-simd.md.
     UNSPEC_URHADD      ; Used in aarch64-simd.md.
+    UNSPEC_SADDHN      ; Used in aarch64-simd.md.
+    UNSPEC_UADDHN      ; Used in aarch64-simd.md.
     UNSPEC_SHSUB       ; Used in aarch64-simd.md.
     UNSPEC_UHSUB       ; Used in aarch64-simd.md.
     UNSPEC_SQDMULH     ; Used in aarch64-simd.md.
@@ -3249,6 +3251,8 @@ (define_int_iterator HADD [UNSPEC_SHADD UNSPEC_UHADD])
 
 (define_int_iterator RHADD [UNSPEC_SRHADD UNSPEC_URHADD])
 
+(define_int_iterator ADDHN [UNSPEC_SADDHN UNSPEC_UADDHN])
+
 (define_int_iterator BSL_DUP [1 2])
 
 (define_int_iterator DOTPROD [UNSPEC_SDOT UNSPEC_UDOT])
@@ -4248,7 +4252,8 @@ (define_int_attr su [(UNSPEC_SADDV "s")
                     (UNSPEC_COND_SCVTF "s")
                     (UNSPEC_COND_UCVTF "u")
                     (UNSPEC_SMULHS "s") (UNSPEC_UMULHS "u")
-                    (UNSPEC_SMULHRS "s") (UNSPEC_UMULHRS "u")])
+                    (UNSPEC_SMULHRS "s") (UNSPEC_UMULHRS "u")
+                    (UNSPEC_SADDHN "s") (UNSPEC_UADDHN "u")])
 
 (define_int_attr sur [(UNSPEC_SHADD "s") (UNSPEC_UHADD "u")
                      (UNSPEC_SRHADD "sr") (UNSPEC_URHADD "ur")
diff --git a/gcc/testsuite/gcc.dg/vect/vect-addhn_1.c b/gcc/testsuite/gcc.dg/vect/vect-addhn_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..60c1c6364cd46d6fd1e8368a90a307a5b8f08308
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-addhn_1.c
@@ -0,0 +1,91 @@
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target arm_neon_ok { target arm*-*-* } } */ 
+/* { dg-add-options arm_neon } */
+
+#include <stdint.h>
+#include <stdio.h>
+
+#include "tree-vect.h"
+
+#define N 1000
+#define CHECK_ERROR(cond, fmt, ...) \
+  do { if (cond) { printf(fmt "\n", ##__VA_ARGS__); __builtin_abort (); } } while (0)
+
+// Generates all test components for a given type combo
+#define TEST_COMBO(A_TYPE, C_TYPE, CAST_TYPE, SHIFT)                                  \
+  A_TYPE a_##A_TYPE##_##C_TYPE[N];                                                    \
+  A_TYPE b_##A_TYPE##_##C_TYPE[N];                                                    \
+  C_TYPE c_##A_TYPE##_##C_TYPE[N];                                                    \
+  C_TYPE ref_##A_TYPE##_##C_TYPE[N];                                                  \
+                                                                                      \
+  void init_##A_TYPE##_##C_TYPE() {                                                   \
+    _Pragma ("GCC novector")                                                          \
+    for (int i = 0; i < N; i++) {                                                     \
+      a_##A_TYPE##_##C_TYPE[i] = (A_TYPE)(i * 3);                                     \
+      b_##A_TYPE##_##C_TYPE[i] = (A_TYPE)(i * 7);                                     \
+    }                                                                                 \
+  }                                                                                   \
+                                                                                      \
+  void foo_##A_TYPE##_##C_TYPE() {                                                    \
+    for (int i = 0; i < N; i++)                                                       \
+      c_##A_TYPE##_##C_TYPE[i] =                                                      \
+        ((CAST_TYPE)a_##A_TYPE##_##C_TYPE[i] +                                        \
+         (CAST_TYPE)b_##A_TYPE##_##C_TYPE[i]) >> SHIFT;                               \
+  }                                                                                   \
+                                                                                      \
+  void ref_##A_TYPE##_##C_TYPE##_compute() {                                          \
+    _Pragma ("GCC novector")                                                          \
+    for (int i = 0; i < N; i++)                                                       \
+      ref_##A_TYPE##_##C_TYPE[i] =                                                    \
+        ((CAST_TYPE)a_##A_TYPE##_##C_TYPE[i] +                                        \
+         (CAST_TYPE)b_##A_TYPE##_##C_TYPE[i]) >> SHIFT;                               \
+  }                                                                                   \
+                                                                                      \
+  void validate_##A_TYPE##_##C_TYPE(const char* variant_name) {                       \
+    _Pragma ("GCC novector")                                                          \
+    for (int i = 0; i < N; i++) {                                                     \
+      if (c_##A_TYPE##_##C_TYPE[i] != ref_##A_TYPE##_##C_TYPE[i]) {                   \
+        printf("FAIL [%s]: Index %d: got %lld, expected %lld\n",                      \
+               variant_name, i,                                                       \
+               (long long)c_##A_TYPE##_##C_TYPE[i],                                   \
+               (long long)ref_##A_TYPE##_##C_TYPE[i]);                                \
+        __builtin_abort ();                                                           \
+      }                                                                               \
+    }                                                                                 \
+  }
+
+// Runs the test for one combo with name output
+#define RUN_COMBO(A_TYPE, C_TYPE)                          \
+  do {                                                     \
+    init_##A_TYPE##_##C_TYPE();                            \
+    foo_##A_TYPE##_##C_TYPE();                             \
+    ref_##A_TYPE##_##C_TYPE##_compute();                   \
+    validate_##A_TYPE##_##C_TYPE(#A_TYPE " -> " #C_TYPE);  \
+  } while (0)
+
+// Instantiate all valid combinations
+TEST_COMBO(int16_t, int8_t, int32_t, 8)
+TEST_COMBO(uint16_t, uint8_t, uint32_t, 8)
+TEST_COMBO(int32_t, int16_t, int64_t, 16)
+TEST_COMBO(uint32_t, uint16_t, uint64_t, 16)
+#if defined(__aarch64__) /* 64-bit combos need a 128-bit CAST_TYPE.  */
+TEST_COMBO(int64_t, int32_t, __int128_t, 32)
+TEST_COMBO(uint64_t, uint32_t, unsigned __int128, 32)
+#endif
+
+int main() {
+  check_vect ();
+
+  RUN_COMBO(int16_t, int8_t);
+  RUN_COMBO(uint16_t, uint8_t);
+  RUN_COMBO(int32_t, int16_t);
+  RUN_COMBO(uint32_t, uint16_t);
+#if defined(__aarch64__)
+  RUN_COMBO(int64_t, int32_t);
+  RUN_COMBO(uint64_t, uint32_t);
+#endif
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "addhn pattern recognized" 16 "vect" { target { aarch64-*-* } } } } */


-- 
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 8b75c3d7f6d5ddc5c44f841da961423caaebe8b8..905f7cfc23d6245f545094f12fc220e49dbf333e 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -949,6 +949,61 @@ (define_expand "vec_widen_<su>abd_lo_<mode>"
   }
 )
 
+(define_expand "vec_<su>addh_narrow_hi_<mode>"
+  [(set (match_operand:<VNARROWQ2> 0 "register_operand")
+	(unspec:VQN [(plus:VQN (match_operand:VQN 1 "register_operand")
+			       (match_operand:VQN 2 "register_operand"))
+		     (match_operand:<VNARROWQ> 3 "register_operand")]
+		    ADDHN))]
+  "TARGET_SIMD"
+  {
+    rtx shft
+      = aarch64_simd_gen_const_vector_dup (<MODE>mode,
+				GET_MODE_UNIT_BITSIZE (<MODE>mode) / 2);
+    if (BYTES_BIG_ENDIAN)
+      emit_insn (gen_aarch64_addhn2<mode>_insn_be (operands[0],
+				operands[3], operands[1], operands[2], shft));
+    else
+      emit_insn (gen_aarch64_addhn2<mode>_insn_le (operands[0],
+				operands[3], operands[1], operands[2], shft));
+    DONE;
+  }
+)
+
+(define_expand "vec_<su>addh_narrow_lo_<mode>"
+  [(set (match_operand:<VNARROWQ> 0 "register_operand")
+	(unspec:VQN [(plus:VQN (match_operand:VQN 1 "register_operand")
+			       (match_operand:VQN 2 "register_operand"))]
+		    ADDHN))]
+  "TARGET_SIMD"
+  {
+    rtx shft
+      = aarch64_simd_gen_const_vector_dup (<MODE>mode,
+				GET_MODE_UNIT_BITSIZE (<MODE>mode) / 2);
+    emit_insn (gen_aarch64_addhn<mode>_insn (operands[0], operands[1],
+					     operands[2], shft));
+    DONE;
+  }
+)
+
+(define_expand "vec_<su>addh_narrow<mode>"
+  [(set (match_operand:<VNARROWQ2> 0 "register_operand")
+	(unspec:VQN [(match_operand:VQN 1 "register_operand")
+		     (match_operand:VQN 2 "register_operand")
+		     (match_operand:VQN 3 "register_operand")
+		     (match_operand:VQN 4 "register_operand")]
+		    ADDHN))]
+  "TARGET_SIMD"
+  {
+    rtx low = gen_reg_rtx (<VNARROWQ>mode);
+    emit_insn (gen_vec_<su>addh_narrow_lo_<mode> (low, operands[1],
+						  operands[2]));
+    emit_insn (gen_vec_<su>addh_narrow_hi_<mode> (operands[0], operands[3],
+						  operands[4], low));
+    DONE;
+  }
+)
+
 (define_insn "aarch64_<su>abal<mode>"
   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
 	(plus:<VWIDE>
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index c3771d9402baf1a09ad51e6149e65dcadf0adc20..f559c1508749766652e523640a6d7df9a5162dee 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -806,6 +806,8 @@ (define_c_enum "unspec"
     UNSPEC_UHADD	; Used in aarch64-simd.md.
     UNSPEC_SRHADD	; Used in aarch64-simd.md.
     UNSPEC_URHADD	; Used in aarch64-simd.md.
+    UNSPEC_SADDHN	; Used in aarch64-simd.md.
+    UNSPEC_UADDHN	; Used in aarch64-simd.md.
     UNSPEC_SHSUB	; Used in aarch64-simd.md.
     UNSPEC_UHSUB	; Used in aarch64-simd.md.
     UNSPEC_SQDMULH	; Used in aarch64-simd.md.
@@ -3249,6 +3251,8 @@ (define_int_iterator HADD [UNSPEC_SHADD UNSPEC_UHADD])
 
 (define_int_iterator RHADD [UNSPEC_SRHADD UNSPEC_URHADD])
 
+(define_int_iterator ADDHN [UNSPEC_SADDHN UNSPEC_UADDHN])
+
 (define_int_iterator BSL_DUP [1 2])
 
 (define_int_iterator DOTPROD [UNSPEC_SDOT UNSPEC_UDOT])
@@ -4248,7 +4252,8 @@ (define_int_attr su [(UNSPEC_SADDV "s")
 		     (UNSPEC_COND_SCVTF "s")
 		     (UNSPEC_COND_UCVTF "u")
 		     (UNSPEC_SMULHS "s") (UNSPEC_UMULHS "u")
-		     (UNSPEC_SMULHRS "s") (UNSPEC_UMULHRS "u")])
+		     (UNSPEC_SMULHRS "s") (UNSPEC_UMULHRS "u")
+		     (UNSPEC_SADDHN "s") (UNSPEC_UADDHN "u")])
 
 (define_int_attr sur [(UNSPEC_SHADD "s") (UNSPEC_UHADD "u")
 		      (UNSPEC_SRHADD "sr") (UNSPEC_URHADD "ur")
diff --git a/gcc/testsuite/gcc.dg/vect/vect-addhn_1.c b/gcc/testsuite/gcc.dg/vect/vect-addhn_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..60c1c6364cd46d6fd1e8368a90a307a5b8f08308
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-addhn_1.c
@@ -0,0 +1,91 @@
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target arm_neon_ok { target arm*-*-* } } */ 
+/* { dg-add-options arm_neon } */
+
+#include <stdint.h>
+#include <stdio.h>
+
+#include "tree-vect.h"
+
+#define N 1000
+#define CHECK_ERROR(cond, fmt, ...) \
+  do { if (cond) { printf(fmt "\n", ##__VA_ARGS__); __builtin_abort (); } } while (0)
+
+// Generates all test components for a given type combo
+#define TEST_COMBO(A_TYPE, C_TYPE, CAST_TYPE, SHIFT)                                  \
+  A_TYPE a_##A_TYPE##_##C_TYPE[N];                                                    \
+  A_TYPE b_##A_TYPE##_##C_TYPE[N];                                                    \
+  C_TYPE c_##A_TYPE##_##C_TYPE[N];                                                    \
+  C_TYPE ref_##A_TYPE##_##C_TYPE[N];                                                  \
+                                                                                      \
+  void init_##A_TYPE##_##C_TYPE() {                                                   \
+    _Pragma ("GCC novector")							      \
+    for (int i = 0; i < N; i++) {                                                     \
+      a_##A_TYPE##_##C_TYPE[i] = (A_TYPE)(i * 3);                                     \
+      b_##A_TYPE##_##C_TYPE[i] = (A_TYPE)(i * 7);                                     \
+    }                                                                                 \
+  }                                                                                   \
+                                                                                      \
+  void foo_##A_TYPE##_##C_TYPE() {                                                    \
+    for (int i = 0; i < N; i++)                                                       \
+      c_##A_TYPE##_##C_TYPE[i] =                                                      \
+        ((CAST_TYPE)a_##A_TYPE##_##C_TYPE[i] +                                        \
+         (CAST_TYPE)b_##A_TYPE##_##C_TYPE[i]) >> SHIFT;                               \
+  }                                                                                   \
+                                                                                      \
+  void ref_##A_TYPE##_##C_TYPE##_compute() {                                          \
+    _Pragma ("GCC novector")							      \
+    for (int i = 0; i < N; i++)                                                       \
+      ref_##A_TYPE##_##C_TYPE[i] =                                                    \
+        ((CAST_TYPE)a_##A_TYPE##_##C_TYPE[i] +                                        \
+         (CAST_TYPE)b_##A_TYPE##_##C_TYPE[i]) >> SHIFT;                               \
+  }                                                                                   \
+                                                                                      \
+  void validate_##A_TYPE##_##C_TYPE(const char* variant_name) {                       \
+    _Pragma ("GCC novector")							      \
+    for (int i = 0; i < N; i++) {                                                     \
+      if (c_##A_TYPE##_##C_TYPE[i] != ref_##A_TYPE##_##C_TYPE[i]) {                   \
+        printf("FAIL [%s]: Index %d: got %lld, expected %lld\n",                      \
+               variant_name, i,                                                       \
+               (long long)c_##A_TYPE##_##C_TYPE[i],                                   \
+               (long long)ref_##A_TYPE##_##C_TYPE[i]);                                \
+        __builtin_abort ();                                                           \
+      }                                                                               \
+    }                                                                                 \
+  }
+
+// Runs the test for one combo with name output
+#define RUN_COMBO(A_TYPE, C_TYPE)                          \
+  do {                                                     \
+    init_##A_TYPE##_##C_TYPE();                            \
+    foo_##A_TYPE##_##C_TYPE();                             \
+    ref_##A_TYPE##_##C_TYPE##_compute();                   \
+    validate_##A_TYPE##_##C_TYPE(#A_TYPE " -> " #C_TYPE);  \
+  } while (0)
+
+// Instantiate all valid combinations
+TEST_COMBO(int16_t, int8_t, int32_t, 8)
+TEST_COMBO(uint16_t, uint8_t, uint32_t, 8)
+TEST_COMBO(int32_t, int16_t, int64_t, 16)
+TEST_COMBO(uint32_t, uint16_t, uint64_t, 16)
+#if defined(__aarch64__) /* 64-bit combos need a 128-bit CAST_TYPE.  */
+TEST_COMBO(int64_t, int32_t, __int128_t, 32)
+TEST_COMBO(uint64_t, uint32_t, unsigned __int128, 32)
+#endif
+
+int main() {
+  check_vect ();
+
+  RUN_COMBO(int16_t, int8_t);
+  RUN_COMBO(uint16_t, uint8_t);
+  RUN_COMBO(int32_t, int16_t);
+  RUN_COMBO(uint32_t, uint16_t);
+#if defined(__aarch64__)
+  RUN_COMBO(int64_t, int32_t);
+  RUN_COMBO(uint64_t, uint32_t);
+#endif
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "addhn pattern recognized" 16 "vect" { target { aarch64-*-* } } } } */

Reply via email to