Hi,
After the recent update from Tamar, I noticed a few discrepancies
between the ARM and AArch64 implementations of some poly64 intrinsics.
This patch:
- adds vtst_p64 and vtstq_p64 to AArch64's arm_neon.h
- adds vgetq_lane_p64, vset_lane_p64 and vsetq_lane_p64 to ARM's arm_neon.h
(vget_lane_p64 was already there)
- adds the corresponding tests, and moves the vget_lane_p64 ones out
of the #ifdef __aarch64__ zone.
Cross-tested on arm* and aarch64* targets.
OK?
Christophe
gcc/ChangeLog:
2016-12-12 Christophe Lyon <[email protected]>
* config/aarch64/arm_neon.h (vtst_p64): New.
(vtstq_p64): New.
* config/arm/arm_neon.h (vgetq_lane_p64): New.
(vset_lane_p64): New.
(vsetq_lane_p64): New.
gcc/testsuite/ChangeLog:
2016-12-12 Christophe Lyon <[email protected]>
* gcc.target/aarch64/advsimd-intrinsics/p64_p128.c
(vget_lane_expected, vset_lane_expected, vtst_expected_uint64x1):
New.
(vmov_n_expected0, vmov_n_expected1, vmov_n_expected2)
(expected_vld_st2_0, expected_vld_st2_1, expected_vld_st3_0)
(expected_vld_st3_1, expected_vld_st3_2, expected_vld_st4_0)
(expected_vld_st4_1, expected_vld_st4_2, expected_vld_st4_3)
(vtst_expected_uint64x2): Move to aarch64-only section.
(vget_lane_p64, vgetq_lane_p64, vset_lane_p64, vsetq_lane_p64)
(vtst_p64, vtstq_p64): New tests.
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index b846644..74d163e 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -10882,6 +10882,13 @@ vtst_p16 (poly16x4_t a, poly16x4_t b)
return result;
}
+__extension__ extern __inline uint64x1_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vtst_p64 (poly64x1_t a, poly64x1_t b)
+{
+ return (uint64x1_t) ((a & b) != __AARCH64_INT64_C (0));
+}
+
__extension__ extern __inline uint8x16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vtstq_p8 (poly8x16_t a, poly8x16_t b)
@@ -10906,6 +10913,18 @@ vtstq_p16 (poly16x8_t a, poly16x8_t b)
return result;
}
+__extension__ extern __inline uint64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vtstq_p64 (poly64x2_t a, poly64x2_t b)
+{
+ uint64x2_t result;
+ __asm__ ("cmtst %0.2d, %1.2d, %2.2d"
+ : "=w"(result)
+ : "w"(a), "w"(b)
+ : /* No clobbers */);
+ return result;
+}
+
/* End of temporary inline asm implementations. */
/* Start of temporary inline asm for vldn, vstn and friends. */
diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h
index ab29da7..d199b41 100644
--- a/gcc/config/arm/arm_neon.h
+++ b/gcc/config/arm/arm_neon.h
@@ -5497,6 +5497,15 @@ vgetq_lane_s64 (int64x2_t __a, const int __b)
return (int64_t)__builtin_neon_vget_lanev2di (__a, __b);
}
+#pragma GCC push_options
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
+__extension__ static __inline poly64_t __attribute__ ((__always_inline__))
+vgetq_lane_p64 (poly64x2_t __a, const int __b)
+{
+ return (poly64_t)__builtin_neon_vget_lanev2di ((int64x2_t) __a, __b);
+}
+
+#pragma GCC pop_options
__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vgetq_lane_u64 (uint64x2_t __a, const int __b)
{
@@ -5581,6 +5590,15 @@ vset_lane_u64 (uint64_t __a, uint64x1_t __b, const int __c)
return (uint64x1_t)__builtin_neon_vset_lanedi ((__builtin_neon_di) __a,
(int64x1_t) __b, __c);
}
+#pragma GCC push_options
+#pragma GCC target ("fpu=crypto-neon-fp-armv8")
+__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
+vset_lane_p64 (poly64_t __a, poly64x1_t __b, const int __c)
+{
+  return (poly64x1_t)__builtin_neon_vset_lanedi ((__builtin_neon_di) __a,
+						 (int64x1_t) __b, __c);
+}
+
+#pragma GCC pop_options
__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
vsetq_lane_s8 (int8_t __a, int8x16_t __b, const int __c)
{
@@ -5661,6 +5679,12 @@ vsetq_lane_u64 (uint64_t __a, uint64x2_t __b, const int __c)
#pragma GCC push_options
#pragma GCC target ("fpu=crypto-neon-fp-armv8")
+__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__))
+vsetq_lane_p64 (poly64_t __a, poly64x2_t __b, const int __c)
+{
+  return (poly64x2_t)__builtin_neon_vset_lanev2di ((__builtin_neon_di) __a,
+						   (int64x2_t) __b, __c);
+}
+
__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
vcreate_p64 (uint64_t __a)
{
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/p64_p128.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/p64_p128.c
index 8907b38..ba8fbeb 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/p64_p128.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/p64_p128.c
@@ -39,17 +39,6 @@ VECT_VAR_DECL(vdup_n_expected2,poly,64,1) [] = { 0xfffffffffffffff2 };
VECT_VAR_DECL(vdup_n_expected2,poly,64,2) [] = { 0xfffffffffffffff2,
0xfffffffffffffff2 };
-/* Expected results: vmov_n. */
-VECT_VAR_DECL(vmov_n_expected0,poly,64,1) [] = { 0xfffffffffffffff0 };
-VECT_VAR_DECL(vmov_n_expected0,poly,64,2) [] = { 0xfffffffffffffff0,
- 0xfffffffffffffff0 };
-VECT_VAR_DECL(vmov_n_expected1,poly,64,1) [] = { 0xfffffffffffffff1 };
-VECT_VAR_DECL(vmov_n_expected1,poly,64,2) [] = { 0xfffffffffffffff1,
- 0xfffffffffffffff1 };
-VECT_VAR_DECL(vmov_n_expected2,poly,64,1) [] = { 0xfffffffffffffff2 };
-VECT_VAR_DECL(vmov_n_expected2,poly,64,2) [] = { 0xfffffffffffffff2,
- 0xfffffffffffffff2 };
-
/* Expected results: vext. */
VECT_VAR_DECL(vext_expected,poly,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(vext_expected,poly,64,2) [] = { 0xfffffffffffffff1, 0x88 };
@@ -124,6 +113,29 @@ VECT_VAR_DECL(vst1_lane_expected,poly,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(vst1_lane_expected,poly,64,2) [] = { 0xfffffffffffffff0,
0x3333333333333333 };
+/* Expected results: vget_lane. */
+VECT_VAR_DECL(vget_lane_expected,poly,64,1) = 0xfffffffffffffff0;
+VECT_VAR_DECL(vget_lane_expected,poly,64,2) = 0xfffffffffffffff0;
+
+/* Expected results: vset_lane. */
+VECT_VAR_DECL(vset_lane_expected,poly,64,1) [] = { 0x88 };
+VECT_VAR_DECL(vset_lane_expected,poly,64,2) [] = { 0xfffffffffffffff0, 0x11 };
+
+/* Expected results: vtst. */
+VECT_VAR_DECL(vtst_expected,uint,64,1) [] = { 0x0 };
+
+#ifdef __aarch64__
+/* Expected results: vmov_n. */
+VECT_VAR_DECL(vmov_n_expected0,poly,64,1) [] = { 0xfffffffffffffff0 };
+VECT_VAR_DECL(vmov_n_expected0,poly,64,2) [] = { 0xfffffffffffffff0,
+ 0xfffffffffffffff0 };
+VECT_VAR_DECL(vmov_n_expected1,poly,64,1) [] = { 0xfffffffffffffff1 };
+VECT_VAR_DECL(vmov_n_expected1,poly,64,2) [] = { 0xfffffffffffffff1,
+ 0xfffffffffffffff1 };
+VECT_VAR_DECL(vmov_n_expected2,poly,64,1) [] = { 0xfffffffffffffff2 };
+VECT_VAR_DECL(vmov_n_expected2,poly,64,2) [] = { 0xfffffffffffffff2,
+ 0xfffffffffffffff2 };
+
/* Expected results: vldX_lane. */
VECT_VAR_DECL(expected_vld_st2_0,poly,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(expected_vld_st2_0,poly,64,2) [] = { 0xfffffffffffffff0,
@@ -153,9 +165,9 @@ VECT_VAR_DECL(expected_vld_st4_3,poly,64,1) [] = { 0xfffffffffffffff3 };
VECT_VAR_DECL(expected_vld_st4_3,poly,64,2) [] = { 0xaaaaaaaaaaaaaaaa,
0xaaaaaaaaaaaaaaaa };
-/* Expected results: vget_lane. */
-VECT_VAR_DECL(vget_lane_expected,poly,64,1) = 0xfffffffffffffff0;
-VECT_VAR_DECL(vget_lane_expected,poly,64,2) = 0xfffffffffffffff0;
+/* Expected results: vtst. */
+VECT_VAR_DECL(vtst_expected,uint,64,2) [] = { 0x0, 0xffffffffffffffff };
+#endif
int main (void)
{
@@ -727,6 +739,107 @@ int main (void)
CHECK(TEST_MSG, poly, 64, 1, PRIx64, vst1_lane_expected, "");
CHECK(TEST_MSG, poly, 64, 2, PRIx64, vst1_lane_expected, "");
+ /* vget_lane_p64 tests. */
+#undef TEST_MSG
+#define TEST_MSG "VGET_LANE/VGETQ_LANE"
+
+#define TEST_VGET_LANE(Q, T1, T2, W, N, L) \
+  VECT_VAR(vget_lane_vector, T1, W, N) = vget##Q##_lane_##T2##W(VECT_VAR(vget_lane_vector1, T1, W, N), L); \
+  if (VECT_VAR(vget_lane_vector, T1, W, N) != VECT_VAR(vget_lane_expected, T1, W, N)) { \
+ fprintf(stderr, \
+ "ERROR in %s (%s line %d in result '%s') at type %s " \
+ "got 0x%" PRIx##W " != 0x%" PRIx##W "\n", \
+ TEST_MSG, __FILE__, __LINE__, \
+ STR(VECT_VAR(vget_lane_expected, T1, W, N)), \
+ STR(VECT_NAME(T1, W, N)), \
+ VECT_VAR(vget_lane_vector, T1, W, N), \
+ VECT_VAR(vget_lane_expected, T1, W, N)); \
+ abort (); \
+ }
+
+ /* Initialize input values. */
+ DECL_VARIABLE(vget_lane_vector1, poly, 64, 1);
+ DECL_VARIABLE(vget_lane_vector1, poly, 64, 2);
+
+ VLOAD(vget_lane_vector1, buffer, , poly, p, 64, 1);
+ VLOAD(vget_lane_vector1, buffer, q, poly, p, 64, 2);
+
+ VECT_VAR_DECL(vget_lane_vector, poly, 64, 1);
+ VECT_VAR_DECL(vget_lane_vector, poly, 64, 2);
+
+ TEST_VGET_LANE( , poly, p, 64, 1, 0);
+ TEST_VGET_LANE(q, poly, p, 64, 2, 0);
+
+
+ /* vset_lane_p64 tests. */
+#undef TEST_MSG
+#define TEST_MSG "VSET_LANE/VSETQ_LANE"
+
+#define TEST_VSET_LANE(Q, T1, T2, W, N, V, L) \
+  VECT_VAR(vset_lane_vector, T1, W, N) = \
+    vset##Q##_lane_##T2##W(V, \
+			   VECT_VAR(vset_lane_vector, T1, W, N), \
+			   L); \
+  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vset_lane_vector, T1, W, N))
+
+ /* Initialize input values. */
+ DECL_VARIABLE(vset_lane_vector, poly, 64, 1);
+ DECL_VARIABLE(vset_lane_vector, poly, 64, 2);
+
+ CLEAN(result, uint, 64, 1);
+ CLEAN(result, uint, 64, 2);
+
+ VLOAD(vset_lane_vector, buffer, , poly, p, 64, 1);
+ VLOAD(vset_lane_vector, buffer, q, poly, p, 64, 2);
+
+ /* Choose value and lane arbitrarily. */
+ TEST_VSET_LANE(, poly, p, 64, 1, 0x88, 0);
+ TEST_VSET_LANE(q, poly, p, 64, 2, 0x11, 1);
+
+ CHECK(TEST_MSG, poly, 64, 1, PRIx64, vset_lane_expected, "");
+ CHECK(TEST_MSG, poly, 64, 2, PRIx64, vset_lane_expected, "");
+
+
+ /* vtst_p64 tests. */
+#undef TEST_MSG
+#define TEST_MSG "VTST"
+
+#define TEST_VTST1(INSN, Q, T1, T2, W, N) \
+ VECT_VAR(vtst_vector_res, uint, W, N) = \
+ INSN##Q##_##T2##W(VECT_VAR(vtst_vector, T1, W, N), \
+ VECT_VAR(vtst_vector2, T1, W, N)); \
+ vst1##Q##_u##W(VECT_VAR(result, uint, W, N), \
+ VECT_VAR(vtst_vector_res, uint, W, N))
+
+#define TEST_VTST(INSN, Q, T1, T2, W, N) \
+ TEST_VTST1(INSN, Q, T1, T2, W, N) \
+
+ /* Initialize input values. */
+ DECL_VARIABLE(vtst_vector, poly, 64, 1);
+ DECL_VARIABLE(vtst_vector2, poly, 64, 1);
+ DECL_VARIABLE(vtst_vector_res, uint, 64, 1);
+
+ CLEAN(result, uint, 64, 1);
+
+ VLOAD(vtst_vector, buffer, , poly, p, 64, 1);
+ VDUP(vtst_vector2, , poly, p, 64, 1, 5);
+
+ TEST_VTST(vtst, , poly, p, 64, 1);
+
+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, vtst_expected, "");
+
+ /* vtstq_p64 is supported by aarch64 only. */
+#ifdef __aarch64__
+ DECL_VARIABLE(vtst_vector, poly, 64, 2);
+ DECL_VARIABLE(vtst_vector2, poly, 64, 2);
+ DECL_VARIABLE(vtst_vector_res, uint, 64, 2);
+ CLEAN(result, uint, 64, 2);
+ VLOAD(vtst_vector, buffer, q, poly, p, 64, 2);
+ VDUP(vtst_vector2, q, poly, p, 64, 2, 5);
+ TEST_VTST(vtst, q, poly, p, 64, 2);
+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, vtst_expected, "");
+#endif
+
#ifdef __aarch64__
/* vmov_n_p64 tests. */
@@ -767,37 +880,6 @@ int main (void)
}
}
- /* vget_lane_p64 tests. */
-#undef TEST_MSG
-#define TEST_MSG "VGET_LANE/VGETQ_LANE"
-
-#define TEST_VGET_LANE(Q, T1, T2, W, N, L) \
-  VECT_VAR(vget_lane_vector, T1, W, N) = vget##Q##_lane_##T2##W(VECT_VAR(vector, T1, W, N), L); \
-  if (VECT_VAR(vget_lane_vector, T1, W, N) != VECT_VAR(vget_lane_expected, T1, W, N)) { \
- fprintf(stderr, \
- "ERROR in %s (%s line %d in result '%s') at type %s " \
- "got 0x%" PRIx##W " != 0x%" PRIx##W "\n", \
- TEST_MSG, __FILE__, __LINE__, \
- STR(VECT_VAR(vget_lane_expected, T1, W, N)), \
- STR(VECT_NAME(T1, W, N)), \
- VECT_VAR(vget_lane_vector, T1, W, N), \
- VECT_VAR(vget_lane_expected, T1, W, N)); \
- abort (); \
- }
-
- /* Initialize input values. */
- DECL_VARIABLE(vector, poly, 64, 1);
- DECL_VARIABLE(vector, poly, 64, 2);
-
- VLOAD(vector, buffer, , poly, p, 64, 1);
- VLOAD(vector, buffer, q, poly, p, 64, 2);
-
- VECT_VAR_DECL(vget_lane_vector, poly, 64, 1);
- VECT_VAR_DECL(vget_lane_vector, poly, 64, 2);
-
- TEST_VGET_LANE( , poly, p, 64, 1, 0);
- TEST_VGET_LANE(q, poly, p, 64, 2, 0);
-
/* vldx_lane_p64 tests. */
#undef TEST_MSG
#define TEST_MSG "VLDX_LANE/VLDXQ_LANE"