This patch removes the inline assembly and reimplements all mvn/mvnq vector integer intrinsics through the standard "one_cmpl<mode>2" pattern. That pattern was introduced after the initial implementation of those intrinsics, which is why inline assembly was used historically.
OK for trunk? No regressions on the existing advsimd-intrinsics/vmvn.c test. 2016-05-16 Jiong Wang <jiong.w...@arm.com> gcc/ * config/aarch64/arm_neon.h (vmvn_s8): Reimplement using C operator. Remove inline assembly. (vmvn_s16): Likewise. (vmvn_s32): Likewise. (vmvn_u8): Likewise. (vmvn_u16): Likewise. (vmvn_u32): Likewise. (vmvnq_s8): Likewise. (vmvnq_s16): Likewise. (vmvnq_s32): Likewise. (vmvnq_u8): Likewise. (vmvnq_u16): Likewise. (vmvnq_u32): Likewise. (vmvn_p8): Likewise. (vmvnq_p8): Likewise.
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index 432a1fad9a6df6fef844896df5e8ad29cc31f548..ae4c429a87822a8807f2d2ec054d3194b39ef6ac 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -8093,161 +8093,6 @@ vmull_u32 (uint32x2_t a, uint32x2_t b) return result; } -__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -vmvn_p8 (poly8x8_t a) -{ - poly8x8_t result; - __asm__ ("mvn %0.8b,%1.8b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vmvn_s8 (int8x8_t a) -{ - int8x8_t result; - __asm__ ("mvn %0.8b,%1.8b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -vmvn_s16 (int16x4_t a) -{ - int16x4_t result; - __asm__ ("mvn %0.8b,%1.8b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -vmvn_s32 (int32x2_t a) -{ - int32x2_t result; - __asm__ ("mvn %0.8b,%1.8b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vmvn_u8 (uint8x8_t a) -{ - uint8x8_t result; - __asm__ ("mvn %0.8b,%1.8b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -vmvn_u16 (uint16x4_t a) -{ - uint16x4_t result; - __asm__ ("mvn %0.8b,%1.8b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vmvn_u32 (uint32x2_t a) -{ - uint32x2_t result; - __asm__ ("mvn %0.8b,%1.8b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -vmvnq_p8 
(poly8x16_t a) -{ - poly8x16_t result; - __asm__ ("mvn %0.16b,%1.16b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vmvnq_s8 (int8x16_t a) -{ - int8x16_t result; - __asm__ ("mvn %0.16b,%1.16b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -vmvnq_s16 (int16x8_t a) -{ - int16x8_t result; - __asm__ ("mvn %0.16b,%1.16b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vmvnq_s32 (int32x4_t a) -{ - int32x4_t result; - __asm__ ("mvn %0.16b,%1.16b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vmvnq_u8 (uint8x16_t a) -{ - uint8x16_t result; - __asm__ ("mvn %0.16b,%1.16b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -vmvnq_u16 (uint16x8_t a) -{ - uint16x8_t result; - __asm__ ("mvn %0.16b,%1.16b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vmvnq_u32 (uint32x4_t a) -{ - uint32x4_t result; - __asm__ ("mvn %0.16b,%1.16b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - - __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) vpadal_s8 (int16x4_t a, int8x8_t b) { @@ -18622,6 +18467,92 @@ vmulq_n_u32 (uint32x4_t __a, uint32_t __b) return __a * __b; } +/* vmvn */ + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vmvn_p8 (poly8x8_t __a) +{ + return (poly8x8_t) ~((int8x8_t) __a); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vmvn_s8 (int8x8_t __a) 
+{ + return ~__a; +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vmvn_s16 (int16x4_t __a) +{ + return ~__a; +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vmvn_s32 (int32x2_t __a) +{ + return ~__a; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vmvn_u8 (uint8x8_t __a) +{ + return ~__a; +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vmvn_u16 (uint16x4_t __a) +{ + return ~__a; +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vmvn_u32 (uint32x2_t __a) +{ + return ~__a; +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vmvnq_p8 (poly8x16_t __a) +{ + return (poly8x16_t) ~((int8x16_t) __a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vmvnq_s8 (int8x16_t __a) +{ + return ~__a; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vmvnq_s16 (int16x8_t __a) +{ + return ~__a; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vmvnq_s32 (int32x4_t __a) +{ + return ~__a; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vmvnq_u8 (uint8x16_t __a) +{ + return ~__a; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vmvnq_u16 (uint16x8_t __a) +{ + return ~__a; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vmvnq_u32 (uint32x4_t __a) +{ + return ~__a; +} + /* vneg */ __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))