olista01 created this revision. olista01 added a subscriber: cfe-commits. olista01 set the repository for this revision to rL LLVM. Herald added subscribers: rengolin, aemerson.
These two intrinsics are defined in arm_acle.h. __rev16l needs to rotate by 16 bits, but it was actually rotating by 2 bits. For AArch64, where long is 64 bits, this would still be wrong. __rev16ll was incorrect: it reversed the bytes in each 32-bit word, rather than each 16-bit halfword. The correct implementation is to apply __rev16 to the top and bottom words of the 64-bit value. For AArch32 targets, these get compiled down to the hardware rev16 instruction at -O1 and above. For AArch64 targets, the 64-bit ones get compiled to two 32-bit rev16 instructions, because there is not currently a pattern for the 64-bit rev16 instruction. Repository: rL LLVM http://reviews.llvm.org/D14609 Files: lib/Headers/arm_acle.h test/CodeGen/arm_acle.c Index: test/CodeGen/arm_acle.c =================================================================== --- test/CodeGen/arm_acle.c +++ test/CodeGen/arm_acle.c @@ -186,27 +186,53 @@ // ARM-LABEL: test_rev16 // ARM: llvm.bswap -// ARM: lshr -// ARM: shl +// ARM: lshr {{.*}}, 16 +// ARM: shl {{.*}}, 16 // ARM: or uint32_t test_rev16(uint32_t t) { return __rev16(t); } // ARM-LABEL: test_rev16l -// ARM: llvm.bswap -// ARM: lshr -// ARM: shl -// ARM: or +// AArch32: llvm.bswap +// AArch32: lshr {{.*}}, 16 +// AArch32: shl {{.*}}, 16 +// AArch32: or +// AArch64: [[T1:%.*]] = lshr i64 [[IN:%.*]], 32 +// AArch64: [[T2:%.*]] = trunc i64 [[T1]] to i32 +// AArch64: [[T3:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[T2]]) +// AArch64: [[T4:%.*]] = lshr i32 [[T3]], 16 +// AArch64: [[T5:%.*]] = shl i32 [[T3]], 16 +// AArch64: [[T6:%.*]] = or i32 [[T5]], [[T4]] +// AArch64: [[T7:%.*]] = zext i32 [[T6]] to i64 +// AArch64: [[T8:%.*]] = shl nuw i64 [[T7]], 32 +// AArch64: [[T9:%.*]] = trunc i64 [[IN]] to i32 +// AArch64: [[T10:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[T9]]) +// AArch64: [[T11:%.*]] = lshr i32 [[T10]], 16 +// AArch64: [[T12:%.*]] = shl i32 [[T10]], 16 +// AArch64: [[T13:%.*]] = or i32 [[T12]], [[T11]] +// AArch64: [[T14:%.*]] = zext i32 
[[T13]] to i64 +// AArch64: [[T15:%.*]] = or i64 [[T8]], [[T14]] long test_rev16l(long t) { return __rev16l(t); } // ARM-LABEL: test_rev16ll -// ARM: llvm.bswap -// ARM: lshr -// ARM: shl -// ARM: or +// ARM: [[T1:%.*]] = lshr i64 [[IN:%.*]], 32 +// ARM: [[T2:%.*]] = trunc i64 [[T1]] to i32 +// ARM: [[T3:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[T2]]) +// ARM: [[T4:%.*]] = lshr i32 [[T3]], 16 +// ARM: [[T5:%.*]] = shl i32 [[T3]], 16 +// ARM: [[T6:%.*]] = or i32 [[T5]], [[T4]] +// ARM: [[T7:%.*]] = zext i32 [[T6]] to i64 +// ARM: [[T8:%.*]] = shl nuw i64 [[T7]], 32 +// ARM: [[T9:%.*]] = trunc i64 [[IN]] to i32 +// ARM: [[T10:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[T9]]) +// ARM: [[T11:%.*]] = lshr i32 [[T10]], 16 +// ARM: [[T12:%.*]] = shl i32 [[T10]], 16 +// ARM: [[T13:%.*]] = or i32 [[T12]], [[T11]] +// ARM: [[T14:%.*]] = zext i32 [[T13]] to i64 +// ARM: [[T15:%.*]] = or i64 [[T8]], [[T14]] uint64_t test_rev16ll(uint64_t t) { return __rev16ll(t); } Index: lib/Headers/arm_acle.h =================================================================== --- lib/Headers/arm_acle.h +++ lib/Headers/arm_acle.h @@ -175,14 +175,18 @@ return __ror(__rev(t), 16); } -static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__)) - __rev16l(unsigned long t) { - return __rorl(__revl(t), sizeof(long) / 2); -} - static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__)) __rev16ll(uint64_t t) { - return __rorll(__revll(t), 32); + return (((uint64_t)__rev16(t >> 32)) << 32) | __rev16(t); +} + +static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__)) + __rev16l(unsigned long t) { +#if __SIZEOF_LONG__ == 4 + return __rev16(t); +#else + return __rev16ll(t); +#endif } /* REVSH */
Index: test/CodeGen/arm_acle.c =================================================================== --- test/CodeGen/arm_acle.c +++ test/CodeGen/arm_acle.c @@ -186,27 +186,53 @@ // ARM-LABEL: test_rev16 // ARM: llvm.bswap -// ARM: lshr -// ARM: shl +// ARM: lshr {{.*}}, 16 +// ARM: shl {{.*}}, 16 // ARM: or uint32_t test_rev16(uint32_t t) { return __rev16(t); } // ARM-LABEL: test_rev16l -// ARM: llvm.bswap -// ARM: lshr -// ARM: shl -// ARM: or +// AArch32: llvm.bswap +// AArch32: lshr {{.*}}, 16 +// AArch32: shl {{.*}}, 16 +// AArch32: or +// AArch64: [[T1:%.*]] = lshr i64 [[IN:%.*]], 32 +// AArch64: [[T2:%.*]] = trunc i64 [[T1]] to i32 +// AArch64: [[T3:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[T2]]) +// AArch64: [[T4:%.*]] = lshr i32 [[T3]], 16 +// AArch64: [[T5:%.*]] = shl i32 [[T3]], 16 +// AArch64: [[T6:%.*]] = or i32 [[T5]], [[T4]] +// AArch64: [[T7:%.*]] = zext i32 [[T6]] to i64 +// AArch64: [[T8:%.*]] = shl nuw i64 [[T7]], 32 +// AArch64: [[T9:%.*]] = trunc i64 [[IN]] to i32 +// AArch64: [[T10:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[T9]]) +// AArch64: [[T11:%.*]] = lshr i32 [[T10]], 16 +// AArch64: [[T12:%.*]] = shl i32 [[T10]], 16 +// AArch64: [[T13:%.*]] = or i32 [[T12]], [[T11]] +// AArch64: [[T14:%.*]] = zext i32 [[T13]] to i64 +// AArch64: [[T15:%.*]] = or i64 [[T8]], [[T14]] long test_rev16l(long t) { return __rev16l(t); } // ARM-LABEL: test_rev16ll -// ARM: llvm.bswap -// ARM: lshr -// ARM: shl -// ARM: or +// ARM: [[T1:%.*]] = lshr i64 [[IN:%.*]], 32 +// ARM: [[T2:%.*]] = trunc i64 [[T1]] to i32 +// ARM: [[T3:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[T2]]) +// ARM: [[T4:%.*]] = lshr i32 [[T3]], 16 +// ARM: [[T5:%.*]] = shl i32 [[T3]], 16 +// ARM: [[T6:%.*]] = or i32 [[T5]], [[T4]] +// ARM: [[T7:%.*]] = zext i32 [[T6]] to i64 +// ARM: [[T8:%.*]] = shl nuw i64 [[T7]], 32 +// ARM: [[T9:%.*]] = trunc i64 [[IN]] to i32 +// ARM: [[T10:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[T9]]) +// ARM: [[T11:%.*]] = lshr i32 [[T10]], 16 +// ARM: [[T12:%.*]] 
= shl i32 [[T10]], 16 +// ARM: [[T13:%.*]] = or i32 [[T12]], [[T11]] +// ARM: [[T14:%.*]] = zext i32 [[T13]] to i64 +// ARM: [[T15:%.*]] = or i64 [[T8]], [[T14]] uint64_t test_rev16ll(uint64_t t) { return __rev16ll(t); } Index: lib/Headers/arm_acle.h =================================================================== --- lib/Headers/arm_acle.h +++ lib/Headers/arm_acle.h @@ -175,14 +175,18 @@ return __ror(__rev(t), 16); } -static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__)) - __rev16l(unsigned long t) { - return __rorl(__revl(t), sizeof(long) / 2); -} - static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__)) __rev16ll(uint64_t t) { - return __rorll(__revll(t), 32); + return (((uint64_t)__rev16(t >> 32)) << 32) | __rev16(t); +} + +static __inline__ unsigned long __attribute__((__always_inline__, __nodebug__)) + __rev16l(unsigned long t) { +#if __SIZEOF_LONG__ == 4 + return __rev16(t); +#else + return __rev16ll(t); +#endif } /* REVSH */
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits