Author: Daniel Kiss
Date: 2022-10-14T17:23:11+02:00
New Revision: 30b67c677c6baf0d6ef6c3051cf270133c43e4d2

URL: https://github.com/llvm/llvm-project/commit/30b67c677c6baf0d6ef6c3051cf270133c43e4d2
DIFF: https://github.com/llvm/llvm-project/commit/30b67c677c6baf0d6ef6c3051cf270133c43e4d2.diff

LOG: [AArch64] Make ACLE intrinsics always available part1

A given arch feature might be enabled by a pragma or a function attribute,
so in these cases it would be nice to use the intrinsics.
Today GCC offers the intrinsics without the -march flag[1].
PR[2] for ACLE clarifies the intention and removes the need for the -march
flag for a given intrinsic.
This is going to be more useful when D127812 lands.

[1] https://godbolt.org/z/bxcMhav3z
[2] https://github.com/ARM-software/acle/pull/214

Reviewed By: dmgreen

Differential Revision: https://reviews.llvm.org/D133359

Added:


Modified:
    clang/include/clang/Basic/BuiltinsAArch64.def
    clang/lib/Headers/arm_acle.h
    clang/test/CodeGen/arm_acle.c
    clang/test/CodeGen/builtins-arm64.c

Removed:



################################################################################
diff --git a/clang/include/clang/Basic/BuiltinsAArch64.def b/clang/include/clang/Basic/BuiltinsAArch64.def
index 036df7435bfb..e6e375bc2b83 100644
--- a/clang/include/clang/Basic/BuiltinsAArch64.def
+++ b/clang/include/clang/Basic/BuiltinsAArch64.def
@@ -74,7 +74,7 @@ BUILTIN(__builtin_arm_dmb, "vUi", "nc")
 BUILTIN(__builtin_arm_dsb, "vUi", "nc")
 BUILTIN(__builtin_arm_isb, "vUi", "nc")
 
-BUILTIN(__builtin_arm_jcvt, "Zid", "nc")
+TARGET_BUILTIN(__builtin_arm_jcvt, "Zid", "nc", "v8.3a")
 
 // Prefetch
 BUILTIN(__builtin_arm_prefetch, "vvC*UiUiUiUi", "nc")
@@ -107,24 +107,24 @@ BUILTIN(__builtin_arm_tcancel, "vWUIi", "n")
 BUILTIN(__builtin_arm_ttest, "WUi", "nc")
 
 // Armv8.5-A FP rounding intrinsics
-BUILTIN(__builtin_arm_rint32zf, "ff", "")
-BUILTIN(__builtin_arm_rint32z, "dd", "")
-BUILTIN(__builtin_arm_rint64zf, "ff", "")
-BUILTIN(__builtin_arm_rint64z, "dd", "")
-BUILTIN(__builtin_arm_rint32xf, "ff", "")
-BUILTIN(__builtin_arm_rint32x, "dd", "")
-BUILTIN(__builtin_arm_rint64xf, "ff", "")
-BUILTIN(__builtin_arm_rint64x, "dd", "")
+TARGET_BUILTIN(__builtin_arm_rint32zf, "ff", "", "v8.5a")
+TARGET_BUILTIN(__builtin_arm_rint32z, "dd", "", "v8.5a")
+TARGET_BUILTIN(__builtin_arm_rint64zf, "ff", "", "v8.5a")
+TARGET_BUILTIN(__builtin_arm_rint64z, "dd", "", "v8.5a")
+TARGET_BUILTIN(__builtin_arm_rint32xf, "ff", "", "v8.5a")
+TARGET_BUILTIN(__builtin_arm_rint32x, "dd", "", "v8.5a")
+TARGET_BUILTIN(__builtin_arm_rint64xf, "ff", "", "v8.5a")
+TARGET_BUILTIN(__builtin_arm_rint64x, "dd", "", "v8.5a")
 
 // Armv8.5-A Random number generation intrinsics
-BUILTIN(__builtin_arm_rndr, "iWUi*", "n")
-BUILTIN(__builtin_arm_rndrrs, "iWUi*", "n")
+TARGET_BUILTIN(__builtin_arm_rndr, "iWUi*", "n", "rand")
+TARGET_BUILTIN(__builtin_arm_rndrrs, "iWUi*", "n", "rand")
 
 // Armv8.7-A load/store 64-byte intrinsics
-BUILTIN(__builtin_arm_ld64b, "vvC*WUi*", "n")
-BUILTIN(__builtin_arm_st64b, "vv*WUiC*", "n")
-BUILTIN(__builtin_arm_st64bv, "WUiv*WUiC*", "n")
-BUILTIN(__builtin_arm_st64bv0, "WUiv*WUiC*", "n")
+TARGET_BUILTIN(__builtin_arm_ld64b, "vvC*WUi*", "n", "ls64")
+TARGET_BUILTIN(__builtin_arm_st64b, "vv*WUiC*", "n", "ls64")
+TARGET_BUILTIN(__builtin_arm_st64bv, "WUiv*WUiC*", "n", "ls64")
+TARGET_BUILTIN(__builtin_arm_st64bv0, "WUiv*WUiC*", "n", "ls64")
 
 TARGET_HEADER_BUILTIN(_BitScanForward, "UcUNi*UNi", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
 TARGET_HEADER_BUILTIN(_BitScanReverse, "UcUNi*UNi", "nh", "intrin.h", ALL_MS_LANGUAGES, "")

diff --git a/clang/lib/Headers/arm_acle.h b/clang/lib/Headers/arm_acle.h
index ed3fc1de1fd4..d73b6bf82d69 100644
--- a/clang/lib/Headers/arm_acle.h
+++ b/clang/lib/Headers/arm_acle.h
@@ -589,122 +589,123 @@ __smusdx(int16x2_t __a, int16x2_t __b) {
 #endif
 
 /* 9.7 CRC32 intrinsics */
-#if defined(__ARM_FEATURE_CRC32) && __ARM_FEATURE_CRC32
-static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+#if (defined(__ARM_FEATURE_CRC32) && __ARM_FEATURE_CRC32) || \
+    (defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE)
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc")))
 __crc32b(uint32_t __a, uint8_t __b) {
   return __builtin_arm_crc32b(__a, __b);
 }
 
-static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc")))
 __crc32h(uint32_t __a, uint16_t __b) {
   return __builtin_arm_crc32h(__a, __b);
 }
 
-static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc")))
 __crc32w(uint32_t __a, uint32_t __b) {
   return __builtin_arm_crc32w(__a, __b);
 }
 
-static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc")))
 __crc32d(uint32_t __a, uint64_t __b) {
   return __builtin_arm_crc32d(__a, __b);
 }
 
-static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc")))
 __crc32cb(uint32_t __a, uint8_t __b) {
   return __builtin_arm_crc32cb(__a, __b);
 }
 
-static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc")))
 __crc32ch(uint32_t __a, uint16_t __b) {
   return __builtin_arm_crc32ch(__a, __b);
 }
 
-static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc")))
 __crc32cw(uint32_t __a, uint32_t __b) {
   return __builtin_arm_crc32cw(__a, __b);
 }
 
-static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc")))
 __crc32cd(uint32_t __a, uint64_t __b) {
   return __builtin_arm_crc32cd(__a, __b);
 }
 #endif
 
 /* Armv8.3-A Javascript conversion intrinsic */
-#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE && defined(__ARM_FEATURE_JCVT)
-static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
+#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
+static __inline__ int32_t __attribute__((__always_inline__, __nodebug__, target("v8.3a")))
 __jcvt(double __a) {
   return __builtin_arm_jcvt(__a);
 }
 #endif
 
 /* Armv8.5-A FP rounding intrinsics */
-#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE && defined(__ARM_FEATURE_FRINT)
-static __inline__ float __attribute__((__always_inline__, __nodebug__))
+#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
+static __inline__ float __attribute__((__always_inline__, __nodebug__, target("v8.5a")))
 __rint32zf(float __a) {
   return __builtin_arm_rint32zf(__a);
 }
 
-static __inline__ double __attribute__((__always_inline__, __nodebug__))
+static __inline__ double __attribute__((__always_inline__, __nodebug__, target("v8.5a")))
 __rint32z(double __a) {
   return __builtin_arm_rint32z(__a);
 }
 
-static __inline__ float __attribute__((__always_inline__, __nodebug__))
+static __inline__ float __attribute__((__always_inline__, __nodebug__, target("v8.5a")))
 __rint64zf(float __a) {
   return __builtin_arm_rint64zf(__a);
 }
 
-static __inline__ double __attribute__((__always_inline__, __nodebug__))
+static __inline__ double __attribute__((__always_inline__, __nodebug__, target("v8.5a")))
 __rint64z(double __a) {
   return __builtin_arm_rint64z(__a);
 }
 
-static __inline__ float __attribute__((__always_inline__, __nodebug__))
+static __inline__ float __attribute__((__always_inline__, __nodebug__, target("v8.5a")))
 __rint32xf(float __a) {
   return __builtin_arm_rint32xf(__a);
 }
 
-static __inline__ double __attribute__((__always_inline__, __nodebug__))
+static __inline__ double __attribute__((__always_inline__, __nodebug__, target("v8.5a")))
 __rint32x(double __a) {
   return __builtin_arm_rint32x(__a);
 }
 
-static __inline__ float __attribute__((__always_inline__, __nodebug__))
+static __inline__ float __attribute__((__always_inline__, __nodebug__, target("v8.5a")))
 __rint64xf(float __a) {
   return __builtin_arm_rint64xf(__a);
 }
 
-static __inline__ double __attribute__((__always_inline__, __nodebug__))
+static __inline__ double __attribute__((__always_inline__, __nodebug__, target("v8.5a")))
 __rint64x(double __a) {
   return __builtin_arm_rint64x(__a);
 }
 #endif
 
 /* Armv8.7-A load/store 64-byte intrinsics */
-#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE && defined(__ARM_FEATURE_LS64)
+#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
 typedef struct {
     uint64_t val[8];
 } data512_t;
 
-static __inline__ data512_t __attribute__((__always_inline__, __nodebug__))
+static __inline__ data512_t __attribute__((__always_inline__, __nodebug__, target("ls64")))
 __arm_ld64b(const void *__addr) {
-    data512_t __value;
-    __builtin_arm_ld64b(__addr, __value.val);
-    return __value;
+  data512_t __value;
+  __builtin_arm_ld64b(__addr, __value.val);
+  return __value;
 }
 
-static __inline__ void __attribute__((__always_inline__, __nodebug__))
+static __inline__ void __attribute__((__always_inline__, __nodebug__, target("ls64")))
 __arm_st64b(void *__addr, data512_t __value) {
-    __builtin_arm_st64b(__addr, __value.val);
+  __builtin_arm_st64b(__addr, __value.val);
 }
 
-static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__, target("ls64")))
 __arm_st64bv(void *__addr, data512_t __value) {
-    return __builtin_arm_st64bv(__addr, __value.val);
+  return __builtin_arm_st64bv(__addr, __value.val);
 }
 
-static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__, target("ls64")))
 __arm_st64bv0(void *__addr, data512_t __value) {
-    return __builtin_arm_st64bv0(__addr, __value.val);
+  return __builtin_arm_st64bv0(__addr, __value.val);
 }
 #endif
 
@@ -759,12 +760,12 @@ __arm_st64bv0(void *__addr, data512_t __value) {
 #endif /* __ARM_FEATURE_TME */
 
 /* Armv8.5-A Random number generation intrinsics */
-#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE && defined(__ARM_FEATURE_RNG)
-static __inline__ int __attribute__((__always_inline__, __nodebug__))
+#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
+static __inline__ int __attribute__((__always_inline__, __nodebug__, target("rand")))
 __rndr(uint64_t *__p) {
   return __builtin_arm_rndr(__p);
 }
-static __inline__ int __attribute__((__always_inline__, __nodebug__))
+static __inline__ int __attribute__((__always_inline__, __nodebug__, target("rand")))
 __rndrrs(uint64_t *__p) {
   return __builtin_arm_rndrrs(__p);
 }

diff --git a/clang/test/CodeGen/arm_acle.c b/clang/test/CodeGen/arm_acle.c
index 60c4d9a5855b..6003dd2c7ba2 100644
--- a/clang/test/CodeGen/arm_acle.c
+++ b/clang/test/CodeGen/arm_acle.c
@@ -124,7 +124,7 @@ void test_sevl(void) {
   __sevl();
 }
 
-#if __ARM_32BIT_STATE
+#ifdef __ARM_32BIT_STATE
 // AArch32-LABEL: @test_dbg(
 // AArch32-NEXT: entry:
 // AArch32-NEXT:    call void @llvm.arm.dbg(i32 0)
@@ -1646,7 +1646,7 @@ void test_wsrf64(double v) {
 #endif
 }
 
-#ifdef __ARM_64BIT_STATE
+#if defined(__ARM_64BIT_STATE) && defined(__ARM_FEATURE_JCVT)
 // AArch6483-LABEL: @test_jcvt(
 // AArch6483-NEXT: entry:
 // AArch6483-NEXT:    [[TMP0:%.*]] = call i32 @llvm.aarch64.fjcvtzs(double [[V:%.*]])
@@ -1658,7 +1658,7 @@ int32_t test_jcvt(double v) {
 #endif
 
 
-#if __ARM_64BIT_STATE && defined(__ARM_FEATURE_RNG)
+#if defined(__ARM_64BIT_STATE) && defined(__ARM_FEATURE_RNG)
 
 // AArch6485-LABEL: @test_rndr(
 // AArch6485-NEXT: entry:

diff --git a/clang/test/CodeGen/builtins-arm64.c b/clang/test/CodeGen/builtins-arm64.c
index daa0b08a9e57..4619b6ba610f 100644
--- a/clang/test/CodeGen/builtins-arm64.c
+++ b/clang/test/CodeGen/builtins-arm64.c
@@ -59,6 +59,7 @@ void prefetch(void) {
   // CHECK: call {{.*}} @llvm.prefetch.p0(ptr null, i32 0, i32 3, i32 0)
 }
 
+__attribute__((target("v8.5a")))
 int32_t jcvt(double v) {
   //CHECK-LABEL: @jcvt(
   //CHECK: call i32 @llvm.aarch64.fjcvtzs
@@ -133,6 +134,7 @@ unsigned int clsll(uint64_t v) {
 // CHECK-NEXT:    [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
 // CHECK-NEXT:    ret i32 [[TMP3]]
 //
+__attribute__((target("rand")))
 int rndr(uint64_t *__addr) {
   return __builtin_arm_rndr(__addr);
 }
@@ -146,6 +148,7 @@ int rndr(uint64_t *__addr) {
 // CHECK-NEXT:    [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
 // CHECK-NEXT:    ret i32 [[TMP3]]
 //
+__attribute__((target("rand")))
 int rndrrs(uint64_t *__addr) {
   return __builtin_arm_rndrrs(__addr);
 }
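
For reference, a minimal usage sketch of what this change enables (this code is
not part of the commit, and the function names crc32_update/to_js_int are
illustrative): because the builtins are now declared as TARGET_BUILTINs and the
arm_acle.h wrappers carry matching target attributes, a per-function target
attribute is enough to use the intrinsics, without a matching -march flag on
the command line.

/* Build for AArch64 (e.g. -target aarch64-linux-gnu) without any -march
   feature flags; the per-function target attributes supply the features. */
#include <arm_acle.h>
#include <stdint.h>

/* CRC32 wrapper from arm_acle.h: on AArch64 it is now declared even when
   __ARM_FEATURE_CRC32 is not predefined, and is usable from a function
   that itself enables the "crc" feature. */
__attribute__((target("crc")))
uint32_t crc32_update(uint32_t acc, uint8_t byte) {
  return __crc32b(acc, byte);
}

/* Armv8.3-A Javascript conversion: same pattern with the "v8.3a" target. */
__attribute__((target("v8.3a")))
int32_t to_js_int(double d) {
  return __jcvt(d);
}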