https://github.com/DanielKristofKiss created https://github.com/llvm/llvm-project/pull/107417
Both features have a corresponding target feature, so the compiler can check whether each use is valid. >From 9dadc9bffc40e02dff9ef6a1d79968c8980892f4 Mon Sep 17 00:00:00 2001 From: Daniel Kiss <daniel.k...@arm.com> Date: Thu, 5 Sep 2024 16:42:43 +0200 Subject: [PATCH] [Clang][ARM] Make CRC and DSP intrinsics always available. Both features have a corresponding target feature, so the compiler can check whether each use is valid. --- clang/lib/Headers/arm_acle.h | 39 +++++++++--------- clang/test/CodeGen/arm_acle.c | 76 +++++++++++++++++++++++++++++++++-- 2 files changed, 91 insertions(+), 24 deletions(-) diff --git a/clang/lib/Headers/arm_acle.h b/clang/lib/Headers/arm_acle.h index 1518b0c4c8428f..b1dc90f84ad36f 100644 --- a/clang/lib/Headers/arm_acle.h +++ b/clang/lib/Headers/arm_acle.h @@ -264,28 +264,28 @@ __rbitl(unsigned long __t) { } /* 8.3 16-bit multiplications */ -#if defined(__ARM_FEATURE_DSP) && __ARM_FEATURE_DSP -static __inline__ int32_t __attribute__((__always_inline__,__nodebug__)) +#if defined(__ARM_32BIT_STATE) && __ARM_32BIT_STATE +static __inline__ int32_t __attribute__((__always_inline__,__nodebug__, target("dsp"))) __smulbb(int32_t __a, int32_t __b) { return __builtin_arm_smulbb(__a, __b); } -static __inline__ int32_t __attribute__((__always_inline__,__nodebug__)) +static __inline__ int32_t __attribute__((__always_inline__,__nodebug__, target("dsp"))) __smulbt(int32_t __a, int32_t __b) { return __builtin_arm_smulbt(__a, __b); } -static __inline__ int32_t __attribute__((__always_inline__,__nodebug__)) +static __inline__ int32_t __attribute__((__always_inline__,__nodebug__, target("dsp"))) __smultb(int32_t __a, int32_t __b) { return __builtin_arm_smultb(__a, __b); } -static __inline__ int32_t __attribute__((__always_inline__,__nodebug__)) +static __inline__ int32_t __attribute__((__always_inline__,__nodebug__, target("dsp"))) __smultt(int32_t __a, int32_t __b) { return __builtin_arm_smultt(__a, __b); } -static __inline__ int32_t __attribute__((__always_inline__,__nodebug__)) +static __inline__ int32_t 
__attribute__((__always_inline__,__nodebug__, target("dsp"))) __smulwb(int32_t __a, int32_t __b) { return __builtin_arm_smulwb(__a, __b); } -static __inline__ int32_t __attribute__((__always_inline__,__nodebug__)) +static __inline__ int32_t __attribute__((__always_inline__,__nodebug__, target("dsp"))) __smulwt(int32_t __a, int32_t __b) { return __builtin_arm_smulwt(__a, __b); } @@ -304,46 +304,46 @@ __smulwt(int32_t __a, int32_t __b) { #endif /* 8.4.2 Saturating addition and subtraction intrinsics */ -#if defined(__ARM_FEATURE_DSP) && __ARM_FEATURE_DSP -static __inline__ int32_t __attribute__((__always_inline__, __nodebug__)) +#if defined(__ARM_32BIT_STATE) && __ARM_32BIT_STATE +static __inline__ int32_t __attribute__((__always_inline__, __nodebug__, target("dsp"))) __qadd(int32_t __t, int32_t __v) { return __builtin_arm_qadd(__t, __v); } -static __inline__ int32_t __attribute__((__always_inline__, __nodebug__)) +static __inline__ int32_t __attribute__((__always_inline__, __nodebug__, target("dsp"))) __qsub(int32_t __t, int32_t __v) { return __builtin_arm_qsub(__t, __v); } -static __inline__ int32_t __attribute__((__always_inline__, __nodebug__)) +static __inline__ int32_t __attribute__((__always_inline__, __nodebug__, target("dsp"))) __qdbl(int32_t __t) { return __builtin_arm_qadd(__t, __t); } #endif /* 8.4.3 Accumulating multiplications */ -#if defined(__ARM_FEATURE_DSP) && __ARM_FEATURE_DSP -static __inline__ int32_t __attribute__((__always_inline__, __nodebug__)) +#if defined(__ARM_32BIT_STATE) && __ARM_32BIT_STATE +static __inline__ int32_t __attribute__((__always_inline__, __nodebug__, target("dsp"))) __smlabb(int32_t __a, int32_t __b, int32_t __c) { return __builtin_arm_smlabb(__a, __b, __c); } -static __inline__ int32_t __attribute__((__always_inline__, __nodebug__)) +static __inline__ int32_t __attribute__((__always_inline__, __nodebug__, target("dsp"))) __smlabt(int32_t __a, int32_t __b, int32_t __c) { return __builtin_arm_smlabt(__a, __b, __c); } -static 
__inline__ int32_t __attribute__((__always_inline__, __nodebug__)) +static __inline__ int32_t __attribute__((__always_inline__, __nodebug__, target("dsp"))) __smlatb(int32_t __a, int32_t __b, int32_t __c) { return __builtin_arm_smlatb(__a, __b, __c); } -static __inline__ int32_t __attribute__((__always_inline__, __nodebug__)) +static __inline__ int32_t __attribute__((__always_inline__, __nodebug__, target("dsp"))) __smlatt(int32_t __a, int32_t __b, int32_t __c) { return __builtin_arm_smlatt(__a, __b, __c); } -static __inline__ int32_t __attribute__((__always_inline__, __nodebug__)) +static __inline__ int32_t __attribute__((__always_inline__, __nodebug__, target("dsp"))) __smlawb(int32_t __a, int32_t __b, int32_t __c) { return __builtin_arm_smlawb(__a, __b, __c); } -static __inline__ int32_t __attribute__((__always_inline__, __nodebug__)) +static __inline__ int32_t __attribute__((__always_inline__, __nodebug__, target("dsp"))) __smlawt(int32_t __a, int32_t __b, int32_t __c) { return __builtin_arm_smlawt(__a, __b, __c); } @@ -621,8 +621,6 @@ __rintnf(float __a) { #endif /* 8.8 CRC32 intrinsics */ -#if (defined(__ARM_FEATURE_CRC32) && __ARM_FEATURE_CRC32) || \ - (defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE) static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc"))) __crc32b(uint32_t __a, uint8_t __b) { return __builtin_arm_crc32b(__a, __b); @@ -662,7 +660,6 @@ static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target __crc32cd(uint32_t __a, uint64_t __b) { return __builtin_arm_crc32cd(__a, __b); } -#endif /* 8.6 Floating-point data-processing intrinsics */ /* Armv8.3-A Javascript conversion intrinsic */ diff --git a/clang/test/CodeGen/arm_acle.c b/clang/test/CodeGen/arm_acle.c index 1c41f1b5d23f0c..74de8246d7de6e 100644 --- a/clang/test/CodeGen/arm_acle.c +++ b/clang/test/CodeGen/arm_acle.c @@ -1,4 +1,5 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 
-ffreestanding -triple armv8a-none-eabi -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s -check-prefixes=ARM,AArch32 // RUN: %clang_cc1 -ffreestanding -triple armv8a-none-eabi -target-feature +crc -target-feature +dsp -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s -check-prefixes=ARM,AArch32 // RUN: %clang_cc1 -ffreestanding -Wno-error=implicit-function-declaration -triple aarch64-none-elf -target-feature +neon -target-feature +crc -target-feature +crypto -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s -check-prefixes=ARM,AArch64 // RUN: %clang_cc1 -ffreestanding -triple aarch64-none-elf -target-feature +v8.3a -target-feature +crc -O0 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s -check-prefixes=ARM,AArch64,AArch6483 @@ -638,12 +639,15 @@ uint32_t test_usat(int32_t t) { #endif /* 9.4.2 Saturating addition and subtraction intrinsics */ -#ifdef __ARM_FEATURE_DSP +#ifdef __ARM_32BIT_STATE // AArch32-LABEL: @test_qadd( // AArch32-NEXT: entry: // AArch32-NEXT: [[TMP0:%.*]] = call i32 @llvm.arm.qadd(i32 [[A:%.*]], i32 [[B:%.*]]) // AArch32-NEXT: ret i32 [[TMP0]] // +#ifndef __ARM_FEATURE_DSP +__attribute__((target("dsp"))) +#endif int32_t test_qadd(int32_t a, int32_t b) { return __qadd(a, b); } @@ -653,6 +657,9 @@ int32_t test_qadd(int32_t a, int32_t b) { // AArch32-NEXT: [[TMP0:%.*]] = call i32 @llvm.arm.qsub(i32 [[A:%.*]], i32 [[B:%.*]]) // AArch32-NEXT: ret i32 [[TMP0]] // +#ifndef __ARM_FEATURE_DSP +__attribute__((target("dsp"))) +#endif int32_t test_qsub(int32_t a, int32_t b) { return __qsub(a, b); } @@ -664,6 +671,9 @@ extern int32_t f(); // AArch32-NEXT: [[TMP0:%.*]] = call i32 @llvm.arm.qadd(i32 [[CALL]], i32 [[CALL]]) // AArch32-NEXT: ret i32 [[TMP0]] // +#ifndef __ARM_FEATURE_DSP +__attribute__((target("dsp"))) +#endif int32_t test_qdbl() { return __qdbl(f()); } @@ -672,12 +682,15 @@ int32_t test_qdbl() { /* * 9.3 16-bit 
multiplications */ -#if __ARM_FEATURE_DSP +#ifdef __ARM_32BIT_STATE // AArch32-LABEL: @test_smulbb( // AArch32-NEXT: entry: // AArch32-NEXT: [[TMP0:%.*]] = call i32 @llvm.arm.smulbb(i32 [[A:%.*]], i32 [[B:%.*]]) // AArch32-NEXT: ret i32 [[TMP0]] // +#ifndef __ARM_FEATURE_DSP +__attribute__((target("dsp"))) +#endif int32_t test_smulbb(int32_t a, int32_t b) { return __smulbb(a, b); } @@ -687,6 +700,9 @@ int32_t test_smulbb(int32_t a, int32_t b) { // AArch32-NEXT: [[TMP0:%.*]] = call i32 @llvm.arm.smulbt(i32 [[A:%.*]], i32 [[B:%.*]]) // AArch32-NEXT: ret i32 [[TMP0]] // +#ifndef __ARM_FEATURE_DSP +__attribute__((target("dsp"))) +#endif int32_t test_smulbt(int32_t a, int32_t b) { return __smulbt(a, b); } @@ -696,6 +712,9 @@ int32_t test_smulbt(int32_t a, int32_t b) { // AArch32-NEXT: [[TMP0:%.*]] = call i32 @llvm.arm.smultb(i32 [[A:%.*]], i32 [[B:%.*]]) // AArch32-NEXT: ret i32 [[TMP0]] // +#ifndef __ARM_FEATURE_DSP +__attribute__((target("dsp"))) +#endif int32_t test_smultb(int32_t a, int32_t b) { return __smultb(a, b); } @@ -705,6 +724,9 @@ int32_t test_smultb(int32_t a, int32_t b) { // AArch32-NEXT: [[TMP0:%.*]] = call i32 @llvm.arm.smultt(i32 [[A:%.*]], i32 [[B:%.*]]) // AArch32-NEXT: ret i32 [[TMP0]] // +#ifndef __ARM_FEATURE_DSP +__attribute__((target("dsp"))) +#endif int32_t test_smultt(int32_t a, int32_t b) { return __smultt(a, b); } @@ -714,6 +736,9 @@ int32_t test_smultt(int32_t a, int32_t b) { // AArch32-NEXT: [[TMP0:%.*]] = call i32 @llvm.arm.smulwb(i32 [[A:%.*]], i32 [[B:%.*]]) // AArch32-NEXT: ret i32 [[TMP0]] // +#ifndef __ARM_FEATURE_DSP +__attribute__((target("dsp"))) +#endif int32_t test_smulwb(int32_t a, int32_t b) { return __smulwb(a, b); } @@ -723,18 +748,24 @@ int32_t test_smulwb(int32_t a, int32_t b) { // AArch32-NEXT: [[TMP0:%.*]] = call i32 @llvm.arm.smulwt(i32 [[A:%.*]], i32 [[B:%.*]]) // AArch32-NEXT: ret i32 [[TMP0]] // +#ifndef __ARM_FEATURE_DSP +__attribute__((target("dsp"))) +#endif int32_t test_smulwt(int32_t a, int32_t b) { return 
__smulwt(a, b); } #endif /* 9.4.3 Accumultating multiplications */ -#if __ARM_FEATURE_DSP +#ifdef __ARM_32BIT_STATE // AArch32-LABEL: @test_smlabb( // AArch32-NEXT: entry: // AArch32-NEXT: [[TMP0:%.*]] = call i32 @llvm.arm.smlabb(i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) // AArch32-NEXT: ret i32 [[TMP0]] // +#ifndef __ARM_FEATURE_DSP +__attribute__((target("dsp"))) +#endif int32_t test_smlabb(int32_t a, int32_t b, int32_t c) { return __smlabb(a, b, c); } @@ -744,6 +775,9 @@ int32_t test_smlabb(int32_t a, int32_t b, int32_t c) { // AArch32-NEXT: [[TMP0:%.*]] = call i32 @llvm.arm.smlabt(i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) // AArch32-NEXT: ret i32 [[TMP0]] // +#ifndef __ARM_FEATURE_DSP +__attribute__((target("dsp"))) +#endif int32_t test_smlabt(int32_t a, int32_t b, int32_t c) { return __smlabt(a, b, c); } @@ -753,6 +787,9 @@ int32_t test_smlabt(int32_t a, int32_t b, int32_t c) { // AArch32-NEXT: [[TMP0:%.*]] = call i32 @llvm.arm.smlatb(i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) // AArch32-NEXT: ret i32 [[TMP0]] // +#ifndef __ARM_FEATURE_DSP +__attribute__((target("dsp"))) +#endif int32_t test_smlatb(int32_t a, int32_t b, int32_t c) { return __smlatb(a, b, c); } @@ -762,6 +799,9 @@ int32_t test_smlatb(int32_t a, int32_t b, int32_t c) { // AArch32-NEXT: [[TMP0:%.*]] = call i32 @llvm.arm.smlatt(i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) // AArch32-NEXT: ret i32 [[TMP0]] // +#ifndef __ARM_FEATURE_DSP +__attribute__((target("dsp"))) +#endif int32_t test_smlatt(int32_t a, int32_t b, int32_t c) { return __smlatt(a, b, c); } @@ -771,6 +811,9 @@ int32_t test_smlatt(int32_t a, int32_t b, int32_t c) { // AArch32-NEXT: [[TMP0:%.*]] = call i32 @llvm.arm.smlawb(i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) // AArch32-NEXT: ret i32 [[TMP0]] // +#ifndef __ARM_FEATURE_DSP +__attribute__((target("dsp"))) +#endif int32_t test_smlawb(int32_t a, int32_t b, int32_t c) { return __smlawb(a, b, c); } @@ -780,6 +823,9 @@ int32_t test_smlawb(int32_t a, int32_t b, int32_t c) { // 
AArch32-NEXT: [[TMP0:%.*]] = call i32 @llvm.arm.smlawt(i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) // AArch32-NEXT: ret i32 [[TMP0]] // +#ifndef __ARM_FEATURE_DSP +__attribute__((target("dsp"))) +#endif int32_t test_smlawt(int32_t a, int32_t b, int32_t c) { return __smlawt(a, b, c); } @@ -1335,6 +1381,9 @@ int32_t test_smusdx(int16x2_t a, int16x2_t b) { // AArch64-NEXT: [[TMP1:%.*]] = call i32 @llvm.aarch64.crc32b(i32 [[A:%.*]], i32 [[TMP0]]) // AArch64-NEXT: ret i32 [[TMP1]] // +#ifndef __ARM_FEATURE_CRC32 +__attribute__((target("crc"))) +#endif uint32_t test_crc32b(uint32_t a, uint8_t b) { return __crc32b(a, b); } @@ -1351,6 +1400,9 @@ uint32_t test_crc32b(uint32_t a, uint8_t b) { // AArch64-NEXT: [[TMP1:%.*]] = call i32 @llvm.aarch64.crc32h(i32 [[A:%.*]], i32 [[TMP0]]) // AArch64-NEXT: ret i32 [[TMP1]] // +#ifndef __ARM_FEATURE_CRC32 +__attribute__((target("crc"))) +#endif uint32_t test_crc32h(uint32_t a, uint16_t b) { return __crc32h(a, b); } @@ -1365,6 +1417,9 @@ uint32_t test_crc32h(uint32_t a, uint16_t b) { // AArch64-NEXT: [[TMP0:%.*]] = call i32 @llvm.aarch64.crc32w(i32 [[A:%.*]], i32 [[B:%.*]]) // AArch64-NEXT: ret i32 [[TMP0]] // +#ifndef __ARM_FEATURE_CRC32 +__attribute__((target("crc"))) +#endif uint32_t test_crc32w(uint32_t a, uint32_t b) { return __crc32w(a, b); } @@ -1383,6 +1438,9 @@ uint32_t test_crc32w(uint32_t a, uint32_t b) { // AArch64-NEXT: [[TMP0:%.*]] = call i32 @llvm.aarch64.crc32x(i32 [[A:%.*]], i64 [[B:%.*]]) // AArch64-NEXT: ret i32 [[TMP0]] // +#ifndef __ARM_FEATURE_CRC32 +__attribute__((target("crc"))) +#endif uint32_t test_crc32d(uint32_t a, uint64_t b) { return __crc32d(a, b); } @@ -1399,6 +1457,9 @@ uint32_t test_crc32d(uint32_t a, uint64_t b) { // AArch64-NEXT: [[TMP1:%.*]] = call i32 @llvm.aarch64.crc32cb(i32 [[A:%.*]], i32 [[TMP0]]) // AArch64-NEXT: ret i32 [[TMP1]] // +#ifndef __ARM_FEATURE_CRC32 +__attribute__((target("crc"))) +#endif uint32_t test_crc32cb(uint32_t a, uint8_t b) { return __crc32cb(a, b); } @@ -1415,6 +1476,9 
@@ uint32_t test_crc32cb(uint32_t a, uint8_t b) { // AArch64-NEXT: [[TMP1:%.*]] = call i32 @llvm.aarch64.crc32ch(i32 [[A:%.*]], i32 [[TMP0]]) // AArch64-NEXT: ret i32 [[TMP1]] // +#ifndef __ARM_FEATURE_CRC32 +__attribute__((target("crc"))) +#endif uint32_t test_crc32ch(uint32_t a, uint16_t b) { return __crc32ch(a, b); } @@ -1429,6 +1493,9 @@ uint32_t test_crc32ch(uint32_t a, uint16_t b) { // AArch64-NEXT: [[TMP0:%.*]] = call i32 @llvm.aarch64.crc32cw(i32 [[A:%.*]], i32 [[B:%.*]]) // AArch64-NEXT: ret i32 [[TMP0]] // +#ifndef __ARM_FEATURE_CRC32 +__attribute__((target("crc"))) +#endif uint32_t test_crc32cw(uint32_t a, uint32_t b) { return __crc32cw(a, b); } @@ -1447,6 +1514,9 @@ uint32_t test_crc32cw(uint32_t a, uint32_t b) { // AArch64-NEXT: [[TMP0:%.*]] = call i32 @llvm.aarch64.crc32cx(i32 [[A:%.*]], i64 [[B:%.*]]) // AArch64-NEXT: ret i32 [[TMP0]] // +#ifndef __ARM_FEATURE_CRC32 +__attribute__((target("crc"))) +#endif uint32_t test_crc32cd(uint32_t a, uint64_t b) { return __crc32cd(a, b); } _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits