This adds function attributes to the functions specialized for AVX2, SSE2, etc. These attributes are supported by both clang and gcc, and are necessary in order to build the code with clang. The existing gcc-specific pragmas were left in place because of a comment in util/bufferiszero.c, which mentions that the pragmas are needed before the headers to work around a bug in gcc <= 4.8.
Signed-off-by: Tom Stellard <[email protected]> --- meson.build | 2 +- util/bufferiszero.c | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/meson.build b/meson.build index cf3e517e56..45573c0e44 100644 --- a/meson.build +++ b/meson.build @@ -2323,7 +2323,7 @@ config_host_data.set('CONFIG_AVX2_OPT', get_option('avx2') \ #pragma GCC target("avx2") #include <cpuid.h> #include <immintrin.h> - static int bar(void *a) { + __attribute__((target("avx2"))) static int bar(void *a) { __m256i x = *(__m256i *)a; return _mm256_testz_si256(x, x); } diff --git a/util/bufferiszero.c b/util/bufferiszero.c index ec3cd4ca15..e4f5628643 100644 --- a/util/bufferiszero.c +++ b/util/bufferiszero.c @@ -75,7 +75,7 @@ buffer_zero_int(const void *buf, size_t len) /* Note that each of these vectorized functions require len >= 64. */ -static bool +__attribute__((target("sse2"))) static bool buffer_zero_sse2(const void *buf, size_t len) { __m128i t = _mm_loadu_si128(buf); @@ -117,7 +117,7 @@ buffer_zero_sse2(const void *buf, size_t len) #pragma GCC target("sse4") #include <smmintrin.h> -static bool +__attribute__((target("sse4"))) static bool buffer_zero_sse4(const void *buf, size_t len) { __m128i t = _mm_loadu_si128(buf); @@ -150,7 +150,7 @@ buffer_zero_sse4(const void *buf, size_t len) #pragma GCC target("avx2") #include <immintrin.h> -static bool +__attribute__((target("avx2"))) static bool buffer_zero_avx2(const void *buf, size_t len) { /* Begin with an unaligned head of 32 bytes. */ @@ -184,7 +184,7 @@ buffer_zero_avx2(const void *buf, size_t len) #pragma GCC target("avx512f") #include <immintrin.h> -static bool +__attribute__((target("avx512f"))) static bool buffer_zero_avx512(const void *buf, size_t len) { /* Begin with an unaligned head of 64 bytes. */ -- 2.35.3
