On 09/12/2015 15:57, Richard Henderson wrote:
>> I think you mean '__attribute__((target("avx2")))'. I have tried
>> that approach; the issue is that without the '-mavx2' option, gcc
>> fails with the compile error '__m256i undeclared', and
>> __attribute__((target("avx2"))) does not solve this. Any idea?
>
> You're right that you can't use the normal __m256i, as it doesn't get
> declared.
It should be declared. *intrin.h uses #pragma GCC target and always
defines all vector types.
In fact, the following compiles for me with just "gcc foo.c" under
GCC 5.x:
#include <immintrin.h>
// #if defined CONFIG_IFUNC && defined CONFIG_AVX2
#pragma GCC push_options
#pragma GCC target("avx2")

/* Thin aliases over the AVX2 intrinsics used by the scanner below. */
#define AVX2_VECTYPE        __m256i
#define AVX2_SPLAT(p)       _mm256_set1_epi8(*(p))
/*
 * _mm256_movemask_epi8() yields one bit per byte lane, so all 32 bits set
 * means every byte of v1 compared equal to the matching byte of v2.
 */
#define AVX2_ALL_EQ(v1, v2) \
    (_mm256_movemask_epi8(_mm256_cmpeq_epi8((v1), (v2))) == 0xFFFFFFFF)
#define AVX2_VEC_OR(v1, v2) (_mm256_or_si256((v1), (v2)))

/*
 * Locate the first part of a buffer that contains a non-zero byte.
 *
 * buf: start of the buffer.  It is dereferenced as AVX2_VECTYPE, so it
 *      must be aligned suitably for that type.
 * len: length of the buffer in bytes.  Must be 0 or a positive multiple
 *      of 4 * sizeof(AVX2_VECTYPE); nothing is read when it is 0.
 *
 * Returns a byte offset into buf:
 *   - for a non-zero byte within the first 4 vectors, the offset of the
 *     first non-zero vector;
 *   - for a non-zero byte further on, the offset of the first group of
 *     4 vectors that contains one;
 *   - len when the whole buffer is zero (and 0 when len is 0).
 *
 * No byte at or beyond buf + len is ever read.
 */
size_t buffer_find_nonzero_offset_avx2(const void *buf, size_t len)
{
    /* Vectors consumed per step; loop stride and loads must agree. */
    const size_t unroll = 4;
    const AVX2_VECTYPE *p = buf;
    const AVX2_VECTYPE zero = (AVX2_VECTYPE){0};
    const size_t nvec = len / sizeof(AVX2_VECTYPE);
    size_t i;

    if (!len) {
        return 0;
    }

    /* Check the leading vectors one at a time for a precise early exit. */
    for (i = 0; i < unroll; i++) {
        if (!AVX2_ALL_EQ(p[i], zero)) {
            return i * sizeof(AVX2_VECTYPE);
        }
    }

    /*
     * Scan the remainder one group at a time: OR the group's vectors
     * together and test the result once.  Exactly `unroll` vectors are
     * loaded per iteration so the final group ends at buf + len.
     */
    for (i = unroll; i < nvec; i += unroll) {
        AVX2_VECTYPE lo = AVX2_VEC_OR(p[i + 0], p[i + 1]);
        AVX2_VECTYPE hi = AVX2_VEC_OR(p[i + 2], p[i + 3]);

        if (!AVX2_ALL_EQ(AVX2_VEC_OR(lo, hi), zero)) {
            break;
        }
    }

    return i * sizeof(AVX2_VECTYPE);
}

#pragma GCC pop_options
// #endif
so perhaps the configure test is testing the wrong thing?
Paolo