https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90487
Bug ID: 90487
Summary: optimize SSE & AVX char compares with subsequent movmskb [negation]
Product: gcc
Version: 9.1.0
Status: UNCONFIRMED
Keywords: missed-optimization
Severity: normal
Priority: P3
Component: target
Assignee: unassigned at gcc dot gnu.org
Reporter: kretz at kde dot org
Target Milestone: ---
Target: x86_64-*-*, i?86-*-*

Testcase (cf. https://godbolt.org/z/7NiU7O):

#include <x86intrin.h>

template <typename T, size_t N>
using V [[gnu::vector_size(N)]] = T;

int good0(V<unsigned char, 16> a) { return 0xffff ^ _mm_movemask_epi8 (reinterpret_cast<__m128i>(a)); }
int good1(V<unsigned char, 16> a) { return _mm_movemask_epi8 (reinterpret_cast<__m128i>(!a)); }

// the following should be optimized to either good0 (prefer e.g. if compared against 0xffff) or good1:
int f0(V<unsigned char, 16> a) { return _mm_movemask_epi8 (reinterpret_cast<__m128i>(a <= 0x7f)); }
int f1(V<unsigned char, 16> a) { return _mm_movemask_epi8 (reinterpret_cast<__m128i>(a < 0x80)); }
int f0(V< signed char, 16> a) { return _mm_movemask_epi8 (reinterpret_cast<__m128i>(a >= 0)); }
int f1(V< signed char, 16> a) { return _mm_movemask_epi8 (reinterpret_cast<__m128i>(a > -1)); }
int f0(V< char, 16> a) { return _mm_movemask_epi8 (reinterpret_cast<__m128i>(a >= 0)); }
int f1(V< char, 16> a) { return _mm_movemask_epi8 (reinterpret_cast<__m128i>(a > -1)); }

#ifdef __AVX2__
int good0(V<unsigned char, 32> a) { return 0xffffffff ^ _mm256_movemask_epi8 (reinterpret_cast<__m256i>(a)); }
int good1(V<unsigned char, 32> a) { return _mm256_movemask_epi8 (reinterpret_cast<__m256i>(!a)); }

// the following should be optimized to either good0 (prefer e.g. if compared against 0xffffffff) or good1:
int f0(V<unsigned char, 32> a) { return _mm256_movemask_epi8(reinterpret_cast<__m256i>(a <= 0x7f)); }
int f1(V<unsigned char, 32> a) { return _mm256_movemask_epi8(reinterpret_cast<__m256i>(a < 0x80)); }
int f0(V< signed char, 32> a) { return _mm256_movemask_epi8(reinterpret_cast<__m256i>(a >= 0)); }
int f1(V< signed char, 32> a) { return _mm256_movemask_epi8(reinterpret_cast<__m256i>(a > -1)); }
int f0(V< char, 32> a) { return _mm256_movemask_epi8(reinterpret_cast<__m256i>(a >= 0)); }
int f1(V< char, 32> a) { return _mm256_movemask_epi8(reinterpret_cast<__m256i>(a > -1)); }
#endif

Compile with -O2 and either -mavx2 or -msse2.

This PR is simply the negation of PR88152. I failed to cover these cases in the other PR and they are just as likely to appear as the ones in PR88152.