https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90487

            Bug ID: 90487
           Summary: optimize SSE & AVX char compares with subsequent
                    movmskb [negation]
           Product: gcc
           Version: 9.1.0
            Status: UNCONFIRMED
          Keywords: missed-optimization
          Severity: normal
          Priority: P3
         Component: target
          Assignee: unassigned at gcc dot gnu.org
          Reporter: kretz at kde dot org
  Target Milestone: ---
            Target: x86_64-*-*, i?86-*-*

Testcase (cf. https://godbolt.org/z/7NiU7O):

#include <x86intrin.h>

// V<T, N>: T carrying GCC's vector_size(N) attribute, i.e. a GNU vector of T
// elements that is N bytes wide (N is a byte count, not an element count).
template <typename T, size_t N>
using V [[gnu::vector_size(N)]] = T;

// Reference form 0: gather the top bit of each of the 16 bytes into an int
// mask, then flip all 16 mask bits with a scalar XOR.
int good0(V<unsigned char, 16> a) { return 0xffff ^ _mm_movemask_epi8  
(reinterpret_cast<__m128i>(a)); }
// Reference form 1: complement the vector bitwise, then gather the top bit of
// each of the 16 bytes, so a mask bit is set exactly where the byte's top bit
// was clear.
// The complement must be `~a`: on GNU vectors `!a` is the element-wise logical
// NOT (equivalent to `a == 0`), which would only flag zero bytes and would not
// match good0 or the comparisons this form is a target for.
int good1(V<unsigned char, 16> a) { return _mm_movemask_epi8
(reinterpret_cast<__m128i>(~a)); }

// the following should be optimized to either good0 (preferred e.g. if the
// result is compared against 0xffff) or good1:
// Unsigned bytes, `a <= 0x7f`: a lane is true exactly when its top bit is
// clear. The comparison result is a vector whose true lanes are all-ones, so
// the movemask yields one set bit per true lane.
int f0(V<unsigned char, 16> a) { return _mm_movemask_epi8  
(reinterpret_cast<__m128i>(a <= 0x7f)); }
// Unsigned bytes, `a < 0x80`: the same predicate as `a <= 0x7f`, spelled with
// the strict comparison (top bit of the byte is clear).
int f1(V<unsigned char, 16> a) { return _mm_movemask_epi8  
(reinterpret_cast<__m128i>(a <  0x80)); }
// Signed bytes, `a >= 0`: a lane is true exactly when its sign bit is clear.
int f0(V<  signed char, 16> a) { return _mm_movemask_epi8  
(reinterpret_cast<__m128i>(a >=  0)); }
// Signed bytes, `a > -1`: the same predicate as `a >= 0`, spelled with the
// strict comparison (sign bit of the byte is clear).
int f1(V<  signed char, 16> a) { return _mm_movemask_epi8  
(reinterpret_cast<__m128i>(a >  -1)); }
// Plain `char` bytes, `a >= 0`. NOTE(review): presumably relies on plain char
// being signed (the default on the x86 targets this report names); in that
// case it is the same sign-bit-clear test as the signed char overload.
int f0(V<         char, 16> a) { return _mm_movemask_epi8  
(reinterpret_cast<__m128i>(a >=  0)); }
// Plain `char` bytes, `a > -1`. NOTE(review): presumably relies on plain char
// being signed (the default on the x86 targets this report names); in that
// case it is the same sign-bit-clear test as the signed char overload.
int f1(V<         char, 16> a) { return _mm_movemask_epi8  
(reinterpret_cast<__m128i>(a >  -1)); }

#ifdef __AVX2__
// Reference form 0 (32-byte vectors): gather the top bit of each of the 32
// bytes into an int mask, then flip all 32 mask bits with a scalar XOR.
// 0xffffffff does not fit in a 32-bit int, so the literal is unsigned and the
// XOR result is converted back to int by the return.
int good0(V<unsigned char, 32> a) { return 0xffffffff ^ _mm256_movemask_epi8  
(reinterpret_cast<__m256i>(a)); }
// Reference form 1 (32-byte vectors): complement the vector bitwise, then
// gather the top bit of each of the 32 bytes, so a mask bit is set exactly
// where the byte's top bit was clear.
// The complement must be `~a`: on GNU vectors `!a` is the element-wise logical
// NOT (equivalent to `a == 0`), which would only flag zero bytes and would not
// match good0 or the comparisons this form is a target for.
int good1(V<unsigned char, 32> a) { return _mm256_movemask_epi8
(reinterpret_cast<__m256i>(~a)); }

// the following should be optimized to either good0 (preferred e.g. if the
// result is compared against 0xffffffff) or good1:
// Unsigned bytes (32 lanes), `a <= 0x7f`: a lane is true exactly when its top
// bit is clear. The comparison result is a vector whose true lanes are
// all-ones, so the movemask yields one set bit per true lane.
int f0(V<unsigned char, 32> a) { return
_mm256_movemask_epi8(reinterpret_cast<__m256i>(a <= 0x7f)); }
// Unsigned bytes (32 lanes), `a < 0x80`: the same predicate as `a <= 0x7f`,
// spelled with the strict comparison (top bit of the byte is clear).
int f1(V<unsigned char, 32> a) { return
_mm256_movemask_epi8(reinterpret_cast<__m256i>(a <  0x80)); }
// Signed bytes (32 lanes), `a >= 0`: a lane is true exactly when its sign bit
// is clear.
int f0(V<  signed char, 32> a) { return
_mm256_movemask_epi8(reinterpret_cast<__m256i>(a >=  0)); }
// Signed bytes (32 lanes), `a > -1`: the same predicate as `a >= 0`, spelled
// with the strict comparison (sign bit of the byte is clear).
int f1(V<  signed char, 32> a) { return
_mm256_movemask_epi8(reinterpret_cast<__m256i>(a >  -1)); }
// Plain `char` bytes (32 lanes), `a >= 0`. NOTE(review): presumably relies on
// plain char being signed (the default on the x86 targets this report names);
// in that case it is the same sign-bit-clear test as the signed char overload.
int f0(V<         char, 32> a) { return
_mm256_movemask_epi8(reinterpret_cast<__m256i>(a >=  0)); }
// Plain `char` bytes (32 lanes), `a > -1`. NOTE(review): presumably relies on
// plain char being signed (the default on the x86 targets this report names);
// in that case it is the same sign-bit-clear test as the signed char overload.
int f1(V<         char, 32> a) { return
_mm256_movemask_epi8(reinterpret_cast<__m256i>(a >  -1)); }
#endif

Compile with -O2 and either -mavx2 or -msse2. This PR is simply the negation of
PR88152. I failed to cover these cases in the other PR and they are just as
likely to appear as the ones in PR88152.

Reply via email to