https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88473

            Bug ID: 88473
           Summary: AVX512: constant folding on mask does not remove
                    unnecessary instructions
           Product: gcc
           Version: 8.2.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: target
          Assignee: unassigned at gcc dot gnu.org
          Reporter: bugzi...@poradnik-webmastera.com
  Target Milestone: ---

[code]
#include <immintrin.h>

/*
 * Reproducer: the mask handed to the masked add is provably all-ones, so
 * once the add is turned into an unmasked one, every instruction that
 * computes the mask is dead code.
 *
 * data  - address the 128-bit input vector is loaded from
 * data2 - address the 128-bit result vector is stored to
 */
void test(void* data, void* data2)
{
    __m128i v = _mm_load_si128((__m128i const*)data);
    /* Runtime-dependent mask; its value ends up irrelevant (see the ORs). */
    __mmask8 m = _mm_testn_epi16_mask(v, v);
    /* 0x0f | 0xf0 == 0xff: after both ORs every bit of the 8-bit mask is set. */
    m = _kor_mask8(m, 0x0f);
    m = _kor_mask8(m, 0xf0);
    /* With all 8 mask bits set no 16-bit lane is zeroed: effectively a plain add. */
    v = _mm_maskz_add_epi16(m, v, v);
    _mm_store_si128((__m128i*)data2, v);
}
[/code]

The code was compiled using gcc 8.2 with -O3 -march=skylake-avx512. gcc was able
to fold the constant expressions and simplify the masked vector add to a
non-masked one. However, the instructions that compute the original (now unused)
mask expression are still present in the output:

[asm]
test(void*, void*):
  vmovdqa64 xmm0, XMMWORD PTR [rdi]
  mov eax, 15
  vptestnmw k1, xmm0, xmm0
  kmovb k2, eax
  vpaddw xmm0, xmm0, xmm0
  mov eax, -16
  kmovb k3, eax
  vmovaps XMMWORD PTR [rsi], xmm0
  korb k0, k1, k2
  korb k0, k0, k3
  ret
[/asm]

clang properly cleaned it up:

[asm]
test(void*, void*): # @test(void*, void*)
  vmovdqa xmm0, xmmword ptr [rdi]
  vpaddw xmm0, xmm0, xmm0
  vmovdqa xmmword ptr [rsi], xmm0
  ret
[/asm]

Reply via email to