https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95793

            Bug ID: 95793
           Summary: Nested function multi-versioning doesn't work
           Product: gcc
           Version: 11.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: middle-end
          Assignee: unassigned at gcc dot gnu.org
          Reporter: hjl.tools at gmail dot com
  Target Milestone: ---

[hjl@gnu-cfl-2 pr95790]$ cat x.cc
#include <x86intrin.h>

extern char *buf;

__attribute__ ((target ("default")))
static int foo(const char *buf) {
  return 1;
}

__attribute__ ((target ("avx2")))
static int foo(const char *buf) {
  __m256i x = *(const __m256i_u *) buf;
  return __builtin_ia32_pmovmskb256 (x);
}

__attribute__ ((target ("avx512f")))
static int foo(const char *buf) {
  __m128i x = *(const __m128i_u *) buf;
  return __builtin_ia32_pmovmskb128 (x);
}

__attribute__ ((target ("default")))
int bar(int unsigned size) {
  int acc = 0;
  for (int i = 0; i < size; i++) {
    acc += foo(&buf[i]);
  }
  return acc;
}

__attribute__ ((target ("avx2")))
int bar(int unsigned size) {
  int acc = 0;
  for (int i = 0; i < size; i++) {
    acc += foo(&buf[i]);
  }
  return acc;
}
[hjl@gnu-cfl-2 pr95790]$ /usr/gcc-10.1.1-x32/bin/gcc -O2 -flax-vector-conversions -fno-asynchronous-unwind-tables -S x.cc
[hjl@gnu-cfl-2 pr95790]$ cat x.s
        .file   "x.cc"
        .text
        .p2align 4
        .globl  _Z3barj
        .type   _Z3barj, @function
_Z3barj:
.LFB5599:
        .cfi_startproc
        movl    %edi, %eax
        ret
        .cfi_endproc
.LFE5599:
        .size   _Z3barj, .-_Z3barj
        .p2align 4
        .globl  _Z3barj.avx2
        .type   _Z3barj.avx2, @function
_Z3barj.avx2:
.LFB5600:
        .cfi_startproc
        testl   %edi, %edi
        je      .L7
        movq    buf(%rip), %rdx
        leal    -1(%rdi), %ecx
        xorl    %r8d, %r8d
        leaq    1(%rdx), %rax
        addq    %rax, %rcx
        jmp     .L6
        .p2align 4,,10
        .p2align 3
.L11:
        addq    $1, %rax
.L6:
        vmovdqu (%rdx), %xmm1
        vinserti128     $0x1, 16(%rdx), %ymm1, %ymm0
        vpmovmskb       %ymm0, %edx
        addl    %edx, %r8d
        movq    %rax, %rdx
        cmpq    %rcx, %rax
        jne     .L11
        vzeroupper
        movl    %r8d, %eax
        ret
        .p2align 4,,10
        .p2align 3
.L7:
        xorl    %r8d, %r8d
        movl    %r8d, %eax
        ret
        .cfi_endproc
.LFE5600:
        .size   _Z3barj.avx2, .-_Z3barj.avx2
        .ident  "GCC: (GNU) 10.1.1 20200523"
        .section        .note.GNU-stack,"",@progbits
[hjl@gnu-cfl-2 pr95790]$ 

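_Z3barj.avx2 is unused: the assembly above defines it, but nothing else in the
output refers to it, and no dispatcher or resolver for bar is emitted.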

Reply via email to