https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95793
Bug ID: 95793
Summary: Nested function multi-versioning doesn't work
Product: gcc
Version: 11.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: middle-end
Assignee: unassigned at gcc dot gnu.org
Reporter: hjl.tools at gmail dot com
Target Milestone: ---

[hjl@gnu-cfl-2 pr95790]$ cat x.cc
#include <x86intrin.h>

extern char *buf;

__attribute__ ((target ("default")))
static int foo(const char *buf)
{
  return 1;
}

__attribute__ ((target ("avx2")))
static int foo(const char *buf)
{
  __m256i x = *(const __m256i_u *) buf;
  return __builtin_ia32_pmovmskb256 (x);
}

__attribute__ ((target ("avx512f")))
static int foo(const char *buf)
{
  __m128i x = *(const __m128i_u *) buf;
  return __builtin_ia32_pmovmskb128 (x);
}

__attribute__ ((target ("default")))
int bar(int unsigned size)
{
  int acc = 0;
  for (int i = 0; i < size; i++)
    {
      acc += foo(&buf[i]);
    }
  return acc;
}

__attribute__ ((target ("avx2")))
int bar(int unsigned size)
{
  int acc = 0;
  for (int i = 0; i < size; i++)
    {
      acc += foo(&buf[i]);
    }
  return acc;
}
[hjl@gnu-cfl-2 pr95790]$ /usr/gcc-10.1.1-x32/bin/gcc -O2 -flax-vector-conversions -fno-asynchronous-unwind-tables -S x.cc
[hjl@gnu-cfl-2 pr95790]$ cat x.s
        .file   "x.cc"
        .text
        .p2align 4
        .globl  _Z3barj
        .type   _Z3barj, @function
_Z3barj:
.LFB5599:
        .cfi_startproc
        movl    %edi, %eax
        ret
        .cfi_endproc
.LFE5599:
        .size   _Z3barj, .-_Z3barj
        .p2align 4
        .globl  _Z3barj.avx2
        .type   _Z3barj.avx2, @function
_Z3barj.avx2:
.LFB5600:
        .cfi_startproc
        testl   %edi, %edi
        je      .L7
        movq    buf(%rip), %rdx
        leal    -1(%rdi), %ecx
        xorl    %r8d, %r8d
        leaq    1(%rdx), %rax
        addq    %rax, %rcx
        jmp     .L6
        .p2align 4,,10
        .p2align 3
.L11:
        addq    $1, %rax
.L6:
        vmovdqu (%rdx), %xmm1
        vinserti128     $0x1, 16(%rdx), %ymm1, %ymm0
        vpmovmskb       %ymm0, %edx
        addl    %edx, %r8d
        movq    %rax, %rdx
        cmpq    %rcx, %rax
        jne     .L11
        vzeroupper
        movl    %r8d, %eax
        ret
        .p2align 4,,10
        .p2align 3
.L7:
        xorl    %r8d, %r8d
        movl    %r8d, %eax
        ret
        .cfi_endproc
.LFE5600:
        .size   _Z3barj.avx2, .-_Z3barj.avx2
        .ident  "GCC: (GNU) 10.1.1 20200523"
        .section        .note.GNU-stack,"",@progbits
[hjl@gnu-cfl-2 pr95790]$

_Z3barj.avx2 is emitted, but it is unused: nothing in the generated assembly references it.