https://gcc.gnu.org/bugzilla/show_bug.cgi?id=120592

            Bug ID: 120592
           Summary: XMM register is used across ___tls_get_addr
           Product: gcc
           Version: 16.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: target
          Assignee: unassigned at gcc dot gnu.org
          Reporter: hjl.tools at gmail dot com
  Target Milestone: ---
            Target: i386

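For i386, the call to ___tls_get_addr isn't represented as a CALL insn in RTL;
it is a parallel of an UNSPEC_TLS_GD set plus clobbers of dx, cx and flags: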

(insn:TI 5 19 6 2 (parallel [
            (set (reg/f:SI 0 ax [101])
                (unspec:SI [
                        (reg:SI 3 bx [98])
                        (symbol_ref:SI ("x") [flags 0x10]  <var_decl 0x7fd38b5a0130 x>)
                        (symbol_ref:SI ("___tls_get_addr"))
                        (reg/f:SI 7 sp)
                    ] UNSPEC_TLS_GD))
            (clobber (reg:SI 1 dx [103]))
            (clobber (reg:SI 2 cx [104]))
            (clobber (reg:CC 17 flags))
        ]) "x.c":6:10 1652 {*tls_global_dynamic_32_gnu}
     (expr_list:REG_DEAD (reg:SI 3 bx [98])
        (expr_list:REG_UNUSED (reg:CC 17 flags)
            (expr_list:REG_UNUSED (reg:SI 2 cx [104])
                (expr_list:REG_UNUSED (reg:SI 1 dx [103])
                    (nil))))))

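As a result, an XMM register is kept live across the ___tls_get_addr call
(in the output below, %xmm0 is zeroed by pxor before the call and is still
stored from by the movaps instructions after it):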

[hjl@gnu-zen4-1 pr120590]$ cat x.c
typedef int v4si __attribute__((vector_size(16)));
typedef short v8hi __attribute__((vector_size(16)));
typedef char v16qi __attribute__((vector_size(32)));

extern v16qi b1;
extern v8hi h1;
extern v4si s1;

extern __thread int x;
extern int i;

void
foo (void)
{
  s1 = __extension__(v4si){0, 0, 0, 0};
  h1 = __extension__(v8hi){0, 0, 0, 0, 0, 0, 0, 0};
  i = x;
  b1 = __extension__(v16qi){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
}
[hjl@gnu-zen4-1 pr120590]$ make
/export/build/gnu/tools-build/gcc-gitlab-test-debug/build-x86_64-linux/gcc/xgcc -B/export/build/gnu/tools-build/gcc-gitlab-test-debug/build-x86_64-linux/gcc/ -O2 -fPIC -m32 -S x.c
[hjl@gnu-zen4-1 pr120590]$ cat x.s
        .file   "x.c"
        .text
        .p2align 4
        .globl  foo
        .type   foo, @function
foo:
.LFB0:
        .cfi_startproc
        pushl   %esi
        .cfi_def_cfa_offset 8
        .cfi_offset 6, -8
        pxor    %xmm0, %xmm0
        pushl   %ebx
        .cfi_def_cfa_offset 12
        .cfi_offset 3, -12
        call    __x86.get_pc_thunk.bx
        addl    $_GLOBAL_OFFSET_TABLE_, %ebx
        subl    $4, %esp
        .cfi_def_cfa_offset 16
        movl    s1@GOT(%ebx), %eax
        movl    i@GOT(%ebx), %esi
        movaps  %xmm0, (%eax)
        movl    h1@GOT(%ebx), %eax
        movaps  %xmm0, (%eax)
        leal    x@tlsgd(,%ebx,1), %eax
        call    ___tls_get_addr@PLT
        movl    (%eax), %eax
        movl    %eax, (%esi)
        movl    b1@GOT(%ebx), %eax
        movaps  %xmm0, (%eax)
        movaps  %xmm0, 16(%eax)
        addl    $4, %esp
        .cfi_def_cfa_offset 12
        popl    %ebx
        .cfi_restore 3
        .cfi_def_cfa_offset 8
        popl    %esi
        .cfi_restore 6
        .cfi_def_cfa_offset 4
        ret
        .cfi_endproc
.LFE0:
        .size   foo, .-foo
        .section        .text.__x86.get_pc_thunk.bx,"axG",@progbits,__x86.get_pc_thunk.bx,comdat
        .globl  __x86.get_pc_thunk.bx
        .hidden __x86.get_pc_thunk.bx
        .type   __x86.get_pc_thunk.bx, @function
__x86.get_pc_thunk.bx:
.LFB1:
        .cfi_startproc
        movl    (%esp), %ebx
        ret
        .cfi_endproc
.LFE1:
        .ident  "GCC: (GNU) 16.0.0 20250608 (experimental)"
        .section        .note.GNU-stack,"",@progbits
[hjl@gnu-zen4-1 pr120590]$

Reply via email to