https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63805

--- Comment #15 from kelvin at gcc dot gnu.org ---
On my MacBook, using g++ (Apple LLVM version 7.0.2 (clang-700.1.81), Target:
x86_64-apple-darwin15.3.0), the test program compiles successfully.

For Martin's simplified example, it produces the following code:

## bar() (mangled _Z3barv, shown with the Mach-O leading underscore), x86-64,
## AT&T operand order (source, destination).  The body makes three calls:
## ___atomic_load, _memset and __Z3foo1B (foo(B) per the mangled name).
## Frame: 80 bytes below %rbp.  Slots referenced below:
##   -16(%rbp)      16-byte buffer handed to ___atomic_load
##   -40(%rbp)      24-byte temporary that is zeroed and then passed to foo
##   -48/-56(%rbp)  register spill slots
##   0..16(%rsp)    outgoing stack copy of the 24-byte argument
__Z3barv:                               ## @_Z3barv
        .cfi_startproc
## BB#0:
        ## Standard frame-pointer prologue.
        pushq   %rbp
Ltmp0:
        .cfi_def_cfa_offset 16
Ltmp1:
        .cfi_offset %rbp, -16
        movq    %rsp, %rbp
Ltmp2:
        .cfi_def_cfa_register %rbp
        ## push + 80 keeps %rsp 16-byte aligned at each call below.
        subq    $80, %rsp
        ## Set up the ___atomic_load call: %rdi = 16, %rsi = %rdx = -16(%rbp),
        ## %ecx = 0.  Presumably this is the generic libcall
        ## (size, src, dest, memorder) -- confirm against the atomics library
        ## ABI.  Note that %rsi and %rdx carry the same address here.
        movl    $16, %eax
        movl    %eax, %edi
        xorl    %ecx, %ecx
        leaq    -16(%rbp), %rdx
        movq    %rdx, %rsi
        callq   ___atomic_load
        ## Set up memset(-40(%rbp), 0, 24).  The unoptimized code shuffles the
        ## values through %r8 and two spill slots before they reach
        ## %rdi (pointer), %esi (fill byte 0) and %rdx (length 24).
        leaq    -40(%rbp), %rdx
        xorl    %esi, %esi
        movl    $24, %eax
        movl    %eax, %edi
        movq    %rdx, %r8
        movq    %rdi, -48(%rbp)         ## 8-byte Spill
        movq    %r8, %rdi
        movq    -48(%rbp), %r8          ## 8-byte Reload
        movq    %rdx, -56(%rbp)         ## 8-byte Spill
        movq    %r8, %rdx
        callq   _memset
        ## Copy the zeroed 24-byte temporary, one quadword at a time, to the
        ## bottom of the stack, where it is passed by value to foo.
        movq    -56(%rbp), %rdx         ## 8-byte Reload
        movq    (%rdx), %rdi
        movq    %rdi, (%rsp)
        movq    8(%rdx), %rdi
        movq    %rdi, 8(%rsp)
        movq    16(%rdx), %rdi
        movq    %rdi, 16(%rsp)
        callq   __Z3foo1B
        ## Epilogue: release the frame and return.
        addq    $80, %rsp
        popq    %rbp
        retq
        .cfi_endproc

I tried Alan's small-value test case on the Apple Mac compiler as well.  I made
minor changes to the source code to make the "stricter" type checking of that
environment happy.  The program compiles and runs ok on Apple Macintosh:

#include <iostream>

/* intel compiler claims to ignore packed __attribute__ */
/* One pad byte followed by the 4-byte array x, so x starts at offset 1 and
   the whole object is 5 bytes (the assembly listing further down shows
   "addq $1" on &a and a 5-byte .zerofill for _a).  `a` is the single global
   instance that bar() operates on. */
struct foo { 
    char pad;
    char x[4];
} __attribute__((packed)) a;

/* Atomically replaces the 4 bytes of a.x with the bytes of val and returns
   the 4 bytes that were in a.x before, reinterpreted as an int.
   The char arrays are accessed through int* casts on purpose: this is the
   bug-report reproducer, and its exact form is what the compiler is being
   tested on. */
int
bar (int val)
{
    int ret;
    char array1[4];   /* holds the new value to store into a.x */
    char array2[4];   /* receives the previous contents of a.x */

    /* Write val's bytes into array1 through an int-typed store. */
    *((int *) array1) = val;
    /* copies a.x to array2 and copies array1 to a.x, atomically. */
    /* The last argument (5) is the memory-order operand; presumably
       __ATOMIC_SEQ_CST -- confirm against the compiler documentation. */
    __atomic_exchange (&a.x, &array1, &array2, 5);

    /* Read the old contents back as an int. */
    ret = *((int *) array2);
    return  ret;
}

/* Calls bar() three times (42, 13, 25) and prints each return value.
   Because bar() returns the previous contents of a.x, each line is expected
   to show the value stored by the preceding call (and whatever a.x held
   initially for the first call). */
int main ()
{
    std::cout << "Result from first __atomic_exchange is " << bar (42) << "\n";
    /* The statement below is split across two lines (mail line wrapping);
       it is still a single valid statement. */
    std::cout << "Result from second __atomic_exchange is " << bar (13) <<
"\n";
    std::cout << "Result from third __atomic_exchange is " << bar (25) << "\n";
}

Though I'm no longer fluent in reading Intel assembler, I'm pretty confident
that the struct is packed and the atomic exchange deals with an unaligned
address to a 4-byte quantity:

        ## bar(int) (mangled _Z3bari), x86-64, AT&T operand order.
        ## In:  %edi = val.   Out: %eax = value returned by
        ## ___atomic_exchange_4.
        ## Stack slots, apparently matching the C++ locals (inferred from the
        ## data flow, not stated by the compiler):
        ##   -4(%rbp) val, -12(%rbp) array1, -16(%rbp) array2, -8(%rbp) ret
        .globl  __Z3bari
        ## Mach-O .align takes a power-of-two exponent: 16-byte alignment,
        ## padded with 0x90 (NOP) bytes.
        .align  4, 0x90
__Z3bari:                               ## @_Z3bari
        .cfi_startproc
## BB#0:
        ## Standard frame-pointer prologue with a 16-byte local area.
        pushq   %rbp
Ltmp0:
        .cfi_def_cfa_offset 16
Ltmp1:
        .cfi_offset %rbp, -16
        movq    %rsp, %rbp
Ltmp2:
        .cfi_def_cfa_register %rbp
        subq    $16, %rsp
        ## %rax = address of _a plus 1, i.e. one byte past the start of the
        ## global -- the address of a.x in the C++ source.
        leaq    _a(%rip), %rax
        addq    $1, %rax
        ## Third argument: the constant 5 passed as the memory order.
        movl    $5, %edx
        ## Unoptimized shuffling: val is stored, reloaded, copied to the
        ## -12(%rbp) slot and reloaded into %esi as the second argument
        ## (the new 4-byte value).
        movl    %edi, -4(%rbp)
        movl    -4(%rbp), %edi
        movl    %edi, -12(%rbp)
        movl    -12(%rbp), %esi
        ## First argument: the (base + 1) pointer computed above.
        movq    %rax, %rdi
        callq   ___atomic_exchange_4
        ## The 4-byte result in %eax is stored to -16(%rbp), copied to
        ## -8(%rbp) and reloaded into %eax as the return value.
        movl    %eax, -16(%rbp)
        movl    -16(%rbp), %eax
        movl    %eax, -8(%rbp)
        movl    -8(%rbp), %eax
        ## Epilogue: release the frame and return.
        addq    $16, %rsp
        popq    %rbp
        retq
        .cfi_endproc

with _a declared as follows:

        ## Global object `a`: exported symbol, zero-filled storage in the
        ## __DATA,__common section.  The last two .zerofill operands are the
        ## size (5 bytes) and the alignment as a power-of-two exponent
        ## (0, i.e. 1-byte alignment).
        .globl  _a                      ## @a
.zerofill __DATA,__common,_a,5,0

Reply via email to