https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63805
--- Comment #15 from kelvin at gcc dot gnu.org ---
On my macbook, g++ (Apple LLVM version 7.0.2 (clang-700.1.81), Target:
x86_64-apple-darwin15.3.0), the test program does compile successfully.
For Martin's simplified example, it produces the following code:
## bar() -- compiler output for Martin's simplified example (AT&T syntax,
## x86-64 Darwin).  Visible behaviour, in order:
##   1. call ___atomic_load with %rdi = 16, %rsi = %rdx = -16(%rbp), %ecx = 0
##   2. call _memset        with %rdi = -40(%rbp), %esi = 0, %rdx = 24
##   3. copy the 24 bytes at -40(%rbp) to the outgoing-argument area at (%rsp)
##   4. call __Z3foo1B (demangles to foo(B))
## Frame: pushq %rbp + subq $80 -- locals are addressed off %rbp, the outgoing
## by-value argument off %rsp.
__Z3barv: ## @_Z3barv
.cfi_startproc
## BB#0:
## Standard frame-pointer prologue; the .cfi_* directives describe it to the
## unwinder.
pushq %rbp
Ltmp0:
.cfi_def_cfa_offset 16
Ltmp1:
.cfi_offset %rbp, -16
movq %rsp, %rbp
Ltmp2:
.cfi_def_cfa_register %rbp
## Reserve 80 bytes; together with the pushed %rbp and the return address this
## keeps %rsp 16-byte aligned at the calls below.
subq $80, %rsp
## --- call 1: ___atomic_load --------------------------------------------
## %rdi = 16 (the 32-bit move zero-extends into the full register).
movl $16, %eax
movl %eax, %edi
## %ecx = 0.
xorl %ecx, %ecx
## %rsi and %rdx both receive the address -16(%rbp).
## NOTE(review): this register assignment matches the generic libatomic entry
## point __atomic_load(size, src, dest, memorder), i.e. a 16-byte load with
## memorder 0 -- confirm against the example's source, which is not shown here.
leaq -16(%rbp), %rdx
movq %rdx, %rsi
callq ___atomic_load
## --- call 2: _memset ------------------------------------------------------
## Net effect of the moves and spills below: %rdi = -40(%rbp), %esi = 0,
## %rdx = 24.  The round trips through %r8 and the two spill slots only
## shuffle those values into place.
leaq -40(%rbp), %rdx
xorl %esi, %esi
movl $24, %eax
movl %eax, %edi
movq %rdx, %r8
movq %rdi, -48(%rbp) ## 8-byte Spill
movq %r8, %rdi
movq -48(%rbp), %r8 ## 8-byte Reload
## The buffer address is also saved at -56(%rbp) so it survives the call.
movq %rdx, -56(%rbp) ## 8-byte Spill
movq %r8, %rdx
callq _memset
## --- call 3: __Z3foo1B ------------------------------------------------------
## Reload the buffer address and copy its three quadwords (24 bytes) to
## (%rsp), 8(%rsp) and 16(%rsp): the argument is passed by value in memory on
## the stack rather than in registers.
movq -56(%rbp), %rdx ## 8-byte Reload
movq (%rdx), %rdi
movq %rdi, (%rsp)
movq 8(%rdx), %rdi
movq %rdi, 8(%rsp)
movq 16(%rdx), %rdi
movq %rdi, 16(%rsp)
callq __Z3foo1B
## Epilogue: release the 80-byte frame and restore the caller's %rbp.
addq $80, %rsp
popq %rbp
retq
.cfi_endproc
I tried Alan's small-value test case on the Apple Mac compiler as well. I made
minor changes to the source code to make the "stricter" type checking of that
environment happy. The program compiles and runs ok on Apple Macintosh:
#include <iostream>
/* intel compiler claims to ignore packed __attribute__ */
/* Five-byte object used as the target of the atomic exchange.  With the
   packed attribute honoured there is no padding after `pad`, so `x` starts
   at byte offset 1 and is therefore not 4-byte aligned.  `a` is a global,
   so it starts out zero-initialized. */
struct foo {
char pad; /* one-byte field that pushes x off its natural alignment */
char x[4]; /* 4-byte payload that bar() exchanges atomically */
} __attribute__((packed)) a;
/* Atomically store `val` into a.x and return the 4 bytes that were
   previously held there, reinterpreted as an int.

   val:     new value to place in a.x.
   returns: the previous contents of a.x.

   NOTE(review): array1/array2 are char arrays accessed through int pointers;
   this relies on the compiler tolerating the type punning and on the arrays
   being suitably aligned -- memcpy would be the portable spelling.  Left
   untouched here so the source keeps matching the generated code shown
   below. */
int
bar (int val)
{
int ret;
char array1[4]; /* holds the new value handed to the exchange */
char array2[4]; /* receives the old value from the exchange */
*((int *) array1) = val;
/* copies a.x to array2 and copies array1 to a.x, atomically. */
/* The memory-order argument 5 is the numeric value of __ATOMIC_SEQ_CST in
   GCC and Clang. */
__atomic_exchange (&a.x, &array1, &array2, 5);
ret = *((int *) array2);
return ret;
}
/* Driver: performs three exchanges in a row and prints what each one
   returned.  Because every call returns the value stored by the previous
   one (and `a` starts zeroed), the expected results are 0, 42 and 13. */
int main ()
{
std::cout << "Result from first __atomic_exchange is " << bar (42) << "\n";
std::cout << "Result from second __atomic_exchange is " << bar (13) <<
"\n";
std::cout << "Result from third __atomic_exchange is " << bar (25) << "\n";
}
Though I'm no longer fluent in reading x86 assembly, I'm pretty confident
that the struct is packed and that the atomic exchange operates on the
unaligned address of a 4-byte quantity (the pointer passed to
___atomic_exchange_4 is _a + 1, computed by the leaq/addq pair):
## bar(int) -- compiler output for the C++ test program above (AT&T syntax,
## x86-64 Darwin).
## In:  %edi = val
## Out: %eax = previous 4-byte contents of a.x
## Stack slots, matched against the C++ source:
##   -4(%rbp)  val      -8(%rbp)  ret
##   -12(%rbp) array1   -16(%rbp) array2
## The exchange is done by the out-of-line helper ___atomic_exchange_4, which
## is called with %rdi = _a + 1, %esi = new value, %edx = 5.
.globl __Z3bari
## NOTE(review): on Mach-O targets .align takes a power of two, so this should
## mean 16-byte alignment padded with 0x90 (NOP) -- confirm for this assembler.
.align 4, 0x90
__Z3bari: ## @_Z3bari
.cfi_startproc
## BB#0:
## Standard frame-pointer prologue with matching unwind annotations.
pushq %rbp
Ltmp0:
.cfi_def_cfa_offset 16
Ltmp1:
.cfi_offset %rbp, -16
movq %rsp, %rbp
Ltmp2:
.cfi_def_cfa_register %rbp
## 16 bytes of locals (the four 4-byte slots listed above).
subq $16, %rsp
## %rax = &a + 1, i.e. the address of a.x.  The odd offset is what shows the
## struct is packed and the 4-byte target is misaligned.
leaq _a(%rip), %rax
addq $1, %rax
## Third argument: memory order 5 (the constant from the source).
movl $5, %edx
## Store the incoming val, then copy it into the array1 slot.
movl %edi, -4(%rbp)
movl -4(%rbp), %edi
movl %edi, -12(%rbp)
## Second argument: the new value, passed by value in %esi.
movl -12(%rbp), %esi
## First argument: pointer to the 4-byte object being exchanged.
movq %rax, %rdi
callq ___atomic_exchange_4
## The old value comes back in %eax; it goes to the array2 slot, then to
## ret, and is finally reloaded into %eax as the function result.
movl %eax, -16(%rbp)
movl -16(%rbp), %eax
movl %eax, -8(%rbp)
movl -8(%rbp), %eax
## Epilogue: release the locals and restore the caller's %rbp.
addq $16, %rsp
popq %rbp
retq
.cfi_endproc
with _a declared as follows:
## Storage for the global `a`: an exported, zero-filled symbol in the
## __DATA,__common section.  The last two .zerofill operands are the size in
## bytes (5) and the alignment as a power of two (0, i.e. 1-byte alignment),
## consistent with a packed struct of one char plus char[4].
.globl _a ## @a
.zerofill __DATA,__common,_a,5,0