configured with ../gcc-4.3-20080208/configure --enable-languages=c,c++,java
The same issue was also observed on i686-pc-linux and x86-64 platforms. >Description: When an assignment to a variable is followed by 'cmpxchg' on the variable, there exists a dependency between the two statements. Thus, the store has to complete before the cmpxchg on the variable is issued. The issue was not seen with -O0. It was seen when compiled with -O2. I have not tried -O1, though. For example, when we have inline bool LockByte( volatile unsigned char &flag ) { volatile void *f = &flag; if ( CompareAndSwap1(f,1,0) != 0) { do { __asm__ __volatile__("pause;"); } while ( CompareAndSwap1(f,1,0) != 0) ; } return 0; } main(...) { ... char x = 0; LockByte(x); ... } even with a single thread, LockByte() hangs, i.e., spins forever. It appears that the compiler misses the dependency and does not generate the store to the memory location. >How-To-Repeat: Compile the following program with the -O2 option, i.e., g++ -O2 <pgm-name>. It hangs. g++ <pgm-name> generates an executable that completes and prints 'done'. #include <assert.h> #include <stdio.h> #define DECL_CMPSWP(S,T,X) \ static inline T machine_cmpswp##S (volatile void *ptr, T value, T comparand ) \ { \ T result; \ \ __asm__ __volatile__("lock\ncmpxchg" X " %2,%1" \ : "=a"(result), "=m"(*(T *)ptr) \ : "q"(value), "0"(comparand) \ : "memory"); \ return result; \ } DECL_CMPSWP(1,unsigned char,"b"); #define CompareAndSwap1(a,b,c) machine_cmpswp1(a,b,c) inline bool LockByte( volatile unsigned char &flag ) { volatile void *f = &flag; if ( CompareAndSwap1(f,1,0) != 0) { do { __asm__ __volatile__("pause;"); } while ( CompareAndSwap1(f,1,0) != 0) ; } return 0; } static void TestTinyLock() { unsigned char flags[16]; for( int i=0; i<16; ++i ) flags[i] = i; flags[8] = 0; LockByte(flags[8]); for( int i=0; i<16; ++i ) assert( flags[i]==(i==8?1:i) ); } int main( int argc, char* argv[] ) { try { TestTinyLock(); } catch(...) 
{ } printf("done\n"); return 0; } ----------------------------------------- 'g++ -O2 -S' output pushl %ecx leal -16(%ebp), %eax subl $88, %esp movl %eax, -52(%ebp) leal -84(%ebp), %eax movl $___gxx_personality_sj0, -60(%ebp) movl $LLSDA6, -56(%ebp) movl $L13, -48(%ebp) movl %esp, -44(%ebp) movl %eax, (%esp) call __Unwind_SjLj_Register call ___main xorl %eax, %eax leal -32(%ebp), %edx .p2align 4,,7 L2: movb %al, (%edx,%eax) addl $1, %eax cmpl $16, %eax jne L2 leal -24(%ebp), %ebx xorl %eax, %eax movl $1, %edx /APP # 44 "test_test.cpp" 1 lock cmpxchgb %dl,(%ebx) # 0 "" 2 /NO_APP testb %al, %al je L3 movl $1, %ecx xorl %edx, %edx .p2align 4,,7 L14: /APP # 52 "test_test.cpp" 1 pause; # 0 "" 2 /NO_APP movl %edx, %eax /APP # 44 "test_test.cpp" 1 lock cmpxchgb %cl,(%ebx) # 0 "" 2 /NO_APP testb %al, %al jne L14 L3: movl $0, -88(%ebp) jmp L8 .p2align 4,,7 L6: movzbl %al, %eax cmpl %edx, %eax je L7 movl $LC0, 8(%esp) movl $66, 4(%esp) movl $LC1, (%esp) movl $1, -80(%ebp) call ___assert L7: addl $1, -88(%ebp) cmpl $16, -88(%ebp) je L9 L8: >Fix: The work-around is to declare the variable to be 'volatile'. If we do * (volatile unsigned char *) &flags[8] = 0; instead, the generated executable completes. -- Summary: possible incorrect optimization due to missed dependency Product: gcc Version: 4.3.0 Status: UNCONFIRMED Severity: critical Priority: P3 Component: c++ AssignedTo: unassigned at gcc dot gnu dot org ReportedBy: kimwooyoung at gmail dot com GCC build triplet: i686-pc-cygwin GCC host triplet: i686-pc-cygwin GCC target triplet: i686-pc-cygwin http://gcc.gnu.org/bugzilla/show_bug.cgi?id=35396