The "uint64_t test_noasm(uint64_t idx)" function has the same loop, yet it is optimized out. I've changed the code to constrain the number of loop iterations (see below), and the compiler: - unrolled the loop - did not eliminate the function, as it does when asm is not used. It looks like the "infinite loop" is not the root cause.
inline uint64_t test_asm_inside_loop(uint64_t idx) { uint64_t result; for( int i = 0; i < capacity; ++i ) { asm ("movq (%1,%2,8), %0" : "=r" (result) : "r" (objects), "r" (idx) ); if( result > 128 ) return result; ++idx; } return 0; } Dump of assembler code for function _Z28compile_test_asm_inside_loopv: 0x0000000000400b40 <+0>: xor %eax,%eax 0x0000000000400b42 <+2>: mov $0x602080,%edx 0x0000000000400b47 <+7>: mov (%rdx,%rax,8),%rcx 0x0000000000400b4b <+11>: cmp $0x80,%rcx 0x0000000000400b52 <+18>: ja 0x400c38 <_Z28compile_test_asm_inside_loopv+248> 0x0000000000400b58 <+24>: mov $0x1,%eax 0x0000000000400b5d <+29>: mov (%rdx,%rax,8),%rsi 0x0000000000400b61 <+33>: cmp $0x80,%rsi 0x0000000000400b68 <+40>: ja 0x400c38 <_Z28compile_test_asm_inside_loopv+248> 0x0000000000400b6e <+46>: lea 0x1(%rax),%rdi 0x0000000000400b72 <+50>: mov (%rdx,%rdi,8),%r8 0x0000000000400b76 <+54>: cmp $0x80,%r8 0x0000000000400b7d <+61>: ja 0x400c38 <_Z28compile_test_asm_inside_loopv+248> 0x0000000000400b83 <+67>: lea 0x2(%rax),%r9 0x0000000000400b87 <+71>: mov (%rdx,%r9,8),%r10 0x0000000000400b8b <+75>: cmp $0x80,%r10 0x0000000000400b92 <+82>: ja 0x400c38 <_Z28compile_test_asm_inside_loopv+248> 0x0000000000400b98 <+88>: lea 0x3(%rax),%r11 0x0000000000400b9c <+92>: mov (%rdx,%r11,8),%rcx 0x0000000000400ba0 <+96>: cmp $0x80,%rcx 0x0000000000400ba7 <+103>: ja 0x400c38 <_Z28compile_test_asm_inside_loopv+248> 0x0000000000400bad <+109>: lea 0x4(%rax),%rsi 0x0000000000400bb1 <+113>: mov (%rdx,%rsi,8),%r8 0x0000000000400bb5 <+117>: cmp $0x80,%r8 0x0000000000400bbc <+124>: ja 0x400c38 <_Z28compile_test_asm_inside_loopv+248> 0x0000000000400bbe <+126>: lea 0x5(%rax),%r9 0x0000000000400bc2 <+130>: mov (%rdx,%r9,8),%r10 0x0000000000400bc6 <+134>: cmp $0x80,%r10 0x0000000000400bcd <+141>: ja 0x400c38 <_Z28compile_test_asm_inside_loopv+248> 0x0000000000400bcf <+143>: lea 0x6(%rax),%r11 0x0000000000400bd3 <+147>: mov (%rdx,%r11,8),%rcx 0x0000000000400bd7 <+151>: cmp $0x80,%rcx 0x0000000000400bde <+158>: ja 
0x400c38 <_Z28compile_test_asm_inside_loopv+248> 0x0000000000400be0 <+160>: lea 0x7(%rax),%rsi 0x0000000000400be4 <+164>: mov (%rdx,%rsi,8),%r8 0x0000000000400be8 <+168>: cmp $0x80,%r8 0x0000000000400bef <+175>: ja 0x400c38 <_Z28compile_test_asm_inside_loopv+248> 0x0000000000400bf1 <+177>: lea 0x8(%rax),%r9 0x0000000000400bf5 <+181>: mov (%rdx,%r9,8),%r10 0x0000000000400bf9 <+185>: cmp $0x80,%r10 0x0000000000400c00 <+192>: ja 0x400c38 <_Z28compile_test_asm_inside_loopv+248> 0x0000000000400c02 <+194>: add $0x9,%rax 0x0000000000400c06 <+198>: mov (%rdx,%rax,8),%rax 0x0000000000400c0a <+202>: cmp $0x80,%rax 0x0000000000400c10 <+208>: ja 0x400c38 <_Z28compile_test_asm_inside_loopv+248> 0x0000000000400c12 <+210>: lea 0x9(%rdi),%r11 0x0000000000400c16 <+214>: mov (%rdx,%r11,8),%rcx 0x0000000000400c1a <+218>: cmp $0x80,%rcx 0x0000000000400c21 <+225>: ja 0x400c38 <_Z28compile_test_asm_inside_loopv+248> 0x0000000000400c23 <+227>: lea 0xa(%rdi),%rax 0x0000000000400c27 <+231>: cmp $0x400,%rax 0x0000000000400c2d <+237>: jne 0x400b5d <_Z28compile_test_asm_inside_loopv+29> 0x0000000000400c33 <+243>: repz retq 0x0000000000400c35 <+245>: nopl (%rax) 0x0000000000400c38 <+248>: repz retq -----Original Message----- From: Andrew Pinski [mailto:pins...@gmail.com] Sent: Saturday, October 07, 2017 3:04 PM To: Saldyrkine, Mikhail [Sec Div] Cc: gcc-bugs@gcc.gnu.org Subject: Re: GCC does not optimize out functions without side effects with asm statements inside loop even if return velue is ignored On Sat, Oct 7, 2017 at 8:39 AM, Saldyrkine, Mikhail <mikhail.saldyrk...@gs.com> wrote: > g++ (GCC) 6.3.1 20170216 (Red Hat 6.3.1-3) > > In the below case compile_test_asm_inside_loop invokes test_asm_inside_loop > and ignores results. 
> The call into test_asm_inside_loop is expected to be eliminated since return > value is not used and there is no side effect > The call elimination works fine without asm and without loop > It does not work with asm inside loop Because the loop could be an infinite loop and GCC does not know how many times the inline-asm is going to be called and if there are other side effects. Let's look at the function: inline uint64_t test_asm_inside_loop(uint64_t idx) { while(true) { uint64_t result; asm ("movq (%1,%2,8), %0" : "=r" (result) : "r" (objects), "r" (idx) ); if( result > 128 ) return result; ++idx; } } The loop is only broken out of when result is > 128. result from the inline-asm is used as the breakout from the loop. Thanks, Andrew > > TEST CODE > > #include <iostream> > #include <assert.h> > > using namespace std; > constexpr static size_t capacity = 1024; > uint64_t objects[capacity]; > > // THE FUNCTION IS ELIMINATED BY COMPILER IF OUTPUT IS NOT USED > inline uint64_t test_noloop(uint64_t idx) { > uint64_t result; > asm ("movq (%1,%2,8), %0" : "=r" (result) : "r" (objects), "r" (idx) ); > if( result > 128 ) > return result; > return 0; > } > > // THE FUNCTION IS ELIMINATED BY COMPILER IF OUTPUT IS NOT USED > inline uint64_t test_noasm(uint64_t idx) { > while(true) > { > if( objects[idx] > 128 ) > return objects[idx]; > ++idx; > } > } > > // THE FUNCTION IS KEEPT EVEN WHEN IF RESULT IS NOT USED - ASM INSIDE LOOP > CAUSING THE ISSUE > inline uint64_t test_asm_inside_loop(uint64_t idx) { > while(true) > { > uint64_t result; > asm ("movq (%1,%2,8), %0" : "=r" (result) : "r" (objects), "r" (idx) > ); > if( result > 128 ) > return result; > ++idx; > } > } > > void init() { > srand(time(nullptr)); > for( size_t i = 0; i < capacity - 1; ++i ) > objects[i] = random() % 256; > objects[capacity-1] = 255; > } > > // TETS THAT test_noasm AND test_asm_inside_loop PRODUCE SAME RESULT > void sanity_test() { > for( size_t i = 0; i < capacity; ++i ) { > assert( test_noasm(i) 
== test_asm_inside_loop(i)); > } > } > > void compile_test_noasm() { > test_noasm(0); > } > > void compile_test_noloop() { > test_noloop(0); > } > > void compile_test_asm_inside_loop() { > test_asm_inside_loop(0); > } > > int main( int argc, char* argv[] ) { > init(); > sanity_test(); > compile_test_noasm(); > compile_test_noloop(); > compile_test_asm_inside_loop(); > } > > COMPILATION AND DISASSEMBLER RESULTS: > > /opt/rh/devtoolset-6//root/bin/g++ -O3 -funroll-loops > loop_optimization.cpp; gdb -batch -ex "file a.out" -ex "disas > compile_test_noasm" -ex "disas compile_test_noloop" -ex "disas > compile_test_asm_inside_loop" > Dump of assembler code for function _Z18compile_test_noasmv: > 0x0000000000400970 <+0>: repz retq > End of assembler dump. > Dump of assembler code for function _Z19compile_test_noloopv: > 0x0000000000400980 <+0>: repz retq > End of assembler dump. > Dump of assembler code for function _Z28compile_test_asm_inside_loopv: > 0x0000000000400990 <+0>: xor %edx,%edx > 0x0000000000400992 <+2>: mov $0x601080,%ecx > 0x0000000000400997 <+7>: xor %eax,%eax > 0x0000000000400999 <+9>: mov (%rcx,%rdx,8),%rsi > 0x000000000040099d <+13>: cmp $0x80,%rsi > 0x00000000004009a4 <+20>: ja 0x4009c1 > <_Z28compile_test_asm_inside_loopv+49> > 0x00000000004009a6 <+22>: nopw %cs:0x0(%rax,%rax,1) > 0x00000000004009b0 <+32>: add $0x1,%rax > 0x00000000004009b4 <+36>: mov (%rcx,%rax,8),%rdi > 0x00000000004009b8 <+40>: cmp $0x80,%rdi > 0x00000000004009bf <+47>: jbe 0x4009b0 > <_Z28compile_test_asm_inside_loopv+32> > 0x00000000004009c1 <+49>: repz retq > End of assembler dump. > >