On Sat, Oct 7, 2017 at 8:39 AM, Saldyrkine, Mikhail <mikhail.saldyrk...@gs.com> wrote: > g++ (GCC) 6.3.1 20170216 (Red Hat 6.3.1-3) > > In the below case compile_test_asm_inside_loop invokes test_asm_inside_loop > and ignores results. > The call into test_asm_inside_loop is expected to be eliminated since return > value is not used and there is no side effect > The call elimination works fine without asm and without loop > It does not work with asm inside loop
Because the loop could be an infinite loop, and GCC does not know how many times the inline-asm is going to be executed or whether there are other side effects. Let's look at the function: inline uint64_t test_asm_inside_loop(uint64_t idx) { while(true) { uint64_t result; asm ("movq (%1,%2,8), %0" : "=r" (result) : "r" (objects), "r" (idx) ); if( result > 128 ) return result; ++idx; } } The loop is only exited when result is > 128, and result is the output of the inline-asm, so the inline-asm is what decides whether the loop terminates. Thanks, Andrew > > TEST CODE > > #include <iostream> > #include <assert.h> > > using namespace std; > constexpr static size_t capacity = 1024; > uint64_t objects[capacity]; > > // THE FUNCTION IS ELIMINATED BY COMPILER IF OUTPUT IS NOT USED > inline uint64_t test_noloop(uint64_t idx) { > uint64_t result; > asm ("movq (%1,%2,8), %0" : "=r" (result) : "r" (objects), "r" (idx) ); > if( result > 128 ) > return result; > return 0; > } > > // THE FUNCTION IS ELIMINATED BY COMPILER IF OUTPUT IS NOT USED > inline uint64_t test_noasm(uint64_t idx) { > while(true) > { > if( objects[idx] > 128 ) > return objects[idx]; > ++idx; > } > } > > // THE FUNCTION IS KEEPT EVEN WHEN IF RESULT IS NOT USED - ASM INSIDE LOOP > CAUSING THE ISSUE > inline uint64_t test_asm_inside_loop(uint64_t idx) { > while(true) > { > uint64_t result; > asm ("movq (%1,%2,8), %0" : "=r" (result) : "r" (objects), "r" (idx) > ); > if( result > 128 ) > return result; > ++idx; > } > } > > void init() { > srand(time(nullptr)); > for( size_t i = 0; i < capacity - 1; ++i ) > objects[i] = random() % 256; > objects[capacity-1] = 255; > } > > // TETS THAT test_noasm AND test_asm_inside_loop PRODUCE SAME RESULT > void sanity_test() { > for( size_t i = 0; i < capacity; ++i ) { > assert( test_noasm(i) == test_asm_inside_loop(i)); > } > } > > void compile_test_noasm() { > test_noasm(0); > } > > void compile_test_noloop() { > test_noloop(0); > } > > void compile_test_asm_inside_loop() { > test_asm_inside_loop(0); > } > > int 
main( int argc, char* argv[] ) { > init(); > sanity_test(); > compile_test_noasm(); > compile_test_noloop(); > compile_test_asm_inside_loop(); > } > > COMPILATION AND DISASSEMBLER RESULTS: > > /opt/rh/devtoolset-6//root/bin/g++ -O3 -funroll-loops > loop_optimization.cpp; gdb -batch -ex "file a.out" -ex "disas > compile_test_noasm" -ex "disas compile_test_noloop" -ex "disas > compile_test_asm_inside_loop" > Dump of assembler code for function _Z18compile_test_noasmv: > 0x0000000000400970 <+0>: repz retq > End of assembler dump. > Dump of assembler code for function _Z19compile_test_noloopv: > 0x0000000000400980 <+0>: repz retq > End of assembler dump. > Dump of assembler code for function _Z28compile_test_asm_inside_loopv: > 0x0000000000400990 <+0>: xor %edx,%edx > 0x0000000000400992 <+2>: mov $0x601080,%ecx > 0x0000000000400997 <+7>: xor %eax,%eax > 0x0000000000400999 <+9>: mov (%rcx,%rdx,8),%rsi > 0x000000000040099d <+13>: cmp $0x80,%rsi > 0x00000000004009a4 <+20>: ja 0x4009c1 > <_Z28compile_test_asm_inside_loopv+49> > 0x00000000004009a6 <+22>: nopw %cs:0x0(%rax,%rax,1) > 0x00000000004009b0 <+32>: add $0x1,%rax > 0x00000000004009b4 <+36>: mov (%rcx,%rax,8),%rdi > 0x00000000004009b8 <+40>: cmp $0x80,%rdi > 0x00000000004009bf <+47>: jbe 0x4009b0 > <_Z28compile_test_asm_inside_loopv+32> > 0x00000000004009c1 <+49>: repz retq > End of assembler dump. > >