g++ (GCC) 6.3.1 20170216 (Red Hat 6.3.1-3)

In the test case below, compile_test_asm_inside_loop invokes test_asm_inside_loop and ignores the result. The call to test_asm_inside_loop is expected to be eliminated, since the return value is not used and the function has no side effects. Call elimination works fine in the case without asm (test_noasm) and in the case without a loop (test_noloop). It does not work when the asm is inside the loop (test_asm_inside_loop).
TEST CODE
#include <iostream>
#include <assert.h>
using namespace std;
// NOTE(review): uint64_t, srand()/random() and time() are used below without
// including <cstdint>, <cstdlib> or <ctime> directly; presumably they arrive
// transitively through <iostream> on the reporter's toolchain.

// Number of 64-bit slots in the global table scanned by the test functions.
constexpr static size_t capacity = 1024;
// Global table read by all three test functions; filled by init().
uint64_t objects[capacity];

// THE FUNCTION IS ELIMINATED BY COMPILER IF OUTPUT IS NOT USED
//
// Control case "asm without loop": loads objects[idx] with a single inline-asm
// movq and returns the loaded value if it is greater than 128, otherwise 0.
// NOTE(review): the asm template dereferences memory at objects + idx*8, but
// only register operands are declared (no "m" input, no "memory" clobber), so
// the compiler is not told about the memory read. Confirm this is intended.
inline uint64_t test_noloop(uint64_t idx)
{
    uint64_t result;
    asm ("movq (%1,%2,8), %0" : "=r" (result) : "r" (objects), "r" (idx) );
    if( result > 128 )
        return result;
    return 0;
}

// THE FUNCTION IS ELIMINATED BY COMPILER IF OUTPUT IS NOT USED
//
// Control case "loop without asm": scans forward from idx and returns the
// first element of objects[] that is greater than 128. The loop has no bounds
// check of its own; it relies on such an element existing at or after idx
// (init() stores 255 in the last slot).
inline uint64_t test_noasm(uint64_t idx)
{
    while(true)
    {
        if( objects[idx] > 128 )
            return objects[idx];
        ++idx;
    }
}

// THE FUNCTION IS KEPT EVEN IF THE RESULT IS NOT USED - ASM INSIDE LOOP CAUSING THE ISSUE
//
// Same scan as test_noasm, but each element is loaded through the inline-asm
// movq used in test_noloop. Returns the first loaded value greater than 128.
// NOTE(review): as in test_noloop, the asm reads memory without declaring a
// memory operand or clobber.
// NOTE(review): possibly the loop is kept because, with the load hidden inside
// the asm, the optimizer can no longer bound the trip count or prove that the
// loop terminates - unconfirmed.
inline uint64_t test_asm_inside_loop(uint64_t idx)
{
    while(true)
    {
        uint64_t result;
        asm ("movq (%1,%2,8), %0" : "=r" (result) : "r" (objects), "r" (idx) );
        if( result > 128 )
            return result;
        ++idx;
    }
}

// Fills objects[0 .. capacity-2] with values in [0, 255] and forces the last
// slot to 255 so that every scan starting inside the table finds a value
// greater than 128 and stops.
// NOTE(review): srand() is called here, but the values come from random();
// POSIX seeds random() via srandom(), so this seed presumably does not affect
// the generated data - verify.
void init()
{
    srand(time(nullptr));
    for( size_t i = 0; i < capacity - 1; ++i )
        objects[i] = random() % 256;
    objects[capacity-1] = 255;
}

// TEST THAT test_noasm AND test_asm_inside_loop PRODUCE THE SAME RESULT
//
// Compares both scanning functions for every start index in the table.
void sanity_test()
{
    for( size_t i = 0; i < capacity; ++i )
    {
        assert( test_noasm(i) == test_asm_inside_loop(i));
    }
}

// The three wrappers below call one test function each and discard the
// result. They are the functions disassembled in the results section.
void compile_test_noasm()
{
    test_noasm(0);
}

void compile_test_noloop()
{
    test_noloop(0);
}

void compile_test_asm_inside_loop()
{
    test_asm_inside_loop(0);
}

// Initializes the table, checks that the loop variants agree, then calls each
// wrapper once. argc and argv are not used.
int main( int argc, char* argv[] )
{
    init();
    sanity_test();
    compile_test_noasm();
    compile_test_noloop();
    compile_test_asm_inside_loop();
}

COMPILATION AND DISASSEMBLER RESULTS:
/opt/rh/devtoolset-6//root/bin/g++ -O3 -funroll-loops loop_optimization.cpp; gdb -batch -ex "file a.out" -ex "disas compile_test_noasm" -ex "disas compile_test_noloop" -ex "disas compile_test_asm_inside_loop"
Dump of assembler code for function _Z18compile_test_noasmv:
   0x0000000000400970 <+0>: repz retq
End of assembler dump.
Dump of assembler code for function _Z19compile_test_noloopv:
   0x0000000000400980 <+0>: repz retq
End of assembler dump.
Dump of assembler code for function _Z28compile_test_asm_inside_loopv:
   0x0000000000400990 <+0>: xor %edx,%edx
   0x0000000000400992 <+2>: mov $0x601080,%ecx
   0x0000000000400997 <+7>: xor %eax,%eax
   0x0000000000400999 <+9>: mov (%rcx,%rdx,8),%rsi
   0x000000000040099d <+13>: cmp $0x80,%rsi
   0x00000000004009a4 <+20>: ja 0x4009c1 <_Z28compile_test_asm_inside_loopv+49>
   0x00000000004009a6 <+22>: nopw %cs:0x0(%rax,%rax,1)
   0x00000000004009b0 <+32>: add $0x1,%rax
   0x00000000004009b4 <+36>: mov (%rcx,%rax,8),%rdi
   0x00000000004009b8 <+40>: cmp $0x80,%rdi
   0x00000000004009bf <+47>: jbe 0x4009b0 <_Z28compile_test_asm_inside_loopv+32>
   0x00000000004009c1 <+49>: repz retq
End of assembler dump.