https://gcc.gnu.org/bugzilla/show_bug.cgi?id=119210
--- Comment #7 from xiezhiheng at huawei dot com ---
For additional information, see https://godbolt.org/z/xdPYGsjYd
LLVM seems to always place `smstart za` so that it dominates block .LBB0_14:

.LBB0_11:
        add     x23, x23, #1
        msr     TPIDR2_EL0, xzr
        cmp     x23, #64
        b.eq    .LBB0_14
.LBB0_12:
        ldr     d0, [x20, x23, lsl #3]
        ldr     d1, [x21, x23, lsl #3]
        mov     x0, x24
        mov     w1, w23
        sturh   w25, [x29, #-72]
        msr     TPIDR2_EL0, x26
        bl      printf
        smstart za                      <== smstart za before cbnz
        mrs     x8, TPIDR2_EL0
        sub     x0, x29, #80
        cbnz    x8, .LBB0_11
        bl      __arm_tpidr2_restore
        b       .LBB0_11
.LBB0_14:
        smstart sm
        mov     x0, x20
        mov     x1, x21
        bl      example(double*, double*)
        mov     x0, x22
        bl      example0(double*)
        smstop  sm
        mov     x23, xzr
        rdsvl   x25, #1
        sub     x26, x29, #80
        adrp    x24, .L.str.1
        add     x24, x24, :lo12:.L.str.1
        b       .LBB0_16