https://gcc.gnu.org/bugzilla/show_bug.cgi?id=114559

            Bug ID: 114559
           Summary: After function inlining some optimizations missing
           Product: gcc
           Version: 14.0
            Status: UNCONFIRMED
          Keywords: missed-optimization
          Severity: normal
          Priority: P3
         Component: middle-end
          Assignee: unassigned at gcc dot gnu.org
          Reporter: antoshkka at gmail dot com
  Target Milestone: ---

Consider the example:

// Generic compare-and-swap update loop over an int that is modified through
// the GCC __atomic builtins.
//
// Each iteration computes updater(old_value) and then either
//   - returns old_value without storing anything when updater handed back the
//     same value it was given, or
//   - tries to install the result with a compare-exchange and returns it once
//     the exchange succeeds.
//
// atomic:  the int that is read and conditionally overwritten.
// updater: callable invoked with a copy of the last observed value; its result
//          initializes an int.
// Returns: the last observed value (no store happened) or the stored value.
template <class Func>
int AtomicUpdate(int& atomic, Func updater) {
  // Initial read is a plain load of the referenced int, not an __atomic builtin.
  int old_value = atomic;
  while (true) {
    // int{old_value} hands updater a prvalue copy rather than old_value itself.
    const int new_value = updater(int{old_value});
    // Nothing to change: skip the compare-exchange entirely.
    if (old_value == new_value) return old_value;
    // Builtin arguments: weak = 1, success and failure memory order = 5
    // (presumably __ATOMIC_SEQ_CST in GCC's numbering -- confirm against the
    // GCC docs). Per the builtin's documented contract, a failed exchange
    // writes the current contents of atomic into old_value, so the next
    // iteration recomputes from the fresh value.
    if (__atomic_compare_exchange_n(&atomic, &old_value, new_value, 1, 5, 5))
return new_value;
  }
}

// Lowers atomic to value when value is smaller, by running AtomicUpdate with a
// "pick the smaller one" updater.
//
// atomic: the int to update.
// value:  candidate minimum, captured by copy in the lambda.
// Returns: value if it was stored, otherwise the observed value of atomic
//          (which was already <= value).
int AtomicMin(int& atomic, int value) {
  return AtomicUpdate(atomic, [value](int old_value) {
    // Yields old_value unchanged when value is not smaller, which makes
    // AtomicUpdate return early without attempting a store.
    return value < old_value ? value : old_value;
  });
}


With -O2, GCC produces the following assembly:


# GCC -O2 output for the lambda-based AtomicMin.
# Register roles as used below: rdi = address of atomic, esi = value,
# eax = old_value (also the implicit comparand of cmpxchg), edx = new_value.
AtomicMin(int&, int):
        # old_value = atomic
        mov     eax, DWORD PTR [rdi]
.L3:
        # Flags from this compare feed both cmovle and jge (mov leaves flags alone).
        cmp     esi, eax
        # new_value = old_value ...
        mov     edx, eax
        # ... replaced by value when value <= old_value, i.e. edx = min(value, old_value)
        cmovle  edx, esi
        # value >= old_value: new_value == old_value, leave the loop without storing
        jge     .L4
        # If [rdi] == eax store edx and set ZF; otherwise load [rdi] into eax and clear ZF.
        lock cmpxchg    DWORD PTR [rdi], edx
        # Exchange failed: retry with the refreshed old_value in eax
        jne     .L3
.L1:
        # Return new_value
        mov     eax, edx
        ret
.L4:
        # Only reached via jge, where edx already equals eax (either the cmovle
        # was not taken, or it copied an esi equal to eax); this copy and the
        # extra jump are the redundant part of the codegen.
        mov     edx, eax
        jmp     .L1


However, more efficient assembly is possible:


# Shorter code for the same function.
# Register roles as used below: rdi = address of atomic, esi = value,
# eax = old_value (also the implicit comparand of cmpxchg and the return register).
AtomicMin(int&, int):                        # @AtomicMin(int&, int)
        # old_value = atomic
        mov     eax, dword ptr [rdi]
.LBB0_1:                                # =>This Inner Loop Header: Depth=1
        cmp     eax, esi
        # old_value <= value: nothing to store, return old_value already in eax
        jle     .LBB0_4
        # On this path new_value is always value, so esi is stored directly;
        # no cmov or temporary register is needed.
        lock            cmpxchg dword ptr [rdi], esi
        # Exchange failed: eax now holds the current contents, retry
        jne     .LBB0_1
        # Exchange succeeded: return value
        mov     eax, esi
.LBB0_4:
        ret


Note that manual inlining of the lambda improves the codegen:

// AtomicMin with the update loop and the min computation written out by hand
// in a single function (no template, no lambda).
//
// atomic: the int to update.
// value:  candidate minimum.
// Returns: value if it was stored, otherwise the observed value of atomic
//          (which was already <= value).
int AtomicMin(int& atomic, int value) {
  // Initial read is a plain load of the referenced int, not an __atomic builtin.
  int old_value = atomic;
  while (true) {
    const int new_value = (value < old_value ? value : old_value);
    // value is not smaller than what is stored: return without storing.
    if (old_value == new_value) return old_value;
    // Builtin arguments: weak = 1, success and failure memory order = 5
    // (presumably __ATOMIC_SEQ_CST in GCC's numbering -- confirm against the
    // GCC docs). Per the builtin's documented contract, a failed exchange
    // writes the current contents of atomic into old_value before the loop
    // repeats.
    if (__atomic_compare_exchange_n(&atomic, &old_value, new_value, 1, 5, 5))
return new_value;
  }
}

This results in:

# GCC output for the manually inlined AtomicMin.
# Register roles as used below: rdi = address of atomic, esi = value,
# eax = old_value (also the implicit comparand of cmpxchg), edx = new_value.
AtomicMin(int&, int):
        # old_value = atomic
        mov     eax, DWORD PTR [rdi]
.L3:
        # Flags from this compare feed both cmovle and jge (mov leaves flags alone).
        cmp     esi, eax
        # new_value = old_value ...
        mov     edx, eax
        # ... replaced by value when value <= old_value, i.e. edx = min(value, old_value)
        cmovle  edx, esi
        # value >= old_value: nothing to store; jump straight to the shared
        # return sequence, since edx already equals old_value
        jge     .L1
        # If [rdi] == eax store edx and set ZF; otherwise load [rdi] into eax and clear ZF.
        lock cmpxchg    DWORD PTR [rdi], edx
        # Exchange failed: retry with the refreshed old_value in eax
        jne     .L3
.L1:
        # Return new_value
        mov     eax, edx
        ret


Godbolt playground: https://godbolt.org/z/G6YEGb15q

Reply via email to