https://gcc.gnu.org/bugzilla/show_bug.cgi?id=116815

            Bug ID: 116815
           Summary: Make better use of overflow flags in codegen of
                    min/max(a, add/sub(a, b))
           Product: gcc
           Version: 15.0
            Status: UNCONFIRMED
          Keywords: missed-optimization
          Severity: normal
          Priority: P3
         Component: target
          Assignee: unassigned at gcc dot gnu.org
          Reporter: ktkachov at gcc dot gnu.org
  Target Milestone: ---
            Target: aarch64

The LLVM report https://github.com/llvm/llvm-project/issues/109455 also applies
to GCC. We could do better at avoiding extra CMP instructions.
This may apply to other targets besides aarch64 as well.

Testcase pasted:

#include <stdint.h>

#include <utility>

// Short fixed-width integer aliases used by the testcase.
using u8 = uint8_t;
using i8 = int8_t;

// u16/i16 must name the 16-bit types so the alias matches its width.
using u16 = uint16_t;
using i16 = int16_t;

using u32 = uint32_t;
using i32 = int32_t;

// The 128-bit types are compiler-provided (GCC/Clang), not <stdint.h> types.
using u128 = __uint128_t;
using i128 = __int128_t;

// assume(cond): marks the case where `cond` is false as unreachable via
// __builtin_unreachable(), so the compiler may treat `cond` as always true.
// The do/while(0) wrapper makes the expansion behave as a single statement.
#define assume(cond)                 \
    do {                             \
        if (cond) {                  \
        } else {                     \
            __builtin_unreachable(); \
        }                            \
    } while (0)

// Returns `a` when it compares strictly greater than `b`, otherwise `b`.
template <typename T>
auto max(T a, T b) -> T {
    if (a > b) {
        return a;
    }
    return b;
}

// Returns `a` when it compares strictly less than `b`, otherwise `b`.
template <typename T>
auto min(T a, T b) -> T {
    if (a < b) {
        return a;
    }
    return b;
}

// Adds `a` and `b` with overflow detection (__builtin_add_overflow).
// Returns a pair whose first member is true when the sum fits in T, and
// whose second member is the value the builtin stored for the sum.
template <typename T>
auto checked_add(T a, T b) -> std::pair<bool, T> {
    T result;
    const bool in_range = !__builtin_add_overflow(a, b, &result);
    return std::pair<bool, T>(in_range, result);
}

// Subtracts `b` from `a` with overflow detection (__builtin_sub_overflow).
// Returns a pair whose first member is true when the difference fits in T,
// and whose second member is the value the builtin stored for the difference.
template <typename T>
auto checked_sub(T a, T b) -> std::pair<bool, T> {
    T diff;
    const bool in_range = !__builtin_sub_overflow(a, b, &diff);
    return std::pair<bool, T>(in_range, diff);
}

extern "C" {

// Source pattern: unsigned max of `a` and the (wrapping) sum `a + b`.
auto src_umax_add(u32 a, u32 b) -> u32 {
    const u32 sum = a + b;
    return max(a, sum);
}
// Target pattern for src_umax_add: the checked sum when the addition did not
// overflow; otherwise `a`, since the wrapped sum is then smaller than `a`.
auto tgt_umax_add(u32 a, u32 b) -> u32 {
    const auto result = checked_add(a, b);
    if (result.first) {
        return result.second;
    }
    return a;
}

// Source pattern: unsigned max of `a` and the (wrapping) difference `a - b`.
auto src_umax_sub(u32 a, u32 b) -> u32 {
    const u32 diff = a - b;
    return max(a, diff);
}
// Target pattern for src_umax_sub: `a` when the subtraction did not wrap;
// otherwise the wrapped difference, which is then larger than `a`.
auto tgt_umax_sub(u32 a, u32 b) -> u32 {
    const auto result = checked_sub(a, b);
    if (result.first) {
        return a;
    }
    return result.second;
}

// Source pattern: unsigned min of `a` and the (wrapping) sum `a + b`.
auto src_umin_add(u32 a, u32 b) -> u32 {
    const u32 sum = a + b;
    return min(a, sum);
}
// Target pattern for src_umin_add: `a` when the addition did not overflow;
// otherwise the wrapped sum, which is then smaller than `a`.
auto tgt_umin_add(u32 a, u32 b) -> u32 {
    const auto result = checked_add(a, b);
    if (result.first) {
        return a;
    }
    return result.second;
}

// Source pattern: unsigned min of `a` and the (wrapping) difference `a - b`.
auto src_umin_sub(u32 a, u32 b) -> u32 {
    const u32 diff = a - b;
    return min(a, diff);
}
// Target pattern for src_umin_sub: the checked difference when the
// subtraction did not wrap; otherwise `a`, since the wrapped difference is
// then larger than `a`.
auto tgt_umin_sub(u32 a, u32 b) -> u32 {
    const auto result = checked_sub(a, b);
    if (result.first) {
        return result.second;
    }
    return a;
}

// Source pattern: signed max of `a` and `a + b`, with `b` assumed to be
// non-negative.
auto src_smax_add(i32 a, i32 b) -> i32 {
    assume(b >= 0);
    const i32 sum = a + b;
    return max(a, sum);
}
// Target pattern for src_smax_add: the checked sum when it is in range,
// otherwise `a`. No assumption about the sign of `b` is stated here.
auto tgt_smax_add(i32 a, i32 b) -> i32 {
    const auto result = checked_add(a, b);
    if (result.first) {
        return result.second;
    }
    return a;
}

// Source pattern: signed min of `a` and `a - b`, with `b` assumed to be
// non-negative.
auto src_smin_sub(i32 a, i32 b) -> i32 {
    assume(b >= 0);
    const i32 diff = a - b;
    return min(a, diff);
}
// Target pattern for src_smin_sub: the checked difference when it is in
// range, otherwise `a`. No assumption about the sign of `b` is stated here.
auto tgt_smin_sub(i32 a, i32 b) -> i32 {
    const auto result = checked_sub(a, b);
    if (result.first) {
        return result.second;
    }
    return a;
}
}

Reply via email to