https://gcc.gnu.org/bugzilla/show_bug.cgi?id=116815
Bug ID: 116815
Summary: Make better use of overflow flags in codegen of min/max(a, add/sub(a, b))
Product: gcc
Version: 15.0
Status: UNCONFIRMED
Keywords: missed-optimization
Severity: normal
Priority: P3
Component: target
Assignee: unassigned at gcc dot gnu.org
Reporter: ktkachov at gcc dot gnu.org
Target Milestone: ---
Target: aarch64

The LLVM report https://github.com/llvm/llvm-project/issues/109455 also applies to GCC.
We could do better in avoiding extra CMP instructions.
It may apply to other targets besides aarch64 as well.

Testcase pasted:

#include <stdint.h>
#include <utility>

typedef uint8_t u8;
typedef int8_t i8;

typedef uint32_t u16;
typedef int32_t i16;

typedef uint32_t u32;
typedef int32_t i32;

typedef __uint128_t u128;
typedef __int128_t i128;

#define assume(cond)                 \
    do {                             \
        if (!(cond)) {               \
            __builtin_unreachable(); \
        }                            \
    } while (0)

template <typename T>
auto max(T a, T b) -> T {
    return a > b ? a : b;
}

template <typename T>
auto min(T a, T b) -> T {
    return a < b ? a : b;
}

template <typename T>
auto checked_add(T a, T b) -> std::pair<bool, T> {
    T sum;
    bool overflow = __builtin_add_overflow(a, b, &sum);
    return std::make_pair(!overflow, sum);
}

template <typename T>
auto checked_sub(T a, T b) -> std::pair<bool, T> {
    T sum;
    bool overflow = __builtin_sub_overflow(a, b, &sum);
    return std::make_pair(!overflow, sum);
}

extern "C" {

auto src_umax_add(u32 a, u32 b) -> u32 {
    return max(a, a + b);
}

auto tgt_umax_add(u32 a, u32 b) -> u32 {
    auto [in_range, sum] = checked_add(a, b);
    return in_range ? sum : a;
}

auto src_umax_sub(u32 a, u32 b) -> u32 {
    return max(a, a - b);
}

auto tgt_umax_sub(u32 a, u32 b) -> u32 {
    auto [in_range, sum] = checked_sub(a, b);
    return in_range ? a : sum;
}

auto src_umin_add(u32 a, u32 b) -> u32 {
    return min(a, a + b);
}

auto tgt_umin_add(u32 a, u32 b) -> u32 {
    auto [in_range, sum] = checked_add(a, b);
    return in_range ? a : sum;
}

auto src_umin_sub(u32 a, u32 b) -> u32 {
    return min(a, a - b);
}

auto tgt_umin_sub(u32 a, u32 b) -> u32 {
    auto [in_range, sum] = checked_sub(a, b);
    return in_range ? sum : a;
}

auto src_smax_add(i32 a, i32 b) -> i32 {
    assume(b >= 0);
    return max(a, a + b);
}

auto tgt_smax_add(i32 a, i32 b) -> i32 {
    auto [in_range, sum] = checked_add(a, b);
    return in_range ? sum : a;
}

auto src_smin_sub(i32 a, i32 b) -> i32 {
    assume(b >= 0);
    return min(a, a - b);
}

auto tgt_smin_sub(i32 a, i32 b) -> i32 {
    auto [in_range, sum] = checked_sub(a, b);
    return in_range ? sum : a;
}

}