https://gcc.gnu.org/bugzilla/show_bug.cgi?id=116815
Bug ID: 116815
Summary: Make better use of overflow flags in codegen of
min/max(a, add/sub(a, b))
Product: gcc
Version: 15.0
Status: UNCONFIRMED
Keywords: missed-optimization
Severity: normal
Priority: P3
Component: target
Assignee: unassigned at gcc dot gnu.org
Reporter: ktkachov at gcc dot gnu.org
Target Milestone: ---
Target: aarch64
The LLVM report https://github.com/llvm/llvm-project/issues/109455 also applies
to GCC: for min/max(a, add/sub(a, b)) we could do better at avoiding the extra
CMP instructions by reusing the overflow flags of the add/sub.
This may apply to targets other than aarch64 as well.
Testcase pasted:
#include <stdint.h>
#include <utility>
// Short fixed-width integer aliases used by the testcase.
// Each alias names the width it actually has: u16/i16 are 16-bit types
// (they were previously aliased to the 32-bit types by mistake).
using u8 = uint8_t;
using i8 = int8_t;
using u16 = uint16_t;
using i16 = int16_t;
using u32 = uint32_t;
using i32 = int32_t;
// 128-bit integers are a GCC/Clang extension, not standard C++.
using u128 = __uint128_t;
using i128 = __int128_t;
// assume(cond): promise the optimizer that `cond` holds at this point.
// When `cond` is false the __builtin_unreachable() path is taken, which is
// undefined behavior, so callers must guarantee the condition themselves.
// Wrapped in do/while(0) so it behaves as a single statement.
#define assume(cond)            \
  do {                          \
    if (cond) {                 \
      /* condition holds */     \
    } else {                    \
      __builtin_unreachable();  \
    }                           \
  } while (0)
// Returns `a` when `a > b`, otherwise `b` (so `b` is returned on ties).
template <typename T>
T max(T a, T b) {
  if (a > b) {
    return a;
  }
  return b;
}
// Returns `a` when `a < b`, otherwise `b` (so `b` is returned on ties).
template <typename T>
T min(T a, T b) {
  if (a < b) {
    return a;
  }
  return b;
}
// Adds `a` and `b` with overflow detection.
// Returns {ok, value}: `ok` is true when the addition did NOT overflow, and
// `value` is whatever __builtin_add_overflow stored (the wrapped result when
// it did overflow).
template <typename T>
std::pair<bool, T> checked_add(T a, T b) {
  T result;
  const bool wrapped = __builtin_add_overflow(a, b, &result);
  return std::pair<bool, T>(!wrapped, result);
}
// Computes `a - b` with overflow detection.
// Returns {ok, value}: `ok` is true when the subtraction did NOT overflow,
// and `value` is whatever __builtin_sub_overflow stored (the wrapped result
// when it did overflow).
template <typename T>
std::pair<bool, T> checked_sub(T a, T b) {
  T diff;
  const bool wrapped = __builtin_sub_overflow(a, b, &diff);
  return std::pair<bool, T>(!wrapped, diff);
}
extern "C" {
auto src_umax_add(u32 a, u32 b) -> u32 { return max(a, a + b); }
auto tgt_umax_add(u32 a, u32 b) -> u32 {
auto [in_range, sum] = checked_add(a, b);
return in_range ? sum : a;
}
auto src_umax_sub(u32 a, u32 b) -> u32 { return max(a, a - b); }
auto tgt_umax_sub(u32 a, u32 b) -> u32 {
auto [in_range, sum] = checked_sub(a, b);
return in_range ? a : sum;
}
auto src_umin_add(u32 a, u32 b) -> u32 { return min(a, a + b); }
auto tgt_umin_add(u32 a, u32 b) -> u32 {
auto [in_range, sum] = checked_add(a, b);
return in_range ? a : sum;
}
auto src_umin_sub(u32 a, u32 b) -> u32 { return min(a, a - b); }
auto tgt_umin_sub(u32 a, u32 b) -> u32 {
auto [in_range, sum] = checked_sub(a, b);
return in_range ? sum : a;
}
auto src_smax_add(i32 a, i32 b) -> i32 {
assume(b >= 0);
return max(a, a + b);
}
auto tgt_smax_add(i32 a, i32 b) -> i32 {
auto [in_range, sum] = checked_add(a, b);
return in_range ? sum : a;
}
auto src_smin_sub(i32 a, i32 b) -> i32 {
assume(b >= 0);
return min(a, a - b);
}
auto tgt_smin_sub(i32 a, i32 b) -> i32 {
auto [in_range, sum] = checked_sub(a, b);
return in_range ? sum : a;
}
}