https://gcc.gnu.org/bugzilla/show_bug.cgi?id=115924
--- Comment #3 from Andrew Pinski <pinskia at gcc dot gnu.org> ---
Note a better example:
```
#include <stdint.h>

uint32_t f(uint32_t i2, uint32_t aa, uint32_t aa1) {
    return ((i2 >> 17) + (aa >> 17) + (aa1 >> 17)) << 17;
}

uint32_t fa(uint32_t i2, uint32_t aa, uint32_t aa1) {
    return ((i2 >> 17) + (aa1 >> 17) + (aa >> 17)) << 17;
}

uint32_t f2(uint32_t i2, uint32_t aa, uint32_t aa1) {
    return ((((i2 & -(1 << 17)) + aa) & -(1 << 17)) + aa1) & -(1 << 17);
}

uint32_t f2a(uint32_t i2, uint32_t aa, uint32_t aa1) {
    return ((((i2 & -(1 << 17)) + aa1) & -(1 << 17)) + aa) & -(1 << 17);
}

bool f3a(uint32_t i2, uint32_t aa, uint32_t aa1) {
    return f(i2, aa, aa1) == fa(i2, aa, aa1);
}

bool f3(uint32_t i2, uint32_t aa, uint32_t aa1) {
    return f(i2, aa, aa1) == f2a(i2, aa, aa1);
}
```
GCC is able to optimize f3a but clang/LLVM is not.