https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107661
Bug ID: 107661
Summary: [13 Regression] lambdas get merged incorrectly in templates, cause llvm-12 miscompilation
Product: gcc
Version: 13.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: middle-end
Assignee: unassigned at gcc dot gnu.org
Reporter: slyfox at gcc dot gnu.org
Target Milestone: ---

Created attachment 53888
  --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=53888&action=edit
a.cc

Initially observed the problem on llvm-12's test suite, where 4 AMDGCN tests fail:

Failed Tests (4):
  LLVM :: CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll
  LLVM :: CodeGen/AMDGPU/atomic_optimizations_pixelshader.ll
  LLVM :: CodeGen/AMDGPU/smem-war-hazard.mir
  LLVM :: CodeGen/AMDGPU/vgpr-descriptor-waterfall-loop-idom-update.ll

Digging deeper, it looks like llvm's class

  template<typename Ret, typename ...Params> class function_ref<Ret(Params...)> ...

gets miscompiled in a very unusual way. I extracted a smaller a.cc reproducer. It looks as if gcc picked the wrong (unused) lambda to inline into the code that is actually used.

Reproducing:

$ ./gcc-13-HEAD/bin/gcc -Wall -O0 a.cc -o a
$ ./gcc-13-HEAD/bin/gcc -Wall -O3 a.cc -o a
./bug_HEAD.bash: line 6: 1309437 Illegal instruction (core dumped) ./a
$ ./gcc-13-HEAD/bin/gcc -Wall -O0 -DDISABLE_HACK a.cc -o a
$ ./gcc-13-HEAD/bin/gcc -Wall -O3 -DDISABLE_HACK a.cc -o a

$ ./gcc-13-HEAD/bin/gcc -v |& unnix
Using built-in specs.
COLLECT_GCC=/<<NIX>>/gcc-13.0.0/bin/gcc
COLLECT_LTO_WRAPPER=/<<NIX>>/gcc-13.0.0/libexec/gcc/x86_64-unknown-linux-gnu/13.0.0/lto-wrapper
Target: x86_64-unknown-linux-gnu
Configured with:
Thread model: posix
Supported LTO compression algorithms: zlib
gcc version 13.0.0 20221112 (experimental) (GCC)

Full a.cc example (somewhat long, also attached):

/// 'function_ref' is taken from llvm-12 as is without any modifications.
/// The rest is severely maimed AMDGCN hazard verifier code.
// How to break:
// $ ./gcc-13-snap/bin/gcc -O3 a.cc -o a && ./a
// Illegal instruction (core dumped)
// $ ./gcc-13-snap/bin/gcc -O3 -DDISABLE_HACK a.cc -o a && ./a
// <ok>
//
// NOTE: this file is a miscompilation reproducer. The attributes, the pragma
// set, and the exact shape of every function below are part of the test;
// do not "clean up" the code without re-checking that the crash still
// reproduces.

// Function-specific optimization options for everything defined below
// (GCC `#pragma GCC optimize`); they are given in addition to whatever -O
// level is passed on the command line.
#pragma GCC optimize "-O1"
#pragma GCC optimize "-fipa-cp"
#pragma GCC optimize "-fipa-cp-clone"

// Defining DISABLE_HACK (here or via -DDISABLE_HACK) compiles out
// seemingly_unused_foo() at the bottom of the file.
// #define DISABLE_HACK 1

#include <cstdint>
#include <limits>
#include <type_traits>
#include <utility>

/// An efficient, type-erasing, non-owning reference to a callable. This is
/// intended for use as the type of a function parameter that is not used
/// after the function in question returns.
///
/// This class does not own the callable, so it is not in general safe to store
/// a function_ref.
template<typename Fn> class function_ref;

template<typename Ret, typename ...Params>
class function_ref<Ret(Params...)> {
  // Type-erased trampoline; stays nullptr for an empty function_ref.
  Ret (*callback)(intptr_t callable, Params ...params) = nullptr;
  // Address of the referenced callable, stored as an integer. It has no
  // default initializer, so only the converting constructor assigns it.
  intptr_t callable;

  // Trampoline instantiated per callable type: casts the stored address back
  // to Callable* and invokes it with the forwarded arguments.
  template<typename Callable>
  //__attribute__((noinline, noipa))
  static Ret callback_fn(intptr_t callable, Params ...params) {
    return (*reinterpret_cast<Callable*>(callable))(
        std::forward<Params>(params)...);
  }

public:
  // Empty references: callback keeps its nullptr default.
  __attribute__((noinline, noipa))
  function_ref() = default;
  __attribute__((noinline, noipa))
  function_ref(std::nullptr_t) {}

  // Converting constructor: records the address of the argument and the
  // matching callback_fn instantiation. Nothing is copied, so the callable
  // must stay alive for as long as this function_ref is invoked.
  template <typename Callable>
  //__attribute__((noinline, noipa))
  function_ref(
      Callable &&callable,
      // This is not the copy-constructor.
      std::enable_if_t<
          !std::is_same<std::remove_cv_t<std::remove_reference_t<Callable>>,
                        function_ref>::value> * = nullptr,
      // Functor must be callable and return a suitable type.
      std::enable_if_t<std::is_void<Ret>::value ||
                       std::is_convertible<decltype(std::declval<Callable>()(
                                               std::declval<Params>()...)),
                                           Ret>::value> * = nullptr)
      : callback(callback_fn<typename std::remove_reference<Callable>::type>),
        callable(reinterpret_cast<intptr_t>(&callable)) {}

  // Invokes the referenced callable through the stored trampoline.
  //__attribute__((noinline, noipa))
  Ret operator()(Params ...params) const {
    return callback(callable, std::forward<Params>(params)...);
  }

  // True when a callback has been set (i.e. the reference is not empty).
  __attribute__((noinline, noipa))
  explicit operator bool() const { return callback; }
};

// Plain-int stand-ins used by the reduced code below.
typedef int OI;
typedef int OBB;

typedef function_ref<bool(OI, int WaitStates)> IsExpiredFnT;
typedef function_ref<bool(OI)> IsHazardFnT;

// Call-counting stub: returns I on the first call, I + 1 on the second call,
// and I on every later call. MBB is not used.
__attribute__((noinline, noipa))
OI get_e(
    OBB MBB, OI I) {
  static int n = 0;
  switch (n++) {
    case 0:
      return I;
    case 1:
      return ++I;
    default:
      return I;
  }
}

// Returns MBB unchanged.
__attribute__((noinline, noipa))
static OBB get_mbb_b(OBB MBB) {
  return MBB;
}

// Call-counting stub: returns MBB + 1 on the first call and MBB afterwards,
// so it differs from get_mbb_b() exactly once.
__attribute__((noinline, noipa))
static OBB get_mbb_e(OBB MBB) {
  static int n = 0;
  switch (n++) {
    case 0:
      return MBB + 1;
    default:
      return MBB;
  }
}

// Recursive walker. Every path returns numeric_limits<int>::max(); the
// recursive call's result is discarded. IsHazard is only passed along and is
// never invoked here.
//
// With the stubs above and the call made from bug(): the outer invocation
// sees I == E and pri != pre, so it recurses once; the inner invocation sees
// I != E and calls IsExpired(I, 2). That call is the one that must reach the
// IsExpiredFn lambda defined in bug().
__attribute__((noinline))
static int getWaitStatesSince6(IsHazardFnT IsHazard,
                               OBB MBB,
                               OI I,
                               int WaitStates,
                               IsExpiredFnT IsExpired) {
  auto E = get_e(MBB, I);
  if (I != E) {
    WaitStates += 2;

    if (IsExpired(I, WaitStates))
      return std::numeric_limits<int>::max();
  }

  auto pri = get_mbb_b(MBB);
  auto pre = get_mbb_e(MBB);
  if (pri != pre) {
    OBB Pred = pri;
    getWaitStatesSince6(IsHazard, Pred, I, WaitStates, IsExpired);
  }

  return std::numeric_limits<int>::max();
}

// Thin wrapper that starts the walk with MBB = 0 and WaitStates = 0.
__attribute__((noinline)) // not a noclone
static int getWaitStatesSince3(IsHazardFnT IsHazard,
                               OI MI,
                               IsExpiredFnT IsExpired) {
  return getWaitStatesSince6(IsHazard, 0, MI, 0, IsExpired);
}

// The code path that is actually executed. Both lambdas are captureless and
// harmless (IsExpiredFn simply returns true), so a correct build runs to
// completion. The function_ref arguments refer to these local closure
// objects, which outlive the call.
__attribute__((noinline, noipa))
bool bug(OI MI) {
  auto IsHazardFn = [](OI I) __attribute__((noinline, noipa)) {
    return false;
  };

  auto IsExpiredFn = [](OI MI, int) __attribute__((noinline, noipa)) {
    return true;
  };

  ::getWaitStatesSince3(IsHazardFn, MI, IsExpiredFn);
  return true;
}

__attribute__((noinline, noipa))
int main() {
  bug(0);
}

#if defined(DISABLE_HACK)
#else
// Nothing in this file calls this function. Its IsExpiredFn lambda traps when
// invoked, so a run that dies with "Illegal instruction" means this lambda was
// executed in place of the one defined in bug(). Building with DISABLE_HACK
// removes the function.
__attribute__((noinline, noipa))
int seemingly_unused_foo(IsHazardFnT IsHazard, int Limit, OI MI) {
  auto IsExpiredFn = [Limit] (OI, int WaitStates) {
    __builtin_trap();
    return WaitStates >= Limit;
  };

  return ::getWaitStatesSince3(IsHazard, MI, IsExpiredFn);
}
#endif