https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111366
--- Comment #15 from Mathieu Malaterre <malat at debian dot org> ---
For some reason the no-htm flag does not seem to work in my case:
% /usr/bin/c++ -O1 -mcpu=power8 -flto=auto -c skeleton_test.cc
skeleton_test.cc: In member function 'TestFloorLog2::operator()<int,
detail::CappedTagChecker<int, 1u, 0> >(int, detail::CappedTagChecker<int, 1u,
0>)void':
skeleton_test.cc:5:44: error: inlining failed in call to 'always_inline'
'hwy::PreventElision(int)': target specific option mismatch
5 | inline __attribute__((always_inline)) void PreventElision(int output) {
| ^~~~~~~~~~~~~~
skeleton_test.cc:36:26: note: called from here
36 | hwy::PreventElision(sum);
| ~~~~~~~~~~~~~~~~~~~^~~~~
with:
% cat skeleton_test.cc
#include <stddef.h>
// Stringizes `tokens` and hands the result to the _Pragma operator, so that a
// #pragma directive can be produced from inside a macro expansion.
#define HWY_PRAGMA(tokens) _Pragma(#tokens)
// Holds the always_inline helper named in the "inlining failed" diagnostic.
// Everything in this namespace appears before HWY_BEFORE_NAMESPACE() is
// expanded later in the file, i.e. before the `GCC target` pragma is issued.
namespace hwy {
// Expands to `#pragma GCC target targets_str` by way of HWY_PRAGMA.
#define HWY_PUSH_ATTRIBUTES(targets_str) HWY_PRAGMA(GCC target targets_str)
// Forced-inline function whose body is a single empty asm statement. The asm
// lists `output` as a read-write register operand ("+r") and declares a
// "memory" clobber; it emits no instructions of its own.
inline __attribute__((always_inline)) void PreventElision(int output) {
asm("" : "+r"(output) : : "memory");
}
} // namespace hwy
// Neither HWY_TARGET nor HWY_PPC10 is defined in this reduced file, so the
// preprocessor treats both as 0 and the condition is true.
// HWY_BEFORE_NAMESPACE() therefore expands to
// `#pragma GCC target ",cpu=power10,no-htm"` (note the leading comma in the
// option string).
#if HWY_TARGET == HWY_PPC10
#define HWY_BEFORE_NAMESPACE() HWY_PUSH_ATTRIBUTES(",cpu=power10,no-htm")
#endif
// The target pragma is issued here, immediately before `namespace detail`.
// No matching pop/reset pragma appears in this file, so the declarations that
// follow are all written after the pragma.
HWY_BEFORE_NAMESPACE() namespace detail {
// Empty tag type; its three template parameters are unnamed and unused here.
template <typename, size_t, int> struct CappedTagChecker {};
}
// Alias for detail::CappedTagChecker<T, kLimit, kPow2>; kPow2 defaults to 0.
template <typename T, size_t kLimit, int kPow2 = 0>
using CappedTag = detail::CappedTagChecker<T, kLimit, kPow2>;
// Declaration only: no definition of Lanes is present in this reduced file.
// Takes a tag object by value and returns a size_t.
template <class D> size_t Lanes(D);
// Driver that invokes the functor `Test` with an int value and a
// CappedTag<int, kMinArg> tag. The first two template parameters are unnamed
// and unused.
template <typename, size_t, size_t kMinArg, class Test> struct ForeachCappedR {
// Calls Test()(int(), d) once, then calls itself again with the same
// arguments. The recursion has no terminating condition; presumably that is
// an artifact of test-case reduction, and it is irrelevant to the
// compile-time error being reported.
static void Do(size_t min_lanes, size_t max_lanes) {
CappedTag<int, kMinArg> d;
Test()(int(), d);
Do(min_lanes, max_lanes);
}
};
// Functor wrapper: calling it with a value of type T forwards to
// ForeachCappedR<T, 1, 1, Test>::Do(1, 1).
template <class Test> struct ForPartialVectors {
template <typename T> void operator()(T t) {
// `t` is only used for type deduction; the cast silences unused warnings.
(void)t;
ForeachCappedR<T, 1, 1, Test>::Do(1, 1);
}
};
// Functor whose operator() contains the call site reported in the diagnostic
// ("called from here": hwy::PreventElision(sum)).
struct TestFloorLog2 {
// Written after the `GCC target` pragma, whereas hwy::PreventElision is
// defined before it; the compiler reports "target specific option mismatch"
// when trying to inline the always_inline callee into this function.
template <class T, class DF> void operator()(T, DF df) {
size_t count = Lanes(df);
// NOTE: `sum` and the loop index `i` are never initialized, and the loop
// condition is just `count`, which the loop never modifies. Presumably these
// are artifacts of test-case reduction; they are left untouched so the
// reproducer stays exactly as reported.
int sum;
for (size_t i; count; ++i)
hwy::PreventElision(sum);
}
};
// Non-template entry point: instantiates the whole chain
// ForPartialVectors -> ForeachCappedR::Do -> TestFloorLog2::operator(),
// which is what makes the compiler process the PreventElision call.
void TestAllFloorLog2() { ForPartialVectors<TestFloorLog2>()(float()); }