https://gcc.gnu.org/bugzilla/show_bug.cgi?id=106322
--- Comment #18 from Mathieu Malaterre <malat at debian dot org> ---
Brushed-up example (with Makefile):

% more Makefile bytes.cc demo.cc
::::::::::::::
Makefile
::::::::::::::
CXXFLAGS := -O2

demo: demo.o bytes.o
	$(CXX) $(CXXFLAGS) -o $@ $^ -lhwy

clean:
	rm -f bytes.o demo.o
::::::::::::::
bytes.cc
::::::::::::::
#include <cstring>

bool BytesEqual2(const void *bytes1, const void *bytes2, const size_t size) {
  return memcmp(bytes1, bytes2, size) == 0;
}
::::::::::::::
demo.cc
::::::::::::::
#include "hwy/aligned_allocator.h"
#include "hwy/highway.h"
#include <cstring>

bool BytesEqual2(const void *p1, const void *p2, const size_t size);

template <class D, class V>
void AssertVecEqual2(D d, const uint16_t *expected, const V &actual) {
  const size_t N = 2;
  auto actual_lanes = hwy::AllocateAligned<uint16_t>(N);
  Store(actual, d, actual_lanes.get());

  const uint8_t *expected_array = reinterpret_cast<const uint8_t *>(expected);
  const uint8_t *actual_array =
      reinterpret_cast<const uint8_t *>(actual_lanes.get());
  for (size_t i = 0; i < N; ++i) {
    const uint8_t *expected_ptr = expected_array + i * 2;
    const uint8_t *actual_ptr = actual_array + i * 2;
#if 1
    // trigger bug
    if (!BytesEqual2(expected_ptr, actual_ptr, 2)) {
#else
    // no bug
    if (std::memcmp(expected_ptr, actual_ptr, 2) != 0) {
#endif
      abort();
    }
  }
}

int main() {
  hwy::N_EMU128::FixedTag<uint16_t, 2> d;
  const size_t N = 2;
  hwy::AlignedFreeUniquePtr<uint16_t[]> in_lanes =
      hwy::AllocateAligned<uint16_t>(N);
  uint16_t expected_lanes[2];
  in_lanes[0] = 65535;
  in_lanes[1] = 32767;
  expected_lanes[0] = 65534;
  expected_lanes[1] = 16383;
  hwy::N_EMU128::Vec128<uint16_t, 2> v = Load(d, in_lanes.get());
  hwy::N_EMU128::Vec128<uint16_t, 2> actual = MulHigh(v, v);
  AssertVecEqual2(d, expected_lanes, actual);
}