https://gcc.gnu.org/bugzilla/show_bug.cgi?id=121634
--- Comment #2 from Xi Ruoyao <xry111 at gcc dot gnu.org> ---

// Self-contained test case posted in this comment. It needs no headers; it
// relies on GNU vector extensions and on the __builtin_lsx_vmaddwod_w_h
// builtin (a GCC LoongArch LSX builtin, so a LoongArch target with LSX enabled
// is presumably required to compile it -- confirm against the bug report).
// NOTE(review): the identifier names and the uninitialised reads below look
// like the output of an automatic test-case reducer; the code is kept
// token-for-token as posted and should not be "cleaned up".

// 16-byte vector of short (GNU vector_size attribute).
typedef short v8i16 __attribute__((vector_size(16)));
// 16-byte vector of long.
typedef long __m128i __attribute__((__vector_size__(16)));

// Namespace-scope vectors: the first is fed to the builtin as its first
// operand, the second receives the builtin's result.
__m128i __lsx_vmaddwod_w_h__1, WidenMulPairwiseAdd___trans_tmp_2;

// Alias template that ignores both of its arguments and always names int.
template <typename, int> using CappedTag = int;

// Thin wrapper holding one raw 16-byte vector.
struct Vec128 { __m128i raw; };

// Declared but never defined; only used inside decltype to name Vec128.
Vec128 Zero(int);

// Return type of Zero(D()); with D = int this is Vec128.
template <class D> using VFromD = decltype(Zero(D()));

// Calls __builtin_lsx_vmaddwod_w_h with:
//   1. the global __lsx_vmaddwod_w_h__1 cast to a vector of int that is
//      4 * sizeof(int) bytes wide,
//   2. a.raw cast to v8i16,
//   3. b.raw cast to v8i16,
// stores the result (cast back to __m128i) in the global
// WidenMulPairwiseAdd___trans_tmp_2, and returns a Vec128 brace-initialised
// from that global.
VFromD<int> WidenMulPairwiseAdd(Vec128 a, Vec128 b) {
  WidenMulPairwiseAdd___trans_tmp_2 = (__m128i)__builtin_lsx_vmaddwod_w_h(
      (__attribute__(( __vector_size__(4 * sizeof(int)))) int)__lsx_vmaddwod_w_h__1,
      (v8i16)a.raw, (v8i16)b.raw);
  return {WidenMulPairwiseAdd___trans_tmp_2};
}

// Second alias with the same definition as VFromD.
template <class D> using Vec = decltype(Zero(D()));

// Functor whose call operator invokes WidenMulPairwiseAdd once.
struct TestWidenMulPairwiseAdd {
  // The first parameter is unnamed and unused; dn is only used through
  // decltype(dn), i.e. as the type int.
  void operator()(short, int dn) {
    // bf0 appears in its own initialiser, so both arguments passed to
    // WidenMulPairwiseAdd are the not-yet-initialised bf0.
    Vec<decltype(dn)> bf0 = WidenMulPairwiseAdd(bf0, bf0);
  }
};

// Forwards to TestWidenMulPairwiseAdd with a CappedTag<short, kMul> tag.
template <int kMul> struct ForeachCappedR {
  // Both int parameters are unnamed and unused.
  static void Do(int, int) {
    // d is a plain int (see CappedTag) and is passed on uninitialised.
    CappedTag<short, kMul> d;
    TestWidenMulPairwiseAdd()(short(), d);
  }
};

// Functor template; kPow2 defaults to 1 and selects the ForeachCappedR
// instantiation.
template <int kPow2 = 1> struct ForShrinkableVectors {
  // The argument is unnamed; only its type T takes part in the call.
  template <typename T> void operator()(T) {
    // kMaxCapped is passed to Do() without having been initialised.
    int kMaxCapped;
    ForeachCappedR<kPow2>::Do(kPow2, kMaxCapped);
  }
};

// Entry point of the test case: constructs ForShrinkableVectors with its
// default template argument and calls it with a value-initialised short,
// which instantiates the whole chain above.
void TestAllWidenMulPairwiseAdd() { ForShrinkableVectors()(short()); }