https://gcc.gnu.org/bugzilla/show_bug.cgi?id=106470
/*
 * Comment #3 from Andrew Pinski <pinskia at gcc dot gnu.org>:
 * The other fix is to use _mm256_extract_epi16, e.g.:
 */

/*
 * Return the 16-bit element of `v` selected by the run-time index `pos`.
 *
 * _mm256_extract_epi16 takes its element index as an immediate operand, so
 * a run-time index is dispatched through a switch in which every call
 * passes a literal constant.
 *
 * v:   256-bit integer vector to read from.
 * pos: element index; 0..15 select an element, any other value yields 0.
 *
 * Declared `static inline` rather than plain `inline`: in C99/C11 a plain
 * `inline` definition with no matching `extern` declaration does not emit
 * an external definition, so a call the compiler decides not to inline
 * (e.g. at -O0) would fail to link.
 */
static inline unsigned short extract_epi16(__m256i v, int pos)
{
    switch (pos) {
    case 0:  return _mm256_extract_epi16(v, 0);
    case 1:  return _mm256_extract_epi16(v, 1);
    case 2:  return _mm256_extract_epi16(v, 2);
    case 3:  return _mm256_extract_epi16(v, 3);
    case 4:  return _mm256_extract_epi16(v, 4);
    case 5:  return _mm256_extract_epi16(v, 5);
    case 6:  return _mm256_extract_epi16(v, 6);
    case 7:  return _mm256_extract_epi16(v, 7);
    case 8:  return _mm256_extract_epi16(v, 8);
    case 9:  return _mm256_extract_epi16(v, 9);
    case 10: return _mm256_extract_epi16(v, 10);
    case 11: return _mm256_extract_epi16(v, 11);
    case 12: return _mm256_extract_epi16(v, 12);
    case 13: return _mm256_extract_epi16(v, 13);
    case 14: return _mm256_extract_epi16(v, 14);
    case 15: return _mm256_extract_epi16(v, 15);
    default: return 0; /* index outside 0..15 */
    }
}

/*
 * ...
 *
 * Usage at the call site (`tmp` is the __m256i being printed):
 *
 *     for (size_t i = 0; i < 16; i++) { printf(" %04x", extract_epi16(tmp, i)); }
 */