https://gcc.gnu.org/bugzilla/show_bug.cgi?id=115833
--- Comment #2 from Richard Biener <rguenth at gcc dot gnu.org> ---

typedef unsigned short v4hi __attribute__((vector_size(8)));
typedef unsigned int v4si __attribute__((vector_size(16)));

v4hi foo (unsigned short a, unsigned short b, unsigned short c, unsigned short d)
{
  return (v4hi){a, b, c, d};
}

v4hi bar (unsigned short a, unsigned short b, unsigned short c, unsigned short d)
{
  return __builtin_convertvector ((v4si){a, b, c, d}, v4hi);
}

maybe not:

foo:
.LFB0:
        .cfi_startproc
        movzwl %cx, %ecx
        movzwl %dx, %edx
        movzwl %si, %esi
        movzwl %di, %edi
        salq $16, %rcx
        orq %rdx, %rcx
        salq $16, %rcx
        orq %rsi, %rcx
        salq $16, %rcx
        orq %rdi, %rcx
        movq %rcx, %xmm0
        ret
bar:
.LFB1:
        .cfi_startproc
        movzwl %di, %eax
        movzwl %si, %esi
        movzwl %dx, %edx
        movzwl %cx, %ecx
        movd %eax, %xmm0
        movd %edx, %xmm1
        movd %ecx, %xmm3
        movd %esi, %xmm4
        punpckldq %xmm3, %xmm1
        pxor %xmm2, %xmm2
        punpckldq %xmm4, %xmm0
        punpcklqdq %xmm1, %xmm0
        movdqa %xmm0, %xmm1
        punpcklwd %xmm2, %xmm0
        punpckhwd %xmm2, %xmm1
        movdqa %xmm0, %xmm2
        punpckhwd %xmm1, %xmm2
        punpcklwd %xmm1, %xmm0
        punpcklwd %xmm2, %xmm0
        ret

though bar() looks like I expected in .optimized:

  <bb 2> [local count: 1073741824]:
  _1 = (unsigned int) a_5(D);
  _2 = (unsigned int) b_6(D);
  _3 = (unsigned int) c_7(D);
  _4 = (unsigned int) d_8(D);
  _9 = {_1, _2, _3, _4};
  _12 = VEC_PACK_TRUNC_EXPR <_9, { 0, 0, 0, 0 }>;
  _13 = BIT_FIELD_REF <_12, 64, 0>;
  return _13;

it's a little bit better with SSE4:

bar:
.LFB1:
        .cfi_startproc
        movzwl %di, %eax
        movzwl %dx, %edx
        movzwl %si, %esi
        movzwl %cx, %ecx
        movd %eax, %xmm1
        movd %edx, %xmm0
        pinsrd $1, %ecx, %xmm0
        pinsrd $1, %esi, %xmm1
        punpcklqdq %xmm0, %xmm1
        pxor %xmm0, %xmm0
        pblendw $85, %xmm1, %xmm0
        pxor %xmm1, %xmm1
        packusdw %xmm1, %xmm0
        ret

but

        pxor %xmm0, %xmm0
        pblendw $85, %xmm1, %xmm0
        pxor %xmm1, %xmm1
        packusdw %xmm1, %xmm0

is a bit odd for the packing. Possibly the target lacks a truncv4siv4hi operation (thus the explicit zero vector). Possibly x86 lacks a pack-lowpart/pack-highpart insn.