pinsrw is available for both register and memory operands under SSE2, whereas pextrw requires SSE4.1 for memory operands.
This patch changes the "isa" attribute of the pinsrw memory alternative from sse4_noavx to noavx, which enables the optimization below: - movzwl (%rdi), %eax pxor %xmm1, %xmm1 - pinsrw $0, %eax, %xmm1 + pinsrw $0, (%rdi), %xmm1 movdqa %xmm1, %xmm0 Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}. Ok for trunk? gcc/ChangeLog: PR target/105066 * config/i386/sse.md (vec_set<mode>_0): Change attr "isa" of alternative 4 from sse4_noavx to noavx. gcc/testsuite/ChangeLog: * gcc.target/i386/pr105066.c: New test. --- gcc/config/i386/sse.md | 4 ++-- gcc/testsuite/gcc.target/i386/pr105066.c | 10 ++++++++++ 2 files changed, 12 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr105066.c diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index a9e18d38323..27e9629f4b0 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -10617,9 +10617,9 @@ (define_insn "vec_set<mode>_0" [(set (attr "isa") (cond [(eq_attr "alternative" "0,1,2") (const_string "avx512fp16") - (eq_attr "alternative" "3") + (eq_attr "alternative" "3,4") (const_string "noavx") - (eq_attr "alternative" "4,5,6") + (eq_attr "alternative" "5,6") (const_string "sse4_noavx") (eq_attr "alternative" "7,8,9") (const_string "avx") diff --git a/gcc/testsuite/gcc.target/i386/pr105066.c b/gcc/testsuite/gcc.target/i386/pr105066.c new file mode 100644 index 00000000000..c5c5b9e12de --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr105066.c @@ -0,0 +1,10 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse2 -mno-sse4.1" } */ +/* { dg-final { scan-assembler-not "movzwl" } } */ +/* { dg-final { scan-assembler {(?n)pinsrw[ \t]+\$0.*\(%} } } */ + +#include <immintrin.h> + +__m128i load16(void *p){ + return _mm_loadu_si16(p); +} -- 2.18.1