pinsrw is available for both reg and mem operands under sse2.
pextrw requires sse4.1 for mem operands.

This patch changes the "isa" attribute of the pinsrw mem alternative from
sse4_noavx to noavx, which enables the optimization below.

-        movzwl  (%rdi), %eax
         pxor    %xmm1, %xmm1
-        pinsrw  $0, %eax, %xmm1
+        pinsrw  $0, (%rdi), %xmm1
         movdqa  %xmm1, %xmm0

Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
Ok for trunk?

gcc/ChangeLog:

        PR target/105066
        * config/i386/sse.md (vec_set<mode>_0): Change attr "isa" of
        alternative 4 from sse4_noavx to noavx.

gcc/testsuite/ChangeLog:

        * gcc.target/i386/pr105066.c: New test.
---
 gcc/config/i386/sse.md                   |  4 ++--
 gcc/testsuite/gcc.target/i386/pr105066.c | 10 ++++++++++
 2 files changed, 12 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr105066.c

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index a9e18d38323..27e9629f4b0 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -10617,9 +10617,9 @@ (define_insn "vec_set<mode>_0"
   [(set (attr "isa")
        (cond [(eq_attr "alternative" "0,1,2")
                 (const_string "avx512fp16")
-              (eq_attr "alternative" "3")
+              (eq_attr "alternative" "3,4")
                 (const_string "noavx")
-              (eq_attr "alternative" "4,5,6")
+              (eq_attr "alternative" "5,6")
                 (const_string "sse4_noavx")
               (eq_attr "alternative" "7,8,9")
                 (const_string "avx")
diff --git a/gcc/testsuite/gcc.target/i386/pr105066.c b/gcc/testsuite/gcc.target/i386/pr105066.c
new file mode 100644
index 00000000000..c5c5b9e12de
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr105066.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2 -mno-sse4.1" } */
+/* { dg-final { scan-assembler-not "movzwl" } } */
+/* { dg-final { scan-assembler {(?n)pinsrw[ \t]+\$0.*\(%} } } */
+
+#include <immintrin.h>
+
+__m128i load16(void *p){
+    return _mm_loadu_si16(p);
+}
-- 
2.18.1

Reply via email to