On 10/24/18, Uros Bizjak <ubiz...@gmail.com> wrote:
> On Wed, Oct 24, 2018 at 1:51 AM H.J. Lu <hjl.to...@gmail.com> wrote:
>>
>> Hi Uros,
>>
>> Can you take a look at this?
>
> Many pmov(s|z)x instructions with memory operands are modelled in a wrong
> way.
>
> For example:
>
> (define_insn "sse4_1_<code>v8qiv8hi2<mask_name>"
>   [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,v")
>     (any_extend:V8HI
>       (vec_select:V8QI
>         (match_operand:V16QI 1 "nonimmediate_operand" "Yrm,*xm,vm")
>         (parallel [(const_int 0) (const_int 1)
>                (const_int 2) (const_int 3)
>                (const_int 4) (const_int 5)
>                (const_int 6) (const_int 7)]))))]
>
> should be defind for memory operands as:
>
> (define_insn "sse4_1_<code>v8qiv8hi2<mask_name>"
>   [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,v")
>     (any_extend:V8HI
>       (match_operand:V8QI "memory_operand" "m,m,m")))]
>
> The insn, as is currently modelled, implies 128 bit read from memory,
> which is not the case. -masm-intel will fail.
>
> The correct solution would be to use a different pattern for memory
> operands (similar to the one above), and handle simplification of
> vec_select part in a generic way. Please also note that V8QI implies
> MMX registers, so there is danger of generating an unwanted move to
> MMX reg, which will disable x87 registers.
>

Since vectorizer works only with XMM and YMM registers, wee can't
change existing patterns.  As you pointed out that V8QI might trigger
MMX registers.  New patterns must match the existing patterns.  Otherwise
combiner won't work.   This is the updated patch to handle v8qi to v8si2 and
v4hi to v4di2.


-- 
H.J.
From cddd242a3465301a6488a30368cb0d5fef11a550 Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.to...@gmail.com>
Date: Sat, 15 Sep 2018 20:54:42 -0700
Subject: [PATCH] x86: Add pmovzx/pmovsx patterns with SI/DI operands

Add pmovzx/pmovsx patterns with SI and DI operands for pmovzx/pmovsx
instructions which only read the low 4 or 8 bytes from the source.

The new patterns allow 8 byte register operands so that they work with
__m64 arguments passed in XMM registers.  If we don't need to handle
__m64 arguments, we can only allow memory operands.

gcc/

	PR target/87317
	* config/i386/sse.md (*sse4_1_<code>v8qiv8hi2<mask_name>): New
	pattern.
	(*avx2_<code>v8qiv8si2<mask_name>): Likewise.
	(*sse4_1_<code>v4qiv4si2<mask_name>): Likewise.
	(*sse4_1_<code>v4hiv4si2<mask_name>): Likewise.
	(*avx2_<code>v4hiv4di2<mask_name>): Likewise.
	(*sse4_1_<code>v2hiv2di2<mask_name>): Likewise.
	(*sse4_1_<code>v2siv2di2<mask_name>): Likewise.

gcc/testsuite/

	PR target/87317
	* gcc.target/i386/pr87317-1.c: New file.
	* gcc.target/i386/pr87317-2.c: Likewise.
	* gcc.target/i386/pr87317-3.c: Likewise.
	* gcc.target/i386/pr87317-4.c: Likewise.
	* gcc.target/i386/pr87317-5.c: Likewise.
	* gcc.target/i386/pr87317-6.c: Likewise.
	* gcc.target/i386/pr87317-7.c: Likewise.
	* gcc.target/i386/pr87317-8.c: Likewise.
	* gcc.target/i386/pr87317-9.c: Likewise.
	* gcc.target/i386/pr87317-10.c: Likewise.
	* gcc.target/i386/pr87317-11.c: Likewise.
	* gcc.target/i386/pr87317-12.c: Likewise.
	* gcc.target/i386/pr87317-13.c: Likewise.
	* gcc.target/i386/pr87317-14.c: Likewise.
	* gcc.target/i386/pr87317-15.c: Likewise.

xx
---
 gcc/config/i386/sse.md                     | 134 +++++++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr87317-1.c  |  13 ++
 gcc/testsuite/gcc.target/i386/pr87317-10.c |  13 ++
 gcc/testsuite/gcc.target/i386/pr87317-11.c |  13 ++
 gcc/testsuite/gcc.target/i386/pr87317-12.c |  13 ++
 gcc/testsuite/gcc.target/i386/pr87317-13.c |  13 ++
 gcc/testsuite/gcc.target/i386/pr87317-14.c |  13 ++
 gcc/testsuite/gcc.target/i386/pr87317-15.c |  22 ++++
 gcc/testsuite/gcc.target/i386/pr87317-2.c  |  13 ++
 gcc/testsuite/gcc.target/i386/pr87317-3.c  |  13 ++
 gcc/testsuite/gcc.target/i386/pr87317-4.c  |  13 ++
 gcc/testsuite/gcc.target/i386/pr87317-5.c  |  13 ++
 gcc/testsuite/gcc.target/i386/pr87317-6.c  |  13 ++
 gcc/testsuite/gcc.target/i386/pr87317-7.c  |  13 ++
 gcc/testsuite/gcc.target/i386/pr87317-8.c  |  13 ++
 gcc/testsuite/gcc.target/i386/pr87317-9.c  |  13 ++
 16 files changed, 338 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr87317-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr87317-10.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr87317-11.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr87317-12.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr87317-13.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr87317-14.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr87317-15.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr87317-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr87317-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr87317-4.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr87317-5.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr87317-6.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr87317-7.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr87317-8.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr87317-9.c

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index ee73e1fdf80..76dd7bc7168 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -15891,6 +15891,26 @@
    (set_attr "prefix" "orig,orig,maybe_evex")
    (set_attr "mode" "TI")])
 
+(define_insn "*sse4_1_<code>v8qiv8hi2<mask_name>"
+  [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,v")
+	(any_extend:V8HI
+	  (vec_select:V8QI
+	    (subreg:V16QI
+	      (vec_concat:V2DI
+	        (match_operand:DI 1 "nonimmediate_operand" "Yrm,*xm,vm")
+		(const_int 0)) 0)
+	    (parallel [(const_int 0) (const_int 1)
+		       (const_int 2) (const_int 3)
+		       (const_int 4) (const_int 5)
+		       (const_int 6) (const_int 7)]))))]
+  "TARGET_SSE4_1 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
+  "%vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
+  [(set_attr "isa" "noavx,noavx,avx")
+   (set_attr "type" "ssemov")
+   (set_attr "prefix_extra" "1")
+   (set_attr "prefix" "orig,orig,maybe_evex")
+   (set_attr "mode" "TI")])
+
 (define_insn "<mask_codefor>avx512f_<code>v16qiv16si2<mask_name>"
   [(set (match_operand:V16SI 0 "register_operand" "=v")
 	(any_extend:V16SI
@@ -15917,6 +15937,25 @@
    (set_attr "prefix" "maybe_evex")
    (set_attr "mode" "OI")])
 
+(define_insn "*avx2_<code>v8qiv8si2<mask_name>"
+  [(set (match_operand:V8SI 0 "register_operand" "=v")
+	(any_extend:V8SI
+	  (vec_select:V8QI
+	    (subreg:V16QI
+	      (vec_concat:V2DI
+	        (match_operand:DI 1 "nonimmediate_operand" "vm")
+		(const_int 0)) 0)
+	    (parallel [(const_int 0) (const_int 1)
+		       (const_int 2) (const_int 3)
+		       (const_int 4) (const_int 5)
+		       (const_int 6) (const_int 7)]))))]
+  "TARGET_AVX2 && <mask_avx512vl_condition>"
+  "%vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
+  [(set_attr "type" "ssemov")
+   (set_attr "prefix_extra" "1")
+   (set_attr "prefix" "maybe_evex")
+   (set_attr "mode" "OI")])
+
 (define_insn "sse4_1_<code>v4qiv4si2<mask_name>"
   [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
 	(any_extend:V4SI
@@ -15932,6 +15971,28 @@
    (set_attr "prefix" "orig,orig,maybe_evex")
    (set_attr "mode" "TI")])
 
+(define_insn "*sse4_1_<code>v4qiv4si2<mask_name>"
+  [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
+	(any_extend:V4SI
+	  (vec_select:V4QI
+	    (subreg:V16QI
+	      (vec_merge:V4SI
+	        (vec_duplicate:V4SI
+		  (match_operand:SI 1 "memory_operand" "m,*m,m"))
+		(const_vector:V4SI
+		   [(const_int 0) (const_int 0)
+		    (const_int 0) (const_int 0)])
+		(const_int 1)) 0)
+	    (parallel [(const_int 0) (const_int 1)
+		       (const_int 2) (const_int 3)]))))]
+  "TARGET_SSE4_1 && <mask_avx512vl_condition>"
+  "%vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
+  [(set_attr "isa" "noavx,noavx,avx")
+   (set_attr "type" "ssemov")
+   (set_attr "prefix_extra" "1")
+   (set_attr "prefix" "orig,orig,maybe_evex")
+   (set_attr "mode" "TI")])
+
 (define_insn "avx512f_<code>v16hiv16si2<mask_name>"
   [(set (match_operand:V16SI 0 "register_operand" "=v")
 	(any_extend:V16SI
@@ -15968,6 +16029,24 @@
    (set_attr "prefix" "orig,orig,maybe_evex")
    (set_attr "mode" "TI")])
 
+(define_insn "*sse4_1_<code>v4hiv4si2<mask_name>"
+  [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
+	(any_extend:V4SI
+	  (vec_select:V4HI
+	    (subreg:V8HI
+	      (vec_concat:V2DI
+		(match_operand:DI 1 "nonimmediate_operand" "Yrm,*xm,vm")
+		(const_int 0)) 0)
+	    (parallel [(const_int 0) (const_int 1)
+		       (const_int 2) (const_int 3)]))))]
+  "TARGET_SSE4_1 && <mask_avx512vl_condition>"
+  "%vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
+  [(set_attr "isa" "noavx,noavx,avx")
+   (set_attr "type" "ssemov")
+   (set_attr "prefix_extra" "1")
+   (set_attr "prefix" "orig,orig,maybe_evex")
+   (set_attr "mode" "TI")])
+
 (define_insn "avx512f_<code>v8qiv8di2<mask_name>"
   [(set (match_operand:V8DI 0 "register_operand" "=v")
 	(any_extend:V8DI
@@ -16035,6 +16114,23 @@
    (set_attr "prefix" "maybe_evex")
    (set_attr "mode" "OI")])
 
+(define_insn "*avx2_<code>v4hiv4di2<mask_name>"
+  [(set (match_operand:V4DI 0 "register_operand" "=v")
+	(any_extend:V4DI
+	  (vec_select:V4HI
+	    (subreg:V8HI
+	      (vec_concat:V2DI
+	        (match_operand:DI 1 "nonimmediate_operand" "vm")
+		(const_int 0)) 0)
+	    (parallel [(const_int 0) (const_int 1)
+		       (const_int 2) (const_int 3)]))))]
+  "TARGET_AVX2 && <mask_avx512vl_condition>"
+  "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
+  [(set_attr "type" "ssemov")
+   (set_attr "prefix_extra" "1")
+   (set_attr "prefix" "maybe_evex")
+   (set_attr "mode" "OI")])
+
 (define_insn "sse4_1_<code>v2hiv2di2<mask_name>"
   [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
 	(any_extend:V2DI
@@ -16049,6 +16145,27 @@
    (set_attr "prefix" "orig,orig,maybe_evex")
    (set_attr "mode" "TI")])
 
+(define_insn "*sse4_1_<code>v2hiv2di2<mask_name>"
+  [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
+	(any_extend:V2DI
+	  (vec_select:V2HI
+	    (subreg:V8HI
+	      (vec_merge:V4SI
+	        (vec_duplicate:V4SI
+	          (match_operand:SI 1 "memory_operand" "m,*m,m"))
+		(const_vector:V4SI
+		   [(const_int 0) (const_int 0)
+		    (const_int 0) (const_int 0)])
+		(const_int 1)) 0)
+	    (parallel [(const_int 0) (const_int 1)]))))]
+  "TARGET_SSE4_1 && <mask_avx512vl_condition>"
+  "%vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
+  [(set_attr "isa" "noavx,noavx,avx")
+   (set_attr "type" "ssemov")
+   (set_attr "prefix_extra" "1")
+   (set_attr "prefix" "orig,orig,maybe_evex")
+   (set_attr "mode" "TI")])
+
 (define_insn "avx512f_<code>v8siv8di2<mask_name>"
   [(set (match_operand:V8DI 0 "register_operand" "=v")
 	(any_extend:V8DI
@@ -16084,6 +16201,23 @@
    (set_attr "prefix" "orig,orig,maybe_evex")
    (set_attr "mode" "TI")])
 
+(define_insn "*sse4_1_<code>v2siv2di2<mask_name>"
+  [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
+	(any_extend:V2DI
+	  (vec_select:V2SI
+	    (subreg:V4SI
+	      (vec_concat:V2DI
+		(match_operand:DI 1 "nonimmediate_operand" "Yrm,*xm,vm")
+		(const_int 0)) 0)
+	    (parallel [(const_int 0) (const_int 1)]))))]
+  "TARGET_SSE4_1 && <mask_avx512vl_condition>"
+  "%vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
+  [(set_attr "isa" "noavx,noavx,avx")
+   (set_attr "type" "ssemov")
+   (set_attr "prefix_extra" "1")
+   (set_attr "prefix" "orig,orig,maybe_evex")
+   (set_attr "mode" "TI")])
+
 ;; ptestps/ptestpd are very similar to comiss and ucomiss when
 ;; setting FLAGS_REG. But it is not a really compare instruction.
 (define_insn "avx_vtest<ssemodesuffix><avxsizesuffix>"
diff --git a/gcc/testsuite/gcc.target/i386/pr87317-1.c b/gcc/testsuite/gcc.target/i386/pr87317-1.c
new file mode 100644
index 00000000000..91f00368293
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr87317-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=haswell" } */
+/* { dg-final { scan-assembler-not "vmov(d|q)" } } */
+
+#include <immintrin.h>
+
+void
+f (void *dst, void *ptr)
+{
+  __m128i data = _mm_loadl_epi64((__m128i *)ptr);
+  data = _mm_cvtepu8_epi16(data);
+  _mm_storeu_si128((__m128i*)dst, data);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr87317-10.c b/gcc/testsuite/gcc.target/i386/pr87317-10.c
new file mode 100644
index 00000000000..99c657e9df8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr87317-10.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=haswell" } */
+/* { dg-final { scan-assembler-not "vmov(d|q)" } } */
+
+#include <immintrin.h>
+
+void
+f (void *dst, void *ptr)
+{
+  __m128i data = _mm_cvtsi32_si128(*(int*)ptr);
+  data = _mm_cvtepu8_epi32(data);
+  _mm_storeu_si128((__m128i*)dst, data);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr87317-11.c b/gcc/testsuite/gcc.target/i386/pr87317-11.c
new file mode 100644
index 00000000000..a170f66166e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr87317-11.c
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -march=haswell" } */
+/* { dg-final { scan-assembler-not "vmovq" } } */
+
+#include <immintrin.h>
+
+void
+f (void *dst, void *ptr)
+{
+  __m128i data = _mm_cvtsi64_si128(*(long long int*)ptr);
+  __m256i x = _mm256_cvtepu16_epi64(data);
+  _mm256_storeu_si256((__m256i*)dst, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr87317-12.c b/gcc/testsuite/gcc.target/i386/pr87317-12.c
new file mode 100644
index 00000000000..5c72f02cffe
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr87317-12.c
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -march=haswell" } */
+/* { dg-final { scan-assembler-not "vmovq" } } */
+
+#include <immintrin.h>
+
+void
+f (void *dst, void *ptr)
+{
+  __m128i data = _mm_cvtsi64_si128(*(long long int*)ptr);
+  __m256i x = _mm256_cvtepu8_epi32(data);
+  _mm256_storeu_si256((__m256i*)dst, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr87317-13.c b/gcc/testsuite/gcc.target/i386/pr87317-13.c
new file mode 100644
index 00000000000..e65ceb1ba51
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr87317-13.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=haswell" } */
+/* { dg-final { scan-assembler-not "vmovq" } } */
+
+#include <immintrin.h>
+
+void
+f (void *dst, __m64 x)
+{
+  __m128i y = _mm_movpi64_epi64(x);
+  __m256i z = _mm256_cvtepu8_epi32 (y);
+  _mm256_storeu_si256((__m256i*)dst, z);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr87317-14.c b/gcc/testsuite/gcc.target/i386/pr87317-14.c
new file mode 100644
index 00000000000..81bb121e218
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr87317-14.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=haswell" } */
+/* { dg-final { scan-assembler-not "vmovq" } } */
+
+#include <immintrin.h>
+
+void
+f (void *dst, __m64 x)
+{
+  __m128i y = _mm_movpi64_epi64(x);
+  __m256i z = _mm256_cvtepu16_epi64 (y);
+  _mm256_storeu_si256((__m256i*)dst, z);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr87317-15.c b/gcc/testsuite/gcc.target/i386/pr87317-15.c
new file mode 100644
index 00000000000..1090966c4d1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr87317-15.c
@@ -0,0 +1,22 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O3 -march=haswell" } */
+/* { dg-final { scan-assembler-times "vpmovsxwq" 1 } } */
+
+#include <immintrin.h>
+
+#define MAX 4
+
+long long int dst[MAX];
+short src[MAX];
+
+void
+foo (void)
+{
+  int i;
+  for (i = 0; i < MAX; i += 4)
+    {
+      __m128i data = _mm_cvtsi64_si128(*(long long int*)(src + i));
+      __m256i x = _mm256_cvtepi16_epi64(data);
+      _mm256_storeu_si256((__m256i*)(dst + i), x);
+    }
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr87317-2.c b/gcc/testsuite/gcc.target/i386/pr87317-2.c
new file mode 100644
index 00000000000..e21f00334e0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr87317-2.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=haswell" } */
+/* { dg-final { scan-assembler-not "vmov(d|q)" } } */
+
+#include <immintrin.h>
+
+void
+f (void *dst, void *ptr)
+{
+  __m128i data = _mm_loadl_epi64((__m128i *)ptr);
+  data = _mm_cvtepi16_epi32(data);
+  _mm_storeu_si128((__m128i*)dst, data);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr87317-3.c b/gcc/testsuite/gcc.target/i386/pr87317-3.c
new file mode 100644
index 00000000000..d4483f9c134
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr87317-3.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=haswell" } */
+/* { dg-final { scan-assembler-not "vmov(d|q)" } } */
+
+#include <immintrin.h>
+
+void
+f (void *dst, void *ptr)
+{
+  __m128i data = _mm_loadl_epi64((__m128i *)ptr);
+  data = _mm_cvtepi32_epi64(data);
+  _mm_storeu_si128((__m128i*)dst, data);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr87317-4.c b/gcc/testsuite/gcc.target/i386/pr87317-4.c
new file mode 100644
index 00000000000..dff24a9d657
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr87317-4.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=haswell" } */
+/* { dg-final { scan-assembler-not "vmov(d|q)" } } */
+
+#include <immintrin.h>
+
+void
+f (void *dst, __m64 x)
+{
+  __m128i y = _mm_movpi64_epi64(x);
+  __m128i z = _mm_cvtepu8_epi16(y);
+  _mm_storeu_si128((__m128i*)dst, z);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr87317-5.c b/gcc/testsuite/gcc.target/i386/pr87317-5.c
new file mode 100644
index 00000000000..574395894b1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr87317-5.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=haswell" } */
+/* { dg-final { scan-assembler-not "vmov(d|q)" } } */
+
+#include <immintrin.h>
+
+void
+f (void *dst, __m64 x)
+{
+  __m128i y = _mm_movpi64_epi64(x);
+  __m128i z = _mm_cvtepi16_epi32(y);
+  _mm_storeu_si128((__m128i*)dst, z);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr87317-6.c b/gcc/testsuite/gcc.target/i386/pr87317-6.c
new file mode 100644
index 00000000000..9d27648d433
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr87317-6.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=haswell" } */
+/* { dg-final { scan-assembler-not "vmov(d|q)" } } */
+
+#include <immintrin.h>
+
+void
+f (void *dst, __m64 x)
+{
+  __m128i y = _mm_movpi64_epi64(x);
+  __m128i z = _mm_cvtepi32_epi64 (y);
+  _mm_storeu_si128((__m128i*)dst, z);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr87317-7.c b/gcc/testsuite/gcc.target/i386/pr87317-7.c
new file mode 100644
index 00000000000..99c657e9df8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr87317-7.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=haswell" } */
+/* { dg-final { scan-assembler-not "vmov(d|q)" } } */
+
+#include <immintrin.h>
+
+void
+f (void *dst, void *ptr)
+{
+  __m128i data = _mm_cvtsi32_si128(*(int*)ptr);
+  data = _mm_cvtepu8_epi32(data);
+  _mm_storeu_si128((__m128i*)dst, data);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr87317-8.c b/gcc/testsuite/gcc.target/i386/pr87317-8.c
new file mode 100644
index 00000000000..c688e3e6d08
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr87317-8.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=haswell" } */
+/* { dg-final { scan-assembler-not "vmov(d|q)" } } */
+
+#include <immintrin.h>
+
+void
+f (void *dst, void *ptr)
+{
+  __m128i data = _mm_cvtsi32_si128(*(int*)ptr);
+  data = _mm_cvtepu16_epi64(data);
+  _mm_storeu_si128((__m128i*)dst, data);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr87317-9.c b/gcc/testsuite/gcc.target/i386/pr87317-9.c
new file mode 100644
index 00000000000..4311ed3ceb5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr87317-9.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=haswell" } */
+/* { dg-final { scan-assembler-not "vmovq" } } */
+
+#include <immintrin.h>
+
+int
+f (void *ptr)
+{
+  __m128i data = _mm_loadl_epi64((__m128i *)ptr);
+  data = _mm_cvtepu8_epi16(data);
+  return _mm_cvtsi128_si32(data);
+}
-- 
2.17.2

Reply via email to