Hi,

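For [PR 92658], commit
r11-485-gf6e40195ec3d3b402a5f6c58dbf359479bc4cbfa added the missing
sign/zero extend expanders for several vector modes. However, those
expanders are named using the <code> iterator, so the sign-extend
variants are called sign_extend<m><n>2 instead of the standard pattern
name extend<m><n>2. This patch adds an <optab> code attribute and uses
it in the expander names, so that the sign_extend expanders get the
standard name.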

Bootstrapped and tested on an x86-64 machine. This patch was
pre-approved by Uros and has been committed.

gcc/ChangeLog:

    * config/i386/i386.md (optab): New code attr.
    * config/i386/sse.md (<code>v32qiv32hi2): Rename to ...
    (<optab>v32qiv32hi2) ... this.
    (<code>v16qiv16hi2): Likewise.
    (<code>v8qiv8hi2): Likewise.
    (<code>v16qiv16si2): Likewise.
    (<code>v8qiv8si2): Likewise.
    (<code>v4qiv4si2): Likewise.
    (<code>v16hiv16si2): Likewise.
    (<code>v8hiv8si2): Likewise.
    (<code>v4hiv4si2): Likewise.
    (<code>v8qiv8di2): Likewise.
    (<code>v4qiv4di2): Likewise.
    (<code>v2qiv2di2): Likewise.
    (<code>v8hiv8di2): Likewise.
    (<code>v4hiv4di2): Likewise.
    (<code>v2hiv2di2): Likewise.
    (<code>v8siv8di2): Likewise.
    (<code>v4siv4di2): Likewise.
    (<code>v2siv2di2): Likewise.

gcc/testsuite/ChangeLog:

    * gcc.target/i386/pr92658-avx2-2.c: New test.
    * gcc.target/i386/pr92658-avx512bw-2.c: Likewise.
    * gcc.target/i386/pr92658-sse4-2.c: Likewise.

-- 
Regards,

Hongyu, Wang
From 053ac359e7317aac757e45f272071b93eaab02ab Mon Sep 17 00:00:00 2001
From: Hongyu Wang <hongyu.w...@intel.com>
Date: Fri, 25 Dec 2020 09:25:39 +0800
Subject: [PATCH] Fix standard name for zero/sign extend expanders

gcc/ChangeLog:

	* config/i386/i386.md (optab): New code attr.
	* config/i386/sse.md (<code>v32qiv32hi2): Rename to ...
	(<optab>v32qiv32hi2) ... this.
	(<code>v16qiv16hi2): Likewise.
	(<code>v8qiv8hi2): Likewise.
	(<code>v16qiv16si2): Likewise.
	(<code>v8qiv8si2): Likewise.
	(<code>v4qiv4si2): Likewise.
	(<code>v16hiv16si2): Likewise.
	(<code>v8hiv8si2): Likewise.
	(<code>v4hiv4si2): Likewise.
	(<code>v8qiv8di2): Likewise.
	(<code>v4qiv4di2): Likewise.
	(<code>v2qiv2di2): Likewise.
	(<code>v8hiv8di2): Likewise.
	(<code>v4hiv4di2): Likewise.
	(<code>v2hiv2di2): Likewise.
	(<code>v8siv8di2): Likewise.
	(<code>v4siv4di2): Likewise.
	(<code>v2siv2di2): Likewise.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/pr92658-avx2-2.c: New test.
	* gcc.target/i386/pr92658-avx512bw-2.c: Likewise.
	* gcc.target/i386/pr92658-sse4-2.c: Likewise.
---
 gcc/config/i386/i386.md                       |   4 +
 gcc/config/i386/sse.md                        |  36 +--
 .../gcc.target/i386/pr92658-avx2-2.c          | 192 ++++++++++++
 .../gcc.target/i386/pr92658-avx512bw-2.c      | 280 ++++++++++++++++++
 .../gcc.target/i386/pr92658-sse4-2.c          | 148 +++++++++
 5 files changed, 642 insertions(+), 18 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr92658-avx2-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr92658-avx512bw-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr92658-sse4-2.c

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 39c0eb3f915..d7cd3df995c 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -980,6 +980,10 @@ (define_code_attr absneg_mnemonic [(abs "fabs") (neg "fchs")])
 ;; Used in signed and unsigned widening multiplications.
 (define_code_iterator any_extend [sign_extend zero_extend])
 
+;; Used for representing standard name for extend
+(define_code_attr optab [(sign_extend "extend")
+			 (zero_extend "zero_extend")])
+
 ;; Prefix for insn menmonic.
 (define_code_attr sgnprefix [(sign_extend "i") (zero_extend "")
 			     (div "i") (udiv "")])
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 94bb4457e39..141a99d254e 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -17502,7 +17502,7 @@ (define_insn "avx2_<code>v16qiv16hi2<mask_name>"
    (set_attr "prefix" "maybe_evex")
    (set_attr "mode" "OI")])
 
-(define_expand "<code>v16qiv16hi2"
+(define_expand "<optab>v16qiv16hi2"
   [(set (match_operand:V16HI 0 "register_operand")
 	(any_extend:V16HI
 	  (match_operand:V16QI 1 "nonimmediate_operand")))]
@@ -17519,7 +17519,7 @@ (define_insn "avx512bw_<code>v32qiv32hi2<mask_name>"
    (set_attr "prefix" "evex")
    (set_attr "mode" "XI")])
 
-(define_expand "<code>v32qiv32hi2"
+(define_expand "<optab>v32qiv32hi2"
   [(set (match_operand:V32HI 0 "register_operand")
 	(any_extend:V32HI
 	  (match_operand:V32QI 1 "nonimmediate_operand")))]
@@ -17574,7 +17574,7 @@ (define_insn_and_split "*sse4_1_<code>v8qiv8hi2<mask_name>_2"
 	(any_extend:V8HI (match_dup 1)))]
   "operands[1] = adjust_address_nv (operands[1], V8QImode, 0);")
 
-(define_expand "<code>v8qiv8hi2"
+(define_expand "<optab>v8qiv8hi2"
   [(set (match_operand:V8HI 0 "register_operand")
 	(any_extend:V8HI
 	  (match_operand:V8QI 1 "nonimmediate_operand")))]
@@ -17598,7 +17598,7 @@ (define_insn "<mask_codefor>avx512f_<code>v16qiv16si2<mask_name>"
    (set_attr "prefix" "evex")
    (set_attr "mode" "XI")])
 
-(define_expand "<code>v16qiv16si2"
+(define_expand "<optab>v16qiv16si2"
   [(set (match_operand:V16SI 0 "register_operand")
 	(any_extend:V16SI
 	  (match_operand:V16QI 1 "nonimmediate_operand")))]
@@ -17651,7 +17651,7 @@ (define_insn_and_split "*avx2_<code>v8qiv8si2<mask_name>_2"
 	(any_extend:V8SI (match_dup 1)))]
   "operands[1] = adjust_address_nv (operands[1], V8QImode, 0);")
 
-(define_expand "<code>v8qiv8si2"
+(define_expand "<optab>v8qiv8si2"
   [(set (match_operand:V8SI 0 "register_operand")
 	(any_extend:V8SI
 	  (match_operand:V8QI 1 "nonimmediate_operand")))]
@@ -17714,7 +17714,7 @@ (define_insn_and_split "*sse4_1_<code>v4qiv4si2<mask_name>_2"
 	(any_extend:V4SI (match_dup 1)))]
   "operands[1] = adjust_address_nv (operands[1], V4QImode, 0);")
 
-(define_expand "<code>v4qiv4si2"
+(define_expand "<optab>v4qiv4si2"
   [(set (match_operand:V4SI 0 "register_operand")
 	(any_extend:V4SI
 	  (match_operand:V4QI 1 "nonimmediate_operand")))]
@@ -17738,7 +17738,7 @@ (define_insn "avx512f_<code>v16hiv16si2<mask_name>"
    (set_attr "prefix" "evex")
    (set_attr "mode" "XI")])
 
-(define_expand "<code>v16hiv16si2"
+(define_expand "<optab>v16hiv16si2"
   [(set (match_operand:V16SI 0 "register_operand")
 	(any_extend:V16SI
 	  (match_operand:V16HI 1 "nonimmediate_operand")))]
@@ -17755,7 +17755,7 @@ (define_insn "avx2_<code>v8hiv8si2<mask_name>"
    (set_attr "prefix" "maybe_evex")
    (set_attr "mode" "OI")])
 
-(define_expand "<code>v8hiv8si2"
+(define_expand "<optab>v8hiv8si2"
   [(set (match_operand:V8SI 0 "register_operand")
 	(any_extend:V8SI
 	  (match_operand:V8HI 1 "nonimmediate_operand")))]
@@ -17806,7 +17806,7 @@ (define_insn_and_split "*sse4_1_<code>v4hiv4si2<mask_name>_2"
 	(any_extend:V4SI (match_dup 1)))]
   "operands[1] = adjust_address_nv (operands[1], V4HImode, 0);")
 
-(define_expand "<code>v4hiv4si2"
+(define_expand "<optab>v4hiv4si2"
   [(set (match_operand:V4SI 0 "register_operand")
 	(any_extend:V4SI
 	  (match_operand:V4HI 1 "nonimmediate_operand")))]
@@ -17864,7 +17864,7 @@ (define_insn_and_split "*avx512f_<code>v8qiv8di2<mask_name>_2"
 	(any_extend:V8DI (match_dup 1)))]
   "operands[1] = adjust_address_nv (operands[1], V8QImode, 0);")
 
-(define_expand "<code>v8qiv8di2"
+(define_expand "<optab>v8qiv8di2"
   [(set (match_operand:V8DI 0 "register_operand")
 	(any_extend:V8DI
 	  (match_operand:V8QI 1 "nonimmediate_operand")))]
@@ -17925,7 +17925,7 @@ (define_insn_and_split "*avx2_<code>v4qiv4di2<mask_name>_2"
 	(any_extend:V4DI (match_dup 1)))]
   "operands[1] = adjust_address_nv (operands[1], V4QImode, 0);")
 
-(define_expand "<code>v4qiv4di2"
+(define_expand "<optab>v4qiv4di2"
   [(set (match_operand:V4DI 0 "register_operand")
 	(any_extend:V4DI
 	  (match_operand:V4QI 1 "nonimmediate_operand")))]
@@ -17953,7 +17953,7 @@ (define_insn "sse4_1_<code>v2qiv2di2<mask_name>"
    (set_attr "prefix" "orig,orig,maybe_evex")
    (set_attr "mode" "TI")])
 
-(define_expand "<code>v2qiv2di2"
+(define_expand "<optab>v2qiv2di2"
   [(set (match_operand:V2DI 0 "register_operand")
 	(any_extend:V2DI
 	  (match_operand:V2QI 1 "register_operand")))]
@@ -17974,7 +17974,7 @@ (define_insn "avx512f_<code>v8hiv8di2<mask_name>"
    (set_attr "prefix" "evex")
    (set_attr "mode" "XI")])
 
-(define_expand "<code>v8hiv8di2"
+(define_expand "<optab>v8hiv8di2"
   [(set (match_operand:V8DI 0 "register_operand")
 	(any_extend:V8DI
 	  (match_operand:V8HI 1 "nonimmediate_operand")))]
@@ -18023,7 +18023,7 @@ (define_insn_and_split "*avx2_<code>v4hiv4di2<mask_name>_2"
 	(any_extend:V4DI (match_dup 1)))]
   "operands[1] = adjust_address_nv (operands[1], V4HImode, 0);")
 
-(define_expand "<code>v4hiv4di2"
+(define_expand "<optab>v4hiv4di2"
   [(set (match_operand:V4DI 0 "register_operand")
 	(any_extend:V4DI
 	  (match_operand:V4HI 1 "nonimmediate_operand")))]
@@ -18084,7 +18084,7 @@ (define_insn_and_split "*sse4_1_<code>v2hiv2di2<mask_name>_2"
 	(any_extend:V2DI (match_dup 1)))]
   "operands[1] = adjust_address_nv (operands[1], V2HImode, 0);")
 
-(define_expand "<code>v2hiv2di2"
+(define_expand "<optab>v2hiv2di2"
   [(set (match_operand:V2DI 0 "register_operand")
 	(any_extend:V2DI
 	  (match_operand:V2HI 1 "nonimmediate_operand")))]
@@ -18108,7 +18108,7 @@ (define_insn "avx512f_<code>v8siv8di2<mask_name>"
    (set_attr "prefix" "evex")
    (set_attr "mode" "XI")])
 
-(define_expand "<code>v8siv8di2"
+(define_expand "<optab>v8siv8di2"
   [(set (match_operand:V8DI 0 "register_operand" "=v")
 	(any_extend:V8DI
 	  (match_operand:V8SI 1 "nonimmediate_operand" "vm")))]
@@ -18125,7 +18125,7 @@ (define_insn "avx2_<code>v4siv4di2<mask_name>"
    (set_attr "prefix_extra" "1")
    (set_attr "mode" "OI")])
 
-(define_expand "<code>v4siv4di2"
+(define_expand "<optab>v4siv4di2"
   [(set (match_operand:V4DI 0 "register_operand" "=v")
 	(any_extend:V4DI
 	    (match_operand:V4SI 1 "nonimmediate_operand" "vm")))]
@@ -18174,7 +18174,7 @@ (define_insn_and_split "*sse4_1_<code>v2siv2di2<mask_name>_2"
 	(any_extend:V2DI (match_dup 1)))]
   "operands[1] = adjust_address_nv (operands[1], V2SImode, 0);")
 
-(define_expand "<code>v2siv2di2"
+(define_expand "<optab>v2siv2di2"
   [(set (match_operand:V2DI 0 "register_operand")
 	(any_extend:V2DI
 	  (match_operand:V2SI 1 "nonimmediate_operand")))]
diff --git a/gcc/testsuite/gcc.target/i386/pr92658-avx2-2.c b/gcc/testsuite/gcc.target/i386/pr92658-avx2-2.c
new file mode 100644
index 00000000000..7aad85819a6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr92658-avx2-2.c
@@ -0,0 +1,192 @@
+/* PR target/92658 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -mavx2" } */
+
+typedef char v32qi __attribute__((vector_size (32)));
+typedef short v16hi __attribute__((vector_size (32)));
+typedef int v8si __attribute__((vector_size (32)));
+typedef long long v4di __attribute__((vector_size (32)));
+
+void
+foo_s8_s16 (v16hi * dst, v32qi * __restrict src)
+{
+  short tem[16];
+  tem[0] = (*src)[0];
+  tem[1] = (*src)[1];
+  tem[2] = (*src)[2];
+  tem[3] = (*src)[3];
+  tem[4] = (*src)[4];
+  tem[5] = (*src)[5];
+  tem[6] = (*src)[6];
+  tem[7] = (*src)[7];
+  tem[8] = (*src)[8];
+  tem[9] = (*src)[9];
+  tem[10] = (*src)[10];
+  tem[11] = (*src)[11];
+  tem[12] = (*src)[12];
+  tem[13] = (*src)[13];
+  tem[14] = (*src)[14];
+  tem[15] = (*src)[15];
+  dst[0] = *(v16hi *) tem;
+}
+
+void
+bar_s8_s16 (v16hi * dst, v32qi src)
+{
+  short tem[16];
+  tem[0] = src[0];
+  tem[1] = src[1];
+  tem[2] = src[2];
+  tem[3] = src[3];
+  tem[4] = src[4];
+  tem[5] = src[5];
+  tem[6] = src[6];
+  tem[7] = src[7];
+  tem[8] = src[8];
+  tem[9] = src[9];
+  tem[10] = src[10];
+  tem[11] = src[11];
+  tem[12] = src[12];
+  tem[13] = src[13];
+  tem[14] = src[14];
+  tem[15] = src[15];
+  dst[0] = *(v16hi *) tem;
+}
+
+/* { dg-final { scan-assembler-times "pmovsxbw" 2 } } */
+
+void
+foo_s8_s32 (v8si * dst, v32qi * __restrict src)
+{
+  int tem[8];
+  tem[0] = (*src)[0];
+  tem[1] = (*src)[1];
+  tem[2] = (*src)[2];
+  tem[3] = (*src)[3];
+  tem[4] = (*src)[4];
+  tem[5] = (*src)[5];
+  tem[6] = (*src)[6];
+  tem[7] = (*src)[7];
+  dst[0] = *(v8si *) tem;
+}
+
+void
+bar_s8_s32 (v8si * dst, v32qi src)
+{
+  int tem[8];
+  tem[0] = src[0];
+  tem[1] = src[1];
+  tem[2] = src[2];
+  tem[3] = src[3];
+  tem[4] = src[4];
+  tem[5] = src[5];
+  tem[6] = src[6];
+  tem[7] = src[7];
+  dst[0] = *(v8si *) tem;
+}
+
+/* { dg-final { scan-assembler-times "pmovsxbd" 2 } } */
+
+void
+foo_s8_s64 (v4di * dst, v32qi * __restrict src)
+{
+  long long tem[4];
+  tem[0] = (*src)[0];
+  tem[1] = (*src)[1];
+  tem[2] = (*src)[2];
+  tem[3] = (*src)[3];
+  dst[0] = *(v4di *) tem;
+}
+
+void
+bar_s8_s64 (v4di * dst, v32qi src)
+{
+  long long tem[4];
+  tem[0] = src[0];
+  tem[1] = src[1];
+  tem[2] = src[2];
+  tem[3] = src[3];
+  dst[0] = *(v4di *) tem;
+}
+
+/* { dg-final { scan-assembler-times "pmovsxbq" 2 { xfail *-*-* } } } */
+
+void
+foo_s16_s32 (v8si * dst, v16hi * __restrict src)
+{
+  int tem[8];
+  tem[0] = (*src)[0];
+  tem[1] = (*src)[1];
+  tem[2] = (*src)[2];
+  tem[3] = (*src)[3];
+  tem[4] = (*src)[4];
+  tem[5] = (*src)[5];
+  tem[6] = (*src)[6];
+  tem[7] = (*src)[7];
+  dst[0] = *(v8si *) tem;
+}
+
+void
+bar_s16_s32 (v8si * dst, v16hi src)
+{
+  int tem[8];
+  tem[0] = src[0];
+  tem[1] = src[1];
+  tem[2] = src[2];
+  tem[3] = src[3];
+  tem[4] = src[4];
+  tem[5] = src[5];
+  tem[6] = src[6];
+  tem[7] = src[7];
+  dst[0] = *(v8si *) tem;
+}
+
+/* { dg-final { scan-assembler-times "pmovsxwd" 2 } } */
+
+void
+foo_s16_s64 (v4di * dst, v16hi * __restrict src)
+{
+  long long tem[4];
+  tem[0] = (*src)[0];
+  tem[1] = (*src)[1];
+  tem[2] = (*src)[2];
+  tem[3] = (*src)[3];
+  dst[0] = *(v4di *) tem;
+}
+
+void
+bar_s16_s64 (v4di * dst, v16hi src)
+{
+  long long tem[4];
+  tem[0] = src[0];
+  tem[1] = src[1];
+  tem[2] = src[2];
+  tem[3] = src[3];
+  dst[0] = *(v4di *) tem;
+}
+
+/* { dg-final { scan-assembler-times "pmovsxwq" 2 } } */
+
+void
+foo_s32_s64 (v4di * dst, v8si * __restrict src)
+{
+  long long tem[4];
+  tem[0] = (*src)[0];
+  tem[1] = (*src)[1];
+  tem[2] = (*src)[2];
+  tem[3] = (*src)[3];
+  dst[0] = *(v4di *) tem;
+}
+
+void
+bar_s32_s64 (v4di * dst, v8si src)
+{
+  long long tem[4];
+  tem[0] = src[0];
+  tem[1] = src[1];
+  tem[2] = src[2];
+  tem[3] = src[3];
+  dst[0] = *(v4di *) tem;
+}
+
+/* { dg-final { scan-assembler-times "pmovsxdq" 2 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr92658-avx512bw-2.c b/gcc/testsuite/gcc.target/i386/pr92658-avx512bw-2.c
new file mode 100644
index 00000000000..811f21aa917
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr92658-avx512bw-2.c
@@ -0,0 +1,280 @@
+/* PR target/92658 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -mavx512bw" } */
+
+typedef char v64qi __attribute__((vector_size (64)));
+typedef short v32hi __attribute__((vector_size (64)));
+typedef int v16si __attribute__((vector_size (64)));
+typedef long long v8di __attribute__((vector_size (64)));
+
+void
+foo_s8_s16 (v32hi * dst, v64qi * __restrict src)
+{
+  short tem[32];
+  tem[0] = (*src)[0];
+  tem[1] = (*src)[1];
+  tem[2] = (*src)[2];
+  tem[3] = (*src)[3];
+  tem[4] = (*src)[4];
+  tem[5] = (*src)[5];
+  tem[6] = (*src)[6];
+  tem[7] = (*src)[7];
+  tem[8] = (*src)[8];
+  tem[9] = (*src)[9];
+  tem[10] = (*src)[10];
+  tem[11] = (*src)[11];
+  tem[12] = (*src)[12];
+  tem[13] = (*src)[13];
+  tem[14] = (*src)[14];
+  tem[15] = (*src)[15];
+  tem[16] = (*src)[16];
+  tem[17] = (*src)[17];
+  tem[18] = (*src)[18];
+  tem[19] = (*src)[19];
+  tem[20] = (*src)[20];
+  tem[21] = (*src)[21];
+  tem[22] = (*src)[22];
+  tem[23] = (*src)[23];
+  tem[24] = (*src)[24];
+  tem[25] = (*src)[25];
+  tem[26] = (*src)[26];
+  tem[27] = (*src)[27];
+  tem[28] = (*src)[28];
+  tem[29] = (*src)[29];
+  tem[30] = (*src)[30];
+  tem[31] = (*src)[31];
+  dst[0] = *(v32hi *) tem;
+}
+
+void
+bar_s8_s16 (v32hi * dst, v64qi src)
+{
+  short tem[32];
+  tem[0] = src[0];
+  tem[1] = src[1];
+  tem[2] = src[2];
+  tem[3] = src[3];
+  tem[4] = src[4];
+  tem[5] = src[5];
+  tem[6] = src[6];
+  tem[7] = src[7];
+  tem[8] = src[8];
+  tem[9] = src[9];
+  tem[10] = src[10];
+  tem[11] = src[11];
+  tem[12] = src[12];
+  tem[13] = src[13];
+  tem[14] = src[14];
+  tem[15] = src[15];
+  tem[16] = src[16];
+  tem[17] = src[17];
+  tem[18] = src[18];
+  tem[19] = src[19];
+  tem[20] = src[20];
+  tem[21] = src[21];
+  tem[22] = src[22];
+  tem[23] = src[23];
+  tem[24] = src[24];
+  tem[25] = src[25];
+  tem[26] = src[26];
+  tem[27] = src[27];
+  tem[28] = src[28];
+  tem[29] = src[29];
+  tem[30] = src[30];
+  tem[31] = src[31];
+  dst[0] = *(v32hi *) tem;
+}
+
+/* { dg-final { scan-assembler-times "pmovsxbw" 2 } } */
+
+void
+foo_s8_s32 (v16si * dst, v64qi * __restrict src)
+{
+  int tem[16];
+  tem[0] = (*src)[0];
+  tem[1] = (*src)[1];
+  tem[2] = (*src)[2];
+  tem[3] = (*src)[3];
+  tem[4] = (*src)[4];
+  tem[5] = (*src)[5];
+  tem[6] = (*src)[6];
+  tem[7] = (*src)[7];
+  tem[8] = (*src)[8];
+  tem[9] = (*src)[9];
+  tem[10] = (*src)[10];
+  tem[11] = (*src)[11];
+  tem[12] = (*src)[12];
+  tem[13] = (*src)[13];
+  tem[14] = (*src)[14];
+  tem[15] = (*src)[15];
+  dst[0] = *(v16si *) tem;
+}
+
+void
+bar_s8_s32 (v16si * dst, v64qi src)
+{
+  int tem[16];
+  tem[0] = src[0];
+  tem[1] = src[1];
+  tem[2] = src[2];
+  tem[3] = src[3];
+  tem[4] = src[4];
+  tem[5] = src[5];
+  tem[6] = src[6];
+  tem[7] = src[7];
+  tem[8] = src[8];
+  tem[9] = src[9];
+  tem[10] = src[10];
+  tem[11] = src[11];
+  tem[12] = src[12];
+  tem[13] = src[13];
+  tem[14] = src[14];
+  tem[15] = src[15];
+  dst[0] = *(v16si *) tem;
+}
+
+/* { dg-final { scan-assembler-times "pmovsxbd" 2 } } */
+
+void
+foo_s8_s64 (v8di * dst, v64qi * __restrict src)
+{
+  long long tem[8];
+  tem[0] = (*src)[0];
+  tem[1] = (*src)[1];
+  tem[2] = (*src)[2];
+  tem[3] = (*src)[3];
+  tem[4] = (*src)[4];
+  tem[5] = (*src)[5];
+  tem[6] = (*src)[6];
+  tem[7] = (*src)[7];
+  dst[0] = *(v8di *) tem;
+}
+
+void
+bar_s8_s64 (v8di * dst, v64qi src)
+{
+  long long tem[8];
+  tem[0] = src[0];
+  tem[1] = src[1];
+  tem[2] = src[2];
+  tem[3] = src[3];
+  tem[4] = src[4];
+  tem[5] = src[5];
+  tem[6] = src[6];
+  tem[7] = src[7];
+  dst[0] = *(v8di *) tem;
+}
+
+/* { dg-final { scan-assembler-times "pmovsxbq" 2 } } */
+
+void
+foo_s16_s32 (v16si * dst, v32hi * __restrict src)
+{
+  int tem[16];
+  tem[0] = (*src)[0];
+  tem[1] = (*src)[1];
+  tem[2] = (*src)[2];
+  tem[3] = (*src)[3];
+  tem[4] = (*src)[4];
+  tem[5] = (*src)[5];
+  tem[6] = (*src)[6];
+  tem[7] = (*src)[7];
+  tem[8] = (*src)[8];
+  tem[9] = (*src)[9];
+  tem[10] = (*src)[10];
+  tem[11] = (*src)[11];
+  tem[12] = (*src)[12];
+  tem[13] = (*src)[13];
+  tem[14] = (*src)[14];
+  tem[15] = (*src)[15];
+  dst[0] = *(v16si *) tem;
+}
+
+void
+bar_s16_s32 (v16si * dst, v32hi src)
+{
+  int tem[16];
+  tem[0] = src[0];
+  tem[1] = src[1];
+  tem[2] = src[2];
+  tem[3] = src[3];
+  tem[4] = src[4];
+  tem[5] = src[5];
+  tem[6] = src[6];
+  tem[7] = src[7];
+  tem[8] = src[8];
+  tem[9] = src[9];
+  tem[10] = src[10];
+  tem[11] = src[11];
+  tem[12] = src[12];
+  tem[13] = src[13];
+  tem[14] = src[14];
+  tem[15] = src[15];
+  dst[0] = *(v16si *) tem;
+}
+
+/* { dg-final { scan-assembler-times "pmovsxwd" 2 } } */
+
+void
+foo_s16_s64 (v8di * dst, v32hi * __restrict src)
+{
+  long long tem[8];
+  tem[0] = (*src)[0];
+  tem[1] = (*src)[1];
+  tem[2] = (*src)[2];
+  tem[3] = (*src)[3];
+  tem[4] = (*src)[4];
+  tem[5] = (*src)[5];
+  tem[6] = (*src)[6];
+  tem[7] = (*src)[7];
+  dst[0] = *(v8di *) tem;
+}
+
+void
+bar_s16_s64 (v8di * dst, v32hi src)
+{
+  long long tem[8];
+  tem[0] = src[0];
+  tem[1] = src[1];
+  tem[2] = src[2];
+  tem[3] = src[3];
+  tem[4] = src[4];
+  tem[5] = src[5];
+  tem[6] = src[6];
+  tem[7] = src[7];
+  dst[0] = *(v8di *) tem;
+}
+
+/* { dg-final { scan-assembler-times "pmovsxwq" 2 } } */
+
+void
+foo_s32_s64 (v8di * dst, v16si * __restrict src)
+{
+  long long tem[8];
+  tem[0] = (*src)[0];
+  tem[1] = (*src)[1];
+  tem[2] = (*src)[2];
+  tem[3] = (*src)[3];
+  tem[4] = (*src)[4];
+  tem[5] = (*src)[5];
+  tem[6] = (*src)[6];
+  tem[7] = (*src)[7];
+  dst[0] = *(v8di *) tem;
+}
+
+void
+bar_s32_s64 (v8di * dst, v16si src)
+{
+  long long tem[8];
+  tem[0] = src[0];
+  tem[1] = src[1];
+  tem[2] = src[2];
+  tem[3] = src[3];
+  tem[4] = src[4];
+  tem[5] = src[5];
+  tem[6] = src[6];
+  tem[7] = src[7];
+  dst[0] = *(v8di *) tem;
+}
+
+/* { dg-final { scan-assembler-times "pmovsxdq" 2 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr92658-sse4-2.c b/gcc/testsuite/gcc.target/i386/pr92658-sse4-2.c
new file mode 100644
index 00000000000..ca174ce4abc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr92658-sse4-2.c
@@ -0,0 +1,148 @@
+/* PR target/92658 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -msse4.1" } */
+
+typedef char v16qi __attribute__((vector_size (16)));
+typedef short v8hi __attribute__((vector_size (16)));
+typedef int v4si __attribute__((vector_size (16)));
+typedef long long v2di __attribute__((vector_size (16)));
+
+void
+foo_s8_s16 (v8hi * dst, v16qi * __restrict src)
+{
+  short tem[8];
+  tem[0] = (*src)[0];
+  tem[1] = (*src)[1];
+  tem[2] = (*src)[2];
+  tem[3] = (*src)[3];
+  tem[4] = (*src)[4];
+  tem[5] = (*src)[5];
+  tem[6] = (*src)[6];
+  tem[7] = (*src)[7];
+  dst[0] = *(v8hi *) tem;
+}
+
+void
+bar_s8_s16 (v8hi * dst, v16qi src)
+{
+  short tem[8];
+  tem[0] = src[0];
+  tem[1] = src[1];
+  tem[2] = src[2];
+  tem[3] = src[3];
+  tem[4] = src[4];
+  tem[5] = src[5];
+  tem[6] = src[6];
+  tem[7] = src[7];
+  dst[0] = *(v8hi *) tem;
+}
+
+/* { dg-final { scan-assembler-times "pmovsxbw" 2 } } */
+
+void
+foo_s8_s32 (v4si * dst, v16qi * __restrict src)
+{
+  int tem[4];
+  tem[0] = (*src)[0];
+  tem[1] = (*src)[1];
+  tem[2] = (*src)[2];
+  tem[3] = (*src)[3];
+  dst[0] = *(v4si *) tem;
+}
+
+void
+bar_s8_s32 (v4si * dst, v16qi src)
+{
+  int tem[4];
+  tem[0] = src[0];
+  tem[1] = src[1];
+  tem[2] = src[2];
+  tem[3] = src[3];
+  dst[0] = *(v4si *) tem;
+}
+
+/* { dg-final { scan-assembler-times "pmovsxbd" 2 { xfail *-*-* } } } */
+
+void
+foo_s8_s64 (v2di * dst, v16qi * __restrict src)
+{
+  long long tem[2];
+  tem[0] = (*src)[0];
+  tem[1] = (*src)[1];
+  dst[0] = *(v2di *) tem;
+}
+
+void
+bar_s8_s64 (v2di * dst, v16qi src)
+{
+  long long tem[2];
+  tem[0] = src[0];
+  tem[1] = src[1];
+  dst[0] = *(v2di *) tem;
+}
+
+/* { dg-final { scan-assembler-times "pmovsxbq" 2 { xfail *-*-* } } } */
+
+void
+foo_s16_s32 (v4si * dst, v8hi * __restrict src)
+{
+  int tem[4];
+  tem[0] = (*src)[0];
+  tem[1] = (*src)[1];
+  tem[2] = (*src)[2];
+  tem[3] = (*src)[3];
+  dst[0] = *(v4si *) tem;
+}
+
+void
+bar_s16_s32 (v4si * dst, v8hi src)
+{
+  int tem[4];
+  tem[0] = src[0];
+  tem[1] = src[1];
+  tem[2] = src[2];
+  tem[3] = src[3];
+  dst[0] = *(v4si *) tem;
+}
+
+/* { dg-final { scan-assembler-times "pmovsxwd" 2 } } */
+
+void
+foo_s16_s64 (v2di * dst, v8hi * __restrict src)
+{
+  long long tem[2];
+  tem[0] = (*src)[0];
+  tem[1] = (*src)[1];
+  dst[0] = *(v2di *) tem;
+}
+
+void
+bar_s16_s64 (v2di * dst, v8hi src)
+{
+  long long tem[2];
+  tem[0] = src[0];
+  tem[1] = src[1];
+  dst[0] = *(v2di *) tem;
+}
+
+/* { dg-final { scan-assembler-times "pmovsxwq" 2 { xfail *-*-* } } } */
+
+void
+foo_s32_s64 (v2di * dst, v4si * __restrict src)
+{
+  long long tem[2];
+  tem[0] = (*src)[0];
+  tem[1] = (*src)[1];
+  dst[0] = *(v2di *) tem;
+}
+
+void
+bar_s32_s64 (v2di * dst, v4si src)
+{
+  long long tem[2];
+  tem[0] = src[0];
+  tem[1] = src[1];
+  dst[0] = *(v2di *) tem;
+}
+
+/* { dg-final { scan-assembler-times "pmovsxdq" 2 } } */
-- 
2.29.2

Reply via email to