diff --git a/gcc/testsuite/gcc.target/i386/avx2-vpaddb-3.c b/gcc/testsuite/gcc.target/i386/avx2-vpaddb-3.c
new file mode 100644
index 0000000..c99c967
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx2-vpaddb-3.c
@@ -0,0 +1,50 @@
+/* { dg-do run } */
+/* { dg-options "-mavx2 -O2 -ftree-vectorize -save-temps" } */
+/* { dg-require-effective-target avx2 } */
+
+#include "avx2-check.h"
+
+#define SIZE 256 /* Element count of every array below.  */
+
+char a[SIZE]; /* First operand.  */
+char b[SIZE]; /* Second operand.  */
+char c[SIZE]; /* Result of the gen_* loop.  */
+volatile char c_ref[SIZE]; /* Result of the check_* loop.  */
+
+__attribute__ ((__noinline__)) /* Must not be inlined into its caller.  */
+void
+gen_paddb () /* Stores a[i] + b[i] into c[i] for every i < SIZE.  */
+{
+  int i;
+  for (i = 0; i < SIZE; ++i)
+    c[i] = a[i] + b[i]; /* Presumably what the dg-final vpaddb scan targets.  */
+}
+
+void
+check_paddb () /* Fills volatile c_ref with a[k] + b[k] in index order.  */
+{
+  int k = 0;
+  for (; k != SIZE; k += 1)
+    c_ref[k] = a[k] + b[k]; /* Reference value for element k.  */
+}
+
+/* Run four rounds: refill a and b, then require c to match c_ref.  */
+static void
+avx2_test (void)
+{
+  int i, j;
+  for (i = 0; i < 4; ++i) {
+    for (j = 0; j < SIZE; ++j) { /* j, not i: fill every element.  */
+      a[j] = i * i + i;
+      b[j] = i * i * i;
+    }
+    gen_paddb ();
+    check_paddb ();
+    /* Cast away volatile explicitly to avoid a discarded-qualifier warning.  */
+    if (memcmp (c, (void *) c_ref, SIZE * sizeof (char)))
+      abort ();
+  }
+}
+
+/* { dg-final { scan-assembler-times "vpaddb\[ \\t\]+\[^\n\]*%ymm\[0-9\]" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx2-vpaddd-3.c b/gcc/testsuite/gcc.target/i386/avx2-vpaddd-3.c
new file mode 100644
index 0000000..ab6b658
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx2-vpaddd-3.c
@@ -0,0 +1,50 @@
+/* { dg-do run } */
+/* { dg-options "-mavx2 -O2 -ftree-vectorize -save-temps" } */
+/* { dg-require-effective-target avx2 } */
+
+#include "avx2-check.h"
+
+#define SIZE 256 /* Element count of every array below.  */
+
+int a[SIZE]; /* First operand.  */
+int b[SIZE]; /* Second operand.  */
+int c[SIZE]; /* Result of the gen_* loop.  */
+volatile int c_ref[SIZE]; /* Result of the check_* loop.  */
+
+__attribute__ ((__noinline__)) /* Must not be inlined into its caller.  */
+void
+gen_paddd () /* Stores a[i] + b[i] into c[i] for every i < SIZE.  */
+{
+  int i;
+  for (i = 0; i < SIZE; ++i)
+    c[i] = a[i] + b[i]; /* Presumably what the dg-final vpaddd scan targets.  */
+}
+
+void
+check_paddd () /* Fills volatile c_ref with a[k] + b[k] in index order.  */
+{
+  int k = 0;
+  for (; k != SIZE; k += 1)
+    c_ref[k] = a[k] + b[k]; /* Reference value for element k.  */
+}
+
+/* Run four rounds: refill a and b, then require c to match c_ref.  */
+static void
+avx2_test (void)
+{
+  int i, j;
+  for (i = 0; i < 4; ++i) {
+    for (j = 0; j < SIZE; ++j) { /* j, not i: fill every element.  */
+      a[j] = i * i + i;
+      b[j] = i * i * i;
+    }
+    gen_paddd ();
+    check_paddd ();
+    /* Cast away volatile explicitly to avoid a discarded-qualifier warning.  */
+    if (memcmp (c, (void *) c_ref, SIZE * sizeof (int)))
+      abort ();
+  }
+}
+
+/* { dg-final { scan-assembler-times "vpaddd\[ \\t\]+\[^\n\]*%ymm\[0-9\]" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx2-vpaddq-3.c b/gcc/testsuite/gcc.target/i386/avx2-vpaddq-3.c
new file mode 100644
index 0000000..066c6fa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx2-vpaddq-3.c
@@ -0,0 +1,50 @@
+/* { dg-do run } */
+/* { dg-options "-mavx2 -O2 -ftree-vectorize -save-temps" } */
+/* { dg-require-effective-target avx2 } */
+
+#include "avx2-check.h"
+
+#define SIZE 256 /* Element count of every array below.  */
+
+long long int a[SIZE]; /* First operand.  */
+long long int b[SIZE]; /* Second operand.  */
+long long int c[SIZE]; /* Result of the gen_* loop.  */
+volatile long long int c_ref[SIZE]; /* Result of the check_* loop.  */
+
+__attribute__ ((__noinline__)) /* Must not be inlined into its caller.  */
+void
+gen_paddq () /* Stores a[i] + b[i] into c[i] for every i < SIZE.  */
+{
+  long long int i;
+  for (i = 0; i < SIZE; ++i)
+    c[i] = a[i] + b[i]; /* Presumably what the dg-final vpaddq scan targets.  */
+}
+
+void
+check_paddq () /* Fills volatile c_ref with a[k] + b[k] in index order.  */
+{
+  long long int k = 0;
+  for (; k != SIZE; k += 1)
+    c_ref[k] = a[k] + b[k]; /* Reference value for element k.  */
+}
+
+/* Run four rounds: refill a and b, then require c to match c_ref.  */
+static void
+avx2_test (void)
+{
+  long long int i, j;
+  for (i = 0; i < 4; ++i) {
+    for (j = 0; j < SIZE; ++j) { /* j, not i: fill every element.  */
+      a[j] = i * i + i;
+      b[j] = i * i * i;
+    }
+    gen_paddq ();
+    check_paddq ();
+    /* Cast away volatile explicitly to avoid a discarded-qualifier warning.  */
+    if (memcmp (c, (void *) c_ref, SIZE * sizeof (long long int)))
+      abort ();
+  }
+}
+
+/* { dg-final { scan-assembler-times "vpaddq\[ \\t\]+\[^\n\]*%ymm\[0-9\]" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx2-vpaddw-3.c b/gcc/testsuite/gcc.target/i386/avx2-vpaddw-3.c
new file mode 100644
index 0000000..34ba45e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx2-vpaddw-3.c
@@ -0,0 +1,50 @@
+/* { dg-do run } */
+/* { dg-options "-mavx2 -O2 -ftree-vectorize -save-temps" } */
+/* { dg-require-effective-target avx2 } */
+
+#include "avx2-check.h"
+
+#define SIZE 256 /* Element count of every array below.  */
+
+short int a[SIZE]; /* First operand.  */
+short int b[SIZE]; /* Second operand.  */
+short int c[SIZE]; /* Result of the gen_* loop.  */
+volatile short int c_ref[SIZE]; /* Result of the check_* loop.  */
+
+__attribute__ ((__noinline__)) /* Must not be inlined into its caller.  */
+void
+gen_paddw () /* Stores a[i] + b[i] into c[i] for every i < SIZE.  */
+{
+  int i;
+  for (i = 0; i < SIZE; ++i)
+    c[i] = a[i] + b[i]; /* Presumably what the dg-final vpaddw scan targets.  */
+}
+
+void
+check_paddw () /* Fills volatile c_ref with a[k] + b[k] in index order.  */
+{
+  int k = 0;
+  for (; k != SIZE; k += 1)
+    c_ref[k] = a[k] + b[k]; /* Reference value for element k.  */
+}
+
+/* Run four rounds: refill a and b, then require c to match c_ref.  */
+static void
+avx2_test (void)
+{
+  int i, j;
+  for (i = 0; i < 4; ++i) {
+    for (j = 0; j < SIZE; ++j) { /* j, not i: fill every element.  */
+      a[j] = i * i + i;
+      b[j] = i * i * i;
+    }
+    gen_paddw ();
+    check_paddw ();
+    /* Cast away volatile explicitly to avoid a discarded-qualifier warning.  */
+    if (memcmp (c, (void *) c_ref, SIZE * sizeof (short)))
+      abort ();
+  }
+}
+
+/* { dg-final { scan-assembler-times "vpaddw\[ \\t\]+\[^\n\]*%ymm\[0-9\]" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx2-vpand-3.c b/gcc/testsuite/gcc.target/i386/avx2-vpand-3.c
new file mode 100644
index 0000000..8f1b6c0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx2-vpand-3.c
@@ -0,0 +1,50 @@
+/* { dg-do run } */
+/* { dg-options "-mavx2 -O2 -ftree-vectorize -save-temps" } */
+/* { dg-require-effective-target avx2 } */
+
+#include "avx2-check.h"
+
+#define SIZE 256 /* Element count of every array below.  */
+
+unsigned int a[SIZE]; /* First operand.  */
+unsigned int b[SIZE]; /* Second operand.  */
+unsigned int c[SIZE]; /* Result of the gen_* loop.  */
+volatile unsigned int c_ref[SIZE]; /* Result of the check_* loop.  */
+
+__attribute__ ((__noinline__)) /* Must not be inlined into its caller.  */
+void
+gen_pand () /* Stores a[i] & b[i] into c[i] for every i < SIZE.  */
+{
+  unsigned int i;
+  for (i = 0; i < SIZE; ++i)
+    c[i] = a[i] & b[i]; /* Presumably what the dg-final vpand scan targets.  */
+}
+
+void
+check_pand () /* Fills volatile c_ref with a[k] & b[k] in index order.  */
+{
+  unsigned int k = 0;
+  for (; k != SIZE; k += 1)
+    c_ref[k] = a[k] & b[k]; /* Reference value for element k.  */
+}
+
+/* Run four rounds: refill a and b, then require c to match c_ref.  */
+static void
+avx2_test (void)
+{
+  unsigned int i, j;
+  for (i = 0; i < 4; ++i) {
+    for (j = 0; j < SIZE; ++j) { /* j, not i: fill every element.  */
+      a[j] = i * i + i;
+      b[j] = i * i * i;
+    }
+    gen_pand ();
+    check_pand ();
+    /* Cast away volatile explicitly to avoid a discarded-qualifier warning.  */
+    if (memcmp (c, (void *) c_ref, SIZE * sizeof (unsigned int)))
+      abort ();
+  }
+}
+
+/* { dg-final { scan-assembler-times "vpand\[ \\t\]+\[^\n\]*%ymm\[0-9\]" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx2-vpmulld-3.c b/gcc/testsuite/gcc.target/i386/avx2-vpmulld-3.c
new file mode 100644
index 0000000..5f656c3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx2-vpmulld-3.c
@@ -0,0 +1,50 @@
+/* { dg-do run } */
+/* { dg-options "-mavx2 -O2 -ftree-vectorize -save-temps" } */
+/* { dg-require-effective-target avx2 } */
+
+#include "avx2-check.h"
+
+#define SIZE 256 /* Element count of every array below.  */
+
+int a[SIZE]; /* First operand.  */
+int b[SIZE]; /* Second operand.  */
+int c[SIZE]; /* Result of the gen_* loop.  */
+volatile int c_ref[SIZE]; /* Result of the check_* loop.  */
+
+__attribute__ ((__noinline__)) /* Must not be inlined into its caller.  */
+void
+gen_pmulld () /* Stores a[i] * b[i] into c[i] for every i < SIZE.  */
+{
+  int i;
+  for (i = 0; i < SIZE; ++i)
+    c[i] = a[i] * b[i]; /* Presumably what the dg-final vpmulld scan targets.  */
+}
+
+void
+check_pmulld () /* Fills volatile c_ref with a[k] * b[k] in index order.  */
+{
+  int k = 0;
+  for (; k != SIZE; k += 1)
+    c_ref[k] = a[k] * b[k]; /* Reference value for element k.  */
+}
+
+/* Run four rounds: refill a and b, then require c to match c_ref.  */
+static void
+avx2_test (void)
+{
+  int i, j;
+  for (i = 0; i < 4; ++i) {
+    for (j = 0; j < SIZE; ++j) { /* j, not i: fill every element.  */
+      a[j] = i * i + i;
+      b[j] = i * i * i;
+    }
+    gen_pmulld ();
+    check_pmulld ();
+    /* Cast away volatile explicitly to avoid a discarded-qualifier warning.  */
+    if (memcmp (c, (void *) c_ref, SIZE * sizeof (int)))
+      abort ();
+  }
+}
+
+/* { dg-final { scan-assembler-times "vpmulld\[ \\t\]+\[^\n\]*%ymm\[0-9\]" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx2-vpmullw-3.c b/gcc/testsuite/gcc.target/i386/avx2-vpmullw-3.c
new file mode 100644
index 0000000..48ea215
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx2-vpmullw-3.c
@@ -0,0 +1,50 @@
+/* { dg-do run } */
+/* { dg-options "-mavx2 -O2 -ftree-vectorize -save-temps" } */
+/* { dg-require-effective-target avx2 } */
+
+#include "avx2-check.h"
+
+#define SIZE 256 /* Element count of every array below.  */
+
+short int a[SIZE]; /* First operand.  */
+short int b[SIZE]; /* Second operand.  */
+short int c[SIZE]; /* Result of the gen_* loop.  */
+volatile short int c_ref[SIZE]; /* Result of the check_* loop.  */
+
+__attribute__ ((__noinline__)) /* Must not be inlined into its caller.  */
+void
+gen_pmullw () /* Stores a[i] * b[i] into c[i]; named after vpmullw.  */
+{
+  short int i;
+  for (i = 0; i < SIZE; ++i)
+    c[i] = a[i] * b[i]; /* Presumably what the dg-final vpmullw scan targets.  */
+}
+
+void
+check_pmullw () /* Same products, stored into volatile c_ref.  */
+{
+  short int i;
+  for (i = 0; i < SIZE; ++i)
+    c_ref[i] = a[i] * b[i];
+}
+
+/* Run four rounds: refill a and b, then require c to match c_ref.  */
+static void
+avx2_test (void)
+{
+  short int i, j;
+  for (i = 0; i < 4; ++i) {
+    for (j = 0; j < SIZE; ++j) { /* j, not i: fill every element.  */
+      a[j] = i * i + i;
+      b[j] = i * i * i;
+    }
+    gen_pmullw ();
+    check_pmullw ();
+    /* Cast away volatile explicitly to avoid a discarded-qualifier warning.  */
+    if (memcmp (c, (void *) c_ref, SIZE * sizeof (short int)))
+      abort ();
+  }
+}
+
+/* { dg-final { scan-assembler-times "vpmullw\[ \\t\]+\[^\n\]*%ymm\[0-9\]" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx2-vpsrad-3.c b/gcc/testsuite/gcc.target/i386/avx2-vpsrad-3.c
new file mode 100644
index 0000000..008c86a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx2-vpsrad-3.c
@@ -0,0 +1,50 @@
+/* { dg-do run } */
+/* { dg-options "-mavx2 -O2 -ftree-vectorize -save-temps" } */
+/* { dg-require-effective-target avx2 } */
+
+#include "avx2-check.h"
+
+#define SIZE 256 /* Element count of every array below.  */
+
+int a[SIZE]; /* Values that get shifted.  */
+int b[SIZE]; /* Filled by avx2_test; not read by the shift loops.  */
+int c[SIZE]; /* Result of the gen_* loop.  */
+volatile int c_ref[SIZE]; /* Result of the check_* loop.  */
+
+__attribute__ ((__noinline__)) /* Must not be inlined into its caller.  */
+void
+gen_psrad () /* Stores a[i] >> 5 into c[i] for every i < SIZE.  */
+{
+  int i;
+  for (i = 0; i < SIZE; ++i)
+    c[i] = a[i] >> 5; /* Presumably what the dg-final vpsrad scan targets.  */
+}
+
+void
+check_psrad () /* Fills volatile c_ref with a[k] >> 5 in index order.  */
+{
+  int k = 0;
+  for (; k != SIZE; k += 1)
+    c_ref[k] = a[k] >> 5; /* Reference value for element k.  */
+}
+
+/* Run four rounds: refill a and b, then require c to match c_ref.  */
+static void
+avx2_test (void)
+{
+  int i, j;
+  for (i = 0; i < 4; ++i) {
+    for (j = 0; j < SIZE; ++j) { /* j, not i: fill every element.  */
+      a[j] = i * i + i;
+      b[j] = i * i * i;
+    }
+    gen_psrad ();
+    check_psrad ();
+    /* Cast away volatile explicitly to avoid a discarded-qualifier warning.  */
+    if (memcmp (c, (void *) c_ref, SIZE * sizeof (int)))
+      abort ();
+  }
+}
+
+/* { dg-final { scan-assembler-times "vpsrad\[ \\t\]+\[^\n\]*%ymm\[0-9\]" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx2-vpsraw-3.c b/gcc/testsuite/gcc.target/i386/avx2-vpsraw-3.c
new file mode 100644
index 0000000..c805c00
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx2-vpsraw-3.c
@@ -0,0 +1,50 @@
+/* { dg-do run } */
+/* { dg-options "-mavx2 -O2 -ftree-vectorize -save-temps" } */
+/* { dg-require-effective-target avx2 } */
+
+#include "avx2-check.h"
+
+#define SIZE 256 /* Element count of every array below.  */
+
+short int a[SIZE]; /* Values that get shifted.  */
+short int b[SIZE]; /* Filled by avx2_test; not read by the shift loops.  */
+short int c[SIZE]; /* Result of the gen_* loop.  */
+volatile short int c_ref[SIZE]; /* Result of the check_* loop.  */
+
+__attribute__ ((__noinline__)) /* Must not be inlined into its caller.  */
+void
+gen_psraw () /* Stores a[i] >> 5 into c[i] for every i < SIZE.  */
+{
+  short int i;
+  for (i = 0; i < SIZE; ++i)
+    c[i] = a[i] >> 5; /* Presumably what the dg-final vpsraw scan targets.  */
+}
+
+void
+check_psraw () /* Fills volatile c_ref with a[k] >> 5 in index order.  */
+{
+  short int k = 0;
+  for (; k != SIZE; k += 1)
+    c_ref[k] = a[k] >> 5; /* Reference value for element k.  */
+}
+
+/* Run four rounds: refill a and b, then require c to match c_ref.  */
+static void
+avx2_test (void)
+{
+  short int i, j;
+  for (i = 0; i < 4; ++i) {
+    for (j = 0; j < SIZE; ++j) { /* j, not i: fill every element.  */
+      a[j] = i * i + i;
+      b[j] = i * i * i;
+    }
+    gen_psraw ();
+    check_psraw ();
+    /* Cast away volatile explicitly to avoid a discarded-qualifier warning.  */
+    if (memcmp (c, (void *) c_ref, SIZE * sizeof (short int)))
+      abort ();
+  }
+}
+
+/* { dg-final { scan-assembler-times "vpsraw\[ \\t\]+\[^\n\]*%ymm\[0-9\]" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx2-vpsrld-3.c b/gcc/testsuite/gcc.target/i386/avx2-vpsrld-3.c
new file mode 100644
index 0000000..09427b2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx2-vpsrld-3.c
@@ -0,0 +1,50 @@
+/* { dg-do run } */
+/* { dg-options "-mavx2 -O2 -ftree-vectorize -save-temps" } */
+/* { dg-require-effective-target avx2 } */
+
+#include "avx2-check.h"
+
+#define SIZE 256 /* Element count of every array below.  */
+
+unsigned int a[SIZE]; /* Values that get shifted.  */
+unsigned int b[SIZE]; /* Filled by avx2_test; not read by the shift loops.  */
+unsigned int c[SIZE]; /* Result of the gen_* loop.  */
+volatile unsigned int c_ref[SIZE]; /* Result of the check_* loop.  */
+
+__attribute__ ((__noinline__)) /* Must not be inlined into its caller.  */
+void
+gen_psrld () /* Stores a[i] >> 5 into c[i] for every i < SIZE.  */
+{
+  unsigned int i;
+  for (i = 0; i < SIZE; ++i)
+    c[i] = a[i] >> 5; /* Presumably what the dg-final vpsrld scan targets.  */
+}
+
+void
+check_psrld () /* Fills volatile c_ref with a[k] >> 5 in index order.  */
+{
+  unsigned int k = 0;
+  for (; k != SIZE; k += 1)
+    c_ref[k] = a[k] >> 5; /* Reference value for element k.  */
+}
+
+/* Run four rounds: refill a and b, then require c to match c_ref.  */
+static void
+avx2_test (void)
+{
+  unsigned int i, j;
+  for (i = 0; i < 4; ++i) {
+    for (j = 0; j < SIZE; ++j) { /* j, not i: fill every element.  */
+      a[j] = i * i + i;
+      b[j] = i * i * i;
+    }
+    gen_psrld ();
+    check_psrld ();
+    /* Cast away volatile explicitly to avoid a discarded-qualifier warning.  */
+    if (memcmp (c, (void *) c_ref, SIZE * sizeof (unsigned int)))
+      abort ();
+  }
+}
+
+/* { dg-final { scan-assembler-times "vpsrld\[ \\t\]+\[^\n\]*%ymm\[0-9\]" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx2-vpsrlw-3.c b/gcc/testsuite/gcc.target/i386/avx2-vpsrlw-3.c
new file mode 100644
index 0000000..439a784
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx2-vpsrlw-3.c
@@ -0,0 +1,50 @@
+/* { dg-do run } */
+/* { dg-options "-mavx2 -O2 -ftree-vectorize -save-temps" } */
+/* { dg-require-effective-target avx2 } */
+
+#include "avx2-check.h"
+
+#define SIZE 256 /* Element count of every array below.  */
+
+unsigned short a[SIZE]; /* Values that get shifted.  */
+unsigned short b[SIZE]; /* Filled by avx2_test; not read by the shift loops.  */
+unsigned short c[SIZE]; /* Result of the gen_* loop.  */
+volatile unsigned short c_ref[SIZE]; /* Result of the check_* loop.  */
+
+__attribute__ ((__noinline__)) /* Must not be inlined into its caller.  */
+void
+gen_psrlw () /* Stores a[i] >> 5 into c[i]; named after vpsrlw.  */
+{
+  unsigned short i;
+  for (i = 0; i < SIZE; ++i)
+    c[i] = a[i] >> 5; /* Presumably what the dg-final vpsrlw scan targets.  */
+}
+
+void
+check_psrlw () /* Same shifts, stored into volatile c_ref.  */
+{
+  unsigned short i;
+  for (i = 0; i < SIZE; ++i)
+    c_ref[i] = a[i] >> 5;
+}
+
+/* Run four rounds: refill a and b, then require c to match c_ref.  */
+static void
+avx2_test (void)
+{
+  unsigned short i, j;
+  for (i = 0; i < 4; ++i) {
+    for (j = 0; j < SIZE; ++j) { /* j, not i: fill every element.  */
+      a[j] = i * i + i;
+      b[j] = i * i * i;
+    }
+    gen_psrlw ();
+    check_psrlw ();
+    /* Cast away volatile explicitly to avoid a discarded-qualifier warning.  */
+    if (memcmp (c, (void *) c_ref, SIZE * sizeof (unsigned short)))
+      abort ();
+  }
+}
+
+/* { dg-final { scan-assembler-times "vpsrlw\[ \\t\]+\[^\n\]*%ymm\[0-9\]" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx2-vpsubb-3.c b/gcc/testsuite/gcc.target/i386/avx2-vpsubb-3.c
new file mode 100644
index 0000000..6e48971
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx2-vpsubb-3.c
@@ -0,0 +1,50 @@
+/* { dg-do run } */
+/* { dg-options "-mavx2 -O2 -ftree-vectorize -save-temps" } */
+/* { dg-require-effective-target avx2 } */
+
+#include "avx2-check.h"
+
+#define SIZE 256 /* Element count of every array below.  */
+
+char a[SIZE]; /* First operand (minuend).  */
+char b[SIZE]; /* Second operand (subtrahend).  */
+char c[SIZE]; /* Result of the gen_* loop.  */
+volatile char c_ref[SIZE]; /* Result of the check_* loop.  */
+
+__attribute__ ((__noinline__)) /* Must not be inlined into its caller.  */
+void
+gen_psubb () /* Stores a[i] - b[i] into c[i] for every i < SIZE.  */
+{
+  int i;
+  for (i = 0; i < SIZE; ++i)
+    c[i] = a[i] - b[i]; /* Presumably what the dg-final vpsubb scan targets.  */
+}
+
+void
+check_psubb () /* Fills volatile c_ref with a[k] - b[k] in index order.  */
+{
+  int k = 0;
+  for (; k != SIZE; k += 1)
+    c_ref[k] = a[k] - b[k]; /* Reference value for element k.  */
+}
+
+/* Run four rounds: refill a and b, then require c to match c_ref.  */
+static void
+avx2_test (void)
+{
+  int i, j;
+  for (i = 0; i < 4; ++i) {
+    for (j = 0; j < SIZE; ++j) { /* j, not i: fill every element.  */
+      a[j] = i * i + i;
+      b[j] = i * i * i;
+    }
+    gen_psubb ();
+    check_psubb ();
+    /* Cast away volatile explicitly to avoid a discarded-qualifier warning.  */
+    if (memcmp (c, (void *) c_ref, SIZE * sizeof (char)))
+      abort ();
+  }
+}
+
+/* { dg-final { scan-assembler-times "vpsubb\[ \\t\]+\[^\n\]*%ymm\[0-9\]" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx2-vpsubd-3.c b/gcc/testsuite/gcc.target/i386/avx2-vpsubd-3.c
new file mode 100644
index 0000000..d037d7e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx2-vpsubd-3.c
@@ -0,0 +1,50 @@
+/* { dg-do run } */
+/* { dg-options "-mavx2 -O2 -ftree-vectorize -save-temps" } */
+/* { dg-require-effective-target avx2 } */
+
+#include "avx2-check.h"
+
+#define SIZE 256 /* Element count of every array below.  */
+
+int a[SIZE]; /* First operand (minuend).  */
+int b[SIZE]; /* Second operand (subtrahend).  */
+int c[SIZE]; /* Result of the gen_* loop.  */
+volatile int c_ref[SIZE]; /* Result of the check_* loop.  */
+
+__attribute__ ((__noinline__)) /* Must not be inlined into its caller.  */
+void
+gen_psubd () /* Stores a[i] - b[i] into c[i] for every i < SIZE.  */
+{
+  int i;
+  for (i = 0; i < SIZE; ++i)
+    c[i] = a[i] - b[i]; /* Presumably what the dg-final vpsubd scan targets.  */
+}
+
+void
+check_psubd () /* Fills volatile c_ref with a[k] - b[k] in index order.  */
+{
+  int k = 0;
+  for (; k != SIZE; k += 1)
+    c_ref[k] = a[k] - b[k]; /* Reference value for element k.  */
+}
+
+/* Run four rounds: refill a and b, then require c to match c_ref.  */
+static void
+avx2_test (void)
+{
+  int i, j;
+  for (i = 0; i < 4; ++i) {
+    for (j = 0; j < SIZE; ++j) { /* j, not i: fill every element.  */
+      a[j] = i * i + i;
+      b[j] = i * i * i;
+    }
+    gen_psubd ();
+    check_psubd ();
+    /* Cast away volatile explicitly to avoid a discarded-qualifier warning.  */
+    if (memcmp (c, (void *) c_ref, SIZE * sizeof (int)))
+      abort ();
+  }
+}
+
+/* { dg-final { scan-assembler-times "vpsubd\[ \\t\]+\[^\n\]*%ymm\[0-9\]" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx2-vpsubq-3.c b/gcc/testsuite/gcc.target/i386/avx2-vpsubq-3.c
new file mode 100644
index 0000000..92e1e6f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx2-vpsubq-3.c
@@ -0,0 +1,50 @@
+/* { dg-do run } */
+/* { dg-options "-mavx2 -O2 -ftree-vectorize -save-temps" } */
+/* { dg-require-effective-target avx2 } */
+
+#include "avx2-check.h"
+
+#define SIZE 256 /* Element count of every array below.  */
+
+long long int a[SIZE]; /* First operand (minuend).  */
+long long int b[SIZE]; /* Second operand (subtrahend).  */
+long long int c[SIZE]; /* Result of the gen_* loop.  */
+volatile long long int c_ref[SIZE]; /* Result of the check_* loop.  */
+
+__attribute__ ((__noinline__)) /* Must not be inlined into its caller.  */
+void
+gen_psubq () /* Stores a[i] - b[i] into c[i] for every i < SIZE.  */
+{
+  long long int i;
+  for (i = 0; i < SIZE; ++i)
+    c[i] = a[i] - b[i]; /* Presumably what the dg-final vpsubq scan targets.  */
+}
+
+void
+check_psubq () /* Fills volatile c_ref with a[k] - b[k] in index order.  */
+{
+  long long int k = 0;
+  for (; k != SIZE; k += 1)
+    c_ref[k] = a[k] - b[k]; /* Reference value for element k.  */
+}
+
+/* Run four rounds: refill a and b, then require c to match c_ref.  */
+static void
+avx2_test (void)
+{
+  long long int i, j;
+  for (i = 0; i < 4; ++i) {
+    for (j = 0; j < SIZE; ++j) { /* j, not i: fill every element.  */
+      a[j] = i * i + i;
+      b[j] = i * i * i;
+    }
+    gen_psubq ();
+    check_psubq ();
+    /* Cast away volatile explicitly to avoid a discarded-qualifier warning.  */
+    if (memcmp (c, (void *) c_ref, SIZE * sizeof (long long int)))
+      abort ();
+  }
+}
+
+/* { dg-final { scan-assembler-times "vpsubq\[ \\t\]+\[^\n\]*%ymm\[0-9\]" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx2-vpsubw-3.c b/gcc/testsuite/gcc.target/i386/avx2-vpsubw-3.c
new file mode 100644
index 0000000..52fc88b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx2-vpsubw-3.c
@@ -0,0 +1,50 @@
+/* { dg-do run } */
+/* { dg-options "-mavx2 -O2 -ftree-vectorize -save-temps" } */
+/* { dg-require-effective-target avx2 } */
+
+#include "avx2-check.h"
+
+#define SIZE 256 /* Element count of every array below.  */
+
+short a[SIZE]; /* First operand (minuend).  */
+short b[SIZE]; /* Second operand (subtrahend).  */
+short c[SIZE]; /* Result of the gen_* loop.  */
+volatile short c_ref[SIZE]; /* Result of the check_* loop.  */
+
+__attribute__ ((__noinline__)) /* Must not be inlined into its caller.  */
+void
+gen_psubw () /* Stores a[i] - b[i] into c[i] for every i < SIZE.  */
+{
+  int i;
+  for (i = 0; i < SIZE; ++i)
+    c[i] = a[i] - b[i]; /* Presumably what the dg-final vpsubw scan targets.  */
+}
+
+void
+check_psubw () /* Fills volatile c_ref with a[k] - b[k] in index order.  */
+{
+  int k = 0;
+  for (; k != SIZE; k += 1)
+    c_ref[k] = a[k] - b[k]; /* Reference value for element k.  */
+}
+
+/* Run four rounds: refill a and b, then require c to match c_ref.  */
+static void
+avx2_test (void)
+{
+  int i, j;
+  for (i = 0; i < 4; ++i) {
+    for (j = 0; j < SIZE; ++j) { /* j, not i: fill every element.  */
+      a[j] = i * i + i;
+      b[j] = i * i * i;
+    }
+    gen_psubw ();
+    check_psubw ();
+    /* Cast away volatile explicitly to avoid a discarded-qualifier warning.  */
+    if (memcmp (c, (void *) c_ref, SIZE * sizeof (short)))
+      abort ();
+  }
+}
+
+/* { dg-final { scan-assembler-times "vpsubw\[ \\t\]+\[^\n\]*%ymm\[0-9\]" 1 } } */
+/* { dg-final { cleanup-saved-temps } } */
