Hi,

This patch adds several tests that check loop tail (epilogue) vectorization.

Thanks,
Ilya
--
gcc/testsuite/

2016-07-05  Ilya Enkovich  <ilya.enkov...@intel.com>

        * lib/target-supports.exp (check_avx2_hw_available): New.
        (check_effective_target_avx2_runtime): New.
        * gcc.dg/vect/vect-tail-combine-1.c: New test.
        * gcc.dg/vect/vect-tail-combine-2.c: New test.
        * gcc.dg/vect/vect-tail-combine-3.c: New test.
        * gcc.dg/vect/vect-tail-combine-4.c: New test.
        * gcc.dg/vect/vect-tail-combine-5.c: New test.
        * gcc.dg/vect/vect-tail-combine-6.c: New test.
        * gcc.dg/vect/vect-tail-combine-7.c: New test.
        * gcc.dg/vect/vect-tail-combine-9.c: New test.
        * gcc.dg/vect/vect-tail-mask-1.c: New test.
        * gcc.dg/vect/vect-tail-mask-2.c: New test.
        * gcc.dg/vect/vect-tail-mask-3.c: New test.
        * gcc.dg/vect/vect-tail-mask-4.c: New test.
        * gcc.dg/vect/vect-tail-mask-5.c: New test.
        * gcc.dg/vect/vect-tail-mask-6.c: New test.
        * gcc.dg/vect/vect-tail-mask-7.c: New test.
        * gcc.dg/vect/vect-tail-mask-8.c: New test.
        * gcc.dg/vect/vect-tail-mask-9.c: New test.
        * gcc.dg/vect/vect-tail-nomask-1.c: New test.
        * gcc.dg/vect/vect-tail-nomask-2.c: New test.
        * gcc.dg/vect/vect-tail-nomask-3.c: New test.
        * gcc.dg/vect/vect-tail-nomask-4.c: New test.
        * gcc.dg/vect/vect-tail-nomask-5.c: New test.
        * gcc.dg/vect/vect-tail-nomask-6.c: New test.
        * gcc.dg/vect/vect-tail-nomask-7.c: New test.


diff --git a/gcc/testsuite/gcc.dg/vect/vect-tail-combine-1.c 
b/gcc/testsuite/gcc.dg/vect/vect-tail-combine-1.c
new file mode 100644
index 0000000..134d789
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-tail-combine-1.c
@@ -0,0 +1,123 @@
+/* { dg-do run } */
+/* { dg-require-weak "" } */
+/* { dg-additional-options "-ftree-vectorize-epilogues=combine -fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */
+
+/* Run-time test of an int addition loop, once with a constant and once
+   with a variable trip count.  SIZE is odd, so no even vectorization
+   factor divides it evenly.  */
+
+#define SIZE 1023
+#define ALIGN 64
+
+/* Weak, so that main can detect at run time that it is missing.  */
+extern int posix_memalign(void **memptr, __SIZE_TYPE__ alignment, __SIZE_TYPE__ size) __attribute__((weak));
+extern void free (void *);
+
+/* Store a[i] + b[i] into c[i] for i in [0, SIZE); the trip count is a
+   compile-time constant.  All pointers are assumed ALIGN-byte aligned.  */
+void __attribute__((noinline))
+test_citer (int * __restrict__ a,
+           int * __restrict__ b,
+           int * __restrict__ c)
+{
+  int i;
+
+  a = (int *)__builtin_assume_aligned (a, ALIGN);
+  b = (int *)__builtin_assume_aligned (b, ALIGN);
+  c = (int *)__builtin_assume_aligned (c, ALIGN);
+
+  for (i = 0; i < SIZE; i++)
+    c[i] = a[i] + b[i];
+}
+
+/* Same kernel as test_citer, but the trip count is the run-time argument
+   size.  */
+void __attribute__((noinline))
+test_viter (int * __restrict__ a,
+           int * __restrict__ b,
+           int * __restrict__ c,
+           int size)
+{
+  int i;
+
+  a = (int *)__builtin_assume_aligned (a, ALIGN);
+  b = (int *)__builtin_assume_aligned (b, ALIGN);
+  c = (int *)__builtin_assume_aligned (c, ALIGN);
+
+  for (i = 0; i < size; i++)
+    c[i] = a[i] + b[i];
+}
+
+/* Set a[i] = i, b[i] = -i and c[i] = 0 for i in [0, size), then store
+   size as a guard value in element [size] of each array.  The empty asm
+   with a "memory" clobber is a compiler barrier in every iteration,
+   presumably to keep this loop out of the vectorized-loop counts below.  */
+void __attribute__((noinline))
+init_data (int * __restrict__ a,
+          int * __restrict__ b,
+          int * __restrict__ c,
+          int size)
+{
+  for (int i = 0; i < size; i++)
+    {
+      a[i] = i;
+      b[i] = -i;
+      c[i] = 0;
+      asm volatile("": : :"memory");
+    }
+  a[size] = b[size] = c[size] = size;
+}
+
+
+/* Allocate three ALIGN-aligned arrays of SIZE + 1 ints, run both kernels
+   and abort if a result or a guard element [SIZE] is wrong.  Returns
+   without testing if an allocation fails.  */
+void __attribute__((noinline))
+run_test ()
+{
+  int *a;
+  int *b;
+  int *c;
+  int i;
+
+  if (posix_memalign ((void **)&a, ALIGN, (SIZE + 1) * sizeof (int)) != 0)
+    return;
+  if (posix_memalign ((void **)&b, ALIGN, (SIZE + 1) * sizeof (int)) != 0)
+    return;
+  if (posix_memalign ((void **)&c, ALIGN, (SIZE + 1) * sizeof (int)) != 0)
+    return;
+
+  init_data (a, b, c, SIZE);
+  test_citer (a, b, c);
+  for (i = 0; i < SIZE; i++)
+    if (c[i] != a[i] + b[i])
+      __builtin_abort ();
+  if (a[SIZE] != SIZE || b[SIZE] != SIZE || c[SIZE] != SIZE)
+    __builtin_abort ();
+
+  init_data (a, b, c, SIZE);
+  test_viter (a, b, c, SIZE);
+  for (i = 0; i < SIZE; i++)
+    if (c[i] != a[i] + b[i])
+      __builtin_abort ();
+  if (a[SIZE] != SIZE || b[SIZE] != SIZE || c[SIZE] != SIZE)
+    __builtin_abort ();
+
+  free (a);
+  free (b);
+  free (c);
+}
+
+/* Pass trivially when the weak posix_memalign is unavailable.  */
+int
+main (int argc, const char **argv)
+{
+  if (!posix_memalign)
+    return 0;
+
+  run_test ();
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
+/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE COMBINED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-tail-combine-2.c 
b/gcc/testsuite/gcc.dg/vect/vect-tail-combine-2.c
new file mode 100644
index 0000000..c513c5c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-tail-combine-2.c
@@ -0,0 +1,152 @@
+/* { dg-do run } */
+/* { dg-require-weak "" } */
+/* { dg-additional-options "-ftree-vectorize-epilogues=combine -fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */
+
+/* Run-time test of a conditional store loop, once with a constant and
+   once with a variable trip count.  SIZE is odd, so no even vectorization
+   factor divides it evenly.  */
+
+#define SIZE 1023
+#define ALIGN 64
+
+/* Weak, so that main can detect at run time that it is missing.  */
+extern int posix_memalign(void **memptr, __SIZE_TYPE__ alignment, __SIZE_TYPE__ size) __attribute__((weak));
+extern void free (void *);
+
+/* For i in [0, SIZE), store a[i] + c[i] into b[i] where a[i] > 0; the
+   trip count is a compile-time constant.  All pointers are assumed
+   ALIGN-byte aligned.  */
+void __attribute__((noinline))
+test_citer (int * __restrict__ a,
+           int * __restrict__ b,
+           int * __restrict__ c)
+{
+  int i;
+
+  a = (int *)__builtin_assume_aligned (a, ALIGN);
+  b = (int *)__builtin_assume_aligned (b, ALIGN);
+  c = (int *)__builtin_assume_aligned (c, ALIGN);
+
+  for (i = 0; i < SIZE; i++)
+    if (a[i] > 0)
+      b[i] = a[i] + c[i];
+}
+
+/* Same kernel as test_citer, but the trip count is the run-time argument
+   size.  */
+void __attribute__((noinline))
+test_viter (int * __restrict__ a,
+           int * __restrict__ b,
+           int * __restrict__ c,
+           int size)
+{
+  int i;
+
+  a = (int *)__builtin_assume_aligned (a, ALIGN);
+  b = (int *)__builtin_assume_aligned (b, ALIGN);
+  c = (int *)__builtin_assume_aligned (c, ALIGN);
+
+  for (i = 0; i < size; i++)
+    if (a[i] > 0)
+      b[i] = a[i] + c[i];
+}
+
+/* Odd i: a[i] = i, b[i] = 0, c[i] = 2 * i.  Even i: a[i] = -i, b[i] = i,
+   c[i] = 0.  Element [size] of each array gets size as a guard value.
+   The empty asm with a "memory" clobber is a compiler barrier in every
+   iteration, presumably to keep this loop from being vectorized.  */
+void __attribute__((noinline))
+init_data (int * __restrict__ a,
+          int * __restrict__ b,
+          int * __restrict__ c,
+          int size)
+{
+  int i;
+  for (i = 0; i < size; i++)
+    {
+      if (i % 2)
+       {
+         a[i] = i;
+         b[i] = 0;
+         c[i] = 2 * i;
+       }
+      else
+       {
+         a[i] = -i;
+         b[i] = i;
+         c[i] = 0;
+       }
+      asm volatile("": : :"memory");
+    }
+  a[size] = b[size] = c[size] = size;
+}
+
+
+/* Allocate three ALIGN-aligned arrays of SIZE + 1 ints, run both kernels
+   and abort if b was not updated exactly where a[i] > 0 or if a guard
+   element [SIZE] changed.  Returns without testing if allocation fails.  */
+void __attribute__((noinline))
+run_test ()
+{
+  int *a;
+  int *b;
+  int *c;
+  int i;
+
+  if (posix_memalign ((void **)&a, ALIGN, (SIZE + 1) * sizeof (int)) != 0)
+    return;
+  if (posix_memalign ((void **)&b, ALIGN, (SIZE + 1) * sizeof (int)) != 0)
+    return;
+  if (posix_memalign ((void **)&c, ALIGN, (SIZE + 1) * sizeof (int)) != 0)
+    return;
+
+  init_data (a, b, c, SIZE);
+  test_citer (a, b, c);
+  for (i = 0; i < SIZE; i++)
+    if (a[i] > 0)
+      {
+       if (b[i] != a[i] + c[i])
+         __builtin_abort ();
+      }
+    else
+      {
+       if (b[i] != i)
+         __builtin_abort ();
+      }
+  if (a[SIZE] != SIZE || b[SIZE] != SIZE || c[SIZE] != SIZE)
+    __builtin_abort ();
+
+  init_data (a, b, c, SIZE);
+  test_viter (a, b, c, SIZE);
+  for (i = 0; i < SIZE; i++)
+    if (a[i] > 0)
+      {
+       if (b[i] != a[i] + c[i])
+         __builtin_abort ();
+      }
+    else
+      {
+       if (b[i] != i)
+         __builtin_abort ();
+      }
+  if (a[SIZE] != SIZE || b[SIZE] != SIZE || c[SIZE] != SIZE)
+    __builtin_abort ();
+
+  free (a);
+  free (b);
+  free (c);
+}
+
+/* Pass trivially when the weak posix_memalign is unavailable.  */
+int
+main (int argc, const char **argv)
+{
+  if (!posix_memalign)
+    return 0;
+
+  run_test ();
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
+/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE COMBINED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-tail-combine-3.c 
b/gcc/testsuite/gcc.dg/vect/vect-tail-combine-3.c
new file mode 100644
index 0000000..17c5e95
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-tail-combine-3.c
@@ -0,0 +1,128 @@
+/* { dg-do run } */
+/* { dg-require-weak "" } */
+/* { dg-additional-options "-ftree-vectorize-epilogues=combine -fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */
+
+/* Run-time test of an int sum reduction loop, once with a constant and
+   once with a variable trip count.  SIZE is odd, so no even vectorization
+   factor divides it evenly.  */
+
+#define SIZE 1023
+#define ALIGN 64
+
+/* Weak, so that main can detect at run time that it is missing.  */
+extern int posix_memalign(void **memptr, __SIZE_TYPE__ alignment, __SIZE_TYPE__ size) __attribute__((weak));
+extern void free (void *);
+
+/* Return the sum of a[i] + b[i] * c[i] for i in [0, SIZE); the trip count
+   is a compile-time constant.  Pointers are assumed ALIGN-byte aligned.  */
+int __attribute__((noinline))
+test_citer (int * __restrict__ a,
+           int * __restrict__ b,
+           int * __restrict__ c)
+{
+  int res = 0;
+  int i;
+
+  a = (int *)__builtin_assume_aligned (a, ALIGN);
+  b = (int *)__builtin_assume_aligned (b, ALIGN);
+  c = (int *)__builtin_assume_aligned (c, ALIGN);
+
+  for (i = 0; i < SIZE; i++)
+    res += a[i] + b[i] * c[i];
+
+  return res;
+}
+
+/* Same kernel as test_citer, but the trip count is the run-time argument
+   size.  */
+int __attribute__((noinline))
+test_viter (int * __restrict__ a,
+           int * __restrict__ b,
+           int * __restrict__ c,
+           int size)
+{
+  int res = 0;
+  int i;
+
+  a = (int *)__builtin_assume_aligned (a, ALIGN);
+  b = (int *)__builtin_assume_aligned (b, ALIGN);
+  c = (int *)__builtin_assume_aligned (c, ALIGN);
+
+  for (i = 0; i < size; i++)
+    res += a[i] + b[i] * c[i];
+
+  return res;
+}
+
+/* Set a[i] = i, b[i] = -i and c[i] = 1 for i in [0, size), so that every
+   term a[i] + b[i] * c[i] is zero, then store size as a guard value in
+   element [size] of each array.  The empty asm with a "memory" clobber is
+   a compiler barrier in every iteration.  */
+void __attribute__((noinline))
+init_data (int * __restrict__ a,
+          int * __restrict__ b,
+          int * __restrict__ c,
+          int size)
+{
+  int i;
+  for (i = 0; i < size; i++)
+    {
+      a[i] = i;
+      b[i] = -i;
+      c[i] = 1;
+      asm volatile("": : :"memory");
+    }
+  a[size] = b[size] = c[size] = size;
+}
+
+
+/* Allocate three ALIGN-aligned arrays of SIZE + 1 ints, run both kernels
+   and abort unless each sum is 0 and the guard elements [SIZE] are
+   intact.  Returns without testing if an allocation fails.  */
+void __attribute__((noinline))
+run_test ()
+{
+  int *a;
+  int *b;
+  int *c;
+  int res;
+
+  if (posix_memalign ((void **)&a, ALIGN, (SIZE + 1) * sizeof (int)) != 0)
+    return;
+  if (posix_memalign ((void **)&b, ALIGN, (SIZE + 1) * sizeof (int)) != 0)
+    return;
+  if (posix_memalign ((void **)&c, ALIGN, (SIZE + 1) * sizeof (int)) != 0)
+    return;
+
+  init_data (a, b, c, SIZE);
+  res = test_citer (a, b, c);
+  if (res != 0)
+    __builtin_abort ();
+  if (a[SIZE] != SIZE || b[SIZE] != SIZE || c[SIZE] != SIZE)
+    __builtin_abort ();
+
+  init_data (a, b, c, SIZE);
+  res = test_viter (a, b, c, SIZE);
+  if (res != 0)
+    __builtin_abort ();
+  if (a[SIZE] != SIZE || b[SIZE] != SIZE || c[SIZE] != SIZE)
+    __builtin_abort ();
+
+  free (a);
+  free (b);
+  free (c);
+}
+
+/* Pass trivially when the weak posix_memalign is unavailable.  */
+int
+main (int argc, const char **argv)
+{
+  if (!posix_memalign)
+    return 0;
+
+  run_test ();
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
+/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE COMBINED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-tail-combine-4.c 
b/gcc/testsuite/gcc.dg/vect/vect-tail-combine-4.c
new file mode 100644
index 0000000..854c1ab
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-tail-combine-4.c
@@ -0,0 +1,140 @@
+/* { dg-do run } */
+/* { dg-require-weak "" } */
+/* { dg-additional-options "-ftree-vectorize-epilogues=combine -fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */
+
+/* Run-time test of a conditional int sum reduction loop, once with a
+   constant and once with a variable trip count.  SIZE is odd, so no even
+   vectorization factor divides it evenly.  */
+
+#define SIZE 1023
+#define ALIGN 64
+
+/* Weak, so that main can detect at run time that it is missing.  */
+extern int posix_memalign(void **memptr, __SIZE_TYPE__ alignment, __SIZE_TYPE__ size) __attribute__((weak));
+extern void free (void *);
+
+/* Return the sum of b[i] + c[i] over those i in [0, SIZE) with a[i] > 0;
+   the trip count is a compile-time constant.  Pointers are assumed
+   ALIGN-byte aligned.  */
+int __attribute__((noinline))
+test_citer (int * __restrict__ a,
+           int * __restrict__ b,
+           int * __restrict__ c)
+{
+  int res = 0;
+  int i;
+
+  a = (int *)__builtin_assume_aligned (a, ALIGN);
+  b = (int *)__builtin_assume_aligned (b, ALIGN);
+  c = (int *)__builtin_assume_aligned (c, ALIGN);
+
+  for (i = 0; i < SIZE; i++)
+    if (a[i] > 0)
+      res += b[i] + c[i];
+
+  return res;
+}
+
+/* Same kernel as test_citer, but the trip count is the run-time argument
+   size.  */
+int __attribute__((noinline))
+test_viter (int * __restrict__ a,
+           int * __restrict__ b,
+           int * __restrict__ c,
+           int size)
+{
+  int res = 0;
+  int i;
+
+  a = (int *)__builtin_assume_aligned (a, ALIGN);
+  b = (int *)__builtin_assume_aligned (b, ALIGN);
+  c = (int *)__builtin_assume_aligned (c, ALIGN);
+
+  for (i = 0; i < size; i++)
+    if (a[i] > 0)
+      res += b[i] + c[i];
+
+  return res;
+}
+
+/* Odd i: a[i] = i, b[i] = -i*2, c[i] = i*2, so selected terms sum to 0.
+   Even i: a[i] = -i, b[i] = i, c[i] = 10; a[i] <= 0 there.  Element
+   [size] of each array gets size as a guard value.  The empty asm with a
+   "memory" clobber is a compiler barrier in every iteration.  */
+void __attribute__((noinline))
+init_data (int * __restrict__ a,
+          int * __restrict__ b,
+          int * __restrict__ c,
+          int size)
+{
+  int i;
+  for (i = 0; i < size; i++)
+    {
+      if (i % 2)
+       {
+         a[i] = i;
+         b[i] = -i*2;
+         c[i] = i*2;
+       }
+      else
+       {
+         a[i] = -i;
+         b[i] = i;
+         c[i] = 10;
+       }
+      asm volatile("": : :"memory");
+    }
+  a[size] = b[size] = c[size] = size;
+}
+
+
+/* Allocate three ALIGN-aligned arrays of SIZE + 1 ints, run both kernels
+   and abort unless each sum is 0 and the guard elements [SIZE] are
+   intact.  Returns without testing if an allocation fails.  */
+void __attribute__((noinline))
+run_test ()
+{
+  int *a;
+  int *b;
+  int *c;
+  int res;
+
+  if (posix_memalign ((void **)&a, ALIGN, (SIZE + 1) * sizeof (int)) != 0)
+    return;
+  if (posix_memalign ((void **)&b, ALIGN, (SIZE + 1) * sizeof (int)) != 0)
+    return;
+  if (posix_memalign ((void **)&c, ALIGN, (SIZE + 1) * sizeof (int)) != 0)
+    return;
+
+  init_data (a, b, c, SIZE);
+  res = test_citer (a, b, c);
+  if (res != 0)
+    __builtin_abort ();
+  if (a[SIZE] != SIZE || b[SIZE] != SIZE || c[SIZE] != SIZE)
+    __builtin_abort ();
+
+  init_data (a, b, c, SIZE);
+  res = test_viter (a, b, c, SIZE);
+  if (res != 0)
+    __builtin_abort ();
+  if (a[SIZE] != SIZE || b[SIZE] != SIZE || c[SIZE] != SIZE)
+    __builtin_abort ();
+
+  free (a);
+  free (b);
+  free (c);
+}
+
+/* Pass trivially when the weak posix_memalign is unavailable.  */
+int
+main (int argc, const char **argv)
+{
+  if (!posix_memalign)
+    return 0;
+
+  run_test ();
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
+/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE COMBINED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-tail-combine-5.c 
b/gcc/testsuite/gcc.dg/vect/vect-tail-combine-5.c
new file mode 100644
index 0000000..9589715
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-tail-combine-5.c
@@ -0,0 +1,125 @@
+/* { dg-do run } */
+/* { dg-require-weak "" } */
+/* { dg-additional-options "-ftree-vectorize-epilogues=combine -fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */
+
+/* Run-time test of an int addition loop driven by a long long induction
+   variable, once with a constant and once with a variable trip count.
+   SIZE is odd, so no even vectorization factor divides it evenly.  */
+
+#define SIZE 1023
+#define ALIGN 64
+
+/* Weak, so that main can detect at run time that it is missing.  */
+extern int posix_memalign(void **memptr, __SIZE_TYPE__ alignment, __SIZE_TYPE__ size) __attribute__((weak));
+extern void free (void *);
+
+/* Store a[i] + b[i] into c[i] for i in [0, SIZE), counting with a long
+   long induction variable; the trip count is a compile-time constant.
+   All pointers are assumed ALIGN-byte aligned.  */
+void __attribute__((noinline))
+test_citer (int * __restrict__ a,
+           int * __restrict__ b,
+           int * __restrict__ c)
+{
+  long long i;
+
+  a = (int *)__builtin_assume_aligned (a, ALIGN);
+  b = (int *)__builtin_assume_aligned (b, ALIGN);
+  c = (int *)__builtin_assume_aligned (c, ALIGN);
+
+  for (i = 0; i < SIZE; i++)
+    c[i] = a[i] + b[i];
+}
+
+/* Same kernel as test_citer, but the trip count is the run-time argument
+   size.  */
+void __attribute__((noinline))
+test_viter (int * __restrict__ a,
+           int * __restrict__ b,
+           int * __restrict__ c,
+           int size)
+{
+  long long i;
+
+  a = (int *)__builtin_assume_aligned (a, ALIGN);
+  b = (int *)__builtin_assume_aligned (b, ALIGN);
+  c = (int *)__builtin_assume_aligned (c, ALIGN);
+
+  for (i = 0; i < size; i++)
+    c[i] = a[i] + b[i];
+}
+
+/* Set a[i] = i, b[i] = -i and c[i] = 0 for i in [0, size), then store
+   size as a guard value in element [size] of each array.  The empty asm
+   with a "memory" clobber is a compiler barrier in every iteration,
+   presumably to keep this loop out of the vectorized-loop counts below.  */
+void __attribute__((noinline))
+init_data (int * __restrict__ a,
+          int * __restrict__ b,
+          int * __restrict__ c,
+          int size)
+{
+  int i;
+  for (i = 0; i < size; i++)
+    {
+      a[i] = i;
+      b[i] = -i;
+      c[i] = 0;
+      asm volatile("": : :"memory");
+    }
+  a[size] = b[size] = c[size] = size;
+}
+
+
+/* Allocate three ALIGN-aligned arrays of SIZE + 1 ints, run both kernels
+   and abort if a result or a guard element [SIZE] is wrong.  Returns
+   without testing if an allocation fails.  */
+void __attribute__((noinline))
+run_test ()
+{
+  int *a;
+  int *b;
+  int *c;
+  long long i;
+
+  if (posix_memalign ((void **)&a, ALIGN, (SIZE + 1) * sizeof (int)) != 0)
+    return;
+  if (posix_memalign ((void **)&b, ALIGN, (SIZE + 1) * sizeof (int)) != 0)
+    return;
+  if (posix_memalign ((void **)&c, ALIGN, (SIZE + 1) * sizeof (int)) != 0)
+    return;
+
+  init_data (a, b, c, SIZE);
+  test_citer (a, b, c);
+  for (i = 0; i < SIZE; i++)
+    if (c[i] != a[i] + b[i])
+      __builtin_abort ();
+  if (a[SIZE] != SIZE || b[SIZE] != SIZE || c[SIZE] != SIZE)
+    __builtin_abort ();
+
+  init_data (a, b, c, SIZE);
+  test_viter (a, b, c, SIZE);
+  for (i = 0; i < SIZE; i++)
+    if (c[i] != a[i] + b[i])
+      __builtin_abort ();
+  if (a[SIZE] != SIZE || b[SIZE] != SIZE || c[SIZE] != SIZE)
+    __builtin_abort ();
+
+  free (a);
+  free (b);
+  free (c);
+}
+
+/* Pass trivially when the weak posix_memalign is unavailable.  */
+int
+main (int argc, const char **argv)
+{
+  if (!posix_memalign)
+    return 0;
+
+  run_test ();
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
+/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE COMBINED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-tail-combine-6.c 
b/gcc/testsuite/gcc.dg/vect/vect-tail-combine-6.c
new file mode 100644
index 0000000..284b2aa
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-tail-combine-6.c
@@ -0,0 +1,124 @@
+/* { dg-do run } */
+/* { dg-require-weak "" } */
+/* { dg-additional-options "-ftree-vectorize-epilogues=combine -fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */
+
+/* Run-time test of a long long addition loop, once with a constant and
+   once with a variable trip count.  SIZE is odd, so no even vectorization
+   factor divides it evenly.  */
+
+#define SIZE 1023
+#define ALIGN 64
+
+/* Weak, so that main can detect at run time that it is missing.  */
+extern int posix_memalign(void **memptr, __SIZE_TYPE__ alignment, __SIZE_TYPE__ size) __attribute__((weak));
+extern void free (void *);
+
+/* Store a[i] + b[i] into c[i] for i in [0, SIZE); the trip count is a
+   compile-time constant.  All pointers are assumed ALIGN-byte aligned.  */
+void __attribute__((noinline))
+test_citer (long long * __restrict__ a,
+           long long * __restrict__ b,
+           long long * __restrict__ c)
+{
+  int i;
+
+  a = (long long *)__builtin_assume_aligned (a, ALIGN);
+  b = (long long *)__builtin_assume_aligned (b, ALIGN);
+  c = (long long *)__builtin_assume_aligned (c, ALIGN);
+
+  for (i = 0; i < SIZE; i++)
+    c[i] = a[i] + b[i];
+}
+
+/* Same kernel as test_citer, but the trip count is the run-time argument
+   size.  */
+void __attribute__((noinline))
+test_viter (long long * __restrict__ a,
+           long long * __restrict__ b,
+           long long * __restrict__ c,
+           int size)
+{
+  int i;
+
+  a = (long long *)__builtin_assume_aligned (a, ALIGN);
+  b = (long long *)__builtin_assume_aligned (b, ALIGN);
+  c = (long long *)__builtin_assume_aligned (c, ALIGN);
+
+  for (i = 0; i < size; i++)
+    c[i] = a[i] + b[i];
+}
+
+/* Set a[i] = i, b[i] = -i and c[i] = 0 for i in [0, size), then store
+   size as a guard value in element [size] of each array.  The empty asm
+   with a "memory" clobber is a compiler barrier in every iteration,
+   presumably to keep this loop out of the vectorized-loop counts below.  */
+void __attribute__((noinline))
+init_data (long long * __restrict__ a,
+          long long * __restrict__ b,
+          long long * __restrict__ c,
+          int size)
+{
+  int i;
+  for (i = 0; i < size; i++)
+    {
+      a[i] = i;
+      b[i] = -i;
+      c[i] = 0;
+      asm volatile("": : :"memory");
+    }
+  a[size] = b[size] = c[size] = size;
+}
+
+
+/* Allocate three ALIGN-aligned arrays of SIZE + 1 long longs, run both
+   kernels and abort if a result or a guard element [SIZE] is wrong.
+   Returns without testing if an allocation fails.  */
+void __attribute__((noinline))
+run_test ()
+{
+  long long *a;
+  long long *b;
+  long long *c;
+  int i;
+
+  if (posix_memalign ((void **)&a, ALIGN, (SIZE + 1) * sizeof (long long)) != 0)
+    return;
+  if (posix_memalign ((void **)&b, ALIGN, (SIZE + 1) * sizeof (long long)) != 0)
+    return;
+  if (posix_memalign ((void **)&c, ALIGN, (SIZE + 1) * sizeof (long long)) != 0)
+    return;
+
+  init_data (a, b, c, SIZE);
+  test_citer (a, b, c);
+  for (i = 0; i < SIZE; i++)
+    if (c[i] != a[i] + b[i])
+      __builtin_abort ();
+  if (a[SIZE] != SIZE || b[SIZE] != SIZE || c[SIZE] != SIZE)
+    __builtin_abort ();
+
+  init_data (a, b, c, SIZE);
+  test_viter (a, b, c, SIZE);
+  for (i = 0; i < SIZE; i++)
+    if (c[i] != a[i] + b[i])
+      __builtin_abort ();
+  if (a[SIZE] != SIZE || b[SIZE] != SIZE || c[SIZE] != SIZE)
+    __builtin_abort ();
+
+  free (a);
+  free (b);
+  free (c);
+}
+
+/* Pass trivially when the weak posix_memalign is unavailable.  */
+int
+main (int argc, const char **argv)
+{
+  if (!posix_memalign)
+    return 0;
+
+  run_test ();
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
+/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE COMBINED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-tail-combine-7.c 
b/gcc/testsuite/gcc.dg/vect/vect-tail-combine-7.c
new file mode 100644
index 0000000..b328285
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-tail-combine-7.c
@@ -0,0 +1,176 @@
+/* { dg-do run } */
+/* { dg-require-weak "" } */
+/* { dg-additional-options "-ffast-math -ftree-vectorize-epilogues=combine -fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */
+
+/* Run-time test of a loop mixing int, long long, float and double data
+   (a float-to-int store plus a conditional double reduction), once with a
+   constant and once with a variable trip count.  */
+
+#define SIZE 1023
+#define ALIGN 64
+
+/* Weak, so that main can detect at run time that it is missing.  */
+extern int posix_memalign(void **memptr, __SIZE_TYPE__ alignment, __SIZE_TYPE__ size) __attribute__((weak));
+extern void free (void *);
+
+/* For i in [0, SIZE), store c[i] + 1 (converted to int) into a[i] and
+   return the sum of d[i] over those i with b[i] < 0.  The trip count is a
+   compile-time constant.  Pointers are assumed ALIGN-byte aligned.  */
+double __attribute__((noinline))
+test_citer (int * __restrict__ a,
+           long long * __restrict__ b,
+           float * __restrict__ c,
+           double * __restrict__ d)
+{
+  double res = 0;
+  int i;
+
+  a = (int *)__builtin_assume_aligned (a, ALIGN);
+  b = (long long *)__builtin_assume_aligned (b, ALIGN);
+  c = (float *)__builtin_assume_aligned (c, ALIGN);
+  d = (double *)__builtin_assume_aligned (d, ALIGN);
+
+  for (i = 0; i < SIZE; i++)
+    {
+      a[i] = c[i] + 1;
+      if (b[i] < 0)
+       res += d[i];
+    }
+
+  return res;
+}
+
+/* Same kernel as test_citer, but the trip count is the run-time argument
+   size.  */
+double __attribute__((noinline))
+test_viter (int * __restrict__ a,
+           long long * __restrict__ b,
+           float * __restrict__ c,
+           double * __restrict__ d,
+           int size)
+{
+  double res = 0;
+  int i;
+
+  a = (int *)__builtin_assume_aligned (a, ALIGN);
+  b = (long long *)__builtin_assume_aligned (b, ALIGN);
+  c = (float *)__builtin_assume_aligned (c, ALIGN);
+  d = (double *)__builtin_assume_aligned (d, ALIGN);
+
+  for (i = 0; i < size; i++)
+    {
+      a[i] = c[i] + 1;
+      if (b[i] < 0)
+       res += d[i];
+    }
+
+  return res;
+}
+
+/* For i in [0, size): a[i] = 0 and c[i] = 2.5; odd i get b[i] = i and
+   d[i] = 1, even i get b[i] = -i and d[i] = -1.  Element [size] of each
+   array gets size, converted to the element type, as a guard value.  The
+   empty asm with a "memory" clobber is a compiler barrier in every
+   iteration.  */
+void __attribute__((noinline))
+init_data (int * __restrict__ a,
+          long long * __restrict__ b,
+          float * __restrict__ c,
+          double * __restrict__ d,
+          int size)
+{
+  int i;
+  for (i = 0; i < size; i++)
+    {
+      if (i % 2)
+       {
+         a[i] = 0;
+         b[i] = i;
+         c[i] = 2.5;
+         d[i] = 1;
+       }
+      else
+       {
+         a[i] = 0;
+         b[i] = -i;
+         c[i] = 2.5;
+         d[i] = -1;
+       }
+      asm volatile("": : :"memory");
+    }
+  a[size] = (int)size;
+  b[size] = (long long)size;
+  c[size] = (float)size;
+  d[size] = (double)size;
+}
+
+/* Allocate four ALIGN-aligned arrays of SIZE + 1 elements, run both
+   kernels and abort unless the reduction is -(SIZE / 2) within 0.01,
+   every a[i] is 3 and the guard elements [SIZE] are intact.  Returns
+   without testing if an allocation fails.  */
+void __attribute__((noinline))
+run_test ()
+{
+  int *a;
+  long long *b;
+  float *c;
+  double *d;
+  double res;
+  int i;
+
+  if (posix_memalign ((void **)&a, ALIGN, (SIZE + 1) * sizeof (int)) != 0)
+    return;
+  if (posix_memalign ((void **)&b, ALIGN, (SIZE + 1) * sizeof (long long)) != 0)
+    return;
+  if (posix_memalign ((void **)&c, ALIGN, (SIZE + 1) * sizeof (float)) != 0)
+    return;
+  if (posix_memalign ((void **)&d, ALIGN, (SIZE + 1) * sizeof (double)) != 0)
+    return;
+
+  init_data (a, b, c, d, SIZE);
+  res = test_citer (a, b, c, d);
+  res += SIZE / 2;
+  if (res > 0.01 || res < -0.01)
+    __builtin_abort ();
+  for (i = 0; i < SIZE; i++)
+    if (a[i] != 3)
+      __builtin_abort ();
+  if (a[SIZE] != (int)SIZE
+      || b[SIZE] != (long long)SIZE
+      || c[SIZE] != (float)SIZE
+      || d[SIZE] != (double)SIZE)
+    __builtin_abort ();
+
+  init_data (a, b, c, d, SIZE);
+  res = test_viter (a, b, c, d, SIZE);
+  res += SIZE / 2;
+  if (res > 0.01 || res < -0.01)
+    __builtin_abort ();
+  for (i = 0; i < SIZE; i++)
+    if (a[i] != 3)
+      __builtin_abort ();
+  if (a[SIZE] != (int)SIZE
+      || b[SIZE] != (long long)SIZE
+      || c[SIZE] != (float)SIZE
+      || d[SIZE] != (double)SIZE)
+    __builtin_abort ();
+
+  free (a);
+  free (b);
+  free (c);
+  free (d);
+}
+
+/* Pass trivially when the weak posix_memalign is unavailable.  */
+int
+main (int argc, const char **argv)
+{
+  if (!posix_memalign)
+    return 0;
+
+  run_test ();
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
+/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE COMBINED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-tail-combine-9.c 
b/gcc/testsuite/gcc.dg/vect/vect-tail-combine-9.c
new file mode 100644
index 0000000..221835a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-tail-combine-9.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-ftree-vectorize-epilogues=combine -fvect-epilogue-cost-model=dynamic -fvect-cost-model=dynamic" } */
+/* { dg-additional-options "-march=knl" { target { i?86-*-* x86_64-*-* } } } */
+
+/* Compile-only test: this file's checks expect the loop below to be
+   reported as vectorized (on x86) without a combined epilogue.  */
+
+#define SIZE 33
+#define ALIGN 64
+
+/* Store a[i] + b[i] into c[i] for i in [0, SIZE).  All pointers are
+   assumed ALIGN-byte aligned.  */
+void
+test (int * __restrict__ a,
+      int * __restrict__ b,
+      int * __restrict__ c)
+{
+  int i;
+
+  a = (int *)__builtin_assume_aligned (a, ALIGN);
+  b = (int *)__builtin_assume_aligned (b, ALIGN);
+  c = (int *)__builtin_assume_aligned (c, ALIGN);
+
+  for (i = 0; i < SIZE; i++)
+    c[i] = a[i] + b[i];
+}
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=64\\)" 1 "vect" { target { i?86-*-* x86_64-*-* } } } } */
+/* { dg-final { scan-tree-dump-not "LOOP EPILOGUE COMBINED \\(VS=64\\)" "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-tail-mask-1.c 
b/gcc/testsuite/gcc.dg/vect/vect-tail-mask-1.c
new file mode 100644
index 0000000..7f50a17
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-tail-mask-1.c
@@ -0,0 +1,8 @@
+/* { dg-do run } */
+/* { dg-require-weak "" } */
+/* { dg-additional-options "-ftree-vectorize-epilogues=mask 
-fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */
+
+#include "vect-tail-combine-1.c"
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { 
target avx2_runtime } } } */
+/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE VECTORIZED AND MASKED 
\\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-tail-mask-2.c 
b/gcc/testsuite/gcc.dg/vect/vect-tail-mask-2.c
new file mode 100644
index 0000000..995631c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-tail-mask-2.c
@@ -0,0 +1,8 @@
+/* { dg-do run } */
+/* { dg-require-weak "" } */
+/* { dg-additional-options "-ftree-vectorize-epilogues=mask 
-fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */
+
+#include "vect-tail-combine-2.c"
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { 
target avx2_runtime } } } */
+/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE VECTORIZED AND MASKED 
\\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-tail-mask-3.c 
b/gcc/testsuite/gcc.dg/vect/vect-tail-mask-3.c
new file mode 100644
index 0000000..fe405bf
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-tail-mask-3.c
@@ -0,0 +1,8 @@
+/* { dg-do run } */
+/* { dg-require-weak "" } */
+/* { dg-additional-options "-ftree-vectorize-epilogues=mask 
-fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */
+
+#include "vect-tail-combine-3.c"
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { 
target avx2_runtime } } } */
+/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE VECTORIZED AND MASKED 
\\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-tail-mask-4.c 
b/gcc/testsuite/gcc.dg/vect/vect-tail-mask-4.c
new file mode 100644
index 0000000..3bbd054
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-tail-mask-4.c
@@ -0,0 +1,8 @@
+/* { dg-do run } */
+/* { dg-require-weak "" } */
+/* { dg-additional-options "-ftree-vectorize-epilogues=mask 
-fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */
+
+#include "vect-tail-combine-4.c"
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { 
target avx2_runtime } } } */
+/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE VECTORIZED AND MASKED 
\\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-tail-mask-5.c 
b/gcc/testsuite/gcc.dg/vect/vect-tail-mask-5.c
new file mode 100644
index 0000000..7541061
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-tail-mask-5.c
@@ -0,0 +1,8 @@
+/* { dg-do run } */
+/* { dg-require-weak "" } */
+/* { dg-additional-options "-ftree-vectorize-epilogues=mask 
-fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */
+
+#include "vect-tail-combine-5.c"
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { 
target avx2_runtime } } } */
+/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE VECTORIZED AND MASKED 
\\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-tail-mask-6.c 
b/gcc/testsuite/gcc.dg/vect/vect-tail-mask-6.c
new file mode 100644
index 0000000..2af1c6a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-tail-mask-6.c
@@ -0,0 +1,8 @@
+/* { dg-do run } */
+/* { dg-require-weak "" } */
+/* { dg-additional-options "-ftree-vectorize-epilogues=mask 
-fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */
+
+#include "vect-tail-combine-6.c"
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { 
target avx2_runtime } } } */
+/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE VECTORIZED AND MASKED 
\\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-tail-mask-7.c 
b/gcc/testsuite/gcc.dg/vect/vect-tail-mask-7.c
new file mode 100644
index 0000000..72f3119
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-tail-mask-7.c
@@ -0,0 +1,8 @@
+/* { dg-do run } */
+/* { dg-require-weak "" } */
+/* { dg-additional-options "-ffast-math -ftree-vectorize-epilogues=mask 
-fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */
+
+#include "vect-tail-combine-7.c"
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { 
target avx2_runtime } } } */
+/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE VECTORIZED AND MASKED 
\\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-tail-mask-8.c 
b/gcc/testsuite/gcc.dg/vect/vect-tail-mask-8.c
new file mode 100644
index 0000000..552e974
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-tail-mask-8.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-ftree-vectorize-epilogues=mask -fvect-epilogue-cost-model=dynamic -fvect-cost-model=dynamic" } */
+/* { dg-additional-options "-march=knl" { target { i?86-*-* x86_64-*-* } } } */
+
+/* Compile-only test: this file's checks expect the loop below to be
+   reported (on x86) as vectorized with a vectorized, masked epilogue.  */
+
+#define SIZE 31
+#define ALIGN 64
+
+/* Store a[i] + b[i] into c[i] for i in [0, SIZE).  All pointers are
+   assumed ALIGN-byte aligned.  */
+void
+test (int * __restrict__ a,
+      int * __restrict__ b,
+      int * __restrict__ c)
+{
+  int i;
+
+  a = (int *)__builtin_assume_aligned (a, ALIGN);
+  b = (int *)__builtin_assume_aligned (b, ALIGN);
+  c = (int *)__builtin_assume_aligned (c, ALIGN);
+
+  for (i = 0; i < SIZE; i++)
+    c[i] = a[i] + b[i];
+}
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=64\\)" 1 "vect" { target { i?86-*-* x86_64-*-* } } } } */
+/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE VECTORIZED AND MASKED \\(VS=64\\)" 1 "vect" { target { i?86-*-* x86_64-*-* } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-tail-mask-9.c 
b/gcc/testsuite/gcc.dg/vect/vect-tail-mask-9.c
new file mode 100644
index 0000000..61c0f80
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-tail-mask-9.c
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-ftree-vectorize-epilogues=mask 
-fvect-epilogue-cost-model=dynamic -fvect-cost-model=dynamic" } */
+/* { dg-additional-options "-march=knl" { target { i?86-*-* x86_64-*-* } } } */
+
+#include "vect-tail-combine-9.c"
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=64\\)" 1 "vect" { 
target { i?86-*-* x86_64-*-* } } } } */
+/* { dg-final { scan-tree-dump-not "LOOP EPILOGUE VECTORIZED AND MASKED 
\\(VS=64\\)" "vect" { target { i?86-*-* x86_64-*-* } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-1.c 
b/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-1.c
new file mode 100644
index 0000000..e3c40f7
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-1.c
@@ -0,0 +1,8 @@
+/* { dg-do run } */
+/* { dg-require-weak "" } */
+/* { dg-additional-options "-ftree-vectorize-epilogues=nomask -fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */
+
+#include "vect-tail-combine-1.c"
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
+/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE VECTORIZED \\(VS=16\\)" 2 "vect" { target avx2_runtime } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-2.c b/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-2.c
new file mode 100644
index 0000000..cea2c1d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-2.c
@@ -0,0 +1,8 @@
+/* { dg-do run } */
+/* { dg-require-weak "" } */
+/* { dg-additional-options "-ftree-vectorize-epilogues=nomask -fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */
+
+#include "vect-tail-combine-2.c"
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
+/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE VECTORIZED \\(VS=16\\)" 2 "vect" { target avx2_runtime } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-3.c b/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-3.c
new file mode 100644
index 0000000..18bbbc4
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-3.c
@@ -0,0 +1,8 @@
+/* { dg-do run } */
+/* { dg-require-weak "" } */
+/* { dg-additional-options "-ftree-vectorize-epilogues=nomask -fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */
+
+#include "vect-tail-combine-3.c"
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
+/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE VECTORIZED \\(VS=16\\)" 2 "vect" { target avx2_runtime } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-4.c b/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-4.c
new file mode 100644
index 0000000..beb9e0f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-4.c
@@ -0,0 +1,8 @@
+/* { dg-do run } */
+/* { dg-require-weak "" } */
+/* { dg-additional-options "-ftree-vectorize-epilogues=nomask -fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */
+
+#include "vect-tail-combine-4.c"
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
+/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE VECTORIZED \\(VS=16\\)" 2 "vect" { target avx2_runtime } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-5.c b/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-5.c
new file mode 100644
index 0000000..329652f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-5.c
@@ -0,0 +1,8 @@
+/* { dg-do run } */
+/* { dg-require-weak "" } */
+/* { dg-additional-options "-ftree-vectorize-epilogues=nomask -fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */
+
+#include "vect-tail-combine-5.c"
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
+/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE VECTORIZED \\(VS=16\\)" 2 "vect" { target avx2_runtime } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-6.c b/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-6.c
new file mode 100644
index 0000000..3e9a405
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-6.c
@@ -0,0 +1,8 @@
+/* { dg-do run } */
+/* { dg-require-weak "" } */
+/* { dg-additional-options "-ftree-vectorize-epilogues=nomask -fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */
+
+#include "vect-tail-combine-6.c"
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
+/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE VECTORIZED \\(VS=16\\)" 2 "vect" { target avx2_runtime } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-7.c b/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-7.c
new file mode 100644
index 0000000..a229414
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-7.c
@@ -0,0 +1,8 @@
+/* { dg-do run } */
+/* { dg-require-weak "" } */
+/* { dg-additional-options "-ffast-math -ftree-vectorize-epilogues=nomask -fvect-epilogue-cost-model=unlimited -mavx2" { target avx2_runtime } } */
+
+#include "vect-tail-combine-7.c"
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED \\(VS=32\\)" 2 "vect" { target avx2_runtime } } } */
+/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE VECTORIZED \\(VS=16\\)" 2 "vect" { target avx2_runtime } } } */
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 04ca176..8b54710 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -1596,6 +1596,36 @@ proc check_avx_hw_available { } {
     }]
 }
 
+# Return 1 if the target supports executing AVX2 instructions, 0
+# otherwise.  Cache the result.
+
+proc check_avx2_hw_available { } {
+    return [check_cached_effective_target avx2_hw_available {
+       # AVX2 is an x86 extension: answer 0 on any other target.
+       if { !([istarget x86_64-*-*] || [istarget i?86-*-*]) } {
+           expr 0
+       } else {
+           check_runtime_nocache avx2_hw_available {
+               #include "cpuid.h"
+               int main ()
+               {
+                 unsigned int eax, ebx, ecx, edx;
+                 if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx)
+                     || ((ecx & bit_OSXSAVE) != bit_OSXSAVE))
+                   return 1;
+                 /* Use 0, not NULL: cpuid.h does not declare NULL.  */
+                 if (__get_cpuid_max (0, 0) < 7)
+                   return 1;
+
+                 __cpuid_count (7, 0, eax, ebx, ecx, edx);
+                 /* Exit with 0 only when CPUID leaf 7 reports AVX2.  */
+                 return (ebx & bit_AVX2) != bit_AVX2;
+               }
+           } ""
+       }
+    }]
+}
+
 # Return 1 if the target supports running SSE executables, 0 otherwise.
 
 proc check_effective_target_sse_runtime { } {
@@ -1629,6 +1659,17 @@ proc check_effective_target_avx_runtime { } {
     return 0
 }
 
+# Return 1 if the target supports running AVX2 executables, 0 otherwise.
+
+proc check_effective_target_avx2_runtime { } {
+    # Every check below must succeed; evaluation stops at the first
+    # one that fails.
+    set avx2_ok [expr { [check_effective_target_avx2]
+                        && [check_avx2_hw_available]
+                        && [check_avx_os_support_available] }]
+    return $avx2_ok
+}
+
 # Return 1 if we are compiling for 64-bit PowerPC but we do not use direct
 # move instructions for moves from GPR to FPR.
 

Reply via email to