On 10/25/18 2:08 PM, Paul Clarke wrote:
> This is part 2/2 for contributing PPC64LE support for X86 SSSE3
> intrinsics. This patch includes testsuite/gcc.target tests for the
> intrinsics defined in tmmintrin.h, copied from gcc.target/i386.
>
> Bootstrapped and tested on Linux POWER8 LE, POWER8 BE (64 & 32), and POWER7.
>
> OK for trunk?
>
> [gcc/testsuite]
>
> 2018-10-25  Paul A. Clarke  <p...@us.ibm.com>
>
>       * gcc.target/powerpc/ssse3-check.h: New file.
>       * gcc.target/powerpc/ssse3-vals.h: New file.
>       * gcc.target/powerpc/ssse3-pabsb.c: New file.
>       * gcc.target/powerpc/ssse3-pabsd.c: New file.
>       * gcc.target/powerpc/ssse3-pabsw.c: New file.
>       * gcc.target/powerpc/ssse3-palignr.c: New file.
>       * gcc.target/powerpc/ssse3-phaddd.c: New file.
>       * gcc.target/powerpc/ssse3-phaddsw.c: New file.
>       * gcc.target/powerpc/ssse3-phaddw.c: New file.
>       * gcc.target/powerpc/ssse3-phsubd.c: New file.
>       * gcc.target/powerpc/ssse3-phsubsw.c: New file.
>       * gcc.target/powerpc/ssse3-phsubw.c: New file.
>       * gcc.target/powerpc/ssse3-pmaddubsw.c: New file.
>       * gcc.target/powerpc/ssse3-pmulhrsw.c: New file.
>       * gcc.target/powerpc/ssse3-pshufb.c: New file.
>       * gcc.target/powerpc/ssse3-psignb.c: New file.
>       * gcc.target/powerpc/ssse3-psignd.c: New file.
>       * gcc.target/powerpc/ssse3-psignw.c: New file.
>
> Index: gcc/testsuite/gcc.target/powerpc/ssse3-check.h
> ===================================================================
> diff --git a/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-check.h 
> b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-check.h
> new file mode 10644
> --- /dev/null (revision 0)
> +++ b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-check.h    (working copy)
> @@ -0,0 +1,43 @@
> +#include <stdio.h>
> +#include <stdlib.h>
> +
> +#include "m128-check.h"
> +
> +/* define DEBUG replace abort with printf on error.  */

One nit -- this comment appears to be incorrect: in the only place DEBUG is
used, you don't have abort() anywhere.

(I have a patch under review that questions why we would replace abort() rather
than supplement it with printf, anyway...)

Thanks,
Bill

> +//#define DEBUG 1
> +
> +#define TEST ssse3_test
> +
> +static void ssse3_test (void);
> +
> +static void
> +__attribute__ ((noinline))
> +do_test (void)
> +{
> +  ssse3_test ();
> +}
> +
> +int
> +main ()
> +{
> +#ifdef __BUILTIN_CPU_SUPPORTS__
> +  /* Most SSE intrinsic operations can be implemented via VMX
> +     instructions, but some operations may be faster / simpler
> +     using the POWER8 VSX instructions.  This is especially true
> +     when we are transferring / converting to / from __m64 types.
> +     The direct register transfer instructions from POWER8 are
> +     especially important.  So we test for arch_2_07.  */
> +  if (__builtin_cpu_supports ("arch_2_07"))
> +    {
> +      do_test ();
> +#ifdef DEBUG
> +      printf ("PASSED\n");
> +#endif
> +    }
> +#ifdef DEBUG
> +  else
> +    printf ("SKIPPED\n");
> +#endif
> +#endif /* __BUILTIN_CPU_SUPPORTS__ */
> +  return 0;
> +}
> Index: gcc/testsuite/gcc.target/powerpc/ssse3-pabsb.c
> ===================================================================
> diff --git a/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-pabsb.c 
> b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-pabsb.c
> new file mode 10644
> --- /dev/null (revision 0)
> +++ b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-pabsb.c    (working copy)
> @@ -0,0 +1,80 @@
> +/* { dg-do run } */
> +/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
> +/* { dg-require-effective-target p8vector_hw } */
> +
> +#ifndef CHECK_H
> +#define CHECK_H "ssse3-check.h"
> +#endif
> +
> +#ifndef TEST
> +#define TEST ssse3_test
> +#endif
> +
> +#include CHECK_H
> +
> +#include "ssse3-vals.h"
> +#include <tmmintrin.h>
> +
> +#ifndef __AVX__
> +/* Test the 64-bit form */
> +static void
> +ssse3_test_pabsb (int *i1, int *r)
> +{
> +  __m64 t1 = *(__m64 *) i1;
> +  *(__m64 *) r = _mm_abs_pi8 (t1);
> +  _mm_empty ();
> +}
> +#endif
> +
> +/* Test the 128-bit form */
> +static void
> +ssse3_test_pabsb128 (int *i1, int *r)
> +{
> +  /* Assumes incoming pointers are 16-byte aligned */
> +  __m128i t1 = *(__m128i *) i1;
> +  *(__m128i *) r = _mm_abs_epi8 (t1);
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (int *i1, int *r)
> +{
> +  char *b1 = (char *) i1;
> +  char *bout = (char *) r;
> +  int i;
> +
> +  for (i = 0; i < 16; i++)
> +    if (b1[i] < 0)
> +      bout[i] = -b1[i];
> +    else
> +      bout[i] = b1[i];
> +}
> +
> +static void
> +TEST (void)
> +{
> +  int i;
> +  int r [4] __attribute__ ((aligned(16)));
> +  int ck [4];
> +  int fail = 0;
> +
> +  for (i = 0; i < 256; i += 4)
> +    {
> +      /* Manually compute the result */
> +      compute_correct_result(&vals[i + 0], ck);
> +
> +#ifndef __AVX__
> +      /* Run the 64-bit tests */
> +      ssse3_test_pabsb (&vals[i + 0], &r[0]);
> +      ssse3_test_pabsb (&vals[i + 2], &r[2]);
> +      fail += chk_128 (ck, r);
> +#endif
> +
> +      /* Run the 128-bit tests */
> +      ssse3_test_pabsb128 (&vals[i + 0], r);
> +      fail += chk_128 (ck, r);
> +    }
> +
> +  if (fail != 0)
> +    abort ();
> +}
> Index: gcc/testsuite/gcc.target/powerpc/ssse3-pabsd.c
> ===================================================================
> diff --git a/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-pabsd.c 
> b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-pabsd.c
> new file mode 10644
> --- /dev/null (revision 0)
> +++ b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-pabsd.c    (working copy)
> @@ -0,0 +1,79 @@
> +/* { dg-do run } */
> +/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
> +/* { dg-require-effective-target p8vector_hw } */
> +
> +#ifndef CHECK_H
> +#define CHECK_H "ssse3-check.h"
> +#endif
> +
> +#ifndef TEST
> +#define TEST ssse3_test
> +#endif
> +
> +#include CHECK_H
> +
> +#include "ssse3-vals.h"
> +
> +#include <tmmintrin.h>
> +
> +#ifndef __AVX__
> +/* Test the 64-bit form */
> +static void
> +ssse3_test_pabsd (int *i1, int *r)
> +{
> +  __m64 t1 = *(__m64 *) i1;
> +  *(__m64 *) r = _mm_abs_pi32 (t1);
> +  _mm_empty ();
> +}
> +#endif
> +
> +/* Test the 128-bit form */
> +static void
> +ssse3_test_pabsd128 (int *i1, int *r)
> +{
> +  /* Assumes incoming pointers are 16-byte aligned */
> +  __m128i t1 = *(__m128i *) i1;
> +  *(__m128i *) r = _mm_abs_epi32 (t1);
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (int *i1, int *r)
> +{
> +  int i;
> +
> +  for (i = 0; i < 4; i++)
> +    if (i1[i] < 0)
> +      r[i] = -i1[i];
> +    else
> +      r[i] = i1[i];
> +}
> +
> +static void
> +TEST (void)
> +{
> +  int i;
> +  int r [4] __attribute__ ((aligned(16)));
> +  int ck [4];
> +  int fail = 0;
> +
> +  for (i = 0; i < 256; i += 4)
> +    {
> +      /* Manually compute the result */
> +      compute_correct_result(&vals[i + 0], ck);
> +
> +#ifndef __AVX__
> +      /* Run the 64-bit tests */
> +      ssse3_test_pabsd (&vals[i + 0], &r[0]);
> +      ssse3_test_pabsd (&vals[i + 2], &r[2]);
> +      fail += chk_128 (ck, r);
> +#endif
> +
> +      /* Run the 128-bit tests */
> +      ssse3_test_pabsd128 (&vals[i + 0], r);
> +      fail += chk_128(ck, r);
> +    }
> +
> +  if (fail != 0)
> +    abort ();
> +}
> Index: gcc/testsuite/gcc.target/powerpc/ssse3-pabsw.c
> ===================================================================
> diff --git a/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-pabsw.c 
> b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-pabsw.c
> new file mode 10644
> --- /dev/null (revision 0)
> +++ b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-pabsw.c    (working copy)
> @@ -0,0 +1,81 @@
> +/* { dg-do run } */
> +/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
> +/* { dg-require-effective-target p8vector_hw } */
> +
> +#ifndef CHECK_H
> +#define CHECK_H "ssse3-check.h"
> +#endif
> +
> +#ifndef TEST
> +#define TEST ssse3_test
> +#endif
> +
> +#include CHECK_H
> +
> +#include "ssse3-vals.h"
> +
> +#include <tmmintrin.h>
> +
> +#ifndef __AVX__
> +/* Test the 64-bit form */
> +static void
> +ssse3_test_pabsw (int *i1, int *r)
> +{
> +  __m64 t1 = *(__m64 *) i1;
> +  *(__m64 *) r = _mm_abs_pi16 (t1);
> +  _mm_empty ();
> +}
> +#endif
> +
> +/* Test the 128-bit form */
> +static void
> +ssse3_test_pabsw128 (int *i1, int *r)
> +{
> +  /* Assumes incoming pointers are 16-byte aligned */
> +  __m128i t1 = *(__m128i *) i1;
> +  *(__m128i *) r = _mm_abs_epi16 (t1);
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (int *i1, int *r)
> +{
> +  short *s1 = (short *) i1;
> +  short *sout = (short *) r;
> +  int i;
> +
> +  for (i = 0; i < 8; i++)
> +    if (s1[i] < 0)
> +      sout[i] = -s1[i];
> +    else
> +      sout[i] = s1[i];
> +}
> +
> +static void
> +TEST (void)
> +{
> +  int i;
> +  int r [4] __attribute__ ((aligned(16)));
> +  int ck [4];
> +  int fail = 0;
> +
> +  for (i = 0; i < 256; i += 4)
> +    {
> +      /* Manually compute the result */
> +      compute_correct_result (&vals[i + 0], ck);
> +
> +#ifndef __AVX__
> +      /* Run the 64-bit tests */
> +      ssse3_test_pabsw (&vals[i + 0], &r[0]);
> +      ssse3_test_pabsw (&vals[i + 2], &r[2]);
> +      fail += chk_128 (ck, r);
> +#endif
> +
> +      /* Run the 128-bit tests */
> +      ssse3_test_pabsw128 (&vals[i + 0], r);
> +      fail += chk_128 (ck, r);
> +    }
> +  
> +  if (fail != 0)
> +    abort ();
> +}
> Index: gcc/testsuite/gcc.target/powerpc/ssse3-palignr.c
> ===================================================================
> diff --git a/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-palignr.c 
> b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-palignr.c
> new file mode 10644
> --- /dev/null (revision 0)
> +++ b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-palignr.c  (working copy)
> @@ -0,0 +1,279 @@
> +/* { dg-do run } */
> +/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
> +/* { dg-require-effective-target p8vector_hw } */
> +
> +#ifndef CHECK_H
> +#define CHECK_H "ssse3-check.h"
> +#endif
> +
> +#ifndef TEST
> +#define TEST ssse3_test
> +#endif
> +
> +#include CHECK_H
> +
> +#include "ssse3-vals.h"
> +
> +#include <tmmintrin.h>
> +#include <string.h>
> +
> +#ifndef __AVX__
> +/* Test the 64-bit form */
> +static void
> +ssse3_test_palignr (int *i1, int *i2, unsigned int imm, int *r)
> +{
> +  __m64 t1 = *(__m64 *) i1;
> +  __m64 t2 = *(__m64 *) i2;
> +
> +  switch (imm)
> +    {
> +    case 0:
> +      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 0);
> +      break;
> +    case 1:
> +      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 1);
> +      break;
> +    case 2:
> +      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 2);
> +      break;
> +    case 3:
> +      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 3);
> +      break;
> +    case 4:
> +      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 4);
> +      break;
> +    case 5:
> +      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 5);
> +      break;
> +    case 6:
> +      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 6);
> +      break;
> +    case 7:
> +      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 7);
> +      break;
> +    case 8:
> +      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 8);
> +      break;
> +    case 9:
> +      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 9);
> +      break;
> +    case 10:
> +      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 10);
> +      break;
> +    case 11:
> +      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 11);
> +      break;
> +    case 12:
> +      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 12);
> +      break;
> +    case 13:
> +      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 13);
> +      break;
> +    case 14:
> +      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 14);
> +      break;
> +    case 15:
> +      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 15);
> +      break;
> +    default:
> +      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 16);
> +      break;
> +    }
> +
> +   _mm_empty();
> +}
> +#endif
> +
> +/* Test the 128-bit form */
> +static void
> +ssse3_test_palignr128 (int *i1, int *i2, unsigned int imm, int *r)
> +{
> +  /* Assumes incoming pointers are 16-byte aligned */
> +  __m128i t1 = *(__m128i *) i1;
> +  __m128i t2 = *(__m128i *) i2;
> +
> +  switch (imm)
> +    {
> +    case 0:
> +      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 0);
> +      break;
> +    case 1:
> +      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 1);
> +      break;
> +    case 2:
> +      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 2);
> +      break;
> +    case 3:
> +      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 3);
> +      break;
> +    case 4:
> +      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 4);
> +      break;
> +    case 5:
> +      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 5);
> +      break;
> +    case 6:
> +      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 6);
> +      break;
> +    case 7:
> +      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 7);
> +      break;
> +    case 8:
> +      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 8);
> +      break;
> +    case 9:
> +      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 9);
> +      break;
> +    case 10:
> +      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 10);
> +      break;
> +    case 11:
> +      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 11);
> +      break;
> +    case 12:
> +      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 12);
> +      break;
> +    case 13:
> +      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 13);
> +      break;
> +    case 14:
> +      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 14);
> +      break;
> +    case 15:
> +      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 15);
> +      break;
> +    case 16:
> +      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 16);
> +      break;
> +    case 17:
> +      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 17);
> +      break;
> +    case 18:
> +      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 18);
> +      break;
> +    case 19:
> +      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 19);
> +      break;
> +    case 20:
> +      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 20);
> +      break;
> +    case 21:
> +      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 21);
> +      break;
> +    case 22:
> +      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 22);
> +      break;
> +    case 23:
> +      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 23);
> +      break;
> +    case 24:
> +      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 24);
> +      break;
> +    case 25:
> +      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 25);
> +      break;
> +    case 26:
> +      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 26);
> +      break;
> +    case 27:
> +      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 27);
> +      break;
> +    case 28:
> +      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 28);
> +      break;
> +    case 29:
> +      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 29);
> +      break;
> +    case 30:
> +      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 30);
> +      break;
> +    case 31:
> +      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 31);
> +      break;
> +    default:
> +      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 32);
> +      break;
> +    }
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result_128 (int *i1, int *i2, unsigned int imm, int *r)
> +{
> +  char buf [32];
> +  char *bout = (char *) r;
> +  int i;
> +
> +  memcpy (&buf[0], i2, 16);
> +  memcpy (&buf[16], i1, 16);
> +
> +  for (i = 0; i < 16; i++)
> +    if (imm >= 32 || imm + i >= 32)
> +      bout[i] = 0;
> +    else
> +      bout[i] = buf[imm + i];
> +}
> +
> +#ifndef __AVX__
> +static void
> +compute_correct_result_64 (int *i1, int *i2, unsigned int imm, int *r)
> +{
> +  char buf [16];
> +  char *bout = (char *)r;
> +  int i;
> +
> +  /* Handle the first half */
> +  memcpy (&buf[0], i2, 8);
> +  memcpy (&buf[8], i1, 8);
> +
> +  for (i = 0; i < 8; i++)
> +    if (imm >= 16 || imm + i >= 16)
> +      bout[i] = 0;
> +    else
> +      bout[i] = buf[imm + i];
> +
> +  /* Handle the second half */
> +  memcpy (&buf[0], &i2[2], 8);
> +  memcpy (&buf[8], &i1[2], 8);
> +
> +  for (i = 0; i < 8; i++)
> +    if (imm >= 16 || imm + i >= 16)
> +      bout[i + 8] = 0;
> +    else
> +      bout[i + 8] = buf[imm + i];
> +}
> +#endif
> +
> +static void
> +TEST (void)
> +{
> +  int i;
> +  int r [4] __attribute__ ((aligned(16)));
> +  int ck [4];
> +  unsigned int imm;
> +  int fail = 0;
> +
> +  for (i = 0; i < 256; i += 8)
> +    for (imm = 0; imm < 100; imm++)
> +      {
> +#ifndef __AVX__
> +     /* Manually compute the result */
> +     compute_correct_result_64 (&vals[i + 0], &vals[i + 4], imm, ck);
> +
> +     /* Run the 64-bit tests */
> +     ssse3_test_palignr (&vals[i + 0], &vals[i + 4], imm, &r[0]);
> +     ssse3_test_palignr (&vals[i + 2], &vals[i + 6], imm, &r[2]);
> +     fail += chk_128 (ck, r);
> +#endif
> +
> +     /* Recompute the results for 128-bits */
> +     compute_correct_result_128 (&vals[i + 0], &vals[i + 4], imm, ck);
> +
> +     /* Run the 128-bit tests */
> +     ssse3_test_palignr128 (&vals[i + 0], &vals[i + 4], imm, r);
> +     fail += chk_128 (ck, r);
> +      }
> +
> +  if (fail != 0)
> +    abort ();
> +}
> Index: gcc/testsuite/gcc.target/powerpc/ssse3-phaddd.c
> ===================================================================
> diff --git a/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-phaddd.c 
> b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-phaddd.c
> new file mode 10644
> --- /dev/null (revision 0)
> +++ b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-phaddd.c   (working copy)
> @@ -0,0 +1,81 @@
> +/* { dg-do run } */
> +/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
> +/* { dg-require-effective-target p8vector_hw } */
> +
> +#ifndef CHECK_H
> +#define CHECK_H "ssse3-check.h"
> +#endif
> +
> +#ifndef TEST
> +#define TEST ssse3_test
> +#endif
> +
> +#include CHECK_H
> +
> +#include "ssse3-vals.h"
> +
> +#include <tmmintrin.h>
> +
> +#ifndef __AVX__
> +/* Test the 64-bit form */
> +static void
> +ssse3_test_phaddd (int *i1, int *i2, int *r)
> +{
> +  __m64 t1 = *(__m64 *) i1;
> +  __m64 t2 = *(__m64 *) i2;
> +  *(__m64 *) r = _mm_hadd_pi32 (t1, t2);
> +  _mm_empty();
> +}
> +#endif
> +
> +/* Test the 128-bit form */
> +static void
> +ssse3_test_phaddd128 (int *i1, int *i2, int *r)
> +{
> +  /* Assumes incoming pointers are 16-byte aligned */
> +  __m128i t1 = *(__m128i *) i1;
> +  __m128i t2 = *(__m128i *) i2;
> +  *(__m128i *) r = _mm_hadd_epi32 (t1, t2);
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result(int *i1, int *i2, int *r)
> +{
> +  int i;
> +
> +  for (i = 0; i < 2; i++)
> +    r[i] = i1[2 * i] + i1[2 * i + 1];
> +  for (i = 0; i < 2; i++)
> +    r[i + 2] = i2[2 * i] + i2[2 * i + 1];
> +}
> +
> +static void
> +TEST (void)
> +{
> +  int i;
> +  int r [4] __attribute__ ((aligned(16)));
> +  int ck [4];
> +  int fail = 0;
> +
> +
> +  for (i = 0; i < 256; i += 8)
> +    {
> +      /* Manually compute the result */
> +      compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
> +
> +#ifndef __AVX__
> +      /* Run the 64-bit tests */
> +      ssse3_test_phaddd (&vals[i + 0], &vals[i + 2], &r[0]);
> +      ssse3_test_phaddd (&vals[i + 4], &vals[i + 6], &r[2]);
> +      fail += chk_128 (ck, r);
> +#endif
> +
> +      /* Run the 128-bit tests */
> +      ssse3_test_phaddd128 (&vals[i + 0], &vals[i + 4], r);
> +      fail += chk_128 (ck, r);
> +    }
> +
> +  if (fail != 0)
> +    abort ();
> +}
> Index: gcc/testsuite/gcc.target/powerpc/ssse3-phaddsw.c
> ===================================================================
> diff --git a/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-phaddsw.c 
> b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-phaddsw.c
> new file mode 10644
> --- /dev/null (revision 0)
> +++ b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-phaddsw.c  (working copy)
> @@ -0,0 +1,95 @@
> +/* { dg-do run } */
> +/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
> +/* { dg-require-effective-target p8vector_hw } */
> +
> +#ifndef CHECK_H
> +#define CHECK_H "ssse3-check.h"
> +#endif
> +
> +#ifndef TEST
> +#define TEST ssse3_test
> +#endif
> +
> +#include CHECK_H
> +
> +#include "ssse3-vals.h"
> +
> +#include <tmmintrin.h>
> +
> +#ifndef __AVX__
> +/* Test the 64-bit form */
> +static void
> +ssse3_test_phaddsw (int *i1, int *i2, int *r)
> +{
> +  __m64 t1 = *(__m64 *) i1;
> +  __m64 t2 = *(__m64 *) i2;
> +  *(__m64 *) r = _mm_hadds_pi16 (t1, t2);
> +  _mm_empty ();
> +}
> +#endif
> +
> +/* Test the 128-bit form */
> +static void
> +ssse3_test_phaddsw128 (int *i1, int *i2, int *r)
> +{
> +  /* Assumes incoming pointers are 16-byte aligned */
> +  __m128i t1 = *(__m128i *) i1;
> +  __m128i t2 = *(__m128i *) i2;
> + *(__m128i *) r = _mm_hadds_epi16 (t1, t2);
> +}
> +
> +static short
> +signed_saturate_to_word (int x)
> +{
> +  if (x > (int) 0x7fff)
> +    return 0x7fff;
> +
> +  if (x < (int) 0xffff8000)
> +    return 0x8000;
> +
> +  return (short) x;
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (int *i1, int *i2, int *r)
> +{
> +  short *s1 = (short *) i1;
> +  short *s2 = (short *) i2;
> +  short *sout = (short *) r;
> +  int i;
> +
> +  for (i = 0; i < 4; i++)
> +    sout[i] = signed_saturate_to_word(s1[2 * i] + s1[2 * i + 1]);
> +  for (i = 0; i < 4; i++)
> +    sout[i + 4] = signed_saturate_to_word(s2[2 * i] + s2[2 * i + 1]);
> +}
> +
> +static void
> +TEST (void)
> +{
> +  int i;
> +  int r [4] __attribute__ ((aligned(16)));
> +  int ck [4];
> +  int fail = 0;
> +
> +  for (i = 0; i < 256; i += 8)
> +    {
> +      /* Manually compute the result */
> +      compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
> +
> +#ifndef __AVX__
> +      /* Run the 64-bit tests */
> +      ssse3_test_phaddsw (&vals[i + 0], &vals[i + 2], &r[0]);
> +      ssse3_test_phaddsw (&vals[i + 4], &vals[i + 6], &r[2]);
> +      fail += chk_128 (ck, r);
> +#endif
> +
> +      /* Run the 128-bit tests */
> +      ssse3_test_phaddsw128 (&vals[i + 0], &vals[i + 4], r);
> +      fail += chk_128 (ck, r);
> +    }
> +
> +  if (fail != 0)
> +    abort ();
> +}
> Index: gcc/testsuite/gcc.target/powerpc/ssse3-phaddw.c
> ===================================================================
> diff --git a/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-phaddw.c 
> b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-phaddw.c
> new file mode 10644
> --- /dev/null (revision 0)
> +++ b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-phaddw.c   (working copy)
> @@ -0,0 +1,84 @@
> +/* { dg-do run } */
> +/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
> +/* { dg-require-effective-target p8vector_hw } */
> +
> +#ifndef CHECK_H
> +#define CHECK_H "ssse3-check.h"
> +#endif
> +
> +#ifndef TEST
> +#define TEST ssse3_test
> +#endif
> +
> +#include CHECK_H
> +
> +#include "ssse3-vals.h"
> +
> +#include <tmmintrin.h>
> +
> +#ifndef __AVX__
> +/* Test the 64-bit form */
> +static void
> +ssse3_test_phaddw (int *i1, int *i2, int *r)
> +{
> +  __m64 t1 = *(__m64 *) i1;
> +  __m64 t2 = *(__m64 *) i2;
> +  *(__m64 *) r = _mm_hadd_pi16 (t1, t2);
> +  _mm_empty ();
> +}
> +#endif
> +
> +/* Test the 128-bit form */
> +static void
> +ssse3_test_phaddw128 (int *i1, int *i2, int *r)
> +{
> +  /* Assumes incoming pointers are 16-byte aligned */
> +  __m128i t1 = *(__m128i *) i1;
> +  __m128i t2 = *(__m128i *) i2;
> +  *(__m128i *) r = _mm_hadd_epi16 (t1, t2);
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result(int *i1, int *i2, int *r)
> +{
> +  short *s1 = (short *) i1;
> +  short *s2 = (short *) i2;
> +  short *sout = (short *) r;
> +  int i;
> +
> +  for (i = 0; i < 4; i++)
> +    sout[i] = s1[2 * i] + s1[2 * i + 1];
> +
> +  for (i = 0; i < 4; i++)
> +    sout[i + 4] = s2[2 * i] + s2[2 * i + 1];
> +}
> +
> +static void
> +TEST (void)
> +{
> +  int i;
> +  int r [4] __attribute__ ((aligned(16)));
> +  int ck [4];
> +  int fail = 0;
> +
> +  for (i = 0; i < 256; i += 8)
> +    {
> +      /* Manually compute the result */
> +      compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
> +
> +#ifndef __AVX__
> +      /* Run the 64-bit tests */
> +      ssse3_test_phaddw (&vals[i + 0], &vals[i + 2], &r[0]);
> +      ssse3_test_phaddw (&vals[i + 4], &vals[i + 6], &r[2]);
> +      fail += chk_128 (ck, r);
> +#endif
> +
> +      /* Run the 128-bit tests */
> +      ssse3_test_phaddw128 (&vals[i + 0], &vals[i + 4], r);
> +      fail += chk_128 (ck, r);
> +    }
> +
> +  if (fail != 0)
> +    abort ();
> +}
> Index: gcc/testsuite/gcc.target/powerpc/ssse3-phsubd.c
> ===================================================================
> diff --git a/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-phsubd.c 
> b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-phsubd.c
> new file mode 10644
> --- /dev/null (revision 0)
> +++ b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-phsubd.c   (working copy)
> @@ -0,0 +1,80 @@
> +/* { dg-do run } */
> +/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
> +/* { dg-require-effective-target p8vector_hw } */
> +
> +#ifndef CHECK_H
> +#define CHECK_H "ssse3-check.h"
> +#endif
> +
> +#ifndef TEST
> +#define TEST ssse3_test
> +#endif
> +
> +#include CHECK_H
> +
> +#include "ssse3-vals.h"
> +
> +#include <tmmintrin.h>
> +
> +#ifndef __AVX__
> +/* Test the 64-bit form */
> +static void
> +ssse3_test_phsubd (int *i1, int *i2, int *r)
> +{
> +  __m64 t1 = *(__m64 *) i1;
> +  __m64 t2 = *(__m64 *) i2;
> +  *(__m64 *) r = _mm_hsub_pi32(t1, t2);
> +  _mm_empty ();
> +}
> +#endif
> +
> +/* Test the 128-bit form */
> +static void
> +ssse3_test_phsubd128 (int *i1, int *i2, int *r)
> +{
> +  /* Assumes incoming pointers are 16-byte aligned */
> +  __m128i t1 = *(__m128i *) i1;
> +  __m128i t2 = *(__m128i *) i2;
> +  *(__m128i *) r = _mm_hsub_epi32 (t1, t2);
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (int *i1, int *i2, int *r)
> +{
> +  int i;
> +
> +  for (i = 0; i < 2; i++)
> +    r[i] = i1[2 * i] - i1[2 * i + 1];
> +  for (i = 0; i < 2; i++)
> +    r[i + 2] = i2[2 * i] - i2[2 * i + 1];
> +}
> +
> +static void
> +TEST (void)
> +{
> +  int i;
> +  int r [4] __attribute__ ((aligned(16)));
> +  int ck [4];
> +  int fail = 0;
> +
> +  for (i = 0; i < 256; i += 8)
> +    {
> +      /* Manually compute the result */
> +      compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
> +
> +#ifndef __AVX__
> +      /* Run the 64-bit tests */
> +      ssse3_test_phsubd (&vals[i + 0], &vals[i + 2], &r[0]);
> +      ssse3_test_phsubd (&vals[i + 4], &vals[i + 6], &r[2]);
> +      fail += chk_128 (ck, r);
> +#endif
> +
> +      /* Run the 128-bit tests */
> +      ssse3_test_phsubd128 (&vals[i + 0], &vals[i + 4], r);
> +      fail += chk_128 (ck, r);
> +    }
> +
> +  if (fail != 0)
> +    abort ();
> +}
> Index: gcc/testsuite/gcc.target/powerpc/ssse3-phsubsw.c
> ===================================================================
> diff --git a/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-phsubsw.c 
> b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-phsubsw.c
> new file mode 10644
> --- /dev/null (revision 0)
> +++ b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-phsubsw.c  (working copy)
> @@ -0,0 +1,98 @@
> +/* { dg-do run } */
> +/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
> +/* { dg-require-effective-target p8vector_hw } */
> +
> +#ifndef CHECK_H
> +#define CHECK_H "ssse3-check.h"
> +#endif
> +
> +#ifndef TEST
> +#define TEST ssse3_test
> +#endif
> +
> +#include CHECK_H
> +
> +#include "ssse3-vals.h"
> +
> +#include <tmmintrin.h>
> +
> +#ifndef __AVX__
> +/* Test the 64-bit form */
> +static void
> +ssse3_test_phsubsw (int *i1, int *i2, int *r)
> +{
> +  __m64 t1 = *(__m64 *) i1;
> +  __m64 t2 = *(__m64 *) i2;
> +
> +  *(__m64 *) r = _mm_hsubs_pi16 (t1, t2);
> +
> +  _mm_empty ();
> +}
> +#endif
> +
> +/* Test the 128-bit form */
> +static void
> +ssse3_test_phsubsw128 (int *i1, int *i2, int *r)
> +{
> +  /* Assumes incoming pointers are 16-byte aligned */
> +  __m128i t1 = *(__m128i *) i1;
> +  __m128i t2 = *(__m128i *) i2;
> +  *(__m128i *) r = _mm_hsubs_epi16 (t1, t2);
> +}
> +
> +static short
> +signed_saturate_to_word (int x)
> +{
> +  if (x > (int )0x7fff)
> +    return 0x7fff;
> +
> +  if (x < (int) 0xffff8000)
> +    return 0x8000;
> +
> +  return (short)x;
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (int *i1, int *i2, int *r)
> +{
> +  short *s1 = (short *) i1;
> +  short *s2 = (short *) i2;
> +  short *sout = (short *) r;
> +  int i;
> +
> +  for (i = 0; i < 4; i++)
> +    sout[i] = signed_saturate_to_word (s1[2 * i] - s1[2 * i + 1]);
> +
> +  for (i = 0; i < 4; i++)
> +    sout[i + 4] = signed_saturate_to_word (s2[2 * i] - s2[2 * i + 1]);
> +}
> +
> +static void
> +TEST (void)
> +{
> +  int i;
> +  int r [4] __attribute__ ((aligned(16)));
> +  int ck [4];
> +  int fail = 0;
> +
> +  for (i = 0; i < 256; i += 8)
> +    {
> +      /* Manually compute the result */
> +      compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
> +
> +#ifndef __AVX__
> +      /* Run the 64-bit tests */
> +      ssse3_test_phsubsw (&vals[i + 0], &vals[i + 2], &r[0]);
> +      ssse3_test_phsubsw (&vals[i + 4], &vals[i + 6], &r[2]);
> +      fail += chk_128 (ck, r);
> +#endif
> +
> +      /* Run the 128-bit tests */
> +      ssse3_test_phsubsw128 (&vals[i + 0], &vals[i + 4], r);
> +      fail += chk_128 (ck, r);
> +    }
> +
> +  if (fail != 0)
> +    abort ();
> +}
> Index: gcc/testsuite/gcc.target/powerpc/ssse3-phsubw.c
> ===================================================================
> diff --git a/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-phsubw.c 
> b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-phsubw.c
> new file mode 10644
> --- /dev/null (revision 0)
> +++ b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-phsubw.c   (working copy)
> @@ -0,0 +1,83 @@
> +/* { dg-do run } */
> +/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
> +/* { dg-require-effective-target p8vector_hw } */
> +
> +#ifndef CHECK_H
> +#define CHECK_H "ssse3-check.h"
> +#endif
> +
> +#ifndef TEST
> +#define TEST ssse3_test
> +#endif
> +
> +#include CHECK_H
> +#include "ssse3-vals.h"
> +
> +#include <tmmintrin.h>
> +
> +#ifndef __AVX__
> +/* Test the 64-bit form */
> +static void
> +ssse3_test_phsubw (int *i1, int *i2, int *r)
> +{
> +  __m64 t1 = *(__m64 *) i1;
> +  __m64 t2 = *(__m64 *) i2;
> +  *(__m64 *) r = _mm_hsub_pi16 (t1, t2);
> +  _mm_empty ();
> +}
> +#endif
> +
> +/* Test the 128-bit form */
> +static void
> +ssse3_test_phsubw128 (int *i1, int *i2, int *r)
> +{
> +  /* Assumes incoming pointers are 16-byte aligned */
> +  __m128i t1 = *(__m128i *) i1;
> +  __m128i t2 = *(__m128i *) i2;
> +
> +  *(__m128i *) r = _mm_hsub_epi16 (t1, t2);
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (int *i1, int *i2, int *r)
> +{
> +  short *s1 = (short *) i1;
> +  short *s2 = (short *) i2;
> +  short *sout = (short *) r;
> +  int i;
> +
> +  for (i = 0; i < 4; i++)
> +    sout[i] = s1[2 * i] - s1[2 * i + 1];
> +  for (i = 0; i < 4; i++)
> +    sout[i + 4] = s2[2 * i] - s2[2 * i + 1];
> +}
> +
> +static void
> +TEST (void)
> +{
> +  int i;
> +  int r [4] __attribute__ ((aligned(16)));
> +  int ck [4];
> +  int fail = 0;
> +
> +  for (i = 0; i < 256; i += 8)
> +    {
> +      /* Manually compute the result */
> +      compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
> +
> +#ifndef __AVX__
> +      /* Run the 64-bit tests */
> +      ssse3_test_phsubw (&vals[i + 0], &vals[i + 2], &r[0]);
> +      ssse3_test_phsubw (&vals[i + 4], &vals[i + 6], &r[2]);
> +      fail += chk_128 (ck, r);
> +#endif
> +
> +      /* Run the 128-bit tests */
> +      ssse3_test_phsubw128 (&vals[i + 0], &vals[i + 4], r);
> +      fail += chk_128 (ck, r);
> +    }
> +
> +  if (fail != 0)
> +    abort ();
> +}
> Index: gcc/testsuite/gcc.target/powerpc/ssse3-pmaddubsw.c
> ===================================================================
> diff --git a/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-pmaddubsw.c 
> b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-pmaddubsw.c
> new file mode 10644
> --- /dev/null (revision 0)
> +++ b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-pmaddubsw.c        
> (working copy)
> @@ -0,0 +1,98 @@
> +/* { dg-do run } */
> +/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
> +/* { dg-require-effective-target p8vector_hw } */
> +
> +#ifndef CHECK_H
> +#define CHECK_H "ssse3-check.h"
> +#endif
> +
> +#ifndef TEST
> +#define TEST ssse3_test
> +#endif
> +
> +#include CHECK_H
> +
> +#include "ssse3-vals.h"
> +
> +#include <tmmintrin.h>
> +
> +#ifndef __AVX__
> +/* Test the 64-bit form */
> +static void
> +ssse3_test_pmaddubsw (int *i1, int *i2, int *r)
> +{
> +  __m64 t1 = *(__m64 *) i1;
> +  __m64 t2 = *(__m64 *) i2;
> +  *(__m64 *) r = _mm_maddubs_pi16 (t1, t2);
> +  _mm_empty ();
> +}
> +#endif
> +
> +/* Test the 128-bit form */
> +static void
> +ssse3_test_pmaddubsw128 (int *i1, int *i2, int *r)
> +{
> +  /* Assumes incoming pointers are 16-byte aligned */
> +  __m128i t1 = *(__m128i *) i1;
> +  __m128i t2 = *(__m128i *) i2;
> +  *(__m128i *) r = _mm_maddubs_epi16 (t1, t2);
> +}
> +
> +/* Clamp X to the signed 16-bit range [-0x8000, 0x7fff].  */
> +static short
> +signed_saturate_to_word (int x)
> +{
> +  if (x > 0x7fff)
> +    return 0x7fff;
> +  if (x < -0x8000)
> +    return -0x8000;
> +
> +  return (short) x;
> +}
> +
> +/* Manual reference: sum of adjacent unsigned (I1) * signed (I2) bytes.  */
> +static void
> +compute_correct_result (int *i1, int *i2, int *r)
> +{
> +  unsigned char *ub1 = (unsigned char *) i1;
> +  /* Plain char is unsigned on PowerPC; this operand must be read signed.  */
> +  signed char *sb2 = (signed char *) i2;
> +  short *sout = (short *) r;
> +  int i;
> +
> +  for (i = 0; i < 8; i++)
> +    {
> +      int t0 = ((int) ub1[2 * i] * (int) sb2[2 * i]
> +                + (int) ub1[2 * i + 1] * (int) sb2[2 * i + 1]);
> +      sout[i] = signed_saturate_to_word (t0);
> +    }
> +}
> +
> +static void
> +TEST (void)
> +{
> +  int i;
> +  int r [4] __attribute__ ((aligned(16)));
> +  int ck [4];
> +  int fail = 0;
> +
> +  for (i = 0; i < 256; i += 8)
> +    {
> +      /* Manually compute the result */
> +      compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
> +
> +#ifndef __AVX__
> +      /* Run the 64-bit tests */
> +      ssse3_test_pmaddubsw (&vals[i + 0], &vals[i + 4], &r[0]);
> +      ssse3_test_pmaddubsw (&vals[i + 2], &vals[i + 6], &r[2]);
> +      fail += chk_128 (ck, r);
> +#endif
> +
> +      /* Run the 128-bit tests */
> +      ssse3_test_pmaddubsw128 (&vals[i + 0], &vals[i + 4], r);
> +      fail += chk_128 (ck, r);
> +    }
> +
> +  if (fail != 0)
> +    abort ();
> +}
> Index: gcc/testsuite/gcc.target/powerpc/ssse3-pmulhrsw.c
> ===================================================================
> diff --git a/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-pmulhrsw.c 
> b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-pmulhrsw.c
> new file mode 10644
> --- /dev/null (revision 0)
> +++ b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-pmulhrsw.c (working copy)
> @@ -0,0 +1,85 @@
> +/* { dg-do run } */
> +/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
> +/* { dg-require-effective-target p8vector_hw } */
> +
> +#ifndef CHECK_H
> +#define CHECK_H "ssse3-check.h"
> +#endif
> +
> +#ifndef TEST
> +#define TEST ssse3_test
> +#endif
> +
> +#include CHECK_H
> +
> +#include "ssse3-vals.h"
> +
> +#include <tmmintrin.h>
> +
> +#ifndef __AVX__
> +/* Test the 64-bit form */
> +static void
> +ssse3_test_pmulhrsw (int *i1, int *i2, int *r)
> +{
> +  __m64 t1 = *(__m64 *) i1;
> +  __m64 t2 = *(__m64 *) i2;
> +  *(__m64 *) r = _mm_mulhrs_pi16 (t1, t2);
> +  _mm_empty ();
> +}
> +#endif
> +
> +/* Test the 128-bit form */
> +static void
> +ssse3_test_pmulhrsw128 (int *i1, int *i2, int *r)
> +{
> +  /* Assumes incoming pointers are 16-byte aligned */
> +  __m128i t1 = *(__m128i *) i1;
> +  __m128i t2 = *(__m128i *) i2;
> +  *(__m128i *) r = _mm_mulhrs_epi16 (t1, t2);
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (int *i1, int *i2, int *r)
> +{
> +  short *s1 = (short *) i1;
> +  short *s2 = (short *) i2;
> +  short *sout = (short *) r;
> +  int t0;
> +  int i;
> +
> +  for (i = 0; i < 8; i++)
> +    {
> +      t0 = (((int) s1[i] * (int) s2[i]) >> 14) + 1;
> +      sout[i] = (short) (t0 >> 1);
> +    }
> +}
> +
> +static void
> +TEST (void)
> +{
> +  int i;
> +  int r [4] __attribute__ ((aligned(16)));
> +  int ck [4];
> +  int fail = 0;
> +
> +  for (i = 0; i < 256; i += 8)
> +    {
> +      /* Manually compute the result */
> +      compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
> +
> +#ifndef __AVX__
> +      /* Run the 64-bit tests */
> +      ssse3_test_pmulhrsw (&vals[i + 0], &vals[i + 4], &r[0]);
> +      ssse3_test_pmulhrsw (&vals[i + 2], &vals[i + 6], &r[2]);
> +      fail += chk_128 (ck, r);
> +#endif
> +
> +      /* Run the 128-bit tests */
> +      ssse3_test_pmulhrsw128 (&vals[i + 0], &vals[i + 4], r);
> +      fail += chk_128 (ck, r);
> +    }
> +
> +  if (fail != 0)
> +    abort ();
> +}
> Index: gcc/testsuite/gcc.target/powerpc/ssse3-pshufb.c
> ===================================================================
> diff --git a/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-pshufb.c 
> b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-pshufb.c
> new file mode 10644
> --- /dev/null (revision 0)
> +++ b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-pshufb.c   (working copy)
> @@ -0,0 +1,114 @@
> +/* { dg-do run } */
> +/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
> +/* { dg-require-effective-target p8vector_hw } */
> +
> +#ifndef CHECK_H
> +#define CHECK_H "ssse3-check.h"
> +#endif
> +
> +#ifndef TEST
> +#define TEST ssse3_test
> +#endif
> +
> +#include CHECK_H
> +
> +#include "ssse3-vals.h"
> +
> +#include <tmmintrin.h>
> +
> +#ifndef __AVX__
> +/* Test the 64-bit form */
> +static void
> +ssse3_test_pshufb (int *i1, int *i2, int *r)
> +{
> +  __m64 t1 = *(__m64 *) i1;
> +  __m64 t2 = *(__m64 *) i2;
> +  *(__m64 *)r = _mm_shuffle_pi8 (t1, t2);
> +  _mm_empty ();
> +}
> +#endif
> +
> +/* Test the 128-bit form */
> +static void
> +ssse3_test_pshufb128 (int *i1, int *i2, int *r)
> +{
> +  /* Assumes incoming pointers are 16-byte aligned */
> +  __m128i t1 = *(__m128i *) i1;
> +  __m128i t2 = *(__m128i *) i2;
> +  *(__m128i *)r = _mm_shuffle_epi8 (t1, t2);
> +}
> +
> +#ifndef __AVX__
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result_64 (int *i1, int *i2, int *r)
> +{
> +  char *b1 = (char *) i1;
> +  char *b2 = (char *) i2;
> +  char *bout = (char *) r;
> +  int i;
> +  char select;
> +
> +  for (i = 0; i < 16; i++)
> +    {
> +      select = b2[i];
> +      if (select & 0x80)
> +     bout[i] = 0;
> +      else if (i < 8)
> +     bout[i] = b1[select & 0x7];
> +      else
> +     bout[i] = b1[8 + (select & 0x7)];
> +    }
> +}
> +#endif
> +
> +static void
> +compute_correct_result_128 (int *i1, int *i2, int *r)
> +{
> +  char *b1 = (char *) i1;
> +  char *b2 = (char *) i2;
> +  char *bout = (char *) r;
> +  int i;
> +  char select;
> +
> +  for (i = 0; i < 16; i++)
> +    {
> +      select = b2[i];
> +      if (select & 0x80)
> +     bout[i] = 0;
> +      else
> +     bout[i] = b1[select & 0xf];
> +    }
> +}
> +
> +static void
> +TEST (void)
> +{
> +  int i;
> +  int r [4] __attribute__ ((aligned(16)));
> +  int ck [4];
> +  int fail = 0;
> +
> +  for (i = 0; i < 256; i += 8)
> +    {
> +#ifndef __AVX__
> +      /* Manually compute the result */
> +      compute_correct_result_64 (&vals[i + 0], &vals[i + 4], ck);
> +
> +      /* Run the 64-bit tests */
> +      ssse3_test_pshufb (&vals[i + 0], &vals[i + 4], &r[0]);
> +      ssse3_test_pshufb (&vals[i + 2], &vals[i + 6], &r[2]);
> +      fail += chk_128 (ck, r);
> +#endif
> +
> +      /* Recompute the result for 128-bits */
> +      compute_correct_result_128 (&vals[i + 0], &vals[i + 4], ck);
> +
> +      /* Run the 128-bit tests */
> +      ssse3_test_pshufb128 (&vals[i + 0], &vals[i + 4], r);
> +      fail += chk_128 (ck, r);
> +    }
> +
> +  if (fail != 0)
> +    abort ();
> +}
> Index: gcc/testsuite/gcc.target/powerpc/ssse3-psignb.c
> ===================================================================
> diff --git a/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-psignb.c 
> b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-psignb.c
> new file mode 10644
> --- /dev/null (revision 0)
> +++ b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-psignb.c   (working copy)
> @@ -0,0 +1,85 @@
> +/* { dg-do run } */
> +/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
> +/* { dg-require-effective-target p8vector_hw } */
> +
> +#ifndef CHECK_H
> +#define CHECK_H "ssse3-check.h"
> +#endif
> +
> +#ifndef TEST
> +#define TEST ssse3_test
> +#endif
> +
> +#include CHECK_H
> +#include "ssse3-vals.h"
> +
> +#include <tmmintrin.h>
> +
> +#ifndef __AVX__
> +/* Test the 64-bit form */
> +static void
> +ssse3_test_psignb (int *i1, int *i2, int *r)
> +{
> +  __m64 t1 = *(__m64 *) i1;
> +  __m64 t2 = *(__m64 *) i2;
> +  *(__m64 *) r = _mm_sign_pi8 (t1, t2);
> +  _mm_empty ();
> +}
> +#endif
> +
> +/* Test the 128-bit form */
> +static void
> +ssse3_test_psignb128 (int *i1, int *i2, int *r)
> +{
> +  /* Assumes incoming pointers are 16-byte aligned */
> +  __m128i t1 = *(__m128i *) i1;
> +  __m128i t2 = *(__m128i *) i2;
> +  *(__m128i *) r = _mm_sign_epi8 (t1, t2);
> +}
> +
> +/* Manual reference; I2 read as signed char (plain char is unsigned here).  */
> +static void
> +compute_correct_result (int *i1, int *i2, int *r)
> +{
> +  char *b1 = (char *) i1;
> +  signed char *b2 = (signed char *) i2;
> +  char *bout = (char *) r;
> +  int i;
> +
> +  for (i = 0; i < 16; i++)
> +    if (b2[i] < 0)
> +      bout[i] = -b1[i];
> +    else if (b2[i] == 0)
> +      bout[i] = 0;
> +    else
> +      bout[i] = b1[i];
> +}
> +
> +static void
> +TEST (void)
> +{
> +  int i;
> +  int r [4] __attribute__ ((aligned(16)));
> +  int ck [4];
> +  int fail = 0;
> +
> +  for (i = 0; i < 256; i += 8)
> +    {
> +      /* Manually compute the result */
> +      compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
> +
> +#ifndef __AVX__
> +      /* Run the 64-bit tests */
> +      ssse3_test_psignb (&vals[i + 0], &vals[i + 4], &r[0]);
> +      ssse3_test_psignb (&vals[i + 2], &vals[i + 6], &r[2]);
> +      fail += chk_128 (ck, r);
> +#endif
> +
> +      /* Run the 128-bit tests */
> +      ssse3_test_psignb128 (&vals[i + 0], &vals[i + 4], r);
> +      fail += chk_128 (ck, r);
> +    }
> +
> +  if (fail != 0)
> +    abort ();
> +}
> Index: gcc/testsuite/gcc.target/powerpc/ssse3-psignd.c
> ===================================================================
> diff --git a/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-psignd.c 
> b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-psignd.c
> new file mode 10644
> --- /dev/null (revision 0)
> +++ b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-psignd.c   (working copy)
> @@ -0,0 +1,82 @@
> +/* { dg-do run } */
> +/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
> +/* { dg-require-effective-target p8vector_hw } */
> +
> +#ifndef CHECK_H
> +#define CHECK_H "ssse3-check.h"
> +#endif
> +
> +#ifndef TEST
> +#define TEST ssse3_test
> +#endif
> +
> +#include CHECK_H
> +#include "ssse3-vals.h"
> +
> +#include <tmmintrin.h>
> +
> +#ifndef __AVX__
> +/* Test the 64-bit form */
> +static void
> +ssse3_test_psignd (int *i1, int *i2, int *r)
> +{
> +  __m64 t1 = *(__m64 *) i1;
> +  __m64 t2 = *(__m64 *) i2;
> +  *(__m64 *) r = _mm_sign_pi32 (t1, t2);
> +  _mm_empty ();
> +}
> +#endif
> +
> +/* Test the 128-bit form */
> +static void
> +ssse3_test_psignd128 (int *i1, int *i2, int *r)
> +{
> +  /* Assumes incoming pointers are 16-byte aligned */
> +  __m128i t1 = *(__m128i *) i1;
> +  __m128i t2 = *(__m128i *) i2;
> +  *(__m128i *)r = _mm_sign_epi32 (t1, t2);
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (int *i1, int *i2, int *r)
> +{
> +  int i;
> +
> +  for (i = 0; i < 4; i++)
> +    if (i2[i] < 0)
> +      r[i] = -i1[i];
> +    else if (i2[i] == 0)
> +      r[i] = 0;
> +    else
> +      r[i] = i1[i];
> +}
> +
> +static void
> +TEST (void)
> +{
> +  int i;
> +  int r [4] __attribute__ ((aligned(16)));
> +  int ck [4];
> +  int fail = 0;
> +
> +  for (i = 0; i < 256; i += 8)
> +    {
> +      /* Manually compute the result */
> +      compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
> +
> +#ifndef __AVX__
> +      /* Run the 64-bit tests */
> +      ssse3_test_psignd (&vals[i + 0], &vals[i + 4], &r[0]);
> +      ssse3_test_psignd (&vals[i + 2], &vals[i + 6], &r[2]);
> +      fail += chk_128 (ck, r);
> +#endif
> +
> +      /* Run the 128-bit tests */
> +      ssse3_test_psignd128 (&vals[i + 0], &vals[i + 4], r);
> +      fail += chk_128 (ck, r);
> +    }
> +
> +  if (fail != 0)
> +    abort ();
> +}
> Index: gcc/testsuite/gcc.target/powerpc/ssse3-psignw.c
> ===================================================================
> diff --git a/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-psignw.c 
> b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-psignw.c
> new file mode 10644
> --- /dev/null (revision 0)
> +++ b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-psignw.c   (working copy)
> @@ -0,0 +1,85 @@
> +/* { dg-do run } */
> +/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
> +/* { dg-require-effective-target p8vector_hw } */
> +
> +#ifndef CHECK_H
> +#define CHECK_H "ssse3-check.h"
> +#endif
> +
> +#ifndef TEST
> +#define TEST ssse3_test
> +#endif
> +
> +#include CHECK_H
> +#include "ssse3-vals.h"
> +
> +#include <tmmintrin.h>
> +
> +#ifndef __AVX__
> +/* Test the 64-bit form */
> +static void
> +ssse3_test_psignw (int *i1, int *i2, int *r)
> +{
> +  __m64 t1 = *(__m64 *) i1;
> +  __m64 t2 = *(__m64 *) i2;
> +  *(__m64 *) r = _mm_sign_pi16 (t1, t2);
> +  _mm_empty ();
> +}
> +#endif
> +
> +/* Test the 128-bit form */
> +static void
> +ssse3_test_psignw128 (int *i1, int *i2, int *r)
> +{
> +  /* Assumes incoming pointers are 16-byte aligned */
> +  __m128i t1 = *(__m128i *) i1;
> +  __m128i t2 = *(__m128i *) i2;
> + *(__m128i *) r = _mm_sign_epi16 (t1, t2);
> +}
> +
> +/* Routine to manually compute the results */
> +static void
> +compute_correct_result (int *i1, int *i2, int *r)
> +{
> +  short *s1 = (short *) i1;
> +  short *s2 = (short *) i2;
> +  short *sout = (short *) r;
> +  int i;
> +
> +  for (i = 0; i < 8; i++)
> +    if (s2[i] < 0)
> +      sout[i] = -s1[i];
> +    else if (s2[i] == 0)
> +      sout[i] = 0;
> +    else
> +      sout[i] = s1[i];
> +}
> +
> +static void
> +TEST (void)
> +{
> +  int i;
> +  int r [4] __attribute__ ((aligned(16)));
> +  int ck [4];
> +  int fail = 0;
> +
> +  for (i = 0; i < 256; i += 8)
> +    {
> +      /* Manually compute the result */
> +      compute_correct_result (&vals[i + 0], &vals[i + 4], ck);
> +
> +#ifndef __AVX__
> +      /* Run the 64-bit tests */
> +      ssse3_test_psignw (&vals[i + 0], &vals[i + 4], &r[0]);
> +      ssse3_test_psignw (&vals[i + 2], &vals[i + 6], &r[2]);
> +      fail += chk_128 (ck, r);
> +#endif
> +
> +      /* Run the 128-bit tests */
> +      ssse3_test_psignw128 (&vals[i + 0], &vals[i + 4], r);
> +      fail += chk_128 (ck, r);
> +    }
> +
> +  if (fail != 0)
> +    abort ();
> +}
> Index: gcc/testsuite/gcc.target/powerpc/ssse3-vals.h
> ===================================================================
> diff --git a/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-vals.h 
> b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-vals.h
> new file mode 10644
> --- /dev/null (revision 0)
> +++ b/trunk/gcc/testsuite/gcc.target/powerpc/ssse3-vals.h     (working copy)
> @@ -0,0 +1,60 @@
> +/* Routine to check correctness of the results */
> +static int
> +chk_128 (int *v1, int *v2)
> +{
> +  int i;
> +  int n_fails = 0;
> +
> +  for (i = 0; i < 4; i++)
> +    if (v1[i] != v2[i])
> +      n_fails += 1;
> +
> +  return n_fails;
> +}
> +
> +static int vals [256] __attribute__ ((aligned(16))) =
> +{
> +  0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x5be800ee, 0x4f2d7b15,
> +  0x409d9291, 0xdd95f27f, 0x423986e3, 0x21a4d2cd, 0xa7056d84, 0x4f4e5a3b,
> +  0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
> +  0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
> +  0x73ef0244, 0xcd836329, 0x847f634f, 0xa7e3abcf, 0xb4c14764, 0x1ef42c06,
> +  0x504f29ac, 0x4ae7ca73, 0xaddde3c9, 0xf63ded2e, 0xa5d3553d, 0xa52ae05f,
> +  0x6fd3c83a, 0x7dc2b300, 0x76b05de7, 0xea8ebae5, 0x549568dd, 0x172f0358,
> +  0x917eadf0, 0x796fb0a7, 0xb39381af, 0xd0591d61, 0x731d2f17, 0xbc4b6f5d,
> +  0x8ec664c2, 0x3c199c19, 0x9c81db12, 0x6d85913b, 0x486107a9, 0xab6f4b26,
> +  0x5630d37c, 0x20836e85, 0x40d4e746, 0xdfbaba36, 0xbeacaa69, 0xb3c84083,
> +  0x8a688eb4, 0x08cde481, 0x66e7a190, 0x74ee1639, 0xb3942a19, 0xe0c40471,
> +  0x9b789489, 0x9751207a, 0x543a1524, 0x41da7ad6, 0x614bb563, 0xf86f57b1,
> +  0x69e62199, 0x2150cb12, 0x9ed74062, 0x429471f4, 0xad28502b, 0xf2e2d4d5,
> +  0x45b6ce09, 0xaaa5e649, 0xb46da484, 0x0a637515, 0xae7a3212, 0x5afc784c,
> +  0x776cfbbe, 0x9c542bb2, 0x64193aa8, 0x16e8a655, 0x4e3d2f92, 0xe05d7b72,
> +  0x89854ebc, 0x8c318814, 0xb81e76e0, 0x3f2625f5, 0x61b44852, 0x5209d7ad,
> +  0x842fe317, 0xd3cfcca1, 0x8d287cc7, 0x80f0c9a8, 0x4215f4e5, 0x563993d6,
> +  0x5d627433, 0xc4449e35, 0x5b4fe009, 0x3ef92286, 0xacbc8927, 0x549ab870,
> +  0x9ac5b959, 0xed8f1c91, 0x7ecf02cd, 0x989c0e8b, 0xa31d6918, 0x1dc2bcc1,
> +  0x99d3f3cc, 0x6857acc8, 0x45d7324a, 0xaebdf2e6, 0x7af2f2ae, 0x09716f73,
> +  0x7816e694, 0xc65493c0, 0x9f7e87bc, 0xaa96cd40, 0xbfb5bfc6, 0x01a2cce7,
> +  0x5f1d8c46, 0x45303efb, 0xb24607c3, 0xef2009a7, 0xba873753, 0xbefb14bc,
> +  0x74e53cd3, 0x70124708, 0x6eb4bdbd, 0xf3ba5e43, 0x4c94085f, 0x0c03e7e0,
> +  0x9a084931, 0x62735424, 0xaeee77c5, 0xdb34f90f, 0x6860cbdd, 0xaf77cf9f,
> +  0x95b28158, 0x23bd70d7, 0x9fbc3d88, 0x742e659e, 0x53bcfb48, 0xb8a63f6c,
> +  0x4dcf3373, 0x2b168627, 0x4fe20745, 0xd0af5e94, 0x22514e6a, 0xb8ef25c2,
> +  0x89ec781a, 0x13d9002b, 0x6d724500, 0x7fdbf63f, 0xb0e9ced5, 0xf919e0f3,
> +  0x00fef203, 0x8905d47a, 0x434e7517, 0x4aef8e2c, 0x689f51e8, 0xe513b7c3,
> +  0x72bbc5d2, 0x3a222f74, 0x05c3a0f9, 0xd5489d82, 0xb41fbe83, 0xec5d305f,
> +  0x5ea02b0b, 0xb176065b, 0xa8eb404e, 0x80349117, 0x210fd49e, 0x43898d0e,
> +  0x6c151b9c, 0x8742df18, 0x7b64de73, 0x1dbf52b2, 0x55c9cb19, 0xeb841f10,
> +  0x10b8ae76, 0x0764ecb6, 0xb7479018, 0x2672cb3f, 0x7ac9ac90, 0x4be5332c,
> +  0x8f1a0615, 0x4efb7a77, 0x16551a85, 0xdb2c3d66, 0x49179c07, 0x5dc4657e,
> +  0x5e76907e, 0xd7486a9c, 0x445204a4, 0x65cdc426, 0x33f86ded, 0xcba95dda,
> +  0x83351f16, 0xfedefad9, 0x639b620f, 0x86896a64, 0xba4099ba, 0x965f4a21,
> +  0x1247154f, 0x25604c42, 0x5862d692, 0xb1e9149e, 0x612516a5, 0x02c49bf8,
> +  0x631212bf, 0x9f69f54e, 0x168b63b0, 0x310a25ba, 0xa42a59cd, 0x084f0af9,
> +  0x44a06cec, 0x5c0cda40, 0xb932d721, 0x7c42bb0d, 0x213cd3f0, 0xedc7f5a4,
> +  0x7fb85859, 0x6b3da5ea, 0x61cd591e, 0xe8e9aa08, 0x4361fc34, 0x53d40d2a,
> +  0x0511ad1b, 0xf996b44c, 0xb5ead756, 0xc022138d, 0x6172adf1, 0xa4a0a3b4,
> +  0x8c2977b8, 0xa8e482ed, 0x04fcdd6b, 0x3f7b85d4, 0x4fca1e46, 0xa392ddca,
> +  0x569fc791, 0x346a706c, 0x543bf3eb, 0x895b3cde, 0x2146bb80, 0x26b3c168,
> +  0x929998db, 0x1ea472c9, 0x7207b36b, 0x6a8f10d4 
> +};
>

Reply via email to