On Sat, Mar 29, 2008 at 10:11 PM, H.J. Lu <hjl.to...@gmail.com> wrote:
> This patch restores proper checking of the third argument on blendpd
> and blendps.  It also adds 2 tests, including pblendw.  Tested on
> Linux/Intel64. OK to install?

The gcc.target/i386/sse4_1-blendps-2.c test randomly fails because
src3 is used uninitialized.

Richard.

> Thanks.
>
> H.J.
> ---
> gcc/
>
> 2008-03-29  H.J. Lu  <hongjiu...@intel.com>
>
>        PR target/35757
>        * config/i386/i386.c (ix86_expand_sse_4_operands_builtin): Issue
>        proper error message for the third argument on blendpd and
>        blendps.
>
>        * config/i386/sse.md (blendbits): New.
>        (sse4_1_blendp<ssemodesuffixf2c>): Use it.
>
> gcc/testsuite/
>
> 2008-03-29  H.J. Lu  <hongjiu...@intel.com>
>
>        PR target/35757
>        * gcc.target/i386/sse4_1-blendps-2.c: New.
>        * gcc.target/i386/sse4_1-pblendw-2.c: Likewise.
>
> --- gcc/config/i386/i386.c.imm  2008-03-29 07:29:40.000000000 -0700
> +++ gcc/config/i386/i386.c      2008-03-29 13:55:36.000000000 -0700
> @@ -19791,9 +19791,14 @@ ix86_expand_sse_4_operands_builtin (enum
>
>       case CODE_FOR_sse4_1_roundsd:
>       case CODE_FOR_sse4_1_roundss:
> +      case CODE_FOR_sse4_1_blendps:
>        error ("the third argument must be a 4-bit immediate");
>        return const0_rtx;
>
> +      case CODE_FOR_sse4_1_blendpd:
> +       error ("the third argument must be a 2-bit immediate");
> +       return const0_rtx;
> +
>       default:
>        error ("the third argument must be an 8-bit immediate");
>        return const0_rtx;
> --- gcc/config/i386/sse.md.imm  2008-03-29 07:29:40.000000000 -0700
> +++ gcc/config/i386/sse.md      2008-03-29 14:01:10.000000000 -0700
> @@ -53,6 +53,9 @@
>  ;; Mapping of vector modes back to the scalar modes
>  (define_mode_attr ssescalarmode [(V4SF "SF") (V2DF "DF")])
>
> +;; Mapping of immediate bits for blend instructions
> +(define_mode_attr blendbits [(V4SF "15") (V2DF "3")])
> +
>  ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
>
>  ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
> @@ -6306,7 +6309,7 @@
>        (vec_merge:SSEMODEF2P
>          (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
>          (match_operand:SSEMODEF2P 1 "register_operand" "0")
> -         (match_operand:SI 3 "const_0_to_3_operand" "n")))]
> +         (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
>   "TARGET_SSE4_1"
>   "blendp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
>   [(set_attr "type" "ssemov")
> --- gcc/testsuite/gcc.target/i386/sse4_1-blendps-2.c.imm        2008-03-29 09:54:08.000000000 -0700
> +++ gcc/testsuite/gcc.target/i386/sse4_1-blendps-2.c    2008-03-29 09:57:35.000000000 -0700
> @@ -0,0 +1,77 @@
> +/* { dg-do run } */
> +/* { dg-require-effective-target sse4 } */
> +/* { dg-options "-O2 -msse4.1" } */
> +
> +#include "sse4_1-check.h"
> +
> +#include <smmintrin.h>
> +#include <string.h>
> +
> +#define NUM 20
> +
> +#undef MASK
> +#define MASK 0xe
> +
> +static void
> +init_blendps (float *src1, float *src2)
> +{
> +  int i, sign = 1;
> +
> +  for (i = 0; i < NUM * 4; i++)
> +    {
> +      src1[i] = i * i * sign;
> +      src2[i] = (i + 20) * sign;
> +      sign = -sign;
> +    }
> +}
> +
> +static int
> +check_blendps (__m128 *dst, float *src1, float *src2)
> +{
> +  float tmp[4];
> +  int j;
> +
> +  memcpy (&tmp[0], src1, sizeof (tmp));
> +  for (j = 0; j < 4; j++)
> +    if ((MASK & (1 << j)))
> +      tmp[j] = src2[j];
> +
> +  return memcmp (dst, &tmp[0], sizeof (tmp));
> +}
> +
> +static void
> +sse4_1_test (void)
> +{
> +  __m128 x, y;
> +  union
> +    {
> +      __m128 x[NUM];
> +      float f[NUM * 4];
> +    } dst, src1, src2;
> +  union
> +    {
> +      __m128 x;
> +      float f[4];
> +    } src3;
> +  int i;
> +
> +  init_blendps (src1.f, src2.f);
> +
> +  /* Check blendps imm8, m128, xmm */
> +  for (i = 0; i < NUM; i++)
> +    {
> +      dst.x[i] = _mm_blend_ps (src1.x[i], src2.x[i], MASK);
> +      if (check_blendps (&dst.x[i], &src1.f[i * 4], &src2.f[i * 4]))
> +       abort ();
> +    }
> +
> +   /* Check blendps imm8, xmm, xmm */
> +  x = _mm_blend_ps (dst.x[2], src3.x, MASK);
> +  y = _mm_blend_ps (src3.x, dst.x[2], MASK);
> +
> +  if (check_blendps (&x, &dst.f[8], &src3.f[0]))
> +    abort ();
> +
> +  if (check_blendps (&y, &src3.f[0], &dst.f[8]))
> +    abort ();
> +}
> --- gcc/testsuite/gcc.target/i386/sse4_1-pblendw-2.c.imm        2008-03-29 09:55:29.000000000 -0700
> +++ gcc/testsuite/gcc.target/i386/sse4_1-pblendw-2.c    2008-03-29 09:57:25.000000000 -0700
> @@ -0,0 +1,79 @@
> +/* { dg-do run } */
> +/* { dg-require-effective-target sse4 } */
> +/* { dg-options "-O2 -msse4.1" } */
> +
> +#include "sse4_1-check.h"
> +
> +#include <smmintrin.h>
> +#include <string.h>
> +
> +#define NUM 20
> +
> +#undef MASK
> +#define MASK 0xfe
> +
> +static void
> +init_pblendw (short *src1, short *src2)
> +{
> +  int i, sign = 1;
> +
> +  for (i = 0; i < NUM * 8; i++)
> +    {
> +      src1[i] = i * i * sign;
> +      src2[i] = (i + 20) * sign;
> +      sign = -sign;
> +    }
> +}
> +
> +static int
> +check_pblendw (__m128i *dst, short *src1, short *src2)
> +{
> +  short tmp[8];
> +  int j;
> +
> +  memcpy (&tmp[0], src1, sizeof (tmp));
> +  for (j = 0; j < 8; j++)
> +    if ((MASK & (1 << j)))
> +      tmp[j] = src2[j];
> +
> +  return memcmp (dst, &tmp[0], sizeof (tmp));
> +}
> +
> +static void
> +sse4_1_test (void)
> +{
> +  __m128i x, y;
> +  union
> +    {
> +      __m128i x[NUM];
> +      short s[NUM * 8];
> +    } dst, src1, src2;
> +  union
> +    {
> +      __m128i x;
> +      short s[8];
> +    } src3;
> +  int i;
> +
> +  init_pblendw (src1.s, src2.s);
> +
> +  /* Check pblendw imm8, m128, xmm */
> +  for (i = 0; i < NUM; i++)
> +    {
> +      dst.x[i] = _mm_blend_epi16 (src1.x[i], src2.x[i], MASK);
> +      if (check_pblendw (&dst.x[i], &src1.s[i * 8], &src2.s[i * 8]))
> +       abort ();
> +    }
> +
> +   /* Check pblendw imm8, xmm, xmm */
> +  src3.x = _mm_setzero_si128 ();
> +
> +  x = _mm_blend_epi16 (dst.x[2], src3.x, MASK);
> +  y = _mm_blend_epi16 (src3.x, dst.x[2], MASK);
> +
> +  if (check_pblendw (&x, &dst.s[16], &src3.s[0]))
> +    abort ();
> +
> +  if (check_pblendw (&y, &src3.s[0], &dst.s[16]))
> +    abort ();
> +}
>

Reply via email to