On Sat, Mar 29, 2008 at 10:11 PM, H.J. Lu <hjl.to...@gmail.com> wrote: > This patch restores proper checking of the third argument on blendpd > and blendps. It also adds 2 tests, including pblendw. Tested on > Linux/Intel64. OK to install?
The gcc.target/i386/sse4_1-blendps-2.c test randomly fails because src3 is used uninitialized. Richard. > Thanks. > > H.J. > --- > gcc/ > > 2008-03-29 H.J. Lu <hongjiu...@intel.com> > > PR target/35757 > * config/i386/i386.c (ix86_expand_sse_4_operands_builtin): Issue > proper error message for the third argument on blendpd and > blendps. > > * config/i386/sse.md (blendbits): New. > (sse4_1_blendp<ssemodesuffixf2c>): Use it. > > gcc/testsuite/ > > 2008-03-29 H.J. Lu <hongjiu...@intel.com> > > PR target/35757 > * gcc.target/i386/sse4_1-blendps-2.c: New. > * gcc.target/i386/sse4_1-pblendw-2.c: Likewise. > > --- gcc/config/i386/i386.c.imm 2008-03-29 07:29:40.000000000 -0700 > +++ gcc/config/i386/i386.c 2008-03-29 13:55:36.000000000 -0700 > @@ -19791,9 +19791,14 @@ ix86_expand_sse_4_operands_builtin (enum > > case CODE_FOR_sse4_1_roundsd: > case CODE_FOR_sse4_1_roundss: > + case CODE_FOR_sse4_1_blendps: > error ("the third argument must be a 4-bit immediate"); > return const0_rtx; > > + case CODE_FOR_sse4_1_blendpd: > + error ("the third argument must be a 2-bit immediate"); > + return const0_rtx; > + > default: > error ("the third argument must be an 8-bit immediate"); > return const0_rtx; > --- gcc/config/i386/sse.md.imm 2008-03-29 07:29:40.000000000 -0700 > +++ gcc/config/i386/sse.md 2008-03-29 14:01:10.000000000 -0700 > @@ -53,6 +53,9 @@ > ;; Mapping of vector modes back to the scalar modes > (define_mode_attr ssescalarmode [(V4SF "SF") (V2DF "DF")]) > > +;; Mapping of immediate bits for blend instructions > +(define_mode_attr blendbits [(V4SF "15") (V2DF "3")]) > + > ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics. 
> > ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; > @@ -6306,7 +6309,7 @@ > (vec_merge:SSEMODEF2P > (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm") > (match_operand:SSEMODEF2P 1 "register_operand" "0") > - (match_operand:SI 3 "const_0_to_3_operand" "n")))] > + (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))] > "TARGET_SSE4_1" > "blendp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}" > [(set_attr "type" "ssemov") > --- gcc/testsuite/gcc.target/i386/sse4_1-blendps-2.c.imm 2008-03-29 > 09:54:08.000000000 -0700 > +++ gcc/testsuite/gcc.target/i386/sse4_1-blendps-2.c 2008-03-29 > 09:57:35.000000000 -0700 > @@ -0,0 +1,77 @@ > +/* { dg-do run } */ > +/* { dg-require-effective-target sse4 } */ > +/* { dg-options "-O2 -msse4.1" } */ > + > +#include "sse4_1-check.h" > + > +#include <smmintrin.h> > +#include <string.h> > + > +#define NUM 20 > + > +#undef MASK > +#define MASK 0xe > + > +static void > +init_blendps (float *src1, float *src2) > +{ > + int i, sign = 1; > + > + for (i = 0; i < NUM * 4; i++) > + { > + src1[i] = i * i * sign; > + src2[i] = (i + 20) * sign; > + sign = -sign; > + } > +} > + > +static int > +check_blendps (__m128 *dst, float *src1, float *src2) > +{ > + float tmp[4]; > + int j; > + > + memcpy (&tmp[0], src1, sizeof (tmp)); > + for (j = 0; j < 4; j++) > + if ((MASK & (1 << j))) > + tmp[j] = src2[j]; > + > + return memcmp (dst, &tmp[0], sizeof (tmp)); > +} > + > +static void > +sse4_1_test (void) > +{ > + __m128 x, y; > + union > + { > + __m128 x[NUM]; > + float f[NUM * 4]; > + } dst, src1, src2; > + union > + { > + __m128 x; > + float f[4]; > + } src3; > + int i; > + > + init_blendps (src1.f, src2.f); > + > + /* Check blendps imm8, m128, xmm */ > + for (i = 0; i < NUM; i++) > + { > + dst.x[i] = _mm_blend_ps (src1.x[i], src2.x[i], MASK); > + if (check_blendps (&dst.x[i], &src1.f[i * 4], &src2.f[i * 4])) > + abort (); > + } > + > + /* Check blendps imm8, xmm, xmm */ > + x = _mm_blend_ps (dst.x[2], src3.x, MASK); > + 
y = _mm_blend_ps (src3.x, dst.x[2], MASK); > + > + if (check_blendps (&x, &dst.f[8], &src3.f[0])) > + abort (); > + > + if (check_blendps (&y, &src3.f[0], &dst.f[8])) > + abort (); > +} > --- gcc/testsuite/gcc.target/i386/sse4_1-pblendw-2.c.imm 2008-03-29 > 09:55:29.000000000 -0700 > +++ gcc/testsuite/gcc.target/i386/sse4_1-pblendw-2.c 2008-03-29 > 09:57:25.000000000 -0700 > @@ -0,0 +1,79 @@ > +/* { dg-do run } */ > +/* { dg-require-effective-target sse4 } */ > +/* { dg-options "-O2 -msse4.1" } */ > + > +#include "sse4_1-check.h" > + > +#include <smmintrin.h> > +#include <string.h> > + > +#define NUM 20 > + > +#undef MASK > +#define MASK 0xfe > + > +static void > +init_pblendw (short *src1, short *src2) > +{ > + int i, sign = 1; > + > + for (i = 0; i < NUM * 8; i++) > + { > + src1[i] = i * i * sign; > + src2[i] = (i + 20) * sign; > + sign = -sign; > + } > +} > + > +static int > +check_pblendw (__m128i *dst, short *src1, short *src2) > +{ > + short tmp[8]; > + int j; > + > + memcpy (&tmp[0], src1, sizeof (tmp)); > + for (j = 0; j < 8; j++) > + if ((MASK & (1 << j))) > + tmp[j] = src2[j]; > + > + return memcmp (dst, &tmp[0], sizeof (tmp)); > +} > + > +static void > +sse4_1_test (void) > +{ > + __m128i x, y; > + union > + { > + __m128i x[NUM]; > + short s[NUM * 8]; > + } dst, src1, src2; > + union > + { > + __m128i x; > + short s[8]; > + } src3; > + int i; > + > + init_pblendw (src1.s, src2.s); > + > + /* Check pblendw imm8, m128, xmm */ > + for (i = 0; i < NUM; i++) > + { > + dst.x[i] = _mm_blend_epi16 (src1.x[i], src2.x[i], MASK); > + if (check_pblendw (&dst.x[i], &src1.s[i * 8], &src2.s[i * 8])) > + abort (); > + } > + > + /* Check pblendw imm8, xmm, xmm */ > + src3.x = _mm_setzero_si128 (); > + > + x = _mm_blend_epi16 (dst.x[2], src3.x, MASK); > + y = _mm_blend_epi16 (src3.x, dst.x[2], MASK); > + > + if (check_pblendw (&x, &dst.s[16], &src3.s[0])) > + abort (); > + > + if (check_pblendw (&y, &src3.s[0], &dst.s[16])) > + abort (); > +} >