On Mon, 21 Mar 2016, Kirill Yukhin wrote:

> Hello,
>
> The attached patch blocks the third alternative of the broadcast pattern
> when compiling with -mavx512vl.
> The issue is that the third alternative is subject to subsequent splitting.
> AVX-512VL allows higher XMM regnums than SSE, so the split generates
> AVX2 broadcast insns, which will use XMMN, N>15.
>
> We have a separate pattern for GPR->XMM broadcasting.
>
> Bootstrapped, regtested.
>
> Richard,
> is it OK for main trunk?
Yes.

Richard.

> PR target/70293
> gcc/
> * config/i386/sse.md (define_insn "*vec_dup<mode>"/AVX2): Block the
> third alternative for AVX-512VL targets.
> gcc/testsuite/
> * gcc.target/i386/pr70293.c: New test.
>
> commit 954aa2747cc3387e5a61cbe0fd029ee7a938072e
> Author: Kirill Yukhin <kirill.yuk...@intel.com>
> Date:   Fri Mar 18 17:30:42 2016 +0300
>
>     AVX-512. Disable reg->xmm broadcast in AVX pattern when AVX-512VL is on.
>
> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> index 3c521b3..fc6d597 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -17412,7 +17412,8 @@
>     v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0|%0, %1}
>     v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}
>     #"
> -  [(set_attr "type" "ssemov")
> +  [(set_attr "isa" "*,*,noavx512vl")
> +   (set_attr "type" "ssemov")
>     (set_attr "prefix_extra" "1")
>     (set_attr "prefix" "maybe_evex")
>     (set_attr "mode" "<sseinsnmode>")])
> diff --git a/gcc/testsuite/gcc.target/i386/pr70293.c b/gcc/testsuite/gcc.target/i386/pr70293.c
> new file mode 100644
> index 0000000..4510166
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr70293.c
> @@ -0,0 +1,38 @@
> +/* PR target/70293 */
> +/* { dg-do compile } */
> +/* { dg-require-effective-target lp64 } */
> +/* { dg-options "-mtune=westmere -mavx512vl -O2" } */
> +
> +typedef short __v8hi __attribute__((__vector_size__(16)));
> +typedef int __v8hu __attribute__((__vector_size__(16)));
> +typedef long __m128i __attribute__((__vector_size__(16)));
> +__m128i _mm_madd_epi16___B, _mm_mullo_epi16___A,
> +    scaled_bilinear_scanline_sse2_8888_8_8888_OVER_xmm_b,
> +    scaled_bilinear_scanline_sse2_8888_8_8888_OVER___trans_tmp_16,
> +    scaled_bilinear_scanline_sse2_8888_8_8888_OVER___trans_tmp_13;
> +int _mm_srli_epi16___B, scaled_bilinear_scanline_sse2_8888_8_8888_OVER_m,
> +    scaled_bilinear_scanline_sse2_8888_8_8888_OVER_dst,
> +    scaled_bilinear_scanline_sse2_8888_8_8888_OVER_wt;
> +__m128i _mm_set_epi16();
> +void _mm_cvtsi128_si32();
> +void
> +scaled_bilinear_scanline_sse2_8888_8_8888_OVER(int p1) {
> +  __m128i __trans_tmp_12, __trans_tmp_6, __trans_tmp_5, xmm_x = _mm_set_epi16();
> +  int mask;
> +  __trans_tmp_5 = (__m128i){scaled_bilinear_scanline_sse2_8888_8_8888_OVER_wt};
> +  __trans_tmp_6 = (__m128i)(__v8hi){p1, p1, p1, p1, p1, p1, p1, p1};
> +  while (scaled_bilinear_scanline_sse2_8888_8_8888_OVER_dst) {
> +    scaled_bilinear_scanline_sse2_8888_8_8888_OVER_m = mask++;
> +    if (scaled_bilinear_scanline_sse2_8888_8_8888_OVER_m) {
> +      __trans_tmp_12 =
> +          (__m128i)((__v8hu)_mm_mullo_epi16___A * (__v8hu)__trans_tmp_6);
> +      scaled_bilinear_scanline_sse2_8888_8_8888_OVER_xmm_b = __trans_tmp_12;
> +      scaled_bilinear_scanline_sse2_8888_8_8888_OVER___trans_tmp_13 =
> +          (__m128i)__builtin_ia32_psrlwi128((__v8hi)xmm_x, _mm_srli_epi16___B);
> +      scaled_bilinear_scanline_sse2_8888_8_8888_OVER___trans_tmp_16 =
> +          (__m128i)__builtin_ia32_pmaddwd128((__v8hi)__trans_tmp_5,
> +                                             (__v8hi)_mm_madd_epi16___B);
> +      _mm_cvtsi128_si32();
> +    }
> +  }
> +}

-- 
Richard Biener <rguent...@suse.de>
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton,
HRB 21284 (AG Nuernberg)
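
For readers without the PR handy, below is a minimal sketch of the kind of scalar-to-vector broadcast that the *vec_dup<mode> pattern matches. The type and function names are made up for illustration; this is not the reduced reproducer from the patch above.

/* Hypothetical example: broadcast an int, which starts out in a general
   register, into every lane of a 128-bit vector.  GCC expands this
   through its vec_dup broadcast patterns.  */
typedef int v4si __attribute__ ((__vector_size__ (16)));

v4si
broadcast_int (int x)
{
  return (v4si) { x, x, x, x };
}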