On 04 May 21:51, Jakub Jelinek wrote:
> Hi!
> 
> In this case the situation is more complicated, because for
> V*HI we need avx512bw and avx512vl, while for V*SI only avx512vl
> is needed and both are in the same pattern.  But we already have
> a pattern that does the right thing right after the "ashr<mode>3"
> - but as it is after it, the "ashr<mode>3" will win during recog
> and will limit RA decisions.
> 
> The testcase shows that moving the pattern improves it.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
OK for trunk.
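For the archive, it may be worth spelling out the trick the new tests use: pinning the operand to xmm16 makes the recog-ordering problem observable, because xmm16-xmm31 satisfy the "v" constraint of the AVX512VL pattern but not the "x" constraint of the AVX2 "ashr<mode>3" pattern, and the empty asm statements just keep the value live in xmm16 across the shift. A condensed sketch of the same idea (my illustration following the shape of the new tests, not part of the patch itself):

```c
/* Illustration only (condensed from the new tests, not part of the
   patch).  xmm16 is encodable only with EVEX, so it satisfies the
   "v" constraint of the AVX512VL pattern but not the "x" constraint
   of the AVX2 ashr<mode>3 pattern; with the AVX2 pattern matched
   first, the RA would have to bounce the value through xmm0-xmm15
   instead of shifting it in place.
   Compile with: -O2 -mavx512vl (64-bit only).  */
#include <x86intrin.h>

void
shift_in_xmm16 (__m128i x, int y)
{
  register __m128i a __asm ("xmm16");	/* pin a to an EVEX-only register */
  a = x;
  asm volatile ("" : "+v" (a));		/* keep a live in xmm16 */
  a = _mm_srai_epi32 (a, y);		/* should become an EVEX vpsrad */
  asm volatile ("" : "+v" (a));		/* keep the shift from being dead */
}
```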
--
Thanks, K

> 
> 2016-05-04  Jakub Jelinek  <ja...@redhat.com>
> 
> 	* config/i386/sse.md (<mask_codefor>ashr<mode>3<mask_name>): Move
> 	before the ashr<mode>3 pattern.
> 
> 	* gcc.target/i386/avx512bw-vpsraw-3.c: New test.
> 	* gcc.target/i386/avx512vl-vpsrad-3.c: New test.
> 
> --- gcc/config/i386/sse.md.jj	2016-05-04 16:54:31.000000000 +0200
> +++ gcc/config/i386/sse.md	2016-05-04 16:55:31.155848054 +0200
> @@ -10088,6 +10088,20 @@ (define_expand "usadv32qi"
>    DONE;
>  })
>  
> +(define_insn "<mask_codefor>ashr<mode>3<mask_name>"
> +  [(set (match_operand:VI24_AVX512BW_1 0 "register_operand" "=v,v")
> +	(ashiftrt:VI24_AVX512BW_1
> +	  (match_operand:VI24_AVX512BW_1 1 "nonimmediate_operand" "v,vm")
> +	  (match_operand:SI 2 "nonmemory_operand" "v,N")))]
> +  "TARGET_AVX512VL"
> +  "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
> +  [(set_attr "type" "sseishft")
> +   (set (attr "length_immediate")
> +     (if_then_else (match_operand 2 "const_int_operand")
> +       (const_string "1")
> +       (const_string "0")))
> +   (set_attr "mode" "<sseinsnmode>")])
> +
>  (define_insn "ashr<mode>3"
>    [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x")
>  	(ashiftrt:VI24_AVX2
> @@ -10107,20 +10121,6 @@ (define_insn "ashr<mode>3"
>     (set_attr "prefix" "orig,vex")
>     (set_attr "mode" "<sseinsnmode>")])
>  
> -(define_insn "<mask_codefor>ashr<mode>3<mask_name>"
> -  [(set (match_operand:VI24_AVX512BW_1 0 "register_operand" "=v,v")
> -	(ashiftrt:VI24_AVX512BW_1
> -	  (match_operand:VI24_AVX512BW_1 1 "nonimmediate_operand" "v,vm")
> -	  (match_operand:SI 2 "nonmemory_operand" "v,N")))]
> -  "TARGET_AVX512VL"
> -  "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
> -  [(set_attr "type" "sseishft")
> -   (set (attr "length_immediate")
> -     (if_then_else (match_operand 2 "const_int_operand")
> -       (const_string "1")
> -       (const_string "0")))
> -   (set_attr "mode" "<sseinsnmode>")])
> -
>  (define_insn "<mask_codefor>ashrv2di3<mask_name>"
>    [(set (match_operand:V2DI 0 "register_operand" "=v,v")
>  	(ashiftrt:V2DI
> --- gcc/testsuite/gcc.target/i386/avx512bw-vpsraw-3.c.jj	2016-05-04 17:01:52.332810541 +0200
> +++ gcc/testsuite/gcc.target/i386/avx512bw-vpsraw-3.c	2016-05-04 17:02:56.104966537 +0200
> @@ -0,0 +1,44 @@
> +/* { dg-do assemble { target { avx512bw && { avx512vl && { ! ia32 } } } } } */
> +/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
> +
> +#include <x86intrin.h>
> +
> +void
> +f1 (__m128i x, int y)
> +{
> +  register __m128i a __asm ("xmm16");
> +  a = x;
> +  asm volatile ("" : "+v" (a));
> +  a = _mm_srai_epi16 (a, y);
> +  asm volatile ("" : "+v" (a));
> +}
> +
> +void
> +f2 (__m128i x)
> +{
> +  register __m128i a __asm ("xmm16");
> +  a = x;
> +  asm volatile ("" : "+v" (a));
> +  a = _mm_srai_epi16 (a, 16);
> +  asm volatile ("" : "+v" (a));
> +}
> +
> +void
> +f3 (__m256i x, int y)
> +{
> +  register __m256i a __asm ("xmm16");
> +  a = x;
> +  asm volatile ("" : "+v" (a));
> +  a = _mm256_srai_epi16 (a, y);
> +  asm volatile ("" : "+v" (a));
> +}
> +
> +void
> +f4 (__m256i x)
> +{
> +  register __m256i a __asm ("xmm16");
> +  a = x;
> +  asm volatile ("" : "+v" (a));
> +  a = _mm256_srai_epi16 (a, 16);
> +  asm volatile ("" : "+v" (a));
> +}
> --- gcc/testsuite/gcc.target/i386/avx512vl-vpsrad-3.c.jj	2016-05-04 17:01:58.770725338 +0200
> +++ gcc/testsuite/gcc.target/i386/avx512vl-vpsrad-3.c	2016-05-04 17:00:16.000000000 +0200
> @@ -0,0 +1,44 @@
> +/* { dg-do assemble { target { avx512vl && { ! ia32 } } } } */
> +/* { dg-options "-O2 -mavx512vl" } */
> +
> +#include <x86intrin.h>
> +
> +void
> +f1 (__m128i x, int y)
> +{
> +  register __m128i a __asm ("xmm16");
> +  a = x;
> +  asm volatile ("" : "+v" (a));
> +  a = _mm_srai_epi32 (a, y);
> +  asm volatile ("" : "+v" (a));
> +}
> +
> +void
> +f2 (__m128i x)
> +{
> +  register __m128i a __asm ("xmm16");
> +  a = x;
> +  asm volatile ("" : "+v" (a));
> +  a = _mm_srai_epi32 (a, 16);
> +  asm volatile ("" : "+v" (a));
> +}
> +
> +void
> +f3 (__m256i x, int y)
> +{
> +  register __m256i a __asm ("xmm16");
> +  a = x;
> +  asm volatile ("" : "+v" (a));
> +  a = _mm256_srai_epi32 (a, y);
> +  asm volatile ("" : "+v" (a));
> +}
> +
> +void
> +f4 (__m256i x)
> +{
> +  register __m256i a __asm ("xmm16");
> +  a = x;
> +  asm volatile ("" : "+v" (a));
> +  a = _mm256_srai_epi32 (a, 16);
> +  asm volatile ("" : "+v" (a));
> +}
> 
> 	Jakub