On Wed, Dec 12, 2018 at 11:36 PM Jakub Jelinek <ja...@redhat.com> wrote: > > Hi! > > The following patch uses a new mode iterator to avoid two nearly identical > vptestm and two nearly identical vptestnm patterns, and adds patterns that zero > extend those results to wider k registers, because that is what the > instructions actually do. > > Bootstrapped/regtested on x86_64-linux and i686-linux, OK for trunk? > > 2018-12-12 Jakub Jelinek <ja...@redhat.com> > > PR target/88461 > * config/i386/sse.md (VI1248_AVX512VLBW, AVX512ZEXTMASK): New > mode iterators. > (<avx512>_testm<mode>3<mask_scalar_merge_name>, > <avx512>_testnm<mode>3<mask_scalar_merge_name>): Merge patterns > with VI12_AVX512VL and VI48_AVX512VL iterators into ones with > VI1248_AVX512VLBW iterator. > (*<avx512>_testm<VI1248_AVX512VLBW:mode>3_zext, > *<avx512>_testm<VI1248_AVX512VLBW:mode>3_zext_mask, > *<avx512>_testnm<VI1248_AVX512VLBW:mode>3_zext, > *<avx512>_testnm<VI1248_AVX512VLBW:mode>3_zext_mask): New > define_insns. > > * gcc.target/i386/pr88461.c: New test.
LGTM. Thanks, Uros. > --- gcc/config/i386/sse.md.jj 2018-12-03 21:56:35.252575482 +0100 > +++ gcc/config/i386/sse.md 2018-12-12 16:47:00.370433319 +0100 > @@ -12322,22 +12322,22 @@ (define_insn "*<code><mode>3_bcst" > (set_attr "prefix" "evex") > (set_attr "mode" "<sseinsnmode>")]) > > -(define_insn "<avx512>_testm<mode>3<mask_scalar_merge_name>" > - [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk") > - (unspec:<avx512fmaskmode> > - [(match_operand:VI12_AVX512VL 1 "register_operand" "v") > - (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")] > - UNSPEC_TESTM))] > - "TARGET_AVX512BW" > - "vptestm<ssemodesuffix>\t{%2, %1, > %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}" > - [(set_attr "prefix" "evex") > - (set_attr "mode" "<sseinsnmode>")]) > +(define_mode_iterator VI1248_AVX512VLBW > + [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX512VL && TARGET_AVX512BW") > + (V16QI "TARGET_AVX512VL && TARGET_AVX512BW") > + (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512VL && TARGET_AVX512BW") > + (V8HI "TARGET_AVX512VL && TARGET_AVX512BW") > + V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL") > + V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")]) > + > +(define_mode_iterator AVX512ZEXTMASK > + [(DI "TARGET_AVX512BW") (SI "TARGET_AVX512BW") HI]) > > (define_insn "<avx512>_testm<mode>3<mask_scalar_merge_name>" > [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk") > (unspec:<avx512fmaskmode> > - [(match_operand:VI48_AVX512VL 1 "register_operand" "v") > - (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")] > + [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v") > + (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")] > UNSPEC_TESTM))] > "TARGET_AVX512F" > "vptestm<ssemodesuffix>\t{%2, %1, > %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}" > @@ -12347,24 +12347,73 @@ (define_insn "<avx512>_testm<mode>3<mask > (define_insn 
"<avx512>_testnm<mode>3<mask_scalar_merge_name>" > [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk") > (unspec:<avx512fmaskmode> > - [(match_operand:VI12_AVX512VL 1 "register_operand" "v") > - (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")] > + [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v") > + (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")] > UNSPEC_TESTNM))] > - "TARGET_AVX512BW" > + "TARGET_AVX512F" > "vptestnm<ssemodesuffix>\t{%2, %1, > %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}" > [(set_attr "prefix" "evex") > (set_attr "mode" "<sseinsnmode>")]) > > -(define_insn "<avx512>_testnm<mode>3<mask_scalar_merge_name>" > - [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk") > - (unspec:<avx512fmaskmode> > - [(match_operand:VI48_AVX512VL 1 "register_operand" "v") > - (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")] > - UNSPEC_TESTNM))] > - "TARGET_AVX512F" > - "vptestnm<ssemodesuffix>\t{%2, %1, > %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}" > +(define_insn "*<avx512>_testm<VI1248_AVX512VLBW:mode>3_zext" > + [(set (match_operand:AVX512ZEXTMASK 0 "register_operand" "=Yk") > + (zero_extend:AVX512ZEXTMASK > + (unspec:<VI1248_AVX512VLBW:avx512fmaskmode> > + [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v") > + (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")] > + UNSPEC_TESTM)))] > + "TARGET_AVX512BW > + && (<AVX512ZEXTMASK:MODE_SIZE> > + > GET_MODE_SIZE (<VI1248_AVX512VLBW:avx512fmaskmode>mode))" > + "vptestm<VI1248_AVX512VLBW:ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" > [(set_attr "prefix" "evex") > - (set_attr "mode" "<sseinsnmode>")]) > + (set_attr "mode" "<VI1248_AVX512VLBW:sseinsnmode>")]) > + > +(define_insn "*<avx512>_testm<VI1248_AVX512VLBW:mode>3_zext_mask" > + [(set (match_operand:AVX512ZEXTMASK 0 "register_operand" "=Yk") > + (zero_extend:AVX512ZEXTMASK > + (and:<VI1248_AVX512VLBW:avx512fmaskmode> > 
+ (unspec:<VI1248_AVX512VLBW:avx512fmaskmode> > + [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v") > + (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")] > + UNSPEC_TESTM) > + (match_operand:<VI1248_AVX512VLBW:avx512fmaskmode> 3 > "register_operand" "Yk"))))] > + "TARGET_AVX512BW > + && (<AVX512ZEXTMASK:MODE_SIZE> > + > GET_MODE_SIZE (<VI1248_AVX512VLBW:avx512fmaskmode>mode))" > + "vptestm<VI1248_AVX512VLBW:ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, > %2}" > + [(set_attr "prefix" "evex") > + (set_attr "mode" "<VI1248_AVX512VLBW:sseinsnmode>")]) > + > +(define_insn "*<avx512>_testnm<VI1248_AVX512VLBW:mode>3_zext" > + [(set (match_operand:AVX512ZEXTMASK 0 "register_operand" "=Yk") > + (zero_extend:AVX512ZEXTMASK > + (unspec:<VI1248_AVX512VLBW:avx512fmaskmode> > + [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v") > + (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")] > + UNSPEC_TESTNM)))] > + "TARGET_AVX512BW > + && (<AVX512ZEXTMASK:MODE_SIZE> > + > GET_MODE_SIZE (<VI1248_AVX512VLBW:avx512fmaskmode>mode))" > + "vptestnm<VI1248_AVX512VLBW:ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" > + [(set_attr "prefix" "evex") > + (set_attr "mode" "<VI1248_AVX512VLBW:sseinsnmode>")]) > + > +(define_insn "*<avx512>_testnm<VI1248_AVX512VLBW:mode>3_zext_mask" > + [(set (match_operand:AVX512ZEXTMASK 0 "register_operand" "=Yk") > + (zero_extend:AVX512ZEXTMASK > + (and:<VI1248_AVX512VLBW:avx512fmaskmode> > + (unspec:<VI1248_AVX512VLBW:avx512fmaskmode> > + [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v") > + (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")] > + UNSPEC_TESTNM) > + (match_operand:<VI1248_AVX512VLBW:avx512fmaskmode> 3 > "register_operand" "Yk"))))] > + "TARGET_AVX512BW > + && (<AVX512ZEXTMASK:MODE_SIZE> > + > GET_MODE_SIZE (<VI1248_AVX512VLBW:avx512fmaskmode>mode))" > + "vptestnm<VI1248_AVX512VLBW:ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, > %1, %2}" > + [(set_attr "prefix" "evex") > + (set_attr 
"mode" "<VI1248_AVX512VLBW:sseinsnmode>")]) > > ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; > ;; > --- gcc/testsuite/gcc.target/i386/pr88461.c.jj 2018-12-12 16:54:03.779529123 > +0100 > +++ gcc/testsuite/gcc.target/i386/pr88461.c 2018-12-12 16:53:42.308879227 > +0100 > @@ -0,0 +1,16 @@ > +/* PR target/88461 */ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -mavx512vl -mavx512bw" } */ > +/* { dg-final { scan-assembler-times "kmovw\[ \t]" 2 } } */ > + > +#include <x86intrin.h> > + > +int > +foo (const __m128i *data, int a) > +{ > + __m128i v = _mm_load_si128 (data); > + __mmask16 m = _mm_testn_epi16_mask (v, v); > + m = _kshiftli_mask16 (m, 1); > + m = _kandn_mask16 (m, a); > + return m; > +} > > Jakub