On Wed, Dec 12, 2018 at 11:36 PM Jakub Jelinek <ja...@redhat.com> wrote:
>
> Hi!
>
> The following patch uses a new mode iterator to merge two nearly identical
> vptestm patterns and two nearly identical vptestnm patterns, and adds
> patterns that zero-extend those results to wider k registers, because that
> is what the instructions actually do.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>
> 2018-12-12  Jakub Jelinek  <ja...@redhat.com>
>
>         PR target/88461
>         * config/i386/sse.md (VI1248_AVX512VLBW, AVX512ZEXTMASK): New
>         mode iterators.
>         (<avx512>_testm<mode>3<mask_scalar_merge_name>,
>         <avx512>_testnm<mode>3<mask_scalar_merge_name>): Merge patterns
>         with VI12_AVX512VL and VI48_AVX512VL iterators into ones with
>         VI1248_AVX512VLBW iterator.
>         (*<avx512>_testm<VI1248_AVX512VLBW:mode>3_zext,
>         *<avx512>_testm<VI1248_AVX512VLBW:mode>3_zext_mask,
>         *<avx512>_testnm<VI1248_AVX512VLBW:mode>3_zext,
>         *<avx512>_testnm<VI1248_AVX512VLBW:mode>3_zext_mask): New
>         define_insns.
>
>         * gcc.target/i386/pr88461.c: New test.

LGTM.

Thanks,
Uros.

> --- gcc/config/i386/sse.md.jj   2018-12-03 21:56:35.252575482 +0100
> +++ gcc/config/i386/sse.md      2018-12-12 16:47:00.370433319 +0100
> @@ -12322,22 +12322,22 @@ (define_insn "*<code><mode>3_bcst"
>     (set_attr "prefix" "evex")
>     (set_attr "mode" "<sseinsnmode>")])
>
> -(define_insn "<avx512>_testm<mode>3<mask_scalar_merge_name>"
> -  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
> -       (unspec:<avx512fmaskmode>
> -        [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
> -         (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")]
> -        UNSPEC_TESTM))]
> -  "TARGET_AVX512BW"
> -  "vptestm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
> -  [(set_attr "prefix" "evex")
> -   (set_attr "mode"  "<sseinsnmode>")])
> +(define_mode_iterator VI1248_AVX512VLBW
> +  [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX512VL && TARGET_AVX512BW")
> +   (V16QI "TARGET_AVX512VL && TARGET_AVX512BW")
> +   (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512VL && TARGET_AVX512BW")
> +   (V8HI "TARGET_AVX512VL && TARGET_AVX512BW")
> +   V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
> +   V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
> +
> +(define_mode_iterator AVX512ZEXTMASK
> +  [(DI "TARGET_AVX512BW") (SI "TARGET_AVX512BW") HI])
>
>  (define_insn "<avx512>_testm<mode>3<mask_scalar_merge_name>"
>    [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
>         (unspec:<avx512fmaskmode>
> -        [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
> -         (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")]
> +        [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
> +         (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
>          UNSPEC_TESTM))]
>    "TARGET_AVX512F"
>    "vptestm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
> @@ -12347,24 +12347,73 @@ (define_insn "<avx512>_testm<mode>3<mask
>  (define_insn "<avx512>_testnm<mode>3<mask_scalar_merge_name>"
>    [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
>         (unspec:<avx512fmaskmode>
> -        [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
> -         (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")]
> +        [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
> +         (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
>          UNSPEC_TESTNM))]
> -  "TARGET_AVX512BW"
> +  "TARGET_AVX512F"
>    "vptestnm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
>    [(set_attr "prefix" "evex")
>     (set_attr "mode"  "<sseinsnmode>")])
>
> -(define_insn "<avx512>_testnm<mode>3<mask_scalar_merge_name>"
> -  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
> -       (unspec:<avx512fmaskmode>
> -        [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
> -         (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")]
> -        UNSPEC_TESTNM))]
> -  "TARGET_AVX512F"
> -  "vptestnm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
> +(define_insn "*<avx512>_testm<VI1248_AVX512VLBW:mode>3_zext"
> +  [(set (match_operand:AVX512ZEXTMASK 0 "register_operand" "=Yk")
> +       (zero_extend:AVX512ZEXTMASK
> +         (unspec:<VI1248_AVX512VLBW:avx512fmaskmode>
> +          [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
> +           (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
> +          UNSPEC_TESTM)))]
> +  "TARGET_AVX512BW
> +   && (<AVX512ZEXTMASK:MODE_SIZE>
> +       > GET_MODE_SIZE (<VI1248_AVX512VLBW:avx512fmaskmode>mode))"
> +  "vptestm<VI1248_AVX512VLBW:ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
>    [(set_attr "prefix" "evex")
> -   (set_attr "mode"  "<sseinsnmode>")])
> +   (set_attr "mode"  "<VI1248_AVX512VLBW:sseinsnmode>")])
> +
> +(define_insn "*<avx512>_testm<VI1248_AVX512VLBW:mode>3_zext_mask"
> +  [(set (match_operand:AVX512ZEXTMASK 0 "register_operand" "=Yk")
> +       (zero_extend:AVX512ZEXTMASK
> +         (and:<VI1248_AVX512VLBW:avx512fmaskmode>
> +           (unspec:<VI1248_AVX512VLBW:avx512fmaskmode>
> +            [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
> +             (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
> +            UNSPEC_TESTM)
> +           (match_operand:<VI1248_AVX512VLBW:avx512fmaskmode> 3 "register_operand" "Yk"))))]
> +  "TARGET_AVX512BW
> +   && (<AVX512ZEXTMASK:MODE_SIZE>
> +       > GET_MODE_SIZE (<VI1248_AVX512VLBW:avx512fmaskmode>mode))"
> +  "vptestm<VI1248_AVX512VLBW:ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
> +  [(set_attr "prefix" "evex")
> +   (set_attr "mode"  "<VI1248_AVX512VLBW:sseinsnmode>")])
> +
> +(define_insn "*<avx512>_testnm<VI1248_AVX512VLBW:mode>3_zext"
> +  [(set (match_operand:AVX512ZEXTMASK 0 "register_operand" "=Yk")
> +       (zero_extend:AVX512ZEXTMASK
> +         (unspec:<VI1248_AVX512VLBW:avx512fmaskmode>
> +          [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
> +           (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
> +          UNSPEC_TESTNM)))]
> +  "TARGET_AVX512BW
> +   && (<AVX512ZEXTMASK:MODE_SIZE>
> +       > GET_MODE_SIZE (<VI1248_AVX512VLBW:avx512fmaskmode>mode))"
> +  "vptestnm<VI1248_AVX512VLBW:ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
> +  [(set_attr "prefix" "evex")
> +   (set_attr "mode"  "<VI1248_AVX512VLBW:sseinsnmode>")])
> +
> +(define_insn "*<avx512>_testnm<VI1248_AVX512VLBW:mode>3_zext_mask"
> +  [(set (match_operand:AVX512ZEXTMASK 0 "register_operand" "=Yk")
> +       (zero_extend:AVX512ZEXTMASK
> +         (and:<VI1248_AVX512VLBW:avx512fmaskmode>
> +           (unspec:<VI1248_AVX512VLBW:avx512fmaskmode>
> +            [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
> +             (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
> +            UNSPEC_TESTNM)
> +           (match_operand:<VI1248_AVX512VLBW:avx512fmaskmode> 3 "register_operand" "Yk"))))]
> +  "TARGET_AVX512BW
> +   && (<AVX512ZEXTMASK:MODE_SIZE>
> +       > GET_MODE_SIZE (<VI1248_AVX512VLBW:avx512fmaskmode>mode))"
> +  "vptestnm<VI1248_AVX512VLBW:ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
> +  [(set_attr "prefix" "evex")
> +   (set_attr "mode"  "<VI1248_AVX512VLBW:sseinsnmode>")])
>
>  ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
>  ;;
> --- gcc/testsuite/gcc.target/i386/pr88461.c.jj  2018-12-12 16:54:03.779529123 +0100
> +++ gcc/testsuite/gcc.target/i386/pr88461.c     2018-12-12 16:53:42.308879227 +0100
> @@ -0,0 +1,16 @@
> +/* PR target/88461 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mavx512vl -mavx512bw" } */
> +/* { dg-final { scan-assembler-times "kmovw\[ \t]" 2 } } */
> +
> +#include <x86intrin.h>
> +
> +int
> +foo (const __m128i *data, int a)
> +{
> +  __m128i v = _mm_load_si128 (data);
> +  __mmask16 m = _mm_testn_epi16_mask (v, v);
> +  m = _kshiftli_mask16 (m, 1);
> +  m = _kandn_mask16 (m, a);
> +  return m;
> +}
>
>         Jakub

Reply via email to