Hi! The following patch uses a new mode iterator to merge the two nearly identical vptestm patterns, and likewise the two nearly identical vptestnm patterns, into a single pattern each, and adds patterns that zero-extend those results to wider k registers, because that is what the instructions actually do.
Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2018-12-12 Jakub Jelinek <ja...@redhat.com> PR target/88461 * config/i386/sse.md (VI1248_AVX512VLBW, AVX512ZEXTMASK): New mode iterators. (<avx512>_testm<mode>3<mask_scalar_merge_name>, <avx512>_testnm<mode>3<mask_scalar_merge_name>): Merge patterns with VI12_AVX512VL and VI48_AVX512VL iterators into ones with VI1248_AVX512VLBW iterator. (*<avx512>_testm<VI1248_AVX512VLBW:mode>3_zext, *<avx512>_testm<VI1248_AVX512VLBW:mode>3_zext_mask, *<avx512>_testnm<VI1248_AVX512VLBW:mode>3_zext, *<avx512>_testnm<VI1248_AVX512VLBW:mode>3_zext_mask): New define_insns. * gcc.target/i386/pr88461.c: New test. --- gcc/config/i386/sse.md.jj 2018-12-03 21:56:35.252575482 +0100 +++ gcc/config/i386/sse.md 2018-12-12 16:47:00.370433319 +0100 @@ -12322,22 +12322,22 @@ (define_insn "*<code><mode>3_bcst" (set_attr "prefix" "evex") (set_attr "mode" "<sseinsnmode>")]) -(define_insn "<avx512>_testm<mode>3<mask_scalar_merge_name>" - [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk") - (unspec:<avx512fmaskmode> - [(match_operand:VI12_AVX512VL 1 "register_operand" "v") - (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")] - UNSPEC_TESTM))] - "TARGET_AVX512BW" - "vptestm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}" - [(set_attr "prefix" "evex") - (set_attr "mode" "<sseinsnmode>")]) +(define_mode_iterator VI1248_AVX512VLBW + [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX512VL && TARGET_AVX512BW") + (V16QI "TARGET_AVX512VL && TARGET_AVX512BW") + (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512VL && TARGET_AVX512BW") + (V8HI "TARGET_AVX512VL && TARGET_AVX512BW") + V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL") + V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")]) + +(define_mode_iterator AVX512ZEXTMASK + [(DI "TARGET_AVX512BW") (SI "TARGET_AVX512BW") HI]) (define_insn "<avx512>_testm<mode>3<mask_scalar_merge_name>" [(set 
(match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk") (unspec:<avx512fmaskmode> - [(match_operand:VI48_AVX512VL 1 "register_operand" "v") - (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")] + [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v") + (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")] UNSPEC_TESTM))] "TARGET_AVX512F" "vptestm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}" @@ -12347,24 +12347,73 @@ (define_insn "<avx512>_testm<mode>3<mask (define_insn "<avx512>_testnm<mode>3<mask_scalar_merge_name>" [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk") (unspec:<avx512fmaskmode> - [(match_operand:VI12_AVX512VL 1 "register_operand" "v") - (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")] + [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v") + (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")] UNSPEC_TESTNM))] - "TARGET_AVX512BW" + "TARGET_AVX512F" "vptestnm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}" [(set_attr "prefix" "evex") (set_attr "mode" "<sseinsnmode>")]) -(define_insn "<avx512>_testnm<mode>3<mask_scalar_merge_name>" - [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk") - (unspec:<avx512fmaskmode> - [(match_operand:VI48_AVX512VL 1 "register_operand" "v") - (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")] - UNSPEC_TESTNM))] - "TARGET_AVX512F" - "vptestnm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}" +(define_insn "*<avx512>_testm<VI1248_AVX512VLBW:mode>3_zext" + [(set (match_operand:AVX512ZEXTMASK 0 "register_operand" "=Yk") + (zero_extend:AVX512ZEXTMASK + (unspec:<VI1248_AVX512VLBW:avx512fmaskmode> + [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v") + (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")] + UNSPEC_TESTM)))] + "TARGET_AVX512BW + && 
(<AVX512ZEXTMASK:MODE_SIZE> + > GET_MODE_SIZE (<VI1248_AVX512VLBW:avx512fmaskmode>mode))" + "vptestm<VI1248_AVX512VLBW:ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" [(set_attr "prefix" "evex") - (set_attr "mode" "<sseinsnmode>")]) + (set_attr "mode" "<VI1248_AVX512VLBW:sseinsnmode>")]) + +(define_insn "*<avx512>_testm<VI1248_AVX512VLBW:mode>3_zext_mask" + [(set (match_operand:AVX512ZEXTMASK 0 "register_operand" "=Yk") + (zero_extend:AVX512ZEXTMASK + (and:<VI1248_AVX512VLBW:avx512fmaskmode> + (unspec:<VI1248_AVX512VLBW:avx512fmaskmode> + [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v") + (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")] + UNSPEC_TESTM) + (match_operand:<VI1248_AVX512VLBW:avx512fmaskmode> 3 "register_operand" "Yk"))))] + "TARGET_AVX512BW + && (<AVX512ZEXTMASK:MODE_SIZE> + > GET_MODE_SIZE (<VI1248_AVX512VLBW:avx512fmaskmode>mode))" + "vptestm<VI1248_AVX512VLBW:ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}" + [(set_attr "prefix" "evex") + (set_attr "mode" "<VI1248_AVX512VLBW:sseinsnmode>")]) + +(define_insn "*<avx512>_testnm<VI1248_AVX512VLBW:mode>3_zext" + [(set (match_operand:AVX512ZEXTMASK 0 "register_operand" "=Yk") + (zero_extend:AVX512ZEXTMASK + (unspec:<VI1248_AVX512VLBW:avx512fmaskmode> + [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v") + (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")] + UNSPEC_TESTNM)))] + "TARGET_AVX512BW + && (<AVX512ZEXTMASK:MODE_SIZE> + > GET_MODE_SIZE (<VI1248_AVX512VLBW:avx512fmaskmode>mode))" + "vptestnm<VI1248_AVX512VLBW:ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "prefix" "evex") + (set_attr "mode" "<VI1248_AVX512VLBW:sseinsnmode>")]) + +(define_insn "*<avx512>_testnm<VI1248_AVX512VLBW:mode>3_zext_mask" + [(set (match_operand:AVX512ZEXTMASK 0 "register_operand" "=Yk") + (zero_extend:AVX512ZEXTMASK + (and:<VI1248_AVX512VLBW:avx512fmaskmode> + (unspec:<VI1248_AVX512VLBW:avx512fmaskmode> + [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v") + 
(match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")] + UNSPEC_TESTNM) + (match_operand:<VI1248_AVX512VLBW:avx512fmaskmode> 3 "register_operand" "Yk"))))] + "TARGET_AVX512BW + && (<AVX512ZEXTMASK:MODE_SIZE> + > GET_MODE_SIZE (<VI1248_AVX512VLBW:avx512fmaskmode>mode))" + "vptestnm<VI1248_AVX512VLBW:ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}" + [(set_attr "prefix" "evex") + (set_attr "mode" "<VI1248_AVX512VLBW:sseinsnmode>")]) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; --- gcc/testsuite/gcc.target/i386/pr88461.c.jj 2018-12-12 16:54:03.779529123 +0100 +++ gcc/testsuite/gcc.target/i386/pr88461.c 2018-12-12 16:53:42.308879227 +0100 @@ -0,0 +1,16 @@ +/* PR target/88461 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512vl -mavx512bw" } */ +/* { dg-final { scan-assembler-times "kmovw\[ \t]" 2 } } */ + +#include <x86intrin.h> + +int +foo (const __m128i *data, int a) +{ + __m128i v = _mm_load_si128 (data); + __mmask16 m = _mm_testn_epi16_mask (v, v); + m = _kshiftli_mask16 (m, 1); + m = _kandn_mask16 (m, a); + return m; +} Jakub