Hi!

Some time ago, patterns were added to optimize move mask followed by zero extension from 32 bits to 64 bits.  As the testcase shows, the intrinsics actually return int, not unsigned int, so it will happen quite often that one actually needs sign extension instead of zero extension.  Except for vpmovmskb with a 256-bit operand, sign vs. zero extension makes no difference, because we know bit 31 will not be set (the source has 2 or 4 doubles, 4 or 8 floats, or 16 chars; only the 32-char case can set bit 31).  So, for the floating point patterns this patch just uses a code iterator so that we handle both zero extend and sign extend, and for the byte one it adds a separate pattern for the 128-bit operand.
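To see why the return type matters, here is a small standalone example (illustration only, not part of the patch; the function names are made up).  The intrinsics return int, so converting to a 64-bit type sign extends, which is only observable when bit 31 of the mask can be set:

#include <x86intrin.h>

/* _mm_movemask_ps sets at most bits 0-3, so sign and zero extension
   to 64 bits agree and either extension pattern may match; ideally no
   separate extension instruction is emitted at all.  */
unsigned long long
mask_ps (__m128 x)
{
  return _mm_movemask_ps (x);
}

/* _mm256_movemask_epi8 produces a full 32-bit mask whose bit 31 is
   the MSB of byte 31, so the implicit int -> unsigned long long
   conversion really sign extends (a mask of 0x80000000 becomes
   0xffffffff80000000) and the extension has to be kept.  */
unsigned long long
mask_epi8_256 (__m256i x)
{
  return _mm256_movemask_epi8 (x);
}

That is why the byte variant only gets new sign extend patterns for the 128-bit operand, while the float patterns can use any_extend throughout.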
Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2020-01-30  Jakub Jelinek  <ja...@redhat.com>

	PR target/91824
	* config/i386/sse.md
	(*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext): Renamed to ...
	(*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext): ... this.  Use
	any_extend code iterator instead of always zero_extend.
	(*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext_lt): Renamed to ...
	(*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext_lt): ... this.
	Use any_extend code iterator instead of always zero_extend.
	(*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext_shift): Renamed to ...
	(*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext_shift): ... this.
	Use any_extend code iterator instead of always zero_extend.
	(*sse2_pmovmskb_ext): New define_insn.
	(*sse2_pmovmskb_ext_lt): New define_insn_and_split.

	* gcc.target/i386/pr91824-2.c: New test.

--- gcc/config/i386/sse.md.jj	2020-01-29 09:35:05.791247952 +0100
+++ gcc/config/i386/sse.md	2020-01-29 16:56:00.354739600 +0100
@@ -15815,9 +15815,9 @@ (define_insn "<sse>_movmsk<ssemodesuffix
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext"
+(define_insn "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext"
   [(set (match_operand:DI 0 "register_operand" "=r")
-	(zero_extend:DI
+	(any_extend:DI
 	  (unspec:SI
 	    [(match_operand:VF_128_256 1 "register_operand" "x")]
 	    UNSPEC_MOVMSK)))]
@@ -15844,9 +15844,9 @@ (define_insn_and_split "*<sse>_movmsk<ss
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "<MODE>")])
 
-(define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext_lt"
+(define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext_lt"
   [(set (match_operand:DI 0 "register_operand" "=r")
-	(zero_extend:DI
+	(any_extend:DI
 	  (unspec:SI
 	    [(lt:VF_128_256
 	       (match_operand:<sseintvecmode> 1 "register_operand" "x")
@@ -15856,7 +15856,7 @@ (define_insn_and_split "*<sse>_movmsk<ss
   "#"
   "&& reload_completed"
   [(set (match_dup 0)
-	(zero_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
+	(any_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
   "operands[1] = gen_lowpart (<MODE>mode, operands[1]);"
   [(set_attr "type" "ssemov")
    (set_attr "prefix" "maybe_vex")
@@ -15880,9 +15880,9 @@ (define_insn_and_split "*<sse>_movmsk<ss
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "<MODE>")])
 
-(define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext_shift"
+(define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext_shift"
   [(set (match_operand:DI 0 "register_operand" "=r")
-	(zero_extend:DI
+	(any_extend:DI
 	  (unspec:SI
 	    [(subreg:VF_128_256
 	       (ashiftrt:<sseintvecmode>
@@ -15893,7 +15893,7 @@ (define_insn_and_split "*<sse>_movmsk<ss
   "#"
   "&& reload_completed"
   [(set (match_dup 0)
-	(zero_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
+	(any_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
   "operands[1] = gen_lowpart (<MODE>mode, operands[1]);"
   [(set_attr "type" "ssemov")
    (set_attr "prefix" "maybe_vex")
@@ -15932,6 +15932,23 @@ (define_insn "*<sse2_avx2>_pmovmskb_zext
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "SI")])
 
+(define_insn "*sse2_pmovmskb_ext"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(sign_extend:DI
+	  (unspec:SI
+	    [(match_operand:V16QI 1 "register_operand" "x")]
+	    UNSPEC_MOVMSK)))]
+  "TARGET_64BIT && TARGET_SSE2"
+  "%vpmovmskb\t{%1, %k0|%k0, %1}"
+  [(set_attr "type" "ssemov")
+   (set (attr "prefix_data16")
+     (if_then_else
+       (match_test "TARGET_AVX")
+       (const_string "*")
+       (const_string "1")))
+   (set_attr "prefix" "maybe_vex")
+   (set_attr "mode" "SI")])
+
 (define_insn_and_split "*<sse2_avx2>_pmovmskb_lt"
   [(set (match_operand:SI 0 "register_operand" "=r")
 	(unspec:SI
@@ -15968,6 +15985,28 @@ (define_insn_and_split "*<sse2_avx2>_pmo
   ""
   [(set_attr "type" "ssemov")
    (set (attr "prefix_data16")
+     (if_then_else
+       (match_test "TARGET_AVX")
+       (const_string "*")
+       (const_string "1")))
+   (set_attr "prefix" "maybe_vex")
+   (set_attr "mode" "SI")])
+
+(define_insn_and_split "*sse2_pmovmskb_ext_lt"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(sign_extend:DI
+	  (unspec:SI
+	    [(lt:V16QI (match_operand:V16QI 1 "register_operand" "x")
+		       (match_operand:V16QI 2 "const0_operand" "C"))]
+	    UNSPEC_MOVMSK)))]
+  "TARGET_64BIT && TARGET_SSE2"
+  "#"
+  ""
+  [(set (match_dup 0)
+	(sign_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
+  ""
+  [(set_attr "type" "ssemov")
+   (set (attr "prefix_data16")
     (if_then_else
       (match_test "TARGET_AVX")
      (const_string "*")
--- gcc/testsuite/gcc.target/i386/pr91824-2.c.jj	2020-01-29 17:06:18.838474437 +0100
+++ gcc/testsuite/gcc.target/i386/pr91824-2.c	2020-01-29 17:06:01.070740609 +0100
@@ -0,0 +1,73 @@
+/* PR target/91824 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx2" } */
+/* { dg-final { scan-assembler-not "cltq" } } */
+/* { dg-final { scan-assembler-not "movl\t%eax, %eax" } } */
+
+#include <x86intrin.h>
+
+unsigned long long
+f1 (__m128i x)
+{
+  return _mm_movemask_epi8 (x);
+}
+
+unsigned long long
+f2 (__m128i x)
+{
+  return (unsigned) _mm_movemask_epi8 (x);
+}
+
+unsigned long long
+f3 (__m128 x)
+{
+  return _mm_movemask_ps (x);
+}
+
+unsigned long long
+f4 (__m128 x)
+{
+  return (unsigned) _mm_movemask_ps (x);
+}
+
+unsigned long long
+f5 (__m128d x)
+{
+  return _mm_movemask_pd (x);
+}
+
+unsigned long long
+f6 (__m128d x)
+{
+  return (unsigned) _mm_movemask_pd (x);
+}
+
+unsigned long long
+f7 (__m256 x)
+{
+  return _mm256_movemask_ps (x);
+}
+
+unsigned long long
+f8 (__m256 x)
+{
+  return (unsigned) _mm256_movemask_ps (x);
+}
+
+unsigned long long
+f9 (__m256d x)
+{
+  return _mm256_movemask_pd (x);
+}
+
+unsigned long long
+f10 (__m256d x)
+{
+  return (unsigned) _mm256_movemask_pd (x);
+}
+
+unsigned long long
+f11 (__m256i x)
+{
+  return (unsigned) _mm256_movemask_epi8 (x);
+}

	Jakub