late_combine will combine lshiftrt + zero_extend into *lshrsi3_1_zext (the lshiftrt instance of *<insn>si3_1_zext), which causes an extra mov between GPR and kmask registers. Add a ?k alternative to the pattern to avoid it.
gcc/ChangeLog: PR target/115610 * config/i386/i386.md (<*insnsi3_zext): Add alternative ?k, enable it only for lshiftrt and under avx512bw. * config/i386/sse.md (*klshrsi3_1_zext): New define_insn, and add corresponding define_split after it. --- gcc/config/i386/i386.md | 19 +++++++++++++------ gcc/config/i386/sse.md | 28 ++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 6 deletions(-) diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index fd48e764469..57a10c1af48 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -16836,10 +16836,10 @@ (define_insn "*bmi2_<insn>si3_1_zext" (set_attr "mode" "SI")]) (define_insn "*<insn>si3_1_zext" - [(set (match_operand:DI 0 "register_operand" "=r,r,r") + [(set (match_operand:DI 0 "register_operand" "=r,r,r,?k") (zero_extend:DI - (any_shiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm,rm") - (match_operand:QI 2 "nonmemory_operand" "cI,r,cI")))) + (any_shiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm,rm,k") + (match_operand:QI 2 "nonmemory_operand" "cI,r,cI,I")))) (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT && ix86_binary_operator_ok (<CODE>, SImode, operands, TARGET_APX_NDD)" @@ -16850,6 +16850,8 @@ (define_insn "*<insn>si3_1_zext" case TYPE_ISHIFTX: return "#"; + case TYPE_MSKLOG: + return "#"; default: if (operands[2] == const1_rtx && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)) @@ -16860,8 +16862,8 @@ (define_insn "*<insn>si3_1_zext" : "<shift>{l}\t{%2, %k0|%k0, %2}"; } } - [(set_attr "isa" "*,bmi2,apx_ndd") - (set_attr "type" "ishift,ishiftx,ishift") + [(set_attr "isa" "*,bmi2,apx_ndd,avx512bw") + (set_attr "type" "ishift,ishiftx,ishift,msklog") (set (attr "length_immediate") (if_then_else (and (match_operand 2 "const1_operand") @@ -16869,7 +16871,12 @@ (define_insn "*<insn>si3_1_zext" (match_test "optimize_function_for_size_p (cfun)"))) (const_string "0") (const_string "*"))) - (set_attr "mode" "SI")]) + (set_attr "mode" "SI") + (set (attr "enabled") 
+ (if_then_else + (eq_attr "alternative" "3") + (symbol_ref "<CODE> == LSHIFTRT && TARGET_AVX512BW") + (const_string "*")))]) ;; Convert shift to the shiftx pattern to avoid flags dependency. (define_split diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 0be2dcd8891..20665a6f097 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -2179,6 +2179,34 @@ (define_split (match_dup 2))) (unspec [(const_int 0)] UNSPEC_MASKOP)])]) +(define_insn "*klshrsi3_1_zext" + [(set (match_operand:DI 0 "register_operand" "=k") + (zero_extend:DI + (lshiftrt:SI (match_operand:SI 1 "register_operand" "k") + (match_operand 2 "const_0_to_31_operand" "I")))) + (unspec [(const_int 0)] UNSPEC_MASKOP)] + "TARGET_AVX512BW" + "kshiftrd\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "msklog") + (set_attr "prefix" "vex") + (set_attr "mode" "SI")]) + +(define_split + [(set (match_operand:DI 0 "mask_reg_operand") + (zero_extend:DI + (lshiftrt:SI + (match_operand:SI 1 "mask_reg_operand") + (match_operand 2 "const_0_to_31_operand")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_AVX512BW && reload_completed" + [(parallel + [(set (match_dup 0) + (zero_extend:DI + (lshiftrt:SI + (match_dup 1) + (match_dup 2)))) + (unspec [(const_int 0)] UNSPEC_MASKOP)])]) + (define_insn "ktest<mode>" [(set (reg:CC FLAGS_REG) (unspec:CC -- 2.31.1