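Hello, this patch extends support for AVX-512 mask-register instructions: it adds 32/64-bit mask logic under AVX512BW and uses the byte forms when AVX512DQ is available. Bootstrapped.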
Is it ok for trunk? gcc/ * config/i386/i386.md (define_attr "isa"): Add avx512bw,noavx512bw. (define_attr "enabled"): Ditto. (define_split): Add 32/64-bit mask logic. (define_insn "*k<logic>qi"): New. (define_insn "*k<logic>hi"): New. (define_insn "*anddi_1"): Add mask version. (define_insn "*andsi_1"): Ditto. (define_insn "*<code><mode>_1"): Ditto. (define_insn "*<code>hi_1"): Ditto. (define_insn "kxnor<mode>"): New. (define_insn "kunpcksi"): New. (define_insn "kunpckdi"): New. (define_insn "*one_cmpl<mode>2_1"): Add mask version. (define_insn "*one_cmplhi2_1"): Ditto. -- Thanks, K diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 4867e7e..7f14ddb 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -740,7 +740,8 @@ ;; Used to control the "enabled" attribute on a per-instruction basis. (define_attr "isa" "base,x64,x64_sse4,x64_sse4_noavx,x64_avx,nox64, sse2,sse2_noavx,sse3,sse4,sse4_noavx,avx,noavx, - avx2,noavx2,bmi,bmi2,fma4,fma,avx512f,noavx512f,fma_avx512f" + avx2,noavx2,bmi,bmi2,fma4,fma,avx512f,noavx512f, + fma_avx512f,avx512bw,noavx512bw" (const_string "base")) (define_attr "enabled" "" @@ -771,6 +772,8 @@ (eq_attr "isa" "noavx512f") (symbol_ref "!TARGET_AVX512F") (eq_attr "isa" "fma_avx512f") (symbol_ref "TARGET_FMA || TARGET_AVX512F") + (eq_attr "isa" "avx512bw") (symbol_ref "TARGET_AVX512BW") + (eq_attr "isa" "noavx512bw") (symbol_ref "!TARGET_AVX512BW") ] (const_int 1))) @@ -7510,21 +7513,45 @@ }) (define_split - [(set (match_operand:SWI12 0 "mask_reg_operand") - (any_logic:SWI12 (match_operand:SWI12 1 "mask_reg_operand") - (match_operand:SWI12 2 "mask_reg_operand"))) + [(set (match_operand:SWI1248x 0 "mask_reg_operand") + (any_logic:SWI1248x (match_operand:SWI1248x 1 "mask_reg_operand") + (match_operand:SWI1248x 2 "mask_reg_operand"))) (clobber (reg:CC FLAGS_REG))] - "TARGET_AVX512F && reload_completed" +;;TODO removed avx512f check because mask_reg implies it. 
+ "reload_completed" [(set (match_dup 0) - (any_logic:SWI12 (match_dup 1) - (match_dup 2)))]) + (any_logic:SWI1248x (match_dup 1) + (match_dup 2)))]) -(define_insn "*k<logic><mode>" - [(set (match_operand:SWI12 0 "mask_reg_operand" "=k") - (any_logic:SWI12 (match_operand:SWI12 1 "mask_reg_operand" "k") - (match_operand:SWI12 2 "mask_reg_operand" "k")))] +(define_insn "*k<logic>qi" + [(set (match_operand:QI 0 "mask_reg_operand" "=k") + (any_logic:QI (match_operand:QI 1 "mask_reg_operand" "k") + (match_operand:QI 2 "mask_reg_operand" "k")))] + "TARGET_AVX512F" +{ + return TARGET_AVX512DQ ? "k<logic>b\t{%2, %1, %0|%0, %1, %2}" + : "k<logic>w\t{%2, %1, %0|%0, %1, %2}"; +} + [(set_attr "mode" "QI") + (set_attr "type" "msklog") + (set_attr "prefix" "vex")]) + +(define_insn "*k<logic>hi" + [(set (match_operand:HI 0 "mask_reg_operand" "=k") + (any_logic:HI (match_operand:HI 1 "mask_reg_operand" "k") + (match_operand:HI 2 "mask_reg_operand" "k")))] "TARGET_AVX512F" "k<logic>w\t{%2, %1, %0|%0, %1, %2}"; + [(set_attr "mode" "HI") + (set_attr "type" "msklog") + (set_attr "prefix" "vex")]) + +(define_insn "*k<logic><mode>" + [(set (match_operand:SWI48x 0 "mask_reg_operand" "=k") + (any_logic:SWI48x (match_operand:SWI48x 1 "mask_reg_operand" "k") + (match_operand:SWI48x 2 "mask_reg_operand" "k")))] + "TARGET_AVX512BW" + "k<logic><mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}"; [(set_attr "mode" "<MODE>") (set_attr "type" "msklog") (set_attr "prefix" "vex")]) @@ -7580,10 +7607,10 @@ }) (define_insn "*anddi_1" - [(set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r,r") + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r,r,!k") (and:DI - (match_operand:DI 1 "nonimmediate_operand" "%0,0,0,qm") - (match_operand:DI 2 "x86_64_szext_general_operand" "Z,re,rm,L"))) + (match_operand:DI 1 "nonimmediate_operand" "%0,0,0,qm,k") + (match_operand:DI 2 "x86_64_szext_general_operand" "Z,re,rm,L,k"))) (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT && ix86_binary_operator_ok (AND, DImode, 
operands)" { @@ -7592,6 +7619,9 @@ case TYPE_IMOVX: return "#"; + case TYPE_MSKLOG: + return "kandq\t{%2, %1, %0|%0, %1, %2}"; + default: gcc_assert (rtx_equal_p (operands[0], operands[1])); if (get_attr_mode (insn) == MODE_SI) @@ -7600,8 +7630,8 @@ return "and{q}\t{%2, %0|%0, %2}"; } } - [(set_attr "type" "alu,alu,alu,imovx") - (set_attr "length_immediate" "*,*,*,0") + [(set_attr "type" "alu,alu,alu,imovx,msklog") + (set_attr "length_immediate" "*,*,*,0,0") (set (attr "prefix_rex") (if_then_else (and (eq_attr "type" "imovx") @@ -7609,12 +7639,12 @@ (match_operand 1 "ext_QIreg_operand"))) (const_string "1") (const_string "*"))) - (set_attr "mode" "SI,DI,DI,SI")]) + (set_attr "mode" "SI,DI,DI,SI,DI")]) (define_insn "*andsi_1" - [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r,Ya") - (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0,qm") - (match_operand:SI 2 "x86_64_general_operand" "re,rm,L"))) + [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r,Ya,!k") + (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0,qm,k") + (match_operand:SI 2 "x86_64_general_operand" "re,rm,L,k"))) (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (AND, SImode, operands)" { @@ -7623,12 +7653,15 @@ case TYPE_IMOVX: return "#"; + case TYPE_MSKLOG: + return "kandd\t{%2, %1, %0|%0, %1, %2}"; + default: gcc_assert (rtx_equal_p (operands[0], operands[1])); return "and{l}\t{%2, %0|%0, %2}"; } } - [(set_attr "type" "alu,alu,imovx") + [(set_attr "type" "alu,alu,imovx,msklog") (set (attr "prefix_rex") (if_then_else (and (eq_attr "type" "imovx") @@ -7636,7 +7669,7 @@ (match_operand 1 "ext_QIreg_operand"))) (const_string "1") (const_string "*"))) - (set_attr "length_immediate" "*,*,0") + (set_attr "length_immediate" "*,*,0,0") (set_attr "mode" "SI")]) ;; See comment for addsi_1_zext why we do use nonimmediate_operand @@ -7688,11 +7721,21 @@ (match_operand:QI 2 "general_operand" "qn,qmn,rn,k"))) (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (AND, QImode, 
operands)" - "@ - and{b}\t{%2, %0|%0, %2} - and{b}\t{%2, %0|%0, %2} - and{l}\t{%k2, %k0|%k0, %k2} - kandw\t{%2, %1, %0|%0, %1, %2}" +{ + switch (which_alternative) + { + case 0: + case 1: + return "and{b}\t{%2, %0|%0, %2}"; + case 2: + return "and{l}\t{%k2, %k0|%k0, %k2}"; + case 3: + return TARGET_AVX512DQ ? "kandb\t{%2, %1, %0|%0, %1, %2}" + : "kandw\t{%2, %1, %0|%0, %1, %2}"; + default: + gcc_unreachable (); + } +} [(set_attr "type" "alu,alu,alu,msklog") (set_attr "mode" "QI,QI,SI,HI")]) @@ -7715,10 +7758,22 @@ (match_operand:SWI12 2 "register_operand" "r,r,k"))) (clobber (reg:CC FLAGS_REG))] "TARGET_AVX512F" - "@ - andn\t{%k2, %k1, %k0|%k0, %k1, %k2} - # - kandnw\t{%2, %1, %0|%0, %1, %2}" +{ + switch (which_alternative) + { + case 0: + return "andn\t{%k2, %k1, %k0|%k0, %k1, %k2}"; + case 1: + return "#"; + case 2: + if (TARGET_AVX512DQ && <MODE>mode == QImode) + return "kandnb\t{%2, %1, %0|%0, %1, %2}"; + else + return "kandnw\t{%2, %1, %0|%0, %1, %2}"; + default: + gcc_unreachable (); + } +} [(set_attr "isa" "bmi,*,avx512f") (set_attr "type" "bitmanip,*,msklog") (set_attr "prefix" "*,*,vex") @@ -8082,14 +8137,17 @@ "ix86_expand_binary_operator (<CODE>, <MODE>mode, operands); DONE;") (define_insn "*<code><mode>_1" - [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r,rm") + [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r,rm,k") (any_or:SWI48 - (match_operand:SWI48 1 "nonimmediate_operand" "%0,0") - (match_operand:SWI48 2 "<general_operand>" "<g>,r<i>"))) + (match_operand:SWI48 1 "nonimmediate_operand" "%0,0,k") + (match_operand:SWI48 2 "<general_operand>" "<g>,r<i>,k"))) (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" - "<logic>{<imodesuffix>}\t{%2, %0|%0, %2}" - [(set_attr "type" "alu") + "@ + <logic>{<imodesuffix>}\t{%2, %0|%0, %2} + <logic>{<imodesuffix>}\t{%2, %0|%0, %2} + k<logic><mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "alu,alu,msklog") (set_attr "mode" "<MODE>")]) (define_insn 
"*<code>hi_1" @@ -8177,19 +8235,36 @@ (match_operand:SWI12 2 "register_operand" "r,k")))) (clobber (reg:CC FLAGS_REG))] "TARGET_AVX512F" +{ + if (which_alternative == 1 && <MODE>mode == QImode && TARGET_AVX512DQ) + return "kxnorb\t{%2, %1, %0|%0, %1, %2}"; + return "kxnorw\t{%2, %1, %0|%0, %1, %2}"; +} + [(set_attr "type" "*,msklog") + (set_attr "prefix" "*,vex") + (set_attr "mode" "<MODE>")]) + +(define_insn "kxnor<mode>" + [(set (match_operand:SWI48x 0 "register_operand" "=r,!k") + (not:SWI48x + (xor:SWI48x + (match_operand:SWI48x 1 "register_operand" "0,k") + (match_operand:SWI48x 2 "register_operand" "r,k")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_AVX512BW" "@ # - kxnorw\t{%2, %1, %0|%0, %1, %2}" + kxnor<mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "*,msklog") (set_attr "prefix" "*,vex") (set_attr "mode" "<MODE>")]) (define_split - [(set (match_operand:SWI12 0 "general_reg_operand") - (not:SWI12 - (xor:SWI12 + [(set (match_operand:SWI1248x 0 "general_reg_operand") + (not:SWI1248x + (xor:SWI1248x (match_dup 0) - (match_operand:SWI12 1 "general_reg_operand")))) + (match_operand:SWI1248x 1 "general_reg_operand")))) (clobber (reg:CC FLAGS_REG))] "TARGET_AVX512F && reload_completed" [(parallel [(set (match_dup 0) @@ -8199,6 +8274,8 @@ (set (match_dup 0) (not:HI (match_dup 0)))]) +;;There are kortrest[bdq] but no intrinsics for them. +;;We probably don't need to implement them. 
(define_insn "kortestzhi" [(set (reg:CCZ FLAGS_REG) (compare:CCZ @@ -8238,6 +8315,28 @@ (set_attr "type" "msklog") (set_attr "prefix" "vex")]) +(define_insn "kunpcksi" + [(set (match_operand:SI 0 "register_operand" "=k") + (ior:SI + (ashift:SI + (match_operand:SI 1 "register_operand" "k") + (const_int 16)) + (zero_extend:SI (subreg:HI (match_operand:SI 2 "register_operand" "k") 0))))] + "TARGET_AVX512BW" + "kunpckwd\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "mode" "SI")]) + +(define_insn "kunpckdi" + [(set (match_operand:DI 0 "register_operand" "=k") + (ior:DI + (ashift:DI + (match_operand:DI 1 "register_operand" "k") + (const_int 32)) + (zero_extend:DI (subreg:SI (match_operand:DI 2 "register_operand" "k") 0))))] + "TARGET_AVX512BW" + "kunpckdq\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "mode" "DI")]) + ;; See comment for addsi_1_zext why we do use nonimmediate_operand ;; ??? Special case for immediate operand is missing - it is tricky. (define_insn "*<code>si_2_zext" @@ -8837,11 +8936,15 @@ "ix86_expand_unary_operator (NOT, <MODE>mode, operands); DONE;") (define_insn "*one_cmpl<mode>2_1" - [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm") - (not:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0")))] + [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,k") + (not:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,k")))] "ix86_unary_operator_ok (NOT, <MODE>mode, operands)" - "not{<imodesuffix>}\t%0" - [(set_attr "type" "negnot") + "@ + not{<imodesuffix>}\t%0 + knot<mskmodesuffix>\t{%1, %0|%0, %1}" + [(set_attr "isa" "*,avx512bw") + (set_attr "type" "negnot,msklog") + (set_attr "prefix" "*,vex") (set_attr "mode" "<MODE>")]) (define_insn "*one_cmplhi2_1" @@ -8861,10 +8964,21 @@ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,!k") (not:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,k")))] "ix86_unary_operator_ok (NOT, QImode, operands)" - "@ - not{b}\t%0 - not{l}\t%k0 - knotw\t{%1, %0|%0, %1}" +{ + switch (which_alternative) + { + case 0: 
+ return "not{b}\t%0"; + case 1: + return "not{l}\t%k0"; + case 2: + if (TARGET_AVX512DQ) + return "knotb\t{%1, %0|%0, %1}"; + return "knotw\t{%1, %0|%0, %1}"; + default: + gcc_unreachable (); + } +} [(set_attr "isa" "*,*,avx512f") (set_attr "type" "negnot,negnot,msklog") (set_attr "prefix" "*,*,vex")