On Tue, Aug 12, 2014 at 1:29 PM, Kirill Yukhin <kirill.yuk...@gmail.com> wrote:
> This patch extends support of masking instructions. > > Bootstrapped. > > Is it ok for trunk? > > gcc/ > * config/i386/i386.md > (define_attr "isa"): Add avx512bw,noavx512bw. > (define_attr "enabled"): Ditto. > (define_split): Add 32/64-bit mask logic. > (define_insn "*k<logic>qi"): New. > (define_insn "*k<logic>hi"): New. > (define_insn "*anddi_1"): Add mask version. > (define_insn "*andsi_1"): Ditto. > (define_insn "*<code><mode>_1"): Ditto. > (define_insn "*<code>hi_1"): Ditto. > (define_insn "kxnor<mode>"): New. > (define_insn "kunpcksi"): New. > (define_insn "kunpckdi"): New. > (define_insn "*one_cmpl<mode>2_1"): Add mask version. > (define_insn "*one_cmplhi2_1"): Ditto. OK with a couple of small adjustments below. Thanks, Uros. > diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md > index 4867e7e..7f14ddb 100644 > --- a/gcc/config/i386/i386.md > +++ b/gcc/config/i386/i386.md > @@ -740,7 +740,8 @@ > ;; Used to control the "enabled" attribute on a per-instruction basis. 
> (define_attr "isa" "base,x64,x64_sse4,x64_sse4_noavx,x64_avx,nox64, > sse2,sse2_noavx,sse3,sse4,sse4_noavx,avx,noavx, > - > avx2,noavx2,bmi,bmi2,fma4,fma,avx512f,noavx512f,fma_avx512f" > + avx2,noavx2,bmi,bmi2,fma4,fma,avx512f,noavx512f, > + fma_avx512f,avx512bw,noavx512bw" > (const_string "base")) > > (define_attr "enabled" "" > @@ -771,6 +772,8 @@ > (eq_attr "isa" "noavx512f") (symbol_ref "!TARGET_AVX512F") > (eq_attr "isa" "fma_avx512f") > (symbol_ref "TARGET_FMA || TARGET_AVX512F") > + (eq_attr "isa" "avx512bw") (symbol_ref "TARGET_AVX512BW") > + (eq_attr "isa" "noavx512bw") (symbol_ref "!TARGET_AVX512BW") > ] > (const_int 1))) > > @@ -7510,21 +7513,45 @@ > }) > > (define_split > - [(set (match_operand:SWI12 0 "mask_reg_operand") > - (any_logic:SWI12 (match_operand:SWI12 1 "mask_reg_operand") > - (match_operand:SWI12 2 "mask_reg_operand"))) > + [(set (match_operand:SWI1248x 0 "mask_reg_operand") > + (any_logic:SWI1248x (match_operand:SWI1248x 1 "mask_reg_operand") > + (match_operand:SWI1248x 2 "mask_reg_operand"))) > (clobber (reg:CC FLAGS_REG))] > - "TARGET_AVX512F && reload_completed" > +;;TODO removed avx512f check because mask_reg implies it. Please leave TARGET_AVX512F in the insn condition; it short-circuits pattern recognition on non-AVX512F targets. > + "reload_completed" > [(set (match_dup 0) > - (any_logic:SWI12 (match_dup 1) > - (match_dup 2)))]) > + (any_logic:SWI1248x (match_dup 1) > + (match_dup 2)))]) > > -(define_insn "*k<logic><mode>" > - [(set (match_operand:SWI12 0 "mask_reg_operand" "=k") > - (any_logic:SWI12 (match_operand:SWI12 1 "mask_reg_operand" "k") > - (match_operand:SWI12 2 "mask_reg_operand" "k")))] > +(define_insn "*k<logic>qi" > + [(set (match_operand:QI 0 "mask_reg_operand" "=k") > + (any_logic:QI (match_operand:QI 1 "mask_reg_operand" "k") > + (match_operand:QI 2 "mask_reg_operand" "k")))] > + "TARGET_AVX512F" > +{ > + return TARGET_AVX512DQ ? 
"k<logic>b\t{%2, %1, %0|%0, %1, %2}" > + : "k<logic>w\t{%2, %1, %0|%0, %1, %2}"; > +} > + [(set_attr "mode" "QI") > + (set_attr "type" "msklog") > + (set_attr "prefix" "vex")]) > + > +(define_insn "*k<logic>hi" > + [(set (match_operand:HI 0 "mask_reg_operand" "=k") > + (any_logic:HI (match_operand:HI 1 "mask_reg_operand" "k") > + (match_operand:HI 2 "mask_reg_operand" "k")))] > "TARGET_AVX512F" > "k<logic>w\t{%2, %1, %0|%0, %1, %2}"; > + [(set_attr "mode" "HI") > + (set_attr "type" "msklog") > + (set_attr "prefix" "vex")]) > + > +(define_insn "*k<logic><mode>" > + [(set (match_operand:SWI48x 0 "mask_reg_operand" "=k") > + (any_logic:SWI48x (match_operand:SWI48x 1 "mask_reg_operand" "k") > + (match_operand:SWI48x 2 "mask_reg_operand" "k")))] > + "TARGET_AVX512BW" > + "k<logic><mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}"; > [(set_attr "mode" "<MODE>") > (set_attr "type" "msklog") > (set_attr "prefix" "vex")]) > @@ -7580,10 +7607,10 @@ > }) The patterns above can be macroized using conditional modes, but we can live with the above for now. Maybe add a TODO marker here. 
> (define_insn "*anddi_1" > - [(set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r,r") > + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r,r,!k") > (and:DI > - (match_operand:DI 1 "nonimmediate_operand" "%0,0,0,qm") > - (match_operand:DI 2 "x86_64_szext_general_operand" "Z,re,rm,L"))) > + (match_operand:DI 1 "nonimmediate_operand" "%0,0,0,qm,k") > + (match_operand:DI 2 "x86_64_szext_general_operand" "Z,re,rm,L,k"))) > (clobber (reg:CC FLAGS_REG))] > "TARGET_64BIT && ix86_binary_operator_ok (AND, DImode, operands)" > { > @@ -7592,6 +7619,9 @@ > case TYPE_IMOVX: > return "#"; > > + case TYPE_MSKLOG: > + return "kandq\t{%2, %1, %0|%0, %1, %2}"; > + > default: > gcc_assert (rtx_equal_p (operands[0], operands[1])); > if (get_attr_mode (insn) == MODE_SI) > @@ -7600,8 +7630,8 @@ > return "and{q}\t{%2, %0|%0, %2}"; > } > } > - [(set_attr "type" "alu,alu,alu,imovx") > - (set_attr "length_immediate" "*,*,*,0") > + [(set_attr "type" "alu,alu,alu,imovx,msklog") > + (set_attr "length_immediate" "*,*,*,0,0") > (set (attr "prefix_rex") > (if_then_else > (and (eq_attr "type" "imovx") > @@ -7609,12 +7639,12 @@ > (match_operand 1 "ext_QIreg_operand"))) > (const_string "1") > (const_string "*"))) > - (set_attr "mode" "SI,DI,DI,SI")]) > + (set_attr "mode" "SI,DI,DI,SI,DI")]) > > (define_insn "*andsi_1" > - [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r,Ya") > - (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0,qm") > - (match_operand:SI 2 "x86_64_general_operand" "re,rm,L"))) > + [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r,Ya,!k") > + (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0,qm,k") > + (match_operand:SI 2 "x86_64_general_operand" "re,rm,L,k"))) > (clobber (reg:CC FLAGS_REG))] > "ix86_binary_operator_ok (AND, SImode, operands)" > { > @@ -7623,12 +7653,15 @@ > case TYPE_IMOVX: > return "#"; > > + case TYPE_MSKLOG: > + return "kandd\t{%2, %1, %0|%0, %1, %2}"; > + > default: > gcc_assert (rtx_equal_p (operands[0], operands[1])); > 
return "and{l}\t{%2, %0|%0, %2}"; > } > } > - [(set_attr "type" "alu,alu,imovx") > + [(set_attr "type" "alu,alu,imovx,msklog") > (set (attr "prefix_rex") > (if_then_else > (and (eq_attr "type" "imovx") > @@ -7636,7 +7669,7 @@ > (match_operand 1 "ext_QIreg_operand"))) > (const_string "1") > (const_string "*"))) > - (set_attr "length_immediate" "*,*,0") > + (set_attr "length_immediate" "*,*,0,0") > (set_attr "mode" "SI")]) > > ;; See comment for addsi_1_zext why we do use nonimmediate_operand > @@ -7688,11 +7721,21 @@ > (match_operand:QI 2 "general_operand" "qn,qmn,rn,k"))) > (clobber (reg:CC FLAGS_REG))] > "ix86_binary_operator_ok (AND, QImode, operands)" > - "@ > - and{b}\t{%2, %0|%0, %2} > - and{b}\t{%2, %0|%0, %2} > - and{l}\t{%k2, %k0|%k0, %k2} > - kandw\t{%2, %1, %0|%0, %1, %2}" > +{ > + switch (which_alternative) > + { > + case 0: > + case 1: > + return "and{b}\t{%2, %0|%0, %2}"; > + case 2: > + return "and{l}\t{%k2, %k0|%k0, %k2}"; > + case 3: > + return TARGET_AVX512DQ ? "kandb\t{%2, %1, %0|%0, %1, %2}" > + : "kandw\t{%2, %1, %0|%0, %1, %2}"; > + default: > + gcc_unreachable (); > + } > +} > [(set_attr "type" "alu,alu,alu,msklog") > (set_attr "mode" "QI,QI,SI,HI")]) > > @@ -7715,10 +7758,22 @@ > (match_operand:SWI12 2 "register_operand" "r,r,k"))) > (clobber (reg:CC FLAGS_REG))] > "TARGET_AVX512F" > - "@ > - andn\t{%k2, %k1, %k0|%k0, %k1, %k2} > - # > - kandnw\t{%2, %1, %0|%0, %1, %2}" > +{ > + switch (which_alternative) > + { > + case 0: > + return "andn\t{%k2, %k1, %k0|%k0, %k1, %k2}"; > + case 1: > + return "#"; > + case 2: > + if (TARGET_AVX512DQ && <MODE>mode == QImode) > + return "kandnb\t{%2, %1, %0|%0, %1, %2}"; > + else > + return "kandnw\t{%2, %1, %0|%0, %1, %2}"; > + default: > + gcc_unreachable (); > + } > +} > [(set_attr "isa" "bmi,*,avx512f") > (set_attr "type" "bitmanip,*,msklog") > (set_attr "prefix" "*,*,vex") > @@ -8082,14 +8137,17 @@ > "ix86_expand_binary_operator (<CODE>, <MODE>mode, operands); DONE;") > > (define_insn "*<code><mode>_1" > 
- [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r,rm") > + [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r,rm,k") > (any_or:SWI48 > - (match_operand:SWI48 1 "nonimmediate_operand" "%0,0") > - (match_operand:SWI48 2 "<general_operand>" "<g>,r<i>"))) > + (match_operand:SWI48 1 "nonimmediate_operand" "%0,0,k") > + (match_operand:SWI48 2 "<general_operand>" "<g>,r<i>,k"))) > (clobber (reg:CC FLAGS_REG))] > "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" > - "<logic>{<imodesuffix>}\t{%2, %0|%0, %2}" > - [(set_attr "type" "alu") > + "@ > + <logic>{<imodesuffix>}\t{%2, %0|%0, %2} > + <logic>{<imodesuffix>}\t{%2, %0|%0, %2} > + k<logic><mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}" > + [(set_attr "type" "alu,alu,msklog") > (set_attr "mode" "<MODE>")]) > > (define_insn "*<code>hi_1" > @@ -8177,19 +8235,36 @@ > (match_operand:SWI12 2 "register_operand" "r,k")))) > (clobber (reg:CC FLAGS_REG))] > "TARGET_AVX512F" > +{ > + if (which_alternative == 1 && <MODE>mode == QImode && TARGET_AVX512DQ) > + return "kxnorb\t{%2, %1, %0|%0, %1, %2}"; > + return "kxnorw\t{%2, %1, %0|%0, %1, %2}"; > +} > + [(set_attr "type" "*,msklog") > + (set_attr "prefix" "*,vex") > + (set_attr "mode" "<MODE>")]) > + > +(define_insn "kxnor<mode>" > + [(set (match_operand:SWI48x 0 "register_operand" "=r,!k") > + (not:SWI48x > + (xor:SWI48x > + (match_operand:SWI48x 1 "register_operand" "0,k") > + (match_operand:SWI48x 2 "register_operand" "r,k")))) > + (clobber (reg:CC FLAGS_REG))] > + "TARGET_AVX512BW" > "@ > # > - kxnorw\t{%2, %1, %0|%0, %1, %2}" > + kxnor<mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}" > [(set_attr "type" "*,msklog") > (set_attr "prefix" "*,vex") > (set_attr "mode" "<MODE>")]) > > (define_split > - [(set (match_operand:SWI12 0 "general_reg_operand") > - (not:SWI12 > - (xor:SWI12 > + [(set (match_operand:SWI1248x 0 "general_reg_operand") > + (not:SWI1248x > + (xor:SWI1248x > (match_dup 0) > - (match_operand:SWI12 1 "general_reg_operand")))) > + (match_operand:SWI1248x 1 
"general_reg_operand")))) > (clobber (reg:CC FLAGS_REG))] > "TARGET_AVX512F && reload_completed" > [(parallel [(set (match_dup 0) > @@ -8199,6 +8274,8 @@ > (set (match_dup 0) > (not:HI (match_dup 0)))]) > > +;; There are kortest[bdq] instructions, but no intrinsics for them. > +;; We probably don't need to implement them. > (define_insn "kortestzhi" > [(set (reg:CCZ FLAGS_REG) > (compare:CCZ > @@ -8238,6 +8315,28 @@ > (set_attr "type" "msklog") > (set_attr "prefix" "vex")]) > > +(define_insn "kunpcksi" > + [(set (match_operand:SI 0 "register_operand" "=k") > + (ior:SI > + (ashift:SI > + (match_operand:SI 1 "register_operand" "k") > + (const_int 16)) > + (zero_extend:SI (subreg:HI (match_operand:SI 2 "register_operand" > "k") 0))))] > + "TARGET_AVX512BW" > + "kunpckwd\t{%2, %1, %0|%0, %1, %2}" > + [(set_attr "mode" "SI")]) > + > +(define_insn "kunpckdi" > + [(set (match_operand:DI 0 "register_operand" "=k") > + (ior:DI > + (ashift:DI > + (match_operand:DI 1 "register_operand" "k") > + (const_int 32)) > + (zero_extend:DI (subreg:SI (match_operand:DI 2 "register_operand" > "k") 0))))] > + "TARGET_AVX512BW" > + "kunpckdq\t{%2, %1, %0|%0, %1, %2}" > + [(set_attr "mode" "DI")]) > + > ;; See comment for addsi_1_zext why we do use nonimmediate_operand > ;; ??? Special case for immediate operand is missing - it is tricky. 
> (define_insn "*<code>si_2_zext" > @@ -8837,11 +8936,15 @@ > "ix86_expand_unary_operator (NOT, <MODE>mode, operands); DONE;") > > (define_insn "*one_cmpl<mode>2_1" > - [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm") > - (not:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0")))] > + [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,k") > + (not:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,k")))] > "ix86_unary_operator_ok (NOT, <MODE>mode, operands)" > - "not{<imodesuffix>}\t%0" > - [(set_attr "type" "negnot") > + "@ > + not{<imodesuffix>}\t%0 > + knot<mskmodesuffix>\t{%1, %0|%0, %1}" > + [(set_attr "isa" "*,avx512bw") > + (set_attr "type" "negnot,msklog") > + (set_attr "prefix" "*,vex") > (set_attr "mode" "<MODE>")]) > > (define_insn "*one_cmplhi2_1" > @@ -8861,10 +8964,21 @@ > [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,!k") > (not:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,k")))] > "ix86_unary_operator_ok (NOT, QImode, operands)" > - "@ > - not{b}\t%0 > - not{l}\t%k0 > - knotw\t{%1, %0|%0, %1}" > +{ > + switch (which_alternative) > + { > + case 0: > + return "not{b}\t%0"; > + case 1: > + return "not{l}\t%k0"; > + case 2: > + if (TARGET_AVX512DQ) > + return "knotb\t{%1, %0|%0, %1}"; > + return "knotw\t{%1, %0|%0, %1}"; > + default: > + gcc_unreachable (); > + } > +} > [(set_attr "isa" "*,*,avx512f") > (set_attr "type" "negnot,negnot,msklog") > (set_attr "prefix" "*,*,vex") > >